feature gate unicode support in nucleo-matcher

2024-12-22 09:57:49 +00:00 · 2023-08-29 14:25:00 +02:00 · 2023-08-29 14:25:00 +02:00 · 8b22bc28cb
commit 8b22bc28cb
parent 2de732889f
4 changed files with 55 additions and 10 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -27,6 +27,8 @@ jobs:
      - name: Run cargo check
        run: cargo check
      - name: Run cargo check without default features
        run: cargo check --no-default-features
  test:
    name: Test
@ -62,6 +64,8 @@ jobs:
      - name: Run cargo clippy
        run: cargo clippy --workspace --all-targets -- -D warnings
      - name: Run cargo clippy without default features
        run: cargo clippy --workspace --all-targets --no-default-features -- -D warnings
      - name: Run cargo doc
        run: cargo doc --no-deps --workspace --document-private-items
--- a/matcher/Cargo.toml
+++ b/matcher/Cargo.toml
@ -11,7 +11,13 @@ readme = "../README.md"
 [dependencies]
 memchr = "2.5.0"
 cov-mark = { version = "1.1.0", default-features = false }
-unicode-segmentation = "1.10"
+unicode-segmentation = { version  = "1.10", optional = true }
 [features]
 default = ["unicode-normalization", "unicode-casefold", "unicode-segmentation"]
 unicode-normalization = []
 unicode-casefold = []
 unicode-segmentation = ["dep:unicode-segmentation"]
 [dev-dependencies]
 cov-mark = { version = "1.1.0", default-features = true }
--- a/matcher/src/chars.rs
+++ b/matcher/src/chars.rs
@ -2,13 +2,16 @@
 use std::fmt::{self, Debug, Display};
 #[cfg(feature = "unicode-casefold")]
 use crate::chars::case_fold::CASE_FOLDING_SIMPLE;
 use crate::Config;
 //autogenerated by generate-ucd
 #[allow(warnings)]
 #[rustfmt::skip]
 #[cfg(feature = "unicode-casefold")]
 mod case_fold;
 #[cfg(feature = "unicode-normalization")]
 mod normalize;
 pub(crate) trait Char: Copy + Eq + Ord + fmt::Display {
@ -111,11 +114,14 @@ impl Char for char {
            return (c.0 as char, class);
        }
        let char_class = char_class_non_ascii(self);
        #[cfg(feature = "unicode-casefold")]
        let mut case_fold = char_class == CharClass::Upper;
        #[cfg(feature = "unicode-normalization")]
        if config.normalize {
            self = normalize::normalize(self);
            case_fold = true
        }
        #[cfg(feature = "unicode-casefold")]
        if case_fold && config.ignore_case {
            self = CASE_FOLDING_SIMPLE
                .binary_search_by_key(&self, |(upper, _)| *upper)
@ -126,9 +132,11 @@ impl Char for char {
    #[inline(always)]
    fn normalize(mut self, config: &Config) -> Self {
        #[cfg(feature = "unicode-normalization")]
        if config.normalize {
            self = normalize::normalize(self);
        }
        #[cfg(feature = "unicode-casefold")]
        if config.ignore_case {
            self = to_lower_case(self)
        }
@ -136,23 +144,31 @@ impl Char for char {
    }
 }
 #[cfg(feature = "unicode-normalization")]
 pub use normalize::normalize;
 #[cfg(feature = "unicode-segmentation")]
 use unicode_segmentation::UnicodeSegmentation;
 #[inline(always)]
 /// Converts a character to lower case using simple unicode case folding
 #[cfg(feature = "unicode-casefold")]
 #[inline(always)]
 pub fn to_lower_case(c: char) -> char {
    CASE_FOLDING_SIMPLE
        .binary_search_by_key(&c, |(upper, _)| *upper)
        .map_or(c, |idx| CASE_FOLDING_SIMPLE[idx].1)
 }
-/// Converts a character to upper case using simple unicode case folding
+/// Checks if a character is upper case according to simple unicode case folding.
 /// If the `unicode-casefold` feature is disabled, the equivalent std function is used.
 #[inline(always)]
 pub fn is_upper_case(c: char) -> bool {
-    CASE_FOLDING_SIMPLE
+    #[cfg(feature = "unicode-casefold")]
    let val = CASE_FOLDING_SIMPLE
        .binary_search_by_key(&c, |(upper, _)| *upper)
-        .is_ok()
+        .is_ok();
    #[cfg(not(feature = "unicode-casefold"))]
    let val = c.is_uppercase();
    val
 }
 #[derive(Debug, Eq, PartialEq, PartialOrd, Ord, Copy, Clone, Hash)]
@ -171,10 +187,14 @@ pub(crate) enum CharClass {
 /// iterator returns the first character of each unicode grapheme
 /// in a string and is used for constructing `Utf32Str(ing)`.
 pub fn graphemes(text: &str) -> impl Iterator<Item = char> + '_ {
-    text.graphemes(true).map(|grapheme| {
+    #[cfg(feature = "unicode-segmentation")]
    let res = text.graphemes(true).map(|grapheme| {
        grapheme
            .chars()
            .next()
            .expect("graphemes must be non-empty")
-    })
+    });
    #[cfg(not(feature = "unicode-segmentation"))]
    let res = text.chars();
    res
 }
--- a/matcher/src/pattern.rs
+++ b/matcher/src/pattern.rs
@ -13,13 +13,15 @@ use crate::Utf32String;
 #[non_exhaustive]
 /// How to treat a case mismatch between two characters.
 pub enum CaseMatching {
    /// Characters always match their case folded version (`a == A`).
    Ignore,
    /// Characters never match their case folded version (`a != A`).
    Respect,
    /// Characters always match their case folded version (`a == A`).
    #[cfg(feature = "unicode-casefold")]
    Ignore,
    /// Acts like [`Ignore`](CaseMatching::Ignore) if all characters in a pattern atom are
    /// lowercase and like [`Respect`](CaseMatching::Respect) otherwise.
    #[default]
    #[cfg(feature = "unicode-casefold")]
    Smart,
 }
@ -106,10 +108,12 @@ impl Atom {
            };
            match case {
                #[cfg(feature = "unicode-casefold")]
                CaseMatching::Ignore => {
                    ignore_case = true;
                    needle.make_ascii_lowercase()
                }
                #[cfg(feature = "unicode-casefold")]
                CaseMatching::Smart => {
                    ignore_case = !needle.bytes().any(|b| b.is_ascii_uppercase())
                }
@ -121,7 +125,14 @@ impl Atom {
            Utf32String::Ascii(needle.into_boxed_str())
        } else {
            let mut needle_ = Vec::with_capacity(needle.len());
-            ignore_case = matches!(case, CaseMatching::Ignore | CaseMatching::Smart);
+            #[cfg(feature = "unicode-casefold")]
            {
                ignore_case = matches!(case, CaseMatching::Ignore | CaseMatching::Smart);
            }
            #[cfg(not(feature = "unicode-casefold"))]
            {
                ignore_case = false;
            }
            if escape_whitespace {
                let mut saw_backslash = false;
                for mut c in chars::graphemes(needle) {
@ -136,7 +147,9 @@ impl Atom {
                    }
                    saw_backslash = c == '\\';
                    match case {
                        #[cfg(feature = "unicode-casefold")]
                        CaseMatching::Ignore => c = chars::to_lower_case(c),
                        #[cfg(feature = "unicode-casefold")]
                        CaseMatching::Smart => {
                            ignore_case = ignore_case && !chars::is_upper_case(c)
                        }
@ -147,7 +160,9 @@ impl Atom {
            } else {
                let chars = chars::graphemes(needle).map(|mut c| {
                    match case {
                        #[cfg(feature = "unicode-casefold")]
                        CaseMatching::Ignore => c = chars::to_lower_case(c),
                        #[cfg(feature = "unicode-casefold")]
                        CaseMatching::Smart => {
                            ignore_case = ignore_case && !chars::is_upper_case(c);
                        }