feature gate unicode support in nucleo-matcher

2024-12-22 01:47:49 +00:00 · 2023-08-29 14:25:00 +02:00 · 2023-08-29 14:25:00 +02:00 · 8b22bc28cb
commit 8b22bc28cb
parent 2de732889f
4 changed files with 55 additions and 10 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -27,6 +27,8 @@ jobs:

      - name: Run cargo check
        run: cargo check
+      - name: Run cargo check without default features
+        run: cargo check --no-default-features

  test:
    name: Test
@ -62,6 +64,8 @@ jobs:

      - name: Run cargo clippy
        run: cargo clippy --workspace --all-targets -- -D warnings
+      - name: Run cargo clippy without default features
+        run: cargo clippy --workspace --all-targets --no-default-features -- -D warnings

      - name: Run cargo doc
        run: cargo doc --no-deps --workspace --document-private-items
--- a/matcher/Cargo.toml
+++ b/matcher/Cargo.toml
@ -11,7 +11,13 @@ readme = "../README.md"
 [dependencies]
 memchr = "2.5.0"
 cov-mark = { version = "1.1.0", default-features = false }
-unicode-segmentation = "1.10"
+unicode-segmentation = { version  = "1.10", optional = true }
+
+[features]
+default = ["unicode-normalization", "unicode-casefold", "unicode-segmentation"]
+unicode-normalization = []
+unicode-casefold = []
+unicode-segmentation = ["dep:unicode-segmentation"]

 [dev-dependencies]
 cov-mark = { version = "1.1.0", default-features = true }
--- a/matcher/src/chars.rs
+++ b/matcher/src/chars.rs
@ -2,13 +2,16 @@

 use std::fmt::{self, Debug, Display};

+#[cfg(feature = "unicode-casefold")]
 use crate::chars::case_fold::CASE_FOLDING_SIMPLE;
 use crate::Config;

 //autogenerated by generate-ucd
 #[allow(warnings)]
 #[rustfmt::skip]
+#[cfg(feature = "unicode-casefold")]
 mod case_fold;
+#[cfg(feature = "unicode-normalization")]
 mod normalize;

 pub(crate) trait Char: Copy + Eq + Ord + fmt::Display {
@ -111,11 +114,14 @@ impl Char for char {
            return (c.0 as char, class);
        }
        let char_class = char_class_non_ascii(self);
+        #[cfg(feature = "unicode-casefold")]
        let mut case_fold = char_class == CharClass::Upper;
+        #[cfg(feature = "unicode-normalization")]
        if config.normalize {
            self = normalize::normalize(self);
            case_fold = true
        }
+        #[cfg(feature = "unicode-casefold")]
        if case_fold && config.ignore_case {
            self = CASE_FOLDING_SIMPLE
                .binary_search_by_key(&self, |(upper, _)| *upper)
@ -126,9 +132,11 @@ impl Char for char {

    #[inline(always)]
    fn normalize(mut self, config: &Config) -> Self {
+        #[cfg(feature = "unicode-normalization")]
        if config.normalize {
            self = normalize::normalize(self);
        }
+        #[cfg(feature = "unicode-casefold")]
        if config.ignore_case {
            self = to_lower_case(self)
        }
@ -136,23 +144,31 @@ impl Char for char {
    }
 }

+#[cfg(feature = "unicode-normalization")]
 pub use normalize::normalize;
+#[cfg(feature = "unicode-segmentation")]
 use unicode_segmentation::UnicodeSegmentation;

-#[inline(always)]
 /// Converts a character to lower case using simple unicode case folding
+#[cfg(feature = "unicode-casefold")]
+#[inline(always)]
 pub fn to_lower_case(c: char) -> char {
    CASE_FOLDING_SIMPLE
        .binary_search_by_key(&c, |(upper, _)| *upper)
        .map_or(c, |idx| CASE_FOLDING_SIMPLE[idx].1)
 }

-/// Converts a character to upper case using simple unicode case folding
+/// Checks if a character is upper case according to simple unicode case folding.
+/// If the `unicode-casefold` feature is disabled, the equivalent std function is used.
 #[inline(always)]
 pub fn is_upper_case(c: char) -> bool {
-    CASE_FOLDING_SIMPLE
+    #[cfg(feature = "unicode-casefold")]
+    let val = CASE_FOLDING_SIMPLE
        .binary_search_by_key(&c, |(upper, _)| *upper)
-        .is_ok()
+        .is_ok();
+    #[cfg(not(feature = "unicode-casefold"))]
+    let val = c.is_uppercase();
+    val
 }

 #[derive(Debug, Eq, PartialEq, PartialOrd, Ord, Copy, Clone, Hash)]
@ -171,10 +187,14 @@ pub(crate) enum CharClass {
 /// iterator returns the first character of each unicode grapheme
 /// in a string and is used for constructing `Utf32Str(ing)`.
 pub fn graphemes(text: &str) -> impl Iterator<Item = char> + '_ {
-    text.graphemes(true).map(|grapheme| {
+    #[cfg(feature = "unicode-segmentation")]
+    let res = text.graphemes(true).map(|grapheme| {
        grapheme
            .chars()
            .next()
            .expect("graphemes must be non-empty")
-    })
+    });
+    #[cfg(not(feature = "unicode-segmentation"))]
+    let res = text.chars();
+    res
 }
--- a/matcher/src/pattern.rs
+++ b/matcher/src/pattern.rs
@ -13,13 +13,15 @@ use crate::Utf32String;
 #[non_exhaustive]
 /// How to treat a case mismatch between two characters.
 pub enum CaseMatching {
-    /// Characters always match their case folded version (`a == A`).
-    Ignore,
    /// Characters never match their case folded version (`a != A`).
    Respect,
+    /// Characters always match their case folded version (`a == A`).
+    #[cfg(feature = "unicode-casefold")]
+    Ignore,
    /// Acts like [`Ignore`](CaseMatching::Ignore) if all characters in a pattern atom are
    /// lowercase and like [`Respect`](CaseMatching::Respect) otherwise.
    #[default]
+    #[cfg(feature = "unicode-casefold")]
    Smart,
 }

@ -106,10 +108,12 @@ impl Atom {
            };

            match case {
+                #[cfg(feature = "unicode-casefold")]
                CaseMatching::Ignore => {
                    ignore_case = true;
                    needle.make_ascii_lowercase()
                }
+                #[cfg(feature = "unicode-casefold")]
                CaseMatching::Smart => {
                    ignore_case = !needle.bytes().any(|b| b.is_ascii_uppercase())
                }
@ -121,7 +125,14 @@ impl Atom {
            Utf32String::Ascii(needle.into_boxed_str())
        } else {
            let mut needle_ = Vec::with_capacity(needle.len());
+            #[cfg(feature = "unicode-casefold")]
+            {
                ignore_case = matches!(case, CaseMatching::Ignore | CaseMatching::Smart);
+            }
+            #[cfg(not(feature = "unicode-casefold"))]
+            {
+                ignore_case = false;
+            }
            if escape_whitespace {
                let mut saw_backslash = false;
                for mut c in chars::graphemes(needle) {
@ -136,7 +147,9 @@ impl Atom {
                    }
                    saw_backslash = c == '\\';
                    match case {
+                        #[cfg(feature = "unicode-casefold")]
                        CaseMatching::Ignore => c = chars::to_lower_case(c),
+                        #[cfg(feature = "unicode-casefold")]
                        CaseMatching::Smart => {
                            ignore_case = ignore_case && !chars::is_upper_case(c)
                        }
@ -147,7 +160,9 @@ impl Atom {
            } else {
                let chars = chars::graphemes(needle).map(|mut c| {
                    match case {
+                        #[cfg(feature = "unicode-casefold")]
                        CaseMatching::Ignore => c = chars::to_lower_case(c),
+                        #[cfg(feature = "unicode-casefold")]
                        CaseMatching::Smart => {
                            ignore_case = ignore_case && !chars::is_upper_case(c);
                        }