From 8b22bc28cb7b6ef2e11a85e0f3eeb7a699e892b8 Mon Sep 17 00:00:00 2001
From: Pascal Kuthe <pascal.kuthe@semimod.de>
Date: Tue, 29 Aug 2023 14:25:00 +0200
Subject: [PATCH] feature gate unicode support in nucleo-matcher

---
 .github/workflows/ci.yml |  4 ++++
 matcher/Cargo.toml       |  8 +++++++-
 matcher/src/chars.rs     | 32 ++++++++++++++++++++++++++------
 matcher/src/pattern.rs   | 21 ++++++++++++++++++---
 4 files changed, 55 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d95c023..a2c2d73 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -27,6 +27,8 @@ jobs:
 
       - name: Run cargo check
         run: cargo check
+      - name: Run cargo check without default features
+        run: cargo check --no-default-features
 
   test:
     name: Test
@@ -62,6 +64,8 @@ jobs:
 
       - name: Run cargo clippy
         run: cargo clippy --workspace --all-targets -- -D warnings
+      - name: Run cargo clippy without default features
+        run: cargo clippy --workspace --all-targets --no-default-features -- -D warnings
 
       - name: Run cargo doc
         run: cargo doc --no-deps --workspace --document-private-items
diff --git a/matcher/Cargo.toml b/matcher/Cargo.toml
index 663a493..dbbdbfd 100644
--- a/matcher/Cargo.toml
+++ b/matcher/Cargo.toml
@@ -11,7 +11,13 @@ readme = "../README.md"
 [dependencies]
 memchr = "2.5.0"
 cov-mark = { version = "1.1.0", default-features = false }
-unicode-segmentation = "1.10"
+unicode-segmentation = { version  = "1.10", optional = true }
+
+[features]
+default = ["unicode-normalization", "unicode-casefold", "unicode-segmentation"]
+unicode-normalization = []
+unicode-casefold = []
+unicode-segmentation = ["dep:unicode-segmentation"]
 
 [dev-dependencies]
 cov-mark = { version = "1.1.0", default-features = true }
diff --git a/matcher/src/chars.rs b/matcher/src/chars.rs
index 9b3bc69..53555f5 100644
--- a/matcher/src/chars.rs
+++ b/matcher/src/chars.rs
@@ -2,13 +2,16 @@
 
 use std::fmt::{self, Debug, Display};
 
+#[cfg(feature = "unicode-casefold")]
 use crate::chars::case_fold::CASE_FOLDING_SIMPLE;
 use crate::Config;
 
 //autogenerated by generate-ucd
 #[allow(warnings)]
 #[rustfmt::skip]
+#[cfg(feature = "unicode-casefold")]
 mod case_fold;
+#[cfg(feature = "unicode-normalization")]
 mod normalize;
 
 pub(crate) trait Char: Copy + Eq + Ord + fmt::Display {
@@ -111,11 +114,14 @@ impl Char for char {
             return (c.0 as char, class);
         }
         let char_class = char_class_non_ascii(self);
+        #[cfg(feature = "unicode-casefold")]
         let mut case_fold = char_class == CharClass::Upper;
+        #[cfg(feature = "unicode-normalization")]
         if config.normalize {
             self = normalize::normalize(self);
             case_fold = true
         }
+        #[cfg(feature = "unicode-casefold")]
         if case_fold && config.ignore_case {
             self = CASE_FOLDING_SIMPLE
                 .binary_search_by_key(&self, |(upper, _)| *upper)
@@ -126,9 +132,11 @@ impl Char for char {
 
     #[inline(always)]
     fn normalize(mut self, config: &Config) -> Self {
+        #[cfg(feature = "unicode-normalization")]
         if config.normalize {
             self = normalize::normalize(self);
         }
+        #[cfg(feature = "unicode-casefold")]
         if config.ignore_case {
             self = to_lower_case(self)
         }
@@ -136,23 +144,31 @@ impl Char for char {
     }
 }
 
+#[cfg(feature = "unicode-normalization")]
 pub use normalize::normalize;
+#[cfg(feature = "unicode-segmentation")]
 use unicode_segmentation::UnicodeSegmentation;
 
-#[inline(always)]
 /// Converts a character to lower case using simple unicode case folding
+#[cfg(feature = "unicode-casefold")]
+#[inline(always)]
 pub fn to_lower_case(c: char) -> char {
     CASE_FOLDING_SIMPLE
         .binary_search_by_key(&c, |(upper, _)| *upper)
         .map_or(c, |idx| CASE_FOLDING_SIMPLE[idx].1)
 }
 
-/// Converts a character to upper case using simple unicode case folding
+/// Checks if a character is upper case according to simple unicode case folding.
+/// If the `unicode-casefold` feature is disabled, `char::is_uppercase` from std is used instead.
 #[inline(always)]
 pub fn is_upper_case(c: char) -> bool {
-    CASE_FOLDING_SIMPLE
+    #[cfg(feature = "unicode-casefold")]
+    let val = CASE_FOLDING_SIMPLE
         .binary_search_by_key(&c, |(upper, _)| *upper)
-        .is_ok()
+        .is_ok();
+    #[cfg(not(feature = "unicode-casefold"))]
+    let val = c.is_uppercase();
+    val
 }
 
 #[derive(Debug, Eq, PartialEq, PartialOrd, Ord, Copy, Clone, Hash)]
@@ -171,10 +187,14 @@ pub(crate) enum CharClass {
 /// iterator returns the first character of each unicode grapheme
 /// in a string and is used for constructing `Utf32Str(ing)`.
 pub fn graphemes(text: &str) -> impl Iterator<Item = char> + '_ {
-    text.graphemes(true).map(|grapheme| {
+    #[cfg(feature = "unicode-segmentation")]
+    let res = text.graphemes(true).map(|grapheme| {
         grapheme
             .chars()
             .next()
             .expect("graphemes must be non-empty")
-    })
+    });
+    #[cfg(not(feature = "unicode-segmentation"))]
+    let res = text.chars();
+    res
 }
diff --git a/matcher/src/pattern.rs b/matcher/src/pattern.rs
index 1d6f3bf..37f747e 100644
--- a/matcher/src/pattern.rs
+++ b/matcher/src/pattern.rs
@@ -13,13 +13,15 @@ use crate::Utf32String;
 #[non_exhaustive]
 /// How to treat a case mismatch between two characters.
 pub enum CaseMatching {
-    /// Characters always match their case folded version (`a == A`).
-    Ignore,
     /// Characters never match their case folded version (`a != A`).
     Respect,
+    /// Characters always match their case folded version (`a == A`).
+    #[cfg(feature = "unicode-casefold")]
+    Ignore,
     /// Acts like [`Ignore`](CaseMatching::Ignore) if all characters in a pattern atom are
     /// lowercase and like [`Respect`](CaseMatching::Respect) otherwise.
     #[default]
+    #[cfg(feature = "unicode-casefold")]
     Smart,
 }
 
@@ -106,10 +108,12 @@ impl Atom {
             };
 
             match case {
+                #[cfg(feature = "unicode-casefold")]
                 CaseMatching::Ignore => {
                     ignore_case = true;
                     needle.make_ascii_lowercase()
                 }
+                #[cfg(feature = "unicode-casefold")]
                 CaseMatching::Smart => {
                     ignore_case = !needle.bytes().any(|b| b.is_ascii_uppercase())
                 }
@@ -121,7 +125,14 @@ impl Atom {
             Utf32String::Ascii(needle.into_boxed_str())
         } else {
             let mut needle_ = Vec::with_capacity(needle.len());
-            ignore_case = matches!(case, CaseMatching::Ignore | CaseMatching::Smart);
+            #[cfg(feature = "unicode-casefold")]
+            {
+                ignore_case = matches!(case, CaseMatching::Ignore | CaseMatching::Smart);
+            }
+            #[cfg(not(feature = "unicode-casefold"))]
+            {
+                ignore_case = false;
+            }
             if escape_whitespace {
                 let mut saw_backslash = false;
                 for mut c in chars::graphemes(needle) {
@@ -136,7 +147,9 @@ impl Atom {
                     }
                     saw_backslash = c == '\\';
                     match case {
+                        #[cfg(feature = "unicode-casefold")]
                         CaseMatching::Ignore => c = chars::to_lower_case(c),
+                        #[cfg(feature = "unicode-casefold")]
                         CaseMatching::Smart => {
                             ignore_case = ignore_case && !chars::is_upper_case(c)
                         }
@@ -147,7 +160,9 @@ impl Atom {
             } else {
                 let chars = chars::graphemes(needle).map(|mut c| {
                     match case {
+                        #[cfg(feature = "unicode-casefold")]
                         CaseMatching::Ignore => c = chars::to_lower_case(c),
+                        #[cfg(feature = "unicode-casefold")]
                         CaseMatching::Smart => {
                             ignore_case = ignore_case && !chars::is_upper_case(c);
                         }