From 8b22bc28cb7b6ef2e11a85e0f3eeb7a699e892b8 Mon Sep 17 00:00:00 2001 From: Pascal Kuthe Date: Tue, 29 Aug 2023 14:25:00 +0200 Subject: [PATCH] feature gate unicode support in nucleo-matcher --- .github/workflows/ci.yml | 4 ++++ matcher/Cargo.toml | 8 +++++++- matcher/src/chars.rs | 32 ++++++++++++++++++++++++++------ matcher/src/pattern.rs | 21 ++++++++++++++++++--- 4 files changed, 55 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d95c023..a2c2d73 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,6 +27,8 @@ jobs: - name: Run cargo check run: cargo check + - name: Run cargo check without default features + run: cargo check --no-default-features test: name: Test @@ -62,6 +64,8 @@ jobs: - name: Run cargo clippy run: cargo clippy --workspace --all-targets -- -D warnings + - name: Run cargo clippy without default features + run: cargo clippy --workspace --all-targets --no-default-features -- -D warnings - name: Run cargo doc run: cargo doc --no-deps --workspace --document-private-items diff --git a/matcher/Cargo.toml b/matcher/Cargo.toml index 663a493..dbbdbfd 100644 --- a/matcher/Cargo.toml +++ b/matcher/Cargo.toml @@ -11,7 +11,13 @@ readme = "../README.md" [dependencies] memchr = "2.5.0" cov-mark = { version = "1.1.0", default-features = false } -unicode-segmentation = "1.10" +unicode-segmentation = { version = "1.10", optional = true } + +[features] +default = ["unicode-normalization", "unicode-casefold", "unicode-segmentation"] +unicode-normalization = [] +unicode-casefold = [] +unicode-segmentation = ["dep:unicode-segmentation"] [dev-dependencies] cov-mark = { version = "1.1.0", default-features = true } diff --git a/matcher/src/chars.rs b/matcher/src/chars.rs index 9b3bc69..53555f5 100644 --- a/matcher/src/chars.rs +++ b/matcher/src/chars.rs @@ -2,13 +2,16 @@ use std::fmt::{self, Debug, Display}; +#[cfg(feature = "unicode-casefold")] use 
crate::chars::case_fold::CASE_FOLDING_SIMPLE; use crate::Config; //autogenerated by generate-ucd #[allow(warnings)] #[rustfmt::skip] +#[cfg(feature = "unicode-casefold")] mod case_fold; +#[cfg(feature = "unicode-normalization")] mod normalize; pub(crate) trait Char: Copy + Eq + Ord + fmt::Display { @@ -111,11 +114,14 @@ impl Char for char { return (c.0 as char, class); } let char_class = char_class_non_ascii(self); + #[cfg(feature = "unicode-casefold")] let mut case_fold = char_class == CharClass::Upper; + #[cfg(feature = "unicode-normalization")] if config.normalize { self = normalize::normalize(self); case_fold = true } + #[cfg(feature = "unicode-casefold")] if case_fold && config.ignore_case { self = CASE_FOLDING_SIMPLE .binary_search_by_key(&self, |(upper, _)| *upper) @@ -126,9 +132,11 @@ impl Char for char { #[inline(always)] fn normalize(mut self, config: &Config) -> Self { + #[cfg(feature = "unicode-normalization")] if config.normalize { self = normalize::normalize(self); } + #[cfg(feature = "unicode-casefold")] if config.ignore_case { self = to_lower_case(self) } @@ -136,23 +144,31 @@ impl Char for char { } } +#[cfg(feature = "unicode-normalization")] pub use normalize::normalize; +#[cfg(feature = "unicode-segmentation")] use unicode_segmentation::UnicodeSegmentation; -#[inline(always)] /// Converts a character to lower case using simple unicode case folding +#[cfg(feature = "unicode-casefold")] +#[inline(always)] pub fn to_lower_case(c: char) -> char { CASE_FOLDING_SIMPLE .binary_search_by_key(&c, |(upper, _)| *upper) .map_or(c, |idx| CASE_FOLDING_SIMPLE[idx].1) } -/// Converts a character to upper case using simple unicode case folding +/// Checks if a character is upper case according to simple unicode case folding. 
+/// If the `unicode-casefold` feature is disabled, the equivalent std function is used #[inline(always)] pub fn is_upper_case(c: char) -> bool { - CASE_FOLDING_SIMPLE + #[cfg(feature = "unicode-casefold")] + let val = CASE_FOLDING_SIMPLE .binary_search_by_key(&c, |(upper, _)| *upper) - .is_ok() + .is_ok(); + #[cfg(not(feature = "unicode-casefold"))] + let val = c.is_uppercase(); + val } #[derive(Debug, Eq, PartialEq, PartialOrd, Ord, Copy, Clone, Hash)] @@ -171,10 +187,14 @@ pub(crate) enum CharClass { /// iterator returns the first character of each unicode grapheme /// in a string and is used for constructing `Utf32Str(ing)`. pub fn graphemes(text: &str) -> impl Iterator + '_ { - text.graphemes(true).map(|grapheme| { + #[cfg(feature = "unicode-segmentation")] + let res = text.graphemes(true).map(|grapheme| { grapheme .chars() .next() .expect("graphemes must be non-empty") - }) + }); + #[cfg(not(feature = "unicode-segmentation"))] + let res = text.chars(); + res } diff --git a/matcher/src/pattern.rs b/matcher/src/pattern.rs index 1d6f3bf..37f747e 100644 --- a/matcher/src/pattern.rs +++ b/matcher/src/pattern.rs @@ -13,13 +13,15 @@ use crate::Utf32String; #[non_exhaustive] /// How to treat a case mismatch between two characters. pub enum CaseMatching { - /// Characters always match their case folded version (`a == A`). - Ignore, /// Characters never match their case folded version (`a != A`). Respect, + /// Characters always match their case folded version (`a == A`). + #[cfg(feature = "unicode-casefold")] + Ignore, /// Acts like [`Ignore`](CaseMatching::Ignore) if all characters in a pattern atom are /// lowercase and like [`Respect`](CaseMatching::Respect) otherwise. 
#[default] + #[cfg(feature = "unicode-casefold")] Smart, } @@ -106,10 +108,12 @@ impl Atom { }; match case { + #[cfg(feature = "unicode-casefold")] CaseMatching::Ignore => { ignore_case = true; needle.make_ascii_lowercase() } + #[cfg(feature = "unicode-casefold")] CaseMatching::Smart => { ignore_case = !needle.bytes().any(|b| b.is_ascii_uppercase()) } @@ -121,7 +125,14 @@ impl Atom { Utf32String::Ascii(needle.into_boxed_str()) } else { let mut needle_ = Vec::with_capacity(needle.len()); - ignore_case = matches!(case, CaseMatching::Ignore | CaseMatching::Smart); + #[cfg(feature = "unicode-casefold")] + { + ignore_case = matches!(case, CaseMatching::Ignore | CaseMatching::Smart); + } + #[cfg(not(feature = "unicode-casefold"))] + { + ignore_case = false; + } if escape_whitespace { let mut saw_backslash = false; for mut c in chars::graphemes(needle) { @@ -136,7 +147,9 @@ impl Atom { } saw_backslash = c == '\\'; match case { + #[cfg(feature = "unicode-casefold")] CaseMatching::Ignore => c = chars::to_lower_case(c), + #[cfg(feature = "unicode-casefold")] CaseMatching::Smart => { ignore_case = ignore_case && !chars::is_upper_case(c) } @@ -147,7 +160,9 @@ impl Atom { } else { let chars = chars::graphemes(needle).map(|mut c| { match case { + #[cfg(feature = "unicode-casefold")] CaseMatching::Ignore => c = chars::to_lower_case(c), + #[cfg(feature = "unicode-casefold")] CaseMatching::Smart => { ignore_case = ignore_case && !chars::is_upper_case(c); }