feature gate unicode support in nucleo-matcher

This commit is contained in:
Pascal Kuthe 2023-08-29 14:25:00 +02:00
parent 2de732889f
commit 8b22bc28cb
No known key found for this signature in database
GPG Key ID: D715E8655AE166A6
4 changed files with 55 additions and 10 deletions

View File

@ -27,6 +27,8 @@ jobs:
- name: Run cargo check
run: cargo check
- name: Run cargo check without default features
run: cargo check --no-default-features
test:
name: Test
@ -62,6 +64,8 @@ jobs:
- name: Run cargo clippy
run: cargo clippy --workspace --all-targets -- -D warnings
- name: Run cargo clippy without default features
run: cargo clippy --workspace --all-targets --no-default-features -- -D warnings
- name: Run cargo doc
run: cargo doc --no-deps --workspace --document-private-items

View File

@ -11,7 +11,13 @@ readme = "../README.md"
[dependencies]
memchr = "2.5.0"
cov-mark = { version = "1.1.0", default-features = false }
unicode-segmentation = "1.10"
unicode-segmentation = { version = "1.10", optional = true }
[features]
default = ["unicode-normalization", "unicode-casefold", "unicode-segmentation"]
unicode-normalization = []
unicode-casefold = []
unicode-segmentation = ["dep:unicode-segmentation"]
[dev-dependencies]
cov-mark = { version = "1.1.0", default-features = true }

View File

@ -2,13 +2,16 @@
use std::fmt::{self, Debug, Display};
#[cfg(feature = "unicode-casefold")]
use crate::chars::case_fold::CASE_FOLDING_SIMPLE;
use crate::Config;
//autogenerated by generate-ucd
#[allow(warnings)]
#[rustfmt::skip]
#[cfg(feature = "unicode-casefold")]
mod case_fold;
#[cfg(feature = "unicode-normalization")]
mod normalize;
pub(crate) trait Char: Copy + Eq + Ord + fmt::Display {
@ -111,11 +114,14 @@ impl Char for char {
return (c.0 as char, class);
}
let char_class = char_class_non_ascii(self);
#[cfg(feature = "unicode-casefold")]
let mut case_fold = char_class == CharClass::Upper;
#[cfg(feature = "unicode-normalization")]
if config.normalize {
self = normalize::normalize(self);
case_fold = true
}
#[cfg(feature = "unicode-casefold")]
if case_fold && config.ignore_case {
self = CASE_FOLDING_SIMPLE
.binary_search_by_key(&self, |(upper, _)| *upper)
@ -126,9 +132,11 @@ impl Char for char {
#[inline(always)]
fn normalize(mut self, config: &Config) -> Self {
#[cfg(feature = "unicode-normalization")]
if config.normalize {
self = normalize::normalize(self);
}
#[cfg(feature = "unicode-casefold")]
if config.ignore_case {
self = to_lower_case(self)
}
@ -136,23 +144,31 @@ impl Char for char {
}
}
#[cfg(feature = "unicode-normalization")]
pub use normalize::normalize;
#[cfg(feature = "unicode-segmentation")]
use unicode_segmentation::UnicodeSegmentation;
#[inline(always)]
/// Maps `c` to its lower-case form using simple unicode case folding.
///
/// Binary-searches the `CASE_FOLDING_SIMPLE` table for an entry keyed by `c`
/// and returns the second element of that entry; a character with no entry
/// in the table is returned unchanged.
#[cfg(feature = "unicode-casefold")]
#[inline(always)]
pub fn to_lower_case(c: char) -> char {
    match CASE_FOLDING_SIMPLE.binary_search_by_key(&c, |entry| entry.0) {
        Ok(pos) => CASE_FOLDING_SIMPLE[pos].1,
        Err(_) => c,
    }
}
/// Converts a character to upper case using simple unicode case folding
/// Checks if a character is upper case according to simple unicode case folding.
/// If the `unicode-casefold` feature is disabled, the equivalent std function is used.
#[inline(always)]
pub fn is_upper_case(c: char) -> bool {
CASE_FOLDING_SIMPLE
#[cfg(feature = "unicode-casefold")]
let val = CASE_FOLDING_SIMPLE
.binary_search_by_key(&c, |(upper, _)| *upper)
.is_ok()
.is_ok();
#[cfg(not(feature = "unicode-casefold"))]
let val = c.is_uppercase();
val
}
#[derive(Debug, Eq, PartialEq, PartialOrd, Ord, Copy, Clone, Hash)]
@ -171,10 +187,14 @@ pub(crate) enum CharClass {
/// iterator returns the first character of each unicode grapheme
/// in a string and is used for constructing `Utf32Str(ing)`.
pub fn graphemes(text: &str) -> impl Iterator<Item = char> + '_ {
text.graphemes(true).map(|grapheme| {
#[cfg(feature = "unicode-segmentation")]
let res = text.graphemes(true).map(|grapheme| {
grapheme
.chars()
.next()
.expect("graphemes must be non-empty")
})
});
#[cfg(not(feature = "unicode-segmentation"))]
let res = text.chars();
res
}

View File

@ -13,13 +13,15 @@ use crate::Utf32String;
#[non_exhaustive]
/// How to treat a case mismatch between two characters.
pub enum CaseMatching {
/// Characters always match their case folded version (`a == A`).
Ignore,
/// Characters never match their case folded version (`a != A`).
Respect,
/// Characters always match their case folded version (`a == A`).
#[cfg(feature = "unicode-casefold")]
Ignore,
/// Acts like [`Ignore`](CaseMatching::Ignore) if all characters in a pattern atom are
/// lowercase and like [`Respect`](CaseMatching::Respect) otherwise.
#[default]
#[cfg(feature = "unicode-casefold")]
Smart,
}
@ -106,10 +108,12 @@ impl Atom {
};
match case {
#[cfg(feature = "unicode-casefold")]
CaseMatching::Ignore => {
ignore_case = true;
needle.make_ascii_lowercase()
}
#[cfg(feature = "unicode-casefold")]
CaseMatching::Smart => {
ignore_case = !needle.bytes().any(|b| b.is_ascii_uppercase())
}
@ -121,7 +125,14 @@ impl Atom {
Utf32String::Ascii(needle.into_boxed_str())
} else {
let mut needle_ = Vec::with_capacity(needle.len());
#[cfg(feature = "unicode-casefold")]
{
ignore_case = matches!(case, CaseMatching::Ignore | CaseMatching::Smart);
}
#[cfg(not(feature = "unicode-casefold"))]
{
ignore_case = false;
}
if escape_whitespace {
let mut saw_backslash = false;
for mut c in chars::graphemes(needle) {
@ -136,7 +147,9 @@ impl Atom {
}
saw_backslash = c == '\\';
match case {
#[cfg(feature = "unicode-casefold")]
CaseMatching::Ignore => c = chars::to_lower_case(c),
#[cfg(feature = "unicode-casefold")]
CaseMatching::Smart => {
ignore_case = ignore_case && !chars::is_upper_case(c)
}
@ -147,7 +160,9 @@ impl Atom {
} else {
let chars = chars::graphemes(needle).map(|mut c| {
match case {
#[cfg(feature = "unicode-casefold")]
CaseMatching::Ignore => c = chars::to_lower_case(c),
#[cfg(feature = "unicode-casefold")]
CaseMatching::Smart => {
ignore_case = ignore_case && !chars::is_upper_case(c);
}