mirror of
https://github.com/solaeus/nucleo.git
synced 2024-11-12 18:17:11 +00:00
feature gate unicode support in nucleo-matcher
This commit is contained in:
parent
2de732889f
commit
8b22bc28cb
4
.github/workflows/ci.yml
vendored
4
.github/workflows/ci.yml
vendored
@ -27,6 +27,8 @@ jobs:
|
||||
|
||||
- name: Run cargo check
|
||||
run: cargo check
|
||||
- name: Run cargo check without default features
|
||||
run: cargo check --no-default-features
|
||||
|
||||
test:
|
||||
name: Test
|
||||
@ -62,6 +64,8 @@ jobs:
|
||||
|
||||
- name: Run cargo clippy
|
||||
run: cargo clippy --workspace --all-targets -- -D warnings
|
||||
- name: Run cargo clippy without default features
|
||||
run: cargo clippy --workspace --all-targets --no-default-features -- -D warnings
|
||||
|
||||
- name: Run cargo doc
|
||||
run: cargo doc --no-deps --workspace --document-private-items
|
||||
|
@ -11,7 +11,13 @@ readme = "../README.md"
|
||||
[dependencies]
|
||||
memchr = "2.5.0"
|
||||
cov-mark = { version = "1.1.0", default-features = false }
|
||||
unicode-segmentation = "1.10"
|
||||
unicode-segmentation = { version = "1.10", optional = true }
|
||||
|
||||
[features]
|
||||
default = ["unicode-normalization", "unicode-casefold", "unicode-segmentation"]
|
||||
unicode-normalization = []
|
||||
unicode-casefold = []
|
||||
unicode-segmentation = ["dep:unicode-segmentation"]
|
||||
|
||||
[dev-dependencies]
|
||||
cov-mark = { version = "1.1.0", default-features = true }
|
||||
|
@ -2,13 +2,16 @@
|
||||
|
||||
use std::fmt::{self, Debug, Display};
|
||||
|
||||
#[cfg(feature = "unicode-casefold")]
|
||||
use crate::chars::case_fold::CASE_FOLDING_SIMPLE;
|
||||
use crate::Config;
|
||||
|
||||
//autogenerated by generate-ucd
|
||||
#[allow(warnings)]
|
||||
#[rustfmt::skip]
|
||||
#[cfg(feature = "unicode-casefold")]
|
||||
mod case_fold;
|
||||
#[cfg(feature = "unicode-normalization")]
|
||||
mod normalize;
|
||||
|
||||
pub(crate) trait Char: Copy + Eq + Ord + fmt::Display {
|
||||
@ -111,11 +114,14 @@ impl Char for char {
|
||||
return (c.0 as char, class);
|
||||
}
|
||||
let char_class = char_class_non_ascii(self);
|
||||
#[cfg(feature = "unicode-casefold")]
|
||||
let mut case_fold = char_class == CharClass::Upper;
|
||||
#[cfg(feature = "unicode-normalization")]
|
||||
if config.normalize {
|
||||
self = normalize::normalize(self);
|
||||
case_fold = true
|
||||
}
|
||||
#[cfg(feature = "unicode-casefold")]
|
||||
if case_fold && config.ignore_case {
|
||||
self = CASE_FOLDING_SIMPLE
|
||||
.binary_search_by_key(&self, |(upper, _)| *upper)
|
||||
@ -126,9 +132,11 @@ impl Char for char {
|
||||
|
||||
#[inline(always)]
|
||||
fn normalize(mut self, config: &Config) -> Self {
|
||||
#[cfg(feature = "unicode-normalization")]
|
||||
if config.normalize {
|
||||
self = normalize::normalize(self);
|
||||
}
|
||||
#[cfg(feature = "unicode-casefold")]
|
||||
if config.ignore_case {
|
||||
self = to_lower_case(self)
|
||||
}
|
||||
@ -136,23 +144,31 @@ impl Char for char {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "unicode-normalization")]
|
||||
pub use normalize::normalize;
|
||||
#[cfg(feature = "unicode-segmentation")]
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
|
||||
#[inline(always)]
|
||||
/// Converts a character to lower case using simple unicode case folding
|
||||
#[cfg(feature = "unicode-casefold")]
|
||||
#[inline(always)]
|
||||
pub fn to_lower_case(c: char) -> char {
|
||||
CASE_FOLDING_SIMPLE
|
||||
.binary_search_by_key(&c, |(upper, _)| *upper)
|
||||
.map_or(c, |idx| CASE_FOLDING_SIMPLE[idx].1)
|
||||
}
|
||||
|
||||
/// Converts a character to upper case using simple unicode case folding
|
||||
/// Checks if a character is upper case according to simple unicode case folding.
|
||||
/// If the `unicode-casefold` feature is disabled, the equivalent std function is used.
|
||||
#[inline(always)]
|
||||
pub fn is_upper_case(c: char) -> bool {
|
||||
CASE_FOLDING_SIMPLE
|
||||
#[cfg(feature = "unicode-casefold")]
|
||||
let val = CASE_FOLDING_SIMPLE
|
||||
.binary_search_by_key(&c, |(upper, _)| *upper)
|
||||
.is_ok()
|
||||
.is_ok();
|
||||
#[cfg(not(feature = "unicode-casefold"))]
|
||||
let val = c.is_uppercase();
|
||||
val
|
||||
}
|
||||
|
||||
#[derive(Debug, Eq, PartialEq, PartialOrd, Ord, Copy, Clone, Hash)]
|
||||
@ -171,10 +187,14 @@ pub(crate) enum CharClass {
|
||||
/// iterator returns the first character of each unicode grapheme
|
||||
/// in a string and is used for constructing `Utf32Str(ing)`.
|
||||
pub fn graphemes(text: &str) -> impl Iterator<Item = char> + '_ {
|
||||
text.graphemes(true).map(|grapheme| {
|
||||
#[cfg(feature = "unicode-segmentation")]
|
||||
let res = text.graphemes(true).map(|grapheme| {
|
||||
grapheme
|
||||
.chars()
|
||||
.next()
|
||||
.expect("graphemes must be non-empty")
|
||||
})
|
||||
});
|
||||
#[cfg(not(feature = "unicode-segmentation"))]
|
||||
let res = text.chars();
|
||||
res
|
||||
}
|
||||
|
@ -13,13 +13,15 @@ use crate::Utf32String;
|
||||
#[non_exhaustive]
|
||||
/// How to treat a case mismatch between two characters.
|
||||
pub enum CaseMatching {
|
||||
/// Characters always match their case folded version (`a == A`).
|
||||
Ignore,
|
||||
/// Characters never match their case folded version (`a != A`).
|
||||
Respect,
|
||||
/// Characters always match their case folded version (`a == A`).
|
||||
#[cfg(feature = "unicode-casefold")]
|
||||
Ignore,
|
||||
/// Acts like [`Ignore`](CaseMatching::Ignore) if all characters in a pattern atom are
|
||||
/// lowercase and like [`Respect`](CaseMatching::Respect) otherwise.
|
||||
#[default]
|
||||
#[cfg(feature = "unicode-casefold")]
|
||||
Smart,
|
||||
}
|
||||
|
||||
@ -106,10 +108,12 @@ impl Atom {
|
||||
};
|
||||
|
||||
match case {
|
||||
#[cfg(feature = "unicode-casefold")]
|
||||
CaseMatching::Ignore => {
|
||||
ignore_case = true;
|
||||
needle.make_ascii_lowercase()
|
||||
}
|
||||
#[cfg(feature = "unicode-casefold")]
|
||||
CaseMatching::Smart => {
|
||||
ignore_case = !needle.bytes().any(|b| b.is_ascii_uppercase())
|
||||
}
|
||||
@ -121,7 +125,14 @@ impl Atom {
|
||||
Utf32String::Ascii(needle.into_boxed_str())
|
||||
} else {
|
||||
let mut needle_ = Vec::with_capacity(needle.len());
|
||||
ignore_case = matches!(case, CaseMatching::Ignore | CaseMatching::Smart);
|
||||
#[cfg(feature = "unicode-casefold")]
|
||||
{
|
||||
ignore_case = matches!(case, CaseMatching::Ignore | CaseMatching::Smart);
|
||||
}
|
||||
#[cfg(not(feature = "unicode-casefold"))]
|
||||
{
|
||||
ignore_case = false;
|
||||
}
|
||||
if escape_whitespace {
|
||||
let mut saw_backslash = false;
|
||||
for mut c in chars::graphemes(needle) {
|
||||
@ -136,7 +147,9 @@ impl Atom {
|
||||
}
|
||||
saw_backslash = c == '\\';
|
||||
match case {
|
||||
#[cfg(feature = "unicode-casefold")]
|
||||
CaseMatching::Ignore => c = chars::to_lower_case(c),
|
||||
#[cfg(feature = "unicode-casefold")]
|
||||
CaseMatching::Smart => {
|
||||
ignore_case = ignore_case && !chars::is_upper_case(c)
|
||||
}
|
||||
@ -147,7 +160,9 @@ impl Atom {
|
||||
} else {
|
||||
let chars = chars::graphemes(needle).map(|mut c| {
|
||||
match case {
|
||||
#[cfg(feature = "unicode-casefold")]
|
||||
CaseMatching::Ignore => c = chars::to_lower_case(c),
|
||||
#[cfg(feature = "unicode-casefold")]
|
||||
CaseMatching::Smart => {
|
||||
ignore_case = ignore_case && !chars::is_upper_case(c);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user