mirror of
https://github.com/solaeus/nucleo.git
synced 2024-12-22 09:57:49 +00:00
feature gate unicode support in nucleo-matcher
This commit is contained in:
parent
2de732889f
commit
8b22bc28cb
4
.github/workflows/ci.yml
vendored
4
.github/workflows/ci.yml
vendored
@ -27,6 +27,8 @@ jobs:
|
|||||||
|
|
||||||
- name: Run cargo check
|
- name: Run cargo check
|
||||||
run: cargo check
|
run: cargo check
|
||||||
|
- name: Run cargo check without default features
|
||||||
|
run: cargo check --no-default-features
|
||||||
|
|
||||||
test:
|
test:
|
||||||
name: Test
|
name: Test
|
||||||
@ -62,6 +64,8 @@ jobs:
|
|||||||
|
|
||||||
- name: Run cargo clippy
|
- name: Run cargo clippy
|
||||||
run: cargo clippy --workspace --all-targets -- -D warnings
|
run: cargo clippy --workspace --all-targets -- -D warnings
|
||||||
|
- name: Run cargo clippy without default features
|
||||||
|
run: cargo clippy --workspace --all-targets --no-default-features -- -D warnings
|
||||||
|
|
||||||
- name: Run cargo doc
|
- name: Run cargo doc
|
||||||
run: cargo doc --no-deps --workspace --document-private-items
|
run: cargo doc --no-deps --workspace --document-private-items
|
||||||
|
@ -11,7 +11,13 @@ readme = "../README.md"
|
|||||||
[dependencies]
|
[dependencies]
|
||||||
memchr = "2.5.0"
|
memchr = "2.5.0"
|
||||||
cov-mark = { version = "1.1.0", default-features = false }
|
cov-mark = { version = "1.1.0", default-features = false }
|
||||||
unicode-segmentation = "1.10"
|
unicode-segmentation = { version = "1.10", optional = true }
|
||||||
|
|
||||||
|
[features]
|
||||||
|
default = ["unicode-normalization", "unicode-casefold", "unicode-segmentation"]
|
||||||
|
unicode-normalization = []
|
||||||
|
unicode-casefold = []
|
||||||
|
unicode-segmentation = ["dep:unicode-segmentation"]
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
cov-mark = { version = "1.1.0", default-features = true }
|
cov-mark = { version = "1.1.0", default-features = true }
|
||||||
|
@ -2,13 +2,16 @@
|
|||||||
|
|
||||||
use std::fmt::{self, Debug, Display};
|
use std::fmt::{self, Debug, Display};
|
||||||
|
|
||||||
|
#[cfg(feature = "unicode-casefold")]
|
||||||
use crate::chars::case_fold::CASE_FOLDING_SIMPLE;
|
use crate::chars::case_fold::CASE_FOLDING_SIMPLE;
|
||||||
use crate::Config;
|
use crate::Config;
|
||||||
|
|
||||||
//autogenerated by generate-ucd
|
//autogenerated by generate-ucd
|
||||||
#[allow(warnings)]
|
#[allow(warnings)]
|
||||||
#[rustfmt::skip]
|
#[rustfmt::skip]
|
||||||
|
#[cfg(feature = "unicode-casefold")]
|
||||||
mod case_fold;
|
mod case_fold;
|
||||||
|
#[cfg(feature = "unicode-normalization")]
|
||||||
mod normalize;
|
mod normalize;
|
||||||
|
|
||||||
pub(crate) trait Char: Copy + Eq + Ord + fmt::Display {
|
pub(crate) trait Char: Copy + Eq + Ord + fmt::Display {
|
||||||
@ -111,11 +114,14 @@ impl Char for char {
|
|||||||
return (c.0 as char, class);
|
return (c.0 as char, class);
|
||||||
}
|
}
|
||||||
let char_class = char_class_non_ascii(self);
|
let char_class = char_class_non_ascii(self);
|
||||||
|
#[cfg(feature = "unicode-casefold")]
|
||||||
let mut case_fold = char_class == CharClass::Upper;
|
let mut case_fold = char_class == CharClass::Upper;
|
||||||
|
#[cfg(feature = "unicode-normalization")]
|
||||||
if config.normalize {
|
if config.normalize {
|
||||||
self = normalize::normalize(self);
|
self = normalize::normalize(self);
|
||||||
case_fold = true
|
case_fold = true
|
||||||
}
|
}
|
||||||
|
#[cfg(feature = "unicode-casefold")]
|
||||||
if case_fold && config.ignore_case {
|
if case_fold && config.ignore_case {
|
||||||
self = CASE_FOLDING_SIMPLE
|
self = CASE_FOLDING_SIMPLE
|
||||||
.binary_search_by_key(&self, |(upper, _)| *upper)
|
.binary_search_by_key(&self, |(upper, _)| *upper)
|
||||||
@ -126,9 +132,11 @@ impl Char for char {
|
|||||||
|
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn normalize(mut self, config: &Config) -> Self {
|
fn normalize(mut self, config: &Config) -> Self {
|
||||||
|
#[cfg(feature = "unicode-normalization")]
|
||||||
if config.normalize {
|
if config.normalize {
|
||||||
self = normalize::normalize(self);
|
self = normalize::normalize(self);
|
||||||
}
|
}
|
||||||
|
#[cfg(feature = "unicode-casefold")]
|
||||||
if config.ignore_case {
|
if config.ignore_case {
|
||||||
self = to_lower_case(self)
|
self = to_lower_case(self)
|
||||||
}
|
}
|
||||||
@ -136,23 +144,31 @@ impl Char for char {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "unicode-normalization")]
|
||||||
pub use normalize::normalize;
|
pub use normalize::normalize;
|
||||||
|
#[cfg(feature = "unicode-segmentation")]
|
||||||
use unicode_segmentation::UnicodeSegmentation;
|
use unicode_segmentation::UnicodeSegmentation;
|
||||||
|
|
||||||
#[inline(always)]
|
|
||||||
/// Converts a character to lower case using simple unicode case folding
|
/// Converts a character to lower case using simple unicode case folding
|
||||||
|
#[cfg(feature = "unicode-casefold")]
|
||||||
|
#[inline(always)]
|
||||||
pub fn to_lower_case(c: char) -> char {
|
pub fn to_lower_case(c: char) -> char {
|
||||||
CASE_FOLDING_SIMPLE
|
CASE_FOLDING_SIMPLE
|
||||||
.binary_search_by_key(&c, |(upper, _)| *upper)
|
.binary_search_by_key(&c, |(upper, _)| *upper)
|
||||||
.map_or(c, |idx| CASE_FOLDING_SIMPLE[idx].1)
|
.map_or(c, |idx| CASE_FOLDING_SIMPLE[idx].1)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Converts a character to upper case using simple unicode case folding
|
/// Checks if a character is upper case according to simple unicode case folding.
|
||||||
|
/// If the `unicode-casefold` feature is disabled, the equivalent std function is used.
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
pub fn is_upper_case(c: char) -> bool {
|
pub fn is_upper_case(c: char) -> bool {
|
||||||
CASE_FOLDING_SIMPLE
|
#[cfg(feature = "unicode-casefold")]
|
||||||
|
let val = CASE_FOLDING_SIMPLE
|
||||||
.binary_search_by_key(&c, |(upper, _)| *upper)
|
.binary_search_by_key(&c, |(upper, _)| *upper)
|
||||||
.is_ok()
|
.is_ok();
|
||||||
|
#[cfg(not(feature = "unicode-casefold"))]
|
||||||
|
let val = c.is_uppercase();
|
||||||
|
val
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Eq, PartialEq, PartialOrd, Ord, Copy, Clone, Hash)]
|
#[derive(Debug, Eq, PartialEq, PartialOrd, Ord, Copy, Clone, Hash)]
|
||||||
@ -171,10 +187,14 @@ pub(crate) enum CharClass {
|
|||||||
/// iterator returns the first character of each unicode grapheme
|
/// iterator returns the first character of each unicode grapheme
|
||||||
/// in a string and is used for constructing `Utf32Str(ing)`.
|
/// in a string and is used for constructing `Utf32Str(ing)`.
|
||||||
pub fn graphemes(text: &str) -> impl Iterator<Item = char> + '_ {
|
pub fn graphemes(text: &str) -> impl Iterator<Item = char> + '_ {
|
||||||
text.graphemes(true).map(|grapheme| {
|
#[cfg(feature = "unicode-segmentation")]
|
||||||
|
let res = text.graphemes(true).map(|grapheme| {
|
||||||
grapheme
|
grapheme
|
||||||
.chars()
|
.chars()
|
||||||
.next()
|
.next()
|
||||||
.expect("graphemes must be non-empty")
|
.expect("graphemes must be non-empty")
|
||||||
})
|
});
|
||||||
|
#[cfg(not(feature = "unicode-segmentation"))]
|
||||||
|
let res = text.chars();
|
||||||
|
res
|
||||||
}
|
}
|
||||||
|
@ -13,13 +13,15 @@ use crate::Utf32String;
|
|||||||
#[non_exhaustive]
|
#[non_exhaustive]
|
||||||
/// How to treat a case mismatch between two characters.
|
/// How to treat a case mismatch between two characters.
|
||||||
pub enum CaseMatching {
|
pub enum CaseMatching {
|
||||||
/// Characters always match their case folded version (`a == A`).
|
|
||||||
Ignore,
|
|
||||||
/// Characters never match their case folded version (`a != A`).
|
/// Characters never match their case folded version (`a != A`).
|
||||||
Respect,
|
Respect,
|
||||||
|
/// Characters always match their case folded version (`a == A`).
|
||||||
|
#[cfg(feature = "unicode-casefold")]
|
||||||
|
Ignore,
|
||||||
/// Acts like [`Ignore`](CaseMatching::Ignore) if all characters in a pattern atom are
|
/// Acts like [`Ignore`](CaseMatching::Ignore) if all characters in a pattern atom are
|
||||||
/// lowercase and like [`Respect`](CaseMatching::Respect) otherwise.
|
/// lowercase and like [`Respect`](CaseMatching::Respect) otherwise.
|
||||||
#[default]
|
#[default]
|
||||||
|
#[cfg(feature = "unicode-casefold")]
|
||||||
Smart,
|
Smart,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -106,10 +108,12 @@ impl Atom {
|
|||||||
};
|
};
|
||||||
|
|
||||||
match case {
|
match case {
|
||||||
|
#[cfg(feature = "unicode-casefold")]
|
||||||
CaseMatching::Ignore => {
|
CaseMatching::Ignore => {
|
||||||
ignore_case = true;
|
ignore_case = true;
|
||||||
needle.make_ascii_lowercase()
|
needle.make_ascii_lowercase()
|
||||||
}
|
}
|
||||||
|
#[cfg(feature = "unicode-casefold")]
|
||||||
CaseMatching::Smart => {
|
CaseMatching::Smart => {
|
||||||
ignore_case = !needle.bytes().any(|b| b.is_ascii_uppercase())
|
ignore_case = !needle.bytes().any(|b| b.is_ascii_uppercase())
|
||||||
}
|
}
|
||||||
@ -121,7 +125,14 @@ impl Atom {
|
|||||||
Utf32String::Ascii(needle.into_boxed_str())
|
Utf32String::Ascii(needle.into_boxed_str())
|
||||||
} else {
|
} else {
|
||||||
let mut needle_ = Vec::with_capacity(needle.len());
|
let mut needle_ = Vec::with_capacity(needle.len());
|
||||||
ignore_case = matches!(case, CaseMatching::Ignore | CaseMatching::Smart);
|
#[cfg(feature = "unicode-casefold")]
|
||||||
|
{
|
||||||
|
ignore_case = matches!(case, CaseMatching::Ignore | CaseMatching::Smart);
|
||||||
|
}
|
||||||
|
#[cfg(not(feature = "unicode-casefold"))]
|
||||||
|
{
|
||||||
|
ignore_case = false;
|
||||||
|
}
|
||||||
if escape_whitespace {
|
if escape_whitespace {
|
||||||
let mut saw_backslash = false;
|
let mut saw_backslash = false;
|
||||||
for mut c in chars::graphemes(needle) {
|
for mut c in chars::graphemes(needle) {
|
||||||
@ -136,7 +147,9 @@ impl Atom {
|
|||||||
}
|
}
|
||||||
saw_backslash = c == '\\';
|
saw_backslash = c == '\\';
|
||||||
match case {
|
match case {
|
||||||
|
#[cfg(feature = "unicode-casefold")]
|
||||||
CaseMatching::Ignore => c = chars::to_lower_case(c),
|
CaseMatching::Ignore => c = chars::to_lower_case(c),
|
||||||
|
#[cfg(feature = "unicode-casefold")]
|
||||||
CaseMatching::Smart => {
|
CaseMatching::Smart => {
|
||||||
ignore_case = ignore_case && !chars::is_upper_case(c)
|
ignore_case = ignore_case && !chars::is_upper_case(c)
|
||||||
}
|
}
|
||||||
@ -147,7 +160,9 @@ impl Atom {
|
|||||||
} else {
|
} else {
|
||||||
let chars = chars::graphemes(needle).map(|mut c| {
|
let chars = chars::graphemes(needle).map(|mut c| {
|
||||||
match case {
|
match case {
|
||||||
|
#[cfg(feature = "unicode-casefold")]
|
||||||
CaseMatching::Ignore => c = chars::to_lower_case(c),
|
CaseMatching::Ignore => c = chars::to_lower_case(c),
|
||||||
|
#[cfg(feature = "unicode-casefold")]
|
||||||
CaseMatching::Smart => {
|
CaseMatching::Smart => {
|
||||||
ignore_case = ignore_case && !chars::is_upper_case(c);
|
ignore_case = ignore_case && !chars::is_upper_case(c);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user