mirror of
https://github.com/solaeus/nucleo.git
synced 2024-12-22 01:47:49 +00:00
test pattern parsing and fix edge cases
This commit is contained in:
parent
bb0b5f8726
commit
6b08991fac
@ -80,7 +80,7 @@ impl Char for AsciiChar {
|
||||
fn char_class_non_ascii(c: char) -> CharClass {
|
||||
if c.is_lowercase() {
|
||||
CharClass::Lower
|
||||
} else if c.is_uppercase() {
|
||||
} else if is_upper_case(c) {
|
||||
CharClass::Upper
|
||||
} else if c.is_numeric() {
|
||||
CharClass::Number
|
||||
@ -144,6 +144,13 @@ pub fn to_lower_case(c: char) -> char {
|
||||
.map_or(c, |idx| CASE_FOLDING_SIMPLE[idx].1)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn is_upper_case(c: char) -> bool {
|
||||
CASE_FOLDING_SIMPLE
|
||||
.binary_search_by_key(&c, |(upper, _)| *upper)
|
||||
.is_ok()
|
||||
}
|
||||
|
||||
#[derive(Debug, Eq, PartialEq, PartialOrd, Ord, Copy, Clone, Hash)]
|
||||
#[non_exhaustive]
|
||||
pub enum CharClass {
|
||||
|
@ -140,11 +140,10 @@ impl fmt::Debug for Utf32Str<'_> {
|
||||
|
||||
impl fmt::Display for Utf32Str<'_> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "\"")?;
|
||||
for c in self.chars() {
|
||||
write!(f, "{c}")?
|
||||
}
|
||||
write!(f, "\"")
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,8 @@
|
||||
use nucleo_matcher::{chars, Matcher, MatcherConfig, Utf32Str};
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
use crate::Utf32String;
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
@ -35,7 +38,7 @@ impl PatternAtom {
|
||||
kind: PatternKind,
|
||||
escape_whitespace: bool,
|
||||
) -> PatternAtom {
|
||||
let mut ignore_case = case == CaseMatching::Ignore;
|
||||
let mut ignore_case;
|
||||
let needle = if needle.is_ascii() {
|
||||
let mut needle = if escape_whitespace {
|
||||
if let Some((start, rem)) = needle.split_once("\\ ") {
|
||||
@ -53,16 +56,20 @@ impl PatternAtom {
|
||||
};
|
||||
|
||||
match case {
|
||||
CaseMatching::Ignore => needle.make_ascii_lowercase(),
|
||||
CaseMatching::Ignore => {
|
||||
ignore_case = true;
|
||||
needle.make_ascii_lowercase()
|
||||
}
|
||||
CaseMatching::Smart => {
|
||||
ignore_case = !needle.bytes().any(|b| b.is_ascii_uppercase())
|
||||
}
|
||||
CaseMatching::Respect => (),
|
||||
CaseMatching::Respect => ignore_case = false,
|
||||
}
|
||||
|
||||
Utf32String::Ascii(needle.into_boxed_str())
|
||||
} else {
|
||||
let mut needle_ = Vec::with_capacity(needle.len());
|
||||
ignore_case = matches!(case, CaseMatching::Ignore | CaseMatching::Smart);
|
||||
if escape_whitespace {
|
||||
let mut saw_backslash = false;
|
||||
for mut c in chars::graphemes(needle) {
|
||||
@ -82,7 +89,7 @@ impl PatternAtom {
|
||||
match case {
|
||||
CaseMatching::Ignore => c = chars::to_lower_case(c),
|
||||
CaseMatching::Smart => {
|
||||
ignore_case = ignore_case && !c.is_uppercase();
|
||||
ignore_case = ignore_case && !chars::is_upper_case(c)
|
||||
}
|
||||
CaseMatching::Respect => (),
|
||||
}
|
||||
@ -96,7 +103,7 @@ impl PatternAtom {
|
||||
match case {
|
||||
CaseMatching::Ignore => c = chars::to_lower_case(c),
|
||||
CaseMatching::Smart => {
|
||||
ignore_case = ignore_case && !c.is_uppercase();
|
||||
ignore_case = ignore_case && !chars::is_upper_case(c);
|
||||
}
|
||||
CaseMatching::Respect => (),
|
||||
}
|
||||
@ -116,10 +123,17 @@ impl PatternAtom {
|
||||
|
||||
fn parse(raw: &str, normalize: bool, case: CaseMatching) -> PatternAtom {
|
||||
let mut atom = raw;
|
||||
let invert = atom.starts_with('!');
|
||||
if invert {
|
||||
let invert = match atom.as_bytes() {
|
||||
[b'!', ..] => {
|
||||
atom = &atom[1..];
|
||||
true
|
||||
}
|
||||
[b'\\', b'!', ..] => {
|
||||
atom = &atom[1..];
|
||||
false
|
||||
}
|
||||
_ => false,
|
||||
};
|
||||
|
||||
let mut kind = match atom.as_bytes() {
|
||||
[b'^', ..] => {
|
||||
@ -137,8 +151,12 @@ impl PatternAtom {
|
||||
_ => PatternKind::Fuzzy,
|
||||
};
|
||||
|
||||
let mut append_dollar = false;
|
||||
match atom.as_bytes() {
|
||||
[.., b'\\', b'$'] => (),
|
||||
[.., b'\\', b'$'] => {
|
||||
append_dollar = true;
|
||||
atom = &atom[..atom.len() - 2]
|
||||
}
|
||||
[.., b'$'] => {
|
||||
kind = if kind == PatternKind::Fuzzy {
|
||||
PatternKind::Postfix
|
||||
@ -156,6 +174,9 @@ impl PatternAtom {
|
||||
|
||||
let mut pattern = PatternAtom::literal(atom, normalize, case, kind, true);
|
||||
pattern.invert = invert;
|
||||
if append_dollar {
|
||||
pattern.needle.push('$');
|
||||
}
|
||||
pattern
|
||||
}
|
||||
}
|
||||
@ -221,7 +242,7 @@ impl MultiPattern {
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Pattern {
|
||||
terms: Vec<PatternAtom>,
|
||||
atoms: Vec<PatternAtom>,
|
||||
case_matching: CaseMatching,
|
||||
normalize: bool,
|
||||
status: Status,
|
||||
@ -230,7 +251,7 @@ pub struct Pattern {
|
||||
impl Pattern {
|
||||
pub fn new(matcher_config: &MatcherConfig, case_matching: CaseMatching) -> Pattern {
|
||||
Pattern {
|
||||
terms: Vec::new(),
|
||||
atoms: Vec::new(),
|
||||
case_matching,
|
||||
normalize: matcher_config.normalize,
|
||||
status: Status::Unchanged,
|
||||
@ -242,7 +263,7 @@ impl Pattern {
|
||||
pattern: &str,
|
||||
) -> Pattern {
|
||||
let mut res = Pattern {
|
||||
terms: Vec::new(),
|
||||
atoms: Vec::new(),
|
||||
case_matching,
|
||||
normalize: matcher_config.normalize,
|
||||
status: Status::Unchanged,
|
||||
@ -252,11 +273,11 @@ impl Pattern {
|
||||
}
|
||||
|
||||
pub fn score(&self, haystack: Utf32Str<'_>, matcher: &mut Matcher) -> Option<u32> {
|
||||
if self.terms.is_empty() {
|
||||
if self.atoms.is_empty() {
|
||||
return Some(0);
|
||||
}
|
||||
let mut score = 0;
|
||||
for pattern in &self.terms {
|
||||
for pattern in &self.atoms {
|
||||
matcher.config.ignore_case = pattern.ignore_case;
|
||||
let pattern_score = match pattern.kind {
|
||||
PatternKind::Exact => matcher.exact_match(haystack, pattern.needle.slice(..)),
|
||||
@ -284,11 +305,11 @@ impl Pattern {
|
||||
matcher: &mut Matcher,
|
||||
indices: &mut Vec<u32>,
|
||||
) -> Option<u32> {
|
||||
if self.terms.is_empty() {
|
||||
if self.atoms.is_empty() {
|
||||
return Some(0);
|
||||
}
|
||||
let mut score = 0;
|
||||
for pattern in &self.terms {
|
||||
for pattern in &self.atoms {
|
||||
matcher.config.ignore_case = pattern.ignore_case;
|
||||
if pattern.invert {
|
||||
let pattern_score = match pattern.kind {
|
||||
@ -330,8 +351,8 @@ impl Pattern {
|
||||
}
|
||||
|
||||
pub fn parse_from(&mut self, pattern: &str, append: bool) {
|
||||
self.terms.clear();
|
||||
let invert = self.terms.last().map_or(false, |pat| pat.invert);
|
||||
self.atoms.clear();
|
||||
let invert = self.atoms.last().map_or(false, |pat| pat.invert);
|
||||
let atoms = pattern_atoms(pattern).filter_map(|atom| {
|
||||
let atom = PatternAtom::parse(atom, self.normalize, self.case_matching);
|
||||
if atom.needle.is_empty() {
|
||||
@ -339,7 +360,7 @@ impl Pattern {
|
||||
}
|
||||
Some(atom)
|
||||
});
|
||||
self.terms.extend(atoms);
|
||||
self.atoms.extend(atoms);
|
||||
|
||||
self.status = if append && !invert && self.status != Status::Rescore {
|
||||
Status::Update
|
||||
@ -349,10 +370,10 @@ impl Pattern {
|
||||
}
|
||||
|
||||
pub fn set_literal(&mut self, pattern: &str, kind: PatternKind, append: bool) {
|
||||
self.terms.clear();
|
||||
self.atoms.clear();
|
||||
let pattern =
|
||||
PatternAtom::literal(pattern, self.normalize, self.case_matching, kind, false);
|
||||
self.terms.push(pattern);
|
||||
self.atoms.push(pattern);
|
||||
self.status = if append && self.status != Status::Rescore {
|
||||
Status::Update
|
||||
} else {
|
||||
@ -361,14 +382,14 @@ impl Pattern {
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.terms.is_empty()
|
||||
self.atoms.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
impl Clone for Pattern {
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
terms: self.terms.clone(),
|
||||
atoms: self.atoms.clone(),
|
||||
case_matching: self.case_matching,
|
||||
normalize: self.normalize,
|
||||
status: self.status,
|
||||
@ -376,7 +397,7 @@ impl Clone for Pattern {
|
||||
}
|
||||
|
||||
fn clone_from(&mut self, source: &Self) {
|
||||
self.terms.clone_from(&source.terms);
|
||||
self.atoms.clone_from(&source.atoms);
|
||||
self.case_matching = source.case_matching;
|
||||
self.normalize = source.normalize;
|
||||
self.status = source.status;
|
||||
|
135
src/pattern/tests.rs
Normal file
135
src/pattern/tests.rs
Normal file
@ -0,0 +1,135 @@
|
||||
use crate::pattern::PatternAtom;
|
||||
use crate::{CaseMatching, Pattern, PatternKind};
|
||||
|
||||
fn parse_atom(pat: &str) -> PatternAtom {
|
||||
parse_atom_with(pat, CaseMatching::Smart)
|
||||
}
|
||||
|
||||
fn parse_atom_with(pat: &str, case_matching: CaseMatching) -> PatternAtom {
|
||||
let mut pat = parse_with(pat, case_matching);
|
||||
assert_eq!(pat.atoms.len(), 1);
|
||||
pat.atoms.remove(0)
|
||||
}
|
||||
|
||||
fn parse_with(pat: &str, case_matching: CaseMatching) -> Pattern {
|
||||
let mut res = Pattern::new(&nucleo_matcher::MatcherConfig::DEFAULT, case_matching);
|
||||
res.parse_from(pat, false);
|
||||
res
|
||||
}
|
||||
|
||||
/// A leading `!` inverts the atom; the remaining anchors select the kind.
#[test]
fn negative() {
    let cases = [
        ("!foo", PatternKind::Substring),
        ("!^foo", PatternKind::Prefix),
        ("!foo$", PatternKind::Postfix),
        ("!^foo$", PatternKind::Exact),
    ];
    for (raw, kind) in cases {
        let atom = parse_atom(raw);
        assert!(atom.invert);
        assert_eq!(atom.kind, kind);
        assert_eq!(atom.needle.to_string(), "foo");
    }
}
|
||||
|
||||
/// Each prefix/suffix marker maps to its pattern kind and is stripped from
/// the needle; none of these atoms is inverted.
#[test]
fn pattern_kinds() {
    let cases = [
        ("foo", PatternKind::Fuzzy),
        ("'foo", PatternKind::Substring),
        ("^foo", PatternKind::Prefix),
        ("foo$", PatternKind::Postfix),
        ("^foo$", PatternKind::Exact),
    ];
    for (raw, kind) in cases {
        let atom = parse_atom(raw);
        assert!(!atom.invert);
        assert_eq!(atom.kind, kind);
        assert_eq!(atom.needle.to_string(), "foo");
    }
}
|
||||
|
||||
/// Checks the `ignore_case` flag and the stored needle for every case
/// matching mode, on ASCII and non-ASCII input.
///
/// Each row is `(raw pattern, mode, expected ignore_case, expected needle)`.
#[test]
fn case_matching() {
    let cases = [
        // ASCII needles.
        ("foo", CaseMatching::Smart, true, "foo"),
        ("Foo", CaseMatching::Smart, false, "Foo"),
        ("Foo", CaseMatching::Ignore, true, "foo"),
        ("Foo", CaseMatching::Respect, false, "Foo"),
        // Non-ASCII needle with an upper-case letter; the expected needles
        // have the diaeresis removed (presumably by the default config's
        // normalization; confirm against `MatcherConfig::DEFAULT`).
        ("Äxx", CaseMatching::Ignore, true, "axx"),
        ("Äxx", CaseMatching::Respect, false, "Axx"),
        ("Äxx", CaseMatching::Smart, false, "Axx"),
        // Non-ASCII needle without any cased character.
        ("你xx", CaseMatching::Smart, true, "你xx"),
        ("你xx", CaseMatching::Ignore, true, "你xx"),
        // Non-ASCII upper-case letter outside the Latin range.
        ("Ⲽxx", CaseMatching::Smart, false, "Ⲽxx"),
        ("Ⲽxx", CaseMatching::Ignore, true, "ⲽxx"),
    ];
    for (raw, mode, ignore_case, needle) in cases {
        let atom = parse_atom_with(raw, mode);
        assert_eq!(atom.ignore_case, ignore_case, "ignore_case for {raw:?}");
        assert_eq!(atom.needle.to_string(), needle, "needle for {raw:?}");
    }
}
|
||||
|
||||
/// Backslash-escaped markers are kept literally in the needle and do not
/// affect the pattern kind.
#[test]
fn escape() {
    // An escaped space stays inside the atom; only the needle is checked here.
    let atom = parse_atom("foo\\ bar");
    assert_eq!(atom.needle.to_string(), "foo bar");

    // Rows are (raw pattern, expected needle, expected kind).
    let cases = [
        ("\\!foo", "!foo", PatternKind::Fuzzy),
        ("\\'foo", "'foo", PatternKind::Fuzzy),
        ("\\^foo", "^foo", PatternKind::Fuzzy),
        ("foo\\$", "foo$", PatternKind::Fuzzy),
        ("^foo\\$", "foo$", PatternKind::Prefix),
        ("\\^foo\\$", "^foo$", PatternKind::Fuzzy),
        ("\\!^foo\\$", "!^foo$", PatternKind::Fuzzy),
        ("!\\^foo\\$", "^foo$", PatternKind::Substring),
    ];
    for (raw, needle, kind) in cases {
        let atom = parse_atom(raw);
        assert_eq!(atom.needle.to_string(), needle);
        assert_eq!(atom.kind, kind);
    }
}
|
@ -186,10 +186,9 @@ impl fmt::Debug for Utf32String {
|
||||
|
||||
impl fmt::Display for Utf32String {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "\"")?;
|
||||
for c in self.chars() {
|
||||
write!(f, "{c}")?
|
||||
}
|
||||
write!(f, "\"")
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user