Correctly handle normalization in pattern API

This commit is contained in:
Pascal Kuthe 2023-12-17 18:36:07 +01:00
parent b41c989daf
commit 1edf451192
No known key found for this signature in database
GPG Key ID: D715E8655AE166A6
7 changed files with 146 additions and 59 deletions

View File

@ -1,9 +1,14 @@
# Changelog # Changelog
# Unreleased # [0.3.0] - 2023-12-20
## **Breaking Changes**
* Pattern API method now requires a Unicode `Normalization` strategy in addition to a `CaseMatching` strategy.
## Bugfixes ## Bugfixes
* correctly handle Unicode normalization when there are normalizable characters in the pattern, for example characters with umlauts
* when the needle is composed of a single char, return the score and index * when the needle is composed of a single char, return the score and index
of the best position instead of always returning the first matched character of the best position instead of always returning the first matched character
in the haystack in the haystack
@ -19,5 +24,6 @@
*initial public release* *initial public release*
[0.3.0]: https://github.com/helix-editor/nucleo/releases/tag/nucleo-v0.3.0
[0.2.1]: https://github.com/helix-editor/nucleo/releases/tag/nucleo-v0.2.1 [0.2.1]: https://github.com/helix-editor/nucleo/releases/tag/nucleo-v0.2.1
[0.2.0]: https://github.com/helix-editor/nucleo/releases/tag/nucleo-v0.2.0 [0.2.0]: https://github.com/helix-editor/nucleo/releases/tag/nucleo-v0.2.0

View File

@ -16,12 +16,12 @@ can contain special characters to control what kind of match is performed (see
``` ```
# use nucleo_matcher::{Matcher, Config}; # use nucleo_matcher::{Matcher, Config};
# use nucleo_matcher::pattern::{Pattern, CaseMatching}; # use nucleo_matcher::pattern::{Pattern, Normalization, CaseMatching};
let paths = ["foo/bar", "bar/foo", "foobar"]; let paths = ["foo/bar", "bar/foo", "foobar"];
let mut matcher = Matcher::new(Config::DEFAULT.match_paths()); let mut matcher = Matcher::new(Config::DEFAULT.match_paths());
let matches = Pattern::parse("foo bar", CaseMatching::Ignore).match_list(paths, &mut matcher); let matches = Pattern::parse("foo bar", CaseMatching::Ignore, Normalization::Smart).match_list(paths, &mut matcher);
assert_eq!(matches, vec![("foo/bar", 168), ("bar/foo", 168), ("foobar", 140)]); assert_eq!(matches, vec![("foo/bar", 168), ("bar/foo", 168), ("foobar", 140)]);
let matches = Pattern::parse("^foo bar", CaseMatching::Ignore).match_list(paths, &mut matcher); let matches = Pattern::parse("^foo bar", CaseMatching::Ignore, Normalization::Smart).match_list(paths, &mut matcher);
assert_eq!(matches, vec![("foo/bar", 168), ("foobar", 140)]); assert_eq!(matches, vec![("foo/bar", 168), ("foobar", 140)]);
``` ```
@ -30,13 +30,13 @@ If the pattern should be matched literally (without this special parsing)
``` ```
# use nucleo_matcher::{Matcher, Config}; # use nucleo_matcher::{Matcher, Config};
# use nucleo_matcher::pattern::{Pattern, CaseMatching, AtomKind}; # use nucleo_matcher::pattern::{Pattern, CaseMatching, AtomKind, Normalization};
let paths = ["foo/bar", "bar/foo", "foobar"]; let paths = ["foo/bar", "bar/foo", "foobar"];
let mut matcher = Matcher::new(Config::DEFAULT.match_paths()); let mut matcher = Matcher::new(Config::DEFAULT.match_paths());
let matches = Pattern::new("foo bar", CaseMatching::Ignore, AtomKind::Fuzzy).match_list(paths, &mut matcher); let matches = Pattern::new("foo bar", CaseMatching::Ignore, Normalization::Smart, AtomKind::Fuzzy).match_list(paths, &mut matcher);
assert_eq!(matches, vec![("foo/bar", 168), ("bar/foo", 168), ("foobar", 140)]); assert_eq!(matches, vec![("foo/bar", 168), ("bar/foo", 168), ("foobar", 140)]);
let paths = ["^foo/bar", "bar/^foo", "foobar"]; let paths = ["^foo/bar", "bar/^foo", "foobar"];
let matches = Pattern::new("^foo bar", CaseMatching::Ignore, AtomKind::Fuzzy).match_list(paths, &mut matcher); let matches = Pattern::new("^foo bar", CaseMatching::Ignore, Normalization::Smart, AtomKind::Fuzzy).match_list(paths, &mut matcher);
assert_eq!(matches, vec![("^foo/bar", 188), ("bar/^foo", 188)]); assert_eq!(matches, vec![("^foo/bar", 188), ("bar/^foo", 188)]);
``` ```
@ -44,10 +44,10 @@ If word segmentation is also not desired, a single `Atom` can be constructed dir
``` ```
# use nucleo_matcher::{Matcher, Config}; # use nucleo_matcher::{Matcher, Config};
# use nucleo_matcher::pattern::{Pattern, Atom, CaseMatching, AtomKind}; # use nucleo_matcher::pattern::{Pattern, Atom, CaseMatching, Normalization, AtomKind};
let paths = ["foobar", "foo bar"]; let paths = ["foobar", "foo bar"];
let mut matcher = Matcher::new(Config::DEFAULT); let mut matcher = Matcher::new(Config::DEFAULT);
let matches = Atom::new("foo bar", CaseMatching::Ignore, AtomKind::Fuzzy, false).match_list(paths, &mut matcher); let matches = Atom::new("foo bar", CaseMatching::Ignore, Normalization::Smart, AtomKind::Fuzzy, false).match_list(paths, &mut matcher);
assert_eq!(matches, vec![("foo bar", 192)]); assert_eq!(matches, vec![("foo bar", 192)]);
``` ```

View File

@ -25,6 +25,19 @@ pub enum CaseMatching {
Smart, Smart,
} }
#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
#[non_exhaustive]
/// How to handle unicode normalization,
pub enum Normalization {
/// Characters never match their normalized version (`a != ä`).
Never,
/// Acts like [`Never`](Normalization::Never) if any character in a pattern atom
/// would need to be normalized. Otherwise normalization occurs (`a == ä` but `ä != a`).
#[default]
#[cfg(feature = "unicode-normalization")]
Smart,
}
#[derive(Debug, PartialEq, Eq, Clone, Copy)] #[derive(Debug, PartialEq, Eq, Clone, Copy)]
#[non_exhaustive] #[non_exhaustive]
/// The kind of matching algorithm to run for an atom. /// The kind of matching algorithm to run for an atom.
@ -73,24 +86,41 @@ pub struct Atom {
pub kind: AtomKind, pub kind: AtomKind,
needle: Utf32String, needle: Utf32String,
ignore_case: bool, ignore_case: bool,
normalize: bool,
} }
impl Atom { impl Atom {
/// Creates a single [`Atom`] from a string by performing unicode /// Creates a single [`Atom`] from a string by performing unicode
/// normalization and case folding (if necessary). Optionally `\ ` can /// normalization and case folding (if necessary). Optionally `\ ` can
/// be escaped to ` `. /// be escaped to ` `.
pub fn new(needle: &str, case: CaseMatching, kind: AtomKind, escape_whitespace: bool) -> Atom { pub fn new(
Atom::new_inner(needle, case, kind, escape_whitespace, false) needle: &str,
case: CaseMatching,
normalize: Normalization,
kind: AtomKind,
escape_whitespace: bool,
) -> Atom {
Atom::new_inner(needle, case, normalize, kind, escape_whitespace, false)
} }
fn new_inner( fn new_inner(
needle: &str, needle: &str,
case: CaseMatching, case: CaseMatching,
normalization: Normalization,
kind: AtomKind, kind: AtomKind,
escape_whitespace: bool, escape_whitespace: bool,
append_dollar: bool, append_dollar: bool,
) -> Atom { ) -> Atom {
let mut ignore_case; let mut ignore_case;
let mut normalize;
#[cfg(feature = "unicode-normalization")]
{
normalize = matches!(normalization, Normalization::Smart);
}
#[cfg(not(feature = "unicode-normalization"))]
{
normalize = false;
}
let needle = if needle.is_ascii() { let needle = if needle.is_ascii() {
let mut needle = if escape_whitespace { let mut needle = if escape_whitespace {
if let Some((start, rem)) = needle.split_once("\\ ") { if let Some((start, rem)) = needle.split_once("\\ ") {
@ -133,6 +163,10 @@ impl Atom {
{ {
ignore_case = false; ignore_case = false;
} }
#[cfg(feature = "unicode-normalization")]
{
normalize = matches!(normalization, Normalization::Smart);
}
if escape_whitespace { if escape_whitespace {
let mut saw_backslash = false; let mut saw_backslash = false;
for mut c in chars::graphemes(needle) { for mut c in chars::graphemes(needle) {
@ -155,6 +189,13 @@ impl Atom {
} }
CaseMatching::Respect => (), CaseMatching::Respect => (),
} }
match normalization {
#[cfg(feature = "unicode-normalization")]
Normalization::Smart => {
normalize = normalize && chars::normalize(c) == c;
}
Normalization::Never => (),
}
needle_.push(c); needle_.push(c);
} }
} else { } else {
@ -168,6 +209,13 @@ impl Atom {
} }
CaseMatching::Respect => (), CaseMatching::Respect => (),
} }
match normalization {
#[cfg(feature = "unicode-normalization")]
Normalization::Smart => {
normalize = normalize && chars::normalize(c) == c;
}
Normalization::Never => (),
}
c c
}); });
needle_.extend(chars); needle_.extend(chars);
@ -182,13 +230,14 @@ impl Atom {
needle, needle,
negative: false, negative: false,
ignore_case, ignore_case,
normalize,
} }
} }
/// Parse a pattern atom from a string. Some special trailing and leading /// Parse a pattern atom from a string. Some special trailing and leading
/// characters can be used to control the atom kind. See [`AtomKind`] for /// characters can be used to control the atom kind. See [`AtomKind`] for
/// details. /// details.
pub fn parse(raw: &str, case: CaseMatching) -> Atom { pub fn parse(raw: &str, case: CaseMatching, normalize: Normalization) -> Atom {
let mut atom = raw; let mut atom = raw;
let invert = match atom.as_bytes() { let invert = match atom.as_bytes() {
[b'!', ..] => { [b'!', ..] => {
@ -239,7 +288,7 @@ impl Atom {
kind = AtomKind::Substring kind = AtomKind::Substring
} }
let mut pattern = Atom::new_inner(atom, case, kind, true, append_dollar); let mut pattern = Atom::new_inner(atom, case, normalize, kind, true, append_dollar);
pattern.negative = invert; pattern.negative = invert;
pattern pattern
} }
@ -252,6 +301,7 @@ impl Atom {
/// each pattern atom. /// each pattern atom.
pub fn score(&self, haystack: Utf32Str<'_>, matcher: &mut Matcher) -> Option<u16> { pub fn score(&self, haystack: Utf32Str<'_>, matcher: &mut Matcher) -> Option<u16> {
matcher.config.ignore_case = self.ignore_case; matcher.config.ignore_case = self.ignore_case;
matcher.config.normalize = self.normalize;
let pattern_score = match self.kind { let pattern_score = match self.kind {
AtomKind::Exact => matcher.exact_match(haystack, self.needle.slice(..)), AtomKind::Exact => matcher.exact_match(haystack, self.needle.slice(..)),
AtomKind::Fuzzy => matcher.fuzzy_match(haystack, self.needle.slice(..)), AtomKind::Fuzzy => matcher.fuzzy_match(haystack, self.needle.slice(..)),
@ -285,6 +335,7 @@ impl Atom {
indices: &mut Vec<u32>, indices: &mut Vec<u32>,
) -> Option<u16> { ) -> Option<u16> {
matcher.config.ignore_case = self.ignore_case; matcher.config.ignore_case = self.ignore_case;
matcher.config.normalize = self.normalize;
if self.negative { if self.negative {
let pattern_score = match self.kind { let pattern_score = match self.kind {
AtomKind::Exact => matcher.exact_match(haystack, self.needle.slice(..)), AtomKind::Exact => matcher.exact_match(haystack, self.needle.slice(..)),
@ -371,10 +422,15 @@ impl Pattern {
/// can be escaped with `\`). Otherwise no parsing is performed (so $, !, ' /// can be escaped with `\`). Otherwise no parsing is performed (so $, !, '
/// and ^ don't receive special treatment). If you want to match the entire /// and ^ don't receive special treatment). If you want to match the entire
/// pattern as a single needle use a single [`Atom`] instead. /// pattern as a single needle use a single [`Atom`] instead.
pub fn new(pattern: &str, case_matching: CaseMatching, kind: AtomKind) -> Pattern { pub fn new(
pattern: &str,
case_matching: CaseMatching,
normalize: Normalization,
kind: AtomKind,
) -> Pattern {
let atoms = pattern_atoms(pattern) let atoms = pattern_atoms(pattern)
.filter_map(|pat| { .filter_map(|pat| {
let pat = Atom::new(pat, case_matching, kind, true); let pat = Atom::new(pat, case_matching, normalize, kind, true);
(!pat.needle.is_empty()).then_some(pat) (!pat.needle.is_empty()).then_some(pat)
}) })
.collect(); .collect();
@ -384,10 +440,10 @@ impl Pattern {
/// can be escaped with `\`). And $, !, ' and ^ at word boundaries will /// can be escaped with `\`). And $, !, ' and ^ at word boundaries will
/// cause different matching behaviour (see [`AtomKind`]). These can be /// cause different matching behaviour (see [`AtomKind`]). These can be
/// escaped with backslash. /// escaped with backslash.
pub fn parse(pattern: &str, case_matching: CaseMatching) -> Pattern { pub fn parse(pattern: &str, case_matching: CaseMatching, normalize: Normalization) -> Pattern {
let atoms = pattern_atoms(pattern) let atoms = pattern_atoms(pattern)
.filter_map(|pat| { .filter_map(|pat| {
let pat = Atom::parse(pat, case_matching); let pat = Atom::parse(pat, case_matching, normalize);
(!pat.needle.is_empty()).then_some(pat) (!pat.needle.is_empty()).then_some(pat)
}) })
.collect(); .collect();
@ -477,10 +533,15 @@ impl Pattern {
/// Refreshes this pattern by reparsing it from a string. This is mostly /// Refreshes this pattern by reparsing it from a string. This is mostly
/// equivalent to just constructing a new pattern using [`Pattern::parse`] /// equivalent to just constructing a new pattern using [`Pattern::parse`]
/// but is slightly more efficient by reusing some allocations /// but is slightly more efficient by reusing some allocations
pub fn reparse(&mut self, pattern: &str, case_matching: CaseMatching) { pub fn reparse(
&mut self,
pattern: &str,
case_matching: CaseMatching,
normalize: Normalization,
) {
self.atoms.clear(); self.atoms.clear();
let atoms = pattern_atoms(pattern).filter_map(|atom| { let atoms = pattern_atoms(pattern).filter_map(|atom| {
let atom = Atom::parse(atom, case_matching); let atom = Atom::parse(atom, case_matching, normalize);
if atom.needle.is_empty() { if atom.needle.is_empty() {
return None; return None;
} }

View File

@ -1,20 +1,20 @@
use crate::pattern::{Atom, AtomKind, CaseMatching}; use crate::pattern::{Atom, AtomKind, CaseMatching, Normalization};
#[test] #[test]
fn negative() { fn negative() {
let pat = Atom::parse("!foo", CaseMatching::Smart); let pat = Atom::parse("!foo", CaseMatching::Smart, Normalization::Smart);
assert!(pat.negative); assert!(pat.negative);
assert_eq!(pat.kind, AtomKind::Substring); assert_eq!(pat.kind, AtomKind::Substring);
assert_eq!(pat.needle.to_string(), "foo"); assert_eq!(pat.needle.to_string(), "foo");
let pat = Atom::parse("!^foo", CaseMatching::Smart); let pat = Atom::parse("!^foo", CaseMatching::Smart, Normalization::Smart);
assert!(pat.negative); assert!(pat.negative);
assert_eq!(pat.kind, AtomKind::Prefix); assert_eq!(pat.kind, AtomKind::Prefix);
assert_eq!(pat.needle.to_string(), "foo"); assert_eq!(pat.needle.to_string(), "foo");
let pat = Atom::parse("!foo$", CaseMatching::Smart); let pat = Atom::parse("!foo$", CaseMatching::Smart, Normalization::Smart);
assert!(pat.negative); assert!(pat.negative);
assert_eq!(pat.kind, AtomKind::Postfix); assert_eq!(pat.kind, AtomKind::Postfix);
assert_eq!(pat.needle.to_string(), "foo"); assert_eq!(pat.needle.to_string(), "foo");
let pat = Atom::parse("!^foo$", CaseMatching::Smart); let pat = Atom::parse("!^foo$", CaseMatching::Smart, Normalization::Smart);
assert!(pat.negative); assert!(pat.negative);
assert_eq!(pat.kind, AtomKind::Exact); assert_eq!(pat.kind, AtomKind::Exact);
assert_eq!(pat.needle.to_string(), "foo"); assert_eq!(pat.needle.to_string(), "foo");
@ -22,23 +22,23 @@ fn negative() {
#[test] #[test]
fn pattern_kinds() { fn pattern_kinds() {
let pat = Atom::parse("foo", CaseMatching::Smart); let pat = Atom::parse("foo", CaseMatching::Smart, Normalization::Smart);
assert!(!pat.negative); assert!(!pat.negative);
assert_eq!(pat.kind, AtomKind::Fuzzy); assert_eq!(pat.kind, AtomKind::Fuzzy);
assert_eq!(pat.needle.to_string(), "foo"); assert_eq!(pat.needle.to_string(), "foo");
let pat = Atom::parse("'foo", CaseMatching::Smart); let pat = Atom::parse("'foo", CaseMatching::Smart, Normalization::Smart);
assert!(!pat.negative); assert!(!pat.negative);
assert_eq!(pat.kind, AtomKind::Substring); assert_eq!(pat.kind, AtomKind::Substring);
assert_eq!(pat.needle.to_string(), "foo"); assert_eq!(pat.needle.to_string(), "foo");
let pat = Atom::parse("^foo", CaseMatching::Smart); let pat = Atom::parse("^foo", CaseMatching::Smart, Normalization::Smart);
assert!(!pat.negative); assert!(!pat.negative);
assert_eq!(pat.kind, AtomKind::Prefix); assert_eq!(pat.kind, AtomKind::Prefix);
assert_eq!(pat.needle.to_string(), "foo"); assert_eq!(pat.needle.to_string(), "foo");
let pat = Atom::parse("foo$", CaseMatching::Smart); let pat = Atom::parse("foo$", CaseMatching::Smart, Normalization::Smart);
assert!(!pat.negative); assert!(!pat.negative);
assert_eq!(pat.kind, AtomKind::Postfix); assert_eq!(pat.kind, AtomKind::Postfix);
assert_eq!(pat.needle.to_string(), "foo"); assert_eq!(pat.needle.to_string(), "foo");
let pat = Atom::parse("^foo$", CaseMatching::Smart); let pat = Atom::parse("^foo$", CaseMatching::Smart, Normalization::Smart);
assert!(!pat.negative); assert!(!pat.negative);
assert_eq!(pat.kind, AtomKind::Exact); assert_eq!(pat.kind, AtomKind::Exact);
assert_eq!(pat.needle.to_string(), "foo"); assert_eq!(pat.needle.to_string(), "foo");
@ -46,69 +46,69 @@ fn pattern_kinds() {
#[test] #[test]
fn case_matching() { fn case_matching() {
let pat = Atom::parse("foo", CaseMatching::Smart); let pat = Atom::parse("foo", CaseMatching::Smart, Normalization::Smart);
assert!(pat.ignore_case); assert!(pat.ignore_case);
assert_eq!(pat.needle.to_string(), "foo"); assert_eq!(pat.needle.to_string(), "foo");
let pat = Atom::parse("Foo", CaseMatching::Smart); let pat = Atom::parse("Foo", CaseMatching::Smart, Normalization::Smart);
assert!(!pat.ignore_case); assert!(!pat.ignore_case);
assert_eq!(pat.needle.to_string(), "Foo"); assert_eq!(pat.needle.to_string(), "Foo");
let pat = Atom::parse("Foo", CaseMatching::Ignore); let pat = Atom::parse("Foo", CaseMatching::Ignore, Normalization::Smart);
assert!(pat.ignore_case); assert!(pat.ignore_case);
assert_eq!(pat.needle.to_string(), "foo"); assert_eq!(pat.needle.to_string(), "foo");
let pat = Atom::parse("Foo", CaseMatching::Respect); let pat = Atom::parse("Foo", CaseMatching::Respect, Normalization::Smart);
assert!(!pat.ignore_case); assert!(!pat.ignore_case);
assert_eq!(pat.needle.to_string(), "Foo"); assert_eq!(pat.needle.to_string(), "Foo");
let pat = Atom::parse("Foo", CaseMatching::Respect); let pat = Atom::parse("Foo", CaseMatching::Respect, Normalization::Smart);
assert!(!pat.ignore_case); assert!(!pat.ignore_case);
assert_eq!(pat.needle.to_string(), "Foo"); assert_eq!(pat.needle.to_string(), "Foo");
let pat = Atom::parse("Äxx", CaseMatching::Ignore); let pat = Atom::parse("Äxx", CaseMatching::Ignore, Normalization::Smart);
assert!(pat.ignore_case); assert!(pat.ignore_case);
assert_eq!(pat.needle.to_string(), "äxx"); assert_eq!(pat.needle.to_string(), "äxx");
let pat = Atom::parse("Äxx", CaseMatching::Respect); let pat = Atom::parse("Äxx", CaseMatching::Respect, Normalization::Smart);
assert!(!pat.ignore_case); assert!(!pat.ignore_case);
let pat = Atom::parse("Axx", CaseMatching::Smart); let pat = Atom::parse("Axx", CaseMatching::Smart, Normalization::Smart);
assert!(!pat.ignore_case); assert!(!pat.ignore_case);
assert_eq!(pat.needle.to_string(), "Axx"); assert_eq!(pat.needle.to_string(), "Axx");
let pat = Atom::parse("你xx", CaseMatching::Smart); let pat = Atom::parse("你xx", CaseMatching::Smart, Normalization::Smart);
assert!(pat.ignore_case); assert!(pat.ignore_case);
assert_eq!(pat.needle.to_string(), "你xx"); assert_eq!(pat.needle.to_string(), "你xx");
let pat = Atom::parse("你xx", CaseMatching::Ignore); let pat = Atom::parse("你xx", CaseMatching::Ignore, Normalization::Smart);
assert!(pat.ignore_case); assert!(pat.ignore_case);
assert_eq!(pat.needle.to_string(), "你xx"); assert_eq!(pat.needle.to_string(), "你xx");
let pat = Atom::parse("Ⲽxx", CaseMatching::Smart); let pat = Atom::parse("Ⲽxx", CaseMatching::Smart, Normalization::Smart);
assert!(!pat.ignore_case); assert!(!pat.ignore_case);
assert_eq!(pat.needle.to_string(), "Ⲽxx"); assert_eq!(pat.needle.to_string(), "Ⲽxx");
let pat = Atom::parse("Ⲽxx", CaseMatching::Ignore); let pat = Atom::parse("Ⲽxx", CaseMatching::Ignore, Normalization::Smart);
assert!(pat.ignore_case); assert!(pat.ignore_case);
assert_eq!(pat.needle.to_string(), "ⲽxx"); assert_eq!(pat.needle.to_string(), "ⲽxx");
} }
#[test] #[test]
fn escape() { fn escape() {
let pat = Atom::parse("foo\\ bar", CaseMatching::Smart); let pat = Atom::parse("foo\\ bar", CaseMatching::Smart, Normalization::Smart);
assert_eq!(pat.needle.to_string(), "foo bar"); assert_eq!(pat.needle.to_string(), "foo bar");
let pat = Atom::parse("\\!foo", CaseMatching::Smart); let pat = Atom::parse("\\!foo", CaseMatching::Smart, Normalization::Smart);
assert_eq!(pat.needle.to_string(), "!foo"); assert_eq!(pat.needle.to_string(), "!foo");
assert_eq!(pat.kind, AtomKind::Fuzzy); assert_eq!(pat.kind, AtomKind::Fuzzy);
let pat = Atom::parse("\\'foo", CaseMatching::Smart); let pat = Atom::parse("\\'foo", CaseMatching::Smart, Normalization::Smart);
assert_eq!(pat.needle.to_string(), "'foo"); assert_eq!(pat.needle.to_string(), "'foo");
assert_eq!(pat.kind, AtomKind::Fuzzy); assert_eq!(pat.kind, AtomKind::Fuzzy);
let pat = Atom::parse("\\^foo", CaseMatching::Smart); let pat = Atom::parse("\\^foo", CaseMatching::Smart, Normalization::Smart);
assert_eq!(pat.needle.to_string(), "^foo"); assert_eq!(pat.needle.to_string(), "^foo");
assert_eq!(pat.kind, AtomKind::Fuzzy); assert_eq!(pat.kind, AtomKind::Fuzzy);
let pat = Atom::parse("foo\\$", CaseMatching::Smart); let pat = Atom::parse("foo\\$", CaseMatching::Smart, Normalization::Smart);
assert_eq!(pat.needle.to_string(), "foo$"); assert_eq!(pat.needle.to_string(), "foo$");
assert_eq!(pat.kind, AtomKind::Fuzzy); assert_eq!(pat.kind, AtomKind::Fuzzy);
let pat = Atom::parse("^foo\\$", CaseMatching::Smart); let pat = Atom::parse("^foo\\$", CaseMatching::Smart, Normalization::Smart);
assert_eq!(pat.needle.to_string(), "foo$"); assert_eq!(pat.needle.to_string(), "foo$");
assert_eq!(pat.kind, AtomKind::Prefix); assert_eq!(pat.kind, AtomKind::Prefix);
let pat = Atom::parse("\\^foo\\$", CaseMatching::Smart); let pat = Atom::parse("\\^foo\\$", CaseMatching::Smart, Normalization::Smart);
assert_eq!(pat.needle.to_string(), "^foo$"); assert_eq!(pat.needle.to_string(), "^foo$");
assert_eq!(pat.kind, AtomKind::Fuzzy); assert_eq!(pat.kind, AtomKind::Fuzzy);
let pat = Atom::parse("\\!^foo\\$", CaseMatching::Smart); let pat = Atom::parse("\\!^foo\\$", CaseMatching::Smart, Normalization::Smart);
assert_eq!(pat.needle.to_string(), "!^foo$"); assert_eq!(pat.needle.to_string(), "!^foo$");
assert_eq!(pat.kind, AtomKind::Fuzzy); assert_eq!(pat.kind, AtomKind::Fuzzy);
let pat = Atom::parse("!\\^foo\\$", CaseMatching::Smart); let pat = Atom::parse("!\\^foo\\$", CaseMatching::Smart, Normalization::Smart);
assert_eq!(pat.needle.to_string(), "^foo$"); assert_eq!(pat.needle.to_string(), "^foo$");
assert_eq!(pat.kind, AtomKind::Substring); assert_eq!(pat.kind, AtomKind::Substring);
} }

View File

@ -1,4 +1,5 @@
use crate::chars::Char; use crate::chars::Char;
use crate::pattern::{CaseMatching, Normalization, Pattern};
use crate::score::{ use crate::score::{
BONUS_BOUNDARY, BONUS_CAMEL123, BONUS_CONSECUTIVE, BONUS_FIRST_CHAR_MULTIPLIER, BONUS_NON_WORD, BONUS_BOUNDARY, BONUS_CAMEL123, BONUS_CONSECUTIVE, BONUS_FIRST_CHAR_MULTIPLIER, BONUS_NON_WORD,
MAX_PREFIX_BONUS, PENALTY_GAP_EXTENSION, PENALTY_GAP_START, SCORE_MATCH, MAX_PREFIX_BONUS, PENALTY_GAP_EXTENSION, PENALTY_GAP_START, SCORE_MATCH,
@ -577,12 +578,13 @@ fn test_optimal() {
// the second f instead of the third yielding a higher score // the second f instead of the third yielding a higher score
// (despite using the same scoring function!) // (despite using the same scoring function!)
( (
"xf foo", "xf.foo",
"xfoo", "xfoo",
&[0, 3, 4, 5], &[0, 3, 4, 5],
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 3) BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER
- PENALTY_GAP_START - PENALTY_GAP_START
- PENALTY_GAP_EXTENSION, - PENALTY_GAP_EXTENSION
+ BONUS_BOUNDARY * 3,
), ),
( (
"xf fo", "xf fo",
@ -698,3 +700,18 @@ fn test_single_char_needle() {
)], )],
); );
} }
#[test]
fn umlaut() {
let paths = ["be", ""];
let mut matcher = Matcher::new(Config::DEFAULT);
let matches = Pattern::parse("ë", CaseMatching::Ignore, Normalization::Smart)
.match_list(paths, &mut matcher);
assert_eq!(matches.len(), 1);
let matches = Pattern::parse("e", CaseMatching::Ignore, Normalization::Never)
.match_list(paths, &mut matcher);
assert_eq!(matches.len(), 1);
let matches = Pattern::parse("e", CaseMatching::Ignore, Normalization::Smart)
.match_list(paths, &mut matcher);
assert_eq!(matches.len(), 2);
}

View File

@ -1,4 +1,4 @@
pub use nucleo_matcher::pattern::{Atom, AtomKind, CaseMatching, Pattern}; pub use nucleo_matcher::pattern::{Atom, AtomKind, CaseMatching, Normalization, Pattern};
use nucleo_matcher::{Matcher, Utf32String}; use nucleo_matcher::{Matcher, Utf32String};
#[cfg(test)] #[cfg(test)]
@ -46,6 +46,7 @@ impl MultiPattern {
column: usize, column: usize,
new_text: &str, new_text: &str,
case_matching: CaseMatching, case_matching: CaseMatching,
normalization: Normalization,
append: bool, append: bool,
) { ) {
let old_status = self.cols[column].1; let old_status = self.cols[column].1;
@ -61,7 +62,9 @@ impl MultiPattern {
} else { } else {
self.cols[column].1 = Status::Rescore; self.cols[column].1 = Status::Rescore;
} }
self.cols[column].0.reparse(new_text, case_matching); self.cols[column]
.0
.reparse(new_text, case_matching, normalization);
} }
pub fn column_pattern(&self, column: usize) -> &Pattern { pub fn column_pattern(&self, column: usize) -> &Pattern {

View File

@ -1,14 +1,14 @@
use nucleo_matcher::pattern::CaseMatching; use nucleo_matcher::pattern::{CaseMatching, Normalization};
use crate::pattern::{MultiPattern, Status}; use crate::pattern::{MultiPattern, Status};
#[test] #[test]
fn append() { fn append() {
let mut pat = MultiPattern::new(1); let mut pat = MultiPattern::new(1);
pat.reparse(0, "!", CaseMatching::Smart, true); pat.reparse(0, "!", CaseMatching::Smart, Normalization::Smart, true);
assert_eq!(pat.status(), Status::Update); assert_eq!(pat.status(), Status::Update);
pat.reparse(0, "!f", CaseMatching::Smart, true); pat.reparse(0, "!f", CaseMatching::Smart, Normalization::Smart, true);
assert_eq!(pat.status(), Status::Update); assert_eq!(pat.status(), Status::Update);
pat.reparse(0, "!fo", CaseMatching::Smart, true); pat.reparse(0, "!fo", CaseMatching::Smart, Normalization::Smart, true);
assert_eq!(pat.status(), Status::Rescore); assert_eq!(pat.status(), Status::Rescore);
} }