mirror of
https://github.com/solaeus/nucleo.git
synced 2024-12-21 09:37:48 +00:00
Correctly handle normalization in pattern API
This commit is contained in:
parent
b41c989daf
commit
1edf451192
@ -1,9 +1,14 @@
|
||||
# Changelog
|
||||
|
||||
# Unreleased
|
||||
# [0.3.0] - 2023-12-20
|
||||
|
||||
## **Breaking Changes**
|
||||
|
||||
* Pattern API method now requires a Unicode `Normalization` strategy in addition to a `CaseMatching` strategy.
|
||||
|
||||
## Bugfixes
|
||||
|
||||
* correctly handle Unicode normalization when there are normalizable characters in the pattern, for example characters with umlauts
|
||||
* when the needle is composed of a single char, return the score and index
|
||||
of the best position instead of always returning the first matched character
|
||||
in the haystack
|
||||
@ -19,5 +24,6 @@
|
||||
*initial public release*
|
||||
|
||||
|
||||
[0.3.0]: https://github.com/helix-editor/nucleo/releases/tag/nucleo-v0.3.0
|
||||
[0.2.1]: https://github.com/helix-editor/nucleo/releases/tag/nucleo-v0.2.1
|
||||
[0.2.0]: https://github.com/helix-editor/nucleo/releases/tag/nucleo-v0.2.0
|
||||
|
@ -16,12 +16,12 @@ can contain special characters to control what kind of match is performed (see
|
||||
|
||||
```
|
||||
# use nucleo_matcher::{Matcher, Config};
|
||||
# use nucleo_matcher::pattern::{Pattern, CaseMatching};
|
||||
# use nucleo_matcher::pattern::{Pattern, Normalization, CaseMatching};
|
||||
let paths = ["foo/bar", "bar/foo", "foobar"];
|
||||
let mut matcher = Matcher::new(Config::DEFAULT.match_paths());
|
||||
let matches = Pattern::parse("foo bar", CaseMatching::Ignore).match_list(paths, &mut matcher);
|
||||
let matches = Pattern::parse("foo bar", CaseMatching::Ignore, Normalization::Smart).match_list(paths, &mut matcher);
|
||||
assert_eq!(matches, vec![("foo/bar", 168), ("bar/foo", 168), ("foobar", 140)]);
|
||||
let matches = Pattern::parse("^foo bar", CaseMatching::Ignore).match_list(paths, &mut matcher);
|
||||
let matches = Pattern::parse("^foo bar", CaseMatching::Ignore, Normalization::Smart).match_list(paths, &mut matcher);
|
||||
assert_eq!(matches, vec![("foo/bar", 168), ("foobar", 140)]);
|
||||
```
|
||||
|
||||
@ -30,13 +30,13 @@ If the pattern should be matched literally (without this special parsing)
|
||||
|
||||
```
|
||||
# use nucleo_matcher::{Matcher, Config};
|
||||
# use nucleo_matcher::pattern::{Pattern, CaseMatching, AtomKind};
|
||||
# use nucleo_matcher::pattern::{Pattern, CaseMatching, AtomKind, Normalization};
|
||||
let paths = ["foo/bar", "bar/foo", "foobar"];
|
||||
let mut matcher = Matcher::new(Config::DEFAULT.match_paths());
|
||||
let matches = Pattern::new("foo bar", CaseMatching::Ignore, AtomKind::Fuzzy).match_list(paths, &mut matcher);
|
||||
let matches = Pattern::new("foo bar", CaseMatching::Ignore, Normalization::Smart, AtomKind::Fuzzy).match_list(paths, &mut matcher);
|
||||
assert_eq!(matches, vec![("foo/bar", 168), ("bar/foo", 168), ("foobar", 140)]);
|
||||
let paths = ["^foo/bar", "bar/^foo", "foobar"];
|
||||
let matches = Pattern::new("^foo bar", CaseMatching::Ignore, AtomKind::Fuzzy).match_list(paths, &mut matcher);
|
||||
let matches = Pattern::new("^foo bar", CaseMatching::Ignore, Normalization::Smart, AtomKind::Fuzzy).match_list(paths, &mut matcher);
|
||||
assert_eq!(matches, vec![("^foo/bar", 188), ("bar/^foo", 188)]);
|
||||
```
|
||||
|
||||
@ -44,10 +44,10 @@ If word segmentation is also not desired, a single `Atom` can be constructed dir
|
||||
|
||||
```
|
||||
# use nucleo_matcher::{Matcher, Config};
|
||||
# use nucleo_matcher::pattern::{Pattern, Atom, CaseMatching, AtomKind};
|
||||
# use nucleo_matcher::pattern::{Pattern, Atom, CaseMatching, Normalization, AtomKind};
|
||||
let paths = ["foobar", "foo bar"];
|
||||
let mut matcher = Matcher::new(Config::DEFAULT);
|
||||
let matches = Atom::new("foo bar", CaseMatching::Ignore, AtomKind::Fuzzy, false).match_list(paths, &mut matcher);
|
||||
let matches = Atom::new("foo bar", CaseMatching::Ignore, Normalization::Smart, AtomKind::Fuzzy, false).match_list(paths, &mut matcher);
|
||||
assert_eq!(matches, vec![("foo bar", 192)]);
|
||||
```
|
||||
|
||||
|
@ -25,6 +25,19 @@ pub enum CaseMatching {
|
||||
Smart,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
|
||||
#[non_exhaustive]
|
||||
/// How to handle unicode normalization,
|
||||
pub enum Normalization {
|
||||
/// Characters never match their normalized version (`a != ä`).
|
||||
Never,
|
||||
/// Acts like [`Never`](Normalization::Never) if any character in a pattern atom
|
||||
/// would need to be normalized. Otherwise normalization occurs (`a == ä` but `ä != a`).
|
||||
#[default]
|
||||
#[cfg(feature = "unicode-normalization")]
|
||||
Smart,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
#[non_exhaustive]
|
||||
/// The kind of matching algorithm to run for an atom.
|
||||
@ -73,24 +86,41 @@ pub struct Atom {
|
||||
pub kind: AtomKind,
|
||||
needle: Utf32String,
|
||||
ignore_case: bool,
|
||||
normalize: bool,
|
||||
}
|
||||
|
||||
impl Atom {
|
||||
/// Creates a single [`Atom`] from a string by performing unicode
|
||||
/// normalization and case folding (if necessary). Optionally `\ ` can
|
||||
/// be escaped to ` `.
|
||||
pub fn new(needle: &str, case: CaseMatching, kind: AtomKind, escape_whitespace: bool) -> Atom {
|
||||
Atom::new_inner(needle, case, kind, escape_whitespace, false)
|
||||
pub fn new(
|
||||
needle: &str,
|
||||
case: CaseMatching,
|
||||
normalize: Normalization,
|
||||
kind: AtomKind,
|
||||
escape_whitespace: bool,
|
||||
) -> Atom {
|
||||
Atom::new_inner(needle, case, normalize, kind, escape_whitespace, false)
|
||||
}
|
||||
|
||||
fn new_inner(
|
||||
needle: &str,
|
||||
case: CaseMatching,
|
||||
normalization: Normalization,
|
||||
kind: AtomKind,
|
||||
escape_whitespace: bool,
|
||||
append_dollar: bool,
|
||||
) -> Atom {
|
||||
let mut ignore_case;
|
||||
let mut normalize;
|
||||
#[cfg(feature = "unicode-normalization")]
|
||||
{
|
||||
normalize = matches!(normalization, Normalization::Smart);
|
||||
}
|
||||
#[cfg(not(feature = "unicode-normalization"))]
|
||||
{
|
||||
normalize = false;
|
||||
}
|
||||
let needle = if needle.is_ascii() {
|
||||
let mut needle = if escape_whitespace {
|
||||
if let Some((start, rem)) = needle.split_once("\\ ") {
|
||||
@ -133,6 +163,10 @@ impl Atom {
|
||||
{
|
||||
ignore_case = false;
|
||||
}
|
||||
#[cfg(feature = "unicode-normalization")]
|
||||
{
|
||||
normalize = matches!(normalization, Normalization::Smart);
|
||||
}
|
||||
if escape_whitespace {
|
||||
let mut saw_backslash = false;
|
||||
for mut c in chars::graphemes(needle) {
|
||||
@ -155,6 +189,13 @@ impl Atom {
|
||||
}
|
||||
CaseMatching::Respect => (),
|
||||
}
|
||||
match normalization {
|
||||
#[cfg(feature = "unicode-normalization")]
|
||||
Normalization::Smart => {
|
||||
normalize = normalize && chars::normalize(c) == c;
|
||||
}
|
||||
Normalization::Never => (),
|
||||
}
|
||||
needle_.push(c);
|
||||
}
|
||||
} else {
|
||||
@ -168,6 +209,13 @@ impl Atom {
|
||||
}
|
||||
CaseMatching::Respect => (),
|
||||
}
|
||||
match normalization {
|
||||
#[cfg(feature = "unicode-normalization")]
|
||||
Normalization::Smart => {
|
||||
normalize = normalize && chars::normalize(c) == c;
|
||||
}
|
||||
Normalization::Never => (),
|
||||
}
|
||||
c
|
||||
});
|
||||
needle_.extend(chars);
|
||||
@ -182,13 +230,14 @@ impl Atom {
|
||||
needle,
|
||||
negative: false,
|
||||
ignore_case,
|
||||
normalize,
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse a pattern atom from a string. Some special trailing and leading
|
||||
/// characters can be used to control the atom kind. See [`AtomKind`] for
|
||||
/// details.
|
||||
pub fn parse(raw: &str, case: CaseMatching) -> Atom {
|
||||
pub fn parse(raw: &str, case: CaseMatching, normalize: Normalization) -> Atom {
|
||||
let mut atom = raw;
|
||||
let invert = match atom.as_bytes() {
|
||||
[b'!', ..] => {
|
||||
@ -239,7 +288,7 @@ impl Atom {
|
||||
kind = AtomKind::Substring
|
||||
}
|
||||
|
||||
let mut pattern = Atom::new_inner(atom, case, kind, true, append_dollar);
|
||||
let mut pattern = Atom::new_inner(atom, case, normalize, kind, true, append_dollar);
|
||||
pattern.negative = invert;
|
||||
pattern
|
||||
}
|
||||
@ -252,6 +301,7 @@ impl Atom {
|
||||
/// each pattern atom.
|
||||
pub fn score(&self, haystack: Utf32Str<'_>, matcher: &mut Matcher) -> Option<u16> {
|
||||
matcher.config.ignore_case = self.ignore_case;
|
||||
matcher.config.normalize = self.normalize;
|
||||
let pattern_score = match self.kind {
|
||||
AtomKind::Exact => matcher.exact_match(haystack, self.needle.slice(..)),
|
||||
AtomKind::Fuzzy => matcher.fuzzy_match(haystack, self.needle.slice(..)),
|
||||
@ -285,6 +335,7 @@ impl Atom {
|
||||
indices: &mut Vec<u32>,
|
||||
) -> Option<u16> {
|
||||
matcher.config.ignore_case = self.ignore_case;
|
||||
matcher.config.normalize = self.normalize;
|
||||
if self.negative {
|
||||
let pattern_score = match self.kind {
|
||||
AtomKind::Exact => matcher.exact_match(haystack, self.needle.slice(..)),
|
||||
@ -371,10 +422,15 @@ impl Pattern {
|
||||
/// can be escaped with `\`). Otherwise no parsing is performed (so $, !, '
|
||||
/// and ^ don't receive special treatment). If you want to match the entire
|
||||
/// pattern as a single needle use a single [`Atom`] instead.
|
||||
pub fn new(pattern: &str, case_matching: CaseMatching, kind: AtomKind) -> Pattern {
|
||||
pub fn new(
|
||||
pattern: &str,
|
||||
case_matching: CaseMatching,
|
||||
normalize: Normalization,
|
||||
kind: AtomKind,
|
||||
) -> Pattern {
|
||||
let atoms = pattern_atoms(pattern)
|
||||
.filter_map(|pat| {
|
||||
let pat = Atom::new(pat, case_matching, kind, true);
|
||||
let pat = Atom::new(pat, case_matching, normalize, kind, true);
|
||||
(!pat.needle.is_empty()).then_some(pat)
|
||||
})
|
||||
.collect();
|
||||
@ -384,10 +440,10 @@ impl Pattern {
|
||||
/// can be escaped with `\`). And $, !, ' and ^ at word boundaries will
|
||||
/// cause different matching behaviour (see [`AtomKind`]). These can be
|
||||
/// escaped with backslash.
|
||||
pub fn parse(pattern: &str, case_matching: CaseMatching) -> Pattern {
|
||||
pub fn parse(pattern: &str, case_matching: CaseMatching, normalize: Normalization) -> Pattern {
|
||||
let atoms = pattern_atoms(pattern)
|
||||
.filter_map(|pat| {
|
||||
let pat = Atom::parse(pat, case_matching);
|
||||
let pat = Atom::parse(pat, case_matching, normalize);
|
||||
(!pat.needle.is_empty()).then_some(pat)
|
||||
})
|
||||
.collect();
|
||||
@ -477,10 +533,15 @@ impl Pattern {
|
||||
/// Refreshes this pattern by reparsing it from a string. This is mostly
|
||||
/// equivalent to just constructing a new pattern using [`Pattern::parse`]
|
||||
/// but is slightly more efficient by reusing some allocations
|
||||
pub fn reparse(&mut self, pattern: &str, case_matching: CaseMatching) {
|
||||
pub fn reparse(
|
||||
&mut self,
|
||||
pattern: &str,
|
||||
case_matching: CaseMatching,
|
||||
normalize: Normalization,
|
||||
) {
|
||||
self.atoms.clear();
|
||||
let atoms = pattern_atoms(pattern).filter_map(|atom| {
|
||||
let atom = Atom::parse(atom, case_matching);
|
||||
let atom = Atom::parse(atom, case_matching, normalize);
|
||||
if atom.needle.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
@ -1,20 +1,20 @@
|
||||
use crate::pattern::{Atom, AtomKind, CaseMatching};
|
||||
use crate::pattern::{Atom, AtomKind, CaseMatching, Normalization};
|
||||
|
||||
#[test]
|
||||
fn negative() {
|
||||
let pat = Atom::parse("!foo", CaseMatching::Smart);
|
||||
let pat = Atom::parse("!foo", CaseMatching::Smart, Normalization::Smart);
|
||||
assert!(pat.negative);
|
||||
assert_eq!(pat.kind, AtomKind::Substring);
|
||||
assert_eq!(pat.needle.to_string(), "foo");
|
||||
let pat = Atom::parse("!^foo", CaseMatching::Smart);
|
||||
let pat = Atom::parse("!^foo", CaseMatching::Smart, Normalization::Smart);
|
||||
assert!(pat.negative);
|
||||
assert_eq!(pat.kind, AtomKind::Prefix);
|
||||
assert_eq!(pat.needle.to_string(), "foo");
|
||||
let pat = Atom::parse("!foo$", CaseMatching::Smart);
|
||||
let pat = Atom::parse("!foo$", CaseMatching::Smart, Normalization::Smart);
|
||||
assert!(pat.negative);
|
||||
assert_eq!(pat.kind, AtomKind::Postfix);
|
||||
assert_eq!(pat.needle.to_string(), "foo");
|
||||
let pat = Atom::parse("!^foo$", CaseMatching::Smart);
|
||||
let pat = Atom::parse("!^foo$", CaseMatching::Smart, Normalization::Smart);
|
||||
assert!(pat.negative);
|
||||
assert_eq!(pat.kind, AtomKind::Exact);
|
||||
assert_eq!(pat.needle.to_string(), "foo");
|
||||
@ -22,23 +22,23 @@ fn negative() {
|
||||
|
||||
#[test]
|
||||
fn pattern_kinds() {
|
||||
let pat = Atom::parse("foo", CaseMatching::Smart);
|
||||
let pat = Atom::parse("foo", CaseMatching::Smart, Normalization::Smart);
|
||||
assert!(!pat.negative);
|
||||
assert_eq!(pat.kind, AtomKind::Fuzzy);
|
||||
assert_eq!(pat.needle.to_string(), "foo");
|
||||
let pat = Atom::parse("'foo", CaseMatching::Smart);
|
||||
let pat = Atom::parse("'foo", CaseMatching::Smart, Normalization::Smart);
|
||||
assert!(!pat.negative);
|
||||
assert_eq!(pat.kind, AtomKind::Substring);
|
||||
assert_eq!(pat.needle.to_string(), "foo");
|
||||
let pat = Atom::parse("^foo", CaseMatching::Smart);
|
||||
let pat = Atom::parse("^foo", CaseMatching::Smart, Normalization::Smart);
|
||||
assert!(!pat.negative);
|
||||
assert_eq!(pat.kind, AtomKind::Prefix);
|
||||
assert_eq!(pat.needle.to_string(), "foo");
|
||||
let pat = Atom::parse("foo$", CaseMatching::Smart);
|
||||
let pat = Atom::parse("foo$", CaseMatching::Smart, Normalization::Smart);
|
||||
assert!(!pat.negative);
|
||||
assert_eq!(pat.kind, AtomKind::Postfix);
|
||||
assert_eq!(pat.needle.to_string(), "foo");
|
||||
let pat = Atom::parse("^foo$", CaseMatching::Smart);
|
||||
let pat = Atom::parse("^foo$", CaseMatching::Smart, Normalization::Smart);
|
||||
assert!(!pat.negative);
|
||||
assert_eq!(pat.kind, AtomKind::Exact);
|
||||
assert_eq!(pat.needle.to_string(), "foo");
|
||||
@ -46,69 +46,69 @@ fn pattern_kinds() {
|
||||
|
||||
#[test]
|
||||
fn case_matching() {
|
||||
let pat = Atom::parse("foo", CaseMatching::Smart);
|
||||
let pat = Atom::parse("foo", CaseMatching::Smart, Normalization::Smart);
|
||||
assert!(pat.ignore_case);
|
||||
assert_eq!(pat.needle.to_string(), "foo");
|
||||
let pat = Atom::parse("Foo", CaseMatching::Smart);
|
||||
let pat = Atom::parse("Foo", CaseMatching::Smart, Normalization::Smart);
|
||||
assert!(!pat.ignore_case);
|
||||
assert_eq!(pat.needle.to_string(), "Foo");
|
||||
let pat = Atom::parse("Foo", CaseMatching::Ignore);
|
||||
let pat = Atom::parse("Foo", CaseMatching::Ignore, Normalization::Smart);
|
||||
assert!(pat.ignore_case);
|
||||
assert_eq!(pat.needle.to_string(), "foo");
|
||||
let pat = Atom::parse("Foo", CaseMatching::Respect);
|
||||
let pat = Atom::parse("Foo", CaseMatching::Respect, Normalization::Smart);
|
||||
assert!(!pat.ignore_case);
|
||||
assert_eq!(pat.needle.to_string(), "Foo");
|
||||
let pat = Atom::parse("Foo", CaseMatching::Respect);
|
||||
let pat = Atom::parse("Foo", CaseMatching::Respect, Normalization::Smart);
|
||||
assert!(!pat.ignore_case);
|
||||
assert_eq!(pat.needle.to_string(), "Foo");
|
||||
let pat = Atom::parse("Äxx", CaseMatching::Ignore);
|
||||
let pat = Atom::parse("Äxx", CaseMatching::Ignore, Normalization::Smart);
|
||||
assert!(pat.ignore_case);
|
||||
assert_eq!(pat.needle.to_string(), "äxx");
|
||||
let pat = Atom::parse("Äxx", CaseMatching::Respect);
|
||||
let pat = Atom::parse("Äxx", CaseMatching::Respect, Normalization::Smart);
|
||||
assert!(!pat.ignore_case);
|
||||
let pat = Atom::parse("Axx", CaseMatching::Smart);
|
||||
let pat = Atom::parse("Axx", CaseMatching::Smart, Normalization::Smart);
|
||||
assert!(!pat.ignore_case);
|
||||
assert_eq!(pat.needle.to_string(), "Axx");
|
||||
let pat = Atom::parse("你xx", CaseMatching::Smart);
|
||||
let pat = Atom::parse("你xx", CaseMatching::Smart, Normalization::Smart);
|
||||
assert!(pat.ignore_case);
|
||||
assert_eq!(pat.needle.to_string(), "你xx");
|
||||
let pat = Atom::parse("你xx", CaseMatching::Ignore);
|
||||
let pat = Atom::parse("你xx", CaseMatching::Ignore, Normalization::Smart);
|
||||
assert!(pat.ignore_case);
|
||||
assert_eq!(pat.needle.to_string(), "你xx");
|
||||
let pat = Atom::parse("Ⲽxx", CaseMatching::Smart);
|
||||
let pat = Atom::parse("Ⲽxx", CaseMatching::Smart, Normalization::Smart);
|
||||
assert!(!pat.ignore_case);
|
||||
assert_eq!(pat.needle.to_string(), "Ⲽxx");
|
||||
let pat = Atom::parse("Ⲽxx", CaseMatching::Ignore);
|
||||
let pat = Atom::parse("Ⲽxx", CaseMatching::Ignore, Normalization::Smart);
|
||||
assert!(pat.ignore_case);
|
||||
assert_eq!(pat.needle.to_string(), "ⲽxx");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn escape() {
|
||||
let pat = Atom::parse("foo\\ bar", CaseMatching::Smart);
|
||||
let pat = Atom::parse("foo\\ bar", CaseMatching::Smart, Normalization::Smart);
|
||||
assert_eq!(pat.needle.to_string(), "foo bar");
|
||||
let pat = Atom::parse("\\!foo", CaseMatching::Smart);
|
||||
let pat = Atom::parse("\\!foo", CaseMatching::Smart, Normalization::Smart);
|
||||
assert_eq!(pat.needle.to_string(), "!foo");
|
||||
assert_eq!(pat.kind, AtomKind::Fuzzy);
|
||||
let pat = Atom::parse("\\'foo", CaseMatching::Smart);
|
||||
let pat = Atom::parse("\\'foo", CaseMatching::Smart, Normalization::Smart);
|
||||
assert_eq!(pat.needle.to_string(), "'foo");
|
||||
assert_eq!(pat.kind, AtomKind::Fuzzy);
|
||||
let pat = Atom::parse("\\^foo", CaseMatching::Smart);
|
||||
let pat = Atom::parse("\\^foo", CaseMatching::Smart, Normalization::Smart);
|
||||
assert_eq!(pat.needle.to_string(), "^foo");
|
||||
assert_eq!(pat.kind, AtomKind::Fuzzy);
|
||||
let pat = Atom::parse("foo\\$", CaseMatching::Smart);
|
||||
let pat = Atom::parse("foo\\$", CaseMatching::Smart, Normalization::Smart);
|
||||
assert_eq!(pat.needle.to_string(), "foo$");
|
||||
assert_eq!(pat.kind, AtomKind::Fuzzy);
|
||||
let pat = Atom::parse("^foo\\$", CaseMatching::Smart);
|
||||
let pat = Atom::parse("^foo\\$", CaseMatching::Smart, Normalization::Smart);
|
||||
assert_eq!(pat.needle.to_string(), "foo$");
|
||||
assert_eq!(pat.kind, AtomKind::Prefix);
|
||||
let pat = Atom::parse("\\^foo\\$", CaseMatching::Smart);
|
||||
let pat = Atom::parse("\\^foo\\$", CaseMatching::Smart, Normalization::Smart);
|
||||
assert_eq!(pat.needle.to_string(), "^foo$");
|
||||
assert_eq!(pat.kind, AtomKind::Fuzzy);
|
||||
let pat = Atom::parse("\\!^foo\\$", CaseMatching::Smart);
|
||||
let pat = Atom::parse("\\!^foo\\$", CaseMatching::Smart, Normalization::Smart);
|
||||
assert_eq!(pat.needle.to_string(), "!^foo$");
|
||||
assert_eq!(pat.kind, AtomKind::Fuzzy);
|
||||
let pat = Atom::parse("!\\^foo\\$", CaseMatching::Smart);
|
||||
let pat = Atom::parse("!\\^foo\\$", CaseMatching::Smart, Normalization::Smart);
|
||||
assert_eq!(pat.needle.to_string(), "^foo$");
|
||||
assert_eq!(pat.kind, AtomKind::Substring);
|
||||
}
|
||||
|
@ -1,4 +1,5 @@
|
||||
use crate::chars::Char;
|
||||
use crate::pattern::{CaseMatching, Normalization, Pattern};
|
||||
use crate::score::{
|
||||
BONUS_BOUNDARY, BONUS_CAMEL123, BONUS_CONSECUTIVE, BONUS_FIRST_CHAR_MULTIPLIER, BONUS_NON_WORD,
|
||||
MAX_PREFIX_BONUS, PENALTY_GAP_EXTENSION, PENALTY_GAP_START, SCORE_MATCH,
|
||||
@ -577,12 +578,13 @@ fn test_optimal() {
|
||||
// the second f instead of the third yielding a higher score
|
||||
// (despite using the same scoring function!)
|
||||
(
|
||||
"xf foo",
|
||||
"xf.foo",
|
||||
"xfoo",
|
||||
&[0, 3, 4, 5],
|
||||
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 3)
|
||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER
|
||||
- PENALTY_GAP_START
|
||||
- PENALTY_GAP_EXTENSION,
|
||||
- PENALTY_GAP_EXTENSION
|
||||
+ BONUS_BOUNDARY * 3,
|
||||
),
|
||||
(
|
||||
"xf fo",
|
||||
@ -698,3 +700,18 @@ fn test_single_char_needle() {
|
||||
)],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn umlaut() {
|
||||
let paths = ["be", "bë"];
|
||||
let mut matcher = Matcher::new(Config::DEFAULT);
|
||||
let matches = Pattern::parse("ë", CaseMatching::Ignore, Normalization::Smart)
|
||||
.match_list(paths, &mut matcher);
|
||||
assert_eq!(matches.len(), 1);
|
||||
let matches = Pattern::parse("e", CaseMatching::Ignore, Normalization::Never)
|
||||
.match_list(paths, &mut matcher);
|
||||
assert_eq!(matches.len(), 1);
|
||||
let matches = Pattern::parse("e", CaseMatching::Ignore, Normalization::Smart)
|
||||
.match_list(paths, &mut matcher);
|
||||
assert_eq!(matches.len(), 2);
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
pub use nucleo_matcher::pattern::{Atom, AtomKind, CaseMatching, Pattern};
|
||||
pub use nucleo_matcher::pattern::{Atom, AtomKind, CaseMatching, Normalization, Pattern};
|
||||
use nucleo_matcher::{Matcher, Utf32String};
|
||||
|
||||
#[cfg(test)]
|
||||
@ -46,6 +46,7 @@ impl MultiPattern {
|
||||
column: usize,
|
||||
new_text: &str,
|
||||
case_matching: CaseMatching,
|
||||
normalization: Normalization,
|
||||
append: bool,
|
||||
) {
|
||||
let old_status = self.cols[column].1;
|
||||
@ -61,7 +62,9 @@ impl MultiPattern {
|
||||
} else {
|
||||
self.cols[column].1 = Status::Rescore;
|
||||
}
|
||||
self.cols[column].0.reparse(new_text, case_matching);
|
||||
self.cols[column]
|
||||
.0
|
||||
.reparse(new_text, case_matching, normalization);
|
||||
}
|
||||
|
||||
pub fn column_pattern(&self, column: usize) -> &Pattern {
|
||||
|
@ -1,14 +1,14 @@
|
||||
use nucleo_matcher::pattern::CaseMatching;
|
||||
use nucleo_matcher::pattern::{CaseMatching, Normalization};
|
||||
|
||||
use crate::pattern::{MultiPattern, Status};
|
||||
|
||||
#[test]
|
||||
fn append() {
|
||||
let mut pat = MultiPattern::new(1);
|
||||
pat.reparse(0, "!", CaseMatching::Smart, true);
|
||||
pat.reparse(0, "!", CaseMatching::Smart, Normalization::Smart, true);
|
||||
assert_eq!(pat.status(), Status::Update);
|
||||
pat.reparse(0, "!f", CaseMatching::Smart, true);
|
||||
pat.reparse(0, "!f", CaseMatching::Smart, Normalization::Smart, true);
|
||||
assert_eq!(pat.status(), Status::Update);
|
||||
pat.reparse(0, "!fo", CaseMatching::Smart, true);
|
||||
pat.reparse(0, "!fo", CaseMatching::Smart, Normalization::Smart, true);
|
||||
assert_eq!(pat.status(), Status::Rescore);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user