fix substring matching

This commit is contained in:
Pascal Kuthe 2023-08-03 00:59:24 +02:00
parent 960441f3b2
commit 4f59b0fb91
No known key found for this signature in database
GPG Key ID: D715E8655AE166A6
2 changed files with 62 additions and 13 deletions

View File

@ -80,7 +80,7 @@ impl Matcher {
let bonus = self.config.bonus_for(prev_char_class, char_class); let bonus = self.config.bonus_for(prev_char_class, char_class);
let score = bonus * BONUS_FIRST_CHAR_MULTIPLIER + SCORE_MATCH; let score = bonus * BONUS_FIRST_CHAR_MULTIPLIER + SCORE_MATCH;
if score > max_score if score > max_score
&& haystack[i + prefilter_len..] && haystack[i + prefilter_len..(i + needle.len()).min(haystack.len())]
.iter() .iter()
.map(|&c| AsciiChar(c).normalize(&self.config).0) .map(|&c| AsciiChar(c).normalize(&self.config).0)
.eq(needle_without_prefilter.iter().copied()) .eq(needle_without_prefilter.iter().copied())

View File

@ -12,6 +12,7 @@ use Algorithm::*;
enum Algorithm { enum Algorithm {
FuzzyOptimal, FuzzyOptimal,
FuzzyGreedy, FuzzyGreedy,
Substring,
} }
fn assert_matches( fn assert_matches(
@ -46,12 +47,9 @@ fn assert_matches(
println!("xx {matched_indices:?} {algo:?}"); println!("xx {matched_indices:?} {algo:?}");
matched_indices.clear(); matched_indices.clear();
let res = match algo { let res = match algo {
Algorithm::FuzzyOptimal => { FuzzyOptimal => matcher.fuzzy_indices(haystack, needle, &mut matched_indices),
matcher.fuzzy_indices(haystack, needle, &mut matched_indices) FuzzyGreedy => matcher.fuzzy_indices_greedy(haystack, needle, &mut matched_indices),
} Substring => matcher.substring_indices(haystack, needle, &mut matched_indices),
Algorithm::FuzzyGreedy => {
matcher.fuzzy_indices_greedy(haystack, needle, &mut matched_indices)
}
}; };
println!("{matched_indices:?}"); println!("{matched_indices:?}");
let match_chars: Vec<_> = matched_indices let match_chars: Vec<_> = matched_indices
@ -126,9 +124,15 @@ fn test_fuzzy() {
&[ &[
( (
"fooBarbaz1", "fooBarbaz1",
"oBZ", "obr",
&[2, 3, 8], &[2, 3, 5],
BONUS_CAMEL123 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION * 3, BONUS_CONSECUTIVE - PENALTY_GAP_START,
),
(
"fooBarbaz1",
"br",
&[3, 5],
BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER - PENALTY_GAP_START,
), ),
( (
"foo bar baz", "foo bar baz",
@ -241,6 +245,51 @@ fn test_fuzzy() {
); );
} }
/// Table-driven test for the `Substring` algorithm.
///
/// Each case is handed to `assert_matches`, which for `Substring` calls
/// `matcher.substring_indices(haystack, needle, &mut matched_indices)`.
/// Every tuple is `(haystack, needle, expected matched indices, expected score)`.
///
/// NOTE(review): the three `false` arguments are positional flags of
/// `assert_matches`; their meaning is not visible from this test — confirm
/// against the helper's signature before changing them.
#[test]
fn test_substring() {
assert_matches(
&[Substring],
false,
false,
false,
&[
// Needle found in the middle of the haystack: indices 2..=4 are "oBa",
// so the lowercase needle is expected to match the uppercase `B`.
("fooBarbaz1", "oba", &[2, 3, 4], 2 * BONUS_CONSECUTIVE),
// Needle at the very start of the haystack.
(
"foo bar baz",
"foo",
&[0, 1, 2],
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + 2 * BONUS_CONSECUTIVE,
),
// Same haystack with an uppercase needle: expected indices and score
// are identical to the lowercase "foo" case above.
(
"foo bar baz",
"FOO",
&[0, 1, 2],
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + 2 * BONUS_CONSECUTIVE,
),
// Match spans a lowercase -> uppercase transition: indices 9..=12 are "rDoc".
(
"/AutomatorDocument.icns",
"rdoc",
&[9, 10, 11, 12],
BONUS_CAMEL123 + 2 * BONUS_CONSECUTIVE,
),
// Match starts directly after the second '/' (index 5) in the haystack.
(
"/man1/zshcompctl.1",
"zshc",
&[6, 7, 8, 9],
BONUS_BOUNDARY_DELIMITER * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CONSECUTIVE * 3,
),
// Match starts after '-' (index 7) and runs across the '/' at index 11.
(
"/.oh-my-zsh/cache",
"zsh/c",
&[8, 9, 10, 11, 12],
BONUS_BOUNDARY * BONUS_FIRST_CHAR_MULTIPLIER
+ BONUS_CONSECUTIVE * 3
+ BONUS_BOUNDARY_DELIMITER,
),
],
);
}
#[test] #[test]
fn test_fuzzy_case_sensitive() { fn test_fuzzy_case_sensitive() {
assert_matches( assert_matches(
@ -251,9 +300,9 @@ fn test_fuzzy_case_sensitive() {
&[ &[
( (
"fooBarbaz1", "fooBarbaz1",
"oBz", "oBr",
&[2, 3, 8], &[2, 3, 5],
BONUS_CAMEL123 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION * 3, BONUS_CAMEL123 - PENALTY_GAP_START,
), ),
( (
"Foo/Bar/Baz", "Foo/Bar/Baz",