fix scoring and case-sensitive matching

This commit is contained in:
Pascal Kuthe 2023-07-20 21:19:11 +02:00
parent 52f1712a78
commit 9ffa5e63c2
No known key found for this signature in database
GPG Key ID: D715E8655AE166A6
4 changed files with 88 additions and 21 deletions

View File

@ -120,7 +120,7 @@ impl Char for char {
return (c.0 as char, class); return (c.0 as char, class);
} }
let char_class = char_class_non_ascii(self); let char_class = char_class_non_ascii(self);
if char_class == CharClass::Upper { if char_class == CharClass::Upper && config.ignore_case {
self = CASE_FOLDING_SIMPLE self = CASE_FOLDING_SIMPLE
.binary_search_by_key(&self, |(upper, _)| *upper) .binary_search_by_key(&self, |(upper, _)| *upper)
.map_or(self, |idx| CASE_FOLDING_SIMPLE[idx].1) .map_or(self, |idx| CASE_FOLDING_SIMPLE[idx].1)
@ -136,7 +136,10 @@ impl Char for char {
if config.normalize { if config.normalize {
self = normalize::normalize(self); self = normalize::normalize(self);
} }
to_lower_case(self) if config.ignore_case {
self = to_lower_case(self)
}
self
} }
} }

View File

@ -178,18 +178,26 @@ impl<H: Char> Matrix<'_, H> {
} else { } else {
PENALTY_GAP_START PENALTY_GAP_START
}; };
let mut score1 = 0; // we calculate two scores:
let score2 = prev_matrix_cell.score.saturating_sub(gap_penalty); // * one for traversing the matrix horizontally (no match at
// the current char)
// * one for traversing the matrix diagonally (match at the
// current char)
// the maximum of those two scores is used
let mut score_diag = 0;
let score_hory = prev_matrix_cell.score.saturating_sub(gap_penalty);
let mut consecutive = 0; let mut consecutive = 0;
if haystack_char.char == needle_char { if haystack_char.char == needle_char {
score1 = diag_matrix_cell.score + SCORE_MATCH; // we have a match at the current char
score_diag = diag_matrix_cell.score + SCORE_MATCH;
let mut bonus = haystack_char.bonus; let mut bonus = haystack_char.bonus;
consecutive = diag_matrix_cell.consecutive_chars + 1; consecutive = diag_matrix_cell.consecutive_chars + 1;
if consecutive > 1 { if consecutive > 1 {
let first_bonus = self.bonus[col + 1 - consecutive as usize]; let first_bonus = self.bonus[col + 1 - consecutive as usize];
println!("xoxo {bonus} {first_bonus} {consecutive}");
if bonus > first_bonus { if bonus > first_bonus {
if bonus > BONUS_BOUNDARY { if bonus >= BONUS_BOUNDARY {
consecutive = 1 consecutive = 1
} else { } else {
bonus = max(bonus, BONUS_CONSECUTIVE) bonus = max(bonus, BONUS_CONSECUTIVE)
@ -198,15 +206,15 @@ impl<H: Char> Matrix<'_, H> {
bonus = max(first_bonus, BONUS_CONSECUTIVE) bonus = max(first_bonus, BONUS_CONSECUTIVE)
} }
} }
if score1 + bonus < score2 { if score_diag + bonus < score_hory {
score1 += haystack_char.bonus; score_diag += haystack_char.bonus;
consecutive = 0; consecutive = 0;
} else { } else {
score1 += bonus; score_diag += bonus;
} }
} }
in_gap = score1 < score2; in_gap = score_diag < score_hory;
let score = max(score1, score2); let score = max(score_diag, score_hory);
if i == needle.len() - 1 && score > max_score { if i == needle.len() - 1 && score > max_score {
max_score = score; max_score = score;
max_score_end = col as u16; max_score_end = col as u16;
@ -238,25 +246,31 @@ impl<H: Char> Matrix<'_, H> {
loop { loop {
let score = row[col].score; let score = row[col].score;
let mut score1 = 0; // we calculate two scores:
let mut score2 = 0; // * one for traversing the matrix horizontally (no match at
// the current char)
// * one for traversing the matrix diagonally (match at the
// current char)
// the maximum of those two scores is used
let mut score_diag = 0;
let mut score_horz = 0;
if let Some(&(prev_row, _)) = row_iter.peek() { if let Some(&(prev_row, _)) = row_iter.peek() {
if col >= prev_row.off { if col >= prev_row.off {
score1 = prev_row[col].score; score_diag = prev_row[col].score;
} }
} }
if col > row.off { if col > row.off {
score2 = row[col - 1].score; score_horz = row[col - 1].score;
} }
let mut new_prefer_match = row[col].consecutive_chars > 1; let mut new_prefer_match = row[col].consecutive_chars > 1;
if !new_prefer_match && col + 1 < haystack_len { if !new_prefer_match && col + 1 < haystack_len {
if let Some(next_row) = next_row { if let Some(next_row) = next_row {
if col + 1 > next_row.off { if col + 1 >= next_row.off {
new_prefer_match = next_row[col + 1].consecutive_chars > 0 new_prefer_match = next_row[col + 1].consecutive_chars > 0
} }
} }
} }
if score > score1 && (score > score2 || score == score2 && prefer_match) { if score > score_diag && (score > score_horz || score == score_horz && prefer_match) {
*matched_col_idx = col as u32 + start; *matched_col_idx = col as u32 + start;
next_row = Some(row); next_row = Some(row);
let Some(next) = row_iter.next() else { let Some(next) = row_iter.next() else {
@ -267,5 +281,6 @@ impl<H: Char> Matrix<'_, H> {
prefer_match = new_prefer_match; prefer_match = new_prefer_match;
col -= 1; col -= 1;
} }
println!("{:#?}", self);
} }
} }

View File

@ -51,11 +51,14 @@ pub fn assert_matches(
Some(score), Some(score),
"{needle:?} did not match {haystack:?}: {match_chars:?}" "{needle:?} did not match {haystack:?}: {match_chars:?}"
); );
assert_eq!(match_chars, needle_chars, "match indices are incorrect"); assert_eq!(
match_chars, needle_chars,
"match indices are incorrect {indices:?}"
);
assert_eq!( assert_eq!(
indices.first().copied()..indices.last().map(|&i| i + 1), indices.first().copied()..indices.last().map(|&i| i + 1),
Some(start)..Some(end), Some(start)..Some(end),
"{needle:?} match {haystack:?}[{start}..{end}]" "{needle:?} match {haystack:?}"
); );
} }
} }
@ -63,7 +66,7 @@ const BONUS_BOUNDARY_WHITE: u16 = MatcherConfig::DEFAULT.bonus_boundary_white;
const BONUS_BOUNDARY_DELIMITER: u16 = MatcherConfig::DEFAULT.bonus_boundary_delimiter; const BONUS_BOUNDARY_DELIMITER: u16 = MatcherConfig::DEFAULT.bonus_boundary_delimiter;
#[test] #[test]
fn test_v2_fuzzy() { fn test_fuzzy() {
assert_matches( assert_matches(
false, false,
false, false,
@ -176,6 +179,52 @@ fn test_v2_fuzzy() {
); );
} }
// Exercises fuzzy matching with needles that contain uppercase characters
// against mixed-case haystacks.
//
// NOTE(review): the four leading booleans are positional flags of
// `assert_matches`, whose signature is not visible here. The third flag is
// `true` in this test; presumably it enables case-sensitive matching -- confirm
// against the helper's parameter list.
//
// Each case tuple appears to be `(haystack, needle, start, end, score_expr)`:
// `assert_matches` compares `Some(start)..Some(end)` with the first matched
// index and one past the last matched index. `score_expr` is written purely in
// terms of bonus and penalty constants; presumably the helper adds the
// per-character base match score itself -- TODO confirm.
#[test]
fn test_fuzzy_case_sensitive() {
assert_matches(
false,
false,
true,
false,
&[
// needle "oBz" against a camelCase haystack; expected span is 2..9
(
"fooBarbaz1",
"oBz",
2,
9,
BONUS_CAMEL123 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION * 3,
),
// every needle char is expected right after a `/` delimiter (or at the start)
(
"Foo/Bar/Baz",
"FBB",
0,
9,
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_DELIMITER * 2
- 2 * PENALTY_GAP_START
- 4 * PENALTY_GAP_EXTENSION,
),
// same needle, but the later chars are expected to score the camel-case bonus
(
"FooBarBaz",
"FBB",
0,
7,
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CAMEL123 * 2
- 2 * PENALTY_GAP_START
- 2 * PENALTY_GAP_EXTENSION,
),
// contiguous prefix match: no gap penalties appear in the expected score
(
"FooBar Baz",
"FooB",
0,
4,
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 3,
),
// Consecutive bonus updated
("foo-bar", "o-ba", 2, 6, BONUS_BOUNDARY * 2 + BONUS_NON_WORD),
],
);
}
#[test] #[test]
fn test_v1_fuzzy() { fn test_v1_fuzzy() {
assert_matches( assert_matches(

View File

@ -1,3 +1,3 @@
default.extend-ignore-re = ["\\\\u\\{[0-9A-Za-z]*\\}"] default.extend-ignore-re = ["\\\\u\\{[0-9A-Za-z]*\\}"]
[files] [files]
extend-exclude = ["integration_tests", "verilogae/tests", "*.mir", "openvaf/lexer/src/tests.rs"] extend-exclude = ["src/tests.rs"]