diff --git a/src/chars.rs b/src/chars.rs index a677355..731aca6 100644 --- a/src/chars.rs +++ b/src/chars.rs @@ -120,7 +120,7 @@ impl Char for char { return (c.0 as char, class); } let char_class = char_class_non_ascii(self); - if char_class == CharClass::Upper { + if char_class == CharClass::Upper && config.ignore_case { self = CASE_FOLDING_SIMPLE .binary_search_by_key(&self, |(upper, _)| *upper) .map_or(self, |idx| CASE_FOLDING_SIMPLE[idx].1) @@ -136,7 +136,10 @@ impl Char for char { if config.normalize { self = normalize::normalize(self); } - to_lower_case(self) + if config.ignore_case { + self = to_lower_case(self) + } + self } } diff --git a/src/fuzzy_optimal.rs b/src/fuzzy_optimal.rs index 4153c83..beff56b 100644 --- a/src/fuzzy_optimal.rs +++ b/src/fuzzy_optimal.rs @@ -178,18 +178,26 @@ impl Matrix<'_, H> { } else { PENALTY_GAP_START }; - let mut score1 = 0; - let score2 = prev_matrix_cell.score.saturating_sub(gap_penalty); + // we calculate two scores: + // * one for traversing the matrix horizontally (no match at + // the current char) + // * one for traversing the matrix diagonally (match at the + // current char) + // the maximum of those two scores is used + let mut score_diag = 0; + let score_hory = prev_matrix_cell.score.saturating_sub(gap_penalty); let mut consecutive = 0; if haystack_char.char == needle_char { - score1 = diag_matrix_cell.score + SCORE_MATCH; + // we have a match at the current char + score_diag = diag_matrix_cell.score + SCORE_MATCH; let mut bonus = haystack_char.bonus; consecutive = diag_matrix_cell.consecutive_chars + 1; if consecutive > 1 { let first_bonus = self.bonus[col + 1 - consecutive as usize]; + println!("xoxo {bonus} {first_bonus} {consecutive}"); if bonus > first_bonus { - if bonus > BONUS_BOUNDARY { + if bonus >= BONUS_BOUNDARY { consecutive = 1 } else { bonus = max(bonus, BONUS_CONSECUTIVE) @@ -198,15 +206,15 @@ impl Matrix<'_, H> { bonus = max(first_bonus, BONUS_CONSECUTIVE) } } - if score1 + bonus < score2 { - 
score1 += haystack_char.bonus; + if score_diag + bonus < score_hory { + score_diag += haystack_char.bonus; consecutive = 0; } else { - score1 += bonus; + score_diag += bonus; } } - in_gap = score1 < score2; - let score = max(score1, score2); + in_gap = score_diag < score_hory; + let score = max(score_diag, score_hory); if i == needle.len() - 1 && score > max_score { max_score = score; max_score_end = col as u16; @@ -238,25 +246,31 @@ impl Matrix<'_, H> { loop { let score = row[col].score; - let mut score1 = 0; - let mut score2 = 0; + // we calculate two scores: + // * one for traversing the matrix horizontally (no match at + // the current char) + // * one for traversing the matrix diagonally (match at the + // current char) + // the maximum of those two scores is used + let mut score_diag = 0; + let mut score_horz = 0; if let Some(&(prev_row, _)) = row_iter.peek() { if col >= prev_row.off { - score1 = prev_row[col].score; + score_diag = prev_row[col].score; } } if col > row.off { - score2 = row[col - 1].score; + score_horz = row[col - 1].score; } let mut new_prefer_match = row[col].consecutive_chars > 1; if !new_prefer_match && col + 1 < haystack_len { if let Some(next_row) = next_row { - if col + 1 > next_row.off { + if col + 1 >= next_row.off { new_prefer_match = next_row[col + 1].consecutive_chars > 0 } } } - if score > score1 && (score > score2 || score == score2 && prefer_match) { + if score > score_diag && (score > score_horz || score == score_horz && prefer_match) { *matched_col_idx = col as u32 + start; next_row = Some(row); let Some(next) = row_iter.next() else { @@ -267,5 +281,6 @@ impl Matrix<'_, H> { prefer_match = new_prefer_match; col -= 1; } + println!("{:#?}", self); } } diff --git a/src/tests.rs b/src/tests.rs index 2d59402..d51dffe 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -51,11 +51,14 @@ pub fn assert_matches( Some(score), "{needle:?} did not match {haystack:?}: {match_chars:?}" ); - assert_eq!(match_chars, needle_chars, "match 
indices are incorrect"); + assert_eq!( + match_chars, needle_chars, + "match indices are incorrect {indices:?}" + ); assert_eq!( indices.first().copied()..indices.last().map(|&i| i + 1), Some(start)..Some(end), - "{needle:?} match {haystack:?}[{start}..{end}]" + "{needle:?} match {haystack:?}" ); } } @@ -63,7 +66,7 @@ const BONUS_BOUNDARY_WHITE: u16 = MatcherConfig::DEFAULT.bonus_boundary_white; const BONUS_BOUNDARY_DELIMITER: u16 = MatcherConfig::DEFAULT.bonus_boundary_delimiter; #[test] -fn test_v2_fuzzy() { +fn test_fuzzy() { assert_matches( false, false, @@ -176,6 +179,52 @@ fn test_v2_fuzzy() { ); } +#[test] +fn test_fuzzy_case_sensitive() { + assert_matches( + false, + false, + true, + false, + &[ + ( + "fooBarbaz1", + "oBz", + 2, + 9, + BONUS_CAMEL123 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION * 3, + ), + ( + "Foo/Bar/Baz", + "FBB", + 0, + 9, + BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_DELIMITER * 2 + - 2 * PENALTY_GAP_START + - 4 * PENALTY_GAP_EXTENSION, + ), + ( + "FooBarBaz", + "FBB", + 0, + 7, + BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CAMEL123 * 2 + - 2 * PENALTY_GAP_START + - 2 * PENALTY_GAP_EXTENSION, + ), + ( + "FooBar Baz", + "FooB", + 0, + 4, + BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 3, + ), + // Consecutive bonus updated + ("foo-bar", "o-ba", 2, 6, BONUS_BOUNDARY * 2 + BONUS_NON_WORD), + ], + ); +} + #[test] fn test_v1_fuzzy() { assert_matches( diff --git a/typos.toml b/typos.toml index 1408f63..63afa46 100644 --- a/typos.toml +++ b/typos.toml @@ -1,3 +1,3 @@ default.extend-ignore-re = ["\\\\u\\{[0-9A-Za-z]*\\}"] [files] -extend-exclude = ["integration_tests", "verilogae/tests", "*.mir", "openvaf/lexer/src/tests.rs"] +extend-exclude = ["src/tests.rs"]