mirror of
https://github.com/solaeus/nucleo.git
synced 2024-12-22 09:57:49 +00:00
fix scoring and case-sensitive matching
This commit is contained in:
parent
52f1712a78
commit
9ffa5e63c2
@ -120,7 +120,7 @@ impl Char for char {
|
|||||||
return (c.0 as char, class);
|
return (c.0 as char, class);
|
||||||
}
|
}
|
||||||
let char_class = char_class_non_ascii(self);
|
let char_class = char_class_non_ascii(self);
|
||||||
if char_class == CharClass::Upper {
|
if char_class == CharClass::Upper && config.ignore_case {
|
||||||
self = CASE_FOLDING_SIMPLE
|
self = CASE_FOLDING_SIMPLE
|
||||||
.binary_search_by_key(&self, |(upper, _)| *upper)
|
.binary_search_by_key(&self, |(upper, _)| *upper)
|
||||||
.map_or(self, |idx| CASE_FOLDING_SIMPLE[idx].1)
|
.map_or(self, |idx| CASE_FOLDING_SIMPLE[idx].1)
|
||||||
@ -136,7 +136,10 @@ impl Char for char {
|
|||||||
if config.normalize {
|
if config.normalize {
|
||||||
self = normalize::normalize(self);
|
self = normalize::normalize(self);
|
||||||
}
|
}
|
||||||
to_lower_case(self)
|
if config.ignore_case {
|
||||||
|
self = to_lower_case(self)
|
||||||
|
}
|
||||||
|
self
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -178,18 +178,26 @@ impl<H: Char> Matrix<'_, H> {
|
|||||||
} else {
|
} else {
|
||||||
PENALTY_GAP_START
|
PENALTY_GAP_START
|
||||||
};
|
};
|
||||||
let mut score1 = 0;
|
// we calculate two scores:
|
||||||
let score2 = prev_matrix_cell.score.saturating_sub(gap_penalty);
|
// * one for traversing the matrix horizontally (no match at
|
||||||
|
// the current char)
|
||||||
|
// * one for traversing the matrix diagonally (match at the
|
||||||
|
// current char)
|
||||||
|
// the maximum of those two scores is used
|
||||||
|
let mut score_diag = 0;
|
||||||
|
let score_hory = prev_matrix_cell.score.saturating_sub(gap_penalty);
|
||||||
|
|
||||||
let mut consecutive = 0;
|
let mut consecutive = 0;
|
||||||
if haystack_char.char == needle_char {
|
if haystack_char.char == needle_char {
|
||||||
score1 = diag_matrix_cell.score + SCORE_MATCH;
|
// we have a match at the current char
|
||||||
|
score_diag = diag_matrix_cell.score + SCORE_MATCH;
|
||||||
let mut bonus = haystack_char.bonus;
|
let mut bonus = haystack_char.bonus;
|
||||||
consecutive = diag_matrix_cell.consecutive_chars + 1;
|
consecutive = diag_matrix_cell.consecutive_chars + 1;
|
||||||
if consecutive > 1 {
|
if consecutive > 1 {
|
||||||
let first_bonus = self.bonus[col + 1 - consecutive as usize];
|
let first_bonus = self.bonus[col + 1 - consecutive as usize];
|
||||||
|
println!("xoxo {bonus} {first_bonus} {consecutive}");
|
||||||
if bonus > first_bonus {
|
if bonus > first_bonus {
|
||||||
if bonus > BONUS_BOUNDARY {
|
if bonus >= BONUS_BOUNDARY {
|
||||||
consecutive = 1
|
consecutive = 1
|
||||||
} else {
|
} else {
|
||||||
bonus = max(bonus, BONUS_CONSECUTIVE)
|
bonus = max(bonus, BONUS_CONSECUTIVE)
|
||||||
@ -198,15 +206,15 @@ impl<H: Char> Matrix<'_, H> {
|
|||||||
bonus = max(first_bonus, BONUS_CONSECUTIVE)
|
bonus = max(first_bonus, BONUS_CONSECUTIVE)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if score1 + bonus < score2 {
|
if score_diag + bonus < score_hory {
|
||||||
score1 += haystack_char.bonus;
|
score_diag += haystack_char.bonus;
|
||||||
consecutive = 0;
|
consecutive = 0;
|
||||||
} else {
|
} else {
|
||||||
score1 += bonus;
|
score_diag += bonus;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
in_gap = score1 < score2;
|
in_gap = score_diag < score_hory;
|
||||||
let score = max(score1, score2);
|
let score = max(score_diag, score_hory);
|
||||||
if i == needle.len() - 1 && score > max_score {
|
if i == needle.len() - 1 && score > max_score {
|
||||||
max_score = score;
|
max_score = score;
|
||||||
max_score_end = col as u16;
|
max_score_end = col as u16;
|
||||||
@ -238,25 +246,31 @@ impl<H: Char> Matrix<'_, H> {
|
|||||||
|
|
||||||
loop {
|
loop {
|
||||||
let score = row[col].score;
|
let score = row[col].score;
|
||||||
let mut score1 = 0;
|
// we calculate two scores:
|
||||||
let mut score2 = 0;
|
// * one for traversing the matrix horizontally (no match at
|
||||||
|
// the current char)
|
||||||
|
// * one for traversing the matrix diagonally (match at the
|
||||||
|
// current char)
|
||||||
|
// the maximum of those two scores is used
|
||||||
|
let mut score_diag = 0;
|
||||||
|
let mut score_horz = 0;
|
||||||
if let Some(&(prev_row, _)) = row_iter.peek() {
|
if let Some(&(prev_row, _)) = row_iter.peek() {
|
||||||
if col >= prev_row.off {
|
if col >= prev_row.off {
|
||||||
score1 = prev_row[col].score;
|
score_diag = prev_row[col].score;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if col > row.off {
|
if col > row.off {
|
||||||
score2 = row[col - 1].score;
|
score_horz = row[col - 1].score;
|
||||||
}
|
}
|
||||||
let mut new_prefer_match = row[col].consecutive_chars > 1;
|
let mut new_prefer_match = row[col].consecutive_chars > 1;
|
||||||
if !new_prefer_match && col + 1 < haystack_len {
|
if !new_prefer_match && col + 1 < haystack_len {
|
||||||
if let Some(next_row) = next_row {
|
if let Some(next_row) = next_row {
|
||||||
if col + 1 > next_row.off {
|
if col + 1 >= next_row.off {
|
||||||
new_prefer_match = next_row[col + 1].consecutive_chars > 0
|
new_prefer_match = next_row[col + 1].consecutive_chars > 0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if score > score1 && (score > score2 || score == score2 && prefer_match) {
|
if score > score_diag && (score > score_horz || score == score_horz && prefer_match) {
|
||||||
*matched_col_idx = col as u32 + start;
|
*matched_col_idx = col as u32 + start;
|
||||||
next_row = Some(row);
|
next_row = Some(row);
|
||||||
let Some(next) = row_iter.next() else {
|
let Some(next) = row_iter.next() else {
|
||||||
@ -267,5 +281,6 @@ impl<H: Char> Matrix<'_, H> {
|
|||||||
prefer_match = new_prefer_match;
|
prefer_match = new_prefer_match;
|
||||||
col -= 1;
|
col -= 1;
|
||||||
}
|
}
|
||||||
|
println!("{:#?}", self);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
55
src/tests.rs
55
src/tests.rs
@ -51,11 +51,14 @@ pub fn assert_matches(
|
|||||||
Some(score),
|
Some(score),
|
||||||
"{needle:?} did not match {haystack:?}: {match_chars:?}"
|
"{needle:?} did not match {haystack:?}: {match_chars:?}"
|
||||||
);
|
);
|
||||||
assert_eq!(match_chars, needle_chars, "match indices are incorrect");
|
assert_eq!(
|
||||||
|
match_chars, needle_chars,
|
||||||
|
"match indices are incorrect {indices:?}"
|
||||||
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
indices.first().copied()..indices.last().map(|&i| i + 1),
|
indices.first().copied()..indices.last().map(|&i| i + 1),
|
||||||
Some(start)..Some(end),
|
Some(start)..Some(end),
|
||||||
"{needle:?} match {haystack:?}[{start}..{end}]"
|
"{needle:?} match {haystack:?}"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -63,7 +66,7 @@ const BONUS_BOUNDARY_WHITE: u16 = MatcherConfig::DEFAULT.bonus_boundary_white;
|
|||||||
const BONUS_BOUNDARY_DELIMITER: u16 = MatcherConfig::DEFAULT.bonus_boundary_delimiter;
|
const BONUS_BOUNDARY_DELIMITER: u16 = MatcherConfig::DEFAULT.bonus_boundary_delimiter;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_v2_fuzzy() {
|
fn test_fuzzy() {
|
||||||
assert_matches(
|
assert_matches(
|
||||||
false,
|
false,
|
||||||
false,
|
false,
|
||||||
@ -176,6 +179,52 @@ fn test_v2_fuzzy() {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_fuzzy_case_sensitive() {
|
||||||
|
assert_matches(
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
true,
|
||||||
|
false,
|
||||||
|
&[
|
||||||
|
(
|
||||||
|
"fooBarbaz1",
|
||||||
|
"oBz",
|
||||||
|
2,
|
||||||
|
9,
|
||||||
|
BONUS_CAMEL123 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION * 3,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"Foo/Bar/Baz",
|
||||||
|
"FBB",
|
||||||
|
0,
|
||||||
|
9,
|
||||||
|
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_DELIMITER * 2
|
||||||
|
- 2 * PENALTY_GAP_START
|
||||||
|
- 4 * PENALTY_GAP_EXTENSION,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"FooBarBaz",
|
||||||
|
"FBB",
|
||||||
|
0,
|
||||||
|
7,
|
||||||
|
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CAMEL123 * 2
|
||||||
|
- 2 * PENALTY_GAP_START
|
||||||
|
- 2 * PENALTY_GAP_EXTENSION,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"FooBar Baz",
|
||||||
|
"FooB",
|
||||||
|
0,
|
||||||
|
4,
|
||||||
|
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 3,
|
||||||
|
),
|
||||||
|
// Consecutive bonus updated
|
||||||
|
("foo-bar", "o-ba", 2, 6, BONUS_BOUNDARY * 2 + BONUS_NON_WORD),
|
||||||
|
],
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_v1_fuzzy() {
|
fn test_v1_fuzzy() {
|
||||||
assert_matches(
|
assert_matches(
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
default.extend-ignore-re = ["\\\\u\\{[0-9A-Za-z]*\\}"]
|
default.extend-ignore-re = ["\\\\u\\{[0-9A-Za-z]*\\}"]
|
||||||
[files]
|
[files]
|
||||||
extend-exclude = ["integration_tests", "verilogae/tests", "*.mir", "openvaf/lexer/src/tests.rs"]
|
extend-exclude = ["src/tests.rs"]
|
||||||
|
Loading…
Reference in New Issue
Block a user