mirror of
https://github.com/solaeus/nucleo.git
synced 2024-12-22 09:57:49 +00:00
fairly stable and consistent fuzzy matching
This commit is contained in:
parent
becd35c5de
commit
2ce871b70c
@ -9,5 +9,5 @@ unzip UCD.zip
|
||||
|
||||
cd "${dir}"
|
||||
cargo install ucd-generate
|
||||
ucd-generate case-folding-simple /tmp/ucd-15.0.0 --chars > src/case_fold.rs
|
||||
ucd-generate case-folding-simple /tmp/ucd-15.0.0 --chars > src/chars/case_fold.rs
|
||||
rm -rf /tmp/ucd-15.0.0
|
||||
|
10
src/chars.rs
10
src/chars.rs
@ -109,14 +109,16 @@ impl Char for char {
|
||||
return (c.0 as char, class);
|
||||
}
|
||||
let char_class = char_class_non_ascii(self);
|
||||
if char_class == CharClass::Upper && config.ignore_case {
|
||||
let mut case_fold = char_class == CharClass::Upper;
|
||||
if config.normalize {
|
||||
self = normalize::normalize(self);
|
||||
case_fold = true
|
||||
}
|
||||
if case_fold && config.ignore_case {
|
||||
self = CASE_FOLDING_SIMPLE
|
||||
.binary_search_by_key(&self, |(upper, _)| *upper)
|
||||
.map_or(self, |idx| CASE_FOLDING_SIMPLE[idx].1)
|
||||
}
|
||||
if config.normalize {
|
||||
self = normalize::normalize(self);
|
||||
}
|
||||
(self, char_class)
|
||||
}
|
||||
|
||||
|
@ -7,7 +7,11 @@
|
||||
// ucd-generate 0.3.0 is available on crates.io.
|
||||
|
||||
pub const CASE_FOLDING_SIMPLE: &'static [(char, char)] = &[
|
||||
('µ', 'μ'), ('À', 'à'), ('Á', 'á'),
|
||||
('A', 'a'), ('B', 'b'), ('C', 'c'), ('D', 'd'), ('E', 'e'), ('F', 'f'),
|
||||
('G', 'g'), ('H', 'h'), ('I', 'i'), ('J', 'j'), ('K', 'k'), ('L', 'l'),
|
||||
('M', 'm'), ('N', 'n'), ('O', 'o'), ('P', 'p'), ('Q', 'q'), ('R', 'r'),
|
||||
('S', 's'), ('T', 't'), ('U', 'u'), ('V', 'v'), ('W', 'w'), ('X', 'x'),
|
||||
('Y', 'y'), ('Z', 'z'), ('µ', 'μ'), ('À', 'à'), ('Á', 'á'),
|
||||
('Â', 'â'), ('Ã', 'ã'), ('Ä', 'ä'), ('Å', 'å'), ('Æ', 'æ'),
|
||||
('Ç', 'ç'), ('È', 'è'), ('É', 'é'), ('Ê', 'ê'), ('Ë', 'ë'),
|
||||
('Ì', 'ì'), ('Í', 'í'), ('Î', 'î'), ('Ï', 'ï'), ('Ð', 'ð'),
|
||||
|
@ -78,9 +78,10 @@ impl Matcher {
|
||||
.checked_sub(1)
|
||||
.map(|i| haystack[i].char_class(&self.config))
|
||||
.unwrap_or(self.config.initial_char_class);
|
||||
for (i, &c) in haystack.iter().enumerate() {
|
||||
for (i, &c) in haystack[start..].iter().enumerate() {
|
||||
let (c, char_class) = c.char_class_and_normalize(&self.config);
|
||||
if c != needle {
|
||||
println!("ups {c} {needle}");
|
||||
continue;
|
||||
}
|
||||
let bonus = self.config.bonus_for(prev_class, char_class);
|
||||
@ -100,7 +101,7 @@ impl Matcher {
|
||||
|
||||
if INDICES {
|
||||
indices.clear();
|
||||
indices.push(max_pos);
|
||||
indices.push(max_pos + start as u32);
|
||||
}
|
||||
max_score
|
||||
}
|
||||
|
@ -65,7 +65,7 @@ fn next_m_score(p_score: i32, m_score: i32, bonus: u16, next_bonus: u16) -> Scor
|
||||
let consecutive_bonus = max(bonus, max(next_bonus, BONUS_CONSECUTIVE));
|
||||
let score_match = m_score + consecutive_bonus as i32;
|
||||
let score_skip = p_score + next_bonus as i32;
|
||||
if score_match > score_skip {
|
||||
if score_match >= score_skip {
|
||||
ScoreCell {
|
||||
score: score_match + SCORE_MATCH as i32,
|
||||
bonus: consecutive_bonus,
|
||||
@ -74,16 +74,24 @@ fn next_m_score(p_score: i32, m_score: i32, bonus: u16, next_bonus: u16) -> Scor
|
||||
} else {
|
||||
ScoreCell {
|
||||
score: score_skip + SCORE_MATCH as i32,
|
||||
bonus: consecutive_bonus,
|
||||
bonus: next_bonus,
|
||||
matched: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn p_score(prev_p_score: i32, prev_m_score: i32) -> (i32, bool) {
|
||||
let score_match = prev_m_score - PENALTY_GAP_START as i32;
|
||||
let score_skip = prev_p_score - PENALTY_GAP_EXTENSION as i32;
|
||||
if score_match > score_skip {
|
||||
let score_match = if prev_m_score >= 0 {
|
||||
(prev_m_score - PENALTY_GAP_START as i32).max(0)
|
||||
} else {
|
||||
i32::MIN / 2
|
||||
};
|
||||
let score_skip = if prev_p_score >= 0 {
|
||||
(prev_p_score - PENALTY_GAP_EXTENSION as i32).max(0)
|
||||
} else {
|
||||
i32::MIN / 2
|
||||
};
|
||||
if score_match >= score_skip {
|
||||
(score_match, true)
|
||||
} else {
|
||||
(score_skip, false)
|
||||
|
@ -35,7 +35,7 @@ pub(crate) const BONUS_CONSECUTIVE: u16 = PENALTY_GAP_START + PENALTY_GAP_EXTENS
|
||||
// bonus points are given, e.g. "to-go" vs. "ongoing" on "og" or on "ogo".
|
||||
// The amount of the extra bonus should be limited so that the gap penalty is
|
||||
// still respected.
|
||||
pub(crate) const BONUS_FIRST_CHAR_MULTIPLIER: u16 = 2;
|
||||
pub(crate) const BONUS_FIRST_CHAR_MULTIPLIER: u16 = 1;
|
||||
|
||||
impl MatcherConfig {
|
||||
#[inline]
|
||||
|
33
src/tests.rs
33
src/tests.rs
@ -224,6 +224,19 @@ fn test_fuzzy() {
|
||||
- PENALTY_GAP_START
|
||||
- 23 * PENALTY_GAP_EXTENSION,
|
||||
),
|
||||
(
|
||||
"\nץ&`@ `---\0\0\0\0",
|
||||
"`@ `--\0\0",
|
||||
&[3, 4, 5, 6, 7, 8, 10, 11],
|
||||
BONUS_NON_WORD * (BONUS_FIRST_CHAR_MULTIPLIER + 3) + BONUS_BOUNDARY_WHITE * 4
|
||||
- PENALTY_GAP_START,
|
||||
),
|
||||
(
|
||||
" 1111111u11111uuu111",
|
||||
"11111uuu1",
|
||||
&[9, 10, 11, 12, 13, 14, 15, 16, 17],
|
||||
BONUS_CAMEL123 * (BONUS_FIRST_CHAR_MULTIPLIER + 8),
|
||||
),
|
||||
],
|
||||
);
|
||||
}
|
||||
@ -317,6 +330,7 @@ fn test_normalize() {
|
||||
&[1, 2, 3, 4, 5],
|
||||
BONUS_CAMEL123 * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
|
||||
),
|
||||
("ۂ(GCGɴCG", "n", &[5], 0),
|
||||
],
|
||||
)
|
||||
}
|
||||
@ -399,7 +413,7 @@ fn test_optimal() {
|
||||
(
|
||||
"Hٷ!!-!!!\n--\u{4}\u{c}-\u{8}-!\u{c}",
|
||||
"-!--!",
|
||||
&[4, 5, 9, 10, 16],
|
||||
&[4, 5, 13, 15, 16],
|
||||
BONUS_NON_WORD * (BONUS_FIRST_CHAR_MULTIPLIER + 4)
|
||||
- 2 * PENALTY_GAP_START
|
||||
- 6 * PENALTY_GAP_EXTENSION,
|
||||
@ -413,6 +427,23 @@ fn test_optimal() {
|
||||
- 3 * PENALTY_GAP_EXTENSION
|
||||
+ BONUS_CONSECUTIVE,
|
||||
),
|
||||
(
|
||||
"\nץ&`@ `;;;\0\0\0\0",
|
||||
"`@ `;;\0\0",
|
||||
&[3, 4, 5, 6, 7, 9, 10, 11],
|
||||
BONUS_NON_WORD * (BONUS_FIRST_CHAR_MULTIPLIER + 1)
|
||||
+ BONUS_BOUNDARY_DELIMITER * 3
|
||||
+ BONUS_BOUNDARY_WHITE * 3
|
||||
- PENALTY_GAP_START,
|
||||
),
|
||||
(
|
||||
"dddddd\0\0\0ddddfdddddd",
|
||||
"dddddfddddd",
|
||||
&[0, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
|
||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY * 10
|
||||
- PENALTY_GAP_START
|
||||
- 7 * PENALTY_GAP_EXTENSION,
|
||||
),
|
||||
],
|
||||
);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user