enable chunk based bonuses

Strictly speaking, these are not 100% correct, but in practice
the edge cases where they cause issues are practically impossible
to trigger.
This commit is contained in:
Pascal Kuthe 2023-08-03 20:27:57 +02:00
parent 6dcfb41545
commit af2c1e190c
No known key found for this signature in database
GPG Key ID: D715E8655AE166A6
4 changed files with 152 additions and 173 deletions

View File

@ -3,8 +3,8 @@ use std::cmp::max;
use crate::chars::{Char, CharClass}; use crate::chars::{Char, CharClass};
use crate::matrix::{MatcherDataView, MatrixCell, ScoreCell}; use crate::matrix::{MatcherDataView, MatrixCell, ScoreCell};
use crate::score::{ use crate::score::{
BONUS_CONSECUTIVE, BONUS_FIRST_CHAR_MULTIPLIER, PENALTY_GAP_EXTENSION, PENALTY_GAP_START, BONUS_BOUNDARY, BONUS_CONSECUTIVE, BONUS_FIRST_CHAR_MULTIPLIER, PENALTY_GAP_EXTENSION,
SCORE_MATCH, PENALTY_GAP_START, SCORE_MATCH,
}; };
use crate::{Matcher, MatcherConfig}; use crate::{Matcher, MatcherConfig};
@ -57,38 +57,53 @@ impl Matcher {
if INDICES { if INDICES {
matrix.reconstruct_optimal_path(match_end as u16, indices, matrix_len, start as u32); matrix.reconstruct_optimal_path(match_end as u16, indices, matrix_len, start as u32);
} }
Some(match_score_cell.score as u16) Some(match_score_cell.score)
} }
} }
fn next_m_score(p_score: i32, m_score: i32, bonus: u16) -> ScoreCell { const UNMATCHED: ScoreCell = ScoreCell {
let consecutive_bonus = max(bonus, BONUS_CONSECUTIVE); score: 0,
let score_match = m_score + consecutive_bonus as i32; // if matched is true then the consecutive bonus
let score_skip = p_score + bonus as i32; // is always at least BONUS_CONSECUTIVE so
// this constant can never occur naturally
consecutive_bonus: 0,
matched: true,
};
fn next_m_cell(p_score: u16, bonus: u16, m_cell: ScoreCell) -> ScoreCell {
if m_cell == UNMATCHED {
return ScoreCell {
score: p_score + bonus + SCORE_MATCH,
matched: false,
consecutive_bonus: bonus as u8,
};
}
let mut consecutive_bonus = max(m_cell.consecutive_bonus as u16, BONUS_CONSECUTIVE);
if bonus >= BONUS_BOUNDARY && bonus > consecutive_bonus {
consecutive_bonus = bonus
}
let score_match = m_cell.score + max(consecutive_bonus, bonus);
let score_skip = p_score + bonus;
if score_match > score_skip { if score_match > score_skip {
ScoreCell { ScoreCell {
score: score_match + SCORE_MATCH as i32, score: score_match + SCORE_MATCH,
matched: true, matched: true,
consecutive_bonus: consecutive_bonus as u8,
} }
} else { } else {
ScoreCell { ScoreCell {
score: score_skip + SCORE_MATCH as i32, score: score_skip + SCORE_MATCH,
matched: false, matched: false,
consecutive_bonus: bonus as u8,
} }
} }
} }
fn p_score(prev_p_score: i32, prev_m_score: i32) -> (i32, bool) { fn p_score(prev_p_score: u16, prev_m_score: u16) -> (u16, bool) {
let score_match = if prev_m_score >= 0 { let score_match = prev_m_score.saturating_sub(PENALTY_GAP_START);
(prev_m_score - PENALTY_GAP_START as i32).max(0) let score_skip = prev_p_score.saturating_sub(PENALTY_GAP_EXTENSION);
} else {
i32::MIN / 2
};
let score_skip = if prev_p_score >= 0 {
(prev_p_score - PENALTY_GAP_EXTENSION as i32).max(0)
} else {
i32::MIN / 2
};
if score_match > score_skip { if score_match > score_skip {
(score_match, true) (score_match, true)
} else { } else {
@ -122,7 +137,7 @@ impl<H: Char> MatcherDataView<'_, H> {
let bonus = config.bonus_for(prev_class, class); let bonus = config.bonus_for(prev_class, class);
// save bonus for later so we don't have to recompute it each time // save bonus for later so we don't have to recompute it each time
*bonus_ = bonus; *bonus_ = bonus as u8;
prev_class = class; prev_class = class;
let i = i as u16; let i = i as u16;
@ -160,7 +175,7 @@ impl<H: Char> MatcherDataView<'_, H> {
current_row: &mut [ScoreCell], current_row: &mut [ScoreCell],
matrix_cells: &mut [MatrixCell], matrix_cells: &mut [MatrixCell],
haystack: &[H], haystack: &[H],
bonus: &[u16], bonus: &[u8],
row_off: u16, row_off: u16,
mut next_row_off: u16, mut next_row_off: u16,
needle_idx: u16, needle_idx: u16,
@ -177,18 +192,19 @@ impl<H: Char> MatcherDataView<'_, H> {
.zip(bonus[row_off as usize..next_row_off as usize].iter()) .zip(bonus[row_off as usize..next_row_off as usize].iter())
.zip(current_row[relative_row_off as usize..next_relative_row_off as usize].iter_mut()) .zip(current_row[relative_row_off as usize..next_relative_row_off as usize].iter_mut())
.zip(matrix_cells.iter_mut()); .zip(matrix_cells.iter_mut());
let mut prev_p_score = i32::MIN / 2; let mut prev_p_score = 0;
let mut prev_m_score = i32::MIN / 2; let mut prev_m_score = 0;
for (((&c, bonus), score_cell), matrix_cell) in skipped_col_iter { for (((&c, bonus), score_cell), matrix_cell) in skipped_col_iter {
let (p_score, p_matched) = p_score(prev_p_score, prev_m_score); let (p_score, p_matched) = p_score(prev_p_score, prev_m_score);
let m_cell = if FIRST_ROW { let m_cell = if FIRST_ROW {
if c == needle_char { if c == needle_char {
next_m_score(0, i32::MIN / 2, bonus * BONUS_FIRST_CHAR_MULTIPLIER)
} else {
ScoreCell { ScoreCell {
score: i32::MIN / 2, score: *bonus as u16 * BONUS_FIRST_CHAR_MULTIPLIER + SCORE_MATCH,
matched: false, matched: false,
consecutive_bonus: *bonus,
} }
} else {
UNMATCHED
} }
} else { } else {
*score_cell *score_cell
@ -208,23 +224,21 @@ impl<H: Char> MatcherDataView<'_, H> {
let (p_score, p_matched) = p_score(prev_p_score, prev_m_score); let (p_score, p_matched) = p_score(prev_p_score, prev_m_score);
let m_cell = if FIRST_ROW { let m_cell = if FIRST_ROW {
if c[0] == needle_char { if c[0] == needle_char {
next_m_score(0, i32::MIN / 2, bonus[0] * BONUS_FIRST_CHAR_MULTIPLIER)
} else {
ScoreCell { ScoreCell {
score: i32::MIN / 2, score: bonus[0] as u16 * BONUS_FIRST_CHAR_MULTIPLIER + SCORE_MATCH,
matched: false, matched: false,
consecutive_bonus: bonus[0],
} }
} else {
UNMATCHED
} }
} else { } else {
*score_cell *score_cell
}; };
*score_cell = if c[1] == next_needle_char { *score_cell = if c[1] == next_needle_char {
next_m_score(p_score, m_cell.score, bonus[1]) next_m_cell(p_score, bonus[1] as u16, m_cell)
} else { } else {
ScoreCell { UNMATCHED
score: i32::MIN / 2,
matched: false,
}
}; };
if INDICES { if INDICES {
matrix_cell.set(p_matched, m_cell.matched); matrix_cell.set(p_matched, m_cell.matched);

View File

@ -29,7 +29,7 @@ impl<C: Char> MatrixLayout<C> {
assert!(haystack_len <= u32::MAX as usize); assert!(haystack_len <= u32::MAX as usize);
let mut layout = Layout::from_size_align(0, 1).unwrap(); let mut layout = Layout::from_size_align(0, 1).unwrap();
let haystack_layout = Layout::array::<C>(haystack_len).unwrap(); let haystack_layout = Layout::array::<C>(haystack_len).unwrap();
let bonus_layout = Layout::array::<u16>(haystack_len).unwrap(); let bonus_layout = Layout::array::<u8>(haystack_len).unwrap();
let rows_layout = Layout::array::<u16>(needle_len).unwrap(); let rows_layout = Layout::array::<u16>(needle_len).unwrap();
let score_layout = Layout::array::<ScoreCell>(haystack_len + 1 - needle_len).unwrap(); let score_layout = Layout::array::<ScoreCell>(haystack_len + 1 - needle_len).unwrap();
let matrix_layout = let matrix_layout =
@ -65,7 +65,7 @@ impl<C: Char> MatrixLayout<C> {
ptr: NonNull<u8>, ptr: NonNull<u8>,
) -> ( ) -> (
*mut [C], *mut [C],
*mut [u16], *mut [u8],
*mut [u16], *mut [u16],
*mut [ScoreCell], *mut [ScoreCell],
*mut [MatrixCell], *mut [MatrixCell],
@ -73,7 +73,7 @@ impl<C: Char> MatrixLayout<C> {
let base = ptr.as_ptr(); let base = ptr.as_ptr();
let haystack = base.add(self.haystack_off) as *mut C; let haystack = base.add(self.haystack_off) as *mut C;
let haystack = slice_from_raw_parts_mut(haystack, self.haystack_len); let haystack = slice_from_raw_parts_mut(haystack, self.haystack_len);
let bonus = base.add(self.bonus_off) as *mut u16; let bonus = base.add(self.bonus_off) as *mut u8;
let bonus = slice_from_raw_parts_mut(bonus, self.haystack_len); let bonus = slice_from_raw_parts_mut(bonus, self.haystack_len);
let rows = base.add(self.rows_off) as *mut u16; let rows = base.add(self.rows_off) as *mut u16;
let rows = slice_from_raw_parts_mut(rows, self.needle_len); let rows = slice_from_raw_parts_mut(rows, self.needle_len);
@ -88,9 +88,18 @@ impl<C: Char> MatrixLayout<C> {
} }
} }
#[derive(Clone, Copy)] const _SIZE_CHECK: () = {
if size_of::<ScoreCell>() != 8 {
panic!()
}
};
// make this act like a u64
#[repr(align(8))]
#[derive(Clone, Copy, PartialEq, Eq)]
pub(crate) struct ScoreCell { pub(crate) struct ScoreCell {
pub score: i32, pub score: u16,
pub consecutive_bonus: u8,
pub matched: bool, pub matched: bool,
} }
@ -98,7 +107,7 @@ pub(crate) struct MatcherDataView<'a, C: Char> {
pub haystack: &'a mut [C], pub haystack: &'a mut [C],
// stored as a separate array instead of struct // stored as a separate array instead of struct
// to avoid padding since char is too large and u8 too small :/ // to avoid padding since char is too large and u8 too small :/
pub bonus: &'a mut [u16], pub bonus: &'a mut [u8],
pub current_row: &'a mut [ScoreCell], pub current_row: &'a mut [ScoreCell],
pub row_offs: &'a mut [u16], pub row_offs: &'a mut [u16],
pub matrix_cells: &'a mut [MatrixCell], pub matrix_cells: &'a mut [MatrixCell],
@ -121,7 +130,7 @@ impl MatrixCell {
#[allow(unused)] #[allow(unused)]
struct MatcherData { struct MatcherData {
haystack: [char; MAX_HAYSTACK_LEN], haystack: [char; MAX_HAYSTACK_LEN],
bonus: [u16; MAX_HAYSTACK_LEN], bonus: [u8; MAX_HAYSTACK_LEN],
row_offs: [u16; MAX_NEEDLE_LEN], row_offs: [u16; MAX_NEEDLE_LEN],
scratch_space: [ScoreCell; MAX_HAYSTACK_LEN], scratch_space: [ScoreCell; MAX_HAYSTACK_LEN],
matrix: [u8; MAX_MATRIX_SIZE], matrix: [u8; MAX_MATRIX_SIZE],
@ -150,7 +159,11 @@ impl MatrixSlab {
needle_len: usize, needle_len: usize,
) -> Option<MatcherDataView<'_, C>> { ) -> Option<MatcherDataView<'_, C>> {
let cells = haystack_.len() * needle_len; let cells = haystack_.len() * needle_len;
if cells > MAX_MATRIX_SIZE || haystack_.len() > u16::MAX as usize { if cells > MAX_MATRIX_SIZE
|| haystack_.len() > u16::MAX as usize
// ensures that scores never overflow
|| needle_len > MAX_NEEDLE_LEN
{
return None; return None;
} }
let matrix_layout = MatrixLayout::<C>::new(haystack_.len(), needle_len); let matrix_layout = MatrixLayout::<C>::new(haystack_.len(), needle_len);

View File

@ -24,13 +24,17 @@ pub(crate) const BONUS_BOUNDARY: u16 = SCORE_MATCH / 2;
// usually camel case is a weaker boundary than actual word boundaries anyway // usually camel case is a weaker boundary than actual word boundaries anyway
// This also has the nice side effect of perfectly balancing out // This also has the nice side effect of perfectly balancing out
// camel case, snake case and the consecutive version of the word // camel case, snake case and the consecutive version of the word
pub(crate) const BONUS_CAMEL123: u16 = BONUS_CONSECUTIVE; pub(crate) const BONUS_CAMEL123: u16 = BONUS_BOUNDARY - PENALTY_GAP_START;
/// Although bonus point for non-word characters is non-contextual, we need it
/// for computing bonus points for consecutive chunks starting with a non-word
/// character.
pub(crate) const BONUS_NON_WORD: u16 = BONUS_BOUNDARY;
// Minimum bonus point given to characters in consecutive chunks. // Minimum bonus point given to characters in consecutive chunks.
// Note that bonus points for consecutive matches wouldn't have been needed if we // Note that bonus points for consecutive matches wouldn't have been needed if we
// used fixed match score as in the original algorithm. // used fixed match score as in the original algorithm.
pub(crate) const BONUS_CONSECUTIVE: u16 = pub(crate) const BONUS_CONSECUTIVE: u16 = PENALTY_GAP_START + PENALTY_GAP_EXTENSION;
PENALTY_GAP_START + PENALTY_GAP_EXTENSION + PENALTY_GAP_EXTENSION;
// The first character in the typed pattern usually has more significance // The first character in the typed pattern usually has more significance
// than the rest so it's important that it appears at special positions where // than the rest so it's important that it appears at special positions where
@ -58,6 +62,8 @@ impl MatcherConfig {
BONUS_CAMEL123 BONUS_CAMEL123
} else if class == CharClass::Whitespace { } else if class == CharClass::Whitespace {
self.bonus_boundary_white self.bonus_boundary_white
} else if class == CharClass::NonWord {
return BONUS_NON_WORD;
} else { } else {
0 0
} }
@ -96,8 +102,8 @@ impl Matcher {
indices.push(start as u32) indices.push(start as u32)
} }
let class = haystack[start].char_class(&self.config); let class = haystack[start].char_class(&self.config);
let mut bonus = self.bonus_for(prev_class, class); let mut first_bonus = self.bonus_for(prev_class, class);
let mut score = SCORE_MATCH + bonus * BONUS_FIRST_CHAR_MULTIPLIER; let mut score = SCORE_MATCH + first_bonus * BONUS_FIRST_CHAR_MULTIPLIER;
prev_class = class; prev_class = class;
needle_char = *needle_iter.next().unwrap_or(&needle_char); needle_char = *needle_iter.next().unwrap_or(&needle_char);
@ -107,9 +113,14 @@ impl Matcher {
if INDICES { if INDICES {
indices.push(i as u32 + start as u32 + 1) indices.push(i as u32 + start as u32 + 1)
} }
bonus = self.bonus_for(prev_class, class); let mut bonus = self.bonus_for(prev_class, class);
if consecutive != 0 { if consecutive != 0 {
bonus = max(bonus, BONUS_CONSECUTIVE); if bonus >= BONUS_BOUNDARY && bonus > first_bonus {
first_bonus = bonus
}
bonus = max(max(bonus, first_bonus), BONUS_CONSECUTIVE);
} else {
first_bonus = bonus;
} }
score += SCORE_MATCH + bonus; score += SCORE_MATCH + bonus;
in_gap = false; in_gap = false;

View File

@ -1,6 +1,6 @@
use crate::chars::Char; use crate::chars::Char;
use crate::score::{ use crate::score::{
BONUS_BOUNDARY, BONUS_CAMEL123, BONUS_CONSECUTIVE, BONUS_FIRST_CHAR_MULTIPLIER, BONUS_BOUNDARY, BONUS_CAMEL123, BONUS_CONSECUTIVE, BONUS_FIRST_CHAR_MULTIPLIER, BONUS_NON_WORD,
PENALTY_GAP_EXTENSION, PENALTY_GAP_START, SCORE_MATCH, PENALTY_GAP_EXTENSION, PENALTY_GAP_START, SCORE_MATCH,
}; };
use crate::utf32_str::Utf32Str; use crate::utf32_str::Utf32Str;
@ -126,7 +126,13 @@ fn test_fuzzy() {
"fooBarbaz1", "fooBarbaz1",
"obr", "obr",
&[2, 3, 5], &[2, 3, 5],
BONUS_CONSECUTIVE - PENALTY_GAP_START, BONUS_CAMEL123 - PENALTY_GAP_START,
),
(
"/usr/share/doc/at/ChangeLog",
"changelog",
&[18, 19, 20, 21, 22, 23, 24, 25, 26],
(BONUS_FIRST_CHAR_MULTIPLIER + 8) * BONUS_BOUNDARY_DELIMITER,
), ),
( (
"fooBarbaz1", "fooBarbaz1",
@ -152,14 +158,13 @@ fn test_fuzzy() {
"/man1/zshcompctl.1", "/man1/zshcompctl.1",
"zshc", "zshc",
&[6, 7, 8, 9], &[6, 7, 8, 9],
BONUS_BOUNDARY_DELIMITER * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CONSECUTIVE * 3, BONUS_BOUNDARY_DELIMITER * (BONUS_FIRST_CHAR_MULTIPLIER + 3),
), ),
( (
"/.oh-my-zsh/cache", "/.oh-my-zsh/cache",
"zshc", "zshc",
&[8, 9, 10, 12], &[8, 9, 10, 12],
BONUS_BOUNDARY * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CONSECUTIVE * 2 BONUS_BOUNDARY * (BONUS_FIRST_CHAR_MULTIPLIER + 2) - PENALTY_GAP_START
- PENALTY_GAP_START
+ BONUS_BOUNDARY_DELIMITER, + BONUS_BOUNDARY_DELIMITER,
), ),
( (
@ -172,9 +177,10 @@ fn test_fuzzy() {
"abc123 456", "abc123 456",
"12356", "12356",
&[3, 4, 5, 8, 9], &[3, 4, 5, 8, 9],
BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CONSECUTIVE * 3 BONUS_CAMEL123 * (BONUS_FIRST_CHAR_MULTIPLIER + 2)
- PENALTY_GAP_START - PENALTY_GAP_START
- PENALTY_GAP_EXTENSION, - PENALTY_GAP_EXTENSION
+ BONUS_CONSECUTIVE,
), ),
( (
"foo/bar/baz", "foo/bar/baz",
@ -204,42 +210,13 @@ fn test_fuzzy() {
"fooBar Baz", "fooBar Baz",
"foob", "foob",
&[0, 1, 2, 3], &[0, 1, 2, 3],
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 3),
+ BONUS_CONSECUTIVE * 2
+ BONUS_CAMEL123,
), ),
( (
"xFoo-Bar Baz", "xFoo-Bar Baz",
"foo-b", "foo-b",
&[1, 2, 3, 4, 5], &[1, 2, 3, 4, 5],
BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER BONUS_CAMEL123 * (BONUS_FIRST_CHAR_MULTIPLIER + 2) + 2 * BONUS_NON_WORD,
+ BONUS_CONSECUTIVE * 3
+ BONUS_BOUNDARY,
),
(
"]\0\0\0H\0\0\0rrrrrrrrrrrrrrrrrrrrrrrVVVVVVVV\0",
"H\0\0VV",
&[4, 5, 6, 31, 32],
BONUS_BOUNDARY * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CONSECUTIVE * 2
- PENALTY_GAP_START
- 23 * PENALTY_GAP_EXTENSION
+ BONUS_CAMEL123
+ BONUS_CONSECUTIVE,
),
(
"\nץ&`@ `---\0\0\0\0",
"`@ `--\0\0",
&[3, 4, 5, 6, 7, 8, 10, 11],
BONUS_BOUNDARY_WHITE * 2 + 2 * BONUS_CONSECUTIVE - PENALTY_GAP_START
+ BONUS_CONSECUTIVE,
),
(
" 1111111u11111uuu111",
"11111uuu1",
&[9, 10, 11, 12, 13, 14, 15, 16, 17],
BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER
+ 7 * BONUS_CONSECUTIVE
+ BONUS_CAMEL123,
), ),
], ],
); );
@ -253,18 +230,23 @@ fn test_substring() {
false, false,
false, false,
&[ &[
("fooBarbaz1", "oba", &[2, 3, 4], 2 * BONUS_CONSECUTIVE), (
"fooBarbaz1",
"oba",
&[2, 3, 4],
BONUS_CAMEL123 + BONUS_CONSECUTIVE,
),
( (
"foo bar baz", "foo bar baz",
"foo", "foo",
&[0, 1, 2], &[0, 1, 2],
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + 2 * BONUS_CONSECUTIVE, BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 2),
), ),
( (
"foo bar baz", "foo bar baz",
"FOO", "FOO",
&[0, 1, 2], &[0, 1, 2],
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + 2 * BONUS_CONSECUTIVE, BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 2),
), ),
( (
"/AutomatorDocument.icns", "/AutomatorDocument.icns",
@ -276,14 +258,14 @@ fn test_substring() {
"/man1/zshcompctl.1", "/man1/zshcompctl.1",
"zshc", "zshc",
&[6, 7, 8, 9], &[6, 7, 8, 9],
BONUS_BOUNDARY_DELIMITER * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CONSECUTIVE * 3, BONUS_BOUNDARY_DELIMITER * (BONUS_FIRST_CHAR_MULTIPLIER + 3),
), ),
( (
"/.oh-my-zsh/cache", "/.oh-my-zsh/cache",
"zsh/c", "zsh/c",
&[8, 9, 10, 11, 12], &[8, 9, 10, 11, 12],
BONUS_BOUNDARY * BONUS_FIRST_CHAR_MULTIPLIER BONUS_BOUNDARY * (BONUS_FIRST_CHAR_MULTIPLIER + 2)
+ BONUS_CONSECUTIVE * 3 + BONUS_NON_WORD
+ BONUS_BOUNDARY_DELIMITER, + BONUS_BOUNDARY_DELIMITER,
), ),
], ],
@ -324,16 +306,9 @@ fn test_fuzzy_case_sensitive() {
"FooBar Baz", "FooBar Baz",
"FooB", "FooB",
&[0, 1, 2, 3], &[0, 1, 2, 3],
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 3),
+ BONUS_CONSECUTIVE * 2
+ BONUS_CAMEL123,
),
(
"foo-bar",
"o-ba",
&[2, 3, 4, 5],
BONUS_BOUNDARY + 2 * BONUS_CONSECUTIVE,
), ),
("foo-bar", "o-ba", &[2, 3, 4, 5], BONUS_NON_WORD * 3),
], ],
); );
} }
@ -350,14 +325,13 @@ fn test_normalize() {
"Só Danço Samba", "Só Danço Samba",
"So", "So",
&[0, 1], &[0, 1],
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CONSECUTIVE, BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 1),
), ),
( (
"Só Danço Samba", "Só Danço Samba",
"sodc", "sodc",
&[0, 1, 3, 6], &[0, 1, 3, 6],
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CONSECUTIVE BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 1) - PENALTY_GAP_START
- PENALTY_GAP_START
+ BONUS_BOUNDARY_WHITE + BONUS_BOUNDARY_WHITE
- PENALTY_GAP_START - PENALTY_GAP_START
- PENALTY_GAP_EXTENSION, - PENALTY_GAP_EXTENSION,
@ -366,21 +340,19 @@ fn test_normalize() {
"Danço", "Danço",
"danco", "danco",
&[0, 1, 2, 3, 4], &[0, 1, 2, 3, 4],
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + 4 * BONUS_CONSECUTIVE, BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
), ),
( (
"DanÇo", "DanÇo",
"danco", "danco",
&[0, 1, 2, 3, 4], &[0, 1, 2, 3, 4],
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
+ BONUS_CAMEL123
+ 3 * BONUS_CONSECUTIVE,
), ),
( (
"xÇando", "xÇando",
"cando", "cando",
&[1, 2, 3, 4, 5], &[1, 2, 3, 4, 5],
BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER + 4 * BONUS_CONSECUTIVE, BONUS_CAMEL123 * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
), ),
("ۂ(GCGɴCG", "n", &[5], 0), ("ۂ(GCGɴCG", "n", &[5], 0),
], ],
@ -388,7 +360,7 @@ fn test_normalize() {
} }
#[test] #[test]
fn test_unicode1() { fn test_unicode() {
assert_matches( assert_matches(
&[FuzzyGreedy, FuzzyOptimal], &[FuzzyGreedy, FuzzyOptimal],
true, true,
@ -399,7 +371,7 @@ fn test_unicode1() {
"你好世界", "你好世界",
"你好", "你好",
&[0, 1], &[0, 1],
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CONSECUTIVE, BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 1),
), ),
( (
"你好世界", "你好世界",
@ -422,7 +394,7 @@ fn test_long_str() {
&"x".repeat(u16::MAX as usize + 1), &"x".repeat(u16::MAX as usize + 1),
"xx", "xx",
&[0, 1], &[0, 1],
BONUS_FIRST_CHAR_MULTIPLIER * BONUS_BOUNDARY_WHITE + BONUS_CONSECUTIVE, BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 1),
)], )],
); );
} }
@ -435,38 +407,33 @@ fn test_casing() {
false, false,
false, false,
&[ &[
// score 143 we currently slightly prefer camel // these two have the same score
( (
"fooBar", "fooBar",
"foobar", "foobar",
&[0, 1, 2, 3, 4, 5], &[0, 1, 2, 3, 4, 5],
BONUS_FIRST_CHAR_MULTIPLIER * BONUS_BOUNDARY_WHITE BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 5),
+ BONUS_CAMEL123
+ 4 * BONUS_CONSECUTIVE,
), ),
// score 141 for perfect match
( (
"foobar", "foobar",
"foobar", "foobar",
&[0, 1, 2, 3, 4, 5], &[0, 1, 2, 3, 4, 5],
BONUS_FIRST_CHAR_MULTIPLIER * BONUS_BOUNDARY_WHITE + 5 * BONUS_CONSECUTIVE, BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 5),
), ),
// score 141 here too since the boundary bonus and the gap penalty/missed consecutive bonus cancel perfectly // these two have the same score (slightly lower than the other two: 60 instead of 70)
( (
"foo-bar", "foo-bar",
"foobar", "foobar",
&[0, 1, 2, 4, 5, 6], &[0, 1, 2, 4, 5, 6],
BONUS_FIRST_CHAR_MULTIPLIER * BONUS_BOUNDARY_WHITE + BONUS_BOUNDARY BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 2) - PENALTY_GAP_START
- PENALTY_GAP_START + BONUS_BOUNDARY * 3,
+ 4 * BONUS_CONSECUTIVE,
), ),
( (
"foo_bar", "foo_bar",
"foobar", "foobar",
&[0, 1, 2, 4, 5, 6], &[0, 1, 2, 4, 5, 6],
BONUS_FIRST_CHAR_MULTIPLIER * BONUS_BOUNDARY_WHITE + BONUS_BOUNDARY BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 2) - PENALTY_GAP_START
- PENALTY_GAP_START + BONUS_BOUNDARY * 3,
+ 4 * BONUS_CONSECUTIVE,
), ),
], ],
) )
@ -483,64 +450,38 @@ fn test_optimal() {
"axxx xx ", "axxx xx ",
"xx", "xx",
&[5, 6], &[5, 6],
BONUS_FIRST_CHAR_MULTIPLIER * BONUS_BOUNDARY_WHITE + BONUS_CONSECUTIVE, BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 1),
), ),
( (
"SS!H", "SS!H",
"S!", "S!",
&[0, 2], &[0, 2],
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER - PENALTY_GAP_START, BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER - PENALTY_GAP_START
+ BONUS_NON_WORD,
),
// this case is a cool example of where our algorithm does better than fzf:
// we correctly detect that it's better to match
// the second f instead of the first, yielding a higher score
// (despite using the same scoring function!)
(
"xf foo",
"xfoo",
&[0, 3, 4, 5],
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 3)
- PENALTY_GAP_START
- PENALTY_GAP_EXTENSION,
), ),
( (
"^^^\u{7f}\0\0E%\u{1a}^", "xf fo",
"^^\0E", "xfo",
&[1, 2, 5, 6], &[0, 3, 4],
BONUS_CONSECUTIVE + BONUS_BOUNDARY - PENALTY_GAP_START - PENALTY_GAP_EXTENSION, BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 2)
),
(
"8gx(gecg)",
"8gcg",
&[0, 4, 6, 7],
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER
- PENALTY_GAP_START - PENALTY_GAP_START
- 2 * PENALTY_GAP_EXTENSION - PENALTY_GAP_EXTENSION,
+ BONUS_BOUNDARY
- PENALTY_GAP_START
+ BONUS_CONSECUTIVE,
),
(
"dddddd\0\0\0ddddfdddddd",
"dddddfddddd",
&[0, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER
+ BONUS_BOUNDARY
+ 9 * BONUS_CONSECUTIVE
- PENALTY_GAP_START
- 7 * PENALTY_GAP_EXTENSION,
), ),
], ],
); );
} }
// #[test]
// fn test_greedy() {
// assert_matches(
// &[FuzzyGreedy],
// false,
// false,
// false,
// &[
// ("SS!H", "S!", &[1, 2], BONUS_NON_WORD),
// (
// "]\0\0\0H\0\0\0rrrrrrrrrrrrrrrrrrrrrrrVVVVVVVV\0",
// "H\0\0VV",
// &[4, 5, 6, 31, 32],
// BONUS_BOUNDARY * (BONUS_FIRST_CHAR_MULTIPLIER + 2) + 2 * BONUS_CAMEL123
// - PENALTY_GAP_START
// - 23 * PENALTY_GAP_EXTENSION,
// ),
// ],
// );
// }
#[test] #[test]
fn test_reject() { fn test_reject() {