diff --git a/Cargo.lock b/Cargo.lock index 2abe53c..2867753 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,10 +2,17 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "cov-mark" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ffa3d3e0138386cd4361f63537765cac7ee40698028844635a54495a92f67f3" + [[package]] name = "fzf_oxide" version = "0.1.0" dependencies = [ + "cov-mark", "memchr", ] diff --git a/Cargo.toml b/Cargo.toml index acddf34..cf6b496 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,3 +7,8 @@ edition = "2021" [dependencies] memchr = "2.5.0" +cov-mark = { version = "1.1.0", default-features = false } + +[dev-dependencies] +cov-mark = { version = "1.1.0", default-features = true } + diff --git a/README.md b/README.md index 76556d5..a00ab8d 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,9 @@ # fzf_oxide + An optimized rust port of the fzf fuzzy matching algorithm + +## TODO: + +* case mismatch penalty +* substring/prefix/postfix/exact matcher +* high level API (worker thread, query parsing, sorting) diff --git a/fuzz.sh b/fuzz.sh new file mode 100755 index 0000000..d3ffa2c --- /dev/null +++ b/fuzz.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +cargo +nightly fuzz "${1}" fuzz_target_1 "${@:2:99}" diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml new file mode 100644 index 0000000..1b9d8a7 --- /dev/null +++ b/fuzz/Cargo.toml @@ -0,0 +1,29 @@ +[package] +name = "fzf_oxide-fuzz" +version = "0.0.0" +publish = false +edition = "2021" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" +arbitrary = { version = "1", features = ["derive"] } + +[dependencies.fzf_oxide] +path = ".." + +# Prevent this from interfering with workspaces +[workspace] +members = ["."] + +[profile.release] +debug = 1 + +[[bin]] +name = "fuzz_target_1" +path = "fuzz_targets/fuzz_target_1.rs" +test = false +doc = false + diff --git a/fuzz/fuzz_targets/fuzz_target_1.rs b/fuzz/fuzz_targets/fuzz_target_1.rs new file mode 100644 index 0000000..d9df7d3 --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_target_1.rs @@ -0,0 +1,78 @@ +#![no_main] + +use fzf_oxide::{chars, Matcher, MatcherConfig, Utf32Str}; +use libfuzzer_sys::arbitrary::Arbitrary; +use libfuzzer_sys::fuzz_target; + +#[derive(Arbitrary, Debug)] +pub struct Input<'a> { + haystack: &'a str, + needle: &'a str, + ignore_case: bool, + normalize: bool, +} + +fuzz_target!(|data: Input<'_>| { + let mut data = data; + let mut config = MatcherConfig::DEFAULT; + config.ignore_case = data.ignore_case; + config.normalize = data.normalize; + let mut matcher = Matcher::new(config); + let mut indices_optimal = Vec::new(); + let mut indices_greedy = Vec::new(); + let mut needle_buf = Vec::new(); + let mut haystack_buf = Vec::new(); + let normalize = |mut c: char| { + if config.normalize { + c = chars::normalize(c); + } + if config.ignore_case { + c = chars::to_lower_case(c); + } + c + }; + let needle: String = data.needle.chars().map(normalize).collect(); + let needle_chars: Vec<_> = needle.chars().collect(); + let needle = Utf32Str::new(&needle, &mut needle_buf); + let haystack = Utf32Str::new(data.haystack, &mut haystack_buf); + + let greedy_score = matcher.fuzzy_indices_greedy(haystack, needle, &mut indices_greedy); + if greedy_score.is_some() { + let match_chars: Vec<_> = indices_greedy + .iter() + .map(|&i| normalize(haystack.get(i))) + .collect(); + assert_eq!( + match_chars, needle_chars, + "failed match, found {indices_greedy:?} {match_chars:?} (greedy)" + ); + } + let optimal_score = matcher.fuzzy_indices(haystack, needle, &mut indices_optimal); + if optimal_score.is_some() { + let match_chars: Vec<_> = indices_optimal + .iter() + .map(|&i| normalize(haystack.get(i))) + .collect(); + assert_eq!( + match_chars, needle_chars, + "failed match, found {indices_optimal:?} {match_chars:?}" + ); + } + match (greedy_score, optimal_score) { + (None, Some(score)) => unreachable!("optimal matched {score} but greedy did not match"), + (Some(score), None) => unreachable!("greedy matched {score} but optimal did not match"), + (Some(greedy), Some(optimal)) => { + assert!( + greedy <= optimal, + "optimal score must be atleast the same as greedy score {greedy} {optimal}" + ); + if indices_greedy == indices_optimal { + assert_eq!( + greedy, optimal, + "if matching same char greedy and optimal score should be identical" + ) + } + } + (None, None) => (), + } +}); diff --git a/src/chars.rs b/src/chars.rs index 391df76..5905f77 100644 --- a/src/chars.rs +++ b/src/chars.rs @@ -9,7 +9,7 @@ use crate::MatcherConfig; mod case_fold; mod normalize; -pub trait Char: Copy + Eq + Ord + fmt::Display { +pub(crate) trait Char: Copy + Eq + Ord + fmt::Display { const ASCII: bool; fn char_class(self, config: &MatcherConfig) -> CharClass; fn char_class_and_normalize(self, config: &MatcherConfig) -> (Self, CharClass); diff --git a/src/chars/case_fold.rs b/src/chars/case_fold.rs index aacbe46..3c6d01b 100644 --- a/src/chars/case_fold.rs +++ b/src/chars/case_fold.rs @@ -7,11 +7,7 @@ // ucd-generate 0.3.0 is available on crates.io. pub const CASE_FOLDING_SIMPLE: &'static [(char, char)] = &[ - ('A', 'a'), ('B', 'b'), ('C', 'c'), ('D', 'd'), ('E', 'e'), ('F', 'f'), - ('G', 'g'), ('H', 'h'), ('I', 'i'), ('J', 'j'), ('K', 'k'), ('L', 'l'), - ('M', 'm'), ('N', 'n'), ('O', 'o'), ('P', 'p'), ('Q', 'q'), ('R', 'r'), - ('S', 's'), ('T', 't'), ('U', 'u'), ('V', 'v'), ('W', 'w'), ('X', 'x'), - ('Y', 'y'), ('Z', 'z'), ('µ', 'μ'), ('À', 'à'), ('Á', 'á'), + ('µ', 'μ'), ('À', 'à'), ('Á', 'á'), ('Â', 'â'), ('Ã', 'ã'), ('Ä', 'ä'), ('Å', 'å'), ('Æ', 'æ'), ('Ç', 'ç'), ('È', 'è'), ('É', 'é'), ('Ê', 'ê'), ('Ë', 'ë'), ('Ì', 'ì'), ('Í', 'í'), ('Î', 'î'), ('Ï', 'ï'), ('Ð', 'ð'), diff --git a/src/config.rs b/src/config.rs index 85dfdc1..b228e82 100644 --- a/src/config.rs +++ b/src/config.rs @@ -2,6 +2,7 @@ use crate::chars::CharClass; use crate::score::BONUS_BOUNDARY; #[non_exhaustive] +#[derive(PartialEq, Eq, Debug, Clone, Copy)] pub struct MatcherConfig { pub delimiter_chars: &'static [u8], /// Extra bonus for word boundary after whitespace character or beginning of the string @@ -18,14 +19,6 @@ pub struct MatcherConfig { pub ignore_case: bool, } -// #[derive(Debug, Eq, PartialEq, PartialOrd, Ord, Copy, Clone, Hash)] -// #[non_exhaustive] -// pub enum CaseMatching { -// Respect, -// Ignore, -// Smart, -// } - impl MatcherConfig { pub const DEFAULT: Self = { MatcherConfig { diff --git a/src/fuzzy_greedy.rs b/src/fuzzy_greedy.rs index 54fd340..963818d 100644 --- a/src/fuzzy_greedy.rs +++ b/src/fuzzy_greedy.rs @@ -37,6 +37,7 @@ impl Matcher { let mut needle_iter = needle.iter().rev().copied(); let mut needle_char = needle_iter.next().unwrap(); for (i, &c) in haystack[start..end].iter().enumerate().rev() { + let c = c.normalize(&self.config); if c == needle_char { let Some(next_needle_char) = needle_iter.next() else { start += i; diff --git a/src/fuzzy_optimal.rs b/src/fuzzy_optimal.rs index 25ee42d..6a9a247 100644 --- a/src/fuzzy_optimal.rs +++ b/src/fuzzy_optimal.rs @@ -19,7 +19,6 @@ impl Matcher { end: usize, indices: &mut Vec, ) -> Option { - println!("{start} {end}"); // construct a matrix (and copy the haystack), the matrix and haystack size are bounded // to avoid the slow O(mn) time complexity for large inputs. Furthermore, it allows // us to treat needle indices as u16 @@ -40,10 +39,12 @@ impl Matcher { let (max_score_pos, max_score, matched) = matrix.setup(needle, prev_class, &self.config); // this only happened with unicode haystacks, for ASCII the prefilter handles all rejects if !matched { + debug_assert!(!(H::ASCII && N::ASCII)); return None; } if needle.len() == 1 { - indices.push(max_score_pos as u32); + indices.clear(); + indices.push(max_score_pos as u32 + start as u32); return Some(max_score); } debug_assert_eq!( @@ -112,27 +113,35 @@ impl Matrix<'_, H> { matched = true; } } - if c == first_needle_char { - let score = SCORE_MATCH + bonus * BONUS_FIRST_CHAR_MULTIPLIER; + + // we calculate two scores: + // * one for transversing the matrix horizontially (no match at + // the current char) + // * one for transversing the matrix diagonally (match at the + // current char) + // the maximum of those two scores is used + let gap_penalty = if in_gap { + PENALTY_GAP_EXTENSION + } else { + PENALTY_GAP_START + }; + let score_gap = prev_score.saturating_sub(gap_penalty); + let score_match = SCORE_MATCH + bonus * BONUS_FIRST_CHAR_MULTIPLIER; + if c == first_needle_char && score_match >= score_gap { matrix_cell.consecutive_chars = 1; - if needle.len() == 1 && score > max_score { - max_score = score; + matrix_cell.score = score_match; + in_gap = false; + if needle.len() == 1 && score_match > max_score { + max_score = score_match; max_score_pos = i; // can't get better than this if bonus >= BONUS_BOUNDARY { break; } } - matrix_cell.score = score; - in_gap = false; } else { - let gap_penalty = if in_gap { - PENALTY_GAP_EXTENSION - } else { - PENALTY_GAP_START - }; - matrix_cell.score = prev_score.saturating_sub(gap_penalty); matrix_cell.consecutive_chars = 0; + matrix_cell.score = score_gap; in_gap = true; } prev_score = matrix_cell.score; @@ -186,7 +195,7 @@ impl Matrix<'_, H> { // current char) // the maximum of those two scores is used let mut score_diag = 0; - let score_hory = prev_matrix_cell.score.saturating_sub(gap_penalty); + let score_hor = prev_matrix_cell.score.saturating_sub(gap_penalty); let mut consecutive = 0; if haystack_char.char == needle_char { @@ -206,15 +215,17 @@ impl Matrix<'_, H> { bonus = max(first_bonus, BONUS_CONSECUTIVE) } } - if score_diag + bonus < score_hory { + if score_diag + bonus < score_hor + || (consecutive == 1 && score_diag + bonus == score_hor) + { score_diag += haystack_char.bonus; consecutive = 0; } else { score_diag += bonus; } } - in_gap = score_diag < score_hory; - let score = max(score_diag, score_hory); + in_gap = consecutive == 0; + let score = max(score_diag, score_hor); if i == needle.len() - 1 && score > max_score { max_score = score; max_score_end = col as u16; @@ -235,6 +246,7 @@ impl Matrix<'_, H> { indices: &mut Vec, best_match_end: u16, ) { + indices.clear(); indices.resize(needle.len(), 0); let mut row_iter = self.rows_rev().zip(indices.iter_mut().rev()).peekable(); @@ -255,22 +267,22 @@ impl Matrix<'_, H> { let mut score_diag = 0; let mut score_horz = 0; if let Some(&(prev_row, _)) = row_iter.peek() { - if col >= prev_row.off { - score_diag = prev_row[col].score; - } + score_diag = prev_row[col - 1].score; } if col > row.off { score_horz = row[col - 1].score; } - let mut new_prefer_match = row[col].consecutive_chars > 1; - if !new_prefer_match && col + 1 < haystack_len { + let mut in_block = row[col].consecutive_chars > 1; + if !in_block && col + 1 < haystack_len { if let Some(next_row) = next_row { if col + 1 >= next_row.off { - new_prefer_match = next_row[col + 1].consecutive_chars > 0 + in_block = next_row[col + 1].consecutive_chars > 1 } } } - if score > score_diag && (score > score_horz || score == score_horz && prefer_match) { + if score > score_diag + && (score > score_horz || in_block || prefer_match && score == score_horz) + { *matched_col_idx = col as u32 + start; next_row = Some(row); let Some(next) = row_iter.next() else { @@ -278,8 +290,8 @@ impl Matrix<'_, H> { }; (row, matched_col_idx) = next } - prefer_match = new_prefer_match; col -= 1; + prefer_match = row[col].consecutive_chars != 0; } } } diff --git a/src/lib.rs b/src/lib.rs index 637e79a..e794bc0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,7 @@ // sadly ranges don't optmimzie well #![allow(clippy::manual_range_contains)] -mod chars; +pub mod chars; mod config; #[cfg(test)] mod debug; @@ -15,11 +15,11 @@ mod utf32_str; #[cfg(test)] mod tests; -pub use config::MatcherConfig; +pub use crate::config::MatcherConfig; +pub use crate::utf32_str::Utf32Str; use crate::chars::AsciiChar; use crate::matrix::MatrixSlab; -use crate::utf32_str::Utf32Str; pub struct Matcher { pub config: MatcherConfig, @@ -131,7 +131,7 @@ impl Matcher { needle_: Utf32Str<'_>, indidies: &mut Vec, ) -> Option { - if needle_.len() > haystack.len() { + if needle_.len() > haystack.len() || needle_.is_empty() { return None; } // if needle_.len() == haystack.len() { diff --git a/src/prefilter.rs b/src/prefilter.rs index 6b7c58e..918c7b2 100644 --- a/src/prefilter.rs +++ b/src/prefilter.rs @@ -6,7 +6,7 @@ use crate::Matcher; #[inline(always)] fn find_ascii_ignore_case(c: u8, haystack: &[u8]) -> Option { - if c >= b'a' || c <= b'z' { + if c >= b'a' && c <= b'z' { memchr2(c, c - 32, haystack) } else { memchr(c, haystack) @@ -15,7 +15,7 @@ fn find_ascii_ignore_case(c: u8, haystack: &[u8]) -> Option { #[inline(always)] fn find_ascii_ignore_case_rev(c: u8, haystack: &[u8]) -> Option { - if c >= b'a' || c <= b'z' { + if c >= b'a' && c <= b'z' { memrchr2(c, c - 32, haystack) } else { memrchr(c, haystack) @@ -84,6 +84,11 @@ impl Matcher { .iter() .rev() .position(|c| c.normalize(&self.config) == needle_char)?; + // matches are never possible in this case + if end - start < needle.len() { + cov_mark::hit!(small_haystack); + return None; + } Some((start, end)) } diff --git a/src/score.rs b/src/score.rs index 7725c93..ac487ac 100644 --- a/src/score.rs +++ b/src/score.rs @@ -103,8 +103,7 @@ impl Matcher { needle_char = *needle_iter.next().unwrap_or(&needle_char); for (i, c) in haystack[start + 1..end].iter().enumerate() { - let class = c.char_class(&self.config); - let c = c.normalize(&self.config); + let (c, class) = c.char_class_and_normalize(&self.config); if c == needle_char { if INDICES { indices.push(i as u32 + start as u32 + 1) diff --git a/src/tests.rs b/src/tests.rs index 713fa2a..1dd2fdd 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -1,3 +1,5 @@ +use cov_mark::check; + use crate::chars::Char; use crate::score::{ BONUS_BOUNDARY, BONUS_CAMEL123, BONUS_CONSECUTIVE, BONUS_FIRST_CHAR_MULTIPLIER, BONUS_NON_WORD, @@ -6,12 +8,20 @@ use crate::score::{ use crate::utf32_str::Utf32Str; use crate::{Matcher, MatcherConfig}; -pub fn assert_matches( - use_v1: bool, +use Algorithm::*; + +#[derive(Debug)] +enum Algorithm { + FuzzyOptimal, + FuzzyGreedy, +} + +fn assert_matches( + algorithm: &[Algorithm], normalize: bool, case_sensitive: bool, path: bool, - cases: &[(&str, &str, u32, u32, u16)], + cases: &[(&str, &str, &[u32], u16)], ) { let mut config = MatcherConfig { normalize, @@ -22,10 +32,10 @@ pub fn assert_matches( config.set_match_paths(); } let mut matcher = Matcher::new(config); - let mut indices = Vec::new(); + let mut matched_indices = Vec::new(); let mut needle_buf = Vec::new(); let mut haystack_buf = Vec::new(); - for &(haystack, needle, start, end, mut score) in cases { + for &(haystack, needle, indices, mut score) in cases { let needle = if !case_sensitive { needle.to_lowercase() } else { @@ -34,32 +44,37 @@ pub fn assert_matches( let needle = Utf32Str::new(&needle, &mut needle_buf); let haystack = Utf32Str::new(haystack, &mut haystack_buf); score += needle.len() as u16 * SCORE_MATCH; + for algo in algorithm { + println!("xx {matched_indices:?} {algo:?}"); + let res = match algo { + Algorithm::FuzzyOptimal => { + matcher.fuzzy_indices(haystack, needle, &mut matched_indices) + } + Algorithm::FuzzyGreedy => { + matcher.fuzzy_indices_greedy(haystack, needle, &mut matched_indices) + } + }; + println!("{matched_indices:?}"); + let match_chars: Vec<_> = matched_indices + .iter() + .map(|&i| haystack.get(i).normalize(&matcher.config)) + .collect(); + let needle_chars: Vec<_> = needle.chars().collect(); - let res = if use_v1 { - matcher.fuzzy_indices_greedy(haystack, needle, &mut indices) - } else { - matcher.fuzzy_indices(haystack, needle, &mut indices) - }; - let match_chars: Vec<_> = indices - .iter() - .map(|&i| haystack.get(i).normalize(&matcher.config)) - .collect(); - let needle_chars: Vec<_> = needle.chars().collect(); - - assert_eq!( - res, - Some(score), - "{needle:?} did not match {haystack:?}: matched {match_chars:?} {indices:?}" - ); - assert_eq!( - match_chars, needle_chars, - "match indices are incorrect {indices:?}" - ); - assert_eq!( - indices.first().copied()..indices.last().map(|&i| i + 1), - Some(start)..Some(end), - "{needle:?} match {haystack:?}" - ); + assert_eq!( + res, + Some(score), + "{needle:?} did not match {haystack:?}: matched {match_chars:?} {matched_indices:?} {algo:?}" + ); + assert_eq!( + matched_indices, indices, + "{needle:?} match {haystack:?} {algo:?}" + ); + assert_eq!( + match_chars, needle_chars, + "{needle:?} match {haystack:?} indices are incorrect {matched_indices:?} {algo:?}" + ); + } } } @@ -104,7 +119,7 @@ const BONUS_BOUNDARY_DELIMITER: u16 = MatcherConfig::DEFAULT.bonus_boundary_deli #[test] fn test_fuzzy() { assert_matches( - false, + &[FuzzyGreedy, FuzzyOptimal], false, false, false, @@ -112,15 +127,13 @@ fn test_fuzzy() { ( "fooBarbaz1", "oBZ", - 2, - 9, + &[2, 3, 8], BONUS_CAMEL123 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION * 3, ), ( "foo bar baz", "fbb", - 0, - 9, + &[0, 4, 8], BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 2 - 2 * PENALTY_GAP_START - 4 * PENALTY_GAP_EXTENSION, @@ -128,23 +141,20 @@ fn test_fuzzy() { ( "/AutomatorDocument.icns", "rdoc", - 9, - 13, + &[9, 10, 11, 12], BONUS_CAMEL123 + BONUS_CONSECUTIVE * 2, ), ( "/man1/zshcompctl.1", "zshc", - 6, - 10, + &[6, 7, 8, 9], BONUS_BOUNDARY_DELIMITER * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_DELIMITER * 3, ), ( "/.oh-my-zsh/cache", "zshc", - 8, - 13, + &[8, 9, 10, 12], BONUS_BOUNDARY * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY * 2 - PENALTY_GAP_START + BONUS_BOUNDARY_DELIMITER, @@ -152,15 +162,13 @@ fn test_fuzzy() { ( "ab0123 456", "12356", - 3, - 10, + &[3, 4, 5, 8, 9], BONUS_CONSECUTIVE * 3 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION, ), ( "abc123 456", "12356", - 3, - 10, + &[3, 4, 5, 8, 9], BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CAMEL123 * 2 + BONUS_CONSECUTIVE @@ -170,8 +178,7 @@ fn test_fuzzy() { ( "foo/bar/baz", "fbb", - 0, - 9, + &[0, 4, 8], BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_DELIMITER * 2 - 2 * PENALTY_GAP_START - 4 * PENALTY_GAP_EXTENSION, @@ -179,8 +186,7 @@ fn test_fuzzy() { ( "fooBarBaz", "fbb", - 0, - 7, + &[0, 3, 6], BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CAMEL123 * 2 - 2 * PENALTY_GAP_START - 2 * PENALTY_GAP_EXTENSION, @@ -188,8 +194,7 @@ fn test_fuzzy() { ( "foo barbaz", "fbb", - 0, - 8, + &[0, 4, 7], BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE - PENALTY_GAP_START * 2 - PENALTY_GAP_EXTENSION * 3, @@ -197,20 +202,26 @@ fn test_fuzzy() { ( "fooBar Baz", "foob", - 0, - 4, + &[0, 1, 2, 3], BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 3, ), ( "xFoo-Bar Baz", "foo-b", - 1, - 6, + &[1, 2, 3, 4, 5], BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CAMEL123 * 2 + BONUS_NON_WORD + BONUS_BOUNDARY, ), + ( + "]\0\0\0H\0\0\0rrrrrrrrrrrrrrrrrrrrrrrVVVVVVVV\0", + "H\0\0VV", + &[4, 5, 6, 31, 32], + BONUS_BOUNDARY * (BONUS_FIRST_CHAR_MULTIPLIER + 2) + 2 * BONUS_CAMEL123 + - PENALTY_GAP_START + - 23 * PENALTY_GAP_EXTENSION, + ), ], ); } @@ -218,7 +229,7 @@ fn test_fuzzy() { #[test] fn test_fuzzy_case_sensitive() { assert_matches( - false, + &[FuzzyGreedy, FuzzyOptimal], false, true, false, @@ -226,15 +237,13 @@ fn test_fuzzy_case_sensitive() { ( "fooBarbaz1", "oBz", - 2, - 9, + &[2, 3, 8], BONUS_CAMEL123 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION * 3, ), ( "Foo/Bar/Baz", "FBB", - 0, - 9, + &[0, 4, 8], BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_DELIMITER * 2 - 2 * PENALTY_GAP_START - 4 * PENALTY_GAP_EXTENSION, @@ -242,8 +251,7 @@ fn test_fuzzy_case_sensitive() { ( "FooBarBaz", "FBB", - 0, - 7, + &[0, 3, 6], BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CAMEL123 * 2 - 2 * PENALTY_GAP_START - 2 * PENALTY_GAP_EXTENSION, @@ -251,171 +259,15 @@ fn test_fuzzy_case_sensitive() { ( "FooBar Baz", "FooB", - 0, - 4, + &[0, 1, 2, 3], BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 3, ), // Consecutive bonus updated - ("foo-bar", "o-ba", 2, 6, BONUS_BOUNDARY * 2 + BONUS_NON_WORD), - ], - ); -} - -#[test] -fn test_fuzzy_case_sensitive_v1() { - assert_matches( - true, - false, - true, - false, - &[ ( - "fooBarbaz1", - "oBz", - 2, - 9, - BONUS_CAMEL123 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION * 3, - ), - ( - "Foo/Bar/Baz", - "FBB", - 0, - 9, - BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_DELIMITER * 2 - - 2 * PENALTY_GAP_START - - 4 * PENALTY_GAP_EXTENSION, - ), - ( - "FooBarBaz", - "FBB", - 0, - 7, - BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CAMEL123 * 2 - - 2 * PENALTY_GAP_START - - 2 * PENALTY_GAP_EXTENSION, - ), - ( - "FooBar Baz", - "FooB", - 0, - 4, - BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 3, - ), - // Consecutive bonus updated - ("foo-bar", "o-ba", 2, 6, BONUS_BOUNDARY * 2 + BONUS_NON_WORD), - ], - ); -} - -#[test] -fn test_v1_fuzzy() { - assert_matches( - true, - false, - false, - false, - &[ - ( - "fooBarbaz1", - "oBZ", - 2, - 9, - BONUS_CAMEL123 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION * 3, - ), - ( - "foo bar baz", - "fbb", - 0, - 9, - BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 2 - - 2 * PENALTY_GAP_START - - 4 * PENALTY_GAP_EXTENSION, - ), - ( - "/AutomatorDocument.icns", - "rdoc", - 9, - 13, - BONUS_CAMEL123 + BONUS_CONSECUTIVE * 2, - ), - ( - "/man1/zshcompctl.1", - "zshc", - 6, - 10, - BONUS_BOUNDARY_DELIMITER * BONUS_FIRST_CHAR_MULTIPLIER - + BONUS_BOUNDARY_DELIMITER * 3, - ), - ( - "/.oh-my-zsh/cache", - "zshc", - 8, - 13, - BONUS_BOUNDARY * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY * 2 - - PENALTY_GAP_START - + BONUS_BOUNDARY_DELIMITER, - ), - ( - "ab0123 456", - "12356", - 3, - 10, - BONUS_CONSECUTIVE * 3 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION, - ), - ( - "abc123 456", - "12356", - 3, - 10, - BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER - + BONUS_CAMEL123 * 2 - + BONUS_CONSECUTIVE - - PENALTY_GAP_START - - PENALTY_GAP_EXTENSION, - ), - ( - "foo/bar/baz", - "fbb", - 0, - 9, - BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_DELIMITER * 2 - - 2 * PENALTY_GAP_START - - 4 * PENALTY_GAP_EXTENSION, - ), - ( - "fooBarBaz", - "fbb", - 0, - 7, - BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CAMEL123 * 2 - - 2 * PENALTY_GAP_START - - 2 * PENALTY_GAP_EXTENSION, - ), - ( - "foo barbaz", - "fbb", - 0, - 8, - BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE - - PENALTY_GAP_START * 2 - - PENALTY_GAP_EXTENSION * 3, - ), - ( - "fooBar Baz", - "foob", - 0, - 4, - BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 3, - ), - ( - "xFoo-Bar Baz", - "foo-b", - 1, - 6, - BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER - + BONUS_CAMEL123 * 2 - + BONUS_NON_WORD - + BONUS_BOUNDARY, + "foo-bar", + "o-ba", + &[2, 3, 4, 5], + BONUS_BOUNDARY * 2 + BONUS_NON_WORD, ), ], ); @@ -424,7 +276,7 @@ fn test_v1_fuzzy() { #[test] fn test_normalize() { assert_matches( - false, + &[FuzzyGreedy, FuzzyOptimal], true, false, false, @@ -432,15 +284,13 @@ fn test_normalize() { ( "Só Danço Samba", "So", - 0, - 2, + &[0, 1], BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE, ), ( "Só Danço Samba", "sodc", - 0, - 7, + &[0, 1, 3, 6], BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE - PENALTY_GAP_START + BONUS_BOUNDARY_WHITE @@ -450,22 +300,19 @@ fn test_normalize() { ( "Danço", "danco", - 0, - 5, + &[0, 1, 2, 3, 4], BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 4), ), ( "DanÇo", "danco", - 0, - 5, + &[0, 1, 2, 3, 4], BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 4), ), ( "xÇando", "cando", - 1, - 6, + &[1, 2, 3, 4, 5], BONUS_CAMEL123 * (BONUS_FIRST_CHAR_MULTIPLIER + 4), ), ], @@ -473,60 +320,9 @@ fn test_normalize() { } #[test] -fn test_normalize_v1() { +fn test_unicode1() { assert_matches( - true, - true, - false, - false, - &[ - ( - "Só Danço Samba", - "So", - 0, - 2, - BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE, - ), - ( - "Só Danço Samba", - "sodc", - 0, - 7, - BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE - - PENALTY_GAP_START - + BONUS_BOUNDARY_WHITE - - PENALTY_GAP_START - - PENALTY_GAP_EXTENSION, - ), - ( - "Danço", - "danco", - 0, - 5, - BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 4), - ), - ( - "DanÇo", - "danco", - 0, - 5, - BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 4), - ), - ( - "xÇando", - "cando", - 1, - 6, - BONUS_CAMEL123 * (BONUS_FIRST_CHAR_MULTIPLIER + 4), - ), - ], - ) -} - -#[test] -fn test_unicode_v1() { - assert_matches( - true, + &[FuzzyGreedy, FuzzyOptimal], true, false, false, @@ -534,41 +330,13 @@ fn test_unicode_v1() { ( "你好世界", "你好", - 0, - 2, + &[0, 1], BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE, ), ( "你好世界", "你世", - 0, - 3, - BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER - PENALTY_GAP_START, - ), - ], - ) -} - -#[test] -fn test_unicode() { - assert_matches( - false, - true, - false, - false, - &[ - ( - "你好世界", - "你好", - 0, - 2, - BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE, - ), - ( - "你好世界", - "你世", - 0, - 3, + &[0, 2], BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER - PENALTY_GAP_START, ), ], @@ -578,15 +346,14 @@ fn test_unicode() { #[test] fn test_long_str() { assert_matches( - false, + &[FuzzyGreedy, FuzzyOptimal], false, false, false, &[( &"x".repeat(u16::MAX as usize + 1), "xx", - 0, - 2, + &[0, 1], (BONUS_FIRST_CHAR_MULTIPLIER + 1) * BONUS_BOUNDARY_WHITE, )], ); @@ -595,19 +362,69 @@ fn test_long_str() { #[test] fn test_optimal() { assert_matches( + &[FuzzyOptimal], false, false, false, - false, - &[( - "axxx xx ", - "xx", - 5, - 7, - (BONUS_FIRST_CHAR_MULTIPLIER + 1) * BONUS_BOUNDARY_WHITE, - )], - ) + &[ + ( + "axxx xx ", + "xx", + &[5, 6], + (BONUS_FIRST_CHAR_MULTIPLIER + 1) * BONUS_BOUNDARY_WHITE, + ), + ( + "I\0I", + "\0", + &[1], + BONUS_FIRST_CHAR_MULTIPLIER * BONUS_NON_WORD, + ), + ( + "SS!H", + "S!", + &[0, 2], + BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_NON_WORD + - PENALTY_GAP_START, + ), + ( + "^^^\u{7f}\0\0E%\u{1a}^", + "^^\0E", + &[1, 2, 5, 6], + BONUS_NON_WORD * (BONUS_FIRST_CHAR_MULTIPLIER + 3) + - PENALTY_GAP_START + - PENALTY_GAP_EXTENSION, + ), + ( + "Hٷ!!\0!!!\n\0\0\u{4}\u{c}\0\u{8}\0!\0\0\u{c}", + "\0!\0\0!", + &[4, 5, 9, 10, 16], + BONUS_NON_WORD * (BONUS_FIRST_CHAR_MULTIPLIER + 4) + - 2 * PENALTY_GAP_START + - 6 * PENALTY_GAP_EXTENSION, + ), + ], + ); } +// #[test] +// fn test_greedy() { +// assert_matches( +// &[FuzzyGreedy], +// false, +// false, +// false, +// &[ +// ("SS!H", "S!", &[1, 2], BONUS_NON_WORD), +// ( +// "]\0\0\0H\0\0\0rrrrrrrrrrrrrrrrrrrrrrrVVVVVVVV\0", +// "H\0\0VV", +// &[4, 5, 6, 31, 32], +// BONUS_BOUNDARY * (BONUS_FIRST_CHAR_MULTIPLIER + 2) + 2 * BONUS_CAMEL123 +// - PENALTY_GAP_START +// - 23 * PENALTY_GAP_EXTENSION, +// ), +// ], +// ); +// } #[test] fn test_reject() { @@ -641,5 +458,7 @@ fn test_reject() { true, false, &[("Só Danço Samba", "sod"), ("Só Danço Samba", "soc")], - ) + ); + check!(small_haystack); + assert_not_matches(false, false, false, &[("ۂۂfoۂۂ", "foo")]); } diff --git a/typos.toml b/typos.toml index 63afa46..322202d 100644 --- a/typos.toml +++ b/typos.toml @@ -1,3 +1,3 @@ default.extend-ignore-re = ["\\\\u\\{[0-9A-Za-z]*\\}"] [files] -extend-exclude = ["src/tests.rs"] +extend-exclude = ["src/tests.rs", "*.html"]