high test and fuzz coverage, fix lots of bugs

This commit is contained in:
Pascal Kuthe 2023-07-22 03:37:15 +02:00
parent 74e2b46f04
commit 8527340bc9
No known key found for this signature in database
GPG Key ID: D715E8655AE166A6
16 changed files with 333 additions and 379 deletions

7
Cargo.lock generated
View File

@ -2,10 +2,17 @@
# It is not intended for manual editing. # It is not intended for manual editing.
version = 3 version = 3
[[package]]
name = "cov-mark"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ffa3d3e0138386cd4361f63537765cac7ee40698028844635a54495a92f67f3"
[[package]] [[package]]
name = "fzf_oxide" name = "fzf_oxide"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"cov-mark",
"memchr", "memchr",
] ]

View File

@ -7,3 +7,8 @@ edition = "2021"
[dependencies] [dependencies]
memchr = "2.5.0" memchr = "2.5.0"
cov-mark = { version = "1.1.0", default-features = false }
[dev-dependencies]
cov-mark = { version = "1.1.0", default-features = true }

View File

@ -1,2 +1,9 @@
# fzf_oxide # fzf_oxide
An optimized rust port of the fzf fuzzy matching algorithm An optimized rust port of the fzf fuzzy matching algorithm
## TODO:
* case mismatch penalty
* substring/prefix/postfix/exact matcher
* high level API (worker thread, query parsing, sorting)

3
fuzz.sh Executable file
View File

@ -0,0 +1,3 @@
#!/usr/bin/env bash
# Thin wrapper around `cargo fuzz` (nightly toolchain) for `fuzz_target_1`.
#
# Usage: ./fuzz.sh <cargo-fuzz subcommand> [extra arguments...]
#
# The first argument selects the cargo-fuzz subcommand; every remaining
# argument is forwarded verbatim after the target name. `"${@:2}"` expands to
# all positional parameters from the second one onwards, without an arbitrary
# upper bound on how many are passed through.
cargo +nightly fuzz "${1}" fuzz_target_1 "${@:2}"

29
fuzz/Cargo.toml Normal file
View File

@ -0,0 +1,29 @@
[package]
name = "fzf_oxide-fuzz"
version = "0.0.0"
publish = false
edition = "2021"
[package.metadata]
cargo-fuzz = true
[dependencies]
libfuzzer-sys = "0.4"
arbitrary = { version = "1", features = ["derive"] }
[dependencies.fzf_oxide]
path = ".."
# Prevent this from interfering with workspaces
[workspace]
members = ["."]
[profile.release]
debug = 1
[[bin]]
name = "fuzz_target_1"
path = "fuzz_targets/fuzz_target_1.rs"
test = false
doc = false

View File

@ -0,0 +1,78 @@
#![no_main]
use fzf_oxide::{chars, Matcher, MatcherConfig, Utf32Str};
use libfuzzer_sys::arbitrary::Arbitrary;
use libfuzzer_sys::fuzz_target;
/// Structured fuzzer input, generated from the raw fuzz bytes through the
/// derived `Arbitrary` implementation.
#[derive(Arbitrary, Debug)]
pub struct Input<'a> {
/// Text that is searched; the fuzz target hands it to the matcher unmodified.
haystack: &'a str,
/// Pattern to search for; the fuzz target normalizes it before matching.
needle: &'a str,
/// Copied into `MatcherConfig::ignore_case` by the fuzz target.
ignore_case: bool,
/// Copied into `MatcherConfig::normalize` by the fuzz target.
normalize: bool,
}
// Differential fuzz target for the fuzzy matchers.
//
// For every generated `Input` both `Matcher::fuzzy_indices_greedy` and
// `Matcher::fuzzy_indices` are run on the same haystack/needle pair and the
// following properties are checked:
// * the haystack characters at the reported indices (after normalization)
//   spell out the needle,
// * both algorithms agree on whether there is a match at all,
// * the greedy score never exceeds the optimal score, and
// * identical index lists yield identical scores.
fuzz_target!(|data: Input<'_>| {
    // Start from the default configuration and only toggle the fuzzed switches.
    let mut config = MatcherConfig::DEFAULT;
    config.ignore_case = data.ignore_case;
    config.normalize = data.normalize;
    let mut matcher = Matcher::new(config);

    let mut indices_optimal = Vec::new();
    let mut indices_greedy = Vec::new();
    let mut needle_buf = Vec::new();
    let mut haystack_buf = Vec::new();

    // Applies the character transformations enabled in `config`.
    let normalize = |mut c: char| {
        if config.normalize {
            c = chars::normalize(c);
        }
        if config.ignore_case {
            c = chars::to_lower_case(c);
        }
        c
    };

    // The needle is transformed up front; the haystack is passed as-is.
    // NOTE(review): presumably the matcher expects an already normalized
    // needle -- confirm against the `Matcher` documentation.
    let needle: String = data.needle.chars().map(normalize).collect();
    let needle_chars: Vec<_> = needle.chars().collect();
    let needle = Utf32Str::new(&needle, &mut needle_buf);
    let haystack = Utf32Str::new(data.haystack, &mut haystack_buf);

    // Greedy algorithm: the matched characters must reproduce the needle.
    let greedy_score = matcher.fuzzy_indices_greedy(haystack, needle, &mut indices_greedy);
    if greedy_score.is_some() {
        let match_chars: Vec<_> = indices_greedy
            .iter()
            .map(|&i| normalize(haystack.get(i)))
            .collect();
        assert_eq!(
            match_chars, needle_chars,
            "failed match, found {indices_greedy:?} {match_chars:?} (greedy)"
        );
    }

    // Optimal algorithm: same check on its own index list.
    let optimal_score = matcher.fuzzy_indices(haystack, needle, &mut indices_optimal);
    if optimal_score.is_some() {
        let match_chars: Vec<_> = indices_optimal
            .iter()
            .map(|&i| normalize(haystack.get(i)))
            .collect();
        assert_eq!(
            match_chars, needle_chars,
            "failed match, found {indices_optimal:?} {match_chars:?}"
        );
    }

    // Cross-check the two algorithms against each other.
    match (greedy_score, optimal_score) {
        (None, Some(score)) => unreachable!("optimal matched {score} but greedy did not match"),
        (Some(score), None) => unreachable!("greedy matched {score} but optimal did not match"),
        (Some(greedy), Some(optimal)) => {
            assert!(
                greedy <= optimal,
                "optimal score must be atleast the same as greedy score {greedy} {optimal}"
            );
            if indices_greedy == indices_optimal {
                assert_eq!(
                    greedy, optimal,
                    "if matching same char greedy and optimal score should be identical"
                )
            }
        }
        (None, None) => (),
    }
});

View File

@ -9,7 +9,7 @@ use crate::MatcherConfig;
mod case_fold; mod case_fold;
mod normalize; mod normalize;
pub trait Char: Copy + Eq + Ord + fmt::Display { pub(crate) trait Char: Copy + Eq + Ord + fmt::Display {
const ASCII: bool; const ASCII: bool;
fn char_class(self, config: &MatcherConfig) -> CharClass; fn char_class(self, config: &MatcherConfig) -> CharClass;
fn char_class_and_normalize(self, config: &MatcherConfig) -> (Self, CharClass); fn char_class_and_normalize(self, config: &MatcherConfig) -> (Self, CharClass);

View File

@ -7,11 +7,7 @@
// ucd-generate 0.3.0 is available on crates.io. // ucd-generate 0.3.0 is available on crates.io.
pub const CASE_FOLDING_SIMPLE: &'static [(char, char)] = &[ pub const CASE_FOLDING_SIMPLE: &'static [(char, char)] = &[
('A', 'a'), ('B', 'b'), ('C', 'c'), ('D', 'd'), ('E', 'e'), ('F', 'f'), ('µ', 'μ'), ('À', 'à'), ('Á', 'á'),
('G', 'g'), ('H', 'h'), ('I', 'i'), ('J', 'j'), ('K', 'k'), ('L', 'l'),
('M', 'm'), ('N', 'n'), ('O', 'o'), ('P', 'p'), ('Q', 'q'), ('R', 'r'),
('S', 's'), ('T', 't'), ('U', 'u'), ('V', 'v'), ('W', 'w'), ('X', 'x'),
('Y', 'y'), ('Z', 'z'), ('µ', 'μ'), ('À', 'à'), ('Á', 'á'),
('Â', 'â'), ('Ã', 'ã'), ('Ä', 'ä'), ('Å', 'å'), ('Æ', 'æ'), ('Â', 'â'), ('Ã', 'ã'), ('Ä', 'ä'), ('Å', 'å'), ('Æ', 'æ'),
('Ç', 'ç'), ('È', 'è'), ('É', 'é'), ('Ê', 'ê'), ('Ë', 'ë'), ('Ç', 'ç'), ('È', 'è'), ('É', 'é'), ('Ê', 'ê'), ('Ë', 'ë'),
('Ì', 'ì'), ('Í', 'í'), ('Î', 'î'), ('Ï', 'ï'), ('Ð', 'ð'), ('Ì', 'ì'), ('Í', 'í'), ('Î', 'î'), ('Ï', 'ï'), ('Ð', 'ð'),

View File

@ -2,6 +2,7 @@ use crate::chars::CharClass;
use crate::score::BONUS_BOUNDARY; use crate::score::BONUS_BOUNDARY;
#[non_exhaustive] #[non_exhaustive]
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub struct MatcherConfig { pub struct MatcherConfig {
pub delimiter_chars: &'static [u8], pub delimiter_chars: &'static [u8],
/// Extra bonus for word boundary after whitespace character or beginning of the string /// Extra bonus for word boundary after whitespace character or beginning of the string
@ -18,14 +19,6 @@ pub struct MatcherConfig {
pub ignore_case: bool, pub ignore_case: bool,
} }
// #[derive(Debug, Eq, PartialEq, PartialOrd, Ord, Copy, Clone, Hash)]
// #[non_exhaustive]
// pub enum CaseMatching {
// Respect,
// Ignore,
// Smart,
// }
impl MatcherConfig { impl MatcherConfig {
pub const DEFAULT: Self = { pub const DEFAULT: Self = {
MatcherConfig { MatcherConfig {

View File

@ -37,6 +37,7 @@ impl Matcher {
let mut needle_iter = needle.iter().rev().copied(); let mut needle_iter = needle.iter().rev().copied();
let mut needle_char = needle_iter.next().unwrap(); let mut needle_char = needle_iter.next().unwrap();
for (i, &c) in haystack[start..end].iter().enumerate().rev() { for (i, &c) in haystack[start..end].iter().enumerate().rev() {
let c = c.normalize(&self.config);
if c == needle_char { if c == needle_char {
let Some(next_needle_char) = needle_iter.next() else { let Some(next_needle_char) = needle_iter.next() else {
start += i; start += i;

View File

@ -19,7 +19,6 @@ impl Matcher {
end: usize, end: usize,
indices: &mut Vec<u32>, indices: &mut Vec<u32>,
) -> Option<u16> { ) -> Option<u16> {
println!("{start} {end}");
// construct a matrix (and copy the haystack), the matrix and haystack size are bounded // construct a matrix (and copy the haystack), the matrix and haystack size are bounded
// to avoid the slow O(mn) time complexity for large inputs. Furthermore, it allows // to avoid the slow O(mn) time complexity for large inputs. Furthermore, it allows
// us to treat needle indices as u16 // us to treat needle indices as u16
@ -40,10 +39,12 @@ impl Matcher {
let (max_score_pos, max_score, matched) = matrix.setup(needle, prev_class, &self.config); let (max_score_pos, max_score, matched) = matrix.setup(needle, prev_class, &self.config);
// this only happened with unicode haystacks, for ASCII the prefilter handles all rejects // this only happened with unicode haystacks, for ASCII the prefilter handles all rejects
if !matched { if !matched {
debug_assert!(!(H::ASCII && N::ASCII));
return None; return None;
} }
if needle.len() == 1 { if needle.len() == 1 {
indices.push(max_score_pos as u32); indices.clear();
indices.push(max_score_pos as u32 + start as u32);
return Some(max_score); return Some(max_score);
} }
debug_assert_eq!( debug_assert_eq!(
@ -112,27 +113,35 @@ impl<H: Char> Matrix<'_, H> {
matched = true; matched = true;
} }
} }
if c == first_needle_char {
let score = SCORE_MATCH + bonus * BONUS_FIRST_CHAR_MULTIPLIER; // we calculate two scores:
// * one for traversing the matrix horizontally (no match at
// the current char)
// * one for traversing the matrix diagonally (match at the
// current char)
// the maximum of those two scores is used
let gap_penalty = if in_gap {
PENALTY_GAP_EXTENSION
} else {
PENALTY_GAP_START
};
let score_gap = prev_score.saturating_sub(gap_penalty);
let score_match = SCORE_MATCH + bonus * BONUS_FIRST_CHAR_MULTIPLIER;
if c == first_needle_char && score_match >= score_gap {
matrix_cell.consecutive_chars = 1; matrix_cell.consecutive_chars = 1;
if needle.len() == 1 && score > max_score { matrix_cell.score = score_match;
max_score = score; in_gap = false;
if needle.len() == 1 && score_match > max_score {
max_score = score_match;
max_score_pos = i; max_score_pos = i;
// can't get better than this // can't get better than this
if bonus >= BONUS_BOUNDARY { if bonus >= BONUS_BOUNDARY {
break; break;
} }
} }
matrix_cell.score = score;
in_gap = false;
} else { } else {
let gap_penalty = if in_gap {
PENALTY_GAP_EXTENSION
} else {
PENALTY_GAP_START
};
matrix_cell.score = prev_score.saturating_sub(gap_penalty);
matrix_cell.consecutive_chars = 0; matrix_cell.consecutive_chars = 0;
matrix_cell.score = score_gap;
in_gap = true; in_gap = true;
} }
prev_score = matrix_cell.score; prev_score = matrix_cell.score;
@ -186,7 +195,7 @@ impl<H: Char> Matrix<'_, H> {
// current char) // current char)
// the maximum of those two scores is used // the maximum of those two scores is used
let mut score_diag = 0; let mut score_diag = 0;
let score_hory = prev_matrix_cell.score.saturating_sub(gap_penalty); let score_hor = prev_matrix_cell.score.saturating_sub(gap_penalty);
let mut consecutive = 0; let mut consecutive = 0;
if haystack_char.char == needle_char { if haystack_char.char == needle_char {
@ -206,15 +215,17 @@ impl<H: Char> Matrix<'_, H> {
bonus = max(first_bonus, BONUS_CONSECUTIVE) bonus = max(first_bonus, BONUS_CONSECUTIVE)
} }
} }
if score_diag + bonus < score_hory { if score_diag + bonus < score_hor
|| (consecutive == 1 && score_diag + bonus == score_hor)
{
score_diag += haystack_char.bonus; score_diag += haystack_char.bonus;
consecutive = 0; consecutive = 0;
} else { } else {
score_diag += bonus; score_diag += bonus;
} }
} }
in_gap = score_diag < score_hory; in_gap = consecutive == 0;
let score = max(score_diag, score_hory); let score = max(score_diag, score_hor);
if i == needle.len() - 1 && score > max_score { if i == needle.len() - 1 && score > max_score {
max_score = score; max_score = score;
max_score_end = col as u16; max_score_end = col as u16;
@ -235,6 +246,7 @@ impl<H: Char> Matrix<'_, H> {
indices: &mut Vec<u32>, indices: &mut Vec<u32>,
best_match_end: u16, best_match_end: u16,
) { ) {
indices.clear();
indices.resize(needle.len(), 0); indices.resize(needle.len(), 0);
let mut row_iter = self.rows_rev().zip(indices.iter_mut().rev()).peekable(); let mut row_iter = self.rows_rev().zip(indices.iter_mut().rev()).peekable();
@ -255,22 +267,22 @@ impl<H: Char> Matrix<'_, H> {
let mut score_diag = 0; let mut score_diag = 0;
let mut score_horz = 0; let mut score_horz = 0;
if let Some(&(prev_row, _)) = row_iter.peek() { if let Some(&(prev_row, _)) = row_iter.peek() {
if col >= prev_row.off { score_diag = prev_row[col - 1].score;
score_diag = prev_row[col].score;
}
} }
if col > row.off { if col > row.off {
score_horz = row[col - 1].score; score_horz = row[col - 1].score;
} }
let mut new_prefer_match = row[col].consecutive_chars > 1; let mut in_block = row[col].consecutive_chars > 1;
if !new_prefer_match && col + 1 < haystack_len { if !in_block && col + 1 < haystack_len {
if let Some(next_row) = next_row { if let Some(next_row) = next_row {
if col + 1 >= next_row.off { if col + 1 >= next_row.off {
new_prefer_match = next_row[col + 1].consecutive_chars > 0 in_block = next_row[col + 1].consecutive_chars > 1
} }
} }
} }
if score > score_diag && (score > score_horz || score == score_horz && prefer_match) { if score > score_diag
&& (score > score_horz || in_block || prefer_match && score == score_horz)
{
*matched_col_idx = col as u32 + start; *matched_col_idx = col as u32 + start;
next_row = Some(row); next_row = Some(row);
let Some(next) = row_iter.next() else { let Some(next) = row_iter.next() else {
@ -278,8 +290,8 @@ impl<H: Char> Matrix<'_, H> {
}; };
(row, matched_col_idx) = next (row, matched_col_idx) = next
} }
prefer_match = new_prefer_match;
col -= 1; col -= 1;
prefer_match = row[col].consecutive_chars != 0;
} }
} }
} }

View File

@ -1,7 +1,7 @@
// sadly ranges don't optimize well // sadly ranges don't optimize well
#![allow(clippy::manual_range_contains)] #![allow(clippy::manual_range_contains)]
mod chars; pub mod chars;
mod config; mod config;
#[cfg(test)] #[cfg(test)]
mod debug; mod debug;
@ -15,11 +15,11 @@ mod utf32_str;
#[cfg(test)] #[cfg(test)]
mod tests; mod tests;
pub use config::MatcherConfig; pub use crate::config::MatcherConfig;
pub use crate::utf32_str::Utf32Str;
use crate::chars::AsciiChar; use crate::chars::AsciiChar;
use crate::matrix::MatrixSlab; use crate::matrix::MatrixSlab;
use crate::utf32_str::Utf32Str;
pub struct Matcher { pub struct Matcher {
pub config: MatcherConfig, pub config: MatcherConfig,
@ -131,7 +131,7 @@ impl Matcher {
needle_: Utf32Str<'_>, needle_: Utf32Str<'_>,
indidies: &mut Vec<u32>, indidies: &mut Vec<u32>,
) -> Option<u16> { ) -> Option<u16> {
if needle_.len() > haystack.len() { if needle_.len() > haystack.len() || needle_.is_empty() {
return None; return None;
} }
// if needle_.len() == haystack.len() { // if needle_.len() == haystack.len() {

View File

@ -6,7 +6,7 @@ use crate::Matcher;
#[inline(always)] #[inline(always)]
fn find_ascii_ignore_case(c: u8, haystack: &[u8]) -> Option<usize> { fn find_ascii_ignore_case(c: u8, haystack: &[u8]) -> Option<usize> {
if c >= b'a' || c <= b'z' { if c >= b'a' && c <= b'z' {
memchr2(c, c - 32, haystack) memchr2(c, c - 32, haystack)
} else { } else {
memchr(c, haystack) memchr(c, haystack)
@ -15,7 +15,7 @@ fn find_ascii_ignore_case(c: u8, haystack: &[u8]) -> Option<usize> {
#[inline(always)] #[inline(always)]
fn find_ascii_ignore_case_rev(c: u8, haystack: &[u8]) -> Option<usize> { fn find_ascii_ignore_case_rev(c: u8, haystack: &[u8]) -> Option<usize> {
if c >= b'a' || c <= b'z' { if c >= b'a' && c <= b'z' {
memrchr2(c, c - 32, haystack) memrchr2(c, c - 32, haystack)
} else { } else {
memrchr(c, haystack) memrchr(c, haystack)
@ -84,6 +84,11 @@ impl Matcher {
.iter() .iter()
.rev() .rev()
.position(|c| c.normalize(&self.config) == needle_char)?; .position(|c| c.normalize(&self.config) == needle_char)?;
// matches are never possible in this case
if end - start < needle.len() {
cov_mark::hit!(small_haystack);
return None;
}
Some((start, end)) Some((start, end))
} }

View File

@ -103,8 +103,7 @@ impl Matcher {
needle_char = *needle_iter.next().unwrap_or(&needle_char); needle_char = *needle_iter.next().unwrap_or(&needle_char);
for (i, c) in haystack[start + 1..end].iter().enumerate() { for (i, c) in haystack[start + 1..end].iter().enumerate() {
let class = c.char_class(&self.config); let (c, class) = c.char_class_and_normalize(&self.config);
let c = c.normalize(&self.config);
if c == needle_char { if c == needle_char {
if INDICES { if INDICES {
indices.push(i as u32 + start as u32 + 1) indices.push(i as u32 + start as u32 + 1)

View File

@ -1,3 +1,5 @@
use cov_mark::check;
use crate::chars::Char; use crate::chars::Char;
use crate::score::{ use crate::score::{
BONUS_BOUNDARY, BONUS_CAMEL123, BONUS_CONSECUTIVE, BONUS_FIRST_CHAR_MULTIPLIER, BONUS_NON_WORD, BONUS_BOUNDARY, BONUS_CAMEL123, BONUS_CONSECUTIVE, BONUS_FIRST_CHAR_MULTIPLIER, BONUS_NON_WORD,
@ -6,12 +8,20 @@ use crate::score::{
use crate::utf32_str::Utf32Str; use crate::utf32_str::Utf32Str;
use crate::{Matcher, MatcherConfig}; use crate::{Matcher, MatcherConfig};
pub fn assert_matches( use Algorithm::*;
use_v1: bool,
/// Selects which matcher entry point `assert_matches` runs a test case against.
#[derive(Debug)]
enum Algorithm {
/// Runs `Matcher::fuzzy_indices`.
FuzzyOptimal,
/// Runs `Matcher::fuzzy_indices_greedy`.
FuzzyGreedy,
}
fn assert_matches(
algorithm: &[Algorithm],
normalize: bool, normalize: bool,
case_sensitive: bool, case_sensitive: bool,
path: bool, path: bool,
cases: &[(&str, &str, u32, u32, u16)], cases: &[(&str, &str, &[u32], u16)],
) { ) {
let mut config = MatcherConfig { let mut config = MatcherConfig {
normalize, normalize,
@ -22,10 +32,10 @@ pub fn assert_matches(
config.set_match_paths(); config.set_match_paths();
} }
let mut matcher = Matcher::new(config); let mut matcher = Matcher::new(config);
let mut indices = Vec::new(); let mut matched_indices = Vec::new();
let mut needle_buf = Vec::new(); let mut needle_buf = Vec::new();
let mut haystack_buf = Vec::new(); let mut haystack_buf = Vec::new();
for &(haystack, needle, start, end, mut score) in cases { for &(haystack, needle, indices, mut score) in cases {
let needle = if !case_sensitive { let needle = if !case_sensitive {
needle.to_lowercase() needle.to_lowercase()
} else { } else {
@ -34,13 +44,18 @@ pub fn assert_matches(
let needle = Utf32Str::new(&needle, &mut needle_buf); let needle = Utf32Str::new(&needle, &mut needle_buf);
let haystack = Utf32Str::new(haystack, &mut haystack_buf); let haystack = Utf32Str::new(haystack, &mut haystack_buf);
score += needle.len() as u16 * SCORE_MATCH; score += needle.len() as u16 * SCORE_MATCH;
for algo in algorithm {
let res = if use_v1 { println!("xx {matched_indices:?} {algo:?}");
matcher.fuzzy_indices_greedy(haystack, needle, &mut indices) let res = match algo {
} else { Algorithm::FuzzyOptimal => {
matcher.fuzzy_indices(haystack, needle, &mut indices) matcher.fuzzy_indices(haystack, needle, &mut matched_indices)
}
Algorithm::FuzzyGreedy => {
matcher.fuzzy_indices_greedy(haystack, needle, &mut matched_indices)
}
}; };
let match_chars: Vec<_> = indices println!("{matched_indices:?}");
let match_chars: Vec<_> = matched_indices
.iter() .iter()
.map(|&i| haystack.get(i).normalize(&matcher.config)) .map(|&i| haystack.get(i).normalize(&matcher.config))
.collect(); .collect();
@ -49,19 +64,19 @@ pub fn assert_matches(
assert_eq!( assert_eq!(
res, res,
Some(score), Some(score),
"{needle:?} did not match {haystack:?}: matched {match_chars:?} {indices:?}" "{needle:?} did not match {haystack:?}: matched {match_chars:?} {matched_indices:?} {algo:?}"
);
assert_eq!(
matched_indices, indices,
"{needle:?} match {haystack:?} {algo:?}"
); );
assert_eq!( assert_eq!(
match_chars, needle_chars, match_chars, needle_chars,
"match indices are incorrect {indices:?}" "{needle:?} match {haystack:?} indices are incorrect {matched_indices:?} {algo:?}"
);
assert_eq!(
indices.first().copied()..indices.last().map(|&i| i + 1),
Some(start)..Some(end),
"{needle:?} match {haystack:?}"
); );
} }
} }
}
pub fn assert_not_matches( pub fn assert_not_matches(
normalize: bool, normalize: bool,
@ -104,7 +119,7 @@ const BONUS_BOUNDARY_DELIMITER: u16 = MatcherConfig::DEFAULT.bonus_boundary_deli
#[test] #[test]
fn test_fuzzy() { fn test_fuzzy() {
assert_matches( assert_matches(
false, &[FuzzyGreedy, FuzzyOptimal],
false, false,
false, false,
false, false,
@ -112,15 +127,13 @@ fn test_fuzzy() {
( (
"fooBarbaz1", "fooBarbaz1",
"oBZ", "oBZ",
2, &[2, 3, 8],
9,
BONUS_CAMEL123 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION * 3, BONUS_CAMEL123 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION * 3,
), ),
( (
"foo bar baz", "foo bar baz",
"fbb", "fbb",
0, &[0, 4, 8],
9,
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 2 BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 2
- 2 * PENALTY_GAP_START - 2 * PENALTY_GAP_START
- 4 * PENALTY_GAP_EXTENSION, - 4 * PENALTY_GAP_EXTENSION,
@ -128,23 +141,20 @@ fn test_fuzzy() {
( (
"/AutomatorDocument.icns", "/AutomatorDocument.icns",
"rdoc", "rdoc",
9, &[9, 10, 11, 12],
13,
BONUS_CAMEL123 + BONUS_CONSECUTIVE * 2, BONUS_CAMEL123 + BONUS_CONSECUTIVE * 2,
), ),
( (
"/man1/zshcompctl.1", "/man1/zshcompctl.1",
"zshc", "zshc",
6, &[6, 7, 8, 9],
10,
BONUS_BOUNDARY_DELIMITER * BONUS_FIRST_CHAR_MULTIPLIER BONUS_BOUNDARY_DELIMITER * BONUS_FIRST_CHAR_MULTIPLIER
+ BONUS_BOUNDARY_DELIMITER * 3, + BONUS_BOUNDARY_DELIMITER * 3,
), ),
( (
"/.oh-my-zsh/cache", "/.oh-my-zsh/cache",
"zshc", "zshc",
8, &[8, 9, 10, 12],
13,
BONUS_BOUNDARY * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY * 2 BONUS_BOUNDARY * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY * 2
- PENALTY_GAP_START - PENALTY_GAP_START
+ BONUS_BOUNDARY_DELIMITER, + BONUS_BOUNDARY_DELIMITER,
@ -152,15 +162,13 @@ fn test_fuzzy() {
( (
"ab0123 456", "ab0123 456",
"12356", "12356",
3, &[3, 4, 5, 8, 9],
10,
BONUS_CONSECUTIVE * 3 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION, BONUS_CONSECUTIVE * 3 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION,
), ),
( (
"abc123 456", "abc123 456",
"12356", "12356",
3, &[3, 4, 5, 8, 9],
10,
BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER
+ BONUS_CAMEL123 * 2 + BONUS_CAMEL123 * 2
+ BONUS_CONSECUTIVE + BONUS_CONSECUTIVE
@ -170,8 +178,7 @@ fn test_fuzzy() {
( (
"foo/bar/baz", "foo/bar/baz",
"fbb", "fbb",
0, &[0, 4, 8],
9,
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_DELIMITER * 2 BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_DELIMITER * 2
- 2 * PENALTY_GAP_START - 2 * PENALTY_GAP_START
- 4 * PENALTY_GAP_EXTENSION, - 4 * PENALTY_GAP_EXTENSION,
@ -179,8 +186,7 @@ fn test_fuzzy() {
( (
"fooBarBaz", "fooBarBaz",
"fbb", "fbb",
0, &[0, 3, 6],
7,
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CAMEL123 * 2 BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CAMEL123 * 2
- 2 * PENALTY_GAP_START - 2 * PENALTY_GAP_START
- 2 * PENALTY_GAP_EXTENSION, - 2 * PENALTY_GAP_EXTENSION,
@ -188,8 +194,7 @@ fn test_fuzzy() {
( (
"foo barbaz", "foo barbaz",
"fbb", "fbb",
0, &[0, 4, 7],
8,
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE
- PENALTY_GAP_START * 2 - PENALTY_GAP_START * 2
- PENALTY_GAP_EXTENSION * 3, - PENALTY_GAP_EXTENSION * 3,
@ -197,20 +202,26 @@ fn test_fuzzy() {
( (
"fooBar Baz", "fooBar Baz",
"foob", "foob",
0, &[0, 1, 2, 3],
4,
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 3, BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 3,
), ),
( (
"xFoo-Bar Baz", "xFoo-Bar Baz",
"foo-b", "foo-b",
1, &[1, 2, 3, 4, 5],
6,
BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER
+ BONUS_CAMEL123 * 2 + BONUS_CAMEL123 * 2
+ BONUS_NON_WORD + BONUS_NON_WORD
+ BONUS_BOUNDARY, + BONUS_BOUNDARY,
), ),
(
"]\0\0\0H\0\0\0rrrrrrrrrrrrrrrrrrrrrrrVVVVVVVV\0",
"H\0\0VV",
&[4, 5, 6, 31, 32],
BONUS_BOUNDARY * (BONUS_FIRST_CHAR_MULTIPLIER + 2) + 2 * BONUS_CAMEL123
- PENALTY_GAP_START
- 23 * PENALTY_GAP_EXTENSION,
),
], ],
); );
} }
@ -218,7 +229,7 @@ fn test_fuzzy() {
#[test] #[test]
fn test_fuzzy_case_sensitive() { fn test_fuzzy_case_sensitive() {
assert_matches( assert_matches(
false, &[FuzzyGreedy, FuzzyOptimal],
false, false,
true, true,
false, false,
@ -226,15 +237,13 @@ fn test_fuzzy_case_sensitive() {
( (
"fooBarbaz1", "fooBarbaz1",
"oBz", "oBz",
2, &[2, 3, 8],
9,
BONUS_CAMEL123 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION * 3, BONUS_CAMEL123 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION * 3,
), ),
( (
"Foo/Bar/Baz", "Foo/Bar/Baz",
"FBB", "FBB",
0, &[0, 4, 8],
9,
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_DELIMITER * 2 BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_DELIMITER * 2
- 2 * PENALTY_GAP_START - 2 * PENALTY_GAP_START
- 4 * PENALTY_GAP_EXTENSION, - 4 * PENALTY_GAP_EXTENSION,
@ -242,8 +251,7 @@ fn test_fuzzy_case_sensitive() {
( (
"FooBarBaz", "FooBarBaz",
"FBB", "FBB",
0, &[0, 3, 6],
7,
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CAMEL123 * 2 BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CAMEL123 * 2
- 2 * PENALTY_GAP_START - 2 * PENALTY_GAP_START
- 2 * PENALTY_GAP_EXTENSION, - 2 * PENALTY_GAP_EXTENSION,
@ -251,171 +259,15 @@ fn test_fuzzy_case_sensitive() {
( (
"FooBar Baz", "FooBar Baz",
"FooB", "FooB",
0, &[0, 1, 2, 3],
4,
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 3, BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 3,
), ),
// Consecutive bonus updated // Consecutive bonus updated
("foo-bar", "o-ba", 2, 6, BONUS_BOUNDARY * 2 + BONUS_NON_WORD),
],
);
}
#[test]
fn test_fuzzy_case_sensitive_v1() {
assert_matches(
true,
false,
true,
false,
&[
( (
"fooBarbaz1", "foo-bar",
"oBz", "o-ba",
2, &[2, 3, 4, 5],
9, BONUS_BOUNDARY * 2 + BONUS_NON_WORD,
BONUS_CAMEL123 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION * 3,
),
(
"Foo/Bar/Baz",
"FBB",
0,
9,
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_DELIMITER * 2
- 2 * PENALTY_GAP_START
- 4 * PENALTY_GAP_EXTENSION,
),
(
"FooBarBaz",
"FBB",
0,
7,
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CAMEL123 * 2
- 2 * PENALTY_GAP_START
- 2 * PENALTY_GAP_EXTENSION,
),
(
"FooBar Baz",
"FooB",
0,
4,
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 3,
),
// Consecutive bonus updated
("foo-bar", "o-ba", 2, 6, BONUS_BOUNDARY * 2 + BONUS_NON_WORD),
],
);
}
#[test]
fn test_v1_fuzzy() {
assert_matches(
true,
false,
false,
false,
&[
(
"fooBarbaz1",
"oBZ",
2,
9,
BONUS_CAMEL123 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION * 3,
),
(
"foo bar baz",
"fbb",
0,
9,
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 2
- 2 * PENALTY_GAP_START
- 4 * PENALTY_GAP_EXTENSION,
),
(
"/AutomatorDocument.icns",
"rdoc",
9,
13,
BONUS_CAMEL123 + BONUS_CONSECUTIVE * 2,
),
(
"/man1/zshcompctl.1",
"zshc",
6,
10,
BONUS_BOUNDARY_DELIMITER * BONUS_FIRST_CHAR_MULTIPLIER
+ BONUS_BOUNDARY_DELIMITER * 3,
),
(
"/.oh-my-zsh/cache",
"zshc",
8,
13,
BONUS_BOUNDARY * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY * 2
- PENALTY_GAP_START
+ BONUS_BOUNDARY_DELIMITER,
),
(
"ab0123 456",
"12356",
3,
10,
BONUS_CONSECUTIVE * 3 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION,
),
(
"abc123 456",
"12356",
3,
10,
BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER
+ BONUS_CAMEL123 * 2
+ BONUS_CONSECUTIVE
- PENALTY_GAP_START
- PENALTY_GAP_EXTENSION,
),
(
"foo/bar/baz",
"fbb",
0,
9,
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_DELIMITER * 2
- 2 * PENALTY_GAP_START
- 4 * PENALTY_GAP_EXTENSION,
),
(
"fooBarBaz",
"fbb",
0,
7,
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CAMEL123 * 2
- 2 * PENALTY_GAP_START
- 2 * PENALTY_GAP_EXTENSION,
),
(
"foo barbaz",
"fbb",
0,
8,
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE
- PENALTY_GAP_START * 2
- PENALTY_GAP_EXTENSION * 3,
),
(
"fooBar Baz",
"foob",
0,
4,
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 3,
),
(
"xFoo-Bar Baz",
"foo-b",
1,
6,
BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER
+ BONUS_CAMEL123 * 2
+ BONUS_NON_WORD
+ BONUS_BOUNDARY,
), ),
], ],
); );
@ -424,7 +276,7 @@ fn test_v1_fuzzy() {
#[test] #[test]
fn test_normalize() { fn test_normalize() {
assert_matches( assert_matches(
false, &[FuzzyGreedy, FuzzyOptimal],
true, true,
false, false,
false, false,
@ -432,15 +284,13 @@ fn test_normalize() {
( (
"Só Danço Samba", "Só Danço Samba",
"So", "So",
0, &[0, 1],
2,
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE, BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE,
), ),
( (
"Só Danço Samba", "Só Danço Samba",
"sodc", "sodc",
0, &[0, 1, 3, 6],
7,
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE
- PENALTY_GAP_START - PENALTY_GAP_START
+ BONUS_BOUNDARY_WHITE + BONUS_BOUNDARY_WHITE
@ -450,22 +300,19 @@ fn test_normalize() {
( (
"Danço", "Danço",
"danco", "danco",
0, &[0, 1, 2, 3, 4],
5,
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 4), BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
), ),
( (
"DanÇo", "DanÇo",
"danco", "danco",
0, &[0, 1, 2, 3, 4],
5,
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 4), BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
), ),
( (
"xÇando", "xÇando",
"cando", "cando",
1, &[1, 2, 3, 4, 5],
6,
BONUS_CAMEL123 * (BONUS_FIRST_CHAR_MULTIPLIER + 4), BONUS_CAMEL123 * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
), ),
], ],
@ -473,60 +320,9 @@ fn test_normalize() {
} }
#[test] #[test]
fn test_normalize_v1() { fn test_unicode1() {
assert_matches( assert_matches(
true, &[FuzzyGreedy, FuzzyOptimal],
true,
false,
false,
&[
(
"Só Danço Samba",
"So",
0,
2,
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE,
),
(
"Só Danço Samba",
"sodc",
0,
7,
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE
- PENALTY_GAP_START
+ BONUS_BOUNDARY_WHITE
- PENALTY_GAP_START
- PENALTY_GAP_EXTENSION,
),
(
"Danço",
"danco",
0,
5,
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
),
(
"DanÇo",
"danco",
0,
5,
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
),
(
"xÇando",
"cando",
1,
6,
BONUS_CAMEL123 * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
),
],
)
}
#[test]
fn test_unicode_v1() {
assert_matches(
true,
true, true,
false, false,
false, false,
@ -534,41 +330,13 @@ fn test_unicode_v1() {
( (
"你好世界", "你好世界",
"你好", "你好",
0, &[0, 1],
2,
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE, BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE,
), ),
( (
"你好世界", "你好世界",
"你世", "你世",
0, &[0, 2],
3,
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER - PENALTY_GAP_START,
),
],
)
}
#[test]
fn test_unicode() {
assert_matches(
false,
true,
false,
false,
&[
(
"你好世界",
"你好",
0,
2,
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE,
),
(
"你好世界",
"你世",
0,
3,
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER - PENALTY_GAP_START, BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER - PENALTY_GAP_START,
), ),
], ],
@ -578,15 +346,14 @@ fn test_unicode() {
#[test] #[test]
fn test_long_str() { fn test_long_str() {
assert_matches( assert_matches(
false, &[FuzzyGreedy, FuzzyOptimal],
false, false,
false, false,
false, false,
&[( &[(
&"x".repeat(u16::MAX as usize + 1), &"x".repeat(u16::MAX as usize + 1),
"xx", "xx",
0, &[0, 1],
2,
(BONUS_FIRST_CHAR_MULTIPLIER + 1) * BONUS_BOUNDARY_WHITE, (BONUS_FIRST_CHAR_MULTIPLIER + 1) * BONUS_BOUNDARY_WHITE,
)], )],
); );
@ -595,19 +362,69 @@ fn test_long_str() {
#[test] #[test]
fn test_optimal() { fn test_optimal() {
assert_matches( assert_matches(
&[FuzzyOptimal],
false, false,
false, false,
false, false,
false, &[
&[( (
"axxx xx ", "axxx xx ",
"xx", "xx",
5, &[5, 6],
7,
(BONUS_FIRST_CHAR_MULTIPLIER + 1) * BONUS_BOUNDARY_WHITE, (BONUS_FIRST_CHAR_MULTIPLIER + 1) * BONUS_BOUNDARY_WHITE,
)], ),
) (
"I\0I",
"\0",
&[1],
BONUS_FIRST_CHAR_MULTIPLIER * BONUS_NON_WORD,
),
(
"SS!H",
"S!",
&[0, 2],
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_NON_WORD
- PENALTY_GAP_START,
),
(
"^^^\u{7f}\0\0E%\u{1a}^",
"^^\0E",
&[1, 2, 5, 6],
BONUS_NON_WORD * (BONUS_FIRST_CHAR_MULTIPLIER + 3)
- PENALTY_GAP_START
- PENALTY_GAP_EXTENSION,
),
(
"Hٷ!!\0!!!\n\0\0\u{4}\u{c}\0\u{8}\0!\0\0\u{c}",
"\0!\0\0!",
&[4, 5, 9, 10, 16],
BONUS_NON_WORD * (BONUS_FIRST_CHAR_MULTIPLIER + 4)
- 2 * PENALTY_GAP_START
- 6 * PENALTY_GAP_EXTENSION,
),
],
);
} }
// #[test]
// fn test_greedy() {
// assert_matches(
// &[FuzzyGreedy],
// false,
// false,
// false,
// &[
// ("SS!H", "S!", &[1, 2], BONUS_NON_WORD),
// (
// "]\0\0\0H\0\0\0rrrrrrrrrrrrrrrrrrrrrrrVVVVVVVV\0",
// "H\0\0VV",
// &[4, 5, 6, 31, 32],
// BONUS_BOUNDARY * (BONUS_FIRST_CHAR_MULTIPLIER + 2) + 2 * BONUS_CAMEL123
// - PENALTY_GAP_START
// - 23 * PENALTY_GAP_EXTENSION,
// ),
// ],
// );
// }
#[test] #[test]
fn test_reject() { fn test_reject() {
@ -641,5 +458,7 @@ fn test_reject() {
true, true,
false, false,
&[("Só Danço Samba", "sod"), ("Só Danço Samba", "soc")], &[("Só Danço Samba", "sod"), ("Só Danço Samba", "soc")],
) );
check!(small_haystack);
assert_not_matches(false, false, false, &[("ۂۂfoۂۂ", "foo")]);
} }

View File

@ -1,3 +1,3 @@
default.extend-ignore-re = ["\\\\u\\{[0-9A-Za-z]*\\}"] default.extend-ignore-re = ["\\\\u\\{[0-9A-Za-z]*\\}"]
[files] [files]
extend-exclude = ["src/tests.rs"] extend-exclude = ["src/tests.rs", "*.html"]