mirror of
https://github.com/solaeus/nucleo.git
synced 2024-12-22 09:57:49 +00:00
high test and fuzz coverage, fix lots of bugs
This commit is contained in:
parent
74e2b46f04
commit
8527340bc9
7
Cargo.lock
generated
7
Cargo.lock
generated
@ -2,10 +2,17 @@
|
|||||||
# It is not intended for manual editing.
|
# It is not intended for manual editing.
|
||||||
version = 3
|
version = 3
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cov-mark"
|
||||||
|
version = "1.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9ffa3d3e0138386cd4361f63537765cac7ee40698028844635a54495a92f67f3"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "fzf_oxide"
|
name = "fzf_oxide"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"cov-mark",
|
||||||
"memchr",
|
"memchr",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -7,3 +7,8 @@ edition = "2021"
|
|||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
memchr = "2.5.0"
|
memchr = "2.5.0"
|
||||||
|
cov-mark = { version = "1.1.0", default-features = false }
|
||||||
|
|
||||||
|
[dev-dependencies]
|
||||||
|
cov-mark = { version = "1.1.0", default-features = true }
|
||||||
|
|
||||||
|
@ -1,2 +1,9 @@
|
|||||||
# fzf_oxide
|
# fzf_oxide
|
||||||
|
|
||||||
An optimized rust port of the fzf fuzzy matching algorithm
|
An optimized rust port of the fzf fuzzy matching algorithm
|
||||||
|
|
||||||
|
## TODO:
|
||||||
|
|
||||||
|
* case mismatch penalty
|
||||||
|
* substring/prefix/postfix/exact matcher
|
||||||
|
* high level API (worker thread, query parsing, sorting)
|
||||||
|
3
fuzz.sh
Executable file
3
fuzz.sh
Executable file
@ -0,0 +1,3 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
cargo +nightly fuzz "${1}" fuzz_target_1 "${@:2:99}"
|
29
fuzz/Cargo.toml
Normal file
29
fuzz/Cargo.toml
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
[package]
|
||||||
|
name = "fzf_oxide-fuzz"
|
||||||
|
version = "0.0.0"
|
||||||
|
publish = false
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
[package.metadata]
|
||||||
|
cargo-fuzz = true
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
libfuzzer-sys = "0.4"
|
||||||
|
arbitrary = { version = "1", features = ["derive"] }
|
||||||
|
|
||||||
|
[dependencies.fzf_oxide]
|
||||||
|
path = ".."
|
||||||
|
|
||||||
|
# Prevent this from interfering with workspaces
|
||||||
|
[workspace]
|
||||||
|
members = ["."]
|
||||||
|
|
||||||
|
[profile.release]
|
||||||
|
debug = 1
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "fuzz_target_1"
|
||||||
|
path = "fuzz_targets/fuzz_target_1.rs"
|
||||||
|
test = false
|
||||||
|
doc = false
|
||||||
|
|
78
fuzz/fuzz_targets/fuzz_target_1.rs
Normal file
78
fuzz/fuzz_targets/fuzz_target_1.rs
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
#![no_main]
|
||||||
|
|
||||||
|
use fzf_oxide::{chars, Matcher, MatcherConfig, Utf32Str};
|
||||||
|
use libfuzzer_sys::arbitrary::Arbitrary;
|
||||||
|
use libfuzzer_sys::fuzz_target;
|
||||||
|
|
||||||
|
#[derive(Arbitrary, Debug)]
|
||||||
|
pub struct Input<'a> {
|
||||||
|
haystack: &'a str,
|
||||||
|
needle: &'a str,
|
||||||
|
ignore_case: bool,
|
||||||
|
normalize: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
fuzz_target!(|data: Input<'_>| {
|
||||||
|
let mut data = data;
|
||||||
|
let mut config = MatcherConfig::DEFAULT;
|
||||||
|
config.ignore_case = data.ignore_case;
|
||||||
|
config.normalize = data.normalize;
|
||||||
|
let mut matcher = Matcher::new(config);
|
||||||
|
let mut indices_optimal = Vec::new();
|
||||||
|
let mut indices_greedy = Vec::new();
|
||||||
|
let mut needle_buf = Vec::new();
|
||||||
|
let mut haystack_buf = Vec::new();
|
||||||
|
let normalize = |mut c: char| {
|
||||||
|
if config.normalize {
|
||||||
|
c = chars::normalize(c);
|
||||||
|
}
|
||||||
|
if config.ignore_case {
|
||||||
|
c = chars::to_lower_case(c);
|
||||||
|
}
|
||||||
|
c
|
||||||
|
};
|
||||||
|
let needle: String = data.needle.chars().map(normalize).collect();
|
||||||
|
let needle_chars: Vec<_> = needle.chars().collect();
|
||||||
|
let needle = Utf32Str::new(&needle, &mut needle_buf);
|
||||||
|
let haystack = Utf32Str::new(data.haystack, &mut haystack_buf);
|
||||||
|
|
||||||
|
let greedy_score = matcher.fuzzy_indices_greedy(haystack, needle, &mut indices_greedy);
|
||||||
|
if greedy_score.is_some() {
|
||||||
|
let match_chars: Vec<_> = indices_greedy
|
||||||
|
.iter()
|
||||||
|
.map(|&i| normalize(haystack.get(i)))
|
||||||
|
.collect();
|
||||||
|
assert_eq!(
|
||||||
|
match_chars, needle_chars,
|
||||||
|
"failed match, found {indices_greedy:?} {match_chars:?} (greedy)"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
let optimal_score = matcher.fuzzy_indices(haystack, needle, &mut indices_optimal);
|
||||||
|
if optimal_score.is_some() {
|
||||||
|
let match_chars: Vec<_> = indices_optimal
|
||||||
|
.iter()
|
||||||
|
.map(|&i| normalize(haystack.get(i)))
|
||||||
|
.collect();
|
||||||
|
assert_eq!(
|
||||||
|
match_chars, needle_chars,
|
||||||
|
"failed match, found {indices_optimal:?} {match_chars:?}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
match (greedy_score, optimal_score) {
|
||||||
|
(None, Some(score)) => unreachable!("optimal matched {score} but greedy did not match"),
|
||||||
|
(Some(score), None) => unreachable!("greedy matched {score} but optimal did not match"),
|
||||||
|
(Some(greedy), Some(optimal)) => {
|
||||||
|
assert!(
|
||||||
|
greedy <= optimal,
|
||||||
|
"optimal score must be atleast the same as greedy score {greedy} {optimal}"
|
||||||
|
);
|
||||||
|
if indices_greedy == indices_optimal {
|
||||||
|
assert_eq!(
|
||||||
|
greedy, optimal,
|
||||||
|
"if matching same char greedy and optimal score should be identical"
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
(None, None) => (),
|
||||||
|
}
|
||||||
|
});
|
@ -9,7 +9,7 @@ use crate::MatcherConfig;
|
|||||||
mod case_fold;
|
mod case_fold;
|
||||||
mod normalize;
|
mod normalize;
|
||||||
|
|
||||||
pub trait Char: Copy + Eq + Ord + fmt::Display {
|
pub(crate) trait Char: Copy + Eq + Ord + fmt::Display {
|
||||||
const ASCII: bool;
|
const ASCII: bool;
|
||||||
fn char_class(self, config: &MatcherConfig) -> CharClass;
|
fn char_class(self, config: &MatcherConfig) -> CharClass;
|
||||||
fn char_class_and_normalize(self, config: &MatcherConfig) -> (Self, CharClass);
|
fn char_class_and_normalize(self, config: &MatcherConfig) -> (Self, CharClass);
|
||||||
|
@ -7,11 +7,7 @@
|
|||||||
// ucd-generate 0.3.0 is available on crates.io.
|
// ucd-generate 0.3.0 is available on crates.io.
|
||||||
|
|
||||||
pub const CASE_FOLDING_SIMPLE: &'static [(char, char)] = &[
|
pub const CASE_FOLDING_SIMPLE: &'static [(char, char)] = &[
|
||||||
('A', 'a'), ('B', 'b'), ('C', 'c'), ('D', 'd'), ('E', 'e'), ('F', 'f'),
|
('µ', 'μ'), ('À', 'à'), ('Á', 'á'),
|
||||||
('G', 'g'), ('H', 'h'), ('I', 'i'), ('J', 'j'), ('K', 'k'), ('L', 'l'),
|
|
||||||
('M', 'm'), ('N', 'n'), ('O', 'o'), ('P', 'p'), ('Q', 'q'), ('R', 'r'),
|
|
||||||
('S', 's'), ('T', 't'), ('U', 'u'), ('V', 'v'), ('W', 'w'), ('X', 'x'),
|
|
||||||
('Y', 'y'), ('Z', 'z'), ('µ', 'μ'), ('À', 'à'), ('Á', 'á'),
|
|
||||||
('Â', 'â'), ('Ã', 'ã'), ('Ä', 'ä'), ('Å', 'å'), ('Æ', 'æ'),
|
('Â', 'â'), ('Ã', 'ã'), ('Ä', 'ä'), ('Å', 'å'), ('Æ', 'æ'),
|
||||||
('Ç', 'ç'), ('È', 'è'), ('É', 'é'), ('Ê', 'ê'), ('Ë', 'ë'),
|
('Ç', 'ç'), ('È', 'è'), ('É', 'é'), ('Ê', 'ê'), ('Ë', 'ë'),
|
||||||
('Ì', 'ì'), ('Í', 'í'), ('Î', 'î'), ('Ï', 'ï'), ('Ð', 'ð'),
|
('Ì', 'ì'), ('Í', 'í'), ('Î', 'î'), ('Ï', 'ï'), ('Ð', 'ð'),
|
||||||
|
@ -2,6 +2,7 @@ use crate::chars::CharClass;
|
|||||||
use crate::score::BONUS_BOUNDARY;
|
use crate::score::BONUS_BOUNDARY;
|
||||||
|
|
||||||
#[non_exhaustive]
|
#[non_exhaustive]
|
||||||
|
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
|
||||||
pub struct MatcherConfig {
|
pub struct MatcherConfig {
|
||||||
pub delimiter_chars: &'static [u8],
|
pub delimiter_chars: &'static [u8],
|
||||||
/// Extra bonus for word boundary after whitespace character or beginning of the string
|
/// Extra bonus for word boundary after whitespace character or beginning of the string
|
||||||
@ -18,14 +19,6 @@ pub struct MatcherConfig {
|
|||||||
pub ignore_case: bool,
|
pub ignore_case: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
// #[derive(Debug, Eq, PartialEq, PartialOrd, Ord, Copy, Clone, Hash)]
|
|
||||||
// #[non_exhaustive]
|
|
||||||
// pub enum CaseMatching {
|
|
||||||
// Respect,
|
|
||||||
// Ignore,
|
|
||||||
// Smart,
|
|
||||||
// }
|
|
||||||
|
|
||||||
impl MatcherConfig {
|
impl MatcherConfig {
|
||||||
pub const DEFAULT: Self = {
|
pub const DEFAULT: Self = {
|
||||||
MatcherConfig {
|
MatcherConfig {
|
||||||
|
@ -37,6 +37,7 @@ impl Matcher {
|
|||||||
let mut needle_iter = needle.iter().rev().copied();
|
let mut needle_iter = needle.iter().rev().copied();
|
||||||
let mut needle_char = needle_iter.next().unwrap();
|
let mut needle_char = needle_iter.next().unwrap();
|
||||||
for (i, &c) in haystack[start..end].iter().enumerate().rev() {
|
for (i, &c) in haystack[start..end].iter().enumerate().rev() {
|
||||||
|
let c = c.normalize(&self.config);
|
||||||
if c == needle_char {
|
if c == needle_char {
|
||||||
let Some(next_needle_char) = needle_iter.next() else {
|
let Some(next_needle_char) = needle_iter.next() else {
|
||||||
start += i;
|
start += i;
|
||||||
|
@ -19,7 +19,6 @@ impl Matcher {
|
|||||||
end: usize,
|
end: usize,
|
||||||
indices: &mut Vec<u32>,
|
indices: &mut Vec<u32>,
|
||||||
) -> Option<u16> {
|
) -> Option<u16> {
|
||||||
println!("{start} {end}");
|
|
||||||
// construct a matrix (and copy the haystack), the matrix and haystack size are bounded
|
// construct a matrix (and copy the haystack), the matrix and haystack size are bounded
|
||||||
// to avoid the slow O(mn) time complexity for large inputs. Furthermore, it allows
|
// to avoid the slow O(mn) time complexity for large inputs. Furthermore, it allows
|
||||||
// us to treat needle indices as u16
|
// us to treat needle indices as u16
|
||||||
@ -40,10 +39,12 @@ impl Matcher {
|
|||||||
let (max_score_pos, max_score, matched) = matrix.setup(needle, prev_class, &self.config);
|
let (max_score_pos, max_score, matched) = matrix.setup(needle, prev_class, &self.config);
|
||||||
// this only happens with Unicode haystacks; for ASCII the prefilter handles all rejects
|
// this only happens with Unicode haystacks; for ASCII the prefilter handles all rejects
|
||||||
if !matched {
|
if !matched {
|
||||||
|
debug_assert!(!(H::ASCII && N::ASCII));
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
if needle.len() == 1 {
|
if needle.len() == 1 {
|
||||||
indices.push(max_score_pos as u32);
|
indices.clear();
|
||||||
|
indices.push(max_score_pos as u32 + start as u32);
|
||||||
return Some(max_score);
|
return Some(max_score);
|
||||||
}
|
}
|
||||||
debug_assert_eq!(
|
debug_assert_eq!(
|
||||||
@ -112,27 +113,35 @@ impl<H: Char> Matrix<'_, H> {
|
|||||||
matched = true;
|
matched = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if c == first_needle_char {
|
|
||||||
let score = SCORE_MATCH + bonus * BONUS_FIRST_CHAR_MULTIPLIER;
|
// we calculate two scores:
|
||||||
|
// * one for traversing the matrix horizontally (no match at
|
||||||
|
// the current char)
|
||||||
|
// * one for traversing the matrix diagonally (match at the
|
||||||
|
// current char)
|
||||||
|
// the maximum of those two scores is used
|
||||||
|
let gap_penalty = if in_gap {
|
||||||
|
PENALTY_GAP_EXTENSION
|
||||||
|
} else {
|
||||||
|
PENALTY_GAP_START
|
||||||
|
};
|
||||||
|
let score_gap = prev_score.saturating_sub(gap_penalty);
|
||||||
|
let score_match = SCORE_MATCH + bonus * BONUS_FIRST_CHAR_MULTIPLIER;
|
||||||
|
if c == first_needle_char && score_match >= score_gap {
|
||||||
matrix_cell.consecutive_chars = 1;
|
matrix_cell.consecutive_chars = 1;
|
||||||
if needle.len() == 1 && score > max_score {
|
matrix_cell.score = score_match;
|
||||||
max_score = score;
|
in_gap = false;
|
||||||
|
if needle.len() == 1 && score_match > max_score {
|
||||||
|
max_score = score_match;
|
||||||
max_score_pos = i;
|
max_score_pos = i;
|
||||||
// can't get better than this
|
// can't get better than this
|
||||||
if bonus >= BONUS_BOUNDARY {
|
if bonus >= BONUS_BOUNDARY {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
matrix_cell.score = score;
|
|
||||||
in_gap = false;
|
|
||||||
} else {
|
} else {
|
||||||
let gap_penalty = if in_gap {
|
|
||||||
PENALTY_GAP_EXTENSION
|
|
||||||
} else {
|
|
||||||
PENALTY_GAP_START
|
|
||||||
};
|
|
||||||
matrix_cell.score = prev_score.saturating_sub(gap_penalty);
|
|
||||||
matrix_cell.consecutive_chars = 0;
|
matrix_cell.consecutive_chars = 0;
|
||||||
|
matrix_cell.score = score_gap;
|
||||||
in_gap = true;
|
in_gap = true;
|
||||||
}
|
}
|
||||||
prev_score = matrix_cell.score;
|
prev_score = matrix_cell.score;
|
||||||
@ -186,7 +195,7 @@ impl<H: Char> Matrix<'_, H> {
|
|||||||
// current char)
|
// current char)
|
||||||
// the maximum of those two scores is used
|
// the maximum of those two scores is used
|
||||||
let mut score_diag = 0;
|
let mut score_diag = 0;
|
||||||
let score_hory = prev_matrix_cell.score.saturating_sub(gap_penalty);
|
let score_hor = prev_matrix_cell.score.saturating_sub(gap_penalty);
|
||||||
|
|
||||||
let mut consecutive = 0;
|
let mut consecutive = 0;
|
||||||
if haystack_char.char == needle_char {
|
if haystack_char.char == needle_char {
|
||||||
@ -206,15 +215,17 @@ impl<H: Char> Matrix<'_, H> {
|
|||||||
bonus = max(first_bonus, BONUS_CONSECUTIVE)
|
bonus = max(first_bonus, BONUS_CONSECUTIVE)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if score_diag + bonus < score_hory {
|
if score_diag + bonus < score_hor
|
||||||
|
|| (consecutive == 1 && score_diag + bonus == score_hor)
|
||||||
|
{
|
||||||
score_diag += haystack_char.bonus;
|
score_diag += haystack_char.bonus;
|
||||||
consecutive = 0;
|
consecutive = 0;
|
||||||
} else {
|
} else {
|
||||||
score_diag += bonus;
|
score_diag += bonus;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
in_gap = score_diag < score_hory;
|
in_gap = consecutive == 0;
|
||||||
let score = max(score_diag, score_hory);
|
let score = max(score_diag, score_hor);
|
||||||
if i == needle.len() - 1 && score > max_score {
|
if i == needle.len() - 1 && score > max_score {
|
||||||
max_score = score;
|
max_score = score;
|
||||||
max_score_end = col as u16;
|
max_score_end = col as u16;
|
||||||
@ -235,6 +246,7 @@ impl<H: Char> Matrix<'_, H> {
|
|||||||
indices: &mut Vec<u32>,
|
indices: &mut Vec<u32>,
|
||||||
best_match_end: u16,
|
best_match_end: u16,
|
||||||
) {
|
) {
|
||||||
|
indices.clear();
|
||||||
indices.resize(needle.len(), 0);
|
indices.resize(needle.len(), 0);
|
||||||
|
|
||||||
let mut row_iter = self.rows_rev().zip(indices.iter_mut().rev()).peekable();
|
let mut row_iter = self.rows_rev().zip(indices.iter_mut().rev()).peekable();
|
||||||
@ -255,22 +267,22 @@ impl<H: Char> Matrix<'_, H> {
|
|||||||
let mut score_diag = 0;
|
let mut score_diag = 0;
|
||||||
let mut score_horz = 0;
|
let mut score_horz = 0;
|
||||||
if let Some(&(prev_row, _)) = row_iter.peek() {
|
if let Some(&(prev_row, _)) = row_iter.peek() {
|
||||||
if col >= prev_row.off {
|
score_diag = prev_row[col - 1].score;
|
||||||
score_diag = prev_row[col].score;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if col > row.off {
|
if col > row.off {
|
||||||
score_horz = row[col - 1].score;
|
score_horz = row[col - 1].score;
|
||||||
}
|
}
|
||||||
let mut new_prefer_match = row[col].consecutive_chars > 1;
|
let mut in_block = row[col].consecutive_chars > 1;
|
||||||
if !new_prefer_match && col + 1 < haystack_len {
|
if !in_block && col + 1 < haystack_len {
|
||||||
if let Some(next_row) = next_row {
|
if let Some(next_row) = next_row {
|
||||||
if col + 1 >= next_row.off {
|
if col + 1 >= next_row.off {
|
||||||
new_prefer_match = next_row[col + 1].consecutive_chars > 0
|
in_block = next_row[col + 1].consecutive_chars > 1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if score > score_diag && (score > score_horz || score == score_horz && prefer_match) {
|
if score > score_diag
|
||||||
|
&& (score > score_horz || in_block || prefer_match && score == score_horz)
|
||||||
|
{
|
||||||
*matched_col_idx = col as u32 + start;
|
*matched_col_idx = col as u32 + start;
|
||||||
next_row = Some(row);
|
next_row = Some(row);
|
||||||
let Some(next) = row_iter.next() else {
|
let Some(next) = row_iter.next() else {
|
||||||
@ -278,8 +290,8 @@ impl<H: Char> Matrix<'_, H> {
|
|||||||
};
|
};
|
||||||
(row, matched_col_idx) = next
|
(row, matched_col_idx) = next
|
||||||
}
|
}
|
||||||
prefer_match = new_prefer_match;
|
|
||||||
col -= 1;
|
col -= 1;
|
||||||
|
prefer_match = row[col].consecutive_chars != 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
// sadly ranges don't optimize well
|
// sadly ranges don't optimize well
|
||||||
#![allow(clippy::manual_range_contains)]
|
#![allow(clippy::manual_range_contains)]
|
||||||
|
|
||||||
mod chars;
|
pub mod chars;
|
||||||
mod config;
|
mod config;
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod debug;
|
mod debug;
|
||||||
@ -15,11 +15,11 @@ mod utf32_str;
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests;
|
mod tests;
|
||||||
|
|
||||||
pub use config::MatcherConfig;
|
pub use crate::config::MatcherConfig;
|
||||||
|
pub use crate::utf32_str::Utf32Str;
|
||||||
|
|
||||||
use crate::chars::AsciiChar;
|
use crate::chars::AsciiChar;
|
||||||
use crate::matrix::MatrixSlab;
|
use crate::matrix::MatrixSlab;
|
||||||
use crate::utf32_str::Utf32Str;
|
|
||||||
|
|
||||||
pub struct Matcher {
|
pub struct Matcher {
|
||||||
pub config: MatcherConfig,
|
pub config: MatcherConfig,
|
||||||
@ -131,7 +131,7 @@ impl Matcher {
|
|||||||
needle_: Utf32Str<'_>,
|
needle_: Utf32Str<'_>,
|
||||||
indidies: &mut Vec<u32>,
|
indidies: &mut Vec<u32>,
|
||||||
) -> Option<u16> {
|
) -> Option<u16> {
|
||||||
if needle_.len() > haystack.len() {
|
if needle_.len() > haystack.len() || needle_.is_empty() {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
// if needle_.len() == haystack.len() {
|
// if needle_.len() == haystack.len() {
|
||||||
|
@ -6,7 +6,7 @@ use crate::Matcher;
|
|||||||
|
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn find_ascii_ignore_case(c: u8, haystack: &[u8]) -> Option<usize> {
|
fn find_ascii_ignore_case(c: u8, haystack: &[u8]) -> Option<usize> {
|
||||||
if c >= b'a' || c <= b'z' {
|
if c >= b'a' && c <= b'z' {
|
||||||
memchr2(c, c - 32, haystack)
|
memchr2(c, c - 32, haystack)
|
||||||
} else {
|
} else {
|
||||||
memchr(c, haystack)
|
memchr(c, haystack)
|
||||||
@ -15,7 +15,7 @@ fn find_ascii_ignore_case(c: u8, haystack: &[u8]) -> Option<usize> {
|
|||||||
|
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn find_ascii_ignore_case_rev(c: u8, haystack: &[u8]) -> Option<usize> {
|
fn find_ascii_ignore_case_rev(c: u8, haystack: &[u8]) -> Option<usize> {
|
||||||
if c >= b'a' || c <= b'z' {
|
if c >= b'a' && c <= b'z' {
|
||||||
memrchr2(c, c - 32, haystack)
|
memrchr2(c, c - 32, haystack)
|
||||||
} else {
|
} else {
|
||||||
memrchr(c, haystack)
|
memrchr(c, haystack)
|
||||||
@ -84,6 +84,11 @@ impl Matcher {
|
|||||||
.iter()
|
.iter()
|
||||||
.rev()
|
.rev()
|
||||||
.position(|c| c.normalize(&self.config) == needle_char)?;
|
.position(|c| c.normalize(&self.config) == needle_char)?;
|
||||||
|
// matches are never possible in this case
|
||||||
|
if end - start < needle.len() {
|
||||||
|
cov_mark::hit!(small_haystack);
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
Some((start, end))
|
Some((start, end))
|
||||||
}
|
}
|
||||||
|
@ -103,8 +103,7 @@ impl Matcher {
|
|||||||
needle_char = *needle_iter.next().unwrap_or(&needle_char);
|
needle_char = *needle_iter.next().unwrap_or(&needle_char);
|
||||||
|
|
||||||
for (i, c) in haystack[start + 1..end].iter().enumerate() {
|
for (i, c) in haystack[start + 1..end].iter().enumerate() {
|
||||||
let class = c.char_class(&self.config);
|
let (c, class) = c.char_class_and_normalize(&self.config);
|
||||||
let c = c.normalize(&self.config);
|
|
||||||
if c == needle_char {
|
if c == needle_char {
|
||||||
if INDICES {
|
if INDICES {
|
||||||
indices.push(i as u32 + start as u32 + 1)
|
indices.push(i as u32 + start as u32 + 1)
|
||||||
|
449
src/tests.rs
449
src/tests.rs
@ -1,3 +1,5 @@
|
|||||||
|
use cov_mark::check;
|
||||||
|
|
||||||
use crate::chars::Char;
|
use crate::chars::Char;
|
||||||
use crate::score::{
|
use crate::score::{
|
||||||
BONUS_BOUNDARY, BONUS_CAMEL123, BONUS_CONSECUTIVE, BONUS_FIRST_CHAR_MULTIPLIER, BONUS_NON_WORD,
|
BONUS_BOUNDARY, BONUS_CAMEL123, BONUS_CONSECUTIVE, BONUS_FIRST_CHAR_MULTIPLIER, BONUS_NON_WORD,
|
||||||
@ -6,12 +8,20 @@ use crate::score::{
|
|||||||
use crate::utf32_str::Utf32Str;
|
use crate::utf32_str::Utf32Str;
|
||||||
use crate::{Matcher, MatcherConfig};
|
use crate::{Matcher, MatcherConfig};
|
||||||
|
|
||||||
pub fn assert_matches(
|
use Algorithm::*;
|
||||||
use_v1: bool,
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
enum Algorithm {
|
||||||
|
FuzzyOptimal,
|
||||||
|
FuzzyGreedy,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn assert_matches(
|
||||||
|
algorithm: &[Algorithm],
|
||||||
normalize: bool,
|
normalize: bool,
|
||||||
case_sensitive: bool,
|
case_sensitive: bool,
|
||||||
path: bool,
|
path: bool,
|
||||||
cases: &[(&str, &str, u32, u32, u16)],
|
cases: &[(&str, &str, &[u32], u16)],
|
||||||
) {
|
) {
|
||||||
let mut config = MatcherConfig {
|
let mut config = MatcherConfig {
|
||||||
normalize,
|
normalize,
|
||||||
@ -22,10 +32,10 @@ pub fn assert_matches(
|
|||||||
config.set_match_paths();
|
config.set_match_paths();
|
||||||
}
|
}
|
||||||
let mut matcher = Matcher::new(config);
|
let mut matcher = Matcher::new(config);
|
||||||
let mut indices = Vec::new();
|
let mut matched_indices = Vec::new();
|
||||||
let mut needle_buf = Vec::new();
|
let mut needle_buf = Vec::new();
|
||||||
let mut haystack_buf = Vec::new();
|
let mut haystack_buf = Vec::new();
|
||||||
for &(haystack, needle, start, end, mut score) in cases {
|
for &(haystack, needle, indices, mut score) in cases {
|
||||||
let needle = if !case_sensitive {
|
let needle = if !case_sensitive {
|
||||||
needle.to_lowercase()
|
needle.to_lowercase()
|
||||||
} else {
|
} else {
|
||||||
@ -34,13 +44,18 @@ pub fn assert_matches(
|
|||||||
let needle = Utf32Str::new(&needle, &mut needle_buf);
|
let needle = Utf32Str::new(&needle, &mut needle_buf);
|
||||||
let haystack = Utf32Str::new(haystack, &mut haystack_buf);
|
let haystack = Utf32Str::new(haystack, &mut haystack_buf);
|
||||||
score += needle.len() as u16 * SCORE_MATCH;
|
score += needle.len() as u16 * SCORE_MATCH;
|
||||||
|
for algo in algorithm {
|
||||||
let res = if use_v1 {
|
println!("xx {matched_indices:?} {algo:?}");
|
||||||
matcher.fuzzy_indices_greedy(haystack, needle, &mut indices)
|
let res = match algo {
|
||||||
} else {
|
Algorithm::FuzzyOptimal => {
|
||||||
matcher.fuzzy_indices(haystack, needle, &mut indices)
|
matcher.fuzzy_indices(haystack, needle, &mut matched_indices)
|
||||||
|
}
|
||||||
|
Algorithm::FuzzyGreedy => {
|
||||||
|
matcher.fuzzy_indices_greedy(haystack, needle, &mut matched_indices)
|
||||||
|
}
|
||||||
};
|
};
|
||||||
let match_chars: Vec<_> = indices
|
println!("{matched_indices:?}");
|
||||||
|
let match_chars: Vec<_> = matched_indices
|
||||||
.iter()
|
.iter()
|
||||||
.map(|&i| haystack.get(i).normalize(&matcher.config))
|
.map(|&i| haystack.get(i).normalize(&matcher.config))
|
||||||
.collect();
|
.collect();
|
||||||
@ -49,18 +64,18 @@ pub fn assert_matches(
|
|||||||
assert_eq!(
|
assert_eq!(
|
||||||
res,
|
res,
|
||||||
Some(score),
|
Some(score),
|
||||||
"{needle:?} did not match {haystack:?}: matched {match_chars:?} {indices:?}"
|
"{needle:?} did not match {haystack:?}: matched {match_chars:?} {matched_indices:?} {algo:?}"
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
matched_indices, indices,
|
||||||
|
"{needle:?} match {haystack:?} {algo:?}"
|
||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
match_chars, needle_chars,
|
match_chars, needle_chars,
|
||||||
"match indices are incorrect {indices:?}"
|
"{needle:?} match {haystack:?} indices are incorrect {matched_indices:?} {algo:?}"
|
||||||
);
|
|
||||||
assert_eq!(
|
|
||||||
indices.first().copied()..indices.last().map(|&i| i + 1),
|
|
||||||
Some(start)..Some(end),
|
|
||||||
"{needle:?} match {haystack:?}"
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn assert_not_matches(
|
pub fn assert_not_matches(
|
||||||
@ -104,7 +119,7 @@ const BONUS_BOUNDARY_DELIMITER: u16 = MatcherConfig::DEFAULT.bonus_boundary_deli
|
|||||||
#[test]
|
#[test]
|
||||||
fn test_fuzzy() {
|
fn test_fuzzy() {
|
||||||
assert_matches(
|
assert_matches(
|
||||||
false,
|
&[FuzzyGreedy, FuzzyOptimal],
|
||||||
false,
|
false,
|
||||||
false,
|
false,
|
||||||
false,
|
false,
|
||||||
@ -112,15 +127,13 @@ fn test_fuzzy() {
|
|||||||
(
|
(
|
||||||
"fooBarbaz1",
|
"fooBarbaz1",
|
||||||
"oBZ",
|
"oBZ",
|
||||||
2,
|
&[2, 3, 8],
|
||||||
9,
|
|
||||||
BONUS_CAMEL123 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION * 3,
|
BONUS_CAMEL123 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION * 3,
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"foo bar baz",
|
"foo bar baz",
|
||||||
"fbb",
|
"fbb",
|
||||||
0,
|
&[0, 4, 8],
|
||||||
9,
|
|
||||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 2
|
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 2
|
||||||
- 2 * PENALTY_GAP_START
|
- 2 * PENALTY_GAP_START
|
||||||
- 4 * PENALTY_GAP_EXTENSION,
|
- 4 * PENALTY_GAP_EXTENSION,
|
||||||
@ -128,23 +141,20 @@ fn test_fuzzy() {
|
|||||||
(
|
(
|
||||||
"/AutomatorDocument.icns",
|
"/AutomatorDocument.icns",
|
||||||
"rdoc",
|
"rdoc",
|
||||||
9,
|
&[9, 10, 11, 12],
|
||||||
13,
|
|
||||||
BONUS_CAMEL123 + BONUS_CONSECUTIVE * 2,
|
BONUS_CAMEL123 + BONUS_CONSECUTIVE * 2,
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"/man1/zshcompctl.1",
|
"/man1/zshcompctl.1",
|
||||||
"zshc",
|
"zshc",
|
||||||
6,
|
&[6, 7, 8, 9],
|
||||||
10,
|
|
||||||
BONUS_BOUNDARY_DELIMITER * BONUS_FIRST_CHAR_MULTIPLIER
|
BONUS_BOUNDARY_DELIMITER * BONUS_FIRST_CHAR_MULTIPLIER
|
||||||
+ BONUS_BOUNDARY_DELIMITER * 3,
|
+ BONUS_BOUNDARY_DELIMITER * 3,
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"/.oh-my-zsh/cache",
|
"/.oh-my-zsh/cache",
|
||||||
"zshc",
|
"zshc",
|
||||||
8,
|
&[8, 9, 10, 12],
|
||||||
13,
|
|
||||||
BONUS_BOUNDARY * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY * 2
|
BONUS_BOUNDARY * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY * 2
|
||||||
- PENALTY_GAP_START
|
- PENALTY_GAP_START
|
||||||
+ BONUS_BOUNDARY_DELIMITER,
|
+ BONUS_BOUNDARY_DELIMITER,
|
||||||
@ -152,15 +162,13 @@ fn test_fuzzy() {
|
|||||||
(
|
(
|
||||||
"ab0123 456",
|
"ab0123 456",
|
||||||
"12356",
|
"12356",
|
||||||
3,
|
&[3, 4, 5, 8, 9],
|
||||||
10,
|
|
||||||
BONUS_CONSECUTIVE * 3 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION,
|
BONUS_CONSECUTIVE * 3 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION,
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"abc123 456",
|
"abc123 456",
|
||||||
"12356",
|
"12356",
|
||||||
3,
|
&[3, 4, 5, 8, 9],
|
||||||
10,
|
|
||||||
BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER
|
BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER
|
||||||
+ BONUS_CAMEL123 * 2
|
+ BONUS_CAMEL123 * 2
|
||||||
+ BONUS_CONSECUTIVE
|
+ BONUS_CONSECUTIVE
|
||||||
@ -170,8 +178,7 @@ fn test_fuzzy() {
|
|||||||
(
|
(
|
||||||
"foo/bar/baz",
|
"foo/bar/baz",
|
||||||
"fbb",
|
"fbb",
|
||||||
0,
|
&[0, 4, 8],
|
||||||
9,
|
|
||||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_DELIMITER * 2
|
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_DELIMITER * 2
|
||||||
- 2 * PENALTY_GAP_START
|
- 2 * PENALTY_GAP_START
|
||||||
- 4 * PENALTY_GAP_EXTENSION,
|
- 4 * PENALTY_GAP_EXTENSION,
|
||||||
@ -179,8 +186,7 @@ fn test_fuzzy() {
|
|||||||
(
|
(
|
||||||
"fooBarBaz",
|
"fooBarBaz",
|
||||||
"fbb",
|
"fbb",
|
||||||
0,
|
&[0, 3, 6],
|
||||||
7,
|
|
||||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CAMEL123 * 2
|
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CAMEL123 * 2
|
||||||
- 2 * PENALTY_GAP_START
|
- 2 * PENALTY_GAP_START
|
||||||
- 2 * PENALTY_GAP_EXTENSION,
|
- 2 * PENALTY_GAP_EXTENSION,
|
||||||
@ -188,8 +194,7 @@ fn test_fuzzy() {
|
|||||||
(
|
(
|
||||||
"foo barbaz",
|
"foo barbaz",
|
||||||
"fbb",
|
"fbb",
|
||||||
0,
|
&[0, 4, 7],
|
||||||
8,
|
|
||||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE
|
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE
|
||||||
- PENALTY_GAP_START * 2
|
- PENALTY_GAP_START * 2
|
||||||
- PENALTY_GAP_EXTENSION * 3,
|
- PENALTY_GAP_EXTENSION * 3,
|
||||||
@ -197,20 +202,26 @@ fn test_fuzzy() {
|
|||||||
(
|
(
|
||||||
"fooBar Baz",
|
"fooBar Baz",
|
||||||
"foob",
|
"foob",
|
||||||
0,
|
&[0, 1, 2, 3],
|
||||||
4,
|
|
||||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 3,
|
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 3,
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"xFoo-Bar Baz",
|
"xFoo-Bar Baz",
|
||||||
"foo-b",
|
"foo-b",
|
||||||
1,
|
&[1, 2, 3, 4, 5],
|
||||||
6,
|
|
||||||
BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER
|
BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER
|
||||||
+ BONUS_CAMEL123 * 2
|
+ BONUS_CAMEL123 * 2
|
||||||
+ BONUS_NON_WORD
|
+ BONUS_NON_WORD
|
||||||
+ BONUS_BOUNDARY,
|
+ BONUS_BOUNDARY,
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
"]\0\0\0H\0\0\0rrrrrrrrrrrrrrrrrrrrrrrVVVVVVVV\0",
|
||||||
|
"H\0\0VV",
|
||||||
|
&[4, 5, 6, 31, 32],
|
||||||
|
BONUS_BOUNDARY * (BONUS_FIRST_CHAR_MULTIPLIER + 2) + 2 * BONUS_CAMEL123
|
||||||
|
- PENALTY_GAP_START
|
||||||
|
- 23 * PENALTY_GAP_EXTENSION,
|
||||||
|
),
|
||||||
],
|
],
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@ -218,7 +229,7 @@ fn test_fuzzy() {
|
|||||||
#[test]
|
#[test]
|
||||||
fn test_fuzzy_case_sensitive() {
|
fn test_fuzzy_case_sensitive() {
|
||||||
assert_matches(
|
assert_matches(
|
||||||
false,
|
&[FuzzyGreedy, FuzzyOptimal],
|
||||||
false,
|
false,
|
||||||
true,
|
true,
|
||||||
false,
|
false,
|
||||||
@ -226,15 +237,13 @@ fn test_fuzzy_case_sensitive() {
|
|||||||
(
|
(
|
||||||
"fooBarbaz1",
|
"fooBarbaz1",
|
||||||
"oBz",
|
"oBz",
|
||||||
2,
|
&[2, 3, 8],
|
||||||
9,
|
|
||||||
BONUS_CAMEL123 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION * 3,
|
BONUS_CAMEL123 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION * 3,
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"Foo/Bar/Baz",
|
"Foo/Bar/Baz",
|
||||||
"FBB",
|
"FBB",
|
||||||
0,
|
&[0, 4, 8],
|
||||||
9,
|
|
||||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_DELIMITER * 2
|
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_DELIMITER * 2
|
||||||
- 2 * PENALTY_GAP_START
|
- 2 * PENALTY_GAP_START
|
||||||
- 4 * PENALTY_GAP_EXTENSION,
|
- 4 * PENALTY_GAP_EXTENSION,
|
||||||
@ -242,8 +251,7 @@ fn test_fuzzy_case_sensitive() {
|
|||||||
(
|
(
|
||||||
"FooBarBaz",
|
"FooBarBaz",
|
||||||
"FBB",
|
"FBB",
|
||||||
0,
|
&[0, 3, 6],
|
||||||
7,
|
|
||||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CAMEL123 * 2
|
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CAMEL123 * 2
|
||||||
- 2 * PENALTY_GAP_START
|
- 2 * PENALTY_GAP_START
|
||||||
- 2 * PENALTY_GAP_EXTENSION,
|
- 2 * PENALTY_GAP_EXTENSION,
|
||||||
@ -251,171 +259,15 @@ fn test_fuzzy_case_sensitive() {
|
|||||||
(
|
(
|
||||||
"FooBar Baz",
|
"FooBar Baz",
|
||||||
"FooB",
|
"FooB",
|
||||||
0,
|
&[0, 1, 2, 3],
|
||||||
4,
|
|
||||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 3,
|
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 3,
|
||||||
),
|
),
|
||||||
// Consecutive bonus updated
|
// Consecutive bonus updated
|
||||||
("foo-bar", "o-ba", 2, 6, BONUS_BOUNDARY * 2 + BONUS_NON_WORD),
|
|
||||||
],
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_fuzzy_case_sensitive_v1() {
|
|
||||||
assert_matches(
|
|
||||||
true,
|
|
||||||
false,
|
|
||||||
true,
|
|
||||||
false,
|
|
||||||
&[
|
|
||||||
(
|
(
|
||||||
"fooBarbaz1",
|
"foo-bar",
|
||||||
"oBz",
|
"o-ba",
|
||||||
2,
|
&[2, 3, 4, 5],
|
||||||
9,
|
BONUS_BOUNDARY * 2 + BONUS_NON_WORD,
|
||||||
BONUS_CAMEL123 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION * 3,
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"Foo/Bar/Baz",
|
|
||||||
"FBB",
|
|
||||||
0,
|
|
||||||
9,
|
|
||||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_DELIMITER * 2
|
|
||||||
- 2 * PENALTY_GAP_START
|
|
||||||
- 4 * PENALTY_GAP_EXTENSION,
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"FooBarBaz",
|
|
||||||
"FBB",
|
|
||||||
0,
|
|
||||||
7,
|
|
||||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CAMEL123 * 2
|
|
||||||
- 2 * PENALTY_GAP_START
|
|
||||||
- 2 * PENALTY_GAP_EXTENSION,
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"FooBar Baz",
|
|
||||||
"FooB",
|
|
||||||
0,
|
|
||||||
4,
|
|
||||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 3,
|
|
||||||
),
|
|
||||||
// Consecutive bonus updated
|
|
||||||
("foo-bar", "o-ba", 2, 6, BONUS_BOUNDARY * 2 + BONUS_NON_WORD),
|
|
||||||
],
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_v1_fuzzy() {
|
|
||||||
assert_matches(
|
|
||||||
true,
|
|
||||||
false,
|
|
||||||
false,
|
|
||||||
false,
|
|
||||||
&[
|
|
||||||
(
|
|
||||||
"fooBarbaz1",
|
|
||||||
"oBZ",
|
|
||||||
2,
|
|
||||||
9,
|
|
||||||
BONUS_CAMEL123 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION * 3,
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"foo bar baz",
|
|
||||||
"fbb",
|
|
||||||
0,
|
|
||||||
9,
|
|
||||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 2
|
|
||||||
- 2 * PENALTY_GAP_START
|
|
||||||
- 4 * PENALTY_GAP_EXTENSION,
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"/AutomatorDocument.icns",
|
|
||||||
"rdoc",
|
|
||||||
9,
|
|
||||||
13,
|
|
||||||
BONUS_CAMEL123 + BONUS_CONSECUTIVE * 2,
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"/man1/zshcompctl.1",
|
|
||||||
"zshc",
|
|
||||||
6,
|
|
||||||
10,
|
|
||||||
BONUS_BOUNDARY_DELIMITER * BONUS_FIRST_CHAR_MULTIPLIER
|
|
||||||
+ BONUS_BOUNDARY_DELIMITER * 3,
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"/.oh-my-zsh/cache",
|
|
||||||
"zshc",
|
|
||||||
8,
|
|
||||||
13,
|
|
||||||
BONUS_BOUNDARY * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY * 2
|
|
||||||
- PENALTY_GAP_START
|
|
||||||
+ BONUS_BOUNDARY_DELIMITER,
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"ab0123 456",
|
|
||||||
"12356",
|
|
||||||
3,
|
|
||||||
10,
|
|
||||||
BONUS_CONSECUTIVE * 3 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION,
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"abc123 456",
|
|
||||||
"12356",
|
|
||||||
3,
|
|
||||||
10,
|
|
||||||
BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER
|
|
||||||
+ BONUS_CAMEL123 * 2
|
|
||||||
+ BONUS_CONSECUTIVE
|
|
||||||
- PENALTY_GAP_START
|
|
||||||
- PENALTY_GAP_EXTENSION,
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"foo/bar/baz",
|
|
||||||
"fbb",
|
|
||||||
0,
|
|
||||||
9,
|
|
||||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_DELIMITER * 2
|
|
||||||
- 2 * PENALTY_GAP_START
|
|
||||||
- 4 * PENALTY_GAP_EXTENSION,
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"fooBarBaz",
|
|
||||||
"fbb",
|
|
||||||
0,
|
|
||||||
7,
|
|
||||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CAMEL123 * 2
|
|
||||||
- 2 * PENALTY_GAP_START
|
|
||||||
- 2 * PENALTY_GAP_EXTENSION,
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"foo barbaz",
|
|
||||||
"fbb",
|
|
||||||
0,
|
|
||||||
8,
|
|
||||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE
|
|
||||||
- PENALTY_GAP_START * 2
|
|
||||||
- PENALTY_GAP_EXTENSION * 3,
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"fooBar Baz",
|
|
||||||
"foob",
|
|
||||||
0,
|
|
||||||
4,
|
|
||||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 3,
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"xFoo-Bar Baz",
|
|
||||||
"foo-b",
|
|
||||||
1,
|
|
||||||
6,
|
|
||||||
BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER
|
|
||||||
+ BONUS_CAMEL123 * 2
|
|
||||||
+ BONUS_NON_WORD
|
|
||||||
+ BONUS_BOUNDARY,
|
|
||||||
),
|
),
|
||||||
],
|
],
|
||||||
);
|
);
|
||||||
@ -424,7 +276,7 @@ fn test_v1_fuzzy() {
|
|||||||
#[test]
|
#[test]
|
||||||
fn test_normalize() {
|
fn test_normalize() {
|
||||||
assert_matches(
|
assert_matches(
|
||||||
false,
|
&[FuzzyGreedy, FuzzyOptimal],
|
||||||
true,
|
true,
|
||||||
false,
|
false,
|
||||||
false,
|
false,
|
||||||
@ -432,15 +284,13 @@ fn test_normalize() {
|
|||||||
(
|
(
|
||||||
"Só Danço Samba",
|
"Só Danço Samba",
|
||||||
"So",
|
"So",
|
||||||
0,
|
&[0, 1],
|
||||||
2,
|
|
||||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE,
|
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE,
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"Só Danço Samba",
|
"Só Danço Samba",
|
||||||
"sodc",
|
"sodc",
|
||||||
0,
|
&[0, 1, 3, 6],
|
||||||
7,
|
|
||||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE
|
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE
|
||||||
- PENALTY_GAP_START
|
- PENALTY_GAP_START
|
||||||
+ BONUS_BOUNDARY_WHITE
|
+ BONUS_BOUNDARY_WHITE
|
||||||
@ -450,22 +300,19 @@ fn test_normalize() {
|
|||||||
(
|
(
|
||||||
"Danço",
|
"Danço",
|
||||||
"danco",
|
"danco",
|
||||||
0,
|
&[0, 1, 2, 3, 4],
|
||||||
5,
|
|
||||||
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
|
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"DanÇo",
|
"DanÇo",
|
||||||
"danco",
|
"danco",
|
||||||
0,
|
&[0, 1, 2, 3, 4],
|
||||||
5,
|
|
||||||
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
|
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"xÇando",
|
"xÇando",
|
||||||
"cando",
|
"cando",
|
||||||
1,
|
&[1, 2, 3, 4, 5],
|
||||||
6,
|
|
||||||
BONUS_CAMEL123 * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
|
BONUS_CAMEL123 * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
|
||||||
),
|
),
|
||||||
],
|
],
|
||||||
@ -473,60 +320,9 @@ fn test_normalize() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_normalize_v1() {
|
fn test_unicode1() {
|
||||||
assert_matches(
|
assert_matches(
|
||||||
true,
|
&[FuzzyGreedy, FuzzyOptimal],
|
||||||
true,
|
|
||||||
false,
|
|
||||||
false,
|
|
||||||
&[
|
|
||||||
(
|
|
||||||
"Só Danço Samba",
|
|
||||||
"So",
|
|
||||||
0,
|
|
||||||
2,
|
|
||||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE,
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"Só Danço Samba",
|
|
||||||
"sodc",
|
|
||||||
0,
|
|
||||||
7,
|
|
||||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE
|
|
||||||
- PENALTY_GAP_START
|
|
||||||
+ BONUS_BOUNDARY_WHITE
|
|
||||||
- PENALTY_GAP_START
|
|
||||||
- PENALTY_GAP_EXTENSION,
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"Danço",
|
|
||||||
"danco",
|
|
||||||
0,
|
|
||||||
5,
|
|
||||||
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"DanÇo",
|
|
||||||
"danco",
|
|
||||||
0,
|
|
||||||
5,
|
|
||||||
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"xÇando",
|
|
||||||
"cando",
|
|
||||||
1,
|
|
||||||
6,
|
|
||||||
BONUS_CAMEL123 * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
|
|
||||||
),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_unicode_v1() {
|
|
||||||
assert_matches(
|
|
||||||
true,
|
|
||||||
true,
|
true,
|
||||||
false,
|
false,
|
||||||
false,
|
false,
|
||||||
@ -534,41 +330,13 @@ fn test_unicode_v1() {
|
|||||||
(
|
(
|
||||||
"你好世界",
|
"你好世界",
|
||||||
"你好",
|
"你好",
|
||||||
0,
|
&[0, 1],
|
||||||
2,
|
|
||||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE,
|
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE,
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"你好世界",
|
"你好世界",
|
||||||
"你世",
|
"你世",
|
||||||
0,
|
&[0, 2],
|
||||||
3,
|
|
||||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER - PENALTY_GAP_START,
|
|
||||||
),
|
|
||||||
],
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_unicode() {
|
|
||||||
assert_matches(
|
|
||||||
false,
|
|
||||||
true,
|
|
||||||
false,
|
|
||||||
false,
|
|
||||||
&[
|
|
||||||
(
|
|
||||||
"你好世界",
|
|
||||||
"你好",
|
|
||||||
0,
|
|
||||||
2,
|
|
||||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE,
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"你好世界",
|
|
||||||
"你世",
|
|
||||||
0,
|
|
||||||
3,
|
|
||||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER - PENALTY_GAP_START,
|
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER - PENALTY_GAP_START,
|
||||||
),
|
),
|
||||||
],
|
],
|
||||||
@ -578,15 +346,14 @@ fn test_unicode() {
|
|||||||
#[test]
|
#[test]
|
||||||
fn test_long_str() {
|
fn test_long_str() {
|
||||||
assert_matches(
|
assert_matches(
|
||||||
false,
|
&[FuzzyGreedy, FuzzyOptimal],
|
||||||
false,
|
false,
|
||||||
false,
|
false,
|
||||||
false,
|
false,
|
||||||
&[(
|
&[(
|
||||||
&"x".repeat(u16::MAX as usize + 1),
|
&"x".repeat(u16::MAX as usize + 1),
|
||||||
"xx",
|
"xx",
|
||||||
0,
|
&[0, 1],
|
||||||
2,
|
|
||||||
(BONUS_FIRST_CHAR_MULTIPLIER + 1) * BONUS_BOUNDARY_WHITE,
|
(BONUS_FIRST_CHAR_MULTIPLIER + 1) * BONUS_BOUNDARY_WHITE,
|
||||||
)],
|
)],
|
||||||
);
|
);
|
||||||
@ -595,19 +362,69 @@ fn test_long_str() {
|
|||||||
#[test]
|
#[test]
|
||||||
fn test_optimal() {
|
fn test_optimal() {
|
||||||
assert_matches(
|
assert_matches(
|
||||||
|
&[FuzzyOptimal],
|
||||||
false,
|
false,
|
||||||
false,
|
false,
|
||||||
false,
|
false,
|
||||||
false,
|
&[
|
||||||
&[(
|
(
|
||||||
"axxx xx ",
|
"axxx xx ",
|
||||||
"xx",
|
"xx",
|
||||||
5,
|
&[5, 6],
|
||||||
7,
|
|
||||||
(BONUS_FIRST_CHAR_MULTIPLIER + 1) * BONUS_BOUNDARY_WHITE,
|
(BONUS_FIRST_CHAR_MULTIPLIER + 1) * BONUS_BOUNDARY_WHITE,
|
||||||
)],
|
),
|
||||||
)
|
(
|
||||||
|
"I\0I",
|
||||||
|
"\0",
|
||||||
|
&[1],
|
||||||
|
BONUS_FIRST_CHAR_MULTIPLIER * BONUS_NON_WORD,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"SS!H",
|
||||||
|
"S!",
|
||||||
|
&[0, 2],
|
||||||
|
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_NON_WORD
|
||||||
|
- PENALTY_GAP_START,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"^^^\u{7f}\0\0E%\u{1a}^",
|
||||||
|
"^^\0E",
|
||||||
|
&[1, 2, 5, 6],
|
||||||
|
BONUS_NON_WORD * (BONUS_FIRST_CHAR_MULTIPLIER + 3)
|
||||||
|
- PENALTY_GAP_START
|
||||||
|
- PENALTY_GAP_EXTENSION,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"Hٷ!!\0!!!\n\0\0\u{4}\u{c}\0\u{8}\0!\0\0\u{c}",
|
||||||
|
"\0!\0\0!",
|
||||||
|
&[4, 5, 9, 10, 16],
|
||||||
|
BONUS_NON_WORD * (BONUS_FIRST_CHAR_MULTIPLIER + 4)
|
||||||
|
- 2 * PENALTY_GAP_START
|
||||||
|
- 6 * PENALTY_GAP_EXTENSION,
|
||||||
|
),
|
||||||
|
],
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
// #[test]
|
||||||
|
// fn test_greedy() {
|
||||||
|
// assert_matches(
|
||||||
|
// &[FuzzyGreedy],
|
||||||
|
// false,
|
||||||
|
// false,
|
||||||
|
// false,
|
||||||
|
// &[
|
||||||
|
// ("SS!H", "S!", &[1, 2], BONUS_NON_WORD),
|
||||||
|
// (
|
||||||
|
// "]\0\0\0H\0\0\0rrrrrrrrrrrrrrrrrrrrrrrVVVVVVVV\0",
|
||||||
|
// "H\0\0VV",
|
||||||
|
// &[4, 5, 6, 31, 32],
|
||||||
|
// BONUS_BOUNDARY * (BONUS_FIRST_CHAR_MULTIPLIER + 2) + 2 * BONUS_CAMEL123
|
||||||
|
// - PENALTY_GAP_START
|
||||||
|
// - 23 * PENALTY_GAP_EXTENSION,
|
||||||
|
// ),
|
||||||
|
// ],
|
||||||
|
// );
|
||||||
|
// }
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_reject() {
|
fn test_reject() {
|
||||||
@ -641,5 +458,7 @@ fn test_reject() {
|
|||||||
true,
|
true,
|
||||||
false,
|
false,
|
||||||
&[("Só Danço Samba", "sod"), ("Só Danço Samba", "soc")],
|
&[("Só Danço Samba", "sod"), ("Só Danço Samba", "soc")],
|
||||||
)
|
);
|
||||||
|
check!(small_haystack);
|
||||||
|
assert_not_matches(false, false, false, &[("ۂۂfoۂۂ", "foo")]);
|
||||||
}
|
}
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
default.extend-ignore-re = ["\\\\u\\{[0-9A-Za-z]*\\}"]
|
default.extend-ignore-re = ["\\\\u\\{[0-9A-Za-z]*\\}"]
|
||||||
[files]
|
[files]
|
||||||
extend-exclude = ["src/tests.rs"]
|
extend-exclude = ["src/tests.rs", "*.html"]
|
||||||
|
Loading…
Reference in New Issue
Block a user