mirror of
https://github.com/solaeus/nucleo.git
synced 2024-12-22 09:57:49 +00:00
fix remaining bugs, achieve high coverage
This commit is contained in:
parent
9ffa5e63c2
commit
74e2b46f04
13
src/chars.rs
13
src/chars.rs
@ -9,7 +9,7 @@ use crate::MatcherConfig;
|
|||||||
mod case_fold;
|
mod case_fold;
|
||||||
mod normalize;
|
mod normalize;
|
||||||
|
|
||||||
pub trait Char: Copy + Eq + Ord + fmt::Debug + fmt::Display {
|
pub trait Char: Copy + Eq + Ord + fmt::Display {
|
||||||
const ASCII: bool;
|
const ASCII: bool;
|
||||||
fn char_class(self, config: &MatcherConfig) -> CharClass;
|
fn char_class(self, config: &MatcherConfig) -> CharClass;
|
||||||
fn char_class_and_normalize(self, config: &MatcherConfig) -> (Self, CharClass);
|
fn char_class_and_normalize(self, config: &MatcherConfig) -> (Self, CharClass);
|
||||||
@ -27,23 +27,12 @@ impl AsciiChar {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Debug for AsciiChar {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
||||||
Debug::fmt(&(self.0 as char), f)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl fmt::Display for AsciiChar {
|
impl fmt::Display for AsciiChar {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
Display::fmt(&(self.0 as char), f)
|
Display::fmt(&(self.0 as char), f)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PartialEq<char> for AsciiChar {
|
|
||||||
fn eq(&self, other: &char) -> bool {
|
|
||||||
self.0 as char == *other
|
|
||||||
}
|
|
||||||
}
|
|
||||||
impl PartialEq<AsciiChar> for char {
|
impl PartialEq<AsciiChar> for char {
|
||||||
fn eq(&self, other: &AsciiChar) -> bool {
|
fn eq(&self, other: &AsciiChar) -> bool {
|
||||||
other.0 as char == *self
|
other.0 as char == *self
|
||||||
|
@ -495,7 +495,7 @@ static TABLE3: [char; LEN3] = generate_table(&DATA3);
|
|||||||
|
|
||||||
pub fn normalize(c: char) -> char {
|
pub fn normalize(c: char) -> char {
|
||||||
let i = c as u32;
|
let i = c as u32;
|
||||||
if i < DATA1_START || DATA3_END >= i {
|
if i < DATA1_START || i >= DATA3_END {
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
if i < DATA1_END {
|
if i < DATA1_END {
|
||||||
|
69
src/debug.rs
Normal file
69
src/debug.rs
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
use crate::chars::Char;
|
||||||
|
use crate::matrix::{haystack, HaystackChar, Matrix, MatrixCell, MatrixRow, MatrixRowMut};
|
||||||
|
use std::fmt::{Debug, Formatter, Result};
|
||||||
|
|
||||||
|
impl<C: Char> Matrix<'_, C> {
|
||||||
|
pub fn rows(&self) -> impl Iterator<Item = MatrixRow> + ExactSizeIterator + Clone + Sized {
|
||||||
|
let mut cells = &*self.cells;
|
||||||
|
self.row_offs.iter().map(move |&off| {
|
||||||
|
let len = self.haystack.len() - off as usize;
|
||||||
|
let (row, tmp) = cells.split_at(len);
|
||||||
|
cells = tmp;
|
||||||
|
MatrixRow { off, cells: row }
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn haystack(
|
||||||
|
&self,
|
||||||
|
) -> impl Iterator<Item = HaystackChar<C>> + ExactSizeIterator + '_ + Clone {
|
||||||
|
haystack(self.haystack, self.bonus, 0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Debug for MatrixCell {
|
||||||
|
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
|
||||||
|
write!(f, "({}, {})", self.score, self.consecutive_chars)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl<C: Char> Debug for HaystackChar<C> {
|
||||||
|
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
|
||||||
|
write!(f, "({}, {})", self.char, self.bonus)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl Debug for MatrixRow<'_> {
|
||||||
|
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
|
||||||
|
let mut f = f.debug_list();
|
||||||
|
f.entries((0..self.off).map(|_| &MatrixCell {
|
||||||
|
score: 0,
|
||||||
|
consecutive_chars: 0,
|
||||||
|
}));
|
||||||
|
f.entries(self.cells.iter());
|
||||||
|
f.finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl Debug for MatrixRowMut<'_> {
|
||||||
|
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
|
||||||
|
let mut f = f.debug_list();
|
||||||
|
f.entries((0..self.off).map(|_| &(0, 0)));
|
||||||
|
f.entries(self.cells.iter());
|
||||||
|
f.finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pub struct DebugList<I>(I);
|
||||||
|
impl<I> Debug for DebugList<I>
|
||||||
|
where
|
||||||
|
I: Iterator + Clone,
|
||||||
|
I::Item: Debug,
|
||||||
|
{
|
||||||
|
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
|
||||||
|
f.debug_list().entries(self.0.clone()).finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl<'a, C: Char> Debug for Matrix<'a, C> {
|
||||||
|
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
|
||||||
|
f.debug_struct("Matrix")
|
||||||
|
.field("haystack", &DebugList(self.haystack()))
|
||||||
|
.field("matrix", &DebugList(self.rows()))
|
||||||
|
.finish()
|
||||||
|
}
|
||||||
|
}
|
@ -12,22 +12,27 @@ impl Matcher {
|
|||||||
mut end: usize,
|
mut end: usize,
|
||||||
indices: &mut Vec<u32>,
|
indices: &mut Vec<u32>,
|
||||||
) -> Option<u16> {
|
) -> Option<u16> {
|
||||||
let first_char_end = if H::ASCII { start + 1 } else { end };
|
let first_char_end = if H::ASCII && N::ASCII { start + 1 } else { end };
|
||||||
if !H::ASCII && needle.len() != 1 {
|
'nonascii: {
|
||||||
let mut needle_iter = needle[1..].iter().copied();
|
if !H::ASCII || !N::ASCII {
|
||||||
if let Some(mut needle_char) = needle_iter.next() {
|
let mut needle_iter = needle[1..].iter().copied();
|
||||||
for (i, &c) in haystack[first_char_end..].iter().enumerate() {
|
if let Some(mut needle_char) = needle_iter.next() {
|
||||||
if c.normalize(&self.config) == needle_char {
|
for (i, &c) in haystack[first_char_end..].iter().enumerate() {
|
||||||
let Some(next_needle_char) = needle_iter.next() else {
|
if c.normalize(&self.config) == needle_char {
|
||||||
end = i + 1;
|
let Some(next_needle_char) = needle_iter.next() else {
|
||||||
break;
|
// we found a match so we are now in the same state
|
||||||
};
|
// as the prefilter would produce
|
||||||
needle_char = next_needle_char;
|
end = first_char_end + i + 1;
|
||||||
|
break 'nonascii;
|
||||||
|
};
|
||||||
|
needle_char = next_needle_char;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
// some needle chars were not matched, so bail out
|
||||||
|
return None;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
} // minimize the greedy match by greedy matching in reverse
|
||||||
// minimize the greedy match by greedy matching in reverse
|
|
||||||
|
|
||||||
let mut needle_iter = needle.iter().rev().copied();
|
let mut needle_iter = needle.iter().rev().copied();
|
||||||
let mut needle_char = needle_iter.next().unwrap();
|
let mut needle_char = needle_iter.next().unwrap();
|
||||||
|
@ -19,6 +19,7 @@ impl Matcher {
|
|||||||
end: usize,
|
end: usize,
|
||||||
indices: &mut Vec<u32>,
|
indices: &mut Vec<u32>,
|
||||||
) -> Option<u16> {
|
) -> Option<u16> {
|
||||||
|
println!("{start} {end}");
|
||||||
// construct a matrix (and copy the haystack), the matrix and haystack size are bounded
|
// construct a matrix (and copy the haystack), the matrix and haystack size are bounded
|
||||||
// to avoid the slow O(mn) time complexity for large inputs. Furthermore, it allows
|
// to avoid the slow O(mn) time complexity for large inputs. Furthermore, it allows
|
||||||
// us to treat needle indices as u16
|
// us to treat needle indices as u16
|
||||||
@ -88,9 +89,9 @@ impl<H: Char> Matrix<'_, H> {
|
|||||||
let first_needle_char = needle[0];
|
let first_needle_char = needle[0];
|
||||||
let mut matrix_cells = 0;
|
let mut matrix_cells = 0;
|
||||||
|
|
||||||
for (i, ((c, matrix_cell), bonus_)) in col_iter {
|
for (i, ((c_, matrix_cell), bonus_)) in col_iter {
|
||||||
let class = c.char_class(config);
|
let (c, class) = c_.char_class_and_normalize(config);
|
||||||
*c = c.normalize(config);
|
*c_ = c;
|
||||||
|
|
||||||
let bonus = config.bonus_for(prev_class, class);
|
let bonus = config.bonus_for(prev_class, class);
|
||||||
// save bonus for later so we don't have to recompute it each time
|
// save bonus for later so we don't have to recompute it each time
|
||||||
@ -98,7 +99,7 @@ impl<H: Char> Matrix<'_, H> {
|
|||||||
prev_class = class;
|
prev_class = class;
|
||||||
|
|
||||||
let i = i as u16;
|
let i = i as u16;
|
||||||
if *c == needle_char {
|
if c == needle_char {
|
||||||
// save the first idx of each char
|
// save the first idx of each char
|
||||||
if let Some(next) = row_iter.next() {
|
if let Some(next) = row_iter.next() {
|
||||||
matrix_cells += haystack_len - i;
|
matrix_cells += haystack_len - i;
|
||||||
@ -111,7 +112,7 @@ impl<H: Char> Matrix<'_, H> {
|
|||||||
matched = true;
|
matched = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if *c == first_needle_char {
|
if c == first_needle_char {
|
||||||
let score = SCORE_MATCH + bonus * BONUS_FIRST_CHAR_MULTIPLIER;
|
let score = SCORE_MATCH + bonus * BONUS_FIRST_CHAR_MULTIPLIER;
|
||||||
matrix_cell.consecutive_chars = 1;
|
matrix_cell.consecutive_chars = 1;
|
||||||
if needle.len() == 1 && score > max_score {
|
if needle.len() == 1 && score > max_score {
|
||||||
@ -195,7 +196,6 @@ impl<H: Char> Matrix<'_, H> {
|
|||||||
consecutive = diag_matrix_cell.consecutive_chars + 1;
|
consecutive = diag_matrix_cell.consecutive_chars + 1;
|
||||||
if consecutive > 1 {
|
if consecutive > 1 {
|
||||||
let first_bonus = self.bonus[col + 1 - consecutive as usize];
|
let first_bonus = self.bonus[col + 1 - consecutive as usize];
|
||||||
println!("xoxo {bonus} {first_bonus} {consecutive}");
|
|
||||||
if bonus > first_bonus {
|
if bonus > first_bonus {
|
||||||
if bonus >= BONUS_BOUNDARY {
|
if bonus >= BONUS_BOUNDARY {
|
||||||
consecutive = 1
|
consecutive = 1
|
||||||
@ -281,6 +281,5 @@ impl<H: Char> Matrix<'_, H> {
|
|||||||
prefer_match = new_prefer_match;
|
prefer_match = new_prefer_match;
|
||||||
col -= 1;
|
col -= 1;
|
||||||
}
|
}
|
||||||
println!("{:#?}", self);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
30
src/lib.rs
30
src/lib.rs
@ -3,6 +3,8 @@
|
|||||||
|
|
||||||
mod chars;
|
mod chars;
|
||||||
mod config;
|
mod config;
|
||||||
|
#[cfg(test)]
|
||||||
|
mod debug;
|
||||||
mod fuzzy_greedy;
|
mod fuzzy_greedy;
|
||||||
mod fuzzy_optimal;
|
mod fuzzy_optimal;
|
||||||
mod matrix;
|
mod matrix;
|
||||||
@ -24,32 +26,6 @@ pub struct Matcher {
|
|||||||
slab: MatrixSlab,
|
slab: MatrixSlab,
|
||||||
}
|
}
|
||||||
|
|
||||||
// // impl Query {
|
|
||||||
// // fn push(&mut self, needle: Utf32Str<'_>, normalize_: bool, smart_case: bool) {
|
|
||||||
// // self.needle_chars.reserve(needle.len());
|
|
||||||
// // self.needle_chars.extend(needle.chars().map(|mut c| {
|
|
||||||
// // if !c.is_ascii() {
|
|
||||||
// // self.is_ascii = false;
|
|
||||||
// // }
|
|
||||||
// // if smart_case {
|
|
||||||
// // if c.is_uppercase() {
|
|
||||||
// // self.ignore_case = false;
|
|
||||||
// // }
|
|
||||||
// // } else if self.ignore_case {
|
|
||||||
// // if self.is_ascii {
|
|
||||||
// // c = to_lower_case::<true>(c)
|
|
||||||
// // } else {
|
|
||||||
// // c = to_lower_case::<false>(c)
|
|
||||||
// // }
|
|
||||||
// // }
|
|
||||||
// // if normalize_ && !self.is_ascii {
|
|
||||||
// // c = normalize(c);
|
|
||||||
// // }
|
|
||||||
// // c
|
|
||||||
// // }))
|
|
||||||
// // }
|
|
||||||
// // }
|
|
||||||
|
|
||||||
impl Matcher {
|
impl Matcher {
|
||||||
pub fn new(config: MatcherConfig) -> Self {
|
pub fn new(config: MatcherConfig) -> Self {
|
||||||
Self {
|
Self {
|
||||||
@ -79,7 +55,7 @@ impl Matcher {
|
|||||||
needle_: Utf32Str<'_>,
|
needle_: Utf32Str<'_>,
|
||||||
indidies: &mut Vec<u32>,
|
indidies: &mut Vec<u32>,
|
||||||
) -> Option<u16> {
|
) -> Option<u16> {
|
||||||
if needle_.len() > haystack.len() {
|
if needle_.len() > haystack.len() || needle_.is_empty() {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
// if needle_.len() == haystack.len() {
|
// if needle_.len() == haystack.len() {
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
use std::alloc::{alloc_zeroed, dealloc, handle_alloc_error, Layout};
|
use std::alloc::{alloc_zeroed, dealloc, handle_alloc_error, Layout};
|
||||||
use std::fmt::{Debug, Formatter, Result};
|
|
||||||
use std::marker::PhantomData;
|
use std::marker::PhantomData;
|
||||||
use std::mem::{size_of, take};
|
use std::mem::{size_of, take};
|
||||||
use std::ops::Index;
|
use std::ops::Index;
|
||||||
@ -74,30 +73,18 @@ impl<C: Char> MatrixLayout<C> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Copy, PartialEq, Eq)]
|
#[derive(Clone, Copy)]
|
||||||
pub(crate) struct MatrixCell {
|
pub(crate) struct MatrixCell {
|
||||||
pub score: u16,
|
pub score: u16,
|
||||||
pub consecutive_chars: u16,
|
pub consecutive_chars: u16,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Debug for MatrixCell {
|
#[derive(Clone, Copy)]
|
||||||
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
|
|
||||||
write!(f, "({}, {})", self.score, self.consecutive_chars)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Copy, PartialEq, Eq)]
|
|
||||||
pub(crate) struct HaystackChar<C: Char> {
|
pub(crate) struct HaystackChar<C: Char> {
|
||||||
pub char: C,
|
pub char: C,
|
||||||
pub bonus: u16,
|
pub bonus: u16,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<C: Char> Debug for HaystackChar<C> {
|
|
||||||
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
|
|
||||||
write!(f, "({:?}, {})", self.char, self.bonus)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Copy)]
|
#[derive(Clone, Copy)]
|
||||||
pub(crate) struct MatrixRow<'a> {
|
pub(crate) struct MatrixRow<'a> {
|
||||||
pub off: u16,
|
pub off: u16,
|
||||||
@ -116,43 +103,11 @@ impl Index<u16> for MatrixRow<'_> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Debug for MatrixRow<'_> {
|
|
||||||
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
|
|
||||||
let mut f = f.debug_list();
|
|
||||||
f.entries((0..self.off).map(|_| &MatrixCell {
|
|
||||||
score: 0,
|
|
||||||
consecutive_chars: 0,
|
|
||||||
}));
|
|
||||||
f.entries(self.cells.iter());
|
|
||||||
f.finish()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) struct MatrixRowMut<'a> {
|
pub(crate) struct MatrixRowMut<'a> {
|
||||||
pub off: u16,
|
pub off: u16,
|
||||||
pub cells: &'a mut [MatrixCell],
|
pub cells: &'a mut [MatrixCell],
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Debug for MatrixRowMut<'_> {
|
|
||||||
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
|
|
||||||
let mut f = f.debug_list();
|
|
||||||
f.entries((0..self.off).map(|_| &(0, 0)));
|
|
||||||
f.entries(self.cells.iter());
|
|
||||||
f.finish()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct DebugList<I>(I);
|
|
||||||
impl<I> Debug for DebugList<I>
|
|
||||||
where
|
|
||||||
I: Iterator + Clone,
|
|
||||||
I::Item: Debug,
|
|
||||||
{
|
|
||||||
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
|
|
||||||
f.debug_list().entries(self.0.clone()).finish()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) struct Matrix<'a, C: Char> {
|
pub(crate) struct Matrix<'a, C: Char> {
|
||||||
pub haystack: &'a mut [C],
|
pub haystack: &'a mut [C],
|
||||||
// stored as a separate array instead of struct
|
// stored as a separate array instead of struct
|
||||||
@ -163,16 +118,6 @@ pub(crate) struct Matrix<'a, C: Char> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, C: Char> Matrix<'a, C> {
|
impl<'a, C: Char> Matrix<'a, C> {
|
||||||
pub fn rows(&self) -> impl Iterator<Item = MatrixRow> + ExactSizeIterator + Clone + Sized {
|
|
||||||
let mut cells = &*self.cells;
|
|
||||||
self.row_offs.iter().map(move |&off| {
|
|
||||||
let len = self.haystack.len() - off as usize;
|
|
||||||
let (row, tmp) = cells.split_at(len);
|
|
||||||
cells = tmp;
|
|
||||||
MatrixRow { off, cells: row }
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn rows_rev(&self) -> impl Iterator<Item = MatrixRow> + ExactSizeIterator {
|
pub fn rows_rev(&self) -> impl Iterator<Item = MatrixRow> + ExactSizeIterator {
|
||||||
let mut cells = &*self.cells;
|
let mut cells = &*self.cells;
|
||||||
self.row_offs.iter().rev().map(move |&off| {
|
self.row_offs.iter().rev().map(move |&off| {
|
||||||
@ -182,21 +127,8 @@ impl<'a, C: Char> Matrix<'a, C> {
|
|||||||
MatrixRow { off, cells: row }
|
MatrixRow { off, cells: row }
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
pub fn haystack(
|
|
||||||
&self,
|
|
||||||
) -> impl Iterator<Item = HaystackChar<C>> + ExactSizeIterator + '_ + Clone {
|
|
||||||
haystack(self.haystack, self.bonus, 0)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, C: Char> Debug for Matrix<'a, C> {
|
|
||||||
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
|
|
||||||
f.debug_struct("Matrix")
|
|
||||||
.field("haystack", &DebugList(self.haystack()))
|
|
||||||
.field("matrix", &DebugList(self.rows()))
|
|
||||||
.finish()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
pub(crate) fn haystack<'a, C: Char>(
|
pub(crate) fn haystack<'a, C: Char>(
|
||||||
haystack: &'a [C],
|
haystack: &'a [C],
|
||||||
bonus: &'a [u16],
|
bonus: &'a [u16],
|
||||||
|
@ -79,7 +79,7 @@ impl Matcher {
|
|||||||
if only_greedy {
|
if only_greedy {
|
||||||
Some((start, start + 1))
|
Some((start, start + 1))
|
||||||
} else {
|
} else {
|
||||||
let end = start + haystack.len()
|
let end = haystack.len()
|
||||||
- haystack[start..]
|
- haystack[start..]
|
||||||
.iter()
|
.iter()
|
||||||
.rev()
|
.rev()
|
||||||
|
307
src/tests.rs
307
src/tests.rs
@ -49,7 +49,7 @@ pub fn assert_matches(
|
|||||||
assert_eq!(
|
assert_eq!(
|
||||||
res,
|
res,
|
||||||
Some(score),
|
Some(score),
|
||||||
"{needle:?} did not match {haystack:?}: {match_chars:?}"
|
"{needle:?} did not match {haystack:?}: matched {match_chars:?} {indices:?}"
|
||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
match_chars, needle_chars,
|
match_chars, needle_chars,
|
||||||
@ -62,6 +62,42 @@ pub fn assert_matches(
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn assert_not_matches(
|
||||||
|
normalize: bool,
|
||||||
|
case_sensitive: bool,
|
||||||
|
path: bool,
|
||||||
|
cases: &[(&str, &str)],
|
||||||
|
) {
|
||||||
|
let mut config = MatcherConfig {
|
||||||
|
normalize,
|
||||||
|
ignore_case: !case_sensitive,
|
||||||
|
..MatcherConfig::DEFAULT
|
||||||
|
};
|
||||||
|
if path {
|
||||||
|
config.set_match_paths();
|
||||||
|
}
|
||||||
|
let mut matcher = Matcher::new(config);
|
||||||
|
let mut needle_buf = Vec::new();
|
||||||
|
let mut haystack_buf = Vec::new();
|
||||||
|
for &(haystack, needle) in cases {
|
||||||
|
let needle = if !case_sensitive {
|
||||||
|
needle.to_lowercase()
|
||||||
|
} else {
|
||||||
|
needle.to_owned()
|
||||||
|
};
|
||||||
|
let needle = Utf32Str::new(&needle, &mut needle_buf);
|
||||||
|
let haystack = Utf32Str::new(haystack, &mut haystack_buf);
|
||||||
|
|
||||||
|
let res = matcher.fuzzy_match(haystack, needle);
|
||||||
|
assert_eq!(res, None, "{needle:?} should not match {haystack:?}");
|
||||||
|
let res = matcher.fuzzy_match_greedy(haystack, needle);
|
||||||
|
assert_eq!(
|
||||||
|
res, None,
|
||||||
|
"{needle:?} should not match {haystack:?} (greedy)"
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
const BONUS_BOUNDARY_WHITE: u16 = MatcherConfig::DEFAULT.bonus_boundary_white;
|
const BONUS_BOUNDARY_WHITE: u16 = MatcherConfig::DEFAULT.bonus_boundary_white;
|
||||||
const BONUS_BOUNDARY_DELIMITER: u16 = MatcherConfig::DEFAULT.bonus_boundary_delimiter;
|
const BONUS_BOUNDARY_DELIMITER: u16 = MatcherConfig::DEFAULT.bonus_boundary_delimiter;
|
||||||
|
|
||||||
@ -225,6 +261,52 @@ fn test_fuzzy_case_sensitive() {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_fuzzy_case_sensitive_v1() {
|
||||||
|
assert_matches(
|
||||||
|
true,
|
||||||
|
false,
|
||||||
|
true,
|
||||||
|
false,
|
||||||
|
&[
|
||||||
|
(
|
||||||
|
"fooBarbaz1",
|
||||||
|
"oBz",
|
||||||
|
2,
|
||||||
|
9,
|
||||||
|
BONUS_CAMEL123 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION * 3,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"Foo/Bar/Baz",
|
||||||
|
"FBB",
|
||||||
|
0,
|
||||||
|
9,
|
||||||
|
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_DELIMITER * 2
|
||||||
|
- 2 * PENALTY_GAP_START
|
||||||
|
- 4 * PENALTY_GAP_EXTENSION,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"FooBarBaz",
|
||||||
|
"FBB",
|
||||||
|
0,
|
||||||
|
7,
|
||||||
|
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CAMEL123 * 2
|
||||||
|
- 2 * PENALTY_GAP_START
|
||||||
|
- 2 * PENALTY_GAP_EXTENSION,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"FooBar Baz",
|
||||||
|
"FooB",
|
||||||
|
0,
|
||||||
|
4,
|
||||||
|
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 3,
|
||||||
|
),
|
||||||
|
// Consecutive bonus updated
|
||||||
|
("foo-bar", "o-ba", 2, 6, BONUS_BOUNDARY * 2 + BONUS_NON_WORD),
|
||||||
|
],
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_v1_fuzzy() {
|
fn test_v1_fuzzy() {
|
||||||
assert_matches(
|
assert_matches(
|
||||||
@ -338,3 +420,226 @@ fn test_v1_fuzzy() {
|
|||||||
],
|
],
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_normalize() {
|
||||||
|
assert_matches(
|
||||||
|
false,
|
||||||
|
true,
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
&[
|
||||||
|
(
|
||||||
|
"Só Danço Samba",
|
||||||
|
"So",
|
||||||
|
0,
|
||||||
|
2,
|
||||||
|
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"Só Danço Samba",
|
||||||
|
"sodc",
|
||||||
|
0,
|
||||||
|
7,
|
||||||
|
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE
|
||||||
|
- PENALTY_GAP_START
|
||||||
|
+ BONUS_BOUNDARY_WHITE
|
||||||
|
- PENALTY_GAP_START
|
||||||
|
- PENALTY_GAP_EXTENSION,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"Danço",
|
||||||
|
"danco",
|
||||||
|
0,
|
||||||
|
5,
|
||||||
|
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"DanÇo",
|
||||||
|
"danco",
|
||||||
|
0,
|
||||||
|
5,
|
||||||
|
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"xÇando",
|
||||||
|
"cando",
|
||||||
|
1,
|
||||||
|
6,
|
||||||
|
BONUS_CAMEL123 * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
|
||||||
|
),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_normalize_v1() {
|
||||||
|
assert_matches(
|
||||||
|
true,
|
||||||
|
true,
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
&[
|
||||||
|
(
|
||||||
|
"Só Danço Samba",
|
||||||
|
"So",
|
||||||
|
0,
|
||||||
|
2,
|
||||||
|
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"Só Danço Samba",
|
||||||
|
"sodc",
|
||||||
|
0,
|
||||||
|
7,
|
||||||
|
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE
|
||||||
|
- PENALTY_GAP_START
|
||||||
|
+ BONUS_BOUNDARY_WHITE
|
||||||
|
- PENALTY_GAP_START
|
||||||
|
- PENALTY_GAP_EXTENSION,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"Danço",
|
||||||
|
"danco",
|
||||||
|
0,
|
||||||
|
5,
|
||||||
|
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"DanÇo",
|
||||||
|
"danco",
|
||||||
|
0,
|
||||||
|
5,
|
||||||
|
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"xÇando",
|
||||||
|
"cando",
|
||||||
|
1,
|
||||||
|
6,
|
||||||
|
BONUS_CAMEL123 * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
|
||||||
|
),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_unicode_v1() {
|
||||||
|
assert_matches(
|
||||||
|
true,
|
||||||
|
true,
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
&[
|
||||||
|
(
|
||||||
|
"你好世界",
|
||||||
|
"你好",
|
||||||
|
0,
|
||||||
|
2,
|
||||||
|
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"你好世界",
|
||||||
|
"你世",
|
||||||
|
0,
|
||||||
|
3,
|
||||||
|
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER - PENALTY_GAP_START,
|
||||||
|
),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_unicode() {
|
||||||
|
assert_matches(
|
||||||
|
false,
|
||||||
|
true,
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
&[
|
||||||
|
(
|
||||||
|
"你好世界",
|
||||||
|
"你好",
|
||||||
|
0,
|
||||||
|
2,
|
||||||
|
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"你好世界",
|
||||||
|
"你世",
|
||||||
|
0,
|
||||||
|
3,
|
||||||
|
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER - PENALTY_GAP_START,
|
||||||
|
),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_long_str() {
|
||||||
|
assert_matches(
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
&[(
|
||||||
|
&"x".repeat(u16::MAX as usize + 1),
|
||||||
|
"xx",
|
||||||
|
0,
|
||||||
|
2,
|
||||||
|
(BONUS_FIRST_CHAR_MULTIPLIER + 1) * BONUS_BOUNDARY_WHITE,
|
||||||
|
)],
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_optimal() {
|
||||||
|
assert_matches(
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
&[(
|
||||||
|
"axxx xx ",
|
||||||
|
"xx",
|
||||||
|
5,
|
||||||
|
7,
|
||||||
|
(BONUS_FIRST_CHAR_MULTIPLIER + 1) * BONUS_BOUNDARY_WHITE,
|
||||||
|
)],
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_reject() {
|
||||||
|
assert_not_matches(
|
||||||
|
true,
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
&[
|
||||||
|
("你好界", "abc"),
|
||||||
|
("你好世界", "富"),
|
||||||
|
("Só Danço Samba", "sox"),
|
||||||
|
("fooBarbaz", "fooBarbazz"),
|
||||||
|
],
|
||||||
|
);
|
||||||
|
assert_not_matches(
|
||||||
|
true,
|
||||||
|
true,
|
||||||
|
false,
|
||||||
|
&[
|
||||||
|
("你好界", "abc"),
|
||||||
|
("abc", "你"),
|
||||||
|
("你好世界", "富"),
|
||||||
|
("Só Danço Samba", "sox"),
|
||||||
|
("fooBarbaz", "oBZ"),
|
||||||
|
("Foo Bar Baz", "fbb"),
|
||||||
|
("fooBarbaz", "fooBarbazz"),
|
||||||
|
],
|
||||||
|
);
|
||||||
|
assert_not_matches(
|
||||||
|
false,
|
||||||
|
true,
|
||||||
|
false,
|
||||||
|
&[("Só Danço Samba", "sod"), ("Só Danço Samba", "soc")],
|
||||||
|
)
|
||||||
|
}
|
||||||
|
@ -55,6 +55,13 @@ impl<'a> Utf32Str<'a> {
|
|||||||
Utf32Str::Ascii(ascii_bytes) => ascii_bytes.len(),
|
Utf32Str::Ascii(ascii_bytes) => ascii_bytes.len(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#[inline]
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
match self {
|
||||||
|
Utf32Str::Unicode(codepoints) => codepoints.is_empty(),
|
||||||
|
Utf32Str::Ascii(ascii_bytes) => ascii_bytes.is_empty(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn slice(&self, range: impl RangeBounds<usize>) -> Utf32Str {
|
pub fn slice(&self, range: impl RangeBounds<usize>) -> Utf32Str {
|
||||||
@ -105,8 +112,8 @@ impl<'a> Utf32Str<'a> {
|
|||||||
}
|
}
|
||||||
pub fn last(&self) -> char {
|
pub fn last(&self) -> char {
|
||||||
match self {
|
match self {
|
||||||
Utf32Str::Ascii(bytes) => bytes[bytes.len()] as char,
|
Utf32Str::Ascii(bytes) => bytes[bytes.len() - 1] as char,
|
||||||
Utf32Str::Unicode(codepoints) => codepoints[codepoints.len()],
|
Utf32Str::Unicode(codepoints) => codepoints[codepoints.len() - 1],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub fn chars(&self) -> Chars<'_> {
|
pub fn chars(&self) -> Chars<'_> {
|
||||||
|
3
tarpulin.toml
Normal file
3
tarpulin.toml
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
exclude = ["src/tests.rs", "src/debug.rs", "src/chars/normalize.rs"]
|
||||||
|
[report]
|
||||||
|
out = ["Html", "Xml"]
|
Loading…
Reference in New Issue
Block a user