mirror of
https://github.com/solaeus/nucleo.git
synced 2024-11-10 01:27:10 +00:00
fix typos
This commit is contained in:
parent
33822be2ab
commit
d844ab7f3b
245
foo.c
245
foo.c
@ -1,245 +0,0 @@
|
|||||||
|
|
||||||
fzf_result_t fzf_fuzzy_match_v2(bool case_sensitive, bool normalize,
|
|
||||||
fzf_string_t *text, fzf_string_t *pattern,
|
|
||||||
fzf_position_t *pos, fzf_slab_t *slab) {
|
|
||||||
const size_t M = pattern->size;
|
|
||||||
const size_t N = text->size;
|
|
||||||
if (M == 0) {
|
|
||||||
return (fzf_result_t){0, 0, 0};
|
|
||||||
}
|
|
||||||
if (slab != NULL && N * M > slab->I16.cap) {
|
|
||||||
return fzf_fuzzy_match_v1(case_sensitive, normalize, text, pattern, pos,
|
|
||||||
slab);
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t idx;
|
|
||||||
{
|
|
||||||
int32_t tmp_idx = ascii_fuzzy_index(text, pattern->data, M, case_sensitive);
|
|
||||||
if (tmp_idx < 0) {
|
|
||||||
return (fzf_result_t){-1, -1, 0};
|
|
||||||
}
|
|
||||||
idx = (size_t)tmp_idx;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t offset16 = 0;
|
|
||||||
size_t offset32 = 0;
|
|
||||||
|
|
||||||
fzf_i16_t h0 = alloc16(&offset16, slab, N);
|
|
||||||
fzf_i16_t c0 = alloc16(&offset16, slab, N);
|
|
||||||
// Bonus point for each positions
|
|
||||||
fzf_i16_t bo = alloc16(&offset16, slab, N);
|
|
||||||
// The first occurrence of each character in the pattern
|
|
||||||
fzf_i32_t f = alloc32(&offset32, slab, M);
|
|
||||||
// Rune array
|
|
||||||
fzf_i32_t t = alloc32(&offset32, slab, N);
|
|
||||||
copy_runes(text, &t); // input.CopyRunes(T)
|
|
||||||
|
|
||||||
// Phase 2. Calculate bonus for each point
|
|
||||||
int16_t max_score = 0;
|
|
||||||
size_t max_score_pos = 0;
|
|
||||||
|
|
||||||
size_t pidx = 0;
|
|
||||||
size_t last_idx = 0;
|
|
||||||
|
|
||||||
char pchar0 = pattern->data[0];
|
|
||||||
char pchar = pattern->data[0];
|
|
||||||
int16_t prev_h0 = 0;
|
|
||||||
int32_t prev_class = CharNonWord;
|
|
||||||
bool in_gap = false;
|
|
||||||
|
|
||||||
i32_slice_t t_sub = slice_i32(t.data, idx, t.size); // T[idx:];
|
|
||||||
i16_slice_t h0_sub =
|
|
||||||
slice_i16_right(slice_i16(h0.data, idx, h0.size).data, t_sub.size);
|
|
||||||
i16_slice_t c0_sub =
|
|
||||||
slice_i16_right(slice_i16(c0.data, idx, c0.size).data, t_sub.size);
|
|
||||||
i16_slice_t b_sub =
|
|
||||||
slice_i16_right(slice_i16(bo.data, idx, bo.size).data, t_sub.size);
|
|
||||||
|
|
||||||
for (size_t off = 0; off < t_sub.size; off++) {
|
|
||||||
char_class class;
|
|
||||||
char c = (char)t_sub.data[off];
|
|
||||||
class = char_class_of_ascii(c);
|
|
||||||
if (!case_sensitive && class == CharUpper) {
|
|
||||||
/* TODO(conni2461): unicode support */
|
|
||||||
c = (char)tolower((uint8_t)c);
|
|
||||||
}
|
|
||||||
if (normalize) {
|
|
||||||
c = normalize_rune(c);
|
|
||||||
}
|
|
||||||
|
|
||||||
t_sub.data[off] = (uint8_t)c;
|
|
||||||
int16_t bonus = bonus_for(prev_class, class);
|
|
||||||
b_sub.data[off] = bonus;
|
|
||||||
prev_class = class;
|
|
||||||
if (c == pchar) {
|
|
||||||
if (pidx < M) {
|
|
||||||
f.data[pidx] = (int32_t)(idx + off);
|
|
||||||
pidx++;
|
|
||||||
pchar = pattern->data[min64u(pidx, M - 1)];
|
|
||||||
}
|
|
||||||
last_idx = idx + off;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (c == pchar0) {
|
|
||||||
int16_t score = ScoreMatch + bonus * BonusFirstCharMultiplier;
|
|
||||||
h0_sub.data[off] = score;
|
|
||||||
c0_sub.data[off] = 1;
|
|
||||||
if (M == 1 && (score > max_score)) {
|
|
||||||
max_score = score;
|
|
||||||
max_score_pos = idx + off;
|
|
||||||
if (bonus == BonusBoundary) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
in_gap = false;
|
|
||||||
} else {
|
|
||||||
if (in_gap) {
|
|
||||||
h0_sub.data[off] = max16(prev_h0 + ScoreGapExtention, 0);
|
|
||||||
} else {
|
|
||||||
h0_sub.data[off] = max16(prev_h0 + ScoreGapStart, 0);
|
|
||||||
}
|
|
||||||
c0_sub.data[off] = 0;
|
|
||||||
in_gap = true;
|
|
||||||
}
|
|
||||||
prev_h0 = h0_sub.data[off];
|
|
||||||
}
|
|
||||||
if (pidx != M) {
|
|
||||||
free_alloc(t);
|
|
||||||
free_alloc(f);
|
|
||||||
free_alloc(bo);
|
|
||||||
free_alloc(c0);
|
|
||||||
free_alloc(h0);
|
|
||||||
return (fzf_result_t){-1, -1, 0};
|
|
||||||
}
|
|
||||||
if (M == 1) {
|
|
||||||
free_alloc(t);
|
|
||||||
free_alloc(f);
|
|
||||||
free_alloc(bo);
|
|
||||||
free_alloc(c0);
|
|
||||||
free_alloc(h0);
|
|
||||||
fzf_result_t res = {(int32_t)max_score_pos, (int32_t)max_score_pos + 1,
|
|
||||||
max_score};
|
|
||||||
append_pos(pos, max_score_pos);
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t f0 = (size_t)f.data[0];
|
|
||||||
size_t width = last_idx - f0 + 1;
|
|
||||||
fzf_i16_t h = alloc16(&offset16, slab, width * M);
|
|
||||||
{
|
|
||||||
i16_slice_t h0_tmp_slice = slice_i16(h0.data, f0, last_idx + 1);
|
|
||||||
copy_into_i16(&h0_tmp_slice, &h);
|
|
||||||
}
|
|
||||||
|
|
||||||
fzf_i16_t c = alloc16(&offset16, slab, width * M);
|
|
||||||
{
|
|
||||||
i16_slice_t c0_tmp_slice = slice_i16(c0.data, f0, last_idx + 1);
|
|
||||||
copy_into_i16(&c0_tmp_slice, &c);
|
|
||||||
}
|
|
||||||
|
|
||||||
i32_slice_t f_sub = slice_i32(f.data, 1, f.size);
|
|
||||||
str_slice_t p_sub =
|
|
||||||
slice_str_right(slice_str(pattern->data, 1, M).data, f_sub.size);
|
|
||||||
for (size_t off = 0; off < f_sub.size; off++) {
|
|
||||||
size_t f = (size_t)f_sub.data[off];
|
|
||||||
pchar = p_sub.data[off];
|
|
||||||
pidx = off + 1;
|
|
||||||
size_t row = pidx * width;
|
|
||||||
in_gap = false;
|
|
||||||
t_sub = slice_i32(t.data, f, last_idx + 1);
|
|
||||||
b_sub = slice_i16_right(slice_i16(bo.data, f, bo.size).data, t_sub.size);
|
|
||||||
i16_slice_t c_sub = slice_i16_right(
|
|
||||||
slice_i16(c.data, row + f - f0, c.size).data, t_sub.size);
|
|
||||||
i16_slice_t c_diag = slice_i16_right(
|
|
||||||
slice_i16(c.data, row + f - f0 - 1 - width, c.size).data, t_sub.size);
|
|
||||||
i16_slice_t h_sub = slice_i16_right(
|
|
||||||
slice_i16(h.data, row + f - f0, h.size).data, t_sub.size);
|
|
||||||
i16_slice_t h_diag = slice_i16_right(
|
|
||||||
slice_i16(h.data, row + f - f0 - 1 - width, h.size).data, t_sub.size);
|
|
||||||
i16_slice_t h_left = slice_i16_right(
|
|
||||||
slice_i16(h.data, row + f - f0 - 1, h.size).data, t_sub.size);
|
|
||||||
h_left.data[0] = 0;
|
|
||||||
for (size_t j = 0; j < t_sub.size; j++) {
|
|
||||||
char ch = (char)t_sub.data[j];
|
|
||||||
size_t col = j + f;
|
|
||||||
int16_t s1 = 0;
|
|
||||||
int16_t s2 = 0;
|
|
||||||
int16_t consecutive = 0;
|
|
||||||
|
|
||||||
if (in_gap) {
|
|
||||||
s2 = h_left.data[j] + ScoreGapExtention;
|
|
||||||
} else {
|
|
||||||
s2 = h_left.data[j] + ScoreGapStart;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (pchar == ch) {
|
|
||||||
s1 = h_diag.data[j] + ScoreMatch;
|
|
||||||
int16_t b = b_sub.data[j];
|
|
||||||
consecutive = c_diag.data[j] + 1;
|
|
||||||
if (b == BonusBoundary) {
|
|
||||||
consecutive = 1;
|
|
||||||
} else if (consecutive > 1) {
|
|
||||||
b = max16(b, max16(BonusConsecutive,
|
|
||||||
bo.data[col - ((size_t)consecutive) + 1]));
|
|
||||||
}
|
|
||||||
if (s1 + b < s2) {
|
|
||||||
s1 += b_sub.data[j];
|
|
||||||
consecutive = 0;
|
|
||||||
} else {
|
|
||||||
s1 += b;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
c_sub.data[j] = consecutive;
|
|
||||||
in_gap = s1 < s2;
|
|
||||||
int16_t score = max16(max16(s1, s2), 0);
|
|
||||||
if (pidx == M - 1 && (score > max_score)) {
|
|
||||||
max_score = score;
|
|
||||||
max_score_pos = col;
|
|
||||||
}
|
|
||||||
h_sub.data[j] = score;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
resize_pos(pos, M, M);
|
|
||||||
size_t j = max_score_pos;
|
|
||||||
if (pos) {
|
|
||||||
size_t i = M - 1;
|
|
||||||
bool prefer_match = true;
|
|
||||||
for (;;) {
|
|
||||||
size_t ii = i * width;
|
|
||||||
size_t j0 = j - f0;
|
|
||||||
int16_t s = h.data[ii + j0];
|
|
||||||
|
|
||||||
int16_t s1 = 0;
|
|
||||||
int16_t s2 = 0;
|
|
||||||
if (i > 0 && j >= f.data[i]) {
|
|
||||||
s1 = h.data[ii - width + j0 - 1];
|
|
||||||
}
|
|
||||||
if (j > f.data[i]) {
|
|
||||||
s2 = h.data[ii + j0 - 1];
|
|
||||||
}
|
|
||||||
|
|
||||||
if (s > s1 && (s > s2 || (s == s2 && prefer_match))) {
|
|
||||||
unsafe_append_pos(pos, j);
|
|
||||||
if (i == 0) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
i--;
|
|
||||||
}
|
|
||||||
prefer_match = c.data[ii + j0] > 1 || (ii + width + j0 + 1 < c.size &&
|
|
||||||
c.data[ii + width + j0 + 1] > 0);
|
|
||||||
j--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
free_alloc(h);
|
|
||||||
free_alloc(c);
|
|
||||||
free_alloc(t);
|
|
||||||
free_alloc(f);
|
|
||||||
free_alloc(bo);
|
|
||||||
free_alloc(c0);
|
|
||||||
free_alloc(h0);
|
|
||||||
return (fzf_result_t){(int32_t)j, (int32_t)max_score_pos + 1,
|
|
||||||
(int32_t)max_score};
|
|
||||||
}
|
|
||||||
|
|
@ -64,7 +64,7 @@ impl Char for AsciiChar {
|
|||||||
CharClass::Number
|
CharClass::Number
|
||||||
} else if c.is_ascii_whitespace() {
|
} else if c.is_ascii_whitespace() {
|
||||||
CharClass::Whitespace
|
CharClass::Whitespace
|
||||||
} else if config.delimeter_chars.contains(&c) {
|
} else if config.delimiter_chars.contains(&c) {
|
||||||
CharClass::Delimiter
|
CharClass::Delimiter
|
||||||
} else {
|
} else {
|
||||||
CharClass::NonWord
|
CharClass::NonWord
|
||||||
|
@ -3,16 +3,16 @@ use crate::score::BONUS_BOUNDARY;
|
|||||||
|
|
||||||
#[non_exhaustive]
|
#[non_exhaustive]
|
||||||
pub struct MatcherConfig {
|
pub struct MatcherConfig {
|
||||||
pub delimeter_chars: &'static [u8],
|
pub delimiter_chars: &'static [u8],
|
||||||
/// Extra bonus for word boundary after whitespace character or beginning of the string
|
/// Extra bonus for word boundary after whitespace character or beginning of the string
|
||||||
pub bonus_boundary_white: u16,
|
pub bonus_boundary_white: u16,
|
||||||
|
|
||||||
// Extra bonus for word boundary after slash, colon, semi-colon, and comma
|
// Extra bonus for word boundary after slash, colon, semi-colon, and comma
|
||||||
pub bonus_boundary_delimiter: u16,
|
pub bonus_boundary_delimiter: u16,
|
||||||
pub inital_char_class: CharClass,
|
pub initial_char_class: CharClass,
|
||||||
/// Whether to normalize latin script charaters to ASCII
|
/// Whether to normalize latin script characters to ASCII
|
||||||
/// this significantly degrades performance so its not recommended
|
/// this significantly degrades performance so its not recommended
|
||||||
/// to be truned on by default
|
/// to be turned on by default
|
||||||
pub normalize: bool,
|
pub normalize: bool,
|
||||||
/// whether to ignore casing
|
/// whether to ignore casing
|
||||||
pub ignore_case: bool,
|
pub ignore_case: bool,
|
||||||
@ -29,10 +29,10 @@ pub struct MatcherConfig {
|
|||||||
impl MatcherConfig {
|
impl MatcherConfig {
|
||||||
pub const DEFAULT: Self = {
|
pub const DEFAULT: Self = {
|
||||||
MatcherConfig {
|
MatcherConfig {
|
||||||
delimeter_chars: b"/,:;|",
|
delimiter_chars: b"/,:;|",
|
||||||
bonus_boundary_white: BONUS_BOUNDARY + 2,
|
bonus_boundary_white: BONUS_BOUNDARY + 2,
|
||||||
bonus_boundary_delimiter: BONUS_BOUNDARY + 1,
|
bonus_boundary_delimiter: BONUS_BOUNDARY + 1,
|
||||||
inital_char_class: CharClass::Whitespace,
|
initial_char_class: CharClass::Whitespace,
|
||||||
normalize: false,
|
normalize: false,
|
||||||
ignore_case: true,
|
ignore_case: true,
|
||||||
}
|
}
|
||||||
@ -42,22 +42,22 @@ impl MatcherConfig {
|
|||||||
impl MatcherConfig {
|
impl MatcherConfig {
|
||||||
pub fn set_match_paths(&mut self) {
|
pub fn set_match_paths(&mut self) {
|
||||||
if cfg!(windows) {
|
if cfg!(windows) {
|
||||||
self.delimeter_chars = b"/\\";
|
self.delimiter_chars = b"/\\";
|
||||||
} else {
|
} else {
|
||||||
self.delimeter_chars = b"/";
|
self.delimiter_chars = b"/";
|
||||||
}
|
}
|
||||||
self.bonus_boundary_white = BONUS_BOUNDARY;
|
self.bonus_boundary_white = BONUS_BOUNDARY;
|
||||||
self.inital_char_class = CharClass::Delimiter;
|
self.initial_char_class = CharClass::Delimiter;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub const fn match_paths(mut self) -> Self {
|
pub const fn match_paths(mut self) -> Self {
|
||||||
if cfg!(windows) {
|
if cfg!(windows) {
|
||||||
self.delimeter_chars = b"/\\";
|
self.delimiter_chars = b"/\\";
|
||||||
} else {
|
} else {
|
||||||
self.delimeter_chars = b"/";
|
self.delimiter_chars = b"/";
|
||||||
}
|
}
|
||||||
self.bonus_boundary_white = BONUS_BOUNDARY;
|
self.bonus_boundary_white = BONUS_BOUNDARY;
|
||||||
self.inital_char_class = CharClass::Delimiter;
|
self.initial_char_class = CharClass::Delimiter;
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2,15 +2,15 @@ use crate::chars::Char;
|
|||||||
use crate::Matcher;
|
use crate::Matcher;
|
||||||
|
|
||||||
impl Matcher {
|
impl Matcher {
|
||||||
/// greedy fallback algoritm, much faster (linear time) but reported scores/indicies
|
/// greedy fallback algorithm, much faster (linear time) but reported scores/indicies
|
||||||
/// might not be the best match
|
/// might not be the best match
|
||||||
pub(crate) fn fuzzy_match_greedy<const INDICIES: bool, H: Char + PartialEq<N>, N: Char>(
|
pub(crate) fn fuzzy_match_greedy<const INDICES: bool, H: Char + PartialEq<N>, N: Char>(
|
||||||
&mut self,
|
&mut self,
|
||||||
haystack: &[H],
|
haystack: &[H],
|
||||||
needle: &[N],
|
needle: &[N],
|
||||||
mut start: usize,
|
mut start: usize,
|
||||||
mut end: usize,
|
mut end: usize,
|
||||||
indicies: &mut Vec<u32>,
|
indices: &mut Vec<u32>,
|
||||||
) -> Option<u16> {
|
) -> Option<u16> {
|
||||||
let first_char_end = if H::ASCII { start + 1 } else { end };
|
let first_char_end = if H::ASCII { start + 1 } else { end };
|
||||||
if !H::ASCII && needle.len() != 1 {
|
if !H::ASCII && needle.len() != 1 {
|
||||||
@ -27,7 +27,7 @@ impl Matcher {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// mimimize the greedly match by greedy matching in reverse
|
// minimize the greedly match by greedy matching in reverse
|
||||||
|
|
||||||
let mut needle_iter = needle.iter().rev().copied();
|
let mut needle_iter = needle.iter().rev().copied();
|
||||||
let mut needle_char = needle_iter.next().unwrap();
|
let mut needle_char = needle_iter.next().unwrap();
|
||||||
@ -40,6 +40,6 @@ impl Matcher {
|
|||||||
needle_char = next_needle_char;
|
needle_char = next_needle_char;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Some(self.calculate_score::<INDICIES, H, N>(haystack, needle, start, end, indicies))
|
Some(self.calculate_score::<INDICES, H, N>(haystack, needle, start, end, indices))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -10,39 +10,39 @@ use crate::score::{
|
|||||||
use crate::{Matcher, MatcherConfig};
|
use crate::{Matcher, MatcherConfig};
|
||||||
|
|
||||||
impl Matcher {
|
impl Matcher {
|
||||||
pub(crate) fn fuzzy_match_optimal<const INDICIES: bool, H: Char + PartialEq<N>, N: Char>(
|
pub(crate) fn fuzzy_match_optimal<const INDICES: bool, H: Char + PartialEq<N>, N: Char>(
|
||||||
&mut self,
|
&mut self,
|
||||||
haystack: &[H],
|
haystack: &[H],
|
||||||
needle: &[N],
|
needle: &[N],
|
||||||
start: usize,
|
start: usize,
|
||||||
greedy_end: usize,
|
greedy_end: usize,
|
||||||
end: usize,
|
end: usize,
|
||||||
indicies: &mut Vec<u32>,
|
indices: &mut Vec<u32>,
|
||||||
) -> Option<u16> {
|
) -> Option<u16> {
|
||||||
// construct a matrix (and copy the haystack), the matrix and haystack size are bounded
|
// construct a matrix (and copy the haystack), the matrix and haystack size are bounded
|
||||||
// to avoid the slow O(mn) time complexity for large inputs. Furthermore, it allows
|
// to avoid the slow O(mn) time complexity for large inputs. Furthermore, it allows
|
||||||
// us to treat needle indecies as u16
|
// us to treat needle indices as u16
|
||||||
let Some(mut matrix) = self.slab.alloc(&haystack[start..end], needle.len()) else {
|
let Some(mut matrix) = self.slab.alloc(&haystack[start..end], needle.len()) else {
|
||||||
return self.fuzzy_match_greedy::<INDICIES, H, N>(
|
return self.fuzzy_match_greedy::<INDICES, H, N>(
|
||||||
haystack,
|
haystack,
|
||||||
needle,
|
needle,
|
||||||
start,
|
start,
|
||||||
greedy_end,
|
greedy_end,
|
||||||
indicies,
|
indices,
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|
||||||
let prev_class = start
|
let prev_class = start
|
||||||
.checked_sub(1)
|
.checked_sub(1)
|
||||||
.map(|i| haystack[i].char_class(&self.config))
|
.map(|i| haystack[i].char_class(&self.config))
|
||||||
.unwrap_or(self.config.inital_char_class);
|
.unwrap_or(self.config.initial_char_class);
|
||||||
let (max_score_pos, max_score, matched) = matrix.setup(needle, prev_class, &self.config);
|
let (max_score_pos, max_score, matched) = matrix.setup(needle, prev_class, &self.config);
|
||||||
// this only happend with unicode haystacks, for ASCII the prefilter handles all rejects
|
// this only happened with unicode haystacks, for ASCII the prefilter handles all rejects
|
||||||
if !matched {
|
if !matched {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
if needle.len() == 1 {
|
if needle.len() == 1 {
|
||||||
indicies.push(max_score_pos as u32);
|
indices.push(max_score_pos as u32);
|
||||||
return Some(max_score);
|
return Some(max_score);
|
||||||
}
|
}
|
||||||
debug_assert_eq!(
|
debug_assert_eq!(
|
||||||
@ -52,8 +52,8 @@ impl Matcher {
|
|||||||
|
|
||||||
// populate the matrix and find the best score
|
// populate the matrix and find the best score
|
||||||
let (max_score, best_match_end) = matrix.populate_matrix(needle);
|
let (max_score, best_match_end) = matrix.populate_matrix(needle);
|
||||||
if INDICIES {
|
if INDICES {
|
||||||
matrix.reconstruct_optimal_path(needle, start as u32, indicies, best_match_end);
|
matrix.reconstruct_optimal_path(needle, start as u32, indices, best_match_end);
|
||||||
}
|
}
|
||||||
Some(max_score)
|
Some(max_score)
|
||||||
}
|
}
|
||||||
@ -224,12 +224,12 @@ impl<H: Char> Matrix<'_, H> {
|
|||||||
&self,
|
&self,
|
||||||
needle: &[N],
|
needle: &[N],
|
||||||
start: u32,
|
start: u32,
|
||||||
indicies: &mut Vec<u32>,
|
indices: &mut Vec<u32>,
|
||||||
best_match_end: u16,
|
best_match_end: u16,
|
||||||
) {
|
) {
|
||||||
indicies.resize(needle.len(), 0);
|
indices.resize(needle.len(), 0);
|
||||||
|
|
||||||
let mut row_iter = self.rows_rev().zip(indicies.iter_mut().rev()).peekable();
|
let mut row_iter = self.rows_rev().zip(indices.iter_mut().rev()).peekable();
|
||||||
let (mut row, mut matched_col_idx) = row_iter.next().unwrap();
|
let (mut row, mut matched_col_idx) = row_iter.next().unwrap();
|
||||||
let mut next_row: Option<MatrixRow> = None;
|
let mut next_row: Option<MatrixRow> = None;
|
||||||
let mut col = best_match_end;
|
let mut col = best_match_end;
|
||||||
|
24
src/lib.rs
24
src/lib.rs
@ -1,4 +1,4 @@
|
|||||||
// sadly this doens't optmimzie well currently
|
// sadly ranges don't optmimzie well
|
||||||
#![allow(clippy::manual_range_contains)]
|
#![allow(clippy::manual_range_contains)]
|
||||||
|
|
||||||
mod chars;
|
mod chars;
|
||||||
@ -63,7 +63,7 @@ impl Matcher {
|
|||||||
self.fuzzy_matcher_impl::<false>(haystack, needle, &mut Vec::new())
|
self.fuzzy_matcher_impl::<false>(haystack, needle, &mut Vec::new())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn fuzzy_indicies(
|
pub fn fuzzy_indices(
|
||||||
&mut self,
|
&mut self,
|
||||||
haystack: Utf32Str<'_>,
|
haystack: Utf32Str<'_>,
|
||||||
needle: Utf32Str<'_>,
|
needle: Utf32Str<'_>,
|
||||||
@ -73,7 +73,7 @@ impl Matcher {
|
|||||||
self.fuzzy_matcher_impl::<true>(haystack, needle, indidies)
|
self.fuzzy_matcher_impl::<true>(haystack, needle, indidies)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn fuzzy_matcher_impl<const INDICIES: bool>(
|
fn fuzzy_matcher_impl<const INDICES: bool>(
|
||||||
&mut self,
|
&mut self,
|
||||||
haystack: Utf32Str<'_>,
|
haystack: Utf32Str<'_>,
|
||||||
needle_: Utf32Str<'_>,
|
needle_: Utf32Str<'_>,
|
||||||
@ -92,7 +92,7 @@ impl Matcher {
|
|||||||
match (haystack, needle_) {
|
match (haystack, needle_) {
|
||||||
(Utf32Str::Ascii(haystack), Utf32Str::Ascii(needle)) => {
|
(Utf32Str::Ascii(haystack), Utf32Str::Ascii(needle)) => {
|
||||||
let (start, greedy_end, end) = self.prefilter_ascii(haystack, needle)?;
|
let (start, greedy_end, end) = self.prefilter_ascii(haystack, needle)?;
|
||||||
self.fuzzy_match_optimal::<INDICIES, AsciiChar, AsciiChar>(
|
self.fuzzy_match_optimal::<INDICES, AsciiChar, AsciiChar>(
|
||||||
AsciiChar::cast(haystack),
|
AsciiChar::cast(haystack),
|
||||||
AsciiChar::cast(needle),
|
AsciiChar::cast(needle),
|
||||||
start,
|
start,
|
||||||
@ -108,7 +108,7 @@ impl Matcher {
|
|||||||
}
|
}
|
||||||
(Utf32Str::Unicode(haystack), Utf32Str::Ascii(needle)) => {
|
(Utf32Str::Unicode(haystack), Utf32Str::Ascii(needle)) => {
|
||||||
let (start, end) = self.prefilter_non_ascii(haystack, needle_)?;
|
let (start, end) = self.prefilter_non_ascii(haystack, needle_)?;
|
||||||
self.fuzzy_match_optimal::<INDICIES, char, AsciiChar>(
|
self.fuzzy_match_optimal::<INDICES, char, AsciiChar>(
|
||||||
haystack,
|
haystack,
|
||||||
AsciiChar::cast(needle),
|
AsciiChar::cast(needle),
|
||||||
start,
|
start,
|
||||||
@ -119,7 +119,7 @@ impl Matcher {
|
|||||||
}
|
}
|
||||||
(Utf32Str::Unicode(haystack), Utf32Str::Unicode(needle)) => {
|
(Utf32Str::Unicode(haystack), Utf32Str::Unicode(needle)) => {
|
||||||
let (start, end) = self.prefilter_non_ascii(haystack, needle_)?;
|
let (start, end) = self.prefilter_non_ascii(haystack, needle_)?;
|
||||||
self.fuzzy_match_optimal::<INDICIES, char, char>(
|
self.fuzzy_match_optimal::<INDICES, char, char>(
|
||||||
haystack,
|
haystack,
|
||||||
needle,
|
needle,
|
||||||
start,
|
start,
|
||||||
@ -131,11 +131,11 @@ impl Matcher {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// pub fn fuzzy_indicies(
|
// pub fn fuzzy_indices(
|
||||||
// &mut self,
|
// &mut self,
|
||||||
// query: &Query,
|
// query: &Query,
|
||||||
// mut haystack: Utf32Str<'_>,
|
// mut haystack: Utf32Str<'_>,
|
||||||
// indicies: &mut Vec<u32>,
|
// indices: &mut Vec<u32>,
|
||||||
// ) -> Option<u16> {
|
// ) -> Option<u16> {
|
||||||
// if haystack.len() > u32::MAX as usize {
|
// if haystack.len() > u32::MAX as usize {
|
||||||
// haystack = &haystack[..u32::MAX as usize]
|
// haystack = &haystack[..u32::MAX as usize]
|
||||||
@ -146,14 +146,14 @@ impl Matcher {
|
|||||||
// );
|
// );
|
||||||
// if self.config.use_v1 {
|
// if self.config.use_v1 {
|
||||||
// if query.is_ascii && !self.config.normalize {
|
// if query.is_ascii && !self.config.normalize {
|
||||||
// self.fuzzy_matcher_v1::<true, true>(query, haystack, indicies)
|
// self.fuzzy_matcher_v1::<true, true>(query, haystack, indices)
|
||||||
// } else {
|
// } else {
|
||||||
// self.fuzzy_matcher_v1::<true, false>(query, haystack, indicies)
|
// self.fuzzy_matcher_v1::<true, false>(query, haystack, indices)
|
||||||
// }
|
// }
|
||||||
// } else if query.is_ascii && !self.config.normalize {
|
// } else if query.is_ascii && !self.config.normalize {
|
||||||
// self.fuzzy_matcher_v2::<true, true>(query, haystack, indicies)
|
// self.fuzzy_matcher_v2::<true, true>(query, haystack, indices)
|
||||||
// } else {
|
// } else {
|
||||||
// self.fuzzy_matcher_v2::<true, false>(query, haystack, indicies)
|
// self.fuzzy_matcher_v2::<true, false>(query, haystack, indices)
|
||||||
// }
|
// }
|
||||||
// }
|
// }
|
||||||
}
|
}
|
||||||
|
@ -155,7 +155,7 @@ where
|
|||||||
|
|
||||||
pub(crate) struct Matrix<'a, C: Char> {
|
pub(crate) struct Matrix<'a, C: Char> {
|
||||||
pub haystack: &'a mut [C],
|
pub haystack: &'a mut [C],
|
||||||
// stored as a seperate array instead of struct
|
// stored as a separate array instead of struct
|
||||||
// to avoid padding sine char is too large and u8 too small :/
|
// to avoid padding sine char is too large and u8 too small :/
|
||||||
pub bonus: &'a mut [u16],
|
pub bonus: &'a mut [u16],
|
||||||
pub row_offs: &'a mut [u16],
|
pub row_offs: &'a mut [u16],
|
||||||
@ -264,10 +264,10 @@ impl MatrixSlab {
|
|||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
unsafe {
|
unsafe {
|
||||||
// safetly: this allocation is valid for MATRIX_ALLOC_LAYOUT
|
// safely: this allocation is valid for MATRIX_ALLOC_LAYOUT
|
||||||
let (haystack, bonus, rows, cells) = matrix_layout.fieds_from_ptr(self.0);
|
let (haystack, bonus, rows, cells) = matrix_layout.fieds_from_ptr(self.0);
|
||||||
// copy haystack before creating refernces to ensure we donu't crate
|
// copy haystack before creating references to ensure we donu't crate
|
||||||
// refrences to invalid chars (which may or may not be UB)
|
// references to invalid chars (which may or may not be UB)
|
||||||
haystack_
|
haystack_
|
||||||
.as_ptr()
|
.as_ptr()
|
||||||
.copy_to_nonoverlapping(haystack as *mut _, haystack_.len());
|
.copy_to_nonoverlapping(haystack as *mut _, haystack_.len());
|
||||||
|
18
src/score.rs
18
src/score.rs
@ -69,22 +69,22 @@ impl Matcher {
|
|||||||
self.config.bonus_for(prev_class, class)
|
self.config.bonus_for(prev_class, class)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn calculate_score<const INDICIES: bool, H: Char + PartialEq<N>, N: Char>(
|
pub(crate) fn calculate_score<const INDICES: bool, H: Char + PartialEq<N>, N: Char>(
|
||||||
&mut self,
|
&mut self,
|
||||||
haystack: &[H],
|
haystack: &[H],
|
||||||
needle: &[N],
|
needle: &[N],
|
||||||
start: usize,
|
start: usize,
|
||||||
end: usize,
|
end: usize,
|
||||||
indicies: &mut Vec<u32>,
|
indices: &mut Vec<u32>,
|
||||||
) -> u16 {
|
) -> u16 {
|
||||||
if INDICIES {
|
if INDICES {
|
||||||
indicies.reserve(needle.len());
|
indices.reserve(needle.len());
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut prev_class = start
|
let mut prev_class = start
|
||||||
.checked_sub(1)
|
.checked_sub(1)
|
||||||
.map(|i| haystack[i].char_class(&self.config))
|
.map(|i| haystack[i].char_class(&self.config))
|
||||||
.unwrap_or(self.config.inital_char_class);
|
.unwrap_or(self.config.initial_char_class);
|
||||||
let mut needle_iter = needle.iter();
|
let mut needle_iter = needle.iter();
|
||||||
let mut needle_char = *needle_iter.next().unwrap();
|
let mut needle_char = *needle_iter.next().unwrap();
|
||||||
|
|
||||||
@ -92,8 +92,8 @@ impl Matcher {
|
|||||||
let mut consecutive = 1;
|
let mut consecutive = 1;
|
||||||
|
|
||||||
// unrolled the firs iteration to make applying the first char multiplier less akward
|
// unrolled the firs iteration to make applying the first char multiplier less akward
|
||||||
if INDICIES {
|
if INDICES {
|
||||||
indicies.push(start as u32)
|
indices.push(start as u32)
|
||||||
}
|
}
|
||||||
let mut first_bonus = self.bonus_for(prev_class, haystack[0].char_class(&self.config));
|
let mut first_bonus = self.bonus_for(prev_class, haystack[0].char_class(&self.config));
|
||||||
let mut score = SCORE_MATCH + first_bonus * BONUS_FIRST_CHAR_MULTIPLIER;
|
let mut score = SCORE_MATCH + first_bonus * BONUS_FIRST_CHAR_MULTIPLIER;
|
||||||
@ -102,8 +102,8 @@ impl Matcher {
|
|||||||
let class = c.char_class(&self.config);
|
let class = c.char_class(&self.config);
|
||||||
let c = c.normalize(&self.config);
|
let c = c.normalize(&self.config);
|
||||||
if c == needle_char {
|
if c == needle_char {
|
||||||
if INDICIES {
|
if INDICES {
|
||||||
indicies.push(i as u32 + start as u32)
|
indices.push(i as u32 + start as u32)
|
||||||
}
|
}
|
||||||
let mut bonus = self.bonus_for(prev_class, class);
|
let mut bonus = self.bonus_for(prev_class, class);
|
||||||
if consecutive == 0 {
|
if consecutive == 0 {
|
||||||
|
10
src/tests.rs
10
src/tests.rs
@ -22,7 +22,7 @@ pub fn assert_matches(
|
|||||||
config.set_match_paths();
|
config.set_match_paths();
|
||||||
}
|
}
|
||||||
let mut matcher = Matcher::new(config);
|
let mut matcher = Matcher::new(config);
|
||||||
let mut indicies = Vec::new();
|
let mut indices = Vec::new();
|
||||||
let mut needle_buf = Vec::new();
|
let mut needle_buf = Vec::new();
|
||||||
let mut haystack_buf = Vec::new();
|
let mut haystack_buf = Vec::new();
|
||||||
for &(haystack, needle, start, end, mut score) in cases {
|
for &(haystack, needle, start, end, mut score) in cases {
|
||||||
@ -35,8 +35,8 @@ pub fn assert_matches(
|
|||||||
let haystack = Utf32Str::new(haystack, &mut haystack_buf);
|
let haystack = Utf32Str::new(haystack, &mut haystack_buf);
|
||||||
score += needle.len() as u16 * SCORE_MATCH;
|
score += needle.len() as u16 * SCORE_MATCH;
|
||||||
|
|
||||||
let res = matcher.fuzzy_indicies(haystack, needle, &mut indicies);
|
let res = matcher.fuzzy_indices(haystack, needle, &mut indices);
|
||||||
let match_chars: Vec<_> = indicies
|
let match_chars: Vec<_> = indices
|
||||||
.iter()
|
.iter()
|
||||||
.map(|&i| haystack.get(i).normalize(&matcher.config))
|
.map(|&i| haystack.get(i).normalize(&matcher.config))
|
||||||
.collect();
|
.collect();
|
||||||
@ -47,9 +47,9 @@ pub fn assert_matches(
|
|||||||
Some(score),
|
Some(score),
|
||||||
"{needle:?} did not match {haystack:?}: {match_chars:?}"
|
"{needle:?} did not match {haystack:?}: {match_chars:?}"
|
||||||
);
|
);
|
||||||
assert_eq!(match_chars, needle_chars, "match indicies are incorrect");
|
assert_eq!(match_chars, needle_chars, "match indices are incorrect");
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
indicies.first().copied()..indicies.last().map(|&i| i + 1),
|
indices.first().copied()..indices.last().map(|&i| i + 1),
|
||||||
Some(start)..Some(end),
|
Some(start)..Some(end),
|
||||||
"{needle:?} match {haystack:?}[{start}..{end}]"
|
"{needle:?} match {haystack:?}[{start}..{end}]"
|
||||||
);
|
);
|
||||||
|
@ -13,7 +13,7 @@ use std::slice;
|
|||||||
/// matching itself. Furthermore there are a lot of exta optimizations available
|
/// matching itself. Furthermore there are a lot of exta optimizations available
|
||||||
/// for ascii only text (but checking during each match has too much overhead).
|
/// for ascii only text (but checking during each match has too much overhead).
|
||||||
///
|
///
|
||||||
/// Ofcourse this comes at exta memory cost as we usally still need the ut8
|
/// Ofcourse this comes at exta memory cost as we usually still need the ut8
|
||||||
/// encoded variant for rendenring. In the (dominant) case of ascii-only text
|
/// encoded variant for rendenring. In the (dominant) case of ascii-only text
|
||||||
/// we don't require a copy. Furthermore fuzzy matching usually is applied while
|
/// we don't require a copy. Furthermore fuzzy matching usually is applied while
|
||||||
/// the user is typing on the fly so the same item is potentially matched many
|
/// the user is typing on the fly so the same item is potentially matched many
|
||||||
@ -24,13 +24,13 @@ use std::slice;
|
|||||||
/// char buffer around that is filled with the presegmented chars
|
/// char buffer around that is filled with the presegmented chars
|
||||||
///
|
///
|
||||||
/// Another advantage of this approach is that the matcher will naturally
|
/// Another advantage of this approach is that the matcher will naturally
|
||||||
/// produce char indecies (instead of utf8 offsets) annyway. With a
|
/// produce char indices (instead of utf8 offsets) annyway. With a
|
||||||
/// codepoint basec representation like this the indecies can be used
|
/// codepoint basec representation like this the indices can be used
|
||||||
/// directly
|
/// directly
|
||||||
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash, Debug)]
|
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash, Debug)]
|
||||||
pub enum Utf32Str<'a> {
|
pub enum Utf32Str<'a> {
|
||||||
/// A string represented as ASCII encoded bytes.
|
/// A string represented as ASCII encoded bytes.
|
||||||
/// Correctness invariant: must only contain vaild ASCII (<=127)
|
/// Correctness invariant: must only contain valid ASCII (<=127)
|
||||||
Ascii(&'a [u8]),
|
Ascii(&'a [u8]),
|
||||||
/// A string represented as an array of unicode codepoints (basically UTF-32).
|
/// A string represented as an array of unicode codepoints (basically UTF-32).
|
||||||
Unicode(&'a [char]),
|
Unicode(&'a [char]),
|
||||||
@ -75,7 +75,7 @@ impl<'a> Utf32Str<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Same as `slice` but accepts a u32 range for convenicene sine
|
/// Same as `slice` but accepts a u32 range for convenicene sine
|
||||||
/// those are the indecies returned by the matcher
|
/// those are the indices returned by the matcher
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn slice_u32(&self, range: impl RangeBounds<u32>) -> Utf32Str {
|
pub fn slice_u32(&self, range: impl RangeBounds<u32>) -> Utf32Str {
|
||||||
let start = match range.start_bound() {
|
let start = match range.start_bound() {
|
||||||
|
3
typos.toml
Normal file
3
typos.toml
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
default.extend-ignore-re = ["\\\\u\\{[0-9A-Za-z]*\\}"]
|
||||||
|
[files]
|
||||||
|
extend-exclude = ["integration_tests", "verilogae/tests", "*.mir", "openvaf/lexer/src/tests.rs"]
|
Loading…
Reference in New Issue
Block a user