mirror of
https://github.com/solaeus/nucleo.git
synced 2025-01-22 07:47:47 +00:00
fix typos
This commit is contained in:
parent
33822be2ab
commit
d844ab7f3b
245
foo.c
245
foo.c
@ -1,245 +0,0 @@
|
||||
|
||||
fzf_result_t fzf_fuzzy_match_v2(bool case_sensitive, bool normalize,
|
||||
fzf_string_t *text, fzf_string_t *pattern,
|
||||
fzf_position_t *pos, fzf_slab_t *slab) {
|
||||
const size_t M = pattern->size;
|
||||
const size_t N = text->size;
|
||||
if (M == 0) {
|
||||
return (fzf_result_t){0, 0, 0};
|
||||
}
|
||||
if (slab != NULL && N * M > slab->I16.cap) {
|
||||
return fzf_fuzzy_match_v1(case_sensitive, normalize, text, pattern, pos,
|
||||
slab);
|
||||
}
|
||||
|
||||
size_t idx;
|
||||
{
|
||||
int32_t tmp_idx = ascii_fuzzy_index(text, pattern->data, M, case_sensitive);
|
||||
if (tmp_idx < 0) {
|
||||
return (fzf_result_t){-1, -1, 0};
|
||||
}
|
||||
idx = (size_t)tmp_idx;
|
||||
}
|
||||
|
||||
size_t offset16 = 0;
|
||||
size_t offset32 = 0;
|
||||
|
||||
fzf_i16_t h0 = alloc16(&offset16, slab, N);
|
||||
fzf_i16_t c0 = alloc16(&offset16, slab, N);
|
||||
// Bonus point for each positions
|
||||
fzf_i16_t bo = alloc16(&offset16, slab, N);
|
||||
// The first occurrence of each character in the pattern
|
||||
fzf_i32_t f = alloc32(&offset32, slab, M);
|
||||
// Rune array
|
||||
fzf_i32_t t = alloc32(&offset32, slab, N);
|
||||
copy_runes(text, &t); // input.CopyRunes(T)
|
||||
|
||||
// Phase 2. Calculate bonus for each point
|
||||
int16_t max_score = 0;
|
||||
size_t max_score_pos = 0;
|
||||
|
||||
size_t pidx = 0;
|
||||
size_t last_idx = 0;
|
||||
|
||||
char pchar0 = pattern->data[0];
|
||||
char pchar = pattern->data[0];
|
||||
int16_t prev_h0 = 0;
|
||||
int32_t prev_class = CharNonWord;
|
||||
bool in_gap = false;
|
||||
|
||||
i32_slice_t t_sub = slice_i32(t.data, idx, t.size); // T[idx:];
|
||||
i16_slice_t h0_sub =
|
||||
slice_i16_right(slice_i16(h0.data, idx, h0.size).data, t_sub.size);
|
||||
i16_slice_t c0_sub =
|
||||
slice_i16_right(slice_i16(c0.data, idx, c0.size).data, t_sub.size);
|
||||
i16_slice_t b_sub =
|
||||
slice_i16_right(slice_i16(bo.data, idx, bo.size).data, t_sub.size);
|
||||
|
||||
for (size_t off = 0; off < t_sub.size; off++) {
|
||||
char_class class;
|
||||
char c = (char)t_sub.data[off];
|
||||
class = char_class_of_ascii(c);
|
||||
if (!case_sensitive && class == CharUpper) {
|
||||
/* TODO(conni2461): unicode support */
|
||||
c = (char)tolower((uint8_t)c);
|
||||
}
|
||||
if (normalize) {
|
||||
c = normalize_rune(c);
|
||||
}
|
||||
|
||||
t_sub.data[off] = (uint8_t)c;
|
||||
int16_t bonus = bonus_for(prev_class, class);
|
||||
b_sub.data[off] = bonus;
|
||||
prev_class = class;
|
||||
if (c == pchar) {
|
||||
if (pidx < M) {
|
||||
f.data[pidx] = (int32_t)(idx + off);
|
||||
pidx++;
|
||||
pchar = pattern->data[min64u(pidx, M - 1)];
|
||||
}
|
||||
last_idx = idx + off;
|
||||
}
|
||||
|
||||
if (c == pchar0) {
|
||||
int16_t score = ScoreMatch + bonus * BonusFirstCharMultiplier;
|
||||
h0_sub.data[off] = score;
|
||||
c0_sub.data[off] = 1;
|
||||
if (M == 1 && (score > max_score)) {
|
||||
max_score = score;
|
||||
max_score_pos = idx + off;
|
||||
if (bonus == BonusBoundary) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
in_gap = false;
|
||||
} else {
|
||||
if (in_gap) {
|
||||
h0_sub.data[off] = max16(prev_h0 + ScoreGapExtention, 0);
|
||||
} else {
|
||||
h0_sub.data[off] = max16(prev_h0 + ScoreGapStart, 0);
|
||||
}
|
||||
c0_sub.data[off] = 0;
|
||||
in_gap = true;
|
||||
}
|
||||
prev_h0 = h0_sub.data[off];
|
||||
}
|
||||
if (pidx != M) {
|
||||
free_alloc(t);
|
||||
free_alloc(f);
|
||||
free_alloc(bo);
|
||||
free_alloc(c0);
|
||||
free_alloc(h0);
|
||||
return (fzf_result_t){-1, -1, 0};
|
||||
}
|
||||
if (M == 1) {
|
||||
free_alloc(t);
|
||||
free_alloc(f);
|
||||
free_alloc(bo);
|
||||
free_alloc(c0);
|
||||
free_alloc(h0);
|
||||
fzf_result_t res = {(int32_t)max_score_pos, (int32_t)max_score_pos + 1,
|
||||
max_score};
|
||||
append_pos(pos, max_score_pos);
|
||||
return res;
|
||||
}
|
||||
|
||||
size_t f0 = (size_t)f.data[0];
|
||||
size_t width = last_idx - f0 + 1;
|
||||
fzf_i16_t h = alloc16(&offset16, slab, width * M);
|
||||
{
|
||||
i16_slice_t h0_tmp_slice = slice_i16(h0.data, f0, last_idx + 1);
|
||||
copy_into_i16(&h0_tmp_slice, &h);
|
||||
}
|
||||
|
||||
fzf_i16_t c = alloc16(&offset16, slab, width * M);
|
||||
{
|
||||
i16_slice_t c0_tmp_slice = slice_i16(c0.data, f0, last_idx + 1);
|
||||
copy_into_i16(&c0_tmp_slice, &c);
|
||||
}
|
||||
|
||||
i32_slice_t f_sub = slice_i32(f.data, 1, f.size);
|
||||
str_slice_t p_sub =
|
||||
slice_str_right(slice_str(pattern->data, 1, M).data, f_sub.size);
|
||||
for (size_t off = 0; off < f_sub.size; off++) {
|
||||
size_t f = (size_t)f_sub.data[off];
|
||||
pchar = p_sub.data[off];
|
||||
pidx = off + 1;
|
||||
size_t row = pidx * width;
|
||||
in_gap = false;
|
||||
t_sub = slice_i32(t.data, f, last_idx + 1);
|
||||
b_sub = slice_i16_right(slice_i16(bo.data, f, bo.size).data, t_sub.size);
|
||||
i16_slice_t c_sub = slice_i16_right(
|
||||
slice_i16(c.data, row + f - f0, c.size).data, t_sub.size);
|
||||
i16_slice_t c_diag = slice_i16_right(
|
||||
slice_i16(c.data, row + f - f0 - 1 - width, c.size).data, t_sub.size);
|
||||
i16_slice_t h_sub = slice_i16_right(
|
||||
slice_i16(h.data, row + f - f0, h.size).data, t_sub.size);
|
||||
i16_slice_t h_diag = slice_i16_right(
|
||||
slice_i16(h.data, row + f - f0 - 1 - width, h.size).data, t_sub.size);
|
||||
i16_slice_t h_left = slice_i16_right(
|
||||
slice_i16(h.data, row + f - f0 - 1, h.size).data, t_sub.size);
|
||||
h_left.data[0] = 0;
|
||||
for (size_t j = 0; j < t_sub.size; j++) {
|
||||
char ch = (char)t_sub.data[j];
|
||||
size_t col = j + f;
|
||||
int16_t s1 = 0;
|
||||
int16_t s2 = 0;
|
||||
int16_t consecutive = 0;
|
||||
|
||||
if (in_gap) {
|
||||
s2 = h_left.data[j] + ScoreGapExtention;
|
||||
} else {
|
||||
s2 = h_left.data[j] + ScoreGapStart;
|
||||
}
|
||||
|
||||
if (pchar == ch) {
|
||||
s1 = h_diag.data[j] + ScoreMatch;
|
||||
int16_t b = b_sub.data[j];
|
||||
consecutive = c_diag.data[j] + 1;
|
||||
if (b == BonusBoundary) {
|
||||
consecutive = 1;
|
||||
} else if (consecutive > 1) {
|
||||
b = max16(b, max16(BonusConsecutive,
|
||||
bo.data[col - ((size_t)consecutive) + 1]));
|
||||
}
|
||||
if (s1 + b < s2) {
|
||||
s1 += b_sub.data[j];
|
||||
consecutive = 0;
|
||||
} else {
|
||||
s1 += b;
|
||||
}
|
||||
}
|
||||
c_sub.data[j] = consecutive;
|
||||
in_gap = s1 < s2;
|
||||
int16_t score = max16(max16(s1, s2), 0);
|
||||
if (pidx == M - 1 && (score > max_score)) {
|
||||
max_score = score;
|
||||
max_score_pos = col;
|
||||
}
|
||||
h_sub.data[j] = score;
|
||||
}
|
||||
}
|
||||
|
||||
resize_pos(pos, M, M);
|
||||
size_t j = max_score_pos;
|
||||
if (pos) {
|
||||
size_t i = M - 1;
|
||||
bool prefer_match = true;
|
||||
for (;;) {
|
||||
size_t ii = i * width;
|
||||
size_t j0 = j - f0;
|
||||
int16_t s = h.data[ii + j0];
|
||||
|
||||
int16_t s1 = 0;
|
||||
int16_t s2 = 0;
|
||||
if (i > 0 && j >= f.data[i]) {
|
||||
s1 = h.data[ii - width + j0 - 1];
|
||||
}
|
||||
if (j > f.data[i]) {
|
||||
s2 = h.data[ii + j0 - 1];
|
||||
}
|
||||
|
||||
if (s > s1 && (s > s2 || (s == s2 && prefer_match))) {
|
||||
unsafe_append_pos(pos, j);
|
||||
if (i == 0) {
|
||||
break;
|
||||
}
|
||||
i--;
|
||||
}
|
||||
prefer_match = c.data[ii + j0] > 1 || (ii + width + j0 + 1 < c.size &&
|
||||
c.data[ii + width + j0 + 1] > 0);
|
||||
j--;
|
||||
}
|
||||
}
|
||||
|
||||
free_alloc(h);
|
||||
free_alloc(c);
|
||||
free_alloc(t);
|
||||
free_alloc(f);
|
||||
free_alloc(bo);
|
||||
free_alloc(c0);
|
||||
free_alloc(h0);
|
||||
return (fzf_result_t){(int32_t)j, (int32_t)max_score_pos + 1,
|
||||
(int32_t)max_score};
|
||||
}
|
||||
|
@ -64,7 +64,7 @@ impl Char for AsciiChar {
|
||||
CharClass::Number
|
||||
} else if c.is_ascii_whitespace() {
|
||||
CharClass::Whitespace
|
||||
} else if config.delimeter_chars.contains(&c) {
|
||||
} else if config.delimiter_chars.contains(&c) {
|
||||
CharClass::Delimiter
|
||||
} else {
|
||||
CharClass::NonWord
|
||||
|
@ -3,16 +3,16 @@ use crate::score::BONUS_BOUNDARY;
|
||||
|
||||
#[non_exhaustive]
|
||||
pub struct MatcherConfig {
|
||||
pub delimeter_chars: &'static [u8],
|
||||
pub delimiter_chars: &'static [u8],
|
||||
/// Extra bonus for word boundary after whitespace character or beginning of the string
|
||||
pub bonus_boundary_white: u16,
|
||||
|
||||
// Extra bonus for word boundary after slash, colon, semi-colon, and comma
|
||||
pub bonus_boundary_delimiter: u16,
|
||||
pub inital_char_class: CharClass,
|
||||
/// Whether to normalize latin script charaters to ASCII
|
||||
pub initial_char_class: CharClass,
|
||||
/// Whether to normalize latin script characters to ASCII
|
||||
/// this significantly degrades performance so it's not recommended
|
||||
/// to be truned on by default
|
||||
/// to be turned on by default
|
||||
pub normalize: bool,
|
||||
/// whether to ignore casing
|
||||
pub ignore_case: bool,
|
||||
@ -29,10 +29,10 @@ pub struct MatcherConfig {
|
||||
impl MatcherConfig {
|
||||
pub const DEFAULT: Self = {
|
||||
MatcherConfig {
|
||||
delimeter_chars: b"/,:;|",
|
||||
delimiter_chars: b"/,:;|",
|
||||
bonus_boundary_white: BONUS_BOUNDARY + 2,
|
||||
bonus_boundary_delimiter: BONUS_BOUNDARY + 1,
|
||||
inital_char_class: CharClass::Whitespace,
|
||||
initial_char_class: CharClass::Whitespace,
|
||||
normalize: false,
|
||||
ignore_case: true,
|
||||
}
|
||||
@ -42,22 +42,22 @@ impl MatcherConfig {
|
||||
impl MatcherConfig {
|
||||
pub fn set_match_paths(&mut self) {
|
||||
if cfg!(windows) {
|
||||
self.delimeter_chars = b"/\\";
|
||||
self.delimiter_chars = b"/\\";
|
||||
} else {
|
||||
self.delimeter_chars = b"/";
|
||||
self.delimiter_chars = b"/";
|
||||
}
|
||||
self.bonus_boundary_white = BONUS_BOUNDARY;
|
||||
self.inital_char_class = CharClass::Delimiter;
|
||||
self.initial_char_class = CharClass::Delimiter;
|
||||
}
|
||||
|
||||
pub const fn match_paths(mut self) -> Self {
|
||||
if cfg!(windows) {
|
||||
self.delimeter_chars = b"/\\";
|
||||
self.delimiter_chars = b"/\\";
|
||||
} else {
|
||||
self.delimeter_chars = b"/";
|
||||
self.delimiter_chars = b"/";
|
||||
}
|
||||
self.bonus_boundary_white = BONUS_BOUNDARY;
|
||||
self.inital_char_class = CharClass::Delimiter;
|
||||
self.initial_char_class = CharClass::Delimiter;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
@ -2,15 +2,15 @@ use crate::chars::Char;
|
||||
use crate::Matcher;
|
||||
|
||||
impl Matcher {
|
||||
/// greedy fallback algoritm, much faster (linear time) but reported scores/indicies
|
||||
/// greedy fallback algorithm, much faster (linear time) but reported scores/indices
|
||||
/// might not be the best match
|
||||
pub(crate) fn fuzzy_match_greedy<const INDICIES: bool, H: Char + PartialEq<N>, N: Char>(
|
||||
pub(crate) fn fuzzy_match_greedy<const INDICES: bool, H: Char + PartialEq<N>, N: Char>(
|
||||
&mut self,
|
||||
haystack: &[H],
|
||||
needle: &[N],
|
||||
mut start: usize,
|
||||
mut end: usize,
|
||||
indicies: &mut Vec<u32>,
|
||||
indices: &mut Vec<u32>,
|
||||
) -> Option<u16> {
|
||||
let first_char_end = if H::ASCII { start + 1 } else { end };
|
||||
if !H::ASCII && needle.len() != 1 {
|
||||
@ -27,7 +27,7 @@ impl Matcher {
|
||||
}
|
||||
}
|
||||
}
|
||||
// mimimize the greedly match by greedy matching in reverse
|
||||
// minimize the greedy match by greedy matching in reverse
|
||||
|
||||
let mut needle_iter = needle.iter().rev().copied();
|
||||
let mut needle_char = needle_iter.next().unwrap();
|
||||
@ -40,6 +40,6 @@ impl Matcher {
|
||||
needle_char = next_needle_char;
|
||||
}
|
||||
}
|
||||
Some(self.calculate_score::<INDICIES, H, N>(haystack, needle, start, end, indicies))
|
||||
Some(self.calculate_score::<INDICES, H, N>(haystack, needle, start, end, indices))
|
||||
}
|
||||
}
|
||||
|
@ -10,39 +10,39 @@ use crate::score::{
|
||||
use crate::{Matcher, MatcherConfig};
|
||||
|
||||
impl Matcher {
|
||||
pub(crate) fn fuzzy_match_optimal<const INDICIES: bool, H: Char + PartialEq<N>, N: Char>(
|
||||
pub(crate) fn fuzzy_match_optimal<const INDICES: bool, H: Char + PartialEq<N>, N: Char>(
|
||||
&mut self,
|
||||
haystack: &[H],
|
||||
needle: &[N],
|
||||
start: usize,
|
||||
greedy_end: usize,
|
||||
end: usize,
|
||||
indicies: &mut Vec<u32>,
|
||||
indices: &mut Vec<u32>,
|
||||
) -> Option<u16> {
|
||||
// construct a matrix (and copy the haystack), the matrix and haystack size are bounded
|
||||
// to avoid the slow O(mn) time complexity for large inputs. Furthermore, it allows
|
||||
// us to treat needle indecies as u16
|
||||
// us to treat needle indices as u16
|
||||
let Some(mut matrix) = self.slab.alloc(&haystack[start..end], needle.len()) else {
|
||||
return self.fuzzy_match_greedy::<INDICIES, H, N>(
|
||||
return self.fuzzy_match_greedy::<INDICES, H, N>(
|
||||
haystack,
|
||||
needle,
|
||||
start,
|
||||
greedy_end,
|
||||
indicies,
|
||||
indices,
|
||||
);
|
||||
};
|
||||
|
||||
let prev_class = start
|
||||
.checked_sub(1)
|
||||
.map(|i| haystack[i].char_class(&self.config))
|
||||
.unwrap_or(self.config.inital_char_class);
|
||||
.unwrap_or(self.config.initial_char_class);
|
||||
let (max_score_pos, max_score, matched) = matrix.setup(needle, prev_class, &self.config);
|
||||
// this only happend with unicode haystacks, for ASCII the prefilter handles all rejects
|
||||
// this only happens with unicode haystacks, for ASCII the prefilter handles all rejects
|
||||
if !matched {
|
||||
return None;
|
||||
}
|
||||
if needle.len() == 1 {
|
||||
indicies.push(max_score_pos as u32);
|
||||
indices.push(max_score_pos as u32);
|
||||
return Some(max_score);
|
||||
}
|
||||
debug_assert_eq!(
|
||||
@ -52,8 +52,8 @@ impl Matcher {
|
||||
|
||||
// populate the matrix and find the best score
|
||||
let (max_score, best_match_end) = matrix.populate_matrix(needle);
|
||||
if INDICIES {
|
||||
matrix.reconstruct_optimal_path(needle, start as u32, indicies, best_match_end);
|
||||
if INDICES {
|
||||
matrix.reconstruct_optimal_path(needle, start as u32, indices, best_match_end);
|
||||
}
|
||||
Some(max_score)
|
||||
}
|
||||
@ -224,12 +224,12 @@ impl<H: Char> Matrix<'_, H> {
|
||||
&self,
|
||||
needle: &[N],
|
||||
start: u32,
|
||||
indicies: &mut Vec<u32>,
|
||||
indices: &mut Vec<u32>,
|
||||
best_match_end: u16,
|
||||
) {
|
||||
indicies.resize(needle.len(), 0);
|
||||
indices.resize(needle.len(), 0);
|
||||
|
||||
let mut row_iter = self.rows_rev().zip(indicies.iter_mut().rev()).peekable();
|
||||
let mut row_iter = self.rows_rev().zip(indices.iter_mut().rev()).peekable();
|
||||
let (mut row, mut matched_col_idx) = row_iter.next().unwrap();
|
||||
let mut next_row: Option<MatrixRow> = None;
|
||||
let mut col = best_match_end;
|
||||
|
24
src/lib.rs
24
src/lib.rs
@ -1,4 +1,4 @@
|
||||
// sadly this doens't optmimzie well currently
|
||||
// sadly ranges don't optimize well
|
||||
#![allow(clippy::manual_range_contains)]
|
||||
|
||||
mod chars;
|
||||
@ -63,7 +63,7 @@ impl Matcher {
|
||||
self.fuzzy_matcher_impl::<false>(haystack, needle, &mut Vec::new())
|
||||
}
|
||||
|
||||
pub fn fuzzy_indicies(
|
||||
pub fn fuzzy_indices(
|
||||
&mut self,
|
||||
haystack: Utf32Str<'_>,
|
||||
needle: Utf32Str<'_>,
|
||||
@ -73,7 +73,7 @@ impl Matcher {
|
||||
self.fuzzy_matcher_impl::<true>(haystack, needle, indidies)
|
||||
}
|
||||
|
||||
fn fuzzy_matcher_impl<const INDICIES: bool>(
|
||||
fn fuzzy_matcher_impl<const INDICES: bool>(
|
||||
&mut self,
|
||||
haystack: Utf32Str<'_>,
|
||||
needle_: Utf32Str<'_>,
|
||||
@ -92,7 +92,7 @@ impl Matcher {
|
||||
match (haystack, needle_) {
|
||||
(Utf32Str::Ascii(haystack), Utf32Str::Ascii(needle)) => {
|
||||
let (start, greedy_end, end) = self.prefilter_ascii(haystack, needle)?;
|
||||
self.fuzzy_match_optimal::<INDICIES, AsciiChar, AsciiChar>(
|
||||
self.fuzzy_match_optimal::<INDICES, AsciiChar, AsciiChar>(
|
||||
AsciiChar::cast(haystack),
|
||||
AsciiChar::cast(needle),
|
||||
start,
|
||||
@ -108,7 +108,7 @@ impl Matcher {
|
||||
}
|
||||
(Utf32Str::Unicode(haystack), Utf32Str::Ascii(needle)) => {
|
||||
let (start, end) = self.prefilter_non_ascii(haystack, needle_)?;
|
||||
self.fuzzy_match_optimal::<INDICIES, char, AsciiChar>(
|
||||
self.fuzzy_match_optimal::<INDICES, char, AsciiChar>(
|
||||
haystack,
|
||||
AsciiChar::cast(needle),
|
||||
start,
|
||||
@ -119,7 +119,7 @@ impl Matcher {
|
||||
}
|
||||
(Utf32Str::Unicode(haystack), Utf32Str::Unicode(needle)) => {
|
||||
let (start, end) = self.prefilter_non_ascii(haystack, needle_)?;
|
||||
self.fuzzy_match_optimal::<INDICIES, char, char>(
|
||||
self.fuzzy_match_optimal::<INDICES, char, char>(
|
||||
haystack,
|
||||
needle,
|
||||
start,
|
||||
@ -131,11 +131,11 @@ impl Matcher {
|
||||
}
|
||||
}
|
||||
|
||||
// pub fn fuzzy_indicies(
|
||||
// pub fn fuzzy_indices(
|
||||
// &mut self,
|
||||
// query: &Query,
|
||||
// mut haystack: Utf32Str<'_>,
|
||||
// indicies: &mut Vec<u32>,
|
||||
// indices: &mut Vec<u32>,
|
||||
// ) -> Option<u16> {
|
||||
// if haystack.len() > u32::MAX as usize {
|
||||
// haystack = &haystack[..u32::MAX as usize]
|
||||
@ -146,14 +146,14 @@ impl Matcher {
|
||||
// );
|
||||
// if self.config.use_v1 {
|
||||
// if query.is_ascii && !self.config.normalize {
|
||||
// self.fuzzy_matcher_v1::<true, true>(query, haystack, indicies)
|
||||
// self.fuzzy_matcher_v1::<true, true>(query, haystack, indices)
|
||||
// } else {
|
||||
// self.fuzzy_matcher_v1::<true, false>(query, haystack, indicies)
|
||||
// self.fuzzy_matcher_v1::<true, false>(query, haystack, indices)
|
||||
// }
|
||||
// } else if query.is_ascii && !self.config.normalize {
|
||||
// self.fuzzy_matcher_v2::<true, true>(query, haystack, indicies)
|
||||
// self.fuzzy_matcher_v2::<true, true>(query, haystack, indices)
|
||||
// } else {
|
||||
// self.fuzzy_matcher_v2::<true, false>(query, haystack, indicies)
|
||||
// self.fuzzy_matcher_v2::<true, false>(query, haystack, indices)
|
||||
// }
|
||||
// }
|
||||
}
|
||||
|
@ -155,7 +155,7 @@ where
|
||||
|
||||
pub(crate) struct Matrix<'a, C: Char> {
|
||||
pub haystack: &'a mut [C],
|
||||
// stored as a seperate array instead of struct
|
||||
// stored as a separate array instead of struct
|
||||
// to avoid padding since char is too large and u8 too small :/
|
||||
pub bonus: &'a mut [u16],
|
||||
pub row_offs: &'a mut [u16],
|
||||
@ -264,10 +264,10 @@ impl MatrixSlab {
|
||||
return None;
|
||||
}
|
||||
unsafe {
|
||||
// safetly: this allocation is valid for MATRIX_ALLOC_LAYOUT
|
||||
// safety: this allocation is valid for MATRIX_ALLOC_LAYOUT
|
||||
let (haystack, bonus, rows, cells) = matrix_layout.fieds_from_ptr(self.0);
|
||||
// copy haystack before creating refernces to ensure we donu't crate
|
||||
// refrences to invalid chars (which may or may not be UB)
|
||||
// copy haystack before creating references to ensure we don't create
|
||||
// references to invalid chars (which may or may not be UB)
|
||||
haystack_
|
||||
.as_ptr()
|
||||
.copy_to_nonoverlapping(haystack as *mut _, haystack_.len());
|
||||
|
18
src/score.rs
18
src/score.rs
@ -69,22 +69,22 @@ impl Matcher {
|
||||
self.config.bonus_for(prev_class, class)
|
||||
}
|
||||
|
||||
pub(crate) fn calculate_score<const INDICIES: bool, H: Char + PartialEq<N>, N: Char>(
|
||||
pub(crate) fn calculate_score<const INDICES: bool, H: Char + PartialEq<N>, N: Char>(
|
||||
&mut self,
|
||||
haystack: &[H],
|
||||
needle: &[N],
|
||||
start: usize,
|
||||
end: usize,
|
||||
indicies: &mut Vec<u32>,
|
||||
indices: &mut Vec<u32>,
|
||||
) -> u16 {
|
||||
if INDICIES {
|
||||
indicies.reserve(needle.len());
|
||||
if INDICES {
|
||||
indices.reserve(needle.len());
|
||||
}
|
||||
|
||||
let mut prev_class = start
|
||||
.checked_sub(1)
|
||||
.map(|i| haystack[i].char_class(&self.config))
|
||||
.unwrap_or(self.config.inital_char_class);
|
||||
.unwrap_or(self.config.initial_char_class);
|
||||
let mut needle_iter = needle.iter();
|
||||
let mut needle_char = *needle_iter.next().unwrap();
|
||||
|
||||
@ -92,8 +92,8 @@ impl Matcher {
|
||||
let mut consecutive = 1;
|
||||
|
||||
// unrolled the first iteration to make applying the first char multiplier less awkward
|
||||
if INDICIES {
|
||||
indicies.push(start as u32)
|
||||
if INDICES {
|
||||
indices.push(start as u32)
|
||||
}
|
||||
let mut first_bonus = self.bonus_for(prev_class, haystack[0].char_class(&self.config));
|
||||
let mut score = SCORE_MATCH + first_bonus * BONUS_FIRST_CHAR_MULTIPLIER;
|
||||
@ -102,8 +102,8 @@ impl Matcher {
|
||||
let class = c.char_class(&self.config);
|
||||
let c = c.normalize(&self.config);
|
||||
if c == needle_char {
|
||||
if INDICIES {
|
||||
indicies.push(i as u32 + start as u32)
|
||||
if INDICES {
|
||||
indices.push(i as u32 + start as u32)
|
||||
}
|
||||
let mut bonus = self.bonus_for(prev_class, class);
|
||||
if consecutive == 0 {
|
||||
|
10
src/tests.rs
10
src/tests.rs
@ -22,7 +22,7 @@ pub fn assert_matches(
|
||||
config.set_match_paths();
|
||||
}
|
||||
let mut matcher = Matcher::new(config);
|
||||
let mut indicies = Vec::new();
|
||||
let mut indices = Vec::new();
|
||||
let mut needle_buf = Vec::new();
|
||||
let mut haystack_buf = Vec::new();
|
||||
for &(haystack, needle, start, end, mut score) in cases {
|
||||
@ -35,8 +35,8 @@ pub fn assert_matches(
|
||||
let haystack = Utf32Str::new(haystack, &mut haystack_buf);
|
||||
score += needle.len() as u16 * SCORE_MATCH;
|
||||
|
||||
let res = matcher.fuzzy_indicies(haystack, needle, &mut indicies);
|
||||
let match_chars: Vec<_> = indicies
|
||||
let res = matcher.fuzzy_indices(haystack, needle, &mut indices);
|
||||
let match_chars: Vec<_> = indices
|
||||
.iter()
|
||||
.map(|&i| haystack.get(i).normalize(&matcher.config))
|
||||
.collect();
|
||||
@ -47,9 +47,9 @@ pub fn assert_matches(
|
||||
Some(score),
|
||||
"{needle:?} did not match {haystack:?}: {match_chars:?}"
|
||||
);
|
||||
assert_eq!(match_chars, needle_chars, "match indicies are incorrect");
|
||||
assert_eq!(match_chars, needle_chars, "match indices are incorrect");
|
||||
assert_eq!(
|
||||
indicies.first().copied()..indicies.last().map(|&i| i + 1),
|
||||
indices.first().copied()..indices.last().map(|&i| i + 1),
|
||||
Some(start)..Some(end),
|
||||
"{needle:?} match {haystack:?}[{start}..{end}]"
|
||||
);
|
||||
|
@ -13,7 +13,7 @@ use std::slice;
|
||||
/// matching itself. Furthermore there are a lot of extra optimizations available
|
||||
/// for ascii only text (but checking during each match has too much overhead).
|
||||
///
|
||||
/// Ofcourse this comes at exta memory cost as we usally still need the ut8
|
||||
/// Of course this comes at extra memory cost as we usually still need the utf8
|
||||
/// encoded variant for rendering. In the (dominant) case of ascii-only text
|
||||
/// we don't require a copy. Furthermore fuzzy matching usually is applied while
|
||||
/// the user is typing on the fly so the same item is potentially matched many
|
||||
@ -24,13 +24,13 @@ use std::slice;
|
||||
/// char buffer around that is filled with the presegmented chars
|
||||
///
|
||||
/// Another advantage of this approach is that the matcher will naturally
|
||||
/// produce char indecies (instead of utf8 offsets) annyway. With a
|
||||
/// codepoint basec representation like this the indecies can be used
|
||||
/// produce char indices (instead of utf8 offsets) anyway. With a
|
||||
/// codepoint based representation like this the indices can be used
|
||||
/// directly
|
||||
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash, Debug)]
|
||||
pub enum Utf32Str<'a> {
|
||||
/// A string represented as ASCII encoded bytes.
|
||||
/// Correctness invariant: must only contain vaild ASCII (<=127)
|
||||
/// Correctness invariant: must only contain valid ASCII (<=127)
|
||||
Ascii(&'a [u8]),
|
||||
/// A string represented as an array of unicode codepoints (basically UTF-32).
|
||||
Unicode(&'a [char]),
|
||||
@ -75,7 +75,7 @@ impl<'a> Utf32Str<'a> {
|
||||
}
|
||||
|
||||
/// Same as `slice` but accepts a u32 range for convenience since
|
||||
/// those are the indecies returned by the matcher
|
||||
/// those are the indices returned by the matcher
|
||||
#[inline]
|
||||
pub fn slice_u32(&self, range: impl RangeBounds<u32>) -> Utf32Str {
|
||||
let start = match range.start_bound() {
|
||||
|
3
typos.toml
Normal file
3
typos.toml
Normal file
@ -0,0 +1,3 @@
|
||||
default.extend-ignore-re = ["\\\\u\\{[0-9A-Za-z]*\\}"]
|
||||
[files]
|
||||
extend-exclude = ["integration_tests", "verilogae/tests", "*.mir", "openvaf/lexer/src/tests.rs"]
|
Loading…
x
Reference in New Issue
Block a user