diff --git a/foo.c b/foo.c deleted file mode 100644 index 757385f..0000000 --- a/foo.c +++ /dev/null @@ -1,245 +0,0 @@ - -fzf_result_t fzf_fuzzy_match_v2(bool case_sensitive, bool normalize, - fzf_string_t *text, fzf_string_t *pattern, - fzf_position_t *pos, fzf_slab_t *slab) { - const size_t M = pattern->size; - const size_t N = text->size; - if (M == 0) { - return (fzf_result_t){0, 0, 0}; - } - if (slab != NULL && N * M > slab->I16.cap) { - return fzf_fuzzy_match_v1(case_sensitive, normalize, text, pattern, pos, - slab); - } - - size_t idx; - { - int32_t tmp_idx = ascii_fuzzy_index(text, pattern->data, M, case_sensitive); - if (tmp_idx < 0) { - return (fzf_result_t){-1, -1, 0}; - } - idx = (size_t)tmp_idx; - } - - size_t offset16 = 0; - size_t offset32 = 0; - - fzf_i16_t h0 = alloc16(&offset16, slab, N); - fzf_i16_t c0 = alloc16(&offset16, slab, N); - // Bonus point for each positions - fzf_i16_t bo = alloc16(&offset16, slab, N); - // The first occurrence of each character in the pattern - fzf_i32_t f = alloc32(&offset32, slab, M); - // Rune array - fzf_i32_t t = alloc32(&offset32, slab, N); - copy_runes(text, &t); // input.CopyRunes(T) - - // Phase 2. 
Calculate bonus for each point - int16_t max_score = 0; - size_t max_score_pos = 0; - - size_t pidx = 0; - size_t last_idx = 0; - - char pchar0 = pattern->data[0]; - char pchar = pattern->data[0]; - int16_t prev_h0 = 0; - int32_t prev_class = CharNonWord; - bool in_gap = false; - - i32_slice_t t_sub = slice_i32(t.data, idx, t.size); // T[idx:]; - i16_slice_t h0_sub = - slice_i16_right(slice_i16(h0.data, idx, h0.size).data, t_sub.size); - i16_slice_t c0_sub = - slice_i16_right(slice_i16(c0.data, idx, c0.size).data, t_sub.size); - i16_slice_t b_sub = - slice_i16_right(slice_i16(bo.data, idx, bo.size).data, t_sub.size); - - for (size_t off = 0; off < t_sub.size; off++) { - char_class class; - char c = (char)t_sub.data[off]; - class = char_class_of_ascii(c); - if (!case_sensitive && class == CharUpper) { - /* TODO(conni2461): unicode support */ - c = (char)tolower((uint8_t)c); - } - if (normalize) { - c = normalize_rune(c); - } - - t_sub.data[off] = (uint8_t)c; - int16_t bonus = bonus_for(prev_class, class); - b_sub.data[off] = bonus; - prev_class = class; - if (c == pchar) { - if (pidx < M) { - f.data[pidx] = (int32_t)(idx + off); - pidx++; - pchar = pattern->data[min64u(pidx, M - 1)]; - } - last_idx = idx + off; - } - - if (c == pchar0) { - int16_t score = ScoreMatch + bonus * BonusFirstCharMultiplier; - h0_sub.data[off] = score; - c0_sub.data[off] = 1; - if (M == 1 && (score > max_score)) { - max_score = score; - max_score_pos = idx + off; - if (bonus == BonusBoundary) { - break; - } - } - in_gap = false; - } else { - if (in_gap) { - h0_sub.data[off] = max16(prev_h0 + ScoreGapExtention, 0); - } else { - h0_sub.data[off] = max16(prev_h0 + ScoreGapStart, 0); - } - c0_sub.data[off] = 0; - in_gap = true; - } - prev_h0 = h0_sub.data[off]; - } - if (pidx != M) { - free_alloc(t); - free_alloc(f); - free_alloc(bo); - free_alloc(c0); - free_alloc(h0); - return (fzf_result_t){-1, -1, 0}; - } - if (M == 1) { - free_alloc(t); - free_alloc(f); - free_alloc(bo); - free_alloc(c0); 
- free_alloc(h0); - fzf_result_t res = {(int32_t)max_score_pos, (int32_t)max_score_pos + 1, - max_score}; - append_pos(pos, max_score_pos); - return res; - } - - size_t f0 = (size_t)f.data[0]; - size_t width = last_idx - f0 + 1; - fzf_i16_t h = alloc16(&offset16, slab, width * M); - { - i16_slice_t h0_tmp_slice = slice_i16(h0.data, f0, last_idx + 1); - copy_into_i16(&h0_tmp_slice, &h); - } - - fzf_i16_t c = alloc16(&offset16, slab, width * M); - { - i16_slice_t c0_tmp_slice = slice_i16(c0.data, f0, last_idx + 1); - copy_into_i16(&c0_tmp_slice, &c); - } - - i32_slice_t f_sub = slice_i32(f.data, 1, f.size); - str_slice_t p_sub = - slice_str_right(slice_str(pattern->data, 1, M).data, f_sub.size); - for (size_t off = 0; off < f_sub.size; off++) { - size_t f = (size_t)f_sub.data[off]; - pchar = p_sub.data[off]; - pidx = off + 1; - size_t row = pidx * width; - in_gap = false; - t_sub = slice_i32(t.data, f, last_idx + 1); - b_sub = slice_i16_right(slice_i16(bo.data, f, bo.size).data, t_sub.size); - i16_slice_t c_sub = slice_i16_right( - slice_i16(c.data, row + f - f0, c.size).data, t_sub.size); - i16_slice_t c_diag = slice_i16_right( - slice_i16(c.data, row + f - f0 - 1 - width, c.size).data, t_sub.size); - i16_slice_t h_sub = slice_i16_right( - slice_i16(h.data, row + f - f0, h.size).data, t_sub.size); - i16_slice_t h_diag = slice_i16_right( - slice_i16(h.data, row + f - f0 - 1 - width, h.size).data, t_sub.size); - i16_slice_t h_left = slice_i16_right( - slice_i16(h.data, row + f - f0 - 1, h.size).data, t_sub.size); - h_left.data[0] = 0; - for (size_t j = 0; j < t_sub.size; j++) { - char ch = (char)t_sub.data[j]; - size_t col = j + f; - int16_t s1 = 0; - int16_t s2 = 0; - int16_t consecutive = 0; - - if (in_gap) { - s2 = h_left.data[j] + ScoreGapExtention; - } else { - s2 = h_left.data[j] + ScoreGapStart; - } - - if (pchar == ch) { - s1 = h_diag.data[j] + ScoreMatch; - int16_t b = b_sub.data[j]; - consecutive = c_diag.data[j] + 1; - if (b == BonusBoundary) { - 
consecutive = 1; - } else if (consecutive > 1) { - b = max16(b, max16(BonusConsecutive, - bo.data[col - ((size_t)consecutive) + 1])); - } - if (s1 + b < s2) { - s1 += b_sub.data[j]; - consecutive = 0; - } else { - s1 += b; - } - } - c_sub.data[j] = consecutive; - in_gap = s1 < s2; - int16_t score = max16(max16(s1, s2), 0); - if (pidx == M - 1 && (score > max_score)) { - max_score = score; - max_score_pos = col; - } - h_sub.data[j] = score; - } - } - - resize_pos(pos, M, M); - size_t j = max_score_pos; - if (pos) { - size_t i = M - 1; - bool prefer_match = true; - for (;;) { - size_t ii = i * width; - size_t j0 = j - f0; - int16_t s = h.data[ii + j0]; - - int16_t s1 = 0; - int16_t s2 = 0; - if (i > 0 && j >= f.data[i]) { - s1 = h.data[ii - width + j0 - 1]; - } - if (j > f.data[i]) { - s2 = h.data[ii + j0 - 1]; - } - - if (s > s1 && (s > s2 || (s == s2 && prefer_match))) { - unsafe_append_pos(pos, j); - if (i == 0) { - break; - } - i--; - } - prefer_match = c.data[ii + j0] > 1 || (ii + width + j0 + 1 < c.size && - c.data[ii + width + j0 + 1] > 0); - j--; - } - } - - free_alloc(h); - free_alloc(c); - free_alloc(t); - free_alloc(f); - free_alloc(bo); - free_alloc(c0); - free_alloc(h0); - return (fzf_result_t){(int32_t)j, (int32_t)max_score_pos + 1, - (int32_t)max_score}; -} - diff --git a/src/chars.rs b/src/chars.rs index a3d6f50..a677355 100644 --- a/src/chars.rs +++ b/src/chars.rs @@ -64,7 +64,7 @@ impl Char for AsciiChar { CharClass::Number } else if c.is_ascii_whitespace() { CharClass::Whitespace - } else if config.delimeter_chars.contains(&c) { + } else if config.delimiter_chars.contains(&c) { CharClass::Delimiter } else { CharClass::NonWord diff --git a/src/config.rs b/src/config.rs index 41d12b4..85dfdc1 100644 --- a/src/config.rs +++ b/src/config.rs @@ -3,16 +3,16 @@ use crate::score::BONUS_BOUNDARY; #[non_exhaustive] pub struct MatcherConfig { - pub delimeter_chars: &'static [u8], + pub delimiter_chars: &'static [u8], /// Extra bonus for word boundary after 
whitespace character or beginning of the string pub bonus_boundary_white: u16, // Extra bonus for word boundary after slash, colon, semi-colon, and comma pub bonus_boundary_delimiter: u16, - pub inital_char_class: CharClass, - /// Whether to normalize latin script charaters to ASCII + pub initial_char_class: CharClass, + /// Whether to normalize latin script characters to ASCII /// this significantly degrades performance so its not recommended - /// to be truned on by default + /// to be turned on by default pub normalize: bool, /// whether to ignore casing pub ignore_case: bool, @@ -29,10 +29,10 @@ pub struct MatcherConfig { impl MatcherConfig { pub const DEFAULT: Self = { MatcherConfig { - delimeter_chars: b"/,:;|", + delimiter_chars: b"/,:;|", bonus_boundary_white: BONUS_BOUNDARY + 2, bonus_boundary_delimiter: BONUS_BOUNDARY + 1, - inital_char_class: CharClass::Whitespace, + initial_char_class: CharClass::Whitespace, normalize: false, ignore_case: true, } @@ -42,22 +42,22 @@ impl MatcherConfig { impl MatcherConfig { pub fn set_match_paths(&mut self) { if cfg!(windows) { - self.delimeter_chars = b"/\\"; + self.delimiter_chars = b"/\\"; } else { - self.delimeter_chars = b"/"; + self.delimiter_chars = b"/"; } self.bonus_boundary_white = BONUS_BOUNDARY; - self.inital_char_class = CharClass::Delimiter; + self.initial_char_class = CharClass::Delimiter; } pub const fn match_paths(mut self) -> Self { if cfg!(windows) { - self.delimeter_chars = b"/\\"; + self.delimiter_chars = b"/\\"; } else { - self.delimeter_chars = b"/"; + self.delimiter_chars = b"/"; } self.bonus_boundary_white = BONUS_BOUNDARY; - self.inital_char_class = CharClass::Delimiter; + self.initial_char_class = CharClass::Delimiter; self } } diff --git a/src/fuzzy_greedy.rs b/src/fuzzy_greedy.rs index 3dcd6d1..80dbce8 100644 --- a/src/fuzzy_greedy.rs +++ b/src/fuzzy_greedy.rs @@ -2,15 +2,15 @@ use crate::chars::Char; use crate::Matcher; impl Matcher { - /// greedy fallback algoritm, much faster (linear 
time) but reported scores/indicies + /// greedy fallback algorithm, much faster (linear time) but reported scores/indices /// might not be the best match - pub(crate) fn fuzzy_match_greedy, N: Char>( + pub(crate) fn fuzzy_match_greedy, N: Char>( &mut self, haystack: &[H], needle: &[N], mut start: usize, mut end: usize, - indicies: &mut Vec, + indices: &mut Vec, ) -> Option { let first_char_end = if H::ASCII { start + 1 } else { end }; if !H::ASCII && needle.len() != 1 { @@ -27,7 +27,7 @@ impl Matcher { } } } - // mimimize the greedly match by greedy matching in reverse + // minimize the greedy match by greedy matching in reverse let mut needle_iter = needle.iter().rev().copied(); let mut needle_char = needle_iter.next().unwrap(); @@ -40,6 +40,6 @@ impl Matcher { needle_char = next_needle_char; } } - Some(self.calculate_score::(haystack, needle, start, end, indicies)) + Some(self.calculate_score::(haystack, needle, start, end, indices)) } } diff --git a/src/fuzzy_optimal.rs b/src/fuzzy_optimal.rs index 27d7840..188d6cd 100644 --- a/src/fuzzy_optimal.rs +++ b/src/fuzzy_optimal.rs @@ -10,39 +10,39 @@ use crate::score::{ use crate::{Matcher, MatcherConfig}; impl Matcher { - pub(crate) fn fuzzy_match_optimal, N: Char>( + pub(crate) fn fuzzy_match_optimal, N: Char>( &mut self, haystack: &[H], needle: &[N], start: usize, greedy_end: usize, end: usize, - indicies: &mut Vec, + indices: &mut Vec, ) -> Option { // construct a matrix (and copy the haystack), the matrix and haystack size are bounded // to avoid the slow O(mn) time complexity for large inputs.
Furthermore, it allows - // us to treat needle indecies as u16 + // us to treat needle indices as u16 let Some(mut matrix) = self.slab.alloc(&haystack[start..end], needle.len()) else { - return self.fuzzy_match_greedy::( + return self.fuzzy_match_greedy::( haystack, needle, start, greedy_end, - indicies, + indices, ); }; let prev_class = start .checked_sub(1) .map(|i| haystack[i].char_class(&self.config)) - .unwrap_or(self.config.inital_char_class); + .unwrap_or(self.config.initial_char_class); let (max_score_pos, max_score, matched) = matrix.setup(needle, prev_class, &self.config); - // this only happend with unicode haystacks, for ASCII the prefilter handles all rejects + // this only happened with unicode haystacks, for ASCII the prefilter handles all rejects if !matched { return None; } if needle.len() == 1 { - indicies.push(max_score_pos as u32); + indices.push(max_score_pos as u32); return Some(max_score); } debug_assert_eq!( @@ -52,8 +52,8 @@ impl Matcher { // populate the matrix and find the best score let (max_score, best_match_end) = matrix.populate_matrix(needle); - if INDICIES { - matrix.reconstruct_optimal_path(needle, start as u32, indicies, best_match_end); + if INDICES { + matrix.reconstruct_optimal_path(needle, start as u32, indices, best_match_end); } Some(max_score) } @@ -224,12 +224,12 @@ impl Matrix<'_, H> { &self, needle: &[N], start: u32, - indicies: &mut Vec, + indices: &mut Vec, best_match_end: u16, ) { - indicies.resize(needle.len(), 0); + indices.resize(needle.len(), 0); - let mut row_iter = self.rows_rev().zip(indicies.iter_mut().rev()).peekable(); + let mut row_iter = self.rows_rev().zip(indices.iter_mut().rev()).peekable(); let (mut row, mut matched_col_idx) = row_iter.next().unwrap(); let mut next_row: Option = None; let mut col = best_match_end; diff --git a/src/lib.rs b/src/lib.rs index 9ae70a6..e3a3627 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,4 @@ -// sadly this doens't optmimzie well currently +// sadly ranges don't 
optimize well #![allow(clippy::manual_range_contains)] mod chars; @@ -63,7 +63,7 @@ impl Matcher { self.fuzzy_matcher_impl::(haystack, needle, &mut Vec::new()) } - pub fn fuzzy_indicies( + pub fn fuzzy_indices( &mut self, haystack: Utf32Str<'_>, needle: Utf32Str<'_>, @@ -73,7 +73,7 @@ impl Matcher { self.fuzzy_matcher_impl::(haystack, needle, indidies) } - fn fuzzy_matcher_impl( + fn fuzzy_matcher_impl( &mut self, haystack: Utf32Str<'_>, needle_: Utf32Str<'_>, @@ -92,7 +92,7 @@ impl Matcher { match (haystack, needle_) { (Utf32Str::Ascii(haystack), Utf32Str::Ascii(needle)) => { let (start, greedy_end, end) = self.prefilter_ascii(haystack, needle)?; - self.fuzzy_match_optimal::( + self.fuzzy_match_optimal::( AsciiChar::cast(haystack), AsciiChar::cast(needle), start, @@ -108,7 +108,7 @@ impl Matcher { } (Utf32Str::Unicode(haystack), Utf32Str::Ascii(needle)) => { let (start, end) = self.prefilter_non_ascii(haystack, needle_)?; - self.fuzzy_match_optimal::( + self.fuzzy_match_optimal::( haystack, AsciiChar::cast(needle), start, @@ -119,7 +119,7 @@ impl Matcher { } (Utf32Str::Unicode(haystack), Utf32Str::Unicode(needle)) => { let (start, end) = self.prefilter_non_ascii(haystack, needle_)?; - self.fuzzy_match_optimal::( + self.fuzzy_match_optimal::( haystack, needle, start, @@ -131,11 +131,11 @@ impl Matcher { } } - // pub fn fuzzy_indicies( + // pub fn fuzzy_indices( // &mut self, // query: &Query, // mut haystack: Utf32Str<'_>, - // indicies: &mut Vec, + // indices: &mut Vec, // ) -> Option { // if haystack.len() > u32::MAX as usize { // haystack = &haystack[..u32::MAX as usize] @@ -146,14 +146,14 @@ impl Matcher { // ); // if self.config.use_v1 { // if query.is_ascii && !self.config.normalize { - // self.fuzzy_matcher_v1::(query, haystack, indicies) + // self.fuzzy_matcher_v1::(query, haystack, indices) // } else { - // self.fuzzy_matcher_v1::(query, haystack, indicies) + // self.fuzzy_matcher_v1::(query, haystack, indices) // } // } else if query.is_ascii &&
!self.config.normalize { - // self.fuzzy_matcher_v2::(query, haystack, indicies) + // self.fuzzy_matcher_v2::(query, haystack, indices) // } else { - // self.fuzzy_matcher_v2::(query, haystack, indicies) + // self.fuzzy_matcher_v2::(query, haystack, indices) // } // } } diff --git a/src/matrix.rs b/src/matrix.rs index dadd526..37eb6a9 100644 --- a/src/matrix.rs +++ b/src/matrix.rs @@ -155,7 +155,7 @@ where pub(crate) struct Matrix<'a, C: Char> { pub haystack: &'a mut [C], - // stored as a seperate array instead of struct + // stored as a separate array instead of struct // to avoid padding sine char is too large and u8 too small :/ pub bonus: &'a mut [u16], pub row_offs: &'a mut [u16], @@ -264,10 +264,10 @@ impl MatrixSlab { return None; } unsafe { - // safetly: this allocation is valid for MATRIX_ALLOC_LAYOUT + // safety: this allocation is valid for MATRIX_ALLOC_LAYOUT let (haystack, bonus, rows, cells) = matrix_layout.fieds_from_ptr(self.0); - // copy haystack before creating refernces to ensure we donu't crate - // refrences to invalid chars (which may or may not be UB) + // copy haystack before creating references to ensure we don't create + // references to invalid chars (which may or may not be UB) haystack_ .as_ptr() .copy_to_nonoverlapping(haystack as *mut _, haystack_.len()); diff --git a/src/score.rs b/src/score.rs index fca3f7d..fe96f61 100644 --- a/src/score.rs +++ b/src/score.rs @@ -69,22 +69,22 @@ impl Matcher { self.config.bonus_for(prev_class, class) } - pub(crate) fn calculate_score, N: Char>( + pub(crate) fn calculate_score, N: Char>( &mut self, haystack: &[H], needle: &[N], start: usize, end: usize, - indicies: &mut Vec, + indices: &mut Vec, ) -> u16 { - if INDICIES { - indicies.reserve(needle.len()); + if INDICES { + indices.reserve(needle.len()); } let mut prev_class = start .checked_sub(1) .map(|i| haystack[i].char_class(&self.config)) - .unwrap_or(self.config.inital_char_class); + .unwrap_or(self.config.initial_char_class); let mut
needle_iter = needle.iter(); let mut needle_char = *needle_iter.next().unwrap(); @@ -92,8 +92,8 @@ impl Matcher { let mut consecutive = 1; // unrolled the firs iteration to make applying the first char multiplier less akward - if INDICIES { - indicies.push(start as u32) + if INDICES { + indices.push(start as u32) } let mut first_bonus = self.bonus_for(prev_class, haystack[0].char_class(&self.config)); let mut score = SCORE_MATCH + first_bonus * BONUS_FIRST_CHAR_MULTIPLIER; @@ -102,8 +102,8 @@ impl Matcher { let class = c.char_class(&self.config); let c = c.normalize(&self.config); if c == needle_char { - if INDICIES { - indicies.push(i as u32 + start as u32) + if INDICES { + indices.push(i as u32 + start as u32) } let mut bonus = self.bonus_for(prev_class, class); if consecutive == 0 { diff --git a/src/tests.rs b/src/tests.rs index 6d8a903..9d16df6 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -22,7 +22,7 @@ pub fn assert_matches( config.set_match_paths(); } let mut matcher = Matcher::new(config); - let mut indicies = Vec::new(); + let mut indices = Vec::new(); let mut needle_buf = Vec::new(); let mut haystack_buf = Vec::new(); for &(haystack, needle, start, end, mut score) in cases { @@ -35,8 +35,8 @@ pub fn assert_matches( let haystack = Utf32Str::new(haystack, &mut haystack_buf); score += needle.len() as u16 * SCORE_MATCH; - let res = matcher.fuzzy_indicies(haystack, needle, &mut indicies); - let match_chars: Vec<_> = indicies + let res = matcher.fuzzy_indices(haystack, needle, &mut indices); + let match_chars: Vec<_> = indices .iter() .map(|&i| haystack.get(i).normalize(&matcher.config)) .collect(); @@ -47,9 +47,9 @@ pub fn assert_matches( Some(score), "{needle:?} did not match {haystack:?}: {match_chars:?}" ); - assert_eq!(match_chars, needle_chars, "match indicies are incorrect"); + assert_eq!(match_chars, needle_chars, "match indices are incorrect"); assert_eq!( - indicies.first().copied()..indicies.last().map(|&i| i + 1), + 
indices.first().copied()..indices.last().map(|&i| i + 1), Some(start)..Some(end), "{needle:?} match {haystack:?}[{start}..{end}]" ); diff --git a/src/utf32_str.rs b/src/utf32_str.rs index 67e1aff..9746205 100644 --- a/src/utf32_str.rs +++ b/src/utf32_str.rs @@ -13,7 +13,7 @@ use std::slice; /// matching itself. Furthermore there are a lot of exta optimizations available /// for ascii only text (but checking during each match has too much overhead). /// -/// Ofcourse this comes at exta memory cost as we usally still need the ut8 +/// Of course this comes at extra memory cost as we usually still need the utf8 /// encoded variant for rendenring. In the (dominant) case of ascii-only text /// we don't require a copy. Furthermore fuzzy matching usually is applied while /// the user is typing on the fly so the same item is potentially matched many @@ -24,13 +24,13 @@ use std::slice; /// char buffer around that is filled with the presegmented chars /// /// Another advantage of this approach is that the matcher will naturally -/// produce char indecies (instead of utf8 offsets) annyway. With a -/// codepoint basec representation like this the indecies can be used +/// produce char indices (instead of utf8 offsets) anyway. With a +/// codepoint based representation like this the indices can be used /// directly #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash, Debug)] pub enum Utf32Str<'a> { /// A string represented as ASCII encoded bytes. - /// Correctness invariant: must only contain vaild ASCII (<=127) + /// Correctness invariant: must only contain valid ASCII (<=127) Ascii(&'a [u8]), /// A string represented as an array of unicode codepoints (basically UTF-32).
Unicode(&'a [char]), @@ -75,7 +75,7 @@ impl<'a> Utf32Str<'a> { } /// Same as `slice` but accepts a u32 range for convenicene sine - /// those are the indecies returned by the matcher + /// those are the indices returned by the matcher #[inline] pub fn slice_u32(&self, range: impl RangeBounds) -> Utf32Str { let start = match range.start_bound() { diff --git a/typos.toml b/typos.toml new file mode 100644 index 0000000..1408f63 --- /dev/null +++ b/typos.toml @@ -0,0 +1,3 @@ +default.extend-ignore-re = ["\\\\u\\{[0-9A-Za-z]*\\}"] +[files] +extend-exclude = ["integration_tests", "verilogae/tests", "*.mir", "openvaf/lexer/src/tests.rs"]