Fix typos

This commit is contained in:
Michael Davis 2023-08-05 10:25:01 -05:00
parent e774ca23b8
commit b4f547b0f4
No known key found for this signature in database
12 changed files with 33 additions and 33 deletions

View File

@ -4,7 +4,7 @@
Nucleo uses the exact **same scoring system as fzf**. That means you should get the same ranking quality (or better) as you are used to from fzf. However, `nucleo` has a more faithful implementation of the Smith-Waterman algorithm which is normally used in DNA sequence alignment (see https://www.cs.cmu.edu/~ckingsf/bioinfo-lectures/gaps.pdf) with two separate matrices (instead of one like fzf). This means that `nucleo` finds the optimal match more often. For example if you match `foo` in `xf foo` `nucleo` will match `x__foo` but `fzf` will match `xf_oo` (you can increase the word length; the result will stay the same). The former is the more intuitive match and has a higher score according to the ranking system that both `nucleo` and fzf use. Nucleo uses the exact **same scoring system as fzf**. That means you should get the same ranking quality (or better) as you are used to from fzf. However, `nucleo` has a more faithful implementation of the Smith-Waterman algorithm which is normally used in DNA sequence alignment (see https://www.cs.cmu.edu/~ckingsf/bioinfo-lectures/gaps.pdf) with two separate matrices (instead of one like fzf). This means that `nucleo` finds the optimal match more often. For example if you match `foo` in `xf foo` `nucleo` will match `x__foo` but `fzf` will match `xf_oo` (you can increase the word length; the result will stay the same). The former is the more intuitive match and has a higher score according to the ranking system that both `nucleo` and fzf use.
**Compared to `skim`** (and the `fuzzy-matcher` crate) `nucleo` has an even larger performance advantage and is often around **six times faster** (see benchmarks below). Furthermore, the bonus system used by nucleo and fzf is (in my opinion)more consistent/superior. `nulceo` also handles non-ascii text much better. (`skim`s bonus system and even case insensitivity only work for ASCII). **Compared to `skim`** (and the `fuzzy-matcher` crate) `nucleo` has an even larger performance advantage and is often around **six times faster** (see benchmarks below). Furthermore, the bonus system used by nucleo and fzf is (in my opinion) more consistent/superior. `nulceo` also handles non-ascii text much better. (`skim`s bonus system and even case insensitivity only work for ASCII).
Nucleo also handles Unicode graphemes more correctly. `Fzf` and `skim` both operate on Unicode code points (chars). That means that multi codepoint graphemes can have weird effects (match multiple times, weirdly change the score, ...). `nucleo` will always use the first codepoint of the grapheme for matching instead (and reports grapheme indices, so they can be highlighted correctly). Nucleo also handles Unicode graphemes more correctly. `Fzf` and `skim` both operate on Unicode code points (chars). That means that multi codepoint graphemes can have weird effects (match multiple times, weirdly change the score, ...). `nucleo` will always use the first codepoint of the grapheme for matching instead (and reports grapheme indices, so they can be highlighted correctly).

View File

@ -156,7 +156,7 @@ pub enum CharClass {
Number, Number,
} }
/// nucleo can not match graphemes as single units to work around /// nucleo cannot match graphemes as single units to work around
/// that we only use the first codepoint of each grapheme /// that we only use the first codepoint of each grapheme
pub fn graphemes(text: &str) -> impl Iterator<Item = char> + '_ { pub fn graphemes(text: &str) -> impl Iterator<Item = char> + '_ {
text.graphemes(true).map(|grapheme| { text.graphemes(true).map(|grapheme| {

View File

@ -145,7 +145,7 @@ impl Matcher {
} }
} }
// in case we don't have any letter in the needle // in case we don't have any letter in the needle
// we can treat the search as case sensitive and use memmem dircedly which is way faster // we can treat the search as case sensitive and use memmem directly which is way faster
None => (), None => (),
} }
} }

View File

@ -36,7 +36,7 @@ use crate::matrix::MatrixSlab;
/// A matcher engine that can execute (fuzzy) matches. /// A matcher engine that can execute (fuzzy) matches.
/// ///
/// A matcher contains **heap allocated** scratch memory that is reused during /// A matcher contains **heap allocated** scratch memory that is reused during
/// matching. This scratch memory allows the matcher to garunte that it will /// matching. This scratch memory allows the matcher to guarantee that it will
/// **never allocate** during matching (with the exception of pushing to the /// **never allocate** during matching (with the exception of pushing to the
/// `indices` vector if there isn't enough capacity). However this scratch /// `indices` vector if there isn't enough capacity). However this scratch
/// memory is fairly large (around 135KB) so creating a matcher is expensive and /// memory is fairly large (around 135KB) so creating a matcher is expensive and
@ -58,7 +58,7 @@ pub struct Matcher {
slab: MatrixSlab, slab: MatrixSlab,
} }
// this is just here for convenience not ruse if we should implement this // this is just here for convenience not sure if we should implement this
impl Clone for Matcher { impl Clone for Matcher {
fn clone(&self) -> Self { fn clone(&self) -> Self {
Matcher { Matcher {
@ -93,7 +93,7 @@ impl Matcher {
} }
} }
/// Find the fuzzy match with the higehest score in the `haystack`. /// Find the fuzzy match with the highest score in the `haystack`.
/// ///
/// This function has `O(mn)` time complexity for short inputs. To /// This function has `O(mn)` time complexity for short inputs. To
/// avoid slowdowns it automatically falls back to [greedy matching] /// avoid slowdowns it automatically falls back to [greedy matching]
@ -241,7 +241,7 @@ impl Matcher {
/// Greedily find a fuzzy match in the `haystack` and compute its indices. /// Greedily find a fuzzy match in the `haystack` and compute its indices.
/// ///
/// This functions has `O(n)` time complexity but may provide unintutive (non-optimal) /// This functions has `O(n)` time complexity but may provide unintuitive (non-optimal)
/// indices and scores. Usually [fuzz_indices](crate::Matcher::fuzzy_indices) should /// indices and scores. Usually [fuzz_indices](crate::Matcher::fuzzy_indices) should
/// be preferred. /// be preferred.
/// ///

View File

@ -107,7 +107,7 @@ pub(crate) struct ScoreCell {
pub(crate) struct MatcherDataView<'a, C: Char> { pub(crate) struct MatcherDataView<'a, C: Char> {
pub haystack: &'a mut [C], pub haystack: &'a mut [C],
// stored as a separate array instead of struct // stored as a separate array instead of struct
// to avoid padding sine char is too large and u8 too small :/ // to avoid padding since char is too large and u8 too small :/
pub bonus: &'a mut [u8], pub bonus: &'a mut [u8],
pub current_row: &'a mut [ScoreCell], pub current_row: &'a mut [ScoreCell],
pub row_offs: &'a mut [u16], pub row_offs: &'a mut [u16],
@ -162,7 +162,7 @@ impl MatrixSlab {
let cells = haystack_.len() * needle_len; let cells = haystack_.len() * needle_len;
if cells > MAX_MATRIX_SIZE if cells > MAX_MATRIX_SIZE
|| haystack_.len() > u16::MAX as usize || haystack_.len() > u16::MAX as usize
// ensures that socres never overflow // ensures that scores never overflow
|| needle_len > MAX_NEEDLE_LEN || needle_len > MAX_NEEDLE_LEN
{ {
return None; return None;
@ -175,7 +175,7 @@ impl MatrixSlab {
// safely: this allocation is valid for MATRIX_ALLOC_LAYOUT // safely: this allocation is valid for MATRIX_ALLOC_LAYOUT
let (haystack, bonus, rows, current_row, matrix_cells) = let (haystack, bonus, rows, current_row, matrix_cells) =
matrix_layout.fieds_from_ptr(self.0); matrix_layout.fieds_from_ptr(self.0);
// copy haystack before creating references to ensure we donu't crate // copy haystack before creating references to ensure we don't create
// references to invalid chars (which may or may not be UB) // references to invalid chars (which may or may not be UB)
haystack_ haystack_
.as_ptr() .as_ptr()

View File

@ -20,7 +20,7 @@ pub(crate) const BONUS_BOUNDARY: u16 = SCORE_MATCH / 2;
// However, this prioritizes camel case over non-camel case. // However, this prioritizes camel case over non-camel case.
// In fzf/skim this is not a problem since they score off the max // In fzf/skim this is not a problem since they score off the max
// consecutive bonus. However, we don't do that (because its incorrect) // consecutive bonus. However, we don't do that (because its incorrect)
// so to avoids prioritzing camel we use a lower bonus. I think that's fine // so to avoids prioritizing camel we use a lower bonus. I think that's fine
// usually camel case is a weaker boundary than actual word boundaries anyway // usually camel case is a weaker boundary than actual word boundaries anyway
// This also has the nice sideeffect of perfectly balancing out // This also has the nice sideeffect of perfectly balancing out
// camel case, snake case and the consecutive version of the word // camel case, snake case and the consecutive version of the word
@ -97,7 +97,7 @@ impl Matcher {
let mut in_gap = false; let mut in_gap = false;
let mut consecutive = 1; let mut consecutive = 1;
// unrolled the firs iteration to make applying the first char multiplier less akward // unrolled the first iteration to make applying the first char multiplier less awkward
if INDICES { if INDICES {
indices.push(start as u32) indices.push(start as u32)
} }

View File

@ -5,7 +5,7 @@ use std::{fmt, slice};
/// ///
/// Usually rusts' utf8 encoded strings are great. However during fuzzy matching /// Usually rusts' utf8 encoded strings are great. However during fuzzy matching
/// operates on codepoints (it should operate on graphemes but that's too much /// operates on codepoints (it should operate on graphemes but that's too much
/// hassle to deal with). We want to quickly iterate these codeboints between /// hassle to deal with). We want to quickly iterate these codepoints between
/// (up to 5 times) during matching. /// (up to 5 times) during matching.
/// ///
/// Doing codepoint segmentation on the fly not only blows through the cache /// Doing codepoint segmentation on the fly not only blows through the cache
@ -14,7 +14,7 @@ use std::{fmt, slice};
/// for ascii only text (but checking during each match has too much overhead). /// for ascii only text (but checking during each match has too much overhead).
/// ///
/// Of course this comes at extra memory cost as we usually still need the utf8 /// Of course this comes at extra memory cost as we usually still need the utf8
/// encoded variant for rendenring. In the (dominant) case of ascii-only text /// encoded variant for rendering. In the (dominant) case of ascii-only text
/// we don't require a copy. Furthermore fuzzy matching usually is applied while /// we don't require a copy. Furthermore fuzzy matching usually is applied while
/// the user is typing on the fly so the same item is potentially matched many /// the user is typing on the fly so the same item is potentially matched many
/// times (making the upfront cost more worth it). That means that its /// times (making the upfront cost more worth it). That means that its
@ -24,8 +24,8 @@ use std::{fmt, slice};
/// char buffer around that is filled with the presegmented chars /// char buffer around that is filled with the presegmented chars
/// ///
/// Another advantage of this approach is that the matcher will naturally /// Another advantage of this approach is that the matcher will naturally
/// produce char indices (instead of utf8 offsets) annyway. With a /// produce char indices (instead of utf8 offsets) anyway. With a
/// codepoint basec representation like this the indices can be used /// codepoint based representation like this the indices can be used
/// directly /// directly
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)] #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
pub enum Utf32Str<'a> { pub enum Utf32Str<'a> {
@ -84,7 +84,7 @@ impl<'a> Utf32Str<'a> {
} }
} }
/// Same as `slice` but accepts a u32 range for convenicene sine /// Same as `slice` but accepts a u32 range for convenience since
/// those are the indices returned by the matcher /// those are the indices returned by the matcher
#[inline] #[inline]
pub fn slice_u32(&self, range: impl RangeBounds<u32>) -> Utf32Str { pub fn slice_u32(&self, range: impl RangeBounds<u32>) -> Utf32Str {

View File

@ -43,9 +43,9 @@ pub(crate) struct Vec<T> {
buckets: [Bucket<T>; BUCKETS as usize], buckets: [Bucket<T>; BUCKETS as usize],
/// the number of initialized elements in this vector /// the number of initialized elements in this vector
count: AtomicU32, count: AtomicU32,
/// the number of matcher columns in this vector, its absoletly critical that /// the number of matcher columns in this vector, its absolutely critical that
/// this remains constant after initialization (safety invariant) since /// this remains constant after initialization (safety invariant) since
/// it is used to calculate the Entry layou /// it is used to calculate the Entry layout
columns: u32, columns: u32,
} }

View File

@ -8,7 +8,7 @@ use rayon::ThreadPool;
pub use crate::pattern::{CaseMatching, MultiPattern, Pattern, PatternKind}; pub use crate::pattern::{CaseMatching, MultiPattern, Pattern, PatternKind};
pub use crate::utf32_string::Utf32String; pub use crate::utf32_string::Utf32String;
use crate::worker::Woker; use crate::worker::Worker;
pub use nucleo_matcher::{chars, Matcher, MatcherConfig, Utf32Str}; pub use nucleo_matcher::{chars, Matcher, MatcherConfig, Utf32Str};
mod boxcar; mod boxcar;
@ -85,7 +85,7 @@ pub struct Nucleo<T: Sync + Send + 'static> {
// but this lets us avoid some unsafe // but this lets us avoid some unsafe
canceled: Arc<AtomicBool>, canceled: Arc<AtomicBool>,
should_notify: Arc<AtomicBool>, should_notify: Arc<AtomicBool>,
worker: Arc<Mutex<Woker<T>>>, worker: Arc<Mutex<Worker<T>>>,
pool: ThreadPool, pool: ThreadPool,
cleared: bool, cleared: bool,
item_count: u32, item_count: u32,
@ -104,7 +104,7 @@ impl<T: Sync + Send + 'static> Nucleo<T> {
case_matching: CaseMatching, case_matching: CaseMatching,
columns: u32, columns: u32,
) -> Self { ) -> Self {
let (pool, worker) = Woker::new(num_threads, config, notify.clone(), columns); let (pool, worker) = Worker::new(num_threads, config, notify.clone(), columns);
Self { Self {
canceled: worker.canceled.clone(), canceled: worker.canceled.clone(),
should_notify: worker.should_notify.clone(), should_notify: worker.should_notify.clone(),
@ -137,7 +137,7 @@ impl<T: Sync + Send + 'static> Nucleo<T> {
/// # Safety /// # Safety
/// ///
/// Item at `index` must be initialized. That means you must have observed /// Item at `index` must be initialized. That means you must have observed
/// `push` returning this value or `get` retunring `Some` for this value. /// `push` returning this value or `get` returning `Some` for this value.
/// Just because a later index is initialized doesn't mean that this index /// Just because a later index is initialized doesn't mean that this index
/// is initialized /// is initialized
pub unsafe fn get_unchecked(&self, index: u32) -> Item<'_, T> { pub unsafe fn get_unchecked(&self, index: u32) -> Item<'_, T> {
@ -219,7 +219,7 @@ impl<T: Sync + Send + 'static> Nucleo<T> {
impl<T: Sync + Send> Drop for Nucleo<T> { impl<T: Sync + Send> Drop for Nucleo<T> {
fn drop(&mut self) { fn drop(&mut self) {
// we ensure the worker quits before dropping items to ensure that // we ensure the worker quits before dropping items to ensure that
// the worker can always assume the items outlife it // the worker can always assume the items outlive it
self.canceled.store(true, atomic::Ordering::Relaxed); self.canceled.store(true, atomic::Ordering::Relaxed);
let lock = self.worker.try_lock_for(Duration::from_secs(1)); let lock = self.worker.try_lock_for(Duration::from_secs(1));
if lock.is_none() { if lock.is_none() {
@ -228,8 +228,8 @@ impl<T: Sync + Send> Drop for Nucleo<T> {
} }
} }
/// convenicne function to easily fuzzy match /// convenience function to easily fuzzy match
/// on a (relatively small list of inputs). This is not recommended for building a full tui /// on a (relatively small) list of inputs. This is not recommended for building a full tui
/// application that can match large numbers of matches as all matching is done on the current /// application that can match large numbers of matches as all matching is done on the current
/// thread, effectively blocking the UI /// thread, effectively blocking the UI
pub fn fuzzy_match<T: AsRef<str>>( pub fn fuzzy_match<T: AsRef<str>>(

View File

@ -2,7 +2,7 @@
//! //!
//! This implementation is copied verbatim from `std::slice::sort_unstable` and then parallelized. //! This implementation is copied verbatim from `std::slice::sort_unstable` and then parallelized.
//! The only difference from the original is that calls to `recurse` are executed in parallel using //! The only difference from the original is that calls to `recurse` are executed in parallel using
//! `rayon_core::join`a. //! `rayon_core::join`.
//! Further modified for nucleo to allow canceling the sort //! Further modified for nucleo to allow canceling the sort
// Copyright (c) 2010 The Rust Project Developers // Copyright (c) 2010 The Rust Project Developers

View File

@ -36,7 +36,7 @@ impl Utf32String {
} }
} }
/// Same as `slice` but accepts a u32 range for convenicene sine /// Same as `slice` but accepts a u32 range for convenience since
/// those are the indices returned by the matcher /// those are the indices returned by the matcher
#[inline] #[inline]
pub fn slice(&self, range: impl RangeBounds<u32>) -> Utf32Str { pub fn slice(&self, range: impl RangeBounds<u32>) -> Utf32Str {

View File

@ -14,7 +14,7 @@ use crate::{boxcar, Match};
struct Matchers(Box<[UnsafeCell<nucleo_matcher::Matcher>]>); struct Matchers(Box<[UnsafeCell<nucleo_matcher::Matcher>]>);
impl Matchers { impl Matchers {
// thiss is not a true mut from ref, we use a cell here // this is not a true mut from ref, we use a cell here
#[allow(clippy::mut_from_ref)] #[allow(clippy::mut_from_ref)]
unsafe fn get(&self) -> &mut nucleo_matcher::Matcher { unsafe fn get(&self) -> &mut nucleo_matcher::Matcher {
&mut *self.0[rayon::current_thread_index().unwrap()].get() &mut *self.0[rayon::current_thread_index().unwrap()].get()
@ -24,7 +24,7 @@ impl Matchers {
unsafe impl Sync for Matchers {} unsafe impl Sync for Matchers {}
unsafe impl Send for Matchers {} unsafe impl Send for Matchers {}
pub(crate) struct Woker<T: Sync + Send + 'static> { pub(crate) struct Worker<T: Sync + Send + 'static> {
pub(crate) running: bool, pub(crate) running: bool,
matchers: Matchers, matchers: Matchers,
pub(crate) matches: Vec<Match>, pub(crate) matches: Vec<Match>,
@ -38,7 +38,7 @@ pub(crate) struct Woker<T: Sync + Send + 'static> {
in_flight: Vec<u32>, in_flight: Vec<u32>,
} }
impl<T: Sync + Send + 'static> Woker<T> { impl<T: Sync + Send + 'static> Worker<T> {
pub(crate) fn item_count(&self) -> u32 { pub(crate) fn item_count(&self) -> u32 {
self.last_snapshot - self.in_flight.len() as u32 self.last_snapshot - self.in_flight.len() as u32
} }
@ -64,7 +64,7 @@ impl<T: Sync + Send + 'static> Woker<T> {
let matchers = (0..worker_threads) let matchers = (0..worker_threads)
.map(|_| UnsafeCell::new(nucleo_matcher::Matcher::new(config))) .map(|_| UnsafeCell::new(nucleo_matcher::Matcher::new(config)))
.collect(); .collect();
let worker = Woker { let worker = Worker {
running: false, running: false,
matchers: Matchers(matchers), matchers: Matchers(matchers),
last_snapshot: 0, last_snapshot: 0,
@ -211,7 +211,7 @@ impl<T: Sync + Send + 'static> Woker<T> {
if match2.idx == u32::MAX { if match2.idx == u32::MAX {
return true; return true;
} }
// the tie breaker is comparitevly rarely needed so we keep it // the tie breaker is comparatively rarely needed so we keep it
// in a branch especially because we need to access the items // in a branch especially because we need to access the items
// array here which involves some pointer chasing // array here which involves some pointer chasing
let item1 = self.items.get_unchecked(match1.idx); let item1 = self.items.get_unchecked(match1.idx);