Merge pull request #4 from helix-editor/typos

Fix typos and removed unused files
2024-12-22 01:47:49 +00:00 · 2023-08-05 18:31:41 +02:00 · 2023-08-05 18:31:41 +02:00 · 093ecafb01
commit 093ecafb01
parent e774ca23b8 f73a1988f2
15 changed files with 33 additions and 43 deletions
--- a/README.md
+++ b/README.md
@ -4,7 +4,7 @@

 Nucleo uses the exact **same scoring system as fzf**. That means you should get the same ranking quality (or better) as you are used to from fzf. However, `nucleo` has a more faithful implementation of the Smith-Waterman algorithm which is normally used in DNA sequence alignment (see https://www.cs.cmu.edu/~ckingsf/bioinfo-lectures/gaps.pdf) with two separate matrices (instead of one like fzf). This means that `nucleo` finds the optimal match more often. For example if you match `foo` in `xf foo` `nucleo` will match `x__foo` but `fzf` will match `xf_oo` (you can increase the word length the result will stay the same). The former is the more intuitive match and has a higher score according to the ranking system that both `nucleo` and fzf.

-**Compared to `skim`** (and the `fuzzy-matcher` crate) `nucleo` has an even larger performance advantage and is often around **six times faster** (see benchmarks below). Furthermore, the bonus system used by nucleo and fzf is (in my opinion)more consistent/superior. `nulceo` also handles non-ascii text much better. (`skim`s bonus system and even case insensitivity only work for ASCII).
+**Compared to `skim`** (and the `fuzzy-matcher` crate) `nucleo` has an even larger performance advantage and is often around **six times faster** (see benchmarks below). Furthermore, the bonus system used by nucleo and fzf is (in my opinion) more consistent/superior. `nucleo` also handles non-ascii text much better. (`skim`'s bonus system and even case insensitivity only work for ASCII).

 Nucleo also handles Unicode graphemes more correctly. `Fzf` and `skim` both operate on Unicode code points (chars). That means that multi codepoint graphemes can have weird effects (match multiple times, weirdly change the score, ...). `nucleo` will always use the first codepoint of the grapheme for matching instead (and reports grapheme indices, so they can be highlighted correctly). 

--- a/matcher/src/chars.rs
+++ b/matcher/src/chars.rs
@ -156,7 +156,7 @@ pub enum CharClass {
    Number,
 }

-/// nucleo can not match graphemes as single units to work around
+/// nucleo cannot match graphemes as single units to work around
 /// that we only use the first codepoint of each grapheme
 pub fn graphemes(text: &str) -> impl Iterator<Item = char> + '_ {
    text.graphemes(true).map(|grapheme| {
--- a/matcher/src/exact.rs
+++ b/matcher/src/exact.rs
@ -145,7 +145,7 @@ impl Matcher {
                    }
                }
                // in case we don't have any letter in the needle
-                // we can treat the search as case sensitive and use memmem dircedly which is way faster
+                // we can treat the search as case sensitive and use memmem directly which is way faster
                None => (),
            }
        }
--- a/matcher/src/lib.rs
+++ b/matcher/src/lib.rs
@ -36,7 +36,7 @@ use crate::matrix::MatrixSlab;
 /// A matcher engine that can execute (fuzzy) matches.
 ///
 /// A matches contains **heap allocated** scratch memory that is reused during
-/// matching. This scratch memory allows the matcher to garunte that it will
+/// matching. This scratch memory allows the matcher to guarantee that it will
 /// **never allocate** during matching (with the exception of pushing to the
 /// `indices` vector if there isn't enough capacity). However this scratch
 /// memory is fairly large (around 135KB) so creating a matcher is expensive and
@ -58,7 +58,7 @@ pub struct Matcher {
    slab: MatrixSlab,
 }

-// this is just here for convenience not ruse if we should implement this
+// this is just here for convenience not sure if we should implement this
 impl Clone for Matcher {
    fn clone(&self) -> Self {
        Matcher {
@ -93,7 +93,7 @@ impl Matcher {
        }
    }

-    /// Find the fuzzy match with the higehest score in the `haystack`.
+    /// Find the fuzzy match with the highest score in the `haystack`.
    ///
    /// This functions has `O(mn)` time complexity for short inputs. To
    /// avoid slowdowns it automatically falls back to [greedy matching]
@ -241,7 +241,7 @@ impl Matcher {

    /// Greedly find a fuzzy match in the `haystack` and compute its indices.
    ///
-    /// This functions has `O(n)` time complexity but may provide unintutive (non-optimal)
+    /// This function has `O(n)` time complexity but may provide unintuitive (non-optimal)
    /// indices and scores. Usually [fuzz_indices](crate::Matcher::fuzzy_indices) should
    /// be preferred.
    ///
--- a/matcher/src/matrix.rs
+++ b/matcher/src/matrix.rs
@ -107,7 +107,7 @@ pub(crate) struct ScoreCell {
 pub(crate) struct MatcherDataView<'a, C: Char> {
    pub haystack: &'a mut [C],
    // stored as a separate array instead of struct
-    // to avoid padding sine char is too large and u8 too small :/
+    // to avoid padding since char is too large and u8 too small :/
    pub bonus: &'a mut [u8],
    pub current_row: &'a mut [ScoreCell],
    pub row_offs: &'a mut [u16],
@ -162,7 +162,7 @@ impl MatrixSlab {
        let cells = haystack_.len() * needle_len;
        if cells > MAX_MATRIX_SIZE
            || haystack_.len() > u16::MAX as usize
-            // ensures that socres never overflow
+            // ensures that scores never overflow
            || needle_len > MAX_NEEDLE_LEN
        {
            return None;
@ -175,7 +175,7 @@ impl MatrixSlab {
            // safely: this allocation is valid for MATRIX_ALLOC_LAYOUT
            let (haystack, bonus, rows, current_row, matrix_cells) =
                matrix_layout.fieds_from_ptr(self.0);
-            // copy haystack before creating references to ensure we donu't crate
+            // copy haystack before creating references to ensure we don't create
            // references to invalid chars (which may or may not be UB)
            haystack_
                .as_ptr()
--- a/matcher/src/multizip.rs
+++ b/matcher/src/multizip.rs
--- a/matcher/src/score.rs
+++ b/matcher/src/score.rs
@ -20,7 +20,7 @@ pub(crate) const BONUS_BOUNDARY: u16 = SCORE_MATCH / 2;
 // However, this priporitzes camel case over non-camel case.
 // In fzf/skim this is not a problem since they score off the max
 // consecutive bonus. However, we don't do that (because its incorrect)
-// so to avoids prioritzing camel we use a lower bonus. I think that's fine
+// so to avoid prioritizing camel case we use a lower bonus. I think that's fine
 // usually camel case is wekaer boundary than actual wourd boundaries anyway
 // This also has the nice sideeffect of perfectly balancing out
 // camel case, snake case and the consecutive version of the word
@ -97,7 +97,7 @@ impl Matcher {
        let mut in_gap = false;
        let mut consecutive = 1;

-        // unrolled the firs iteration to make applying the first char multiplier less akward
+        // unrolled the first iteration to make applying the first char multiplier less awkward
        if INDICES {
            indices.push(start as u32)
        }
--- a/matcher/src/utf32_str.rs
+++ b/matcher/src/utf32_str.rs
@ -5,7 +5,7 @@ use std::{fmt, slice};
 ///
 /// Usually rusts' utf8 encoded strings are great. However during fuzzy matching
 /// operates on codepoints (it should operate on graphemes but that's too much
-/// hassle to deal with). We want to quickly iterate these codeboints between
+/// hassle to deal with). We want to quickly iterate these codepoints between
 /// (up to 5 times) during matching.
 ///
 /// Doing codepoint segmentation on the fly not only blows trough the cache
@ -14,7 +14,7 @@ use std::{fmt, slice};
 /// for ascii only text (but checking during each match has too much overhead).
 ///
 /// Ofcourse this comes at exta memory cost as we usually still need the ut8
-/// encoded variant for rendenring. In the (dominant) case of ascii-only text
+/// encoded variant for rendering. In the (dominant) case of ascii-only text
 /// we don't require a copy. Furthermore fuzzy matching usually is applied while
 /// the user is typing on the fly so the same item is potentially matched many
 /// times (making the the upfront cost more worth it). That means that its
@ -24,8 +24,8 @@ use std::{fmt, slice};
 /// char buffer around that is filled with the presegmented chars
 ///
 /// Another advantage of this approach is that the matcher will naturally
-/// produce char indices (instead of utf8 offsets) annyway. With a
-/// codepoint basec representation like this the indices can be used
+/// produce char indices (instead of utf8 offsets) anyway. With a
+/// codepoint-based representation like this the indices can be used
 /// directly
 #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
 pub enum Utf32Str<'a> {
@ -84,7 +84,7 @@ impl<'a> Utf32Str<'a> {
        }
    }

-    /// Same as `slice` but accepts a u32 range for convenicene sine
+    /// Same as `slice` but accepts a u32 range for convenience since
    /// those are the indices returned by the matcher
    #[inline]
    pub fn slice_u32(&self, range: impl RangeBounds<u32>) -> Utf32Str {
--- a/src/boxcar.rs
+++ b/src/boxcar.rs
@ -43,9 +43,9 @@ pub(crate) struct Vec<T> {
    buckets: [Bucket<T>; BUCKETS as usize],
    /// the number of initialized elements in this vector
    count: AtomicU32,
-    /// the number of matcher columns in this vector, its absoletly critical that
+    /// the number of matcher columns in this vector, it's absolutely critical that
    /// this remains constant and after initilaziaton (safety invariant) since
-    /// it is used to calculate the Entry layou
+    /// it is used to calculate the Entry layout
    columns: u32,
 }

--- a/src/items.rs
+++ b/src/items.rs
@ -1 +0,0 @@
-
--- a/src/lib.rs
+++ b/src/lib.rs
@ -8,7 +8,7 @@ use rayon::ThreadPool;

 pub use crate::pattern::{CaseMatching, MultiPattern, Pattern, PatternKind};
 pub use crate::utf32_string::Utf32String;
-use crate::worker::Woker;
+use crate::worker::Worker;
 pub use nucleo_matcher::{chars, Matcher, MatcherConfig, Utf32Str};

 mod boxcar;
@ -85,7 +85,7 @@ pub struct Nucleo<T: Sync + Send + 'static> {
    // but this lets us avoid some unsafe
    canceled: Arc<AtomicBool>,
    should_notify: Arc<AtomicBool>,
-    worker: Arc<Mutex<Woker<T>>>,
+    worker: Arc<Mutex<Worker<T>>>,
    pool: ThreadPool,
    cleared: bool,
    item_count: u32,
@ -104,7 +104,7 @@ impl<T: Sync + Send + 'static> Nucleo<T> {
        case_matching: CaseMatching,
        columns: u32,
    ) -> Self {
-        let (pool, worker) = Woker::new(num_threads, config, notify.clone(), columns);
+        let (pool, worker) = Worker::new(num_threads, config, notify.clone(), columns);
        Self {
            canceled: worker.canceled.clone(),
            should_notify: worker.should_notify.clone(),
@ -137,7 +137,7 @@ impl<T: Sync + Send + 'static> Nucleo<T> {
    /// # Safety
    ///
    /// Item at `index` must be initialized. That means you must have observed
-    /// `push` returning this value or `get` retunring `Some` for this value.
+    /// `push` returning this value or `get` returning `Some` for this value.
    /// Just because a later index is initialized doesn't mean that this index
    /// is initialized
    pub unsafe fn get_unchecked(&self, index: u32) -> Item<'_, T> {
@ -219,7 +219,7 @@ impl<T: Sync + Send + 'static> Nucleo<T> {
 impl<T: Sync + Send> Drop for Nucleo<T> {
    fn drop(&mut self) {
        // we ensure the worker quits before dropping items to ensure that
-        // the worker can always assume the items outlife it
+        // the worker can always assume the items outlive it
        self.canceled.store(true, atomic::Ordering::Relaxed);
        let lock = self.worker.try_lock_for(Duration::from_secs(1));
        if lock.is_none() {
@ -228,8 +228,8 @@ impl<T: Sync + Send> Drop for Nucleo<T> {
    }
 }

-/// convenicne function to easily fuzzy match
-/// on a (relatively small list of inputs). This is not recommended for building a full tui
+/// convenience function to easily fuzzy match
+/// on a (relatively small) list of inputs. This is not recommended for building a full tui
 /// application that can match large numbers of matches as all matching is done on the current
 /// thread, effectively blocking the UI
 pub fn fuzzy_match<T: AsRef<str>>(
--- a/src/par_sort.rs
+++ b/src/par_sort.rs
@ -2,7 +2,7 @@
 //!
 //! This implementation is copied verbatim from `std::slice::sort_unstable` and then parallelized.
 //! The only difference from the original is that calls to `recurse` are executed in parallel using
-//! `rayon_core::join`a.
+//! `rayon_core::join`.
 //! Further modified for nucleo to allow canceling the sort

 // Copyright (c) 2010 The Rust Project Developers
--- a/src/results.rs
+++ b/src/results.rs
@ -1,9 +0,0 @@
-pub struct MatchSnapshot {
-    chunks: Vec<Match>,
-}
-
-#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy)]
-struct Match {
-    score: u32,
-    idx: u32,
-}
--- a/src/utf32_string.rs
+++ b/src/utf32_string.rs
@ -36,7 +36,7 @@ impl Utf32String {
        }
    }

-    /// Same as `slice` but accepts a u32 range for convenicene sine
+    /// Same as `slice` but accepts a u32 range for convenience since
    /// those are the indices returned by the matcher
    #[inline]
    pub fn slice(&self, range: impl RangeBounds<u32>) -> Utf32Str {
--- a/src/worker.rs
+++ b/src/worker.rs
@ -14,7 +14,7 @@ use crate::{boxcar, Match};
 struct Matchers(Box<[UnsafeCell<nucleo_matcher::Matcher>]>);

 impl Matchers {
-    // thiss is not a true mut from ref, we use a cell here
+    // this is not a true mut from ref, we use a cell here
    #[allow(clippy::mut_from_ref)]
    unsafe fn get(&self) -> &mut nucleo_matcher::Matcher {
        &mut *self.0[rayon::current_thread_index().unwrap()].get()
@ -24,7 +24,7 @@ impl Matchers {
 unsafe impl Sync for Matchers {}
 unsafe impl Send for Matchers {}

-pub(crate) struct Woker<T: Sync + Send + 'static> {
+pub(crate) struct Worker<T: Sync + Send + 'static> {
    pub(crate) running: bool,
    matchers: Matchers,
    pub(crate) matches: Vec<Match>,
@ -38,7 +38,7 @@ pub(crate) struct Woker<T: Sync + Send + 'static> {
    in_flight: Vec<u32>,
 }

-impl<T: Sync + Send + 'static> Woker<T> {
+impl<T: Sync + Send + 'static> Worker<T> {
    pub(crate) fn item_count(&self) -> u32 {
        self.last_snapshot - self.in_flight.len() as u32
    }
@ -64,7 +64,7 @@ impl<T: Sync + Send + 'static> Woker<T> {
        let matchers = (0..worker_threads)
            .map(|_| UnsafeCell::new(nucleo_matcher::Matcher::new(config)))
            .collect();
-        let worker = Woker {
+        let worker = Worker {
            running: false,
            matchers: Matchers(matchers),
            last_snapshot: 0,
@ -211,7 +211,7 @@ impl<T: Sync + Send + 'static> Woker<T> {
                if match2.idx == u32::MAX {
                    return true;
                }
-                // the tie breaker is comparitevly rarely needed so we keep it
+                // the tie breaker is comparatively rarely needed so we keep it
                // in a branch especially because we need to access the items
                // array here which involves some pointer chasing
                let item1 = self.items.get_unchecked(match1.idx);