Merge pull request #4 from helix-editor/typos

Fix typos and removed unused files
This commit is contained in:
Pascal Kuthe 2023-08-05 18:31:41 +02:00 committed by GitHub
commit 093ecafb01
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 33 additions and 43 deletions

View File

@ -4,7 +4,7 @@
Nucleo uses the exact **same scoring system as fzf**. That means you should get the same ranking quality (or better) as you are used to from fzf. However, `nucleo` has a more faithful implementation of the Smith-Waterman algorithm which is normally used in DNA sequence alignment (see https://www.cs.cmu.edu/~ckingsf/bioinfo-lectures/gaps.pdf) with two separate matrices (instead of one like fzf). This means that `nucleo` finds the optimal match more often. For example, if you match `foo` in `xf foo`, `nucleo` will match `x__foo` but `fzf` will match `xf_oo` (you can increase the word length and the result will stay the same). The former is the more intuitive match and has a higher score according to the ranking system that both `nucleo` and fzf use.
**Compared to `skim`** (and the `fuzzy-matcher` crate) `nucleo` has an even larger performance advantage and is often around **six times faster** (see benchmarks below). Furthermore, the bonus system used by nucleo and fzf is (in my opinion)more consistent/superior. `nulceo` also handles non-ascii text much better. (`skim`s bonus system and even case insensitivity only work for ASCII).
**Compared to `skim`** (and the `fuzzy-matcher` crate) `nucleo` has an even larger performance advantage and is often around **six times faster** (see benchmarks below). Furthermore, the bonus system used by nucleo and fzf is (in my opinion) more consistent/superior. `nucleo` also handles non-ascii text much better. (`skim`'s bonus system and even case insensitivity only work for ASCII).
Nucleo also handles Unicode graphemes more correctly. `Fzf` and `skim` both operate on Unicode code points (chars). That means that multi codepoint graphemes can have weird effects (match multiple times, weirdly change the score, ...). `nucleo` will always use the first codepoint of the grapheme for matching instead (and reports grapheme indices, so they can be highlighted correctly).

View File

@ -156,7 +156,7 @@ pub enum CharClass {
Number,
}
/// nucleo can not match graphemes as single units to work around
/// nucleo cannot match graphemes as single units. To work around
/// that, we only use the first codepoint of each grapheme.
pub fn graphemes(text: &str) -> impl Iterator<Item = char> + '_ {
text.graphemes(true).map(|grapheme| {

View File

@ -145,7 +145,7 @@ impl Matcher {
}
}
// in case we don't have any letter in the needle
// we can treat the search as case sensitive and use memmem dircedly which is way faster
// we can treat the search as case sensitive and use memmem directly which is way faster
None => (),
}
}

View File

@ -36,7 +36,7 @@ use crate::matrix::MatrixSlab;
/// A matcher engine that can execute (fuzzy) matches.
///
/// A matcher contains **heap allocated** scratch memory that is reused during
/// matching. This scratch memory allows the matcher to garunte that it will
/// matching. This scratch memory allows the matcher to guarantee that it will
/// **never allocate** during matching (with the exception of pushing to the
/// `indices` vector if there isn't enough capacity). However this scratch
/// memory is fairly large (around 135KB) so creating a matcher is expensive and
@ -58,7 +58,7 @@ pub struct Matcher {
slab: MatrixSlab,
}
// this is just here for convenience not ruse if we should implement this
// this is just here for convenience not sure if we should implement this
impl Clone for Matcher {
fn clone(&self) -> Self {
Matcher {
@ -93,7 +93,7 @@ impl Matcher {
}
}
/// Find the fuzzy match with the higehest score in the `haystack`.
/// Find the fuzzy match with the highest score in the `haystack`.
///
/// This function has `O(mn)` time complexity for short inputs. To
/// avoid slowdowns it automatically falls back to [greedy matching]
@ -241,7 +241,7 @@ impl Matcher {
/// Greedily find a fuzzy match in the `haystack` and compute its indices.
///
/// This functions has `O(n)` time complexity but may provide unintutive (non-optimal)
/// This function has `O(n)` time complexity but may provide unintuitive (non-optimal)
/// indices and scores. Usually [fuzzy_indices](crate::Matcher::fuzzy_indices) should
/// be preferred.
///

View File

@ -107,7 +107,7 @@ pub(crate) struct ScoreCell {
pub(crate) struct MatcherDataView<'a, C: Char> {
pub haystack: &'a mut [C],
// stored as a separate array instead of struct
// to avoid padding sine char is too large and u8 too small :/
// to avoid padding since char is too large and u8 too small :/
pub bonus: &'a mut [u8],
pub current_row: &'a mut [ScoreCell],
pub row_offs: &'a mut [u16],
@ -162,7 +162,7 @@ impl MatrixSlab {
let cells = haystack_.len() * needle_len;
if cells > MAX_MATRIX_SIZE
|| haystack_.len() > u16::MAX as usize
// ensures that socres never overflow
// ensures that scores never overflow
|| needle_len > MAX_NEEDLE_LEN
{
return None;
@ -175,7 +175,7 @@ impl MatrixSlab {
// SAFETY: this allocation is valid for MATRIX_ALLOC_LAYOUT
let (haystack, bonus, rows, current_row, matrix_cells) =
matrix_layout.fieds_from_ptr(self.0);
// copy haystack before creating references to ensure we donu't crate
// copy haystack before creating references to ensure we don't create
// references to invalid chars (which may or may not be UB)
haystack_
.as_ptr()

View File

@ -20,7 +20,7 @@ pub(crate) const BONUS_BOUNDARY: u16 = SCORE_MATCH / 2;
// However, this prioritizes camel case over non-camel case.
// In fzf/skim this is not a problem since they score off the max
// consecutive bonus. However, we don't do that (because it's incorrect)
// so to avoids prioritzing camel we use a lower bonus. I think that's fine
// so to avoid prioritizing camel case we use a lower bonus. I think that's fine
// usually camel case is a weaker boundary than actual word boundaries anyway
// This also has the nice side effect of perfectly balancing out
// camel case, snake case and the consecutive version of the word
@ -97,7 +97,7 @@ impl Matcher {
let mut in_gap = false;
let mut consecutive = 1;
// unrolled the firs iteration to make applying the first char multiplier less akward
// unrolled the first iteration to make applying the first char multiplier less awkward
if INDICES {
indices.push(start as u32)
}

View File

@ -5,7 +5,7 @@ use std::{fmt, slice};
///
/// Usually Rust's utf8 encoded strings are great. However, fuzzy matching
/// operates on codepoints (it should operate on graphemes but that's too much
/// hassle to deal with). We want to quickly iterate these codeboints between
/// hassle to deal with). We want to quickly iterate these codepoints between
/// (up to 5 times) during matching.
///
/// Doing codepoint segmentation on the fly not only blows through the cache
@ -14,7 +14,7 @@ use std::{fmt, slice};
/// for ascii only text (but checking during each match has too much overhead).
///
/// Of course this comes at extra memory cost as we usually still need the utf8
/// encoded variant for rendenring. In the (dominant) case of ascii-only text
/// encoded variant for rendering. In the (dominant) case of ascii-only text
/// we don't require a copy. Furthermore fuzzy matching usually is applied while
/// the user is typing on the fly so the same item is potentially matched many
/// times (making the upfront cost more worth it). That means that its
@ -24,8 +24,8 @@ use std::{fmt, slice};
/// char buffer around that is filled with the presegmented chars
///
/// Another advantage of this approach is that the matcher will naturally
/// produce char indices (instead of utf8 offsets) annyway. With a
/// codepoint basec representation like this the indices can be used
/// produce char indices (instead of utf8 offsets) anyway. With a
/// codepoint based representation like this the indices can be used
/// directly
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
pub enum Utf32Str<'a> {
@ -84,7 +84,7 @@ impl<'a> Utf32Str<'a> {
}
}
/// Same as `slice` but accepts a u32 range for convenicene sine
/// Same as `slice` but accepts a u32 range for convenience since
/// those are the indices returned by the matcher
#[inline]
pub fn slice_u32(&self, range: impl RangeBounds<u32>) -> Utf32Str {

View File

@ -43,9 +43,9 @@ pub(crate) struct Vec<T> {
buckets: [Bucket<T>; BUCKETS as usize],
/// the number of initialized elements in this vector
count: AtomicU32,
/// the number of matcher columns in this vector, its absoletly critical that
/// the number of matcher columns in this vector, it's absolutely critical that
/// this remains constant after initialization (safety invariant) since
/// it is used to calculate the Entry layou
/// it is used to calculate the Entry layout
columns: u32,
}

View File

@ -1 +0,0 @@

View File

@ -8,7 +8,7 @@ use rayon::ThreadPool;
pub use crate::pattern::{CaseMatching, MultiPattern, Pattern, PatternKind};
pub use crate::utf32_string::Utf32String;
use crate::worker::Woker;
use crate::worker::Worker;
pub use nucleo_matcher::{chars, Matcher, MatcherConfig, Utf32Str};
mod boxcar;
@ -85,7 +85,7 @@ pub struct Nucleo<T: Sync + Send + 'static> {
// but this lets us avoid some unsafe
canceled: Arc<AtomicBool>,
should_notify: Arc<AtomicBool>,
worker: Arc<Mutex<Woker<T>>>,
worker: Arc<Mutex<Worker<T>>>,
pool: ThreadPool,
cleared: bool,
item_count: u32,
@ -104,7 +104,7 @@ impl<T: Sync + Send + 'static> Nucleo<T> {
case_matching: CaseMatching,
columns: u32,
) -> Self {
let (pool, worker) = Woker::new(num_threads, config, notify.clone(), columns);
let (pool, worker) = Worker::new(num_threads, config, notify.clone(), columns);
Self {
canceled: worker.canceled.clone(),
should_notify: worker.should_notify.clone(),
@ -137,7 +137,7 @@ impl<T: Sync + Send + 'static> Nucleo<T> {
/// # Safety
///
/// Item at `index` must be initialized. That means you must have observed
/// `push` returning this value or `get` retunring `Some` for this value.
/// `push` returning this value or `get` returning `Some` for this value.
/// Just because a later index is initialized doesn't mean that this index
/// is initialized
pub unsafe fn get_unchecked(&self, index: u32) -> Item<'_, T> {
@ -219,7 +219,7 @@ impl<T: Sync + Send + 'static> Nucleo<T> {
impl<T: Sync + Send> Drop for Nucleo<T> {
fn drop(&mut self) {
// we ensure the worker quits before dropping items to ensure that
// the worker can always assume the items outlife it
// the worker can always assume the items outlive it
self.canceled.store(true, atomic::Ordering::Relaxed);
let lock = self.worker.try_lock_for(Duration::from_secs(1));
if lock.is_none() {
@ -228,8 +228,8 @@ impl<T: Sync + Send> Drop for Nucleo<T> {
}
}
/// convenicne function to easily fuzzy match
/// on a (relatively small list of inputs). This is not recommended for building a full tui
/// convenience function to easily fuzzy match
/// on a (relatively small) list of inputs. This is not recommended for building a full tui
/// application that can match large numbers of matches as all matching is done on the current
/// thread, effectively blocking the UI
pub fn fuzzy_match<T: AsRef<str>>(

View File

@ -2,7 +2,7 @@
//!
//! This implementation is copied verbatim from `std::slice::sort_unstable` and then parallelized.
//! The only difference from the original is that calls to `recurse` are executed in parallel using
//! `rayon_core::join`a.
//! `rayon_core::join`.
//! Further modified for nucleo to allow canceling the sort
// Copyright (c) 2010 The Rust Project Developers

View File

@ -1,9 +0,0 @@
pub struct MatchSnapshot {
chunks: Vec<Match>,
}
#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy)]
struct Match {
score: u32,
idx: u32,
}

View File

@ -36,7 +36,7 @@ impl Utf32String {
}
}
/// Same as `slice` but accepts a u32 range for convenicene sine
/// Same as `slice` but accepts a u32 range for convenience since
/// those are the indices returned by the matcher
#[inline]
pub fn slice(&self, range: impl RangeBounds<u32>) -> Utf32Str {

View File

@ -14,7 +14,7 @@ use crate::{boxcar, Match};
struct Matchers(Box<[UnsafeCell<nucleo_matcher::Matcher>]>);
impl Matchers {
// thiss is not a true mut from ref, we use a cell here
// this is not a true mut from ref, we use a cell here
#[allow(clippy::mut_from_ref)]
unsafe fn get(&self) -> &mut nucleo_matcher::Matcher {
&mut *self.0[rayon::current_thread_index().unwrap()].get()
@ -24,7 +24,7 @@ impl Matchers {
unsafe impl Sync for Matchers {}
unsafe impl Send for Matchers {}
pub(crate) struct Woker<T: Sync + Send + 'static> {
pub(crate) struct Worker<T: Sync + Send + 'static> {
pub(crate) running: bool,
matchers: Matchers,
pub(crate) matches: Vec<Match>,
@ -38,7 +38,7 @@ pub(crate) struct Woker<T: Sync + Send + 'static> {
in_flight: Vec<u32>,
}
impl<T: Sync + Send + 'static> Woker<T> {
impl<T: Sync + Send + 'static> Worker<T> {
pub(crate) fn item_count(&self) -> u32 {
self.last_snapshot - self.in_flight.len() as u32
}
@ -64,7 +64,7 @@ impl<T: Sync + Send + 'static> Woker<T> {
let matchers = (0..worker_threads)
.map(|_| UnsafeCell::new(nucleo_matcher::Matcher::new(config)))
.collect();
let worker = Woker {
let worker = Worker {
running: false,
matchers: Matchers(matchers),
last_snapshot: 0,
@ -211,7 +211,7 @@ impl<T: Sync + Send + 'static> Woker<T> {
if match2.idx == u32::MAX {
return true;
}
// the tie breaker is comparitevly rarely needed so we keep it
// the tie breaker is comparatively rarely needed so we keep it
// in a branch especially because we need to access the items
// array here which involves some pointer chasing
let item1 = self.items.get_unchecked(match1.idx);