mirror of
https://github.com/solaeus/nucleo.git
synced 2024-12-22 09:57:49 +00:00
cleanup bugs
This commit is contained in:
parent
6b8ee0f585
commit
1ce8850f7e
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -15,7 +15,6 @@ dependencies = [
|
||||
"brunch",
|
||||
"fuzzy-matcher",
|
||||
"nucleo",
|
||||
"nucleo-matcher",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
|
18
Cargo.toml
18
Cargo.toml
@ -1,2 +1,18 @@
|
||||
[package]
|
||||
name = "nucleo"
|
||||
description = "plug and play high performance fuzzy matcher"
|
||||
authors = ["Pascal Kuthe <pascal.kuthe@semimod.de>"]
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
license = "MPL-2.0"
|
||||
repository = "https://github.com/helix-editor/nucleo"
|
||||
|
||||
[lib]
|
||||
|
||||
[dependencies]
|
||||
nucleo-matcher = { version = "0.1", path = "matcher" }
|
||||
parking_lot = { version = "0.12.1", features = ["send_guard", "arc_lock"]}
|
||||
rayon = "1.7.0"
|
||||
|
||||
[workspace]
|
||||
members = [ "matcher", "worker", "benches" ]
|
||||
members = [ "matcher", "bench" ]
|
||||
|
@ -6,8 +6,7 @@ edition = "2021"
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
nucleo-matcher = { version = "0.1", path = "../matcher" }
|
||||
nucleo = { version = "0.1", path = "../worker" }
|
||||
nucleo = { version = "0.1", path = "../" }
|
||||
brunch = "0.5.0"
|
||||
fuzzy-matcher = "0.3.7"
|
||||
walkdir = "2"
|
@ -4,8 +4,7 @@ use std::process::Command;
|
||||
|
||||
use brunch::{Bench, Benches};
|
||||
use fuzzy_matcher::FuzzyMatcher;
|
||||
use nucleo::Utf32String;
|
||||
use nucleo_matcher::Utf32Str;
|
||||
use nucleo::{Utf32Str, Utf32String};
|
||||
|
||||
fn bench_dir() -> PathBuf {
|
||||
std::env::var_os("BENCHMARK_DIR")
|
||||
@ -44,9 +43,8 @@ fn main() {
|
||||
Some((path.as_str().into(), path))
|
||||
})
|
||||
.unzip();
|
||||
let mut nucleo =
|
||||
nucleo_matcher::Matcher::new(nucleo_matcher::MatcherConfig::DEFAULT.match_paths());
|
||||
let skim = fuzzy_matcher::skim::SkimMatcherV2::default().ignore_case();
|
||||
let mut nucleo = nucleo::Matcher::new(nucleo::MatcherConfig::DEFAULT.match_paths());
|
||||
let skim = fuzzy_matcher::skim::SkimMatcherV2::default();
|
||||
|
||||
// TODO: unicode?
|
||||
let needles = ["never_matches", "copying", "/doc/kernel", "//.h"];
|
@ -11,9 +11,7 @@ pub struct MatcherConfig {
|
||||
/// Extra bonus for word boundary after slash, colon, semi-colon, and comma
|
||||
pub(crate) bonus_boundary_delimiter: u16,
|
||||
pub initial_char_class: CharClass,
|
||||
/// Whether to normalize latin script characters to ASCII
|
||||
/// this significantly degrades performance so its not recommended
|
||||
/// to be turned on by default
|
||||
/// Whether to normalize latin script characters to ASCII (enabled by default)
|
||||
pub normalize: bool,
|
||||
/// whether to ignore casing
|
||||
pub ignore_case: bool,
|
||||
@ -26,7 +24,7 @@ impl MatcherConfig {
|
||||
bonus_boundary_white: BONUS_BOUNDARY + 2,
|
||||
bonus_boundary_delimiter: BONUS_BOUNDARY + 1,
|
||||
initial_char_class: CharClass::Whitespace,
|
||||
normalize: false,
|
||||
normalize: true,
|
||||
ignore_case: true,
|
||||
}
|
||||
};
|
||||
|
@ -125,26 +125,41 @@ impl Matcher {
|
||||
|
||||
fn fuzzy_matcher_impl<const INDICES: bool>(
|
||||
&mut self,
|
||||
haystack: Utf32Str<'_>,
|
||||
haystack_: Utf32Str<'_>,
|
||||
needle_: Utf32Str<'_>,
|
||||
indices: &mut Vec<u32>,
|
||||
) -> Option<u16> {
|
||||
if needle_.len() > haystack.len() || needle_.is_empty() {
|
||||
if needle_.len() > haystack_.len() || needle_.is_empty() {
|
||||
return None;
|
||||
}
|
||||
if needle_.len() == haystack.len() {
|
||||
return self.exact_match_impl::<INDICES>(haystack, needle_, indices);
|
||||
if needle_.len() == haystack_.len() {
|
||||
return self.exact_match_impl::<INDICES>(
|
||||
haystack_,
|
||||
needle_,
|
||||
0,
|
||||
haystack_.len(),
|
||||
indices,
|
||||
);
|
||||
}
|
||||
assert!(
|
||||
haystack.len() <= u32::MAX as usize,
|
||||
haystack_.len() <= u32::MAX as usize,
|
||||
"fuzzy matching is only support for up to 2^32-1 codepoints"
|
||||
);
|
||||
match (haystack, needle_) {
|
||||
match (haystack_, needle_) {
|
||||
(Utf32Str::Ascii(haystack), Utf32Str::Ascii(needle)) => {
|
||||
if let &[needle] = needle {
|
||||
return self.substring_match_1_ascii::<INDICES>(haystack, needle, indices);
|
||||
}
|
||||
let (start, greedy_end, end) = self.prefilter_ascii(haystack, needle, false)?;
|
||||
if needle_.len() == end - start {
|
||||
return Some(self.calculate_score::<INDICES, _, _>(
|
||||
AsciiChar::cast(haystack),
|
||||
AsciiChar::cast(needle),
|
||||
start,
|
||||
greedy_end,
|
||||
indices,
|
||||
));
|
||||
}
|
||||
self.fuzzy_match_optimal::<INDICES, AsciiChar, AsciiChar>(
|
||||
AsciiChar::cast(haystack),
|
||||
AsciiChar::cast(needle),
|
||||
@ -171,6 +186,10 @@ impl Matcher {
|
||||
return Some(res);
|
||||
}
|
||||
let (start, end) = self.prefilter_non_ascii(haystack, needle_, false)?;
|
||||
if needle_.len() == end - start {
|
||||
return self
|
||||
.exact_match_impl::<INDICES>(haystack_, needle_, start, end, indices);
|
||||
}
|
||||
self.fuzzy_match_optimal::<INDICES, char, AsciiChar>(
|
||||
haystack,
|
||||
AsciiChar::cast(needle),
|
||||
@ -188,6 +207,10 @@ impl Matcher {
|
||||
return Some(res);
|
||||
}
|
||||
let (start, end) = self.prefilter_non_ascii(haystack, needle_, false)?;
|
||||
if needle_.len() == end - start {
|
||||
return self
|
||||
.exact_match_impl::<INDICES>(haystack_, needle_, start, end, indices);
|
||||
}
|
||||
self.fuzzy_match_optimal::<INDICES, char, char>(
|
||||
haystack,
|
||||
needle,
|
||||
@ -243,7 +266,7 @@ impl Matcher {
|
||||
return None;
|
||||
}
|
||||
if needle_.len() == haystack.len() {
|
||||
return self.exact_match_impl::<INDICES>(haystack, needle_, indices);
|
||||
return self.exact_match_impl::<INDICES>(haystack, needle_, 0, haystack.len(), indices);
|
||||
}
|
||||
assert!(
|
||||
haystack.len() <= u32::MAX as usize,
|
||||
@ -252,6 +275,15 @@ impl Matcher {
|
||||
match (haystack, needle_) {
|
||||
(Utf32Str::Ascii(haystack), Utf32Str::Ascii(needle)) => {
|
||||
let (start, greedy_end, _) = self.prefilter_ascii(haystack, needle, true)?;
|
||||
if needle_.len() == greedy_end - start {
|
||||
return Some(self.calculate_score::<INDICES, _, _>(
|
||||
AsciiChar::cast(haystack),
|
||||
AsciiChar::cast(needle),
|
||||
start,
|
||||
greedy_end,
|
||||
indices,
|
||||
));
|
||||
}
|
||||
self.fuzzy_match_greedy_::<INDICES, AsciiChar, AsciiChar>(
|
||||
AsciiChar::cast(haystack),
|
||||
AsciiChar::cast(needle),
|
||||
@ -330,7 +362,7 @@ impl Matcher {
|
||||
return None;
|
||||
}
|
||||
if needle_.len() == haystack.len() {
|
||||
return self.exact_match_impl::<INDICES>(haystack, needle_, indices);
|
||||
return self.exact_match_impl::<INDICES>(haystack, needle_, 0, haystack.len(), indices);
|
||||
}
|
||||
assert!(
|
||||
haystack.len() <= u32::MAX as usize,
|
||||
@ -393,7 +425,7 @@ impl Matcher {
|
||||
///
|
||||
/// See the [matcher documentation](crate::Matcher) for more details.
|
||||
pub fn exact_match(&mut self, haystack: Utf32Str<'_>, needle: Utf32Str<'_>) -> Option<u16> {
|
||||
self.exact_match_impl::<false>(haystack, needle, &mut Vec::new())
|
||||
self.exact_match_impl::<false>(haystack, needle, 0, haystack.len(), &mut Vec::new())
|
||||
}
|
||||
|
||||
/// Checks whether needle and haystack match exactly and compute the matches indices.
|
||||
@ -407,7 +439,7 @@ impl Matcher {
|
||||
needle: Utf32Str<'_>,
|
||||
indices: &mut Vec<u32>,
|
||||
) -> Option<u16> {
|
||||
self.exact_match_impl::<true>(haystack, needle, indices)
|
||||
self.exact_match_impl::<true>(haystack, needle, 0, haystack.len(), indices)
|
||||
}
|
||||
|
||||
/// Checks whether needle is a prefix of the haystack.
|
||||
@ -419,7 +451,7 @@ impl Matcher {
|
||||
if haystack.len() < needle.len() {
|
||||
None
|
||||
} else {
|
||||
self.exact_match_impl::<false>(haystack.slice(..needle.len()), needle, &mut Vec::new())
|
||||
self.exact_match_impl::<false>(haystack, needle, 0, needle.len(), &mut Vec::new())
|
||||
}
|
||||
}
|
||||
|
||||
@ -437,7 +469,7 @@ impl Matcher {
|
||||
if haystack.len() < needle.len() {
|
||||
None
|
||||
} else {
|
||||
self.exact_match_impl::<true>(haystack.slice(..needle.len()), needle, indices)
|
||||
self.exact_match_impl::<true>(haystack, needle, 0, needle.len(), indices)
|
||||
}
|
||||
}
|
||||
|
||||
@ -451,8 +483,10 @@ impl Matcher {
|
||||
None
|
||||
} else {
|
||||
self.exact_match_impl::<false>(
|
||||
haystack.slice(haystack.len() - needle.len()..),
|
||||
haystack,
|
||||
needle,
|
||||
haystack.len() - needle.len(),
|
||||
haystack.len(),
|
||||
&mut Vec::new(),
|
||||
)
|
||||
}
|
||||
@ -473,8 +507,10 @@ impl Matcher {
|
||||
None
|
||||
} else {
|
||||
self.exact_match_impl::<true>(
|
||||
haystack.slice(haystack.len() - needle.len()..),
|
||||
haystack,
|
||||
needle,
|
||||
haystack.len() - needle.len(),
|
||||
haystack.len(),
|
||||
indices,
|
||||
)
|
||||
}
|
||||
@ -484,9 +520,11 @@ impl Matcher {
|
||||
&mut self,
|
||||
haystack: Utf32Str<'_>,
|
||||
needle_: Utf32Str<'_>,
|
||||
start: usize,
|
||||
end: usize,
|
||||
indices: &mut Vec<u32>,
|
||||
) -> Option<u16> {
|
||||
if needle_.len() != haystack.len() || needle_.is_empty() {
|
||||
if needle_.len() != end - start || needle_.is_empty() {
|
||||
return None;
|
||||
}
|
||||
assert!(
|
||||
@ -496,7 +534,7 @@ impl Matcher {
|
||||
let score = match (haystack, needle_) {
|
||||
(Utf32Str::Ascii(haystack), Utf32Str::Ascii(needle)) => {
|
||||
let matched = if self.config.ignore_case {
|
||||
AsciiChar::cast(haystack)
|
||||
AsciiChar::cast(haystack)[start..end]
|
||||
.iter()
|
||||
.map(|c| c.normalize(&self.config))
|
||||
.eq(AsciiChar::cast(needle)
|
||||
@ -511,8 +549,8 @@ impl Matcher {
|
||||
self.calculate_score::<INDICES, _, _>(
|
||||
AsciiChar::cast(haystack),
|
||||
AsciiChar::cast(needle),
|
||||
0,
|
||||
haystack.len(),
|
||||
start,
|
||||
end,
|
||||
indices,
|
||||
)
|
||||
}
|
||||
@ -522,8 +560,7 @@ impl Matcher {
|
||||
return None;
|
||||
}
|
||||
(Utf32Str::Unicode(haystack), Utf32Str::Ascii(needle)) => {
|
||||
let matched =
|
||||
haystack
|
||||
let matched = haystack[start..end]
|
||||
.iter()
|
||||
.map(|c| c.normalize(&self.config))
|
||||
.eq(AsciiChar::cast(needle)
|
||||
@ -536,20 +573,20 @@ impl Matcher {
|
||||
self.calculate_score::<INDICES, _, _>(
|
||||
haystack,
|
||||
AsciiChar::cast(needle),
|
||||
0,
|
||||
haystack.len(),
|
||||
start,
|
||||
end,
|
||||
indices,
|
||||
)
|
||||
}
|
||||
(Utf32Str::Unicode(haystack), Utf32Str::Unicode(needle)) => {
|
||||
let matched = haystack
|
||||
let matched = haystack[start..end]
|
||||
.iter()
|
||||
.map(|c| c.normalize(&self.config))
|
||||
.eq(needle.iter().map(|c| c.normalize(&self.config)));
|
||||
if !matched {
|
||||
return None;
|
||||
}
|
||||
self.calculate_score::<INDICES, _, _>(haystack, needle, 0, haystack.len(), indices)
|
||||
self.calculate_score::<INDICES, _, _>(haystack, needle, start, end, indices)
|
||||
}
|
||||
};
|
||||
Some(score)
|
||||
|
@ -25,6 +25,8 @@ struct MatrixLayout<C: Char> {
|
||||
}
|
||||
impl<C: Char> MatrixLayout<C> {
|
||||
fn new(haystack_len: usize, needle_len: usize) -> MatrixLayout<C> {
|
||||
assert!(haystack_len >= needle_len);
|
||||
assert!(haystack_len <= u32::MAX as usize);
|
||||
let mut layout = Layout::from_size_align(0, 1).unwrap();
|
||||
let haystack_layout = Layout::array::<C>(haystack_len).unwrap();
|
||||
let bonus_layout = Layout::array::<u16>(haystack_len).unwrap();
|
||||
|
@ -85,6 +85,9 @@ impl Matcher {
|
||||
.iter()
|
||||
.rev()
|
||||
.position(|c| c.normalize(&self.config) == needle_char)?;
|
||||
if end - start < needle.len() {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some((start, end))
|
||||
}
|
||||
|
@ -89,9 +89,16 @@ pub(crate) struct ItemsSnapshot {
|
||||
}
|
||||
|
||||
impl ItemsSnapshot {
|
||||
pub(crate) fn new() -> Self {
|
||||
pub(crate) fn new(items: &ItemCache) -> Self {
|
||||
Self {
|
||||
items: Vec::with_capacity(1024),
|
||||
items: items
|
||||
.live
|
||||
.iter()
|
||||
.map(|item| ItemSnapshot {
|
||||
cols: item.cols,
|
||||
len: item.cols().iter().map(|s| s.len() as u32).sum(),
|
||||
})
|
||||
.collect(),
|
||||
}
|
||||
}
|
||||
|
||||
@ -104,7 +111,7 @@ impl ItemsSnapshot {
|
||||
}
|
||||
|
||||
pub(crate) fn update(&mut self, items: &ItemCache) -> bool {
|
||||
let cleared = items.evicted.is_empty();
|
||||
let cleared = !items.evicted.is_empty();
|
||||
// drop in another thread to ensure we don't wait for a long drop here
|
||||
if cleared {
|
||||
self.items.clear();
|
207
src/lib.rs
Normal file
207
src/lib.rs
Normal file
@ -0,0 +1,207 @@
|
||||
use std::cmp::Reverse;
|
||||
use std::ops::Deref;
|
||||
use std::sync::atomic::{self, AtomicBool};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use crate::items::{Item, ItemCache};
|
||||
use crate::worker::Worker;
|
||||
use parking_lot::lock_api::ArcMutexGuard;
|
||||
use rayon::ThreadPool;
|
||||
|
||||
pub use crate::query::{CaseMatching, MultiPattern, Pattern, PatternKind};
|
||||
pub use crate::utf32_string::Utf32String;
|
||||
|
||||
mod items;
|
||||
mod query;
|
||||
mod utf32_string;
|
||||
mod worker;
|
||||
pub use nucleo_matcher::{chars, Matcher, MatcherConfig, Utf32Str};
|
||||
|
||||
use parking_lot::{Mutex, MutexGuard, RawMutex};
|
||||
|
||||
/// A single scored entry in the result list.
///
/// `idx` is the position of the matched item in the item list and `score` is
/// the value assigned to that item for the current pattern.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Match {
    /// Score of the item for the current pattern.
    pub score: u32,
    /// Index of the item this match refers to.
    pub idx: u32,
}
|
||||
|
||||
/// Outcome of a single `Nucleo::tick` call.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Status {
    /// `true` when the worker had finished a run and its results were copied
    /// into `Nucleo::matches` during this tick.
    pub changed: bool,
    /// `true` when a worker run was started by this tick, or when the worker
    /// was still busy and could not be locked within the timeout.
    pub running: bool,
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Items<T> {
|
||||
cache: Arc<Mutex<ItemCache>>,
|
||||
items: Arc<Mutex<Vec<T>>>,
|
||||
notify: Arc<(dyn Fn() + Sync + Send)>,
|
||||
}
|
||||
|
||||
impl<T: Sync + Send> Items<T> {
|
||||
pub fn clear(&mut self) {
|
||||
self.items.lock().clear();
|
||||
self.cache.lock().clear();
|
||||
}
|
||||
|
||||
pub fn append(&mut self, items: impl Iterator<Item = (T, Box<[Utf32String]>)>) {
|
||||
let mut cache = self.cache.lock();
|
||||
let mut items_ = self.items.lock();
|
||||
items_.extend(items.map(|(item, text)| {
|
||||
cache.push(text);
|
||||
item
|
||||
}));
|
||||
// notify that a new tick will be necessary
|
||||
(self.notify)();
|
||||
}
|
||||
|
||||
pub fn get(&self) -> impl Deref<Target = [T]> + '_ {
|
||||
MutexGuard::map(self.items.lock(), |items| items.as_mut_slice())
|
||||
}
|
||||
|
||||
pub fn get_matcher_items(&self) -> impl Deref<Target = [Item]> + '_ {
|
||||
MutexGuard::map(self.cache.lock(), |items| items.get())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Nucleo<T: Sync + Send> {
|
||||
// the way the API is build we totally don't actually neeed these to be Arcs
|
||||
// but this lets us avoid some unsafe
|
||||
worker: Arc<Mutex<Worker>>,
|
||||
canceled: Arc<AtomicBool>,
|
||||
pool: ThreadPool,
|
||||
pub items: Items<T>,
|
||||
pub matches: Vec<Match>,
|
||||
pub pattern: MultiPattern,
|
||||
should_notify: Arc<AtomicBool>,
|
||||
}
|
||||
|
||||
impl<T: Sync + Send> Nucleo<T> {
|
||||
pub fn new(
|
||||
config: MatcherConfig,
|
||||
notify: Arc<(dyn Fn() + Sync + Send)>,
|
||||
num_threads: Option<usize>,
|
||||
case_matching: CaseMatching,
|
||||
cols: usize,
|
||||
items: impl Iterator<Item = (T, Box<[Utf32String]>)>,
|
||||
) -> Self {
|
||||
let mut cache = ItemCache::new();
|
||||
let items: Vec<_> = items
|
||||
.map(|(item, text)| {
|
||||
cache.push(text);
|
||||
item
|
||||
})
|
||||
.collect();
|
||||
let matches: Vec<_> = (0..items.len())
|
||||
.map(|i| Match {
|
||||
score: 0,
|
||||
idx: i as u32,
|
||||
})
|
||||
.collect();
|
||||
let (pool, worker) =
|
||||
Worker::new(notify.clone(), num_threads, config, matches.clone(), &cache);
|
||||
Self {
|
||||
canceled: worker.canceled.clone(),
|
||||
should_notify: worker.should_notify.clone(),
|
||||
items: Items {
|
||||
cache: Arc::new(Mutex::new(cache)),
|
||||
items: Arc::new(Mutex::new(items)),
|
||||
notify,
|
||||
},
|
||||
pool,
|
||||
matches,
|
||||
pattern: MultiPattern::new(&config, case_matching, cols),
|
||||
worker: Arc::new(Mutex::new(worker)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn update_config(&mut self, config: MatcherConfig) {
|
||||
self.worker.lock().update_config(config)
|
||||
}
|
||||
|
||||
pub fn tick(&mut self, timeout: u64) -> Status {
|
||||
self.should_notify.store(false, atomic::Ordering::Relaxed);
|
||||
let status = self.pattern.status();
|
||||
let items = self.items.cache.lock_arc();
|
||||
let canceled = status != query::Status::Unchanged || items.cleared();
|
||||
let res = self.tick_inner(timeout, canceled, items, status);
|
||||
if !canceled {
|
||||
self.should_notify.store(true, atomic::Ordering::Relaxed);
|
||||
return res;
|
||||
}
|
||||
let items = self.items.cache.lock_arc();
|
||||
let res = self.tick_inner(timeout, false, items, query::Status::Unchanged);
|
||||
self.should_notify.store(true, atomic::Ordering::Relaxed);
|
||||
res
|
||||
}
|
||||
|
||||
fn tick_inner(
|
||||
&mut self,
|
||||
timeout: u64,
|
||||
canceled: bool,
|
||||
items: ArcMutexGuard<RawMutex, ItemCache>,
|
||||
status: query::Status,
|
||||
) -> Status {
|
||||
let mut inner = if canceled {
|
||||
self.pattern.reset_status();
|
||||
self.canceled.store(true, atomic::Ordering::Relaxed);
|
||||
self.worker.lock_arc()
|
||||
} else {
|
||||
let Some(worker) = self.worker.try_lock_arc_for(Duration::from_millis(timeout)) else {
|
||||
return Status{ changed: false, running: true };
|
||||
};
|
||||
worker
|
||||
};
|
||||
|
||||
let changed = inner.running;
|
||||
if inner.running {
|
||||
inner.running = false;
|
||||
self.matches.clone_from(&inner.matches);
|
||||
}
|
||||
|
||||
let running = canceled || inner.items.outdated(&items);
|
||||
if running {
|
||||
inner.pattern.clone_from(&self.pattern);
|
||||
self.canceled.store(false, atomic::Ordering::Relaxed);
|
||||
self.pool.spawn(move || unsafe { inner.run(items, status) })
|
||||
}
|
||||
Status { changed, running }
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: Sync + Send> Drop for Nucleo<T> {
|
||||
fn drop(&mut self) {
|
||||
// we ensure the worker quits before dropping items to ensure that
|
||||
// the worker can always assume the items outlife it
|
||||
self.canceled.store(true, atomic::Ordering::Relaxed);
|
||||
let lock = self.worker.try_lock_for(Duration::from_secs(1));
|
||||
if lock.is_none() {
|
||||
unreachable!("thread pool failed to shutdown properly")
|
||||
}
|
||||
}
|
||||
}
|
||||
/// convenicne function to easily fuzzy match
|
||||
/// on a (relatievly small list of inputs). This is not recommended for building a full tui
|
||||
/// application that can match large numbers of matches as all matching is done on the current
|
||||
/// thread, effectively blocking the UI
|
||||
pub fn fuzzy_match<T: AsRef<str>>(
|
||||
matcher: &mut Matcher,
|
||||
pattern: &str,
|
||||
items: impl IntoIterator<Item = T>,
|
||||
case_matching: CaseMatching,
|
||||
) -> Vec<(T, u32)> {
|
||||
let mut pattern_ = Pattern::new(&matcher.config, case_matching);
|
||||
pattern_.set_literal(pattern, PatternKind::Fuzzy, false);
|
||||
let mut buf = Vec::new();
|
||||
let mut items: Vec<_> = items
|
||||
.into_iter()
|
||||
.filter_map(|item| {
|
||||
pattern_
|
||||
.score(Utf32Str::new(item.as_ref(), &mut buf), matcher)
|
||||
.map(|score| (item, score))
|
||||
})
|
||||
.collect();
|
||||
items.sort_by_key(|(item, score)| (Reverse(*score), item.as_ref().len()));
|
||||
items
|
||||
}
|
@ -54,7 +54,9 @@ impl PatternAtom {
|
||||
|
||||
match case {
|
||||
CaseMatching::Ignore => needle.make_ascii_lowercase(),
|
||||
CaseMatching::Smart => ignore_case = needle.bytes().any(|b| b.is_ascii_uppercase()),
|
||||
CaseMatching::Smart => {
|
||||
ignore_case = !needle.bytes().any(|b| b.is_ascii_uppercase())
|
||||
}
|
||||
CaseMatching::Respect => (),
|
||||
}
|
||||
|
||||
@ -80,7 +82,7 @@ impl PatternAtom {
|
||||
match case {
|
||||
CaseMatching::Ignore => c = chars::to_lower_case(c),
|
||||
CaseMatching::Smart => {
|
||||
ignore_case = ignore_case || c.is_uppercase();
|
||||
ignore_case = ignore_case && !c.is_uppercase();
|
||||
}
|
||||
CaseMatching::Respect => (),
|
||||
}
|
||||
@ -149,22 +151,18 @@ pub enum Status {
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Query {
|
||||
pub struct MultiPattern {
|
||||
pub cols: Vec<Pattern>,
|
||||
}
|
||||
|
||||
impl Query {
|
||||
pub fn new(matcher_config: &MatcherConfig, case_matching: CaseMatching, cols: usize) -> Query {
|
||||
Query {
|
||||
cols: vec![
|
||||
Pattern {
|
||||
terms: Vec::new(),
|
||||
case_matching,
|
||||
normalize: matcher_config.normalize,
|
||||
status: Status::Unchanged,
|
||||
};
|
||||
cols
|
||||
],
|
||||
impl MultiPattern {
|
||||
pub fn new(
|
||||
matcher_config: &MatcherConfig,
|
||||
case_matching: CaseMatching,
|
||||
cols: usize,
|
||||
) -> MultiPattern {
|
||||
MultiPattern {
|
||||
cols: vec![Pattern::new(matcher_config, case_matching); cols],
|
||||
}
|
||||
}
|
||||
|
||||
@ -201,7 +199,30 @@ pub struct Pattern {
|
||||
}
|
||||
|
||||
impl Pattern {
|
||||
pub(crate) fn score(&self, haystack: Utf32Str<'_>, matcher: &mut Matcher) -> Option<u32> {
|
||||
pub fn new(matcher_config: &MatcherConfig, case_matching: CaseMatching) -> Pattern {
|
||||
Pattern {
|
||||
terms: Vec::new(),
|
||||
case_matching,
|
||||
normalize: matcher_config.normalize,
|
||||
status: Status::Unchanged,
|
||||
}
|
||||
}
|
||||
pub fn new_fuzzy_literal(
|
||||
matcher_config: &MatcherConfig,
|
||||
case_matching: CaseMatching,
|
||||
pattern: &str,
|
||||
) -> Pattern {
|
||||
let mut res = Pattern {
|
||||
terms: Vec::new(),
|
||||
case_matching,
|
||||
normalize: matcher_config.normalize,
|
||||
status: Status::Unchanged,
|
||||
};
|
||||
res.set_literal(pattern, PatternKind::Fuzzy, false);
|
||||
res
|
||||
}
|
||||
|
||||
pub fn score(&self, haystack: Utf32Str<'_>, matcher: &mut Matcher) -> Option<u32> {
|
||||
if self.terms.is_empty() {
|
||||
return Some(0);
|
||||
}
|
||||
@ -215,7 +236,7 @@ impl Pattern {
|
||||
matcher.substring_match(haystack, pattern.needle.slice(..))
|
||||
}
|
||||
PatternKind::Prefix => matcher.prefix_match(haystack, pattern.needle.slice(..)),
|
||||
PatternKind::Postfix => matcher.prefix_match(haystack, pattern.needle.slice(..)),
|
||||
PatternKind::Postfix => matcher.postfix_match(haystack, pattern.needle.slice(..)),
|
||||
};
|
||||
if pattern.invert {
|
||||
if pattern_score.is_some() {
|
||||
@ -249,7 +270,7 @@ impl Pattern {
|
||||
}
|
||||
PatternKind::Prefix => matcher.prefix_match(haystack, pattern.needle.slice(..)),
|
||||
PatternKind::Postfix => {
|
||||
matcher.prefix_match(haystack, pattern.needle.slice(..))
|
||||
matcher.postfix_match(haystack, pattern.needle.slice(..))
|
||||
}
|
||||
};
|
||||
if pattern_score.is_some() {
|
||||
@ -262,16 +283,16 @@ impl Pattern {
|
||||
matcher.exact_indices(haystack, pattern.needle.slice(..), indices)
|
||||
}
|
||||
PatternKind::Fuzzy => {
|
||||
matcher.exact_indices(haystack, pattern.needle.slice(..), indices)
|
||||
matcher.fuzzy_indices(haystack, pattern.needle.slice(..), indices)
|
||||
}
|
||||
PatternKind::Substring => {
|
||||
matcher.exact_indices(haystack, pattern.needle.slice(..), indices)
|
||||
matcher.substring_indices(haystack, pattern.needle.slice(..), indices)
|
||||
}
|
||||
PatternKind::Prefix => {
|
||||
matcher.exact_indices(haystack, pattern.needle.slice(..), indices)
|
||||
matcher.prefix_indices(haystack, pattern.needle.slice(..), indices)
|
||||
}
|
||||
PatternKind::Postfix => {
|
||||
matcher.exact_indices(haystack, pattern.needle.slice(..), indices)
|
||||
matcher.postfix_indices(haystack, pattern.needle.slice(..), indices)
|
||||
}
|
||||
};
|
||||
score += pattern_score? as u32
|
||||
@ -282,10 +303,15 @@ impl Pattern {
|
||||
pub fn parse_from(&mut self, pattern: &str, append: bool) {
|
||||
self.terms.clear();
|
||||
let invert = self.terms.last().map_or(false, |pat| pat.invert);
|
||||
for atom in pattern_atoms(pattern) {
|
||||
self.terms
|
||||
.push(PatternAtom::parse(atom, self.normalize, self.case_matching));
|
||||
let atoms = pattern_atoms(pattern).filter_map(|atom| {
|
||||
let atom = PatternAtom::parse(atom, self.normalize, self.case_matching);
|
||||
if atom.needle.is_empty() {
|
||||
return None;
|
||||
}
|
||||
Some(atom)
|
||||
});
|
||||
self.terms.extend(atoms);
|
||||
|
||||
self.status = if append && !invert && self.status != Status::Rescore {
|
||||
Status::Update
|
||||
} else {
|
||||
@ -304,6 +330,10 @@ impl Pattern {
|
||||
Status::Rescore
|
||||
};
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.terms.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
fn pattern_atoms(pattern: &str) -> impl Iterator<Item = &str> + '_ {
|
@ -1,5 +1,7 @@
|
||||
use core::slice;
|
||||
use std::borrow::Cow;
|
||||
use std::fmt;
|
||||
use std::mem::take;
|
||||
use std::ops::{Bound, RangeBounds};
|
||||
|
||||
use nucleo_matcher::Utf32Str;
|
||||
@ -12,6 +14,12 @@ pub enum Utf32String {
|
||||
/// A string represented as an array of unicode codepoints (basically UTF-32).
|
||||
Unicode(Box<[char]>),
|
||||
}
|
||||
|
||||
impl Default for Utf32String {
|
||||
fn default() -> Self {
|
||||
Self::Ascii(String::new().into_boxed_str())
|
||||
}
|
||||
}
|
||||
impl Utf32String {
|
||||
#[inline]
|
||||
pub fn len(&self) -> usize {
|
||||
@ -48,31 +56,69 @@ impl Utf32String {
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn is_ascii(&self) -> bool {
|
||||
matches!(self, Utf32String::Ascii(_))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn get(&self, idx: u32) -> char {
|
||||
match self {
|
||||
Utf32String::Ascii(bytes) => bytes.as_bytes()[idx as usize] as char,
|
||||
Utf32String::Unicode(codepoints) => codepoints[idx as usize],
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn last(&self) -> char {
|
||||
match self {
|
||||
Utf32String::Ascii(bytes) => bytes.as_bytes()[bytes.len() - 1] as char,
|
||||
Utf32String::Unicode(codepoints) => codepoints[codepoints.len() - 1],
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn chars(&self) -> Chars<'_> {
|
||||
match self {
|
||||
Utf32String::Ascii(bytes) => Chars::Ascii(bytes.as_bytes().iter()),
|
||||
Utf32String::Unicode(codepoints) => Chars::Unicode(codepoints.iter()),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn push_str(&mut self, text: &str) {
|
||||
let mut codeboints = match take(self) {
|
||||
Utf32String::Ascii(bytes) if text.is_ascii() => {
|
||||
let mut bytes = bytes.into_string();
|
||||
bytes.push_str(text);
|
||||
*self = Self::Ascii(bytes.into_boxed_str());
|
||||
return;
|
||||
}
|
||||
Utf32String::Ascii(bytes) => bytes.chars().collect(),
|
||||
Utf32String::Unicode(codepoints) => Vec::from(codepoints),
|
||||
};
|
||||
codeboints.extend(text.chars());
|
||||
*self = Utf32String::Unicode(codeboints.into_boxed_slice());
|
||||
}
|
||||
#[inline]
|
||||
pub fn push(&mut self, c: char) {
|
||||
let mut codeboints = match take(self) {
|
||||
Utf32String::Ascii(bytes) if c.is_ascii() => {
|
||||
let mut bytes = bytes.into_string();
|
||||
bytes.push(c);
|
||||
*self = Self::Ascii(bytes.into_boxed_str());
|
||||
return;
|
||||
}
|
||||
Utf32String::Ascii(bytes) => bytes.chars().collect(),
|
||||
Utf32String::Unicode(codepoints) => Vec::from(codepoints),
|
||||
};
|
||||
codeboints.push(c);
|
||||
*self = Utf32String::Unicode(codeboints.into_boxed_slice());
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&str> for Utf32String {
|
||||
#[inline]
|
||||
fn from(value: &str) -> Self {
|
||||
if value.is_ascii() {
|
||||
Self::Ascii(value.to_owned().into_boxed_str())
|
||||
@ -91,12 +137,24 @@ impl From<Box<str>> for Utf32String {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<String> for Utf32String {
|
||||
#[inline]
|
||||
fn from(value: String) -> Self {
|
||||
value.into_boxed_str().into()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<Cow<'a, str>> for Utf32String {
|
||||
#[inline]
|
||||
fn from(value: Cow<'a, str>) -> Self {
|
||||
match value {
|
||||
Cow::Borrowed(value) => value.into(),
|
||||
Cow::Owned(value) => value.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub enum Chars<'a> {
|
||||
Ascii(slice::Iter<'a, u8>),
|
||||
Unicode(slice::Iter<'a, char>),
|
||||
@ -104,6 +162,7 @@ pub enum Chars<'a> {
|
||||
impl<'a> Iterator for Chars<'a> {
|
||||
type Item = char;
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match self {
|
||||
Chars::Ascii(iter) => iter.next().map(|&c| c as char),
|
@ -8,7 +8,7 @@ use parking_lot::RawMutex;
|
||||
use rayon::{prelude::*, ThreadPool};
|
||||
|
||||
use crate::items::{ItemCache, ItemsSnapshot};
|
||||
use crate::query::{self, Query};
|
||||
use crate::query::{self, MultiPattern};
|
||||
use crate::Match;
|
||||
|
||||
struct Matchers(Box<[UnsafeCell<nucleo_matcher::Matcher>]>);
|
||||
@ -30,15 +30,24 @@ pub(crate) struct Worker {
|
||||
pub(crate) items: ItemsSnapshot,
|
||||
matchers: Matchers,
|
||||
pub(crate) matches: Vec<Match>,
|
||||
pub(crate) query: Query,
|
||||
pub(crate) pattern: MultiPattern,
|
||||
pub(crate) canceled: Arc<AtomicBool>,
|
||||
pub(crate) should_notify: Arc<AtomicBool>,
|
||||
}
|
||||
|
||||
impl Worker {
|
||||
pub(crate) fn update_config(&mut self, config: MatcherConfig) {
|
||||
for matcher in self.matchers.0.iter_mut() {
|
||||
matcher.get_mut().config = config;
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn new(
|
||||
notify: Arc<(dyn Fn() + Sync + Send)>,
|
||||
worker_threads: Option<usize>,
|
||||
config: MatcherConfig,
|
||||
matches: Vec<Match>,
|
||||
items: &ItemCache,
|
||||
) -> (ThreadPool, Worker) {
|
||||
let worker_threads = worker_threads
|
||||
.unwrap_or_else(|| std::thread::available_parallelism().map_or(4, |it| it.get()));
|
||||
@ -53,15 +62,17 @@ impl Worker {
|
||||
let worker = Worker {
|
||||
notify,
|
||||
running: false,
|
||||
items: ItemsSnapshot::new(),
|
||||
items: ItemsSnapshot::new(items),
|
||||
matchers: Matchers(matchers),
|
||||
matches: Vec::with_capacity(1024),
|
||||
matches,
|
||||
// just a placeholder
|
||||
query: Query::new(&config, crate::CaseMatching::Ignore, 0),
|
||||
pattern: MultiPattern::new(&config, crate::CaseMatching::Ignore, 0),
|
||||
canceled: Arc::new(AtomicBool::new(false)),
|
||||
should_notify: Arc::new(AtomicBool::new(false)),
|
||||
};
|
||||
(pool, worker)
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn run(
|
||||
&mut self,
|
||||
items_lock: ArcMutexGuard<RawMutex, ItemCache>,
|
||||
@ -77,48 +88,56 @@ impl Worker {
|
||||
self.matches.clear();
|
||||
last_scored_item = 0;
|
||||
}
|
||||
|
||||
let matchers = &self.matchers;
|
||||
let query = &self.query;
|
||||
let pattern = &self.pattern;
|
||||
let items = unsafe { self.items.get() };
|
||||
|
||||
if self.pattern.cols.iter().all(|pat| pat.is_empty()) {
|
||||
self.matches.clear();
|
||||
self.matches.extend((0..items.len()).map(|i| Match {
|
||||
score: 0,
|
||||
idx: i as u32,
|
||||
}));
|
||||
if self.should_notify.load(atomic::Ordering::Relaxed) {
|
||||
(self.notify)();
|
||||
}
|
||||
return;
|
||||
}
|
||||
if query_status != query::Status::Unchanged && !self.matches.is_empty() {
|
||||
self.matches
|
||||
.par_iter_mut()
|
||||
.take_any_while(|_| self.canceled.load(atomic::Ordering::Relaxed))
|
||||
.take_any_while(|_| !self.canceled.load(atomic::Ordering::Relaxed))
|
||||
.for_each(|match_| {
|
||||
let item = &items[match_.idx as usize];
|
||||
match_.score = query
|
||||
match_.score = pattern
|
||||
.score(item.cols(), unsafe { matchers.get() })
|
||||
.unwrap_or(u32::MAX);
|
||||
});
|
||||
// TODO: do this in parallel?
|
||||
self.matches.retain(|m| m.score != u32::MAX)
|
||||
self.matches.retain(|m| m.score != u32::MAX);
|
||||
}
|
||||
|
||||
if last_scored_item != self.items.len() {
|
||||
self.running = true;
|
||||
let items = items[last_scored_item..]
|
||||
.par_iter()
|
||||
.enumerate()
|
||||
.filter_map(|(i, item)| {
|
||||
let score = if self.canceled.load(atomic::Ordering::Relaxed) {
|
||||
0
|
||||
u32::MAX - 1
|
||||
} else {
|
||||
query.score(item.cols(), unsafe { matchers.get() })?
|
||||
pattern.score(item.cols(), unsafe { matchers.get() })?
|
||||
};
|
||||
Some(Match {
|
||||
score,
|
||||
idx: i as u32,
|
||||
})
|
||||
});
|
||||
self.matches.par_extend(items)
|
||||
self.matches.par_extend(items);
|
||||
}
|
||||
|
||||
if !self.canceled.load(atomic::Ordering::Relaxed) {
|
||||
// TODO: cancel sort in progress?
|
||||
self.matches.par_sort_unstable_by(|match1, match2| {
|
||||
match2.idx.cmp(&match1.idx).then_with(|| {
|
||||
match2.score.cmp(&match1.score).then_with(|| {
|
||||
// the tie breaker is comparatively rarely needed so we keep it
|
||||
// in a branch especially because we need to access the items
|
||||
// array here which involves some pointer chasing
|
||||
@ -129,6 +148,8 @@ impl Worker {
|
||||
});
|
||||
}
|
||||
|
||||
if self.should_notify.load(atomic::Ordering::Relaxed) {
|
||||
(self.notify)();
|
||||
}
|
||||
}
|
||||
}
|
@ -1,131 +0,0 @@
|
||||
use std::ops::Deref;
|
||||
use std::sync::atomic::{self, AtomicBool};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use crate::items::{Item, ItemCache};
|
||||
use crate::worker::Worker;
|
||||
use rayon::ThreadPool;
|
||||
|
||||
pub use crate::query::{CaseMatching, Pattern, PatternKind, Query};
|
||||
pub use crate::utf32_string::Utf32String;
|
||||
|
||||
mod items;
|
||||
mod query;
|
||||
mod utf32_string;
|
||||
mod worker;
|
||||
pub use nucleo_matcher::{chars, Matcher, MatcherConfig, Utf32Str};
|
||||
|
||||
use parking_lot::{Mutex, MutexGuard};
|
||||
|
||||
/// One scored entry of a match list.
///
/// A plain `Copy` pair of a score and an item index; it carries no reference
/// to the item itself.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
|
||||
pub struct Match {
|
||||
/// Score assigned to the item.
/// NOTE(review): whether higher or lower is "better" is not visible in
/// this block; confirm against the worker's sort order.
pub score: u32,
|
||||
/// Position of the matched item.
/// NOTE(review): presumably an index into the item list held by `Items`;
/// confirm against the worker that produces these values.
pub idx: u32,
|
||||
}
|
||||
|
||||
/// Shared handle to the item storage.
///
/// All three fields are `Arc`s, so cloning this handle yields another handle
/// to the same underlying storage and the same notify callback.
#[derive(Clone)]
|
||||
pub struct Items<T> {
|
||||
// matcher-side data; `append` pushes one `Box<[Utf32String]>` per item into it
cache: Arc<Mutex<ItemCache>>,
|
||||
// the user-provided values, stored in the order they were appended
items: Arc<Mutex<Vec<T>>>,
|
||||
// callback invoked after `append` to signal that a new tick is necessary
notify: Arc<(dyn Fn() + Sync + Send)>,
|
||||
}
|
||||
|
||||
impl<T: Sync + Send> Items<T> {
|
||||
/// Removes all user values and all cached matcher data.
///
/// NOTE(review): each lock is taken and released in its own statement, so
/// the two clears are not performed as one atomic step; an `append` through
/// another clone of this handle could run in between. Confirm whether
/// callers rely on both lists staying in sync.
pub fn clear(&mut self) {
|
||||
self.items.lock().clear();
|
||||
self.cache.lock().clear();
|
||||
}
|
||||
|
||||
/// Appends `(value, text)` pairs: each value goes into the item list and
/// its text into the matcher cache, in the same order.
///
/// Both locks are held for the whole call, including while the notify
/// callback runs.
pub fn append(&mut self, items: impl Iterator<Item = (T, Box<[Utf32String]>)>) {
|
||||
let mut cache = self.cache.lock();
|
||||
let mut items_ = self.items.lock();
|
||||
// push the text into the cache as a side effect of extending the item
// list, so both grow in lockstep while the iterator is consumed
items_.extend(items.map(|(item, text)| {
|
||||
cache.push(text);
|
||||
item
|
||||
}));
|
||||
// notify that a new tick will be necessary
|
||||
(self.notify)();
|
||||
}
|
||||
|
||||
/// Returns a guard that dereferences to the slice of user values.
///
/// The item lock stays held for as long as the returned guard is alive.
pub fn get(&self) -> impl Deref<Target = [T]> + '_ {
|
||||
MutexGuard::map(self.items.lock(), |items| items.as_mut_slice())
|
||||
}
|
||||
|
||||
/// Returns a guard that dereferences to the matcher-side `Item` slice
/// obtained from the cache.
///
/// The cache lock stays held for as long as the returned guard is alive.
pub fn get_matcher_items(&self) -> impl Deref<Target = [Item]> + '_ {
|
||||
MutexGuard::map(self.cache.lock(), |items| items.get())
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/// Front end that owns the background `Worker`, the thread pool it runs on,
/// the shared item storage, and the most recently copied match list.
pub struct Nucleo<T: Sync + Send> {
|
||||
// the way the API is built we don't actually need these to be Arcs
|
||||
// but this lets us avoid some unsafe
|
||||
worker: Arc<Mutex<Worker>>,
|
||||
// clone of the worker's `canceled` flag; set to `true` by `tick` and `drop`
canceled: Arc<AtomicBool>,
|
||||
// pool on which `tick` spawns `Worker::run`
pool: ThreadPool,
|
||||
/// Handle for adding, clearing and reading items.
pub items: Items<T>,
|
||||
/// Copy of the worker's match list, refreshed by `tick`.
pub matches: Vec<Match>,
|
||||
/// The query; `tick` reads its status and resets it when it changed.
pub query: Query,
|
||||
}
|
||||
|
||||
impl<T: Sync + Send> Nucleo<T> {
|
||||
/// Creates the worker and its thread pool together with empty item storage.
///
/// * `config` - matcher configuration, passed to both the worker and the query.
/// * `notify` - callback shared by the worker and by `Items::append`.
/// * `num_threads` - forwarded to `Worker::new`.
///   NOTE(review): presumably `None` selects a default thread count; confirm.
/// * `case_matching`, `cols` - forwarded to `Query::new`.
pub fn new(
|
||||
config: MatcherConfig,
|
||||
notify: Arc<(dyn Fn() + Sync + Send)>,
|
||||
num_threads: Option<usize>,
|
||||
case_matching: CaseMatching,
|
||||
cols: usize,
|
||||
) -> Self {
|
||||
let (pool, worker) = Worker::new(notify.clone(), num_threads, config);
|
||||
Self {
|
||||
// share the worker's cancellation flag before the worker is moved
// into the mutex below
canceled: worker.canceled.clone(),
|
||||
items: Items {
|
||||
cache: Arc::new(Mutex::new(ItemCache::new())),
|
||||
items: Arc::new(Mutex::new(Vec::with_capacity(1024))),
|
||||
notify,
|
||||
},
|
||||
pool,
|
||||
matches: Vec::with_capacity(1024),
|
||||
query: Query::new(&config, case_matching, cols),
|
||||
worker: Arc::new(Mutex::new(worker)),
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/// Synchronizes with the worker and, if needed, starts a new run.
///
/// `timeout` is in milliseconds and only bounds the wait for the worker
/// lock when neither the query changed nor the items were cleared. In the
/// changed/cleared case the cancel flag is set and the call waits for the
/// worker lock without a timeout.
///
/// Returns `true` if the worker lock could not be acquired within the
/// timeout, or if the call got past the "nothing has changed" check.
/// Returns `false` only when the worker had not run since the last tick
/// and nothing was canceled.
pub fn tick(&mut self, timeout: u64) -> bool {
|
||||
let status = self.query.status();
|
||||
let items = self.items.cache.lock_arc();
|
||||
let canceled = status != query::Status::Unchanged || items.cleared();
|
||||
let mut inner = if canceled {
|
||||
self.query.reset_status();
|
||||
// ask a run in progress to stop, then wait for it to release the lock
// NOTE(review): the flag is not reset to `false` in this function;
// presumably `Worker::run` does so. Confirm.
self.canceled.store(true, atomic::Ordering::Relaxed);
|
||||
self.worker.lock_arc()
|
||||
} else {
|
||||
let Some(worker) = self.worker.try_lock_arc_for(Duration::from_millis(timeout)) else {
|
||||
// worker still busy after the timeout: report `true` so the caller
// ticks again
return true;
|
||||
};
|
||||
|
||||
worker
|
||||
};
|
||||
|
||||
|
||||
if inner.running {
|
||||
// a run happened since the last tick: copy its results
inner.running = false;
|
||||
self.matches.clone_from(&inner.matches);
|
||||
} else if !canceled {
|
||||
// nothing has changed
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
if canceled || inner.items.outdated(&items) {
|
||||
// both the worker guard and the item-cache guard move into the spawned
// task, so they stay locked until `run` is done with them
self.pool.spawn(move || unsafe { inner.run(items, status) })
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
impl<T: Sync + Send> Drop for Nucleo<T> {
|
||||
/// Sets the cancel flag and then blocks until the worker lock can be
/// acquired, i.e. until any task currently holding the worker has released it.
fn drop(&mut self) {
|
||||
// we ensure the worker quits before dropping items to ensure that
|
||||
// the worker can always assume the items outlive it
|
||||
self.canceled.store(true, atomic::Ordering::Relaxed);
|
||||
drop(self.worker.lock());
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue
Block a user