mirror of
https://github.com/solaeus/nucleo.git
synced 2024-12-22 18:07:53 +00:00
cleanup bugs
This commit is contained in:
parent
6b8ee0f585
commit
1ce8850f7e
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -15,7 +15,6 @@ dependencies = [
|
|||||||
"brunch",
|
"brunch",
|
||||||
"fuzzy-matcher",
|
"fuzzy-matcher",
|
||||||
"nucleo",
|
"nucleo",
|
||||||
"nucleo-matcher",
|
|
||||||
"walkdir",
|
"walkdir",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
18
Cargo.toml
18
Cargo.toml
@ -1,2 +1,18 @@
|
|||||||
|
[package]
|
||||||
|
name = "nucleo"
|
||||||
|
description = "plug and play high performance fuzzy matcher"
|
||||||
|
authors = ["Pascal Kuthe <pascal.kuthe@semimod.de>"]
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
license = "MPL-2.0"
|
||||||
|
repository = "https://github.com/helix-editor/nucleo"
|
||||||
|
|
||||||
|
[lib]
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
nucleo-matcher = { version = "0.1", path = "matcher" }
|
||||||
|
parking_lot = { version = "0.12.1", features = ["send_guard", "arc_lock"]}
|
||||||
|
rayon = "1.7.0"
|
||||||
|
|
||||||
[workspace]
|
[workspace]
|
||||||
members = [ "matcher", "worker", "benches" ]
|
members = [ "matcher", "bench" ]
|
||||||
|
@ -6,8 +6,7 @@ edition = "2021"
|
|||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
nucleo-matcher = { version = "0.1", path = "../matcher" }
|
nucleo = { version = "0.1", path = "../" }
|
||||||
nucleo = { version = "0.1", path = "../worker" }
|
|
||||||
brunch = "0.5.0"
|
brunch = "0.5.0"
|
||||||
fuzzy-matcher = "0.3.7"
|
fuzzy-matcher = "0.3.7"
|
||||||
walkdir = "2"
|
walkdir = "2"
|
@ -4,8 +4,7 @@ use std::process::Command;
|
|||||||
|
|
||||||
use brunch::{Bench, Benches};
|
use brunch::{Bench, Benches};
|
||||||
use fuzzy_matcher::FuzzyMatcher;
|
use fuzzy_matcher::FuzzyMatcher;
|
||||||
use nucleo::Utf32String;
|
use nucleo::{Utf32Str, Utf32String};
|
||||||
use nucleo_matcher::Utf32Str;
|
|
||||||
|
|
||||||
fn bench_dir() -> PathBuf {
|
fn bench_dir() -> PathBuf {
|
||||||
std::env::var_os("BENCHMARK_DIR")
|
std::env::var_os("BENCHMARK_DIR")
|
||||||
@ -44,9 +43,8 @@ fn main() {
|
|||||||
Some((path.as_str().into(), path))
|
Some((path.as_str().into(), path))
|
||||||
})
|
})
|
||||||
.unzip();
|
.unzip();
|
||||||
let mut nucleo =
|
let mut nucleo = nucleo::Matcher::new(nucleo::MatcherConfig::DEFAULT.match_paths());
|
||||||
nucleo_matcher::Matcher::new(nucleo_matcher::MatcherConfig::DEFAULT.match_paths());
|
let skim = fuzzy_matcher::skim::SkimMatcherV2::default();
|
||||||
let skim = fuzzy_matcher::skim::SkimMatcherV2::default().ignore_case();
|
|
||||||
|
|
||||||
// TODO: unicode?
|
// TODO: unicode?
|
||||||
let needles = ["never_matches", "copying", "/doc/kernel", "//.h"];
|
let needles = ["never_matches", "copying", "/doc/kernel", "//.h"];
|
@ -11,9 +11,7 @@ pub struct MatcherConfig {
|
|||||||
/// Extra bonus for word boundary after slash, colon, semi-colon, and comma
|
/// Extra bonus for word boundary after slash, colon, semi-colon, and comma
|
||||||
pub(crate) bonus_boundary_delimiter: u16,
|
pub(crate) bonus_boundary_delimiter: u16,
|
||||||
pub initial_char_class: CharClass,
|
pub initial_char_class: CharClass,
|
||||||
/// Whether to normalize latin script characters to ASCII
|
/// Whether to normalize latin script characters to ASCII (enabled by default)
|
||||||
/// this significantly degrades performance so its not recommended
|
|
||||||
/// to be turned on by default
|
|
||||||
pub normalize: bool,
|
pub normalize: bool,
|
||||||
/// whether to ignore casing
|
/// whether to ignore casing
|
||||||
pub ignore_case: bool,
|
pub ignore_case: bool,
|
||||||
@ -26,7 +24,7 @@ impl MatcherConfig {
|
|||||||
bonus_boundary_white: BONUS_BOUNDARY + 2,
|
bonus_boundary_white: BONUS_BOUNDARY + 2,
|
||||||
bonus_boundary_delimiter: BONUS_BOUNDARY + 1,
|
bonus_boundary_delimiter: BONUS_BOUNDARY + 1,
|
||||||
initial_char_class: CharClass::Whitespace,
|
initial_char_class: CharClass::Whitespace,
|
||||||
normalize: false,
|
normalize: true,
|
||||||
ignore_case: true,
|
ignore_case: true,
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -125,26 +125,41 @@ impl Matcher {
|
|||||||
|
|
||||||
fn fuzzy_matcher_impl<const INDICES: bool>(
|
fn fuzzy_matcher_impl<const INDICES: bool>(
|
||||||
&mut self,
|
&mut self,
|
||||||
haystack: Utf32Str<'_>,
|
haystack_: Utf32Str<'_>,
|
||||||
needle_: Utf32Str<'_>,
|
needle_: Utf32Str<'_>,
|
||||||
indices: &mut Vec<u32>,
|
indices: &mut Vec<u32>,
|
||||||
) -> Option<u16> {
|
) -> Option<u16> {
|
||||||
if needle_.len() > haystack.len() || needle_.is_empty() {
|
if needle_.len() > haystack_.len() || needle_.is_empty() {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
if needle_.len() == haystack.len() {
|
if needle_.len() == haystack_.len() {
|
||||||
return self.exact_match_impl::<INDICES>(haystack, needle_, indices);
|
return self.exact_match_impl::<INDICES>(
|
||||||
|
haystack_,
|
||||||
|
needle_,
|
||||||
|
0,
|
||||||
|
haystack_.len(),
|
||||||
|
indices,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
assert!(
|
assert!(
|
||||||
haystack.len() <= u32::MAX as usize,
|
haystack_.len() <= u32::MAX as usize,
|
||||||
"fuzzy matching is only support for up to 2^32-1 codepoints"
|
"fuzzy matching is only support for up to 2^32-1 codepoints"
|
||||||
);
|
);
|
||||||
match (haystack, needle_) {
|
match (haystack_, needle_) {
|
||||||
(Utf32Str::Ascii(haystack), Utf32Str::Ascii(needle)) => {
|
(Utf32Str::Ascii(haystack), Utf32Str::Ascii(needle)) => {
|
||||||
if let &[needle] = needle {
|
if let &[needle] = needle {
|
||||||
return self.substring_match_1_ascii::<INDICES>(haystack, needle, indices);
|
return self.substring_match_1_ascii::<INDICES>(haystack, needle, indices);
|
||||||
}
|
}
|
||||||
let (start, greedy_end, end) = self.prefilter_ascii(haystack, needle, false)?;
|
let (start, greedy_end, end) = self.prefilter_ascii(haystack, needle, false)?;
|
||||||
|
if needle_.len() == end - start {
|
||||||
|
return Some(self.calculate_score::<INDICES, _, _>(
|
||||||
|
AsciiChar::cast(haystack),
|
||||||
|
AsciiChar::cast(needle),
|
||||||
|
start,
|
||||||
|
greedy_end,
|
||||||
|
indices,
|
||||||
|
));
|
||||||
|
}
|
||||||
self.fuzzy_match_optimal::<INDICES, AsciiChar, AsciiChar>(
|
self.fuzzy_match_optimal::<INDICES, AsciiChar, AsciiChar>(
|
||||||
AsciiChar::cast(haystack),
|
AsciiChar::cast(haystack),
|
||||||
AsciiChar::cast(needle),
|
AsciiChar::cast(needle),
|
||||||
@ -171,6 +186,10 @@ impl Matcher {
|
|||||||
return Some(res);
|
return Some(res);
|
||||||
}
|
}
|
||||||
let (start, end) = self.prefilter_non_ascii(haystack, needle_, false)?;
|
let (start, end) = self.prefilter_non_ascii(haystack, needle_, false)?;
|
||||||
|
if needle_.len() == end - start {
|
||||||
|
return self
|
||||||
|
.exact_match_impl::<INDICES>(haystack_, needle_, start, end, indices);
|
||||||
|
}
|
||||||
self.fuzzy_match_optimal::<INDICES, char, AsciiChar>(
|
self.fuzzy_match_optimal::<INDICES, char, AsciiChar>(
|
||||||
haystack,
|
haystack,
|
||||||
AsciiChar::cast(needle),
|
AsciiChar::cast(needle),
|
||||||
@ -188,6 +207,10 @@ impl Matcher {
|
|||||||
return Some(res);
|
return Some(res);
|
||||||
}
|
}
|
||||||
let (start, end) = self.prefilter_non_ascii(haystack, needle_, false)?;
|
let (start, end) = self.prefilter_non_ascii(haystack, needle_, false)?;
|
||||||
|
if needle_.len() == end - start {
|
||||||
|
return self
|
||||||
|
.exact_match_impl::<INDICES>(haystack_, needle_, start, end, indices);
|
||||||
|
}
|
||||||
self.fuzzy_match_optimal::<INDICES, char, char>(
|
self.fuzzy_match_optimal::<INDICES, char, char>(
|
||||||
haystack,
|
haystack,
|
||||||
needle,
|
needle,
|
||||||
@ -243,7 +266,7 @@ impl Matcher {
|
|||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
if needle_.len() == haystack.len() {
|
if needle_.len() == haystack.len() {
|
||||||
return self.exact_match_impl::<INDICES>(haystack, needle_, indices);
|
return self.exact_match_impl::<INDICES>(haystack, needle_, 0, haystack.len(), indices);
|
||||||
}
|
}
|
||||||
assert!(
|
assert!(
|
||||||
haystack.len() <= u32::MAX as usize,
|
haystack.len() <= u32::MAX as usize,
|
||||||
@ -252,6 +275,15 @@ impl Matcher {
|
|||||||
match (haystack, needle_) {
|
match (haystack, needle_) {
|
||||||
(Utf32Str::Ascii(haystack), Utf32Str::Ascii(needle)) => {
|
(Utf32Str::Ascii(haystack), Utf32Str::Ascii(needle)) => {
|
||||||
let (start, greedy_end, _) = self.prefilter_ascii(haystack, needle, true)?;
|
let (start, greedy_end, _) = self.prefilter_ascii(haystack, needle, true)?;
|
||||||
|
if needle_.len() == greedy_end - start {
|
||||||
|
return Some(self.calculate_score::<INDICES, _, _>(
|
||||||
|
AsciiChar::cast(haystack),
|
||||||
|
AsciiChar::cast(needle),
|
||||||
|
start,
|
||||||
|
greedy_end,
|
||||||
|
indices,
|
||||||
|
));
|
||||||
|
}
|
||||||
self.fuzzy_match_greedy_::<INDICES, AsciiChar, AsciiChar>(
|
self.fuzzy_match_greedy_::<INDICES, AsciiChar, AsciiChar>(
|
||||||
AsciiChar::cast(haystack),
|
AsciiChar::cast(haystack),
|
||||||
AsciiChar::cast(needle),
|
AsciiChar::cast(needle),
|
||||||
@ -330,7 +362,7 @@ impl Matcher {
|
|||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
if needle_.len() == haystack.len() {
|
if needle_.len() == haystack.len() {
|
||||||
return self.exact_match_impl::<INDICES>(haystack, needle_, indices);
|
return self.exact_match_impl::<INDICES>(haystack, needle_, 0, haystack.len(), indices);
|
||||||
}
|
}
|
||||||
assert!(
|
assert!(
|
||||||
haystack.len() <= u32::MAX as usize,
|
haystack.len() <= u32::MAX as usize,
|
||||||
@ -393,7 +425,7 @@ impl Matcher {
|
|||||||
///
|
///
|
||||||
/// See the [matcher documentation](crate::Matcher) for more details.
|
/// See the [matcher documentation](crate::Matcher) for more details.
|
||||||
pub fn exact_match(&mut self, haystack: Utf32Str<'_>, needle: Utf32Str<'_>) -> Option<u16> {
|
pub fn exact_match(&mut self, haystack: Utf32Str<'_>, needle: Utf32Str<'_>) -> Option<u16> {
|
||||||
self.exact_match_impl::<false>(haystack, needle, &mut Vec::new())
|
self.exact_match_impl::<false>(haystack, needle, 0, haystack.len(), &mut Vec::new())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Checks whether needle and haystack match exactly and compute the matches indices.
|
/// Checks whether needle and haystack match exactly and compute the matches indices.
|
||||||
@ -407,7 +439,7 @@ impl Matcher {
|
|||||||
needle: Utf32Str<'_>,
|
needle: Utf32Str<'_>,
|
||||||
indices: &mut Vec<u32>,
|
indices: &mut Vec<u32>,
|
||||||
) -> Option<u16> {
|
) -> Option<u16> {
|
||||||
self.exact_match_impl::<true>(haystack, needle, indices)
|
self.exact_match_impl::<true>(haystack, needle, 0, haystack.len(), indices)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Checks whether needle is a prefix of the haystack.
|
/// Checks whether needle is a prefix of the haystack.
|
||||||
@ -419,7 +451,7 @@ impl Matcher {
|
|||||||
if haystack.len() < needle.len() {
|
if haystack.len() < needle.len() {
|
||||||
None
|
None
|
||||||
} else {
|
} else {
|
||||||
self.exact_match_impl::<false>(haystack.slice(..needle.len()), needle, &mut Vec::new())
|
self.exact_match_impl::<false>(haystack, needle, 0, needle.len(), &mut Vec::new())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -437,7 +469,7 @@ impl Matcher {
|
|||||||
if haystack.len() < needle.len() {
|
if haystack.len() < needle.len() {
|
||||||
None
|
None
|
||||||
} else {
|
} else {
|
||||||
self.exact_match_impl::<true>(haystack.slice(..needle.len()), needle, indices)
|
self.exact_match_impl::<true>(haystack, needle, 0, needle.len(), indices)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -451,8 +483,10 @@ impl Matcher {
|
|||||||
None
|
None
|
||||||
} else {
|
} else {
|
||||||
self.exact_match_impl::<false>(
|
self.exact_match_impl::<false>(
|
||||||
haystack.slice(haystack.len() - needle.len()..),
|
haystack,
|
||||||
needle,
|
needle,
|
||||||
|
haystack.len() - needle.len(),
|
||||||
|
haystack.len(),
|
||||||
&mut Vec::new(),
|
&mut Vec::new(),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
@ -473,8 +507,10 @@ impl Matcher {
|
|||||||
None
|
None
|
||||||
} else {
|
} else {
|
||||||
self.exact_match_impl::<true>(
|
self.exact_match_impl::<true>(
|
||||||
haystack.slice(haystack.len() - needle.len()..),
|
haystack,
|
||||||
needle,
|
needle,
|
||||||
|
haystack.len() - needle.len(),
|
||||||
|
haystack.len(),
|
||||||
indices,
|
indices,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
@ -484,9 +520,11 @@ impl Matcher {
|
|||||||
&mut self,
|
&mut self,
|
||||||
haystack: Utf32Str<'_>,
|
haystack: Utf32Str<'_>,
|
||||||
needle_: Utf32Str<'_>,
|
needle_: Utf32Str<'_>,
|
||||||
|
start: usize,
|
||||||
|
end: usize,
|
||||||
indices: &mut Vec<u32>,
|
indices: &mut Vec<u32>,
|
||||||
) -> Option<u16> {
|
) -> Option<u16> {
|
||||||
if needle_.len() != haystack.len() || needle_.is_empty() {
|
if needle_.len() != end - start || needle_.is_empty() {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
assert!(
|
assert!(
|
||||||
@ -496,7 +534,7 @@ impl Matcher {
|
|||||||
let score = match (haystack, needle_) {
|
let score = match (haystack, needle_) {
|
||||||
(Utf32Str::Ascii(haystack), Utf32Str::Ascii(needle)) => {
|
(Utf32Str::Ascii(haystack), Utf32Str::Ascii(needle)) => {
|
||||||
let matched = if self.config.ignore_case {
|
let matched = if self.config.ignore_case {
|
||||||
AsciiChar::cast(haystack)
|
AsciiChar::cast(haystack)[start..end]
|
||||||
.iter()
|
.iter()
|
||||||
.map(|c| c.normalize(&self.config))
|
.map(|c| c.normalize(&self.config))
|
||||||
.eq(AsciiChar::cast(needle)
|
.eq(AsciiChar::cast(needle)
|
||||||
@ -511,8 +549,8 @@ impl Matcher {
|
|||||||
self.calculate_score::<INDICES, _, _>(
|
self.calculate_score::<INDICES, _, _>(
|
||||||
AsciiChar::cast(haystack),
|
AsciiChar::cast(haystack),
|
||||||
AsciiChar::cast(needle),
|
AsciiChar::cast(needle),
|
||||||
0,
|
start,
|
||||||
haystack.len(),
|
end,
|
||||||
indices,
|
indices,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
@ -522,13 +560,12 @@ impl Matcher {
|
|||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
(Utf32Str::Unicode(haystack), Utf32Str::Ascii(needle)) => {
|
(Utf32Str::Unicode(haystack), Utf32Str::Ascii(needle)) => {
|
||||||
let matched =
|
let matched = haystack[start..end]
|
||||||
haystack
|
.iter()
|
||||||
|
.map(|c| c.normalize(&self.config))
|
||||||
|
.eq(AsciiChar::cast(needle)
|
||||||
.iter()
|
.iter()
|
||||||
.map(|c| c.normalize(&self.config))
|
.map(|c| c.normalize(&self.config)));
|
||||||
.eq(AsciiChar::cast(needle)
|
|
||||||
.iter()
|
|
||||||
.map(|c| c.normalize(&self.config)));
|
|
||||||
if !matched {
|
if !matched {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
@ -536,20 +573,20 @@ impl Matcher {
|
|||||||
self.calculate_score::<INDICES, _, _>(
|
self.calculate_score::<INDICES, _, _>(
|
||||||
haystack,
|
haystack,
|
||||||
AsciiChar::cast(needle),
|
AsciiChar::cast(needle),
|
||||||
0,
|
start,
|
||||||
haystack.len(),
|
end,
|
||||||
indices,
|
indices,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
(Utf32Str::Unicode(haystack), Utf32Str::Unicode(needle)) => {
|
(Utf32Str::Unicode(haystack), Utf32Str::Unicode(needle)) => {
|
||||||
let matched = haystack
|
let matched = haystack[start..end]
|
||||||
.iter()
|
.iter()
|
||||||
.map(|c| c.normalize(&self.config))
|
.map(|c| c.normalize(&self.config))
|
||||||
.eq(needle.iter().map(|c| c.normalize(&self.config)));
|
.eq(needle.iter().map(|c| c.normalize(&self.config)));
|
||||||
if !matched {
|
if !matched {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
self.calculate_score::<INDICES, _, _>(haystack, needle, 0, haystack.len(), indices)
|
self.calculate_score::<INDICES, _, _>(haystack, needle, start, end, indices)
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
Some(score)
|
Some(score)
|
||||||
|
@ -25,6 +25,8 @@ struct MatrixLayout<C: Char> {
|
|||||||
}
|
}
|
||||||
impl<C: Char> MatrixLayout<C> {
|
impl<C: Char> MatrixLayout<C> {
|
||||||
fn new(haystack_len: usize, needle_len: usize) -> MatrixLayout<C> {
|
fn new(haystack_len: usize, needle_len: usize) -> MatrixLayout<C> {
|
||||||
|
assert!(haystack_len >= needle_len);
|
||||||
|
assert!(haystack_len <= u32::MAX as usize);
|
||||||
let mut layout = Layout::from_size_align(0, 1).unwrap();
|
let mut layout = Layout::from_size_align(0, 1).unwrap();
|
||||||
let haystack_layout = Layout::array::<C>(haystack_len).unwrap();
|
let haystack_layout = Layout::array::<C>(haystack_len).unwrap();
|
||||||
let bonus_layout = Layout::array::<u16>(haystack_len).unwrap();
|
let bonus_layout = Layout::array::<u16>(haystack_len).unwrap();
|
||||||
|
@ -85,6 +85,9 @@ impl Matcher {
|
|||||||
.iter()
|
.iter()
|
||||||
.rev()
|
.rev()
|
||||||
.position(|c| c.normalize(&self.config) == needle_char)?;
|
.position(|c| c.normalize(&self.config) == needle_char)?;
|
||||||
|
if end - start < needle.len() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
Some((start, end))
|
Some((start, end))
|
||||||
}
|
}
|
||||||
|
@ -89,9 +89,16 @@ pub(crate) struct ItemsSnapshot {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl ItemsSnapshot {
|
impl ItemsSnapshot {
|
||||||
pub(crate) fn new() -> Self {
|
pub(crate) fn new(items: &ItemCache) -> Self {
|
||||||
Self {
|
Self {
|
||||||
items: Vec::with_capacity(1024),
|
items: items
|
||||||
|
.live
|
||||||
|
.iter()
|
||||||
|
.map(|item| ItemSnapshot {
|
||||||
|
cols: item.cols,
|
||||||
|
len: item.cols().iter().map(|s| s.len() as u32).sum(),
|
||||||
|
})
|
||||||
|
.collect(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -104,7 +111,7 @@ impl ItemsSnapshot {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn update(&mut self, items: &ItemCache) -> bool {
|
pub(crate) fn update(&mut self, items: &ItemCache) -> bool {
|
||||||
let cleared = items.evicted.is_empty();
|
let cleared = !items.evicted.is_empty();
|
||||||
// drop in another thread to ensure we don't wait for a long drop here
|
// drop in another thread to ensure we don't wait for a long drop here
|
||||||
if cleared {
|
if cleared {
|
||||||
self.items.clear();
|
self.items.clear();
|
207
src/lib.rs
Normal file
207
src/lib.rs
Normal file
@ -0,0 +1,207 @@
|
|||||||
|
use std::cmp::Reverse;
|
||||||
|
use std::ops::Deref;
|
||||||
|
use std::sync::atomic::{self, AtomicBool};
|
||||||
|
use std::sync::Arc;
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
use crate::items::{Item, ItemCache};
|
||||||
|
use crate::worker::Worker;
|
||||||
|
use parking_lot::lock_api::ArcMutexGuard;
|
||||||
|
use rayon::ThreadPool;
|
||||||
|
|
||||||
|
pub use crate::query::{CaseMatching, MultiPattern, Pattern, PatternKind};
|
||||||
|
pub use crate::utf32_string::Utf32String;
|
||||||
|
|
||||||
|
mod items;
|
||||||
|
mod query;
|
||||||
|
mod utf32_string;
|
||||||
|
mod worker;
|
||||||
|
pub use nucleo_matcher::{chars, Matcher, MatcherConfig, Utf32Str};
|
||||||
|
|
||||||
|
use parking_lot::{Mutex, MutexGuard, RawMutex};
|
||||||
|
|
||||||
|
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
|
||||||
|
pub struct Match {
|
||||||
|
pub score: u32,
|
||||||
|
pub idx: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
|
||||||
|
pub struct Status {
|
||||||
|
pub changed: bool,
|
||||||
|
pub running: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct Items<T> {
|
||||||
|
cache: Arc<Mutex<ItemCache>>,
|
||||||
|
items: Arc<Mutex<Vec<T>>>,
|
||||||
|
notify: Arc<(dyn Fn() + Sync + Send)>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: Sync + Send> Items<T> {
|
||||||
|
pub fn clear(&mut self) {
|
||||||
|
self.items.lock().clear();
|
||||||
|
self.cache.lock().clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn append(&mut self, items: impl Iterator<Item = (T, Box<[Utf32String]>)>) {
|
||||||
|
let mut cache = self.cache.lock();
|
||||||
|
let mut items_ = self.items.lock();
|
||||||
|
items_.extend(items.map(|(item, text)| {
|
||||||
|
cache.push(text);
|
||||||
|
item
|
||||||
|
}));
|
||||||
|
// notify that a new tick will be necessary
|
||||||
|
(self.notify)();
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get(&self) -> impl Deref<Target = [T]> + '_ {
|
||||||
|
MutexGuard::map(self.items.lock(), |items| items.as_mut_slice())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_matcher_items(&self) -> impl Deref<Target = [Item]> + '_ {
|
||||||
|
MutexGuard::map(self.cache.lock(), |items| items.get())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct Nucleo<T: Sync + Send> {
|
||||||
|
// the way the API is built we don't actually need these to be Arcs
|
||||||
|
// but this lets us avoid some unsafe
|
||||||
|
worker: Arc<Mutex<Worker>>,
|
||||||
|
canceled: Arc<AtomicBool>,
|
||||||
|
pool: ThreadPool,
|
||||||
|
pub items: Items<T>,
|
||||||
|
pub matches: Vec<Match>,
|
||||||
|
pub pattern: MultiPattern,
|
||||||
|
should_notify: Arc<AtomicBool>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: Sync + Send> Nucleo<T> {
|
||||||
|
pub fn new(
|
||||||
|
config: MatcherConfig,
|
||||||
|
notify: Arc<(dyn Fn() + Sync + Send)>,
|
||||||
|
num_threads: Option<usize>,
|
||||||
|
case_matching: CaseMatching,
|
||||||
|
cols: usize,
|
||||||
|
items: impl Iterator<Item = (T, Box<[Utf32String]>)>,
|
||||||
|
) -> Self {
|
||||||
|
let mut cache = ItemCache::new();
|
||||||
|
let items: Vec<_> = items
|
||||||
|
.map(|(item, text)| {
|
||||||
|
cache.push(text);
|
||||||
|
item
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
let matches: Vec<_> = (0..items.len())
|
||||||
|
.map(|i| Match {
|
||||||
|
score: 0,
|
||||||
|
idx: i as u32,
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
let (pool, worker) =
|
||||||
|
Worker::new(notify.clone(), num_threads, config, matches.clone(), &cache);
|
||||||
|
Self {
|
||||||
|
canceled: worker.canceled.clone(),
|
||||||
|
should_notify: worker.should_notify.clone(),
|
||||||
|
items: Items {
|
||||||
|
cache: Arc::new(Mutex::new(cache)),
|
||||||
|
items: Arc::new(Mutex::new(items)),
|
||||||
|
notify,
|
||||||
|
},
|
||||||
|
pool,
|
||||||
|
matches,
|
||||||
|
pattern: MultiPattern::new(&config, case_matching, cols),
|
||||||
|
worker: Arc::new(Mutex::new(worker)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn update_config(&mut self, config: MatcherConfig) {
|
||||||
|
self.worker.lock().update_config(config)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn tick(&mut self, timeout: u64) -> Status {
|
||||||
|
self.should_notify.store(false, atomic::Ordering::Relaxed);
|
||||||
|
let status = self.pattern.status();
|
||||||
|
let items = self.items.cache.lock_arc();
|
||||||
|
let canceled = status != query::Status::Unchanged || items.cleared();
|
||||||
|
let res = self.tick_inner(timeout, canceled, items, status);
|
||||||
|
if !canceled {
|
||||||
|
self.should_notify.store(true, atomic::Ordering::Relaxed);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
let items = self.items.cache.lock_arc();
|
||||||
|
let res = self.tick_inner(timeout, false, items, query::Status::Unchanged);
|
||||||
|
self.should_notify.store(true, atomic::Ordering::Relaxed);
|
||||||
|
res
|
||||||
|
}
|
||||||
|
|
||||||
|
fn tick_inner(
|
||||||
|
&mut self,
|
||||||
|
timeout: u64,
|
||||||
|
canceled: bool,
|
||||||
|
items: ArcMutexGuard<RawMutex, ItemCache>,
|
||||||
|
status: query::Status,
|
||||||
|
) -> Status {
|
||||||
|
let mut inner = if canceled {
|
||||||
|
self.pattern.reset_status();
|
||||||
|
self.canceled.store(true, atomic::Ordering::Relaxed);
|
||||||
|
self.worker.lock_arc()
|
||||||
|
} else {
|
||||||
|
let Some(worker) = self.worker.try_lock_arc_for(Duration::from_millis(timeout)) else {
|
||||||
|
return Status{ changed: false, running: true };
|
||||||
|
};
|
||||||
|
worker
|
||||||
|
};
|
||||||
|
|
||||||
|
let changed = inner.running;
|
||||||
|
if inner.running {
|
||||||
|
inner.running = false;
|
||||||
|
self.matches.clone_from(&inner.matches);
|
||||||
|
}
|
||||||
|
|
||||||
|
let running = canceled || inner.items.outdated(&items);
|
||||||
|
if running {
|
||||||
|
inner.pattern.clone_from(&self.pattern);
|
||||||
|
self.canceled.store(false, atomic::Ordering::Relaxed);
|
||||||
|
self.pool.spawn(move || unsafe { inner.run(items, status) })
|
||||||
|
}
|
||||||
|
Status { changed, running }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: Sync + Send> Drop for Nucleo<T> {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
// we ensure the worker quits before dropping items to ensure that
|
||||||
|
// the worker can always assume the items outlive it
|
||||||
|
self.canceled.store(true, atomic::Ordering::Relaxed);
|
||||||
|
let lock = self.worker.try_lock_for(Duration::from_secs(1));
|
||||||
|
if lock.is_none() {
|
||||||
|
unreachable!("thread pool failed to shutdown properly")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/// convenience function to easily fuzzy match
|
||||||
|
/// on a (relatively small) list of inputs. This is not recommended for building a full TUI
|
||||||
|
/// application that can match large numbers of items, as all matching is done on the current
|
||||||
|
/// thread, effectively blocking the UI
|
||||||
|
pub fn fuzzy_match<T: AsRef<str>>(
|
||||||
|
matcher: &mut Matcher,
|
||||||
|
pattern: &str,
|
||||||
|
items: impl IntoIterator<Item = T>,
|
||||||
|
case_matching: CaseMatching,
|
||||||
|
) -> Vec<(T, u32)> {
|
||||||
|
let mut pattern_ = Pattern::new(&matcher.config, case_matching);
|
||||||
|
pattern_.set_literal(pattern, PatternKind::Fuzzy, false);
|
||||||
|
let mut buf = Vec::new();
|
||||||
|
let mut items: Vec<_> = items
|
||||||
|
.into_iter()
|
||||||
|
.filter_map(|item| {
|
||||||
|
pattern_
|
||||||
|
.score(Utf32Str::new(item.as_ref(), &mut buf), matcher)
|
||||||
|
.map(|score| (item, score))
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
items.sort_by_key(|(item, score)| (Reverse(*score), item.as_ref().len()));
|
||||||
|
items
|
||||||
|
}
|
@ -54,7 +54,9 @@ impl PatternAtom {
|
|||||||
|
|
||||||
match case {
|
match case {
|
||||||
CaseMatching::Ignore => needle.make_ascii_lowercase(),
|
CaseMatching::Ignore => needle.make_ascii_lowercase(),
|
||||||
CaseMatching::Smart => ignore_case = needle.bytes().any(|b| b.is_ascii_uppercase()),
|
CaseMatching::Smart => {
|
||||||
|
ignore_case = !needle.bytes().any(|b| b.is_ascii_uppercase())
|
||||||
|
}
|
||||||
CaseMatching::Respect => (),
|
CaseMatching::Respect => (),
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -80,7 +82,7 @@ impl PatternAtom {
|
|||||||
match case {
|
match case {
|
||||||
CaseMatching::Ignore => c = chars::to_lower_case(c),
|
CaseMatching::Ignore => c = chars::to_lower_case(c),
|
||||||
CaseMatching::Smart => {
|
CaseMatching::Smart => {
|
||||||
ignore_case = ignore_case || c.is_uppercase();
|
ignore_case = ignore_case && !c.is_uppercase();
|
||||||
}
|
}
|
||||||
CaseMatching::Respect => (),
|
CaseMatching::Respect => (),
|
||||||
}
|
}
|
||||||
@ -149,22 +151,18 @@ pub enum Status {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct Query {
|
pub struct MultiPattern {
|
||||||
pub cols: Vec<Pattern>,
|
pub cols: Vec<Pattern>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Query {
|
impl MultiPattern {
|
||||||
pub fn new(matcher_config: &MatcherConfig, case_matching: CaseMatching, cols: usize) -> Query {
|
pub fn new(
|
||||||
Query {
|
matcher_config: &MatcherConfig,
|
||||||
cols: vec![
|
case_matching: CaseMatching,
|
||||||
Pattern {
|
cols: usize,
|
||||||
terms: Vec::new(),
|
) -> MultiPattern {
|
||||||
case_matching,
|
MultiPattern {
|
||||||
normalize: matcher_config.normalize,
|
cols: vec![Pattern::new(matcher_config, case_matching); cols],
|
||||||
status: Status::Unchanged,
|
|
||||||
};
|
|
||||||
cols
|
|
||||||
],
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -201,7 +199,30 @@ pub struct Pattern {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Pattern {
|
impl Pattern {
|
||||||
pub(crate) fn score(&self, haystack: Utf32Str<'_>, matcher: &mut Matcher) -> Option<u32> {
|
pub fn new(matcher_config: &MatcherConfig, case_matching: CaseMatching) -> Pattern {
|
||||||
|
Pattern {
|
||||||
|
terms: Vec::new(),
|
||||||
|
case_matching,
|
||||||
|
normalize: matcher_config.normalize,
|
||||||
|
status: Status::Unchanged,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pub fn new_fuzzy_literal(
|
||||||
|
matcher_config: &MatcherConfig,
|
||||||
|
case_matching: CaseMatching,
|
||||||
|
pattern: &str,
|
||||||
|
) -> Pattern {
|
||||||
|
let mut res = Pattern {
|
||||||
|
terms: Vec::new(),
|
||||||
|
case_matching,
|
||||||
|
normalize: matcher_config.normalize,
|
||||||
|
status: Status::Unchanged,
|
||||||
|
};
|
||||||
|
res.set_literal(pattern, PatternKind::Fuzzy, false);
|
||||||
|
res
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn score(&self, haystack: Utf32Str<'_>, matcher: &mut Matcher) -> Option<u32> {
|
||||||
if self.terms.is_empty() {
|
if self.terms.is_empty() {
|
||||||
return Some(0);
|
return Some(0);
|
||||||
}
|
}
|
||||||
@ -215,7 +236,7 @@ impl Pattern {
|
|||||||
matcher.substring_match(haystack, pattern.needle.slice(..))
|
matcher.substring_match(haystack, pattern.needle.slice(..))
|
||||||
}
|
}
|
||||||
PatternKind::Prefix => matcher.prefix_match(haystack, pattern.needle.slice(..)),
|
PatternKind::Prefix => matcher.prefix_match(haystack, pattern.needle.slice(..)),
|
||||||
PatternKind::Postfix => matcher.prefix_match(haystack, pattern.needle.slice(..)),
|
PatternKind::Postfix => matcher.postfix_match(haystack, pattern.needle.slice(..)),
|
||||||
};
|
};
|
||||||
if pattern.invert {
|
if pattern.invert {
|
||||||
if pattern_score.is_some() {
|
if pattern_score.is_some() {
|
||||||
@ -249,7 +270,7 @@ impl Pattern {
|
|||||||
}
|
}
|
||||||
PatternKind::Prefix => matcher.prefix_match(haystack, pattern.needle.slice(..)),
|
PatternKind::Prefix => matcher.prefix_match(haystack, pattern.needle.slice(..)),
|
||||||
PatternKind::Postfix => {
|
PatternKind::Postfix => {
|
||||||
matcher.prefix_match(haystack, pattern.needle.slice(..))
|
matcher.postfix_match(haystack, pattern.needle.slice(..))
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
if pattern_score.is_some() {
|
if pattern_score.is_some() {
|
||||||
@ -262,16 +283,16 @@ impl Pattern {
|
|||||||
matcher.exact_indices(haystack, pattern.needle.slice(..), indices)
|
matcher.exact_indices(haystack, pattern.needle.slice(..), indices)
|
||||||
}
|
}
|
||||||
PatternKind::Fuzzy => {
|
PatternKind::Fuzzy => {
|
||||||
matcher.exact_indices(haystack, pattern.needle.slice(..), indices)
|
matcher.fuzzy_indices(haystack, pattern.needle.slice(..), indices)
|
||||||
}
|
}
|
||||||
PatternKind::Substring => {
|
PatternKind::Substring => {
|
||||||
matcher.exact_indices(haystack, pattern.needle.slice(..), indices)
|
matcher.substring_indices(haystack, pattern.needle.slice(..), indices)
|
||||||
}
|
}
|
||||||
PatternKind::Prefix => {
|
PatternKind::Prefix => {
|
||||||
matcher.exact_indices(haystack, pattern.needle.slice(..), indices)
|
matcher.prefix_indices(haystack, pattern.needle.slice(..), indices)
|
||||||
}
|
}
|
||||||
PatternKind::Postfix => {
|
PatternKind::Postfix => {
|
||||||
matcher.exact_indices(haystack, pattern.needle.slice(..), indices)
|
matcher.postfix_indices(haystack, pattern.needle.slice(..), indices)
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
score += pattern_score? as u32
|
score += pattern_score? as u32
|
||||||
@ -282,10 +303,15 @@ impl Pattern {
|
|||||||
pub fn parse_from(&mut self, pattern: &str, append: bool) {
|
pub fn parse_from(&mut self, pattern: &str, append: bool) {
|
||||||
self.terms.clear();
|
self.terms.clear();
|
||||||
let invert = self.terms.last().map_or(false, |pat| pat.invert);
|
let invert = self.terms.last().map_or(false, |pat| pat.invert);
|
||||||
for atom in pattern_atoms(pattern) {
|
let atoms = pattern_atoms(pattern).filter_map(|atom| {
|
||||||
self.terms
|
let atom = PatternAtom::parse(atom, self.normalize, self.case_matching);
|
||||||
.push(PatternAtom::parse(atom, self.normalize, self.case_matching));
|
if atom.needle.is_empty() {
|
||||||
}
|
return None;
|
||||||
|
}
|
||||||
|
Some(atom)
|
||||||
|
});
|
||||||
|
self.terms.extend(atoms);
|
||||||
|
|
||||||
self.status = if append && !invert && self.status != Status::Rescore {
|
self.status = if append && !invert && self.status != Status::Rescore {
|
||||||
Status::Update
|
Status::Update
|
||||||
} else {
|
} else {
|
||||||
@ -304,6 +330,10 @@ impl Pattern {
|
|||||||
Status::Rescore
|
Status::Rescore
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
self.terms.is_empty()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn pattern_atoms(pattern: &str) -> impl Iterator<Item = &str> + '_ {
|
fn pattern_atoms(pattern: &str) -> impl Iterator<Item = &str> + '_ {
|
@ -1,5 +1,7 @@
|
|||||||
use core::slice;
|
use core::slice;
|
||||||
|
use std::borrow::Cow;
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
use std::mem::take;
|
||||||
use std::ops::{Bound, RangeBounds};
|
use std::ops::{Bound, RangeBounds};
|
||||||
|
|
||||||
use nucleo_matcher::Utf32Str;
|
use nucleo_matcher::Utf32Str;
|
||||||
@ -12,6 +14,12 @@ pub enum Utf32String {
|
|||||||
/// A string represented as an array of unicode codepoints (basically UTF-32).
|
/// A string represented as an array of unicode codepoints (basically UTF-32).
|
||||||
Unicode(Box<[char]>),
|
Unicode(Box<[char]>),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Default for Utf32String {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::Ascii(String::new().into_boxed_str())
|
||||||
|
}
|
||||||
|
}
|
||||||
impl Utf32String {
|
impl Utf32String {
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn len(&self) -> usize {
|
pub fn len(&self) -> usize {
|
||||||
@ -48,31 +56,69 @@ impl Utf32String {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
pub fn is_ascii(&self) -> bool {
|
pub fn is_ascii(&self) -> bool {
|
||||||
matches!(self, Utf32String::Ascii(_))
|
matches!(self, Utf32String::Ascii(_))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
pub fn get(&self, idx: u32) -> char {
|
pub fn get(&self, idx: u32) -> char {
|
||||||
match self {
|
match self {
|
||||||
Utf32String::Ascii(bytes) => bytes.as_bytes()[idx as usize] as char,
|
Utf32String::Ascii(bytes) => bytes.as_bytes()[idx as usize] as char,
|
||||||
Utf32String::Unicode(codepoints) => codepoints[idx as usize],
|
Utf32String::Unicode(codepoints) => codepoints[idx as usize],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
pub fn last(&self) -> char {
|
pub fn last(&self) -> char {
|
||||||
match self {
|
match self {
|
||||||
Utf32String::Ascii(bytes) => bytes.as_bytes()[bytes.len() - 1] as char,
|
Utf32String::Ascii(bytes) => bytes.as_bytes()[bytes.len() - 1] as char,
|
||||||
Utf32String::Unicode(codepoints) => codepoints[codepoints.len() - 1],
|
Utf32String::Unicode(codepoints) => codepoints[codepoints.len() - 1],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
pub fn chars(&self) -> Chars<'_> {
|
pub fn chars(&self) -> Chars<'_> {
|
||||||
match self {
|
match self {
|
||||||
Utf32String::Ascii(bytes) => Chars::Ascii(bytes.as_bytes().iter()),
|
Utf32String::Ascii(bytes) => Chars::Ascii(bytes.as_bytes().iter()),
|
||||||
Utf32String::Unicode(codepoints) => Chars::Unicode(codepoints.iter()),
|
Utf32String::Unicode(codepoints) => Chars::Unicode(codepoints.iter()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn push_str(&mut self, text: &str) {
|
||||||
|
let mut codeboints = match take(self) {
|
||||||
|
Utf32String::Ascii(bytes) if text.is_ascii() => {
|
||||||
|
let mut bytes = bytes.into_string();
|
||||||
|
bytes.push_str(text);
|
||||||
|
*self = Self::Ascii(bytes.into_boxed_str());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
Utf32String::Ascii(bytes) => bytes.chars().collect(),
|
||||||
|
Utf32String::Unicode(codepoints) => Vec::from(codepoints),
|
||||||
|
};
|
||||||
|
codeboints.extend(text.chars());
|
||||||
|
*self = Utf32String::Unicode(codeboints.into_boxed_slice());
|
||||||
|
}
|
||||||
|
#[inline]
|
||||||
|
pub fn push(&mut self, c: char) {
|
||||||
|
let mut codeboints = match take(self) {
|
||||||
|
Utf32String::Ascii(bytes) if c.is_ascii() => {
|
||||||
|
let mut bytes = bytes.into_string();
|
||||||
|
bytes.push(c);
|
||||||
|
*self = Self::Ascii(bytes.into_boxed_str());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
Utf32String::Ascii(bytes) => bytes.chars().collect(),
|
||||||
|
Utf32String::Unicode(codepoints) => Vec::from(codepoints),
|
||||||
|
};
|
||||||
|
codeboints.push(c);
|
||||||
|
*self = Utf32String::Unicode(codeboints.into_boxed_slice());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<&str> for Utf32String {
|
impl From<&str> for Utf32String {
|
||||||
|
#[inline]
|
||||||
fn from(value: &str) -> Self {
|
fn from(value: &str) -> Self {
|
||||||
if value.is_ascii() {
|
if value.is_ascii() {
|
||||||
Self::Ascii(value.to_owned().into_boxed_str())
|
Self::Ascii(value.to_owned().into_boxed_str())
|
||||||
@ -91,12 +137,24 @@ impl From<Box<str>> for Utf32String {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<String> for Utf32String {
|
impl From<String> for Utf32String {
|
||||||
|
#[inline]
|
||||||
fn from(value: String) -> Self {
|
fn from(value: String) -> Self {
|
||||||
value.into_boxed_str().into()
|
value.into_boxed_str().into()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<'a> From<Cow<'a, str>> for Utf32String {
|
||||||
|
#[inline]
|
||||||
|
fn from(value: Cow<'a, str>) -> Self {
|
||||||
|
match value {
|
||||||
|
Cow::Borrowed(value) => value.into(),
|
||||||
|
Cow::Owned(value) => value.into(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub enum Chars<'a> {
|
pub enum Chars<'a> {
|
||||||
Ascii(slice::Iter<'a, u8>),
|
Ascii(slice::Iter<'a, u8>),
|
||||||
Unicode(slice::Iter<'a, char>),
|
Unicode(slice::Iter<'a, char>),
|
||||||
@ -104,6 +162,7 @@ pub enum Chars<'a> {
|
|||||||
impl<'a> Iterator for Chars<'a> {
|
impl<'a> Iterator for Chars<'a> {
|
||||||
type Item = char;
|
type Item = char;
|
||||||
|
|
||||||
|
#[inline]
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
match self {
|
match self {
|
||||||
Chars::Ascii(iter) => iter.next().map(|&c| c as char),
|
Chars::Ascii(iter) => iter.next().map(|&c| c as char),
|
@ -8,7 +8,7 @@ use parking_lot::RawMutex;
|
|||||||
use rayon::{prelude::*, ThreadPool};
|
use rayon::{prelude::*, ThreadPool};
|
||||||
|
|
||||||
use crate::items::{ItemCache, ItemsSnapshot};
|
use crate::items::{ItemCache, ItemsSnapshot};
|
||||||
use crate::query::{self, Query};
|
use crate::query::{self, MultiPattern};
|
||||||
use crate::Match;
|
use crate::Match;
|
||||||
|
|
||||||
struct Matchers(Box<[UnsafeCell<nucleo_matcher::Matcher>]>);
|
struct Matchers(Box<[UnsafeCell<nucleo_matcher::Matcher>]>);
|
||||||
@ -30,15 +30,24 @@ pub(crate) struct Worker {
|
|||||||
pub(crate) items: ItemsSnapshot,
|
pub(crate) items: ItemsSnapshot,
|
||||||
matchers: Matchers,
|
matchers: Matchers,
|
||||||
pub(crate) matches: Vec<Match>,
|
pub(crate) matches: Vec<Match>,
|
||||||
pub(crate) query: Query,
|
pub(crate) pattern: MultiPattern,
|
||||||
pub(crate) canceled: Arc<AtomicBool>,
|
pub(crate) canceled: Arc<AtomicBool>,
|
||||||
|
pub(crate) should_notify: Arc<AtomicBool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Worker {
|
impl Worker {
|
||||||
|
pub(crate) fn update_config(&mut self, config: MatcherConfig) {
|
||||||
|
for matcher in self.matchers.0.iter_mut() {
|
||||||
|
matcher.get_mut().config = config;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn new(
|
pub(crate) fn new(
|
||||||
notify: Arc<(dyn Fn() + Sync + Send)>,
|
notify: Arc<(dyn Fn() + Sync + Send)>,
|
||||||
worker_threads: Option<usize>,
|
worker_threads: Option<usize>,
|
||||||
config: MatcherConfig,
|
config: MatcherConfig,
|
||||||
|
matches: Vec<Match>,
|
||||||
|
items: &ItemCache,
|
||||||
) -> (ThreadPool, Worker) {
|
) -> (ThreadPool, Worker) {
|
||||||
let worker_threads = worker_threads
|
let worker_threads = worker_threads
|
||||||
.unwrap_or_else(|| std::thread::available_parallelism().map_or(4, |it| it.get()));
|
.unwrap_or_else(|| std::thread::available_parallelism().map_or(4, |it| it.get()));
|
||||||
@ -53,15 +62,17 @@ impl Worker {
|
|||||||
let worker = Worker {
|
let worker = Worker {
|
||||||
notify,
|
notify,
|
||||||
running: false,
|
running: false,
|
||||||
items: ItemsSnapshot::new(),
|
items: ItemsSnapshot::new(items),
|
||||||
matchers: Matchers(matchers),
|
matchers: Matchers(matchers),
|
||||||
matches: Vec::with_capacity(1024),
|
matches,
|
||||||
// just a placeholder
|
// just a placeholder
|
||||||
query: Query::new(&config, crate::CaseMatching::Ignore, 0),
|
pattern: MultiPattern::new(&config, crate::CaseMatching::Ignore, 0),
|
||||||
canceled: Arc::new(AtomicBool::new(false)),
|
canceled: Arc::new(AtomicBool::new(false)),
|
||||||
|
should_notify: Arc::new(AtomicBool::new(false)),
|
||||||
};
|
};
|
||||||
(pool, worker)
|
(pool, worker)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) unsafe fn run(
|
pub(crate) unsafe fn run(
|
||||||
&mut self,
|
&mut self,
|
||||||
items_lock: ArcMutexGuard<RawMutex, ItemCache>,
|
items_lock: ArcMutexGuard<RawMutex, ItemCache>,
|
||||||
@ -77,48 +88,56 @@ impl Worker {
|
|||||||
self.matches.clear();
|
self.matches.clear();
|
||||||
last_scored_item = 0;
|
last_scored_item = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
let matchers = &self.matchers;
|
let matchers = &self.matchers;
|
||||||
let query = &self.query;
|
let pattern = &self.pattern;
|
||||||
let items = unsafe { self.items.get() };
|
let items = unsafe { self.items.get() };
|
||||||
|
|
||||||
|
if self.pattern.cols.iter().all(|pat| pat.is_empty()) {
|
||||||
|
self.matches.clear();
|
||||||
|
self.matches.extend((0..items.len()).map(|i| Match {
|
||||||
|
score: 0,
|
||||||
|
idx: i as u32,
|
||||||
|
}));
|
||||||
|
if self.should_notify.load(atomic::Ordering::Relaxed) {
|
||||||
|
(self.notify)();
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
if query_status != query::Status::Unchanged && !self.matches.is_empty() {
|
if query_status != query::Status::Unchanged && !self.matches.is_empty() {
|
||||||
self.matches
|
self.matches
|
||||||
.par_iter_mut()
|
.par_iter_mut()
|
||||||
.take_any_while(|_| self.canceled.load(atomic::Ordering::Relaxed))
|
.take_any_while(|_| !self.canceled.load(atomic::Ordering::Relaxed))
|
||||||
.for_each(|match_| {
|
.for_each(|match_| {
|
||||||
let item = &items[match_.idx as usize];
|
let item = &items[match_.idx as usize];
|
||||||
match_.score = query
|
match_.score = pattern
|
||||||
.score(item.cols(), unsafe { matchers.get() })
|
.score(item.cols(), unsafe { matchers.get() })
|
||||||
.unwrap_or(u32::MAX);
|
.unwrap_or(u32::MAX);
|
||||||
});
|
});
|
||||||
// TODO: do this in parallel?
|
// TODO: do this in parallel?
|
||||||
self.matches.retain(|m| m.score != u32::MAX)
|
self.matches.retain(|m| m.score != u32::MAX);
|
||||||
}
|
}
|
||||||
|
|
||||||
if last_scored_item != self.items.len() {
|
if last_scored_item != self.items.len() {
|
||||||
self.running = true;
|
|
||||||
let items = items[last_scored_item..]
|
let items = items[last_scored_item..]
|
||||||
.par_iter()
|
.par_iter()
|
||||||
.enumerate()
|
.enumerate()
|
||||||
.filter_map(|(i, item)| {
|
.filter_map(|(i, item)| {
|
||||||
let score = if self.canceled.load(atomic::Ordering::Relaxed) {
|
let score = if self.canceled.load(atomic::Ordering::Relaxed) {
|
||||||
0
|
u32::MAX - 1
|
||||||
} else {
|
} else {
|
||||||
query.score(item.cols(), unsafe { matchers.get() })?
|
pattern.score(item.cols(), unsafe { matchers.get() })?
|
||||||
};
|
};
|
||||||
Some(Match {
|
Some(Match {
|
||||||
score,
|
score,
|
||||||
idx: i as u32,
|
idx: i as u32,
|
||||||
})
|
})
|
||||||
});
|
});
|
||||||
self.matches.par_extend(items)
|
self.matches.par_extend(items);
|
||||||
}
|
}
|
||||||
|
|
||||||
if !self.canceled.load(atomic::Ordering::Relaxed) {
|
if !self.canceled.load(atomic::Ordering::Relaxed) {
|
||||||
// TODO: cancel sort in progess?
|
// TODO: cancel sort in progess?
|
||||||
self.matches.par_sort_unstable_by(|match1, match2| {
|
self.matches.par_sort_unstable_by(|match1, match2| {
|
||||||
match2.idx.cmp(&match1.idx).then_with(|| {
|
match2.score.cmp(&match1.score).then_with(|| {
|
||||||
// the tie breaker is comparitevly rarely needed so we keep it
|
// the tie breaker is comparitevly rarely needed so we keep it
|
||||||
// in a branch especially beacuse we need to acceess the items
|
// in a branch especially beacuse we need to acceess the items
|
||||||
// array here which invovles some pointer chasing
|
// array here which invovles some pointer chasing
|
||||||
@ -129,6 +148,8 @@ impl Worker {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
(self.notify)();
|
if self.should_notify.load(atomic::Ordering::Relaxed) {
|
||||||
|
(self.notify)();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -1,131 +0,0 @@
|
|||||||
use std::ops::Deref;
|
|
||||||
use std::sync::atomic::{self, AtomicBool};
|
|
||||||
use std::sync::Arc;
|
|
||||||
use std::time::Duration;
|
|
||||||
|
|
||||||
use crate::items::{Item, ItemCache};
|
|
||||||
use crate::worker::Worker;
|
|
||||||
use rayon::ThreadPool;
|
|
||||||
|
|
||||||
pub use crate::query::{CaseMatching, Pattern, PatternKind, Query};
|
|
||||||
pub use crate::utf32_string::Utf32String;
|
|
||||||
|
|
||||||
mod items;
|
|
||||||
mod query;
|
|
||||||
mod utf32_string;
|
|
||||||
mod worker;
|
|
||||||
pub use nucleo_matcher::{chars, Matcher, MatcherConfig, Utf32Str};
|
|
||||||
|
|
||||||
use parking_lot::{Mutex, MutexGuard};
|
|
||||||
|
|
||||||
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
|
|
||||||
pub struct Match {
|
|
||||||
pub score: u32,
|
|
||||||
pub idx: u32,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone)]
|
|
||||||
pub struct Items<T> {
|
|
||||||
cache: Arc<Mutex<ItemCache>>,
|
|
||||||
items: Arc<Mutex<Vec<T>>>,
|
|
||||||
notify: Arc<(dyn Fn() + Sync + Send)>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<T: Sync + Send> Items<T> {
|
|
||||||
pub fn clear(&mut self) {
|
|
||||||
self.items.lock().clear();
|
|
||||||
self.cache.lock().clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn append(&mut self, items: impl Iterator<Item = (T, Box<[Utf32String]>)>) {
|
|
||||||
let mut cache = self.cache.lock();
|
|
||||||
let mut items_ = self.items.lock();
|
|
||||||
items_.extend(items.map(|(item, text)| {
|
|
||||||
cache.push(text);
|
|
||||||
item
|
|
||||||
}));
|
|
||||||
// notify that a new tick will be necessary
|
|
||||||
(self.notify)();
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn get(&self) -> impl Deref<Target = [T]> + '_ {
|
|
||||||
MutexGuard::map(self.items.lock(), |items| items.as_mut_slice())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn get_matcher_items(&self) -> impl Deref<Target = [Item]> + '_ {
|
|
||||||
MutexGuard::map(self.cache.lock(), |items| items.get())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct Nucleo<T: Sync + Send> {
|
|
||||||
// the way the API is build we totally don't actually neeed these to be Arcs
|
|
||||||
// but this lets us avoid some unsafe
|
|
||||||
worker: Arc<Mutex<Worker>>,
|
|
||||||
canceled: Arc<AtomicBool>,
|
|
||||||
pool: ThreadPool,
|
|
||||||
pub items: Items<T>,
|
|
||||||
pub matches: Vec<Match>,
|
|
||||||
pub query: Query,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<T: Sync + Send> Nucleo<T> {
|
|
||||||
pub fn new(
|
|
||||||
config: MatcherConfig,
|
|
||||||
notify: Arc<(dyn Fn() + Sync + Send)>,
|
|
||||||
num_threads: Option<usize>,
|
|
||||||
case_matching: CaseMatching,
|
|
||||||
cols: usize,
|
|
||||||
) -> Self {
|
|
||||||
let (pool, worker) = Worker::new(notify.clone(), num_threads, config);
|
|
||||||
Self {
|
|
||||||
canceled: worker.canceled.clone(),
|
|
||||||
items: Items {
|
|
||||||
cache: Arc::new(Mutex::new(ItemCache::new())),
|
|
||||||
items: Arc::new(Mutex::new(Vec::with_capacity(1024))),
|
|
||||||
notify,
|
|
||||||
},
|
|
||||||
pool,
|
|
||||||
matches: Vec::with_capacity(1024),
|
|
||||||
query: Query::new(&config, case_matching, cols),
|
|
||||||
worker: Arc::new(Mutex::new(worker)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn tick(&mut self, timeout: u64) -> bool {
|
|
||||||
let status = self.query.status();
|
|
||||||
let items = self.items.cache.lock_arc();
|
|
||||||
let canceled = status != query::Status::Unchanged || items.cleared();
|
|
||||||
let mut inner = if canceled {
|
|
||||||
self.query.reset_status();
|
|
||||||
self.canceled.store(true, atomic::Ordering::Relaxed);
|
|
||||||
self.worker.lock_arc()
|
|
||||||
} else {
|
|
||||||
let Some(worker) = self.worker.try_lock_arc_for(Duration::from_millis(timeout)) else {
|
|
||||||
return true;
|
|
||||||
};
|
|
||||||
worker
|
|
||||||
};
|
|
||||||
|
|
||||||
if inner.running {
|
|
||||||
inner.running = false;
|
|
||||||
self.matches.clone_from(&inner.matches);
|
|
||||||
} else if !canceled {
|
|
||||||
// nothing has changed
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if canceled || inner.items.outdated(&items) {
|
|
||||||
self.pool.spawn(move || unsafe { inner.run(items, status) })
|
|
||||||
}
|
|
||||||
true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<T: Sync + Send> Drop for Nucleo<T> {
|
|
||||||
fn drop(&mut self) {
|
|
||||||
// we ensure the worker quits before dropping items to ensure that
|
|
||||||
// the worker can always assume the items outlife it
|
|
||||||
self.canceled.store(true, atomic::Ordering::Relaxed);
|
|
||||||
drop(self.worker.lock());
|
|
||||||
}
|
|
||||||
}
|
|
Loading…
Reference in New Issue
Block a user