mirror of
https://github.com/solaeus/nucleo.git
synced 2025-01-22 07:47:47 +00:00
progress on high level API
This commit is contained in:
parent
39982dc81a
commit
6b8ee0f585
@ -138,15 +138,9 @@ pub use normalize::normalize;
|
||||
|
||||
#[inline(always)]
|
||||
pub fn to_lower_case(c: char) -> char {
|
||||
if c >= 'A' && c <= 'Z' {
|
||||
char::from_u32(c as u32 + 32).unwrap()
|
||||
} else if !c.is_ascii() {
|
||||
CASE_FOLDING_SIMPLE
|
||||
.binary_search_by_key(&c, |(upper, _)| *upper)
|
||||
.map_or(c, |idx| CASE_FOLDING_SIMPLE[idx].1)
|
||||
} else {
|
||||
c
|
||||
}
|
||||
CASE_FOLDING_SIMPLE
|
||||
.binary_search_by_key(&c, |(upper, _)| *upper)
|
||||
.map_or(c, |idx| CASE_FOLDING_SIMPLE[idx].1)
|
||||
}
|
||||
|
||||
#[derive(Debug, Eq, PartialEq, PartialOrd, Ord, Copy, Clone, Hash)]
|
||||
|
@ -1,10 +1,11 @@
|
||||
[package]
|
||||
name = "nucleo"
|
||||
description = "plug and play high performance fuzzy matcher"
|
||||
authors = ["Pascal Kuthe <pascal.kuthe@semimod.de>"]
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
license = "MPL-2.0"
|
||||
repository = "https://github.com/helix-editor/nucleo"
|
||||
|
||||
[dependencies]
|
||||
nucleo-matcher = { version = "0.1", path = "../matcher" }
|
||||
|
@ -8,13 +8,22 @@ pub(crate) struct ItemCache {
|
||||
evicted: Vec<Item>,
|
||||
}
|
||||
impl ItemCache {
|
||||
pub(crate) fn new() -> Self {
|
||||
Self {
|
||||
live: Vec::with_capacity(1024),
|
||||
evicted: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn clear(&mut self) {
|
||||
if self.evicted.is_empty() {
|
||||
self.evicted.reserve(1024);
|
||||
swap(&mut self.evicted, &mut self.live)
|
||||
} else {
|
||||
self.evicted.append(&mut self.live)
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn cleared(&self) -> bool {
|
||||
!self.evicted.is_empty()
|
||||
}
|
||||
@ -24,19 +33,31 @@ impl ItemCache {
|
||||
cols: Box::leak(item).into(),
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn get(&mut self) -> &mut [Item] {
|
||||
&mut self.live
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Eq, Debug, Clone)]
|
||||
pub(crate) struct Item {
|
||||
#[derive(PartialEq, Eq, Clone)]
|
||||
pub struct Item {
|
||||
// TODO: small vec optimization??
|
||||
cols: NonNull<[Utf32String]>,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for Item {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("ItemText")
|
||||
.field("cols", &self.cols())
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl Send for Item {}
|
||||
unsafe impl Sync for Item {}
|
||||
|
||||
impl Item {
|
||||
fn cols(&self) -> &[Utf32String] {
|
||||
pub fn cols(&self) -> &[Utf32String] {
|
||||
// safety: cols is basically a box and treated the same as a box,
|
||||
// however there can be other references so using a box (unique ptr)
|
||||
// would be an alias violation
|
||||
@ -68,6 +89,12 @@ pub(crate) struct ItemsSnapshot {
|
||||
}
|
||||
|
||||
impl ItemsSnapshot {
|
||||
pub(crate) fn new() -> Self {
|
||||
Self {
|
||||
items: Vec::with_capacity(1024),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn outdated(&self, items: &ItemCache) -> bool {
|
||||
items.live.len() != self.items.len()
|
||||
}
|
||||
|
@ -1,123 +1,30 @@
|
||||
use std::cell::UnsafeCell;
|
||||
use std::ops::Deref;
|
||||
use std::sync::atomic::{self, AtomicBool};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
||||
use crate::items::{ItemCache, ItemsSnapshot};
|
||||
use crate::query::Query;
|
||||
use crate::items::{Item, ItemCache};
|
||||
use crate::worker::Worker;
|
||||
use rayon::ThreadPool;
|
||||
|
||||
pub use crate::query::{CaseMatching, Pattern, PatternKind, Query};
|
||||
pub use crate::utf32_string::Utf32String;
|
||||
use parking_lot::lock_api::ArcMutexGuard;
|
||||
use rayon::prelude::*;
|
||||
|
||||
mod items;
|
||||
mod query;
|
||||
mod utf32_string;
|
||||
mod worker;
|
||||
pub use nucleo_matcher::{chars, Matcher, MatcherConfig, Utf32Str};
|
||||
|
||||
use parking_lot::{Mutex, MutexGuard, RawMutex};
|
||||
use parking_lot::{Mutex, MutexGuard};
|
||||
|
||||
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
|
||||
pub struct Match {
|
||||
score: u32,
|
||||
idx: u32,
|
||||
}
|
||||
|
||||
struct Matchers(Box<[UnsafeCell<nucleo_matcher::Matcher>]>);
|
||||
|
||||
impl Matchers {
|
||||
// thiss is not a true mut from ref, we use a cell here
|
||||
#[allow(clippy::mut_from_ref)]
|
||||
unsafe fn get(&self) -> &mut nucleo_matcher::Matcher {
|
||||
&mut *self.0[rayon::current_thread_index().unwrap()].get()
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl Sync for Matchers {}
|
||||
unsafe impl Send for Matchers {}
|
||||
|
||||
struct Worker {
|
||||
notify: Arc<(dyn Fn() + Sync + Send)>,
|
||||
running: bool,
|
||||
items: ItemsSnapshot,
|
||||
matchers: Matchers,
|
||||
matches: Vec<Match>,
|
||||
query: Query,
|
||||
canceled: Arc<AtomicBool>,
|
||||
}
|
||||
|
||||
impl Worker {
|
||||
unsafe fn run(
|
||||
&mut self,
|
||||
items_lock: ArcMutexGuard<RawMutex, ItemCache>,
|
||||
query_status: query::Status,
|
||||
canceled: Arc<AtomicBool>,
|
||||
) {
|
||||
self.running = true;
|
||||
let mut last_scored_item = self.items.len();
|
||||
let cleared = self.items.update(&items_lock);
|
||||
drop(items_lock);
|
||||
|
||||
// TODO: be smarter around reusing past results for rescoring
|
||||
if cleared || query_status == query::Status::Rescore {
|
||||
self.matches.clear();
|
||||
last_scored_item = 0;
|
||||
}
|
||||
|
||||
let matchers = &self.matchers;
|
||||
let query = &self.query;
|
||||
let items = unsafe { self.items.get() };
|
||||
|
||||
if query_status != query::Status::Unchanged && !self.matches.is_empty() {
|
||||
self.matches
|
||||
.par_iter_mut()
|
||||
.take_any_while(|_| canceled.load(atomic::Ordering::Relaxed))
|
||||
.for_each(|match_| {
|
||||
let item = &items[match_.idx as usize];
|
||||
match_.score = query
|
||||
.score(item.cols(), unsafe { matchers.get() })
|
||||
.unwrap_or(u32::MAX);
|
||||
});
|
||||
// TODO: do this in parallel?
|
||||
self.matches.retain(|m| m.score != u32::MAX)
|
||||
}
|
||||
|
||||
if last_scored_item != self.items.len() {
|
||||
self.running = true;
|
||||
let items = items[last_scored_item..]
|
||||
.par_iter()
|
||||
.enumerate()
|
||||
.filter_map(|(i, item)| {
|
||||
let score = if canceled.load(atomic::Ordering::Relaxed) {
|
||||
0
|
||||
} else {
|
||||
query.score(item.cols(), unsafe { matchers.get() })?
|
||||
};
|
||||
Some(Match {
|
||||
score,
|
||||
idx: i as u32,
|
||||
})
|
||||
});
|
||||
self.matches.par_extend(items)
|
||||
}
|
||||
|
||||
if !self.canceled.load(atomic::Ordering::Relaxed) {
|
||||
// TODO: cancel sort in progess?
|
||||
self.matches.par_sort_unstable_by(|match1, match2| {
|
||||
match2.idx.cmp(&match1.idx).then_with(|| {
|
||||
// the tie breaker is comparitevly rarely needed so we keep it
|
||||
// in a branch especially beacuse we need to acceess the items
|
||||
// array here which invovles some pointer chasing
|
||||
let item1 = &items[match1.idx as usize];
|
||||
let item2 = &items[match2.idx as usize];
|
||||
(item1.len, match1.idx).cmp(&(item2.len, match2.idx))
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
(self.notify)();
|
||||
}
|
||||
pub score: u32,
|
||||
pub idx: u32,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Items<T> {
|
||||
cache: Arc<Mutex<ItemCache>>,
|
||||
items: Arc<Mutex<Vec<T>>>,
|
||||
@ -145,7 +52,9 @@ impl<T: Sync + Send> Items<T> {
|
||||
MutexGuard::map(self.items.lock(), |items| items.as_mut_slice())
|
||||
}
|
||||
|
||||
pub fn push() {}
|
||||
pub fn get_matcher_items(&self) -> impl Deref<Target = [Item]> + '_ {
|
||||
MutexGuard::map(self.cache.lock(), |items| items.get())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Nucleo<T: Sync + Send> {
|
||||
@ -153,18 +62,41 @@ pub struct Nucleo<T: Sync + Send> {
|
||||
// but this lets us avoid some unsafe
|
||||
worker: Arc<Mutex<Worker>>,
|
||||
canceled: Arc<AtomicBool>,
|
||||
items: Items<T>,
|
||||
thread_pool: rayon::ThreadPool,
|
||||
pool: ThreadPool,
|
||||
pub items: Items<T>,
|
||||
pub matches: Vec<Match>,
|
||||
pub query: Query,
|
||||
}
|
||||
|
||||
impl<T: Sync + Send> Nucleo<T> {
|
||||
pub fn new(
|
||||
config: MatcherConfig,
|
||||
notify: Arc<(dyn Fn() + Sync + Send)>,
|
||||
num_threads: Option<usize>,
|
||||
case_matching: CaseMatching,
|
||||
cols: usize,
|
||||
) -> Self {
|
||||
let (pool, worker) = Worker::new(notify.clone(), num_threads, config);
|
||||
Self {
|
||||
canceled: worker.canceled.clone(),
|
||||
items: Items {
|
||||
cache: Arc::new(Mutex::new(ItemCache::new())),
|
||||
items: Arc::new(Mutex::new(Vec::with_capacity(1024))),
|
||||
notify,
|
||||
},
|
||||
pool,
|
||||
matches: Vec::with_capacity(1024),
|
||||
query: Query::new(&config, case_matching, cols),
|
||||
worker: Arc::new(Mutex::new(worker)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn tick(&mut self, timeout: u64) -> bool {
|
||||
let status = self.query.status();
|
||||
let items = self.items.cache.lock_arc();
|
||||
let canceled = status != query::Status::Unchanged || items.cleared();
|
||||
let mut inner = if canceled {
|
||||
self.query.reset_status();
|
||||
self.canceled.store(true, atomic::Ordering::Relaxed);
|
||||
self.worker.lock_arc()
|
||||
} else {
|
||||
@ -183,9 +115,7 @@ impl<T: Sync + Send> Nucleo<T> {
|
||||
}
|
||||
|
||||
if canceled || inner.items.outdated(&items) {
|
||||
let canceled = self.canceled.clone();
|
||||
self.thread_pool
|
||||
.spawn(move || unsafe { inner.run(items, status, canceled) })
|
||||
self.pool.spawn(move || unsafe { inner.run(items, status) })
|
||||
}
|
||||
true
|
||||
}
|
||||
|
@ -1,9 +1,18 @@
|
||||
use nucleo_matcher::{Matcher, Utf32Str};
|
||||
use nucleo_matcher::{chars, Matcher, MatcherConfig, Utf32Str};
|
||||
|
||||
use crate::Utf32String;
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
#[non_exhaustive]
|
||||
pub enum CaseMatching {
|
||||
Ignore,
|
||||
Smart,
|
||||
Respect,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
enum PatternKind {
|
||||
#[non_exhaustive]
|
||||
pub enum PatternKind {
|
||||
Exact,
|
||||
Fuzzy,
|
||||
Substring,
|
||||
@ -16,6 +25,120 @@ struct PatternAtom {
|
||||
kind: PatternKind,
|
||||
needle: Utf32String,
|
||||
invert: bool,
|
||||
ignore_case: bool,
|
||||
}
|
||||
impl PatternAtom {
|
||||
fn literal(
|
||||
needle: &str,
|
||||
normalize: bool,
|
||||
case: CaseMatching,
|
||||
kind: PatternKind,
|
||||
escape_whitespace: bool,
|
||||
) -> PatternAtom {
|
||||
let mut ignore_case = case == CaseMatching::Ignore;
|
||||
let needle = if needle.is_ascii() {
|
||||
let mut needle = if escape_whitespace {
|
||||
if let Some((start, rem)) = needle.split_once("\\ ") {
|
||||
let mut needle = start.to_owned();
|
||||
for rem in rem.split("\\ ") {
|
||||
needle.push(' ');
|
||||
needle.push_str(rem);
|
||||
}
|
||||
needle
|
||||
} else {
|
||||
needle.to_owned()
|
||||
}
|
||||
} else {
|
||||
needle.to_owned()
|
||||
};
|
||||
|
||||
match case {
|
||||
CaseMatching::Ignore => needle.make_ascii_lowercase(),
|
||||
CaseMatching::Smart => ignore_case = needle.bytes().any(|b| b.is_ascii_uppercase()),
|
||||
CaseMatching::Respect => (),
|
||||
}
|
||||
|
||||
Utf32String::Ascii(needle.into_boxed_str())
|
||||
} else {
|
||||
let mut needle_ = Vec::with_capacity(needle.len());
|
||||
if escape_whitespace {
|
||||
let mut saw_backslash = false;
|
||||
for mut c in needle.chars() {
|
||||
if saw_backslash {
|
||||
if c == ' ' {
|
||||
needle_.push(' ');
|
||||
saw_backslash = false;
|
||||
continue;
|
||||
} else {
|
||||
needle_.push('\\');
|
||||
}
|
||||
}
|
||||
saw_backslash = c == '\\';
|
||||
if normalize {
|
||||
c = chars::normalize(c);
|
||||
}
|
||||
match case {
|
||||
CaseMatching::Ignore => c = chars::to_lower_case(c),
|
||||
CaseMatching::Smart => {
|
||||
ignore_case = ignore_case || c.is_uppercase();
|
||||
}
|
||||
CaseMatching::Respect => (),
|
||||
}
|
||||
needle_.push(c);
|
||||
}
|
||||
};
|
||||
Utf32String::Unicode(needle_.into_boxed_slice())
|
||||
};
|
||||
PatternAtom {
|
||||
kind,
|
||||
needle,
|
||||
invert: false,
|
||||
ignore_case,
|
||||
}
|
||||
}
|
||||
|
||||
fn parse(raw: &str, normalize: bool, case: CaseMatching) -> PatternAtom {
|
||||
let mut atom = raw;
|
||||
let inverse = atom.starts_with('!');
|
||||
if inverse {
|
||||
atom = &atom[1..];
|
||||
}
|
||||
|
||||
let mut kind = match atom.as_bytes() {
|
||||
[b'^', ..] => {
|
||||
atom = &atom[1..];
|
||||
PatternKind::Prefix
|
||||
}
|
||||
[b'\'', ..] => {
|
||||
atom = &atom[1..];
|
||||
PatternKind::Substring
|
||||
}
|
||||
[b'\\', b'^' | b'\'', ..] => {
|
||||
atom = &atom[1..];
|
||||
PatternKind::Fuzzy
|
||||
}
|
||||
_ => PatternKind::Fuzzy,
|
||||
};
|
||||
|
||||
match atom.as_bytes() {
|
||||
[.., b'\\', b'$'] => (),
|
||||
[.., b'$'] => {
|
||||
kind = if kind == PatternKind::Fuzzy {
|
||||
PatternKind::Postfix
|
||||
} else {
|
||||
PatternKind::Exact
|
||||
};
|
||||
atom = &atom[..atom.len() - 1]
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
if inverse && kind == PatternKind::Fuzzy {
|
||||
kind = PatternKind::Substring
|
||||
}
|
||||
|
||||
PatternAtom::literal(atom, normalize, case, kind, true)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord)]
|
||||
@ -31,6 +154,20 @@ pub struct Query {
|
||||
}
|
||||
|
||||
impl Query {
|
||||
pub fn new(matcher_config: &MatcherConfig, case_matching: CaseMatching, cols: usize) -> Query {
|
||||
Query {
|
||||
cols: vec![
|
||||
Pattern {
|
||||
terms: Vec::new(),
|
||||
case_matching,
|
||||
normalize: matcher_config.normalize,
|
||||
status: Status::Unchanged,
|
||||
};
|
||||
cols
|
||||
],
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn status(&self) -> Status {
|
||||
self.cols
|
||||
.iter()
|
||||
@ -39,7 +176,13 @@ impl Query {
|
||||
.unwrap_or(Status::Unchanged)
|
||||
}
|
||||
|
||||
pub(crate) fn score(&self, haystack: &[Utf32String], matcher: &mut Matcher) -> Option<u32> {
|
||||
pub(crate) fn reset_status(&mut self) {
|
||||
for col in &mut self.cols {
|
||||
col.status = Status::Unchanged
|
||||
}
|
||||
}
|
||||
|
||||
pub fn score(&self, haystack: &[Utf32String], matcher: &mut Matcher) -> Option<u32> {
|
||||
// TODO: wheight columns?
|
||||
let mut score = 0;
|
||||
for (pattern, haystack) in self.cols.iter().zip(haystack) {
|
||||
@ -52,6 +195,8 @@ impl Query {
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Pattern {
|
||||
terms: Vec<PatternAtom>,
|
||||
case_matching: CaseMatching,
|
||||
normalize: bool,
|
||||
status: Status,
|
||||
}
|
||||
|
||||
@ -62,6 +207,7 @@ impl Pattern {
|
||||
}
|
||||
let mut score = 0;
|
||||
for pattern in &self.terms {
|
||||
matcher.config.ignore_case = pattern.ignore_case;
|
||||
let pattern_score = match pattern.kind {
|
||||
PatternKind::Exact => matcher.exact_match(haystack, pattern.needle.slice(..)),
|
||||
PatternKind::Fuzzy => matcher.fuzzy_match(haystack, pattern.needle.slice(..)),
|
||||
@ -81,4 +227,93 @@ impl Pattern {
|
||||
}
|
||||
Some(score)
|
||||
}
|
||||
|
||||
pub fn indices(
|
||||
&self,
|
||||
haystack: Utf32Str<'_>,
|
||||
matcher: &mut Matcher,
|
||||
indices: &mut Vec<u32>,
|
||||
) -> Option<u32> {
|
||||
if self.terms.is_empty() {
|
||||
return Some(0);
|
||||
}
|
||||
let mut score = 0;
|
||||
for pattern in &self.terms {
|
||||
matcher.config.ignore_case = pattern.ignore_case;
|
||||
if pattern.invert {
|
||||
let pattern_score = match pattern.kind {
|
||||
PatternKind::Exact => matcher.exact_match(haystack, pattern.needle.slice(..)),
|
||||
PatternKind::Fuzzy => matcher.fuzzy_match(haystack, pattern.needle.slice(..)),
|
||||
PatternKind::Substring => {
|
||||
matcher.substring_match(haystack, pattern.needle.slice(..))
|
||||
}
|
||||
PatternKind::Prefix => matcher.prefix_match(haystack, pattern.needle.slice(..)),
|
||||
PatternKind::Postfix => {
|
||||
matcher.prefix_match(haystack, pattern.needle.slice(..))
|
||||
}
|
||||
};
|
||||
if pattern_score.is_some() {
|
||||
return None;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
let pattern_score = match pattern.kind {
|
||||
PatternKind::Exact => {
|
||||
matcher.exact_indices(haystack, pattern.needle.slice(..), indices)
|
||||
}
|
||||
PatternKind::Fuzzy => {
|
||||
matcher.exact_indices(haystack, pattern.needle.slice(..), indices)
|
||||
}
|
||||
PatternKind::Substring => {
|
||||
matcher.exact_indices(haystack, pattern.needle.slice(..), indices)
|
||||
}
|
||||
PatternKind::Prefix => {
|
||||
matcher.exact_indices(haystack, pattern.needle.slice(..), indices)
|
||||
}
|
||||
PatternKind::Postfix => {
|
||||
matcher.exact_indices(haystack, pattern.needle.slice(..), indices)
|
||||
}
|
||||
};
|
||||
score += pattern_score? as u32
|
||||
}
|
||||
Some(score)
|
||||
}
|
||||
|
||||
pub fn parse_from(&mut self, pattern: &str, append: bool) {
|
||||
self.terms.clear();
|
||||
let invert = self.terms.last().map_or(false, |pat| pat.invert);
|
||||
for atom in pattern_atoms(pattern) {
|
||||
self.terms
|
||||
.push(PatternAtom::parse(atom, self.normalize, self.case_matching));
|
||||
}
|
||||
self.status = if append && !invert && self.status != Status::Rescore {
|
||||
Status::Update
|
||||
} else {
|
||||
Status::Rescore
|
||||
};
|
||||
}
|
||||
|
||||
pub fn set_literal(&mut self, pattern: &str, kind: PatternKind, append: bool) {
|
||||
self.terms.clear();
|
||||
let pattern =
|
||||
PatternAtom::literal(pattern, self.normalize, self.case_matching, kind, false);
|
||||
self.terms.push(pattern);
|
||||
self.status = if append && self.status != Status::Rescore {
|
||||
Status::Update
|
||||
} else {
|
||||
Status::Rescore
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
fn pattern_atoms(pattern: &str) -> impl Iterator<Item = &str> + '_ {
|
||||
let mut saw_backslash = false;
|
||||
pattern.split(move |c| {
|
||||
saw_backslash = match c {
|
||||
' ' if !saw_backslash => return true,
|
||||
'\\' => true,
|
||||
_ => false,
|
||||
};
|
||||
false
|
||||
})
|
||||
}
|
||||
|
134
worker/src/worker.rs
Normal file
134
worker/src/worker.rs
Normal file
@ -0,0 +1,134 @@
|
||||
use std::cell::UnsafeCell;
|
||||
use std::sync::atomic::{self, AtomicBool};
|
||||
use std::sync::Arc;
|
||||
|
||||
use nucleo_matcher::MatcherConfig;
|
||||
use parking_lot::lock_api::ArcMutexGuard;
|
||||
use parking_lot::RawMutex;
|
||||
use rayon::{prelude::*, ThreadPool};
|
||||
|
||||
use crate::items::{ItemCache, ItemsSnapshot};
|
||||
use crate::query::{self, Query};
|
||||
use crate::Match;
|
||||
|
||||
struct Matchers(Box<[UnsafeCell<nucleo_matcher::Matcher>]>);
|
||||
|
||||
impl Matchers {
|
||||
// thiss is not a true mut from ref, we use a cell here
|
||||
#[allow(clippy::mut_from_ref)]
|
||||
unsafe fn get(&self) -> &mut nucleo_matcher::Matcher {
|
||||
&mut *self.0[rayon::current_thread_index().unwrap()].get()
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl Sync for Matchers {}
|
||||
unsafe impl Send for Matchers {}
|
||||
|
||||
pub(crate) struct Worker {
|
||||
notify: Arc<(dyn Fn() + Sync + Send)>,
|
||||
pub(crate) running: bool,
|
||||
pub(crate) items: ItemsSnapshot,
|
||||
matchers: Matchers,
|
||||
pub(crate) matches: Vec<Match>,
|
||||
pub(crate) query: Query,
|
||||
pub(crate) canceled: Arc<AtomicBool>,
|
||||
}
|
||||
|
||||
impl Worker {
|
||||
pub(crate) fn new(
|
||||
notify: Arc<(dyn Fn() + Sync + Send)>,
|
||||
worker_threads: Option<usize>,
|
||||
config: MatcherConfig,
|
||||
) -> (ThreadPool, Worker) {
|
||||
let worker_threads = worker_threads
|
||||
.unwrap_or_else(|| std::thread::available_parallelism().map_or(4, |it| it.get()));
|
||||
let pool = rayon::ThreadPoolBuilder::new()
|
||||
.thread_name(|i| format!("nucleo worker {i}"))
|
||||
.num_threads(worker_threads)
|
||||
.build()
|
||||
.expect("creating threadpool failed");
|
||||
let matchers = (0..worker_threads)
|
||||
.map(|_| UnsafeCell::new(nucleo_matcher::Matcher::new(config)))
|
||||
.collect();
|
||||
let worker = Worker {
|
||||
notify,
|
||||
running: false,
|
||||
items: ItemsSnapshot::new(),
|
||||
matchers: Matchers(matchers),
|
||||
matches: Vec::with_capacity(1024),
|
||||
// just a placeholder
|
||||
query: Query::new(&config, crate::CaseMatching::Ignore, 0),
|
||||
canceled: Arc::new(AtomicBool::new(false)),
|
||||
};
|
||||
(pool, worker)
|
||||
}
|
||||
pub(crate) unsafe fn run(
|
||||
&mut self,
|
||||
items_lock: ArcMutexGuard<RawMutex, ItemCache>,
|
||||
query_status: query::Status,
|
||||
) {
|
||||
self.running = true;
|
||||
let mut last_scored_item = self.items.len();
|
||||
let cleared = self.items.update(&items_lock);
|
||||
drop(items_lock);
|
||||
|
||||
// TODO: be smarter around reusing past results for rescoring
|
||||
if cleared || query_status == query::Status::Rescore {
|
||||
self.matches.clear();
|
||||
last_scored_item = 0;
|
||||
}
|
||||
|
||||
let matchers = &self.matchers;
|
||||
let query = &self.query;
|
||||
let items = unsafe { self.items.get() };
|
||||
|
||||
if query_status != query::Status::Unchanged && !self.matches.is_empty() {
|
||||
self.matches
|
||||
.par_iter_mut()
|
||||
.take_any_while(|_| self.canceled.load(atomic::Ordering::Relaxed))
|
||||
.for_each(|match_| {
|
||||
let item = &items[match_.idx as usize];
|
||||
match_.score = query
|
||||
.score(item.cols(), unsafe { matchers.get() })
|
||||
.unwrap_or(u32::MAX);
|
||||
});
|
||||
// TODO: do this in parallel?
|
||||
self.matches.retain(|m| m.score != u32::MAX)
|
||||
}
|
||||
|
||||
if last_scored_item != self.items.len() {
|
||||
self.running = true;
|
||||
let items = items[last_scored_item..]
|
||||
.par_iter()
|
||||
.enumerate()
|
||||
.filter_map(|(i, item)| {
|
||||
let score = if self.canceled.load(atomic::Ordering::Relaxed) {
|
||||
0
|
||||
} else {
|
||||
query.score(item.cols(), unsafe { matchers.get() })?
|
||||
};
|
||||
Some(Match {
|
||||
score,
|
||||
idx: i as u32,
|
||||
})
|
||||
});
|
||||
self.matches.par_extend(items)
|
||||
}
|
||||
|
||||
if !self.canceled.load(atomic::Ordering::Relaxed) {
|
||||
// TODO: cancel sort in progess?
|
||||
self.matches.par_sort_unstable_by(|match1, match2| {
|
||||
match2.idx.cmp(&match1.idx).then_with(|| {
|
||||
// the tie breaker is comparitevly rarely needed so we keep it
|
||||
// in a branch especially beacuse we need to acceess the items
|
||||
// array here which invovles some pointer chasing
|
||||
let item1 = &items[match1.idx as usize];
|
||||
let item2 = &items[match2.idx as usize];
|
||||
(item1.len, match1.idx).cmp(&(item2.len, match2.idx))
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
(self.notify)();
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user