mirror of
https://github.com/solaeus/nucleo.git
synced 2024-12-22 01:47:49 +00:00
Prepare for 0.2 release
Co-authored-by: Michael Davis <mcarsondavis@gmail.com>
This commit is contained in:
parent
de844d6ace
commit
20bf02f0ac
10
CHANGELOG.md
Normal file
10
CHANGELOG.md
Normal file
@ -0,0 +1,10 @@
|
||||
# Changelog
|
||||
|
||||
## nucleo-matcher
|
||||
|
||||
# [0.2.0] - 2023-09-01
|
||||
|
||||
*initial public release*
|
||||
|
||||
|
||||
[0.2.0]: https://github.com/helix-editor/nucleo/releases/tag/nucleo-v0.2.0
|
4
Cargo.lock
generated
4
Cargo.lock
generated
@ -152,7 +152,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "nucleo"
|
||||
version = "0.1.1"
|
||||
version = "0.2.0"
|
||||
dependencies = [
|
||||
"nucleo-matcher",
|
||||
"parking_lot",
|
||||
@ -161,7 +161,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "nucleo-matcher"
|
||||
version = "0.1.0"
|
||||
version = "0.2.0"
|
||||
dependencies = [
|
||||
"cov-mark",
|
||||
"memchr",
|
||||
|
@ -2,7 +2,7 @@
|
||||
name = "nucleo"
|
||||
description = "plug and play high performance fuzzy matcher"
|
||||
authors = ["Pascal Kuthe <pascal.kuthe@semimod.de>"]
|
||||
version = "0.1.1"
|
||||
version = "0.2.0"
|
||||
edition = "2021"
|
||||
license = "MPL-2.0"
|
||||
repository = "https://github.com/helix-editor/nucleo"
|
||||
@ -11,7 +11,7 @@ readme = "README.md"
|
||||
[lib]
|
||||
|
||||
[dependencies]
|
||||
nucleo-matcher = { version = "0.1", path = "matcher" }
|
||||
nucleo-matcher = { version = "0.2.0", path = "matcher" }
|
||||
parking_lot = { version = "0.12.1", features = ["send_guard", "arc_lock"]}
|
||||
rayon = "1.7.0"
|
||||
|
||||
|
11
README.md
11
README.md
@ -1,10 +1,5 @@
|
||||
# Nucleo
|
||||
|
||||
> Disclaimer: An 0.1 version has been published to crates.io.
|
||||
> This allows us to merge the `nucleo` integration into helix.
|
||||
> However, the public API is not yet final and will likely
|
||||
> change quite a bit in the next release. The documentation
|
||||
> is also not yet complete
|
||||
|
||||
`nucleo` is a highly performant fuzzy matcher written in rust. It aims to fill the same use case as `fzf` and `skim`. Compared to `fzf` `nucleo` has a significantly faster matching algorithm. This mainly makes a difference when matching patterns with low selectivity on many items. An (unscientific) comparison is shown in the benchmark section below.
|
||||
|
||||
@ -14,6 +9,12 @@
|
||||
|
||||
Nucleo also handles Unicode graphemes more correctly. `Fzf` and `skim` both operate on Unicode code points (chars). That means that multi codepoint graphemes can have weird effects (match multiple times, weirdly change the score, ...). `nucleo` will always use the first codepoint of the grapheme for matching instead (and reports grapheme indices, so they can be highlighted correctly).
|
||||
|
||||
## Status
|
||||
|
||||
Nucleo is used in the helix-editor and therefore has a large user base with lots of real world testing. The core matcher implementation is considered complete and is unlikely to see major changes. The `nucleo-matcher` crate is finished and ready for widespread use; breaking changes should be very rare (a 1.0 release should not be far away).
|
||||
|
||||
While the high level `nucleo` crate also works well (and is also used in helix), there are still additional features that will be added in the future. The high level crate also needs better documentation and will likely see a few API changes in the future.
|
||||
|
||||
## Benchmarks
|
||||
|
||||
> WIP currently more of a demonstration than a comprehensive benchmark suite
|
||||
|
@ -6,7 +6,7 @@ edition = "2021"
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
nucleo = { version = "0.1", path = "../" }
|
||||
nucleo = { version = "0.2", path = "../" }
|
||||
brunch = "0.5.0"
|
||||
fuzzy-matcher = "0.3.7"
|
||||
walkdir = "2"
|
@ -2,7 +2,7 @@
|
||||
name = "nucleo-matcher"
|
||||
description = "plug and play high performance fuzzy matcher"
|
||||
authors = ["Pascal Kuthe <pascal.kuthe@semimod.de>"]
|
||||
version = "0.1.0"
|
||||
version = "0.2.0"
|
||||
edition = "2021"
|
||||
license = "MPL-2.0"
|
||||
repository = "https://github.com/helix-editor/nucleo"
|
||||
|
@ -169,7 +169,7 @@ pub(crate) enum CharClass {
|
||||
/// Nucleo cannot match graphemes as single units. To work around
|
||||
/// that we only use the first codepoint of each grapheme. This
|
||||
/// iterator returns the first character of each unicode grapheme
|
||||
// in a string and is used for constructing `Utf32Str(ing)`.
|
||||
/// in a string and is used for constructing `Utf32Str(ing)`.
|
||||
pub fn graphemes(text: &str) -> impl Iterator<Item = char> + '_ {
|
||||
text.graphemes(true).map(|grapheme| {
|
||||
grapheme
|
||||
|
@ -4,8 +4,58 @@ used by the high level `nucleo` crate.
|
||||
|
||||
The matcher is highly optimized and can significantly outperform `fzf` and
|
||||
`skim` (the `fuzzy-matcher` crate). However some of these optimizations require
|
||||
a slightly less convenient API. Be sure to carefully read the documentation of the
|
||||
[`Matcher`] to avoid unexpected behaviour..
|
||||
a slightly less convenient API. Be sure to carefully read the documentation of
|
||||
the [`Matcher`] to avoid unexpected behaviour.
|
||||
# Examples
|
||||
|
||||
For almost all usecases the [`pattern`] API should be used instead of calling
|
||||
the matcher methods directly. [`Pattern::parse`](pattern::Pattern::parse) will
|
||||
construct a single Atom (a single match operation) for each word. The pattern
|
||||
can contain special characters to control what kind of match is performed (see
|
||||
[`AtomKind`](crate::pattern::AtomKind)).
|
||||
|
||||
```
|
||||
# use nucleo_matcher::{Matcher, Config};
|
||||
# use nucleo_matcher::pattern::{Pattern, CaseMatching};
|
||||
let paths = ["foo/bar", "bar/foo", "foobar"];
|
||||
let mut matcher = Matcher::new(Config::DEFAULT.match_paths());
|
||||
let matches = Pattern::parse("foo bar", CaseMatching::Ignore).match_list(paths, &mut matcher);
|
||||
assert_eq!(matches, vec![("foo/bar", 168), ("bar/foo", 168), ("foobar", 140)]);
|
||||
let matches = Pattern::parse("^foo bar", CaseMatching::Ignore).match_list(paths, &mut matcher);
|
||||
assert_eq!(matches, vec![("foo/bar", 168), ("foobar", 140)]);
|
||||
```
|
||||
|
||||
If the pattern should be matched literally (without this special parsing)
|
||||
[`Pattern::new`](pattern::Pattern::new) can be used instead.
|
||||
|
||||
```
|
||||
# use nucleo_matcher::{Matcher, Config};
|
||||
# use nucleo_matcher::pattern::{Pattern, CaseMatching, AtomKind};
|
||||
let paths = ["foo/bar", "bar/foo", "foobar"];
|
||||
let mut matcher = Matcher::new(Config::DEFAULT.match_paths());
|
||||
let matches = Pattern::new("foo bar", CaseMatching::Ignore, AtomKind::Fuzzy).match_list(paths, &mut matcher);
|
||||
assert_eq!(matches, vec![("foo/bar", 168), ("bar/foo", 168), ("foobar", 140)]);
|
||||
let paths = ["^foo/bar", "bar/^foo", "foobar"];
|
||||
let matches = Pattern::new("^foo bar", CaseMatching::Ignore, AtomKind::Fuzzy).match_list(paths, &mut matcher);
|
||||
assert_eq!(matches, vec![("^foo/bar", 188), ("bar/^foo", 188)]);
|
||||
```
|
||||
|
||||
If word segmentation is also not desired, a single `Atom` can be constructed directly.
|
||||
|
||||
```
|
||||
# use nucleo_matcher::{Matcher, Config};
|
||||
# use nucleo_matcher::pattern::{Pattern, Atom, CaseMatching, AtomKind};
|
||||
let paths = ["foobar", "foo bar"];
|
||||
let mut matcher = Matcher::new(Config::DEFAULT);
|
||||
let matches = Atom::new("foo bar", CaseMatching::Ignore, AtomKind::Fuzzy, false).match_list(paths, &mut matcher);
|
||||
assert_eq!(matches, vec![("foo bar", 192)]);
|
||||
```
|
||||
|
||||
|
||||
# Status
|
||||
|
||||
Nucleo is used in the helix-editor and therefore has a large user base with lots of real world testing. The core matcher implementation is considered complete and is unlikely to see major changes. The `nucleo-matcher` crate is finished and ready for widespread use; breaking changes should be very rare (a 1.0 release should not be far away).
|
||||
|
||||
*/
|
||||
|
||||
// sadly ranges don't optimize well
|
||||
@ -40,46 +90,44 @@ use crate::matrix::MatrixSlab;
|
||||
/// matching. This scratch memory allows the matcher to guarantee that it will
|
||||
/// **never allocate** during matching (with the exception of pushing to the
|
||||
/// `indices` vector if there isn't enough capacity). However this scratch
|
||||
/// memory is fairly large (around 135KB) so creating a matcher is expensive and
|
||||
/// should be reused.
|
||||
/// memory is fairly large (around 135KB) so creating a matcher is expensive.
|
||||
///
|
||||
/// All `.._match` functions will not compute the indices of the matched chars
|
||||
/// and are therefore significantly faster. These should be used to prefitler
|
||||
/// and sort all matches. All `.._indices` functions will compute the indices of
|
||||
/// the computed chars. These should be used when rendering the best N matches.
|
||||
/// Note that the `indices` argument is **never cleared**. This allows running
|
||||
/// multiple different matches on the same haystack and merging the indices by
|
||||
/// sorting and deduplicating the vector.
|
||||
/// All `.._match` functions will not compute the indices of the matched
|
||||
/// characters. These should be used to prefilter and rank all
|
||||
/// matches. All `.._indices` functions will also compute the indices of the
|
||||
/// matched characters but are slower compared to the `.._match` variants. These
|
||||
/// should be used when rendering the best N matches. Note that the `indices`
|
||||
/// argument is **never cleared**. This allows running multiple different
|
||||
/// matches on the same haystack and merging the indices by sorting and
|
||||
/// deduplicating the vector.
|
||||
///
|
||||
/// The `needle` argument for each function must always be normalized by the caller
|
||||
/// (unicode normalization and case folding if a case insesnitive match is produced).
|
||||
/// Otherwise, the matcher may fail to produce a match. The [`pattern`] modules
|
||||
/// provides utilities to preprocess needles.
|
||||
/// The `needle` argument for each function must always be normalized by the
|
||||
/// caller (unicode normalization and case folding). Otherwise, the matcher
|
||||
/// may fail to produce a match. The [`pattern`] module provides utilities
|
||||
/// to preprocess needles and **should usually be preferred over invoking the
|
||||
/// matcher directly**. Additionally it's recommended to perform separate matches
|
||||
/// for each word in the needle. Consider the following example:
|
||||
///
|
||||
/// Additionally it's recommend to perform separate matches for each word in
|
||||
/// the needle. Consider the folloling example: If `foo bar` as used at the
|
||||
/// needle it matches both `foo test baaar` and `foo hello-world bar`. However,
|
||||
/// `foo test baaar` will receive a lower score/rank lower. `baaar` contains a
|
||||
/// 2 character gap which will receive a penalty and therefore the user will
|
||||
/// likely expect it to rank lower. However, if `foo bar` is matched as a single
|
||||
/// query `hello-world` and `test` are both considered gaps too. As `hello-
|
||||
/// world` is a much longer gap then `test` the extra penalty for `baaar` is
|
||||
/// outweigh. If both words are matched individually the interspersed words
|
||||
/// do not receive a penalty and `foo hello-world bar` ranks higher.
|
||||
///
|
||||
/// In general nucleo is a **substring matching tool** with no penalty assigned
|
||||
/// to matches that start later within the same pattern (which enables the
|
||||
/// usecase shown above). This may be undesirable in one very particular usecase:
|
||||
/// For automatic suggestions for commands (like a shell). In these case the
|
||||
/// assumption is that the user is actually typing the full haystack. In other words:
|
||||
/// The matcher should prefer a prefix match. To accomedate that usecase the
|
||||
/// [`prefer_prefix`](MatcherConfig::prefer_prefix) option can be set
|
||||
/// to true. Note that the penalty given is quite small (and capped to a maximum)
|
||||
/// to avoid overwriting the normal scoring heuristic.
|
||||
/// If `foo bar` is used as the needle it matches both `foo test baaar` and
|
||||
/// `foo hello-world bar`. However, `foo test baaar` will receive a higher
|
||||
/// score than `foo hello-world bar`. `baaar` contains a 2 character gap which
|
||||
/// will receive a penalty and therefore the user will likely expect it to rank
|
||||
/// lower. However, if `foo bar` is matched as a single query `hello-world` and
|
||||
/// `test` are both considered gaps too. As `hello-world` is a much longer gap
|
||||
/// than `test` the extra penalty for `baaar` is canceled out. If both words
|
||||
/// are matched individually the interspersed words do not receive a penalty and
|
||||
/// `foo hello-world bar` ranks higher.
|
||||
///
|
||||
/// In general nucleo is a **substring matching tool** (except for the prefix/
|
||||
/// postfix matching modes) with no penalty assigned to matches that start
|
||||
/// later within the same pattern (which enables matching words individually
|
||||
/// as shown above). If patterns show a large variety in length and the syntax
|
||||
/// described above is not used it may be preferable to give preference to
|
||||
/// matches closer to the start of a haystack. To accommodate that usecase the
|
||||
/// [`prefer_prefix`](Config::prefer_prefix) option can be set to true.
|
||||
///
|
||||
/// Matching is limited to 2^32-1 codepoints, if the haystack is longer than
|
||||
/// that the matcher *will panic*. The caller must decide whether it wants to
|
||||
/// that the matcher **will panic**. The caller must decide whether it wants to
|
||||
/// filter out long haystacks or truncate them.
|
||||
pub struct Matcher {
|
||||
#[allow(missing_docs)]
|
||||
@ -115,9 +163,9 @@ impl Default for Matcher {
|
||||
}
|
||||
|
||||
impl Matcher {
|
||||
/// Creates a new matcher instance, note that this will eagerly allocate
|
||||
/// a fairly large chunk of heap memory (135KB currently but subject to
|
||||
/// change) so matchers should be reused if used in a loop.
|
||||
/// Creates a new matcher instance, note that this will eagerly allocate a
|
||||
/// fairly large chunk of heap memory (around 135KB currently but subject to
|
||||
/// change) so matchers should be reused if called often (like in a loop).
|
||||
pub fn new(config: Config) -> Self {
|
||||
Self {
|
||||
config,
|
||||
@ -127,9 +175,10 @@ impl Matcher {
|
||||
|
||||
/// Find the fuzzy match with the highest score in the `haystack`.
|
||||
///
|
||||
/// This functions has `O(mn)` time complexity for short inputs. To
|
||||
/// avoid slowdowns it automatically falls back to [greedy matching]
|
||||
/// (crate::Matcher::fuzzy_match_greedy) for large needles and haystacks
|
||||
/// This function has `O(mn)` time complexity for short inputs.
|
||||
/// To avoid slowdowns it automatically falls back to
|
||||
/// [greedy matching](crate::Matcher::fuzzy_match_greedy) for large
|
||||
/// needles and haystacks.
|
||||
///
|
||||
/// See the [matcher documentation](crate::Matcher) for more details.
|
||||
pub fn fuzzy_match(&mut self, haystack: Utf32Str<'_>, needle: Utf32Str<'_>) -> Option<u16> {
|
||||
@ -261,7 +310,7 @@ impl Matcher {
|
||||
/// Greedily find a fuzzy match in the `haystack`.
|
||||
///
|
||||
/// This function has `O(n)` time complexity but may provide unintuitive (non-optimal)
|
||||
/// indices and scores. Usually [fuzz_indices](crate::Matcher::fuzzy_indices) should
|
||||
/// indices and scores. Usually [fuzzy_match](crate::Matcher::fuzzy_match) should
|
||||
/// be preferred.
|
||||
///
|
||||
/// See the [matcher documentation](crate::Matcher) for more details.
|
||||
@ -277,7 +326,7 @@ impl Matcher {
|
||||
/// Greedily find a fuzzy match in the `haystack` and compute its indices.
|
||||
///
|
||||
/// This function has `O(n)` time complexity but may provide unintuitive (non-optimal)
|
||||
/// indices and scores. Usually [fuzz_indices](crate::Matcher::fuzzy_indices) should
|
||||
/// indices and scores. Usually [fuzzy_indices](crate::Matcher::fuzzy_indices) should
|
||||
/// be preferred.
|
||||
///
|
||||
/// See the [matcher documentation](crate::Matcher) for more details.
|
||||
@ -361,7 +410,7 @@ impl Matcher {
|
||||
/// Finds the substring match with the highest score in the `haystack`.
|
||||
///
|
||||
/// This function has `O(nm)` time complexity. However many cases can
|
||||
/// be significantly accelerated using prefilters so it's usually fast
|
||||
/// be significantly accelerated using prefilters so it's usually very fast
|
||||
/// in practice.
|
||||
///
|
||||
/// See the [matcher documentation](crate::Matcher) for more details.
|
||||
|
@ -11,28 +11,28 @@ use crate::Utf32String;
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
|
||||
#[non_exhaustive]
|
||||
/// How nucleo will treat case mismatch
|
||||
/// How to treat a case mismatch between two characters.
|
||||
pub enum CaseMatching {
|
||||
/// Characters always match their case folded version (`a == A`)
|
||||
/// Characters always match their case folded version (`a == A`).
|
||||
Ignore,
|
||||
/// Characters never match their case folded version (`a != A`)
|
||||
/// Characters never match their case folded version (`a != A`).
|
||||
Respect,
|
||||
/// Acts like `Ignore` if all characters in a pattern atom are
|
||||
/// lowercase and like `Respect` otherwire
|
||||
/// Acts like [`Ignore`](CaseMatching::Ignore) if all characters in a pattern atom are
|
||||
/// lowercase and like [`Respect`](CaseMatching::Respect) otherwise.
|
||||
#[default]
|
||||
Smart,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
#[non_exhaustive]
|
||||
/// The kind of matching algorithm to run for this atom
|
||||
/// The kind of matching algorithm to run for an atom.
|
||||
pub enum AtomKind {
|
||||
/// Fuzzy matching where the needle must match any haystack characters
|
||||
/// (match can contain gaps). This atom kind is used by default if no
|
||||
/// special syntax is used. There is no negated fuzzy matching (too
|
||||
/// many false positives).
|
||||
///
|
||||
/// See also [`Matcher::exact_match`](crate::Matcher::exact_match).
|
||||
/// See also [`Matcher::fuzzy_match`](crate::Matcher::fuzzy_match).
|
||||
Fuzzy,
|
||||
/// The needle must match a contiguous sequence of haystack characters
|
||||
/// without gaps. This atom kind is parsed from the following syntax:
|
||||
@ -41,8 +41,8 @@ pub enum AtomKind {
|
||||
/// See also [`Matcher::substring_match`](crate::Matcher::substring_match).
|
||||
Substring,
|
||||
/// The needle must match all leading haystack characters without gaps or
|
||||
/// prefix. This atom kind is parsed from the following syntax: `foo$` and
|
||||
/// `!foo$` (negated).
|
||||
/// prefix. This atom kind is parsed from the following syntax: `^foo` and
|
||||
/// `!^foo` (negated).
|
||||
///
|
||||
/// See also [`Matcher::prefix_match`](crate::Matcher::prefix_match).
|
||||
Prefix,
|
||||
@ -56,7 +56,7 @@ pub enum AtomKind {
|
||||
/// This atom kind is parsed from the following syntax: `^foo$` and `!^foo$`
|
||||
/// (negated).
|
||||
///
|
||||
/// See also [`Matcher::exact_match`] (crate::Matcher::exact_match).
|
||||
/// See also [`Matcher::exact_match`](crate::Matcher::exact_match).
|
||||
Exact,
|
||||
}
|
||||
|
||||
@ -74,8 +74,9 @@ pub struct Atom {
|
||||
}
|
||||
|
||||
impl Atom {
|
||||
/// Creates a single [`PatternAtom`] from a string by performing unicode
|
||||
/// normalization
|
||||
/// Creates a single [`Atom`] from a string by performing unicode
|
||||
/// normalization and case folding (if necessary). Optionally `\ ` can
|
||||
/// be escaped to ` `.
|
||||
pub fn new(needle: &str, case: CaseMatching, kind: AtomKind, escape_whitespace: bool) -> Atom {
|
||||
Atom::new_inner(needle, case, kind, escape_whitespace, false)
|
||||
}
|
||||
@ -254,12 +255,14 @@ impl Atom {
|
||||
}
|
||||
|
||||
/// Matches this pattern against `haystack` (using the allocation and
|
||||
/// configuration from `matcher`), calculates a ranking score and the matche
|
||||
/// configuration from `matcher`), calculates a ranking score and the match
|
||||
/// indices. See the [`Matcher`](crate::Matcher) documentation for more
|
||||
/// details.
|
||||
///
|
||||
/// *Note:* The `ignore_case` setting is overwritten to match the casing of
|
||||
/// this pattern atom.
|
||||
/// each pattern atom.
|
||||
///
|
||||
/// *Note:* The `indices` vector is not cleared by this function.
|
||||
pub fn indices(
|
||||
&self,
|
||||
haystack: Utf32Str<'_>,
|
||||
@ -299,15 +302,18 @@ impl Atom {
|
||||
pub fn needle_text(&self) -> Utf32Str<'_> {
|
||||
self.needle.slice(..)
|
||||
}
|
||||
/// Convenience function to easily match on a (relatively small) list of
|
||||
/// inputs. This is not recommended for building a full fuzzy matching
|
||||
/// application that can match large numbers of matches (like all files in
|
||||
/// a directory) as all matching is done on the current thread, effectively
|
||||
/// blocking the UI.
|
||||
/// Convenience function to easily match (and sort) a (relatively small)
|
||||
/// list of inputs.
|
||||
///
|
||||
/// *Note* This function is not recommended for building a full fuzzy
|
||||
/// matching application that can match large numbers of matches (like all
|
||||
/// files in a directory) as all matching is done on the current thread,
|
||||
/// effectively blocking the UI. For such applications the high level
|
||||
/// `nucleo` crate can be used instead.
|
||||
pub fn match_list<T: AsRef<str>>(
|
||||
&self,
|
||||
matcher: &mut Matcher,
|
||||
items: impl IntoIterator<Item = T>,
|
||||
matcher: &mut Matcher,
|
||||
) -> Vec<(T, u16)> {
|
||||
if self.needle.is_empty() {
|
||||
return items.into_iter().map(|item| (item, 0)).collect();
|
||||
@ -338,7 +344,7 @@ fn pattern_atoms(pattern: &str) -> impl Iterator<Item = &str> + '_ {
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
/// A fuzzy match pattern
|
||||
/// A text pattern made up of (potentially multiple) [atoms](crate::pattern::Atom).
|
||||
#[non_exhaustive]
|
||||
pub struct Pattern {
|
||||
/// The individual pattern (words) in this pattern
|
||||
@ -348,9 +354,9 @@ pub struct Pattern {
|
||||
impl Pattern {
|
||||
/// Creates a pattern where each word is matched individually (whitespaces
|
||||
/// can be escaped with `\`). Otherwise no parsing is performed (so $, !, '
|
||||
/// and ^ don't receive special treatment). If you want to match the entiru
|
||||
/// pattern as a single needle use a single [`PatternAtom`] instead
|
||||
pub fn new(case_matching: CaseMatching, kind: AtomKind, pattern: &str) -> Pattern {
|
||||
/// and ^ don't receive special treatment). If you want to match the entire
|
||||
/// pattern as a single needle use a single [`Atom`] instead.
|
||||
pub fn new(pattern: &str, case_matching: CaseMatching, kind: AtomKind) -> Pattern {
|
||||
let atoms = pattern_atoms(pattern)
|
||||
.filter_map(|pat| {
|
||||
let pat = Atom::new(pat, case_matching, kind, true);
|
||||
@ -361,9 +367,9 @@ impl Pattern {
|
||||
}
|
||||
/// Creates a pattern where each word is matched individually (whitespaces
|
||||
/// can be escaped with `\`). And $, !, ' and ^ at word boundaries will
|
||||
/// cause different matching behaviour (see [`PatternAtomKind`]). These can be
|
||||
/// cause different matching behaviour (see [`AtomKind`]). These can be
|
||||
/// escaped with backslash.
|
||||
pub fn parse(case_matching: CaseMatching, pattern: &str) -> Pattern {
|
||||
pub fn parse(pattern: &str, case_matching: CaseMatching) -> Pattern {
|
||||
let atoms = pattern_atoms(pattern)
|
||||
.filter_map(|pat| {
|
||||
let pat = Atom::parse(pat, case_matching);
|
||||
@ -373,15 +379,18 @@ impl Pattern {
|
||||
Pattern { atoms }
|
||||
}
|
||||
|
||||
/// Convenience function to easily match on a (relatively small) list of
|
||||
/// inputs. This is not recommended for building a full fuzzy matching
|
||||
/// application that can match large numbers of matches (like all files in
|
||||
/// a directory) as all matching is done on the current thread, effectively
|
||||
/// blocking the UI.
|
||||
/// Convenience function to easily match (and sort) a (relatively small)
|
||||
/// list of inputs.
|
||||
///
|
||||
/// *Note* This function is not recommended for building a full fuzzy
|
||||
/// matching application that can match large numbers of matches (like all
|
||||
/// files in a directory) as all matching is done on the current thread,
|
||||
/// effectively blocking the UI. For such applications the high level
|
||||
/// `nucleo` crate can be used instead.
|
||||
pub fn match_list<T: AsRef<str>>(
|
||||
&self,
|
||||
matcher: &mut Matcher,
|
||||
items: impl IntoIterator<Item = T>,
|
||||
matcher: &mut Matcher,
|
||||
) -> Vec<(T, u32)> {
|
||||
if self.atoms.is_empty() {
|
||||
return items.into_iter().map(|item| (item, 0)).collect();
|
||||
@ -416,7 +425,7 @@ impl Pattern {
|
||||
}
|
||||
|
||||
/// Matches this pattern against `haystack` (using the allocation and
|
||||
/// configuration from `matcher`), calculates a ranking score and the matche
|
||||
/// configuration from `matcher`), calculates a ranking score and the match
|
||||
/// indices. See the [`Matcher`](crate::Matcher) documentation for more
|
||||
/// details.
|
||||
///
|
||||
@ -424,8 +433,16 @@ impl Pattern {
|
||||
/// each pattern atom.
|
||||
///
|
||||
/// *Note:* The indices for each pattern are calculated individually
|
||||
/// and simply appended to the `indices` vector. This allows
|
||||
/// and simply appended to the `indices` vector and not deduplicated/sorted.
|
||||
/// This allows associating the match indices to their source pattern. If
|
||||
/// required (like for highlighting) unique/sorted indices can be obtained
|
||||
/// as follows:
|
||||
///
|
||||
/// ```
|
||||
/// # let mut indices: Vec<u32> = Vec::new();
|
||||
/// indices.sort_unstable();
|
||||
/// indices.dedup();
|
||||
/// ```
|
||||
pub fn indices(
|
||||
&self,
|
||||
haystack: Utf32Str<'_>,
|
||||
@ -442,7 +459,9 @@ impl Pattern {
|
||||
Some(score)
|
||||
}
|
||||
|
||||
/// Refreshes this pattern by reparsing a
|
||||
/// Refreshes this pattern by reparsing it from a string. This is mostly
|
||||
/// equivalent to just constructing a new pattern using [`Pattern::parse`]
|
||||
/// but is slightly more efficient by reusing some allocations
|
||||
pub fn reparse(&mut self, pattern: &str, case_matching: CaseMatching) {
|
||||
self.atoms.clear();
|
||||
let atoms = pattern_atoms(pattern).filter_map(|atom| {
|
||||
|
83
src/lib.rs
83
src/lib.rs
@ -1,3 +1,31 @@
|
||||
/*!
|
||||
`nucleo` is a high level crate that provides a high level matcher API that
|
||||
provides a highly effective (parallel) matcher worker. It's designed to allow
|
||||
quickly plugging a fully featured (and faster) fzf/skim like fuzzy matcher into
|
||||
your TUI application.
|
||||
|
||||
It's designed to run matching on a background threadpool while providing a
|
||||
snapshot of the last complete match. That means the matcher can update the
|
||||
results live while the user is typing while never blocking the main UI thread
|
||||
(beyond a user provided timeout). Nucleo also supports fully concurrent lock-free
|
||||
(and wait-free) streaming of input items.
|
||||
|
||||
The [`Nucleo`] struct serves as the main API entrypoint for this crate.
|
||||
|
||||
# Status
|
||||
|
||||
Nucleo is used in the helix-editor and therefore has a large user base with lots
|
||||
of real world testing. The core matcher implementation is considered complete
|
||||
and is unlikely to see major changes. The `nucleo-matcher` crate is finished and
|
||||
ready for widespread use, breaking changes should be very rare (a 1.0 release
|
||||
should not be far away).
|
||||
|
||||
While the high level `nucleo` crate also works well (and is also used in helix),
|
||||
there are still additional features that will be added in the future. The high
|
||||
level crate also needs better documentation and will likely see a few API
|
||||
changes in the future.
|
||||
|
||||
*/
|
||||
use std::ops::{Bound, RangeBounds};
|
||||
use std::sync::atomic::{self, AtomicBool, Ordering};
|
||||
use std::sync::Arc;
|
||||
@ -15,11 +43,16 @@ mod par_sort;
|
||||
pub mod pattern;
|
||||
mod worker;
|
||||
|
||||
/// A match candidate stored in a [`Nucleo`] worker.
|
||||
pub struct Item<'a, T> {
|
||||
pub data: &'a T,
|
||||
pub matcher_columns: &'a [Utf32String],
|
||||
}
|
||||
|
||||
/// A handle that allows adding new items to a [`Nucleo`] worker.
|
||||
///
|
||||
/// It's internally reference counted and can be cheaply cloned
|
||||
/// and sent across threads.
|
||||
pub struct Injector<T> {
|
||||
items: Arc<boxcar::Vec<T>>,
|
||||
notify: Arc<(dyn Fn() + Sync + Send)>,
|
||||
@ -35,15 +68,17 @@ impl<T> Clone for Injector<T> {
|
||||
}
|
||||
|
||||
impl<T> Injector<T> {
|
||||
/// Appends an element to the back of the vector.
|
||||
/// Appends an element to the list of matched items.
|
||||
/// This function is lock-free and wait-free.
|
||||
pub fn push(&self, value: T, fill_columns: impl FnOnce(&mut [Utf32String])) -> u32 {
|
||||
let idx = self.items.push(value, fill_columns);
|
||||
(self.notify)();
|
||||
idx
|
||||
}
|
||||
|
||||
/// Returns the total number of items in the current
|
||||
/// queue
|
||||
/// Returns the total number of items injected in the matcher. This might
|
||||
/// not match the number of items in the match snapshot (if the matcher
|
||||
/// is still running)
|
||||
pub fn injected_items(&self) -> u32 {
|
||||
self.items.count()
|
||||
}
|
||||
@ -66,18 +101,24 @@ impl<T> Injector<T> {
|
||||
}
|
||||
}
|
||||
|
||||
/// An [item](crate::Item) that was successfully matched by a [`Nucleo`] worker.
|
||||
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
|
||||
pub struct Match {
|
||||
pub score: u32,
|
||||
pub idx: u32,
|
||||
}
|
||||
|
||||
/// The status of a [`Nucleo`] worker after a match.
|
||||
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
|
||||
pub struct Status {
|
||||
/// Whether the current snapshot has changed.
|
||||
pub changed: bool,
|
||||
/// Whether the matcher is still processing in the background.
|
||||
pub running: bool,
|
||||
}
|
||||
|
||||
/// A snapshot represents the results of a [`Nucleo`] worker after
|
||||
/// finishing a [`tick`](Nucleo::tick).
|
||||
pub struct Snapshot<T: Sync + Send + 'static> {
|
||||
item_count: u32,
|
||||
matches: Vec<Match>,
|
||||
@ -178,6 +219,8 @@ impl<T: Sync + Send + 'static> Snapshot<T> {
|
||||
}
|
||||
}
|
||||
|
||||
/// A high level matcher worker that quickly computes matches in a background
|
||||
/// threadpool.
|
||||
pub struct Nucleo<T: Sync + Send + 'static> {
|
||||
// the way the API is built we totally don't actually need these to be Arcs
|
||||
// but this lets us avoid some unsafe
|
||||
@ -189,10 +232,27 @@ pub struct Nucleo<T: Sync + Send + 'static> {
|
||||
items: Arc<boxcar::Vec<T>>,
|
||||
notify: Arc<(dyn Fn() + Sync + Send)>,
|
||||
snapshot: Snapshot<T>,
|
||||
/// The pattern matched by this matcher. To update the match pattern
|
||||
/// [`MultiPattern::reparse`](`pattern::MultiPattern::reparse`) should be used.
|
||||
/// Note that the matcher worker will only become aware of the new pattern
|
||||
/// after a call to [`tick`](Nucleo::tick).
|
||||
pub pattern: MultiPattern,
|
||||
}
|
||||
|
||||
impl<T: Sync + Send + 'static> Nucleo<T> {
|
||||
/// Constructs a new `nucleo` worker threadpool with the provided `config`.
|
||||
///
|
||||
/// `notify` is called every time new information is available and
|
||||
/// [`tick`](Nucleo::tick) should be called. Note that `notify` is not
|
||||
/// debounced, that should be handled by the downstream crate (for example
|
||||
/// debouncing to only redraw at most every 1/60 seconds).
|
||||
///
|
||||
/// If `None` is passed for the number of worker threads, nucleo will use
|
||||
/// one thread per hardware thread.
|
||||
///
|
||||
/// Nucleo can match items with multiple orthogonal properties. `columns`
|
||||
/// indicates how many matching columns each item (and the pattern) has. The
|
||||
/// number of columns can not be changed after construction.
|
||||
pub fn new(
|
||||
config: Config,
|
||||
notify: Arc<(dyn Fn() + Sync + Send)>,
|
||||
@ -218,11 +278,12 @@ impl<T: Sync + Send + 'static> Nucleo<T> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a snapshot of all items
|
||||
/// Returns a snapshot of the current matcher state.
|
||||
pub fn snapshot(&self) -> &Snapshot<T> {
|
||||
&self.snapshot
|
||||
}
|
||||
|
||||
/// Returns an injector that can be used for adding candidates to the matcher.
|
||||
pub fn injector(&self) -> Injector<T> {
|
||||
Injector {
|
||||
items: self.items.clone(),
|
||||
@ -230,11 +291,11 @@ impl<T: Sync + Send + 'static> Nucleo<T> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Restart the the item stream. Removes all items disconnects all
|
||||
/// previously created injectors from this instance. If `clear_snapshot` is
|
||||
/// `true` then all items and matched are removed from the
|
||||
/// [`Snapshot`](crate::Snapshot) immediately. Otherwise the snapshot will
|
||||
/// keep the current matches until the matcher has run again.
|
||||
/// Restart the item stream. Removes all items and disconnects all
|
||||
/// previously created injectors from this instance. If `clear_snapshot`
|
||||
/// is `true` then all items and matches are removed from the
|
||||
/// [`Snapshot`](crate::Snapshot) immediately. Otherwise the snapshot will keep the
|
||||
/// current matches until the matcher has run again.
|
||||
///
|
||||
/// # Note
|
||||
///
|
||||
@ -254,6 +315,10 @@ impl<T: Sync + Send + 'static> Nucleo<T> {
|
||||
self.worker.lock().update_config(config)
|
||||
}
|
||||
|
||||
/// The main way to interact with the matcher, this should be called
|
||||
/// regularly (for example each time a frame is rendered). To avoid
|
||||
/// excessive redraws this method will wait `timeout` milliseconds for the
|
||||
/// worker thread to finish. It is recommended to set the timeout to 10ms.
|
||||
pub fn tick(&mut self, timeout: u64) -> Status {
|
||||
self.should_notify.store(false, atomic::Ordering::Relaxed);
|
||||
let status = self.pattern.status();
|
||||
|
@ -5,7 +5,7 @@ use nucleo_matcher::{Matcher, Utf32String};
|
||||
mod tests;
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord, Default)]
|
||||
pub enum Status {
|
||||
pub(crate) enum Status {
|
||||
#[default]
|
||||
Unchanged,
|
||||
Update,
|
||||
|
Loading…
Reference in New Issue
Block a user