mirror of
https://github.com/solaeus/nucleo.git
synced 2024-12-22 01:47:49 +00:00
move pattern API to nucleo-matcher
This commit is contained in:
parent
3e48c9f1ee
commit
de844d6ace
@ -43,7 +43,7 @@ fn main() {
|
|||||||
Some((path.as_str().into(), path))
|
Some((path.as_str().into(), path))
|
||||||
})
|
})
|
||||||
.unzip();
|
.unzip();
|
||||||
let mut nucleo = nucleo::Matcher::new(nucleo::MatcherConfig::DEFAULT.match_paths());
|
let mut nucleo = nucleo::Matcher::new(nucleo::Config::DEFAULT.match_paths());
|
||||||
let skim = fuzzy_matcher::skim::SkimMatcherV2::default();
|
let skim = fuzzy_matcher::skim::SkimMatcherV2::default();
|
||||||
|
|
||||||
// TODO: unicode?
|
// TODO: unicode?
|
||||||
|
@ -1,7 +1,9 @@
|
|||||||
|
//! Utilities for working with (unicode) characters/codepoints
|
||||||
|
|
||||||
use std::fmt::{self, Debug, Display};
|
use std::fmt::{self, Debug, Display};
|
||||||
|
|
||||||
use crate::chars::case_fold::CASE_FOLDING_SIMPLE;
|
use crate::chars::case_fold::CASE_FOLDING_SIMPLE;
|
||||||
use crate::MatcherConfig;
|
use crate::Config;
|
||||||
|
|
||||||
//autogenerated by generate-ucd
|
//autogenerated by generate-ucd
|
||||||
#[allow(warnings)]
|
#[allow(warnings)]
|
||||||
@ -11,9 +13,9 @@ mod normalize;
|
|||||||
|
|
||||||
pub(crate) trait Char: Copy + Eq + Ord + fmt::Display {
|
pub(crate) trait Char: Copy + Eq + Ord + fmt::Display {
|
||||||
const ASCII: bool;
|
const ASCII: bool;
|
||||||
fn char_class(self, config: &MatcherConfig) -> CharClass;
|
fn char_class(self, config: &Config) -> CharClass;
|
||||||
fn char_class_and_normalize(self, config: &MatcherConfig) -> (Self, CharClass);
|
fn char_class_and_normalize(self, config: &Config) -> (Self, CharClass);
|
||||||
fn normalize(self, config: &MatcherConfig) -> Self;
|
fn normalize(self, config: &Config) -> Self;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// repr transparent wrapper around u8 with better formatting and `PartialEq<char>` implementation
|
/// repr transparent wrapper around u8 with better formatting and `PartialEq<char>` implementation
|
||||||
@ -42,7 +44,7 @@ impl PartialEq<AsciiChar> for char {
|
|||||||
impl Char for AsciiChar {
|
impl Char for AsciiChar {
|
||||||
const ASCII: bool = true;
|
const ASCII: bool = true;
|
||||||
#[inline]
|
#[inline]
|
||||||
fn char_class(self, config: &MatcherConfig) -> CharClass {
|
fn char_class(self, config: &Config) -> CharClass {
|
||||||
let c = self.0;
|
let c = self.0;
|
||||||
// using manual if conditions instead optimizes better
|
// using manual if conditions instead optimizes better
|
||||||
if c >= b'a' && c <= b'z' {
|
if c >= b'a' && c <= b'z' {
|
||||||
@ -61,7 +63,7 @@ impl Char for AsciiChar {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn char_class_and_normalize(mut self, config: &MatcherConfig) -> (Self, CharClass) {
|
fn char_class_and_normalize(mut self, config: &Config) -> (Self, CharClass) {
|
||||||
let char_class = self.char_class(config);
|
let char_class = self.char_class(config);
|
||||||
if config.ignore_case && char_class == CharClass::Upper {
|
if config.ignore_case && char_class == CharClass::Upper {
|
||||||
self.0 += 32
|
self.0 += 32
|
||||||
@ -70,7 +72,7 @@ impl Char for AsciiChar {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn normalize(mut self, config: &MatcherConfig) -> Self {
|
fn normalize(mut self, config: &Config) -> Self {
|
||||||
if config.ignore_case && self.0 >= b'A' && self.0 <= b'Z' {
|
if config.ignore_case && self.0 >= b'A' && self.0 <= b'Z' {
|
||||||
self.0 += 32
|
self.0 += 32
|
||||||
}
|
}
|
||||||
@ -95,7 +97,7 @@ fn char_class_non_ascii(c: char) -> CharClass {
|
|||||||
impl Char for char {
|
impl Char for char {
|
||||||
const ASCII: bool = false;
|
const ASCII: bool = false;
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn char_class(self, config: &MatcherConfig) -> CharClass {
|
fn char_class(self, config: &Config) -> CharClass {
|
||||||
if self.is_ascii() {
|
if self.is_ascii() {
|
||||||
return AsciiChar(self as u8).char_class(config);
|
return AsciiChar(self as u8).char_class(config);
|
||||||
}
|
}
|
||||||
@ -103,7 +105,7 @@ impl Char for char {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn char_class_and_normalize(mut self, config: &MatcherConfig) -> (Self, CharClass) {
|
fn char_class_and_normalize(mut self, config: &Config) -> (Self, CharClass) {
|
||||||
if self.is_ascii() {
|
if self.is_ascii() {
|
||||||
let (c, class) = AsciiChar(self as u8).char_class_and_normalize(config);
|
let (c, class) = AsciiChar(self as u8).char_class_and_normalize(config);
|
||||||
return (c.0 as char, class);
|
return (c.0 as char, class);
|
||||||
@ -123,7 +125,7 @@ impl Char for char {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn normalize(mut self, config: &MatcherConfig) -> Self {
|
fn normalize(mut self, config: &Config) -> Self {
|
||||||
if config.normalize {
|
if config.normalize {
|
||||||
self = normalize::normalize(self);
|
self = normalize::normalize(self);
|
||||||
}
|
}
|
||||||
@ -138,12 +140,14 @@ pub use normalize::normalize;
|
|||||||
use unicode_segmentation::UnicodeSegmentation;
|
use unicode_segmentation::UnicodeSegmentation;
|
||||||
|
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
|
/// Converts a character to lower case using simple unicode case folding
|
||||||
pub fn to_lower_case(c: char) -> char {
|
pub fn to_lower_case(c: char) -> char {
|
||||||
CASE_FOLDING_SIMPLE
|
CASE_FOLDING_SIMPLE
|
||||||
.binary_search_by_key(&c, |(upper, _)| *upper)
|
.binary_search_by_key(&c, |(upper, _)| *upper)
|
||||||
.map_or(c, |idx| CASE_FOLDING_SIMPLE[idx].1)
|
.map_or(c, |idx| CASE_FOLDING_SIMPLE[idx].1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Checks whether a character is upper case using simple unicode case folding
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
pub fn is_upper_case(c: char) -> bool {
|
pub fn is_upper_case(c: char) -> bool {
|
||||||
CASE_FOLDING_SIMPLE
|
CASE_FOLDING_SIMPLE
|
||||||
@ -152,8 +156,7 @@ pub fn is_upper_case(c: char) -> bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Eq, PartialEq, PartialOrd, Ord, Copy, Clone, Hash)]
|
#[derive(Debug, Eq, PartialEq, PartialOrd, Ord, Copy, Clone, Hash)]
|
||||||
#[non_exhaustive]
|
pub(crate) enum CharClass {
|
||||||
pub enum CharClass {
|
|
||||||
Whitespace,
|
Whitespace,
|
||||||
NonWord,
|
NonWord,
|
||||||
Delimiter,
|
Delimiter,
|
||||||
@ -163,8 +166,10 @@ pub enum CharClass {
|
|||||||
Number,
|
Number,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// nucleo cannot match graphemes as single units to work around
|
/// Nucleo cannot match graphemes as single units. To work around
|
||||||
/// that we only use the first codepoint of each grapheme
|
/// that we only use the first codepoint of each grapheme. This
|
||||||
|
/// iterator returns the first character of each unicode grapheme
|
||||||
|
/// in a string and is used for constructing `Utf32Str(ing)`.
|
||||||
pub fn graphemes(text: &str) -> impl Iterator<Item = char> + '_ {
|
pub fn graphemes(text: &str) -> impl Iterator<Item = char> + '_ {
|
||||||
text.graphemes(true).map(|grapheme| {
|
text.graphemes(true).map(|grapheme| {
|
||||||
grapheme
|
grapheme
|
||||||
|
@ -495,6 +495,16 @@ const DATA3_END: u32 = DATA3[DATA3.len() - 1].0 as u32 + 1;
|
|||||||
const LEN3: usize = (DATA3_END - DATA3_START) as usize;
|
const LEN3: usize = (DATA3_END - DATA3_START) as usize;
|
||||||
static TABLE3: [char; LEN3] = generate_table(&DATA3);
|
static TABLE3: [char; LEN3] = generate_table(&DATA3);
|
||||||
|
|
||||||
|
/// Normalizes a unicode character by converting latin characters
|
||||||
|
/// which are variants of ASCII characters to their ASCII equivalent.
|
||||||
|
///
|
||||||
|
/// # Example
|
||||||
|
///
|
||||||
|
/// ``` rust
|
||||||
|
/// # use nucleo_matcher::chars::normalize;
|
||||||
|
///
|
||||||
|
/// assert_eq!(normalize('ä'), 'a');
|
||||||
|
/// ```
|
||||||
pub fn normalize(c: char) -> char {
|
pub fn normalize(c: char) -> char {
|
||||||
let i = c as u32;
|
let i = c as u32;
|
||||||
if i < DATA1_START || i >= DATA3_END {
|
if i < DATA1_START || i >= DATA3_END {
|
||||||
|
@ -1,16 +1,19 @@
|
|||||||
use crate::chars::CharClass;
|
use crate::chars::CharClass;
|
||||||
use crate::score::BONUS_BOUNDARY;
|
use crate::score::BONUS_BOUNDARY;
|
||||||
|
|
||||||
|
/// Configuration data that controls how a matcher behaves
|
||||||
#[non_exhaustive]
|
#[non_exhaustive]
|
||||||
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
|
#[derive(PartialEq, Eq, Debug, Clone)]
|
||||||
pub struct MatcherConfig {
|
pub struct Config {
|
||||||
pub delimiter_chars: &'static [u8],
|
/// Characters that act as delimiters and provide bonus
|
||||||
|
/// for matching the following char
|
||||||
|
pub(crate) delimiter_chars: &'static [u8],
|
||||||
/// Extra bonus for word boundary after whitespace character or beginning of the string
|
/// Extra bonus for word boundary after whitespace character or beginning of the string
|
||||||
pub(crate) bonus_boundary_white: u16,
|
pub(crate) bonus_boundary_white: u16,
|
||||||
|
|
||||||
/// Extra bonus for word boundary after slash, colon, semi-colon, and comma
|
/// Extra bonus for word boundary after slash, colon, semi-colon, and comma
|
||||||
pub(crate) bonus_boundary_delimiter: u16,
|
pub(crate) bonus_boundary_delimiter: u16,
|
||||||
pub initial_char_class: CharClass,
|
pub(crate) initial_char_class: CharClass,
|
||||||
|
|
||||||
/// Whether to normalize latin script characters to ASCII (enabled by default)
|
/// Whether to normalize latin script characters to ASCII (enabled by default)
|
||||||
pub normalize: bool,
|
pub normalize: bool,
|
||||||
/// whether to ignore casing
|
/// whether to ignore casing
|
||||||
@ -25,9 +28,11 @@ pub struct MatcherConfig {
|
|||||||
pub prefer_prefix: bool,
|
pub prefer_prefix: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl MatcherConfig {
|
impl Config {
|
||||||
|
/// The default config for nucleo, implemented as a constant since
|
||||||
|
/// Default::default can not be called in a const context
|
||||||
pub const DEFAULT: Self = {
|
pub const DEFAULT: Self = {
|
||||||
MatcherConfig {
|
Config {
|
||||||
delimiter_chars: b"/,:;|",
|
delimiter_chars: b"/,:;|",
|
||||||
bonus_boundary_white: BONUS_BOUNDARY + 2,
|
bonus_boundary_white: BONUS_BOUNDARY + 2,
|
||||||
bonus_boundary_delimiter: BONUS_BOUNDARY + 1,
|
bonus_boundary_delimiter: BONUS_BOUNDARY + 1,
|
||||||
@ -39,9 +44,9 @@ impl MatcherConfig {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
impl MatcherConfig {
|
impl Config {
|
||||||
|
/// Configures the matcher with bonuses appropriate for matching file paths.
|
||||||
pub fn set_match_paths(&mut self) {
|
pub fn set_match_paths(&mut self) {
|
||||||
// compared to fzf we include
|
|
||||||
if cfg!(windows) {
|
if cfg!(windows) {
|
||||||
self.delimiter_chars = b"/:\\";
|
self.delimiter_chars = b"/:\\";
|
||||||
} else {
|
} else {
|
||||||
@ -51,6 +56,7 @@ impl MatcherConfig {
|
|||||||
self.initial_char_class = CharClass::Delimiter;
|
self.initial_char_class = CharClass::Delimiter;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Configures the matcher with bonuses appropriate for matching file paths.
|
||||||
pub const fn match_paths(mut self) -> Self {
|
pub const fn match_paths(mut self) -> Self {
|
||||||
if cfg!(windows) {
|
if cfg!(windows) {
|
||||||
self.delimiter_chars = b"/\\";
|
self.delimiter_chars = b"/\\";
|
||||||
|
@ -6,7 +6,7 @@ use crate::score::{
|
|||||||
BONUS_BOUNDARY, BONUS_CONSECUTIVE, BONUS_FIRST_CHAR_MULTIPLIER, MAX_PREFIX_BONUS,
|
BONUS_BOUNDARY, BONUS_CONSECUTIVE, BONUS_FIRST_CHAR_MULTIPLIER, MAX_PREFIX_BONUS,
|
||||||
PENALTY_GAP_EXTENSION, PENALTY_GAP_START, PREFIX_BONUS_SCALE, SCORE_MATCH,
|
PENALTY_GAP_EXTENSION, PENALTY_GAP_START, PREFIX_BONUS_SCALE, SCORE_MATCH,
|
||||||
};
|
};
|
||||||
use crate::{Matcher, MatcherConfig};
|
use crate::{Config, Matcher};
|
||||||
|
|
||||||
impl Matcher {
|
impl Matcher {
|
||||||
pub(crate) fn fuzzy_match_optimal<const INDICES: bool, H: Char + PartialEq<N>, N: Char>(
|
pub(crate) fn fuzzy_match_optimal<const INDICES: bool, H: Char + PartialEq<N>, N: Char>(
|
||||||
@ -112,7 +112,7 @@ impl<H: Char> MatcherDataView<'_, H> {
|
|||||||
&mut self,
|
&mut self,
|
||||||
needle: &[N],
|
needle: &[N],
|
||||||
mut prev_class: CharClass,
|
mut prev_class: CharClass,
|
||||||
config: &MatcherConfig,
|
config: &Config,
|
||||||
start: u32,
|
start: u32,
|
||||||
) -> bool
|
) -> bool
|
||||||
where
|
where
|
||||||
|
@ -10,6 +10,7 @@ a slightly less convenient API. Be sure to carefully read the documentation of t
|
|||||||
|
|
||||||
// sadly ranges don't optimize well
|
// sadly ranges don't optimize well
|
||||||
#![allow(clippy::manual_range_contains)]
|
#![allow(clippy::manual_range_contains)]
|
||||||
|
#![warn(missing_docs)]
|
||||||
|
|
||||||
pub mod chars;
|
pub mod chars;
|
||||||
mod config;
|
mod config;
|
||||||
@ -19,6 +20,7 @@ mod exact;
|
|||||||
mod fuzzy_greedy;
|
mod fuzzy_greedy;
|
||||||
mod fuzzy_optimal;
|
mod fuzzy_optimal;
|
||||||
mod matrix;
|
mod matrix;
|
||||||
|
pub mod pattern;
|
||||||
mod prefilter;
|
mod prefilter;
|
||||||
mod score;
|
mod score;
|
||||||
mod utf32_str;
|
mod utf32_str;
|
||||||
@ -26,7 +28,7 @@ mod utf32_str;
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests;
|
mod tests;
|
||||||
|
|
||||||
pub use crate::config::MatcherConfig;
|
pub use crate::config::Config;
|
||||||
pub use crate::utf32_str::{Utf32Str, Utf32String};
|
pub use crate::utf32_str::{Utf32Str, Utf32String};
|
||||||
|
|
||||||
use crate::chars::{AsciiChar, Char};
|
use crate::chars::{AsciiChar, Char};
|
||||||
@ -80,7 +82,8 @@ use crate::matrix::MatrixSlab;
|
|||||||
/// that the matcher *will panic*. The caller must decide whether it wants to
|
/// that the matcher *will panic*. The caller must decide whether it wants to
|
||||||
/// filter out long haystacks or truncate them.
|
/// filter out long haystacks or truncate them.
|
||||||
pub struct Matcher {
|
pub struct Matcher {
|
||||||
pub config: MatcherConfig,
|
#[allow(missing_docs)]
|
||||||
|
pub config: Config,
|
||||||
slab: MatrixSlab,
|
slab: MatrixSlab,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -88,7 +91,7 @@ pub struct Matcher {
|
|||||||
impl Clone for Matcher {
|
impl Clone for Matcher {
|
||||||
fn clone(&self) -> Self {
|
fn clone(&self) -> Self {
|
||||||
Matcher {
|
Matcher {
|
||||||
config: self.config,
|
config: self.config.clone(),
|
||||||
slab: MatrixSlab::new(),
|
slab: MatrixSlab::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -105,14 +108,17 @@ impl std::fmt::Debug for Matcher {
|
|||||||
impl Default for Matcher {
|
impl Default for Matcher {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Matcher {
|
Matcher {
|
||||||
config: MatcherConfig::DEFAULT,
|
config: Config::DEFAULT,
|
||||||
slab: MatrixSlab::new(),
|
slab: MatrixSlab::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Matcher {
|
impl Matcher {
|
||||||
pub fn new(config: MatcherConfig) -> Self {
|
/// Creates a new matcher instance, note that this will eagerly allocate
|
||||||
|
/// a fairly large chunk of heap memory (135KB currently but subject to
|
||||||
|
/// change) so matchers should be reused if used in a loop.
|
||||||
|
pub fn new(config: Config) -> Self {
|
||||||
Self {
|
Self {
|
||||||
config,
|
config,
|
||||||
slab: MatrixSlab::new(),
|
slab: MatrixSlab::new(),
|
||||||
|
469
matcher/src/pattern.rs
Normal file
469
matcher/src/pattern.rs
Normal file
@ -0,0 +1,469 @@
|
|||||||
|
//! This module provides a slightly higher level API for matching strings.
|
||||||
|
|
||||||
|
use std::cmp::Reverse;
|
||||||
|
|
||||||
|
use crate::{chars, Matcher, Utf32Str};
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests;
|
||||||
|
|
||||||
|
use crate::Utf32String;
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
|
||||||
|
#[non_exhaustive]
|
||||||
|
/// How nucleo will treat case mismatch
|
||||||
|
pub enum CaseMatching {
|
||||||
|
/// Characters always match their case folded version (`a == A`)
|
||||||
|
Ignore,
|
||||||
|
/// Characters never match their case folded version (`a != A`)
|
||||||
|
Respect,
|
||||||
|
/// Acts like `Ignore` if all characters in a pattern atom are
|
||||||
|
/// lowercase and like `Respect` otherwise
|
||||||
|
#[default]
|
||||||
|
Smart,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||||
|
#[non_exhaustive]
|
||||||
|
/// The kind of matching algorithm to run for this atom
|
||||||
|
pub enum AtomKind {
|
||||||
|
/// Fuzzy matching where the needle must match any haystack characters
|
||||||
|
/// (match can contain gaps). This atom kind is used by default if no
|
||||||
|
/// special syntax is used. There is no negated fuzzy matching (too
|
||||||
|
/// many false positives).
|
||||||
|
///
|
||||||
|
/// See also [`Matcher::fuzzy_match`](crate::Matcher::fuzzy_match).
|
||||||
|
Fuzzy,
|
||||||
|
/// The needle must match a contiguous sequence of haystack characters
|
||||||
|
/// without gaps. This atom kind is parsed from the following syntax:
|
||||||
|
/// `'foo` and `!foo` (negated).
|
||||||
|
///
|
||||||
|
/// See also [`Matcher::substring_match`](crate::Matcher::substring_match).
|
||||||
|
Substring,
|
||||||
|
/// The needle must match all leading haystack characters without gaps or
|
||||||
|
/// prefix. This atom kind is parsed from the following syntax: `^foo` and
|
||||||
|
/// `!^foo` (negated).
|
||||||
|
///
|
||||||
|
/// See also [`Matcher::prefix_match`](crate::Matcher::prefix_match).
|
||||||
|
Prefix,
|
||||||
|
/// The needle must match all trailing haystack characters without gaps or
|
||||||
|
/// postfix. This atom kind is parsed from the following syntax: `foo$` and
|
||||||
|
/// `!foo$` (negated).
|
||||||
|
///
|
||||||
|
/// See also [`Matcher::postfix_match`](crate::Matcher::postfix_match).
|
||||||
|
Postfix,
|
||||||
|
/// The needle must match all haystack characters without gaps or prefix.
|
||||||
|
/// This atom kind is parsed from the following syntax: `^foo$` and `!^foo$`
|
||||||
|
/// (negated).
|
||||||
|
///
|
||||||
|
/// See also [`Matcher::exact_match`](crate::Matcher::exact_match).
|
||||||
|
Exact,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A single pattern component that is matched with a single [`Matcher`](crate::Matcher) function
|
||||||
|
#[derive(Debug, PartialEq, Eq, Clone)]
|
||||||
|
pub struct Atom {
|
||||||
|
/// Whether this pattern atom is a negative match.
|
||||||
|
/// A negative pattern atom will prevent haystacks matching it from
|
||||||
|
/// being matched. It does not contribute to scoring/indices
|
||||||
|
pub negative: bool,
|
||||||
|
/// The kind of match that this pattern performs
|
||||||
|
pub kind: AtomKind,
|
||||||
|
needle: Utf32String,
|
||||||
|
ignore_case: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Atom {
|
||||||
|
/// Creates a single [`Atom`] from a string by performing unicode
|
||||||
|
/// normalization
|
||||||
|
pub fn new(needle: &str, case: CaseMatching, kind: AtomKind, escape_whitespace: bool) -> Atom {
|
||||||
|
Atom::new_inner(needle, case, kind, escape_whitespace, false)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn new_inner(
|
||||||
|
needle: &str,
|
||||||
|
case: CaseMatching,
|
||||||
|
kind: AtomKind,
|
||||||
|
escape_whitespace: bool,
|
||||||
|
append_dollar: bool,
|
||||||
|
) -> Atom {
|
||||||
|
let mut ignore_case;
|
||||||
|
let needle = if needle.is_ascii() {
|
||||||
|
let mut needle = if escape_whitespace {
|
||||||
|
if let Some((start, rem)) = needle.split_once("\\ ") {
|
||||||
|
let mut needle = start.to_owned();
|
||||||
|
for rem in rem.split("\\ ") {
|
||||||
|
needle.push(' ');
|
||||||
|
needle.push_str(rem);
|
||||||
|
}
|
||||||
|
needle
|
||||||
|
} else {
|
||||||
|
needle.to_owned()
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
needle.to_owned()
|
||||||
|
};
|
||||||
|
|
||||||
|
match case {
|
||||||
|
CaseMatching::Ignore => {
|
||||||
|
ignore_case = true;
|
||||||
|
needle.make_ascii_lowercase()
|
||||||
|
}
|
||||||
|
CaseMatching::Smart => {
|
||||||
|
ignore_case = !needle.bytes().any(|b| b.is_ascii_uppercase())
|
||||||
|
}
|
||||||
|
CaseMatching::Respect => ignore_case = false,
|
||||||
|
}
|
||||||
|
if append_dollar {
|
||||||
|
needle.push('$');
|
||||||
|
}
|
||||||
|
Utf32String::Ascii(needle.into_boxed_str())
|
||||||
|
} else {
|
||||||
|
let mut needle_ = Vec::with_capacity(needle.len());
|
||||||
|
ignore_case = matches!(case, CaseMatching::Ignore | CaseMatching::Smart);
|
||||||
|
if escape_whitespace {
|
||||||
|
let mut saw_backslash = false;
|
||||||
|
for mut c in chars::graphemes(needle) {
|
||||||
|
if saw_backslash {
|
||||||
|
if c == ' ' {
|
||||||
|
needle_.push(' ');
|
||||||
|
saw_backslash = false;
|
||||||
|
continue;
|
||||||
|
} else {
|
||||||
|
needle_.push('\\');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
saw_backslash = c == '\\';
|
||||||
|
match case {
|
||||||
|
CaseMatching::Ignore => c = chars::to_lower_case(c),
|
||||||
|
CaseMatching::Smart => {
|
||||||
|
ignore_case = ignore_case && !chars::is_upper_case(c)
|
||||||
|
}
|
||||||
|
CaseMatching::Respect => (),
|
||||||
|
}
|
||||||
|
needle_.push(c);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
let chars = chars::graphemes(needle).map(|mut c| {
|
||||||
|
match case {
|
||||||
|
CaseMatching::Ignore => c = chars::to_lower_case(c),
|
||||||
|
CaseMatching::Smart => {
|
||||||
|
ignore_case = ignore_case && !chars::is_upper_case(c);
|
||||||
|
}
|
||||||
|
CaseMatching::Respect => (),
|
||||||
|
}
|
||||||
|
c
|
||||||
|
});
|
||||||
|
needle_.extend(chars);
|
||||||
|
};
|
||||||
|
if append_dollar {
|
||||||
|
needle_.push('$');
|
||||||
|
}
|
||||||
|
Utf32String::Unicode(needle_.into_boxed_slice())
|
||||||
|
};
|
||||||
|
Atom {
|
||||||
|
kind,
|
||||||
|
needle,
|
||||||
|
negative: false,
|
||||||
|
ignore_case,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse a pattern atom from a string. Some special trailing and leading
|
||||||
|
/// characters can be used to control the atom kind. See [`AtomKind`] for
|
||||||
|
/// details.
|
||||||
|
pub fn parse(raw: &str, case: CaseMatching) -> Atom {
|
||||||
|
let mut atom = raw;
|
||||||
|
let invert = match atom.as_bytes() {
|
||||||
|
[b'!', ..] => {
|
||||||
|
atom = &atom[1..];
|
||||||
|
true
|
||||||
|
}
|
||||||
|
[b'\\', b'!', ..] => {
|
||||||
|
atom = &atom[1..];
|
||||||
|
false
|
||||||
|
}
|
||||||
|
_ => false,
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut kind = match atom.as_bytes() {
|
||||||
|
[b'^', ..] => {
|
||||||
|
atom = &atom[1..];
|
||||||
|
AtomKind::Prefix
|
||||||
|
}
|
||||||
|
[b'\'', ..] => {
|
||||||
|
atom = &atom[1..];
|
||||||
|
AtomKind::Substring
|
||||||
|
}
|
||||||
|
[b'\\', b'^' | b'\'', ..] => {
|
||||||
|
atom = &atom[1..];
|
||||||
|
AtomKind::Fuzzy
|
||||||
|
}
|
||||||
|
_ => AtomKind::Fuzzy,
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut append_dollar = false;
|
||||||
|
match atom.as_bytes() {
|
||||||
|
[.., b'\\', b'$'] => {
|
||||||
|
append_dollar = true;
|
||||||
|
atom = &atom[..atom.len() - 2]
|
||||||
|
}
|
||||||
|
[.., b'$'] => {
|
||||||
|
kind = if kind == AtomKind::Fuzzy {
|
||||||
|
AtomKind::Postfix
|
||||||
|
} else {
|
||||||
|
AtomKind::Exact
|
||||||
|
};
|
||||||
|
atom = &atom[..atom.len() - 1]
|
||||||
|
}
|
||||||
|
_ => (),
|
||||||
|
}
|
||||||
|
|
||||||
|
if invert && kind == AtomKind::Fuzzy {
|
||||||
|
kind = AtomKind::Substring
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut pattern = Atom::new_inner(atom, case, kind, true, append_dollar);
|
||||||
|
pattern.negative = invert;
|
||||||
|
pattern
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Matches this pattern against `haystack` (using the allocation and configuration
|
||||||
|
/// from `matcher`) and calculates a ranking score. See the [`Matcher`](crate::Matcher)
|
||||||
|
/// documentation for more details.
|
||||||
|
///
|
||||||
|
/// *Note:* The `ignore_case` setting is overwritten to match the casing of
|
||||||
|
/// each pattern atom.
|
||||||
|
pub fn score(&self, haystack: Utf32Str<'_>, matcher: &mut Matcher) -> Option<u16> {
|
||||||
|
matcher.config.ignore_case = self.ignore_case;
|
||||||
|
let pattern_score = match self.kind {
|
||||||
|
AtomKind::Exact => matcher.exact_match(haystack, self.needle.slice(..)),
|
||||||
|
AtomKind::Fuzzy => matcher.fuzzy_match(haystack, self.needle.slice(..)),
|
||||||
|
AtomKind::Substring => matcher.substring_match(haystack, self.needle.slice(..)),
|
||||||
|
AtomKind::Prefix => matcher.prefix_match(haystack, self.needle.slice(..)),
|
||||||
|
AtomKind::Postfix => matcher.postfix_match(haystack, self.needle.slice(..)),
|
||||||
|
};
|
||||||
|
if self.negative {
|
||||||
|
if pattern_score.is_some() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
Some(0)
|
||||||
|
} else {
|
||||||
|
pattern_score
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Matches this pattern against `haystack` (using the allocation and
|
||||||
|
/// configuration from `matcher`), calculates a ranking score and the match
|
||||||
|
/// indices. See the [`Matcher`](crate::Matcher) documentation for more
|
||||||
|
/// details.
|
||||||
|
///
|
||||||
|
/// *Note:* The `ignore_case` setting is overwritten to match the casing of
|
||||||
|
/// this pattern atom.
|
||||||
|
pub fn indices(
|
||||||
|
&self,
|
||||||
|
haystack: Utf32Str<'_>,
|
||||||
|
matcher: &mut Matcher,
|
||||||
|
indices: &mut Vec<u32>,
|
||||||
|
) -> Option<u16> {
|
||||||
|
matcher.config.ignore_case = self.ignore_case;
|
||||||
|
if self.negative {
|
||||||
|
let pattern_score = match self.kind {
|
||||||
|
AtomKind::Exact => matcher.exact_match(haystack, self.needle.slice(..)),
|
||||||
|
AtomKind::Fuzzy => matcher.fuzzy_match(haystack, self.needle.slice(..)),
|
||||||
|
AtomKind::Substring => matcher.substring_match(haystack, self.needle.slice(..)),
|
||||||
|
AtomKind::Prefix => matcher.prefix_match(haystack, self.needle.slice(..)),
|
||||||
|
AtomKind::Postfix => matcher.postfix_match(haystack, self.needle.slice(..)),
|
||||||
|
};
|
||||||
|
pattern_score.is_none().then_some(0)
|
||||||
|
} else {
|
||||||
|
match self.kind {
|
||||||
|
AtomKind::Exact => matcher.exact_indices(haystack, self.needle.slice(..), indices),
|
||||||
|
AtomKind::Fuzzy => matcher.fuzzy_indices(haystack, self.needle.slice(..), indices),
|
||||||
|
AtomKind::Substring => {
|
||||||
|
matcher.substring_indices(haystack, self.needle.slice(..), indices)
|
||||||
|
}
|
||||||
|
AtomKind::Prefix => {
|
||||||
|
matcher.prefix_indices(haystack, self.needle.slice(..), indices)
|
||||||
|
}
|
||||||
|
AtomKind::Postfix => {
|
||||||
|
matcher.postfix_indices(haystack, self.needle.slice(..), indices)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the needle text that is passed to the matcher. All indices
|
||||||
|
/// produced by the `indices` functions produce char indices used to index
|
||||||
|
/// this text
|
||||||
|
pub fn needle_text(&self) -> Utf32Str<'_> {
|
||||||
|
self.needle.slice(..)
|
||||||
|
}
|
||||||
|
/// Convenience function to easily match on a (relatively small) list of
|
||||||
|
/// inputs. This is not recommended for building a full fuzzy matching
|
||||||
|
/// application that can match large numbers of matches (like all files in
|
||||||
|
/// a directory) as all matching is done on the current thread, effectively
|
||||||
|
/// blocking the UI.
|
||||||
|
pub fn match_list<T: AsRef<str>>(
|
||||||
|
&self,
|
||||||
|
matcher: &mut Matcher,
|
||||||
|
items: impl IntoIterator<Item = T>,
|
||||||
|
) -> Vec<(T, u16)> {
|
||||||
|
if self.needle.is_empty() {
|
||||||
|
return items.into_iter().map(|item| (item, 0)).collect();
|
||||||
|
}
|
||||||
|
let mut buf = Vec::new();
|
||||||
|
let mut items: Vec<_> = items
|
||||||
|
.into_iter()
|
||||||
|
.filter_map(|item| {
|
||||||
|
self.score(Utf32Str::new(item.as_ref(), &mut buf), matcher)
|
||||||
|
.map(|score| (item, score))
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
items.sort_by_key(|(_, score)| Reverse(*score));
|
||||||
|
items
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn pattern_atoms(pattern: &str) -> impl Iterator<Item = &str> + '_ {
|
||||||
|
let mut saw_backslash = false;
|
||||||
|
pattern.split(move |c| {
|
||||||
|
saw_backslash = match c {
|
||||||
|
' ' if !saw_backslash => return true,
|
||||||
|
'\\' => true,
|
||||||
|
_ => false,
|
||||||
|
};
|
||||||
|
false
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Default)]
|
||||||
|
/// A fuzzy match pattern
|
||||||
|
#[non_exhaustive]
|
||||||
|
pub struct Pattern {
|
||||||
|
/// The individual pattern atoms (words) in this pattern
|
||||||
|
pub atoms: Vec<Atom>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Pattern {
|
||||||
|
/// Creates a pattern where each word is matched individually (whitespaces
|
||||||
|
/// can be escaped with `\`). Otherwise no parsing is performed (so $, !, '
|
||||||
|
/// and ^ don't receive special treatment). If you want to match the entire
|
||||||
|
/// pattern as a single needle use a single [`Atom`] instead
|
||||||
|
pub fn new(case_matching: CaseMatching, kind: AtomKind, pattern: &str) -> Pattern {
|
||||||
|
let atoms = pattern_atoms(pattern)
|
||||||
|
.filter_map(|pat| {
|
||||||
|
let pat = Atom::new(pat, case_matching, kind, true);
|
||||||
|
(!pat.needle.is_empty()).then_some(pat)
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
Pattern { atoms }
|
||||||
|
}
|
||||||
|
/// Creates a pattern where each word is matched individually (whitespaces
|
||||||
|
/// can be escaped with `\`). And $, !, ' and ^ at word boundaries will
|
||||||
|
/// cause different matching behaviour (see [`AtomKind`]). These can be
|
||||||
|
/// escaped with backslash.
|
||||||
|
pub fn parse(case_matching: CaseMatching, pattern: &str) -> Pattern {
|
||||||
|
let atoms = pattern_atoms(pattern)
|
||||||
|
.filter_map(|pat| {
|
||||||
|
let pat = Atom::parse(pat, case_matching);
|
||||||
|
(!pat.needle.is_empty()).then_some(pat)
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
Pattern { atoms }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convenience function to easily match on a (relatively small) list of
|
||||||
|
/// inputs. This is not recommended for building a full fuzzy matching
|
||||||
|
/// application that can match large numbers of matches (like all files in
|
||||||
|
/// a directory) as all matching is done on the current thread, effectively
|
||||||
|
/// blocking the UI.
|
||||||
|
pub fn match_list<T: AsRef<str>>(
|
||||||
|
&self,
|
||||||
|
matcher: &mut Matcher,
|
||||||
|
items: impl IntoIterator<Item = T>,
|
||||||
|
) -> Vec<(T, u32)> {
|
||||||
|
if self.atoms.is_empty() {
|
||||||
|
return items.into_iter().map(|item| (item, 0)).collect();
|
||||||
|
}
|
||||||
|
let mut buf = Vec::new();
|
||||||
|
let mut items: Vec<_> = items
|
||||||
|
.into_iter()
|
||||||
|
.filter_map(|item| {
|
||||||
|
self.score(Utf32Str::new(item.as_ref(), &mut buf), matcher)
|
||||||
|
.map(|score| (item, score))
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
items.sort_by_key(|(_, score)| Reverse(*score));
|
||||||
|
items
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Matches this pattern against `haystack` (using the allocation and configuration
|
||||||
|
/// from `matcher`) and calculates a ranking score. See the [`Matcher`](crate::Matcher)
|
||||||
|
/// documentation for more details.
|
||||||
|
///
|
||||||
|
/// *Note:* The `ignore_case` setting is overwritten to match the casing of
|
||||||
|
/// each pattern atom.
|
||||||
|
pub fn score(&self, haystack: Utf32Str<'_>, matcher: &mut Matcher) -> Option<u32> {
|
||||||
|
if self.atoms.is_empty() {
|
||||||
|
return Some(0);
|
||||||
|
}
|
||||||
|
let mut score = 0;
|
||||||
|
for pattern in &self.atoms {
|
||||||
|
score += pattern.score(haystack, matcher)? as u32;
|
||||||
|
}
|
||||||
|
Some(score)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Matches this pattern against `haystack` (using the allocation and
|
||||||
|
/// configuration from `matcher`), calculates a ranking score and the matche
|
||||||
|
/// indices. See the [`Matcher`](crate::Matcher). Documentation for more
|
||||||
|
/// details.
|
||||||
|
///
|
||||||
|
/// *Note:* The `ignore_case` setting is overwritten to match the casing of
|
||||||
|
/// each pattern atom.
|
||||||
|
///
|
||||||
|
/// *Note:* The indices for each pattern are calculated individually
|
||||||
|
/// and simply appended to the `indices` vector. This allows
|
||||||
|
///
|
||||||
|
pub fn indices(
|
||||||
|
&self,
|
||||||
|
haystack: Utf32Str<'_>,
|
||||||
|
matcher: &mut Matcher,
|
||||||
|
indices: &mut Vec<u32>,
|
||||||
|
) -> Option<u32> {
|
||||||
|
if self.atoms.is_empty() {
|
||||||
|
return Some(0);
|
||||||
|
}
|
||||||
|
let mut score = 0;
|
||||||
|
for pattern in &self.atoms {
|
||||||
|
score += pattern.indices(haystack, matcher, indices)? as u32;
|
||||||
|
}
|
||||||
|
Some(score)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Refreshes this pattern by reparsing a
|
||||||
|
pub fn reparse(&mut self, pattern: &str, case_matching: CaseMatching) {
|
||||||
|
self.atoms.clear();
|
||||||
|
let atoms = pattern_atoms(pattern).filter_map(|atom| {
|
||||||
|
let atom = Atom::parse(atom, case_matching);
|
||||||
|
if atom.needle.is_empty() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
Some(atom)
|
||||||
|
});
|
||||||
|
self.atoms.extend(atoms);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Clone for Pattern {
|
||||||
|
fn clone(&self) -> Self {
|
||||||
|
Self {
|
||||||
|
atoms: self.atoms.clone(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn clone_from(&mut self, source: &Self) {
|
||||||
|
self.atoms.clone_from(&source.atoms);
|
||||||
|
}
|
||||||
|
}
|
114
matcher/src/pattern/tests.rs
Normal file
114
matcher/src/pattern/tests.rs
Normal file
@ -0,0 +1,114 @@
|
|||||||
|
use crate::pattern::{Atom, AtomKind, CaseMatching};
|
||||||
|
|
||||||
|
#[test]
fn negative() {
    // (raw atom, expected kind). Every case must come out negated with the
    // needle "foo"; a negated atom without an explicit kind is a substring
    // match.
    let cases = [
        ("!foo", AtomKind::Substring),
        ("!^foo", AtomKind::Prefix),
        ("!foo$", AtomKind::Postfix),
        ("!^foo$", AtomKind::Exact),
    ];
    for (raw, expected_kind) in cases {
        let atom = Atom::parse(raw, CaseMatching::Smart);
        assert!(atom.negative);
        assert_eq!(atom.kind, expected_kind);
        assert_eq!(atom.needle.to_string(), "foo");
    }
}
|
||||||
|
|
||||||
|
#[test]
fn pattern_kinds() {
    // (raw atom, expected kind). None of these may be negated and the
    // kind markers must be stripped from the needle.
    let cases = [
        ("foo", AtomKind::Fuzzy),
        ("'foo", AtomKind::Substring),
        ("^foo", AtomKind::Prefix),
        ("foo$", AtomKind::Postfix),
        ("^foo$", AtomKind::Exact),
    ];
    for (raw, expected_kind) in cases {
        let atom = Atom::parse(raw, CaseMatching::Smart);
        assert!(!atom.negative);
        assert_eq!(atom.kind, expected_kind);
        assert_eq!(atom.needle.to_string(), "foo");
    }
}
|
||||||
|
|
||||||
|
#[test]
fn case_matching() {
    // (raw atom, case mode, expected `ignore_case`, expected needle).
    // A needle of `None` means the needle is not checked for that case.
    let cases = [
        ("foo", CaseMatching::Smart, true, Some("foo")),
        ("Foo", CaseMatching::Smart, false, Some("Foo")),
        ("Foo", CaseMatching::Ignore, true, Some("foo")),
        ("Foo", CaseMatching::Respect, false, Some("Foo")),
        // Same case as the previous row; kept so that exactly the same
        // assertions run as before.
        ("Foo", CaseMatching::Respect, false, Some("Foo")),
        ("Äxx", CaseMatching::Ignore, true, Some("äxx")),
        ("Äxx", CaseMatching::Respect, false, None),
        ("Axx", CaseMatching::Smart, false, Some("Axx")),
        ("你xx", CaseMatching::Smart, true, Some("你xx")),
        ("你xx", CaseMatching::Ignore, true, Some("你xx")),
        ("Ⲽxx", CaseMatching::Smart, false, Some("Ⲽxx")),
        ("Ⲽxx", CaseMatching::Ignore, true, Some("ⲽxx")),
    ];
    for (raw, mode, expect_ignore_case, expect_needle) in cases {
        let atom = Atom::parse(raw, mode);
        if expect_ignore_case {
            assert!(atom.ignore_case);
        } else {
            assert!(!atom.ignore_case);
        }
        if let Some(needle) = expect_needle {
            assert_eq!(atom.needle.to_string(), needle);
        }
    }
}
|
||||||
|
|
||||||
|
#[test]
fn escape() {
    // (raw atom, expected needle, expected kind). A backslash in front of
    // a marker character turns it into a literal part of the needle. A
    // kind of `None` means the kind is not checked for that case.
    let cases = [
        ("foo\\ bar", "foo bar", None),
        ("\\!foo", "!foo", Some(AtomKind::Fuzzy)),
        ("\\'foo", "'foo", Some(AtomKind::Fuzzy)),
        ("\\^foo", "^foo", Some(AtomKind::Fuzzy)),
        ("foo\\$", "foo$", Some(AtomKind::Fuzzy)),
        ("^foo\\$", "foo$", Some(AtomKind::Prefix)),
        ("\\^foo\\$", "^foo$", Some(AtomKind::Fuzzy)),
        ("\\!^foo\\$", "!^foo$", Some(AtomKind::Fuzzy)),
        ("!\\^foo\\$", "^foo$", Some(AtomKind::Substring)),
    ];
    for (raw, expected_needle, expected_kind) in cases {
        let atom = Atom::parse(raw, CaseMatching::Smart);
        assert_eq!(atom.needle.to_string(), expected_needle);
        if let Some(kind) = expected_kind {
            assert_eq!(atom.kind, kind);
        }
    }
}
|
@ -1,7 +1,7 @@
|
|||||||
use std::cmp::max;
|
use std::cmp::max;
|
||||||
|
|
||||||
use crate::chars::{Char, CharClass};
|
use crate::chars::{Char, CharClass};
|
||||||
use crate::{Matcher, MatcherConfig};
|
use crate::{Config, Matcher};
|
||||||
|
|
||||||
pub(crate) const SCORE_MATCH: u16 = 16;
|
pub(crate) const SCORE_MATCH: u16 = 16;
|
||||||
pub(crate) const PENALTY_GAP_START: u16 = 3;
|
pub(crate) const PENALTY_GAP_START: u16 = 3;
|
||||||
@ -47,7 +47,7 @@ pub(crate) const BONUS_CONSECUTIVE: u16 = PENALTY_GAP_START + PENALTY_GAP_EXTENS
|
|||||||
// still respected.
|
// still respected.
|
||||||
pub(crate) const BONUS_FIRST_CHAR_MULTIPLIER: u16 = 2;
|
pub(crate) const BONUS_FIRST_CHAR_MULTIPLIER: u16 = 2;
|
||||||
|
|
||||||
impl MatcherConfig {
|
impl Config {
|
||||||
#[inline]
|
#[inline]
|
||||||
pub(crate) fn bonus_for(&self, prev_class: CharClass, class: CharClass) -> u16 {
|
pub(crate) fn bonus_for(&self, prev_class: CharClass, class: CharClass) -> u16 {
|
||||||
if class > CharClass::Delimiter {
|
if class > CharClass::Delimiter {
|
||||||
|
@ -4,7 +4,7 @@ use crate::score::{
|
|||||||
MAX_PREFIX_BONUS, PENALTY_GAP_EXTENSION, PENALTY_GAP_START, SCORE_MATCH,
|
MAX_PREFIX_BONUS, PENALTY_GAP_EXTENSION, PENALTY_GAP_START, SCORE_MATCH,
|
||||||
};
|
};
|
||||||
use crate::utf32_str::Utf32Str;
|
use crate::utf32_str::Utf32Str;
|
||||||
use crate::{Matcher, MatcherConfig};
|
use crate::{Config, Matcher};
|
||||||
|
|
||||||
use Algorithm::*;
|
use Algorithm::*;
|
||||||
|
|
||||||
@ -26,11 +26,11 @@ fn assert_matches(
|
|||||||
prefer_prefix: bool,
|
prefer_prefix: bool,
|
||||||
cases: &[(&str, &str, &[u32], u16)],
|
cases: &[(&str, &str, &[u32], u16)],
|
||||||
) {
|
) {
|
||||||
let mut config = MatcherConfig {
|
let mut config = Config {
|
||||||
normalize,
|
normalize,
|
||||||
ignore_case: !case_sensitive,
|
ignore_case: !case_sensitive,
|
||||||
prefer_prefix,
|
prefer_prefix,
|
||||||
..MatcherConfig::DEFAULT
|
..Config::DEFAULT
|
||||||
};
|
};
|
||||||
if path {
|
if path {
|
||||||
config.set_match_paths();
|
config.set_match_paths();
|
||||||
@ -89,10 +89,10 @@ pub fn assert_not_matches(
|
|||||||
path: bool,
|
path: bool,
|
||||||
cases: &[(&str, &str)],
|
cases: &[(&str, &str)],
|
||||||
) {
|
) {
|
||||||
let mut config = MatcherConfig {
|
let mut config = Config {
|
||||||
normalize,
|
normalize,
|
||||||
ignore_case: !case_sensitive,
|
ignore_case: !case_sensitive,
|
||||||
..MatcherConfig::DEFAULT
|
..Config::DEFAULT
|
||||||
};
|
};
|
||||||
if path {
|
if path {
|
||||||
config.set_match_paths();
|
config.set_match_paths();
|
||||||
@ -134,8 +134,8 @@ pub fn assert_not_matches(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const BONUS_BOUNDARY_WHITE: u16 = MatcherConfig::DEFAULT.bonus_boundary_white;
|
const BONUS_BOUNDARY_WHITE: u16 = Config::DEFAULT.bonus_boundary_white;
|
||||||
const BONUS_BOUNDARY_DELIMITER: u16 = MatcherConfig::DEFAULT.bonus_boundary_delimiter;
|
const BONUS_BOUNDARY_DELIMITER: u16 = Config::DEFAULT.bonus_boundary_delimiter;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_fuzzy() {
|
fn test_fuzzy() {
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::mem::take;
|
|
||||||
use std::ops::{Bound, RangeBounds};
|
use std::ops::{Bound, RangeBounds};
|
||||||
use std::{fmt, slice};
|
use std::{fmt, slice};
|
||||||
|
|
||||||
@ -55,6 +54,7 @@ impl<'a> Utf32Str<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the number of characters in this string.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn len(self) -> usize {
|
pub fn len(self) -> usize {
|
||||||
match self {
|
match self {
|
||||||
@ -62,6 +62,8 @@ impl<'a> Utf32Str<'a> {
|
|||||||
Utf32Str::Ascii(ascii_bytes) => ascii_bytes.len(),
|
Utf32Str::Ascii(ascii_bytes) => ascii_bytes.len(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns whether this string is empty.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn is_empty(self) -> bool {
|
pub fn is_empty(self) -> bool {
|
||||||
match self {
|
match self {
|
||||||
@ -70,6 +72,8 @@ impl<'a> Utf32Str<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Creates a slice with a string that contains the characters in
|
||||||
|
/// the specified **character range**.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn slice(self, range: impl RangeBounds<usize>) -> Utf32Str<'a> {
|
pub fn slice(self, range: impl RangeBounds<usize>) -> Utf32Str<'a> {
|
||||||
let start = match range.start_bound() {
|
let start = match range.start_bound() {
|
||||||
@ -90,7 +94,7 @@ impl<'a> Utf32Str<'a> {
|
|||||||
|
|
||||||
/// Returns the number of leading whitespaces in this string
|
/// Returns the number of leading whitespaces in this string
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn leading_white_space(self) -> usize {
|
pub(crate) fn leading_white_space(self) -> usize {
|
||||||
match self {
|
match self {
|
||||||
Utf32Str::Ascii(bytes) => bytes
|
Utf32Str::Ascii(bytes) => bytes
|
||||||
.iter()
|
.iter()
|
||||||
@ -105,7 +109,7 @@ impl<'a> Utf32Str<'a> {
|
|||||||
|
|
||||||
/// Returns the number of trailing whitespaces in this string
|
/// Returns the number of trailing whitespaces in this string
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn trailing_white_space(self) -> usize {
|
pub(crate) fn trailing_white_space(self) -> usize {
|
||||||
match self {
|
match self {
|
||||||
Utf32Str::Ascii(bytes) => bytes
|
Utf32Str::Ascii(bytes) => bytes
|
||||||
.iter()
|
.iter()
|
||||||
@ -121,7 +125,7 @@ impl<'a> Utf32Str<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Same as `slice` but accepts a u32 range for convenience since
|
/// Same as `slice` but accepts a u32 range for convenience since
|
||||||
/// those are the indices returned by the matcher
|
/// those are the indices returned by the matcher.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn slice_u32(self, range: impl RangeBounds<u32>) -> Utf32Str<'a> {
|
pub fn slice_u32(self, range: impl RangeBounds<u32>) -> Utf32Str<'a> {
|
||||||
let start = match range.start_bound() {
|
let start = match range.start_bound() {
|
||||||
@ -139,29 +143,34 @@ impl<'a> Utf32Str<'a> {
|
|||||||
Utf32Str::Unicode(codepoints) => Utf32Str::Unicode(&codepoints[start..end]),
|
Utf32Str::Unicode(codepoints) => Utf32Str::Unicode(&codepoints[start..end]),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns whether this string only contains ascii text.
|
||||||
pub fn is_ascii(self) -> bool {
|
pub fn is_ascii(self) -> bool {
|
||||||
matches!(self, Utf32Str::Ascii(_))
|
matches!(self, Utf32Str::Ascii(_))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get(self, idx: u32) -> char {
|
/// Returns the `n`th character in this string.
|
||||||
|
pub fn get(self, n: u32) -> char {
|
||||||
match self {
|
match self {
|
||||||
Utf32Str::Ascii(bytes) => bytes[idx as usize] as char,
|
Utf32Str::Ascii(bytes) => bytes[n as usize] as char,
|
||||||
Utf32Str::Unicode(codepoints) => codepoints[idx as usize],
|
Utf32Str::Unicode(codepoints) => codepoints[n as usize],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub fn last(self) -> char {
|
pub(crate) fn last(self) -> char {
|
||||||
match self {
|
match self {
|
||||||
Utf32Str::Ascii(bytes) => bytes[bytes.len() - 1] as char,
|
Utf32Str::Ascii(bytes) => bytes[bytes.len() - 1] as char,
|
||||||
Utf32Str::Unicode(codepoints) => codepoints[codepoints.len() - 1],
|
Utf32Str::Unicode(codepoints) => codepoints[codepoints.len() - 1],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub fn first(self) -> char {
|
|
||||||
|
pub(crate) fn first(self) -> char {
|
||||||
match self {
|
match self {
|
||||||
Utf32Str::Ascii(bytes) => bytes[0] as char,
|
Utf32Str::Ascii(bytes) => bytes[0] as char,
|
||||||
Utf32Str::Unicode(codepoints) => codepoints[0],
|
Utf32Str::Unicode(codepoints) => codepoints[0],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns an iterator over the characters in this string
|
||||||
pub fn chars(self) -> Chars<'a> {
|
pub fn chars(self) -> Chars<'a> {
|
||||||
match self {
|
match self {
|
||||||
Utf32Str::Ascii(bytes) => Chars::Ascii(bytes.iter()),
|
Utf32Str::Ascii(bytes) => Chars::Ascii(bytes.iter()),
|
||||||
@ -169,6 +178,7 @@ impl<'a> Utf32Str<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Debug for Utf32Str<'_> {
|
impl fmt::Debug for Utf32Str<'_> {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
write!(f, "\"")?;
|
write!(f, "\"")?;
|
||||||
@ -215,6 +225,7 @@ impl DoubleEndedIterator for Chars<'_> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)]
|
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)]
|
||||||
|
/// An owned version of [`Utf32Str`].
|
||||||
pub enum Utf32String {
|
pub enum Utf32String {
|
||||||
/// A string represented as ASCII encoded bytes.
|
/// A string represented as ASCII encoded bytes.
|
||||||
/// Correctness invariant: must only contain valid ASCII (<=127)
|
/// Correctness invariant: must only contain valid ASCII (<=127)
|
||||||
@ -230,6 +241,7 @@ impl Default for Utf32String {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Utf32String {
|
impl Utf32String {
|
||||||
|
/// Returns the number of characters in this string.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn len(&self) -> usize {
|
pub fn len(&self) -> usize {
|
||||||
match self {
|
match self {
|
||||||
@ -237,6 +249,8 @@ impl Utf32String {
|
|||||||
Utf32String::Ascii(ascii_bytes) => ascii_bytes.len(),
|
Utf32String::Ascii(ascii_bytes) => ascii_bytes.len(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns whether this string is empty.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn is_empty(&self) -> bool {
|
pub fn is_empty(&self) -> bool {
|
||||||
match self {
|
match self {
|
||||||
@ -245,18 +259,18 @@ impl Utf32String {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Same as `slice` but accepts a u32 range for convenience since
|
/// Creates a slice with a string that contains the characters in
|
||||||
/// those are the indices returned by the matcher
|
/// the specified **character range**.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn slice(&self, range: impl RangeBounds<u32>) -> Utf32Str {
|
pub fn slice(&self, range: impl RangeBounds<usize>) -> Utf32Str {
|
||||||
let start = match range.start_bound() {
|
let start = match range.start_bound() {
|
||||||
Bound::Included(&start) => start as usize,
|
Bound::Included(&start) => start,
|
||||||
Bound::Excluded(&start) => start as usize + 1,
|
Bound::Excluded(&start) => start + 1,
|
||||||
Bound::Unbounded => 0,
|
Bound::Unbounded => 0,
|
||||||
};
|
};
|
||||||
let end = match range.end_bound() {
|
let end = match range.end_bound() {
|
||||||
Bound::Included(&end) => end as usize + 1,
|
Bound::Included(&end) => end + 1,
|
||||||
Bound::Excluded(&end) => end as usize,
|
Bound::Excluded(&end) => end,
|
||||||
Bound::Unbounded => self.len(),
|
Bound::Unbounded => self.len(),
|
||||||
};
|
};
|
||||||
match self {
|
match self {
|
||||||
@ -265,65 +279,28 @@ impl Utf32String {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Same as `slice` but accepts a u32 range for convenience since
|
||||||
|
/// those are the indices returned by the matcher.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn is_ascii(&self) -> bool {
|
pub fn slice_u32(&self, range: impl RangeBounds<u32>) -> Utf32Str {
|
||||||
matches!(self, Utf32String::Ascii(_))
|
let start = match range.start_bound() {
|
||||||
}
|
Bound::Included(&start) => start,
|
||||||
|
Bound::Excluded(&start) => start + 1,
|
||||||
#[inline]
|
Bound::Unbounded => 0,
|
||||||
pub fn get(&self, idx: u32) -> char {
|
|
||||||
match self {
|
|
||||||
Utf32String::Ascii(bytes) => bytes.as_bytes()[idx as usize] as char,
|
|
||||||
Utf32String::Unicode(codepoints) => codepoints[idx as usize],
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
pub fn last(&self) -> char {
|
|
||||||
match self {
|
|
||||||
Utf32String::Ascii(bytes) => bytes.as_bytes()[bytes.len() - 1] as char,
|
|
||||||
Utf32String::Unicode(codepoints) => codepoints[codepoints.len() - 1],
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
pub fn chars(&self) -> Chars<'_> {
|
|
||||||
match self {
|
|
||||||
Utf32String::Ascii(bytes) => Chars::Ascii(bytes.as_bytes().iter()),
|
|
||||||
Utf32String::Unicode(codepoints) => Chars::Unicode(codepoints.iter()),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
pub fn push_str(&mut self, text: &str) {
|
|
||||||
let mut codeboints = match take(self) {
|
|
||||||
Utf32String::Ascii(bytes) if text.is_ascii() => {
|
|
||||||
let mut bytes = bytes.into_string();
|
|
||||||
bytes.push_str(text);
|
|
||||||
*self = Self::Ascii(bytes.into_boxed_str());
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
Utf32String::Ascii(bytes) => bytes.bytes().map(|c| c as char).collect(),
|
|
||||||
Utf32String::Unicode(codepoints) => Vec::from(codepoints),
|
|
||||||
};
|
};
|
||||||
codeboints.extend(chars::graphemes(text));
|
let end = match range.end_bound() {
|
||||||
*self = Utf32String::Unicode(codeboints.into_boxed_slice());
|
Bound::Included(&end) => end + 1,
|
||||||
}
|
Bound::Excluded(&end) => end,
|
||||||
|
Bound::Unbounded => self.len() as u32,
|
||||||
#[inline]
|
|
||||||
pub fn push(&mut self, c: char) {
|
|
||||||
let mut codeboints = match take(self) {
|
|
||||||
Utf32String::Ascii(bytes) if c.is_ascii() => {
|
|
||||||
let mut bytes = bytes.into_string();
|
|
||||||
bytes.push(c);
|
|
||||||
*self = Self::Ascii(bytes.into_boxed_str());
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
Utf32String::Ascii(bytes) => bytes.bytes().map(|c| c as char).collect(),
|
|
||||||
Utf32String::Unicode(codepoints) => Vec::from(codepoints),
|
|
||||||
};
|
};
|
||||||
codeboints.push(c);
|
match self {
|
||||||
*self = Utf32String::Unicode(codeboints.into_boxed_slice());
|
Utf32String::Ascii(bytes) => {
|
||||||
|
Utf32Str::Ascii(&bytes.as_bytes()[start as usize..end as usize])
|
||||||
|
}
|
||||||
|
Utf32String::Unicode(codepoints) => {
|
||||||
|
Utf32Str::Unicode(&codepoints[start as usize..end as usize])
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -367,21 +344,12 @@ impl<'a> From<Cow<'a, str>> for Utf32String {
|
|||||||
|
|
||||||
impl fmt::Debug for Utf32String {
|
impl fmt::Debug for Utf32String {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
write!(f, "\"")?;
|
write!(f, "{:?}", self.slice(..))
|
||||||
for c in self.chars() {
|
|
||||||
for c in c.escape_debug() {
|
|
||||||
write!(f, "{c}")?
|
|
||||||
}
|
|
||||||
}
|
|
||||||
write!(f, "\"")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for Utf32String {
|
impl fmt::Display for Utf32String {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
for c in self.chars() {
|
write!(f, "{}", self.slice(..))
|
||||||
write!(f, "{c}")?
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
44
src/lib.rs
44
src/lib.rs
@ -1,4 +1,3 @@
|
|||||||
use std::cmp::Reverse;
|
|
||||||
use std::ops::{Bound, RangeBounds};
|
use std::ops::{Bound, RangeBounds};
|
||||||
use std::sync::atomic::{self, AtomicBool, Ordering};
|
use std::sync::atomic::{self, AtomicBool, Ordering};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
@ -7,13 +6,13 @@ use std::time::Duration;
|
|||||||
use parking_lot::Mutex;
|
use parking_lot::Mutex;
|
||||||
use rayon::ThreadPool;
|
use rayon::ThreadPool;
|
||||||
|
|
||||||
pub use crate::pattern::{CaseMatching, MultiPattern, Pattern, PatternKind};
|
use crate::pattern::MultiPattern;
|
||||||
use crate::worker::Worker;
|
use crate::worker::Worker;
|
||||||
pub use nucleo_matcher::{chars, Matcher, MatcherConfig, Utf32Str, Utf32String};
|
pub use nucleo_matcher::{chars, Config, Matcher, Utf32Str, Utf32String};
|
||||||
|
|
||||||
mod boxcar;
|
mod boxcar;
|
||||||
mod par_sort;
|
mod par_sort;
|
||||||
mod pattern;
|
pub mod pattern;
|
||||||
mod worker;
|
mod worker;
|
||||||
|
|
||||||
pub struct Item<'a, T> {
|
pub struct Item<'a, T> {
|
||||||
@ -195,10 +194,9 @@ pub struct Nucleo<T: Sync + Send + 'static> {
|
|||||||
|
|
||||||
impl<T: Sync + Send + 'static> Nucleo<T> {
|
impl<T: Sync + Send + 'static> Nucleo<T> {
|
||||||
pub fn new(
|
pub fn new(
|
||||||
config: MatcherConfig,
|
config: Config,
|
||||||
notify: Arc<(dyn Fn() + Sync + Send)>,
|
notify: Arc<(dyn Fn() + Sync + Send)>,
|
||||||
num_threads: Option<usize>,
|
num_threads: Option<usize>,
|
||||||
case_matching: CaseMatching,
|
|
||||||
columns: u32,
|
columns: u32,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
let (pool, worker) = Worker::new(num_threads, config, notify.clone(), columns);
|
let (pool, worker) = Worker::new(num_threads, config, notify.clone(), columns);
|
||||||
@ -207,10 +205,10 @@ impl<T: Sync + Send + 'static> Nucleo<T> {
|
|||||||
should_notify: worker.should_notify.clone(),
|
should_notify: worker.should_notify.clone(),
|
||||||
items: worker.items.clone(),
|
items: worker.items.clone(),
|
||||||
pool,
|
pool,
|
||||||
pattern: MultiPattern::new(&config, case_matching, columns as usize),
|
pattern: MultiPattern::new(columns as usize),
|
||||||
snapshot: Snapshot {
|
snapshot: Snapshot {
|
||||||
matches: Vec::with_capacity(2 * 1024),
|
matches: Vec::with_capacity(2 * 1024),
|
||||||
pattern: MultiPattern::new(&config, case_matching, columns as usize),
|
pattern: MultiPattern::new(columns as usize),
|
||||||
item_count: 0,
|
item_count: 0,
|
||||||
items: worker.items.clone(),
|
items: worker.items.clone(),
|
||||||
},
|
},
|
||||||
@ -252,7 +250,7 @@ impl<T: Sync + Send + 'static> Nucleo<T> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn update_config(&mut self, config: MatcherConfig) {
|
pub fn update_config(&mut self, config: Config) {
|
||||||
self.worker.lock().update_config(config)
|
self.worker.lock().update_config(config)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -321,31 +319,3 @@ impl<T: Sync + Send> Drop for Nucleo<T> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// convenience function to easily fuzzy match
|
|
||||||
/// on a (relatively small) list of inputs. This is not recommended for building a full tui
|
|
||||||
/// application that can match large numbers of matches as all matching is done on the current
|
|
||||||
/// thread, effectively blocking the UI
|
|
||||||
pub fn fuzzy_match<T: AsRef<str>>(
|
|
||||||
matcher: &mut Matcher,
|
|
||||||
pattern: &str,
|
|
||||||
items: impl IntoIterator<Item = T>,
|
|
||||||
case_matching: CaseMatching,
|
|
||||||
) -> Vec<(T, u32)> {
|
|
||||||
let mut pattern_ = Pattern::new(&matcher.config, case_matching);
|
|
||||||
pattern_.set_literal(pattern, PatternKind::Fuzzy, false);
|
|
||||||
if pattern_.is_empty() {
|
|
||||||
return items.into_iter().map(|item| (item, 0)).collect();
|
|
||||||
}
|
|
||||||
let mut buf = Vec::new();
|
|
||||||
let mut items: Vec<_> = items
|
|
||||||
.into_iter()
|
|
||||||
.filter_map(|item| {
|
|
||||||
pattern_
|
|
||||||
.score(Utf32Str::new(item.as_ref(), &mut buf), matcher)
|
|
||||||
.map(|score| (item, score))
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
items.sort_by_key(|(_, score)| Reverse(*score));
|
|
||||||
items
|
|
||||||
}
|
|
||||||
|
408
src/pattern.rs
408
src/pattern.rs
@ -1,188 +1,12 @@
|
|||||||
use nucleo_matcher::{chars, Matcher, MatcherConfig, Utf32Str};
|
pub use nucleo_matcher::pattern::{Atom, AtomKind, CaseMatching, Pattern};
|
||||||
|
use nucleo_matcher::{Matcher, Utf32String};
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests;
|
mod tests;
|
||||||
|
|
||||||
use crate::Utf32String;
|
#[derive(Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord, Default)]
|
||||||
|
|
||||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
|
||||||
#[non_exhaustive]
|
|
||||||
pub enum CaseMatching {
|
|
||||||
Ignore,
|
|
||||||
Smart,
|
|
||||||
Respect,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
|
||||||
#[non_exhaustive]
|
|
||||||
pub enum PatternKind {
|
|
||||||
Exact,
|
|
||||||
Fuzzy,
|
|
||||||
Substring,
|
|
||||||
Prefix,
|
|
||||||
Postfix,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
|
||||||
struct PatternAtom {
|
|
||||||
kind: PatternKind,
|
|
||||||
needle: Utf32String,
|
|
||||||
invert: bool,
|
|
||||||
ignore_case: bool,
|
|
||||||
}
|
|
||||||
impl PatternAtom {
|
|
||||||
fn literal(
|
|
||||||
needle: &str,
|
|
||||||
normalize: bool,
|
|
||||||
case: CaseMatching,
|
|
||||||
kind: PatternKind,
|
|
||||||
escape_whitespace: bool,
|
|
||||||
) -> PatternAtom {
|
|
||||||
let mut ignore_case;
|
|
||||||
let needle = if needle.is_ascii() {
|
|
||||||
let mut needle = if escape_whitespace {
|
|
||||||
if let Some((start, rem)) = needle.split_once("\\ ") {
|
|
||||||
let mut needle = start.to_owned();
|
|
||||||
for rem in rem.split("\\ ") {
|
|
||||||
needle.push(' ');
|
|
||||||
needle.push_str(rem);
|
|
||||||
}
|
|
||||||
needle
|
|
||||||
} else {
|
|
||||||
needle.to_owned()
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
needle.to_owned()
|
|
||||||
};
|
|
||||||
|
|
||||||
match case {
|
|
||||||
CaseMatching::Ignore => {
|
|
||||||
ignore_case = true;
|
|
||||||
needle.make_ascii_lowercase()
|
|
||||||
}
|
|
||||||
CaseMatching::Smart => {
|
|
||||||
ignore_case = !needle.bytes().any(|b| b.is_ascii_uppercase())
|
|
||||||
}
|
|
||||||
CaseMatching::Respect => ignore_case = false,
|
|
||||||
}
|
|
||||||
|
|
||||||
Utf32String::Ascii(needle.into_boxed_str())
|
|
||||||
} else {
|
|
||||||
let mut needle_ = Vec::with_capacity(needle.len());
|
|
||||||
ignore_case = matches!(case, CaseMatching::Ignore | CaseMatching::Smart);
|
|
||||||
if escape_whitespace {
|
|
||||||
let mut saw_backslash = false;
|
|
||||||
for mut c in chars::graphemes(needle) {
|
|
||||||
if saw_backslash {
|
|
||||||
if c == ' ' {
|
|
||||||
needle_.push(' ');
|
|
||||||
saw_backslash = false;
|
|
||||||
continue;
|
|
||||||
} else {
|
|
||||||
needle_.push('\\');
|
|
||||||
}
|
|
||||||
}
|
|
||||||
saw_backslash = c == '\\';
|
|
||||||
if normalize {
|
|
||||||
c = chars::normalize(c);
|
|
||||||
}
|
|
||||||
match case {
|
|
||||||
CaseMatching::Ignore => c = chars::to_lower_case(c),
|
|
||||||
CaseMatching::Smart => {
|
|
||||||
ignore_case = ignore_case && !chars::is_upper_case(c)
|
|
||||||
}
|
|
||||||
CaseMatching::Respect => (),
|
|
||||||
}
|
|
||||||
needle_.push(c);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
let chars = chars::graphemes(needle).map(|mut c| {
|
|
||||||
if normalize {
|
|
||||||
c = chars::normalize(c);
|
|
||||||
}
|
|
||||||
match case {
|
|
||||||
CaseMatching::Ignore => c = chars::to_lower_case(c),
|
|
||||||
CaseMatching::Smart => {
|
|
||||||
ignore_case = ignore_case && !chars::is_upper_case(c);
|
|
||||||
}
|
|
||||||
CaseMatching::Respect => (),
|
|
||||||
}
|
|
||||||
c
|
|
||||||
});
|
|
||||||
needle_.extend(chars);
|
|
||||||
};
|
|
||||||
Utf32String::Unicode(needle_.into_boxed_slice())
|
|
||||||
};
|
|
||||||
PatternAtom {
|
|
||||||
kind,
|
|
||||||
needle,
|
|
||||||
invert: false,
|
|
||||||
ignore_case,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parse(raw: &str, normalize: bool, case: CaseMatching) -> PatternAtom {
|
|
||||||
let mut atom = raw;
|
|
||||||
let invert = match atom.as_bytes() {
|
|
||||||
[b'!', ..] => {
|
|
||||||
atom = &atom[1..];
|
|
||||||
true
|
|
||||||
}
|
|
||||||
[b'\\', b'!', ..] => {
|
|
||||||
atom = &atom[1..];
|
|
||||||
false
|
|
||||||
}
|
|
||||||
_ => false,
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut kind = match atom.as_bytes() {
|
|
||||||
[b'^', ..] => {
|
|
||||||
atom = &atom[1..];
|
|
||||||
PatternKind::Prefix
|
|
||||||
}
|
|
||||||
[b'\'', ..] => {
|
|
||||||
atom = &atom[1..];
|
|
||||||
PatternKind::Substring
|
|
||||||
}
|
|
||||||
[b'\\', b'^' | b'\'', ..] => {
|
|
||||||
atom = &atom[1..];
|
|
||||||
PatternKind::Fuzzy
|
|
||||||
}
|
|
||||||
_ => PatternKind::Fuzzy,
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut append_dollar = false;
|
|
||||||
match atom.as_bytes() {
|
|
||||||
[.., b'\\', b'$'] => {
|
|
||||||
append_dollar = true;
|
|
||||||
atom = &atom[..atom.len() - 2]
|
|
||||||
}
|
|
||||||
[.., b'$'] => {
|
|
||||||
kind = if kind == PatternKind::Fuzzy {
|
|
||||||
PatternKind::Postfix
|
|
||||||
} else {
|
|
||||||
PatternKind::Exact
|
|
||||||
};
|
|
||||||
atom = &atom[..atom.len() - 1]
|
|
||||||
}
|
|
||||||
_ => (),
|
|
||||||
}
|
|
||||||
|
|
||||||
if invert && kind == PatternKind::Fuzzy {
|
|
||||||
kind = PatternKind::Substring
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut pattern = PatternAtom::literal(atom, normalize, case, kind, true);
|
|
||||||
pattern.invert = invert;
|
|
||||||
if append_dollar {
|
|
||||||
pattern.needle.push('$');
|
|
||||||
}
|
|
||||||
pattern
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord)]
|
|
||||||
pub enum Status {
|
pub enum Status {
|
||||||
|
#[default]
|
||||||
Unchanged,
|
Unchanged,
|
||||||
Update,
|
Update,
|
||||||
Rescore,
|
Rescore,
|
||||||
@ -190,7 +14,7 @@ pub enum Status {
|
|||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct MultiPattern {
|
pub struct MultiPattern {
|
||||||
pub cols: Vec<Pattern>,
|
cols: Vec<(Pattern, Status)>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Clone for MultiPattern {
|
impl Clone for MultiPattern {
|
||||||
@ -206,214 +30,64 @@ impl Clone for MultiPattern {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl MultiPattern {
|
impl MultiPattern {
|
||||||
pub fn new(
|
/// Creates a multi pattern with `columns` empty column patterns.
|
||||||
matcher_config: &MatcherConfig,
|
pub fn new(columns: usize) -> Self {
|
||||||
case_matching: CaseMatching,
|
Self {
|
||||||
columns: usize,
|
cols: vec![Default::default(); columns],
|
||||||
) -> MultiPattern {
|
|
||||||
MultiPattern {
|
|
||||||
cols: vec![Pattern::new(matcher_config, case_matching); columns],
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reparses a column. By specifying `append` the caller promises that text passed
|
||||||
|
/// to the previous `reparse` invocation is a prefix of `new_text`. This enables
|
||||||
|
/// additional optimizations but can lead to missing matches if an incorrect value
|
||||||
|
/// is passed.
|
||||||
|
pub fn reparse(
|
||||||
|
&mut self,
|
||||||
|
column: usize,
|
||||||
|
new_text: &str,
|
||||||
|
case_matching: CaseMatching,
|
||||||
|
append: bool,
|
||||||
|
) {
|
||||||
|
let old_status = self.cols[column].1;
|
||||||
|
if append
|
||||||
|
&& old_status != Status::Rescore
|
||||||
|
&& self.cols[column]
|
||||||
|
.0
|
||||||
|
.atoms
|
||||||
|
.last()
|
||||||
|
.map_or(true, |last| !last.negative)
|
||||||
|
{
|
||||||
|
self.cols[column].1 = Status::Update;
|
||||||
|
} else {
|
||||||
|
self.cols[column].1 = Status::Rescore;
|
||||||
|
}
|
||||||
|
self.cols[column].0.reparse(new_text, case_matching);
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn status(&self) -> Status {
|
pub(crate) fn status(&self) -> Status {
|
||||||
self.cols
|
self.cols
|
||||||
.iter()
|
.iter()
|
||||||
.map(|col| col.status)
|
.map(|&(_, status)| status)
|
||||||
.max()
|
.max()
|
||||||
.unwrap_or(Status::Unchanged)
|
.unwrap_or(Status::Unchanged)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn reset_status(&mut self) {
|
pub(crate) fn reset_status(&mut self) {
|
||||||
for col in &mut self.cols {
|
for (_, status) in &mut self.cols {
|
||||||
col.status = Status::Unchanged
|
*status = Status::Unchanged
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn score(&self, haystack: &[Utf32String], matcher: &mut Matcher) -> Option<u32> {
|
pub fn score(&self, haystack: &[Utf32String], matcher: &mut Matcher) -> Option<u32> {
|
||||||
// TODO: weight columns?
|
// TODO: weight columns?
|
||||||
let mut score = 0;
|
let mut score = 0;
|
||||||
for (pattern, haystack) in self.cols.iter().zip(haystack) {
|
for ((pattern, _), haystack) in self.cols.iter().zip(haystack) {
|
||||||
score += pattern.score(haystack.slice(..), matcher)?
|
score += pattern.score(haystack.slice(..), matcher)?
|
||||||
}
|
}
|
||||||
Some(score)
|
Some(score)
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub struct Pattern {
|
|
||||||
atoms: Vec<PatternAtom>,
|
|
||||||
case_matching: CaseMatching,
|
|
||||||
normalize: bool,
|
|
||||||
status: Status,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Pattern {
|
|
||||||
pub fn new(matcher_config: &MatcherConfig, case_matching: CaseMatching) -> Pattern {
|
|
||||||
Pattern {
|
|
||||||
atoms: Vec::new(),
|
|
||||||
case_matching,
|
|
||||||
normalize: matcher_config.normalize,
|
|
||||||
status: Status::Unchanged,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
pub fn new_fuzzy_literal(
|
|
||||||
matcher_config: &MatcherConfig,
|
|
||||||
case_matching: CaseMatching,
|
|
||||||
pattern: &str,
|
|
||||||
) -> Pattern {
|
|
||||||
let mut res = Pattern {
|
|
||||||
atoms: Vec::new(),
|
|
||||||
case_matching,
|
|
||||||
normalize: matcher_config.normalize,
|
|
||||||
status: Status::Unchanged,
|
|
||||||
};
|
|
||||||
res.set_literal(pattern, PatternKind::Fuzzy, false);
|
|
||||||
res
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn score(&self, haystack: Utf32Str<'_>, matcher: &mut Matcher) -> Option<u32> {
|
|
||||||
if self.atoms.is_empty() {
|
|
||||||
return Some(0);
|
|
||||||
}
|
|
||||||
let mut score = 0;
|
|
||||||
for pattern in &self.atoms {
|
|
||||||
matcher.config.ignore_case = pattern.ignore_case;
|
|
||||||
let pattern_score = match pattern.kind {
|
|
||||||
PatternKind::Exact => matcher.exact_match(haystack, pattern.needle.slice(..)),
|
|
||||||
PatternKind::Fuzzy => matcher.fuzzy_match(haystack, pattern.needle.slice(..)),
|
|
||||||
PatternKind::Substring => {
|
|
||||||
matcher.substring_match(haystack, pattern.needle.slice(..))
|
|
||||||
}
|
|
||||||
PatternKind::Prefix => matcher.prefix_match(haystack, pattern.needle.slice(..)),
|
|
||||||
PatternKind::Postfix => matcher.postfix_match(haystack, pattern.needle.slice(..)),
|
|
||||||
};
|
|
||||||
if pattern.invert {
|
|
||||||
if pattern_score.is_some() {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
score += pattern_score? as u32
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Some(score)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn indices(
|
|
||||||
&self,
|
|
||||||
haystack: Utf32Str<'_>,
|
|
||||||
matcher: &mut Matcher,
|
|
||||||
indices: &mut Vec<u32>,
|
|
||||||
) -> Option<u32> {
|
|
||||||
if self.atoms.is_empty() {
|
|
||||||
return Some(0);
|
|
||||||
}
|
|
||||||
let mut score = 0;
|
|
||||||
for pattern in &self.atoms {
|
|
||||||
matcher.config.ignore_case = pattern.ignore_case;
|
|
||||||
if pattern.invert {
|
|
||||||
let pattern_score = match pattern.kind {
|
|
||||||
PatternKind::Exact => matcher.exact_match(haystack, pattern.needle.slice(..)),
|
|
||||||
PatternKind::Fuzzy => matcher.fuzzy_match(haystack, pattern.needle.slice(..)),
|
|
||||||
PatternKind::Substring => {
|
|
||||||
matcher.substring_match(haystack, pattern.needle.slice(..))
|
|
||||||
}
|
|
||||||
PatternKind::Prefix => matcher.prefix_match(haystack, pattern.needle.slice(..)),
|
|
||||||
PatternKind::Postfix => {
|
|
||||||
matcher.postfix_match(haystack, pattern.needle.slice(..))
|
|
||||||
}
|
|
||||||
};
|
|
||||||
if pattern_score.is_some() {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
let pattern_score = match pattern.kind {
|
|
||||||
PatternKind::Exact => {
|
|
||||||
matcher.exact_indices(haystack, pattern.needle.slice(..), indices)
|
|
||||||
}
|
|
||||||
PatternKind::Fuzzy => {
|
|
||||||
matcher.fuzzy_indices(haystack, pattern.needle.slice(..), indices)
|
|
||||||
}
|
|
||||||
PatternKind::Substring => {
|
|
||||||
matcher.substring_indices(haystack, pattern.needle.slice(..), indices)
|
|
||||||
}
|
|
||||||
PatternKind::Prefix => {
|
|
||||||
matcher.prefix_indices(haystack, pattern.needle.slice(..), indices)
|
|
||||||
}
|
|
||||||
PatternKind::Postfix => {
|
|
||||||
matcher.postfix_indices(haystack, pattern.needle.slice(..), indices)
|
|
||||||
}
|
|
||||||
};
|
|
||||||
score += pattern_score? as u32
|
|
||||||
}
|
|
||||||
Some(score)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Replaces all atoms with the ones parsed from `pattern`.
///
/// `pattern` is split with `pattern_atoms`, every piece is parsed with
/// `PatternAtom::parse`, and atoms with an empty needle are dropped.
///
/// The status becomes `Status::Update` only if `append` is set, the
/// *previous* last atom was not inverted and no rescore is pending already;
/// otherwise it becomes `Status::Rescore`.
pub fn parse_from(&mut self, pattern: &str, append: bool) {
    // Has to be read before the old atoms are discarded.
    let last_was_inverted = matches!(self.atoms.last(), Some(atom) if atom.invert);
    let (normalize, case_matching) = (self.normalize, self.case_matching);

    self.atoms.clear();
    self.atoms.extend(
        pattern_atoms(pattern)
            .map(|raw| PatternAtom::parse(raw, normalize, case_matching))
            .filter(|atom| !atom.needle.is_empty()),
    );

    let can_update = append && !last_was_inverted && self.status != Status::Rescore;
    self.status = if can_update {
        Status::Update
    } else {
        Status::Rescore
    };
}
|
|
||||||
|
|
||||||
pub fn set_literal(&mut self, pattern: &str, kind: PatternKind, append: bool) {
|
|
||||||
self.atoms.clear();
|
|
||||||
let pattern =
|
|
||||||
PatternAtom::literal(pattern, self.normalize, self.case_matching, kind, false);
|
|
||||||
if !pattern.needle.is_empty() {
|
|
||||||
self.atoms.push(pattern);
|
|
||||||
}
|
|
||||||
self.status = if append && self.status != Status::Rescore {
|
|
||||||
Status::Update
|
|
||||||
} else {
|
|
||||||
Status::Rescore
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn is_empty(&self) -> bool {
|
pub fn is_empty(&self) -> bool {
|
||||||
self.atoms.is_empty()
|
self.cols.iter().all(|(pat, _)| pat.atoms.is_empty())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Clone for Pattern {
|
|
||||||
fn clone(&self) -> Self {
|
|
||||||
Self {
|
|
||||||
atoms: self.atoms.clone(),
|
|
||||||
case_matching: self.case_matching,
|
|
||||||
normalize: self.normalize,
|
|
||||||
status: self.status,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn clone_from(&mut self, source: &Self) {
|
|
||||||
self.atoms.clone_from(&source.atoms);
|
|
||||||
self.case_matching = source.case_matching;
|
|
||||||
self.normalize = source.normalize;
|
|
||||||
self.status = source.status;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Splits `pattern` at every space that is not directly preceded by a backslash.
///
/// The yielded pieces are untouched sub-slices of `pattern`: escape sequences
/// are not resolved here, and consecutive separators produce empty pieces.
fn pattern_atoms(pattern: &str) -> impl Iterator<Item = &str> + '_ {
    let mut escaped = false;
    pattern.split(move |ch: char| {
        let is_separator = ch == ' ' && !escaped;
        if !is_separator {
            // Any backslash arms the escape, including one that directly
            // follows another backslash.
            escaped = ch == '\\';
        }
        is_separator
    })
}
|
|
||||||
|
@ -1,145 +1,14 @@
|
|||||||
use crate::pattern::{PatternAtom, Status};
|
use nucleo_matcher::pattern::CaseMatching;
|
||||||
use crate::{CaseMatching, Pattern, PatternKind};
|
|
||||||
|
|
||||||
fn parse_atom(pat: &str) -> PatternAtom {
|
use crate::pattern::{MultiPattern, Status};
|
||||||
parse_atom_with(pat, CaseMatching::Smart)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parses `pat` (without `append`) and returns its only atom.
///
/// Panics if parsing does not yield exactly one atom.
fn parse_atom_with(pat: &str, case_matching: CaseMatching) -> PatternAtom {
    let mut parsed = parse_with(pat, case_matching, false);
    assert_eq!(parsed.atoms.len(), 1);
    parsed
        .atoms
        .pop()
        .expect("length was asserted to be 1 above")
}
|
|
||||||
|
|
||||||
/// Builds a `Pattern` with the default matcher config and parses `pat` into it.
fn parse_with(pat: &str, case_matching: CaseMatching, append: bool) -> Pattern {
    let config = nucleo_matcher::MatcherConfig::DEFAULT;
    let mut pattern = Pattern::new(&config, case_matching);
    pattern.parse_from(pat, append);
    pattern
}
|
|
||||||
|
|
||||||
/// A leading `!` inverts the atom; without an explicit kind an inverted atom
/// is matched as a substring.
#[test]
fn negative() {
    let cases = [
        ("!foo", PatternKind::Substring),
        ("!^foo", PatternKind::Prefix),
        ("!foo$", PatternKind::Postfix),
        ("!^foo$", PatternKind::Exact),
    ];
    for (input, expected_kind) in cases {
        let pat = parse_atom(input);
        assert!(pat.invert, "{:?} should be inverted", input);
        assert_eq!(pat.kind, expected_kind, "kind of {:?}", input);
        assert_eq!(pat.needle.to_string(), "foo", "needle of {:?}", input);
    }
}
|
|
||||||
|
|
||||||
/// The `'`, `^` and `$` markers select the atom kind and are stripped from the needle.
#[test]
fn pattern_kinds() {
    let cases = [
        ("foo", PatternKind::Fuzzy),
        ("'foo", PatternKind::Substring),
        ("^foo", PatternKind::Prefix),
        ("foo$", PatternKind::Postfix),
        ("^foo$", PatternKind::Exact),
    ];
    for (input, expected_kind) in cases {
        let pat = parse_atom(input);
        assert!(!pat.invert, "{:?} should not be inverted", input);
        assert_eq!(pat.kind, expected_kind, "kind of {:?}", input);
        assert_eq!(pat.needle.to_string(), "foo", "needle of {:?}", input);
    }
}
|
|
||||||
|
|
||||||
/// Checks the `ignore_case` flag and the stored needle for every case mode.
///
/// Each row is `(input, mode, expected ignore_case, expected needle)`.
/// The previous version asserted the `("Foo", Respect)` and `("Äxx", Smart)`
/// rows twice; every distinct case is listed exactly once here.
#[test]
fn case_matching() {
    let cases = [
        ("foo", CaseMatching::Smart, true, "foo"),
        ("Foo", CaseMatching::Smart, false, "Foo"),
        ("Foo", CaseMatching::Ignore, true, "foo"),
        ("Foo", CaseMatching::Respect, false, "Foo"),
        // the needle is stored normalized ("Ä" becomes "A") in all modes
        ("Äxx", CaseMatching::Ignore, true, "axx"),
        ("Äxx", CaseMatching::Respect, false, "Axx"),
        ("Äxx", CaseMatching::Smart, false, "Axx"),
        // characters without an upper case form keep smart matching case insensitive
        ("你xx", CaseMatching::Smart, true, "你xx"),
        ("你xx", CaseMatching::Ignore, true, "你xx"),
        ("Ⲽxx", CaseMatching::Smart, false, "Ⲽxx"),
        ("Ⲽxx", CaseMatching::Ignore, true, "ⲽxx"),
    ];
    for (input, mode, ignore_case, needle) in cases {
        let pat = parse_atom_with(input, mode);
        assert_eq!(
            pat.ignore_case, ignore_case,
            "ignore_case of {:?} with {:?}",
            input, mode
        );
        assert_eq!(
            pat.needle.to_string(),
            needle,
            "needle of {:?} with {:?}",
            input,
            mode
        );
    }
}
|
|
||||||
|
|
||||||
/// A backslash turns a space or one of the `!`, `'`, `^`, `$` markers into a
/// literal character of the needle.
#[test]
fn escape() {
    // escaped space: only the needle is checked for this case
    let pat = parse_atom("foo\\ bar");
    assert_eq!(pat.needle.to_string(), "foo bar");

    let cases = [
        ("\\!foo", "!foo", PatternKind::Fuzzy),
        ("\\'foo", "'foo", PatternKind::Fuzzy),
        ("\\^foo", "^foo", PatternKind::Fuzzy),
        ("foo\\$", "foo$", PatternKind::Fuzzy),
        ("^foo\\$", "foo$", PatternKind::Prefix),
        ("\\^foo\\$", "^foo$", PatternKind::Fuzzy),
        ("\\!^foo\\$", "!^foo$", PatternKind::Fuzzy),
        ("!\\^foo\\$", "^foo$", PatternKind::Substring),
    ];
    for (input, expected_needle, expected_kind) in cases {
        let pat = parse_atom(input);
        assert_eq!(
            pat.needle.to_string(),
            expected_needle,
            "needle of {:?}",
            input
        );
        assert_eq!(pat.kind, expected_kind, "kind of {:?}", input);
    }
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn append() {
|
fn append() {
|
||||||
let mut pat = parse_with("!", CaseMatching::Smart, true);
|
let mut pat = MultiPattern::new(1);
|
||||||
assert_eq!(pat.status, Status::Update);
|
pat.reparse(0, "!", CaseMatching::Smart, true);
|
||||||
pat.parse_from("!f", true);
|
assert_eq!(pat.status(), Status::Update);
|
||||||
assert_eq!(pat.status, Status::Update);
|
pat.reparse(0, "!f", CaseMatching::Smart, true);
|
||||||
pat.parse_from("!fo", true);
|
assert_eq!(pat.status(), Status::Update);
|
||||||
assert_eq!(pat.status, Status::Rescore);
|
pat.reparse(0, "!fo", CaseMatching::Smart, true);
|
||||||
|
assert_eq!(pat.status(), Status::Rescore);
|
||||||
}
|
}
|
||||||
|
@ -3,7 +3,7 @@ use std::mem::take;
|
|||||||
use std::sync::atomic::{self, AtomicBool, AtomicU32};
|
use std::sync::atomic::{self, AtomicBool, AtomicU32};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use nucleo_matcher::MatcherConfig;
|
use nucleo_matcher::Config;
|
||||||
use parking_lot::Mutex;
|
use parking_lot::Mutex;
|
||||||
use rayon::{prelude::*, ThreadPool};
|
use rayon::{prelude::*, ThreadPool};
|
||||||
|
|
||||||
@ -42,15 +42,15 @@ impl<T: Sync + Send + 'static> Worker<T> {
|
|||||||
pub(crate) fn item_count(&self) -> u32 {
|
pub(crate) fn item_count(&self) -> u32 {
|
||||||
self.last_snapshot - self.in_flight.len() as u32
|
self.last_snapshot - self.in_flight.len() as u32
|
||||||
}
|
}
|
||||||
pub(crate) fn update_config(&mut self, config: MatcherConfig) {
|
pub(crate) fn update_config(&mut self, config: Config) {
|
||||||
for matcher in self.matchers.0.iter_mut() {
|
for matcher in self.matchers.0.iter_mut() {
|
||||||
matcher.get_mut().config = config;
|
matcher.get_mut().config = config.clone();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn new(
|
pub(crate) fn new(
|
||||||
worker_threads: Option<usize>,
|
worker_threads: Option<usize>,
|
||||||
config: MatcherConfig,
|
config: Config,
|
||||||
notify: Arc<(dyn Fn() + Sync + Send)>,
|
notify: Arc<(dyn Fn() + Sync + Send)>,
|
||||||
cols: u32,
|
cols: u32,
|
||||||
) -> (ThreadPool, Self) {
|
) -> (ThreadPool, Self) {
|
||||||
@ -62,7 +62,7 @@ impl<T: Sync + Send + 'static> Worker<T> {
|
|||||||
.build()
|
.build()
|
||||||
.expect("creating threadpool failed");
|
.expect("creating threadpool failed");
|
||||||
let matchers = (0..worker_threads)
|
let matchers = (0..worker_threads)
|
||||||
.map(|_| UnsafeCell::new(nucleo_matcher::Matcher::new(config)))
|
.map(|_| UnsafeCell::new(nucleo_matcher::Matcher::new(config.clone())))
|
||||||
.collect();
|
.collect();
|
||||||
let worker = Worker {
|
let worker = Worker {
|
||||||
running: false,
|
running: false,
|
||||||
@ -70,7 +70,7 @@ impl<T: Sync + Send + 'static> Worker<T> {
|
|||||||
last_snapshot: 0,
|
last_snapshot: 0,
|
||||||
matches: Vec::new(),
|
matches: Vec::new(),
|
||||||
// just a placeholder
|
// just a placeholder
|
||||||
pattern: MultiPattern::new(&config, crate::CaseMatching::Ignore, 0),
|
pattern: MultiPattern::new(cols as usize),
|
||||||
canceled: Arc::new(AtomicBool::new(false)),
|
canceled: Arc::new(AtomicBool::new(false)),
|
||||||
should_notify: Arc::new(AtomicBool::new(false)),
|
should_notify: Arc::new(AtomicBool::new(false)),
|
||||||
was_canceled: false,
|
was_canceled: false,
|
||||||
@ -162,7 +162,7 @@ impl<T: Sync + Send + 'static> Worker<T> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// TODO: be smarter around reusing past results for rescoring
|
// TODO: be smarter around reusing past results for rescoring
|
||||||
if self.pattern.cols.iter().all(|pat| pat.is_empty()) {
|
if self.pattern.is_empty() {
|
||||||
self.reset_matches();
|
self.reset_matches();
|
||||||
self.process_new_items_trivial();
|
self.process_new_items_trivial();
|
||||||
if self.should_notify.load(atomic::Ordering::Relaxed) {
|
if self.should_notify.load(atomic::Ordering::Relaxed) {
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
default.extend-ignore-re = ["\\\\u\\{[0-9A-Za-z]*\\}"]
|
default.extend-ignore-re = ["\\\\u\\{[0-9A-Za-z]*\\}"]
|
||||||
[files]
|
[files]
|
||||||
extend-exclude = ["matcher/src/tests.rs", "*.html"]
|
extend-exclude = ["matcher/src/tests.rs","src/pattern/tests.rs", "*.html"]
|
||||||
|
Loading…
Reference in New Issue
Block a user