mirror of
https://github.com/solaeus/nucleo.git
synced 2025-03-12 15:08:05 +00:00
365 lines
11 KiB
Rust
365 lines
11 KiB
Rust
use nucleo_matcher::{chars, Matcher, MatcherConfig, Utf32Str};
|
|
|
|
use crate::Utf32String;
|
|
|
|
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
|
#[non_exhaustive]
|
|
pub enum CaseMatching {
|
|
Ignore,
|
|
Smart,
|
|
Respect,
|
|
}
|
|
|
|
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
|
#[non_exhaustive]
|
|
pub enum PatternKind {
|
|
Exact,
|
|
Fuzzy,
|
|
Substring,
|
|
Prefix,
|
|
Postfix,
|
|
}
|
|
|
|
#[derive(Debug, PartialEq, Eq, Clone)]
|
|
struct PatternAtom {
|
|
kind: PatternKind,
|
|
needle: Utf32String,
|
|
invert: bool,
|
|
ignore_case: bool,
|
|
}
|
|
impl PatternAtom {
|
|
fn literal(
|
|
needle: &str,
|
|
normalize: bool,
|
|
case: CaseMatching,
|
|
kind: PatternKind,
|
|
escape_whitespace: bool,
|
|
) -> PatternAtom {
|
|
let mut ignore_case = case == CaseMatching::Ignore;
|
|
let needle = if needle.is_ascii() {
|
|
let mut needle = if escape_whitespace {
|
|
if let Some((start, rem)) = needle.split_once("\\ ") {
|
|
let mut needle = start.to_owned();
|
|
for rem in rem.split("\\ ") {
|
|
needle.push(' ');
|
|
needle.push_str(rem);
|
|
}
|
|
needle
|
|
} else {
|
|
needle.to_owned()
|
|
}
|
|
} else {
|
|
needle.to_owned()
|
|
};
|
|
|
|
match case {
|
|
CaseMatching::Ignore => needle.make_ascii_lowercase(),
|
|
CaseMatching::Smart => {
|
|
ignore_case = !needle.bytes().any(|b| b.is_ascii_uppercase())
|
|
}
|
|
CaseMatching::Respect => (),
|
|
}
|
|
|
|
Utf32String::Ascii(needle.into_boxed_str())
|
|
} else {
|
|
let mut needle_ = Vec::with_capacity(needle.len());
|
|
if escape_whitespace {
|
|
let mut saw_backslash = false;
|
|
for mut c in chars::graphemes(needle) {
|
|
if saw_backslash {
|
|
if c == ' ' {
|
|
needle_.push(' ');
|
|
saw_backslash = false;
|
|
continue;
|
|
} else {
|
|
needle_.push('\\');
|
|
}
|
|
}
|
|
saw_backslash = c == '\\';
|
|
if normalize {
|
|
c = chars::normalize(c);
|
|
}
|
|
match case {
|
|
CaseMatching::Ignore => c = chars::to_lower_case(c),
|
|
CaseMatching::Smart => {
|
|
ignore_case = ignore_case && !c.is_uppercase();
|
|
}
|
|
CaseMatching::Respect => (),
|
|
}
|
|
needle_.push(c);
|
|
}
|
|
} else {
|
|
let chars = chars::graphemes(needle).map(|mut c| {
|
|
if normalize {
|
|
c = chars::normalize(c);
|
|
}
|
|
match case {
|
|
CaseMatching::Ignore => c = chars::to_lower_case(c),
|
|
CaseMatching::Smart => {
|
|
ignore_case = ignore_case && !c.is_uppercase();
|
|
}
|
|
CaseMatching::Respect => (),
|
|
}
|
|
c
|
|
});
|
|
needle_.extend(chars);
|
|
};
|
|
Utf32String::Unicode(needle_.into_boxed_slice())
|
|
};
|
|
PatternAtom {
|
|
kind,
|
|
needle,
|
|
invert: false,
|
|
ignore_case,
|
|
}
|
|
}
|
|
|
|
fn parse(raw: &str, normalize: bool, case: CaseMatching) -> PatternAtom {
|
|
let mut atom = raw;
|
|
let inverse = atom.starts_with('!');
|
|
if inverse {
|
|
atom = &atom[1..];
|
|
}
|
|
|
|
let mut kind = match atom.as_bytes() {
|
|
[b'^', ..] => {
|
|
atom = &atom[1..];
|
|
PatternKind::Prefix
|
|
}
|
|
[b'\'', ..] => {
|
|
atom = &atom[1..];
|
|
PatternKind::Substring
|
|
}
|
|
[b'\\', b'^' | b'\'', ..] => {
|
|
atom = &atom[1..];
|
|
PatternKind::Fuzzy
|
|
}
|
|
_ => PatternKind::Fuzzy,
|
|
};
|
|
|
|
match atom.as_bytes() {
|
|
[.., b'\\', b'$'] => (),
|
|
[.., b'$'] => {
|
|
kind = if kind == PatternKind::Fuzzy {
|
|
PatternKind::Postfix
|
|
} else {
|
|
PatternKind::Exact
|
|
};
|
|
atom = &atom[..atom.len() - 1]
|
|
}
|
|
_ => (),
|
|
}
|
|
|
|
if inverse && kind == PatternKind::Fuzzy {
|
|
kind = PatternKind::Substring
|
|
}
|
|
|
|
PatternAtom::literal(atom, normalize, case, kind, true)
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, PartialEq, Eq, Clone, Copy, PartialOrd, Ord)]
|
|
pub enum Status {
|
|
Unchanged,
|
|
Update,
|
|
Rescore,
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct MultiPattern {
|
|
pub cols: Vec<Pattern>,
|
|
}
|
|
|
|
impl MultiPattern {
|
|
pub fn new(
|
|
matcher_config: &MatcherConfig,
|
|
case_matching: CaseMatching,
|
|
columns: usize,
|
|
) -> MultiPattern {
|
|
MultiPattern {
|
|
cols: vec![Pattern::new(matcher_config, case_matching); columns],
|
|
}
|
|
}
|
|
|
|
pub(crate) fn status(&self) -> Status {
|
|
self.cols
|
|
.iter()
|
|
.map(|col| col.status)
|
|
.max()
|
|
.unwrap_or(Status::Unchanged)
|
|
}
|
|
|
|
pub(crate) fn reset_status(&mut self) {
|
|
for col in &mut self.cols {
|
|
col.status = Status::Unchanged
|
|
}
|
|
}
|
|
|
|
pub fn score(&self, haystack: &[Utf32String], matcher: &mut Matcher) -> Option<u32> {
|
|
// TODO: wheight columns?
|
|
let mut score = 0;
|
|
for (pattern, haystack) in self.cols.iter().zip(haystack) {
|
|
score += pattern.score(haystack.slice(..), matcher)?
|
|
}
|
|
Some(score)
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, Debug)]
|
|
pub struct Pattern {
|
|
terms: Vec<PatternAtom>,
|
|
case_matching: CaseMatching,
|
|
normalize: bool,
|
|
status: Status,
|
|
}
|
|
|
|
impl Pattern {
|
|
pub fn new(matcher_config: &MatcherConfig, case_matching: CaseMatching) -> Pattern {
|
|
Pattern {
|
|
terms: Vec::new(),
|
|
case_matching,
|
|
normalize: matcher_config.normalize,
|
|
status: Status::Unchanged,
|
|
}
|
|
}
|
|
pub fn new_fuzzy_literal(
|
|
matcher_config: &MatcherConfig,
|
|
case_matching: CaseMatching,
|
|
pattern: &str,
|
|
) -> Pattern {
|
|
let mut res = Pattern {
|
|
terms: Vec::new(),
|
|
case_matching,
|
|
normalize: matcher_config.normalize,
|
|
status: Status::Unchanged,
|
|
};
|
|
res.set_literal(pattern, PatternKind::Fuzzy, false);
|
|
res
|
|
}
|
|
|
|
pub fn score(&self, haystack: Utf32Str<'_>, matcher: &mut Matcher) -> Option<u32> {
|
|
if self.terms.is_empty() {
|
|
return Some(0);
|
|
}
|
|
let mut score = 0;
|
|
for pattern in &self.terms {
|
|
matcher.config.ignore_case = pattern.ignore_case;
|
|
let pattern_score = match pattern.kind {
|
|
PatternKind::Exact => matcher.exact_match(haystack, pattern.needle.slice(..)),
|
|
PatternKind::Fuzzy => matcher.fuzzy_match(haystack, pattern.needle.slice(..)),
|
|
PatternKind::Substring => {
|
|
matcher.substring_match(haystack, pattern.needle.slice(..))
|
|
}
|
|
PatternKind::Prefix => matcher.prefix_match(haystack, pattern.needle.slice(..)),
|
|
PatternKind::Postfix => matcher.postfix_match(haystack, pattern.needle.slice(..)),
|
|
};
|
|
if pattern.invert {
|
|
if pattern_score.is_some() {
|
|
return None;
|
|
}
|
|
} else {
|
|
score += pattern_score? as u32
|
|
}
|
|
}
|
|
Some(score)
|
|
}
|
|
|
|
pub fn indices(
|
|
&self,
|
|
haystack: Utf32Str<'_>,
|
|
matcher: &mut Matcher,
|
|
indices: &mut Vec<u32>,
|
|
) -> Option<u32> {
|
|
if self.terms.is_empty() {
|
|
return Some(0);
|
|
}
|
|
let mut score = 0;
|
|
for pattern in &self.terms {
|
|
matcher.config.ignore_case = pattern.ignore_case;
|
|
if pattern.invert {
|
|
let pattern_score = match pattern.kind {
|
|
PatternKind::Exact => matcher.exact_match(haystack, pattern.needle.slice(..)),
|
|
PatternKind::Fuzzy => matcher.fuzzy_match(haystack, pattern.needle.slice(..)),
|
|
PatternKind::Substring => {
|
|
matcher.substring_match(haystack, pattern.needle.slice(..))
|
|
}
|
|
PatternKind::Prefix => matcher.prefix_match(haystack, pattern.needle.slice(..)),
|
|
PatternKind::Postfix => {
|
|
matcher.postfix_match(haystack, pattern.needle.slice(..))
|
|
}
|
|
};
|
|
if pattern_score.is_some() {
|
|
return None;
|
|
}
|
|
continue;
|
|
}
|
|
let pattern_score = match pattern.kind {
|
|
PatternKind::Exact => {
|
|
matcher.exact_indices(haystack, pattern.needle.slice(..), indices)
|
|
}
|
|
PatternKind::Fuzzy => {
|
|
matcher.fuzzy_indices(haystack, pattern.needle.slice(..), indices)
|
|
}
|
|
PatternKind::Substring => {
|
|
matcher.substring_indices(haystack, pattern.needle.slice(..), indices)
|
|
}
|
|
PatternKind::Prefix => {
|
|
matcher.prefix_indices(haystack, pattern.needle.slice(..), indices)
|
|
}
|
|
PatternKind::Postfix => {
|
|
matcher.postfix_indices(haystack, pattern.needle.slice(..), indices)
|
|
}
|
|
};
|
|
score += pattern_score? as u32
|
|
}
|
|
Some(score)
|
|
}
|
|
|
|
pub fn parse_from(&mut self, pattern: &str, append: bool) {
|
|
self.terms.clear();
|
|
let invert = self.terms.last().map_or(false, |pat| pat.invert);
|
|
let atoms = pattern_atoms(pattern).filter_map(|atom| {
|
|
let atom = PatternAtom::parse(atom, self.normalize, self.case_matching);
|
|
if atom.needle.is_empty() {
|
|
return None;
|
|
}
|
|
Some(atom)
|
|
});
|
|
self.terms.extend(atoms);
|
|
|
|
self.status = if append && !invert && self.status != Status::Rescore {
|
|
Status::Update
|
|
} else {
|
|
Status::Rescore
|
|
};
|
|
}
|
|
|
|
pub fn set_literal(&mut self, pattern: &str, kind: PatternKind, append: bool) {
|
|
self.terms.clear();
|
|
let pattern =
|
|
PatternAtom::literal(pattern, self.normalize, self.case_matching, kind, false);
|
|
self.terms.push(pattern);
|
|
self.status = if append && self.status != Status::Rescore {
|
|
Status::Update
|
|
} else {
|
|
Status::Rescore
|
|
};
|
|
}
|
|
|
|
pub fn is_empty(&self) -> bool {
|
|
self.terms.is_empty()
|
|
}
|
|
}
|
|
|
|
fn pattern_atoms(pattern: &str) -> impl Iterator<Item = &str> + '_ {
|
|
let mut saw_backslash = false;
|
|
pattern.split(move |c| {
|
|
saw_backslash = match c {
|
|
' ' if !saw_backslash => return true,
|
|
'\\' => true,
|
|
_ => false,
|
|
};
|
|
false
|
|
})
|
|
}
|