mirror of
https://github.com/solaeus/nucleo.git
synced 2025-01-21 23:37:47 +00:00
correctly handle empty strings and strip leading and trailing whitespace
This commit is contained in:
parent
6b08991fac
commit
4fc0281dd7
@ -129,9 +129,12 @@ impl Matcher {
|
||||
needle_: Utf32Str<'_>,
|
||||
indices: &mut Vec<u32>,
|
||||
) -> Option<u16> {
|
||||
if needle_.len() > haystack_.len() || needle_.is_empty() {
|
||||
if needle_.len() > haystack_.len() {
|
||||
return None;
|
||||
}
|
||||
if needle_.is_empty() {
|
||||
return Some(0);
|
||||
}
|
||||
if needle_.len() == haystack_.len() {
|
||||
return self.exact_match_impl::<INDICES>(
|
||||
haystack_,
|
||||
@ -262,9 +265,12 @@ impl Matcher {
|
||||
needle_: Utf32Str<'_>,
|
||||
indices: &mut Vec<u32>,
|
||||
) -> Option<u16> {
|
||||
if needle_.len() > haystack.len() || needle_.is_empty() {
|
||||
if needle_.len() > haystack.len() {
|
||||
return None;
|
||||
}
|
||||
if needle_.is_empty() {
|
||||
return Some(0);
|
||||
}
|
||||
if needle_.len() == haystack.len() {
|
||||
return self.exact_match_impl::<INDICES>(haystack, needle_, 0, haystack.len(), indices);
|
||||
}
|
||||
@ -358,9 +364,12 @@ impl Matcher {
|
||||
needle_: Utf32Str<'_>,
|
||||
indices: &mut Vec<u32>,
|
||||
) -> Option<u16> {
|
||||
if needle_.len() > haystack.len() || needle_.is_empty() {
|
||||
if needle_.len() > haystack.len() {
|
||||
return None;
|
||||
}
|
||||
if needle_.is_empty() {
|
||||
return Some(0);
|
||||
}
|
||||
if needle_.len() == haystack.len() {
|
||||
return self.exact_match_impl::<INDICES>(haystack, needle_, 0, haystack.len(), indices);
|
||||
}
|
||||
@ -425,7 +434,28 @@ impl Matcher {
|
||||
///
|
||||
/// See the [matcher documentation](crate::Matcher) for more details.
|
||||
pub fn exact_match(&mut self, haystack: Utf32Str<'_>, needle: Utf32Str<'_>) -> Option<u16> {
|
||||
self.exact_match_impl::<false>(haystack, needle, 0, haystack.len(), &mut Vec::new())
|
||||
if needle.is_empty() {
|
||||
return Some(0);
|
||||
}
|
||||
let mut leading_space = 0;
|
||||
let mut trailing_space = 0;
|
||||
if !needle.first().is_whitespace() {
|
||||
leading_space = haystack.leading_white_space()
|
||||
}
|
||||
if !needle.last().is_whitespace() {
|
||||
trailing_space = haystack.trailing_white_space()
|
||||
}
|
||||
// avoid wraparound in size check
|
||||
if trailing_space == haystack.len() {
|
||||
return None;
|
||||
}
|
||||
self.exact_match_impl::<false>(
|
||||
haystack,
|
||||
needle,
|
||||
leading_space,
|
||||
haystack.len() - trailing_space,
|
||||
&mut Vec::new(),
|
||||
)
|
||||
}
|
||||
|
||||
/// Checks whether needle and haystack match exactly and compute the matches indices.
|
||||
@ -439,7 +469,28 @@ impl Matcher {
|
||||
needle: Utf32Str<'_>,
|
||||
indices: &mut Vec<u32>,
|
||||
) -> Option<u16> {
|
||||
self.exact_match_impl::<true>(haystack, needle, 0, haystack.len(), indices)
|
||||
if needle.is_empty() {
|
||||
return Some(0);
|
||||
}
|
||||
let mut leading_space = 0;
|
||||
let mut trailing_space = 0;
|
||||
if !needle.first().is_whitespace() {
|
||||
leading_space = haystack.leading_white_space()
|
||||
}
|
||||
if !needle.last().is_whitespace() {
|
||||
trailing_space = haystack.trailing_white_space()
|
||||
}
|
||||
// avoid wraparound in size check
|
||||
if trailing_space == haystack.len() {
|
||||
return None;
|
||||
}
|
||||
self.exact_match_impl::<true>(
|
||||
haystack,
|
||||
needle,
|
||||
leading_space,
|
||||
haystack.len() - trailing_space,
|
||||
indices,
|
||||
)
|
||||
}
|
||||
|
||||
/// Checks whether needle is a prefix of the haystack.
|
||||
@ -448,10 +499,23 @@ impl Matcher {
|
||||
///
|
||||
/// See the [matcher documentation](crate::Matcher) for more details.
|
||||
pub fn prefix_match(&mut self, haystack: Utf32Str<'_>, needle: Utf32Str<'_>) -> Option<u16> {
|
||||
if haystack.len() < needle.len() {
|
||||
if needle.is_empty() {
|
||||
return Some(0);
|
||||
}
|
||||
let mut leading_space = 0;
|
||||
if !needle.first().is_whitespace() {
|
||||
leading_space = haystack.leading_white_space()
|
||||
}
|
||||
if haystack.len() - leading_space < needle.len() {
|
||||
None
|
||||
} else {
|
||||
self.exact_match_impl::<false>(haystack, needle, 0, needle.len(), &mut Vec::new())
|
||||
self.exact_match_impl::<false>(
|
||||
haystack,
|
||||
needle,
|
||||
leading_space,
|
||||
needle.len() + leading_space,
|
||||
&mut Vec::new(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@ -466,10 +530,23 @@ impl Matcher {
|
||||
needle: Utf32Str<'_>,
|
||||
indices: &mut Vec<u32>,
|
||||
) -> Option<u16> {
|
||||
if haystack.len() < needle.len() {
|
||||
if needle.is_empty() {
|
||||
return Some(0);
|
||||
}
|
||||
let mut leading_space = 0;
|
||||
if !needle.first().is_whitespace() {
|
||||
leading_space = haystack.leading_white_space()
|
||||
}
|
||||
if haystack.len() - leading_space < needle.len() {
|
||||
None
|
||||
} else {
|
||||
self.exact_match_impl::<true>(haystack, needle, 0, needle.len(), indices)
|
||||
self.exact_match_impl::<true>(
|
||||
haystack,
|
||||
needle,
|
||||
leading_space,
|
||||
needle.len() + leading_space,
|
||||
indices,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@ -479,14 +556,21 @@ impl Matcher {
|
||||
///
|
||||
/// See the [matcher documentation](crate::Matcher) for more details.
|
||||
pub fn postfix_match(&mut self, haystack: Utf32Str<'_>, needle: Utf32Str<'_>) -> Option<u16> {
|
||||
if haystack.len() < needle.len() {
|
||||
if needle.is_empty() {
|
||||
return Some(0);
|
||||
}
|
||||
let mut trailing_spaces = 0;
|
||||
if !needle.last().is_whitespace() {
|
||||
trailing_spaces = haystack.trailing_white_space()
|
||||
}
|
||||
if haystack.len() - trailing_spaces < needle.len() {
|
||||
None
|
||||
} else {
|
||||
self.exact_match_impl::<false>(
|
||||
haystack,
|
||||
needle,
|
||||
haystack.len() - needle.len(),
|
||||
haystack.len(),
|
||||
haystack.len() - needle.len() - trailing_spaces,
|
||||
haystack.len() - trailing_spaces,
|
||||
&mut Vec::new(),
|
||||
)
|
||||
}
|
||||
@ -503,14 +587,21 @@ impl Matcher {
|
||||
needle: Utf32Str<'_>,
|
||||
indices: &mut Vec<u32>,
|
||||
) -> Option<u16> {
|
||||
if haystack.len() < needle.len() {
|
||||
if needle.is_empty() {
|
||||
return Some(0);
|
||||
}
|
||||
let mut trailing_spaces = 0;
|
||||
if !needle.last().is_whitespace() {
|
||||
trailing_spaces = haystack.trailing_white_space()
|
||||
}
|
||||
if haystack.len() - trailing_spaces < needle.len() {
|
||||
None
|
||||
} else {
|
||||
self.exact_match_impl::<true>(
|
||||
haystack,
|
||||
needle,
|
||||
haystack.len() - needle.len(),
|
||||
haystack.len(),
|
||||
haystack.len() - needle.len() - trailing_spaces,
|
||||
haystack.len() - trailing_spaces,
|
||||
indices,
|
||||
)
|
||||
}
|
||||
@ -524,7 +615,7 @@ impl Matcher {
|
||||
end: usize,
|
||||
indices: &mut Vec<u32>,
|
||||
) -> Option<u16> {
|
||||
if needle_.len() != end - start || needle_.is_empty() {
|
||||
if needle_.len() != end - start {
|
||||
return None;
|
||||
}
|
||||
assert!(
|
||||
|
@ -13,6 +13,9 @@ enum Algorithm {
|
||||
FuzzyOptimal,
|
||||
FuzzyGreedy,
|
||||
Substring,
|
||||
Prefix,
|
||||
Postfix,
|
||||
Exact,
|
||||
}
|
||||
|
||||
fn assert_matches(
|
||||
@ -50,6 +53,9 @@ fn assert_matches(
|
||||
FuzzyOptimal => matcher.fuzzy_indices(haystack, needle, &mut matched_indices),
|
||||
FuzzyGreedy => matcher.fuzzy_indices_greedy(haystack, needle, &mut matched_indices),
|
||||
Substring => matcher.substring_indices(haystack, needle, &mut matched_indices),
|
||||
Prefix => matcher.prefix_indices(haystack, needle, &mut matched_indices),
|
||||
Postfix => matcher.postfix_indices(haystack, needle, &mut matched_indices),
|
||||
Exact => matcher.exact_indices(haystack, needle, &mut matched_indices),
|
||||
};
|
||||
println!("{matched_indices:?}");
|
||||
let match_chars: Vec<_> = matched_indices
|
||||
@ -107,7 +113,22 @@ pub fn assert_not_matches(
|
||||
assert_eq!(
|
||||
res, None,
|
||||
"{needle:?} should not match {haystack:?} (greedy)"
|
||||
)
|
||||
);
|
||||
let res = matcher.substring_match(haystack, needle);
|
||||
assert_eq!(
|
||||
res, None,
|
||||
"{needle:?} should not match {haystack:?} (substring)"
|
||||
);
|
||||
let res = matcher.prefix_match(haystack, needle);
|
||||
assert_eq!(
|
||||
res, None,
|
||||
"{needle:?} should not match {haystack:?} (prefix)"
|
||||
);
|
||||
let res = matcher.postfix_match(haystack, needle);
|
||||
assert_eq!(
|
||||
res, None,
|
||||
"{needle:?} should not match {haystack:?} (postfix)"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@ -222,8 +243,97 @@ fn test_fuzzy() {
|
||||
);
|
||||
}
|
||||
|
||||
// Feeds an empty needle ("") against a non-empty haystack to every algorithm
// listed in the first argument. Judging by the other `assert_matches` cases,
// the tuple is presumably (haystack, needle, expected indices, expected score),
// i.e. an empty needle should match with no indices and a score of 0.
// NOTE(review): the meaning of the three `false` flags is defined by
// `assert_matches`, which is not fully visible here; confirm there.
#[test]
|
||||
fn empty_needle() {
|
||||
assert_matches(
|
||||
&[Substring, Prefix, Postfix, FuzzyGreedy, FuzzyOptimal, Exact],
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
&[("foo bar baz", "", &[], 0)],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_substring() {
|
||||
assert_matches(
|
||||
&[Substring, Prefix],
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
&[
|
||||
(
|
||||
"foo bar baz",
|
||||
"foo",
|
||||
&[0, 1, 2],
|
||||
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 2),
|
||||
),
|
||||
(
|
||||
" foo bar baz",
|
||||
"FOO",
|
||||
&[1, 2, 3],
|
||||
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 2),
|
||||
),
|
||||
(
|
||||
" foo bar baz",
|
||||
" FOO",
|
||||
&[0, 1, 2, 3],
|
||||
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 3),
|
||||
),
|
||||
],
|
||||
);
|
||||
assert_matches(
|
||||
&[Substring, Postfix],
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
&[
|
||||
(
|
||||
"foo bar baz",
|
||||
"baz",
|
||||
&[8, 9, 10],
|
||||
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 2),
|
||||
),
|
||||
(
|
||||
"foo bar baz ",
|
||||
"baz",
|
||||
&[8, 9, 10],
|
||||
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 2),
|
||||
),
|
||||
(
|
||||
"foo bar baz ",
|
||||
"baz ",
|
||||
&[8, 9, 10, 11],
|
||||
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 3),
|
||||
),
|
||||
],
|
||||
);
|
||||
assert_matches(
|
||||
&[Substring, Prefix, Postfix, Exact, FuzzyGreedy, FuzzyOptimal],
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
&[
|
||||
(
|
||||
"foo",
|
||||
"foo",
|
||||
&[0, 1, 2],
|
||||
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 2),
|
||||
),
|
||||
(
|
||||
" foo",
|
||||
"foo",
|
||||
&[1, 2, 3],
|
||||
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 2),
|
||||
),
|
||||
(
|
||||
" foo",
|
||||
" foo",
|
||||
&[0, 1, 2, 3],
|
||||
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 3),
|
||||
),
|
||||
],
|
||||
);
|
||||
assert_matches(
|
||||
&[Substring],
|
||||
false,
|
||||
@ -236,18 +346,6 @@ fn test_substring() {
|
||||
&[2, 3, 4],
|
||||
BONUS_CAMEL123 + BONUS_CONSECUTIVE,
|
||||
),
|
||||
(
|
||||
"foo bar baz",
|
||||
"foo",
|
||||
&[0, 1, 2],
|
||||
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 2),
|
||||
),
|
||||
(
|
||||
"foo bar baz",
|
||||
"FOO",
|
||||
&[0, 1, 2],
|
||||
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 2),
|
||||
),
|
||||
(
|
||||
"/AutomatorDocument.icns",
|
||||
"rdoc",
|
||||
|
@ -52,14 +52,14 @@ impl<'a> Utf32Str<'a> {
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn len(&self) -> usize {
|
||||
pub fn len(self) -> usize {
|
||||
match self {
|
||||
Utf32Str::Unicode(codepoints) => codepoints.len(),
|
||||
Utf32Str::Ascii(ascii_bytes) => ascii_bytes.len(),
|
||||
}
|
||||
}
|
||||
#[inline]
|
||||
pub fn is_empty(&self) -> bool {
|
||||
pub fn is_empty(self) -> bool {
|
||||
match self {
|
||||
Utf32Str::Unicode(codepoints) => codepoints.is_empty(),
|
||||
Utf32Str::Ascii(ascii_bytes) => ascii_bytes.is_empty(),
|
||||
@ -67,15 +67,15 @@ impl<'a> Utf32Str<'a> {
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn slice(&self, range: impl RangeBounds<usize>) -> Utf32Str {
|
||||
pub fn slice(self, range: impl RangeBounds<usize>) -> Utf32Str<'a> {
|
||||
let start = match range.start_bound() {
|
||||
Bound::Included(&start) => start,
|
||||
Bound::Excluded(&start) => start + 1,
|
||||
Bound::Unbounded => 0,
|
||||
};
|
||||
let end = match range.end_bound() {
|
||||
Bound::Included(&end) => end,
|
||||
Bound::Excluded(&end) => end + 1,
|
||||
Bound::Included(&end) => end + 1,
|
||||
Bound::Excluded(&end) => end,
|
||||
Bound::Unbounded => self.len(),
|
||||
};
|
||||
match self {
|
||||
@ -84,18 +84,50 @@ impl<'a> Utf32Str<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the number of leading whitespaces in this string
|
||||
#[inline]
|
||||
pub fn leading_white_space(self) -> usize {
|
||||
match self {
|
||||
Utf32Str::Ascii(bytes) => bytes
|
||||
.iter()
|
||||
.position(|b| !b.is_ascii_whitespace())
|
||||
.unwrap_or(0),
|
||||
Utf32Str::Unicode(codepoints) => codepoints
|
||||
.iter()
|
||||
.position(|c| !c.is_whitespace())
|
||||
.unwrap_or(0),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the number of leading whitespaces in this string
|
||||
#[inline]
|
||||
pub fn trailing_white_space(self) -> usize {
|
||||
match self {
|
||||
Utf32Str::Ascii(bytes) => bytes
|
||||
.iter()
|
||||
.rev()
|
||||
.position(|b| !b.is_ascii_whitespace())
|
||||
.unwrap_or(0),
|
||||
Utf32Str::Unicode(codepoints) => codepoints
|
||||
.iter()
|
||||
.rev()
|
||||
.position(|c| !c.is_whitespace())
|
||||
.unwrap_or(0),
|
||||
}
|
||||
}
|
||||
|
||||
/// Same as `slice` but accepts a u32 range for convenience since
|
||||
/// those are the indices returned by the matcher
|
||||
#[inline]
|
||||
pub fn slice_u32(&self, range: impl RangeBounds<u32>) -> Utf32Str {
|
||||
pub fn slice_u32(self, range: impl RangeBounds<u32>) -> Utf32Str<'a> {
|
||||
let start = match range.start_bound() {
|
||||
Bound::Included(&start) => start as usize,
|
||||
Bound::Excluded(&start) => start as usize + 1,
|
||||
Bound::Unbounded => 0,
|
||||
};
|
||||
let end = match range.end_bound() {
|
||||
Bound::Included(&end) => end as usize,
|
||||
Bound::Excluded(&end) => end as usize + 1,
|
||||
Bound::Included(&end) => end as usize + 1,
|
||||
Bound::Excluded(&end) => end as usize,
|
||||
Bound::Unbounded => self.len(),
|
||||
};
|
||||
match self {
|
||||
@ -103,23 +135,30 @@ impl<'a> Utf32Str<'a> {
|
||||
Utf32Str::Unicode(codepoints) => Utf32Str::Unicode(&codepoints[start..end]),
|
||||
}
|
||||
}
|
||||
pub fn is_ascii(&self) -> bool {
|
||||
pub fn is_ascii(self) -> bool {
|
||||
matches!(self, Utf32Str::Ascii(_))
|
||||
}
|
||||
|
||||
pub fn get(&self, idx: u32) -> char {
|
||||
pub fn get(self, idx: u32) -> char {
|
||||
match self {
|
||||
Utf32Str::Ascii(bytes) => bytes[idx as usize] as char,
|
||||
Utf32Str::Unicode(codepoints) => codepoints[idx as usize],
|
||||
}
|
||||
}
|
||||
pub fn last(&self) -> char {
|
||||
pub fn last(self) -> char {
|
||||
match self {
|
||||
Utf32Str::Ascii(bytes) => bytes[bytes.len() - 1] as char,
|
||||
Utf32Str::Unicode(codepoints) => codepoints[codepoints.len() - 1],
|
||||
}
|
||||
}
|
||||
pub fn chars(&self) -> Chars<'_> {
|
||||
pub fn first(self) -> char {
|
||||
match self {
|
||||
Utf32Str::Ascii(bytes) => bytes[0] as char,
|
||||
Utf32Str::Unicode(codepoints) => codepoints[0],
|
||||
}
|
||||
}
|
||||
|
||||
pub fn chars(self) -> Chars<'a> {
|
||||
match self {
|
||||
Utf32Str::Ascii(bytes) => Chars::Ascii(bytes.iter()),
|
||||
Utf32Str::Unicode(codepoints) => Chars::Unicode(codepoints.iter()),
|
||||
@ -161,3 +200,12 @@ impl<'a> Iterator for Chars<'a> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl DoubleEndedIterator for Chars<'_> {
|
||||
fn next_back(&mut self) -> Option<Self::Item> {
|
||||
match self {
|
||||
Chars::Ascii(iter) => iter.next_back().map(|&c| c as char),
|
||||
Chars::Unicode(iter) => iter.next_back().copied(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -46,8 +46,8 @@ impl Utf32String {
|
||||
Bound::Unbounded => 0,
|
||||
};
|
||||
let end = match range.end_bound() {
|
||||
Bound::Included(&end) => end as usize,
|
||||
Bound::Excluded(&end) => end as usize + 1,
|
||||
Bound::Included(&end) => end as usize + 1,
|
||||
Bound::Excluded(&end) => end as usize,
|
||||
Bound::Unbounded => self.len(),
|
||||
};
|
||||
match self {
|
||||
|
Loading…
x
Reference in New Issue
Block a user