mirror of
https://github.com/solaeus/nucleo.git
synced 2024-12-22 09:57:49 +00:00
correctly handle empty strings and strip leading and trailing whitespace
This commit is contained in:
parent
6b08991fac
commit
4fc0281dd7
@ -129,9 +129,12 @@ impl Matcher {
|
|||||||
needle_: Utf32Str<'_>,
|
needle_: Utf32Str<'_>,
|
||||||
indices: &mut Vec<u32>,
|
indices: &mut Vec<u32>,
|
||||||
) -> Option<u16> {
|
) -> Option<u16> {
|
||||||
if needle_.len() > haystack_.len() || needle_.is_empty() {
|
if needle_.len() > haystack_.len() {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
if needle_.is_empty() {
|
||||||
|
return Some(0);
|
||||||
|
}
|
||||||
if needle_.len() == haystack_.len() {
|
if needle_.len() == haystack_.len() {
|
||||||
return self.exact_match_impl::<INDICES>(
|
return self.exact_match_impl::<INDICES>(
|
||||||
haystack_,
|
haystack_,
|
||||||
@ -262,9 +265,12 @@ impl Matcher {
|
|||||||
needle_: Utf32Str<'_>,
|
needle_: Utf32Str<'_>,
|
||||||
indices: &mut Vec<u32>,
|
indices: &mut Vec<u32>,
|
||||||
) -> Option<u16> {
|
) -> Option<u16> {
|
||||||
if needle_.len() > haystack.len() || needle_.is_empty() {
|
if needle_.len() > haystack.len() {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
if needle_.is_empty() {
|
||||||
|
return Some(0);
|
||||||
|
}
|
||||||
if needle_.len() == haystack.len() {
|
if needle_.len() == haystack.len() {
|
||||||
return self.exact_match_impl::<INDICES>(haystack, needle_, 0, haystack.len(), indices);
|
return self.exact_match_impl::<INDICES>(haystack, needle_, 0, haystack.len(), indices);
|
||||||
}
|
}
|
||||||
@ -358,9 +364,12 @@ impl Matcher {
|
|||||||
needle_: Utf32Str<'_>,
|
needle_: Utf32Str<'_>,
|
||||||
indices: &mut Vec<u32>,
|
indices: &mut Vec<u32>,
|
||||||
) -> Option<u16> {
|
) -> Option<u16> {
|
||||||
if needle_.len() > haystack.len() || needle_.is_empty() {
|
if needle_.len() > haystack.len() {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
if needle_.is_empty() {
|
||||||
|
return Some(0);
|
||||||
|
}
|
||||||
if needle_.len() == haystack.len() {
|
if needle_.len() == haystack.len() {
|
||||||
return self.exact_match_impl::<INDICES>(haystack, needle_, 0, haystack.len(), indices);
|
return self.exact_match_impl::<INDICES>(haystack, needle_, 0, haystack.len(), indices);
|
||||||
}
|
}
|
||||||
@ -425,7 +434,28 @@ impl Matcher {
|
|||||||
///
|
///
|
||||||
/// See the [matcher documentation](crate::Matcher) for more details.
|
/// See the [matcher documentation](crate::Matcher) for more details.
|
||||||
pub fn exact_match(&mut self, haystack: Utf32Str<'_>, needle: Utf32Str<'_>) -> Option<u16> {
|
pub fn exact_match(&mut self, haystack: Utf32Str<'_>, needle: Utf32Str<'_>) -> Option<u16> {
|
||||||
self.exact_match_impl::<false>(haystack, needle, 0, haystack.len(), &mut Vec::new())
|
if needle.is_empty() {
|
||||||
|
return Some(0);
|
||||||
|
}
|
||||||
|
let mut leading_space = 0;
|
||||||
|
let mut trailing_space = 0;
|
||||||
|
if !needle.first().is_whitespace() {
|
||||||
|
leading_space = haystack.leading_white_space()
|
||||||
|
}
|
||||||
|
if !needle.last().is_whitespace() {
|
||||||
|
trailing_space = haystack.trailing_white_space()
|
||||||
|
}
|
||||||
|
// avoid wraparound in size check
|
||||||
|
if trailing_space == haystack.len() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
self.exact_match_impl::<false>(
|
||||||
|
haystack,
|
||||||
|
needle,
|
||||||
|
leading_space,
|
||||||
|
haystack.len() - trailing_space,
|
||||||
|
&mut Vec::new(),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Checks whether needle and haystack match exactly and compute the matches indices.
|
/// Checks whether needle and haystack match exactly and compute the matches indices.
|
||||||
@ -439,7 +469,28 @@ impl Matcher {
|
|||||||
needle: Utf32Str<'_>,
|
needle: Utf32Str<'_>,
|
||||||
indices: &mut Vec<u32>,
|
indices: &mut Vec<u32>,
|
||||||
) -> Option<u16> {
|
) -> Option<u16> {
|
||||||
self.exact_match_impl::<true>(haystack, needle, 0, haystack.len(), indices)
|
if needle.is_empty() {
|
||||||
|
return Some(0);
|
||||||
|
}
|
||||||
|
let mut leading_space = 0;
|
||||||
|
let mut trailing_space = 0;
|
||||||
|
if !needle.first().is_whitespace() {
|
||||||
|
leading_space = haystack.leading_white_space()
|
||||||
|
}
|
||||||
|
if !needle.last().is_whitespace() {
|
||||||
|
trailing_space = haystack.trailing_white_space()
|
||||||
|
}
|
||||||
|
// avoid wraparound in size check
|
||||||
|
if trailing_space == haystack.len() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
self.exact_match_impl::<true>(
|
||||||
|
haystack,
|
||||||
|
needle,
|
||||||
|
leading_space,
|
||||||
|
haystack.len() - trailing_space,
|
||||||
|
indices,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Checks whether needle is a prefix of the haystack.
|
/// Checks whether needle is a prefix of the haystack.
|
||||||
@ -448,10 +499,23 @@ impl Matcher {
|
|||||||
///
|
///
|
||||||
/// See the [matcher documentation](crate::Matcher) for more details.
|
/// See the [matcher documentation](crate::Matcher) for more details.
|
||||||
pub fn prefix_match(&mut self, haystack: Utf32Str<'_>, needle: Utf32Str<'_>) -> Option<u16> {
|
pub fn prefix_match(&mut self, haystack: Utf32Str<'_>, needle: Utf32Str<'_>) -> Option<u16> {
|
||||||
if haystack.len() < needle.len() {
|
if needle.is_empty() {
|
||||||
|
return Some(0);
|
||||||
|
}
|
||||||
|
let mut leading_space = 0;
|
||||||
|
if !needle.first().is_whitespace() {
|
||||||
|
leading_space = haystack.leading_white_space()
|
||||||
|
}
|
||||||
|
if haystack.len() - leading_space < needle.len() {
|
||||||
None
|
None
|
||||||
} else {
|
} else {
|
||||||
self.exact_match_impl::<false>(haystack, needle, 0, needle.len(), &mut Vec::new())
|
self.exact_match_impl::<false>(
|
||||||
|
haystack,
|
||||||
|
needle,
|
||||||
|
leading_space,
|
||||||
|
needle.len() + leading_space,
|
||||||
|
&mut Vec::new(),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -466,10 +530,23 @@ impl Matcher {
|
|||||||
needle: Utf32Str<'_>,
|
needle: Utf32Str<'_>,
|
||||||
indices: &mut Vec<u32>,
|
indices: &mut Vec<u32>,
|
||||||
) -> Option<u16> {
|
) -> Option<u16> {
|
||||||
if haystack.len() < needle.len() {
|
if needle.is_empty() {
|
||||||
|
return Some(0);
|
||||||
|
}
|
||||||
|
let mut leading_space = 0;
|
||||||
|
if !needle.first().is_whitespace() {
|
||||||
|
leading_space = haystack.leading_white_space()
|
||||||
|
}
|
||||||
|
if haystack.len() - leading_space < needle.len() {
|
||||||
None
|
None
|
||||||
} else {
|
} else {
|
||||||
self.exact_match_impl::<true>(haystack, needle, 0, needle.len(), indices)
|
self.exact_match_impl::<true>(
|
||||||
|
haystack,
|
||||||
|
needle,
|
||||||
|
leading_space,
|
||||||
|
needle.len() + leading_space,
|
||||||
|
indices,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -479,14 +556,21 @@ impl Matcher {
|
|||||||
///
|
///
|
||||||
/// See the [matcher documentation](crate::Matcher) for more details.
|
/// See the [matcher documentation](crate::Matcher) for more details.
|
||||||
pub fn postfix_match(&mut self, haystack: Utf32Str<'_>, needle: Utf32Str<'_>) -> Option<u16> {
|
pub fn postfix_match(&mut self, haystack: Utf32Str<'_>, needle: Utf32Str<'_>) -> Option<u16> {
|
||||||
if haystack.len() < needle.len() {
|
if needle.is_empty() {
|
||||||
|
return Some(0);
|
||||||
|
}
|
||||||
|
let mut trailing_spaces = 0;
|
||||||
|
if !needle.last().is_whitespace() {
|
||||||
|
trailing_spaces = haystack.trailing_white_space()
|
||||||
|
}
|
||||||
|
if haystack.len() - trailing_spaces < needle.len() {
|
||||||
None
|
None
|
||||||
} else {
|
} else {
|
||||||
self.exact_match_impl::<false>(
|
self.exact_match_impl::<false>(
|
||||||
haystack,
|
haystack,
|
||||||
needle,
|
needle,
|
||||||
haystack.len() - needle.len(),
|
haystack.len() - needle.len() - trailing_spaces,
|
||||||
haystack.len(),
|
haystack.len() - trailing_spaces,
|
||||||
&mut Vec::new(),
|
&mut Vec::new(),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
@ -503,14 +587,21 @@ impl Matcher {
|
|||||||
needle: Utf32Str<'_>,
|
needle: Utf32Str<'_>,
|
||||||
indices: &mut Vec<u32>,
|
indices: &mut Vec<u32>,
|
||||||
) -> Option<u16> {
|
) -> Option<u16> {
|
||||||
if haystack.len() < needle.len() {
|
if needle.is_empty() {
|
||||||
|
return Some(0);
|
||||||
|
}
|
||||||
|
let mut trailing_spaces = 0;
|
||||||
|
if !needle.last().is_whitespace() {
|
||||||
|
trailing_spaces = haystack.trailing_white_space()
|
||||||
|
}
|
||||||
|
if haystack.len() - trailing_spaces < needle.len() {
|
||||||
None
|
None
|
||||||
} else {
|
} else {
|
||||||
self.exact_match_impl::<true>(
|
self.exact_match_impl::<true>(
|
||||||
haystack,
|
haystack,
|
||||||
needle,
|
needle,
|
||||||
haystack.len() - needle.len(),
|
haystack.len() - needle.len() - trailing_spaces,
|
||||||
haystack.len(),
|
haystack.len() - trailing_spaces,
|
||||||
indices,
|
indices,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
@ -524,7 +615,7 @@ impl Matcher {
|
|||||||
end: usize,
|
end: usize,
|
||||||
indices: &mut Vec<u32>,
|
indices: &mut Vec<u32>,
|
||||||
) -> Option<u16> {
|
) -> Option<u16> {
|
||||||
if needle_.len() != end - start || needle_.is_empty() {
|
if needle_.len() != end - start {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
assert!(
|
assert!(
|
||||||
|
@ -13,6 +13,9 @@ enum Algorithm {
|
|||||||
FuzzyOptimal,
|
FuzzyOptimal,
|
||||||
FuzzyGreedy,
|
FuzzyGreedy,
|
||||||
Substring,
|
Substring,
|
||||||
|
Prefix,
|
||||||
|
Postfix,
|
||||||
|
Exact,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn assert_matches(
|
fn assert_matches(
|
||||||
@ -50,6 +53,9 @@ fn assert_matches(
|
|||||||
FuzzyOptimal => matcher.fuzzy_indices(haystack, needle, &mut matched_indices),
|
FuzzyOptimal => matcher.fuzzy_indices(haystack, needle, &mut matched_indices),
|
||||||
FuzzyGreedy => matcher.fuzzy_indices_greedy(haystack, needle, &mut matched_indices),
|
FuzzyGreedy => matcher.fuzzy_indices_greedy(haystack, needle, &mut matched_indices),
|
||||||
Substring => matcher.substring_indices(haystack, needle, &mut matched_indices),
|
Substring => matcher.substring_indices(haystack, needle, &mut matched_indices),
|
||||||
|
Prefix => matcher.prefix_indices(haystack, needle, &mut matched_indices),
|
||||||
|
Postfix => matcher.postfix_indices(haystack, needle, &mut matched_indices),
|
||||||
|
Exact => matcher.exact_indices(haystack, needle, &mut matched_indices),
|
||||||
};
|
};
|
||||||
println!("{matched_indices:?}");
|
println!("{matched_indices:?}");
|
||||||
let match_chars: Vec<_> = matched_indices
|
let match_chars: Vec<_> = matched_indices
|
||||||
@ -107,7 +113,22 @@ pub fn assert_not_matches(
|
|||||||
assert_eq!(
|
assert_eq!(
|
||||||
res, None,
|
res, None,
|
||||||
"{needle:?} should not match {haystack:?} (greedy)"
|
"{needle:?} should not match {haystack:?} (greedy)"
|
||||||
)
|
);
|
||||||
|
let res = matcher.substring_match(haystack, needle);
|
||||||
|
assert_eq!(
|
||||||
|
res, None,
|
||||||
|
"{needle:?} should not match {haystack:?} (substring)"
|
||||||
|
);
|
||||||
|
let res = matcher.prefix_match(haystack, needle);
|
||||||
|
assert_eq!(
|
||||||
|
res, None,
|
||||||
|
"{needle:?} should not match {haystack:?} (prefix)"
|
||||||
|
);
|
||||||
|
let res = matcher.postfix_match(haystack, needle);
|
||||||
|
assert_eq!(
|
||||||
|
res, None,
|
||||||
|
"{needle:?} should not match {haystack:?} (postfix)"
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -222,8 +243,97 @@ fn test_fuzzy() {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Regression test for empty needles: the single case below pairs the haystack
// "foo bar baz" with an empty needle and is run against every algorithm listed
// in the first argument.
// NOTE(review): going by the sibling cases in this file, the tuple looks like
// (haystack, needle, expected matched indices, expected score), i.e. an empty
// needle should match with no indices and a score of 0 - confirm against
// `assert_matches`. The meaning of the three `false` flags is not visible here.
#[test]
|
||||||
|
fn empty_needle() {
|
||||||
|
assert_matches(
|
||||||
|
&[Substring, Prefix, Postfix, FuzzyGreedy, FuzzyOptimal, Exact],
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
&[("foo bar baz", "", &[], 0)],
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_substring() {
|
fn test_substring() {
|
||||||
|
assert_matches(
|
||||||
|
&[Substring, Prefix],
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
&[
|
||||||
|
(
|
||||||
|
"foo bar baz",
|
||||||
|
"foo",
|
||||||
|
&[0, 1, 2],
|
||||||
|
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 2),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
" foo bar baz",
|
||||||
|
"FOO",
|
||||||
|
&[1, 2, 3],
|
||||||
|
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 2),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
" foo bar baz",
|
||||||
|
" FOO",
|
||||||
|
&[0, 1, 2, 3],
|
||||||
|
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 3),
|
||||||
|
),
|
||||||
|
],
|
||||||
|
);
|
||||||
|
assert_matches(
|
||||||
|
&[Substring, Postfix],
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
&[
|
||||||
|
(
|
||||||
|
"foo bar baz",
|
||||||
|
"baz",
|
||||||
|
&[8, 9, 10],
|
||||||
|
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 2),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"foo bar baz ",
|
||||||
|
"baz",
|
||||||
|
&[8, 9, 10],
|
||||||
|
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 2),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"foo bar baz ",
|
||||||
|
"baz ",
|
||||||
|
&[8, 9, 10, 11],
|
||||||
|
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 3),
|
||||||
|
),
|
||||||
|
],
|
||||||
|
);
|
||||||
|
assert_matches(
|
||||||
|
&[Substring, Prefix, Postfix, Exact, FuzzyGreedy, FuzzyOptimal],
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
&[
|
||||||
|
(
|
||||||
|
"foo",
|
||||||
|
"foo",
|
||||||
|
&[0, 1, 2],
|
||||||
|
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 2),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
" foo",
|
||||||
|
"foo",
|
||||||
|
&[1, 2, 3],
|
||||||
|
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 2),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
" foo",
|
||||||
|
" foo",
|
||||||
|
&[0, 1, 2, 3],
|
||||||
|
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 3),
|
||||||
|
),
|
||||||
|
],
|
||||||
|
);
|
||||||
assert_matches(
|
assert_matches(
|
||||||
&[Substring],
|
&[Substring],
|
||||||
false,
|
false,
|
||||||
@ -236,18 +346,6 @@ fn test_substring() {
|
|||||||
&[2, 3, 4],
|
&[2, 3, 4],
|
||||||
BONUS_CAMEL123 + BONUS_CONSECUTIVE,
|
BONUS_CAMEL123 + BONUS_CONSECUTIVE,
|
||||||
),
|
),
|
||||||
(
|
|
||||||
"foo bar baz",
|
|
||||||
"foo",
|
|
||||||
&[0, 1, 2],
|
|
||||||
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 2),
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"foo bar baz",
|
|
||||||
"FOO",
|
|
||||||
&[0, 1, 2],
|
|
||||||
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 2),
|
|
||||||
),
|
|
||||||
(
|
(
|
||||||
"/AutomatorDocument.icns",
|
"/AutomatorDocument.icns",
|
||||||
"rdoc",
|
"rdoc",
|
||||||
|
@ -52,14 +52,14 @@ impl<'a> Utf32Str<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn len(&self) -> usize {
|
pub fn len(self) -> usize {
|
||||||
match self {
|
match self {
|
||||||
Utf32Str::Unicode(codepoints) => codepoints.len(),
|
Utf32Str::Unicode(codepoints) => codepoints.len(),
|
||||||
Utf32Str::Ascii(ascii_bytes) => ascii_bytes.len(),
|
Utf32Str::Ascii(ascii_bytes) => ascii_bytes.len(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn is_empty(&self) -> bool {
|
pub fn is_empty(self) -> bool {
|
||||||
match self {
|
match self {
|
||||||
Utf32Str::Unicode(codepoints) => codepoints.is_empty(),
|
Utf32Str::Unicode(codepoints) => codepoints.is_empty(),
|
||||||
Utf32Str::Ascii(ascii_bytes) => ascii_bytes.is_empty(),
|
Utf32Str::Ascii(ascii_bytes) => ascii_bytes.is_empty(),
|
||||||
@ -67,15 +67,15 @@ impl<'a> Utf32Str<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn slice(&self, range: impl RangeBounds<usize>) -> Utf32Str {
|
pub fn slice(self, range: impl RangeBounds<usize>) -> Utf32Str<'a> {
|
||||||
let start = match range.start_bound() {
|
let start = match range.start_bound() {
|
||||||
Bound::Included(&start) => start,
|
Bound::Included(&start) => start,
|
||||||
Bound::Excluded(&start) => start + 1,
|
Bound::Excluded(&start) => start + 1,
|
||||||
Bound::Unbounded => 0,
|
Bound::Unbounded => 0,
|
||||||
};
|
};
|
||||||
let end = match range.end_bound() {
|
let end = match range.end_bound() {
|
||||||
Bound::Included(&end) => end,
|
Bound::Included(&end) => end + 1,
|
||||||
Bound::Excluded(&end) => end + 1,
|
Bound::Excluded(&end) => end,
|
||||||
Bound::Unbounded => self.len(),
|
Bound::Unbounded => self.len(),
|
||||||
};
|
};
|
||||||
match self {
|
match self {
|
||||||
@ -84,18 +84,50 @@ impl<'a> Utf32Str<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the number of leading whitespaces in this string
|
||||||
|
#[inline]
|
||||||
|
pub fn leading_white_space(self) -> usize {
|
||||||
|
match self {
|
||||||
|
Utf32Str::Ascii(bytes) => bytes
|
||||||
|
.iter()
|
||||||
|
.position(|b| !b.is_ascii_whitespace())
|
||||||
|
.unwrap_or(0),
|
||||||
|
Utf32Str::Unicode(codepoints) => codepoints
|
||||||
|
.iter()
|
||||||
|
.position(|c| !c.is_whitespace())
|
||||||
|
.unwrap_or(0),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the number of leading whitespaces in this string
|
||||||
|
#[inline]
|
||||||
|
pub fn trailing_white_space(self) -> usize {
|
||||||
|
match self {
|
||||||
|
Utf32Str::Ascii(bytes) => bytes
|
||||||
|
.iter()
|
||||||
|
.rev()
|
||||||
|
.position(|b| !b.is_ascii_whitespace())
|
||||||
|
.unwrap_or(0),
|
||||||
|
Utf32Str::Unicode(codepoints) => codepoints
|
||||||
|
.iter()
|
||||||
|
.rev()
|
||||||
|
.position(|c| !c.is_whitespace())
|
||||||
|
.unwrap_or(0),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Same as `slice` but accepts a u32 range for convenience since
|
/// Same as `slice` but accepts a u32 range for convenience since
|
||||||
/// those are the indices returned by the matcher
|
/// those are the indices returned by the matcher
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn slice_u32(&self, range: impl RangeBounds<u32>) -> Utf32Str {
|
pub fn slice_u32(self, range: impl RangeBounds<u32>) -> Utf32Str<'a> {
|
||||||
let start = match range.start_bound() {
|
let start = match range.start_bound() {
|
||||||
Bound::Included(&start) => start as usize,
|
Bound::Included(&start) => start as usize,
|
||||||
Bound::Excluded(&start) => start as usize + 1,
|
Bound::Excluded(&start) => start as usize + 1,
|
||||||
Bound::Unbounded => 0,
|
Bound::Unbounded => 0,
|
||||||
};
|
};
|
||||||
let end = match range.end_bound() {
|
let end = match range.end_bound() {
|
||||||
Bound::Included(&end) => end as usize,
|
Bound::Included(&end) => end as usize + 1,
|
||||||
Bound::Excluded(&end) => end as usize + 1,
|
Bound::Excluded(&end) => end as usize,
|
||||||
Bound::Unbounded => self.len(),
|
Bound::Unbounded => self.len(),
|
||||||
};
|
};
|
||||||
match self {
|
match self {
|
||||||
@ -103,23 +135,30 @@ impl<'a> Utf32Str<'a> {
|
|||||||
Utf32Str::Unicode(codepoints) => Utf32Str::Unicode(&codepoints[start..end]),
|
Utf32Str::Unicode(codepoints) => Utf32Str::Unicode(&codepoints[start..end]),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub fn is_ascii(&self) -> bool {
|
pub fn is_ascii(self) -> bool {
|
||||||
matches!(self, Utf32Str::Ascii(_))
|
matches!(self, Utf32Str::Ascii(_))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get(&self, idx: u32) -> char {
|
pub fn get(self, idx: u32) -> char {
|
||||||
match self {
|
match self {
|
||||||
Utf32Str::Ascii(bytes) => bytes[idx as usize] as char,
|
Utf32Str::Ascii(bytes) => bytes[idx as usize] as char,
|
||||||
Utf32Str::Unicode(codepoints) => codepoints[idx as usize],
|
Utf32Str::Unicode(codepoints) => codepoints[idx as usize],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub fn last(&self) -> char {
|
pub fn last(self) -> char {
|
||||||
match self {
|
match self {
|
||||||
Utf32Str::Ascii(bytes) => bytes[bytes.len() - 1] as char,
|
Utf32Str::Ascii(bytes) => bytes[bytes.len() - 1] as char,
|
||||||
Utf32Str::Unicode(codepoints) => codepoints[codepoints.len() - 1],
|
Utf32Str::Unicode(codepoints) => codepoints[codepoints.len() - 1],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub fn chars(&self) -> Chars<'_> {
|
pub fn first(self) -> char {
|
||||||
|
match self {
|
||||||
|
Utf32Str::Ascii(bytes) => bytes[0] as char,
|
||||||
|
Utf32Str::Unicode(codepoints) => codepoints[0],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn chars(self) -> Chars<'a> {
|
||||||
match self {
|
match self {
|
||||||
Utf32Str::Ascii(bytes) => Chars::Ascii(bytes.iter()),
|
Utf32Str::Ascii(bytes) => Chars::Ascii(bytes.iter()),
|
||||||
Utf32Str::Unicode(codepoints) => Chars::Unicode(codepoints.iter()),
|
Utf32Str::Unicode(codepoints) => Chars::Unicode(codepoints.iter()),
|
||||||
@ -161,3 +200,12 @@ impl<'a> Iterator for Chars<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl DoubleEndedIterator for Chars<'_> {
|
||||||
|
fn next_back(&mut self) -> Option<Self::Item> {
|
||||||
|
match self {
|
||||||
|
Chars::Ascii(iter) => iter.next_back().map(|&c| c as char),
|
||||||
|
Chars::Unicode(iter) => iter.next_back().copied(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -46,8 +46,8 @@ impl Utf32String {
|
|||||||
Bound::Unbounded => 0,
|
Bound::Unbounded => 0,
|
||||||
};
|
};
|
||||||
let end = match range.end_bound() {
|
let end = match range.end_bound() {
|
||||||
Bound::Included(&end) => end as usize,
|
Bound::Included(&end) => end as usize + 1,
|
||||||
Bound::Excluded(&end) => end as usize + 1,
|
Bound::Excluded(&end) => end as usize,
|
||||||
Bound::Unbounded => self.len(),
|
Bound::Unbounded => self.len(),
|
||||||
};
|
};
|
||||||
match self {
|
match self {
|
||||||
|
Loading…
Reference in New Issue
Block a user