2023-07-20 00:09:51 +00:00
|
|
|
use ::memchr::{memchr, memchr2, memrchr, memrchr2};
|
|
|
|
|
|
|
|
use crate::chars::Char;
|
|
|
|
use crate::utf32_str::Utf32Str;
|
|
|
|
use crate::Matcher;
|
|
|
|
|
|
|
|
#[inline(always)]
|
|
|
|
fn find_ascii_ignore_case(c: u8, haystack: &[u8]) -> Option<usize> {
|
2023-07-22 01:37:15 +00:00
|
|
|
if c >= b'a' && c <= b'z' {
|
2023-07-20 00:09:51 +00:00
|
|
|
memchr2(c, c - 32, haystack)
|
|
|
|
} else {
|
|
|
|
memchr(c, haystack)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[inline(always)]
|
|
|
|
fn find_ascii_ignore_case_rev(c: u8, haystack: &[u8]) -> Option<usize> {
|
2023-07-22 01:37:15 +00:00
|
|
|
if c >= b'a' && c <= b'z' {
|
2023-07-20 00:09:51 +00:00
|
|
|
memrchr2(c, c - 32, haystack)
|
|
|
|
} else {
|
|
|
|
memrchr(c, haystack)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Matcher {
|
|
|
|
pub(crate) fn prefilter_ascii(
|
|
|
|
&self,
|
|
|
|
mut haystack: &[u8],
|
|
|
|
needle: &[u8],
|
2023-07-20 14:33:14 +00:00
|
|
|
only_greedy: bool,
|
2023-07-20 00:09:51 +00:00
|
|
|
) -> Option<(usize, usize, usize)> {
|
|
|
|
if self.config.ignore_case {
|
|
|
|
let start = find_ascii_ignore_case(needle[0], haystack)?;
|
2023-07-20 14:33:14 +00:00
|
|
|
let mut greedy_end = start + 1;
|
|
|
|
haystack = &haystack[greedy_end..];
|
2023-07-20 00:09:51 +00:00
|
|
|
for &c in &needle[1..] {
|
|
|
|
let idx = find_ascii_ignore_case(c, haystack)? + 1;
|
2023-07-20 14:33:14 +00:00
|
|
|
greedy_end += idx;
|
2023-07-20 00:09:51 +00:00
|
|
|
haystack = &haystack[idx..];
|
|
|
|
}
|
2023-07-20 14:33:14 +00:00
|
|
|
if only_greedy {
|
|
|
|
Some((start, greedy_end, greedy_end))
|
|
|
|
} else {
|
|
|
|
let end = greedy_end
|
|
|
|
+ find_ascii_ignore_case_rev(*needle.last().unwrap(), haystack)
|
|
|
|
.map_or(0, |i| i + 1);
|
|
|
|
Some((start, greedy_end, end))
|
|
|
|
}
|
2023-07-20 00:09:51 +00:00
|
|
|
} else {
|
|
|
|
let start = memchr(needle[0], haystack)?;
|
2023-07-20 14:33:14 +00:00
|
|
|
let mut greedy_end = start + 1;
|
|
|
|
haystack = &haystack[greedy_end..];
|
2023-07-20 00:09:51 +00:00
|
|
|
for &c in &needle[1..] {
|
|
|
|
let idx = memchr(c, haystack)? + 1;
|
2023-07-20 14:33:14 +00:00
|
|
|
greedy_end += idx;
|
2023-07-20 00:09:51 +00:00
|
|
|
haystack = &haystack[idx..];
|
|
|
|
}
|
2023-07-20 14:33:14 +00:00
|
|
|
if only_greedy {
|
|
|
|
Some((start, greedy_end, greedy_end))
|
|
|
|
} else {
|
|
|
|
let end =
|
|
|
|
greedy_end + memrchr(*needle.last().unwrap(), haystack).map_or(0, |i| i + 1);
|
|
|
|
Some((start, greedy_end, end))
|
|
|
|
}
|
2023-07-20 00:09:51 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub(crate) fn prefilter_non_ascii(
|
|
|
|
&self,
|
|
|
|
haystack: &[char],
|
|
|
|
needle: Utf32Str<'_>,
|
2023-07-20 14:33:14 +00:00
|
|
|
only_greedy: bool,
|
2023-07-20 00:09:51 +00:00
|
|
|
) -> Option<(usize, usize)> {
|
|
|
|
let needle_char = needle.get(0);
|
|
|
|
let start = haystack
|
|
|
|
.iter()
|
|
|
|
.position(|c| c.normalize(&self.config) == needle_char)?;
|
|
|
|
let needle_char = needle.last();
|
2023-07-20 14:33:14 +00:00
|
|
|
if only_greedy {
|
|
|
|
Some((start, start + 1))
|
|
|
|
} else {
|
2023-07-20 22:16:15 +00:00
|
|
|
let end = haystack.len()
|
2023-07-20 14:33:14 +00:00
|
|
|
- haystack[start..]
|
|
|
|
.iter()
|
|
|
|
.rev()
|
|
|
|
.position(|c| c.normalize(&self.config) == needle_char)?;
|
2023-07-22 01:37:15 +00:00
|
|
|
// matches are never possible in this case
|
|
|
|
if end - start < needle.len() {
|
|
|
|
cov_mark::hit!(small_haystack);
|
|
|
|
return None;
|
|
|
|
}
|
2023-07-20 00:09:51 +00:00
|
|
|
|
2023-07-20 14:33:14 +00:00
|
|
|
Some((start, end))
|
|
|
|
}
|
2023-07-20 00:09:51 +00:00
|
|
|
}
|
|
|
|
}
|