mirror of
https://github.com/solaeus/nucleo.git
synced 2024-11-12 18:17:11 +00:00
fix substring matching in Unicode haystacks (#34)
This commit is contained in:
parent
1edf451192
commit
34553f009d
@ -8,6 +8,7 @@
|
||||
|
||||
## Bugfixes
|
||||
|
||||
* avoid incorrect matches when searching for ASCII needles in a Unicode haystack
|
||||
* correctly handle Unicode normalization when there are normalizable characters in the pattern, for example characters with umlauts
|
||||
* when the needle is composed of a single character, return the score and index
|
||||
of the best position instead of always returning the first matched character
|
||||
|
@ -258,6 +258,9 @@ impl Matcher {
|
||||
}
|
||||
}
|
||||
}
|
||||
if max_score == 0 {
|
||||
return None;
|
||||
}
|
||||
|
||||
let score = self.calculate_score::<INDICES, _, _>(
|
||||
haystack,
|
||||
|
@ -496,15 +496,8 @@ impl Matcher {
|
||||
.substring_match_1_non_ascii::<INDICES>(haystack, needle, start, indices);
|
||||
return Some(res);
|
||||
}
|
||||
let (start, end) = self.prefilter_non_ascii(haystack, needle_, false)?;
|
||||
self.fuzzy_match_optimal::<INDICES, char, char>(
|
||||
haystack,
|
||||
needle,
|
||||
start,
|
||||
start + 1,
|
||||
end,
|
||||
indices,
|
||||
)
|
||||
let (start, _) = self.prefilter_non_ascii(haystack, needle_, false)?;
|
||||
self.substring_match_non_ascii::<INDICES, _>(haystack, needle, start, indices)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -490,7 +490,13 @@ fn test_unicode() {
|
||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER - PENALTY_GAP_START,
|
||||
),
|
||||
],
|
||||
)
|
||||
);
|
||||
assert_not_matches(
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
&[("Flibbertigibbet / イタズラっ子たち", "lying")],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
Loading…
Reference in New Issue
Block a user