From 34553f009d7d0b0d29195aefc9e93f3138513918 Mon Sep 17 00:00:00 2001 From: Pascal Kuthe Date: Fri, 22 Dec 2023 19:27:20 +0100 Subject: [PATCH] fix substring matching Unicode haystacks (#34) --- CHANGELOG.md | 1 + matcher/src/exact.rs | 3 +++ matcher/src/lib.rs | 11 ++--------- matcher/src/tests.rs | 8 +++++++- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 75b9725..ee231df 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ ## Bugfixes +* avoid incorrect matches when searching for ASCII needles in a Unicode haystack * correctly handle Unicode normalization when there are normalizable characters in the pattern, for example characters with umlauts * when the needle is composed of a single char, return the score and index of the best position instead of always returning the first matched character diff --git a/matcher/src/exact.rs b/matcher/src/exact.rs index 4c7d5dd..9ebf31c 100644 --- a/matcher/src/exact.rs +++ b/matcher/src/exact.rs @@ -258,6 +258,9 @@ impl Matcher { } } } + if max_score == 0 { + return None; + } let score = self.calculate_score::( haystack, diff --git a/matcher/src/lib.rs b/matcher/src/lib.rs index 2cd4b67..e175dd6 100644 --- a/matcher/src/lib.rs +++ b/matcher/src/lib.rs @@ -496,15 +496,8 @@ impl Matcher { .substring_match_1_non_ascii::(haystack, needle, start, indices); return Some(res); } - let (start, end) = self.prefilter_non_ascii(haystack, needle_, false)?; - self.fuzzy_match_optimal::( - haystack, - needle, - start, - start + 1, - end, - indices, - ) + let (start, _) = self.prefilter_non_ascii(haystack, needle_, false)?; + self.substring_match_non_ascii::(haystack, needle, start, indices) } } } diff --git a/matcher/src/tests.rs b/matcher/src/tests.rs index 64fcc3c..f024023 100644 --- a/matcher/src/tests.rs +++ b/matcher/src/tests.rs @@ -490,7 +490,13 @@ fn test_unicode() { BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER - PENALTY_GAP_START, ), ], - ) + ); + assert_not_matches( + false, + false, + false, + &[("Flibbertigibbet / イタズラっ子たち", "lying")], + ); } #[test]