fix substring matching in Unicode haystacks (#34)

This commit is contained in:
Pascal Kuthe 2023-12-22 19:27:20 +01:00
parent 1edf451192
commit 34553f009d
No known key found for this signature in database
GPG Key ID: D715E8655AE166A6
4 changed files with 13 additions and 10 deletions

View File

@ -8,6 +8,7 @@
## Bugfixes
* avoid incorrect matches when searching for ASCII needles in a Unicode haystack
* correctly handle Unicode normalization when there are normalizable characters in the pattern, for example characters with umlauts
* when the needle is composed of a single char, return the score and index
of the best position instead of always returning the first matched character

View File

@ -258,6 +258,9 @@ impl Matcher {
} }
} }
} }
if max_score == 0 {
return None;
}
let score = self.calculate_score::<INDICES, _, _>( let score = self.calculate_score::<INDICES, _, _>(
haystack, haystack,

View File

@ -496,15 +496,8 @@ impl Matcher {
.substring_match_1_non_ascii::<INDICES>(haystack, needle, start, indices); .substring_match_1_non_ascii::<INDICES>(haystack, needle, start, indices);
return Some(res); return Some(res);
} }
let (start, end) = self.prefilter_non_ascii(haystack, needle_, false)?; let (start, _) = self.prefilter_non_ascii(haystack, needle_, false)?;
self.fuzzy_match_optimal::<INDICES, char, char>( self.substring_match_non_ascii::<INDICES, _>(haystack, needle, start, indices)
haystack,
needle,
start,
start + 1,
end,
indices,
)
} }
} }
} }

View File

@ -490,7 +490,13 @@ fn test_unicode() {
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER - PENALTY_GAP_START, BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER - PENALTY_GAP_START,
), ),
], ],
) );
assert_not_matches(
false,
false,
false,
&[("Flibbertigibbet / イタズラっ子たち", "lying")],
);
} }
#[test] #[test]