fix substring matching in Unicode haystacks (#34)

This commit is contained in:
Pascal Kuthe 2023-12-22 19:27:20 +01:00
parent 1edf451192
commit 34553f009d
No known key found for this signature in database
GPG Key ID: D715E8655AE166A6
4 changed files with 13 additions and 10 deletions

View File

@@ -8,6 +8,7 @@
## Bugfixes
* avoid incorrect matches when searching for ASCII needles in a Unicode haystack
* correctly handle Unicode normalization when there are normalizable characters in the pattern, for example characters with umlauts
* when the needle is composed of a single char, return the score and index
of the best position instead of always returning those of the first matched character

View File

@@ -258,6 +258,9 @@ impl Matcher {
}
}
}
if max_score == 0 {
return None;
}
let score = self.calculate_score::<INDICES, _, _>(
haystack,

View File

@@ -496,15 +496,8 @@ impl Matcher {
.substring_match_1_non_ascii::<INDICES>(haystack, needle, start, indices);
return Some(res);
}
let (start, end) = self.prefilter_non_ascii(haystack, needle_, false)?;
self.fuzzy_match_optimal::<INDICES, char, char>(
haystack,
needle,
start,
start + 1,
end,
indices,
)
let (start, _) = self.prefilter_non_ascii(haystack, needle_, false)?;
self.substring_match_non_ascii::<INDICES, _>(haystack, needle, start, indices)
}
}
}

View File

@@ -490,7 +490,13 @@ fn test_unicode() {
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER - PENALTY_GAP_START,
),
],
)
);
assert_not_matches(
false,
false,
false,
&[("Flibbertigibbet / イタズラっ子たち", "lying")],
);
}
#[test]