mirror of
https://github.com/solaeus/nucleo.git
synced 2024-12-22 09:57:49 +00:00
fix substring matching Unicode haystacks (#34)
This commit is contained in:
parent
1edf451192
commit
34553f009d
@@ -8,6 +8,7 @@
|
|||||||
|
|
||||||
## Bugfixes
|
## Bugfixes
|
||||||
|
|
||||||
|
* avoid incorrect matches when searching for ASCII needles in a Unicode haystack
|
||||||
* correctly handle Unicode normalization when there are normalizable characters in the pattern, for example characters with umlauts
|
* correctly handle Unicode normalization when there are normalizable characters in the pattern, for example characters with umlauts
|
||||||
* when the needle is composed of a single char, return the score and index
|
* when the needle is composed of a single char, return the score and index
|
||||||
of the best position instead of always returning the first matched character
|
of the best position instead of always returning the first matched character
|
||||||
|
@@ -258,6 +258,9 @@ impl Matcher {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if max_score == 0 {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
let score = self.calculate_score::<INDICES, _, _>(
|
let score = self.calculate_score::<INDICES, _, _>(
|
||||||
haystack,
|
haystack,
|
||||||
|
@@ -496,15 +496,8 @@ impl Matcher {
|
|||||||
.substring_match_1_non_ascii::<INDICES>(haystack, needle, start, indices);
|
.substring_match_1_non_ascii::<INDICES>(haystack, needle, start, indices);
|
||||||
return Some(res);
|
return Some(res);
|
||||||
}
|
}
|
||||||
let (start, end) = self.prefilter_non_ascii(haystack, needle_, false)?;
|
let (start, _) = self.prefilter_non_ascii(haystack, needle_, false)?;
|
||||||
self.fuzzy_match_optimal::<INDICES, char, char>(
|
self.substring_match_non_ascii::<INDICES, _>(haystack, needle, start, indices)
|
||||||
haystack,
|
|
||||||
needle,
|
|
||||||
start,
|
|
||||||
start + 1,
|
|
||||||
end,
|
|
||||||
indices,
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -490,7 +490,13 @@ fn test_unicode() {
|
|||||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER - PENALTY_GAP_START,
|
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER - PENALTY_GAP_START,
|
||||||
),
|
),
|
||||||
],
|
],
|
||||||
)
|
);
|
||||||
|
assert_not_matches(
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
&[("Flibbertigibbet / イタズラっ子たち", "lying")],
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
Loading…
Reference in New Issue
Block a user