better implementation

2024-12-22 09:57:49 +00:00 · 2023-07-20 02:09:51 +02:00 · 2023-07-20 02:09:51 +02:00 · e964d42849
commit e964d42849
parent 6837b4e2cb
13 changed files with 1467 additions and 714 deletions
--- a/src/chars.rs
+++ b/src/chars.rs
@ -0,0 +1,135 @@
 use crate::chars::case_fold::CASE_FOLDING_SIMPLE;
 use crate::MatcherConfig;
 //autogenerated by generate-ucd
 #[allow(warnings)]
 #[rustfmt::skip]
 mod case_fold;
 mod normalize;
 /// Abstraction over the element types the matcher works on.
 ///
 /// This file implements it for `u8` and for `char`.
 pub trait Char: Copy + Eq + Ord + std::fmt::Debug {
    /// `true` for the `u8` implementation, `false` for the `char` implementation.
    const ASCII: bool;
    /// Classifies `self` into a [`CharClass`]; `config` supplies the settings
    /// the classification depends on (for example the delimiter list).
    fn char_class(self, config: &MatcherConfig) -> CharClass;
    /// Returns the normalized form of `self` together with the class of the
    /// original, un-normalized value.
    fn char_class_and_normalize(self, config: &MatcherConfig) -> (Self, CharClass);
    /// Returns `self` after applying the normalization steps enabled in `config`.
    fn normalize(self, config: &MatcherConfig) -> Self;
 }
 impl Char for u8 {
    const ASCII: bool = true;

    /// Classifies a byte as lowercase, uppercase, digit, whitespace, configured
    /// delimiter or (when nothing else applies) non-word.
    #[inline]
    fn char_class(self, config: &MatcherConfig) -> CharClass {
        // explicit comparisons are deliberate: they optimize better than the
        // range helpers
        if b'a' <= self && self <= b'z' {
            return CharClass::Lower;
        }
        if b'A' <= self && self <= b'Z' {
            return CharClass::Upper;
        }
        if b'0' <= self && self <= b'9' {
            return CharClass::Number;
        }
        if self.is_ascii_whitespace() {
            return CharClass::Whitespace;
        }
        if config.delimeter_chars.contains(&self) {
            return CharClass::Delimiter;
        }
        CharClass::NonWord
    }

    /// Returns the byte (lowercased when `config.ignore_case` is set and the
    /// byte is an uppercase letter) together with the class of the original byte.
    #[inline(always)]
    fn char_class_and_normalize(self, config: &MatcherConfig) -> (Self, CharClass) {
        let class = self.char_class(config);
        let folded = match class {
            // `A..=Z` and `a..=z` are exactly 32 apart
            CharClass::Upper if config.ignore_case => self + 32,
            _ => self,
        };
        (folded, class)
    }

    /// Lowercases `A..=Z` when `config.ignore_case` is set; every other byte is
    /// returned unchanged.
    #[inline(always)]
    fn normalize(self, config: &MatcherConfig) -> Self {
        let is_upper = self >= b'A' && self <= b'Z';
        if config.ignore_case && is_upper {
            self + 32
        } else {
            self
        }
    }
 }
 /// Classifies a character with the Unicode-aware predicates of `char`.
 ///
 /// The predicates are tried in a fixed order (lowercase, uppercase, numeric,
 /// alphabetic, whitespace); the first one that holds decides the class and
 /// anything left over is `NonWord`.
 fn char_class_non_ascii(c: char) -> CharClass {
    match c {
        _ if c.is_lowercase() => CharClass::Lower,
        _ if c.is_uppercase() => CharClass::Upper,
        _ if c.is_numeric() => CharClass::Number,
        _ if c.is_alphabetic() => CharClass::Letter,
        _ if c.is_whitespace() => CharClass::Whitespace,
        _ => CharClass::NonWord,
    }
 }
 impl Char for char {
    const ASCII: bool = false;

    /// Classifies `self`: ASCII characters use the `u8` classification (which
    /// honours the configured delimiters), everything else the Unicode
    /// predicates of `char_class_non_ascii`.
    #[inline(always)]
    fn char_class(self, config: &MatcherConfig) -> CharClass {
        if self.is_ascii() {
            return (self as u8).char_class(config);
        }
        char_class_non_ascii(self)
    }

    /// Returns the normalized character together with the class of the
    /// original character.
    ///
    /// ASCII input is delegated to the `u8` implementation. For non-ASCII
    /// input, uppercase characters are case-folded through
    /// `CASE_FOLDING_SIMPLE` when `config.ignore_case` is set, and
    /// `normalize::normalize` is applied when `config.normalize` is set.
    #[inline(always)]
    fn char_class_and_normalize(mut self, config: &MatcherConfig) -> (Self, CharClass) {
        if self.is_ascii() {
            let (c, class) = (self as u8).char_class_and_normalize(config);
            return (c as char, class);
        }
        let char_class = char_class_non_ascii(self);
        // fold case only when requested, mirroring the `u8` implementation
        if config.ignore_case && char_class == CharClass::Upper {
            self = CASE_FOLDING_SIMPLE
                .binary_search_by_key(&self, |(upper, _)| *upper)
                .map_or(self, |idx| CASE_FOLDING_SIMPLE[idx].1)
        }
        if config.normalize {
            self = normalize::normalize(self);
        }
        (self, char_class)
    }

    /// Applies `normalize::normalize` when `config.normalize` is set and then
    /// lowercases the result when `config.ignore_case` is set.
    #[inline(always)]
    fn normalize(mut self, config: &MatcherConfig) -> Self {
        if config.normalize {
            self = normalize::normalize(self);
        }
        // case-sensitive matching must leave the casing untouched
        if config.ignore_case {
            self = to_lower_case(self);
        }
        self
    }
 }
 pub use normalize::normalize;
 /// Maps `c` to its lowercase / case-folded counterpart.
 ///
 /// ASCII `A..=Z` becomes `a..=z` and every other ASCII character is returned
 /// unchanged. Non-ASCII characters are looked up in `CASE_FOLDING_SIMPLE`
 /// (binary search on the first tuple element) and returned unchanged when
 /// the table has no entry for them.
 #[inline(always)]
 pub fn to_lower_case(c: char) -> char {
    if c.is_ascii() {
        // same result as adding 32 to `A..=Z` and leaving the rest alone
        return c.to_ascii_lowercase();
    }
    match CASE_FOLDING_SIMPLE.binary_search_by_key(&c, |&(upper, _)| upper) {
        Ok(idx) => CASE_FOLDING_SIMPLE[idx].1,
        Err(_) => c,
    }
 }
 /// Coarse classification of a character, as produced by `Char::char_class`.
 ///
 /// `Ord` is derived, so variants compare in declaration order. Keep the order
 /// stable: NOTE(review) code that compares classes with `<`/`>` (e.g. "is
 /// greater than `NonWord`") depends on it — confirm at the call sites.
 #[derive(Debug, Eq, PartialEq, PartialOrd, Ord, Copy, Clone, Hash)]
 #[non_exhaustive]
 pub enum CharClass {
    /// Whitespace character.
    Whitespace,
    /// Anything that matched none of the other classes.
    NonWord,
    /// An ASCII byte listed in `MatcherConfig::delimeter_chars`.
    Delimiter,
    /// Lowercase letter.
    Lower,
    /// Uppercase letter.
    Upper,
    /// Alphabetic character that is neither lowercase nor uppercase (only
    /// produced for non-ASCII input).
    Letter,
    /// ASCII digit or non-ASCII numeric character.
    Number,
 }
--- a/src/chars/case_fold.rs
+++ b/src/chars/case_fold.rs
--- a/src/chars/normalize.rs
+++ b/src/chars/normalize.rs
--- a/src/config.rs
+++ b/src/config.rs
@ -1,37 +1,7 @@
-pub(crate) const SCORE_MATCH: u16 = 16;
+use crate::chars::CharClass;
-pub(crate) const PENALTY_GAP_START: u16 = 3;
+use crate::score::BONUS_BOUNDARY;
 pub(crate) const PENALTY_GAP_EXTENSION: u16 = 1;
 // We prefer matches at the beginning of a word, but the bonus should not be
 // too great to prevent the longer acronym matches from always winning over
 // shorter fuzzy matches. The bonus point here was specifically chosen that
 // the bonus is cancelled when the gap between the acronyms grows over
 // 8 characters, which is approximately the average length of the words found
 // in web2 dictionary and my file system.
 pub(crate) const BONUS_BOUNDARY: u16 = SCORE_MATCH / 2;
 // Although bonus point for non-word characters is non-contextual, we need it
 // for computing bonus points for consecutive chunks starting with a non-word
 // character.
 pub(crate) const BONUS_NON_WORD: u16 = SCORE_MATCH / 2;
 // Edge-triggered bonus for matches in camelCase words.
 // Compared to word-boundary case, they don't accompany single-character gaps
 // (e.g. FooBar vs. foo-bar), so we deduct bonus point accordingly.
 pub(crate) const BONUS_CAMEL123: u16 = BONUS_BOUNDARY - PENALTY_GAP_EXTENSION;
 // Minimum bonus point given to characters in consecutive chunks.
 // Note that bonus points for consecutive matches wouldn't have been needed if
 // we used a fixed match score as in the original algorithm.
 pub(crate) const BONUS_CONSECUTIVE: u16 = PENALTY_GAP_START + PENALTY_GAP_EXTENSION;
 // The first character in the typed pattern usually has more significance
 // than the rest so it's important that it appears at special positions where
 // bonus points are given, e.g. "to-go" vs. "ongoing" on "og" or on "ogo".
 // The amount of the extra bonus should be limited so that the gap penalty is
 // still respected.
 pub(crate) const BONUS_FIRST_CHAR_MULTIPLIER: u16 = 2;
 #[non_exhaustive]
 pub struct MatcherConfig {
    pub delimeter_chars: &'static [u8],
    /// Extra bonus for word boundary after whitespace character or beginning of the string
@ -44,33 +14,17 @@ pub struct MatcherConfig {
    /// this significantly degrades performance so it's not recommended
    /// to be turned on by default
    pub normalize: bool,
-    /// use faster/simpler algorithm at the cost of (potentially) much worse results
+    /// whether to ignore casing
-    /// For long inputs this algorith is always used as a fallbach to avoid
+    pub ignore_case: bool,
    /// blowups in time complexity
    pub use_v1: bool,
    /// The case matching to perform
    pub case_matching: CaseMatching,
 }
-#[derive(Debug, Eq, PartialEq, PartialOrd, Ord, Copy, Clone, Hash)]
+// #[derive(Debug, Eq, PartialEq, PartialOrd, Ord, Copy, Clone, Hash)]
-#[non_exhaustive]
+// #[non_exhaustive]
-pub enum CharClass {
+// pub enum CaseMatching {
-    Whitespace,
+//     Respect,
-    NonWord,
+//     Ignore,
-    Delimiter,
+//     Smart,
-    Lower,
+// }
    Upper,
    Letter,
    Number,
 }
 /// Selects how letter case is treated while matching; stored in
 /// `MatcherConfig::case_matching`.
 #[derive(Debug, Eq, PartialEq, PartialOrd, Ord, Copy, Clone, Hash)]
 #[non_exhaustive]
 pub enum CaseMatching {
    /// Presumably case-sensitive matching (no case folding is requested for
    /// this variant) — TODO confirm against the query compilation code.
    Respect,
    /// Case is ignored.
    Ignore,
    /// "Smart case": the casing of the needle itself decides.
    /// NOTE(review): the exact rule lives in the query compilation code — confirm.
    Smart,
 }
 impl MatcherConfig {
    pub const DEFAULT: Self = {
@ -80,8 +34,7 @@ impl MatcherConfig {
            bonus_boundary_delimiter: BONUS_BOUNDARY + 1,
            inital_char_class: CharClass::Whitespace,
            normalize: false,
-            use_v1: false,
+            ignore_case: true,
            case_matching: CaseMatching::Smart,
        }
    };
 }
@ -107,69 +60,4 @@ impl MatcherConfig {
        self.inital_char_class = CharClass::Delimiter;
        self
    }
    /// Classifies a character with the Unicode-aware predicates of `char`.
    ///
    /// The predicates are tried in a fixed order (lowercase, uppercase,
    /// numeric, alphabetic, whitespace); the first one that holds decides the
    /// class and anything left over is `NonWord`.
    fn char_class_non_ascii(c: char) -> CharClass {
        match c {
            _ if c.is_lowercase() => CharClass::Lower,
            _ if c.is_uppercase() => CharClass::Upper,
            _ if c.is_numeric() => CharClass::Number,
            _ if c.is_alphabetic() => CharClass::Letter,
            _ if c.is_whitespace() => CharClass::Whitespace,
            _ => CharClass::NonWord,
        }
    }
    /// Classifies a character with ASCII-only rules: letters, digits and
    /// whitespace first, then the configured delimiters; everything else is
    /// `NonWord`.
    fn char_class_ascii(&self, c: char) -> CharClass {
        // explicit comparisons are deliberate: they optimize better than the
        // range helpers
        if 'a' <= c && c <= 'z' {
            return CharClass::Lower;
        }
        if 'A' <= c && c <= 'Z' {
            return CharClass::Upper;
        }
        if '0' <= c && c <= '9' {
            return CharClass::Number;
        }
        if c.is_ascii_whitespace() {
            return CharClass::Whitespace;
        }
        if self.delimeter_chars.contains(&(c as u8)) {
            return CharClass::Delimiter;
        }
        CharClass::NonWord
    }
    /// Classifies `c`, using the ASCII rules (including the configured
    /// delimiters) for ASCII input and the Unicode predicates otherwise.
    pub(crate) fn char_class(&self, c: char) -> CharClass {
        if !c.is_ascii() {
            return Self::char_class_non_ascii(c);
        }
        self.char_class_ascii(c)
    }
    /// Computes the bonus for a character of class `class` that follows a
    /// character of class `prev_class`.
    ///
    /// Arms are evaluated top to bottom, so the word-boundary bonuses take
    /// precedence over the camelCase / digit bonus, which in turn takes
    /// precedence over the plain non-word and whitespace bonuses.
    pub(crate) fn bonus_for(&self, prev_class: CharClass, class: CharClass) -> u16 {
        // every class ordered after `NonWord` counts as a word character
        let is_word = class > CharClass::NonWord;
        match (prev_class, class) {
            // transition from non word to word
            (CharClass::Whitespace, _) if is_word => self.bonus_boundary_white,
            (CharClass::Delimiter, _) if is_word => self.bonus_boundary_delimiter,
            (CharClass::NonWord, _) if is_word => BONUS_BOUNDARY,
            // camelCase letter123
            (CharClass::Lower, CharClass::Upper) => BONUS_CAMEL123,
            (prev, CharClass::Number) if prev != CharClass::Number => BONUS_CAMEL123,
            (_, CharClass::NonWord) => BONUS_NON_WORD,
            (_, CharClass::Whitespace) => self.bonus_boundary_white,
            _ => 0,
        }
    }
 }
--- a/src/fuzzy_greedy.rs
+++ b/src/fuzzy_greedy.rs
@ -0,0 +1,46 @@
 use crate::chars::Char;
 use crate::Matcher;
 impl Matcher {
    /// Greedy fallback algorithm: much faster (linear time), but the reported
    /// score/indices might not belong to the best possible match.
    ///
    /// `start..end` is the haystack window handed over by the caller. For
    /// non-ASCII haystacks the window is first extended forward until the
    /// remaining needle characters (`needle[1..]`) have been found in order;
    /// `None` is returned when they cannot all be found. The window is then
    /// shrunk from the left by matching the needle in reverse, and the result
    /// is scored with `calculate_score`.
    ///
    /// The needle is compared against normalized haystack characters, so it is
    /// expected to be normalized already.
    ///
    /// # Panics
    ///
    /// Panics if `needle` is empty.
    pub(crate) fn fuzzy_match_greedy<const INDICIES: bool, H: Char + PartialEq<N>, N: Char>(
        &mut self,
        haystack: &[H],
        needle: &[N],
        mut start: usize,
        mut end: usize,
        indicies: &mut Vec<u32>,
    ) -> Option<u16> {
        let first_char_end = if H::ASCII { start + 1 } else { end };
        if !H::ASCII && needle.len() != 1 {
            let mut needle_iter = needle[1..].iter().copied();
            if let Some(mut needle_char) = needle_iter.next() {
                let mut matched_all = false;
                for (i, &c) in haystack[first_char_end..].iter().enumerate() {
                    if c.normalize(&self.config) == needle_char {
                        let Some(next_needle_char) = needle_iter.next() else {
                            // `i` is relative to the sub-slice that starts at
                            // `first_char_end`
                            end = first_char_end + i + 1;
                            matched_all = true;
                            break;
                        };
                        needle_char = next_needle_char;
                    }
                }
                // some needle chars were never found: there is no match
                if !matched_all {
                    return None;
                }
            }
        }
        // minimize the greedy match by greedily matching in reverse
        let mut needle_iter = needle.iter().rev().copied();
        let mut needle_char = needle_iter.next().unwrap();
        for (i, &c) in haystack[start..end].iter().enumerate().rev() {
            // compare normalized chars, exactly like the forward scan does
            if c.normalize(&self.config) == needle_char {
                let Some(next_needle_char) = needle_iter.next() else {
                    start += i;
                    break;
                };
                needle_char = next_needle_char;
            }
        }
        Some(self.calculate_score::<INDICIES, H, N>(haystack, needle, start, end, indicies))
    }
 }
--- a/src/fuzzy_optimal.rs
+++ b/src/fuzzy_optimal.rs
@ -0,0 +1,272 @@
 use std::cmp::max;
 use crate::chars::{Char, CharClass};
 use crate::matrix::{haystack, rows_mut, Matrix, MatrixCell, MatrixRow};
 use crate::score::{
    BONUS_BOUNDARY, BONUS_CONSECUTIVE, BONUS_FIRST_CHAR_MULTIPLIER, PENALTY_GAP_EXTENSION,
    PENALTY_GAP_START, SCORE_MATCH,
 };
 use crate::{Matcher, MatcherConfig};
 impl Matcher {
    /// Matches `needle` against `haystack[start..end]` with the matrix based
    /// (optimal) algorithm and returns the best score, or `None` when the
    /// needle does not match.
    ///
    /// When the matrix cannot be allocated for this input the greedy
    /// algorithm is used instead, on `start..greedy_end`. When `INDICIES` is
    /// `true` the matched haystack positions (relative to the whole
    /// `haystack`, not to `start`) are written to `indicies`.
    pub(crate) fn fuzzy_match_optimal<const INDICIES: bool, H: Char + PartialEq<N>, N: Char>(
        &mut self,
        haystack: &[H],
        needle: &[N],
        start: usize,
        greedy_end: usize,
        end: usize,
        indicies: &mut Vec<u32>,
    ) -> Option<u16> {
        // construct a matrix (and copy the haystack), the matrix and haystack size are bounded
        // to avoid the slow O(mn) time complexity for large inputs. Furthermore, it allows
        // us to treat needle indices as u16
        let Some(mut matrix) = self.slab.alloc(&haystack[start..end], needle.len()) else {
            return self.fuzzy_match_greedy::<INDICIES, H, N>(
                haystack,
                needle,
                start,
                greedy_end,
                indicies,
            );
        };
        // class of the char right before the window, or the configured
        // initial class when the window starts at the very beginning
        let prev_class = start
            .checked_sub(1)
            .map(|i| haystack[i].char_class(&self.config))
            .unwrap_or(self.config.inital_char_class);
        let (max_score_pos, max_score, matched) = matrix.setup(needle, prev_class, &self.config);
        // this only happens with unicode haystacks, for ASCII the prefilter handles all rejects
        if !matched {
            return None;
        }
        if needle.len() == 1 {
            if INDICIES {
                // `max_score_pos` is relative to `haystack[start..end]`, while
                // reported indices are relative to the whole haystack
                indicies.push(start as u32 + max_score_pos as u32);
            }
            return Some(max_score);
        }
        debug_assert_eq!(
            matrix.row_offs[0], 0,
            "prefilter should have put us at the start of the match"
        );
        // populate the matrix and find the best score
        let (max_score, best_match_end) = matrix.populate_matrix(needle);
        if INDICIES {
            matrix.reconstruct_optimal_path(needle, start as u32, indicies, best_match_end);
        }
        Some(max_score)
    }
 }
 impl<H: Char> Matrix<'_, H> {
    /// Single pass over the haystack that prepares the matrix.
    ///
    /// For every column it normalizes the stored haystack char in place,
    /// caches the bonus, and fills the first matrix row (scores for matching
    /// `needle[0]`). It also records, per needle char, the first column where
    /// that char can match (in order) in `row_offs`.
    ///
    /// Returns `(max_score_pos, max_score, matched)`. The first two are only
    /// tracked for single-char needles (they stay `0` otherwise); `matched`
    /// tells whether all needle chars were found in order.
    ///
    /// # Panics
    ///
    /// Panics if `needle` is empty.
    fn setup<N: Char>(
        &mut self,
        needle: &[N],
        mut prev_class: CharClass,
        config: &MatcherConfig,
    ) -> (u16, u16, bool)
    where
        H: PartialEq<N>,
    {
        let mut row_iter = needle.iter().copied().zip(self.row_offs.iter_mut());
        let (mut needle_char, mut row_start) = row_iter.next().unwrap();
        let col_iter = self
            .haystack
            .iter_mut()
            .zip(self.cells.iter_mut())
            .zip(self.bonus.iter_mut())
            .enumerate();
        let mut max_score = 0;
        let mut max_score_pos = 0;
        let mut in_gap = false;
        let mut prev_score = 0u16;
        let mut matched = false;
        let first_needle_char = needle[0];
        for (i, ((c, matrix_cell), bonus_)) in col_iter {
            // the class is taken from the original char, before normalization
            let class = c.char_class(config);
            *c = c.normalize(config);
            let bonus = config.bonus_for(prev_class, class);
            // save bonus for later so we don't have to recompute it each time
            *bonus_ = bonus;
            prev_class = class;
            let i = i as u16;
            if *c == needle_char {
                // save the first idx of each char
                if let Some(next) = row_iter.next() {
                    *row_start = i;
                    (needle_char, row_start) = next;
                } else {
                    if !matched {
                        *row_start = i;
                    }
                    // we have at least one match
                    matched = true;
                }
            }
            if *c == first_needle_char {
                let score = SCORE_MATCH + bonus * BONUS_FIRST_CHAR_MULTIPLIER;
                matrix_cell.consecutive_chars = 1;
                if needle.len() == 1 && score > max_score {
                    max_score = score;
                    max_score_pos = i;
                    // can't get better than this
                    if bonus >= BONUS_BOUNDARY {
                        break;
                    }
                }
                matrix_cell.score = score;
                in_gap = false;
            } else {
                let gap_penalty = if in_gap {
                    PENALTY_GAP_EXTENSION
                } else {
                    PENALTY_GAP_START
                };
                matrix_cell.score = prev_score.saturating_sub(gap_penalty);
                matrix_cell.consecutive_chars = 0;
                in_gap = true;
            }
            prev_score = matrix_cell.score;
        }
        (max_score_pos, max_score, matched)
    }

    /// Fills every matrix row after the first one (which `setup` computed).
    ///
    /// Returns `(max_score, max_score_end)`: the best score found in the last
    /// row and the haystack column where it occurs.
    fn populate_matrix<N: Char>(&mut self, needle: &[N]) -> (u16, u16)
    where
        H: PartialEq<N>,
    {
        let mut max_score = 0;
        let mut max_score_end = 0;
        let mut row_iter = needle
            .iter()
            .zip(rows_mut(self.row_offs, self.cells, self.haystack.len()))
            .enumerate();
        // skip the first row, `setup` already calculated its initial scores
        let (_, mut prev_matrix_row) = row_iter.next().unwrap().1;
        for (i, (&needle_char, row)) in row_iter {
            let haystack = haystack(self.haystack, self.bonus, row.off);
            let mut in_gap = false;
            let mut prev_matrix_cell = MatrixCell {
                score: 0,
                consecutive_chars: 0,
            };
            // we are interested in the score of the previous character
            // in the previous row. This represents the previous char
            // for each possible pattern. This is equivalent to diagonal movement
            let diagonal_start = row.off - prev_matrix_row.off - 1;
            let diagonal = &mut prev_matrix_row.cells[diagonal_start as usize..];
            for (j, ((haystack_char, matrix_cell), &diag_matrix_cell)) in haystack
                .zip(row.cells.iter_mut())
                .zip(diagonal.iter())
                .enumerate()
            {
                let col = j + row.off as usize;
                let gap_penalty = if in_gap {
                    PENALTY_GAP_EXTENSION
                } else {
                    PENALTY_GAP_START
                };
                // score1: match this column, score2: skip this column (gap)
                let mut score1 = 0;
                let score2 = prev_matrix_cell.score.saturating_sub(gap_penalty);
                let mut consecutive = 0;
                if haystack_char.char == needle_char {
                    score1 = diag_matrix_cell.score + SCORE_MATCH;
                    let mut bonus = haystack_char.bonus;
                    consecutive = diag_matrix_cell.consecutive_chars + 1;
                    if consecutive > 1 {
                        // bonus of the first char of the current consecutive chunk
                        let first_bonus = self.bonus[col + 1 - consecutive as usize];
                        if bonus > first_bonus {
                            if bonus > BONUS_BOUNDARY {
                                consecutive = 1
                            } else {
                                bonus = max(bonus, BONUS_CONSECUTIVE)
                            }
                        } else {
                            bonus = max(first_bonus, BONUS_CONSECUTIVE)
                        }
                    }
                    if score1 + bonus < score2 {
                        score1 += haystack_char.bonus;
                        consecutive = 0;
                    } else {
                        score1 += bonus;
                    }
                }
                in_gap = score1 < score2;
                let score = max(score1, score2);
                // only the last row contains scores of complete matches
                if i == needle.len() - 1 && score > max_score {
                    max_score = score;
                    max_score_end = col as u16;
                }
                matrix_cell.consecutive_chars = consecutive;
                matrix_cell.score = score;
                prev_matrix_cell = *matrix_cell;
            }
            prev_matrix_row = row;
        }
        (max_score, max_score_end)
    }

    /// Walks the populated matrix backwards from `best_match_end` and records
    /// which haystack column each needle char was matched at.
    ///
    /// `indicies` is resized to `needle.len()`; entry `k` receives the column
    /// matched by `needle[k]`, offset by `start`, so the result is in
    /// ascending haystack order.
    fn reconstruct_optimal_path<N: Char>(
        &self,
        needle: &[N],
        start: u32,
        indicies: &mut Vec<u32>,
        best_match_end: u16,
    ) {
        indicies.resize(needle.len(), 0);
        // rows are visited last-to-first, so the index slots must be filled
        // back-to-front to end up in needle order
        let mut row_iter = self
            .rows_rev()
            .zip(indicies.iter_mut().rev())
            .peekable();
        let (mut row, mut matched_col_idx) = row_iter.next().unwrap();
        let mut next_row: Option<MatrixRow> = None;
        let mut col = best_match_end;
        let mut prefer_match = true;
        let haystack_len = self.haystack.len() as u16;
        loop {
            let score = row.cells[col as usize].score;
            // score1: diagonal predecessor (previous needle char),
            // score2: left neighbour (same needle char, previous column)
            let mut score1 = 0;
            let mut score2 = 0;
            if let Some(&(prev_row, _)) = row_iter.peek() {
                if col >= prev_row.off {
                    score1 = prev_row[col].score;
                }
            }
            if col > row.off {
                score2 = row[col - 1].score;
            }
            let mut new_prefer_match = row[col].consecutive_chars > 1;
            if !new_prefer_match && col + 1 < haystack_len {
                if let Some(next_row) = next_row {
                    new_prefer_match = next_row[col + 1].consecutive_chars > 0
                }
            }
            if score > score1 && (score > score2 || score == score2 && prefer_match) {
                *matched_col_idx = col as u32 + start;
                next_row = Some(row);
                let Some(next) = row_iter.next() else {
                    break;
                };
                (row, matched_col_idx) = next
            }
            prefer_match = new_prefer_match;
            col -= 1;
        }
    }
 }
--- a/src/lib.rs
+++ b/src/lib.rs
@ -1,616 +1,137 @@
 // sadly this doesn't optimize well currently
 #![allow(clippy::manual_range_contains)]
-use std::alloc::Layout;
+mod chars;
 use std::cmp::max;
 use memchr::{memchr, memchr2};
 use normalize::normalize;
 //autogenerated by generate-ucd
 #[allow(warnings)]
 #[rustfmt::skip]
 mod case_fold;
 mod config;
-mod normalize;
+mod fuzzy_greedy;
 mod fuzzy_optimal;
 mod matrix;
 mod prefilter;
 mod score;
 mod utf32_str;
-pub use config::{CaseMatching, CharClass, MatcherConfig};
+// #[cfg(test)]
 // mod tests;
-use crate::config::{
+pub use config::MatcherConfig;
    BONUS_BOUNDARY, BONUS_CONSECUTIVE, BONUS_FIRST_CHAR_MULTIPLIER, PENALTY_GAP_EXTENSION,
    PENALTY_GAP_START, SCORE_MATCH,
 };
-const MAX_MATRIX_SIZE: usize = 75 * 1024; // 300KB
+use crate::matrix::MatrixSlab;
-const MAX_HAYSTACK_LEN: usize = 8192; // 64KB
+use crate::utf32_str::Utf32Str;
 /// One cell of the scoring matrix.
 #[derive(Clone, Copy, PartialEq, Eq)]
 struct MatrixCell {
    /// Score stored for this cell.
    score: u16,
    /// Presumably the length of the run of consecutively matched characters
    /// ending at this cell (0 when the cell is not part of a match) — TODO
    /// confirm against the code that fills the matrix.
    consecutive_chars: u16,
 }
 /// A haystack character stored together with a bonus value.
 #[derive(Clone, Copy, PartialEq, Eq)]
 struct HaystackChar {
    /// The character itself (NOTE(review): presumably already normalized when
    /// stored — confirm at the place where the haystack is copied).
    char: char,
    /// Bonus associated with this character.
    bonus: u16,
 }
 pub struct Matcher {
    pub config: MatcherConfig,
-    matrix: Box<[MatrixCell; MAX_MATRIX_SIZE]>,
+    slab: MatrixSlab,
    haystack: Box<[HaystackChar; MAX_HAYSTACK_LEN]>,
    // needle can be at most as long as the haystack
    first_needle_occurance: Box<[u16; MAX_HAYSTACK_LEN]>,
 }
-pub struct Query {
+// // impl Query {
-    needle_chars: Vec<char>,
+// //     fn push(&mut self, needle: Utf32Str<'_>, normalize_: bool, smart_case: bool) {
-    is_ascii: bool,
+// //         self.needle_chars.reserve(needle.len());
-    ignore_case: bool,
+// //         self.needle_chars.extend(needle.chars().map(|mut c| {
-}
+// //             if !c.is_ascii() {
-
+// //                 self.is_ascii = false;
-impl Query {
+// //             }
-    fn push(&mut self, needle: &str, normalize_: bool, smart_case: bool) {
+// //             if smart_case {
-        self.needle_chars.reserve(needle.len());
+// //                 if c.is_uppercase() {
-        self.needle_chars.extend(needle.chars().map(|mut c| {
+// //                     self.ignore_case = false;
-            if !c.is_ascii() {
+// //                 }
-                self.is_ascii = false;
+// //             } else if self.ignore_case {
-            }
+// //                 if self.is_ascii {
-            if smart_case {
+// //                     c = to_lower_case::<true>(c)
-                if c.is_uppercase() {
+// //                 } else {
-                    self.ignore_case = false;
+// //                     c = to_lower_case::<false>(c)
-                }
+// //                 }
-            } else if self.ignore_case {
+// //             }
-                if self.is_ascii {
+// //             if normalize_ && !self.is_ascii {
-                    c = to_lower_case::<true>(c)
+// //                 c = normalize(c);
-                } else {
+// //             }
-                    c = to_lower_case::<false>(c)
+// //             c
-                }
+// //         }))
-            }
+// //     }
-            if normalize_ && !self.is_ascii {
+// // }
                c = normalize(c);
            }
            c
        }))
    }
 }
 /// Returns the index of the first byte in `haystack` that equals `c`,
 /// ignoring ASCII case when `c` is a lowercase letter.
 ///
 /// For `c` in `a..=z` both `c` and its uppercase counterpart are searched;
 /// any other byte is searched for verbatim. An uppercase `c` is therefore
 /// matched case-sensitively, so callers are expected to pass lowercased
 /// needle bytes.
 #[inline(always)]
 fn find_ascii_ignore_case(c: u8, haystack: &[u8]) -> Option<usize> {
    if c >= b'a' && c <= b'z' {
        // the uppercase counterpart sits exactly 32 code points below
        memchr2(c, c - 32, haystack)
    } else {
        memchr(c, haystack)
    }
 }
 /// Allocates a `[T; LEN]` on the heap with all bytes set to zero.
 ///
 /// Aborts through `handle_alloc_error` when the allocation fails.
 ///
 /// # Safety
 ///
 /// `T` must be valid if initialized with zeros.
 unsafe fn zeroed_array_on_heap<T: Copy, const LEN: usize>() -> Box<[T; LEN]> {
    let layout = Layout::new::<[T; LEN]>();
    if layout.size() == 0 {
        // SAFETY: the allocator must never be asked for zero bytes; for a
        // zero-sized value a non-null, well-aligned dangling pointer is a
        // valid `Box` pointer.
        return Box::from_raw(std::ptr::NonNull::<[T; LEN]>::dangling().as_ptr());
    }
    // SAFETY: `layout` has a non-zero size (checked above).
    let res = std::alloc::alloc_zeroed(layout);
    if res.is_null() {
        std::alloc::handle_alloc_error(layout)
    }
    // SAFETY: `res` is a fresh allocation with the layout of `[T; LEN]`, and
    // the caller guarantees that zeroed memory is a valid `T`.
    Box::from_raw(res as *mut [T; LEN])
 }
 impl Matcher {
    pub fn new(config: MatcherConfig) -> Self {
        // Safety: all data allocated here is just integers/structs that contain
        // integers so zeroed values are legal
        unsafe {
        Self {
            config,
-                matrix: zeroed_array_on_heap(),
+            slab: MatrixSlab::new(),
                haystack: zeroed_array_on_heap(),
                first_needle_occurance: zeroed_array_on_heap(),
            }
        }
    }
    pub fn compile_query(&self, needle: &str) -> Query {
        let mut query = Query {
            needle_chars: Vec::new(),
            is_ascii: true,
            ignore_case: self.config.case_matching == CaseMatching::Ignore,
        };
        query.push(
            needle,
            self.config.normalize,
            self.config.case_matching == CaseMatching::Smart,
        );
        query
    }
    pub fn recompile_query(&self, query: &mut Query, needle: &str) {
        query.needle_chars.clear();
        query.is_ascii = false;
        query.ignore_case = self.config.case_matching == CaseMatching::Ignore;
        query.push(
            needle,
            self.config.normalize,
            self.config.case_matching == CaseMatching::Smart,
        );
    }
    pub fn append_query(&self, query: &mut Query, needle: &str) {
        query.push(
            needle,
            self.config.normalize,
            self.config.case_matching == CaseMatching::Smart,
        );
    }
    pub fn fuzzy_match(&mut self, query: &Query, mut haystack: &str) -> Option<u16> {
        if haystack.len() > u32::MAX as usize {
            haystack = &haystack[..u32::MAX as usize]
        }
        if self.config.use_v1 {
            if query.is_ascii && !self.config.normalize {
                self.fuzzy_matcher_v1::<false, true>(query, haystack, &mut Vec::new())
            } else {
                self.fuzzy_matcher_v1::<false, false>(query, haystack, &mut Vec::new())
            }
        } else if query.is_ascii && !self.config.normalize {
            self.fuzzy_matcher_v2::<false, true>(query, haystack, &mut Vec::new())
        } else {
            self.fuzzy_matcher_v2::<false, false>(query, haystack, &mut Vec::new())
        }
    }
-    pub fn fuzzy_indicies(
+    pub fn fuzzy_match(&mut self, haystack: Utf32Str<'_>, needle: Utf32Str<'_>) -> Option<u16> {
        assert!(haystack.len() <= u32::MAX as usize);
        self.fuzzy_matcher_impl::<false>(haystack, needle, &mut Vec::new())
    }
    fn fuzzy_matcher_impl<const INDICIES: bool>(
        &mut self,
-        query: &Query,
+        haystack: Utf32Str<'_>,
-        mut haystack: &str,
+        needle_: Utf32Str<'_>,
-        indicies: &mut Vec<u32>,
+        indidies: &mut Vec<u32>,
    ) -> Option<u16> {
-        if haystack.len() > u32::MAX as usize {
+        assert!(
-            haystack = &haystack[..u32::MAX as usize]
+            haystack.len() <= u32::MAX as usize,
-        }
+            "fuzzy matching is only support for up to 2^32-1 codepoints"
-        if self.config.use_v1 {
+        );
-            if query.is_ascii && !self.config.normalize {
+        match (haystack, needle_) {
-                self.fuzzy_matcher_v1::<true, true>(query, haystack, indicies)
+            (Utf32Str::Ascii(haystack), Utf32Str::Ascii(needle)) => {
-            } else {
+                let (start, greedy_end, end) = self.prefilter_ascii(haystack, needle)?;
-                self.fuzzy_matcher_v1::<true, false>(query, haystack, indicies)
+                self.fuzzy_match_optimal::<INDICIES, u8, u8>(
-            }
+                    haystack, needle, start, greedy_end, end, indidies,
        } else if query.is_ascii && !self.config.normalize {
            self.fuzzy_matcher_v2::<true, true>(query, haystack, indicies)
        } else {
            self.fuzzy_matcher_v2::<true, false>(query, haystack, indicies)
        }
    }
    #[inline(always)]
    fn normalize_char<const ASCII_ONLY: bool>(&self, ignore_case: bool, mut c: char) -> char {
        if ignore_case {
            c = to_lower_case::<ASCII_ONLY>(c)
        }
        if !ASCII_ONLY && self.config.normalize {
            c = normalize(c)
        }
        c
    }
    fn prefilter_ascii(&self, query: &Query, mut haystack: &[u8]) -> Option<(usize, usize)> {
        let needle = &query.needle_chars;
        if query.ignore_case {
            let first_idx = find_ascii_ignore_case(needle[0] as u8, haystack)?;
            let mut last_idx = first_idx + 1;
            haystack = &haystack[last_idx..];
            for &c in &needle[1..] {
                let idx = find_ascii_ignore_case(c as u8, haystack)? + 1;
                last_idx += idx;
                haystack = &haystack[idx..];
            }
            Some((first_idx, last_idx))
        } else {
            let first_idx = memchr(needle[0] as u8, haystack)?;
            let mut last_idx = first_idx + 1;
            haystack = &haystack[last_idx..];
            for &c in &needle[1..] {
                let idx = memchr(c as u8, haystack)? + 1;
                last_idx += idx;
                haystack = &haystack[idx..];
            }
            Some((first_idx, last_idx))
        }
    }
    fn prefilter_non_ascii(&self, query: &Query, haystack: &str) -> Option<(usize, usize)> {
        let needle_char = query.needle_chars[0];
        let mut text = haystack
            .char_indices()
            .map(|(i, c)| (i, self.normalize_char::<false>(query.ignore_case, c)));
        let (match_start, c) = text.find(|&(_, c)| c == needle_char)?;
        Some((match_start, match_start + c.len_utf8()))
    }
    fn prefilter(&self, query: &Query, haystack: &str) -> Option<(usize, usize)> {
        // quickly reject small matches
        if query.needle_chars.len() > haystack.len() {
            return None;
        }
        if query.is_ascii {
            self.prefilter_ascii(query, haystack.as_bytes())
        } else {
            self.prefilter_non_ascii(query, haystack)
        }
    }
    /// Entry point of the v1 matcher.
    ///
    /// Runs `prefilter` on `haystack` and returns `None` if it rejects the
    /// haystack. Otherwise the `(start, end)` range reported by the prefilter is
    /// handed to `fuzzy_matcher_v1_with_prefilter`, whose result is returned.
    fn fuzzy_matcher_v1<const INDICIES: bool, const ASCII_ONLY: bool>(
        &mut self,
        query: &Query,
        haystack: &str,
        indicies: &mut Vec<u32>,
    ) -> Option<u16> {
        let (start, end) = self.prefilter(query, haystack)?;
        self.fuzzy_matcher_v1_with_prefilter::<INDICIES, ASCII_ONLY>(
            query, haystack, start, end, indicies,
                )
            }
-
+            (Utf32Str::Ascii(_), Utf32Str::Unicode(_)) => {
-    fn fuzzy_matcher_v1_with_prefilter<const INDICIES: bool, const ASCII_ONLY: bool>(
+                // a purely ascii haystack can never be transformed to match
-        &mut self,
+                // a needle that contains non-ascii chars since we don't allow gaps
-        query: &Query,
+                None
        haystack: &str,
        mut start: usize,
        mut end: usize,
        indicies: &mut Vec<u32>,
    ) -> Option<u16> {
        let first_char_end = if ASCII_ONLY { start + 1 } else { end };
        if !ASCII_ONLY && query.needle_chars.len() != 1 {
            let mut needle_iter = query.needle_chars[1..].iter().copied();
            if let Some(mut needle_char) = needle_iter.next() {
                let haystack = haystack[first_char_end..]
                    .char_indices()
                    .rev()
                    .map(|(i, c)| (i, self.normalize_char::<false>(query.ignore_case, c)));
                for (i, c) in haystack {
                    if c == needle_char {
                        let Some(next_needle_char) = needle_iter.next() else {
                            end = i + c.len_utf8();
                            break;
                        };
                        needle_char = next_needle_char;
            }
            (Utf32Str::Unicode(haystack), Utf32Str::Ascii(needle)) => {
                todo!()
                // let (start, end) = self.prefilter_non_ascii(haystack, needle_)?;
                // self.fuzzy_match_optimal::<INDICIES, char, u8>(
                //     haystack,
                //     needle,
                //     start,
                //     start + 1,
                //     end,
                //     indidies,
                // )
            }
-            }
+            (Utf32Str::Unicode(haystack), Utf32Str::Unicode(needle)) => {
-        }
+                let (start, end) = self.prefilter_non_ascii(haystack, needle_)?;
-        // very simple, just minimize from the back
+                self.fuzzy_match_optimal::<INDICIES, char, char>(
-        let match_ = haystack[first_char_end..end]
+                    haystack,
-            .char_indices()
+                    needle,
            .rev()
            .map(|(i, c)| (i, self.normalize_char::<ASCII_ONLY>(query.ignore_case, c)));
        let mut needle_iter = query.needle_chars[..].iter().rev().copied();
        let mut needle_char = needle_iter.next().unwrap();
        for (i, c) in match_ {
            if c == needle_char {
                let Some(next_needle_char) = needle_iter.next() else {
                    start = i;
                    break;
                };
                needle_char = next_needle_char;
            }
        }
        Some(self.calculate_score::<INDICIES, ASCII_ONLY>(query, haystack, start, end, indicies))
    }
    /// Scores the match of `query` inside `text[match_start..match_end]`.
    ///
    /// Every char of the range that equals the next needle char (after the
    /// optional lowercasing / normalization) earns `SCORE_MATCH` plus a
    /// positional bonus; every other char costs a gap penalty.
    ///
    /// When `INDICIES` is set, the byte offset into `text` of every matched
    /// char is appended to `indicies`.
    ///
    /// # Panics
    ///
    /// Panics if the range still contains chars after the last needle char has
    /// been matched, or if the offsets are not char boundaries of `text`.
    fn calculate_score<const INDICIES: bool, const ASCII_ONLY: bool>(
        &mut self,
        query: &Query,
        text: &str,
        match_start: usize,
        match_end: usize,
        indicies: &mut Vec<u32>,
    ) -> u16 {
        if INDICIES {
            indicies.reserve(query.needle_chars.len());
        }
        // class of the char right before the match, or the configured initial
        // class when the match starts at the beginning of `text`
        let mut prev_class = text[..match_start]
            .chars()
            .next_back()
            .map(|c| self.config.char_class(c))
            .unwrap_or(self.config.inital_char_class);
        let mut needle_idx = 0;
        let mut score = 0u16;
        let mut in_gap = false;
        let mut consecutive = 0;
        let mut first_bonus = 0u16;
        for (i, mut c) in text[match_start..match_end].char_indices() {
            let class = self.config.char_class(c);
            if (ASCII_ONLY || class == CharClass::Upper) && query.ignore_case {
                c = to_lower_case::<ASCII_ONLY>(c);
            }
            if self.config.normalize && !ASCII_ONLY {
                c = normalize(c)
            }
            if c == query.needle_chars[needle_idx] {
                if INDICIES {
                    // `i` is relative to `match_start`; report the offset
                    // into `text`, like `fuzzy_matcher_v2` does
                    indicies.push((match_start + i) as u32)
                }
                score += SCORE_MATCH;
                let mut bonus = self.config.bonus_for(prev_class, class);
                if consecutive == 0 {
                    // first char of a consecutive chunk
                    first_bonus = bonus
                } else {
                    // Break consecutive chunk
                    if bonus > first_bonus {
                        if bonus >= BONUS_BOUNDARY {
                            first_bonus = bonus;
                        } else {
                            bonus = max(bonus, BONUS_CONSECUTIVE);
                        }
                    } else {
                        bonus = max(first_bonus, BONUS_CONSECUTIVE);
                    }
                }
                if needle_idx == 0 {
                    bonus *= BONUS_FIRST_CHAR_MULTIPLIER;
                }
                score += bonus;
                needle_idx += 1;
                in_gap = false;
                consecutive += 1;
            } else {
                // starting a gap costs more than extending one
                let penalty = if in_gap {
                    PENALTY_GAP_EXTENSION
                } else {
                    PENALTY_GAP_START
                };
                score = score.saturating_sub(penalty);
                in_gap = true;
                consecutive = 0;
                first_bonus = 0;
            }
            prev_class = class;
        }
        score
    }
    fn fuzzy_matcher_v2<const INDICIES: bool, const ASCII_ONLY: bool>(
        &mut self,
        query: &Query,
        text: &str,
        indicies: &mut Vec<u32>,
    ) -> Option<u16> {
        let (start, prefilter_end) = self.prefilter(query, text)?;
        let text_len = text.len() - start;
        // Fall back to the v1 algorithm for long haystacks.
        // Technically we would need to multiply by the char count here, but
        // counting chars has a lot of unnecessary overhead that we can avoid.
        // In practice the byte length should be a reasonable approximation.
        // We also differ from fzf here in that we never allocate and instead
        // stringently check the size here.
        if text_len > u16::MAX as usize || text_len * query.needle_chars.len() > MAX_HAYSTACK_LEN {
            return self.fuzzy_matcher_v1_with_prefilter::<INDICIES, ASCII_ONLY>(
                query,
                text,
                    start,
-                prefilter_end,
+                    start + 1,
-                indicies,
+                    end,
-            );
+                    indidies,
                )
            }
        let mut prev_class = text[..start]
            .chars()
            .next_back()
            .map(|c| self.config.char_class(c))
            .unwrap_or(self.config.inital_char_class);
        let text = &text[start..];
        let mut needle_iter = query.needle_chars[..]
            .iter()
            .copied()
            .zip(self.first_needle_occurance.iter_mut());
        let (mut needle_char, mut needle_char_idx) = needle_iter.next().unwrap();
        let iter = text[start..]
            .chars()
            .zip(self.matrix.iter_mut())
            .zip(self.haystack.iter_mut())
            .enumerate();
        let mut last_matched_idx = 0;
        let mut max_score = 0;
        let mut max_score_pos = 0;
        let mut in_gap = false;
        let mut prev_score = 0u16;
        let mut matched = false;
        let first_needle_char = query.needle_chars[0];
        for (i, ((mut c, matrix_cell), char_info)) in iter {
            let class = self.config.char_class(c);
            if (ASCII_ONLY || class == CharClass::Upper) && query.ignore_case {
                c = to_lower_case::<ASCII_ONLY>(c);
            }
            if self.config.normalize && !ASCII_ONLY {
                c = normalize(c)
            }
            char_info.char = c;
            let bonus = self.config.bonus_for(prev_class, class);
            char_info.char = c;
            prev_class = class;
            let i = i as u16;
            if c == needle_char {
                // save the first idx of each char
                if let Some(next) = needle_iter.next() {
                    *needle_char_idx = i;
                    (needle_char, needle_char_idx) = next
                } else {
                    // we have atleast one match
                    matched = true;
                }
                // and the last matched char
                last_matched_idx = i;
            }
            if c == first_needle_char {
                let score = SCORE_MATCH + bonus * BONUS_FIRST_CHAR_MULTIPLIER;
                matrix_cell.consecutive_chars = 1;
                if query.needle_chars.len() == 1 && score > max_score {
                    max_score = score;
                    max_score_pos = i;
                    // can't get better than this
                    if bonus >= BONUS_BOUNDARY {
                        break;
                    }
                }
                in_gap = false;
            } else {
                let gap_penalty = if in_gap {
                    PENALTY_GAP_EXTENSION
                } else {
                    PENALTY_GAP_START
                };
                matrix_cell.score = prev_score.saturating_sub(gap_penalty);
                matrix_cell.consecutive_chars = 0;
                in_gap = true;
            }
            prev_score = matrix_cell.score;
        }
        if !matched {
            debug_assert!(!ASCII_ONLY, "prefilter should have rejected");
            return None;
        }
        if query.needle_chars.len() == 1 {
            indicies.push(max_score_pos as u32);
            return Some(max_score);
        }
        assert_eq!(
            self.first_needle_occurance[0], 0,
            "prefilter should have put us at the start of the match"
        );
        let haystack_len = last_matched_idx as usize + 1;
        let (max_score, best_match_end) = self.popultate_matrix(haystack_len, query);
        if INDICIES {
            indicies.reserve(query.needle_chars.len());
            let mut col = best_match_end;
            let mut needle_iter = self.matrix[..haystack_len * query.needle_chars.len()]
                .windows(haystack_len)
                .zip(self.first_needle_occurance[..haystack_len].iter())
                .rev()
                .peekable();
            let mut next_row = None;
            let (mut row, mut first_needle_occurance) = needle_iter.next().unwrap();
            let mut prefer_match = true;
            loop {
                let score = row[col as usize].score;
                let mut score1 = 0;
                let mut score2 = 0;
                if let Some((prev_row, _)) = needle_iter.peek() {
                    if col >= *first_needle_occurance {
                        score1 = prev_row[col as usize].score;
                    }
                }
                if col > *first_needle_occurance {
                    score2 = row[col as usize - 1].score;
                }
                if score > score1 && (score > score2 || score == score2 && prefer_match) {
                    indicies.push(col as u32 + start as u32);
                    next_row = Some(row);
                    let Some(next) = needle_iter.next() else {
                        break;
                    };
                    (row, first_needle_occurance) = next
                }
                prefer_match = row[col as usize].consecutive_chars > 1;
                if !prefer_match && col + 1 < query.needle_chars.len() as u16 {
                    if let Some(next_row) = next_row {
                        prefer_match = next_row[col as usize + 1].consecutive_chars > 0
                    }
                }
                col -= 1;
        }
    }
-        Some(max_score)
+    // pub fn fuzzy_indicies(
-    }
+    //     &mut self,
-
+    //     query: &Query,
-    fn popultate_matrix(&mut self, haystack_len: usize, query: &Query) -> (u16, u16) {
+    //     mut haystack: Utf32Str<'_>,
-        let mut max_score = 0;
+    //     indicies: &mut Vec<u32>,
-        let mut max_score_end = 0;
+    // ) -> Option<u16> {
-        let mut iter = query
+    //     if haystack.len() > u32::MAX as usize {
-            .needle_chars
+    //         haystack = &haystack[..u32::MAX as usize]
-            .iter()
+    //     }
-            .zip(self.first_needle_occurance.iter())
+    //     println!(
-            .zip(self.matrix.chunks_mut(haystack_len))
+    //         "start {haystack:?}, {:?} {} {}",
-            .enumerate();
+    //         query.needle_chars, query.ignore_case, query.is_ascii
-        // skip the first row we already calculated the initial scores
+    //     );
-        let (_, ((&_, &_), mut prev_matrix_row)) = iter.next().unwrap();
+    //     if self.config.use_v1 {
-        for (i, ((&needle_char, &first_occurance), matrix_row)) in iter {
+    //         if query.is_ascii && !self.config.normalize {
-            // help the optimizer out a little
+    //             self.fuzzy_matcher_v1::<true, true>(query, haystack, indicies)
-            assert!((first_occurance as usize) < matrix_row.len());
+    //         } else {
-            assert!(first_occurance != 0);
+    //             self.fuzzy_matcher_v1::<true, false>(query, haystack, indicies)
-            let mut in_gap = false;
+    //         }
-            let haystack = &self.haystack[first_occurance as usize..haystack_len];
+    //     } else if query.is_ascii && !self.config.normalize {
-            let mut prev_matrix_cell = matrix_row[first_occurance as usize - 1];
+    //         self.fuzzy_matcher_v2::<true, true>(query, haystack, indicies)
-            let matrix_row = &mut matrix_row[first_occurance as usize..haystack_len];
+    //     } else {
-            let prev_matrix_diagonal =
+    //         self.fuzzy_matcher_v2::<true, false>(query, haystack, indicies)
-                &mut prev_matrix_row[first_occurance as usize - 1..haystack_len - 1];
+    //     }
-            for (j, ((&haystack_char, matrix_cell), &diag_matrix_cell)) in haystack
+    // }
                .iter()
                .zip(matrix_row.iter_mut())
                .zip(prev_matrix_diagonal.iter())
                .enumerate()
            {
                let col = j + first_occurance as usize;
                let gap_penalty = if in_gap {
                    PENALTY_GAP_EXTENSION
                } else {
                    PENALTY_GAP_START
                };
                let mut score1 = 0;
                let score2 = prev_matrix_cell.score.saturating_sub(gap_penalty);
                let mut consecutive = 0;
                if haystack_char.char == needle_char {
                    score1 = diag_matrix_cell.score + SCORE_MATCH;
                    let mut bonus = haystack_char.bonus;
                    consecutive = diag_matrix_cell.consecutive_chars + 1;
                    if consecutive > 1 {
                        let first_bonus = self.haystack[col - consecutive as usize].bonus;
                        if bonus > first_bonus {
                            if bonus > BONUS_BOUNDARY {
                                consecutive = 1
                            } else {
                                bonus = max(bonus, BONUS_CONSECUTIVE)
                            }
                        } else {
                            bonus = max(first_bonus, BONUS_CONSECUTIVE)
                        }
                    }
                    if score1 + bonus < score2 {
                        score1 += haystack_char.bonus;
                        consecutive = 0;
                    } else {
                        score1 += bonus;
                    }
                }
                in_gap = score1 < score2;
                let score = max(max(score1, score2), 0);
                prev_matrix_cell = *matrix_cell;
                if i == query.needle_chars.len() - 1 && score > max_score {
                    max_score = score;
                    max_score_end = col as u16;
                }
                matrix_cell.consecutive_chars = consecutive;
                matrix_cell.score = score;
            }
            prev_matrix_row = matrix_row;
        }
        (max_score, max_score_end)
    }
 }
 /// Maps `c` to its lowercase / case-folded counterpart.
 ///
 /// ASCII uppercase letters are always lowered. Non-ASCII chars are looked up
 /// in `case_fold::CASE_FOLDING_SIMPLE` unless `ASCII_ONLY` is set. Every other
 /// char is returned unchanged.
 #[inline(always)]
 fn to_lower_case<const ASCII_ONLY: bool>(c: char) -> char {
    if c.is_ascii_uppercase() {
        return c.to_ascii_lowercase();
    }
    if ASCII_ONLY || c.is_ascii() {
        return c;
    }
    // the table is searched by its first tuple element, the second one is the folded char
    match case_fold::CASE_FOLDING_SIMPLE.binary_search_by_key(&c, |(upper, _)| *upper) {
        Ok(idx) => case_fold::CASE_FOLDING_SIMPLE[idx].1,
        Err(_) => c,
    }
 }
--- a/src/matrix.rs
+++ b/src/matrix.rs
@ -0,0 +1,280 @@
 use std::alloc::{alloc_zeroed, dealloc, handle_alloc_error, Layout};
 use std::fmt::{Debug, Formatter, Result};
 use std::marker::PhantomData;
 use std::mem::{size_of, take};
 use std::ops::Index;
 use std::ptr::{slice_from_raw_parts_mut, NonNull};
 use crate::chars::Char;
 // Number of `MatrixCell`s the slab reserves room for. A cell consists of two
 // `u16`s, so this should come to 4 * 100 * 1024 bytes = 400KB.
 // NOTE(review): this comment used to quote "4*60*1024 = 240KB", which does not
 // match the constant - confirm which value is intended.
 const MAX_MATRIX_SIZE: usize = 100 * 1024;
 // these two aren't hard maxima, instead we simply allow whatever will fit into memory
 // NOTE(review): both used to be annotated with "64KB"; the sizes below are
 // derived from the arrays in `MatrixData`.
 const MAX_HAYSTACK_LEN: usize = 2048; // 2048 chars (8KB) + 2048 u16 bonuses (4KB)
 const MAX_NEEDLE_LEN: usize = 2048; // 2048 u16 row offsets (4KB)
 /// Describes where the four arrays of a matrix live inside one allocation.
 ///
 /// The arrays are placed back to back in this order: haystack chars, bonuses,
 /// row offsets, cells. The `*_off` fields are the byte offsets of each array
 /// from the start of the allocation, `layout` is the combined layout.
 struct MatrixLayout<C: Char> {
    haystack_len: usize,
    needle_len: usize,
    cell_count: usize,
    layout: Layout,
    haystack_off: usize,
    bonus_off: usize,
    rows_off: usize,
    cells_off: usize,
    _phantom: PhantomData<C>,
 }
 impl<C: Char> MatrixLayout<C> {
    /// Computes the combined layout for `haystack_len` chars and bonuses,
    /// `needle_len` row offsets and `cell_count` matrix cells.
    ///
    /// # Panics
    ///
    /// Panics if one of the array layouts or the combined layout overflows.
    fn new(haystack_len: usize, needle_len: usize, cell_count: usize) -> MatrixLayout<C> {
        let haystack_arr = Layout::array::<C>(haystack_len).unwrap();
        let bonus_arr = Layout::array::<u16>(haystack_len).unwrap();
        let rows_arr = Layout::array::<u16>(needle_len).unwrap();
        let cells_arr = Layout::array::<MatrixCell>(cell_count).unwrap();

        // start from an empty layout and append one array after the other,
        // remembering the offset each array ends up at
        let empty = Layout::from_size_align(0, 1).unwrap();
        let (layout, haystack_off) = empty.extend(haystack_arr).unwrap();
        let (layout, bonus_off) = layout.extend(bonus_arr).unwrap();
        let (layout, rows_off) = layout.extend(rows_arr).unwrap();
        let (layout, cells_off) = layout.extend(cells_arr).unwrap();

        Self {
            haystack_len,
            needle_len,
            cell_count,
            layout,
            haystack_off,
            bonus_off,
            rows_off,
            cells_off,
            _phantom: PhantomData,
        }
    }
    /// Splits the allocation behind `ptr` into raw slices for the haystack,
    /// the bonuses, the row offsets and the cells (in that order).
    ///
    /// # Safety
    ///
    /// `ptr` must point at an allocation that is at least as large as
    /// `self.layout` and suitably aligned for it.
    unsafe fn fieds_from_ptr(
        &self,
        ptr: NonNull<u8>,
    ) -> (*mut [C], *mut [u16], *mut [u16], *mut [MatrixCell]) {
        let base = ptr.as_ptr();
        let haystack =
            slice_from_raw_parts_mut(base.add(self.haystack_off).cast::<C>(), self.haystack_len);
        let bonus =
            slice_from_raw_parts_mut(base.add(self.bonus_off).cast::<u16>(), self.haystack_len);
        let rows = slice_from_raw_parts_mut(base.add(self.rows_off).cast::<u16>(), self.needle_len);
        let cells = slice_from_raw_parts_mut(
            base.add(self.cells_off).cast::<MatrixCell>(),
            self.cell_count,
        );
        (haystack, bonus, rows, cells)
    }
 }
 /// One entry of the scoring matrix: a score plus a consecutive-char counter.
 #[derive(Clone, Copy, PartialEq, Eq)]
 pub(crate) struct MatrixCell {
    pub score: u16,
    pub consecutive_chars: u16,
 }
 impl Debug for MatrixCell {
    /// Formats the cell compactly as the tuple `(score, consecutive_chars)`.
    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
        let Self {
            score,
            consecutive_chars,
        } = *self;
        Debug::fmt(&(score, consecutive_chars), f)
    }
 }
 /// A haystack char paired with its bonus value.
 #[derive(Clone, Copy, PartialEq, Eq)]
 pub(crate) struct HaystackChar<C: Char> {
    pub char: C,
    pub bonus: u16,
 }
 impl<C: Char> Debug for HaystackChar<C> {
    /// Formats the entry compactly as the tuple `(char, bonus)`.
    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
        let pair = (self.char, self.bonus);
        Debug::fmt(&pair, f)
    }
 }
 /// Read-only view of one matrix row.
 ///
 /// `cells` holds the stored part of the row and `off` is the offset that
 /// belongs to it. Indexing addresses `cells` directly, i.e. it is relative to
 /// the stored part.
 #[derive(Clone, Copy)]
 pub(crate) struct MatrixRow<'a> {
    pub off: u16,
    pub cells: &'a [MatrixCell],
 }
 impl Index<u16> for MatrixRow<'_> {
    type Output = MatrixCell;
    fn index(&self, index: u16) -> &Self::Output {
        &self.cells[usize::from(index)]
    }
 }
 impl Debug for MatrixRow<'_> {
    /// Prints the row as a list, with `off` leading `(0, 0)` placeholders
    /// followed by the stored cells.
    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
        let padding = std::iter::repeat(&(0, 0)).take(usize::from(self.off));
        f.debug_list()
            .entries(padding)
            .entries(self.cells.iter())
            .finish()
    }
 }
 /// Mutable view of one matrix row: the stored cells plus the offset that
 /// belongs to them.
 pub(crate) struct MatrixRowMut<'a> {
    pub off: u16,
    pub cells: &'a mut [MatrixCell],
 }
 impl Debug for MatrixRowMut<'_> {
    /// Prints the row as a list, with `off` leading `(0, 0)` placeholders
    /// followed by the stored cells.
    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
        let padding = std::iter::repeat(&(0, 0)).take(usize::from(self.off));
        f.debug_list()
            .entries(padding)
            .entries(self.cells.iter())
            .finish()
    }
 }
 /// Adapter that prints the items of a cloneable iterator as a debug list.
 pub struct DebugList<I>(I);
 impl<I> Debug for DebugList<I>
 where
    I: Iterator + Clone,
    I::Item: Debug,
 {
    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
        // iterate over a clone so that formatting does not consume the iterator
        let mut list = f.debug_list();
        for item in self.0.clone() {
            list.entry(&item);
        }
        list.finish()
    }
 }
 /// Borrowed view of all matrix data: the haystack chars, their bonuses, the
 /// per-row offsets and the cells of all rows stored back to back.
 pub(crate) struct Matrix<'a, C: Char> {
    pub haystack: &'a mut [C],
    // stored as a separate array instead of a struct
    // to avoid padding since char is too large and u8 too small :/
    pub bonus: &'a mut [u16],
    pub row_offs: &'a mut [u16],
    pub cells: &'a mut [MatrixCell],
 }
 impl<'a, C: Char> Matrix<'a, C> {
    /// Iterates over the rows front to back. Each row takes the next
    /// `haystack.len() - off` cells from the front of `cells`.
    pub fn rows(&self) -> impl Iterator<Item = MatrixRow> + ExactSizeIterator + Clone + Sized {
        let mut remaining = &*self.cells;
        self.row_offs.iter().map(move |&off| {
            let width = self.haystack.len() - off as usize;
            let (row, rest) = remaining.split_at(width);
            remaining = rest;
            MatrixRow { off, cells: row }
        })
    }
    /// Iterates over the rows back to front. Each row takes the last
    /// `haystack.len() - off` cells from the back of `cells`.
    pub fn rows_rev(&self) -> impl Iterator<Item = MatrixRow> + ExactSizeIterator {
        let mut remaining = &*self.cells;
        self.row_offs.iter().rev().map(move |&off| {
            let width = self.haystack.len() - off as usize;
            let (rest, row) = remaining.split_at(remaining.len() - width);
            remaining = rest;
            MatrixRow { off, cells: row }
        })
    }
    /// Iterates over the haystack chars together with their bonuses.
    pub fn haystack(
        &self,
    ) -> impl Iterator<Item = HaystackChar<C>> + ExactSizeIterator + '_ + Clone {
        haystack(self.haystack, self.bonus, 0)
    }
 }
 impl<'a, C: Char> Debug for Matrix<'a, C> {
    /// Prints the haystack entries and all rows.
    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
        let mut out = f.debug_struct("Matrix");
        out.field("haystack", &DebugList(self.haystack()));
        out.field("matrix", &DebugList(self.rows()));
        out.finish()
    }
 }
 /// Pairs every haystack char with its bonus, skipping the first `skip`
 /// entries of both slices.
 ///
 /// Panics if `skip` is larger than the length of either slice.
 pub(crate) fn haystack<'a, C: Char>(
    haystack: &'a [C],
    bonus: &'a [u16],
    skip: u16,
 ) -> impl Iterator<Item = HaystackChar<C>> + ExactSizeIterator + Clone + 'a {
    let first = usize::from(skip);
    let chars = haystack[first..].iter().copied();
    let bonuses = bonus[first..].iter().copied();
    chars
        .zip(bonuses)
        .map(|(char, bonus)| HaystackChar { char, bonus })
 }
 /// Splits `cells` into one mutable row per entry of `row_offs`.
 ///
 /// The rows are carved off the front of `cells` in order; the row for offset
 /// `off` receives `haystack_len - off` cells.
 pub(crate) fn rows_mut<'a>(
    row_offs: &'a [u16],
    mut cells: &'a mut [MatrixCell],
    haystack_len: usize,
 ) -> impl Iterator<Item = MatrixRowMut<'a>> + ExactSizeIterator + 'a {
    row_offs.iter().map(move |&off| {
        let width = haystack_len - off as usize;
        // temporarily move the slice out so that it can be split with its full lifetime
        let remaining = take(&mut cells);
        let (row, rest) = remaining.split_at_mut(width);
        cells = rest;
        MatrixRowMut { off, cells: row }
    })
 }
 // we only use this to construct the layout for the slab allocation
 //
 // `MatrixSlab` allocates (and frees) its memory with
 // `Layout::new::<MatrixData>()`, and `MatrixSlab::alloc` rejects every matrix
 // whose layout is larger than `size_of::<MatrixData>()`. No value of this type
 // is constructed in this file, hence the `allow(unused)`.
 #[allow(unused)]
 struct MatrixData {
    haystack: [char; MAX_HAYSTACK_LEN],
    bonus: [u16; MAX_HAYSTACK_LEN],
    row_offs: [u16; MAX_NEEDLE_LEN],
    cells: [MatrixCell; MAX_MATRIX_SIZE],
 }
 // const MATRIX_ALLOC_LAYOUT: Layout =
 //     MatrixLayout::<char>::new(MAX_HAYSTACK_LEN, MAX_NEEDLE_LEN, MAX_MATRIX_SIZE).layout;
 /// Owns one zero-initialized heap allocation with the layout of `MatrixData`
 /// that is reused for every matrix handed out by [`MatrixSlab::alloc`].
 pub(crate) struct MatrixSlab(NonNull<u8>);
 impl MatrixSlab {
    /// Allocates the slab; aborts via `handle_alloc_error` if the allocation
    /// fails.
    pub fn new() -> Self {
        let layout = Layout::new::<MatrixData>();
        // SAFETY: the matrix is never zero sized (hardcoded constants)
        let ptr = unsafe { alloc_zeroed(layout) };
        let Some(ptr) = NonNull::new(ptr) else {
            handle_alloc_error(layout)
        };
        MatrixSlab(ptr.cast())
    }
    /// Carves a matrix for `haystack_` and a needle of `needle_len` chars out
    /// of the slab and copies the haystack into it.
    ///
    /// Returns `None` if the matrix would not fit into the slab, if the
    /// haystack has more than `u16::MAX` chars, or if `needle_len / 2` exceeds
    /// the haystack length (the cell count would be negative).
    pub(crate) fn alloc<C: Char>(
        &mut self,
        haystack_: &[C],
        needle_len: usize,
    ) -> Option<Matrix<'_, C>> {
        let cells = haystack_.len() * needle_len;
        if cells > MAX_MATRIX_SIZE || haystack_.len() > u16::MAX as usize {
            return None;
        }
        // A needle more than twice as long as the haystack would make this
        // subtraction underflow (panic in debug builds, a bogus huge cell
        // count in release builds), so reject it instead.
        let cell_count = haystack_.len().checked_sub(needle_len / 2)? * needle_len;
        let matrix_layout = MatrixLayout::<C>::new(haystack_.len(), needle_len, cell_count);
        if matrix_layout.layout.size() > size_of::<MatrixData>() {
            return None;
        }
        unsafe {
            // SAFETY: the slab was allocated with the layout of `MatrixData`
            // and the size check above ensures that `matrix_layout` fits into
            // it. NOTE(review): this also relies on `MatrixData` being at
            // least as strictly aligned as `matrix_layout` - confirm for all
            // `Char` implementations.
            let (haystack, bonus, rows, cells) = matrix_layout.fieds_from_ptr(self.0);
            // copy the haystack before creating references to ensure we don't
            // create references to invalid chars (which may or may not be UB)
            haystack_
                .as_ptr()
                .copy_to_nonoverlapping(haystack as *mut _, haystack_.len());
            Some(Matrix {
                haystack: &mut *haystack,
                row_offs: &mut *rows,
                bonus: &mut *bonus,
                cells: &mut *cells,
            })
        }
    }
 }
 impl Drop for MatrixSlab {
    fn drop(&mut self) {
        // SAFETY: the pointer was allocated in `new` with this exact layout
        unsafe { dealloc(self.0.as_ptr(), Layout::new::<MatrixData>()) };
    }
 }
--- a/src/multizip.rs
+++ b/src/multizip.rs
--- a/src/prefilter.rs
+++ b/src/prefilter.rs
@ -0,0 +1,73 @@
 use ::memchr::{memchr, memchr2, memrchr, memrchr2};
 use crate::chars::Char;
 use crate::utf32_str::Utf32Str;
 use crate::Matcher;
 /// Finds the first occurrence of `c` in `haystack`, ignoring ASCII case.
 ///
 /// A lowercase ASCII letter matches both itself and its uppercase form; any
 /// other byte is searched verbatim.
 /// NOTE(review): an uppercase `c` only matches uppercase bytes - presumably
 /// needles are lowercased before they get here; confirm against the callers.
 #[inline(always)]
 fn find_ascii_ignore_case(c: u8, haystack: &[u8]) -> Option<usize> {
    // Both bounds must hold: only then does an uppercase form exist. Otherwise
    // a second, unrelated byte would be searched as well (or the computation
    // of that byte would underflow for `c < 32`).
    if c.is_ascii_lowercase() {
        memchr2(c, c.to_ascii_uppercase(), haystack)
    } else {
        memchr(c, haystack)
    }
 }
 /// Finds the last occurrence of `c` in `haystack`, ignoring ASCII case.
 ///
 /// A lowercase ASCII letter matches both itself and its uppercase form; any
 /// other byte is searched verbatim.
 /// NOTE(review): an uppercase `c` only matches uppercase bytes - presumably
 /// needles are lowercased before they get here; confirm against the callers.
 #[inline(always)]
 fn find_ascii_ignore_case_rev(c: u8, haystack: &[u8]) -> Option<usize> {
    // Both bounds must hold: only then does an uppercase form exist. Otherwise
    // a second, unrelated byte would be searched as well (or the computation
    // of that byte would underflow for `c < 32`).
    if c.is_ascii_lowercase() {
        memrchr2(c, c.to_ascii_uppercase(), haystack)
    } else {
        memrchr(c, haystack)
    }
 }
 impl Matcher {
    /// Greedy subsequence prefilter for ASCII haystacks and needles.
    ///
    /// Returns `None` if the needle bytes do not occur in order in `haystack`.
    /// Otherwise returns `(start, eager_end, end)`:
    /// * `start`: index of the first occurrence of the first needle byte,
    /// * `eager_end`: index one past the last byte of the greedy match that
    ///   begins at `start`,
    /// * `end`: index one past the last occurrence of the last needle byte
    ///   (equal to `eager_end` if it does not occur again).
    ///
    /// Panics if `needle` is empty.
    pub(crate) fn prefilter_ascii(
        &self,
        mut haystack: &[u8],
        needle: &[u8],
    ) -> Option<(usize, usize, usize)> {
        if self.config.ignore_case {
            let start = find_ascii_ignore_case(needle[0], haystack)?;
            let mut eager_end = start + 1;
            haystack = &haystack[eager_end..];
            for &c in &needle[1..] {
                let idx = find_ascii_ignore_case(c, haystack)? + 1;
                eager_end += idx;
                haystack = &haystack[idx..];
            }
            // `haystack` now starts at `eager_end`; a hit at index `i` ends
            // at `i + 1`, so the exclusive end is `eager_end + i + 1`
            let end = eager_end
                + find_ascii_ignore_case_rev(*needle.last().unwrap(), haystack)
                    .map_or(0, |i| i + 1);
            Some((start, eager_end, end))
        } else {
            let start = memchr(needle[0], haystack)?;
            let mut eager_end = start + 1;
            haystack = &haystack[eager_end..];
            for &c in &needle[1..] {
                let idx = memchr(c, haystack)? + 1;
                eager_end += idx;
                haystack = &haystack[idx..];
            }
            // see above: convert the index of the hit into an exclusive end
            let end =
                eager_end + memrchr(*needle.last().unwrap(), haystack).map_or(0, |i| i + 1);
            Some((start, eager_end, end))
        }
    }
    /// Prefilter for haystacks that contain non-ASCII chars.
    ///
    /// Returns `(start, end)` where `start` is the index of the first haystack
    /// char that equals the first needle char and `end` is the index one past
    /// the last haystack char (at or after `start`) that equals the last
    /// needle char. Chars are compared after normalization. Returns `None` if
    /// either char cannot be found.
    pub(crate) fn prefilter_non_ascii(
        &self,
        haystack: &[char],
        needle: Utf32Str<'_>,
    ) -> Option<(usize, usize)> {
        let needle_char = needle.get(0);
        let start = haystack
            .iter()
            .position(|c| c.normalize(&self.config) == needle_char)?;
        let needle_char = needle.last();
        // search from the back and translate the index, which is relative to
        // `start`, into an exclusive end index into `haystack`
        let last = haystack[start..]
            .iter()
            .rposition(|c| c.normalize(&self.config) == needle_char)?;
        Some((start, start + last + 1))
    }
 }
--- a/src/score.rs
+++ b/src/score.rs
@ -0,0 +1,145 @@
 use std::cmp::max;
 use crate::chars::{Char, CharClass};
 use crate::{Matcher, MatcherConfig};
 // Score awarded for every matched needle char.
 pub(crate) const SCORE_MATCH: u16 = 16;
 // Penalty for the first haystack char of a gap between two matched chars.
 pub(crate) const PENALTY_GAP_START: u16 = 3;
 // Penalty for every further haystack char of a gap.
 pub(crate) const PENALTY_GAP_EXTENSION: u16 = 1;
 // We prefer matches at the beginning of a word, but the bonus should not be
 // too great to prevent the longer acronym matches from always winning over
 // shorter fuzzy matches. The bonus point here was specifically chosen that
 // the bonus is cancelled when the gap between the acronyms grows over
 // 8 characters, which is approximately the average length of the words found
 // in web2 dictionary and my file system.
 pub(crate) const BONUS_BOUNDARY: u16 = SCORE_MATCH / 2;
 // Although bonus point for non-word characters is non-contextual, we need it
 // for computing bonus points for consecutive chunks starting with a non-word
 // character.
 pub(crate) const BONUS_NON_WORD: u16 = SCORE_MATCH / 2;
 // Edge-triggered bonus for matches in camelCase words.
 // Compared to word-boundary case, they don't accompany single-character gaps
 // (e.g. FooBar vs. foo-bar), so we deduct bonus point accordingly.
 pub(crate) const BONUS_CAMEL123: u16 = BONUS_BOUNDARY - PENALTY_GAP_EXTENSION;
 // Minimum bonus point given to characters in consecutive chunks.
 // Note that bonus points for consecutive matches wouldn't have been needed if
 // we used a fixed match score as in the original algorithm.
 pub(crate) const BONUS_CONSECUTIVE: u16 = PENALTY_GAP_START + PENALTY_GAP_EXTENSION;
 // The first character in the typed pattern usually has more significance
 // than the rest so it's important that it appears at special positions where
 // bonus points are given, e.g. "to-go" vs. "ongoing" on "og" or on "ogo".
 // The amount of the extra bonus should be limited so that the gap penalty is
 // still respected.
 pub(crate) const BONUS_FIRST_CHAR_MULTIPLIER: u16 = 2;
 impl MatcherConfig {
    /// Returns the positional bonus for a char of class `class` that directly
    /// follows a char of class `prev_class`.
    ///
    /// Classes that compare greater than `CharClass::NonWord` receive a
    /// boundary bonus when they follow whitespace, a delimiter or a non-word
    /// char. Otherwise lower-to-upper and non-number-to-number transitions
    /// earn `BONUS_CAMEL123`, non-word chars earn `BONUS_NON_WORD`,
    /// whitespace earns the whitespace boundary bonus and everything else `0`.
    #[inline]
    pub(crate) fn bonus_for(&self, prev_class: CharClass, class: CharClass) -> u16 {
        let is_word = class > CharClass::NonWord;
        match (prev_class, class) {
            // transition from non word to word
            (CharClass::Whitespace, _) if is_word => self.bonus_boundary_white,
            (CharClass::Delimiter, _) if is_word => self.bonus_boundary_delimiter,
            (CharClass::NonWord, _) if is_word => BONUS_BOUNDARY,
            // camelCase letter123
            (CharClass::Lower, CharClass::Upper) => BONUS_CAMEL123,
            (prev, CharClass::Number) if prev != CharClass::Number => BONUS_CAMEL123,
            (_, CharClass::NonWord) => BONUS_NON_WORD,
            (_, CharClass::Whitespace) => self.bonus_boundary_white,
            _ => 0,
        }
    }
 }
 impl Matcher {
    /// Shorthand for `self.config.bonus_for(prev_class, class)`.
    #[inline(always)]
    pub(crate) fn bonus_for(&self, prev_class: CharClass, class: CharClass) -> u16 {
        self.config.bonus_for(prev_class, class)
    }
    /// Scores the greedy match of `needle` inside `haystack[start..end]`.
    ///
    /// `haystack[start]` is treated as the match of the first needle char.
    /// Every following char of the range that equals the next needle char
    /// (after normalization) earns `SCORE_MATCH` plus a positional bonus;
    /// every other char costs a gap penalty.
    ///
    /// When `INDICIES` is set, the haystack index of every matched char is
    /// appended to `indicies`.
    ///
    /// # Panics
    ///
    /// Panics if `needle` is empty or if `start..end` is not a valid,
    /// non-empty range of `haystack`.
    pub(crate) fn calculate_score<const INDICIES: bool, H: Char + PartialEq<N>, N: Char>(
        &mut self,
        haystack: &[H],
        needle: &[N],
        start: usize,
        end: usize,
        indicies: &mut Vec<u32>,
    ) -> u16 {
        if INDICIES {
            indicies.reserve(needle.len());
        }
        // class of the char right before the match, or the configured initial
        // class when the match starts at the beginning of the haystack
        let mut prev_class = start
            .checked_sub(1)
            .map(|i| haystack[i].char_class(&self.config))
            .unwrap_or(self.config.inital_char_class);
        let mut needle_iter = needle.iter();
        let mut needle_char = *needle_iter.next().unwrap();
        let mut in_gap = false;
        let mut consecutive = 1;
        // unrolled the first iteration to make applying the first char multiplier less awkward
        if INDICIES {
            indicies.push(start as u32)
        }
        // the first matched char is `haystack[start]`, not `haystack[0]`
        let first_class = haystack[start].char_class(&self.config);
        let mut first_bonus = self.bonus_for(prev_class, first_class);
        let mut score = SCORE_MATCH + first_bonus * BONUS_FIRST_CHAR_MULTIPLIER;
        prev_class = first_class;
        // the first needle char has been consumed by `haystack[start]`
        if let Some(&next) = needle_iter.next() {
            needle_char = next;
        }
        for (i, c) in haystack[start + 1..end].iter().enumerate() {
            let class = c.char_class(&self.config);
            let c = c.normalize(&self.config);
            if c == needle_char {
                if INDICIES {
                    // `i` counts from `start + 1`
                    indicies.push((start + 1 + i) as u32)
                }
                let mut bonus = self.bonus_for(prev_class, class);
                if consecutive == 0 {
                    // first char of a consecutive chunk
                    first_bonus = bonus
                } else {
                    // Break consecutive chunk
                    if bonus > first_bonus {
                        if bonus >= BONUS_BOUNDARY {
                            first_bonus = bonus;
                        } else {
                            bonus = max(bonus, BONUS_CONSECUTIVE);
                        }
                    } else {
                        bonus = max(first_bonus, BONUS_CONSECUTIVE);
                    }
                }
                score += SCORE_MATCH + bonus;
                in_gap = false;
                consecutive += 1;
                if let Some(&next) = needle_iter.next() {
                    needle_char = next;
                }
            } else {
                // starting a gap costs more than extending one
                let penalty = if in_gap {
                    PENALTY_GAP_EXTENSION
                } else {
                    PENALTY_GAP_START
                };
                score = score.saturating_sub(penalty);
                in_gap = true;
                consecutive = 0;
                first_bonus = 0;
            }
            prev_class = class;
        }
        score
    }
 }
--- a/src/tests.rs
+++ b/src/tests.rs
@ -0,0 +1,270 @@
 use crate::config::{
    BONUS_BOUNDARY, BONUS_CAMEL123, BONUS_CONSECUTIVE, BONUS_FIRST_CHAR_MULTIPLIER, BONUS_NON_WORD,
    PENALTY_GAP_EXTENSION, PENALTY_GAP_START, SCORE_MATCH,
 };
 use crate::{CaseMatching, Matcher, MatcherConfig};
 /// Runs every entry of `cases` through a freshly configured matcher and checks
 /// both the score and the matched span.
 ///
 /// Each case is `(haystack, needle, start, end, score)`. The `score` component
 /// lists only bonuses and penalties: `SCORE_MATCH` per needle char is added
 /// here before comparing. `start..end` is compared against the first reported
 /// index and one past the last reported index.
 pub fn assert_matches(
    use_v1: bool,
    normalize: bool,
    case_sensitive: bool,
    path: bool,
    cases: &[(&str, &str, u32, u32, u16)],
 ) {
    let case_matching = match case_sensitive {
        true => CaseMatching::Respect,
        false => CaseMatching::Ignore,
    };
    let mut config = MatcherConfig {
        use_v1,
        normalize,
        case_matching,
        ..MatcherConfig::DEFAULT
    };
    if path {
        config.set_match_paths();
    }
    let mut matcher = Matcher::new(config);
    // Shared output buffer handed to the matcher for every case.
    let mut matched = Vec::new();
    for &(haystack, needle, start, end, bonus) in cases {
        // Table entries omit the per-character base score, add it back.
        let expected_score = bonus + needle.chars().count() as u16 * SCORE_MATCH;
        let query = matcher.compile_query(needle);
        let res = matcher.fuzzy_indicies(&query, haystack, &mut matched);
        assert_eq!(
            res,
            Some(expected_score),
            "{needle:?} did not match {haystack:?}"
        );
        let matched_span = matched.first().copied()..matched.last().map(|&last| last + 1);
        assert_eq!(
            matched_span,
            Some(start)..Some(end),
            "{needle:?} match {haystack:?}[{start}..{end}]"
        );
    }
 }
 // The whitespace/delimiter boundary bonuses are fields of `MatcherConfig`
 // rather than free constants; the expectations below use the values from
 // `MatcherConfig::DEFAULT`.
 const BONUS_BOUNDARY_WHITE: u16 = MatcherConfig::DEFAULT.bonus_boundary_white;
 const BONUS_BOUNDARY_DELIMITER: u16 = MatcherConfig::DEFAULT.bonus_boundary_delimiter;
 /// Fuzzy matching expectations with `use_v1 = false`, normalization off,
 /// case-insensitive, non-path configuration.
 ///
 /// Each case is `(haystack, needle, start, end, score)`; `assert_matches` adds
 /// `SCORE_MATCH` per needle char, so the score column lists only the expected
 /// bonuses and gap penalties.
 #[test]
 fn test_v2_fuzzy() {
    assert_matches(
        // use_v1
        false,
        // normalize
        false,
        // case_sensitive
        false,
        // path
        false,
        &[
            // `o`, `B` adjacent, then a 4-char gap ("arba") before `z`:
            // camel-case bonus for `B`, one gap start + three extensions.
            (
                "fooBarbaz1",
                "oBZ",
                2,
                9,
                BONUS_CAMEL123 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION * 3,
            ),
            // One needle char per word: the leading `f` gets the first-char
            // multiplier, both `b`s follow whitespace; two 3-char gaps.
            (
                "foo bar baz",
                "fbb",
                0,
                9,
                BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 2
                    - 2 * PENALTY_GAP_START
                    - 4 * PENALTY_GAP_EXTENSION,
            ),
            // Contiguous match "rDoc": no bonus on `r`, camel-case bonus on
            // `D`, consecutive bonus on the two following chars.
            (
                "/AutomatorDocument.icns",
                "rdoc",
                9,
                13,
                BONUS_CAMEL123 + BONUS_CONSECUTIVE * 2,
            ),
            // Contiguous match right after a `/`: the delimiter boundary bonus
            // is multiplied for the first char and carried by the next three.
            (
                "/man1/zshcompctl.1",
                "zshc",
                6,
                10,
                BONUS_BOUNDARY_DELIMITER * BONUS_FIRST_CHAR_MULTIPLIER
                    + BONUS_BOUNDARY_DELIMITER * 3,
            ),
            // "zsh" after `-` (plain boundary bonus, carried twice), a 1-char
            // gap over `/`, then `c` with the delimiter boundary bonus.
            (
                "/.oh-my-zsh/cache",
                "zshc",
                8,
                13,
                BONUS_BOUNDARY * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY * 2
                    - PENALTY_GAP_START
                    + BONUS_BOUNDARY_DELIMITER,
            ),
            // "123" follows a digit, so no boundary bonus; only consecutive
            // bonuses for `2`, `3` and `6`, with a 2-char gap (" 4").
            (
                "ab0123 456",
                "12356",
                3,
                10,
                BONUS_CONSECUTIVE * 3 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION,
            ),
            // Here "123" follows a letter: letter->digit bonus on `1`
            // (multiplied) carried by `2` and `3`; `6` is consecutive to `5`.
            (
                "abc123 456",
                "12356",
                3,
                10,
                BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER
                    + BONUS_CAMEL123 * 2
                    + BONUS_CONSECUTIVE
                    - PENALTY_GAP_START
                    - PENALTY_GAP_EXTENSION,
            ),
            // Same shape as "foo bar baz" but the `b`s follow `/`, so they get
            // the delimiter boundary bonus instead of the whitespace one.
            (
                "foo/bar/baz",
                "fbb",
                0,
                9,
                BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_DELIMITER * 2
                    - 2 * PENALTY_GAP_START
                    - 4 * PENALTY_GAP_EXTENSION,
            ),
            // Camel-case word starts: both `B`s get the camel bonus, with two
            // 2-char gaps ("oo", "ar").
            (
                "fooBarBaz",
                "fbb",
                0,
                7,
                BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CAMEL123 * 2
                    - 2 * PENALTY_GAP_START
                    - 2 * PENALTY_GAP_EXTENSION,
            ),
            // First `b` follows whitespace, second `b` is mid-word (no bonus);
            // gaps of 3 ("oo ") and 2 ("ar") chars.
            (
                "foo barbaz",
                "fbb",
                0,
                8,
                BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE
                    - PENALTY_GAP_START * 2
                    - PENALTY_GAP_EXTENSION * 3,
            ),
            // Contiguous prefix match: the bonus of the first char is carried
            // by all three following chars (including the camel-case `B`).
            (
                "fooBar Baz",
                "foob",
                0,
                4,
                BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 3,
            ),
            // Contiguous "Foo-B": camel bonus on `F` carried by "oo", non-word
            // bonus for `-`, plain boundary bonus for `B` after `-`.
            (
                "xFoo-Bar Baz",
                "foo-b",
                1,
                6,
                BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER
                    + BONUS_CAMEL123 * 2
                    + BONUS_NON_WORD
                    + BONUS_BOUNDARY,
            ),
        ],
    );
 }
 /// Fuzzy matching expectations with `use_v1 = true`, normalization off,
 /// case-insensitive, non-path configuration.
 ///
 /// Each case is `(haystack, needle, start, end, score)`; `assert_matches` adds
 /// `SCORE_MATCH` per needle char, so the score column lists only the expected
 /// bonuses and gap penalties.
 #[test]
 fn test_v1_fuzzy() {
    assert_matches(
        // use_v1
        true,
        // normalize
        false,
        // case_sensitive
        false,
        // path
        false,
        &[
            // `o`, `B` adjacent, then a 4-char gap ("arba") before `z`:
            // camel-case bonus for `B`, one gap start + three extensions.
            (
                "fooBarbaz1",
                "oBZ",
                2,
                9,
                BONUS_CAMEL123 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION * 3,
            ),
            // One needle char per word: the leading `f` gets the first-char
            // multiplier, both `b`s follow whitespace; two 3-char gaps.
            (
                "foo bar baz",
                "fbb",
                0,
                9,
                BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 2
                    - 2 * PENALTY_GAP_START
                    - 4 * PENALTY_GAP_EXTENSION,
            ),
            // Contiguous match "rDoc": no bonus on `r`, camel-case bonus on
            // `D`, consecutive bonus on the two following chars.
            (
                "/AutomatorDocument.icns",
                "rdoc",
                9,
                13,
                BONUS_CAMEL123 + BONUS_CONSECUTIVE * 2,
            ),
            // Contiguous match right after a `/`: the delimiter boundary bonus
            // is multiplied for the first char and carried by the next three.
            (
                "/man1/zshcompctl.1",
                "zshc",
                6,
                10,
                BONUS_BOUNDARY_DELIMITER * BONUS_FIRST_CHAR_MULTIPLIER
                    + BONUS_BOUNDARY_DELIMITER * 3,
            ),
            // "zsh" after `-` (plain boundary bonus, carried twice), a 1-char
            // gap over `/`, then `c` with the delimiter boundary bonus.
            (
                "/.oh-my-zsh/cache",
                "zshc",
                8,
                13,
                BONUS_BOUNDARY * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY * 2
                    - PENALTY_GAP_START
                    + BONUS_BOUNDARY_DELIMITER,
            ),
            // "123" follows a digit, so no boundary bonus; only consecutive
            // bonuses for `2`, `3` and `6`, with a 2-char gap (" 4").
            (
                "ab0123 456",
                "12356",
                3,
                10,
                BONUS_CONSECUTIVE * 3 - PENALTY_GAP_START - PENALTY_GAP_EXTENSION,
            ),
            // Here "123" follows a letter: letter->digit bonus on `1`
            // (multiplied) carried by `2` and `3`; `6` is consecutive to `5`.
            (
                "abc123 456",
                "12356",
                3,
                10,
                BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER
                    + BONUS_CAMEL123 * 2
                    + BONUS_CONSECUTIVE
                    - PENALTY_GAP_START
                    - PENALTY_GAP_EXTENSION,
            ),
            // Same shape as "foo bar baz" but the `b`s follow `/`, so they get
            // the delimiter boundary bonus instead of the whitespace one.
            (
                "foo/bar/baz",
                "fbb",
                0,
                9,
                BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_DELIMITER * 2
                    - 2 * PENALTY_GAP_START
                    - 4 * PENALTY_GAP_EXTENSION,
            ),
            // Camel-case word starts: both `B`s get the camel bonus, with two
            // 2-char gaps ("oo", "ar").
            (
                "fooBarBaz",
                "fbb",
                0,
                7,
                BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CAMEL123 * 2
                    - 2 * PENALTY_GAP_START
                    - 2 * PENALTY_GAP_EXTENSION,
            ),
            // First `b` follows whitespace, second `b` is mid-word (no bonus);
            // gaps of 3 ("oo ") and 2 ("ar") chars.
            (
                "foo barbaz",
                "fbb",
                0,
                8,
                BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE
                    - PENALTY_GAP_START * 2
                    - PENALTY_GAP_EXTENSION * 3,
            ),
            // Contiguous prefix match: the bonus of the first char is carried
            // by all three following chars (including the camel-case `B`).
            (
                "fooBar Baz",
                "foob",
                0,
                4,
                BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 3,
            ),
            // Contiguous "Foo-B": camel bonus on `F` carried by "oo", non-word
            // bonus for `-`, plain boundary bonus for `B` after `-`.
            (
                "xFoo-Bar Baz",
                "foo-b",
                1,
                6,
                BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER
                    + BONUS_CAMEL123 * 2
                    + BONUS_NON_WORD
                    + BONUS_BOUNDARY,
            ),
        ],
    );
 }
--- a/src/utf32_str.rs
+++ b/src/utf32_str.rs
@ -0,0 +1,123 @@
 use std::ops::{Bound, RangeBounds};
 /// A UTF-32 encoded (char array) string that can be used as an input to fuzzy matching.
 ///
 /// Usually Rust's UTF-8 encoded strings are great. However, fuzzy matching
 /// operates on codepoints (it should operate on graphemes but that's too much
 /// hassle to deal with). We want to quickly iterate over these codepoints
 /// (up to 5 times) during matching.
 ///
 /// Doing codepoint segmentation on the fly not only blows through the cache
 /// (lookup tables and Icache) but also has nontrivial runtime compared to the
 /// matching itself. Furthermore there are a lot of extra optimizations available
 /// for ASCII-only text (but checking during each match has too much overhead).
 ///
 /// Of course this comes at extra memory cost as we usually still need the UTF-8
 /// encoded variant for rendering. In the (dominant) case of ASCII-only text
 /// we don't require a copy. Furthermore fuzzy matching usually is applied while
 /// the user is typing on the fly so the same item is potentially matched many
 /// times (making the upfront cost more worth it). That means that it's
 /// basically always worth it to presegment the string.
 ///
 /// For use cases that only match (a lot of) strings once it's possible to keep
 /// a char buffer around that is filled with the presegmented chars.
 ///
 /// Another advantage of this approach is that the matcher will naturally
 /// produce char indices (instead of UTF-8 offsets) anyway. With a
 /// codepoint based representation like this the indices can be used
 /// directly.
 #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash, Debug)]
 pub enum Utf32Str<'a> {
    /// A string represented as ASCII encoded bytes.
    /// Correctness invariant: must only contain valid ASCII (<=127)
    Ascii(&'a [u8]),
    /// A string represented as an array of unicode codepoints (basically UTF-32).
    Unicode(&'a [char]),
 }
 impl<'a> Utf32Str<'a> {
    /// Convenience method to construct a `Utf32Str` from a normal utf8 str.
    ///
    /// ASCII-only input borrows the bytes of `str` directly and leaves `buf`
    /// untouched. Any other input is decoded into `buf` (which is cleared
    /// first) and the result borrows from `buf`.
    pub fn new(str: &'a str, buf: &'a mut Vec<char>) -> Self {
        if str.is_ascii() {
            Utf32Str::Ascii(str.as_bytes())
        } else {
            buf.clear();
            buf.extend(str.chars());
            Utf32Str::Unicode(&*buf)
        }
    }
    /// Returns the number of characters (bytes for `Ascii`, codepoints for
    /// `Unicode`) in this string.
    #[inline]
    pub fn len(&self) -> usize {
        match self {
            Utf32Str::Unicode(codepoints) => codepoints.len(),
            Utf32Str::Ascii(ascii_bytes) => ascii_bytes.len(),
        }
    }
    /// Returns `true` if this string contains no characters.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
    /// Returns the sub-string covered by `range` (in characters), following
    /// the usual range semantics: `a..b` excludes `b`, `a..=b` includes it.
    ///
    /// # Panics
    ///
    /// Panics if the range is out of bounds or its start lies behind its end.
    #[inline]
    pub fn slice(&self, range: impl RangeBounds<usize>) -> Utf32Str<'_> {
        self.slice_bounds(range.start_bound().cloned(), range.end_bound().cloned())
    }
    /// Same as `slice` but accepts a u32 range for convenience since
    /// those are the indices returned by the matcher.
    ///
    /// # Panics
    ///
    /// Panics if the range is out of bounds or its start lies behind its end.
    #[inline]
    pub fn slice_u32(&self, range: impl RangeBounds<u32>) -> Utf32Str<'_> {
        self.slice_bounds(
            Self::widen(range.start_bound()),
            Self::widen(range.end_bound()),
        )
    }
    /// Returns `true` if this string is stored in the `Ascii` representation.
    pub fn is_ascii(&self) -> bool {
        matches!(self, Utf32Str::Ascii(_))
    }
    /// Returns the character at index `idx`.
    ///
    /// # Panics
    ///
    /// Panics if `idx` is out of bounds.
    pub fn get(&self, idx: u32) -> char {
        match self {
            Utf32Str::Ascii(bytes) => bytes[idx as usize] as char,
            Utf32Str::Unicode(codepoints) => codepoints[idx as usize],
        }
    }
    /// Returns the final character of this string.
    ///
    /// # Panics
    ///
    /// Panics if the string is empty.
    pub fn last(&self) -> char {
        const EMPTY: &str = "Utf32Str::last called on an empty string";
        match self {
            Utf32Str::Ascii(bytes) => *bytes.last().expect(EMPTY) as char,
            Utf32Str::Unicode(codepoints) => *codepoints.last().expect(EMPTY),
        }
    }
    /// Converts a borrowed `u32` bound into an owned `usize` bound.
    #[inline]
    fn widen(bound: Bound<&u32>) -> Bound<usize> {
        match bound {
            Bound::Included(&idx) => Bound::Included(idx as usize),
            Bound::Excluded(&idx) => Bound::Excluded(idx as usize),
            Bound::Unbounded => Bound::Unbounded,
        }
    }
    /// Resolves a pair of bounds to a half-open `start..end` range and slices
    /// the underlying storage; shared by `slice` and `slice_u32`.
    #[inline]
    fn slice_bounds(&self, start: Bound<usize>, end: Bound<usize>) -> Utf32Str<'_> {
        let start = match start {
            Bound::Included(start) => start,
            Bound::Excluded(start) => start + 1,
            Bound::Unbounded => 0,
        };
        // The exclusive end is one past an inclusive bound and exactly an
        // exclusive bound.
        let end = match end {
            Bound::Included(end) => end + 1,
            Bound::Excluded(end) => end,
            Bound::Unbounded => self.len(),
        };
        match self {
            Utf32Str::Ascii(bytes) => Utf32Str::Ascii(&bytes[start..end]),
            Utf32Str::Unicode(codepoints) => Utf32Str::Unicode(&codepoints[start..end]),
        }
    }
 }
 // impl Str for &[char] {
 //     type Chars;
 //     fn chars(&self) -> Self::Chars {
 //         todo!()
 //     }
 //     fn slice(&self, range: impl RangeBounds<u32>) {
 //         todo!()
 //     }
 // }