2023-07-20 00:09:51 +00:00
|
|
|
use std::alloc::{alloc_zeroed, dealloc, handle_alloc_error, Layout};
|
|
|
|
use std::marker::PhantomData;
|
2023-07-24 01:32:11 +00:00
|
|
|
use std::mem::size_of;
|
2023-07-27 20:08:06 +00:00
|
|
|
use std::panic::{RefUnwindSafe, UnwindSafe};
|
2023-07-20 00:09:51 +00:00
|
|
|
use std::ptr::{slice_from_raw_parts_mut, NonNull};
|
|
|
|
|
|
|
|
use crate::chars::Char;
|
|
|
|
|
2023-07-24 01:32:11 +00:00
|
|
|
const MAX_MATRIX_SIZE: usize = 100 * 1024; // max number of matrix cells; cells are one byte each, so 100 * 1024 cells = 100 KiB
|
2023-07-20 00:09:51 +00:00
|
|
|
|
|
|
|
// These two aren't hard maxima: they only determine how large the slab is, and any
// haystack/needle combination whose buffers fit into the slab is accepted.
|
|
|
|
const MAX_HAYSTACK_LEN: usize = 2048; // element count (not bytes): sizes the per-haystack arrays in `MatcherData`
|
|
|
|
const MAX_NEEDLE_LEN: usize = 2048; // element count (not bytes): sizes the `row_offs` array in `MatcherData`
|
|
|
|
|
|
|
|
struct MatrixLayout<C: Char> {
|
|
|
|
haystack_len: usize,
|
|
|
|
needle_len: usize,
|
|
|
|
layout: Layout,
|
|
|
|
haystack_off: usize,
|
|
|
|
bonus_off: usize,
|
|
|
|
rows_off: usize,
|
2023-07-24 01:32:11 +00:00
|
|
|
score_off: usize,
|
|
|
|
matrix_off: usize,
|
2023-07-20 00:09:51 +00:00
|
|
|
_phantom: PhantomData<C>,
|
|
|
|
}
|
|
|
|
impl<C: Char> MatrixLayout<C> {
|
2023-07-24 01:32:11 +00:00
|
|
|
fn new(haystack_len: usize, needle_len: usize) -> MatrixLayout<C> {
|
2023-07-30 02:52:44 +00:00
|
|
|
assert!(haystack_len >= needle_len);
|
|
|
|
assert!(haystack_len <= u32::MAX as usize);
|
2023-07-20 00:09:51 +00:00
|
|
|
let mut layout = Layout::from_size_align(0, 1).unwrap();
|
|
|
|
let haystack_layout = Layout::array::<C>(haystack_len).unwrap();
|
|
|
|
let bonus_layout = Layout::array::<u16>(haystack_len).unwrap();
|
|
|
|
let rows_layout = Layout::array::<u16>(needle_len).unwrap();
|
2023-07-24 01:32:11 +00:00
|
|
|
let score_layout = Layout::array::<ScoreCell>(haystack_len + 1 - needle_len).unwrap();
|
|
|
|
let matrix_layout =
|
|
|
|
Layout::array::<MatrixCell>((haystack_len + 1 - needle_len) * needle_len).unwrap();
|
2023-07-20 00:09:51 +00:00
|
|
|
|
|
|
|
let haystack_off;
|
|
|
|
(layout, haystack_off) = layout.extend(haystack_layout).unwrap();
|
|
|
|
let bonus_off;
|
|
|
|
(layout, bonus_off) = layout.extend(bonus_layout).unwrap();
|
|
|
|
let rows_off;
|
|
|
|
(layout, rows_off) = layout.extend(rows_layout).unwrap();
|
2023-07-24 01:32:11 +00:00
|
|
|
let score_off;
|
|
|
|
(layout, score_off) = layout.extend(score_layout).unwrap();
|
|
|
|
let matrix_off;
|
|
|
|
(layout, matrix_off) = layout.extend(matrix_layout).unwrap();
|
2023-07-20 00:09:51 +00:00
|
|
|
MatrixLayout {
|
|
|
|
haystack_len,
|
|
|
|
needle_len,
|
|
|
|
layout,
|
|
|
|
haystack_off,
|
|
|
|
bonus_off,
|
|
|
|
rows_off,
|
2023-07-24 01:32:11 +00:00
|
|
|
score_off,
|
|
|
|
matrix_off,
|
2023-07-20 00:09:51 +00:00
|
|
|
_phantom: PhantomData,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/// # Safety
|
|
|
|
///
|
|
|
|
/// `ptr` must point at an allocated with MARTIX_ALLOC_LAYOUT
|
|
|
|
unsafe fn fieds_from_ptr(
|
|
|
|
&self,
|
|
|
|
ptr: NonNull<u8>,
|
2023-07-24 01:32:11 +00:00
|
|
|
) -> (
|
|
|
|
*mut [C],
|
|
|
|
*mut [u16],
|
|
|
|
*mut [u16],
|
|
|
|
*mut [ScoreCell],
|
|
|
|
*mut [MatrixCell],
|
|
|
|
) {
|
2023-07-20 00:09:51 +00:00
|
|
|
let base = ptr.as_ptr();
|
|
|
|
let haystack = base.add(self.haystack_off) as *mut C;
|
|
|
|
let haystack = slice_from_raw_parts_mut(haystack, self.haystack_len);
|
|
|
|
let bonus = base.add(self.bonus_off) as *mut u16;
|
|
|
|
let bonus = slice_from_raw_parts_mut(bonus, self.haystack_len);
|
|
|
|
let rows = base.add(self.rows_off) as *mut u16;
|
|
|
|
let rows = slice_from_raw_parts_mut(rows, self.needle_len);
|
2023-07-24 01:32:11 +00:00
|
|
|
let cells = base.add(self.score_off) as *mut ScoreCell;
|
|
|
|
let cells = slice_from_raw_parts_mut(cells, self.haystack_len + 1 - self.needle_len);
|
|
|
|
let matrix = base.add(self.matrix_off) as *mut MatrixCell;
|
|
|
|
let matrix = slice_from_raw_parts_mut(
|
|
|
|
matrix,
|
|
|
|
(self.haystack_len + 1 - self.needle_len) * self.haystack_len,
|
|
|
|
);
|
|
|
|
(haystack, bonus, rows, cells, matrix)
|
2023-07-20 00:09:51 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-07-20 22:16:15 +00:00
|
|
|
/// A score paired with a flag; the element type of
/// `MatcherDataView::current_row`.
#[derive(Copy, Clone)]
pub(crate) struct ScoreCell {
    /// Score stored in this cell.
    pub score: i32,
    // NOTE(review): presumably records whether this score was reached via a
    // match - confirm against the scoring code.
    pub matched: bool,
}
|
|
|
|
|
2023-07-24 01:32:11 +00:00
|
|
|
pub(crate) struct MatcherDataView<'a, C: Char> {
|
2023-07-20 00:09:51 +00:00
|
|
|
pub haystack: &'a mut [C],
|
2023-07-20 14:03:31 +00:00
|
|
|
// stored as a separate array instead of struct
|
2023-07-20 00:09:51 +00:00
|
|
|
// to avoid padding sine char is too large and u8 too small :/
|
|
|
|
pub bonus: &'a mut [u16],
|
2023-07-24 01:32:11 +00:00
|
|
|
pub current_row: &'a mut [ScoreCell],
|
2023-07-20 00:09:51 +00:00
|
|
|
pub row_offs: &'a mut [u16],
|
2023-07-24 01:32:11 +00:00
|
|
|
pub matrix_cells: &'a mut [MatrixCell],
|
2023-07-20 00:09:51 +00:00
|
|
|
}
|
2023-07-24 01:32:11 +00:00
|
|
|
/// One byte of the match matrix that packs two boolean flags.
///
/// Bit 0 holds the `p_match` flag and bit 1 the `m_match` flag given to
/// [`MatrixCell::set`].
#[repr(transparent)]
pub struct MatrixCell(pub(crate) u8);

impl MatrixCell {
    /// Overwrites the cell with the two flags.
    pub fn set(&mut self, p_match: bool, m_match: bool) {
        let p_bit = u8::from(p_match);
        let m_bit = u8::from(m_match) << 1;
        self.0 = p_bit | m_bit;
    }

    /// Reads one of the two flags: the `m_match` flag when `m_matrix` is
    /// `true`, the `p_match` flag otherwise.
    pub fn get(&self, m_matrix: bool) -> bool {
        let mask: u8 = if m_matrix { 0b10 } else { 0b01 };
        self.0 & mask != 0
    }
}
|
|
|
|
|
|
|
|
// we only use this to construct the layout for the slab allocation
|
|
|
|
#[allow(unused)]
|
2023-07-24 01:32:11 +00:00
|
|
|
struct MatcherData {
|
2023-07-20 00:09:51 +00:00
|
|
|
haystack: [char; MAX_HAYSTACK_LEN],
|
|
|
|
bonus: [u16; MAX_HAYSTACK_LEN],
|
|
|
|
row_offs: [u16; MAX_NEEDLE_LEN],
|
2023-07-24 01:32:11 +00:00
|
|
|
scratch_space: [ScoreCell; MAX_HAYSTACK_LEN],
|
|
|
|
matrix: [u8; MAX_MATRIX_SIZE],
|
2023-07-20 00:09:51 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Owning handle to the raw allocation that backs all matcher buffers.
pub(crate) struct MatrixSlab(NonNull<u8>);

// SAFETY: the raw pointer is what prevents the automatic impls. The slab
// allocates its memory in `new`, frees it in `drop` and only hands it out
// through `&mut self` in `alloc`.
unsafe impl Send for MatrixSlab {}
unsafe impl Sync for MatrixSlab {}

impl RefUnwindSafe for MatrixSlab {}
impl UnwindSafe for MatrixSlab {}
|
2023-07-20 00:09:51 +00:00
|
|
|
|
|
|
|
impl MatrixSlab {
|
|
|
|
pub fn new() -> Self {
|
2023-07-24 01:32:11 +00:00
|
|
|
let layout = Layout::new::<MatcherData>();
|
2023-07-20 00:09:51 +00:00
|
|
|
// safety: the matrix is never zero sized (hardcoded constants)
|
|
|
|
let ptr = unsafe { alloc_zeroed(layout) };
|
|
|
|
let Some(ptr) = NonNull::new(ptr) else{
|
|
|
|
handle_alloc_error(layout)
|
|
|
|
};
|
|
|
|
MatrixSlab(ptr.cast())
|
|
|
|
}
|
|
|
|
|
|
|
|
pub(crate) fn alloc<C: Char>(
|
|
|
|
&mut self,
|
|
|
|
haystack_: &[C],
|
|
|
|
needle_len: usize,
|
2023-07-24 01:32:11 +00:00
|
|
|
) -> Option<MatcherDataView<'_, C>> {
|
2023-07-20 00:09:51 +00:00
|
|
|
let cells = haystack_.len() * needle_len;
|
|
|
|
if cells > MAX_MATRIX_SIZE || haystack_.len() > u16::MAX as usize {
|
|
|
|
return None;
|
|
|
|
}
|
2023-07-24 01:32:11 +00:00
|
|
|
let matrix_layout = MatrixLayout::<C>::new(haystack_.len(), needle_len);
|
|
|
|
if matrix_layout.layout.size() > size_of::<MatcherData>() {
|
2023-07-20 00:09:51 +00:00
|
|
|
return None;
|
|
|
|
}
|
|
|
|
unsafe {
|
2023-07-20 14:03:31 +00:00
|
|
|
// safely: this allocation is valid for MATRIX_ALLOC_LAYOUT
|
2023-07-24 01:32:11 +00:00
|
|
|
let (haystack, bonus, rows, current_row, matrix_cells) =
|
|
|
|
matrix_layout.fieds_from_ptr(self.0);
|
2023-07-20 14:03:31 +00:00
|
|
|
// copy haystack before creating references to ensure we donu't crate
|
|
|
|
// references to invalid chars (which may or may not be UB)
|
2023-07-20 00:09:51 +00:00
|
|
|
haystack_
|
|
|
|
.as_ptr()
|
|
|
|
.copy_to_nonoverlapping(haystack as *mut _, haystack_.len());
|
2023-07-24 01:32:11 +00:00
|
|
|
Some(MatcherDataView {
|
2023-07-20 00:09:51 +00:00
|
|
|
haystack: &mut *haystack,
|
|
|
|
row_offs: &mut *rows,
|
|
|
|
bonus: &mut *bonus,
|
2023-07-24 01:32:11 +00:00
|
|
|
current_row: &mut *current_row,
|
|
|
|
matrix_cells: &mut *matrix_cells,
|
2023-07-20 00:09:51 +00:00
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Drop for MatrixSlab {
|
|
|
|
fn drop(&mut self) {
|
2023-07-24 01:32:11 +00:00
|
|
|
unsafe { dealloc(self.0.as_ptr(), Layout::new::<MatcherData>()) };
|
2023-07-20 00:09:51 +00:00
|
|
|
}
|
|
|
|
}
|