From 272f2bc97191ef4f3bb510d1d03def0fffa233b7 Mon Sep 17 00:00:00 2001 From: Pascal Kuthe Date: Fri, 11 Aug 2023 18:31:14 +0200 Subject: [PATCH] publish temporary release to crates.io --- Cargo.toml | 1 + README.md | 8 +++++++- matcher/Cargo.toml | 6 ++++-- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index af4e3b6..4a9732a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ version = "0.1.0" edition = "2021" license = "MPL-2.0" repository = "https://github.com/helix-editor/nucleo" +readme = "README.md" [lib] diff --git a/README.md b/README.md index 50fc660..1a044c2 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,14 @@ # Nucleo +> Disclaimer: A 0.1 version has been published to crates.io. +> This allows us to merge the `nucleo` integration into helix. +> However, the public API is not yet final and will likely +> change quite a bit in the next release. The documentation +> is also not yet complete. + `nucleo` is a highly performant fuzzy matcher written in rust. It aims to fill the same use case as `fzf` and `skim`. Compared to `fzf` `nucleo` has a significantly faster matching algorithm. This mainly makes a difference when matching patterns with low selectivity on many items. An (unscientific) comparison is shown in the benchmark section below. -Nucleo uses the exact **same scoring system as fzf**. That means you should get the same ranking quality (or better) as you are used to from fzf. However, `nucleo` has a more faithful implementation of the Smith-Waterman algorithm which is normally used in DNA sequence alignment (see https://www.cs.cmu.edu/~ckingsf/bioinfo-lectures/gaps.pdf) with two separate matrices (instead of one like fzf). This means that `nucleo` finds the optimal match more often. For example if you match `foo` in `xf foo` `nucleo` will match `x__foo` but `fzf` will match `xf_oo` (you can increase the word length the result will stay the same). 
The former is the more intuitive match and has a higher score according to the ranking system that both `nucleo` and fzf. +`nucleo` uses the exact **same scoring system as fzf**. That means you should get the same ranking quality (or better) as you are used to from fzf. However, `nucleo` has a more faithful implementation of the Smith-Waterman algorithm which is normally used in DNA sequence alignment (see https://www.cs.cmu.edu/~ckingsf/bioinfo-lectures/gaps.pdf) with two separate matrices (instead of one like fzf). This means that `nucleo` finds the optimal match more often. For example if you match `foo` in `xf foo` `nucleo` will match `x__foo` but `fzf` will match `xf_oo` (you can increase the word length and the result will stay the same). The former is the more intuitive match and has a higher score according to the ranking system that both `nucleo` and fzf use. **Compared to `skim`** (and the `fuzzy-matcher` crate) `nucleo` has an even larger performance advantage and is often around **six times faster** (see benchmarks below). Furthermore, the bonus system used by nucleo and fzf is (in my opinion) more consistent/superior. `nulceo` also handles non-ascii text much better. (`skim`s bonus system and even case insensitivity only work for ASCII). diff --git a/matcher/Cargo.toml b/matcher/Cargo.toml index bdcfb8e..eee20da 100644 --- a/matcher/Cargo.toml +++ b/matcher/Cargo.toml @@ -1,10 +1,12 @@ [package] name = "nucleo-matcher" +description = "plug and play high performance fuzzy matcher" authors = ["Pascal Kuthe "] version = "0.1.0" edition = "2021" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +license = "MPL-2.0" +repository = "https://github.com/helix-editor/nucleo" +readme = "../README.md" [dependencies] memchr = "2.5.0"