mirror of
https://github.com/solaeus/nucleo.git
synced 2024-12-22 09:57:49 +00:00
move matcher to subcrate
This commit is contained in:
parent
2ce871b70c
commit
8d7a149b30
713
Cargo.lock
generated
713
Cargo.lock
generated
@ -2,6 +2,115 @@
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anes"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
|
||||
|
||||
[[package]]
|
||||
name = "anstyle"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3a30da5c5f2d5e72842e00bcb57657162cdabef0931f40e2deb9b4140440cecd"
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "2.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42"
|
||||
|
||||
[[package]]
|
||||
name = "bumpalo"
|
||||
version = "3.13.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1"
|
||||
|
||||
[[package]]
|
||||
name = "cast"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.79"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "ciborium"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "effd91f6c78e5a4ace8a5d3c0b6bfaec9e2baaef55f3efc00e45fb2e477ee926"
|
||||
dependencies = [
|
||||
"ciborium-io",
|
||||
"ciborium-ll",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ciborium-io"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cdf919175532b369853f5d5e20b26b43112613fd6fe7aee757e35f7a44642656"
|
||||
|
||||
[[package]]
|
||||
name = "ciborium-ll"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "defaa24ecc093c77630e6c15e17c51f5e187bf35ee514f4e2d67baaa96dae22b"
|
||||
dependencies = [
|
||||
"ciborium-io",
|
||||
"half",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "4.3.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5fd304a20bff958a57f04c4e96a2e7594cc4490a0e809cbd48bb6437edaa452d"
|
||||
dependencies = [
|
||||
"clap_builder",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_builder"
|
||||
version = "4.3.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "01c6a3f08f1fe5662a35cfe393aec09c4df95f60ee93b7556505260f75eee9e1"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"clap_lex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap_lex"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b"
|
||||
|
||||
[[package]]
|
||||
name = "cov-mark"
|
||||
version = "1.1.0"
|
||||
@ -9,15 +118,611 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9ffa3d3e0138386cd4361f63537765cac7ee40698028844635a54495a92f67f3"
|
||||
|
||||
[[package]]
|
||||
name = "fzf_oxide"
|
||||
version = "0.1.0"
|
||||
name = "criterion"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f"
|
||||
dependencies = [
|
||||
"cov-mark",
|
||||
"memchr",
|
||||
"anes",
|
||||
"cast",
|
||||
"ciborium",
|
||||
"clap",
|
||||
"criterion-plot",
|
||||
"is-terminal",
|
||||
"itertools",
|
||||
"num-traits",
|
||||
"once_cell",
|
||||
"oorandom",
|
||||
"plotters",
|
||||
"rayon",
|
||||
"regex",
|
||||
"serde",
|
||||
"serde_derive",
|
||||
"serde_json",
|
||||
"tinytemplate",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "criterion-plot"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
|
||||
dependencies = [
|
||||
"cast",
|
||||
"itertools",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-channel"
|
||||
version = "0.5.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-deque"
|
||||
version = "0.8.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"crossbeam-epoch",
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-epoch"
|
||||
version = "0.9.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"cfg-if",
|
||||
"crossbeam-utils",
|
||||
"memoffset",
|
||||
"scopeguard",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-utils"
|
||||
version = "0.8.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "either"
|
||||
version = "1.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
|
||||
|
||||
[[package]]
|
||||
name = "errno"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a"
|
||||
dependencies = [
|
||||
"errno-dragonfly",
|
||||
"libc",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "errno-dragonfly"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fuzzy-matcher"
|
||||
version = "0.3.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "54614a3312934d066701a80f20f15fa3b56d67ac7722b39eea5b4c9dd1d66c94"
|
||||
dependencies = [
|
||||
"thread_local",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "half"
|
||||
version = "1.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7"
|
||||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b"
|
||||
|
||||
[[package]]
|
||||
name = "is-terminal"
|
||||
version = "0.4.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b"
|
||||
dependencies = [
|
||||
"hermit-abi",
|
||||
"rustix",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itertools"
|
||||
version = "0.10.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
|
||||
dependencies = [
|
||||
"either",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "1.0.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38"
|
||||
|
||||
[[package]]
|
||||
name = "js-sys"
|
||||
version = "0.3.64"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a"
|
||||
dependencies = [
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.147"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
|
||||
|
||||
[[package]]
|
||||
name = "linux-raw-sys"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09fc20d2ca12cb9f044c93e3bd6d32d523e6e2ec3db4f7b2939cd99026ecd3f0"
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.4.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4"
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
|
||||
|
||||
[[package]]
|
||||
name = "memoffset"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nucleo-matcher"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"cov-mark",
|
||||
"criterion",
|
||||
"fuzzy-matcher",
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-traits"
|
||||
version = "0.2.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num_cpus"
|
||||
version = "1.16.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
|
||||
dependencies = [
|
||||
"hermit-abi",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.18.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
|
||||
|
||||
[[package]]
|
||||
name = "oorandom"
|
||||
version = "11.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
|
||||
|
||||
[[package]]
|
||||
name = "plotters"
|
||||
version = "0.3.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d2c224ba00d7cadd4d5c660deaf2098e5e80e07846537c51f9cfa4be50c1fd45"
|
||||
dependencies = [
|
||||
"num-traits",
|
||||
"plotters-backend",
|
||||
"plotters-svg",
|
||||
"wasm-bindgen",
|
||||
"web-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "plotters-backend"
|
||||
version = "0.3.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9e76628b4d3a7581389a35d5b6e2139607ad7c75b17aed325f210aa91f4a9609"
|
||||
|
||||
[[package]]
|
||||
name = "plotters-svg"
|
||||
version = "0.3.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "38f6d39893cca0701371e3c27294f09797214b86f1fb951b89ade8ec04e2abab"
|
||||
dependencies = [
|
||||
"plotters-backend",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.66"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.32"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "50f3b39ccfb720540debaa0164757101c08ecb8d326b15358ce76a62c7e85965"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rayon"
|
||||
version = "1.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b"
|
||||
dependencies = [
|
||||
"either",
|
||||
"rayon-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rayon-core"
|
||||
version = "1.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d"
|
||||
dependencies = [
|
||||
"crossbeam-channel",
|
||||
"crossbeam-deque",
|
||||
"crossbeam-utils",
|
||||
"num_cpus",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.9.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-automata",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "39354c10dd07468c2e73926b23bb9c2caca74c5501e38a35da70406f1d923310"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.7.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2"
|
||||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "0.38.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0a962918ea88d644592894bc6dc55acc6c0956488adcebbfb6e273506b7fd6e5"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"errno",
|
||||
"libc",
|
||||
"linux-raw-sys",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "1.0.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741"
|
||||
|
||||
[[package]]
|
||||
name = "same-file"
|
||||
version = "1.0.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
|
||||
dependencies = [
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "scopeguard"
|
||||
version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.175"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5d25439cd7397d044e2748a6fe2432b5e85db703d6d097bd014b3c0ad1ebff0b"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.175"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b23f7ade6f110613c0d63858ddb8b94c1041f550eab58a16b371bdf2c9c80ab4"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.103"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d03b412469450d4404fe8499a268edd7f8b79fecb074b0d812ad64ca21f4031b"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"ryu",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.27"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b60f673f44a8255b9c8c657daf66a596d435f2da81a555b06dc644d080ba45e0"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thread_local"
|
||||
version = "1.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"once_cell",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tinytemplate"
|
||||
version = "1.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c"
|
||||
|
||||
[[package]]
|
||||
name = "walkdir"
|
||||
version = "2.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698"
|
||||
dependencies = [
|
||||
"same-file",
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen"
|
||||
version = "0.2.87"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"wasm-bindgen-macro",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-backend"
|
||||
version = "0.2.87"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd"
|
||||
dependencies = [
|
||||
"bumpalo",
|
||||
"log",
|
||||
"once_cell",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-macro"
|
||||
version = "0.2.87"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d"
|
||||
dependencies = [
|
||||
"quote",
|
||||
"wasm-bindgen-macro-support",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-macro-support"
|
||||
version = "0.2.87"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
"wasm-bindgen-backend",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wasm-bindgen-shared"
|
||||
version = "0.2.87"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1"
|
||||
|
||||
[[package]]
|
||||
name = "web-sys"
|
||||
version = "0.3.64"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b"
|
||||
dependencies = [
|
||||
"js-sys",
|
||||
"wasm-bindgen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||
dependencies = [
|
||||
"winapi-i686-pc-windows-gnu",
|
||||
"winapi-x86_64-pc-windows-gnu",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-i686-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||
|
||||
[[package]]
|
||||
name = "winapi-util"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
|
||||
dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-x86_64-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
|
||||
dependencies = [
|
||||
"windows-targets",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-targets"
|
||||
version = "0.48.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f"
|
||||
dependencies = [
|
||||
"windows_aarch64_gnullvm",
|
||||
"windows_aarch64_msvc",
|
||||
"windows_i686_gnu",
|
||||
"windows_i686_msvc",
|
||||
"windows_x86_64_gnu",
|
||||
"windows_x86_64_gnullvm",
|
||||
"windows_x86_64_msvc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_gnullvm"
|
||||
version = "0.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_msvc"
|
||||
version = "0.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnu"
|
||||
version = "0.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_msvc"
|
||||
version = "0.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnu"
|
||||
version = "0.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnullvm"
|
||||
version = "0.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_msvc"
|
||||
version = "0.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a"
|
||||
|
||||
[[package]]
|
||||
name = "worker"
|
||||
version = "0.1.0"
|
||||
|
16
Cargo.toml
16
Cargo.toml
@ -1,14 +1,2 @@
|
||||
[package]
|
||||
name = "fzf_oxide"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
memchr = "2.5.0"
|
||||
cov-mark = { version = "1.1.0", default-features = false }
|
||||
|
||||
[dev-dependencies]
|
||||
cov-mark = { version = "1.1.0", default-features = true }
|
||||
|
||||
[workspace]
|
||||
members = [ "matcher", "worker" ]
|
||||
|
17
matcher/Cargo.toml
Normal file
17
matcher/Cargo.toml
Normal file
@ -0,0 +1,17 @@
|
||||
[package]
|
||||
name = "nucleo-matcher"
|
||||
authors = ["Pascal Kuthe <pascal.kuthe@semimod.de>"]
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
memchr = "2.5.0"
|
||||
cov-mark = { version = "1.1.0", default-features = false }
|
||||
|
||||
[dev-dependencies]
|
||||
cov-mark = { version = "1.1.0", default-features = true }
|
||||
criterion = "0.5.1"
|
||||
fuzzy-matcher = "0.3.7"
|
||||
|
4
matcher/fuzz/.gitignore
vendored
Normal file
4
matcher/fuzz/.gitignore
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
target
|
||||
corpus
|
||||
artifacts
|
||||
coverage
|
@ -1,3 +1,5 @@
|
||||
use std::mem::transmute;
|
||||
|
||||
const DATA1: [(char, char); 277] = [
|
||||
('\u{00C0}', 'A'), // WITH GRAVE, LATIN CAPITAL LETTER
|
||||
('\u{00C1}', 'A'), // WITH ACUTE, LATIN CAPITAL LETTER
|
||||
@ -471,7 +473,7 @@ const fn generate_table<const LEN: usize>(sparse_data: &[(char, char)]) -> [char
|
||||
let mut i = 0u32;
|
||||
let mut j = 0;
|
||||
while i < table.len() as u32 {
|
||||
let Some(key) = char::from_u32(start + i) else { panic!("invalid char") };
|
||||
let key = unsafe { transmute(start + i) };
|
||||
if sparse_data[j].0 == key {
|
||||
table[i as usize] = DATA1[j].1;
|
||||
j += 1;
|
@ -21,7 +21,7 @@ use std::fmt::{Debug, Formatter, Result};
|
||||
|
||||
impl Debug for ScoreCell {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
|
||||
write!(f, "({}, {}, {})", self.score, self.bonus, self.matched)
|
||||
write!(f, "({}, {})", self.score, self.matched)
|
||||
}
|
||||
}
|
||||
|
271
matcher/src/exact.rs
Normal file
271
matcher/src/exact.rs
Normal file
@ -0,0 +1,271 @@
|
||||
use memchr::memmem;
|
||||
use memchr::{Memchr, Memchr2};
|
||||
|
||||
use crate::chars::{AsciiChar, Char};
|
||||
use crate::score::{BONUS_FIRST_CHAR_MULTIPLIER, SCORE_MATCH};
|
||||
use crate::Matcher;
|
||||
|
||||
impl Matcher {
|
||||
pub(crate) fn substring_match_1_ascii<const INDICES: bool>(
|
||||
&mut self,
|
||||
haystack: &[u8],
|
||||
c: u8,
|
||||
indices: &mut Vec<u32>,
|
||||
) -> Option<u16> {
|
||||
let mut max_score = 0;
|
||||
let mut max_pos = 0;
|
||||
if self.config.ignore_case && c >= b'a' && c <= b'z' {
|
||||
for i in Memchr2::new(c, c - 32, haystack) {
|
||||
let prev_char_class = i
|
||||
.checked_sub(1)
|
||||
.map(|i| AsciiChar(haystack[i]).char_class(&self.config))
|
||||
.unwrap_or(self.config.initial_char_class);
|
||||
let char_class = AsciiChar(haystack[i]).char_class(&self.config);
|
||||
let bonus = self.config.bonus_for(prev_char_class, char_class);
|
||||
let score = bonus * BONUS_FIRST_CHAR_MULTIPLIER + SCORE_MATCH;
|
||||
if score > max_score {
|
||||
max_pos = i as u32;
|
||||
max_score = score;
|
||||
// can't get better than this
|
||||
if score >= self.config.bonus_boundary_white {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let char_class = AsciiChar(c).char_class(&self.config);
|
||||
for i in Memchr::new(c, haystack) {
|
||||
let prev_char_class = i
|
||||
.checked_sub(1)
|
||||
.map(|i| AsciiChar(haystack[i]).char_class(&self.config))
|
||||
.unwrap_or(self.config.initial_char_class);
|
||||
let bonus = self.config.bonus_for(prev_char_class, char_class);
|
||||
let score = bonus * BONUS_FIRST_CHAR_MULTIPLIER + SCORE_MATCH;
|
||||
if score > max_score {
|
||||
max_pos = i as u32;
|
||||
max_score = score;
|
||||
// can't get better than this
|
||||
if score >= self.config.bonus_boundary_white {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if max_score == 0 {
|
||||
return None;
|
||||
}
|
||||
|
||||
if INDICES {
|
||||
indices.push(max_pos);
|
||||
}
|
||||
Some(max_score)
|
||||
}
|
||||
|
||||
pub(crate) fn substring_match_ascii_with_prefilter(
|
||||
&mut self,
|
||||
haystack: &[u8],
|
||||
needle: &[u8],
|
||||
prefilter_len: usize,
|
||||
prefilter: impl Iterator<Item = usize>,
|
||||
) -> (u16, usize) {
|
||||
let needle_without_prefilter = &needle[prefilter_len..];
|
||||
let mut max_score = 0;
|
||||
let mut max_pos = 0;
|
||||
for i in prefilter {
|
||||
let prev_char_class = i
|
||||
.checked_sub(1)
|
||||
.map(|i| AsciiChar(haystack[i]).char_class(&self.config))
|
||||
.unwrap_or(self.config.initial_char_class);
|
||||
let char_class = AsciiChar(haystack[i]).char_class(&self.config);
|
||||
let bonus = self.config.bonus_for(prev_char_class, char_class);
|
||||
let score = bonus * BONUS_FIRST_CHAR_MULTIPLIER + SCORE_MATCH;
|
||||
if score > max_score
|
||||
&& haystack[i + prefilter_len..]
|
||||
.iter()
|
||||
.map(|&c| AsciiChar(c).normalize(&self.config).0)
|
||||
.eq(needle_without_prefilter.iter().copied())
|
||||
{
|
||||
max_pos = i;
|
||||
max_score = score;
|
||||
// can't get better than this
|
||||
if score >= self.config.bonus_boundary_white {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
(max_score, max_pos)
|
||||
}
|
||||
|
||||
pub(crate) fn substring_match_ascii<const INDICES: bool>(
|
||||
&mut self,
|
||||
haystack: &[u8],
|
||||
needle: &[u8],
|
||||
indices: &mut Vec<u32>,
|
||||
) -> Option<u16> {
|
||||
let mut max_score = 0;
|
||||
let mut max_pos = 0;
|
||||
if self.config.ignore_case {
|
||||
match needle.iter().position(|&c| c >= b'a' && c <= b'z') {
|
||||
// start with char do case insensitive search
|
||||
Some(0) => {
|
||||
(max_score, max_pos) = self.substring_match_ascii_with_prefilter(
|
||||
haystack,
|
||||
needle,
|
||||
1,
|
||||
Memchr2::new(
|
||||
needle[0],
|
||||
needle[0] - 32,
|
||||
&haystack[..haystack.len() - needle.len() + 1],
|
||||
),
|
||||
);
|
||||
if max_score == 0 {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
Some(1) => {
|
||||
(max_score, max_pos) = self.substring_match_ascii_with_prefilter(
|
||||
haystack,
|
||||
needle,
|
||||
1,
|
||||
Memchr::new(needle[0], &haystack[..haystack.len() - needle.len() + 1]),
|
||||
);
|
||||
if max_score == 0 {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
Some(len) => {
|
||||
(max_score, max_pos) = self.substring_match_ascii_with_prefilter(
|
||||
haystack,
|
||||
needle,
|
||||
1,
|
||||
memmem::find_iter(&haystack[..haystack.len() - needle.len() + len], needle),
|
||||
);
|
||||
if max_score == 0 {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
// in case we don't have any letter in the needle
|
||||
// we can treat the search as case sensitive and use memmem dircedly which is way faster
|
||||
None => (),
|
||||
}
|
||||
}
|
||||
|
||||
if max_score == 0 {
|
||||
let char_class = AsciiChar(needle[0]).char_class(&self.config);
|
||||
for i in memmem::find_iter(haystack, needle) {
|
||||
let prev_char_class = i
|
||||
.checked_sub(1)
|
||||
.map(|i| AsciiChar(haystack[i]).char_class(&self.config))
|
||||
.unwrap_or(self.config.initial_char_class);
|
||||
let bonus = self.config.bonus_for(prev_char_class, char_class);
|
||||
let score = bonus * BONUS_FIRST_CHAR_MULTIPLIER + SCORE_MATCH;
|
||||
if score > max_score {
|
||||
max_pos = i;
|
||||
max_score = score;
|
||||
// can't get better than this
|
||||
if score >= self.config.bonus_boundary_white {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if max_score == 0 {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
let score = self.calculate_score::<INDICES, _, _>(
|
||||
AsciiChar::cast(haystack),
|
||||
AsciiChar::cast(needle),
|
||||
max_pos,
|
||||
max_pos + needle.len(),
|
||||
indices,
|
||||
);
|
||||
Some(score)
|
||||
}
|
||||
|
||||
pub(crate) fn substring_match_1_non_ascii<const INDICES: bool>(
|
||||
&mut self,
|
||||
haystack: &[char],
|
||||
needle: char,
|
||||
start: usize,
|
||||
indices: &mut Vec<u32>,
|
||||
) -> u16 {
|
||||
let mut max_score = 0;
|
||||
let mut max_pos = 0;
|
||||
let mut prev_class = start
|
||||
.checked_sub(1)
|
||||
.map(|i| haystack[i].char_class(&self.config))
|
||||
.unwrap_or(self.config.initial_char_class);
|
||||
for (i, &c) in haystack[start..].iter().enumerate() {
|
||||
let (c, char_class) = c.char_class_and_normalize(&self.config);
|
||||
if c != needle {
|
||||
continue;
|
||||
}
|
||||
let bonus = self.config.bonus_for(prev_class, char_class);
|
||||
prev_class = char_class;
|
||||
let score = bonus * BONUS_FIRST_CHAR_MULTIPLIER + SCORE_MATCH;
|
||||
if score > max_score {
|
||||
max_pos = i as u32;
|
||||
max_score = score;
|
||||
// can't get better than this
|
||||
if score >= self.config.bonus_boundary_white {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if INDICES {
|
||||
indices.push(max_pos + start as u32);
|
||||
}
|
||||
max_score
|
||||
}
|
||||
|
||||
pub(crate) fn substring_match_non_ascii<const INDICES: bool, N>(
|
||||
&mut self,
|
||||
haystack: &[char],
|
||||
needle: &[N],
|
||||
start: usize,
|
||||
indices: &mut Vec<u32>,
|
||||
) -> Option<u16>
|
||||
where
|
||||
N: Char,
|
||||
char: PartialEq<N>,
|
||||
{
|
||||
let mut max_score = 0;
|
||||
let mut max_pos = 0;
|
||||
let mut prev_class = start
|
||||
.checked_sub(1)
|
||||
.map(|i| haystack[i].char_class(&self.config))
|
||||
.unwrap_or(self.config.initial_char_class);
|
||||
for (i, &c) in haystack[start..].iter().enumerate() {
|
||||
let (c, char_class) = c.char_class_and_normalize(&self.config);
|
||||
if c != needle[0] {
|
||||
continue;
|
||||
}
|
||||
let bonus = self.config.bonus_for(prev_class, char_class);
|
||||
prev_class = char_class;
|
||||
let score = bonus * BONUS_FIRST_CHAR_MULTIPLIER + SCORE_MATCH;
|
||||
if score > max_score
|
||||
&& haystack[i + 1..]
|
||||
.iter()
|
||||
.map(|c| c.normalize(&self.config))
|
||||
.eq(needle[1..].iter().copied())
|
||||
{
|
||||
max_pos = i;
|
||||
max_score = score;
|
||||
// can't get better than this
|
||||
if score >= self.config.bonus_boundary_white {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let score = self.calculate_score::<INDICES, _, _>(
|
||||
haystack,
|
||||
needle,
|
||||
max_pos,
|
||||
max_pos + needle.len(),
|
||||
indices,
|
||||
);
|
||||
Some(score)
|
||||
}
|
||||
}
|
@ -61,20 +61,18 @@ impl Matcher {
|
||||
}
|
||||
}
|
||||
|
||||
fn next_m_score(p_score: i32, m_score: i32, bonus: u16, next_bonus: u16) -> ScoreCell {
|
||||
let consecutive_bonus = max(bonus, max(next_bonus, BONUS_CONSECUTIVE));
|
||||
fn next_m_score(p_score: i32, m_score: i32, bonus: u16) -> ScoreCell {
|
||||
let consecutive_bonus = max(bonus, BONUS_CONSECUTIVE);
|
||||
let score_match = m_score + consecutive_bonus as i32;
|
||||
let score_skip = p_score + next_bonus as i32;
|
||||
if score_match >= score_skip {
|
||||
let score_skip = p_score + bonus as i32;
|
||||
if score_match > score_skip {
|
||||
ScoreCell {
|
||||
score: score_match + SCORE_MATCH as i32,
|
||||
bonus: consecutive_bonus,
|
||||
matched: true,
|
||||
}
|
||||
} else {
|
||||
ScoreCell {
|
||||
score: score_skip + SCORE_MATCH as i32,
|
||||
bonus: next_bonus,
|
||||
matched: false,
|
||||
}
|
||||
}
|
||||
@ -91,7 +89,7 @@ fn p_score(prev_p_score: i32, prev_m_score: i32) -> (i32, bool) {
|
||||
} else {
|
||||
i32::MIN / 2
|
||||
};
|
||||
if score_match >= score_skip {
|
||||
if score_match > score_skip {
|
||||
(score_match, true)
|
||||
} else {
|
||||
(score_skip, false)
|
||||
@ -185,15 +183,10 @@ impl<H: Char> MatcherDataView<'_, H> {
|
||||
let (p_score, p_matched) = p_score(prev_p_score, prev_m_score);
|
||||
let m_cell = if FIRST_ROW {
|
||||
if c == needle_char {
|
||||
// TODO: do we really want to start with a penalty here??
|
||||
let mut cell =
|
||||
next_m_score(0, i32::MIN / 2, 0, bonus * BONUS_FIRST_CHAR_MULTIPLIER);
|
||||
cell.bonus = *bonus;
|
||||
cell
|
||||
next_m_score(0, i32::MIN / 2, bonus * BONUS_FIRST_CHAR_MULTIPLIER)
|
||||
} else {
|
||||
ScoreCell {
|
||||
score: i32::MIN / 2,
|
||||
bonus: 0,
|
||||
matched: false,
|
||||
}
|
||||
}
|
||||
@ -215,15 +208,10 @@ impl<H: Char> MatcherDataView<'_, H> {
|
||||
let (p_score, p_matched) = p_score(prev_p_score, prev_m_score);
|
||||
let m_cell = if FIRST_ROW {
|
||||
if c[0] == needle_char {
|
||||
// TODO: do we really want to start with a penalty here??
|
||||
let mut cell =
|
||||
next_m_score(0, i32::MIN / 2, 0, bonus[0] * BONUS_FIRST_CHAR_MULTIPLIER);
|
||||
cell.bonus = bonus[0];
|
||||
cell
|
||||
next_m_score(0, i32::MIN / 2, bonus[0] * BONUS_FIRST_CHAR_MULTIPLIER)
|
||||
} else {
|
||||
ScoreCell {
|
||||
score: i32::MIN / 2,
|
||||
bonus: 0,
|
||||
matched: false,
|
||||
}
|
||||
}
|
||||
@ -231,11 +219,10 @@ impl<H: Char> MatcherDataView<'_, H> {
|
||||
*score_cell
|
||||
};
|
||||
*score_cell = if c[1] == next_needle_char {
|
||||
next_m_score(p_score, m_cell.score, m_cell.bonus, bonus[1])
|
||||
next_m_score(p_score, m_cell.score, bonus[1])
|
||||
} else {
|
||||
ScoreCell {
|
||||
score: i32::MIN / 2,
|
||||
bonus: 0,
|
||||
matched: false,
|
||||
}
|
||||
};
|
||||
@ -284,8 +271,9 @@ impl<H: Char> MatcherDataView<'_, H> {
|
||||
matrix_len: usize,
|
||||
start: u32,
|
||||
) {
|
||||
indices.clear();
|
||||
indices.resize(self.row_offs.len(), 0);
|
||||
let indices_start = indices.len();
|
||||
indices.resize(indices_start + self.row_offs.len(), 0);
|
||||
let indices = &mut indices[indices_start..];
|
||||
let last_row_off = *self.row_offs.last().unwrap();
|
||||
indices[self.row_offs.len() - 1] = start + max_score_end as u32 + last_row_off as u32;
|
||||
|
535
matcher/src/lib.rs
Normal file
535
matcher/src/lib.rs
Normal file
@ -0,0 +1,535 @@
|
||||
/*!
|
||||
`nucleo_matcher` is a low level crate that contains the matcher implementation
|
||||
used by the other nucleo crates.
|
||||
|
||||
The matcher is highly optimized and can significantly outperform `fzf` and
|
||||
`skim` (the `fuzzy-matcher` crate). However some of these optimizations require
|
||||
a slightly less convenient API. Particularly, `nucleo_matcher` requires that
|
||||
needles and haystacks are provided as [UTF32 strings](crate::Utf32Str) instead
|
||||
of Rust's normal UTF-8 strings.
|
||||
*/
|
||||
|
||||
// sadly ranges don't optimize well
|
||||
#![allow(clippy::manual_range_contains)]
|
||||
|
||||
pub mod chars;
|
||||
mod config;
|
||||
#[cfg(test)]
|
||||
mod debug;
|
||||
mod exact;
|
||||
mod fuzzy_greedy;
|
||||
mod fuzzy_optimal;
|
||||
mod matrix;
|
||||
mod prefilter;
|
||||
mod score;
|
||||
mod utf32_str;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
pub use crate::config::MatcherConfig;
|
||||
pub use crate::utf32_str::Utf32Str;
|
||||
|
||||
use crate::chars::{AsciiChar, Char};
|
||||
use crate::matrix::MatrixSlab;
|
||||
|
||||
/// A matcher engine that can execute (fuzzy) matches.
|
||||
///
|
||||
/// A matcher contains **heap allocated** scratch memory that is reused during
|
||||
/// matching. This scratch memory allows the matcher to guarantee that it will
|
||||
/// **never allocate** during matching (with the exception of pushing to the
|
||||
/// `indices` vector if there isn't enough capacity). However this scratch
|
||||
/// memory is fairly large (around 135KB) so creating a matcher is expensive and
|
||||
/// should be reused.
|
||||
///
|
||||
/// All `.._match` functions will not compute the indices of the matched chars
|
||||
/// and are therefore significantly faster. These should be used to prefilter
|
||||
/// and sort all matches. All `.._indices` functions will compute the indices of
|
||||
/// the matched chars. These should be used when rendering the best N matches.
|
||||
/// Note that the `indices` argument is **never cleared**. This allows running
|
||||
/// multiple different matches on the same haystack and merging the indices by
|
||||
/// sorting and deduplicating the vector.
|
||||
///
|
||||
/// Matching is limited to 2^32-1 codepoints, if the haystack is longer than
|
||||
/// that the matcher *will panic*. The caller must decide whether it wants to
|
||||
/// filter out long haystacks or truncate them.
|
||||
pub struct Matcher {
|
||||
pub config: MatcherConfig,
|
||||
slab: MatrixSlab,
|
||||
}
|
||||
|
||||
impl Default for Matcher {
|
||||
fn default() -> Self {
|
||||
Matcher {
|
||||
config: MatcherConfig::DEFAULT,
|
||||
slab: MatrixSlab::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Matcher {
|
||||
pub fn new(config: MatcherConfig) -> Self {
|
||||
Self {
|
||||
config,
|
||||
slab: MatrixSlab::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Find the fuzzy match with the higehest score in the `haystack`.
|
||||
///
|
||||
/// This functions has `O(mn)` time complexity for short inputs. To
|
||||
/// avoid slowdowns it automatically falls back to [greedy matching]
|
||||
/// (crate::Matcher::fuzzy_match_greedy) for large needles and haystacks
|
||||
///
|
||||
/// See the [matcher documentation](crate::Matcher) for more details.
|
||||
pub fn fuzzy_match(&mut self, haystack: Utf32Str<'_>, needle: Utf32Str<'_>) -> Option<u16> {
|
||||
assert!(haystack.len() <= u32::MAX as usize);
|
||||
self.fuzzy_matcher_impl::<false>(haystack, needle, &mut Vec::new())
|
||||
}
|
||||
|
||||
/// Find the fuzzy match with the higehest score in the `haystack` and
|
||||
/// compute its indices.
|
||||
///
|
||||
/// This functions has `O(mn)` time complexity for short inputs. To
|
||||
/// avoid slowdowns it automatically falls back to [greedy matching]
|
||||
/// (crate::Matcher::fuzzy_match_greedy) for large needles and haystacks
|
||||
///
|
||||
/// See the [matcher documentation](crate::Matcher) for more details.
|
||||
pub fn fuzzy_indices(
|
||||
&mut self,
|
||||
haystack: Utf32Str<'_>,
|
||||
needle: Utf32Str<'_>,
|
||||
indices: &mut Vec<u32>,
|
||||
) -> Option<u16> {
|
||||
assert!(haystack.len() <= u32::MAX as usize);
|
||||
self.fuzzy_matcher_impl::<true>(haystack, needle, indices)
|
||||
}
|
||||
|
||||
fn fuzzy_matcher_impl<const INDICES: bool>(
|
||||
&mut self,
|
||||
haystack: Utf32Str<'_>,
|
||||
needle_: Utf32Str<'_>,
|
||||
indices: &mut Vec<u32>,
|
||||
) -> Option<u16> {
|
||||
if needle_.len() > haystack.len() || needle_.is_empty() {
|
||||
return None;
|
||||
}
|
||||
if needle_.len() == haystack.len() {
|
||||
return self.exact_match_impl::<INDICES>(haystack, needle_, indices);
|
||||
}
|
||||
assert!(
|
||||
haystack.len() <= u32::MAX as usize,
|
||||
"fuzzy matching is only support for up to 2^32-1 codepoints"
|
||||
);
|
||||
match (haystack, needle_) {
|
||||
(Utf32Str::Ascii(haystack), Utf32Str::Ascii(needle)) => {
|
||||
if let &[needle] = needle {
|
||||
return self.substring_match_1_ascii::<INDICES>(haystack, needle, indices);
|
||||
}
|
||||
let (start, greedy_end, end) = self.prefilter_ascii(haystack, needle, false)?;
|
||||
self.fuzzy_match_optimal::<INDICES, AsciiChar, AsciiChar>(
|
||||
AsciiChar::cast(haystack),
|
||||
AsciiChar::cast(needle),
|
||||
start,
|
||||
greedy_end,
|
||||
end,
|
||||
indices,
|
||||
)
|
||||
}
|
||||
(Utf32Str::Ascii(_), Utf32Str::Unicode(_)) => {
|
||||
// a purely ascii haystack can never be transformed to match
|
||||
// a needle that contains non-ascii chars since we don't allow gaps
|
||||
None
|
||||
}
|
||||
(Utf32Str::Unicode(haystack), Utf32Str::Ascii(needle)) => {
|
||||
if let &[needle] = needle {
|
||||
let (start, _) = self.prefilter_non_ascii(haystack, needle_, true)?;
|
||||
let res = self.substring_match_1_non_ascii::<INDICES>(
|
||||
haystack,
|
||||
needle as char,
|
||||
start,
|
||||
indices,
|
||||
);
|
||||
return Some(res);
|
||||
}
|
||||
let (start, end) = self.prefilter_non_ascii(haystack, needle_, false)?;
|
||||
self.fuzzy_match_optimal::<INDICES, char, AsciiChar>(
|
||||
haystack,
|
||||
AsciiChar::cast(needle),
|
||||
start,
|
||||
start + 1,
|
||||
end,
|
||||
indices,
|
||||
)
|
||||
}
|
||||
(Utf32Str::Unicode(haystack), Utf32Str::Unicode(needle)) => {
|
||||
if let &[needle] = needle {
|
||||
let (start, _) = self.prefilter_non_ascii(haystack, needle_, true)?;
|
||||
let res = self
|
||||
.substring_match_1_non_ascii::<INDICES>(haystack, needle, start, indices);
|
||||
return Some(res);
|
||||
}
|
||||
let (start, end) = self.prefilter_non_ascii(haystack, needle_, false)?;
|
||||
self.fuzzy_match_optimal::<INDICES, char, char>(
|
||||
haystack,
|
||||
needle,
|
||||
start,
|
||||
start + 1,
|
||||
end,
|
||||
indices,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Greedly find a fuzzy match in the `haystack`.
|
||||
///
|
||||
/// This functions has `O(n)` time complexity but may provide unintutive (non-optimal)
|
||||
/// indices and scores. Usually [fuzz_indices](crate::Matcher::fuzzy_indices) should
|
||||
/// be preferred.
|
||||
///
|
||||
/// See the [matcher documentation](crate::Matcher) for more details.
|
||||
pub fn fuzzy_match_greedy(
|
||||
&mut self,
|
||||
haystack: Utf32Str<'_>,
|
||||
needle: Utf32Str<'_>,
|
||||
) -> Option<u16> {
|
||||
assert!(haystack.len() <= u32::MAX as usize);
|
||||
self.fuzzy_match_greedy_impl::<false>(haystack, needle, &mut Vec::new())
|
||||
}
|
||||
|
||||
/// Greedly find a fuzzy match in the `haystack` and compute its indices.
|
||||
///
|
||||
/// This functions has `O(n)` time complexity but may provide unintutive (non-optimal)
|
||||
/// indices and scores. Usually [fuzz_indices](crate::Matcher::fuzzy_indices) should
|
||||
/// be preferred.
|
||||
///
|
||||
/// See the [matcher documentation](crate::Matcher) for more details.
|
||||
pub fn fuzzy_indices_greedy(
|
||||
&mut self,
|
||||
haystack: Utf32Str<'_>,
|
||||
needle: Utf32Str<'_>,
|
||||
indices: &mut Vec<u32>,
|
||||
) -> Option<u16> {
|
||||
assert!(haystack.len() <= u32::MAX as usize);
|
||||
self.fuzzy_match_greedy_impl::<true>(haystack, needle, indices)
|
||||
}
|
||||
|
||||
fn fuzzy_match_greedy_impl<const INDICES: bool>(
|
||||
&mut self,
|
||||
haystack: Utf32Str<'_>,
|
||||
needle_: Utf32Str<'_>,
|
||||
indices: &mut Vec<u32>,
|
||||
) -> Option<u16> {
|
||||
if needle_.len() > haystack.len() || needle_.is_empty() {
|
||||
return None;
|
||||
}
|
||||
if needle_.len() == haystack.len() {
|
||||
return self.exact_match_impl::<INDICES>(haystack, needle_, indices);
|
||||
}
|
||||
assert!(
|
||||
haystack.len() <= u32::MAX as usize,
|
||||
"matching is only support for up to 2^32-1 codepoints"
|
||||
);
|
||||
match (haystack, needle_) {
|
||||
(Utf32Str::Ascii(haystack), Utf32Str::Ascii(needle)) => {
|
||||
let (start, greedy_end, _) = self.prefilter_ascii(haystack, needle, true)?;
|
||||
self.fuzzy_match_greedy_::<INDICES, AsciiChar, AsciiChar>(
|
||||
AsciiChar::cast(haystack),
|
||||
AsciiChar::cast(needle),
|
||||
start,
|
||||
greedy_end,
|
||||
indices,
|
||||
)
|
||||
}
|
||||
(Utf32Str::Ascii(_), Utf32Str::Unicode(_)) => {
|
||||
// a purely ascii haystack can never be transformed to match
|
||||
// a needle that contains non-ascii chars since we don't allow gaps
|
||||
None
|
||||
}
|
||||
(Utf32Str::Unicode(haystack), Utf32Str::Ascii(needle)) => {
|
||||
let (start, _) = self.prefilter_non_ascii(haystack, needle_, true)?;
|
||||
self.fuzzy_match_greedy_::<INDICES, char, AsciiChar>(
|
||||
haystack,
|
||||
AsciiChar::cast(needle),
|
||||
start,
|
||||
start + 1,
|
||||
indices,
|
||||
)
|
||||
}
|
||||
(Utf32Str::Unicode(haystack), Utf32Str::Unicode(needle)) => {
|
||||
let (start, _) = self.prefilter_non_ascii(haystack, needle_, true)?;
|
||||
self.fuzzy_match_greedy_::<INDICES, char, char>(
|
||||
haystack,
|
||||
needle,
|
||||
start,
|
||||
start + 1,
|
||||
indices,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Finds the substring match with the highest score in the `haystack`.
|
||||
///
|
||||
/// This functions has `O(nm)` time complexity. However many cases can
|
||||
/// be significantly accelerated using prefilters so it's usually fast
|
||||
/// in practice.
|
||||
///
|
||||
/// See the [matcher documentation](crate::Matcher) for more details.
|
||||
pub fn substring_match(
|
||||
&mut self,
|
||||
haystack: Utf32Str<'_>,
|
||||
needle_: Utf32Str<'_>,
|
||||
) -> Option<u16> {
|
||||
self.substring_match_impl::<false>(haystack, needle_, &mut Vec::new())
|
||||
}
|
||||
|
||||
/// Finds the substring match with the highest score in the `haystack` and
|
||||
/// compute its indices.
|
||||
///
|
||||
/// This functions has `O(nm)` time complexity. However many cases can
|
||||
/// be significantly accelerated using prefilters so it's usually fast
|
||||
/// in practice.
|
||||
///
|
||||
/// See the [matcher documentation](crate::Matcher) for more details.
|
||||
pub fn substring_indices(
|
||||
&mut self,
|
||||
haystack: Utf32Str<'_>,
|
||||
needle_: Utf32Str<'_>,
|
||||
indices: &mut Vec<u32>,
|
||||
) -> Option<u16> {
|
||||
self.substring_match_impl::<true>(haystack, needle_, indices)
|
||||
}
|
||||
|
||||
fn substring_match_impl<const INDICES: bool>(
|
||||
&mut self,
|
||||
haystack: Utf32Str<'_>,
|
||||
needle_: Utf32Str<'_>,
|
||||
indices: &mut Vec<u32>,
|
||||
) -> Option<u16> {
|
||||
if needle_.len() > haystack.len() || needle_.is_empty() {
|
||||
return None;
|
||||
}
|
||||
if needle_.len() == haystack.len() {
|
||||
return self.exact_match_impl::<INDICES>(haystack, needle_, indices);
|
||||
}
|
||||
assert!(
|
||||
haystack.len() <= u32::MAX as usize,
|
||||
"matching is only support for up to 2^32-1 codepoints"
|
||||
);
|
||||
match (haystack, needle_) {
|
||||
(Utf32Str::Ascii(haystack), Utf32Str::Ascii(needle)) => {
|
||||
if let &[needle] = needle {
|
||||
return self.substring_match_1_ascii::<INDICES>(haystack, needle, indices);
|
||||
}
|
||||
self.substring_match_ascii::<INDICES>(haystack, needle, indices)
|
||||
}
|
||||
(Utf32Str::Ascii(_), Utf32Str::Unicode(_)) => {
|
||||
// a purely ascii haystack can never be transformed to match
|
||||
// a needle that contains non-ascii chars since we don't allow gaps
|
||||
None
|
||||
}
|
||||
(Utf32Str::Unicode(haystack), Utf32Str::Ascii(needle)) => {
|
||||
if let &[needle] = needle {
|
||||
let (start, _) = self.prefilter_non_ascii(haystack, needle_, true)?;
|
||||
let res = self.substring_match_1_non_ascii::<INDICES>(
|
||||
haystack,
|
||||
needle as char,
|
||||
start,
|
||||
indices,
|
||||
);
|
||||
return Some(res);
|
||||
}
|
||||
let (start, _) = self.prefilter_non_ascii(haystack, needle_, false)?;
|
||||
self.substring_match_non_ascii::<INDICES, _>(
|
||||
haystack,
|
||||
AsciiChar::cast(needle),
|
||||
start,
|
||||
indices,
|
||||
)
|
||||
}
|
||||
(Utf32Str::Unicode(haystack), Utf32Str::Unicode(needle)) => {
|
||||
if let &[needle] = needle {
|
||||
let (start, _) = self.prefilter_non_ascii(haystack, needle_, true)?;
|
||||
let res = self
|
||||
.substring_match_1_non_ascii::<INDICES>(haystack, needle, start, indices);
|
||||
return Some(res);
|
||||
}
|
||||
let (start, end) = self.prefilter_non_ascii(haystack, needle_, false)?;
|
||||
self.fuzzy_match_optimal::<INDICES, char, char>(
|
||||
haystack,
|
||||
needle,
|
||||
start,
|
||||
start + 1,
|
||||
end,
|
||||
indices,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks whether needle and haystack match exactly.
|
||||
///
|
||||
/// This functions has `O(n)` time complexity.
|
||||
///
|
||||
/// See the [matcher documentation](crate::Matcher) for more details.
|
||||
pub fn exact_match(&mut self, haystack: Utf32Str<'_>, needle: Utf32Str<'_>) -> Option<u16> {
|
||||
self.exact_match_impl::<false>(haystack, needle, &mut Vec::new())
|
||||
}
|
||||
|
||||
/// Checks whether needle and haystack match exactly and compute the matches indices.
|
||||
///
|
||||
/// This functions has `O(n)` time complexity.
|
||||
///
|
||||
/// See the [matcher documentation](crate::Matcher) for more details.
|
||||
pub fn exact_indices(
|
||||
&mut self,
|
||||
haystack: Utf32Str<'_>,
|
||||
needle: Utf32Str<'_>,
|
||||
indices: &mut Vec<u32>,
|
||||
) -> Option<u16> {
|
||||
self.exact_match_impl::<true>(haystack, needle, indices)
|
||||
}
|
||||
|
||||
/// Checks whether needle is a prefix of the haystack.
|
||||
///
|
||||
/// This functions has `O(n)` time complexity.
|
||||
///
|
||||
/// See the [matcher documentation](crate::Matcher) for more details.
|
||||
pub fn prefix_match(&mut self, haystack: Utf32Str<'_>, needle: Utf32Str<'_>) -> Option<u16> {
|
||||
if haystack.len() < needle.len() {
|
||||
None
|
||||
} else {
|
||||
self.exact_match_impl::<false>(haystack.slice(..needle.len()), needle, &mut Vec::new())
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks whether needle is a prefix of the haystack and compute the matches indices.
|
||||
///
|
||||
/// This functions has `O(n)` time complexity.
|
||||
///
|
||||
/// See the [matcher documentation](crate::Matcher) for more details.
|
||||
pub fn prefix_indices(
|
||||
&mut self,
|
||||
haystack: Utf32Str<'_>,
|
||||
needle: Utf32Str<'_>,
|
||||
indices: &mut Vec<u32>,
|
||||
) -> Option<u16> {
|
||||
if haystack.len() < needle.len() {
|
||||
None
|
||||
} else {
|
||||
self.exact_match_impl::<true>(haystack.slice(..needle.len()), needle, indices)
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks whether needle is a postfix of the haystack.
|
||||
///
|
||||
/// This functions has `O(n)` time complexity.
|
||||
///
|
||||
/// See the [matcher documentation](crate::Matcher) for more details.
|
||||
pub fn postfix_match(&mut self, haystack: Utf32Str<'_>, needle: Utf32Str<'_>) -> Option<u16> {
|
||||
if haystack.len() < needle.len() {
|
||||
None
|
||||
} else {
|
||||
self.exact_match_impl::<false>(
|
||||
haystack.slice(haystack.len() - needle.len()..),
|
||||
needle,
|
||||
&mut Vec::new(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks whether needle is a postfix of the haystack and compute the matches indices.
|
||||
///
|
||||
/// This functions has `O(n)` time complexity.
|
||||
///
|
||||
/// See the [matcher documentation](crate::Matcher) for more details.
|
||||
pub fn postfix_indices(
|
||||
&mut self,
|
||||
haystack: Utf32Str<'_>,
|
||||
needle: Utf32Str<'_>,
|
||||
indices: &mut Vec<u32>,
|
||||
) -> Option<u16> {
|
||||
if haystack.len() < needle.len() {
|
||||
None
|
||||
} else {
|
||||
self.exact_match_impl::<true>(
|
||||
haystack.slice(haystack.len() - needle.len()..),
|
||||
needle,
|
||||
indices,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
fn exact_match_impl<const INDICES: bool>(
|
||||
&mut self,
|
||||
haystack: Utf32Str<'_>,
|
||||
needle_: Utf32Str<'_>,
|
||||
indices: &mut Vec<u32>,
|
||||
) -> Option<u16> {
|
||||
if needle_.len() != haystack.len() || needle_.is_empty() {
|
||||
return None;
|
||||
}
|
||||
assert!(
|
||||
haystack.len() <= u32::MAX as usize,
|
||||
"matching is only support for up to 2^32-1 codepoints"
|
||||
);
|
||||
let score = match (haystack, needle_) {
|
||||
(Utf32Str::Ascii(haystack), Utf32Str::Ascii(needle)) => {
|
||||
let matched = if self.config.ignore_case {
|
||||
AsciiChar::cast(haystack)
|
||||
.iter()
|
||||
.map(|c| c.normalize(&self.config))
|
||||
.eq(AsciiChar::cast(needle)
|
||||
.iter()
|
||||
.map(|c| c.normalize(&self.config)))
|
||||
} else {
|
||||
haystack == needle
|
||||
};
|
||||
if !matched {
|
||||
return None;
|
||||
}
|
||||
self.calculate_score::<INDICES, _, _>(
|
||||
AsciiChar::cast(haystack),
|
||||
AsciiChar::cast(needle),
|
||||
0,
|
||||
haystack.len(),
|
||||
indices,
|
||||
)
|
||||
}
|
||||
(Utf32Str::Ascii(_), Utf32Str::Unicode(_)) => {
|
||||
// a purely ascii haystack can never be transformed to match
|
||||
// a needle that contains non-ascii chars since we don't allow gaps
|
||||
return None;
|
||||
}
|
||||
(Utf32Str::Unicode(haystack), Utf32Str::Ascii(needle)) => {
|
||||
haystack
|
||||
.iter()
|
||||
.map(|c| c.normalize(&self.config))
|
||||
.eq(AsciiChar::cast(needle)
|
||||
.iter()
|
||||
.map(|c| c.normalize(&self.config)));
|
||||
|
||||
self.calculate_score::<INDICES, _, _>(
|
||||
haystack,
|
||||
AsciiChar::cast(needle),
|
||||
0,
|
||||
haystack.len(),
|
||||
indices,
|
||||
)
|
||||
}
|
||||
(Utf32Str::Unicode(haystack), Utf32Str::Unicode(needle)) => {
|
||||
let matched = haystack
|
||||
.iter()
|
||||
.map(|c| c.normalize(&self.config))
|
||||
.eq(needle.iter().map(|c| c.normalize(&self.config)));
|
||||
if !matched {
|
||||
return None;
|
||||
}
|
||||
self.calculate_score::<INDICES, _, _>(haystack, needle, 0, haystack.len(), indices)
|
||||
}
|
||||
};
|
||||
Some(score)
|
||||
}
|
||||
}
|
@ -88,7 +88,6 @@ impl<C: Char> MatrixLayout<C> {
|
||||
#[derive(Clone, Copy)]
|
||||
pub(crate) struct ScoreCell {
|
||||
pub score: i32,
|
||||
pub bonus: u16,
|
||||
pub matched: bool,
|
||||
}
|
||||
|
@ -30,7 +30,8 @@ impl Matcher {
|
||||
only_greedy: bool,
|
||||
) -> Option<(usize, usize, usize)> {
|
||||
if self.config.ignore_case {
|
||||
let start = find_ascii_ignore_case(needle[0], haystack)?;
|
||||
let start =
|
||||
find_ascii_ignore_case(needle[0], &haystack[..haystack.len() - needle.len() + 1])?;
|
||||
let mut greedy_end = start + 1;
|
||||
haystack = &haystack[greedy_end..];
|
||||
for &c in &needle[1..] {
|
||||
@ -47,7 +48,7 @@ impl Matcher {
|
||||
Some((start, greedy_end, end))
|
||||
}
|
||||
} else {
|
||||
let start = memchr(needle[0], haystack)?;
|
||||
let start = memchr(needle[0], &haystack[..haystack.len() - needle.len() + 1])?;
|
||||
let mut greedy_end = start + 1;
|
||||
haystack = &haystack[greedy_end..];
|
||||
for &c in &needle[1..] {
|
||||
@ -72,7 +73,7 @@ impl Matcher {
|
||||
only_greedy: bool,
|
||||
) -> Option<(usize, usize)> {
|
||||
let needle_char = needle.get(0);
|
||||
let start = haystack
|
||||
let start = haystack[..haystack.len() - needle.len() + 1]
|
||||
.iter()
|
||||
.position(|c| c.normalize(&self.config) == needle_char)?;
|
||||
let needle_char = needle.last();
|
||||
@ -80,15 +81,10 @@ impl Matcher {
|
||||
Some((start, start + 1))
|
||||
} else {
|
||||
let end = haystack.len()
|
||||
- haystack[start..]
|
||||
- haystack[start + 1..]
|
||||
.iter()
|
||||
.rev()
|
||||
.position(|c| c.normalize(&self.config) == needle_char)?;
|
||||
// matches are never possible in this case
|
||||
if end - start < needle.len() {
|
||||
cov_mark::hit!(small_haystack);
|
||||
return None;
|
||||
}
|
||||
|
||||
Some((start, end))
|
||||
}
|
@ -15,11 +15,6 @@ pub(crate) const PENALTY_GAP_EXTENSION: u16 = 1;
|
||||
// in web2 dictionary and my file system.
|
||||
pub(crate) const BONUS_BOUNDARY: u16 = SCORE_MATCH / 2;
|
||||
|
||||
// Although bonus point for non-word characters is non-contextual, we need it
|
||||
// for computing bonus points for consecutive chunks starting with a non-word
|
||||
// character.
|
||||
pub(crate) const BONUS_NON_WORD: u16 = SCORE_MATCH / 2;
|
||||
|
||||
// Edge-triggered bonus for matches in camelCase words.
|
||||
// Compared to word-boundary case, they don't accompany single-character gaps
|
||||
// (e.g. FooBar vs. foo-bar), so we deduct bonus point accordingly.
|
||||
@ -28,19 +23,20 @@ pub(crate) const BONUS_CAMEL123: u16 = BONUS_BOUNDARY - PENALTY_GAP_EXTENSION;
|
||||
// Minimum bonus point given to characters in consecutive chunks.
|
||||
// Note that bonus points for consecutive matches shouldn't have needed if we
|
||||
// used fixed match score as in the original algorithm.
|
||||
pub(crate) const BONUS_CONSECUTIVE: u16 = PENALTY_GAP_START + PENALTY_GAP_EXTENSION;
|
||||
pub(crate) const BONUS_CONSECUTIVE: u16 =
|
||||
PENALTY_GAP_START + PENALTY_GAP_EXTENSION + PENALTY_GAP_EXTENSION;
|
||||
|
||||
// The first character in the typed pattern usually has more significance
|
||||
// than the rest so it's important that it appears at special positions where
|
||||
// bonus points are given, e.g. "to-go" vs. "ongoing" on "og" or on "ogo".
|
||||
// The amount of the extra bonus should be limited so that the gap penalty is
|
||||
// still respected.
|
||||
pub(crate) const BONUS_FIRST_CHAR_MULTIPLIER: u16 = 1;
|
||||
pub(crate) const BONUS_FIRST_CHAR_MULTIPLIER: u16 = 2;
|
||||
|
||||
impl MatcherConfig {
|
||||
#[inline]
|
||||
pub(crate) fn bonus_for(&self, prev_class: CharClass, class: CharClass) -> u16 {
|
||||
if class > CharClass::NonWord {
|
||||
if class > CharClass::Delimiter {
|
||||
// transition from non word to word
|
||||
match prev_class {
|
||||
CharClass::Whitespace => return self.bonus_boundary_white,
|
||||
@ -54,8 +50,6 @@ impl MatcherConfig {
|
||||
{
|
||||
// camelCase letter123
|
||||
BONUS_CAMEL123
|
||||
} else if class == CharClass::NonWord {
|
||||
BONUS_NON_WORD
|
||||
} else if class == CharClass::Whitespace {
|
||||
self.bonus_boundary_white
|
||||
} else {
|
||||
@ -78,7 +72,6 @@ impl Matcher {
|
||||
indices: &mut Vec<u32>,
|
||||
) -> u16 {
|
||||
if INDICES {
|
||||
indices.clear();
|
||||
indices.reserve(needle.len());
|
||||
}
|
||||
|
||||
@ -97,8 +90,8 @@ impl Matcher {
|
||||
indices.push(start as u32)
|
||||
}
|
||||
let class = haystack[start].char_class(&self.config);
|
||||
let mut first_bonus = self.bonus_for(prev_class, class);
|
||||
let mut score = SCORE_MATCH + first_bonus * BONUS_FIRST_CHAR_MULTIPLIER;
|
||||
let mut bonus = self.bonus_for(prev_class, class);
|
||||
let mut score = SCORE_MATCH + bonus * BONUS_FIRST_CHAR_MULTIPLIER;
|
||||
prev_class = class;
|
||||
needle_char = *needle_iter.next().unwrap_or(&needle_char);
|
||||
|
||||
@ -108,17 +101,9 @@ impl Matcher {
|
||||
if INDICES {
|
||||
indices.push(i as u32 + start as u32 + 1)
|
||||
}
|
||||
let mut bonus = self.bonus_for(prev_class, class);
|
||||
if consecutive == 0 {
|
||||
first_bonus = bonus
|
||||
} else {
|
||||
// Break consecutive chunk
|
||||
if bonus > first_bonus {
|
||||
first_bonus = bonus;
|
||||
bonus = max(max(bonus, first_bonus), BONUS_CONSECUTIVE);
|
||||
} else {
|
||||
bonus = max(first_bonus, BONUS_CONSECUTIVE);
|
||||
}
|
||||
bonus = self.bonus_for(prev_class, class);
|
||||
if consecutive != 0 {
|
||||
bonus = max(bonus, BONUS_CONSECUTIVE);
|
||||
}
|
||||
score += SCORE_MATCH + bonus;
|
||||
in_gap = false;
|
||||
@ -135,7 +120,6 @@ impl Matcher {
|
||||
score = score.saturating_sub(penalty);
|
||||
in_gap = true;
|
||||
consecutive = 0;
|
||||
first_bonus = 0;
|
||||
}
|
||||
prev_class = class;
|
||||
}
|
@ -1,8 +1,6 @@
|
||||
use cov_mark::check;
|
||||
|
||||
use crate::chars::Char;
|
||||
use crate::score::{
|
||||
BONUS_BOUNDARY, BONUS_CAMEL123, BONUS_CONSECUTIVE, BONUS_FIRST_CHAR_MULTIPLIER, BONUS_NON_WORD,
|
||||
BONUS_BOUNDARY, BONUS_CAMEL123, BONUS_CONSECUTIVE, BONUS_FIRST_CHAR_MULTIPLIER,
|
||||
PENALTY_GAP_EXTENSION, PENALTY_GAP_START, SCORE_MATCH,
|
||||
};
|
||||
use crate::utf32_str::Utf32Str;
|
||||
@ -46,13 +44,12 @@ fn assert_matches(
|
||||
score += needle.len() as u16 * SCORE_MATCH;
|
||||
for algo in algorithm {
|
||||
println!("xx {matched_indices:?} {algo:?}");
|
||||
matched_indices.clear();
|
||||
let res = match algo {
|
||||
Algorithm::FuzzyOptimal => {
|
||||
matched_indices.clear();
|
||||
matcher.fuzzy_indices(haystack, needle, &mut matched_indices)
|
||||
}
|
||||
Algorithm::FuzzyGreedy => {
|
||||
matched_indices.clear();
|
||||
matcher.fuzzy_indices_greedy(haystack, needle, &mut matched_indices)
|
||||
}
|
||||
};
|
||||
@ -115,6 +112,7 @@ pub fn assert_not_matches(
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
const BONUS_BOUNDARY_WHITE: u16 = MatcherConfig::DEFAULT.bonus_boundary_white;
|
||||
const BONUS_BOUNDARY_DELIMITER: u16 = MatcherConfig::DEFAULT.bonus_boundary_delimiter;
|
||||
|
||||
@ -144,20 +142,19 @@ fn test_fuzzy() {
|
||||
"/AutomatorDocument.icns",
|
||||
"rdoc",
|
||||
&[9, 10, 11, 12],
|
||||
BONUS_CAMEL123 * 3,
|
||||
BONUS_CAMEL123 + 2 * BONUS_CONSECUTIVE,
|
||||
),
|
||||
(
|
||||
"/man1/zshcompctl.1",
|
||||
"zshc",
|
||||
&[6, 7, 8, 9],
|
||||
BONUS_BOUNDARY_DELIMITER * BONUS_FIRST_CHAR_MULTIPLIER
|
||||
+ BONUS_BOUNDARY_DELIMITER * 3,
|
||||
BONUS_BOUNDARY_DELIMITER * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CONSECUTIVE * 3,
|
||||
),
|
||||
(
|
||||
"/.oh-my-zsh/cache",
|
||||
"zshc",
|
||||
&[8, 9, 10, 12],
|
||||
BONUS_BOUNDARY * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY * 2
|
||||
BONUS_BOUNDARY * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CONSECUTIVE * 2
|
||||
- PENALTY_GAP_START
|
||||
+ BONUS_BOUNDARY_DELIMITER,
|
||||
),
|
||||
@ -171,9 +168,7 @@ fn test_fuzzy() {
|
||||
"abc123 456",
|
||||
"12356",
|
||||
&[3, 4, 5, 8, 9],
|
||||
BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER
|
||||
+ BONUS_CAMEL123 * 2
|
||||
+ BONUS_CONSECUTIVE
|
||||
BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CONSECUTIVE * 3
|
||||
- PENALTY_GAP_START
|
||||
- PENALTY_GAP_EXTENSION,
|
||||
),
|
||||
@ -205,37 +200,42 @@ fn test_fuzzy() {
|
||||
"fooBar Baz",
|
||||
"foob",
|
||||
&[0, 1, 2, 3],
|
||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 3,
|
||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER
|
||||
+ BONUS_CONSECUTIVE * 2
|
||||
+ BONUS_CAMEL123,
|
||||
),
|
||||
(
|
||||
"xFoo-Bar Baz",
|
||||
"foo-b",
|
||||
&[1, 2, 3, 4, 5],
|
||||
BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER
|
||||
+ BONUS_CAMEL123 * 2
|
||||
+ BONUS_NON_WORD
|
||||
+ BONUS_CONSECUTIVE * 3
|
||||
+ BONUS_BOUNDARY,
|
||||
),
|
||||
(
|
||||
"]\0\0\0H\0\0\0rrrrrrrrrrrrrrrrrrrrrrrVVVVVVVV\0",
|
||||
"H\0\0VV",
|
||||
&[4, 5, 6, 31, 32],
|
||||
BONUS_BOUNDARY * (BONUS_FIRST_CHAR_MULTIPLIER + 2) + 2 * BONUS_CAMEL123
|
||||
BONUS_BOUNDARY * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CONSECUTIVE * 2
|
||||
- PENALTY_GAP_START
|
||||
- 23 * PENALTY_GAP_EXTENSION,
|
||||
- 23 * PENALTY_GAP_EXTENSION
|
||||
+ BONUS_CAMEL123
|
||||
+ BONUS_CONSECUTIVE,
|
||||
),
|
||||
(
|
||||
"\nץ&`@ `---\0\0\0\0",
|
||||
"`@ `--\0\0",
|
||||
&[3, 4, 5, 6, 7, 8, 10, 11],
|
||||
BONUS_NON_WORD * (BONUS_FIRST_CHAR_MULTIPLIER + 3) + BONUS_BOUNDARY_WHITE * 4
|
||||
- PENALTY_GAP_START,
|
||||
BONUS_BOUNDARY_WHITE * 2 + 2 * BONUS_CONSECUTIVE - PENALTY_GAP_START
|
||||
+ BONUS_CONSECUTIVE,
|
||||
),
|
||||
(
|
||||
" 1111111u11111uuu111",
|
||||
"11111uuu1",
|
||||
&[9, 10, 11, 12, 13, 14, 15, 16, 17],
|
||||
BONUS_CAMEL123 * (BONUS_FIRST_CHAR_MULTIPLIER + 8),
|
||||
BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER
|
||||
+ 7 * BONUS_CONSECUTIVE
|
||||
+ BONUS_CAMEL123,
|
||||
),
|
||||
],
|
||||
);
|
||||
@ -275,14 +275,15 @@ fn test_fuzzy_case_sensitive() {
|
||||
"FooBar Baz",
|
||||
"FooB",
|
||||
&[0, 1, 2, 3],
|
||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 3,
|
||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER
|
||||
+ BONUS_CONSECUTIVE * 2
|
||||
+ BONUS_CAMEL123,
|
||||
),
|
||||
// Consecutive bonus updated
|
||||
(
|
||||
"foo-bar",
|
||||
"o-ba",
|
||||
&[2, 3, 4, 5],
|
||||
BONUS_BOUNDARY * 2 + BONUS_NON_WORD,
|
||||
BONUS_BOUNDARY + 2 * BONUS_CONSECUTIVE,
|
||||
),
|
||||
],
|
||||
);
|
||||
@ -300,13 +301,13 @@ fn test_normalize() {
|
||||
"Só Danço Samba",
|
||||
"So",
|
||||
&[0, 1],
|
||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE,
|
||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CONSECUTIVE,
|
||||
),
|
||||
(
|
||||
"Só Danço Samba",
|
||||
"sodc",
|
||||
&[0, 1, 3, 6],
|
||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE
|
||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CONSECUTIVE
|
||||
- PENALTY_GAP_START
|
||||
+ BONUS_BOUNDARY_WHITE
|
||||
- PENALTY_GAP_START
|
||||
@ -316,19 +317,21 @@ fn test_normalize() {
|
||||
"Danço",
|
||||
"danco",
|
||||
&[0, 1, 2, 3, 4],
|
||||
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
|
||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + 4 * BONUS_CONSECUTIVE,
|
||||
),
|
||||
(
|
||||
"DanÇo",
|
||||
"danco",
|
||||
&[0, 1, 2, 3, 4],
|
||||
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
|
||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER
|
||||
+ BONUS_CAMEL123
|
||||
+ 3 * BONUS_CONSECUTIVE,
|
||||
),
|
||||
(
|
||||
"xÇando",
|
||||
"cando",
|
||||
&[1, 2, 3, 4, 5],
|
||||
BONUS_CAMEL123 * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
|
||||
BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER + 4 * BONUS_CONSECUTIVE,
|
||||
),
|
||||
("ۂ(GCGɴCG", "n", &[5], 0),
|
||||
],
|
||||
@ -347,7 +350,7 @@ fn test_unicode1() {
|
||||
"你好世界",
|
||||
"你好",
|
||||
&[0, 1],
|
||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE,
|
||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CONSECUTIVE,
|
||||
),
|
||||
(
|
||||
"你好世界",
|
||||
@ -370,11 +373,55 @@ fn test_long_str() {
|
||||
&"x".repeat(u16::MAX as usize + 1),
|
||||
"xx",
|
||||
&[0, 1],
|
||||
(BONUS_FIRST_CHAR_MULTIPLIER + 1) * BONUS_BOUNDARY_WHITE,
|
||||
BONUS_FIRST_CHAR_MULTIPLIER * BONUS_BOUNDARY_WHITE + BONUS_CONSECUTIVE,
|
||||
)],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn test_casing() {
    // Every expected score below begins with this product (each match starts at index 0).
    let first_char_bonus = BONUS_FIRST_CHAR_MULTIPLIER * BONUS_BOUNDARY_WHITE;
    // Expected score shared by the `-` and `_` separated haystacks.
    let across_separator =
        first_char_bonus + BONUS_BOUNDARY - PENALTY_GAP_START + 4 * BONUS_CONSECUTIVE;

    assert_matches(
        &[FuzzyGreedy, FuzzyOptimal],
        false,
        false,
        false,
        &[
            // camelCase haystack: score 143, so camel is currently slightly preferred
            (
                "fooBar",
                "foobar",
                &[0, 1, 2, 3, 4, 5],
                first_char_bonus + BONUS_CAMEL123 + 4 * BONUS_CONSECUTIVE,
            ),
            // identical haystack and needle: score 141
            (
                "foobar",
                "foobar",
                &[0, 1, 2, 3, 4, 5],
                first_char_bonus + 5 * BONUS_CONSECUTIVE,
            ),
            // also 141: the boundary bonus exactly offsets the gap penalty plus the
            // consecutive bonus lost at the separator
            (
                "foo-bar",
                "foobar",
                &[0, 1, 2, 4, 5, 6],
                across_separator,
            ),
            (
                "foo_bar",
                "foobar",
                &[0, 1, 2, 4, 5, 6],
                across_separator,
            ),
        ],
    );
}
|
||||
#[test]
|
||||
fn test_optimal() {
|
||||
assert_matches(
|
||||
@ -387,60 +434,38 @@ fn test_optimal() {
|
||||
"axxx xx ",
|
||||
"xx",
|
||||
&[5, 6],
|
||||
(BONUS_FIRST_CHAR_MULTIPLIER + 1) * BONUS_BOUNDARY_WHITE,
|
||||
),
|
||||
(
|
||||
"I\0I",
|
||||
"\0",
|
||||
&[1],
|
||||
BONUS_FIRST_CHAR_MULTIPLIER * BONUS_NON_WORD,
|
||||
BONUS_FIRST_CHAR_MULTIPLIER * BONUS_BOUNDARY_WHITE + BONUS_CONSECUTIVE,
|
||||
),
|
||||
(
|
||||
"SS!H",
|
||||
"S!",
|
||||
&[0, 2],
|
||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_NON_WORD
|
||||
- PENALTY_GAP_START,
|
||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER - PENALTY_GAP_START,
|
||||
),
|
||||
(
|
||||
"^^^\u{7f}\0\0E%\u{1a}^",
|
||||
"^^\0E",
|
||||
&[1, 2, 5, 6],
|
||||
BONUS_NON_WORD * (BONUS_FIRST_CHAR_MULTIPLIER + 3)
|
||||
- PENALTY_GAP_START
|
||||
- PENALTY_GAP_EXTENSION,
|
||||
BONUS_CONSECUTIVE + BONUS_BOUNDARY - PENALTY_GAP_START - PENALTY_GAP_EXTENSION,
|
||||
),
|
||||
(
|
||||
"Hٷ!!-!!!\n--\u{4}\u{c}-\u{8}-!\u{c}",
|
||||
"-!--!",
|
||||
&[4, 5, 13, 15, 16],
|
||||
BONUS_NON_WORD * (BONUS_FIRST_CHAR_MULTIPLIER + 4)
|
||||
- 2 * PENALTY_GAP_START
|
||||
- 6 * PENALTY_GAP_EXTENSION,
|
||||
),
|
||||
(
|
||||
"C8Gۂ(GECGS",
|
||||
"8GCG",
|
||||
&[1, 2, 7, 8],
|
||||
BONUS_CAMEL123 * (BONUS_FIRST_CHAR_MULTIPLIER + 1)
|
||||
"8gx(gecg)",
|
||||
"8gcg",
|
||||
&[0, 4, 6, 7],
|
||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER
|
||||
- PENALTY_GAP_START
|
||||
- 2 * PENALTY_GAP_EXTENSION
|
||||
+ BONUS_BOUNDARY
|
||||
- PENALTY_GAP_START
|
||||
- 3 * PENALTY_GAP_EXTENSION
|
||||
+ BONUS_CONSECUTIVE,
|
||||
),
|
||||
(
|
||||
"\nץ&`@ `;;;\0\0\0\0",
|
||||
"`@ `;;\0\0",
|
||||
&[3, 4, 5, 6, 7, 9, 10, 11],
|
||||
BONUS_NON_WORD * (BONUS_FIRST_CHAR_MULTIPLIER + 1)
|
||||
+ BONUS_BOUNDARY_DELIMITER * 3
|
||||
+ BONUS_BOUNDARY_WHITE * 3
|
||||
- PENALTY_GAP_START,
|
||||
),
|
||||
(
|
||||
"dddddd\0\0\0ddddfdddddd",
|
||||
"dddddfddddd",
|
||||
&[0, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
|
||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY * 10
|
||||
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER
|
||||
+ BONUS_BOUNDARY
|
||||
+ 9 * BONUS_CONSECUTIVE
|
||||
- PENALTY_GAP_START
|
||||
- 7 * PENALTY_GAP_EXTENSION,
|
||||
),
|
||||
@ -476,9 +501,11 @@ fn test_reject() {
|
||||
false,
|
||||
&[
|
||||
("你好界", "abc"),
|
||||
("你好界", "a"),
|
||||
("你好世界", "富"),
|
||||
("Só Danço Samba", "sox"),
|
||||
("fooBarbaz", "fooBarbazz"),
|
||||
("fooBarbaz", "c"),
|
||||
],
|
||||
);
|
||||
assert_not_matches(
|
||||
@ -488,6 +515,8 @@ fn test_reject() {
|
||||
&[
|
||||
("你好界", "abc"),
|
||||
("abc", "你"),
|
||||
("abc", "A"),
|
||||
("abc", "d"),
|
||||
("你好世界", "富"),
|
||||
("Só Danço Samba", "sox"),
|
||||
("fooBarbaz", "oBZ"),
|
||||
@ -499,8 +528,11 @@ fn test_reject() {
|
||||
false,
|
||||
true,
|
||||
false,
|
||||
&[("Só Danço Samba", "sod"), ("Só Danço Samba", "soc")],
|
||||
&[
|
||||
("Só Danço Samba", "sod"),
|
||||
("Só Danço Samba", "soc"),
|
||||
("Só Danç", "So"),
|
||||
],
|
||||
);
|
||||
check!(small_haystack);
|
||||
assert_not_matches(false, false, false, &[("ۂۂfoۂۂ", "foo")]);
|
||||
}
|
@ -1,9 +1,9 @@
|
||||
use std::ops::{Bound, RangeBounds};
|
||||
use std::{fmt, slice};
|
||||
|
||||
/// A UTF32 encoded (char array) String that can be used as an input to fuzzy matching.
|
||||
/// A UTF32 encoded (char array) string that is used as an input to (fuzzy) matching.
|
||||
///
|
||||
/// Usually rusts utf8 encoded strings are great. However during fuzzy matching
|
||||
/// Usually Rust's UTF-8 encoded strings are great. However, fuzzy matching
|
||||
/// operates on codepoints (it should operate on graphemes but that's too much
|
||||
/// hassle to deal with). We want to quickly iterate these codepoints between
|
||||
/// (up to 5 times) during matching.
|
108
src/exact.rs
108
src/exact.rs
@ -1,108 +0,0 @@
|
||||
use memchr::{Memchr, Memchr2};
|
||||
|
||||
use crate::chars::{AsciiChar, Char};
|
||||
use crate::score::{BONUS_FIRST_CHAR_MULTIPLIER, SCORE_MATCH};
|
||||
use crate::Matcher;
|
||||
|
||||
impl Matcher {
|
||||
pub(crate) fn substring_match_1_ascii<const INDICES: bool>(
|
||||
&mut self,
|
||||
haystack: &[u8],
|
||||
c: u8,
|
||||
indices: &mut Vec<u32>,
|
||||
) -> Option<u16> {
|
||||
let mut max_score = 0;
|
||||
let mut max_pos = 0;
|
||||
if self.config.ignore_case && c >= b'a' && c <= b'z' {
|
||||
for i in Memchr2::new(c, c - 32, haystack) {
|
||||
let prev_char_class = i
|
||||
.checked_sub(1)
|
||||
.map(|i| AsciiChar(haystack[i]).char_class(&self.config))
|
||||
.unwrap_or(self.config.initial_char_class);
|
||||
let char_class = AsciiChar(haystack[i]).char_class(&self.config);
|
||||
let bonus = self.config.bonus_for(prev_char_class, char_class);
|
||||
let score = bonus * BONUS_FIRST_CHAR_MULTIPLIER + SCORE_MATCH;
|
||||
if score > max_score {
|
||||
max_pos = i as u32;
|
||||
max_score = score;
|
||||
// can't get better than this
|
||||
if score >= self.config.bonus_boundary_white
|
||||
&& score >= self.config.bonus_boundary_delimiter
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let char_class = AsciiChar(c).char_class(&self.config);
|
||||
for i in Memchr::new(c, haystack) {
|
||||
let prev_char_class = i
|
||||
.checked_sub(1)
|
||||
.map(|i| AsciiChar(haystack[i]).char_class(&self.config))
|
||||
.unwrap_or(self.config.initial_char_class);
|
||||
let bonus = self.config.bonus_for(prev_char_class, char_class);
|
||||
let score = bonus * BONUS_FIRST_CHAR_MULTIPLIER + SCORE_MATCH;
|
||||
if score > max_score {
|
||||
max_pos = i as u32;
|
||||
max_score = score;
|
||||
// can't get better than this
|
||||
if score >= self.config.bonus_boundary_white
|
||||
&& score >= self.config.bonus_boundary_delimiter
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if max_score == 0 {
|
||||
return None;
|
||||
}
|
||||
|
||||
if INDICES {
|
||||
indices.clear();
|
||||
indices.push(max_pos);
|
||||
}
|
||||
Some(max_score)
|
||||
}
|
||||
|
||||
pub(crate) fn substring_match_1_non_ascii<const INDICES: bool>(
|
||||
&mut self,
|
||||
haystack: &[char],
|
||||
needle: char,
|
||||
start: usize,
|
||||
indices: &mut Vec<u32>,
|
||||
) -> u16 {
|
||||
let mut max_score = 0;
|
||||
let mut max_pos = 0;
|
||||
let mut prev_class = start
|
||||
.checked_sub(1)
|
||||
.map(|i| haystack[i].char_class(&self.config))
|
||||
.unwrap_or(self.config.initial_char_class);
|
||||
for (i, &c) in haystack[start..].iter().enumerate() {
|
||||
let (c, char_class) = c.char_class_and_normalize(&self.config);
|
||||
if c != needle {
|
||||
println!("ups {c} {needle}");
|
||||
continue;
|
||||
}
|
||||
let bonus = self.config.bonus_for(prev_class, char_class);
|
||||
prev_class = char_class;
|
||||
let score = bonus * BONUS_FIRST_CHAR_MULTIPLIER + SCORE_MATCH;
|
||||
if score > max_score {
|
||||
max_pos = i as u32;
|
||||
max_score = score;
|
||||
// can't get better than this
|
||||
if score >= self.config.bonus_boundary_white
|
||||
&& score >= self.config.bonus_boundary_delimiter
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if INDICES {
|
||||
indices.clear();
|
||||
indices.push(max_pos + start as u32);
|
||||
}
|
||||
max_score
|
||||
}
|
||||
}
|
202
src/lib.rs
202
src/lib.rs
@ -1,202 +0,0 @@
|
||||
// sadly ranges don't optimize well
|
||||
#![allow(clippy::manual_range_contains)]
|
||||
|
||||
pub mod chars;
|
||||
mod config;
|
||||
#[cfg(test)]
|
||||
mod debug;
|
||||
mod exact;
|
||||
mod fuzzy_greedy;
|
||||
mod fuzzy_optimal;
|
||||
mod matrix;
|
||||
mod prefilter;
|
||||
mod score;
|
||||
mod utf32_str;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
pub use crate::config::MatcherConfig;
|
||||
pub use crate::utf32_str::Utf32Str;
|
||||
|
||||
use crate::chars::AsciiChar;
|
||||
use crate::matrix::MatrixSlab;
|
||||
|
||||
pub struct Matcher {
|
||||
pub config: MatcherConfig,
|
||||
slab: MatrixSlab,
|
||||
}
|
||||
|
||||
impl Matcher {
|
||||
pub fn new(config: MatcherConfig) -> Self {
|
||||
Self {
|
||||
config,
|
||||
slab: MatrixSlab::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn fuzzy_match(&mut self, haystack: Utf32Str<'_>, needle: Utf32Str<'_>) -> Option<u16> {
|
||||
assert!(haystack.len() <= u32::MAX as usize);
|
||||
self.fuzzy_matcher_impl::<false>(haystack, needle, &mut Vec::new())
|
||||
}
|
||||
|
||||
pub fn fuzzy_indices(
|
||||
&mut self,
|
||||
haystack: Utf32Str<'_>,
|
||||
needle: Utf32Str<'_>,
|
||||
indidies: &mut Vec<u32>,
|
||||
) -> Option<u16> {
|
||||
assert!(haystack.len() <= u32::MAX as usize);
|
||||
self.fuzzy_matcher_impl::<true>(haystack, needle, indidies)
|
||||
}
|
||||
|
||||
fn fuzzy_matcher_impl<const INDICES: bool>(
|
||||
&mut self,
|
||||
haystack: Utf32Str<'_>,
|
||||
needle_: Utf32Str<'_>,
|
||||
indidies: &mut Vec<u32>,
|
||||
) -> Option<u16> {
|
||||
if needle_.len() > haystack.len() || needle_.is_empty() {
|
||||
return None;
|
||||
}
|
||||
// if needle_.len() == haystack.len() {
|
||||
// return self.exact_match();
|
||||
// }
|
||||
assert!(
|
||||
haystack.len() <= u32::MAX as usize,
|
||||
"fuzzy matching is only support for up to 2^32-1 codepoints"
|
||||
);
|
||||
match (haystack, needle_) {
|
||||
(Utf32Str::Ascii(haystack), Utf32Str::Ascii(needle)) => {
|
||||
if let &[needle] = needle {
|
||||
return self.substring_match_1_ascii::<INDICES>(haystack, needle, indidies);
|
||||
}
|
||||
let (start, greedy_end, end) = self.prefilter_ascii(haystack, needle, false)?;
|
||||
self.fuzzy_match_optimal::<INDICES, AsciiChar, AsciiChar>(
|
||||
AsciiChar::cast(haystack),
|
||||
AsciiChar::cast(needle),
|
||||
start,
|
||||
greedy_end,
|
||||
end,
|
||||
indidies,
|
||||
)
|
||||
}
|
||||
(Utf32Str::Ascii(_), Utf32Str::Unicode(_)) => {
|
||||
// a purely ascii haystack can never be transformed to match
|
||||
// a needle that contains non-ascii chars since we don't allow gaps
|
||||
None
|
||||
}
|
||||
(Utf32Str::Unicode(haystack), Utf32Str::Ascii(needle)) => {
|
||||
if let &[needle] = needle {
|
||||
let (start, _) = self.prefilter_non_ascii(haystack, needle_, true)?;
|
||||
let res = self.substring_match_1_non_ascii::<INDICES>(
|
||||
haystack,
|
||||
needle as char,
|
||||
start,
|
||||
indidies,
|
||||
);
|
||||
return Some(res);
|
||||
}
|
||||
let (start, end) = self.prefilter_non_ascii(haystack, needle_, false)?;
|
||||
self.fuzzy_match_optimal::<INDICES, char, AsciiChar>(
|
||||
haystack,
|
||||
AsciiChar::cast(needle),
|
||||
start,
|
||||
start + 1,
|
||||
end,
|
||||
indidies,
|
||||
)
|
||||
}
|
||||
(Utf32Str::Unicode(haystack), Utf32Str::Unicode(needle)) => {
|
||||
if let &[needle] = needle {
|
||||
let (start, _) = self.prefilter_non_ascii(haystack, needle_, true)?;
|
||||
let res = self
|
||||
.substring_match_1_non_ascii::<INDICES>(haystack, needle, start, indidies);
|
||||
return Some(res);
|
||||
}
|
||||
let (start, end) = self.prefilter_non_ascii(haystack, needle_, false)?;
|
||||
self.fuzzy_match_optimal::<INDICES, char, char>(
|
||||
haystack,
|
||||
needle,
|
||||
start,
|
||||
start + 1,
|
||||
end,
|
||||
indidies,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
pub fn fuzzy_match_greedy(
|
||||
&mut self,
|
||||
haystack: Utf32Str<'_>,
|
||||
needle: Utf32Str<'_>,
|
||||
) -> Option<u16> {
|
||||
assert!(haystack.len() <= u32::MAX as usize);
|
||||
self.fuzzy_match_greedy_impl::<false>(haystack, needle, &mut Vec::new())
|
||||
}
|
||||
|
||||
pub fn fuzzy_indices_greedy(
|
||||
&mut self,
|
||||
haystack: Utf32Str<'_>,
|
||||
needle: Utf32Str<'_>,
|
||||
indidies: &mut Vec<u32>,
|
||||
) -> Option<u16> {
|
||||
assert!(haystack.len() <= u32::MAX as usize);
|
||||
self.fuzzy_match_greedy_impl::<true>(haystack, needle, indidies)
|
||||
}
|
||||
|
||||
fn fuzzy_match_greedy_impl<const INDICES: bool>(
|
||||
&mut self,
|
||||
haystack: Utf32Str<'_>,
|
||||
needle_: Utf32Str<'_>,
|
||||
indidies: &mut Vec<u32>,
|
||||
) -> Option<u16> {
|
||||
if needle_.len() > haystack.len() || needle_.is_empty() {
|
||||
return None;
|
||||
}
|
||||
// if needle_.len() == haystack.len() {
|
||||
// return self.exact_match();
|
||||
// }
|
||||
assert!(
|
||||
haystack.len() <= u32::MAX as usize,
|
||||
"fuzzy matching is only support for up to 2^32-1 codepoints"
|
||||
);
|
||||
match (haystack, needle_) {
|
||||
(Utf32Str::Ascii(haystack), Utf32Str::Ascii(needle)) => {
|
||||
let (start, greedy_end, _) = self.prefilter_ascii(haystack, needle, true)?;
|
||||
self.fuzzy_match_greedy_::<INDICES, AsciiChar, AsciiChar>(
|
||||
AsciiChar::cast(haystack),
|
||||
AsciiChar::cast(needle),
|
||||
start,
|
||||
greedy_end,
|
||||
indidies,
|
||||
)
|
||||
}
|
||||
(Utf32Str::Ascii(_), Utf32Str::Unicode(_)) => {
|
||||
// a purely ascii haystack can never be transformed to match
|
||||
// a needle that contains non-ascii chars since we don't allow gaps
|
||||
None
|
||||
}
|
||||
(Utf32Str::Unicode(haystack), Utf32Str::Ascii(needle)) => {
|
||||
let (start, _) = self.prefilter_non_ascii(haystack, needle_, true)?;
|
||||
self.fuzzy_match_greedy_::<INDICES, char, AsciiChar>(
|
||||
haystack,
|
||||
AsciiChar::cast(needle),
|
||||
start,
|
||||
start + 1,
|
||||
indidies,
|
||||
)
|
||||
}
|
||||
(Utf32Str::Unicode(haystack), Utf32Str::Unicode(needle)) => {
|
||||
let (start, _) = self.prefilter_non_ascii(haystack, needle_, true)?;
|
||||
self.fuzzy_match_greedy_::<INDICES, char, char>(
|
||||
haystack,
|
||||
needle,
|
||||
start,
|
||||
start + 1,
|
||||
indidies,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1,3 +1 @@
|
||||
exclude = ["src/tests.rs", "src/debug.rs", "src/chars/normalize.rs"]
|
||||
[report]
|
||||
out = ["Html", "Xml"]
|
||||
exclude = ["matcher/src/tests.rs", "matcher/src/debug.rs", "matcher/src/chars/normalize.rs"]
|
||||
|
@ -1,3 +1,3 @@
|
||||
default.extend-ignore-re = ["\\\\u\\{[0-9A-Za-z]*\\}"]
|
||||
[files]
|
||||
extend-exclude = ["src/tests.rs", "*.html"]
|
||||
extend-exclude = ["matcher/src/tests.rs", "*.html"]
|
||||
|
Loading…
Reference in New Issue
Block a user