move matcher to subcrate

This commit is contained in:
Pascal Kuthe 2023-07-26 15:32:04 +02:00
parent 2ce871b70c
commit 8d7a149b30
No known key found for this signature in database
GPG Key ID: D715E8655AE166A6
27 changed files with 1670 additions and 461 deletions

713
Cargo.lock generated
View File

@ -2,6 +2,115 @@
# It is not intended for manual editing.
version = 3
[[package]]
name = "aho-corasick"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41"
dependencies = [
"memchr",
]
[[package]]
name = "anes"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
[[package]]
name = "anstyle"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a30da5c5f2d5e72842e00bcb57657162cdabef0931f40e2deb9b4140440cecd"
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "bitflags"
version = "2.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42"
[[package]]
name = "bumpalo"
version = "3.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1"
[[package]]
name = "cast"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "cc"
version = "1.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "ciborium"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "effd91f6c78e5a4ace8a5d3c0b6bfaec9e2baaef55f3efc00e45fb2e477ee926"
dependencies = [
"ciborium-io",
"ciborium-ll",
"serde",
]
[[package]]
name = "ciborium-io"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cdf919175532b369853f5d5e20b26b43112613fd6fe7aee757e35f7a44642656"
[[package]]
name = "ciborium-ll"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "defaa24ecc093c77630e6c15e17c51f5e187bf35ee514f4e2d67baaa96dae22b"
dependencies = [
"ciborium-io",
"half",
]
[[package]]
name = "clap"
version = "4.3.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5fd304a20bff958a57f04c4e96a2e7594cc4490a0e809cbd48bb6437edaa452d"
dependencies = [
"clap_builder",
]
[[package]]
name = "clap_builder"
version = "4.3.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01c6a3f08f1fe5662a35cfe393aec09c4df95f60ee93b7556505260f75eee9e1"
dependencies = [
"anstyle",
"clap_lex",
]
[[package]]
name = "clap_lex"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b"
[[package]]
name = "cov-mark"
version = "1.1.0"
@ -9,15 +118,611 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ffa3d3e0138386cd4361f63537765cac7ee40698028844635a54495a92f67f3"
[[package]]
name = "fzf_oxide"
version = "0.1.0"
name = "criterion"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f"
dependencies = [
"cov-mark",
"memchr",
"anes",
"cast",
"ciborium",
"clap",
"criterion-plot",
"is-terminal",
"itertools",
"num-traits",
"once_cell",
"oorandom",
"plotters",
"rayon",
"regex",
"serde",
"serde_derive",
"serde_json",
"tinytemplate",
"walkdir",
]
[[package]]
name = "criterion-plot"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
dependencies = [
"cast",
"itertools",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200"
dependencies = [
"cfg-if",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef"
dependencies = [
"cfg-if",
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7"
dependencies = [
"autocfg",
"cfg-if",
"crossbeam-utils",
"memoffset",
"scopeguard",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294"
dependencies = [
"cfg-if",
]
[[package]]
name = "either"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
[[package]]
name = "errno"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a"
dependencies = [
"errno-dragonfly",
"libc",
"windows-sys",
]
[[package]]
name = "errno-dragonfly"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "fuzzy-matcher"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54614a3312934d066701a80f20f15fa3b56d67ac7722b39eea5b4c9dd1d66c94"
dependencies = [
"thread_local",
]
[[package]]
name = "half"
version = "1.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7"
[[package]]
name = "hermit-abi"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b"
[[package]]
name = "is-terminal"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b"
dependencies = [
"hermit-abi",
"rustix",
"windows-sys",
]
[[package]]
name = "itertools"
version = "0.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38"
[[package]]
name = "js-sys"
version = "0.3.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a"
dependencies = [
"wasm-bindgen",
]
[[package]]
name = "libc"
version = "0.2.147"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
[[package]]
name = "linux-raw-sys"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09fc20d2ca12cb9f044c93e3bd6d32d523e6e2ec3db4f7b2939cd99026ecd3f0"
[[package]]
name = "log"
version = "0.4.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4"
[[package]]
name = "memchr"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]]
name = "memoffset"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c"
dependencies = [
"autocfg",
]
[[package]]
name = "nucleo-matcher"
version = "0.1.0"
dependencies = [
"cov-mark",
"criterion",
"fuzzy-matcher",
"memchr",
]
[[package]]
name = "num-traits"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2"
dependencies = [
"autocfg",
]
[[package]]
name = "num_cpus"
version = "1.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
dependencies = [
"hermit-abi",
"libc",
]
[[package]]
name = "once_cell"
version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
[[package]]
name = "oorandom"
version = "11.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
[[package]]
name = "plotters"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2c224ba00d7cadd4d5c660deaf2098e5e80e07846537c51f9cfa4be50c1fd45"
dependencies = [
"num-traits",
"plotters-backend",
"plotters-svg",
"wasm-bindgen",
"web-sys",
]
[[package]]
name = "plotters-backend"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e76628b4d3a7581389a35d5b6e2139607ad7c75b17aed325f210aa91f4a9609"
[[package]]
name = "plotters-svg"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38f6d39893cca0701371e3c27294f09797214b86f1fb951b89ade8ec04e2abab"
dependencies = [
"plotters-backend",
]
[[package]]
name = "proc-macro2"
version = "1.0.66"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "50f3b39ccfb720540debaa0164757101c08ecb8d326b15358ce76a62c7e85965"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rayon"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b"
dependencies = [
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b8f95bd6966f5c87776639160a66bd8ab9895d9d4ab01ddba9fc60661aebe8d"
dependencies = [
"crossbeam-channel",
"crossbeam-deque",
"crossbeam-utils",
"num_cpus",
]
[[package]]
name = "regex"
version = "1.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39354c10dd07468c2e73926b23bb9c2caca74c5501e38a35da70406f1d923310"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2"
[[package]]
name = "rustix"
version = "0.38.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0a962918ea88d644592894bc6dc55acc6c0956488adcebbfb6e273506b7fd6e5"
dependencies = [
"bitflags",
"errno",
"libc",
"linux-raw-sys",
"windows-sys",
]
[[package]]
name = "ryu"
version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741"
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]]
name = "scopeguard"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "serde"
version = "1.0.175"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d25439cd7397d044e2748a6fe2432b5e85db703d6d097bd014b3c0ad1ebff0b"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.175"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b23f7ade6f110613c0d63858ddb8b94c1041f550eab58a16b371bdf2c9c80ab4"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.103"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d03b412469450d4404fe8499a268edd7f8b79fecb074b0d812ad64ca21f4031b"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "syn"
version = "2.0.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b60f673f44a8255b9c8c657daf66a596d435f2da81a555b06dc644d080ba45e0"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "thread_local"
version = "1.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152"
dependencies = [
"cfg-if",
"once_cell",
]
[[package]]
name = "tinytemplate"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
dependencies = [
"serde",
"serde_json",
]
[[package]]
name = "unicode-ident"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c"
[[package]]
name = "walkdir"
version = "2.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698"
dependencies = [
"same-file",
"winapi-util",
]
[[package]]
name = "wasm-bindgen"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342"
dependencies = [
"cfg-if",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd"
dependencies = [
"bumpalo",
"log",
"once_cell",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
dependencies = [
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1"
[[package]]
name = "web-sys"
version = "0.3.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b85cbef8c220a6abc02aefd892dfc0fc23afb1c6a426316ec33253a3877249b"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
dependencies = [
"winapi",
]
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-sys"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.48.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc"
[[package]]
name = "windows_aarch64_msvc"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3"
[[package]]
name = "windows_i686_gnu"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241"
[[package]]
name = "windows_i686_msvc"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00"
[[package]]
name = "windows_x86_64_gnu"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953"
[[package]]
name = "windows_x86_64_msvc"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a"
[[package]]
name = "worker"
version = "0.1.0"

View File

@ -1,14 +1,2 @@
[package]
name = "fzf_oxide"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
memchr = "2.5.0"
cov-mark = { version = "1.1.0", default-features = false }
[dev-dependencies]
cov-mark = { version = "1.1.0", default-features = true }
[workspace]
members = [ "matcher", "worker" ]

17
matcher/Cargo.toml Normal file
View File

@ -0,0 +1,17 @@
[package]
name = "nucleo-matcher"
authors = ["Pascal Kuthe <pascal.kuthe@semimod.de>"]
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
memchr = "2.5.0"
cov-mark = { version = "1.1.0", default-features = false }
[dev-dependencies]
cov-mark = { version = "1.1.0", default-features = true }
criterion = "0.5.1"
fuzzy-matcher = "0.3.7"

4
matcher/fuzz/.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
target
corpus
artifacts
coverage

View File

@ -1,3 +1,5 @@
use std::mem::transmute;
const DATA1: [(char, char); 277] = [
('\u{00C0}', 'A'), // WITH GRAVE, LATIN CAPITAL LETTER
('\u{00C1}', 'A'), // WITH ACUTE, LATIN CAPITAL LETTER
@ -471,7 +473,7 @@ const fn generate_table<const LEN: usize>(sparse_data: &[(char, char)]) -> [char
let mut i = 0u32;
let mut j = 0;
while i < table.len() as u32 {
let Some(key) = char::from_u32(start + i) else { panic!("invalid char") };
let key = unsafe { transmute(start + i) };
if sparse_data[j].0 == key {
table[i as usize] = DATA1[j].1;
j += 1;

View File

@ -21,7 +21,7 @@ use std::fmt::{Debug, Formatter, Result};
impl Debug for ScoreCell {
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
write!(f, "({}, {}, {})", self.score, self.bonus, self.matched)
write!(f, "({}, {})", self.score, self.matched)
}
}

271
matcher/src/exact.rs Normal file
View File

@ -0,0 +1,271 @@
use memchr::memmem;
use memchr::{Memchr, Memchr2};
use crate::chars::{AsciiChar, Char};
use crate::score::{BONUS_FIRST_CHAR_MULTIPLIER, SCORE_MATCH};
use crate::Matcher;
impl Matcher {
/// Finds the best-scoring occurrence of the single ASCII byte `c` in `haystack`.
///
/// When `ignore_case` is enabled and `c` is a lowercase ASCII letter, both `c`
/// and its uppercase counterpart (`c - 32`) are searched for; otherwise only
/// the exact byte is searched for. Every occurrence is scored as
/// `bonus * BONUS_FIRST_CHAR_MULTIPLIER + SCORE_MATCH`, where the bonus is
/// derived from the class of the preceding byte (or `initial_char_class` at
/// position 0) and the class of the matched byte. On ties the earliest
/// occurrence wins.
///
/// Returns `None` if no occurrence produced a non-zero score. If `INDICES` is
/// `true`, the position of the best occurrence is appended to `indices`.
pub(crate) fn substring_match_1_ascii<const INDICES: bool>(
    &mut self,
    haystack: &[u8],
    c: u8,
    indices: &mut Vec<u32>,
) -> Option<u16> {
    let mut max_score = 0;
    let mut max_pos = 0;
    if self.config.ignore_case && c >= b'a' && c <= b'z' {
        for i in Memchr2::new(c, c - 32, haystack) {
            let prev_char_class = i
                .checked_sub(1)
                .map(|i| AsciiChar(haystack[i]).char_class(&self.config))
                .unwrap_or(self.config.initial_char_class);
            // The hit may be either case, so classify the byte that was actually found.
            let char_class = AsciiChar(haystack[i]).char_class(&self.config);
            let bonus = self.config.bonus_for(prev_char_class, char_class);
            let score = bonus * BONUS_FIRST_CHAR_MULTIPLIER + SCORE_MATCH;
            if score > max_score {
                max_pos = i as u32;
                max_score = score;
                // can't get better than this
                //
                // Compare the bonus (not the total score, which already contains
                // `SCORE_MATCH` and the multiplier) so the search only stops once
                // the best boundary bonus has really been reached.
                if bonus >= self.config.bonus_boundary_white {
                    break;
                }
            }
        }
    } else {
        // Only one exact byte can match, so its class is loop invariant.
        let char_class = AsciiChar(c).char_class(&self.config);
        for i in Memchr::new(c, haystack) {
            let prev_char_class = i
                .checked_sub(1)
                .map(|i| AsciiChar(haystack[i]).char_class(&self.config))
                .unwrap_or(self.config.initial_char_class);
            let bonus = self.config.bonus_for(prev_char_class, char_class);
            let score = bonus * BONUS_FIRST_CHAR_MULTIPLIER + SCORE_MATCH;
            if score > max_score {
                max_pos = i as u32;
                max_score = score;
                // can't get better than this (see the note in the branch above)
                if bonus >= self.config.bonus_boundary_white {
                    break;
                }
            }
        }
    }
    if max_score == 0 {
        return None;
    }

    if INDICES {
        indices.push(max_pos);
    }
    Some(max_score)
}
/// Scores the candidate start positions produced by `prefilter` and returns
/// the best verified one as `(score, position)`.
///
/// `prefilter` yields haystack positions at which the first `prefilter_len`
/// bytes of `needle` are already known to match. For each candidate the
/// remaining needle bytes are compared against the normalized haystack bytes
/// that directly follow the prefix. The score of a candidate is
/// `bonus * BONUS_FIRST_CHAR_MULTIPLIER + SCORE_MATCH`, with the bonus derived
/// from the class of the byte before the candidate (or `initial_char_class`
/// at position 0) and the class of the first matched byte.
///
/// Returns `(0, 0)` if no candidate could be verified.
pub(crate) fn substring_match_ascii_with_prefilter(
    &mut self,
    haystack: &[u8],
    needle: &[u8],
    prefilter_len: usize,
    prefilter: impl Iterator<Item = usize>,
) -> (u16, usize) {
    let needle_without_prefilter = &needle[prefilter_len..];
    let mut max_score = 0;
    let mut max_pos = 0;
    for i in prefilter {
        let prev_char_class = i
            .checked_sub(1)
            .map(|i| AsciiChar(haystack[i]).char_class(&self.config))
            .unwrap_or(self.config.initial_char_class);
        let char_class = AsciiChar(haystack[i]).char_class(&self.config);
        let bonus = self.config.bonus_for(prev_char_class, char_class);
        let score = bonus * BONUS_FIRST_CHAR_MULTIPLIER + SCORE_MATCH;
        // Skip the (more expensive) verification if this candidate cannot win anyway.
        if score <= max_score {
            continue;
        }
        // Only the window the needle would occupy may be compared. Comparing the
        // whole haystack tail would reject every match that is not a suffix of
        // the haystack. A window that does not fit is treated as "no match".
        let Some(window) = haystack.get(i + prefilter_len..i + needle.len()) else {
            continue;
        };
        let verified = window
            .iter()
            .map(|&c| AsciiChar(c).normalize(&self.config).0)
            .eq(needle_without_prefilter.iter().copied());
        if !verified {
            continue;
        }
        max_pos = i;
        max_score = score;
        // can't get better than this
        //
        // Compare the bonus rather than the total score: the score already
        // contains `SCORE_MATCH` and the multiplier.
        if bonus >= self.config.bonus_boundary_white {
            break;
        }
    }
    (max_score, max_pos)
}
/// Finds the best-scoring occurrence of the ASCII `needle` as a contiguous
/// substring of the ASCII `haystack`.
///
/// With `ignore_case` enabled, the position of the first lowercase ASCII
/// letter in `needle` selects the prefilter:
/// * position 0: search for both cases of the first byte,
/// * position 1: search for the (non-letter) first byte,
/// * any later position: search for the letter-free prefix with `memmem`.
///
/// The rest of the needle is then verified by
/// `substring_match_ascii_with_prefilter`. If the needle contains no lowercase
/// letter, or `ignore_case` is disabled, a plain case-sensitive `memmem`
/// search is used instead.
///
/// Returns `None` if `needle` is empty, longer than `haystack`, or does not
/// occur. Otherwise returns the result of `calculate_score` for the best
/// occurrence; `indices` is forwarded to `calculate_score`.
pub(crate) fn substring_match_ascii<const INDICES: bool>(
    &mut self,
    haystack: &[u8],
    needle: &[u8],
    indices: &mut Vec<u32>,
) -> Option<u16> {
    // Guard the `needle[0]` accesses and the length subtraction below.
    if needle.is_empty() || needle.len() > haystack.len() {
        return None;
    }
    // Number of haystack positions at which the whole needle still fits.
    let candidate_starts = haystack.len() - needle.len() + 1;
    let first_letter = if self.config.ignore_case {
        needle.iter().position(|&c| c >= b'a' && c <= b'z')
    } else {
        None
    };

    let (max_score, max_pos) = match first_letter {
        // start with char do case insensitive search
        Some(0) => self.substring_match_ascii_with_prefilter(
            haystack,
            needle,
            1,
            Memchr2::new(needle[0], needle[0] - 32, &haystack[..candidate_starts]),
        ),
        // the first byte is not a letter, so an exact byte search is enough
        Some(1) => self.substring_match_ascii_with_prefilter(
            haystack,
            needle,
            1,
            Memchr::new(needle[0], &haystack[..candidate_starts]),
        ),
        // The first `len` bytes contain no letter: search for exactly that
        // prefix (not the whole needle, whose letters may differ in case) and
        // tell the verifier how many bytes were already matched.
        Some(len) => self.substring_match_ascii_with_prefilter(
            haystack,
            needle,
            len,
            memmem::find_iter(&haystack[..candidate_starts - 1 + len], &needle[..len]),
        ),
        // in case we don't have any letter in the needle
        // we can treat the search as case sensitive and use memmem directly which is way faster
        None => {
            let mut best_score = 0;
            let mut best_pos = 0;
            // Every hit starts with exactly `needle[0]`, so its class is loop invariant.
            let char_class = AsciiChar(needle[0]).char_class(&self.config);
            for i in memmem::find_iter(haystack, needle) {
                let prev_char_class = i
                    .checked_sub(1)
                    .map(|i| AsciiChar(haystack[i]).char_class(&self.config))
                    .unwrap_or(self.config.initial_char_class);
                let bonus = self.config.bonus_for(prev_char_class, char_class);
                let score = bonus * BONUS_FIRST_CHAR_MULTIPLIER + SCORE_MATCH;
                if score > best_score {
                    best_pos = i;
                    best_score = score;
                    // can't get better than this
                    //
                    // Compare the bonus rather than the total score: the score
                    // already contains `SCORE_MATCH` and the multiplier.
                    if bonus >= self.config.bonus_boundary_white {
                        break;
                    }
                }
            }
            (best_score, best_pos)
        }
    };
    if max_score == 0 {
        return None;
    }

    let score = self.calculate_score::<INDICES, _, _>(
        AsciiChar::cast(haystack),
        AsciiChar::cast(needle),
        max_pos,
        max_pos + needle.len(),
        indices,
    );
    Some(score)
}
/// Finds the best-scoring occurrence of the single char `needle` in
/// `haystack[start..]`.
///
/// Each haystack char is normalized before it is compared with `needle`. An
/// occurrence is scored as `bonus * BONUS_FIRST_CHAR_MULTIPLIER + SCORE_MATCH`,
/// where the bonus is derived from the class of the immediately preceding
/// char (the char at `start - 1`, or `initial_char_class` when `start` is 0,
/// for the first position) and the class of the matched char.
///
/// Returns the best score, which is 0 if `needle` does not occur. If `INDICES`
/// is `true`, the absolute haystack position of the best occurrence is
/// appended to `indices`.
// NOTE(review): an index is pushed even when nothing matched (score 0);
// presumably callers only call this after a prefilter confirmed a match.
pub(crate) fn substring_match_1_non_ascii<const INDICES: bool>(
    &mut self,
    haystack: &[char],
    needle: char,
    start: usize,
    indices: &mut Vec<u32>,
) -> u16 {
    let mut max_score = 0;
    let mut max_pos = 0;
    let mut prev_class = start
        .checked_sub(1)
        .map(|i| haystack[i].char_class(&self.config))
        .unwrap_or(self.config.initial_char_class);
    for (i, &c) in haystack[start..].iter().enumerate() {
        let (c, char_class) = c.char_class_and_normalize(&self.config);
        // Advance the previous class for every char, not only for matching
        // ones, so the bonus always reflects the directly preceding char.
        let preceding_class = prev_class;
        prev_class = char_class;
        if c != needle {
            continue;
        }
        let bonus = self.config.bonus_for(preceding_class, char_class);
        let score = bonus * BONUS_FIRST_CHAR_MULTIPLIER + SCORE_MATCH;
        if score > max_score {
            max_pos = i as u32;
            max_score = score;
            // can't get better than this
            //
            // Compare the bonus rather than the total score: the score already
            // contains `SCORE_MATCH` and the multiplier.
            if bonus >= self.config.bonus_boundary_white {
                break;
            }
        }
    }

    if INDICES {
        // `max_pos` is relative to `start`; report an absolute position.
        indices.push(max_pos + start as u32);
    }
    max_score
}
/// Finds the best-scoring occurrence of `needle` as a contiguous substring of
/// `haystack`, considering only occurrences that begin at or after `start`.
///
/// Haystack chars are normalized before comparison. A candidate is scored as
/// `bonus * BONUS_FIRST_CHAR_MULTIPLIER + SCORE_MATCH`, where the bonus is
/// derived from the class of the immediately preceding char (the char at
/// `start - 1`, or `initial_char_class` when `start` is 0, for the first
/// position) and the class of the first matched char.
///
/// Returns `None` if `needle` does not occur. Otherwise returns the result of
/// `calculate_score` for the best occurrence; `indices` is forwarded to
/// `calculate_score`.
pub(crate) fn substring_match_non_ascii<const INDICES: bool, N>(
    &mut self,
    haystack: &[char],
    needle: &[N],
    start: usize,
    indices: &mut Vec<u32>,
) -> Option<u16>
where
    N: Char,
    char: PartialEq<N>,
{
    let mut max_score = 0;
    let mut max_pos = 0;
    let mut prev_class = start
        .checked_sub(1)
        .map(|i| haystack[i].char_class(&self.config))
        .unwrap_or(self.config.initial_char_class);
    for (offset, &c) in haystack[start..].iter().enumerate() {
        let (c, char_class) = c.char_class_and_normalize(&self.config);
        // Advance the previous class for every char, not only for matching
        // ones, so the bonus always reflects the directly preceding char.
        let preceding_class = prev_class;
        prev_class = char_class;
        if c != needle[0] {
            continue;
        }
        let bonus = self.config.bonus_for(preceding_class, char_class);
        let score = bonus * BONUS_FIRST_CHAR_MULTIPLIER + SCORE_MATCH;
        // Skip the verification if this candidate cannot win anyway.
        if score <= max_score {
            continue;
        }
        // `offset` is relative to `start`; all slicing below needs the
        // absolute haystack position.
        let pos = start + offset;
        // Compare only the window the needle would occupy. Once that window no
        // longer fits, no later position can match either.
        let Some(window) = haystack.get(pos + 1..pos + needle.len()) else {
            break;
        };
        let verified = window
            .iter()
            .map(|c| c.normalize(&self.config))
            .eq(needle[1..].iter().copied());
        if !verified {
            continue;
        }
        max_pos = pos;
        max_score = score;
        // can't get better than this
        //
        // Compare the bonus rather than the total score: the score already
        // contains `SCORE_MATCH` and the multiplier.
        if bonus >= self.config.bonus_boundary_white {
            break;
        }
    }
    // Without a verified occurrence there is nothing to score.
    if max_score == 0 {
        return None;
    }

    let score = self.calculate_score::<INDICES, _, _>(
        haystack,
        needle,
        max_pos,
        max_pos + needle.len(),
        indices,
    );
    Some(score)
}
}

View File

@ -61,20 +61,18 @@ impl Matcher {
}
}
fn next_m_score(p_score: i32, m_score: i32, bonus: u16, next_bonus: u16) -> ScoreCell {
let consecutive_bonus = max(bonus, max(next_bonus, BONUS_CONSECUTIVE));
fn next_m_score(p_score: i32, m_score: i32, bonus: u16) -> ScoreCell {
let consecutive_bonus = max(bonus, BONUS_CONSECUTIVE);
let score_match = m_score + consecutive_bonus as i32;
let score_skip = p_score + next_bonus as i32;
if score_match >= score_skip {
let score_skip = p_score + bonus as i32;
if score_match > score_skip {
ScoreCell {
score: score_match + SCORE_MATCH as i32,
bonus: consecutive_bonus,
matched: true,
}
} else {
ScoreCell {
score: score_skip + SCORE_MATCH as i32,
bonus: next_bonus,
matched: false,
}
}
@ -91,7 +89,7 @@ fn p_score(prev_p_score: i32, prev_m_score: i32) -> (i32, bool) {
} else {
i32::MIN / 2
};
if score_match >= score_skip {
if score_match > score_skip {
(score_match, true)
} else {
(score_skip, false)
@ -185,15 +183,10 @@ impl<H: Char> MatcherDataView<'_, H> {
let (p_score, p_matched) = p_score(prev_p_score, prev_m_score);
let m_cell = if FIRST_ROW {
if c == needle_char {
// TODO: do we really want to start with a penalty here??
let mut cell =
next_m_score(0, i32::MIN / 2, 0, bonus * BONUS_FIRST_CHAR_MULTIPLIER);
cell.bonus = *bonus;
cell
next_m_score(0, i32::MIN / 2, bonus * BONUS_FIRST_CHAR_MULTIPLIER)
} else {
ScoreCell {
score: i32::MIN / 2,
bonus: 0,
matched: false,
}
}
@ -215,15 +208,10 @@ impl<H: Char> MatcherDataView<'_, H> {
let (p_score, p_matched) = p_score(prev_p_score, prev_m_score);
let m_cell = if FIRST_ROW {
if c[0] == needle_char {
// TODO: do we really want to start with a penalty here??
let mut cell =
next_m_score(0, i32::MIN / 2, 0, bonus[0] * BONUS_FIRST_CHAR_MULTIPLIER);
cell.bonus = bonus[0];
cell
next_m_score(0, i32::MIN / 2, bonus[0] * BONUS_FIRST_CHAR_MULTIPLIER)
} else {
ScoreCell {
score: i32::MIN / 2,
bonus: 0,
matched: false,
}
}
@ -231,11 +219,10 @@ impl<H: Char> MatcherDataView<'_, H> {
*score_cell
};
*score_cell = if c[1] == next_needle_char {
next_m_score(p_score, m_cell.score, m_cell.bonus, bonus[1])
next_m_score(p_score, m_cell.score, bonus[1])
} else {
ScoreCell {
score: i32::MIN / 2,
bonus: 0,
matched: false,
}
};
@ -284,8 +271,9 @@ impl<H: Char> MatcherDataView<'_, H> {
matrix_len: usize,
start: u32,
) {
indices.clear();
indices.resize(self.row_offs.len(), 0);
let indices_start = indices.len();
indices.resize(indices_start + self.row_offs.len(), 0);
let indices = &mut indices[indices_start..];
let last_row_off = *self.row_offs.last().unwrap();
indices[self.row_offs.len() - 1] = start + max_score_end as u32 + last_row_off as u32;

535
matcher/src/lib.rs Normal file
View File

@ -0,0 +1,535 @@
/*!
`nucleo_matcher` is a low level crate that contains the matcher implementation
used by the other nucleo crates.
The matcher is highly optimized and can significantly outperform `fzf` and
`skim` (the `fuzzy-matcher` crate). However some of these optimizations require
a slightly less convenient API. Particularly, `nucleo_matcher` requires that
needles and haystacks are provided as [UTF32 strings](crate::Utf32Str) instead
of Rust's normal UTF-8 strings.
*/
// sadly ranges don't optimize well
#![allow(clippy::manual_range_contains)]
pub mod chars;
mod config;
#[cfg(test)]
mod debug;
mod exact;
mod fuzzy_greedy;
mod fuzzy_optimal;
mod matrix;
mod prefilter;
mod score;
mod utf32_str;
#[cfg(test)]
mod tests;
pub use crate::config::MatcherConfig;
pub use crate::utf32_str::Utf32Str;
use crate::chars::{AsciiChar, Char};
use crate::matrix::MatrixSlab;
/// A matcher engine that can execute (fuzzy) matches.
///
/// A matcher contains **heap allocated** scratch memory that is reused during
/// matching. This scratch memory allows the matcher to guarantee that it will
/// **never allocate** during matching (with the exception of pushing to the
/// `indices` vector if there isn't enough capacity). However this scratch
/// memory is fairly large (around 135KB), so creating a matcher is expensive
/// and matchers should be reused.
///
/// All `.._match` functions will not compute the indices of the matched chars
/// and are therefore significantly faster. These should be used to prefilter
/// and sort all matches. All `.._indices` functions will compute the indices of
/// the matched chars. These should be used when rendering the best N matches.
/// Note that the `indices` argument is **never cleared**. This allows running
/// multiple different matches on the same haystack and merging the indices by
/// sorting and deduplicating the vector.
///
/// Matching is limited to 2^32-1 codepoints; if the haystack is longer than
/// that the matcher *will panic*. The caller must decide whether it wants to
/// filter out long haystacks or truncate them.
pub struct Matcher {
/// Configuration consulted by the matching routines (for example case
/// handling and bonus values).
pub config: MatcherConfig,
// Scratch storage created once per matcher via `MatrixSlab::new()`.
slab: MatrixSlab,
}
impl Default for Matcher {
    /// Creates a matcher that uses [`MatcherConfig::DEFAULT`].
    fn default() -> Self {
        Self::new(MatcherConfig::DEFAULT)
    }
}
impl Matcher {
pub fn new(config: MatcherConfig) -> Self {
Self {
config,
slab: MatrixSlab::new(),
}
}
/// Find the fuzzy match with the highest score in the `haystack`.
///
/// This function has `O(mn)` time complexity for short inputs. To
/// avoid slowdowns it automatically falls back to
/// [greedy matching](crate::Matcher::fuzzy_match_greedy) for large needles
/// and haystacks.
///
/// Returns the score of the match, or `None` if the needle is empty, longer
/// than the haystack or doesn't match.
///
/// # Panics
///
/// Panics if the haystack is longer than 2^32-1 codepoints.
///
/// See the [matcher documentation](crate::Matcher) for more details.
pub fn fuzzy_match(&mut self, haystack: Utf32Str<'_>, needle: Utf32Str<'_>) -> Option<u16> {
assert!(haystack.len() <= u32::MAX as usize);
// `INDICES = false`: the vector is only a placeholder argument
self.fuzzy_matcher_impl::<false>(haystack, needle, &mut Vec::new())
}
/// Find the fuzzy match with the highest score in the `haystack` and
/// compute its indices.
///
/// This function has `O(mn)` time complexity for short inputs. To
/// avoid slowdowns it automatically falls back to
/// [greedy matching](crate::Matcher::fuzzy_match_greedy) for large needles
/// and haystacks.
///
/// Returns the score of the match, or `None` if the needle is empty, longer
/// than the haystack or doesn't match. The indices of the matched chars are
/// pushed to `indices`.
///
/// # Panics
///
/// Panics if the haystack is longer than 2^32-1 codepoints.
///
/// See the [matcher documentation](crate::Matcher) for more details.
pub fn fuzzy_indices(
&mut self,
haystack: Utf32Str<'_>,
needle: Utf32Str<'_>,
indices: &mut Vec<u32>,
) -> Option<u16> {
assert!(haystack.len() <= u32::MAX as usize);
self.fuzzy_matcher_impl::<true>(haystack, needle, indices)
}
/// Shared implementation of [`fuzzy_match`](Matcher::fuzzy_match) and
/// [`fuzzy_indices`](Matcher::fuzzy_indices).
///
/// `INDICES` selects whether the positions of the matched chars are pushed
/// to `indices`. Returns `None` for an empty needle, for a needle that is
/// longer than the haystack and whenever no match is found. A needle that
/// is exactly as long as the haystack is handled by `exact_match_impl`.
fn fuzzy_matcher_impl<const INDICES: bool>(
    &mut self,
    haystack: Utf32Str<'_>,
    needle_: Utf32Str<'_>,
    indices: &mut Vec<u32>,
) -> Option<u16> {
    let (haystack_len, needle_len) = (haystack.len(), needle_.len());
    if needle_.is_empty() || needle_len > haystack_len {
        return None;
    }
    if needle_len == haystack_len {
        return self.exact_match_impl::<INDICES>(haystack, needle_, indices);
    }
    assert!(
        haystack_len <= u32::MAX as usize,
        "fuzzy matching is only support for up to 2^32-1 codepoints"
    );
    match (haystack, needle_) {
        // a purely ascii haystack can never match a needle that contains
        // non-ascii chars
        (Utf32Str::Ascii(_), Utf32Str::Unicode(_)) => None,
        (Utf32Str::Ascii(ascii_haystack), Utf32Str::Ascii(ascii_needle)) => match ascii_needle {
            // single char needles are handled by a dedicated routine
            &[single] => self.substring_match_1_ascii::<INDICES>(ascii_haystack, single, indices),
            _ => {
                let (start, greedy_end, end) =
                    self.prefilter_ascii(ascii_haystack, ascii_needle, false)?;
                self.fuzzy_match_optimal::<INDICES, AsciiChar, AsciiChar>(
                    AsciiChar::cast(ascii_haystack),
                    AsciiChar::cast(ascii_needle),
                    start,
                    greedy_end,
                    end,
                    indices,
                )
            }
        },
        (Utf32Str::Unicode(chars), Utf32Str::Ascii(ascii_needle)) => match ascii_needle {
            &[single] => {
                let (start, _) = self.prefilter_non_ascii(chars, needle_, true)?;
                let score = self.substring_match_1_non_ascii::<INDICES>(
                    chars,
                    single as char,
                    start,
                    indices,
                );
                Some(score)
            }
            _ => {
                let (start, end) = self.prefilter_non_ascii(chars, needle_, false)?;
                self.fuzzy_match_optimal::<INDICES, char, AsciiChar>(
                    chars,
                    AsciiChar::cast(ascii_needle),
                    start,
                    start + 1,
                    end,
                    indices,
                )
            }
        },
        (Utf32Str::Unicode(chars), Utf32Str::Unicode(needle_chars)) => match needle_chars {
            &[single] => {
                let (start, _) = self.prefilter_non_ascii(chars, needle_, true)?;
                let score =
                    self.substring_match_1_non_ascii::<INDICES>(chars, single, start, indices);
                Some(score)
            }
            _ => {
                let (start, end) = self.prefilter_non_ascii(chars, needle_, false)?;
                self.fuzzy_match_optimal::<INDICES, char, char>(
                    chars,
                    needle_chars,
                    start,
                    start + 1,
                    end,
                    indices,
                )
            }
        },
    }
}
/// Greedily find a fuzzy match in the `haystack`.
///
/// This function has `O(n)` time complexity but may provide unintuitive (non-optimal)
/// indices and scores. Usually [fuzzy_match](crate::Matcher::fuzzy_match) should
/// be preferred.
///
/// # Panics
///
/// Panics if the haystack is longer than 2^32-1 codepoints.
///
/// See the [matcher documentation](crate::Matcher) for more details.
pub fn fuzzy_match_greedy(
&mut self,
haystack: Utf32Str<'_>,
needle: Utf32Str<'_>,
) -> Option<u16> {
assert!(haystack.len() <= u32::MAX as usize);
// `INDICES = false`: the vector is only a placeholder argument
self.fuzzy_match_greedy_impl::<false>(haystack, needle, &mut Vec::new())
}
/// Greedily find a fuzzy match in the `haystack` and compute its indices.
///
/// This function has `O(n)` time complexity but may provide unintuitive (non-optimal)
/// indices and scores. Usually [fuzzy_indices](crate::Matcher::fuzzy_indices) should
/// be preferred.
///
/// # Panics
///
/// Panics if the haystack is longer than 2^32-1 codepoints.
///
/// See the [matcher documentation](crate::Matcher) for more details.
pub fn fuzzy_indices_greedy(
&mut self,
haystack: Utf32Str<'_>,
needle: Utf32Str<'_>,
indices: &mut Vec<u32>,
) -> Option<u16> {
assert!(haystack.len() <= u32::MAX as usize);
self.fuzzy_match_greedy_impl::<true>(haystack, needle, indices)
}
/// Shared implementation of [`fuzzy_match_greedy`](Matcher::fuzzy_match_greedy)
/// and [`fuzzy_indices_greedy`](Matcher::fuzzy_indices_greedy).
///
/// `INDICES` selects whether the positions of the matched chars are pushed
/// to `indices`. Returns `None` for an empty needle, for a needle that is
/// longer than the haystack and whenever the prefilter finds no candidate.
/// A needle that is exactly as long as the haystack is handled by
/// `exact_match_impl`.
fn fuzzy_match_greedy_impl<const INDICES: bool>(
    &mut self,
    haystack: Utf32Str<'_>,
    needle_: Utf32Str<'_>,
    indices: &mut Vec<u32>,
) -> Option<u16> {
    let (haystack_len, needle_len) = (haystack.len(), needle_.len());
    if needle_.is_empty() || needle_len > haystack_len {
        return None;
    }
    if needle_len == haystack_len {
        return self.exact_match_impl::<INDICES>(haystack, needle_, indices);
    }
    assert!(
        haystack_len <= u32::MAX as usize,
        "matching is only support for up to 2^32-1 codepoints"
    );
    match (haystack, needle_) {
        // a purely ascii haystack can never match a needle that contains
        // non-ascii chars
        (Utf32Str::Ascii(_), Utf32Str::Unicode(_)) => None,
        (Utf32Str::Ascii(ascii_haystack), Utf32Str::Ascii(ascii_needle)) => {
            let (first, greedy_end, _) =
                self.prefilter_ascii(ascii_haystack, ascii_needle, true)?;
            self.fuzzy_match_greedy_::<INDICES, AsciiChar, AsciiChar>(
                AsciiChar::cast(ascii_haystack),
                AsciiChar::cast(ascii_needle),
                first,
                greedy_end,
                indices,
            )
        }
        (Utf32Str::Unicode(chars), Utf32Str::Ascii(ascii_needle)) => {
            let (first, _) = self.prefilter_non_ascii(chars, needle_, true)?;
            self.fuzzy_match_greedy_::<INDICES, char, AsciiChar>(
                chars,
                AsciiChar::cast(ascii_needle),
                first,
                first + 1,
                indices,
            )
        }
        (Utf32Str::Unicode(chars), Utf32Str::Unicode(needle_chars)) => {
            let (first, _) = self.prefilter_non_ascii(chars, needle_, true)?;
            self.fuzzy_match_greedy_::<INDICES, char, char>(
                chars,
                needle_chars,
                first,
                first + 1,
                indices,
            )
        }
    }
}
/// Finds the substring match with the highest score in the `haystack`.
///
/// This function has `O(nm)` time complexity. However many cases can
/// be significantly accelerated using prefilters so it's usually fast
/// in practice.
///
/// Returns the score of the match, or `None` if the needle is empty, longer
/// than the haystack or doesn't match.
///
/// See the [matcher documentation](crate::Matcher) for more details.
pub fn substring_match(
&mut self,
haystack: Utf32Str<'_>,
needle_: Utf32Str<'_>,
) -> Option<u16> {
// `INDICES = false`: the vector is only a placeholder argument
self.substring_match_impl::<false>(haystack, needle_, &mut Vec::new())
}
/// Finds the substring match with the highest score in the `haystack` and
/// computes its indices.
///
/// This function has `O(nm)` time complexity. However many cases can
/// be significantly accelerated using prefilters so it's usually fast
/// in practice.
///
/// Returns the score of the match, or `None` if the needle is empty, longer
/// than the haystack or doesn't match. The indices of the matched chars are
/// pushed to `indices`.
///
/// See the [matcher documentation](crate::Matcher) for more details.
pub fn substring_indices(
&mut self,
haystack: Utf32Str<'_>,
needle_: Utf32Str<'_>,
indices: &mut Vec<u32>,
) -> Option<u16> {
self.substring_match_impl::<true>(haystack, needle_, indices)
}
/// Shared implementation of [`substring_match`](Matcher::substring_match)
/// and [`substring_indices`](Matcher::substring_indices).
///
/// `INDICES` selects whether the positions of the matched chars are pushed
/// to `indices`. Returns `None` for an empty needle, for a needle that is
/// longer than the haystack and whenever no match is found. A needle that
/// is exactly as long as the haystack is handled by `exact_match_impl`.
///
/// # Panics
///
/// Panics if the haystack is longer than 2^32-1 codepoints.
fn substring_match_impl<const INDICES: bool>(
    &mut self,
    haystack: Utf32Str<'_>,
    needle_: Utf32Str<'_>,
    indices: &mut Vec<u32>,
) -> Option<u16> {
    if needle_.len() > haystack.len() || needle_.is_empty() {
        return None;
    }
    if needle_.len() == haystack.len() {
        return self.exact_match_impl::<INDICES>(haystack, needle_, indices);
    }
    assert!(
        haystack.len() <= u32::MAX as usize,
        "matching is only support for up to 2^32-1 codepoints"
    );
    match (haystack, needle_) {
        (Utf32Str::Ascii(haystack), Utf32Str::Ascii(needle)) => {
            // single char needles are handled by a dedicated routine
            if let &[needle] = needle {
                return self.substring_match_1_ascii::<INDICES>(haystack, needle, indices);
            }
            self.substring_match_ascii::<INDICES>(haystack, needle, indices)
        }
        (Utf32Str::Ascii(_), Utf32Str::Unicode(_)) => {
            // a purely ascii haystack can never be transformed to match
            // a needle that contains non-ascii chars since we don't allow gaps
            None
        }
        (Utf32Str::Unicode(haystack), Utf32Str::Ascii(needle)) => {
            if let &[needle] = needle {
                let (start, _) = self.prefilter_non_ascii(haystack, needle_, true)?;
                let res = self.substring_match_1_non_ascii::<INDICES>(
                    haystack,
                    needle as char,
                    start,
                    indices,
                );
                return Some(res);
            }
            let (start, _) = self.prefilter_non_ascii(haystack, needle_, false)?;
            self.substring_match_non_ascii::<INDICES, _>(
                haystack,
                AsciiChar::cast(needle),
                start,
                indices,
            )
        }
        (Utf32Str::Unicode(haystack), Utf32Str::Unicode(needle)) => {
            if let &[needle] = needle {
                let (start, _) = self.prefilter_non_ascii(haystack, needle_, true)?;
                let res = self
                    .substring_match_1_non_ascii::<INDICES>(haystack, needle, start, indices);
                return Some(res);
            }
            // use the substring matcher here as well: the fuzzy matcher would
            // accept matches with gaps, which are not substring matches
            let (start, _) = self.prefilter_non_ascii(haystack, needle_, false)?;
            self.substring_match_non_ascii::<INDICES, _>(haystack, needle, start, indices)
        }
    }
}
/// Checks whether needle and haystack match exactly.
///
/// This function has `O(n)` time complexity.
///
/// Returns the score of the match, or `None` if the needle is empty or
/// doesn't match the whole haystack.
///
/// See the [matcher documentation](crate::Matcher) for more details.
pub fn exact_match(&mut self, haystack: Utf32Str<'_>, needle: Utf32Str<'_>) -> Option<u16> {
// `INDICES = false`: the vector is only a placeholder argument
self.exact_match_impl::<false>(haystack, needle, &mut Vec::new())
}
/// Checks whether needle and haystack match exactly and computes the indices
/// of the matched chars.
///
/// This function has `O(n)` time complexity.
///
/// Returns the score of the match, or `None` if the needle is empty or
/// doesn't match the whole haystack.
///
/// See the [matcher documentation](crate::Matcher) for more details.
pub fn exact_indices(
&mut self,
haystack: Utf32Str<'_>,
needle: Utf32Str<'_>,
indices: &mut Vec<u32>,
) -> Option<u16> {
self.exact_match_impl::<true>(haystack, needle, indices)
}
/// Checks whether the needle is a prefix of the haystack.
///
/// This function has `O(n)` time complexity.
///
/// Returns `None` if the needle is longer than the haystack or if the
/// leading `needle.len()` chars of the haystack don't match the needle
/// exactly.
///
/// See the [matcher documentation](crate::Matcher) for more details.
pub fn prefix_match(&mut self, haystack: Utf32Str<'_>, needle: Utf32Str<'_>) -> Option<u16> {
    let needle_len = needle.len();
    if needle_len > haystack.len() {
        return None;
    }
    let prefix = haystack.slice(..needle_len);
    self.exact_match_impl::<false>(prefix, needle, &mut Vec::new())
}
/// Checks whether the needle is a prefix of the haystack and computes the
/// indices of the matched chars.
///
/// This function has `O(n)` time complexity.
///
/// Returns `None` if the needle is longer than the haystack or if the
/// leading `needle.len()` chars of the haystack don't match the needle
/// exactly.
///
/// See the [matcher documentation](crate::Matcher) for more details.
pub fn prefix_indices(
    &mut self,
    haystack: Utf32Str<'_>,
    needle: Utf32Str<'_>,
    indices: &mut Vec<u32>,
) -> Option<u16> {
    let needle_len = needle.len();
    if needle_len > haystack.len() {
        return None;
    }
    let prefix = haystack.slice(..needle_len);
    self.exact_match_impl::<true>(prefix, needle, indices)
}
/// Checks whether the needle is a postfix of the haystack.
///
/// This function has `O(n)` time complexity.
///
/// Returns `None` if the needle is longer than the haystack or if the
/// trailing `needle.len()` chars of the haystack don't match the needle
/// exactly.
///
/// See the [matcher documentation](crate::Matcher) for more details.
pub fn postfix_match(&mut self, haystack: Utf32Str<'_>, needle: Utf32Str<'_>) -> Option<u16> {
    let (haystack_len, needle_len) = (haystack.len(), needle.len());
    if needle_len > haystack_len {
        return None;
    }
    let postfix = haystack.slice(haystack_len - needle_len..);
    self.exact_match_impl::<false>(postfix, needle, &mut Vec::new())
}
/// Checks whether the needle is a postfix of the haystack and computes the
/// indices of the matched chars.
///
/// This function has `O(n)` time complexity.
///
/// Returns `None` if the needle is longer than the haystack or if the
/// trailing `needle.len()` chars of the haystack don't match the needle
/// exactly. On a match the pushed indices are positions in the full
/// `haystack`, not in the matched postfix.
///
/// # Panics
///
/// Panics if the needle matches and the haystack is longer than 2^32-1
/// codepoints.
///
/// See the [matcher documentation](crate::Matcher) for more details.
pub fn postfix_indices(
    &mut self,
    haystack: Utf32Str<'_>,
    needle: Utf32Str<'_>,
    indices: &mut Vec<u32>,
) -> Option<u16> {
    // `checked_sub` fails exactly when the needle is longer than the haystack
    let offset = haystack.len().checked_sub(needle.len())?;
    // `indices` is never cleared by the matcher, so everything behind this
    // position was pushed by the call below
    let first_new = indices.len();
    let score = self.exact_match_impl::<true>(haystack.slice(offset..), needle, indices)?;
    assert!(
        haystack.len() <= u32::MAX as usize,
        "matching is only supported for up to 2^32-1 codepoints"
    );
    // the match ran on the sliced postfix, so the indices it produced are
    // relative to the start of that slice: shift them back into the haystack
    // NOTE(review): the score is also computed on the slice, so the bonus of
    // the first char presumably ignores the preceding haystack char - confirm
    let offset = offset as u32;
    for index in &mut indices[first_new..] {
        *index += offset;
    }
    Some(score)
}
/// Shared implementation of the exact, prefix and postfix matchers.
///
/// Compares `haystack` and `needle_` char by char (after normalization) and
/// scores the match. `INDICES` selects whether the positions of the matched
/// chars are pushed to `indices`.
///
/// Returns `None` if the needle is empty, if needle and haystack differ in
/// length or if any char doesn't match.
///
/// # Panics
///
/// Panics if the haystack is longer than 2^32-1 codepoints.
fn exact_match_impl<const INDICES: bool>(
    &mut self,
    haystack: Utf32Str<'_>,
    needle_: Utf32Str<'_>,
    indices: &mut Vec<u32>,
) -> Option<u16> {
    if needle_.len() != haystack.len() || needle_.is_empty() {
        return None;
    }
    assert!(
        haystack.len() <= u32::MAX as usize,
        "matching is only support for up to 2^32-1 codepoints"
    );
    let score = match (haystack, needle_) {
        (Utf32Str::Ascii(haystack), Utf32Str::Ascii(needle)) => {
            let matched = if self.config.ignore_case {
                AsciiChar::cast(haystack)
                    .iter()
                    .map(|c| c.normalize(&self.config))
                    .eq(AsciiChar::cast(needle)
                        .iter()
                        .map(|c| c.normalize(&self.config)))
            } else {
                // without case folding a plain byte comparison is sufficient
                haystack == needle
            };
            if !matched {
                return None;
            }
            self.calculate_score::<INDICES, _, _>(
                AsciiChar::cast(haystack),
                AsciiChar::cast(needle),
                0,
                haystack.len(),
                indices,
            )
        }
        (Utf32Str::Ascii(_), Utf32Str::Unicode(_)) => {
            // a purely ascii haystack can never be transformed to match
            // a needle that contains non-ascii chars since we don't allow gaps
            return None;
        }
        (Utf32Str::Unicode(haystack), Utf32Str::Ascii(needle)) => {
            // the result of this comparison must be checked: otherwise every
            // haystack with the same length as the needle would be reported
            // as a match
            let matched = haystack
                .iter()
                .map(|c| c.normalize(&self.config))
                .eq(AsciiChar::cast(needle)
                    .iter()
                    .map(|c| c.normalize(&self.config)));
            if !matched {
                return None;
            }
            self.calculate_score::<INDICES, _, _>(
                haystack,
                AsciiChar::cast(needle),
                0,
                haystack.len(),
                indices,
            )
        }
        (Utf32Str::Unicode(haystack), Utf32Str::Unicode(needle)) => {
            let matched = haystack
                .iter()
                .map(|c| c.normalize(&self.config))
                .eq(needle.iter().map(|c| c.normalize(&self.config)));
            if !matched {
                return None;
            }
            self.calculate_score::<INDICES, _, _>(haystack, needle, 0, haystack.len(), indices)
        }
    };
    Some(score)
}
}

View File

@ -88,7 +88,6 @@ impl<C: Char> MatrixLayout<C> {
#[derive(Clone, Copy)]
pub(crate) struct ScoreCell {
pub score: i32,
pub bonus: u16,
pub matched: bool,
}

View File

@ -30,7 +30,8 @@ impl Matcher {
only_greedy: bool,
) -> Option<(usize, usize, usize)> {
if self.config.ignore_case {
let start = find_ascii_ignore_case(needle[0], haystack)?;
let start =
find_ascii_ignore_case(needle[0], &haystack[..haystack.len() - needle.len() + 1])?;
let mut greedy_end = start + 1;
haystack = &haystack[greedy_end..];
for &c in &needle[1..] {
@ -47,7 +48,7 @@ impl Matcher {
Some((start, greedy_end, end))
}
} else {
let start = memchr(needle[0], haystack)?;
let start = memchr(needle[0], &haystack[..haystack.len() - needle.len() + 1])?;
let mut greedy_end = start + 1;
haystack = &haystack[greedy_end..];
for &c in &needle[1..] {
@ -72,7 +73,7 @@ impl Matcher {
only_greedy: bool,
) -> Option<(usize, usize)> {
let needle_char = needle.get(0);
let start = haystack
let start = haystack[..haystack.len() - needle.len() + 1]
.iter()
.position(|c| c.normalize(&self.config) == needle_char)?;
let needle_char = needle.last();
@ -80,15 +81,10 @@ impl Matcher {
Some((start, start + 1))
} else {
let end = haystack.len()
- haystack[start..]
- haystack[start + 1..]
.iter()
.rev()
.position(|c| c.normalize(&self.config) == needle_char)?;
// matches are never possible in this case
if end - start < needle.len() {
cov_mark::hit!(small_haystack);
return None;
}
Some((start, end))
}

View File

@ -15,11 +15,6 @@ pub(crate) const PENALTY_GAP_EXTENSION: u16 = 1;
// in web2 dictionary and my file system.
pub(crate) const BONUS_BOUNDARY: u16 = SCORE_MATCH / 2;
// Although bonus point for non-word characters is non-contextual, we need it
// for computing bonus points for consecutive chunks starting with a non-word
// character.
pub(crate) const BONUS_NON_WORD: u16 = SCORE_MATCH / 2;
// Edge-triggered bonus for matches in camelCase words.
// Compared to word-boundary case, they don't accompany single-character gaps
// (e.g. FooBar vs. foo-bar), so we deduct bonus point accordingly.
@ -28,19 +23,20 @@ pub(crate) const BONUS_CAMEL123: u16 = BONUS_BOUNDARY - PENALTY_GAP_EXTENSION;
// Minimum bonus point given to characters in consecutive chunks.
// Note that bonus points for consecutive matches shouldn't have needed if we
// used fixed match score as in the original algorithm.
pub(crate) const BONUS_CONSECUTIVE: u16 = PENALTY_GAP_START + PENALTY_GAP_EXTENSION;
pub(crate) const BONUS_CONSECUTIVE: u16 =
PENALTY_GAP_START + PENALTY_GAP_EXTENSION + PENALTY_GAP_EXTENSION;
// The first character in the typed pattern usually has more significance
// than the rest so it's important that it appears at special positions where
// bonus points are given, e.g. "to-go" vs. "ongoing" on "og" or on "ogo".
// The amount of the extra bonus should be limited so that the gap penalty is
// still respected.
pub(crate) const BONUS_FIRST_CHAR_MULTIPLIER: u16 = 1;
pub(crate) const BONUS_FIRST_CHAR_MULTIPLIER: u16 = 2;
impl MatcherConfig {
#[inline]
pub(crate) fn bonus_for(&self, prev_class: CharClass, class: CharClass) -> u16 {
if class > CharClass::NonWord {
if class > CharClass::Delimiter {
// transition from non word to word
match prev_class {
CharClass::Whitespace => return self.bonus_boundary_white,
@ -54,8 +50,6 @@ impl MatcherConfig {
{
// camelCase letter123
BONUS_CAMEL123
} else if class == CharClass::NonWord {
BONUS_NON_WORD
} else if class == CharClass::Whitespace {
self.bonus_boundary_white
} else {
@ -78,7 +72,6 @@ impl Matcher {
indices: &mut Vec<u32>,
) -> u16 {
if INDICES {
indices.clear();
indices.reserve(needle.len());
}
@ -97,8 +90,8 @@ impl Matcher {
indices.push(start as u32)
}
let class = haystack[start].char_class(&self.config);
let mut first_bonus = self.bonus_for(prev_class, class);
let mut score = SCORE_MATCH + first_bonus * BONUS_FIRST_CHAR_MULTIPLIER;
let mut bonus = self.bonus_for(prev_class, class);
let mut score = SCORE_MATCH + bonus * BONUS_FIRST_CHAR_MULTIPLIER;
prev_class = class;
needle_char = *needle_iter.next().unwrap_or(&needle_char);
@ -108,17 +101,9 @@ impl Matcher {
if INDICES {
indices.push(i as u32 + start as u32 + 1)
}
let mut bonus = self.bonus_for(prev_class, class);
if consecutive == 0 {
first_bonus = bonus
} else {
// Break consecutive chunk
if bonus > first_bonus {
first_bonus = bonus;
bonus = max(max(bonus, first_bonus), BONUS_CONSECUTIVE);
} else {
bonus = max(first_bonus, BONUS_CONSECUTIVE);
}
bonus = self.bonus_for(prev_class, class);
if consecutive != 0 {
bonus = max(bonus, BONUS_CONSECUTIVE);
}
score += SCORE_MATCH + bonus;
in_gap = false;
@ -135,7 +120,6 @@ impl Matcher {
score = score.saturating_sub(penalty);
in_gap = true;
consecutive = 0;
first_bonus = 0;
}
prev_class = class;
}

View File

@ -1,8 +1,6 @@
use cov_mark::check;
use crate::chars::Char;
use crate::score::{
BONUS_BOUNDARY, BONUS_CAMEL123, BONUS_CONSECUTIVE, BONUS_FIRST_CHAR_MULTIPLIER, BONUS_NON_WORD,
BONUS_BOUNDARY, BONUS_CAMEL123, BONUS_CONSECUTIVE, BONUS_FIRST_CHAR_MULTIPLIER,
PENALTY_GAP_EXTENSION, PENALTY_GAP_START, SCORE_MATCH,
};
use crate::utf32_str::Utf32Str;
@ -46,13 +44,12 @@ fn assert_matches(
score += needle.len() as u16 * SCORE_MATCH;
for algo in algorithm {
println!("xx {matched_indices:?} {algo:?}");
matched_indices.clear();
let res = match algo {
Algorithm::FuzzyOptimal => {
matched_indices.clear();
matcher.fuzzy_indices(haystack, needle, &mut matched_indices)
}
Algorithm::FuzzyGreedy => {
matched_indices.clear();
matcher.fuzzy_indices_greedy(haystack, needle, &mut matched_indices)
}
};
@ -115,6 +112,7 @@ pub fn assert_not_matches(
)
}
}
const BONUS_BOUNDARY_WHITE: u16 = MatcherConfig::DEFAULT.bonus_boundary_white;
const BONUS_BOUNDARY_DELIMITER: u16 = MatcherConfig::DEFAULT.bonus_boundary_delimiter;
@ -144,20 +142,19 @@ fn test_fuzzy() {
"/AutomatorDocument.icns",
"rdoc",
&[9, 10, 11, 12],
BONUS_CAMEL123 * 3,
BONUS_CAMEL123 + 2 * BONUS_CONSECUTIVE,
),
(
"/man1/zshcompctl.1",
"zshc",
&[6, 7, 8, 9],
BONUS_BOUNDARY_DELIMITER * BONUS_FIRST_CHAR_MULTIPLIER
+ BONUS_BOUNDARY_DELIMITER * 3,
BONUS_BOUNDARY_DELIMITER * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CONSECUTIVE * 3,
),
(
"/.oh-my-zsh/cache",
"zshc",
&[8, 9, 10, 12],
BONUS_BOUNDARY * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY * 2
BONUS_BOUNDARY * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CONSECUTIVE * 2
- PENALTY_GAP_START
+ BONUS_BOUNDARY_DELIMITER,
),
@ -171,9 +168,7 @@ fn test_fuzzy() {
"abc123 456",
"12356",
&[3, 4, 5, 8, 9],
BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER
+ BONUS_CAMEL123 * 2
+ BONUS_CONSECUTIVE
BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CONSECUTIVE * 3
- PENALTY_GAP_START
- PENALTY_GAP_EXTENSION,
),
@ -205,37 +200,42 @@ fn test_fuzzy() {
"fooBar Baz",
"foob",
&[0, 1, 2, 3],
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 3,
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER
+ BONUS_CONSECUTIVE * 2
+ BONUS_CAMEL123,
),
(
"xFoo-Bar Baz",
"foo-b",
&[1, 2, 3, 4, 5],
BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER
+ BONUS_CAMEL123 * 2
+ BONUS_NON_WORD
+ BONUS_CONSECUTIVE * 3
+ BONUS_BOUNDARY,
),
(
"]\0\0\0H\0\0\0rrrrrrrrrrrrrrrrrrrrrrrVVVVVVVV\0",
"H\0\0VV",
&[4, 5, 6, 31, 32],
BONUS_BOUNDARY * (BONUS_FIRST_CHAR_MULTIPLIER + 2) + 2 * BONUS_CAMEL123
BONUS_BOUNDARY * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CONSECUTIVE * 2
- PENALTY_GAP_START
- 23 * PENALTY_GAP_EXTENSION,
- 23 * PENALTY_GAP_EXTENSION
+ BONUS_CAMEL123
+ BONUS_CONSECUTIVE,
),
(
"\nץ&`@ `---\0\0\0\0",
"`@ `--\0\0",
&[3, 4, 5, 6, 7, 8, 10, 11],
BONUS_NON_WORD * (BONUS_FIRST_CHAR_MULTIPLIER + 3) + BONUS_BOUNDARY_WHITE * 4
- PENALTY_GAP_START,
BONUS_BOUNDARY_WHITE * 2 + 2 * BONUS_CONSECUTIVE - PENALTY_GAP_START
+ BONUS_CONSECUTIVE,
),
(
" 1111111u11111uuu111",
"11111uuu1",
&[9, 10, 11, 12, 13, 14, 15, 16, 17],
BONUS_CAMEL123 * (BONUS_FIRST_CHAR_MULTIPLIER + 8),
BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER
+ 7 * BONUS_CONSECUTIVE
+ BONUS_CAMEL123,
),
],
);
@ -275,14 +275,15 @@ fn test_fuzzy_case_sensitive() {
"FooBar Baz",
"FooB",
&[0, 1, 2, 3],
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE * 3,
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER
+ BONUS_CONSECUTIVE * 2
+ BONUS_CAMEL123,
),
// Consecutive bonus updated
(
"foo-bar",
"o-ba",
&[2, 3, 4, 5],
BONUS_BOUNDARY * 2 + BONUS_NON_WORD,
BONUS_BOUNDARY + 2 * BONUS_CONSECUTIVE,
),
],
);
@ -300,13 +301,13 @@ fn test_normalize() {
"Só Danço Samba",
"So",
&[0, 1],
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE,
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CONSECUTIVE,
),
(
"Só Danço Samba",
"sodc",
&[0, 1, 3, 6],
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CONSECUTIVE
- PENALTY_GAP_START
+ BONUS_BOUNDARY_WHITE
- PENALTY_GAP_START
@ -316,19 +317,21 @@ fn test_normalize() {
"Danço",
"danco",
&[0, 1, 2, 3, 4],
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + 4 * BONUS_CONSECUTIVE,
),
(
"DanÇo",
"danco",
&[0, 1, 2, 3, 4],
BONUS_BOUNDARY_WHITE * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER
+ BONUS_CAMEL123
+ 3 * BONUS_CONSECUTIVE,
),
(
"xÇando",
"cando",
&[1, 2, 3, 4, 5],
BONUS_CAMEL123 * (BONUS_FIRST_CHAR_MULTIPLIER + 4),
BONUS_CAMEL123 * BONUS_FIRST_CHAR_MULTIPLIER + 4 * BONUS_CONSECUTIVE,
),
("ۂ(GCGɴCG", "n", &[5], 0),
],
@ -347,7 +350,7 @@ fn test_unicode1() {
"你好世界",
"你好",
&[0, 1],
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY_WHITE,
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_CONSECUTIVE,
),
(
"你好世界",
@ -370,11 +373,55 @@ fn test_long_str() {
&"x".repeat(u16::MAX as usize + 1),
"xx",
&[0, 1],
(BONUS_FIRST_CHAR_MULTIPLIER + 1) * BONUS_BOUNDARY_WHITE,
BONUS_FIRST_CHAR_MULTIPLIER * BONUS_BOUNDARY_WHITE + BONUS_CONSECUTIVE,
)],
);
}
/// Checks the scores both fuzzy algorithms assign to the needle `foobar` for
/// camelCase, plain and delimiter separated haystacks.
#[test]
fn test_casing() {
assert_matches(
&[FuzzyGreedy, FuzzyOptimal],
false,
false,
false,
&[
// score 143: we currently slightly prefer the camelCase match
(
"fooBar",
"foobar",
&[0, 1, 2, 3, 4, 5],
BONUS_FIRST_CHAR_MULTIPLIER * BONUS_BOUNDARY_WHITE
+ BONUS_CAMEL123
+ 4 * BONUS_CONSECUTIVE,
),
// score 141 for a perfect match
(
"foobar",
"foobar",
&[0, 1, 2, 3, 4, 5],
BONUS_FIRST_CHAR_MULTIPLIER * BONUS_BOUNDARY_WHITE + 5 * BONUS_CONSECUTIVE,
),
// score 141 here too since the boundary bonus and the gap penalty/missed
// consecutive bonus cancel perfectly
(
"foo-bar",
"foobar",
&[0, 1, 2, 4, 5, 6],
BONUS_FIRST_CHAR_MULTIPLIER * BONUS_BOUNDARY_WHITE + BONUS_BOUNDARY
- PENALTY_GAP_START
+ 4 * BONUS_CONSECUTIVE,
),
// same expected score as for `foo-bar`
(
"foo_bar",
"foobar",
&[0, 1, 2, 4, 5, 6],
BONUS_FIRST_CHAR_MULTIPLIER * BONUS_BOUNDARY_WHITE + BONUS_BOUNDARY
- PENALTY_GAP_START
+ 4 * BONUS_CONSECUTIVE,
),
],
)
}
#[test]
fn test_optimal() {
assert_matches(
@ -387,60 +434,38 @@ fn test_optimal() {
"axxx xx ",
"xx",
&[5, 6],
(BONUS_FIRST_CHAR_MULTIPLIER + 1) * BONUS_BOUNDARY_WHITE,
),
(
"I\0I",
"\0",
&[1],
BONUS_FIRST_CHAR_MULTIPLIER * BONUS_NON_WORD,
BONUS_FIRST_CHAR_MULTIPLIER * BONUS_BOUNDARY_WHITE + BONUS_CONSECUTIVE,
),
(
"SS!H",
"S!",
&[0, 2],
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_NON_WORD
- PENALTY_GAP_START,
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER - PENALTY_GAP_START,
),
(
"^^^\u{7f}\0\0E%\u{1a}^",
"^^\0E",
&[1, 2, 5, 6],
BONUS_NON_WORD * (BONUS_FIRST_CHAR_MULTIPLIER + 3)
- PENALTY_GAP_START
- PENALTY_GAP_EXTENSION,
BONUS_CONSECUTIVE + BONUS_BOUNDARY - PENALTY_GAP_START - PENALTY_GAP_EXTENSION,
),
(
"Hٷ!!-!!!\n--\u{4}\u{c}-\u{8}-!\u{c}",
"-!--!",
&[4, 5, 13, 15, 16],
BONUS_NON_WORD * (BONUS_FIRST_CHAR_MULTIPLIER + 4)
- 2 * PENALTY_GAP_START
- 6 * PENALTY_GAP_EXTENSION,
),
(
"C8Gۂ(GECGS",
"8GCG",
&[1, 2, 7, 8],
BONUS_CAMEL123 * (BONUS_FIRST_CHAR_MULTIPLIER + 1)
"8gx(gecg)",
"8gcg",
&[0, 4, 6, 7],
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER
- PENALTY_GAP_START
- 2 * PENALTY_GAP_EXTENSION
+ BONUS_BOUNDARY
- PENALTY_GAP_START
- 3 * PENALTY_GAP_EXTENSION
+ BONUS_CONSECUTIVE,
),
(
"\nץ&`@ `;;;\0\0\0\0",
"`@ `;;\0\0",
&[3, 4, 5, 6, 7, 9, 10, 11],
BONUS_NON_WORD * (BONUS_FIRST_CHAR_MULTIPLIER + 1)
+ BONUS_BOUNDARY_DELIMITER * 3
+ BONUS_BOUNDARY_WHITE * 3
- PENALTY_GAP_START,
),
(
"dddddd\0\0\0ddddfdddddd",
"dddddfddddd",
&[0, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER + BONUS_BOUNDARY * 10
BONUS_BOUNDARY_WHITE * BONUS_FIRST_CHAR_MULTIPLIER
+ BONUS_BOUNDARY
+ 9 * BONUS_CONSECUTIVE
- PENALTY_GAP_START
- 7 * PENALTY_GAP_EXTENSION,
),
@ -476,9 +501,11 @@ fn test_reject() {
false,
&[
("你好界", "abc"),
("你好界", "a"),
("你好世界", ""),
("Só Danço Samba", "sox"),
("fooBarbaz", "fooBarbazz"),
("fooBarbaz", "c"),
],
);
assert_not_matches(
@ -488,6 +515,8 @@ fn test_reject() {
&[
("你好界", "abc"),
("abc", ""),
("abc", "A"),
("abc", "d"),
("你好世界", ""),
("Só Danço Samba", "sox"),
("fooBarbaz", "oBZ"),
@ -499,8 +528,11 @@ fn test_reject() {
false,
true,
false,
&[("Só Danço Samba", "sod"), ("Só Danço Samba", "soc")],
&[
("Só Danço Samba", "sod"),
("Só Danço Samba", "soc"),
("Só Danç", "So"),
],
);
check!(small_haystack);
assert_not_matches(false, false, false, &[("ۂۂfoۂۂ", "foo")]);
}

View File

@ -1,9 +1,9 @@
use std::ops::{Bound, RangeBounds};
use std::{fmt, slice};
/// A UTF32 encoded (char array) String that can be used as an input to fuzzy matching.
/// A UTF32 encoded (char array) string that is used as an input to (fuzzy) matching.
///
/// Usually rusts utf8 encoded strings are great. However during fuzzy matching
/// Usually rusts' utf8 encoded strings are great. However during fuzzy matching
/// operates on codepoints (it should operate on graphemes but that's too much
/// hassle to deal with). We want to quickly iterate over these codepoints
/// (up to 5 times) during matching.

View File

@ -1,108 +0,0 @@
use memchr::{Memchr, Memchr2};
use crate::chars::{AsciiChar, Char};
use crate::score::{BONUS_FIRST_CHAR_MULTIPLIER, SCORE_MATCH};
use crate::Matcher;
impl Matcher {
/// Finds the best scoring occurrence of the single needle byte `c` in an
/// ascii `haystack`.
///
/// If `ignore_case` is enabled and `c` is a lowercase letter, the uppercase
/// variant is searched for as well. Returns the score of the best
/// occurrence, or `None` if `c` doesn't occur at all. With `INDICES` the
/// position of that occurrence is stored in `indices`.
pub(crate) fn substring_match_1_ascii<const INDICES: bool>(
    &mut self,
    haystack: &[u8],
    c: u8,
    indices: &mut Vec<u32>,
) -> Option<u16> {
    let config = &self.config;
    let mut max_score = 0;
    let mut max_pos = 0;
    // Scores the occurrence at position `i` and remembers it if it is the
    // best one so far. Returns `true` once the search can stop early.
    let mut visit = |i: usize| -> bool {
        let prev_char_class = i
            .checked_sub(1)
            .map(|prev| AsciiChar(haystack[prev]).char_class(config))
            .unwrap_or(config.initial_char_class);
        let char_class = AsciiChar(haystack[i]).char_class(config);
        let bonus = config.bonus_for(prev_char_class, char_class);
        let score = bonus * BONUS_FIRST_CHAR_MULTIPLIER + SCORE_MATCH;
        if score > max_score {
            // callers assert that the haystack fits into u32
            max_pos = i as u32;
            max_score = score;
        }
        // can't get better than this. The *bonus* has to be compared here:
        // the score also contains `SCORE_MATCH`, so comparing the score
        // would stop the search at the very first occurrence
        bonus >= config.bonus_boundary_white && bonus >= config.bonus_boundary_delimiter
    };
    if config.ignore_case && c >= b'a' && c <= b'z' {
        // `c - 32` is the uppercase variant of the ascii letter `c`
        for i in Memchr2::new(c, c - 32, haystack) {
            if visit(i) {
                break;
            }
        }
    } else {
        for i in Memchr::new(c, haystack) {
            if visit(i) {
                break;
            }
        }
    }
    if max_score == 0 {
        return None;
    }

    if INDICES {
        indices.clear();
        indices.push(max_pos);
    }
    Some(max_score)
}
/// Finds the best scoring occurrence of the single char `needle` in
/// `haystack[start..]`.
///
/// Haystack chars are normalized before they are compared to `needle`.
/// Returns the score of the best occurrence (`0` if there is none). With
/// `INDICES` the position of that occurrence is stored in `indices`.
pub(crate) fn substring_match_1_non_ascii<const INDICES: bool>(
    &mut self,
    haystack: &[char],
    needle: char,
    start: usize,
    indices: &mut Vec<u32>,
) -> u16 {
    let mut max_score = 0;
    let mut max_pos = 0;
    let mut prev_class = start
        .checked_sub(1)
        .map(|i| haystack[i].char_class(&self.config))
        .unwrap_or(self.config.initial_char_class);
    for (i, &c) in haystack[start..].iter().enumerate() {
        let (c, char_class) = c.char_class_and_normalize(&self.config);
        // The bonus depends on the char directly in front of a match, so the
        // previous class must be advanced for every char, not just for the
        // matching ones.
        let preceding_class = prev_class;
        prev_class = char_class;
        if c != needle {
            continue;
        }
        let bonus = self.config.bonus_for(preceding_class, char_class);
        let score = bonus * BONUS_FIRST_CHAR_MULTIPLIER + SCORE_MATCH;
        if score > max_score {
            max_pos = i as u32;
            max_score = score;
            // can't get better than this. The *bonus* has to be compared
            // here: the score also contains `SCORE_MATCH`
            if bonus >= self.config.bonus_boundary_white
                && bonus >= self.config.bonus_boundary_delimiter
            {
                break;
            }
        }
    }

    if INDICES {
        indices.clear();
        // `max_pos` is relative to `start`
        indices.push(max_pos + start as u32);
    }
    max_score
}
}

View File

@ -1,202 +0,0 @@
// sadly ranges don't optimize well
#![allow(clippy::manual_range_contains)]
pub mod chars;
mod config;
#[cfg(test)]
mod debug;
mod exact;
mod fuzzy_greedy;
mod fuzzy_optimal;
mod matrix;
mod prefilter;
mod score;
mod utf32_str;
#[cfg(test)]
mod tests;
pub use crate::config::MatcherConfig;
pub use crate::utf32_str::Utf32Str;
use crate::chars::AsciiChar;
use crate::matrix::MatrixSlab;
/// A matcher engine that can execute fuzzy matches.
///
/// A matcher owns its configuration and a `MatrixSlab` that is created once
/// in [`Matcher::new`].
pub struct Matcher {
/// The configuration that is consulted by the matching functions.
pub config: MatcherConfig,
// presumably scratch memory for the matching functions; created in `new`
slab: MatrixSlab,
}
impl Matcher {
pub fn new(config: MatcherConfig) -> Self {
Self {
config,
slab: MatrixSlab::new(),
}
}
/// Runs the fuzzy matcher on `haystack` and returns the score of the match.
///
/// Returns `None` if the needle is empty, longer than the haystack or
/// doesn't match. The indices of the matched chars are not computed.
///
/// # Panics
///
/// Panics if the haystack is longer than 2^32-1 codepoints.
pub fn fuzzy_match(&mut self, haystack: Utf32Str<'_>, needle: Utf32Str<'_>) -> Option<u16> {
assert!(haystack.len() <= u32::MAX as usize);
// `INDICES = false`: the vector is only a placeholder argument
self.fuzzy_matcher_impl::<false>(haystack, needle, &mut Vec::new())
}
/// Runs the fuzzy matcher on `haystack`, returns the score of the match and
/// stores the indices of the matched chars in `indidies`.
///
/// Returns `None` if the needle is empty, longer than the haystack or
/// doesn't match.
///
/// # Panics
///
/// Panics if the haystack is longer than 2^32-1 codepoints.
pub fn fuzzy_indices(
&mut self,
haystack: Utf32Str<'_>,
needle: Utf32Str<'_>,
indidies: &mut Vec<u32>,
) -> Option<u16> {
assert!(haystack.len() <= u32::MAX as usize);
self.fuzzy_matcher_impl::<true>(haystack, needle, indidies)
}
/// Shared implementation of `fuzzy_match` and `fuzzy_indices`.
///
/// `INDICES` selects whether the positions of the matched chars are stored
/// in `indidies`. Returns `None` for an empty needle, for a needle that is
/// longer than the haystack and whenever no match is found.
fn fuzzy_matcher_impl<const INDICES: bool>(
&mut self,
haystack: Utf32Str<'_>,
needle_: Utf32Str<'_>,
indidies: &mut Vec<u32>,
) -> Option<u16> {
if needle_.len() > haystack.len() || needle_.is_empty() {
return None;
}
// TODO: dispatch to an exact match if needle and haystack have the same length
assert!(
haystack.len() <= u32::MAX as usize,
"fuzzy matching is only support for up to 2^32-1 codepoints"
);
match (haystack, needle_) {
(Utf32Str::Ascii(haystack), Utf32Str::Ascii(needle)) => {
// single char needles are handled by a dedicated routine
if let &[needle] = needle {
return self.substring_match_1_ascii::<INDICES>(haystack, needle, indidies);
}
let (start, greedy_end, end) = self.prefilter_ascii(haystack, needle, false)?;
self.fuzzy_match_optimal::<INDICES, AsciiChar, AsciiChar>(
AsciiChar::cast(haystack),
AsciiChar::cast(needle),
start,
greedy_end,
end,
indidies,
)
}
(Utf32Str::Ascii(_), Utf32Str::Unicode(_)) => {
// a purely ascii haystack can never match a needle that contains
// non-ascii chars
None
}
(Utf32Str::Unicode(haystack), Utf32Str::Ascii(needle)) => {
if let &[needle] = needle {
let (start, _) = self.prefilter_non_ascii(haystack, needle_, true)?;
let res = self.substring_match_1_non_ascii::<INDICES>(
haystack,
needle as char,
start,
indidies,
);
return Some(res);
}
let (start, end) = self.prefilter_non_ascii(haystack, needle_, false)?;
self.fuzzy_match_optimal::<INDICES, char, AsciiChar>(
haystack,
AsciiChar::cast(needle),
start,
start + 1,
end,
indidies,
)
}
(Utf32Str::Unicode(haystack), Utf32Str::Unicode(needle)) => {
if let &[needle] = needle {
let (start, _) = self.prefilter_non_ascii(haystack, needle_, true)?;
let res = self
.substring_match_1_non_ascii::<INDICES>(haystack, needle, start, indidies);
return Some(res);
}
let (start, end) = self.prefilter_non_ascii(haystack, needle_, false)?;
self.fuzzy_match_optimal::<INDICES, char, char>(
haystack,
needle,
start,
start + 1,
end,
indidies,
)
}
}
}
/// Greedy variant of fuzzy matching that does not report match positions.
///
/// Forwards to `fuzzy_match_greedy_impl` with `INDICES = false` and returns its
/// `u16` result unchanged (presumably the match score; confirm against the
/// scoring code). `None` is returned whenever the implementation yields `None`,
/// e.g. for an empty needle or a needle longer than the haystack.
///
/// # Panics
///
/// Panics if `haystack` holds more than `u32::MAX` elements.
pub fn fuzzy_match_greedy(
    &mut self,
    haystack: Utf32Str<'_>,
    needle: Utf32Str<'_>,
) -> Option<u16> {
    assert!(haystack.len() <= u32::MAX as usize);
    // The implementation always takes an index buffer; callers of this method
    // never see it, so a fresh throwaway vector is handed over.
    let mut scratch: Vec<u32> = Vec::new();
    self.fuzzy_match_greedy_impl::<false>(haystack, needle, &mut scratch)
}
/// Greedy variant of fuzzy matching that passes `indidies` on to the
/// implementation so that match positions can be reported through it.
///
/// Forwards to `fuzzy_match_greedy_impl` with `INDICES = true` and returns its
/// `u16` result unchanged (presumably the match score; confirm against the
/// scoring code). `None` is returned whenever the implementation yields `None`,
/// e.g. for an empty needle or a needle longer than the haystack.
///
/// # Panics
///
/// Panics if `haystack` holds more than `u32::MAX` elements.
pub fn fuzzy_indices_greedy(
    &mut self,
    haystack: Utf32Str<'_>,
    needle: Utf32Str<'_>,
    indidies: &mut Vec<u32>,
) -> Option<u16> {
    // Selects the index-reporting instantiation of the shared implementation.
    const WITH_INDICES: bool = true;

    assert!(haystack.len() <= u32::MAX as usize);
    self.fuzzy_match_greedy_impl::<WITH_INDICES>(haystack, needle, indidies)
}
/// Shared implementation behind `fuzzy_match_greedy` and `fuzzy_indices_greedy`.
///
/// Dispatches on the ASCII/Unicode representation of `haystack` and `needle_`,
/// runs the matching prefilter (always with its last argument set to `true`)
/// and hands the resulting positions to `fuzzy_match_greedy_`.
///
/// Returns `None` when the needle is empty, when it is longer than the
/// haystack, when a prefilter returns `None`, when an ASCII haystack is paired
/// with a Unicode needle, or when `fuzzy_match_greedy_` returns `None`.
///
/// # Panics
///
/// Panics if `haystack` holds more than `u32::MAX` elements.
fn fuzzy_match_greedy_impl<const INDICES: bool>(
    &mut self,
    haystack: Utf32Str<'_>,
    needle_: Utf32Str<'_>,
    indidies: &mut Vec<u32>,
) -> Option<u16> {
    if needle_.is_empty() || haystack.len() < needle_.len() {
        return None;
    }
    // TODO: when both lengths are equal this could short-circuit to an exact
    // match; that shortcut is not wired up yet.
    assert!(
        haystack.len() <= u32::MAX as usize,
        "fuzzy matching is only support for up to 2^32-1 codepoints"
    );
    match (haystack, needle_) {
        // A purely ASCII haystack can never be transformed to match a needle
        // that contains non-ASCII chars since gaps are not allowed.
        (Utf32Str::Ascii(_), Utf32Str::Unicode(_)) => None,
        (Utf32Str::Ascii(hay), Utf32Str::Ascii(pat)) => {
            // Only the first two positions of the ASCII prefilter are needed here.
            let (begin, greedy_stop, _) = self.prefilter_ascii(hay, pat, true)?;
            self.fuzzy_match_greedy_::<INDICES, AsciiChar, AsciiChar>(
                AsciiChar::cast(hay),
                AsciiChar::cast(pat),
                begin,
                greedy_stop,
                indidies,
            )
        }
        (Utf32Str::Unicode(hay), Utf32Str::Ascii(pat)) => {
            let (begin, _) = self.prefilter_non_ascii(hay, needle_, true)?;
            // The non-ASCII prefilter yields only two positions, so `begin + 1`
            // fills the slot the ASCII path gives its greedy end.
            self.fuzzy_match_greedy_::<INDICES, char, AsciiChar>(
                hay,
                AsciiChar::cast(pat),
                begin,
                begin + 1,
                indidies,
            )
        }
        (Utf32Str::Unicode(hay), Utf32Str::Unicode(pat)) => {
            let (begin, _) = self.prefilter_non_ascii(hay, needle_, true)?;
            // Same placeholder as above: `begin + 1` stands in for the greedy end.
            self.fuzzy_match_greedy_::<INDICES, char, char>(
                hay,
                pat,
                begin,
                begin + 1,
                indidies,
            )
        }
    }
}
}

View File

@ -1,3 +1 @@
exclude = ["src/tests.rs", "src/debug.rs", "src/chars/normalize.rs"]
[report]
out = ["Html", "Xml"]
exclude = ["matcher/src/tests.rs", "matcher/src/debug.rs", "matcher/src/chars/normalize.rs"]

View File

@ -1,3 +1,3 @@
default.extend-ignore-re = ["\\\\u\\{[0-9A-Za-z]*\\}"]
[files]
extend-exclude = ["src/tests.rs", "*.html"]
extend-exclude = ["matcher/src/tests.rs", "*.html"]