mirror of
https://github.com/crate-ci/typos.git
synced 2025-02-13 00:36:04 -05:00
perf(dict): Compare map to trie
This commit is contained in:
parent
661825438c
commit
fad1637b6c
5 changed files with 371740 additions and 14 deletions
|
@ -24,7 +24,7 @@ itertools = "0.13"
|
|||
edit-distance = "2.1"
|
||||
unicase = "2.7"
|
||||
codegenrs = "3.0"
|
||||
dictgen = { version = "^0.2", path = "../dictgen", features = ["codegen"] }
|
||||
dictgen = { version = "^0.2", path = "../dictgen", features = ["codegen", "map"] }
|
||||
varcon = { version = "^1.0", path = "../varcon" }
|
||||
snapbox = "0.6.5"
|
||||
indexmap = "2.2.6"
|
||||
|
|
|
@ -1,19 +1,37 @@
|
|||
#![allow(clippy::wildcard_imports)]
|
||||
|
||||
const MISS: &str = "finalizes";
|
||||
const HIT: &str = "finallizes";
|
||||
mod map_codegen;
|
||||
mod trie_codegen;
|
||||
|
||||
mod trie {
|
||||
mod miss {
|
||||
use super::*;
|
||||
|
||||
const MISS: &str = "finalizes";
|
||||
|
||||
#[divan::bench(args = [unicase::UniCase::new(MISS)])]
|
||||
fn miss(word: unicase::UniCase<&str>) -> Option<&'static &[&str]> {
|
||||
typos_dict::WORD_TRIE.find(&word)
|
||||
fn map(word: unicase::UniCase<&str>) -> Option<&'static &[&str]> {
|
||||
map_codegen::WORD.find(&word)
|
||||
}
|
||||
|
||||
#[divan::bench(args = [unicase::UniCase::new(MISS)])]
|
||||
fn trie(word: unicase::UniCase<&str>) -> Option<&'static &[&str]> {
|
||||
trie_codegen::WORD_TRIE.find(&word)
|
||||
}
|
||||
}
|
||||
|
||||
mod hit {
|
||||
use super::*;
|
||||
|
||||
const HIT: &str = "finallizes";
|
||||
|
||||
#[divan::bench(args = [unicase::UniCase::new(HIT)])]
|
||||
fn map(word: unicase::UniCase<&str>) -> Option<&'static &[&str]> {
|
||||
map_codegen::WORD.find(&word)
|
||||
}
|
||||
|
||||
#[divan::bench(args = [unicase::UniCase::new(HIT)])]
|
||||
fn hit(word: unicase::UniCase<&str>) -> Option<&'static &[&str]> {
|
||||
typos_dict::WORD_TRIE.find(&word)
|
||||
fn trie(word: unicase::UniCase<&str>) -> Option<&'static &[&str]> {
|
||||
trie_codegen::WORD_TRIE.find(&word)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
138037
crates/typos-dict/benches/benches/map_codegen.rs
Normal file
138037
crates/typos-dict/benches/benches/map_codegen.rs
Normal file
File diff suppressed because it is too large
Load diff
233619
crates/typos-dict/benches/benches/trie_codegen.rs
Normal file
233619
crates/typos-dict/benches/benches/trie_codegen.rs
Normal file
File diff suppressed because it is too large
Load diff
|
@ -1,15 +1,32 @@
|
|||
#[test]
|
||||
fn codegen() {
|
||||
let mut content = vec![];
|
||||
const DICT: &[u8] = include_bytes!("../assets/words.csv");
|
||||
generate(&mut content, "WORD", DICT);
|
||||
|
||||
let content = String::from_utf8(content).unwrap();
|
||||
let content = codegenrs::rustfmt(&content, None).unwrap();
|
||||
snapbox::assert_data_eq!(content, snapbox::file!["../src/word_codegen.rs"].raw());
|
||||
let mut trie_content = vec![];
|
||||
generate_trie(&mut trie_content, "WORD", DICT);
|
||||
let trie_content = String::from_utf8(trie_content).unwrap();
|
||||
let trie_content = codegenrs::rustfmt(&trie_content, None).unwrap();
|
||||
snapbox::assert_data_eq!(
|
||||
&trie_content,
|
||||
snapbox::file!["../benches/benches/trie_codegen.rs"].raw()
|
||||
);
|
||||
|
||||
let mut map_content = vec![];
|
||||
generate_map(&mut map_content, "WORD", DICT);
|
||||
let map_content = String::from_utf8(map_content).unwrap();
|
||||
let map_content = codegenrs::rustfmt(&map_content, None).unwrap();
|
||||
snapbox::assert_data_eq!(
|
||||
&map_content,
|
||||
snapbox::file!["../benches/benches/map_codegen.rs"].raw()
|
||||
);
|
||||
|
||||
snapbox::assert_data_eq!(
|
||||
&trie_content,
|
||||
snapbox::file!["../src/word_codegen.rs"].raw()
|
||||
);
|
||||
}
|
||||
|
||||
fn generate<W: std::io::Write>(file: &mut W, prefix: &str, dict: &[u8]) {
|
||||
fn generate_trie<W: std::io::Write>(file: &mut W, prefix: &str, dict: &[u8]) {
|
||||
writeln!(
|
||||
file,
|
||||
"// This file is @generated by {}",
|
||||
|
@ -44,3 +61,38 @@ fn generate<W: std::io::Write>(file: &mut W, prefix: &str, dict: &[u8]) {
|
|||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
/// Writes generated Rust source for a map-based dictionary to `file`.
///
/// The output starts with a `// This file is @generated by <path>` header
/// (using this source file's path with backslashes turned into `/`), two
/// inner `#![allow(...)]` attributes and a blank line, followed by whatever
/// `dictgen::generate_map` emits for the parsed records.
///
/// * `file` - sink that receives the generated source text.
/// * `prefix` - passed through unchanged to `dictgen::generate_map`.
/// * `dict` - raw CSV bytes; the first field of each record is used as the
///   key and the remaining fields become the string entries of the value.
///
/// # Panics
///
/// Panics if a write fails, if a CSV record cannot be read, if a record has
/// no first field, or if `dictgen::generate_map` returns an error.
fn generate_map<W: std::io::Write>(file: &mut W, prefix: &str, dict: &[u8]) {
|
||||
writeln!(
|
||||
file,
|
||||
"// This file is @generated by {}",
|
||||
file!().replace('\\', "/")
|
||||
)
|
||||
.unwrap();
|
||||
writeln!(file, "#![allow(clippy::unreadable_literal)]",).unwrap();
|
||||
writeln!(file, "#![allow(unreachable_pub)]",).unwrap();
|
||||
writeln!(file).unwrap();
|
||||
|
||||
// Read every record up front: no header row is expected, and records may
// have differing numbers of fields (`flexible(true)`).
let records: Vec<_> = csv::ReaderBuilder::new()
|
||||
.has_headers(false)
|
||||
.flexible(true)
|
||||
.from_reader(dict)
|
||||
.records()
|
||||
.map(|r| r.unwrap())
|
||||
.collect();
|
||||
dictgen::generate_map(
|
||||
file,
|
||||
prefix,
|
||||
"&'static [&'static str]",
|
||||
records.iter().map(|record| {
|
||||
let mut record_fields = record.iter();
|
||||
let key = record_fields.next().unwrap();
|
||||
// Render the remaining fields as a slice literal of quoted strings,
// e.g. `&["a", "b"]`; fields are inserted verbatim, without escaping.
let value = format!(
|
||||
"&[{}]",
|
||||
itertools::join(record_fields.map(|field| format!(r#""{field}""#)), ", ")
|
||||
);
|
||||
(key, value)
|
||||
}),
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue