perf(dict): Compare map to trie

2025-02-13 08:40:29 -05:00 · 2024-12-24 21:34:34 -06:00 · 2024-12-24 21:34:34 -06:00 · fad1637b6c
commit fad1637b6c
parent 661825438c
5 changed files with 371740 additions and 14 deletions
--- a/crates/typos-dict/Cargo.toml
+++ b/crates/typos-dict/Cargo.toml
@ -24,7 +24,7 @@ itertools = "0.13"
 edit-distance = "2.1"
 unicase = "2.7"
 codegenrs = "3.0"
-dictgen = { version = "^0.2", path = "../dictgen", features = ["codegen"] }
+dictgen = { version = "^0.2", path = "../dictgen", features = ["codegen", "map"] }
 varcon = { version = "^1.0", path = "../varcon" }
 snapbox = "0.6.5"
 indexmap = "2.2.6"
--- a/crates/typos-dict/benches/benches/main.rs
+++ b/crates/typos-dict/benches/benches/main.rs
@ -1,19 +1,37 @@
 #![allow(clippy::wildcard_imports)]
-const MISS: &str = "finalizes";
+mod map_codegen;
-const HIT: &str = "finallizes";
+mod trie_codegen;
-mod trie {
+mod miss {
    // Benchmarks `find` on the generated map and on the generated trie using the same input
    // word, `MISS`.
    // NOTE(review): `MISS` is presumably a word with no dictionary entry, so both lookups
    // return `None` — inferred from the name only; confirm against the word list.
    use super::*;
    const MISS: &str = "finalizes";
    // Map-backed lookup (`map_codegen::WORD`).
    #[divan::bench(args = [unicase::UniCase::new(MISS)])]
-    fn miss(word: unicase::UniCase<&str>) -> Option<&'static &[&str]> {
+    fn map(word: unicase::UniCase<&str>) -> Option<&'static &[&str]> {
-        typos_dict::WORD_TRIE.find(&word)
+        map_codegen::WORD.find(&word)
    }
    // Trie-backed lookup (`trie_codegen::WORD_TRIE`).
    #[divan::bench(args = [unicase::UniCase::new(MISS)])]
    fn trie(word: unicase::UniCase<&str>) -> Option<&'static &[&str]> {
        trie_codegen::WORD_TRIE.find(&word)
    }
 }
 mod hit {
    // Benchmarks `find` on the generated map and on the generated trie using the same input
    // word, `HIT`.
    // NOTE(review): `HIT` is presumably a misspelling that has a dictionary entry, so both
    // lookups return `Some(..)` — inferred from the name only; confirm against the word list.
    use super::*;
    const HIT: &str = "finallizes";
    // Map-backed lookup (`map_codegen::WORD`).
    #[divan::bench(args = [unicase::UniCase::new(HIT)])]
    fn map(word: unicase::UniCase<&str>) -> Option<&'static &[&str]> {
        map_codegen::WORD.find(&word)
    }
    // Trie-backed lookup (`trie_codegen::WORD_TRIE`).
    #[divan::bench(args = [unicase::UniCase::new(HIT)])]
-    fn hit(word: unicase::UniCase<&str>) -> Option<&'static &[&str]> {
+    fn trie(word: unicase::UniCase<&str>) -> Option<&'static &[&str]> {
-        typos_dict::WORD_TRIE.find(&word)
+        trie_codegen::WORD_TRIE.find(&word)
    }
 }
--- a/crates/typos-dict/benches/benches/map_codegen.rs
+++ b/crates/typos-dict/benches/benches/map_codegen.rs
--- a/crates/typos-dict/benches/benches/trie_codegen.rs
+++ b/crates/typos-dict/benches/benches/trie_codegen.rs
--- a/crates/typos-dict/tests/codegen.rs
+++ b/crates/typos-dict/tests/codegen.rs
@ -1,15 +1,32 @@
 #[test]
 fn codegen() {
    // Snapshot test: generates the trie and map sources from `words.csv`, formats each with
    // `codegenrs::rustfmt`, and asserts the result equals the corresponding checked-in file.
    // NOTE(review): the next `content` / `generate(..)` lines are shown as unchanged context,
    // but this change renames `generate` to `generate_trie` — confirm they are not leftovers
    // that should have been removed.
    let mut content = vec![];
    const DICT: &[u8] = include_bytes!("../assets/words.csv");
    generate(&mut content, "WORD", DICT);
-    let content = String::from_utf8(content).unwrap();
+    let mut trie_content = vec![];
-    let content = codegenrs::rustfmt(&content, None).unwrap();
+    generate_trie(&mut trie_content, "WORD", DICT);
-    snapbox::assert_data_eq!(content, snapbox::file!["../src/word_codegen.rs"].raw());
+    let trie_content = String::from_utf8(trie_content).unwrap();
    let trie_content = codegenrs::rustfmt(&trie_content, None).unwrap();
    // The trie output is checked against the copy used by the benchmarks...
    snapbox::assert_data_eq!(
        &trie_content,
        snapbox::file!["../benches/benches/trie_codegen.rs"].raw()
    );
    // Same pipeline for the map-based variant.
    let mut map_content = vec![];
    generate_map(&mut map_content, "WORD", DICT);
    let map_content = String::from_utf8(map_content).unwrap();
    let map_content = codegenrs::rustfmt(&map_content, None).unwrap();
    snapbox::assert_data_eq!(
        &map_content,
        snapbox::file!["../benches/benches/map_codegen.rs"].raw()
    );
    // ...and the same trie output is also checked against the crate's own `word_codegen.rs`.
    snapbox::assert_data_eq!(
        &trie_content,
        snapbox::file!["../src/word_codegen.rs"].raw()
    );
 }
-fn generate<W: std::io::Write>(file: &mut W, prefix: &str, dict: &[u8]) {
+fn generate_trie<W: std::io::Write>(file: &mut W, prefix: &str, dict: &[u8]) {
    writeln!(
        file,
        "// This file is @generated by {}",
@ -44,3 +61,38 @@ fn generate<W: std::io::Write>(file: &mut W, prefix: &str, dict: &[u8]) {
    )
    .unwrap();
 }
 /// Writes a generated Rust source file containing a map built from CSV data.
 ///
 /// The output starts with an `@generated` header naming this source file, two crate-level
 /// `#![allow(..)]` attributes and a blank line; the map itself is emitted by
 /// `dictgen::generate_map` under the name `prefix` with value type
 /// `&'static [&'static str]`.
 ///
 /// `dict` is parsed as header-less CSV whose rows may have differing lengths. The first
 /// field of each row is the map key; the remaining fields become the value, rendered as a
 /// slice literal of string literals.
 ///
 /// # Panics
 ///
 /// Panics if writing to `file` fails, if a CSV row cannot be read, if a row has no fields,
 /// or if `dictgen::generate_map` returns an error.
 fn generate_map<W: std::io::Write>(file: &mut W, prefix: &str, dict: &[u8]) {
    // Normalise path separators so the header is identical on every platform.
    let generator_path = file!().replace('\\', "/");
    writeln!(file, "// This file is @generated by {}", generator_path).unwrap();
    writeln!(file, "#![allow(clippy::unreadable_literal)]").unwrap();
    writeln!(file, "#![allow(unreachable_pub)]").unwrap();
    writeln!(file).unwrap();

    // Read every row up front so the keys handed to `dictgen` can borrow from them.
    let mut csv_reader = csv::ReaderBuilder::new()
        .has_headers(false)
        .flexible(true)
        .from_reader(dict);
    let rows: Vec<_> = csv_reader.records().map(Result::unwrap).collect();

    let entries = rows.iter().map(|row| {
        let mut fields = row.iter();
        let head = fields.next().unwrap();
        // Each remaining field is wrapped in double quotes, without any escaping.
        let quoted = fields.map(|item| format!(r#""{item}""#));
        let tail = format!("&[{}]", itertools::join(quoted, ", "));
        (head, tail)
    });

    dictgen::generate_map(file, prefix, "&'static [&'static str]", entries).unwrap();
 }