use std::collections::BTreeMap;
use std::collections::HashSet;
use std::io::Write;

/// Regenerates the variants module in memory, formats it, and compares the result with the
/// checked-in `../src/vars_codegen.rs`.
#[test]
fn codegen() {
    let mut buffer = Vec::new();
    generate_variations(&mut buffer);

    // The generator only writes text, so anything but valid UTF-8 is a bug in the generator.
    let generated = String::from_utf8(buffer).unwrap();
    let formatted = codegenrs::rustfmt(&generated, None).unwrap();
    snapbox::assert_data_eq!(
        formatted,
        snapbox::file!["../src/vars_codegen.rs"].raw()
    );
}

/// Categories covered by the generated tables, in slot order.
///
/// The position of a category in this array is the index used for it in every generated
/// `VariantsMap` and in the generated `corrections()` match.
static CATEGORIES: [varcon::Category; 4] = [
    varcon::Category::American,
    varcon::Category::BritishIse,
    // `BritishIze` is left out: for now, only want to support one form of British, so going
    // with -ise as it seems more popular.
    varcon::Category::Canadian,
    varcon::Category::Australian,
    // `Other` is left out as well: it basically means all of the categories above.
];

/// Writes the complete generated variants module to `file`.
///
/// Output is emitted in this order:
/// 1. the `@generated` header and the `allow` attribute,
/// 2. the `Variants` / `VariantsMap` type aliases (one map slot per `CATEGORIES` item),
/// 3. `all_categories()` and `corrections()`, both derived from `CATEGORIES`,
/// 4. the `VARS` trie written through `dictgen`,
/// 5. the `NO_INVALID` constant,
/// 6. one `VariantsMap` static for every entry the trie refers to.
///
/// # Panics
///
/// Panics if any write to `file` fails.
fn generate_variations<W: Write>(file: &mut W) {
    let all_entries = entries();

    // Normalise Windows path separators so the header is identical on every platform.
    let generator = file!().replace('\\', "/");
    writeln!(file, "// This file is @generated by {}", generator).unwrap();
    writeln!(
        file,
        "#![allow(clippy::unreadable_literal, clippy::type_complexity)]",
    )
    .unwrap();
    writeln!(file).unwrap();

    let slot_count = CATEGORIES.len();
    writeln!(file, "pub type Variants = &'static [&'static str];",).unwrap();
    writeln!(file, "pub type VariantsMap = [Variants; {}];", slot_count).unwrap();
    writeln!(file).unwrap();

    let category_union = itertools::join(
        CATEGORIES.iter().map(|c| format!("crate::Category::{c:?}")),
        " | ",
    );
    writeln!(file, "pub fn all_categories() -> crate::CategorySet {{",).unwrap();
    writeln!(file, "    {}", category_union).unwrap();
    writeln!(file, "}}",).unwrap();
    writeln!(file).unwrap();

    writeln!(
        file,
        "pub fn corrections(category: crate::Category, options: VariantsMap) -> &'static [&'static str] {{",
    )
    .unwrap();
    writeln!(file, "  match category {{").unwrap();
    // Each supported category maps to the slot with the same position as in `CATEGORIES`.
    for (index, category) in CATEGORIES.iter().enumerate() {
        writeln!(file, "    crate::Category::{category:?} => options[{index}],").unwrap();
    }
    writeln!(
        file,
        "    crate::Category::BritishIze | crate::Category::Other => unreachable!(\"{{:?}} is unused\", category),",
    )
    .unwrap();
    writeln!(file, "  }}").unwrap();
    writeln!(file, "}}").unwrap();
    writeln!(file).unwrap();

    let word_sets = entry_sets(all_entries.iter());
    // Filled in while the trie is written; decides below which entry statics get emitted.
    let mut referenced_symbols: HashSet<&str> = HashSet::new();
    let trie_rows = word_sets.iter().filter_map(|(word, data)| {
        if is_always_valid(data) {
            // No need to convert from current form to target form
            return None;
        }
        referenced_symbols.extend(data.iter().map(|&(symbol, _)| symbol));
        Some((*word, generate_link(data)))
    });
    dictgen::DictGen::new()
        .name("VARS")
        .value_type("&[(u8, &VariantsMap)]")
        .trie()
        .write(file, trie_rows)
        .unwrap();

    let no_invalid = !word_sets.values().any(|data| is_always_invalid(data));
    writeln!(file).unwrap();
    writeln!(file, "pub const NO_INVALID: bool = {no_invalid:?};",).unwrap();

    writeln!(file).unwrap();
    let referenced_entries = all_entries
        .iter()
        .filter(|(symbol, _)| referenced_symbols.contains(symbol.as_str()));
    for (symbol, entry) in referenced_entries {
        generate_entry(file, symbol, entry);
    }
}

/// Writes one `VariantsMap` static named `symbol` for `entry`.
///
/// The static holds one slice per item of `CATEGORIES`, in that order; each slice lists the
/// words `collect_correct` accepts for that category, sorted so the output is deterministic.
///
/// # Panics
///
/// Panics if a write to `file` fails.
fn generate_entry(file: &mut impl Write, symbol: &str, entry: &varcon_core::Entry) {
    writeln!(file, "pub(crate) static {symbol}: VariantsMap = [").unwrap();
    for category in CATEGORIES.iter().copied() {
        // A `HashSet` has no defined iteration order, so sort before emitting.
        let mut corrections = collect_correct(entry, category)
            .into_iter()
            .collect::<Vec<_>>();
        corrections.sort_unstable();

        writeln!(file, "  &[").unwrap();
        for correction in corrections {
            writeln!(file, "    {correction:?},").unwrap();
        }
        writeln!(file, "  ],").unwrap();
    }
    writeln!(file, "];").unwrap();
    writeln!(file).unwrap();
}

fn generate_link(data: &[(&str, varcon::CategorySet)]) -> String {
    let mut output = Vec::new();

    write!(output, "&[").unwrap();
    for (symbol, set) in data.iter() {
        write!(output, "(0b{:05b}, &{}), ", set.bits(), symbol).unwrap();
    }
    write!(output, "]").unwrap();

    String::from_utf8(output).unwrap()
}

/// Returns `true` when at least one item of `data` carries a category set equal to the full
/// set of supported categories (see `valid_categories`).
fn is_always_valid(data: &[(&str, varcon::CategorySet)]) -> bool {
    let everything = valid_categories();
    data.iter().any(|(_symbol, set)| *set == everything)
}

/// Returns `true` when at least one item of `data` carries an empty category set.
fn is_always_invalid(data: &[(&str, varcon::CategorySet)]) -> bool {
    data.iter().any(|(_symbol, set)| set.is_empty())
}

/// Collects the entries to generate code for, keyed by their generated symbol name.
///
/// Only clusters flagged `verified` are considered, and an entry is kept only when every one
/// of its variant words is accepted by `typos::tokens::Word::new`. Kept entries are converted
/// to owned values and their words are ASCII-lowercased before the symbol is computed.
fn entries() -> BTreeMap<String, varcon_core::Entry> {
    let mut by_symbol = BTreeMap::new();
    for cluster in varcon::VARCON.iter() {
        if !cluster.verified {
            continue;
        }
        for borrowed in cluster.entries.iter() {
            let tokenizable = borrowed
                .variants
                .iter()
                .all(|v| typos::tokens::Word::new(v.word, 0).is_ok());
            if !tokenizable {
                continue;
            }

            let mut owned = borrowed.into_owned();
            for variant in owned.variants.iter_mut() {
                variant.word.make_ascii_lowercase();
            }
            // The symbol is derived from the lowercased entry, so it has to be computed last.
            by_symbol.insert(entry_symbol(&owned), owned);
        }
    }
    by_symbol
}

/// Builds the identifier of the generated static for `entry`:
/// `ENTRY_<first variant word, ASCII-uppercased>_<hash of the entry>`.
///
/// NOTE(review): the hash comes from `DefaultHasher`, whose algorithm the standard library
/// leaves unspecified, so the numeric suffix may differ between toolchains.
///
/// # Panics
///
/// Panics if `entry` has no variants.
fn entry_symbol(entry: &varcon_core::Entry) -> String {
    use std::hash::{Hash, Hasher};

    let mut state = std::collections::hash_map::DefaultHasher::new();
    entry.hash(&mut state);
    let hash = state.finish();

    let first_word = entry.variants[0].word.to_ascii_uppercase();
    format!("ENTRY_{}_{}", first_word, hash)
}

/// Inverts the symbol-to-entry map: for every word, lists the `(entry symbol, category set)`
/// pairs of the entries that contain it, in the order the entries are supplied.
fn entry_sets<'e>(
    entries: impl Iterator<Item = (&'e String, &'e varcon_core::Entry)>,
) -> BTreeMap<&'e str, Vec<(&'e str, varcon::CategorySet)>> {
    let mut by_word: BTreeMap<&'e str, Vec<(&'e str, varcon::CategorySet)>> = BTreeMap::new();
    for (symbol, entry) in entries {
        for (word, categories) in entry_set(entry) {
            by_word
                .entry(word)
                .or_default()
                .push((symbol.as_str(), categories));
        }
    }
    by_word
}

/// Maps each word of `entry` to the set of supported categories it is listed under.
///
/// `Other` counts as every supported category, `BritishIze` is ignored, and any other
/// category is inserted as-is. A word whose types are all ignored still gets an (empty) set.
fn entry_set(entry: &varcon_core::Entry) -> BTreeMap<&str, varcon::CategorySet> {
    let everything = valid_categories();
    let mut by_word = BTreeMap::new();
    for variant in entry.variants.iter() {
        let categories = by_word
            .entry(variant.word.as_str())
            .or_insert_with(varcon::CategorySet::empty);
        for category in variant.types.iter().map(|t| t.category) {
            if category == varcon::Category::Other {
                *categories |= everything;
            } else if category != varcon::Category::BritishIze {
                categories.insert(category);
            }
        }
    }
    by_word
}

/// Returns the set containing exactly the categories listed in `CATEGORIES`.
fn valid_categories() -> varcon::CategorySet {
    CATEGORIES
        .iter()
        .fold(varcon::CategorySet::empty(), |mut all, category| {
            all.insert(*category);
            all
        })
}

/// Collects the words of `entry` that are acceptable spellings for `category`.
///
/// Only types tagged with `category` or `Other` are considered, and variants rejected by
/// `ignore_variant` are skipped. A missing tag is treated as `Eq`. If exactly one word is
/// tagged `Eq`, that word alone is returned; otherwise every word whose tag is not `Improper`
/// is returned.
fn collect_correct(entry: &varcon_core::Entry, category: varcon::Category) -> HashSet<&str> {
    let mut primary = HashSet::new();
    let mut backup = HashSet::new();

    for variant in entry.variants.iter() {
        if ignore_variant(variant) {
            continue;
        }
        let word = variant.word.as_str();
        for t in variant.types.iter() {
            let applies = t.category == category || t.category == varcon::Category::Other;
            if !applies {
                continue;
            }

            let tag = t.tag.unwrap_or(varcon::Tag::Eq);
            if tag == varcon::Tag::Eq {
                primary.insert(word);
            }
            if tag != varcon::Tag::Improper {
                backup.insert(word);
            }
        }
    }

    // If there is ambiguity, collect all potential options.
    match primary.len() {
        1 => primary,
        _ => backup,
    }
}

/// Returns `true` for the one variant that is deliberately excluded from corrections:
/// the word `anesthetisation` when its only type is Australian and tagged `Variant` or
/// `Seldom`.
fn ignore_variant(variant: &varcon_core::Variant) -> bool {
    if variant.word != "anesthetisation" {
        return false;
    }
    if variant.types.len() != 1 {
        return false;
    }

    let only = &variant.types[0];
    if only.category != varcon::Category::Australian {
        return false;
    }
    only.tag == Some(varcon::Tag::Variant) || only.tag == Some(varcon::Tag::Seldom)
}

// dict needs
// all words, with bitflags, pointing to list of entry names
//
// varcon needs
// all entries by name