From 043692afe0bbc78b327aa2184cc9ada6d9ed877f Mon Sep 17 00:00:00 2001 From: Ed Page Date: Wed, 2 Sep 2020 20:12:49 -0500 Subject: [PATCH] feat(dict): Override builtin dictionary Sometimes you just have to live with a typo or it's done intentionally (like weird company names). With this commit, a user can now identify blessed identifiers and words. This is mostly what is needed for #9 but sometimes people will have common typos that they'll want to provide corrections for. --- docs/reference.md | 4 +++- src/config.rs | 30 ++++++++++++++++++++++++++++++ src/dict.rs | 44 ++++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 15 +++++++++++++++ 4 files changed, 92 insertions(+), 1 deletion(-) diff --git a/docs/reference.md b/docs/reference.md index 3233f9a..e8c0b96 100644 --- a/docs/reference.md +++ b/docs/reference.md @@ -29,4 +29,6 @@ Configuration is read from the following (in precedence order) | default.identifier-include-digits | \- | bool | Allow identifiers to include digits, in addition to letters. | | default.identifier-leading-chars | \- | string | Allow identifiers to start with one of these characters. | | default.identifier-include-chars | \- | string | Allow identifiers to include these characters. | -| default.locale | \- | en, en-us, en-gb, en-ca, en-au | | +| default.locale | \- | en, en-us, en-gb, en-ca, en-au | English dialect to correct to. | +| default.extend-valid-identifiers | \- | list of strings | Identifiers to presume as correct, skipping spell checking. This extends the list when layering configuration, rather than replacing it. | +| default.extend-valid-words | \- | list of strings | Words to presume as correct, skipping spell checking. This extends the list when layering configuration, rather than replacing it. 
| diff --git a/src/config.rs b/src/config.rs index cad3854..f4ed293 100644 --- a/src/config.rs +++ b/src/config.rs @@ -96,6 +96,14 @@ pub trait FileSource { fn locale(&self) -> Option { None } + + fn extend_valid_identifiers(&self) -> &[String] { + &[] + } + + fn extend_valid_words(&self) -> &[String] { + &[] + } } #[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)] @@ -287,6 +295,8 @@ pub struct FileConfig { pub identifier_include_digits: Option, pub identifier_include_chars: Option, pub locale: Option, + pub extend_valid_identifiers: Vec, + pub extend_valid_words: Vec, } impl FileConfig { @@ -315,6 +325,10 @@ impl FileConfig { if let Some(source) = source.locale() { self.locale = Some(source); } + self.extend_valid_identifiers + .extend(source.extend_valid_identifiers().iter().cloned()); + self.extend_valid_words + .extend(source.extend_valid_words().iter().cloned()); } pub fn check_filename(&self) -> bool { @@ -348,6 +362,14 @@ impl FileConfig { pub fn locale(&self) -> Locale { self.locale.unwrap_or_default() } + + pub fn extend_valid_identifiers(&self) -> &[String] { + self.extend_valid_identifiers.as_slice() + } + + pub fn extend_valid_words(&self) -> &[String] { + self.extend_valid_words.as_slice() + } } impl FileSource for FileConfig { @@ -382,6 +404,14 @@ impl FileSource for FileConfig { fn locale(&self) -> Option { self.locale } + + fn extend_valid_identifiers(&self) -> &[String] { + self.extend_valid_identifiers.as_slice() + } + + fn extend_valid_words(&self) -> &[String] { + self.extend_valid_words.as_slice() + } } fn find_project_file(dir: std::path::PathBuf, name: &str) -> Option { diff --git a/src/dict.rs b/src/dict.rs index e888606..c79450c 100644 --- a/src/dict.rs +++ b/src/dict.rs @@ -1,4 +1,5 @@ use std::borrow::Cow; +use std::collections::HashSet; use unicase::UniCase; @@ -130,6 +131,49 @@ fn case_correct(correction: &str, case: Case) -> Cow<'_, str> { } } +pub struct Override<'i, 'w, D> { + valid_identifiers: HashSet<&'i str>, 
+ valid_words: HashSet>, + inner: D, +} + +impl<'i, 'w, D: typos::Dictionary> Override<'i, 'w, D> { + pub fn new(inner: D) -> Self { + Self { + valid_identifiers: Default::default(), + valid_words: Default::default(), + inner, + } + } + + pub fn valid_identifiers>(&mut self, valid_identifiers: I) { + self.valid_identifiers = valid_identifiers.collect(); + } + + pub fn valid_words>(&mut self, valid_words: I) { + self.valid_words = valid_words.map(UniCase::new).collect(); + } +} + +impl<'i, 'w, D: typos::Dictionary> typos::Dictionary for Override<'i, 'w, D> { + fn correct_ident<'s, 't>(&'s self, ident: typos::tokens::Identifier<'t>) -> Vec> { + if self.valid_identifiers.contains(ident.token()) { + Vec::new() + } else { + self.inner.correct_ident(ident) + } + } + + fn correct_word<'s, 't>(&'s self, word: typos::tokens::Word<'t>) -> Vec> { + let w = UniCase::new(word.token()); + if self.valid_words.contains(&w) { + Vec::new() + } else { + self.inner.correct_word(word) + } + } +} + #[cfg(test)] mod test { use super::*; diff --git a/src/main.rs b/src/main.rs index 8275ec9..b1d8fb6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -57,6 +57,21 @@ fn run() -> Result { .build(); let dictionary = crate::dict::BuiltIn::new(config.default.locale()); + let mut dictionary = crate::dict::Override::new(dictionary); + dictionary.valid_identifiers( + config + .default + .extend_valid_identifiers() + .iter() + .map(|s| s.as_str()), + ); + dictionary.valid_words( + config + .default + .extend_valid_words() + .iter() + .map(|s| s.as_str()), + ); let mut settings = typos::checks::TyposSettings::new(); settings