diff --git a/crates/typos/src/dict.rs b/crates/typos/src/dict.rs
index 7c92d5b..971ca86 100644
--- a/crates/typos/src/dict.rs
+++ b/crates/typos/src/dict.rs
@@ -1,5 +1,34 @@
 use std::borrow::Cow;
 
+/// Look up the validity of a term.
+pub trait Dictionary: Send + Sync {
+    /// Look up the validity of an Identifier.
+    ///
+    /// `None` if the status is unknown.
+    fn correct_ident<'s, 'w>(&'s self, ident: crate::tokens::Identifier<'w>) -> Option<Status<'s>>;
+
+    /// Look up the validity of a Word.
+    ///
+    /// `None` if the status is unknown.
+    fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option<Status<'s>>;
+}
+
+pub(crate) struct NullDictionary;
+
+impl Dictionary for NullDictionary {
+    fn correct_ident<'s, 'w>(
+        &'s self,
+        _ident: crate::tokens::Identifier<'w>,
+    ) -> Option<Status<'s>> {
+        None
+    }
+
+    fn correct_word<'s, 'w>(&'s self, _word: crate::tokens::Word<'w>) -> Option<Status<'s>> {
+        None
+    }
+}
+
+/// Validity of a term in a Dictionary.
 #[derive(Clone, PartialEq, Eq, Debug, serde::Serialize)]
 #[serde(rename_all = "snake_case")]
 #[serde(untagged)]
@@ -54,25 +83,3 @@ impl<'c> Status<'c> {
         }
     }
 }
-
-pub trait Dictionary: Send + Sync {
-    fn correct_ident<'s, 'w>(&'s self, _ident: crate::tokens::Identifier<'w>)
-        -> Option<Status<'s>>;
-
-    fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option<Status<'s>>;
-}
-
-pub(crate) struct NullDictionary;
-
-impl Dictionary for NullDictionary {
-    fn correct_ident<'s, 'w>(
-        &'s self,
-        _ident: crate::tokens::Identifier<'w>,
-    ) -> Option<Status<'s>> {
-        None
-    }
-
-    fn correct_word<'s, 'w>(&'s self, _word: crate::tokens::Word<'w>) -> Option<Status<'s>> {
-        None
-    }
-}
diff --git a/crates/typos/src/parser.rs b/crates/typos/src/parser.rs
index 95d0d4c..ae23bfd 100644
--- a/crates/typos/src/parser.rs
+++ b/crates/typos/src/parser.rs
@@ -15,11 +15,13 @@ impl<'p> ParserBuilder<'p, 'static> {
 }
 
 impl<'p, 'd> ParserBuilder<'p, 'd> {
+    /// Set the Tokenizer used when parsing.
     pub fn tokenizer(mut self, tokenizer: &'p tokens::Tokenizer) -> Self {
         self.tokenizer = Some(tokenizer);
         self
     }
 
+    /// Set the dictionary used when parsing.
     pub fn dictionary<'d1>(self, dictionary: &'d1 dyn Dictionary) -> ParserBuilder<'p, 'd1> {
         ParserBuilder {
             tokenizer: self.tokenizer,
@@ -27,6 +29,7 @@ impl<'p, 'd> ParserBuilder<'p, 'd> {
         }
     }
 
+    /// Extract typos from the buffer.
     pub fn typos(&self) -> TyposParser<'p, 'd> {
         TyposParser {
             tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER),
@@ -34,12 +37,14 @@ impl<'p, 'd> ParserBuilder<'p, 'd> {
         }
     }
 
+    /// Parse for Identifiers.
     pub fn identifiers(&self) -> IdentifiersParser<'p> {
         IdentifiersParser {
             tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER),
         }
     }
 
+    /// Parse for Words.
     pub fn words(&self) -> WordsParser<'p> {
         WordsParser {
             tokenizer: self.tokenizer.unwrap_or_else(|| &DEFAULT_TOKENIZER),
@@ -59,6 +64,7 @@ impl<'p> Default for ParserBuilder<'p, 'static> {
 }
 static DEFAULT_TOKENIZER: once_cell::sync::Lazy<tokens::Tokenizer> =
     once_cell::sync::Lazy::new(|| tokens::Tokenizer::new());
+/// Extract typos from the buffer.
 #[derive(Clone)]
 pub struct TyposParser<'p, 'd> {
     tokenizer: &'p tokens::Tokenizer,
@@ -116,6 +122,7 @@ impl<'p, 'd> TyposParser<'p, 'd> {
     }
 }
 
+/// An invalid term found in the buffer.
 #[derive(Clone, Debug)]
 #[non_exhaustive]
 pub struct Typo<'m> {
@@ -152,6 +159,7 @@ impl<'m> Default for Typo<'m> {
     }
 }
 
+/// Parse for Identifiers.
 #[derive(Debug, Clone)]
 pub struct IdentifiersParser<'p> {
     tokenizer: &'p tokens::Tokenizer,
@@ -167,6 +175,7 @@ impl<'p> IdentifiersParser<'p> {
     }
 }
 
+/// Parse for Words.
 #[derive(Debug, Clone)]
 pub struct WordsParser<'p> {
     tokenizer: &'p tokens::Tokenizer,
diff --git a/crates/typos/src/tokens.rs b/crates/typos/src/tokens.rs
index f372c96..3f5aefc 100644
--- a/crates/typos/src/tokens.rs
+++ b/crates/typos/src/tokens.rs
@@ -1,3 +1,4 @@
+/// Define rules for tokenizing a buffer.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub struct TokenizerBuilder {
     ignore_hex: bool,
@@ -12,26 +13,31 @@ impl TokenizerBuilder {
         Default::default()
     }
 
+    /// Specify that hexadecimal numbers should be ignored.
     pub fn ignore_hex(&mut self, yes: bool) -> &mut Self {
         self.ignore_hex = yes;
         self
     }
 
+    /// Specify that leading digits are allowed for Identifiers.
     pub fn leading_digits(&mut self, yes: bool) -> &mut Self {
         self.leading_digits = yes;
         self
     }
 
+    /// Extend accepted leading characters for Identifiers.
     pub fn leading_chars(&mut self, chars: String) -> &mut Self {
         self.leading_chars = chars;
         self
     }
 
+    /// Specify that digits can be included in Identifiers.
     pub fn include_digits(&mut self, yes: bool) -> &mut Self {
         self.include_digits = yes;
         self
     }
 
+    /// Extend accepted characters for Identifiers.
     pub fn include_chars(&mut self, chars: String) -> &mut Self {
         self.include_chars = chars;
         self
@@ -81,6 +87,7 @@ impl Default for TokenizerBuilder {
     }
 }
 
+/// Extract Identifiers from a buffer.
 #[derive(Debug, Clone)]
 pub struct Tokenizer {
     words_str: regex::Regex,
@@ -148,6 +155,7 @@ fn is_hex(ident: &[u8]) -> bool {
     HEX.is_match(ident)
 }
 
+/// A term composed of Words.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub struct Identifier<'t> {
     token: &'t str,
@@ -171,11 +179,13 @@ impl<'t> Identifier<'t> {
         self.offset
     }
 
+    /// Split into individual Words.
     pub fn split(&self) -> impl Iterator<Item = Word<'t>> {
         split_ident(self.token, self.offset)
     }
 }
 
+/// An indivisible term.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub struct Word<'t> {
     token: &'t str,
@@ -325,6 +335,7 @@ impl<'s> Iterator for SplitIdent<'s> {
     }
 }
 
+/// Format of the term.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum Case {
     Title,