diff --git a/benches/checks.rs b/benches/checks.rs
index 8f8695e..f6aa46c 100644
--- a/benches/checks.rs
+++ b/benches/checks.rs
@@ -9,7 +9,7 @@ use typos::checks::Check;
 
 fn bench_parse_ident_str(data: &str, b: &mut test::Bencher) {
     let corrections = typos_cli::dict::BuiltIn::new(Default::default());
-    let parser = typos::tokens::Parser::new();
+    let parser = typos::tokens::Tokenizer::new();
     let checks = typos::checks::TyposSettings::new().build_identifier_parser();
     b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent));
 }
@@ -46,7 +46,7 @@ fn parse_idents_corpus_str(b: &mut test::Bencher) {
 
 fn bench_parse_ident_bytes(data: &str, b: &mut test::Bencher) {
     let corrections = typos_cli::dict::BuiltIn::new(Default::default());
-    let parser = typos::tokens::Parser::new();
+    let parser = typos::tokens::Tokenizer::new();
     let checks = typos::checks::TyposSettings::new().build_identifier_parser();
     b.iter(|| {
         checks.check_bytes(
@@ -90,7 +90,7 @@ fn parse_idents_corpus_bytes(b: &mut test::Bencher) {
 
 fn bench_parse_word_str(data: &str, b: &mut test::Bencher) {
     let corrections = typos_cli::dict::BuiltIn::new(Default::default());
-    let parser = typos::tokens::Parser::new();
+    let parser = typos::tokens::Tokenizer::new();
     let checks = typos::checks::TyposSettings::new().build_word_parser();
     b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent));
 }
@@ -127,7 +127,7 @@ fn parse_words_corpus(b: &mut test::Bencher) {
 
 fn bench_typos(data: &str, b: &mut test::Bencher) {
     let corrections = typos_cli::dict::BuiltIn::new(Default::default());
-    let parser = typos::tokens::Parser::new();
+    let parser = typos::tokens::Tokenizer::new();
     let checks = typos::checks::TyposSettings::new().build_typos();
     b.iter(|| checks.check_str(data, &parser, &corrections, &typos::report::PrintSilent));
 }
@@ -168,7 +168,7 @@ fn bench_check_file(data: &str, b: &mut test::Bencher) {
     sample_path.write_str(data).unwrap();
 
     let corrections = typos_cli::dict::BuiltIn::new(Default::default());
-    let parser = typos::tokens::Parser::new();
+    let parser = typos::tokens::Tokenizer::new();
     let checks = typos::checks::TyposSettings::new().build_typos();
     b.iter(|| {
         checks.check_file(
diff --git a/benches/tokenize.rs b/benches/tokenize.rs
index 32e6a74..efcce0b 100644
--- a/benches/tokenize.rs
+++ b/benches/tokenize.rs
@@ -6,19 +6,19 @@ mod data;
 
 #[bench]
 fn ident_parse_empty(b: &mut test::Bencher) {
-    let parser = typos::tokens::Parser::new();
+    let parser = typos::tokens::Tokenizer::new();
     b.iter(|| parser.parse_bytes(data::EMPTY.as_bytes()).last());
 }
 
 #[bench]
 fn ident_parse_no_tokens(b: &mut test::Bencher) {
-    let parser = typos::tokens::Parser::new();
+    let parser = typos::tokens::Tokenizer::new();
     b.iter(|| parser.parse_bytes(data::NO_TOKENS.as_bytes()).last());
 }
 
 #[bench]
 fn ident_parse_single_token(b: &mut test::Bencher) {
-    let parser = typos::tokens::Parser::new();
+    let parser = typos::tokens::Tokenizer::new();
     b.iter(|| {
         parser.parse_bytes(data::SINGLE_TOKEN.as_bytes()).last();
     });
@@ -26,19 +26,19 @@ fn ident_parse_single_token(b: &mut test::Bencher) {
 
 #[bench]
 fn ident_parse_sherlock(b: &mut test::Bencher) {
-    let parser = typos::tokens::Parser::new();
+    let parser = typos::tokens::Tokenizer::new();
     b.iter(|| parser.parse_bytes(data::SHERLOCK.as_bytes()).last());
 }
 
 #[bench]
 fn ident_parse_code(b: &mut test::Bencher) {
-    let parser = typos::tokens::Parser::new();
+    let parser = typos::tokens::Tokenizer::new();
     b.iter(|| parser.parse_bytes(data::CODE.as_bytes()).last());
 }
 
 #[bench]
 fn ident_parse_corpus(b: &mut test::Bencher) {
-    let parser = typos::tokens::Parser::new();
+    let parser = typos::tokens::Tokenizer::new();
     b.iter(|| parser.parse_bytes(data::CORPUS.as_bytes()).last());
 }
diff --git a/crates/typos/src/checks.rs b/crates/typos/src/checks.rs
index 6040440..62bae7d 100644
--- a/crates/typos/src/checks.rs
+++ b/crates/typos/src/checks.rs
@@ -9,7 +9,7 @@ pub trait Check: Send + Sync {
     fn check_str(
         &self,
         buffer: &str,
-        parser: &tokens::Parser,
+        parser: &tokens::Tokenizer,
         dictionary: &dyn Dictionary,
         reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error>;
@@ -17,7 +17,7 @@ pub trait Check: Send + Sync {
     fn check_bytes(
         &self,
         buffer: &[u8],
-        parser: &tokens::Parser,
+        parser: &tokens::Tokenizer,
         dictionary: &dyn Dictionary,
         reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error>;
@@ -31,7 +31,7 @@ pub trait Check: Send + Sync {
     fn check_filename(
         &self,
         path: &std::path::Path,
-        parser: &tokens::Parser,
+        parser: &tokens::Tokenizer,
         dictionary: &dyn Dictionary,
         reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error> {
@@ -54,7 +54,7 @@ pub trait Check: Send + Sync {
         &self,
         path: &std::path::Path,
         explicit: bool,
-        parser: &tokens::Parser,
+        parser: &tokens::Tokenizer,
         dictionary: &dyn Dictionary,
         reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error> {
@@ -172,7 +172,7 @@ impl Check for Typos {
     fn check_str(
         &self,
         buffer: &str,
-        parser: &tokens::Parser,
+        parser: &tokens::Tokenizer,
         dictionary: &dyn Dictionary,
         reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error> {
@@ -217,7 +217,7 @@ impl Check for Typos {
     fn check_bytes(
         &self,
         buffer: &[u8],
-        parser: &tokens::Parser,
+        parser: &tokens::Tokenizer,
         dictionary: &dyn Dictionary,
         reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error> {
@@ -284,7 +284,7 @@ impl Check for ParseIdentifiers {
     fn check_str(
         &self,
         buffer: &str,
-        parser: &tokens::Parser,
+        parser: &tokens::Tokenizer,
         _dictionary: &dyn Dictionary,
         reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error> {
@@ -303,7 +303,7 @@ impl Check for ParseIdentifiers {
     fn check_bytes(
         &self,
         buffer: &[u8],
-        parser: &tokens::Parser,
+        parser: &tokens::Tokenizer,
         _dictionary: &dyn Dictionary,
         reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error> {
@@ -343,7 +343,7 @@ impl Check for ParseWords {
     fn check_str(
         &self,
         buffer: &str,
-        parser: &tokens::Parser,
+        parser: &tokens::Tokenizer,
         _dictionary: &dyn Dictionary,
         reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error> {
@@ -365,7 +365,7 @@ impl Check for ParseWords {
     fn check_bytes(
         &self,
         buffer: &[u8],
-        parser: &tokens::Parser,
+        parser: &tokens::Tokenizer,
         _dictionary: &dyn Dictionary,
         reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error> {
@@ -404,7 +404,7 @@ impl Check for Files {
     fn check_str(
         &self,
         _buffer: &str,
-        _parser: &tokens::Parser,
+        _parser: &tokens::Tokenizer,
         _dictionary: &dyn Dictionary,
         _reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error> {
@@ -414,7 +414,7 @@ impl Check for Files {
     fn check_bytes(
         &self,
         _buffer: &[u8],
-        _parser: &tokens::Parser,
+        _parser: &tokens::Tokenizer,
         _dictionary: &dyn Dictionary,
         _reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error> {
@@ -436,7 +436,7 @@ impl Check for Files {
     fn check_filename(
         &self,
         _path: &std::path::Path,
-        _parser: &tokens::Parser,
+        _parser: &tokens::Tokenizer,
         _dictionary: &dyn Dictionary,
         _reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error> {
@@ -447,7 +447,7 @@ impl Check for Files {
         &self,
         path: &std::path::Path,
         _explicit: bool,
-        _parser: &tokens::Parser,
+        _parser: &tokens::Tokenizer,
         _dictionary: &dyn Dictionary,
         reporter: &dyn report::Report,
     ) -> Result<(), std::io::Error> {
diff --git a/crates/typos/src/tokens.rs b/crates/typos/src/tokens.rs
index 0a9e7f2..f372c96 100644
--- a/crates/typos/src/tokens.rs
+++ b/crates/typos/src/tokens.rs
@@ -1,5 +1,5 @@
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct ParserBuilder {
+pub struct TokenizerBuilder {
     ignore_hex: bool,
     leading_digits: bool,
     leading_chars: String,
@@ -7,7 +7,7 @@ pub struct ParserBuilder {
     include_chars: String,
 }
 
-impl ParserBuilder {
+impl TokenizerBuilder {
     pub fn new() -> Self {
         Default::default()
     }
@@ -37,7 +37,7 @@ impl ParserBuilder {
         self
     }
 
-    pub fn build(&self) -> Parser {
+    pub fn build(&self) -> Tokenizer {
         let mut pattern = r#"\b("#.to_owned();
         Self::push_pattern(&mut pattern, self.leading_digits, &self.leading_chars);
         Self::push_pattern(&mut pattern, self.include_digits, &self.include_chars);
@@ -46,7 +46,7 @@ impl ParserBuilder {
         let words_str = regex::Regex::new(&pattern).unwrap();
         let words_bytes = regex::bytes::Regex::new(&pattern).unwrap();
 
-        Parser {
+        Tokenizer {
             words_str,
             words_bytes,
             // `leading_digits` let's us bypass the regexes since you can't have a decimal or
@@ -69,7 +69,7 @@ impl ParserBuilder {
     }
 }
 
-impl Default for ParserBuilder {
+impl Default for TokenizerBuilder {
     fn default() -> Self {
         Self {
             ignore_hex: true,
@@ -82,16 +82,16 @@ impl Default for ParserBuilder {
 }
 
 #[derive(Debug, Clone)]
-pub struct Parser {
+pub struct Tokenizer {
     words_str: regex::Regex,
     words_bytes: regex::bytes::Regex,
     ignore_numbers: bool,
     ignore_hex: bool,
 }
 
-impl Parser {
+impl Tokenizer {
     pub fn new() -> Self {
-        ParserBuilder::default().build()
+        TokenizerBuilder::default().build()
     }
 
     pub fn parse_str<'c>(&'c self, content: &'c str) -> impl Iterator<Item = Identifier<'c>> {
@@ -124,7 +124,7 @@ impl Parser {
     }
 }
 
-impl Default for Parser {
+impl Default for Tokenizer {
     fn default() -> Self {
         Self::new()
     }
@@ -387,7 +387,7 @@ mod test {
 
     #[test]
    fn tokenize_empty_is_empty() {
-        let parser = Parser::new();
+        let parser = Tokenizer::new();
 
         let input = "";
         let expected: Vec<Identifier> = vec![];
@@ -399,7 +399,7 @@ mod test {
 
     #[test]
     fn tokenize_word_is_word() {
-        let parser = Parser::new();
+        let parser = Tokenizer::new();
 
         let input = "word";
         let expected: Vec<Identifier> = vec![Identifier::new_unchecked("word", 0)];
@@ -411,7 +411,7 @@ mod test {
 
     #[test]
     fn tokenize_space_separated_words() {
-        let parser = Parser::new();
+        let parser = Tokenizer::new();
 
         let input = "A B";
         let expected: Vec<Identifier> = vec![
@@ -426,7 +426,7 @@ mod test {
 
     #[test]
     fn tokenize_dot_separated_words() {
-        let parser = Parser::new();
+        let parser = Tokenizer::new();
 
         let input = "A.B";
         let expected: Vec<Identifier> = vec![
@@ -441,7 +441,7 @@ mod test {
 
     #[test]
     fn tokenize_namespace_separated_words() {
-        let parser = Parser::new();
+        let parser = Tokenizer::new();
 
         let input = "A::B";
         let expected: Vec<Identifier> = vec![
@@ -456,7 +456,7 @@ mod test {
 
     #[test]
     fn tokenize_underscore_doesnt_separate() {
-        let parser = Parser::new();
+        let parser = Tokenizer::new();
 
         let input = "A_B";
         let expected: Vec<Identifier> = vec![Identifier::new_unchecked("A_B", 0)];
@@ -468,7 +468,7 @@ mod test {
 
     #[test]
     fn tokenize_ignore_hex_enabled() {
-        let parser = ParserBuilder::new().ignore_hex(true).build();
+        let parser = TokenizerBuilder::new().ignore_hex(true).build();
 
         let input = "Hello 0xDEADBEEF World";
         let expected: Vec<Identifier> = vec![
@@ -483,7 +483,7 @@ mod test {
 
     #[test]
     fn tokenize_ignore_hex_disabled() {
-        let parser = ParserBuilder::new()
+        let parser = TokenizerBuilder::new()
             .ignore_hex(false)
             .leading_digits(true)
             .build();
@@ -523,11 +523,11 @@ mod test {
             &[("A", Case::Scream, 0), ("String", Case::Title, 1)],
         ),
         (
-            "SimpleXMLParser",
+            "SimpleXMLTokenizer",
             &[
                 ("Simple", Case::Title, 0),
                 ("XML", Case::Scream, 6),
-                ("Parser", Case::Title, 9),
+                ("Tokenizer", Case::Title, 9),
             ],
         ),
         (
diff --git a/src/checks.rs b/src/checks.rs
index 35d51c6..78b9718 100644
--- a/src/checks.rs
+++ b/src/checks.rs
@@ -1,7 +1,7 @@
 pub(crate) fn check_path(
     walk: ignore::Walk,
     checks: &dyn typos::checks::Check,
-    parser: &typos::tokens::Parser,
+    parser: &typos::tokens::Tokenizer,
     dictionary: &dyn typos::Dictionary,
     reporter: &dyn typos::report::Report,
 ) -> Result<(), ignore::Error> {
@@ -14,7 +14,7 @@ pub(crate) fn check_path(
 pub(crate) fn check_path_parallel(
     walk: ignore::WalkParallel,
     checks: &dyn typos::checks::Check,
-    parser: &typos::tokens::Parser,
+    parser: &typos::tokens::Tokenizer,
     dictionary: &dyn typos::Dictionary,
     reporter: &dyn typos::report::Report,
 ) -> Result<(), ignore::Error> {
@@ -37,7 +37,7 @@ pub(crate) fn check_path_parallel(
 fn check_entry(
     entry: Result<ignore::DirEntry, ignore::Error>,
     checks: &dyn typos::checks::Check,
-    parser: &typos::tokens::Parser,
+    parser: &typos::tokens::Tokenizer,
     dictionary: &dyn typos::Dictionary,
     reporter: &dyn typos::report::Report,
 ) -> Result<(), ignore::Error> {
diff --git a/src/main.rs b/src/main.rs
index 80d0b99..6275b95 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -61,7 +61,7 @@ fn run() -> proc_exit::ExitResult {
     config.default.update(&args.overrides);
     let config = config;
 
-    let parser = typos::tokens::ParserBuilder::new()
+    let parser = typos::tokens::TokenizerBuilder::new()
         .ignore_hex(config.default.ignore_hex())
         .leading_digits(config.default.identifier_leading_digits())
         .leading_chars(config.default.identifier_leading_chars().to_owned())