From bc1302f01bad8c3aeb7931b027ac76d0a4da651e Mon Sep 17 00:00:00 2001 From: Ed Page Date: Tue, 30 Jun 2020 20:47:40 -0500 Subject: [PATCH] feat: Support multiple, valid corrections Some of the other spell checkers already do this. While I've not checked where we might need it for our dictionary, this will be important for dialects. --- benches/corrections.rs | 4 +-- crates/typos/src/checks.rs | 20 ++++++++------ crates/typos/src/dict.rs | 7 ++--- crates/typos/src/report.rs | 26 +++++++++++------- src/dict.rs | 18 ++++++------ src/replace.rs | 56 +++++++++++++++++++++++--------------- 6 files changed, 75 insertions(+), 56 deletions(-) diff --git a/benches/corrections.rs b/benches/corrections.rs index 4cb0fde..9093826 100644 --- a/benches/corrections.rs +++ b/benches/corrections.rs @@ -13,7 +13,7 @@ fn correct_word_hit(b: &mut test::Bencher) { let input = typos::tokens::Word::new("successs", 0).unwrap(); assert_eq!( corrections.correct_word(input), - Some(std::borrow::Cow::Borrowed("successes")) + vec![std::borrow::Cow::Borrowed("successes")] ); b.iter(|| corrections.correct_word(input)); } @@ -22,6 +22,6 @@ fn correct_word_hit(b: &mut test::Bencher) { fn correct_word_miss(b: &mut test::Bencher) { let corrections = typos_cli::dict::BuiltIn::new(); let input = typos::tokens::Word::new("success", 0).unwrap(); - assert_eq!(corrections.correct_word(input), None); + assert!(corrections.correct_word(input).is_empty()); b.iter(|| corrections.correct_word(input)); } diff --git a/crates/typos/src/checks.rs b/crates/typos/src/checks.rs index cf6cc3c..ee32f83 100644 --- a/crates/typos/src/checks.rs +++ b/crates/typos/src/checks.rs @@ -227,24 +227,26 @@ impl Checks { if let Some(part) = path.file_name().and_then(|s| s.to_str()) { for ident in parser.parse(part) { - if let Some(correction) = dictionary.correct_ident(ident) { + let corrections = dictionary.correct_ident(ident); + if !corrections.is_empty() { let byte_offset = ident.offset(); let msg = report::PathCorrection { 
path, byte_offset, typo: ident.token(), - correction, + corrections, }; typos_found |= reporter.report(msg.into()); } else { for word in ident.split() { - if let Some(correction) = dictionary.correct_word(word) { + let corrections = dictionary.correct_word(word); + if !corrections.is_empty() { let byte_offset = word.offset(); let msg = report::PathCorrection { path, byte_offset, typo: word.token(), - correction, + corrections, }; typos_found |= reporter.report(msg.into()); } @@ -281,7 +283,8 @@ impl Checks { for (line_idx, line) in buffer.lines().enumerate() { let line_num = line_idx + 1; for ident in parser.parse_bytes(line) { - if let Some(correction) = dictionary.correct_ident(ident) { + let corrections = dictionary.correct_ident(ident); + if !corrections.is_empty() { let byte_offset = ident.offset(); let msg = report::Correction { path, @@ -289,12 +292,13 @@ impl Checks { line_num, byte_offset, typo: ident.token(), - correction, + corrections, }; typos_found |= reporter.report(msg.into()); } else { for word in ident.split() { - if let Some(correction) = dictionary.correct_word(word) { + let corrections = dictionary.correct_word(word); + if !corrections.is_empty() { let byte_offset = word.offset(); let msg = report::Correction { path, @@ -302,7 +306,7 @@ impl Checks { line_num, byte_offset, typo: word.token(), - correction, + corrections, }; typos_found |= reporter.report(msg.into()); } diff --git a/crates/typos/src/dict.rs b/crates/typos/src/dict.rs index 76cea74..88f1d60 100644 --- a/crates/typos/src/dict.rs +++ b/crates/typos/src/dict.rs @@ -1,10 +1,7 @@ use std::borrow::Cow; pub trait Dictionary: Send + Sync { - fn correct_ident<'s, 'w>( - &'s self, - _ident: crate::tokens::Identifier<'w>, - ) -> Option<Cow<'s, str>>; + fn correct_ident<'s, 'w>(&'s self, _ident: crate::tokens::Identifier<'w>) -> Vec<Cow<'s, str>>; - fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option<Cow<'s, str>>; + fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Vec<Cow<'s, str>>; } diff --git 
a/crates/typos/src/report.rs b/crates/typos/src/report.rs index 8e80c41..67dc312 100644 --- a/crates/typos/src/report.rs +++ b/crates/typos/src/report.rs @@ -57,7 +57,7 @@ pub struct Correction<'m> { pub line_num: usize, pub byte_offset: usize, pub typo: &'m str, - pub correction: Cow<'m, str>, + pub corrections: Vec<Cow<'m, str>>, } impl<'m> Default for Correction<'m> { @@ -68,7 +68,7 @@ impl<'m> Default for Correction<'m> { line_num: 0, byte_offset: 0, typo: "", - correction: Cow::Borrowed(""), + corrections: Vec::new(), } } } @@ -79,7 +79,7 @@ pub struct PathCorrection<'m> { pub path: &'m std::path::Path, pub byte_offset: usize, pub typo: &'m str, - pub correction: Cow<'m, str>, + pub corrections: Vec<Cow<'m, str>>, } impl<'m> Default for PathCorrection<'m> { @@ -88,7 +88,7 @@ impl<'m> Default for PathCorrection<'m> { path: std::path::Path::new("-"), byte_offset: 0, typo: "", - correction: Cow::Borrowed(""), + corrections: Vec::new(), } } } @@ -201,11 +201,16 @@ impl Report for PrintBrief { msg.line_num, msg.byte_offset, msg.typo, - msg.correction + itertools::join(msg.corrections.iter(), ", ") ); } Message::PathCorrection(msg) => { - println!("{}: {} -> {}", msg.path.display(), msg.typo, msg.correction); + println!( + "{}: {} -> {}", + msg.path.display(), + msg.typo, + itertools::join(msg.corrections.iter(), ", ") + ); } Message::File(msg) => { println!("{}", msg.path.display()); @@ -236,10 +241,10 @@ impl Report for PrintLong { Message::Correction(msg) => print_long_correction(msg), Message::PathCorrection(msg) => { println!( - "{}: error: `{}` should be `{}`", + "{}: error: `{}` should be {}", msg.path.display(), msg.typo, - msg.correction + itertools::join(msg.corrections.iter().map(|c| format!("`{}`", c)), ", ") ); } Message::File(msg) => { @@ -274,8 +279,9 @@ fn print_long_correction(msg: &Correction) { writeln!( handle, - "error: `{}` should be `{}`", - msg.typo, msg.correction + "error: `{}` should be {}", + msg.typo, + itertools::join(msg.corrections.iter().map(|c| 
format!("`{}`", c)), ", ") ) .unwrap(); writeln!( diff --git a/src/dict.rs b/src/dict.rs index 70ab37b..0dcf9fd 100644 --- a/src/dict.rs +++ b/src/dict.rs @@ -15,24 +15,24 @@ impl BuiltIn { pub fn correct_ident<'s, 'w>( &'s self, _ident: typos::tokens::Identifier<'w>, - ) -> Option<Cow<'s, str>> { - None + ) -> Vec<Cow<'s, str>> { + Vec::new() } - pub fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Option<Cow<'s, str>> { - map_lookup(&typos_dict::WORD_DICTIONARY, word.token()).map(|s| case_correct(s, word.case())) + pub fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Vec<Cow<'s, str>> { + map_lookup(&typos_dict::WORD_DICTIONARY, word.token()) + .map(|s| case_correct(s, word.case())) + .into_iter() + .collect() } } impl typos::Dictionary for BuiltIn { - fn correct_ident<'s, 'w>( - &'s self, - ident: typos::tokens::Identifier<'w>, - ) -> Option<Cow<'s, str>> { + fn correct_ident<'s, 'w>(&'s self, ident: typos::tokens::Identifier<'w>) -> Vec<Cow<'s, str>> { BuiltIn::correct_ident(self, ident) } - fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Option<Cow<'s, str>> { + fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Vec<Cow<'s, str>> { BuiltIn::correct_word(self, word) } } diff --git a/src/replace.rs b/src/replace.rs index adce0c6..bfeec82 100644 --- a/src/replace.rs +++ b/src/replace.rs @@ -58,26 +58,36 @@ impl<'r> typos::report::Report for Replace<'r> { fn report(&self, msg: typos::report::Message<'_>) -> bool { match msg { typos::report::Message::Correction(msg) => { - let path = msg.path.to_owned(); - let line_num = msg.line_num; - let correction = Correction::from_content(msg); - let mut deferred = self.deferred.lock().unwrap(); - let content = deferred - .content - .entry(path) - .or_insert_with(BTreeMap::new) - .entry(line_num) - .or_insert_with(Vec::new); - content.push(correction); - false + if msg.corrections.len() == 1 { + let path = msg.path.to_owned(); + let line_num = msg.line_num; + let correction = Correction::from_content(msg); + let mut deferred = 
self.deferred.lock().unwrap(); + let content = deferred + .content + .entry(path) + .or_insert_with(BTreeMap::new) + .entry(line_num) + .or_insert_with(Vec::new); + content.push(correction); + false + } else { + self.reporter + .report(typos::report::Message::Correction(msg)) + } } typos::report::Message::PathCorrection(msg) => { - let path = msg.path.to_owned(); - let correction = Correction::from_path(msg); - let mut deferred = self.deferred.lock().unwrap(); - let content = deferred.paths.entry(path).or_insert_with(Vec::new); - content.push(correction); - false + if msg.corrections.len() == 1 { + let path = msg.path.to_owned(); + let correction = Correction::from_path(msg); + let mut deferred = self.deferred.lock().unwrap(); + let content = deferred.paths.entry(path).or_insert_with(Vec::new); + content.push(correction); + false + } else { + self.reporter + .report(typos::report::Message::PathCorrection(msg)) + } } _ => self.reporter.report(msg), } @@ -99,18 +109,20 @@ struct Correction { impl Correction { fn from_content(other: typos::report::Correction<'_>) -> Self { + assert_eq!(other.corrections.len(), 1); Self { byte_offset: other.byte_offset, typo: other.typo.as_bytes().to_vec(), - correction: other.correction.as_bytes().to_vec(), + correction: other.corrections[0].as_bytes().to_vec(), } } fn from_path(other: typos::report::PathCorrection<'_>) -> Self { + assert_eq!(other.corrections.len(), 1); Self { byte_offset: other.byte_offset, typo: other.typo.as_bytes().to_vec(), - correction: other.correction.as_bytes().to_vec(), + correction: other.corrections[0].as_bytes().to_vec(), } } } @@ -210,7 +222,7 @@ mod test { .line_num(1) .byte_offset(2) .typo("foo") - .correction(std::borrow::Cow::Borrowed("bar")) + .corrections(vec![std::borrow::Cow::Borrowed("bar")]) .into(), ); replace.write().unwrap(); @@ -231,7 +243,7 @@ mod test { .path(input_file.path()) .byte_offset(0) .typo("foo") - .correction(std::borrow::Cow::Borrowed("bar")) + 
.corrections(vec![std::borrow::Cow::Borrowed("bar")]) .into(), ); replace.write().unwrap();