mirror of
https://github.com/crate-ci/typos.git
synced 2025-02-18 19:12:50 -05:00
feat: Support multiple, valid corrections
Some of the other spell checkers already do this. I have not yet checked where our own dictionary might need it, but it will be important for supporting dialects.
This commit is contained in:
parent
a5ed18ee46
commit
bc1302f01b
6 changed files with 75 additions and 56 deletions
|
@ -13,7 +13,7 @@ fn correct_word_hit(b: &mut test::Bencher) {
|
||||||
let input = typos::tokens::Word::new("successs", 0).unwrap();
|
let input = typos::tokens::Word::new("successs", 0).unwrap();
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
corrections.correct_word(input),
|
corrections.correct_word(input),
|
||||||
Some(std::borrow::Cow::Borrowed("successes"))
|
vec![std::borrow::Cow::Borrowed("successes")]
|
||||||
);
|
);
|
||||||
b.iter(|| corrections.correct_word(input));
|
b.iter(|| corrections.correct_word(input));
|
||||||
}
|
}
|
||||||
|
@ -22,6 +22,6 @@ fn correct_word_hit(b: &mut test::Bencher) {
|
||||||
fn correct_word_miss(b: &mut test::Bencher) {
|
fn correct_word_miss(b: &mut test::Bencher) {
|
||||||
let corrections = typos_cli::dict::BuiltIn::new();
|
let corrections = typos_cli::dict::BuiltIn::new();
|
||||||
let input = typos::tokens::Word::new("success", 0).unwrap();
|
let input = typos::tokens::Word::new("success", 0).unwrap();
|
||||||
assert_eq!(corrections.correct_word(input), None);
|
assert!(corrections.correct_word(input).is_empty());
|
||||||
b.iter(|| corrections.correct_word(input));
|
b.iter(|| corrections.correct_word(input));
|
||||||
}
|
}
|
||||||
|
|
|
@ -227,24 +227,26 @@ impl Checks {
|
||||||
|
|
||||||
if let Some(part) = path.file_name().and_then(|s| s.to_str()) {
|
if let Some(part) = path.file_name().and_then(|s| s.to_str()) {
|
||||||
for ident in parser.parse(part) {
|
for ident in parser.parse(part) {
|
||||||
if let Some(correction) = dictionary.correct_ident(ident) {
|
let corrections = dictionary.correct_ident(ident);
|
||||||
|
if !corrections.is_empty() {
|
||||||
let byte_offset = ident.offset();
|
let byte_offset = ident.offset();
|
||||||
let msg = report::PathCorrection {
|
let msg = report::PathCorrection {
|
||||||
path,
|
path,
|
||||||
byte_offset,
|
byte_offset,
|
||||||
typo: ident.token(),
|
typo: ident.token(),
|
||||||
correction,
|
corrections,
|
||||||
};
|
};
|
||||||
typos_found |= reporter.report(msg.into());
|
typos_found |= reporter.report(msg.into());
|
||||||
} else {
|
} else {
|
||||||
for word in ident.split() {
|
for word in ident.split() {
|
||||||
if let Some(correction) = dictionary.correct_word(word) {
|
let corrections = dictionary.correct_word(word);
|
||||||
|
if !corrections.is_empty() {
|
||||||
let byte_offset = word.offset();
|
let byte_offset = word.offset();
|
||||||
let msg = report::PathCorrection {
|
let msg = report::PathCorrection {
|
||||||
path,
|
path,
|
||||||
byte_offset,
|
byte_offset,
|
||||||
typo: word.token(),
|
typo: word.token(),
|
||||||
correction,
|
corrections,
|
||||||
};
|
};
|
||||||
typos_found |= reporter.report(msg.into());
|
typos_found |= reporter.report(msg.into());
|
||||||
}
|
}
|
||||||
|
@ -281,7 +283,8 @@ impl Checks {
|
||||||
for (line_idx, line) in buffer.lines().enumerate() {
|
for (line_idx, line) in buffer.lines().enumerate() {
|
||||||
let line_num = line_idx + 1;
|
let line_num = line_idx + 1;
|
||||||
for ident in parser.parse_bytes(line) {
|
for ident in parser.parse_bytes(line) {
|
||||||
if let Some(correction) = dictionary.correct_ident(ident) {
|
let corrections = dictionary.correct_ident(ident);
|
||||||
|
if !corrections.is_empty() {
|
||||||
let byte_offset = ident.offset();
|
let byte_offset = ident.offset();
|
||||||
let msg = report::Correction {
|
let msg = report::Correction {
|
||||||
path,
|
path,
|
||||||
|
@ -289,12 +292,13 @@ impl Checks {
|
||||||
line_num,
|
line_num,
|
||||||
byte_offset,
|
byte_offset,
|
||||||
typo: ident.token(),
|
typo: ident.token(),
|
||||||
correction,
|
corrections,
|
||||||
};
|
};
|
||||||
typos_found |= reporter.report(msg.into());
|
typos_found |= reporter.report(msg.into());
|
||||||
} else {
|
} else {
|
||||||
for word in ident.split() {
|
for word in ident.split() {
|
||||||
if let Some(correction) = dictionary.correct_word(word) {
|
let corrections = dictionary.correct_word(word);
|
||||||
|
if !corrections.is_empty() {
|
||||||
let byte_offset = word.offset();
|
let byte_offset = word.offset();
|
||||||
let msg = report::Correction {
|
let msg = report::Correction {
|
||||||
path,
|
path,
|
||||||
|
@ -302,7 +306,7 @@ impl Checks {
|
||||||
line_num,
|
line_num,
|
||||||
byte_offset,
|
byte_offset,
|
||||||
typo: word.token(),
|
typo: word.token(),
|
||||||
correction,
|
corrections,
|
||||||
};
|
};
|
||||||
typos_found |= reporter.report(msg.into());
|
typos_found |= reporter.report(msg.into());
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,10 +1,7 @@
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
|
||||||
pub trait Dictionary: Send + Sync {
|
pub trait Dictionary: Send + Sync {
|
||||||
fn correct_ident<'s, 'w>(
|
fn correct_ident<'s, 'w>(&'s self, _ident: crate::tokens::Identifier<'w>) -> Vec<Cow<'s, str>>;
|
||||||
&'s self,
|
|
||||||
_ident: crate::tokens::Identifier<'w>,
|
|
||||||
) -> Option<Cow<'s, str>>;
|
|
||||||
|
|
||||||
fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Option<Cow<'s, str>>;
|
fn correct_word<'s, 'w>(&'s self, word: crate::tokens::Word<'w>) -> Vec<Cow<'s, str>>;
|
||||||
}
|
}
|
||||||
|
|
|
@ -57,7 +57,7 @@ pub struct Correction<'m> {
|
||||||
pub line_num: usize,
|
pub line_num: usize,
|
||||||
pub byte_offset: usize,
|
pub byte_offset: usize,
|
||||||
pub typo: &'m str,
|
pub typo: &'m str,
|
||||||
pub correction: Cow<'m, str>,
|
pub corrections: Vec<Cow<'m, str>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'m> Default for Correction<'m> {
|
impl<'m> Default for Correction<'m> {
|
||||||
|
@ -68,7 +68,7 @@ impl<'m> Default for Correction<'m> {
|
||||||
line_num: 0,
|
line_num: 0,
|
||||||
byte_offset: 0,
|
byte_offset: 0,
|
||||||
typo: "",
|
typo: "",
|
||||||
correction: Cow::Borrowed(""),
|
corrections: Vec::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -79,7 +79,7 @@ pub struct PathCorrection<'m> {
|
||||||
pub path: &'m std::path::Path,
|
pub path: &'m std::path::Path,
|
||||||
pub byte_offset: usize,
|
pub byte_offset: usize,
|
||||||
pub typo: &'m str,
|
pub typo: &'m str,
|
||||||
pub correction: Cow<'m, str>,
|
pub corrections: Vec<Cow<'m, str>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'m> Default for PathCorrection<'m> {
|
impl<'m> Default for PathCorrection<'m> {
|
||||||
|
@ -88,7 +88,7 @@ impl<'m> Default for PathCorrection<'m> {
|
||||||
path: std::path::Path::new("-"),
|
path: std::path::Path::new("-"),
|
||||||
byte_offset: 0,
|
byte_offset: 0,
|
||||||
typo: "",
|
typo: "",
|
||||||
correction: Cow::Borrowed(""),
|
corrections: Vec::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -201,11 +201,16 @@ impl Report for PrintBrief {
|
||||||
msg.line_num,
|
msg.line_num,
|
||||||
msg.byte_offset,
|
msg.byte_offset,
|
||||||
msg.typo,
|
msg.typo,
|
||||||
msg.correction
|
itertools::join(msg.corrections.iter(), ", ")
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
Message::PathCorrection(msg) => {
|
Message::PathCorrection(msg) => {
|
||||||
println!("{}: {} -> {}", msg.path.display(), msg.typo, msg.correction);
|
println!(
|
||||||
|
"{}: {} -> {}",
|
||||||
|
msg.path.display(),
|
||||||
|
msg.typo,
|
||||||
|
itertools::join(msg.corrections.iter(), ", ")
|
||||||
|
);
|
||||||
}
|
}
|
||||||
Message::File(msg) => {
|
Message::File(msg) => {
|
||||||
println!("{}", msg.path.display());
|
println!("{}", msg.path.display());
|
||||||
|
@ -236,10 +241,10 @@ impl Report for PrintLong {
|
||||||
Message::Correction(msg) => print_long_correction(msg),
|
Message::Correction(msg) => print_long_correction(msg),
|
||||||
Message::PathCorrection(msg) => {
|
Message::PathCorrection(msg) => {
|
||||||
println!(
|
println!(
|
||||||
"{}: error: `{}` should be `{}`",
|
"{}: error: `{}` should be {}",
|
||||||
msg.path.display(),
|
msg.path.display(),
|
||||||
msg.typo,
|
msg.typo,
|
||||||
msg.correction
|
itertools::join(msg.corrections.iter().map(|c| format!("`{}`", c)), ", ")
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
Message::File(msg) => {
|
Message::File(msg) => {
|
||||||
|
@ -274,8 +279,9 @@ fn print_long_correction(msg: &Correction) {
|
||||||
|
|
||||||
writeln!(
|
writeln!(
|
||||||
handle,
|
handle,
|
||||||
"error: `{}` should be `{}`",
|
"error: `{}` should be {}",
|
||||||
msg.typo, msg.correction
|
msg.typo,
|
||||||
|
itertools::join(msg.corrections.iter().map(|c| format!("`{}`", c)), ", ")
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
writeln!(
|
writeln!(
|
||||||
|
|
18
src/dict.rs
18
src/dict.rs
|
@ -15,24 +15,24 @@ impl BuiltIn {
|
||||||
pub fn correct_ident<'s, 'w>(
|
pub fn correct_ident<'s, 'w>(
|
||||||
&'s self,
|
&'s self,
|
||||||
_ident: typos::tokens::Identifier<'w>,
|
_ident: typos::tokens::Identifier<'w>,
|
||||||
) -> Option<Cow<'s, str>> {
|
) -> Vec<Cow<'s, str>> {
|
||||||
None
|
Vec::new()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Option<Cow<'s, str>> {
|
pub fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Vec<Cow<'s, str>> {
|
||||||
map_lookup(&typos_dict::WORD_DICTIONARY, word.token()).map(|s| case_correct(s, word.case()))
|
map_lookup(&typos_dict::WORD_DICTIONARY, word.token())
|
||||||
|
.map(|s| case_correct(s, word.case()))
|
||||||
|
.into_iter()
|
||||||
|
.collect()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl typos::Dictionary for BuiltIn {
|
impl typos::Dictionary for BuiltIn {
|
||||||
fn correct_ident<'s, 'w>(
|
fn correct_ident<'s, 'w>(&'s self, ident: typos::tokens::Identifier<'w>) -> Vec<Cow<'s, str>> {
|
||||||
&'s self,
|
|
||||||
ident: typos::tokens::Identifier<'w>,
|
|
||||||
) -> Option<Cow<'s, str>> {
|
|
||||||
BuiltIn::correct_ident(self, ident)
|
BuiltIn::correct_ident(self, ident)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Option<Cow<'s, str>> {
|
fn correct_word<'s, 'w>(&'s self, word: typos::tokens::Word<'w>) -> Vec<Cow<'s, str>> {
|
||||||
BuiltIn::correct_word(self, word)
|
BuiltIn::correct_word(self, word)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -58,6 +58,7 @@ impl<'r> typos::report::Report for Replace<'r> {
|
||||||
fn report(&self, msg: typos::report::Message<'_>) -> bool {
|
fn report(&self, msg: typos::report::Message<'_>) -> bool {
|
||||||
match msg {
|
match msg {
|
||||||
typos::report::Message::Correction(msg) => {
|
typos::report::Message::Correction(msg) => {
|
||||||
|
if msg.corrections.len() == 1 {
|
||||||
let path = msg.path.to_owned();
|
let path = msg.path.to_owned();
|
||||||
let line_num = msg.line_num;
|
let line_num = msg.line_num;
|
||||||
let correction = Correction::from_content(msg);
|
let correction = Correction::from_content(msg);
|
||||||
|
@ -70,14 +71,23 @@ impl<'r> typos::report::Report for Replace<'r> {
|
||||||
.or_insert_with(Vec::new);
|
.or_insert_with(Vec::new);
|
||||||
content.push(correction);
|
content.push(correction);
|
||||||
false
|
false
|
||||||
|
} else {
|
||||||
|
self.reporter
|
||||||
|
.report(typos::report::Message::Correction(msg))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
typos::report::Message::PathCorrection(msg) => {
|
typos::report::Message::PathCorrection(msg) => {
|
||||||
|
if msg.corrections.len() == 1 {
|
||||||
let path = msg.path.to_owned();
|
let path = msg.path.to_owned();
|
||||||
let correction = Correction::from_path(msg);
|
let correction = Correction::from_path(msg);
|
||||||
let mut deferred = self.deferred.lock().unwrap();
|
let mut deferred = self.deferred.lock().unwrap();
|
||||||
let content = deferred.paths.entry(path).or_insert_with(Vec::new);
|
let content = deferred.paths.entry(path).or_insert_with(Vec::new);
|
||||||
content.push(correction);
|
content.push(correction);
|
||||||
false
|
false
|
||||||
|
} else {
|
||||||
|
self.reporter
|
||||||
|
.report(typos::report::Message::PathCorrection(msg))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
_ => self.reporter.report(msg),
|
_ => self.reporter.report(msg),
|
||||||
}
|
}
|
||||||
|
@ -99,18 +109,20 @@ struct Correction {
|
||||||
|
|
||||||
impl Correction {
|
impl Correction {
|
||||||
fn from_content(other: typos::report::Correction<'_>) -> Self {
|
fn from_content(other: typos::report::Correction<'_>) -> Self {
|
||||||
|
assert_eq!(other.corrections.len(), 1);
|
||||||
Self {
|
Self {
|
||||||
byte_offset: other.byte_offset,
|
byte_offset: other.byte_offset,
|
||||||
typo: other.typo.as_bytes().to_vec(),
|
typo: other.typo.as_bytes().to_vec(),
|
||||||
correction: other.correction.as_bytes().to_vec(),
|
correction: other.corrections[0].as_bytes().to_vec(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn from_path(other: typos::report::PathCorrection<'_>) -> Self {
|
fn from_path(other: typos::report::PathCorrection<'_>) -> Self {
|
||||||
|
assert_eq!(other.corrections.len(), 1);
|
||||||
Self {
|
Self {
|
||||||
byte_offset: other.byte_offset,
|
byte_offset: other.byte_offset,
|
||||||
typo: other.typo.as_bytes().to_vec(),
|
typo: other.typo.as_bytes().to_vec(),
|
||||||
correction: other.correction.as_bytes().to_vec(),
|
correction: other.corrections[0].as_bytes().to_vec(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -210,7 +222,7 @@ mod test {
|
||||||
.line_num(1)
|
.line_num(1)
|
||||||
.byte_offset(2)
|
.byte_offset(2)
|
||||||
.typo("foo")
|
.typo("foo")
|
||||||
.correction(std::borrow::Cow::Borrowed("bar"))
|
.corrections(vec![std::borrow::Cow::Borrowed("bar")])
|
||||||
.into(),
|
.into(),
|
||||||
);
|
);
|
||||||
replace.write().unwrap();
|
replace.write().unwrap();
|
||||||
|
@ -231,7 +243,7 @@ mod test {
|
||||||
.path(input_file.path())
|
.path(input_file.path())
|
||||||
.byte_offset(0)
|
.byte_offset(0)
|
||||||
.typo("foo")
|
.typo("foo")
|
||||||
.correction(std::borrow::Cow::Borrowed("bar"))
|
.corrections(vec![std::borrow::Cow::Borrowed("bar")])
|
||||||
.into(),
|
.into(),
|
||||||
);
|
);
|
||||||
replace.write().unwrap();
|
replace.write().unwrap();
|
||||||
|
|
Loading…
Add table
Reference in a new issue