typos/src/lib.rs

83 lines
2.2 KiB
Rust
Raw Normal View History

2019-01-23 07:33:51 -07:00
#[macro_use]
extern crate serde_derive;
2019-01-24 08:24:20 -07:00
mod dict;
2019-06-14 14:53:34 -06:00
mod dict_codegen;
2019-01-22 15:01:33 -07:00
2019-01-24 08:24:20 -07:00
pub mod report;
2019-04-16 20:16:31 -06:00
pub mod tokens;
2019-01-23 07:44:01 -07:00
2019-01-24 08:24:20 -07:00
pub use crate::dict::*;
2019-01-22 15:01:33 -07:00
2019-01-24 08:24:20 -07:00
use std::fs::File;
use std::io::Read;
2019-01-23 07:33:51 -07:00
use bstr::ByteSlice;
2019-06-14 06:43:21 -06:00
pub fn process_file(
path: &std::path::Path,
dictionary: &Dictionary,
ignore_hex: bool,
2019-07-13 20:14:06 -06:00
binary: bool,
2019-06-14 06:43:21 -06:00
report: report::Report,
) -> Result<(), failure::Error> {
2019-01-22 15:01:33 -07:00
let mut buffer = Vec::new();
File::open(path)?.read_to_end(&mut buffer)?;
2019-07-13 20:14:06 -06:00
if !binary && buffer.find_byte(b'\0').is_some() {
let msg = report::BinaryFile {
path,
non_exhaustive: (),
};
report(msg.into());
2019-07-13 20:14:06 -06:00
return Ok(());
}
for (line_idx, line) in buffer.lines().enumerate() {
2019-01-22 15:01:33 -07:00
let line_num = line_idx + 1;
for ident in tokens::Identifier::parse(line) {
if !ignore_hex && is_hex(ident.token()) {
continue;
}
if let Some(correction) = dictionary.correct_ident(ident) {
let col_num = ident.offset();
let msg = report::Correction {
path,
line,
line_num,
col_num,
2019-06-22 22:01:27 -06:00
typo: ident.token(),
correction,
non_exhaustive: (),
};
report(msg.into());
2019-01-22 15:01:33 -07:00
}
for word in ident.split() {
if let Some(correction) = dictionary.correct_word(word) {
let col_num = word.offset();
let msg = report::Correction {
path,
line,
line_num,
col_num,
2019-06-22 22:01:27 -06:00
typo: word.token(),
correction,
non_exhaustive: (),
};
report(msg.into());
}
}
2019-01-22 15:01:33 -07:00
}
}
Ok(())
}
/// Returns `true` when `ident` looks like a hexadecimal number literal.
///
/// A match requires the prefix `0x` or `0X` followed by at least one
/// character, all of which must be ASCII hex digits or one of the digit
/// separators `_` and `'`. Nothing may precede the prefix or follow the
/// digits (so a trailing newline makes the input a non-match).
///
/// This is equivalent to matching `^0[xX][0-9a-fA-F_']+$`, implemented as a
/// plain byte scan so no pattern has to be compiled at runtime.
fn is_hex(ident: &str) -> bool {
    // `_`: number literal separator in Rust and other languages
    // `'`: number literal separator in C++
    fn is_digit_or_separator(byte: u8) -> bool {
        byte.is_ascii_hexdigit() || byte == b'_' || byte == b'\''
    }

    let bytes = ident.as_bytes();
    if !(bytes.starts_with(b"0x") || bytes.starts_with(b"0X")) {
        return false;
    }

    // The prefix is two ASCII bytes, so everything from index 2 on is the
    // digit part; it must be non-empty to count as a literal.
    let digits = &bytes[2..];
    !digits.is_empty() && digits.iter().all(|&byte| is_digit_or_separator(byte))
}