mirror of
https://github.com/crate-ci/typos.git
synced 2025-02-13 08:40:29 -05:00
perf(parser): Try hand-rolled number parsing
This commit is contained in:
parent
6b92e345cc
commit
4b94352b7a
1 changed file with 30 additions and 15 deletions
|
@ -115,11 +115,11 @@ impl Tokenizer {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn accept(&self, contents: &str) -> bool {
|
fn accept(&self, contents: &str) -> bool {
|
||||||
if self.ignore_numbers && is_number(contents) {
|
if self.ignore_numbers && is_number(contents.as_bytes()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if self.ignore_hex && is_hex(contents) {
|
if self.ignore_hex && is_hex(contents.as_bytes()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -180,22 +180,37 @@ impl<'s> Iterator for Utf8Chunks<'s> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// `_`: number literal separator in Rust and other languages
|
fn is_number(ident: &[u8]) -> bool {
|
||||||
// `'`: number literal separator in C++
|
ident.iter().all(|b| is_digit(*b) || is_digit_sep(*b))
|
||||||
static DIGITS: once_cell::sync::Lazy<regex::Regex> =
|
|
||||||
once_cell::sync::Lazy::new(|| regex::Regex::new(r#"^[0-9_']+$"#).unwrap());
|
|
||||||
|
|
||||||
fn is_number(ident: &str) -> bool {
|
|
||||||
DIGITS.is_match(ident)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Reports whether `ident` is a hexadecimal number literal.
///
/// The literal must start with `0x` or `0X` and be followed by at least one
/// byte, each of which is an ASCII hex digit or a digit separator (`_` or `'`).
fn is_hex(ident: &[u8]) -> bool {
    // Prefix (2 bytes) plus at least one payload byte.
    if ident.len() < 3 {
        false
    } else {
        ident[0] == b'0'
            // Accept both prefix cases, as the replaced regex (`^0[xX]...`) did.
            && (ident[1] == b'x' || ident[1] == b'X')
            && ident[2..]
                .iter()
                .all(|b| is_hex_digit(*b) || is_digit_sep(*b))
    }
}

/// Reports whether `chr` is an ASCII decimal digit (`0`-`9`).
#[inline]
fn is_digit(chr: u8) -> bool {
    chr.is_ascii_digit()
}

/// Reports whether `chr` is a separator allowed inside number literals.
#[inline]
fn is_digit_sep(chr: u8) -> bool {
    // `_`: number literal separator in Rust and other languages
    // `'`: number literal separator in C++
    chr == b'_' || chr == b'\''
}

/// Reports whether `chr` is an ASCII hexadecimal digit (`0`-`9`, `a`-`f`, `A`-`F`).
#[inline]
fn is_hex_digit(chr: u8) -> bool {
    chr.is_ascii_hexdigit()
}
|
||||||
|
|
||||||
/// A term composed of Words.
|
/// A term composed of Words.
|
||||||
|
|
Loading…
Add table
Reference in a new issue