From 3123de3bf70fbb10e3bffdfff7426e38255bb5a2 Mon Sep 17 00:00:00 2001 From: Michael Chernicoff Date: Tue, 29 Oct 2024 22:27:25 -0400 Subject: [PATCH] feat: Adds typo plugin --- Cargo.lock | 18 +- Cargo.toml | 5 +- hipcheck/src/engine.rs | 5 +- plugins/affiliation/Cargo.toml | 3 +- plugins/affiliation/src/main.rs | 4 +- plugins/npm_dependencies/plugin.kdl | 10 + plugins/typo/Cargo.toml | 22 ++ plugins/typo/plugin.kdl | 13 + plugins/typo/src/languages.rs | 355 ++++++++++++++++++++++++++ plugins/typo/src/main.rs | 243 ++++++++++++++++++ plugins/typo/src/types.rs | 351 ++++++++++++++++++++++++++ plugins/typo/src/util/fs.rs | 39 +++ plugins/typo/src/util/mod.rs | 3 + plugins/typo/test/Typos.toml | 372 ++++++++++++++++++++++++++++ 14 files changed, 1431 insertions(+), 12 deletions(-) create mode 100644 plugins/npm_dependencies/plugin.kdl create mode 100644 plugins/typo/Cargo.toml create mode 100644 plugins/typo/plugin.kdl create mode 100644 plugins/typo/src/languages.rs create mode 100644 plugins/typo/src/main.rs create mode 100644 plugins/typo/src/types.rs create mode 100644 plugins/typo/src/util/fs.rs create mode 100644 plugins/typo/src/util/mod.rs create mode 100644 plugins/typo/test/Typos.toml diff --git a/Cargo.lock b/Cargo.lock index 3cd54158..177d6400 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -62,7 +62,6 @@ dependencies = [ "serde", "serde_json", "strum 0.26.3", - "test-log", "tokio", ] @@ -3578,6 +3577,23 @@ dependencies = [ "typify-impl", ] +[[package]] +name = "typo_sdk" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "hipcheck-sdk", + "log", + "maplit", + "pathbuf", + "serde", + "serde_json", + "tokio", + "toml", + "url", +] + [[package]] name = "unicode-bidi" version = "0.3.15" diff --git a/Cargo.toml b/Cargo.toml index 9a605999..b6078bbc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,8 +30,9 @@ members = [ "plugins/entropy", "plugins/linguist", "plugins/review", - "plugins/binary" -, "plugins/identity"] + "plugins/binary", + 
"plugins/identity", + "plugins/typo"] # Make sure Hipcheck is run with `cargo run`. # diff --git a/hipcheck/src/engine.rs b/hipcheck/src/engine.rs index fcbb97b2..b8dd6225 100644 --- a/hipcheck/src/engine.rs +++ b/hipcheck/src/engine.rs @@ -65,10 +65,7 @@ fn default_query_explanation( let core = db.core(); let key = get_plugin_key(publisher.as_str(), plugin.as_str()); let Some(p_handle) = core.plugins.get(&key) else { - return Err(hc_error!( - "Plugin '{}' not found", - key, - )); + return Err(hc_error!("Plugin '{}' not found", key,)); }; Ok(p_handle.get_default_query_explanation().cloned()) } diff --git a/plugins/affiliation/Cargo.toml b/plugins/affiliation/Cargo.toml index f3b7c5ca..a33b9f13 100644 --- a/plugins/affiliation/Cargo.toml +++ b/plugins/affiliation/Cargo.toml @@ -19,5 +19,4 @@ strum = { version = "0.26.3", features = ["derive"] } tokio = { version = "1.40.0", features = ["rt"] } [dev-dependencies] -hipcheck-sdk = { path = "../../sdk/rust", features = ["macros", "mock_engine"]} -test-log = "0.2.16" +hipcheck-sdk = { path = "../../sdk/rust", features = ["macros", "mock_engine"]} \ No newline at end of file diff --git a/plugins/affiliation/src/main.rs b/plugins/affiliation/src/main.rs index d2d6aa03..a17d3e00 100644 --- a/plugins/affiliation/src/main.rs +++ b/plugins/affiliation/src/main.rs @@ -406,8 +406,6 @@ mod test { use pathbuf::pathbuf; use std::{env, result::Result as StdResult}; - use test_log::test; - fn repo() -> LocalGitRepo { LocalGitRepo { path: "/home/users/me/.cache/hipcheck/clones/github/foo/bar/".to_string(), @@ -547,7 +545,7 @@ mod test { Ok(mock_responses) } - #[test(tokio::test)] + #[tokio::test] async fn test_affiliation() { let orgs_file = pathbuf![&env::current_dir().unwrap(), "test", "example_orgs.kdl"]; let orgs_spec = OrgSpec::load_from(&orgs_file).unwrap(); diff --git a/plugins/npm_dependencies/plugin.kdl b/plugins/npm_dependencies/plugin.kdl new file mode 100644 index 00000000..26b7a043 --- /dev/null +++ 
b/plugins/npm_dependencies/plugin.kdl @@ -0,0 +1,10 @@ +publisher "mitre" +name "npm_dependencies" +version "0.1.0" +license "Apache-2.0" +entrypoint { + on arch="aarch64-apple-darwin" "./target/debug/npm_dependencies" + on arch="x86_64-apple-darwin" "./target/debug/npm_dependencies" + on arch="x86_64-unknown-linux-gnu" "./target/debug/npm_dependencies" + on arch="x86_64-pc-windows-msvc" "./target/debug/npm_dependencies" +} diff --git a/plugins/typo/Cargo.toml b/plugins/typo/Cargo.toml new file mode 100644 index 00000000..2c685de5 --- /dev/null +++ b/plugins/typo/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "typo_sdk" +version = "0.1.0" +license = "Apache-2.0" +edition = "2021" +publish = false + +[dependencies] +anyhow = "1.0.91" +clap = { version = "4.5.18", features = ["derive"] } +hipcheck-sdk = { path = "../../sdk/rust", features = ["macros"] } +log = "0.4.22" +maplit = "1.0.2" +pathbuf = "1.0.0" +serde = { version = "1.0.210", features = ["derive", "rc"] } +serde_json = "1.0.128" +tokio = { version = "1.40.0", features = ["rt"] } +toml = "0.8.19" +url = "2.5.2" + +[dev-dependencies] +hipcheck-sdk = { path = "../../sdk/rust", features = ["mock_engine"] } diff --git a/plugins/typo/plugin.kdl b/plugins/typo/plugin.kdl new file mode 100644 index 00000000..ed49fbdb --- /dev/null +++ b/plugins/typo/plugin.kdl @@ -0,0 +1,13 @@ +publisher "mitre" +name "typo" +version "0.1.0" +license "Apache-2.0" +entrypoint { + on arch="aarch64-apple-darwin" "./target/debug/typo" + on arch="x86_64-apple-darwin" "./target/debug/typo" + on arch="x86_64-unknown-linux-gnu" "./target/debug/typo" + on arch="x86_64-pc-windows-msvc" "./target/debug/typo" +} +dependencies { + plugin "mitre/npm_dependencies" version="0.1.0" manifest="./plugins/npm_dependencies/plugin.kdl" +} diff --git a/plugins/typo/src/languages.rs b/plugins/typo/src/languages.rs new file mode 100644 index 00000000..e409a14d --- /dev/null +++ b/plugins/typo/src/languages.rs @@ -0,0 +1,355 @@ +// SPDX-License-Identifier: 
Apache-2.0 + +use crate::{ + types::{Homoglyphs, KeyboardLayout, NpmDependencies, Typo}, + util::fs as file, +}; +use anyhow::{Context as _, Result}; +use serde::Deserialize; +use std::{collections::HashMap, path::Path}; + +#[derive(Debug, Deserialize)] +pub struct TypoFile { + languages: Languages, +} + +#[derive(Debug, Deserialize)] +struct Languages { + javascript: Vec, +} + +impl TypoFile { + pub fn load_from(typo_path: &Path) -> Result { + file::exists(typo_path).context("typo file does not exist")?; + let typo_file = file::read_toml(typo_path).context("failed to open typo file")?; + + Ok(typo_file) + } +} + +#[derive(Debug, Clone)] +pub struct NameFuzzer<'t> { + // A map of strings which may be typos to the notes for what they may be + // typos of. Fuzzing then only needs to hash the string and look it up in the + // typo hash map. + typos: HashMap>, + // The list of original names. + name: &'t str, +} + +impl<'t> NameFuzzer<'t> { + /// Construct a new NameFuzzer for the given corpus. + pub fn new(name: &'t str) -> NameFuzzer<'t> { + let typos = { + let keyboards = vec![ + KeyboardLayout::qwerty(), + KeyboardLayout::qwertz(), + KeyboardLayout::azerty(), + ]; + + let homoglyphs = vec![Homoglyphs::ascii()]; + + get_typos(name, &keyboards, &homoglyphs).iter().fold( + HashMap::new(), + |mut typos: HashMap>, typo| { + typos + .entry(typo.to_str().to_owned()) + .and_modify(|val| val.push(typo.clone())) + .or_insert_with(|| vec![typo.clone()]); + + typos + }, + ) + }; + + NameFuzzer { typos, name } + } + + /// Check the name against the set of known typos for the corpus to generate + /// a list of possible typos. + /// + /// Returns an empty slice if no typos were found. 
+ pub fn fuzz(&self, name: &str) -> &[Typo] { + if self.name == name { + return &[]; + } + + self.typos.get(name).map(AsRef::as_ref).unwrap_or(&[]) + } +} + +pub(crate) fn typos_for_javascript( + typo_file: &TypoFile, + dependencies: NpmDependencies, +) -> Result> { + let mut typos = Vec::new(); + + for legit_name in &typo_file.languages.javascript { + let fuzzer = NameFuzzer::new(legit_name); + + // Add a dependency name to the list of typos if the list of possible typos for that name is non-empty + for dependency in &dependencies.deps { + if !fuzzer.fuzz(dependency).is_empty() { + typos.push(dependency.to_string()); + } + } + } + + Ok(typos) +} + +#[inline] +fn get_typos(name: &str, keyboards: &[KeyboardLayout], homoglyphs: &[Homoglyphs]) -> Vec { + let mut results = Vec::new(); + + // Get all the kinds of typos. + get_addition_typos(&mut results, name); + get_bitsquatting_typos(&mut results, name); + get_hyphenation_typos(&mut results, name); + get_insertion_typos(&mut results, name, keyboards); + get_omission_typos(&mut results, name); + get_repetition_typos(&mut results, name); + get_replacement_typos(&mut results, name, keyboards); + get_transposition_typos(&mut results, name); + get_vowel_swap_typos(&mut results, name); + get_pluralization_typos(&mut results, name); + get_homoglyph_typos(&mut results, name, homoglyphs); + + // The methods above might generate duplicates. This removes them. + // + // Sorting is done with sort() rather than sort_unstable() to ensure that the + // order of the different kinds of typos is preserved, to make testing easier. + // + // Given that a fuzzer should only be constructed once for a corpus, the cost + // difference of this is expected to be negligible. 
+ results.sort(); + results.dedup(); + + results +} + +#[inline] +fn get_addition_typos(results: &mut Vec, name: &str) { + results.extend( + (b'_'..b'z') + .map(char::from) + .map(|c| format!("{}{}", name, c)) + .filter(|t| t != name) + .map(Typo::addition), + ); +} + +#[inline] +fn get_bitsquatting_typos(results: &mut Vec, name: &str) { + results.extend( + [1, 2, 4, 8, 16, 32, 64, 128] + .iter() + .flat_map(|mask| { + name.bytes().enumerate().map(move |(index, byte)| { + let c = mask ^ byte; + + // If the corrupted byte is within the proper ASCII range, then + // produce a new string including the corrupted byte. + if (c == b'-') || (c == b'_') || c.is_ascii_digit() || c.is_ascii_lowercase() { + let mut corrupted = name.as_bytes().to_vec(); + + // Flipping a bit inside a multi-byte character can leave invalid + // UTF-8 behind, so the result is validated rather than written + // through `unsafe`; invalid corruptions are dropped. + corrupted[index] = c; + + String::from_utf8(corrupted).ok() + } else { + None + } + }) + }) + .flatten() + .filter(|t| t != name) + .map(Typo::bitsquatting), + ); +} + +#[inline] +fn get_hyphenation_typos(results: &mut Vec, name: &str) { + results.extend( + // `char_indices` yields byte offsets, which is what `String::insert` expects. + name.char_indices() + .map(|(index, _)| { + let mut corrupted = name.to_owned(); + corrupted.insert(index, '-'); + corrupted + }) + .filter(|t| t != name) + .map(Typo::hyphenation), + ); +} + +#[inline] +fn get_insertion_typos(results: &mut Vec, name: &str, keyboards: &[KeyboardLayout]) { + results.extend( + keyboards + .iter() + .flat_map(|keyboard| { + name.char_indices().flat_map(move |(index, c)| { + let mut corruptions = Vec::new(); + + if keyboard.neighbors().contains_key(&c) { + for neighbor in &keyboard.neighbors()[&c] { + // Before the current character. + let mut corrupted_before = name.to_owned(); + corrupted_before.insert(index, *neighbor); + corruptions.push(corrupted_before); + + // After the current character.
+ let mut corrupted_after = name.to_owned(); + corrupted_after.insert(index + c.len_utf8(), *neighbor); + corruptions.push(corrupted_after); + } + } + + corruptions + }) + }) + .filter(|t| t != name) + .map(Typo::insertion), + ); +} + +#[inline] +fn get_omission_typos(results: &mut Vec, name: &str) { + results.extend( + // `char_indices` yields byte offsets, which is what `String::remove` expects. + name.char_indices() + .map(|(index, _)| { + let mut corrupted = name.to_owned(); + corrupted.remove(index); + corrupted + }) + .filter(|t| t != name) + .map(Typo::omission), + ); +} + +#[inline] +fn get_repetition_typos(results: &mut Vec, name: &str) { + results.extend( + // `char_indices` yields byte offsets, which is what `String::insert` expects. + name.char_indices() + .map(|(index, c)| { + let mut corrupted = name.to_owned(); + corrupted.insert(index, c); + corrupted + }) + .filter(|t| t != name) + .map(Typo::repetition), + ); +} + +#[inline] +fn get_replacement_typos(results: &mut Vec, name: &str, keyboards: &[KeyboardLayout]) { + results.extend( + keyboards + .iter() + .flat_map(|keyboard| { + name.char_indices().flat_map(move |(index, c)| { + let mut corruptions = Vec::new(); + + if keyboard.neighbors().contains_key(&c) { + for neighbor in &keyboard.neighbors()[&c] { + let mut corrupted = name.to_owned(); + corrupted.replace_range(index..index + c.len_utf8(), &neighbor.to_string()); + corruptions.push(corrupted); + } + } + + corruptions + }) + }) + .filter(|t| t != name) + .map(Typo::replacement), + ); +} + +#[inline] +fn get_transposition_typos(results: &mut Vec, name: &str) { + results.extend({ + // Credit for this code to shepmaster on Stack Overflow.
+ // + // https://codereview.stackexchange.com/questions/155294/transposing-characters-in-a-string + let bytes = name.as_bytes(); + + (1..bytes.len()) + .filter_map(move |i| { + let mut transpose = bytes.to_owned(); + transpose.swap(i - 1, i); + String::from_utf8(transpose).ok() // swaps that break UTF-8 (non-ASCII names) are skipped + }) + .filter(|t| t != name) + .map(Typo::transposition) + }); +} + +#[inline] +fn get_vowel_swap_typos(results: &mut Vec, name: &str) { + results.extend( + // `char_indices` yields byte offsets, which is what `String::replace_range` expects. + name.char_indices() + .flat_map(|(index, c)| { + let vowels = ['a', 'e', 'i', 'o', 'u']; + let mut corruptions = Vec::new(); + + for vowel in &vowels { + if vowels.contains(&c) { + let mut corrupted = name.to_owned(); + corrupted.replace_range(index..=index, &vowel.to_string()); + corruptions.push(corrupted); + } + } + + corruptions + }) + .filter(|t| t != name) + .map(Typo::vowel_swap), + ); +} + +#[inline] +fn get_pluralization_typos(results: &mut Vec, name: &str) { + results.extend( + // `char_indices` yields byte offsets; the 's' goes right after the current character. + name.char_indices() + .map(|(index, c)| { + let mut corrupted = name.to_owned(); + corrupted.insert(index + c.len_utf8(), 's'); + corrupted + }) + .filter(|t| t != name) + .map(Typo::pluralization), + ); +} + +#[inline] +fn get_homoglyph_typos(results: &mut Vec, name: &str, homoglyphs: &[Homoglyphs]) { + results.extend( + homoglyphs + .iter() + .flat_map(|homoglph| { + name.char_indices().flat_map(move |(index, c)| { + let mut corruptions = Vec::new(); + + if homoglph.glyphs().contains_key(&c) { + for glyph in &homoglph.glyphs()[&c] { + let mut corrupted = name.to_owned(); + corrupted.replace_range(index..index + c.len_utf8(), &glyph.to_string()); + corruptions.push(corrupted); + } + } + + corruptions + }) + }) + .filter(|t| t != name) + .map(Typo::homoglyph), + ); +} diff --git a/plugins/typo/src/main.rs b/plugins/typo/src/main.rs new file mode 100644 index 00000000..7121dc26 --- /dev/null +++ b/plugins/typo/src/main.rs @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: Apache-2.0 + +//!
Plugin for querying whether typos were found in the repository's package dependencies +//! Currently only NPM dependencies for JavaScript repositories are supported + +mod languages; +mod types; +mod util; + +use crate::{ + languages::TypoFile, + types::{Lang, NpmDependencies}, +}; +use clap::Parser; +use hipcheck_sdk::{prelude::*, types::Target}; +use serde::Deserialize; +use std::{path::PathBuf, result::Result as StdResult, sync::OnceLock}; + +pub static TYPOFILE: OnceLock = OnceLock::new(); + +#[derive(Deserialize)] +struct RawConfig { + typo_file_path: Option, + count_threshold: Option, +} + +struct Config { + typo_file: TypoFile, + count_threshold: Option, +} + +impl TryFrom for Config { + type Error = hipcheck_sdk::error::ConfigError; + fn try_from(value: RawConfig) -> StdResult { + // Get path to typo TOML file + let Some(raw_typo_path) = value.typo_file_path else { + return Err(ConfigError::MissingRequiredConfig { + field_name: "typo_file_path".to_owned(), + field_type: "string".to_owned(), + possible_values: vec![], + }); + }; + // Parse typo TOML file + let typo_path = PathBuf::from(raw_typo_path); + let typo_file = TypoFile::load_from(&typo_path).map_err(|e| { + log::error!("failed to load typo file: {}", e); + ConfigError::InvalidConfigValue { + field_name: "typo_file_path".to_owned(), + value: typo_path.display().to_string(), + reason: format!("failed to load typo file: {}", e), + } + })?; + + let count_threshold = value.count_threshold; + + Ok(Config { + typo_file, + count_threshold, + }) + } +} + +#[query] +async fn typo(engine: &mut PluginEngine, value: Target) -> Result> { + log::debug!("running typo query"); + + let local = value.local; + + // Get the typo file + let typo_file = TYPOFILE.get().ok_or(Error::UnspecifiedQueryState)?; + + // Get the repo's dependencies + let value = engine + .query("mitre/npm_dependencies/dependencies", local) + .await + .map_err(|e| { + log::error!("failed to get dependencies for typo query: {}", e); + Error::UnspecifiedQueryState + })?; +
let dependencies: NpmDependencies = + serde_json::from_value(value).map_err(Error::InvalidJsonInQueryOutput)?; + + // Get the dependencies with identified typos + let typo_deps = match dependencies.language { + Lang::JavaScript => languages::typos_for_javascript(typo_file, dependencies.clone()) + .map_err(|e| { + log::error!("{}", e); + Error::UnspecifiedQueryState + }), + Lang::Unknown => { + log::error!("failed to identify a known language"); + Err(Error::UnexpectedPluginQueryInputFormat) + } + }?; + log::debug!("typo_deps: {:?}", typo_deps); + + // Generate a boolean list of dependencies with and without typos + let typos = dependencies + .deps + .iter() + .map(|d| typo_deps.contains(d)) + .collect(); + log::debug!("typos: {:?}", typos); + + // Report each dependency typo as a concern + for concern in typo_deps { + engine.record_concern(concern); + } + + log::info!("completed typo query"); + + Ok(typos) +} + +#[derive(Clone, Debug, Default)] +struct TypoPlugin { + policy_conf: OnceLock>, +} + +impl Plugin for TypoPlugin { + const PUBLISHER: &'static str = "mitre"; + + const NAME: &'static str = "typo"; + + fn set_config(&self, config: Value) -> StdResult<(), ConfigError> { + // Deserialize and validate the config struct + let conf: Config = serde_json::from_value::(config) + .map_err(|e| ConfigError::Unspecified { + message: e.to_string(), + })?
+ .try_into()?; + + // Store the policy conf to be accessed only in the `default_policy_expr()` impl + self.policy_conf + .set(conf.count_threshold) + .map_err(|_| ConfigError::Unspecified { + message: "plugin was already configured".to_string(), + })?; + + TYPOFILE + .set(conf.typo_file) + .map_err(|_e| ConfigError::Unspecified { + message: "config was already set".to_owned(), + }) + } + + fn default_policy_expr(&self) -> Result { + match self.policy_conf.get() { + None => Err(Error::UnspecifiedQueryState), + // If no policy vars, we have no default expr + Some(None) => Ok("".to_owned()), + // Use policy config vars to construct a default expr (parentheses must balance) + Some(Some(policy_conf)) => { + Ok(format!("(lte (count (filter (eq #t) $)) {})", policy_conf)) + } + } + } + + fn explain_default_query(&self) -> Result> { + Ok(Some( + "Returns whether each of the repository's package dependencies has a typo in its name" + .to_string(), + )) + } + + queries! {} +} + +#[derive(Parser, Debug)] +struct Args { + #[arg(long)] + port: u16, +} + +#[tokio::main(flavor = "current_thread")] +async fn main() -> Result<()> { + let args = Args::try_parse().unwrap(); + PluginServer::register(TypoPlugin::default()) + .listen(args.port) + .await +} + +#[cfg(test)] +mod test { + use super::*; + + use hipcheck_sdk::types::LocalGitRepo; + use pathbuf::pathbuf; + use std::env; + + fn local() -> LocalGitRepo { + LocalGitRepo { + path: "/home/users/me/.cache/hipcheck/clones/github/foo/bar/".to_string(), + git_ref: "main".to_string(), + } + } + + fn mock_responses() -> StdResult { + let local = local(); + + let deps = vec![ + "lodash".to_string(), + "chakl".to_string(), + "reacct".to_string(), + ]; + let output = NpmDependencies { + language: Lang::JavaScript, + deps, + }; + + let mut mock_responses = MockResponses::new(); + mock_responses + .insert("mitre/npm_dependencies/dependencies", local, Ok(output)) + .unwrap(); + + Ok(mock_responses) + } + + #[tokio::test] + async fn test_typo() { + let typo_path =
pathbuf![&env::current_dir().unwrap(), "test", "Typos.toml"]; + let typo_file = TypoFile::load_from(&typo_path).unwrap(); + TYPOFILE.get_or_init(|| typo_file); + + let local = local(); + let target = Target { + specifier: "bar".to_string(), + local, + remote: None, + package: None, + }; + + let mut engine = PluginEngine::mock(mock_responses().unwrap()); + let output = typo(&mut engine, target).await.unwrap(); + assert_eq!(output.len(), 3); + let num_typos = output.iter().filter(|&n| *n).count(); + assert_eq!(num_typos, 2); + + let concerns = engine.take_concerns(); + assert!(concerns.contains(&"chakl".to_string())); + assert!(concerns.contains(&"reacct".to_string())); + } +} diff --git a/plugins/typo/src/types.rs b/plugins/typo/src/types.rs new file mode 100644 index 00000000..ed64635f --- /dev/null +++ b/plugins/typo/src/types.rs @@ -0,0 +1,351 @@ +// SPDX-License-Identifier: Apache-2.0 + +use maplit::hashmap; +use serde::{Deserialize, Serialize}; +use std::{ + cmp::Ordering, + collections::HashMap, + fmt::{self, Display}, +}; + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct NpmDependencies { + pub language: Lang, + pub deps: Vec, +} + +#[derive(Debug, PartialEq, Eq, Copy, Clone, Serialize, Deserialize)] +pub enum Lang { + JavaScript, + Unknown, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +pub struct Typo { + kind: TypoKind, + typo: String, +} + +impl Typo { + #[inline] + pub fn addition(typo: String) -> Typo { + Typo { + kind: TypoKind::Addition, + typo, + } + } + + #[inline] + pub fn bitsquatting(typo: String) -> Typo { + Typo { + kind: TypoKind::Bitsquatting, + typo, + } + } + + #[inline] + pub fn hyphenation(typo: String) -> Typo { + Typo { + kind: TypoKind::Hyphenation, + typo, + } + } + + #[inline] + pub fn insertion(typo: String) -> Typo { + Typo { + kind: TypoKind::Insertion, + typo, + } + } + + #[inline] + pub fn omission(typo: String) -> Typo { + Typo { + kind: TypoKind::Omission, + typo, + } + } + + #[inline] + 
pub fn repetition(typo: String) -> Typo { + Typo { + kind: TypoKind::Repetition, + typo, + } + } + + #[inline] + pub fn replacement(typo: String) -> Typo { + Typo { + kind: TypoKind::Replacement, + typo, + } + } + + #[inline] + pub fn transposition(typo: String) -> Typo { + Typo { + kind: TypoKind::Transposition, + typo, + } + } + + #[inline] + pub fn vowel_swap(typo: String) -> Typo { + Typo { + kind: TypoKind::VowelSwap, + typo, + } + } + + #[inline] + pub fn pluralization(typo: String) -> Typo { + Typo { + kind: TypoKind::Pluralization, + typo, + } + } + + #[inline] + pub fn homoglyph(typo: String) -> Typo { + Typo { + kind: TypoKind::Homoglyph, + typo, + } + } + + #[inline] + pub fn to_str(&self) -> &str { + &self.typo + } +} + +impl PartialOrd for Typo { + #[inline] + fn partial_cmp(&self, other: &Typo) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for Typo { + #[inline] + fn cmp(&self, other: &Typo) -> Ordering { + self.typo.cmp(&other.typo) + } +} + +impl PartialEq<&Typo> for Typo { + #[inline] + fn eq(&self, other: &&Typo) -> bool { + self.eq(*other) + } +} + +impl PartialEq for &Typo { + #[inline] + fn eq(&self, other: &Typo) -> bool { + (*self).eq(other) + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize)] +enum TypoKind { + Addition, + Bitsquatting, + Hyphenation, + Insertion, + Omission, + Repetition, + Replacement, + Transposition, + VowelSwap, + Pluralization, + Homoglyph, +} + +impl Display for TypoKind { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + TypoKind::Addition => write!(f, "addition"), + TypoKind::Bitsquatting => write!(f, "bitsquatting"), + TypoKind::Hyphenation => write!(f, "hyphenation"), + TypoKind::Insertion => write!(f, "insertion"), + TypoKind::Omission => write!(f, "omission"), + TypoKind::Repetition => write!(f, "repetition"), + TypoKind::Replacement => write!(f, "replacement"), + TypoKind::Transposition => write!(f, "transposition"), + TypoKind::VowelSwap => write!(f, "vowel swap"), 
+ TypoKind::Pluralization => write!(f, "pluralization"), + TypoKind::Homoglyph => write!(f, "homoglyph"), + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Homoglyphs(HashMap>); + +impl Homoglyphs { + #[inline] + pub fn new(homoglyphs: HashMap>) -> Homoglyphs { + Homoglyphs(homoglyphs) + } + + #[inline] + pub fn ascii() -> Homoglyphs { + Homoglyphs::new(hashmap! { + 'O' => vec!['0'], + '0' => vec!['O'], + 'l' => vec!['I'], + 'I' => vec!['l'], + }) + } + + #[inline] + pub fn glyphs(&self) -> &HashMap> { + &self.0 + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct KeyboardLayout { + neighbors: HashMap>, +} + +impl KeyboardLayout { + #[inline] + pub fn new(neighbors: HashMap>) -> KeyboardLayout { + KeyboardLayout { neighbors } + } + + #[inline] + pub fn qwerty() -> KeyboardLayout { + KeyboardLayout::new(hashmap! { + '1' => vec!['2', 'q'], + '2' => vec!['3', 'w', 'q', '1'], + '3' => vec!['4', 'e', 'w', '2'], + '4' => vec!['5', 'r', 'e', '3'], + '5' => vec!['6', 't', 'r', '4'], + '6' => vec!['7', 'y', 't', '5'], + '7' => vec!['8', 'u', 'y', '6'], + '8' => vec!['9', 'i', 'u', '7'], + '9' => vec!['0', 'o', 'i', '8'], + '0' => vec!['p', 'o', '9'], + 'q' => vec!['1', '2', 'w', 'a'], + 'w' => vec!['3', 'e', 's', 'a', 'q', '2'], + 'e' => vec!['4', 'r', 'd', 's', 'w', '3'], + 'r' => vec!['5', 't', 'f', 'd', 'e', '4'], + 't' => vec!['6', 'y', 'g', 'f', 'r', '5'], + 'y' => vec!['7', 'u', 'h', 'g', 't', '6'], + 'u' => vec!['8', 'i', 'j', 'h', 'y', '7'], + 'i' => vec!['9', 'o', 'k', 'j', 'u', '8'], + 'o' => vec!['0', 'p', 'l', 'k', 'i', '9'], + 'p' => vec!['l', 'o', '0'], + 'a' => vec!['q', 'w', 's', 'z'], + 's' => vec!['e', 'd', 'x', 'z', 'a', 'w'], + 'd' => vec!['r', 'f', 'c', 'x', 's', 'e'], + 'f' => vec!['t', 'g', 'v', 'c', 'd', 'r'], + 'g' => vec!['y', 'h', 'b', 'v', 'f', 't'], + 'h' => vec!['u', 'j', 'n', 'b', 'g', 'y'], + 'j' => vec!['i', 'k', 'm', 'n', 'h', 'u'], + 'k' => vec!['o', 'l', 'm', 'j', 'i'], + 'l' => vec!['k', 'o', 'p'], + 'z' => 
vec!['a', 's', 'x'], + 'x' => vec!['z', 's', 'd', 'c'], + 'c' => vec!['x', 'd', 'f', 'v'], + 'v' => vec!['c', 'f', 'g', 'b'], + 'b' => vec!['v', 'g', 'h', 'n'], + 'n' => vec!['b', 'h', 'j', 'm'], + 'm' => vec!['n', 'j', 'k'], + }) + } + + #[inline] + pub fn qwertz() -> KeyboardLayout { + KeyboardLayout::new(hashmap! { + '1' => vec!['2', 'q'], + '2' => vec!['3', 'w', 'q', '1'], + '3' => vec!['4', 'e', 'w', '2'], + '4' => vec!['5', 'r', 'e', '3'], + '5' => vec!['6', 't', 'r', '4'], + '6' => vec!['7', 'z', 't', '5'], + '7' => vec!['8', 'u', 'z', '6'], + '8' => vec!['9', 'i', 'u', '7'], + '9' => vec!['0', 'o', 'i', '8'], + '0' => vec!['p', 'o', '9'], + 'q' => vec!['1', '2', 'w', 'a'], + 'w' => vec!['3', 'e', 's', 'a', 'q', '2'], + 'e' => vec!['4', 'r', 'd', 's', 'w', '3'], + 'r' => vec!['5', 't', 'f', 'd', 'e', '4'], + 't' => vec!['6', 'z', 'g', 'f', 'r', '5'], + 'z' => vec!['7', 'u', 'h', 'g', 't', '6'], + 'u' => vec!['8', 'i', 'j', 'h', 'z', '7'], + 'i' => vec!['9', 'o', 'k', 'j', 'u', '8'], + 'o' => vec!['0', 'p', 'l', 'k', 'i', '9'], + 'p' => vec!['l', 'o', '0'], + 'a' => vec!['q', 'w', 's', 'y'], + 's' => vec!['e', 'd', 'x', 'y', 'a', 'w'], + 'd' => vec!['r', 'f', 'c', 'x', 's', 'e'], + 'f' => vec!['t', 'g', 'v', 'c', 'd', 'r'], + 'g' => vec!['z', 'h', 'b', 'v', 'f', 't'], + 'h' => vec!['u', 'j', 'n', 'b', 'g', 'z'], + 'j' => vec!['i', 'k', 'm', 'n', 'h', 'u'], + 'k' => vec!['o', 'l', 'm', 'j', 'i'], + 'l' => vec!['k', 'o', 'p'], + 'y' => vec!['a', 's', 'x'], + 'x' => vec!['y', 's', 'd', 'c'], + 'c' => vec!['x', 'd', 'f', 'v'], + 'v' => vec!['c', 'f', 'g', 'b'], + 'b' => vec!['v', 'g', 'h', 'n'], + 'n' => vec!['b', 'h', 'j', 'm'], + 'm' => vec!['n', 'j', 'k'], + }) + } + + #[inline] + pub fn azerty() -> KeyboardLayout { + KeyboardLayout::new(hashmap! 
{ + '1' => vec!['2', 'a'], + '2' => vec!['3', 'z', 'a', '1'], + '3' => vec!['4', 'e', 'z', '2'], + '4' => vec!['5', 'r', 'e', '3'], + '5' => vec!['6', 't', 'r', '4'], + '6' => vec!['7', 'y', 't', '5'], + '7' => vec!['8', 'u', 'y', '6'], + '8' => vec!['9', 'i', 'u', '7'], + '9' => vec!['0', 'o', 'i', '8'], + '0' => vec!['p', 'o', '9'], + 'a' => vec!['2', 'z', 'q', '1'], + 'z' => vec!['3', 'e', 's', 'q', 'a', '2'], + 'e' => vec!['4', 'r', 'd', 's', 'z', '3'], + 'r' => vec!['5', 't', 'f', 'd', 'e', '4'], + 't' => vec!['6', 'y', 'g', 'f', 'r', '5'], + 'y' => vec!['7', 'u', 'h', 'g', 't', '6'], + 'u' => vec!['8', 'i', 'j', 'h', 'y', '7'], + 'i' => vec!['9', 'o', 'k', 'j', 'u', '8'], + 'o' => vec!['0', 'p', 'l', 'k', 'i', '9'], + 'p' => vec!['l', 'o', '0', 'm'], + 'q' => vec!['z', 's', 'w', 'a'], + 's' => vec!['e', 'd', 'x', 'w', 'q', 'z'], + 'd' => vec!['r', 'f', 'c', 'x', 's', 'e'], + 'f' => vec!['t', 'g', 'v', 'c', 'd', 'r'], + 'g' => vec!['y', 'h', 'b', 'v', 'f', 't'], + 'h' => vec!['u', 'j', 'n', 'b', 'g', 'y'], + 'j' => vec!['i', 'k', 'n', 'h', 'u'], + 'k' => vec!['o', 'l', 'j', 'i'], + 'l' => vec!['k', 'o', 'p', 'm'], + 'm' => vec!['l', 'p'], + 'w' => vec!['s', 'x', 'q'], + 'x' => vec!['w', 's', 'd', 'c'], + 'c' => vec!['x', 'd', 'f', 'v'], + 'v' => vec!['c', 'f', 'g', 'b'], + 'b' => vec!['v', 'g', 'h', 'n'], + 'n' => vec!['b', 'h', 'j'], + }) + } + + #[inline] + pub fn neighbors(&self) -> &HashMap> { + &self.neighbors + } +} diff --git a/plugins/typo/src/util/fs.rs b/plugins/typo/src/util/fs.rs new file mode 100644 index 00000000..bcbb2692 --- /dev/null +++ b/plugins/typo/src/util/fs.rs @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: Apache-2.0 + +use anyhow::{anyhow, Context as _, Result}; +use serde::de::DeserializeOwned; +use std::{fs, path::Path}; + +/// Read a file to a string. 
+pub fn read_string>(path: P) -> Result { + fn inner(path: &Path) -> Result { + fs::read_to_string(path) + .with_context(|| format!("failed to read as UTF-8 string '{}'", path.display())) + } + + inner(path.as_ref()) +} + +/// Read file to a struct that can be deserialized from TOML format. +pub fn read_toml, T: DeserializeOwned>(path: P) -> Result { + let path = path.as_ref(); + let contents = read_string(path)?; + toml::de::from_str(&contents) + .with_context(|| format!("failed to read as TOML '{}'", path.display())) +} + +/// Check that a given path exists. +pub fn exists>(path: P) -> Result<()> { + fn inner(path: &Path) -> Result<()> { + if !path.exists() { + Err(anyhow!( + "'{}' not found at current directory", + path.display() + )) + } else { + Ok(()) + } + } + + inner(path.as_ref()) +} diff --git a/plugins/typo/src/util/mod.rs b/plugins/typo/src/util/mod.rs new file mode 100644 index 00000000..a2bb7363 --- /dev/null +++ b/plugins/typo/src/util/mod.rs @@ -0,0 +1,3 @@ +// SPDX-License-Identifier: Apache-2.0 + +pub mod fs; diff --git a/plugins/typo/test/Typos.toml b/plugins/typo/test/Typos.toml new file mode 100644 index 00000000..559a36c1 --- /dev/null +++ b/plugins/typo/test/Typos.toml @@ -0,0 +1,372 @@ +############################################################################### +# Typos.toml +# +# This configuration file defines information necessary to detect typos based +# on the package ecosystem being used. 
+############################################################################### + +[languages] + +javascript = [ + "lodash", + "chalk", + "react", + "request", + "commander", + "express", + "moment", + "react-dom", + "prop-types", + "debug", + "fs-extra", + "tslib", + "async", + "bluebird", + "axios", + "uuid", + "underscore", + "vue", + "classnames", + "mkdirp", + "webpack", + "yargs", + "body-parser", + "rxjs", + "glob", + "inquirer", + "colors", + "core-js", + "babel-runtime", + "jquery", + "minimist", + "dotenv", + "aws-sdk", + "babel-core", + "typescript", + "babel-loader", + "cheerio", + "rimraf", + "q", + "eslint", + "css-loader", + "shelljs", + "dotenv", + "typescript", + "@types/node", + "@angular/core", + "js-yaml", + "style-loader", + "winston", + "@angular/common", + "redux", + "object-assign", + "zone.js", + "babel-eslint", + "gulp", + "gulp-util", + "file-loader", + "ora", + "node-fetch", + "@angular/platform-browser", + "@babel/runtime", + "handlebars", + "eslint-plugin-import", + "@angular/compiler", + "eslint-plugin-react", + "aws-sdk", + "yosay", + "url-loader", + "@angular/forms", + "webpack-dev-server", + "@angular/platform-browser-dynamic", + "mocha", + "html-webpack-plugin", + "socket.io", + "ws", + "babel-preset-es2015", + "postcss-loader", + "node-sass", + "ember-cli-babel", + "babel-polyfill", + "@angular/router", + "ramda", + "react-redux", + "@babel/core", + "@angular/http", + "ejs", + "coffee-script", + "superagent", + "request-promise", + "autoprefixer", + "path", + "mongodb", + "chai", + "mongoose", + "xml2js", + "bootstrap", + "jest", + "sass-loader", + "redis", + "vue-router", + "chokidar", + "co", + "eslint-plugin-jsx-a11y", + "nan", + "optimist", + "promise", + "@angular/animations", + "postcss", + "morgan", + "less", + "immutable", + "qs", + "loader-utils", + "fs", + "extract-text-webpack-plugin", + "marked", + "mime", + "@alifd/next", + "meow", + "styled-components", + "resolve", + "reflect-metadata", + "babel-preset-react", + 
"jsonwebtoken", + "react-router-dom", + "extend", + "cookie-parser", + "whatwg-fetch", + "babel-preset-env", + "babel-jest", + "mysql", + "joi", + "minimatch", + "eslint-loader", + "react-dev-utils", + "node-uuid", + "es6-promise", + "cross-spawn", + "case-sensitive-paths-webpack-plugin", + "uglify-js", + "cors", + "eslint-plugin-flowtype", + "react-router", + "@babel/preset-env", + "deepmerge", + "socket.io-client", + "npm", + "webpack-manifest-plugin", + "koa", + "isomorphic-fetch", + "babel-cli", + "del", + "postcss-flexbugs-fixes", + "compression", + "update-notifier", + "babel-preset-react-app", + "jade", + "prompt", + "gulp-rename", + "angular", + "underscore.string", + "graphql", + "execa", + "browserify", + "opn", + "validator", + "eslint-config-react-app", + "vuex", + "prettier", + "invariant", + "jsdom", + "@types/react", + "redux-thunk", + "mini-css-extract-plugin", + "globby", + "pg", + "got", + "ajv", + "xtend", + "ember-cli-htmlbars", + "babel-plugin-transform-runtime", + "nodemailer", + "source-map-support", + "express-session", + "d3", + "less-loader", + "fsevents", + "babel-preset-stage-0", + "download-git-repo", + "query-string", + "font-awesome", + "open", + "passport", + "@types/lodash", + "grunt", + "path-to-regexp", + "mustache", + "inherits", + "tmp", + "md5", + "dotenv-expand", + "crypto-js", + "request-promise-native", + "through", + "connect", + "raf", + "react-scripts", + "readable-stream", + "highlight.js", + "@babel/polyfill", + "progress", + "optimize-css-assets-webpack-plugin", + "iconv-lite", + "bunyan", + "gulp-uglify", + "koa-router", + "ncp", + "lodash.merge", + "lru-cache", + "moment-timezone", + "figlet", + "history", + "readline-sync", + "pluralize", + "url", + "log4js", + "cli-table", + "webpack-merge", + "archiver", + "babel-register", + "eslint-config-airbnb", + "clone", + "jsonfile", + "puppeteer", + "shortid", + "@babel/plugin-proposal-class-properties", + "querystring", + "serve-static", + "tslint", + "pug", + "config", + 
"source-map", + "antd", + "concat-stream", + "element-ui", + "lodash.get", + "@babel/preset-react", + "serve-favicon", + "stylus", + "date-fns", + "esprima", + "sequelize", + "babel-plugin-transform-object-rest-spread", + "bindings", + "events", + "graceful-fs", + "normalize.css", + "crypto", + "cross-env", + "mime-types", + "event-stream", + "hoist-non-react-statics", + "gulp-concat", + "terser-webpack-plugin", + "json-loader", + "warning", + "bignumber.js", + "eventemitter3", + "webpack-cli", + "strip-ansi", + "cli-color", + "form-data", + "web3", + "gulp-sourcemaps", + "webpack-dev-middleware", + "ip", + "camelcase", + "sw-precache-webpack-plugin", + "merge", + "http-proxy", + "react-transition-group", + "multer", + "deep-equal", + "browser-sync", + "babel", + "dateformat", + "postcss-preset-env", + "uglifyjs-webpack-plugin", + "@polymer/polymer", + "sinon", + "eslint-config-prettier", + "gulp-sass", + "identity-obj-proxy", + "ts-loader", + "react-hot-loader", + "sqlite3", + "popper.js", + "which", + "markdown-it", + "tar", + "vue-template-compiler", + "babel-plugin-transform-class-properties", + "js-beautify", + "log-symbols", + "webpack-hot-middleware", + "rollup", + "copy-webpack-plugin", + "nodemon", + "boom", + "xmldom", + "recompose", + "util", + "ini", + "pify", + "command-line-args", + "vinyl", + "mz", + "lodash.debounce", + "html-minifier", + "ts-node", + "nconf", + "recursive-readdir", + "vue-loader", + "@types/express", + "datafire", + "@types/react-dom", + "babel-plugin-transform-decorators-legacy", + "clean-css", + "hoek", + "cookie", + "@babel/plugin-transform-runtime", + "when", + "babel-plugin-named-asset-import", + "postcss-safe-parser", + "bcrypt", + "@material-ui/core", + "@babel/plugin-syntax-dynamic-import", + "nunjucks", + "eslint-plugin-promise", + "react-native", + "lodash.isequal", + "workbox-webpack-plugin", + "acorn", + "amqplib", + "@svgr/webpack", + "color", + "ms", + "js-cookie", + "temp", + "simple-git", + "cssnano", + "reselect", 
+ "yamljs", + "ioredis", + "koa-static", + "react-app-polyfill", + "react-select", + "escape-string-regex", + "firebase", + "bn.js", + "escodegen", +]