From 8f56488c1744949f74986ab916ec26050a20442b Mon Sep 17 00:00:00 2001
From: Sergey Potapov <blake131313@gmail.com>
Date: Fri, 9 Nov 2018 21:11:34 +0100
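Subject: [PATCH 1/6] Add templates/lang.rs to Cargo.toml

Also add a Travis CI `script` section that runs `cargo fmt -- --check`,
`cargo test` and `cargo package`.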

---
 .travis.yml | 4 ++++
 Cargo.toml  | 1 +
 2 files changed, 5 insertions(+)
diff --git a/.travis.yml b/.travis.yml
index 2ef885d..d0fed17 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -2,3 +2,7 @@ language: rust
 rust:
   - 1.30.1
   - stable
+script:
+  - cargo fmt -- --check
+  - cargo test
+  - cargo package
diff --git a/Cargo.toml b/Cargo.toml
index 060da1c..69f3ac9 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -15,6 +15,7 @@ include = [
     "test/**/*",
     "misc/data.json",
     "misc/supported_laguages.csv",
+    "templates/lang.rs",
     "build.rs",
     "Cargo.toml",
     "README.md"

From a604c8c6abf70094f6a7219ec98a4b90fc1f3c05 Mon Sep 17 00:00:00 2001
From: Sergey Potapov <blake131313@gmail.com>
Date: Fri, 9 Nov 2018 21:24:12 +0100
Subject: [PATCH 2/6] Fix typo: misc/supported_laguages.csv ->
 misc/supported_languages.csv

---
 Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Cargo.toml b/Cargo.toml
index 69f3ac9..5865d62 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,7 +14,7 @@ include = [
     "src/**/*",
     "test/**/*",
     "misc/data.json",
-    "misc/supported_laguages.csv",
+    "misc/supported_languages.csv",
     "templates/lang.rs",
     "build.rs",
     "Cargo.toml",

From b26296e752231d6cd4e125a23e6ea83c319a1979 Mon Sep 17 00:00:00 2001
From: Sergey Potapov <blake131313@gmail.com>
Date: Fri, 9 Nov 2018 21:25:07 +0100
Subject: [PATCH 3/6] Fix formatting

---
 benches/example.rs |   2 +-
 build.rs           |  42 ++++--
 examples/cli.rs    |   5 +-
 src/detect.rs      | 102 +++++++------
 src/detector.rs    |  13 +-
 src/info.rs        |   2 +-
 src/lib.rs         |  18 +--
 src/options.rs     |   4 +-
 src/script.rs      | 346 ++++++++++++++++++++++++---------------------
 src/trigrams.rs    |  54 ++++---
 src/utils.rs       |   4 +-
 tests/detect.rs    |   2 +-
 tests/proptests.rs |   3 +-
 13 files changed, 343 insertions(+), 254 deletions(-)

diff --git a/benches/example.rs b/benches/example.rs
index c26a660..f0776fc 100644
--- a/benches/example.rs
+++ b/benches/example.rs
@@ -1,7 +1,7 @@
 #[macro_use]
 extern crate bencher;
-extern crate whatlang;
 extern crate serde_json;
+extern crate whatlang;
 
 use bencher::Bencher;
 use std::collections::HashMap;
diff --git a/build.rs b/build.rs
index 24a160a..101ce47 100644
--- a/build.rs
+++ b/build.rs
@@ -1,16 +1,16 @@
 extern crate csv;
-extern crate skeptic;
-extern crate serde_json;
 extern crate serde;
+extern crate serde_json;
+extern crate skeptic;
 #[macro_use]
 extern crate serde_derive;
 extern crate tera;
 
-use std::io::{Write, BufReader, BufWriter};
 use std::collections::HashMap;
+use std::env;
 use std::fs::File;
+use std::io::{BufReader, BufWriter, Write};
 use std::path::Path;
-use std::env;
 
 const DATA_PATH: &'static str = "misc/data.json";
 const SUPPORTED_LANG_PATH: &'static str = "misc/supported_languages.csv";
@@ -53,16 +53,21 @@ fn generate_source_files() {
 
 fn load_data() -> (Vec<LangInfo>, HashMap<String, Vec<Lang>>) {
     let data_file = BufReader::new(File::open(DATA_PATH).unwrap());
-    let mut lang_reader = csv::ReaderBuilder::new().flexible(true).from_path(SUPPORTED_LANG_PATH).unwrap();
+    let mut lang_reader = csv::ReaderBuilder::new()
+        .flexible(true)
+        .from_path(SUPPORTED_LANG_PATH)
+        .unwrap();
 
     let mut lang_infos: Vec<LangInfo> = lang_reader.deserialize().map(Result::unwrap).collect();
     lang_infos.sort_by(|left, right| left.code.cmp(&right.code));
 
-    let supported_lang_codes: HashMap<String, LangInfo> = lang_infos.iter()
+    let supported_lang_codes: HashMap<String, LangInfo> = lang_infos
+        .iter()
         .map(|lang| (lang.code.clone(), lang.clone()))
         .collect();
 
-    let lang_data: HashMap<String, HashMap<String, String>> = serde_json::from_reader(data_file).unwrap();
+    let lang_data: HashMap<String, HashMap<String, String>> =
+        serde_json::from_reader(data_file).unwrap();
 
     let mut scripts: HashMap<String, Vec<Lang>> = HashMap::with_capacity(lang_data.len());
     let mut all_langs: Vec<Lang> = Vec::new();
@@ -75,23 +80,36 @@ fn load_data() -> (Vec<LangInfo>, HashMap<String, Vec<Lang>>) {
             let lang = Lang {
                 info: (*info).clone(),
                 script: script.clone(),
-                trigrams: trigrams.split('|').map(Into::into).collect()
+                trigrams: trigrams.split('|').map(Into::into).collect(),
             };
             if lang.trigrams.len() != TRIGRAM_COUNT {
-                panic!("Language {} has {} trigrams, instead of {}", code, lang.trigrams.len(), TRIGRAM_COUNT);
+                panic!(
+                    "Language {} has {} trigrams, instead of {}",
+                    code,
+                    lang.trigrams.len(),
+                    TRIGRAM_COUNT
+                );
             }
 
             all_langs.push(lang.clone());
-            scripts.entry(script.clone()).or_insert_with(Vec::new).push(lang);
+            scripts
+                .entry(script.clone())
+                .or_insert_with(Vec::new)
+                .push(lang);
         }
     }
 
     (lang_infos, scripts)
 }
 
-fn render_lang_rs(buf: &mut BufWriter<File>, lang_infos: &[LangInfo], scripts: &HashMap<String, Vec<Lang>>) {
+fn render_lang_rs(
+    buf: &mut BufWriter<File>,
+    lang_infos: &[LangInfo],
+    scripts: &HashMap<String, Vec<Lang>>,
+) {
     let mut tera = tera::Tera::default();
-    tera.add_template_file(TEMPLATE_LANG_RS_PATH, Some("lang.rs")).unwrap();
+    tera.add_template_file(TEMPLATE_LANG_RS_PATH, Some("lang.rs"))
+        .unwrap();
 
     let mut ctx = tera::Context::new();
     ctx.insert("lang_infos", lang_infos);
diff --git a/examples/cli.rs b/examples/cli.rs
index 99cb047..9e984af 100644
--- a/examples/cli.rs
+++ b/examples/cli.rs
@@ -6,7 +6,9 @@ use whatlang::detect;
 fn main() {
     let mut text = String::new();
     println!("Please enter a text:");
-    io::stdin().read_line(&mut text).expect("Failed to read line");
+    io::stdin()
+        .read_line(&mut text)
+        .expect("Failed to read line");
 
     if let Some(info) = detect(&text) {
         println!("Language: {}", info.lang());
@@ -16,4 +18,3 @@ fn main() {
         println!("Cannot recognize a language :(");
     }
 }
-
diff --git a/src/detect.rs b/src/detect.rs
index 2b4eae8..f53cf80 100644
--- a/src/detect.rs
+++ b/src/detect.rs
@@ -1,11 +1,11 @@
 use hashbrown::HashMap;
 
+use constants::{MAX_TOTAL_DISTANCE, MAX_TRIGRAM_DISTANCE};
+use info::Info;
 use lang::*;
+use options::{List, Options};
 use script::*;
 use trigrams::*;
-use info::Info;
-use options::{Options, List};
-use constants::{MAX_TRIGRAM_DISTANCE, MAX_TOTAL_DISTANCE};
 
 /// Detect a language and a script by a given text.
 ///
@@ -39,56 +39,66 @@ pub fn detect_lang_with_options(text: &str, options: &Options) -> Option<Lang> {
 
 pub fn detect_with_options(text: &str, options: &Options) -> Option<Info> {
     detect_script(text).and_then(|script| {
-        detect_lang_based_on_script(text, options, script).map( |(lang, confidence)| {
-            Info { lang, script, confidence }
+        detect_lang_based_on_script(text, options, script).map(|(lang, confidence)| Info {
+            lang,
+            script,
+            confidence,
         })
     })
 }
 
-fn detect_lang_based_on_script(text: &str, options: &Options, script : Script) -> Option<(Lang, f64)> {
+fn detect_lang_based_on_script(
+    text: &str,
+    options: &Options,
+    script: Script,
+) -> Option<(Lang, f64)> {
     match script {
-        Script::Latin      => detect_lang_in_profiles(text, options, LATIN_LANGS),
-        Script::Cyrillic   => detect_lang_in_profiles(text, options, CYRILLIC_LANGS),
+        Script::Latin => detect_lang_in_profiles(text, options, LATIN_LANGS),
+        Script::Cyrillic => detect_lang_in_profiles(text, options, CYRILLIC_LANGS),
         Script::Devanagari => detect_lang_in_profiles(text, options, DEVANAGARI_LANGS),
-        Script::Hebrew     => detect_lang_in_profiles(text, options, HEBREW_LANGS),
-        Script::Ethiopic   => detect_lang_in_profiles(text, options, ETHIOPIC_LANGS),
-        Script::Arabic     => detect_lang_in_profiles(text, options, ARABIC_LANGS),
-        Script::Mandarin  => Some((Lang::Cmn, 1.0)),
-        Script::Bengali   => Some((Lang::Ben, 1.0)),
-        Script::Hangul    => Some((Lang::Kor, 1.0)),
-        Script::Georgian  => Some((Lang::Kat, 1.0)),
-        Script::Greek     => Some((Lang::Ell, 1.0)),
-        Script::Kannada   => Some((Lang::Kan, 1.0)),
-        Script::Tamil     => Some((Lang::Tam, 1.0)),
-        Script::Thai      => Some((Lang::Tha, 1.0)),
-        Script::Gujarati  => Some((Lang::Guj, 1.0)),
-        Script::Gurmukhi  => Some((Lang::Pan, 1.0)),
-        Script::Telugu    => Some((Lang::Tel, 1.0)),
+        Script::Hebrew => detect_lang_in_profiles(text, options, HEBREW_LANGS),
+        Script::Ethiopic => detect_lang_in_profiles(text, options, ETHIOPIC_LANGS),
+        Script::Arabic => detect_lang_in_profiles(text, options, ARABIC_LANGS),
+        Script::Mandarin => Some((Lang::Cmn, 1.0)),
+        Script::Bengali => Some((Lang::Ben, 1.0)),
+        Script::Hangul => Some((Lang::Kor, 1.0)),
+        Script::Georgian => Some((Lang::Kat, 1.0)),
+        Script::Greek => Some((Lang::Ell, 1.0)),
+        Script::Kannada => Some((Lang::Kan, 1.0)),
+        Script::Tamil => Some((Lang::Tam, 1.0)),
+        Script::Thai => Some((Lang::Tha, 1.0)),
+        Script::Gujarati => Some((Lang::Guj, 1.0)),
+        Script::Gurmukhi => Some((Lang::Pan, 1.0)),
+        Script::Telugu => Some((Lang::Tel, 1.0)),
         Script::Malayalam => Some((Lang::Mal, 1.0)),
-        Script::Oriya     => Some((Lang::Ori, 1.0)),
-        Script::Myanmar   => Some((Lang::Mya, 1.0)),
-        Script::Sinhala   => Some((Lang::Sin, 1.0)),
-        Script::Khmer     => Some((Lang::Khm, 1.0)),
-        Script::Katakana | Script::Hiragana  => Some((Lang::Jpn, 1.0))
+        Script::Oriya => Some((Lang::Ori, 1.0)),
+        Script::Myanmar => Some((Lang::Mya, 1.0)),
+        Script::Sinhala => Some((Lang::Sin, 1.0)),
+        Script::Khmer => Some((Lang::Khm, 1.0)),
+        Script::Katakana | Script::Hiragana => Some((Lang::Jpn, 1.0)),
     }
 }
 
-fn detect_lang_in_profiles(text: &str, options: &Options, lang_profile_list : LangProfileList) -> Option<(Lang, f64)> {
-    let mut lang_distances : Vec<(Lang, u32)> = vec![];
+fn detect_lang_in_profiles(
+    text: &str,
+    options: &Options,
+    lang_profile_list: LangProfileList,
+) -> Option<(Lang, f64)> {
+    let mut lang_distances: Vec<(Lang, u32)> = vec![];
     let trigrams = get_trigrams_with_positions(text);
 
     for &(ref lang, lang_trigrams) in lang_profile_list {
         match options.list {
             Some(List::White(ref whitelist)) if !whitelist.contains(lang) => continue,
             Some(List::Black(ref blacklist)) if blacklist.contains(lang) => continue,
-            _ => {},
+            _ => {}
         }
         let dist = calculate_distance(lang_trigrams, &trigrams);
         lang_distances.push(((*lang), dist));
     }
 
     // Sort languages by distance
-    lang_distances.sort_by_key(|key| key.1 );
+    lang_distances.sort_by_key(|key| key.1);
 
     // Return None if lang_distances is empty
     // Return the only language with is_reliable=true if there is only 1 item
@@ -131,23 +141,22 @@ fn detect_lang_in_profiles(text: &str, options: &Options, lang_profile_list : La
     // Numbers 12.0 and 0.05 are obtained experimentally, so the function represents common sense.
     //
     let confident_rate = (12.0 / trigrams.len() as f64) + 0.05;
-    let confidence =
-        if rate > confident_rate {
-            1.0
-        } else {
-            rate / confident_rate
-        };
+    let confidence = if rate > confident_rate {
+        1.0
+    } else {
+        rate / confident_rate
+    };
 
     Some((lang_dist1.0, confidence))
 }
 
-fn calculate_distance(lang_trigrams: LangProfile,  text_trigrams: &HashMap<String, u32>) -> u32 {
+fn calculate_distance(lang_trigrams: LangProfile, text_trigrams: &HashMap<String, u32>) -> u32 {
     let mut total_dist = 0u32;
 
     for (i, &trigram) in lang_trigrams.iter().enumerate() {
         let dist = match text_trigrams.get(trigram) {
             Some(&n) => (n as i32 - i as i32).abs() as u32,
-            None => MAX_TRIGRAM_DISTANCE
+            None => MAX_TRIGRAM_DISTANCE,
         };
         total_dist += dist;
     }
@@ -186,7 +195,16 @@ mod tests {
         assert_eq!(info.lang, Lang::Tgl);
 
         // with blacklist
-        let blacklist = vec![Lang::Tgl, Lang::Jav, Lang::Nld, Lang::Uzb, Lang::Swe, Lang::Nob, Lang::Ceb, Lang::Ilo];
+        let blacklist = vec![
+            Lang::Tgl,
+            Lang::Jav,
+            Lang::Nld,
+            Lang::Uzb,
+            Lang::Swe,
+            Lang::Nob,
+            Lang::Ceb,
+            Lang::Ilo,
+        ];
         let options = Options::new().set_blacklist(blacklist);
         let output = detect_with_options(text, &options);
         assert_eq!(output.is_some(), true);
@@ -224,7 +242,9 @@ mod tests {
         let info = detect("qwertyuioasdfghjklzxcvbnm").unwrap();
         assert!(!info.is_reliable());
 
-        let info = detect("qwertyuioasdfghjklzxcvbnm qwertyuioasdfghjklzxcvbnm qwertyuioasdfghjklzxcvbnm").unwrap();
+        let info =
+            detect("qwertyuioasdfghjklzxcvbnm qwertyuioasdfghjklzxcvbnm qwertyuioasdfghjklzxcvbnm")
+                .unwrap();
         assert!(!info.is_reliable());
 
         // 1000 chars of randomly generated Cyrillic text
diff --git a/src/detector.rs b/src/detector.rs
index 0102c3d..2d4bb20 100644
--- a/src/detector.rs
+++ b/src/detector.rs
@@ -1,9 +1,9 @@
-use lang::Lang;
-use script::Script;
-use script::detect_script;
+use detect;
 use info::Info;
+use lang::Lang;
 use options::Options;
-use detect;
+use script::detect_script;
+use script::Script;
 
 /// Configurable structure that holds detection options and provides functions
 /// to detect language and script.
@@ -72,7 +72,10 @@ mod tests {
     #[test]
     fn test_detect_script() {
         // Russian, Cyrillic
-        assert_eq!(Detector::new().detect_script("Кириллица"), Some(Script::Cyrillic));
+        assert_eq!(
+            Detector::new().detect_script("Кириллица"),
+            Some(Script::Cyrillic)
+        );
     }
 
     #[test]
diff --git a/src/info.rs b/src/info.rs
index 808ee61..73837f6 100644
--- a/src/info.rs
+++ b/src/info.rs
@@ -8,7 +8,7 @@ const RELIABLE_CONFIDENCE_THRESHOLD: f64 = 0.8;
 pub struct Info {
     pub(crate) lang: Lang,
     pub(crate) script: Script,
-    pub(crate) confidence: f64
+    pub(crate) confidence: f64,
 }
 
 impl Info {
diff --git a/src/lib.rs b/src/lib.rs
index 63b9ea0..f640329 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -32,21 +32,21 @@
 //! assert_eq!(lang, Some(Lang::Eng));
 extern crate hashbrown;
 
-mod lang;
-mod script;
-mod info;
-mod utils;
-mod trigrams;
+mod constants;
 mod detect;
 mod detector;
+mod info;
+mod lang;
 mod options;
-mod constants;
+mod script;
+mod trigrams;
+mod utils;
 
-pub use lang::Lang;
-pub use script::Script;
-pub use info::Info;
 pub use detector::Detector;
+pub use info::Info;
+pub use lang::Lang;
 pub use options::Options;
+pub use script::Script;
 
 pub use detect::detect;
 pub use detect::detect_lang;
diff --git a/src/options.rs b/src/options.rs
index 38aff8b..f83a7a1 100644
--- a/src/options.rs
+++ b/src/options.rs
@@ -3,13 +3,13 @@ use lang::Lang;
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub(crate) enum List {
     White(Vec<Lang>),
-    Black(Vec<Lang>)
+    Black(Vec<Lang>),
 }
 
 /// Allows to customize behaviour of [Detector](struct.Detector.html).
 #[derive(Debug, Clone, PartialEq, Eq, Default)]
 pub struct Options {
-    pub(crate) list: Option<List>
+    pub(crate) list: Option<List>,
 }
 
 impl Options {
diff --git a/src/script.rs b/src/script.rs
index a8d266c..f724a6f 100644
--- a/src/script.rs
+++ b/src/script.rs
@@ -1,5 +1,5 @@
-use utils::is_stop_char;
 use std::fmt;
+use utils::is_stop_char;
 
 /// Represents a writing system (Latin, Cyrillic, Arabic, etc).
 #[derive(PartialEq, Eq, Debug, Clone, Copy)]
@@ -34,30 +34,30 @@ pub enum Script {
 impl Script {
     pub fn name(&self) -> &str {
         match *self {
-            Script::Latin      => "Latin",
-            Script::Cyrillic   => "Cyrillic",
-            Script::Arabic     => "Arabic",
+            Script::Latin => "Latin",
+            Script::Cyrillic => "Cyrillic",
+            Script::Arabic => "Arabic",
             Script::Devanagari => "Devanagari",
-            Script::Hiragana   => "Hiragana",
-            Script::Katakana   => "Katakana",
-            Script::Ethiopic   => "Ethiopic",
-            Script::Hebrew     => "Hebrew",
-            Script::Bengali    => "Bengali",
-            Script::Georgian   => "Georgian",
-            Script::Mandarin   => "Mandarin",
-            Script::Hangul     => "Hangul",
-            Script::Greek      => "Greek",
-            Script::Kannada    => "Kannada",
-            Script::Tamil      => "Tamil",
-            Script::Thai       => "Thai",
-            Script::Gujarati   => "Gujarati",
-            Script::Gurmukhi   => "Gurmukhi",
-            Script::Telugu     => "Telugu",
-            Script::Malayalam  => "Malayalam",
-            Script::Oriya      => "Oriya",
-            Script::Myanmar    => "Myanmar",
-            Script::Sinhala    => "Sinhala",
-            Script::Khmer      => "Khmer"
+            Script::Hiragana => "Hiragana",
+            Script::Katakana => "Katakana",
+            Script::Ethiopic => "Ethiopic",
+            Script::Hebrew => "Hebrew",
+            Script::Bengali => "Bengali",
+            Script::Georgian => "Georgian",
+            Script::Mandarin => "Mandarin",
+            Script::Hangul => "Hangul",
+            Script::Greek => "Greek",
+            Script::Kannada => "Kannada",
+            Script::Tamil => "Tamil",
+            Script::Thai => "Thai",
+            Script::Gujarati => "Gujarati",
+            Script::Gurmukhi => "Gurmukhi",
+            Script::Telugu => "Telugu",
+            Script::Malayalam => "Malayalam",
+            Script::Oriya => "Oriya",
+            Script::Myanmar => "Myanmar",
+            Script::Sinhala => "Sinhala",
+            Script::Khmer => "Khmer",
         }
     }
 }
@@ -80,36 +80,38 @@ type ScriptCounter = (Script, fn(char) -> bool, usize);
 /// ```
 pub fn detect_script(text: &str) -> Option<Script> {
     let mut script_counters: [ScriptCounter; 24] = [
-        (Script::Latin      , is_latin      , 0),
-        (Script::Cyrillic   , is_cyrillic   , 0),
-        (Script::Arabic     , is_arabic     , 0),
-        (Script::Mandarin   , is_mandarin   , 0),
-        (Script::Devanagari , is_devanagari , 0),
-        (Script::Hebrew     , is_hebrew     , 0),
-        (Script::Ethiopic   , is_ethiopic   , 0),
-        (Script::Georgian   , is_georgian   , 0),
-        (Script::Bengali    , is_bengali    , 0),
-        (Script::Hangul     , is_hangul     , 0),
-        (Script::Hiragana   , is_hiragana   , 0),
-        (Script::Katakana   , is_katakana   , 0),
-        (Script::Greek      , is_greek      , 0),
-        (Script::Kannada    , is_kannada    , 0),
-        (Script::Tamil      , is_tamil      , 0),
-        (Script::Thai       , is_thai       , 0),
-        (Script::Gujarati   , is_gujarati   , 0),
-        (Script::Gurmukhi   , is_gurmukhi   , 0),
-        (Script::Telugu     , is_telugu     , 0),
-        (Script::Malayalam  , is_malayalam  , 0),
-        (Script::Oriya      , is_oriya      , 0),
-        (Script::Myanmar    , is_myanmar    , 0),
-        (Script::Sinhala    , is_sinhala    , 0),
-        (Script::Khmer      , is_khmer      , 0)
+        (Script::Latin, is_latin, 0),
+        (Script::Cyrillic, is_cyrillic, 0),
+        (Script::Arabic, is_arabic, 0),
+        (Script::Mandarin, is_mandarin, 0),
+        (Script::Devanagari, is_devanagari, 0),
+        (Script::Hebrew, is_hebrew, 0),
+        (Script::Ethiopic, is_ethiopic, 0),
+        (Script::Georgian, is_georgian, 0),
+        (Script::Bengali, is_bengali, 0),
+        (Script::Hangul, is_hangul, 0),
+        (Script::Hiragana, is_hiragana, 0),
+        (Script::Katakana, is_katakana, 0),
+        (Script::Greek, is_greek, 0),
+        (Script::Kannada, is_kannada, 0),
+        (Script::Tamil, is_tamil, 0),
+        (Script::Thai, is_thai, 0),
+        (Script::Gujarati, is_gujarati, 0),
+        (Script::Gurmukhi, is_gurmukhi, 0),
+        (Script::Telugu, is_telugu, 0),
+        (Script::Malayalam, is_malayalam, 0),
+        (Script::Oriya, is_oriya, 0),
+        (Script::Myanmar, is_myanmar, 0),
+        (Script::Sinhala, is_sinhala, 0),
+        (Script::Khmer, is_khmer, 0),
     ];
 
     let half = text.chars().count() / 2;
 
     for ch in text.chars() {
-        if is_stop_char(ch) { continue; }
+        if is_stop_char(ch) {
+            continue;
+        }
 
         // For performance reasons, we need to mutate script_counters by calling
         // `swap` function, it would not be possible to do using normal iterator.
@@ -153,153 +155,148 @@ pub fn detect_script(text: &str) -> Option<Script> {
 }
 
 fn is_cyrillic(ch: char) -> bool {
-   match ch {
-       '\u{0400}'...'\u{0484}' |
-       '\u{0487}'...'\u{052F}' |
-       '\u{2DE0}'...'\u{2DFF}' |
-       '\u{A640}'...'\u{A69D}' |
-       '\u{1D2B}' |
-       '\u{1D78}' |
-       '\u{A69F}' => true,
-       _ => false
-   }
+    match ch {
+        '\u{0400}'...'\u{0484}'
+        | '\u{0487}'...'\u{052F}'
+        | '\u{2DE0}'...'\u{2DFF}'
+        | '\u{A640}'...'\u{A69D}'
+        | '\u{1D2B}'
+        | '\u{1D78}'
+        | '\u{A69F}' => true,
+        _ => false,
+    }
 }
 
 // https://en.wikipedia.org/wiki/Latin_script_in_Unicode
-fn is_latin(ch : char) -> bool {
+fn is_latin(ch: char) -> bool {
     match ch {
-        'a'...'z' |
-        'A'...'Z' |
-        '\u{0080}'...'\u{00FF}' |
-        '\u{0100}'...'\u{017F}' |
-        '\u{0180}'...'\u{024F}' |
-        '\u{0250}'...'\u{02AF}' |
-        '\u{1D00}'...'\u{1D7F}' |
-        '\u{1D80}'...'\u{1DBF}' |
-        '\u{1E00}'...'\u{1EFF}' |
-        '\u{2100}'...'\u{214F}' |
-        '\u{2C60}'...'\u{2C7F}' |
-        '\u{A720}'...'\u{A7FF}' |
-        '\u{AB30}'...'\u{AB6F}' => true,
-        _ => false
+        'a'...'z'
+        | 'A'...'Z'
+        | '\u{0080}'...'\u{00FF}'
+        | '\u{0100}'...'\u{017F}'
+        | '\u{0180}'...'\u{024F}'
+        | '\u{0250}'...'\u{02AF}'
+        | '\u{1D00}'...'\u{1D7F}'
+        | '\u{1D80}'...'\u{1DBF}'
+        | '\u{1E00}'...'\u{1EFF}'
+        | '\u{2100}'...'\u{214F}'
+        | '\u{2C60}'...'\u{2C7F}'
+        | '\u{A720}'...'\u{A7FF}'
+        | '\u{AB30}'...'\u{AB6F}' => true,
+        _ => false,
     }
 }
 
 // Based on https://en.wikipedia.org/wiki/Arabic_script_in_Unicode
-fn is_arabic(ch : char) -> bool {
+fn is_arabic(ch: char) -> bool {
     match ch {
-        '\u{0600}'...'\u{06FF}' |
-        '\u{0750}'...'\u{07FF}' |
-        '\u{08A0}'...'\u{08FF}' |
-        '\u{FB50}'...'\u{FDFF}' |
-        '\u{FE70}'...'\u{FEFF}' |
-        '\u{10E60}'...'\u{10E7F}' |
-        '\u{1EE00}'...'\u{1EEFF}' => true,
-        _ => false
+        '\u{0600}'...'\u{06FF}'
+        | '\u{0750}'...'\u{07FF}'
+        | '\u{08A0}'...'\u{08FF}'
+        | '\u{FB50}'...'\u{FDFF}'
+        | '\u{FE70}'...'\u{FEFF}'
+        | '\u{10E60}'...'\u{10E7F}'
+        | '\u{1EE00}'...'\u{1EEFF}' => true,
+        _ => false,
     }
 }
 
 // Based on https://en.wikipedia.org/wiki/Devanagari#Unicode
-fn is_devanagari(ch : char) -> bool {
+fn is_devanagari(ch: char) -> bool {
     match ch {
-        '\u{0900}'...'\u{097F}' |
-        '\u{A8E0}'...'\u{A8FF}' |
-        '\u{1CD0}'...'\u{1CFF}' => true,
-        _ => false
+        '\u{0900}'...'\u{097F}' | '\u{A8E0}'...'\u{A8FF}' | '\u{1CD0}'...'\u{1CFF}' => true,
+        _ => false,
     }
 }
 
 // Based on https://www.key-shortcut.com/en/writing-systems/ethiopian-script/
-fn is_ethiopic(ch : char) -> bool {
+fn is_ethiopic(ch: char) -> bool {
     match ch {
-        '\u{1200}'...'\u{139F}' |
-        '\u{2D80}'...'\u{2DDF}' |
-        '\u{AB00}'...'\u{AB2F}' => true,
-        _ => false
+        '\u{1200}'...'\u{139F}' | '\u{2D80}'...'\u{2DDF}' | '\u{AB00}'...'\u{AB2F}' => true,
+        _ => false,
     }
 }
 
 // Based on https://en.wikipedia.org/wiki/Hebrew_(Unicode_block)
-fn is_hebrew(ch : char) -> bool {
+fn is_hebrew(ch: char) -> bool {
     match ch {
         '\u{0590}'...'\u{05FF}' => true,
-        _ => false
+        _ => false,
     }
 }
 
-fn is_georgian(ch : char) -> bool {
-   match ch {
-       '\u{10A0}'...'\u{10FF}' => true,
-       _ => false
-   }
+fn is_georgian(ch: char) -> bool {
+    match ch {
+        '\u{10A0}'...'\u{10FF}' => true,
+        _ => false,
+    }
 }
 
-fn is_mandarin(ch : char) -> bool {
+fn is_mandarin(ch: char) -> bool {
     match ch {
-        '\u{2E80}'...'\u{2E99}' |
-        '\u{2E9B}'...'\u{2EF3}' |
-        '\u{2F00}'...'\u{2FD5}' |
-        '\u{3005}' |
-        '\u{3007}' |
-        '\u{3021}'...'\u{3029}' |
-        '\u{3038}'...'\u{303B}' |
-        '\u{3400}'...'\u{4DB5}' |
-        '\u{4E00}'...'\u{9FCC}' |
-        '\u{F900}'...'\u{FA6D}' |
-        '\u{FA70}'...'\u{FAD9}' => true,
-        _ => false
+        '\u{2E80}'...'\u{2E99}'
+        | '\u{2E9B}'...'\u{2EF3}'
+        | '\u{2F00}'...'\u{2FD5}'
+        | '\u{3005}'
+        | '\u{3007}'
+        | '\u{3021}'...'\u{3029}'
+        | '\u{3038}'...'\u{303B}'
+        | '\u{3400}'...'\u{4DB5}'
+        | '\u{4E00}'...'\u{9FCC}'
+        | '\u{F900}'...'\u{FA6D}'
+        | '\u{FA70}'...'\u{FAD9}' => true,
+        _ => false,
     }
 }
 
-fn is_bengali(ch : char) -> bool {
-   match ch {
-       '\u{0980}'...'\u{09FF}' => true,
-       _ => false
-   }
+fn is_bengali(ch: char) -> bool {
+    match ch {
+        '\u{0980}'...'\u{09FF}' => true,
+        _ => false,
+    }
 }
 
-fn is_hiragana(ch : char) -> bool {
-   match ch {
-       '\u{3040}'...'\u{309F}' => true,
-       _ => false
-   }
+fn is_hiragana(ch: char) -> bool {
+    match ch {
+        '\u{3040}'...'\u{309F}' => true,
+        _ => false,
+    }
 }
 
-fn is_katakana(ch : char) -> bool {
-   match ch {
-       '\u{30A0}'...'\u{30FF}' => true,
-       _ => false
+fn is_katakana(ch: char) -> bool {
+    match ch {
+        '\u{30A0}'...'\u{30FF}' => true,
+        _ => false,
     }
 }
 
-
 // Hangul is Korean Alphabet. Unicode ranges are taken from: https://en.wikipedia.org/wiki/Hangul
-fn is_hangul(ch : char) -> bool {
+fn is_hangul(ch: char) -> bool {
     match ch {
-        '\u{AC00}'...'\u{D7AF}' |
-        '\u{1100}'...'\u{11FF}' |
-        '\u{3130}'...'\u{318F}' |
-        '\u{3200}'...'\u{32FF}' |
-        '\u{A960}'...'\u{A97F}' |
-        '\u{D7B0}'...'\u{D7FF}' |
-        '\u{FF00}'...'\u{FFEF}' => true,
-        _ => false
+        '\u{AC00}'...'\u{D7AF}'
+        | '\u{1100}'...'\u{11FF}'
+        | '\u{3130}'...'\u{318F}'
+        | '\u{3200}'...'\u{32FF}'
+        | '\u{A960}'...'\u{A97F}'
+        | '\u{D7B0}'...'\u{D7FF}'
+        | '\u{FF00}'...'\u{FFEF}' => true,
+        _ => false,
     }
 }
 
 // Taken from: https://en.wikipedia.org/wiki/Greek_and_Coptic
-fn is_greek(ch : char) -> bool {
+fn is_greek(ch: char) -> bool {
     match ch {
         '\u{0370}'...'\u{03FF}' => true,
-        _ => false
+        _ => false,
     }
 }
 
 // Based on: https://en.wikipedia.org/wiki/Kannada_(Unicode_block)
-fn is_kannada(ch : char) -> bool {
+fn is_kannada(ch: char) -> bool {
     match ch {
         '\u{0C80}'...'\u{0CFF}' => true,
-        _ => false
+        _ => false,
     }
 }
 
@@ -307,7 +304,7 @@ fn is_kannada(ch : char) -> bool {
 fn is_tamil(ch: char) -> bool {
     match ch {
         '\u{0B80}'...'\u{0BFF}' => true,
-        _ => false
+        _ => false,
     }
 }
 
@@ -315,7 +312,7 @@ fn is_tamil(ch: char) -> bool {
 fn is_thai(ch: char) -> bool {
     match ch {
         '\u{0E00}'...'\u{0E7F}' => true,
-        _ => false
+        _ => false,
     }
 }
 
@@ -323,7 +320,7 @@ fn is_thai(ch: char) -> bool {
 fn is_gujarati(ch: char) -> bool {
     match ch {
         '\u{0A80}'...'\u{0AFF}' => true,
-        _ => false
+        _ => false,
     }
 }
 
@@ -332,14 +329,14 @@ fn is_gujarati(ch: char) -> bool {
 fn is_gurmukhi(ch: char) -> bool {
     match ch {
         '\u{0A00}'...'\u{0A7F}' => true,
-        _ => false
+        _ => false,
     }
 }
 
 fn is_telugu(ch: char) -> bool {
     match ch {
         '\u{0C00}'...'\u{0C7F}' => true,
-        _ => false
+        _ => false,
     }
 }
 
@@ -347,7 +344,7 @@ fn is_telugu(ch: char) -> bool {
 fn is_malayalam(ch: char) -> bool {
     match ch {
         '\u{0D00}'...'\u{0D7F}' => true,
-        _ => false
+        _ => false,
     }
 }
 
@@ -355,7 +352,7 @@ fn is_malayalam(ch: char) -> bool {
 fn is_oriya(ch: char) -> bool {
     match ch {
         '\u{0B00}'...'\u{0B7F}' => true,
-        _ => false
+        _ => false,
     }
 }
 
@@ -363,7 +360,7 @@ fn is_oriya(ch: char) -> bool {
 fn is_myanmar(ch: char) -> bool {
     match ch {
         '\u{1000}'...'\u{109F}' => true,
-        _ => false
+        _ => false,
     }
 }
 
@@ -371,7 +368,7 @@ fn is_myanmar(ch: char) -> bool {
 fn is_sinhala(ch: char) -> bool {
     match ch {
         '\u{0D80}'...'\u{0DFF}' => true,
-        _ => false
+        _ => false,
     }
 }
 
@@ -379,7 +376,7 @@ fn is_sinhala(ch: char) -> bool {
 fn is_khmer(ch: char) -> bool {
     match ch {
         '\u{1780}'...'\u{17FF}' | '\u{19E0}'...'\u{19FF}' => true,
-        _ => false
+        _ => false,
     }
 }
 
@@ -399,17 +396,48 @@ mod tests {
 
         // One script
         assert_eq!(detect_script(&"Hello!".to_string()), Some(Script::Latin));
-        assert_eq!(detect_script(&"Привет всем!".to_string()), Some(Script::Cyrillic));
-        assert_eq!(detect_script(&"ქართული ენა მსოფლიო ".to_string()), Some(Script::Georgian));
-        assert_eq!(detect_script(&"県見夜上温国阪題富販".to_string()), Some(Script::Mandarin));
-        assert_eq!(detect_script(&" ككل حوالي 1.6، ومعظم الناس ".to_string()), Some(Script::Arabic));
+        assert_eq!(
+            detect_script(&"Привет всем!".to_string()),
+            Some(Script::Cyrillic)
+        );
+        assert_eq!(
+            detect_script(&"ქართული ენა მსოფლიო ".to_string()),
+            Some(Script::Georgian)
+        );
+        assert_eq!(
+            detect_script(&"県見夜上温国阪題富販".to_string()),
+            Some(Script::Mandarin)
+        );
+        assert_eq!(
+            detect_script(&" ككل حوالي 1.6، ومعظم الناس ".to_string()),
+            Some(Script::Arabic)
+        );
         assert_eq!(detect_script(&"हिमालयी वन चिड़िया (जूथेरा सालिमअली) चिड़िया की एक प्रजाति है".to_string()), Some(Script::Devanagari));
-        assert_eq!(detect_script(&"היסטוריה והתפתחות של האלפבית העברי".to_string()), Some(Script::Hebrew));
-        assert_eq!(detect_script(&"የኢትዮጵያ ፌዴራላዊ ዴሞክራሲያዊሪፐብሊክ".to_string()), Some(Script::Ethiopic));
+        assert_eq!(
+            detect_script(
+                &"היסטוריה והתפתחות של האלפבית העברי".to_string()
+            ),
+            Some(Script::Hebrew)
+        );
+        assert_eq!(
+            detect_script(
+                &"የኢትዮጵያ ፌዴራላዊ ዴሞክራሲያዊሪፐብሊክ"
+                    .to_string()
+            ),
+            Some(Script::Ethiopic)
+        );
 
         // Mixed scripts
-        assert_eq!(detect_script(&"Привет! Текст на русском with some English.".to_string()), Some(Script::Cyrillic));
-        assert_eq!(detect_script(&"Russian word любовь means love.".to_string()), Some(Script::Latin));
+        assert_eq!(
+            detect_script(
+                &"Привет! Текст на русском with some English.".to_string()
+            ),
+            Some(Script::Cyrillic)
+        );
+        assert_eq!(
+            detect_script(&"Russian word любовь means love.".to_string()),
+            Some(Script::Latin)
+        );
     }
 
     #[test]
diff --git a/src/trigrams.rs b/src/trigrams.rs
index 75ffdf1..fb829cd 100644
--- a/src/trigrams.rs
+++ b/src/trigrams.rs
@@ -1,11 +1,10 @@
-use utils::is_stop_char;
-use hashbrown::HashMap;
 use constants::TEXT_TRIGRAMS_SIZE;
+use hashbrown::HashMap;
+use utils::is_stop_char;
 
 const MAX_INITIAL_HASH_CAPACITY: usize = 2048;
 
-pub fn get_trigrams_with_positions(text : &str) -> HashMap<String, u32> {
-
+pub fn get_trigrams_with_positions(text: &str) -> HashMap<String, u32> {
     // Sort in descending order by number of occurrences and trigrams
     let mut count_vec: Vec<_> = count(text)
         .into_iter()
@@ -13,19 +12,24 @@ pub fn get_trigrams_with_positions(text : &str) -> HashMap<String, u32> {
         .collect();
     count_vec.sort_by(|a, b| b.cmp(a));
 
-    count_vec.into_iter()
+    count_vec
+        .into_iter()
         .take(TEXT_TRIGRAMS_SIZE)
         .enumerate()
         .map(|(i, (_, trigram))| (trigram, i as u32))
         .collect()
 }
 
-fn count(text : &str) -> HashMap<String, u32> {
+fn count(text: &str) -> HashMap<String, u32> {
     let hash_capacity = calculate_initial_hash_capacity(text);
-    let mut counter_hash : HashMap<String, u32> = HashMap::with_capacity(hash_capacity);
+    let mut counter_hash: HashMap<String, u32> = HashMap::with_capacity(hash_capacity);
 
     // iterate through the string and count trigrams
-    let mut chars_iter = text.chars().map(to_trigram_char).flat_map(char::to_lowercase).chain(Some(' '));
+    let mut chars_iter = text
+        .chars()
+        .map(to_trigram_char)
+        .flat_map(char::to_lowercase)
+        .chain(Some(' '));
     let mut c1 = ' ';
     // unwrap is safe, because we always chain a space character on the end of the iterator
     let mut c2 = chars_iter.next().unwrap();
@@ -48,8 +52,12 @@ fn count(text : &str) -> HashMap<String, u32> {
 
 // Convert punctuations and digits to a space.
 #[inline]
-fn to_trigram_char(ch : char) -> char {
-    if is_stop_char(ch) { ' ' } else { ch }
+fn to_trigram_char(ch: char) -> char {
+    if is_stop_char(ch) {
+        ' '
+    } else {
+        ch
+    }
 }
 
 // In order to improve performance, define the initial capacity for trigrams hash map,
@@ -63,19 +71,17 @@ fn calculate_initial_hash_capacity(text: &str) -> usize {
     }
 }
 
-
-
 #[cfg(test)]
 mod tests {
     use super::*;
 
-    fn assert_valuable_trigram_chars(chars : &[char]) {
+    fn assert_valuable_trigram_chars(chars: &[char]) {
         for &ch in chars.iter() {
             assert_eq!(to_trigram_char(ch), ch);
         }
     }
 
-    fn assert_not_valuable_trigram_chars(chars : &[char]) {
+    fn assert_not_valuable_trigram_chars(chars: &[char]) {
         for &ch in chars.iter() {
             assert_eq!(to_trigram_char(ch), ' ');
         }
@@ -96,13 +102,15 @@ mod tests {
         assert_not_valuable_trigram_chars(&['[', '|', '{', '}', '~']);
     }
 
-
-
     fn assert_count(text: &str, pairs: &[(&str, u32)]) {
         let result = count(text);
         for &(trigram, expected_n) in pairs.iter() {
             let actual_n = result[trigram];
-            assert_eq!(actual_n, expected_n, "trigram '{}' expected to occur {} times, got {}", trigram, expected_n, actual_n);
+            assert_eq!(
+                actual_n, expected_n,
+                "trigram '{}' expected to occur {} times, got {}",
+                trigram, expected_n, actual_n
+            );
         }
         assert_eq!(result.len(), pairs.len());
     }
@@ -114,7 +122,17 @@ mod tests {
         assert_count("a", &[(" a ", 1)]);
         assert_count("-a-", &[(" a ", 1)]);
         assert_count("yes", &[(" ye", 1), ("yes", 1), ("es ", 1)]);
-        assert_count("Give - IT...", &[(" gi", 1), ("giv", 1), ("ive", 1), ("ve ", 1), (" it", 1), ("it ", 1)]);
+        assert_count(
+            "Give - IT...",
+            &[
+                (" gi", 1),
+                ("giv", 1),
+                ("ive", 1),
+                ("ve ", 1),
+                (" it", 1),
+                ("it ", 1),
+            ],
+        );
     }
 
     #[test]
diff --git a/src/utils.rs b/src/utils.rs
index b33ff37..a5c96ad 100644
--- a/src/utils.rs
+++ b/src/utils.rs
@@ -2,10 +2,10 @@
 // Stop character is a character that does not give any value for script
 // or language detection.
 #[inline]
-pub fn is_stop_char(ch : char) -> bool {
+pub fn is_stop_char(ch: char) -> bool {
     match ch {
         '\u{0000}'...'\u{0040}' | '\u{005B}'...'\u{0060}' | '\u{007B}'...'\u{007E}' => true,
-        _ => false
+        _ => false,
     }
 }
 
diff --git a/tests/detect.rs b/tests/detect.rs
index 2728f65..f49c1df 100644
--- a/tests/detect.rs
+++ b/tests/detect.rs
@@ -1,5 +1,5 @@
-extern crate whatlang;
 extern crate serde_json;
+extern crate whatlang;
 
 use whatlang::{detect, Lang, Script};
 
diff --git a/tests/proptests.rs b/tests/proptests.rs
index 4b3169c..8ecaf28 100644
--- a/tests/proptests.rs
+++ b/tests/proptests.rs
@@ -1,5 +1,6 @@
 extern crate whatlang;
-#[macro_use] extern crate proptest;
+#[macro_use]
+extern crate proptest;
 
 use whatlang::detect;
 

From f59046cbd80eab1af83049a5f5a19dd664b66e02 Mon Sep 17 00:00:00 2001
From: Sergey Potapov <blake131313@gmail.com>
Date: Fri, 9 Nov 2018 21:30:56 +0100
Subject: [PATCH 4/6] Fix clippy complaints

---
 src/detect.rs | 4 ++--
 src/lang.rs   | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/detect.rs b/src/detect.rs
index f53cf80..c58c830 100644
--- a/src/detect.rs
+++ b/src/detect.rs
@@ -127,14 +127,14 @@ fn detect_lang_in_profiles(
         // * Text really matches one of the languages.
         //
         // Number 500.0 is based on experiments and common sense expectations.
-        let mut confidence = (score1 as f64) / 500.0;
+        let mut confidence = f64::from(score1) / 500.0;
         if confidence > 1.0 {
             confidence = 1.0;
         }
         return Some((lang_dist1.0, confidence));
     }
 
-    let rate = (score1 - score2) as f64 / (score2 as f64);
+    let rate = f64::from(score1 - score2) / f64::from(score2);
 
     // Hyperbola function. Everything that is above the function has confidence = 1.0
     // If rate is below, confidence is calculated proportionally.
diff --git a/src/lang.rs b/src/lang.rs
index 2a42d4d..e27a2aa 100644
--- a/src/lang.rs
+++ b/src/lang.rs
@@ -33,8 +33,8 @@ impl Lang {
     /// use whatlang::Lang;
     /// assert_eq!(Lang::Ukr.name(), "Українська");
     /// ```
-    pub fn name(&self) -> &'static str {
-        lang_to_name(*self)
+    pub fn name(self) -> &'static str {
+        lang_to_name(self)
     }
 
     /// Get a human readable name of the language in English.
@@ -44,8 +44,8 @@ impl Lang {
     /// use whatlang::Lang;
     /// assert_eq!(Lang::Deu.eng_name(), "German");
     /// ```
-    pub fn eng_name(&self) -> &'static str {
-        lang_to_eng_name(*self)
+    pub fn eng_name(self) -> &'static str {
+        lang_to_eng_name(self)
     }
 }
 

From a1fe001f805a06fca0d9239fdde6185563283e6e Mon Sep 17 00:00:00 2001
From: Sergey Potapov <blake131313@gmail.com>
Date: Fri, 9 Nov 2018 21:32:01 +0100
Subject: [PATCH 5/6] Add clippy to travis check

---
 .travis.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.travis.yml b/.travis.yml
index d0fed17..3a3a977 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,5 +4,6 @@ rust:
   - stable
 script:
   - cargo fmt -- --check
+  - cargo clippy -- -D warnings
   - cargo test
   - cargo package

From 201c89ee44eb04a5cba94626d69d96d97c2e9d63 Mon Sep 17 00:00:00 2001
From: Sergey Potapov <blake131313@gmail.com>
Date: Fri, 9 Nov 2018 21:42:18 +0100
Subject: [PATCH 6/6] Travis: add rustfmt and clippy components

---
 .travis.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.travis.yml b/.travis.yml
index 3a3a977..f8751e5 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -2,6 +2,9 @@ language: rust
 rust:
   - 1.30.1
   - stable
+install:
+  - rustup component add rustfmt-preview
+  - rustup component add clippy-preview
 script:
   - cargo fmt -- --check
   - cargo clippy -- -D warnings