forked from hackyourlife/lima-gold
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrot.py
121 lines (103 loc) · 3.49 KB
/
rot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# -*- coding: utf-8 -*-
# vim:set ts=8 sts=8 sw=8 tw=80 noet cc=80:
import re
from collections import namedtuple
frequencies_de = [ 6.516, 1.886, 2.732, 5.076, 16.396, 1.656, 3.009, 4.577,
6.55, 0.268, 1.417, 3.437, 2.534, 9.776, 2.594, 0.67, 0.018,
7.003, 7.27, 6.154, 4.166, 0.846, 1.921, 0.034, 0.039, 1.134]
frequencies_de_utf = { "ä": 0.578, "ö": 0.443, "ü": 0.995, "ß": 0.307 }
frequencies_en = [ 8.167, 1.492, 2.782, 4.253, 12.702, 2.228, 2.015, 6.094,
6.966, 0.153, 0.772, 4.025, 2.406, 6.749, 7.507, 1.929, 0.095,
5.987, 6.327, 9.056, 2.758, 0.978, 2.361, 0.15, 1.974, 0.074 ]
def merge_dicts(*dicts):
r = {}
for d in dicts:
r.update(d)
return r
def default_frequencies(lang):
def basic_frequencies(table):
return { chr(i + 97): table[i] / 100.0 \
for i in range(len(table)) }
if lang == "en":
return basic_frequencies(frequencies_en)
if lang == "de":
return merge_dicts(basic_frequencies(frequencies_de),
frequencies_de_utf)
raise ValueError("no frequency table for language \"%s\"" % lang)
dicts = {}
def _read(path):
with open(path) as f:
return { l.strip().lower() for l in f.readlines() \
if len(l.strip()) > 0 }
def _get_dict(lang):
if lang == "de":
return _read("/usr/share/dict/german")
if lang == "en":
return _read("/usr/share/dict/british-english")
raise ValueError("no dict for language \"%s\"" % lang)
def get_dict(lang):
if lang in dicts:
return dicts[lang]
dicts[lang] = _get_dict(lang)
return dicts[lang]
def is_supported(lang):
try:
default_frequencies(lang)
except:
return False
try:
get_dict(lang)
except:
return False
return True
def frequencies(text, letters):
f = { l: text.count(l) for l in set(text) if l in letters }
s = sum([ v for (c, v) in f.items() ])
return { l: f[l] / s for l in f }
def cost(text, freq):
tf = frequencies(text, freq)
t = [ (tf[c] - f) ** 2 for (c, f) in freq.items() if c in tf ]
return sum(t) / len(t)
def rot(text, n):
def r(c, n):
if (c < 'A' or c > 'z') or (c > 'Z' and c < 'a'): return c
ch = chr((ord(c.upper()) - ord('A') + n) % 26 + ord('A'))
return ch if c < 'a' else ch.lower()
return "".join([ r(c, n) for c in text ])
Result = namedtuple("Result", ["n", "text", "error"])
def crack(text, lang):
freq = default_frequencies(lang)
c = { cost(rot(text, n), freq): n for n in range(0, 26) }
candidates = sorted(c.keys())
return [ Result(26 - c[i], rot(text, c[i]), i) for i in candidates ]
ascii_only = re.compile(r"\W")
def crackx(text, lang, only_exact=False):
def get_words(text):
return [ w.strip() for w in ascii_only.sub(" ", text).split(" ")
if len(w.strip()) > 0 ]
try:
dictionary = get_dict(lang)
except: # try without dictionary
return text if only_exact else crack(text, lang)[0]
# skip expensive decoding if it is already plaintext
words = get_words(text)
threshold = max(int(len(words) / 2), 1 if len(words) == 1 else 2)
if sum([ 1 for word in words if word.lower() in dictionary ]) >= \
threshold:
return Result(0, text, cost(text, default_frequencies(lang)))
results = crack(text, lang)
matches = {}
n = 0
for result in results:
n += 1
cnt = sum([ 1 for word in get_words(result.text) \
if word.lower() in dictionary ])
if cnt >= threshold:
return result
if cnt not in matches:
matches[cnt] = result
result = matches[sorted(matches.keys())[-1]]
cnt = sum([ 1 for word in get_words(result.text) \
if word.lower() in dictionary ])
return result if cnt > 0 or not only_exact else \
Result(0, text, cost(text, default_frequencies(lang)))