model.py
# Copyright 2016 Symantec Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
import abc
import bisect
import decimal
import math
import random

import numpy as np


class Model(metaclass=abc.ABCMeta):
    """Abstract base class for models.

    For each probability p, we handle its logprob -- i.e., with some
    abuse of notation, the base-2 logarithm with its sign changed:
    -math.log2(p).
    """

    @abc.abstractmethod
    def generate(self):
        """Generate a random password according to the model.

        Returns (logprob, passwd); passwd is the random password and
        logprob is its logprob as defined above.
        """
        pass

    def sample(self, n):
        """Generate a sample of n passwords."""
        return (self.generate() for _ in range(n))

    @abc.abstractmethod
    def logprob(self, word):
        """Return the logprob of word according to the model."""
        pass
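

# Illustrative sketch, not part of the original module: a minimal concrete
# Model over a fixed word list with uniform probabilities, just to show the
# generate()/logprob() contract (both deal in -log2 probabilities). The
# class name is an arbitrary choice for the example.
class UniformListModel(Model):
    """Toy model assigning equal probability to every word in a list."""

    def __init__(self, words):
        self.words = list(words)
        # every word has probability 1/n, so its logprob is log2(n)
        self._lp = math.log2(len(self.words))

    def generate(self):
        return self._lp, random.choice(self.words)

    def logprob(self, word):
        return self._lp if word in self.words else float('inf')

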
class PosEstimator:
    """Estimate password positions (guess numbers) from a sample of
    (logprob, password) pairs drawn from a model."""

    def __init__(self, sample, realsize=None):
        # realsize is a hack to make plot_restrictions work.
        # Don't use unless you know what you're doing!
        self.logprobs = logprobs = np.fromiter((lp for lp, _ in sample), float)
        logprobs.sort()
        if realsize is None:
            realsize = len(logprobs)
        logn = math.log2(realsize)
        # Cumulative sum of 1 / (n * p_i) over the sorted sample: for each
        # sampled logprob, an estimate of how many passwords are at least
        # as probable.
        self.positions = (2 ** (logprobs - logn)).cumsum()

    def position(self, logprob):
        idx = bisect.bisect_right(self.logprobs, logprob)
        return self.positions[idx - 1] if idx > 0 else 0

    def logpos(self, logprob):
        return math.log2(self.position(logprob))

    def logprob(self, pos):
        return np.interp(math.log2(pos + 1), np.log2(self.positions + 1),
                         self.logprobs)

    def generate(self, model_generate, entropy):
        lp_threshold = self.logprob(2 ** entropy)
        for logprob, word in iter(model_generate, None):
            if (logprob <= lp_threshold and
                    lp_threshold < logprob - math.log2(random.random())):
                return logprob, word

    def sample(self, model_generate, entropy, n):
        for _ in range(n):
            yield self.generate(model_generate, entropy)
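

# Illustrative sketch, not part of the original module: the typical way a
# PosEstimator is used -- build it from a Monte Carlo sample of a model,
# then estimate the position (guess number) of any password. The helper
# name and the sample size are arbitrary choices for the example.
def estimate_guess_number(model, password, sample_size=10000):
    """Return the estimated position of `password` under `model`."""
    estimator = PosEstimator(model.sample(sample_size))
    return estimator.position(model.logprob(password))

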
class IPWEstimator:
    """Inverse-probability-weighting estimator over a sample of
    (logprob, password) pairs drawn from a model."""

    def __init__(self, sample, store=lambda lp, word: (lp, word)):
        sample = list(sample)
        self.logn = logn = math.log2(len(sample))
        # Weight of each sampled password: 1 / (n * p_i), kept as Decimal
        # so that very large weights (from very improbable passwords) do
        # not overflow a float.
        self.ipw = [2 ** decimal.Decimal(lp - logn) for lp, _ in sample]
        self.stored = [store(lp, word) for lp, word in sample]

    def evaluate(self, fun):
        return sum(w * fun(v)
                   for w, v in zip(self.ipw, self.stored))
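

# Illustrative sketch, not part of the original module: IPWEstimator turns a
# sample into inverse-probability-weighted sums that estimate sums over all
# passwords the model can produce -- for example, counting how many distinct
# passwords have logprob at most a given threshold (probability at least
# 2 ** -threshold). The helper name and sample size are arbitrary choices.
def count_passwords_below(model, lp_threshold, sample_size=10000):
    """Estimate how many distinct passwords have logprob <= lp_threshold."""
    estimator = IPWEstimator(model.sample(sample_size))
    # Stored items default to (logprob, word) pairs; weights are Decimal,
    # so the indicator below is an int (Decimal * float would raise).
    return estimator.evaluate(lambda v: 1 if v[0] <= lp_threshold else 0)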