-
Notifications
You must be signed in to change notification settings - Fork 32
/
Copy pathhashbook.py
59 lines (52 loc) · 1.9 KB
/
hashbook.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import hashlib
import binascii
import concurrent.futures
import time
# Titles of the books to fingerprint. Each title is also the stem of the
# text file that holds the book under ../data/.
# NOTE(review): 'Metamorphosis ' carries a trailing space, so its path is
# '../data/Metamorphosis .txt' - presumably this matches the file on disk;
# confirm before normalising it.
names = [
    'Gulliver',
    'Alice in Wonderland',
    'Pride and prejudice',
    'Yellow wallpaper',
    'Metamorphosis ',
    'A Tale of Two Cities',
    'The Importance of Being Earnest',
    'Frankenstein',
]

# Mapping of book title -> relative path of its text file.
filenames = dict(
    (title, '../data/{}.txt'.format(title))
    for title in names
)
def read_book(item, encoding='utf-8'):
    """Read the full text of one book from disk.

    Args:
        item: A ``(name, filename)`` pair, e.g. one entry of
            ``filenames.items()``.
        encoding: Text encoding used to decode the file. Defaults to UTF-8
            so the decoded text does not depend on the platform's locale
            encoding; pass another codec for files stored differently.

    Returns:
        A ``(name, data)`` tuple where ``data`` is the whole file content
        as a ``str``.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid in ``encoding``.
    """
    name, filename = item
    # An explicit encoding keeps the text (and any fingerprint derived from
    # it) identical across machines with different default locales.
    with open(filename, encoding=encoding) as f:
        data = f.read()
    return name, data
def hash_book(item, k=1024, iterations=1000000, salt=b'salt'):
    """Compute a PBKDF2-HMAC-SHA512 fingerprint of a book's text.

    With the default iteration count this is deliberately very slow, which
    makes it a CPU-heavy workload for comparing execution strategies.

    Args:
        item: A ``(name, data)`` pair where ``data`` is the book text as a
            ``str``.
        k: Unused; kept only so existing callers that pass it keep working.
        iterations: Number of PBKDF2 rounds. Lower it for quick runs.
        salt: Salt bytes fed to PBKDF2.

    Returns:
        A ``(name, fingerprint)`` tuple where ``fingerprint`` is the derived
        key as a lowercase hexadecimal ``str``.
    """
    name, data = item
    fingerprint = hashlib.pbkdf2_hmac(
        'sha512', data.encode('utf8'), salt, iterations
    )
    return name, binascii.hexlify(fingerprint).decode()
class _Stopwatch:
    """Print a label on entry and the elapsed seconds on a clean exit."""

    def __init__(self, label):
        self.label = label
        self._tic = None

    def __enter__(self):
        print(self.label)
        # perf_counter is the high-resolution clock meant for measuring
        # short durations, and it is unaffected by system clock adjustments.
        self._tic = time.perf_counter()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Only report a time when the measured body finished normally.
        if exc_type is None:
            toc = time.perf_counter()
            print("Elapsed time: {:.2f} seconds".format(toc - self._tic))
        return False


def _drain(results):
    """Consume an iterable of (name, fingerprint) pairs, forcing the work."""
    for name, fingerprint in results:
        # Results are discarded; only the elapsed time matters here.
        # Uncomment to inspect them:
        # print('Fingerprint for {} is "{}"'.format(name, fingerprint))
        pass


if __name__ == '__main__':
    # Read all books into memory, one thread per file so the reads overlap.
    with concurrent.futures.ThreadPoolExecutor(len(filenames)) as executor:
        books = dict(executor.map(read_book, filenames.items()))

    # Baseline: hash the books one after another in the main thread.
    with _Stopwatch("Single-thread"):
        _drain(map(hash_book, books.items()))

    # One worker thread per book. Pool start-up and shutdown are timed too.
    with _Stopwatch("Multi-thread"):
        with concurrent.futures.ThreadPoolExecutor(len(books)) as executor:
            _drain(executor.map(hash_book, books.items()))

    # Worker processes (default pool size). Pool start-up and shutdown are
    # timed too.
    with _Stopwatch("Multi-process"):
        with concurrent.futures.ProcessPoolExecutor() as executor:
            _drain(executor.map(hash_book, books.items()))