-
Notifications
You must be signed in to change notification settings - Fork 0
/
lambda_benchmark.py
129 lines (100 loc) · 3.63 KB
/
lambda_benchmark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# Standard library
import os
import sys
import time
from subprocess import call  # used by benchmark() to clean /tmp after a run

# Third-party
import boto3
### NUMPY, SCIPY, SKLEARN MAGIC
# Pre-load the native shared libraries bundled under ./lib with dlopen
# before importing numpy — presumably so the imports resolve inside the
# AWS Lambda sandbox, where the deployment package is not on the dynamic
# linker's search path (TODO confirm; depends on the packaging setup).
import os
import ctypes
import platform
if platform.system() != 'Darwin': # don't do this on my local machine
    # Walk everything under lib/; skip .a static archives, which cannot
    # be loaded with dlopen.
    for d, _, files in os.walk('lib'):
        for f in files:
            if f.endswith('.a'):
                continue
            ctypes.cdll.LoadLibrary(os.path.join(d, f))
import numpy as np
### NUMPY, SCIPY, SKLEARN MAGIC END
# Module-level client: created once per container, reused across warm
# Lambda invocations.
s3_client = boto3.client('s3')
def build_matrix(matrix_name, dimension, block_size):
    """Reassemble a square matrix from block files under /tmp/<matrix_name>/.

    Block file m_{i}_{j}.npy ends up at grid column i, row j of the
    result: vstack over j builds vertical strip i, and hstack places the
    strips side by side.  This matches the key layout written by
    benchmark()'s upload loop (hsplit index i, vsplit index j).

    Args:
        matrix_name: directory name under /tmp holding the .npy blocks.
        dimension:   matrix edge length; must be a multiple of block_size.
        block_size:  block edge length, same unit as dimension.

    Returns:
        The assembled (dimension x dimension)-shaped numpy array.
    """
    # Floor division keeps the block count an int on both Python 2 and
    # Python 3 (the original used `/`, which yields a float on Py3 and
    # breaks range()).
    n_blocks = dimension // block_size
    strips = []
    for i in range(n_blocks):
        blocks = [np.load('/tmp/{}/m_{}_{}.npy'.format(matrix_name, i, j))
                  for j in range(n_blocks)]
        strips.append(np.vstack(blocks))
    return np.hstack(strips)
def download_s3_file(bucket, matrix_name, key):
    """Download one S3 object to /tmp/<key> and return the local path.

    Keys are expected to look like '<matrix_name>/m_i_j.npy', so the file
    lands in the per-matrix directory that build_matrix() later reads.

    Args:
        bucket:      S3 bucket name.
        matrix_name: matrix directory name (kept for interface
                     compatibility; the target directory is now derived
                     from the key itself).
        key:         S3 object key.

    Returns:
        Local filesystem path of the downloaded file.
    """
    target = '/tmp/' + key
    # Create the directory the file is actually written to.  The original
    # created only '/tmp/' + matrix_name, which worked solely because
    # every key happens to be prefixed with the matrix name; deriving the
    # directory from the key removes that implicit coupling.
    target_dir = os.path.dirname(target)
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    s3_client.download_file(bucket, key, target)
    return target
def download_all_matrix_parts(bucket, keys, matrix_name):
    """Download every listed S3 object for a matrix.

    Args:
        bucket:      S3 bucket name.
        keys:        iterable of S3 object keys to fetch.
        matrix_name: matrix directory name, forwarded to download_s3_file.

    Returns:
        List of local file paths, one per key, in input order.
    """
    return [download_s3_file(bucket, matrix_name, key) for key in keys]
def get_all_keys_for_matrix(matrix_name):
    """List every S3 key under '<matrix_name>/' in the benchmark bucket.

    Follows continuation tokens, so matrices with more than 1000 blocks
    (the list_objects_v2 page size) are fully enumerated — the original
    silently stopped after the first page.  Also tolerates an empty
    listing (no 'Contents' in the response) instead of raising KeyError.

    Returns:
        List of S3 object keys.
    """
    keys = []
    kwargs = {
        'Bucket': 'jmue-multiplication-benchmarks',
        # NOTE(review): 'string' looks like a leftover placeholder from
        # the boto3 docs; kept as-is since block keys never contain it —
        # confirm it can be dropped.
        'Delimiter': 'string',
        'Prefix': matrix_name + '/',
    }
    while True:
        response = s3_client.list_objects_v2(**kwargs)
        print(response)
        for item in response.get('Contents', []):
            keys.append(item['Key'])
        if not response.get('IsTruncated'):
            break
        kwargs['ContinuationToken'] = response['NextContinuationToken']
    return keys
def benchmark(event, context):
    """Lambda entry point: download two block matrices from S3, multiply
    them, upload the result blocks, and return timing stats.

    Args:
        event:   dict with 'dimension' (matrix edge, in the 'k' unit used
                 by the S3 naming scheme, e.g. 4 -> 'sq_4kx4k_...') and
                 'block-size' (block edge, same unit; must divide
                 dimension).
        context: Lambda context object (unused).

    Returns:
        dict of download/calculation/upload/total times in milliseconds.
    """
    execution_start = time.time()
    dimension = event['dimension']
    block_size = event['block-size']
    matrix_A_name = 'sq_{}kx{}k_bs{}k'.format(dimension, dimension, block_size)
    matrix_B_name = 'sq_{}kx{}k_bs{}k-2'.format(dimension, dimension, block_size)
    bucket = 'jmue-multiplication-benchmarks'

    print("Getting all keys and download matrices")
    start = time.time()
    for matrix_name in [matrix_A_name, matrix_B_name]:
        keys = get_all_keys_for_matrix(matrix_name)
        download_all_matrix_parts(bucket, keys, matrix_name)
    end = time.time()
    download_time = end - start

    print("Building matrices")
    # BUG FIX: the original built BOTH operands from the leaked loop
    # variable `matrix_name` (== matrix_B_name after the download loop),
    # so it actually computed B.dot(B) instead of A.dot(B).
    matA = build_matrix(matrix_A_name, dimension, block_size)
    matB = build_matrix(matrix_B_name, dimension, block_size)

    print("performing calculation")
    start = time.time()
    result = matA.dot(matB)
    end = time.time()
    calculation_time = end - start
    # Drop the operands before splitting the result to keep peak memory
    # within the Lambda limit.
    del matA
    del matB

    print("Splitting and uploading result")
    start = time.time()
    # Floor division keeps the split count an int on Python 2 and 3.
    n_blocks = dimension // block_size
    for i, strip in enumerate(np.hsplit(result, n_blocks)):
        for j, block in enumerate(np.vsplit(strip, n_blocks)):
            # Result keys have always been written under the *B* matrix
            # name (the same leaked loop variable); kept for
            # compatibility with existing consumers of the bucket.
            # NOTE(review): block.dumps() is pickle-based — readers must
            # np.loads/pickle.loads these objects; confirm before
            # switching to np.save semantics.
            s3_client.put_object(Body=block.dumps(), Bucket=bucket,
                                 Key=matrix_B_name + '-lambda-result/' + 'm_{}_{}'.format(i, j))
    end = time.time()
    upload_time = end - start

    execution_end = time.time()
    execution_time = execution_end - execution_start
    print('download_time: ', int(download_time * 1000))
    print('calculation_time: ', int(calculation_time * 1000))
    print('upload_time: ', int(upload_time * 1000))
    print('execution_time: ', int(execution_time * 1000))

    # Clear /tmp so warm invocations start from a known state.
    # (`call` comes from `from subprocess import call`; the original
    # referenced it without any import, which raised NameError here.)
    call('rm -rf /tmp/*', shell=True)
    return {
        'download_time': int(download_time * 1000),
        'calculation_time': int(calculation_time * 1000),
        'upload_time': int(upload_time * 1000),
        'execution_time': int(execution_time * 1000),
    }
# sls invoke --function lambda_benchmark --path events/lambda_benchmark.json
# {
# "execution_time": 49435,
# "download_time": 3908,
# "upload_time": 2930,
# "calculation_time": 42168
# }