-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpyrdf_sum_interactive.py
36 lines (30 loc) · 1.04 KB
/
pyrdf_sum_interactive.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
"""
Simple sum operation on a column of the reference dataset.
Connects to the OpenStack VMs Spark cluster. If there is another cluster change
arguments of PyRDF.use() accordingly.
This variation simulates the use of an interactive cluster by creating RDF
actions in a loop. Each action will trigger a new computation graph.
"""
import ROOT
import PyRDF
PyRDF.use("spark", conf={
'npartitions': 3,
'spark.master': 'spark://137.138.55.13:7077',
'spark.driver.port': 40000,
'spark.blockManager.port': 30000,
'spark.app.name': 'PyRDF',
'spark.executor.instances': 3,
})
rdf = PyRDF.RDataFrame(
"reftree",
"root://eosuser.cern.ch//eos/user/v/vpadulan/reftree/reftree_100000000entry.root")
for i in range(1, 101):
s = rdf.Sum("b3")
print("Iteration {} with rdf {} and action {}".format(i, str(rdf), str(s)))
t = ROOT.TStopwatch()
s.GetValue()
t.Stop()
realtime = round(t.RealTime(), 2)
with open("baseline_pyrdf_sum_interactive.csv", "a+") as f:
f.write(str(realtime))
f.write("\n")