-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathjoinTables.py
55 lines (41 loc) · 1.27 KB
/
joinTables.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
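## This script joins two or more CSV files on a common key field.
## Usage: joinTables.py FILE1 KEY1 FILE2 KEY2 ...  (each file is followed by the
## name of its key column); a key found in any input file produces one row in
## output.csv.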
import csv
import sys
import functools
# Command line: FILE1 KEY1 [FILE2 KEY2 ...] -- file names and key-column names
# alternate, so the odd arguments are files and the even ones are keys.
InFiles = sys.argv[1::2]
InKeys = sys.argv[2::2]
if not InFiles or len(InFiles) != len(InKeys):
    # Without this check zip() below would silently drop a file that was
    # given without a key column, and no arguments at all would crash later.
    sys.exit("usage: %s FILE1 KEY1 [FILE2 KEY2 ...]" % sys.argv[0])

# fs[i] holds every row of InFiles[i] as a dict keyed by column name;
# headers[i] holds that file's column names in file order.
fs = []
headers = []
for InFile in InFiles:
    # newline="" is what the csv module asks for so that line breaks inside
    # quoted fields are parsed correctly.
    with open(InFile, newline="") as handle:
        reader = csv.DictReader(handle)
        fs.append(list(reader))
        # Take the header from the reader rather than from the first row so
        # that a file with a header but no data rows does not crash.
        headers.append(list(reader.fieldnames or []))
filesAndKeys = list(zip(fs, InKeys))

# Output columns: every column of the first file, then the non-key columns of
# each later file. A column name is listed only once, because a joined row is
# a dict and can carry just one value per name.
fields = list(headers[0])
print(fields)
seen = set(fields)
for header, key in zip(headers[1:], InKeys[1:]):
    for column in header:
        if column != key and column not in seen:
            seen.add(column)
            fields.append(column)
print(fields)

# Every key value that occurs in any file's key column.
allKeys = set().union(*({row[key] for row in rows} for rows, key in filesAndKeys))
print(allKeys)
print(filesAndKeys[0][1])
def joinRow(k):
    """Build the joined output row for key value *k*.

    Reads the module-level ``filesAndKeys`` list of ``(rows, key_column)``
    pairs. For each file the first row whose key column equals *k* is merged
    into the result; files without a matching row contribute nothing. Later
    files overwrite earlier ones on shared column names.

    Each file's own key column is left out of the merge and the key is stored
    once, under the first file's key column. This keeps the row valid when the
    files name their key columns differently, and fills the key in even when
    the first file has no row for *k*.

    Returns an empty dict when no file contains *k*.
    """
    out_column = filesAndKeys[0][1]
    merged = {}
    found = False
    for rows, key_column in filesAndKeys:
        match = next((row for row in rows if row[key_column] == k), None)
        if match is None:
            continue
        found = True
        merged.update(
            (column, value)
            for column, value in match.items()
            if column != key_column
        )
    if found:
        # Set last so a same-named data column in a later file cannot clobber it.
        merged[out_column] = k
    return merged
# Write the joined table. newline="" lets the csv module emit its own line
# endings; without it Windows output gets a blank line after every row.
with open("output.csv", "w", newline="") as f:
    w = csv.DictWriter(f, fieldnames=fields)
    w.writeheader()
    # allKeys is a set, whose iteration order can differ between runs; sort so
    # the output file is reproducible. key=str keeps the sort safe if a short
    # input row left a key value of None.
    for k in sorted(allKeys, key=str):
        row = joinRow(k)
        w.writerow(row)