-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcatTbl.py
executable file
·127 lines (101 loc) · 3.58 KB
/
catTbl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/env python
# Script for concatenating IPAC table files, widening columns and filling in
# nulls as needed.
# Written by: Sean Lake at UCLA in 2012
#
# The concatenated result is written to stdout.
# If the command line option --header=N is specified (N an integer), the only
# header used in the output table is copied from the Nth input file (one indexed).
# If --header=N is specified more than once, each later one overrides the previous.
# If no --header= argument is specified, the headers of each input file are
# concatenated.
import sys
import ipac
infnames = sys.argv[1:]
hdidx = -1
i = 0
while i < len(infnames):
if infnames[i][:9] == "--header=":
hdidx = int(infnames[i][9:]) - 1
del( infnames[i] )
else:
i += 1
if len(infnames) == 0:
sys.stderr.write( "catTbl requires at least one ipac table file as an argument.\n" )
sys.exit(1)
if hdidx >= len( infnames ):
sys.stderr.write( "Invalid header selected: " + str(hidx+1) +
" selected, " + str(len(infnames)) +
" available.\n" )
sys.exit(1)
# Open every input table for reading. Names ending in ".gz" are opened
# through OpenRead with gzip enabled; all others go through the constructor.
intbls = []
for path in infnames:
    if path.endswith(".gz"):
        intbls.append(ipac.BigTbl())
        intbls[-1].OpenRead(path, gzip=True)
    else:
        intbls.append(ipac.BigTbl(path))
# First, get the header right
Otbl = ipac.BigTbl()
if hdidx < 0:
    # No single header was selected: emit every input header in turn, each
    # introduced by a numbered (one-indexed) title line.
    Otbl.hdr = ["\\fixlen = T", "\\ "]
    for number, table in enumerate(intbls, 1):
        Otbl.hdr.append("\\ Table header number " + str(number) + ":")
        Otbl.hdr.extend(table.hdr)
        Otbl.hdr.append("\\ ")
else:
    # Use the header of the selected input table as-is.
    Otbl.hdr = intbls[hdidx].hdr
# Next, fix the column names, widths, etc.
# Start from copies of the first table's layout. Assigning the first table's
# own lists/dicts would make every append and width update below modify that
# input table as well, so its column list would no longer describe its rows.
first = intbls[0]
Otbl.colnames = list(first.colnames)
Otbl.colwidths = list(first.colwidths)
Otbl.types = dict(first.types)
Otbl.nulls = dict(first.nulls)
Otbl.units = dict(first.units)

for t in intbls[1:]:
    for inidx, cn in enumerate(t.colnames):
        if cn in Otbl.colnames:
            # Column already present: keep the widest width seen so far.
            i = Otbl.colnames.index(cn)
            Otbl.colwidths[i] = max(Otbl.colwidths[i], t.colwidths[inidx])
        else:
            # New column: append it, taking its metadata from this table.
            Otbl.colnames.append(cn)
            Otbl.colwidths.append(t.colwidths[inidx])
            Otbl.types[cn] = t.types[cn]
            Otbl.nulls[cn] = t.nulls[cn]
            Otbl.units[cn] = t.units[cn]

Otbl.RefreshStringers()
# Finally, write the data to stdout
Otbl.outfile = sys.stdout
Otbl.WriteHeader()

# One centering format string per output column, in output column order.
fmtstrs = ["{0: ^" + str(w) + "s}" for w in Otbl.colwidths]
# Null text for each output column, padded to that column's width so rows
# from tables that lack the column keep the same layout as every other row.
NullsOut = [fmt.format(Otbl.nulls[n])
            for fmt, n in zip(fmtstrs, Otbl.colnames)]

obuffsize = 5 * 1024**2  # 5 megabytes
# Each output line is the column widths plus one separator per column plus
# the trailing " \n"; integer division keeps the line count an int.
obufflines = 1 + obuffsize // (sum(Otbl.colwidths) + 2 + len(Otbl.colwidths))
obuff = []

for t in intbls:
    # Position in the output row of each of this table's columns.
    outidxes = [Otbl.colnames.index(cn) for cn in t.colnames]
    while True:
        inLine = t.ReadLine()
        if inLine is None:
            break
        # Start from an all-null row, then overwrite the columns this table
        # provides, formatting each with the width of its OUTPUT column.
        outrow = list(NullsOut)
        fields = (inLine[s:e] for s, e in zip(t.colstarts, t.colends))
        # zip stops at the shorter sequence, so only fields actually sliced
        # from the line are written.
        for outidx, field in zip(outidxes, fields):
            outrow[outidx] = fmtstrs[outidx].format(field)
        obuff.append(" " + " ".join(outrow) + " \n")
        if len(obuff) >= obufflines:
            Otbl.outfile.writelines(obuff)
            obuff = []

# Flush the buffer
Otbl.outfile.writelines(obuff)
Otbl.Close()
for t in intbls:
    t.Close()