-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathocropus-sidebyside
executable file
·85 lines (72 loc) · 2.15 KB
/
ocropus-sidebyside
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/python
import os,sys,glob,re,traceback,codecs,os.path
import argparse
# Command-line interface.  Each row is (short flag, long flag, keyword
# arguments for add_argument); rows are registered in the order listed so
# the generated --help text keeps the same option order.
_OPTION_TABLE = (
    ('-s', '--fontsize', dict(default=10, type=int)),
    ('-R', '--readme', dict(default="README")),
    ('-x', '--extensions', dict(default=".txt")),
    ('-o', '--output', dict(default="index.html")),
    ('-b', '--bookdir', dict(default="book")),
    ('-N', '--maxpages', dict(default=10, type=int)),
)

parser = argparse.ArgumentParser(
    description="\nCreate a simple side-by-side display of OCR results.\n")
for _short_flag, _long_flag, _settings in _OPTION_TABLE:
    parser.add_argument(_short_flag, _long_flag, **_settings)
args = parser.parse_args()

# --extensions arrives as one whitespace-separated string; split it into the
# list of file extensions used by the rest of the script.
extensions = args.extensions.split()
def format(s):
    """Convert the text of one line file into an HTML-safe fragment.

    ``&`` and ``<`` are replaced by their character entities so that the
    text is displayed literally instead of being parsed as markup.  Every
    newline, together with the whitespace around it, is then removed, so
    the result is a single line.

    The name shadows the ``format`` builtin; it is kept because the rest of
    the script calls the function under this name.

    Args:
        s: text read from a line file.

    Returns:
        The escaped text with all line breaks removed.
    """
    # Escape '&' first, otherwise the '&' of the freshly inserted '&lt;'
    # would itself be escaped again.
    s = s.replace('&', '&amp;').replace('<', '&lt;')
    # '\n' is already part of '\s', so this matches exactly what the
    # original '[\s\n]*\n[\s\n]*' pattern matched.
    s = re.sub(r'\s*\n\s*', '', s)
    return s
# Read the optional README *before* changing directory: args.readme is
# looked up relative to the directory the script was started from.
readme = None
if os.path.exists(args.readme):
    # The context manager closes the file handle as soon as the text has
    # been read instead of leaving it open for the rest of the run.
    with codecs.open(args.readme, "r", "utf-8") as readme_file:
        readme = readme_file.read()

# From here on every relative path, including the output file opened below,
# is resolved inside the book directory.
os.chdir(args.bookdir)

# UTF-8 encoded output file.  It deliberately stays open: the rest of the
# script writes to it and closes it once the document is complete.
stream = codecs.open(args.output, "w", "utf-8")
def P(x):
    """Write ``x`` to the module-level output ``stream``, then a newline."""
    for chunk in (x, "\n"):
        stream.write(chunk)
# ---- Document head ----------------------------------------------------------
# <head> (not <header>, which is a body-level sectioning element) is the
# element that holds the charset declaration and the style sheet.
P("<head>")
P('<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>')
P("""<style type='text/css'>
td {
font-size:%dpt;
border: solid 2px #808080;
}
</style>""" % args.fontsize)
P("</head>")

# ---- Document body ----------------------------------------------------------
P("<body>")
if readme is not None:
    # Imported lazily so the third-party markdown package is only required
    # when a README was actually found.
    import markdown
    P(markdown.markdown(readme))
else:
    P("""
<h1>Image vs Output</h1>
""")

# Table header: one column for the page image, one per requested extension.
P("<table>")
P("<tr>")
for t in ["image"] + extensions:
    P("<th>%s</th>" % t)
P("</tr>")

# One table row per page directory (four-digit names), in sorted order,
# stopping after args.maxpages rows.
for i, base in enumerate(sorted(glob.glob("[0-9][0-9][0-9][0-9]"))):
    if i >= args.maxpages:
        break
    P("<tr>")

    # First cell: the page image, preferring <base>.nrm.png over
    # <base>.bin.png, with a placeholder when neither file exists.
    P("<td valign=top style='width:600px'>")
    # Progress output on stdout; the parenthesized form prints the same
    # thing on Python 2 and is also valid Python 3.
    print(base)
    if os.path.exists("%s.nrm.png" % base):
        P("<img src='%s.nrm.png' width=100%% />" % base)
    elif os.path.exists("%s.bin.png" % base):
        P("<img src='%s.bin.png' width=100%% />" % base)
    else:
        # Close the paragraph properly instead of opening a second <p>.
        P("<p>[no image for %s]</p>" % base)
    P("<font size=1>%s</font>" % base)
    P("</td>")

    # Remaining cells: for each extension, the contents of all matching
    # line files of this page, one per output line.
    for ext in extensions:
        P("<td valign=top style='width:600px;padding:3ex'>")
        for lname in sorted(glob.glob(base + "/0?????" + ext)):
            # Best effort: a line file that cannot be read or decoded is
            # reported on stderr and skipped.  Catching Exception rather
            # than using a bare except lets Ctrl-C and SystemExit through.
            try:
                with codecs.open(lname, "r", "utf-8") as line_file:
                    line_text = line_file.read()
                P(format(line_text) + u"<br />")
            except Exception:
                traceback.print_exc()
        P("</td>")
    P("</tr>")
P("</table>")
stream.close()