-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathocropus-visualize-results
executable file
·112 lines (101 loc) · 4.27 KB
/
ocropus-visualize-results
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/usr/bin/python
import matplotlib
matplotlib.use("AGG")
import sys,os,re,glob,math,glob,signal,traceback
from optparse import OptionParser
from scipy.ndimage import interpolation
import pylab
from pylab import *
import ocrolib
from ocrolib import number_of_processors,die,linerec,morph
from scipy.misc import imsave
def _exit_on_interrupt(*_signal_args):
    """Signal handler: end the process with exit status 1."""
    sys.exit(1)

# Ctrl-C (SIGINT) terminates the script with exit status 1.
signal.signal(signal.SIGINT, _exit_on_interrupt)
# Command-line interface: the book directory to visualize and an upper
# bound on the number of page directories processed.
import argparse
parser = argparse.ArgumentParser(description = """
Generate HTML for debugging a book directory.
Input: a directory in standard OCRopus book format
Output: index.html files and thumbnails showing recognition results
""")
# nargs="?" makes the positional optional; without it argparse treats the
# argument as required and never applies the default.
parser.add_argument("book",nargs="?",default="book",help="book directory (default: %(default)s)")
parser.add_argument("-N","--npages",type=int,default=100000,help="max number of pages")
args = parser.parse_args()
def write_cseg(stream,cseg_file):
    """Write an HTML table of character-segment thumbnails to `stream`.

    The segmentation stored in `cseg_file` is loaded with
    `ocrolib.read_line_segmentation` and split into segments with
    `linerec.extract_csegs`.  Every segment is saved as a PNG named
    ".__<cseg_file>_NNN.png" (NNN = zero-padded segment index, path
    relative to the current directory) and referenced from one table
    cell, displayed at half of its pixel height but at least 2.

    Parameters:
        stream: writable text stream that receives the HTML.
        cseg_file: path of the character segmentation image.
    """
    cseg = ocrolib.read_line_segmentation(cseg_file)
    csegs = linerec.extract_csegs(cseg)
    stream.write("<table><tr>")
    for i,c in enumerate(csegs):
        out = ".__"+cseg_file+"_%03d.png"%i
        # Invert intensities (max - value) before saving in grayscale.
        pylab.imsave(out,amax(c.img)-c.img,cmap=cm.gray)
        # Floor division keeps the height an integer on any interpreter.
        height = max(2,c.img.shape[0]//2)
        stream.write("<td><img src=%s height=%d style='border: 1px #ccccff solid;'></td>"%(out,height))
    stream.write("</tr></table>")
    stream.write("\n")
def genpage(d):
    """Generate `index.html` inside the page directory `d`.

    Prints a progress line, changes into `d`, and writes one section per
    line image matching "??????.bin.png" (in sorted order):

      * the contents of the "txt" variant file and of the raw "txt"
        variant file, when those files exist;
      * the line image itself, shown at half its pixel width (minimum 10);
      * small links built from `args.book`, `d` and the image name;
      * character-segment thumbnails via `write_cseg` when the "cseg"
        variant exists;
      * a rendering of the "rseg" variant saved as ".__<rseg_file>_.png"
        when that variant exists.

    The previous working directory is always restored, even on error.

    Parameters:
        d: path of the page directory (relative to the current directory).
    """
    print("=== %s"%(d,))
    here = os.getcwd()
    try:
        os.chdir(d)
        with open("index.html","w") as stream:
            stream.write("<h1>%s</h1>\n"%d)
            images = sorted(glob.glob("??????.bin.png"))
            for img in images:
                # NOTE(review): recognized text is written without HTML
                # escaping; "<" or "&" in the text will affect rendering.
                txt = ocrolib.fvariant(img,"txt","")
                if os.path.exists(txt):
                    with open(txt) as tf:
                        text = tf.read()
                    stream.write("<font color='#000066'><b>%s</b></font><br>\n"%text)
                rtxt = ocrolib.fvariant(img,"txt","raw")
                if os.path.exists(rtxt):
                    with open(rtxt) as tf:
                        rtext = tf.read()
                    stream.write("<font color='gray'><b>%s</b></font><br>\n"%rtext)
                stream.write("<p />\n")

                image = ocrolib.read_image_gray(img)
                # Floor division keeps the width an integer on any interpreter.
                stream.write("<img width='%d' src='%s'>\n"%(max(10,image.shape[1]//2),img))
                stream.write("<br />\n")
                stream.write("<font size=-2>")
                stream.write("<a href=%s>%s</a> / "%(args.book,args.book))
                stream.write("<a href=%s>%s</a> / "%(args.book+"/"+d,d))
                stream.write("<a href=%s>%s</a>"%(args.book+"/"+d+"/"+img,img))
                stream.write("</font>")
                stream.write("<p />\n")

                cseg = ocrolib.fvariant(img,"cseg")
                if os.path.exists(cseg):
                    write_cseg(stream,cseg)

                rseg_file = ocrolib.fvariant(img,"rseg")
                if os.path.exists(rseg_file):
                    rseg = ocrolib.read_line_segmentation(rseg_file)
                    figfile = ".__"+rseg_file+"_.png"
                    fig = figure(figsize=(20,1),dpi=150)
                    try:
                        morph.showlabels(rseg)
                        savefig(figfile)
                    finally:
                        # pyplot keeps every figure alive until it is closed;
                        # release it so figures do not pile up across lines.
                        pylab.close(fig)
                    stream.write("<img height='50' src='%s'><br>\n"%figfile)
                stream.write("<hr>\n")
    finally:
        os.chdir(here)
# Build the top-level index inside the book directory: for every page
# directory, generate its own page, save a 1/8-scale thumbnail, and emit a
# table with the thumbnail link and a sample of the page's text files.
os.chdir(args.book)
with open("index.html","w") as stream:
    # "????" also matches plain files with four-character names; genpage
    # can only change into directories, so keep directories only.
    pages = [p for p in sorted(glob.glob("????")) if os.path.isdir(p)]
    for d in pages[:args.npages]:
        genpage(d)

        # Thumbnail: the page's binarized image when present, otherwise a
        # blank 300x300 placeholder.
        if os.path.exists(d+".bin.png"):
            image = ocrolib.read_image_gray(d+".bin.png")
        else:
            image = zeros((300,300))
        out = ".__"+d+".png"
        image = interpolation.zoom(image,(0.125,0.125),order=1)
        imsave(out,image)

        stream.write("<table border=1><tr>\n")
        stream.write("<td>")
        stream.write("<a href='%s/index.html'><img src='%s'></a>"%(d,out))
        stream.write("<br>%s<br>"%d)
        stream.write("</td>\n")
        stream.write("<td>")
        # Show at most 10 text files, skipping those shorter than 20
        # characters and truncating each to its first 100 characters.
        count = 0
        for fname in sorted(glob.glob(d+"/??????.txt")):
            with open(fname) as tf:
                s = tf.read()
            if len(s)<20:
                continue
            stream.write("%s<br>\n"%s[:100])
            count += 1
            if count>=10:
                break
        stream.write("</td>\n")
        stream.write("</tr></table>\n")