-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathocr_parser.rb
62 lines (53 loc) · 1.56 KB
/
ocr_parser.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# @author Joris J. van Zundert
require_relative "ocr_parse_models"
# OCRParser takes a text and splits it on line breaks.
# Each line is matched to all the models provided.
# A Model tests if a line of text fits that model or not.
class OCRParser
  # @!attribute models
  #   @return [Array<Model>] the models to match against each line of the text
  attr_accessor :models

  # Stores the text and splits it into lines.
  #
  # Both "\n" and "\r\n" are treated as line breaks, so text with Windows
  # line endings does not leave a trailing "\r" on every line (which would
  # otherwise be handed to the models and end up in the parse result).
  #
  # @param text [String] the text to be parsed as a string
  def text=(text)
    @text = text
    @lines = text.split(/\r?\n/)
  end

  # Reads a file and uses its content as the text to be parsed.
  #
  # @param file_path [String] the path to the file having the text to be parsed
  def load_text(file_path)
    self.text = File.read(file_path)
  end

  # Tests every line against every model.
  #
  # Before a model is asked whether it matches, it is handed a fresh
  # LineContext built from all lines and the index of the current line.
  # If no text or no models have been set yet, they are treated as empty.
  #
  # @yield [line, matches] once per line, in order
  # @yieldparam line [String] the current line
  # @yieldparam matches [Array<Class>] the classes of the models that matched
  # @return [Enumerator] when called without a block
  def match_lines
    return enum_for(:match_lines) unless block_given?

    lines = @lines || []
    models = @models || []
    lines.each_with_index do |line, index|
      matches = []
      models.each do |model|
        model.line_context = LineContext.new(lines, index)
        matches.push(model.class) if model.matches(line)
      end
      yield line, matches
    end
  end

  # Walks over all lines and decides for each one whether it is accepted.
  #
  # A matched model class whose +terminators+ is set is remembered as an
  # active multiline model; it stays active until a later line matches one of
  # the classes listed in its +terminators+. A line is accepted only when no
  # model matched it and no multiline model is active.
  #
  # @yield [accept, line, matches] once per line, in order
  # @yieldparam accept [Boolean] true if the line is accepted
  # @yieldparam line [String] the current line
  # @yieldparam matches [Array<Class>] the classes of the models that matched
  # @return [Enumerator] when called without a block
  def parse_tuples
    return enum_for(:parse_tuples) unless block_given?

    active_multiline_models = []
    match_lines do |line, matches|
      matches.each do |model|
        # A match first closes every active model it terminates ...
        active_multiline_models.reject! { |active| active.terminators.include?(model) }
        # ... and then becomes active itself if it declares terminators.
        active_multiline_models.push(model) if model.terminators
      end
      yield matches.empty? && active_multiline_models.empty?, line, matches
    end
  end

  # Parses the text and collects the accepted lines.
  #
  # @return [Array<String>] the lines that matched no model and were not
  #   inside an active multiline model
  def parse
    accepted = []
    parse_tuples { |accept, line, _matches| accepted.push(line) if accept }
    accepted
  end
end