-
Notifications
You must be signed in to change notification settings - Fork 20
/
colibricore_alignmodel.pxi
144 lines (108 loc) · 4.47 KB
/
colibricore_alignmodel.pxi
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
def __len__(self):
    """Report how many distinct patterns the model holds.

    Makes ``len(alignmodel)`` work by forwarding to the ``size()`` of the
    underlying model data.

    :rtype: int
    """
    return self.data.size()
def __bool__(self):
    """Truth value of the model: true when its size is greater than zero.

    :rtype: bool
    """
    return 0 < self.data.size()
def types(self):
    """Report the number of distinct word types seen in the training data.

    :rtype: int
    """
    type_count = self.data.types()
    return type_count
def tokens(self):
    """Report the number of tokens seen in the training data.

    :rtype: int
    """
    token_count = self.data.tokens()
    return token_count
def minlength(self):
    """Report the length of the shortest pattern in the model.

    :rtype: int
    """
    shortest = self.data.minlength()
    return shortest
def maxlength(self):
    """Report the length of the longest pattern in the model.

    :rtype: int
    """
    longest = self.data.maxlength()
    return longest
def type(self):
    """Report the model type code (10 = UNINDEXED, 20 = INDEXED).

    :rtype: int
    """
    model_type = self.data.getmodeltype()
    return model_type
def version(self):
    """Report the model version, as given by the underlying ``getmodelversion()``.

    :rtype: int
    """
    model_version = self.data.getmodelversion()
    return model_version
cdef has(self, Pattern pattern):
    # Membership test for a single pattern against the underlying model data.
    # The isinstance guard matters even though the argument is typed: it is
    # what rejects values that are not real Pattern instances before the
    # wrapped C++ pattern is read.
    if isinstance(pattern, Pattern):
        return self.data.has(pattern.cpattern)
    raise ValueError("Expected instance of Pattern")
cdef hastuple(self, Pattern pattern, Pattern pattern2):
    # Membership test for a (pattern, pattern2) pair against the underlying
    # model data.
    #
    # Both arguments are validated, not just the first. A parameter declared
    # with an extension type also accepts None, and reading .cpattern from
    # None is not a checked operation, so pattern2 needs the same guard that
    # has() applies to its single argument.
    #
    # Raises ValueError if either argument is not a Pattern instance.
    if not isinstance(pattern, Pattern) or not isinstance(pattern2, Pattern):
        raise ValueError("Expected instance of Pattern")
    return self.data.has(pattern.cpattern, pattern2.cpattern)
def __contains__(self, pattern):
    """Membership test for a pattern, or for a pair of patterns.

    :param pattern: The pattern to look for, or a pair of patterns
    :type pattern: Pattern or 2-tuple of patterns
    :rtype: bool
    :raises ValueError: if the argument is neither a Pattern nor a 2-tuple of Patterns

    Example::

        sourcepattern in alignmodel
        (sourcepattern, targetpattern) in alignmodel
    """
    if isinstance(pattern, tuple):
        # Only a pair made up of two Pattern instances is acceptable.
        is_pattern_pair = (
            len(pattern) == 2
            and isinstance(pattern[0], Pattern)
            and isinstance(pattern[1], Pattern)
        )
        if is_pattern_pair:
            return self.hastuple(pattern[0], pattern[1])
    elif isinstance(pattern, Pattern):
        return self.has(pattern)
    # Anything that reaches this point had an unsupported type or shape.
    raise ValueError("Expected instance of Pattern or 2-tuple of Patterns")
def __getitem__(self, pattern):
    """Look up the data stored for a pattern, or for a pair of patterns.

    A single pattern is forwarded to ``getdata``; a 2-tuple of patterns is
    forwarded to ``getdatatuple``. The result of that call is returned as is.

    :param pattern: A pattern, or a pair of patterns
    :type pattern: Pattern or 2-tuple of patterns
    :raises ValueError: if the argument is neither a Pattern nor a 2-tuple of Patterns

    Example::

        value = alignmodel[sourcepattern]
        value = alignmodel[(sourcepattern, targetpattern)]
    """
    if isinstance(pattern, tuple):
        # Only a pair made up of two Pattern instances is acceptable.
        is_pattern_pair = (
            len(pattern) == 2
            and isinstance(pattern[0], Pattern)
            and isinstance(pattern[1], Pattern)
        )
        if is_pattern_pair:
            return self.getdatatuple(pattern[0], pattern[1])
    elif isinstance(pattern, Pattern):
        return self.getdata(pattern)
    # Anything that reaches this point had an unsupported type or shape.
    raise ValueError("Expected instance of Pattern or 2-tuple of Patterns")
def __iter__(self):
    """Generate every source pattern in the model, one at a time.

    Example::

        for sourcepattern in alignmodel:
            print(sourcepattern.tostring(classdecoder))
    """
    cdef cPattern current
    cursor = self.data.begin()
    while cursor != self.data.end():
        # The key (.first) of each entry is the source pattern; bind it to a
        # new Python-level Pattern before handing it to the caller.
        current = deref(cursor).first
        wrapped = Pattern()
        wrapped.bind(current)
        yield wrapped
        inc(cursor)
def __init__(self, str filename = "",PatternModelOptions options = None):
    """Create an alignment model, optionally populated from a file.

    With no filename (the default) nothing is loaded; otherwise the file is
    loaded through :meth:`load`.

    :param filename: The name of the file to load, must be a valid colibri alignmodel file
    :type filename: str
    :param options: An instance of PatternModelOptions, containing the options used for loading
    :type options: PatternModelOptions
    """
    if not filename:
        # No file was requested, so there is nothing to load.
        return
    self.load(filename, options)
def load(self, str filename, PatternModelOptions options=None):
    """Read an alignment model from a file into this instance.

    :param filename: The name of the file to load, must be a valid colibri alignmodel file
    :type filename: str
    :param options: An instance of PatternModelOptions, containing the options used for loading;
        a default-constructed instance is used when omitted
    :type options: PatternModelOptions
    :raises FileNotFoundError: if a non-empty filename does not exist on disk
    """
    if options is None:
        options = PatternModelOptions()
    # The existence check applies only to non-empty filenames.
    file_missing = filename and not os.path.exists(filename)
    if file_missing:
        raise FileNotFoundError(filename)
    self.data.load(encode(filename), options.coptions)
def read(self, str filename, PatternModelOptions options=None):
    """Alias for :meth:`load`; both arguments are passed on unchanged.

    :param filename: The name of the file to load
    :type filename: str
    :param options: The options used for loading
    :type options: PatternModelOptions
    """
    self.load(filename, options)
cpdef write(self, str filename):
    """Save the alignment model to a file.

    The filename is passed through ``encode`` before it is handed to the
    underlying model's ``write``.

    :param filename: The name of the file to write to
    :type filename: str
    """
    self.data.write(encode(filename))
cdef cPatternModelInterface* getinterface(self):
    # Return the cPatternModelInterface pointer provided by the underlying
    # model data, so other Cython code can address this model through it.
    return self.data.getinterface()