-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Provision iterator syntax sugar in python #40
Changes from 5 commits
ef60531
c67f889
988fc39
0dd82db
cdf23d9
0ad4755
afcbf99
bf44fae
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
* text=auto | ||
|
||
# Compiled Object files | ||
*.slo binary | ||
*.lo binary | ||
*.o binary | ||
*.obj binary | ||
|
||
# Precompiled Headers | ||
*.gch binary | ||
*.pch binary | ||
|
||
# Compiled Dynamic libraries | ||
*.so binary | ||
*.dylib binary | ||
*.dll binary | ||
|
||
# Compiled Static libraries | ||
*.lai binary | ||
*.la binary | ||
*.a binary | ||
*.lib binary | ||
|
||
# Executables | ||
*.exe binary | ||
*.out binary | ||
*.app binary | ||
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
class WordIterator:
    """Iterate over the words of an annotation.

    The iterator yields *itself* on every step, positioned on the next word;
    call :meth:`surface`, :meth:`range` or :meth:`id` on the yielded object to
    inspect the current word.

    Args:
        annotation: Object exposing ``sentence_count()``,
            ``word_count(sentence_id)``, ``word_as_range(sentence_id, word_id)``
            and a sliceable ``text`` attribute.
        sentence_id: When ``None`` (default) every sentence is traversed in
            order; otherwise only the words of that single sentence are
            visited.
    """

    def __init__(self, annotation, sentence_id=None):
        self._annotation = annotation

        if sentence_id is None:
            self._first_sentence_id = 0
            self._max_sentence_id = self._annotation.sentence_count()
        else:
            self._first_sentence_id = sentence_id
            self._max_sentence_id = sentence_id + 1

        self._sentence_id = self._first_sentence_id
        self._word_id = -1

    def __iter__(self):
        # Rewind both coordinates so that a second pass starts from the first
        # sentence again instead of from wherever the previous pass ended.
        self._sentence_id = self._first_sentence_id
        self._word_id = -1
        return self

    def __next__(self):
        """Advance to the next word and return the iterator itself.

        Raises:
            StopIteration: When there are no sentences at all or every word in
                the configured sentence span has been visited.
        """
        if self._annotation.sentence_count() == 0:
            raise StopIteration

        self._word_id += 1
        # Roll over to following sentences until one actually contains a word
        # at the current index; this skips sentences with zero words.
        while self._sentence_id < self._max_sentence_id:
            if self._word_id < self._annotation.word_count(self._sentence_id):
                return self
            self._sentence_id += 1
            self._word_id = 0
        raise StopIteration

    def surface(self):
        """Return the slice of the annotation text covered by the current word."""
        span = self.range()
        return self._annotation.text[span.begin:span.end]

    def range(self):
        """Return ``annotation.word_as_range`` for the current word."""
        return self._annotation.word_as_range(self._sentence_id, self._word_id)

    def id(self):
        """Return the current position as ``(sentence_id, word_id)``."""
        return (self._sentence_id, self._word_id)
|
||
class SentenceIterator:
    """Iterate over the sentences of an annotation.

    The iterator yields *itself* on every step, positioned on the next
    sentence. Use :meth:`words` to walk the words of the current sentence and
    ``repr()`` to obtain its text.

    Args:
        annotation: Object exposing ``sentence_count()``,
            ``sentence_as_range(sentence_id)`` and a sliceable ``text``
            attribute.
    """

    def __init__(self, annotation):
        self._annotation = annotation
        self._sentence_id = -1

    def __iter__(self):
        # Rewind so the same object can be looped over more than once.
        self._sentence_id = -1
        return self

    def __next__(self):
        """Advance to the next sentence and return the iterator itself.

        Raises:
            StopIteration: When no further sentence exists.
        """
        next_id = self._sentence_id + 1
        if next_id >= self._annotation.sentence_count():
            # Do not move past the end: repeated calls after exhaustion would
            # otherwise keep incrementing the index without bound.
            raise StopIteration
        self._sentence_id = next_id
        return self

    def words(self):
        """Return a ``WordIterator`` restricted to the current sentence."""
        return WordIterator(self._annotation, self._sentence_id)

    def __repr__(self):
        span = self._annotation.sentence_as_range(self._sentence_id)
        sentence = self._annotation.text[span.begin:span.end]
        return f'{sentence}'
|
||
def sentences(annotation):
    """Build a ``SentenceIterator`` over the sentences of ``annotation``."""
    sentence_iterator = SentenceIterator(annotation)
    return sentence_iterator
|
||
def words(annotation, sentence_id=None):
    """Build a ``WordIterator`` over ``annotation``.

    ``sentence_id`` is forwarded unchanged; with the default ``None`` the
    iterator covers every sentence, otherwise only the given one.
    """
    word_iterator = WordIterator(annotation, sentence_id)
    return word_iterator
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
# type: ignore | ||
from slimt import iterators | ||
|
||
def test_iterators(service, models):
    """Check that the iterator helpers agree with index-based annotation access.

    Three views of the translated target are compared word by word: direct
    ``word_as_range`` lookups, the per-sentence word iterator obtained from the
    sentence iterator, and the global word iterator spanning all sentences.
    """
    source = "Hi, How are you? Its been a long time.\nCan you help me out with some things?"
    model = models[1]
    response = service.translate(model, [source], html=False)[0]

    target = response.target
    text = target.text

    sentences = iterators.sentences(target)
    all_words = iterators.words(target)

    # enumerate() drains each iterator completely, so the explicit count
    # checks below catch an iterator that yields too few or too many items
    # (zip against range() would silently truncate instead).
    visited_sentences = 0
    for sentence_idx, sentence in enumerate(sentences):
        visited_words = 0
        for word_idx, word in enumerate(sentence.words()):
            expected_range = target.word_as_range(sentence_idx, word_idx)
            expected_word = text[expected_range.begin:expected_range.end]

            # Sentence iterator -> word iterator
            word_range = word.range()
            assert expected_range.begin == word_range.begin
            assert expected_range.end == word_range.end
            assert expected_word == word.surface()

            # Global word iterator
            global_word = next(all_words)
            global_range = global_word.range()
            assert expected_range.begin == global_range.begin
            assert expected_range.end == global_range.end
            assert expected_word == global_word.surface()

            visited_words += 1

        assert visited_words == target.word_count(sentence_idx)
        visited_sentences += 1

    assert visited_sentences == target.sentence_count()
    # The global iterator must be exhausted exactly when the nested walk ends.
    assert next(all_words, None) is None
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
no trailing newline