Skip to content

Commit

Permalink
Provision iterator syntax sugar in python
Browse files Browse the repository at this point in the history
  • Loading branch information
sivaprasad2000 committed Dec 16, 2023
1 parent 7f89761 commit ef60531
Show file tree
Hide file tree
Showing 8 changed files with 138 additions and 7 deletions.
27 changes: 27 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Default rule for every path: let Git detect whether a file is text and
# normalize line endings for the files it detects as text.
* text=auto

# The patterns below mark build artifacts with Git's built-in `binary`
# macro attribute, so they are never line-ending converted or text-diffed.

# Compiled Object files
*.slo binary
*.lo binary
*.o binary
*.obj binary

# Precompiled Headers
*.gch binary
*.pch binary

# Compiled Dynamic libraries
*.so binary
*.dylib binary
*.dll binary

# Compiled Static libraries
*.lai binary
*.la binary
*.a binary
*.lib binary

# Executables
*.exe binary
*.out binary
*.app binary
2 changes: 2 additions & 0 deletions bindings/python/iterators/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .sentences import SentenceIterator as sentences
from .words import WordIterator as words
24 changes: 24 additions & 0 deletions bindings/python/iterators/sentences.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from .words import WordIterator

class SentenceIterator:
    """Cursor-style iterator over the sentences of an annotation.

    The iterator yields *itself* on every step; the yielded object is a
    cursor positioned on the current sentence. Do not store yielded values
    and expect them to stay on an earlier sentence.

    The annotation object must provide ``sentence_count()``,
    ``sentence_as_range(sentence_id)`` (returning an object with ``begin``
    and ``end``) and a sliceable ``text`` attribute.
    """

    def __init__(self, annotation):
        """Create a cursor over *annotation*, positioned before sentence 0."""
        self._annotation = annotation
        self._sentence_id = -1

    def __iter__(self):
        """Rewind the cursor to before the first sentence and return self."""
        self._sentence_id = -1
        return self

    def __next__(self):
        """Advance to the next sentence and return self.

        Raises:
            StopIteration: when there is no further sentence. The cursor is
                parked one past the last sentence, so repeated calls keep
                raising without the index growing unboundedly.
        """
        count = self._annotation.sentence_count()
        if self._sentence_id + 1 >= count:
            self._sentence_id = count
            raise StopIteration
        self._sentence_id += 1
        return self

    def _has_current(self):
        """Return True when the cursor is positioned on a valid sentence."""
        return 0 <= self._sentence_id < self._annotation.sentence_count()

    def words(self):
        """Return a WordIterator restricted to the current sentence."""
        return WordIterator(self._annotation, self._sentence_id)

    def __repr__(self):
        """Return the text of the current sentence.

        Before the first ``next()`` call and after exhaustion there is no
        current sentence; a placeholder is returned instead of querying the
        annotation with an out-of-range index, so ``repr`` never raises.
        """
        if not self._has_current():
            return f'<{type(self).__name__}: no current sentence>'
        span = self._annotation.sentence_as_range(self._sentence_id)
        return str(self._annotation.text[span.begin:span.end])
38 changes: 38 additions & 0 deletions bindings/python/iterators/words.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
class WordIterator:
    """Cursor-style iterator over the words of an annotation.

    With ``sentence_id=None`` the iterator walks every word of every
    sentence in order; with an explicit ``sentence_id`` it walks only the
    words of that sentence.

    The iterator yields *itself* on every step; the yielded object is a
    cursor positioned on the current word, which can be inspected through
    ``surface()``, ``range()`` and ``id()``.

    The annotation object must provide ``sentence_count()``,
    ``word_count(sentence_id)``, ``word_as_range(sentence_id, word_id)``
    (returning an object with ``begin`` and ``end``) and a sliceable
    ``text`` attribute.
    """

    def __init__(self, annotation, sentence_id=None):
        """Create a cursor positioned before the first word in scope.

        Args:
            annotation: the annotated text to walk.
            sentence_id: restrict iteration to this sentence; ``None``
                means all sentences.
        """
        self._annotation = annotation

        if sentence_id is None:
            self._first_sentence_id = 0
            self._max_sentence_id = self._annotation.sentence_count()
        else:
            self._first_sentence_id = sentence_id
            self._max_sentence_id = sentence_id + 1

        self._sentence_id = self._first_sentence_id
        self._word_id = -1

    def __iter__(self):
        """Rewind the cursor to before the first word in scope.

        Both indices are reset, so the iterator can be consumed again after
        it has been exhausted.
        """
        self._sentence_id = self._first_sentence_id
        self._word_id = -1
        return self

    def __next__(self):
        """Advance to the next word and return self.

        Sentences without words are skipped. Once the last sentence in
        scope is exhausted the cursor stays parked past the end, so further
        calls keep raising without touching the annotation.

        Raises:
            StopIteration: when no further word is available.
        """
        self._word_id += 1
        while self._sentence_id < self._max_sentence_id:
            if self._word_id < self._annotation.word_count(self._sentence_id):
                return self
            # Current sentence exhausted (or empty): try the next one.
            self._sentence_id += 1
            self._word_id = 0
        raise StopIteration

    def surface(self):
        """Return the text covered by the current word."""
        span = self.range()
        return self._annotation.text[span.begin:span.end]

    def range(self):
        """Return the annotation's range object for the current word."""
        return self._annotation.word_as_range(self._sentence_id, self._word_id)

    def id(self):
        """Return the current position as ``(sentence_id, word_id)``."""
        return (self._sentence_id, self._word_id)
2 changes: 1 addition & 1 deletion bindings/python/tests/test_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,4 @@ def test_basic(service, models):
byte_range = byte.word_as_range(sentence_idx, word_idx)
utf8_to_byte_range = utf8_to_byte.word_as_range(sentence_idx, word_idx)
assert byte_range.begin == utf8_to_byte_range.begin
assert byte_range.end == utf8_to_byte_range.end
assert byte_range.end == utf8_to_byte_range.end
40 changes: 40 additions & 0 deletions bindings/python/tests/test_iterators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# type: ignore
from slimt import iterators

def test_iterators(service, models):
    """Check the sentence/word iterators against index-based annotation access.

    Every word reached through ``iterators.sentences(...).words()`` and
    through the global ``iterators.words(...)`` must match
    ``target.word_as_range`` for the same ``(sentence, word)`` position.
    The iterators must also yield exactly as many items as the annotation
    reports, so under- or over-yielding is caught rather than silently
    truncated.
    """
    source = "Hi, How are you? Its been a long time.\nCan you help me out with some things?"
    model = models[1]
    response = service.translate(model, [source], html=False)[0]

    target = response.target
    text = target.text

    global_words = iterators.words(target)

    sentence_count = target.sentence_count()
    sentences_seen = 0
    for sentence_idx, sentence in enumerate(iterators.sentences(target)):
        # Guard before index-based access so over-yielding fails cleanly.
        assert sentence_idx < sentence_count
        sentences_seen += 1

        word_count = target.word_count(sentence_idx)
        words_seen = 0
        for word_idx, word in enumerate(sentence.words()):
            assert word_idx < word_count
            words_seen += 1

            expected_range = target.word_as_range(sentence_idx, word_idx)
            expected_word = text[expected_range.begin:expected_range.end]

            # Per-sentence word iterator.
            sentence_range = word.range()
            assert expected_range.begin == sentence_range.begin
            assert expected_range.end == sentence_range.end
            assert expected_word == word.surface()

            # Global word iterator, advanced in lockstep.
            word_global = next(global_words)
            global_range = word_global.range()
            assert expected_range.begin == global_range.begin
            assert expected_range.end == global_range.end
            assert expected_word == word_global.surface()

        assert words_seen == word_count

    assert sentences_seen == sentence_count
    # The global iterator must be exhausted once every word was visited.
    assert next(global_words, None) is None
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ def run(self):
extras_require={"test": ["pytest>=6.0"]},
license_files=("LICENSE",),
python_requires=">=3.6",
packages=["slimt", "slimt.tests"],
packages=["slimt", "slimt.tests", "slimt.iterators"],
package_dir={"slimt": "bindings/python"},
install_requires=["pyyaml>=5.1", "appdirs"],
entry_points={
Expand Down
10 changes: 5 additions & 5 deletions slimt/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -91,15 +91,15 @@ foreach(SLIMT_LIB IN LISTS SLIMT_LIBRARIES)
target_link_libraries(
${SLIMT_LIB}
PUBLIC ${SLIMT_PUBLIC_LIBS}
INTERFACE $<BUILD_INTERFACE:${SLIMT_INTERFACE_LIBS}>
PRIVATE $<BUILD_INTERFACE:${SLIMT_PRIVATE_LIBS}>)
INTERFACE "$<BUILD_INTERFACE:${SLIMT_INTERFACE_LIBS}>"
PRIVATE "$<BUILD_INTERFACE:${SLIMT_PRIVATE_LIBS}>")

target_include_directories(
${SLIMT_LIB}
PUBLIC
$<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}>
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}>)
"$<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}>"
"$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}>"
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}>")

target_link_options(${SLIMT_LIB} PUBLIC ${SLIMT_LINK_OPTIONS})
target_compile_options(${SLIMT_LIB} PRIVATE ${SLIMT_COMPILE_OPTIONS})
Expand Down

0 comments on commit ef60531

Please sign in to comment.