Skip to content

Commit

Permalink
Provision iterator syntax sugar in python
Browse files Browse the repository at this point in the history
This change adds a flexible and reusable iterator system for processing
annotations in Python to the project, along with associated tests.

`.gitattributes`:
Added a new file with various attributes for Git, specifying binary
handling for certain file types.

`bindings/python/iterators.py`:
Added a new Python module (iterators.py) that defines iterators for
processing annotations, sentences, and words.
    Includes WordIterator and SentenceIterator classes.
    Provides `sentences()` and `words()` for syntax sugar.

`bindings/python/tests/test_iterators.py`:
Added a new test module (`test_iterators.py`) to test the functionality
of the iterators in the `iterators.py` module.

`slimt/CMakeLists.txt`:
Modified the CMakeLists.txt file to quote the generator expressions passed
to `target_link_libraries` and `target_include_directories`.

Pull request: #40
  • Loading branch information
sivaprasad2000 authored Dec 18, 2023
1 parent 8255316 commit 87c9105
Show file tree
Hide file tree
Showing 5 changed files with 145 additions and 6 deletions.
27 changes: 27 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
* text=auto

# Compiled Object files
*.slo binary
*.lo binary
*.o binary
*.obj binary

# Precompiled Headers
*.gch binary
*.pch binary

# Compiled Dynamic libraries
*.so binary
*.dylib binary
*.dll binary

# Compiled Static libraries
*.lai binary
*.la binary
*.a binary
*.lib binary

# Executables
*.exe binary
*.out binary
*.app binary
67 changes: 67 additions & 0 deletions bindings/python/iterators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
class WordIterator:
    """Cursor-style iterator over the words of an annotation.

    The iterator yields *itself* on every step; after each ``next()`` the
    accessors :meth:`surface`, :meth:`range` and :meth:`id` describe the
    word the cursor currently points at. Collecting the yielded objects
    (e.g. with ``list(...)``) therefore stores the same cursor repeatedly;
    read the accessors while iterating instead.

    Args:
        annotation: Object providing ``sentence_count()``,
            ``word_count(sentence_id)``,
            ``word_as_range(sentence_id, word_id)`` (returning an object
            with ``begin`` and ``end`` attributes) and a sliceable ``text``
            attribute.
        sentence_id: When given, only the words of that sentence are
            visited. When ``None`` (default), the words of every sentence
            are visited in order.
    """

    def __init__(self, annotation, sentence_id=None):
        self._annotation = annotation

        if sentence_id is None:
            self._first_sentence_id = 0
            self._max_sentence_id = self._annotation.sentence_count()
        else:
            self._first_sentence_id = sentence_id
            self._max_sentence_id = sentence_id + 1

        self._sentence_id = self._first_sentence_id
        self._word_id = -1

    def __iter__(self):
        # Rewind both coordinates so that a second pass over the same
        # iterator starts from the first sentence again instead of querying
        # a sentence index that is past the end.
        self._sentence_id = self._first_sentence_id
        self._word_id = -1
        return self

    def __next__(self):
        """Advance the cursor to the next word and return the cursor.

        Raises:
            StopIteration: When there are no sentences, or when every word
                in the configured sentence span has been visited.
        """
        annotation = self._annotation
        if annotation.sentence_count() == 0:
            raise StopIteration

        # Already exhausted: do not ask the annotation about a sentence
        # index outside the configured span.
        if self._sentence_id >= self._max_sentence_id:
            raise StopIteration

        self._word_id += 1
        # A loop (rather than a single check) skips over sentences that
        # contain no words, so the cursor never rests on a missing word.
        while self._word_id >= annotation.word_count(self._sentence_id):
            self._sentence_id += 1
            if self._sentence_id >= self._max_sentence_id:
                raise StopIteration
            self._word_id = 0
        return self

    def surface(self):
        """Return the slice of the annotation text covered by the current word."""
        span = self.range()
        return self._annotation.text[span.begin:span.end]

    def range(self):
        """Return the annotation's range object for the current word."""
        return self._annotation.word_as_range(self._sentence_id, self._word_id)

    def id(self):
        """Return the current position as a ``(sentence_id, word_id)`` tuple."""
        return (self._sentence_id, self._word_id)

class SentenceIterator:
    """Cursor-style iterator over the sentences of an annotation.

    Each step yields the iterator itself; ``repr()`` of the cursor gives the
    text of the sentence it currently points at, and :meth:`words` gives a
    word iterator restricted to that sentence.
    """

    def __init__(self, annotation):
        self._annotation = annotation
        # -1 means "before the first sentence"; __next__ increments first.
        self._sentence_id = -1

    def __iter__(self):
        # Starting a new pass rewinds the cursor.
        self._sentence_id = -1
        return self

    def __next__(self):
        """Move to the following sentence, or raise StopIteration past the end."""
        upcoming = self._sentence_id + 1
        self._sentence_id = upcoming
        if upcoming < self._annotation.sentence_count():
            return self
        raise StopIteration

    def words(self):
        """Return a word iterator limited to the current sentence."""
        return WordIterator(self._annotation, self._sentence_id)

    def __repr__(self):
        span = self._annotation.sentence_as_range(self._sentence_id)
        sentence = self._annotation.text[span.begin:span.end]
        return f'{sentence}'

def sentences(annotation):
    """Build a ``SentenceIterator`` over ``annotation``."""
    sentence_cursor = SentenceIterator(annotation)
    return sentence_cursor

def words(annotation, sentence_id=None):
    """Build a ``WordIterator`` over ``annotation``.

    ``sentence_id`` is forwarded unchanged to ``WordIterator``.
    """
    word_cursor = WordIterator(annotation, sentence_id)
    return word_cursor
2 changes: 1 addition & 1 deletion bindings/python/tests/test_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from collections import namedtuple


def test_basic(service, models):
def test_encoding(service, models):
Pair = namedtuple("Pair", ["byte", "utf8"])
source = "no sé 😀 😃 😄 😁 😆 ⛄ 🤔"
model = models[1]
Expand Down
44 changes: 44 additions & 0 deletions bindings/python/tests/test_iterators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# type: ignore
from slimt import iterators

def test_iterators(service, models):
    """Check that the iterator helpers agree with direct annotation lookups.

    For every (sentence, word) index pair of the translated target, the
    per-sentence word iterator and the global word iterator must both report
    the same range and surface text as ``target.word_as_range``.
    """

    def assert_matches(cursor, span, surface):
        # Range is compared first, then the surface text, for one cursor
        # position.
        observed = cursor.range()
        assert span.begin == observed.begin
        assert span.end == observed.end

        observed = cursor.surface()
        assert surface == observed

    source = "Hi, How are you? Its been a long time.\nCan you help me out with some things?"
    model = models[1]
    response = service.translate(model, [source], html=False)[0]

    target = response.target
    text = target.text

    sentence_cursor = iterators.sentences(target)
    global_cursor = iterators.words(target)

    total_sentences = target.sentence_count()
    for sentence_idx, current in zip(range(total_sentences), sentence_cursor):
        total_words = target.word_count(sentence_idx)
        for word_idx, local_cursor in zip(range(total_words), current.words()):
            span = target.word_as_range(sentence_idx, word_idx)
            surface = text[span.begin:span.end]

            # Per-sentence word iterator obtained from the sentence cursor.
            assert_matches(local_cursor, span, surface)

            # Global word iterator, advanced in lockstep with the indices.
            assert_matches(next(global_cursor), span, surface)
11 changes: 6 additions & 5 deletions slimt/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -91,15 +91,16 @@ foreach(SLIMT_LIB IN LISTS SLIMT_LIBRARIES)
target_link_libraries(
${SLIMT_LIB}
PUBLIC ${SLIMT_PUBLIC_LIBS}
INTERFACE $<BUILD_INTERFACE:${SLIMT_INTERFACE_LIBS}>
PRIVATE $<BUILD_INTERFACE:${SLIMT_PRIVATE_LIBS}>)
INTERFACE "$<BUILD_INTERFACE:${SLIMT_INTERFACE_LIBS}>"
PRIVATE "$<BUILD_INTERFACE:${SLIMT_PRIVATE_LIBS}>")

target_include_directories(
${SLIMT_LIB}
PUBLIC
$<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}>
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}>)
"$<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}>"
"$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}>"
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}>"
)

target_link_options(${SLIMT_LIB} PUBLIC ${SLIMT_LINK_OPTIONS})
target_compile_options(${SLIMT_LIB} PRIVATE ${SLIMT_COMPILE_OPTIONS})
Expand Down

0 comments on commit 87c9105

Please sign in to comment.