Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Provision iterator syntax sugar in python #40

Merged
merged 8 commits into from
Dec 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
* text=auto

# Compiled Object files
*.slo binary
*.lo binary
*.o binary
*.obj binary

# Precompiled Headers
*.gch binary
*.pch binary

# Compiled Dynamic libraries
*.so binary
*.dylib binary
*.dll binary

# Compiled Static libraries
*.lai binary
*.la binary
*.a binary
*.lib binary

# Executables
*.exe binary
*.out binary
*.app binary
67 changes: 67 additions & 0 deletions bindings/python/iterators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
class WordIterator:
def __init__(self, annotation, sentence_id=None):
self._annotation = annotation

if sentence_id == None:
self._sentence_id = 0
self._max_sentence_id = self._annotation.sentence_count()
else:
self._sentence_id = sentence_id
self._max_sentence_id = sentence_id + 1

self._word_id = -1

def __iter__(self):
self._word_id = -1
return self

def __next__(self):
if self._annotation.sentence_count() == 0:
raise StopIteration

self._word_id += 1
if self._word_id >= self._annotation.word_count(self._sentence_id):
self._sentence_id += 1
if self._sentence_id >= self._max_sentence_id:
raise StopIteration
self._word_id = 0
return self

def surface(self):
range = self.range()
return self._annotation.text[range.begin:range.end]

def range(self):
return self._annotation.word_as_range(self._sentence_id, self._word_id)

def id(self):
return (self._sentence_id, self._word_id)

class SentenceIterator:
def __init__(self, annotation):
self._annotation = annotation
self._sentence_id = -1

def __iter__(self):
self._sentence_id = -1
return self

def __next__(self):
self._sentence_id += 1
if self._sentence_id >= self._annotation.sentence_count():
raise StopIteration
return self

def words(self):
return WordIterator(self._annotation, self._sentence_id)

def __repr__(self):
range = self._annotation.sentence_as_range(self._sentence_id)
sentence = self._annotation.text[range.begin:range.end]
return f'{sentence}'

def sentences(annotation):
return SentenceIterator(annotation)

def words(annotation, sentence_id=None):
return WordIterator(annotation, sentence_id)
2 changes: 1 addition & 1 deletion bindings/python/tests/test_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from collections import namedtuple


def test_basic(service, models):
def test_encoding(service, models):
Pair = namedtuple("Pair", ["byte", "utf8"])
source = "no sé 😀 😃 😄 😁 😆 ⛄ 🤔"
model = models[1]
Expand Down
44 changes: 44 additions & 0 deletions bindings/python/tests/test_iterators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# type: ignore
from slimt import iterators

def test_iterators(service, models):
source = "Hi, How are you? Its been a long time.\nCan you help me out with some things?"
model = models[1]
response = service.translate(model, [source], html=False)[0]

target = response.target
text = target.text

sentences = iterators.sentences(target)
words = iterators.words(target)

sentence_count = target.sentence_count()
for sentence_idx, word_iter in zip(range(sentence_count), sentences):
word_count = target.word_count(sentence_idx)
for word_idx, word in zip(range(word_count), word_iter.words()):

expected_range = target.word_as_range(sentence_idx, word_idx)
expected_word = text[expected_range.begin:expected_range.end]

# For Sentence Iterator and Word Iterator
# Range
reconstructed = word.range()

assert expected_range.begin == reconstructed.begin
assert expected_range.end == reconstructed.end

# Word
reconstructed = word.surface()
assert expected_word == reconstructed

# For Global Word Iterator
word_global = next(words)

# Range
reconstructed = word_global.range()
assert expected_range.begin == reconstructed.begin
assert expected_range.end == reconstructed.end

# Word
reconstructed = word_global.surface()
assert expected_word == reconstructed
11 changes: 6 additions & 5 deletions slimt/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -91,15 +91,16 @@ foreach(SLIMT_LIB IN LISTS SLIMT_LIBRARIES)
target_link_libraries(
${SLIMT_LIB}
PUBLIC ${SLIMT_PUBLIC_LIBS}
INTERFACE $<BUILD_INTERFACE:${SLIMT_INTERFACE_LIBS}>
PRIVATE $<BUILD_INTERFACE:${SLIMT_PRIVATE_LIBS}>)
INTERFACE "$<BUILD_INTERFACE:${SLIMT_INTERFACE_LIBS}>"
PRIVATE "$<BUILD_INTERFACE:${SLIMT_PRIVATE_LIBS}>")

target_include_directories(
${SLIMT_LIB}
PUBLIC
$<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}>
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}>)
"$<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}>"
"$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}>"
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}>"
)

target_link_options(${SLIMT_LIB} PUBLIC ${SLIMT_LINK_OPTIONS})
target_compile_options(${SLIMT_LIB} PRIVATE ${SLIMT_COMPILE_OPTIONS})
Expand Down
Loading