Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Provision iterator syntax sugar in python #40

Merged
merged 8 commits into from
Dec 18, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
* text=auto

# Compiled Object files
*.slo binary
*.lo binary
*.o binary
*.obj binary

# Precompiled Headers
*.gch binary
*.pch binary

# Compiled Dynamic libraries
*.so binary
*.dylib binary
*.dll binary

# Compiled Static libraries
*.lai binary
*.la binary
*.a binary
*.lib binary

# Executables
*.exe binary
*.out binary
*.app binary
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no trailing newline

67 changes: 67 additions & 0 deletions bindings/python/iterators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
class WordIterator:
def __init__(self, annotation, sentence_id=None):
self._annotation = annotation

if sentence_id == None:
self._sentence_id = 0
self._max_sentence_id = self._annotation.sentence_count()
else:
self._sentence_id = sentence_id
self._max_sentence_id = sentence_id + 1

self._word_id = -1

def __iter__(self):
self._word_id = -1
return self

def __next__(self):
if self._annotation.sentence_count() == 0:
raise StopIteration

self._word_id += 1
if self._word_id >= self._annotation.word_count(self._sentence_id):
self._sentence_id += 1
if self._sentence_id >= self._max_sentence_id:
raise StopIteration
self._word_id = 0
return self

def surface(self):
range = self.range()
return self._annotation.text[range.begin:range.end]

def range(self):
return self._annotation.word_as_range(self._sentence_id, self._word_id)

def id(self):
return (self._sentence_id, self._word_id)

class SentenceIterator:
def __init__(self, annotation):
self._annotation = annotation
self._sentence_id = -1

def __iter__(self):
self._sentence_id = -1
return self

def __next__(self):
self._sentence_id += 1
if self._sentence_id >= self._annotation.sentence_count():
raise StopIteration
return self

def words(self):
return WordIterator(self._annotation, self._sentence_id)

def __repr__(self):
range = self._annotation.sentence_as_range(self._sentence_id)
sentence = self._annotation.text[range.begin:range.end]
return f'{sentence}'

def sentences(annotation):
return SentenceIterator(annotation)

def words(annotation, sentence_id=None):
return WordIterator(annotation, sentence_id)
2 changes: 1 addition & 1 deletion bindings/python/tests/test_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from collections import namedtuple


def test_basic(service, models):
def test_encoding(service, models):
Pair = namedtuple("Pair", ["byte", "utf8"])
source = "no sé 😀 😃 😄 😁 😆 ⛄ 🤔"
model = models[1]
Expand Down
40 changes: 40 additions & 0 deletions bindings/python/tests/test_iterators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# type: ignore
from slimt import iterators

def test_iterators(service, models):
source = "Hi, How are you? Its been a long time.\nCan you help me out with some things?"
model = models[1]
response = service.translate(model, [source], html=False)[0]

target = response.target
text = target.text

sen_iter_tgt = iterators.sentences(target)
word_iter_global = iterators.words(target)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We do not prefer abbreviations when it can be named better.

sentences = iterators.sentences(response.target)
for sentence in sentences:
   # ...
   
words = iterators.words(response.target)
for word in words:
   # ...

In general chaining more entities into the naming connected by _ points to that we've failed somewhere at abstracting (Imagine sentence, sentence_item - instead of sentence, word for an example).


sentence_count = target.sentence_count()
for sentence_idx, word_iter in zip(range(sentence_count), sen_iter_tgt):
word_count = target.word_count(sentence_idx)
for word_idx, word in zip(range(word_count), word_iter.words()):

expected_text_range = target.word_as_range(sentence_idx, word_idx)
reconstructed_text_range = word.range()

# For Sentence Iterator and Word Iterator
assert expected_text_range.begin == reconstructed_text_range.begin
assert expected_text_range.end == reconstructed_text_range.end

expected_word = text[expected_text_range.begin:expected_text_range.end]
reconstructed_word = word.surface()

assert expected_word == reconstructed_word

word_global = next(word_iter_global)

reconstructed_text_range_glob = word_global.range()
reconstructed_word_glob = word_global.surface()

# For Global Word Iterator
assert expected_text_range.begin == reconstructed_text_range_glob.begin
assert expected_text_range.end == reconstructed_text_range_glob.end
Copy link
Owner

@jerinphilip jerinphilip Dec 16, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lot of _ chaining. This is a naming nit, but there's room for improvement. Same intentions/ideas as #40 (comment).

assert expected_word == reconstructed_word_glob
11 changes: 6 additions & 5 deletions slimt/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -91,15 +91,16 @@ foreach(SLIMT_LIB IN LISTS SLIMT_LIBRARIES)
target_link_libraries(
${SLIMT_LIB}
PUBLIC ${SLIMT_PUBLIC_LIBS}
INTERFACE $<BUILD_INTERFACE:${SLIMT_INTERFACE_LIBS}>
PRIVATE $<BUILD_INTERFACE:${SLIMT_PRIVATE_LIBS}>)
INTERFACE "$<BUILD_INTERFACE:${SLIMT_INTERFACE_LIBS}>"
PRIVATE "$<BUILD_INTERFACE:${SLIMT_PRIVATE_LIBS}>")

target_include_directories(
${SLIMT_LIB}
PUBLIC
$<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}>
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}>)
"$<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}>"
"$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}>"
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}>"
)

target_link_options(${SLIMT_LIB} PUBLIC ${SLIMT_LINK_OPTIONS})
target_compile_options(${SLIMT_LIB} PRIVATE ${SLIMT_COMPILE_OPTIONS})
Expand Down
Loading