class SentenceIterator:
    """Cursor-style iterator over the sentences of an annotation.

    The iterator yields *itself* on every step, repositioned onto the next
    sentence; use ``words()`` or ``repr()`` on the yielded object to inspect
    the sentence the cursor currently points at.

    The annotation object must provide ``sentence_count()``,
    ``sentence_as_range(sentence_id)`` (returning an object with ``begin``
    and ``end`` attributes) and a sliceable ``text`` attribute.
    """

    def __init__(self, annotation):
        self._annotation = annotation
        # -1 means "not positioned on any sentence yet".
        self._sentence_id = -1

    def __iter__(self):
        """Rewind the cursor and return this object as its own iterator."""
        self._sentence_id = -1
        return self

    def __next__(self):
        """Advance to the next sentence and return this object.

        Raises:
            StopIteration: when there is no further sentence. The cursor is
                left on the last valid sentence so that the object remains
                usable after a ``for`` loop finishes.
        """
        upcoming = self._sentence_id + 1
        if upcoming >= self._annotation.sentence_count():
            raise StopIteration
        self._sentence_id = upcoming
        return self

    def words(self):
        """Return a ``WordIterator`` restricted to the current sentence."""
        return WordIterator(self._annotation, self._sentence_id)

    def __repr__(self):
        """Return the text of the current sentence.

        Before the first ``next()`` call there is no current sentence, so a
        placeholder is returned instead of querying the annotation with -1.
        """
        if self._sentence_id < 0:
            return f"<{type(self).__name__}: not positioned>"
        span = self._annotation.sentence_as_range(self._sentence_id)
        return str(self._annotation.text[span.begin:span.end])
class WordIterator:
    """Cursor-style iterator over the words of an annotation.

    The iterator yields *itself* on every step, repositioned onto the next
    word; use ``surface()``, ``range()`` or ``id()`` on the yielded object
    to inspect the word the cursor currently points at.

    The annotation object must provide ``sentence_count()``,
    ``word_count(sentence_id)``, ``word_as_range(sentence_id, word_id)``
    (returning an object with ``begin`` and ``end`` attributes) and a
    sliceable ``text`` attribute.

    Args:
        annotation: the annotation to walk.
        sentence_id: when given, only the words of that sentence are
            visited; when ``None`` (default), the words of every sentence
            are visited in order.
    """

    def __init__(self, annotation, sentence_id=None):
        self._annotation = annotation

        if sentence_id is None:
            self._first_sentence_id = 0
            self._max_sentence_id = self._annotation.sentence_count()
        else:
            self._first_sentence_id = sentence_id
            self._max_sentence_id = sentence_id + 1

        self._sentence_id = self._first_sentence_id
        # -1 means "not positioned on any word yet".
        self._word_id = -1

    def __iter__(self):
        """Rewind to the first sentence in scope and return this object."""
        # Both coordinates must be reset; resetting only the word index
        # would restart a second pass from wherever the first one ended.
        self._sentence_id = self._first_sentence_id
        self._word_id = -1
        return self

    def __next__(self):
        """Advance to the next word and return this object.

        Sentences without words are skipped.

        Raises:
            StopIteration: when no further word exists in the sentences in
                scope. The cursor is left on the last valid word.
        """
        if self._annotation.sentence_count() == 0:
            raise StopIteration

        # Look ahead on local copies so the cursor only moves onto a word
        # that actually exists.
        sentence_id = self._sentence_id
        word_id = self._word_id + 1
        while sentence_id < self._max_sentence_id:
            if word_id < self._annotation.word_count(sentence_id):
                self._sentence_id = sentence_id
                self._word_id = word_id
                return self
            sentence_id += 1
            word_id = 0
        raise StopIteration

    def surface(self):
        """Return the slice of the annotation text covered by this word."""
        span = self.range()
        return self._annotation.text[span.begin:span.end]

    def range(self):
        """Return the annotation's range object for the current word."""
        return self._annotation.word_as_range(self._sentence_id, self._word_id)

    def id(self):
        """Return the ``(sentence_id, word_id)`` pair of the current word."""
        return (self._sentence_id, self._word_id)
+ model = models[1] + response = service.translate(model, [source], html=False)[0] + + target = response.target + text = target.text + + sen_iter_tgt = iterators.sentences(target) + word_iter_global = iterators.words(target) + + sentence_count = target.sentence_count() + for sentence_idx, word_iter in zip(range(sentence_count), sen_iter_tgt): + word_count = target.word_count(sentence_idx) + for word_idx, word in zip(range(word_count), word_iter.words()): + + expected_text_range = target.word_as_range(sentence_idx, word_idx) + reconstructed_text_range = word.range() + + # For Sentence Iterator and Word Iterator + assert expected_text_range.begin == reconstructed_text_range.begin + assert expected_text_range.end == reconstructed_text_range.end + + expected_word = text[expected_text_range.begin:expected_text_range.end] + reconstructed_word = word.surface() + + assert expected_word == reconstructed_word + + word_global = next(word_iter_global) + + reconstructed_text_range_glob = word_global.range() + reconstructed_word_glob = word_global.surface() + + # For Global Word Iterator + assert expected_text_range.begin == reconstructed_text_range_glob.begin + assert expected_text_range.end == reconstructed_text_range_glob.end + assert expected_word == reconstructed_word_glob \ No newline at end of file diff --git a/setup.py b/setup.py index 100a0d8e..522ac2bd 100644 --- a/setup.py +++ b/setup.py @@ -211,7 +211,7 @@ def run(self): extras_require={"test": ["pytest>=6.0"]}, license_files=("LICENSE",), python_requires=">=3.6", - packages=["slimt", "slimt.tests"], + packages=["slimt", "slimt.tests", "slimt.iterators"], package_dir={"slimt": "bindings/python"}, install_requires=["pyyaml>=5.1", "appdirs"], entry_points={ diff --git a/slimt/CMakeLists.txt b/slimt/CMakeLists.txt index 4711444a..e838032a 100644 --- a/slimt/CMakeLists.txt +++ b/slimt/CMakeLists.txt @@ -91,15 +91,15 @@ foreach(SLIMT_LIB IN LISTS SLIMT_LIBRARIES) target_link_libraries( ${SLIMT_LIB} PUBLIC 
${SLIMT_PUBLIC_LIBS} - INTERFACE $ - PRIVATE $) + INTERFACE "$" + PRIVATE "$") target_include_directories( ${SLIMT_LIB} PUBLIC - $ - $ - $) + "$" + "$" + "$") target_link_options(${SLIMT_LIB} PUBLIC ${SLIMT_LINK_OPTIONS}) target_compile_options(${SLIMT_LIB} PRIVATE ${SLIMT_COMPILE_OPTIONS})