Skip to content

Commit

Permalink
Fix prefix spaces and punctuation; upgrade to version 0.9 (#49)
Browse files: browse the repository at this point in the history
  • Loading branch information
pkufool authored Aug 8, 2023
1 parent 63b18db commit 98ffea2
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 2 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Require CMake 3.12 or newer to configure this project.
cmake_minimum_required(VERSION 3.12 FATAL_ERROR)
project(textsearch)

# Version string stored in TS_VERSION.
# NOTE(review): this hunk looks like a rendered diff with the +/- markers
# stripped -- "0.8" is presumably the removed line and "0.9" the added one
# (the commit title says "upgrade to version 0.9"). If both lines were
# evaluated as written, the later set() would win and TS_VERSION would be "0.9".
set(TS_VERSION "0.8")
set(TS_VERSION "0.9")

# Default output directory for archive and library target files:
# <top-level build dir>/lib.
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "fasttextsearch"
version = "0.8"
version = "0.9"
authors = [
{ name="Next-gen Kaldi development team", email="[email protected]" },
]
Expand Down
7 changes: 7 additions & 0 deletions textsearch/python/textsearch/match.py
Original file line number Diff line number Diff line change
Expand Up @@ -1091,6 +1091,10 @@ def _split_into_segments(

for seg in segments:
begin_pos = aligns[seg[0]]["ref_pos"]
# trim the prefix space
while chr(target_source.binary_text[begin_pos]) == " ":
begin_pos += 1

while begin_pos >= 1:
current_token = chr(target_source.binary_text[begin_pos - 1])
if current_token in PUCTUATIONS["left"]:
Expand Down Expand Up @@ -1132,6 +1136,9 @@ def _split_into_segments(
end_time = aligns[succeeding_index]["hyp_time"]

hyp_begin_pos = aligns[seg[0]]["hyp_pos"]
while chr(query_source.binary_text[hyp_begin_pos]) == " ":
hyp_begin_pos += 1

hyp_end_pos = aligns[seg[1]]["hyp_pos"]
hyp = "".join(
[
Expand Down

0 comments on commit 98ffea2

Please sign in to comment.