Skip to content

Commit

Permalink
Build tagger dict only once
Browse files Browse the repository at this point in the history
  • Loading branch information
p-goulart committed Jan 12, 2024
1 parent 5283e86 commit 7a040b7
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 25 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,7 @@ Under construction!
## Usage

This repository is meant to be used as a submodule of language-specific repositories, such as the [Portuguese repository](https://github.com/languagetool-org/portuguese-pos-dict).

⚠️ Note that the name of this repository is in **kebab-case**, but Python module names cannot contain hyphens and should be written in **snake_case**.
Therefore, when adding this repository as a submodule, make sure to set the submodule path to `dict_tools`, which uses an underscore.
If you don't do this, Python will not be able to import it as a module with a regular `import` statement.
2 changes: 1 addition & 1 deletion lib/variant.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class Variant:
def __init__(self, locale_code: str):
parsed = locale_code.split('-')
self.lang = parsed[0]
self.country = parsed[1]
self.country = parsed[1] if len(parsed) > 1 else None
self.agreement = parsed[2] if len(parsed) > 2 else None
self.pretty = self.LANG_CODES.get(self.lang)
self.hyphenated = locale_code
Expand Down
46 changes: 22 additions & 24 deletions scripts/build_tagger_dicts.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def __init__(self):
self.args = self.parser.parse_args()


def set_shell_env() -> dict[str, str]:
def set_shell_env():
custom_env = {
'REPO_DIR': REPO_DIR,
'DATA_SRC_DIR': TAGGER_DICT_DIR,
Expand All @@ -52,54 +52,52 @@ def set_shell_env() -> dict[str, str]:
return {**os.environ, **custom_env}


def run_shell_script(env: dict) -> None:
def run_shell_script() -> None:
"""Calls the shell script that gathers the tagger dict source files into a single TXT."""
ShellCommand(f"bash {TAGGER_BUILD_SCRIPT_PATH}", env=env).run_with_output()
ShellCommand(f"bash {TAGGER_BUILD_SCRIPT_PATH}", env=SHELL_ENV).run_with_output()


def build_pos_binary(variant: Variant) -> None:
def build_pos_binary() -> None:
cmd_build = (
f"java -cp {LT_JAR_PATH} "
f"org.languagetool.tools.POSDictionaryBuilder "
f"-i {RESULT_POS_DICT_FILEPATH} "
f"-info {variant.pos_info_java_input_path()} "
f"-o {variant.pos_dict_java_output_path()}"
f"-info {LANGUAGE.pos_info_java_input_path()} "
f"-o {LANGUAGE.pos_dict_java_output_path()}"
)
ShellCommand(cmd_build).run()
variant.copy_pos_info()
LANGUAGE.copy_pos_info()


def build_synth_binary(variant: Variant) -> None:
def build_synth_binary() -> None:
cmd_build = (
f"java -cp {LT_JAR_PATH} "
f"org.languagetool.tools.SynthDictionaryBuilder "
f"-i {RESULT_POS_DICT_FILEPATH} "
f"-info {variant.synth_info_java_input_path()} "
f"-o {variant.synth_dict_java_output_path()}"
f"-info {LANGUAGE.synth_info_java_input_path()} "
f"-o {LANGUAGE.synth_dict_java_output_path()}"
)
ShellCommand(cmd_build).run()
variant.copy_synth_info()
variant.rename_synth_tag_files()
LANGUAGE.copy_synth_info()
LANGUAGE.rename_synth_tag_files()


def main():
cli = CLI()
LOGGER.setLevel(cli.args.verbosity.upper())
FORCE_INSTALL = cli.args.force_install
FORCE_COMPILE = cli.args.no_force_compile
CUSTOM_INSTALL_VERSION = cli.args.install_version
# TODO: understand how to select dict variants for tagger dict compilation (usually a single one?)
DIC_VARIANTS = VARIANT_MAPPING[cli.args.language]
SHELL_ENV = set_shell_env()
if FORCE_COMPILE:
compile_lt_dev()
for variant in DIC_VARIANTS:
run_shell_script(SHELL_ENV)
build_pos_binary(variant)
build_synth_binary(variant)
run_shell_script()
build_pos_binary()
build_synth_binary()
if FORCE_INSTALL:
install_dictionaries(custom_version=CUSTOM_INSTALL_VERSION)


if __name__ == "__main__":
cli = CLI()
LOGGER.setLevel(cli.args.verbosity.upper())
FORCE_INSTALL = cli.args.force_install
FORCE_COMPILE = cli.args.no_force_compile
CUSTOM_INSTALL_VERSION = cli.args.install_version
LANGUAGE = Variant(cli.args.language)
SHELL_ENV = set_shell_env()
main()

0 comments on commit 7a040b7

Please sign in to comment.