From 7a040b7a4dadf55cfab724bbf57b6090614f5161 Mon Sep 17 00:00:00 2001 From: p-goulart Date: Fri, 12 Jan 2024 20:06:40 +0100 Subject: [PATCH] Build tagger dict only once --- README.md | 4 +++ lib/variant.py | 2 +- scripts/build_tagger_dicts.py | 46 +++++++++++++++++------------------ 3 files changed, 27 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 7f134c0..02e71a4 100644 --- a/README.md +++ b/README.md @@ -16,3 +16,7 @@ Under construction! ## Usage This repository should be a submodule of language-specific repositories. For example, the [Portuguese repository](https://github.com/languagetool-org/portuguese-pos-dict). + +⚠️ Note that the name of this repository is in **kebab-case**, but Python modules should be imported in **snake_case**. +Therefore, when importing this as a submodule, make sure to set the path to `dict_tools`, which uses the underscore. +If you don't do this, you may fail to import it as a module. diff --git a/lib/variant.py b/lib/variant.py index ab60ae3..e941d09 100644 --- a/lib/variant.py +++ b/lib/variant.py @@ -22,7 +22,7 @@ class Variant: def __init__(self, locale_code: str): parsed = locale_code.split('-') self.lang = parsed[0] - self.country = parsed[1] + self.country = parsed[1] if len(parsed) > 1 else None self.agreement = parsed[2] if len(parsed) > 2 else None self.pretty = self.LANG_CODES.get(self.lang) self.hyphenated = locale_code diff --git a/scripts/build_tagger_dicts.py b/scripts/build_tagger_dicts.py index 9f9594f..450c2d3 100644 --- a/scripts/build_tagger_dicts.py +++ b/scripts/build_tagger_dicts.py @@ -38,7 +38,7 @@ def __init__(self): self.args = self.parser.parse_args() -def set_shell_env() -> dict[str, str]: +def set_shell_env(): custom_env = { 'REPO_DIR': REPO_DIR, 'DATA_SRC_DIR': TAGGER_DICT_DIR, @@ -52,54 +52,52 @@ def set_shell_env() -> dict[str, str]: return {**os.environ, **custom_env} -def run_shell_script(env: dict) -> None: +def run_shell_script() -> None: """Calls the shell script that gathers the tagger dict source files into a single TXT.""" - ShellCommand(f"bash {TAGGER_BUILD_SCRIPT_PATH}", env=env).run_with_output() + ShellCommand(f"bash {TAGGER_BUILD_SCRIPT_PATH}", env=SHELL_ENV).run_with_output() -def build_pos_binary(variant: Variant) -> None: +def build_pos_binary() -> None: cmd_build = ( f"java -cp {LT_JAR_PATH} " f"org.languagetool.tools.POSDictionaryBuilder " f"-i {RESULT_POS_DICT_FILEPATH} " - f"-info {variant.pos_info_java_input_path()} " - f"-o {variant.pos_dict_java_output_path()}" + f"-info {LANGUAGE.pos_info_java_input_path()} " + f"-o {LANGUAGE.pos_dict_java_output_path()}" ) ShellCommand(cmd_build).run() - variant.copy_pos_info() + LANGUAGE.copy_pos_info() -def build_synth_binary(variant: Variant) -> None: +def build_synth_binary() -> None: cmd_build = ( f"java -cp {LT_JAR_PATH} " f"org.languagetool.tools.SynthDictionaryBuilder " f"-i {RESULT_POS_DICT_FILEPATH} " - f"-info {variant.synth_info_java_input_path()} " - f"-o {variant.synth_dict_java_output_path()}" + f"-info {LANGUAGE.synth_info_java_input_path()} " + f"-o {LANGUAGE.synth_dict_java_output_path()}" ) ShellCommand(cmd_build).run() - variant.copy_synth_info() - variant.rename_synth_tag_files() + LANGUAGE.copy_synth_info() + LANGUAGE.rename_synth_tag_files() def main(): - cli = CLI() - LOGGER.setLevel(cli.args.verbosity.upper()) - FORCE_INSTALL = cli.args.force_install - FORCE_COMPILE = cli.args.no_force_compile - CUSTOM_INSTALL_VERSION = cli.args.install_version - # TODO: understand how to select dict variants for tagger dict compilation (usually a single one?) - DIC_VARIANTS = VARIANT_MAPPING[cli.args.language] - SHELL_ENV = set_shell_env() if FORCE_COMPILE: compile_lt_dev() - for variant in DIC_VARIANTS: - run_shell_script(SHELL_ENV) - build_pos_binary(variant) - build_synth_binary(variant) + run_shell_script() + build_pos_binary() + build_synth_binary() if FORCE_INSTALL: install_dictionaries(custom_version=CUSTOM_INSTALL_VERSION) if __name__ == "__main__": + cli = CLI() + LOGGER.setLevel(cli.args.verbosity.upper()) + FORCE_INSTALL = cli.args.force_install + FORCE_COMPILE = cli.args.no_force_compile + CUSTOM_INSTALL_VERSION = cli.args.install_version + LANGUAGE = Variant(cli.args.language) + SHELL_ENV = set_shell_env() main()