From 3e1dc079e9ed569bc9da1093631aebfe5ecd0259 Mon Sep 17 00:00:00 2001 From: getzze Date: Mon, 4 Nov 2024 23:38:04 +0000 Subject: [PATCH] Add opensubtitles converter, copied from babelfish (#1183) * update converters types * add opensubtitles converter from babelfish * fix opensubtitles typing * fix tests --- pyproject.toml | 1 + subliminal/converters/__init__.py | 2 +- subliminal/converters/addic7ed.py | 54 +++++++++------- subliminal/converters/opensubtitles.py | 77 +++++++++++++++++++++++ subliminal/converters/opensubtitlescom.py | 18 +++--- subliminal/converters/tvsubtitles.py | 20 +++--- subliminal/providers/opensubtitles.py | 10 +++ tests/providers/test_addic7ed.py | 2 +- tests/providers/test_tvsubtitles.py | 2 +- 9 files changed, 142 insertions(+), 44 deletions(-) create mode 100644 subliminal/converters/opensubtitles.py diff --git a/pyproject.toml b/pyproject.toml index 7a51e4c0..9f5ff8e4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -112,6 +112,7 @@ tvdb = "subliminal.refiners.tvdb:refine" [project.entry-points."babelfish.language_converters"] addic7ed = "subliminal.converters.addic7ed:Addic7edConverter" +opensubtitles = "subliminal.converters.opensubtitles:OpenSubtitlesConverter" opensubtitlescom = "subliminal.converters.opensubtitlescom:OpenSubtitlesComConverter" tvsubtitles = "subliminal.converters.tvsubtitles:TVsubtitlesConverter" diff --git a/subliminal/converters/__init__.py b/subliminal/converters/__init__.py index 6e07fe2b..3dafd740 100644 --- a/subliminal/converters/__init__.py +++ b/subliminal/converters/__init__.py @@ -4,4 +4,4 @@ if TYPE_CHECKING: # Tuple of language (alpha3, country, script), with country and script optional - LanguageTuple = tuple[str] | tuple[str, str | None] | tuple[str, str | None, str | None] + LanguageTuple = tuple[str, str | None, str | None] diff --git a/subliminal/converters/addic7ed.py b/subliminal/converters/addic7ed.py index ce2a8c1d..e1858ad7 100644 --- a/subliminal/converters/addic7ed.py +++ b/subliminal/converters/addic7ed.py @@ -16,27 +16,29 @@ class Addic7edConverter(LanguageReverseConverter): def __init__(self) -> None: self.name_converter = language_converters['name'] self.from_addic7ed: dict[str, LanguageTuple] = { - 'Català': ('cat',), - 'Chinese (Simplified)': ('zho',), - 'Chinese (Traditional)': ('zho',), - 'Euskera': ('eus',), - 'Galego': ('glg',), - 'Greek': ('ell',), - 'Malay': ('msa',), - 'Portuguese (Brazilian)': ('por', 'BR'), + 'Català': ('cat', None, None), + 'Chinese (Simplified)': ('zho', None, None), + 'Chinese (Traditional)': ('zho', None, None), + 'Euskera': ('eus', None, None), + 'French (Canadian)': ('fra', 'CA', None), + 'Galego': ('glg', None, None), + 'Greek': ('ell', None, None), + 'Malay': ('msa', None, None), + 'Portuguese (Brazilian)': ('por', 'BR', None), 'Serbian (Cyrillic)': ('srp', None, 'Cyrl'), - 'Serbian (Latin)': ('srp',), - 'Spanish (Latin America)': ('spa',), - 'Spanish (Spain)': ('spa',), + 'Serbian (Latin)': ('srp', None, None), + 'Spanish (Latin America)': ('spa', None, None), + 'Spanish (Spain)': ('spa', None, None), } self.to_addic7ed: dict[LanguageTuple, str] = { - ('cat',): 'Català', - ('zho',): 'Chinese (Simplified)', - ('eus',): 'Euskera', - ('glg',): 'Galego', - ('ell',): 'Greek', - ('msa',): 'Malay', - ('por', 'BR'): 'Portuguese (Brazilian)', + ('cat', None, None): 'Català', + ('zho', None, None): 'Chinese (Simplified)', + ('eus', None, None): 'Euskera', + ('fra', 'CA', None): 'French (Canadian)', + ('glg', None, None): 'Galego', + ('ell', None, None): 'Greek', + ('msa', None, None): 'Malay', + ('por', 'BR', None): 'Portuguese (Brazilian)', ('srp', None, 'Cyrl'): 'Serbian (Cyrillic)', } self.codes = self.name_converter.codes | set(self.from_addic7ed.keys()) @@ -45,16 +47,22 @@ def convert(self, alpha3: str, country: str | None = None, script: str | None = """Convert an alpha3 language code with an alpha2 country code and a script code into a custom code.""" if (alpha3, country, script) in self.to_addic7ed: return self.to_addic7ed[(alpha3, country, script)] - if (alpha3, country) in self.to_addic7ed: - return self.to_addic7ed[(alpha3, country)] - if (alpha3,) in self.to_addic7ed: - return self.to_addic7ed[(alpha3,)] + if (alpha3, country, None) in self.to_addic7ed: + return self.to_addic7ed[(alpha3, country, None)] + if (alpha3, None, None) in self.to_addic7ed: + return self.to_addic7ed[(alpha3, None, None)] return self.name_converter.convert(alpha3, country, script) # type: ignore[no-any-return] def reverse(self, code: str) -> LanguageTuple: """Reverse a custom code into alpha3, country and script code.""" if code in self.from_addic7ed: - return self.from_addic7ed[code] + ret = self.from_addic7ed[code] + if len(ret) == 1: + return (*ret, None, None) + if len(ret) == 2: + return (*ret, None) + # if len(ret) == 3: + return ret return self.name_converter.reverse(code) # type: ignore[no-any-return] diff --git a/subliminal/converters/opensubtitles.py b/subliminal/converters/opensubtitles.py new file mode 100644 index 00000000..6c519a2a --- /dev/null +++ b/subliminal/converters/opensubtitles.py @@ -0,0 +1,77 @@ +# Copyright (c) 2013 the BabelFish authors. All rights reserved. +# Use of this source code is governed by the 3-clause BSD license +# that can be found in the LICENSE file. +# +"""Language converter for OpenSubtitles.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, cast + +from babelfish import ( # type: ignore[import-untyped] + LanguageReverseConverter, + LanguageReverseError, + language_converters, +) +from babelfish.converters import CaseInsensitiveDict # type: ignore[import-untyped] + +if TYPE_CHECKING: + from . import LanguageTuple + + +class OpenSubtitlesConverter(LanguageReverseConverter): + """Language converter for OpenSubtitlesCom. + + Originally defined in :mod:`babelfish`. + """ + + codes: set[str] + to_opensubtitles: dict[LanguageTuple, str] + # from_opensubtitles: CaseInsensitiveDict[tuple[str, str | None]] + from_opensubtitles: CaseInsensitiveDict + + def __init__(self) -> None: + self.alpha3b_converter = language_converters['alpha3b'] + self.alpha2_converter = language_converters['alpha2'] + self.to_opensubtitles = { + ('por', 'BR', None): 'pob', + ('gre', None, None): 'ell', + ('srp', None, None): 'scc', + ('srp', 'ME', None): 'mne', + ('srp', None, 'Latn'): 'scc', + ('srp', None, 'Cyrl'): 'scc', + ('spa', 'MX', None): 'spl', + ('chi', None, 'Hant'): 'zht', + ('chi', 'TW', None): 'zht', + } + self.from_opensubtitles = CaseInsensitiveDict( + { + 'pob': ('por', 'BR', None), + 'pb': ('por', 'BR', None), + 'ell': ('ell', None, None), + 'scc': ('srp', None, None), + 'mne': ('srp', 'ME', None), + 'spl': ('spa', 'MX'), + 'zht': ('zho', None, 'Hant'), + }, + ) + self.codes = self.alpha2_converter.codes | self.alpha3b_converter.codes | set(self.from_opensubtitles.keys()) + + def convert(self, alpha3: str, country: str | None = None, script: str | None = None) -> str: + """Convert an alpha3 language code with an alpha2 country code and a script code into a custom code.""" + alpha3b = self.alpha3b_converter.convert(alpha3, country, script) # type: ignore[no-any-return] + if (alpha3b, country, script) in self.to_opensubtitles: + return self.to_opensubtitles[(alpha3b, country, script)] + return alpha3b # type: ignore[no-any-return] + + def reverse(self, code: str) -> LanguageTuple: + """Reverse a custom code into alpha3, country and script code.""" + if code in self.from_opensubtitles: + return self.from_opensubtitles[code] # type: ignore[no-any-return] + for conv in [self.alpha3b_converter, self.alpha2_converter]: + conv = cast(LanguageReverseConverter, conv) + try: + return conv.reverse(code) # type: ignore[no-any-return] + except LanguageReverseError: + pass + raise LanguageReverseError(code) diff --git a/subliminal/converters/opensubtitlescom.py b/subliminal/converters/opensubtitlescom.py index 534f21fb..96349693 100644 --- a/subliminal/converters/opensubtitlescom.py +++ b/subliminal/converters/opensubtitlescom.py @@ -18,26 +18,28 @@ class OpenSubtitlesComConverter(LanguageReverseConverter): def __init__(self) -> None: self.alpha2_converter = language_converters['alpha2'] - self.from_opensubtitlescom: dict[str, LanguageTuple] = { + self.from_opensubtitlescom: dict[str, tuple[str, str | None]] = { 'pt-br': ('por', 'BR'), 'pt-pt': ('por', 'PT'), 'zh-cn': ('zho', 'CN'), 'zh-tw': ('zho', 'TW'), 'ze': ('zho', 'US'), 'me': ('srp', 'ME'), - 'sy': ('syr',), - 'ma': ('mni',), - 'at': ('ast',), + 'sy': ('syr', None), + 'ma': ('mni', None), + 'at': ('ast', None), + } + self.to_opensubtitlescom: dict[tuple[str, str | None], str] = { + v: k for k, v in self.from_opensubtitlescom.items() } - self.to_opensubtitlescom: dict[LanguageTuple, str] = {v: k for k, v in self.from_opensubtitlescom.items()} self.codes = self.alpha2_converter.codes | set(self.from_opensubtitlescom.keys()) def convert(self, alpha3: str, country: str | None = None, script: str | None = None) -> str: """Convert an alpha3 language code with an alpha2 country code and a script code into a custom code.""" if (alpha3, country) in self.to_opensubtitlescom: return self.to_opensubtitlescom[(alpha3, country)] - if (alpha3,) in self.to_opensubtitlescom: - return self.to_opensubtitlescom[(alpha3,)] + if (alpha3, None) in self.to_opensubtitlescom: + return self.to_opensubtitlescom[(alpha3, None)] return self.alpha2_converter.convert(alpha3, country, script) # type: ignore[no-any-return] @@ -45,6 +47,6 @@ def reverse(self, code: str) -> LanguageTuple: """Reverse a custom code into alpha3, country and script code.""" code_lower = code.lower() if code_lower in self.from_opensubtitlescom: - return self.from_opensubtitlescom[code_lower] + return (*self.from_opensubtitlescom[code_lower], None) return self.alpha2_converter.reverse(code) # type: ignore[no-any-return] diff --git a/subliminal/converters/tvsubtitles.py b/subliminal/converters/tvsubtitles.py index 7834a20d..f6309220 100644 --- a/subliminal/converters/tvsubtitles.py +++ b/subliminal/converters/tvsubtitles.py @@ -15,29 +15,29 @@ class TVsubtitlesConverter(LanguageReverseConverter): def __init__(self) -> None: self.alpha2_converter = language_converters['alpha2'] - self.from_tvsubtitles: dict[str, LanguageTuple] = { + self.from_tvsubtitles: dict[str, tuple[str, str | None]] = { 'br': ('por', 'BR'), - 'ua': ('ukr',), - 'gr': ('ell',), - 'cn': ('zho',), - 'jp': ('jpn',), - 'cz': ('ces',), + 'ua': ('ukr', None), + 'gr': ('ell', None), + 'cn': ('zho', None), + 'jp': ('jpn', None), + 'cz': ('ces', None), } - self.to_tvsubtitles: dict[LanguageTuple, str] = {v: k for k, v in self.from_tvsubtitles.items()} + self.to_tvsubtitles: dict[tuple[str, str | None], str] = {v: k for k, v in self.from_tvsubtitles.items()} self.codes = self.alpha2_converter.codes | set(self.from_tvsubtitles.keys()) def convert(self, alpha3: str, country: str | None = None, script: str | None = None) -> str: """Convert an alpha3 language code with an alpha2 country code and a script code into a custom code.""" if (alpha3, country) in self.to_tvsubtitles: return self.to_tvsubtitles[(alpha3, country)] - if (alpha3,) in self.to_tvsubtitles: - return self.to_tvsubtitles[(alpha3,)] + if (alpha3, None) in self.to_tvsubtitles: + return self.to_tvsubtitles[(alpha3, None)] return self.alpha2_converter.convert(alpha3, country, script) # type: ignore[no-any-return] def reverse(self, code: str) -> LanguageTuple: """Reverse a custom code into alpha3, country and script code.""" if code in self.from_tvsubtitles: - return self.from_tvsubtitles[code] + return (*self.from_tvsubtitles[code], None) return self.alpha2_converter.reverse(code) # type: ignore[no-any-return] diff --git a/subliminal/providers/opensubtitles.py b/subliminal/providers/opensubtitles.py index ab383656..4e31cf39 100644 --- a/subliminal/providers/opensubtitles.py +++ b/subliminal/providers/opensubtitles.py @@ -3,6 +3,7 @@ from __future__ import annotations import base64 +import contextlib import logging import os import re @@ -32,6 +33,15 @@ logger = logging.getLogger(__name__) +with contextlib.suppress(ValueError, KeyError): + # Delete entry from babelfish, if it was defined + language_converters.internal_converters.remove( + 'opensubtitles = babelfish.converters.opensubtitles:OpenSubtitlesConverter' + ) + del language_converters.converters['opensubtitles'] + # Register subliminal version + language_converters.register('opensubtitles = subliminal.converters.opensubtitles:OpenSubtitlesConverter') + class OpenSubtitlesSubtitle(Subtitle): """OpenSubtitles Subtitle.""" diff --git a/tests/providers/test_addic7ed.py b/tests/providers/test_addic7ed.py index e49132ab..42016f8e 100644 --- a/tests/providers/test_addic7ed.py +++ b/tests/providers/test_addic7ed.py @@ -40,7 +40,7 @@ def test_converter_convert_alpha3_name_converter(): @pytest.mark.converter() def test_converter_reverse(): - assert language_converters['addic7ed'].reverse('Chinese (Traditional)') == ('zho',) + assert language_converters['addic7ed'].reverse('Chinese (Traditional)') == ('zho', None, None) @pytest.mark.converter() diff --git a/tests/providers/test_tvsubtitles.py b/tests/providers/test_tvsubtitles.py index 3e1d716c..7e68fea7 100644 --- a/tests/providers/test_tvsubtitles.py +++ b/tests/providers/test_tvsubtitles.py @@ -31,7 +31,7 @@ def test_converter_convert_alpha3_alpha2_converter(): @pytest.mark.converter() def test_converter_reverse(): - assert language_converters['tvsubtitles'].reverse('gr') == ('ell',) + assert language_converters['tvsubtitles'].reverse('gr') == ('ell', None, None) @pytest.mark.converter()