Skip to content

Commit

Permalink
Add opensubtitles converter, copied from babelfish (Diaoul#1183)
Browse files Browse the repository at this point in the history
* update converters types

* add opensubtitles converter from babelfish

* fix opensubtitles typing

* fix tests
  • Loading branch information
getzze authored Nov 4, 2024
1 parent 0c4a957 commit 3e1dc07
Show file tree
Hide file tree
Showing 9 changed files with 142 additions and 44 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ tvdb = "subliminal.refiners.tvdb:refine"

[project.entry-points."babelfish.language_converters"]
addic7ed = "subliminal.converters.addic7ed:Addic7edConverter"
opensubtitles = "subliminal.converters.opensubtitles:OpenSubtitlesConverter"
opensubtitlescom = "subliminal.converters.opensubtitlescom:OpenSubtitlesComConverter"
tvsubtitles = "subliminal.converters.tvsubtitles:TVsubtitlesConverter"

Expand Down
2 changes: 1 addition & 1 deletion subliminal/converters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@

if TYPE_CHECKING:
# Tuple of language (alpha3, country, script), with country and script optional
LanguageTuple = tuple[str] | tuple[str, str | None] | tuple[str, str | None, str | None]
LanguageTuple = tuple[str, str | None, str | None]
54 changes: 31 additions & 23 deletions subliminal/converters/addic7ed.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,29 @@ class Addic7edConverter(LanguageReverseConverter):
def __init__(self) -> None:
self.name_converter = language_converters['name']
self.from_addic7ed: dict[str, LanguageTuple] = {
'Català': ('cat',),
'Chinese (Simplified)': ('zho',),
'Chinese (Traditional)': ('zho',),
'Euskera': ('eus',),
'Galego': ('glg',),
'Greek': ('ell',),
'Malay': ('msa',),
'Portuguese (Brazilian)': ('por', 'BR'),
'Català': ('cat', None, None),
'Chinese (Simplified)': ('zho', None, None),
'Chinese (Traditional)': ('zho', None, None),
'Euskera': ('eus', None, None),
'French (Canadian)': ('fra', 'CA', None),
'Galego': ('glg', None, None),
'Greek': ('ell', None, None),
'Malay': ('msa', None, None),
'Portuguese (Brazilian)': ('por', 'BR', None),
'Serbian (Cyrillic)': ('srp', None, 'Cyrl'),
'Serbian (Latin)': ('srp',),
'Spanish (Latin America)': ('spa',),
'Spanish (Spain)': ('spa',),
'Serbian (Latin)': ('srp', None, None),
'Spanish (Latin America)': ('spa', None, None),
'Spanish (Spain)': ('spa', None, None),
}
self.to_addic7ed: dict[LanguageTuple, str] = {
('cat',): 'Català',
('zho',): 'Chinese (Simplified)',
('eus',): 'Euskera',
('glg',): 'Galego',
('ell',): 'Greek',
('msa',): 'Malay',
('por', 'BR'): 'Portuguese (Brazilian)',
('cat', None, None): 'Català',
('zho', None, None): 'Chinese (Simplified)',
('eus', None, None): 'Euskera',
('fra', 'CA', None): 'French (Canadian)',
('glg', None, None): 'Galego',
('ell', None, None): 'Greek',
('msa', None, None): 'Malay',
('por', 'BR', None): 'Portuguese (Brazilian)',
('srp', None, 'Cyrl'): 'Serbian (Cyrillic)',
}
self.codes = self.name_converter.codes | set(self.from_addic7ed.keys())
Expand All @@ -45,16 +47,22 @@ def convert(self, alpha3: str, country: str | None = None, script: str | None =
"""Convert an alpha3 language code with an alpha2 country code and a script code into a custom code."""
if (alpha3, country, script) in self.to_addic7ed:
return self.to_addic7ed[(alpha3, country, script)]
if (alpha3, country) in self.to_addic7ed:
return self.to_addic7ed[(alpha3, country)]
if (alpha3,) in self.to_addic7ed:
return self.to_addic7ed[(alpha3,)]
if (alpha3, country, None) in self.to_addic7ed:
return self.to_addic7ed[(alpha3, country, None)]
if (alpha3, None, None) in self.to_addic7ed:
return self.to_addic7ed[(alpha3, None, None)]

return self.name_converter.convert(alpha3, country, script) # type: ignore[no-any-return]

def reverse(self, code: str) -> LanguageTuple:
"""Reverse a custom code into alpha3, country and script code."""
if code in self.from_addic7ed:
return self.from_addic7ed[code]
ret = self.from_addic7ed[code]
if len(ret) == 1:
return (*ret, None, None)
if len(ret) == 2:
return (*ret, None)
# if len(ret) == 3:
return ret

return self.name_converter.reverse(code) # type: ignore[no-any-return]
77 changes: 77 additions & 0 deletions subliminal/converters/opensubtitles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Copyright (c) 2013 the BabelFish authors. All rights reserved.
# Use of this source code is governed by the 3-clause BSD license
# that can be found in the LICENSE file.
#
"""Language converter for OpenSubtitles."""

from __future__ import annotations

from typing import TYPE_CHECKING, cast

from babelfish import ( # type: ignore[import-untyped]
LanguageReverseConverter,
LanguageReverseError,
language_converters,
)
from babelfish.converters import CaseInsensitiveDict # type: ignore[import-untyped]

if TYPE_CHECKING:
from . import LanguageTuple


class OpenSubtitlesConverter(LanguageReverseConverter):
    """Language converter for OpenSubtitles.

    Originally defined in :mod:`babelfish`.

    Languages are converted to their alpha3b code, except for the handful of
    (language, country, script) combinations listed in ``to_opensubtitles``,
    which use a provider-specific code. The reverse direction accepts the
    provider-specific codes as well as alpha3b and alpha2 codes.
    """

    # All the codes accepted by :meth:`reverse`.
    codes: set[str]
    # (alpha3b, country, script) -> provider-specific code.
    to_opensubtitles: dict[LanguageTuple, str]
    # Provider-specific code -> (alpha3, country, script).
    # Left unparametrized because babelfish is imported untyped; the values
    # are expected to be 3-item ``LanguageTuple``.
    from_opensubtitles: CaseInsensitiveDict

    def __init__(self) -> None:
        self.alpha3b_converter = language_converters['alpha3b']
        self.alpha2_converter = language_converters['alpha2']
        # Keys use the alpha3b code because `convert` looks them up after
        # converting the alpha3 code to alpha3b.
        self.to_opensubtitles = {
            ('por', 'BR', None): 'pob',
            ('gre', None, None): 'ell',
            ('srp', None, None): 'scc',
            ('srp', 'ME', None): 'mne',
            ('srp', None, 'Latn'): 'scc',
            ('srp', None, 'Cyrl'): 'scc',
            ('spa', 'MX', None): 'spl',
            ('chi', None, 'Hant'): 'zht',
            ('chi', 'TW', None): 'zht',
        }
        # Every value is a full (alpha3, country, script) triple so that
        # `reverse` always returns the same shape for custom codes.
        self.from_opensubtitles = CaseInsensitiveDict(
            {
                'pob': ('por', 'BR', None),
                'pb': ('por', 'BR', None),
                'ell': ('ell', None, None),
                'scc': ('srp', None, None),
                'mne': ('srp', 'ME', None),
                'spl': ('spa', 'MX', None),
                'zht': ('zho', None, 'Hant'),
            },
        )
        self.codes = self.alpha2_converter.codes | self.alpha3b_converter.codes | set(self.from_opensubtitles.keys())

    def convert(self, alpha3: str, country: str | None = None, script: str | None = None) -> str:
        """Convert an alpha3 language code with an alpha2 country code and a script code into a custom code.

        :param str alpha3: alpha3 code of the language.
        :param country: alpha2 code of the country, if any.
        :param script: code of the script, if any.
        :return: the provider-specific code when the exact
            (alpha3b, country, script) combination is listed in
            ``to_opensubtitles``, otherwise the alpha3b code.
        :rtype: str

        """
        alpha3b = self.alpha3b_converter.convert(alpha3, country, script)
        key = (alpha3b, country, script)
        if key in self.to_opensubtitles:
            return self.to_opensubtitles[key]
        return alpha3b  # type: ignore[no-any-return]

    def reverse(self, code: str) -> LanguageTuple:
        """Reverse a custom code into alpha3, country and script code.

        :param str code: provider-specific, alpha3b or alpha2 code.
        :return: the language tuple matching the code.
        :raises LanguageReverseError: if the code is neither a custom code nor
            recognized by the alpha3b or alpha2 converters.

        """
        # Provider-specific codes take precedence over the generic converters.
        if code in self.from_opensubtitles:
            return self.from_opensubtitles[code]  # type: ignore[no-any-return]
        for conv in [self.alpha3b_converter, self.alpha2_converter]:
            conv = cast(LanguageReverseConverter, conv)
            try:
                return conv.reverse(code)  # type: ignore[no-any-return]
            except LanguageReverseError:
                # Not a code of this converter, try the next one.
                pass
        raise LanguageReverseError(code)
18 changes: 10 additions & 8 deletions subliminal/converters/opensubtitlescom.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,33 +18,35 @@ class OpenSubtitlesComConverter(LanguageReverseConverter):

def __init__(self) -> None:
self.alpha2_converter = language_converters['alpha2']
self.from_opensubtitlescom: dict[str, LanguageTuple] = {
self.from_opensubtitlescom: dict[str, tuple[str, str | None]] = {
'pt-br': ('por', 'BR'),
'pt-pt': ('por', 'PT'),
'zh-cn': ('zho', 'CN'),
'zh-tw': ('zho', 'TW'),
'ze': ('zho', 'US'),
'me': ('srp', 'ME'),
'sy': ('syr',),
'ma': ('mni',),
'at': ('ast',),
'sy': ('syr', None),
'ma': ('mni', None),
'at': ('ast', None),
}
self.to_opensubtitlescom: dict[tuple[str, str | None], str] = {
v: k for k, v in self.from_opensubtitlescom.items()
}
self.to_opensubtitlescom: dict[LanguageTuple, str] = {v: k for k, v in self.from_opensubtitlescom.items()}
self.codes = self.alpha2_converter.codes | set(self.from_opensubtitlescom.keys())

def convert(self, alpha3: str, country: str | None = None, script: str | None = None) -> str:
"""Convert an alpha3 language code with an alpha2 country code and a script code into a custom code."""
if (alpha3, country) in self.to_opensubtitlescom:
return self.to_opensubtitlescom[(alpha3, country)]
if (alpha3,) in self.to_opensubtitlescom:
return self.to_opensubtitlescom[(alpha3,)]
if (alpha3, None) in self.to_opensubtitlescom:
return self.to_opensubtitlescom[(alpha3, None)]

return self.alpha2_converter.convert(alpha3, country, script) # type: ignore[no-any-return]

def reverse(self, code: str) -> LanguageTuple:
"""Reverse a custom code into alpha3, country and script code."""
code_lower = code.lower()
if code_lower in self.from_opensubtitlescom:
return self.from_opensubtitlescom[code_lower]
return (*self.from_opensubtitlescom[code_lower], None)

return self.alpha2_converter.reverse(code) # type: ignore[no-any-return]
20 changes: 10 additions & 10 deletions subliminal/converters/tvsubtitles.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,29 +15,29 @@ class TVsubtitlesConverter(LanguageReverseConverter):

def __init__(self) -> None:
self.alpha2_converter = language_converters['alpha2']
self.from_tvsubtitles: dict[str, LanguageTuple] = {
self.from_tvsubtitles: dict[str, tuple[str, str | None]] = {
'br': ('por', 'BR'),
'ua': ('ukr',),
'gr': ('ell',),
'cn': ('zho',),
'jp': ('jpn',),
'cz': ('ces',),
'ua': ('ukr', None),
'gr': ('ell', None),
'cn': ('zho', None),
'jp': ('jpn', None),
'cz': ('ces', None),
}
self.to_tvsubtitles: dict[LanguageTuple, str] = {v: k for k, v in self.from_tvsubtitles.items()}
self.to_tvsubtitles: dict[tuple[str, str | None], str] = {v: k for k, v in self.from_tvsubtitles.items()}
self.codes = self.alpha2_converter.codes | set(self.from_tvsubtitles.keys())

def convert(self, alpha3: str, country: str | None = None, script: str | None = None) -> str:
"""Convert an alpha3 language code with an alpha2 country code and a script code into a custom code."""
if (alpha3, country) in self.to_tvsubtitles:
return self.to_tvsubtitles[(alpha3, country)]
if (alpha3,) in self.to_tvsubtitles:
return self.to_tvsubtitles[(alpha3,)]
if (alpha3, None) in self.to_tvsubtitles:
return self.to_tvsubtitles[(alpha3, None)]

return self.alpha2_converter.convert(alpha3, country, script) # type: ignore[no-any-return]

def reverse(self, code: str) -> LanguageTuple:
"""Reverse a custom code into alpha3, country and script code."""
if code in self.from_tvsubtitles:
return self.from_tvsubtitles[code]
return (*self.from_tvsubtitles[code], None)

return self.alpha2_converter.reverse(code) # type: ignore[no-any-return]
10 changes: 10 additions & 0 deletions subliminal/providers/opensubtitles.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from __future__ import annotations

import base64
import contextlib
import logging
import os
import re
Expand Down Expand Up @@ -32,6 +33,15 @@

logger = logging.getLogger(__name__)

with contextlib.suppress(ValueError, KeyError):
# Delete entry from babelfish, if it was defined
language_converters.internal_converters.remove(
'opensubtitles = babelfish.converters.opensubtitles:OpenSubtitlesConverter'
)
del language_converters.converters['opensubtitles']
# Register subliminal version
language_converters.register('opensubtitles = subliminal.converters.opensubtitles:OpenSubtitlesConverter')


class OpenSubtitlesSubtitle(Subtitle):
"""OpenSubtitles Subtitle."""
Expand Down
2 changes: 1 addition & 1 deletion tests/providers/test_addic7ed.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def test_converter_convert_alpha3_name_converter():

@pytest.mark.converter()
def test_converter_reverse():
assert language_converters['addic7ed'].reverse('Chinese (Traditional)') == ('zho',)
assert language_converters['addic7ed'].reverse('Chinese (Traditional)') == ('zho', None, None)


@pytest.mark.converter()
Expand Down
2 changes: 1 addition & 1 deletion tests/providers/test_tvsubtitles.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def test_converter_convert_alpha3_alpha2_converter():

@pytest.mark.converter()
def test_converter_reverse():
assert language_converters['tvsubtitles'].reverse('gr') == ('ell',)
assert language_converters['tvsubtitles'].reverse('gr') == ('ell', None, None)


@pytest.mark.converter()
Expand Down

0 comments on commit 3e1dc07

Please sign in to comment.