Skip to content

Commit

Permalink
Merge pull request #80 from openzim/metadata_fixes
Browse files Browse the repository at this point in the history
Metadata fixes and enhancements
  • Loading branch information
rgaudin authored Jan 16, 2024
2 parents 72493a8 + edaef40 commit cb214f4
Show file tree
Hide file tree
Showing 7 changed files with 84 additions and 33 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]

### Added
- Language metadata can be customized (#77)
- New html option in coverage report

### Fixed
- Name metadata is not set correctly (#76)
- Default publisher is not correctly spelled (#78)
- Adapt to hatchling v1.19.0 which mandates packages setting (#79)
- Small fixes in invoke tasks

### Changed
Expand Down
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ build-backend = "hatchling.build"
name = "kolibri2zim"
authors = [{ name = "Kiwix", email = "[email protected]" }]
keywords = ["kiwix", "zim", "offline", "kolibri"]
requires-python = ">=3.11"
requires-python = ">=3.11,<3.12"
description = "Make ZIM file from Kolibri Channels"
readme = "README.md"
license = { text = "GPL-3.0-or-later" }
Expand Down Expand Up @@ -68,6 +68,9 @@ exclude = ["/.github"]
path = "hatch_build.py"
dependencies = ["zimscraperlib==3.1.1"]

[tool.hatch.build.targets.wheel]
packages = ["src/kolibri2zim"]

[tool.hatch.envs.default]
features = ["dev"]

Expand Down
18 changes: 0 additions & 18 deletions src/kolibri2zim/__main__.py

This file was deleted.

20 changes: 15 additions & 5 deletions src/kolibri2zim/entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
import sys

from kolibri2zim.constants import NAME, SCRAPER, Global, get_logger, set_debug
from kolibri2zim.scraper import Kolibri2Zim


def main():
def parse_args(raw_args):
parser = argparse.ArgumentParser(
prog=NAME,
description="Scraper to create ZIM files from Kolibri channels",
Expand Down Expand Up @@ -37,6 +38,13 @@ def main():
required=True,
)

parser.add_argument(
"--lang",
help="ZIM Language, used in metadata (should be a ISO-639-3 language code). "
"If unspecified, scraper will use 'eng'",
default="eng",
)

parser.add_argument(
"--title",
help="Custom title for your ZIM. Kolibri channel name otherwise",
Expand Down Expand Up @@ -79,7 +87,8 @@ def main():
)

parser.add_argument(
"--publisher", help="Custom publisher name (ZIM metadata). “OpenZIM” otherwise"
"--publisher",
help="Custom publisher name (ZIM metadata). “openZIM” otherwise",
)

parser.add_argument(
Expand Down Expand Up @@ -192,13 +201,14 @@ def main():
action="version",
version=SCRAPER,
)
return parser.parse_args(raw_args)

args = parser.parse_args()

def main():
args = parse_args(sys.argv[1:])
set_debug(args.debug)
logger = get_logger()

from kolibri2zim.scraper import Kolibri2Zim

try:
scraper = Kolibri2Zim(**dict(args._get_kwargs()))
sys.exit(scraper.run())
Expand Down
8 changes: 5 additions & 3 deletions src/kolibri2zim/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ def go(option):
self.author = go("creator")
self.publisher = go("publisher")
self.name = go("name")
self.language = go("lang")

# customization
self.favicon = go("favicon")
Expand Down Expand Up @@ -784,6 +785,7 @@ def run(self):
f" description: {self.description}\n"
f" creator: {self.author}\n"
f" publisher: {self.publisher}\n"
f" language: {self.language}\n"
f" tags: {';'.join(self.tags)}"
)

Expand Down Expand Up @@ -815,8 +817,8 @@ def run(self):
ignore_duplicates=True,
)
self.creator.config_metadata(
Name=self.clean_fname,
Language="eng",
Name=self.name, # pyright: ignore reportGeneralTypeIssues
Language=self.language, # pyright: ignore reportGeneralTypeIssues
Title=self.title,
Description=self.description,
LongDescription=self.long_description,
Expand Down Expand Up @@ -966,7 +968,7 @@ def sanitize_inputs(self):
self.author = self.author.strip()

if not self.publisher:
self.publisher = "Openzim"
self.publisher = "openZIM"
self.publisher = self.publisher.strip()

self.tags = list({*self.tags, "_category:other", "kolibri", "_videos:yes"})
Expand Down
31 changes: 25 additions & 6 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@
from kolibri2zim.scraper import Kolibri2Zim, KolibriDB
from kolibri2zim.scraper import options as expected_options_keys

CHANNEL_NAME = "channel_name"
CHANNEL_DESCRIPTION = "a description"


class FakeDb(KolibriDB):
def __init__(
Expand All @@ -30,10 +27,32 @@ def get_channel_metadata(self, _):


@pytest.fixture()
def scraper_generator() -> Generator[Callable[..., Kolibri2Zim], None, None]:
def channel_name() -> Generator[str, None, None]:
    """Provide the channel name string shared by the tests."""
    name = "channel name"
    yield name


@pytest.fixture()
def channel_description() -> Generator[str, None, None]:
    """Provide the channel description string shared by the tests."""
    description = "channel description"
    yield description


@pytest.fixture()
def channel_author() -> Generator[str, None, None]:
    """Provide the channel author string shared by the tests."""
    author = "channel author"
    yield author


@pytest.fixture()
def zim_name() -> Generator[str, None, None]:
    """Provide the ZIM name passed as ``--name`` by the tests."""
    name = "a_name"
    yield name


@pytest.fixture()
def scraper_generator(
channel_name, channel_description
) -> Generator[Callable[..., Kolibri2Zim], None, None]:
def _scraper(
channel_name: str = CHANNEL_NAME,
channel_description: str = CHANNEL_DESCRIPTION,
channel_name: str = channel_name,
channel_description: str = channel_description,
channel_author: str | None = None,
additional_options: dict[str, Any] | None = None,
) -> Kolibri2Zim:
Expand Down
31 changes: 31 additions & 0 deletions tests/test_sanitize_inputs.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
import random
import re
import string
from collections.abc import Callable

import pytest
from conftest import FakeDb
from zimscraperlib.constants import MAXIMUM_DESCRIPTION_METADATA_LENGTH as MAX_DESC_LEN
from zimscraperlib.constants import (
MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH as MAX_LONG_DESC_LEN,
)

from kolibri2zim.entrypoint import parse_args
from kolibri2zim.scraper import Kolibri2Zim


Expand Down Expand Up @@ -191,3 +194,31 @@ def test_description(

assert scraper.description == expected_description
assert scraper.long_description == expected_long_description


def test_no_required_args():
    """An empty command line must make the argument parser exit."""
    empty_cli: list = []
    with pytest.raises(SystemExit):
        parse_args(empty_cli)


def test_defaults_args(channel_name, channel_description, channel_author, zim_name):
    """Check the scraper's values after sanitizing when only ``--name`` is passed.

    The scraper is given a FakeDb built from the channel fixtures, then
    ``sanitize_inputs()`` is run and the resulting attributes are asserted.
    """
    cli_options = dict(parse_args(["--name", zim_name])._get_kwargs())
    scraper = Kolibri2Zim(**cli_options)
    scraper.db = FakeDb(
        channel_name=channel_name,
        channel_description=channel_description,
        channel_author=channel_author,
    )
    scraper.sanitize_inputs()

    assert scraper.language == "eng"
    assert scraper.publisher == "openZIM"
    assert scraper.author == channel_author
    assert scraper.title == channel_name
    assert scraper.description == channel_description
    assert scraper.name == zim_name

    # File name is expected to be "<name>_YYYY-MM.zim"
    fname_pattern = f"{zim_name}_\\d{{4}}-\\d{{2}}\\.zim"
    assert re.match(fname_pattern, scraper.clean_fname)

    # We compare sets because ordering does not matter; the length check
    # additionally guards against duplicated tags.
    expected_tags = {"_category:other", "kolibri", "_videos:yes"}
    assert set(scraper.tags) == expected_tags
    assert len(scraper.tags) == 3

0 comments on commit cb214f4

Please sign in to comment.