Skip to content

Commit

Permalink
Add support for no pre_head_insert and no js_notify callback
Browse files (browse the repository at this point in the history)
  • Loading branch information
benoit74 committed Oct 24, 2024
1 parent 43c2c8a commit a7003af
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 98 deletions.
13 changes: 8 additions & 5 deletions src/zimscraperlib/rewriting/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,9 @@ class HtmlRewriter(HTMLParser):
def __init__(
self,
url_rewriter: ArticleUrlRewriter,
pre_head_insert: str,
pre_head_insert: str | None,
post_head_insert: str | None,
notify_js_module: Callable[[ZimPath], None],
notify_js_module: Callable[[ZimPath], None] | None,
):
super().__init__(convert_charrefs=False)
self.url_rewriter = url_rewriter
Expand Down Expand Up @@ -430,7 +430,7 @@ def do_attribute_rewrite(
css_rewriter: CssRewriter,
url_rewriter: ArticleUrlRewriter,
base_href: str | None,
notify_js_module: Callable[[ZimPath], None],
notify_js_module: Callable[[ZimPath], None] | None,
) -> AttrNameAndValue:
"""Utility function to process all attribute rewriting rules
Expand Down Expand Up @@ -587,7 +587,7 @@ def rewrite_href_src_attributes(
attrs: AttrsList,
url_rewriter: ArticleUrlRewriter,
base_href: str | None,
notify_js_module: Callable[[ZimPath], None],
notify_js_module: Callable[[ZimPath], None] | None,
):
"""Rewrite href and src attributes
Expand All @@ -596,7 +596,10 @@ def rewrite_href_src_attributes(
"""
if attr_name not in ("href", "src") or not attr_value:
return
if get_html_rewrite_context(tag=tag, attrs=attrs) == "js-module":
if (
notify_js_module
and get_html_rewrite_context(tag=tag, attrs=attrs) == "js-module"
):
notify_js_module(url_rewriter.get_item_path(attr_value, base_href=base_href))
return (
attr_name,
Expand Down
11 changes: 6 additions & 5 deletions src/zimscraperlib/rewriting/js.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ def __init__(
self,
url_rewriter: ArticleUrlRewriter,
base_href: str | None,
notify_js_module: Callable[[ZimPath], None],
notify_js_module: Callable[[ZimPath], None] | None,
):
super().__init__(None)
self.first_buff = self._init_local_declaration(GLOBAL_OVERRIDES)
Expand Down Expand Up @@ -298,11 +298,12 @@ def func(
m_object: re.Match[str], _opts: dict[str, Any] | None = None
) -> str:
def sub_funct(match: re.Match[str]) -> str:
self.notify_js_module(
self.url_rewriter.get_item_path(
match.group(2), base_href=self.base_href
if self.notify_js_module:
self.notify_js_module(
self.url_rewriter.get_item_path(
match.group(2), base_href=self.base_href
)
)
)
return (
f"{match.group(1)}{get_rewriten_import_url(match.group(2))}"
f"{match.group(3)}"
Expand Down
10 changes: 0 additions & 10 deletions tests/rewriting/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,6 @@
)


@pytest.fixture(scope="module")
def no_js_notify():
"""Fixture to not care about notification of detection of a JS file"""

def no_js_notify_handler(_: str):
pass

yield no_js_notify_handler


class SimpleUrlRewriter(ArticleUrlRewriter):
"""Basic URL rewriter mocking most calls"""

Expand Down
126 changes: 55 additions & 71 deletions tests/rewriting/test_html_rewriting.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,17 +74,15 @@ def no_rewrite_content(request: pytest.FixtureRequest):
yield request.param


def test_no_rewrite(
no_rewrite_content: ContentForTests, no_js_notify: Callable[[ZimPath], None]
):
def test_no_rewrite(no_rewrite_content: ContentForTests):
assert (
HtmlRewriter(
ArticleUrlRewriter(
article_url=HttpUrl(f"http://{no_rewrite_content.article_url}"),
),
"",
"",
no_js_notify,
None,
None,
None,
)
.rewrite(no_rewrite_content.input_str)
.content
Expand Down Expand Up @@ -116,17 +114,15 @@ def escaped_content(request: pytest.FixtureRequest):
yield request.param


def test_escaped_content(
escaped_content: ContentForTests, no_js_notify: Callable[[ZimPath], None]
):
def test_escaped_content(escaped_content: ContentForTests):
transformed = (
HtmlRewriter(
ArticleUrlRewriter(
article_url=HttpUrl(f"http://{escaped_content.article_url}")
),
"",
"",
no_js_notify,
None,
None,
None,
)
.rewrite(escaped_content.input_str)
.content
Expand Down Expand Up @@ -239,17 +235,15 @@ def js_rewrites(request: pytest.FixtureRequest):
yield request.param


def test_js_rewrites(
js_rewrites: ContentForTests, no_js_notify: Callable[[ZimPath], None]
):
def test_js_rewrites(js_rewrites: ContentForTests):
transformed = (
HtmlRewriter(
ArticleUrlRewriter(
article_url=HttpUrl(f"http://{js_rewrites.article_url}")
),
"",
"",
no_js_notify,
None,
None,
None,
)
.rewrite(js_rewrites.input_str)
.content
Expand Down Expand Up @@ -334,24 +328,24 @@ def rewrite_url(request: pytest.FixtureRequest):
yield request.param


def test_rewrite(rewrite_url: ContentForTests, no_js_notify: Callable[[ZimPath], None]):
def test_rewrite(rewrite_url: ContentForTests):
assert (
HtmlRewriter(
ArticleUrlRewriter(
article_url=HttpUrl(f"http://{rewrite_url.article_url}"),
existing_zim_paths={ZimPath("exemple.com/a/long/path")},
),
"",
"",
no_js_notify,
None,
None,
None,
)
.rewrite(rewrite_url.input_str)
.content
== rewrite_url.expected_str
)


def test_extract_title(no_js_notify: Callable[[ZimPath], None]):
def test_extract_title():
content = """<html>
<head>
<title>Page title</title>
Expand All @@ -367,25 +361,25 @@ def test_extract_title(no_js_notify: Callable[[ZimPath], None]):
article_url=HttpUrl("http://example.com"),
existing_zim_paths={ZimPath("exemple.com/a/long/path")},
),
"",
"",
no_js_notify,
None,
None,
None,
)
.rewrite(content)
.title
== "Page title"
)


def test_rewrite_attributes(no_js_notify: Callable[[ZimPath], None]):
def test_rewrite_attributes():
rewriter = HtmlRewriter(
ArticleUrlRewriter(
article_url=HttpUrl("http://kiwix.org/"),
existing_zim_paths={ZimPath("kiwix.org/foo")},
),
"",
"",
no_js_notify,
None,
None,
None,
)

assert (
Expand All @@ -407,13 +401,13 @@ def test_rewrite_attributes(no_js_notify: Callable[[ZimPath], None]):
)


def test_rewrite_css(no_js_notify: Callable[[ZimPath], None]):
def test_rewrite_css():
output = (
HtmlRewriter(
ArticleUrlRewriter(article_url=HttpUrl("http://kiwix.org/")),
"",
"",
no_js_notify,
None,
None,
None,
)
.rewrite(
"<style>p { /* A comment with a http://link.org/ */ "
Expand All @@ -427,7 +421,7 @@ def test_rewrite_css(no_js_notify: Callable[[ZimPath], None]):
)


def test_head_insert(no_js_notify: Callable[[ZimPath], None]):
def test_head_insert():
content = """<html>
<head>
<title>A test content</title>
Expand All @@ -439,18 +433,17 @@ def test_head_insert(no_js_notify: Callable[[ZimPath], None]):

url_rewriter = ArticleUrlRewriter(article_url=HttpUrl("http://kiwix.org/"))
assert (
HtmlRewriter(url_rewriter, "", "", no_js_notify).rewrite(content).content
== content
HtmlRewriter(url_rewriter, None, None, None).rewrite(content).content == content
)

assert HtmlRewriter(url_rewriter, "PRE_HEAD_INSERT", "", no_js_notify).rewrite(
assert HtmlRewriter(url_rewriter, "PRE_HEAD_INSERT", None, None).rewrite(
content
).content == content.replace("<head>", "<head>PRE_HEAD_INSERT")
assert HtmlRewriter(url_rewriter, "", "POST_HEAD_INSERT", no_js_notify).rewrite(
assert HtmlRewriter(url_rewriter, None, "POST_HEAD_INSERT", None).rewrite(
content
).content == content.replace("</head>", "POST_HEAD_INSERT</head>")
assert HtmlRewriter(
url_rewriter, "PRE_HEAD_INSERT", "POST_HEAD_INSERT", no_js_notify
url_rewriter, "PRE_HEAD_INSERT", "POST_HEAD_INSERT", None
).rewrite(content).content == content.replace(
"<head>", "<head>PRE_HEAD_INSERT"
).replace(
Expand Down Expand Up @@ -735,9 +728,7 @@ def rewrite_base_href_content(request):
yield request.param


def test_rewrite_base_href(
rewrite_base_href_content: ContentForTests, no_js_notify: Callable[[ZimPath], None]
):
def test_rewrite_base_href(rewrite_base_href_content: ContentForTests):
assert (
HtmlRewriter(
ArticleUrlRewriter(
Expand All @@ -750,9 +741,9 @@ def test_rewrite_base_href(
ZimPath("kiwix.org/favicon.png"),
},
),
"",
"",
no_js_notify,
None,
None,
None,
)
.rewrite(rewrite_base_href_content.input_str)
.content
Expand Down Expand Up @@ -795,15 +786,13 @@ def test_rewrite_base_href(
),
],
)
def test_simple_rewrite(
input_content: str, expected_output: str, no_js_notify: Callable[[ZimPath], None]
):
def test_simple_rewrite(input_content: str, expected_output: str):
assert (
HtmlRewriter(
ArticleUrlRewriter(article_url=HttpUrl("http://example.com")),
"",
"",
no_js_notify,
None,
None,
None,
)
.rewrite(input_content)
.content
Expand Down Expand Up @@ -862,9 +851,7 @@ def rewrite_onxxx_content(request: pytest.FixtureRequest):
yield request.param


def test_rewrite_onxxx_event(
rewrite_onxxx_content: ContentForTests, no_js_notify: Callable[[ZimPath], None]
):
def test_rewrite_onxxx_event(rewrite_onxxx_content: ContentForTests):
assert (
HtmlRewriter(
ArticleUrlRewriter(
Expand All @@ -877,9 +864,9 @@ def test_rewrite_onxxx_event(
ZimPath("kiwix.org/favicon.png"),
},
),
"",
"",
no_js_notify,
None,
None,
None,
)
.rewrite(rewrite_onxxx_content.input_str)
.content
Expand Down Expand Up @@ -924,20 +911,17 @@ def rewrite_meta_charset_content(request: pytest.FixtureRequest):
yield request.param


def test_rewrite_meta_charset(
rewrite_meta_charset_content: ContentForTests,
no_js_notify: Callable[[ZimPath], None],
):
def test_rewrite_meta_charset(rewrite_meta_charset_content: ContentForTests):
assert (
HtmlRewriter(
ArticleUrlRewriter(
article_url=HttpUrl(
f"http://{rewrite_meta_charset_content.article_url}"
)
),
"",
"",
no_js_notify,
None,
None,
None,
)
.rewrite(rewrite_meta_charset_content.input_str)
.content
Expand All @@ -963,7 +947,6 @@ def rewrite_meta_http_equiv_redirect_full_content(request: pytest.FixtureRequest

def test_rewrite_meta_http_equiv_redirect_full(
rewrite_meta_http_equiv_redirect_full_content: ContentForTests,
no_js_notify: Callable[[ZimPath], None],
):
assert (
HtmlRewriter(
Expand All @@ -973,9 +956,9 @@ def test_rewrite_meta_http_equiv_redirect_full(
),
existing_zim_paths={ZimPath("kiwix.org/somepage")},
),
"",
"",
no_js_notify,
None,
None,
None,
)
.rewrite(rewrite_meta_http_equiv_redirect_full_content.input_str)
.content
Expand Down Expand Up @@ -1112,11 +1095,12 @@ def rewrite_tag_name(attr_name: str, attr_value: str | None) -> AttrNameAndValue
@rules.rewrite_attribute()
def rewrite_call_notify(
attr_name: str,
notify_js_module: Callable[[ZimPath], None],
notify_js_module: Callable[[ZimPath], None] | None,
) -> AttrNameAndValue | None:
if attr_name != "call_notify":
return
notify_js_module(ZimPath("foo"))
if notify_js_module:
notify_js_module(ZimPath("foo"))
return


Expand Down
Loading

0 comments on commit a7003af

Please sign in to comment.