Skip to content

Commit

Permalink
MarleySpoon: add precautionary check for unexpected API URLs. (#1069)
Browse files Browse the repository at this point in the history
  • Loading branch information
jayaddison authored May 6, 2024
1 parent b4567bf commit 94b2617
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 2 deletions.
24 changes: 22 additions & 2 deletions recipe_scrapers/marleyspoon.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
# mypy: disallow_untyped_defs=False
import json
import re
from urllib.parse import urljoin

import requests

from ._abstract import HEADERS, AbstractScraper
from ._exceptions import ElementNotFoundInHtml
from ._utils import normalize_string
from ._exceptions import ElementNotFoundInHtml, RecipeScrapersExceptions
from ._utils import get_host_name, normalize_string

ID_PATTERN = re.compile(r"/(\d+)-")
SCRIPT_PATTERN = re.compile(
Expand Down Expand Up @@ -65,6 +66,25 @@ def _get_json_params(self):
if api_url is None or api_token is None:
raise ElementNotFoundInHtml("Required script not found.")

from . import SCRAPERS

scraper_name = self.__class__.__name__
try:
next_url = urljoin(self.url, api_url)
host_name = get_host_name(next_url)
next_scraper = type(None)
# check: api.foo.xx.example, foo.xx.example, xx.example
while host_name and host_name.count("."):
next_scraper = SCRAPERS.get(host_name)
if next_scraper:
break
_, host_name = host_name.split(".", 1)
if not isinstance(self, next_scraper):
msg = f"Attempted to scrape using {next_scraper} from {scraper_name}"
raise ValueError(msg)
except Exception as e:
raise RecipeScrapersExceptions(f"Unexpected API URL: {api_url}") from e

return api_url, api_token

@classmethod
Expand Down
6 changes: 6 additions & 0 deletions tests/legacy/test_data/faulty.testhtml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<!DOCTYPE html>
<html>
<script>
gon.current_brand="test_invalid"; gon.current_country="XX"; gon.api_token=" ".trim() || null; gon.api_host="http://api.marlarkey.invalid";
</script>
</html>
6 changes: 6 additions & 0 deletions tests/legacy/test_data/relative_url.testhtml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<!DOCTYPE html>
<html>
<script>
gon.current_brand="test_invalid"; gon.current_country="XX"; gon.api_token=" ".trim() || null; gon.api_host="relative_path/unexpected.js";
</script>
</html>
41 changes: 41 additions & 0 deletions tests/legacy/test_marleyspoon_invalid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import unittest

import responses

from recipe_scrapers._exceptions import RecipeScrapersExceptions
from recipe_scrapers.marleyspoon import MarleySpoon


class TestFaultyAPIURLResponse(unittest.TestCase):
    """Check how MarleySpoon reacts to pages that advertise an unexpected API URL.

    Each test serves a canned HTML fixture for RECIPE_URL through ``responses``
    and then constructs the scraper, which is expected to raise.

    NOTE: fixture paths are relative to the working directory, so these tests
    presumably need to be run from the repository root.
    """

    # Recipe page URL that the mocked HTTP response is registered for.
    RECIPE_URL = "https://marleyspoon.de/menu/113813-glasierte-veggie-burger-mit-roestkartoffeln-und-apfel-gurken-salat"

    def _register_page(self, fixture_path):
        """Register the contents of *fixture_path* as the GET response for RECIPE_URL."""
        # Explicit encoding so that reading the fixture does not depend on the
        # locale's default encoding.
        with open(fixture_path, encoding="utf-8") as fixture:
            page_body = fixture.read()

        responses.add(
            method=responses.GET,
            url=self.RECIPE_URL,
            body=page_body,
        )

    @responses.activate
    def test_faulty_response(self):
        # The fixture sets gon.api_host to an absolute URL on a different host.
        self._register_page("tests/legacy/test_data/faulty.testhtml")

        with self.assertRaises(RecipeScrapersExceptions):
            MarleySpoon(url=self.RECIPE_URL)

    @responses.activate
    def test_relative_api_url(self):
        # The fixture sets gon.api_host to a relative path instead of a URL.
        self._register_page("tests/legacy/test_data/relative_url.testhtml")

        # Deliberately broad: currently this raises a
        # requests.exceptions.MissingSchema exception.
        with self.assertRaises(Exception):
            MarleySpoon(url=self.RECIPE_URL)

0 comments on commit 94b2617

Please sign in to comment.