diff --git a/recipe_scrapers/marleyspoon.py b/recipe_scrapers/marleyspoon.py index 1325a201b..d0f44ca14 100644 --- a/recipe_scrapers/marleyspoon.py +++ b/recipe_scrapers/marleyspoon.py @@ -1,12 +1,13 @@ # mypy: disallow_untyped_defs=False import json import re +from urllib.parse import urljoin import requests from ._abstract import HEADERS, AbstractScraper -from ._exceptions import ElementNotFoundInHtml -from ._utils import normalize_string +from ._exceptions import ElementNotFoundInHtml, RecipeScrapersExceptions +from ._utils import get_host_name, normalize_string ID_PATTERN = re.compile(r"/(\d+)-") SCRIPT_PATTERN = re.compile( @@ -65,6 +66,25 @@ def _get_json_params(self): if api_url is None or api_token is None: raise ElementNotFoundInHtml("Required script not found.") + from . import SCRAPERS + + scraper_name = self.__class__.__name__ + try: + next_url = urljoin(self.url, api_url) + host_name = get_host_name(next_url) + next_scraper = type(None) + # check: api.foo.xx.example, foo.xx.example, xx.example + while host_name and host_name.count("."): + next_scraper = SCRAPERS.get(host_name) + if next_scraper: + break + _, host_name = host_name.split(".", 1) + if not isinstance(self, next_scraper): + msg = f"Attempted to scrape using {next_scraper} from {scraper_name}" + raise ValueError(msg) + except Exception as e: + raise RecipeScrapersExceptions(f"Unexpected API URL: {api_url}") from e + return api_url, api_token @classmethod diff --git a/tests/legacy/test_data/faulty.testhtml b/tests/legacy/test_data/faulty.testhtml new file mode 100644 index 000000000..b0c4f2998 --- /dev/null +++ b/tests/legacy/test_data/faulty.testhtml @@ -0,0 +1,6 @@ + + + + diff --git a/tests/legacy/test_data/relative_url.testhtml b/tests/legacy/test_data/relative_url.testhtml new file mode 100644 index 000000000..16650db42 --- /dev/null +++ b/tests/legacy/test_data/relative_url.testhtml @@ -0,0 +1,6 @@ + + + + diff --git a/tests/legacy/test_marleyspoon_invalid.py 
import unittest
from pathlib import Path

import responses

from recipe_scrapers._exceptions import RecipeScrapersExceptions
from recipe_scrapers.marleyspoon import MarleySpoon


class TestFaultyAPIURLResponse(unittest.TestCase):
    """Check how ``MarleySpoon`` reacts to unusable API URLs in a recipe page.

    Each test serves a stored HTML fixture as the mocked response for the
    recipe URL and expects constructing the scraper to raise.
    """

    # Resolve fixtures relative to this file so the tests do not depend on
    # the directory pytest/unittest happens to be started from.
    _TEST_DATA_DIR = Path(__file__).resolve().parent / "test_data"

    _RECIPE_URL = "https://marleyspoon.de/menu/113813-glasierte-veggie-burger-mit-roestkartoffeln-und-apfel-gurken-salat"

    @classmethod
    def _mock_recipe_page(cls, fixture_name):
        """Register ``fixture_name`` as the mocked GET body for the recipe URL.

        Args:
            fixture_name: File name of the fixture inside ``test_data``.

        Returns:
            The recipe URL that the mocked response was registered for.
        """
        # Explicit encoding keeps decoding independent of the platform locale.
        body = (cls._TEST_DATA_DIR / fixture_name).read_text(encoding="utf-8")
        responses.add(
            method=responses.GET,
            url=cls._RECIPE_URL,
            body=body,
        )
        return cls._RECIPE_URL

    @responses.activate
    def test_faulty_response(self):
        """The ``faulty.testhtml`` page must raise ``RecipeScrapersExceptions``."""
        url = self._mock_recipe_page("faulty.testhtml")

        with self.assertRaises(RecipeScrapersExceptions):
            MarleySpoon(url=url)

    @responses.activate
    def test_relative_api_url(self):
        """The ``relative_url.testhtml`` page must raise some exception.

        NOTE(review): judging by the fixture name, the page presumably embeds
        a relative API URL; the fixture content should be checked to confirm.
        """
        url = self._mock_recipe_page("relative_url.testhtml")

        # Deliberately broad: currently this raises a
        # requests.exceptions.MissingSchema exception.
        with self.assertRaises(Exception):
            MarleySpoon(url=url)