From 664211bdb6b70454aa1de361215668c7bccea7f4 Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Fri, 5 Jul 2024 21:03:05 +0200 Subject: [PATCH 01/24] feat: WiP reimplement mimeparse --- docs/api/util.rst | 1 + falcon/__init__.py | 1 + falcon/util/mediatypes.py | 18 +++++++++++++++++- 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/docs/api/util.rst b/docs/api/util.rst index 2c645fede..c51115bff 100644 --- a/docs/api/util.rst +++ b/docs/api/util.rst @@ -38,6 +38,7 @@ Media types ----------- .. autofunction:: falcon.parse_header +.. autofunction:: falcon.mediatypes.best_match Async ----- diff --git a/falcon/__init__.py b/falcon/__init__.py index 745856058..7130529e0 100644 --- a/falcon/__init__.py +++ b/falcon/__init__.py @@ -76,6 +76,7 @@ from falcon.util import http_status_to_code from falcon.util import IS_64_BITS from falcon.util import is_python_func +from falcon.util import mediatypes from falcon.util import misc from falcon.util import parse_header from falcon.util import reader diff --git a/falcon/util/mediatypes.py b/falcon/util/mediatypes.py index c0dca5121..9b9044223 100644 --- a/falcon/util/mediatypes.py +++ b/falcon/util/mediatypes.py @@ -86,4 +86,20 @@ def parse_header(line: str) -> typing.Tuple[str, dict]: return _parse_header_old_stdlib(line) -__all__ = ['parse_header'] +def best_match(media_types: typing.Iterable[str], header: str) -> str: + """Choose media type with the highest quality from a list of candidates. + + Args: + media_types: An iterable over one or more Internet media types + to match against the provided header value. + header: The value of a header that conforms to the format of the + HTTP ``Accept`` header. + + Returns: + Best match from the supported candidates, or ``None`` if the provided + ``Accept`` header value does not match any of the given types. + """ + return '' + + +__all__ = ['best_match', 'parse_header'] From 34f5f4b68b5ef2d6cccf37a7a71c547502b0808a Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Wed, 28 Aug 2024 19:16:52 +0200 Subject: [PATCH 02/24] feat(mediatypes): add some skeletons for mediatype parsing --- docs/api/util.rst | 1 + docs/ext/rfc.py | 6 ++--- falcon/util/mediatypes.py | 47 +++++++++++++++++++++++++++++++++------ 3 files changed, 44 insertions(+), 10 deletions(-) diff --git a/docs/api/util.rst b/docs/api/util.rst index 86a91e92c..86002ffdb 100644 --- a/docs/api/util.rst +++ b/docs/api/util.rst @@ -38,6 +38,7 @@ Media types ----------- .. autofunction:: falcon.parse_header +.. autofunction:: falcon.mediatypes.quality .. autofunction:: falcon.mediatypes.best_match Async diff --git a/docs/ext/rfc.py b/docs/ext/rfc.py index abe13499b..fe9afe601 100644 --- a/docs/ext/rfc.py +++ b/docs/ext/rfc.py @@ -22,6 +22,7 @@ import re +IETF_DOCS = 'https://datatracker.ietf.org/doc/html' RFC_PATTERN = re.compile(r'RFC (\d{4}), Section ([\d\.]+)') @@ -39,11 +40,10 @@ def _process_line(line): section = m.group(2) template = ( - '`RFC {rfc}, Section {section} ' - '`_' + '`RFC {rfc}, Section {section} ' '<{ietf_docs}/rfc{rfc}#section-{section}>`__' ) - rendered_text = template.format(rfc=rfc, section=section) + rendered_text = template.format(rfc=rfc, section=section, ietf_docs=IETF_DOCS) return line[: m.start()] + rendered_text + line[m.end() :] diff --git a/falcon/util/mediatypes.py b/falcon/util/mediatypes.py index 7dc4c69bd..ef425d1cc 100644 --- a/falcon/util/mediatypes.py +++ b/falcon/util/mediatypes.py @@ -14,9 +14,10 @@ """Media (aka MIME) type parsing and matching utilities.""" +import functools import typing -__all__ = ('parse_header',) +__all__ = ('best_match', 'parse_header', 'quality') def _parse_param_old_stdlib(s): # type: ignore @@ -88,7 +89,41 @@ def parse_header(line: str) -> typing.Tuple[str, dict]: return _parse_header_old_stdlib(line) -def best_match(media_types: typing.Iterable[str], header: str) -> str: +class _MediaRange(tuple): + @classmethod + def parse(cls, media_range): + pass + + def matches(self, media_type): + pass + + +@functools.lru_cache() +def _parse_media_ranges(header): + return tuple(_MediaRange.parse(media_range) for media_range in header.split(',')) + + +@functools.lru_cache() +def quality(media_type: str, header: str) -> float: + """Get quality of the most specific matching media range. + + Media-ranges are parsed from the provided `header` value according to + RFC 9110, Section 12.5.1 (The ``Accept`` header). + + Args: + media_type: The Internet media type to match against the provided + HTTP ``Accept`` header value. + header: The value of a header that conforms to the format of the + HTTP ``Accept`` header. + + Returns: + Quality of the most specific media range matching the provided + `media_type`. + """ + return 0.0 + + +def best_match(media_types: typing.Iterable[str], header: str) -> typing.Optional[str]: """Choose media type with the highest quality from a list of candidates. Args: @@ -99,9 +134,7 @@ def best_match(media_types: typing.Iterable[str], header: str) -> str: Returns: Best match from the supported candidates, or ``None`` if the provided - ``Accept`` header value does not match any of the given types. + header value does not match any of the given types. """ - return '' - - -__all__ = ['best_match', 'parse_header'] + # media_ranges = _parse_media_ranges(header) + return None From 1ff911d8fbceb383a3864e93eed36c08f11c0e18 Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Wed, 4 Sep 2024 20:33:50 +0200 Subject: [PATCH 03/24] chore: fix up after master merge --- falcon/util/mediatypes.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/falcon/util/mediatypes.py b/falcon/util/mediatypes.py index 56c339337..1857c8f20 100644 --- a/falcon/util/mediatypes.py +++ b/falcon/util/mediatypes.py @@ -14,12 +14,10 @@ """Media (aka MIME) type parsing and matching utilities.""" -import functools - - from __future__ import annotations -from typing import Dict, Iterator, Tuple +import functools +from typing import Dict, Iterable, Iterator, Optional, Tuple __all__ = ('best_match', 'parse_header', 'quality') @@ -127,7 +125,7 @@ def quality(media_type: str, header: str) -> float: return 0.0 -def best_match(media_types: typing.Iterable[str], header: str) -> typing.Optional[str]: +def best_match(media_types: Iterable[str], header: str) -> Optional[str]: """Choose media type with the highest quality from a list of candidates. Args: From 4243b7bc2de0c48eb861472714e045884654502e Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Sat, 28 Sep 2024 23:43:38 +0200 Subject: [PATCH 04/24] feat(mimeparse): wip doodles --- falcon/util/mediatypes.py | 39 +++++++++++++++++++- tests/test_mediatypes.py | 77 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+), 2 deletions(-) diff --git a/falcon/util/mediatypes.py b/falcon/util/mediatypes.py index 1857c8f20..70b17f7cb 100644 --- a/falcon/util/mediatypes.py +++ b/falcon/util/mediatypes.py @@ -91,15 +91,48 @@ def parse_header(line: str) -> Tuple[str, Dict[str, str]]: return _parse_header_old_stdlib(line) +class _MediaType: + main_type: str + subtype: str + params: dict + + __slots__ = ('main_type', 'subtype', 'params') + + def __init__(self, main_type: str, subtype: str, params: dict) -> None: + self.main_type = main_type + self.subtype = subtype + self.params = params + + def __repr__(self) -> str: + return f'MediaType<{self.main_type}/{self.subtype}; {self.params}>' + + class _MediaRange(tuple): @classmethod def parse(cls, media_range): pass - def matches(self, media_type): + def match_score(self, media_type): pass +def _parse_media_type(media_type: str) -> _MediaType: + full_type, params = parse_header(media_type) + + # TODO(vytas): Workaround from python-mimeparse by J. Gregorio et al. + # Do we still need this in 2024? + # Java URLConnection class sends an Accept header that includes a + # single '*'. Turn it into a legal wildcard. + if full_type == '*': + full_type = '*/*' + + main_type, separator, subtype = full_type.partition('/') + if not separator: + raise ValueError('invalid media type') + + return _MediaType(main_type.strip(), subtype.strip(), params) + + @functools.lru_cache() def _parse_media_ranges(header): return tuple(_MediaRange.parse(media_range) for media_range in header.split(',')) @@ -110,7 +143,7 @@ def quality(media_type: str, header: str) -> float: """Get quality of the most specific matching media range. Media-ranges are parsed from the provided `header` value according to - RFC 9110, Section 12.5.1 (The ``Accept`` header). + RFC 9110, Section 12.5.1 (the ``Accept`` header). Args: media_type: The Internet media type to match against the provided @@ -122,6 +155,8 @@ def quality(media_type: str, header: str) -> float: Quality of the most specific media range matching the provided `media_type`. """ + # media_ranges = _parse_media_ranges(header) + return 0.0 diff --git a/tests/test_mediatypes.py b/tests/test_mediatypes.py index 0fae79b43..ef5d61301 100644 --- a/tests/test_mediatypes.py +++ b/tests/test_mediatypes.py @@ -1,3 +1,5 @@ +import itertools + import pytest from falcon.util import mediatypes @@ -39,3 +41,78 @@ ) def test_parse_header(value, expected): assert mediatypes.parse_header(value) == expected + + +_RFC_7231_EXAMPLE_ACCEPT = ( + 'text/*;q=0.3, text/html;q=0.7, text/html;level=1, ' + 'text/html;level=2;q=0.4, */*;q=0.5' +) +_RFC_7231_EXAMPLE_VALUES = [ + ('text/html;level=1', 1.0), + ('text/html', 0.7), + ('text/plain', 0.3), + ('image/jpeg', 0.5), + ('text/html;level=2', 0.4), + ('text/html;level=3', 0.7), +] + +_RFC_9110_EXAMPLE_ACCEPT = ( + 'text/*;q=0.3, text/plain;q=0.7, text/plain;format=flowed, ' + 'text/plain;format=fixed;q=0.4, */*;q=0.5' +) +# NOTE(vytas): Including the errata https://www.rfc-editor.org/errata/eid7138. +_RFC_9110_EXAMPLE_VALUES = [ + ('text/plain;format=flowed', 1), + ('text/plain', 0.7), + ('text/html', 0.3), + ('image/jpeg', 0.5), + ('text/plain;format=fixed', 0.4), + ('text/html;level=3', 0.3), +] + +_RFC_EXAMPLES = list( + itertools.chain.from_iterable( + ((accept,) + media_type_quality for media_type_quality in example_values) + for accept, example_values in ( + (_RFC_7231_EXAMPLE_ACCEPT, _RFC_7231_EXAMPLE_VALUES), + (_RFC_9110_EXAMPLE_ACCEPT, _RFC_7231_EXAMPLE_VALUES), + ) + ) +) + +_RFC_EXAMPLE_IDS = list( + itertools.chain.from_iterable( + ( + f'{rfc}-{media_type}-{quality_value}' + for media_type, quality_value in example_values + ) + for rfc, example_values in ( + ('RFC-7231', _RFC_7231_EXAMPLE_VALUES), + ('RFC-9110', _RFC_7231_EXAMPLE_VALUES), + ) + ) +) + + +@pytest.mark.parametrize( + 'accept,media_type,quality_value', _RFC_EXAMPLES, ids=_RFC_EXAMPLE_IDS +) +def test_quality_rfc_examples(accept, media_type, quality_value): + assert pytest.approx(mediatypes.quality(media_type, accept)) == quality_value + + +@pytest.mark.parametrize( + 'accept,media_type', + [ + ('application/json', 'application/yaml'), + ('audio/*; q=0.2, audio/basic', 'video/mp3'), + ( + 'falcon/peregrine; speed=high; unladen=true', + 'falcon/peregrine; speed=average', + ), + ('text/html, text/plain', 'text/x-python'), + ('*/json; q=0.2, application/json', 'application/msgpack'), + ], +) +def test_quality_none_matches(accept, media_type): + assert mediatypes.quality(media_type, accept) == 0.0 From b3150a2b225bca2b77f1e682842167130c62e911 Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Sun, 29 Sep 2024 23:44:41 +0200 Subject: [PATCH 05/24] feat(mediatypes): implement computation of best quality --- falcon/util/mediatypes.py | 120 +++++++++++++++++++++++++++++++------- tests/test_mediatypes.py | 36 +++++++++++- 2 files changed, 133 insertions(+), 23 deletions(-) diff --git a/falcon/util/mediatypes.py b/falcon/util/mediatypes.py index 70b17f7cb..f9410e0f4 100644 --- a/falcon/util/mediatypes.py +++ b/falcon/util/mediatypes.py @@ -91,6 +91,24 @@ def parse_header(line: str) -> Tuple[str, Dict[str, str]]: return _parse_header_old_stdlib(line) +def _parse_media_type_header(media_type: str) -> Tuple[str, str, dict]: + full_type, params = parse_header(media_type) + + # TODO(vytas): Workaround from python-mimeparse by J. Gregorio et al. + # Do we still need this in 2024? + # Java URLConnection class sends an Accept header that includes a + # single '*'. Turn it into a legal wildcard. + if full_type == '*': + full_type = '*/*' + + main_type, separator, subtype = full_type.partition('/') + if not separator: + raise ValueError('invalid media type') + + return (main_type.strip(), subtype.strip(), params) + + +# TODO(vytas): Should we make these structures public? class _MediaType: main_type: str subtype: str @@ -98,6 +116,10 @@ class _MediaType: __slots__ = ('main_type', 'subtype', 'params') + @classmethod + def parse(cls, media_type: str) -> _MediaType: + return cls(*_parse_media_type_header(media_type)) + def __init__(self, main_type: str, subtype: str, params: dict) -> None: self.main_type = main_type self.subtype = subtype @@ -107,34 +129,84 @@ def __repr__(self) -> str: return f'MediaType<{self.main_type}/{self.subtype}; {self.params}>' -class _MediaRange(tuple): - @classmethod - def parse(cls, media_range): - pass +class _MediaRange: + main_type: str + subtype: str + quality: float + params: dict - def match_score(self, media_type): - pass + __slots__ = ('main_type', 'subtype', 'quality', 'params') + _NOT_MATCHING = (-1, -1, -1, 0.0, -1) -def _parse_media_type(media_type: str) -> _MediaType: - full_type, params = parse_header(media_type) + def __init__( + self, main_type: str, subtype: str, quality: float, params: dict + ) -> None: + self.main_type = main_type + self.subtype = subtype + self.quality = quality + self.params = params - # TODO(vytas): Workaround from python-mimeparse by J. Gregorio et al. - # Do we still need this in 2024? - # Java URLConnection class sends an Accept header that includes a - # single '*'. Turn it into a legal wildcard. - if full_type == '*': - full_type = '*/*' + @classmethod + def parse(cls, media_range: str) -> _MediaRange: + main_type, subtype, params = _parse_media_type_header(media_range) + + # NOTE(vytas): We don't need to special-case Q since the above + # parse_header always lowercases parameters. + q = params.pop('q', 1.0) + + try: + quality = float(q) + except (TypeError, ValueError) as ex: + raise ValueError('invalid media range') from ex + if not (0.0 <= quality <= 1.0): + raise ValueError('q is not between 0.0 and 1.0') + + return cls(main_type, subtype, quality, params) + + def match_score( + self, media_type: _MediaType, index: int = -1 + ) -> Tuple[int, int, int, float, int]: + if self.main_type == '*' or media_type.main_type == '*': + main_matches = 0 + elif self.main_type != media_type.main_type: + return self._NOT_MATCHING + else: + main_matches = 1 + + if self.subtype == '*' or media_type.subtype == '*': + sub_matches = 0 + elif self.subtype != media_type.subtype: + return self._NOT_MATCHING + else: + sub_matches = 1 + + mr_pnames = frozenset(self.params) + mt_pnames = frozenset(media_type.params) + param_score = -len(mr_pnames.symmetric_difference(mt_pnames)) + matching = mr_pnames & mt_pnames + for pname in matching: + if self.params[pname] != media_type.params[pname]: + return self._NOT_MATCHING + param_score += len(matching) + + score = (main_matches, sub_matches, param_score, self.quality, index) + print(f'score({self}, {media_type}) -> {score}') + return (main_matches, sub_matches, param_score, self.quality, index) - main_type, separator, subtype = full_type.partition('/') - if not separator: - raise ValueError('invalid media type') + def __repr__(self) -> str: + q = self.quality + return f'MediaRange<{self.main_type}/{self.subtype}; {q=}; {self.params}>' - return _MediaType(main_type.strip(), subtype.strip(), params) + +# PERF(vytas): It is possible to cache a classmethod too, but the invocation is +# less efficient, especially in the case of a cache hit. +_parse_media_type = functools.lru_cache(_MediaType.parse) +_parse_media_range = functools.lru_cache(_MediaRange.parse) @functools.lru_cache() -def _parse_media_ranges(header): +def _parse_media_ranges(header: str) -> Tuple[_MediaRange, ...]: return tuple(_MediaRange.parse(media_range) for media_range in header.split(',')) @@ -153,11 +225,15 @@ def quality(media_type: str, header: str) -> float: Returns: Quality of the most specific media range matching the provided - `media_type`. + `media_type`. (If none matches, 0.0 is returned.) """ - # media_ranges = _parse_media_ranges(header) + parsed_media_type = _parse_media_type(media_type) + media_ranges = _parse_media_ranges(header) - return 0.0 + most_specific = max( + media_range.match_score(parsed_media_type) for media_range in media_ranges + ) + return most_specific[-2] def best_match(media_types: Iterable[str], header: str) -> Optional[str]: diff --git a/tests/test_mediatypes.py b/tests/test_mediatypes.py index ef5d61301..2fa59cd9b 100644 --- a/tests/test_mediatypes.py +++ b/tests/test_mediatypes.py @@ -43,6 +43,40 @@ def test_parse_header(value, expected): assert mediatypes.parse_header(value) == expected +def test_media_type_private_cls(): + mt1 = mediatypes._MediaType.parse('image/png') + assert mt1.main_type == 'image' + assert mt1.subtype == 'png' + assert mt1.params == {} + assert repr(mt1) == 'MediaType' + + mt2 = mediatypes._MediaType.parse('text/plain; charset=latin-1') + assert mt2.main_type == 'text' + assert mt2.subtype == 'plain' + assert mt2.params == {'charset': 'latin-1'} + + +def test_media_range_private_cls(): + mr1 = mediatypes._MediaRange.parse('image/png') + assert mr1.main_type == 'image' + assert mr1.subtype == 'png' + assert mr1.quality == 1.0 + assert mr1.params == {} + assert repr(mr1) == 'MediaRange' + + mr2 = mediatypes._MediaRange.parse('text/plain; charset=latin-1; Q=0.9') + assert mr2.main_type == 'text' + assert mr2.subtype == 'plain' + assert pytest.approx(mr2.quality) == 0.9 + assert mr2.params == {'charset': 'latin-1'} + + mr3 = mediatypes._MediaRange.parse('*; q=0.7') + assert mr3.main_type == '*' + assert mr3.subtype == '*' + assert pytest.approx(mr3.quality) == 0.7 + assert mr3.params == {} + + _RFC_7231_EXAMPLE_ACCEPT = ( 'text/*;q=0.3, text/html;q=0.7, text/html;level=1, ' 'text/html;level=2;q=0.4, */*;q=0.5' @@ -75,7 +109,7 @@ def test_parse_header(value, expected): ((accept,) + media_type_quality for media_type_quality in example_values) for accept, example_values in ( (_RFC_7231_EXAMPLE_ACCEPT, _RFC_7231_EXAMPLE_VALUES), - (_RFC_9110_EXAMPLE_ACCEPT, _RFC_7231_EXAMPLE_VALUES), + (_RFC_9110_EXAMPLE_ACCEPT, _RFC_9110_EXAMPLE_VALUES), ) ) ) From cefa80a387bcb70f440e93b40063a9a07c9b268f Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Mon, 30 Sep 2024 13:50:56 +0200 Subject: [PATCH 06/24] feat(mediatypes): remove vendored mimeparse --- falcon/app_helpers.py | 4 +- falcon/media/handlers.py | 4 +- falcon/request.py | 6 +- falcon/util/mediatypes.py | 36 +++-- falcon/vendor/__init__.py | 0 falcon/vendor/mimeparse/LICENSE | 17 --- falcon/vendor/mimeparse/__init__.py | 11 -- falcon/vendor/mimeparse/mimeparse.py | 191 --------------------------- pyproject.toml | 10 -- setup.py | 2 +- tests/test_mediatypes.py | 31 +++++ tests/test_request_media.py | 5 +- tox.ini | 13 +- 13 files changed, 66 insertions(+), 264 deletions(-) delete mode 100644 falcon/vendor/__init__.py delete mode 100755 falcon/vendor/mimeparse/LICENSE delete mode 100644 falcon/vendor/mimeparse/__init__.py delete mode 100755 falcon/vendor/mimeparse/mimeparse.py diff --git a/falcon/app_helpers.py b/falcon/app_helpers.py index e9ccba253..372b45c09 100644 --- a/falcon/app_helpers.py +++ b/falcon/app_helpers.py @@ -291,7 +291,9 @@ def default_serialize_error(req: Request, resp: Response, exception: HTTPError) resp: Instance of ``falcon.Response`` exception: Instance of ``falcon.HTTPError`` """ - preferred = req.client_prefers((MEDIA_XML, 'text/xml', MEDIA_JSON)) + # NOTE(vytas): Unlike python-mimeparse, our reimplementation returns the + # first item if all of them have the same score. + preferred = req.client_prefers((MEDIA_JSON, 'text/xml', MEDIA_XML)) if preferred is None: # NOTE(kgriffs): See if the client expects a custom media diff --git a/falcon/media/handlers.py b/falcon/media/handlers.py index 6b096ec91..b48a95d77 100644 --- a/falcon/media/handlers.py +++ b/falcon/media/handlers.py @@ -30,8 +30,8 @@ from falcon.media.multipart import MultipartFormHandler from falcon.media.multipart import MultipartParseOptions from falcon.media.urlencoded import URLEncodedFormHandler +from falcon.util import mediatypes from falcon.util import misc -from falcon.vendor import mimeparse class MissingDependencyHandler(BinaryBaseHandlerWS): @@ -186,7 +186,7 @@ def _best_match(media_type: str, all_media_types: Sequence[str]) -> Optional[str try: # NOTE(jmvrbanac): Mimeparse will return an empty string if it can # parse the media type, but cannot find a suitable type. - result = mimeparse.best_match(all_media_types, media_type) + result = mediatypes.best_match(all_media_types, media_type) except ValueError: pass diff --git a/falcon/request.py b/falcon/request.py index 59ddddab3..f79da9400 100644 --- a/falcon/request.py +++ b/falcon/request.py @@ -53,10 +53,10 @@ from falcon.typing import ReadableIO from falcon.util import deprecation from falcon.util import ETag +from falcon.util import mediatypes from falcon.util import structures from falcon.util.uri import parse_host from falcon.util.uri import parse_query_string -from falcon.vendor import mimeparse DEFAULT_ERROR_LOG_FORMAT = '{0:%Y-%m-%d %H:%M:%S} [FALCON] [ERROR] {1} {2}{3} => ' @@ -1167,7 +1167,7 @@ def client_accepts(self, media_type: str) -> bool: # Fall back to full-blown parsing try: - return mimeparse.quality(media_type, accept) != 0.0 + return mediatypes.quality(media_type, accept) != 0.0 except ValueError: return False @@ -1187,7 +1187,7 @@ def client_prefers(self, media_types: Sequence[str]) -> Optional[str]: try: # NOTE(kgriffs): best_match will return '' if no match is found - preferred_type = mimeparse.best_match(media_types, self.accept) + preferred_type = mediatypes.best_match(media_types, self.accept) except ValueError: # Value for the accept header was not formatted correctly preferred_type = '' diff --git a/falcon/util/mediatypes.py b/falcon/util/mediatypes.py index f9410e0f4..d5240196f 100644 --- a/falcon/util/mediatypes.py +++ b/falcon/util/mediatypes.py @@ -137,7 +137,7 @@ class _MediaRange: __slots__ = ('main_type', 'subtype', 'quality', 'params') - _NOT_MATCHING = (-1, -1, -1, 0.0, -1) + _NOT_MATCHING = (-1, -1, -1, 0.0) def __init__( self, main_type: str, subtype: str, quality: float, params: dict @@ -164,9 +164,7 @@ def parse(cls, media_range: str) -> _MediaRange: return cls(main_type, subtype, quality, params) - def match_score( - self, media_type: _MediaType, index: int = -1 - ) -> Tuple[int, int, int, float, int]: + def match_score(self, media_type: _MediaType) -> Tuple[int, int, int, float]: if self.main_type == '*' or media_type.main_type == '*': main_matches = 0 elif self.main_type != media_type.main_type: @@ -190,9 +188,9 @@ def match_score( return self._NOT_MATCHING param_score += len(matching) - score = (main_matches, sub_matches, param_score, self.quality, index) + score = (main_matches, sub_matches, param_score, self.quality) print(f'score({self}, {media_type}) -> {score}') - return (main_matches, sub_matches, param_score, self.quality, index) + return (main_matches, sub_matches, param_score, self.quality) def __repr__(self) -> str: q = self.quality @@ -228,12 +226,11 @@ def quality(media_type: str, header: str) -> float: `media_type`. (If none matches, 0.0 is returned.) """ parsed_media_type = _parse_media_type(media_type) - media_ranges = _parse_media_ranges(header) - most_specific = max( - media_range.match_score(parsed_media_type) for media_range in media_ranges + media_range.match_score(parsed_media_type) + for media_range in _parse_media_ranges(header) ) - return most_specific[-2] + return most_specific[-1] def best_match(media_types: Iterable[str], header: str) -> Optional[str]: @@ -246,8 +243,19 @@ def best_match(media_types: Iterable[str], header: str) -> Optional[str]: HTTP ``Accept`` header. Returns: - Best match from the supported candidates, or ``None`` if the provided - header value does not match any of the given types. + Best match from the supported candidates, or an empty string if the + provided header value does not match any of the given types. """ - # media_ranges = _parse_media_ranges(header) - return None + for media_type in media_types: + q = quality(media_type, header) + print(f'quality{(media_type, header)} -> {q}') + + matching, best_quality = max( + ((media_type, quality(media_type, header)) for media_type in media_types), + key=lambda mt_quality: mt_quality[1], + ) + if best_quality > 0.0: + print(f'returning {matching} with q={best_quality}') + return matching + print('returning no match') + return '' diff --git a/falcon/vendor/__init__.py b/falcon/vendor/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/falcon/vendor/mimeparse/LICENSE b/falcon/vendor/mimeparse/LICENSE deleted file mode 100755 index 89de35479..000000000 --- a/falcon/vendor/mimeparse/LICENSE +++ /dev/null @@ -1,17 +0,0 @@ -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. diff --git a/falcon/vendor/mimeparse/__init__.py b/falcon/vendor/mimeparse/__init__.py deleted file mode 100644 index 223767a7b..000000000 --- a/falcon/vendor/mimeparse/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -""" -This module wraps code from the MIT-licensed python-mimeparse project. - -The original project source code may be obtained from GitHub by visiting the -following URL: - - https://github.com/dbtsai/python-mimeparse - -""" - -from .mimeparse import * # NOQA diff --git a/falcon/vendor/mimeparse/mimeparse.py b/falcon/vendor/mimeparse/mimeparse.py deleted file mode 100755 index f96e63384..000000000 --- a/falcon/vendor/mimeparse/mimeparse.py +++ /dev/null @@ -1,191 +0,0 @@ -from falcon.util.mediatypes import parse_header - -__version__ = '1.6.0' -__author__ = 'Joe Gregorio' -__email__ = 'joe@bitworking.org' -__license__ = 'MIT License' -__credits__ = '' - - -class MimeTypeParseException(ValueError): - pass - - -def parse_mime_type(mime_type): - """Parses a mime-type into its component parts. - - Carves up a mime-type and returns a tuple of the (type, subtype, params) - where 'params' is a dictionary of all the parameters for the media range. - For example, the media range 'application/xhtml;q=0.5' would get parsed - into: - - ('application', 'xhtml', {'q', '0.5'}) - - :rtype: (str,str,dict) - """ - full_type, params = parse_header(mime_type) - # Java URLConnection class sends an Accept header that includes a - # single '*'. Turn it into a legal wildcard. - if full_type == '*': - full_type = '*/*' - - type_parts = full_type.split('/') if '/' in full_type else None - if not type_parts or len(type_parts) > 2: - raise MimeTypeParseException( - "Can't parse type \"{}\"".format(full_type)) - - (type, subtype) = type_parts - - return (type.strip(), subtype.strip(), params) - - -def parse_media_range(range): - """Parse a media-range into its component parts. - - Carves up a media range and returns a tuple of the (type, subtype, - params) where 'params' is a dictionary of all the parameters for the media - range. For example, the media range 'application/*;q=0.5' would get parsed - into: - - ('application', '*', {'q', '0.5'}) - - In addition this function also guarantees that there is a value for 'q' - in the params dictionary, filling it in with a proper default if - necessary. - - :rtype: (str,str,dict) - """ - (type, subtype, params) = parse_mime_type(range) - params.setdefault('q', params.pop('Q', None)) # q is case insensitive - try: - if not params['q'] or not 0 <= float(params['q']) <= 1: - params['q'] = '1' - except ValueError: # from float() - params['q'] = '1' - - return (type, subtype, params) - - -def quality_and_fitness_parsed(mime_type, parsed_ranges): - """Find the best match for a mime-type amongst parsed media-ranges. - - Find the best match for a given mime-type against a list of media_ranges - that have already been parsed by parse_media_range(). Returns a tuple of - the fitness value and the value of the 'q' quality parameter of the best - match, or (-1, 0) if no match was found. Just as for quality_parsed(), - 'parsed_ranges' must be a list of parsed media ranges. - - :rtype: (float,int) - """ - best_fitness = -1 - best_fit_q = 0 - (target_type, target_subtype, target_params) = \ - parse_media_range(mime_type) - - for (type, subtype, params) in parsed_ranges: - - # check if the type and the subtype match - type_match = ( - type in (target_type, '*') or - target_type == '*' - ) - subtype_match = ( - subtype in (target_subtype, '*') or - target_subtype == '*' - ) - - # if they do, assess the "fitness" of this mime_type - if type_match and subtype_match: - - # 100 points if the type matches w/o a wildcard - fitness = type == target_type and 100 or 0 - - # 10 points if the subtype matches w/o a wildcard - fitness += subtype == target_subtype and 10 or 0 - - # 1 bonus point for each matching param besides "q" - param_matches = sum([ - 1 for (key, value) in target_params.items() - if key != 'q' and key in params and value == params[key] - ]) - fitness += param_matches - - # finally, add the target's "q" param (between 0 and 1) - fitness += float(target_params.get('q', 1)) - - if fitness > best_fitness: - best_fitness = fitness - best_fit_q = params['q'] - - return float(best_fit_q), best_fitness - - -def quality_parsed(mime_type, parsed_ranges): - """Find the best match for a mime-type amongst parsed media-ranges. - - Find the best match for a given mime-type against a list of media_ranges - that have already been parsed by parse_media_range(). Returns the 'q' - quality parameter of the best match, 0 if no match was found. This function - behaves the same as quality() except that 'parsed_ranges' must be a list of - parsed media ranges. - - :rtype: float - """ - - return quality_and_fitness_parsed(mime_type, parsed_ranges)[0] - - -def quality(mime_type, ranges): - """Return the quality ('q') of a mime-type against a list of media-ranges. - - Returns the quality 'q' of a mime-type when compared against the - media-ranges in ranges. For example: - - >>> quality('text/html','text/*;q=0.3, text/html;q=0.7, - text/html;level=1, text/html;level=2;q=0.4, */*;q=0.5') - 0.7 - - :rtype: float - """ - parsed_ranges = [parse_media_range(r) for r in ranges.split(',')] - - return quality_parsed(mime_type, parsed_ranges) - - -def best_match(supported, header): - """Return mime-type with the highest quality ('q') from list of candidates. - - Takes a list of supported mime-types and finds the best match for all the - media-ranges listed in header. The value of header must be a string that - conforms to the format of the HTTP Accept: header. The value of 'supported' - is a list of mime-types. The list of supported mime-types should be sorted - in order of increasing desirability, in case of a situation where there is - a tie. - - >>> best_match(['application/xbel+xml', 'text/xml'], - 'text/*;q=0.5,*/*; q=0.1') - 'text/xml' - - :rtype: str - """ - split_header = _filter_blank(header.split(',')) - parsed_header = [parse_media_range(r) for r in split_header] - weighted_matches = [] - pos = 0 - for mime_type in supported: - weighted_matches.append(( - quality_and_fitness_parsed(mime_type, parsed_header), - pos, - mime_type - )) - pos += 1 - weighted_matches.sort() - - return weighted_matches[-1][0][0] and weighted_matches[-1][2] or '' - - -def _filter_blank(i): - """Return all non-empty items in the list.""" - for s in i: - if s.strip(): - yield s diff --git a/pyproject.toml b/pyproject.toml index ebccd4505..4fe30640a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,7 +84,6 @@ exclude = ["examples", "tests"] exclude = [ "falcon/bench", "falcon/cmd", - "falcon/vendor", ] disallow_untyped_defs = true warn_unused_ignores = true @@ -114,12 +113,6 @@ exclude = ["examples", "tests"] ] ignore_missing_imports = true - [[tool.mypy.overrides]] - module = [ - "falcon.vendor.*", - ] - disallow_untyped_defs = false - [tool.towncrier] package = "falcon" package_dir = "" @@ -159,7 +152,6 @@ exclude = ["examples", "tests"] target-version = ["py38"] skip-string-normalization = true line-length = 88 - extend-exclude = "falcon/vendor" [tool.blue] # NOTE(vytas): Before switching to Ruff, Falcon used the Blue formatter. @@ -167,13 +159,11 @@ exclude = ["examples", "tests"] # only minor cosmetic changes in a handful of files. target-version = ["py38"] line-length = 88 - extend-exclude = "falcon/vendor" [tool.ruff] target-version = "py38" format.quote-style = "single" line-length = 88 - extend-exclude = ["falcon/vendor"] builtins = [ "ignore", "attr", diff --git a/setup.py b/setup.py index 9751f825c..3e5ce1e99 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,6 @@ def list_modules(dirname, pattern): 'falcon.media', 'falcon.routing', 'falcon.util', - 'falcon.vendor.mimeparse', ] modules_to_exclude = [ @@ -59,6 +58,7 @@ def list_modules(dirname, pattern): # NOTE(vytas): It is pointless to cythonize reader.py, since cythonized # Falcon is using reader.pyx instead. 'falcon.hooks', + 'falcon.inspect', 'falcon.responders', 'falcon.util.reader', 'falcon.util.sync', diff --git a/tests/test_mediatypes.py b/tests/test_mediatypes.py index 2fa59cd9b..ef2a51c70 100644 --- a/tests/test_mediatypes.py +++ b/tests/test_mediatypes.py @@ -150,3 +150,34 @@ def test_quality_rfc_examples(accept, media_type, quality_value): ) def test_quality_none_matches(accept, media_type): assert mediatypes.quality(media_type, accept) == 0.0 + + +@pytest.mark.parametrize( + 'media_types,accept,expected', + [ + (['application/json'], 'application/json', 'application/json'), + (['application/json'], 'application/json; charset=utf-8', 'application/json'), + ( + ['application/json', 'application/yaml'], + 'application/json, */*; q=0.2', + 'application/json', + ), + ], +) +def test_best_match(media_types, accept, expected): + assert mediatypes.best_match(media_types, accept) == expected + + +@pytest.mark.parametrize( + 'media_types,accept', + [ + (['application/json'], 'application/yaml'), + (['application/json', 'application/yaml'], 'application/xml, text/*; q=0.7'), + ( + ['text/plain', 'falcon/peregrine; load=unladen'], + 'falcon/peregrine; load=heavy', + ), + ], +) +def test_best_match_none_matches(media_types, accept): + assert mediatypes.best_match(media_types, accept) == '' diff --git a/tests/test_request_media.py b/tests/test_request_media.py index 0edb8e4eb..e61b77a4f 100644 --- a/tests/test_request_media.py +++ b/tests/test_request_media.py @@ -89,8 +89,9 @@ async def on_post(self, req, resp, **kwargs): def test_json(client, media_type): expected_body = b'{"something": true}' headers = {'Content-Type': media_type} - client.simulate_post('/', body=expected_body, headers=headers) - + resp = client.simulate_post('/', body=expected_body, headers=headers) + print(resp.text) + assert resp.status_code == 200 media = client.resource.captured_req_media assert media is not None assert media.get('something') is True diff --git a/tox.ini b/tox.ini index 0b96f9d77..5c0222e81 100644 --- a/tox.ini +++ b/tox.ini @@ -281,7 +281,7 @@ commands = ruff format . [] [testenv:pep8-docstrings] deps = ruff commands = ruff check \ - --exclude=.ecosystem,.eggs,.git,.tox,.venv,build,dist,docs,examples,tests,falcon/vendor,falcon/bench/nuts \ + --exclude=.ecosystem,.eggs,.git,.tox,.venv,build,dist,docs,examples,tests,falcon/bench/nuts \ --select=D205,D212,D400,D401,D403,D404 \ [] @@ -343,16 +343,6 @@ basepython = pypy3 deps = -r{toxinidir}/requirements/bench commands = falcon-bench [] -# -------------------------------------------------------------------- -# Check for new versions of vendored packages -# -------------------------------------------------------------------- - -[testenv:check_vendored] -basepython = python3.10 -allowlist_externals = {toxinidir}/tools/check-vendored.sh -deps = -commands = {toxinidir}/tools/check-vendored.sh - # -------------------------------------------------------------------- # Package sanity check with twine # -------------------------------------------------------------------- @@ -485,4 +475,3 @@ deps = -r{toxinidir}/requirements/e2e commands = pytest {toxinidir}/e2e-tests/ --browser=firefox - From 379998f565fed6a9851a32db1acacfa822cb38ca Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Mon, 30 Sep 2024 16:23:41 +0200 Subject: [PATCH 07/24] docs: add a newsfragment for one of the issues --- docs/_newsfragments/864.breakingchange.rst | 23 ++++++++++++++++++++++ docs/api/util.rst | 2 ++ 2 files changed, 25 insertions(+) create mode 100644 docs/_newsfragments/864.breakingchange.rst diff --git a/docs/_newsfragments/864.breakingchange.rst b/docs/_newsfragments/864.breakingchange.rst new file mode 100644 index 000000000..5f4d5aaef --- /dev/null +++ b/docs/_newsfragments/864.breakingchange.rst @@ -0,0 +1,23 @@ +Falcon is no longer vendoring the +`python-mimeparse `_ library; +the relevant functionality has instead been reimplemented in the framework +itself, fixing a handful of long-standing bugs in the new implementation. + +The following new behaviors are considered breaking changes: + +* Previously, the iterable passed to + :meth:`req.client_prefers ` had be sorted in + the order of increasing desirability. + :func:`~falcon.mediatypes.best_match`, and by proxy + :meth:`~falcon.Request.client_prefers`, now consider the provided media types + to be sorted in the (more intuitive, we hope) order of decreasing + desirability. + +* Unlike ``python-mimeparse``, the new + :ref:`media type utilities ` consider media types with + different values for the same parameters as non-matching. + +The new functions, +:func:`falcon.mediatypes.quality` and :func:`falcon.mediatypes.best_match`, +otherwise have the same signature as the corresponding methods from +``python-mimeparse``. diff --git a/docs/api/util.rst b/docs/api/util.rst index 580afeee2..254b4cb60 100644 --- a/docs/api/util.rst +++ b/docs/api/util.rst @@ -33,6 +33,8 @@ HTTP Status .. autofunction:: falcon.http_status_to_code .. autofunction:: falcon.code_to_http_status +.. _mediatype_util: + Media types ----------- From aa138b086046c913693c0147fe84a6a669e03657 Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Mon, 30 Sep 2024 21:04:57 +0200 Subject: [PATCH 08/24] refactor: remove debug `print()`s --- falcon/util/mediatypes.py | 8 -------- tests/test_request_media.py | 5 ++--- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/falcon/util/mediatypes.py b/falcon/util/mediatypes.py index d5240196f..85c6d29e9 100644 --- a/falcon/util/mediatypes.py +++ b/falcon/util/mediatypes.py @@ -188,8 +188,6 @@ def match_score(self, media_type: _MediaType) -> Tuple[int, int, int, float]: return self._NOT_MATCHING param_score += len(matching) - score = (main_matches, sub_matches, param_score, self.quality) - print(f'score({self}, {media_type}) -> {score}') return (main_matches, sub_matches, param_score, self.quality) def __repr__(self) -> str: @@ -246,16 +244,10 @@ def best_match(media_types: Iterable[str], header: str) -> Optional[str]: Best match from the supported candidates, or an empty string if the provided header value does not match any of the given types. """ - for media_type in media_types: - q = quality(media_type, header) - print(f'quality{(media_type, header)} -> {q}') - matching, best_quality = max( ((media_type, quality(media_type, header)) for media_type in media_types), key=lambda mt_quality: mt_quality[1], ) if best_quality > 0.0: - print(f'returning {matching} with q={best_quality}') return matching - print('returning no match') return '' diff --git a/tests/test_request_media.py b/tests/test_request_media.py index e61b77a4f..0edb8e4eb 100644 --- a/tests/test_request_media.py +++ b/tests/test_request_media.py @@ -89,9 +89,8 @@ async def on_post(self, req, resp, **kwargs): def test_json(client, media_type): expected_body = b'{"something": true}' headers = {'Content-Type': media_type} - resp = client.simulate_post('/', body=expected_body, headers=headers) - print(resp.text) - assert resp.status_code == 200 + client.simulate_post('/', body=expected_body, headers=headers) + media = client.resource.captured_req_media assert media is not None assert media.get('something') is True From 75bd57b05515764d43657cc6dbc7be599783bfff Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Tue, 1 Oct 2024 09:15:36 +0200 Subject: [PATCH 09/24] feat(mediatypes): add specialized mediatype/range errors, coverage --- falcon/__init__.py | 5 +++++ falcon/errors.py | 10 ++++++++++ falcon/http_error.py | 7 +++---- falcon/util/mediatypes.py | 32 +++++++++++++++++++++++++++----- tests/test_mediatypes.py | 25 +++++++++++++++++++++++++ 5 files changed, 70 insertions(+), 9 deletions(-) diff --git a/falcon/__init__.py b/falcon/__init__.py index 01e4d3b57..55ea9cde4 100644 --- a/falcon/__init__.py +++ b/falcon/__init__.py @@ -84,6 +84,7 @@ 'http_status_to_code', 'IS_64_BITS', 'is_python_func', + 'mediatypes', 'misc', 'parse_header', 'reader', @@ -138,6 +139,8 @@ 'HTTPUnsupportedMediaType', 'HTTPUriTooLong', 'HTTPVersionNotSupported', + 'InvalidMediaRange', + 'InvalidMediaType', 'MediaMalformedError', 'MediaNotFoundError', 'MediaValidationError', @@ -388,6 +391,8 @@ from falcon.errors import HTTPUnsupportedMediaType from falcon.errors import HTTPUriTooLong from falcon.errors import HTTPVersionNotSupported +from falcon.errors import InvalidMediaRange +from falcon.errors import InvalidMediaType from falcon.errors import MediaMalformedError from falcon.errors import MediaNotFoundError from falcon.errors import MediaValidationError diff --git a/falcon/errors.py b/falcon/errors.py index 4acc7247d..092fff2e5 100644 --- a/falcon/errors.py +++ b/falcon/errors.py @@ -88,6 +88,8 @@ def on_get(self, req, resp): 'HTTPUnsupportedMediaType', 'HTTPUriTooLong', 'HTTPVersionNotSupported', + 'InvalidMediaRange', + 'InvalidMediaType', 'MediaMalformedError', 'MediaNotFoundError', 'MediaValidationError', @@ -111,6 +113,14 @@ class CompatibilityError(ValueError): """The given method, value, or type is not compatible.""" +class InvalidMediaType(ValueError): + """The provided media type cannot be parsed into type/subtype.""" + + +class InvalidMediaRange(InvalidMediaType): + """The media range contains an invalid media type and/or the q value.""" + + class UnsupportedScopeError(RuntimeError): """The ASGI scope type is not supported by Falcon.""" diff --git a/falcon/http_error.py b/falcon/http_error.py index b0aef8cc1..b30166dab 100644 --- a/falcon/http_error.py +++ b/falcon/http_error.py @@ -20,8 +20,7 @@ import xml.etree.ElementTree as et from falcon.constants import MEDIA_JSON -from falcon.util import code_to_http_status -from falcon.util import http_status_to_code +from falcon.util import misc from falcon.util import uri if TYPE_CHECKING: @@ -137,7 +136,7 @@ def __init__( # we'll probably switch over to making everything code-based to more # easily support HTTP/2. When that happens, should we continue to # include the reason phrase in the title? - self.title = title or code_to_http_status(status) + self.title = title or misc.code_to_http_status(status) self.description = description self.headers = headers @@ -161,7 +160,7 @@ def status_code(self) -> int: """HTTP status code normalized from the ``status`` argument passed to the initializer. """ # noqa: D205 - return http_status_to_code(self.status) + return misc.http_status_to_code(self.status) def to_dict( self, obj_type: Type[MutableMapping[str, Union[str, int, None, Link]]] = dict diff --git a/falcon/util/mediatypes.py b/falcon/util/mediatypes.py index 85c6d29e9..ab5795de0 100644 --- a/falcon/util/mediatypes.py +++ b/falcon/util/mediatypes.py @@ -17,8 +17,11 @@ from __future__ import annotations import functools +import math from typing import Dict, Iterable, Iterator, Optional, Tuple +from falcon import errors + __all__ = ('best_match', 'parse_header', 'quality') @@ -103,7 +106,7 @@ def _parse_media_type_header(media_type: str) -> Tuple[str, str, dict]: main_type, separator, subtype = full_type.partition('/') if not separator: - raise ValueError('invalid media type') + raise errors.InvalidMediaType('The media type value must contain type/subtype.') return (main_type.strip(), subtype.strip(), params) @@ -139,6 +142,11 @@ class _MediaRange: _NOT_MATCHING = (-1, -1, -1, 0.0) + _Q_VALUE_ERROR_MESSAGE = ( + 'If provided, the q parameter must be a real number ' + 'in the range 0 through 1.' + ) + def __init__( self, main_type: str, subtype: str, quality: float, params: dict ) -> None: @@ -149,7 +157,12 @@ def __init__( @classmethod def parse(cls, media_range: str) -> _MediaRange: - main_type, subtype, params = _parse_media_type_header(media_range) + try: + main_type, subtype, params = _parse_media_type_header(media_range) + except errors.InvalidMediaType as ex: + raise errors.InvalidMediaRange( + 'The media range value must contain type/subtype.' + ) from ex # NOTE(vytas): We don't need to special-case Q since the above # parse_header always lowercases parameters. @@ -158,9 +171,18 @@ def parse(cls, media_range: str) -> _MediaRange: try: quality = float(q) except (TypeError, ValueError) as ex: - raise ValueError('invalid media range') from ex - if not (0.0 <= quality <= 1.0): - raise ValueError('q is not between 0.0 and 1.0') + # NOTE(vytas): RFC 9110, Section 12.4.2: + # weight = OWS ";" OWS "q=" qvalue + # qvalue = ( "0" [ "." 0*3DIGIT ] ) / ( "1" [ "." 0*3("0") ] ) + raise errors.InvalidMediaRange(cls._Q_VALUE_ERROR_MESSAGE) from ex + + if not (0.0 <= quality <= 1.0) or not math.isfinite(quality): + raise errors.InvalidMediaRange(cls._Q_VALUE_ERROR_MESSAGE) + + # NOTE(vytas): RFC 9110, Section 12.4.2 states that a sender of qvalue + # MUST NOT generate more than three digits after the decimal point, + # but we are more permissive here, and opt not to spend any extra CPU + # cycles, if we have already managed to convert the value to float. return cls(main_type, subtype, quality, params) diff --git a/tests/test_mediatypes.py b/tests/test_mediatypes.py index ef2a51c70..c0165fea9 100644 --- a/tests/test_mediatypes.py +++ b/tests/test_mediatypes.py @@ -2,6 +2,7 @@ import pytest +from falcon import errors from falcon.util import mediatypes @@ -181,3 +182,27 @@ def test_best_match(media_types, accept, expected): ) def test_best_match_none_matches(media_types, accept): assert mediatypes.best_match(media_types, accept) == '' + + +@pytest.mark.parametrize('media_type', ['', 'word document', 'text']) +def test_invalid_media_type(media_type): + with pytest.raises(errors.InvalidMediaType): + mediatypes.quality(media_type, '*/*') + + +@pytest.mark.parametrize( + 'media_range', + [ + '', + 'word document', + 'text', + 'text/plain; q=high', + '*/*; q=inf', + '*/*; q=-inf', + '*/*; q=nan', + 'application/very-important; q=1337.0', + ], +) +def test_invalid_media_range(media_range): + with pytest.raises(errors.InvalidMediaRange): + mediatypes.quality('falcon/peregrine', media_range) From 69bd926edcad4ad46c2122f6ea009fc88d7abefa Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Tue, 1 Oct 2024 13:35:37 +0200 Subject: [PATCH 10/24] docs(newsfragments): add a newsfragment for #1367 --- docs/_newsfragments/1367.newandimproved.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 docs/_newsfragments/1367.newandimproved.rst diff --git a/docs/_newsfragments/1367.newandimproved.rst b/docs/_newsfragments/1367.newandimproved.rst new file mode 100644 index 000000000..e4b4e5c09 --- /dev/null +++ b/docs/_newsfragments/1367.newandimproved.rst @@ -0,0 +1,3 @@ +The new implementation of :ref:`media type utilities ` +(Falcon was using the ``python-mimeparse`` library before) now always favors +the exact media type match, if one is available. From 9fdc9ce557910a3866409593119267dbb9946fd3 Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Tue, 1 Oct 2024 15:51:04 +0200 Subject: [PATCH 11/24] test(mediatypes): add more tests --- falcon/asgi/app.py | 2 +- falcon/asgi/request.py | 2 +- falcon/asgi/response.py | 2 +- falcon/response.py | 2 +- tests/test_mediatypes.py | 53 +++++++++++++++++++++++++++++++++++++++- 5 files changed, 56 insertions(+), 5 deletions(-) diff --git a/falcon/asgi/app.py b/falcon/asgi/app.py index d28c8c49a..f3b637802 100644 --- a/falcon/asgi/app.py +++ b/falcon/asgi/app.py @@ -554,7 +554,7 @@ async def __call__( # type: ignore[override] # noqa: C901 data = resp._data if data is None and resp._media is not None: - # NOTE(kgriffs): We use a special MISSING singleton since + # NOTE(kgriffs): We use a special _UNSET singleton since # None is ambiguous (the media handler might return None). if resp._media_rendered is _UNSET: opt = resp.options diff --git a/falcon/asgi/request.py b/falcon/asgi/request.py index 8442ea8bc..ebd2e382d 100644 --- a/falcon/asgi/request.py +++ b/falcon/asgi/request.py @@ -169,7 +169,7 @@ def __init__( self.uri_template = None # PERF(vytas): Fall back to class variable(s) when unset. - # self._media = MISSING + # self._media = _UNSET # self._media_error = None # TODO(kgriffs): ASGI does not specify whether 'path' may be empty, diff --git a/falcon/asgi/response.py b/falcon/asgi/response.py index 42cc05830..1b5717444 100644 --- a/falcon/asgi/response.py +++ b/falcon/asgi/response.py @@ -207,7 +207,7 @@ async def render_body(self) -> Optional[bytes]: # type: ignore[override] data = self._data if data is None and self._media is not None: - # NOTE(kgriffs): We use a special MISSING singleton since + # NOTE(kgriffs): We use a special _UNSET singleton since # None is ambiguous (the media handler might return None). if self._media_rendered is _UNSET: if not self.content_type: diff --git a/falcon/response.py b/falcon/response.py index d468b0db3..e71a4b4c9 100644 --- a/falcon/response.py +++ b/falcon/response.py @@ -276,7 +276,7 @@ def render_body(self) -> Optional[bytes]: data = self._data if data is None and self._media is not None: - # NOTE(kgriffs): We use a special MISSING singleton since + # NOTE(kgriffs): We use a special _UNSET singleton since # None is ambiguous (the media handler might return None). if self._media_rendered is _UNSET: if not self.content_type: diff --git a/tests/test_mediatypes.py b/tests/test_mediatypes.py index c0165fea9..1544b512c 100644 --- a/tests/test_mediatypes.py +++ b/tests/test_mediatypes.py @@ -97,7 +97,7 @@ def test_media_range_private_cls(): ) # NOTE(vytas): Including the errata https://www.rfc-editor.org/errata/eid7138. _RFC_9110_EXAMPLE_VALUES = [ - ('text/plain;format=flowed', 1), + ('text/plain;format=flowed', 1.0), ('text/plain', 0.7), ('text/html', 0.3), ('image/jpeg', 0.5), @@ -136,6 +136,53 @@ def test_quality_rfc_examples(accept, media_type, quality_value): assert pytest.approx(mediatypes.quality(media_type, accept)) == quality_value +@pytest.mark.parametrize( + 'accept,media_type,quality_value', + [ + ( + 'application/*, */wildcard; q=0.7, */*; q=0.25', + 'test/wildcard; expect=pass', + 0.7, + ), + ( + 'application/*, */wildcard; q=0.7, */*; q=0.25', + 'application/wildcard; expect=pass', + 1.0, + ), + ( + 'application/*, */wildcard; q=0.7, */*; q=0.25', + 'test/wildcard; expect=pass', + 0.7, + ), + ( + 'text/x-python, text/*; q=0.33, text/plain; format=fixed', + 'text/plain; format=flowed', + 0.33, + ), + ], +) +def test_quality(accept, media_type, quality_value): + assert pytest.approx(mediatypes.quality(media_type, accept)) == quality_value + + +@pytest.mark.parametrize( + 'accept,media_type', + [ + ( + 'foo/bar, test/app; q=0.2, test/app; p=1; q=0.9, test/app;p=1;r=2', + 'test/app', + ), + ('test/app; q=0.1, test/app; p=1; q=0.2, test/app;p=1;r=2', 'test/app; p=1'), + ( + '*/app; q=0.1, simple/app; test=true; q=0.2, simple/app; color=blue', + 'simple/app; test=true', + ), + ], +) +def test_quality_prefer_exact_match(accept, media_type): + assert pytest.approx(mediatypes.quality(media_type, accept)) == 0.2 + + @pytest.mark.parametrize( 'accept,media_type', [ @@ -147,6 +194,10 @@ def test_quality_rfc_examples(accept, media_type, quality_value): ), ('text/html, text/plain', 'text/x-python'), ('*/json; q=0.2, application/json', 'application/msgpack'), + ( + 'text/x-python, image/*; q=0.33, text/plain; format=fixed', + 'text/plain; format=flowed', + ), ], ) def test_quality_none_matches(accept, media_type): From 79129241196a4891e1b546f7450c1ef680a98a2f Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Tue, 1 Oct 2024 23:51:34 +0200 Subject: [PATCH 12/24] feat(mediatypes): improve docstring, simplify behaviour --- falcon/util/mediatypes.py | 50 ++++++++++++++++++++++++++++++++++----- tests/test_mediatypes.py | 9 +++++++ 2 files changed, 53 insertions(+), 6 deletions(-) diff --git a/falcon/util/mediatypes.py b/falcon/util/mediatypes.py index ab5795de0..1e14c9d59 100644 --- a/falcon/util/mediatypes.py +++ b/falcon/util/mediatypes.py @@ -140,7 +140,7 @@ class _MediaRange: __slots__ = ('main_type', 'subtype', 'quality', 'params') - _NOT_MATCHING = (-1, -1, -1, 0.0) + _NOT_MATCHING = (-1, -1, -1, -1, 0.0) _Q_VALUE_ERROR_MESSAGE = ( 'If provided, the q parameter must be a real number ' @@ -186,7 +186,7 @@ def parse(cls, media_range: str) -> _MediaRange: return cls(main_type, subtype, quality, params) - def match_score(self, media_type: _MediaType) -> Tuple[int, int, int, float]: + def match_score(self, media_type: _MediaType) -> Tuple[int, int, int, int, float]: if self.main_type == '*' or media_type.main_type == '*': main_matches = 0 elif self.main_type != media_type.main_type: @@ -203,14 +203,15 @@ def match_score(self, media_type: _MediaType) -> Tuple[int, int, int, float]: mr_pnames = frozenset(self.params) mt_pnames = frozenset(media_type.params) - param_score = -len(mr_pnames.symmetric_difference(mt_pnames)) + + exact_match = 0 if mr_pnames.symmetric_difference(mt_pnames) else 1 + matching = mr_pnames & mt_pnames for pname in matching: if self.params[pname] != media_type.params[pname]: return self._NOT_MATCHING - param_score += len(matching) - return (main_matches, sub_matches, param_score, self.quality) + return (main_matches, sub_matches, exact_match, len(matching), self.quality) def __repr__(self) -> str: q = self.quality @@ -235,6 +236,42 @@ def quality(media_type: str, header: str) -> float: Media-ranges are parsed from the provided `header` value according to RFC 9110, Section 12.5.1 (the ``Accept`` header). + The provided `media_type` is matched against each of the parsed media + ranges, and the fitness of each match is assessed as follows + (in the decreasing priority list of criteria): + + 1. Do the main types (as in ``type/subtype``) match? + + The types must either match exactly, or as wildcard (``*``). + The matches involving a wildcard are prioritized lower. + + 2. Do the subtypes (as in ``type/subtype``) match? + + The subtypes must either match exactly, or as wildcard (``*``). + The matches involving a wildcard are prioritized lower. + + 3. Do the parameters match exactly? + + If all the parameter names and values (if any) between the media range + and media type match exactly, such match is prioritized higher than + matches involving extraneous parameters on either side. + + Note that if parameter names match, the corresponding values must also + be equal, or the provided media type is considered not to match the + media range in question at all. + + 4. The number of matching parameters. + + 5. Finally, if two or more best media range matches are equally fit + according to all of the above criteria (1) through (4), the highest + quality (i.e., the value of the ``q`` parameter) of these is returned. + + Note: + With the exception of evaluating the exact parameter match (3), the + number of extraneous parameters (i.e. where the names are only present + in the media type, or only in the media range) currently does not + influence the described specificity sort order. + Args: media_type: The Internet media type to match against the provided HTTP ``Accept`` header value. @@ -254,7 +291,8 @@ def quality(media_type: str, header: str) -> float: def best_match(media_types: Iterable[str], header: str) -> Optional[str]: - """Choose media type with the highest quality from a list of candidates. + """Choose media type with the highest :func:`quality` from a list of + candidates. Args: media_types: An iterable over one or more Internet media types diff --git a/tests/test_mediatypes.py b/tests/test_mediatypes.py index 1544b512c..f4d76fcc9 100644 --- a/tests/test_mediatypes.py +++ b/tests/test_mediatypes.py @@ -159,6 +159,15 @@ def test_quality_rfc_examples(accept, media_type, quality_value): 'text/plain; format=flowed', 0.33, ), + ( + # NOTE(vytas): Same as one of the RFC 7231 examples, just with some + # media ranges reordered. python-mimeparse fails to yield the + # correct result in this specific case. + 'text/*;q=0.3, text/html;level=1, text/html;q=0.7, ' + 'text/html;level=2;q=0.4, */*;q=0.5', + 'text/html; level=3', + 0.7, + ), ], ) def test_quality(accept, media_type, quality_value): From 2f11234724a37e0385bfc7f9cfd6aff277f78427 Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Tue, 1 Oct 2024 23:56:46 +0200 Subject: [PATCH 13/24] refactor(mediatypes): use a stricter type annotation --- falcon/util/mediatypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/falcon/util/mediatypes.py b/falcon/util/mediatypes.py index 1e14c9d59..b72c52c5c 100644 --- a/falcon/util/mediatypes.py +++ b/falcon/util/mediatypes.py @@ -290,7 +290,7 @@ def quality(media_type: str, header: str) -> float: return most_specific[-1] -def best_match(media_types: Iterable[str], header: str) -> Optional[str]: +def best_match(media_types: Iterable[str], header: str) -> str: """Choose media type with the highest :func:`quality` from a list of candidates. From be2f1552fdaf8ed80c96a6447752a3cf1b9f86c3 Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Tue, 1 Oct 2024 23:57:37 +0200 Subject: [PATCH 14/24] chore: remove an unused import --- falcon/util/mediatypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/falcon/util/mediatypes.py b/falcon/util/mediatypes.py index b72c52c5c..c80c7817c 100644 --- a/falcon/util/mediatypes.py +++ b/falcon/util/mediatypes.py @@ -18,7 +18,7 @@ import functools import math -from typing import Dict, Iterable, Iterator, Optional, Tuple +from typing import Dict, Iterable, Iterator, Tuple from falcon import errors From 317109ce794b9f1819df336193bf6319e7494f34 Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Wed, 2 Oct 2024 00:03:17 +0200 Subject: [PATCH 15/24] chore: fix docstring style violation D205 --- falcon/util/mediatypes.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/falcon/util/mediatypes.py b/falcon/util/mediatypes.py index c80c7817c..d9bd73450 100644 --- a/falcon/util/mediatypes.py +++ b/falcon/util/mediatypes.py @@ -291,8 +291,7 @@ def quality(media_type: str, header: str) -> float: def best_match(media_types: Iterable[str], header: str) -> str: - """Choose media type with the highest :func:`quality` from a list of - candidates. + """Choose media type with the highest :func:`quality` from a list of candidates. Args: media_types: An iterable over one or more Internet media types From 4caed9932cb423b6db8cf8ab8685fc5b7c19318f Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Fri, 4 Oct 2024 21:28:08 +0200 Subject: [PATCH 16/24] chore(docs): apply review suggestion to `docs/ext/rfc.py` Co-authored-by: Federico Caselli --- docs/ext/rfc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ext/rfc.py b/docs/ext/rfc.py index fe9afe601..3633d6ce0 100644 --- a/docs/ext/rfc.py +++ b/docs/ext/rfc.py @@ -40,7 +40,7 @@ def _process_line(line): section = m.group(2) template = ( - '`RFC {rfc}, Section {section} ' '<{ietf_docs}/rfc{rfc}#section-{section}>`__' + '`RFC {rfc}, Section {section} <{ietf_docs}/rfc{rfc}#section-{section}>`__' ) rendered_text = template.format(rfc=rfc, section=section, ietf_docs=IETF_DOCS) From 42d2ceae55baf5e6765a982a594ac3f789a5e9bc Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Fri, 4 Oct 2024 21:31:27 +0200 Subject: [PATCH 17/24] docs(newsfragments): apply review suggestion for `docs/_newsfragments/864.breakingchange.rst` Co-authored-by: Federico Caselli --- docs/_newsfragments/864.breakingchange.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/_newsfragments/864.breakingchange.rst b/docs/_newsfragments/864.breakingchange.rst index 5f4d5aaef..4e6b4bd25 100644 --- a/docs/_newsfragments/864.breakingchange.rst +++ b/docs/_newsfragments/864.breakingchange.rst @@ -6,7 +6,7 @@ itself, fixing a handful of long-standing bugs in the new implementation. The following new behaviors are considered breaking changes: * Previously, the iterable passed to - :meth:`req.client_prefers ` had be sorted in + :meth:`req.client_prefers ` had to be sorted in the order of increasing desirability. :func:`~falcon.mediatypes.best_match`, and by proxy :meth:`~falcon.Request.client_prefers`, now consider the provided media types From f6e2ef34f6fb51536c299d7849db62fc806b7ae4 Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Fri, 4 Oct 2024 23:43:33 +0200 Subject: [PATCH 18/24] refactor(mediatypes): address some review comments --- falcon/util/mediatypes.py | 25 ++++++++++--- tests/test_mediatypes.py | 77 ++++++++++++++++++++++++--------------- 2 files changed, 66 insertions(+), 36 deletions(-) diff --git a/falcon/util/mediatypes.py b/falcon/util/mediatypes.py index d9bd73450..f457330f7 100644 --- a/falcon/util/mediatypes.py +++ b/falcon/util/mediatypes.py @@ -303,10 +303,23 @@ def best_match(media_types: Iterable[str], header: str) -> str: Best match from the supported candidates, or an empty string if the provided header value does not match any of the given types. """ - matching, best_quality = max( - ((media_type, quality(media_type, header)) for media_type in media_types), - key=lambda mt_quality: mt_quality[1], - ) - if best_quality > 0.0: - return matching + # PERF(vytas): Using the default parameter, i.e., max(..., default='', 0.0) + # would be much nicer than EAFP, but for some reason it is quite slow + # regardless of whether media_types is empty or not. + try: + matching, best_quality = max( + ((media_type, quality(media_type, header)) for media_type in media_types), + key=lambda mt_quality: mt_quality[1], + ) + if best_quality > 0.0: + return matching + except errors.InvalidMediaType: + # NOTE(vytas): Do not swallow instances of InvalidMediaType + # (it a subclass of ValueError). + raise + except ValueError: + # NOTE(vytas): Barring unknown bugs, we only expect unhandled + # ValueErrors from supplying an empty media_types value. + pass + return '' diff --git a/tests/test_mediatypes.py b/tests/test_mediatypes.py index f4d76fcc9..6b6d8af9b 100644 --- a/tests/test_mediatypes.py +++ b/tests/test_mediatypes.py @@ -151,8 +151,8 @@ def test_quality_rfc_examples(accept, media_type, quality_value): ), ( 'application/*, */wildcard; q=0.7, */*; q=0.25', - 'test/wildcard; expect=pass', - 0.7, + 'test/something; expect=pass', + 0.25, ), ( 'text/x-python, text/*; q=0.33, text/plain; format=fixed', @@ -192,23 +192,23 @@ def test_quality_prefer_exact_match(accept, media_type): assert pytest.approx(mediatypes.quality(media_type, accept)) == 0.2 -@pytest.mark.parametrize( - 'accept,media_type', - [ - ('application/json', 'application/yaml'), - ('audio/*; q=0.2, audio/basic', 'video/mp3'), - ( - 'falcon/peregrine; speed=high; unladen=true', - 'falcon/peregrine; speed=average', - ), - ('text/html, text/plain', 'text/x-python'), - ('*/json; q=0.2, application/json', 'application/msgpack'), - ( - 'text/x-python, image/*; q=0.33, text/plain; format=fixed', - 'text/plain; format=flowed', - ), - ], -) +_QUALITY_NONE_MATCHES_EXAMPLES = [ + ('application/json', 'application/yaml'), + ('audio/*; q=0.2, audio/basic', 'video/mp3'), + ( + 'falcon/peregrine; speed=high; unladen=true', + 'falcon/peregrine; speed=average', + ), + ('text/html, text/plain', 'text/x-python'), + ('*/json; q=0.2, application/json', 'application/msgpack'), + ( + 'text/x-python, image/*; q=0.33, text/plain; format=fixed', + 'text/plain; format=flowed', + ), +] + + +@pytest.mark.parametrize('accept,media_type', _QUALITY_NONE_MATCHES_EXAMPLES) def test_quality_none_matches(accept, media_type): assert mediatypes.quality(media_type, accept) == 0.0 @@ -229,17 +229,18 @@ def test_best_match(media_types, accept, expected): assert mediatypes.best_match(media_types, accept) == expected -@pytest.mark.parametrize( - 'media_types,accept', - [ - (['application/json'], 'application/yaml'), - (['application/json', 'application/yaml'], 'application/xml, text/*; q=0.7'), - ( - ['text/plain', 'falcon/peregrine; load=unladen'], - 'falcon/peregrine; load=heavy', - ), - ], -) +_BEST_MATCH_NONE_MATCHES_EXAMPLES = [ + ([_mt], _acc) for _mt, _acc in _QUALITY_NONE_MATCHES_EXAMPLES +] + [ + (['application/json', 'application/yaml'], 'application/xml, text/*; q=0.7'), + ( + ['text/plain', 'falcon/peregrine; load=unladen'], + 'falcon/peregrine; load=heavy', + ), +] + + +@pytest.mark.parametrize('media_types,accept', _BEST_MATCH_NONE_MATCHES_EXAMPLES) def test_best_match_none_matches(media_types, accept): assert mediatypes.best_match(media_types, accept) == '' @@ -250,6 +251,10 @@ def test_invalid_media_type(media_type): mediatypes.quality(media_type, '*/*') +def _generate_strings(items): + yield from items + + @pytest.mark.parametrize( 'media_range', [ @@ -266,3 +271,15 @@ def test_invalid_media_type(media_type): def test_invalid_media_range(media_range): with pytest.raises(errors.InvalidMediaRange): mediatypes.quality('falcon/peregrine', media_range) + + with pytest.raises(errors.InvalidMediaRange): + mediatypes.best_match(_generate_strings(['falcon/peregrine']), media_range) + + +@pytest.mark.parametrize( + 'accept', + ['*/*', 'application/xml, text/*; q=0.7, */*; q=0.3'], +) +@pytest.mark.parametrize('media_types', [(), [], {}, _generate_strings(())]) +def test_empty_media_types(accept, media_types): + assert mediatypes.best_match(media_types, accept) == '' From 6f7afc2118b89cf0fed3b27d4f47ffbbd1a6bce3 Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Fri, 4 Oct 2024 23:52:01 +0200 Subject: [PATCH 19/24] perf(mediatypes): short-circuit if q is absent as per review comment --- falcon/util/mediatypes.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/falcon/util/mediatypes.py b/falcon/util/mediatypes.py index f457330f7..01327d3fc 100644 --- a/falcon/util/mediatypes.py +++ b/falcon/util/mediatypes.py @@ -165,18 +165,21 @@ def parse(cls, media_range: str) -> _MediaRange: ) from ex # NOTE(vytas): We don't need to special-case Q since the above - # parse_header always lowercases parameters. - q = params.pop('q', 1.0) + # parse_header always lowercases parameter names. + + # PERF(vytas): Short-circuit if q is absent. + if 'q' not in params: + return cls(main_type, subtype, 1.0, params) try: - quality = float(q) + q = float(params.pop('q')) except (TypeError, ValueError) as ex: # NOTE(vytas): RFC 9110, Section 12.4.2: # weight = OWS ";" OWS "q=" qvalue # qvalue = ( "0" [ "." 0*3DIGIT ] ) / ( "1" [ "." 0*3("0") ] ) raise errors.InvalidMediaRange(cls._Q_VALUE_ERROR_MESSAGE) from ex - if not (0.0 <= quality <= 1.0) or not math.isfinite(quality): + if not (0.0 <= q <= 1.0) or not math.isfinite(q): raise errors.InvalidMediaRange(cls._Q_VALUE_ERROR_MESSAGE) # NOTE(vytas): RFC 9110, Section 12.4.2 states that a sender of qvalue @@ -184,7 +187,7 @@ def parse(cls, media_range: str) -> _MediaRange: # but we are more permissive here, and opt not to spend any extra CPU # cycles, if we have already managed to convert the value to float. - return cls(main_type, subtype, quality, params) + return cls(main_type, subtype, q, params) def match_score(self, media_type: _MediaType) -> Tuple[int, int, int, int, float]: if self.main_type == '*' or media_type.main_type == '*': From 017f75ec69f27569b95adaa85c3492d824d2359e Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Sat, 5 Oct 2024 00:05:05 +0200 Subject: [PATCH 20/24] docs: explain how to mitigate a potentially breaking change --- docs/_newsfragments/864.breakingchange.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/_newsfragments/864.breakingchange.rst b/docs/_newsfragments/864.breakingchange.rst index 4e6b4bd25..d9e1c0e24 100644 --- a/docs/_newsfragments/864.breakingchange.rst +++ b/docs/_newsfragments/864.breakingchange.rst @@ -17,6 +17,14 @@ The following new behaviors are considered breaking changes: :ref:`media type utilities ` consider media types with different values for the same parameters as non-matching. + One theoretically possible scenario where this change can affect you is only + installing a :ref:`media ` handler for a content type with parameters; + it then may not match media types with conflicting values (that used to match + before Falcon 4.0). + If this turns out to be the case, also + :ref:`install the same handler ` for the generic + ``type/subtype`` without parameters. + The new functions, :func:`falcon.mediatypes.quality` and :func:`falcon.mediatypes.best_match`, otherwise have the same signature as the corresponding methods from From 94495f676965eae4a8460e46ac5df5efb9ea37f8 Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Sat, 5 Oct 2024 00:24:57 +0200 Subject: [PATCH 21/24] docs: add a note that we continue to maintain python-mimeparse --- docs/_newsfragments/864.breakingchange.rst | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/_newsfragments/864.breakingchange.rst b/docs/_newsfragments/864.breakingchange.rst index d9e1c0e24..0c518bc1c 100644 --- a/docs/_newsfragments/864.breakingchange.rst +++ b/docs/_newsfragments/864.breakingchange.rst @@ -1,8 +1,13 @@ Falcon is no longer vendoring the -`python-mimeparse `_ library; +`python-mimeparse `__ library; the relevant functionality has instead been reimplemented in the framework itself, fixing a handful of long-standing bugs in the new implementation. +If you use standalone +`python-mimeparse `__ in your +project, do not worry! We will continue to maintain it as a separate package +under the Falconry umbrella (we took over about 3 years ago). + The following new behaviors are considered breaking changes: * Previously, the iterable passed to From b0e3829254a4faa9e573ea010395dab8cffad874 Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Sat, 5 Oct 2024 12:38:33 +0200 Subject: [PATCH 22/24] refactor(mediatypes): convert _MediaType and _MediaRange to dataclasses --- falcon/util/mediatypes.py | 41 ++++++++++++++------------------------- tests/test_mediatypes.py | 2 -- 2 files changed, 15 insertions(+), 28 deletions(-) diff --git a/falcon/util/mediatypes.py b/falcon/util/mediatypes.py index 01327d3fc..125ba8f5e 100644 --- a/falcon/util/mediatypes.py +++ b/falcon/util/mediatypes.py @@ -16,6 +16,7 @@ from __future__ import annotations +import dataclasses import functools import math from typing import Dict, Iterable, Iterator, Tuple @@ -111,35 +112,29 @@ def _parse_media_type_header(media_type: str) -> Tuple[str, str, dict]: return (main_type.strip(), subtype.strip(), params) -# TODO(vytas): Should we make these structures public? +# TODO(vytas): Should we make these data structures public? + + +# PERF(vytas): It would be nice to use frozen=True as we never modify the data, +# but it seems to incur a performance hit (~2-3x) on CPython 3.12. +@dataclasses.dataclass(slots=True) class _MediaType: main_type: str subtype: str params: dict - __slots__ = ('main_type', 'subtype', 'params') - @classmethod def parse(cls, media_type: str) -> _MediaType: return cls(*_parse_media_type_header(media_type)) - def __init__(self, main_type: str, subtype: str, params: dict) -> None: - self.main_type = main_type - self.subtype = subtype - self.params = params - - def __repr__(self) -> str: - return f'MediaType<{self.main_type}/{self.subtype}; {self.params}>' - +@dataclasses.dataclass(slots=True) class _MediaRange: main_type: str subtype: str quality: float params: dict - __slots__ = ('main_type', 'subtype', 'quality', 'params') - _NOT_MATCHING = (-1, -1, -1, -1, 0.0) _Q_VALUE_ERROR_MESSAGE = ( @@ -147,14 +142,6 @@ class _MediaRange: 'in the range 0 through 1.' ) - def __init__( - self, main_type: str, subtype: str, quality: float, params: dict - ) -> None: - self.main_type = main_type - self.subtype = subtype - self.quality = quality - self.params = params - @classmethod def parse(cls, media_range: str) -> _MediaRange: try: @@ -204,10 +191,14 @@ def match_score(self, media_type: _MediaType) -> Tuple[int, int, int, int, float else: sub_matches = 1 + # PERF(vytas): We could also use bitwise operators directly between + # params.keys(), but set()/frozenset() seem to outperform dict.keys() + # slightly regardless of the number of elements, especially when we + # reuse the same sets for both intersection and symmetric_difference. mr_pnames = frozenset(self.params) mt_pnames = frozenset(media_type.params) - exact_match = 0 if mr_pnames.symmetric_difference(mt_pnames) else 1 + exact_match = 0 if mr_pnames ^ mt_pnames else 1 matching = mr_pnames & mt_pnames for pname in matching: @@ -216,13 +207,11 @@ def match_score(self, media_type: _MediaType) -> Tuple[int, int, int, int, float return (main_matches, sub_matches, exact_match, len(matching), self.quality) - def __repr__(self) -> str: - q = self.quality - return f'MediaRange<{self.main_type}/{self.subtype}; {q=}; {self.params}>' - # PERF(vytas): It is possible to cache a classmethod too, but the invocation is # less efficient, especially in the case of a cache hit. +# NOTE(vytas): Also, if we decide to make these classes public, we either need +# to keep these cached parsers private, or to make sure we use frozen classes. _parse_media_type = functools.lru_cache(_MediaType.parse) _parse_media_range = functools.lru_cache(_MediaRange.parse) diff --git a/tests/test_mediatypes.py b/tests/test_mediatypes.py index 6b6d8af9b..7093a5175 100644 --- a/tests/test_mediatypes.py +++ b/tests/test_mediatypes.py @@ -49,7 +49,6 @@ def test_media_type_private_cls(): assert mt1.main_type == 'image' assert mt1.subtype == 'png' assert mt1.params == {} - assert repr(mt1) == 'MediaType' mt2 = mediatypes._MediaType.parse('text/plain; charset=latin-1') assert mt2.main_type == 'text' @@ -63,7 +62,6 @@ def test_media_range_private_cls(): assert mr1.subtype == 'png' assert mr1.quality == 1.0 assert mr1.params == {} - assert repr(mr1) == 'MediaRange' mr2 = mediatypes._MediaRange.parse('text/plain; charset=latin-1; Q=0.9') assert mr2.main_type == 'text' From 86186404716a5e158d6a1f82bc2c04a1573cf104 Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Sat, 5 Oct 2024 12:56:28 +0200 Subject: [PATCH 23/24] fix(mediatypes): only use dataclass(slots=True) where supported (>=py310) --- falcon/util/mediatypes.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/falcon/util/mediatypes.py b/falcon/util/mediatypes.py index 125ba8f5e..fa4faad17 100644 --- a/falcon/util/mediatypes.py +++ b/falcon/util/mediatypes.py @@ -19,6 +19,7 @@ import dataclasses import functools import math +import sys from typing import Dict, Iterable, Iterator, Tuple from falcon import errors @@ -114,10 +115,17 @@ def _parse_media_type_header(media_type: str) -> Tuple[str, str, dict]: # TODO(vytas): Should we make these data structures public? +# NOTE(vytas): The slots parameter requires Python 3.10. +_dataclass_with_slots = ( + dataclasses.dataclass(slots=True) + if sys.version_info >= (3, 10) + else dataclasses.dataclass() +) + # PERF(vytas): It would be nice to use frozen=True as we never modify the data, # but it seems to incur a performance hit (~2-3x) on CPython 3.12. -@dataclasses.dataclass(slots=True) +@_dataclass_with_slots class _MediaType: main_type: str subtype: str @@ -128,7 +136,7 @@ def parse(cls, media_type: str) -> _MediaType: return cls(*_parse_media_type_header(media_type)) -@dataclasses.dataclass(slots=True) +@_dataclass_with_slots class _MediaRange: main_type: str subtype: str From 532b44ea8381c909464e095488e3cebe605372d3 Mon Sep 17 00:00:00 2001 From: Vytautas Liuolia Date: Sat, 5 Oct 2024 13:12:23 +0200 Subject: [PATCH 24/24] refactor(mediatypes): a yet another attempt to make dataclasses work with __slots__ --- falcon/util/mediatypes.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/falcon/util/mediatypes.py b/falcon/util/mediatypes.py index fa4faad17..9aba2e38c 100644 --- a/falcon/util/mediatypes.py +++ b/falcon/util/mediatypes.py @@ -19,7 +19,6 @@ import dataclasses import functools import math -import sys from typing import Dict, Iterable, Iterator, Tuple from falcon import errors @@ -115,34 +114,33 @@ def _parse_media_type_header(media_type: str) -> Tuple[str, str, dict]: # TODO(vytas): Should we make these data structures public? -# NOTE(vytas): The slots parameter requires Python 3.10. -_dataclass_with_slots = ( - dataclasses.dataclass(slots=True) - if sys.version_info >= (3, 10) - else dataclasses.dataclass() -) - # PERF(vytas): It would be nice to use frozen=True as we never modify the data, # but it seems to incur a performance hit (~2-3x) on CPython 3.12. -@_dataclass_with_slots +@dataclasses.dataclass class _MediaType: main_type: str subtype: str params: dict + # NOTE(vytas): Using __slots__ with dataclasses is tricky, but it seems to + # work here since we are not using any default values in the definition. + __slots__ = ('main_type', 'subtype', 'params') + @classmethod def parse(cls, media_type: str) -> _MediaType: return cls(*_parse_media_type_header(media_type)) -@_dataclass_with_slots +@dataclasses.dataclass class _MediaRange: main_type: str subtype: str quality: float params: dict + __slots__ = ('main_type', 'subtype', 'quality', 'params') + _NOT_MATCHING = (-1, -1, -1, -1, 0.0) _Q_VALUE_ERROR_MESSAGE = (