diff --git a/CHANGES.rst b/CHANGES.rst index 7daded2..6bc5c94 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,7 +4,8 @@ Changelog 5.1.1 (unreleased) ------------------ -- Nothing changed yet. +- Add catalog and search patches to limit results. + [cekk] 5.1.0 (2023-08-17) diff --git a/setup.py b/setup.py index 6e002ae..0f08427 100644 --- a/setup.py +++ b/setup.py @@ -59,7 +59,7 @@ "collective.purgebyid", "kitconcept.seo>=2.0.0", "plone.volto>=4.0.0", - "plone.restapi>=8.16.1", + "plone.restapi>=8.36.0", "Products.PortalTransforms>=3.2.0", ], extras_require={ diff --git a/src/redturtle/volto/__init__.py b/src/redturtle/volto/__init__.py index 865f8f4..be7550d 100644 --- a/src/redturtle/volto/__init__.py +++ b/src/redturtle/volto/__init__.py @@ -1,7 +1,13 @@ # -*- coding: utf-8 -*- """Init and utils.""" from plone.app.content.browser.vocabulary import PERMISSIONS +from plone.folder.nogopip import GopipIndex +from Products.ZCatalog.Catalog import Catalog +from redturtle.volto.catalogplan import Catalog_sorted_search_indexes from zope.i18nmessageid import MessageFactory +from ZTUtils.Lazy import LazyCat +from ZTUtils.Lazy import LazyMap + import logging @@ -12,3 +18,61 @@ _ = MessageFactory("redturtle.volto") PERMISSIONS["plone.app.vocabularies.Keywords"] = "View" + +# CATALOG PATCHES + +logger.info( + "install monkey patch for Products.ZCatalog.Catalog.Catalog._sorted_search_indexes #### WORK IN PROGRESS ####" +) +Catalog._orig_sorted_search_indexes = Catalog._sorted_search_indexes +Catalog._sorted_search_indexes = Catalog_sorted_search_indexes + +MAX_SORTABLE = 5000 + + +def Catalog_sortResults( + self, + rs, + sort_index, + reverse=False, + limit=None, + merge=True, + actual_result_count=None, + b_start=0, + b_size=None, +): + if MAX_SORTABLE > 0: + if actual_result_count is None: + actual_result_count = len(rs) + if actual_result_count >= MAX_SORTABLE and isinstance(sort_index, GopipIndex): + logger.warning( + "too many results %s disable GopipIndex sorting", actual_result_count + ) + switched_reverse = bool( + b_size and b_start and b_start > actual_result_count / 2 + ) + if hasattr(rs, "keys"): + sequence, slen = self._limit_sequence( + rs.keys(), actual_result_count, b_start, b_size, switched_reverse + ) + return LazyMap( + self.__getitem__, + sequence, + len(sequence), + actual_result_count=actual_result_count, + ) + else: + logger.error( + "too many results %s disable GopipIndex sorting results %s has no key", + actual_result_count, + type(rs), + ) + return LazyCat([], 0, actual_result_count) + return self._orig_sortResults( + rs, sort_index, reverse, limit, merge, actual_result_count, b_start, b_size + ) + + +logger.info("install monkey patch for Products.ZCatalog.Catalog.Catalog.sortResults") +Catalog._orig_sortResults = Catalog.sortResults +Catalog.sortResults = Catalog_sortResults diff --git a/src/redturtle/volto/catalogplan.py b/src/redturtle/volto/catalogplan.py new file mode 100644 index 0000000..142ce3c --- /dev/null +++ b/src/redturtle/volto/catalogplan.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- +CUSTOM = { + "allowedRolesAndUsers": 80, + "effectiveRange": 100, +} + + +def rank_index(index, query): + if ( + index == "UID" + and isinstance(query.get("UID"), dict) # noqa + and query["UID"].get("not") # noqa + ): + return 100 + return CUSTOM.get(index, 0) + + +def Catalog_sorted_search_indexes(self, query): + return sorted( + self._orig_sorted_search_indexes(query), key=lambda i: (rank_index(i, query), i) + ) diff --git a/src/redturtle/volto/restapi/services/querystringsearch/configure.zcml b/src/redturtle/volto/restapi/services/querystringsearch/configure.zcml index df8fb43..5423a88 100644 --- a/src/redturtle/volto/restapi/services/querystringsearch/configure.zcml +++ b/src/redturtle/volto/restapi/services/querystringsearch/configure.zcml @@ -2,11 +2,12 @@ xmlns="http://namespaces.zope.org/zope" xmlns:cache="http://namespaces.zope.org/cache" xmlns:plone="http://namespaces.plone.org/plone" + xmlns:zcml="http://namespaces.zope.org/zcml" > + + + + diff --git a/src/redturtle/volto/restapi/services/querystringsearch/post.py b/src/redturtle/volto/restapi/services/querystringsearch/get.py similarity index 55% rename from src/redturtle/volto/restapi/services/querystringsearch/post.py rename to src/redturtle/volto/restapi/services/querystringsearch/get.py index 6a4459f..eda12fd 100644 --- a/src/redturtle/volto/restapi/services/querystringsearch/post.py +++ b/src/redturtle/volto/restapi/services/querystringsearch/get.py @@ -5,9 +5,15 @@ from plone.app.querystring import queryparser from plone.restapi.batching import HypermediaBatch from plone.restapi.deserializer import json_body +from plone.restapi.exceptions import DeserializationError from plone.restapi.interfaces import ISerializeToJson from plone.restapi.interfaces import ISerializeToJsonSummary -from plone.restapi.services.querystringsearch.get import QuerystringSearchPost +from plone.restapi.services import Service +from plone.restapi.services.querystringsearch.get import ( + QuerystringSearch as BaseQuerystringSearch, +) +from urllib import parse +from zExceptions import BadRequest from zope.component import getMultiAdapter import logging @@ -15,22 +21,104 @@ logger = logging.getLogger(__name__) +# default: 1000 +MAX_LIMIT = 200 -class RTQuerystringSearchPost(QuerystringSearchPost): - """ - Perform a custom search if we are searching events - """ - def reply(self): - if self.is_event_search(): +class QuerystringSearch(BaseQuerystringSearch): + def __call__(self): + try: + data = json_body(self.request) + except DeserializationError as err: + raise BadRequest(str(err)) + + query = data.get("query", None) + if self.is_event_search(query=query): return self.reply_events() - return super().reply() - def is_event_search(self): + try: + b_start = int(data.get("b_start", 0)) + except ValueError: + raise BadRequest("Invalid b_start") + try: + b_size = int(data.get("b_size", 25)) + except ValueError: + raise BadRequest("Invalid b_size") + sort_on = data.get("sort_on", None) + sort_order = data.get("sort_order", None) + + # LIMIT PATCH + if not query: + raise BadRequest("No query supplied") + limit = self.get_limit(data=data) + # END OF LIMIT PATCH + + fullobjects = bool(data.get("fullobjects", False)) + + if sort_order: + sort_order = "descending" if sort_order == "descending" else "ascending" + + querybuilder = getMultiAdapter( + (self.context, self.request), name="querybuilderresults" + ) + + querybuilder_parameters = dict( + query=query, + brains=True, + b_start=b_start, + b_size=b_size, + sort_on=sort_on, + sort_order=sort_order, + limit=limit, + ) + + # PATCH: we disable this query to boost performances on big sites + # Exclude "self" content item from the results when ZCatalog supports NOT UUID + # queries and it is called on a content object. + # if not IPloneSiteRoot.providedBy(self.context) and SUPPORT_NOT_UUID_QUERIES: + # querybuilder_parameters.update( + # dict(custom_query={"UID": {"not": self.context.UID()}}) + # ) + # END OF PATCH + + try: + results = querybuilder(**querybuilder_parameters) + except KeyError: + # This can happen if the query has an invalid operation, + # but plone.app.querystring doesn't raise an exception + # with specific info. + raise BadRequest("Invalid query.") + + results = getMultiAdapter((results, self.request), ISerializeToJson)( + fullobjects=fullobjects + ) + return results + + def get_limit(self, data): + """ + If limit is <= 0 or higher than MAX_LIMIT, set it to MAX_LIMIT + """ + try: + limit = int(data.get("limit", MAX_LIMIT)) + except ValueError: + raise BadRequest("Invalid limit") + + if "limit" in data and limit <= 0: + del data["limit"] + limit = MAX_LIMIT + if limit > MAX_LIMIT: + logger.warning( + '[wrong query] limit is too high: "{}". Set to default ({}).'.format( + data["query"], MAX_LIMIT + ) + ) + limit = MAX_LIMIT + return limit + + def is_event_search(self, query): """ Check if we need to perform a custom search with p.a.events method """ - query = json_body(self.request).get("query", []) indexes = [x["i"] for x in query] portal_type_check = False @@ -52,7 +140,7 @@ def generate_query_for_events(self): b_size = data.get("b_size", None) b_start = data.get("b_start", 0) query = {k: v for k, v in parsed_query.items() if k not in ["start", "end"]} - limit = int(data.get("limit", 1000)) + limit = int(self.get_limit(data=data)) sort = "start" sort_reverse = False start, end = self.parse_event_dates(parsed_query) @@ -167,3 +255,26 @@ def reply_events(self): results["items"].append(result) return results + + +class QuerystringSearchPost(Service): + """Copied from plone.restapi == 8.42.0""" + + def reply(self): + querystring_search = QuerystringSearch(self.context, self.request) + return querystring_search() + + +class QuerystringSearchGet(Service): + """Copied from plone.restapi == 8.42.0""" + + def reply(self): + # We need to copy the JSON query parameters from the querystring + # into the request body, because that's where other code expects to find them + self.request["BODY"] = parse.unquote( + self.request.form.get("query", "{}") + ).encode(self.request.charset) + # unset the get parameters + self.request.form = {} + querystring_search = QuerystringSearch(self.context, self.request) + return querystring_search() diff --git a/src/redturtle/volto/restapi/services/search/get.py b/src/redturtle/volto/restapi/services/search/get.py index 14a3ebc..30541cf 100644 --- a/src/redturtle/volto/restapi/services/search/get.py +++ b/src/redturtle/volto/restapi/services/search/get.py @@ -23,6 +23,8 @@ except ImportError: HAS_ADVANCEDQUERY = False +# default: 1000 +MAX_LIMIT = 200 # custom search handler @@ -119,6 +121,29 @@ def search(self, query=None): else: return super(SearchHandler, self).search(query) + def _parse_query(self, query): + query = super()._parse_query(query) + for idx in ["sort_limit", "b_size"]: + if idx not in query: + continue + value = query.get(idx, MAX_LIMIT) + if value <= 0: + logger.warning( + '[wrong query] {} is wrong: "{}". Set to default ({}).'.format( + idx, query, MAX_LIMIT + ) + ) + query[idx] = MAX_LIMIT + + if value > MAX_LIMIT: + logger.warning( + '[wrong query] {} is too high: "{}". Set to default ({}).'.format( + idx, query, MAX_LIMIT + ) + ) + query[idx] = MAX_LIMIT + return query + class SearchGet(Service): def reply(self): diff --git a/src/redturtle/volto/tests/test_catalog_limit_patches.py b/src/redturtle/volto/tests/test_catalog_limit_patches.py new file mode 100644 index 0000000..49a4f91 --- /dev/null +++ b/src/redturtle/volto/tests/test_catalog_limit_patches.py @@ -0,0 +1,85 @@ +# -*- coding: utf-8 -*- +from plone import api +from plone.app.testing import setRoles +from plone.app.testing import SITE_OWNER_NAME +from plone.app.testing import SITE_OWNER_PASSWORD +from plone.app.testing import TEST_USER_ID +from plone.restapi.testing import RelativeSession +from redturtle.volto.restapi.services.search.get import MAX_LIMIT +from redturtle.volto.testing import REDTURTLE_VOLTO_API_FUNCTIONAL_TESTING +from transaction import commit +from urllib.parse import quote + +import json +import unittest + + +class CatalogLimitPatches(unittest.TestCase): + layer = REDTURTLE_VOLTO_API_FUNCTIONAL_TESTING + + def setUp(self): + self.app = self.layer["app"] + self.portal = self.layer["portal"] + self.portal_url = self.portal.absolute_url() + + setRoles(self.portal, TEST_USER_ID, ["Manager"]) + + for i in range(210): + api.content.create( + container=self.portal, + type="Document", + title=f"Document {i}", + ) + commit() + + self.api_session = RelativeSession(self.portal_url) + self.api_session.headers.update({"Accept": "application/json"}) + self.api_session.auth = (SITE_OWNER_NAME, SITE_OWNER_PASSWORD) + + def tearDown(self): + self.api_session.close() + + def test_search_b_size_to_200(self): + response = self.api_session.get( + "/@search", params={"portal_type": "Document", "b_size": 1000} + ) + result = response.json() + self.assertEqual(len(result["items"]), MAX_LIMIT) + + def test_querystringsearch_post_limit_200(self): + response = self.api_session.post( + "/@querystring-search", + json={ + "query": [ + { + "i": "portal_type", + "o": "plone.app.querystring.operation.selection.is", + "v": ["Document"], + } + ], + "limit": 2000, + "b_size": 2000, + }, + ) + result = response.json() + self.assertEqual(len(result["items"]), MAX_LIMIT) + self.assertEqual(result["items_total"], MAX_LIMIT) + + def test_querystringsearch_get_limit_200(self): + query = { + "query": [ + { + "i": "portal_type", + "o": "plone.app.querystring.operation.selection.any", + "v": ["Document"], + } + ], + "b_size": 2000, + "limit": 2000, + } + response = self.api_session.get( + f"/@querystring-search?query={quote(json.dumps(query))}", + ) + result = response.json() + self.assertEqual(len(result["items"]), MAX_LIMIT) + self.assertEqual(result["items_total"], MAX_LIMIT) diff --git a/test_plone52.cfg b/test_plone52.cfg index fbbd01d..977c6d4 100644 --- a/test_plone52.cfg +++ b/test_plone52.cfg @@ -14,7 +14,7 @@ plone.app.versioningbehavior = 1.4.6 plone.app.vocabularies = 4.3.0 plone.patternslib = 1.1.1 plone.rest = 2.0.0 -plone.restapi = 8.32.4 +plone.restapi = 8.42.0 plone.volto = 4.0.0 pycountry = 19.8.18 collective.purgebyid = 1.1.1