diff --git a/CHANGES.rst b/CHANGES.rst
index 7daded2..6bc5c94 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -4,7 +4,8 @@ Changelog
5.1.1 (unreleased)
------------------
-- Nothing changed yet.
+- Add catalog and search patches to limit results.
+ [cekk]
5.1.0 (2023-08-17)
diff --git a/setup.py b/setup.py
index 6e002ae..0f08427 100644
--- a/setup.py
+++ b/setup.py
@@ -59,7 +59,7 @@
"collective.purgebyid",
"kitconcept.seo>=2.0.0",
"plone.volto>=4.0.0",
- "plone.restapi>=8.16.1",
+ "plone.restapi>=8.36.0",
"Products.PortalTransforms>=3.2.0",
],
extras_require={
diff --git a/src/redturtle/volto/__init__.py b/src/redturtle/volto/__init__.py
index 865f8f4..be7550d 100644
--- a/src/redturtle/volto/__init__.py
+++ b/src/redturtle/volto/__init__.py
@@ -1,7 +1,13 @@
# -*- coding: utf-8 -*-
"""Init and utils."""
from plone.app.content.browser.vocabulary import PERMISSIONS
+from plone.folder.nogopip import GopipIndex
+from Products.ZCatalog.Catalog import Catalog
+from redturtle.volto.catalogplan import Catalog_sorted_search_indexes
from zope.i18nmessageid import MessageFactory
+from ZTUtils.Lazy import LazyCat
+from ZTUtils.Lazy import LazyMap
+
import logging
@@ -12,3 +18,61 @@
_ = MessageFactory("redturtle.volto")
PERMISSIONS["plone.app.vocabularies.Keywords"] = "View"
+
+# CATALOG PATCHES
+
+logger.info(
+ "install monkey patch for Products.ZCatalog.Catalog.Catalog._sorted_search_indexes #### WORK IN PROGRESS ####"
+)
+Catalog._orig_sorted_search_indexes = Catalog._sorted_search_indexes
+Catalog._sorted_search_indexes = Catalog_sorted_search_indexes
+
+MAX_SORTABLE = 5000
+
+
+def Catalog_sortResults(
+ self,
+ rs,
+ sort_index,
+ reverse=False,
+ limit=None,
+ merge=True,
+ actual_result_count=None,
+ b_start=0,
+ b_size=None,
+):
+ if MAX_SORTABLE > 0:
+ if actual_result_count is None:
+ actual_result_count = len(rs)
+ if actual_result_count >= MAX_SORTABLE and isinstance(sort_index, GopipIndex):
+ logger.warning(
+ "too many results %s disable GopipIndex sorting", actual_result_count
+ )
+ switched_reverse = bool(
+ b_size and b_start and b_start > actual_result_count / 2
+ )
+ if hasattr(rs, "keys"):
+ sequence, slen = self._limit_sequence(
+ rs.keys(), actual_result_count, b_start, b_size, switched_reverse
+ )
+ return LazyMap(
+ self.__getitem__,
+ sequence,
+ len(sequence),
+ actual_result_count=actual_result_count,
+ )
+ else:
+ logger.error(
+ "too many results %s disable GopipIndex sorting results %s has no key",
+ actual_result_count,
+ type(rs),
+ )
+ return LazyCat([], 0, actual_result_count)
+ return self._orig_sortResults(
+ rs, sort_index, reverse, limit, merge, actual_result_count, b_start, b_size
+ )
+
+
+logger.info("install monkey patch for Products.ZCatalog.Catalog.Catalog.sortResults")
+Catalog._orig_sortResults = Catalog.sortResults
+Catalog.sortResults = Catalog_sortResults
diff --git a/src/redturtle/volto/catalogplan.py b/src/redturtle/volto/catalogplan.py
new file mode 100644
index 0000000..142ce3c
--- /dev/null
+++ b/src/redturtle/volto/catalogplan.py
@@ -0,0 +1,21 @@
+# -*- coding: utf-8 -*-
+CUSTOM = {
+ "allowedRolesAndUsers": 80,
+ "effectiveRange": 100,
+}
+
+
+def rank_index(index, query):
+ if (
+ index == "UID"
+ and isinstance(query.get("UID"), dict) # noqa
+ and query["UID"].get("not") # noqa
+ ):
+ return 100
+ return CUSTOM.get(index, 0)
+
+
+def Catalog_sorted_search_indexes(self, query):
+ return sorted(
+ self._orig_sorted_search_indexes(query), key=lambda i: (rank_index(i, query), i)
+ )
diff --git a/src/redturtle/volto/restapi/services/querystringsearch/configure.zcml b/src/redturtle/volto/restapi/services/querystringsearch/configure.zcml
index df8fb43..5423a88 100644
--- a/src/redturtle/volto/restapi/services/querystringsearch/configure.zcml
+++ b/src/redturtle/volto/restapi/services/querystringsearch/configure.zcml
@@ -2,11 +2,12 @@
xmlns="http://namespaces.zope.org/zope"
xmlns:cache="http://namespaces.zope.org/cache"
xmlns:plone="http://namespaces.plone.org/plone"
+ xmlns:zcml="http://namespaces.zope.org/zcml"
>
+
+
+
+
diff --git a/src/redturtle/volto/restapi/services/querystringsearch/post.py b/src/redturtle/volto/restapi/services/querystringsearch/get.py
similarity index 55%
rename from src/redturtle/volto/restapi/services/querystringsearch/post.py
rename to src/redturtle/volto/restapi/services/querystringsearch/get.py
index 6a4459f..eda12fd 100644
--- a/src/redturtle/volto/restapi/services/querystringsearch/post.py
+++ b/src/redturtle/volto/restapi/services/querystringsearch/get.py
@@ -5,9 +5,15 @@
from plone.app.querystring import queryparser
from plone.restapi.batching import HypermediaBatch
from plone.restapi.deserializer import json_body
+from plone.restapi.exceptions import DeserializationError
from plone.restapi.interfaces import ISerializeToJson
from plone.restapi.interfaces import ISerializeToJsonSummary
-from plone.restapi.services.querystringsearch.get import QuerystringSearchPost
+from plone.restapi.services import Service
+from plone.restapi.services.querystringsearch.get import (
+ QuerystringSearch as BaseQuerystringSearch,
+)
+from urllib import parse
+from zExceptions import BadRequest
from zope.component import getMultiAdapter
import logging
@@ -15,22 +21,104 @@
logger = logging.getLogger(__name__)
+# default: 1000
+MAX_LIMIT = 200
-class RTQuerystringSearchPost(QuerystringSearchPost):
- """
- Perform a custom search if we are searching events
- """
- def reply(self):
- if self.is_event_search():
+class QuerystringSearch(BaseQuerystringSearch):
+ def __call__(self):
+ try:
+ data = json_body(self.request)
+ except DeserializationError as err:
+ raise BadRequest(str(err))
+
+ query = data.get("query", None)
+ if self.is_event_search(query=query):
return self.reply_events()
- return super().reply()
- def is_event_search(self):
+ try:
+ b_start = int(data.get("b_start", 0))
+ except ValueError:
+ raise BadRequest("Invalid b_start")
+ try:
+ b_size = int(data.get("b_size", 25))
+ except ValueError:
+ raise BadRequest("Invalid b_size")
+ sort_on = data.get("sort_on", None)
+ sort_order = data.get("sort_order", None)
+
+ # LIMIT PATCH
+ if not query:
+ raise BadRequest("No query supplied")
+ limit = self.get_limit(data=data)
+ # END OF LIMIT PATCH
+
+ fullobjects = bool(data.get("fullobjects", False))
+
+ if sort_order:
+ sort_order = "descending" if sort_order == "descending" else "ascending"
+
+ querybuilder = getMultiAdapter(
+ (self.context, self.request), name="querybuilderresults"
+ )
+
+ querybuilder_parameters = dict(
+ query=query,
+ brains=True,
+ b_start=b_start,
+ b_size=b_size,
+ sort_on=sort_on,
+ sort_order=sort_order,
+ limit=limit,
+ )
+
+ # PATCH: we disable this query to boost performances on big sites
+ # Exclude "self" content item from the results when ZCatalog supports NOT UUID
+ # queries and it is called on a content object.
+ # if not IPloneSiteRoot.providedBy(self.context) and SUPPORT_NOT_UUID_QUERIES:
+ # querybuilder_parameters.update(
+ # dict(custom_query={"UID": {"not": self.context.UID()}})
+ # )
+ # END OF PATCH
+
+ try:
+ results = querybuilder(**querybuilder_parameters)
+ except KeyError:
+ # This can happen if the query has an invalid operation,
+ # but plone.app.querystring doesn't raise an exception
+ # with specific info.
+ raise BadRequest("Invalid query.")
+
+ results = getMultiAdapter((results, self.request), ISerializeToJson)(
+ fullobjects=fullobjects
+ )
+ return results
+
+ def get_limit(self, data):
+ """
+ If limit is <= 0 or higher than MAX_LIMIT, set it to MAX_LIMIT
+ """
+ try:
+ limit = int(data.get("limit", MAX_LIMIT))
+ except ValueError:
+ raise BadRequest("Invalid limit")
+
+ if "limit" in data and limit <= 0:
+ del data["limit"]
+ limit = MAX_LIMIT
+ if limit > MAX_LIMIT:
+ logger.warning(
+ '[wrong query] limit is too high: "{}". Set to default ({}).'.format(
+ data["query"], MAX_LIMIT
+ )
+ )
+ limit = MAX_LIMIT
+ return limit
+
+ def is_event_search(self, query):
"""
Check if we need to perform a custom search with p.a.events method
"""
- query = json_body(self.request).get("query", [])
indexes = [x["i"] for x in query]
portal_type_check = False
@@ -52,7 +140,7 @@ def generate_query_for_events(self):
b_size = data.get("b_size", None)
b_start = data.get("b_start", 0)
query = {k: v for k, v in parsed_query.items() if k not in ["start", "end"]}
- limit = int(data.get("limit", 1000))
+ limit = int(self.get_limit(data=data))
sort = "start"
sort_reverse = False
start, end = self.parse_event_dates(parsed_query)
@@ -167,3 +255,26 @@ def reply_events(self):
results["items"].append(result)
return results
+
+
+class QuerystringSearchPost(Service):
+ """Copied from plone.restapi == 8.42.0"""
+
+ def reply(self):
+ querystring_search = QuerystringSearch(self.context, self.request)
+ return querystring_search()
+
+
+class QuerystringSearchGet(Service):
+ """Copied from plone.restapi == 8.42.0"""
+
+ def reply(self):
+ # We need to copy the JSON query parameters from the querystring
+ # into the request body, because that's where other code expects to find them
+ self.request["BODY"] = parse.unquote(
+ self.request.form.get("query", "{}")
+ ).encode(self.request.charset)
+ # unset the get parameters
+ self.request.form = {}
+ querystring_search = QuerystringSearch(self.context, self.request)
+ return querystring_search()
diff --git a/src/redturtle/volto/restapi/services/search/get.py b/src/redturtle/volto/restapi/services/search/get.py
index 14a3ebc..30541cf 100644
--- a/src/redturtle/volto/restapi/services/search/get.py
+++ b/src/redturtle/volto/restapi/services/search/get.py
@@ -23,6 +23,8 @@
except ImportError:
HAS_ADVANCEDQUERY = False
+# default: 1000
+MAX_LIMIT = 200
# custom search handler
@@ -119,6 +121,29 @@ def search(self, query=None):
else:
return super(SearchHandler, self).search(query)
+ def _parse_query(self, query):
+ query = super()._parse_query(query)
+ for idx in ["sort_limit", "b_size"]:
+ if idx not in query:
+ continue
+ value = query.get(idx, MAX_LIMIT)
+ if value <= 0:
+ logger.warning(
+ '[wrong query] {} is wrong: "{}". Set to default ({}).'.format(
+ idx, query, MAX_LIMIT
+ )
+ )
+ query[idx] = MAX_LIMIT
+
+ if value > MAX_LIMIT:
+ logger.warning(
+ '[wrong query] {} is too high: "{}". Set to default ({}).'.format(
+ idx, query, MAX_LIMIT
+ )
+ )
+ query[idx] = MAX_LIMIT
+ return query
+
class SearchGet(Service):
def reply(self):
diff --git a/src/redturtle/volto/tests/test_catalog_limit_patches.py b/src/redturtle/volto/tests/test_catalog_limit_patches.py
new file mode 100644
index 0000000..49a4f91
--- /dev/null
+++ b/src/redturtle/volto/tests/test_catalog_limit_patches.py
@@ -0,0 +1,85 @@
+# -*- coding: utf-8 -*-
+from plone import api
+from plone.app.testing import setRoles
+from plone.app.testing import SITE_OWNER_NAME
+from plone.app.testing import SITE_OWNER_PASSWORD
+from plone.app.testing import TEST_USER_ID
+from plone.restapi.testing import RelativeSession
+from redturtle.volto.restapi.services.search.get import MAX_LIMIT
+from redturtle.volto.testing import REDTURTLE_VOLTO_API_FUNCTIONAL_TESTING
+from transaction import commit
+from urllib.parse import quote
+
+import json
+import unittest
+
+
+class CatalogLimitPatches(unittest.TestCase):
+ layer = REDTURTLE_VOLTO_API_FUNCTIONAL_TESTING
+
+ def setUp(self):
+ self.app = self.layer["app"]
+ self.portal = self.layer["portal"]
+ self.portal_url = self.portal.absolute_url()
+
+ setRoles(self.portal, TEST_USER_ID, ["Manager"])
+
+ for i in range(210):
+ api.content.create(
+ container=self.portal,
+ type="Document",
+ title=f"Document {i}",
+ )
+ commit()
+
+ self.api_session = RelativeSession(self.portal_url)
+ self.api_session.headers.update({"Accept": "application/json"})
+ self.api_session.auth = (SITE_OWNER_NAME, SITE_OWNER_PASSWORD)
+
+ def tearDown(self):
+ self.api_session.close()
+
+ def test_search_b_size_to_200(self):
+ response = self.api_session.get(
+ "/@search", params={"portal_type": "Document", "b_size": 1000}
+ )
+ result = response.json()
+ self.assertEqual(len(result["items"]), MAX_LIMIT)
+
+ def test_querystringsearch_post_limit_200(self):
+ response = self.api_session.post(
+ "/@querystring-search",
+ json={
+ "query": [
+ {
+ "i": "portal_type",
+ "o": "plone.app.querystring.operation.selection.is",
+ "v": ["Document"],
+ }
+ ],
+ "limit": 2000,
+ "b_size": 2000,
+ },
+ )
+ result = response.json()
+ self.assertEqual(len(result["items"]), MAX_LIMIT)
+ self.assertEqual(result["items_total"], MAX_LIMIT)
+
+ def test_querystringsearch_get_limit_200(self):
+ query = {
+ "query": [
+ {
+ "i": "portal_type",
+ "o": "plone.app.querystring.operation.selection.any",
+ "v": ["Document"],
+ }
+ ],
+ "b_size": 2000,
+ "limit": 2000,
+ }
+ response = self.api_session.get(
+ f"/@querystring-search?query={quote(json.dumps(query))}",
+ )
+ result = response.json()
+ self.assertEqual(len(result["items"]), MAX_LIMIT)
+ self.assertEqual(result["items_total"], MAX_LIMIT)
diff --git a/test_plone52.cfg b/test_plone52.cfg
index fbbd01d..977c6d4 100644
--- a/test_plone52.cfg
+++ b/test_plone52.cfg
@@ -14,7 +14,7 @@ plone.app.versioningbehavior = 1.4.6
plone.app.vocabularies = 4.3.0
plone.patternslib = 1.1.1
plone.rest = 2.0.0
-plone.restapi = 8.32.4
+plone.restapi = 8.42.0
plone.volto = 4.0.0
pycountry = 19.8.18
collective.purgebyid = 1.1.1