Skip to content

Commit

Permalink
Catalog patches (#70)
Browse files Browse the repository at this point in the history
* Add catalog and search patches to limit results

* remove unused import

* remove unused patches

* removed unused import

* remove unused code

* align querystring-search with new version in restapi and fix tests

* zpretty

* force min restapi version

* leggibilità

---------

Co-authored-by: Mauro Amico <[email protected]>
  • Loading branch information
cekk and mamico authored Aug 21, 2023
1 parent 99adbd2 commit 4cd1eb0
Show file tree
Hide file tree
Showing 9 changed files with 344 additions and 17 deletions.
3 changes: 2 additions & 1 deletion CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ Changelog
5.1.1 (unreleased)
------------------

- Nothing changed yet.
- Add catalog and search patches to limit results.
[cekk]


5.1.0 (2023-08-17)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
"collective.purgebyid",
"kitconcept.seo>=2.0.0",
"plone.volto>=4.0.0",
"plone.restapi>=8.16.1",
"plone.restapi>=8.36.0",
"Products.PortalTransforms>=3.2.0",
],
extras_require={
Expand Down
64 changes: 64 additions & 0 deletions src/redturtle/volto/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
# -*- coding: utf-8 -*-
"""Init and utils."""
from plone.app.content.browser.vocabulary import PERMISSIONS
from plone.folder.nogopip import GopipIndex
from Products.ZCatalog.Catalog import Catalog
from redturtle.volto.catalogplan import Catalog_sorted_search_indexes
from zope.i18nmessageid import MessageFactory
from ZTUtils.Lazy import LazyCat
from ZTUtils.Lazy import LazyMap


import logging

Expand All @@ -12,3 +18,61 @@
_ = MessageFactory("redturtle.volto")

PERMISSIONS["plone.app.vocabularies.Keywords"] = "View"

# CATALOG PATCHES

logger.info(
"install monkey patch for Products.ZCatalog.Catalog.Catalog._sorted_search_indexes #### WORK IN PROGRESS ####"
)
Catalog._orig_sorted_search_indexes = Catalog._sorted_search_indexes
Catalog._sorted_search_indexes = Catalog_sorted_search_indexes

MAX_SORTABLE = 5000


def Catalog_sortResults(
self,
rs,
sort_index,
reverse=False,
limit=None,
merge=True,
actual_result_count=None,
b_start=0,
b_size=None,
):
if MAX_SORTABLE > 0:
if actual_result_count is None:
actual_result_count = len(rs)
if actual_result_count >= MAX_SORTABLE and isinstance(sort_index, GopipIndex):
logger.warning(
"too many results %s disable GopipIndex sorting", actual_result_count
)
switched_reverse = bool(
b_size and b_start and b_start > actual_result_count / 2
)
if hasattr(rs, "keys"):
sequence, slen = self._limit_sequence(
rs.keys(), actual_result_count, b_start, b_size, switched_reverse
)
return LazyMap(
self.__getitem__,
sequence,
len(sequence),
actual_result_count=actual_result_count,
)
else:
logger.error(
"too many results %s disable GopipIndex sorting results %s has no key",
actual_result_count,
type(rs),
)
return LazyCat([], 0, actual_result_count)
return self._orig_sortResults(
rs, sort_index, reverse, limit, merge, actual_result_count, b_start, b_size
)


logger.info("install monkey patch for Products.ZCatalog.Catalog.Catalog.sortResults")
Catalog._orig_sortResults = Catalog.sortResults
Catalog.sortResults = Catalog_sortResults
21 changes: 21 additions & 0 deletions src/redturtle/volto/catalogplan.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-
CUSTOM = {
"allowedRolesAndUsers": 80,
"effectiveRange": 100,
}


def rank_index(index, query):
if (
index == "UID"
and isinstance(query.get("UID"), dict) # noqa
and query["UID"].get("not") # noqa
):
return 100
return CUSTOM.get(index, 0)


def Catalog_sorted_search_indexes(self, query):
return sorted(
self._orig_sorted_search_indexes(query), key=lambda i: (rank_index(i, query), i)
)
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
xmlns="http://namespaces.zope.org/zope"
xmlns:cache="http://namespaces.zope.org/cache"
xmlns:plone="http://namespaces.plone.org/plone"
xmlns:zcml="http://namespaces.zope.org/zcml"
>

<plone:service
method="POST"
factory=".post.RTQuerystringSearchPost"
factory=".get.QuerystringSearchPost"
for="Products.CMFPlone.interfaces.IPloneSiteRoot"
permission="zope2.View"
layer="redturtle.volto.interfaces.IRedturtleVoltoLayer"
Expand All @@ -15,16 +16,35 @@

<plone:service
method="POST"
factory=".post.RTQuerystringSearchPost"
factory=".get.QuerystringSearchPost"
for="Products.CMFCore.interfaces.IContentish"
permission="zope2.View"
layer="redturtle.volto.interfaces.IRedturtleVoltoLayer"
name="@querystring-search"
/>

<plone:service
method="GET"
factory=".get.QuerystringSearchGet"
for="Products.CMFPlone.interfaces.IPloneSiteRoot"
permission="zope2.View"
layer="redturtle.volto.interfaces.IRedturtleVoltoLayer"
name="@querystring-search"
/>

<plone:service
method="GET"
factory=".get.QuerystringSearchGet"
for="Products.CMFCore.interfaces.IContentish"
permission="zope2.View"
layer="redturtle.volto.interfaces.IRedturtleVoltoLayer"
name="@querystring-search"
/>

<cache:ruleset
for=".post.RTQuerystringSearchPost"
for=".get.QuerystringSearchGet"
ruleset="plone.content.dynamic"
zcml:condition="have plone-app-caching-3"
/>

</configure>
Original file line number Diff line number Diff line change
Expand Up @@ -5,32 +5,120 @@
from plone.app.querystring import queryparser
from plone.restapi.batching import HypermediaBatch
from plone.restapi.deserializer import json_body
from plone.restapi.exceptions import DeserializationError
from plone.restapi.interfaces import ISerializeToJson
from plone.restapi.interfaces import ISerializeToJsonSummary
from plone.restapi.services.querystringsearch.get import QuerystringSearchPost
from plone.restapi.services import Service
from plone.restapi.services.querystringsearch.get import (
QuerystringSearch as BaseQuerystringSearch,
)
from urllib import parse
from zExceptions import BadRequest
from zope.component import getMultiAdapter

import logging


logger = logging.getLogger(__name__)

# default: 1000
MAX_LIMIT = 200

class RTQuerystringSearchPost(QuerystringSearchPost):
"""
Perform a custom search if we are searching events
"""

def reply(self):
if self.is_event_search():
class QuerystringSearch(BaseQuerystringSearch):
def __call__(self):
try:
data = json_body(self.request)
except DeserializationError as err:
raise BadRequest(str(err))

query = data.get("query", None)
if self.is_event_search(query=query):
return self.reply_events()
return super().reply()

def is_event_search(self):
try:
b_start = int(data.get("b_start", 0))
except ValueError:
raise BadRequest("Invalid b_start")
try:
b_size = int(data.get("b_size", 25))
except ValueError:
raise BadRequest("Invalid b_size")
sort_on = data.get("sort_on", None)
sort_order = data.get("sort_order", None)

# LIMIT PATCH
if not query:
raise BadRequest("No query supplied")
limit = self.get_limit(data=data)
# END OF LIMIT PATCH

fullobjects = bool(data.get("fullobjects", False))

if sort_order:
sort_order = "descending" if sort_order == "descending" else "ascending"

querybuilder = getMultiAdapter(
(self.context, self.request), name="querybuilderresults"
)

querybuilder_parameters = dict(
query=query,
brains=True,
b_start=b_start,
b_size=b_size,
sort_on=sort_on,
sort_order=sort_order,
limit=limit,
)

# PATCH: we disable this query to boost performances on big sites
# Exclude "self" content item from the results when ZCatalog supports NOT UUID
# queries and it is called on a content object.
# if not IPloneSiteRoot.providedBy(self.context) and SUPPORT_NOT_UUID_QUERIES:
# querybuilder_parameters.update(
# dict(custom_query={"UID": {"not": self.context.UID()}})
# )
# END OF PATCH

try:
results = querybuilder(**querybuilder_parameters)
except KeyError:
# This can happen if the query has an invalid operation,
# but plone.app.querystring doesn't raise an exception
# with specific info.
raise BadRequest("Invalid query.")

results = getMultiAdapter((results, self.request), ISerializeToJson)(
fullobjects=fullobjects
)
return results

def get_limit(self, data):
"""
If limit is <= 0 or higher than MAX_LIMIT, set it to MAX_LIMIT
"""
try:
limit = int(data.get("limit", MAX_LIMIT))
except ValueError:
raise BadRequest("Invalid limit")

if "limit" in data and limit <= 0:
del data["limit"]
limit = MAX_LIMIT
if limit > MAX_LIMIT:
logger.warning(
'[wrong query] limit is too high: "{}". Set to default ({}).'.format(
data["query"], MAX_LIMIT
)
)
limit = MAX_LIMIT
return limit

def is_event_search(self, query):
"""
Check if we need to perform a custom search with p.a.events method
"""
query = json_body(self.request).get("query", [])
indexes = [x["i"] for x in query]

portal_type_check = False
Expand All @@ -52,7 +140,7 @@ def generate_query_for_events(self):
b_size = data.get("b_size", None)
b_start = data.get("b_start", 0)
query = {k: v for k, v in parsed_query.items() if k not in ["start", "end"]}
limit = int(data.get("limit", 1000))
limit = int(self.get_limit(data=data))
sort = "start"
sort_reverse = False
start, end = self.parse_event_dates(parsed_query)
Expand Down Expand Up @@ -167,3 +255,26 @@ def reply_events(self):
results["items"].append(result)

return results


class QuerystringSearchPost(Service):
"""Copied from plone.restapi == 8.42.0"""

def reply(self):
querystring_search = QuerystringSearch(self.context, self.request)
return querystring_search()


class QuerystringSearchGet(Service):
"""Copied from plone.restapi == 8.42.0"""

def reply(self):
# We need to copy the JSON query parameters from the querystring
# into the request body, because that's where other code expects to find them
self.request["BODY"] = parse.unquote(
self.request.form.get("query", "{}")
).encode(self.request.charset)
# unset the get parameters
self.request.form = {}
querystring_search = QuerystringSearch(self.context, self.request)
return querystring_search()
25 changes: 25 additions & 0 deletions src/redturtle/volto/restapi/services/search/get.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
except ImportError:
HAS_ADVANCEDQUERY = False

# default: 1000
MAX_LIMIT = 200

# custom search handler

Expand Down Expand Up @@ -119,6 +121,29 @@ def search(self, query=None):
else:
return super(SearchHandler, self).search(query)

def _parse_query(self, query):
query = super()._parse_query(query)
for idx in ["sort_limit", "b_size"]:
if idx not in query:
continue
value = query.get(idx, MAX_LIMIT)
if value <= 0:
logger.warning(
'[wrong query] {} is wrong: "{}". Set to default ({}).'.format(
idx, query, MAX_LIMIT
)
)
query[idx] = MAX_LIMIT

if value > MAX_LIMIT:
logger.warning(
'[wrong query] {} is too high: "{}". Set to default ({}).'.format(
idx, query, MAX_LIMIT
)
)
query[idx] = MAX_LIMIT
return query


class SearchGet(Service):
def reply(self):
Expand Down
Loading

0 comments on commit 4cd1eb0

Please sign in to comment.