From 85d4f14c926213a53b9dd623109a207ec9a74938 Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Thu, 26 Sep 2024 12:47:18 +0200 Subject: [PATCH] update many queries for newer elasticsearch Removes 'and', 'or', 'missing', and 'filtered' queries. Fixes #1264 Removes 'filter' field in 'nested' query. Fixes #1265 Removes 'filter' in 'function_score' queries. Fixes #1266 Removes use of top level 'filter' search parameter. Fixes #1267 Removes use of negative boosts. Fixes #1270 --- docs/API-docs.md | 188 +++++++++-------- lib/MetaCPAN/Document/Distribution.pm | 2 +- lib/MetaCPAN/Document/File/Set.pm | 183 +++++++--------- lib/MetaCPAN/Document/Release.pm | 34 +-- lib/MetaCPAN/Document/Release/Set.pm | 44 ++-- lib/MetaCPAN/Model/Search.pm | 220 ++++++++++---------- lib/MetaCPAN/Model/User/Account/Set.pm | 16 +- lib/MetaCPAN/Query/Author.pm | 14 +- lib/MetaCPAN/Query/Favorite.pm | 8 +- lib/MetaCPAN/Query/File.pm | 18 +- lib/MetaCPAN/Query/Mirror.pm | 13 +- lib/MetaCPAN/Query/Release.pm | 9 +- lib/MetaCPAN/Script/Check.pm | 54 ++--- lib/MetaCPAN/Script/First.pm | 3 +- lib/MetaCPAN/Script/Latest.pm | 2 +- lib/MetaCPAN/Script/Mapping.pm | 8 +- lib/MetaCPAN/Script/Role/External/Debian.pm | 2 +- lib/MetaCPAN/Script/Session.pm | 2 - lib/MetaCPAN/Script/Watcher.pm | 72 +++---- t/model/search.t | 3 +- t/release/documentation-hide.t | 28 +-- t/release/file-changes.t | 2 +- t/release/meta-provides.t | 16 +- t/release/moose.t | 52 +++-- t/release/multiple-modules.t | 26 ++- t/release/pm-PL.t | 2 +- t/release/prefer-meta-json.t | 14 +- t/release/scripts.t | 12 +- t/server/controller/bad_request.t | 20 +- 29 files changed, 536 insertions(+), 531 deletions(-) diff --git a/docs/API-docs.md b/docs/API-docs.md index 7c3a4f854..10137aa6d 100644 --- a/docs/API-docs.md +++ b/docs/API-docs.md @@ -174,22 +174,24 @@ dependency. 
```sh curl -XPOST https://fastapi.metacpan.org/v1/release/_search -d '{ - "size": 5000, - "fields": [ "distribution" ], - "filter": { - "and": [ - { "term": { "dependency.module": "MooseX::NonMoose" } }, - { "term": {"maturity": "released"} }, - { "term": {"status": "latest"} } - ] - } + "size" : 5000, + "fields" : [ "distribution" ], + "query" : { + "bool" : { + "must" : [ + { "term" : { "dependency.module" : "MooseX::NonMoose" } }, + { "term" : { "maturity" : "released" } }, + { "term" : { "status" : "latest" } } + ] + } + } }' ``` -_Note it is also possible to use these queries in GET requests (useful for cross-domain JSONP requests) by appropriately encoding the JSON query into the `source` parameter of the URL. For example the query above [would become](https://fastapi.metacpan.org/v1/release/_search?source=%7B%22query%22%3A%7B%22match_all%22%3A%7B%7D%7D%2C%22size%22%3A5000%2C%22fields%22%3A%5B%22distribution%22%5D%2C%22filter%22%3A%7B%22and%22%3A%5B%7B%22term%22%3A%7B%22release.dependency.module%22%3A%22MooseX%3A%3ANonMoose%22%7D%7D%2C%7B%22term%22%3A%7B%22release.maturity%22%3A%22released%22%7D%7D%2C%7B%22term%22%3A%7B%22release.status%22%3A%22latest%22%7D%7D%5D%7D%7D):_ +_Note it is also possible to use these queries in GET requests (useful for cross-domain JSONP requests) by appropriately encoding the JSON query into the `source` parameter of the URL. 
For example the query above [would become](https://fastapi.metacpan.org/v1/release/_search?source=%7B%0A%20%20%20%20%22size%22%20%3A%205000%2C%0A%20%20%20%20%22fields%22%20%3A%20%5B%20%22distribution%22%20%5D%2C%0A%20%20%20%20%22query%22%20%3A%20%7B%0A%20%20%20%20%20%20%20%20%22bool%22%20%3A%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20%22must%22%20%3A%20%5B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%7B%20%22term%22%20%3A%20%7B%20%22dependency.module%22%20%3A%20%22MooseX%3A%3ANonMoose%22%20%7D%20%7D%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%7B%20%22term%22%20%3A%20%7B%20%22maturity%22%20%3A%20%22released%22%20%7D%20%7D%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%7B%20%22term%22%20%3A%20%7B%20%22status%22%20%3A%20%22latest%22%20%7D%20%7D%0A%20%20%20%20%20%20%20%20%20%20%20%20%5D%0A%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%7D%0A%7D):_ ``` -curl 'https://fastapi.metacpan.org/v1/release/_search?source=%7B%22query%22%3A%7B%22match_all%22%3A%7B%7D%7D%2C%22size%22%3A5000%2C%22fields%22%3A%5B%22distribution%22%5D%2C%22filter%22%3A%7B%22and%22%3A%5B%7B%22term%22%3A%7B%22release.dependency.module%22%3A%22MooseX%3A%3ANonMoose%22%7D%7D%2C%7B%22term%22%3A%7B%22release.maturity%22%3A%22released%22%7D%7D%2C%7B%22term%22%3A%7B%22release.status%22%3A%22latest%22%7D%7D%5D%7D%7D' +curl 
'https://fastapi.metacpan.org/v1/release/_search?source=%7B%0A%20%20%20%20%22size%22%20%3A%205000%2C%0A%20%20%20%20%22fields%22%20%3A%20%5B%20%22distribution%22%20%5D%2C%0A%20%20%20%20%22query%22%20%3A%20%7B%0A%20%20%20%20%20%20%20%20%22bool%22%20%3A%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20%22must%22%20%3A%20%5B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%7B%20%22term%22%20%3A%20%7B%20%22dependency.module%22%20%3A%20%22MooseX%3A%3ANonMoose%22%20%7D%20%7D%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%7B%20%22term%22%20%3A%20%7B%20%22maturity%22%20%3A%20%22released%22%20%7D%20%7D%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%7B%20%22term%22%20%3A%20%7B%20%22status%22%20%3A%20%22latest%22%20%7D%20%7D%0A%20%20%20%20%20%20%20%20%20%20%20%20%5D%0A%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%7D%0A%7D' ``` ### [The size of the CPAN unpacked](https://github.com/metacpan/metacpan-examples/blob/master/scripts/file/5-size-of-cpan.pl) @@ -199,15 +201,15 @@ curl 'https://fastapi.metacpan.org/v1/release/_search?source=%7B%22query%22%3A%7 ```sh curl -XPOST https://fastapi.metacpan.org/v1/release/_search?size=100 -d '{ - "query": { - "range" : { - "date" : { - "gte" : "2010-06-05T00:00:00", - "lte" : "2011-06-05T00:00:00" + "query" : { + "range" : { + "date" : { + "gte" : "2010-06-05T00:00:00", + "lte" : "2011-06-05T00:00:00" + } } - } - }, - "fields": ["license", "name", "distribution", "date", "version_numified"] + }, + "fields": [ "license", "name", "distribution", "date", "version_numified" ] }' ``` @@ -215,17 +217,17 @@ curl -XPOST https://fastapi.metacpan.org/v1/release/_search?size=100 -d '{ ```sh curl -XPOST https://fastapi.metacpan.org/v1/release/_search -d '{ - "query": { - "match_all": {} + "query" : { + "match_all" : {} }, - "aggs": { - "license": { - "terms": { - "field": "license" + "aggs" : { + "license" : { + "terms" : { + "field" : "license" } } }, - "size": 0 + "size" : 0 }' ``` @@ -233,15 +235,18 @@ curl -XPOST 
https://fastapi.metacpan.org/v1/release/_search -d '{ ```sh curl -XPOST https://fastapi.metacpan.org/v1/file/_search -d '{ - "query": { "filtered":{"query":{"match_all":{}},"filter":{"term":{"level":0}}} - }, - "aggs": { - "license": { - "terms": { - "size":100, - "field":"name" - } } }, - "size":0 + "query" : { + "term" : { "level" : 0 } + }, + "aggs" : { + "license" : { + "terms" : { + "size" : 100, + "field" : "name" + } + } + }, + "size" : 0 }' ``` @@ -249,14 +254,15 @@ curl -XPOST https://fastapi.metacpan.org/v1/file/_search -d '{ ```sh curl -XPOST https://fastapi.metacpan.org/v1/file/_search -d '{ - "query": { "filtered":{ - "query":{"match_all":{}}, - "filter":{"and":[ - {"term":{"module.name":"DBI::Profile"}}, - {"term":{"module.version":"2.014123"}} - ]} - }}, - "fields":["release"] + "query" : { + "bool" : { + "must" : [ + { "term" : { "module.name" : "DBI::Profile" } }, + { "term" : { "module.version" : "2.014123" } } + ] + } + }, + "fields" : [ "release" ] }' ``` @@ -277,14 +283,24 @@ Note that "size" should be the number of distributions you are looking for. 
```sh lynx --dump --post_data https://fastapi.metacpan.org/v1/release/_search < ( sub releases { my $self = shift; return $self->index->type("release") - ->filter( { term => { "distribution" => $self->name } } ); + ->query( { term => { "distribution" => $self->name } } ); } sub set_first_release { diff --git a/lib/MetaCPAN/Document/File/Set.pm b/lib/MetaCPAN/Document/File/Set.pm index c758497df..4c3b331e6 100644 --- a/lib/MetaCPAN/Document/File/Set.pm +++ b/lib/MetaCPAN/Document/File/Set.pm @@ -78,53 +78,47 @@ sub find { my @candidates = $self->index->type('file')->query( { bool => { must => [ - { term => { indexed => 1, } }, + { term => { indexed => 1 } }, { term => { authorized => 1 } }, { term => { status => 'latest' } }, { - or => [ - { - nested => { - path => "module", - filter => { - and => [ - { - term => { - "module.name" => $module - } - }, - { - term => { - "module.authorized" => 1 - } + bool => { + should => [ + { term => { documentation => $module } }, + { + nested => { + path => "module", + query => { + bool => { + must => [ + { + term => { "module.name" => + $module } + }, + { + bool => { should => + [ + { term => + { "module.authorized" + => 1 } + }, + { exists => + { field => + 'module.associated_pod' + } }, + ], + } + }, + ], }, - ] + }, } - } - }, - { term => { documentation => $module } }, - ] - }, - ], - should => [ - { term => { documentation => $module } }, - { - nested => { - path => 'module', - filter => { - and => [ - { term => { 'module.name' => $module } }, - { - exists => { - field => 'module.associated_pod' - } - }, - ] - } + }, + ] } }, - ] - } + ], + }, } )->sort( [ '_score', { 'version_numified' => { order => 'desc' } }, @@ -165,40 +159,32 @@ sub find_pod { sub documented_modules { my ( $self, $release ) = @_; - return $self->filter( { - and => [ - { term => { release => $release->{name} } }, - { term => { author => $release->{author} } }, - { exists => { field => "documentation" } }, - { - or => [ - { - and => [ - { - exists => { - 
field => 'module.name', - } - }, - { - term => { - 'module.indexed' => 1 - } + return $self->query( { + bool => { + must => [ + { term => { release => $release->{name} } }, + { term => { author => $release->{author} } }, + { exists => { field => "documentation" } }, + { + bool => { + should => [ + { bool => { + must => [ + { exists => { field => 'module.name' } }, + { term => { 'module.indexed' => 1 } }, + ] }, }, - ] - }, - { - and => [ - { - exists => { - field => 'pod.analyzed', - } + { bool => { + must => [ + { exists => { field => 'pod.analyzed' } }, + { term => { indexed => 1 } }, + ] }, }, - { term => { indexed => 1 } }, - ] - }, - ] - }, - ], + ], + } + }, + ], + }, } )->size(999) ->source( [qw(name module path documentation distribution)] )->all; } @@ -270,32 +256,25 @@ sub autocomplete { return $self unless $query; my $data = $self->search_type('dfs_query_then_fetch')->query( { - filtered => { - query => { - multi_match => { - query => $query, - type => 'most_fields', - fields => [ 'documentation', 'documentation.*' ], - analyzer => 'camelcase', - minimum_should_match => '80%' + bool => { + must => [ + { + multi_match => { + query => $query, + type => 'most_fields', + fields => [ 'documentation', 'documentation.*' ], + analyzer => 'camelcase', + minimum_should_match => '80%' + } }, - }, - filter => { - bool => { - must => [ - { exists => { field => 'documentation' } }, - { term => { status => 'latest' } }, - { term => { indexed => 1 } }, - { term => { authorized => 1 } } - ], - must_not => [ - { - terms => { distribution => \@ROGUE_DISTRIBUTIONS } - }, - ], - } - } - } + { exists => { field => 'documentation' } }, + { term => { status => 'latest' } }, + { term => { indexed => 1 } }, + { term => { authorized => 1 } } + ], + must_not => + [ { terms => { distribution => \@ROGUE_DISTRIBUTIONS } }, ], + }, } )->sort( [ '_score', 'documentation' ] ); $data = $data->fields( [qw(documentation release author distribution)] ) @@ -351,12 +330,10 @@ sub 
autocomplete_suggester { query => { bool => { must => [ - { term => { indexed => 1 } }, - { term => { authorized => 1 } }, - { term => { status => 'latest' } }, - { - terms => { 'documentation' => [ keys %docs ] } - }, + { term => { indexed => 1 } }, + { term => { authorized => 1 } }, + { term => { status => 'latest' } }, + { terms => { documentation => [ keys %docs ] } }, ], must_not => [ { diff --git a/lib/MetaCPAN/Document/Release.pm b/lib/MetaCPAN/Document/Release.pm index 0912270da..0ec62788a 100644 --- a/lib/MetaCPAN/Document/Release.pm +++ b/lib/MetaCPAN/Document/Release.pm @@ -278,23 +278,23 @@ sub _build_download_url { sub set_first { my $self = shift; - my $is_first = $self->index->type('release')->filter( { - and => [ - { term => { distribution => $self->distribution } }, - { - range => { - version_numified => { 'lt' => $self->version_numified } - } - }, - - # REINDEX: after a full reindex, the above line is to replaced with: - # { term => { first => 1 } }, - # currently, the "first" property is not computed on all releases - # since this feature has not been around when last reindexed - ] - } )->count - ? 0 - : 1; + my $is_first = $self->index->type('release')->query( { + bool => { + must => [ + { term => { distribution => $self->distribution } }, + { + range => { + version_numified => { lt => $self->version_numified } + } + }, + ], + }, + + # REINDEX: after a full reindex, the above line is to replaced with: + # { term => { first => 1 } }, + # currently, the "first" property is not computed on all releases + # since this feature has not been around when last reindexed + } )->count ? 
0 : 1; $self->_set_first($is_first); } diff --git a/lib/MetaCPAN/Document/Release/Set.pm b/lib/MetaCPAN/Document/Release/Set.pm index c052652dd..08a2c462b 100644 --- a/lib/MetaCPAN/Document/Release/Set.pm +++ b/lib/MetaCPAN/Document/Release/Set.pm @@ -44,26 +44,30 @@ sub _build_query_release { } sub find_github_based { - shift->filter( { - and => [ - { term => { status => 'latest' } }, - { - or => [ - { - prefix => { - "resources.bugtracker.web" => - 'http://github.com/' - } - }, - { - prefix => { - "resources.bugtracker.web" => - 'https://github.com/' - } - }, - ] - } - ] + shift->query( { + bool => { + must => [ + { term => { status => 'latest' } }, + { + bool => { + should => [ + { + prefix => { + "resources.bugtracker.web" => + 'http://github.com/' + } + }, + { + prefix => { + "resources.bugtracker.web" => + 'https://github.com/' + } + }, + ], + } + }, + ], + }, } ); } diff --git a/lib/MetaCPAN/Model/Search.pm b/lib/MetaCPAN/Model/Search.pm index 43ea5a979..d46a1d2b9 100644 --- a/lib/MetaCPAN/Model/Search.pm +++ b/lib/MetaCPAN/Model/Search.pm @@ -37,12 +37,6 @@ const my @ROGUE_DISTRIBUTIONS => qw( spodcxx ); -sub _not_rogue { - my @rogue_dists - = map { { term => { 'distribution' => $_ } } } @ROGUE_DISTRIBUTIONS; - return { not => { filter => { or => \@rogue_dists } } }; -} - sub search_for_first_result { my ( $self, $search_term ) = @_; my $es_query = $self->build_query($search_term); @@ -218,133 +212,131 @@ sub build_query { $params //= {}; ( my $clean = $search_term ) =~ s/::/ /g; - my $negative = { - bool => { - should => [ - { term => { 'mime' => { value => 'text/x-script.perl' } } }, - { term => { 'deprecated' => { value => 1, boost => -100 } } }, - ], - }, - }; - - my $positive = { + my $query = { bool => { - should => [ - - # exact matches result in a huge boost + filter => [ + { term => { status => 'latest' } }, + { term => { authorized => 1 } }, + { term => { indexed => 1 } }, { - term => { - 'documentation' => { - value => $search_term, - boost => 
20, - } + bool => { + should => [ + { + bool => { + must => [ + { + exists => + { field => 'module.name' } + }, + { term => { 'module.indexed' => 1 } } + ], + } + }, + { exists => { field => 'documentation' } }, + ], } }, + ], + must_not => + [ { terms => { distribution => \@ROGUE_DISTRIBUTIONS } }, ], + must => [ { - term => { - 'module.name' => { - value => $search_term, - boost => 20, - } - } - }, + bool => { + should => [ - # take the maximum score from the module name and the abstract/pod - { - dis_max => { - queries => [ + # exact matches result in a huge boost { - query_string => { - fields => [ - qw(documentation.analyzed^2 module.name.analyzed^2 distribution.analyzed), - qw(documentation.camelcase module.name.camelcase distribution.camelcase) - ], - query => $clean, - boost => 3, - default_operator => 'AND', - allow_leading_wildcard => 0, - use_dis_max => 1, - + term => { + 'documentation' => { + value => $search_term, + boost => 20, + } } }, { - query_string => { - fields => - [qw(abstract.analyzed pod.analyzed)], - query => $clean, - default_operator => 'AND', - allow_leading_wildcard => 0, - use_dis_max => 1, + term => { + 'module.name' => { + value => $search_term, + boost => 20, + } + } + }, + # take the maximum score from the module name and the abstract/pod + { + dis_max => { + queries => [ + { + query_string => { + fields => [ + qw(documentation.analyzed^2 module.name.analyzed^2 distribution.analyzed), + qw(documentation.camelcase module.name.camelcase distribution.camelcase) + ], + query => $clean, + boost => 3, + default_operator => 'AND', + allow_leading_wildcard => 0, + use_dis_max => 1, + + } + }, + { + query_string => { + fields => [ + qw(abstract.analyzed pod.analyzed) + ], + query => $clean, + default_operator => 'AND', + allow_leading_wildcard => 0, + use_dis_max => 1, + }, + }, + ], } - } - ] + }, + ], } - } - - ] - } + }, + ], + }, }; - my $search = merge( - $params, - { + $query = { + function_score => { + script_score => { + + # prefer 
shorter module names + script => { + lang => 'expression', + inline => + "_score - (doc['documentation_length'].value == 0 ? 26 : doc['documentation_length'].value)/400", + }, + }, query => { - filtered => { - query => { - function_score => { - - # prefer shorter module names - script_score => { - script => { - lang => 'expression', - inline => - "_score - (doc['documentation_length'].value == 0 ? 26 : doc['documentation_length'].value)/400", + boosting => { + negative_boost => 0.5, + positive => $query, + negative => { + bool => { + should => [ + { + term => { 'mime' => 'text/x-script.perl' } }, - }, - query => { - boosting => { - negative_boost => 0.5, - negative => $negative, - positive => $positive - } - } - } + { term => { 'deprecated' => 1 } }, + ], + }, }, - filter => { - and => [ - $self->_not_rogue, - { term => { status => 'latest' } }, - { term => { 'authorized' => 1 } }, - { term => { 'indexed' => 1 } }, - { - or => [ - { - and => [ - { - exists => { - field => 'module.name' - } - }, - { - term => - { 'module.indexed' => 1 } - } - ] - }, - { - exists => { field => 'documentation' } - }, - ] - } - ] - } - } + }, }, - _source => [ - "module", - ], - fields => [ qw( + }, + }; + + my $search = merge( + $params, + { + query => $query, + _source => [ "module", ], + fields => [ qw( abstract.analyzed author authorized diff --git a/lib/MetaCPAN/Model/User/Account/Set.pm b/lib/MetaCPAN/Model/User/Account/Set.pm index 35d26c515..6891aacf1 100644 --- a/lib/MetaCPAN/Model/User/Account/Set.pm +++ b/lib/MetaCPAN/Model/User/Account/Set.pm @@ -15,11 +15,13 @@ Find an account based on its identity. 
sub find { my ( $self, $p ) = @_; - return $self->filter( { - and => [ - { term => { 'identity.name' => $p->{name} } }, - { term => { 'identity.key' => $p->{key} } } - ] + return $self->query( { + bool => { + must => [ + { term => { 'identity.name' => $p->{name} } }, + { term => { 'identity.key' => $p->{key} } }, + ], + } } )->first; } @@ -33,7 +35,7 @@ Find account by C<$code>. See L. sub find_code { my ( $self, $token ) = @_; - return $self->filter( { term => { 'code' => $token } } )->first; + return $self->query( { term => { code => $token } } )->first; } =head2 find_token @@ -46,7 +48,7 @@ Find account by C<$access_token>. See L. sub find_token { my ( $self, $token ) = @_; - return $self->filter( { term => { 'access_token.token' => $token } } ) + return $self->query( { term => { 'access_token.token' => $token } } ) ->first; } diff --git a/lib/MetaCPAN/Query/Author.pm b/lib/MetaCPAN/Query/Author.pm index e172520ad..ea52e58fe 100644 --- a/lib/MetaCPAN/Query/Author.pm +++ b/lib/MetaCPAN/Query/Author.pm @@ -13,12 +13,8 @@ sub by_ids { map {uc} @{$ids}; my $body = { - query => { - constant_score => { - filter => { ids => { values => $ids } } - } - }, - size => scalar @{$ids}, + query => { ids => { values => $ids } }, + size => scalar @{$ids}, }; my $authors = $self->es->search( @@ -74,18 +70,18 @@ sub search { { match => { 'name.analyzed' => - { query => $query, operator => 'and' } + { query => $query, operator => 'AND' } } }, { match => { 'asciiname.analyzed' => - { query => $query, operator => 'and' } + { query => $query, operator => 'AND' } } }, { match => { 'pauseid' => uc($query) } }, { match => { 'profile.id' => lc($query) } }, - ] + ], } }, size => 10, diff --git a/lib/MetaCPAN/Query/Favorite.pm b/lib/MetaCPAN/Query/Favorite.pm index 2d1750e9f..da2d0987f 100644 --- a/lib/MetaCPAN/Query/Favorite.pm +++ b/lib/MetaCPAN/Query/Favorite.pm @@ -18,7 +18,7 @@ sub agg_by_distributions { my $body = { size => 0, query => { - terms => { 'distribution' => $distributions } 
+ terms => { distribution => $distributions } }, aggregations => { favorites => { @@ -30,9 +30,9 @@ sub agg_by_distributions { $user ? ( myfavorites => { - filter => { term => { 'user' => $user } }, + filter => { term => { user => $user } }, aggregations => { - enteries => { + entries => { terms => { field => 'distribution' } } } @@ -135,7 +135,7 @@ sub leaderboard { }, totals => { cardinality => { - field => "distribution", + field => 'distribution', }, }, }, diff --git a/lib/MetaCPAN/Query/File.pm b/lib/MetaCPAN/Query/File.pm index 2a52fcdae..5de697de0 100644 --- a/lib/MetaCPAN/Query/File.pm +++ b/lib/MetaCPAN/Query/File.pm @@ -277,16 +277,18 @@ sub interesting_files { { term => { release => $release } }, { term => { author => $author } }, { term => { directory => \0 } }, - { not => { prefix => { 'path' => 'corpus/' } } }, - { not => { prefix => { 'path' => 'fatlib/' } } }, - { not => { prefix => { 'path' => 'inc/' } } }, - { not => { prefix => { 'path' => 'local/' } } }, - { not => { prefix => { 'path' => 'perl5/' } } }, - { not => { prefix => { 'path' => 'share/' } } }, - { not => { prefix => { 'path' => 't/' } } }, - { not => { prefix => { 'path' => 'xt/' } } }, { bool => { should => \@clauses } }, ], + must_not => [ + { prefix => { 'path' => 'corpus/' } }, + { prefix => { 'path' => 'fatlib/' } }, + { prefix => { 'path' => 'inc/' } }, + { prefix => { 'path' => 'local/' } }, + { prefix => { 'path' => 'perl5/' } }, + { prefix => { 'path' => 'share/' } }, + { prefix => { 'path' => 't/' } }, + { prefix => { 'path' => 'xt/' } }, + ], }, }, _source => $source, diff --git a/lib/MetaCPAN/Query/Mirror.pm b/lib/MetaCPAN/Query/Mirror.pm index 05fc28ec1..dbd8764c1 100644 --- a/lib/MetaCPAN/Query/Mirror.pm +++ b/lib/MetaCPAN/Query/Mirror.pm @@ -11,17 +11,10 @@ sub search { if ($q) { my @protocols = grep /^ (?: http | ftp | rsync ) $/x, split /\s+/, $q; - my $query = { + $query = { bool => { - must_not => { - bool => { - should => [ - map +{ filter => { missing => { field 
=> $_ } } }, - @protocols - ] - } - } - } + must => [ map +{ exists => { field => $_ } }, @protocols ] + }, }; } diff --git a/lib/MetaCPAN/Query/Release.pm b/lib/MetaCPAN/Query/Release.pm index 512789106..9d048a397 100644 --- a/lib/MetaCPAN/Query/Release.pm +++ b/lib/MetaCPAN/Query/Release.pm @@ -1100,14 +1100,19 @@ sub find_download_url { ? { bool => { must => \@filters } } : $filters[0]; - my $version_sort - = $module_filter + my $version_sort = $module_filter ? { 'module.version_numified' => { mode => 'max', order => 'desc', nested_path => 'module', nested_filter => $entity_filter, + + # TODO: ES6 - replace prior 2 lines with: + #nested => { + # path => 'module', + # filter => $entity_filter, + #}, } } : { version_numified => { order => 'desc' } }; diff --git a/lib/MetaCPAN/Script/Check.pm b/lib/MetaCPAN/Script/Check.pm index 485a31968..b6aafd5c1 100644 --- a/lib/MetaCPAN/Script/Check.pm +++ b/lib/MetaCPAN/Script/Check.pm @@ -90,13 +90,14 @@ sub check_modules { fields => [ qw(name release author distribution version authorized indexed maturity date) ], - query => { match_all => {} }, - filter => { - and => [ - { term => { 'module.name' => $pkg } }, - { term => { 'authorized' => 'true' } }, - { term => { 'maturity' => 'released' } }, - ], + query => { + bool => { + must => [ + { term => { 'module.name' => $pkg } }, + { term => { 'authorized' => 'true' } }, + { term => { 'maturity' => 'released' } }, + ], + }, }, ); my @files = @{ $results->{hits}->{hits} }; @@ -109,16 +110,17 @@ sub check_modules { size => 1, fields => [qw(name status authorized version id date)], - query => { match_all => {} }, - filter => { - and => [ - { - term => { - 'name' => $file->{fields}->{release} - } - }, - { term => { 'status' => 'latest' } }, - ], + query => { + bool => { + must => [ + { + term => { + name => $file->{fields}->{release} + } + }, + { term => { status => 'latest' } }, + ], + }, }, ); @@ -138,13 +140,17 @@ sub check_modules { size => 1, fields => [qw(name status 
authorized version id date)], - query => { match_all => {} }, - filter => { - and => [ { - term => { - 'name' => $file->{fields}->{release} - } - } ] + query => { + bool => { + must => [ + { + term => { + name => + $file->{fields}->{release} + } + }, + ], + }, }, ); diff --git a/lib/MetaCPAN/Script/First.pm b/lib/MetaCPAN/Script/First.pm index b7f7620cd..d3e57b026 100644 --- a/lib/MetaCPAN/Script/First.pm +++ b/lib/MetaCPAN/Script/First.pm @@ -19,8 +19,7 @@ sub run { my $self = shift; my $distributions = $self->index->type("distribution"); $distributions - = $distributions->filter( - { term => { name => $self->distribution } } ) + = $distributions->query( { term => { name => $self->distribution } } ) if $self->distribution; $distributions = $distributions->size(500)->scroll; diff --git a/lib/MetaCPAN/Script/Latest.pm b/lib/MetaCPAN/Script/Latest.pm index c2f0fa8cf..73b78154a 100644 --- a/lib/MetaCPAN/Script/Latest.pm +++ b/lib/MetaCPAN/Script/Latest.pm @@ -139,7 +139,7 @@ sub run { 'Searching for ' . @$filter . ' of ' . $total . 
' modules' } if @module_filters > 1; - my $scroll = $self->index->type('file')->filter($query)->source( [ qw( + my $scroll = $self->index->type('file')->query($query)->source( [ qw( author date distribution download_url module.name release status ) ] )->size(100)->raw->scroll; diff --git a/lib/MetaCPAN/Script/Mapping.pm b/lib/MetaCPAN/Script/Mapping.pm index c3e3a2af5..a838a4a39 100644 --- a/lib/MetaCPAN/Script/Mapping.pm +++ b/lib/MetaCPAN/Script/Mapping.pm @@ -416,12 +416,8 @@ sub _copy_slice { index => $self->index->name, type => $type, body => { - query => { - filtered => { - query => $query - } - }, - sort => '_doc', + query => $query, + sort => '_doc', }, ); diff --git a/lib/MetaCPAN/Script/Role/External/Debian.pm b/lib/MetaCPAN/Script/Role/External/Debian.pm index 14fdfef72..2544faf9e 100644 --- a/lib/MetaCPAN/Script/Role/External/Debian.pm +++ b/lib/MetaCPAN/Script/Role/External/Debian.pm @@ -98,7 +98,7 @@ sub dist_for_debian { my $query = { term => { 'distribution.lowercase' => $alias{$1} // $1 } }; - my $res = $self->index->type('release')->filter($query) + my $res = $self->index->type('release')->query($query) ->sort( [ { date => { order => "desc" } } ] )->raw->first; return $res->{_source}{distribution} diff --git a/lib/MetaCPAN/Script/Session.pm b/lib/MetaCPAN/Script/Session.pm index 28f6f9d9a..d0feecd76 100644 --- a/lib/MetaCPAN/Script/Session.pm +++ b/lib/MetaCPAN/Script/Session.pm @@ -16,8 +16,6 @@ sub run { scroll => '1m', index => 'user', type => 'session', - body => - { query => { filtered => { query => { match_all => {} }, }, }, }, ); my $bulk = $self->es->bulk_helper( diff --git a/lib/MetaCPAN/Script/Watcher.pm b/lib/MetaCPAN/Script/Watcher.pm index 1cad62d23..b7b17d390 100644 --- a/lib/MetaCPAN/Script/Watcher.pm +++ b/lib/MetaCPAN/Script/Watcher.pm @@ -102,13 +102,8 @@ sub backpan_changes { fields => [qw(author archive)], body => { query => { - filtered => { - query => { match_all => {} }, - filter => { - not => { - filter => { term => { 
status => 'backpan' } } - } - }, + bool => { + must_not => [ { term => { status => 'backpan' } }, ], }, }, sort => '_doc', @@ -137,13 +132,21 @@ sub latest_release { sub skip { my ( $self, $author, $archive ) = @_; - return $self->index->type('release')->filter( { - and => [ - { term => { status => 'backpan' } }, - { term => { archive => $archive } }, - { term => { author => $author } }, - ] - } )->raw->count; + return $self->es->count( { + index => $self->index->name, + type => 'release', + body => { + query => { + bool => { + must => [ + { term => { status => 'backpan' } }, + { term => { archive => $archive } }, + { term => { author => $author } }, + ], + }, + }, + }, + } )->{count}; } sub index_release { @@ -173,11 +176,13 @@ sub index_release { sub reindex_release { my ( $self, $release ) = @_; my $info = CPAN::DistnameInfo->new( $release->{path} ); - $release = $self->index->type('release')->filter( { - and => [ - { term => { author => $info->cpanid } }, - { term => { archive => $info->filename } }, - ] + $release = $self->index->type('release')->query( { + bool => { + must => [ + { term => { author => $info->cpanid } }, + { term => { archive => $info->filename } }, + ], + }, } )->raw->first; return unless ($release); log_info {"Moving $release->{_source}->{name} to BackPAN"}; @@ -191,23 +196,20 @@ sub reindex_release { fields => [ '_parent', '_source' ], body => { query => { - filtered => { - query => { match_all => {} }, - filter => { - and => [ - { - term => { - 'release' => $release->{_source}->{name} - } - }, - { - term => { - 'author' => $release->{_source}->{author} - } + bool => { + must => [ + { + term => { + release => $release->{_source}->{name} } - ] - } - } + }, + { + term => { + author => $release->{_source}->{author} + } + }, + ], + }, }, sort => '_doc', }, diff --git a/t/model/search.t b/t/model/search.t index 5ce0290a9..6a01ba10e 100644 --- a/t/model/search.t +++ b/t/model/search.t @@ -15,8 +15,7 @@ my $search = MetaCPAN::Model::Search->new( index => 
'cpan', ); -ok( $search, 'search' ); -ok( $search->_not_rogue, '_not_rogue' ); +ok( $search, 'search' ); { my $results = $search->search_web('Fooxxxx'); diff --git a/t/release/documentation-hide.t b/t/release/documentation-hide.t index 58cd0fd56..e8c1ca452 100644 --- a/t/release/documentation-hide.t +++ b/t/release/documentation-hide.t @@ -21,12 +21,14 @@ is( $release->main_module, 'Documentation::Hide', 'main_module ok' ); ok( $release->first, 'Release is first' ); { - my @files = $idx->type('file')->filter( { - and => [ - { term => { author => $release->author } }, - { term => { release => $release->name } }, - { exists => { field => 'module.name' } }, - ] + my @files = $idx->type('file')->query( { + bool => { + must => [ + { term => { author => $release->author } }, + { term => { release => $release->name } }, + { exists => { field => 'module.name' } }, + ], + }, } )->all; is( @files, 1, 'includes one file with modules' ); @@ -43,12 +45,14 @@ ok( $release->first, 'Release is first' ); } { - my @files = $idx->type('file')->filter( { - and => [ - { term => { author => $release->author } }, - { term => { release => $release->name } }, - { exists => { field => 'documentation' } } - ] + my @files = $idx->type('file')->query( { + bool => { + must => [ + { term => { author => $release->author } }, + { term => { release => $release->name } }, + { exists => { field => 'documentation' } } + ], + }, } )->all; is( @files, 2, 'two files with documentation' ); } diff --git a/t/release/file-changes.t b/t/release/file-changes.t index 2eeda79ec..654f00e36 100644 --- a/t/release/file-changes.t +++ b/t/release/file-changes.t @@ -21,7 +21,7 @@ is( $release->changes_file, 'Changes', 'changes_file ok' ); { my @files = $idx->type('file') - ->filter( { term => { release => 'File-Changes-1.0' } } )->all; + ->query( { term => { release => 'File-Changes-1.0' } } )->all; my ($changes) = grep { $_->name eq 'Changes' } @files; ok $changes, 'found Changes'; diff --git 
a/t/release/meta-provides.t b/t/release/meta-provides.t index 5536a0ccf..6f54fdf66 100644 --- a/t/release/meta-provides.t +++ b/t/release/meta-provides.t @@ -20,13 +20,15 @@ test_release( my ($self) = @_; my $release = $self->data; - my @files = $self->index->type('file')->filter( { - and => [ - { term => { 'author' => $release->author } }, - { term => { 'release' => $release->name } }, - { term => { 'directory' => \0 } }, - { prefix => { 'path' => 'lib/' } }, - ] + my @files = $self->index->type('file')->query( { + bool => { + must => [ + { term => { 'author' => $release->author } }, + { term => { 'release' => $release->name } }, + { term => { 'directory' => \0 } }, + { prefix => { 'path' => 'lib/' } }, + ], + }, } )->all; is( @files, 2, 'two files found in lib/' ); diff --git a/t/release/moose.t b/t/release/moose.t index 88ea6aa66..0b9811510 100644 --- a/t/release/moose.t +++ b/t/release/moose.t @@ -7,8 +7,9 @@ use Test::More; my $model = model(); my $idx = $model->index('cpan'); -my @moose = $idx->type('release') - ->filter( { term => { distribution => 'Moose' } } )->all; +my @moose + = $idx->type('release')->query( { term => { distribution => 'Moose' } } ) + ->all; my $first = 0; map { $first++ } grep { $_->first } @moose; @@ -22,7 +23,7 @@ is( $moose[1]->main_module, 'Moose', 'main_module ok' ); ok( my $faq = $idx->type('file') - ->filter( { match_phrase => { documentation => 'Moose::FAQ' } } ) + ->query( { match_phrase => { documentation => 'Moose::FAQ' } } ) ->first, 'get Moose::FAQ' ); @@ -35,7 +36,7 @@ ok( !$faq->binary, 'is not binary' ); ok( my $binary - = $idx->type('file')->filter( { term => { name => 't' } } )->first, + = $idx->type('file')->query( { term => { name => 't' } } )->first, 'get a t/ directory' ); @@ -44,8 +45,7 @@ ok( $binary->binary, 'is binary' ); ok( my $ppport = $idx->type('file') - ->filter( { match_phrase => { documentation => 'ppport.h' } } ) - ->first, + ->query( { match_phrase => { documentation => 'ppport.h' } } )->first, 'get 
ppport.h' ); @@ -58,29 +58,35 @@ is( $moose->name, 'Moose.pm', 'defined in Moose.pm' ); is( $moose->module->[0]->associated_pod, 'DOY/Moose-0.02/lib/Moose.pm' ); my $signature; -$signature = $idx->type('file')->filter( { - and => [ - { term => { mime => 'text/x-script.perl' } }, - { term => { name => 'SIGNATURE' } } - ] +$signature = $idx->type('file')->query( { + bool => { + must => [ + { term => { mime => 'text/x-script.perl' } }, + { term => { name => 'SIGNATURE' } }, + ], + }, } )->first; ok( !$signature, 'SIGNATURE is not perl code' ); -$signature = $idx->type('file')->filter( { - and => [ - { term => { documentation => 'SIGNATURE' } }, - { term => { mime => 'text/x-script.perl' } }, - { term => { name => 'SIGNATURE' } } - ] +$signature = $idx->type('file')->query( { + bool => { + must => [ + { term => { documentation => 'SIGNATURE' } }, + { term => { mime => 'text/x-script.perl' } }, + { term => { name => 'SIGNATURE' } }, + ], + }, } )->first; ok( !$signature, 'SIGNATURE is not documentation' ); -$signature = $idx->type('file')->filter( { - and => [ - { term => { name => 'SIGNATURE' } }, - { exists => { field => 'documentation' } }, - { term => { indexed => 1 } }, - ] +$signature = $idx->type('file')->query( { + bool => { + must => [ + { term => { name => 'SIGNATURE' } }, + { exists => { field => 'documentation' } }, + { term => { indexed => 1 } }, + ], + }, } )->first; ok( !$signature, 'SIGNATURE is not pod' ); diff --git a/t/release/multiple-modules.t b/t/release/multiple-modules.t index ede71c347..4f39e95ab 100644 --- a/t/release/multiple-modules.t +++ b/t/release/multiple-modules.t @@ -34,12 +34,14 @@ is_deeply( ok( !$release->first, 'Release is not first' ); { - my @files = $idx->type('file')->filter( { - and => [ - { term => { author => $release->author } }, - { term => { release => $release->name } }, - { exists => { field => 'module.name' } }, - ] + my @files = $idx->type('file')->query( { + bool => { + must => [ + { term => { author => 
$release->author } }, + { term => { release => $release->name } }, + { exists => { field => 'module.name' } }, + ], + }, } )->all; is( @files, 3, 'includes three files with modules' ); @@ -106,11 +108,13 @@ ok $release, 'got older version of release'; ok $release->first, 'this version was first'; ok( - my $file = $idx->type('file')->filter( { - and => [ - { term => { release => 'Multiple-Modules-0.1' } }, - { match_phrase => { documentation => 'Moose' } } - ] + my $file = $idx->type('file')->query( { + bool => { + must => [ + { term => { release => 'Multiple-Modules-0.1' } }, + { match_phrase => { documentation => 'Moose' } }, + ], + }, } )->first, 'get Moose.pm' ); diff --git a/t/release/pm-PL.t b/t/release/pm-PL.t index 293126d23..17c3e220e 100644 --- a/t/release/pm-PL.t +++ b/t/release/pm-PL.t @@ -35,7 +35,7 @@ is( $pm->module->[0]->version, # Verify all the files we expect to be contained in the release. my $files = $idx->type('file') - ->filter( { term => { release => 'uncommon-sense-0.01' }, } ) + ->query( { term => { release => 'uncommon-sense-0.01' } } ) ->raw->size(20)->all->{hits}->{hits}; $files = [ map { $_->{_source} } @$files ]; diff --git a/t/release/prefer-meta-json.t b/t/release/prefer-meta-json.t index f89959c04..11bc7080b 100644 --- a/t/release/prefer-meta-json.t +++ b/t/release/prefer-meta-json.t @@ -22,12 +22,14 @@ is( ref $release->metadata, 'HASH', 'comes with metadata in a hashref' ); is( $release->metadata->{'meta-spec'}{version}, 2, 'meta_spec version is 2' ); { - my @files = $idx->type('file')->filter( { - and => [ - { term => { author => $release->author } }, - { term => { release => $release->name } }, - { exists => { field => 'module.name' } }, - ] + my @files = $idx->type('file')->query( { + bool => { + must => [ + { term => { author => $release->author } }, + { term => { release => $release->name } }, + { exists => { field => 'module.name' } }, + ], + }, } )->all; is( @files, 1, 'includes one file with modules' ); diff --git 
a/t/release/scripts.t b/t/release/scripts.t index e7490e4ab..880c2cc13 100644 --- a/t/release/scripts.t +++ b/t/release/scripts.t @@ -21,11 +21,13 @@ is( $release->version, '0.01', 'version ok' ); is( $release->main_module, 'Scripts', 'main_module ok' ); { - my @files = $idx->type('file')->filter( { - and => [ - { term => { mime => 'text/x-script.perl' } }, - { term => { distribution => 'Scripts' } } - ] + my @files = $idx->type('file')->query( { + bool => { + must => [ + { term => { mime => 'text/x-script.perl' } }, + { term => { distribution => 'Scripts' } }, + ], + }, } )->all; is( @files, 4, 'four scripts found' ); @files = sort { $a->name cmp $b->name } diff --git a/t/server/controller/bad_request.t b/t/server/controller/bad_request.t index a3890834f..6f536a494 100644 --- a/t/server/controller/bad_request.t +++ b/t/server/controller/bad_request.t @@ -13,20 +13,16 @@ my $test = Plack::Test->create($app); my $sbigqueryjson = q({ "query": { - "query_string": { - "query": "cpanfile" + "bool": { + "must": [ + { "query_string": { + "query": "cpanfile" + } }, + { "term": { "status": "latest" } } + ] } }, - "filter": { - "and": [ - { - "term": { - "status": "latest" - } - } - ] - }, - "fields": ["distribution", "release", "module.name", "name", "path", "download_url"], + "fields": [ "distribution", "release", "module.name", "name", "path", "download_url" ], "size": "5001" });