From 148979421058c59669598468ef39ca32295fd2a2 Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Tue, 22 Oct 2024 20:59:14 +0200 Subject: [PATCH 01/31] add MetaCPAN::ESConfig module to centralize ES config Centralize Elasticsearch configuration in MetaCPAN::ESConfig. Allow overridden values from the main config file. This module is not meant to have any behavior aside from holding the configuration. --- lib/MetaCPAN/ESConfig.pm | 219 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 219 insertions(+) create mode 100644 lib/MetaCPAN/ESConfig.pm diff --git a/lib/MetaCPAN/ESConfig.pm b/lib/MetaCPAN/ESConfig.pm new file mode 100644 index 000000000..300935f8e --- /dev/null +++ b/lib/MetaCPAN/ESConfig.pm @@ -0,0 +1,219 @@ +use v5.20; +use warnings; +use experimental qw(signatures postderef); + +package MetaCPAN::ESConfig; + +use Carp qw(croak); +use Const::Fast qw(const); +use Exporter qw(import); +use MetaCPAN::Util qw(root_dir); +use Module::Runtime qw(require_module $module_name_rx); +use Cpanel::JSON::XS (); +use Hash::Merge::Simple qw(merge); +use MetaCPAN::Server::Config (); +use Const::Fast qw(const); + +const my %config => merge( + { + aliases => { + 'cpan' => 'cpan_v1_01', + }, + indexes => { + _default => { + settings => + 'MetaCPAN::Script::Mapping::DeployStatement::mapping()', + }, + }, + documents => { + author => { + index => 'cpan_v1_01', + type => 'author', + mapping => 'MetaCPAN::Script::Mapping::CPAN::Author', + model => 'MetaCPAN::Document::Author', + }, + cve => { + index => 'cve', + type => 'cve', + mapping => 'MetaCPAN::Script::Mapping::CVE', + model => 'MetaCPAN::Document::CVE', + }, + contributor => { + index => 'contributor', + type => 'contributor', + mapping => 'MetaCPAN::Script::Mapping::Contributor', + model => 'MetaCPAN::Document::Contributor', + }, + cover => { + index => 'cover', + type => 'cover', + mapping => 'MetaCPAN::Script::Mapping::Cover', + model => 'MetaCPAN::Document::Cover', + }, + distribution => { + index => 'cpan_v1_01', 
+ type => 'distribution', + mapping => 'MetaCPAN::Script::Mapping::CPAN::Distribution', + model => 'MetaCPAN::Document::Distribution', + }, + favorite => { + index => 'cpan_v1_01', + type => 'favorite', + mapping => 'MetaCPAN::Script::Mapping::CPAN::Favorite', + model => 'MetaCPAN::Document::Favorite', + }, + file => { + index => 'cpan_v1_01', + type => 'file', + mapping => 'MetaCPAN::Script::Mapping::CPAN::File', + model => 'MetaCPAN::Document::File', + }, + mirror => { + index => 'cpan_v1_01', + type => 'mirror', + mapping => 'MetaCPAN::Script::Mapping::CPAN::Mirror', + model => 'MetaCPAN::Document::Mirror', + }, + package => { + index => 'cpan_v1_01', + type => 'package', + mapping => 'MetaCPAN::Script::Mapping::CPAN::Package', + model => 'MetaCPAN::Document::Package', + }, + permission => { + index => 'cpan_v1_01', + type => 'permission', + mapping => 'MetaCPAN::Script::Mapping::CPAN::Permission', + model => 'MetaCPAN::Document::Permission', + }, + release => { + index => 'cpan_v1_01', + type => 'release', + mapping => 'MetaCPAN::Script::Mapping::CPAN::Release', + model => 'MetaCPAN::Document::Release', + }, + + account => { + index => 'user', + type => 'account', + mapping => 'MetaCPAN::Script::Mapping::User::Account', + model => 'MetaCPAN::Model::User::Account', + }, + identity => { + index => 'user', + type => 'identity', + mapping => 'MetaCPAN::Script::Mapping::User::Identity', + model => 'MetaCPAN::Model::User::Identity', + }, + session => { + index => 'user', + type => 'session', + mapping => 'MetaCPAN::Script::Mapping::User::Session', + model => 'MetaCPAN::Model::User::Session', + }, + }, + }, + MetaCPAN::Server::Config::config()->{elasticsearch} || {}, +)->%*; + +{ + use Moo; +} + +has indexes => ( + is => 'ro', + required => 1, +); + +has aliases => ( + is => 'ro', + default => sub { {} }, +); + +has documents => ( + is => 'ro', + required => 1, +); + +sub _load_es_data ( $location, $def_sub = 'mapping' ) { + my $data; + + if ( ref $location ) { + 
$data = $location; + } + elsif ( $location + =~ /\A($module_name_rx)(?:::([0-9a-zA-Z_]+)\(\)|->($module_name_rx))?\z/ + ) + { + my ( $module, $sub, $method ) = ( $1, $2, $3 ); + require_module $module; + if ($method) { + $data = $module->$method; + } + else { + $sub ||= $def_sub; + no strict 'refs'; + my $code = \&{"${module}::${sub}"}; + die "can't find $location" + if !defined &$code; + $data = $code->(); + } + } + else { + my $abs_path = File::Spec->rel2abs( $location, root_dir() ); + open my $fh, '<', $abs_path + or die "can't open mapping file $abs_path: $!"; + $data = do { local $/; <$fh> }; + } + + return $data + if ref $data; + + return Cpanel::JSON::XS::decode_json($data); +} + +sub mapping ( $self, $doc ) { + my $doc_data = $self->documents->{$doc} + or croak "unknown document $doc"; + return _load_es_data( $doc_data->{mapping}, 'mapping' ); +} + +sub index_settings ( $self, $index ) { + my $indexes = $self->indexes; + my $index_data = exists $indexes->{$index} && $indexes->{$index}; + my $settings + = $index_data + && exists $index_data->{settings} + && $index_data->{settings}; + if ( !$settings ) { + my $default_data + = exists $indexes->{_default} && $indexes->{_default}; + $settings + = $default_data + && exists $default_data->{settings} + && $default_data->{settings}; + } + return {} + if !$settings; + return _load_es_data($settings); +} + +sub doc_path ( $self, $doc ) { + my $doc_data = $self->documents->{$doc} + or croak "unknown document $doc"; + return ( + ( $doc_data->{index} ? ( index => $doc_data->{index} ) : () ), + ( $doc_data->{type} ? 
( type => $doc_data->{type} ) : () ), + ); +} + +our @EXPORT_OK = qw( + es_config + es_doc_path +); + +my $single = __PACKAGE__->new(%config); +sub es_config : prototype() {$single} +sub es_doc_path ($doc) { $single->doc_path($doc) } + +1; From edb8e78a1f7432fa5ab6c7df687a071265a93fd5 Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Tue, 22 Oct 2024 21:01:50 +0200 Subject: [PATCH 02/31] adapt Mapping script to use ESConfig module ESConfig knows how to find mapping data. Use it to find the mapping data as well as index configuration. The mapping data should be able to be moved into json files rather than json wrapped in a module. This can happen in the future. --- lib/MetaCPAN/Script/Mapping.pm | 110 +++++++++------------------------ 1 file changed, 29 insertions(+), 81 deletions(-) diff --git a/lib/MetaCPAN/Script/Mapping.pm b/lib/MetaCPAN/Script/Mapping.pm index f1da40695..2986318de 100644 --- a/lib/MetaCPAN/Script/Mapping.pm +++ b/lib/MetaCPAN/Script/Mapping.pm @@ -2,25 +2,11 @@ package MetaCPAN::Script::Mapping; use Moose; -use Cpanel::JSON::XS qw( decode_json ); -use DateTime (); -use Log::Contextual qw( :log ); -use MetaCPAN::Script::Mapping::Contributor (); -use MetaCPAN::Script::Mapping::Cover (); -use MetaCPAN::Script::Mapping::CPAN::Author (); -use MetaCPAN::Script::Mapping::CPAN::Distribution (); -use MetaCPAN::Script::Mapping::CPAN::Favorite (); -use MetaCPAN::Script::Mapping::CPAN::File (); -use MetaCPAN::Script::Mapping::CPAN::Mirror (); -use MetaCPAN::Script::Mapping::CPAN::Package (); -use MetaCPAN::Script::Mapping::CPAN::Permission (); -use MetaCPAN::Script::Mapping::CPAN::Release (); -use MetaCPAN::Script::Mapping::CVE (); -use MetaCPAN::Script::Mapping::DeployStatement (); -use MetaCPAN::Script::Mapping::User::Account (); -use MetaCPAN::Script::Mapping::User::Identity (); -use MetaCPAN::Script::Mapping::User::Session (); -use MetaCPAN::Types::TypeTiny qw( Bool Str ); +use Cpanel::JSON::XS qw( decode_json ); +use DateTime (); +use Log::Contextual 
qw( :log ); +use MetaCPAN::ESConfig qw( es_config ); +use MetaCPAN::Types::TypeTiny qw( Bool Str ); use constant { EXPECTED => 1, @@ -29,13 +15,6 @@ use constant { with 'MetaCPAN::Role::Script', 'MooseX::Getopt'; -has cpan_index => ( - is => 'ro', - isa => Str, - default => 'cpan_v1_01', - documentation => 'real name for the cpan index', -); - has arg_deploy_mapping => ( init_arg => 'delete', is => 'ro', @@ -285,11 +264,9 @@ sub update_index { die "update_index requires patch_mapping\n" unless $self->patch_mapping; - my $patch_mapping = decode_json $self->patch_mapping; - my @patch_types = sort keys %{$patch_mapping}; - my $dep = $self->index->deployment_statement; - my $existing_mapping = delete $dep->{mappings}; - my $mapping = +{ map { $_ => $patch_mapping->{$_} } @patch_types }; + my $patch_mapping = decode_json $self->patch_mapping; + my @patch_types = sort keys %{$patch_mapping}; + my $mapping = +{ map { $_ => $patch_mapping->{$_} } @patch_types }; log_info {"Updating mapping for index: $name"}; @@ -311,15 +288,14 @@ sub create_index { my $dst_idx = $self->arg_create_index; $self->_check_index_exists( $dst_idx, NOT_EXPECTED ); - my $patch_mapping = decode_json $self->patch_mapping; - my @patch_types = sort keys %{$patch_mapping}; - my $dep = $self->index->deployment_statement; - delete $dep->{mappings}; - my $mapping = +{}; + my $patch_mapping = decode_json $self->patch_mapping; + my @patch_types = sort keys %{$patch_mapping}; + my $index_settings = es_config->index_settings($dst_idx); + my $mapping = +{}; # create the new index with the copied settings log_info {"Creating index: $dst_idx"}; - $self->es->indices->create( index => $dst_idx, body => $dep ); + $self->es->indices->create( index => $dst_idx, body => $index_settings ); # override with new type mapping if ( $self->patch_mapping ) { @@ -489,51 +465,25 @@ sub show_info { } sub _build_mapping { - my $self = $_[0]; - return { - $self->cpan_index => { - author => - 
decode_json(MetaCPAN::Script::Mapping::CPAN::Author::mapping), - distribution => decode_json( - MetaCPAN::Script::Mapping::CPAN::Distribution::mapping), - favorite => decode_json( - MetaCPAN::Script::Mapping::CPAN::Favorite::mapping), - file => - decode_json(MetaCPAN::Script::Mapping::CPAN::File::mapping), - mirror => - decode_json(MetaCPAN::Script::Mapping::CPAN::Mirror::mapping), - permission => decode_json( - MetaCPAN::Script::Mapping::CPAN::Permission::mapping), - package => decode_json( - MetaCPAN::Script::Mapping::CPAN::Package::mapping), - release => decode_json( - MetaCPAN::Script::Mapping::CPAN::Release::mapping), - }, + my $self = $_[0]; + my $docs = es_config->documents; + my $mappings = {}; + for my $name ( sort keys %$docs ) { + my $doc = $docs->{$name}; + my $index = $doc->{index} + or die "no index defined for $name documents"; + my $type = $doc->{type} + or die "no type defined for $name documents"; + my $mapping = es_config->mapping($name); + $mappings->{$index}{$type} = $mapping; + } - user => { - account => decode_json( - MetaCPAN::Script::Mapping::User::Account::mapping), - identity => decode_json( - MetaCPAN::Script::Mapping::User::Identity::mapping), - session => decode_json( - MetaCPAN::Script::Mapping::User::Session::mapping), - }, - contributor => { - contributor => - decode_json(MetaCPAN::Script::Mapping::Contributor::mapping), - }, - cover => { - cover => decode_json(MetaCPAN::Script::Mapping::Cover::mapping), - }, - cve => { - cve => decode_json(MetaCPAN::Script::Mapping::CVE::mapping), - }, - }; + return $mappings; } sub _build_aliases { my $self = $_[0]; - return { 'cpan' => $self->cpan_index }; + es_config->aliases; } sub deploy_mapping { @@ -546,18 +496,16 @@ sub deploy_mapping { # Deserialize the Index Mapping Structure my $rmappings = $self->_build_mapping; - my $deploy_statement - = decode_json(MetaCPAN::Script::Mapping::DeployStatement::mapping); - my $es = $self->es; # recreate the indices and apply the mapping for my $idx ( 
sort keys %$rmappings ) { $self->_delete_index($idx) if $es->indices->exists( index => $idx ); + my $index_settings = es_config->index_settings($idx); log_info {"Creating index: $idx"}; - $es->indices->create( index => $idx, body => $deploy_statement ); + $es->indices->create( index => $idx, body => $index_settings ); for my $type ( sort keys %{ $rmappings->{$idx} } ) { log_info {"Adding mapping: $idx/$type"}; From 4e2c206d184fc352a92cccfeb14513602a091271 Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Tue, 22 Oct 2024 21:05:02 +0200 Subject: [PATCH 03/31] remove analysis configuration from MetaCPAN::Model The analysis set in MetaCPAN::Model wasn't used for anything directly, only to generate the index deployment statements. The index settings we actually use live in MetaCPAN::Script::Mapping::DeployStatement, so the declarations in MetaCPAN::Model were used for nothing. --- lib/MetaCPAN/Model.pm | 37 ------------------------------------- 1 file changed, 37 deletions(-) diff --git a/lib/MetaCPAN/Model.pm b/lib/MetaCPAN/Model.pm index 3272711f8..a239f5da6 100644 --- a/lib/MetaCPAN/Model.pm +++ b/lib/MetaCPAN/Model.pm @@ -5,43 +5,6 @@ use Moose; use ElasticSearchX::Model; -analyzer lowercase => ( - tokenizer => 'keyword', - filter => 'lowercase', -); - -analyzer fulltext => ( type => 'english' ); - -tokenizer camelcase => ( - type => 'pattern', - pattern => - "([^\\p{L}\\d]+)|(?<=\\D)(?=\\d)|(?<=\\d)(?=\\D)|(?<=[\\p{L}&&[^\\p{Lu}]])(?=\\p{Lu})|(?<=\\p{Lu})(?=\\p{Lu}[\\p{L}&&[^\\p{Lu}]])" -); - -filter edge => ( - type => 'edge_ngram', - min_gram => 1, - max_gram => 20 -); - -analyzer camelcase => ( - type => 'custom', - tokenizer => 'camelcase', - filter => [ 'lowercase', 'unique' ] -); - -analyzer edge_camelcase => ( - type => 'custom', - tokenizer => 'camelcase', - filter => [ 'lowercase', 'edge' ] -); - -analyzer edge => ( - type => 'custom', - tokenizer => 'standard', - filter => [ 'lowercase', 'edge' ] -); - index cpan => ( namespace => 'MetaCPAN::Document', alias_for 
=> 'cpan_v1_01', From 1ce41ec609e7c351ee94b6805851861b2a064aff Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Wed, 23 Oct 2024 14:04:48 +0200 Subject: [PATCH 04/31] explicitly load modules for ES sub types --- lib/MetaCPAN/Document/Author.pm | 7 ++++--- lib/MetaCPAN/Document/Release.pm | 5 +++-- lib/MetaCPAN/Model/User/Account.pm | 7 ++++--- lib/MetaCPAN/Types/Internal.pm | 27 ++++++++++++++++++++------- 4 files changed, 31 insertions(+), 15 deletions(-) diff --git a/lib/MetaCPAN/Document/Author.pm b/lib/MetaCPAN/Document/Author.pm index 48296bb18..b0fe6b446 100644 --- a/lib/MetaCPAN/Document/Author.pm +++ b/lib/MetaCPAN/Document/Author.pm @@ -7,9 +7,10 @@ use ElasticSearchX::Model::Document::Types qw( Location ); use ElasticSearchX::Model::Document; # load order not important -use Gravatar::URL (); -use MetaCPAN::Types qw( ESBool Profile ); -use MetaCPAN::Types::TypeTiny qw( +use Gravatar::URL (); +use MetaCPAN::Document::Author::Profile (); +use MetaCPAN::Types qw( ESBool Profile ); +use MetaCPAN::Types::TypeTiny qw( ArrayRef ArrayRefPromote Blog diff --git a/lib/MetaCPAN/Document/Release.pm b/lib/MetaCPAN/Document/Release.pm index cdc5a0e1b..c4e4e2998 100644 --- a/lib/MetaCPAN/Document/Release.pm +++ b/lib/MetaCPAN/Document/Release.pm @@ -3,8 +3,9 @@ package MetaCPAN::Document::Release; use Moose; use ElasticSearchX::Model::Document; -use MetaCPAN::Types qw( ESBool Dependency ); -use MetaCPAN::Types::TypeTiny qw( +use MetaCPAN::Document::Dependency (); +use MetaCPAN::Types qw( ESBool Dependency ); +use MetaCPAN::Types::TypeTiny qw( ArrayRef HashRefCPANMeta Num diff --git a/lib/MetaCPAN/Model/User/Account.pm b/lib/MetaCPAN/Model/User/Account.pm index eb2013f79..9307d28c9 100644 --- a/lib/MetaCPAN/Model/User/Account.pm +++ b/lib/MetaCPAN/Model/User/Account.pm @@ -6,9 +6,10 @@ use warnings; use Moose; use ElasticSearchX::Model::Document; -use MetaCPAN::Types qw( ESBool Identity ); -use MetaCPAN::Types::TypeTiny qw( ArrayRef Dict Str ); -use MetaCPAN::Util qw(true 
false); +use MetaCPAN::Model::User::Identity (); +use MetaCPAN::Types qw( ESBool Identity ); +use MetaCPAN::Types::TypeTiny qw( ArrayRef Dict Str ); +use MetaCPAN::Util qw(true false); =head1 PROPERTIES diff --git a/lib/MetaCPAN/Types/Internal.pm b/lib/MetaCPAN/Types/Internal.pm index d48e4fd8b..af291a563 100644 --- a/lib/MetaCPAN/Types/Internal.pm +++ b/lib/MetaCPAN/Types/Internal.pm @@ -19,13 +19,18 @@ use MooseX::Types -declare => [ qw( subtype Module, as ArrayRef [ Type ['MetaCPAN::Document::Module'] ]; coerce Module, from ArrayRef, via { + require MetaCPAN::Document::Module; [ map { ref $_ eq 'HASH' ? MetaCPAN::Document::Module->new($_) : $_ } @$_ ]; }; -coerce Module, from HashRef, via { [ MetaCPAN::Document::Module->new($_) ] }; +coerce Module, from HashRef, via { + require MetaCPAN::Document::Module; + [ MetaCPAN::Document::Module->new($_) ]; +}; subtype Identity, as ArrayRef [ Type ['MetaCPAN::Model::User::Identity'] ]; coerce Identity, from ArrayRef, via { + require MetaCPAN::Model::User::Identity; [ map { ref $_ eq 'HASH' @@ -34,11 +39,14 @@ coerce Identity, from ArrayRef, via { } @$_ ]; }; -coerce Identity, from HashRef, - via { [ MetaCPAN::Model::User::Identity->new($_) ] }; +coerce Identity, from HashRef, via { + require MetaCPAN::Model::User::Identity; + [ MetaCPAN::Model::User::Identity->new($_) ]; +}; subtype Dependency, as ArrayRef [ Type ['MetaCPAN::Document::Dependency'] ]; coerce Dependency, from ArrayRef, via { + require MetaCPAN::Document::Dependency; [ map { ref $_ eq 'HASH' @@ -47,11 +55,14 @@ coerce Dependency, from ArrayRef, via { } @$_ ]; }; -coerce Dependency, from HashRef, - via { [ MetaCPAN::Document::Dependency->new($_) ] }; +coerce Dependency, from HashRef, via { + require MetaCPAN::Document::Dependency; + [ MetaCPAN::Document::Dependency->new($_) ]; +}; subtype Profile, as ArrayRef [ Type ['MetaCPAN::Document::Author::Profile'] ]; coerce Profile, from ArrayRef, via { + require MetaCPAN::Document::Author::Profile; [ map { ref $_ eq 
'HASH' @@ -60,8 +71,10 @@ coerce Profile, from ArrayRef, via { } @$_ ]; }; -coerce Profile, from HashRef, - via { [ MetaCPAN::Document::Author::Profile->new($_) ] }; +coerce Profile, from HashRef, via { + require MetaCPAN::Document::Author::Profile; + [ MetaCPAN::Document::Author::Profile->new($_) ]; +}; MooseX::Getopt::OptionTypeMap->add_option_type_to_map( 'MooseX::Types::ElasticSearch::ES' => '=s' ); From 94c84ea68b6d1ccd3a8f2cee55658cb0ec2ee2cc Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Tue, 22 Oct 2024 21:08:13 +0200 Subject: [PATCH 05/31] configure MetaCPAN::Model via ESConfig Rather than searching for modules on disk, use the explicit configuration in ESConfig to configure MetaCPAN::Model. --- lib/MetaCPAN/Model.pm | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/lib/MetaCPAN/Model.pm b/lib/MetaCPAN/Model.pm index a239f5da6..500bc63b1 100644 --- a/lib/MetaCPAN/Model.pm +++ b/lib/MetaCPAN/Model.pm @@ -4,14 +4,37 @@ package MetaCPAN::Model; use Moose; use ElasticSearchX::Model; +use MetaCPAN::ESConfig qw(es_config); +use Module::Runtime qw(require_module); -index cpan => ( - namespace => 'MetaCPAN::Document', - alias_for => 'cpan_v1_01', - shards => 3 -); +my %indexes; +my $docs = es_config->documents; +for my $name ( sort keys %$docs ) { + my $doc = $docs->{$name}; + my $model = $doc->{model} + or next; + require_module($model); + my $index = $doc->{index} + or die "no index for $name documents!"; -index user => ( namespace => 'MetaCPAN::Model::User' ); + $indexes{$index}{types}{$name} = $model->meta; +} + +my $aliases = es_config->aliases; +for my $alias ( sort keys %$aliases ) { + my $index = $aliases->{$alias}; + my $index_data = $indexes{$index} + or die "unknown index $index"; + if ( $index_data->{alias_for} ) { + die "duplicate alias for $index"; + } + $index_data->{alias_for} = $index; + $indexes{$alias} = delete $indexes{$index}; +} + +for my $index ( sort keys %indexes ) { + index $index => 
%{ $indexes{$index} }; +} __PACKAGE__->meta->make_immutable; 1; From 491e640c8595ba1db7125395327ac43104626768 Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Sat, 26 Oct 2024 10:13:50 +0200 Subject: [PATCH 06/31] check for compilation errors in document set modules ElasticSearchX::Model ignores all errors if a ::Set package can't be loaded, and uses a generic ElasticSearchX::Model::Document::Set object. It's fine for the module to be missing, but compilation errors should be reported. --- lib/MetaCPAN/Model.pm | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/MetaCPAN/Model.pm b/lib/MetaCPAN/Model.pm index 500bc63b1..341d67b69 100644 --- a/lib/MetaCPAN/Model.pm +++ b/lib/MetaCPAN/Model.pm @@ -5,7 +5,7 @@ use Moose; use ElasticSearchX::Model; use MetaCPAN::ESConfig qw(es_config); -use Module::Runtime qw(require_module); +use Module::Runtime qw(require_module use_package_optimistically); my %indexes; my $docs = es_config->documents; @@ -14,6 +14,7 @@ for my $name ( sort keys %$docs ) { my $model = $doc->{model} or next; require_module($model); + use_package_optimistically( $model . 
'::Set' ); my $index = $doc->{index} or die "no index for $name documents!"; From aff19bf73056d6fea8b13b58552a08d91675e54f Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Wed, 23 Oct 2024 02:08:42 +0200 Subject: [PATCH 07/31] disable critic rule prohibiting prototypes PPI and thus Perl::Critic don't understand signatures, so the rule ends up prohibiting signatures as well --- .perlcriticrc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.perlcriticrc b/.perlcriticrc index 2b6db72b5..578566da8 100644 --- a/.perlcriticrc +++ b/.perlcriticrc @@ -16,6 +16,9 @@ theme = core [-ValuesAndExpressions::ProhibitNoisyQuotes] [-Variables::ProhibitPunctuationVars] +# doesn't understand signatures +[-Subroutines::ProhibitSubroutinePrototypes] + [CodeLayout::RequireTrailingCommas] severity = 4 From 967b34ec00e3d1d530a0548d4101e3750fbffdc6 Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Wed, 23 Oct 2024 02:58:37 +0200 Subject: [PATCH 08/31] add doc method to MetaCPAN::Model Allows getting a "type" object from a document name rather than needing to specify an index and type. 
--- lib/MetaCPAN/Model.pm | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/MetaCPAN/Model.pm b/lib/MetaCPAN/Model.pm index 341d67b69..52a0cfdea 100644 --- a/lib/MetaCPAN/Model.pm +++ b/lib/MetaCPAN/Model.pm @@ -37,6 +37,12 @@ for my $index ( sort keys %indexes ) { index $index => %{ $indexes{$index} }; } +sub doc { + my ( $self, $doc ) = @_; + my $doc_config = es_config->documents->{$doc}; + return $self->index( $doc_config->{index} )->type( $doc_config->{type} ); +} + __PACKAGE__->meta->make_immutable; 1; From f9f7dea2201f02ae86fa6d1e81379019c92945a5 Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Wed, 23 Oct 2024 09:50:41 +0200 Subject: [PATCH 09/31] mapping script: require specifying a source index if copying --- lib/MetaCPAN/Script/Mapping.pm | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/lib/MetaCPAN/Script/Mapping.pm b/lib/MetaCPAN/Script/Mapping.pm index 2986318de..17997d4f3 100644 --- a/lib/MetaCPAN/Script/Mapping.pm +++ b/lib/MetaCPAN/Script/Mapping.pm @@ -86,6 +86,12 @@ has skip_existing_mapping => ( documentation => 'do NOT copy mappings other than patch_mapping', ); +has copy_from_index => ( + is => 'ro', + isa => Str, + documentation => 'index to copy type from', +); + has copy_to_index => ( is => 'ro', isa => Str, @@ -336,9 +342,13 @@ sub create_index { sub copy_type { my ( $self, $index, $type ) = @_; - $index //= $self->copy_to_index; + my $from_index = $self->copy_from_index + or die "can't copy without a source index"; + $index //= $self->copy_to_index + or die "can't copy without a destination index"; - $self->_check_index_exists( $index, EXPECTED ); + $self->_check_index_exists( $from_index, EXPECTED ); + $self->_check_index_exists( $index, EXPECTED ); $type //= $self->arg_copy_type; $type or die "can't copy without a type\n"; @@ -358,7 +368,7 @@ sub copy_type { }; } - return $self->_copy_slice( $query, $index, $type ) if $query; + return $self->_copy_slice( $query, $from_index, $index, 
$type ) if $query; # else ... do copy by monthly slices @@ -374,7 +384,7 @@ sub copy_type { log_info {"copying data for month: $gte"}; eval { - $self->_copy_slice( $q, $index, $type ); + $self->_copy_slice( $q, $from_index, $index, $type ); 1; } or do { my $err = $@ || 'zombie error'; @@ -384,12 +394,12 @@ sub copy_type { } sub _copy_slice { - my ( $self, $query, $index, $type ) = @_; + my ( $self, $query, $from_index, $index, $type ) = @_; my $scroll = $self->es->scroll_helper( size => 250, scroll => '10m', - index => $self->index->name, + index => $from_index, type => $type, body => { query => $query, From a2e9202c994c33f871200d9608fa79dcfff37046 Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Wed, 23 Oct 2024 07:37:09 +0200 Subject: [PATCH 10/31] Scripts: refresh all indices Previously when trying to refresh indices, scripts would call $self->index->refresh. This would refresh the "currently used" index. That doesn't make any sense when splitting each type into its own index. This was also using ElasticSearchX::Model, which we want to get rid of. Instead, call ->indices->refresh via the Search::Elasticsearch object. This will refresh all indices, which is fine for our purposes. In the future, we could consider being more selective about which indices we are refreshing, but this is no worse than the old behavior. 
--- lib/MetaCPAN/Script/Author.pm | 4 ++-- lib/MetaCPAN/Script/CPANTesters.pm | 2 +- lib/MetaCPAN/Script/CPANTestersAPI.pm | 2 +- lib/MetaCPAN/Script/Favorite.pm | 2 +- lib/MetaCPAN/Script/Latest.pm | 4 ++-- lib/MetaCPAN/Script/Mirrors.pm | 2 +- lib/MetaCPAN/Script/Package.pm | 2 +- lib/MetaCPAN/Script/Permission.pm | 2 +- lib/MetaCPAN/Script/Purge.pm | 2 +- lib/MetaCPAN/Script/Release.pm | 2 +- 10 files changed, 12 insertions(+), 12 deletions(-) diff --git a/lib/MetaCPAN/Script/Author.pm b/lib/MetaCPAN/Script/Author.pm index 7abf621fa..0a1cd0279 100644 --- a/lib/MetaCPAN/Script/Author.pm +++ b/lib/MetaCPAN/Script/Author.pm @@ -45,7 +45,7 @@ sub run { # ) unless $index =~ /author/; $self->index_authors; - $self->index->refresh; + $self->es->indices->refresh; } my @author_config_fields = qw( @@ -167,7 +167,7 @@ sub index_authors { } $bulk->flush; - $self->index->refresh; + $self->es->indices->refresh; $self->perform_purges; diff --git a/lib/MetaCPAN/Script/CPANTesters.pm b/lib/MetaCPAN/Script/CPANTesters.pm index 3a6547290..9a33d8f5b 100644 --- a/lib/MetaCPAN/Script/CPANTesters.pm +++ b/lib/MetaCPAN/Script/CPANTesters.pm @@ -65,7 +65,7 @@ sub _build_db { sub run { my $self = shift; $self->index_reports; - $self->index->refresh; + $self->es->indices->refresh; } sub index_reports { diff --git a/lib/MetaCPAN/Script/CPANTestersAPI.pm b/lib/MetaCPAN/Script/CPANTestersAPI.pm index 959c909a7..957c51751 100644 --- a/lib/MetaCPAN/Script/CPANTestersAPI.pm +++ b/lib/MetaCPAN/Script/CPANTestersAPI.pm @@ -45,7 +45,7 @@ has _bulk => ( sub run { my $self = shift; $self->index_reports; - $self->index->refresh; + $self->es->indices->refresh; } sub index_reports { diff --git a/lib/MetaCPAN/Script/Favorite.pm b/lib/MetaCPAN/Script/Favorite.pm index d59f2401f..59b884756 100644 --- a/lib/MetaCPAN/Script/Favorite.pm +++ b/lib/MetaCPAN/Script/Favorite.pm @@ -70,7 +70,7 @@ sub run { } $self->index_favorites; - $self->index->refresh; + $self->es->indices->refresh; } sub index_favorites { 
diff --git a/lib/MetaCPAN/Script/Latest.pm b/lib/MetaCPAN/Script/Latest.pm index 345785d0e..286c9c5be 100644 --- a/lib/MetaCPAN/Script/Latest.pm +++ b/lib/MetaCPAN/Script/Latest.pm @@ -63,7 +63,7 @@ sub run { } my $p = $self->packages; - $self->index->refresh; + $self->es->indices->refresh; # If a distribution name is passed get all the package names # from 02packages that match that distribution so we can limit @@ -251,7 +251,7 @@ sub run { $self->reindex( $bulk, $file_data, 'cpan' ); } $bulk->flush; - $self->index->refresh; + $self->es->indices->refresh; # Call Fastly to purge $self->purge_cpan_distnameinfos( [ diff --git a/lib/MetaCPAN/Script/Mirrors.pm b/lib/MetaCPAN/Script/Mirrors.pm index 6a0bcf7cc..a2cf03452 100644 --- a/lib/MetaCPAN/Script/Mirrors.pm +++ b/lib/MetaCPAN/Script/Mirrors.pm @@ -12,7 +12,7 @@ with 'MetaCPAN::Role::Script', 'MooseX::Getopt'; sub run { my $self = shift; $self->index_mirrors; - $self->index->refresh; + $self->es->indices->refresh; } sub index_mirrors { diff --git a/lib/MetaCPAN/Script/Package.pm b/lib/MetaCPAN/Script/Package.pm index 708f986db..05d293482 100644 --- a/lib/MetaCPAN/Script/Package.pm +++ b/lib/MetaCPAN/Script/Package.pm @@ -25,7 +25,7 @@ has clean_up => ( sub run { my $self = shift; $self->index_packages; - $self->index->refresh; + $self->es->indices->refresh; } sub _get_02packages_fh { diff --git a/lib/MetaCPAN/Script/Permission.pm b/lib/MetaCPAN/Script/Permission.pm index b2cc0a925..1167beaf7 100644 --- a/lib/MetaCPAN/Script/Permission.pm +++ b/lib/MetaCPAN/Script/Permission.pm @@ -25,7 +25,7 @@ has clean_up => ( sub run { my $self = shift; $self->index_permissions; - $self->index->refresh; + $self->es->indices->refresh; } sub index_permissions { diff --git a/lib/MetaCPAN/Script/Purge.pm b/lib/MetaCPAN/Script/Purge.pm index afa80d5b7..57a3503bf 100644 --- a/lib/MetaCPAN/Script/Purge.pm +++ b/lib/MetaCPAN/Script/Purge.pm @@ -135,7 +135,7 @@ sub run { } } - $self->index->refresh; + $self->es->indices->refresh; } sub 
purge_author_releases { diff --git a/lib/MetaCPAN/Script/Release.pm b/lib/MetaCPAN/Script/Release.pm index 0c75dfcb3..563d26a6e 100644 --- a/lib/MetaCPAN/Script/Release.pm +++ b/lib/MetaCPAN/Script/Release.pm @@ -254,7 +254,7 @@ sub run { }; } } - $self->index->refresh unless $self->queue; + $self->es->indices->refresh unless $self->queue; # Call Fastly to purge $self->purge_cpan_distnameinfos( \@module_to_purge_dists ); From 2f5b7d1396fd85218e20bc923cf239b3f3a94c72 Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Wed, 23 Oct 2024 08:00:30 +0200 Subject: [PATCH 11/31] find ESXM types via model rather than index Rather than using the same index to find other types, find them via the model. This means the types don't need to be in the same index. --- lib/MetaCPAN/Document/Distribution.pm | 2 +- lib/MetaCPAN/Document/Release.pm | 2 +- lib/MetaCPAN/Model/Release.pm | 9 ++++----- lib/MetaCPAN/Model/User/Account.pm | 7 +++---- lib/MetaCPAN/Script/First.pm | 2 +- lib/MetaCPAN/Script/Latest.pm | 2 +- lib/MetaCPAN/Script/Mirrors.pm | 2 +- lib/MetaCPAN/Script/Release.pm | 4 ++-- lib/MetaCPAN/Script/Role/Contributor.pm | 2 +- lib/MetaCPAN/Script/Role/External/Debian.pm | 2 +- lib/MetaCPAN/Script/Tickets.pm | 2 +- lib/MetaCPAN/Script/Watcher.pm | 4 ++-- t/lib/MetaCPAN/Tests/Model.pm | 15 ++------------- t/lib/MetaCPAN/Tests/Release.pm | 2 +- t/release/documentation-hide.t | 7 +++---- t/release/file-changes.t | 5 ++--- t/release/meta-provides.t | 2 +- t/release/moose.t | 21 ++++++++++----------- t/release/multiple-modules.t | 9 ++++----- t/release/perl-changes-file.t | 3 +-- t/release/pm-PL.t | 7 +++---- t/release/pod-pm.t | 4 ++-- t/release/prefer-meta-json.t | 5 ++--- t/release/scripts.t | 5 ++--- t/release/some-trial.t | 3 +-- t/release/versions.t | 3 +-- 26 files changed, 54 insertions(+), 77 deletions(-) diff --git a/lib/MetaCPAN/Document/Distribution.pm b/lib/MetaCPAN/Document/Distribution.pm index c2a19f0e4..64c178664 100644 --- a/lib/MetaCPAN/Document/Distribution.pm 
+++ b/lib/MetaCPAN/Document/Distribution.pm @@ -40,7 +40,7 @@ has river => ( sub releases { my $self = shift; - return $self->index->type("release") + return $self->index->model->doc("release") ->query( { term => { "distribution" => $self->name } } ); } diff --git a/lib/MetaCPAN/Document/Release.pm b/lib/MetaCPAN/Document/Release.pm index c4e4e2998..e1a477b9d 100644 --- a/lib/MetaCPAN/Document/Release.pm +++ b/lib/MetaCPAN/Document/Release.pm @@ -278,7 +278,7 @@ sub _build_download_url { sub set_first { my $self = shift; - my $is_first = $self->index->type('release')->query( { + my $is_first = $self->index->model->doc('release')->query( { bool => { must => [ { term => { distribution => $self->distribution } }, diff --git a/lib/MetaCPAN/Model/Release.pm b/lib/MetaCPAN/Model/Release.pm index c5f06fff6..932c754c7 100644 --- a/lib/MetaCPAN/Model/Release.pm +++ b/lib/MetaCPAN/Model/Release.pm @@ -84,7 +84,7 @@ has date => ( }, ); -has index => ( is => 'ro' ); +has model => ( is => 'ro' ); has metadata => ( is => 'ro', @@ -222,8 +222,7 @@ sub _build_document { || $document->{abstract} eq 'null' ); $document - = $self->index->type('release') - ->put( $document, { refresh => true } ); + = $self->model->doc('release')->put( $document, { refresh => true } ); # create distribution if doesn't exist my $dist_count = $self->es->count( @@ -232,7 +231,7 @@ sub _build_document { body => { query => { term => { name => $self->distribution } } }, ); if ( !$dist_count->{count} ) { - $self->index->type('distribution') + $self->model->doc('distribution') ->put( { name => $self->distribution }, { create => 1 } ); } return $document; @@ -333,7 +332,7 @@ sub _build_files { my @files; log_debug { 'Indexing ', scalar @{ $self->archive->files }, ' files' }; - my $file_set = $self->index->type('file'); + my $file_set = $self->model->doc('file'); my $extract_dir = $self->extract; File::Find::find( diff --git a/lib/MetaCPAN/Model/User/Account.pm b/lib/MetaCPAN/Model/User/Account.pm index 
9307d28c9..2f8f72ecd 100644 --- a/lib/MetaCPAN/Model/User/Account.pm +++ b/lib/MetaCPAN/Model/User/Account.pm @@ -121,8 +121,8 @@ after add_identity => sub { my ( $self, $identity ) = @_; if ( $identity->{name} eq 'pause' ) { $self->clear_looks_human; - my $profile = $self->index->model->index('cpan')->type('author') - ->get( $identity->{key} ); + my $profile + = $self->index->model->doc('author')->get( $identity->{key} ); # Not every user is an author if ($profile) { @@ -157,8 +157,7 @@ sub remove_identity { @$ids = grep { $_->{name} ne $identity } @$ids; if ( $identity eq 'pause' ) { - my $profile = $self->index->model->index('cpan')->type('author') - ->get( $id->{key} ); + my $profile = $self->index->model->doc('author')->get( $id->{key} ); if ( $profile && $profile->user eq $self->id ) { $profile->_clear_user; diff --git a/lib/MetaCPAN/Script/First.pm b/lib/MetaCPAN/Script/First.pm index d3e57b026..71b2c3a79 100644 --- a/lib/MetaCPAN/Script/First.pm +++ b/lib/MetaCPAN/Script/First.pm @@ -17,7 +17,7 @@ has distribution => ( sub run { my $self = shift; - my $distributions = $self->index->type("distribution"); + my $distributions = $self->model->doc("distribution"); $distributions = $distributions->query( { term => { name => $self->distribution } } ) if $self->distribution; diff --git a/lib/MetaCPAN/Script/Latest.pm b/lib/MetaCPAN/Script/Latest.pm index 286c9c5be..73dff3edd 100644 --- a/lib/MetaCPAN/Script/Latest.pm +++ b/lib/MetaCPAN/Script/Latest.pm @@ -263,7 +263,7 @@ sub reindex { my ( $self, $bulk, $source, $status ) = @_; # Update the status on the release. 
- my $release = $self->index->type('release')->get( { + my $release = $self->model->doc('release')->get( { author => $source->{author}, name => $source->{release}, } ); diff --git a/lib/MetaCPAN/Script/Mirrors.pm b/lib/MetaCPAN/Script/Mirrors.pm index a2cf03452..3aa80b530 100644 --- a/lib/MetaCPAN/Script/Mirrors.pm +++ b/lib/MetaCPAN/Script/Mirrors.pm @@ -20,7 +20,7 @@ sub index_mirrors { log_info { 'Getting mirrors.json file from ' . $self->cpan }; my $json = $self->cpan->child( 'indices', 'mirrors.json' )->slurp; - my $type = $self->index->type('mirror'); + my $type = $self->model->doc('mirror'); # Clear out everything in the index # so don't end up with old mirrors diff --git a/lib/MetaCPAN/Script/Release.pm b/lib/MetaCPAN/Script/Release.pm index 563d26a6e..f2969c56b 100644 --- a/lib/MetaCPAN/Script/Release.pm +++ b/lib/MetaCPAN/Script/Release.pm @@ -270,7 +270,7 @@ sub _get_release_model { bulk => $bulk, distinfo => $d, file => $archive_path, - index => $self->index, + model => $self->model, level => $self->level, logger => $self->logger, status => $self->detect_status( $d->cpanid, $d->filename ), @@ -285,7 +285,7 @@ sub import_archive { my $self = shift; my $archive_path = shift; - my $bulk = $self->index->bulk( size => $self->_bulk_size ); + my $bulk = $self->model->bulk( size => $self->_bulk_size ); my $model = $self->_get_release_model( $archive_path, $bulk ); log_debug {'Gathering modules'}; diff --git a/lib/MetaCPAN/Script/Role/Contributor.pm b/lib/MetaCPAN/Script/Role/Contributor.pm index 3802eff9e..95529a89e 100644 --- a/lib/MetaCPAN/Script/Role/Contributor.pm +++ b/lib/MetaCPAN/Script/Role/Contributor.pm @@ -10,7 +10,7 @@ sub get_cpan_author_contributors { my @ret; my $es = $self->es; - my $type = $self->index->type('release'); + my $type = $self->model->doc('release'); my $data; eval { $data = $type->get_contributors( $author, $release ); diff --git a/lib/MetaCPAN/Script/Role/External/Debian.pm b/lib/MetaCPAN/Script/Role/External/Debian.pm index 
2544faf9e..5bae00f1f 100644 --- a/lib/MetaCPAN/Script/Role/External/Debian.pm +++ b/lib/MetaCPAN/Script/Role/External/Debian.pm @@ -98,7 +98,7 @@ sub dist_for_debian { my $query = { term => { 'distribution.lowercase' => $alias{$1} // $1 } }; - my $res = $self->index->type('release')->query($query) + my $res = $self->model->doc('release')->query($query) ->sort( [ { date => { order => "desc" } } ] )->raw->first; return $res->{_source}{distribution} diff --git a/lib/MetaCPAN/Script/Tickets.pm b/lib/MetaCPAN/Script/Tickets.pm index 7318cd07b..7dc231a2b 100644 --- a/lib/MetaCPAN/Script/Tickets.pm +++ b/lib/MetaCPAN/Script/Tickets.pm @@ -108,7 +108,7 @@ sub index_github_bugs { log_debug {'Fetching GitHub issues'}; my $scroll - = $self->index->type('release')->find_github_based->scroll('5m'); + = $self->model->doc('release')->find_github_based->scroll('5m'); log_debug { sprintf( "Found %s repos", $scroll->total ) }; my %summary; diff --git a/lib/MetaCPAN/Script/Watcher.pm b/lib/MetaCPAN/Script/Watcher.pm index 2caf4aa5c..fffb6770b 100644 --- a/lib/MetaCPAN/Script/Watcher.pm +++ b/lib/MetaCPAN/Script/Watcher.pm @@ -126,7 +126,7 @@ sub backpan_changes { sub latest_release { my $self = shift; return undef if ( $self->backpan ); - return $self->index->type('release') + return $self->model->doc('release') ->sort( [ { 'date' => { order => "desc" } } ] )->first; } @@ -157,7 +157,7 @@ sub index_release { sub reindex_release { my ( $self, $release ) = @_; my $info = CPAN::DistnameInfo->new( $release->{path} ); - $release = $self->index->type('release')->query( { + $release = $self->model->doc('release')->query( { bool => { must => [ { term => { author => $info->cpanid } }, diff --git a/t/lib/MetaCPAN/Tests/Model.pm b/t/lib/MetaCPAN/Tests/Model.pm index 86a384c79..1a9f8e799 100644 --- a/t/lib/MetaCPAN/Tests/Model.pm +++ b/t/lib/MetaCPAN/Tests/Model.pm @@ -40,24 +40,13 @@ has _type => ( builder => '_build_type', ); -has _model => ( +has model => ( is => 'ro', isa => 
'MetaCPAN::Model', lazy => 1, default => sub { MetaCPAN::Server::Test::model() }, ); -has _es_index_name => ( - is => 'ro', - isa => Str, - default => 'cpan', -); - -sub index { - my ($self) = @_; - return $self->_model->index( $self->_es_index_name ); -} - has search => ( is => 'ro', isa => ArrayRef, @@ -68,7 +57,7 @@ has search => ( sub _do_search { my ($self) = @_; my ( $method, @params ) = @{ $self->search }; - return $self->index->type( $self->_type )->$method(@params); + return $self->model->doc( $self->_type )->$method(@params); } has data => ( diff --git a/t/lib/MetaCPAN/Tests/Release.pm b/t/lib/MetaCPAN/Tests/Release.pm index 0ba2bdeff..4ae03ea10 100644 --- a/t/lib/MetaCPAN/Tests/Release.pm +++ b/t/lib/MetaCPAN/Tests/Release.pm @@ -118,7 +118,7 @@ sub filter_files { my $release = $self->data; return [ - $self->index->type('file')->query( { + $self->model->doc('file')->query( { bool => { must => [ { term => { 'author' => $release->author } }, diff --git a/t/release/documentation-hide.t b/t/release/documentation-hide.t index e8c1ca452..a9cfb4bd3 100644 --- a/t/release/documentation-hide.t +++ b/t/release/documentation-hide.t @@ -6,8 +6,7 @@ use MetaCPAN::Server::Test qw( model ); use Test::More; my $model = model(); -my $idx = $model->index('cpan'); -my $release = $idx->type('release')->get( { +my $release = $model->doc('release')->get( { author => 'MO', name => 'Documentation-Hide-0.01' } ); @@ -21,7 +20,7 @@ is( $release->main_module, 'Documentation::Hide', 'main_module ok' ); ok( $release->first, 'Release is first' ); { - my @files = $idx->type('file')->query( { + my @files = $model->doc('file')->query( { bool => { must => [ { term => { author => $release->author } }, @@ -45,7 +44,7 @@ ok( $release->first, 'Release is first' ); } { - my @files = $idx->type('file')->query( { + my @files = $model->doc('file')->query( { bool => { must => [ { term => { author => $release->author } }, diff --git a/t/release/file-changes.t b/t/release/file-changes.t index 
654f00e36..2de772d55 100644 --- a/t/release/file-changes.t +++ b/t/release/file-changes.t @@ -6,8 +6,7 @@ use MetaCPAN::Server::Test qw( model ); use Test::More; my $model = model(); -my $idx = $model->index('cpan'); -my $release = $idx->type('release')->get( { +my $release = $model->doc('release')->get( { author => 'LOCAL', name => 'File-Changes-1.0' } ); @@ -20,7 +19,7 @@ is( $release->changes_file, 'Changes', 'changes_file ok' ); { my @files - = $idx->type('file') + = $model->doc('file') ->query( { term => { release => 'File-Changes-1.0' } } )->all; my ($changes) = grep { $_->name eq 'Changes' } @files; diff --git a/t/release/meta-provides.t b/t/release/meta-provides.t index ae73bd13b..df58fd024 100644 --- a/t/release/meta-provides.t +++ b/t/release/meta-provides.t @@ -21,7 +21,7 @@ test_release( my ($self) = @_; my $release = $self->data; - my @files = $self->index->type('file')->query( { + my @files = $self->model->doc('file')->query( { bool => { must => [ { term => { 'author' => $release->author } }, diff --git a/t/release/moose.t b/t/release/moose.t index 1454b41a9..f3b28a03d 100644 --- a/t/release/moose.t +++ b/t/release/moose.t @@ -7,10 +7,9 @@ use MetaCPAN::Util qw( true false hit_total ); use Test::More; my $model = model(); -my $idx = $model->index('cpan'); my @moose - = $idx->type('release')->query( { term => { distribution => 'Moose' } } ) - ->all; + = $model->doc('release') + ->query( { term => { distribution => 'Moose' } } )->all; my $first = 0; map { $first++ } grep { $_->first } @moose; @@ -23,7 +22,7 @@ is( $moose[1]->main_module, 'Moose', 'main_module ok' ); ok( my $faq - = $idx->type('file') + = $model->doc('file') ->query( { match_phrase => { documentation => 'Moose::FAQ' } } ) ->first, 'get Moose::FAQ' @@ -37,7 +36,7 @@ ok( !$faq->binary, 'is not binary' ); ok( my $binary - = $idx->type('file')->query( { term => { name => 't' } } )->first, + = $model->doc('file')->query( { term => { name => 't' } } )->first, 'get a t/ directory' ); @@ -45,21 
+44,21 @@ ok( $binary->binary, 'is binary' ); ok( my $ppport - = $idx->type('file') + = $model->doc('file') ->query( { match_phrase => { documentation => 'ppport.h' } } )->first, 'get ppport.h' ); is( $ppport->name, 'ppphdoc', 'name doesn\'t contain a dot' ); -ok( my $moose = $idx->type('file')->find('Moose'), 'find Moose module' ); +ok( my $moose = $model->doc('file')->find('Moose'), 'find Moose module' ); is( $moose->name, 'Moose.pm', 'defined in Moose.pm' ); is( $moose->module->[0]->associated_pod, 'DOY/Moose-0.02/lib/Moose.pm' ); my $signature; -$signature = $idx->type('file')->query( { +$signature = $model->doc('file')->query( { bool => { must => [ { term => { mime => 'text/x-script.perl' } }, @@ -69,7 +68,7 @@ $signature = $idx->type('file')->query( { } )->first; ok( !$signature, 'SIGNATURE is not perl code' ); -$signature = $idx->type('file')->query( { +$signature = $model->doc('file')->query( { bool => { must => [ { term => { documentation => 'SIGNATURE' } }, @@ -80,7 +79,7 @@ $signature = $idx->type('file')->query( { } )->first; ok( !$signature, 'SIGNATURE is not documentation' ); -$signature = $idx->type('file')->query( { +$signature = $model->doc('file')->query( { bool => { must => [ { term => { name => 'SIGNATURE' } }, @@ -92,7 +91,7 @@ $signature = $idx->type('file')->query( { ok( !$signature, 'SIGNATURE is not pod' ); { - my $files = $idx->type('file'); + my $files = $model->doc('file'); my $module = $files->history( module => 'Moose' )->raw->all; my $file = $files->history( file => 'Moose', 'lib/Moose.pm' )->raw->all; diff --git a/t/release/multiple-modules.t b/t/release/multiple-modules.t index 9d795ea6b..642bcd38f 100644 --- a/t/release/multiple-modules.t +++ b/t/release/multiple-modules.t @@ -7,8 +7,7 @@ use MetaCPAN::Util qw(true false); use Test::More; my $model = model(); -my $idx = $model->index('cpan'); -my $release = $idx->type('release')->get( { +my $release = $model->doc('release')->get( { author => 'LOCAL', name => 'Multiple-Modules-1.01' 
} ); @@ -35,7 +34,7 @@ is_deeply( ok( !$release->first, 'Release is not first' ); { - my @files = $idx->type('file')->query( { + my @files = $model->doc('file')->query( { bool => { must => [ { term => { author => $release->author } }, @@ -101,7 +100,7 @@ ok( !$release->first, 'Release is not first' ); } } -$release = $idx->type('release')->get( { +$release = $model->doc('release')->get( { author => 'LOCAL', name => 'Multiple-Modules-0.1' } ); @@ -109,7 +108,7 @@ ok $release, 'got older version of release'; ok $release->first, 'this version was first'; ok( - my $file = $idx->type('file')->query( { + my $file = $model->doc('file')->query( { bool => { must => [ { term => { release => 'Multiple-Modules-0.1' } }, diff --git a/t/release/perl-changes-file.t b/t/release/perl-changes-file.t index 5fc8f5530..6c7cf40a8 100644 --- a/t/release/perl-changes-file.t +++ b/t/release/perl-changes-file.t @@ -6,8 +6,7 @@ use MetaCPAN::Server::Test qw( model ); use Test::More; my $model = model(); -my $idx = $model->index('cpan'); -my $release = $idx->type('release')->get( { +my $release = $model->doc('release')->get( { author => 'RWSTAUNER', name => 'perl-1' } ); diff --git a/t/release/pm-PL.t b/t/release/pm-PL.t index 17c3e220e..9abd3cd4a 100644 --- a/t/release/pm-PL.t +++ b/t/release/pm-PL.t @@ -6,14 +6,13 @@ use MetaCPAN::Server::Test qw( app GET model test_psgi ); use Test::More; my $model = model(); -my $idx = $model->index('cpan'); # Module::Faker will generate a regular pm for the main module. -is( $idx->type('file')->find('uncommon::sense')->path, +is( $model->doc('file')->find('uncommon::sense')->path, 'lib/uncommon/sense.pm', 'find main module' ); # This should be the .pm.PL file we specified. 
-ok( my $pm = $idx->type('file')->find('less::sense'), +ok( my $pm = $model->doc('file')->find('less::sense'), 'find sense.pm.PL module' ); is( $pm->name, 'sense.pm.PL', 'name is correct' ); @@ -34,7 +33,7 @@ is( $pm->module->[0]->version, { # Verify all the files we expect to be contained in the release. my $files - = $idx->type('file') + = $model->doc('file') ->query( { term => { release => 'uncommon-sense-0.01' } } ) ->raw->size(20)->all->{hits}->{hits}; $files = [ map { $_->{_source} } @$files ]; diff --git a/t/release/pod-pm.t b/t/release/pod-pm.t index 3aab2e03d..b24a217cd 100644 --- a/t/release/pod-pm.t +++ b/t/release/pod-pm.t @@ -6,9 +6,9 @@ use MetaCPAN::Server::Test qw( model ); use Test::More; my $model = model(); -my $idx = $model->index('cpan'); -ok( my $pod_pm = $idx->type('file')->find('Pod::Pm'), 'find Pod::Pm module' ); +ok( my $pod_pm = $model->doc('file')->find('Pod::Pm'), + 'find Pod::Pm module' ); is( $pod_pm->name, 'Pm.pm', 'defined in Pm.pm' ); diff --git a/t/release/prefer-meta-json.t b/t/release/prefer-meta-json.t index d6e13cddb..b44b223c9 100644 --- a/t/release/prefer-meta-json.t +++ b/t/release/prefer-meta-json.t @@ -7,8 +7,7 @@ use MetaCPAN::Util qw(true false); use Test::More; my $model = model(); -my $idx = $model->index('cpan'); -my $release = $idx->type('release')->get( { +my $release = $model->doc('release')->get( { author => 'LOCAL', name => 'Prefer-Meta-JSON-1.1' } ); @@ -23,7 +22,7 @@ is( ref $release->metadata, 'HASH', 'comes with metadata in a hashref' ); is( $release->metadata->{'meta-spec'}{version}, 2, 'meta_spec version is 2' ); { - my @files = $idx->type('file')->query( { + my @files = $model->doc('file')->query( { bool => { must => [ { term => { author => $release->author } }, diff --git a/t/release/scripts.t b/t/release/scripts.t index a3acc8f26..2e7bf7e95 100644 --- a/t/release/scripts.t +++ b/t/release/scripts.t @@ -7,8 +7,7 @@ use MetaCPAN::Util qw(true false); use Test::More skip_all => 'Scripting is disabled'; my 
$model = model(); -my $idx = $model->index('cpan'); -my $release = $idx->type('release')->get( { +my $release = $model->doc('release')->get( { author => 'MO', name => 'Scripts-0.01' } ); @@ -22,7 +21,7 @@ is( $release->version, '0.01', 'version ok' ); is( $release->main_module, 'Scripts', 'main_module ok' ); { - my @files = $idx->type('file')->query( { + my @files = $model->doc('file')->query( { bool => { must => [ { term => { mime => 'text/x-script.perl' } }, diff --git a/t/release/some-trial.t b/t/release/some-trial.t index 623c8ff07..7003913b5 100644 --- a/t/release/some-trial.t +++ b/t/release/some-trial.t @@ -6,8 +6,7 @@ use MetaCPAN::Server::Test qw( model ); use Test::More; my $model = model(); -my $idx = $model->index('cpan'); -my $release = $idx->type('release')->get( { +my $release = $model->doc('release')->get( { author => 'LOCAL', name => 'Some-1.00-TRIAL', main_module => 'Some', diff --git a/t/release/versions.t b/t/release/versions.t index 3ebf0ff5c..90f25ba9a 100644 --- a/t/release/versions.t +++ b/t/release/versions.t @@ -6,7 +6,6 @@ use MetaCPAN::Server::Test qw( model ); use Test::More; my $model = model(); -my $idx = $model->index('cpan'); my %modules = ( 'Versions::Our' => '1.45', @@ -17,7 +16,7 @@ my %modules = ( while ( my ( $module, $version ) = each %modules ) { - ok( my $file = $idx->type('file')->find($module), "find $module" ) + ok( my $file = $model->doc('file')->find($module), "find $module" ) or next; ( my $path = "lib/$module.pm" ) =~ s/::/\//; From 0e55e2e229d9b59eddf0c70fc324f149d4aa16bd Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Wed, 23 Oct 2024 09:15:39 +0200 Subject: [PATCH 12/31] find es index/type via ESConfig rather than passing index around Many parts of the code treated the index as the parent of all data, so it was the thing being passed around. That will not be true in the future. Instead, ESConfig can give the path (index+type) of each named document type. Convert most places passing around index to use es_doc_path. 
--- .../Plugin/Session/Store/ElasticSearch.pm | 25 +--- lib/MetaCPAN/API/Model/Cover.pm | 6 +- lib/MetaCPAN/API/Model/User.pm | 4 +- lib/MetaCPAN/Document/File/Set.pm | 18 ++- lib/MetaCPAN/Model/Release.pm | 8 +- lib/MetaCPAN/Query/Author.pm | 28 ++-- lib/MetaCPAN/Query/CVE.pm | 17 ++- lib/MetaCPAN/Query/Contributor.pm | 13 +- lib/MetaCPAN/Query/Cover.pm | 8 +- lib/MetaCPAN/Query/Distribution.pm | 13 +- lib/MetaCPAN/Query/Favorite.pm | 35 ++--- lib/MetaCPAN/Query/File.pm | 11 +- lib/MetaCPAN/Query/Mirror.pm | 7 +- lib/MetaCPAN/Query/Package.pm | 7 +- lib/MetaCPAN/Query/Permission.pm | 15 +- lib/MetaCPAN/Query/Release.pm | 136 +++++------------- lib/MetaCPAN/Query/Search.pm | 6 +- lib/MetaCPAN/Script/Author.pm | 11 +- lib/MetaCPAN/Script/Backpan.pm | 17 +-- lib/MetaCPAN/Script/CPANTesters.pm | 13 +- lib/MetaCPAN/Script/CPANTestersAPI.pm | 11 +- lib/MetaCPAN/Script/CVE.pm | 11 +- lib/MetaCPAN/Script/Check.pm | 10 +- lib/MetaCPAN/Script/Checksum.pm | 9 +- lib/MetaCPAN/Script/Contributor.pm | 10 +- lib/MetaCPAN/Script/Cover.pm | 13 +- lib/MetaCPAN/Script/External.pm | 9 +- lib/MetaCPAN/Script/Favorite.pm | 16 +-- lib/MetaCPAN/Script/Latest.pm | 20 ++- lib/MetaCPAN/Script/Mapping.pm | 12 +- lib/MetaCPAN/Script/Package.pm | 9 +- lib/MetaCPAN/Script/Permission.pm | 9 +- lib/MetaCPAN/Script/Purge.pm | 45 +++--- lib/MetaCPAN/Script/Release.pm | 6 +- lib/MetaCPAN/Script/River.pm | 6 +- lib/MetaCPAN/Script/Role/Contributor.pm | 18 +-- lib/MetaCPAN/Script/Session.pm | 14 +- lib/MetaCPAN/Script/Suggest.pm | 7 +- lib/MetaCPAN/Script/Tickets.pm | 11 +- lib/MetaCPAN/Script/Watcher.pm | 14 +- lib/MetaCPAN/Server/Controller.pm | 16 +-- 41 files changed, 244 insertions(+), 430 deletions(-) diff --git a/lib/Catalyst/Plugin/Session/Store/ElasticSearch.pm b/lib/Catalyst/Plugin/Session/Store/ElasticSearch.pm index b02e4b718..a26ffda62 100644 --- a/lib/Catalyst/Plugin/Session/Store/ElasticSearch.pm +++ b/lib/Catalyst/Plugin/Session/Store/ElasticSearch.pm @@ -6,6 +6,7 @@ use Moose; extends 
'Catalyst::Plugin::Session::Store'; use MooseX::Types::ElasticSearch qw( ES ); +use MetaCPAN::ESConfig qw( es_doc_path ); use MetaCPAN::Server::Config (); use MetaCPAN::Util qw( true false ); @@ -17,26 +18,12 @@ has _session_es => ( default => sub { MetaCPAN::Server::Config::config()->{elasticsearch_servers} }, ); -has _session_es_index => ( - required => 1, - is => 'ro', - default => sub { shift->_session_plugin_config->{index} || 'user' } -); -has _session_es_type => ( - required => 1, - is => 'ro', - default => sub { shift->_session_plugin_config->{type} || 'session' } -); sub get_session_data { my ( $self, $key ) = @_; if ( my ($sid) = $key =~ /^\w+:(.*)/ ) { my $data = eval { - $self->_session_es->get( - index => $self->_session_es_index, - type => $self->_session_es_type, - id => $sid, - ); + $self->_session_es->get( es_doc_path('session'), id => $sid, ); } || return undef; if ( $key =~ /^expires:/ ) { return $data->{_source}->{_expires}; @@ -52,8 +39,7 @@ sub store_session_data { if ( my ($sid) = $key =~ /^session:(.*)/ ) { $session->{_expires} = $self->session_expires; $self->_session_es->index( - index => $self->_session_es_index, - type => $self->_session_es_type, + es_doc_path('session'), id => $sid, body => $session, refresh => true, @@ -66,8 +52,7 @@ sub delete_session_data { if ( my ($sid) = $key =~ /^session:(.*)/ ) { eval { $self->_session_es->delete( - index => $self->_session_es_index, - type => $self->_session_es_type, + es_doc_path('session'), id => $sid, refresh => true, ); @@ -93,8 +78,6 @@ sub delete_expired_sessions { } MyApp->config( 'Plugin::Session' => { servers => ':9200', - index => 'user', - type => 'session', } ); =head1 DESCRIPTION diff --git a/lib/MetaCPAN/API/Model/Cover.pm b/lib/MetaCPAN/API/Model/Cover.pm index 7a9dfa1e4..243ccae7d 100644 --- a/lib/MetaCPAN/API/Model/Cover.pm +++ b/lib/MetaCPAN/API/Model/Cover.pm @@ -1,5 +1,6 @@ package MetaCPAN::API::Model::Cover; +use MetaCPAN::ESConfig qw( es_doc_path ); use MetaCPAN::Moose; 
use MetaCPAN::Util qw(hit_total); @@ -12,9 +13,8 @@ sub find_release_coverage { my $query = +{ term => { release => $release } }; my $res = $self->_run_query( - index => 'cover', - type => 'cover', - body => { + es_doc_path('cover'), + body => { query => $query, size => 999, } diff --git a/lib/MetaCPAN/API/Model/User.pm b/lib/MetaCPAN/API/Model/User.pm index 81a28c5a8..5414be5c0 100644 --- a/lib/MetaCPAN/API/Model/User.pm +++ b/lib/MetaCPAN/API/Model/User.pm @@ -1,5 +1,6 @@ package MetaCPAN::API::Model::User; +use MetaCPAN::ESConfig qw( es_doc_path ); use MetaCPAN::Moose; with 'MetaCPAN::API::Model::Role::ES'; @@ -17,8 +18,7 @@ sub lookup { }; my $res = $self->_run_query( - index => 'user', - type => 'account', + es_doc_path('account'), body => { query => $query }, search_type => 'dfs_query_then_fetch', ); diff --git a/lib/MetaCPAN/Document/File/Set.pm b/lib/MetaCPAN/Document/File/Set.pm index 28b90349a..abf545445 100644 --- a/lib/MetaCPAN/Document/File/Set.pm +++ b/lib/MetaCPAN/Document/File/Set.pm @@ -3,6 +3,7 @@ package MetaCPAN::Document::File::Set; use Moose; use List::Util qw( max ); +use MetaCPAN::ESConfig qw( es_doc_path ); use MetaCPAN::Query::Favorite (); use MetaCPAN::Query::File (); use MetaCPAN::Query::Release (); @@ -123,8 +124,7 @@ sub find { }; my $res = $self->es->search( - index => $self->index->name, - type => 'file', + es_doc_path('file'), search_type => 'dfs_query_then_fetch', body => { query => $query, @@ -307,9 +307,8 @@ sub autocomplete { my $data = $self->es->search( search_type => 'dfs_query_then_fetch', - index => $self->index->name, - type => 'file', - body => { + es_doc_path('file'), + body => { query => $query, sort => [ '_score', 'documentation' ], _source => [qw( documentation release author distribution )], @@ -329,8 +328,8 @@ sub autocomplete_suggester { my $search_size = 100; my $suggestions = $self->es->suggest( { - index => $self->index->name, - body => { + es_doc_path('file'), + body => { documentation => { text => $query, 
completion => { @@ -349,9 +348,8 @@ sub autocomplete_suggester { } my $data = $self->es->search( { - index => $self->index->name, - type => 'file', - body => { + es_doc_path('file'), + body => { query => { bool => { must => [ diff --git a/lib/MetaCPAN/Model/Release.pm b/lib/MetaCPAN/Model/Release.pm index 932c754c7..a29a24752 100644 --- a/lib/MetaCPAN/Model/Release.pm +++ b/lib/MetaCPAN/Model/Release.pm @@ -10,6 +10,7 @@ use DateTime (); use File::Find (); use File::Spec (); use Log::Contextual qw( :log :dlog ); +use MetaCPAN::ESConfig qw( es_doc_path ); use MetaCPAN::Model::Archive (); use MetaCPAN::Types::TypeTiny qw( AbsPath ArrayRef Str ); use MetaCPAN::Util qw( fix_version true false ); @@ -225,11 +226,8 @@ sub _build_document { = $self->model->doc('release')->put( $document, { refresh => true } ); # create distribution if doesn't exist - my $dist_count = $self->es->count( - index => 'cpan', - type => 'distribution', - body => { query => { term => { name => $self->distribution } } }, - ); + my $dist_count = $self->es->count( es_doc_path('distribution'), + body => { query => { term => { name => $self->distribution } } }, ); if ( !$dist_count->{count} ) { $self->model->doc('distribution') ->put( { name => $self->distribution }, { create => 1 } ); diff --git a/lib/MetaCPAN/Query/Author.pm b/lib/MetaCPAN/Query/Author.pm index c6179c5b0..3da100566 100644 --- a/lib/MetaCPAN/Query/Author.pm +++ b/lib/MetaCPAN/Query/Author.pm @@ -2,8 +2,9 @@ package MetaCPAN::Query::Author; use MetaCPAN::Moose; -use MetaCPAN::Util qw(hit_total); -use Ref::Util qw( is_arrayref ); +use MetaCPAN::ESConfig qw( es_doc_path ); +use MetaCPAN::Util qw(hit_total); +use Ref::Util qw( is_arrayref ); with 'MetaCPAN::Query::Role::Common'; @@ -17,11 +18,7 @@ sub by_ids { size => scalar @{$ids}, }; - my $authors = $self->es->search( - index => $self->index_name, - type => 'author', - body => $body, - ); + my $authors = $self->es->search( es_doc_path('author'), body => $body, ); my @authors = map 
$_->{_source}, @{ $authors->{hits}{hits} }; @@ -37,9 +34,8 @@ sub by_user { $users = [$users] unless is_arrayref($users); my $authors = $self->es->search( - index => $self->index_name, - type => 'author', - body => { + es_doc_path('author'), + body => { query => { terms => { user => $users } }, size => 500, } @@ -82,11 +78,7 @@ sub search { from => $from || 0, }; - my $ret = $self->es->search( - index => $self->index_name, - type => 'author', - body => $body, - ); + my $ret = $self->es->search( es_doc_path('author'), body => $body, ); my @authors = map { +{ %{ $_->{_source} }, id => $_->{_id} } } @{ $ret->{hits}{hits} }; @@ -113,11 +105,7 @@ sub prefix_search { from => $from, }; - my $ret = $self->es->search( - index => $self->index_name, - type => 'author', - body => $body, - ); + my $ret = $self->es->search( es_doc_path('author'), body => $body, ); my @authors = map { +{ %{ $_->{_source} }, id => $_->{_id} } } @{ $ret->{hits}{hits} }; diff --git a/lib/MetaCPAN/Query/CVE.pm b/lib/MetaCPAN/Query/CVE.pm index 39cf8dcc6..20a58d328 100644 --- a/lib/MetaCPAN/Query/CVE.pm +++ b/lib/MetaCPAN/Query/CVE.pm @@ -2,6 +2,8 @@ package MetaCPAN::Query::CVE; use MetaCPAN::Moose; +use MetaCPAN::ESConfig qw( es_doc_path ); + with 'MetaCPAN::Query::Role::Common'; sub find_cves_by_cpansa { @@ -10,9 +12,8 @@ sub find_cves_by_cpansa { my $query = +{ term => { cpansa_id => $cpansa_id } }; my $res = $self->es->search( - index => $self->index_name, - type => 'cve', - body => { + es_doc_path('cve'), + body => { query => $query, size => 999, } @@ -27,9 +28,8 @@ sub find_cves_by_release { my $query = +{ match => { releases => "$author/$release" } }; my $res = $self->es->search( - index => $self->index_name, - type => 'cve', - body => { + es_doc_path('cve'), + body => { query => $query, size => 999, } @@ -49,9 +49,8 @@ sub find_cves_by_dist { }; my $res = $self->es->search( - index => $self->index_name, - type => 'cve', - body => { + es_doc_path('cve'), + body => { query => $query, size => 
999, } diff --git a/lib/MetaCPAN/Query/Contributor.pm b/lib/MetaCPAN/Query/Contributor.pm index 30b53774f..4aea27617 100644 --- a/lib/MetaCPAN/Query/Contributor.pm +++ b/lib/MetaCPAN/Query/Contributor.pm @@ -2,7 +2,8 @@ package MetaCPAN::Query::Contributor; use MetaCPAN::Moose; -use MetaCPAN::Util qw(hit_total); +use MetaCPAN::ESConfig qw( es_doc_path ); +use MetaCPAN::Util qw(hit_total); with 'MetaCPAN::Query::Role::Common'; @@ -19,9 +20,8 @@ sub find_release_contributors { }; my $res = $self->es->search( - index => $self->index_name, - type => 'contributor', - body => { + es_doc_path('contributor'), + body => { query => $query, size => 999, } @@ -38,9 +38,8 @@ sub find_author_contributions { my $query = +{ term => { pauseid => $pauseid } }; my $res = $self->es->search( - index => $self->index_name, - type => 'contributor', - body => { + es_doc_path('contributor'), + body => { query => $query, size => 999, } diff --git a/lib/MetaCPAN/Query/Cover.pm b/lib/MetaCPAN/Query/Cover.pm index 63738c206..d524ea5c9 100644 --- a/lib/MetaCPAN/Query/Cover.pm +++ b/lib/MetaCPAN/Query/Cover.pm @@ -2,7 +2,8 @@ package MetaCPAN::Query::Cover; use MetaCPAN::Moose; -use MetaCPAN::Util qw(hit_total); +use MetaCPAN::ESConfig qw( es_doc_path ); +use MetaCPAN::Util qw(hit_total); with 'MetaCPAN::Query::Role::Common'; @@ -12,9 +13,8 @@ sub find_release_coverage { my $query = +{ term => { release => $release } }; my $res = $self->es->search( - index => $self->index_name, - type => 'cover', - body => { + es_doc_path('cover'), + body => { query => $query, size => 999, } diff --git a/lib/MetaCPAN/Query/Distribution.pm b/lib/MetaCPAN/Query/Distribution.pm index 63fd0559c..7ffd8a506 100644 --- a/lib/MetaCPAN/Query/Distribution.pm +++ b/lib/MetaCPAN/Query/Distribution.pm @@ -2,7 +2,8 @@ package MetaCPAN::Query::Distribution; use MetaCPAN::Moose; -use MetaCPAN::Util qw(hit_total); +use MetaCPAN::ESConfig qw( es_doc_path ); +use MetaCPAN::Util qw(hit_total); with 'MetaCPAN::Query::Role::Common'; 
@@ -16,9 +17,8 @@ sub get_river_data_by_dist { }; my $res = $self->es->search( - index => $self->index_name, - type => 'distribution', - body => { + es_doc_path('distribution'), + body => { query => $query, size => 999, } @@ -38,9 +38,8 @@ sub get_river_data_by_dists { }; my $res = $self->es->search( - index => $self->index_name, - type => 'distribution', - body => { + es_doc_path('distribution'), + body => { query => $query, size => 999, } diff --git a/lib/MetaCPAN/Query/Favorite.pm b/lib/MetaCPAN/Query/Favorite.pm index 0d1c2447c..721942e05 100644 --- a/lib/MetaCPAN/Query/Favorite.pm +++ b/lib/MetaCPAN/Query/Favorite.pm @@ -2,7 +2,8 @@ package MetaCPAN::Query::Favorite; use MetaCPAN::Moose; -use MetaCPAN::Util qw(hit_total); +use MetaCPAN::ESConfig qw( es_doc_path ); +use MetaCPAN::Util qw(hit_total); with 'MetaCPAN::Query::Role::Common'; @@ -42,11 +43,7 @@ sub agg_by_distributions { } }; - my $ret = $self->es->search( - index => $self->index_name, - type => 'favorite', - body => $body, - ); + my $ret = $self->es->search( es_doc_path('favorite'), body => $body, ); my %favorites = map { $_->{key} => $_->{doc_count} } @{ $ret->{aggregations}{favorites}{buckets} }; @@ -69,9 +66,8 @@ sub by_user { $size ||= 250; my $favs = $self->es->search( - index => $self->index_name, - type => 'favorite', - body => { + es_doc_path('favorite'), + body => { query => { term => { user => $user } }, _source => [qw( author date distribution )], sort => ['distribution'], @@ -86,9 +82,8 @@ sub by_user { # filter out backpan only distributions my $no_backpan = $self->es->search( - index => $self->index_name, - type => 'release', - body => { + es_doc_path('release'), + body => { query => { bool => { must => [ @@ -139,11 +134,7 @@ sub leaderboard { }, }; - my $ret = $self->es->search( - index => $self->index_name, - type => 'favorite', - body => $body, - ); + my $ret = $self->es->search( es_doc_path('favorite'), body => $body, ); return { leaderboard => 
$ret->{aggregations}{leaderboard}{buckets}, @@ -158,9 +149,8 @@ sub recent { $size //= 100; my $favs = $self->es->search( - index => $self->index_name, - type => 'favorite', - body => { + es_doc_path('favorite'), + body => { size => $size, from => ( $page - 1 ) * $size, query => { match_all => {} }, @@ -181,9 +171,8 @@ sub users_by_distribution { my ( $self, $distribution ) = @_; my $favs = $self->es->search( - index => $self->index_name, - type => 'favorite', - body => { + es_doc_path('favorite'), + body => { query => { term => { distribution => $distribution } }, _source => ['user'], size => 1000, diff --git a/lib/MetaCPAN/Query/File.pm b/lib/MetaCPAN/Query/File.pm index ace8334a9..da760f45f 100644 --- a/lib/MetaCPAN/Query/File.pm +++ b/lib/MetaCPAN/Query/File.pm @@ -2,7 +2,8 @@ package MetaCPAN::Query::File; use MetaCPAN::Moose; -use MetaCPAN::Util qw( hit_total true false ); +use MetaCPAN::ESConfig qw( es_doc_path ); +use MetaCPAN::Util qw( hit_total true false ); with 'MetaCPAN::Query::Role::Common'; @@ -31,9 +32,7 @@ sub dir { }; my $data = $self->es->search( { - index => $self->index_name, - type => 'file', - body => $body, + es_doc_path('file'), body => $body, } ); my $dir = [ map { $_->{_source} } @{ $data->{hits}{hits} } ]; @@ -295,9 +294,7 @@ sub interesting_files { }; my $data = $self->es->search( { - index => $self->index_name, - type => 'file', - body => $body, + es_doc_path('file'), body => $body, } ); $return->{took} = $data->{took}; diff --git a/lib/MetaCPAN/Query/Mirror.pm b/lib/MetaCPAN/Query/Mirror.pm index b6c281c52..c16ee36e0 100644 --- a/lib/MetaCPAN/Query/Mirror.pm +++ b/lib/MetaCPAN/Query/Mirror.pm @@ -3,6 +3,8 @@ package MetaCPAN::Query::Mirror; use MetaCPAN::Moose; use MetaCPAN::Util qw( hit_total ); +use MetaCPAN::ESConfig qw( es_doc_path ); + with 'MetaCPAN::Query::Role::Common'; sub search { @@ -39,9 +41,8 @@ sub search { } my $ret = $self->es->search( - index => $self->index_name, - type => 'mirror', - body => { + 
es_doc_path('mirror'), + body => { size => 999, query => $query, @sort, diff --git a/lib/MetaCPAN/Query/Package.pm b/lib/MetaCPAN/Query/Package.pm index cf3addd5c..00027f683 100644 --- a/lib/MetaCPAN/Query/Package.pm +++ b/lib/MetaCPAN/Query/Package.pm @@ -2,6 +2,8 @@ package MetaCPAN::Query::Package; use MetaCPAN::Moose; +use MetaCPAN::ESConfig qw( es_doc_path ); + with 'MetaCPAN::Query::Role::Common'; sub get_modules { @@ -17,9 +19,8 @@ sub get_modules { }; my $res = $self->es->search( - index => $self->index_name, - type => 'package', - body => { + es_doc_path('package'), + body => { query => $query, size => 999, _source => [qw< module_name >], diff --git a/lib/MetaCPAN/Query/Permission.pm b/lib/MetaCPAN/Query/Permission.pm index 5fe315446..311a68aab 100644 --- a/lib/MetaCPAN/Query/Permission.pm +++ b/lib/MetaCPAN/Query/Permission.pm @@ -2,7 +2,8 @@ package MetaCPAN::Query::Permission; use MetaCPAN::Moose; -use Ref::Util qw( is_arrayref ); +use MetaCPAN::ESConfig qw( es_doc_path ); +use Ref::Util qw( is_arrayref ); with 'MetaCPAN::Query::Role::Common'; @@ -21,11 +22,7 @@ sub by_author { size => 5_000, }; - my $ret = $self->es->search( - index => $self->index_name, - type => 'permission', - body => $body, - ); + my $ret = $self->es->search( es_doc_path('permission'), body => $body, ); my $data = [ sort { $a->{module_name} cmp $b->{module_name} } @@ -51,11 +48,7 @@ sub by_modules { size => 1_000, }; - my $ret = $self->es->search( - index => $self->index_name, - type => 'permission', - body => $body, - ); + my $ret = $self->es->search( es_doc_path('permission'), body => $body, ); my $data = [ sort { $a->{module_name} cmp $b->{module_name} } diff --git a/lib/MetaCPAN/Query/Release.pm b/lib/MetaCPAN/Query/Release.pm index 3e688fe59..08a6eb6eb 100644 --- a/lib/MetaCPAN/Query/Release.pm +++ b/lib/MetaCPAN/Query/Release.pm @@ -2,6 +2,7 @@ package MetaCPAN::Query::Release; use MetaCPAN::Moose; +use MetaCPAN::ESConfig qw( es_doc_path ); use MetaCPAN::Util qw( hit_total 
single_valued_arrayref_to_scalar true false ); @@ -39,9 +40,8 @@ sub author_status { sub aggregate_status_by_author { my ( $self, $pauseid ) = @_; my $agg = $self->es->search( { - index => $self->index_name, - type => 'release', - body => { + es_doc_path('release'), + body => { query => { term => { author => $pauseid } }, @@ -73,9 +73,8 @@ sub get_contributors { }; my $res = $self->es->search( - index => $self->index_name, - type => 'release', - body => { + es_doc_path('release'), + body => { query => $query, size => 999, _source => [qw< metadata.author metadata.x_contributors >], @@ -100,17 +99,10 @@ sub get_contributors { # this check is against a failure in tests (because fake author) return - unless $self->es->exists( - index => $self->index_name, - type => 'author', - id => $author_name, + unless $self->es->exists( es_doc_path('author'), id => $author_name, ); - my $author = $self->es->get( - index => $self->index_name, - type => 'author', - id => $author_name, - ); + my $author = $self->es->get( es_doc_path('author'), id => $author_name, ); my $author_email = $author->{_source}{email}; my $author_gravatar_url = $author->{_source}{gravatar_url}; @@ -177,9 +169,8 @@ sub get_contributors { if ( !$contrib->{pauseid} ) { for my $email ( @{ $contrib->{email} } ) { my $check_author = $self->es->search( - index => $self->index_name, - type => 'author', - body => { + es_doc_path('author'), + body => { query => { term => { email => $email } }, size => 10, } @@ -201,9 +192,8 @@ sub get_contributors { }; my $contrib_authors = $self->es->search( - index => $self->index_name, - type => 'author', - body => { + es_doc_path('author'), + body => { query => $contrib_query, size => 999, _source => [qw< pauseid gravatar_url >], @@ -236,9 +226,8 @@ sub get_files { }; my $ret = $self->es->search( - index => $self->index_name, - type => 'file', - body => { + es_doc_path('file'), + body => { query => $query, size => 999, _source => [qw< name path >], @@ -256,9 +245,8 @@ sub 
get_checksums { my $query = { term => { name => $release } }; my $ret = $self->es->search( - index => $self->index_name, - type => 'release', - body => { + es_doc_path('release'), + body => { query => $query, size => 1, _source => [qw< checksum_md5 checksum_sha256 >], @@ -322,11 +310,7 @@ sub activity { size => 0, }; - my $ret = $self->es->search( - index => $self->index_name, - type => 'release', - body => $body, - ); + my $ret = $self->es->search( es_doc_path('release'), body => $body, ); my $data = { map { $_->{key} => $_->{doc_count} } @{ $ret->{aggregations}{histo}{entries}{buckets} } }; @@ -355,11 +339,7 @@ sub by_author_and_name { } }; - my $ret = $self->es->search( - index => $self->index_name, - type => 'release', - body => $body, - ); + my $ret = $self->es->search( es_doc_path('release'), body => $body, ); my $data = $ret->{hits}{hits}[0]{_source}; @@ -399,11 +379,7 @@ sub by_author_and_names { } }; - my $ret = $self->es->search( - index => $self->index_name, - type => 'release', - body => $body, - ); + my $ret = $self->es->search( es_doc_path('release'), body => $body, ); my @releases; for my $hit ( @{ $ret->{hits}{hits} } ) { @@ -441,11 +417,7 @@ sub by_author { from => ( $page - 1 ) * $size, }; - my $ret = $self->es->search( - index => $self->index_name, - type => 'release', - body => $body, - ); + my $ret = $self->es->search( es_doc_path('release'), body => $body, ); my $data = [ map { $_->{_source} } @{ $ret->{hits}{hits} } ]; @@ -476,11 +448,7 @@ sub latest_by_distribution { size => 1 }; - my $ret = $self->es->search( - index => $self->index_name, - type => 'release', - body => $body, - ); + my $ret = $self->es->search( es_doc_path('release'), body => $body, ); my $data = $ret->{hits}{hits}[0]{_source}; @@ -511,11 +479,7 @@ sub latest_by_author { size => 1000, }; - my $ret = $self->es->search( - index => $self->index_name, - type => 'release', - body => $body, - ); + my $ret = $self->es->search( es_doc_path('release'), body => $body, ); my $data = [ 
map { $_->{_source} } @{ $ret->{hits}{hits} } ]; @@ -536,11 +500,7 @@ sub all_by_author { size => $size, from => ( $page - 1 ) * $size, }; - my $ret = $self->es->search( - index => $self->index_name, - type => 'release', - body => $body, - ); + my $ret = $self->es->search( es_doc_path('release'), body => $body, ); my $data = [ map { $_->{_source} } @{ $ret->{hits}{hits} } ]; @@ -608,11 +568,7 @@ sub versions { ], }; - my $ret = $self->es->search( - index => $self->index_name, - type => 'release', - body => $body, - ); + my $ret = $self->es->search( es_doc_path('release'), body => $body, ); my $data = [ map { $_->{_source} } @{ $ret->{hits}{hits} } ]; @@ -653,11 +609,7 @@ sub top_uploaders { size => 0, }; - my $ret = $self->es->search( - index => $self->index_name, - type => 'release', - body => $body, - ); + my $ret = $self->es->search( es_doc_path('release'), body => $body, ); my $counts = { map { $_->{key} => $_->{doc_count} } @{ $ret->{aggregations}{author}{entries}{buckets} } }; @@ -692,9 +644,8 @@ sub _get_latest_release { my ( $self, $distribution ) = @_; my $release = $self->es->search( - index => $self->index_name, - type => 'release', - body => { + es_doc_path('release'), + body => { query => { bool => { must => [ @@ -722,9 +673,8 @@ sub _get_provided_modules { my ( $self, $release ) = @_; my $provided_modules = $self->es->search( - index => $self->index_name, - type => 'file', - body => { + es_doc_path('file'), + body => { query => { bool => { must => [ @@ -793,9 +743,8 @@ sub _get_depended_releases { }; my $depended = $self->es->search( - index => $self->index_name, - type => 'release', - body => { + es_doc_path('release'), + body => { query => { bool => { must => [ @@ -871,11 +820,7 @@ sub recent { sort => [ { 'date' => { order => 'desc' } } ] }; - my $ret = $self->es->search( - index => $self->index_name, - type => 'release', - body => $body, - ); + my $ret = $self->es->search( es_doc_path('release'), body => $body, ); my $data = [ map { $_->{_source} 
} @{ $ret->{hits}{hits} } ]; @@ -960,11 +905,7 @@ sub modules { ) ], }; - my $ret = $self->es->search( - index => $self->index_name, - type => 'file', - body => $body, - ); + my $ret = $self->es->search( es_doc_path('file'), body => $body, ); my @files = map $_->{_source}, @{ $ret->{hits}{hits} }; @@ -1148,8 +1089,7 @@ sub find_download_url { }; my $res = $self->es->search( - index => $self->index_name, - type => $module_filter ? 'file' : 'release', + es_doc_path( $module_filter ? 'file' : 'release' ), body => $body, search_type => 'dfs_query_then_fetch', ); @@ -1253,9 +1193,8 @@ sub predecessor { my ( $self, $name ) = @_; my $res = $self->es->search( - index => $self->index_name, - type => 'release', - body => { + es_doc_path('release'), + body => { query => { bool => { must => [ { term => { distribution => $name } }, ], @@ -1275,9 +1214,8 @@ sub find { my ( $self, $name ) = @_; my $res = $self->es->search( - index => $self->index_name, - type => 'release', - body => { + es_doc_path('release'), + body => { query => { bool => { must => [ diff --git a/lib/MetaCPAN/Query/Search.pm b/lib/MetaCPAN/Query/Search.pm index 55d7e8364..4e75da972 100644 --- a/lib/MetaCPAN/Query/Search.pm +++ b/lib/MetaCPAN/Query/Search.pm @@ -6,6 +6,7 @@ use Const::Fast qw( const ); use Hash::Merge qw( merge ); use List::Util qw( min uniq ); use Log::Contextual qw( :log :dlog ); +use MetaCPAN::ESConfig qw( es_doc_path ); use MetaCPAN::Types::TypeTiny qw( Object Str ); use MetaCPAN::Util qw( hit_total true false ); use MooseX::StrictConstructor; @@ -355,10 +356,9 @@ sub build_query { } sub run_query { - my ( $self, $type, $es_query ) = @_; + my ( $self, $doc, $es_query ) = @_; return $self->es->search( - index => $self->index_name, - type => $type, + es_doc_path($doc), body => $es_query, search_type => 'dfs_query_then_fetch', ); diff --git a/lib/MetaCPAN/Script/Author.pm b/lib/MetaCPAN/Script/Author.pm index 0a1cd0279..5332a7be9 100644 --- a/lib/MetaCPAN/Script/Author.pm +++ 
b/lib/MetaCPAN/Script/Author.pm @@ -12,6 +12,7 @@ use Email::Valid (); use Encode (); use Log::Contextual qw( :log :dlog ); use MetaCPAN::Document::Author (); +use MetaCPAN::ESConfig qw( es_doc_path ); use MetaCPAN::Types::TypeTiny qw( Str ); use MetaCPAN::Util qw(diff_struct true false); use URI (); @@ -128,17 +129,15 @@ sub index_authors { my @author_ids_to_purge; my $bulk = $self->es->bulk_helper( - index => $self->index->name, - type => 'author', + es_doc_path('author'), max_count => 250, timeout => '25m', ); my $scroll = $self->es->scroll_helper( - index => $self->index->name, - type => 'author', - size => 500, - body => { + es_doc_path('author'), + size => 500, + body => { query => { $self->pauseid ? ( diff --git a/lib/MetaCPAN/Script/Backpan.pm b/lib/MetaCPAN/Script/Backpan.pm index 44b30b45a..935ed4a36 100644 --- a/lib/MetaCPAN/Script/Backpan.pm +++ b/lib/MetaCPAN/Script/Backpan.pm @@ -6,6 +6,7 @@ use warnings; use Moose; use Log::Contextual qw( :log :dlog ); +use MetaCPAN::ESConfig qw( es_doc_path ); use MetaCPAN::Types::TypeTiny qw( Bool HashRef Str ); with 'MetaCPAN::Role::Script', 'MooseX::Getopt::Dashes'; @@ -63,9 +64,8 @@ sub build_release_status_map { my $scroll = $self->es->scroll_helper( scroll => '5m', - index => $self->index->name, - type => 'release', - body => { + es_doc_path('release'), + body => { %{ $self->_get_release_query }, size => 500, _source => [ 'author', 'archive', 'name' ], @@ -126,8 +126,7 @@ sub update_releases { log_info {"update_releases"}; $self->_bulk->{release} ||= $self->es->bulk_helper( - index => $self->index->name, - type => 'release', + es_doc_path('release'), max_count => 250, timeout => '5m', ); @@ -166,9 +165,8 @@ sub update_files_author { my $scroll = $self->es->scroll_helper( scroll => '5m', - index => $self->index->name, - type => 'file', - body => { + es_doc_path('file'), + body => { query => { bool => { must => [ @@ -183,8 +181,7 @@ sub update_files_author { ); $self->_bulk->{file} ||= $self->es->bulk_helper( - 
index => $self->index->name, - type => 'file', + es_doc_path('file'), max_count => 250, timeout => '5m', ); diff --git a/lib/MetaCPAN/Script/CPANTesters.pm b/lib/MetaCPAN/Script/CPANTesters.pm index 9a33d8f5b..ebf630ae8 100644 --- a/lib/MetaCPAN/Script/CPANTesters.pm +++ b/lib/MetaCPAN/Script/CPANTesters.pm @@ -7,6 +7,7 @@ use ElasticSearchX::Model::Document::Types qw( ESBulk ); use File::stat qw( stat ); use IO::Uncompress::Bunzip2 qw( bunzip2 ); use Log::Contextual qw( :log :dlog ); +use MetaCPAN::ESConfig qw( es_doc_path ); use MetaCPAN::Types::TypeTiny qw( Bool Path Uri ); use MetaCPAN::Util qw( true false ); @@ -47,10 +48,7 @@ has _bulk => ( isa => ESBulk, lazy => 1, default => sub { - $_[0]->es->bulk_helper( - index => $_[0]->index->name, - type => 'release' - ); + $_[0]->es->bulk_helper( es_doc_path('release') ); }, ); @@ -86,10 +84,9 @@ sub index_reports { bunzip2 "$db.bz2" => "$db", AutoClose => 1 if -e "$db.bz2"; my $scroll = $es->scroll_helper( - index => $self->index->name, - size => '500', - type => 'release', - body => { + es_doc_path('release'), + size => '500', + body => { sort => '_doc', }, ); diff --git a/lib/MetaCPAN/Script/CPANTestersAPI.pm b/lib/MetaCPAN/Script/CPANTestersAPI.pm index 957c51751..0c9e6ca50 100644 --- a/lib/MetaCPAN/Script/CPANTestersAPI.pm +++ b/lib/MetaCPAN/Script/CPANTestersAPI.pm @@ -6,6 +6,7 @@ use warnings; use Cpanel::JSON::XS qw( decode_json ); use ElasticSearchX::Model::Document::Types qw( ESBulk ); use Log::Contextual qw( :log :dlog ); +use MetaCPAN::ESConfig qw( es_doc_path ); use MetaCPAN::Types::TypeTiny qw( Uri ); use MetaCPAN::Util qw( true false ); use Moose; @@ -34,8 +35,7 @@ has _bulk => ( lazy => 1, default => sub { $_[0]->es->bulk_helper( - index => $_[0]->index->name, - type => 'release', + es_doc_path('release'), max_count => 250, timeout => '30m', ); @@ -63,10 +63,9 @@ sub index_reports { my $data = decode_json $json; my $scroll = $es->scroll_helper( - index => $self->index->name, - size => '500', - type => 
'release', - body => { + es_doc_path('release'), + size => '500', + body => { sort => '_doc', }, ); diff --git a/lib/MetaCPAN/Script/CVE.pm b/lib/MetaCPAN/Script/CVE.pm index 3980f6461..2c22c58f0 100644 --- a/lib/MetaCPAN/Script/CVE.pm +++ b/lib/MetaCPAN/Script/CVE.pm @@ -5,6 +5,7 @@ use namespace::autoclean; use Cpanel::JSON::XS qw( decode_json ); use Log::Contextual qw( :log :dlog ); +use MetaCPAN::ESConfig qw( es_doc_path ); use MetaCPAN::Types::TypeTiny qw( Bool Str Uri ); use MetaCPAN::Util qw( hit_total numify_version true false ); use Path::Tiny qw( path ); @@ -66,10 +67,7 @@ sub run { sub index_cve_data { my ( $self, $data ) = @_; - my $bulk = $self->es->bulk_helper( - index => 'cve', - type => 'cve', - ); + my $bulk = $self->es->bulk_helper( es_doc_path('cve') ); log_info {'Updating the cve index'}; @@ -161,9 +159,8 @@ sub index_cve_data { my $query = {}; my $releases = $self->es->search( - index => 'cpan', - type => 'release', - body => { + es_doc_path('release'), + body => { query => { bool => { must => [ diff --git a/lib/MetaCPAN/Script/Check.pm b/lib/MetaCPAN/Script/Check.pm index 712896ac5..81b115ca0 100644 --- a/lib/MetaCPAN/Script/Check.pm +++ b/lib/MetaCPAN/Script/Check.pm @@ -6,6 +6,7 @@ use warnings; use File::Spec::Functions qw( catfile ); use Log::Contextual qw( :log ); use Moose; +use MetaCPAN::ESConfig qw( es_doc_path ); use MetaCPAN::Types::TypeTiny qw( Bool Int Str ); use MetaCPAN::Util qw( true false ); @@ -85,8 +86,7 @@ sub check_modules { # look up this module in ElasticSearch and see what we have on it my $results = $es->search( - index => $self->index->name, - type => 'file', + es_doc_path('file'), query => { bool => { must => [ @@ -114,8 +114,7 @@ sub check_modules { # now find the first latest releases for these files foreach my $file (@files) { my $release_results = $es->search( - index => $self->index->name, - type => 'release', + es_doc_path('release'), query => { bool => { must => [ @@ -145,8 +144,7 @@ sub check_modules { if ( 
!@releases ) { foreach my $file (@files) { my $release_results = $es->search( - index => $self->index->name, - type => 'release', + es_doc_path('release'), query => { bool => { must => [ diff --git a/lib/MetaCPAN/Script/Checksum.pm b/lib/MetaCPAN/Script/Checksum.pm index 52a22ccf9..57d1b68af 100644 --- a/lib/MetaCPAN/Script/Checksum.pm +++ b/lib/MetaCPAN/Script/Checksum.pm @@ -3,6 +3,7 @@ package MetaCPAN::Script::Checksum; use Moose; use Log::Contextual qw( :log ); +use MetaCPAN::ESConfig qw( es_doc_path ); use MetaCPAN::Types::TypeTiny qw( Bool Int ); use MetaCPAN::Util qw( true false ); @@ -33,10 +34,7 @@ sub run { my $bulk; if ( !$self->dry_run ) { - $bulk = $self->es->bulk_helper( - index => $self->index->name, - type => 'release', - ); + $bulk = $self->es->bulk_helper( es_doc_path('release') ); } else { log_warn {"--- DRY-RUN ---"}; @@ -45,8 +43,7 @@ sub run { log_info {"Searching for releases missing checksums"}; my $scroll = $self->es->scroll_helper( - index => $self->index->name, - type => 'release', + es_doc_path('release'), scroll => '10m', body => { query => { diff --git a/lib/MetaCPAN/Script/Contributor.pm b/lib/MetaCPAN/Script/Contributor.pm index 2239e3fb3..3f7b8f047 100644 --- a/lib/MetaCPAN/Script/Contributor.pm +++ b/lib/MetaCPAN/Script/Contributor.pm @@ -5,8 +5,9 @@ use warnings; use Moose; -use Log::Contextual qw( :log ); -use Ref::Util qw( is_arrayref ); +use MetaCPAN::ESConfig qw( es_doc_path ); +use Log::Contextual qw( :log ); +use Ref::Util qw( is_arrayref ); use MetaCPAN::Types::TypeTiny qw( Bool HashRef Int Str ); @@ -83,9 +84,8 @@ sub run { my $scroll = $self->es->scroll_helper( scroll => $timeout, - index => $self->index->name, - type => 'release', - body => { + es_doc_path('release'), + body => { query => $query, size => 500, _source => [qw( author distribution name )], diff --git a/lib/MetaCPAN/Script/Cover.pm b/lib/MetaCPAN/Script/Cover.pm index 46dd3dab5..e14f51d0c 100644 --- a/lib/MetaCPAN/Script/Cover.pm +++ 
b/lib/MetaCPAN/Script/Cover.pm @@ -5,6 +5,7 @@ use namespace::autoclean; use Cpanel::JSON::XS qw( decode_json ); use Log::Contextual qw( :log :dlog ); +use MetaCPAN::ESConfig qw( es_doc_path ); use MetaCPAN::Types::TypeTiny qw( Bool Str Uri ); use Path::Tiny qw( path ); use MetaCPAN::Util qw( hit_total true false ); @@ -53,10 +54,7 @@ sub run { sub index_cover_data { my ( $self, $data ) = @_; - my $bulk = $self->es->bulk_helper( - index => 'cover', - type => 'cover', - ); + my $bulk = $self->es->bulk_helper( es_doc_path('cover') ); log_info {'Updating the cover index'}; @@ -64,10 +62,9 @@ sub index_cover_data { for my $version ( keys %{ $data->{$dist} } ) { my $release = $dist . '-' . $version; my $rel_check = $self->es->search( - index => 'cpan', - type => 'release', - size => 0, - body => { + es_doc_path('release'), + size => 0, + body => { query => { term => { name => $release } }, }, ); diff --git a/lib/MetaCPAN/Script/External.pm b/lib/MetaCPAN/Script/External.pm index c74de55c1..76d50ee59 100644 --- a/lib/MetaCPAN/Script/External.pm +++ b/lib/MetaCPAN/Script/External.pm @@ -6,6 +6,7 @@ use namespace::autoclean; use Email::Sender::Simple (); use Email::Simple (); use Log::Contextual qw( :log ); +use MetaCPAN::ESConfig qw( es_doc_path ); use MetaCPAN::Types::TypeTiny qw( Str ); use MetaCPAN::Util qw( true false ); @@ -64,8 +65,7 @@ sub update { my $external_source = $self->external_source; my $scroll = $self->es->scroll_helper( - index => $self->index->name, - type => 'distribution', + es_doc_path('distribution'), scroll => '10m', body => { query => { @@ -90,10 +90,7 @@ sub update { } } - my $bulk = $self->es->bulk_helper( - index => $self->index->name, - type => 'distribution', - ); + my $bulk = $self->es->bulk_helper( es_doc_path('distribution'), ); for my $d ( keys %{$dist} ) { log_debug {"[$external_source] adding $d"}; diff --git a/lib/MetaCPAN/Script/Favorite.pm b/lib/MetaCPAN/Script/Favorite.pm index 59b884756..958203e85 100644 --- 
a/lib/MetaCPAN/Script/Favorite.pm +++ b/lib/MetaCPAN/Script/Favorite.pm @@ -4,6 +4,7 @@ use Moose; use Log::Contextual qw( :log ); +use MetaCPAN::ESConfig qw( es_doc_path ); use MetaCPAN::Types::TypeTiny qw( Bool Int Str ); use MetaCPAN::Util qw( true false ); @@ -92,8 +93,7 @@ sub index_favorites { } elsif ( $self->age ) { my $favs = $self->es->scroll_helper( - index => $self->index->name, - type => 'favorite', + es_doc_path('favorite'), scroll => '5m', body => { query => $age_filter, @@ -125,8 +125,7 @@ sub index_favorites { } else { my $favs = $self->es->scroll_helper( - index => $self->index->name, - type => 'favorite', + es_doc_path('favorite'), scroll => '30s', body => { query => $query, @@ -154,8 +153,7 @@ sub index_favorites { } my $files = $self->es->scroll_helper( - index => $self->index->name, - type => 'file', + es_doc_path('file'), scroll => '15m', body => { query => { @@ -223,15 +221,13 @@ sub index_favorites { } else { my $bulk = $self->es->bulk_helper( - index => $self->index->name, - type => 'file', + es_doc_path('file'), max_count => 250, timeout => '120m', ); my $files = $self->es->scroll_helper( - index => $self->index->name, - type => 'file', + es_doc_path('file'), scroll => '15s', body => { query => { term => { distribution => $dist } }, diff --git a/lib/MetaCPAN/Script/Latest.pm b/lib/MetaCPAN/Script/Latest.pm index 73dff3edd..a707dfc5c 100644 --- a/lib/MetaCPAN/Script/Latest.pm +++ b/lib/MetaCPAN/Script/Latest.pm @@ -7,6 +7,7 @@ use Log::Contextual qw( :log ); use Moose; use CPAN::DistnameInfo (); use DateTime::Format::ISO8601 (); +use MetaCPAN::ESConfig qw( es_doc_path ); use MetaCPAN::Types::TypeTiny qw( Bool Str ); use MetaCPAN::Util qw( true false ); use Parse::CPAN::Packages::Fast (); @@ -142,10 +143,9 @@ sub run { if @module_filters > 1; my $scroll = $self->es->scroll_helper( { - index => $self->index->name, - type => 'file', - size => 100, - body => { + es_doc_path('file'), + size => 100, + body => { query => $query, _source => [ 
qw(author date distribution download_url module.name release status) @@ -216,10 +216,7 @@ sub run { } } - my $bulk = $self->es->bulk_helper( - index => $self->index->name, - type => 'file' - ); + my $bulk = $self->es->bulk_helper( es_doc_path('file') ); my %to_purge; @@ -277,10 +274,9 @@ sub reindex { # Get all the files for the release. my $scroll = $self->es->scroll_helper( - index => $self->index->name, - type => 'file', - size => 100, - body => { + es_doc_path('file'), + size => 100, + body => { query => { bool => { must => [ diff --git a/lib/MetaCPAN/Script/Mapping.pm b/lib/MetaCPAN/Script/Mapping.pm index 17997d4f3..421ab8532 100644 --- a/lib/MetaCPAN/Script/Mapping.pm +++ b/lib/MetaCPAN/Script/Mapping.pm @@ -428,18 +428,14 @@ sub empty_type { my $type = $self->delete_from_type; log_info {"Emptying type: $type"}; - my $bulk = $self->es->bulk_helper( - index => $self->index->name, - type => $type, - max_count => 500, - ); + my $bulk + = $self->es->bulk_helper( es_doc_path($type), max_count => 500, ); my $scroll = $self->es->scroll_helper( size => 250, scroll => '10m', - index => $self->index->name, - type => $type, - body => { + es_doc_path($type), + body => { query => { match_all => {} }, sort => '_doc', }, diff --git a/lib/MetaCPAN/Script/Package.pm b/lib/MetaCPAN/Script/Package.pm index 05d293482..9e8a1982b 100644 --- a/lib/MetaCPAN/Script/Package.pm +++ b/lib/MetaCPAN/Script/Package.pm @@ -5,6 +5,7 @@ use Moose; use CPAN::DistnameInfo (); use IO::Uncompress::Gunzip (); use Log::Contextual qw( :log ); +use MetaCPAN::ESConfig qw( es_doc_path ); use MetaCPAN::Types::TypeTiny qw( Bool ); use MetaCPAN::Util qw( true false ); @@ -52,10 +53,7 @@ sub index_packages { } log_debug {$meta}; - my $bulk = $self->es->bulk_helper( - index => $self->index->name, - type => 'package', - ); + my $bulk = $self->es->bulk_helper( es_doc_path('package') ); my %seen; log_debug {"adding data"}; @@ -98,8 +96,7 @@ sub run_cleanup { log_debug {"checking package data to remove"}; my 
$scroll = $self->es->scroll_helper( - index => $self->index->name, - type => 'package', + es_doc_path('package'), scroll => '30m', body => { query => { match_all => {} } }, ); diff --git a/lib/MetaCPAN/Script/Permission.pm b/lib/MetaCPAN/Script/Permission.pm index 1167beaf7..bce91f271 100644 --- a/lib/MetaCPAN/Script/Permission.pm +++ b/lib/MetaCPAN/Script/Permission.pm @@ -3,6 +3,7 @@ package MetaCPAN::Script::Permission; use Moose; use Log::Contextual qw( :log ); +use MetaCPAN::ESConfig qw( es_doc_path ); use MetaCPAN::Types::TypeTiny qw( Bool ); use MetaCPAN::Util qw( true false ); use PAUSE::Permissions (); @@ -34,10 +35,7 @@ sub index_permissions { my $file_path = $self->cpan->child(qw(modules 06perms.txt))->absolute; my $pp = PAUSE::Permissions->new( path => $file_path ); - my $bulk = $self->es->bulk_helper( - index => $self->index->name, - type => 'permission', - ); + my $bulk = $self->es->bulk_helper( es_doc_path('permission') ); my %seen; log_debug {"building permission data to add"}; @@ -83,8 +81,7 @@ sub run_cleanup { log_debug {"checking permission data to remove"}; my $scroll = $self->es->scroll_helper( - index => $self->index->name, - type => 'permission', + es_doc_path('permission'), scroll => '30m', body => { query => { match_all => {} } }, ); diff --git a/lib/MetaCPAN/Script/Purge.pm b/lib/MetaCPAN/Script/Purge.pm index 57a3503bf..c12ea3f8a 100644 --- a/lib/MetaCPAN/Script/Purge.pm +++ b/lib/MetaCPAN/Script/Purge.pm @@ -3,6 +3,7 @@ package MetaCPAN::Script::Purge; use Moose; use Log::Contextual qw( :log ); +use MetaCPAN::ESConfig qw( es_doc_path ); use MetaCPAN::Types::TypeTiny qw( Bool HashRef Str ); use MetaCPAN::Util qw( author_dir true false ); @@ -34,21 +35,17 @@ has bulk => ( ); sub _build_bulk { - my $self = shift; - my $index = $self->index->name; + my $self = shift; return +{ - author => $self->es->bulk_helper( index => $index, type => 'author' ), - contributor => $self->es->bulk_helper( - index => 'contributor', - type => 'contributor' - 
), - favorite => - $self->es->bulk_helper( index => $index, type => 'favorite' ), - file => $self->es->bulk_helper( index => $index, type => 'file' ), - permission => - $self->es->bulk_helper( index => $index, type => 'permission' ), - release => - $self->es->bulk_helper( index => $index, type => 'release' ), + map { ; $_ => $self->es->bulk_helper( es_doc_path($_) ) } + qw( + author + contributor + favorite + file + permission + release + ) }; } @@ -56,9 +53,8 @@ sub _get_scroller_release { my ( $self, $query ) = @_; return $self->es->scroll_helper( scroll => '10m', - index => $self->index->name, - type => 'release', - body => { + es_doc_path('release'), + body => { query => $query, size => 500, _source => [qw( archive name )], @@ -70,9 +66,8 @@ sub _get_scroller_file { my ( $self, $query ) = @_; return $self->es->scroll_helper( scroll => '10m', - index => $self->index->name, - type => 'file', - body => { + es_doc_path('file'), + body => { query => $query, size => 500, _source => [qw( name )], @@ -84,9 +79,8 @@ sub _get_scroller_favorite { my ( $self, $query ) = @_; return $self->es->scroll_helper( scroll => '10m', - index => $self->index->name, - type => 'favorite', - body => { + es_doc_path('favorite'), + body => { query => $query, size => 500, _source => false, @@ -98,9 +92,8 @@ sub _get_scroller_contributor { my ( $self, $query ) = @_; return $self->es->scroll_helper( scroll => '10m', - index => 'contributor', - type => 'contributor', - body => { + es_doc_path('contributor'), + body => { query => $query, size => 500, _source => [qw( release_name )], diff --git a/lib/MetaCPAN/Script/Release.pm b/lib/MetaCPAN/Script/Release.pm index f2969c56b..40c375254 100644 --- a/lib/MetaCPAN/Script/Release.pm +++ b/lib/MetaCPAN/Script/Release.pm @@ -8,6 +8,7 @@ use File::Find::Rule (); use File::stat (); use List::Util qw( uniq ); use Log::Contextual qw( :log :dlog ); +use MetaCPAN::ESConfig qw( es_doc_path ); use MetaCPAN::Model::Release (); use MetaCPAN::Script::Runner (); 
use MetaCPAN::Types::TypeTiny qw( Bool HashRef Int Str ); @@ -197,9 +198,8 @@ sub run { if ( $self->skip ) { my $count = $self->es->count( - index => $self->index->name, - type => 'release', - body => { + es_doc_path('release'), + body => { query => { bool => { must => [ diff --git a/lib/MetaCPAN/Script/River.pm b/lib/MetaCPAN/Script/River.pm index d12c8bd88..d1c0554c3 100644 --- a/lib/MetaCPAN/Script/River.pm +++ b/lib/MetaCPAN/Script/River.pm @@ -5,6 +5,7 @@ use namespace::autoclean; use Cpanel::JSON::XS qw( decode_json ); use Log::Contextual qw( :log :dlog ); +use MetaCPAN::ESConfig qw( es_doc_path ); use MetaCPAN::Types::TypeTiny qw( Uri ); use MetaCPAN::Util qw( true false ); @@ -29,10 +30,7 @@ sub run { sub index_river_summaries { my ( $self, $summaries ) = @_; - my $bulk = $self->es->bulk_helper( - index => $self->index->name, - type => 'distribution', - ); + my $bulk = $self->es->bulk_helper( es_doc_path('distribution') ); for my $summary ( @{$summaries} ) { my $dist = delete $summary->{dist}; diff --git a/lib/MetaCPAN/Script/Role/Contributor.pm b/lib/MetaCPAN/Script/Role/Contributor.pm index 95529a89e..5f96a6919 100644 --- a/lib/MetaCPAN/Script/Role/Contributor.pm +++ b/lib/MetaCPAN/Script/Role/Contributor.pm @@ -2,8 +2,9 @@ package MetaCPAN::Script::Role::Contributor; use Moose::Role; -use MetaCPAN::Util qw( digest true false ); -use Ref::Util qw( is_arrayref ); +use MetaCPAN::ESConfig qw( es_doc_path ); +use MetaCPAN::Util qw( digest true false ); +use Ref::Util qw( is_arrayref ); sub get_cpan_author_contributors { my ( $self, $author, $release, $distribution ) = @_; @@ -22,11 +23,7 @@ sub get_cpan_author_contributors { # skip existing records my $id = digest( $d->{pauseid}, $release ); - my $exists = $es->exists( - index => 'contributor', - type => 'contributor', - id => $id, - ); + my $exists = $es->exists( es_doc_path('contributor'), id => $id, ); next if $exists; $d->{release_author} = $author; @@ -42,11 +39,8 @@ sub update_release_contirbutors { my 
( $self, $data, $timeout ) = @_; return unless $data and is_arrayref($data); - my $bulk = $self->es->bulk_helper( - index => 'contributor', - type => 'contributor', - timeout => $timeout || '5m', - ); + my $bulk = $self->es->bulk_helper( es_doc_path('contributor'), + timeout => $timeout || '5m', ); for my $d ( @{$data} ) { my $id = digest( $d->{pauseid}, $d->{release_name} ); diff --git a/lib/MetaCPAN/Script/Session.pm b/lib/MetaCPAN/Script/Session.pm index d0feecd76..8f7f14638 100644 --- a/lib/MetaCPAN/Script/Session.pm +++ b/lib/MetaCPAN/Script/Session.pm @@ -3,9 +3,11 @@ package MetaCPAN::Script::Session; use strict; use warnings; -use DateTime (); use Moose; +use DateTime (); +use MetaCPAN::ESConfig qw( es_doc_path ); + with 'MetaCPAN::Role::Script', 'MooseX::Getopt'; sub run { @@ -14,15 +16,11 @@ sub run { my $scroll = $self->es->scroll_helper( size => 10_000, scroll => '1m', - index => 'user', - type => 'session', + es_doc_path('session'), ); - my $bulk = $self->es->bulk_helper( - index => 'user', - type => 'session', - max_count => 10_000 - ); + my $bulk = $self->es->bulk_helper( es_doc_path('session'), + max_count => 10_000 ); my $cutoff = DateTime->now->subtract( months => 1 )->epoch; diff --git a/lib/MetaCPAN/Script/Suggest.pm b/lib/MetaCPAN/Script/Suggest.pm index 2edb77eef..75e47bda3 100644 --- a/lib/MetaCPAN/Script/Suggest.pm +++ b/lib/MetaCPAN/Script/Suggest.pm @@ -7,6 +7,7 @@ use Moose; use DateTime (); use Log::Contextual qw( :log ); +use MetaCPAN::ESConfig qw( es_doc_path ); use MetaCPAN::Types::TypeTiny qw( Bool Int ); with 'MetaCPAN::Role::Script', 'MooseX::Getopt'; @@ -63,8 +64,7 @@ sub _update_slice { my ( $self, $range ) = @_; my $files = $self->es->scroll_helper( - index => $self->index->name, - type => 'file', + es_doc_path('file'), scroll => '5m', body => { query => { @@ -81,8 +81,7 @@ sub _update_slice { ); my $bulk = $self->es->bulk_helper( - index => $self->index->name, - type => 'file', + es_doc_path('file'), max_count => 250, timeout 
=> '5m', ); diff --git a/lib/MetaCPAN/Script/Tickets.pm b/lib/MetaCPAN/Script/Tickets.pm index 7dc231a2b..60a5a357b 100644 --- a/lib/MetaCPAN/Script/Tickets.pm +++ b/lib/MetaCPAN/Script/Tickets.pm @@ -8,6 +8,7 @@ $ENV{PERL_LWP_SSL_VERIFY_HOSTNAME} = 0; use HTTP::Request::Common qw( GET ); use Log::Contextual qw( :log :dlog ); +use MetaCPAN::ESConfig qw( es_doc_path ); use MetaCPAN::Util qw( true false ); use Net::GitHub::V4 (); use Ref::Util qw( is_hashref is_ref ); @@ -42,10 +43,7 @@ has _bulk => ( sub _build_bulk { my $self = shift; - $self->es->bulk_helper( - index => $self->index->name, - type => 'distribution', - ); + $self->es->bulk_helper( es_doc_path('distribution') ); } sub _build_github_token { @@ -78,9 +76,8 @@ sub check_all_distributions { # first: make sure all distributions have an entry my $scroll = $self->es->scroll_helper( scroll => '5m', - index => $self->index->name, - type => 'release', - body => { + es_doc_path('release'), + body => { query => { bool => { must_not => [ { term => { status => 'backpan' } } ] } diff --git a/lib/MetaCPAN/Script/Watcher.pm b/lib/MetaCPAN/Script/Watcher.pm index fffb6770b..3bdbaa6d9 100644 --- a/lib/MetaCPAN/Script/Watcher.pm +++ b/lib/MetaCPAN/Script/Watcher.pm @@ -7,6 +7,7 @@ use Moose; use CPAN::DistnameInfo (); use Cpanel::JSON::XS qw( decode_json ); use Log::Contextual qw( :log ); +use MetaCPAN::ESConfig qw( es_doc_path ); use MetaCPAN::Types::TypeTiny qw( Bool ); use MetaCPAN::Util qw( true false ); @@ -96,9 +97,8 @@ sub backpan_changes { my $self = shift; my $scroll = $self->es->scroll_helper( { scroll => '1m', - index => $self->index->name, - type => 'release', - body => { + es_doc_path('release'), + body => { query => { bool => { must_not => [ { term => { status => 'backpan' } }, ], @@ -170,8 +170,7 @@ sub reindex_release { my $es = $self->es; my $scroll = $es->scroll_helper( { - index => $self->index->name, - type => 'file', + es_doc_path('file'), scroll => '1m', body => { query => { @@ -199,10 +198,7 @@ 
sub reindex_release { my %bulk_helper; for (qw/ file release /) { - $bulk_helper{$_} = $self->es->bulk_helper( - index => $self->index->name, - type => $_, - ); + $bulk_helper{$_} = $self->es->bulk_helper( es_doc_path($_) ); } while ( my $row = $scroll->next ) { diff --git a/lib/MetaCPAN/Server/Controller.pm b/lib/MetaCPAN/Server/Controller.pm index a6affa018..75fd71a80 100644 --- a/lib/MetaCPAN/Server/Controller.pm +++ b/lib/MetaCPAN/Server/Controller.pm @@ -3,7 +3,8 @@ package MetaCPAN::Server::Controller; use Moose; use namespace::autoclean; -use MetaCPAN::Util qw( single_valued_arrayref_to_scalar ); +use MetaCPAN::ESConfig qw( es_doc_path ); +use MetaCPAN::Util qw( single_valued_arrayref_to_scalar ); BEGIN { extends 'Catalyst::Controller'; } @@ -54,12 +55,8 @@ sub model { sub mapping : Path('_mapping') Args(0) { my ( $self, $c ) = @_; - $c->stash( - $c->model('CPAN')->es->indices->get_mapping( - index => $c->model('CPAN')->index, - type => $self->type - ) - ); + $c->stash( $c->model('CPAN') + ->es->indices->get_mapping( es_doc_path( $self->type ) ) ); } sub get : Path('') : Args(1) { @@ -104,9 +101,8 @@ sub search : Path('_search') : ActionClass('~Deserialize') { delete $params->{callback}; eval { my $res = $self->model($c)->es->search( { - index => $c->model('CPAN')->index, - type => $self->type, - body => $c->req->data || delete $params->{source}, + es_doc_path( $self->type ), + body => $c->req->data || delete $params->{source}, %$params, } ); single_valued_arrayref_to_scalar( $_->{fields} ) From d709a5e54bb0195f6eeb4004cb991e21cd70ed66 Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Wed, 23 Oct 2024 09:21:01 +0200 Subject: [PATCH 13/31] fix model type alias for ESBool --- lib/MetaCPAN/Types/Internal.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MetaCPAN/Types/Internal.pm b/lib/MetaCPAN/Types/Internal.pm index af291a563..3bffb47f7 100644 --- a/lib/MetaCPAN/Types/Internal.pm +++ b/lib/MetaCPAN/Types/Internal.pm @@ -85,7 +85,7 @@ 
coerce ESBool, from Bool, via { }; $ElasticSearchX::Model::Document::Mapping::MAPPING{ESBool} - = $ElasticSearchX::Model::Document::Mapping::MAPPING{ESBool}; + = $ElasticSearchX::Model::Document::Mapping::MAPPING{Bool}; use MooseX::Attribute::Deflator; deflate 'ScalarRef', via {$$_}; From 373da0d04f7d2b133522a7c834cffa90a5a411f6 Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Wed, 23 Oct 2024 09:25:50 +0200 Subject: [PATCH 14/31] query modules don't need to be given an index name anymore --- lib/MetaCPAN/API/Plugin/Model.pm | 5 +---- lib/MetaCPAN/Document/Author/Set.pm | 5 +---- lib/MetaCPAN/Document/CVE/Set.pm | 5 +---- lib/MetaCPAN/Document/Contributor/Set.pm | 5 +---- lib/MetaCPAN/Document/Cover/Set.pm | 5 +---- lib/MetaCPAN/Document/Distribution/Set.pm | 5 +---- lib/MetaCPAN/Document/Favorite/Set.pm | 5 +---- lib/MetaCPAN/Document/File/Set.pm | 15 +++------------ lib/MetaCPAN/Document/Mirror/Set.pm | 5 +---- lib/MetaCPAN/Document/Package/Set.pm | 5 +---- lib/MetaCPAN/Document/Permission/Set.pm | 5 +---- lib/MetaCPAN/Document/Release/Set.pm | 5 +---- lib/MetaCPAN/Query/Role/Common.pm | 2 -- lib/MetaCPAN/Server/Model/Search.pm | 5 +---- t/model/search.t | 5 +---- t/query/release.t | 4 +--- xt/search_web.t | 5 +---- 17 files changed, 18 insertions(+), 73 deletions(-) diff --git a/lib/MetaCPAN/API/Plugin/Model.pm b/lib/MetaCPAN/API/Plugin/Model.pm index b95e56286..f33dd4b8e 100644 --- a/lib/MetaCPAN/API/Plugin/Model.pm +++ b/lib/MetaCPAN/API/Plugin/Model.pm @@ -21,10 +21,7 @@ has download => sub { has search => sub { my $self = shift; - return MetaCPAN::Query::Search->new( - es => $self->app->es, - index_name => 'cpan', - ); + return MetaCPAN::Query::Search->new( es => $self->app->es, ); }; has user => sub { diff --git a/lib/MetaCPAN/Document/Author/Set.pm b/lib/MetaCPAN/Document/Author/Set.pm index 8ad81236c..2b041ed9d 100644 --- a/lib/MetaCPAN/Document/Author/Set.pm +++ b/lib/MetaCPAN/Document/Author/Set.pm @@ -16,10 +16,7 @@ has query_author => ( sub 
_build_query_author { my $self = shift; - return MetaCPAN::Query::Author->new( - es => $self->es, - index_name => $self->index->name, - ); + return MetaCPAN::Query::Author->new( es => $self->es ); } __PACKAGE__->meta->make_immutable; diff --git a/lib/MetaCPAN/Document/CVE/Set.pm b/lib/MetaCPAN/Document/CVE/Set.pm index 63df15529..4602a7c5e 100644 --- a/lib/MetaCPAN/Document/CVE/Set.pm +++ b/lib/MetaCPAN/Document/CVE/Set.pm @@ -20,10 +20,7 @@ has query_cve => ( sub _build_query_cve { my $self = shift; - return MetaCPAN::Query::CVE->new( - es => $self->es, - index_name => 'cve', - ); + return MetaCPAN::Query::CVE->new( es => $self->es ); } __PACKAGE__->meta->make_immutable; diff --git a/lib/MetaCPAN/Document/Contributor/Set.pm b/lib/MetaCPAN/Document/Contributor/Set.pm index 17fdbba37..9e9b91f60 100644 --- a/lib/MetaCPAN/Document/Contributor/Set.pm +++ b/lib/MetaCPAN/Document/Contributor/Set.pm @@ -16,10 +16,7 @@ has query_contributor => ( sub _build_query_contributor { my $self = shift; - return MetaCPAN::Query::Contributor->new( - es => $self->es, - index_name => 'contributor', - ); + return MetaCPAN::Query::Contributor->new( es => $self->es ); } __PACKAGE__->meta->make_immutable; diff --git a/lib/MetaCPAN/Document/Cover/Set.pm b/lib/MetaCPAN/Document/Cover/Set.pm index 05e6278a8..52b833bdc 100644 --- a/lib/MetaCPAN/Document/Cover/Set.pm +++ b/lib/MetaCPAN/Document/Cover/Set.pm @@ -16,10 +16,7 @@ has query_cover => ( sub _build_query_cover { my $self = shift; - return MetaCPAN::Query::Cover->new( - es => $self->es, - index_name => 'cover', - ); + return MetaCPAN::Query::Cover->new( es => $self->es ); } __PACKAGE__->meta->make_immutable; diff --git a/lib/MetaCPAN/Document/Distribution/Set.pm b/lib/MetaCPAN/Document/Distribution/Set.pm index d0d0df59d..b7d4ece89 100644 --- a/lib/MetaCPAN/Document/Distribution/Set.pm +++ b/lib/MetaCPAN/Document/Distribution/Set.pm @@ -16,10 +16,7 @@ has query_distribution => ( sub _build_query_distribution { my $self = shift; - return 
MetaCPAN::Query::Distribution->new( - es => $self->es, - index_name => 'cpan', - ); + return MetaCPAN::Query::Distribution->new( es => $self->es ); } __PACKAGE__->meta->make_immutable; diff --git a/lib/MetaCPAN/Document/Favorite/Set.pm b/lib/MetaCPAN/Document/Favorite/Set.pm index c73a190ef..60b0649a6 100644 --- a/lib/MetaCPAN/Document/Favorite/Set.pm +++ b/lib/MetaCPAN/Document/Favorite/Set.pm @@ -20,10 +20,7 @@ has query_favorite => ( sub _build_query_favorite { my $self = shift; - return MetaCPAN::Query::Favorite->new( - es => $self->es, - index_name => $self->index->name, - ); + return MetaCPAN::Query::Favorite->new( es => $self->es ); } __PACKAGE__->meta->make_immutable; diff --git a/lib/MetaCPAN/Document/File/Set.pm b/lib/MetaCPAN/Document/File/Set.pm index abf545445..683c7c898 100644 --- a/lib/MetaCPAN/Document/File/Set.pm +++ b/lib/MetaCPAN/Document/File/Set.pm @@ -25,10 +25,7 @@ has query_file => ( sub _build_query_file { my $self = shift; - return MetaCPAN::Query::File->new( - es => $self->es, - index_name => $self->index->name, - ); + return MetaCPAN::Query::File->new( es => $self->es ); } has query_favorite => ( @@ -41,10 +38,7 @@ has query_favorite => ( sub _build_query_favorite { my $self = shift; - return MetaCPAN::Query::Favorite->new( - es => $self->es, - index_name => $self->index->name, - ); + return MetaCPAN::Query::Favorite->new( es => $self->es ); } has query_release => ( @@ -57,10 +51,7 @@ has query_release => ( sub _build_query_release { my $self = shift; - return MetaCPAN::Query::Release->new( - es => $self->es, - index_name => $self->index->name, - ); + return MetaCPAN::Query::Release->new( es => $self->es ); } my @ROGUE_DISTRIBUTIONS = qw( diff --git a/lib/MetaCPAN/Document/Mirror/Set.pm b/lib/MetaCPAN/Document/Mirror/Set.pm index 35b0d0248..7fb91a04a 100644 --- a/lib/MetaCPAN/Document/Mirror/Set.pm +++ b/lib/MetaCPAN/Document/Mirror/Set.pm @@ -16,10 +16,7 @@ has query_mirror => ( sub _build_query_mirror { my $self = shift; - return 
MetaCPAN::Query::Mirror->new( - es => $self->es, - index_name => $self->index->name, - ); + return MetaCPAN::Query::Mirror->new( es => $self->es ); } __PACKAGE__->meta->make_immutable; diff --git a/lib/MetaCPAN/Document/Package/Set.pm b/lib/MetaCPAN/Document/Package/Set.pm index ed7774331..981c61c53 100644 --- a/lib/MetaCPAN/Document/Package/Set.pm +++ b/lib/MetaCPAN/Document/Package/Set.pm @@ -16,10 +16,7 @@ has query_package => ( sub _build_query_package { my $self = shift; - return MetaCPAN::Query::Package->new( - es => $self->es, - index_name => $self->index->name, - ); + return MetaCPAN::Query::Package->new( es => $self->es ); } __PACKAGE__->meta->make_immutable; diff --git a/lib/MetaCPAN/Document/Permission/Set.pm b/lib/MetaCPAN/Document/Permission/Set.pm index c6ac91110..d4192b248 100644 --- a/lib/MetaCPAN/Document/Permission/Set.pm +++ b/lib/MetaCPAN/Document/Permission/Set.pm @@ -16,10 +16,7 @@ has query_permission => ( sub _build_query_permission { my $self = shift; - return MetaCPAN::Query::Permission->new( - es => $self->es, - index_name => $self->index->name, - ); + return MetaCPAN::Query::Permission->new( es => $self->es ); } __PACKAGE__->meta->make_immutable; diff --git a/lib/MetaCPAN/Document/Release/Set.pm b/lib/MetaCPAN/Document/Release/Set.pm index 8adae0f94..7cd365d3c 100644 --- a/lib/MetaCPAN/Document/Release/Set.pm +++ b/lib/MetaCPAN/Document/Release/Set.pm @@ -35,10 +35,7 @@ has query_release => ( sub _build_query_release { my $self = shift; - return MetaCPAN::Query::Release->new( - es => $self->es, - index_name => $self->index->name, - ); + return MetaCPAN::Query::Release->new( es => $self->es ); } sub find_github_based { diff --git a/lib/MetaCPAN/Query/Role/Common.pm b/lib/MetaCPAN/Query/Role/Common.pm index bbc90ccaf..dd492357a 100644 --- a/lib/MetaCPAN/Query/Role/Common.pm +++ b/lib/MetaCPAN/Query/Role/Common.pm @@ -4,6 +4,4 @@ use Moose::Role; has es => ( is => 'ro', ); -has index_name => ( is => 'ro', ); - 1; diff --git 
a/lib/MetaCPAN/Server/Model/Search.pm b/lib/MetaCPAN/Server/Model/Search.pm index 646a17ebf..92c73de09 100644 --- a/lib/MetaCPAN/Server/Model/Search.pm +++ b/lib/MetaCPAN/Server/Model/Search.pm @@ -15,10 +15,7 @@ has search => ( handles => [qw( search_for_first_result search_web )], default => sub { my $self = shift; - return MetaCPAN::Query::Search->new( - es => $self->es, - index_name => $self->index, - ); + return MetaCPAN::Query::Search->new( es => $self->es, ); }, ); diff --git a/t/model/search.t b/t/model/search.t index 76f62267a..57a109db1 100644 --- a/t/model/search.t +++ b/t/model/search.t @@ -10,10 +10,7 @@ use Test::More; # Just use this to get an es object. my $server = MetaCPAN::TestServer->new; -my $search = MetaCPAN::Query::Search->new( - es => $server->es_client, - index_name => 'cpan', -); +my $search = MetaCPAN::Query::Search->new( es => $server->es_client, ); ok( $search, 'search' ); diff --git a/t/query/release.t b/t/query/release.t index 30367d999..80514ba75 100644 --- a/t/query/release.t +++ b/t/query/release.t @@ -8,9 +8,7 @@ use MetaCPAN::Server::Test (); use Test::More; my $query = MetaCPAN::Query::Release->new( - es => MetaCPAN::Server::Test::model->es(), - index_name => 'cpan', -); + es => MetaCPAN::Server::Test::model->es(), ); is( $query->_get_latest_release('DoesNotExist'), undef, '_get_latest_release returns undef when release does not exist' ); diff --git a/xt/search_web.t b/xt/search_web.t index 14293bfd4..33e19143d 100644 --- a/xt/search_web.t +++ b/xt/search_web.t @@ -11,10 +11,7 @@ use Test::More; # Just use this to get an es object. 
my $server = MetaCPAN::TestServer->new; -my $search = MetaCPAN::Query::Search->new( - es => $server->es_client, - index_name => 'cpan', -); +my $search = MetaCPAN::Query::Search->new( es => $server->es_client ); my %tests = ( 'anyevent http' => 'AnyEvent::HTTP', From 74fcce0cf352950fb86cbba0f53b4c7798932a4d Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Wed, 23 Oct 2024 09:26:44 +0200 Subject: [PATCH 15/31] remove out of date comment --- lib/MetaCPAN/Script/Author.pm | 6 ------ 1 file changed, 6 deletions(-) diff --git a/lib/MetaCPAN/Script/Author.pm b/lib/MetaCPAN/Script/Author.pm index 5332a7be9..00e59e372 100644 --- a/lib/MetaCPAN/Script/Author.pm +++ b/lib/MetaCPAN/Script/Author.pm @@ -39,12 +39,6 @@ has pauseid => ( sub run { my $self = shift; - # check we are using a dedicated index, prompts if not - # my $index = $self->index->name; - # $self->are_you_sure( - # "Author script is run against a non-author specific index: $index !!!" - # ) unless $index =~ /author/; - $self->index_authors; $self->es->indices->refresh; } From 75b70213edabd238d1abbf70810ce9b91381c126 Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Wed, 23 Oct 2024 09:27:14 +0200 Subject: [PATCH 16/31] no need to prebuild an index object --- lib/MetaCPAN/Script/Release.pm | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/MetaCPAN/Script/Release.pm b/lib/MetaCPAN/Script/Release.pm index 40c375254..ced176545 100644 --- a/lib/MetaCPAN/Script/Release.pm +++ b/lib/MetaCPAN/Script/Release.pm @@ -187,7 +187,6 @@ sub run { # logic - feel free to clean up so the CP::DistInfo isn't my @module_to_purge_dists = map { CPAN::DistnameInfo->new($_) } @files; - $self->index; $self->cpan_file_map if ( $self->detect_backpan ); $self->perms; my @pid; From 709788364860c987d8aaa6a9d03ec155074cf665 Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Wed, 23 Oct 2024 09:28:23 +0200 Subject: [PATCH 17/31] add ES and ESModel Catalyst model classes Replaces the CPAN and User model classes. 
Removes magic namespace creation. Just return the Search::Elasticsearch object, or the model object. --- lib/MetaCPAN/Server/Controller.pm | 4 +- lib/MetaCPAN/Server/Controller/Activity.pm | 2 +- lib/MetaCPAN/Server/Controller/Author.pm | 2 +- lib/MetaCPAN/Server/Controller/Changes.pm | 8 ++-- lib/MetaCPAN/Server/Controller/Diff.pm | 6 +-- lib/MetaCPAN/Server/Controller/Login.pm | 2 +- lib/MetaCPAN/Server/Controller/Login/PAUSE.pm | 2 +- lib/MetaCPAN/Server/Controller/OAuth2.pm | 2 +- lib/MetaCPAN/Server/Controller/Package.pm | 2 +- lib/MetaCPAN/Server/Controller/Pod.pm | 4 +- lib/MetaCPAN/Server/Controller/Release.pm | 4 +- .../Server/Controller/ReverseDependencies.pm | 4 +- lib/MetaCPAN/Server/Controller/Scroll.pm | 2 +- lib/MetaCPAN/Server/Controller/Source.pm | 2 +- lib/MetaCPAN/Server/Controller/User.pm | 13 ++++-- .../Server/Controller/User/Favorite.pm | 4 +- lib/MetaCPAN/Server/Model/CPAN.pm | 44 ------------------- lib/MetaCPAN/Server/Model/ES.pm | 25 +++++++++++ lib/MetaCPAN/Server/Model/ESModel.pm | 31 +++++++++++++ lib/MetaCPAN/Server/Model/Search.pm | 19 ++++++-- lib/MetaCPAN/Server/Model/User.pm | 12 ----- lib/MetaCPAN/Server/User.pm | 2 +- t/lib/MetaCPAN/TestServer.pm | 4 +- t/model/release/reverse_dependencies.t | 9 ++-- 24 files changed, 114 insertions(+), 95 deletions(-) delete mode 100644 lib/MetaCPAN/Server/Model/CPAN.pm create mode 100644 lib/MetaCPAN/Server/Model/ES.pm create mode 100644 lib/MetaCPAN/Server/Model/ESModel.pm delete mode 100644 lib/MetaCPAN/Server/Model/User.pm diff --git a/lib/MetaCPAN/Server/Controller.pm b/lib/MetaCPAN/Server/Controller.pm index 75fd71a80..95e14b62e 100644 --- a/lib/MetaCPAN/Server/Controller.pm +++ b/lib/MetaCPAN/Server/Controller.pm @@ -41,7 +41,7 @@ sub apply_request_filter { sub model { my ( $self, $c ) = @_; - my $model = $c->model('CPAN')->type( $self->type ); + my $model = $c->model('ESModel')->doc( $self->type ); $model = $model->fields( [ map { split(/,/) } $c->req->param('fields') ] ) if 
$c->req->param('fields'); if ( my ($size) = $c->req->param('size') ) { @@ -55,7 +55,7 @@ sub model { sub mapping : Path('_mapping') Args(0) { my ( $self, $c ) = @_; - $c->stash( $c->model('CPAN') + $c->stash( $c->model('ESModel') ->es->indices->get_mapping( es_doc_path( $self->type ) ) ); } diff --git a/lib/MetaCPAN/Server/Controller/Activity.pm b/lib/MetaCPAN/Server/Controller/Activity.pm index cb6035f98..811b2f261 100644 --- a/lib/MetaCPAN/Server/Controller/Activity.pm +++ b/lib/MetaCPAN/Server/Controller/Activity.pm @@ -13,7 +13,7 @@ sub get : Path('') : Args(0) { my ( $self, $c ) = @_; $c->stash_or_detach( - $c->model('CPAN::Release')->activity( $c->req->params ) ); + $c->model('ESModel')->doc('release')->activity( $c->req->params ) ); } __PACKAGE__->meta->make_immutable; diff --git a/lib/MetaCPAN/Server/Controller/Author.pm b/lib/MetaCPAN/Server/Controller/Author.pm index 581176d93..2d9ff8d4a 100644 --- a/lib/MetaCPAN/Server/Controller/Author.pm +++ b/lib/MetaCPAN/Server/Controller/Author.pm @@ -16,7 +16,7 @@ sub get : Path('') : Args(1) { $c->cdn_max_age('1y'); my $file = $self->model($c)->raw->get($id); $c->stash_or_detach( - $c->model('CPAN::Release')->author_status( $id, $file ) ); + $c->model('ESModel')->doc('release')->author_status( $id, $file ) ); } # /author/search?q=QUERY diff --git a/lib/MetaCPAN/Server/Controller/Changes.pm b/lib/MetaCPAN/Server/Controller/Changes.pm index 75061a4cf..c775d07bd 100644 --- a/lib/MetaCPAN/Server/Controller/Changes.pm +++ b/lib/MetaCPAN/Server/Controller/Changes.pm @@ -25,7 +25,8 @@ sub get : Chained('index') : PathPart('') : Args(2) { $c->cdn_max_age('1y'); my $file - = $c->model('CPAN::File')->find_changes_files( $author, $release ); + = $c->model('ESModel')->doc('file') + ->find_changes_files( $author, $release ); $file or $c->detach( '/not_found', [] ); my $source = $c->model('Source')->path( @$file{qw(author release path)} ) @@ -52,7 +53,7 @@ sub get : Chained('index') : PathPart('') : Args(2) { sub find : 
Chained('index') : PathPart('') : Args(1) { my ( $self, $c, $name ) = @_; - my $release = eval { $c->model('CPAN::Release')->find($name); } + my $release = eval { $c->model('ESModel')->doc('release')->find($name); } or $c->detach( '/not_found', [] ); $c->forward( 'get', [ @$release{qw( author name )} ] ); @@ -76,7 +77,8 @@ sub by_releases : Path('by_releases') : Args(0) { return; } - my $ret = $c->model('CPAN::Release')->by_author_and_names( \@releases ); + my $ret = $c->model('ESModel')->doc('release') + ->by_author_and_names( \@releases ); my @changes; for my $release ( @{ $ret->{releases} } ) { diff --git a/lib/MetaCPAN/Server/Controller/Diff.pm b/lib/MetaCPAN/Server/Controller/Diff.pm index 468c3a3b2..3789ac9be 100644 --- a/lib/MetaCPAN/Server/Controller/Diff.pm +++ b/lib/MetaCPAN/Server/Controller/Diff.pm @@ -33,8 +33,8 @@ sub release : Chained('index') : PathPart('release') : Args(1) { my ( $latest, $previous ); try { - $latest = $c->model('CPAN::Release')->find($name); - $previous = $c->model('CPAN::Release')->predecessor($name); + $latest = $c->model('ESModel')->doc('release')->find($name); + $previous = $c->model('ESModel')->doc('release')->predecessor($name); } catch { $c->detach('/not_found'); @@ -55,7 +55,7 @@ sub file : Chained('index') : PathPart('file') : Args(2) { = map { [ @$_{qw(author release path)} ] } map { my $file = $_; - try { $c->model('CPAN::File')->raw->get($file)->{_source}; } + try { $c->model('ESModel')->doc('file')->raw->get($file)->{_source}; } or $c->detach('/not_found'); } ( $source, $target ); diff --git a/lib/MetaCPAN/Server/Controller/Login.pm b/lib/MetaCPAN/Server/Controller/Login.pm index 9d8770d77..2cf718167 100644 --- a/lib/MetaCPAN/Server/Controller/Login.pm +++ b/lib/MetaCPAN/Server/Controller/Login.pm @@ -37,7 +37,7 @@ sub index : Path Args(0) { sub update_user { my ( $self, $c, $type, $id, $data ) = @_; - my $model = $c->model('User::Account'); + my $model = $c->model('ESModel')->doc('account'); my $user = $model->find( 
{ name => $type, key => $id } ); unless ($user) { $user = $model->get( $c->user->id ) diff --git a/lib/MetaCPAN/Server/Controller/Login/PAUSE.pm b/lib/MetaCPAN/Server/Controller/Login/PAUSE.pm index c96ac2262..ebce10c14 100644 --- a/lib/MetaCPAN/Server/Controller/Login/PAUSE.pm +++ b/lib/MetaCPAN/Server/Controller/Login/PAUSE.pm @@ -37,7 +37,7 @@ sub index : Path Args(0) { elsif ( ( $id = $c->req->parameters->{id} ) && $c->req->parameters->{id} =~ /[a-zA-Z]+/ ) { - my $author = $c->model('CPAN::Author')->get( uc($id) ); + my $author = $c->model('ESModel')->doc('author')->get( uc($id) ); $c->controller('OAuth2')->redirect( $c, error => "author_not_found" ) unless ($author); diff --git a/lib/MetaCPAN/Server/Controller/OAuth2.pm b/lib/MetaCPAN/Server/Controller/OAuth2.pm index 1ae22701b..1e04049ab 100644 --- a/lib/MetaCPAN/Server/Controller/OAuth2.pm +++ b/lib/MetaCPAN/Server/Controller/OAuth2.pm @@ -78,7 +78,7 @@ sub access_token : Local { $self->bad_request( $c, invalid_request => 'code query parameter is required' ) unless ($code); - my $user = $c->model('User::Account')->find_code($code); + my $user = $c->model('ESModel')->doc('account')->find_code($code); $self->bad_request( $c, access_denied => 'the code is invalid' ) unless ($user); diff --git a/lib/MetaCPAN/Server/Controller/Package.pm b/lib/MetaCPAN/Server/Controller/Package.pm index 2ab527977..c2117e64f 100644 --- a/lib/MetaCPAN/Server/Controller/Package.pm +++ b/lib/MetaCPAN/Server/Controller/Package.pm @@ -11,7 +11,7 @@ with 'MetaCPAN::Server::Role::JSONP'; sub modules : Path('modules') : Args(1) { my ( $self, $c, $dist ) = @_; - my $last = $c->model('CPAN::Release')->find($dist); + my $last = $c->model('ESModel')->doc('release')->find($dist); $c->detach( '/not_found', ["Cannot find last release for $dist"] ) unless $last; $c->stash_or_detach( diff --git a/lib/MetaCPAN/Server/Controller/Pod.pm b/lib/MetaCPAN/Server/Controller/Pod.pm index 4ee619714..b903a211f 100644 --- 
a/lib/MetaCPAN/Server/Controller/Pod.pm +++ b/lib/MetaCPAN/Server/Controller/Pod.pm @@ -36,14 +36,14 @@ sub find : Path('') { sub get : Path('') : Args(1) { my ( $self, $c, $module ) = @_; - $module = $c->model('CPAN::File')->find_pod($module) + $module = $c->model('ESModel')->doc('file')->find_pod($module) or $c->detach( '/not_found', [] ); $c->forward( 'find', [ map { $module->$_ } qw(author release path) ] ); } sub find_dist_links { my ( $self, $c, $author, $release, $permalinks ) = @_; - my @modules = $c->model('CPAN::File') + my @modules = $c->model('ESModel')->doc('file') ->documented_modules( { name => $release, author => $author } ); my $links = {}; diff --git a/lib/MetaCPAN/Server/Controller/Release.pm b/lib/MetaCPAN/Server/Controller/Release.pm index 8d5f1660d..2e333a910 100644 --- a/lib/MetaCPAN/Server/Controller/Release.pm +++ b/lib/MetaCPAN/Server/Controller/Release.pm @@ -103,14 +103,14 @@ sub top_uploaders : Path('top_uploaders') : Args() { sub interesting_files : Path('interesting_files') : Args(2) { my ( $self, $c, $author, $release ) = @_; my $categories = $c->read_param( 'category', 1 ); - $c->stash_or_detach( $c->model('CPAN::File') + $c->stash_or_detach( $c->model('ESModel')->doc('file') ->interesting_files( $author, $release, $categories ) ); } sub files_by_category : Path('files_by_category') : Args(2) { my ( $self, $c, $author, $release ) = @_; my $categories = $c->read_param( 'category', 1 ); - $c->stash_or_detach( $c->model('CPAN::File') + $c->stash_or_detach( $c->model('ESModel')->doc('file') ->files_by_category( $author, $release, $categories ) ); } diff --git a/lib/MetaCPAN/Server/Controller/ReverseDependencies.pm b/lib/MetaCPAN/Server/Controller/ReverseDependencies.pm index 1cc5db085..0549b9ecb 100644 --- a/lib/MetaCPAN/Server/Controller/ReverseDependencies.pm +++ b/lib/MetaCPAN/Server/Controller/ReverseDependencies.pm @@ -14,7 +14,7 @@ with 'MetaCPAN::Server::Role::JSONP'; sub dist : Path('dist') : Args(1) { my ( $self, $c, $dist ) = 
@_; $c->stash_or_detach( - $c->model('CPAN::Release')->reverse_dependencies( + $c->model('ESModel')->doc('release')->reverse_dependencies( $dist, @{ $c->req->params }{qw< page page_size size sort >} ) ); @@ -23,7 +23,7 @@ sub dist : Path('dist') : Args(1) { sub module : Path('module') : Args(1) { my ( $self, $c, $module ) = @_; $c->stash_or_detach( - $c->model('CPAN::Release')->requires( + $c->model('ESModel')->doc('release')->requires( $module, @{ $c->req->params }{qw< page page_size sort >} ) ); diff --git a/lib/MetaCPAN/Server/Controller/Scroll.pm b/lib/MetaCPAN/Server/Controller/Scroll.pm index a2917680f..f2876c2bc 100644 --- a/lib/MetaCPAN/Server/Controller/Scroll.pm +++ b/lib/MetaCPAN/Server/Controller/Scroll.pm @@ -55,7 +55,7 @@ sub index : Path('/_search/scroll') : Args { } my $res = eval { - $c->model('CPAN')->es->scroll( { + $c->model('ESModel')->es->scroll( { scroll_id => $scroll_id, scroll => $c->req->params->{scroll}, } ); diff --git a/lib/MetaCPAN/Server/Controller/Source.pm b/lib/MetaCPAN/Server/Controller/Source.pm index 90ec2e3e3..d667b3d5d 100644 --- a/lib/MetaCPAN/Server/Controller/Source.pm +++ b/lib/MetaCPAN/Server/Controller/Source.pm @@ -63,7 +63,7 @@ sub module : Chained('index') : PathPart('') : Args(1) { $c->cdn_never_cache(1); - $module = $c->model('CPAN::File')->find($module) + $module = $c->model('ESModel')->doc('file')->find($module) or $c->detach( '/not_found', [] ); $c->forward( 'get', [ map { $module->$_ } qw(author release path) ] ); } diff --git a/lib/MetaCPAN/Server/Controller/User.pm b/lib/MetaCPAN/Server/Controller/User.pm index fd0a0febf..3391ea0c5 100644 --- a/lib/MetaCPAN/Server/Controller/User.pm +++ b/lib/MetaCPAN/Server/Controller/User.pm @@ -24,7 +24,9 @@ sub auto : Private { $c->cdn_never_cache(1); if ( my $token = $c->req->params->{access_token} ) { - if ( my $user = $c->model('User::Account')->find_token($token) ) { + if ( my $user + = $c->model('ESModel')->doc('account')->find_token($token) ) + { $c->authenticate( { 
user => $user } ); Log::Log4perl::MDC->put( user => $user->id ); } @@ -73,7 +75,8 @@ sub profile : Local : ActionClass('REST') { $self->status_not_found( $c, message => 'Profile doesn\'t exist' ); $c->detach; } - my $profile = $c->model('CPAN::Author')->raw->get( $pause->key ); + my $profile + = $c->model('ESModel')->doc('author')->raw->get( $pause->key ); $c->stash->{profile} = $profile->{_source}; } @@ -95,7 +98,8 @@ sub profile_PUT { donation city region country location extra perlmongers); $profile->{updated} = DateTime->now->iso8601; - my @errors = $c->model('CPAN::Author')->new_document->validate($profile); + my @errors = $c->model('ESModel')->doc('author') + ->new_document->validate($profile); if (@errors) { $self->status_bad_request( $c, message => 'Validation failed' ); @@ -103,7 +107,8 @@ sub profile_PUT { } else { $profile - = $c->model('CPAN::Author')->put( $profile, { refresh => true } ); + = $c->model('ESModel')->doc('author') + ->put( $profile, { refresh => true } ); $self->status_created( $c, location => $c->uri_for( '/author/' . 
$profile->{pauseid} ), diff --git a/lib/MetaCPAN/Server/Controller/User/Favorite.pm b/lib/MetaCPAN/Server/Controller/User/Favorite.pm index 62024c268..e8d4297c6 100644 --- a/lib/MetaCPAN/Server/Controller/User/Favorite.pm +++ b/lib/MetaCPAN/Server/Controller/User/Favorite.pm @@ -27,7 +27,7 @@ sub index_POST { my ( $self, $c ) = @_; my $pause = $c->stash->{pause}; my $data = $c->req->data; - my $favorite = $c->model('CPAN::Favorite')->put( + my $favorite = $c->model('ESModel')->doc('favorite')->put( { user => $c->user->id, author => $data->{author}, @@ -48,7 +48,7 @@ sub index_POST { sub index_DELETE { my ( $self, $c, $distribution ) = @_; - my $favorite = $c->model('CPAN::Favorite') + my $favorite = $c->model('ESModel')->doc('favorite') ->get( { user => $c->user->id, distribution => $distribution } ); if ($favorite) { $favorite->delete( { refresh => true } ); diff --git a/lib/MetaCPAN/Server/Model/CPAN.pm b/lib/MetaCPAN/Server/Model/CPAN.pm deleted file mode 100644 index bbac0b9b5..000000000 --- a/lib/MetaCPAN/Server/Model/CPAN.pm +++ /dev/null @@ -1,44 +0,0 @@ -package MetaCPAN::Server::Model::CPAN; - -use Moose; - -use MetaCPAN::Model (); -use MetaCPAN::Server::Config (); - -extends 'Catalyst::Model'; - -has _esx_model => ( - is => 'ro', - lazy => 1, - handles => ['es'], - default => sub { - MetaCPAN::Model->new( - es => MetaCPAN::Server::Config::config()->{elasticsearch_servers} - ); - }, -); - -has index => ( - is => 'ro', - default => 'cpan', -); - -sub type { - my $self = shift; - return $self->_esx_model->index( $self->index )->type(shift); -} - -sub BUILD { - my ( $self, $args ) = @_; - my $index = $self->_esx_model->index( $self->index ); - my $class = ref $self; - while ( my ( $k, $v ) = each %{ $index->types } ) { - no strict 'refs'; - my $classname = "${class}::" . 
ucfirst($k); - *{"${classname}::ACCEPT_CONTEXT"} = sub { - return $index->type($k); - }; - } -} - -1; diff --git a/lib/MetaCPAN/Server/Model/ES.pm b/lib/MetaCPAN/Server/Model/ES.pm new file mode 100644 index 000000000..e22311d46 --- /dev/null +++ b/lib/MetaCPAN/Server/Model/ES.pm @@ -0,0 +1,25 @@ +package MetaCPAN::Server::Model::ES; + +use Moose; + +use MetaCPAN::Server::Config (); +use MooseX::Types::ElasticSearch qw( ES ); + +extends 'Catalyst::Model'; + +has es => ( + is => 'ro', + isa => ES, + coerce => 1, + lazy => 1, + default => sub { + MetaCPAN::Server::Config::config()->{elasticsearch_servers}; + }, +); + +sub ACCEPT_CONTEXT { + my ( $self, $c ) = @_; + return $self->es; +} + +1; diff --git a/lib/MetaCPAN/Server/Model/ESModel.pm b/lib/MetaCPAN/Server/Model/ESModel.pm new file mode 100644 index 000000000..e14334052 --- /dev/null +++ b/lib/MetaCPAN/Server/Model/ESModel.pm @@ -0,0 +1,31 @@ +package MetaCPAN::Server::Model::ESModel; + +use Moose; + +use MetaCPAN::Model (); + +extends 'Catalyst::Model'; + +has es => ( + is => 'ro', + writer => '_set_es', +); + +has _esx_model => ( + is => 'ro', + lazy => 1, + default => sub { + my $self = shift; + MetaCPAN::Model->new( es => $self->es ); + }, +); + +sub ACCEPT_CONTEXT { + my ( $self, $c ) = @_; + if ( !$self->es ) { + $self->_set_es( $c->model('ES') ); + } + return $self->_esx_model; +} + +1; diff --git a/lib/MetaCPAN/Server/Model/Search.pm b/lib/MetaCPAN/Server/Model/Search.pm index 92c73de09..85da37972 100644 --- a/lib/MetaCPAN/Server/Model/Search.pm +++ b/lib/MetaCPAN/Server/Model/Search.pm @@ -6,18 +6,29 @@ use warnings; use Moose; use MetaCPAN::Query::Search (); -extends 'MetaCPAN::Server::Model::CPAN'; +extends 'Catalyst::Model'; + +has es => ( + is => 'ro', + writer => '_set_es', +); has search => ( is => 'ro', isa => 'MetaCPAN::Query::Search', lazy => 1, - handles => [qw( search_for_first_result search_web )], default => sub { my $self = shift; - return MetaCPAN::Query::Search->new( es => $self->es, ); 
+ return MetaCPAN::Query::Search->new( es => $self->es ); }, ); -1; +sub ACCEPT_CONTEXT { + my ( $self, $c ) = @_; + if ( !$self->es ) { + $self->_set_es( $c->model('ES') ); + } + return $self->search; +} +1; diff --git a/lib/MetaCPAN/Server/Model/User.pm b/lib/MetaCPAN/Server/Model/User.pm deleted file mode 100644 index 733d74dde..000000000 --- a/lib/MetaCPAN/Server/Model/User.pm +++ /dev/null @@ -1,12 +0,0 @@ -package MetaCPAN::Server::Model::User; - -use strict; -use warnings; - -use Moose; - -extends 'MetaCPAN::Server::Model::CPAN'; - -has '+index' => ( default => 'user' ); - -1; diff --git a/lib/MetaCPAN/Server/User.pm b/lib/MetaCPAN/Server/User.pm index 152335323..c84281957 100644 --- a/lib/MetaCPAN/Server/User.pm +++ b/lib/MetaCPAN/Server/User.pm @@ -23,7 +23,7 @@ sub for_session { sub from_session { my ( $self, $c, $id ) = @_; - my $user = $c->model('User::Account')->get($id); + my $user = $c->model('ESModel')->doc('account')->get($id); $self->_set_obj($user) if ($user); return $user ? 
$self : undef; } diff --git a/t/lib/MetaCPAN/TestServer.pm b/t/lib/MetaCPAN/TestServer.pm index 2af4f76e9..e2477e2f6 100644 --- a/t/lib/MetaCPAN/TestServer.pm +++ b/t/lib/MetaCPAN/TestServer.pm @@ -256,7 +256,7 @@ sub index_favorite { sub prepare_user_test_data { my $self = shift; ok( - my $user = MetaCPAN::Server->model('User::Account')->put( { + my $user = MetaCPAN::Server->model('ESModel')->doc('account')->put( { access_token => [ { client => 'testing', token => 'testing' } ] } ), 'prepare user' @@ -266,7 +266,7 @@ sub prepare_user_test_data { ok( $user->put( { refresh => true } ), 'put user' ); ok( - MetaCPAN::Server->model('User::Account')->put( + MetaCPAN::Server->model('ESModel')->doc('account')->put( { access_token => [ { client => 'testing', token => 'bot' } ] }, { refresh => true } ), diff --git a/t/model/release/reverse_dependencies.t b/t/model/release/reverse_dependencies.t index 3b2f60e16..81eddf44e 100644 --- a/t/model/release/reverse_dependencies.t +++ b/t/model/release/reverse_dependencies.t @@ -13,7 +13,7 @@ subtest 'distribution reverse_dependencies' => sub { sort { $a->[1] cmp $b->[1] } map +[ @{$_}{qw(author name)} ], @{ - $c->model('CPAN::Release') + $c->model('ESModel')->doc('release') ->raw->reverse_dependencies('Multiple-Modules')->{data} } ]; @@ -32,8 +32,8 @@ subtest 'module reverse_dependencies' => sub { my $data = [ map +[ @{$_}{qw(author name)} ], @{ - $c->model('CPAN::Release')->raw->requires('Multiple::Modules') - ->{data} + $c->model('ESModel')->doc('release') + ->raw->requires('Multiple::Modules')->{data} } ]; @@ -46,7 +46,8 @@ subtest 'module reverse_dependencies' => sub { subtest 'no reverse_dependencies' => sub { my $data - = $c->model('CPAN::Release')->raw->requires('DoesNotExist')->{data}; + = $c->model('ESModel')->doc('release')->raw->requires('DoesNotExist') + ->{data}; is_deeply( $data, [], 'Found no reverse dependencies for module.' 
); }; From f72bdf152e83584c5d2e76d35b0088cbc9f681d0 Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Wed, 23 Oct 2024 09:30:24 +0200 Subject: [PATCH 18/31] testserver: get mapping via esconfig --- t/lib/MetaCPAN/TestServer.pm | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/t/lib/MetaCPAN/TestServer.pm b/t/lib/MetaCPAN/TestServer.pm index e2477e2f6..85a226bcd 100644 --- a/t/lib/MetaCPAN/TestServer.pm +++ b/t/lib/MetaCPAN/TestServer.pm @@ -2,6 +2,8 @@ package MetaCPAN::TestServer; use MetaCPAN::Moose; +use Cpanel::JSON::XS qw( encode_json ); +use MetaCPAN::ESConfig qw( es_config ); use MetaCPAN::Script::Author (); use MetaCPAN::Script::Cover (); use MetaCPAN::Script::CPANTestersAPI (); @@ -285,7 +287,7 @@ sub test_index_missing { my $self = $_[0]; subtest 'missing index' => sub { - my $scoverindexjson = MetaCPAN::Script::Mapping::Cover::mapping; + my $cover_mapping_json = encode_json( es_config->mapping('cover') ); subtest 'delete cover index' => sub { local @ARGV = qw(mapping --delete_index cover); @@ -300,7 +302,7 @@ sub test_index_missing { local @ARGV = ( 'mapping', '--create_index', 'cover', '--patch_mapping', - qq({ "cover": $scoverindexjson }) + qq({ "cover": $cover_mapping_json }) ); my $mapping = MetaCPAN::Script::Mapping->new_with_options( From c44ac5131f0e3a6289d292d4695997212b5dc221 Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Wed, 23 Oct 2024 09:55:18 +0200 Subject: [PATCH 19/31] mapping: list document types, not types from index --- lib/MetaCPAN/Script/Mapping.pm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/MetaCPAN/Script/Mapping.pm b/lib/MetaCPAN/Script/Mapping.pm index 421ab8532..6f1036fd8 100644 --- a/lib/MetaCPAN/Script/Mapping.pm +++ b/lib/MetaCPAN/Script/Mapping.pm @@ -45,7 +45,7 @@ has arg_list_types => ( is => 'ro', isa => Bool, default => 0, - documentation => 'list available index type names', + documentation => 'list available document type names', ); has arg_cluster_info => ( @@ 
-457,7 +457,7 @@ sub empty_type { sub list_types { my $self = shift; - print "$_\n" for sort keys %{ $self->index->types }; + print "$_\n" for sort keys %{ es_config->documents }; } sub show_info { From c61c5161d787b5f87d0ea87a799e7c03411bd949 Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Wed, 23 Oct 2024 09:56:11 +0200 Subject: [PATCH 20/31] scripts don't need index method --- lib/MetaCPAN/Role/Script.pm | 5 ----- 1 file changed, 5 deletions(-) diff --git a/lib/MetaCPAN/Role/Script.pm b/lib/MetaCPAN/Role/Script.pm index f5cab3bfa..cd6c46053 100644 --- a/lib/MetaCPAN/Role/Script.pm +++ b/lib/MetaCPAN/Role/Script.pm @@ -168,11 +168,6 @@ sub print_error { log_error {$error}; } -sub index { - my $self = shift; - return $self->model->index('cpan'); -} - sub _build_model { my $self = shift; From e5c53764091e370e57cf132595f019927de306c7 Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Sat, 26 Oct 2024 17:30:35 +0200 Subject: [PATCH 21/31] ESConfig: overrides with undef removes element --- lib/MetaCPAN/ESConfig.pm | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/lib/MetaCPAN/ESConfig.pm b/lib/MetaCPAN/ESConfig.pm index 300935f8e..511e2b6ca 100644 --- a/lib/MetaCPAN/ESConfig.pm +++ b/lib/MetaCPAN/ESConfig.pm @@ -4,15 +4,16 @@ use experimental qw(signatures postderef); package MetaCPAN::ESConfig; -use Carp qw(croak); -use Const::Fast qw(const); -use Exporter qw(import); -use MetaCPAN::Util qw(root_dir); -use Module::Runtime qw(require_module $module_name_rx); -use Cpanel::JSON::XS (); -use Hash::Merge::Simple qw(merge); -use MetaCPAN::Server::Config (); -use Const::Fast qw(const); +use Carp qw(croak); +use Const::Fast qw(const); +use Exporter qw(import); +use MetaCPAN::Util qw(root_dir); +use Module::Runtime qw(require_module $module_name_rx); +use Cpanel::JSON::XS (); +use Hash::Merge::Simple qw(merge); +use MetaCPAN::Server::Config (); +use MetaCPAN::Types::TypeTiny qw(HashRef Defined); +use Const::Fast qw(const); 
const my %config => merge( { @@ -125,13 +126,28 @@ has indexes => ( required => 1, ); +my $DefinedHash = ( HashRef [Defined] )->plus_coercions( + HashRef, + => sub ($hash) { + return { + map { + my $value = $hash->{$_}; + defined $value ? ( $_ => $value ) : (); + } keys %$hash + }; + }, +); has aliases => ( is => 'ro', + isa => $DefinedHash, + coerce => 1, default => sub { {} }, ); has documents => ( is => 'ro', + isa => HashRef [$DefinedHash], + coerce => 1, required => 1, ); From ca7b31f02090834a373205cf2529e19be1198c54 Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Sat, 26 Oct 2024 17:30:48 +0200 Subject: [PATCH 22/31] create distributions with upsert Trying to count distribution documents before creating is vulnerable to concurrency and consistency issues. Instead, use an upsert to create it. --- lib/MetaCPAN/Model/Release.pm | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/lib/MetaCPAN/Model/Release.pm b/lib/MetaCPAN/Model/Release.pm index a29a24752..ecbd1916d 100644 --- a/lib/MetaCPAN/Model/Release.pm +++ b/lib/MetaCPAN/Model/Release.pm @@ -226,12 +226,17 @@ sub _build_document { = $self->model->doc('release')->put( $document, { refresh => true } ); # create distribution if doesn't exist - my $dist_count = $self->es->count( es_doc_path('distribution'), - body => { query => { term => { name => $self->distribution } } }, ); - if ( !$dist_count->{count} ) { - $self->model->doc('distribution') - ->put( { name => $self->distribution }, { create => 1 } ); - } + $self->es->update( + es_doc_path('distribution'), + id => $self->distribution, + body => { + doc => { + name => $self->distribution, + }, + doc_as_upsert => true, + }, + ); + return $document; } From 762f54d29f1367637df2aede3116464b69de6961 Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Sat, 26 Oct 2024 17:33:04 +0200 Subject: [PATCH 23/31] default backup all indexes --- lib/MetaCPAN/ESConfig.pm | 13 +++++++++++++ lib/MetaCPAN/Script/Backup.pm | 6 ++++-- 2 files 
changed, 17 insertions(+), 2 deletions(-) diff --git a/lib/MetaCPAN/ESConfig.pm b/lib/MetaCPAN/ESConfig.pm index 511e2b6ca..801f09682 100644 --- a/lib/MetaCPAN/ESConfig.pm +++ b/lib/MetaCPAN/ESConfig.pm @@ -126,6 +126,19 @@ has indexes => ( required => 1, ); +has all_indexes => ( + is => 'lazy', + default => sub ($self) { + my %seen; + [ + sort + grep !$seen{$_}++, + map $_->{index}, + values $self->documents->%* + ]; + }, +); + my $DefinedHash = ( HashRef [Defined] )->plus_coercions( HashRef, => sub ($hash) { diff --git a/lib/MetaCPAN/Script/Backup.pm b/lib/MetaCPAN/Script/Backup.pm index 32d6fd54b..6085caabb 100644 --- a/lib/MetaCPAN/Script/Backup.pm +++ b/lib/MetaCPAN/Script/Backup.pm @@ -10,6 +10,7 @@ use IO::Zlib (); use Log::Contextual qw( :log :dlog ); use MetaCPAN::Types::TypeTiny qw( Bool Int Path Str CommaSepOption ); use MetaCPAN::Util qw( true false ); +use MetaCPAN::ESConfig qw( es_config ); use Moose; use Try::Tiny qw( catch try ); @@ -28,8 +29,9 @@ has index => ( is => 'ro', isa => CommaSepOption, coerce => 1, - default => 'cpan', - documentation => 'ES indexes to backup, defaults to "cpan"', + default => sub { es_config->all_indexes }, + documentation => 'ES indexes to backup, defaults to "' + . join( ', ', @{ es_config->all_indexes } ) . 
'"', ); has type => ( From 407b552ace36770b0f3f085c9ac09eb428d61163 Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Sat, 26 Oct 2024 17:37:49 +0200 Subject: [PATCH 24/31] validate index is actually deleted --- lib/MetaCPAN/Script/Mapping.pm | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/lib/MetaCPAN/Script/Mapping.pm b/lib/MetaCPAN/Script/Mapping.pm index 6f1036fd8..3ae24e885 100644 --- a/lib/MetaCPAN/Script/Mapping.pm +++ b/lib/MetaCPAN/Script/Mapping.pm @@ -7,6 +7,7 @@ use DateTime (); use Log::Contextual qw( :log ); use MetaCPAN::ESConfig qw( es_config ); use MetaCPAN::Types::TypeTiny qw( Bool Str ); +use Time::HiRes qw( sleep time ); use constant { EXPECTED => 1, @@ -257,7 +258,19 @@ sub _delete_index { my ( $self, $name ) = @_; log_info {"Deleting index: $name"}; - $self->es->indices->delete( index => $name ); + my $idx = $self->es->indices; + $idx->delete( index => $name ); + + my $exists; + my $end = time + 2; + while ( time < $end ) { + $exists = $idx->exists( index => $name ) or last; + sleep 0.1; + } + if ($exists) { + log_error {"Failed to delete index: $name"}; + } + return $exists; } sub update_index { From b9c246de0ab24f62fdc4ebf5dfee2e8c7585dc16 Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Sat, 26 Oct 2024 17:38:03 +0200 Subject: [PATCH 25/31] check indexes and aliases we're actually using --- t/lib/MetaCPAN/TestServer.pm | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/t/lib/MetaCPAN/TestServer.pm b/t/lib/MetaCPAN/TestServer.pm index 85a226bcd..669fa638d 100644 --- a/t/lib/MetaCPAN/TestServer.pm +++ b/t/lib/MetaCPAN/TestServer.pm @@ -91,14 +91,8 @@ sub wait_for_es { sub check_mappings { my $self = $_[0]; - my %indices = ( - 'cover' => 'yellow', - 'cpan_v1_01' => 'yellow', - 'contributor' => 'yellow', - 'cve' => 'yellow', - 'user' => 'yellow' - ); - my %aliases = ( 'cpan' => 'cpan_v1_01' ); + my %indices = ( map +( $_ => 'yellow' ), @{ es_config->all_indexes } ); + my %aliases = %{ 
es_config->aliases }; local @ARGV = qw(mapping --show_cluster_info); @@ -127,6 +121,8 @@ sub check_mappings { } }; subtest 'verify aliases' => sub { + ok "no aliases to verify" + if !%aliases; foreach ( keys %aliases ) { ok( defined $mapping->aliases_info->{$_}, "alias '$_' was created" ); From 374abed2926a568f28a694b27f41ee4250986050 Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Sat, 26 Oct 2024 17:39:17 +0200 Subject: [PATCH 26/31] remove old ES config from test config --- metacpan_server_testing.yaml | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/metacpan_server_testing.yaml b/metacpan_server_testing.yaml index 77c91a046..1d653cac2 100644 --- a/metacpan_server_testing.yaml +++ b/metacpan_server_testing.yaml @@ -12,16 +12,6 @@ logger: class: Log::Log4perl::Appender::Screen name: testing -model: - CPAN: - servers: __ENV(ES)__ - User: - servers: __ENV(ES)__ - -plugin: - Session: - servers: __ENV(ES)__ - controller: User::Turing: captcha_class: Captcha::Mock From a29eff9fbe01a93a711e92f5554228e4c0186a23 Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Sun, 27 Oct 2024 08:57:37 +0100 Subject: [PATCH 27/31] always use suggester for autocomplete We have two autocomplete end points. The old one is no longer used by the front end. The new one uses the suggest API. Rewrite both end points to use the suggest API, just returning data in different forms. 
--- lib/MetaCPAN/Document/File/Set.pm | 154 ++++++++++++---------- t/server/controller/search/autocomplete.t | 29 ++++ 2 files changed, 114 insertions(+), 69 deletions(-) diff --git a/lib/MetaCPAN/Document/File/Set.pm b/lib/MetaCPAN/Document/File/Set.pm index 683c7c898..569bda6df 100644 --- a/lib/MetaCPAN/Document/File/Set.pm +++ b/lib/MetaCPAN/Document/File/Set.pm @@ -271,74 +271,33 @@ sub history { return $search->sort( [ { date => 'desc' } ] ); } -sub autocomplete { - my ( $self, @terms ) = @_; - - my $query = { - bool => { - must => [ - { - multi_match => { - query => join( q{ }, @terms ), - type => 'most_fields', - fields => [ 'documentation', 'documentation.*' ], - analyzer => 'camelcase', - minimum_should_match => '80%' - } - }, - { exists => { field => 'documentation' } }, - { term => { status => 'latest' } }, - { term => { indexed => true } }, - { term => { authorized => true } } - ], - must_not => - [ { terms => { distribution => \@ROGUE_DISTRIBUTIONS } }, ], - }, - }; - - my $data = $self->es->search( - search_type => 'dfs_query_then_fetch', - es_doc_path('file'), - body => { - query => $query, - sort => [ '_score', 'documentation' ], - _source => [qw( documentation release author distribution )], - }, - ); - - # this is backcompat. we don't use this end point. 
- $_->{fields} = delete $_->{_source} for @{ $data->{hits}{hits} }; - - return $data; -} - -sub autocomplete_suggester { +sub _autocomplete { my ( $self, $query ) = @_; - return $self unless $query; my $search_size = 100; - my $suggestions = $self->es->suggest( { + my $sugg_res = $self->es->search( es_doc_path('file'), body => { - documentation => { - text => $query, - completion => { - field => "suggest", - size => $search_size, - } + suggest => { + documentation => { + text => $query, + completion => { + field => "suggest", + size => $search_size, + }, + }, } }, - } ); + ); my %docs; - - for my $suggest ( @{ $suggestions->{documentation}[0]{options} } ) { + for my $suggest ( @{ $sugg_res->{suggest}{documentation}[0]{options} } ) { $docs{ $suggest->{text} } = max grep {defined} ( $docs{ $suggest->{text} }, $suggest->{score} ); } - my $data = $self->es->search( { + my $res = $self->es->search( es_doc_path('file'), body => { query => { @@ -366,34 +325,91 @@ sub autocomplete_suggester { ) ], size => $search_size, }, - } ); + ); + + my $hits = $res->{hits}{hits}; + + my $fav_res + = $self->agg_by_distributions( + [ map $_->{_source}{distribution}, @$hits ] ); + + my $favs = $fav_res->{favorites}; my %valid = map { - my %record = %{ $_->{_source} }; - $record{name} = delete $record{documentation}; # rename - ( $record{name} => \%record ); - } @{ $data->{hits}{hits} }; + my $source = $_->{_source}; + ( + $source->{documentation} => { + %$source, favorites => $favs->{ $source->{distribution} }, + } + ); + } @{ $res->{hits}{hits} }; # remove any exact match, it will be added later my $exact = delete $valid{$query}; - my $favorites - = $self->agg_by_distributions( - [ map { $_->{distribution} } values %valid ] )->{favorites}; - no warnings 'uninitialized'; my @sorted = map { $valid{$_} } sort { - $valid{$a}->{deprecated} <=> $valid{$b}->{deprecated} - || $favorites->{ $valid{$b}->{distribution} } - <=> $favorites->{ $valid{$a}->{distribution} } - || $docs{$b} <=> $docs{$a} 
- || length($a) <=> length($b) + my $a_data = $valid{$a}; + my $b_data = $valid{$b}; + $a_data->{deprecated} <=> $b_data->{deprecated} + || $b_data->{favorites} <=> $a_data->{favorites} + || $docs{$b} <=> $docs{$a} + || length($a) <=> length($b) || $a cmp $b } keys %valid; - return +{ suggestions => [ grep {defined} ( $exact, @sorted ) ] }; + return { + took => $sugg_res->{took} + $res->{took} + $fav_res->{took}, + suggestions => \@sorted, + }; +} + +sub autocomplete { + my ( $self, @terms ) = @_; + my $data = $self->_autocomplete( join ' ', @terms ); + + return { + took => $data->{took}, + hits => { + hits => [ + map { + my $source = $_; + +{ + fields => { + map +( $_ => $source->{$_} ), qw( + documentation + release + author + distribution + ), + }, + }; + } @{ $data->{suggestions} } + ], + }, + }; +} + +sub autocomplete_suggester { + my ( $self, @terms ) = @_; + my $data = $self->_autocomplete( join ' ', @terms ); + + return { + took => $data->{took}, + suggestions => [ + map +{ + author => $_->{author}, + date => $_->{date}, + deprecated => $_->{deprecated}, + distribution => $_->{distribution}, + name => $_->{documentation}, + release => $_->{release}, + }, + @{ $data->{suggestions} } + ], + }; } sub find_changes_files { diff --git a/t/server/controller/search/autocomplete.t b/t/server/controller/search/autocomplete.t index 4facdb103..760cd7405 100644 --- a/t/server/controller/search/autocomplete.t +++ b/t/server/controller/search/autocomplete.t @@ -32,4 +32,33 @@ test_psgi app, sub { } }; +test_psgi app, sub { + my $cb = shift; + + # test ES script using doc['blah'] value + { + ok( + my $res + = $cb->( + GET '/search/autocomplete/suggest?q=Multiple::Modu' ), + 'GET' + ); + my $json = decode_json_ok($res); + + my $got = [ map $_->{name}, @{ $json->{suggestions} } ]; + + is_deeply $got, [ qw( + Multiple::Modules + Multiple::Modules::A + Multiple::Modules::B + Multiple::Modules::RDeps + Multiple::Modules::Tester + Multiple::Modules::RDeps::A + 
Multiple::Modules::RDeps::Deprecated + ) ], + 'results are sorted lexically by module name + length' + or diag( Test::More::explain($got) ); + } +}; + done_testing; From c0b70ae0639037a52e9251c37107cc192d393b3c Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Sun, 27 Oct 2024 09:23:30 +0100 Subject: [PATCH 28/31] download_url: account for elasticsearch 6 --- lib/MetaCPAN/Query/Release.pm | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/lib/MetaCPAN/Query/Release.pm b/lib/MetaCPAN/Query/Release.pm index 08a6eb6eb..8a3a49802 100644 --- a/lib/MetaCPAN/Query/Release.pm +++ b/lib/MetaCPAN/Query/Release.pm @@ -1025,19 +1025,25 @@ sub find_download_url { ? { bool => { must => \@filters } } : $filters[0]; - my $version_sort = $module_filter + my $version_sort + = $module_filter ? { 'module.version_numified' => { - mode => 'max', - order => 'desc', - nested_path => 'module', - nested_filter => $entity_filter, - - # TODO: ES6 - replace prior 2 lines with: - #nested => { - # path => 'module', - # filter => $entity_filter, - #}, + mode => 'max', + order => 'desc', + ( + $self->es->api_version ge '6_0' + ? ( + nested => { + path => 'module', + filter => $entity_filter, + }, + ) + : ( + nested_path => 'module', + nested_filter => $entity_filter, + ) + ), } } : { version_numified => { order => 'desc' } }; From 38877ad5bd49b55664e3a4d683e1615f89e61647 Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Sun, 27 Oct 2024 09:23:58 +0100 Subject: [PATCH 29/31] fix script query syntax when using newer Elasticsearch Older versions expect the key "inline", newer expect "source". 
--- lib/MetaCPAN/Query/Search.pm | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/lib/MetaCPAN/Query/Search.pm b/lib/MetaCPAN/Query/Search.pm index 4e75da972..eca1e113b 100644 --- a/lib/MetaCPAN/Query/Search.pm +++ b/lib/MetaCPAN/Query/Search.pm @@ -137,6 +137,8 @@ sub _search_collapsed { my $es_query = $self->build_query( $search_term, $es_query_opts ); my $source = delete $es_query->{_source}; + my $script_key = $self->es->api_version ge '5_0' ? 'source' : 'inline'; + $es_query->{aggregations} = { by_dist => { terms => { @@ -156,8 +158,8 @@ sub _search_collapsed { max_score => { max => { script => { - lang => "expression", - inline => "_score", + lang => "expression", + $script_key => "_score", }, }, }, @@ -294,14 +296,16 @@ sub build_query { }, }; + my $script_key = $self->es->api_version ge '5_0' ? 'source' : 'inline'; + $query = { function_score => { script_score => { # prefer shorter module names script => { - lang => 'expression', - inline => + lang => 'expression', + $script_key => "_score - (doc['documentation_length'].value == 0 ? 
26 : doc['documentation_length'].value)/400", }, }, From 8d407462153fd88fda5d391b3631dee804685c12 Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Sun, 27 Oct 2024 09:25:29 +0100 Subject: [PATCH 30/31] remove use_dis_max from query_string query use_dis_max is the default, and isn't supported in newer versions --- lib/MetaCPAN/Query/Search.pm | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/MetaCPAN/Query/Search.pm b/lib/MetaCPAN/Query/Search.pm index eca1e113b..95c7f12fc 100644 --- a/lib/MetaCPAN/Query/Search.pm +++ b/lib/MetaCPAN/Query/Search.pm @@ -270,7 +270,6 @@ sub build_query { default_operator => 'AND', allow_leading_wildcard => false, - use_dis_max => true, } }, @@ -283,7 +282,6 @@ sub build_query { default_operator => 'AND', allow_leading_wildcard => false, - use_dis_max => true, }, }, ], From d6571ecfae74ed51b8704ea32471a26d552c9837 Mon Sep 17 00:00:00 2001 From: Graham Knop Date: Sun, 27 Oct 2024 12:11:57 +0100 Subject: [PATCH 31/31] ignore system indexes when deleting for testing --- lib/MetaCPAN/Script/Mapping.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/MetaCPAN/Script/Mapping.pm b/lib/MetaCPAN/Script/Mapping.pm index 3ae24e885..d5c35deda 100644 --- a/lib/MetaCPAN/Script/Mapping.pm +++ b/lib/MetaCPAN/Script/Mapping.pm @@ -240,7 +240,7 @@ sub delete_all { || $runtime_environment eq 'testing'; if ($is_development) { - foreach my $name ( keys %{ $self->indices_info } ) { + foreach my $name ( grep !/\A\./, keys %{ $self->indices_info } ) { $self->_delete_index($name); } }