From 82d5b76c17eb74dac5969d5e44b128c65e4ad31a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Gigandet?= Date: Thu, 31 Oct 2024 12:35:18 +0100 Subject: [PATCH 1/3] feat: script to count product contributions by year --- .../count_product_contributions_by_year.pl | 246 ++++++++++++++++++ 1 file changed, 246 insertions(+) create mode 100755 scripts/count_product_contributions_by_year.pl diff --git a/scripts/count_product_contributions_by_year.pl b/scripts/count_product_contributions_by_year.pl new file mode 100755 index 0000000000000..15d56d56123e3 --- /dev/null +++ b/scripts/count_product_contributions_by_year.pl @@ -0,0 +1,246 @@ +#!/usr/bin/perl -w + +# This file is part of Product Opener. +# +# Product Opener +# Copyright (C) 2011-2023 Association Open Food Facts +# Contact: contact@openfoodfacts.org +# Address: 21 rue des Iles, 94100 Saint-Maur des Fossés, France +# +# Product Opener is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+ +use Modern::Perl '2017'; +use utf8; + +my $usage = < $query_params_ref, + "all-owners" => \$all_owners, + "obsolete" => \$obsolete, + "fix" => \$fix, + +) or die("Error in command line arguments:\n\n$usage"); + +# Get a list of all products +# Use query filters entered using --query categories_tags=en:plant-milks + +# Build the mongodb query from the --query parameters +my $query_ref = {}; + +add_params_to_query($query_params_ref, $query_ref); + +# On the producers platform, require --query owners_tags to be set, or the --all-owners field to be set. + +if ((defined $server_options{private_products}) and ($server_options{private_products})) { + if ((not $all_owners) and (not defined $query_ref->{owners_tags})) { + print STDERR "On producers platform, --query owners_tags=... or --all-owners must be set.\n"; + exit(); + } +} + +use Data::Dumper; +print STDERR "MongoDB query:\n" . Dumper($query_ref); + +my $socket_timeout_ms = 2 * 60000; # 2 mins, instead of 30s default, to not die as easily if mongodb is busy. 
+
+# Count, per year and for all years ("all"), the active editors, new editors, products edited and products added.
+# Collection that will be used to iterate products
+my $products_collection = get_products_collection({obsolete => $obsolete, timeout => $socket_timeout_ms});
+
+# NOTE(review): the two handles below are not used in the rest of this script - confirm before removing them.
+my $current_products_collection = get_products_collection(
+	{
+		obsolete => 0,
+		timeout => 10000
+	}
+);
+my $obsolete_products_collection = get_products_collection(
+	{
+		obsolete => 1,
+		timeout => 10000
+	}
+);
+
+# The count is only informative: if it fails, report the error and scan the products anyway.
+my $products_count = "";
+
+eval {
+	$products_count = $products_collection->count_documents($query_ref);
+	print STDERR "$products_count documents to check.\n";
+	1;
+} or print STDERR "Could not count the documents to check: $@\n";
+
+# only retrieve important fields
+my $cursor = $products_collection->query($query_ref)->fields({_id => 1, code => 1, owner => 1});
+
+$cursor->immortal(1);
+
+my %products_edited = ();    # year => { code => 1 }: products with at least one change in that year
+my %products_added = ();    # year => { code => 1 }: products whose first entry in changes.sto is in that year
+my %number_of_products = ();    # year => number of products added up to and including that year
+my %new_editors = ();    # year => number of users whose earliest change is in that year
+my %active_editors = ();    # year => { userid => 1 }: users with at least one change in that year
+my %editors_first_year = ();    # userid => earliest year in which the user made a change
+
+my $i = 0;
+
+while (my $product_ref = $cursor->next) {
+	my $code = $product_ref->{code};
+	my $path = product_path($product_ref);
+
+	# Retrieve the changes.sto file
+	my $changes_ref = retrieve("$data_root/products/$path/changes.sto");
+	if (defined $changes_ref) {
+		my $first_change = 1;
+
+		# Go through each change
+		foreach my $change_ref (@{$changes_ref}) {
+			# Get the timestamp and userid
+			my $t = $change_ref->{t};
+			# Changes without a userid are attributed to openfoodfacts-contributors
+			my $userid = $change_ref->{userid} || "openfoodfacts-contributors";
+			# Get the year (localtime: in the timezone of the machine running the script)
+			my $year = (localtime($t))[5] + 1900;
+
+			# First change: update products_added
+			if ($first_change) {
+				deep_set(\%products_added, $year, $code, 1);
+				deep_set(\%products_added, "all", $code, 1);
+				$first_change = 0;
+			}
+
+			# Update products_edited
+			deep_set(\%products_edited, $year, $code, 1);
+			deep_set(\%products_edited, "all", $code, 1);
+
+			# Update the active editors
+			deep_set(\%active_editors, $year, $userid, 1);
+			deep_set(\%active_editors, "all", $userid, 1);
+
+			# Keep the earliest year in which this user made a change
+			if ((not defined $editors_first_year{$userid}) or ($year < $editors_first_year{$userid})) {
+				$editors_first_year{$userid} = $year;
+			}
+		}
+	}
+
+	$i++;
+	($i % 1000 == 0) and print STDERR "$i products checked\n";
+}
+
+# Compute the new editors by year
+foreach my $userid (keys %editors_first_year) {
+	$new_editors{$editors_first_year{$userid}}++;
+	$new_editors{"all"}++;
+}
+
+# Years with active editors but no new editor get an explicit 0 instead of an undefined count
+foreach my $year (keys %active_editors) {
+	$new_editors{$year} //= 0;
+}
+
+# Print the active editors by year and for all years
+
+print STDERR "Active editors by year:\n";
+foreach my $year (sort keys %active_editors) {
+	print STDERR "$year: " . (scalar keys %{$active_editors{$year}} || 0) . "\n";
+}
+
+# Print the new editors
+print STDERR "New editors by year:\n";
+foreach my $year (sort keys %new_editors) {
+	print STDERR "$year: $new_editors{$year}\n";
+}
+
+# Print the products added by year
+print STDERR "Products added by year:\n";
+foreach my $year (sort keys %products_added) {
+	print STDERR "$year: " . (scalar keys %{$products_added{$year}} || 0) . "\n";
+}
+
+# Print the products edited by year
+print STDERR "Products edited by year:\n";
+foreach my $year (sort keys %products_edited) {
+	print STDERR "$year: " . (scalar keys %{$products_edited{$year}} || 0) . "\n";
+}
+
+# Compute the total number of products by year: running total of the products added, in chronological order
+
+my $running_total = 0;
+foreach my $year (sort {$a <=> $b} grep {$_ ne "all"} keys %products_added) {
+	$running_total += scalar keys %{$products_added{$year}};
+	$number_of_products{$year} = $running_total;
+}
+$number_of_products{"all"} = $running_total if exists $products_added{"all"};
+
+# Print the total number of products by year
+print STDERR "Total number of products by year:\n";
+foreach my $year (sort keys %number_of_products) {
+	print STDERR "$year: $number_of_products{$year}\n";
+}
+
+# Print all the stats by year in tab separated columns to STDOUT
+print "year\tactive_editors\tnew_editors\tproducts_edited\tproducts_added\ttotal_products\n";
+foreach my $year (sort keys %number_of_products) {
+	print join("\t", $year, scalar keys %{$active_editors{$year}}, $new_editors{$year}, scalar keys %{$products_edited{$year}}, scalar keys %{$products_added{$year}}, $number_of_products{$year}) . "\n";
+}
+
+
+exit(0);

From 552f42464e316d0eb61d4a20f4cd6f7b3352009c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Gigandet?=
Date: Thu, 31 Oct 2024 15:08:19 +0100
Subject: [PATCH 2/3] lint

---
 scripts/count_product_contributions_by_year.pl | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/scripts/count_product_contributions_by_year.pl b/scripts/count_product_contributions_by_year.pl
index 15d56d56123e3..ba260c18d7589 100755
--- a/scripts/count_product_contributions_by_year.pl
+++ b/scripts/count_product_contributions_by_year.pl
@@ -239,8 +239,12 @@
 # Print all the stats by year in tab separated columns to STDOUT
 print "year\tactive_editors\tnew_editors\tproducts_edited\tproducts_added\ttotal_products\n";
 foreach my $year (sort keys %number_of_products) {
-	print join("\t", $year, scalar keys %{$active_editors{$year}}, $new_editors{$year}, scalar keys %{$products_edited{$year}}, scalar keys %{$products_added{$year}}, $number_of_products{$year}) . "\n";
+	print join("\t",
+		$year, scalar keys %{$active_editors{$year}},
+		$new_editors{$year},
+		scalar keys %{$products_edited{$year}},
+		scalar keys %{$products_added{$year}},
+		$number_of_products{$year}) . "\n";
 }
 
-
 exit(0);

From 223c5c5e38e4488ba5d71284f072461c6c2c35e8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Gigandet?=
Date: Fri, 15 Nov 2024 10:14:18 +0100
Subject: [PATCH 3/3] Update scripts/count_product_contributions_by_year.pl

Co-authored-by: Pierre Slamich
---
 scripts/count_product_contributions_by_year.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/count_product_contributions_by_year.pl b/scripts/count_product_contributions_by_year.pl
index ba260c18d7589..ce607e9f052b2 100755
--- a/scripts/count_product_contributions_by_year.pl
+++ b/scripts/count_product_contributions_by_year.pl
@@ -3,7 +3,7 @@
 # This file is part of Product Opener.
 #
 # Product Opener
-# Copyright (C) 2011-2023 Association Open Food Facts
+# Copyright (C) 2011-2024 Association Open Food Facts
 # Contact: contact@openfoodfacts.org
 # Address: 21 rue des Iles, 94100 Saint-Maur des Fossés, France
 #