-
Notifications
You must be signed in to change notification settings - Fork 42
/
Copy pathagg-csv.pl
executable file
·61 lines (50 loc) · 1.47 KB
/
agg-csv.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/env perl
# Aggregate per-province population (CSV) and user data (JSON) into one semicolon-separated table
use strict;
use warnings;
use v5.12;
use JSON;
use File::Slurp::Tiny qw(read_file);
# Print one semicolon-separated table with a row per province: its name, its
# population (taken from the INE CSV) and the per-user metrics summed over the
# province's JSON user file.
#
# CSV layout as consumed here: the first line holds the comma-separated
# province names, the second line the populations in the same order.
my $provincias_content = read_file('poblacion-provincia-INE.csv');

# Accept both LF and CRLF line endings so a stray "\r" never ends up glued to
# the last name or population of a line.
my @provincias = split( /\r?\n/, $provincias_content );
die "poblacion-provincia-INE.csv needs a names line and a population line\n"
    if @provincias < 2;

my @prov_names = split( ",", $provincias[0] );
my @prov_pop   = split( ",", $provincias[1] );

# Names as they appear in the CSV header => names used for the data files and
# for the output rows. Names without an entry are used unchanged.
my %new_names = (
    "Alicante/Alacant"   => "Alicante",
    "Araba/Álava"        => "Álava",
    "Bizkaia"            => "Bilbao",
    "Castellón/Castelló" => "Castellón",
    "Gipuzkoa"           => "Donostia",
    "Girona"             => "Gerona",
    "Palmas"             => "Las Palmas",
    "Valencia/València"  => "Valencia",
);

# Per-user fields that are summed for every province.
my @columns = qw( contributions stars user_stars followers );

say join( ";", qw( province population users ), @columns );

for my $i ( 0 .. $#prov_names ) {

    # Names and populations are paired by position; indexing both arrays keeps
    # them aligned no matter which provinces are skipped below.
    my $population = $prov_pop[$i];
    my $name       = $new_names{ $prov_names[$i] } // $prov_names[$i];
    next if $name eq "Guadalajara";    #Problems with sampling

    # read_file croaks when the file cannot be opened, which would abort the
    # whole run; a province without a data file is skipped instead.
    my $file = "data/user-data-$name.json";
    if ( !-e $file ) {
        warn "Skipping $name: $file not found\n";
        next;
    }

    my $file_contents = read_file($file);
    next if !$file_contents;           # empty file: nothing to decode
    my $p_data = decode_json($file_contents);

    my %totals = map { $_ => 0 } @columns;
    my $users  = 0;
    for my $u (@$p_data) {
        $users++;
        for my $column (@columns) {
            my $value = $u->{$column};
            next if !$value;           # missing, empty or zero adds nothing

            # A value written with a "k" suffix (e.g. "1.2k") is expanded to
            # a plain number before being added.
            if ( $value =~ /(\S+)k/ ) {
                $value = $1 * 1000;
            }
            $totals{$column} += $value;
        }
    }

    say join( ";", $name, $population, $users, @totals{@columns} );
}