-
Notifications
You must be signed in to change notification settings - Fork 31
/
Copy pathbuild-data.uk
202 lines (181 loc) · 8.52 KB
/
build-data.uk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
#!/usr/bin/env perl
use strict;
use warnings;
use Data::CompactReadonly;
use File::Find::Rule;
use Text::CSV_XS;
# Accumulators filled in while the input files are read; all start out empty.
#   @telco_length_data - one [prefix, telco, format] triple per accepted row
#   @..._prefices      - prefixes grouped by the kind of number they belong to
#   %areanames         - area name, keyed by prefix
#   %statuses          - allocation status, keyed by prefix
my(
    @telco_length_data,
    @geo_prefices,
    @free_prefices,
    @network_svc_prefices,
    @corporate_prefices,
    @personal_prefices,
    @pager_prefices,
    @mobile_prefices,
    @special_prefices,
    @adult_prefices,
    @ip_prefices,
    %areanames,
    %statuses,
);
# Load area names from libphonenumber's English geocoding file for +44.
# Each data line is expected to look like "44<prefix>|<name>". Blank lines and
# lines starting with '#' are skipped. The leading 44 is removed from the
# prefix before it is used as the key in %areanames.
my $area_codes_file = 'libphonenumber/resources/geocoding/en/44.txt';
open(my $area_codes, '<', $area_codes_file) || die("Couldn't open $area_codes_file: $!\n");
print "Working on $area_codes_file\n";
while(my $line = <$area_codes>) {
    chomp($line);
    next if(!$line || $line =~ /^#/);
    my @row = split(/\|/, $line);
    $row[0] =~ s/^44//;
    $areanames{$row[0]} = $row[1];
}
# Close explicitly so the handle isn't held for the rest of the build and so
# that an I/O error while reading is reported instead of being ignored.
close($area_codes) || die("Couldn't close $area_codes_file: $!\n");
# Read every spreadsheet matching data-files/S?.csv. For each row whose status
# is "Protected" or one of the "Allocated" variants:
#   * remember the status in %statuses,
#   * file the prefix under the matching @..._prefices list(s),
#   * queue [prefix, telco, format] in @telco_length_data for later processing.
# Dies if a file can't be opened or parsed, has no header row, or lacks a
# status, telco or format column.
foreach my $csvfile (File::Find::Rule->name('S?.csv')->in('data-files')) {
    print "Working on $csvfile\n";

    # binary => 1 lets quoted fields contain non-ASCII bytes and embedded
    # newlines; without it the parser gives up at the first such field.
    my $parser = Text::CSV_XS->new({ binary => 1 })
        || die("Couldn't create a CSV parser: " . Text::CSV_XS->error_diag() . "\n");
    open(my $fh, '<', $csvfile) || die "Could not open '$csvfile': $!\n";
    my @rows = ();
    while (my $row = $parser->getline($fh)) {
        push @rows, $row;
    }
    # getline() returns undef both at end of file and on a parse error. Tell
    # the two apart so a malformed line can't silently discard the remainder
    # of the spreadsheet.
    $parser->eof() || die("Could not parse '$csvfile': " . $parser->error_diag() . "\n");
    close($fh);

    # The first row holds the column names; map each name to its column index.
    my $header = shift(@rows) || die("No header row found in $csvfile\n");
    my $count = 0;
    my %header_fields_index_by_name = map { ($_ => $count++) } @{$header};

    # Nothing to record (and no columns to insist on) if there are no data rows.
    next unless(@rows);

    # The prefix is spread over whichever of these columns the file has.
    my @prefix_fields = grep { exists($header_fields_index_by_name{$_}) } (qw(SABC D/DE FG), 'NMS Number Block: Number Block');
    my @prefix_columns = @header_fields_index_by_name{@prefix_fields};

    # Column names vary between spreadsheets; take the first candidate that is
    # present, in order of preference. These don't change from row to row, so
    # resolve them once per file.
    my($format_field) = grep { exists($header_fields_index_by_name{$_}) }
        ('Number Length', 'NumberLengthNonGeo', 'Non Geo Number Length', 'Geographic Number Length');
    defined($format_field) || die("Can't find a format field in $csvfile\n");

    my($telco_field) = grep { exists($header_fields_index_by_name{$_}) }
        ('Communications Provider', 'CP Name');
    defined($telco_field) || die("Can't find a telco field in $csvfile\n");

    my($status_field) = grep { exists($header_fields_index_by_name{$_}) }
        ('Status', 'Block Status', 'Block Status ', 'Block Status Formula');
    defined($status_field) || die("Can't find a status field in $csvfile\n");

    my($status_column, $telco_column, $format_column) =
        @header_fields_index_by_name{$status_field, $telco_field, $format_field};

    foreach my $row (@rows) {
        # Concatenate the prefix columns, dropping empty cells and any
        # whitespace. Work on copies so the row itself is left untouched.
        my $prefix = join('',
            map  { (my $part = $_) =~ s/\s//g; $part }
            grep { defined }
            @{$row}[@prefix_columns]
        );

        my($status, $telco, $format) = @{$row}[$status_column, $telco_column, $format_column];
        $status ||= '';
        $telco  ||= '';
        $format ||= '';

        # Only protected or allocated blocks are of interest.
        next if(
            $status !~ /^(Protected|Allocated ?(\(Closed Range\)|for Migration only)?)$/
        );

        $statuses{$prefix} = $status;

        if($prefix =~ /^[12]/) { push @geo_prefices, $prefix }
        elsif($prefix =~ /^80/) { push @free_prefices, $prefix }
        elsif($prefix =~ /^55/) { push @corporate_prefices, $prefix }
        elsif($prefix =~ /^56/) { push @ip_prefices, $prefix }
        elsif($prefix =~ /^70/) { push @personal_prefices, $prefix }
        # NB order is important. 7624 is IOM mobiles except 76242 which is pagers. Apparently.
        elsif($prefix =~ /^7([12345789]|624[013-9])/) { push @mobile_prefices, $prefix }
        elsif($prefix =~ /^76/) { push @pager_prefices, $prefix }
        elsif($prefix =~ /^(3|8[47]|9)/) { push @special_prefices, $prefix }
        ## previously allocated for "internet for schools" and "inbound routing codes" some time
        ## before 30 Sep 2020. This classification may be obsolete for the UK now
        # elsif($prefix =~ /^82[09]/) { push @network_svc_prefices, $prefix } # internet for schools

        # Deliberately a separate test: these prefixes are recorded as adult
        # in addition to whatever the chain above filed them under.
        if($prefix =~ /^9(8|0[89])/) { push @adult_prefices, $prefix }

        push @telco_length_data, [$prefix, $telco, $format];
    }
}
# Numbers set aside for use in drama are not listed in the spreadsheets,
# so they are added here by hand.
push(@free_prefices, qw(8081570));
push(@special_prefices, qw(9098790 3069990));
# Translate one number-length description from the spreadsheets into the
# compact form stored in the database.
#
# Parameters:
#   $format - the description as read from the spreadsheet; may be ''
#   $prefix - the number prefix; only used to choose a default when $format is ''
# Returns a two-element list:
#   [0] the normalised format (an unrecognised description is passed through
#       unchanged)
#   [1] true if the description was recognised, false otherwise
sub _normalise_format {
    my($format, $prefix) = @_;

    # No description at all: fall back on a default chosen from the prefix.
    # Prefixes matching none of these keep the empty description.
    if($format eq '') {
        if(   $prefix =~ /^[37]/     ) { $format = '10 digit numbers' }
        elsif($prefix =~ /^2/        ) { $format = '2+8' }
        elsif($prefix =~ /^(1.1|11.)/) { $format = '3+7' }
    }

    if($format eq '(0)+10' || $format =~ /^10 digit number/i) { return('0+10', 1) }
    if($format =~ /^9 +digit number/i)                        { return('0+9', 1) }
    if($format =~ /^7 digit number/i)                         { return('0+7', 1) }
    if($format =~ /^Mixed 4\+5 &(amp;)? 4\+6$/)               { return('4+5/6', 1) }

    # Already one of the accepted N+M forms: keep it exactly as it is.
    if($format =~ /\A(?:2\+8|3\+7|4\+6|4\+5|5\+5|5\+4)\z/)    { return($format, 1) }

    return($format, 0);
}

# Build the prefix => telco and prefix => format lookups from the rows queued
# in @telco_length_data, normalising each format description on the way.
print "Building telco/length data ...\n";
my %telco_by_prefix = ();
my %format_by_prefix = ();
foreach my $datum (@telco_length_data) {
    my($prefix, $telco, $format) = @{$datum};
    # if($prefix eq '1442600') {
    # warn "Correcting OFCOM's broken data for 1442 60 0\n";
    # warn " check this and remove the warning if fixed in XLS file (last checked 2017/06/20)\n";
    # if($format eq '') { $format = '4+6' }
    # }
    # see https://github.com/DrHyde/perl-modules-Number-Phone/issues/112
    # if($prefix eq '800716') {
    # warn "Correcting OFCOM's broken data for 8007 16\n";
    # $format = '0+9/10';
    # }
    ($format, my $recognised) = _normalise_format($format, $prefix);

    # Complain about descriptions we don't understand, except for prefixes
    # starting with 8 or 9 and for protected blocks.
    if(!$recognised && $prefix !~ /^[89]/ && $statuses{$prefix} ne 'Protected') {
        warn "Unknown format: $format (r: $prefix; t: $telco)\n";
    }

    $telco_by_prefix{$prefix} = $telco;
    $format_by_prefix{$prefix} = $format;
}
# Write everything gathered so far to share/Number-Phone-UK-Data.db,
# replacing any file left over from a previous run.
print "Creating Data::CompactReadonly file ...\n";

my $db_file = 'share/Number-Phone-UK-Data.db';
mkdir('share');
unlink($db_file);

# Each of these lists is stored as a set: prefix => true.
my %prefix_list_for = (
    geo_prefices         => \@geo_prefices,
    network_svc_prefices => \@network_svc_prefices,
    free_prefices        => \@free_prefices,
    corporate_prefices   => \@corporate_prefices,
    personal_prefices    => \@personal_prefices,
    pager_prefices       => \@pager_prefices,
    mobile_prefices      => \@mobile_prefices,
    special_prefices     => \@special_prefices,
    adult_prefices       => \@adult_prefices,
    ip_prefices          => \@ip_prefices,
);

# Prefixes that belong to a subclass, listed per subclass code.
# taken from libphonenumber
# checked on 2024-12-11
# next check due 2025-12-01 (annually)
# NB for all of these also check build-data.country-mapping
my %prefixes_by_subclass = (
    GG => [
        1481, 7781, 7839, 79111, 79117,
    ],
    IM => [
        1624,
        762450,
        762456,
        74576,
        7524,
        7924,
        76240,
        76241,
        76242,
        76243,
        76244,
        76246,
        76248,
        76249,
        808162,
        8440406, 8440906,
        872299,
        845624, 870624,
        900624, 901624, 906624, 907624,
    ],
    JE => [
        1534,
        7509, 7829, 7937,
        77003, 77007, 77008,
        # the rest of 7797 is *reserved* but not allocated
        77977, 77978, 77979,
        800735, 800781,
        # libphonenumber says 808901 is JE, OFCOM say only
        # these three ranges are allocated
        8089012, 8089013, 8089019,
    ],
);

# Invert the lists into prefix => subclass code, in a fixed order.
my %subclass_of = ();
foreach my $subclass (qw(GG IM JE)) {
    foreach my $prefix (@{$prefixes_by_subclass{$subclass}}) {
        $subclass_of{$prefix} = $subclass;
    }
}

my %db = (
    areanames => \%areanames,
    telco     => \%telco_by_prefix,
    format    => \%format_by_prefix,
    subclass  => \%subclass_of,
);
foreach my $list_name (keys %prefix_list_for) {
    # Booleans to save space in the db if building with perl 5.36 or newer
    $db{$list_name} = { map { ($_, 1 == 1) } @{$prefix_list_for{$list_name}} };
}

Data::CompactReadonly->create($db_file, \%db);