Skip to content

Commit

Permalink
Fix parsing plain $$ (#136)
Browse files Browse the repository at this point in the history
  • Loading branch information
nichtich committed Aug 28, 2023
1 parent 44ec3d8 commit fbb6d8e
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 32 deletions.
1 change: 1 addition & 0 deletions Changes
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
Revision history for PICA::Data

{{$NEXT}}
- Fix parsing plain $$ (#136)

2.11 2023-08-18T05:19:26Z
- Add parser method: all
Expand Down
47 changes: 15 additions & 32 deletions lib/PICA/Parser/Plain.pm
Original file line number Diff line number Diff line change
Expand Up @@ -49,45 +49,28 @@ sub _next_record {
$occ = $3;
$data = $4;
}
elsif ($self->{strict}) {
croak "ERROR: invalid PICA field structure \"$field\"";
}
else {
carp
"WARNING: invalid PICA field structure \"$field\". Skipped field";
next;
}

if (!$self->{strict} && $data =~ //) {
$data =~ s/\$/\$\$/g;
$data =~ s/ƒ/\$/g;
}

my @subfields = split /\$(\$+|.)/, $data;
shift @subfields;
push @subfields, '' if @subfields % 2; # last subfield without value

if ($data =~ /\$\$/) {
my @tokens = (shift @subfields, shift @subfields);
while (@subfields) {
my $code = shift @subfields;
my $value = shift @subfields;
if ($code =~ /^\$+$/) {
my $length = length $code;
$code =~ s/\$\$/\$/g;
if ($length % 2) {
$tokens[-1] .= "$code$value";
next;
}
else {
$tokens[-1] .= $code;
$code = substr $value, 0, 1;
$value = substr $value, 1;
}
}
push @tokens, $code, $value;
if ($data !~ /^(\$[^\$]([^\$]|\$\$)*)+$/) {
if ($self->{strict}) {
croak "ERROR: invalid PICA field structure \"$field\"";
}
else {
carp
"WARNING: invalid PICA field structure \"$field\". Skipped field";
next;
}
@subfields = @tokens;
}

my @subfields;
while ($data =~ m/\G\$([^\$])(([^\$]|\$\$)*)/g) {
my ($code, $value) = ($1, $2);
$value =~ s/\$\$/\$/g;
push @subfields, $code, $value;
}

push @subfields, $annotation if defined $annotation;
Expand Down
6 changes: 6 additions & 0 deletions t/20-parser.t
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ ok $first->{record}->[0][0] eq '002@', 'tag from first field';
is_deeply $first->{record}->[1], ['003@', '', 0 => '12345'], 'second field';
is_deeply $first->{record}->[4], ['012X', '', 0 => '0', x => '', y => ''],
'empty subfields';


is $first->{record}->[6]->[7], '柳经纬主编;', 'Unicode';
is_deeply $first->{record}->[11],
['145Z', '40', 'a', '$', 'b', 'test$', 'c', '...'], 'sub field with $';
Expand Down Expand Up @@ -116,6 +118,10 @@ note 'error handling';
dies_ok {pica_parser(xml => '')} 'invalid handle';
dies_ok {pica_parser(plus => [])} 'invalid handle';
dies_ok {pica_parser(plain => bless({}, 'MyFooBar'))} 'invalid handle';

# https://github.com/gbv/PICA-Data/issues/136
$plain = '021A $$';
dies_ok {PICA::Parser::Plain->new(\$plain, strict => 1)->next} 'issue 136';
}

is pica_parser(plain => \'012A/00 $xy')->next->string,
Expand Down

0 comments on commit fbb6d8e

Please sign in to comment.