From fbb6d8e00208fa636df12e8e4414ac79f58645d7 Mon Sep 17 00:00:00 2001 From: Jakob Voss Date: Mon, 28 Aug 2023 07:38:40 +0200 Subject: [PATCH] Fix parsing plain $$ (#136) --- Changes | 1 + lib/PICA/Parser/Plain.pm | 47 +++++++++++++--------------------------- t/20-parser.t | 6 +++++ 3 files changed, 22 insertions(+), 32 deletions(-) diff --git a/Changes b/Changes index a7b2ad0..f371f74 100644 --- a/Changes +++ b/Changes @@ -1,6 +1,7 @@ Revision history for PICA::Data {{$NEXT}} + - Fix parsing plain $$ (#136) 2.11 2023-08-18T05:19:26Z - Add parser method: all diff --git a/lib/PICA/Parser/Plain.pm b/lib/PICA/Parser/Plain.pm index 12778d5..8f1d096 100644 --- a/lib/PICA/Parser/Plain.pm +++ b/lib/PICA/Parser/Plain.pm @@ -49,45 +49,28 @@ sub _next_record { $occ = $3; $data = $4; } - elsif ($self->{strict}) { - croak "ERROR: invalid PICA field structure \"$field\""; - } - else { - carp - "WARNING: invalid PICA field structure \"$field\". Skipped field"; - next; - } if (!$self->{strict} && $data =~ /^ƒ/) { $data =~ s/\$/\$\$/g; $data =~ s/ƒ/\$/g; } - my @subfields = split /\$(\$+|.)/, $data; - shift @subfields; - push @subfields, '' if @subfields % 2; # last subfield without value - - if ($data =~ /\$\$/) { - my @tokens = (shift @subfields, shift @subfields); - while (@subfields) { - my $code = shift @subfields; - my $value = shift @subfields; - if ($code =~ /^\$+$/) { - my $length = length $code; - $code =~ s/\$\$/\$/g; - if ($length % 2) { - $tokens[-1] .= "$code$value"; - next; - } - else { - $tokens[-1] .= $code; - $code = substr $value, 0, 1; - $value = substr $value, 1; - } - } - push @tokens, $code, $value; + if ($data !~ /^(\$[^\$]([^\$]|\$\$)*)+$/) { + if ($self->{strict}) { + croak "ERROR: invalid PICA field structure \"$field\""; + } + else { + carp + "WARNING: invalid PICA field structure \"$field\". Skipped field"; + next; } - @subfields = @tokens; + } + + my @subfields; + while ($data =~ m/\G\$([^\$])(([^\$]|\$\$)*)/g) { + my ($code, $value) = ($1, $2); + $value =~ s/\$\$/\$/g; + push @subfields, $code, $value; } push @subfields, $annotation if defined $annotation; diff --git a/t/20-parser.t b/t/20-parser.t index 6656586..8700aa0 100644 --- a/t/20-parser.t +++ b/t/20-parser.t @@ -13,6 +13,8 @@ ok $first->{record}->[0][0] eq '002@', 'tag from first field'; is_deeply $first->{record}->[1], ['003@', '', 0 => '12345'], 'second field'; is_deeply $first->{record}->[4], ['012X', '', 0 => '0', x => '', y => ''], 'empty subfields'; + + is $first->{record}->[6]->[7], '柳经纬主编;', 'Unicode'; is_deeply $first->{record}->[11], ['145Z', '40', 'a', '$', 'b', 'test$', 'c', '...'], 'sub field with $'; @@ -116,6 +118,10 @@ note 'error handling'; dies_ok {pica_parser(xml => '')} 'invalid handle'; dies_ok {pica_parser(plus => [])} 'invalid handle'; dies_ok {pica_parser(plain => bless({}, 'MyFooBar'))} 'invalid handle'; + + # https://github.com/gbv/PICA-Data/issues/136 + $plain = '021A $$'; + dies_ok {PICA::Parser::Plain->new(\$plain, strict => 1)->next} 'issue 136'; } is pica_parser(plain => \'012A/00 $xy')->next->string,