From: Dobrica Pavlinusic Date: Sun, 9 Jul 2006 21:36:33 +0000 (+0000) Subject: initial implementation of split_on_delimiters functionality needed for X-Git-Url: http://git.rot13.org/?p=Biblio-Isis;a=commitdiff_plain;h=1d7ffb42f40b3837c71123f616cc8ba7349ffd5e initial implementation of split_on_delimiters functionality needed for Webpac. It adds just regexpes hash to to_hash, but I'm still not quite satisfied with it. git-svn-id: file:///home/dpavlin/svn/Biblio-Isis/trunk@61 4670fa4d-42ec-0310-ab5b-a66af6943492 --- diff --git a/lib/Biblio/Isis.pm b/lib/Biblio/Isis.pm index cc144e2..6f29e20 100644 --- a/lib/Biblio/Isis.pm +++ b/lib/Biblio/Isis.pm @@ -7,7 +7,7 @@ use File::Glob qw(:globally :nocase); BEGIN { use Exporter (); use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); - $VERSION = 0.21; + $VERSION = 0.22_1; @ISA = qw (Exporter); #Give a hoot don't pollute, do not export more than needed by default @EXPORT = qw (); @@ -519,6 +519,9 @@ There is also more elaborative way to call C like this: my $hash = $isis->to_hash({ mfn => 42, include_subfields => 1, + regexps => [ + 's/something/else/g', + ], }); Each option controll creation of hash: @@ -562,6 +565,8 @@ sub to_hash { $mfn = $arg->{mfn} || confess "need mfn in arguments"; } + confess "regexps must be HASH" if ($arg->{regexps} && ref($arg->{regexps}) ne 'HASH'); + # init record to include MFN as field 000 my $rec = { '000' => [ $mfn ] }; @@ -580,6 +585,16 @@ sub to_hash { next unless defined($l); } + # apply regexps + if ($arg->{regexps} && defined($arg->{regexps}->{$f_nr})) { + confess "regexps->{$f_nr} must be ARRAY" if (ref($arg->{regexps}->{$f_nr}) ne 'ARRAY'); + my $c = 0; + foreach my $r (@{ $arg->{regexps}->{$f_nr} }) { + while ( eval '$l =~ ' . $r ) { $c++ }; + } + warn "## field $f_nr triggered $c regexpes\n" if ($c && $self->{debug}); + } + my $val; my $r_sf; # repeatable subfields in this record diff --git a/t/2_isis.t b/t/2_isis.t index 83508d1..dd46771 100755 --- a/t/2_isis.t +++ b/t/2_isis.t @@ -3,7 +3,7 @@ use strict; use blib; -use Test::More tests => 134; +use Test::More tests => 136; use File::Spec; BEGIN { @@ -271,3 +271,59 @@ is_deeply( $hash, { ], 902 => [{ a => "a1 ; a2 ; a3 ; a4 ; a5", b => "b1 ; b2", c => "c1" }], }, 'hash is_deeply'); + +# split_on_delimiters + +$isis->{record} = { + 900 => [ '^a900a : 900b ; 900c' ], + 901 => [ + '^a901a-1 x 901b-1 y 901c-1', + '^b901b-2 x 901b-2', + '^c901c-3 y 901c-3', + ], +}; + +my $split_on_delimiters = { + 900 => { a => { ' : ' => 'b', ' ; ' => 'c' } }, + 901 => { + a => { ' x ' => 'b', ' y ' => 'c' }, + b => { ' x ' => 'b' }, + c => { ' y ' => 'c' } + }, +}; + +diag "split_on_delimiters = ", Dumper($split_on_delimiters); + +my $regexpes; + +foreach my $f (keys %$split_on_delimiters) { +warn "--- f: $f\n"; + foreach my $sf (keys %{ $split_on_delimiters->{$f} }) { +warn "---- sf: $sf\n"; + foreach my $patt (keys %{ $split_on_delimiters->{$f}->{$sf} }) { + my $new_sf = $split_on_delimiters->{$f}->{$sf}->{$patt} || die "no field?"; +warn "----- patt: $patt -> $new_sf\n"; + + # fool-proof spaces in pattern + $patt =~ s/\s+/\\s\+/g; + + push @{ $regexpes->{$f} }, + 's/\^' . $sf . '([^\^]+)' . $patt . '([^\^]+)/^' . $sf . '$1^' . $new_sf . '$2/g'; + } + } +} + +diag "generated regexpes = ", Dumper( $regexpes ); + + +ok($hash = $isis->to_hash({ mfn => $isis->mfn, regexps => $regexpes, }), 'to_hash(mfn,regexpes)'); +diag "to_hash = ",Dumper( $hash ) if ($debug); +is_deeply( $hash, { + "000" => [42], + 900 => [{ a => "900a", b => "900b", c => "900c" }], + 901 => [ + { a => "901a-1", b => "901b-1", c => "901c-1" }, + { b => ["901b-2", "901b-2"] }, + { c => ["901c-3", "901c-3"] }, + ], +}, 'hash is_deeply');