1 package WebPAC::Normalize;
11 marc marc_indicators marc_repeatable_subfield
12 marc_compose marc_leader marc_fixed
13 marc_duplicate marc_remove marc_count
17 regex prefix suffix surround
18 first lookup join_with
30 #use base qw/WebPAC::Common/;
31 use Data::Dump qw/dump/;
32 use Storable qw/dclone/;
41 WebPAC::Normalize - describe normalisaton rules using sets
49 our $VERSION = '0.29';
53 This module uses C<conf/normalize/*.pl> files to perform normalisation
54 from input records using perl functions which are specialized for set
57 Sets are implemented as arrays, and normalisation file is valid perl, which
58 means that you check it's validity before running WebPAC using
59 C<perl -c normalize.pl>.
61 Normalisation can generate multiple output normalized data. For now, supported output
62 types (on the left side of definition) are: C<tag>, C<display>, C<search> and
67 Functions which start with C<_> are private and used by WebPAC internally.
68 All other functions are available for use within normalisation rules.
74 my $ds = WebPAC::Normalize::data_structure(
75 lookup => $lookup_hash,
77 rules => $normalize_pl_config,
78 marc_encoding => 'utf-8',
80 load_row_coderef => sub {
81 my ($database,$input,$mfn) = shift;
82 $store->load_row( database => $database, input => $input, id => $mfn );
86 Options C<row>, C<rules> and C<log> are mandatory while all
89 C<load_row_coderef> is closure only used when executing lookups, so they will
90 die if it's not defined.
92 This function will B<die> if normalizastion can't be evaled.
94 Since this function isn't exported you have to call it with
95 C<WebPAC::Normalize::data_structure>.
104 die "need row argument" unless ($arg->{row});
105 die "need normalisation argument" unless ($arg->{rules});
108 _set_lookup( $arg->{lookup} ) if defined($arg->{lookup});
109 _set_rec( $arg->{row} );
110 _set_config( $arg->{config} ) if defined($arg->{config});
111 _clean_ds( %{ $arg } );
112 $load_row_coderef = $arg->{load_row_coderef};
114 eval "$arg->{rules}";
115 die "error evaling $arg->{rules}: $@\n" if ($@);
122 Set current record hash
131 $rec = shift or die "no record hash";
136 Set current config hash
138 _set_config( $config );
146 Code of current database
164 Return hash formatted as data structure
170 my ($out, $marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader);
171 my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
179 Clean data structure hash for next record
187 ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader) = ();
188 ($marc_record_offset, $marc_fetch_offset) = (0,0);
189 $marc_encoding = $a->{marc_encoding};
194 Set current lookup hash
196 _set_lookup( $lookup );
208 Get current lookup hash
210 my $lookup = _get_lookup();
220 Setup code reference which will return L<data_structure> from
224 my ($database,$input,$mfn) = @_;
225 $store->load_row( database => $database, input => $input, id => $mfn );
232 confess "argument isn't CODE" unless ref($coderef) eq 'CODE';
234 $load_row_coderef = $coderef;
237 =head2 _get_marc_fields
239 Get all fields defined by calls to C<marc>
241 $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
243 We are using I<magic> which detect repeatable fields only from
244 sequence of field/subfield data generated by normalization.
246 Repeatable field is created when there is second occurence of same subfield or
247 if any of indicators are different.
249 This is sane for most cases. Something like:
255 will be created from any combination of:
257 900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
261 marc('900','a', rec('200','a') );
262 marc('900','b', rec('200','b') );
263 marc('900','c', rec('200','c') );
265 which might not be what you have in mind. If you need repeatable subfield,
266 define it using C<marc_repeatable_subfield> like this:
268 marc_repeatable_subfield('900','a');
269 marc('900','a', rec('200','a') );
270 marc('900','b', rec('200','b') );
271 marc('900','c', rec('200','c') );
275 900a-1 900a-2 900a-3 900b-1 900c-1
278 There is also support for returning next or specific using:
280 while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
281 # do something with $mf
284 will always return fields from next MARC record or
286 my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
288 will return 42th copy record (if it exists).
294 sub _get_marc_fields {
297 warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
298 $fetch_pos = $marc_fetch_offset;
299 if ($arg->{offset}) {
300 $fetch_pos = $arg->{offset};
301 } elsif($arg->{fetch_next}) {
302 $marc_fetch_offset++;
305 return if (! $marc_record || ref($marc_record) ne 'ARRAY');
307 warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
309 my $marc_rec = $marc_record->[ $fetch_pos ];
311 warn "## _get_marc_fields (at offset: $fetch_pos) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
313 return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
315 # first, sort all existing fields
316 # XXX might not be needed, but modern perl might randomize elements in hash
317 my @sorted_marc_record = sort {
318 $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
321 @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
326 # count unique field-subfields (used for offset when walking to next subfield)
328 map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
331 warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
332 warn "## marc_record[$fetch_pos] = ", dump( $marc_rec ), $/;
333 warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
334 warn "## subfield count = ", dump( $u ), $/;
337 my $len = $#sorted_marc_record;
342 foreach ( 0 .. $len ) {
344 # find next element which isn't visited
345 while ($visited->{$i}) {
346 $i = ($i + 1) % ($len + 1);
352 my $row = dclone( $sorted_marc_record[$i] );
354 # field and subfield which is key for
355 # marc_repeatable_subfield and u
356 my $fsf = $row->[0] . ( $row->[3] || '' );
360 print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
361 print "### this [$i]: ", dump( $row ),$/;
362 print "### sf: ", $row->[3], " vs ", $field->[3],
363 $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
369 if ( $#$field >= 0 ) {
371 $row->[0] ne $field->[0] || # field
372 $row->[1] ne $field->[1] || # i1
373 $row->[2] ne $field->[2] # i2
376 warn "## saved/1 ", dump( $field ),$/ if ($debug);
380 ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
382 ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
383 ! $marc_repeatable_subfield->{ $fsf }
387 warn "## saved/2 ", dump( $field ),$/ if ($debug);
391 # append new subfields to existing field
392 push @$field, ( $row->[3], $row->[4] );
399 if (! $marc_repeatable_subfield->{ $fsf }) {
400 # make step to next subfield
401 $i = ($i + $u->{ $fsf } ) % ($len + 1);
407 warn "## saved/3 ", dump( $field ),$/ if ($debug);
413 =head2 _get_marc_leader
415 Return leader from currently fetched record by L</_get_marc_fields>
417 print WebPAC::Normalize::_get_marc_leader();
421 sub _get_marc_leader {
422 die "no fetch_pos, did you called _get_marc_fields first?" unless ( defined( $fetch_pos ) );
423 return $marc_leader->[ $fetch_pos ];
428 Change level of debug warnings
436 return $debug unless defined($l);
437 warn "debug level $l",$/ if ($l > 0);
441 =head1 Functions to create C<data_structure>
443 Those functions generally have to first in your normalization file.
447 Define new tag for I<search> and I<display>.
449 tag('Title', rec('200','a') );
455 my $name = shift or die "tag needs name as first argument";
456 my @o = grep { defined($_) && $_ ne '' } @_;
458 $out->{$name}->{tag} = $name;
459 $out->{$name}->{search} = \@o;
460 $out->{$name}->{display} = \@o;
465 Define tag just for I<display>
467 @v = display('Title', rec('200','a') );
472 my $name = shift or die "display needs name as first argument";
473 my @o = grep { defined($_) && $_ ne '' } @_;
475 $out->{$name}->{tag} = $name;
476 $out->{$name}->{display} = \@o;
481 Prepare values just for I<search>
483 @v = search('Title', rec('200','a') );
488 my $name = shift or die "search needs name as first argument";
489 my @o = grep { defined($_) && $_ ne '' } @_;
491 $out->{$name}->{tag} = $name;
492 $out->{$name}->{search} = \@o;
497 Setup fields within MARC leader or get leader
499 marc_leader('05','c');
500 my $leader = marc_leader();
505 my ($offset,$value) = @_;
508 $marc_leader->[ $marc_record_offset ]->{ $offset } = $value;
511 if (defined($marc_leader)) {
512 die "marc_leader not array = ", dump( $marc_leader ) unless (ref($marc_leader) eq 'ARRAY');
513 return $marc_leader->[ $marc_record_offset ];
522 Create control/indentifier fields with values in fixed positions
524 marc_fixed('008', 00, '070402');
525 marc_fixed('008', 39, '|');
527 Positions not specified will be filled with spaces (C<0x20>).
529 There will be no effort to extend last specified value to full length of
535 my ($f, $pos, $val) = @_;
536 die "need marc(field, position, value)" unless defined($f) && defined($pos);
543 if (length($old) < $pos) {
544 $_->[1] .= ' ' x ( $pos - length($old) ) . $val;
545 warn "## marc_fixed($f,$pos,'$val') append '$old' -> '$_->[1]'\n" if ($debug > 1);
547 $_->[1] = substr($old, 0, $pos) . $val . substr($old, $pos + length($val));
548 warn "## marc_fixed($f,$pos,'$val') update '$old' -> '$_->[1]'\n" if ($debug > 1);
552 } @{ $marc_record->[ $marc_record_offset ] };
555 my $v = ' ' x $pos . $val;
556 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
557 warn "## marc_fixed($f,$pos,'val') created '$v'\n" if ($debug > 1);
563 Save value for MARC field
565 marc('900','a', rec('200','a') );
566 marc('001', rec('000') );
571 my $f = shift or die "marc needs field";
572 die "marc field must be numer" unless ($f =~ /^\d+$/);
576 $sf = shift or die "marc needs subfield";
580 my $v = $_; # make var read-write for Encode
581 next unless (defined($v) && $v !~ /^\s*$/);
582 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
584 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
586 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
591 =head2 marc_repeatable_subfield
593 Save values for MARC repetable subfield
595 marc_repeatable_subfield('910', 'z', rec('909') );
599 sub marc_repeatable_subfield {
601 die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
602 $marc_repeatable_subfield->{ $f . $sf }++;
606 =head2 marc_indicators
608 Set both indicators for MARC field
610 marc_indicators('900', ' ', 1);
612 Any indicator value other than C<0-9> will be treated as undefined.
616 sub marc_indicators {
617 my $f = shift || die "marc_indicators need field!\n";
619 die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
620 die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
622 $i1 = ' ' if ($i1 !~ /^\d$/);
623 $i2 = ' ' if ($i2 !~ /^\d$/);
624 @{ $marc_indicators->{$f} } = ($i1,$i2);
629 Save values for each MARC subfield explicitly
638 If you specify C<+> for subfield, value will be appended
639 to previous defined subfield.
644 my $f = shift or die "marc_compose needs field";
645 die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
647 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
648 my $m = [ $f, $i1, $i2 ];
650 warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
653 die "ERROR: marc_compose",dump($f,@_)," not valid (must be even).\nDo you need to add first() or join() around some argument?\n";
660 next unless (defined($v) && $v !~ /^\s*$/);
661 warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
663 push @$m, ( $sf, $v );
669 warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
671 push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
674 =head2 marc_duplicate
676 Generate copy of current MARC record and continue working on copy
680 Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
681 C<< _get_marc_fields( offset => 42 ) >>.
686 my $m = $marc_record->[ -1 ];
687 die "can't duplicate record which isn't defined" unless ($m);
688 push @{ $marc_record }, dclone( $m );
689 push @{ $marc_leader }, dclone( marc_leader() );
690 warn "## marc_duplicate = ", dump(@$marc_leader, @$marc_record), $/ if ($debug > 1);
691 $marc_record_offset = $#{ $marc_record };
692 warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
698 Remove some field or subfield from MARC record.
701 marc_remove('200','a');
703 This will erase field C<200> or C<200^a> from current MARC record.
707 Will remove all fields in current MARC record.
709 This is useful after calling C<marc_duplicate> or on it's own (but, you
710 should probably just remove that subfield definition if you are not
711 using C<marc_duplicate>).
713 FIXME: support fields < 10.
720 die "marc_remove needs record number" unless defined($f);
722 my $marc = $marc_record->[ $marc_record_offset ];
724 warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
728 delete( $marc_record->[ $marc_record_offset ] );
729 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
734 foreach ( 0 .. $#{ $marc } ) {
735 last unless (defined $marc->[$i]);
736 warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
737 if ($marc->[$i]->[0] eq $f) {
740 splice @$marc, $i, 1;
741 warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
744 foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
745 my $o = ($j * 2) + 3;
746 if ($marc->[$i]->[$o] eq $sf) {
748 splice @{$marc->[$i]}, $o, 2;
749 warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
750 # is record now empty?
751 if ($#{ $marc->[$i] } == 2) {
752 splice @$marc, $i, 1;
753 warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
763 warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
765 $marc_record->[ $marc_record_offset ] = $marc;
768 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
771 =head2 marc_original_order
773 Copy all subfields preserving original order to marc field.
775 marc_original_order( marc_field_number, original_input_field_number );
777 Please note that field numbers are consistent with other commands (marc
778 field number first), but somewhat counter-intuitive (destination and then
781 You might want to use this command if you are just renaming subfields or
782 using pre-processing modify_record in C<config.yml> and don't need any
783 post-processing or want to preserve order of original subfields.
788 sub marc_original_order {
790 my ($to, $from) = @_;
791 die "marc_original_order needs from and to fields\n" unless ($from && $to);
793 return unless defined($rec->{$from});
795 my $r = $rec->{$from};
796 die "record field $from isn't array\n" unless (ref($r) eq 'ARRAY');
798 my ($i1,$i2) = defined($marc_indicators->{$to}) ? @{ $marc_indicators->{$to} } : (' ',' ');
799 warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
801 foreach my $d (@$r) {
803 if (! defined($d->{subfields}) && ref($d->{subfields}) ne 'ARRAY') {
804 warn "# marc_original_order($to,$from): field $from doesn't have subfields specification\n";
808 my @sfs = @{ $d->{subfields} };
810 die "field $from doesn't have even number of subfields specifications\n" unless($#sfs % 2 == 1);
812 warn "#--> d: ",dump($d), "\n#--> sfs: ",dump(@sfs),$/ if ($debug > 2);
814 my $m = [ $to, $i1, $i2 ];
816 while (my $sf = shift @sfs) {
818 warn "#--> sf: ",dump($sf), $/ if ($debug > 2);
819 my $offset = shift @sfs;
820 die "corrupted sufields specification for field $from\n" unless defined($offset);
823 if (ref($d->{$sf}) eq 'ARRAY') {
824 $v = $d->{$sf}->[$offset] if (defined($d->{$sf}->[$offset]));
825 } elsif ($offset == 0) {
828 die "field $from subfield '$sf' need occurence $offset which doesn't exist", dump($d->{$sf});
830 push @$m, ( $sf, $v ) if (defined($v));
834 push @{ $marc_record->[ $marc_record_offset ] }, $m;
838 warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
843 Return number of MARC records created using L</marc_duplicate>.
845 print "created ", marc_count(), " records";
850 return $#{ $marc_record };
854 =head1 Functions to extract data from input
856 This function should be used inside functions to create C<data_structure> described
859 =head2 _pack_subfields_hash
861 @subfields = _pack_subfields_hash( $h );
862 $subfields = _pack_subfields_hash( $h, 1 );
864 Return each subfield value in array or pack them all together and return scalar
865 with subfields (denoted by C<^>) and values.
869 sub _pack_subfields_hash {
871 warn "## _pack_subfields_hash( ",dump(@_), " )\n" if ($debug > 1);
873 my ($h,$include_subfields) = @_;
875 if ( defined($h->{subfields}) ) {
876 my $sfs = delete $h->{subfields} || die "no subfields?";
879 my $sf = shift @$sfs;
880 push @out, '^' . $sf if ($include_subfields);
882 if ($o == 0 && ref( $h->{$sf} ) ne 'ARRAY' ) {
883 # single element subfields are not arrays
884 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
886 push @out, $h->{$sf};
888 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
889 push @out, $h->{$sf}->[$o];
892 if ($include_subfields) {
893 return join('', @out);
898 if ($include_subfields) {
900 foreach my $sf (sort keys %$h) {
901 if (ref($h->{$sf}) eq 'ARRAY') {
902 $out .= '^' . $sf . join('^' . $sf, @{ $h->{$sf} });
904 $out .= '^' . $sf . $h->{$sf};
909 # FIXME this should probably be in alphabetical order instead of hash order
917 Return all values in some field
921 TODO: order of values is probably same as in source data, need to investigate that
927 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
928 return unless (defined($rec) && defined($rec->{$f}));
929 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
930 if (ref($rec->{$f}) eq 'ARRAY') {
932 foreach my $h ( @{ $rec->{$f} } ) {
933 if (ref($h) eq 'HASH') {
934 push @out, ( _pack_subfields_hash( $h ) );
940 } elsif( defined($rec->{$f}) ) {
947 Return all values in specific field and subfield
955 return unless (defined($rec && $rec->{$f}));
957 warn "rec2($f,$sf) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
959 if (ref($_->{$sf}) eq 'ARRAY') {
964 } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
974 If rec() returns just single value, it will
975 return scalar, not array.
986 if ($#out == 0 && ! wantarray) {
997 Apply regex to some or all values
999 @v = regex( 's/foo/bar/g', @v );
1006 #warn "r: $r\n", dump(\@_);
1007 foreach my $t (@_) {
1010 push @out, $t if ($t && $t ne '');
1017 Prefix all values with a string
1019 @v = prefix( 'my_', @v );
1025 return @_ unless defined( $p );
1026 return map { $p . $_ } grep { defined($_) } @_;
1031 suffix all values with a string
1033 @v = suffix( '_my', @v );
1039 return @_ unless defined( $s );
1040 return map { $_ . $s } grep { defined($_) } @_;
1045 surround all values with a two strings
1047 @v = surround( 'prefix_', '_suffix', @v );
1054 $p = '' unless defined( $p );
1055 $s = '' unless defined( $s );
1056 return map { $p . $_ . $s } grep { defined($_) } @_;
1061 Return first element
1074 Consult lookup hashes for some value
1078 'ffkk/peri/mfn'.rec('000')
1080 'ffkk','peri','200-a-200-e',
1082 first(rec(200,'a')).' '.first(rec('200','e'))
1086 Code like above will be B<automatically generated> using L<WebPAC::Parse> from
1087 normal lookup definition in C<conf/lookup/something.pl> which looks like:
1090 # which results to return from record recorded in lookup
1091 sub { 'ffkk/peri/mfn' . rec('000') },
1092 # from which database and input
1094 # such that following values match
1095 sub { first(rec(200,'a')) . ' ' . first(rec('200','e')) },
1096 # if this part is missing, we will try to match same fields
1097 # from lookup record and current one, or you can override
1098 # which records to use from current record using
1099 sub { rec('900','x') . ' ' . rec('900','y') },
1102 You can think about this lookup as SQL (if that helps):
1109 sub { filter from lookuped record }
1111 sub { optional filter on current record }
1118 my ($what, $database, $input, $key, $having) = @_;
1120 confess "lookup needs 5 arguments: what, database, input, key, having\n" unless ($#_ == 4);
1122 warn "## lookup ($database, $input, $key)", $/ if ($debug > 1);
1123 return unless (defined($lookup->{$database}->{$input}->{$key}));
1125 confess "lookup really need load_row_coderef added to data_structure\n" unless ($load_row_coderef);
1128 my @having = $having->();
1130 warn "## having = ", dump( @having ) if ($debug > 2);
1132 foreach my $h ( @having ) {
1133 if (defined($lookup->{$database}->{$input}->{$key}->{$h})) {
1134 warn "lookup for $database/$input/$key/$h return ",dump($lookup->{$database}->{$input}->{$key}->{$h}),"\n" if ($debug);
1135 $mfns->{$_}++ foreach keys %{ $lookup->{$database}->{$input}->{$key}->{$h} };
1139 return unless ($mfns);
1141 my @mfns = sort keys %$mfns;
1143 warn "# lookup loading $database/$input/$key mfn ", join(",",@mfns)," having ",dump(@having),"\n" if ($debug);
1148 foreach my $mfn (@mfns) {
1149 $rec = $load_row_coderef->( $database, $input, $mfn );
1151 warn "got $database/$input/$mfn = ", dump($rec), $/ if ($debug);
1153 my @vals = $what->();
1155 push @out, ( @vals );
1157 warn "lookup for mfn $mfn returned ", dump(@vals), $/ if ($debug);
1160 # if (ref($lookup->{$k}) eq 'ARRAY') {
1161 # return @{ $lookup->{$k} };
1163 # return $lookup->{$k};
1168 warn "## lookup returns = ", dump(@out), $/ if ($debug);
1177 =head2 save_into_lookup
1179 Save value into lookup. It associates current database, input
1180 and specific keys with one or more values which will be
1181 associated over MFN.
1183 MFN will be extracted from first occurence current of field 000
1184 in current record, or if it doesn't exist from L<_set_config> C<_mfn>.
1186 my $nr = save_into_lookup($database,$input,$key,sub {
1187 # code which produce one or more values
1190 It returns number of items saved.
1192 This function shouldn't be called directly, it's called from code created by
1197 sub save_into_lookup {
1198 my ($database,$input,$key,$coderef) = @_;
1199 die "save_into_lookup needs database" unless defined($database);
1200 die "save_into_lookup needs input" unless defined($input);
1201 die "save_into_lookup needs key" unless defined($key);
1202 die "save_into_lookup needs CODE" unless ( defined($coderef) && ref($coderef) eq 'CODE' );
1204 warn "## save_into_lookup rec = ", dump($rec), " config = ", dump($config), $/ if ($debug > 2);
1207 defined($rec->{'000'}->[0]) ? $rec->{'000'}->[0] :
1208 defined($config->{_mfn}) ? $config->{_mfn} :
1209 die "mfn not defined or zero";
1213 foreach my $v ( $coderef->() ) {
1214 $lookup->{$database}->{$input}->{$key}->{$v}->{$mfn}++;
1215 warn "# saved lookup $database/$input/$key [$v] $mfn\n" if ($debug > 1);
1224 Consult config values stored in C<config.yml>
1226 # return database code (key under databases in yaml)
1227 $database_code = config(); # use _ from hash
1228 $database_name = config('name');
1229 $database_input_name = config('input name');
1230 $tag = config('input normalize tag');
1232 Up to three levels are supported.
1237 return unless ($config);
1245 warn "### getting config($p)\n" if ($debug > 1);
1247 my @p = split(/\s+/,$p);
1249 $v = $config->{ '_' }; # special, database code
1252 my $c = dclone( $config );
1254 foreach my $k (@p) {
1255 warn "### k: $k c = ",dump($c),$/ if ($debug > 1);
1256 if (ref($c) eq 'ARRAY') {
1258 warn "config($p) taking first occurence of '$k', probably not what you wanted!\n";
1262 if (! defined($c->{$k}) ) {
1273 warn "## config( '$p' ) = ",dump( $v ),$/ if ($v && $debug);
1274 warn "config( '$p' ) is empty\n" if (! $v);
1281 Returns unique id of this record
1285 Returns C<42/2> for 2nd occurence of MFN 42.
1290 my $mfn = $config->{_mfn} || die "no _mfn in config data";
1291 return $mfn . $#{$marc_record} ? $#{$marc_record} + 1 : '';
1296 Joins walues with some delimiter
1298 $v = join_with(", ", @v);
1304 warn "### join_with('$d',",dump(@_),")\n" if ($debug > 2);
1305 my $v = join($d, grep { defined($_) && $_ ne '' } @_);
1306 return '' unless defined($v);
1312 Split record subfield on some regex and take one of parts out
1314 $a_before_semi_column =
1315 split_rec_on('200','a', /\s*;\s*/, $part);
1317 C<$part> is optional number of element. First element is
1320 If there is no C<$part> parameter or C<$part> is 0, this function will
1321 return all values produced by splitting.
1326 die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
1328 my ($fld, $sf, $regex, $part) = @_;
1329 warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
1331 my @r = rec( $fld, $sf );
1333 warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
1335 return '' if ( ! defined($v) || $v =~ /^\s*$/);
1337 my @s = split( $regex, $v );
1338 warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
1339 if ($part && $part > 0) {
1340 return $s[ $part - 1 ];
1350 set( key => 'value' );
1356 warn "## set ( $k => ", dump($v), " )", $/ if ( $debug );
1367 my $k = shift || return;
1368 my $v = $hash->{$k};
1369 warn "## get $k = ", dump( $v ), $/ if ( $debug );
1375 if ( count( @result ) == 1 ) {
1376 # do something if only 1 result is there
1382 warn "## count ",dump(@_),$/ if ( $debug );