1 package WebPAC::Normalize;
11 marc marc_indicators marc_repeatable_subfield
12 marc_compose marc_leader marc_fixed
13 marc_duplicate marc_remove marc_count
17 regex prefix suffix surround
18 first lookup join_with
30 #use base qw/WebPAC::Common/;
31 use Data::Dump qw/dump/;
32 use Storable qw/dclone/;
41 WebPAC::Normalize - describe normalisaton rules using sets
49 our $VERSION = '0.28';
53 This module uses C<conf/normalize/*.pl> files to perform normalisation
54 from input records using perl functions which are specialized for set
57 Sets are implemented as arrays, and normalisation file is valid perl, which
58 means that you check it's validity before running WebPAC using
59 C<perl -c normalize.pl>.
61 Normalisation can generate multiple output normalized data. For now, supported output
62 types (on the left side of definition) are: C<tag>, C<display>, C<search> and
67 Functions which start with C<_> are private and used by WebPAC internally.
68 All other functions are available for use within normalisation rules.
74 my $ds = WebPAC::Normalize::data_structure(
75 lookup => $lookup_hash,
77 rules => $normalize_pl_config,
78 marc_encoding => 'utf-8',
80 load_row_coderef => sub {
81 my ($database,$input,$mfn) = shift;
82 $store->load_row( database => $database, input => $input, id => $mfn );
86 Options C<row>, C<rules> and C<log> are mandatory while all
89 C<load_row_coderef> is closure only used when executing lookups, so they will
90 die if it's not defined.
92 This function will B<die> if normalizastion can't be evaled.
94 Since this function isn't exported you have to call it with
95 C<WebPAC::Normalize::data_structure>.
104 die "need row argument" unless ($arg->{row});
105 die "need normalisation argument" unless ($arg->{rules});
108 _set_lookup( $arg->{lookup} ) if defined($arg->{lookup});
109 _set_rec( $arg->{row} );
110 _set_config( $arg->{config} ) if defined($arg->{config});
111 _clean_ds( %{ $arg } );
112 $load_row_coderef = $arg->{load_row_coderef};
114 eval "$arg->{rules}";
115 die "error evaling $arg->{rules}: $@\n" if ($@);
122 Set current record hash
131 $rec = shift or die "no record hash";
136 Set current config hash
138 _set_config( $config );
146 Code of current database
164 Return hash formatted as data structure
170 my ($out, $marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader);
171 my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
179 Clean data structure hash for next record
187 ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader) = ();
188 ($marc_record_offset, $marc_fetch_offset) = (0,0);
189 $marc_encoding = $a->{marc_encoding};
194 Set current lookup hash
196 _set_lookup( $lookup );
208 Get current lookup hash
210 my $lookup = _get_lookup();
220 Setup code reference which will return L<data_structure> from
224 my ($database,$input,$mfn) = @_;
225 $store->load_row( database => $database, input => $input, id => $mfn );
232 confess "argument isn't CODE" unless ref($coderef) eq 'CODE';
234 $load_row_coderef = $coderef;
237 =head2 _get_marc_fields
239 Get all fields defined by calls to C<marc>
241 $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
243 We are using I<magic> which detect repeatable fields only from
244 sequence of field/subfield data generated by normalization.
246 Repeatable field is created when there is second occurence of same subfield or
247 if any of indicators are different.
249 This is sane for most cases. Something like:
255 will be created from any combination of:
257 900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
261 marc('900','a', rec('200','a') );
262 marc('900','b', rec('200','b') );
263 marc('900','c', rec('200','c') );
265 which might not be what you have in mind. If you need repeatable subfield,
266 define it using C<marc_repeatable_subfield> like this:
268 marc_repeatable_subfield('900','a');
269 marc('900','a', rec('200','a') );
270 marc('900','b', rec('200','b') );
271 marc('900','c', rec('200','c') );
275 900a-1 900a-2 900a-3 900b-1 900c-1
278 There is also support for returning next or specific using:
280 while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
281 # do something with $mf
284 will always return fields from next MARC record or
286 my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
288 will return 42th copy record (if it exists).
294 sub _get_marc_fields {
297 warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
298 $fetch_pos = $marc_fetch_offset;
299 if ($arg->{offset}) {
300 $fetch_pos = $arg->{offset};
301 } elsif($arg->{fetch_next}) {
302 $marc_fetch_offset++;
305 return if (! $marc_record || ref($marc_record) ne 'ARRAY');
307 warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
309 my $marc_rec = $marc_record->[ $fetch_pos ];
311 warn "## _get_marc_fields (at offset: $fetch_pos) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
313 return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
315 # first, sort all existing fields
316 # XXX might not be needed, but modern perl might randomize elements in hash
317 my @sorted_marc_record = sort {
318 $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
321 @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
326 # count unique field-subfields (used for offset when walking to next subfield)
328 map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
331 warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
332 warn "## marc_record[$fetch_pos] = ", dump( $marc_rec ), $/;
333 warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
334 warn "## subfield count = ", dump( $u ), $/;
337 my $len = $#sorted_marc_record;
342 foreach ( 0 .. $len ) {
344 # find next element which isn't visited
345 while ($visited->{$i}) {
346 $i = ($i + 1) % ($len + 1);
352 my $row = dclone( $sorted_marc_record[$i] );
354 # field and subfield which is key for
355 # marc_repeatable_subfield and u
356 my $fsf = $row->[0] . ( $row->[3] || '' );
360 print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
361 print "### this [$i]: ", dump( $row ),$/;
362 print "### sf: ", $row->[3], " vs ", $field->[3],
363 $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
369 if ( $#$field >= 0 ) {
371 $row->[0] ne $field->[0] || # field
372 $row->[1] ne $field->[1] || # i1
373 $row->[2] ne $field->[2] # i2
376 warn "## saved/1 ", dump( $field ),$/ if ($debug);
380 ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
382 ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
383 ! $marc_repeatable_subfield->{ $fsf }
387 warn "## saved/2 ", dump( $field ),$/ if ($debug);
391 # append new subfields to existing field
392 push @$field, ( $row->[3], $row->[4] );
399 if (! $marc_repeatable_subfield->{ $fsf }) {
400 # make step to next subfield
401 $i = ($i + $u->{ $fsf } ) % ($len + 1);
407 warn "## saved/3 ", dump( $field ),$/ if ($debug);
413 =head2 _get_marc_leader
415 Return leader from currently fetched record by L</_get_marc_fields>
417 print WebPAC::Normalize::_get_marc_leader();
421 sub _get_marc_leader {
422 die "no fetch_pos, did you called _get_marc_fields first?" unless ( defined( $fetch_pos ) );
423 return $marc_leader->[ $fetch_pos ];
428 Change level of debug warnings
436 return $debug unless defined($l);
437 warn "debug level $l",$/ if ($l > 0);
441 =head1 Functions to create C<data_structure>
443 Those functions generally have to first in your normalization file.
447 Define new tag for I<search> and I<display>.
449 tag('Title', rec('200','a') );
455 my $name = shift or die "tag needs name as first argument";
456 my @o = grep { defined($_) && $_ ne '' } @_;
458 $out->{$name}->{tag} = $name;
459 $out->{$name}->{search} = \@o;
460 $out->{$name}->{display} = \@o;
465 Define tag just for I<display>
467 @v = display('Title', rec('200','a') );
472 my $name = shift or die "display needs name as first argument";
473 my @o = grep { defined($_) && $_ ne '' } @_;
475 $out->{$name}->{tag} = $name;
476 $out->{$name}->{display} = \@o;
481 Prepare values just for I<search>
483 @v = search('Title', rec('200','a') );
488 my $name = shift or die "search needs name as first argument";
489 my @o = grep { defined($_) && $_ ne '' } @_;
491 $out->{$name}->{tag} = $name;
492 $out->{$name}->{search} = \@o;
497 Setup fields within MARC leader or get leader
499 marc_leader('05','c');
500 my $leader = marc_leader();
505 my ($offset,$value) = @_;
508 $marc_leader->[ $marc_record_offset ]->{ $offset } = $value;
511 if (defined($marc_leader)) {
512 die "marc_leader not array = ", dump( $marc_leader ) unless (ref($marc_leader) eq 'ARRAY');
513 return $marc_leader->[ $marc_record_offset ];
522 Create control/indentifier fields with values in fixed positions
524 marc_fixed('008', 00, '070402');
525 marc_fixed('008', 39, '|');
527 Positions not specified will be filled with spaces (C<0x20>).
529 There will be no effort to extend last specified value to full length of
535 my ($f, $pos, $val) = @_;
536 die "need marc(field, position, value)" unless defined($f) && defined($pos);
543 if (length($old) < $pos) {
544 $_->[1] .= ' ' x ( $pos - length($old) ) . $val;
545 warn "## marc_fixed($f,$pos,'$val') append '$old' -> '$_->[1]'\n" if ($debug > 1);
547 $_->[1] = substr($old, 0, $pos) . $val . substr($old, $pos + length($val));
548 warn "## marc_fixed($f,$pos,'$val') update '$old' -> '$_->[1]'\n" if ($debug > 1);
552 } @{ $marc_record->[ $marc_record_offset ] };
555 my $v = ' ' x $pos . $val;
556 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
557 warn "## marc_fixed($f,$pos,'val') created '$v'\n" if ($debug > 1);
563 Save value for MARC field
565 marc('900','a', rec('200','a') );
566 marc('001', rec('000') );
571 my $f = shift or die "marc needs field";
572 die "marc field must be numer" unless ($f =~ /^\d+$/);
576 $sf = shift or die "marc needs subfield";
580 my $v = $_; # make var read-write for Encode
581 next unless (defined($v) && $v !~ /^\s*$/);
582 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
584 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
586 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
591 =head2 marc_repeatable_subfield
593 Save values for MARC repetable subfield
595 marc_repeatable_subfield('910', 'z', rec('909') );
599 sub marc_repeatable_subfield {
601 die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
602 $marc_repeatable_subfield->{ $f . $sf }++;
606 =head2 marc_indicators
608 Set both indicators for MARC field
610 marc_indicators('900', ' ', 1);
612 Any indicator value other than C<0-9> will be treated as undefined.
616 sub marc_indicators {
617 my $f = shift || die "marc_indicators need field!\n";
619 die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
620 die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
622 $i1 = ' ' if ($i1 !~ /^\d$/);
623 $i2 = ' ' if ($i2 !~ /^\d$/);
624 @{ $marc_indicators->{$f} } = ($i1,$i2);
629 Save values for each MARC subfield explicitly
638 If you specify C<+> for subfield, value will be appended
639 to previous defined subfield.
644 my $f = shift or die "marc_compose needs field";
645 die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
647 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
648 my $m = [ $f, $i1, $i2 ];
650 warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
653 die "ERROR: marc_compose",dump($f,@_)," not valid (must be even).\nDo you need to add first() or join() around some argument?\n";
660 next unless (defined($v) && $v !~ /^\s*$/);
661 warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
663 push @$m, ( $sf, $v );
669 warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
671 push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
674 =head2 marc_duplicate
676 Generate copy of current MARC record and continue working on copy
680 Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
681 C<< _get_marc_fields( offset => 42 ) >>.
686 my $m = $marc_record->[ -1 ];
687 die "can't duplicate record which isn't defined" unless ($m);
688 push @{ $marc_record }, dclone( $m );
689 push @{ $marc_leader }, dclone( marc_leader() );
690 warn "## marc_duplicate = ", dump(@$marc_leader, @$marc_record), $/ if ($debug > 1);
691 $marc_record_offset = $#{ $marc_record };
692 warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
698 Remove some field or subfield from MARC record.
701 marc_remove('200','a');
703 This will erase field C<200> or C<200^a> from current MARC record.
707 Will remove all fields in current MARC record.
709 This is useful after calling C<marc_duplicate> or on it's own (but, you
710 should probably just remove that subfield definition if you are not
711 using C<marc_duplicate>).
713 FIXME: support fields < 10.
720 die "marc_remove needs record number" unless defined($f);
722 my $marc = $marc_record->[ $marc_record_offset ];
724 warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
728 delete( $marc_record->[ $marc_record_offset ] );
729 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
734 foreach ( 0 .. $#{ $marc } ) {
735 last unless (defined $marc->[$i]);
736 warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
737 if ($marc->[$i]->[0] eq $f) {
740 splice @$marc, $i, 1;
741 warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
744 foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
745 my $o = ($j * 2) + 3;
746 if ($marc->[$i]->[$o] eq $sf) {
748 splice @{$marc->[$i]}, $o, 2;
749 warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
750 # is record now empty?
751 if ($#{ $marc->[$i] } == 2) {
752 splice @$marc, $i, 1;
753 warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
763 warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
765 $marc_record->[ $marc_record_offset ] = $marc;
768 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
771 =head2 marc_original_order
773 Copy all subfields preserving original order to marc field.
775 marc_original_order( marc_field_number, original_input_field_number );
777 Please note that field numbers are consistent with other commands (marc
778 field number first), but somewhat counter-intuitive (destination and then
781 You might want to use this command if you are just renaming subfields or
782 using pre-processing modify_record in C<config.yml> and don't need any
783 post-processing or want to preserve order of original subfields.
788 sub marc_original_order {
790 my ($to, $from) = @_;
791 die "marc_original_order needs from and to fields\n" unless ($from && $to);
793 return unless defined($rec->{$from});
795 my $r = $rec->{$from};
796 die "record field $from isn't array\n" unless (ref($r) eq 'ARRAY');
798 my ($i1,$i2) = defined($marc_indicators->{$to}) ? @{ $marc_indicators->{$to} } : (' ',' ');
799 warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
801 foreach my $d (@$r) {
803 if (! defined($d->{subfields}) && ref($d->{subfields}) ne 'ARRAY') {
804 warn "# marc_original_order($to,$from): field $from doesn't have subfields specification\n";
808 my @sfs = @{ $d->{subfields} };
810 die "field $from doesn't have even number of subfields specifications\n" unless($#sfs % 2 == 1);
812 warn "#--> d: ",dump($d), "\n#--> sfs: ",dump(@sfs),$/ if ($debug > 2);
814 my $m = [ $to, $i1, $i2 ];
816 while (my $sf = shift @sfs) {
818 warn "#--> sf: ",dump($sf), $/ if ($debug > 2);
819 my $offset = shift @sfs;
820 die "corrupted sufields specification for field $from\n" unless defined($offset);
823 if (ref($d->{$sf}) eq 'ARRAY') {
824 $v = $d->{$sf}->[$offset] if (defined($d->{$sf}->[$offset]));
825 } elsif ($offset == 0) {
828 die "field $from subfield '$sf' need occurence $offset which doesn't exist", dump($d->{$sf});
830 push @$m, ( $sf, $v ) if (defined($v));
834 push @{ $marc_record->[ $marc_record_offset ] }, $m;
838 warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
843 Return number of MARC records created using L</marc_duplicate>.
845 print "created ", marc_count(), " records";
850 return $#{ $marc_record };
854 =head1 Functions to extract data from input
856 This function should be used inside functions to create C<data_structure> described
859 =head2 _pack_subfields_hash
861 @subfields = _pack_subfields_hash( $h );
862 $subfields = _pack_subfields_hash( $h, 1 );
864 Return each subfield value in array or pack them all together and return scalar
865 with subfields (denoted by C<^>) and values.
869 sub _pack_subfields_hash {
871 warn "## _pack_subfields_hash( ",dump(@_), " )\n" if ($debug > 1);
873 my ($h,$include_subfields) = @_;
875 if ( defined($h->{subfields}) ) {
876 my $sfs = delete $h->{subfields} || die "no subfields?";
879 my $sf = shift @$sfs;
880 push @out, '^' . $sf if ($include_subfields);
882 if ($o == 0 && ref( $h->{$sf} ) ne 'ARRAY' ) {
883 # single element subfields are not arrays
884 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
886 push @out, $h->{$sf};
888 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
889 push @out, $h->{$sf}->[$o];
892 if ($include_subfields) {
893 return join('', @out);
898 if ($include_subfields) {
900 foreach my $sf (sort keys %$h) {
901 if (ref($h->{$sf}) eq 'ARRAY') {
902 $out .= '^' . $sf . join('^' . $sf, @{ $h->{$sf} });
904 $out .= '^' . $sf . $h->{$sf};
909 # FIXME this should probably be in alphabetical order instead of hash order
917 Return all values in some field
921 TODO: order of values is probably same as in source data, need to investigate that
927 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
928 return unless (defined($rec) && defined($rec->{$f}));
929 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
930 if (ref($rec->{$f}) eq 'ARRAY') {
932 foreach my $h ( @{ $rec->{$f} } ) {
933 if (ref($h) eq 'HASH') {
934 push @out, ( _pack_subfields_hash( $h ) );
940 } elsif( defined($rec->{$f}) ) {
947 Return all values in specific field and subfield
955 return unless (defined($rec && $rec->{$f}));
957 warn "rec2($f,$sf) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
959 if (ref($_->{$sf}) eq 'ARRAY') {
964 } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
974 If rec() returns just single value, it will
975 return scalar, not array.
986 if ($#out == 0 && ! wantarray) {
997 Apply regex to some or all values
999 @v = regex( 's/foo/bar/g', @v );
1006 #warn "r: $r\n", dump(\@_);
1007 foreach my $t (@_) {
1010 push @out, $t if ($t && $t ne '');
1017 Prefix all values with a string
1019 @v = prefix( 'my_', @v );
1024 my $p = shift or return;
1025 return map { $p . $_ } grep { defined($_) } @_;
1030 suffix all values with a string
1032 @v = suffix( '_my', @v );
1037 my $s = shift or die "suffix needs string as first argument";
1038 return map { $_ . $s } grep { defined($_) } @_;
1043 surround all values with a two strings
1045 @v = surround( 'prefix_', '_suffix', @v );
1050 my $p = shift or die "surround need prefix as first argument";
1051 my $s = shift or die "surround needs suffix as second argument";
1052 return map { $p . $_ . $s } grep { defined($_) } @_;
1057 Return first element
1070 Consult lookup hashes for some value
1074 'ffkk/peri/mfn'.rec('000')
1076 'ffkk','peri','200-a-200-e',
1078 first(rec(200,'a')).' '.first(rec('200','e'))
1082 Code like above will be B<automatically generated> using L<WebPAC::Parse> from
1083 normal lookup definition in C<conf/lookup/something.pl> which looks like:
1086 # which results to return from record recorded in lookup
1087 sub { 'ffkk/peri/mfn' . rec('000') },
1088 # from which database and input
1090 # such that following values match
1091 sub { first(rec(200,'a')) . ' ' . first(rec('200','e')) },
1092 # if this part is missing, we will try to match same fields
1093 # from lookup record and current one, or you can override
1094 # which records to use from current record using
1095 sub { rec('900','x') . ' ' . rec('900','y') },
1098 You can think about this lookup as SQL (if that helps):
1105 sub { filter from lookuped record }
1107 sub { optional filter on current record }
1114 my ($what, $database, $input, $key, $having) = @_;
1116 confess "lookup needs 5 arguments: what, database, input, key, having\n" unless ($#_ == 4);
1118 warn "## lookup ($database, $input, $key)", $/ if ($debug > 1);
1119 return unless (defined($lookup->{$database}->{$input}->{$key}));
1121 confess "lookup really need load_row_coderef added to data_structure\n" unless ($load_row_coderef);
1124 my @having = $having->();
1126 warn "## having = ", dump( @having ) if ($debug > 2);
1128 foreach my $h ( @having ) {
1129 if (defined($lookup->{$database}->{$input}->{$key}->{$h})) {
1130 warn "lookup for $database/$input/$key/$h return ",dump($lookup->{$database}->{$input}->{$key}->{$h}),"\n" if ($debug);
1131 $mfns->{$_}++ foreach keys %{ $lookup->{$database}->{$input}->{$key}->{$h} };
1135 return unless ($mfns);
1137 my @mfns = sort keys %$mfns;
1139 warn "# lookup loading $database/$input/$key mfn ", join(",",@mfns)," having ",dump(@having),"\n" if ($debug);
1144 foreach my $mfn (@mfns) {
1145 $rec = $load_row_coderef->( $database, $input, $mfn );
1147 warn "got $database/$input/$mfn = ", dump($rec), $/ if ($debug);
1149 my @vals = $what->();
1151 push @out, ( @vals );
1153 warn "lookup for mfn $mfn returned ", dump(@vals), $/ if ($debug);
1156 # if (ref($lookup->{$k}) eq 'ARRAY') {
1157 # return @{ $lookup->{$k} };
1159 # return $lookup->{$k};
1164 warn "## lookup returns = ", dump(@out), $/ if ($debug);
1173 =head2 save_into_lookup
1175 Save value into lookup. It associates current database, input
1176 and specific keys with one or more values which will be
1177 associated over MFN.
1179 MFN will be extracted from first occurence current of field 000
1180 in current record, or if it doesn't exist from L<_set_config> C<_mfn>.
1182 my $nr = save_into_lookup($database,$input,$key,sub {
1183 # code which produce one or more values
1186 It returns number of items saved.
1188 This function shouldn't be called directly, it's called from code created by
1193 sub save_into_lookup {
1194 my ($database,$input,$key,$coderef) = @_;
1195 die "save_into_lookup needs database" unless defined($database);
1196 die "save_into_lookup needs input" unless defined($input);
1197 die "save_into_lookup needs key" unless defined($key);
1198 die "save_into_lookup needs CODE" unless ( defined($coderef) && ref($coderef) eq 'CODE' );
1200 warn "## save_into_lookup rec = ", dump($rec), " config = ", dump($config), $/ if ($debug > 2);
1203 defined($rec->{'000'}->[0]) ? $rec->{'000'}->[0] :
1204 defined($config->{_mfn}) ? $config->{_mfn} :
1205 die "mfn not defined or zero";
1209 foreach my $v ( $coderef->() ) {
1210 $lookup->{$database}->{$input}->{$key}->{$v}->{$mfn}++;
1211 warn "# saved lookup $database/$input/$key [$v] $mfn\n" if ($debug > 1);
1220 Consult config values stored in C<config.yml>
1222 # return database code (key under databases in yaml)
1223 $database_code = config(); # use _ from hash
1224 $database_name = config('name');
1225 $database_input_name = config('input name');
1226 $tag = config('input normalize tag');
1228 Up to three levels are supported.
1233 return unless ($config);
1241 warn "### getting config($p)\n" if ($debug > 1);
1243 my @p = split(/\s+/,$p);
1245 $v = $config->{ '_' }; # special, database code
1248 my $c = dclone( $config );
1250 foreach my $k (@p) {
1251 warn "### k: $k c = ",dump($c),$/ if ($debug > 1);
1252 if (ref($c) eq 'ARRAY') {
1254 warn "config($p) taking first occurence of '$k', probably not what you wanted!\n";
1258 if (! defined($c->{$k}) ) {
1269 warn "## config( '$p' ) = ",dump( $v ),$/ if ($v && $debug);
1270 warn "config( '$p' ) is empty\n" if (! $v);
1277 Returns unique id of this record
1281 Returns C<42/2> for 2nd occurence of MFN 42.
1286 my $mfn = $config->{_mfn} || die "no _mfn in config data";
1287 return $mfn . $#{$marc_record} ? $#{$marc_record} + 1 : '';
1292 Joins walues with some delimiter
1294 $v = join_with(", ", @v);
1300 warn "### join_with('$d',",dump(@_),")\n" if ($debug > 2);
1301 my $v = join($d, grep { defined($_) && $_ ne '' } @_);
1302 return '' unless defined($v);
1308 Split record subfield on some regex and take one of parts out
1310 $a_before_semi_column =
1311 split_rec_on('200','a', /\s*;\s*/, $part);
1313 C<$part> is optional number of element. First element is
1316 If there is no C<$part> parameter or C<$part> is 0, this function will
1317 return all values produced by splitting.
1322 die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
1324 my ($fld, $sf, $regex, $part) = @_;
1325 warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
1327 my @r = rec( $fld, $sf );
1329 warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
1331 return '' if ( ! defined($v) || $v =~ /^\s*$/);
1333 my @s = split( $regex, $v );
1334 warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
1335 if ($part && $part > 0) {
1336 return $s[ $part - 1 ];
1346 set( key => 'value' );
1352 warn "## set ( $k => ", dump($v), " )", $/ if ( $debug );
1363 my $k = shift || return;
1364 my $v = $hash->{$k};
1365 warn "## get $k = ", dump( $v ), $/ if ( $debug );
1371 if ( count( @result ) == 1 ) {
1372 # do something if only 1 result is there
1378 warn "## count ",dump(@_),$/ if ( $debug );