1 package WebPAC::Normalize;
10 search_display search display sorted
12 marc marc_indicators marc_repeatable_subfield
13 marc_compose marc_leader marc_fixed
14 marc_duplicate marc_remove marc_count
19 regex prefix suffix surround
20 first lookup join_with
32 #use base qw/WebPAC::Common/;
33 use Data::Dump qw/dump/;
34 use Storable qw/dclone/;
43 WebPAC::Normalize - describe normalisaton rules using sets
47 our $VERSION = '0.31';
51 This module uses C<conf/normalize/*.pl> files to perform normalisation
52 from input records using perl functions which are specialized for set
55 Sets are implemented as arrays, and normalisation file is valid perl, which
56 means that you check it's validity before running WebPAC using
57 C<perl -c normalize.pl>.
59 Normalisation can generate multiple output normalized data. For now, supported output
60 types (on the left side of definition) are: C<search_display>, C<display>, C<search> and
65 Functions which start with C<_> are private and used by WebPAC internally.
66 All other functions are available for use within normalisation rules.
72 my $ds = WebPAC::Normalize::data_structure(
73 lookup => $lookup_hash,
75 rules => $normalize_pl_config,
76 marc_encoding => 'utf-8',
78 load_row_coderef => sub {
79 my ($database,$input,$mfn) = @_;
80 $store->load_row( database => $database, input => $input, id => $mfn );
84 Options C<row>, C<rules> and C<log> are mandatory while all
87 C<load_row_coderef> is closure only used when executing lookups, so they will
88 die if it's not defined.
90 This function will B<die> if normalizastion can't be evaled.
92 Since this function isn't exported you have to call it with
93 C<WebPAC::Normalize::data_structure>.
102 die "need row argument" unless ($arg->{row});
103 die "need normalisation argument" unless ($arg->{rules});
105 # FIXME load this conditionally
106 # use WebPAC::Normalize::ISBN;
109 _set_lookup( $arg->{lookup} ) if defined($arg->{lookup});
110 _set_rec( $arg->{row} );
111 _set_config( $arg->{config} ) if defined($arg->{config});
112 _clean_ds( %{ $arg } );
113 $load_row_coderef = $arg->{load_row_coderef};
115 eval "$arg->{rules}";
116 die "error evaling $arg->{rules}: $@\n" if ($@);
123 Set current record hash
132 $rec = shift or die "no record hash";
137 Set current config hash
139 _set_config( $config );
147 Code of current database
165 Return hash formatted as data structure
171 my ($out, $marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader);
172 my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
176 warn "## out = ",dump($out);
183 Clean data structure hash for next record
191 ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader) = ();
192 ($marc_record_offset, $marc_fetch_offset) = (0,0);
193 $marc_encoding = $a->{marc_encoding};
198 Set current lookup hash
200 _set_lookup( $lookup );
212 Get current lookup hash
214 my $lookup = _get_lookup();
224 Setup code reference which will return L<data_structure> from
228 my ($database,$input,$mfn) = @_;
229 $store->load_row( database => $database, input => $input, id => $mfn );
236 confess "argument isn't CODE" unless ref($coderef) eq 'CODE';
238 $load_row_coderef = $coderef;
241 =head2 _get_marc_fields
243 Get all fields defined by calls to C<marc>
245 $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
247 We are using I<magic> which detect repeatable fields only from
248 sequence of field/subfield data generated by normalization.
250 Repeatable field is created when there is second occurence of same subfield or
251 if any of indicators are different.
253 This is sane for most cases. Something like:
259 will be created from any combination of:
261 900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
265 marc('900','a', rec('200','a') );
266 marc('900','b', rec('200','b') );
267 marc('900','c', rec('200','c') );
269 which might not be what you have in mind. If you need repeatable subfield,
270 define it using C<marc_repeatable_subfield> like this:
272 marc_repeatable_subfield('900','a');
273 marc('900','a', rec('200','a') );
274 marc('900','b', rec('200','b') );
275 marc('900','c', rec('200','c') );
279 900a-1 900a-2 900a-3 900b-1 900c-1
282 There is also support for returning next or specific using:
284 while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
285 # do something with $mf
288 will always return fields from next MARC record or
290 my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
292 will return 42th copy record (if it exists).
298 sub _get_marc_fields {
301 warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
302 $fetch_pos = $marc_fetch_offset;
303 if ($arg->{offset}) {
304 $fetch_pos = $arg->{offset};
305 } elsif($arg->{fetch_next}) {
306 $marc_fetch_offset++;
309 return if (! $marc_record || ref($marc_record) ne 'ARRAY');
311 warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
313 my $marc_rec = $marc_record->[ $fetch_pos ];
315 warn "## _get_marc_fields (at offset: $fetch_pos) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
317 return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
319 # first, sort all existing fields
320 # XXX might not be needed, but modern perl might randomize elements in hash
321 my @sorted_marc_record = sort {
322 $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
325 @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
330 # count unique field-subfields (used for offset when walking to next subfield)
332 map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
335 warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
336 warn "## marc_record[$fetch_pos] = ", dump( $marc_rec ), $/;
337 warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
338 warn "## subfield count = ", dump( $u ), $/;
341 my $len = $#sorted_marc_record;
346 foreach ( 0 .. $len ) {
348 # find next element which isn't visited
349 while ($visited->{$i}) {
350 $i = ($i + 1) % ($len + 1);
356 my $row = dclone( $sorted_marc_record[$i] );
358 # field and subfield which is key for
359 # marc_repeatable_subfield and u
360 my $fsf = $row->[0] . ( $row->[3] || '' );
364 print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
365 print "### this [$i]: ", dump( $row ),$/;
366 print "### sf: ", $row->[3], " vs ", $field->[3],
367 $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
373 if ( $#$field >= 0 ) {
375 $row->[0] ne $field->[0] || # field
376 $row->[1] ne $field->[1] || # i1
377 $row->[2] ne $field->[2] # i2
380 warn "## saved/1 ", dump( $field ),$/ if ($debug);
384 ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
386 ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
387 ! $marc_repeatable_subfield->{ $fsf }
391 warn "## saved/2 ", dump( $field ),$/ if ($debug);
395 # append new subfields to existing field
396 push @$field, ( $row->[3], $row->[4] );
403 if (! $marc_repeatable_subfield->{ $fsf }) {
404 # make step to next subfield
405 $i = ($i + $u->{ $fsf } ) % ($len + 1);
411 warn "## saved/3 ", dump( $field ),$/ if ($debug);
417 =head2 _get_marc_leader
419 Return leader from currently fetched record by L</_get_marc_fields>
421 print WebPAC::Normalize::_get_marc_leader();
425 sub _get_marc_leader {
426 die "no fetch_pos, did you called _get_marc_fields first?" unless ( defined( $fetch_pos ) );
427 return $marc_leader->[ $fetch_pos ];
432 Change level of debug warnings
440 return $debug unless defined($l);
441 warn "debug level $l",$/ if ($l > 0);
445 =head1 Functions to create C<data_structure>
447 Those functions generally have to first in your normalization file.
449 =head2 search_display
451 Define output for L<search> and L<display> at the same time
453 search_display('Title', rec('200','a') );
459 my $name = shift or die "search_display needs name as first argument";
460 my @o = grep { defined($_) && $_ ne '' } @_;
462 $out->{$name}->{search} = \@o;
463 $out->{$name}->{display} = \@o;
468 Old name for L<search_display>, but supported
473 search_display( @_ );
478 Define output just for I<display>
480 @v = display('Title', rec('200','a') );
485 my $type = shift or confess "need type -- BUG?";
486 my $name = shift or confess "needs name as first argument";
487 my @o = grep { defined($_) && $_ ne '' } @_;
489 $out->{$name}->{$type} = \@o;
492 sub display { _field( 'display', @_ ) }
496 Prepare values just for I<search>
498 @v = search('Title', rec('200','a') );
502 sub search { _field( 'search', @_ ) }
506 Insert into lists which will be automatically sorted
508 sorted('Title', rec('200','a') );
512 sub sorted { _field( 'sorted', @_ ) }
517 Setup fields within MARC leader or get leader
519 marc_leader('05','c');
520 my $leader = marc_leader();
525 my ($offset,$value) = @_;
528 $marc_leader->[ $marc_record_offset ]->{ $offset } = $value;
531 if (defined($marc_leader)) {
532 die "marc_leader not array = ", dump( $marc_leader ) unless (ref($marc_leader) eq 'ARRAY');
533 return $marc_leader->[ $marc_record_offset ];
542 Create control/indentifier fields with values in fixed positions
544 marc_fixed('008', 00, '070402');
545 marc_fixed('008', 39, '|');
547 Positions not specified will be filled with spaces (C<0x20>).
549 There will be no effort to extend last specified value to full length of
555 my ($f, $pos, $val) = @_;
556 die "need marc(field, position, value)" unless defined($f) && defined($pos);
558 confess "need val" unless defined $val;
565 if (length($old) <= $pos) {
566 $_->[1] .= ' ' x ( $pos - length($old) ) . $val;
567 warn "## marc_fixed($f,$pos,'$val') append '$old' -> '$_->[1]'\n" if ($debug > 1);
569 $_->[1] = substr($old, 0, $pos) . $val . substr($old, $pos + length($val));
570 warn "## marc_fixed($f,$pos,'$val') update '$old' -> '$_->[1]'\n" if ($debug > 1);
574 } @{ $marc_record->[ $marc_record_offset ] };
577 my $v = ' ' x $pos . $val;
578 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
579 warn "## marc_fixed($f,$pos,'val') created '$v'\n" if ($debug > 1);
585 Save value for MARC field
587 marc('900','a', rec('200','a') );
588 marc('001', rec('000') );
593 my $f = shift or die "marc needs field";
594 die "marc field must be numer" unless ($f =~ /^\d+$/);
598 $sf = shift or die "marc needs subfield";
602 my $v = $_; # make var read-write for Encode
603 next unless (defined($v) && $v !~ /^\s*$/);
604 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
606 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
608 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
613 =head2 marc_repeatable_subfield
615 Save values for MARC repetable subfield
617 marc_repeatable_subfield('910', 'z', rec('909') );
621 sub marc_repeatable_subfield {
623 die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
624 $marc_repeatable_subfield->{ $f . $sf }++;
628 =head2 marc_indicators
630 Set both indicators for MARC field
632 marc_indicators('900', ' ', 1);
634 Any indicator value other than C<0-9> will be treated as undefined.
638 sub marc_indicators {
639 my $f = shift || die "marc_indicators need field!\n";
641 die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
642 die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
644 $i1 = ' ' if ($i1 !~ /^\d$/);
645 $i2 = ' ' if ($i2 !~ /^\d$/);
646 @{ $marc_indicators->{$f} } = ($i1,$i2);
651 Save values for each MARC subfield explicitly
660 If you specify C<+> for subfield, value will be appended
661 to previous defined subfield.
666 my $f = shift or die "marc_compose needs field";
667 die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
669 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
670 my $m = [ $f, $i1, $i2 ];
672 warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
675 die "ERROR: marc_compose",dump($f,@_)," not valid (must be even).\nDo you need to add first() or join() around some argument?\n";
682 next unless (defined($v) && $v !~ /^\s*$/);
683 warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
685 push @$m, ( $sf, $v );
691 warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
693 push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
696 =head2 marc_duplicate
698 Generate copy of current MARC record and continue working on copy
702 Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
703 C<< _get_marc_fields( offset => 42 ) >>.
708 my $m = $marc_record->[ -1 ];
709 die "can't duplicate record which isn't defined" unless ($m);
710 push @{ $marc_record }, dclone( $m );
711 push @{ $marc_leader }, dclone( marc_leader() );
712 warn "## marc_duplicate = ", dump(@$marc_leader, @$marc_record), $/ if ($debug > 1);
713 $marc_record_offset = $#{ $marc_record };
714 warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
720 Remove some field or subfield from MARC record.
723 marc_remove('200','a');
725 This will erase field C<200> or C<200^a> from current MARC record.
729 Will remove all fields in current MARC record.
731 This is useful after calling C<marc_duplicate> or on it's own (but, you
732 should probably just remove that subfield definition if you are not
733 using C<marc_duplicate>).
735 FIXME: support fields < 10.
742 die "marc_remove needs record number" unless defined($f);
744 my $marc = $marc_record->[ $marc_record_offset ];
746 warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
750 delete( $marc_record->[ $marc_record_offset ] );
751 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
756 foreach ( 0 .. $#{ $marc } ) {
757 last unless (defined $marc->[$i]);
758 warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
759 if ($marc->[$i]->[0] eq $f) {
762 splice @$marc, $i, 1;
763 warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
766 foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
767 my $o = ($j * 2) + 3;
768 if ($marc->[$i]->[$o] eq $sf) {
770 splice @{$marc->[$i]}, $o, 2;
771 warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
772 # is record now empty?
773 if ($#{ $marc->[$i] } == 2) {
774 splice @$marc, $i, 1;
775 warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
785 warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
787 $marc_record->[ $marc_record_offset ] = $marc;
790 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
793 =head2 marc_original_order
795 Copy all subfields preserving original order to marc field.
797 marc_original_order( marc_field_number, original_input_field_number );
799 Please note that field numbers are consistent with other commands (marc
800 field number first), but somewhat counter-intuitive (destination and then
803 You might want to use this command if you are just renaming subfields or
804 using pre-processing modify_record in C<config.yml> and don't need any
805 post-processing or want to preserve order of original subfields.
810 sub marc_original_order {
812 my ($to, $from) = @_;
813 die "marc_original_order needs from and to fields\n" unless ($from && $to);
815 return unless defined($rec->{$from});
817 my $r = $rec->{$from};
818 die "record field $from isn't array\n" unless (ref($r) eq 'ARRAY');
820 my ($i1,$i2) = defined($marc_indicators->{$to}) ? @{ $marc_indicators->{$to} } : (' ',' ');
821 warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
823 foreach my $d (@$r) {
825 if (! defined($d->{subfields}) && ref($d->{subfields}) ne 'ARRAY') {
826 warn "# marc_original_order($to,$from): field $from doesn't have subfields specification\n";
830 my @sfs = @{ $d->{subfields} };
832 die "field $from doesn't have even number of subfields specifications\n" unless($#sfs % 2 == 1);
834 warn "#--> d: ",dump($d), "\n#--> sfs: ",dump(@sfs),$/ if ($debug > 2);
836 my $m = [ $to, $i1, $i2 ];
838 while (my $sf = shift @sfs) {
840 warn "#--> sf: ",dump($sf), $/ if ($debug > 2);
841 my $offset = shift @sfs;
842 die "corrupted sufields specification for field $from\n" unless defined($offset);
845 if (ref($d->{$sf}) eq 'ARRAY') {
846 $v = $d->{$sf}->[$offset] if (defined($d->{$sf}->[$offset]));
847 } elsif ($offset == 0) {
850 die "field $from subfield '$sf' need occurence $offset which doesn't exist", dump($d->{$sf});
852 push @$m, ( $sf, $v ) if (defined($v));
856 push @{ $marc_record->[ $marc_record_offset ] }, $m;
860 warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
865 Return number of MARC records created using L</marc_duplicate>.
867 print "created ", marc_count(), " records";
872 return $#{ $marc_record };
876 =head1 Functions to extract data from input
878 This function should be used inside functions to create C<data_structure> described
881 =head2 _pack_subfields_hash
883 @subfields = _pack_subfields_hash( $h );
884 $subfields = _pack_subfields_hash( $h, 1 );
886 Return each subfield value in array or pack them all together and return scalar
887 with subfields (denoted by C<^>) and values.
891 sub _pack_subfields_hash {
893 warn "## _pack_subfields_hash( ",dump(@_), " )\n" if ($debug > 1);
895 my ($h,$include_subfields) = @_;
897 # sanity and ease of use
898 return $h if (ref($h) ne 'HASH');
900 if ( defined($h->{subfields}) ) {
901 my $sfs = delete $h->{subfields} || die "no subfields?";
904 my $sf = shift @$sfs;
905 push @out, '^' . $sf if ($include_subfields);
907 if ($o == 0 && ref( $h->{$sf} ) ne 'ARRAY' ) {
908 # single element subfields are not arrays
909 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
911 push @out, $h->{$sf};
913 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
914 push @out, $h->{$sf}->[$o];
917 if ($include_subfields) {
918 return join('', @out);
923 if ($include_subfields) {
925 foreach my $sf (sort keys %$h) {
926 if (ref($h->{$sf}) eq 'ARRAY') {
927 $out .= '^' . $sf . join('^' . $sf, @{ $h->{$sf} });
929 $out .= '^' . $sf . $h->{$sf};
934 # FIXME this should probably be in alphabetical order instead of hash order
942 Return all values in some field
946 TODO: order of values is probably same as in source data, need to investigate that
952 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
953 return unless (defined($rec) && defined($rec->{$f}));
954 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
955 if (ref($rec->{$f}) eq 'ARRAY') {
957 foreach my $h ( @{ $rec->{$f} } ) {
958 if (ref($h) eq 'HASH') {
959 push @out, ( _pack_subfields_hash( $h ) );
965 } elsif( defined($rec->{$f}) ) {
972 Return all values in specific field and subfield
980 return unless (defined($rec && $rec->{$f}));
982 warn "rec2($f,$sf) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
984 if (ref($_->{$sf}) eq 'ARRAY') {
989 } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
999 If rec() returns just single value, it will
1000 return scalar, not array.
1006 warn "rec(",dump(@_),") has more than one return value, ignoring\n" if $#out > 0;
1014 } elsif ($#_ == 1) {
1017 if ($#out == 0 && ! wantarray) {
1028 Apply regex to some or all values
1030 @v = regex( 's/foo/bar/g', @v );
1037 #warn "r: $r\n", dump(\@_);
1038 foreach my $t (@_) {
1041 push @out, $t if ($t && $t ne '');
1048 Prefix all values with a string
1050 @v = prefix( 'my_', @v );
1056 return @_ unless defined( $p );
1057 return map { $p . $_ } grep { defined($_) } @_;
1062 suffix all values with a string
1064 @v = suffix( '_my', @v );
1070 return @_ unless defined( $s );
1071 return map { $_ . $s } grep { defined($_) } @_;
1076 surround all values with a two strings
1078 @v = surround( 'prefix_', '_suffix', @v );
1085 $p = '' unless defined( $p );
1086 $s = '' unless defined( $s );
1087 return map { $p . $_ . $s } grep { defined($_) } @_;
1092 Return first element
1105 Consult lookup hashes for some value
1109 'ffkk/peri/mfn'.rec('000')
1111 'ffkk','peri','200-a-200-e',
1113 first(rec(200,'a')).' '.first(rec('200','e'))
1117 Code like above will be B<automatically generated> using L<WebPAC::Parse> from
1118 normal lookup definition in C<conf/lookup/something.pl> which looks like:
1121 # which results to return from record recorded in lookup
1122 sub { 'ffkk/peri/mfn' . rec('000') },
1123 # from which database and input
1125 # such that following values match
1126 sub { first(rec(200,'a')) . ' ' . first(rec('200','e')) },
1127 # if this part is missing, we will try to match same fields
1128 # from lookup record and current one, or you can override
1129 # which records to use from current record using
1130 sub { rec('900','x') . ' ' . rec('900','y') },
1133 You can think about this lookup as SQL (if that helps):
1140 sub { filter from lookuped record }
1142 sub { optional filter on current record }
1149 my ($what, $database, $input, $key, $having) = @_;
1151 confess "lookup needs 5 arguments: what, database, input, key, having\n" unless ($#_ == 4);
1153 warn "## lookup ($database, $input, $key)", $/ if ($debug > 1);
1154 return unless (defined($lookup->{$database}->{$input}->{$key}));
1156 confess "lookup really need load_row_coderef added to data_structure\n" unless ($load_row_coderef);
1159 my @having = $having->();
1161 warn "## having = ", dump( @having ) if ($debug > 2);
1163 foreach my $h ( @having ) {
1164 if (defined($lookup->{$database}->{$input}->{$key}->{$h})) {
1165 warn "lookup for $database/$input/$key/$h return ",dump($lookup->{$database}->{$input}->{$key}->{$h}),"\n" if ($debug);
1166 $mfns->{$_}++ foreach keys %{ $lookup->{$database}->{$input}->{$key}->{$h} };
1170 return unless ($mfns);
1172 my @mfns = sort keys %$mfns;
1174 warn "# lookup loading $database/$input/$key mfn ", join(",",@mfns)," having ",dump(@having),"\n" if ($debug);
1179 foreach my $mfn (@mfns) {
1180 $rec = $load_row_coderef->( $database, $input, $mfn );
1182 warn "got $database/$input/$mfn = ", dump($rec), $/ if ($debug);
1184 my @vals = $what->();
1186 push @out, ( @vals );
1188 warn "lookup for mfn $mfn returned ", dump(@vals), $/ if ($debug);
1191 # if (ref($lookup->{$k}) eq 'ARRAY') {
1192 # return @{ $lookup->{$k} };
1194 # return $lookup->{$k};
1199 warn "## lookup returns = ", dump(@out), $/ if ($debug);
1208 =head2 save_into_lookup
1210 Save value into lookup. It associates current database, input
1211 and specific keys with one or more values which will be
1212 associated over MFN.
1214 MFN will be extracted from first occurence current of field 000
1215 in current record, or if it doesn't exist from L<_set_config> C<_mfn>.
1217 my $nr = save_into_lookup($database,$input,$key,sub {
1218 # code which produce one or more values
1221 It returns number of items saved.
1223 This function shouldn't be called directly, it's called from code created by
1228 sub save_into_lookup {
1229 my ($database,$input,$key,$coderef) = @_;
1230 die "save_into_lookup needs database" unless defined($database);
1231 die "save_into_lookup needs input" unless defined($input);
1232 die "save_into_lookup needs key" unless defined($key);
1233 die "save_into_lookup needs CODE" unless ( defined($coderef) && ref($coderef) eq 'CODE' );
1235 warn "## save_into_lookup rec = ", dump($rec), " config = ", dump($config), $/ if ($debug > 2);
1238 defined($rec->{'000'}->[0]) ? $rec->{'000'}->[0] :
1239 defined($config->{_mfn}) ? $config->{_mfn} :
1240 die "mfn not defined or zero";
1244 foreach my $v ( $coderef->() ) {
1245 $lookup->{$database}->{$input}->{$key}->{$v}->{$mfn}++;
1246 warn "# saved lookup $database/$input/$key [$v] $mfn\n" if ($debug > 1);
1255 Consult config values stored in C<config.yml>
1257 # return database code (key under databases in yaml)
1258 $database_code = config(); # use _ from hash
1259 $database_name = config('name');
1260 $database_input_name = config('input name');
1262 Up to three levels are supported.
1267 return unless ($config);
1275 warn "### getting config($p)\n" if ($debug > 1);
1277 my @p = split(/\s+/,$p);
1279 $v = $config->{ '_' }; # special, database code
1282 my $c = dclone( $config );
1284 foreach my $k (@p) {
1285 warn "### k: $k c = ",dump($c),$/ if ($debug > 1);
1286 if (ref($c) eq 'ARRAY') {
1288 warn "config($p) taking first occurence of '$k', probably not what you wanted!\n";
1292 if (! defined($c->{$k}) ) {
1303 warn "## config( '$p' ) = ",dump( $v ),$/ if ($v && $debug);
1304 warn "config( '$p' ) is empty\n" if (! $v);
1311 Returns unique id of this record
1315 Returns C<42/2> for 2nd occurence of MFN 42.
1320 my $mfn = $config->{_mfn} || die "no _mfn in config data";
1321 return $mfn . $#{$marc_record} ? $#{$marc_record} + 1 : '';
1326 Joins walues with some delimiter
1328 $v = join_with(", ", @v);
1334 warn "### join_with('$d',",dump(@_),")\n" if ($debug > 2);
1335 my $v = join($d, grep { defined($_) && $_ ne '' } @_);
1336 return '' unless defined($v);
1342 Split record subfield on some regex and take one of parts out
1344 $a_before_semi_column =
1345 split_rec_on('200','a', /\s*;\s*/, $part);
1347 C<$part> is optional number of element. First element is
1350 If there is no C<$part> parameter or C<$part> is 0, this function will
1351 return all values produced by splitting.
1356 die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
1358 my ($fld, $sf, $regex, $part) = @_;
1359 warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
1361 my @r = rec( $fld, $sf );
1363 warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
1365 return '' if ( ! defined($v) || $v =~ /^\s*$/);
1367 my @s = split( $regex, $v );
1368 warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
1369 if ($part && $part > 0) {
1370 return $s[ $part - 1 ];
1380 set( key => 'value' );
1386 warn "## set ( $k => ", dump($v), " )", $/ if ( $debug );
1397 my $k = shift || return;
1398 my $v = $hash->{$k};
1399 warn "## get $k = ", dump( $v ), $/ if ( $debug );
1405 if ( count( @result ) == 1 ) {
1406 # do something if only 1 result is there
1412 warn "## count ",dump(@_),$/ if ( $debug );