1 package WebPAC::Normalize;
10 search_display search display sorted
12 marc marc_indicators marc_repeatable_subfield
13 marc_compose marc_leader marc_fixed
14 marc_duplicate marc_remove marc_count
19 regex prefix suffix surround
20 first lookup join_with
33 #use base qw/WebPAC::Common/;
34 use Data::Dump qw/dump/;
35 use Storable qw/dclone/;
41 use WebPAC::Normalize::ISBN;
42 push @EXPORT, ( 'isbn_10', 'isbn_13' );
46 WebPAC::Normalize - describe normalisaton rules using sets
50 our $VERSION = '0.31';
54 This module uses C<conf/normalize/*.pl> files to perform normalisation
55 from input records using perl functions which are specialized for set
58 Sets are implemented as arrays, and normalisation file is valid perl, which
59 means that you check it's validity before running WebPAC using
60 C<perl -c normalize.pl>.
62 Normalisation can generate multiple output normalized data. For now, supported output
63 types (on the left side of definition) are: C<search_display>, C<display>, C<search> and
68 Functions which start with C<_> are private and used by WebPAC internally.
69 All other functions are available for use within normalisation rules.
75 my $ds = WebPAC::Normalize::data_structure(
76 lookup => $lookup_hash,
78 rules => $normalize_pl_config,
79 marc_encoding => 'utf-8',
81 load_row_coderef => sub {
82 my ($database,$input,$mfn) = @_;
83 $store->load_row( database => $database, input => $input, id => $mfn );
87 Options C<row>, C<rules> and C<log> are mandatory while all
90 C<load_row_coderef> is closure only used when executing lookups, so they will
91 die if it's not defined.
93 This function will B<die> if normalizastion can't be evaled.
95 Since this function isn't exported you have to call it with
96 C<WebPAC::Normalize::data_structure>.
100 my $load_row_coderef;
105 die "need row argument" unless ($arg->{row});
106 die "need normalisation argument" unless ($arg->{rules});
109 _set_lookup( $arg->{lookup} ) if defined($arg->{lookup});
110 _set_rec( $arg->{row} );
111 _set_config( $arg->{config} ) if defined($arg->{config});
112 _clean_ds( %{ $arg } );
113 $load_row_coderef = $arg->{load_row_coderef};
115 eval "$arg->{rules}";
116 die "error evaling $arg->{rules}: $@\n" if ($@);
123 Set current record hash
132 $rec = shift or die "no record hash";
137 Set current config hash
139 _set_config( $config );
147 Code of current database
165 Return hash formatted as data structure
171 my ($out, $marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader);
172 my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
175 #warn "## out = ",dump($out);
181 Clean data structure hash for next record
189 ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader) = ();
190 ($marc_record_offset, $marc_fetch_offset) = (0,0);
191 $marc_encoding = $a->{marc_encoding};
196 Set current lookup hash
198 _set_lookup( $lookup );
210 Get current lookup hash
212 my $lookup = _get_lookup();
222 Setup code reference which will return L<data_structure> from
226 my ($database,$input,$mfn) = @_;
227 $store->load_row( database => $database, input => $input, id => $mfn );
234 confess "argument isn't CODE" unless ref($coderef) eq 'CODE';
236 $load_row_coderef = $coderef;
239 =head2 _get_marc_fields
241 Get all fields defined by calls to C<marc>
243 $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
245 We are using I<magic> which detect repeatable fields only from
246 sequence of field/subfield data generated by normalization.
248 Repeatable field is created when there is second occurence of same subfield or
249 if any of indicators are different.
251 This is sane for most cases. Something like:
257 will be created from any combination of:
259 900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
263 marc('900','a', rec('200','a') );
264 marc('900','b', rec('200','b') );
265 marc('900','c', rec('200','c') );
267 which might not be what you have in mind. If you need repeatable subfield,
268 define it using C<marc_repeatable_subfield> like this:
270 marc_repeatable_subfield('900','a');
271 marc('900','a', rec('200','a') );
272 marc('900','b', rec('200','b') );
273 marc('900','c', rec('200','c') );
277 900a-1 900a-2 900a-3 900b-1 900c-1
280 There is also support for returning next or specific using:
282 while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
283 # do something with $mf
286 will always return fields from next MARC record or
288 my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
290 will return 42th copy record (if it exists).
296 sub _get_marc_fields {
299 warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
300 $fetch_pos = $marc_fetch_offset;
301 if ($arg->{offset}) {
302 $fetch_pos = $arg->{offset};
303 } elsif($arg->{fetch_next}) {
304 $marc_fetch_offset++;
307 return if (! $marc_record || ref($marc_record) ne 'ARRAY');
309 warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
311 my $marc_rec = $marc_record->[ $fetch_pos ];
313 warn "## _get_marc_fields (at offset: $fetch_pos) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
315 return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
317 # first, sort all existing fields
318 # XXX might not be needed, but modern perl might randomize elements in hash
319 my @sorted_marc_record = sort {
320 $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
323 @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
328 # count unique field-subfields (used for offset when walking to next subfield)
330 map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
333 warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
334 warn "## marc_record[$fetch_pos] = ", dump( $marc_rec ), $/;
335 warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
336 warn "## subfield count = ", dump( $u ), $/;
339 my $len = $#sorted_marc_record;
344 foreach ( 0 .. $len ) {
346 # find next element which isn't visited
347 while ($visited->{$i}) {
348 $i = ($i + 1) % ($len + 1);
354 my $row = dclone( $sorted_marc_record[$i] );
356 # field and subfield which is key for
357 # marc_repeatable_subfield and u
358 my $fsf = $row->[0] . ( $row->[3] || '' );
362 print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
363 print "### this [$i]: ", dump( $row ),$/;
364 print "### sf: ", $row->[3], " vs ", $field->[3],
365 $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
371 if ( $#$field >= 0 ) {
373 $row->[0] ne $field->[0] || # field
374 $row->[1] ne $field->[1] || # i1
375 $row->[2] ne $field->[2] # i2
378 warn "## saved/1 ", dump( $field ),$/ if ($debug);
382 ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
384 ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
385 ! $marc_repeatable_subfield->{ $fsf }
389 warn "## saved/2 ", dump( $field ),$/ if ($debug);
393 # append new subfields to existing field
394 push @$field, ( $row->[3], $row->[4] );
401 if (! $marc_repeatable_subfield->{ $fsf }) {
402 # make step to next subfield
403 $i = ($i + $u->{ $fsf } ) % ($len + 1);
409 warn "## saved/3 ", dump( $field ),$/ if ($debug);
415 =head2 _get_marc_leader
417 Return leader from currently fetched record by L</_get_marc_fields>
419 print WebPAC::Normalize::_get_marc_leader();
423 sub _get_marc_leader {
424 die "no fetch_pos, did you called _get_marc_fields first?" unless ( defined( $fetch_pos ) );
425 return $marc_leader->[ $fetch_pos ];
430 Change level of debug warnings
438 return $debug unless defined($l);
439 warn "debug level $l",$/ if ($l > 0);
443 =head1 Functions to create C<data_structure>
445 Those functions generally have to first in your normalization file.
447 =head2 search_display
449 Define output for L<search> and L<display> at the same time
451 search_display('Title', rec('200','a') );
457 my $name = shift or die "search_display needs name as first argument";
458 my @o = grep { defined($_) && $_ ne '' } @_;
460 $out->{$name}->{search} = \@o;
461 $out->{$name}->{display} = \@o;
466 Old name for L<search_display>, but supported
471 search_display( @_ );
476 Define output just for I<display>
478 @v = display('Title', rec('200','a') );
483 my $type = shift or confess "need type -- BUG?";
484 my $name = shift or confess "needs name as first argument";
485 my @o = grep { defined($_) && $_ ne '' } @_;
487 $out->{$name}->{$type} = \@o;
490 sub display { _field( 'display', @_ ) }
494 Prepare values just for I<search>
496 @v = search('Title', rec('200','a') );
500 sub search { _field( 'search', @_ ) }
504 Insert into lists which will be automatically sorted
506 sorted('Title', rec('200','a') );
510 sub sorted { _field( 'sorted', @_ ) }
515 Setup fields within MARC leader or get leader
517 marc_leader('05','c');
518 my $leader = marc_leader();
523 my ($offset,$value) = @_;
526 $marc_leader->[ $marc_record_offset ]->{ $offset } = $value;
529 if (defined($marc_leader)) {
530 die "marc_leader not array = ", dump( $marc_leader ) unless (ref($marc_leader) eq 'ARRAY');
531 return $marc_leader->[ $marc_record_offset ];
540 Create control/indentifier fields with values in fixed positions
542 marc_fixed('008', 00, '070402');
543 marc_fixed('008', 39, '|');
545 Positions not specified will be filled with spaces (C<0x20>).
547 There will be no effort to extend last specified value to full length of
553 my ($f, $pos, $val) = @_;
554 die "need marc(field, position, value)" unless defined($f) && defined($pos);
556 confess "need val" unless defined $val;
563 if (length($old) <= $pos) {
564 $_->[1] .= ' ' x ( $pos - length($old) ) . $val;
565 warn "## marc_fixed($f,$pos,'$val') append '$old' -> '$_->[1]'\n" if ($debug > 1);
567 $_->[1] = substr($old, 0, $pos) . $val . substr($old, $pos + length($val));
568 warn "## marc_fixed($f,$pos,'$val') update '$old' -> '$_->[1]'\n" if ($debug > 1);
572 } @{ $marc_record->[ $marc_record_offset ] };
575 my $v = ' ' x $pos . $val;
576 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
577 warn "## marc_fixed($f,$pos,'val') created '$v'\n" if ($debug > 1);
583 Save value for MARC field
585 marc('900','a', rec('200','a') );
586 marc('001', rec('000') );
591 my $f = shift or die "marc needs field";
592 die "marc field must be numer" unless ($f =~ /^\d+$/);
596 $sf = shift or die "marc needs subfield";
600 my $v = $_; # make var read-write for Encode
601 next unless (defined($v) && $v !~ /^\s*$/);
602 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
604 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
606 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
611 =head2 marc_repeatable_subfield
613 Save values for MARC repetable subfield
615 marc_repeatable_subfield('910', 'z', rec('909') );
619 sub marc_repeatable_subfield {
621 die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
622 $marc_repeatable_subfield->{ $f . $sf }++;
626 =head2 marc_indicators
628 Set both indicators for MARC field
630 marc_indicators('900', ' ', 1);
632 Any indicator value other than C<0-9> will be treated as undefined.
636 sub marc_indicators {
637 my $f = shift || die "marc_indicators need field!\n";
639 die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
640 die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
642 $i1 = ' ' if ($i1 !~ /^\d$/);
643 $i2 = ' ' if ($i2 !~ /^\d$/);
644 @{ $marc_indicators->{$f} } = ($i1,$i2);
649 Save values for each MARC subfield explicitly
658 If you specify C<+> for subfield, value will be appended
659 to previous defined subfield.
664 my $f = shift or die "marc_compose needs field";
665 die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
667 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
668 my $m = [ $f, $i1, $i2 ];
670 warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
673 die "ERROR: marc_compose",dump($f,@_)," not valid (must be even).\nDo you need to add first() or join() around some argument?\n";
680 next unless (defined($v) && $v !~ /^\s*$/);
681 warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
683 push @$m, ( $sf, $v );
689 warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
691 push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
694 =head2 marc_duplicate
696 Generate copy of current MARC record and continue working on copy
700 Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
701 C<< _get_marc_fields( offset => 42 ) >>.
706 my $m = $marc_record->[ -1 ];
707 die "can't duplicate record which isn't defined" unless ($m);
708 push @{ $marc_record }, dclone( $m );
709 push @{ $marc_leader }, dclone( marc_leader() );
710 warn "## marc_duplicate = ", dump(@$marc_leader, @$marc_record), $/ if ($debug > 1);
711 $marc_record_offset = $#{ $marc_record };
712 warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
718 Remove some field or subfield from MARC record.
721 marc_remove('200','a');
723 This will erase field C<200> or C<200^a> from current MARC record.
727 Will remove all fields in current MARC record.
729 This is useful after calling C<marc_duplicate> or on it's own (but, you
730 should probably just remove that subfield definition if you are not
731 using C<marc_duplicate>).
733 FIXME: support fields < 10.
740 die "marc_remove needs record number" unless defined($f);
742 my $marc = $marc_record->[ $marc_record_offset ];
744 warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
748 delete( $marc_record->[ $marc_record_offset ] );
749 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
754 foreach ( 0 .. $#{ $marc } ) {
755 last unless (defined $marc->[$i]);
756 warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
757 if ($marc->[$i]->[0] eq $f) {
760 splice @$marc, $i, 1;
761 warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
764 foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
765 my $o = ($j * 2) + 3;
766 if ($marc->[$i]->[$o] eq $sf) {
768 splice @{$marc->[$i]}, $o, 2;
769 warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
770 # is record now empty?
771 if ($#{ $marc->[$i] } == 2) {
772 splice @$marc, $i, 1;
773 warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
783 warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
785 $marc_record->[ $marc_record_offset ] = $marc;
788 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
791 =head2 marc_original_order
793 Copy all subfields preserving original order to marc field.
795 marc_original_order( marc_field_number, original_input_field_number );
797 Please note that field numbers are consistent with other commands (marc
798 field number first), but somewhat counter-intuitive (destination and then
801 You might want to use this command if you are just renaming subfields or
802 using pre-processing modify_record in C<config.yml> and don't need any
803 post-processing or want to preserve order of original subfields.
808 sub marc_original_order {
810 my ($to, $from) = @_;
811 die "marc_original_order needs from and to fields\n" unless ($from && $to);
813 return unless defined($rec->{$from});
815 my $r = $rec->{$from};
816 die "record field $from isn't array\n" unless (ref($r) eq 'ARRAY');
818 my ($i1,$i2) = defined($marc_indicators->{$to}) ? @{ $marc_indicators->{$to} } : (' ',' ');
819 warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
821 foreach my $d (@$r) {
823 if (! defined($d->{subfields}) && ref($d->{subfields}) ne 'ARRAY') {
824 warn "# marc_original_order($to,$from): field $from doesn't have subfields specification\n";
828 my @sfs = @{ $d->{subfields} };
830 die "field $from doesn't have even number of subfields specifications\n" unless($#sfs % 2 == 1);
832 warn "#--> d: ",dump($d), "\n#--> sfs: ",dump(@sfs),$/ if ($debug > 2);
834 my $m = [ $to, $i1, $i2 ];
836 while (my $sf = shift @sfs) {
838 warn "#--> sf: ",dump($sf), $/ if ($debug > 2);
839 my $offset = shift @sfs;
840 die "corrupted sufields specification for field $from\n" unless defined($offset);
843 if (ref($d->{$sf}) eq 'ARRAY') {
844 $v = $d->{$sf}->[$offset] if (defined($d->{$sf}->[$offset]));
845 } elsif ($offset == 0) {
848 die "field $from subfield '$sf' need occurence $offset which doesn't exist", dump($d->{$sf});
850 push @$m, ( $sf, $v ) if (defined($v));
854 push @{ $marc_record->[ $marc_record_offset ] }, $m;
858 warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
863 Return number of MARC records created using L</marc_duplicate>.
865 print "created ", marc_count(), " records";
870 return $#{ $marc_record };
874 =head1 Functions to extract data from input
876 This function should be used inside functions to create C<data_structure> described
879 =head2 _pack_subfields_hash
881 @subfields = _pack_subfields_hash( $h );
882 $subfields = _pack_subfields_hash( $h, 1 );
884 Return each subfield value in array or pack them all together and return scalar
885 with subfields (denoted by C<^>) and values.
889 sub _pack_subfields_hash {
891 warn "## _pack_subfields_hash( ",dump(@_), " )\n" if ($debug > 1);
893 my ($h,$include_subfields) = @_;
895 # sanity and ease of use
896 return $h if (ref($h) ne 'HASH');
898 if ( defined($h->{subfields}) ) {
899 my $sfs = delete $h->{subfields} || die "no subfields?";
902 my $sf = shift @$sfs;
903 push @out, '^' . $sf if ($include_subfields);
905 if ($o == 0 && ref( $h->{$sf} ) ne 'ARRAY' ) {
906 # single element subfields are not arrays
907 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
909 push @out, $h->{$sf};
911 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
912 push @out, $h->{$sf}->[$o];
915 if ($include_subfields) {
916 return join('', @out);
921 if ($include_subfields) {
923 foreach my $sf (sort keys %$h) {
924 if (ref($h->{$sf}) eq 'ARRAY') {
925 $out .= '^' . $sf . join('^' . $sf, @{ $h->{$sf} });
927 $out .= '^' . $sf . $h->{$sf};
932 # FIXME this should probably be in alphabetical order instead of hash order
940 Return all values in some field
944 TODO: order of values is probably same as in source data, need to investigate that
950 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
951 return unless (defined($rec) && defined($rec->{$f}));
952 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
953 if (ref($rec->{$f}) eq 'ARRAY') {
955 foreach my $h ( @{ $rec->{$f} } ) {
956 if (ref($h) eq 'HASH') {
957 push @out, ( _pack_subfields_hash( $h ) );
963 } elsif( defined($rec->{$f}) ) {
970 Return all values in specific field and subfield
978 return unless (defined($rec && $rec->{$f}));
980 warn "rec2($f,$sf) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
982 if (ref($_->{$sf}) eq 'ARRAY') {
987 } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
997 If rec() returns just single value, it will
998 return scalar, not array.
1004 warn "rec(",dump(@_),") has more than one return value, ignoring\n" if $#out > 0;
1012 } elsif ($#_ == 1) {
1015 if ($#out == 0 && ! wantarray) {
1026 Apply regex to some or all values
1028 @v = regex( 's/foo/bar/g', @v );
1035 #warn "r: $r\n", dump(\@_);
1036 foreach my $t (@_) {
1039 push @out, $t if ($t && $t ne '');
1046 Prefix all values with a string
1048 @v = prefix( 'my_', @v );
1054 return @_ unless defined( $p );
1055 return map { $p . $_ } grep { defined($_) } @_;
1060 suffix all values with a string
1062 @v = suffix( '_my', @v );
1068 return @_ unless defined( $s );
1069 return map { $_ . $s } grep { defined($_) } @_;
1074 surround all values with a two strings
1076 @v = surround( 'prefix_', '_suffix', @v );
1083 $p = '' unless defined( $p );
1084 $s = '' unless defined( $s );
1085 return map { $p . $_ . $s } grep { defined($_) } @_;
1090 Return first element
1103 Consult lookup hashes for some value
1107 'ffkk/peri/mfn'.rec('000')
1109 'ffkk','peri','200-a-200-e',
1111 first(rec(200,'a')).' '.first(rec('200','e'))
1115 Code like above will be B<automatically generated> using L<WebPAC::Parse> from
1116 normal lookup definition in C<conf/lookup/something.pl> which looks like:
1119 # which results to return from record recorded in lookup
1120 sub { 'ffkk/peri/mfn' . rec('000') },
1121 # from which database and input
1123 # such that following values match
1124 sub { first(rec(200,'a')) . ' ' . first(rec('200','e')) },
1125 # if this part is missing, we will try to match same fields
1126 # from lookup record and current one, or you can override
1127 # which records to use from current record using
1128 sub { rec('900','x') . ' ' . rec('900','y') },
1131 You can think about this lookup as SQL (if that helps):
1138 sub { filter from lookuped record }
1140 sub { optional filter on current record }
1147 my ($what, $database, $input, $key, $having) = @_;
1149 confess "lookup needs 5 arguments: what, database, input, key, having\n" unless ($#_ == 4);
1151 warn "## lookup ($database, $input, $key)", $/ if ($debug > 1);
1152 return unless (defined($lookup->{$database}->{$input}->{$key}));
1154 confess "lookup really need load_row_coderef added to data_structure\n" unless ($load_row_coderef);
1157 my @having = $having->();
1159 warn "## having = ", dump( @having ) if ($debug > 2);
1161 foreach my $h ( @having ) {
1162 if (defined($lookup->{$database}->{$input}->{$key}->{$h})) {
1163 warn "lookup for $database/$input/$key/$h return ",dump($lookup->{$database}->{$input}->{$key}->{$h}),"\n" if ($debug);
1164 $mfns->{$_}++ foreach keys %{ $lookup->{$database}->{$input}->{$key}->{$h} };
1168 return unless ($mfns);
1170 my @mfns = sort keys %$mfns;
1172 warn "# lookup loading $database/$input/$key mfn ", join(",",@mfns)," having ",dump(@having),"\n" if ($debug);
1177 foreach my $mfn (@mfns) {
1178 $rec = $load_row_coderef->( $database, $input, $mfn );
1180 warn "got $database/$input/$mfn = ", dump($rec), $/ if ($debug);
1182 my @vals = $what->();
1184 push @out, ( @vals );
1186 warn "lookup for mfn $mfn returned ", dump(@vals), $/ if ($debug);
1189 # if (ref($lookup->{$k}) eq 'ARRAY') {
1190 # return @{ $lookup->{$k} };
1192 # return $lookup->{$k};
1197 warn "## lookup returns = ", dump(@out), $/ if ($debug);
1206 =head2 save_into_lookup
1208 Save value into lookup. It associates current database, input
1209 and specific keys with one or more values which will be
1210 associated over MFN.
1212 MFN will be extracted from first occurence current of field 000
1213 in current record, or if it doesn't exist from L<_set_config> C<_mfn>.
1215 my $nr = save_into_lookup($database,$input,$key,sub {
1216 # code which produce one or more values
1219 It returns number of items saved.
1221 This function shouldn't be called directly, it's called from code created by
1226 sub save_into_lookup {
1227 my ($database,$input,$key,$coderef) = @_;
1228 die "save_into_lookup needs database" unless defined($database);
1229 die "save_into_lookup needs input" unless defined($input);
1230 die "save_into_lookup needs key" unless defined($key);
1231 die "save_into_lookup needs CODE" unless ( defined($coderef) && ref($coderef) eq 'CODE' );
1233 warn "## save_into_lookup rec = ", dump($rec), " config = ", dump($config), $/ if ($debug > 2);
1236 defined($rec->{'000'}->[0]) ? $rec->{'000'}->[0] :
1237 defined($config->{_mfn}) ? $config->{_mfn} :
1238 die "mfn not defined or zero";
1242 foreach my $v ( $coderef->() ) {
1243 $lookup->{$database}->{$input}->{$key}->{$v}->{$mfn}++;
1244 warn "# saved lookup $database/$input/$key [$v] $mfn\n" if ($debug > 1);
1253 Consult config values stored in C<config.yml>
1255 # return database code (key under databases in yaml)
1256 $database_code = config(); # use _ from hash
1257 $database_name = config('name');
1258 $database_input_name = config('input name');
1260 Up to three levels are supported.
1265 return unless ($config);
1273 warn "### getting config($p)\n" if ($debug > 1);
1275 my @p = split(/\s+/,$p);
1277 $v = $config->{ '_' }; # special, database code
1280 my $c = dclone( $config );
1282 foreach my $k (@p) {
1283 warn "### k: $k c = ",dump($c),$/ if ($debug > 1);
1284 if (ref($c) eq 'ARRAY') {
1286 warn "config($p) taking first occurence of '$k', probably not what you wanted!\n";
1290 if (! defined($c->{$k}) ) {
1301 warn "## config( '$p' ) = ",dump( $v ),$/ if ($v && $debug);
1302 warn "config( '$p' ) is empty\n" if (! $v);
1309 Returns unique id of this record
1313 Returns C<42/2> for 2nd occurence of MFN 42.
1318 my $mfn = $config->{_mfn} || die "no _mfn in config data";
1319 return $mfn . $#{$marc_record} ? $#{$marc_record} + 1 : '';
1324 Joins walues with some delimiter
1326 $v = join_with(", ", @v);
1332 warn "### join_with('$d',",dump(@_),")\n" if ($debug > 2);
1333 my $v = join($d, grep { defined($_) && $_ ne '' } @_);
1334 return '' unless defined($v);
1340 Split record subfield on some regex and take one of parts out
1342 $a_before_semi_column =
1343 split_rec_on('200','a', /\s*;\s*/, $part);
1345 C<$part> is optional number of element. First element is
1348 If there is no C<$part> parameter or C<$part> is 0, this function will
1349 return all values produced by splitting.
1354 die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
1356 my ($fld, $sf, $regex, $part) = @_;
1357 warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
1359 my @r = rec( $fld, $sf );
1361 warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
1363 return '' if ( ! defined($v) || $v =~ /^\s*$/);
1365 my @s = split( $regex, $v );
1366 warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
1367 if ($part && $part > 0) {
1368 return $s[ $part - 1 ];
1378 set( key => 'value' );
1384 warn "## set ( $k => ", dump($v), " )", $/ if ( $debug );
1395 my $k = shift || return;
1396 my $v = $hash->{$k};
1397 warn "## get $k = ", dump( $v ), $/ if ( $debug );
1403 if ( count( @result ) == 1 ) {
1404 # do something if only 1 result is there
1410 warn "## count ",dump(@_),$/ if ( $debug );