1 package WebPAC::Normalize;
11 marc marc_indicators marc_repeatable_subfield
12 marc_compose marc_leader
13 marc_duplicate marc_remove marc_count
17 regex prefix suffix surround
18 first lookup join_with
30 #use base qw/WebPAC::Common/;
31 use Data::Dump qw/dump/;
32 use Storable qw/dclone/;
41 WebPAC::Normalize - describe normalisaton rules using sets
49 our $VERSION = '0.27';
53 This module uses C<conf/normalize/*.pl> files to perform normalisation
54 from input records using perl functions which are specialized for set
57 Sets are implemented as arrays, and normalisation file is valid perl, which
58 means that you check it's validity before running WebPAC using
59 C<perl -c normalize.pl>.
61 Normalisation can generate multiple output normalized data. For now, supported output
62 types (on the left side of definition) are: C<tag>, C<display>, C<search> and
67 Functions which start with C<_> are private and used by WebPAC internally.
68 All other functions are available for use within normalisation rules.
74 my $ds = WebPAC::Normalize::data_structure(
75 lookup => $lookup_hash,
77 rules => $normalize_pl_config,
78 marc_encoding => 'utf-8',
80 load_row_coderef => sub {
81 my ($database,$input,$mfn) = shift;
82 $store->load_row( database => $database, input => $input, id => $mfn );
86 Options C<row>, C<rules> and C<log> are mandatory while all
89 C<load_row_coderef> is closure only used when executing lookups, so they will
90 die if it's not defined.
92 This function will B<die> if normalizastion can't be evaled.
94 Since this function isn't exported you have to call it with
95 C<WebPAC::Normalize::data_structure>.
104 die "need row argument" unless ($arg->{row});
105 die "need normalisation argument" unless ($arg->{rules});
108 _set_lookup( $arg->{lookup} ) if defined($arg->{lookup});
109 _set_rec( $arg->{row} );
110 _set_config( $arg->{config} ) if defined($arg->{config});
111 _clean_ds( %{ $arg } );
112 $load_row_coderef = $arg->{load_row_coderef};
114 eval "$arg->{rules}";
115 die "error evaling $arg->{rules}: $@\n" if ($@);
122 Set current record hash
131 $rec = shift or die "no record hash";
136 Set current config hash
138 _set_config( $config );
146 Code of current database
164 Return hash formatted as data structure
170 my ($out, $marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader);
171 my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
179 Clean data structure hash for next record
187 ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader) = ();
188 ($marc_record_offset, $marc_fetch_offset) = (0,0);
189 $marc_encoding = $a->{marc_encoding};
194 Set current lookup hash
196 _set_lookup( $lookup );
208 Get current lookup hash
210 my $lookup = _get_lookup();
220 Setup code reference which will return L<data_structure> from
224 my ($database,$input,$mfn) = @_;
225 $store->load_row( database => $database, input => $input, id => $mfn );
232 confess "argument isn't CODE" unless ref($coderef) eq 'CODE';
234 $load_row_coderef = $coderef;
237 =head2 _get_marc_fields
239 Get all fields defined by calls to C<marc>
241 $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
243 We are using I<magic> which detect repeatable fields only from
244 sequence of field/subfield data generated by normalization.
246 Repeatable field is created when there is second occurence of same subfield or
247 if any of indicators are different.
249 This is sane for most cases. Something like:
255 will be created from any combination of:
257 900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
261 marc('900','a', rec('200','a') );
262 marc('900','b', rec('200','b') );
263 marc('900','c', rec('200','c') );
265 which might not be what you have in mind. If you need repeatable subfield,
266 define it using C<marc_repeatable_subfield> like this:
268 marc_repeatable_subfield('900','a');
269 marc('900','a', rec('200','a') );
270 marc('900','b', rec('200','b') );
271 marc('900','c', rec('200','c') );
275 900a-1 900a-2 900a-3 900b-1 900c-1
278 There is also support for returning next or specific using:
280 while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
281 # do something with $mf
284 will always return fields from next MARC record or
286 my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
288 will return 42th copy record (if it exists).
294 sub _get_marc_fields {
297 warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
298 $fetch_pos = $marc_fetch_offset;
299 if ($arg->{offset}) {
300 $fetch_pos = $arg->{offset};
301 } elsif($arg->{fetch_next}) {
302 $marc_fetch_offset++;
305 return if (! $marc_record || ref($marc_record) ne 'ARRAY');
307 warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
309 my $marc_rec = $marc_record->[ $fetch_pos ];
311 warn "## _get_marc_fields (at offset: $fetch_pos) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
313 return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
315 # first, sort all existing fields
316 # XXX might not be needed, but modern perl might randomize elements in hash
317 my @sorted_marc_record = sort {
318 $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
321 @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
326 # count unique field-subfields (used for offset when walking to next subfield)
328 map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
331 warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
332 warn "## marc_record[$fetch_pos] = ", dump( $marc_rec ), $/;
333 warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
334 warn "## subfield count = ", dump( $u ), $/;
337 my $len = $#sorted_marc_record;
342 foreach ( 0 .. $len ) {
344 # find next element which isn't visited
345 while ($visited->{$i}) {
346 $i = ($i + 1) % ($len + 1);
352 my $row = dclone( $sorted_marc_record[$i] );
354 # field and subfield which is key for
355 # marc_repeatable_subfield and u
356 my $fsf = $row->[0] . ( $row->[3] || '' );
360 print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
361 print "### this [$i]: ", dump( $row ),$/;
362 print "### sf: ", $row->[3], " vs ", $field->[3],
363 $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
369 if ( $#$field >= 0 ) {
371 $row->[0] ne $field->[0] || # field
372 $row->[1] ne $field->[1] || # i1
373 $row->[2] ne $field->[2] # i2
376 warn "## saved/1 ", dump( $field ),$/ if ($debug);
380 ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
382 ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
383 ! $marc_repeatable_subfield->{ $fsf }
387 warn "## saved/2 ", dump( $field ),$/ if ($debug);
391 # append new subfields to existing field
392 push @$field, ( $row->[3], $row->[4] );
399 if (! $marc_repeatable_subfield->{ $fsf }) {
400 # make step to next subfield
401 $i = ($i + $u->{ $fsf } ) % ($len + 1);
407 warn "## saved/3 ", dump( $field ),$/ if ($debug);
413 =head2 _get_marc_leader
415 Return leader from currently fetched record by L</_get_marc_fields>
417 print WebPAC::Normalize::_get_marc_leader();
421 sub _get_marc_leader {
422 die "no fetch_pos, did you called _get_marc_fields first?" unless ( defined( $fetch_pos ) );
423 return $marc_leader->[ $fetch_pos ];
428 Change level of debug warnings
436 return $debug unless defined($l);
437 warn "debug level $l",$/ if ($l > 0);
441 =head1 Functions to create C<data_structure>
443 Those functions generally have to first in your normalization file.
447 Define new tag for I<search> and I<display>.
449 tag('Title', rec('200','a') );
455 my $name = shift or die "tag needs name as first argument";
456 my @o = grep { defined($_) && $_ ne '' } @_;
458 $out->{$name}->{tag} = $name;
459 $out->{$name}->{search} = \@o;
460 $out->{$name}->{display} = \@o;
465 Define tag just for I<display>
467 @v = display('Title', rec('200','a') );
472 my $name = shift or die "display needs name as first argument";
473 my @o = grep { defined($_) && $_ ne '' } @_;
475 $out->{$name}->{tag} = $name;
476 $out->{$name}->{display} = \@o;
481 Prepare values just for I<search>
483 @v = search('Title', rec('200','a') );
488 my $name = shift or die "search needs name as first argument";
489 my @o = grep { defined($_) && $_ ne '' } @_;
491 $out->{$name}->{tag} = $name;
492 $out->{$name}->{search} = \@o;
497 Setup fields within MARC leader or get leader
499 marc_leader('05','c');
500 my $leader = marc_leader();
505 my ($offset,$value) = @_;
508 $marc_leader->[ $marc_record_offset ]->{ $offset } = $value;
511 if (defined($marc_leader)) {
512 die "marc_leader not array = ", dump( $marc_leader ) unless (ref($marc_leader) eq 'ARRAY');
513 return $marc_leader->[ $marc_record_offset ];
522 Save value for MARC field
524 marc('900','a', rec('200','a') );
525 marc('001', rec('000') );
530 my $f = shift or die "marc needs field";
531 die "marc field must be numer" unless ($f =~ /^\d+$/);
535 $sf = shift or die "marc needs subfield";
539 my $v = $_; # make var read-write for Encode
540 next unless (defined($v) && $v !~ /^\s*$/);
541 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
543 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
545 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
550 =head2 marc_repeatable_subfield
552 Save values for MARC repetable subfield
554 marc_repeatable_subfield('910', 'z', rec('909') );
558 sub marc_repeatable_subfield {
560 die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
561 $marc_repeatable_subfield->{ $f . $sf }++;
565 =head2 marc_indicators
567 Set both indicators for MARC field
569 marc_indicators('900', ' ', 1);
571 Any indicator value other than C<0-9> will be treated as undefined.
575 sub marc_indicators {
576 my $f = shift || die "marc_indicators need field!\n";
578 die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
579 die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
581 $i1 = ' ' if ($i1 !~ /^\d$/);
582 $i2 = ' ' if ($i2 !~ /^\d$/);
583 @{ $marc_indicators->{$f} } = ($i1,$i2);
588 Save values for each MARC subfield explicitly
597 If you specify C<+> for subfield, value will be appended
598 to previous defined subfield.
603 my $f = shift or die "marc_compose needs field";
604 die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
606 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
607 my $m = [ $f, $i1, $i2 ];
609 warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
612 die "ERROR: marc_compose",dump($f,@_)," not valid (must be even).\nDo you need to add first() or join() around some argument?\n";
619 next unless (defined($v) && $v !~ /^\s*$/);
620 warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
622 push @$m, ( $sf, $v );
628 warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
630 push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
633 =head2 marc_duplicate
635 Generate copy of current MARC record and continue working on copy
639 Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
640 C<< _get_marc_fields( offset => 42 ) >>.
645 my $m = $marc_record->[ -1 ];
646 die "can't duplicate record which isn't defined" unless ($m);
647 push @{ $marc_record }, dclone( $m );
648 push @{ $marc_leader }, dclone( marc_leader() );
649 warn "## marc_duplicate = ", dump(@$marc_leader, @$marc_record), $/ if ($debug > 1);
650 $marc_record_offset = $#{ $marc_record };
651 warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
657 Remove some field or subfield from MARC record.
660 marc_remove('200','a');
662 This will erase field C<200> or C<200^a> from current MARC record.
666 Will remove all fields in current MARC record.
668 This is useful after calling C<marc_duplicate> or on it's own (but, you
669 should probably just remove that subfield definition if you are not
670 using C<marc_duplicate>).
672 FIXME: support fields < 10.
679 die "marc_remove needs record number" unless defined($f);
681 my $marc = $marc_record->[ $marc_record_offset ];
683 warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
687 delete( $marc_record->[ $marc_record_offset ] );
688 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
693 foreach ( 0 .. $#{ $marc } ) {
694 last unless (defined $marc->[$i]);
695 warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
696 if ($marc->[$i]->[0] eq $f) {
699 splice @$marc, $i, 1;
700 warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
703 foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
704 my $o = ($j * 2) + 3;
705 if ($marc->[$i]->[$o] eq $sf) {
707 splice @{$marc->[$i]}, $o, 2;
708 warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
709 # is record now empty?
710 if ($#{ $marc->[$i] } == 2) {
711 splice @$marc, $i, 1;
712 warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
722 warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
724 $marc_record->[ $marc_record_offset ] = $marc;
727 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
730 =head2 marc_original_order
732 Copy all subfields preserving original order to marc field.
734 marc_original_order( marc_field_number, original_input_field_number );
736 Please note that field numbers are consistent with other commands (marc
737 field number first), but somewhat counter-intuitive (destination and then
740 You might want to use this command if you are just renaming subfields or
741 using pre-processing modify_record in C<config.yml> and don't need any
742 post-processing or want to preserve order of original subfields.
747 sub marc_original_order {
749 my ($to, $from) = @_;
750 die "marc_original_order needs from and to fields\n" unless ($from && $to);
752 return unless defined($rec->{$from});
754 my $r = $rec->{$from};
755 die "record field $from isn't array\n" unless (ref($r) eq 'ARRAY');
757 my ($i1,$i2) = defined($marc_indicators->{$to}) ? @{ $marc_indicators->{$to} } : (' ',' ');
758 warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
760 foreach my $d (@$r) {
762 if (! defined($d->{subfields}) && ref($d->{subfields}) ne 'ARRAY') {
763 warn "# marc_original_order($to,$from): field $from doesn't have subfields specification\n";
767 my @sfs = @{ $d->{subfields} };
769 die "field $from doesn't have even number of subfields specifications\n" unless($#sfs % 2 == 1);
771 warn "#--> d: ",dump($d), "\n#--> sfs: ",dump(@sfs),$/ if ($debug > 2);
773 my $m = [ $to, $i1, $i2 ];
775 while (my $sf = shift @sfs) {
777 warn "#--> sf: ",dump($sf), $/ if ($debug > 2);
778 my $offset = shift @sfs;
779 die "corrupted sufields specification for field $from\n" unless defined($offset);
782 if (ref($d->{$sf}) eq 'ARRAY') {
783 $v = $d->{$sf}->[$offset] if (defined($d->{$sf}->[$offset]));
784 } elsif ($offset == 0) {
787 die "field $from subfield '$sf' need occurence $offset which doesn't exist", dump($d->{$sf});
789 push @$m, ( $sf, $v ) if (defined($v));
793 push @{ $marc_record->[ $marc_record_offset ] }, $m;
797 warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
802 Return number of MARC records created using L</marc_duplicate>.
804 print "created ", marc_count(), " records";
809 return $#{ $marc_record };
813 =head1 Functions to extract data from input
815 This function should be used inside functions to create C<data_structure> described
818 =head2 _pack_subfields_hash
820 @subfields = _pack_subfields_hash( $h );
821 $subfields = _pack_subfields_hash( $h, 1 );
823 Return each subfield value in array or pack them all together and return scalar
824 with subfields (denoted by C<^>) and values.
828 sub _pack_subfields_hash {
830 warn "## _pack_subfields_hash( ",dump(@_), " )\n" if ($debug > 1);
832 my ($h,$include_subfields) = @_;
834 if ( defined($h->{subfields}) ) {
835 my $sfs = delete $h->{subfields} || die "no subfields?";
838 my $sf = shift @$sfs;
839 push @out, '^' . $sf if ($include_subfields);
841 if ($o == 0 && ref( $h->{$sf} ) ne 'ARRAY' ) {
842 # single element subfields are not arrays
843 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
845 push @out, $h->{$sf};
847 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
848 push @out, $h->{$sf}->[$o];
851 if ($include_subfields) {
852 return join('', @out);
857 if ($include_subfields) {
859 foreach my $sf (sort keys %$h) {
860 if (ref($h->{$sf}) eq 'ARRAY') {
861 $out .= '^' . $sf . join('^' . $sf, @{ $h->{$sf} });
863 $out .= '^' . $sf . $h->{$sf};
868 # FIXME this should probably be in alphabetical order instead of hash order
876 Return all values in some field
880 TODO: order of values is probably same as in source data, need to investigate that
886 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
887 return unless (defined($rec) && defined($rec->{$f}));
888 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
889 if (ref($rec->{$f}) eq 'ARRAY') {
891 foreach my $h ( @{ $rec->{$f} } ) {
892 if (ref($h) eq 'HASH') {
893 push @out, ( _pack_subfields_hash( $h ) );
899 } elsif( defined($rec->{$f}) ) {
906 Return all values in specific field and subfield
914 return unless (defined($rec && $rec->{$f}));
916 warn "rec2($f,$sf) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
918 if (ref($_->{$sf}) eq 'ARRAY') {
923 } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
933 If rec() returns just single value, it will
934 return scalar, not array.
945 if ($#out == 0 && ! wantarray) {
956 Apply regex to some or all values
958 @v = regex( 's/foo/bar/g', @v );
965 #warn "r: $r\n", dump(\@_);
969 push @out, $t if ($t && $t ne '');
976 Prefix all values with a string
978 @v = prefix( 'my_', @v );
983 my $p = shift or return;
984 return map { $p . $_ } grep { defined($_) } @_;
989 suffix all values with a string
991 @v = suffix( '_my', @v );
996 my $s = shift or die "suffix needs string as first argument";
997 return map { $_ . $s } grep { defined($_) } @_;
1002 surround all values with a two strings
1004 @v = surround( 'prefix_', '_suffix', @v );
1009 my $p = shift or die "surround need prefix as first argument";
1010 my $s = shift or die "surround needs suffix as second argument";
1011 return map { $p . $_ . $s } grep { defined($_) } @_;
1016 Return first element
1029 Consult lookup hashes for some value
1033 'ffkk/peri/mfn'.rec('000')
1035 'ffkk','peri','200-a-200-e',
1037 first(rec(200,'a')).' '.first(rec('200','e'))
1041 Code like above will be B<automatically generated> using L<WebPAC::Parse> from
1042 normal lookup definition in C<conf/lookup/something.pl> which looks like:
1045 # which results to return from record recorded in lookup
1046 sub { 'ffkk/peri/mfn' . rec('000') },
1047 # from which database and input
1049 # such that following values match
1050 sub { first(rec(200,'a')) . ' ' . first(rec('200','e')) },
1051 # if this part is missing, we will try to match same fields
1052 # from lookup record and current one, or you can override
1053 # which records to use from current record using
1054 sub { rec('900','x') . ' ' . rec('900','y') },
1057 You can think about this lookup as SQL (if that helps):
1064 sub { filter from lookuped record }
1066 sub { optional filter on current record }
1073 my ($what, $database, $input, $key, $having) = @_;
1075 confess "lookup needs 5 arguments: what, database, input, key, having\n" unless ($#_ == 4);
1077 warn "## lookup ($database, $input, $key)", $/ if ($debug > 1);
1078 return unless (defined($lookup->{$database}->{$input}->{$key}));
1080 confess "lookup really need load_row_coderef added to data_structure\n" unless ($load_row_coderef);
1083 my @having = $having->();
1085 warn "## having = ", dump( @having ) if ($debug > 2);
1087 foreach my $h ( @having ) {
1088 if (defined($lookup->{$database}->{$input}->{$key}->{$h})) {
1089 warn "lookup for $database/$input/$key/$h return ",dump($lookup->{$database}->{$input}->{$key}->{$h}),"\n" if ($debug);
1090 $mfns->{$_}++ foreach keys %{ $lookup->{$database}->{$input}->{$key}->{$h} };
1094 return unless ($mfns);
1096 my @mfns = sort keys %$mfns;
1098 warn "# lookup loading $database/$input/$key mfn ", join(",",@mfns)," having ",dump(@having),"\n" if ($debug);
1103 foreach my $mfn (@mfns) {
1104 $rec = $load_row_coderef->( $database, $input, $mfn );
1106 warn "got $database/$input/$mfn = ", dump($rec), $/ if ($debug);
1108 my @vals = $what->();
1110 push @out, ( @vals );
1112 warn "lookup for mfn $mfn returned ", dump(@vals), $/ if ($debug);
1115 # if (ref($lookup->{$k}) eq 'ARRAY') {
1116 # return @{ $lookup->{$k} };
1118 # return $lookup->{$k};
1123 warn "## lookup returns = ", dump(@out), $/ if ($debug);
1132 =head2 save_into_lookup
1134 Save value into lookup. It associates current database, input
1135 and specific keys with one or more values which will be
1136 associated over MFN.
1138 MFN will be extracted from first occurence current of field 000
1139 in current record, or if it doesn't exist from L<_set_config> C<_mfn>.
1141 my $nr = save_into_lookup($database,$input,$key,sub {
1142 # code which produce one or more values
1145 It returns number of items saved.
1147 This function shouldn't be called directly, it's called from code created by
1152 sub save_into_lookup {
1153 my ($database,$input,$key,$coderef) = @_;
1154 die "save_into_lookup needs database" unless defined($database);
1155 die "save_into_lookup needs input" unless defined($input);
1156 die "save_into_lookup needs key" unless defined($key);
1157 die "save_into_lookup needs CODE" unless ( defined($coderef) && ref($coderef) eq 'CODE' );
1159 warn "## save_into_lookup rec = ", dump($rec), " config = ", dump($config), $/ if ($debug > 2);
1162 defined($rec->{'000'}->[0]) ? $rec->{'000'}->[0] :
1163 defined($config->{_mfn}) ? $config->{_mfn} :
1164 die "mfn not defined or zero";
1168 foreach my $v ( $coderef->() ) {
1169 $lookup->{$database}->{$input}->{$key}->{$v}->{$mfn}++;
1170 warn "# saved lookup $database/$input/$key [$v] $mfn\n" if ($debug > 1);
1179 Consult config values stored in C<config.yml>
1181 # return database code (key under databases in yaml)
1182 $database_code = config(); # use _ from hash
1183 $database_name = config('name');
1184 $database_input_name = config('input name');
1185 $tag = config('input normalize tag');
1187 Up to three levels are supported.
1192 return unless ($config);
1200 warn "### getting config($p)\n" if ($debug > 1);
1202 my @p = split(/\s+/,$p);
1204 $v = $config->{ '_' }; # special, database code
1207 my $c = dclone( $config );
1209 foreach my $k (@p) {
1210 warn "### k: $k c = ",dump($c),$/ if ($debug > 1);
1211 if (ref($c) eq 'ARRAY') {
1213 warn "config($p) taking first occurence of '$k', probably not what you wanted!\n";
1217 if (! defined($c->{$k}) ) {
1228 warn "## config( '$p' ) = ",dump( $v ),$/ if ($v && $debug);
1229 warn "config( '$p' ) is empty\n" if (! $v);
1236 Returns unique id of this record
1240 Returns C<42/2> for 2nd occurence of MFN 42.
1245 my $mfn = $config->{_mfn} || die "no _mfn in config data";
1246 return $mfn . $#{$marc_record} ? $#{$marc_record} + 1 : '';
1251 Joins walues with some delimiter
1253 $v = join_with(", ", @v);
1259 warn "### join_with('$d',",dump(@_),")\n" if ($debug > 2);
1260 my $v = join($d, grep { defined($_) && $_ ne '' } @_);
1261 return '' unless defined($v);
1267 Split record subfield on some regex and take one of parts out
1269 $a_before_semi_column =
1270 split_rec_on('200','a', /\s*;\s*/, $part);
1272 C<$part> is optional number of element. First element is
1275 If there is no C<$part> parameter or C<$part> is 0, this function will
1276 return all values produced by splitting.
1281 die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
1283 my ($fld, $sf, $regex, $part) = @_;
1284 warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
1286 my @r = rec( $fld, $sf );
1288 warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
1290 return '' if ( ! defined($v) || $v =~ /^\s*$/);
1292 my @s = split( $regex, $v );
1293 warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
1294 if ($part && $part > 0) {
1295 return $s[ $part - 1 ];
1305 set( key => 'value' );
1311 warn "## set ( $k => ", dump($v), " )", $/ if ( $debug );
1322 my $k = shift || return;
1323 my $v = $hash->{$k};
1324 warn "## get $k = ", dump( $v ), $/ if ( $debug );
1330 if ( count( @result ) == 1 ) {
1331 # do something if only 1 result is there
1337 warn "## count ",dump(@_),$/ if ( $debug );