1 package WebPAC::Normalize;
11 marc marc_indicators marc_repeatable_subfield
12 marc_compose marc_leader
13 marc_duplicate marc_remove
17 regex prefix suffix surround
18 first lookup join_with
29 #use base qw/WebPAC::Common/;
30 use Data::Dump qw/dump/;
31 use Storable qw/dclone/;
40 WebPAC::Normalize - describe normalisaton rules using sets
48 our $VERSION = '0.25';
52 This module uses C<conf/normalize/*.pl> files to perform normalisation
53 from input records using perl functions which are specialized for set
56 Sets are implemented as arrays, and normalisation file is valid perl, which
57 means that you check it's validity before running WebPAC using
58 C<perl -c normalize.pl>.
60 Normalisation can generate multiple output normalized data. For now, supported output
61 types (on the left side of definition) are: C<tag>, C<display>, C<search> and
66 Functions which start with C<_> are private and used by WebPAC internally.
67 All other functions are available for use within normalisation rules.
73 my $ds = WebPAC::Normalize::data_structure(
74 lookup => $lookup_hash,
76 rules => $normalize_pl_config,
77 marc_encoding => 'utf-8',
79 load_row_coderef => sub {
80 my ($database,$input,$mfn) = shift;
81 $store->load_row( database => $database, input => $input, id => $mfn );
85 Options C<row>, C<rules> and C<log> are mandatory while all
88 C<load_row_coderef> is closure only used when executing lookups, so they will
89 die if it's not defined.
91 This function will B<die> if normalizastion can't be evaled.
93 Since this function isn't exported you have to call it with
94 C<WebPAC::Normalize::data_structure>.
103 die "need row argument" unless ($arg->{row});
104 die "need normalisation argument" unless ($arg->{rules});
107 _set_lookup( $arg->{lookup} ) if defined($arg->{lookup});
108 _set_rec( $arg->{row} );
109 _set_config( $arg->{config} ) if defined($arg->{config});
110 _clean_ds( %{ $arg } );
111 $load_row_coderef = $arg->{load_row_coderef};
113 eval "$arg->{rules}";
114 die "error evaling $arg->{rules}: $@\n" if ($@);
121 Set current record hash
130 $rec = shift or die "no record hash";
135 Set current config hash
137 _set_config( $config );
145 Code of current database
163 Return hash formatted as data structure
169 my ($out, $marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $leader);
170 my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
178 Clean data structure hash for next record
186 ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $leader) = ();
187 ($marc_record_offset, $marc_fetch_offset) = (0,0);
188 $marc_encoding = $a->{marc_encoding};
193 Set current lookup hash
195 _set_lookup( $lookup );
207 Get current lookup hash
209 my $lookup = _get_lookup();
219 Setup code reference which will return L<data_structure> from
223 my ($database,$input,$mfn) = @_;
224 $store->load_row( database => $database, input => $input, id => $mfn );
231 confess "argument isn't CODE" unless ref($coderef) eq 'CODE';
233 $load_row_coderef = $coderef;
236 =head2 _get_marc_fields
238 Get all fields defined by calls to C<marc>
240 $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
242 We are using I<magic> which detect repeatable fields only from
243 sequence of field/subfield data generated by normalization.
245 Repeatable field is created when there is second occurence of same subfield or
246 if any of indicators are different.
248 This is sane for most cases. Something like:
254 will be created from any combination of:
256 900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
260 marc('900','a', rec('200','a') );
261 marc('900','b', rec('200','b') );
262 marc('900','c', rec('200','c') );
264 which might not be what you have in mind. If you need repeatable subfield,
265 define it using C<marc_repeatable_subfield> like this:
267 marc_repeatable_subfield('900','a');
268 marc('900','a', rec('200','a') );
269 marc('900','b', rec('200','b') );
270 marc('900','c', rec('200','c') );
274 900a-1 900a-2 900a-3 900b-1 900c-1
277 There is also support for returning next or specific using:
279 while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
280 # do something with $mf
283 will always return fields from next MARC record or
285 my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
287 will return 42th copy record (if it exists).
291 sub _get_marc_fields {
294 warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
295 my $offset = $marc_fetch_offset;
296 if ($arg->{offset}) {
297 $offset = $arg->{offset};
298 } elsif($arg->{fetch_next}) {
299 $marc_fetch_offset++;
302 return if (! $marc_record || ref($marc_record) ne 'ARRAY');
304 warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
306 my $marc_rec = $marc_record->[ $offset ];
308 warn "## _get_marc_fields (at offset: $offset) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
310 return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
312 # first, sort all existing fields
313 # XXX might not be needed, but modern perl might randomize elements in hash
314 my @sorted_marc_record = sort {
315 $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
318 @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
323 # count unique field-subfields (used for offset when walking to next subfield)
325 map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
328 warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
329 warn "## marc_record[$offset] = ", dump( $marc_rec ), $/;
330 warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
331 warn "## subfield count = ", dump( $u ), $/;
334 my $len = $#sorted_marc_record;
339 foreach ( 0 .. $len ) {
341 # find next element which isn't visited
342 while ($visited->{$i}) {
343 $i = ($i + 1) % ($len + 1);
349 my $row = dclone( $sorted_marc_record[$i] );
351 # field and subfield which is key for
352 # marc_repeatable_subfield and u
353 my $fsf = $row->[0] . ( $row->[3] || '' );
357 print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
358 print "### this [$i]: ", dump( $row ),$/;
359 print "### sf: ", $row->[3], " vs ", $field->[3],
360 $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
366 if ( $#$field >= 0 ) {
368 $row->[0] ne $field->[0] || # field
369 $row->[1] ne $field->[1] || # i1
370 $row->[2] ne $field->[2] # i2
373 warn "## saved/1 ", dump( $field ),$/ if ($debug);
377 ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
379 ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
380 ! $marc_repeatable_subfield->{ $fsf }
384 warn "## saved/2 ", dump( $field ),$/ if ($debug);
388 # append new subfields to existing field
389 push @$field, ( $row->[3], $row->[4] );
396 if (! $marc_repeatable_subfield->{ $fsf }) {
397 # make step to next subfield
398 $i = ($i + $u->{ $fsf } ) % ($len + 1);
404 warn "## saved/3 ", dump( $field ),$/ if ($debug);
412 Change level of debug warnings
420 return $debug unless defined($l);
421 warn "debug level $l",$/ if ($l > 0);
425 =head1 Functions to create C<data_structure>
427 Those functions generally have to first in your normalization file.
431 Define new tag for I<search> and I<display>.
433 tag('Title', rec('200','a') );
439 my $name = shift or die "tag needs name as first argument";
440 my @o = grep { defined($_) && $_ ne '' } @_;
442 $out->{$name}->{tag} = $name;
443 $out->{$name}->{search} = \@o;
444 $out->{$name}->{display} = \@o;
449 Define tag just for I<display>
451 @v = display('Title', rec('200','a') );
456 my $name = shift or die "display needs name as first argument";
457 my @o = grep { defined($_) && $_ ne '' } @_;
459 $out->{$name}->{tag} = $name;
460 $out->{$name}->{display} = \@o;
465 Prepare values just for I<search>
467 @v = search('Title', rec('200','a') );
472 my $name = shift or die "search needs name as first argument";
473 my @o = grep { defined($_) && $_ ne '' } @_;
475 $out->{$name}->{tag} = $name;
476 $out->{$name}->{search} = \@o;
481 Setup fields within MARC leader or get leader
483 marc_leader('05','c');
484 my $leader = marc_leader();
489 my ($offset,$value) = @_;
492 $leader->{ $offset } = $value;
500 Save value for MARC field
502 marc('900','a', rec('200','a') );
503 marc('001', rec('000') );
508 my $f = shift or die "marc needs field";
509 die "marc field must be numer" unless ($f =~ /^\d+$/);
513 $sf = shift or die "marc needs subfield";
517 my $v = $_; # make var read-write for Encode
518 next unless (defined($v) && $v !~ /^\s*$/);
519 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
521 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
523 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
528 =head2 marc_repeatable_subfield
530 Save values for MARC repetable subfield
532 marc_repeatable_subfield('910', 'z', rec('909') );
536 sub marc_repeatable_subfield {
538 die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
539 $marc_repeatable_subfield->{ $f . $sf }++;
543 =head2 marc_indicators
545 Set both indicators for MARC field
547 marc_indicators('900', ' ', 1);
549 Any indicator value other than C<0-9> will be treated as undefined.
553 sub marc_indicators {
554 my $f = shift || die "marc_indicators need field!\n";
556 die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
557 die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
559 $i1 = ' ' if ($i1 !~ /^\d$/);
560 $i2 = ' ' if ($i2 !~ /^\d$/);
561 @{ $marc_indicators->{$f} } = ($i1,$i2);
566 Save values for each MARC subfield explicitly
575 If you specify C<+> for subfield, value will be appended
576 to previous defined subfield.
581 my $f = shift or die "marc_compose needs field";
582 die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
584 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
585 my $m = [ $f, $i1, $i2 ];
587 warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
590 die "ERROR: marc_compose",dump($f,@_)," not valid (must be even).\nDo you need to add first() or join() around some argument?\n";
597 next unless (defined($v) && $v !~ /^\s*$/);
598 warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
600 push @$m, ( $sf, $v );
606 warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
608 push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
611 =head2 marc_duplicate
613 Generate copy of current MARC record and continue working on copy
617 Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
618 C<< _get_marc_fields( offset => 42 ) >>.
623 my $m = $marc_record->[ -1 ];
624 die "can't duplicate record which isn't defined" unless ($m);
625 push @{ $marc_record }, dclone( $m );
626 warn "## marc_duplicate = ", dump(@$marc_record), $/ if ($debug > 1);
627 $marc_record_offset = $#{ $marc_record };
628 warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
633 Remove some field or subfield from MARC record.
636 marc_remove('200','a');
638 This will erase field C<200> or C<200^a> from current MARC record.
642 Will remove all fields in current MARC record.
644 This is useful after calling C<marc_duplicate> or on it's own (but, you
645 should probably just remove that subfield definition if you are not
646 using C<marc_duplicate>).
648 FIXME: support fields < 10.
655 die "marc_remove needs record number" unless defined($f);
657 my $marc = $marc_record->[ $marc_record_offset ];
659 warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
663 delete( $marc_record->[ $marc_record_offset ] );
668 foreach ( 0 .. $#{ $marc } ) {
669 last unless (defined $marc->[$i]);
670 warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
671 if ($marc->[$i]->[0] eq $f) {
674 splice @$marc, $i, 1;
675 warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
678 foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
679 my $o = ($j * 2) + 3;
680 if ($marc->[$i]->[$o] eq $sf) {
682 splice @{$marc->[$i]}, $o, 2;
683 warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
684 # is record now empty?
685 if ($#{ $marc->[$i] } == 2) {
686 splice @$marc, $i, 1;
687 warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
697 warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
699 $marc_record->[ $marc_record_offset ] = $marc;
703 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
706 =head2 marc_original_order
708 Copy all subfields preserving original order to marc field.
710 marc_original_order( marc_field_number, original_input_field_number );
712 Please note that field numbers are consistent with other commands (marc
713 field number first), but somewhat counter-intuitive (destination and then
716 You might want to use this command if you are just renaming subfields or
717 using pre-processing modify_record in C<config.yml> and don't need any
718 post-processing or want to preserve order of original subfields.
723 sub marc_original_order {
725 my ($to, $from) = @_;
726 die "marc_original_order needs from and to fields\n" unless ($from && $to);
728 return unless defined($rec->{$from});
730 my $r = $rec->{$from};
731 die "record field $from isn't array\n" unless (ref($r) eq 'ARRAY');
733 my ($i1,$i2) = defined($marc_indicators->{$to}) ? @{ $marc_indicators->{$to} } : (' ',' ');
734 warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
736 foreach my $d (@$r) {
738 if (! defined($d->{subfields}) && ref($d->{subfields}) ne 'ARRAY') {
739 warn "# marc_original_order($to,$from): field $from doesn't have subfields specification\n";
743 my @sfs = @{ $d->{subfields} };
745 die "field $from doesn't have even number of subfields specifications\n" unless($#sfs % 2 == 1);
747 warn "#--> d: ",dump($d), "\n#--> sfs: ",dump(@sfs),$/ if ($debug > 2);
749 my $m = [ $to, $i1, $i2 ];
751 while (my $sf = shift @sfs) {
753 warn "#--> sf: ",dump($sf), $/ if ($debug > 2);
754 my $offset = shift @sfs;
755 die "corrupted sufields specification for field $from\n" unless defined($offset);
758 if (ref($d->{$sf}) eq 'ARRAY') {
759 $v = $d->{$sf}->[$offset] if (defined($d->{$sf}->[$offset]));
760 } elsif ($offset == 0) {
763 die "field $from subfield '$sf' need occurence $offset which doesn't exist", dump($d->{$sf});
765 push @$m, ( $sf, $v ) if (defined($v));
769 push @{ $marc_record->[ $marc_record_offset ] }, $m;
773 warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
777 =head1 Functions to extract data from input
779 This function should be used inside functions to create C<data_structure> described
782 =head2 _pack_subfields_hash
784 @subfields = _pack_subfields_hash( $h );
785 $subfields = _pack_subfields_hash( $h, 1 );
787 Return each subfield value in array or pack them all together and return scalar
788 with subfields (denoted by C<^>) and values.
792 sub _pack_subfields_hash {
794 warn "## _pack_subfields_hash( ",dump(@_), " )\n" if ($debug > 1);
796 my ($h,$include_subfields) = @_;
798 if ( defined($h->{subfields}) ) {
799 my $sfs = delete $h->{subfields} || die "no subfields?";
802 my $sf = shift @$sfs;
803 push @out, '^' . $sf if ($include_subfields);
805 if ($o == 0 && ref( $h->{$sf} ) ne 'ARRAY' ) {
806 # single element subfields are not arrays
807 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
809 push @out, $h->{$sf};
811 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
812 push @out, $h->{$sf}->[$o];
815 if ($include_subfields) {
816 return join('', @out);
821 if ($include_subfields) {
823 foreach my $sf (sort keys %$h) {
824 if (ref($h->{$sf}) eq 'ARRAY') {
825 $out .= '^' . $sf . join('^' . $sf, @{ $h->{$sf} });
827 $out .= '^' . $sf . $h->{$sf};
832 # FIXME this should probably be in alphabetical order instead of hash order
840 Return all values in some field
844 TODO: order of values is probably same as in source data, need to investigate that
850 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
851 return unless (defined($rec) && defined($rec->{$f}));
852 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
853 if (ref($rec->{$f}) eq 'ARRAY') {
855 foreach my $h ( @{ $rec->{$f} } ) {
856 if (ref($h) eq 'HASH') {
857 push @out, ( _pack_subfields_hash( $h ) );
863 } elsif( defined($rec->{$f}) ) {
870 Return all values in specific field and subfield
878 return unless (defined($rec && $rec->{$f}));
880 warn "rec2($f,$sf) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
882 if (ref($_->{$sf}) eq 'ARRAY') {
887 } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
897 If rec() returns just single value, it will
898 return scalar, not array.
909 if ($#out == 0 && ! wantarray) {
920 Apply regex to some or all values
922 @v = regex( 's/foo/bar/g', @v );
929 #warn "r: $r\n", dump(\@_);
933 push @out, $t if ($t && $t ne '');
940 Prefix all values with a string
942 @v = prefix( 'my_', @v );
947 my $p = shift or return;
948 return map { $p . $_ } grep { defined($_) } @_;
953 suffix all values with a string
955 @v = suffix( '_my', @v );
960 my $s = shift or die "suffix needs string as first argument";
961 return map { $_ . $s } grep { defined($_) } @_;
966 surround all values with a two strings
968 @v = surround( 'prefix_', '_suffix', @v );
973 my $p = shift or die "surround need prefix as first argument";
974 my $s = shift or die "surround needs suffix as second argument";
975 return map { $p . $_ . $s } grep { defined($_) } @_;
993 Consult lookup hashes for some value
997 'ffkk/peri/mfn'.rec('000')
999 'ffkk','peri','200-a-200-e',
1001 first(rec(200,'a')).' '.first(rec('200','e'))
1005 Code like above will be B<automatically generated> using L<WebPAC::Parse> from
1006 normal lookup definition in C<conf/lookup/something.pl> which looks like:
1009 # which results to return from record recorded in lookup
1010 sub { 'ffkk/peri/mfn' . rec('000') },
1011 # from which database and input
1013 # such that following values match
1014 sub { first(rec(200,'a')) . ' ' . first(rec('200','e')) },
1015 # if this part is missing, we will try to match same fields
1016 # from lookup record and current one, or you can override
1017 # which records to use from current record using
1018 sub { rec('900','x') . ' ' . rec('900','y') },
1021 You can think about this lookup as SQL (if that helps):
1028 sub { filter from lookuped record }
1030 sub { optional filter on current record }
1037 my ($what, $database, $input, $key, $having) = @_;
1039 confess "lookup needs 5 arguments: what, database, input, key, having\n" unless ($#_ == 4);
1041 warn "## lookup ($database, $input, $key)", $/ if ($debug > 1);
1042 return unless (defined($lookup->{$database}->{$input}->{$key}));
1044 confess "lookup really need load_row_coderef added to data_structure\n" unless ($load_row_coderef);
1047 my @having = $having->();
1049 warn "## having = ", dump( @having ) if ($debug > 2);
1051 foreach my $h ( @having ) {
1052 if (defined($lookup->{$database}->{$input}->{$key}->{$h})) {
1053 warn "lookup for $database/$input/$key/$h return ",dump($lookup->{$database}->{$input}->{$key}->{$h}),"\n" if ($debug);
1054 $mfns->{$_}++ foreach keys %{ $lookup->{$database}->{$input}->{$key}->{$h} };
1058 return unless ($mfns);
1060 my @mfns = sort keys %$mfns;
1062 warn "# lookup loading $database/$input/$key mfn ", join(",",@mfns)," having ",dump(@having),"\n" if ($debug);
1067 foreach my $mfn (@mfns) {
1068 $rec = $load_row_coderef->( $database, $input, $mfn );
1070 warn "got $database/$input/$mfn = ", dump($rec), $/ if ($debug);
1072 my @vals = $what->();
1074 push @out, ( @vals );
1076 warn "lookup for mfn $mfn returned ", dump(@vals), $/ if ($debug);
1079 # if (ref($lookup->{$k}) eq 'ARRAY') {
1080 # return @{ $lookup->{$k} };
1082 # return $lookup->{$k};
1087 warn "## lookup returns = ", dump(@out), $/ if ($debug);
1096 =head2 save_into_lookup
1098 Save value into lookup. It associates current database, input
1099 and specific keys with one or more values which will be
1100 associated over MFN.
1102 MFN will be extracted from first occurence current of field 000
1103 in current record, or if it doesn't exist from L<_set_config> C<_mfn>.
1105 my $nr = save_into_lookup($database,$input,$key,sub {
1106 # code which produce one or more values
1109 It returns number of items saved.
1111 This function shouldn't be called directly, it's called from code created by
1116 sub save_into_lookup {
1117 my ($database,$input,$key,$coderef) = @_;
1118 die "save_into_lookup needs database" unless defined($database);
1119 die "save_into_lookup needs input" unless defined($input);
1120 die "save_into_lookup needs key" unless defined($key);
1121 die "save_into_lookup needs CODE" unless ( defined($coderef) && ref($coderef) eq 'CODE' );
1123 warn "## save_into_lookup rec = ", dump($rec), " config = ", dump($config), $/ if ($debug > 2);
1126 defined($rec->{'000'}->[0]) ? $rec->{'000'}->[0] :
1127 defined($config->{_mfn}) ? $config->{_mfn} :
1128 die "mfn not defined or zero";
1132 foreach my $v ( $coderef->() ) {
1133 $lookup->{$database}->{$input}->{$key}->{$v}->{$mfn}++;
1134 warn "# saved lookup $database/$input/$key [$v] $mfn\n" if ($debug > 1);
1143 Consult config values stored in C<config.yml>
1145 # return database code (key under databases in yaml)
1146 $database_code = config(); # use _ from hash
1147 $database_name = config('name');
1148 $database_input_name = config('input name');
1149 $tag = config('input normalize tag');
1151 Up to three levels are supported.
1156 return unless ($config);
1164 warn "### getting config($p)\n" if ($debug > 1);
1166 my @p = split(/\s+/,$p);
1168 $v = $config->{ '_' }; # special, database code
1171 my $c = dclone( $config );
1173 foreach my $k (@p) {
1174 warn "### k: $k c = ",dump($c),$/ if ($debug > 1);
1175 if (ref($c) eq 'ARRAY') {
1177 warn "config($p) taking first occurence of '$k', probably not what you wanted!\n";
1181 if (! defined($c->{$k}) ) {
1192 warn "## config( '$p' ) = ",dump( $v ),$/ if ($v && $debug);
1193 warn "config( '$p' ) is empty\n" if (! $v);
1200 Returns unique id of this record
1204 Returns C<42/2> for 2nd occurence of MFN 42.
1209 my $mfn = $config->{_mfn} || die "no _mfn in config data";
1210 return $mfn . $#{$marc_record} ? $#{$marc_record} + 1 : '';
1215 Joins walues with some delimiter
1217 $v = join_with(", ", @v);
1223 warn "### join_with('$d',",dump(@_),")\n" if ($debug > 2);
1224 my $v = join($d, grep { defined($_) && $_ ne '' } @_);
1225 return '' unless defined($v);
1231 Split record subfield on some regex and take one of parts out
1233 $a_before_semi_column =
1234 split_rec_on('200','a', /\s*;\s*/, $part);
1236 C<$part> is optional number of element. First element is
1239 If there is no C<$part> parameter or C<$part> is 0, this function will
1240 return all values produced by splitting.
1245 die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
1247 my ($fld, $sf, $regex, $part) = @_;
1248 warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
1250 my @r = rec( $fld, $sf );
1252 warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
1254 return '' if ( ! defined($v) || $v =~ /^\s*$/);
1256 my @s = split( $regex, $v );
1257 warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
1258 if ($part && $part > 0) {
1259 return $s[ $part - 1 ];
1269 set( key => 'value' );
1275 warn "## set ( $k => ", dump($v), " )", $/;
1286 my $k = shift || return;
1287 my $v = $hash->{$k};
1288 warn "## get $k = ", dump( $v ), $/;