1 package WebPAC::Normalize;
11 marc marc_indicators marc_repeatable_subfield
12 marc_compose marc_leader
13 marc_duplicate marc_remove
17 regex prefix suffix surround
18 first lookup join_with
29 #use base qw/WebPAC::Common/;
30 use Data::Dump qw/dump/;
31 use Storable qw/dclone/;
40 WebPAC::Normalize - describe normalisaton rules using sets
48 our $VERSION = '0.24';
52 This module uses C<conf/normalize/*.pl> files to perform normalisation
53 from input records using perl functions which are specialized for set
56 Sets are implemented as arrays, and normalisation file is valid perl, which
57 means that you check it's validity before running WebPAC using
58 C<perl -c normalize.pl>.
60 Normalisation can generate multiple output normalized data. For now, supported output
61 types (on the left side of definition) are: C<tag>, C<display>, C<search> and
66 Functions which start with C<_> are private and used by WebPAC internally.
67 All other functions are available for use within normalisation rules.
73 my $ds = WebPAC::Normalize::data_structure(
74 lookup => $lookup_hash,
76 rules => $normalize_pl_config,
77 marc_encoding => 'utf-8',
79 load_row_coderef => sub {
80 my ($database,$input,$mfn) = shift;
81 $store->load_row( database => $database, input => $input, id => $mfn );
85 Options C<row>, C<rules> and C<log> are mandatory while all
88 C<load_row_coderef> is closure only used when executing lookups, so they will
89 die if it's not defined.
91 This function will B<die> if normalizastion can't be evaled.
93 Since this function isn't exported you have to call it with
94 C<WebPAC::Normalize::data_structure>.
103 die "need row argument" unless ($arg->{row});
104 die "need normalisation argument" unless ($arg->{rules});
107 _set_lookup( $arg->{lookup} ) if defined($arg->{lookup});
108 _set_rec( $arg->{row} );
109 _set_config( $arg->{config} ) if defined($arg->{config});
110 _clean_ds( %{ $arg } );
111 $load_row_coderef = $arg->{load_row_coderef};
113 eval "$arg->{rules}";
114 die "error evaling $arg->{rules}: $@\n" if ($@);
121 Set current record hash
130 $rec = shift or die "no record hash";
135 Set current config hash
137 _set_config( $config );
145 Code of current database
163 Return hash formatted as data structure
169 my ($out, $marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $leader);
170 my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
178 Clean data structure hash for next record
186 ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $leader) = ();
187 ($marc_record_offset, $marc_fetch_offset) = (0,0);
188 $marc_encoding = $a->{marc_encoding};
193 Set current lookup hash
195 _set_lookup( $lookup );
207 Get current lookup hash
209 my $lookup = _get_lookup();
219 Setup code reference which will return L<data_structure> from
223 my ($database,$input,$mfn) = @_;
224 $store->load_row( database => $database, input => $input, id => $mfn );
231 confess "argument isn't CODE" unless ref($coderef) eq 'CODE';
233 $load_row_coderef = $coderef;
236 =head2 _get_marc_fields
238 Get all fields defined by calls to C<marc>
240 $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
242 We are using I<magic> which detect repeatable fields only from
243 sequence of field/subfield data generated by normalization.
245 Repeatable field is created when there is second occurence of same subfield or
246 if any of indicators are different.
248 This is sane for most cases. Something like:
254 will be created from any combination of:
256 900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
260 marc('900','a', rec('200','a') );
261 marc('900','b', rec('200','b') );
262 marc('900','c', rec('200','c') );
264 which might not be what you have in mind. If you need repeatable subfield,
265 define it using C<marc_repeatable_subfield> like this:
267 marc_repeatable_subfield('900','a');
268 marc('900','a', rec('200','a') );
269 marc('900','b', rec('200','b') );
270 marc('900','c', rec('200','c') );
274 900a-1 900a-2 900a-3 900b-1 900c-1
277 There is also support for returning next or specific using:
279 while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
280 # do something with $mf
283 will always return fields from next MARC record or
285 my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
287 will return 42th copy record (if it exists).
291 sub _get_marc_fields {
294 warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
295 my $offset = $marc_fetch_offset;
296 if ($arg->{offset}) {
297 $offset = $arg->{offset};
298 } elsif($arg->{fetch_next}) {
299 $marc_fetch_offset++;
302 return if (! $marc_record || ref($marc_record) ne 'ARRAY');
304 warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
306 my $marc_rec = $marc_record->[ $offset ];
308 warn "## _get_marc_fields (at offset: $offset) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
310 return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
312 # first, sort all existing fields
313 # XXX might not be needed, but modern perl might randomize elements in hash
314 my @sorted_marc_record = sort {
315 $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
318 @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
323 # count unique field-subfields (used for offset when walking to next subfield)
325 map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
328 warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
329 warn "## marc_record[$offset] = ", dump( $marc_rec ), $/;
330 warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
331 warn "## subfield count = ", dump( $u ), $/;
334 my $len = $#sorted_marc_record;
339 foreach ( 0 .. $len ) {
341 # find next element which isn't visited
342 while ($visited->{$i}) {
343 $i = ($i + 1) % ($len + 1);
349 my $row = dclone( $sorted_marc_record[$i] );
351 # field and subfield which is key for
352 # marc_repeatable_subfield and u
353 my $fsf = $row->[0] . ( $row->[3] || '' );
357 print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
358 print "### this [$i]: ", dump( $row ),$/;
359 print "### sf: ", $row->[3], " vs ", $field->[3],
360 $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
366 if ( $#$field >= 0 ) {
368 $row->[0] ne $field->[0] || # field
369 $row->[1] ne $field->[1] || # i1
370 $row->[2] ne $field->[2] # i2
373 warn "## saved/1 ", dump( $field ),$/ if ($debug);
377 ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
379 ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
380 ! $marc_repeatable_subfield->{ $fsf }
384 warn "## saved/2 ", dump( $field ),$/ if ($debug);
388 # append new subfields to existing field
389 push @$field, ( $row->[3], $row->[4] );
396 if (! $marc_repeatable_subfield->{ $fsf }) {
397 # make step to next subfield
398 $i = ($i + $u->{ $fsf } ) % ($len + 1);
404 warn "## saved/3 ", dump( $field ),$/ if ($debug);
412 Change level of debug warnings
420 return $debug unless defined($l);
421 warn "debug level $l",$/ if ($l > 0);
425 =head1 Functions to create C<data_structure>
427 Those functions generally have to first in your normalization file.
431 Define new tag for I<search> and I<display>.
433 tag('Title', rec('200','a') );
439 my $name = shift or die "tag needs name as first argument";
440 my @o = grep { defined($_) && $_ ne '' } @_;
442 $out->{$name}->{tag} = $name;
443 $out->{$name}->{search} = \@o;
444 $out->{$name}->{display} = \@o;
449 Define tag just for I<display>
451 @v = display('Title', rec('200','a') );
456 my $name = shift or die "display needs name as first argument";
457 my @o = grep { defined($_) && $_ ne '' } @_;
459 $out->{$name}->{tag} = $name;
460 $out->{$name}->{display} = \@o;
465 Prepare values just for I<search>
467 @v = search('Title', rec('200','a') );
472 my $name = shift or die "search needs name as first argument";
473 my @o = grep { defined($_) && $_ ne '' } @_;
475 $out->{$name}->{tag} = $name;
476 $out->{$name}->{search} = \@o;
481 Setup fields within MARC leader or get leader
483 marc_leader('05','c');
484 my $leader = marc_leader();
489 my ($offset,$value) = @_;
492 $leader->{ $offset } = $value;
500 Save value for MARC field
502 marc('900','a', rec('200','a') );
503 marc('001', rec('000') );
508 my $f = shift or die "marc needs field";
509 die "marc field must be numer" unless ($f =~ /^\d+$/);
513 $sf = shift or die "marc needs subfield";
517 my $v = $_; # make var read-write for Encode
518 next unless (defined($v) && $v !~ /^\s*$/);
519 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
521 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
523 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
528 =head2 marc_repeatable_subfield
530 Save values for MARC repetable subfield
532 marc_repeatable_subfield('910', 'z', rec('909') );
536 sub marc_repeatable_subfield {
538 die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
539 $marc_repeatable_subfield->{ $f . $sf }++;
543 =head2 marc_indicators
545 Set both indicators for MARC field
547 marc_indicators('900', ' ', 1);
549 Any indicator value other than C<0-9> will be treated as undefined.
553 sub marc_indicators {
554 my $f = shift || die "marc_indicators need field!\n";
556 die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
557 die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
559 $i1 = ' ' if ($i1 !~ /^\d$/);
560 $i2 = ' ' if ($i2 !~ /^\d$/);
561 @{ $marc_indicators->{$f} } = ($i1,$i2);
566 Save values for each MARC subfield explicitly
575 If you specify C<+> for subfield, value will be appended
576 to previous defined subfield.
581 my $f = shift or die "marc_compose needs field";
582 die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
584 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
585 my $m = [ $f, $i1, $i2 ];
587 warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
590 die "ERROR: marc_compose",dump($f,@_)," not valid (must be even).\nDo you need to add first() or join() around some argument?\n";
597 next unless (defined($v) && $v !~ /^\s*$/);
598 warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
600 push @$m, ( $sf, $v );
606 warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
608 push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
611 =head2 marc_duplicate
613 Generate copy of current MARC record and continue working on copy
617 Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
618 C<< _get_marc_fields( offset => 42 ) >>.
623 my $m = $marc_record->[ -1 ];
624 die "can't duplicate record which isn't defined" unless ($m);
625 push @{ $marc_record }, dclone( $m );
626 warn "## marc_duplicate = ", dump(@$marc_record), $/ if ($debug > 1);
627 $marc_record_offset = $#{ $marc_record };
628 warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
633 Remove some field or subfield from MARC record.
636 marc_remove('200','a');
638 This will erase field C<200> or C<200^a> from current MARC record.
640 This is useful after calling C<marc_duplicate> or on it's own (but, you
641 should probably just remove that subfield definition if you are not
642 using C<marc_duplicate>).
644 FIXME: support fields < 10.
651 die "marc_remove needs record number" unless defined($f);
653 my $marc = $marc_record->[ $marc_record_offset ];
655 warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
658 foreach ( 0 .. $#{ $marc } ) {
659 last unless (defined $marc->[$i]);
660 warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
661 if ($marc->[$i]->[0] eq $f) {
664 splice @$marc, $i, 1;
665 warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
668 foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
669 my $o = ($j * 2) + 3;
670 if ($marc->[$i]->[$o] eq $sf) {
672 splice @{$marc->[$i]}, $o, 2;
673 warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
674 # is record now empty?
675 if ($#{ $marc->[$i] } == 2) {
676 splice @$marc, $i, 1;
677 warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
687 warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
689 $marc_record->[ $marc_record_offset ] = $marc;
691 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
694 =head2 marc_original_order
696 Copy all subfields preserving original order to marc field.
698 marc_original_order( marc_field_number, original_input_field_number );
700 Please note that field numbers are consistent with other commands (marc
701 field number first), but somewhat counter-intuitive (destination and then
704 You might want to use this command if you are just renaming subfields or
705 using pre-processing modify_record in C<config.yml> and don't need any
706 post-processing or want to preserve order of original subfields.
711 sub marc_original_order {
713 my ($to, $from) = @_;
714 die "marc_original_order needs from and to fields\n" unless ($from && $to);
716 return unless defined($rec->{$from});
718 my $r = $rec->{$from};
719 die "record field $from isn't array\n" unless (ref($r) eq 'ARRAY');
721 my ($i1,$i2) = defined($marc_indicators->{$to}) ? @{ $marc_indicators->{$to} } : (' ',' ');
722 warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
724 foreach my $d (@$r) {
726 if (! defined($d->{subfields}) && ref($d->{subfields}) ne 'ARRAY') {
727 warn "# marc_original_order($to,$from): field $from doesn't have subfields specification\n";
731 my @sfs = @{ $d->{subfields} };
733 die "field $from doesn't have even number of subfields specifications\n" unless($#sfs % 2 == 1);
735 warn "#--> d: ",dump($d), "\n#--> sfs: ",dump(@sfs),$/ if ($debug > 2);
737 my $m = [ $to, $i1, $i2 ];
739 while (my $sf = shift @sfs) {
741 warn "#--> sf: ",dump($sf), $/ if ($debug > 2);
742 my $offset = shift @sfs;
743 die "corrupted sufields specification for field $from\n" unless defined($offset);
746 if (ref($d->{$sf}) eq 'ARRAY') {
747 $v = $d->{$sf}->[$offset] if (defined($d->{$sf}->[$offset]));
748 } elsif ($offset == 0) {
751 die "field $from subfield '$sf' need occurence $offset which doesn't exist", dump($d->{$sf});
753 push @$m, ( $sf, $v ) if (defined($v));
757 push @{ $marc_record->[ $marc_record_offset ] }, $m;
761 warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
765 =head1 Functions to extract data from input
767 This function should be used inside functions to create C<data_structure> described
770 =head2 _pack_subfields_hash
772 @subfields = _pack_subfields_hash( $h );
773 $subfields = _pack_subfields_hash( $h, 1 );
775 Return each subfield value in array or pack them all together and return scalar
776 with subfields (denoted by C<^>) and values.
780 sub _pack_subfields_hash {
782 warn "## _pack_subfields_hash( ",dump(@_), " )\n" if ($debug > 1);
784 my ($h,$include_subfields) = @_;
786 if ( defined($h->{subfields}) ) {
787 my $sfs = delete $h->{subfields} || die "no subfields?";
790 my $sf = shift @$sfs;
791 push @out, '^' . $sf if ($include_subfields);
793 if ($o == 0 && ref( $h->{$sf} ) ne 'ARRAY' ) {
794 # single element subfields are not arrays
795 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
797 push @out, $h->{$sf};
799 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
800 push @out, $h->{$sf}->[$o];
803 if ($include_subfields) {
804 return join('', @out);
809 if ($include_subfields) {
811 foreach my $sf (sort keys %$h) {
812 if (ref($h->{$sf}) eq 'ARRAY') {
813 $out .= '^' . $sf . join('^' . $sf, @{ $h->{$sf} });
815 $out .= '^' . $sf . $h->{$sf};
820 # FIXME this should probably be in alphabetical order instead of hash order
828 Return all values in some field
832 TODO: order of values is probably same as in source data, need to investigate that
838 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
839 return unless (defined($rec) && defined($rec->{$f}));
840 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
841 if (ref($rec->{$f}) eq 'ARRAY') {
843 foreach my $h ( @{ $rec->{$f} } ) {
844 if (ref($h) eq 'HASH') {
845 push @out, ( _pack_subfields_hash( $h ) );
851 } elsif( defined($rec->{$f}) ) {
858 Return all values in specific field and subfield
866 return unless (defined($rec && $rec->{$f}));
868 warn "rec2($f,$sf) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
870 if (ref($_->{$sf}) eq 'ARRAY') {
875 } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
885 If rec() returns just single value, it will
886 return scalar, not array.
897 if ($#out == 0 && ! wantarray) {
908 Apply regex to some or all values
910 @v = regex( 's/foo/bar/g', @v );
917 #warn "r: $r\n", dump(\@_);
921 push @out, $t if ($t && $t ne '');
928 Prefix all values with a string
930 @v = prefix( 'my_', @v );
935 my $p = shift or return;
936 return map { $p . $_ } grep { defined($_) } @_;
941 suffix all values with a string
943 @v = suffix( '_my', @v );
948 my $s = shift or die "suffix needs string as first argument";
949 return map { $_ . $s } grep { defined($_) } @_;
954 surround all values with a two strings
956 @v = surround( 'prefix_', '_suffix', @v );
961 my $p = shift or die "surround need prefix as first argument";
962 my $s = shift or die "surround needs suffix as second argument";
963 return map { $p . $_ . $s } grep { defined($_) } @_;
981 Consult lookup hashes for some value
985 'ffkk/peri/mfn'.rec('000')
987 'ffkk','peri','200-a-200-e',
989 first(rec(200,'a')).' '.first(rec('200','e'))
993 Code like above will be B<automatically generated> using L<WebPAC::Parse> from
994 normal lookup definition in C<conf/lookup/something.pl> which looks like:
997 # which results to return from record recorded in lookup
998 sub { 'ffkk/peri/mfn' . rec('000') },
999 # from which database and input
1001 # such that following values match
1002 sub { first(rec(200,'a')) . ' ' . first(rec('200','e')) },
1003 # if this part is missing, we will try to match same fields
1004 # from lookup record and current one, or you can override
1005 # which records to use from current record using
1006 sub { rec('900','x') . ' ' . rec('900','y') },
1009 You can think about this lookup as SQL (if that helps):
1016 sub { filter from lookuped record }
1018 sub { optional filter on current record }
1025 my ($what, $database, $input, $key, $having) = @_;
1027 confess "lookup needs 5 arguments: what, database, input, key, having\n" unless ($#_ == 4);
1029 warn "## lookup ($database, $input, $key)", $/ if ($debug > 1);
1030 return unless (defined($lookup->{$database}->{$input}->{$key}));
1032 confess "lookup really need load_row_coderef added to data_structure\n" unless ($load_row_coderef);
1035 my @having = $having->();
1037 warn "## having = ", dump( @having ) if ($debug > 2);
1039 foreach my $h ( @having ) {
1040 if (defined($lookup->{$database}->{$input}->{$key}->{$h})) {
1041 warn "lookup for $database/$input/$key/$h return ",dump($lookup->{$database}->{$input}->{$key}->{$h}),"\n" if ($debug);
1042 $mfns->{$_}++ foreach keys %{ $lookup->{$database}->{$input}->{$key}->{$h} };
1046 return unless ($mfns);
1048 my @mfns = sort keys %$mfns;
1050 warn "# lookup loading $database/$input/$key mfn ", join(",",@mfns)," having ",dump(@having),"\n" if ($debug);
1055 foreach my $mfn (@mfns) {
1056 $rec = $load_row_coderef->( $database, $input, $mfn );
1058 warn "got $database/$input/$mfn = ", dump($rec), $/ if ($debug);
1060 my @vals = $what->();
1062 push @out, ( @vals );
1064 warn "lookup for mfn $mfn returned ", dump(@vals), $/ if ($debug);
1067 # if (ref($lookup->{$k}) eq 'ARRAY') {
1068 # return @{ $lookup->{$k} };
1070 # return $lookup->{$k};
1075 warn "## lookup returns = ", dump(@out), $/ if ($debug);
1084 =head2 save_into_lookup
1086 Save value into lookup. It associates current database, input
1087 and specific keys with one or more values which will be
1088 associated over MFN.
1090 MFN will be extracted from first occurence current of field 000
1091 in current record, or if it doesn't exist from L<_set_config> C<_mfn>.
1093 my $nr = save_into_lookup($database,$input,$key,sub {
1094 # code which produce one or more values
1097 It returns number of items saved.
1099 This function shouldn't be called directly, it's called from code created by
1104 sub save_into_lookup {
1105 my ($database,$input,$key,$coderef) = @_;
1106 die "save_into_lookup needs database" unless defined($database);
1107 die "save_into_lookup needs input" unless defined($input);
1108 die "save_into_lookup needs key" unless defined($key);
1109 die "save_into_lookup needs CODE" unless ( defined($coderef) && ref($coderef) eq 'CODE' );
1111 warn "## save_into_lookup rec = ", dump($rec), " config = ", dump($config), $/ if ($debug > 2);
1114 defined($rec->{'000'}->[0]) ? $rec->{'000'}->[0] :
1115 defined($config->{_mfn}) ? $config->{_mfn} :
1116 die "mfn not defined or zero";
1120 foreach my $v ( $coderef->() ) {
1121 $lookup->{$database}->{$input}->{$key}->{$v}->{$mfn}++;
1122 warn "# saved lookup $database/$input/$key [$v] $mfn\n" if ($debug > 1);
1131 Consult config values stored in C<config.yml>
1133 # return database code (key under databases in yaml)
1134 $database_code = config(); # use _ from hash
1135 $database_name = config('name');
1136 $database_input_name = config('input name');
1137 $tag = config('input normalize tag');
1139 Up to three levels are supported.
1144 return unless ($config);
1152 warn "### getting config($p)\n" if ($debug > 1);
1154 my @p = split(/\s+/,$p);
1156 $v = $config->{ '_' }; # special, database code
1159 my $c = dclone( $config );
1161 foreach my $k (@p) {
1162 warn "### k: $k c = ",dump($c),$/ if ($debug > 1);
1163 if (ref($c) eq 'ARRAY') {
1165 warn "config($p) taking first occurence of '$k', probably not what you wanted!\n";
1169 if (! defined($c->{$k}) ) {
1180 warn "## config( '$p' ) = ",dump( $v ),$/ if ($v && $debug);
1181 warn "config( '$p' ) is empty\n" if (! $v);
1188 Returns unique id of this record
1192 Returns C<42/2> for 2nd occurence of MFN 42.
1197 my $mfn = $config->{_mfn} || die "no _mfn in config data";
1198 return $mfn . $#{$marc_record} ? $#{$marc_record} + 1 : '';
1203 Joins walues with some delimiter
1205 $v = join_with(", ", @v);
1211 warn "### join_with('$d',",dump(@_),")\n" if ($debug > 2);
1212 my $v = join($d, grep { defined($_) && $_ ne '' } @_);
1213 return '' unless defined($v);
1219 Split record subfield on some regex and take one of parts out
1221 $a_before_semi_column =
1222 split_rec_on('200','a', /\s*;\s*/, $part);
1224 C<$part> is optional number of element. First element is
1227 If there is no C<$part> parameter or C<$part> is 0, this function will
1228 return all values produced by splitting.
1233 die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
1235 my ($fld, $sf, $regex, $part) = @_;
1236 warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
1238 my @r = rec( $fld, $sf );
1240 warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
1242 return '' if ( ! defined($v) || $v =~ /^\s*$/);
1244 my @s = split( $regex, $v );
1245 warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
1246 if ($part && $part > 0) {
1247 return $s[ $part - 1 ];
1257 set( key => 'value' );
1263 warn "## set ( $k => ", dump($v), " )", $/;
1274 my $k = shift || return;
1275 my $v = $hash->{$k};
1276 warn "## get $k = ", dump( $v ), $/;