1 package WebPAC::Normalize;
11 marc marc_indicators marc_repeatable_subfield
12 marc_compose marc_leader
13 marc_duplicate marc_remove
17 regex prefix suffix surround
18 first lookup join_with
27 #use base qw/WebPAC::Common/;
28 use Data::Dump qw/dump/;
29 use Storable qw/dclone/;
38 WebPAC::Normalize - describe normalisaton rules using sets
46 our $VERSION = '0.23';
50 This module uses C<conf/normalize/*.pl> files to perform normalisation
51 from input records using perl functions which are specialized for set
54 Sets are implemented as arrays, and normalisation file is valid perl, which
55 means that you check it's validity before running WebPAC using
56 C<perl -c normalize.pl>.
58 Normalisation can generate multiple output normalized data. For now, supported output
59 types (on the left side of definition) are: C<tag>, C<display>, C<search> and
64 Functions which start with C<_> are private and used by WebPAC internally.
65 All other functions are available for use within normalisation rules.
71 my $ds = WebPAC::Normalize::data_structure(
72 lookup => $lookup_hash,
74 rules => $normalize_pl_config,
75 marc_encoding => 'utf-8',
77 load_row_coderef => sub {
78 my ($database,$input,$mfn) = shift;
79 $store->load_row( database => $database, input => $input, id => $mfn );
83 Options C<row>, C<rules> and C<log> are mandatory while all
86 C<load_row_coderef> is closure only used when executing lookups, so they will
87 die if it's not defined.
89 This function will B<die> if normalizastion can't be evaled.
91 Since this function isn't exported you have to call it with
92 C<WebPAC::Normalize::data_structure>.
101 die "need row argument" unless ($arg->{row});
102 die "need normalisation argument" unless ($arg->{rules});
105 _set_lookup( $arg->{lookup} ) if defined($arg->{lookup});
106 _set_rec( $arg->{row} );
107 _set_config( $arg->{config} ) if defined($arg->{config});
108 _clean_ds( %{ $arg } );
109 $load_row_coderef = $arg->{load_row_coderef};
111 eval "$arg->{rules}";
112 die "error evaling $arg->{rules}: $@\n" if ($@);
119 Set current record hash
128 $rec = shift or die "no record hash";
133 Set current config hash
135 _set_config( $config );
143 Code of current database
161 Return hash formatted as data structure
167 my ($out, $marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $leader);
168 my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
176 Clean data structure hash for next record
184 ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $leader) = ();
185 ($marc_record_offset, $marc_fetch_offset) = (0,0);
186 $marc_encoding = $a->{marc_encoding};
191 Set current lookup hash
193 _set_lookup( $lookup );
205 Get current lookup hash
207 my $lookup = _get_lookup();
217 Setup code reference which will return L<data_structure> from
221 my ($database,$input,$mfn) = @_;
222 $store->load_row( database => $database, input => $input, id => $mfn );
229 confess "argument isn't CODE" unless ref($coderef) eq 'CODE';
231 $load_row_coderef = $coderef;
234 =head2 _get_marc_fields
236 Get all fields defined by calls to C<marc>
238 $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
240 We are using I<magic> which detect repeatable fields only from
241 sequence of field/subfield data generated by normalization.
243 Repeatable field is created when there is second occurence of same subfield or
244 if any of indicators are different.
246 This is sane for most cases. Something like:
252 will be created from any combination of:
254 900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
258 marc('900','a', rec('200','a') );
259 marc('900','b', rec('200','b') );
260 marc('900','c', rec('200','c') );
262 which might not be what you have in mind. If you need repeatable subfield,
263 define it using C<marc_repeatable_subfield> like this:
265 marc_repeatable_subfield('900','a');
266 marc('900','a', rec('200','a') );
267 marc('900','b', rec('200','b') );
268 marc('900','c', rec('200','c') );
272 900a-1 900a-2 900a-3 900b-1 900c-1
275 There is also support for returning next or specific using:
277 while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
278 # do something with $mf
281 will always return fields from next MARC record or
283 my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
285 will return 42th copy record (if it exists).
289 sub _get_marc_fields {
292 warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
293 my $offset = $marc_fetch_offset;
294 if ($arg->{offset}) {
295 $offset = $arg->{offset};
296 } elsif($arg->{fetch_next}) {
297 $marc_fetch_offset++;
300 return if (! $marc_record || ref($marc_record) ne 'ARRAY');
302 warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
304 my $marc_rec = $marc_record->[ $offset ];
306 warn "## _get_marc_fields (at offset: $offset) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
308 return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
310 # first, sort all existing fields
311 # XXX might not be needed, but modern perl might randomize elements in hash
312 my @sorted_marc_record = sort {
313 $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
316 @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
321 # count unique field-subfields (used for offset when walking to next subfield)
323 map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
326 warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
327 warn "## marc_record[$offset] = ", dump( $marc_rec ), $/;
328 warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
329 warn "## subfield count = ", dump( $u ), $/;
332 my $len = $#sorted_marc_record;
337 foreach ( 0 .. $len ) {
339 # find next element which isn't visited
340 while ($visited->{$i}) {
341 $i = ($i + 1) % ($len + 1);
347 my $row = dclone( $sorted_marc_record[$i] );
349 # field and subfield which is key for
350 # marc_repeatable_subfield and u
351 my $fsf = $row->[0] . ( $row->[3] || '' );
355 print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
356 print "### this [$i]: ", dump( $row ),$/;
357 print "### sf: ", $row->[3], " vs ", $field->[3],
358 $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
364 if ( $#$field >= 0 ) {
366 $row->[0] ne $field->[0] || # field
367 $row->[1] ne $field->[1] || # i1
368 $row->[2] ne $field->[2] # i2
371 warn "## saved/1 ", dump( $field ),$/ if ($debug);
375 ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
377 ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
378 ! $marc_repeatable_subfield->{ $fsf }
382 warn "## saved/2 ", dump( $field ),$/ if ($debug);
386 # append new subfields to existing field
387 push @$field, ( $row->[3], $row->[4] );
394 if (! $marc_repeatable_subfield->{ $fsf }) {
395 # make step to next subfield
396 $i = ($i + $u->{ $fsf } ) % ($len + 1);
402 warn "## saved/3 ", dump( $field ),$/ if ($debug);
410 Change level of debug warnings
418 return $debug unless defined($l);
419 warn "debug level $l",$/ if ($l > 0);
423 =head1 Functions to create C<data_structure>
425 Those functions generally have to first in your normalization file.
429 Define new tag for I<search> and I<display>.
431 tag('Title', rec('200','a') );
437 my $name = shift or die "tag needs name as first argument";
438 my @o = grep { defined($_) && $_ ne '' } @_;
440 $out->{$name}->{tag} = $name;
441 $out->{$name}->{search} = \@o;
442 $out->{$name}->{display} = \@o;
447 Define tag just for I<display>
449 @v = display('Title', rec('200','a') );
454 my $name = shift or die "display needs name as first argument";
455 my @o = grep { defined($_) && $_ ne '' } @_;
457 $out->{$name}->{tag} = $name;
458 $out->{$name}->{display} = \@o;
463 Prepare values just for I<search>
465 @v = search('Title', rec('200','a') );
470 my $name = shift or die "search needs name as first argument";
471 my @o = grep { defined($_) && $_ ne '' } @_;
473 $out->{$name}->{tag} = $name;
474 $out->{$name}->{search} = \@o;
479 Setup fields within MARC leader or get leader
481 marc_leader('05','c');
482 my $leader = marc_leader();
487 my ($offset,$value) = @_;
490 $leader->{ $offset } = $value;
498 Save value for MARC field
500 marc('900','a', rec('200','a') );
501 marc('001', rec('000') );
506 my $f = shift or die "marc needs field";
507 die "marc field must be numer" unless ($f =~ /^\d+$/);
511 $sf = shift or die "marc needs subfield";
515 my $v = $_; # make var read-write for Encode
516 next unless (defined($v) && $v !~ /^\s*$/);
517 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
519 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
521 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
526 =head2 marc_repeatable_subfield
528 Save values for MARC repetable subfield
530 marc_repeatable_subfield('910', 'z', rec('909') );
534 sub marc_repeatable_subfield {
536 die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
537 $marc_repeatable_subfield->{ $f . $sf }++;
541 =head2 marc_indicators
543 Set both indicators for MARC field
545 marc_indicators('900', ' ', 1);
547 Any indicator value other than C<0-9> will be treated as undefined.
551 sub marc_indicators {
552 my $f = shift || die "marc_indicators need field!\n";
554 die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
555 die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
557 $i1 = ' ' if ($i1 !~ /^\d$/);
558 $i2 = ' ' if ($i2 !~ /^\d$/);
559 @{ $marc_indicators->{$f} } = ($i1,$i2);
564 Save values for each MARC subfield explicitly
573 If you specify C<+> for subfield, value will be appended
574 to previous defined subfield.
579 my $f = shift or die "marc_compose needs field";
580 die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
582 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
583 my $m = [ $f, $i1, $i2 ];
585 warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
588 die "ERROR: marc_compose",dump($f,@_)," not valid (must be even).\nDo you need to add first() or join() around some argument?\n";
595 next unless (defined($v) && $v !~ /^\s*$/);
596 warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
598 push @$m, ( $sf, $v );
604 warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
606 push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
609 =head2 marc_duplicate
611 Generate copy of current MARC record and continue working on copy
615 Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
616 C<< _get_marc_fields( offset => 42 ) >>.
621 my $m = $marc_record->[ -1 ];
622 die "can't duplicate record which isn't defined" unless ($m);
623 push @{ $marc_record }, dclone( $m );
624 warn "## marc_duplicate = ", dump(@$marc_record), $/ if ($debug > 1);
625 $marc_record_offset = $#{ $marc_record };
626 warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
631 Remove some field or subfield from MARC record.
634 marc_remove('200','a');
636 This will erase field C<200> or C<200^a> from current MARC record.
638 This is useful after calling C<marc_duplicate> or on it's own (but, you
639 should probably just remove that subfield definition if you are not
640 using C<marc_duplicate>).
642 FIXME: support fields < 10.
649 die "marc_remove needs record number" unless defined($f);
651 my $marc = $marc_record->[ $marc_record_offset ];
653 warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
656 foreach ( 0 .. $#{ $marc } ) {
657 last unless (defined $marc->[$i]);
658 warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
659 if ($marc->[$i]->[0] eq $f) {
662 splice @$marc, $i, 1;
663 warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
666 foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
667 my $o = ($j * 2) + 3;
668 if ($marc->[$i]->[$o] eq $sf) {
670 splice @{$marc->[$i]}, $o, 2;
671 warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
672 # is record now empty?
673 if ($#{ $marc->[$i] } == 2) {
674 splice @$marc, $i, 1;
675 warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
685 warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
687 $marc_record->[ $marc_record_offset ] = $marc;
689 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
692 =head2 marc_original_order
694 Copy all subfields preserving original order to marc field.
696 marc_original_order( marc_field_number, original_input_field_number );
698 Please note that field numbers are consistent with other commands (marc
699 field number first), but somewhat counter-intuitive (destination and then
702 You might want to use this command if you are just renaming subfields or
703 using pre-processing modify_record in C<config.yml> and don't need any
704 post-processing or want to preserve order of original subfields.
709 sub marc_original_order {
711 my ($to, $from) = @_;
712 die "marc_original_order needs from and to fields\n" unless ($from && $to);
714 return unless defined($rec->{$from});
716 my $r = $rec->{$from};
717 die "record field $from isn't array\n" unless (ref($r) eq 'ARRAY');
719 my ($i1,$i2) = defined($marc_indicators->{$to}) ? @{ $marc_indicators->{$to} } : (' ',' ');
720 warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
722 foreach my $d (@$r) {
724 if (! defined($d->{subfields}) && ref($d->{subfields}) ne 'ARRAY') {
725 warn "# marc_original_order($to,$from): field $from doesn't have subfields specification\n";
729 my @sfs = @{ $d->{subfields} };
731 die "field $from doesn't have even number of subfields specifications\n" unless($#sfs % 2 == 1);
733 warn "#--> d: ",dump($d), "\n#--> sfs: ",dump(@sfs),$/ if ($debug > 2);
735 my $m = [ $to, $i1, $i2 ];
737 while (my $sf = shift @sfs) {
739 warn "#--> sf: ",dump($sf), $/ if ($debug > 2);
740 my $offset = shift @sfs;
741 die "corrupted sufields specification for field $from\n" unless defined($offset);
744 if (ref($d->{$sf}) eq 'ARRAY') {
745 $v = $d->{$sf}->[$offset] if (defined($d->{$sf}->[$offset]));
746 } elsif ($offset == 0) {
749 die "field $from subfield '$sf' need occurence $offset which doesn't exist", dump($d->{$sf});
751 push @$m, ( $sf, $v ) if (defined($v));
755 push @{ $marc_record->[ $marc_record_offset ] }, $m;
759 warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
763 =head1 Functions to extract data from input
765 This function should be used inside functions to create C<data_structure> described
768 =head2 _pack_subfields_hash
770 @subfields = _pack_subfields_hash( $h );
771 $subfields = _pack_subfields_hash( $h, 1 );
773 Return each subfield value in array or pack them all together and return scalar
774 with subfields (denoted by C<^>) and values.
778 sub _pack_subfields_hash {
780 warn "## _pack_subfields_hash( ",dump(@_), " )\n" if ($debug > 1);
782 my ($h,$include_subfields) = @_;
784 if ( defined($h->{subfields}) ) {
785 my $sfs = delete $h->{subfields} || die "no subfields?";
788 my $sf = shift @$sfs;
789 push @out, '^' . $sf if ($include_subfields);
791 if ($o == 0 && ref( $h->{$sf} ) ne 'ARRAY' ) {
792 # single element subfields are not arrays
793 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
795 push @out, $h->{$sf};
797 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
798 push @out, $h->{$sf}->[$o];
801 if ($include_subfields) {
802 return join('', @out);
807 if ($include_subfields) {
809 foreach my $sf (sort keys %$h) {
810 if (ref($h->{$sf}) eq 'ARRAY') {
811 $out .= '^' . $sf . join('^' . $sf, @{ $h->{$sf} });
813 $out .= '^' . $sf . $h->{$sf};
818 # FIXME this should probably be in alphabetical order instead of hash order
826 Return all values in some field
830 TODO: order of values is probably same as in source data, need to investigate that
836 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
837 return unless (defined($rec) && defined($rec->{$f}));
838 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
839 if (ref($rec->{$f}) eq 'ARRAY') {
841 foreach my $h ( @{ $rec->{$f} } ) {
842 if (ref($h) eq 'HASH') {
843 push @out, ( _pack_subfields_hash( $h ) );
849 } elsif( defined($rec->{$f}) ) {
856 Return all values in specific field and subfield
864 return unless (defined($rec && $rec->{$f}));
866 warn "rec2($f,$sf) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
868 if (ref($_->{$sf}) eq 'ARRAY') {
873 } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
883 If rec() returns just single value, it will
884 return scalar, not array.
895 if ($#out == 0 && ! wantarray) {
906 Apply regex to some or all values
908 @v = regex( 's/foo/bar/g', @v );
915 #warn "r: $r\n", dump(\@_);
919 push @out, $t if ($t && $t ne '');
926 Prefix all values with a string
928 @v = prefix( 'my_', @v );
933 my $p = shift or return;
934 return map { $p . $_ } grep { defined($_) } @_;
939 suffix all values with a string
941 @v = suffix( '_my', @v );
946 my $s = shift or die "suffix needs string as first argument";
947 return map { $_ . $s } grep { defined($_) } @_;
952 surround all values with a two strings
954 @v = surround( 'prefix_', '_suffix', @v );
959 my $p = shift or die "surround need prefix as first argument";
960 my $s = shift or die "surround needs suffix as second argument";
961 return map { $p . $_ . $s } grep { defined($_) } @_;
979 Consult lookup hashes for some value
983 'ffkk/peri/mfn'.rec('000')
985 'ffkk','peri','200-a-200-e',
987 first(rec(200,'a')).' '.first(rec('200','e'))
991 Code like above will be B<automatically generated> using L<WebPAC::Parse> from
992 normal lookup definition in C<conf/lookup/something.pl> which looks like:
995 # which results to return from record recorded in lookup
996 sub { 'ffkk/peri/mfn' . rec('000') },
997 # from which database and input
999 # such that following values match
1000 sub { first(rec(200,'a')) . ' ' . first(rec('200','e')) },
1001 # if this part is missing, we will try to match same fields
1002 # from lookup record and current one, or you can override
1003 # which records to use from current record using
1004 sub { rec('900','x') . ' ' . rec('900','y') },
1007 You can think about this lookup as SQL (if that helps):
1014 sub { filter from lookuped record }
1016 sub { optional filter on current record }
1023 my ($what, $database, $input, $key, $having) = @_;
1025 confess "lookup needs 5 arguments: what, database, input, key, having\n" unless ($#_ == 4);
1027 warn "## lookup ($database, $input, $key)", $/ if ($debug > 1);
1028 return unless (defined($lookup->{$database}->{$input}->{$key}));
1030 confess "lookup really need load_row_coderef added to data_structure\n" unless ($load_row_coderef);
1033 my @having = $having->();
1035 warn "## having = ", dump( @having ) if ($debug > 2);
1037 foreach my $h ( @having ) {
1038 if (defined($lookup->{$database}->{$input}->{$key}->{$h})) {
1039 warn "lookup for $database/$input/$key/$h return ",dump($lookup->{$database}->{$input}->{$key}->{$h}),"\n" if ($debug);
1040 $mfns->{$_}++ foreach keys %{ $lookup->{$database}->{$input}->{$key}->{$h} };
1044 return unless ($mfns);
1046 my @mfns = sort keys %$mfns;
1048 warn "# lookup loading $database/$input/$key mfn ", join(",",@mfns)," having ",dump(@having),"\n" if ($debug);
1053 foreach my $mfn (@mfns) {
1054 $rec = $load_row_coderef->( $database, $input, $mfn );
1056 warn "got $database/$input/$mfn = ", dump($rec), $/ if ($debug);
1058 my @vals = $what->();
1060 push @out, ( @vals );
1062 warn "lookup for mfn $mfn returned ", dump(@vals), $/ if ($debug);
1065 # if (ref($lookup->{$k}) eq 'ARRAY') {
1066 # return @{ $lookup->{$k} };
1068 # return $lookup->{$k};
1073 warn "## lookup returns = ", dump(@out), $/ if ($debug);
1082 =head2 save_into_lookup
1084 Save value into lookup. It associates current database, input
1085 and specific keys with one or more values which will be
1086 associated over MFN.
1088 MFN will be extracted from first occurence current of field 000
1089 in current record, or if it doesn't exist from L<_set_config> C<_mfn>.
1091 my $nr = save_into_lookup($database,$input,$key,sub {
1092 # code which produce one or more values
1095 It returns number of items saved.
1097 This function shouldn't be called directly, it's called from code created by
1102 sub save_into_lookup {
1103 my ($database,$input,$key,$coderef) = @_;
1104 die "save_into_lookup needs database" unless defined($database);
1105 die "save_into_lookup needs input" unless defined($input);
1106 die "save_into_lookup needs key" unless defined($key);
1107 die "save_into_lookup needs CODE" unless ( defined($coderef) && ref($coderef) eq 'CODE' );
1109 warn "## save_into_lookup rec = ", dump($rec), " config = ", dump($config), $/ if ($debug > 2);
1112 defined($rec->{'000'}->[0]) ? $rec->{'000'}->[0] :
1113 defined($config->{_mfn}) ? $config->{_mfn} :
1114 die "mfn not defined or zero";
1118 foreach my $v ( $coderef->() ) {
1119 $lookup->{$database}->{$input}->{$key}->{$v}->{$mfn}++;
1120 warn "# saved lookup $database/$input/$key [$v] $mfn\n" if ($debug > 1);
1129 Consult config values stored in C<config.yml>
1131 # return database code (key under databases in yaml)
1132 $database_code = config(); # use _ from hash
1133 $database_name = config('name');
1134 $database_input_name = config('input name');
1135 $tag = config('input normalize tag');
1137 Up to three levels are supported.
1142 return unless ($config);
1150 warn "### getting config($p)\n" if ($debug > 1);
1152 my @p = split(/\s+/,$p);
1154 $v = $config->{ '_' }; # special, database code
1157 my $c = dclone( $config );
1159 foreach my $k (@p) {
1160 warn "### k: $k c = ",dump($c),$/ if ($debug > 1);
1161 if (ref($c) eq 'ARRAY') {
1163 warn "config($p) taking first occurence of '$k', probably not what you wanted!\n";
1167 if (! defined($c->{$k}) ) {
1178 warn "## config( '$p' ) = ",dump( $v ),$/ if ($v && $debug);
1179 warn "config( '$p' ) is empty\n" if (! $v);
1186 Returns unique id of this record
1190 Returns C<42/2> for 2nd occurence of MFN 42.
1195 my $mfn = $config->{_mfn} || die "no _mfn in config data";
1196 return $mfn . $#{$marc_record} ? $#{$marc_record} + 1 : '';
1201 Joins walues with some delimiter
1203 $v = join_with(", ", @v);
1209 warn "### join_with('$d',",dump(@_),")\n" if ($debug > 2);
1210 my $v = join($d, grep { defined($_) && $_ ne '' } @_);
1211 return '' unless defined($v);
1217 Split record subfield on some regex and take one of parts out
1219 $a_before_semi_column =
1220 split_rec_on('200','a', /\s*;\s*/, $part);
1222 C<$part> is optional number of element. First element is
1225 If there is no C<$part> parameter or C<$part> is 0, this function will
1226 return all values produced by splitting.
1231 die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
1233 my ($fld, $sf, $regex, $part) = @_;
1234 warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
1236 my @r = rec( $fld, $sf );
1238 warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
1240 return '' if ( ! defined($v) || $v =~ /^\s*$/);
1242 my @s = split( $regex, $v );
1243 warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
1244 if ($part && $part > 0) {
1245 return $s[ $part - 1 ];