git.rot13.org Git - webpac2/blob - lib/WebPAC/Normalize.pm

   1 package WebPAC::Normalize;
   2 use Exporter 'import';
   3 @EXPORT = qw/
   4         _set_rec _set_lookup
   5         _get_ds _clean_ds
   6         _debug
   7
   8         tag search display
   9         marc marc_indicators marc_repeatable_subfield
  10         marc_compose marc_leader
  11         marc_duplicate marc_remove
  12
  13         rec1 rec2 rec
  14         regex prefix suffix surround
  15         first lookup join_with
  16
  17         split_rec_on
  18 /;
  19
  20 use warnings;
  21 use strict;
  22
  23 #use base qw/WebPAC::Common/;
  24 use Data::Dump qw/dump/;
  25 use Encode qw/from_to/;
  26 use Storable qw/dclone/;
  27
  28 # debugging warn(s)
  29 my $debug = 0;
  30
  31
  32 =head1 NAME
  33
  34 WebPAC::Normalize - describe normalisaton rules using sets
  35
  36 =head1 VERSION
  37
  38 Version 0.11
  39
  40 =cut
  41
  42 our $VERSION = '0.11';
  43
  44 =head1 SYNOPSIS
  45
  46 This module uses C<conf/normalize/*.pl> files to perform normalisation
  47 from input records using perl functions which are specialized for set
  48 processing.
  49
  50 Sets are implemented as arrays, and normalisation file is valid perl, which
  51 means that you check it's validity before running WebPAC using
  52 C<perl -c normalize.pl>.
  53
  54 Normalisation can generate multiple output normalized data. For now, supported output
  55 types (on the left side of definition) are: C<tag>, C<display>, C<search> and
  56 C<marc>.
  57
  58 =head1 FUNCTIONS
  59
  60 Functions which start with C<_> are private and used by WebPAC internally.
  61 All other functions are available for use within normalisation rules.
  62
  63 =head2 data_structure
  64
  65 Return data structure
  66
  67   my $ds = WebPAC::Normalize::data_structure(
  68         lookup => $lookup->lookup_hash,
  69         row => $row,
  70         rules => $normalize_pl_config,
  71         marc_encoding => 'utf-8',
  72   );
  73
  74 Options C<lookup>, C<row>, C<rules> and C<log> are mandatory while all
  75 other are optional.
  76
  77 This function will B<die> if normalizastion can't be evaled.
  78
  79 Since this function isn't exported you have to call it with
  80 C<WebPAC::Normalize::data_structure>.
  81
  82 =cut
  83
  84 sub data_structure {
  85         my $arg = {@_};
  86
  87         die "need row argument" unless ($arg->{row});
  88         die "need normalisation argument" unless ($arg->{rules});
  89
  90         no strict 'subs';
  91         _set_lookup( $arg->{lookup} );
  92         _set_rec( $arg->{row} );
  93         _clean_ds( %{ $arg } );
  94         eval "$arg->{rules}";
  95         die "error evaling $arg->{rules}: $@\n" if ($@);
  96
  97         return _get_ds();
  98 }
  99
 100 =head2 _set_rec
 101
 102 Set current record hash
 103
 104   _set_rec( $rec );
 105
 106 =cut
 107
 108 my $rec;
 109
 110 sub _set_rec {
 111         $rec = shift or die "no record hash";
 112 }
 113
 114 =head2 _get_ds
 115
 116 Return hash formatted as data structure
 117
 118   my $ds = _get_ds();
 119
 120 =cut
 121
 122 my ($out, $marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators);
 123 my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
 124
 125 sub _get_ds {
 126         return $out;
 127 }
 128
 129 =head2 _clean_ds
 130
 131 Clean data structure hash for next record
 132
 133   _clean_ds();
 134
 135 =cut
 136
 137 sub _clean_ds {
 138         my $a = {@_};
 139         ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators) = ();
 140         ($marc_record_offset, $marc_fetch_offset) = (0,0);
 141         $marc_encoding = $a->{marc_encoding};
 142 }
 143
 144 =head2 _set_lookup
 145
 146 Set current lookup hash
 147
 148   _set_lookup( $lookup );
 149
 150 =cut
 151
 152 my $lookup;
 153
 154 sub _set_lookup {
 155         $lookup = shift;
 156 }
 157
 158 =head2 _get_marc_fields
 159
 160 Get all fields defined by calls to C<marc>
 161
 162         $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
 163
 164 We are using I<magic> which detect repeatable fields only from
 165 sequence of field/subfield data generated by normalization.
 166
 167 Repeatable field is created when there is second occurence of same subfield or
 168 if any of indicators are different.
 169
 170 This is sane for most cases. Something like:
 171
 172   900a-1 900b-1 900c-1
 173   900a-2 900b-2
 174   900a-3
 175
 176 will be created from any combination of:
 177
 178   900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
 179
 180 and following rules:
 181
 182   marc('900','a', rec('200','a') );
 183   marc('900','b', rec('200','b') );
 184   marc('900','c', rec('200','c') );
 185
 186 which might not be what you have in mind. If you need repeatable subfield,
 187 define it using C<marc_repeatable_subfield> like this:
 188
 189   marc_repeatable_subfield('900','a');
 190   marc('900','a', rec('200','a') );
 191   marc('900','b', rec('200','b') );
 192   marc('900','c', rec('200','c') );
 193
 194 will create:
 195
 196   900a-1 900a-2 900a-3 900b-1 900c-1
 197   900b-2
 198
 199 There is also support for returning next or specific using:
 200
 201   while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
 202         # do something with $mf
 203   }
 204
 205 will always return fields from next MARC record or
 206
 207   my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
 208
 209 will return 42th copy record (if it exists).
 210
 211 =cut
 212
 213 sub _get_marc_fields {
 214
 215         my $arg = {@_};
 216         warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
 217         my $offset = $marc_fetch_offset;
 218         if ($arg->{offset}) {
 219                 $offset = $arg->{offset};
 220         } elsif($arg->{fetch_next}) {
 221                 $marc_fetch_offset++;
 222         }
 223
 224         return if (! $marc_record || ref($marc_record) ne 'ARRAY');
 225
 226         warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
 227
 228         my $marc_rec = $marc_record->[ $offset ];
 229
 230         warn "## _get_marc_fields (at offset: $offset) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
 231
 232         return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
 233
 234         # first, sort all existing fields
 235         # XXX might not be needed, but modern perl might randomize elements in hash
 236         my @sorted_marc_record = sort {
 237                 $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
 238         } @{ $marc_rec };
 239
 240         @sorted_marc_record = @{ $marc_rec };   ### FIXME disable sorting
 241
 242         # output marc fields
 243         my @m;
 244
 245         # count unique field-subfields (used for offset when walking to next subfield)
 246         my $u;
 247         map { $u->{ $_->[0] . ( $_->[3] || '')  }++ } @sorted_marc_record;
 248
 249         if ($debug) {
 250                 warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
 251                 warn "## marc_record[$offset] = ", dump( $marc_rec ), $/;
 252                 warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
 253                 warn "## subfield count = ", dump( $u ), $/;
 254         }
 255
 256         my $len = $#sorted_marc_record;
 257         my $visited;
 258         my $i = 0;
 259         my $field;
 260
 261         foreach ( 0 .. $len ) {
 262
 263                 # find next element which isn't visited
 264                 while ($visited->{$i}) {
 265                         $i = ($i + 1) % ($len + 1);
 266                 }
 267
 268                 # mark it visited
 269                 $visited->{$i}++;
 270
 271                 my $row = dclone( $sorted_marc_record[$i] );
 272
 273                 # field and subfield which is key for
 274                 # marc_repeatable_subfield and u
 275                 my $fsf = $row->[0] . ( $row->[3] || '' );
 276
 277                 if ($debug > 1) {
 278
 279                         print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
 280                         print "### this [$i]: ", dump( $row ),$/;
 281                         print "### sf: ", $row->[3], " vs ", $field->[3],
 282                                 $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
 283                                 if ($#$field >= 0);
 284
 285                 }
 286
 287                 # if field exists
 288                 if ( $#$field >= 0 ) {
 289                         if (
 290                                 $row->[0] ne $field->[0] ||             # field
 291                                 $row->[1] ne $field->[1] ||             # i1
 292                                 $row->[2] ne $field->[2]                # i2
 293                         ) {
 294                                 push @m, $field;
 295                                 warn "## saved/1 ", dump( $field ),$/ if ($debug);
 296                                 $field = $row;
 297
 298                         } elsif (
 299                                 ( $row->[3] lt $field->[-2] )           # subfield which is not next (e.g. a after c)
 300                                 ||
 301                                 ( $row->[3] eq $field->[-2] &&          # same subfield, but not repeatable
 302                                         ! $marc_repeatable_subfield->{ $fsf }
 303                                 )
 304                         ) {
 305                                 push @m, $field;
 306                                 warn "## saved/2 ", dump( $field ),$/ if ($debug);
 307                                 $field = $row;
 308
 309                         } else {
 310                                 # append new subfields to existing field
 311                                 push @$field, ( $row->[3], $row->[4] );
 312                         }
 313                 } else {
 314                         # insert first field
 315                         $field = $row;
 316                 }
 317
 318                 if (! $marc_repeatable_subfield->{ $fsf }) {
 319                         # make step to next subfield
 320                         $i = ($i + $u->{ $fsf } ) % ($len + 1);
 321                 }
 322         }
 323
 324         if ($#$field >= 0) {
 325                 push @m, $field;
 326                 warn "## saved/3 ", dump( $field ),$/ if ($debug);
 327         }
 328
 329         return \@m;
 330 }
 331
 332 =head2 _debug
 333
 334 Change level of debug warnings
 335
 336   _debug( 2 );
 337
 338 =cut
 339
 340 sub _debug {
 341         my $l = shift;
 342         return $debug unless defined($l);
 343         warn "debug level $l",$/ if ($l > 0);
 344         $debug = $l;
 345 }
 346
 347 =head1 Functions to create C<data_structure>
 348
 349 Those functions generally have to first in your normalization file.
 350
 351 =head2 tag
 352
 353 Define new tag for I<search> and I<display>.
 354
 355   tag('Title', rec('200','a') );
 356
 357
 358 =cut
 359
 360 sub tag {
 361         my $name = shift or die "tag needs name as first argument";
 362         my @o = grep { defined($_) && $_ ne '' } @_;
 363         return unless (@o);
 364         $out->{$name}->{tag} = $name;
 365         $out->{$name}->{search} = \@o;
 366         $out->{$name}->{display} = \@o;
 367 }
 368
 369 =head2 display
 370
 371 Define tag just for I<display>
 372
 373   @v = display('Title', rec('200','a') );
 374
 375 =cut
 376
 377 sub display {
 378         my $name = shift or die "display needs name as first argument";
 379         my @o = grep { defined($_) && $_ ne '' } @_;
 380         return unless (@o);
 381         $out->{$name}->{tag} = $name;
 382         $out->{$name}->{display} = \@o;
 383 }
 384
 385 =head2 search
 386
 387 Prepare values just for I<search>
 388
 389   @v = search('Title', rec('200','a') );
 390
 391 =cut
 392
 393 sub search {
 394         my $name = shift or die "search needs name as first argument";
 395         my @o = grep { defined($_) && $_ ne '' } @_;
 396         return unless (@o);
 397         $out->{$name}->{tag} = $name;
 398         $out->{$name}->{search} = \@o;
 399 }
 400
 401 =head2 marc_leader
 402
 403 Setup fields within MARC leader or get leader
 404
 405   marc_leader('05','c');
 406   my $leader = marc_leader();
 407
 408 =cut
 409
 410 sub marc_leader {
 411         my ($offset,$value) = @_;
 412
 413         if ($offset) {
 414                 $out->{' leader'}->{ $offset } = $value;
 415         } else {
 416                 return $out->{' leader'};
 417         }
 418 }
 419
 420 =head2 marc
 421
 422 Save value for MARC field
 423
 424   marc('900','a', rec('200','a') );
 425   marc('001', rec('000') );
 426
 427 =cut
 428
 429 sub marc {
 430         my $f = shift or die "marc needs field";
 431         die "marc field must be numer" unless ($f =~ /^\d+$/);
 432
 433         my $sf;
 434         if ($f >= 10) {
 435                 $sf = shift or die "marc needs subfield";
 436         }
 437
 438         foreach (@_) {
 439                 my $v = $_;             # make var read-write for Encode
 440                 next unless (defined($v) && $v !~ /^\s*$/);
 441                 from_to($v, 'iso-8859-2', $marc_encoding) if ($marc_encoding);
 442                 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
 443                 if (defined $sf) {
 444                         push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
 445                 } else {
 446                         push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
 447                 }
 448         }
 449 }
 450
 451 =head2 marc_repeatable_subfield
 452
 453 Save values for MARC repetable subfield
 454
 455   marc_repeatable_subfield('910', 'z', rec('909') );
 456
 457 =cut
 458
 459 sub marc_repeatable_subfield {
 460         my ($f,$sf) = @_;
 461         die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
 462         $marc_repeatable_subfield->{ $f . $sf }++;
 463         marc(@_);
 464 }
 465
 466 =head2 marc_indicators
 467
 468 Set both indicators for MARC field
 469
 470   marc_indicators('900', ' ', 1);
 471
 472 Any indicator value other than C<0-9> will be treated as undefined.
 473
 474 =cut
 475
 476 sub marc_indicators {
 477         my $f = shift || die "marc_indicators need field!\n";
 478         my ($i1,$i2) = @_;
 479         die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
 480         die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
 481
 482         $i1 = ' ' if ($i1 !~ /^\d$/);
 483         $i2 = ' ' if ($i2 !~ /^\d$/);
 484         @{ $marc_indicators->{$f} } = ($i1,$i2);
 485 }
 486
 487 =head2 marc_compose
 488
 489 Save values for each MARC subfield explicitly
 490
 491   marc_compose('900',
 492         'a', rec('200','a')
 493         'b', rec('201','a')
 494         'a', rec('200','b')
 495         'c', rec('200','c')
 496   );
 497
 498 =cut
 499
 500 sub marc_compose {
 501         my $f = shift or die "marc_compose needs field";
 502         die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
 503
 504         my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
 505         my $m = [ $f, $i1, $i2 ];
 506
 507         while (@_) {
 508                 my $sf = shift or die "marc_compose $f needs subfield";
 509                 my $v = shift;
 510
 511                 next unless (defined($v) && $v !~ /^\s*$/);
 512                 from_to($v, 'iso-8859-2', $marc_encoding) if ($marc_encoding);
 513                 push @$m, ( $sf, $v );
 514                 warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
 515         }
 516
 517         warn "## marc_compose(d) ", dump( $m ),$/ if ($debug > 1);
 518
 519         push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
 520 }
 521
 522 =head2 marc_duplicate
 523
 524 Generate copy of current MARC record and continue working on copy
 525
 526   marc_duplicate();
 527
 528 Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
 529 C<< _get_marc_fields( offset => 42 ) >>.
 530
 531 =cut
 532
 533 sub marc_duplicate {
 534          my $m = $marc_record->[ -1 ];
 535          die "can't duplicate record which isn't defined" unless ($m);
 536          push @{ $marc_record }, dclone( $m );
 537          warn "## marc_duplicate = ", dump(@$marc_record), $/ if ($debug > 1);
 538          $marc_record_offset = $#{ $marc_record };
 539          warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
 540 }
 541
 542 =head2 marc_remove
 543
 544 Remove some field or subfield from MARC record.
 545
 546   marc_remove('200');
 547   marc_remove('200','a');
 548
 549 This will erase field C<200> or C<200^a> from current MARC record.
 550
 551 This is useful after calling C<marc_duplicate> or on it's own (but, you
 552 should probably just remove that subfield definition if you are not
 553 using C<marc_duplicate>).
 554
 555 FIXME: support fields < 10.
 556
 557 =cut
 558
 559 sub marc_remove {
 560         my ($f, $sf) = @_;
 561
 562         die "marc_remove needs record number" unless defined($f);
 563
 564         my $marc = $marc_record->[ $marc_record_offset ];
 565
 566         warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
 567
 568         my $i = 0;
 569         foreach ( 0 .. $#{ $marc } ) {
 570                 last unless (defined $marc->[$i]);
 571                 warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
 572                 if ($marc->[$i]->[0] eq $f) {
 573                         if (! defined $sf) {
 574                                 # remove whole field
 575                                 splice @$marc, $i, 1;
 576                                 warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
 577                                 $i--;
 578                         } else {
 579                                 foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
 580                                         my $o = ($j * 2) + 3;
 581                                         if ($marc->[$i]->[$o] eq $sf) {
 582                                                 # remove subfield
 583                                                 splice @{$marc->[$i]}, $o, 2;
 584                                                 warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
 585                                                 # is record now empty?
 586                                                 if ($#{ $marc->[$i] } == 2) {
 587                                                         splice @$marc, $i, 1;
 588                                                         warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
 589                                                         $i--;
 590                                                 };
 591                                         }
 592                                 }
 593                         }
 594                 }
 595                 $i++;
 596         }
 597
 598         warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
 599
 600         $marc_record->[ $marc_record_offset ] = $marc;
 601
 602         warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
 603 }
 604
 605 =head1 Functions to extract data from input
 606
 607 This function should be used inside functions to create C<data_structure> described
 608 above.
 609
 610 =head2 rec1
 611
 612 Return all values in some field
 613
 614   @v = rec1('200')
 615
 616 TODO: order of values is probably same as in source data, need to investigate that
 617
 618 =cut
 619
 620 sub rec1 {
 621         my $f = shift;
 622         warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
 623         return unless (defined($rec) && defined($rec->{$f}));
 624         warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
 625         if (ref($rec->{$f}) eq 'ARRAY') {
 626                 return map {
 627                         if (ref($_) eq 'HASH') {
 628                                 values %{$_};
 629                         } else {
 630                                 $_;
 631                         }
 632                 } @{ $rec->{$f} };
 633         } elsif( defined($rec->{$f}) ) {
 634                 return $rec->{$f};
 635         }
 636 }
 637
 638 =head2 rec2
 639
 640 Return all values in specific field and subfield
 641
 642   @v = rec2('200','a')
 643
 644 =cut
 645
 646 sub rec2 {
 647         my $f = shift;
 648         return unless (defined($rec && $rec->{$f}));
 649         my $sf = shift;
 650         return map { $_->{$sf} } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
 651 }
 652
 653 =head2 rec
 654
 655 syntaxtic sugar for
 656
 657   @v = rec('200')
 658   @v = rec('200','a')
 659
 660 =cut
 661
 662 sub rec {
 663         if ($#_ == 0) {
 664                 return rec1(@_);
 665         } elsif ($#_ == 1) {
 666                 return rec2(@_);
 667         }
 668 }
 669
 670 =head2 regex
 671
 672 Apply regex to some or all values
 673
 674   @v = regex( 's/foo/bar/g', @v );
 675
 676 =cut
 677
 678 sub regex {
 679         my $r = shift;
 680         my @out;
 681         #warn "r: $r\n", dump(\@_);
 682         foreach my $t (@_) {
 683                 next unless ($t);
 684                 eval "\$t =~ $r";
 685                 push @out, $t if ($t && $t ne '');
 686         }
 687         return @out;
 688 }
 689
 690 =head2 prefix
 691
 692 Prefix all values with a string
 693
 694   @v = prefix( 'my_', @v );
 695
 696 =cut
 697
 698 sub prefix {
 699         my $p = shift or die "prefix needs string as first argument";
 700         return map { $p . $_ } grep { defined($_) } @_;
 701 }
 702
 703 =head2 suffix
 704
 705 suffix all values with a string
 706
 707   @v = suffix( '_my', @v );
 708
 709 =cut
 710
 711 sub suffix {
 712         my $s = shift or die "suffix needs string as first argument";
 713         return map { $_ . $s } grep { defined($_) } @_;
 714 }
 715
 716 =head2 surround
 717
 718 surround all values with a two strings
 719
 720   @v = surround( 'prefix_', '_suffix', @v );
 721
 722 =cut
 723
 724 sub surround {
 725         my $p = shift or die "surround need prefix as first argument";
 726         my $s = shift or die "surround needs suffix as second argument";
 727         return map { $p . $_ . $s } grep { defined($_) } @_;
 728 }
 729
 730 =head2 first
 731
 732 Return first element
 733
 734   $v = first( @v );
 735
 736 =cut
 737
 738 sub first {
 739         my $r = shift;
 740         return $r;
 741 }
 742
 743 =head2 lookup
 744
 745 Consult lookup hashes for some value
 746
 747   @v = lookup( $v );
 748   @v = lookup( @v );
 749
 750 =cut
 751
 752 sub lookup {
 753         my $k = shift or return;
 754         return unless (defined($lookup->{$k}));
 755         if (ref($lookup->{$k}) eq 'ARRAY') {
 756                 return @{ $lookup->{$k} };
 757         } else {
 758                 return $lookup->{$k};
 759         }
 760 }
 761
 762 =head2 join_with
 763
 764 Joins walues with some delimiter
 765
 766   $v = join_with(", ", @v);
 767
 768 =cut
 769
 770 sub join_with {
 771         my $d = shift;
 772         return join($d, grep { defined($_) && $_ ne '' } @_);
 773 }
 774
 775 =head2 split_rec_on
 776
 777 Split record subfield on some regex and take one of parts out
 778
 779   $a_before_semi_column =
 780         split_rec_on('200','a', /\s*;\s*/, $part);
 781
 782 C<$part> is optional number of element. First element is
 783 B<1>, not 0!
 784
 785 If there is no C<$part> parameter or C<$part> is 0, this function will
 786 return all values produced by splitting.
 787
 788 =cut
 789
 790 sub split_rec_on {
 791         die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
 792
 793         my ($fld, $sf, $regex, $part) = @_;
 794         warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
 795
 796         my @r = rec( $fld, $sf );
 797         my $v = shift @r;
 798         warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
 799
 800         return '' if( ! defined($v) || $v =~ /^\s*$/);
 801
 802         my @s = split( $regex, $v );
 803         warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
 804         if ($part && $part > 0) {
 805                 return $s[ $part - 1 ];
 806         } else {
 807                 return @s;
 808         }
 809 }
 810
 811 # END
 812 1;