git.rot13.org Git - webpac2/blob - lib/WebPAC/Normalize.pm

   1 package WebPAC::Normalize;
   2 use Exporter 'import';
   3 our @EXPORT = qw/
   4         _set_ds _set_lookup
   5         _set_load_row
   6         _get_ds _clean_ds
   7         _debug
   8         _pack_subfields_hash
   9
  10         to
  11         search_display search display sorted
  12
  13         rec1 rec2 rec
  14         frec frec_eq frec_ne
  15         regex prefix suffix surround
  16         first lookup join_with
  17         save_into_lookup
  18
  19         split_rec_on
  20
  21         get set
  22         count
  23
  24 /;
  25
  26 use warnings;
  27 use strict;
  28
  29 #use base qw/WebPAC::Common/;
  30 use Data::Dump qw/dump/;
  31 use Carp qw/confess/;
  32
  33 # debugging warn(s)
  34 my $debug = 0;
  35 _debug( $debug );
  36
  37 # FIXME
  38 use WebPAC::Normalize::ISBN;
  39 push @EXPORT, ( 'isbn_10', 'isbn_13' );
  40
  41 use WebPAC::Normalize::MARC;
  42 push @EXPORT, ( qw/
  43         marc marc_indicators marc_repeatable_subfield
  44         marc_compose marc_leader marc_fixed
  45         marc_duplicate marc_remove marc_count
  46         marc_original_order
  47         marc_template
  48 /);
  49
  50 use Storable qw/dclone/;
  51
  52 =head1 NAME
  53
  54 WebPAC::Normalize - describe normalisaton rules using sets
  55
  56 =cut
  57
  58 our $VERSION = '0.36';
  59
  60 =head1 SYNOPSIS
  61
  62 This module uses C<conf/normalize/*.pl> files to perform normalisation
  63 from input records using perl functions which are specialized for set
  64 processing.
  65
  66 Sets are implemented as arrays, and normalisation file is valid perl, which
  67 means that you check it's validity before running WebPAC using
  68 C<perl -c normalize.pl>.
  69
  70 Normalisation can generate multiple output normalized data. For now, supported output
  71 types (on the left side of definition) are: C<search_display>, C<display>, C<search> and
  72 C<marc>.
  73
  74 =head1 FUNCTIONS
  75
  76 Functions which start with C<_> are private and used by WebPAC internally.
  77 All other functions are available for use within normalisation rules.
  78
  79 =head2 data_structure
  80
  81 Return data structure
  82
  83   my $ds = WebPAC::Normalize::data_structure(
  84         lookup => $lookup_hash,
  85         row => $row,
  86         rules => $normalize_pl_config,
  87         marc_encoding => 'utf-8',
  88         config => $config,
  89         load_row_coderef => sub {
  90                 my ($database,$input,$mfn) = @_;
  91                 $store->load_row( database => $database, input => $input, id => $mfn );
  92         },
  93   );
  94
  95 Options C<row>, C<rules> and C<log> are mandatory while all
  96 other are optional.
  97
  98 C<load_row_coderef> is closure only used when executing lookups, so they will
  99 die if it's not defined.
 100
 101 This function will B<die> if normalizastion can't be evaled.
 102
 103 Since this function isn't exported you have to call it with
 104 C<WebPAC::Normalize::data_structure>.
 105
 106 =cut
 107
 108 my $load_row_coderef;
 109
 110 sub data_structure {
 111         my $arg = {@_};
 112
 113         die "need row argument" unless ($arg->{row});
 114         die "need normalisation argument" unless ($arg->{rules});
 115
 116         _set_lookup( $arg->{lookup} ) if defined($arg->{lookup});
 117         _set_ds( $arg->{row} );
 118         _set_config( $arg->{config} ) if defined($arg->{config});
 119         _clean_ds( %{ $arg } );
 120         $load_row_coderef = $arg->{load_row_coderef};
 121
 122         no strict 'subs';
 123         no warnings 'redefine';
 124         eval "$arg->{rules};";
 125         die "error evaling $arg->{rules}: $@\n" if ($@);
 126
 127         return _get_ds();
 128 }
 129
 130 =head2 _set_ds
 131
 132 Set current record hash
 133
 134   _set_ds( $rec );
 135
 136 =cut
 137
 138 my $rec;
 139
 140 sub _set_ds {
 141         $rec = shift or die "no record hash";
 142         $WebPAC::Normalize::MARC::rec = $rec;
 143 }
 144
 145 =head2
 146
 147   my $rec = _get_rec();
 148
 149 =cut
 150
 151 sub _get_rec { $rec };
 152
 153 sub rec_array {
 154         my $d = $rec->{ $_[0] };
 155         return @$d if ref($d) eq 'ARRAY';
 156         die "field $_[0] not array: ",dump( $d );
 157 }
 158
 159 =head2 _set_config
 160
 161 Set current config hash
 162
 163   _set_config( $config );
 164
 165 Magic keys are:
 166
 167 =over 4
 168
 169 =item _
 170
 171 Code of current database
 172
 173 =item _mfn
 174
 175 Current MFN
 176
 177 =back
 178
 179 =cut
 180
 181 my $config;
 182
 183 sub _set_config {
 184         $config = shift;
 185 }
 186
 187 =head2 _get_ds
 188
 189 Return hash formatted as data structure
 190
 191   my $ds = _get_ds();
 192
 193 =cut
 194
 195 my $out;
 196
 197 sub _get_ds {
 198 #warn "## out = ",dump($out);
 199         return $out;
 200 }
 201
 202 =head2 _clean_ds
 203
 204 Clean data structure hash for next record
 205
 206   _clean_ds();
 207
 208 =cut
 209
 210 sub _clean_ds {
 211         my $a = {@_};
 212         $out = undef;
 213         WebPAC::Normalize::MARC::_clean();
 214 }
 215
 216 =head2 _set_lookup
 217
 218 Set current lookup hash
 219
 220   _set_lookup( $lookup );
 221
 222 =cut
 223
 224 my $lookup;
 225
 226 sub _set_lookup {
 227         $lookup = shift;
 228 }
 229
 230 =head2 _get_lookup
 231
 232 Get current lookup hash
 233
 234   my $lookup = _get_lookup();
 235
 236 =cut
 237
 238 sub _get_lookup {
 239         return $lookup;
 240 }
 241
 242 =head2 _set_load_row
 243
 244 Setup code reference which will return L<data_structure> from
 245 L<WebPAC::Store>
 246
 247   _set_load_row(sub {
 248                 my ($database,$input,$mfn) = @_;
 249                 $store->load_row( database => $database, input => $input, id => $mfn );
 250   });
 251
 252 =cut
 253
 254 sub _set_load_row {
 255         my $coderef = shift;
 256         confess "argument isn't CODE" unless ref($coderef) eq 'CODE';
 257
 258         $load_row_coderef = $coderef;
 259 }
 260
 261 =head2 _debug
 262
 263 Change level of debug warnings
 264
 265   _debug( 2 );
 266
 267 =cut
 268
 269 sub _debug {
 270         my $l = shift;
 271         return $debug unless defined($l);
 272         warn "debug level $l",$/ if ($l > 0);
 273         $debug = $l;
 274         $WebPAC::Normalize::MARC::debug = $debug;
 275 }
 276
 277 =head1 Functions to create C<data_structure>
 278
 279 Those functions generally have to first in your normalization file.
 280
 281 =head2 to
 282
 283 Generic way to set values for some name
 284
 285   to('field-name', 'name-value' => rec('200','a') );
 286
 287 There are many helpers defined below which might be easier to use.
 288
 289 =cut
 290
 291 sub to {
 292         my $type = shift or confess "need type -- BUG?";
 293         my $name = shift or confess "needs name as first argument";
 294         my @o = grep { defined($_) && $_ ne '' } @_;
 295         return unless (@o);
 296         $out->{$name}->{$type} = \@o;
 297 }
 298
 299 =head2 search_display
 300
 301 Define output for L<search> and L<display> at the same time
 302
 303   search_display('Title', rec('200','a') );
 304
 305 =cut
 306
 307 sub search_display {
 308         my $name = shift or die "search_display needs name as first argument";
 309         my @o = grep { defined($_) && $_ ne '' } @_;
 310         return unless (@o);
 311         $out->{$name}->{search} = \@o;
 312         $out->{$name}->{display} = \@o;
 313 }
 314
 315 =head2 tag
 316
 317 Old name for L<search_display>, it will probably be removed at one point.
 318
 319 =cut
 320
 321 sub tag {
 322         search_display( @_ );
 323 }
 324
 325 =head2 display
 326
 327 Define output just for I<display>
 328
 329   @v = display('Title', rec('200','a') );
 330
 331 =cut
 332
 333 sub display { to( 'display', @_ ) }
 334
 335 =head2 search
 336
 337 Prepare values just for I<search>
 338
 339   @v = search('Title', rec('200','a') );
 340
 341 =cut
 342
 343 sub search { to( 'search', @_ ) }
 344
 345 =head2 sorted
 346
 347 Insert into lists which will be automatically sorted
 348
 349  sorted('Title', rec('200','a') );
 350
 351 =cut
 352
 353 sub sorted { to( 'sorted', @_ ) }
 354
 355 =head2 row
 356
 357 Insert new row of data into output module
 358
 359   row( column => 'foo', column2 => 'bar' );
 360
 361 =cut
 362
 363 use Data::Dump qw/dump/;
 364
 365 sub row {
 366         die "array doesn't have even number of elements but $#_: ",dump( @_ ) if $#_ % 2 != 1;
 367
 368         push @{ $out->{'_rows'} }, {@_};
 369 }
 370
 371
 372 =head1 Functions to extract data from input
 373
 374 This function should be used inside functions to create C<data_structure> described
 375 above.
 376
 377 =head2 _pack_subfields_hash
 378
 379  @subfields = _pack_subfields_hash( $h );
 380  $subfields = _pack_subfields_hash( $h, 1 );
 381
 382 Return each subfield value in array or pack them all together and return scalar
 383 with subfields (denoted by C<^>) and values.
 384
 385 =cut
 386
 387 sub _pack_subfields_hash {
 388
 389         warn "## _pack_subfields_hash( ",dump(@_), " )\n" if ($debug > 1);
 390
 391         my ($hash,$include_subfields) = @_;
 392
 393         # sanity and ease of use
 394         return $hash if (ref($hash) ne 'HASH');
 395
 396         my $h = dclone( $hash );
 397
 398         if ( defined($h->{subfields}) ) {
 399                 my $sfs = delete $h->{subfields} || die "no subfields?";
 400                 my @out;
 401                 while (@$sfs) {
 402                         my $sf = shift @$sfs;
 403                         push @out, '^' . $sf if ($include_subfields);
 404                         my $o = shift @$sfs;
 405                         if ($o == 0 && ref( $h->{$sf} ) ne 'ARRAY' ) {
 406                                 # single element subfields are not arrays
 407 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
 408
 409                                 push @out, $h->{$sf};
 410                         } else {
 411 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
 412                                 push @out, $h->{$sf}->[$o];
 413                         }
 414                 }
 415                 if ($include_subfields) {
 416                         return join('', @out);
 417                 } else {
 418                         return @out;
 419                 }
 420         } else {
 421                 if ($include_subfields) {
 422                         my $out = '';
 423                         foreach my $sf (sort keys %$h) {
 424                                 if (ref($h->{$sf}) eq 'ARRAY') {
 425                                         $out .= '^' . $sf . join('^' . $sf, @{ $h->{$sf} });
 426                                 } else {
 427                                         $out .= '^' . $sf . $h->{$sf};
 428                                 }
 429                         }
 430                         return $out;
 431                 } else {
 432                         # FIXME this should probably be in alphabetical order instead of hash order
 433                         values %{$h};
 434                 }
 435         }
 436 }
 437
 438 =head2 rec1
 439
 440 Return all values in some field
 441
 442   @v = rec1('200')
 443
 444 TODO: order of values is probably same as in source data, need to investigate that
 445
 446 =cut
 447
 448 sub rec1 {
 449         my $f = shift;
 450         warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
 451         return unless (defined($rec) && defined($rec->{$f}));
 452         warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
 453         if (ref($rec->{$f}) eq 'ARRAY') {
 454                 my @out;
 455                 foreach my $h ( @{ $rec->{$f} } ) {
 456                         if (ref($h) eq 'HASH') {
 457                                 push @out, ( _pack_subfields_hash( $h ) );
 458                         } else {
 459                                 push @out, $h;
 460                         }
 461                 }
 462                 return @out;
 463         } elsif( defined($rec->{$f}) ) {
 464                 return $rec->{$f};
 465         }
 466 }
 467
 468 =head2 rec2
 469
 470 Return all values in specific field and subfield
 471
 472   @v = rec2('200','a')
 473
 474 =cut
 475
 476 sub rec2 {
 477         my $f = shift;
 478         return unless (defined($rec && $rec->{$f}));
 479         my $sf = shift;
 480         warn "rec2($f,$sf) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
 481         return map {
 482                 if (ref($_->{$sf}) eq 'ARRAY') {
 483                         @{ $_->{$sf} };
 484                 } else {
 485                         $_->{$sf};
 486                 }
 487         } grep { ref($_) eq 'HASH' && defined $_->{$sf} } @{ $rec->{$f} };
 488 }
 489
 490 =head2 rec
 491
 492 syntaxtic sugar for
 493
 494   @v = rec('200')
 495   @v = rec('200','a')
 496
 497 If rec() returns just single value, it will
 498 return scalar, not array.
 499
 500 =cut
 501
 502 sub rec {
 503         my @out;
 504         if ($#_ == 0) {
 505                 @out = rec1(@_);
 506         } elsif ($#_ == 1) {
 507                 @out = rec2(@_);
 508         }
 509         if ($#out == 0 && ! wantarray) {
 510                 return $out[0];
 511         } elsif (@out) {
 512                 return @out;
 513         } else {
 514                 return '';
 515         }
 516 }
 517
 518 =head2 frec
 519
 520 Returns first value from field
 521
 522   $v = frec('200');
 523   $v = frec('200','a');
 524
 525 =cut
 526
 527 sub frec {
 528         my @out = rec(@_);
 529         warn "rec(",dump(@_),") has more than one return value, ignoring\n" if $#out > 0;
 530         return shift @out;
 531 }
 532
 533 =head2 frec_eq
 534
 535 =head2 frec_ne
 536
 537 Check if first values from two fields are same or different
 538
 539   if ( frec_eq( 900 => 'a', 910 => 'c' ) ) {
 540         # values are same
 541   } else {
 542     # values are different
 543   }
 544
 545 Strictly speaking C<frec_eq> and C<frec_ne> wouldn't be needed if you
 546 could write something like:
 547
 548   if ( frec( '900','a' ) eq frec( '910','c' ) ) {
 549         # yada tada
 550   }
 551
 552 but you can't since our parser L<WebPAC::Parser> will remove all whitespaces
 553 in order to parse text and create invalid function C<eqfrec>.
 554
 555 =cut
 556
 557 sub frec_eq {
 558         my ( $f1,$sf1, $f2, $sf2 ) = @_;
 559         return (rec( $f1, $sf1 ))[0] eq (rec( $f2, $sf2 ))[0];
 560 }
 561
 562 sub frec_ne {
 563         return ! frec_eq( @_ );
 564 }
 565
 566 =head2 regex
 567
 568 Apply regex to some or all values
 569
 570   @v = regex( 's/foo/bar/g', @v );
 571
 572 =cut
 573
 574 sub regex {
 575         my $r = shift;
 576         my @out;
 577         #warn "r: $r\n", dump(\@_);
 578         foreach my $t (@_) {
 579                 next unless ($t);
 580                 eval "\$t =~ $r";
 581                 push @out, $t if ($t && $t ne '');
 582         }
 583         return @out;
 584 }
 585
 586 =head2 prefix
 587
 588 Prefix all values with a string
 589
 590   @v = prefix( 'my_', @v );
 591
 592 =cut
 593
 594 sub prefix {
 595         my $p = shift;
 596         return @_ unless defined( $p );
 597         return map { $p . $_ } grep { defined($_) } @_;
 598 }
 599
 600 =head2 suffix
 601
 602 suffix all values with a string
 603
 604   @v = suffix( '_my', @v );
 605
 606 =cut
 607
 608 sub suffix {
 609         my $s = shift;
 610         return @_ unless defined( $s );
 611         return map { $_ . $s } grep { defined($_) } @_;
 612 }
 613
 614 =head2 surround
 615
 616 surround all values with a two strings
 617
 618   @v = surround( 'prefix_', '_suffix', @v );
 619
 620 =cut
 621
 622 sub surround {
 623         my $p = shift;
 624         my $s = shift;
 625         $p = '' unless defined( $p );
 626         $s = '' unless defined( $s );
 627         return map { $p . $_ . $s } grep { defined($_) } @_;
 628 }
 629
 630 =head2 first
 631
 632 Return first element
 633
 634   $v = first( @v );
 635
 636 =cut
 637
 638 sub first {
 639         my $r = shift;
 640         return $r;
 641 }
 642
 643 =head2 lookup
 644
 645 Consult lookup hashes for some value
 646
 647   @v = lookup(
 648         sub {
 649                 'ffkk/peri/mfn'.rec('000')
 650         },
 651         'ffkk','peri','200-a-200-e',
 652         sub {
 653                 first(rec(200,'a')).' '.first(rec('200','e'))
 654         }
 655   );
 656
 657 Code like above will be B<automatically generated> using L<WebPAC::Parse> from
 658 normal lookup definition in C<conf/lookup/something.pl> which looks like:
 659
 660   lookup(
 661         # which results to return from record recorded in lookup
 662         sub { 'ffkk/peri/mfn' . rec('000') },
 663         # from which database and input
 664         'ffkk','peri',
 665         # such that following values match
 666         sub { first(rec(200,'a')) . ' ' . first(rec('200','e')) },
 667         # if this part is missing, we will try to match same fields
 668         # from lookup record and current one, or you can override
 669         # which records to use from current record using
 670         sub { rec('900','x') . ' ' . rec('900','y') },
 671   )
 672
 673 You can think about this lookup as SQL (if that helps):
 674
 675   select
 676         sub { what }
 677   from
 678         database, input
 679   where
 680     sub { filter from lookuped record }
 681   having
 682     sub { optional filter on current record }
 683
 684 Easy as pie, right?
 685
 686 =cut
 687
 688 sub lookup {
 689         my ($what, $database, $input, $key, $having) = @_;
 690
 691         confess "lookup needs 5 arguments: what, database, input, key, having\n" unless ($#_ == 4);
 692
 693         warn "## lookup ($database, $input, $key)", $/ if ($debug > 1);
 694         return unless (defined($lookup->{$database}->{$input}->{$key}));
 695
 696         confess "lookup really need load_row_coderef added to data_structure\n" unless ($load_row_coderef);
 697
 698         my $mfns;
 699         my @having = $having->();
 700
 701         warn "## having = ", dump( @having ) if ($debug > 2);
 702
 703         foreach my $h ( @having ) {
 704                 if (defined($lookup->{$database}->{$input}->{$key}->{$h})) {
 705                         warn "lookup for $database/$input/$key/$h return ",dump($lookup->{$database}->{$input}->{$key}->{$h}),"\n" if ($debug);
 706                         $mfns->{$_}++ foreach keys %{ $lookup->{$database}->{$input}->{$key}->{$h} };
 707                 }
 708         }
 709
 710         return unless ($mfns);
 711
 712         my @mfns = sort keys %$mfns;
 713
 714         warn "# lookup loading $database/$input/$key mfn ", join(",",@mfns)," having ",dump(@having),"\n" if ($debug);
 715
 716         my $old_rec = $rec;
 717         my @out;
 718
 719         foreach my $mfn (@mfns) {
 720                 $rec = $load_row_coderef->( $database, $input, $mfn );
 721
 722                 warn "got $database/$input/$mfn = ", dump($rec), $/ if ($debug);
 723
 724                 my @vals = $what->();
 725
 726                 push @out, ( @vals );
 727
 728                 warn "lookup for mfn $mfn returned ", dump(@vals), $/ if ($debug);
 729         }
 730
 731 #       if (ref($lookup->{$k}) eq 'ARRAY') {
 732 #               return @{ $lookup->{$k} };
 733 #       } else {
 734 #               return $lookup->{$k};
 735 #       }
 736
 737         $rec = $old_rec;
 738
 739         warn "## lookup returns = ", dump(@out), $/ if ($debug);
 740
 741         if ($#out == 0) {
 742                 return $out[0];
 743         } else {
 744                 return @out;
 745         }
 746 }
 747
 748 =head2 save_into_lookup
 749
 750 Save value into lookup. It associates current database, input
 751 and specific keys with one or more values which will be
 752 associated over MFN.
 753
 754 MFN will be extracted from first occurence current of field 000
 755 in current record, or if it doesn't exist from L<_set_config> C<_mfn>.
 756
 757   my $nr = save_into_lookup($database,$input,$key,sub {
 758         # code which produce one or more values
 759   });
 760
 761 It returns number of items saved.
 762
 763 This function shouldn't be called directly, it's called from code created by
 764 L<WebPAC::Parser>.
 765
 766 =cut
 767
 768 sub save_into_lookup {
 769         my ($database,$input,$key,$coderef) = @_;
 770         die "save_into_lookup needs database" unless defined($database);
 771         die "save_into_lookup needs input" unless defined($input);
 772         die "save_into_lookup needs key" unless defined($key);
 773         die "save_into_lookup needs CODE" unless ( defined($coderef) && ref($coderef) eq 'CODE' );
 774
 775         warn "## save_into_lookup rec = ", dump($rec), " config = ", dump($config), $/ if ($debug > 2);
 776
 777         my $mfn =
 778                 defined($rec->{'000'}->[0])     ?       $rec->{'000'}->[0]      :
 779                 defined($config->{_mfn})        ?       $config->{_mfn}         :
 780                                                                                 die "mfn not defined or zero";
 781
 782         my $nr = 0;
 783
 784         foreach my $v ( $coderef->() ) {
 785                 $lookup->{$database}->{$input}->{$key}->{$v}->{$mfn}++;
 786                 warn "# saved lookup $database/$input/$key [$v] $mfn\n" if ($debug > 1);
 787                 $nr++;
 788         }
 789
 790         return $nr;
 791 }
 792
 793 =head2 config
 794
 795 Consult config values stored in C<config.yml>
 796
 797   # return database code (key under databases in yaml)
 798   $database_code = config();    # use _ from hash
 799   $database_name = config('name');
 800   $database_input_name = config('input name');
 801
 802 Up to three levels are supported.
 803
 804 =cut
 805
 806 sub config {
 807         return unless ($config);
 808
 809         my $p = shift;
 810
 811         $p ||= '';
 812
 813         my $v;
 814
 815         warn "### getting config($p)\n" if ($debug > 1);
 816
 817         my @p = split(/\s+/,$p);
 818         if ($#p < 0) {
 819                 $v = $config->{ '_' };  # special, database code
 820         } else {
 821
 822                 my $c = dclone( $config );
 823
 824                 foreach my $k (@p) {
 825                         warn "### k: $k c = ",dump($c),$/ if ($debug > 1);
 826                         if (ref($c) eq 'ARRAY') {
 827                                 $c = shift @$c;
 828                                 warn "config($p) taking first occurence of '$k', probably not what you wanted!\n";
 829                                 last;
 830                         }
 831
 832                         if (! defined($c->{$k}) ) {
 833                                 $c = undef;
 834                                 last;
 835                         } else {
 836                                 $c = $c->{$k};
 837                         }
 838                 }
 839                 $v = $c if ($c);
 840
 841         }
 842
 843         warn "## config( '$p' ) = ",dump( $v ),$/ if ($v && $debug);
 844         warn "config( '$p' ) is empty\n" if (! $v);
 845
 846         return $v;
 847 }
 848
 849 =head2 id
 850
 851 Returns unique id of this record
 852
 853   $id = id();
 854
 855 Returns C<42/2> for 2nd occurence of MFN 42.
 856
 857 =cut
 858
 859 sub id {
 860         my $mfn = $config->{_mfn} || die "no _mfn in config data";
 861         return $mfn . ( WebPAC::Normalize::MARC::_created_marc_records() || '' );
 862 }
 863
 864 =head2 join_with
 865
 866 Joins walues with some delimiter
 867
 868   $v = join_with(", ", @v);
 869
 870 =cut
 871
 872 sub join_with {
 873         my $d = shift;
 874         warn "### join_with('$d',",dump(@_),")\n" if ($debug > 2);
 875         my $v = join($d, grep { defined($_) && $_ ne '' } @_);
 876         return '' unless defined($v);
 877         return $v;
 878 }
 879
 880 =head2 split_rec_on
 881
 882 Split record subfield on some regex and take one of parts out
 883
 884   $a_before_semi_column =
 885         split_rec_on('200','a', /\s*;\s*/, $part);
 886
 887 C<$part> is optional number of element. First element is
 888 B<1>, not 0!
 889
 890 If there is no C<$part> parameter or C<$part> is 0, this function will
 891 return all values produced by splitting.
 892
 893 =cut
 894
 895 sub split_rec_on {
 896         die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
 897
 898         my ($fld, $sf, $regex, $part) = @_;
 899         warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
 900
 901         my @r = rec( $fld, $sf );
 902         my $v = shift @r;
 903         warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
 904
 905         return '' if ( ! defined($v) || $v =~ /^\s*$/);
 906
 907         my @s = split( $regex, $v );
 908         warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
 909         if ($part && $part > 0) {
 910                 return $s[ $part - 1 ];
 911         } else {
 912                 return @s;
 913         }
 914 }
 915
 916 my $hash;
 917
 918 =head2 set
 919
 920   set( key => 'value' );
 921
 922 =cut
 923
 924 sub set {
 925         my ($k,$v) = @_;
 926         warn "## set ( $k => ", dump($v), " )", $/ if ( $debug );
 927         $hash->{$k} = $v;
 928 };
 929
 930 =head2 get
 931
 932   get( 'key' );
 933
 934 =cut
 935
 936 sub get {
 937         my $k = shift || return;
 938         my $v = $hash->{$k};
 939         warn "## get $k = ", dump( $v ), $/ if ( $debug );
 940         return $v;
 941 }
 942
 943 =head2 count
 944
 945   if ( count( @result ) == 1 ) {
 946         # do something if only 1 result is there
 947   }
 948
 949 =cut
 950
 951 sub count {
 952         warn "## count ",dump(@_),$/ if ( $debug );
 953         return @_ . '';
 954 }
 955
 956 # END
 957 1;