git.rot13.org Git - webpac2/blob - lib/WebPAC/Normalize.pm

   1 package WebPAC::Normalize;
   2 use Exporter 'import';
   3 our @EXPORT = qw/
   4         _set_ds _set_lookup
   5         _set_load_row
   6         _get_ds _clean_ds
   7         _debug
   8         _pack_subfields_hash
   9
  10         to
  11         search_display search display sorted
  12
  13         rec1 rec2 rec
  14         frec frec_eq frec_ne
  15         regex prefix suffix surround
  16         first lookup join_with
  17         save_into_lookup
  18
  19         split_rec_on
  20
  21         get set
  22         count
  23
  24         row
  25         rec_array
  26
  27 /;
  28
  29 use warnings;
  30 use strict;
  31
  32 #use base qw/WebPAC::Common/;
  33 use Data::Dump qw/dump/;
  34 use Carp qw/confess/;
  35
  36 # debugging warn(s)
  37 my $debug = 0;
  38 _debug( $debug );
  39
  40 # FIXME
  41 use WebPAC::Normalize::ISBN;
  42 push @EXPORT, ( 'isbn_10', 'isbn_13' );
  43
  44 use WebPAC::Normalize::MARC;
  45 push @EXPORT, ( qw/
  46         marc marc_indicators marc_repeatable_subfield
  47         marc_compose marc_leader marc_fixed
  48         marc_duplicate marc_remove marc_count
  49         marc_original_order
  50         marc_template
  51 /);
  52
  53 use Storable qw/dclone/;
  54
  55 =head1 NAME
  56
  57 WebPAC::Normalize - describe normalisaton rules using sets
  58
  59 =cut
  60
  61 our $VERSION = '0.36';
  62
  63 =head1 SYNOPSIS
  64
  65 This module uses C<conf/normalize/*.pl> files to perform normalisation
  66 from input records using perl functions which are specialized for set
  67 processing.
  68
  69 Sets are implemented as arrays, and normalisation file is valid perl, which
  70 means that you check it's validity before running WebPAC using
  71 C<perl -c normalize.pl>.
  72
  73 Normalisation can generate multiple output normalized data. For now, supported output
  74 types (on the left side of definition) are: C<search_display>, C<display>, C<search> and
  75 C<marc>.
  76
  77 =head1 FUNCTIONS
  78
  79 Functions which start with C<_> are private and used by WebPAC internally.
  80 All other functions are available for use within normalisation rules.
  81
  82 =head2 data_structure
  83
  84 Return data structure
  85
  86   my $ds = WebPAC::Normalize::data_structure(
  87         lookup => $lookup_hash,
  88         row => $row,
  89         rules => $normalize_pl_config,
  90         marc_encoding => 'utf-8',
  91         config => $config,
  92         load_row_coderef => sub {
  93                 my ($database,$input,$mfn) = @_;
  94                 $store->load_row( database => $database, input => $input, id => $mfn );
  95         },
  96   );
  97
  98 Options C<row>, C<rules> and C<log> are mandatory while all
  99 other are optional.
 100
 101 C<load_row_coderef> is closure only used when executing lookups, so they will
 102 die if it's not defined.
 103
 104 This function will B<die> if normalizastion can't be evaled.
 105
 106 Since this function isn't exported you have to call it with
 107 C<WebPAC::Normalize::data_structure>.
 108
 109 =cut
 110
 111 my $load_row_coderef;
 112
 113 sub data_structure {
 114         my $arg = {@_};
 115
 116         die "need row argument" unless ($arg->{row});
 117         die "need normalisation argument" unless ($arg->{rules});
 118
 119         _set_lookup( $arg->{lookup} ) if defined($arg->{lookup});
 120         _set_ds( $arg->{row} );
 121         _set_config( $arg->{config} ) if defined($arg->{config});
 122         _clean_ds( %{ $arg } );
 123         $load_row_coderef = $arg->{load_row_coderef};
 124
 125         no strict 'subs';
 126         no warnings 'all';
 127         eval "$arg->{rules};";
 128         die "error evaling [$@] using rules " . $arg->{rules} . "\n" if ($@);
 129
 130         return _get_ds();
 131 }
 132
 133 =head2 _set_ds
 134
 135 Set current record hash
 136
 137   _set_ds( $rec );
 138
 139 =cut
 140
 141 my $rec;
 142
 143 sub _set_ds {
 144         $rec = shift or die "no record hash";
 145         $WebPAC::Normalize::MARC::rec = $rec;
 146 }
 147
 148 =head2 _get_rec
 149
 150   my $rec = _get_rec();
 151
 152 =cut
 153
 154 sub _get_rec { $rec };
 155
 156 =head2 _set_rec
 157
 158   _set_rec( $rec );
 159
 160 =cut
 161
 162 sub _set_rec { $rec = $_[0] }
 163
 164 =head2 _set_config
 165
 166 Set current config hash
 167
 168   _set_config( $config );
 169
 170 Magic keys are:
 171
 172 =over 4
 173
 174 =item _
 175
 176 Code of current database
 177
 178 =item _mfn
 179
 180 Current MFN
 181
 182 =back
 183
 184 =cut
 185
 186 my $config;
 187
 188 sub _set_config {
 189         $config = shift;
 190 }
 191
 192 =head2 _get_ds
 193
 194 Return hash formatted as data structure
 195
 196   my $ds = _get_ds();
 197
 198 =cut
 199
 200 my $out;
 201
 202 sub _get_ds {
 203 #warn "## out = ",dump($out);
 204         return $out;
 205 }
 206
 207 =head2 _clean_ds
 208
 209 Clean data structure hash for next record
 210
 211   _clean_ds();
 212
 213 =cut
 214
 215 sub _clean_ds {
 216         my $a = {@_};
 217         $out = undef;
 218         WebPAC::Normalize::MARC::_clean();
 219 }
 220
 221 =head2 _set_lookup
 222
 223 Set current lookup hash
 224
 225   _set_lookup( $lookup );
 226
 227 =cut
 228
 229 my $lookup;
 230
 231 sub _set_lookup {
 232         $lookup = shift;
 233 }
 234
 235 =head2 _get_lookup
 236
 237 Get current lookup hash
 238
 239   my $lookup = _get_lookup();
 240
 241 =cut
 242
 243 sub _get_lookup {
 244         return $lookup;
 245 }
 246
 247 =head2 _set_load_row
 248
 249 Setup code reference which will return L<data_structure> from
 250 L<WebPAC::Store>
 251
 252   _set_load_row(sub {
 253                 my ($database,$input,$mfn) = @_;
 254                 $store->load_row( database => $database, input => $input, id => $mfn );
 255   });
 256
 257 =cut
 258
 259 sub _set_load_row {
 260         my $coderef = shift;
 261         confess "argument isn't CODE" unless ref($coderef) eq 'CODE';
 262
 263         $load_row_coderef = $coderef;
 264 }
 265
 266 =head2 _debug
 267
 268 Change level of debug warnings
 269
 270   _debug( 2 );
 271
 272 =cut
 273
 274 sub _debug {
 275         my $l = shift;
 276         return $debug unless defined($l);
 277         warn "debug level $l",$/ if ($l > 0);
 278         $debug = $l;
 279         $WebPAC::Normalize::MARC::debug = $debug;
 280 }
 281
 282 =head1 Functions to create C<data_structure>
 283
 284 Those functions generally have to first in your normalization file.
 285
 286 =head2 to
 287
 288 Generic way to set values for some name
 289
 290   to('field-name', 'name-value' => rec('200','a') );
 291
 292 There are many helpers defined below which might be easier to use.
 293
 294 =cut
 295
 296 sub to {
 297         my $type = shift or confess "need type -- BUG?";
 298         my $name = shift or confess "needs name as first argument";
 299         my @o = grep { defined($_) && $_ ne '' } @_;
 300         return unless (@o);
 301         $out->{$name}->{$type} = \@o;
 302 }
 303
 304 =head2 search_display
 305
 306 Define output for L<search> and L<display> at the same time
 307
 308   search_display('Title', rec('200','a') );
 309
 310 =cut
 311
 312 sub search_display {
 313         my $name = shift or die "search_display needs name as first argument";
 314         my @o = grep { defined($_) && $_ ne '' } @_;
 315         return unless (@o);
 316         $out->{$name}->{search} = \@o;
 317         $out->{$name}->{display} = \@o;
 318 }
 319
 320 =head2 tag
 321
 322 Old name for L<search_display>, it will probably be removed at one point.
 323
 324 =cut
 325
 326 sub tag {
 327         search_display( @_ );
 328 }
 329
 330 =head2 display
 331
 332 Define output just for I<display>
 333
 334   @v = display('Title', rec('200','a') );
 335
 336 =cut
 337
 338 sub display { to( 'display', @_ ) }
 339
 340 =head2 search
 341
 342 Prepare values just for I<search>
 343
 344   @v = search('Title', rec('200','a') );
 345
 346 =cut
 347
 348 sub search { to( 'search', @_ ) }
 349
 350 =head2 sorted
 351
 352 Insert into lists which will be automatically sorted
 353
 354  sorted('Title', rec('200','a') );
 355
 356 =cut
 357
 358 sub sorted { to( 'sorted', @_ ) }
 359
 360 =head2 row
 361
 362 Insert new row of data into output module
 363
 364   row( column => 'foo', column2 => 'bar' );
 365
 366 =cut
 367
 368 use Data::Dump qw/dump/;
 369
 370 sub row {
 371         die "array doesn't have odd number of elements but $#_: ",dump( @_ ) if $#_ % 2 == 1;
 372         my $table = shift @_;
 373         push @{ $out->{'_rows'}->{$table} }, {@_};
 374 }
 375
 376
 377 =head1 Functions to extract data from input
 378
 379 This function should be used inside functions to create C<data_structure> described
 380 above.
 381
 382 =head2 _pack_subfields_hash
 383
 384  @subfields = _pack_subfields_hash( $h );
 385  $subfields = _pack_subfields_hash( $h, 1 );
 386
 387 Return each subfield value in array or pack them all together and return scalar
 388 with subfields (denoted by C<^>) and values.
 389
 390 =cut
 391
 392 sub _pack_subfields_hash {
 393
 394         warn "## _pack_subfields_hash( ",dump(@_), " )\n" if ($debug > 1);
 395
 396         my ($hash,$include_subfields) = @_;
 397
 398         # sanity and ease of use
 399         return $hash if (ref($hash) ne 'HASH');
 400
 401         my $h = dclone( $hash );
 402
 403         if ( defined($h->{subfields}) ) {
 404                 my $sfs = delete $h->{subfields} || die "no subfields?";
 405                 my @out;
 406                 while (@$sfs) {
 407                         my $sf = shift @$sfs;
 408                         push @out, '^' . $sf if ($include_subfields);
 409                         my $o = shift @$sfs;
 410                         if ($o == 0 && ref( $h->{$sf} ) ne 'ARRAY' ) {
 411                                 # single element subfields are not arrays
 412 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
 413
 414                                 push @out, $h->{$sf};
 415                         } else {
 416 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
 417                                 push @out, $h->{$sf}->[$o];
 418                         }
 419                 }
 420                 if ($include_subfields) {
 421                         return join('', @out);
 422                 } else {
 423                         return @out;
 424                 }
 425         } else {
 426                 if ($include_subfields) {
 427                         my $out = '';
 428                         foreach my $sf (sort keys %$h) {
 429                                 if (ref($h->{$sf}) eq 'ARRAY') {
 430                                         $out .= '^' . $sf . join('^' . $sf, @{ $h->{$sf} });
 431                                 } else {
 432                                         $out .= '^' . $sf . $h->{$sf};
 433                                 }
 434                         }
 435                         return $out;
 436                 } else {
 437                         # FIXME this should probably be in alphabetical order instead of hash order
 438                         values %{$h};
 439                 }
 440         }
 441 }
 442
 443 =head2 rec1
 444
 445 Return all values in some field
 446
 447   @v = rec1('200')
 448
 449 TODO: order of values is probably same as in source data, need to investigate that
 450
 451 =cut
 452
 453 sub rec1 {
 454         my $f = shift;
 455         warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
 456         return unless (defined($rec) && defined($rec->{$f}));
 457         warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
 458         if (ref($rec->{$f}) eq 'ARRAY') {
 459                 my @out;
 460                 foreach my $h ( @{ $rec->{$f} } ) {
 461                         if (ref($h) eq 'HASH') {
 462                                 push @out, ( _pack_subfields_hash( $h ) );
 463                         } else {
 464                                 push @out, $h;
 465                         }
 466                 }
 467                 return @out;
 468         } elsif( defined($rec->{$f}) ) {
 469                 return $rec->{$f};
 470         }
 471 }
 472
 473 =head2 rec2
 474
 475 Return all values in specific field and subfield
 476
 477   @v = rec2('200','a')
 478
 479 =cut
 480
 481 sub rec2 {
 482         my $f = shift;
 483         return unless (defined($rec && $rec->{$f}));
 484         my $sf = shift;
 485         warn "rec2($f,$sf) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
 486         return map {
 487                 if (ref($_->{$sf}) eq 'ARRAY') {
 488                         @{ $_->{$sf} };
 489                 } else {
 490                         $_->{$sf};
 491                 }
 492         } grep { ref($_) eq 'HASH' && defined $_->{$sf} } @{ $rec->{$f} };
 493 }
 494
 495 =head2 rec
 496
 497 syntaxtic sugar for
 498
 499   @v = rec('200')
 500   @v = rec('200','a')
 501
 502 If rec() returns just single value, it will
 503 return scalar, not array.
 504
 505 =cut
 506
 507 sub rec {
 508         my @out;
 509         if ($#_ == 0) {
 510                 @out = rec1(@_);
 511         } elsif ($#_ == 1) {
 512                 @out = rec2(@_);
 513         }
 514         if ($#out == 0 && ! wantarray) {
 515                 return $out[0];
 516         } elsif (@out) {
 517                 return @out;
 518         } else {
 519                 return '';
 520         }
 521 }
 522
 523 =head2 frec
 524
 525 Returns first value from field
 526
 527   $v = frec('200');
 528   $v = frec('200','a');
 529
 530 =cut
 531
 532 sub frec {
 533         my @out = rec(@_);
 534         warn "rec(",dump(@_),") has more than one return value, ignoring\n" if $#out > 0;
 535         return shift @out;
 536 }
 537
 538 =head2 frec_eq
 539
 540 =head2 frec_ne
 541
 542 Check if first values from two fields are same or different
 543
 544   if ( frec_eq( 900 => 'a', 910 => 'c' ) ) {
 545         # values are same
 546   } else {
 547     # values are different
 548   }
 549
 550 Strictly speaking C<frec_eq> and C<frec_ne> wouldn't be needed if you
 551 could write something like:
 552
 553   if ( frec( '900','a' ) eq frec( '910','c' ) ) {
 554         # yada tada
 555   }
 556
 557 but you can't since our parser L<WebPAC::Parser> will remove all whitespaces
 558 in order to parse text and create invalid function C<eqfrec>.
 559
 560 =cut
 561
 562 sub frec_eq {
 563         my ( $f1,$sf1, $f2, $sf2 ) = @_;
 564         return (rec( $f1, $sf1 ))[0] eq (rec( $f2, $sf2 ))[0];
 565 }
 566
 567 sub frec_ne {
 568         return ! frec_eq( @_ );
 569 }
 570
 571 =head2 regex
 572
 573 Apply regex to some or all values
 574
 575   @v = regex( 's/foo/bar/g', @v );
 576
 577 =cut
 578
 579 sub regex {
 580         my $r = shift;
 581         my @out;
 582         #warn "r: $r\n", dump(\@_);
 583         foreach my $t (@_) {
 584                 next unless ($t);
 585                 eval "\$t =~ $r";
 586                 push @out, $t if ($t && $t ne '');
 587         }
 588         return @out;
 589 }
 590
 591 =head2 prefix
 592
 593 Prefix all values with a string
 594
 595   @v = prefix( 'my_', @v );
 596
 597 =cut
 598
 599 sub prefix {
 600         my $p = shift;
 601         return @_ unless defined( $p );
 602         return map { $p . $_ } grep { defined($_) } @_;
 603 }
 604
 605 =head2 suffix
 606
 607 suffix all values with a string
 608
 609   @v = suffix( '_my', @v );
 610
 611 =cut
 612
 613 sub suffix {
 614         my $s = shift;
 615         return @_ unless defined( $s );
 616         return map { $_ . $s } grep { defined($_) } @_;
 617 }
 618
 619 =head2 surround
 620
 621 surround all values with a two strings
 622
 623   @v = surround( 'prefix_', '_suffix', @v );
 624
 625 =cut
 626
 627 sub surround {
 628         my $p = shift;
 629         my $s = shift;
 630         $p = '' unless defined( $p );
 631         $s = '' unless defined( $s );
 632         return map { $p . $_ . $s } grep { defined($_) } @_;
 633 }
 634
 635 =head2 first
 636
 637 Return first element
 638
 639   $v = first( @v );
 640
 641 =cut
 642
 643 sub first {
 644         my $r = shift;
 645         return $r;
 646 }
 647
 648 =head2 lookup
 649
 650 Consult lookup hashes for some value
 651
 652   @v = lookup(
 653         sub {
 654                 'ffkk/peri/mfn'.rec('000')
 655         },
 656         'ffkk','peri','200-a-200-e',
 657         sub {
 658                 first(rec(200,'a')).' '.first(rec('200','e'))
 659         }
 660   );
 661
 662 Code like above will be B<automatically generated> using L<WebPAC::Parse> from
 663 normal lookup definition in C<conf/lookup/something.pl> which looks like:
 664
 665   lookup(
 666         # which results to return from record recorded in lookup
 667         sub { 'ffkk/peri/mfn' . rec('000') },
 668         # from which database and input
 669         'ffkk','peri',
 670         # such that following values match
 671         sub { first(rec(200,'a')) . ' ' . first(rec('200','e')) },
 672         # if this part is missing, we will try to match same fields
 673         # from lookup record and current one, or you can override
 674         # which records to use from current record using
 675         sub { rec('900','x') . ' ' . rec('900','y') },
 676   )
 677
 678 You can think about this lookup as SQL (if that helps):
 679
 680   select
 681         sub { what }
 682   from
 683         database, input
 684   where
 685     sub { filter from lookuped record }
 686   having
 687     sub { optional filter on current record }
 688
 689 Easy as pie, right?
 690
 691 =cut
 692
 693 sub lookup {
 694         my ($what, $database, $input, $key, $having) = @_;
 695
 696         confess "lookup needs 5 arguments: what, database, input, key, having\n" unless ($#_ == 4);
 697
 698         warn "## lookup ($database, $input, $key)", $/ if ($debug > 1);
 699         return unless (defined($lookup->{$database}->{$input}->{$key}));
 700
 701         confess "lookup really need load_row_coderef added to data_structure\n" unless ($load_row_coderef);
 702
 703         my $mfns;
 704         my @having = $having->();
 705
 706         warn "## having = ", dump( @having ) if ($debug > 2);
 707
 708         foreach my $h ( @having ) {
 709                 if (defined($lookup->{$database}->{$input}->{$key}->{$h})) {
 710                         warn "lookup for $database/$input/$key/$h return ",dump($lookup->{$database}->{$input}->{$key}->{$h}),"\n" if ($debug);
 711                         $mfns->{$_}++ foreach keys %{ $lookup->{$database}->{$input}->{$key}->{$h} };
 712                 }
 713         }
 714
 715         return unless ($mfns);
 716
 717         my @mfns = sort keys %$mfns;
 718
 719         warn "# lookup loading $database/$input/$key mfn ", join(",",@mfns)," having ",dump(@having),"\n" if ($debug);
 720
 721         my $old_rec = $rec;
 722         my @out;
 723
 724         foreach my $mfn (@mfns) {
 725                 $rec = $load_row_coderef->( $database, $input, $mfn );
 726
 727                 warn "got $database/$input/$mfn = ", dump($rec), $/ if ($debug);
 728
 729                 my @vals = $what->();
 730
 731                 push @out, ( @vals );
 732
 733                 warn "lookup for mfn $mfn returned ", dump(@vals), $/ if ($debug);
 734         }
 735
 736 #       if (ref($lookup->{$k}) eq 'ARRAY') {
 737 #               return @{ $lookup->{$k} };
 738 #       } else {
 739 #               return $lookup->{$k};
 740 #       }
 741
 742         $rec = $old_rec;
 743
 744         warn "## lookup returns = ", dump(@out), $/ if ($debug);
 745
 746         if ($#out == 0) {
 747                 return $out[0];
 748         } else {
 749                 return @out;
 750         }
 751 }
 752
 753 =head2 save_into_lookup
 754
 755 Save value into lookup. It associates current database, input
 756 and specific keys with one or more values which will be
 757 associated over MFN.
 758
 759 MFN will be extracted from first occurence current of field 000
 760 in current record, or if it doesn't exist from L<_set_config> C<_mfn>.
 761
 762   my $nr = save_into_lookup($database,$input,$key,sub {
 763         # code which produce one or more values
 764   });
 765
 766 It returns number of items saved.
 767
 768 This function shouldn't be called directly, it's called from code created by
 769 L<WebPAC::Parser>.
 770
 771 =cut
 772
 773 sub save_into_lookup {
 774         my ($database,$input,$key,$coderef) = @_;
 775         die "save_into_lookup needs database" unless defined($database);
 776         die "save_into_lookup needs input" unless defined($input);
 777         die "save_into_lookup needs key" unless defined($key);
 778         die "save_into_lookup needs CODE" unless ( defined($coderef) && ref($coderef) eq 'CODE' );
 779
 780         warn "## save_into_lookup rec = ", dump($rec), " config = ", dump($config), $/ if ($debug > 2);
 781
 782         my $mfn =
 783                 defined($rec->{'000'}->[0])     ?       $rec->{'000'}->[0]      :
 784                 defined($config->{_mfn})        ?       $config->{_mfn}         :
 785                                                                                 die "mfn not defined or zero";
 786
 787         my $nr = 0;
 788
 789         foreach my $v ( $coderef->() ) {
 790                 $lookup->{$database}->{$input}->{$key}->{$v}->{$mfn}++;
 791                 warn "# saved lookup $database/$input/$key [$v] $mfn\n" if ($debug > 1);
 792                 $nr++;
 793         }
 794
 795         return $nr;
 796 }
 797
 798 =head2 config
 799
 800 Consult config values stored in C<config.yml>
 801
 802   # return database code (key under databases in yaml)
 803   $database_code = config();    # use _ from hash
 804   $database_name = config('name');
 805   $database_input_name = config('input name');
 806
 807 Up to three levels are supported.
 808
 809 =cut
 810
 811 sub config {
 812         return unless ($config);
 813
 814         my $p = shift;
 815
 816         $p ||= '';
 817
 818         my $v;
 819
 820         warn "### getting config($p)\n" if ($debug > 1);
 821
 822         my @p = split(/\s+/,$p);
 823         if ($#p < 0) {
 824                 $v = $config->{ '_' };  # special, database code
 825         } else {
 826
 827                 my $c = dclone( $config );
 828
 829                 foreach my $k (@p) {
 830                         warn "### k: $k c = ",dump($c),$/ if ($debug > 1);
 831                         if (ref($c) eq 'ARRAY') {
 832                                 $c = shift @$c;
 833                                 warn "config($p) taking first occurence of '$k', probably not what you wanted!\n";
 834                                 last;
 835                         }
 836
 837                         if (! defined($c->{$k}) ) {
 838                                 $c = undef;
 839                                 last;
 840                         } else {
 841                                 $c = $c->{$k};
 842                         }
 843                 }
 844                 $v = $c if ($c);
 845
 846         }
 847
 848         warn "## config( '$p' ) = ",dump( $v ),$/ if ($v && $debug);
 849         warn "config( '$p' ) is empty\n" if (! $v);
 850
 851         return $v;
 852 }
 853
 854 =head2 id
 855
 856 Returns unique id of this record
 857
 858   $id = id();
 859
 860 Returns C<42/2> for 2nd occurence of MFN 42.
 861
 862 =cut
 863
 864 sub id {
 865         my $mfn = $config->{_mfn} || die "no _mfn in config data";
 866         return $mfn . ( WebPAC::Normalize::MARC::_created_marc_records() || '' );
 867 }
 868
 869 =head2 join_with
 870
 871 Joins walues with some delimiter
 872
 873   $v = join_with(", ", @v);
 874
 875 =cut
 876
 877 sub join_with {
 878         my $d = shift;
 879         warn "### join_with('$d',",dump(@_),")\n" if ($debug > 2);
 880         my $v = join($d, grep { defined($_) && $_ ne '' } @_);
 881         return '' unless defined($v);
 882         return $v;
 883 }
 884
 885 =head2 split_rec_on
 886
 887 Split record subfield on some regex and take one of parts out
 888
 889   $a_before_semi_column =
 890         split_rec_on('200','a', /\s*;\s*/, $part);
 891
 892 C<$part> is optional number of element. First element is
 893 B<1>, not 0!
 894
 895 If there is no C<$part> parameter or C<$part> is 0, this function will
 896 return all values produced by splitting.
 897
 898 =cut
 899
 900 sub split_rec_on {
 901         die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
 902
 903         my ($fld, $sf, $regex, $part) = @_;
 904         warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
 905
 906         my @r = rec( $fld, $sf );
 907         my $v = shift @r;
 908         warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
 909
 910         return '' if ( ! defined($v) || $v =~ /^\s*$/);
 911
 912         my @s = split( $regex, $v );
 913         warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
 914         if ($part && $part > 0) {
 915                 return $s[ $part - 1 ];
 916         } else {
 917                 return @s;
 918         }
 919 }
 920
 921 my $hash;
 922
 923 =head2 set
 924
 925   set( key => 'value' );
 926
 927 =cut
 928
 929 sub set {
 930         my ($k,$v) = @_;
 931         warn "## set ( $k => ", dump($v), " )", $/ if ( $debug );
 932         $hash->{$k} = $v;
 933 };
 934
 935 =head2 get
 936
 937   get( 'key' );
 938
 939 =cut
 940
 941 sub get {
 942         my $k = shift || return;
 943         my $v = $hash->{$k};
 944         warn "## get $k = ", dump( $v ), $/ if ( $debug );
 945         return $v;
 946 }
 947
 948 =head2 count
 949
 950   if ( count( @result ) == 1 ) {
 951         # do something if only 1 result is there
 952   }
 953
 954 =cut
 955
 956 sub count {
 957         warn "## count ",dump(@_),$/ if ( $debug );
 958         return @_ . '';
 959 }
 960
 961 =head2 rec_array
 962
 963 Always return field as array
 964
 965   foreach my $d ( rec_array('field') ) {
 966         warn $d;
 967   }
 968
 969 =cut
 970
 971 sub rec_array {
 972         my $d = $rec->{ $_[0] };
 973         return @$d if ref($d) eq 'ARRAY';
 974         return ($d);
 975 }
 976
 977 # END
 978 1;