git.rot13.org Git - webpac2/blob - lib/WebPAC/Normalize.pm

   1 package WebPAC::Normalize;
   2 use Exporter 'import';
   3 our @EXPORT = qw/
   4         _set_ds _set_lookup
   5         _set_load_row
   6         _get_ds _clean_ds
   7         _debug
   8         _pack_subfields_hash
   9
  10         to
  11         search_display search display sorted
  12
  13         rec1 rec2 rec
  14         frec frec_eq frec_ne
  15         regex prefix suffix surround
  16         first lookup join_with
  17         save_into_lookup
  18
  19         split_rec_on
  20
  21         get set
  22         count
  23
  24         row
  25         rec_array
  26
  27 /;
  28
  29 use warnings;
  30 use strict;
  31
  32 #use base qw/WebPAC::Common/;
  33 use Data::Dump qw/dump/;
  34 use Carp qw/confess/;
  35
  36 # debugging warn(s)
  37 my $debug = 0;
  38 _debug( $debug );
  39
  40 # FIXME
  41 use WebPAC::Normalize::ISBN;
  42 push @EXPORT, ( 'isbn_10', 'isbn_13' );
  43
  44 use WebPAC::Normalize::MARC;
  45 push @EXPORT, ( qw/
  46         marc marc_indicators marc_repeatable_subfield
  47         marc_compose marc_leader marc_fixed
  48         marc_duplicate marc_remove marc_count
  49         marc_original_order
  50         marc_template
  51 /);
  52
  53 use Storable qw/dclone/;
  54
  55 =head1 NAME
  56
  57 WebPAC::Normalize - describe normalisaton rules using sets
  58
  59 =cut
  60
  61 our $VERSION = '0.36';
  62
  63 =head1 SYNOPSIS
  64
  65 This module uses C<conf/normalize/*.pl> files to perform normalisation
  66 from input records using perl functions which are specialized for set
  67 processing.
  68
  69 Sets are implemented as arrays, and normalisation file is valid perl, which
  70 means that you check it's validity before running WebPAC using
  71 C<perl -c normalize.pl>.
  72
  73 Normalisation can generate multiple output normalized data. For now, supported output
  74 types (on the left side of definition) are: C<search_display>, C<display>, C<search> and
  75 C<marc>.
  76
  77 =head1 FUNCTIONS
  78
  79 Functions which start with C<_> are private and used by WebPAC internally.
  80 All other functions are available for use within normalisation rules.
  81
  82 =head2 data_structure
  83
  84 Return data structure
  85
  86   my $ds = WebPAC::Normalize::data_structure(
  87         lookup => $lookup_hash,
  88         row => $row,
  89         rules => $normalize_pl_config,
  90         marc_encoding => 'utf-8',
  91         config => $config,
  92         load_row_coderef => sub {
  93                 my ($database,$input,$mfn) = @_;
  94                 $store->load_row( database => $database, input => $input, id => $mfn );
  95         },
  96   );
  97
  98 Options C<row>, C<rules> and C<log> are mandatory while all
  99 other are optional.
 100
 101 C<load_row_coderef> is closure only used when executing lookups, so they will
 102 die if it's not defined.
 103
 104 This function will B<die> if normalizastion can't be evaled.
 105
 106 Since this function isn't exported you have to call it with
 107 C<WebPAC::Normalize::data_structure>.
 108
 109 =cut
 110
 111 my $load_row_coderef;
 112
 113 sub data_structure {
 114         my $arg = {@_};
 115
 116         die "need row argument" unless ($arg->{row});
 117         die "need normalisation argument" unless ($arg->{rules});
 118
 119         _set_lookup( $arg->{lookup} ) if defined($arg->{lookup});
 120         _set_ds( $arg->{row} );
 121         _set_config( $arg->{config} ) if defined($arg->{config});
 122         _clean_ds( %{ $arg } );
 123         $load_row_coderef = $arg->{load_row_coderef};
 124
 125         no strict 'subs';
 126         no warnings 'redefine';
 127         eval "$arg->{rules};";
 128         die "error evaling $arg->{rules}: $@\n" if ($@);
 129
 130         return _get_ds();
 131 }
 132
 133 =head2 _set_ds
 134
 135 Set current record hash
 136
 137   _set_ds( $rec );
 138
 139 =cut
 140
 141 my $rec;
 142
 143 sub _set_ds {
 144         $rec = shift or die "no record hash";
 145         $WebPAC::Normalize::MARC::rec = $rec;
 146 }
 147
 148 =head2
 149
 150   my $rec = _get_rec();
 151
 152 =cut
 153
 154 sub _get_rec { $rec };
 155
 156 sub rec_array {
 157         my $d = $rec->{ $_[0] };
 158         return @$d if ref($d) eq 'ARRAY';
 159         die "field $_[0] not array: ",dump( $d );
 160 }
 161
 162 =head2 _set_config
 163
 164 Set current config hash
 165
 166   _set_config( $config );
 167
 168 Magic keys are:
 169
 170 =over 4
 171
 172 =item _
 173
 174 Code of current database
 175
 176 =item _mfn
 177
 178 Current MFN
 179
 180 =back
 181
 182 =cut
 183
 184 my $config;
 185
 186 sub _set_config {
 187         $config = shift;
 188 }
 189
 190 =head2 _get_ds
 191
 192 Return hash formatted as data structure
 193
 194   my $ds = _get_ds();
 195
 196 =cut
 197
 198 my $out;
 199
 200 sub _get_ds {
 201 #warn "## out = ",dump($out);
 202         return $out;
 203 }
 204
 205 =head2 _clean_ds
 206
 207 Clean data structure hash for next record
 208
 209   _clean_ds();
 210
 211 =cut
 212
 213 sub _clean_ds {
 214         my $a = {@_};
 215         $out = undef;
 216         WebPAC::Normalize::MARC::_clean();
 217 }
 218
 219 =head2 _set_lookup
 220
 221 Set current lookup hash
 222
 223   _set_lookup( $lookup );
 224
 225 =cut
 226
 227 my $lookup;
 228
 229 sub _set_lookup {
 230         $lookup = shift;
 231 }
 232
 233 =head2 _get_lookup
 234
 235 Get current lookup hash
 236
 237   my $lookup = _get_lookup();
 238
 239 =cut
 240
 241 sub _get_lookup {
 242         return $lookup;
 243 }
 244
 245 =head2 _set_load_row
 246
 247 Setup code reference which will return L<data_structure> from
 248 L<WebPAC::Store>
 249
 250   _set_load_row(sub {
 251                 my ($database,$input,$mfn) = @_;
 252                 $store->load_row( database => $database, input => $input, id => $mfn );
 253   });
 254
 255 =cut
 256
 257 sub _set_load_row {
 258         my $coderef = shift;
 259         confess "argument isn't CODE" unless ref($coderef) eq 'CODE';
 260
 261         $load_row_coderef = $coderef;
 262 }
 263
 264 =head2 _debug
 265
 266 Change level of debug warnings
 267
 268   _debug( 2 );
 269
 270 =cut
 271
 272 sub _debug {
 273         my $l = shift;
 274         return $debug unless defined($l);
 275         warn "debug level $l",$/ if ($l > 0);
 276         $debug = $l;
 277         $WebPAC::Normalize::MARC::debug = $debug;
 278 }
 279
 280 =head1 Functions to create C<data_structure>
 281
 282 Those functions generally have to first in your normalization file.
 283
 284 =head2 to
 285
 286 Generic way to set values for some name
 287
 288   to('field-name', 'name-value' => rec('200','a') );
 289
 290 There are many helpers defined below which might be easier to use.
 291
 292 =cut
 293
 294 sub to {
 295         my $type = shift or confess "need type -- BUG?";
 296         my $name = shift or confess "needs name as first argument";
 297         my @o = grep { defined($_) && $_ ne '' } @_;
 298         return unless (@o);
 299         $out->{$name}->{$type} = \@o;
 300 }
 301
 302 =head2 search_display
 303
 304 Define output for L<search> and L<display> at the same time
 305
 306   search_display('Title', rec('200','a') );
 307
 308 =cut
 309
 310 sub search_display {
 311         my $name = shift or die "search_display needs name as first argument";
 312         my @o = grep { defined($_) && $_ ne '' } @_;
 313         return unless (@o);
 314         $out->{$name}->{search} = \@o;
 315         $out->{$name}->{display} = \@o;
 316 }
 317
 318 =head2 tag
 319
 320 Old name for L<search_display>, it will probably be removed at one point.
 321
 322 =cut
 323
 324 sub tag {
 325         search_display( @_ );
 326 }
 327
 328 =head2 display
 329
 330 Define output just for I<display>
 331
 332   @v = display('Title', rec('200','a') );
 333
 334 =cut
 335
 336 sub display { to( 'display', @_ ) }
 337
 338 =head2 search
 339
 340 Prepare values just for I<search>
 341
 342   @v = search('Title', rec('200','a') );
 343
 344 =cut
 345
 346 sub search { to( 'search', @_ ) }
 347
 348 =head2 sorted
 349
 350 Insert into lists which will be automatically sorted
 351
 352  sorted('Title', rec('200','a') );
 353
 354 =cut
 355
 356 sub sorted { to( 'sorted', @_ ) }
 357
 358 =head2 row
 359
 360 Insert new row of data into output module
 361
 362   row( column => 'foo', column2 => 'bar' );
 363
 364 =cut
 365
 366 use Data::Dump qw/dump/;
 367
 368 sub row {
 369         die "array doesn't have even number of elements but $#_: ",dump( @_ ) if $#_ % 2 != 1;
 370
 371         push @{ $out->{'_rows'} }, {@_};
 372 }
 373
 374
 375 =head1 Functions to extract data from input
 376
 377 This function should be used inside functions to create C<data_structure> described
 378 above.
 379
 380 =head2 _pack_subfields_hash
 381
 382  @subfields = _pack_subfields_hash( $h );
 383  $subfields = _pack_subfields_hash( $h, 1 );
 384
 385 Return each subfield value in array or pack them all together and return scalar
 386 with subfields (denoted by C<^>) and values.
 387
 388 =cut
 389
 390 sub _pack_subfields_hash {
 391
 392         warn "## _pack_subfields_hash( ",dump(@_), " )\n" if ($debug > 1);
 393
 394         my ($hash,$include_subfields) = @_;
 395
 396         # sanity and ease of use
 397         return $hash if (ref($hash) ne 'HASH');
 398
 399         my $h = dclone( $hash );
 400
 401         if ( defined($h->{subfields}) ) {
 402                 my $sfs = delete $h->{subfields} || die "no subfields?";
 403                 my @out;
 404                 while (@$sfs) {
 405                         my $sf = shift @$sfs;
 406                         push @out, '^' . $sf if ($include_subfields);
 407                         my $o = shift @$sfs;
 408                         if ($o == 0 && ref( $h->{$sf} ) ne 'ARRAY' ) {
 409                                 # single element subfields are not arrays
 410 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
 411
 412                                 push @out, $h->{$sf};
 413                         } else {
 414 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
 415                                 push @out, $h->{$sf}->[$o];
 416                         }
 417                 }
 418                 if ($include_subfields) {
 419                         return join('', @out);
 420                 } else {
 421                         return @out;
 422                 }
 423         } else {
 424                 if ($include_subfields) {
 425                         my $out = '';
 426                         foreach my $sf (sort keys %$h) {
 427                                 if (ref($h->{$sf}) eq 'ARRAY') {
 428                                         $out .= '^' . $sf . join('^' . $sf, @{ $h->{$sf} });
 429                                 } else {
 430                                         $out .= '^' . $sf . $h->{$sf};
 431                                 }
 432                         }
 433                         return $out;
 434                 } else {
 435                         # FIXME this should probably be in alphabetical order instead of hash order
 436                         values %{$h};
 437                 }
 438         }
 439 }
 440
 441 =head2 rec1
 442
 443 Return all values in some field
 444
 445   @v = rec1('200')
 446
 447 TODO: order of values is probably same as in source data, need to investigate that
 448
 449 =cut
 450
 451 sub rec1 {
 452         my $f = shift;
 453         warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
 454         return unless (defined($rec) && defined($rec->{$f}));
 455         warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
 456         if (ref($rec->{$f}) eq 'ARRAY') {
 457                 my @out;
 458                 foreach my $h ( @{ $rec->{$f} } ) {
 459                         if (ref($h) eq 'HASH') {
 460                                 push @out, ( _pack_subfields_hash( $h ) );
 461                         } else {
 462                                 push @out, $h;
 463                         }
 464                 }
 465                 return @out;
 466         } elsif( defined($rec->{$f}) ) {
 467                 return $rec->{$f};
 468         }
 469 }
 470
 471 =head2 rec2
 472
 473 Return all values in specific field and subfield
 474
 475   @v = rec2('200','a')
 476
 477 =cut
 478
 479 sub rec2 {
 480         my $f = shift;
 481         return unless (defined($rec && $rec->{$f}));
 482         my $sf = shift;
 483         warn "rec2($f,$sf) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
 484         return map {
 485                 if (ref($_->{$sf}) eq 'ARRAY') {
 486                         @{ $_->{$sf} };
 487                 } else {
 488                         $_->{$sf};
 489                 }
 490         } grep { ref($_) eq 'HASH' && defined $_->{$sf} } @{ $rec->{$f} };
 491 }
 492
 493 =head2 rec
 494
 495 syntaxtic sugar for
 496
 497   @v = rec('200')
 498   @v = rec('200','a')
 499
 500 If rec() returns just single value, it will
 501 return scalar, not array.
 502
 503 =cut
 504
 505 sub rec {
 506         my @out;
 507         if ($#_ == 0) {
 508                 @out = rec1(@_);
 509         } elsif ($#_ == 1) {
 510                 @out = rec2(@_);
 511         }
 512         if ($#out == 0 && ! wantarray) {
 513                 return $out[0];
 514         } elsif (@out) {
 515                 return @out;
 516         } else {
 517                 return '';
 518         }
 519 }
 520
 521 =head2 frec
 522
 523 Returns first value from field
 524
 525   $v = frec('200');
 526   $v = frec('200','a');
 527
 528 =cut
 529
 530 sub frec {
 531         my @out = rec(@_);
 532         warn "rec(",dump(@_),") has more than one return value, ignoring\n" if $#out > 0;
 533         return shift @out;
 534 }
 535
 536 =head2 frec_eq
 537
 538 =head2 frec_ne
 539
 540 Check if first values from two fields are same or different
 541
 542   if ( frec_eq( 900 => 'a', 910 => 'c' ) ) {
 543         # values are same
 544   } else {
 545     # values are different
 546   }
 547
 548 Strictly speaking C<frec_eq> and C<frec_ne> wouldn't be needed if you
 549 could write something like:
 550
 551   if ( frec( '900','a' ) eq frec( '910','c' ) ) {
 552         # yada tada
 553   }
 554
 555 but you can't since our parser L<WebPAC::Parser> will remove all whitespaces
 556 in order to parse text and create invalid function C<eqfrec>.
 557
 558 =cut
 559
 560 sub frec_eq {
 561         my ( $f1,$sf1, $f2, $sf2 ) = @_;
 562         return (rec( $f1, $sf1 ))[0] eq (rec( $f2, $sf2 ))[0];
 563 }
 564
 565 sub frec_ne {
 566         return ! frec_eq( @_ );
 567 }
 568
 569 =head2 regex
 570
 571 Apply regex to some or all values
 572
 573   @v = regex( 's/foo/bar/g', @v );
 574
 575 =cut
 576
 577 sub regex {
 578         my $r = shift;
 579         my @out;
 580         #warn "r: $r\n", dump(\@_);
 581         foreach my $t (@_) {
 582                 next unless ($t);
 583                 eval "\$t =~ $r";
 584                 push @out, $t if ($t && $t ne '');
 585         }
 586         return @out;
 587 }
 588
 589 =head2 prefix
 590
 591 Prefix all values with a string
 592
 593   @v = prefix( 'my_', @v );
 594
 595 =cut
 596
 597 sub prefix {
 598         my $p = shift;
 599         return @_ unless defined( $p );
 600         return map { $p . $_ } grep { defined($_) } @_;
 601 }
 602
 603 =head2 suffix
 604
 605 suffix all values with a string
 606
 607   @v = suffix( '_my', @v );
 608
 609 =cut
 610
 611 sub suffix {
 612         my $s = shift;
 613         return @_ unless defined( $s );
 614         return map { $_ . $s } grep { defined($_) } @_;
 615 }
 616
 617 =head2 surround
 618
 619 surround all values with a two strings
 620
 621   @v = surround( 'prefix_', '_suffix', @v );
 622
 623 =cut
 624
 625 sub surround {
 626         my $p = shift;
 627         my $s = shift;
 628         $p = '' unless defined( $p );
 629         $s = '' unless defined( $s );
 630         return map { $p . $_ . $s } grep { defined($_) } @_;
 631 }
 632
 633 =head2 first
 634
 635 Return first element
 636
 637   $v = first( @v );
 638
 639 =cut
 640
 641 sub first {
 642         my $r = shift;
 643         return $r;
 644 }
 645
 646 =head2 lookup
 647
 648 Consult lookup hashes for some value
 649
 650   @v = lookup(
 651         sub {
 652                 'ffkk/peri/mfn'.rec('000')
 653         },
 654         'ffkk','peri','200-a-200-e',
 655         sub {
 656                 first(rec(200,'a')).' '.first(rec('200','e'))
 657         }
 658   );
 659
 660 Code like above will be B<automatically generated> using L<WebPAC::Parse> from
 661 normal lookup definition in C<conf/lookup/something.pl> which looks like:
 662
 663   lookup(
 664         # which results to return from record recorded in lookup
 665         sub { 'ffkk/peri/mfn' . rec('000') },
 666         # from which database and input
 667         'ffkk','peri',
 668         # such that following values match
 669         sub { first(rec(200,'a')) . ' ' . first(rec('200','e')) },
 670         # if this part is missing, we will try to match same fields
 671         # from lookup record and current one, or you can override
 672         # which records to use from current record using
 673         sub { rec('900','x') . ' ' . rec('900','y') },
 674   )
 675
 676 You can think about this lookup as SQL (if that helps):
 677
 678   select
 679         sub { what }
 680   from
 681         database, input
 682   where
 683     sub { filter from lookuped record }
 684   having
 685     sub { optional filter on current record }
 686
 687 Easy as pie, right?
 688
 689 =cut
 690
 691 sub lookup {
 692         my ($what, $database, $input, $key, $having) = @_;
 693
 694         confess "lookup needs 5 arguments: what, database, input, key, having\n" unless ($#_ == 4);
 695
 696         warn "## lookup ($database, $input, $key)", $/ if ($debug > 1);
 697         return unless (defined($lookup->{$database}->{$input}->{$key}));
 698
 699         confess "lookup really need load_row_coderef added to data_structure\n" unless ($load_row_coderef);
 700
 701         my $mfns;
 702         my @having = $having->();
 703
 704         warn "## having = ", dump( @having ) if ($debug > 2);
 705
 706         foreach my $h ( @having ) {
 707                 if (defined($lookup->{$database}->{$input}->{$key}->{$h})) {
 708                         warn "lookup for $database/$input/$key/$h return ",dump($lookup->{$database}->{$input}->{$key}->{$h}),"\n" if ($debug);
 709                         $mfns->{$_}++ foreach keys %{ $lookup->{$database}->{$input}->{$key}->{$h} };
 710                 }
 711         }
 712
 713         return unless ($mfns);
 714
 715         my @mfns = sort keys %$mfns;
 716
 717         warn "# lookup loading $database/$input/$key mfn ", join(",",@mfns)," having ",dump(@having),"\n" if ($debug);
 718
 719         my $old_rec = $rec;
 720         my @out;
 721
 722         foreach my $mfn (@mfns) {
 723                 $rec = $load_row_coderef->( $database, $input, $mfn );
 724
 725                 warn "got $database/$input/$mfn = ", dump($rec), $/ if ($debug);
 726
 727                 my @vals = $what->();
 728
 729                 push @out, ( @vals );
 730
 731                 warn "lookup for mfn $mfn returned ", dump(@vals), $/ if ($debug);
 732         }
 733
 734 #       if (ref($lookup->{$k}) eq 'ARRAY') {
 735 #               return @{ $lookup->{$k} };
 736 #       } else {
 737 #               return $lookup->{$k};
 738 #       }
 739
 740         $rec = $old_rec;
 741
 742         warn "## lookup returns = ", dump(@out), $/ if ($debug);
 743
 744         if ($#out == 0) {
 745                 return $out[0];
 746         } else {
 747                 return @out;
 748         }
 749 }
 750
 751 =head2 save_into_lookup
 752
 753 Save value into lookup. It associates current database, input
 754 and specific keys with one or more values which will be
 755 associated over MFN.
 756
 757 MFN will be extracted from first occurence current of field 000
 758 in current record, or if it doesn't exist from L<_set_config> C<_mfn>.
 759
 760   my $nr = save_into_lookup($database,$input,$key,sub {
 761         # code which produce one or more values
 762   });
 763
 764 It returns number of items saved.
 765
 766 This function shouldn't be called directly, it's called from code created by
 767 L<WebPAC::Parser>.
 768
 769 =cut
 770
 771 sub save_into_lookup {
 772         my ($database,$input,$key,$coderef) = @_;
 773         die "save_into_lookup needs database" unless defined($database);
 774         die "save_into_lookup needs input" unless defined($input);
 775         die "save_into_lookup needs key" unless defined($key);
 776         die "save_into_lookup needs CODE" unless ( defined($coderef) && ref($coderef) eq 'CODE' );
 777
 778         warn "## save_into_lookup rec = ", dump($rec), " config = ", dump($config), $/ if ($debug > 2);
 779
 780         my $mfn =
 781                 defined($rec->{'000'}->[0])     ?       $rec->{'000'}->[0]      :
 782                 defined($config->{_mfn})        ?       $config->{_mfn}         :
 783                                                                                 die "mfn not defined or zero";
 784
 785         my $nr = 0;
 786
 787         foreach my $v ( $coderef->() ) {
 788                 $lookup->{$database}->{$input}->{$key}->{$v}->{$mfn}++;
 789                 warn "# saved lookup $database/$input/$key [$v] $mfn\n" if ($debug > 1);
 790                 $nr++;
 791         }
 792
 793         return $nr;
 794 }
 795
 796 =head2 config
 797
 798 Consult config values stored in C<config.yml>
 799
 800   # return database code (key under databases in yaml)
 801   $database_code = config();    # use _ from hash
 802   $database_name = config('name');
 803   $database_input_name = config('input name');
 804
 805 Up to three levels are supported.
 806
 807 =cut
 808
 809 sub config {
 810         return unless ($config);
 811
 812         my $p = shift;
 813
 814         $p ||= '';
 815
 816         my $v;
 817
 818         warn "### getting config($p)\n" if ($debug > 1);
 819
 820         my @p = split(/\s+/,$p);
 821         if ($#p < 0) {
 822                 $v = $config->{ '_' };  # special, database code
 823         } else {
 824
 825                 my $c = dclone( $config );
 826
 827                 foreach my $k (@p) {
 828                         warn "### k: $k c = ",dump($c),$/ if ($debug > 1);
 829                         if (ref($c) eq 'ARRAY') {
 830                                 $c = shift @$c;
 831                                 warn "config($p) taking first occurence of '$k', probably not what you wanted!\n";
 832                                 last;
 833                         }
 834
 835                         if (! defined($c->{$k}) ) {
 836                                 $c = undef;
 837                                 last;
 838                         } else {
 839                                 $c = $c->{$k};
 840                         }
 841                 }
 842                 $v = $c if ($c);
 843
 844         }
 845
 846         warn "## config( '$p' ) = ",dump( $v ),$/ if ($v && $debug);
 847         warn "config( '$p' ) is empty\n" if (! $v);
 848
 849         return $v;
 850 }
 851
 852 =head2 id
 853
 854 Returns unique id of this record
 855
 856   $id = id();
 857
 858 Returns C<42/2> for 2nd occurence of MFN 42.
 859
 860 =cut
 861
 862 sub id {
 863         my $mfn = $config->{_mfn} || die "no _mfn in config data";
 864         return $mfn . ( WebPAC::Normalize::MARC::_created_marc_records() || '' );
 865 }
 866
 867 =head2 join_with
 868
 869 Joins walues with some delimiter
 870
 871   $v = join_with(", ", @v);
 872
 873 =cut
 874
 875 sub join_with {
 876         my $d = shift;
 877         warn "### join_with('$d',",dump(@_),")\n" if ($debug > 2);
 878         my $v = join($d, grep { defined($_) && $_ ne '' } @_);
 879         return '' unless defined($v);
 880         return $v;
 881 }
 882
 883 =head2 split_rec_on
 884
 885 Split record subfield on some regex and take one of parts out
 886
 887   $a_before_semi_column =
 888         split_rec_on('200','a', /\s*;\s*/, $part);
 889
 890 C<$part> is optional number of element. First element is
 891 B<1>, not 0!
 892
 893 If there is no C<$part> parameter or C<$part> is 0, this function will
 894 return all values produced by splitting.
 895
 896 =cut
 897
 898 sub split_rec_on {
 899         die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
 900
 901         my ($fld, $sf, $regex, $part) = @_;
 902         warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
 903
 904         my @r = rec( $fld, $sf );
 905         my $v = shift @r;
 906         warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
 907
 908         return '' if ( ! defined($v) || $v =~ /^\s*$/);
 909
 910         my @s = split( $regex, $v );
 911         warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
 912         if ($part && $part > 0) {
 913                 return $s[ $part - 1 ];
 914         } else {
 915                 return @s;
 916         }
 917 }
 918
 919 my $hash;
 920
 921 =head2 set
 922
 923   set( key => 'value' );
 924
 925 =cut
 926
 927 sub set {
 928         my ($k,$v) = @_;
 929         warn "## set ( $k => ", dump($v), " )", $/ if ( $debug );
 930         $hash->{$k} = $v;
 931 };
 932
 933 =head2 get
 934
 935   get( 'key' );
 936
 937 =cut
 938
 939 sub get {
 940         my $k = shift || return;
 941         my $v = $hash->{$k};
 942         warn "## get $k = ", dump( $v ), $/ if ( $debug );
 943         return $v;
 944 }
 945
 946 =head2 count
 947
 948   if ( count( @result ) == 1 ) {
 949         # do something if only 1 result is there
 950   }
 951
 952 =cut
 953
 954 sub count {
 955         warn "## count ",dump(@_),$/ if ( $debug );
 956         return @_ . '';
 957 }
 958
 959 # END
 960 1;