git.rot13.org Git - webpac2/blob - lib/WebPAC/Normalize.pm

   1 package WebPAC::Normalize;
   2 use Exporter 'import';
   3 our @EXPORT = qw/
   4         _set_ds _set_lookup
   5         _set_load_row
   6         _get_ds _clean_ds
   7         _debug
   8         _pack_subfields_hash
   9
  10         to
  11         search_display search display sorted
  12
  13         rec1 rec2 rec
  14         frec frec_eq frec_ne
  15         regex prefix suffix surround
  16         first lookup join_with
  17         save_into_lookup
  18
  19         split_rec_on
  20
  21         get set
  22         count
  23
  24 /;
  25
  26 use warnings;
  27 use strict;
  28
  29 #use base qw/WebPAC::Common/;
  30 use Data::Dump qw/dump/;
  31 use Carp qw/confess/;
  32
  33 # debugging warn(s)
  34 my $debug = 0;
  35 _debug( $debug );
  36
  37 # FIXME
  38 use WebPAC::Normalize::ISBN;
  39 push @EXPORT, ( 'isbn_10', 'isbn_13' );
  40
  41 use WebPAC::Normalize::MARC;
  42 push @EXPORT, ( qw/
  43         marc marc_indicators marc_repeatable_subfield
  44         marc_compose marc_leader marc_fixed
  45         marc_duplicate marc_remove marc_count
  46         marc_original_order
  47         marc_template
  48 /);
  49
  50 use Storable qw/dclone/;
  51
  52 =head1 NAME
  53
  54 WebPAC::Normalize - describe normalisaton rules using sets
  55
  56 =cut
  57
  58 our $VERSION = '0.36';
  59
  60 =head1 SYNOPSIS
  61
  62 This module uses C<conf/normalize/*.pl> files to perform normalisation
  63 from input records using perl functions which are specialized for set
  64 processing.
  65
  66 Sets are implemented as arrays, and normalisation file is valid perl, which
  67 means that you check it's validity before running WebPAC using
  68 C<perl -c normalize.pl>.
  69
  70 Normalisation can generate multiple output normalized data. For now, supported output
  71 types (on the left side of definition) are: C<search_display>, C<display>, C<search> and
  72 C<marc>.
  73
  74 =head1 FUNCTIONS
  75
  76 Functions which start with C<_> are private and used by WebPAC internally.
  77 All other functions are available for use within normalisation rules.
  78
  79 =head2 data_structure
  80
  81 Return data structure
  82
  83   my $ds = WebPAC::Normalize::data_structure(
  84         lookup => $lookup_hash,
  85         row => $row,
  86         rules => $normalize_pl_config,
  87         marc_encoding => 'utf-8',
  88         config => $config,
  89         load_row_coderef => sub {
  90                 my ($database,$input,$mfn) = @_;
  91                 $store->load_row( database => $database, input => $input, id => $mfn );
  92         },
  93   );
  94
  95 Options C<row>, C<rules> and C<log> are mandatory while all
  96 other are optional.
  97
  98 C<load_row_coderef> is closure only used when executing lookups, so they will
  99 die if it's not defined.
 100
 101 This function will B<die> if normalizastion can't be evaled.
 102
 103 Since this function isn't exported you have to call it with
 104 C<WebPAC::Normalize::data_structure>.
 105
 106 =cut
 107
 108 my $load_row_coderef;
 109
 110 sub data_structure {
 111         my $arg = {@_};
 112
 113         die "need row argument" unless ($arg->{row});
 114         die "need normalisation argument" unless ($arg->{rules});
 115
 116         _set_lookup( $arg->{lookup} ) if defined($arg->{lookup});
 117         _set_ds( $arg->{row} );
 118         _set_config( $arg->{config} ) if defined($arg->{config});
 119         _clean_ds( %{ $arg } );
 120         $load_row_coderef = $arg->{load_row_coderef};
 121
 122         no strict 'subs';
 123         no warnings 'redefine';
 124         eval "$arg->{rules};";
 125         die "error evaling $arg->{rules}: $@\n" if ($@);
 126
 127         return _get_ds();
 128 }
 129
 130 =head2 _set_ds
 131
 132 Set current record hash
 133
 134   _set_ds( $rec );
 135
 136 =cut
 137
 138 my $rec;
 139
 140 sub _set_ds {
 141         $rec = shift or die "no record hash";
 142         $WebPAC::Normalize::MARC::rec = $rec;
 143 }
 144
 145 =head2
 146
 147   my $rec = _get_rec();
 148
 149 =cut
 150
 151 sub _get_rec { $rec };
 152
 153 =head2 _set_config
 154
 155 Set current config hash
 156
 157   _set_config( $config );
 158
 159 Magic keys are:
 160
 161 =over 4
 162
 163 =item _
 164
 165 Code of current database
 166
 167 =item _mfn
 168
 169 Current MFN
 170
 171 =back
 172
 173 =cut
 174
 175 my $config;
 176
 177 sub _set_config {
 178         $config = shift;
 179 }
 180
 181 =head2 _get_ds
 182
 183 Return hash formatted as data structure
 184
 185   my $ds = _get_ds();
 186
 187 =cut
 188
 189 my $out;
 190
 191 sub _get_ds {
 192 #warn "## out = ",dump($out);
 193         return $out;
 194 }
 195
 196 =head2 _clean_ds
 197
 198 Clean data structure hash for next record
 199
 200   _clean_ds();
 201
 202 =cut
 203
 204 sub _clean_ds {
 205         my $a = {@_};
 206         $out = undef;
 207         WebPAC::Normalize::MARC::_clean();
 208 }
 209
 210 =head2 _set_lookup
 211
 212 Set current lookup hash
 213
 214   _set_lookup( $lookup );
 215
 216 =cut
 217
 218 my $lookup;
 219
 220 sub _set_lookup {
 221         $lookup = shift;
 222 }
 223
 224 =head2 _get_lookup
 225
 226 Get current lookup hash
 227
 228   my $lookup = _get_lookup();
 229
 230 =cut
 231
 232 sub _get_lookup {
 233         return $lookup;
 234 }
 235
 236 =head2 _set_load_row
 237
 238 Setup code reference which will return L<data_structure> from
 239 L<WebPAC::Store>
 240
 241   _set_load_row(sub {
 242                 my ($database,$input,$mfn) = @_;
 243                 $store->load_row( database => $database, input => $input, id => $mfn );
 244   });
 245
 246 =cut
 247
 248 sub _set_load_row {
 249         my $coderef = shift;
 250         confess "argument isn't CODE" unless ref($coderef) eq 'CODE';
 251
 252         $load_row_coderef = $coderef;
 253 }
 254
 255 =head2 _debug
 256
 257 Change level of debug warnings
 258
 259   _debug( 2 );
 260
 261 =cut
 262
 263 sub _debug {
 264         my $l = shift;
 265         return $debug unless defined($l);
 266         warn "debug level $l",$/ if ($l > 0);
 267         $debug = $l;
 268         $WebPAC::Normalize::MARC::debug = $debug;
 269 }
 270
 271 =head1 Functions to create C<data_structure>
 272
 273 Those functions generally have to first in your normalization file.
 274
 275 =head2 to
 276
 277 Generic way to set values for some name
 278
 279   to('field-name', 'name-value' => rec('200','a') );
 280
 281 There are many helpers defined below which might be easier to use.
 282
 283 =cut
 284
 285 sub to {
 286         my $type = shift or confess "need type -- BUG?";
 287         my $name = shift or confess "needs name as first argument";
 288         my @o = grep { defined($_) && $_ ne '' } @_;
 289         return unless (@o);
 290         $out->{$name}->{$type} = \@o;
 291 }
 292
 293 =head2 search_display
 294
 295 Define output for L<search> and L<display> at the same time
 296
 297   search_display('Title', rec('200','a') );
 298
 299 =cut
 300
 301 sub search_display {
 302         my $name = shift or die "search_display needs name as first argument";
 303         my @o = grep { defined($_) && $_ ne '' } @_;
 304         return unless (@o);
 305         $out->{$name}->{search} = \@o;
 306         $out->{$name}->{display} = \@o;
 307 }
 308
 309 =head2 tag
 310
 311 Old name for L<search_display>, it will probably be removed at one point.
 312
 313 =cut
 314
 315 sub tag {
 316         search_display( @_ );
 317 }
 318
 319 =head2 display
 320
 321 Define output just for I<display>
 322
 323   @v = display('Title', rec('200','a') );
 324
 325 =cut
 326
 327 sub display { to( 'display', @_ ) }
 328
 329 =head2 search
 330
 331 Prepare values just for I<search>
 332
 333   @v = search('Title', rec('200','a') );
 334
 335 =cut
 336
 337 sub search { to( 'search', @_ ) }
 338
 339 =head2 sorted
 340
 341 Insert into lists which will be automatically sorted
 342
 343  sorted('Title', rec('200','a') );
 344
 345 =cut
 346
 347 sub sorted { to( 'sorted', @_ ) }
 348
 349
 350 =head1 Functions to extract data from input
 351
 352 This function should be used inside functions to create C<data_structure> described
 353 above.
 354
 355 =head2 _pack_subfields_hash
 356
 357  @subfields = _pack_subfields_hash( $h );
 358  $subfields = _pack_subfields_hash( $h, 1 );
 359
 360 Return each subfield value in array or pack them all together and return scalar
 361 with subfields (denoted by C<^>) and values.
 362
 363 =cut
 364
 365 sub _pack_subfields_hash {
 366
 367         warn "## _pack_subfields_hash( ",dump(@_), " )\n" if ($debug > 1);
 368
 369         my ($hash,$include_subfields) = @_;
 370
 371         # sanity and ease of use
 372         return $hash if (ref($hash) ne 'HASH');
 373
 374         my $h = dclone( $hash );
 375
 376         if ( defined($h->{subfields}) ) {
 377                 my $sfs = delete $h->{subfields} || die "no subfields?";
 378                 my @out;
 379                 while (@$sfs) {
 380                         my $sf = shift @$sfs;
 381                         push @out, '^' . $sf if ($include_subfields);
 382                         my $o = shift @$sfs;
 383                         if ($o == 0 && ref( $h->{$sf} ) ne 'ARRAY' ) {
 384                                 # single element subfields are not arrays
 385 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
 386
 387                                 push @out, $h->{$sf};
 388                         } else {
 389 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
 390                                 push @out, $h->{$sf}->[$o];
 391                         }
 392                 }
 393                 if ($include_subfields) {
 394                         return join('', @out);
 395                 } else {
 396                         return @out;
 397                 }
 398         } else {
 399                 if ($include_subfields) {
 400                         my $out = '';
 401                         foreach my $sf (sort keys %$h) {
 402                                 if (ref($h->{$sf}) eq 'ARRAY') {
 403                                         $out .= '^' . $sf . join('^' . $sf, @{ $h->{$sf} });
 404                                 } else {
 405                                         $out .= '^' . $sf . $h->{$sf};
 406                                 }
 407                         }
 408                         return $out;
 409                 } else {
 410                         # FIXME this should probably be in alphabetical order instead of hash order
 411                         values %{$h};
 412                 }
 413         }
 414 }
 415
 416 =head2 rec1
 417
 418 Return all values in some field
 419
 420   @v = rec1('200')
 421
 422 TODO: order of values is probably same as in source data, need to investigate that
 423
 424 =cut
 425
 426 sub rec1 {
 427         my $f = shift;
 428         warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
 429         return unless (defined($rec) && defined($rec->{$f}));
 430         warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
 431         if (ref($rec->{$f}) eq 'ARRAY') {
 432                 my @out;
 433                 foreach my $h ( @{ $rec->{$f} } ) {
 434                         if (ref($h) eq 'HASH') {
 435                                 push @out, ( _pack_subfields_hash( $h ) );
 436                         } else {
 437                                 push @out, $h;
 438                         }
 439                 }
 440                 return @out;
 441         } elsif( defined($rec->{$f}) ) {
 442                 return $rec->{$f};
 443         }
 444 }
 445
 446 =head2 rec2
 447
 448 Return all values in specific field and subfield
 449
 450   @v = rec2('200','a')
 451
 452 =cut
 453
 454 sub rec2 {
 455         my $f = shift;
 456         return unless (defined($rec && $rec->{$f}));
 457         my $sf = shift;
 458         warn "rec2($f,$sf) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
 459         return map {
 460                 if (ref($_->{$sf}) eq 'ARRAY') {
 461                         @{ $_->{$sf} };
 462                 } else {
 463                         $_->{$sf};
 464                 }
 465         } grep { ref($_) eq 'HASH' && defined $_->{$sf} } @{ $rec->{$f} };
 466 }
 467
 468 =head2 rec
 469
 470 syntaxtic sugar for
 471
 472   @v = rec('200')
 473   @v = rec('200','a')
 474
 475 If rec() returns just single value, it will
 476 return scalar, not array.
 477
 478 =cut
 479
 480 sub rec {
 481         my @out;
 482         if ($#_ == 0) {
 483                 @out = rec1(@_);
 484         } elsif ($#_ == 1) {
 485                 @out = rec2(@_);
 486         }
 487         if ($#out == 0 && ! wantarray) {
 488                 return $out[0];
 489         } elsif (@out) {
 490                 return @out;
 491         } else {
 492                 return '';
 493         }
 494 }
 495
 496 =head2 frec
 497
 498 Returns first value from field
 499
 500   $v = frec('200');
 501   $v = frec('200','a');
 502
 503 =cut
 504
 505 sub frec {
 506         my @out = rec(@_);
 507         warn "rec(",dump(@_),") has more than one return value, ignoring\n" if $#out > 0;
 508         return shift @out;
 509 }
 510
 511 =head2 frec_eq
 512
 513 =head2 frec_ne
 514
 515 Check if first values from two fields are same or different
 516
 517   if ( frec_eq( 900 => 'a', 910 => 'c' ) ) {
 518         # values are same
 519   } else {
 520     # values are different
 521   }
 522
 523 Strictly speaking C<frec_eq> and C<frec_ne> wouldn't be needed if you
 524 could write something like:
 525
 526   if ( frec( '900','a' ) eq frec( '910','c' ) ) {
 527         # yada tada
 528   }
 529
 530 but you can't since our parser L<WebPAC::Parser> will remove all whitespaces
 531 in order to parse text and create invalid function C<eqfrec>.
 532
 533 =cut
 534
 535 sub frec_eq {
 536         my ( $f1,$sf1, $f2, $sf2 ) = @_;
 537         return (rec( $f1, $sf1 ))[0] eq (rec( $f2, $sf2 ))[0];
 538 }
 539
 540 sub frec_ne {
 541         return ! frec_eq( @_ );
 542 }
 543
 544 =head2 regex
 545
 546 Apply regex to some or all values
 547
 548   @v = regex( 's/foo/bar/g', @v );
 549
 550 =cut
 551
 552 sub regex {
 553         my $r = shift;
 554         my @out;
 555         #warn "r: $r\n", dump(\@_);
 556         foreach my $t (@_) {
 557                 next unless ($t);
 558                 eval "\$t =~ $r";
 559                 push @out, $t if ($t && $t ne '');
 560         }
 561         return @out;
 562 }
 563
 564 =head2 prefix
 565
 566 Prefix all values with a string
 567
 568   @v = prefix( 'my_', @v );
 569
 570 =cut
 571
 572 sub prefix {
 573         my $p = shift;
 574         return @_ unless defined( $p );
 575         return map { $p . $_ } grep { defined($_) } @_;
 576 }
 577
 578 =head2 suffix
 579
 580 suffix all values with a string
 581
 582   @v = suffix( '_my', @v );
 583
 584 =cut
 585
 586 sub suffix {
 587         my $s = shift;
 588         return @_ unless defined( $s );
 589         return map { $_ . $s } grep { defined($_) } @_;
 590 }
 591
 592 =head2 surround
 593
 594 surround all values with a two strings
 595
 596   @v = surround( 'prefix_', '_suffix', @v );
 597
 598 =cut
 599
 600 sub surround {
 601         my $p = shift;
 602         my $s = shift;
 603         $p = '' unless defined( $p );
 604         $s = '' unless defined( $s );
 605         return map { $p . $_ . $s } grep { defined($_) } @_;
 606 }
 607
 608 =head2 first
 609
 610 Return first element
 611
 612   $v = first( @v );
 613
 614 =cut
 615
 616 sub first {
 617         my $r = shift;
 618         return $r;
 619 }
 620
 621 =head2 lookup
 622
 623 Consult lookup hashes for some value
 624
 625   @v = lookup(
 626         sub {
 627                 'ffkk/peri/mfn'.rec('000')
 628         },
 629         'ffkk','peri','200-a-200-e',
 630         sub {
 631                 first(rec(200,'a')).' '.first(rec('200','e'))
 632         }
 633   );
 634
 635 Code like above will be B<automatically generated> using L<WebPAC::Parse> from
 636 normal lookup definition in C<conf/lookup/something.pl> which looks like:
 637
 638   lookup(
 639         # which results to return from record recorded in lookup
 640         sub { 'ffkk/peri/mfn' . rec('000') },
 641         # from which database and input
 642         'ffkk','peri',
 643         # such that following values match
 644         sub { first(rec(200,'a')) . ' ' . first(rec('200','e')) },
 645         # if this part is missing, we will try to match same fields
 646         # from lookup record and current one, or you can override
 647         # which records to use from current record using
 648         sub { rec('900','x') . ' ' . rec('900','y') },
 649   )
 650
 651 You can think about this lookup as SQL (if that helps):
 652
 653   select
 654         sub { what }
 655   from
 656         database, input
 657   where
 658     sub { filter from lookuped record }
 659   having
 660     sub { optional filter on current record }
 661
 662 Easy as pie, right?
 663
 664 =cut
 665
 666 sub lookup {
 667         my ($what, $database, $input, $key, $having) = @_;
 668
 669         confess "lookup needs 5 arguments: what, database, input, key, having\n" unless ($#_ == 4);
 670
 671         warn "## lookup ($database, $input, $key)", $/ if ($debug > 1);
 672         return unless (defined($lookup->{$database}->{$input}->{$key}));
 673
 674         confess "lookup really need load_row_coderef added to data_structure\n" unless ($load_row_coderef);
 675
 676         my $mfns;
 677         my @having = $having->();
 678
 679         warn "## having = ", dump( @having ) if ($debug > 2);
 680
 681         foreach my $h ( @having ) {
 682                 if (defined($lookup->{$database}->{$input}->{$key}->{$h})) {
 683                         warn "lookup for $database/$input/$key/$h return ",dump($lookup->{$database}->{$input}->{$key}->{$h}),"\n" if ($debug);
 684                         $mfns->{$_}++ foreach keys %{ $lookup->{$database}->{$input}->{$key}->{$h} };
 685                 }
 686         }
 687
 688         return unless ($mfns);
 689
 690         my @mfns = sort keys %$mfns;
 691
 692         warn "# lookup loading $database/$input/$key mfn ", join(",",@mfns)," having ",dump(@having),"\n" if ($debug);
 693
 694         my $old_rec = $rec;
 695         my @out;
 696
 697         foreach my $mfn (@mfns) {
 698                 $rec = $load_row_coderef->( $database, $input, $mfn );
 699
 700                 warn "got $database/$input/$mfn = ", dump($rec), $/ if ($debug);
 701
 702                 my @vals = $what->();
 703
 704                 push @out, ( @vals );
 705
 706                 warn "lookup for mfn $mfn returned ", dump(@vals), $/ if ($debug);
 707         }
 708
 709 #       if (ref($lookup->{$k}) eq 'ARRAY') {
 710 #               return @{ $lookup->{$k} };
 711 #       } else {
 712 #               return $lookup->{$k};
 713 #       }
 714
 715         $rec = $old_rec;
 716
 717         warn "## lookup returns = ", dump(@out), $/ if ($debug);
 718
 719         if ($#out == 0) {
 720                 return $out[0];
 721         } else {
 722                 return @out;
 723         }
 724 }
 725
 726 =head2 save_into_lookup
 727
 728 Save value into lookup. It associates current database, input
 729 and specific keys with one or more values which will be
 730 associated over MFN.
 731
 732 MFN will be extracted from first occurence current of field 000
 733 in current record, or if it doesn't exist from L<_set_config> C<_mfn>.
 734
 735   my $nr = save_into_lookup($database,$input,$key,sub {
 736         # code which produce one or more values
 737   });
 738
 739 It returns number of items saved.
 740
 741 This function shouldn't be called directly, it's called from code created by
 742 L<WebPAC::Parser>.
 743
 744 =cut
 745
 746 sub save_into_lookup {
 747         my ($database,$input,$key,$coderef) = @_;
 748         die "save_into_lookup needs database" unless defined($database);
 749         die "save_into_lookup needs input" unless defined($input);
 750         die "save_into_lookup needs key" unless defined($key);
 751         die "save_into_lookup needs CODE" unless ( defined($coderef) && ref($coderef) eq 'CODE' );
 752
 753         warn "## save_into_lookup rec = ", dump($rec), " config = ", dump($config), $/ if ($debug > 2);
 754
 755         my $mfn =
 756                 defined($rec->{'000'}->[0])     ?       $rec->{'000'}->[0]      :
 757                 defined($config->{_mfn})        ?       $config->{_mfn}         :
 758                                                                                 die "mfn not defined or zero";
 759
 760         my $nr = 0;
 761
 762         foreach my $v ( $coderef->() ) {
 763                 $lookup->{$database}->{$input}->{$key}->{$v}->{$mfn}++;
 764                 warn "# saved lookup $database/$input/$key [$v] $mfn\n" if ($debug > 1);
 765                 $nr++;
 766         }
 767
 768         return $nr;
 769 }
 770
 771 =head2 config
 772
 773 Consult config values stored in C<config.yml>
 774
 775   # return database code (key under databases in yaml)
 776   $database_code = config();    # use _ from hash
 777   $database_name = config('name');
 778   $database_input_name = config('input name');
 779
 780 Up to three levels are supported.
 781
 782 =cut
 783
 784 sub config {
 785         return unless ($config);
 786
 787         my $p = shift;
 788
 789         $p ||= '';
 790
 791         my $v;
 792
 793         warn "### getting config($p)\n" if ($debug > 1);
 794
 795         my @p = split(/\s+/,$p);
 796         if ($#p < 0) {
 797                 $v = $config->{ '_' };  # special, database code
 798         } else {
 799
 800                 my $c = dclone( $config );
 801
 802                 foreach my $k (@p) {
 803                         warn "### k: $k c = ",dump($c),$/ if ($debug > 1);
 804                         if (ref($c) eq 'ARRAY') {
 805                                 $c = shift @$c;
 806                                 warn "config($p) taking first occurence of '$k', probably not what you wanted!\n";
 807                                 last;
 808                         }
 809
 810                         if (! defined($c->{$k}) ) {
 811                                 $c = undef;
 812                                 last;
 813                         } else {
 814                                 $c = $c->{$k};
 815                         }
 816                 }
 817                 $v = $c if ($c);
 818
 819         }
 820
 821         warn "## config( '$p' ) = ",dump( $v ),$/ if ($v && $debug);
 822         warn "config( '$p' ) is empty\n" if (! $v);
 823
 824         return $v;
 825 }
 826
 827 =head2 id
 828
 829 Returns unique id of this record
 830
 831   $id = id();
 832
 833 Returns C<42/2> for 2nd occurence of MFN 42.
 834
 835 =cut
 836
 837 sub id {
 838         my $mfn = $config->{_mfn} || die "no _mfn in config data";
 839         return $mfn . ( WebPAC::Normalize::MARC::_created_marc_records() || '' );
 840 }
 841
 842 =head2 join_with
 843
 844 Joins walues with some delimiter
 845
 846   $v = join_with(", ", @v);
 847
 848 =cut
 849
 850 sub join_with {
 851         my $d = shift;
 852         warn "### join_with('$d',",dump(@_),")\n" if ($debug > 2);
 853         my $v = join($d, grep { defined($_) && $_ ne '' } @_);
 854         return '' unless defined($v);
 855         return $v;
 856 }
 857
 858 =head2 split_rec_on
 859
 860 Split record subfield on some regex and take one of parts out
 861
 862   $a_before_semi_column =
 863         split_rec_on('200','a', /\s*;\s*/, $part);
 864
 865 C<$part> is optional number of element. First element is
 866 B<1>, not 0!
 867
 868 If there is no C<$part> parameter or C<$part> is 0, this function will
 869 return all values produced by splitting.
 870
 871 =cut
 872
 873 sub split_rec_on {
 874         die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
 875
 876         my ($fld, $sf, $regex, $part) = @_;
 877         warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
 878
 879         my @r = rec( $fld, $sf );
 880         my $v = shift @r;
 881         warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
 882
 883         return '' if ( ! defined($v) || $v =~ /^\s*$/);
 884
 885         my @s = split( $regex, $v );
 886         warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
 887         if ($part && $part > 0) {
 888                 return $s[ $part - 1 ];
 889         } else {
 890                 return @s;
 891         }
 892 }
 893
 894 my $hash;
 895
 896 =head2 set
 897
 898   set( key => 'value' );
 899
 900 =cut
 901
 902 sub set {
 903         my ($k,$v) = @_;
 904         warn "## set ( $k => ", dump($v), " )", $/ if ( $debug );
 905         $hash->{$k} = $v;
 906 };
 907
 908 =head2 get
 909
 910   get( 'key' );
 911
 912 =cut
 913
 914 sub get {
 915         my $k = shift || return;
 916         my $v = $hash->{$k};
 917         warn "## get $k = ", dump( $v ), $/ if ( $debug );
 918         return $v;
 919 }
 920
 921 =head2 count
 922
 923   if ( count( @result ) == 1 ) {
 924         # do something if only 1 result is there
 925   }
 926
 927 =cut
 928
 929 sub count {
 930         warn "## count ",dump(@_),$/ if ( $debug );
 931         return @_ . '';
 932 }
 933
 934 # END
 935 1;