git.rot13.org Git - webpac2/blob - lib/WebPAC/Normalize.pm

   1 package WebPAC::Normalize;
   2 use Exporter 'import';
   3 @EXPORT = qw/
   4         set_rec set_lookup
   5         get_ds clean_ds
   6         tag search display
   7         rec1 rec2 rec
   8         regex prefix suffix surround
   9         first lookup join_with
  10 /;
  11
  12 use warnings;
  13 use strict;
  14
  15 #use base qw/WebPAC::Common/;
  16 use Data::Dumper;
  17
  18 =head1 NAME
  19
  20 WebPAC::Normalize - describe normalisaton rules using sets
  21
  22 =head1 VERSION
  23
  24 Version 0.04
  25
  26 =cut
  27
  28 our $VERSION = '0.04';
  29
  30 =head1 SYNOPSIS
  31
  32 This module uses C<conf/normalize/*.pl> files to perform normalisation
  33 from input records using perl functions which are specialized for set
  34 processing.
  35
  36 Sets are implemented as arrays, and normalisation file is valid perl, which
  37 means that you check it's validity before running WebPAC using
  38 C<perl -c normalize.pl>.
  39
  40 Normalisation can generate multiple output normalized data. For now, supported output
  41 types (on the left side of definition) are: C<tag>, C<display> and C<search>.
  42
  43 =head1 FUNCTIONS
  44
  45 =head2 data_structure
  46
  47 Return data structure
  48
  49   my $ds = WebPAC::Normalize(
  50         lookup => $lookup->lookup_hash,
  51         row => $row,
  52         rules => $normalize_pl_config,
  53   );
  54
  55 This function will B<die> if normalizastion can't be evaled.
  56
  57 =cut
  58
  59 sub data_structure {
  60         my $arg = {@_};
  61
  62         die "need row argument" unless ($arg->{row});
  63         die "need normalisation argument" unless ($arg->{rules});
  64
  65         no strict 'subs';
  66         set_lookup( $arg->{lookup} );
  67         set_rec( $arg->{row} );
  68         clean_ds();
  69         eval "$arg->{rules}";
  70         die "error evaling $arg->{rules}: $@\n" if ($@);
  71         return get_ds();
  72 }
  73
  74 =head2 set_rec
  75
  76 Set current record hash
  77
  78   set_rec( $rec );
  79
  80 =cut
  81
  82 my $rec;
  83
  84 sub set_rec {
  85         $rec = shift or die "no record hash";
  86 }
  87
  88 =head2 tag
  89
  90 Define new tag for I<search> and I<display>.
  91
  92   tag('Title', rec('200','a') );
  93
  94
  95 =cut
  96
  97 my $out;
  98
  99 sub tag {
 100         my $name = shift or die "tag needs name as first argument";
 101         my @o = grep { defined($_) && $_ ne '' } @_;
 102         return unless (@o);
 103         $out->{$name}->{tag} = $name;
 104         $out->{$name}->{search} = \@o;
 105         $out->{$name}->{display} = \@o;
 106 }
 107
 108 =head2 display
 109
 110 Define tag just for I<display>
 111
 112   @v = display('Title', rec('200','a') );
 113
 114 =cut
 115
 116 sub display {
 117         my $name = shift or die "display needs name as first argument";
 118         my @o = grep { defined($_) && $_ ne '' } @_;
 119         return unless (@o);
 120         $out->{$name}->{tag} = $name;
 121         $out->{$name}->{display} = \@o;
 122 }
 123
 124 =head2 search
 125
 126 Prepare values just for I<search>
 127
 128   @v = search('Title', rec('200','a') );
 129
 130 =cut
 131
 132 sub search {
 133         my $name = shift or die "search needs name as first argument";
 134         my @o = grep { defined($_) && $_ ne '' } @_;
 135         return unless (@o);
 136         $out->{$name}->{tag} = $name;
 137         $out->{$name}->{search} = \@o;
 138 }
 139
 140 =head2 get_ds
 141
 142 Return hash formatted as data structure
 143
 144   my $ds = get_ds();
 145
 146 =cut
 147
 148 sub get_ds {
 149         return $out;
 150 }
 151
 152 =head2 clean_ds
 153
 154 Clean data structure hash for next record
 155
 156   clean_ds();
 157
 158 =cut
 159
 160 sub clean_ds {
 161         $out = undef;
 162 }
 163
 164 =head2 set_lookup
 165
 166 Set current lookup hash
 167
 168   set_lookup( $lookup );
 169
 170 =cut
 171
 172 my $lookup;
 173
 174 sub set_lookup {
 175         $lookup = shift;
 176 }
 177
 178 =head2 rec1
 179
 180 Return all values in some field
 181
 182   @v = rec1('200')
 183
 184 TODO: order of values is probably same as in source data, need to investigate that
 185
 186 =cut
 187
 188 sub rec1 {
 189         my $f = shift;
 190         return unless (defined($rec) && defined($rec->{$f}));
 191         if (ref($rec->{$f}) eq 'ARRAY') {
 192                 return map {
 193                         if (ref($_) eq 'HASH') {
 194                                 values %{$_};
 195                         } else {
 196                                 $_;
 197                         }
 198                 } @{ $rec->{$f} };
 199         } elsif( defined($rec->{$f}) ) {
 200                 return $rec->{$f};
 201         }
 202 }
 203
 204 =head2 rec2
 205
 206 Return all values in specific field and subfield
 207
 208   @v = rec2('200','a')
 209
 210 =cut
 211
 212 sub rec2 {
 213         my $f = shift;
 214         return unless (defined($rec && $rec->{$f}));
 215         my $sf = shift;
 216         return map { $_->{$sf} } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
 217 }
 218
 219 =head2 rec
 220
 221 syntaxtic sugar for
 222
 223   @v = rec('200')
 224   @v = rec('200','a')
 225
 226 =cut
 227
 228 sub rec {
 229         if ($#_ == 0) {
 230                 return rec1(@_);
 231         } elsif ($#_ == 1) {
 232                 return rec2(@_);
 233         }
 234 }
 235
 236 =head2 regex
 237
 238 Apply regex to some or all values
 239
 240   @v = regex( 's/foo/bar/g', @v );
 241
 242 =cut
 243
 244 sub regex {
 245         my $r = shift;
 246         my @out;
 247         #warn "r: $r\n",Dumper(\@_);
 248         foreach my $t (@_) {
 249                 next unless ($t);
 250                 eval "\$t =~ $r";
 251                 push @out, $t if ($t && $t ne '');
 252         }
 253         return @out;
 254 }
 255
 256 =head2 prefix
 257
 258 Prefix all values with a string
 259
 260   @v = prefix( 'my_', @v );
 261
 262 =cut
 263
 264 sub prefix {
 265         my $p = shift or die "prefix needs string as first argument";
 266         return map { $p . $_ } grep { defined($_) } @_;
 267 }
 268
 269 =head2 suffix
 270
 271 suffix all values with a string
 272
 273   @v = suffix( '_my', @v );
 274
 275 =cut
 276
 277 sub suffix {
 278         my $s = shift or die "suffix needs string as first argument";
 279         return map { $_ . $s } grep { defined($_) } @_;
 280 }
 281
 282 =head2 surround
 283
 284 surround all values with a two strings
 285
 286   @v = surround( 'prefix_', '_suffix', @v );
 287
 288 =cut
 289
 290 sub surround {
 291         my $p = shift or die "surround need prefix as first argument";
 292         my $s = shift or die "surround needs suffix as second argument";
 293         return map { $p . $_ . $s } grep { defined($_) } @_;
 294 }
 295
 296 =head2 first
 297
 298 Return first element
 299
 300   $v = first( @v );
 301
 302 =cut
 303
 304 sub first {
 305         my $r = shift;
 306         return $r;
 307 }
 308
 309 =head2 lookup
 310
 311 Consult lookup hashes for some value
 312
 313   @v = lookup( $v );
 314   @v = lookup( @v );
 315
 316 =cut
 317
 318 sub lookup {
 319         my $k = shift or return;
 320         return unless (defined($lookup->{$k}));
 321         if (ref($lookup->{$k}) eq 'ARRAY') {
 322                 return @{ $lookup->{$k} };
 323         } else {
 324                 return $lookup->{$k};
 325         }
 326 }
 327
 328 =head2 join_with
 329
 330 Joins walues with some delimiter
 331
 332   $v = join_with(", ", @v);
 333
 334 =cut
 335
 336 sub join_with {
 337         my $d = shift;
 338         return join($d, grep { defined($_) && $_ ne '' } @_);
 339 }
 340
 341 # END
 342 1;