1 package WebPAC::Normalize;
11 regex prefix suffix surround
12 first lookup join_with
18 #use base qw/WebPAC::Common/;
20 use Encode qw/from_to/;
24 WebPAC::Normalize - describe normalisaton rules using sets
32 our $VERSION = '0.06';
36 This module uses C<conf/normalize/*.pl> files to perform normalisation
37 from input records using perl functions which are specialized for set
40 Sets are implemented as arrays, and normalisation file is valid perl, which
41 means that you check it's validity before running WebPAC using
42 C<perl -c normalize.pl>.
44 Normalisation can generate multiple output normalized data. For now, supported output
45 types (on the left side of definition) are: C<tag>, C<display>, C<search> and
50 Functions which start with C<_> are private and used by WebPAC internally.
51 All other functions are available for use within normalisation rules.
57 my $ds = WebPAC::Normalize::data_structure(
58 lookup => $lookup->lookup_hash,
60 rules => $normalize_pl_config,
61 marc_encoding => 'utf-8',
64 Options C<lookup>, C<row>, C<rules> and C<log> are mandatory while all
67 This function will B<die> if normalizastion can't be evaled.
69 Since this function isn't exported you have to call it with
70 C<WebPAC::Normalize::data_structure>.
77 die "need row argument" unless ($arg->{row});
78 die "need normalisation argument" unless ($arg->{rules});
81 _set_lookup( $arg->{lookup} );
82 _set_rec( $arg->{row} );
83 _clean_ds( %{ $arg } );
85 die "error evaling $arg->{rules}: $@\n" if ($@);
92 Set current record hash
101 $rec = shift or die "no record hash";
106 Return hash formatted as data structure
122 Clean data structure hash for next record
132 $marc_encoding = $a->{marc_encoding};
137 Set current lookup hash
139 _set_lookup( $lookup );
149 =head2 _get_marc21_fields
151 Get all fields defined by calls to C<marc21>
153 $marc->add_fields( WebPAC::Normalize:_get_marc21_fields() );
157 sub _get_marc21_fields {
161 =head1 Functions to create C<data_structure>
163 Those functions generally have to first in your normalization file.
167 Define new tag for I<search> and I<display>.
169 tag('Title', rec('200','a') );
175 my $name = shift or die "tag needs name as first argument";
176 my @o = grep { defined($_) && $_ ne '' } @_;
178 $out->{$name}->{tag} = $name;
179 $out->{$name}->{search} = \@o;
180 $out->{$name}->{display} = \@o;
185 Define tag just for I<display>
187 @v = display('Title', rec('200','a') );
192 my $name = shift or die "display needs name as first argument";
193 my @o = grep { defined($_) && $_ ne '' } @_;
195 $out->{$name}->{tag} = $name;
196 $out->{$name}->{display} = \@o;
201 Prepare values just for I<search>
203 @v = search('Title', rec('200','a') );
208 my $name = shift or die "search needs name as first argument";
209 my @o = grep { defined($_) && $_ ne '' } @_;
211 $out->{$name}->{tag} = $name;
212 $out->{$name}->{search} = \@o;
217 Save value for MARC field
219 marc21('900','a', rec('200','a') );
224 my $f = shift or die "marc21 needs field";
225 die "marc21 field must be numer" unless ($f =~ /^\d+$/);
227 my $sf = shift or die "marc21 needs subfield";
230 my $v = $_; # make var read-write for Encode
231 next unless (defined($v) && $v !~ /^\s+$/);
232 from_to($v, 'iso-8859-2', $marc_encoding) if ($marc_encoding);
233 push @{ $marc21 }, [ $f, ' ', ' ', $sf => $v ];
237 =head1 Functions to extract data from input
239 This function should be used inside functions to create C<data_structure> described
244 Return all values in some field
248 TODO: order of values is probably same as in source data, need to investigate that
254 return unless (defined($rec) && defined($rec->{$f}));
255 if (ref($rec->{$f}) eq 'ARRAY') {
257 if (ref($_) eq 'HASH') {
263 } elsif( defined($rec->{$f}) ) {
270 Return all values in specific field and subfield
278 return unless (defined($rec && $rec->{$f}));
280 return map { $_->{$sf} } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
302 Apply regex to some or all values
304 @v = regex( 's/foo/bar/g', @v );
311 #warn "r: $r\n",Dumper(\@_);
315 push @out, $t if ($t && $t ne '');
322 Prefix all values with a string
324 @v = prefix( 'my_', @v );
329 my $p = shift or die "prefix needs string as first argument";
330 return map { $p . $_ } grep { defined($_) } @_;
335 suffix all values with a string
337 @v = suffix( '_my', @v );
342 my $s = shift or die "suffix needs string as first argument";
343 return map { $_ . $s } grep { defined($_) } @_;
348 surround all values with a two strings
350 @v = surround( 'prefix_', '_suffix', @v );
355 my $p = shift or die "surround need prefix as first argument";
356 my $s = shift or die "surround needs suffix as second argument";
357 return map { $p . $_ . $s } grep { defined($_) } @_;
375 Consult lookup hashes for some value
383 my $k = shift or return;
384 return unless (defined($lookup->{$k}));
385 if (ref($lookup->{$k}) eq 'ARRAY') {
386 return @{ $lookup->{$k} };
388 return $lookup->{$k};
394 Joins walues with some delimiter
396 $v = join_with(", ", @v);
402 return join($d, grep { defined($_) && $_ ne '' } @_);