r719@llin: dpavlin | 2006-06-26 18:40:57 +0200

[webpac2] / lib / WebPAC / Normalize.pm
diff --git a/lib/WebPAC/Normalize.pm b/lib/WebPAC/Normalize.pm

index 91a0945..cf78071 100644 (file)
--- a/lib/WebPAC/Normalize.pm
+++ b/lib/WebPAC/Normalize.pm
@@ -1,725 +1,342 @@
  package WebPAC::Normalize;
+use Exporter 'import';
+@EXPORT = qw/
+       set_rec set_lookup
+       get_ds clean_ds
+       tag search display
+       rec1 rec2 rec
+       regex prefix suffix surround
+       first lookup join_with
+/;
  
  use warnings;
  use strict;
+
+#use base qw/WebPAC::Common/;
  use Data::Dumper;
-use Storable;
  
  =head1 NAME
  
-WebPAC::Normalize - data mungling for normalisation
+WebPAC::Normalize - describe normalisaton rules using sets
  
  =head1 VERSION
  
-Version 0.01
+Version 0.04
  
  =cut
  
-our $VERSION = '0.01';
+our $VERSION = '0.04';
  
  =head1 SYNOPSIS
  
-This package contains code that mungle data to produce normalized format.
-
-It contains several assumptions:
-
-=over
-
-=item *
-
-format of fields is defined using C<v123^a> notation for repeatable fields
-or C<s123^a> for single (or first) value, where C<123> is field number and
-C<a> is subfield.
-
-=item *
-
-source data records (C<$rec>) have unique identifiers in field C<000>
-
-=item *
-
-optional C<eval{length('v123^a') == 3}> tag at B<beginning of format> will be
-perl code that is evaluated before producing output (value of field will be
-interpolated before that)
-
-=item *
-
-optional C<filter{filter_name}> at B<begining of format> will apply perl
-code defined as code ref on format after field substitution to producing
-output
-
-=item *
-
-optional C<lookup{...}> will be then performed. See C<WebPAC::Lookups>.
-
-=item *
-
-at end, optional C<format>s rules are resolved. Format rules are similar to
-C<sprintf> and can also contain C<lookup{...}> which is performed after
-values are inserted in format.
-
-=back
-
-This also describes order in which transformations are applied (eval,
-filter, lookup, format) which is important to undestand when deciding how to
-solve your data mungling and normalisation process.
-
+This module uses C<conf/normalize/*.pl> files to perform normalisation
+from input records using perl functions which are specialized for set
+processing.
  
+Sets are implemented as arrays, and normalisation file is valid perl, which
+means that you check it's validity before running WebPAC using
+C<perl -c normalize.pl>.
  
+Normalisation can generate multiple output normalized data. For now, supported output
+types (on the left side of definition) are: C<tag>, C<display> and C<search>.
  
  =head1 FUNCTIONS
  
-=head2 new
+=head2 data_structure
  
-Create new normalisation object
+Return data structure
  
-  my $n = new WebPAC::Normalize::Something(
-       filter => {
-               'filter_name_1' => sub {
-                       # filter code
-                       return length($_);
-               }, ...
-       },
-       cache_data_structure => './cache/ds/',
-       lookup_regex => $lookup->regex,
+  my $ds = WebPAC::Normalize(
+       lookup => $lookup->lookup_hash,
+       row => $row,
+       rules => $normalize_pl_config,
    );
  
-Parametar C<filter> defines user supplied snippets of perl code which can
-be use with C<filter{...}> notation.
-
-Optional parameter C<cache_data_structure> defines path to directory
-in which cache file for C<data_structure> call will be created.
-
-Recommended parametar C<lookup_regex> is used to enable parsing of lookups
-in structures.
+This function will B<die> if normalizastion can't be evaled.
  
  =cut
  
-sub new {
-       my $class = shift;
-        my $self = {@_};
-        bless($self, $class);
-
-       $self->setup_cache_dir( $self->{'cache_data_structure'} );
-
-       $self ? return $self : return undef;
+sub data_structure {
+       my $arg = {@_};
+
+       die "need row argument" unless ($arg->{row});
+       die "need normalisation argument" unless ($arg->{rules});
+
+       no strict 'subs';
+       set_lookup( $arg->{lookup} );
+       set_rec( $arg->{row} );
+       clean_ds();
+       eval "$arg->{rules}";
+       die "error evaling $arg->{rules}: $@\n" if ($@);
+       return get_ds();
  }
  
-=head2 setup_cache_dir
+=head2 set_rec
  
-Check if specified cache directory exist, and if not, disable caching.
+Set current record hash
  
- $setup_cache_dir('./cache/ds/');
-
-If you pass false or zero value to this function, it will disable
-cacheing.
+  set_rec( $rec );
  
  =cut
  
-sub setup_cache_dir {
-       my $self = shift;
-
-       my $dir = shift;
-
-       my $log = $self->_get_logger();
-
-       if ($dir) {
-               my $msg;
-               if (! -e $dir) {
-                       $msg = "doesn't exist";
-               } elsif (! -d $dir) {
-                       $msg = "is not directory";
-               } elsif (! -w $dir) {
-                       $msg = "not writable";
-               }
-
-               if ($msg) {
-                       undef $self->{'cache_data_structure'};
-                       $log->warn("cache_data_structure $dir $msg, disabling...");
-               } else {
-                       $log->debug("using cache dir $dir");
-               }
-       } else {
-               $log->debug("disabling cache");
-               undef $self->{'cache_data_structure'};
-       }
-}
-
+my $rec;
  
-=head2 data_structure
-
-Create in-memory data structure which represents normalized layout from
-C<conf/normalize/*.xml>.
+sub set_rec {
+       $rec = shift or die "no record hash";
+}
  
-This structures are used to produce output.
+=head2 tag
  
- my @ds = $webpac->data_structure($rec);
+Define new tag for I<search> and I<display>.
  
-B<Note: historical oddity follows>
+  tag('Title', rec('200','a') );
  
-This method will also set C<< $webpac->{'currnet_filename'} >> if there is
-C<< <filename> >> tag and C<< $webpac->{'headline'} >> if there is
-C<< <headline> >> tag.
  
  =cut
  
-sub data_structure {
-       my $self = shift;
-
-       my $log = $self->_get_logger();
-
-       my $rec = shift;
-       $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
-
-       my $cache_file;
-
-       if (my $cache_path = $self->{'cache_data_structure'}) {
-               my $id = $rec->{'000'};
-               $id = $rec->{'000'}->[0] if ($id =~ m/^ARRAY/o);
-               unless (defined($id)) {
-                       $log->warn("Can't use cache_data_structure on records without unique identifier in field 000");
-                       undef $self->{'cache_data_structure'};
-               } else {
-                       $cache_file = "$cache_path/$id";
-                       if (-r $cache_file) {
-                               my $ds_ref = retrieve($cache_file);
-                               if ($ds_ref) {
-                                       $log->debug("cache hit: $cache_file");
-                                       my $ok = 1;
-                                       foreach my $f (qw(current_filename headline)) {
-                                               if ($ds_ref->{$f}) {
-                                                       $self->{$f} = $ds_ref->{$f};
-                                               } else {
-                                                       $ok = 0;
-                                               }
-                                       };
-                                       if ($ok && $ds_ref->{'ds'}) {
-                                               return @{ $ds_ref->{'ds'} };
-                                       } else {
-                                               $log->warn("cache_data_structure $cache_path corrupt. Use rm $cache_path/* to re-create it on next run!");
-                                               undef $self->{'cache_data_structure'};
-                                       }
-                               }
-                       }
-               }
-       }
-
-       undef $self->{'currnet_filename'};
-       undef $self->{'headline'};
-
-       my @sorted_tags;
-       if ($self->{tags_by_order}) {
-               @sorted_tags = @{$self->{tags_by_order}};
-       } else {
-               @sorted_tags = sort { $self->_sort_by_order } keys %{$self->{'import_xml'}->{'indexer'}};
-               $self->{tags_by_order} = \@sorted_tags;
-       }
-
-       my @ds;
-
-       $log->debug("tags: ",sub { join(", ",@sorted_tags) });
-
-       foreach my $field (@sorted_tags) {
-
-               my $row;
-
-#print "field $field [",$self->{'tag'},"] = ",Dumper($self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}});
-
-               foreach my $tag (@{$self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}}) {
-                       my $format = $tag->{'value'} || $tag->{'content'};
-
-                       $log->debug("format: $format");
-
-                       my @v;
-                       if ($self->{'lookup_regex'} && $format =~ $self->{'lookup_regex'}) {
-                               @v = $self->fill_in_to_arr($rec,$format);
-                       } else {
-                               @v = $self->parse_to_arr($rec,$format);
-                       }
-                       next if (! @v);
-
-                       if ($tag->{'sort'}) {
-                               @v = $self->sort_arr(@v);
-                       }
-
-                       # use format?
-                       if ($tag->{'format_name'}) {
-                               @v = map { $self->apply_format($tag->{'format_name'},$tag->{'format_delimiter'},$_) } @v;
-                       }
-
-                       if ($field eq 'filename') {
-                               $self->{'current_filename'} = join('',@v);
-                               $log->debug("filename: ",$self->{'current_filename'});
-                       } elsif ($field eq 'headline') {
-                               $self->{'headline'} .= join('',@v);
-                               $log->debug("headline: ",$self->{'headline'});
-                               next; # don't return headline in data_structure!
-                       }
-
-                       # delimiter will join repeatable fields
-                       if ($tag->{'delimiter'}) {
-                               @v = ( join($tag->{'delimiter'}, @v) );
-                       }
-
-                       # default types 
-                       my @types = qw(display swish);
-                       # override by type attribute
-                       @types = ( $tag->{'type'} ) if ($tag->{'type'});
-
-                       foreach my $type (@types) {
-                               # append to previous line?
-                               $log->debug("type: $type ",sub { join(" ",@v) }, $row->{'append'} || 'no append');
-                               if ($tag->{'append'}) {
-
-                                       # I will delimit appended part with
-                                       # delimiter (or ,)
-                                       my $d = $tag->{'delimiter'};
-                                       # default delimiter
-                                       $d ||= " ";
-
-                                       my $last = pop @{$row->{$type}};
-                                       $d = "" if (! $last);
-                                       $last .= $d . join($d, @v);
-                                       push @{$row->{$type}}, $last;
-
-                               } else {
-                                       push @{$row->{$type}}, @v;
-                               }
-                       }
-
-
-               }
-
-               if ($row) {
-                       $row->{'tag'} = $field;
-
-                       # TODO: name_sigular, name_plural
-                       my $name = $self->{'import_xml'}->{'indexer'}->{$field}->{'name'};
-                       $row->{'name'} = $name ? $self->_x($name) : $field;
-
-                       # post-sort all values in field
-                       if ($self->{'import_xml'}->{'indexer'}->{$field}->{'sort'}) {
-                               $log->warn("sort at field tag not implemented");
-                       }
-
-                       push @ds, $row;
-
-                       $log->debug("row $field: ",sub { Dumper($row) });
-               }
-
-       }
-
-       if ($cache_file) {
-               store {
-                       ds => \@ds,
-                       current_filename => $self->{'current_filename'},
-                       headline => $self->{'headline'},
-               }, $cache_file;
-               $log->debug("created storable cache file $cache_file");
-       }
-
-       return @ds;
+my $out;
  
+sub tag {
+       my $name = shift or die "tag needs name as first argument";
+       my @o = grep { defined($_) && $_ ne '' } @_;
+       return unless (@o);
+       $out->{$name}->{tag} = $name;
+       $out->{$name}->{search} = \@o;
+       $out->{$name}->{display} = \@o;
  }
  
-=head2 parse
+=head2 display
  
-Perform smart parsing of string, skipping delimiters for fields which aren't
-defined. It can also eval code in format starting with C<eval{...}> and
-return output or nothing depending on eval code.
+Define tag just for I<display>
  
- my $text = $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
+  @v = display('Title', rec('200','a') );
  
  =cut
  
-sub parse {
-       my $self = shift;
-
-       my ($rec, $format_utf8, $i) = @_;
-
-       return if (! $format_utf8);
-
-       my $log = $self->_get_logger();
-
-       $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
-
-       $i = 0 if (! $i);
-
-       my $format = $self->_x($format_utf8) || $log->logconfess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'});
-
-       my @out;
-
-       $log->debug("format: $format");
-
-       my $eval_code;
-       # remove eval{...} from beginning
-       $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
-
-       my $filter_name;
-       # remove filter{...} from beginning
-       $filter_name = $1 if ($format =~ s/^filter{([^}]+)}//s);
-
-       my $prefix;
-       my $all_found=0;
-
-       while ($format =~ s/^(.*?)(v|s)(\d+)(?:\^(\w))?//s) {
-
-               my $del = $1 || '';
-               $prefix ||= $del if ($all_found == 0);
+sub display {
+       my $name = shift or die "display needs name as first argument";
+       my @o = grep { defined($_) && $_ ne '' } @_;
+       return unless (@o);
+       $out->{$name}->{tag} = $name;
+       $out->{$name}->{display} = \@o;
+}
  
-               # repeatable index
-               my $r = $i;
-               $r = 0 if (lc("$2") eq 's');
+=head2 search
  
-               my $found = 0;
-               my $tmp = $self->get_data(\$rec,$3,$4,$r,\$found);
+Prepare values just for I<search>
  
-               if ($found) {
-                       push @out, $del;
-                       push @out, $tmp;
-                       $all_found += $found;
-               }
-       }
+  @v = search('Title', rec('200','a') );
  
-       return if (! $all_found);
+=cut
  
-       my $out = join('',@out);
+sub search {
+       my $name = shift or die "search needs name as first argument";
+       my @o = grep { defined($_) && $_ ne '' } @_;
+       return unless (@o);
+       $out->{$name}->{tag} = $name;
+       $out->{$name}->{search} = \@o;
+}
  
-       if ($out) {
-               # add rest of format (suffix)
-               $out .= $format;
+=head2 get_ds
  
-               # add prefix if not there
-               $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);
+Return hash formatted as data structure
  
-               $log->debug("result: $out");
-       }
+  my $ds = get_ds();
  
-       if ($eval_code) {
-               my $eval = $self->fill_in($rec,$eval_code,$i) || return;
-               $log->debug("about to eval{$eval} format: $out");
-               return if (! $self->_eval($eval));
-       }
-       
-       if ($filter_name && $self->{'filter'}->{$filter_name}) {
-               $log->debug("about to filter{$filter_name} format: $out");
-               $out = $self->{'filter'}->{$filter_name}->($out);
-               return unless(defined($out));
-               $log->debug("filter result: $out");
-       }
+=cut
  
+sub get_ds {
         return $out;
  }
  
-=head2 parse_to_arr
+=head2 clean_ds
  
-Similar to C<parse>, but returns array of all repeatable fields
+Clean data structure hash for next record
  
- my @arr = $webpac->parse_to_arr($rec,'v250^a');
+  clean_ds();
  
  =cut
  
-sub parse_to_arr {
-       my $self = shift;
-
-       my ($rec, $format_utf8) = @_;
+sub clean_ds {
+       $out = undef;
+}
  
-       my $log = $self->_get_logger();
+=head2 set_lookup
  
-       $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
-       return if (! $format_utf8);
+Set current lookup hash
  
-       my $i = 0;
-       my @arr;
+  set_lookup( $lookup );
  
-       while (my $v = $self->parse($rec,$format_utf8,$i++)) {
-               push @arr, $v;
-       }
+=cut
  
-       $log->debug("format '$format_utf8' returned ",--$i," elements: ", sub { join(" | ",@arr) }) if (@arr);
+my $lookup;
  
-       return @arr;
+sub set_lookup {
+       $lookup = shift;
  }
  
+=head2 rec1
  
-=head2 fill_in
-
-Workhourse of all: takes record from in-memory structure of database and
-strings with placeholders and returns string or array of with substituted
-values from record.
-
- my $text = $webpac->fill_in($rec,'v250^a');
+Return all values in some field
  
-Optional argument is ordinal number for repeatable fields. By default,
-it's assume to be first repeatable field (fields are perl array, so first
-element is 0).
-Following example will read second value from repeatable field.
+  @v = rec1('200')
  
- my $text = $webpac->fill_in($rec,'Title: v250^a',1);
-
-This function B<does not> perform parsing of format to inteligenty skip
-delimiters before fields which aren't used.
-
-This method will automatically decode UTF-8 string to local code page
-if needed.
+TODO: order of values is probably same as in source data, need to investigate that
  
  =cut
  
-sub fill_in {
-       my $self = shift;
-
-       my $log = $self->_get_logger();
-
-       my $rec = shift || $log->logconfess("need data record");
-       my $format = shift || $log->logconfess("need format to parse");
-       # iteration (for repeatable fields)
-       my $i = shift || 0;
-
-       $log->logdie("infitite loop in format $format") if ($i > ($self->{'max_mfn'} || 9999));
-
-       # FIXME remove for speedup?
-       $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
-
-       if (utf8::is_utf8($format)) {
-               $format = $self->_x($format);
-       }
-
-       my $found = 0;
-
-       my $eval_code;
-       # remove eval{...} from beginning
-       $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
-
-       my $filter_name;
-       # remove filter{...} from beginning
-       $filter_name = $1 if ($format =~ s/^filter{([^}]+)}//s);
-
-       # do actual replacement of placeholders
-       # repeatable fields
-       $format =~ s/v(\d+)(?:\^(\w))?/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
-       # non-repeatable fields
-       $format =~ s/s(\d+)(?:\^(\w))?/$self->get_data(\$rec,$1,$2,0,\$found)/ges;
-
-       if ($found) {
-               $log->debug("format: $format");
-               if ($eval_code) {
-                       my $eval = $self->fill_in($rec,$eval_code,$i);
-                       return if (! $self->_eval($eval));
-               }
-               if ($filter_name && $self->{'filter'}->{$filter_name}) {
-                       $log->debug("filter '$filter_name' for $format");
-                       $format = $self->{'filter'}->{$filter_name}->($format);
-                       return unless(defined($format));
-                       $log->debug("filter result: $format");
-               }
-               # do we have lookups?
-               if ($self->{'lookup'}) {
-                       return $self->lookup($format);
-               } else {
-                       return $format;
-               }
-       } else {
-               return;
+sub rec1 {
+       my $f = shift;
+       return unless (defined($rec) && defined($rec->{$f}));
+       if (ref($rec->{$f}) eq 'ARRAY') {
+               return map { 
+                       if (ref($_) eq 'HASH') {
+                               values %{$_};
+                       } else {
+                               $_;
+                       }
+               } @{ $rec->{$f} };
+       } elsif( defined($rec->{$f}) ) {
+               return $rec->{$f};
         }
  }
  
+=head2 rec2
  
-=head2 fill_in_to_arr
+Return all values in specific field and subfield
  
-Similar to C<fill_in>, but returns array of all repeatable fields. Usable
-for fields which have lookups, so they shouldn't be parsed but rather
-C<fill_id>ed.
-
- my @arr = $webpac->fill_in_to_arr($rec,'[v900];;[v250^a]');
+  @v = rec2('200','a')
  
  =cut
  
-sub fill_in_to_arr {
-       my $self = shift;
+sub rec2 {
+       my $f = shift;
+       return unless (defined($rec && $rec->{$f}));
+       my $sf = shift;
+       return map { $_->{$sf} } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
+}
  
-       my ($rec, $format_utf8) = @_;
+=head2 rec
  
-       my $log = $self->_get_logger();
+syntaxtic sugar for
  
-       $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
-       return if (! $format_utf8);
+  @v = rec('200')
+  @v = rec('200','a')
  
-       my $i = 0;
-       my @arr;
+=cut
  
-       while (my @v = $self->fill_in($rec,$format_utf8,$i++)) {
-               push @arr, @v;
+sub rec {
+       if ($#_ == 0) {
+               return rec1(@_);
+       } elsif ($#_ == 1) {
+               return rec2(@_);
         }
-
-       $log->debug("format '$format_utf8' returned ",--$i," elements: ", sub { join(" | ",@arr) }) if (@arr);
-
-       return @arr;
  }
  
+=head2 regex
  
-=head2 get_data
-
-Returns value from record.
-
- my $text = $self->get_data(\$rec,$f,$sf,$i,\$found);
+Apply regex to some or all values
  
-Arguments are:
-record reference C<$rec>,
-field C<$f>,
-optional subfiled C<$sf>,
-index for repeatable values C<$i>.
-
-Optinal variable C<$found> will be incremeted if there
-is field.
-
-Returns value or empty string.
+  @v = regex( 's/foo/bar/g', @v );
  
  =cut
  
-sub get_data {
-       my $self = shift;
-
-       my ($rec,$f,$sf,$i,$found) = @_;
-
-       if ($$rec->{$f}) {
-               return '' if (! $$rec->{$f}->[$i]);
-               no strict 'refs';
-               if ($sf && $$rec->{$f}->[$i]->{$sf}) {
-                       $$found++ if (defined($$found));
-                       return $$rec->{$f}->[$i]->{$sf};
-               } elsif ($$rec->{$f}->[$i]) {
-                       $$found++ if (defined($$found));
-                       # it still might have subfield, just
-                       # not specified, so we'll dump all
-                       if ($$rec->{$f}->[$i] =~ /HASH/o) {
-                               my $out;
-                               foreach my $k (keys %{$$rec->{$f}->[$i]}) {
-                                       $out .= $$rec->{$f}->[$i]->{$k}." ";
-                               }
-                               return $out;
-                       } else {
-                               return $$rec->{$f}->[$i];
-                       }
-               }
-       } else {
-               return '';
+sub regex {
+       my $r = shift;
+       my @out;
+       #warn "r: $r\n",Dumper(\@_);
+       foreach my $t (@_) {
+               next unless ($t);
+               eval "\$t =~ $r";
+               push @out, $t if ($t && $t ne '');
         }
+       return @out;
  }
  
+=head2 prefix
  
-=head2 apply_format
-
-Apply format specified in tag with C<format_name="name"> and
-C<format_delimiter=";;">.
-
- my $text = $webpac->apply_format($format_name,$format_delimiter,$data);
+Prefix all values with a string
  
-Formats can contain C<lookup{...}> if you need them.
+  @v = prefix( 'my_', @v );
  
  =cut
  
-sub apply_format {
-       my $self = shift;
-
-       my ($name,$delimiter,$data) = @_;
-
-       my $log = $self->_get_logger();
-
-       if (! $self->{'import_xml'}->{'format'}->{$name}) {
-               $log->warn("<format name=\"$name\"> is not defined in ",$self->{'import_xml_file'});
-               return $data;
-       }
-
-       $log->warn("no delimiter for format $name") if (! $delimiter);
+sub prefix {
+       my $p = shift or die "prefix needs string as first argument";
+       return map { $p . $_ } grep { defined($_) } @_;
+}
  
-       my $format = $self->_x($self->{'import_xml'}->{'format'}->{$name}->{'content'}) || $log->logdie("can't find format '$name'");
+=head2 suffix
  
-       my @data = split(/\Q$delimiter\E/, $data);
+suffix all values with a string
  
-       my $out = sprintf($format, @data);
-       $log->debug("using format $name [$format] on $data to produce: $out");
+  @v = suffix( '_my', @v );
  
-       if ($self->{'lookup_regex'} && $out =~ $self->{'lookup_regex'}) {
-               return $self->lookup($out);
-       } else {
-               return $out;
-       }
+=cut
  
+sub suffix {
+       my $s = shift or die "suffix needs string as first argument";
+       return map { $_ . $s } grep { defined($_) } @_;
  }
  
-=head2 sort_arr
+=head2 surround
  
-Sort array ignoring case and html in data
+surround all values with a two strings
  
- my @sorted = $webpac->sort_arr(@unsorted);
+  @v = surround( 'prefix_', '_suffix', @v );
  
  =cut
  
-sub sort_arr {
-       my $self = shift;
-
-       my $log = $self->_get_logger();
-
-       # FIXME add Schwartzian Transformation?
-
-       my @sorted = sort {
-               $a =~ s#<[^>]+/*>##;
-               $b =~ s#<[^>]+/*>##;
-               lc($b) cmp lc($a)
-       } @_;
-       $log->debug("sorted values: ",sub { join(", ",@sorted) });
-
-       return @sorted;
+sub surround {
+       my $p = shift or die "surround need prefix as first argument";
+       my $s = shift or die "surround needs suffix as second argument";
+       return map { $p . $_ . $s } grep { defined($_) } @_;
  }
  
+=head2 first
  
-=head1 INTERNAL METHODS
+Return first element
  
-=head2 _sort_by_order
-
-Sort xml tags data structure accoding to C<order=""> attribute.
+  $v = first( @v );
  
  =cut
  
-sub _sort_by_order {
-       my $self = shift;
-
-       my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} ||
-               $self->{'import_xml'}->{'indexer'}->{$a};
-       my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} ||
-               $self->{'import_xml'}->{'indexer'}->{$b};
-
-       return $va <=> $vb;
+sub first {
+       my $r = shift;
+       return $r;
  }
  
-=head2 _x
-
-Convert strings from C<conf/normalize/*.xml> encoding into application
-specific encoding (optinally specified using C<code_page> to C<new>
-constructor).
+=head2 lookup
  
- my $text = $n->_x('normalize text string');
+Consult lookup hashes for some value
  
-This is a stub so that other modules doesn't have to implement it.
+  @v = lookup( $v );
+  @v = lookup( @v );
  
  =cut
  
-sub _x {
-       my $self = shift;
-       return shift;
+sub lookup {
+       my $k = shift or return;
+       return unless (defined($lookup->{$k}));
+       if (ref($lookup->{$k}) eq 'ARRAY') {
+               return @{ $lookup->{$k} };
+       } else {
+               return $lookup->{$k};
+       }
  }
  
+=head2 join_with
  
-=head1 AUTHOR
-
-Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
+Joins walues with some delimiter
  
-=head1 COPYRIGHT & LICENSE
-
-Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it
-under the same terms as Perl itself.
+  $v = join_with(", ", @v);
  
  =cut
  
-1; # End of WebPAC::DB
+sub join_with {
+       my $d = shift;
+       return join($d, grep { defined($_) && $_ ne '' } @_);
+}
+
+# END
+1;