Re-wrote parsing for ISO-type data (isis, marc) to use in-memory cache of
author: Dobrica Pavlinusic <dpavlin@rot13.org>
Sun, 23 Nov 2003 15:42:16 +0000 (15:42 +0000)
committer: Dobrica Pavlinusic <dpavlin@rot13.org>
Sun, 23 Nov 2003 15:42:16 +0000 (15:42 +0000)
format... 10% speed improvement and cleaner code. Include filter functions
just once.

git-svn-id: file:///home/dpavlin/private/svn/webpac/trunk@170 13eb9ef6-21d5-0310-b721-a9d68796d827

all2xml.pl
parse_format.pm

index 76009de..98f5de1 100755 (executable)
@@ -61,6 +61,8 @@ my %type2tag = (
        'feed' => 'feed'
 );
 
+my $cache;     # for cacheing
+
 sub data2xml {
 
        use xmlify;
@@ -90,7 +92,15 @@ sub data2xml {
                return $va <=> $vb;
        }
 
-       foreach my $field (sort by_order keys %{$config->{indexer}}) {
+       my @sorted_tags;
+       if ($cache->{tags_by_order}->{$type}) {
+               @sorted_tags = @{$cache->{tags_by_order}->{$type}};
+       } else {
+               @sorted_tags = sort by_order keys %{$config->{indexer}};
+               $cache->{tags_by_order}->{$type} = \@sorted_tags;
+       }
+
+       foreach my $field (@sorted_tags) {
 
                $field=x($field);
                $field_usage{$field}++;
@@ -167,8 +177,9 @@ sub data2xml {
                                # filter="name" ; filter this field through
                                # filter/[name].pm
                                my $filter = $x->{filter};
-                               if ($filter) {
+                               if ($filter && !$cache->{filter_loaded}->{$filter}) {
                                        require "filter/".$filter.".pm";
+                                       $cache->{filter_loaded}->{$filter}++;
                                }
                                # type="swish" ; field for swish
                                if ($swish) {
index ac4e9ab..b7c0e47 100644 (file)
@@ -3,7 +3,6 @@
 # parse_format(...)
 #
 
-
 sub parse_format {
        my $type = shift || die "parset_format must be called with type!";
        my $format = shift || die "parse_format must be called with format!";
@@ -37,11 +36,6 @@ sub parse_iso_format {
        my $out;
        my $out_swish;
 
-       my $prefix = "";
-       if ($format =~ s/^([^\d]+)//) {
-               $prefix = $1;
-       }
-
        my $display;
        my $swish;
 
@@ -54,45 +48,99 @@ sub parse_iso_format {
                return $tmp;
        }
 
-       while ($format) {
-#print STDERR "\n#### $format";
-               # this is EBSCO special to support numeric subfield in
-               # form of 856#3
-               if ($format =~ s/^(\d\d\d)#*(\w?)//) {
-                       my $tmp = cnv_cp($codepage,&$func($row,$1,$2,$i));
-                       if ($tmp) {
-                               $display .= $prefix.$tmp;
-                               $swish .= $tmp." ";
-#print STDERR " == $tmp";
+       # if format doesn't exits, store it in cache
+       if (! defined($cache->{format}->{$format})) {
+#              print STDERR "parsing format for '$format'\n";
+               my @fmt;
+
+               my $f = $format;
+
+               if ($f =~ s/^([^\d]+)//) {
+                       if ($f) {       # there is more to parse
+                               push @fmt,$1;
+                       } else {
+                               @fmt = ('',$1,undef,'');
+#print STDERR "just one field: $1\n";
                        }
-                       $prefix = "";
-               # this might be our local scpeciality -- fields 10 and 11
-               # (as opposed to 010 and 011) so they are strictly listed
-               # here
-               } elsif ($format =~ s/^(1[01])//) {
-                       my $tmp = cnv_cp($codepage,&$func($row,$1,undef,$i));
-                       if ($tmp) {
-                               $display .= $prefix.$tmp;
-                               $swish .= $tmp." ";
+               } else {
+                       push @fmt,'';
+               }
+
+               while ($f) {
+#      print STDERR "\n#### $f";
+                       # this is EBSCO special to support numeric subfield in
+                       # form of 856#3
+                       if ($f =~ s/^(\d\d\d)#*(\w?)//) {
+                               push @fmt,$1;
+                               if ($2) {
+                                       push @fmt,$2;
+                               } else {
+                                       push @fmt,undef;
+                               }
+                       # this might be our local scpeciality -- fields 10 and 11
+                       # (as opposed to 010 and 011) so they are strictly listed
+                       # here
+                       } elsif ($f =~ s/^(1[01])//) {
+                               push @fmt,$1;
+                               push @fmt,undef;
+                       } elsif ($f =~ s/^mfn//i) {
+                               push @fmt,'mfn';
+                               push @fmt,'';
+                       } elsif ($f =~ s/^([^\d]+)(\d{0,3})/$2/) {
+                               push @fmt,$1;
+                       } elsif ($f =~ s/^([^\d]+\d{0,2})//) {
+                               push @fmt,$1;
+                       } elsif ($f =~ s/^(\d{1,2})//) {
+                               push @fmt,$1;
+                       } else {
+                               print STDERR "unparsed format: $f\n";
+                               $f = "";
                        }
-                       $prefix = "";
-               } elsif ($format =~ s/^mfn//i) {
-                       $display .= $prefix . $row->{mfn};
-                       $prefix = "";
-               } elsif ($format =~ s/^([^\d]+)(\d{0,3})/$2/) {
-                       $prefix .= $1 if ($display);
-               } elsif ($format =~ s/^([^\d]+\d{0,2})//) {
-                       $prefix .= $1 if ($display);
-               } elsif ($format =~ s/^(\d{1,2})//) {
-                       $prefix .= $1 if ($display);
+               }
+               push @fmt,'' if ($#fmt % 3 != 0);       # add empty suffix
+               $cache->{format}->{$format} = \@fmt;
+               
+#              print STDERR "storing format for '$format': [",join("|",@fmt),"]\n";
+#              print STDERR "storing format for '$format':",Dumper(@fmt),"\n";
+#              print STDERR Dumper($cache->{format}->{$format});
+       }
+
+       # now produce actual record
+       my $tmp = $cache->{format}->{$format} || die "no format cache for '$format'";
+       my @fmt = @{$tmp};
+#      print STDERR "using format for '$format':",Dumper(@fmt),"\n";
+#      print STDERR "tmp ",Dumper($tmp);
+#      print STDERR "cache: ",Dumper($cache->{format}->{$format});
+
+       # prefix
+       my $prefix = shift @fmt;
+       my $sufix;
+       while($#fmt > 1) {
+               my $f = shift @fmt || die "BUG: field name can't be empty!";
+               my $sf = shift @fmt;
+
+               if ($f eq 'mfn' && $i == 0) {
+                       $display .= $sufix if ($display);
+                       $display .= $row->{mfn};
                } else {
-                       print STDERR "unparsed format: $format\n";
-                       $prefix .= $format;
-                       $format = "";
+                       my $val = &$func($row,$f,$sf,$i);
+                       if ($val) {
+#                              print STDERR "val: $val\n";
+                               my $tmp = cnv_cp($codepage,$val);
+                               if ($display) {
+                                       $display .= $sufix.$tmp;
+                               } else {
+                                       $display = $tmp;
+                               }
+                               $swish .= $tmp." ";
+                       }
                }
+               $sufix = shift @fmt;
        }
-       # add suffix
-       $display .= $prefix if ($display);
+       $display = $prefix.$display.$sufix if ($display);
+       print STDERR "format left unused: [",join("|",@fmt),"]\n" if (@fmt);
+
+#      print STDERR "display: $display swish: $swish\n";
 
        return ($swish,$display);
 }