fix CROASCII (B1.002:1982) filter
[webpac] / parse_format.pm
index a0e82db..e5b23a6 100644 (file)
@@ -3,7 +3,6 @@
 # parse_format(...)
 #
 
-
 sub parse_format {
        my $type = shift || die "parset_format must be called with type!";
        my $format = shift || die "parse_format must be called with format!";
@@ -37,11 +36,6 @@ sub parse_iso_format {
        my $out;
        my $out_swish;
 
-       my $prefix = "";
-       if ($format =~ s/^([^\d]+)//) {
-               $prefix = $1;
-       }
-
        my $display;
        my $swish;
 
@@ -54,45 +48,140 @@ sub parse_iso_format {
                return $tmp;
        }
 
-       while ($format) {
-#print STDERR "\n#### $format";
-               # this is EBSCO special to support numeric subfield in
-               # form of 856#3
-               if ($format =~ s/^(\d\d\d)#*(\w?)//) {
-                       my $tmp = cnv_cp($codepage,get_sf($row,$1,$2,$i));
-                       if ($tmp) {
-                               $display .= $prefix.$tmp;
-                               $swish .= $tmp." ";
-#print STDERR " == $tmp";
+       # if format doesn't exist, store it in cache
+       if (! defined($cache->{format}->{$format})) {
+#              print STDERR "parsing format for '$format'\n";
+               my @fmt;
+
+               my $f = $format;
+
+               my $eval;
+               $eval = $1 if ($f =~ s/^eval{([^}]+)}//);
+
+               if ($f =~ s/^([^\d]+)//) {
+                       if ($f) {       # there is more to parse
+                               push @fmt,$1;
+                       } else {
+                               @fmt = ('',$1,undef,'');
+#print STDERR "just one field: $1\n";
                        }
-                       $prefix = "";
-               # this might be our local scpeciality -- fields 10 and 11
-               # (as opposed to 010 and 011) so they are strictly listed
-               # here
-               } elsif ($format =~ s/^(1[01])//) {
-                       my $tmp = cnv_cp($codepage,get_sf($row,$1,undef,$i));
-                       if ($tmp) {
-                               $display .= $prefix.$tmp;
+               } else {
+                       push @fmt,'';
+               }
+
+               while ($f) {
+#      print STDERR "\n#### $f";
+                       # this is EBSCO special to support numeric subfield in
+                       # form of 856#3
+                       if ($f =~ s/^(\d\d\d)#*(\w?)//) {
+                               push @fmt,$1;
+                               if ($2) {
+                                       push @fmt,$2;
+                               } else {
+                                       push @fmt,undef;
+                               }
+                       # this might be our local speciality -- fields 10 and 11
+                       # (as opposed to 010 and 011) so they are strictly listed
+                       # here
+                       } elsif ($f =~ s/^(1[01]\w?)//) {
+                               push @fmt,$1;
+                               push @fmt,undef;
+                       } elsif ($f =~ s/^mfn//i) {
+                               push @fmt,'mfn';
+                               push @fmt,'';
+                       } elsif ($f =~ s/^([^\d]+)(\d{0,3})/$2/) {
+                               # still prefix?
+                               if ($#fmt == 0) {
+                                       $fmt[0] .= $1;
+                               } else {
+                                       push @fmt,$1;
+                               }
+                       } elsif ($f =~ s/^([^\d]+\d{0,2})//) {
+                               if ($#fmt == 0) {
+                                       $fmt[0] .= $1;
+                               } else {
+                                       push @fmt,$1;
+                               }
+                       } elsif ($f =~ s/^(\d{1,2})//) {
+                               if ($#fmt == 0) {
+                                       $fmt[0] .= $1;
+                               } else {
+                                       push @fmt,$1;
+                               }
+                       } else {
+                               print STDERR "unparsed format: $f\n";
+                               $f = "";
+                       }
+               }
+               push @fmt,'' if ($#fmt % 3 != 0);       # add empty suffix
+
+               $cache->{format_eval}->{$format} = $eval; # store eval string (if any)
+
+               $cache->{format}->{$format} = \@fmt;
+               
+#              print STDERR "storing format for '$format': [",join("|",@fmt),"]\n";
+#              print STDERR "storing format for '$format':",Dumper(@fmt),"\n";
+#              print STDERR Dumper($cache->{format}->{$format});
+       }
+
+       # now produce actual record
+       my $tmp = $cache->{format}->{$format} || die "no format cache for '$format'";
+       my @fmt = @{$tmp};
+#      print STDERR "using format for '$format':",Dumper(@fmt),"\n";
+#      print STDERR "tmp ",Dumper($tmp);
+#      print STDERR "cache: ",Dumper($cache->{format}->{$format});
+
+       # prefix
+       my $prefix = shift @fmt;
+       my $sufix;
+       while($#fmt > 1) {
+               my $f = shift @fmt || die "BUG: field name can't be empty!";
+               my $sf = shift @fmt;
+
+               if ($f eq 'mfn' && $i == 0) {
+                       $display .= $sufix if ($display);
+                       $display .= $row->{mfn};
+               } else {
+                       my $val = &$func($row,$f,$sf,$i);
+                       if ($val) {
+#                              print STDERR "val: $val\n";
+                               my $tmp = cnv_cp($codepage,$val);
+                               if ($display) {
+                                       $display .= $sufix.$tmp;
+                               } else {
+                                       $display = $tmp;
+                               }
                                $swish .= $tmp." ";
                        }
-                       $prefix = "";
-               } elsif ($format =~ s/^mfn//i) {
-                       $display .= $prefix . $row->{mfn};
-                       $prefix = "";
-               } elsif ($format =~ s/^([^\d]+)(\d{0,3})/$2/) {
-                       $prefix .= $1 if ($display);
-               } elsif ($format =~ s/^([^\d]+\d{0,2})//) {
-                       $prefix .= $1 if ($display);
-               } elsif ($format =~ s/^(\d{1,2})//) {
-                       $prefix .= $1 if ($display);
+               }
+               $sufix = shift @fmt;
+       }
+       $display = $prefix.$display.$sufix if ($display);
+
+       my $eval = $cache->{format_eval}->{$format};
+       if ($eval) {
+               sub fld2str {
+                       my ($func,$row,$f,$sf,$i) = @_;
+#print STDERR "## in fld2str\n";
+                       my $tmp = $codepage->convert(&$func($row,$f,$sf,$i)) || '';
+                       return "'$tmp'";
+               }
+
+               $eval =~ s/v(\d+)\^(\w*)/fld2str($func,$row,$1,$2,$i)/eg;
+#print STDERR "## eval: $eval\n";
+               if (eval "$eval") {
+                       return ($swish,$display);
                } else {
-                       print STDERR "unparsed format: $format\n";
-                       $prefix .= $format;
-                       $format = "";
+                       return (undef,undef);
                }
        }
-       # add suffix
-       $display .= $prefix if ($display);
+
+       if (@fmt) {
+               print STDERR "format left unused: [",join("|",@fmt),"]\n";
+               print STDERR "format: [",join("|",@{$tmp}),"]\n";
+       }
+
+#      print STDERR "format: {",$format || '',"} display: {",$display || '',"} swish: {",$swish || '',"}\n";
 
        return ($swish,$display);
 }
@@ -135,7 +224,7 @@ sub parse_excel_format {
                } elsif ($format =~ s/^([^A-Z\|]+)(\|[A-Z]{1,2}\|)/$2/) {
                        $prefix .= $1 if ($display);
                } else {
-                       print STDERR "unparsed format: $format\n";
+                       #print STDERR "unparsed format: $format\n";
                        $prefix .= $format;
                        $format = "";
                }