#-------------------------------------------------------------
#
-# parse_format('format',$isis_row);
+# parse_format(...)
#
-use isis_sf;
-
sub parse_format {
+ my $type = shift || die "parset_format must be called with type!";
+ my $format = shift || die "parse_format must be called with format!";
+ my $row = shift || die "parse_format must be called with row!";
+ my $i = shift || 0; # isis repeatable number
+ my $codepage = shift || die "parse_format must be called with codepage!";
+ if ($type eq "isis") {
+ return parse_iso_format($format,$row,$i,$codepage,'isis_sf');
+ } elsif ($type eq "excel") {
+ return parse_excel_format($format,$row,$i,$codepage);
+ } elsif ($type eq "marc") {
+ return parse_iso_format($format,$row,$i,$codepage,'marc_sf');
+ } elsif ($type eq "feed") {
+ return parse_feed_format($format,$row,$i,$codepage);
+ }
+}
+
+#-------------------------------------------------------------
+
+sub parse_iso_format {
+
my $format = shift;
my $row = shift;
- my $i = shift || 0; # isis repeatable number
- my $codepage = shift || die;
+ my $i = shift;
+ my $codepage = shift;
+
+ my $func = shift || die "need to know which sub-field function to use";
+
+ require $func.".pm";
+
+ my $out;
+ my $out_swish;
+
+ my $display;
+ my $swish;
+
+ sub cnv_cp {
+ my $codepage = shift;
+ my $tmp = shift || return;
+ if ($codepage) {
+ $tmp = $codepage->convert($tmp) || print STDERR "iso: '$tmp' can't convert\n";
+ }
+ return $tmp;
+ }
+
+ # if format doesn't exits, store it in cache
+ if (! defined($cache->{format}->{$format})) {
+# print STDERR "parsing format for '$format'\n";
+ my @fmt;
+
+ my $f = $format;
+
+ my $eval;
+ $eval = $1 if ($f =~ s/^eval{([^}]+)}//);
+
+ if ($f =~ s/^([^\d]+)//) {
+ if ($f) { # there is more to parse
+ push @fmt,$1;
+ } else {
+ @fmt = ('',$1,undef,'');
+#print STDERR "just one field: $1\n";
+ }
+ } else {
+ push @fmt,'';
+ }
+
+ while ($f) {
+# print STDERR "\n#### $f";
+ # this is EBSCO special to support numeric subfield in
+ # form of 856#3
+ if ($f =~ s/^(\d\d\d)#*(\w?)//) {
+ push @fmt,$1;
+ if ($2) {
+ push @fmt,$2;
+ } else {
+ push @fmt,undef;
+ }
+ # this might be our local scpeciality -- fields 10 and 11
+ # (as opposed to 010 and 011) so they are strictly listed
+ # here
+ } elsif ($f =~ s/^(1[01]\w?)//) {
+ push @fmt,$1;
+ push @fmt,undef;
+ } elsif ($f =~ s/^mfn//i) {
+ push @fmt,'mfn';
+ push @fmt,'';
+ } elsif ($f =~ s/^([^\d]+)(\d{0,3})/$2/) {
+ # still prefix?
+ if ($#fmt == 0) {
+ $fmt[0] .= $1;
+ } else {
+ push @fmt,$1;
+ }
+ } elsif ($f =~ s/^([^\d]+\d{0,2})//) {
+ if ($#fmt == 0) {
+ $fmt[0] .= $1;
+ } else {
+ push @fmt,$1;
+ }
+ } elsif ($f =~ s/^(\d{1,2})//) {
+ if ($#fmt == 0) {
+ $fmt[0] .= $1;
+ } else {
+ push @fmt,$1;
+ }
+ } else {
+ print STDERR "unparsed format: $f\n";
+ $f = "";
+ }
+ }
+ push @fmt,'' if ($#fmt % 3 != 0); # add empty suffix
+
+ $cache->{format_eval}->{$format} = $eval; # store eval string (if any)
+
+ $cache->{format}->{$format} = \@fmt;
+
+# print STDERR "storing format for '$format': [",join("|",@fmt),"]\n";
+# print STDERR "storing format for '$format':",Dumper(@fmt),"\n";
+# print STDERR Dumper($cache->{format}->{$format});
+ }
+
+ # now produce actual record
+ my $tmp = $cache->{format}->{$format} || die "no format cache for '$format'";
+ my @fmt = @{$tmp};
+# print STDERR "using format for '$format':",Dumper(@fmt),"\n";
+# print STDERR "tmp ",Dumper($tmp);
+# print STDERR "cache: ",Dumper($cache->{format}->{$format});
+
+ # prefix
+ my $prefix = shift @fmt;
+ my $sufix;
+ while($#fmt > 1) {
+ my $f = shift @fmt || die "BUG: field name can't be empty!";
+ my $sf = shift @fmt;
+
+ if ($f eq 'mfn' && $i == 0) {
+ $display .= $sufix if ($display);
+ $display .= $row->{mfn};
+ } else {
+ my $val = &$func($row,$f,$sf,$i);
+ if ($val) {
+# print STDERR "val: $val\n";
+ my $tmp = cnv_cp($codepage,$val);
+ if ($display) {
+ $display .= $sufix.$tmp;
+ } else {
+ $display = $tmp;
+ }
+ $swish .= $tmp." ";
+ }
+ }
+ $sufix = shift @fmt;
+ }
+ $display = $prefix.$display.$sufix if ($display);
+
+ my $eval = $cache->{format_eval}->{$format};
+ if ($eval) {
+ sub fld2str {
+ my ($func,$row,$f,$sf,$i) = @_;
+#print STDERR "## in fld2str\n";
+ my $tmp = $codepage->convert(&$func($row,$f,$sf,$i)) || '';
+ return "'$tmp'";
+ }
+
+ $eval =~ s/v(\d+)\^(\w*)/fld2str($func,$row,$1,$2,$i)/eg;
+#print STDERR "## eval: $eval\n";
+ if (eval "$eval") {
+ return ($swish,$display);
+ } else {
+ return (undef,undef);
+ }
+ }
+
+ if (@fmt) {
+ print STDERR "format left unused: [",join("|",@fmt),"]\n";
+ print STDERR "format: [",join("|",@{$tmp}),"]\n";
+ }
+
+# print STDERR "format: {",$format || '',"} display: {",$display || '',"} swish: {",$swish || '',"}\n";
+
+ return ($swish,$display);
+}
+
+#-------------------------------------------------------------
+
+sub parse_excel_format {
+ my $format = shift;
+ my $row = shift;
+ my $i = shift;
+ my $codepage = shift;
+
+ return if ($i > 0); # Excel doesn't support repeatable fields
my $out;
my $out_swish;
my $prefix = "";
- if ($format =~ s/^([^\d]+)//) {
+ if ($format =~ s/^([^A-Z\|]{1,3})//) {
$prefix = $1;
}
my $display;
my $swish;
- while ($format) {
-#print STDERR "\n#### $format";
- if ($format =~ s/^(\d\d\d)(\w?)//) {
- my $isis_tmp = isis_sf($row,$1,$2,$i);
- if ($isis_tmp) {
- $isis_tmp = $codepage->convert($isis_tmp) if ($codepage);
- $display .= $prefix . $isis_tmp;
- $swish .= $isis_tmp." ";
-#print STDERR " == $isis_tmp";
+ while ($format && length($format) > 0) {
+#print STDERR "\n#### $format #";
+ if ($format =~ s/^\|([A-Z]{1,2})\|//) {
+#print STDERR "--$1-> $format -[",length($format),"] ";
+ if ($row->{$1}) {
+ my $tmp = $row->{$1};
+ if ($codepage) {
+ $tmp = $codepage->convert($tmp) || warn "excel: $1 '$tmp' can't convert";
+ }
+ $display .= $prefix . $tmp;
+ $swish .= $tmp." ";
+#print STDERR " == $tmp";
}
$prefix = "";
- # this might be our local scpeciality -- fields 10 and 11
- # (as opposed to 010 and 011) so they are strictly listed
- # here
- } elsif ($format =~ s/^(1[01])//) {
- my $isis_tmp = isis_sf($row,$1,undef,$i);
- if ($isis_tmp) {
- $isis_tmp = $codepage->convert($isis_tmp) if ($codepage);
- $display .= $prefix . $isis_tmp;
- $swish .= $isis_tmp." ";
+ } elsif ($format =~ s/^([^A-Z\|]+)(\|[A-Z]{1,2}\|)/$2/) {
+ $prefix .= $1 if ($display);
+ } else {
+ #print STDERR "unparsed format: $format\n";
+ $prefix .= $format;
+ $format = "";
+ }
+#print STDERR " display: $display swish: $swish [format: $format]";
+ }
+ # add suffix
+ $display .= $prefix if ($display);
+
+ return ($swish,$display);
+}
+
+#-------------------------------------------------------------
+
+sub parse_feed_format {
+ my $format = shift;
+ my $data = shift;
+ my $i = shift;
+ my $codepage = shift;
+
+ # XXX feed doesn't support repeatable fields, but they really
+ # should, This is a bug. It should be fixed!
+ return if ($i > 0);
+
+ my $out;
+ my $out_swish;
+
+ my $prefix = "";
+ if ($format =~ s/^([^\d\|]{1,3})//) {
+ $prefix = $1;
+ }
+
+ my $display;
+ my $swish;
+
+ while ($format && length($format) > 0) {
+#print STDERR "\n#### $format #";
+ if ($format =~ s/^\|(\d+)\|//) {
+#print STDERR "--$1-> $format -[",length($format),"] ";
+ if ($data->{$1}) {
+ my $tmp = $data->{$1};
+ if ($codepage) {
+ $tmp = $codepage->convert($tmp) || warn "feed: $1 '$tmp' can't convert\n";
+ }
+ $display .= $prefix . $tmp;
+ $swish .= $tmp." ";
+#print STDERR " == $tmp";
}
$prefix = "";
- } elsif ($format =~ s/^mfn//i) {
- $display .= $prefix . $row->{mfn};
- $prefix = "";
- } elsif ($format =~ s/^([^\d]+)(\d{0,3})/$2/) {
- $prefix .= $1 if ($display);
- } elsif ($format =~ s/^([^\d]+\d{0,2})//) {
- $prefix .= $1 if ($display);
- } elsif ($format =~ s/^(\d{1,2})//) {
+ } elsif ($format =~ s/^([^\d\|]+)(\|\d+\|)/$2/) {
$prefix .= $1 if ($display);
} else {
print STDERR "unparsed format: $format\n";
$prefix .= $format;
$format = "";
}
+#print STDERR " display: $display swish: $swish [format: $format]";
}
# add suffix
$display .= $prefix if ($display);
}
#-------------------------------------------------------------
+
1;