1 #-------------------------------------------------------------
7 my $type = shift || die "parset_format must be called with type!";
8 my $format = shift || die "parse_format must be called with format!";
9 my $row = shift || die "parse_format must be called with row!";
10 my $i = shift || 0; # isis repeatable number
11 my $codepage = shift || die "parse_format must be called with codepage!";
12 if ($type eq "isis") {
13 return parse_iso_format($format,$row,$i,$codepage,'isis_sf');
14 } elsif ($type eq "excel") {
15 return parse_excel_format($format,$row,$i,$codepage);
16 } elsif ($type eq "marc") {
17 return parse_iso_format($format,$row,$i,$codepage,'marc_sf');
18 } elsif ($type eq "feed") {
19 return parse_feed_format($format,$row,$i,$codepage);
23 #-------------------------------------------------------------
25 sub parse_iso_format {
32 my $func = shift || die "need to know which sub-field function to use";
44 my $tmp = shift || return;
46 $tmp = $codepage->convert($tmp) || print STDERR "iso: '$tmp' can't convert\n";
51 # if format doesn't exits, store it in cache
52 if (! defined($cache->{format}->{$format})) {
53 # print STDERR "parsing format for '$format'\n";
59 $eval = $1 if ($f =~ s/^eval{([^}]+?)}//);
61 if ($f =~ s/^([^\d]+)//) {
62 if ($f) { # there is more to parse
65 @fmt = ('',$1,undef,'');
66 #print STDERR "just one field: $1\n";
73 # print STDERR "\n#### $f";
74 # this is EBSCO special to support numeric subfield in
76 if ($f =~ s/^(\d\d\d)#*(\w?)//) {
83 # this might be our local scpeciality -- fields 10 and 11
84 # (as opposed to 010 and 011) so they are strictly listed
86 } elsif ($f =~ s/^(1[01]\w?)//) {
89 } elsif ($f =~ s/^mfn//i) {
92 } elsif ($f =~ s/^([^\d]+)(\d{0,3})/$2/) {
99 } elsif ($f =~ s/^([^\d]+\d{0,2})//) {
105 } elsif ($f =~ s/^(\d{1,2})//) {
112 print STDERR "unparsed format: $f\n";
116 push @fmt,'' if ($#fmt % 3 != 0); # add empty suffix
118 $cache->{format_eval}->{$format} = $eval; # store eval string (if any)
120 $cache->{format}->{$format} = \@fmt;
122 # print STDERR "storing format for '$format': [",join("|",@fmt),"]\n";
123 # print STDERR "storing format for '$format':",Dumper(@fmt),"\n";
124 # print STDERR Dumper($cache->{format}->{$format});
127 # now produce actual record
128 my $tmp = $cache->{format}->{$format} || die "no format cache for '$format'";
130 # print STDERR "using format for '$format':",Dumper(@fmt),"\n";
131 # print STDERR "tmp ",Dumper($tmp);
132 # print STDERR "cache: ",Dumper($cache->{format}->{$format});
135 my $prefix = shift @fmt;
138 my $f = shift @fmt || die "BUG: field name can't be empty!";
141 if ($f eq 'mfn' && $i == 0) {
142 $display .= $sufix if ($display);
143 $display .= $row->{mfn};
145 my $val = &$func($row,$f,$sf,$i);
147 # print STDERR "val: $val\n";
148 my $tmp = cnv_cp($codepage,$val);
150 $display .= $sufix.$tmp;
159 $display = $prefix.$display.$sufix if ($display);
161 my $eval = $cache->{format_eval}->{$format};
164 my ($func,$row,$f,$sf,$i) = @_;
165 #print STDERR "## in fld2str\n";
166 my $tmp = $codepage->convert(&$func($row,$f,$sf,$i)) || $codepage->convert(&$func($row,$f,$sf,0)) || '';
170 $eval =~ s/v(\d+)\^(\w*)/fld2str($func,$row,$1,$2,$i)/eg;
171 #print STDERR "## eval: $eval\n";
173 die "eval error: eval{$eval}: $@" if ($@);
174 return ($swish,$display);
176 die "eval error: eval{$eval}: $@" if ($@);
177 return (undef,undef);
182 print STDERR "format left unused: [",join("|",@fmt),"]\n";
183 print STDERR "format: [",join("|",@{$tmp}),"]\n";
186 # print STDERR "format: {",$format || '',"} display: {",$display || '',"} swish: {",$swish || '',"}\n";
188 return ($swish,$display);
191 #-------------------------------------------------------------
193 sub parse_excel_format {
197 #my $codepage = shift;
199 # data allready comes in utf-8 due to change in
200 # SpreadSheet::ParseExcel::FmtDefault line 69 from
201 # return pack('C*', unpack('n*', $sTxt));
202 # to following which returns utf-8:
203 # return pack('U*', unpack('n*', $sTxt));
206 return if ($i > 0); # Excel doesn't support repeatable fields
212 if ($format =~ s/^([^A-Z\|]{1,3})//) {
219 while ($format && length($format) > 0) {
220 #print STDERR "\n#### $format #";
221 if ($format =~ s/^\|([A-Z]{1,2})\|//) {
222 #print STDERR "--$1-> $format -[",length($format),"] ";
224 my $tmp = $row->{$1};
225 $display .= $prefix . $tmp;
227 #print STDERR " == $tmp";
230 } elsif ($format =~ s/^([^A-Z\|]+)(\|[A-Z]{1,2}\|)/$2/) {
231 $prefix .= $1 if ($display);
233 #print STDERR "unparsed format: $format\n";
237 #print STDERR " display: $display swish: $swish [format: $format]";
240 $display .= $prefix if ($display);
242 return ($swish,$display);
245 #-------------------------------------------------------------
247 sub parse_feed_format {
251 my $codepage = shift;
253 # XXX feed doesn't support repeatable fields, but they really
254 # should, This is a bug. It should be fixed!
261 if ($format =~ s/^([^\d\|]{1,3})//) {
268 while ($format && length($format) > 0) {
269 #print STDERR "\n#### $format #";
270 if ($format =~ s/^\|(\d+)\|//) {
271 #print STDERR "--$1-> $format -[",length($format),"] ";
273 my $tmp = $data->{$1};
275 $tmp = $codepage->convert($tmp) || warn "feed: $1 '$tmp' can't convert\n";
277 $display .= $prefix . $tmp;
279 #print STDERR " == $tmp";
282 } elsif ($format =~ s/^([^\d\|]+)(\|\d+\|)/$2/) {
283 $prefix .= $1 if ($display);
285 print STDERR "unparsed format: $format\n";
289 #print STDERR " display: $display swish: $swish [format: $format]";
292 $display .= $prefix if ($display);
294 return ($swish,$display);
297 #-------------------------------------------------------------