collect results to support offset in fetch record
authorDobrica Pavlinusic <dpavlin@rot13.org>
Sat, 23 Oct 2010 20:19:16 +0000 (22:19 +0200)
committerDobrica Pavlinusic <dpavlin@rot13.org>
Sat, 23 Oct 2010 20:19:16 +0000 (22:19 +0200)
Aleph.pm
COBISS.pm
server.pl

index 8fdb1de..8dda877 100644 (file)
--- a/Aleph.pm
+++ b/Aleph.pm
@@ -181,7 +181,7 @@ warn "## ++ ", dump( $f, $i1, $i2, @sf );
 
                my $id = $hash->{SYS} || die "no SYS";
 
-               $self->save_marc( $id, $marc->as_usmarc );
+               $self->save_marc( "$id.marc", $marc->as_usmarc );
 
                if ( $nr < $self->{hits} ) {
                        $nr++;
@@ -189,7 +189,7 @@ warn "## ++ ", dump( $f, $i1, $i2, @sf );
                        $mech->follow_link( url_regex => qr/set_entry=0*$nr/ );
                }
 
-               return $marc->as_usmarc;
+               return $id;
        } else {
                die "can't fetch COMARC format from ", $mech->content;
        }
index 1f5f079..247efec 100644 (file)
--- a/COBISS.pm
+++ b/COBISS.pm
@@ -123,28 +123,24 @@ sub next_marc {
 
        if ( $mech->content =~ m{<pre>\s*(.+?(\d+)\.\s+ID=(\d+).+?)\s*</pre>}s ) {
 
-               my $comarc = $1;
+               my $markup = $1;
                my $nr = $2;
                my $id = $3;
 
-diag "fetch_marc $nr [$id] $format";
+diag "fetch $nr [$id] $format";
 
-               $comarc =~ s{</?b>}{}gs;
-               $comarc =~ s{<font[^>]*>}{<s>}gs;
-               $comarc =~ s{</font>}{<e>}gs;
+               $markup =~ s{</?b>}{}gs;
+               $markup =~ s{<font[^>]*>}{<s>}gs;
+               $markup =~ s{</font>}{<e>}gs;
 
-               open(my $out, '>:utf8', "comarc/$id");
-               print $out $comarc;
-               close($out);
+               $markup =~ s/[\r\n]+\s{5}//gs; # join continuation lines
 
-               print $comarc;
+               $self->save_marc( "$id.xml", $markup );
 
                my $marc = MARC::Record->new;
+               my $comarc = MARC::Record->new;
 
-               $comarc =~ s/[\r\n]+\s{5}//gs; # join continuation lines
-warn "## comarc join: $comarc\n";
-
-               foreach my $line ( split(/[\r\n]+/, $comarc) ) {
+               foreach my $line ( split(/[\r\n]+/, $markup) ) {
 
                        if ( $line !~ s{^(\d\d\d)([01 ])([01 ])}{} ) {
                                diag "SKIP: $line";
@@ -153,37 +149,40 @@ warn "## comarc join: $comarc\n";
 
                                my ( $f, $i1, $i2 ) = ( $1, $2, $3 );
 
-                               our $out = undef;
+                               our $marc_map = undef;
+                               our $comarc_map = undef;
                                our $ignored = undef;
 
-                               sub sf_us {
-                                       my ($format,$f,$sf,$v) = @_;
+                               sub sf_parse {
+                                       my ($f,$sf,$v) = @_;
 
                                        $v =~ s/\s+$//;
 
-                                       if ( $format =~ m/unimarc/i ) {
-                                               push @{ $out->{ $f } }, ( $sf, $v );
-                                       } elsif ( $format =~ m/marc/ ) {
-                                               if ( my $m = $cobiss_marc21->{$f}->{$sf} ) {
-                                                       push @{ $out->{ $m->[0] } }, ( $m->[1], $v );
-                                               } else {
-                                                       $ignored->{$f}++;
-                                               }
+                                       push @{ $comarc_map->{ $f } }, ( $sf, $v );
+                                       if ( my $m = $cobiss_marc21->{$f}->{$sf} ) {
+                                               push @{ $marc_map->{ $m->[0] } }, ( $m->[1], $v );
+                                       } else {
+                                               $ignored->{$f}++;
                                        }
                                        return ''; # fix warning
                                }
                                my $l = $line;
-                               $l =~ s{<s>(\w)<e>([^<]+)}{sf_us($format,$f,$1, $2)}ges;
+                               $l =~ s{<s>(\w)<e>([^<]+)}{sf_parse($f,$1, $2)}ges;
+
+                               diag "[$format] $line -> ",dump( $comarc_map, $marc_map ) if $comarc_map;
 
-                               diag "[$format] $line -> ",dump( $out ) if $out;
+                               foreach my $f ( keys %$comarc_map ) {
+                                       $comarc->add_fields( $f, $i1, $i2, @{ $comarc_map->{$f} } );
+                               }
 
-                               foreach my $f ( keys %$out ) {
-                                       $marc->add_fields( $f, $i1, $i2, @{ $out->{$f} } );
+                               foreach my $f ( keys %$marc_map ) {
+                                       $marc->add_fields( $f, $i1, $i2, @{ $marc_map->{$f} } );
                                }
                        }
                }
 
-               $self->save_marc( "$id.$format", $marc->as_usmarc );
+               $self->save_marc( "$id.marc", $marc->as_usmarc );
+               $self->save_marc( "$id.unimarc", $comarc->as_usmarc );
                diag $marc->as_formatted;
 
                if ( $nr < $self->{hits} ) {
@@ -194,7 +193,7 @@ warn "## comarc join: $comarc\n";
                        warn "# no more results";
                }
 
-               return $marc->as_usmarc;
+               return $id;
        } else {
                die "can't fetch COMARC format from ", $mech->content;
        }
index 9fdaa95..9394cd8 100755 (executable)
--- a/server.pl
+++ b/server.pl
@@ -46,7 +46,7 @@ diag "SearchHandle ",Dumper($this);
        my $module = $databases->{$database};
        if ( ! defined $module ) {
         $this->{ERR_CODE} = 108;
-               warn $this->{ERR_STR} = "unknown database $database - available: " . keys %$databases;
+               warn $this->{ERR_STR} = "$database NOT FOUND in available databases: " . join(" ", keys %$databases);
                return;
        }
 
@@ -78,6 +78,7 @@ diag "got $hits hits";
         upper => $hits < $max_records ? $max_records : $hits,
         hits  => $hits,
                from => $from,
+               results => [ undef ], # we don't use 0 element
     };
     my $sets = $session->{SETS};
 
@@ -115,10 +116,10 @@ sub FetchHandle {
         return;
     }
 
-       my $from = $rs->{from} || die "no from?";
-
     $this->{BASENAME} = "HtmlZ3950";
 
+       my $format = 'usmarc';
+
 #    if ( !defined($req_form) || ( $req_form eq &Net::Z3950::OID::xml ) )
     if (0)
     {    ## XML records
@@ -127,17 +128,45 @@ sub FetchHandle {
     }
     elsif ( $req_form eq &Net::Z3950::OID::unimarc ) { # FIXME convert to usmarc
         $this->{REP_FORM} = &Net::Z3950::OID::unimarc;
-        $this->{RECORD} = $from->next_marc('unimarc');
+               $format = 'unimarc';
     }
     elsif ( $req_form eq &Net::Z3950::OID::usmarc ) {  # FIXME convert to usmarc
         $this->{REP_FORM} = &Net::Z3950::OID::usmarc;
-        $this->{RECORD} = $from->next_marc('usmarc');
+               $format = 'usmarc';
     }
     else {    ## Unsupported record format
         $this->{ERR_CODE} = 239;
         $this->{ERR_STR}  = $req_form;
         return;
     }
+
+
+       my $from = $rs->{from} || die "no from?";
+       # fetch records up to offset
+       while(  $#{ $rs->{results} } < $offset ) {
+               push @{ $rs->{results} }, $from->next_marc;
+               warn "# rs result ", $#{ $rs->{results} }, " $format\n";
+       }
+
+       my $id = $rs->{results}->[$offset] || die "no id for record $offset in ",Dumper( $rs->{results} );
+
+       my $path = "marc/$id.$format";
+       if ( ! -e $path ) {
+               ## Unsupported record format
+        $this->{ERR_CODE} = 239;
+        $this->{ERR_STR}  = $req_form;
+        return;
+    }
+
+       {
+               open(my $in, '<', $path) || die "$path: $!";
+               local $/ = undef;
+               my $marc = <$in>;
+               close($in);
+               $this->{RECORD} = $in;
+       }
+
+
     if ( $offset == $hits ) {
         $this->{LAST} = 1;
     }