implement efficient offset
author Dobrica Pavlinusic <dpavlin@rot13.org>
Sun, 20 Sep 2009 19:56:33 +0000 (19:56 +0000)
committer Dobrica Pavlinusic <dpavlin@rot13.org>
Sun, 20 Sep 2009 19:56:33 +0000 (19:56 +0000)
git-svn-id: svn+ssh://mjesec/home/dpavlin/svn/webpac2/trunk@1303 07558da8-63fa-0310-ba24-9fe276d99e06

lib/WebPAC/Input/ISI.pm

index 86d39b4..41d0284 100644 (file)
@@ -104,29 +104,24 @@ sub new {
        my $tag;
        my $rec;
 
-       $self->{size} = 0;
-       my $max_size;
-       $max_size = ( $self->{offset} || 0 ) + $self->{limit} if $self->{limit};
+       my $pos = 0;
+       my $offset = $self->{offset} || 0;
+       my $end_pos = 0;
+       $end_pos = $offset + $self->{limit} if $self->{limit};
 
-       warn "# max_size: $max_size";
+       warn "# range: $offset - $end_pos";
 
        while( $line = <$fh> ) {
                chomp($line);
-
                my $v;
 
-               if ( $line =~ /^(\S\S)\s(.+)$/ ) {
-                               $tag = $1;
-                               $v = $2;
-               } elsif ( $line =~ /^\s{3}(.+)$/ ) {
-                               $v = $1;
-                               if ( $tag eq 'CR' && $v =~ m{DOI$} ) {
-                                       my $doi = <$fh>;
-                                       chomp($doi);
-                                       $doi =~ s{^\s{3}}{ } || die "can't find DOI in: $doi";
-                                       $v .= $doi;
-                               }
+               if ( $line eq 'EF' ) {
+                       last;
                } elsif ( $line eq 'ER' ) {
+                       last if $end_pos && $pos >= $end_pos;
+                       $pos++;
+                       next if $offset && $pos < $offset;
+
                        # join tags
                        foreach ( qw/AB DE ID TI SO RP SC FU FX PA JI/ ) {
                                $rec->{$_} = join(' ', @{ $rec->{$_} }) if defined $rec->{$_};
@@ -135,17 +130,27 @@ sub new {
                        foreach ( qw/ID SC DE/ ) {
                                $rec->{$_} = [ split(/;\s/, $rec->{$_}) ] if defined $rec->{$_};
                        }
-                       $rec->{'000'} = [ ++$self->{size} ];
+                       $rec->{'000'} = [ $pos ];
                        push @{ $self->{_rec} }, $rec;
 
-                       last if $max_size && $self->{size} == $max_size;
-
                        $rec = {};
                        $line = <$fh>;
                        chomp $line;
                        $log->logdie("expected blank like in ",$arg->{path}, " +$.: $line") unless ( $line eq '' );
-               } elsif ( $line eq 'EF' ) {
-                       last;
+               } elsif ( $offset && $pos < $offset ) {
+                       next;
+               } elsif ( $line =~ /^(\S\S)\s(.+)$/ ) {
+                               $tag = $1;
+                               $v = $2;
+               } elsif ( $line =~ /^\s{3}(.+)$/ ) {
+                               $v = $1;
+                               if ( $tag eq 'CR' && $v =~ m{DOI$} ) {
+                                       my $doi = <$fh>;
+                                       chomp($doi);
+                                       $doi =~ s{^\s{3}}{ } || die "can't find DOI in: $doi";
+                                       $v .= $doi;
+                               }
+
                } elsif ( $line =~ m{^(\S\S)\s*$} ) {
                        warn "# $arg->{path} +$. empty |$line|\n";
                } else {
@@ -161,6 +166,8 @@ sub new {
 
        }
 
+       $self->{size} = $pos - $offset;
+
        $log->debug("loaded ", $self->size, " records");
 
        $self ? return $self : return undef;