use base qw/WebPAC::Common/;
use Data::Dump qw/dump/;
+use Carp qw/confess/;
=head1 NAME
=cut
-our $VERSION = '0.03';
+our $VERSION = '0.04';
+
+our $debug = 0;
+
=head1 SYNOPSIS
=cut
-my $subfields = {
+our $subfields = {
'CR' => sub {
my $full_cr = shift;
my @v = split(/, /, $full_cr);
my $arg = {@_};
- my $log = $self->_get_logger();
-
- open( my $fh, '<', $arg->{path} ) || $log->logconfess("can't open $arg->{path}: $!");
+ open( my $fh, '<', $arg->{path} ) || confess "can't open $arg->{path}: $!";
my ( $format, $version );
if ( $line =~ /^FN\s(.+)$/) {
$format = $1;
} else {
- $log->logdie("first line of $arg->{path} has to be FN, but is: $line");
+ die "first line of $arg->{path} has to be FN, but is: $line";
}
$line = <$fh>;
if ( $line =~ /^VR\s(.+)$/) {
$version = $1;
} else {
- $log->logdie("second line of $arg->{path} has to be VN, but is: $line");
+ die "second line of $arg->{path} has to be VN, but is: $line";
}
- $log->info("opening $format $version database '$arg->{path}'");
+ warn "I: $arg->{path} $format $version - generating record offsets\n";
- my $tag;
- my $rec;
+ $self->{fh} = $fh;
+ $self->{record_offset} = [];
+ $self->{offset} ||= 0;
- my $offset = $self->{offset} || 0;
- my $limit = $self->{limit} || 0;
+ while( $line = <$fh> ) {
+ chomp($line);
+ next unless $line eq 'ER';
+ push @{ $self->{record_offset} }, tell($fh);
+ last if $self->{limit} && $#{ $self->{record_offset} } >= $self->{limit} - 1 + $self->{offset};
+ }
+ push @{ $self->{record_offset} }, tell($fh); # end of file
- my $file_pos = 0;
- my $end_pos = 0;
- $end_pos = $offset + $limit if $limit;
+ warn "I $arg->{path} read ", tell($fh), " bytes $#{ $self->{record_offset} } records\n";
- $self->{_rec} = [];
-
- warn "# offset: $offset limit: $limit end: $end_pos";
+ return $self;
+}
- while( $line = <$fh> ) {
+
+
+=head2 fetch_rec
+
+Return record with ID C<$mfn> from database
+
+ my $rec = $input->fetch_rec( $mfn, $filter_coderef );
+
+=cut
+
+sub fetch_rec {
+ my ( $self, $mfn, $filter_coderef ) = @_;
+
+ seek $self->{fh}, $self->{record_offset}->[ $mfn - 1 ], 0;
+
+ my $tag;
+ my $rec;
+
+ my $fh = $self->{fh};
+
+ while( my $line = <$fh> ) {
chomp($line);
my $v;
if ( $line eq 'EF' ) {
- last;
+ return;
} elsif ( $line eq 'ER' ) {
- $file_pos++;
- last if $end_pos && $file_pos > $end_pos;
-
- if ( ! $offset || $file_pos > $offset ) {
-
- # join tags
- foreach ( qw/AB DE ID TI SO RP SC FU FX PA JI/ ) {
- $rec->{$_} = join(' ', @{ $rec->{$_} }) if defined $rec->{$_};
- }
- # split on ;
- foreach ( qw/ID SC DE/ ) {
- $rec->{$_} = [ split(/;\s/, $rec->{$_}) ] if defined $rec->{$_};
- }
- $rec->{'000'} = [ $file_pos ];
- push @{ $self->{_rec} }, $rec;
- }
- $rec = {};
$line = <$fh>;
chomp $line;
- $log->logdie("expected blank like in ",$arg->{path}, " +$.: $line") unless ( $line eq '' );
- } elsif ( $offset && $file_pos < $offset ) {
- next;
+ die "expected blank like in ",$self->{path}, " +$.: $line" unless $line eq '';
+
+ # join tags
+ foreach ( qw/AB DE ID TI SO RP SC FU FX PA JI/ ) {
+ $rec->{$_} = join(' ', @{ $rec->{$_} }) if defined $rec->{$_};
+ }
+
+ # split on ;
+ foreach ( qw/ID SC DE/ ) {
+ $rec->{$_} = [ split(/;\s/, $rec->{$_}) ] if defined $rec->{$_};
+ }
+
+ $rec->{'000'} = [ $mfn ];
+ warn "## mfn $mfn" if $debug;
+ return $rec;
+
} elsif ( $line =~ /^(\S\S)\s(.+)$/ ) {
$tag = $1;
$v = $2;
$v .= $doi;
}
} elsif ( $line =~ m{^(\S\S)\s*$} ) {
- warn "# $arg->{path} +$. empty |$line|\n";
- } else {
- $log->logdie("can't parse +$. $arg->{path} |$line|");
+ warn "# $self->{path} +$. empty |$line|\n";
+ } elsif ( $line ne '' ) {
+ warn "E: $self->{path} +$ | can't parse |$line|";
}
if ( defined $v ) {
$v = $subfields->{$tag}->($v) if defined $subfields->{$tag};
- $log->debug("$tag: ", sub { dump( $v ) });
+ warn "## $tag: ", sub { dump( $v ) } if $debug;
push @{ $rec->{$tag} }, $v;
}
-
}
- $log->debug("loaded ", $self->size, " records");
-
- $self ? return $self : return undef;
-}
-
-=head2 fetch_rec
-
-Return record with ID C<$mfn> from database
-
- my $rec = $input->fetch_rec( $mfn, $filter_coderef );
-
-=cut
-
-sub fetch_rec {
- my $self = shift;
-
- my ( $mfn, $filter_coderef ) = @_;
- $mfn -= $self->{offset} if $self->{offset};
- return $self->{_rec}->[$mfn-1];
+ warn "can't get full record $mfn got ", dump $rec;
+ return $rec;
}
sub size {
my $self = shift;
- $#{ $self->{_rec} } + 1;
+ my $size = $#{ $self->{record_offset} };
+ return 0 if $size < 0;
+ # no need for +1 since we record end of file as last record
+ return $size - $self->{offset};
}
-=head1 SEE ALSO
-L<http://isibasic.com/help/helpprn.html> is only sane source of document format which Google could find...
-
=head1 AUTHOR
Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
=head1 COPYRIGHT & LICENSE
-Copyright 2007 Dobrica Pavlinusic, All Rights Reserved.
+Copyright 2009 Dobrica Pavlinusic, All Rights Reserved.
This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.