From: Dobrica Pavlinusic Date: Tue, 20 Nov 2007 10:08:02 +0000 (+0000) Subject: r1650@llin: dpavlin | 2007-11-20 11:07:57 +0100 X-Git-Url: http://git.rot13.org/?p=webpac2;a=commitdiff_plain;h=5d5fc808565b88bca0e2d10aa62bd89c7e748743 r1650@llin: dpavlin | 2007-11-20 11:07:57 +0100 final tweaks for WebPAC::Input::PDF, emit fields A .. ZZ git-svn-id: svn+ssh://mjesec/home/dpavlin/svn/webpac2/trunk@1057 07558da8-63fa-0310-ba24-9fe276d99e06 --- diff --git a/lib/WebPAC/Input/PDF.pm b/lib/WebPAC/Input/PDF.pm index 91b9920..8cba888 100644 --- a/lib/WebPAC/Input/PDF.pm +++ b/lib/WebPAC/Input/PDF.pm @@ -69,7 +69,7 @@ sub new { $log->info("opend $file with $pages pages"); - my @lines; + my @lines = (); foreach my $p ( 1 .. $pages ) { my $tree = $doc->getPageContentTree($p); @@ -123,7 +123,7 @@ sub new { } } - $self->size( $#lines ); + $self->{_lines} = \@lines; $log->debug("loaded ", $self->size, " records", sub { dump( @lines ) }); @@ -136,6 +136,10 @@ Return record with ID C<$mfn> from database my $rec = $input->fetch_rec( $mfn, $filter_coderef ); +Records are returned as field C<A>, C<B> and so on... + +Last supported column is C<ZZ>. + =cut sub fetch_rec { @@ -143,7 +147,36 @@ sub fetch_rec { my ( $mfn, $filter_coderef ) = @_; - return $self->{_rec}->[$mfn-1]; + my $rec = { + '000' => [ $mfn ], + }; + + my $line = $self->{_lines}->[ $mfn - 1 ] || return; + confess "expected ARRAY for _lines $mfn" unless ref($line) eq 'ARRAY'; + +# warn "## line = ",dump( $line ); + + my $col = 'A'; + my $c = 0; + foreach my $e ( @$line ) { + $rec->{$col} = $e; + $c++; + # FIXME what about columns > ZZ + if ( $col eq 'Z' ) { + $col .= 'AA'; + } elsif ( $col eq 'ZZ' ) { + $self->_get_logger()->logwarn("ignoring colums above ZZ (original ", $#$line + 1, " > $c max columns)"); + last; + } elsif ( $col =~ m/([A-Z])Z$/ ) { + $col .= $1++ . 
'A'; + } else { + $col++; + } + } + +# warn "## rec = ",dump( $rec ); + + return $rec; } @@ -157,7 +190,7 @@ Return number of records in database sub size { my $self = shift; - return $#{$self->{_rec}} + 1; + return $#{$self->{_lines}} + 1; } =head1 SEE ALSO diff --git a/t/2-input-pdf.t b/t/2-input-pdf.t index c0098ba..6a2151a 100755 --- a/t/2-input-pdf.t +++ b/t/2-input-pdf.t @@ -3,7 +3,7 @@ use strict; use blib; -use Test::More tests => 9; +use Test::More tests => 11; BEGIN { use_ok( 'WebPAC::Test' ); @@ -24,6 +24,8 @@ ok(my $db = $input->open( ), "open"); ok(my $size = $input->size, "size"); +$size = 3; + foreach my $mfn ( 1 ... $size ) { my $rec = $input->fetch; if ($mfn <= 10 || $mfn == 20) {