- added support for the Direct Export format alongside the existing BRS/Tagged format
author Dobrica Pavlinusic <dpavlin@rot13.org>
Sat, 19 Sep 2009 23:29:23 +0000 (23:29 +0000)
committer Dobrica Pavlinusic <dpavlin@rot13.org>
Sat, 19 Sep 2009 23:29:23 +0000 (23:29 +0000)
- allow specifying a file glob (as from ovid-download-results.pl) to read multiple files

git-svn-id: svn+ssh://mjesec/home/dpavlin/svn/webpac2/trunk@1296 07558da8-63fa-0310-ba24-9fe276d99e06

lib/WebPAC/Input/Ovid.pm

index d96a689..e08a679 100644 (file)
@@ -11,7 +11,7 @@ use Data::Dump qw/dump/;
 
 =head1 NAME
 
-WebPAC::Input::Ovid - support for Ovid citation export
+WebPAC::Input::Ovid - support for Ovid citation export (BRS/Tagged and Direct Export)
 
 =head1 VERSION
 
@@ -30,6 +30,10 @@ Open file in Ovid citation export fromat
        path => '/path/to/ovid-cites.txt',
  );
 
+You can also specify file glob:
+
+  my $input = WebPAC::Input::Ovid->new( path => '/path/to/ovid.*.txt' );
+
 =head1 FUNCTIONS
 
 =head2 new
@@ -66,45 +70,61 @@ sub new {
 
        my $log = $self->_get_logger();
 
-       open( my $fh, '<', $arg->{path} ) || $log->logconfess("can't open $arg->{path}: $!");
+       my @paths;
 
-       $log->info("reading '$arg->{path}'");
+       if ( $arg->{path} =~ m/\*/ ) {
+               @paths = glob $arg->{path};
+       } else {
+               @paths = ( $arg->{path} );
+       }
 
-       my $rec;
        my $size = 0;
-       my $tag;
-
        $self->{_rec} = [];
 
-       while( my $line = <$fh> ) {
-               $line =~ s{[\r\n]+$}{};
-               next if $line eq '';
-
-               warn "<< $line\n" if $debug;
-
-               if ( $line =~ m/^<(\d+)>$/ ) {
-                       push @{ $self->{_rec} }, $rec if $rec;
-                       warn "## rec = ",dump( $rec ),$/ if $debug;
-                       my $expect_rec = $#{ $self->{_rec} } + 2;
-                       warn "wrong Ovid record number: $1 != $expect_rec" if $debug && $1 != $expect_rec;
-                       $rec = { '000' => [ $1 ] };
-               } elsif ( $line =~ /^(\w.+)/ ) {
-                       $tag = $1;
-                       warn "++ $tag\n" if $debug;
-               } elsif ( $line =~ /^\s\s(.+)/ ) {
-                       my $v = $1;
-                       $v =~ s{[\s\.]+$}{};
-                       $rec->{$tag} = [ $v ];
-               } else {
-                       warn "### skip: '$line'\n" if $debug;
+       my $max_size;
+       $max_size = ( $self->{offset} || 0 ) + $self->{limit} if $self->{limit};
+
+       foreach my $path ( @paths ) {
+
+               open( my $fh, '<', $path ) || $log->logconfess("can't open $path: $!");
+               $log->info("reading '$path'");
+
+               my $tag;
+               my $rec;
+
+               while( my $line = <$fh> ) {
+                       $line =~ s{[\r\n]+$}{};
+                       next if $line eq '';
+
+                       warn "<< $line\n" if $debug;
+
+                       if ( $line =~ m/^<(\d+)>$/ ) {
+                               last if $max_size && $size > $max_size;
+
+                               push @{ $self->{_rec} }, $rec if $rec;
+                               warn "## rec = ",dump( $rec ),$/ if $debug;
+                               my $expect_rec = $#{ $self->{_rec} } + 2;
+                               warn "wrong Ovid record number: $1 != $expect_rec" if $debug && $1 != $expect_rec;
+                               $rec = { '000' => [ ++$size ] };
+                       } elsif ( $line =~ /^(\w+)\s+(-\s)?(.*)/ ) {
+                               $tag = $1;
+                               warn "++ $tag\n" if $debug;
+                               $rec->{$tag} = [ $3 ] if $3;
+                       } elsif ( $line =~ /^\s+(-\s)?(.+)/ ) {
+                               push @{ $rec->{$tag} }, $2;
+                       } else {
+                               warn "### skip: '$line'\n" if $debug;
+                       }
+
                }
 
-       }
+               # save last rec
+               push @{ $self->{_rec} }, $rec if $rec;
+               warn "### rec ",dump $rec if $debug;
 
-       # save last rec
-       push @{ $self->{_rec} }, $rec if $rec;
+               $log->debug("loaded ", $self->size, " records from $path");
 
-       $log->debug("loaded ", $self->size, " records");
+       }
 
        $self ? return $self : return undef;
 }