import OAI repository
author Dobrica Pavlinusic <dpavlin@rot13.org>
Thu, 15 Dec 2011 21:40:02 +0000 (21:40 +0000)
committer Dobrica Pavlinusic <dpavlin@rot13.org>
Thu, 15 Dec 2011 21:40:02 +0000 (21:40 +0000)
git-svn-id: svn+ssh://mjesec/home/dpavlin/svn/webpac2/trunk@1366 07558da8-63fa-0310-ba24-9fe276d99e06

Makefile.PL
bin/install-debian.sh
lib/WebPAC/Input/OAI.pm [new file with mode: 0644]
t/2-Input-OAI.t [new file with mode: 0755]

index 488e67f..358dee5 100644 (file)
@@ -57,6 +57,10 @@ features(
                -default => 0,
                'XBase',
        ],
+       'WebPAC::Input::OAI' => [
+               -default => 0,
+               'HTTP::OAI',
+       ],
        'WebPAC::Output::Estraier' => [
                -default => 0,
                'Search::Estraier' => 0.06,
index 1df3176..df79357 100755 (executable)
@@ -2,5 +2,5 @@
 
 sudo apt-get install liblog-log4perl-perl libclass-accessor-perl libyaml-perl libtest-exception-perl \
        libbiblio-isis-perl libmarc-lint-perl libppi-perl libspreadsheet-parseexcel-perl libtext-csv-perl \
-       swish-e
+       swish-e libhttp-oai-perl
 
diff --git a/lib/WebPAC/Input/OAI.pm b/lib/WebPAC/Input/OAI.pm
new file mode 100644 (file)
index 0000000..863c5ee
--- /dev/null
@@ -0,0 +1,122 @@
+package WebPAC::Input::OAI;
+
+use warnings;
+use strict;
+
+use HTTP::OAI;
+use HTTP::OAI::Metadata::OAI_DC;
+use base qw/WebPAC::Common/;
+use Carp qw/confess/;
+use Data::Dump qw/dump/;
+
+=head1 NAME
+
+WebPAC::Input::OAI - read Dublin Core (oai_dc) records from OAI-PMH repository
+
+=cut
+
+our $VERSION = '0.00';
+
+=head1 FUNCTIONS
+
+=head2 new
+
+  my $input = new WebPAC::Input::OAI(
+       url   => 'http://arXiv.org/oai2',
+       from  => '2001-02-03',
+       until => '2001-04-10',
+       path  => 'var/oai/arXiv',
+  );
+
+=cut
+
+sub new {
+       # Constructor: issue a ListRecords request (metadataPrefix oai_dc)
+       # against the repository at url and keep the response in
+       # $self->{oai_response} for fetch_rec and size.
+       #
+       # Arguments (key => value list): url, from, until, path
+       # Returns the new object, or nothing if path can't be opened.
+       # Dies through $log->logdie if the response reports an error.
+       my ( $class, %arg ) = @_;
+       my $self = bless { %arg }, $class;
+
+       my $log = $self->_get_logger();
+       $log->debug( 'arg = ', dump( \%arg ) );
+
+       # open() autovivifies the handle even when it fails, so success
+       # has to be judged by the return value of open(), not by $fh
+       my $fh;
+       if ( ! open($fh, '<', $arg{path}) ) {
+               $log->error("can't open $arg{path}: $!");
+               return;
+       }
+       # the handle was only needed to probe that path is readable
+       close($fh);
+
+       my $h = HTTP::OAI::Harvester->new( baseURL => $self->{url} );
+
+       # pass only the date limits which were actually given, so that
+       # undefined values never end up in the request
+       my $list = {};
+       foreach my $limit ( qw( from until ) ) {
+               $list->{$limit} = $self->{$limit} if defined $self->{$limit};
+       }
+
+       $log->info("ListRecords ", dump($list));
+
+       my $response = $h->ListRecords(
+               metadataPrefix=>'oai_dc',
+               handlers=>{metadata=>'HTTP::OAI::Metadata::OAI_DC'},
+               %$list,
+       );
+
+       $log->debug( 'response = ', dump($response) );
+
+       if ( $response->is_error ) {
+               # method calls are not interpolated inside strings, so the
+               # message has to be concatenated
+               $log->logdie("Error harvesting $self->{url}: " . $response->message);
+       }
+
+       $self->{oai_response} = $response;
+
+       return $self;
+}
+
+=head2 fetch_rec
+
+Return next record from OAI response with C<$mfn> stored in field C<000>
+
+  my $rec = $input->fetch_rec( $mfn );
+
+=cut
+
+sub fetch_rec {
+       # Return the next record of the ListRecords response as the hash
+       # produced by its oai_dc metadata, with $mfn appended to field '000'.
+       #
+       # Records are consumed in response order; $mfn does not select a
+       # record, it is only stored in the returned row.
+       # Returns nothing when there is no further record.
+       my ( $self, $mfn ) = @_;
+
+       # without this guard the method chain below dies on undef once
+       # the response is exhausted
+       my $rec = $self->{oai_response}->next;
+       return unless $rec;
+
+       # NOTE(review): records without metadata (presumably deleted ones)
+       # are skipped here -- confirm that the caller copes with that
+       my $metadata = $rec->metadata;
+       return unless $metadata;
+
+       my $row = $metadata->dc;
+       $self->_get_logger()->debug( 'row = ', dump($row) );
+
+       push @{$row->{'000'}}, $mfn;
+       return $row;
+}
+
+=head2 size
+
+Return number of records reported by OAI repository
+
+  my $size = $input->size;
+
+=cut
+
+sub size {
+       # Return completeListSize taken from the resumption token of the
+       # ListRecords response, or nothing if the response has no token.
+       my $self = shift;
+
+       # NOTE(review): the token is presumably absent when the whole list
+       # fits into a single response -- calling a method on it would die
+       my $token = $self->{oai_response}->resumptionToken;
+       return unless $token;
+
+       return $token->completeListSize;
+}
+
+
+=head1 AUTHOR
+
+Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
+
+=head1 COPYRIGHT & LICENSE
+
+Copyright 2011 Dobrica Pavlinusic, All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+=cut
+
+1;
diff --git a/t/2-Input-OAI.t b/t/2-Input-OAI.t
new file mode 100755 (executable)
index 0000000..33ec507
--- /dev/null
@@ -0,0 +1,41 @@
+#!/usr/bin/perl -w
+
+# Test for WebPAC::Input::OAI: open an OAI repository through
+# WebPAC::Input and check size, fetch and pos for every record.
+# NOTE(review): the url below is a live repository and the test expects
+# exactly 7 records for the requested date range -- confirm that this
+# is stable enough for an automated test.
+
+use strict;
+use lib 'lib';
+
+# 2 use_ok + new + open + 2 size checks + 3 checks for each of 7 records + unlink
+use Test::More tests => 28;
+
+BEGIN {
+use_ok( 'WebPAC::Test' );
+use_ok( 'WebPAC::Input' );
+}
+
+my $module = 'WebPAC::Input::OAI';
+diag "testing with $module";
+
+# %LOG, $debug and dump are not declared in this file; presumably they
+# are exported by WebPAC::Test -- verify
+ok(my $input = WebPAC::Input->new(
+       module => $module,
+       no_progress_bar => 1,
+       %LOG
+), "new");
+
+# NOTE(review): $path differs from the path given to open below and is
+# only used by the final unlink -- confirm which location is intended
+my $path = '/tank/oai/hrcak';
+
+ok(my $db = $input->open(
+       url  => 'http://hrcak.srce.hr/oai/',
+       from => '2010-01-01',
+       until => '2011-12-31',
+       path => 'var/oai/hrcak',
+), "open");
+ok(my $size = $input->size, "size");
+cmp_ok( $size, '==', 7, 'size ok' );
+
+# expected mfn values start at 4 (3 + 1)
+foreach my $mfn ( 3 + 1 ... 3 + $size ) {
+       my $rec = $input->fetch;
+       ok($rec, "fetch $mfn");
+       cmp_ok($rec->{'000'}->[0], '==', $mfn, 'has mfn');
+       cmp_ok($input->pos, '==', $mfn, "pos $mfn");
+       diag "rec: ", dump($rec), "\n" if $debug;
+}
+
+ok( unlink $path, "unlink $path" );