From 3099c9009ea7de30c2b90ff072a1ad2fce24b27a Mon Sep 17 00:00:00 2001
From: Dobrica Pavlinusic
Date: Thu, 15 Dec 2011 21:40:02 +0000
Subject: [PATCH] import OAI repository

git-svn-id: svn+ssh://mjesec/home/dpavlin/svn/webpac2/trunk@1366 07558da8-63fa-0310-ba24-9fe276d99e06
---
 Makefile.PL             |   4 ++
 bin/install-debian.sh   |   2 +-
 lib/WebPAC/Input/OAI.pm | 148 ++++++++++++++++++++++++++++++++++++++++
 t/2-Input-OAI.t         |  41 +++++++++++
 4 files changed, 194 insertions(+), 1 deletion(-)
 create mode 100644 lib/WebPAC/Input/OAI.pm
 create mode 100755 t/2-Input-OAI.t

diff --git a/Makefile.PL b/Makefile.PL
index 488e67f..358dee5 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -57,6 +57,10 @@ features(
 		-default => 0,
 		'XBase',
 	],
+	'WebPAC::Input::OAI' => [
+		-default => 0,
+		'HTTP::OAI',
+	],
 	'WebPAC::Output::Estraier' => [
 		-default => 0,
 		'Search::Estraier' => 0.06,
diff --git a/bin/install-debian.sh b/bin/install-debian.sh
index 1df3176..df79357 100755
--- a/bin/install-debian.sh
+++ b/bin/install-debian.sh
@@ -2,5 +2,5 @@
 
 sudo apt-get install liblog-log4perl-perl libclass-accessor-perl libyaml-perl libtest-exception-perl \
 	libbiblio-isis-perl libmarc-lint-perl libppi-perl libspreadsheet-parseexcel-perl libtext-csv-perl \
-	swish-e
+	swish-e libhttp-oai-perl
 
diff --git a/lib/WebPAC/Input/OAI.pm b/lib/WebPAC/Input/OAI.pm
new file mode 100644
index 0000000..863c5ee
--- /dev/null
+++ b/lib/WebPAC/Input/OAI.pm
@@ -0,0 +1,148 @@
+package WebPAC::Input::OAI;
+
+use warnings;
+use strict;
+
+use HTTP::OAI;
+use HTTP::OAI::Metadata::OAI_DC;
+use base qw/WebPAC::Common/;
+use Carp qw/confess/;
+use Data::Dump qw/dump/;
+
+=head1 NAME
+
+WebPAC::Input::OAI - read Dublin Core (oai_dc) records from OAI repository
+
+=cut
+
+our $VERSION = '0.00';
+
+=head1 FUNCTIONS
+
+=head2 new
+
+  my $input = WebPAC::Input::OAI->new(
+	url => 'http://arXiv.org/oai2',
+	from => '2001-02-03',
+	until => '2001-04-10',
+	path => 'var/oai/arXiv',
+  );
+
+Sends C<ListRecords> request for C<oai_dc> metadata to C<url>, limited by
+C<from> and C<until> when they are given. Returns nothing if C<path> can't
+be opened for reading and dies through logger if harvesting fails.
+
+=cut
+
+sub new {
+	my $class = shift;
+	my $self = {@_};
+	bless($self, $class);
+
+	my $arg = {@_};
+
+	my $log = $self->_get_logger();
+	$log->debug( 'arg = ', dump($arg) );
+
+	# path is only checked for readability, handle is not used afterwards
+	open(my $fh, '<', $arg->{path}) or do {
+		$log->error("can't open $arg->{path}: $!");
+		return;
+	};
+	close($fh);
+
+	my $h = HTTP::OAI::Harvester->new( baseURL => $self->{url} );
+
+	# pass along only selective harvesting arguments which are defined
+	my $list = {};
+	foreach my $name ( qw( from until ) ) {
+		$list->{$name} = $self->{$name} if defined $self->{$name};
+	}
+
+	$log->info("ListRecords ", dump($list));
+
+	my $response = $h->ListRecords(
+		metadataPrefix=>'oai_dc',
+		handlers=>{metadata=>'HTTP::OAI::Metadata::OAI_DC'},
+		%$list,
+	);
+
+	$log->debug( 'response = ', dump($response) );
+
+	if ( $response->is_error ) {
+		# method call doesn't interpolate inside string, so concatenate it
+		$log->logdie("Error harvesting $self->{url}: " . $response->message);
+	}
+
+	$self->{oai_response} = $response;
+
+	return $self;
+}
+
+=head2 fetch_rec
+
+Return next record from OAI response with C<$mfn> appended to field C<000>
+
+  my $rec = $input->fetch_rec( $mfn );
+
+Records are read sequentially, so C<$mfn> doesn't select which one is
+returned. Returns nothing when response has no more records or when record
+has no metadata.
+
+=cut
+
+sub fetch_rec {
+	my $self = shift;
+
+	my $mfn = shift;
+
+	my $rec = $self->{oai_response}->next;
+	# no more records in response
+	return unless $rec;
+
+	# NOTE(review): presumably deleted records have no metadata -- confirm
+	my $metadata = $rec->metadata;
+	return unless $metadata;
+
+	my $row = $metadata->dc;
+	$self->_get_logger()->debug( 'row = ', dump($row) );
+
+	push @{$row->{'000'}}, $mfn;
+	return $row;
+}
+
+=head2 size
+
+Return number of records in repository as reported by resumption token
+
+  my $size = $input->size;
+
+Returns nothing if response doesn't have resumption token.
+
+=cut
+
+sub size {
+	my $self = shift;
+
+	# without resumption token there is nothing to ask for completeListSize
+	my $token = $self->{oai_response}->resumptionToken;
+	return unless $token;
+
+	return $token->completeListSize;
+}
+
+
+=head1 AUTHOR
+
+Dobrica Pavlinusic, C<< >>
+
+=head1 COPYRIGHT & LICENSE
+
+Copyright 2011 Dobrica Pavlinusic, All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+=cut
+
+1;
diff --git a/t/2-Input-OAI.t b/t/2-Input-OAI.t
new file mode 100755
index 0000000..33ec507
--- /dev/null
+++ b/t/2-Input-OAI.t
@@ -0,0 +1,41 @@
+#!/usr/bin/perl -w
+
+use strict;
+use lib 'lib';
+
+use Test::More tests => 28; # 6 fixed checks + 3 per record (7 records) + unlink
+
+BEGIN {
+use_ok( 'WebPAC::Test' );
+use_ok( 'WebPAC::Input' );
+}
+
+my $module = 'WebPAC::Input::OAI';
+diag "testing with $module";
+
+ok(my $input = WebPAC::Input->new(
+	module => $module,
+	no_progress_bar => 1,
+	%LOG
+), "new");
+
+my $path = '/tank/oai/hrcak'; # NOTE(review): differs from path given to open below -- confirm
+
+ok(my $db = $input->open(
+	url => 'http://hrcak.srce.hr/oai/',
+	from => '2010-01-01',
+	until => '2011-12-31',
+	path => 'var/oai/hrcak',
+), "open");
+ok(my $size = $input->size, "size");
+cmp_ok( $size, '==', 7, 'size ok' );
+
+foreach my $mfn ( 3 + 1 .. 3 + $size ) { # NOTE(review): offset of 3 isn't explained here -- confirm
+	my $rec = $input->fetch;
+	ok($rec, "fetch $mfn");
+	cmp_ok($rec->{'000'}->[0], '==', $mfn, 'has mfn');
+	cmp_ok($input->pos, '==', $mfn, "pos $mfn");
+	diag "rec: ", dump($rec), "\n" if $debug;
+}
+
+ok( unlink $path, "unlink $path" );
-- 
2.20.1