From: Dobrica Pavlinusic Date: Sat, 16 Jul 2005 14:44:38 +0000 (+0000) Subject: added WebPAC::Input::ISIS X-Git-Url: http://git.rot13.org/?a=commitdiff_plain;h=656fdf7f1501627a8019dc502f23044b7b351c97;p=webpac2 added WebPAC::Input::ISIS git-svn-id: svn+ssh://mjesec/home/dpavlin/svn/webpac2/trunk@6 07558da8-63fa-0310-ba24-9fe276d99e06 --- diff --git a/lib/WebPAC.pm b/lib/WebPAC.pm index 585dd75..ca21446 100644 --- a/lib/WebPAC.pm +++ b/lib/WebPAC.pm @@ -18,8 +18,8 @@ our $VERSION = '2.00'; =head1 SYNOPSIS This is quick description of what WebPAC is. This is third iteration of -WebPAC desing (second one was semi-private creatation of CD ROM with L -module). This code will eventually become official webpac version 2. +WebPAC design (second one was semi-private creatation of CD ROM with L +module). This code will eventually become official WebPAC version 2. use WebPAC; @@ -35,22 +35,6 @@ if you don't export anything, such as for a purely object-oriented module. =head2 function1 -foo - -=cut - -sub function1 { -} - -=head2 function2 - -bar - -=cut - -sub function2 { -} - =head1 AUTHOR Dobrica Pavlinusic, C<< >> diff --git a/lib/WebPAC/Common.pm b/lib/WebPAC/Common.pm index 4020397..fbc241b 100644 --- a/lib/WebPAC/Common.pm +++ b/lib/WebPAC/Common.pm @@ -22,6 +22,19 @@ our $VERSION = '0.01'; my $LOOKUP_REGEX = 'lookup{[^\{\}]+}'; my $LOOKUP_REGEX_SAVE = 'lookup{([^\{\}]+)}'; +=head1 SYNOPSYS + +This module defines common functions, and is used as base for other, more +specific modules. + + my $webpac = new WebPAC::Common( + filter => { + 'filter_name_1' => sub { + # filter code + return length($_); + }, ... + }, + } =head1 FUNCTIONS @@ -234,6 +247,9 @@ This function will init C using provided configuration file. $webpac->_init_logger('/path/to/log.conf'); +If no path to configuration file is given, dummy empty configuration +will be create. + =cut sub _init_logger { diff --git a/lib/WebPAC/Input.pm b/lib/WebPAC/Input.pm index 79c859b..3d52a91 100644 --- a/lib/WebPAC/Input.pm +++ b/lib/WebPAC/Input.pm @@ -26,6 +26,7 @@ Perhaps a little code snippet. my $db = WebPAC::Input->new( format => 'NULL', config => $config, + lookup => $lookup_obj, ); $db->open('/path/to/database'); @@ -43,8 +44,6 @@ Create new input database object. my $db = new WebPAC::Input( format => 'NULL' ); -This function will load needed wrapper module and - =cut sub new { @@ -55,22 +54,6 @@ sub new { $self ? return $self : return undef; } -=head2 open - -=cut - -sub open { -} - -=head2 function2 - -=cut - -sub function2 { -} - - - =head1 MEMORY USAGE C options is double-edged sword. If enabled, WebPAC diff --git a/lib/WebPAC/Input/ISIS.pm b/lib/WebPAC/Input/ISIS.pm new file mode 100644 index 0000000..2ef8194 --- /dev/null +++ b/lib/WebPAC/Input/ISIS.pm @@ -0,0 +1,263 @@ +package WebPAC::Input::ISIS; + +use warnings; +use strict; + +use WebPAC::Common; +use base qw/WebPAC::Input WebPAC::Common/; + +=head1 NAME + +WebPAC::Input::ISIS - support for CDS/ISIS source files + +=head1 VERSION + +Version 0.01 + +=cut + +our $VERSION = '0.01'; + + +# auto-configure + +my ($have_biblio_isis, $have_openisis) = (0,0); + +eval "use Biblio::Isis 0.13;"; +unless ($@) { + $have_biblio_isis = 1 +} else { + eval "use OpenIsis;"; + $have_openisis = 1 unless ($@); +} + +=head1 SYNOPSIS + +Open CDS/ISIS, WinISIS or IsisMarc database using Biblio::Isis or OpenIsis +module and read all records to memory. + + my $isis = new WebPAC::Input::ISIS(); + $isis->open( filename => '/path/to/ISIS/ISIS' ); + +=head1 FUNCTIONS + +=head2 open + +This function will read whole database in memory and produce lookups. + + $isis->open( + filename => '/data/ISIS/ISIS', + code_page => '852', + limit_mfn => 500, + start_mfn => 6000, + lookup => $lookup_obj, + ); + +By default, ISIS code page is assumed to be C<852>. + +If optional parametar C is set, this will be first MFN to read +from database (so you can skip beginning of your database if you need to). + +If optional parametar C is set, it will read just 500 records +from database in example above. + +Returns number of last record read into memory (size of database, really). + +=cut + +sub open { + my $self = shift; + my $arg = {@_}; + + my $log = $self->_get_logger(); + + $log->logcroak("need filename") if (! $arg->{'filename'}); + my $code_page = $arg->{'code_page'} || '852'; + + $log->logdie("can't find database ",$arg->{'filename'}) unless (glob($arg->{'filename'}.'.*')); + + # store data in object + $self->{'isis_filename'} = $arg->{'filename'}; + $self->{'isis_code_page'} = $code_page; + + #$self->{'isis_code_page'} = $code_page; + + # create Text::Iconv object + my $cp = Text::Iconv->new($code_page,$self->{'code_page'}); + + $log->info("reading ISIS database '",$arg->{'filename'},"'"); + $log->debug("isis code page: $code_page"); + + my ($isis_db,$maxmfn); + + if ($have_openisis) { + $log->debug("using OpenIsis perl bindings"); + $isis_db = OpenIsis::open($arg->{'filename'}); + $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1; + } elsif ($have_biblio_isis) { + $log->debug("using Biblio::Isis"); + use Biblio::Isis; + $isis_db = new Biblio::Isis( + isisdb => $arg->{'filename'}, + include_deleted => 1, + hash_filter => sub { + my $l = shift || return; + $l = $cp->convert($l); + return $l; + }, + ); + $maxmfn = $isis_db->count; + + unless ($maxmfn) { + $log->logwarn("no records in database ", $arg->{'filename'}, ", skipping..."); + return; + } + + } else { + $log->logdie("Can't find supported ISIS library for perl. I suggent that you install Bilbio::Isis from CPAN."); + } + + + my $startmfn = 1; + + if (my $s = $self->{'start_mfn'}) { + $log->info("skipping to MFN $s"); + $startmfn = $s; + } else { + $self->{'start_mfn'} = $startmfn; + } + + $maxmfn = $startmfn + $self->{limit_mfn} if ($self->{limit_mfn}); + + $log->info("processing ",($maxmfn-$startmfn)." records using ",( $have_openisis ? 'OpenIsis' : 'Biblio::Isis')); + + + # read database + for (my $mfn = $startmfn; $mfn <= $maxmfn; $mfn++) { + + $log->debug("mfn: $mfn\n"); + + my $rec; + + if ($have_openisis) { + + # read record using OpenIsis + my $row = OpenIsis::read( $isis_db, $mfn ); + foreach my $k (keys %{$row}) { + if ($k ne "mfn") { + foreach my $l (@{$row->{$k}}) { + $l = $cp->convert($l); + # has subfields? + my $val; + if ($l =~ m/\^/) { + foreach my $t (split(/\^/,$l)) { + next if (! $t); + $val->{substr($t,0,1)} = substr($t,1); + } + } else { + $val = $l; + } + + push @{$rec->{$k}}, $val; + } + } else { + push @{$rec->{'000'}}, $mfn; + } + } + + } elsif ($have_biblio_isis) { + $rec = $isis_db->to_hash($mfn); + } else { + $log->logdie("hum? implementation missing?"); + } + + $log->confess("record $mfn empty?") unless ($rec); + + # store + if ($self->{'low_mem'}) { + $self->{'db'}->put($mfn, $rec); + } else { + $self->{'data'}->{$mfn} = $rec; + } + + # create lookup + $self->{'lookup'}->add( $rec ) if ($self->{'lookup'} && can($self->{'lookup'}->add)); + + $self->progress_bar($mfn,$maxmfn); + + } + + $self->{'current_mfn'} = -1; + $self->{'last_pcnt'} = 0; + + $log->debug("max mfn: $maxmfn"); + + # store max mfn and return it. + return $self->{'max_mfn'} = $maxmfn; +} + +=head2 fetch_rec + +Fetch next record from database. It will also displays progress bar. + + my $rec = $webpac->fetch_rec; + +=cut + +sub fetch_rec { + my $self = shift; + + my $log = $self->_get_logger(); + + $log->logconfess("it seems that you didn't load database!") unless ($self->{'current_mfn'}); + + if ($self->{'current_mfn'} == -1) { + $self->{'current_mfn'} = $self->{'start_mfn'}; + } else { + $self->{'current_mfn'}++; + } + + my $mfn = $self->{'current_mfn'}; + + if ($mfn > $self->{'max_mfn'}) { + $self->{'current_mfn'} = $self->{'max_mfn'}; + $log->debug("at EOF"); + return; + } + + $self->progress_bar($mfn,$self->{'max_mfn'}); + + if ($self->{'low_mem'}) { + return $self->{'db'}->get($mfn); + } else { + return $self->{'data'}->{$mfn}; + } +} + +=head2 mfn + +Returns current record number (MFN). + + print $webpac->mfn; + +=cut + +sub mfn { + my $self = shift; + return $self->{'current_mfn'}; +} + +=head1 AUTHOR + +Dobrica Pavlinusic, C<< >> + +=head1 COPYRIGHT & LICENSE + +Copyright 2005 Dobrica Pavlinusic, All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it +under the same terms as Perl itself. + +=cut + +1; # End of WebPAC::Input::ISIS diff --git a/t/0-load.t b/t/0-load.t new file mode 100644 index 0000000..95040cf --- /dev/null +++ b/t/0-load.t @@ -0,0 +1,19 @@ +#!/usr/bin/perl -w + +use Test::More tests => 9; +use blib; +use strict; + +BEGIN { +use_ok( 'WebPAC' ); +use_ok( 'WebPAC::Input' ); +use_ok( 'WebPAC::DB' ); +use_ok( 'WebPAC::Output' ); +use_ok( 'WebPAC::Output::Estraier' ); +use_ok( 'WebPAC::Output::html' ); +use_ok( 'WebPAC::Output::OAI' ); +use_ok( 'WebPAC::Output::ClassDBI' ); +use_ok( 'WebPAC::Lookup' ); +} + +diag( "Testing WebPAC $WebPAC::VERSION, Perl 5.008007, /usr/bin/perl" ); diff --git a/t/00-load.t b/t/00-load.t deleted file mode 100644 index 95040cf..0000000 --- a/t/00-load.t +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/perl -w - -use Test::More tests => 9; -use blib; -use strict; - -BEGIN { -use_ok( 'WebPAC' ); -use_ok( 'WebPAC::Input' ); -use_ok( 'WebPAC::DB' ); -use_ok( 'WebPAC::Output' ); -use_ok( 'WebPAC::Output::Estraier' ); -use_ok( 'WebPAC::Output::html' ); -use_ok( 'WebPAC::Output::OAI' ); -use_ok( 'WebPAC::Output::ClassDBI' ); -use_ok( 'WebPAC::Lookup' ); -} - -diag( "Testing WebPAC $WebPAC::VERSION, Perl 5.008007, /usr/bin/perl" ); diff --git a/t/01-lookup.t b/t/01-lookup.t deleted file mode 100755 index 3854dea..0000000 --- a/t/01-lookup.t +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/perl -w - -use strict; -use Test::More tests => 6; -use Test::Exception; -use blib; - -use Data::Dumper; - -BEGIN { -use_ok( 'WebPAC::Lookup' ); -} - -throws_ok { new WebPAC::Lookup() } qr/lookup_file/, "new without lookup_file"; - -ok(my $lookup = new WebPAC::Lookup( - lookup_file => 'conf/lookup/example.pm', - filter => { - 'upper' => sub { return uc(shift); }, - }, -), "new"); - -my $rec = { - '000' => [ '001' ], - '800' => [ 'foo' ], - '900' => [ 'bar' ], -}; - -ok($lookup->add( $rec ), "add"); - -print Dumper($lookup); - -ok($lookup->{'lookup'}, "have lookup hash"); - -my $lookup_res = { - '800:foo' => [ 'bar' ], - '000:001' => [ '001' ], - '900:bar' => [ 'FOO', 'foo' ] -}; - -is_deeply($lookup_res, $lookup->{'lookup'}, "lookup data"); - diff --git a/t/1-lookup.t b/t/1-lookup.t new file mode 100755 index 0000000..3854dea --- /dev/null +++ b/t/1-lookup.t @@ -0,0 +1,42 @@ +#!/usr/bin/perl -w + +use strict; +use Test::More tests => 6; +use Test::Exception; +use blib; + +use Data::Dumper; + +BEGIN { +use_ok( 'WebPAC::Lookup' ); +} + +throws_ok { new WebPAC::Lookup() } qr/lookup_file/, "new without lookup_file"; + +ok(my $lookup = new WebPAC::Lookup( + lookup_file => 'conf/lookup/example.pm', + filter => { + 'upper' => sub { return uc(shift); }, + }, +), "new"); + +my $rec = { + '000' => [ '001' ], + '800' => [ 'foo' ], + '900' => [ 'bar' ], +}; + +ok($lookup->add( $rec ), "add"); + +print Dumper($lookup); + +ok($lookup->{'lookup'}, "have lookup hash"); + +my $lookup_res = { + '800:foo' => [ 'bar' ], + '000:001' => [ '001' ], + '900:bar' => [ 'FOO', 'foo' ] +}; + +is_deeply($lookup_res, $lookup->{'lookup'}, "lookup data"); +