added WebPAC::Input::ISIS
authorDobrica Pavlinusic <dpavlin@rot13.org>
Sat, 16 Jul 2005 14:44:38 +0000 (14:44 +0000)
committerDobrica Pavlinusic <dpavlin@rot13.org>
Sat, 16 Jul 2005 14:44:38 +0000 (14:44 +0000)
git-svn-id: svn+ssh://mjesec/home/dpavlin/svn/webpac2/trunk@6 07558da8-63fa-0310-ba24-9fe276d99e06

lib/WebPAC.pm
lib/WebPAC/Common.pm
lib/WebPAC/Input.pm
lib/WebPAC/Input/ISIS.pm [new file with mode: 0644]
t/0-load.t [new file with mode: 0644]
t/00-load.t [deleted file]
t/01-lookup.t [deleted file]
t/1-lookup.t [new file with mode: 0755]

index 585dd75..ca21446 100644 (file)
@@ -18,8 +18,8 @@ our $VERSION = '2.00';
 =head1 SYNOPSIS
 
 This is quick description of what WebPAC is. This is third iteration of
-WebPAC desing (second one was semi-private creatation of CD ROM with L<jsFind>
-module). This code will eventually become official webpac version 2.
+WebPAC design (second one was semi-private creatation of CD ROM with L<jsFind>
+module). This code will eventually become official WebPAC version 2.
 
     use WebPAC;
 
@@ -35,22 +35,6 @@ if you don't export anything, such as for a purely object-oriented module.
 
 =head2 function1
 
-foo
-
-=cut
-
-sub function1 {
-}
-
-=head2 function2
-
-bar
-
-=cut
-
-sub function2 {
-}
-
 =head1 AUTHOR
 
 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
index 4020397..fbc241b 100644 (file)
@@ -22,6 +22,19 @@ our $VERSION = '0.01';
 my $LOOKUP_REGEX = 'lookup{[^\{\}]+}';
 my $LOOKUP_REGEX_SAVE = 'lookup{([^\{\}]+)}';
 
+=head1 SYNOPSYS
+
+This module defines common functions, and is used as base for other, more
+specific modules.
+
+ my $webpac = new WebPAC::Common(
+       filter => {
+               'filter_name_1' => sub {
+                       # filter code
+                       return length($_);
+               }, ...
+       },
+  }
 
 =head1 FUNCTIONS
 
@@ -234,6 +247,9 @@ This function will init C<Log::Log4perl> using provided configuration file.
 
   $webpac->_init_logger('/path/to/log.conf');
 
+If no path to configuration file is given, dummy empty configuration
+will be create.
+
 =cut
 
 sub _init_logger {
index 79c859b..3d52a91 100644 (file)
@@ -26,6 +26,7 @@ Perhaps a little code snippet.
     my $db = WebPAC::Input->new(
        format => 'NULL',
        config => $config,
+       lookup => $lookup_obj,
     );
 
     $db->open('/path/to/database');
@@ -43,8 +44,6 @@ Create new input database object.
 
   my $db = new WebPAC::Input( format => 'NULL' );
 
-This function will load needed wrapper module and
-
 =cut
 
 sub new {
@@ -55,22 +54,6 @@ sub new {
        $self ? return $self : return undef;
 }
 
-=head2 open
-
-=cut
-
-sub open {
-}
-
-=head2 function2
-
-=cut
-
-sub function2 {
-}
-
-
-
 =head1 MEMORY USAGE
 
 C<low_mem> options is double-edged sword. If enabled, WebPAC
diff --git a/lib/WebPAC/Input/ISIS.pm b/lib/WebPAC/Input/ISIS.pm
new file mode 100644 (file)
index 0000000..2ef8194
--- /dev/null
@@ -0,0 +1,263 @@
+package WebPAC::Input::ISIS;
+
+use warnings;
+use strict;
+
+use WebPAC::Common;
+use base qw/WebPAC::Input WebPAC::Common/;
+
+=head1 NAME
+
+WebPAC::Input::ISIS - support for CDS/ISIS source files
+
+=head1 VERSION
+
+Version 0.01
+
+=cut
+
+our $VERSION = '0.01';
+
+
+# auto-configure
+
+my ($have_biblio_isis, $have_openisis) = (0,0);
+
+eval "use Biblio::Isis 0.13;";
+unless ($@) { 
+       $have_biblio_isis = 1
+} else {
+       eval "use OpenIsis;";
+       $have_openisis = 1 unless ($@);
+}
+
+=head1 SYNOPSIS
+
+Open CDS/ISIS, WinISIS or IsisMarc database using Biblio::Isis or OpenIsis
+module and read all records to memory.
+
+ my $isis = new WebPAC::Input::ISIS();
+ $isis->open( filename => '/path/to/ISIS/ISIS' );
+
+=head1 FUNCTIONS
+
+=head2 open
+
+This function will read whole database in memory and produce lookups.
+
+ $isis->open(
+       filename => '/data/ISIS/ISIS',
+       code_page => '852',
+       limit_mfn => 500,
+       start_mfn => 6000,
+       lookup => $lookup_obj,
+ );
+
+By default, ISIS code page is assumed to be C<852>.
+
+If optional parametar C<start_mfn> is set, this will be first MFN to read
+from database (so you can skip beginning of your database if you need to).
+
+If optional parametar C<limit_mfn> is set, it will read just 500 records
+from database in example above.
+
+Returns number of last record read into memory (size of database, really).
+
+=cut
+
+sub open {
+       my $self = shift;
+       my $arg = {@_};
+
+       my $log = $self->_get_logger();
+
+       $log->logcroak("need filename") if (! $arg->{'filename'});
+       my $code_page = $arg->{'code_page'} || '852';
+
+       $log->logdie("can't find database ",$arg->{'filename'}) unless (glob($arg->{'filename'}.'.*'));
+
+       # store data in object
+       $self->{'isis_filename'} = $arg->{'filename'};
+       $self->{'isis_code_page'} = $code_page;
+
+       #$self->{'isis_code_page'} = $code_page;
+
+       # create Text::Iconv object
+       my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
+
+       $log->info("reading ISIS database '",$arg->{'filename'},"'");
+       $log->debug("isis code page: $code_page");
+
+       my ($isis_db,$maxmfn);
+
+       if ($have_openisis) {
+               $log->debug("using OpenIsis perl bindings");
+               $isis_db = OpenIsis::open($arg->{'filename'});
+               $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
+       } elsif ($have_biblio_isis) {
+               $log->debug("using Biblio::Isis");
+               use Biblio::Isis;
+               $isis_db = new Biblio::Isis(
+                       isisdb => $arg->{'filename'},
+                       include_deleted => 1,
+                       hash_filter => sub {
+                               my $l = shift || return;
+                               $l = $cp->convert($l);
+                               return $l;
+                       },
+               );
+               $maxmfn = $isis_db->count;
+
+               unless ($maxmfn) {
+                       $log->logwarn("no records in database ", $arg->{'filename'}, ", skipping...");
+                       return;
+               }
+
+       } else {
+               $log->logdie("Can't find supported ISIS library for perl. I suggent that you install Bilbio::Isis from CPAN.");
+       }
+
+
+       my $startmfn = 1;
+
+       if (my $s = $self->{'start_mfn'}) {
+               $log->info("skipping to MFN $s");
+               $startmfn = $s;
+       } else {
+               $self->{'start_mfn'} = $startmfn;
+       }
+
+       $maxmfn = $startmfn + $self->{limit_mfn} if ($self->{limit_mfn});
+
+       $log->info("processing ",($maxmfn-$startmfn)." records using ",( $have_openisis ? 'OpenIsis' : 'Biblio::Isis'));
+
+
+       # read database
+       for (my $mfn = $startmfn; $mfn <= $maxmfn; $mfn++) {
+
+               $log->debug("mfn: $mfn\n");
+
+               my $rec;
+
+               if ($have_openisis) {
+
+                       # read record using OpenIsis
+                       my $row = OpenIsis::read( $isis_db, $mfn );
+                       foreach my $k (keys %{$row}) {
+                               if ($k ne "mfn") {
+                                       foreach my $l (@{$row->{$k}}) {
+                                               $l = $cp->convert($l);
+                                               # has subfields?
+                                               my $val;
+                                               if ($l =~ m/\^/) {
+                                                       foreach my $t (split(/\^/,$l)) {
+                                                               next if (! $t);
+                                                               $val->{substr($t,0,1)} = substr($t,1);
+                                                       }
+                                               } else {
+                                                       $val = $l;
+                                               }
+
+                                               push @{$rec->{$k}}, $val;
+                                       }
+                               } else {
+                                       push @{$rec->{'000'}}, $mfn;
+                               }
+                       }
+
+               } elsif ($have_biblio_isis) {
+                       $rec = $isis_db->to_hash($mfn);
+               } else {
+                       $log->logdie("hum? implementation missing?");
+               }
+
+               $log->confess("record $mfn empty?") unless ($rec);
+
+               # store
+               if ($self->{'low_mem'}) {
+                       $self->{'db'}->put($mfn, $rec);
+               } else {
+                       $self->{'data'}->{$mfn} = $rec;
+               }
+
+               # create lookup
+               $self->{'lookup'}->add( $rec ) if ($self->{'lookup'} && can($self->{'lookup'}->add));
+
+               $self->progress_bar($mfn,$maxmfn);
+
+       }
+
+       $self->{'current_mfn'} = -1;
+       $self->{'last_pcnt'} = 0;
+
+       $log->debug("max mfn: $maxmfn");
+
+       # store max mfn and return it.
+       return $self->{'max_mfn'} = $maxmfn;
+}
+
+=head2 fetch_rec
+
+Fetch next record from database. It will also displays progress bar.
+
+ my $rec = $webpac->fetch_rec;
+
+=cut
+
+sub fetch_rec {
+       my $self = shift;
+
+       my $log = $self->_get_logger();
+
+       $log->logconfess("it seems that you didn't load database!") unless ($self->{'current_mfn'});
+
+       if ($self->{'current_mfn'} == -1) {
+               $self->{'current_mfn'} = $self->{'start_mfn'};
+       } else {
+               $self->{'current_mfn'}++;
+       }
+
+       my $mfn = $self->{'current_mfn'};
+
+       if ($mfn > $self->{'max_mfn'}) {
+               $self->{'current_mfn'} = $self->{'max_mfn'};
+               $log->debug("at EOF");
+               return;
+       }
+
+       $self->progress_bar($mfn,$self->{'max_mfn'});
+
+       if ($self->{'low_mem'}) {
+               return $self->{'db'}->get($mfn);
+       } else {
+               return $self->{'data'}->{$mfn};
+       }
+}
+
+=head2 mfn
+
+Returns current record number (MFN).
+
+ print $webpac->mfn;
+
+=cut
+
+sub mfn {
+       my $self = shift;
+       return $self->{'current_mfn'};
+}
+
+=head1 AUTHOR
+
+Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
+
+=head1 COPYRIGHT & LICENSE
+
+Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+=cut
+
+1; # End of WebPAC::Input::ISIS
diff --git a/t/0-load.t b/t/0-load.t
new file mode 100644 (file)
index 0000000..95040cf
--- /dev/null
@@ -0,0 +1,19 @@
+#!/usr/bin/perl -w
+
+use Test::More tests => 9;
+use blib;
+use strict;
+
+BEGIN {
+use_ok( 'WebPAC' );
+use_ok( 'WebPAC::Input' );
+use_ok( 'WebPAC::DB' );
+use_ok( 'WebPAC::Output' );
+use_ok( 'WebPAC::Output::Estraier' );
+use_ok( 'WebPAC::Output::html' );
+use_ok( 'WebPAC::Output::OAI' );
+use_ok( 'WebPAC::Output::ClassDBI' );
+use_ok( 'WebPAC::Lookup' );
+}
+
+diag( "Testing WebPAC $WebPAC::VERSION, Perl 5.008007, /usr/bin/perl" );
diff --git a/t/00-load.t b/t/00-load.t
deleted file mode 100644 (file)
index 95040cf..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/usr/bin/perl -w
-
-use Test::More tests => 9;
-use blib;
-use strict;
-
-BEGIN {
-use_ok( 'WebPAC' );
-use_ok( 'WebPAC::Input' );
-use_ok( 'WebPAC::DB' );
-use_ok( 'WebPAC::Output' );
-use_ok( 'WebPAC::Output::Estraier' );
-use_ok( 'WebPAC::Output::html' );
-use_ok( 'WebPAC::Output::OAI' );
-use_ok( 'WebPAC::Output::ClassDBI' );
-use_ok( 'WebPAC::Lookup' );
-}
-
-diag( "Testing WebPAC $WebPAC::VERSION, Perl 5.008007, /usr/bin/perl" );
diff --git a/t/01-lookup.t b/t/01-lookup.t
deleted file mode 100755 (executable)
index 3854dea..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/usr/bin/perl -w
-
-use strict;
-use Test::More tests => 6;
-use Test::Exception;
-use blib;
-
-use Data::Dumper;
-
-BEGIN {
-use_ok( 'WebPAC::Lookup' );
-}
-
-throws_ok { new WebPAC::Lookup() } qr/lookup_file/, "new without lookup_file";
-
-ok(my $lookup = new WebPAC::Lookup(
-       lookup_file => 'conf/lookup/example.pm',
-       filter => {
-               'upper' => sub { return uc(shift); },
-       },
-), "new");
-
-my $rec = {
-       '000' => [ '001' ],
-       '800' => [ 'foo' ],
-       '900' => [ 'bar' ],
-};
-
-ok($lookup->add( $rec ), "add");
-
-print Dumper($lookup);
-
-ok($lookup->{'lookup'}, "have lookup hash");
-
-my $lookup_res = { 
-       '800:foo' => [ 'bar' ],
-       '000:001' => [ '001' ],
-       '900:bar' => [ 'FOO', 'foo' ]
-};
-
-is_deeply($lookup_res, $lookup->{'lookup'}, "lookup data");
-
diff --git a/t/1-lookup.t b/t/1-lookup.t
new file mode 100755 (executable)
index 0000000..3854dea
--- /dev/null
@@ -0,0 +1,42 @@
+#!/usr/bin/perl -w
+
+use strict;
+use Test::More tests => 6;
+use Test::Exception;
+use blib;
+
+use Data::Dumper;
+
+BEGIN {
+use_ok( 'WebPAC::Lookup' );
+}
+
+throws_ok { new WebPAC::Lookup() } qr/lookup_file/, "new without lookup_file";
+
+ok(my $lookup = new WebPAC::Lookup(
+       lookup_file => 'conf/lookup/example.pm',
+       filter => {
+               'upper' => sub { return uc(shift); },
+       },
+), "new");
+
+my $rec = {
+       '000' => [ '001' ],
+       '800' => [ 'foo' ],
+       '900' => [ 'bar' ],
+};
+
+ok($lookup->add( $rec ), "add");
+
+print Dumper($lookup);
+
+ok($lookup->{'lookup'}, "have lookup hash");
+
+my $lookup_res = { 
+       '800:foo' => [ 'bar' ],
+       '000:001' => [ '001' ],
+       '900:bar' => [ 'FOO', 'foo' ]
+};
+
+is_deeply($lookup_res, $lookup->{'lookup'}, "lookup data");
+