use warnings;
use strict;
+use blib;
+
+use WebPAC::Common;
+use base qw/WebPAC::Common/;
+use Text::Iconv;
+
=head1 NAME
WebPAC::Input - core module for input file format
=head1 VERSION
-Version 0.01
+Version 0.02
=cut
-our $VERSION = '0.01';
+our $VERSION = '0.02';
=head1 SYNOPSIS
-This module will load particular loader module and execute it's functions.
+This module is used as base class for all database specific modules
+(basically, files which have one handle, fixed size while indexing and some
+kind of numeric identifier which goes from 1 to filesize).
Perhaps a little code snippet.
Default is not to use C<low_mem> options (see L<MEMORY USAGE> below).
+This function will also call the low-level C<init> method, if it exists,
+with the same parameters.
+
=cut
sub new {
- my $class = shift;
- my $self = {@_};
+ my $class = shift;
+ my $self = {@_};
bless($self, $class);
- $self->{'code_page'} ||= 'ISO-8859-2';
-
my $log = $self->_get_logger;
+ # check if required subclasses are implemented
+ foreach my $subclass (qw/open_db fetch_rec/) {
+ $log->logdie("missing implementation of $subclass") unless ($self->SUPER::can($subclass));
+ }
+
+ if ($self->can('init')) {
+ $log->debug("calling init");
+ $self->init(@_);
+ }
+
+ $self->{'code_page'} ||= 'ISO-8859-2';
+
# running with low_mem flag? well, use DBM::Deep then.
if ($self->{'low_mem'}) {
$log->info("running with low_mem which impacts performance (<32 Mb memory usage)");
$self ? return $self : return undef;
}
+=head2 open
+
+This function will read whole database in memory and produce lookups.
+
+ $isis->open(
+ path => '/path/to/database/file',
+ code_page => '852',
+ limit_mfn => 500,
+ start_mfn => 6000,
+ lookup => $lookup_obj,
+ );
+
+By default, C<code_page> is assumed to be C<852>.
+
+If optional parameter C<start_mfn> is set, this will be first MFN to read
+from database (so you can skip beginning of your database if you need to).
+
+If optional parameter C<limit_mfn> is set, it will read just 500 records
+from database in example above.
+
+Returns size of database, regardless of C<start_mfn> and C<limit_mfn>
+parameters, see also C<< $isis->size >>.
+
+=cut
+
+sub open {
+	my $self = shift;
+	my $arg = {@_};
+
+	my $log = $self->_get_logger();
+
+	$log->logcroak("need path") if (! $arg->{'path'});
+
+	# $code_page is the encoding of the source database. The target
+	# encoding is the object's own code_page (defaulted in new); it must
+	# not be overwritten with the input encoding, otherwise the iconv
+	# object below would convert a code page to itself.
+	my $code_page = $arg->{'code_page'} || '852';
+	my $output_code_page = ($self->{'code_page'} ||= 'ISO-8859-2');
+
+	# store data in object (lookup is a documented argument of open, so
+	# keep it too; it is used below when building lookups)
+	$self->{'input_code_page'} = $code_page;
+	foreach my $v (qw/path start_mfn limit_mfn lookup/) {
+		$self->{$v} = $arg->{$v} if ($arg->{$v});
+	}
+
+	# create Text::Iconv object (input encoding -> output encoding)
+	$self->{iconv} = Text::Iconv->new($code_page, $output_code_page);
+
+	# database specific part: subclass returns handle and number of records
+	my ($db, $size) = $self->open_db(
+		path => $arg->{path},
+	);
+
+	unless ($db) {
+		$log->logwarn("can't open database $arg->{path}, skipping...");
+		return;
+	}
+
+	unless ($size) {
+		$log->logwarn("no records in database $arg->{path}, skipping...");
+		return;
+	}
+
+	my $startmfn = 1;
+	my $maxmfn = $size;
+
+	if (my $s = $self->{start_mfn}) {
+		$log->info("skipping to MFN $s");
+		$startmfn = $s;
+	} else {
+		$self->{start_mfn} = $startmfn;
+	}
+
+	if ($self->{limit_mfn}) {
+		$log->info("limiting to ",$self->{limit_mfn}," records");
+		$maxmfn = $startmfn + $self->{limit_mfn} - 1;
+		$maxmfn = $size if ($maxmfn > $size);
+	}
+
+	# store size for later: number of records in startmfn .. maxmfn, which
+	# is 1 (not 0) when both are equal and 0 when start is past the end
+	my $count = $maxmfn - $startmfn + 1;
+	$self->{size} = ($count > 0) ? $count : 0;
+
+	$log->info("processing $self->{size} records in $code_page, convert to $self->{code_page}");
+
+	# read database
+	for (my $mfn = $startmfn; $mfn <= $maxmfn; $mfn++) {
+
+		$log->debug("mfn: $mfn\n");
+
+		my $rec = $self->fetch_rec( $db, $mfn );
+
+		if (! $rec) {
+			$log->warn("record $mfn empty? skipping...");
+			next;
+		}
+
+		# store
+		if ($self->{'low_mem'}) {
+			$self->{'db'}->put($mfn, $rec);
+		} else {
+			$self->{'data'}->{$mfn} = $rec;
+		}
+
+		# create lookup
+		$self->{'lookup'}->add( $rec ) if ($rec && $self->{'lookup'});
+
+		$self->progress_bar($mfn,$maxmfn);
+
+	}
+
+	# -1 tells fetch that no record has been fetched yet
+	$self->{'current_mfn'} = -1;
+	$self->{'last_pcnt'} = 0;
+
+	$log->debug("max mfn: $maxmfn");
+
+	# store max mfn for fetch/seek; return value is the full database size
+	$self->{'max_mfn'} = $maxmfn;
+
+	return $size;
+}
+
+=head2 fetch
+
+Fetch next record from database. It also displays a progress bar.
+
+ my $rec = $isis->fetch;
+
+Record from this function should probably go to C<data_structure> for
+normalisation.
+
+=cut
+
+sub fetch {
+	my $self = shift;
+
+	my $log = $self->_get_logger();
+
+	# current_mfn is set (to -1) by open, so a false value means open was
+	# never called successfully
+	$log->logconfess("it seems that you didn't load database!") unless ($self->{'current_mfn'});
+
+	# -1 marks "nothing fetched yet": start from the first requested MFN
+	if ($self->{'current_mfn'} == -1) {
+		$self->{'current_mfn'} = $self->{'start_mfn'};
+	} else {
+		$self->{'current_mfn'}++;
+	}
+
+	my $mfn = $self->{'current_mfn'};
+
+	# past the last record: park position on max_mfn and signal EOF with
+	# an empty return
+	if ($mfn > $self->{'max_mfn'}) {
+		$self->{'current_mfn'} = $self->{'max_mfn'};
+		$log->debug("at EOF");
+		return;
+	}
+
+	$self->progress_bar($mfn,$self->{'max_mfn'});
+
+	my $rec;
+
+	if ($self->{'low_mem'}) {
+		$rec = $self->{'db'}->get($mfn);
+	} else {
+		$rec = $self->{'data'}->{$mfn};
+	}
+
+	# A missing record must stay distinguishable from EOF, so return the
+	# "zero but true" string. The bare numeric literal 0E0 is just 0, which
+	# is false and would end a while (my $rec = $obj->fetch) loop early.
+	return $rec || '0E0';
+}
+
+=head2 pos
+
+Returns current record number (MFN).
+
+ print $isis->pos;
+
+First record in database has position 1.
+
+=cut
+
+sub pos {
+	my ($self) = @_;
+
+	# open resets this to -1; fetch and seek move it afterwards
+	return $self->{'current_mfn'};
+}
+
+
+=head2 size
+
+Returns number of records in database
+
+ print $isis->size;
+
+Result from this function can be used to loop through all records
+
+ foreach my $mfn ( 1 ... $isis->size ) { ... }
+
+because it takes into account C<start_mfn> and C<limit_mfn>.
+
+=cut
+
+sub size {
+	my ($self) = @_;
+
+	# value computed by open from start_mfn and limit_mfn
+	return $self->{'size'};
+}
+
+=head2 seek
+
+Seek to specified MFN in file.
+
+ $isis->seek(42);
+
+First record in database has position 1.
+
+=cut
+
+sub seek {
+	my ($self, $target) = @_;
+
+	# nothing to do for a missing (or zero) position
+	return unless $target;
+
+	my $log = $self->_get_logger();
+
+	# clamp requested position into 1 .. max_mfn
+	if ($target < 1) {
+		$log->warn("seek before first record");
+		$target = 1;
+	}
+	elsif ($target > $self->{'max_mfn'}) {
+		$log->warn("seek beyond last record");
+		$target = $self->{'max_mfn'};
+	}
+
+	# fetch advances current_mfn before reading, so park one record before
+	# the target; a result of 0 is replaced by the -1 "not started" marker
+	my $before = $target - 1;
+	$self->{'current_mfn'} = $before ? $before : -1;
+
+	return $self->{'current_mfn'};
+}
+
+
=head1 MEMORY USAGE
C<low_mem> options is double-edged sword. If enabled, WebPAC
use warnings;
use strict;
-use WebPAC::Common;
-use base qw/WebPAC::Input WebPAC::Common/;
-use Text::Iconv;
+use blib;
+
+use WebPAC::Input;
+use base qw/WebPAC::Input/;
=head1 NAME
-WebPAC::Input::ISIS - support for CDS/ISIS source files
+WebPAC::Input::ISIS - support for CDS/ISIS database files
=head1 VERSION
-Version 0.01
+Version 0.02
=cut
-our $VERSION = '0.01';
-
+our $VERSION = '0.02';
-# auto-configure
-
-my ($have_biblio_isis, $have_openisis) = (0,0);
-
-eval "use Biblio::Isis 0.13;";
-unless ($@) {
- $have_biblio_isis = 1
-} else {
- eval "use OpenIsis;";
- $have_openisis = 1 unless ($@);
-}
=head1 SYNOPSIS
-Open CDS/ISIS, WinISIS or IsisMarc database using Biblio::Isis or OpenIsis
-module and read all records to memory.
+Open CDS/ISIS, WinISIS or IsisMarc database using C<Biblio::Isis> or
+C<OpenIsis> module and read all records to memory.
my $isis = new WebPAC::Input::ISIS();
- $isis->open( filename => '/path/to/ISIS/ISIS' );
+ $isis->open( path => '/path/to/ISIS/ISIS' );
=head1 FUNCTIONS
-=head2 open
+=head2 init
-This function will read whole database in memory and produce lookups.
+Autoconfigure this module to use C<Biblio::Isis> or C<OpenIsis>.
- $isis->open(
- filename => '/data/ISIS/ISIS',
- code_page => '852',
- limit_mfn => 500,
- start_mfn => 6000,
- lookup => $lookup_obj,
- );
+=cut
-By default, ISIS code page is assumed to be C<852>.
+sub init { # probe for an ISIS backend and record the result as a flag on the object
+ my $self = shift;
-If optional parametar C<start_mfn> is set, this will be first MFN to read
-from database (so you can skip beginning of your database if you need to).
+ eval "use Biblio::Isis 0.13;"; # string eval: a failed load is reported in $@ at run time
+ unless ($@) { # empty $@ means Biblio::Isis loaded
+ $self->{have_biblio_isis} = 1
+ } else { # OpenIsis is tried only when Biblio::Isis could not be loaded
+ eval "use OpenIsis;";
+ $self->{have_openisis} = 1 unless ($@); # neither flag is set if both loads fail
+ }
+}
-If optional parametar C<limit_mfn> is set, it will read just 500 records
-from database in example above.
+=head2 open_db
-Returns size of database, regardless of C<start_mfn> and C<limit_mfn>
-parametars, see also C<$isis->size>.
+Returns handle to database and number of records in it
+
+ my ($db, $size) = $self->open_db(
+ path => '/path/to/LIBRI'
+ );
=cut
-sub open {
+sub open_db {
my $self = shift;
+
my $arg = {@_};
my $log = $self->_get_logger();
- $log->logcroak("need filename") if (! $arg->{'filename'});
- my $code_page = $arg->{'code_page'} || '852';
-
- # store data in object
- $self->{'isis_code_page'} = $code_page;
- foreach my $v (qw/isis_filename start_mfn limit_mfn/) {
- $self->{$v} = $arg->{$v} if ($arg->{$v});
- }
-
- # create Text::Iconv object
- my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
-
- $log->info("reading ISIS database '",$arg->{'filename'},"'");
- $log->debug("isis code page: $code_page");
+ $log->info("opening ISIS database '$arg->{path}'");
my ($isis_db,$db_size);
- if ($have_openisis) {
+ if ($self->{have_openisis}) {
$log->debug("using OpenIsis perl bindings");
- $isis_db = OpenIsis::open($arg->{'filename'});
+ $isis_db = OpenIsis::open($arg->{path});
$db_size = OpenIsis::maxRowid( $isis_db ) || 1;
- } elsif ($have_biblio_isis) {
+ } elsif ($self->{have_biblio_isis}) {
$log->debug("using Biblio::Isis");
use Biblio::Isis;
$isis_db = new Biblio::Isis(
- isisdb => $arg->{'filename'},
+ isisdb => $arg->{path},
include_deleted => 1,
hash_filter => sub {
my $l = shift || return;
- $l = $cp->convert($l);
+ $l = $self->{iconv}->convert($l) if ($self->{iconv});
return $l;
},
- ) or $log->logdie("can't find database ",$arg->{'filename'});
+ ) or $log->logdie("can't find database $arg->{path}");
$db_size = $isis_db->count;
- unless ($db_size) {
- $log->logwarn("no records in database ", $arg->{'filename'}, ", skipping...");
- return;
- }
-
} else {
$log->logdie("Can't find supported ISIS library for perl. I suggent that you install Bilbio::Isis from CPAN.");
}
+ return ($isis_db, $db_size);
+}
- my $startmfn = 1;
- my $maxmfn = $db_size;
-
- if (my $s = $self->{'start_mfn'}) {
- $log->info("skipping to MFN $s");
- $startmfn = $s;
- } else {
- $self->{'start_mfn'} = $startmfn;
- }
-
- if ($self->{limit_mfn}) {
- $log->info("limiting to ",$self->{limit_mfn}," records");
- $maxmfn = $startmfn + $self->{limit_mfn} - 1;
- $maxmfn = $db_size if ($maxmfn > $db_size);
- }
+=head2 fetch_rec
- # store size for later
- $self->{'size'} = ($maxmfn - $startmfn) ? ($maxmfn - $startmfn + 1) : 0;
+Return record with ID C<$mfn> from database
- $log->info("processing ",($maxmfn-$startmfn)." records using ",( $have_openisis ? 'OpenIsis' : 'Biblio::Isis'));
+ my $rec = $self->fetch_rec( $db, $mfn );
+
- # read database
- for (my $mfn = $startmfn; $mfn <= $maxmfn; $mfn++) {
+=cut
- $log->debug("mfn: $mfn\n");
+sub fetch_rec {
+ my $self = shift;
- my $rec;
+ my ($isis_db, $mfn) = @_;
- if ($have_openisis) {
+ my $rec;
- # read record using OpenIsis
- my $row = OpenIsis::read( $isis_db, $mfn );
- foreach my $k (keys %{$row}) {
- if ($k ne "mfn") {
- foreach my $l (@{$row->{$k}}) {
- $l = $cp->convert($l);
- # has subfields?
- my $val;
- if ($l =~ m/\^/) {
- foreach my $t (split(/\^/,$l)) {
- next if (! $t);
- $val->{substr($t,0,1)} = substr($t,1);
- }
- } else {
- $val = $l;
+ if ($self->{have_openisis}) {
+
+ # read record using OpenIsis
+ my $row = OpenIsis::read( $isis_db, $mfn );
+
+ # convert record to hash
+ foreach my $k (keys %{$row}) {
+ if ($k ne "mfn") {
+ foreach my $l (@{$row->{$k}}) {
+ $l = $self->{iconv}->convert($l) if ($self->{iconv});
+ # has subfields?
+ my $val;
+ if ($l =~ m/\^/) {
+ foreach my $t (split(/\^/,$l)) {
+ next if (! $t);
+ $val->{substr($t,0,1)} = substr($t,1);
}
-
- push @{$rec->{$k}}, $val;
+ } else {
+ $val = $l;
}
- } else {
- push @{$rec->{'000'}}, $mfn;
+ push @{$rec->{$k}}, $val;
}
+ } else {
+ push @{$rec->{'000'}}, $mfn;
}
-
- } elsif ($have_biblio_isis) {
- $rec = $isis_db->to_hash($mfn);
- } else {
- $log->logdie("hum? implementation missing?");
- }
-
- if (! $rec) {
- $log->warn("record $mfn empty? skipping...");
- next;
- }
-
- # store
- if ($self->{'low_mem'}) {
- $self->{'db'}->put($mfn, $rec);
- } else {
- $self->{'data'}->{$mfn} = $rec;
}
- # create lookup
- $self->{'lookup'}->add( $rec ) if ($rec && $self->{'lookup'});
-
- $self->progress_bar($mfn,$maxmfn);
-
- }
-
- $self->{'current_mfn'} = -1;
- $self->{'last_pcnt'} = 0;
-
- $log->debug("max mfn: $maxmfn");
-
- # store max mfn and return it.
- $self->{'max_mfn'} = $maxmfn;
-
- return $db_size;
-}
-
-=head2 fetch
-
-Fetch next record from database. It will also displays progress bar.
-
- my $rec = $isis->fetch;
-
-Record from this function should probably go to C<data_structure> for
-normalisation.
-
-=cut
-
-sub fetch {
- my $self = shift;
-
- my $log = $self->_get_logger();
-
- $log->logconfess("it seems that you didn't load database!") unless ($self->{'current_mfn'});
-
- if ($self->{'current_mfn'} == -1) {
- $self->{'current_mfn'} = $self->{'start_mfn'};
+ } elsif ($self->{have_biblio_isis}) {
+ $rec = $isis_db->to_hash($mfn);
} else {
- $self->{'current_mfn'}++;
- }
-
- my $mfn = $self->{'current_mfn'};
-
- if ($mfn > $self->{'max_mfn'}) {
- $self->{'current_mfn'} = $self->{'max_mfn'};
- $log->debug("at EOF");
- return;
- }
-
- $self->progress_bar($mfn,$self->{'max_mfn'});
-
- my $rec;
-
- if ($self->{'low_mem'}) {
- $rec = $self->{'db'}->get($mfn);
- } else {
- $rec = $self->{'data'}->{$mfn};
- }
-
- $rec ||= 0E0;
-}
-
-=head2 pos
-
-Returns current record number (MFN).
-
- print $isis->pos;
-
-First record in database has position 1.
-
-=cut
-
-sub pos {
- my $self = shift;
- return $self->{'current_mfn'};
-}
-
-
-=head2 size
-
-Returns number of records in database
-
- print $isis->size;
-
-Result from this function can be used to loop through all records
-
- foreach my $mfn ( 1 ... $isis->size ) { ... }
-
-because it takes into account C<start_mfn> and C<limit_mfn>.
-
-=cut
-
-sub size {
- my $self = shift;
- return $self->{'size'};
-}
-
-=head2 seek
-
-Seek to specified MFN in file.
-
- $isis->seek(42);
-
-First record in database has position 1.
-
-=cut
-
-sub seek {
- my $self = shift;
- my $pos = shift || return;
-
- my $log = $self->_get_logger();
-
- if ($pos < 1) {
- $log->warn("seek before first record");
- $pos = 1;
- } elsif ($pos > $self->{'max_mfn'}) {
- $log->warn("seek beyond last record");
- $pos = $self->{'max_mfn'};
+ $self->_get_logger()->logdie("hum? implementation missing?");
}
- return $self->{'current_mfn'} = (($pos - 1) || -1);
+ return $rec;
}
=head1 AUTHOR
ok(my $isis = new WebPAC::Input::ISIS( no_log => 1 ), "new");
ok(my $isis_lm = new WebPAC::Input::ISIS( low_mem => 1, no_log => 1 ), "new");
-throws_ok { $isis->open( ) } qr/filename/, "need filename";
+throws_ok { $isis->open( ) } qr/path/, "need path";
-throws_ok { $isis->open( filename => '/dev/null', ) } qr/can't find database/ , "open";
+throws_ok { $isis->open( path => '/dev/null', ) } qr/can't find database/ , "open";
-ok($isis->open( filename => "$abs_path/winisis/BIBL" ), "open");
-ok($isis_lm->open( filename => "$abs_path/winisis/BIBL", low_mem => 1 ), "open");
+ok($isis->open( path => "$abs_path/winisis/BIBL" ), "open");
+ok($isis_lm->open( path => "$abs_path/winisis/BIBL", low_mem => 1 ), "open");
cmp_ok($isis->pos, '==', -1, "mfn");
diag "start_mfn: $s, limit_mfn: $l, expected: $e";
- ok($s = $isis->open( filename => "$abs_path/winisis/BIBL", start_mfn => $s, limit_mfn => $l, debug => 1 ), "open");
+ ok($s = $isis->open( path => "$abs_path/winisis/BIBL", start_mfn => $s, limit_mfn => $l, debug => 1 ), "open");
cmp_ok($s, '==', $size, "db size from open");
cmp_ok($isis->size, '==', $e, "size");
}