r9064@llin: dpavlin | 2005-11-23 01:15:24 +0100
[webpac2] / lib / WebPAC / Input / ISIS.pm
1 package WebPAC::Input::ISIS;
2
3 use warnings;
4 use strict;
5
6 use WebPAC::Common;
7 use base qw/WebPAC::Input WebPAC::Common/;
8 use Text::Iconv;
9
10 =head1 NAME
11
12 WebPAC::Input::ISIS - support for CDS/ISIS source files
13
14 =head1 VERSION
15
16 Version 0.01
17
18 =cut
19
20 our $VERSION = '0.01';
21
22
23 # auto-configure
24
# Flags recording which ISIS backend could be loaded. At most one of them
# ends up true: OpenIsis is only probed when Biblio::Isis (>= 0.13) failed.
my $have_biblio_isis = 0;
my $have_openisis    = 0;

eval "use Biblio::Isis 0.13;";
if ($@) {
	# preferred backend missing (or too old) -- fall back to OpenIsis
	eval "use OpenIsis;";
	$have_openisis = 1 if (! $@);
} else {
	$have_biblio_isis = 1;
}
34
35 =head1 SYNOPSIS
36
37 Open CDS/ISIS, WinISIS or IsisMarc database using Biblio::Isis or OpenIsis
38 module and read all records to memory.
39
40  my $isis = new WebPAC::Input::ISIS();
41  $isis->open( filename => '/path/to/ISIS/ISIS' );
42
43 =head1 FUNCTIONS
44
45 =head2 open
46
47 This function will read whole database in memory and produce lookups.
48
49  $isis->open(
50         filename => '/data/ISIS/ISIS',
51         code_page => '852',
52         limit_mfn => 500,
53         start_mfn => 6000,
54         lookup => $lookup_obj,
55  );
56
57 By default, ISIS code page is assumed to be C<852>.
58
59 If optional parameter C<start_mfn> is set, this will be the first MFN to read
60 from database (so you can skip the beginning of your database if you need to).
61
62 If optional parameter C<limit_mfn> is set, it will read just 500 records
63 from the database in the example above.
64
65 Returns size of database, regardless of C<start_mfn> and C<limit_mfn>
66 parameters, see also C<< $isis->size >>.
67
68 =cut
69
# Read a CDS/ISIS database and keep every record in the object.
#
# Named arguments:
#   filename  - path to the database without extension (required)
#   code_page - code page of the ISIS data, defaults to '852'
#   start_mfn - first MFN to read (defaults to 1)
#   limit_mfn - maximum number of records to read
#   lookup    - object whose add() method is called with every record
#
# Returns the total number of records reported by the backend (regardless
# of start_mfn/limit_mfn), or nothing when Biblio::Isis reports an empty
# database. Dies through the logger when the filename is missing, no file
# matches "filename.*", or no ISIS backend could be loaded.
sub open {
	my $self = shift;
	my $arg = {@_};

	my $log = $self->_get_logger();

	$log->logcroak("need filename") if (! $arg->{'filename'});
	my $code_page = $arg->{'code_page'} || '852';

	# glob() must be called in list context: in scalar (boolean) context it
	# is a stateful iterator, so a later call to open() could get undef
	# (list exhausted) even though the database files exist.
	my @db_files = glob($arg->{'filename'}.'.*');
	$log->logdie("can't find database ",$arg->{'filename'}) unless (@db_files);

	# store data in object
	$self->{'isis_code_page'} = $code_page;
	# the argument is called "filename", but it is kept as "isis_filename"
	$self->{'isis_filename'} = $arg->{'filename'};
	foreach my $v (qw/start_mfn limit_mfn lookup/) {
		$self->{$v} = $arg->{$v} if ($arg->{$v});
	}

	# create Text::Iconv object; the target code page comes from
	# $self->{'code_page'}, which is not set here (presumably by the
	# constructor -- TODO confirm)
	my $cp = Text::Iconv->new($code_page,$self->{'code_page'});

	$log->info("reading ISIS database '",$arg->{'filename'},"'");
	$log->debug("isis code page: $code_page");

	my ($isis_db,$db_size);

	if ($have_openisis) {
		$log->debug("using OpenIsis perl bindings");
		$isis_db = OpenIsis::open($arg->{'filename'});
		$db_size = OpenIsis::maxRowid( $isis_db ) || 1;
	} elsif ($have_biblio_isis) {
		$log->debug("using Biblio::Isis");
		# No compile-time "use Biblio::Isis" here: it would make this file
		# fail to compile when only OpenIsis is installed. The module was
		# already loaded by the auto-configure eval at the top of the file.
		$isis_db = Biblio::Isis->new(
			isisdb => $arg->{'filename'},
			include_deleted => 1,
			hash_filter => sub {
				my $line = shift || return;
				return $cp->convert($line);
			},
		);
		$db_size = $isis_db->count;

		unless ($db_size) {
			$log->logwarn("no records in database ", $arg->{'filename'}, ", skipping...");
			return;
		}

	} else {
		$log->logdie("Can't find supported ISIS library for perl. I suggest that you install Biblio::Isis from CPAN.");
	}


	my $startmfn = 1;
	my $maxmfn = $db_size;

	if (my $s = $self->{'start_mfn'}) {
		$log->info("skipping to MFN $s");
		$startmfn = $s;
	} else {
		$self->{'start_mfn'} = $startmfn;
	}

	if ($self->{limit_mfn}) {
		$log->info("limiting to ",$self->{limit_mfn}," records");
		$maxmfn = $startmfn + $self->{limit_mfn} - 1;
		$maxmfn = $db_size if ($maxmfn > $db_size);
	}

	# store size for later; the range is inclusive, so a single record
	# (start == max) counts as 1 and a start past the end counts as 0
	my $size = $maxmfn - $startmfn + 1;
	$size = 0 if ($size < 0);
	$self->{'size'} = $size;

	$log->info("processing ",$size." records using ",( $have_openisis ? 'OpenIsis' : 'Biblio::Isis'));


	# read database
	for (my $mfn = $startmfn; $mfn <= $maxmfn; $mfn++) {

		$log->debug("mfn: $mfn\n");

		my $rec;

		if ($have_openisis) {

			# read record using OpenIsis
			my $row = OpenIsis::read( $isis_db, $mfn );
			foreach my $k (keys %{$row}) {
				if ($k ne "mfn") {
					foreach my $line (@{$row->{$k}}) {
						$line = $cp->convert($line);
						# has subfields? if so, build hash keyed by the
						# one-character subfield code following each "^"
						my $val;
						if ($line =~ m/\^/) {
							foreach my $sf (split(/\^/,$line)) {
								next if (! $sf);
								$val->{substr($sf,0,1)} = substr($sf,1);
							}
						} else {
							$val = $line;
						}

						push @{$rec->{$k}}, $val;
					}
				} else {
					# MFN is stored as field 000
					push @{$rec->{'000'}}, $mfn;
				}
			}

		} elsif ($have_biblio_isis) {
			$rec = $isis_db->to_hash($mfn);
		} else {
			$log->logdie("hum? implementation missing?");
		}

		if (! $rec) {
			$log->warn("record $mfn empty? skipping...");
			next;
		}

		# store
		if ($self->{'low_mem'}) {
			$self->{'db'}->put($mfn, $rec);
		} else {
			$self->{'data'}->{$mfn} = $rec;
		}

		# create lookup (only if the lookup object really implements add)
		my $lookup = $self->{'lookup'};
		$lookup->add( $rec ) if ($lookup && $lookup->can('add'));

		$self->progress_bar($mfn,$maxmfn);

	}

	# -1 means "positioned before the first record" for fetch
	$self->{'current_mfn'} = -1;
	$self->{'last_pcnt'} = 0;

	$log->debug("max mfn: $maxmfn");

	# store max mfn and return size of whole database
	$self->{'max_mfn'} = $maxmfn;

	return $db_size;
}
213
214 =head2 fetch
215
216 Fetch next record from database. It will also display a progress bar.
217
218  my $rec = $isis->fetch;
219
220 Record from this function should probably go to C<data_structure> for
221 normalisation.
222
223 =cut
224
# Advance to the next record and return it.
#
# Returns the stored record for the new current MFN, the string '0E0'
# (true in boolean context, 0 in numeric context) when nothing is stored
# for that MFN, or nothing once the position moves past max_mfn.
# Dies through the logger if no database has been loaded yet.
sub fetch {
	my $self = shift;

	my $log = $self->_get_logger();

	$log->logconfess("it seems that you didn't load database!") unless ($self->{'current_mfn'});

	# -1 marks "before first record"; otherwise just step forward
	if ($self->{'current_mfn'} == -1) {
		$self->{'current_mfn'} = $self->{'start_mfn'};
	} else {
		$self->{'current_mfn'}++;
	}

	my $mfn = $self->{'current_mfn'};

	if ($mfn > $self->{'max_mfn'}) {
		# stay parked on the last record so repeated calls keep returning EOF
		$self->{'current_mfn'} = $self->{'max_mfn'};
		$log->debug("at EOF");
		return;
	}

	$self->progress_bar($mfn,$self->{'max_mfn'});

	my $rec;

	if ($self->{'low_mem'}) {
		$rec = $self->{'db'}->get($mfn);
	} else {
		$rec = $self->{'data'}->{$mfn};
	}

	# The sentinel has to be the *string* '0E0': the numeric literal 0E0 is
	# plain 0, which is false and would end a "while (my $rec = ...->fetch)"
	# loop at the first missing record.
	return $rec || '0E0';
}
258
259 =head2 pos
260
261 Returns current record number (MFN).
262
263  print $isis->pos;
264
265 First record in database has position 1.
266
267 =cut
268
# Accessor: current record number (MFN) as kept in the object.
sub pos {
	my ($self) = @_;
	my $current = $self->{current_mfn};
	return $current;
}
273
274
275 =head2 size
276
277 Returns number of records in database
278
279  print $isis->size;
280
281 Result from this function can be used to loop through all records
282
283  foreach my $mfn ( 1 ... $isis->size ) { ... }
284
285 because it takes into account C<start_mfn> and C<limit_mfn>.
286
287 =cut
288
# Accessor: number of records stored in the object's 'size' slot.
sub size {
	my ($self) = @_;
	my $count = $self->{size};
	return $count;
}
293
294 =head2 seek
295
296 Seek to specified MFN in file.
297
298  $isis->seek(42);
299
300 First record in database has position 1.
301
302 =cut
303
# Position the object so that the next fetch returns record $pos.
#
# A false position (missing or 0) returns immediately without touching
# anything. Positions below 1 or above max_mfn are clamped with a warning.
# Returns the new internal current_mfn, which is $pos - 1, or -1 when
# that would be 0 (the "before first record" marker).
sub seek {
	my ($self, $pos) = @_;
	return unless $pos;

	my $log = $self->_get_logger();

	my $last = $self->{max_mfn};
	if ($pos < 1) {
		$log->warn("seek before first record");
		$pos = 1;
	}
	elsif ($pos > $last) {
		$log->warn("seek beyond last record");
		$pos = $last;
	}

	# fetch increments before reading, so park one record earlier
	my $target = $pos - 1;
	$target = -1 unless $target;

	$self->{current_mfn} = $target;
	return $target;
}
320
321 =head1 AUTHOR
322
323 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
324
325 =head1 COPYRIGHT & LICENSE
326
327 Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
328
329 This program is free software; you can redistribute it and/or modify it
330 under the same terms as Perl itself.
331
332 =cut
333
334 1; # End of WebPAC::Input::ISIS