a bit more work on WebPAC::Input::ISIS
[webpac2] / lib / WebPAC / Input / ISIS.pm
1 package WebPAC::Input::ISIS;
2
3 use warnings;
4 use strict;
5
6 use WebPAC::Common;
7 use base qw/WebPAC::Input WebPAC::Common/;
8 use Text::Iconv;
9
10 =head1 NAME
11
12 WebPAC::Input::ISIS - support for CDS/ISIS source files
13
14 =head1 VERSION
15
16 Version 0.01
17
18 =cut
19
20 our $VERSION = '0.01';
21
22
23 # auto-configure
24
25 my ($have_biblio_isis, $have_openisis) = (0,0);
26
27 eval "use Biblio::Isis 0.13;";
28 unless ($@) { 
29         $have_biblio_isis = 1
30 } else {
31         eval "use OpenIsis;";
32         $have_openisis = 1 unless ($@);
33 }
34
35 =head1 SYNOPSIS
36
37 Open CDS/ISIS, WinISIS or IsisMarc database using Biblio::Isis or OpenIsis
38 module and read all records to memory.
39
40  my $isis = new WebPAC::Input::ISIS();
41  $isis->open( filename => '/path/to/ISIS/ISIS' );
42
43 =head1 FUNCTIONS
44
45 =head2 open
46
47 This function will read whole database in memory and produce lookups.
48
49  $isis->open(
50         filename => '/data/ISIS/ISIS',
51         code_page => '852',
52         limit_mfn => 500,
53         start_mfn => 6000,
54         lookup => $lookup_obj,
55  );
56
57 By default, ISIS code page is assumed to be C<852>.
58
59 If optional parametar C<start_mfn> is set, this will be first MFN to read
60 from database (so you can skip beginning of your database if you need to).
61
62 If optional parametar C<limit_mfn> is set, it will read just 500 records
63 from database in example above.
64
65 Returns number of last record read into memory (size of database, really).
66
67 =cut
68
69 sub open {
70         my $self = shift;
71         my $arg = {@_};
72
73         my $log = $self->_get_logger();
74
75         $log->logcroak("need filename") if (! $arg->{'filename'});
76         my $code_page = $arg->{'code_page'} || '852';
77
78         $log->logdie("can't find database ",$arg->{'filename'}) unless (glob($arg->{'filename'}.'.*'));
79
80         # store data in object
81         $self->{'isis_filename'} = $arg->{'filename'};
82         $self->{'isis_code_page'} = $code_page;
83
84         #$self->{'isis_code_page'} = $code_page;
85
86         # create Text::Iconv object
87         my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
88
89         $log->info("reading ISIS database '",$arg->{'filename'},"'");
90         $log->debug("isis code page: $code_page");
91
92         my ($isis_db,$maxmfn);
93
94         if ($have_openisis) {
95                 $log->debug("using OpenIsis perl bindings");
96                 $isis_db = OpenIsis::open($arg->{'filename'});
97                 $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
98         } elsif ($have_biblio_isis) {
99                 $log->debug("using Biblio::Isis");
100                 use Biblio::Isis;
101                 $isis_db = new Biblio::Isis(
102                         isisdb => $arg->{'filename'},
103                         include_deleted => 1,
104                         hash_filter => sub {
105                                 my $l = shift || return;
106                                 $l = $cp->convert($l);
107                                 return $l;
108                         },
109                 );
110                 $maxmfn = $isis_db->count;
111
112                 unless ($maxmfn) {
113                         $log->logwarn("no records in database ", $arg->{'filename'}, ", skipping...");
114                         return;
115                 }
116
117         } else {
118                 $log->logdie("Can't find supported ISIS library for perl. I suggent that you install Bilbio::Isis from CPAN.");
119         }
120
121
122         my $startmfn = 1;
123
124         if (my $s = $self->{'start_mfn'}) {
125                 $log->info("skipping to MFN $s");
126                 $startmfn = $s;
127         } else {
128                 $self->{'start_mfn'} = $startmfn;
129         }
130
131         $maxmfn = $startmfn + $self->{limit_mfn} if ($self->{limit_mfn});
132
133         $log->info("processing ",($maxmfn-$startmfn)." records using ",( $have_openisis ? 'OpenIsis' : 'Biblio::Isis'));
134
135
136         # read database
137         for (my $mfn = $startmfn; $mfn <= $maxmfn; $mfn++) {
138
139                 $log->debug("mfn: $mfn\n");
140
141                 my $rec;
142
143                 if ($have_openisis) {
144
145                         # read record using OpenIsis
146                         my $row = OpenIsis::read( $isis_db, $mfn );
147                         foreach my $k (keys %{$row}) {
148                                 if ($k ne "mfn") {
149                                         foreach my $l (@{$row->{$k}}) {
150                                                 $l = $cp->convert($l);
151                                                 # has subfields?
152                                                 my $val;
153                                                 if ($l =~ m/\^/) {
154                                                         foreach my $t (split(/\^/,$l)) {
155                                                                 next if (! $t);
156                                                                 $val->{substr($t,0,1)} = substr($t,1);
157                                                         }
158                                                 } else {
159                                                         $val = $l;
160                                                 }
161
162                                                 push @{$rec->{$k}}, $val;
163                                         }
164                                 } else {
165                                         push @{$rec->{'000'}}, $mfn;
166                                 }
167                         }
168
169                 } elsif ($have_biblio_isis) {
170                         $rec = $isis_db->to_hash($mfn);
171                 } else {
172                         $log->logdie("hum? implementation missing?");
173                 }
174
175                 $log->confess("record $mfn empty?") unless ($rec);
176
177                 # store
178                 if ($self->{'low_mem'}) {
179                         $self->{'db'}->put($mfn, $rec);
180                 } else {
181                         $self->{'data'}->{$mfn} = $rec;
182                 }
183
184                 # create lookup
185                 $self->{'lookup'}->add( $rec ) if ($self->{'lookup'} && can($self->{'lookup'}->add));
186
187                 $self->progress_bar($mfn,$maxmfn);
188
189         }
190
191         $self->{'current_mfn'} = -1;
192         $self->{'last_pcnt'} = 0;
193
194         $log->debug("max mfn: $maxmfn");
195
196         # store max mfn and return it.
197         return $self->{'max_mfn'} = $maxmfn;
198 }
199
200 =head2 fetch_rec
201
202 Fetch next record from database. It will also displays progress bar.
203
204  my $rec = $webpac->fetch_rec;
205
206 You should rearly have the need to call this function directly. Instead use
207 C<fetch_data_structure> which returns normalised data.
208
209 =cut
210
211 sub fetch_rec {
212         my $self = shift;
213
214         my $log = $self->_get_logger();
215
216         $log->logconfess("it seems that you didn't load database!") unless ($self->{'current_mfn'});
217
218         if ($self->{'current_mfn'} == -1) {
219                 $self->{'current_mfn'} = $self->{'start_mfn'};
220         } else {
221                 $self->{'current_mfn'}++;
222         }
223
224         my $mfn = $self->{'current_mfn'};
225
226         if ($mfn > $self->{'max_mfn'}) {
227                 $self->{'current_mfn'} = $self->{'max_mfn'};
228                 $log->debug("at EOF");
229                 return;
230         }
231
232         $self->progress_bar($mfn,$self->{'max_mfn'});
233
234         if ($self->{'low_mem'}) {
235                 return $self->{'db'}->get($mfn);
236         } else {
237                 return $self->{'data'}->{$mfn};
238         }
239 }
240
241 =head2 fetch_data_structure
242
243 Fetch data structure of next record from database.
244
245  my @ds = $webpac->fetch_data_structure;
246
247 =cut
248
249 sub fetch_data_structure {
250         my $self = shift;
251
252         return $self->data_structure(
253                 $self->fetch_rec(@_)
254         );
255 }
256
257 =head2 mfn
258
259 Returns current record number (MFN).
260
261  print $webpac->mfn;
262
263 =cut
264
265 sub mfn {
266         my $self = shift;
267         return $self->{'current_mfn'};
268 }
269
270 =head1 AUTHOR
271
272 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
273
274 =head1 COPYRIGHT & LICENSE
275
276 Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
277
278 This program is free software; you can redistribute it and/or modify it
279 under the same terms as Perl itself.
280
281 =cut
282
283 1; # End of WebPAC::Input::ISIS