little cleanup and first cut into WebPAC::Normalize::XML
[webpac2] / lib / WebPAC / Input / ISIS.pm
1 package WebPAC::Input::ISIS;
2
3 use warnings;
4 use strict;
5
6 use WebPAC::Common;
7 use base qw/WebPAC::Input WebPAC::Common/;
8
9 =head1 NAME
10
11 WebPAC::Input::ISIS - support for CDS/ISIS source files
12
13 =head1 VERSION
14
15 Version 0.01
16
17 =cut
18
19 our $VERSION = '0.01';
20
21
22 # auto-configure
23
24 my ($have_biblio_isis, $have_openisis) = (0,0);
25
26 eval "use Biblio::Isis 0.13;";
27 unless ($@) { 
28         $have_biblio_isis = 1
29 } else {
30         eval "use OpenIsis;";
31         $have_openisis = 1 unless ($@);
32 }
33
34 =head1 SYNOPSIS
35
36 Open CDS/ISIS, WinISIS or IsisMarc database using Biblio::Isis or OpenIsis
37 module and read all records to memory.
38
39  my $isis = new WebPAC::Input::ISIS();
40  $isis->open( filename => '/path/to/ISIS/ISIS' );
41
42 =head1 FUNCTIONS
43
44 =head2 open
45
46 This function will read whole database in memory and produce lookups.
47
48  $isis->open(
49         filename => '/data/ISIS/ISIS',
50         code_page => '852',
51         limit_mfn => 500,
52         start_mfn => 6000,
53         lookup => $lookup_obj,
54  );
55
56 By default, ISIS code page is assumed to be C<852>.
57
58 If optional parametar C<start_mfn> is set, this will be first MFN to read
59 from database (so you can skip beginning of your database if you need to).
60
61 If optional parametar C<limit_mfn> is set, it will read just 500 records
62 from database in example above.
63
64 Returns number of last record read into memory (size of database, really).
65
66 =cut
67
68 sub open {
69         my $self = shift;
70         my $arg = {@_};
71
72         my $log = $self->_get_logger();
73
74         $log->logcroak("need filename") if (! $arg->{'filename'});
75         my $code_page = $arg->{'code_page'} || '852';
76
77         $log->logdie("can't find database ",$arg->{'filename'}) unless (glob($arg->{'filename'}.'.*'));
78
79         # store data in object
80         $self->{'isis_filename'} = $arg->{'filename'};
81         $self->{'isis_code_page'} = $code_page;
82
83         #$self->{'isis_code_page'} = $code_page;
84
85         # create Text::Iconv object
86         my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
87
88         $log->info("reading ISIS database '",$arg->{'filename'},"'");
89         $log->debug("isis code page: $code_page");
90
91         my ($isis_db,$maxmfn);
92
93         if ($have_openisis) {
94                 $log->debug("using OpenIsis perl bindings");
95                 $isis_db = OpenIsis::open($arg->{'filename'});
96                 $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;
97         } elsif ($have_biblio_isis) {
98                 $log->debug("using Biblio::Isis");
99                 use Biblio::Isis;
100                 $isis_db = new Biblio::Isis(
101                         isisdb => $arg->{'filename'},
102                         include_deleted => 1,
103                         hash_filter => sub {
104                                 my $l = shift || return;
105                                 $l = $cp->convert($l);
106                                 return $l;
107                         },
108                 );
109                 $maxmfn = $isis_db->count;
110
111                 unless ($maxmfn) {
112                         $log->logwarn("no records in database ", $arg->{'filename'}, ", skipping...");
113                         return;
114                 }
115
116         } else {
117                 $log->logdie("Can't find supported ISIS library for perl. I suggent that you install Bilbio::Isis from CPAN.");
118         }
119
120
121         my $startmfn = 1;
122
123         if (my $s = $self->{'start_mfn'}) {
124                 $log->info("skipping to MFN $s");
125                 $startmfn = $s;
126         } else {
127                 $self->{'start_mfn'} = $startmfn;
128         }
129
130         $maxmfn = $startmfn + $self->{limit_mfn} if ($self->{limit_mfn});
131
132         $log->info("processing ",($maxmfn-$startmfn)." records using ",( $have_openisis ? 'OpenIsis' : 'Biblio::Isis'));
133
134
135         # read database
136         for (my $mfn = $startmfn; $mfn <= $maxmfn; $mfn++) {
137
138                 $log->debug("mfn: $mfn\n");
139
140                 my $rec;
141
142                 if ($have_openisis) {
143
144                         # read record using OpenIsis
145                         my $row = OpenIsis::read( $isis_db, $mfn );
146                         foreach my $k (keys %{$row}) {
147                                 if ($k ne "mfn") {
148                                         foreach my $l (@{$row->{$k}}) {
149                                                 $l = $cp->convert($l);
150                                                 # has subfields?
151                                                 my $val;
152                                                 if ($l =~ m/\^/) {
153                                                         foreach my $t (split(/\^/,$l)) {
154                                                                 next if (! $t);
155                                                                 $val->{substr($t,0,1)} = substr($t,1);
156                                                         }
157                                                 } else {
158                                                         $val = $l;
159                                                 }
160
161                                                 push @{$rec->{$k}}, $val;
162                                         }
163                                 } else {
164                                         push @{$rec->{'000'}}, $mfn;
165                                 }
166                         }
167
168                 } elsif ($have_biblio_isis) {
169                         $rec = $isis_db->to_hash($mfn);
170                 } else {
171                         $log->logdie("hum? implementation missing?");
172                 }
173
174                 $log->confess("record $mfn empty?") unless ($rec);
175
176                 # store
177                 if ($self->{'low_mem'}) {
178                         $self->{'db'}->put($mfn, $rec);
179                 } else {
180                         $self->{'data'}->{$mfn} = $rec;
181                 }
182
183                 # create lookup
184                 $self->{'lookup'}->add( $rec ) if ($self->{'lookup'} && can($self->{'lookup'}->add));
185
186                 $self->progress_bar($mfn,$maxmfn);
187
188         }
189
190         $self->{'current_mfn'} = -1;
191         $self->{'last_pcnt'} = 0;
192
193         $log->debug("max mfn: $maxmfn");
194
195         # store max mfn and return it.
196         return $self->{'max_mfn'} = $maxmfn;
197 }
198
199 =head2 fetch_rec
200
201 Fetch next record from database. It will also displays progress bar.
202
203  my $rec = $webpac->fetch_rec;
204
205 You should rearly have the need to call this function directly. Instead use
206 C<fetch_data_structure> which returns normalised data.
207
208 =cut
209
210 sub fetch_rec {
211         my $self = shift;
212
213         my $log = $self->_get_logger();
214
215         $log->logconfess("it seems that you didn't load database!") unless ($self->{'current_mfn'});
216
217         if ($self->{'current_mfn'} == -1) {
218                 $self->{'current_mfn'} = $self->{'start_mfn'};
219         } else {
220                 $self->{'current_mfn'}++;
221         }
222
223         my $mfn = $self->{'current_mfn'};
224
225         if ($mfn > $self->{'max_mfn'}) {
226                 $self->{'current_mfn'} = $self->{'max_mfn'};
227                 $log->debug("at EOF");
228                 return;
229         }
230
231         $self->progress_bar($mfn,$self->{'max_mfn'});
232
233         if ($self->{'low_mem'}) {
234                 return $self->{'db'}->get($mfn);
235         } else {
236                 return $self->{'data'}->{$mfn};
237         }
238 }
239
240 =head2 fetch_data_structure
241
242 Fetch data structure of next record from database.
243
244  my @ds = $webpac->fetch_data_structure;
245
246 =cut
247
248 sub fetch_data_structure {
249         my $self = shift;
250
251         return $self->data_structure(
252                 $self->fetch_rec(@_)
253         );
254 }
255
256 =head2 mfn
257
258 Returns current record number (MFN).
259
260  print $webpac->mfn;
261
262 =cut
263
264 sub mfn {
265         my $self = shift;
266         return $self->{'current_mfn'};
267 }
268
269 =head1 AUTHOR
270
271 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
272
273 =head1 COPYRIGHT & LICENSE
274
275 Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
276
277 This program is free software; you can redistribute it and/or modify it
278 under the same terms as Perl itself.
279
280 =cut
281
282 1; # End of WebPAC::Input::ISIS