9 use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
12 #Give a hoot don't pollute, do not export more than needed by default
21 IsisDB - Read CDS/ISIS, WinISIS and IsisMarc database
27 my $isis = new IsisDB(
28 isisdb => './cds/cds',
31 for(my $mfn = 1; $mfn <= $isis->{'maxmfn'}; $mfn++) {
32 print $isis->to_ascii($mfn),"\n";
37 This module will read ISIS databases created by DOS CDS/ISIS, WinIsis or
38 IsisMarc. It can be used as perl-only alternative to OpenIsis module.
40 It can create hash values from data in ISIS database (using C<to_hash>),
41 ASCII dump (using C<to_ascii>) or just hash with field names and packed
42 values (like C<^asomething^belse>).
44 Unique feature of this module is ability to C<include_deleted> records.
45 It will also skip zero sized fields (OpenIsis has a bug in XS bindings, so
46 fields which are zero sized will be filled with random junk from memory).
48 It also has support for identifiers (only if ISIS database is created by
49 IsisMarc), see C<to_hash>.
51 This will module will always be slower than OpenIsis module which use C
52 library. However, since it's written in perl, it's platform independent (so
53 you don't need C compiler), and can be easily modified. I hope that it
54 creates data structures which are easier to use than ones created by
55 OpenIsis, so reduced time in other parts of the code should compensate for
56 slower performance of this module (speed of reading ISIS database is
63 # my $ORDN; # Nodes Order
64 # my $ORDF; # Leafs Order
65 # my $N; # Number of Memory buffers for nodes
66 # my $K; # Number of buffers for first level index
67 # my $LIV; # Current number of Index Levels
68 # my $POSRX; # Pointer to Root Record in N0x
69 # my $NMAXPOS; # Next Available position in N0x
70 # my $FMAXPOS; # Next available position in L0x
71 # my $ABNORMAL; # Formal BTree normality indicator
81 my $isis = new IsisDB(
82 isisdb => './cds/cds',
92 Options are described below:
98 This is full or relative path to ISIS database files which include
99 common prefix of C<.FDT>, C<.MST>, C<.CNT>, C<.XRF> and C<.MST> files.
101 In this example it uses C<./cds/cds.MST> and related files.
105 Boolean flag to specify if field definition table should be read. It's off
108 =item include_deleted
110 Don't skip logically deleted records in ISIS.
114 Filter code ref which will be used before data is converted to hash.
118 Dump a B<lot> of debugging output.
122 It will also set C<$isis-E<gt>{'maxmfn'}> which is maximum MFN stored in database.
129 bless($self, $class);
131 croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb});
133 foreach my $v (qw{isisdb debug include_deleted hash_filter}) {
134 $self->{$v} = {@_}->{$v};
137 # if you want to read .FDT file use read_fdt argument when creating class!
138 if ({@_}->{read_fdt} && -e $self->{isisdb}.".FDT") {
140 # read the $db.FDT file for tags
143 open(fileFDT, $self->{isisdb}.".FDT") || croak "can't read '$self->{isisdb}.FDT': $!";
148 my $name=substr($_,0,30);
149 my $tag=substr($_,50,3);
154 $self->{'TagName'}->{$tag}=$name;
165 # Get the Maximum MFN from $db.MST
167 open(fileMST,$self->{isisdb}.".MST") || croak "can't read '$self->{isisdb}.MST': $!";
169 # MST format: (* = 32 bit signed)
171 # NXTMFN* MFN to be assigned to the next record created
172 # NXTMFB* last block allocated to master file
173 # NXTMFP offset to next available position in last block
174 # MFTYPE always 0 for user db file (1 for system)
179 read(fileMST, $buff, 4);
180 $self->{'NXTMFN'}=unpack("l",$buff) || carp "NXTNFN is zero";
183 $self->{'maxmfn'} = $self->{'NXTMFN'} - 1;
187 # Get the index information from $db.CNT
189 open(fileCNT, $self->{isisdb}.".CNT") || croak "can't read '$self->{isisdb}.CNT': $!";
191 # There is two 26 Bytes fixed lenght records
193 # 0: IDTYPE BTree type 16
194 # 2: ORDN Nodes Order 16
195 # 4: ORDF Leafs Order 16
196 # 6: N Number of Memory buffers for nodes 16
197 # 8: K Number of buffers for first level index 16
198 # 10: LIV Current number of Index Levels 16
199 # 12: POSRX* Pointer to Root Record in N0x 32
200 # 16: NMAXPOS* Next Available position in N0x 32
201 # 20: FMAXPOS* Next available position in L0x 32
202 # 24: ABNORMAL Formal BTree normality indicator 16
208 my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
210 my $buff = shift || return;
211 my @arr = unpack("ssssssllls", $buff);
213 print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
215 my $IDTYPE = shift @arr;
217 $self->{$IDTYPE}->{$_} = abs(shift @arr);
221 read(fileCNT, $buff, 26);
222 $self->unpack_cnt($buff);
224 read(fileCNT, $buff, 26);
225 $self->unpack_cnt($buff);
230 print STDERR Dumper($self),"\n" if ($self->{debug});
232 # open files for later
233 open($self->{'fileXRF'}, $self->{isisdb}.".XRF") || croak "can't open '$self->{isisdb}.XRF': $!";
235 open($self->{'fileMST'}, $self->{isisdb}.".MST") || croak "can't open '$self->{isisdb}.MST': $!";
237 $self ? return $self : return undef;
242 Read record with selected MFN
244 my $rec = $isis->fetch(55);
246 Returns hash with keys which are field names and values are unpacked values
247 for that field like this:
250 '210' => [ '^aNew York^cNew York University press^dcop. 1988' ],
251 '990' => [ '2140', '88', 'HAY' ],
259 my $mfn = shift || croak "fetch needs MFN as argument!";
261 # is mfn allready in memory?
262 my $old_mfn = $self->{'current_mfn'} || -1;
263 return if ($mfn == $old_mfn);
265 print STDERR "## fetch: $mfn\n" if ($self->{debug});
268 my $mfnpos=($mfn+int(($mfn-1)/127))*4;
270 print STDERR "## seeking to $mfnpos in file '$self->{isisdb}.XRF'\n" if ($self->{debug});
271 seek($self->{'fileXRF'},$mfnpos,0);
275 # read XRFMFB abd XRFMFP
276 read($self->{'fileXRF'}, $buff, 4);
277 my $pointer=unpack("l",$buff) || carp "pointer is null";
279 my $XRFMFB = int($pointer/2048);
280 my $XRFMFP = $pointer - ($XRFMFB*2048);
283 # (XRFMFB - 1) * 512 + XRFMFP
284 # why do i have to do XRFMFP % 1024 ?
286 my $blk_off = (($XRFMFB - 1) * 512) + ($XRFMFP % 1024);
288 print STDERR "## pointer: $pointer XRFMFB: $XRFMFB XRFMFP: $XRFMFP offset: $blk_off\n" if ($self->{'debug'});
290 # Get Record Information
292 seek($self->{'fileMST'},$blk_off,0);
294 read($self->{'fileMST'}, $buff, 4);
295 my $value=unpack("l",$buff);
297 print STDERR "## offset for rowid $value is $blk_off (blk $XRFMFB off $XRFMFP)\n" if ($self->{debug});
300 carp "Error: MFN ".$mfn." not found in MST(".$value.")";
301 #return; # XXX deleted record?
304 # $MFRL=$self->Read16($fileMST);
305 # $MFBWB=$self->Read32($fileMST);
306 # $MFBWP=$self->Read16($fileMST);
307 # $BASE=$self->Read16($fileMST);
308 # $NVF=$self->Read16($fileMST);
309 # $STATUS=$self->Read16($fileMST);
311 read($self->{'fileMST'}, $buff, 14);
313 my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);
315 print STDERR "## MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
318 delete $self->{record};
320 ## FIXME this is a bug
321 if (! $self->{'include_deleted'} && $MFRL < 0) {
322 print "## logically deleted record $mfn, skipping...\n" if ($self->{debug});
326 warn "BASE is not 18+6*NVF" unless ($BASE == 18 + 6 * $NVF);
328 # Get Directory Format
334 read($self->{'fileMST'}, $buff, 6 * $NVF);
338 for (my $i = 0 ; $i < $NVF ; $i++) {
340 # $TAG=$self->Read16($fileMST);
341 # $POS=$self->Read16($fileMST);
342 # $LEN=$self->Read16($fileMST);
344 my ($TAG,$POS,$LEN) = unpack("sss", substr($buff,$i * 6, 6));
346 print STDERR "## TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
348 # The TAG does not exists in .FDT so we set it to 0.
350 # XXX This is removed from perl version; .FDT file is updated manually, so
351 # you will often have fields in .MST file which aren't in .FDT. On the other
352 # hand, IsisMarc doesn't use .FDT files at all!
354 #if (! $self->{TagName}->{$TAG}) {
365 # Get Variable Fields
367 read($self->{'fileMST'},$buff,$rec_len);
369 print STDERR "## rec_len: $rec_len poc: ",tell($self->{'fileMST'})."\n" if ($self->{debug});
371 for (my $i = 0 ; $i < $NVF ; $i++) {
372 # skip zero-sized fields
373 next if ($FieldLEN[$i] == 0);
375 push @{$self->{record}->{$FieldTAG[$i]}}, substr($buff,$FieldPOS[$i],$FieldLEN[$i]);
379 $self->{'current_mfn'} = $mfn;
381 print Dumper($self),"\n" if ($self->{debug});
383 return $self->{'record'};
388 Dump ASCII output of record with specified MFN
390 print $isis->to_ascii(42);
392 It outputs something like this:
394 210 ^aNew York^cNew York University press^dcop. 1988
399 If C<read_fdt> is specified when calling C<new> it will display field names
400 from C<.FDT> file instead of numeric tags.
407 my $mfn = shift || croak "need MFN";
409 my $rec = $self->fetch($mfn);
413 foreach my $f (sort keys %{$rec}) {
414 my $fn = $self->tag_name($f);
415 $out .= "\n$fn\t".join("\n$fn\t",@{$self->{record}->{$f}});
425 Read record with specified MFN and convert it to hash
427 my $hash = $isis->to_hash($mfn);
429 It has ability to convert characters (using C<hash_filter> from ISIS
430 database before creating structures enabling character re-mapping or quick
433 This function returns hash which is like this:
438 'c' => 'New York University press',
450 You can later use that hash to produce any output from ISIS data.
452 If database is created using IsisMarc, it will also have to special fields
453 which will be used for identifiers, C<i1> and C<i2> like this:
460 'f' => 'Valdo D\'Arienzo',
461 'e' => 'tipografie e tipografi nel XVI secolo',
465 This method will also create additional field C<000> with MFN.
472 my $mfn = shift || confess "need mfn!";
474 # init record to include MFN as field 000
475 my $rec = { '000' => [ $mfn ] };
477 my $row = $self->fetch($mfn);
479 foreach my $k (keys %{$row}) {
480 foreach my $l (@{$row->{$k}}) {
483 $l = $self->{'hash_filter'}->($l) if ($self->{'hash_filter'});
488 ($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])//);
492 foreach my $t (split(/\^/,$l)) {
494 $val->{substr($t,0,1)} = substr($t,1);
500 push @{$rec->{$k}}, $val;
509 Return name of selected tag
511 print $isis->tag_name('200');
517 my $tag = shift || return;
518 return $self->{'TagName'}->{$tag} || $tag;
525 This module has been very lightly tested. Use with caution and report bugs.
532 http://www.rot13.org/~dpavlin/
534 This module is based heavily on code from C<LIBISIS.PHP> library to read ISIS files V0.1.1
535 written in php and (c) 2000 Franck Martin <franck@sopac.org> and released under LGPL.
539 This program is free software; you can redistribute
540 it and/or modify it under the same terms as Perl itself.
542 The full text of the license can be found in the
543 LICENSE file included with this module.
548 OpenIsis web site L<http://www.openisis.org>
550 perl4lib site L<http://perl4lib.perl.org>