5 use File::Glob qw(:globally :nocase);
11 use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
14 #Give a hoot don't pollute, do not export more than needed by default
23 Biblio::Isis - Read CDS/ISIS, WinISIS and IsisMarc database
29 my $isis = new Biblio::Isis(
30 isisdb => './cds/cds',
33 for(my $mfn = 1; $mfn <= $isis->count; $mfn++) {
34 print $isis->to_ascii($mfn),"\n";
39 This module will read ISIS databases created by DOS CDS/ISIS, WinIsis or
40 IsisMarc. It can be used as perl-only alternative to OpenIsis module which
41 seems to depriciate it's old C<XS> bindings for perl.
43 It can create hash values from data in ISIS database (using C<to_hash>),
44 ASCII dump (using C<to_ascii>) or just hash with field names and packed
45 values (like C<^asomething^belse>).
47 Unique feature of this module is ability to C<include_deleted> records.
48 It will also skip zero sized fields (OpenIsis has a bug in XS bindings, so
49 fields which are zero sized will be filled with random junk from memory).
51 It also has support for identifiers (only if ISIS database is created by
52 IsisMarc), see C<to_hash>.
54 This module will always be slower than OpenIsis module which use C
55 library. However, since it's written in perl, it's platform independent (so
56 you don't need C compiler), and can be easily modified. I hope that it
57 creates data structures which are easier to use than ones created by
58 OpenIsis, so reduced time in other parts of the code should compensate for
59 slower performance of this module (speed of reading ISIS database is
66 # my $ORDN; # Nodes Order
67 # my $ORDF; # Leafs Order
68 # my $N; # Number of Memory buffers for nodes
69 # my $K; # Number of buffers for first level index
70 # my $LIV; # Current number of Index Levels
71 # my $POSRX; # Pointer to Root Record in N0x
72 # my $NMAXPOS; # Next Available position in N0x
73 # my $FMAXPOS; # Next available position in L0x
74 # my $ABNORMAL; # Formal BTree normality indicator
84 my $isis = new Biblio::Isis(
85 isisdb => './cds/cds',
95 Options are described below:
101 This is full or relative path to ISIS database files which include
102 common prefix of C<.MST>, and C<.XRF> and optionally C<.FDT> (if using
103 C<read_fdt> option) files.
105 In this example it uses C<./cds/cds.MST> and related files.
109 Boolean flag to specify if field definition table should be read. It's off
112 =item include_deleted
114 Don't skip logically deleted records in ISIS.
118 Filter code ref which will be used before data is converted to hash.
122 Dump a B<lot> of debugging output.
131 bless($self, $class);
133 croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb});
135 foreach my $v (qw{isisdb debug include_deleted hash_filter}) {
136 $self->{$v} = {@_}->{$v};
139 my @isis_files = grep(/\.(FDT|MST|XRF|CNT)$/i,glob($self->{isisdb}."*"));
141 foreach my $f (@isis_files) {
142 my $ext = $1 if ($f =~ m/\.(\w\w\w)$/);
143 $self->{lc($ext)."_file"} = $f;
146 my @must_exist = qw(mst xrf);
147 push @must_exist, "fdt" if ($self->{read_fdt});
149 foreach my $ext (@must_exist) {
150 unless ($self->{$ext."_file"}) {
151 carp "missing ",uc($ext)," file in ",$self->{isisdb};
156 print STDERR "## using files: ",join(" ",@isis_files),"\n" if ($self->{debug});
158 # if you want to read .FDT file use read_fdt argument when creating class!
159 if ($self->{read_fdt} && -e $self->{fdt_file}) {
161 # read the $db.FDT file for tags
164 open(my $fileFDT, $self->{fdt_file}) || croak "can't read '$self->{fdt_file}': $!";
170 my $name=substr($_,0,30);
171 my $tag=substr($_,50,3);
176 $self->{'TagName'}->{$tag}=$name;
187 # Get the Maximum MFN from $db.MST
189 open($self->{'fileMST'}, $self->{mst_file}) || croak "can't open '$self->{mst_file}': $!";
190 binmode($self->{'fileMST'});
192 # MST format: (* = 32 bit signed)
194 # NXTMFN* MFN to be assigned to the next record created
195 # NXTMFB* last block allocated to master file
196 # NXTMFP offset to next available position in last block
197 # MFTYPE always 0 for user db file (1 for system)
198 seek($self->{'fileMST'},4,0) || croak "can't seek to offset 0 in MST: $!";
202 read($self->{'fileMST'}, $buff, 4) || croak "can't read NXTMFN from MST: $!";
203 $self->{'NXTMFN'}=unpack("V",$buff) || croak "NXTNFN is zero";
205 print STDERR Dumper($self),"\n" if ($self->{debug});
207 # open files for later
208 open($self->{'fileXRF'}, $self->{xrf_file}) || croak "can't open '$self->{xrf_file}': $!";
209 binmode($self->{'fileXRF'});
211 $self ? return $self : return undef;
216 Return number of records in database
224 return $self->{'NXTMFN'} - 1;
229 Read record with selected MFN
231 my $rec = $isis->fetch(55);
233 Returns hash with keys which are field names and values are unpacked values
234 for that field like this:
237 '210' => [ '^aNew York^cNew York University press^dcop. 1988' ],
238 '990' => [ '2140', '88', 'HAY' ],
246 my $mfn = shift || croak "fetch needs MFN as argument!";
248 # is mfn allready in memory?
249 my $old_mfn = $self->{'current_mfn'} || -1;
250 return $self->{record} if ($mfn == $old_mfn);
252 print STDERR "## fetch: $mfn\n" if ($self->{debug});
255 my $mfnpos=($mfn+int(($mfn-1)/127))*4;
257 print STDERR "## seeking to $mfnpos in file '$self->{xrf_file}'\n" if ($self->{debug});
258 seek($self->{'fileXRF'},$mfnpos,0);
263 delete $self->{record};
265 # read XRFMFB abd XRFMFP
266 read($self->{'fileXRF'}, $buff, 4);
267 my $pointer=unpack("V",$buff);
269 if ($self->{include_deleted}) {
272 warn "pointer for MFN $mfn is null\n";
277 # check for logically deleted record
278 if ($pointer & 0x80000000) {
279 print STDERR "## record $mfn is logically deleted\n" if ($self->{debug});
280 $self->{deleted} = $mfn;
282 return unless $self->{include_deleted};
285 $pointer = ($pointer ^ 0xffffffff) + 1;
288 my $XRFMFB = int($pointer/2048);
289 my $XRFMFP = $pointer - ($XRFMFB*2048);
291 # (XRFMFB - 1) * 512 + XRFMFP
292 # why do i have to do XRFMFP % 1024 ?
294 my $blk_off = (($XRFMFB - 1) * 512) + ($XRFMFP % 512);
296 print STDERR "## pointer: $pointer XRFMFB: $XRFMFB XRFMFP: $XRFMFP offset: $blk_off\n" if ($self->{'debug'});
298 # Get Record Information
300 seek($self->{'fileMST'},$blk_off,0) || croak "can't seek to $blk_off: $!";
302 read($self->{'fileMST'}, $buff, 4) || croak "can't read 4 bytes at offset $blk_off from MST file: $!";
303 my $value=unpack("V",$buff);
305 print STDERR "## offset for rowid $value is $blk_off (blk $XRFMFB off $XRFMFP)\n" if ($self->{debug});
309 print STDERR "## record $mfn is physically deleted\n" if ($self->{debug});
310 $self->{deleted} = $mfn;
314 carp "Error: MFN ".$mfn." not found in MST file, found $value";
318 read($self->{'fileMST'}, $buff, 14);
320 my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("vVvvvv", $buff);
322 print STDERR "## MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
324 warn "MFRL $MFRL is not even number" unless ($MFRL % 2 == 0);
326 warn "BASE is not 18+6*NVF" unless ($BASE == 18 + 6 * $NVF);
328 # Get Directory Format
334 read($self->{'fileMST'}, $buff, 6 * $NVF);
338 for (my $i = 0 ; $i < $NVF ; $i++) {
340 my ($TAG,$POS,$LEN) = unpack("vvv", substr($buff,$i * 6, 6));
342 print STDERR "## TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
344 # The TAG does not exists in .FDT so we set it to 0.
346 # XXX This is removed from perl version; .FDT file is updated manually, so
347 # you will often have fields in .MST file which aren't in .FDT. On the other
348 # hand, IsisMarc doesn't use .FDT files at all!
350 #if (! $self->{TagName}->{$TAG}) {
361 # Get Variable Fields
363 read($self->{'fileMST'},$buff,$rec_len);
365 print STDERR "## rec_len: $rec_len poc: ",tell($self->{'fileMST'})."\n" if ($self->{debug});
367 for (my $i = 0 ; $i < $NVF ; $i++) {
368 # skip zero-sized fields
369 next if ($FieldLEN[$i] == 0);
371 push @{$self->{record}->{$FieldTAG[$i]}}, substr($buff,$FieldPOS[$i],$FieldLEN[$i]);
374 $self->{'current_mfn'} = $mfn;
376 print STDERR Dumper($self),"\n" if ($self->{debug});
378 return $self->{'record'};
383 Returns ASCII output of record with specified MFN
385 print $isis->to_ascii(42);
387 This outputs something like this:
389 210 ^aNew York^cNew York University press^dcop. 1988
394 If C<read_fdt> is specified when calling C<new> it will display field names
395 from C<.FDT> file instead of numeric tags.
402 my $mfn = shift || croak "need MFN";
404 my $rec = $self->fetch($mfn) || return;
408 foreach my $f (sort keys %{$rec}) {
409 my $fn = $self->tag_name($f);
410 $out .= "\n$fn\t".join("\n$fn\t",@{$self->{record}->{$f}});
420 Read record with specified MFN and convert it to hash
422 my $hash = $isis->to_hash($mfn);
424 It has ability to convert characters (using C<hash_filter>) from ISIS
425 database before creating structures enabling character re-mapping or quick
428 This function returns hash which is like this:
433 'c' => 'New York University press',
445 You can later use that hash to produce any output from ISIS data.
447 If database is created using IsisMarc, it will also have to special fields
448 which will be used for identifiers, C<i1> and C<i2> like this:
455 'f' => 'Valdo D\'Arienzo',
456 'e' => 'tipografie e tipografi nel XVI secolo',
460 This method will also create additional field C<000> with MFN.
467 my $mfn = shift || confess "need mfn!";
469 # init record to include MFN as field 000
470 my $rec = { '000' => [ $mfn ] };
472 my $row = $self->fetch($mfn) || return;
474 foreach my $k (keys %{$row}) {
475 foreach my $l (@{$row->{$k}}) {
478 if ($self->{'hash_filter'}) {
479 $l = $self->{'hash_filter'}->($l);
480 next unless defined($l);
486 ($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])\^/\^/);
490 foreach my $t (split(/\^/,$l)) {
492 $val->{substr($t,0,1)} = substr($t,1);
498 push @{$rec->{$k}}, $val;
507 Return name of selected tag
509 print $isis->tag_name('200');
515 my $tag = shift || return;
516 return $self->{'TagName'}->{$tag} || $tag;
522 Read content of C<.CNT> file and return hash containing it.
524 print Dumper($isis->read_cnt);
526 This function is not used by module (C<.CNT> files are not required for this
527 module to work), but it can be useful to examine your index (while debugging
535 croak "missing CNT file in ",$self->{isisdb} unless ($self->{cnt_file});
537 # Get the index information from $db.CNT
539 open(my $fileCNT, $self->{cnt_file}) || croak "can't read '$self->{cnt_file}': $!";
544 read($fileCNT, $buff, 26) || croak "can't read first table from CNT: $!";
545 $self->unpack_cnt($buff);
547 read($fileCNT, $buff, 26) || croak "can't read second table from CNT: $!";
548 $self->unpack_cnt($buff);
557 Unpack one of two 26 bytes fixed length record in C<.CNT> file.
559 Here is definition of record:
561 off key description size
562 0: IDTYPE BTree type s
563 2: ORDN Nodes Order s
564 4: ORDF Leafs Order s
565 6: N Number of Memory buffers for nodes s
566 8: K Number of buffers for first level index s
567 10: LIV Current number of Index Levels s
568 12: POSRX Pointer to Root Record in N0x l
569 16: NMAXPOS Next Available position in N0x l
570 20: FMAXPOS Next available position in L0x l
571 24: ABNORMAL Formal BTree normality indicator s
574 This will fill C<$self> object under C<cnt> with hash. It's used by C<read_cnt>.
581 my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
583 my $buff = shift || return;
584 my @arr = unpack("vvvvvvVVVv", $buff);
586 print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
588 my $IDTYPE = shift @arr;
590 $self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);
598 Some parts of CDS/ISIS documentation are not detailed enough to exmplain
599 some variations in input databases which has been tested with this module.
600 When I was in doubt, I assumed that OpenIsis's implementation was right
601 (except for obvious bugs).
603 However, every effort has been made to test this module with as much
604 databases (and programs that create them) as possible.
606 I would be very greatful for success or failure reports about usage of this
607 module with databases from programs other than WinIsis and IsisMarc. I had
608 tested this against ouput of one C<isis.dll>-based application, but I don't
609 know any details about it's version.
616 http://www.rot13.org/~dpavlin/
618 This module is based heavily on code from C<LIBISIS.PHP> library to read ISIS files V0.1.1
619 written in php and (c) 2000 Franck Martin <franck@sopac.org> and released under LGPL.
623 This program is free software; you can redistribute
624 it and/or modify it under the same terms as Perl itself.
626 The full text of the license can be found in the
627 LICENSE file included with this module.
632 OpenIsis web site L<http://www.openisis.org>
634 perl4lib site L<http://perl4lib.perl.org>