5 use File::Glob qw(:globally :nocase);
11 use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
14 #Give a hoot don't pollute, do not export more than needed by default
23 IsisDB - Read CDS/ISIS, WinISIS and IsisMarc database
29 my $isis = new IsisDB(
30 isisdb => './cds/cds',
33 for(my $mfn = 1; $mfn <= $isis->{'maxmfn'}; $mfn++) {
34 print $isis->to_ascii($mfn),"\n";
39 This module will read ISIS databases created by DOS CDS/ISIS, WinIsis or
40 IsisMarc. It can be used as perl-only alternative to OpenIsis module which
41 seems to depriciate it's old C<XS> bindings for perl.
43 It can create hash values from data in ISIS database (using C<to_hash>),
44 ASCII dump (using C<to_ascii>) or just hash with field names and packed
45 values (like C<^asomething^belse>).
47 Unique feature of this module is ability to C<include_deleted> records.
48 It will also skip zero sized fields (OpenIsis has a bug in XS bindings, so
49 fields which are zero sized will be filled with random junk from memory).
51 It also has support for identifiers (only if ISIS database is created by
52 IsisMarc), see C<to_hash>.
54 This module will always be slower than OpenIsis module which use C
55 library. However, since it's written in perl, it's platform independent (so
56 you don't need C compiler), and can be easily modified. I hope that it
57 creates data structures which are easier to use than ones created by
58 OpenIsis, so reduced time in other parts of the code should compensate for
59 slower performance of this module (speed of reading ISIS database is
66 # my $ORDN; # Nodes Order
67 # my $ORDF; # Leafs Order
68 # my $N; # Number of Memory buffers for nodes
69 # my $K; # Number of buffers for first level index
70 # my $LIV; # Current number of Index Levels
71 # my $POSRX; # Pointer to Root Record in N0x
72 # my $NMAXPOS; # Next Available position in N0x
73 # my $FMAXPOS; # Next available position in L0x
74 # my $ABNORMAL; # Formal BTree normality indicator
84 my $isis = new IsisDB(
85 isisdb => './cds/cds',
95 Options are described below:
101 This is full or relative path to ISIS database files which include
102 common prefix of C<.MST>, and C<.XRF> and optionally C<.FDT> (if using
103 C<read_fdt> option) files.
105 In this example it uses C<./cds/cds.MST> and related files.
109 Boolean flag to specify if field definition table should be read. It's off
112 =item include_deleted
114 Don't skip logically deleted records in ISIS.
118 Filter code ref which will be used before data is converted to hash.
122 Dump a B<lot> of debugging output.
126 It will also set C<$isis-E<gt>{'maxmfn'}> which is maximum MFN stored in database.
133 bless($self, $class);
135 croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb});
137 foreach my $v (qw{isisdb debug include_deleted hash_filter}) {
138 $self->{$v} = {@_}->{$v};
141 my @isis_files = grep(/\.(FDT|MST|XRF|CNT)$/i,glob($self->{isisdb}."*"));
143 foreach my $f (@isis_files) {
144 my $ext = $1 if ($f =~ m/\.(\w\w\w)$/);
145 $self->{lc($ext)."_file"} = $f;
148 my @must_exist = qw(mst xrf);
149 push @must_exist, "fdt" if ($self->{read_fdt});
151 foreach my $ext (@must_exist) {
152 croak "missing ",uc($ext)," file in ",$self->{isisdb} unless ($self->{$ext."_file"});
155 print STDERR "## using files: ",join(" ",@isis_files),"\n" if ($self->{debug});
157 # if you want to read .FDT file use read_fdt argument when creating class!
158 if ($self->{read_fdt} && -e $self->{fdt_file}) {
160 # read the $db.FDT file for tags
163 open(fileFDT, $self->{fdt_file}) || croak "can't read '$self->{fdt_file}': $!";
168 my $name=substr($_,0,30);
169 my $tag=substr($_,50,3);
174 $self->{'TagName'}->{$tag}=$name;
185 # Get the Maximum MFN from $db.MST
187 open($self->{'fileMST'}, $self->{mst_file}) || croak "can't open '$self->{mst_file}': $!";
189 # MST format: (* = 32 bit signed)
191 # NXTMFN* MFN to be assigned to the next record created
192 # NXTMFB* last block allocated to master file
193 # NXTMFP offset to next available position in last block
194 # MFTYPE always 0 for user db file (1 for system)
195 seek($self->{'fileMST'},4,0);
199 read($self->{'fileMST'}, $buff, 4);
200 $self->{'NXTMFN'}=unpack("l",$buff) || carp "NXTNFN is zero";
203 $self->{'maxmfn'} = $self->{'NXTMFN'} - 1;
208 print STDERR Dumper($self),"\n" if ($self->{debug});
210 # open files for later
211 open($self->{'fileXRF'}, $self->{xrf_file}) || croak "can't open '$self->{xrf_file}': $!";
213 $self ? return $self : return undef;
218 Read content of C<.CNT> file and return hash containing it.
220 print Dumper($isis->read_cnt);
222 This function is not used by module (C<.CNT> files are not required for this
223 module to work), but it can be useful to examine your index (while debugging
231 croak "missing CNT file in ",$self->{isisdb} unless ($self->{cnt_file});
233 # Get the index information from $db.CNT
235 open(fileCNT, $self->{cnt_file}) || croak "can't read '$self->{cnt_file}': $!";
237 # There is two 26 Bytes fixed lenght records
239 # 0: IDTYPE BTree type 16
240 # 2: ORDN Nodes Order 16
241 # 4: ORDF Leafs Order 16
242 # 6: N Number of Memory buffers for nodes 16
243 # 8: K Number of buffers for first level index 16
244 # 10: LIV Current number of Index Levels 16
245 # 12: POSRX* Pointer to Root Record in N0x 32
246 # 16: NMAXPOS* Next Available position in N0x 32
247 # 20: FMAXPOS* Next available position in L0x 32
248 # 24: ABNORMAL Formal BTree normality indicator 16
254 my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
256 my $buff = shift || return;
257 my @arr = unpack("ssssssllls", $buff);
259 print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
261 my $IDTYPE = shift @arr;
263 $self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);
269 read(fileCNT, $buff, 26);
270 $self->unpack_cnt($buff);
272 read(fileCNT, $buff, 26);
273 $self->unpack_cnt($buff);
282 Read record with selected MFN
284 my $rec = $isis->fetch(55);
286 Returns hash with keys which are field names and values are unpacked values
287 for that field like this:
290 '210' => [ '^aNew York^cNew York University press^dcop. 1988' ],
291 '990' => [ '2140', '88', 'HAY' ],
299 my $mfn = shift || croak "fetch needs MFN as argument!";
301 # is mfn allready in memory?
302 my $old_mfn = $self->{'current_mfn'} || -1;
303 return $self->{record} if ($mfn == $old_mfn);
305 print STDERR "## fetch: $mfn\n" if ($self->{debug});
308 my $mfnpos=($mfn+int(($mfn-1)/127))*4;
310 print STDERR "## seeking to $mfnpos in file '$self->{xrf_file}'\n" if ($self->{debug});
311 seek($self->{'fileXRF'},$mfnpos,0);
316 delete $self->{record};
318 # read XRFMFB abd XRFMFP
319 read($self->{'fileXRF'}, $buff, 4);
320 my $pointer=unpack("l",$buff) || carp "pointer is null";
322 # check for logically deleted record
324 print STDERR "## record $mfn is logically deleted\n" if ($self->{debug});
325 $self->{deleted} = $mfn;
327 return unless $self->{include_deleted};
329 $pointer = abs($pointer);
332 my $XRFMFB = int($pointer/2048);
333 my $XRFMFP = $pointer - ($XRFMFB*2048);
335 # (XRFMFB - 1) * 512 + XRFMFP
336 # why do i have to do XRFMFP % 1024 ?
338 my $blk_off = (($XRFMFB - 1) * 512) + ($XRFMFP % 512);
340 print STDERR "## pointer: $pointer XRFMFB: $XRFMFB XRFMFP: $XRFMFP offset: $blk_off\n" if ($self->{'debug'});
342 # Get Record Information
344 seek($self->{'fileMST'},$blk_off,0);
346 read($self->{'fileMST'}, $buff, 4);
347 my $value=unpack("l",$buff);
349 print STDERR "## offset for rowid $value is $blk_off (blk $XRFMFB off $XRFMFP)\n" if ($self->{debug});
353 print STDERR "## record $mfn is physically deleted\n" if ($self->{debug});
354 $self->{deleted} = $mfn;
358 carp "Error: MFN ".$mfn." not found in MST file, found $value";
362 read($self->{'fileMST'}, $buff, 14);
364 my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);
366 print STDERR "## MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
368 warn "MFRL $MFRL is not even number" unless ($MFRL % 2 == 0);
370 warn "BASE is not 18+6*NVF" unless ($BASE == 18 + 6 * $NVF);
372 # Get Directory Format
378 read($self->{'fileMST'}, $buff, 6 * $NVF);
382 for (my $i = 0 ; $i < $NVF ; $i++) {
384 my ($TAG,$POS,$LEN) = unpack("sss", substr($buff,$i * 6, 6));
386 print STDERR "## TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
388 # The TAG does not exists in .FDT so we set it to 0.
390 # XXX This is removed from perl version; .FDT file is updated manually, so
391 # you will often have fields in .MST file which aren't in .FDT. On the other
392 # hand, IsisMarc doesn't use .FDT files at all!
394 #if (! $self->{TagName}->{$TAG}) {
405 # Get Variable Fields
407 read($self->{'fileMST'},$buff,$rec_len);
409 print STDERR "## rec_len: $rec_len poc: ",tell($self->{'fileMST'})."\n" if ($self->{debug});
411 for (my $i = 0 ; $i < $NVF ; $i++) {
412 # skip zero-sized fields
413 next if ($FieldLEN[$i] == 0);
415 push @{$self->{record}->{$FieldTAG[$i]}}, substr($buff,$FieldPOS[$i],$FieldLEN[$i]);
418 $self->{'current_mfn'} = $mfn;
420 print STDERR Dumper($self),"\n" if ($self->{debug});
422 return $self->{'record'};
427 Returns ASCII output of record with specified MFN
429 print $isis->to_ascii(42);
431 This outputs something like this:
433 210 ^aNew York^cNew York University press^dcop. 1988
438 If C<read_fdt> is specified when calling C<new> it will display field names
439 from C<.FDT> file instead of numeric tags.
446 my $mfn = shift || croak "need MFN";
448 my $rec = $self->fetch($mfn);
452 foreach my $f (sort keys %{$rec}) {
453 my $fn = $self->tag_name($f);
454 $out .= "\n$fn\t".join("\n$fn\t",@{$self->{record}->{$f}});
464 Read record with specified MFN and convert it to hash
466 my $hash = $isis->to_hash($mfn);
468 It has ability to convert characters (using C<hash_filter>) from ISIS
469 database before creating structures enabling character re-mapping or quick
472 This function returns hash which is like this:
477 'c' => 'New York University press',
489 You can later use that hash to produce any output from ISIS data.
491 If database is created using IsisMarc, it will also have to special fields
492 which will be used for identifiers, C<i1> and C<i2> like this:
499 'f' => 'Valdo D\'Arienzo',
500 'e' => 'tipografie e tipografi nel XVI secolo',
504 This method will also create additional field C<000> with MFN.
511 my $mfn = shift || confess "need mfn!";
513 # init record to include MFN as field 000
514 my $rec = { '000' => [ $mfn ] };
516 my $row = $self->fetch($mfn);
518 foreach my $k (keys %{$row}) {
519 foreach my $l (@{$row->{$k}}) {
522 $l = $self->{'hash_filter'}->($l) if ($self->{'hash_filter'});
527 ($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])\^/\^/);
531 foreach my $t (split(/\^/,$l)) {
533 $val->{substr($t,0,1)} = substr($t,1);
539 push @{$rec->{$k}}, $val;
548 Return name of selected tag
550 print $isis->tag_name('200');
556 my $tag = shift || return;
557 return $self->{'TagName'}->{$tag} || $tag;
564 Some parts of CDS/ISIS documentation are not detailed enough to exmplain
565 some variations in input databases which has been tested with this module.
566 When I was in doubt, I assumed that OpenIsis's implementation was right
567 (except for obvious bugs).
569 However, every effort has been made to test this module with as much
570 databases (and programs that create them) as possible.
572 I would be very greatful for success or failure reports about usage of this
573 module with databases from programs other than WinIsis and IsisMarc. I had
574 tested this against ouput of one C<isis.dll>-based application, but I don't
575 know any details about it's version.
582 http://www.rot13.org/~dpavlin/
584 This module is based heavily on code from C<LIBISIS.PHP> library to read ISIS files V0.1.1
585 written in php and (c) 2000 Franck Martin <franck@sopac.org> and released under LGPL.
589 This program is free software; you can redistribute
590 it and/or modify it under the same terms as Perl itself.
592 The full text of the license can be found in the
593 LICENSE file included with this module.
598 OpenIsis web site L<http://www.openisis.org>
600 perl4lib site L<http://perl4lib.perl.org>