5 use File::Glob qw(:globally :nocase);
11 use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
14 #Give a hoot don't pollute, do not export more than needed by default
23 IsisDB - Read CDS/ISIS, WinISIS and IsisMarc database
29 my $isis = new IsisDB(
30 isisdb => './cds/cds',
33 for(my $mfn = 1; $mfn <= $isis->{'maxmfn'}; $mfn++) {
34 print $isis->to_ascii($mfn),"\n";
39 This module will read ISIS databases created by DOS CDS/ISIS, WinIsis or
40 IsisMarc. It can be used as perl-only alternative to OpenIsis module.
42 It can create hash values from data in ISIS database (using C<to_hash>),
43 ASCII dump (using C<to_ascii>) or just hash with field names and packed
44 values (like C<^asomething^belse>).
46 Unique feature of this module is ability to C<include_deleted> records.
47 It will also skip zero sized fields (OpenIsis has a bug in XS bindings, so
48 fields which are zero sized will be filled with random junk from memory).
50 It also has support for identifiers (only if ISIS database is created by
51 IsisMarc), see C<to_hash>.
53 This will module will always be slower than OpenIsis module which use C
54 library. However, since it's written in perl, it's platform independent (so
55 you don't need C compiler), and can be easily modified. I hope that it
56 creates data structures which are easier to use than ones created by
57 OpenIsis, so reduced time in other parts of the code should compensate for
58 slower performance of this module (speed of reading ISIS database is
65 # my $ORDN; # Nodes Order
66 # my $ORDF; # Leafs Order
67 # my $N; # Number of Memory buffers for nodes
68 # my $K; # Number of buffers for first level index
69 # my $LIV; # Current number of Index Levels
70 # my $POSRX; # Pointer to Root Record in N0x
71 # my $NMAXPOS; # Next Available position in N0x
72 # my $FMAXPOS; # Next available position in L0x
73 # my $ABNORMAL; # Formal BTree normality indicator
83 my $isis = new IsisDB(
84 isisdb => './cds/cds',
94 Options are described below:
100 This is full or relative path to ISIS database files which include
101 common prefix of C<.MST>, and C<.XRF> and optionally C<.FDT> (if using
102 C<read_fdt> option) files.
104 In this example it uses C<./cds/cds.MST> and related files.
108 Boolean flag to specify if field definition table should be read. It's off
111 =item include_deleted
113 Don't skip logically deleted records in ISIS.
117 Filter code ref which will be used before data is converted to hash.
121 Dump a B<lot> of debugging output.
125 It will also set C<$isis-E<gt>{'maxmfn'}> which is maximum MFN stored in database.
132 bless($self, $class);
134 croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb});
136 foreach my $v (qw{isisdb debug include_deleted hash_filter}) {
137 $self->{$v} = {@_}->{$v};
140 my @isis_files = grep(/\.(FDT|MST|XRF|CNT)$/i,glob($self->{isisdb}."*"));
142 foreach my $f (@isis_files) {
143 my $ext = $1 if ($f =~ m/\.(\w\w\w)$/);
144 $self->{lc($ext)."_file"} = $f;
147 my @must_exist = qw(mst xrf);
148 push @must_exist, "fdt" if ($self->{read_fdt});
150 foreach my $ext (@must_exist) {
151 croak "missing ",uc($ext)," file in ",$self->{isisdb} unless ($self->{$ext."_file"});
154 print STDERR "## using files: ",join(" ",@isis_files),"\n" if ($self->{debug});
156 # if you want to read .FDT file use read_fdt argument when creating class!
157 if ($self->{read_fdt} && -e $self->{fdt_file}) {
159 # read the $db.FDT file for tags
162 open(fileFDT, $self->{fdt_file}) || croak "can't read '$self->{fdt_file}': $!";
167 my $name=substr($_,0,30);
168 my $tag=substr($_,50,3);
173 $self->{'TagName'}->{$tag}=$name;
184 # Get the Maximum MFN from $db.MST
186 open($self->{'fileMST'}, $self->{mst_file}) || croak "can't open '$self->{mst_file}': $!";
188 # MST format: (* = 32 bit signed)
190 # NXTMFN* MFN to be assigned to the next record created
191 # NXTMFB* last block allocated to master file
192 # NXTMFP offset to next available position in last block
193 # MFTYPE always 0 for user db file (1 for system)
194 seek($self->{'fileMST'},4,0);
198 read($self->{'fileMST'}, $buff, 4);
199 $self->{'NXTMFN'}=unpack("l",$buff) || carp "NXTNFN is zero";
202 $self->{'maxmfn'} = $self->{'NXTMFN'} - 1;
207 print STDERR Dumper($self),"\n" if ($self->{debug});
209 # open files for later
210 open($self->{'fileXRF'}, $self->{xrf_file}) || croak "can't open '$self->{xrf_file}': $!";
212 $self ? return $self : return undef;
217 This function is not really used by module, but can be useful to find info
218 about your index (if debugging it for example).
220 print Dumper($isis->read_cnt);
227 croak "missing CNT file in ",$self->{isisdb} unless ($self->{cnt_file});
229 # Get the index information from $db.CNT
231 open(fileCNT, $self->{cnt_file}) || croak "can't read '$self->{cnt_file}': $!";
233 # There is two 26 Bytes fixed lenght records
235 # 0: IDTYPE BTree type 16
236 # 2: ORDN Nodes Order 16
237 # 4: ORDF Leafs Order 16
238 # 6: N Number of Memory buffers for nodes 16
239 # 8: K Number of buffers for first level index 16
240 # 10: LIV Current number of Index Levels 16
241 # 12: POSRX* Pointer to Root Record in N0x 32
242 # 16: NMAXPOS* Next Available position in N0x 32
243 # 20: FMAXPOS* Next available position in L0x 32
244 # 24: ABNORMAL Formal BTree normality indicator 16
250 my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
252 my $buff = shift || return;
253 my @arr = unpack("ssssssllls", $buff);
255 print STDERR "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
257 my $IDTYPE = shift @arr;
259 $self->{cnt}->{$IDTYPE}->{$_} = abs(shift @arr);
265 read(fileCNT, $buff, 26);
266 $self->unpack_cnt($buff);
268 read(fileCNT, $buff, 26);
269 $self->unpack_cnt($buff);
278 Read record with selected MFN
280 my $rec = $isis->fetch(55);
282 Returns hash with keys which are field names and values are unpacked values
283 for that field like this:
286 '210' => [ '^aNew York^cNew York University press^dcop. 1988' ],
287 '990' => [ '2140', '88', 'HAY' ],
295 my $mfn = shift || croak "fetch needs MFN as argument!";
297 # is mfn allready in memory?
298 my $old_mfn = $self->{'current_mfn'} || -1;
299 return if ($mfn == $old_mfn);
301 print STDERR "## fetch: $mfn\n" if ($self->{debug});
304 my $mfnpos=($mfn+int(($mfn-1)/127))*4;
306 print STDERR "## seeking to $mfnpos in file '$self->{xrf_file}'\n" if ($self->{debug});
307 seek($self->{'fileXRF'},$mfnpos,0);
311 # read XRFMFB abd XRFMFP
312 read($self->{'fileXRF'}, $buff, 4);
313 my $pointer=unpack("l",$buff) || carp "pointer is null";
315 my $XRFMFB = int($pointer/2048);
316 my $XRFMFP = $pointer - ($XRFMFB*2048);
319 # (XRFMFB - 1) * 512 + XRFMFP
320 # why do i have to do XRFMFP % 1024 ?
322 my $blk_off = (($XRFMFB - 1) * 512) + ($XRFMFP % 1024);
324 print STDERR "## pointer: $pointer XRFMFB: $XRFMFB XRFMFP: $XRFMFP offset: $blk_off\n" if ($self->{'debug'});
326 # Get Record Information
328 seek($self->{'fileMST'},$blk_off,0);
330 read($self->{'fileMST'}, $buff, 4);
331 my $value=unpack("l",$buff);
333 print STDERR "## offset for rowid $value is $blk_off (blk $XRFMFB off $XRFMFP)\n" if ($self->{debug});
336 carp "Error: MFN ".$mfn." not found in MST(".$value.")";
337 #return; # XXX deleted record?
340 # $MFRL=$self->Read16($fileMST);
341 # $MFBWB=$self->Read32($fileMST);
342 # $MFBWP=$self->Read16($fileMST);
343 # $BASE=$self->Read16($fileMST);
344 # $NVF=$self->Read16($fileMST);
345 # $STATUS=$self->Read16($fileMST);
347 read($self->{'fileMST'}, $buff, 14);
349 my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);
351 print STDERR "## MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
354 delete $self->{record};
356 ## FIXME this is a bug
357 if (! $self->{'include_deleted'} && $MFRL < 0) {
358 print "## logically deleted record $mfn, skipping...\n" if ($self->{debug});
362 warn "BASE is not 18+6*NVF" unless ($BASE == 18 + 6 * $NVF);
364 # Get Directory Format
370 read($self->{'fileMST'}, $buff, 6 * $NVF);
374 for (my $i = 0 ; $i < $NVF ; $i++) {
376 # $TAG=$self->Read16($fileMST);
377 # $POS=$self->Read16($fileMST);
378 # $LEN=$self->Read16($fileMST);
380 my ($TAG,$POS,$LEN) = unpack("sss", substr($buff,$i * 6, 6));
382 print STDERR "## TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
384 # The TAG does not exists in .FDT so we set it to 0.
386 # XXX This is removed from perl version; .FDT file is updated manually, so
387 # you will often have fields in .MST file which aren't in .FDT. On the other
388 # hand, IsisMarc doesn't use .FDT files at all!
390 #if (! $self->{TagName}->{$TAG}) {
401 # Get Variable Fields
403 read($self->{'fileMST'},$buff,$rec_len);
405 print STDERR "## rec_len: $rec_len poc: ",tell($self->{'fileMST'})."\n" if ($self->{debug});
407 for (my $i = 0 ; $i < $NVF ; $i++) {
408 # skip zero-sized fields
409 next if ($FieldLEN[$i] == 0);
411 push @{$self->{record}->{$FieldTAG[$i]}}, substr($buff,$FieldPOS[$i],$FieldLEN[$i]);
414 $self->{'current_mfn'} = $mfn;
416 print Dumper($self),"\n" if ($self->{debug});
418 return $self->{'record'};
423 Dump ASCII output of record with specified MFN
425 print $isis->to_ascii(42);
427 It outputs something like this:
429 210 ^aNew York^cNew York University press^dcop. 1988
434 If C<read_fdt> is specified when calling C<new> it will display field names
435 from C<.FDT> file instead of numeric tags.
442 my $mfn = shift || croak "need MFN";
444 my $rec = $self->fetch($mfn);
448 foreach my $f (sort keys %{$rec}) {
449 my $fn = $self->tag_name($f);
450 $out .= "\n$fn\t".join("\n$fn\t",@{$self->{record}->{$f}});
460 Read record with specified MFN and convert it to hash
462 my $hash = $isis->to_hash($mfn);
464 It has ability to convert characters (using C<hash_filter> from ISIS
465 database before creating structures enabling character re-mapping or quick
468 This function returns hash which is like this:
473 'c' => 'New York University press',
485 You can later use that hash to produce any output from ISIS data.
487 If database is created using IsisMarc, it will also have to special fields
488 which will be used for identifiers, C<i1> and C<i2> like this:
495 'f' => 'Valdo D\'Arienzo',
496 'e' => 'tipografie e tipografi nel XVI secolo',
500 This method will also create additional field C<000> with MFN.
507 my $mfn = shift || confess "need mfn!";
509 # init record to include MFN as field 000
510 my $rec = { '000' => [ $mfn ] };
512 my $row = $self->fetch($mfn);
514 foreach my $k (keys %{$row}) {
515 foreach my $l (@{$row->{$k}}) {
518 $l = $self->{'hash_filter'}->($l) if ($self->{'hash_filter'});
523 ($val->{'i1'},$val->{'i2'}) = ($1,$2) if ($l =~ s/^([01 #])([01 #])//);
527 foreach my $t (split(/\^/,$l)) {
529 $val->{substr($t,0,1)} = substr($t,1);
535 push @{$rec->{$k}}, $val;
544 Return name of selected tag
546 print $isis->tag_name('200');
552 my $tag = shift || return;
553 return $self->{'TagName'}->{$tag} || $tag;
560 This module has been very lightly tested. Use with caution and report bugs.
567 http://www.rot13.org/~dpavlin/
569 This module is based heavily on code from C<LIBISIS.PHP> library to read ISIS files V0.1.1
570 written in php and (c) 2000 Franck Martin <franck@sopac.org> and released under LGPL.
574 This program is free software; you can redistribute
575 it and/or modify it under the same terms as Perl itself.
577 The full text of the license can be found in the
578 LICENSE file included with this module.
583 OpenIsis web site L<http://www.openisis.org>
585 perl4lib site L<http://perl4lib.perl.org>