9 use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
12 #Give a hoot don't pollute, do not export more than needed by default
21 IsisDB - Read CDS/ISIS database
27 my $isis = new IsisDB(
28 isisdb => './cds/cds',
31 for(my $mfn = 1; $mfn <= $isis->{'maxmfn'}; $mfn++) {
32 print $isis->to_ascii($mfn),"\n";
37 This module will read CDS/ISIS databases and create hash values out of it.
38 It can be used as perl-only alternative to OpenIsis module.
40 This will module will always be slower that OpenIsis module which use C
41 library. However, since it's written in perl, it's platform independent (so
42 you don't need C compiler), and can be easily modified.
44 Unique feature of this module is ability to C<include_deleted> records.
45 It will also skip zero sized fields (OpenIsis has a bug in XS bindings, so
46 fields which are zero sized will be filled with random junk from memory).
52 # my $ORDN; # Nodes Order
53 # my $ORDF; # Leafs Order
54 # my $N; # Number of Memory buffers for nodes
55 # my $K; # Number of buffers for first level index
56 # my $LIV; # Current number of Index Levels
57 # my $POSRX; # Pointer to Root Record in N0x
58 # my $NMAXPOS; # Next Available position in N0x
59 # my $FMAXPOS; # Next available position in L0x
60 # my $ABNORMAL; # Formal BTree normality indicator
68 Open CDS/ISIS database
70 my $isis = new IsisDB(
71 isisdb => './cds/cds',
81 Options are described below:
87 Prefix path to CDS/ISIS. It should contain full or relative path to database
88 and common prefix of C<.FDT>, C<.MST>, C<.CNT>, C<.XRF> and C<.MST> files.
92 Boolean flag to specify if field definition table should be read. It's off
97 Don't skip logically deleted records in ISIS.
101 Filter code ref which will be used before data is converted to hash.
105 Dump a B<lot> of debugging output.
109 It will also set C<$isis-E<gt>{'maxmfn'}> which is maximum MFN stored in database.
116 bless($self, $class);
118 croak "new needs database name (isisdb) as argument!" unless ({@_}->{isisdb});
120 foreach my $v (qw{isisdb debug include_deleted hash_filter}) {
121 $self->{$v} = {@_}->{$v};
124 # if you want to read .FDT file use read_fdt argument when creating class!
125 if ({@_}->{read_fdt} && -e $self->{isisdb}.".FDT") {
127 # read the $db.FDT file for tags
130 open(fileFDT, $self->{isisdb}.".FDT") || croak "can't read '$self->{isisdb}.FDT': $!";
135 my $name=substr($_,0,30);
136 my $tag=substr($_,50,3);
141 $self->{'TagName'}->{$tag}=$name;
152 # Get the Maximum MFN from $db.MST
154 open(fileMST,$self->{isisdb}.".MST") || croak "can't read '$self->{isisdb}.MST': $!";
156 # MST format: (* = 32 bit signed)
158 # NXTMFN* MFN to be assigned to the next record created
159 # NXTMFB* last block allocated to master file
160 # NXTMFP offset to next available position in last block
161 # MFTYPE always 0 for user db file (1 for system)
166 read(fileMST, $buff, 4);
167 $self->{'NXTMFN'}=unpack("l",$buff) || carp "NXTNFN is zero";
170 $self->{'maxmfn'} = $self->{'NXTMFN'} - 1;
174 # Get the index information from $db.CNT
176 open(fileCNT, $self->{isisdb}.".CNT") || croak "can't read '$self->{isisdb}.CNT': $!";
178 # There is two 26 Bytes fixed lenght records
180 # 0: IDTYPE BTree type 16
181 # 2: ORDN Nodes Order 16
182 # 4: ORDF Leafs Order 16
183 # 6: N Number of Memory buffers for nodes 16
184 # 8: K Number of buffers for first level index 16
185 # 10: LIV Current number of Index Levels 16
186 # 12: POSRX* Pointer to Root Record in N0x 32
187 # 16: NMAXPOS* Next Available position in N0x 32
188 # 20: FMAXPOS* Next available position in L0x 32
189 # 24: ABNORMAL Formal BTree normality indicator 16
195 my @flds = qw(ORDN ORDF N K LIV POSRX NMAXPOS FMAXPOS ABNORMAL);
197 my $buff = shift || return;
198 my @arr = unpack("ssssssllls", $buff);
200 print "unpack_cnt: ",join(" ",@arr),"\n" if ($self->{'debug'});
202 my $IDTYPE = shift @arr;
204 $self->{$IDTYPE}->{$_} = abs(shift @arr);
208 read(fileCNT, $buff, 26);
209 $self->unpack_cnt($buff);
211 read(fileCNT, $buff, 26);
212 $self->unpack_cnt($buff);
217 print Dumper($self) if ($self->{debug});
219 # open files for later
220 open($self->{'fileXRF'}, $self->{isisdb}.".XRF") || croak "can't open '$self->{isisdb}.XRF': $!";
222 open($self->{'fileMST'}, $self->{isisdb}.".MST") || croak "can't open '$self->{isisdb}.MST': $!";
224 $self ? return $self : return undef;
229 Read record with selected MFN
231 my $rec = $isis->fetch(55);
233 Returns hash with keys which are field names and values are unpacked values
234 for that field (like C<^asometing^bsomething else>)
241 my $mfn = shift || croak "fetch needs MFN as argument!";
243 print "fetch: $mfn\n" if ($self->{debug});
246 my $mfnpos=($mfn+int(($mfn-1)/127))*4;
248 print "seeking to $mfnpos in file '$self->{isisdb}.XRF'\n" if ($self->{debug});
249 seek($self->{'fileXRF'},$mfnpos,0);
253 # read XRFMFB abd XRFMFP
254 read($self->{'fileXRF'}, $buff, 4);
255 my $pointer=unpack("l",$buff) || carp "pointer is null";
257 my $XRFMFB = int($pointer/2048);
258 my $XRFMFP = $pointer - ($XRFMFB*2048);
260 print "XRFMFB: $XRFMFB XRFMFP: $XRFMFP\n" if ($self->{debug});
262 # XXX fix this to be more readable!!
263 # e.g. (XRFMFB - 1) * 512 + XRFMFP
265 my $offset = $pointer;
266 my $offset2=int($offset/2048)-1;
267 my $offset22=int($offset/4096);
268 my $offset3=$offset-($offset22*4096);
270 $offset3=$offset3-2048;
272 my $offset4=($offset2*512)+$offset3;
274 print "$offset - $offset2 - $offset3 - $offset4\n" if ($self->{debug});
276 # Get Record Information
278 seek($self->{'fileMST'},$offset4,0);
280 read($self->{'fileMST'}, $buff, 4);
281 my $value=unpack("l",$buff);
284 print ("Error: The MFN:".$mfn." is not found in MST(".$value.")");
285 return -1; # XXX deleted record?
288 # $MFRL=$self->Read16($fileMST);
289 # $MFBWB=$self->Read32($fileMST);
290 # $MFBWP=$self->Read16($fileMST);
291 # $BASE=$self->Read16($fileMST);
292 # $NVF=$self->Read16($fileMST);
293 # $STATUS=$self->Read16($fileMST);
295 read($self->{'fileMST'}, $buff, 14);
297 my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);
299 print "MFRL: $MFRL MFBWB: $MFBWB MFBWP: $MFBWP BASE: $BASE NVF: $NVF STATUS: $STATUS\n" if ($self->{debug});
302 delete $self->{record};
304 if (! $self->{'include_deleted'} && $MFRL < 0) {
305 print "## logically deleted record $mfn, skipping...\n" if ($self->{debug});
309 # Get Directory Format
315 read($self->{'fileMST'}, $buff, 6 * $NVF);
319 for (my $i = 0 ; $i < $NVF ; $i++) {
321 # $TAG=$self->Read16($fileMST);
322 # $POS=$self->Read16($fileMST);
323 # $LEN=$self->Read16($fileMST);
325 my ($TAG,$POS,$LEN) = unpack("sss", substr($buff,$i * 6, 6));
327 print "TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
329 # The TAG does not exists in .FDT so we set it to 0.
331 # XXX This is removed from perl version; .FDT file is updated manually, so
332 # you will often have fields in .MST file which aren't in .FDT. On the other
333 # hand, IsisMarc doesn't use .FDT files at all!
335 #if (! $self->{TagName}->{$TAG}) {
346 # Get Variable Fields
348 read($self->{'fileMST'},$buff,$fld_len);
350 for (my $i = 0 ; $i < $NVF ; $i++) {
351 # skip zero-sized fields
352 next if ($FieldLEN[$i] == 0);
354 push @{$self->{record}->{$FieldTAG[$i]}}, substr($buff,$FieldPOS[$i],$FieldLEN[$i]);
358 print Dumper($self) if ($self->{debug});
360 return $self->{'record'};
365 Dump ascii output of selected MFN
367 print $isis->to_ascii(55);
374 my $mfn = shift || croak "need MFN";
376 my $rec = $self->fetch($mfn);
380 foreach my $f (sort keys %{$rec}) {
381 $out .= "\n$f\t".join("\n$f\t",@{$self->{record}->{$f}});
391 Read mfn and convert it to hash
393 my $hash = $isis->to_hash($mfn);
395 It has ability to convert characters (using C<hash_filter> from ISIS
396 database before creating structures enabling character remapping or quick
399 This function returns hash which is like this:
404 'c' => 'New York University press',
416 You can later use that has to produce any output from ISIS data.
423 my $mfn = shift || confess "need mfn!";
426 my $row = $self->fetch($mfn);
428 foreach my $k (keys %{$row}) {
429 foreach my $l (@{$row->{$k}}) {
432 $l = $self->{'hash_filter'}->($l) if ($self->{'hash_filter'});
437 foreach my $t (split(/\^/,$l)) {
439 $val->{substr($t,0,1)} = substr($t,1);
445 push @{$rec->{$k}}, $val;
453 # XXX porting from php left-over:
455 # do I *REALLY* need those methods, or should I use
456 # $self->{something} directly?
458 # Probably direct usage is better!
463 return $self->{TagName};
468 return $self->{NXTMFN};
475 This module has been very lightly tested. Use with caution and report bugs.
482 http://www.rot13.org/~dpavlin/
484 This module is based heavily on code from LIBISIS.PHP - Library to read ISIS files V0.1.1
485 written in php and (c) 2000 Franck Martin - <franck@sopac.org> released under LGPL.
489 This program is free software; you can redistribute
490 it and/or modify it under the same terms as Perl itself.
492 The full text of the license can be found in the
493 LICENSE file included with this module.
498 L<http://www.openisis.org|OpenIsis>, perl(1).