added benchmarking script, some speedup (7029.54/s vs 5829.19/s),
author Dobrica Pavlinusic <dpavlin@rot13.org>
Wed, 29 Dec 2004 15:10:34 +0000 (15:10 +0000)
committer Dobrica Pavlinusic <dpavlin@rot13.org>
Wed, 29 Dec 2004 15:10:34 +0000 (15:10 +0000)
removed left-overs from php porting (dictionaries are not supported by this module),
make dump_isis.pl arguments same as dump_openisis.pl,
renamed GetMFN to fetch

git-svn-id: file:///home/dpavlin/svn/Biblio-Isis/trunk@7 4670fa4d-42ec-0310-ab5b-a66af6943492

IsisDB.pm
scripts/bench.pl [new file with mode: 0755]
scripts/dump_isis.pl

index b2bd43f..13f173b 100644 (file)
--- a/IsisDB.pm
+++ b/IsisDB.pm
@@ -7,7 +7,7 @@ use Data::Dumper;
 BEGIN {
        use Exporter ();
        use vars qw ($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
-       $VERSION     = 0.01;
+       $VERSION     = 0.02;
        @ISA         = qw (Exporter);
        #Give a hoot don't pollute, do not export more than needed by default
        @EXPORT      = qw ();
@@ -191,37 +191,40 @@ sub new {
 
        print Dumper($self) if ($self->{debug});
 
+       # open files for later
+       open($self->{'fileXRF'}, $self->{isisdb}.".XRF") || croak "can't open '$self->{isisdb}.XRF': $!";
+
+       open($self->{'fileMST'}, $self->{isisdb}.".MST") || croak "can't open '$self->{isisdb}.MST': $!";
+
        $self ? return $self : return undef;
 }
 
-=head2 GetMFN
+=head2 fetch
 
 Read record with selected MFN
 
-  my $rec = $isis->GetMFN(55);
+  my $rec = $isis->fetch(55);
 
 Returns hash with keys which are field names and values are unpacked values
 for that field.
 
 =cut
 
-sub GetMFN {
+sub fetch {
        my $self = shift;
 
-       my $mfn = shift || croak "GetMFN needs MFN as argument!";
-
-       print "GetMFN: $mfn\n" if ($self->{debug});
+       my $mfn = shift || croak "fetch needs MFN as argument!";
 
-       open(fileXRF, $self->{isisdb}.".XRF") || croak "can't open '$self->{isisdb}.XRF': $!";
+       print "fetch: $mfn\n" if ($self->{debug});
 
        # XXX check this?
        my $mfnpos=($mfn+int(($mfn-1)/127))*4;
 
        print "seeking to $mfnpos in file '$self->{isisdb}.XRF'\n" if ($self->{debug});
-       seek(fileXRF,$mfnpos,0);
+       seek($self->{'fileXRF'},$mfnpos,0);
 
        # read XRFMFB abd XRFMFP
-       my $pointer=$self->Read32(\*fileXRF);
+       my $pointer=$self->Read32(\*{$self->{'fileXRF'}});
 
        my $XRFMFB = int($pointer/2048);
        my $XRFMFP = $pointer - ($XRFMFB*2048);
@@ -242,15 +245,11 @@ sub GetMFN {
 
        print "$offset - $offset2 - $offset3 - $offset4\n" if ($self->{debug});
 
-       close(fileXRF);
-
        # Get Record Information
 
-       open(fileMST, $self->{isisdb}.".MST") || croak "can't open '$self->{isisdb}.MST': $!";
-
-       seek(fileMST,$offset4,0);
+       seek($self->{'fileMST'},$offset4,0);
 
-       my $value=$self->Read32(\*fileMST);
+       my $value=$self->Read32(\*{$self->{'fileMST'}});
 
        if ($value!=$mfn) {
 print ("Error: The MFN:".$mfn." is not found in MST(".$value.")");    
@@ -265,7 +264,7 @@ print ("Error: The MFN:".$mfn." is not found in MST(".$value.")");
 #      $STATUS=$self->Read16($fileMST);
 
        my $buff;
-       read(fileMST, $buff, 14);
+       read($self->{'fileMST'}, $buff, 14);
 
        my ($MFRL,$MFBWB,$MFBWP,$BASE,$NVF,$STATUS) = unpack("slssss", $buff);
 
@@ -283,7 +282,7 @@ print ("Error: The MFN:".$mfn." is not found in MST(".$value.")");
 #              $POS=$self->Read16($fileMST);
 #              $LEN=$self->Read16($fileMST);
 
-               read(fileMST, $buff, 6);
+               read($self->{'fileMST'}, $buff, 6);
                my ($TAG,$POS,$LEN) = unpack("sss", $buff);
 
                print "TAG: $TAG POS: $POS LEN: $LEN\n" if ($self->{debug});
@@ -309,7 +308,7 @@ print ("Error: The MFN:".$mfn." is not found in MST(".$value.")");
 
        for (my $i = 0 ; $i < $NVF ; $i++) {
                my $rec;
-               read(fileMST,$rec,$FieldLEN[$i]);
+               read($self->{'fileMST'},$rec,$FieldLEN[$i]);
                push @{$self->{record}->{$FieldTAG[$i]}}, $rec;
        }
        close(fileMST);
@@ -337,7 +336,7 @@ sub to_ascii {
 
        my $mfn = shift || croak "need MFN";
 
-       my $rec = $self->GetMFN($mfn);
+       my $rec = $self->fetch($mfn);
 
        my $out = "0\t$mfn";
 
@@ -350,265 +349,6 @@ sub to_ascii {
        return $out;
 }
 
-################# old cruft which is not ported from php to perl
-
-=begin php
-
-  # Load the dictionary from the $db.L0x files.
-  # Not usefull Yet
-  
-  sub LoadDictionary()
-  {
-    $fileL01=fopen($self->{isisdb}.".L01","r");
-    rewind($fileL01);  
-
-    do
-    {
-
-      $POS=$self->Read32($fileL01);
-      $OCK=$self->Read16($fileL01);
-      $IT=$self->Read16($fileL01);
-      $PS=$self->Read32($fileL01);
-print "<br>PS:".$PS." ".$self->{ORDF}->{1}." ";
-      for ($i=0;$i<$OCK;$i++)
-      {
-        $KEY=fread($fileL01,10);
-       
-        print $KEY." ### ";
-
-        $INFO1=$self->Read32($fileL01);
-        $INFO2=$self->Read32($fileL01);
-
-        #L01Key->{$key}=array($INFO1,$INFO2);
-      }
-    
-      rewind($fileL01);
-      $offset=($PS-1)*(12+$self->{ORDF}->{1}*18*2);
-      fseek($fileL01,$offset);
-
-    } While (!feof($fileL01));
-
-    fclose($fileL01);
-  }
-
-  # self function search through the tree and returns an array of pointers to IFP
-  # The function must be recursive
-
-  sub SearchTree($search,$fileNB,$PUNT)
-  {       
-      $offset=(($PUNT-1)*(8+2*$self->{ORDN}->{1}*14)); 
-
-        rewind($fileNB1); 
-
-        fseek($fileNB,$offset);
-        $POS=$self->Read32($fileNB);
-        $OCK=$self->Read16($fileNB);
-        $IT=$self->Read16($fileNB);
-
-#print "<br>".$POS." - ".$OCK." - ".$IT;
-
-        $OLDPUNT=$POS;
-        $j=0;
-        for ($i=0;$i<$OCK;$i++)
-        {
-          $KEY=fread($fileNB,10);
-       
-          $PUNT=$self->Read32($fileNB);
-
-#print " ## ".chop($KEY)."(".$PUNT."-".$OLDPUNT.") ## "; 
-
-          If (strcmp($search,chop($KEY))<0)
-          {
-            break;
-          }
-          $OLDPUNT=$PUNT;   
-        }        
-#print $OLDPUNT; 
-        Return $OLDPUNT;
-  }
-
-  # Search ISIS for record containing search
-  # Return a sorted array of MFN
-
-  sub Search($search)
-  {
-
-  $search=strtoupper($search);
-#print "Searching....".$search." - ".$self->{POSRX}->{1}."<br>";
-    # first search .x01
-    
-
-    # Search in .N01  
-
-
-    $fileN01=fopen($self->{isisdb}.".N01","r");
-    $offset=(($self->{POSRX}->{1}-1)*(8+2*$self->{ORDN}->{1}*14));
-
-      do
-      {
-        rewind($fileN01); 
-
-        fseek($fileN01,$offset);
-        $POS=$self->Read32($fileN01);
-        $OCK=$self->Read16($fileN01);
-        $IT=$self->Read16($fileN01);
-
-#print "<br>".$POS." - ".$OCK." - ".$IT;
-
-        $OLDPUNT=$POS;
-        for ($i=0;$i<$OCK;$i++)
-        {
-          $KEY=fread($fileN01,10);
-       
-          $PUNT=$self->Read32($fileN01);
-
-#print " ## ".chop($KEY)."(".$PUNT."-".$OLDPUNT.") ## "; 
-
-          If (strcmp($search,chop($KEY))<0)
-          {
-            break;
-          }
-          $OLDPUNT=$PUNT;   
-        }
-        $offset=(($OLDPUNT-1)*(8+2*$self->{ORDN}->{1}*14));      
-      } while ($OLDPUNT>0);
-#print $OLDPUNT; 
-
-
-    fclose($fileN01);
-
-    # Now look for records in .L01 file
-    $fileL01=fopen($self->{isisdb}.".L01","r");
-    rewind($fileL01);
-
-    $offset=(-$OLDPUNT-1)*(12+$self->{ORDF}->{1}*18*2);
-    fseek($fileL01,$offset);
-
-    $POS=$self->Read32($fileL01);
-    $OCK=$self->Read16($fileL01);
-    $IT=$self->Read16($fileL01);
-    $PS=$self->Read32($fileL01);
-#print "<br>POS:".$POS." ".$self->{ORDF}->{1}." ";
-    for ($i=0;$i<$OCK;$i++)
-    {
-      $KEY=fread($fileL01,10);
-       
-#print $KEY." ### ";
-
-      $INFO1=$self->Read32($fileL01);
-      $INFO2=$self->Read32($fileL01);
-
-      If (strcmp($search,chop($KEY))==0)
-      {
-        break;
-      }
-    }    
-
-    fclose($fileL01);
-
-#print $INFO1."--".$INFO2;
-
-    # Now look in .IFP for the MFN
-    $fileIFP=fopen($self->{isisdb}.".IFP","r");
-    rewind($fileIFP);
-    $offset=($INFO1-1)*512+($INFO2*4);
-    fseek($fileIFP,$offset);   
-    $IFPBLK=$self->Read32($fileIFP);
-
-    $IFPNXTB=$self->Read32($fileIFP);
-    $IFPNXTP=$self->Read32($fileIFP);
-    $IFPTOTP=$self->Read32($fileIFP);
-    $IFPSEGP=$self->Read32($fileIFP);
-    $IFPSEGC=$self->Read32($fileIFP);
-
-
-#print "<br>IFP:".$IFPBLK." # ".$IFPNXTB." - ".$IFPNXTP." - ".$IFPTOTP." - ".$IFPSEGP." - ".$IFPSEGC;
-
-    rewind($fileIFP);
-    $offset=($INFO1-1)*512+24+($INFO2*4);
-    fseek($fileIFP,$offset);    
-    
-    $j=24+($INFO2*4);
-    $k=0;
-    $l=1;
-    $OLDPMFN="";
-    for ($i=0;$i<$IFPSEGP;$i++)
-    {
-      $B1=$self->Read8($fileIFP);
-      $B2=$self->Read8($fileIFP);
-      $B3=$self->Read8($fileIFP);
-      $B4=$self->Read8($fileIFP);
-      $B5=$self->Read8($fileIFP);
-      $B6=$self->Read8($fileIFP);
-      $B7=$self->Read8($fileIFP);
-      $B8=$self->Read8($fileIFP);
-
-      $PMFN=$B1*65536+$B2*256+$B3;
-      $PTAG=$B4*256+$B5;
-      $POCC=$B6;
-      $PCNT=$B7*256+$B8;
-
-      if ($OLDPMFN!=$PMFN)
-      {
-        if ($PMFN!=0)
-        {
-          $self->{MFNArray}->{$l}=$PMFN;
-          $OLDPMFN=$PMFN;
-          $l+=1;
-        }
-      }
-
-      $j=$j+8;
-#print "<br>".$PMFN."-".$PTAG." - ".$POCC." - ".$PCNT;
-#print "@@".$j."@@@@";
-      if ($j>=504)
-      {
-        if ($IFPNXTB==0 && $IFPNXTP==0)
-        {
-          $k=$k+1;
-          rewind($fileIFP);
-          $offset=($INFO1-1+$k)*512;  
-          fseek($fileIFP,$offset);      
-          $B=$self->Read32($fileIFP);
-#print "<br>-".$B."-<br>";
-          $j=0;
-        } else
-        {
-          rewind($fileIFP);
-          $offset=($IFPNXTB-1)*512;  
-          fseek($fileIFP,$offset);
-
-         $OLDIFPNXTB=$IFPNXTB;
-         $OLDIFPNXTP=$IFPNXTP;
-
-          $IFPBLK=$self->Read32($fileIFP);
-
-          $IFPNXTB=$self->Read32($fileIFP);
-          $IFPNXTP=$self->Read32($fileIFP);
-          $IFPTOTP=$self->Read32($fileIFP);
-          $IFPSEGP=$self->Read32($fileIFP);
-          $IFPSEGC=$self->Read32($fileIFP);
-
-          rewind($fileIFP);
-          $offset=($OLDIFPNXTB-1)*512+24+($OLDIFPNXTP*4);
-          fseek($fileIFP,$offset);    
-    
-          $j=24+($OLDIFPNXTP*4);
-          $k=0;
-          $j=0;
-        }
-      }
-
-    }    
-    fclose($fileIFP);
-    return $l-1;
-  }
-
-=cut
-
 #
 # XXX porting from php left-over:
 #
@@ -618,110 +358,17 @@ print "<br>PS:".$PS." ".$self->{ORDF}->{1}." ";
 # Probably direct usage is better!
 #
 
-sub GetFieldName {
-       my $self = shift;
-       return $self->{FieldName};
-}
-
-sub GetTagName {
+sub TagName {
        my $self = shift;
        return $self->{TagName};
 }
 
-sub GetFieldTag {
-       my $self = shift;
-       return $self->{FieldTAG};
-}
-
-sub GetNextMFN {
+sub NextMFN {
        my $self = shift;
        return $self->{NXTMFN};
 }
 
-sub GetMFNArray {
-       my $self = shift;
-       return $self->{MFNArray};
-}
-=begin php
-
-  sub Read32($fileNB)
-  {
-    $B1=ord(fread($fileNB,1));
-    $B2=ord(fread($fileNB,1));
-    $B3=ord(fread($fileNB,1));
-    $B4=ord(fread($fileNB,1));
-
-    if ($B4<=128)
-    {
-      $value=$B1+$B2*256+$B3*65536+$B4*16777216;
-    } else
-    {
-      $value=$self->Not8($B1)+$self->Not8($B2)*256+$self->Not8($B3)*65536+$self->Not8($B4)*16777216;
-      $value=-($value+1);
-    }
-#    print "(".$B1.",".$B2.",".$B3.",".$B4.":".$value.")";
-
-    return $value;   
-  }
-
-  sub Read24($fileNB)
-  {
-    $B1=ord(fread($fileNB,1));
-    $B2=ord(fread($fileNB,1));
-    $B3=ord(fread($fileNB,1));
-
-    $value=$B1+$B2*256+$B3*65536;
-
-#    print "(".$B1.",".$B2.",".$B3.":".$value.")";
-
-    return $value;   
-  }
-
-  sub Read16($fileNB)
-  {
-    $B1=ord(fread($fileNB,1));
-    $B2=ord(fread($fileNB,1));
-
-    $value=$B1+$B2*256;
-#    print "(".$B1.",".$B2.":".$value.")";
-
-    return $value;  
-  }
-
-  sub Read8($fileNB)
-  {
-    $B1=ord(fread($fileNB,1));
-
-    $value=$B1;
-#    print "(".$value.")";
-
-    return $value;  
-  }
-
-  sub Not8($value)
-  { 
-    $value=decbin($value);
-    if (strlen($value)<8)
-    {
-      $buffer="";
-      for($i=0;$i<(8-strlen($value));$i++)
-      {
-        $buffer.="0";
-      }
-      $value=$buffer.$value;
-    }
-    $value=ereg_replace("0","3",$value);
-    $value=ereg_replace("1","0",$value);
-    $value=ereg_replace("3","1",$value); 
-    $value=bindec($value);
-    return $value;
-  }
-}
-
-=cut
-
 1;
-__END__
 
 =head1 BUGS
 
diff --git a/scripts/bench.pl b/scripts/bench.pl
new file mode 100755 (executable)
index 0000000..e7e130b
--- /dev/null
+++ b/scripts/bench.pl
@@ -0,0 +1,36 @@
+#!/usr/bin/perl -w
+
+use strict;
+use blib;
+
+use IsisDB;
+use OpenIsis;
+use MARC::File::USMARC;
+
+use Benchmark qw( timethese cmpthese ) ;
+
+my $isisdb = shift @ARGV || '/data/isis_data/ps/LIBRI/LIBRI';
+
+my $isis = IsisDB->new (
+       isisdb => $isisdb,
+       debug => shift @ARGV,
+);
+
+my $rows = $isis->{'maxmfn'};
+
+my $db = OpenIsis::open( $isisdb );
+
+print "rows: $rows\n\n";
+
+my $mfn = 1;
+
+my $r = timethese( -5, {
+       IsisDB => sub {
+               $isis->fetch( $mfn++ % $rows + 1 );
+       },
+       OpenIsis => sub {
+               OpenIsis::read( $db, $mfn++ % $rows + 1 );
+       },
+} );
+cmpthese $r;
+
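diff --git a/scripts/dump_isis.pl b/scripts/dump_isis.pl
index b4ea742..bd0e379 100755 (executable)
--- a/scripts/dump_isis.pl
+++ b/scripts/dump_isis.pl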
@@ -7,7 +7,8 @@ use IsisDB;
 use Data::Dumper;
 
 my $isis = IsisDB->new (
-       isisdb => '/data/isis_data/ps/LIBRI/LIBRI',
+       isisdb => shift @ARGV || '/data/isis_data/ps/LIBRI/LIBRI',
+       debug => shift @ARGV,
 );
 
 print "rows: ",$isis->{'maxmfn'},"\n\n";