Fixed parsing of ScienceDirect HTML with more than one <a href=> per <tr>
[webpac] / index_DBI_cache.pm
index 3422069..3c43bc7 100644 (file)
@@ -44,11 +44,20 @@ sub new {
        my $user = shift || die "need dbi_user= in [global] section of configuration file";
        my $passwd = shift || die "need dbi_passwd= in [global] section of configuration file";
 
+       $self->{dbd} = $dbd;
+
        $self->{dbh} = DBI->connect("DBI:$dbd:$dsn",$user,$passwd) || die $DBI::errstr;
        $Count++;
 
        $self->bench("connected to $dbd as $user");
 
+       # force SQLite to support binary 0 (NUL bytes) in data (which
+       # shouldn't happen, but it did to me)
+       eval {
+               no warnings 'all';
+               $self->{dbh}->{sqlite_handle_binary_nulls} = 1;
+       };
+
        return $self;
 }
 
@@ -64,7 +73,7 @@ sub delete_and_create {
 # FIX: this is not a good way to check if table exists!
        if ($sth->execute() && $sth->fetchrow_hashref) {
                my $sql = "drop table $field";
-               my $sth = $self->{dbh}->do($sql) || die "SQL: $sql ".$self->{dbh}->errstr();
+               my $sth = $self->{dbh}->do($sql) || warn "SQL: $sql - ".$sth->errstr();
        }
        $sql = "create table $field (
                        item varchar(255),
@@ -193,10 +202,10 @@ sub close {
 
                $self->{dbh}->begin_work || die $self->{dbh}->errstr();
 
-               $self->bench("Sorting ".$Table{$table}." items in $table");
+               $self->bench("Sorting ".$Table{$table}." (with duplicates) items in $table");
                my @keys = sort keys %{$c_table->{$table}};
 
-               $self->bench("Dumping data into $table");
+               $self->bench("Dumping ".($#keys+1)." items into $table");
                my $sql = "insert into $table (ord,item,display,count) values (?,?,?,?)";
                my $sth = $self->{dbh}->prepare($sql) || die "sql: $sql; ".$self->{dbh}->errstr();
 
@@ -211,6 +220,11 @@ sub close {
 
                $self->{dbh}->commit || die $self->{dbh}->errstr();
        }
+
+       if ($self->{dbd} =~ m/(Pg|SQLite)/) {
+               $self->{dbh}->do(qq{vacuum}) || warn "vacumming failed. It shouldn't if you are using PostgreSQL or SQLite: ".$self->{dbh}->errstr();
+       }
+
        $self->bench("disconnecting from database");
 
        $self->{dbh}->disconnect;