fixed parsing for science direct html with more than one <a href=> per one <tr>
[webpac] / index_DBI_cache.pm
index a798ce4..3c43bc7 100644 (file)
@@ -11,6 +11,7 @@ use strict qw(vars);
 use vars qw($Count);
 use HTML::Entities;
 use URI::Escape;
+use locale;
 
 use DBI;
 
@@ -43,11 +44,20 @@ sub new {
        my $user = shift || die "need dbi_user= in [global] section of configuration file";
        my $passwd = shift || die "need dbi_passwd= in [global] section of configuration file";
 
+       $self->{dbd} = $dbd;
+
        $self->{dbh} = DBI->connect("DBI:$dbd:$dsn",$user,$passwd) || die $DBI::errstr;
        $Count++;
 
        $self->bench("connected to $dbd as $user");
 
+       # force SQLite to support binary 0 in data (which shouldn't
+       # happen, but it did to me)
+       eval {
+               no warnings 'all';
+               $self->{dbh}->{sqlite_handle_binary_nulls} = 1;
+       };
+
        return $self;
 }
 
@@ -63,7 +73,7 @@ sub delete_and_create {
 # FIX: this is not a good way to check if table exists!
        if ($sth->execute() && $sth->fetchrow_hashref) {
                my $sql = "drop table $field";
-               my $sth = $self->{dbh}->do($sql) || die "SQL: $sql ".$self->{dbh}->errstr();
+               my $sth = $self->{dbh}->do($sql) || warn "SQL: $sql - ".$sth->errstr();
        }
        $sql = "create table $field (
                        item varchar(255),
@@ -160,7 +170,7 @@ sub fetch {
                } else {
                        # if no match is found when searching from beginning
                        # of word in index, try substring match anywhere
-                       $sql2 = "select ord from $field where upper(item) like '%'||upper(?)||'%'";
+                       $sql2 = "select ord from $field where upper(item) like '% '||upper(?)||'%'";
                        $sth = $self->{dbh}->prepare($sql2) || die "sql2: $sql2; ".$self->{dbh}->errstr();
                        $sth->execute($where) || die "sql2: $sql2; ".$self->{dbh}->errstr();
                        if (my $row = $sth->fetchrow_hashref) {
@@ -192,10 +202,10 @@ sub close {
 
                $self->{dbh}->begin_work || die $self->{dbh}->errstr();
 
-               $self->bench("Sorting ".$Table{$table}." items in $table");
+               $self->bench("Sorting ".$Table{$table}." (with duplicates) items in $table");
                my @keys = sort keys %{$c_table->{$table}};
 
-               $self->bench("Dumping data into $table");
+               $self->bench("Dumping ".($#keys+1)." items into $table");
                my $sql = "insert into $table (ord,item,display,count) values (?,?,?,?)";
                my $sth = $self->{dbh}->prepare($sql) || die "sql: $sql; ".$self->{dbh}->errstr();
 
@@ -210,6 +220,11 @@ sub close {
 
                $self->{dbh}->commit || die $self->{dbh}->errstr();
        }
+
+       if ($self->{dbd} =~ m/(Pg|SQLite)/) {
+               $self->{dbh}->do(qq{vacuum}) || warn "vacumming failed. It shouldn't if you are using PostgreSQL or SQLite: ".$self->{dbh}->errstr();
+       }
+
        $self->bench("disconnecting from database");
 
        $self->{dbh}->disconnect;