# # this file implements index functions using DBI # and huge amounts of memory for cache speedup # # this version doesn't support ident (which sould be location in # library). But, that functionality is not used anyway... # package index_DBI; use strict qw(vars); use vars qw($Count); use HTML::Entities; use URI::Escape; use locale; use DBI; my %Table; # index tables which where visited in this run my %sth_cache; # cache prepared statements # cache var my $c_table; my $c_count; # bench time my $bench_time = time(); sub bench { my $self = shift; my $msg = shift; print STDERR "last operation took ",time()-$bench_time," seconds...\n"; $bench_time=time(); print STDERR "$msg\n"; } sub new { my $class = shift; my $self = {}; bless($self, $class); my $dbd = shift || die "need dbi_dbd= in [global] section of configuration file"; my $dsn = shift || die "need dbi_dsn= in [global] section of configuration file"; my $user = shift || die "need dbi_user= in [global] section of configuration file"; my $passwd = shift || die "need dbi_passwd= in [global] section of configuration file"; $self->{dbd} = $dbd; $self->{dbh} = DBI->connect("DBI:$dbd:$dsn",$user,$passwd) || die $DBI::errstr; $Count++; $self->bench("connected to $dbd as $user"); # force SQLite to support binary 0 in data (which shouldn't # happend, but it did to me) eval { no warnings 'all'; $self->{dbh}->{sqlite_handle_binary_nulls} = 1; }; return $self; } sub delete_and_create { my $self = shift; my $field = shift; #print "#### delete_and_create($field)\n"; my $sql = "select count(*) from $field"; my $sth = $self->{dbh}->prepare($sql); # FIX: this is not a good way to check if table exists! if ($sth && $sth->execute() && $sth->fetchrow_hashref) { my $sql = "drop table $field"; my $sth = $self->{dbh}->do($sql) || warn "SQL: $sql - ".$sth->errstr(); } $sql = "create table $field ( item varchar(255), display text, count int, ord int, primary key (item) )"; $sth = $self->{dbh}->do($sql) || warn "SQL: $sql ".$self->{dbh}->errstr(); } sub insert { my $self = shift; my $field = shift; my $index_data = shift || print STDERR "\$index->insert($field,NULL,...)"; my $display = shift || $index_data; if (! $index_data) { print STDERR "\$index->insert() -- no value to insert\n"; return; } $Table{$field}++; #$sth_cache{$field."select"}->execute($index_data) || die "cache: $field select; ".$self->{dbh}->errstr(); $index_data =~ s#&(\w)(acute|cedil|circ|grave|ring|slash|tilde|uml);#$1#gi; my $uc = uc($index_data); if (! $c_table->{$field}->{$uc}) { #print stderr "in index: $index_data\n"; $c_table->{$field}->{$uc} = $index_data; $c_table->{$field}->{$uc}->{display} = $display; $c_count->{$field}->{$uc} = 1; } else { $c_count->{$field}->{$uc}++; } } sub count { my $self = shift; my $field = shift; my $where = shift; my $sql = "select count(*) from $field where upper(item) like upper(?)||'%'"; my $sth = $self->{dbh}->prepare($sql) || die $self->{dbh}->errstr(); $sth->execute($where) || die "sql: $sql; ".$self->{dbh}->errstr(); my ($total) = $sth->fetchrow_array(); # no results, count all if (! $total) { my $sql = "select count(*) from $field"; my $sth = $self->{dbh}->prepare($sql) || die $self->{dbh}->errstr(); $sth->execute() || die "sql: $sql; ".$self->{dbh}->errstr(); $total = $sth->fetchrow_array(); } return $total || 1; } sub fetch { my $self = shift; my $field = shift; my $where = shift; my $from_ord = shift || 0; my $rows = shift || 10; my @sql_args; my $sql = "select item,display,ord from $field"; if ($where) { my $sql2 = "select ord from $field where upper(item) like upper(?)||'%'"; my $sth = $self->{dbh}->prepare($sql2) || die "sql2: $sql2; ".$self->{dbh}->errstr(); $sth->execute($where) || die "sql2: $sql2; ".$self->{dbh}->errstr(); if (my $row = $sth->fetchrow_hashref) { $from_ord += $row->{ord} - 1; } else { # if no match is found when searching from beginning # of word in index, try substring match anywhere $sql2 = "select ord from $field where upper(item) like '% '||upper(?)||'%'"; $sth = $self->{dbh}->prepare($sql2) || die "sql2: $sql2; ".$self->{dbh}->errstr(); $sth->execute($where) || die "sql2: $sql2; ".$self->{dbh}->errstr(); if (my $row = $sth->fetchrow_hashref) { $from_ord += $row->{ord} - 1; } } } $sql .= " order by ord limit $rows offset $from_ord"; my $sth = $self->{dbh}->prepare($sql) || die "prepare: $sql; ".$self->{dbh}->errstr(); $sth->execute() || die "execute: $sql; ".$self->{dbh}->errstr(); my @arr; while (my $row = $sth->fetchrow_hashref) { $row->{item} = HTML::Entities::encode($row->{item},' <>&"'); $row->{display} = HTML::Entities::encode($row->{display},'<>&"'); $row->{item} =~ s#&(\w)(acute|cedil|circ|grave|ring|slash|tilde|uml);#$1#gi; $row->{display} =~ s#&(\w)(acute|cedil|circ|grave|ring|slash|tilde|uml);#&$1$2;#gi; push @arr,$row; } return @arr; } sub close { my $self = shift; return if (! $self->{dbh}); foreach my $table (keys %Table) { $self->bench("Crating table $table"); $self->delete_and_create($table); $self->{dbh}->begin_work || die $self->{dbh}->errstr(); $self->bench("Sorting ".$Table{$table}." (with duplicates) items in $table"); my @keys = sort keys %{$c_table->{$table}}; $self->bench("Dumping ".($#keys+1)." items into $table"); my $sql = "insert into $table (ord,item,display,count) values (?,?,?,?)"; my $sth = $self->{dbh}->prepare($sql) || die "sql: $sql; ".$self->{dbh}->errstr(); my $ord = 0; foreach my $key (@keys) { $sth->execute(++$ord, $c_table->{$table}->{$key}, $c_table->{$table}->{$key}->{display}, $c_count->{$table}->{$key} ); } $self->{dbh}->commit || die $self->{dbh}->errstr(); } if ($self->{dbd} =~ m/(Pg|SQLite)/) { $self->{dbh}->do(qq{vacuum}) || warn "vacumming failed. It shouldn't if you are using PostgreSQL or SQLite: ".$self->{dbh}->errstr(); } $self->bench("disconnecting from database"); $self->{dbh}->disconnect; undef $self->{dbh}; } END { $Count--; print STDERR "index_DBI fatal error: \$index->close() not called... $Count references left!\n" if ($Count > 0); # FIX: debug output # print STDERR "usage\ttable\n"; # foreach (keys %Table) { # print STDERR $Table{$_},"\t$_\n"; # } } 1;