added links from result headline to details, CSS style to make headline links
[webpac] / WebPac.pm
index 73080ae..7299822 100644 (file)
--- a/WebPac.pm
+++ b/WebPac.pm
@@ -8,24 +8,32 @@ use HTML::FillInForm;
 use SWISH;
 use Text::Iconv;
 use DBI;
+use Config::IniFiles;
+use Text::Unaccent;
 
 use lib '..';
 use index_DBI;
 use back2html;
 
-# configuration options
-# FIX: they really should go in configuration file!
-my $TEMPLATE_PATH = '/data/webpac/template_html';
-my $CHARSET = 'ISO-8859-2';
-my $SWISH = '/usr/bin/swish-e';
-my $INDEX = '/data/webpac/index/isis.index';
-my $MAX_HITS = 500;
-my $ON_PAGE = 10;
+
+# Load the shared configuration; on failure include Config::IniFiles' own diagnostics.
+my $cfg_global = Config::IniFiles->new( -file => '../global.conf' ) or die "can't open 'global.conf': @Config::IniFiles::errors";
+
+# [webpac] options: template_html and index are required; for the rest a missing, empty or 0 value selects the default
+my $TEMPLATE_PATH = $cfg_global->val('webpac', 'template_html') || die "need template_html in global.conf, section webpac";
+my $CHARSET = $cfg_global->val('webpac', 'charset') || 'ISO-8859-1';
+my $SWISH = $cfg_global->val('webpac', 'swish') || '/usr/bin/swish-e';
+my $INDEX = $cfg_global->val('webpac', 'index') || die "need index in global.conf, section webpac";
+my $MAX_HITS = $cfg_global->val('webpac', 'max_hits') || 0;
+my $ON_PAGE = $cfg_global->val('webpac', 'on_page') || 10;
+my $MIN_WILDCARD = $cfg_global->val('webpac', 'min_wildcard') || 1;
+
 
 Text::Iconv->raise_error(0);     # Conversion errors raise exceptions
 
 my $from_utf8 = Text::Iconv->new('UTF8', $CHARSET);
 
+
 sub setup {
        my $self = shift;
        $self->tmpl_path($TEMPLATE_PATH);
@@ -67,36 +75,68 @@ sub show_results_list {
 
        my @s_arr;      # all queries are located here
 
-       for(my $i = 1; $i <=10; $i++) {
+       my @path_arr = $q->param('path');
+       my $full = $q->param('full');
+       my @persist_vars = ( 'rm' );
+       # '.' binds tighter than '||', so the "|| 0" fallback must be parenthesised to apply to the offset alone
+       my @url_params = ( 'rm=results', 'show_full=1', 'last_PAGER_offset='.($q->param('PAGER_offset') || 0) );
+
+       for(my $i = 1; $i <=30; $i++) {
 
                return show_index($self, $i) if ($q->param("f".$i."_index"));
-               next if (! $q->param("f$i"));
+
                next if (! $q->param("v$i"));
+               next if (! $q->param("f$i"));
+
+               push @persist_vars, "f$i";
+               push @persist_vars, "v$i";
+
+               push @url_params,"f$i=".$q->url_param("f$i");
+               push @url_params,"v$i=".$q->url_param("v$i");
 
                # re-write query from +/- to and/and not
-               my $s;
-               my $search = $q->param("v$i");
-               while ($search =~ s/\s*("[^"]+")\s*/ /) {
-                       $s .= "$1 ";
-               }
-               $search =~ s/^\s+//;
-               $search =~ s/\s+$//;
-
-               foreach (split(/\s+/,$search)) {
-                       if (m/^([+-])(\S+)/) {
-                               $s.= ($s) ? "and " : "";
-                               $s.="not " if ($1 eq "-");
-                               $s.="$2* ";
-                       } else {
-                               $s.="$_* ";
+               my @param_vals = $q->param("v$i");
+               my @swish_q;
+               while (my $search = shift @param_vals) {
+                       my $s;
+                       # remove accents
+                       $search = unac_string($CHARSET,$search);
+                       while ($search =~ s/\s*("[^"]+")\s*/ /) {
+                               $s .= "$1 ";
                        }
+                       $search =~ s/^\s+//;
+                       $search =~ s/\s+$//;
+
+                       foreach (split(/\s+/,$search)) {
+                               if (m/^([+-])(\S+)/) {
+                                       $s.= ($s) ? "and " : "";
+                                       $s.="not " if ($1 eq "-");
+                                       $s.="$2* ";
+                               } elsif (m/^\s*(and|or|not)\s*$/i) {
+                                       $s.="$_ ";
+                               # don't add * to words with less than x chars
+                               } elsif (length($_) <= $MIN_WILDCARD) {
+                                       $s.="$_ ";
+                               } else {
+                                       $s.="$_* ";
+                               }
+                       }
+                       $s =~ s/\*+/*/g;
+                       push @swish_q,$s;
                }
-               $s =~ s/\*+/*/g;
-
-               push @s_arr,$q->param("f$i")."_swish=($s)";
+               # FIXME default operator for multi-value fields is or. There is
+               # no way to change it, except here for now. Is there need?
+               push @s_arr, $q->param("f$i")."_swish=(".join(" or ",@swish_q).")";
        }
 
-       my $tmpl = $self->load_tmpl('results.html');
+       my $tmpl = $self->load_tmpl('results.html', global_vars => 1);
+
+       sub esc_html {  # HTML-escape &, < and > in a string; an undef argument (e.g. a failed charset conversion) yields ''
+               my $html = defined $_[0] ? $_[0] : '';
+               my %entity_for = ( '&' => '&amp;', '<' => '&lt;', '>' => '&gt;' );
+               $html =~ s/([&<>])/$entity_for{$1}/g;   # single pass, so the '&' of a produced entity is never re-escaped
+               return $html;
+       }
 
        # call swish
        my $sh = SWISH->connect('Fork',
@@ -109,23 +149,36 @@ sub show_results_list {
                        push @swish_results, {
                                nr => ($#swish_results + 2),
                                path => $hit->swishdocpath,
-#                              headline => $from_utf8->convert($hit->headline),
-#                              html => back2html($from_utf8->convert($hit->html)),
-                               headline => $hit->headline,
-                               html => back2html($hit->html),
+                               headline => esc_html($from_utf8->convert($hit->headline)),
+                               html => back2html($from_utf8->convert($hit->html)),
                                rank => $hit->swishrank };
 
                },
                #startnum => 0,
-               maxhits => $MAX_HITS,
+               maxhits => $MAX_HITS
        );
 
        die $SWISH::errstr unless $sh;
+       # construct swish query: every field clause, plus an optional restriction to
+       # the selected records; all clauses are and-ed together by the single join below,
+       # so no separator is glued on by hand and an empty @s_arr leaves no dangling "and"
+       my $show_full = (@path_arr && $q->param('show_full')) ? 1 : 0;
+       my @sw_clauses = @s_arr;
+       if ($show_full) {
+               push @sw_clauses, '(' . join(' or ', map(qq{swishdocpath="$_"}, @path_arr)) . ')';
+       }
+       $tmpl->param('full',$show_full);        # 1 = show full records, 0 = headlines only
+       my $sw_q = join(" and ",@sw_clauses);
 
-       my $hits = $sh->query(join(" and ",@s_arr)) || 0;       # FIX: and/or
+       my $hits = $sh->query($sw_q);
 
        $tmpl->param('hits',$hits);
-       $tmpl->param('search',join(" and ",@s_arr));
+       $tmpl->param('search',$sw_q);
+
+       $tmpl->param('PAGER_offset',$q->param("PAGER_offset") || 0);
+       $tmpl->param('last_PAGER_offset',$q->param("last_PAGER_offset") || 0);
+
+       $tmpl->param('url_params',"?".join("&",@url_params));
 
        # create a Pager object
        my $pager = HTML::Pager->new(
@@ -136,25 +189,21 @@ sub show_results_list {
 
                        my @result;
                        for (my $i=0; $i<$rows; $i++) {
-                               push @result, $swish_results[$offset+$i] if $swish_results[$offset+$i];
+                               my $r = $swish_results[$offset+$i];
+                               if ($r && $tmpl->param('full')) {
+                                       push @result, $r;
+                               } elsif ($r) {
+                                       # if not full output, skip html
+                                       delete $r->{html};
+                                       push @result, $r;
+                               }
                        }
                        return \@result;
                },
                rows => $hits,
                page_size => $ON_PAGE,
                # some optional parameters
-               persist_vars => [
-                       'rm',
-                       'f1', 'v1',
-                       'f2', 'v2',
-                       'f3', 'v3',
-                       'f4', 'v4',
-                       'f5', 'v5',
-                       'f6', 'v6',
-                       'f7', 'v7',
-                       'f8', 'v8',
-                       'f9', 'v9',
-                       ],
+               persist_vars => [ @persist_vars ],
                #cell_space_color => '#000000',
                #cell_background_color => '#ffffff',
                #nav_background_color => '#dddddd',
@@ -179,7 +228,12 @@ sub show_index {
 
        my $html;
 
-       my $index = new index_DBI();
+       my $index = new index_DBI(
+               $cfg_global->val('global', 'dbi_dbd'),
+               $cfg_global->val('global', 'dbi_dsn'),
+               $cfg_global->val('global', 'dbi_user'),
+               $cfg_global->val('global', 'dbi_passwd') || ''
+       );
 
        my $total = $index->check($field);
        if (! $total) {
@@ -189,12 +243,12 @@ sub show_index {
                return $html;
        }
 
-       my $tmpl = $self->load_tmpl('index_res.html');
+       my $tmpl = $self->load_tmpl('index_res.html', global_vars => 1);
        $tmpl->param('field',$field);
        $tmpl->param('limit',$limit);
        $tmpl->param('total',$total);
 
-# FIX: I should set offset and leave out limit from fetch!!
+# FIXME I should set offset and leave out limit from fetch!!
 #      if (! $q->param("PAGER_offset") {
 #              $q->param("Pager_offet)
 #      }