remove all accents from pager links
[webpac] / WebPac.pm
index b36fee0..872aec0 100644 (file)
--- a/WebPac.pm
+++ b/WebPac.pm
@@ -3,16 +3,17 @@ package WebPac;
 use base 'CGI::Application';
 use strict;
 
-use HTML::Pager;
 use HTML::FillInForm;
-use SWISH;
+use SWISH::API;
 use Text::Iconv;
 use DBI;
 use Config::IniFiles;
 use Text::Unaccent;
+use Data::Pageset;
+use POSIX qw(locale_h);
 
 use lib '..';
-use index_DBI;
+use index_DBI_filter;
 use back2html;
 
 
@@ -27,12 +28,67 @@ my $INDEX = $cfg_global->val('webpac', 'index') || die "need index in global.con
 my $MAX_HITS = $cfg_global->val('webpac', 'max_hits') || 0;
 my $ON_PAGE =$cfg_global->val('webpac', 'on_page') || 10;
 my $MIN_WILDCARD =$cfg_global->val('webpac', 'min_wildcard') || 1;
-
+my $TEMPLATE =$cfg_global->val('webpac', 'template');
+my $UNAC_FILTER =$cfg_global->val('global', 'my_unac_filter');
+my $BASE_PATH =$cfg_global->val('webpac', 'base_path');
+# for pager
+my $pages_per_set = $cfg_global->val('webpac', 'pages_per_set') || 10;
+my $locale = $cfg_global->val('locale') || 'hr_HR';
 
 Text::Iconv->raise_error(0);     # Conversion errors raise exceptions
 
 my $from_utf8 = Text::Iconv->new('UTF8', $CHARSET);
 
+setlocale(LC_CTYPE, $locale);
+setlocale(LC_COLLATE, $locale);
+
+if ($UNAC_FILTER) {
+       require $UNAC_FILTER;
+} else {
+       sub WebPac::my_unac_string {
+               my ($charset, $string) = (@_);
+               return $string;
+       }
+}
+
+# use path from cgi script to support templates in subdirs
+sub url_ex {
+       my $q = shift || die "suff2file needs CGI object!";
+       my $tpl = shift || die "url_ex needs template name!";
+       return suff2file($BASE_PATH, $q->url(-absolute => 1,-path => 1),$TEMPLATE_PATH,$tpl);
+}
+
+sub suff2file($$$$) {
+       my ($base_path, $p, $path, $tpl) = @_;
+
+       return $tpl if (! $base_path);
+
+       #warn "base_path: $base_path, p: $p, path: $path, tpl: $tpl\n";
+
+       $p =~ s#/[^/]*$##;
+
+       # strip everything to and including base path, leaving only
+       # additional (virtual) path
+       if ($base_path eq "/") {
+               $p =~ s,/*,,g;
+               my ($name,$ext) = split(/\./,$tpl);
+               $p = $name . "-" . $p . "." . $ext;
+       } elsif ($p =~ s,^.*?$base_path,,) {
+               $p =~ s,/*,,g;
+               my ($name,$ext) = split(/\./,$tpl);
+               $p = $name . $p . "." . $ext;
+       } else {
+               # if unable reset it!
+               $p = $tpl;
+       }
+
+       if ( -e "$path/$p") {
+               return $p;
+       } else {
+               return $tpl;
+       }
+
+}
 
 sub setup {
        my $self = shift;
@@ -49,19 +105,152 @@ sub setup {
        $self->header_props(-charset=>$CHARSET);
 }
 
+sub in_template {
+       my $q = shift || die "need CGI object!";
+       my $html = shift || die "This page is left unintentionally blank";
+       return $html if (! defined($TEMPLATE));
+
+       my ($dir,$tpl);
+       if ($TEMPLATE =~ m,^(.*?/*)([^/]+)$,) {
+               ($dir,$tpl) = ($1,$2);
+       } else {
+               die "can't parse TEMPLATE path";
+       }
+
+       my $master_tpl = suff2file($BASE_PATH, $q->url(-absolute => 1, -path => 1),$dir,$tpl);
+       if (open(T, $master_tpl)) {
+               my $template_html = join("\n",<T>);
+               close(T);
+               $template_html =~ s/##webpac##/$html/gsi;
+               return $template_html;
+       } else {
+               return "Can't read template '$master_tpl'";
+       }
+}
+
+#--------------------------------------------------------------------------
+
+#
+# make pager navigation and fill template variables
+# compatibile with HTML::Pager
+#
+
+sub make_pager($$$) {
+       my ($q,$tmpl,$pager) = @_;
+
+       #
+       # pager navigation
+       #
+       my ($pager_prev,$pager_next, $pager_jump) = ('','','');
+
+       sub url_with_params {
+               my ($q,$text) = @_;
+               my %param = $q->Vars;
+               my @p;
+               foreach my $p ( keys %param ) {
+                       my $v = $param{$p};
+                       next unless defined $v and length($v) > 0;
+                       if ( $v =~ m{\0} ) {
+                               push @p, $p . '=' . my_unac_string($CHARSET, $_)
+                                       foreach (split(/\0/, $v ));
+                       } else {
+                               push @p, $p . '=' . my_unac_string($CHARSET, $v);
+                       }
+               }
+
+               return
+                 qq{ <a href="}
+               . $q->url( -relative => 1 )
+               . '?'
+               . join(';', @p)
+               . qq{">$text</a> };
+       }
+
+       if ($pager->current_page() > $pager->first_page) {
+               $q->param('PAGER_offset', $pager->current_page - 1);
+               $pager_prev .= url_with_params( $q, '&lt;&lt;');
+       }
+
+       if ($pager->previous_set) {
+               $q->param('PAGER_offset', $pager->previous_set);
+               $pager_prev .= url_with_params( $q,'..');
+       }
+
+
+       foreach my $p (@{$pager->pages_in_set()}) {
+               next if ($p <= 0);
+               if($p == $pager->current_page()) {
+                       $pager_jump .= "<b>$p</b> ";
+               } else {
+                       $q->param('PAGER_offset', $p);
+                       $pager_jump .= url_with_params($q,$p);
+               }
+       }
+
+       if ($pager->next_set) {
+               $q->param('PAGER_offset', $pager->next_set);
+               $pager_next .= url_with_params($q,'..');
+       }
+
+       if ($pager->current_page() < $pager->last_page) {
+               $q->param('PAGER_offset', $pager->current_page + 1);
+               $pager_next .= url_with_params($q,'&gt;&gt;');
+       }
+
+       $tmpl->param('PAGER_PREV', $pager_prev);
+       $tmpl->param('PAGER_JUMP', $pager_jump);
+       $tmpl->param('PAGER_NEXT', $pager_next);
+
+}
+
+#
+# put persisten variables in template
+#
+
+sub make_pager_vars {
+       my $q = shift @_;
+       my $tmpl = shift @_;
+       my @persist_vars = @_;
+       my $hidden_vars = '';
+       my $hidden_search = '';
+       foreach my $v (@persist_vars) {
+               foreach my $val ($q->param($v)) {
+                       next if (! $val || $val eq '');
+                       $val =~ s/"/&quot;/g;
+                       $hidden_vars .= '<input type="hidden" name="'.$v.'" value="'.$val.'"/>'."\n";
+                       $hidden_search .= '<input type="hidden" name="'.$v.'" value="'.$val.'"/>'."\n" if ($v ne "rm");
+               }
+       }
+
+       $tmpl->param('PAGER_HIDDEN', $hidden_vars);
+       $tmpl->param('SEARCH_HIDDEN', $hidden_search);
+       $tmpl->param('PAGER_JAVASCRIPT', qq#
+<SCRIPT LANGUAGE="Javascript">
+<!-- Begin
+       // dummy emulator for HTML::Pager templates
+       function PAGER_set_offset_and_submit() {
+               return true;
+       }
+// End -->
+</script>  
+       #);
+}
+
+#--------------------------------------------------------------------------
+
 sub show_search_form {
        my $self = shift;
 
        # Get the CGI.pm query object
        my $q = $self->query();
 
-       my $tmpl = $self->load_tmpl('search.html');
+       my $tmpl = $self->load_tmpl(url_ex($q,'search.html'));
        my $html = $tmpl->output;
 
        my $fif = new HTML::FillInForm;
 
-       return $fif->fill(scalarref => \$html, fobject => $q,
-               target => 'search');
+       return in_template($q,$fif->fill(scalarref => \$html, fobject => $q,
+               target => 'search'));
 }
  
 sub show_results_list {
@@ -69,7 +258,11 @@ sub show_results_list {
 
        my $q = $self->query();
 
-       my @swish_results;      # results from swish
+       # submit was reset?
+       if ($q->param('reset')) {
+               $q->delete_all;
+               return $self->show_search_form();
+       }
 
        # load template for this page
 
@@ -78,54 +271,114 @@ sub show_results_list {
        my @path_arr = $q->param('path');
        my $full = $q->param('full');
 
-       my @persist_vars = ( 'rm' ); 
+       my @persist_vars = ( 'rm', 'persist_search' );
+       my $url_params = {
+               'rm' => 'results',
+               'show_full' => 1,
+               'last_PAGER_offset' => ($q->param('PAGER_offset') || 0),
+       };
+
+       my @persist_search_vars;
+       my $url_params_persist = {};
+       if ($q->param("persist_search")) {
+               @persist_search_vars = split(/\s*,\s*/, $q->param("persist_search"));
+               $url_params_persist->{'persist_search'} = $q->url_param("persist_search");
+               $url_params->{'persist_search'} = $q->url_param("persist_search");
+       }
+
+       # support parametars "f" and "v" for start
+       for(my $i = 0; $i <=30; $i++) {
 
-       for(my $i = 1; $i <=30; $i++) {
+               $i = '' if ($i == 0);
 
                return show_index($self, $i) if ($q->param("f".$i."_index"));
 
-               next if (! $q->param("v$i"));
+               next if (! $q->param("v$i") || $q->param("v$i") eq '');
                next if (! $q->param("f$i"));
 
+               my $persist = grep(/^$i$/,@persist_search_vars);
+       
                push @persist_vars, "f$i";
                push @persist_vars, "v$i";
+               push @persist_vars, "e$i" if ($q->param("e$i"));
+
+               # create url parametars (and persistent ones)
+
+               $url_params->{"f$i"} = $q->url_param("f$i");
+               $url_params_persist->{"f$i"} = $q->url_param("f$i") if ($persist);
+
+               my @v;
+
+               foreach my $v ($q->url_param("v$i")) {
+                       # escape quotes so that phrase search work
+                       $v =~ s/"/%22/g;
+                       push @v, $v;
+               }
+               $url_params->{"v$i"} = \@v;
+               $url_params_persist->{"v$i"} = \@v if ($persist);
+
+               if ($q->param("e$i")) {
+                       $url_params->{"e$i"} = $q->url_param("e$i");
+#                      $url_params_persist->{"e$i"} = $q->url_param("e$i");
+               }
+
+               my $wc="*";     # swish wildcard
+               $wc="" if ($i eq "");   # don't apply wildcard on field 0
 
                # re-write query from +/- to and/and not
                my @param_vals = $q->param("v$i");
                my @swish_q;
+               my ($pre,$post,$exact) = ('','','');
                while (my $search = shift @param_vals) {
                        my $s;
                        # remove accents
-                       $search = unac_string($CHARSET,$search);
+                       $search = my_unac_string($CHARSET,$search);
                        while ($search =~ s/\s*("[^"]+")\s*/ /) {
                                $s .= "$1 ";
                        }
                        $search =~ s/^\s+//;
                        $search =~ s/\s+$//;
 
+                       # filed e[nr] is exact match bitmask
+                       # 1 = beginning, 2=end, 3=both
+                       my $exact_flag = $q->param("e$i") || 0;
+                       $pre = '"xxbxx ' if ($exact_flag & 1);
+                       $post = ' xxexx"' if ($exact_flag & 2);
+                       # add qotes on other side
+                       if ($q->param("e$i")) {
+                               $pre = '"' if (! $pre);
+                               $post = '"' if (! $post);
+                               # what about wildcards?
+                               $wc = '';
+                               $wc = '*' if ($q->param("e$i") & 4);
+                               $exact = '_exact';
+                       }
+
                        foreach (split(/\s+/,$search)) {
                                if (m/^([+-])(\S+)/) {
                                        $s.= ($s) ? "and " : "";
                                        $s.="not " if ($1 eq "-");
-                                       $s.="$2* ";
+                                       $s.=$2.$wc." ";
                                } elsif (m/^\s*(and|or|not)\s*$/i) {
-                                       $s.="$_ ";
+                                       $s.=$_." ";
                                # don't add * to words with less than x chars
                                } elsif (length($_) <= $MIN_WILDCARD) {
-                                       $s.="$_ ";
+                                       $s.=$_." ";
                                } else {
-                                       $s.="$_* ";
+                                       $s.=$_.$wc." ";
                                }
                        }
                        $s =~ s/\*+/*/g;
+                       $s =~ s/[()]//g;        # () are used in query language
+                       $s = $pre.$s.$post if ($q->param("e$i"));
                        push @swish_q,$s;
                }
                # FIXME default operator for multi-value fields is or. There is
                # no way to change it, except here for now. Is there need?
-               push @s_arr, $q->param("f$i")."_swish=(".join(" or ",@swish_q).")";
+               push @s_arr, $q->param("f$i")."_swish".$exact."=(".join(" or ",@swish_q).")";
        }
 
-       my $tmpl = $self->load_tmpl('results.html');
+       my $tmpl = $self->load_tmpl(url_ex($q,'results.html'), global_vars => 1, die_on_bad_params => 0);
 
        sub esc_html {
                my $html = shift;
@@ -134,81 +387,137 @@ sub show_results_list {
                return $html;
        }
 
-       # call swish
-       my $sh = SWISH->connect('Fork',
-               prog     => $SWISH,
-               indexes  => $INDEX,
-               properties  => [qw/swishdocpath swishrank swishtitle headline html/],
-               results  => sub {
-                       my ($sh,$hit) = @_;
-
-                       push @swish_results, {
-                               nr => ($#swish_results + 2),
-                               path => $hit->swishdocpath,
-                               headline => esc_html($from_utf8->convert($hit->headline)),
-                               html => back2html($from_utf8->convert($hit->html)),
-                               rank => $hit->swishrank };
-
-               },
-               #startnum => 0,
-               maxhits => $MAX_HITS
-       );
+       my $sort = 'swishrank';
+       if ($q->param("sort")) {
+               $sort = 'headline';
+               push @persist_vars, "sort";
+       }
+
+       my $sortby = $q->param("sortby");
+       if ($sortby) {
+               $sort = $sortby;
+               push @persist_vars, "sortby";
+       }
+       # used to filter entries in index and swish
+       my $filter = $q->param("filter");
 
-       die $SWISH::errstr unless $sh;
        # construct swish query
        my $sw_q = join(" and ",@s_arr);
        if (@path_arr && $q->param('show_full')) {
-               $sw_q .= "and (swishdocpath=\"";
+               $sw_q .= " and (swishdocpath=\"";
                $sw_q .= join("\" or swishdocpath=\"",@path_arr);
                $sw_q .= "\")";
                $tmpl->param('full',1); # show full records
+#      } elsif (@path_arr && $#path_arr == 0) {
+#              # I will assume that it's a filter since there isn't show_full
+#              $filter = shift @path_arr;
+       } elsif ($q->param('show_full')) {
+               # just show full path, no path defined
+               $tmpl->param('full',1);
        } else {
                $tmpl->param('full',0);
        }
 
-       my $hits = $sh->query($sw_q);
+       if ($filter) {
+               $sw_q .= " and (swishdocpath=\"$filter\")" unless (@path_arr);
+               push @persist_vars, "filter";
+               $url_params->{'filter'} = $filter;
+               $url_params_persist->{'filter'} = $filter;
+       }
+
+       my $swish_msg = ' ';
+
+       # create new swish instance
+       my $swish = SWISH::API->new($INDEX);
+       $swish_msg .= $swish->ErrorString." ".$swish->LastErrorMsg if $swish->Error;
+
+       # execute query and get number of results from SWISH-E
+       my $search = $swish->New_Search_Object;
+
+       $search->SetSort($sort);
+
+       my $results = $search->Execute($sw_q);
+       $swish_msg .= $swish->ErrorString." ".$swish->LastErrorMsg if $swish->Error;
+
+       my $hits = $results->Hits;
 
        $tmpl->param('hits',$hits);
-       $tmpl->param('search',$sw_q);
+       my $search_msg = $sw_q;
+       $search_msg .= '<em>'.$swish_msg.'</em>' if ($swish_msg);
+       $tmpl->param('search', $search_msg);
 
        $tmpl->param('PAGER_offset',$q->param("PAGER_offset") || 0);
        $tmpl->param('last_PAGER_offset',$q->param("last_PAGER_offset") || 0);
 
-       # create a Pager object
-       my $pager = HTML::Pager->new(
-               # required parameters
-               query => $q,
-               get_data_callback => sub {
-                       my ($offset, $rows) = @_;
-
-                       my @result;
-                       for (my $i=0; $i<$rows; $i++) {
-                               my $r = $swish_results[$offset+$i];
-                               if ($r && $tmpl->param('full')) {
-                                       push @result, $r;
-                               } elsif ($r) {
-                                       # if not full output, skip html
-                                       delete $r->{html};
-                                       push @result, $r;
-                               }
+       # URL parametars for search results
+       sub cook_url_params {
+               my $hash = shift || return;
+               return join("&", map {
+                       my $var = $_;
+                       if (ref($hash->{$var}) eq 'ARRAY') {
+                               join('&',
+                                       map { $var.'='.$_ } @{$hash->{$var}}
+                               );
+                       } else {
+                               $var."=".$hash->{$var};
                        }
-                       return \@result;
-               },
-               rows => $hits,
-               page_size => $ON_PAGE,
-               # some optional parameters
-               persist_vars => [ @persist_vars ],
-               #cell_space_color => '#000000',
-               #cell_background_color => '#ffffff',
-               #nav_background_color => '#dddddd',
-               #javascript_presubmit => 'last_minute_javascript()',
-               debug => 1,
-               template => $tmpl,
-       );
+               } keys %{$hash});
+       }
 
-       my $html = $pager->output;
+       $tmpl->param('url_params',"?".cook_url_params($url_params));
+       $tmpl->param('url_params_paths',"?".cook_url_params($url_params).'&'.join("&",map { my $t = $_; $t =~ s/\#/%23/g; "path=$t"; } @path_arr));
 
-       return $html;
+
+
+       #
+       # build pager
+       #
+
+       my $current_page = $q->param('PAGER_offset') || 1;
+
+       my $pager = Data::Pageset->new({
+               'total_entries' => $hits,
+               'entries_per_page' => $ON_PAGE,
+               'current_page' => $current_page,
+               'pages_per_set' => $pages_per_set,
+       });
+
+       $results->SeekResult( $pager->first - 1 );
+
+       # get number of entries on this page
+       my $i = $pager->entries_on_this_page;
+
+       # results from swish for template
+       my @pager_data_list;
+
+       for(my $i=$pager->first; $i<=$pager->last; $i++) {
+
+               my $result = $results->NextResult;
+               last if (! $result);
+
+               my $r = {
+                       nr => $i,
+                       path => $result->Property('swishdocpath'),
+                       headline => esc_html($from_utf8->convert($result->Property('headline'))),
+                       rank => $result->Property('swishrank')
+               };
+
+               #$r->{html} = back2html($from_utf8->convert($result->Property('html')), cook_url_params($url_params_persist)) if ($q->param('show_full'));
+               $r->{html} = back2html($from_utf8->convert($result->Property('html')), $filter ? 'filter='.$filter : '') if ($q->param('show_full'));
+
+               push @pager_data_list, $r;
+       }
+
+
+
+       # put something in template
+       make_pager($q, $tmpl, $pager);
+       make_pager_vars($q, $tmpl, @persist_vars);
+       $tmpl->param('PAGER_DATA_LIST', \@pager_data_list);
+
+       my $html = $tmpl->output;
+
+       return in_template($q,$html);
 }
  
 sub show_index {
@@ -220,6 +529,8 @@ sub show_index {
        my $field = $q->param("f$i");
        my $limit = $q->param("v$i");
 
+       my $filter = $q->param("filter");
+
        my $html;
 
        my $index = new index_DBI(
@@ -229,44 +540,46 @@ sub show_index {
                $cfg_global->val('global', 'dbi_passwd') || ''
        );
 
-       my $total = $index->check($field);
-       if (! $total) {
-               my $tmpl = $self->load_tmpl('no_index.html');
+       my $total = $index->count($field,$limit,$filter);
+
+       if (! defined($total)) {
+               my $tmpl = $self->load_tmpl(url_ex($q,'no_index.html'));
                $tmpl->param('field',$field);
                $html = $tmpl->output;
                return $html;
        }
 
-       my $tmpl = $self->load_tmpl('index_res.html');
+       my $tmpl = $self->load_tmpl(url_ex($q,'index_res.html'), global_vars => 1, die_on_bad_params => 0);
        $tmpl->param('field',$field);
        $tmpl->param('limit',$limit);
        $tmpl->param('total',$total);
+       $tmpl->param('filter',$filter);
 
 # FIXME I should set offset and leave out limit from fetch!!
 #      if (! $q->param("PAGER_offset") {
 #              $q->param("Pager_offet)
 #      }
 
-       my $pager = HTML::Pager->new(
-               query => $q,
-               get_data_callback => sub {
-                       my ($offset, $rows) = @_;
-
-                       my @result = $index->fetch($field,'item',$limit, $offset, $rows);
-                       return \@result;
-               },
-               rows => $total,
-               page_size => $ON_PAGE,
-               persist_vars => [
-                       'rm', 
-                       "f$i", "v$i", "f".$i."_index", 
-                       'offset',
-                       ],
-               debug => 1,
-               template => $tmpl,
-       );
 
-       return $pager->output;
+       #
+       # build pager
+       #
+       my $pager = Data::Pageset->new({
+               'total_entries' => $total,
+               'entries_per_page' => $ON_PAGE,
+               'current_page' => $q->param('PAGER_offset') || 1,
+               'pages_per_set' => $pages_per_set
+       });
+
+       my @persist_vars = qw{rm f$i v$i f$i_index offset};
+
+       make_pager($q, $tmpl, $pager);
+       make_pager_vars($q, $tmpl, @persist_vars);
+
+       my @pager_data_list = $index->fetch($field,$limit, $pager->first - 1, $pager->entries_on_this_page, $filter);
+       $tmpl->param('PAGER_DATA_LIST', \@pager_data_list);
+
+       return in_template($q,$tmpl->output);
 }
 
 1;