use base 'CGI::Application';
use strict;
-use HTML::Pager;
use HTML::FillInForm;
-use SWISH;
+use SWISH::API;
use Text::Iconv;
use DBI;
+use Config::IniFiles;
+use Text::Unaccent;
+use Data::Pageset;
+use POSIX qw(locale_h);
use lib '..';
-use index_DBI;
+use index_DBI_filter;
use back2html;
-# configuration options
-# FIX: they really should go in configuration file!
-my $TEMPLATE_PATH = '/data/webpac/template_html';
-my $CHARSET = 'ISO-8859-2';
-my $SWISH = '/usr/bin/swish-e';
-my $INDEX = '/data/webpac/index/isis.index';
-my $MAX_HITS = 500;
-my $ON_PAGE = 10;
-Text::Iconv->raise_error(1); # Conversion errors raise exceptions
+# read global.conf configuration
+my $cfg_global = new Config::IniFiles( -file => '../global.conf' ) || die "can't open 'global.conf'";
+
+# configuration options from global.conf
+my $TEMPLATE_PATH = $cfg_global->val('webpac', 'template_html') || die "need template_html in global.conf, section webpac";
+my $CHARSET = $cfg_global->val('webpac', 'charset') || 'ISO-8859-1';
+my $SWISH = $cfg_global->val('webpac', 'swish') || '/usr/bin/swish-e';
+my $INDEX = $cfg_global->val('webpac', 'index') || die "need index in global.conf, section webpac";
+my $MAX_HITS = $cfg_global->val('webpac', 'max_hits') || 0;
+my $ON_PAGE =$cfg_global->val('webpac', 'on_page') || 10;
+my $MIN_WILDCARD =$cfg_global->val('webpac', 'min_wildcard') || 1;
+my $TEMPLATE =$cfg_global->val('webpac', 'template');
+my $UNAC_FILTER =$cfg_global->val('global', 'my_unac_filter');
+my $BASE_PATH =$cfg_global->val('webpac', 'base_path');
+# for pager
+my $pages_per_set = $cfg_global->val('webpac', 'pages_per_set') || 10;
+my $locale = $cfg_global->val('locale') || 'hr_HR';
+
+Text::Iconv->raise_error(0); # Conversion errors raise exceptions
my $from_utf8 = Text::Iconv->new('UTF8', $CHARSET);
+setlocale(LC_CTYPE, $locale);
+setlocale(LC_COLLATE, $locale);
+
+if ($UNAC_FILTER) {
+ require $UNAC_FILTER;
+} else {
+ sub WebPac::my_unac_string {
+ my ($charset, $string) = (@_);
+ return $string;
+ }
+}
+
+# use path from cgi script to support templates in subdirs
+sub url_ex {
+ my $q = shift || die "suff2file needs CGI object!";
+ my $tpl = shift || die "url_ex needs template name!";
+ return suff2file($BASE_PATH, $q->url(-absolute => 1,-path => 1),$TEMPLATE_PATH,$tpl);
+}
+
+sub suff2file($$$$) {
+ my ($base_path, $p, $path, $tpl) = @_;
+
+ return $tpl if (! $base_path);
+
+ #warn "base_path: $base_path, p: $p, path: $path, tpl: $tpl\n";
+
+ $p =~ s#/[^/]*$##;
+
+ # strip everything to and including base path, leaving only
+ # additional (virtual) path
+ if ($base_path eq "/") {
+ $p =~ s,/*,,g;
+ my ($name,$ext) = split(/\./,$tpl);
+ $p = $name . "-" . $p . "." . $ext;
+ } elsif ($p =~ s,^.*?$base_path,,) {
+ $p =~ s,/*,,g;
+ my ($name,$ext) = split(/\./,$tpl);
+ $p = $name . $p . "." . $ext;
+ } else {
+ # if unable reset it!
+ $p = $tpl;
+ }
+
+ if ( -e "$path/$p") {
+ return $p;
+ } else {
+ return $tpl;
+ }
+
+}
+
sub setup {
my $self = shift;
$self->tmpl_path($TEMPLATE_PATH);
$self->header_props(-charset=>$CHARSET);
}
+sub in_template {
+ my $q = shift || die "need CGI object!";
+ my $html = shift || die "This page is left unintentionally blank";
+ return $html if (! defined($TEMPLATE));
+
+ my ($dir,$tpl);
+ if ($TEMPLATE =~ m,^(.*?/*)([^/]+)$,) {
+ ($dir,$tpl) = ($1,$2);
+ } else {
+ die "can't parse TEMPLATE path";
+ }
+
+ my $master_tpl = suff2file($BASE_PATH, $q->url(-absolute => 1, -path => 1),$dir,$tpl);
+ if (open(T, $master_tpl)) {
+ my $template_html = join("\n",<T>);
+ close(T);
+ $template_html =~ s/##webpac##/$html/gsi;
+ return $template_html;
+ } else {
+ return "Can't read template '$master_tpl'";
+ }
+}
+
+#--------------------------------------------------------------------------
+
+#
+# make pager navigation and fill template variables
+# compatibile with HTML::Pager
+#
+
+sub make_pager($$$) {
+ my ($q,$tmpl,$pager) = @_;
+
+ #
+ # pager navigation
+ #
+ my ($pager_prev,$pager_next, $pager_jump) = ('','','');
+
+ sub url_with_params {
+ my ($q,$text) = @_;
+ my %param = $q->Vars;
+ my @p;
+ foreach my $p ( keys %param ) {
+ my $v = $param{$p};
+ next unless defined $v and length($v) > 0;
+ if ( $v =~ m{\0} ) {
+ push @p, $p . '=' . my_unac_string($CHARSET, $_)
+ foreach (split(/\0/, $v ));
+ } else {
+ push @p, $p . '=' . my_unac_string($CHARSET, $v);
+ }
+ }
+
+ return
+ qq{ <a href="}
+ . $q->url( -relative => 1 )
+ . '?'
+ . join(';', @p)
+ . qq{">$text</a> };
+ }
+
+ if ($pager->current_page() > $pager->first_page) {
+ $q->param('PAGER_offset', $pager->current_page - 1);
+ $pager_prev .= url_with_params( $q, '<<');
+ }
+
+ if ($pager->previous_set) {
+ $q->param('PAGER_offset', $pager->previous_set);
+ $pager_prev .= url_with_params( $q,'..');
+ }
+
+
+ foreach my $p (@{$pager->pages_in_set()}) {
+ next if ($p <= 0);
+ if($p == $pager->current_page()) {
+ $pager_jump .= "<b>$p</b> ";
+ } else {
+ $q->param('PAGER_offset', $p);
+ $pager_jump .= url_with_params($q,$p);
+ }
+ }
+
+ if ($pager->next_set) {
+ $q->param('PAGER_offset', $pager->next_set);
+ $pager_next .= url_with_params($q,'..');
+ }
+
+ if ($pager->current_page() < $pager->last_page) {
+ $q->param('PAGER_offset', $pager->current_page + 1);
+ $pager_next .= url_with_params($q,'>>');
+ }
+
+ $tmpl->param('PAGER_PREV', $pager_prev);
+ $tmpl->param('PAGER_JUMP', $pager_jump);
+ $tmpl->param('PAGER_NEXT', $pager_next);
+
+}
+
+#
+# put persisten variables in template
+#
+
+sub make_pager_vars {
+ my $q = shift @_;
+ my $tmpl = shift @_;
+ my @persist_vars = @_;
+ my $hidden_vars = '';
+ my $hidden_search = '';
+ foreach my $v (@persist_vars) {
+ foreach my $val ($q->param($v)) {
+ next if (! $val || $val eq '');
+ $val =~ s/"/"/g;
+ $hidden_vars .= '<input type="hidden" name="'.$v.'" value="'.$val.'"/>'."\n";
+ $hidden_search .= '<input type="hidden" name="'.$v.'" value="'.$val.'"/>'."\n" if ($v ne "rm");
+ }
+ }
+
+ $tmpl->param('PAGER_HIDDEN', $hidden_vars);
+ $tmpl->param('SEARCH_HIDDEN', $hidden_search);
+ $tmpl->param('PAGER_JAVASCRIPT', qq#
+<SCRIPT LANGUAGE="Javascript">
+<!-- Begin
+ // dummy emulator for HTML::Pager templates
+ function PAGER_set_offset_and_submit() {
+ return true;
+ }
+// End -->
+</script>
+ #);
+}
+
+#--------------------------------------------------------------------------
+
sub show_search_form {
my $self = shift;
# Get the CGI.pm query object
my $q = $self->query();
- my $tmpl = $self->load_tmpl('search.html');
+ my $tmpl = $self->load_tmpl(url_ex($q,'search.html'));
my $html = $tmpl->output;
my $fif = new HTML::FillInForm;
- return $fif->fill(scalarref => \$html, fobject => $q,
- target => 'search');
+ return in_template($q,$fif->fill(scalarref => \$html, fobject => $q,
+ target => 'search'));
}
sub show_results_list {
my $q = $self->query();
- my @swish_results; # results from swish
+ # submit was reset?
+ if ($q->param('reset')) {
+ $q->delete_all;
+ return $self->show_search_form();
+ }
# load template for this page
my @s_arr; # all queries are located here
- for(my $i = 1; $i <=10; $i++) {
+ my @path_arr = $q->param('path');
+ my $full = $q->param('full');
+
+ my @persist_vars = ( 'rm', 'persist_search' );
+ my $url_params = {
+ 'rm' => 'results',
+ 'show_full' => 1,
+ 'last_PAGER_offset' => ($q->param('PAGER_offset') || 0),
+ };
+
+ my @persist_search_vars;
+ my $url_params_persist = {};
+ if ($q->param("persist_search")) {
+ @persist_search_vars = split(/\s*,\s*/, $q->param("persist_search"));
+ $url_params_persist->{'persist_search'} = $q->url_param("persist_search");
+ $url_params->{'persist_search'} = $q->url_param("persist_search");
+ }
+
+ # support parametars "f" and "v" for start
+ for(my $i = 0; $i <=30; $i++) {
+
+ $i = '' if ($i == 0);
return show_index($self, $i) if ($q->param("f".$i."_index"));
+
+ next if (! $q->param("v$i") || $q->param("v$i") eq '');
next if (! $q->param("f$i"));
- next if (! $q->param("v$i"));
- # re-write query from +/- to and/and not
- my $s;
- my $search = $q->param("v$i");
- while ($search =~ s/\s*("[^"]+")\s*/ /) {
- $s .= "$1 ";
+ my $persist = grep(/^$i$/,@persist_search_vars);
+
+ push @persist_vars, "f$i";
+ push @persist_vars, "v$i";
+ push @persist_vars, "e$i" if ($q->param("e$i"));
+
+ # create url parametars (and persistent ones)
+
+ $url_params->{"f$i"} = $q->url_param("f$i");
+ $url_params_persist->{"f$i"} = $q->url_param("f$i") if ($persist);
+
+ my @v;
+
+ foreach my $v ($q->url_param("v$i")) {
+ # escape quotes so that phrase search work
+ $v =~ s/"/%22/g;
+ push @v, $v;
}
- $search =~ s/^\s+//;
- $search =~ s/\s+$//;
-
- foreach (split(/\s+/,$search)) {
- if (m/^([+-])(\S+)/) {
- $s.= ($s) ? "and " : "";
- $s.="not " if ($1 eq "-");
- $s.="$2* ";
- } else {
- $s.="$_* ";
+ $url_params->{"v$i"} = \@v;
+ $url_params_persist->{"v$i"} = \@v if ($persist);
+
+ if ($q->param("e$i")) {
+ $url_params->{"e$i"} = $q->url_param("e$i");
+# $url_params_persist->{"e$i"} = $q->url_param("e$i");
+ }
+
+ my $wc="*"; # swish wildcard
+ $wc="" if ($i eq ""); # don't apply wildcard on field 0
+
+ # re-write query from +/- to and/and not
+ my @param_vals = $q->param("v$i");
+ my @swish_q;
+ my ($pre,$post,$exact) = ('','','');
+ while (my $search = shift @param_vals) {
+ my $s;
+ # remove accents
+ $search = my_unac_string($CHARSET,$search);
+ while ($search =~ s/\s*("[^"]+")\s*/ /) {
+ $s .= "$1 ";
+ }
+ $search =~ s/^\s+//;
+ $search =~ s/\s+$//;
+
+ # filed e[nr] is exact match bitmask
+ # 1 = beginning, 2=end, 3=both
+ my $exact_flag = $q->param("e$i") || 0;
+ $pre = '"xxbxx ' if ($exact_flag & 1);
+ $post = ' xxexx"' if ($exact_flag & 2);
+ # add qotes on other side
+ if ($q->param("e$i")) {
+ $pre = '"' if (! $pre);
+ $post = '"' if (! $post);
+ # what about wildcards?
+ $wc = '';
+ $wc = '*' if ($q->param("e$i") & 4);
+ $exact = '_exact';
+ }
+
+ foreach (split(/\s+/,$search)) {
+ if (m/^([+-])(\S+)/) {
+ $s.= ($s) ? "and " : "";
+ $s.="not " if ($1 eq "-");
+ $s.=$2.$wc." ";
+ } elsif (m/^\s*(and|or|not)\s*$/i) {
+ $s.=$_." ";
+ # don't add * to words with less than x chars
+ } elsif (length($_) <= $MIN_WILDCARD) {
+ $s.=$_." ";
+ } else {
+ $s.=$_.$wc." ";
+ }
}
+ $s =~ s/\*+/*/g;
+ $s =~ s/[()]//g; # () are used in query language
+ $s = $pre.$s.$post if ($q->param("e$i"));
+ push @swish_q,$s;
}
+ # FIXME default operator for multi-value fields is or. There is
+ # no way to change it, except here for now. Is there need?
+ push @s_arr, $q->param("f$i")."_swish".$exact."=(".join(" or ",@swish_q).")";
+ }
+
+ my $tmpl = $self->load_tmpl(url_ex($q,'results.html'), global_vars => 1, die_on_bad_params => 0);
+
+ sub esc_html {
+ my $html = shift;
+ $html =~ s/</</g;
+ $html =~ s/>/>/g;
+ return $html;
+ }
+
+ my $sort = 'swishrank';
+ if ($q->param("sort")) {
+ $sort = 'headline';
+ push @persist_vars, "sort";
+ }
- push @s_arr,$q->param("f$i")."_swish=($s)";
+ my $sortby = $q->param("sortby");
+ if ($sortby) {
+ $sort = $sortby;
+ push @persist_vars, "sortby";
+ }
+ # used to filter entries in index and swish
+ my $filter = $q->param("filter");
+
+ # construct swish query
+ my $sw_q = join(" and ",@s_arr);
+ if (@path_arr && $q->param('show_full')) {
+ $sw_q .= " and (swishdocpath=\"";
+ $sw_q .= join("\" or swishdocpath=\"",@path_arr);
+ $sw_q .= "\")";
+ $tmpl->param('full',1); # show full records
+# } elsif (@path_arr && $#path_arr == 0) {
+# # I will assume that it's a filter since there isn't show_full
+# $filter = shift @path_arr;
+ } elsif ($q->param('show_full')) {
+ # just show full path, no path defined
+ $tmpl->param('full',1);
+ } else {
+ $tmpl->param('full',0);
+ }
+
+ if ($filter) {
+ $sw_q .= " and (swishdocpath=\"$filter\")" unless (@path_arr);
+ push @persist_vars, "filter";
+ $url_params->{'filter'} = $filter;
+ $url_params_persist->{'filter'} = $filter;
}
- my $tmpl = $self->load_tmpl('results.html');
+ my $swish_msg = ' ';
- # call swish
- my $sh = SWISH->connect('Fork',
- prog => $SWISH,
- indexes => $INDEX,
- properties => [qw/swishdocpath swishrank swishtitle headline html/],
- results => sub {
- my ($sh,$hit) = @_;
+ # create new swish instance
+ my $swish = SWISH::API->new($INDEX);
+ $swish_msg .= $swish->ErrorString." ".$swish->LastErrorMsg if $swish->Error;
- push @swish_results, {
- nr => ($#swish_results + 2),
- path => $hit->swishdocpath,
- headline => $from_utf8->convert($hit->headline),
- html => back2html($from_utf8->convert($hit->html)),
- rank => $hit->swishrank };
+ # execute query and get number of results from SWISH-E
+ my $search = $swish->New_Search_Object;
- },
- #startnum => 0,
- maxhits => $MAX_HITS,
- );
+ $search->SetSort($sort);
- die $SWISH::errstr unless $sh;
+ my $results = $search->Execute($sw_q);
+ $swish_msg .= $swish->ErrorString." ".$swish->LastErrorMsg if $swish->Error;
- my $hits = $sh->query(join(" and ",@s_arr)) || 0; # FIX: and/or
+ my $hits = $results->Hits;
$tmpl->param('hits',$hits);
- $tmpl->param('search',join(" and ",@s_arr));
-
- # create a Pager object
- my $pager = HTML::Pager->new(
- # required parameters
- query => $q,
- get_data_callback => sub {
- my ($offset, $rows) = @_;
-
- my @result;
- for (my $i=0; $i<$rows; $i++) {
- push @result, $swish_results[$offset+$i] if $swish_results[$offset+$i];
+ my $search_msg = $sw_q;
+ $search_msg .= '<em>'.$swish_msg.'</em>' if ($swish_msg);
+ $tmpl->param('search', $search_msg);
+
+ $tmpl->param('PAGER_offset',$q->param("PAGER_offset") || 0);
+ $tmpl->param('last_PAGER_offset',$q->param("last_PAGER_offset") || 0);
+
+ # URL parametars for search results
+ sub cook_url_params {
+ my $hash = shift || return;
+ return join("&", map {
+ my $var = $_;
+ if (ref($hash->{$var}) eq 'ARRAY') {
+ join('&',
+ map { $var.'='.$_ } @{$hash->{$var}}
+ );
+ } else {
+ $var."=".$hash->{$var};
}
- return \@result;
- },
- rows => $hits,
- page_size => $ON_PAGE,
- # some optional parameters
- persist_vars => [
- 'rm',
- 'f1', 'v1',
- 'f2', 'v2',
- 'f3', 'v3',
- 'f4', 'v4',
- 'f5', 'v5',
- 'f6', 'v6',
- 'f7', 'v7',
- 'f8', 'v8',
- 'f9', 'v9',
- ],
- #cell_space_color => '#000000',
- #cell_background_color => '#ffffff',
- #nav_background_color => '#dddddd',
- #javascript_presubmit => 'last_minute_javascript()',
- debug => 1,
- template => $tmpl,
- );
+ } keys %{$hash});
+ }
- my $html = $pager->output;
+ $tmpl->param('url_params',"?".cook_url_params($url_params));
+ $tmpl->param('url_params_paths',"?".cook_url_params($url_params).'&'.join("&",map { my $t = $_; $t =~ s/\#/%23/g; "path=$t"; } @path_arr));
- return $html;
+
+
+ #
+ # build pager
+ #
+
+ my $current_page = $q->param('PAGER_offset') || 1;
+
+ my $pager = Data::Pageset->new({
+ 'total_entries' => $hits,
+ 'entries_per_page' => $ON_PAGE,
+ 'current_page' => $current_page,
+ 'pages_per_set' => $pages_per_set,
+ });
+
+ $results->SeekResult( $pager->first - 1 );
+
+ # get number of entries on this page
+ my $i = $pager->entries_on_this_page;
+
+ # results from swish for template
+ my @pager_data_list;
+
+ for(my $i=$pager->first; $i<=$pager->last; $i++) {
+
+ my $result = $results->NextResult;
+ last if (! $result);
+
+ my $r = {
+ nr => $i,
+ path => $result->Property('swishdocpath'),
+ headline => esc_html($from_utf8->convert($result->Property('headline'))),
+ rank => $result->Property('swishrank')
+ };
+
+ #$r->{html} = back2html($from_utf8->convert($result->Property('html')), cook_url_params($url_params_persist)) if ($q->param('show_full'));
+ $r->{html} = back2html($from_utf8->convert($result->Property('html')), $filter ? 'filter='.$filter : '') if ($q->param('show_full'));
+
+ push @pager_data_list, $r;
+ }
+
+
+
+ # put something in template
+ make_pager($q, $tmpl, $pager);
+ make_pager_vars($q, $tmpl, @persist_vars);
+ $tmpl->param('PAGER_DATA_LIST', \@pager_data_list);
+
+ my $html = $tmpl->output;
+
+ return in_template($q,$html);
}
sub show_index {
my $field = $q->param("f$i");
my $limit = $q->param("v$i");
+ my $filter = $q->param("filter");
+
my $html;
- my $index = new index_DBI();
+ my $index = new index_DBI(
+ $cfg_global->val('global', 'dbi_dbd'),
+ $cfg_global->val('global', 'dbi_dsn'),
+ $cfg_global->val('global', 'dbi_user'),
+ $cfg_global->val('global', 'dbi_passwd') || ''
+ );
+
+ my $total = $index->count($field,$limit,$filter);
- my $total = $index->check($field);
- if (! $total) {
- my $tmpl = $self->load_tmpl('no_index.html');
+ if (! defined($total)) {
+ my $tmpl = $self->load_tmpl(url_ex($q,'no_index.html'));
$tmpl->param('field',$field);
$html = $tmpl->output;
return $html;
}
- my $tmpl = $self->load_tmpl('index_res.html');
+ my $tmpl = $self->load_tmpl(url_ex($q,'index_res.html'), global_vars => 1, die_on_bad_params => 0);
$tmpl->param('field',$field);
$tmpl->param('limit',$limit);
$tmpl->param('total',$total);
+ $tmpl->param('filter',$filter);
- my $pager = HTML::Pager->new(
- query => $q,
- get_data_callback => sub {
- my ($offset, $rows) = @_;
-
- my @result = $index->fetch($field,'item',$limit, $offset, $rows);
- return \@result;
- },
- rows => $total,
- page_size => $ON_PAGE,
- persist_vars => [
- 'rm',
- "f$i", "v$i", "f".$i."_index",
- 'offset',
- ],
- debug => 1,
- template => $tmpl,
- );
+# FIXME I should set offset and leave out limit from fetch!!
+# if (! $q->param("PAGER_offset") {
+# $q->param("Pager_offet)
+# }
+
+
+ #
+ # build pager
+ #
+ my $pager = Data::Pageset->new({
+ 'total_entries' => $total,
+ 'entries_per_page' => $ON_PAGE,
+ 'current_page' => $q->param('PAGER_offset') || 1,
+ 'pages_per_set' => $pages_per_set
+ });
+
+ my @persist_vars = qw{rm f$i v$i f$i_index offset};
+
+ make_pager($q, $tmpl, $pager);
+ make_pager_vars($q, $tmpl, @persist_vars);
+
+ my @pager_data_list = $index->fetch($field,$limit, $pager->first - 1, $pager->entries_on_this_page, $filter);
+ $tmpl->param('PAGER_DATA_LIST', \@pager_data_list);
- return $pager->output;
+ return in_template($q,$tmpl->output);
}
1;