C4/Search.pm

   1 package C4::Search;
   2
   3 # This file is part of Koha.
   4 #
   5 # Koha is free software; you can redistribute it and/or modify it under the
   6 # terms of the GNU General Public License as published by the Free Software
   7 # Foundation; either version 2 of the License, or (at your option) any later
   8 # version.
   9 #
  10 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
  11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12 # A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU General Public License along with
  15 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  16 # Suite 330, Boston, MA  02111-1307 USA
  17
  18 use strict;
  19 # use warnings; # FIXME
  20 require Exporter;
  21 use C4::Context;
  22 use C4::Biblio;    # GetMarcFromKohaField, GetBiblioData
  23 use C4::Koha;      # getFacets
  24 use Lingua::Stem;
  25 use C4::Search::PazPar2;
  26 use XML::Simple;
  27 use C4::Dates qw(format_date);
  28 use C4::XSLT;
  29 use C4::Branch;
  30 use URI::Escape;
  31
  32 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
  33
  34 # set the version for version checking
  35 BEGIN {
  36     $VERSION = 3.01;
  37     $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
  38 }
  39
  40 =head1 NAME
  41
  42 C4::Search - Functions for searching the Koha catalog.
  43
  44 =head1 SYNOPSIS
  45
  46 See opac/opac-search.pl or catalogue/search.pl for example of usage
  47
  48 =head1 DESCRIPTION
  49
  50 This module provides searching functions for Koha's bibliographic databases
  51
  52 =head1 FUNCTIONS
  53
  54 =cut
  55
  56 @ISA    = qw(Exporter);
  57 @EXPORT = qw(
  58   &FindDuplicate
  59   &SimpleSearch
  60   &searchResults
  61   &getRecords
  62   &buildQuery
  63   &NZgetRecords
  64 );
  65
  66 # make all your functions, whether exported or not;
  67
  68 =head2 FindDuplicate
  69
  70 ($biblionumber,$biblionumber,$title) = FindDuplicate($record);
  71
  72 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
  73
  74 =cut
  75
  76 sub FindDuplicate {
  77     my ($record) = @_;
  78     my $dbh = C4::Context->dbh;
  79     my $result = TransformMarcToKoha( $dbh, $record, '' );
  80     my $sth;
  81     my $query;
  82     my $search;
  83     my $type;
  84     my ( $biblionumber, $title );
  85
  86     # search duplicate on ISBN, easy and fast..
  87     # ... normalize first
  88     if ( $result->{isbn} ) {
  89         $result->{isbn} =~ s/\(.*$//;
  90         $result->{isbn} =~ s/\s+$//;
  91         $query = "isbn=$result->{isbn}";
  92     }
  93     else {
  94         $result->{title} =~ s /\\//g;
  95         $result->{title} =~ s /\"//g;
  96         $result->{title} =~ s /\(//g;
  97         $result->{title} =~ s /\)//g;
  98
  99         # FIXME: instead of removing operators, could just do
 100         # quotes around the value
 101         $result->{title} =~ s/(and|or|not)//g;
 102         $query = "ti,ext=$result->{title}";
 103         $query .= " and itemtype=$result->{itemtype}"
 104           if ( $result->{itemtype} );
 105         if   ( $result->{author} ) {
 106             $result->{author} =~ s /\\//g;
 107             $result->{author} =~ s /\"//g;
 108             $result->{author} =~ s /\(//g;
 109             $result->{author} =~ s /\)//g;
 110
 111             # remove valid operators
 112             $result->{author} =~ s/(and|or|not)//g;
 113             $query .= " and au,ext=$result->{author}";
 114         }
 115     }
 116
 117     # FIXME: add error handling
 118     my ( $error, $searchresults ) = SimpleSearch($query); # FIXME :: hardcoded !
 119     my @results;
 120     foreach my $possible_duplicate_record (@$searchresults) {
 121         my $marcrecord =
 122           MARC::Record->new_from_usmarc($possible_duplicate_record);
 123         my $result = TransformMarcToKoha( $dbh, $marcrecord, '' );
 124
 125         # FIXME :: why 2 $biblionumber ?
 126         if ($result) {
 127             push @results, $result->{'biblionumber'};
 128             push @results, $result->{'title'};
 129         }
 130     }
 131     return @results;
 132 }
 133
 134 =head2 SimpleSearch
 135
 136 ( $error, $results, $total_hits ) = SimpleSearch( $query, $offset, $max_results, [@servers] );
 137
 138 This function provides a simple search API on the bibliographic catalog
 139
 140 =over 2
 141
 142 =item C<input arg:>
 143
 144     * $query can be a simple keyword or a complete CCL query
 145     * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
 146     * $offset - If present, represents the number of records at the beggining to omit. Defaults to 0
 147     * $max_results - if present, determines the maximum number of records to fetch. undef is All. defaults to undef.
 148
 149
 150 =item C<Output:>
 151
 152     * $error is a empty unless an error is detected
 153     * \@results is an array of records.
 154     * $total_hits is the number of hits that would have been returned with no limit
 155
 156 =item C<usage in the script:>
 157
 158 =back
 159
 160 my ( $error, $marcresults, $total_hits ) = SimpleSearch($query);
 161
 162 if (defined $error) {
 163     $template->param(query_error => $error);
 164     warn "error: ".$error;
 165     output_html_with_http_headers $input, $cookie, $template->output;
 166     exit;
 167 }
 168
 169 my $hits = scalar @$marcresults;
 170 my @results;
 171
 172 for my $i (0..$hits) {
 173     my %resultsloop;
 174     my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]);
 175     my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,'');
 176
 177     #build the hash for the template.
 178     $resultsloop{highlight}       = ($i % 2)?(1):(0);
 179     $resultsloop{title}           = $biblio->{'title'};
 180     $resultsloop{subtitle}        = $biblio->{'subtitle'};
 181     $resultsloop{biblionumber}    = $biblio->{'biblionumber'};
 182     $resultsloop{author}          = $biblio->{'author'};
 183     $resultsloop{publishercode}   = $biblio->{'publishercode'};
 184     $resultsloop{publicationyear} = $biblio->{'publicationyear'};
 185
 186     push @results, \%resultsloop;
 187 }
 188
 189 $template->param(result=>\@results);
 190
 191 =cut
 192
 193 sub SimpleSearch {
 194     my ( $query, $offset, $max_results, $servers )  = @_;
 195
 196     if ( C4::Context->preference('NoZebra') ) {
 197         my $result = NZorder( NZanalyse($query) )->{'biblioserver'};
 198         my $search_result =
 199           (      $result->{hits}
 200               && $result->{hits} > 0 ? $result->{'RECORDS'} : [] );
 201         return ( undef, $search_result, scalar($result->{hits}) );
 202     }
 203     else {
 204         # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
 205         my @servers = defined ( $servers ) ? @$servers : ( "biblioserver" );
 206         my @results;
 207         my @zoom_queries;
 208         my @tmpresults;
 209         my @zconns;
 210         my $total_hits;
 211         return ( "No query entered", undef, undef ) unless $query;
 212
 213         # Initialize & Search Zebra
 214         for ( my $i = 0 ; $i < @servers ; $i++ ) {
 215             eval {
 216                 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 217                 $zoom_queries[$i] = new ZOOM::Query::CCL2RPN( $query, $zconns[$i]);
 218                 $tmpresults[$i] = $zconns[$i]->search( $zoom_queries[$i] );
 219
 220                 # error handling
 221                 my $error =
 222                     $zconns[$i]->errmsg() . " ("
 223                   . $zconns[$i]->errcode() . ") "
 224                   . $zconns[$i]->addinfo() . " "
 225                   . $zconns[$i]->diagset();
 226
 227                 return ( $error, undef, undef ) if $zconns[$i]->errcode();
 228             };
 229             if ($@) {
 230
 231                 # caught a ZOOM::Exception
 232                 my $error =
 233                     $@->message() . " ("
 234                   . $@->code() . ") "
 235                   . $@->addinfo() . " "
 236                   . $@->diagset();
 237                 warn $error;
 238                 return ( $error, undef, undef );
 239             }
 240         }
 241         while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 242             my $event = $zconns[ $i - 1 ]->last_event();
 243             if ( $event == ZOOM::Event::ZEND ) {
 244
 245                 my $first_record = defined( $offset ) ? $offset+1 : 1;
 246                 my $hits = $tmpresults[ $i - 1 ]->size();
 247                 $total_hits += $hits;
 248                 my $last_record = $hits;
 249                 if ( defined $max_results && $offset + $max_results < $hits ) {
 250                     $last_record  = $offset + $max_results;
 251                 }
 252
 253                 for my $j ( $first_record..$last_record ) {
 254                     my $record = $tmpresults[ $i - 1 ]->record( $j-1 )->raw(); # 0 indexed
 255                     push @results, $record;
 256                 }
 257             }
 258         }
 259
 260         foreach my $result (@tmpresults) {
 261             $result->destroy();
 262         }
 263         foreach my $zoom_query (@zoom_queries) {
 264             $zoom_query->destroy();
 265         }
 266
 267         return ( undef, \@results, $total_hits );
 268     }
 269 }
 270
 271 =head2 getRecords
 272
 273 ( undef, $results_hashref, \@facets_loop ) = getRecords (
 274
 275         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 276         $results_per_page, $offset,       $expanded_facet, $branches,
 277         $query_type,       $scan
 278     );
 279
 280 The all singing, all dancing, multi-server, asynchronous, scanning,
 281 searching, record nabbing, facet-building
 282
 283 See verbse embedded documentation.
 284
 285 =cut
 286
 287 sub getRecords {
 288     my (
 289         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 290         $results_per_page, $offset,       $expanded_facet, $branches,
 291         $query_type,       $scan
 292     ) = @_;
 293
 294     my @servers = @$servers_ref;
 295     my @sort_by = @$sort_by_ref;
 296
 297     # Initialize variables for the ZOOM connection and results object
 298     my $zconn;
 299     my @zconns;
 300     my @results;
 301     my $results_hashref = ();
 302
 303     # Initialize variables for the faceted results objects
 304     my $facets_counter = ();
 305     my $facets_info    = ();
 306     my $facets         = getFacets();
 307
 308     my @facets_loop
 309       ;    # stores the ref to array of hashes for template facets loop
 310
 311     ### LOOP THROUGH THE SERVERS
 312     for ( my $i = 0 ; $i < @servers ; $i++ ) {
 313         $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 314
 315 # perform the search, create the results objects
 316 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
 317         my $query_to_use = ($servers[$i] =~ /biblioserver/) ? $koha_query : $simple_query;
 318
 319         #$query_to_use = $simple_query if $scan;
 320         warn $simple_query if ( $scan and $DEBUG );
 321
 322         # Check if we've got a query_type defined, if so, use it
 323         eval {
 324             if ($query_type) {
 325                 if ($query_type =~ /^ccl/) {
 326                     $query_to_use =~ s/\:/\=/g;    # change : to = last minute (FIXME)
 327                     $results[$i] = $zconns[$i]->search(new ZOOM::Query::CCL2RPN($query_to_use, $zconns[$i]));
 328                 } elsif ($query_type =~ /^cql/) {
 329                     $results[$i] = $zconns[$i]->search(new ZOOM::Query::CQL($query_to_use, $zconns[$i]));
 330                 } elsif ($query_type =~ /^pqf/) {
 331                     $results[$i] = $zconns[$i]->search(new ZOOM::Query::PQF($query_to_use, $zconns[$i]));
 332                 } else {
 333                     warn "Unknown query_type '$query_type'.  Results undetermined.";
 334                 }
 335             } elsif ($scan) {
 336                     $results[$i] = $zconns[$i]->scan(  new ZOOM::Query::CCL2RPN($query_to_use, $zconns[$i]));
 337             } else {
 338                     $results[$i] = $zconns[$i]->search(new ZOOM::Query::CCL2RPN($query_to_use, $zconns[$i]));
 339             }
 340         };
 341         if ($@) {
 342             warn "WARNING: query problem with $query_to_use " . $@;
 343         }
 344
 345         # Concatenate the sort_by limits and pass them to the results object
 346         # Note: sort will override rank
 347         my $sort_by;
 348         foreach my $sort (@sort_by) {
 349             if ( $sort eq "author_az" ) {
 350                 $sort_by .= "1=1003 <i ";
 351             }
 352             elsif ( $sort eq "author_za" ) {
 353                 $sort_by .= "1=1003 >i ";
 354             }
 355             elsif ( $sort eq "popularity_asc" ) {
 356                 $sort_by .= "1=9003 <i ";
 357             }
 358             elsif ( $sort eq "popularity_dsc" ) {
 359                 $sort_by .= "1=9003 >i ";
 360             }
 361             elsif ( $sort eq "call_number_asc" ) {
 362                 $sort_by .= "1=20  <i ";
 363             }
 364             elsif ( $sort eq "call_number_dsc" ) {
 365                 $sort_by .= "1=20 >i ";
 366             }
 367             elsif ( $sort eq "pubdate_asc" ) {
 368                 $sort_by .= "1=31 <i ";
 369             }
 370             elsif ( $sort eq "pubdate_dsc" ) {
 371                 $sort_by .= "1=31 >i ";
 372             }
 373             elsif ( $sort eq "acqdate_asc" ) {
 374                 $sort_by .= "1=32 <i ";
 375             }
 376             elsif ( $sort eq "acqdate_dsc" ) {
 377                 $sort_by .= "1=32 >i ";
 378             }
 379             elsif ( $sort eq "title_az" ) {
 380                 $sort_by .= "1=4 <i ";
 381             }
 382             elsif ( $sort eq "title_za" ) {
 383                 $sort_by .= "1=4 >i ";
 384             }
 385             else {
 386                 warn "Ignoring unrecognized sort '$sort' requested" if $sort_by;
 387             }
 388         }
 389         if ($sort_by) {
 390             if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
 391                 warn "WARNING sort $sort_by failed";
 392             }
 393         }
 394     }    # finished looping through servers
 395
 396     # The big moment: asynchronously retrieve results from all servers
 397     while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 398         my $ev = $zconns[ $i - 1 ]->last_event();
 399         if ( $ev == ZOOM::Event::ZEND ) {
 400             next unless $results[ $i - 1 ];
 401             my $size = $results[ $i - 1 ]->size();
 402             if ( $size > 0 ) {
 403                 my $results_hash;
 404
 405                 # loop through the results
 406                 $results_hash->{'hits'} = $size;
 407                 my $times;
 408                 if ( $offset + $results_per_page <= $size ) {
 409                     $times = $offset + $results_per_page;
 410                 }
 411                 else {
 412                     $times = $size;
 413                 }
 414                 for ( my $j = $offset ; $j < $times ; $j++ ) {
 415                     my $records_hash;
 416                     my $record;
 417                     my $facet_record;
 418
 419                     ## Check if it's an index scan
 420                     if ($scan) {
 421                         my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
 422
 423                  # here we create a minimal MARC record and hand it off to the
 424                  # template just like a normal result ... perhaps not ideal, but
 425                  # it works for now
 426                         my $tmprecord = MARC::Record->new();
 427                         $tmprecord->encoding('UTF-8');
 428                         my $tmptitle;
 429                         my $tmpauthor;
 430
 431                 # the minimal record in author/title (depending on MARC flavour)
 432                         if (C4::Context->preference("marcflavour") eq "UNIMARC") {
 433                             $tmptitle = MARC::Field->new('200',' ',' ', a => $term, f => $occ);
 434                             $tmprecord->append_fields($tmptitle);
 435                         } else {
 436                             $tmptitle  = MARC::Field->new('245',' ',' ', a => $term,);
 437                             $tmpauthor = MARC::Field->new('100',' ',' ', a => $occ,);
 438                             $tmprecord->append_fields($tmptitle);
 439                             $tmprecord->append_fields($tmpauthor);
 440                         }
 441                         $results_hash->{'RECORDS'}[$j] = $tmprecord->as_usmarc();
 442                     }
 443
 444                     # not an index scan
 445                     else {
 446                         $record = $results[ $i - 1 ]->record($j)->raw();
 447
 448                         # warn "RECORD $j:".$record;
 449                         $results_hash->{'RECORDS'}[$j] = $record;
 450
 451             # Fill the facets while we're looping, but only for the biblioserver
 452                         $facet_record = MARC::Record->new_from_usmarc($record)
 453                           if $servers[ $i - 1 ] =~ /biblioserver/;
 454
 455                     #warn $servers[$i-1]."\n".$record; #.$facet_record->title();
 456                         if ($facet_record) {
 457                             for ( my $k = 0 ; $k <= @$facets ; $k++ ) {
 458                                 ($facets->[$k]) or next;
 459                                 my @fields = map {$facet_record->field($_)} @{$facets->[$k]->{'tags'}} ;
 460                                 for my $field (@fields) {
 461                                     my @subfields = $field->subfields();
 462                                     for my $subfield (@subfields) {
 463                                         my ( $code, $data ) = @$subfield;
 464                                         ($code eq $facets->[$k]->{'subfield'}) or next;
 465                                         $facets_counter->{ $facets->[$k]->{'link_value'} }->{$data}++;
 466                                     }
 467                                 }
 468                                 $facets_info->{ $facets->[$k]->{'link_value'} }->{'label_value'} =
 469                                     $facets->[$k]->{'label_value'};
 470                                 $facets_info->{ $facets->[$k]->{'link_value'} }->{'expanded'} =
 471                                     $facets->[$k]->{'expanded'};
 472                             }
 473                         }
 474                     }
 475                 }
 476                 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
 477             }
 478
 479             # warn "connection ", $i-1, ": $size hits";
 480             # warn $results[$i-1]->record(0)->render() if $size > 0;
 481
 482             # BUILD FACETS
 483             if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
 484                 for my $link_value (
 485                     sort { $facets_counter->{$b} <=> $facets_counter->{$a} }
 486                         keys %$facets_counter )
 487                 {
 488                     my $expandable;
 489                     my $number_of_facets;
 490                     my @this_facets_array;
 491                     for my $one_facet (
 492                         sort {
 493                              $facets_counter->{$link_value}->{$b}
 494                          <=> $facets_counter->{$link_value}->{$a}
 495                         } keys %{ $facets_counter->{$link_value} }
 496                       )
 497                     {
 498                         $number_of_facets++;
 499                         if (   ( $number_of_facets < 6 )
 500                             || ( $expanded_facet eq $link_value )
 501                             || ( $facets_info->{$link_value}->{'expanded'} ) )
 502                         {
 503
 504                       # Sanitize the link value ), ( will cause errors with CCL,
 505                             my $facet_link_value = $one_facet;
 506                             $facet_link_value =~ s/(\(|\))/ /g;
 507
 508                             # fix the length that will display in the label,
 509                             my $facet_label_value = $one_facet;
 510                             $facet_label_value =
 511                               substr( $one_facet, 0, 20 ) . "..."
 512                               unless length($facet_label_value) <= 20;
 513
 514                             # if it's a branch, label by the name, not the code,
 515                             if ( $link_value =~ /branch/ ) {
 516                                 $facet_label_value =
 517                                   $branches->{$one_facet}->{'branchname'};
 518                             }
 519
 520                             # but we're down with the whole label being in the link's title.
 521                             push @this_facets_array, {
 522                                 facet_count       => $facets_counter->{$link_value}->{$one_facet},
 523                                 facet_label_value => $facet_label_value,
 524                                 facet_title_value => $one_facet,
 525                                 facet_link_value  => $facet_link_value,
 526                                 type_link_value   => $link_value,
 527                             };
 528                         }
 529                     }
 530
 531                     # handle expanded option
 532                     unless ( $facets_info->{$link_value}->{'expanded'} ) {
 533                         $expandable = 1
 534                           if ( ( $number_of_facets > 6 )
 535                             && ( $expanded_facet ne $link_value ) );
 536                     }
 537                     push @facets_loop, {
 538                         type_link_value => $link_value,
 539                         type_id         => $link_value . "_id",
 540                         "type_label_" . $facets_info->{$link_value}->{'label_value'} => 1,
 541                         facets     => \@this_facets_array,
 542                         expandable => $expandable,
 543                         expand     => $link_value,
 544                     } unless ( ($facets_info->{$link_value}->{'label_value'} =~ /Libraries/) and (C4::Context->preference('singleBranchMode')) );
 545                 }
 546             }
 547         }
 548     }
 549     return ( undef, $results_hashref, \@facets_loop );
 550 }
 551
 552 sub pazGetRecords {
 553     my (
 554         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 555         $results_per_page, $offset,       $expanded_facet, $branches,
 556         $query_type,       $scan
 557     ) = @_;
 558
 559     my $paz = C4::Search::PazPar2->new(C4::Context->config('pazpar2url'));
 560     $paz->init();
 561     $paz->search($simple_query);
 562     sleep 1;
 563
 564     # do results
 565     my $results_hashref = {};
 566     my $stats = XMLin($paz->stat);
 567     my $results = XMLin($paz->show($offset, $results_per_page, 'work-title:1'), forcearray => 1);
 568
 569     # for a grouped search result, the number of hits
 570     # is the number of groups returned; 'bib_hits' will have
 571     # the total number of bibs.
 572     $results_hashref->{'biblioserver'}->{'hits'} = $results->{'merged'}->[0];
 573     $results_hashref->{'biblioserver'}->{'bib_hits'} = $stats->{'hits'};
 574
 575     HIT: foreach my $hit (@{ $results->{'hit'} }) {
 576         my $recid = $hit->{recid}->[0];
 577
 578         my $work_title = $hit->{'md-work-title'}->[0];
 579         my $work_author;
 580         if (exists $hit->{'md-work-author'}) {
 581             $work_author = $hit->{'md-work-author'}->[0];
 582         }
 583         my $group_label = (defined $work_author) ? "$work_title / $work_author" : $work_title;
 584
 585         my $result_group = {};
 586         $result_group->{'group_label'} = $group_label;
 587         $result_group->{'group_merge_key'} = $recid;
 588
 589         my $count = 1;
 590         if (exists $hit->{count}) {
 591             $count = $hit->{count}->[0];
 592         }
 593         $result_group->{'group_count'} = $count;
 594
 595         for (my $i = 0; $i < $count; $i++) {
 596             # FIXME -- may need to worry about diacritics here
 597             my $rec = $paz->record($recid, $i);
 598             push @{ $result_group->{'RECORDS'} }, $rec;
 599         }
 600
 601         push @{ $results_hashref->{'biblioserver'}->{'GROUPS'} }, $result_group;
 602     }
 603
 604     # pass through facets
 605     my $termlist_xml = $paz->termlist('author,subject');
 606     my $terms = XMLin($termlist_xml, forcearray => 1);
 607     my @facets_loop = ();
 608     #die Dumper($results);
 609 #    foreach my $list (sort keys %{ $terms->{'list'} }) {
 610 #        my @facets = ();
 611 #        foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) {
 612 #            push @facets, {
 613 #                facet_label_value => $facet->{'name'}->[0],
 614 #            };
 615 #        }
 616 #        push @facets_loop, ( {
 617 #            type_label => $list,
 618 #            facets => \@facets,
 619 #        } );
 620 #    }
 621
 622     return ( undef, $results_hashref, \@facets_loop );
 623 }
 624
 625 # STOPWORDS
 626 sub _remove_stopwords {
 627     my ( $operand, $index ) = @_;
 628     my @stopwords_removed;
 629
 630     # phrase and exact-qualified indexes shouldn't have stopwords removed
 631     if ( $index !~ m/phr|ext/ ) {
 632
 633 # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
 634 #       we use IsAlpha unicode definition, to deal correctly with diacritics.
 635 #       otherwise, a French word like "leçon" woudl be split into "le" "çon", "le"
 636 #       is a stopword, we'd get "çon" and wouldn't find anything...
 637                 foreach ( keys %{ C4::Context->stopwords } ) {
 638                         next if ( $_ =~ /(and|or|not)/ );    # don't remove operators
 639                         if ( my ($matched) = ($operand =~
 640                                 /(\P{IsAlnum}\Q$_\E\P{IsAlnum}|^\Q$_\E\P{IsAlnum}|\P{IsAlnum}\Q$_\E$|^\Q$_\E$)/gi) )
 641                         {
 642                                 $operand =~ s/\Q$matched\E/ /gi;
 643                                 push @stopwords_removed, $_;
 644                         }
 645                 }
 646         }
 647     return ( $operand, \@stopwords_removed );
 648 }
 649
 650 # TRUNCATION
 651 sub _detect_truncation {
 652     my ( $operand, $index ) = @_;
 653     my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
 654         @regexpr );
 655     $operand =~ s/^ //g;
 656     my @wordlist = split( /\s/, $operand );
 657     foreach my $word (@wordlist) {
 658         if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
 659             push @rightlefttruncated, $word;
 660         }
 661         elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
 662             push @lefttruncated, $word;
 663         }
 664         elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
 665             push @righttruncated, $word;
 666         }
 667         elsif ( index( $word, "*" ) < 0 ) {
 668             push @nontruncated, $word;
 669         }
 670         else {
 671             push @regexpr, $word;
 672         }
 673     }
 674     return (
 675         \@nontruncated,       \@righttruncated, \@lefttruncated,
 676         \@rightlefttruncated, \@regexpr
 677     );
 678 }
 679
 680 # STEMMING
 681 sub _build_stemmed_operand {
 682     my ($operand) = @_;
 683     my $stemmed_operand;
 684
 685     # If operand contains a digit, it is almost certainly an identifier, and should
 686     # not be stemmed.  This is particularly relevant for ISBNs and ISSNs, which
 687     # can contain the letter "X" - for example, _build_stemmend_operand would reduce
 688     # "014100018X" to "x ", which for a MARC21 database would bring up irrelevant
 689     # results (e.g., "23 x 29 cm." from the 300$c).  Bug 2098.
 690     return $operand if $operand =~ /\d/;
 691
 692 # FIXME: the locale should be set based on the user's language and/or search choice
 693     my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
 694
 695 # FIXME: these should be stored in the db so the librarian can modify the behavior
 696     $stemmer->add_exceptions(
 697         {
 698             'and' => 'and',
 699             'or'  => 'or',
 700             'not' => 'not',
 701         }
 702     );
 703     my @words = split( / /, $operand );
 704     my $stems = $stemmer->stem(@words);
 705     for my $stem (@$stems) {
 706         $stemmed_operand .= "$stem";
 707         $stemmed_operand .= "?"
 708           unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
 709         $stemmed_operand .= " ";
 710     }
 711     warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
 712     return $stemmed_operand;
 713 }
 714
 715 # FIELD WEIGHTING
 716 sub _build_weighted_query {
 717
 718 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
 719 # pretty well but could work much better if we had a smarter query parser
 720     my ( $operand, $stemmed_operand, $index ) = @_;
 721     my $stemming      = C4::Context->preference("QueryStemming")     || 0;
 722     my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
 723     my $fuzzy_enabled = C4::Context->preference("QueryFuzzy")        || 0;
 724
 725     my $weighted_query .= "(rk=(";    # Specifies that we're applying rank
 726
 727     # Keyword, or, no index specified
 728     if ( ( $index eq 'kw' ) || ( !$index ) ) {
 729         $weighted_query .=
 730           "Title-cover,ext,r1=\"$operand\"";    # exact title-cover
 731         $weighted_query .= " or ti,ext,r2=\"$operand\"";    # exact title
 732         $weighted_query .= " or ti,phr,r3=\"$operand\"";    # phrase title
 733           #$weighted_query .= " or any,ext,r4=$operand";               # exact any
 734           #$weighted_query .=" or kw,wrdl,r5=\"$operand\"";            # word list any
 735         $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
 736           if $fuzzy_enabled;    # add fuzzy, word list
 737         $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
 738           if ( $stemming and $stemmed_operand )
 739           ;                     # add stemming, right truncation
 740         $weighted_query .= " or wrdl,r9=\"$operand\"";
 741
 742         # embedded sorting: 0 a-z; 1 z-a
 743         # $weighted_query .= ") or (sort1,aut=1";
 744     }
 745
 746     # Barcode searches should skip this process
 747     elsif ( $index eq 'bc' ) {
 748         $weighted_query .= "bc=\"$operand\"";
 749     }
 750
 751     # Authority-number searches should skip this process
 752     elsif ( $index eq 'an' ) {
 753         $weighted_query .= "an=\"$operand\"";
 754     }
 755
 756     # If the index already has more than one qualifier, wrap the operand
 757     # in quotes and pass it back (assumption is that the user knows what they
 758     # are doing and won't appreciate us mucking up their query
 759     elsif ( $index =~ ',' ) {
 760         $weighted_query .= " $index=\"$operand\"";
 761     }
 762
 763     #TODO: build better cases based on specific search indexes
 764     else {
 765         $weighted_query .= " $index,ext,r1=\"$operand\"";    # exact index
 766           #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
 767         $weighted_query .= " or $index,phr,r3=\"$operand\"";    # phrase index
 768         $weighted_query .=
 769           " or $index,rt,wrdl,r3=\"$operand\"";    # word list index
 770     }
 771
 772     $weighted_query .= "))";                       # close rank specification
 773     return $weighted_query;
 774 }
 775
 776 =head2 buildQuery
 777
 778 ( $error, $query,
 779 $simple_query, $query_cgi,
 780 $query_desc, $limit,
 781 $limit_cgi, $limit_desc,
 782 $stopwords_removed, $query_type ) = getRecords ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
 783
 784 Build queries and limits in CCL, CGI, Human,
 785 handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
 786
 787 See verbose embedded documentation.
 788
 789
 790 =cut
 791
 792 sub buildQuery {
 793     my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_;
 794
 795     warn "---------\nEnter buildQuery\n---------" if $DEBUG;
 796
 797     # dereference
 798     my @operators = $operators ? @$operators : ();
 799     my @indexes   = $indexes   ? @$indexes   : ();
 800     my @operands  = $operands  ? @$operands  : ();
 801     my @limits    = $limits    ? @$limits    : ();
 802     my @sort_by   = $sort_by   ? @$sort_by   : ();
 803
 804     my $stemming         = C4::Context->preference("QueryStemming")        || 0;
 805     my $auto_truncation  = C4::Context->preference("QueryAutoTruncate")    || 0;
 806     my $weight_fields    = C4::Context->preference("QueryWeightFields")    || 0;
 807     my $fuzzy_enabled    = C4::Context->preference("QueryFuzzy")           || 0;
 808     my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
 809
 810     # no stemming/weight/fuzzy in NoZebra
 811     if ( C4::Context->preference("NoZebra") ) {
 812         $stemming      = 0;
 813         $weight_fields = 0;
 814         $fuzzy_enabled = 0;
 815     }
 816
 817     my $query        = $operands[0];
 818     my $simple_query = $operands[0];
 819
 820     # initialize the variables we're passing back
 821     my $query_cgi;
 822     my $query_desc;
 823     my $query_type;
 824
 825     my $limit;
 826     my $limit_cgi;
 827     my $limit_desc;
 828
 829     my $stopwords_removed;    # flag to determine if stopwords have been removed
 830
 831 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
 832 # DIAGNOSTIC ONLY!!
 833     if ( $query =~ /^ccl=/ ) {
 834         return ( undef, $', $', "q=ccl=$'", $', '', '', '', '', 'ccl' );
 835     }
 836     if ( $query =~ /^cql=/ ) {
 837         return ( undef, $', $', "q=cql=$'", $', '', '', '', '', 'cql' );
 838     }
 839     if ( $query =~ /^pqf=/ ) {
 840         return ( undef, $', $', "q=pqf=$'", $', '', '', '', '', 'pqf' );
 841     }
 842
 843     # pass nested queries directly
 844     # FIXME: need better handling of some of these variables in this case
 845     if ( $query =~ /(\(|\))/ ) {
 846         return (
 847             undef,              $query, $simple_query, $query_cgi,
 848             $query,             $limit, $limit_cgi,    $limit_desc,
 849             $stopwords_removed, 'ccl'
 850         );
 851     }
 852
 853 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
 854 # query operands and indexes and add stemming, truncation, field weighting, etc.
 855 # Once we do so, we'll end up with a value in $query, just like if we had an
 856 # incoming $query from the user
 857     else {
 858         $query = ""
 859           ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
 860         my $previous_operand
 861           ;    # a flag used to keep track if there was a previous query
 862                # if there was, we can apply the current operator
 863                # for every operand
 864         for ( my $i = 0 ; $i <= @operands ; $i++ ) {
 865
 866             # COMBINE OPERANDS, INDEXES AND OPERATORS
 867             if ( $operands[$i] ) {
 868
 869               # A flag to determine whether or not to add the index to the query
 870                 my $indexes_set;
 871
 872 # If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling
 873                 if ( $operands[$i] =~ /(:|=)/ || $scan ) {
 874                     $weight_fields    = 0;
 875                     $stemming         = 0;
 876                     $remove_stopwords = 0;
 877                 }
 878                 my $operand = $operands[$i];
 879                 my $index   = $indexes[$i];
 880
 881                 # Add index-specific attributes
 882                 # Date of Publication
 883                 if ( $index eq 'yr' ) {
 884                     $index .= ",st-numeric";
 885 #                     $indexes_set++;
 886                                         $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
 887                 }
 888
 889                 # Date of Acquisition
 890                 elsif ( $index eq 'acqdate' ) {
 891                     $index .= ",st-date-normalized";
 892 #                     $indexes_set++;
 893                                         $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
 894                 }
 895                 # ISBN,ISSN,Standard Number, don't need special treatment
 896                 elsif ( $index eq 'nb' || $index eq 'ns' ) {
 897 #                     $indexes_set++;
 898                     (
 899                         $stemming,      $auto_truncation,
 900                         $weight_fields, $fuzzy_enabled,
 901                         $remove_stopwords
 902                     ) = ( 0, 0, 0, 0, 0 );
 903
 904                 }
 905                 # Set default structure attribute (word list)
 906                 my $struct_attr;
 907                 unless ( $indexes_set || !$index || $index =~ /(st-|phr|ext|wrdl)/ ) {
 908                     $struct_attr = ",wrdl";
 909                 }
 910
 911                 # Some helpful index variants
 912                 my $index_plus       = $index . $struct_attr . ":" if $index;
 913                 my $index_plus_comma = $index . $struct_attr . "," if $index;
 914
 915                 # Remove Stopwords
 916                 if ($remove_stopwords) {
 917                     ( $operand, $stopwords_removed ) =
 918                       _remove_stopwords( $operand, $index );
 919                     warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
 920                     warn "REMOVED STOPWORDS: @$stopwords_removed"
 921                       if ( $stopwords_removed && $DEBUG );
 922                 }
 923
 924                 if ($auto_truncation){
 925                                         #FIXME only valid with LTR scripts
 926                                         $operand=join(" ",map{
 927                                                                                         "$_*"
 928                                                                              }split (/\s+/,$operand));
 929                         warn $operand if $DEBUG;
 930                                 }
 931
 932                 # Detect Truncation
 933                 my $truncated_operand;
 934                 my( $nontruncated, $righttruncated, $lefttruncated,
 935                     $rightlefttruncated, $regexpr
 936                 ) = _detect_truncation( $operand, $index );
 937                 warn
 938 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
 939                   if $DEBUG;
 940
 941                 # Apply Truncation
 942                 if (
 943                     scalar(@$righttruncated) + scalar(@$lefttruncated) +
 944                     scalar(@$rightlefttruncated) > 0 )
 945                 {
 946
 947                # Don't field weight or add the index to the query, we do it here
 948                     $indexes_set = 1;
 949                     undef $weight_fields;
 950                     my $previous_truncation_operand;
 951                     if (scalar @$nontruncated) {
 952                         $truncated_operand .= "$index_plus @$nontruncated ";
 953                         $previous_truncation_operand = 1;
 954                     }
 955                     if (scalar @$righttruncated) {
 956                         $truncated_operand .= "and " if $previous_truncation_operand;
 957                         $truncated_operand .= $index_plus_comma . "rtrn:@$righttruncated ";
 958                         $previous_truncation_operand = 1;
 959                     }
 960                     if (scalar @$lefttruncated) {
 961                         $truncated_operand .= "and " if $previous_truncation_operand;
 962                         $truncated_operand .= $index_plus_comma . "ltrn:@$lefttruncated ";
 963                         $previous_truncation_operand = 1;
 964                     }
 965                     if (scalar @$rightlefttruncated) {
 966                         $truncated_operand .= "and " if $previous_truncation_operand;
 967                         $truncated_operand .= $index_plus_comma . "rltrn:@$rightlefttruncated ";
 968                         $previous_truncation_operand = 1;
 969                     }
 970                 }
 971                 $operand = $truncated_operand if $truncated_operand;
 972                 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
 973
 974                 # Handle Stemming
 975                 my $stemmed_operand;
 976                 $stemmed_operand = _build_stemmed_operand($operand) if $stemming;
 977
 978                 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
 979
 980                 # Handle Field Weighting
 981                 my $weighted_operand;
 982                 if ($weight_fields) {
 983                     $weighted_operand = _build_weighted_query( $operand, $stemmed_operand, $index );
 984                     $operand = $weighted_operand;
 985                     $indexes_set = 1;
 986                 }
 987
 988                 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
 989
 990                 # If there's a previous operand, we need to add an operator
 991                 if ($previous_operand) {
 992
 993                     # User-specified operator
 994                     if ( $operators[ $i - 1 ] ) {
 995                         $query     .= " $operators[$i-1] ";
 996                         $query     .= " $index_plus " unless $indexes_set;
 997                         $query     .= " $operand";
 998                         $query_cgi .= "&op=$operators[$i-1]";
 999                         $query_cgi .= "&idx=$index" if $index;
1000                         $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1001                         $query_desc .=
1002                           " $operators[$i-1] $index_plus $operands[$i]";
1003                     }
1004
1005                     # Default operator is and
1006                     else {
1007                         $query      .= " and ";
1008                         $query      .= "$index_plus " unless $indexes_set;
1009                         $query      .= "$operand";
1010                         $query_cgi  .= "&op=and&idx=$index" if $index;
1011                         $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1012                         $query_desc .= " and $index_plus $operands[$i]";
1013                     }
1014                 }
1015
1016                 # There isn't a pervious operand, don't need an operator
1017                 else {
1018
1019                     # Field-weighted queries already have indexes set
1020                     $query .= " $index_plus " unless $indexes_set;
1021                     $query .= $operand;
1022                     $query_desc .= " $index_plus $operands[$i]";
1023                     $query_cgi  .= "&idx=$index" if $index;
1024                     $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1025                     $previous_operand = 1;
1026                 }
1027             }    #/if $operands
1028         }    # /for
1029     }
1030     warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
1031
1032     # add limits
1033     my $group_OR_limits;
1034     my $availability_limit;
1035     foreach my $this_limit (@limits) {
1036         if ( $this_limit =~ /available/ ) {
1037
1038 # 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1039 # In English:
1040 # all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1041             $availability_limit .=
1042 "( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and (lost,st-numeric=0) )"; #or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )";
1043             $limit_cgi  .= "&limit=available";
1044             $limit_desc .= "";
1045         }
1046
1047         # group_OR_limits, prefixed by mc-
1048         # OR every member of the group
1049         elsif ( $this_limit =~ /mc/ ) {
1050             $group_OR_limits .= " or " if $group_OR_limits;
1051             $limit_desc      .= " or " if $group_OR_limits;
1052             $group_OR_limits .= "$this_limit";
1053             $limit_cgi       .= "&limit=$this_limit";
1054             $limit_desc      .= " $this_limit";
1055         }
1056
1057         # Regular old limits
1058         else {
1059             $limit .= " and " if $limit || $query;
1060             $limit      .= "$this_limit";
1061             $limit_cgi  .= "&limit=$this_limit";
1062             if ($this_limit =~ /^branch:(.+)/) {
1063                 my $branchcode = $1;
1064                 my $branchname = GetBranchName($branchcode);
1065                 if (defined $branchname) {
1066                     $limit_desc .= " branch:$branchname";
1067                 } else {
1068                     $limit_desc .= " $this_limit";
1069                 }
1070             } else {
1071                 $limit_desc .= " $this_limit";
1072             }
1073         }
1074     }
1075     if ($group_OR_limits) {
1076         $limit .= " and " if ( $query || $limit );
1077         $limit .= "($group_OR_limits)";
1078     }
1079     if ($availability_limit) {
1080         $limit .= " and " if ( $query || $limit );
1081         $limit .= "($availability_limit)";
1082     }
1083
1084     # Normalize the query and limit strings
1085     $query =~ s/:/=/g;
1086     $limit =~ s/:/=/g;
1087     for ( $query, $query_desc, $limit, $limit_desc ) {
1088         s/  / /g;    # remove extra spaces
1089         s/^ //g;     # remove any beginning spaces
1090         s/ $//g;     # remove any ending spaces
1091         s/==/=/g;    # remove double == from query
1092     }
1093     $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1094
1095     for ($query_cgi,$simple_query) {
1096         s/"//g;
1097     }
1098     # append the limit to the query
1099     $query .= " " . $limit;
1100
1101     # Warnings if DEBUG
1102     if ($DEBUG) {
1103         warn "QUERY:" . $query;
1104         warn "QUERY CGI:" . $query_cgi;
1105         warn "QUERY DESC:" . $query_desc;
1106         warn "LIMIT:" . $limit;
1107         warn "LIMIT CGI:" . $limit_cgi;
1108         warn "LIMIT DESC:" . $limit_desc;
1109         warn "---------\nLeave buildQuery\n---------";
1110     }
1111     return (
1112         undef,              $query, $simple_query, $query_cgi,
1113         $query_desc,        $limit, $limit_cgi,    $limit_desc,
1114         $stopwords_removed, $query_type
1115     );
1116 }
1117
1118 =head2 searchResults
1119
1120 Format results in a form suitable for passing to the template
1121
1122 =cut
1123
1124 # IMO this subroutine is pretty messy still -- it's responsible for
1125 # building the HTML output for the template
1126 sub searchResults {
1127     my ( $searchdesc, $hits, $results_per_page, $offset, $scan, @marcresults ) = @_;
1128     my $dbh = C4::Context->dbh;
1129     my $even = 1;
1130     my @newresults;
1131
1132     # add search-term highlighting via <span>s on the search terms
1133     my $span_terms_hashref;
1134     for my $span_term ( split( / /, $searchdesc ) ) {
1135         $span_term =~ s/(.*=|\)|\(|\+|\.|\*)//g;
1136         $span_terms_hashref->{$span_term}++;
1137     }
1138
1139     #Build branchnames hash
1140     #find branchname
1141     #get branch information.....
1142     my %branches;
1143     my $bsth =
1144       $dbh->prepare("SELECT branchcode,branchname FROM branches")
1145       ;    # FIXME : use C4::Koha::GetBranches
1146     $bsth->execute();
1147     while ( my $bdata = $bsth->fetchrow_hashref ) {
1148         $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'};
1149     }
1150 # FIXME - We build an authorised values hash here, using the default framework
1151 # though it is possible to have different authvals for different fws.
1152
1153     my $shelflocations =GetKohaAuthorisedValues('items.location','');
1154
1155     # get notforloan authorised value list (see $shelflocations  FIXME)
1156     my $notforloan_authorised_value = GetAuthValCode('items.notforloan','');
1157
1158     #Build itemtype hash
1159     #find itemtype & itemtype image
1160     my %itemtypes;
1161     $bsth =
1162       $dbh->prepare(
1163         "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes"
1164       );
1165     $bsth->execute();
1166     while ( my $bdata = $bsth->fetchrow_hashref ) {
1167                 foreach (qw(description imageurl summary notforloan)) {
1168                 $itemtypes{ $bdata->{'itemtype'} }->{$_} = $bdata->{$_};
1169                 }
1170     }
1171
1172     #search item field code
1173     my $sth =
1174       $dbh->prepare(
1175 "SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'"
1176       );
1177     $sth->execute;
1178     my ($itemtag) = $sth->fetchrow;
1179
1180     ## find column names of items related to MARC
1181     my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items");
1182     $sth2->execute;
1183     my %subfieldstosearch;
1184     while ( ( my $column ) = $sth2->fetchrow ) {
1185         my ( $tagfield, $tagsubfield ) =
1186           &GetMarcFromKohaField( "items." . $column, "" );
1187         $subfieldstosearch{$column} = $tagsubfield;
1188     }
1189
1190     # handle which records to actually retrieve
1191     my $times;
1192     if ( $hits && $offset + $results_per_page <= $hits ) {
1193         $times = $offset + $results_per_page;
1194     }
1195     else {
1196         $times = $hits;  # FIXME: if $hits is undefined, why do we want to equal it?
1197     }
1198
1199     # loop through all of the records we've retrieved
1200     for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
1201         my $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
1202         my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' );
1203         $oldbiblio->{subtitle} = C4::Biblio::get_koha_field_from_marc('bibliosubtitle', 'subtitle', $marcrecord, '');
1204         $oldbiblio->{result_number} = $i + 1;
1205
1206         # add imageurl to itemtype if there is one
1207         $oldbiblio->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
1208
1209         $oldbiblio->{'authorised_value_images'}  = C4::Items::get_authorised_value_images( C4::Biblio::get_biblio_authorised_values( $oldbiblio->{'biblionumber'}, $marcrecord ) );
1210         (my $aisbn) = $oldbiblio->{isbn} =~ /([\d-]*[X]*)/;
1211         $aisbn =~ s/-//g;
1212         $oldbiblio->{amazonisbn} = $aisbn;
1213         $oldbiblio->{description} = $itemtypes{ $oldbiblio->{itemtype} }->{description};
1214         $oldbiblio->{normalized_upc} = GetNormalizedUPC($marcrecord,$marcflavour);
1215         $oldbiblio->{normalized_ean} = GetNormalizedEAN($marcrecord,$marcflavour);
1216         $oldbiblio->{normalized_oclc} = GetNormalizedOCLCNumber($marcrecord,$marcflavour);
1217         $oldbiblio->{normalized_isbn} = GetNormalizedISBN(undef,$marcrecord,$marcflavour);
1218         $oldbiblio->{content_identifier_exists} = 1 if ($oldbiblio->{normalized_isbn} or $oldbiblio->{normalized_oclc} or $oldbiblio->{normalized_ean} or $oldbiblio->{normalized_upc});
1219
1220         # edition information, if any
1221         $oldbiblio->{edition} = $oldbiblio->{editionstatement};
1222         $oldbiblio->{description} = $itemtypes{ $oldbiblio->{itemtype} }->{description};
1223  # Build summary if there is one (the summary is defined in the itemtypes table)
1224  # FIXME: is this used anywhere, I think it can be commented out? -- JF
1225         if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
1226             my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1227             my @fields  = $marcrecord->fields();
1228             foreach my $field (@fields) {
1229                 my $tag      = $field->tag();
1230                 my $tagvalue = $field->as_string();
1231                 if (! utf8::is_utf8($tagvalue)) {
1232                     utf8::decode($tagvalue);
1233                 }
1234
1235                 $summary =~
1236                   s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
1237                 unless ( $tag < 10 ) {
1238                     my @subf = $field->subfields;
1239                     for my $i ( 0 .. $#subf ) {
1240                         my $subfieldcode  = $subf[$i][0];
1241                         my $subfieldvalue = $subf[$i][1];
1242                         if (! utf8::is_utf8($subfieldvalue)) {
1243                             utf8::decode($subfieldvalue);
1244                         }
1245                         my $tagsubf       = $tag . $subfieldcode;
1246                         $summary =~
1247 s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
1248                     }
1249                 }
1250             }
1251             # FIXME: yuk
1252             $summary =~ s/\[(.*?)]//g;
1253             $summary =~ s/\n/<br\/>/g;
1254             $oldbiblio->{summary} = $summary;
1255         }
1256
1257         # save an author with no <span> tag, for the <a href=search.pl?q=<!--tmpl_var name="author"-->> link
1258         $oldbiblio->{'author_nospan'} = $oldbiblio->{'author'};
1259         $oldbiblio->{'title_nospan'} = $oldbiblio->{'title'};
1260         $oldbiblio->{'subtitle_nospan'} = $oldbiblio->{'subtitle'};
1261         # Add search-term highlighting to the whole record where they match using <span>s
1262         if (C4::Context->preference("OpacHighlightedWords")){
1263             my $searchhighlightblob;
1264             for my $highlight_field ( $marcrecord->fields ) {
1265
1266     # FIXME: need to skip title, subtitle, author, etc., as they are handled below
1267                 next if $highlight_field->tag() =~ /(^00)/;    # skip fixed fields
1268                 for my $subfield ($highlight_field->subfields()) {
1269                     my $match;
1270                     next if $subfield->[0] eq '9';
1271                     my $field = $subfield->[1];
1272                     for my $term ( keys %$span_terms_hashref ) {
1273                         if ( ( $field =~ /$term/i ) && (( length($term) > 3 ) || ($field =~ / $term /i)) ) {
1274                             $field =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1275                         $match++;
1276                         }
1277                     }
1278                     $searchhighlightblob .= $field . " ... " if $match;
1279                 }
1280
1281             }
1282             $searchhighlightblob = ' ... '.$searchhighlightblob if $searchhighlightblob;
1283             $oldbiblio->{'searchhighlightblob'} = $searchhighlightblob;
1284         }
1285
1286         # Add search-term highlighting to the title, subtitle, etc. fields
1287         for my $term ( keys %$span_terms_hashref ) {
1288             my $old_term = $term;
1289             if ( length($term) > 3 ) {
1290                 $term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\]|\\|\*)//g;
1291                                 foreach(qw(title subtitle author publishercode place pages notes size)) {
1292                         $oldbiblio->{$_} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1293                                 }
1294             }
1295         }
1296
1297         ($i % 2) and $oldbiblio->{'toggle'} = 1;
1298
1299         # Pull out the items fields
1300         my @fields = $marcrecord->field($itemtag);
1301
1302         # Setting item statuses for display
1303         my @available_items_loop;
1304         my @onloan_items_loop;
1305         my @notforloan_items_loop;
1306         my @other_items_loop;
1307
1308         my $available_items;
1309         my $onloan_items;
1310         my $notforloan_items;
1311         my $other_items;
1312
1313         my $ordered_count         = 0;
1314         my $available_count       = 0;
1315         my $onloan_count          = 0;
1316         my $notforloan_count      = 0;
1317         my $longoverdue_count     = 0;
1318         my $other_count           = 0;
1319         my $wthdrawn_count        = 0;
1320         my $itemlost_count        = 0;
1321         my $itembinding_count     = 0;
1322         my $itemdamaged_count     = 0;
1323         my $item_in_transit_count = 0;
1324         my $can_place_holds       = 0;
1325         my $items_count           = scalar(@fields);
1326         my $maxitems =
1327           ( C4::Context->preference('maxItemsinSearchResults') )
1328           ? C4::Context->preference('maxItemsinSearchResults') - 1
1329           : 1;
1330
1331         # loop through every item
1332         foreach my $field (@fields) {
1333             my $item;
1334
1335             # populate the items hash
1336             foreach my $code ( keys %subfieldstosearch ) {
1337                 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1338             }
1339                         my $hbranch     = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'homebranch'    : 'holdingbranch';
1340                         my $otherbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'holdingbranch' : 'homebranch';
1341             # set item's branch name, use HomeOrHoldingBranch syspref first, fall back to the other one
1342             if ($item->{$hbranch}) {
1343                 $item->{'branchname'} = $branches{$item->{$hbranch}};
1344             }
1345             elsif ($item->{$otherbranch}) {     # Last resort
1346                 $item->{'branchname'} = $branches{$item->{$otherbranch}};
1347             }
1348
1349             ($item->{'reserved'}) = C4::Reserves::CheckReserves($item->{itemnumber});
1350
1351                         my $prefix = $item->{$hbranch} . '--' . $item->{location} . $item->{itype} . $item->{itemcallnumber};
1352 # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
1353             if ( $item->{onloan} or $item->{reserved} ) {
1354                 $onloan_count++;
1355                                 my $key = $prefix . $item->{onloan} . $item->{barcode};
1356                                 $onloan_items->{$key}->{due_date} = format_date($item->{onloan});
1357                                 $onloan_items->{$key}->{count}++ if $item->{$hbranch};
1358                                 $onloan_items->{$key}->{branchname} = $item->{branchname};
1359                                 $onloan_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1360                                 $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber};
1361                                 $onloan_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1362                                 $onloan_items->{$key}->{barcode} = $item->{barcode};
1363                 # if something's checked out and lost, mark it as 'long overdue'
1364                 if ( $item->{itemlost} ) {
1365                     $onloan_items->{$prefix}->{longoverdue}++;
1366                     $longoverdue_count++;
1367                 } else {        # can place holds as long as item isn't lost
1368                     $can_place_holds = 1;
1369                 }
1370             }
1371
1372          # items not on loan, but still unavailable ( lost, withdrawn, damaged )
1373             else {
1374
1375                 # item is on order
1376                 if ( $item->{notforloan} == -1 ) {
1377                     $ordered_count++;
1378                 }
1379
1380                 # is item in transit?
1381                 my $transfertwhen = '';
1382                 my ($transfertfrom, $transfertto);
1383
1384                 unless ($item->{wthdrawn}
1385                         || $item->{itemlost}
1386                         || $item->{damaged}
1387                         || $item->{notforloan}
1388                         || $items_count > 20) {
1389
1390                     # A couple heuristics to limit how many times
1391                     # we query the database for item transfer information, sacrificing
1392                     # accuracy in some cases for speed;
1393                     #
1394                     # 1. don't query if item has one of the other statuses
1395                     # 2. don't check transit status if the bib has
1396                     #    more than 20 items
1397                     #
1398                     # FIXME: to avoid having the query the database like this, and to make
1399                     #        the in transit status count as unavailable for search limiting,
1400                     #        should map transit status to record indexed in Zebra.
1401                     #
1402                     ($transfertwhen, $transfertfrom, $transfertto) = C4::Circulation::GetTransfers($item->{itemnumber});
1403                 }
1404
1405                 # item is withdrawn, lost or damaged
1406                 if (   $item->{wthdrawn}
1407                     || $item->{itemlost}
1408                     || $item->{damaged}
1409                     || $item->{notforloan}
1410                     || $item->{reserved}
1411                     || ($transfertwhen ne ''))
1412                 {
1413                     $wthdrawn_count++        if $item->{wthdrawn};
1414                     $itemlost_count++        if $item->{itemlost};
1415                     $itemdamaged_count++     if $item->{damaged};
1416                     $item_in_transit_count++ if $transfertwhen ne '';
1417                     $item->{status} = $item->{wthdrawn} . "-" . $item->{itemlost} . "-" . $item->{damaged} . "-" . $item->{notforloan};
1418
1419                                         my $key = $prefix . $item->{status};
1420
1421                                         foreach (qw(wthdrawn itemlost damaged branchname itemcallnumber)) {
1422                                             if($item->{notforloan} == 1){
1423                                                 $notforloan_items->{$key}->{$_} = $item->{$_};
1424                                             }else{
1425                            $other_items->{$key}->{$_} = $item->{$_};
1426                                             }
1427                                         }
1428                                         if($item->{notforloan} == 1){
1429                         $notforloan_count++;
1430
1431                         $notforloan_items->{$key}->{intransit} = ($transfertwhen ne '') ? 1 : 0;
1432                                         $notforloan_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1433                                         $notforloan_items->{$key}->{count}++ if $item->{$hbranch};
1434                                         $notforloan_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1435                                         $notforloan_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1436                                         $notforloan_items->{$key}->{barcode} = $item->{barcode};
1437                     }else{
1438                         $other_count++;
1439
1440                         $other_items->{$key}->{intransit} = ($transfertwhen ne '') ? 1 : 0;
1441                                         $other_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1442                                         $other_items->{$key}->{count}++ if $item->{$hbranch};
1443                                         $other_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1444                                         $other_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1445                                         $other_items->{$key}->{barcode} = $item->{barcode};
1446                     }
1447
1448                 }
1449                 # item is available
1450                 else {
1451                     $can_place_holds = 1;
1452                     $available_count++;
1453                                         $available_items->{$prefix}->{count}++ if $item->{$hbranch};
1454                                         foreach (qw(branchname itemcallnumber barcode)) {
1455                         $available_items->{$prefix}->{$_} = $item->{$_};
1456                                         }
1457                                         $available_items->{$prefix}->{location} = $shelflocations->{ $item->{location} };
1458                                         $available_items->{$prefix}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1459                 }
1460             }
1461         }    # notforloan, item level and biblioitem level
1462         my ( $availableitemscount, $onloanitemscount, $notforloanitemscount,$otheritemscount );
1463         $maxitems =
1464           ( C4::Context->preference('maxItemsinSearchResults') )
1465           ? C4::Context->preference('maxItemsinSearchResults') - 1
1466           : 1;
1467         for my $key ( sort keys %$onloan_items ) {
1468             (++$onloanitemscount > $maxitems) and last;
1469             push @onloan_items_loop, $onloan_items->{$key};
1470         }
1471         for my $key ( sort keys %$other_items ) {
1472             (++$otheritemscount > $maxitems) and last;
1473             push @other_items_loop, $other_items->{$key};
1474         }
1475         for my $key ( sort keys %$notforloan_items ) {
1476             (++$notforloanitemscount > $maxitems) and last;
1477             push @notforloan_items_loop, $notforloan_items->{$key};
1478         }
1479         for my $key ( sort keys %$available_items ) {
1480             (++$availableitemscount > $maxitems) and last;
1481             push @available_items_loop, $available_items->{$key}
1482         }
1483
1484         # XSLT processing of some stuff
1485         if (C4::Context->preference("XSLTResultsDisplay") && !$scan) {
1486             $oldbiblio->{XSLTResultsRecord} = XSLTParse4Display(
1487                 $oldbiblio->{biblionumber}, $marcrecord, 'Results' );
1488         }
1489
1490         # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
1491         $can_place_holds = 0 if $itemtypes{ $oldbiblio->{itemtype} }->{notforloan};
1492         $oldbiblio->{norequests} = 1 unless $can_place_holds;
1493         $oldbiblio->{itemsplural}          = 1 if $items_count > 1;
1494         $oldbiblio->{items_count}          = $items_count;
1495         $oldbiblio->{available_items_loop} = \@available_items_loop;
1496         $oldbiblio->{notforloan_items_loop}= \@notforloan_items_loop;
1497         $oldbiblio->{onloan_items_loop}    = \@onloan_items_loop;
1498         $oldbiblio->{other_items_loop}     = \@other_items_loop;
1499         $oldbiblio->{availablecount}       = $available_count;
1500         $oldbiblio->{availableplural}      = 1 if $available_count > 1;
1501         $oldbiblio->{onloancount}          = $onloan_count;
1502         $oldbiblio->{onloanplural}         = 1 if $onloan_count > 1;
1503         $oldbiblio->{notforloancount}      = $notforloan_count;
1504         $oldbiblio->{othercount}           = $other_count;
1505         $oldbiblio->{otherplural}          = 1 if $other_count > 1;
1506         $oldbiblio->{wthdrawncount}        = $wthdrawn_count;
1507         $oldbiblio->{itemlostcount}        = $itemlost_count;
1508         $oldbiblio->{damagedcount}         = $itemdamaged_count;
1509         $oldbiblio->{intransitcount}       = $item_in_transit_count;
1510         $oldbiblio->{orderedcount}         = $ordered_count;
1511         $oldbiblio->{isbn} =~
1512           s/-//g;    # deleting - in isbn to enable amazon content
1513         push( @newresults, $oldbiblio );
1514     }
1515     return @newresults;
1516 }
1517
1518 #----------------------------------------------------------------------
1519 #
1520 # Non-Zebra GetRecords#
1521 #----------------------------------------------------------------------
1522
1523 =head2 NZgetRecords
1524
1525   NZgetRecords has the same API as zera getRecords, even if some parameters are not managed
1526
1527 =cut
1528
1529 sub NZgetRecords {
1530     my (
1531         $query,            $simple_query, $sort_by_ref,    $servers_ref,
1532         $results_per_page, $offset,       $expanded_facet, $branches,
1533         $query_type,       $scan
1534     ) = @_;
1535     warn "query =$query" if $DEBUG;
1536     my $result = NZanalyse($query);
1537     warn "results =$result" if $DEBUG;
1538     return ( undef,
1539         NZorder( $result, @$sort_by_ref[0], $results_per_page, $offset ),
1540         undef );
1541 }
1542
1543 =head2 NZanalyse
1544
1545   NZanalyse : get a CQL string as parameter, and returns a list of biblionumber;title,biblionumber;title,...
1546   the list is built from an inverted index in the nozebra SQL table
1547   note that title is here only for convenience : the sorting will be very fast when requested on title
1548   if the sorting is requested on something else, we will have to reread all results, and that may be longer.
1549
1550 =cut
1551
1552 sub NZanalyse {
1553     my ( $string, $server ) = @_;
1554 #     warn "---------"       if $DEBUG;
1555     warn " NZanalyse" if $DEBUG;
1556 #     warn "---------"       if $DEBUG;
1557
1558  # $server contains biblioserver or authorities, depending on what we search on.
1559  #warn "querying : $string on $server";
1560     $server = 'biblioserver' unless $server;
1561
1562 # if we have a ", replace the content to discard temporarily any and/or/not inside
1563     my $commacontent;
1564     if ( $string =~ /"/ ) {
1565         $string =~ s/"(.*?)"/__X__/;
1566         $commacontent = $1;
1567         warn "commacontent : $commacontent" if $DEBUG;
1568     }
1569
1570 # split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y"
1571 # then, call again NZanalyse with $left and $right
1572 # (recursive until we find a leaf (=> something without and/or/not)
1573 # delete repeated operator... Would then go in infinite loop
1574     while ( $string =~ s/( and| or| not| AND| OR| NOT)\1/$1/g ) {
1575     }
1576
1577     #process parenthesis before.
1578     if ( $string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/ ) {
1579         my $left     = $1;
1580         my $right    = $4;
1581         my $operator = lc($3);   # FIXME: and/or/not are operators, not operands
1582         warn
1583 "dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right"
1584           if $DEBUG;
1585         my $leftresult = NZanalyse( $left, $server );
1586         if ($operator) {
1587             my $rightresult = NZanalyse( $right, $server );
1588
1589             # OK, we have the results for right and left part of the query
1590             # depending of operand, intersect, union or exclude both lists
1591             # to get a result list
1592             if ( $operator eq ' and ' ) {
1593                 return NZoperatorAND($leftresult,$rightresult);
1594             }
1595             elsif ( $operator eq ' or ' ) {
1596
1597                 # just merge the 2 strings
1598                 return $leftresult . $rightresult;
1599             }
1600             elsif ( $operator eq ' not ' ) {
1601                 return NZoperatorNOT($leftresult,$rightresult);
1602             }
1603         }
1604         else {
1605 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1606             return $leftresult;
1607         }
1608     }
1609     warn "string :" . $string if $DEBUG;
1610     my $left = "";
1611     my $right = "";
1612     my $operator = "";
1613     if ($string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/) {
1614         $left     = $1;
1615         $right    = $3;
1616         $operator = lc($2);    # FIXME: and/or/not are operators, not operands
1617     }
1618     warn "no parenthesis. left : $left operator: $operator right: $right"
1619       if $DEBUG;
1620
1621     # it's not a leaf, we have a and/or/not
1622     if ($operator) {
1623
1624         # reintroduce comma content if needed
1625         $right =~ s/__X__/"$commacontent"/ if $commacontent;
1626         $left  =~ s/__X__/"$commacontent"/ if $commacontent;
1627         warn "node : $left / $operator / $right\n" if $DEBUG;
1628         my $leftresult  = NZanalyse( $left,  $server );
1629         my $rightresult = NZanalyse( $right, $server );
1630         warn " leftresult : $leftresult" if $DEBUG;
1631         warn " rightresult : $rightresult" if $DEBUG;
1632         # OK, we have the results for right and left part of the query
1633         # depending of operand, intersect, union or exclude both lists
1634         # to get a result list
1635         if ( $operator eq ' and ' ) {
1636             warn "NZAND";
1637             return NZoperatorAND($leftresult,$rightresult);
1638         }
1639         elsif ( $operator eq ' or ' ) {
1640
1641             # just merge the 2 strings
1642             return $leftresult . $rightresult;
1643         }
1644         elsif ( $operator eq ' not ' ) {
1645             return NZoperatorNOT($leftresult,$rightresult);
1646         }
1647         else {
1648
1649 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1650             die "error : operand unknown : $operator for $string";
1651         }
1652
1653         # it's a leaf, do the real SQL query and return the result
1654     }
1655     else {
1656         $string =~ s/__X__/"$commacontent"/ if $commacontent;
1657         $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g;
1658         #remove trailing blank at the beginning
1659         $string =~ s/^ //g;
1660         warn "leaf:$string" if $DEBUG;
1661
1662         # parse the string in in operator/operand/value again
1663         my $left = "";
1664         my $operator = "";
1665         my $right = "";
1666         if ($string =~ /(.*)(>=|<=)(.*)/) {
1667             $left     = $1;
1668             $operator = $2;
1669             $right    = $3;
1670         } else {
1671             $left = $string;
1672         }
1673 #         warn "handling leaf... left:$left operator:$operator right:$right"
1674 #           if $DEBUG;
1675         unless ($operator) {
1676             if ($string =~ /(.*)(>|<|=)(.*)/) {
1677                 $left     = $1;
1678                 $operator = $2;
1679                 $right    = $3;
1680                 warn
1681     "handling unless (operator)... left:$left operator:$operator right:$right"
1682                 if $DEBUG;
1683             } else {
1684                 $left = $string;
1685             }
1686         }
1687         my $results;
1688
1689 # strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr...
1690         $left =~ s/ .*$//;
1691
1692         # automatic replace for short operators
1693         $left = 'title'            if $left =~ '^ti$';
1694         $left = 'author'           if $left =~ '^au$';
1695         $left = 'publisher'        if $left =~ '^pb$';
1696         $left = 'subject'          if $left =~ '^su$';
1697         $left = 'koha-Auth-Number' if $left =~ '^an$';
1698         $left = 'keyword'          if $left =~ '^kw$';
1699         $left = 'itemtype'         if $left =~ '^mc$'; # Fix for Bug 2599 - Search limits not working for NoZebra
1700         warn "handling leaf... left:$left operator:$operator right:$right" if $DEBUG;
1701         my $dbh = C4::Context->dbh;
1702         if ( $operator && $left ne 'keyword' ) {
1703             #do a specific search
1704             $operator = 'LIKE' if $operator eq '=' and $right =~ /%/;
1705             my $sth = $dbh->prepare(
1706 "SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?"
1707             );
1708             warn "$left / $operator / $right\n" if $DEBUG;
1709
1710             # split each word, query the DB and build the biblionumbers result
1711             #sanitizing leftpart
1712             $left =~ s/^\s+|\s+$//;
1713             foreach ( split / /, $right ) {
1714                 my $biblionumbers;
1715                 $_ =~ s/^\s+|\s+$//;
1716                 next unless $_;
1717                 warn "EXECUTE : $server, $left, $_" if $DEBUG;
1718                 $sth->execute( $server, $left, $_ )
1719                   or warn "execute failed: $!";
1720                 while ( my ( $line, $value ) = $sth->fetchrow ) {
1721
1722 # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
1723 # otherwise, fill the result
1724                     $biblionumbers .= $line
1725                       unless ( $right =~ /^\d+$/ && $value =~ /\D/ );
1726                     warn "result : $value "
1727                       . ( $right  =~ /\d/ ) . "=="
1728                       . ( $value =~ /\D/?$line:"" ) if $DEBUG;         #= $line";
1729                 }
1730
1731 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1732                 if ($results) {
1733                     warn "NZAND" if $DEBUG;
1734                     $results = NZoperatorAND($biblionumbers,$results);
1735                 } else {
1736                     $results = $biblionumbers;
1737                 }
1738             }
1739         }
1740         else {
1741       #do a complete search (all indexes), if index='kw' do complete search too.
1742             my $sth = $dbh->prepare(
1743 "SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?"
1744             );
1745
1746             # split each word, query the DB and build the biblionumbers result
1747             foreach ( split / /, $string ) {
1748                 next if C4::Context->stopwords->{ uc($_) };   # skip if stopword
1749                 warn "search on all indexes on $_" if $DEBUG;
1750                 my $biblionumbers;
1751                 next unless $_;
1752                 $sth->execute( $server, $_ );
1753                 while ( my $line = $sth->fetchrow ) {
1754                     $biblionumbers .= $line;
1755                 }
1756
1757 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1758                 if ($results) {
1759                     $results = NZoperatorAND($biblionumbers,$results);
1760                 }
1761                 else {
1762                     warn "NEW RES for $_ = $biblionumbers" if $DEBUG;
1763                     $results = $biblionumbers;
1764                 }
1765             }
1766         }
1767         warn "return : $results for LEAF : $string" if $DEBUG;
1768         return $results;
1769     }
1770     warn "---------\nLeave NZanalyse\n---------" if $DEBUG;
1771 }
1772
1773 sub NZoperatorAND{
1774     my ($rightresult, $leftresult)=@_;
1775
1776     my @leftresult = split /;/, $leftresult;
1777     warn " @leftresult / $rightresult \n" if $DEBUG;
1778
1779     #             my @rightresult = split /;/,$leftresult;
1780     my $finalresult;
1781
1782 # parse the left results, and if the biblionumber exist in the right result, save it in finalresult
1783 # the result is stored twice, to have the same weight for AND than OR.
1784 # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130
1785 # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64
1786     foreach (@leftresult) {
1787         my $value = $_;
1788         my $countvalue;
1789         ( $value, $countvalue ) = ( $1, $2 ) if ($value=~/(.*)-(\d+)$/);
1790         if ( $rightresult =~ /\Q$value\E-(\d+);/ ) {
1791             $countvalue = ( $1 > $countvalue ? $countvalue : $1 );
1792             $finalresult .=
1793                 "$value-$countvalue;$value-$countvalue;";
1794         }
1795     }
1796     warn "NZAND DONE : $finalresult \n" if $DEBUG;
1797     return $finalresult;
1798 }
1799
1800 sub NZoperatorOR{
1801     my ($rightresult, $leftresult)=@_;
1802     return $rightresult.$leftresult;
1803 }
1804
1805 sub NZoperatorNOT{
1806     my ($leftresult, $rightresult)=@_;
1807
1808     my @leftresult = split /;/, $leftresult;
1809
1810     #             my @rightresult = split /;/,$leftresult;
1811     my $finalresult;
1812     foreach (@leftresult) {
1813         my $value=$_;
1814         $value=$1 if $value=~m/(.*)-\d+$/;
1815         unless ($rightresult =~ "$value-") {
1816             $finalresult .= "$_;";
1817         }
1818     }
1819     return $finalresult;
1820 }
1821
1822 =head2 NZorder
1823
1824   $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset);
1825
1826   TODO :: Description
1827
1828 =cut
1829
1830 sub NZorder {
1831     my ( $biblionumbers, $ordering, $results_per_page, $offset ) = @_;
1832     warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG;
1833
1834     # order title asc by default
1835     #     $ordering = '1=36 <i' unless $ordering;
1836     $results_per_page = 20 unless $results_per_page;
1837     $offset           = 0  unless $offset;
1838     my $dbh = C4::Context->dbh;
1839
1840     #
1841     # order by POPULARITY
1842     #
1843     if ( $ordering =~ /popularity/ ) {
1844         my %result;
1845         my %popularity;
1846
1847         # popularity is not in MARC record, it's builded from a specific query
1848         my $sth =
1849           $dbh->prepare("select sum(issues) from items where biblionumber=?");
1850         foreach ( split /;/, $biblionumbers ) {
1851             my ( $biblionumber, $title ) = split /,/, $_;
1852             $result{$biblionumber} = GetMarcBiblio($biblionumber);
1853             $sth->execute($biblionumber);
1854             my $popularity = $sth->fetchrow || 0;
1855
1856 # hint : the key is popularity.title because we can have
1857 # many results with the same popularity. In this case, sub-ordering is done by title
1858 # we also have biblionumber to avoid bug for 2 biblios with the same title & popularity
1859 # (un-frequent, I agree, but we won't forget anything that way ;-)
1860             $popularity{ sprintf( "%10d", $popularity ) . $title
1861                   . $biblionumber } = $biblionumber;
1862         }
1863
1864     # sort the hash and return the same structure as GetRecords (Zebra querying)
1865         my $result_hash;
1866         my $numbers = 0;
1867         if ( $ordering eq 'popularity_dsc' ) {    # sort popularity DESC
1868             foreach my $key ( sort { $b cmp $a } ( keys %popularity ) ) {
1869                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1870                   $result{ $popularity{$key} }->as_usmarc();
1871             }
1872         }
1873         else {                                    # sort popularity ASC
1874             foreach my $key ( sort ( keys %popularity ) ) {
1875                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1876                   $result{ $popularity{$key} }->as_usmarc();
1877             }
1878         }
1879         my $finalresult = ();
1880         $result_hash->{'hits'}         = $numbers;
1881         $finalresult->{'biblioserver'} = $result_hash;
1882         return $finalresult;
1883
1884         #
1885         # ORDER BY author
1886         #
1887     }
1888     elsif ( $ordering =~ /author/ ) {
1889         my %result;
1890         foreach ( split /;/, $biblionumbers ) {
1891             my ( $biblionumber, $title ) = split /,/, $_;
1892             my $record = GetMarcBiblio($biblionumber);
1893             my $author;
1894             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1895                 $author = $record->subfield( '200', 'f' );
1896                 $author = $record->subfield( '700', 'a' ) unless $author;
1897             }
1898             else {
1899                 $author = $record->subfield( '100', 'a' );
1900             }
1901
1902 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1903 # and we don't want to get only 1 result for each of them !!!
1904             $result{ $author . $biblionumber } = $record;
1905         }
1906
1907     # sort the hash and return the same structure as GetRecords (Zebra querying)
1908         my $result_hash;
1909         my $numbers = 0;
1910         if ( $ordering eq 'author_za' ) {    # sort by author desc
1911             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1912                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1913                   $result{$key}->as_usmarc();
1914             }
1915         }
1916         else {                               # sort by author ASC
1917             foreach my $key ( sort ( keys %result ) ) {
1918                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1919                   $result{$key}->as_usmarc();
1920             }
1921         }
1922         my $finalresult = ();
1923         $result_hash->{'hits'}         = $numbers;
1924         $finalresult->{'biblioserver'} = $result_hash;
1925         return $finalresult;
1926
1927         #
1928         # ORDER BY callnumber
1929         #
1930     }
1931     elsif ( $ordering =~ /callnumber/ ) {
1932         my %result;
1933         foreach ( split /;/, $biblionumbers ) {
1934             my ( $biblionumber, $title ) = split /,/, $_;
1935             my $record = GetMarcBiblio($biblionumber);
1936             my $callnumber;
1937             my ( $callnumber_tag, $callnumber_subfield ) =
1938               GetMarcFromKohaField( 'items.itemcallnumber','' );
1939             ( $callnumber_tag, $callnumber_subfield ) =
1940               GetMarcFromKohaField('biblioitems.callnumber','')
1941               unless $callnumber_tag;
1942             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1943                 $callnumber = $record->subfield( '200', 'f' );
1944             }
1945             else {
1946                 $callnumber = $record->subfield( '100', 'a' );
1947             }
1948
1949 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1950 # and we don't want to get only 1 result for each of them !!!
1951             $result{ $callnumber . $biblionumber } = $record;
1952         }
1953
1954     # sort the hash and return the same structure as GetRecords (Zebra querying)
1955         my $result_hash;
1956         my $numbers = 0;
1957         if ( $ordering eq 'call_number_dsc' ) {    # sort by title desc
1958             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1959                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1960                   $result{$key}->as_usmarc();
1961             }
1962         }
1963         else {                                     # sort by title ASC
1964             foreach my $key ( sort { $a cmp $b } ( keys %result ) ) {
1965                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1966                   $result{$key}->as_usmarc();
1967             }
1968         }
1969         my $finalresult = ();
1970         $result_hash->{'hits'}         = $numbers;
1971         $finalresult->{'biblioserver'} = $result_hash;
1972         return $finalresult;
1973     }
1974     elsif ( $ordering =~ /pubdate/ ) {             #pub year
1975         my %result;
1976         foreach ( split /;/, $biblionumbers ) {
1977             my ( $biblionumber, $title ) = split /,/, $_;
1978             my $record = GetMarcBiblio($biblionumber);
1979             my ( $publicationyear_tag, $publicationyear_subfield ) =
1980               GetMarcFromKohaField( 'biblioitems.publicationyear', '' );
1981             my $publicationyear =
1982               $record->subfield( $publicationyear_tag,
1983                 $publicationyear_subfield );
1984
1985 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1986 # and we don't want to get only 1 result for each of them !!!
1987             $result{ $publicationyear . $biblionumber } = $record;
1988         }
1989
1990     # sort the hash and return the same structure as GetRecords (Zebra querying)
1991         my $result_hash;
1992         my $numbers = 0;
1993         if ( $ordering eq 'pubdate_dsc' ) {    # sort by pubyear desc
1994             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1995                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1996                   $result{$key}->as_usmarc();
1997             }
1998         }
1999         else {                                 # sort by pub year ASC
2000             foreach my $key ( sort ( keys %result ) ) {
2001                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2002                   $result{$key}->as_usmarc();
2003             }
2004         }
2005         my $finalresult = ();
2006         $result_hash->{'hits'}         = $numbers;
2007         $finalresult->{'biblioserver'} = $result_hash;
2008         return $finalresult;
2009
2010         #
2011         # ORDER BY title
2012         #
2013     }
2014     elsif ( $ordering =~ /title/ ) {
2015
2016 # the title is in the biblionumbers string, so we just need to build a hash, sort it and return
2017         my %result;
2018         foreach ( split /;/, $biblionumbers ) {
2019             my ( $biblionumber, $title ) = split /,/, $_;
2020
2021 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2022 # and we don't want to get only 1 result for each of them !!!
2023 # hint & speed improvement : we can order without reading the record
2024 # so order, and read records only for the requested page !
2025             $result{ $title . $biblionumber } = $biblionumber;
2026         }
2027
2028     # sort the hash and return the same structure as GetRecords (Zebra querying)
2029         my $result_hash;
2030         my $numbers = 0;
2031         if ( $ordering eq 'title_az' ) {    # sort by title desc
2032             foreach my $key ( sort ( keys %result ) ) {
2033                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2034             }
2035         }
2036         else {                              # sort by title ASC
2037             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2038                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2039             }
2040         }
2041
2042         # limit the $results_per_page to result size if it's more
2043         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2044
2045         # for the requested page, replace biblionumber by the complete record
2046         # speed improvement : avoid reading too much things
2047         for (
2048             my $counter = $offset ;
2049             $counter <= $offset + $results_per_page ;
2050             $counter++
2051           )
2052         {
2053             $result_hash->{'RECORDS'}[$counter] =
2054               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc;
2055         }
2056         my $finalresult = ();
2057         $result_hash->{'hits'}         = $numbers;
2058         $finalresult->{'biblioserver'} = $result_hash;
2059         return $finalresult;
2060     }
2061     else {
2062
2063 #
2064 # order by ranking
2065 #
2066 # we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking
2067         my %result;
2068         my %count_ranking;
2069         foreach ( split /;/, $biblionumbers ) {
2070             my ( $biblionumber, $title ) = split /,/, $_;
2071             $title =~ /(.*)-(\d)/;
2072
2073             # get weight
2074             my $ranking = $2;
2075
2076 # note that we + the ranking because ranking is calculated on weight of EACH term requested.
2077 # if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N
2078 # biblio N has ranking = 6
2079             $count_ranking{$biblionumber} += $ranking;
2080         }
2081
2082 # build the result by "inverting" the count_ranking hash
2083 # hing : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead
2084 #         warn "counting";
2085         foreach ( keys %count_ranking ) {
2086             $result{ sprintf( "%10d", $count_ranking{$_} ) . '-' . $_ } = $_;
2087         }
2088
2089     # sort the hash and return the same structure as GetRecords (Zebra querying)
2090         my $result_hash;
2091         my $numbers = 0;
2092         foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2093             $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2094         }
2095
2096         # limit the $results_per_page to result size if it's more
2097         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2098
2099         # for the requested page, replace biblionumber by the complete record
2100         # speed improvement : avoid reading too much things
2101         for (
2102             my $counter = $offset ;
2103             $counter <= $offset + $results_per_page ;
2104             $counter++
2105           )
2106         {
2107             $result_hash->{'RECORDS'}[$counter] =
2108               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc
2109               if $result_hash->{'RECORDS'}[$counter];
2110         }
2111         my $finalresult = ();
2112         $result_hash->{'hits'}         = $numbers;
2113         $finalresult->{'biblioserver'} = $result_hash;
2114         return $finalresult;
2115     }
2116 }
2117
2118 =head2 enabled_staff_search_views
2119
2120 %hash = enabled_staff_search_views()
2121
2122 This function returns a hash that contains three flags obtained from the system
2123 preferences, used to determine whether a particular staff search results view
2124 is enabled.
2125
2126 =over 2
2127
2128 =item C<Output arg:>
2129
2130     * $hash{can_view_MARC} is true only if the MARC view is enabled
2131     * $hash{can_view_ISBD} is true only if the ISBD view is enabled
2132     * $hash{can_view_labeledMARC} is true only if the Labeled MARC view is enabled
2133
2134 =item C<usage in the script:>
2135
2136 =back
2137
2138 $template->param ( C4::Search::enabled_staff_search_views );
2139
2140 =cut
2141
2142 sub enabled_staff_search_views
2143 {
2144         return (
2145                 can_view_MARC                   => C4::Context->preference('viewMARC'),                 # 1 if the staff search allows the MARC view
2146                 can_view_ISBD                   => C4::Context->preference('viewISBD'),                 # 1 if the staff search allows the ISBD view
2147                 can_view_labeledMARC    => C4::Context->preference('viewLabeledMARC'),  # 1 if the staff search allows the Labeled MARC view
2148         );
2149 }
2150
2151
2152 =head2 z3950_search_args
2153
2154 $arrayref = z3950_search_args($matchpoints)
2155
2156 This function returns an array reference that contains the search parameters to be
2157 passed to the Z39.50 search script (z3950_search.pl). The array elements
2158 are hash refs whose keys are name, value and encvalue, and whose values are the
2159 name of a search parameter, the value of that search parameter and the URL encoded
2160 value of that parameter.
2161
2162 The search parameter names are lccn, isbn, issn, title, author, dewey and subject.
2163
2164 The search parameter values are obtained from the bibliographic record whose
2165 data is in a hash reference in $matchpoints, as returned by Biblio::GetBiblioData().
2166
2167 If $matchpoints is a scalar, it is assumed to be an unnamed query descriptor, e.g.
2168 a general purpose search argument. In this case, the returned array contains only
2169 entry: the key is 'title' and the value and encvalue are derived from $matchpoints.
2170
2171 If a search parameter value is undefined or empty, it is not included in the returned
2172 array.
2173
2174 The returned array reference may be passed directly to the template parameters.
2175
2176 =over 2
2177
2178 =item C<Output arg:>
2179
2180     * $array containing hash refs as described above
2181
2182 =item C<usage in the script:>
2183
2184 =back
2185
2186 $data = Biblio::GetBiblioData($bibno);
2187 $template->param ( MYLOOP => C4::Search::z3950_search_args($data) )
2188
2189 *OR*
2190
2191 $template->param ( MYLOOP => C4::Search::z3950_search_args($searchscalar) )
2192
2193 =cut
2194
2195 sub z3950_search_args {
2196     my $bibrec = shift;
2197     $bibrec = { title => $bibrec } if !ref $bibrec;
2198     my $array = [];
2199     for my $field (qw/ lccn isbn issn title author dewey subject /)
2200     {
2201         my $encvalue = URI::Escape::uri_escape_utf8($bibrec->{$field});
2202         push @$array, { name=>$field, value=>$bibrec->{$field}, encvalue=>$encvalue } if defined $bibrec->{$field};
2203     }
2204     return $array;
2205 }
2206
2207
2208 END { }    # module clean-up code here (global destructor)
2209
2210 1;
2211 __END__
2212
2213 =head1 AUTHOR
2214
2215 Koha Developement team <info@koha.org>
2216
2217 =cut