C4/Search.pm

   1 package C4::Search;
   2
   3 # This file is part of Koha.
   4 #
   5 # Koha is free software; you can redistribute it and/or modify it under the
   6 # terms of the GNU General Public License as published by the Free Software
   7 # Foundation; either version 2 of the License, or (at your option) any later
   8 # version.
   9 #
  10 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
  11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12 # A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU General Public License along with
  15 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  16 # Suite 330, Boston, MA  02111-1307 USA
  17
  18 use strict;
  19 # use warnings; # FIXME
  20 require Exporter;
  21 use C4::Context;
  22 use C4::Biblio;    # GetMarcFromKohaField
  23 use C4::Koha;      # getFacets
  24 use Lingua::Stem;
  25 use C4::Search::PazPar2;
  26 use XML::Simple;
  27 use C4::Dates qw(format_date);
  28 use C4::XSLT;
  29 use C4::Branch;
  30
  31 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
  32
  33 # set the version for version checking
  34 BEGIN {
  35     $VERSION = 3.01;
  36     $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
  37 }
  38
  39 =head1 NAME
  40
  41 C4::Search - Functions for searching the Koha catalog.
  42
  43 =head1 SYNOPSIS
  44
  45 See opac/opac-search.pl or catalogue/search.pl for example of usage
  46
  47 =head1 DESCRIPTION
  48
  49 This module provides searching functions for Koha's bibliographic databases
  50
  51 =head1 FUNCTIONS
  52
  53 =cut
  54
  55 @ISA    = qw(Exporter);
  56 @EXPORT = qw(
  57   &FindDuplicate
  58   &SimpleSearch
  59   &searchResults
  60   &getRecords
  61   &buildQuery
  62   &NZgetRecords
  63 );
  64
  65 # make all your functions, whether exported or not;
  66
  67 =head2 FindDuplicate
  68
  69 ($biblionumber,$biblionumber,$title) = FindDuplicate($record);
  70
  71 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
  72
  73 =cut
  74
  75 sub FindDuplicate {
  76     my ($record) = @_;
  77     my $dbh = C4::Context->dbh;
  78     my $result = TransformMarcToKoha( $dbh, $record, '' );
  79     my $sth;
  80     my $query;
  81     my $search;
  82     my $type;
  83     my ( $biblionumber, $title );
  84
  85     # search duplicate on ISBN, easy and fast..
  86     # ... normalize first
  87     if ( $result->{isbn} ) {
  88         $result->{isbn} =~ s/\(.*$//;
  89         $result->{isbn} =~ s/\s+$//;
  90         $query = "isbn=$result->{isbn}";
  91     }
  92     else {
  93         $result->{title} =~ s /\\//g;
  94         $result->{title} =~ s /\"//g;
  95         $result->{title} =~ s /\(//g;
  96         $result->{title} =~ s /\)//g;
  97
  98         # FIXME: instead of removing operators, could just do
  99         # quotes around the value
 100         $result->{title} =~ s/(and|or|not)//g;
 101         $query = "ti,ext=$result->{title}";
 102         $query .= " and itemtype=$result->{itemtype}"
 103           if ( $result->{itemtype} );
 104         if   ( $result->{author} ) {
 105             $result->{author} =~ s /\\//g;
 106             $result->{author} =~ s /\"//g;
 107             $result->{author} =~ s /\(//g;
 108             $result->{author} =~ s /\)//g;
 109
 110             # remove valid operators
 111             $result->{author} =~ s/(and|or|not)//g;
 112             $query .= " and au,ext=$result->{author}";
 113         }
 114     }
 115
 116     # FIXME: add error handling
 117     my ( $error, $searchresults ) = SimpleSearch($query); # FIXME :: hardcoded !
 118     my @results;
 119     foreach my $possible_duplicate_record (@$searchresults) {
 120         my $marcrecord =
 121           MARC::Record->new_from_usmarc($possible_duplicate_record);
 122         my $result = TransformMarcToKoha( $dbh, $marcrecord, '' );
 123
 124         # FIXME :: why 2 $biblionumber ?
 125         if ($result) {
 126             push @results, $result->{'biblionumber'};
 127             push @results, $result->{'title'};
 128         }
 129     }
 130     return @results;
 131 }
 132
 133 =head2 SimpleSearch
 134
 135 ( $error, $results, $total_hits ) = SimpleSearch( $query, $offset, $max_results, [@servers] );
 136
 137 This function provides a simple search API on the bibliographic catalog
 138
 139 =over 2
 140
 141 =item C<input arg:>
 142
 143     * $query can be a simple keyword or a complete CCL query
 144     * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
 145     * $offset - If present, represents the number of records at the beggining to omit. Defaults to 0
 146     * $max_results - if present, determines the maximum number of records to fetch. undef is All. defaults to undef.
 147
 148
 149 =item C<Output:>
 150
 151     * $error is a empty unless an error is detected
 152     * \@results is an array of records.
 153     * $total_hits is the number of hits that would have been returned with no limit
 154
 155 =item C<usage in the script:>
 156
 157 =back
 158
 159 my ( $error, $marcresults, $total_hits ) = SimpleSearch($query);
 160
 161 if (defined $error) {
 162     $template->param(query_error => $error);
 163     warn "error: ".$error;
 164     output_html_with_http_headers $input, $cookie, $template->output;
 165     exit;
 166 }
 167
 168 my $hits = scalar @$marcresults;
 169 my @results;
 170
 171 for my $i (0..$hits) {
 172     my %resultsloop;
 173     my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]);
 174     my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,'');
 175
 176     #build the hash for the template.
 177     $resultsloop{highlight}       = ($i % 2)?(1):(0);
 178     $resultsloop{title}           = $biblio->{'title'};
 179     $resultsloop{subtitle}        = $biblio->{'subtitle'};
 180     $resultsloop{biblionumber}    = $biblio->{'biblionumber'};
 181     $resultsloop{author}          = $biblio->{'author'};
 182     $resultsloop{publishercode}   = $biblio->{'publishercode'};
 183     $resultsloop{publicationyear} = $biblio->{'publicationyear'};
 184
 185     push @results, \%resultsloop;
 186 }
 187
 188 $template->param(result=>\@results);
 189
 190 =cut
 191
 192 sub SimpleSearch {
 193     my ( $query, $offset, $max_results, $servers )  = @_;
 194
 195     if ( C4::Context->preference('NoZebra') ) {
 196         my $result = NZorder( NZanalyse($query) )->{'biblioserver'};
 197         my $search_result =
 198           (      $result->{hits}
 199               && $result->{hits} > 0 ? $result->{'RECORDS'} : [] );
 200         return ( undef, $search_result, scalar($result->{hits}) );
 201     }
 202     else {
 203         # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
 204         my @servers = defined ( $servers ) ? @$servers : ( "biblioserver" );
 205         my @results;
 206         my @zoom_queries;
 207         my @tmpresults;
 208         my @zconns;
 209         my $total_hits;
 210         return ( "No query entered", undef, undef ) unless $query;
 211
 212         # Initialize & Search Zebra
 213         for ( my $i = 0 ; $i < @servers ; $i++ ) {
 214             eval {
 215                 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 216                 $zoom_queries[$i] = new ZOOM::Query::CCL2RPN( $query, $zconns[$i]);
 217                 $tmpresults[$i] = $zconns[$i]->search( $zoom_queries[$i] );
 218
 219                 # error handling
 220                 my $error =
 221                     $zconns[$i]->errmsg() . " ("
 222                   . $zconns[$i]->errcode() . ") "
 223                   . $zconns[$i]->addinfo() . " "
 224                   . $zconns[$i]->diagset();
 225
 226                 return ( $error, undef, undef ) if $zconns[$i]->errcode();
 227             };
 228             if ($@) {
 229
 230                 # caught a ZOOM::Exception
 231                 my $error =
 232                     $@->message() . " ("
 233                   . $@->code() . ") "
 234                   . $@->addinfo() . " "
 235                   . $@->diagset();
 236                 warn $error;
 237                 return ( $error, undef, undef );
 238             }
 239         }
 240         while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 241             my $event = $zconns[ $i - 1 ]->last_event();
 242             if ( $event == ZOOM::Event::ZEND ) {
 243
 244                 my $first_record = defined( $offset ) ? $offset+1 : 1;
 245                 my $hits = $tmpresults[ $i - 1 ]->size();
 246                 $total_hits += $hits;
 247                 my $last_record = $hits;
 248                 if ( defined $max_results && $offset + $max_results < $hits ) {
 249                     $last_record  = $offset + $max_results;
 250                 }
 251
 252                 for my $j ( $first_record..$last_record ) {
 253                     my $record = $tmpresults[ $i - 1 ]->record( $j-1 )->raw(); # 0 indexed
 254                     push @results, $record;
 255                 }
 256             }
 257         }
 258
 259         foreach my $result (@tmpresults) {
 260             $result->destroy();
 261         }
 262         foreach my $zoom_query (@zoom_queries) {
 263             $zoom_query->destroy();
 264         }
 265
 266         return ( undef, \@results, $total_hits );
 267     }
 268 }
 269
 270 =head2 getRecords
 271
 272 ( undef, $results_hashref, \@facets_loop ) = getRecords (
 273
 274         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 275         $results_per_page, $offset,       $expanded_facet, $branches,
 276         $query_type,       $scan
 277     );
 278
 279 The all singing, all dancing, multi-server, asynchronous, scanning,
 280 searching, record nabbing, facet-building
 281
 282 See verbse embedded documentation.
 283
 284 =cut
 285
 286 sub getRecords {
 287     my (
 288         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 289         $results_per_page, $offset,       $expanded_facet, $branches,
 290         $query_type,       $scan
 291     ) = @_;
 292
 293     my @servers = @$servers_ref;
 294     my @sort_by = @$sort_by_ref;
 295
 296     # Initialize variables for the ZOOM connection and results object
 297     my $zconn;
 298     my @zconns;
 299     my @results;
 300     my $results_hashref = ();
 301
 302     # Initialize variables for the faceted results objects
 303     my $facets_counter = ();
 304     my $facets_info    = ();
 305     my $facets         = getFacets();
 306
 307     my @facets_loop
 308       ;    # stores the ref to array of hashes for template facets loop
 309
 310     ### LOOP THROUGH THE SERVERS
 311     for ( my $i = 0 ; $i < @servers ; $i++ ) {
 312         $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 313
 314 # perform the search, create the results objects
 315 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
 316         my $query_to_use = ($servers[$i] =~ /biblioserver/) ? $koha_query : $simple_query;
 317
 318         #$query_to_use = $simple_query if $scan;
 319         warn $simple_query if ( $scan and $DEBUG );
 320
 321         # Check if we've got a query_type defined, if so, use it
 322         eval {
 323             if ($query_type)
 324             {
 325                 if ( $query_type =~ /^ccl/ ) {
 326                     $query_to_use =~
 327                       s/\:/\=/g;    # change : to = last minute (FIXME)
 328                     $results[$i] =
 329                       $zconns[$i]->search(
 330                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 331                       );
 332                 }
 333                 elsif ( $query_type =~ /^cql/ ) {
 334                     $results[$i] =
 335                       $zconns[$i]->search(
 336                         new ZOOM::Query::CQL( $query_to_use, $zconns[$i] ) );
 337                 }
 338                 elsif ( $query_type =~ /^pqf/ ) {
 339                     $results[$i] =
 340                       $zconns[$i]->search(
 341                         new ZOOM::Query::PQF( $query_to_use, $zconns[$i] ) );
 342                 }
 343             }
 344             else {
 345                 if ($scan) {
 346                     $results[$i] =
 347                       $zconns[$i]->scan(
 348                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 349                       );
 350                 }
 351                 else {
 352                     $results[$i] =
 353                       $zconns[$i]->search(
 354                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 355                       );
 356                 }
 357             }
 358         };
 359         if ($@) {
 360             warn "WARNING: query problem with $query_to_use " . $@;
 361         }
 362
 363         # Concatenate the sort_by limits and pass them to the results object
 364         # Note: sort will override rank
 365         my $sort_by;
 366         foreach my $sort (@sort_by) {
 367             if ( $sort eq "author_az" ) {
 368                 $sort_by .= "1=1003 <i ";
 369             }
 370             elsif ( $sort eq "author_za" ) {
 371                 $sort_by .= "1=1003 >i ";
 372             }
 373             elsif ( $sort eq "popularity_asc" ) {
 374                 $sort_by .= "1=9003 <i ";
 375             }
 376             elsif ( $sort eq "popularity_dsc" ) {
 377                 $sort_by .= "1=9003 >i ";
 378             }
 379             elsif ( $sort eq "call_number_asc" ) {
 380                 $sort_by .= "1=20  <i ";
 381             }
 382             elsif ( $sort eq "call_number_dsc" ) {
 383                 $sort_by .= "1=20 >i ";
 384             }
 385             elsif ( $sort eq "pubdate_asc" ) {
 386                 $sort_by .= "1=31 <i ";
 387             }
 388             elsif ( $sort eq "pubdate_dsc" ) {
 389                 $sort_by .= "1=31 >i ";
 390             }
 391             elsif ( $sort eq "acqdate_asc" ) {
 392                 $sort_by .= "1=32 <i ";
 393             }
 394             elsif ( $sort eq "acqdate_dsc" ) {
 395                 $sort_by .= "1=32 >i ";
 396             }
 397             elsif ( $sort eq "title_az" ) {
 398                 $sort_by .= "1=4 <i ";
 399             }
 400             elsif ( $sort eq "title_za" ) {
 401                 $sort_by .= "1=4 >i ";
 402             }
 403             else {
 404                 warn "Ignoring unrecognized sort '$sort' requested" if $sort_by;
 405             }
 406         }
 407         if ($sort_by) {
 408             if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
 409                 warn "WARNING sort $sort_by failed";
 410             }
 411         }
 412     }    # finished looping through servers
 413
 414     # The big moment: asynchronously retrieve results from all servers
 415     while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 416         my $ev = $zconns[ $i - 1 ]->last_event();
 417         if ( $ev == ZOOM::Event::ZEND ) {
 418             next unless $results[ $i - 1 ];
 419             my $size = $results[ $i - 1 ]->size();
 420             if ( $size > 0 ) {
 421                 my $results_hash;
 422
 423                 # loop through the results
 424                 $results_hash->{'hits'} = $size;
 425                 my $times;
 426                 if ( $offset + $results_per_page <= $size ) {
 427                     $times = $offset + $results_per_page;
 428                 }
 429                 else {
 430                     $times = $size;
 431                 }
 432                 for ( my $j = $offset ; $j < $times ; $j++ ) {
 433                     my $records_hash;
 434                     my $record;
 435                     my $facet_record;
 436
 437                     ## Check if it's an index scan
 438                     if ($scan) {
 439                         my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
 440
 441                  # here we create a minimal MARC record and hand it off to the
 442                  # template just like a normal result ... perhaps not ideal, but
 443                  # it works for now
 444                         my $tmprecord = MARC::Record->new();
 445                         $tmprecord->encoding('UTF-8');
 446                         my $tmptitle;
 447                         my $tmpauthor;
 448
 449                 # the minimal record in author/title (depending on MARC flavour)
 450                         if (C4::Context->preference("marcflavour") eq "UNIMARC") {
 451                             $tmptitle = MARC::Field->new('200',' ',' ', a => $term, f => $occ);
 452                             $tmprecord->append_fields($tmptitle);
 453                         } else {
 454                             $tmptitle  = MARC::Field->new('245',' ',' ', a => $term,);
 455                             $tmpauthor = MARC::Field->new('100',' ',' ', a => $occ,);
 456                             $tmprecord->append_fields($tmptitle);
 457                             $tmprecord->append_fields($tmpauthor);
 458                         }
 459                         $results_hash->{'RECORDS'}[$j] = $tmprecord->as_usmarc();
 460                     }
 461
 462                     # not an index scan
 463                     else {
 464                         $record = $results[ $i - 1 ]->record($j)->raw();
 465
 466                         # warn "RECORD $j:".$record;
 467                         $results_hash->{'RECORDS'}[$j] = $record;
 468
 469             # Fill the facets while we're looping, but only for the biblioserver
 470                         $facet_record = MARC::Record->new_from_usmarc($record)
 471                           if $servers[ $i - 1 ] =~ /biblioserver/;
 472
 473                     #warn $servers[$i-1]."\n".$record; #.$facet_record->title();
 474                         if ($facet_record) {
 475                             for ( my $k = 0 ; $k <= @$facets ; $k++ ) {
 476                                 ($facets->[$k]) or next;
 477                                 my @fields = map {$facet_record->field($_)} @{$facets->[$k]->{'tags'}} ;
 478                                 for my $field (@fields) {
 479                                     my @subfields = $field->subfields();
 480                                     for my $subfield (@subfields) {
 481                                         my ( $code, $data ) = @$subfield;
 482                                         ($code eq $facets->[$k]->{'subfield'}) or next;
 483                                         $facets_counter->{ $facets->[$k]->{'link_value'} }->{$data}++;
 484                                     }
 485                                 }
 486                                 $facets_info->{ $facets->[$k]->{'link_value'} }->{'label_value'} =
 487                                     $facets->[$k]->{'label_value'};
 488                                 $facets_info->{ $facets->[$k]->{'link_value'} }->{'expanded'} =
 489                                     $facets->[$k]->{'expanded'};
 490                             }
 491                         }
 492                     }
 493                 }
 494                 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
 495             }
 496
 497             # warn "connection ", $i-1, ": $size hits";
 498             # warn $results[$i-1]->record(0)->render() if $size > 0;
 499
 500             # BUILD FACETS
 501             if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
 502                 for my $link_value (
 503                     sort { $facets_counter->{$b} <=> $facets_counter->{$a} }
 504                     keys %$facets_counter )
 505                 {
 506                     my $expandable;
 507                     my $number_of_facets;
 508                     my @this_facets_array;
 509                     for my $one_facet (
 510                         sort {
 511                             $facets_counter->{$link_value}
 512                               ->{$b} <=> $facets_counter->{$link_value}->{$a}
 513                         } keys %{ $facets_counter->{$link_value} }
 514                       )
 515                     {
 516                         $number_of_facets++;
 517                         if (   ( $number_of_facets < 6 )
 518                             || ( $expanded_facet eq $link_value )
 519                             || ( $facets_info->{$link_value}->{'expanded'} ) )
 520                         {
 521
 522                       # Sanitize the link value ), ( will cause errors with CCL,
 523                             my $facet_link_value = $one_facet;
 524                             $facet_link_value =~ s/(\(|\))/ /g;
 525
 526                             # fix the length that will display in the label,
 527                             my $facet_label_value = $one_facet;
 528                             $facet_label_value =
 529                               substr( $one_facet, 0, 20 ) . "..."
 530                               unless length($facet_label_value) <= 20;
 531
 532                             # if it's a branch, label by the name, not the code,
 533                             if ( $link_value =~ /branch/ ) {
 534                                 $facet_label_value =
 535                                   $branches->{$one_facet}->{'branchname'};
 536                             }
 537
 538                 # but we're down with the whole label being in the link's title.
 539                             my $facet_title_value = $one_facet;
 540
 541                             push @this_facets_array,
 542                               (
 543                                 {
 544                                     facet_count =>
 545                                       $facets_counter->{$link_value}
 546                                       ->{$one_facet},
 547                                     facet_label_value => $facet_label_value,
 548                                     facet_title_value => $facet_title_value,
 549                                     facet_link_value  => $facet_link_value,
 550                                     type_link_value   => $link_value,
 551                                 },
 552                               );
 553                         }
 554                     }
 555
 556                     # handle expanded option
 557                     unless ( $facets_info->{$link_value}->{'expanded'} ) {
 558                         $expandable = 1
 559                           if ( ( $number_of_facets > 6 )
 560                             && ( $expanded_facet ne $link_value ) );
 561                     }
 562                     push @facets_loop,
 563                       (
 564                         {
 565                             type_link_value => $link_value,
 566                             type_id         => $link_value . "_id",
 567                             "type_label_" . $facets_info->{$link_value}->{'label_value'} => 1,
 568                             facets     => \@this_facets_array,
 569                             expandable => $expandable,
 570                             expand     => $link_value,
 571                         }
 572                       ) unless ( ($facets_info->{$link_value}->{'label_value'} =~ /Libraries/) and (C4::Context->preference('singleBranchMode')) );
 573                 }
 574             }
 575         }
 576     }
 577     return ( undef, $results_hashref, \@facets_loop );
 578 }
 579
 580 sub pazGetRecords {
 581     my (
 582         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 583         $results_per_page, $offset,       $expanded_facet, $branches,
 584         $query_type,       $scan
 585     ) = @_;
 586
 587     my $paz = C4::Search::PazPar2->new(C4::Context->config('pazpar2url'));
 588     $paz->init();
 589     $paz->search($simple_query);
 590     sleep 1;
 591
 592     # do results
 593     my $results_hashref = {};
 594     my $stats = XMLin($paz->stat);
 595     my $results = XMLin($paz->show($offset, $results_per_page, 'work-title:1'), forcearray => 1);
 596
 597     # for a grouped search result, the number of hits
 598     # is the number of groups returned; 'bib_hits' will have
 599     # the total number of bibs.
 600     $results_hashref->{'biblioserver'}->{'hits'} = $results->{'merged'}->[0];
 601     $results_hashref->{'biblioserver'}->{'bib_hits'} = $stats->{'hits'};
 602
 603     HIT: foreach my $hit (@{ $results->{'hit'} }) {
 604         my $recid = $hit->{recid}->[0];
 605
 606         my $work_title = $hit->{'md-work-title'}->[0];
 607         my $work_author;
 608         if (exists $hit->{'md-work-author'}) {
 609             $work_author = $hit->{'md-work-author'}->[0];
 610         }
 611         my $group_label = (defined $work_author) ? "$work_title / $work_author" : $work_title;
 612
 613         my $result_group = {};
 614         $result_group->{'group_label'} = $group_label;
 615         $result_group->{'group_merge_key'} = $recid;
 616
 617         my $count = 1;
 618         if (exists $hit->{count}) {
 619             $count = $hit->{count}->[0];
 620         }
 621         $result_group->{'group_count'} = $count;
 622
 623         for (my $i = 0; $i < $count; $i++) {
 624             # FIXME -- may need to worry about diacritics here
 625             my $rec = $paz->record($recid, $i);
 626             push @{ $result_group->{'RECORDS'} }, $rec;
 627         }
 628
 629         push @{ $results_hashref->{'biblioserver'}->{'GROUPS'} }, $result_group;
 630     }
 631
 632     # pass through facets
 633     my $termlist_xml = $paz->termlist('author,subject');
 634     my $terms = XMLin($termlist_xml, forcearray => 1);
 635     my @facets_loop = ();
 636     #die Dumper($results);
 637 #    foreach my $list (sort keys %{ $terms->{'list'} }) {
 638 #        my @facets = ();
 639 #        foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) {
 640 #            push @facets, {
 641 #                facet_label_value => $facet->{'name'}->[0],
 642 #            };
 643 #        }
 644 #        push @facets_loop, ( {
 645 #            type_label => $list,
 646 #            facets => \@facets,
 647 #        } );
 648 #    }
 649
 650     return ( undef, $results_hashref, \@facets_loop );
 651 }
 652
 653 # STOPWORDS
 654 sub _remove_stopwords {
 655     my ( $operand, $index ) = @_;
 656     my @stopwords_removed;
 657
 658     # phrase and exact-qualified indexes shouldn't have stopwords removed
 659     if ( $index !~ m/phr|ext/ ) {
 660
 661 # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
 662 #       we use IsAlpha unicode definition, to deal correctly with diacritics.
 663 #       otherwise, a French word like "leçon" woudl be split into "le" "çon", "le"
 664 #       is a stopword, we'd get "çon" and wouldn't find anything...
 665         foreach ( keys %{ C4::Context->stopwords } ) {
 666             next if ( $_ =~ /(and|or|not)/ );    # don't remove operators
 667             if ( $operand =~
 668                 /(\P{IsAlpha}$_\P{IsAlpha}|^$_\P{IsAlpha}|\P{IsAlpha}$_$|^$_$)/ )
 669             {
 670                 $operand =~ s/\P{IsAlpha}$_\P{IsAlpha}/ /gi;
 671                 $operand =~ s/^$_\P{IsAlpha}/ /gi;
 672                 $operand =~ s/\P{IsAlpha}$_$/ /gi;
 673                                 $operand =~ s/$1//gi;
 674                 push @stopwords_removed, $_;
 675             }
 676         }
 677     }
 678     return ( $operand, \@stopwords_removed );
 679 }
 680
 681 # TRUNCATION
 682 sub _detect_truncation {
 683     my ( $operand, $index ) = @_;
 684     my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
 685         @regexpr );
 686     $operand =~ s/^ //g;
 687     my @wordlist = split( /\s/, $operand );
 688     foreach my $word (@wordlist) {
 689         if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
 690             push @rightlefttruncated, $word;
 691         }
 692         elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
 693             push @lefttruncated, $word;
 694         }
 695         elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
 696             push @righttruncated, $word;
 697         }
 698         elsif ( index( $word, "*" ) < 0 ) {
 699             push @nontruncated, $word;
 700         }
 701         else {
 702             push @regexpr, $word;
 703         }
 704     }
 705     return (
 706         \@nontruncated,       \@righttruncated, \@lefttruncated,
 707         \@rightlefttruncated, \@regexpr
 708     );
 709 }
 710
 711 # STEMMING
 712 sub _build_stemmed_operand {
 713     my ($operand) = @_;
 714     my $stemmed_operand;
 715
 716     # If operand contains a digit, it is almost certainly an identifier, and should
 717     # not be stemmed.  This is particularly relevant for ISBNs and ISSNs, which
 718     # can contain the letter "X" - for example, _build_stemmend_operand would reduce
 719     # "014100018X" to "x ", which for a MARC21 database would bring up irrelevant
 720     # results (e.g., "23 x 29 cm." from the 300$c).  Bug 2098.
 721     return $operand if $operand =~ /\d/;
 722
 723 # FIXME: the locale should be set based on the user's language and/or search choice
 724     my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
 725
 726 # FIXME: these should be stored in the db so the librarian can modify the behavior
 727     $stemmer->add_exceptions(
 728         {
 729             'and' => 'and',
 730             'or'  => 'or',
 731             'not' => 'not',
 732         }
 733     );
 734     my @words = split( / /, $operand );
 735     my $stems = $stemmer->stem(@words);
 736     for my $stem (@$stems) {
 737         $stemmed_operand .= "$stem";
 738         $stemmed_operand .= "?"
 739           unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
 740         $stemmed_operand .= " ";
 741     }
 742     warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
 743     return $stemmed_operand;
 744 }
 745
 746 # FIELD WEIGHTING
 747 sub _build_weighted_query {
 748
 749 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
 750 # pretty well but could work much better if we had a smarter query parser
 751     my ( $operand, $stemmed_operand, $index ) = @_;
 752     my $stemming      = C4::Context->preference("QueryStemming")     || 0;
 753     my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
 754     my $fuzzy_enabled = C4::Context->preference("QueryFuzzy")        || 0;
 755
 756     my $weighted_query .= "(rk=(";    # Specifies that we're applying rank
 757
 758     # Keyword, or, no index specified
 759     if ( ( $index eq 'kw' ) || ( !$index ) ) {
 760         $weighted_query .=
 761           "Title-cover,ext,r1=\"$operand\"";    # exact title-cover
 762         $weighted_query .= " or ti,ext,r2=\"$operand\"";    # exact title
 763         $weighted_query .= " or ti,phr,r3=\"$operand\"";    # phrase title
 764           #$weighted_query .= " or any,ext,r4=$operand";               # exact any
 765           #$weighted_query .=" or kw,wrdl,r5=\"$operand\"";            # word list any
 766         $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
 767           if $fuzzy_enabled;    # add fuzzy, word list
 768         $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
 769           if ( $stemming and $stemmed_operand )
 770           ;                     # add stemming, right truncation
 771         $weighted_query .= " or wrdl,r9=\"$operand\"";
 772
 773         # embedded sorting: 0 a-z; 1 z-a
 774         # $weighted_query .= ") or (sort1,aut=1";
 775     }
 776
 777     # Barcode searches should skip this process
 778     elsif ( $index eq 'bc' ) {
 779         $weighted_query .= "bc=\"$operand\"";
 780     }
 781
 782     # Authority-number searches should skip this process
 783     elsif ( $index eq 'an' ) {
 784         $weighted_query .= "an=\"$operand\"";
 785     }
 786
 787     # If the index already has more than one qualifier, wrap the operand
 788     # in quotes and pass it back (assumption is that the user knows what they
 789     # are doing and won't appreciate us mucking up their query
 790     elsif ( $index =~ ',' ) {
 791         $weighted_query .= " $index=\"$operand\"";
 792     }
 793
 794     #TODO: build better cases based on specific search indexes
 795     else {
 796         $weighted_query .= " $index,ext,r1=\"$operand\"";    # exact index
 797           #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
 798         $weighted_query .= " or $index,phr,r3=\"$operand\"";    # phrase index
 799         $weighted_query .=
 800           " or $index,rt,wrdl,r3=\"$operand\"";    # word list index
 801     }
 802
 803     $weighted_query .= "))";                       # close rank specification
 804     return $weighted_query;
 805 }
 806
 807 =head2 buildQuery
 808
 809 ( $error, $query,
 810 $simple_query, $query_cgi,
 811 $query_desc, $limit,
 812 $limit_cgi, $limit_desc,
 813 $stopwords_removed, $query_type ) = getRecords ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
 814
 815 Build queries and limits in CCL, CGI, Human,
 816 handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
 817
 818 See verbose embedded documentation.
 819
 820
 821 =cut
 822
 823 sub buildQuery {
 824     my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_;
 825
 826     warn "---------\nEnter buildQuery\n---------" if $DEBUG;
 827
 828     # dereference
 829     my @operators = @$operators if $operators;
 830     my @indexes   = @$indexes   if $indexes;
 831     my @operands  = @$operands  if $operands;
 832     my @limits    = @$limits    if $limits;
 833     my @sort_by   = @$sort_by   if $sort_by;
 834
 835     my $stemming         = C4::Context->preference("QueryStemming")        || 0;
 836     my $auto_truncation  = C4::Context->preference("QueryAutoTruncate")    || 0;
 837     my $weight_fields    = C4::Context->preference("QueryWeightFields")    || 0;
 838     my $fuzzy_enabled    = C4::Context->preference("QueryFuzzy")           || 0;
 839     my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
 840
 841     # no stemming/weight/fuzzy in NoZebra
 842     if ( C4::Context->preference("NoZebra") ) {
 843         $stemming      = 0;
 844         $weight_fields = 0;
 845         $fuzzy_enabled = 0;
 846     }
 847
 848     my $query        = $operands[0];
 849     my $simple_query = $operands[0];
 850
 851     # initialize the variables we're passing back
 852     my $query_cgi;
 853     my $query_desc;
 854     my $query_type;
 855
 856     my $limit;
 857     my $limit_cgi;
 858     my $limit_desc;
 859
 860     my $stopwords_removed;    # flag to determine if stopwords have been removed
 861
 862 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
 863 # DIAGNOSTIC ONLY!!
 864     if ( $query =~ /^ccl=/ ) {
 865         return ( undef, $', $', "q=ccl=$'", $', '', '', '', '', 'ccl' );
 866     }
 867     if ( $query =~ /^cql=/ ) {
 868         return ( undef, $', $', "q=cql=$'", $', '', '', '', '', 'cql' );
 869     }
 870     if ( $query =~ /^pqf=/ ) {
 871         return ( undef, $', $', "q=pqf=$'", $', '', '', '', '', 'pqf' );
 872     }
 873
 874     # pass nested queries directly
 875     # FIXME: need better handling of some of these variables in this case
 876     if ( $query =~ /(\(|\))/ ) {
 877         return (
 878             undef,              $query, $simple_query, $query_cgi,
 879             $query,             $limit, $limit_cgi,    $limit_desc,
 880             $stopwords_removed, 'ccl'
 881         );
 882     }
 883
 884 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
 885 # query operands and indexes and add stemming, truncation, field weighting, etc.
 886 # Once we do so, we'll end up with a value in $query, just like if we had an
 887 # incoming $query from the user
 888     else {
 889         $query = ""
 890           ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
 891         my $previous_operand
 892           ;    # a flag used to keep track if there was a previous query
 893                # if there was, we can apply the current operator
 894                # for every operand
 895         for ( my $i = 0 ; $i <= @operands ; $i++ ) {
 896
 897             # COMBINE OPERANDS, INDEXES AND OPERATORS
 898             if ( $operands[$i] ) {
 899
 900               # A flag to determine whether or not to add the index to the query
 901                 my $indexes_set;
 902
 903 # If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling
 904                 if ( $operands[$i] =~ /(:|=)/ || $scan ) {
 905                     $weight_fields    = 0;
 906                     $stemming         = 0;
 907                     $remove_stopwords = 0;
 908                 }
 909                 my $operand = $operands[$i];
 910                 my $index   = $indexes[$i];
 911
 912                 # Add index-specific attributes
 913                 # Date of Publication
 914                 if ( $index eq 'yr' ) {
 915                     $index .= ",st-numeric";
 916 #                     $indexes_set++;
 917                                         $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
 918                 }
 919
 920                 # Date of Acquisition
 921                 elsif ( $index eq 'acqdate' ) {
 922                     $index .= ",st-date-normalized";
 923 #                     $indexes_set++;
 924                                         $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
 925                 }
 926                 # ISBN,ISSN,Standard Number, don't need special treatment
 927                 elsif ( $index eq 'nb' || $index eq 'ns' ) {
 928 #                     $indexes_set++;
 929                     (
 930                         $stemming,      $auto_truncation,
 931                         $weight_fields, $fuzzy_enabled,
 932                         $remove_stopwords
 933                     ) = ( 0, 0, 0, 0, 0 );
 934
 935                 }
 936                 # Set default structure attribute (word list)
 937                 my $struct_attr;
 938                 unless ( $indexes_set || !$index || $index =~ /(st-|phr|ext|wrdl)/ ) {
 939                     $struct_attr = ",wrdl";
 940                 }
 941
 942                 # Some helpful index variants
 943                 my $index_plus       = $index . $struct_attr . ":" if $index;
 944                 my $index_plus_comma = $index . $struct_attr . "," if $index;
 945                 if ($auto_truncation){
 946 #                                       FIXME Auto Truncation is only valid for LTR languages
 947 #                                       use C4::Output;
 948 #                                       use C4::Languages qw(regex_lang_subtags get_bidi);
 949 #                               $lang = $query->cookie('KohaOpacLanguage') if (defined $query && $query->cookie('KohaOpacLanguage'));
 950 #                                   my $current_lang = regex_lang_subtags($lang);
 951 #                                   my $bidi;
 952 #                                   $bidi = get_bidi($current_lang->{script}) if $current_lang->{script};
 953                                         $index_plus_comma .= "rtrn:";
 954                                 }
 955
 956                 # Remove Stopwords
 957                 if ($remove_stopwords) {
 958                     ( $operand, $stopwords_removed ) =
 959                       _remove_stopwords( $operand, $index );
 960                     warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
 961                     warn "REMOVED STOPWORDS: @$stopwords_removed"
 962                       if ( $stopwords_removed && $DEBUG );
 963                 }
 964
 965                 # Detect Truncation
 966                 my $truncated_operand;
 967                 my( $nontruncated, $righttruncated, $lefttruncated,
 968                     $rightlefttruncated, $regexpr
 969                 ) = _detect_truncation( $operand, $index );
 970                 warn
 971 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
 972                   if $DEBUG;
 973
 974                 # Apply Truncation
 975                 if (
 976                     scalar(@$righttruncated) + scalar(@$lefttruncated) +
 977                     scalar(@$rightlefttruncated) > 0 )
 978                 {
 979
 980                # Don't field weight or add the index to the query, we do it here
 981                     $indexes_set = 1;
 982                     undef $weight_fields;
 983                     my $previous_truncation_operand;
 984                     if ( scalar(@$nontruncated) > 0 ) {
 985                         $truncated_operand .= "$index_plus @$nontruncated ";
 986                         $previous_truncation_operand = 1;
 987                     }
 988                     if ( scalar(@$righttruncated) > 0 ) {
 989                         $truncated_operand .= "and "
 990                           if $previous_truncation_operand;
 991                         $truncated_operand .=
 992                           "$index_plus_comma" . "rtrn:@$righttruncated ";
 993                         $previous_truncation_operand = 1;
 994                     }
 995                     if ( scalar(@$lefttruncated) > 0 ) {
 996                         $truncated_operand .= "and "
 997                           if $previous_truncation_operand;
 998                         $truncated_operand .=
 999                           "$index_plus_comma" . "ltrn:@$lefttruncated ";
1000                         $previous_truncation_operand = 1;
1001                     }
1002                     if ( scalar(@$rightlefttruncated) > 0 ) {
1003                         $truncated_operand .= "and "
1004                           if $previous_truncation_operand;
1005                         $truncated_operand .=
1006                           "$index_plus_comma" . "rltrn:@$rightlefttruncated ";
1007                         $previous_truncation_operand = 1;
1008                     }
1009                 }
1010                 $operand = $truncated_operand if $truncated_operand;
1011                 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
1012
1013                 # Handle Stemming
1014                 my $stemmed_operand;
1015                 $stemmed_operand = _build_stemmed_operand($operand)
1016                   if $stemming;
1017                 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
1018
1019                 # Handle Field Weighting
1020                 my $weighted_operand;
1021                 $weighted_operand =
1022                   _build_weighted_query( $operand, $stemmed_operand, $index )
1023                   if $weight_fields;
1024                 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
1025                 $operand = $weighted_operand if $weight_fields;
1026                 $indexes_set = 1 if $weight_fields;
1027
1028                 # If there's a previous operand, we need to add an operator
1029                 if ($previous_operand) {
1030
1031                     # User-specified operator
1032                     if ( $operators[ $i - 1 ] ) {
1033                         $query     .= " $operators[$i-1] ";
1034                         $query     .= " $index_plus " unless $indexes_set;
1035                         $query     .= " $operand";
1036                         $query_cgi .= "&op=$operators[$i-1]";
1037                         $query_cgi .= "&idx=$index" if $index;
1038                         $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1039                         $query_desc .=
1040                           " $operators[$i-1] $index_plus $operands[$i]";
1041                     }
1042
1043                     # Default operator is and
1044                     else {
1045                         $query      .= " and ";
1046                         $query      .= "$index_plus " unless $indexes_set;
1047                         $query      .= "$operand";
1048                         $query_cgi  .= "&op=and&idx=$index" if $index;
1049                         $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1050                         $query_desc .= " and $index_plus $operands[$i]";
1051                     }
1052                 }
1053
1054                 # There isn't a pervious operand, don't need an operator
1055                 else {
1056
1057                     # Field-weighted queries already have indexes set
1058                     $query .= " $index_plus " unless $indexes_set;
1059                     $query .= $operand;
1060                     $query_desc .= " $index_plus $operands[$i]";
1061                     $query_cgi  .= "&idx=$index" if $index;
1062                     $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1063                     $previous_operand = 1;
1064                 }
1065             }    #/if $operands
1066         }    # /for
1067     }
1068     warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
1069
1070     # add limits
1071     my $group_OR_limits;
1072     my $availability_limit;
1073     foreach my $this_limit (@limits) {
1074         if ( $this_limit =~ /available/ ) {
1075
1076 # 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1077 # In English:
1078 # all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1079             $availability_limit .=
1080 "( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and (lost,st-numeric=0) )"; #or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )";
1081             $limit_cgi  .= "&limit=available";
1082             $limit_desc .= "";
1083         }
1084
1085         # group_OR_limits, prefixed by mc-
1086         # OR every member of the group
1087         elsif ( $this_limit =~ /mc/ ) {
1088             $group_OR_limits .= " or " if $group_OR_limits;
1089             $limit_desc      .= " or " if $group_OR_limits;
1090             $group_OR_limits .= "$this_limit";
1091             $limit_cgi       .= "&limit=$this_limit";
1092             $limit_desc      .= " $this_limit";
1093         }
1094
1095         # Regular old limits
1096         else {
1097             $limit .= " and " if $limit || $query;
1098             $limit      .= "$this_limit";
1099             $limit_cgi  .= "&limit=$this_limit";
1100             if ($this_limit =~ /^branch:(.+)/) {
1101                 my $branchcode = $1;
1102                 my $branchname = GetBranchName($branchcode);
1103                 if (defined $branchname) {
1104                     $limit_desc .= " branch:$branchname";
1105                 } else {
1106                     $limit_desc .= " $this_limit";
1107                 }
1108             } else {
1109                 $limit_desc .= " $this_limit";
1110             }
1111         }
1112     }
1113     if ($group_OR_limits) {
1114         $limit .= " and " if ( $query || $limit );
1115         $limit .= "($group_OR_limits)";
1116     }
1117     if ($availability_limit) {
1118         $limit .= " and " if ( $query || $limit );
1119         $limit .= "($availability_limit)";
1120     }
1121
1122     # Normalize the query and limit strings
1123     $query =~ s/:/=/g;
1124     $limit =~ s/:/=/g;
1125     for ( $query, $query_desc, $limit, $limit_desc ) {
1126         $_ =~ s/  / /g;    # remove extra spaces
1127         $_ =~ s/^ //g;     # remove any beginning spaces
1128         $_ =~ s/ $//g;     # remove any ending spaces
1129         $_ =~ s/==/=/g;    # remove double == from query
1130     }
1131     $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1132
1133     for ($query_cgi,$simple_query) {
1134         $_ =~ s/"//g;
1135     }
1136     # append the limit to the query
1137     $query .= " " . $limit;
1138
1139     # Warnings if DEBUG
1140     if ($DEBUG) {
1141         warn "QUERY:" . $query;
1142         warn "QUERY CGI:" . $query_cgi;
1143         warn "QUERY DESC:" . $query_desc;
1144         warn "LIMIT:" . $limit;
1145         warn "LIMIT CGI:" . $limit_cgi;
1146         warn "LIMIT DESC:" . $limit_desc;
1147         warn "---------\nLeave buildQuery\n---------";
1148     }
1149     return (
1150         undef,              $query, $simple_query, $query_cgi,
1151         $query_desc,        $limit, $limit_cgi,    $limit_desc,
1152         $stopwords_removed, $query_type
1153     );
1154 }
1155
1156 =head2 searchResults
1157
1158 Format results in a form suitable for passing to the template
1159
1160 =cut
1161
1162 # IMO this subroutine is pretty messy still -- it's responsible for
1163 # building the HTML output for the template
1164 sub searchResults {
1165     my ( $searchdesc, $hits, $results_per_page, $offset, $scan, @marcresults ) = @_;
1166     my $dbh = C4::Context->dbh;
1167     my $even = 1;
1168     my @newresults;
1169
1170     # add search-term highlighting via <span>s on the search terms
1171     my $span_terms_hashref;
1172     for my $span_term ( split( / /, $searchdesc ) ) {
1173         $span_term =~ s/(.*=|\)|\(|\+|\.|\*)//g;
1174         $span_terms_hashref->{$span_term}++;
1175     }
1176
1177     #Build branchnames hash
1178     #find branchname
1179     #get branch information.....
1180     my %branches;
1181     my $bsth =
1182       $dbh->prepare("SELECT branchcode,branchname FROM branches")
1183       ;    # FIXME : use C4::Koha::GetBranches
1184     $bsth->execute();
1185     while ( my $bdata = $bsth->fetchrow_hashref ) {
1186         $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'};
1187     }
1188 # FIXME - We build an authorised values hash here, using the default framework
1189 # though it is possible to have different authvals for different fws.
1190
1191     my $shelflocations =GetKohaAuthorisedValues('items.location','');
1192
1193     # get notforloan authorised value list (see $shelflocations  FIXME)
1194     my $notforloan_authorised_value = GetAuthValCode('items.notforloan','');
1195
1196     #Build itemtype hash
1197     #find itemtype & itemtype image
1198     my %itemtypes;
1199     $bsth =
1200       $dbh->prepare(
1201         "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes"
1202       );
1203     $bsth->execute();
1204     while ( my $bdata = $bsth->fetchrow_hashref ) {
1205                 foreach (qw(description imageurl summary notforloan)) {
1206                 $itemtypes{ $bdata->{'itemtype'} }->{$_} = $bdata->{$_};
1207                 }
1208     }
1209
1210     #search item field code
1211     my $sth =
1212       $dbh->prepare(
1213 "SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'"
1214       );
1215     $sth->execute;
1216     my ($itemtag) = $sth->fetchrow;
1217
1218     ## find column names of items related to MARC
1219     my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items");
1220     $sth2->execute;
1221     my %subfieldstosearch;
1222     while ( ( my $column ) = $sth2->fetchrow ) {
1223         my ( $tagfield, $tagsubfield ) =
1224           &GetMarcFromKohaField( "items." . $column, "" );
1225         $subfieldstosearch{$column} = $tagsubfield;
1226     }
1227
1228     # handle which records to actually retrieve
1229     my $times;
1230     if ( $hits && $offset + $results_per_page <= $hits ) {
1231         $times = $offset + $results_per_page;
1232     }
1233     else {
1234         $times = $hits;  # FIXME: if $hits is undefined, why do we want to equal it?
1235     }
1236
1237     # loop through all of the records we've retrieved
1238     for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
1239         my $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
1240         my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' );
1241         $oldbiblio->{subtitle} = C4::Biblio::get_koha_field_from_marc('bibliosubtitle', 'subtitle', $marcrecord, '');
1242         $oldbiblio->{result_number} = $i + 1;
1243
1244         # add imageurl to itemtype if there is one
1245         $oldbiblio->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
1246
1247         $oldbiblio->{'authorised_value_images'}  = C4::Items::get_authorised_value_images( C4::Biblio::get_biblio_authorised_values( $oldbiblio->{'biblionumber'}, $marcrecord ) );
1248         (my $aisbn) = $oldbiblio->{isbn} =~ /([\d-]*[X]*)/;
1249         $aisbn =~ s/-//g;
1250         $oldbiblio->{amazonisbn} = $aisbn;
1251         $oldbiblio->{description} = $itemtypes{ $oldbiblio->{itemtype} }->{description};
1252  # Build summary if there is one (the summary is defined in the itemtypes table)
1253  # FIXME: is this used anywhere, I think it can be commented out? -- JF
1254         if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
1255             my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1256             my @fields  = $marcrecord->fields();
1257             foreach my $field (@fields) {
1258                 my $tag      = $field->tag();
1259                 my $tagvalue = $field->as_string();
1260                 if (! utf8::is_utf8($tagvalue)) {
1261                     utf8::decode($tagvalue);
1262                 }
1263
1264                 $summary =~
1265                   s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
1266                 unless ( $tag < 10 ) {
1267                     my @subf = $field->subfields;
1268                     for my $i ( 0 .. $#subf ) {
1269                         my $subfieldcode  = $subf[$i][0];
1270                         my $subfieldvalue = $subf[$i][1];
1271                         if (! utf8::is_utf8($subfieldvalue)) {
1272                             utf8::decode($subfieldvalue);
1273                         }
1274                         my $tagsubf       = $tag . $subfieldcode;
1275                         $summary =~
1276 s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
1277                     }
1278                 }
1279             }
1280             # FIXME: yuk
1281             $summary =~ s/\[(.*?)]//g;
1282             $summary =~ s/\n/<br\/>/g;
1283             $oldbiblio->{summary} = $summary;
1284         }
1285
1286         # save an author with no <span> tag, for the <a href=search.pl?q=<!--tmpl_var name="author"-->> link
1287         $oldbiblio->{'author_nospan'} = $oldbiblio->{'author'};
1288         $oldbiblio->{'title_nospan'} = $oldbiblio->{'title'};
1289         $oldbiblio->{'subtitle_nospan'} = $oldbiblio->{'subtitle'};
1290         # Add search-term highlighting to the whole record where they match using <span>s
1291         if (C4::Context->preference("OpacHighlightedWords")){
1292             my $searchhighlightblob;
1293             for my $highlight_field ( $marcrecord->fields ) {
1294
1295     # FIXME: need to skip title, subtitle, author, etc., as they are handled below
1296                 next if $highlight_field->tag() =~ /(^00)/;    # skip fixed fields
1297                 for my $subfield ($highlight_field->subfields()) {
1298                     my $match;
1299                     next if $subfield->[0] eq '9';
1300                     my $field = $subfield->[1];
1301                     for my $term ( keys %$span_terms_hashref ) {
1302                         if ( ( $field =~ /$term/i ) && (( length($term) > 3 ) || ($field =~ / $term /i)) ) {
1303                             $field =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1304                         $match++;
1305                         }
1306                     }
1307                     $searchhighlightblob .= $field . " ... " if $match;
1308                 }
1309
1310             }
1311             $searchhighlightblob = ' ... '.$searchhighlightblob if $searchhighlightblob;
1312             $oldbiblio->{'searchhighlightblob'} = $searchhighlightblob;
1313         }
1314
1315         # Add search-term highlighting to the title, subtitle, etc. fields
1316         for my $term ( keys %$span_terms_hashref ) {
1317             my $old_term = $term;
1318             if ( length($term) > 3 ) {
1319                 $term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\]|\\|\*)//g;
1320                                 foreach(qw(title subtitle author publishercode place pages notes size)) {
1321                         $oldbiblio->{$_} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1322                                 }
1323             }
1324         }
1325
1326         ($i % 2) and $oldbiblio->{'toggle'} = 1;
1327
1328         # Pull out the items fields
1329         my @fields = $marcrecord->field($itemtag);
1330
1331         # Setting item statuses for display
1332         my @available_items_loop;
1333         my @onloan_items_loop;
1334         my @notforloan_items_loop;
1335         my @other_items_loop;
1336
1337         my $available_items;
1338         my $onloan_items;
1339         my $notforloan_items;
1340         my $other_items;
1341
1342         my $ordered_count         = 0;
1343         my $available_count       = 0;
1344         my $onloan_count          = 0;
1345         my $notforloan_count      = 0;
1346         my $longoverdue_count     = 0;
1347         my $other_count           = 0;
1348         my $wthdrawn_count        = 0;
1349         my $itemlost_count        = 0;
1350         my $itembinding_count     = 0;
1351         my $itemdamaged_count     = 0;
1352         my $item_in_transit_count = 0;
1353         my $can_place_holds       = 0;
1354         my $items_count           = scalar(@fields);
1355         my $maxitems =
1356           ( C4::Context->preference('maxItemsinSearchResults') )
1357           ? C4::Context->preference('maxItemsinSearchResults') - 1
1358           : 1;
1359
1360         # loop through every item
1361         foreach my $field (@fields) {
1362             my $item;
1363
1364             # populate the items hash
1365             foreach my $code ( keys %subfieldstosearch ) {
1366                 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1367             }
1368                         my $hbranch     = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'homebranch'    : 'holdingbranch';
1369                         my $otherbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'holdingbranch' : 'homebranch';
1370             # set item's branch name, use HomeOrHoldingBranch syspref first, fall back to the other one
1371             if ($item->{$hbranch}) {
1372                 $item->{'branchname'} = $branches{$item->{$hbranch}};
1373             }
1374             elsif ($item->{$otherbranch}) {     # Last resort
1375                 $item->{'branchname'} = $branches{$item->{$otherbranch}};
1376             }
1377
1378                         my $prefix = $item->{$hbranch} . '--' . $item->{location} . $item->{itype} . $item->{itemcallnumber};
1379 # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
1380             if ( $item->{onloan} ) {
1381                 $onloan_count++;
1382                                 my $key = $prefix . $item->{due_date};
1383                                 $onloan_items->{$key}->{due_date} = format_date($item->{onloan});
1384                                 $onloan_items->{$key}->{count}++ if $item->{$hbranch};
1385                                 $onloan_items->{$key}->{branchname} = $item->{branchname};
1386                                 $onloan_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1387                                 $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber};
1388                                 $onloan_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1389                                 $onloan_items->{$key}->{barcode} = $item->{barcode};
1390                 # if something's checked out and lost, mark it as 'long overdue'
1391                 if ( $item->{itemlost} ) {
1392                     $onloan_items->{$prefix}->{longoverdue}++;
1393                     $longoverdue_count++;
1394                 } else {        # can place holds as long as item isn't lost
1395                     $can_place_holds = 1;
1396                 }
1397             }
1398
1399          # items not on loan, but still unavailable ( lost, withdrawn, damaged )
1400             else {
1401
1402                 # item is on order
1403                 if ( $item->{notforloan} == -1 ) {
1404                     $ordered_count++;
1405                 }
1406
1407                 # is item in transit?
1408                 my $transfertwhen = '';
1409                 my ($transfertfrom, $transfertto);
1410
1411                 unless ($item->{wthdrawn}
1412                         || $item->{itemlost}
1413                         || $item->{damaged}
1414                         || $item->{notforloan}
1415                         || $items_count > 20) {
1416
1417                     # A couple heuristics to limit how many times
1418                     # we query the database for item transfer information, sacrificing
1419                     # accuracy in some cases for speed;
1420                     #
1421                     # 1. don't query if item has one of the other statuses
1422                     # 2. don't check transit status if the bib has
1423                     #    more than 20 items
1424                     #
1425                     # FIXME: to avoid having the query the database like this, and to make
1426                     #        the in transit status count as unavailable for search limiting,
1427                     #        should map transit status to record indexed in Zebra.
1428                     #
1429                     ($transfertwhen, $transfertfrom, $transfertto) = C4::Circulation::GetTransfers($item->{itemnumber});
1430                 }
1431
1432                 # item is withdrawn, lost or damaged
1433                 if (   $item->{wthdrawn}
1434                     || $item->{itemlost}
1435                     || $item->{damaged}
1436                     || $item->{notforloan}
1437                     || ($transfertwhen ne ''))
1438                 {
1439                     $wthdrawn_count++        if $item->{wthdrawn};
1440                     $itemlost_count++        if $item->{itemlost};
1441                     $itemdamaged_count++     if $item->{damaged};
1442                     $item_in_transit_count++ if $transfertwhen ne '';
1443                     $item->{status} = $item->{wthdrawn} . "-" . $item->{itemlost} . "-" . $item->{damaged} . "-" . $item->{notforloan};
1444
1445                                         my $key = $prefix . $item->{status};
1446
1447                                         foreach (qw(wthdrawn itemlost damaged branchname itemcallnumber)) {
1448                                             if($item->{notforloan} == 1){
1449                                                 $notforloan_items->{$key}->{$_} = $item->{$_};
1450                                             }else{
1451                            $other_items->{$key}->{$_} = $item->{$_};
1452                                             }
1453                                         }
1454                                         if($item->{notforloan} == 1){
1455                         $notforloan_count++;
1456
1457                         $notforloan_items->{$key}->{intransit} = ($transfertwhen ne '') ? 1 : 0;
1458                                         $notforloan_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1459                                         $notforloan_items->{$key}->{count}++ if $item->{$hbranch};
1460                                         $notforloan_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1461                                         $notforloan_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1462                                         $notforloan_items->{$key}->{barcode} = $item->{barcode};
1463                     }else{
1464                         $other_count++;
1465
1466                         $other_items->{$key}->{intransit} = ($transfertwhen ne '') ? 1 : 0;
1467                                         $other_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1468                                         $other_items->{$key}->{count}++ if $item->{$hbranch};
1469                                         $other_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1470                                         $other_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1471                                         $other_items->{$key}->{barcode} = $item->{barcode};
1472                     }
1473
1474                 }
1475                 # item is available
1476                 else {
1477                     $can_place_holds = 1;
1478                     $available_count++;
1479                                         $available_items->{$prefix}->{count}++ if $item->{$hbranch};
1480                                         foreach (qw(branchname itemcallnumber barcode)) {
1481                         $available_items->{$prefix}->{$_} = $item->{$_};
1482                                         }
1483                                         $available_items->{$prefix}->{location} = $shelflocations->{ $item->{location} };
1484                                         $available_items->{$prefix}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1485                 }
1486             }
1487         }    # notforloan, item level and biblioitem level
1488         my ( $availableitemscount, $onloanitemscount, $notforloanitemscount,$otheritemscount );
1489         $maxitems =
1490           ( C4::Context->preference('maxItemsinSearchResults') )
1491           ? C4::Context->preference('maxItemsinSearchResults') - 1
1492           : 1;
1493         for my $key ( sort keys %$onloan_items ) {
1494             (++$onloanitemscount > $maxitems) and last;
1495             push @onloan_items_loop, $onloan_items->{$key};
1496         }
1497         for my $key ( sort keys %$other_items ) {
1498             (++$otheritemscount > $maxitems) and last;
1499             push @other_items_loop, $other_items->{$key};
1500         }
1501         for my $key ( sort keys %$notforloan_items ) {
1502             (++$notforloanitemscount > $maxitems) and last;
1503             push @notforloan_items_loop, $notforloan_items->{$key};
1504         }
1505         for my $key ( sort keys %$available_items ) {
1506             (++$availableitemscount > $maxitems) and last;
1507             push @available_items_loop, $available_items->{$key}
1508         }
1509
1510         # XSLT processing of some stuff
1511         if (C4::Context->preference("XSLTResultsDisplay") && !$scan) {
1512             $oldbiblio->{XSLTResultsRecord} = XSLTParse4Display(
1513                 $oldbiblio->{biblionumber}, $marcrecord, 'Results' );
1514         }
1515
1516         # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
1517         $can_place_holds = 0 if $itemtypes{ $oldbiblio->{itemtype} }->{notforloan};
1518         $oldbiblio->{norequests} = 1 unless $can_place_holds;
1519         $oldbiblio->{itemsplural}          = 1 if $items_count > 1;
1520         $oldbiblio->{items_count}          = $items_count;
1521         $oldbiblio->{available_items_loop} = \@available_items_loop;
1522         $oldbiblio->{notforloan_items_loop}= \@notforloan_items_loop;
1523         $oldbiblio->{onloan_items_loop}    = \@onloan_items_loop;
1524         $oldbiblio->{other_items_loop}     = \@other_items_loop;
1525         $oldbiblio->{availablecount}       = $available_count;
1526         $oldbiblio->{availableplural}      = 1 if $available_count > 1;
1527         $oldbiblio->{onloancount}          = $onloan_count;
1528         $oldbiblio->{onloanplural}         = 1 if $onloan_count > 1;
1529         $oldbiblio->{notforloancount}      = $notforloan_count;
1530         $oldbiblio->{othercount}           = $other_count;
1531         $oldbiblio->{otherplural}          = 1 if $other_count > 1;
1532         $oldbiblio->{wthdrawncount}        = $wthdrawn_count;
1533         $oldbiblio->{itemlostcount}        = $itemlost_count;
1534         $oldbiblio->{damagedcount}         = $itemdamaged_count;
1535         $oldbiblio->{intransitcount}       = $item_in_transit_count;
1536         $oldbiblio->{orderedcount}         = $ordered_count;
1537         $oldbiblio->{isbn} =~
1538           s/-//g;    # deleting - in isbn to enable amazon content
1539         push( @newresults, $oldbiblio );
1540     }
1541     return @newresults;
1542 }
1543
1544 #----------------------------------------------------------------------
1545 #
1546 # Non-Zebra GetRecords#
1547 #----------------------------------------------------------------------
1548
1549 =head2 NZgetRecords
1550
1551   NZgetRecords has the same API as zera getRecords, even if some parameters are not managed
1552
1553 =cut
1554
1555 sub NZgetRecords {
1556     my (
1557         $query,            $simple_query, $sort_by_ref,    $servers_ref,
1558         $results_per_page, $offset,       $expanded_facet, $branches,
1559         $query_type,       $scan
1560     ) = @_;
1561     warn "query =$query" if $DEBUG;
1562     my $result = NZanalyse($query);
1563     warn "results =$result" if $DEBUG;
1564     return ( undef,
1565         NZorder( $result, @$sort_by_ref[0], $results_per_page, $offset ),
1566         undef );
1567 }
1568
1569 =head2 NZanalyse
1570
1571   NZanalyse : get a CQL string as parameter, and returns a list of biblionumber;title,biblionumber;title,...
1572   the list is built from an inverted index in the nozebra SQL table
1573   note that title is here only for convenience : the sorting will be very fast when requested on title
1574   if the sorting is requested on something else, we will have to reread all results, and that may be longer.
1575
1576 =cut
1577
1578 sub NZanalyse {
1579     my ( $string, $server ) = @_;
1580 #     warn "---------"       if $DEBUG;
1581     warn " NZanalyse" if $DEBUG;
1582 #     warn "---------"       if $DEBUG;
1583
1584  # $server contains biblioserver or authorities, depending on what we search on.
1585  #warn "querying : $string on $server";
1586     $server = 'biblioserver' unless $server;
1587
1588 # if we have a ", replace the content to discard temporarily any and/or/not inside
1589     my $commacontent;
1590     if ( $string =~ /"/ ) {
1591         $string =~ s/"(.*?)"/__X__/;
1592         $commacontent = $1;
1593         warn "commacontent : $commacontent" if $DEBUG;
1594     }
1595
1596 # split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y"
1597 # then, call again NZanalyse with $left and $right
1598 # (recursive until we find a leaf (=> something without and/or/not)
1599 # delete repeated operator... Would then go in infinite loop
1600     while ( $string =~ s/( and| or| not| AND| OR| NOT)\1/$1/g ) {
1601     }
1602
1603     #process parenthesis before.
1604     if ( $string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/ ) {
1605         my $left     = $1;
1606         my $right    = $4;
1607         my $operator = lc($3);   # FIXME: and/or/not are operators, not operands
1608         warn
1609 "dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right"
1610           if $DEBUG;
1611         my $leftresult = NZanalyse( $left, $server );
1612         if ($operator) {
1613             my $rightresult = NZanalyse( $right, $server );
1614
1615             # OK, we have the results for right and left part of the query
1616             # depending of operand, intersect, union or exclude both lists
1617             # to get a result list
1618             if ( $operator eq ' and ' ) {
1619                 return NZoperatorAND($leftresult,$rightresult);
1620             }
1621             elsif ( $operator eq ' or ' ) {
1622
1623                 # just merge the 2 strings
1624                 return $leftresult . $rightresult;
1625             }
1626             elsif ( $operator eq ' not ' ) {
1627                 return NZoperatorNOT($leftresult,$rightresult);
1628             }
1629         }
1630         else {
1631 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1632             return $leftresult;
1633         }
1634     }
1635     warn "string :" . $string if $DEBUG;
1636     my $left = "";
1637     my $right = "";
1638     my $operator = "";
1639     if ($string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/) {
1640         $left     = $1;
1641         $right    = $3;
1642         $operator = lc($2);    # FIXME: and/or/not are operators, not operands
1643     }
1644     warn "no parenthesis. left : $left operator: $operator right: $right"
1645       if $DEBUG;
1646
1647     # it's not a leaf, we have a and/or/not
1648     if ($operator) {
1649
1650         # reintroduce comma content if needed
1651         $right =~ s/__X__/"$commacontent"/ if $commacontent;
1652         $left  =~ s/__X__/"$commacontent"/ if $commacontent;
1653         warn "node : $left / $operator / $right\n" if $DEBUG;
1654         my $leftresult  = NZanalyse( $left,  $server );
1655         my $rightresult = NZanalyse( $right, $server );
1656         warn " leftresult : $leftresult" if $DEBUG;
1657         warn " rightresult : $rightresult" if $DEBUG;
1658         # OK, we have the results for right and left part of the query
1659         # depending of operand, intersect, union or exclude both lists
1660         # to get a result list
1661         if ( $operator eq ' and ' ) {
1662             warn "NZAND";
1663             return NZoperatorAND($leftresult,$rightresult);
1664         }
1665         elsif ( $operator eq ' or ' ) {
1666
1667             # just merge the 2 strings
1668             return $leftresult . $rightresult;
1669         }
1670         elsif ( $operator eq ' not ' ) {
1671             return NZoperatorNOT($leftresult,$rightresult);
1672         }
1673         else {
1674
1675 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1676             die "error : operand unknown : $operator for $string";
1677         }
1678
1679         # it's a leaf, do the real SQL query and return the result
1680     }
1681     else {
1682         $string =~ s/__X__/"$commacontent"/ if $commacontent;
1683         $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g;
1684         #remove trailing blank at the beginning
1685         $string =~ s/^ //g;
1686         warn "leaf:$string" if $DEBUG;
1687
1688         # parse the string in in operator/operand/value again
1689         my $left = "";
1690         my $operator = "";
1691         my $right = "";
1692         if ($string =~ /(.*)(>=|<=)(.*)/) {
1693             $left     = $1;
1694             $operator = $2;
1695             $right    = $3;
1696         } else {
1697             $left = $string;
1698         }
1699 #         warn "handling leaf... left:$left operator:$operator right:$right"
1700 #           if $DEBUG;
1701         unless ($operator) {
1702             if ($string =~ /(.*)(>|<|=)(.*)/) {
1703                 $left     = $1;
1704                 $operator = $2;
1705                 $right    = $3;
1706                 warn
1707     "handling unless (operator)... left:$left operator:$operator right:$right"
1708                 if $DEBUG;
1709             } else {
1710                 $left = $string;
1711             }
1712         }
1713         my $results;
1714
1715 # strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr...
1716         $left =~ s/ .*$//;
1717
1718         # automatic replace for short operators
1719         $left = 'title'            if $left =~ '^ti$';
1720         $left = 'author'           if $left =~ '^au$';
1721         $left = 'publisher'        if $left =~ '^pb$';
1722         $left = 'subject'          if $left =~ '^su$';
1723         $left = 'koha-Auth-Number' if $left =~ '^an$';
1724         $left = 'keyword'          if $left =~ '^kw$';
1725         $left = 'itemtype'         if $left =~ '^mc$'; # Fix for Bug 2599 - Search limits not working for NoZebra
1726         warn "handling leaf... left:$left operator:$operator right:$right" if $DEBUG;
1727         if ( $operator && $left ne 'keyword' ) {
1728
1729             #do a specific search
1730             my $dbh = C4::Context->dbh;
1731             $operator = 'LIKE' if $operator eq '=' and $right =~ /%/;
1732             my $sth =
1733               $dbh->prepare(
1734 "SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?"
1735               );
1736             warn "$left / $operator / $right\n" if $DEBUG;
1737
1738             # split each word, query the DB and build the biblionumbers result
1739             #sanitizing leftpart
1740             $left =~ s/^\s+|\s+$//;
1741             foreach ( split / /, $right ) {
1742                 my $biblionumbers;
1743                 $_ =~ s/^\s+|\s+$//;
1744                 next unless $_;
1745                 warn "EXECUTE : $server, $left, $_" if $DEBUG;
1746                 $sth->execute( $server, $left, $_ )
1747                   or warn "execute failed: $!";
1748                 while ( my ( $line, $value ) = $sth->fetchrow ) {
1749
1750 # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
1751 # otherwise, fill the result
1752                     $biblionumbers .= $line
1753                       unless ( $right =~ /^\d+$/ && $value =~ /\D/ );
1754                     warn "result : $value "
1755                       . ( $right  =~ /\d/ ) . "=="
1756                       . ( $value =~ /\D/?$line:"" ) if $DEBUG;         #= $line";
1757                 }
1758
1759 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1760                 if ($results) {
1761                     warn "NZAND" if $DEBUG;
1762                     $results = NZoperatorAND($biblionumbers,$results);
1763                 }
1764                 else {
1765                     $results = $biblionumbers;
1766                 }
1767             }
1768         }
1769         else {
1770
1771       #do a complete search (all indexes), if index='kw' do complete search too.
1772             my $dbh = C4::Context->dbh;
1773             my $sth =
1774               $dbh->prepare(
1775 "SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?"
1776               );
1777
1778             # split each word, query the DB and build the biblionumbers result
1779             foreach ( split / /, $string ) {
1780                 next if C4::Context->stopwords->{ uc($_) };   # skip if stopword
1781                 warn "search on all indexes on $_" if $DEBUG;
1782                 my $biblionumbers;
1783                 next unless $_;
1784                 $sth->execute( $server, $_ );
1785                 while ( my $line = $sth->fetchrow ) {
1786                     $biblionumbers .= $line;
1787                 }
1788
1789 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1790                 if ($results) {
1791                     $results = NZoperatorAND($biblionumbers,$results);
1792                 }
1793                 else {
1794                     warn "NEW RES for $_ = $biblionumbers" if $DEBUG;
1795                     $results = $biblionumbers;
1796                 }
1797             }
1798         }
1799         warn "return : $results for LEAF : $string" if $DEBUG;
1800         return $results;
1801     }
1802     warn "---------\nLeave NZanalyse\n---------" if $DEBUG;
1803 }
1804
1805 sub NZoperatorAND{
1806     my ($rightresult, $leftresult)=@_;
1807
1808     my @leftresult = split /;/, $leftresult;
1809     warn " @leftresult / $rightresult \n" if $DEBUG;
1810
1811     #             my @rightresult = split /;/,$leftresult;
1812     my $finalresult;
1813
1814 # parse the left results, and if the biblionumber exist in the right result, save it in finalresult
1815 # the result is stored twice, to have the same weight for AND than OR.
1816 # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130
1817 # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64
1818     foreach (@leftresult) {
1819         my $value = $_;
1820         my $countvalue;
1821         ( $value, $countvalue ) = ( $1, $2 ) if ($value=~/(.*)-(\d+)$/);
1822         if ( $rightresult =~ /\Q$value\E-(\d+);/ ) {
1823             $countvalue = ( $1 > $countvalue ? $countvalue : $1 );
1824             $finalresult .=
1825                 "$value-$countvalue;$value-$countvalue;";
1826         }
1827     }
1828     warn "NZAND DONE : $finalresult \n" if $DEBUG;
1829     return $finalresult;
1830 }
1831
1832 sub NZoperatorOR{
1833     my ($rightresult, $leftresult)=@_;
1834     return $rightresult.$leftresult;
1835 }
1836
1837 sub NZoperatorNOT{
1838     my ($leftresult, $rightresult)=@_;
1839
1840     my @leftresult = split /;/, $leftresult;
1841
1842     #             my @rightresult = split /;/,$leftresult;
1843     my $finalresult;
1844     foreach (@leftresult) {
1845         my $value=$_;
1846         $value=$1 if $value=~m/(.*)-\d+$/;
1847         unless ($rightresult =~ "$value-") {
1848             $finalresult .= "$_;";
1849         }
1850     }
1851     return $finalresult;
1852 }
1853
1854 =head2 NZorder
1855
1856   $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset);
1857
1858   TODO :: Description
1859
1860 =cut
1861
1862 sub NZorder {
1863     my ( $biblionumbers, $ordering, $results_per_page, $offset ) = @_;
1864     warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG;
1865
1866     # order title asc by default
1867     #     $ordering = '1=36 <i' unless $ordering;
1868     $results_per_page = 20 unless $results_per_page;
1869     $offset           = 0  unless $offset;
1870     my $dbh = C4::Context->dbh;
1871
1872     #
1873     # order by POPULARITY
1874     #
1875     if ( $ordering =~ /popularity/ ) {
1876         my %result;
1877         my %popularity;
1878
1879         # popularity is not in MARC record, it's builded from a specific query
1880         my $sth =
1881           $dbh->prepare("select sum(issues) from items where biblionumber=?");
1882         foreach ( split /;/, $biblionumbers ) {
1883             my ( $biblionumber, $title ) = split /,/, $_;
1884             $result{$biblionumber} = GetMarcBiblio($biblionumber);
1885             $sth->execute($biblionumber);
1886             my $popularity = $sth->fetchrow || 0;
1887
1888 # hint : the key is popularity.title because we can have
1889 # many results with the same popularity. In this cas, sub-ordering is done by title
1890 # we also have biblionumber to avoid bug for 2 biblios with the same title & popularity
1891 # (un-frequent, I agree, but we won't forget anything that way ;-)
1892             $popularity{ sprintf( "%10d", $popularity ) . $title
1893                   . $biblionumber } = $biblionumber;
1894         }
1895
1896     # sort the hash and return the same structure as GetRecords (Zebra querying)
1897         my $result_hash;
1898         my $numbers = 0;
1899         if ( $ordering eq 'popularity_dsc' ) {    # sort popularity DESC
1900             foreach my $key ( sort { $b cmp $a } ( keys %popularity ) ) {
1901                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1902                   $result{ $popularity{$key} }->as_usmarc();
1903             }
1904         }
1905         else {                                    # sort popularity ASC
1906             foreach my $key ( sort ( keys %popularity ) ) {
1907                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1908                   $result{ $popularity{$key} }->as_usmarc();
1909             }
1910         }
1911         my $finalresult = ();
1912         $result_hash->{'hits'}         = $numbers;
1913         $finalresult->{'biblioserver'} = $result_hash;
1914         return $finalresult;
1915
1916         #
1917         # ORDER BY author
1918         #
1919     }
1920     elsif ( $ordering =~ /author/ ) {
1921         my %result;
1922         foreach ( split /;/, $biblionumbers ) {
1923             my ( $biblionumber, $title ) = split /,/, $_;
1924             my $record = GetMarcBiblio($biblionumber);
1925             my $author;
1926             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1927                 $author = $record->subfield( '200', 'f' );
1928                 $author = $record->subfield( '700', 'a' ) unless $author;
1929             }
1930             else {
1931                 $author = $record->subfield( '100', 'a' );
1932             }
1933
1934 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1935 # and we don't want to get only 1 result for each of them !!!
1936             $result{ $author . $biblionumber } = $record;
1937         }
1938
1939     # sort the hash and return the same structure as GetRecords (Zebra querying)
1940         my $result_hash;
1941         my $numbers = 0;
1942         if ( $ordering eq 'author_za' ) {    # sort by author desc
1943             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1944                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1945                   $result{$key}->as_usmarc();
1946             }
1947         }
1948         else {                               # sort by author ASC
1949             foreach my $key ( sort ( keys %result ) ) {
1950                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1951                   $result{$key}->as_usmarc();
1952             }
1953         }
1954         my $finalresult = ();
1955         $result_hash->{'hits'}         = $numbers;
1956         $finalresult->{'biblioserver'} = $result_hash;
1957         return $finalresult;
1958
1959         #
1960         # ORDER BY callnumber
1961         #
1962     }
1963     elsif ( $ordering =~ /callnumber/ ) {
1964         my %result;
1965         foreach ( split /;/, $biblionumbers ) {
1966             my ( $biblionumber, $title ) = split /,/, $_;
1967             my $record = GetMarcBiblio($biblionumber);
1968             my $callnumber;
1969             my ( $callnumber_tag, $callnumber_subfield ) =
1970               GetMarcFromKohaField( 'items.itemcallnumber','' );
1971             ( $callnumber_tag, $callnumber_subfield ) =
1972               GetMarcFromKohaField('biblioitems.callnumber','')
1973               unless $callnumber_tag;
1974             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1975                 $callnumber = $record->subfield( '200', 'f' );
1976             }
1977             else {
1978                 $callnumber = $record->subfield( '100', 'a' );
1979             }
1980
1981 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1982 # and we don't want to get only 1 result for each of them !!!
1983             $result{ $callnumber . $biblionumber } = $record;
1984         }
1985
1986     # sort the hash and return the same structure as GetRecords (Zebra querying)
1987         my $result_hash;
1988         my $numbers = 0;
1989         if ( $ordering eq 'call_number_dsc' ) {    # sort by title desc
1990             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1991                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1992                   $result{$key}->as_usmarc();
1993             }
1994         }
1995         else {                                     # sort by title ASC
1996             foreach my $key ( sort { $a cmp $b } ( keys %result ) ) {
1997                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1998                   $result{$key}->as_usmarc();
1999             }
2000         }
2001         my $finalresult = ();
2002         $result_hash->{'hits'}         = $numbers;
2003         $finalresult->{'biblioserver'} = $result_hash;
2004         return $finalresult;
2005     }
2006     elsif ( $ordering =~ /pubdate/ ) {             #pub year
2007         my %result;
2008         foreach ( split /;/, $biblionumbers ) {
2009             my ( $biblionumber, $title ) = split /,/, $_;
2010             my $record = GetMarcBiblio($biblionumber);
2011             my ( $publicationyear_tag, $publicationyear_subfield ) =
2012               GetMarcFromKohaField( 'biblioitems.publicationyear', '' );
2013             my $publicationyear =
2014               $record->subfield( $publicationyear_tag,
2015                 $publicationyear_subfield );
2016
2017 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2018 # and we don't want to get only 1 result for each of them !!!
2019             $result{ $publicationyear . $biblionumber } = $record;
2020         }
2021
2022     # sort the hash and return the same structure as GetRecords (Zebra querying)
2023         my $result_hash;
2024         my $numbers = 0;
2025         if ( $ordering eq 'pubdate_dsc' ) {    # sort by pubyear desc
2026             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2027                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2028                   $result{$key}->as_usmarc();
2029             }
2030         }
2031         else {                                 # sort by pub year ASC
2032             foreach my $key ( sort ( keys %result ) ) {
2033                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2034                   $result{$key}->as_usmarc();
2035             }
2036         }
2037         my $finalresult = ();
2038         $result_hash->{'hits'}         = $numbers;
2039         $finalresult->{'biblioserver'} = $result_hash;
2040         return $finalresult;
2041
2042         #
2043         # ORDER BY title
2044         #
2045     }
2046     elsif ( $ordering =~ /title/ ) {
2047
2048 # the title is in the biblionumbers string, so we just need to build a hash, sort it and return
2049         my %result;
2050         foreach ( split /;/, $biblionumbers ) {
2051             my ( $biblionumber, $title ) = split /,/, $_;
2052
2053 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2054 # and we don't want to get only 1 result for each of them !!!
2055 # hint & speed improvement : we can order without reading the record
2056 # so order, and read records only for the requested page !
2057             $result{ $title . $biblionumber } = $biblionumber;
2058         }
2059
2060     # sort the hash and return the same structure as GetRecords (Zebra querying)
2061         my $result_hash;
2062         my $numbers = 0;
2063         if ( $ordering eq 'title_az' ) {    # sort by title desc
2064             foreach my $key ( sort ( keys %result ) ) {
2065                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2066             }
2067         }
2068         else {                              # sort by title ASC
2069             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2070                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2071             }
2072         }
2073
2074         # limit the $results_per_page to result size if it's more
2075         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2076
2077         # for the requested page, replace biblionumber by the complete record
2078         # speed improvement : avoid reading too much things
2079         for (
2080             my $counter = $offset ;
2081             $counter <= $offset + $results_per_page ;
2082             $counter++
2083           )
2084         {
2085             $result_hash->{'RECORDS'}[$counter] =
2086               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc;
2087         }
2088         my $finalresult = ();
2089         $result_hash->{'hits'}         = $numbers;
2090         $finalresult->{'biblioserver'} = $result_hash;
2091         return $finalresult;
2092     }
2093     else {
2094
2095 #
2096 # order by ranking
2097 #
2098 # we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking
2099         my %result;
2100         my %count_ranking;
2101         foreach ( split /;/, $biblionumbers ) {
2102             my ( $biblionumber, $title ) = split /,/, $_;
2103             $title =~ /(.*)-(\d)/;
2104
2105             # get weight
2106             my $ranking = $2;
2107
2108 # note that we + the ranking because ranking is calculated on weight of EACH term requested.
2109 # if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N
2110 # biblio N has ranking = 6
2111             $count_ranking{$biblionumber} += $ranking;
2112         }
2113
2114 # build the result by "inverting" the count_ranking hash
2115 # hing : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead
2116 #         warn "counting";
2117         foreach ( keys %count_ranking ) {
2118             $result{ sprintf( "%10d", $count_ranking{$_} ) . '-' . $_ } = $_;
2119         }
2120
2121     # sort the hash and return the same structure as GetRecords (Zebra querying)
2122         my $result_hash;
2123         my $numbers = 0;
2124         foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2125             $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2126         }
2127
2128         # limit the $results_per_page to result size if it's more
2129         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2130
2131         # for the requested page, replace biblionumber by the complete record
2132         # speed improvement : avoid reading too much things
2133         for (
2134             my $counter = $offset ;
2135             $counter <= $offset + $results_per_page ;
2136             $counter++
2137           )
2138         {
2139             $result_hash->{'RECORDS'}[$counter] =
2140               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc
2141               if $result_hash->{'RECORDS'}[$counter];
2142         }
2143         my $finalresult = ();
2144         $result_hash->{'hits'}         = $numbers;
2145         $finalresult->{'biblioserver'} = $result_hash;
2146         return $finalresult;
2147     }
2148 }
2149
2150 END { }    # module clean-up code here (global destructor)
2151
2152 1;
2153 __END__
2154
2155 =head1 AUTHOR
2156
2157 Koha Developement team <info@koha.org>
2158
2159 =cut