C4/Search.pm

   1 package C4::Search;
   2
   3 # This file is part of Koha.
   4 #
   5 # Koha is free software; you can redistribute it and/or modify it under the
   6 # terms of the GNU General Public License as published by the Free Software
   7 # Foundation; either version 2 of the License, or (at your option) any later
   8 # version.
   9 #
  10 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
  11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12 # A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU General Public License along with
  15 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  16 # Suite 330, Boston, MA  02111-1307 USA
  17
  18 use strict;
  19 # use warnings; # FIXME
  20 require Exporter;
  21 use C4::Context;
  22 use C4::Biblio;    # GetMarcFromKohaField
  23 use C4::Koha;      # getFacets
  24 use Lingua::Stem;
  25 use C4::Search::PazPar2;
  26 use XML::Simple;
  27 use C4::Dates qw(format_date);
  28 use C4::XSLT;
  29
  30 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
  31
  32 # set the version for version checking
  33 BEGIN {
  34     $VERSION = 3.01;
  35     $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
  36 }
  37
  38 =head1 NAME
  39
  40 C4::Search - Functions for searching the Koha catalog.
  41
  42 =head1 SYNOPSIS
  43
  44 See opac/opac-search.pl or catalogue/search.pl for example of usage
  45
  46 =head1 DESCRIPTION
  47
  48 This module provides searching functions for Koha's bibliographic databases
  49
  50 =head1 FUNCTIONS
  51
  52 =cut
  53
  54 @ISA    = qw(Exporter);
  55 @EXPORT = qw(
  56   &findseealso
  57   &FindDuplicate
  58   &SimpleSearch
  59   &searchResults
  60   &getRecords
  61   &buildQuery
  62   &NZgetRecords
  63 );
  64
  65 # make all your functions, whether exported or not;
  66
  67 =head2 findseealso($dbh,$fields);
  68
  69 C<$dbh> is a link to the DB handler.
  70
  71 use C4::Context;
  72 my $dbh =C4::Context->dbh;
  73
  74 C<$fields> is a reference to the fields array
  75
  76 This function modifies the @$fields array and adds related fields to search on.
  77
  78 FIXME: this function is probably deprecated in Koha 3
  79
  80 =cut
  81
  82 sub findseealso {
  83     my ( $dbh, $fields ) = @_;
  84     my $tagslib = GetMarcStructure(1);
  85     for ( my $i = 0 ; $i <= $#{$fields} ; $i++ ) {
  86         my ($tag)      = substr( @$fields[$i], 1, 3 );
  87         my ($subfield) = substr( @$fields[$i], 4, 1 );
  88         @$fields[$i] .= ',' . $tagslib->{$tag}->{$subfield}->{seealso}
  89           if ( $tagslib->{$tag}->{$subfield}->{seealso} );
  90     }
  91 }
  92
  93 =head2 FindDuplicate
  94
  95 ($biblionumber,$biblionumber,$title) = FindDuplicate($record);
  96
  97 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
  98
  99 =cut
 100
 101 sub FindDuplicate {
 102     my ($record) = @_;
 103     my $dbh = C4::Context->dbh;
 104     my $result = TransformMarcToKoha( $dbh, $record, '' );
 105     my $sth;
 106     my $query;
 107     my $search;
 108     my $type;
 109     my ( $biblionumber, $title );
 110
 111     # search duplicate on ISBN, easy and fast..
 112     # ... normalize first
 113     if ( $result->{isbn} ) {
 114         $result->{isbn} =~ s/\(.*$//;
 115         $result->{isbn} =~ s/\s+$//;
 116         $query = "isbn=$result->{isbn}";
 117     }
 118     else {
 119         $result->{title} =~ s /\\//g;
 120         $result->{title} =~ s /\"//g;
 121         $result->{title} =~ s /\(//g;
 122         $result->{title} =~ s /\)//g;
 123
 124         # FIXME: instead of removing operators, could just do
 125         # quotes around the value
 126         $result->{title} =~ s/(and|or|not)//g;
 127         $query = "ti,ext=$result->{title}";
 128         $query .= " and itemtype=$result->{itemtype}"
 129           if ( $result->{itemtype} );
 130         if   ( $result->{author} ) {
 131             $result->{author} =~ s /\\//g;
 132             $result->{author} =~ s /\"//g;
 133             $result->{author} =~ s /\(//g;
 134             $result->{author} =~ s /\)//g;
 135
 136             # remove valid operators
 137             $result->{author} =~ s/(and|or|not)//g;
 138             $query .= " and au,ext=$result->{author}";
 139         }
 140     }
 141
 142     # FIXME: add error handling
 143     my ( $error, $searchresults ) = SimpleSearch($query); # FIXME :: hardcoded !
 144     my @results;
 145     foreach my $possible_duplicate_record (@$searchresults) {
 146         my $marcrecord =
 147           MARC::Record->new_from_usmarc($possible_duplicate_record);
 148         my $result = TransformMarcToKoha( $dbh, $marcrecord, '' );
 149
 150         # FIXME :: why 2 $biblionumber ?
 151         if ($result) {
 152             push @results, $result->{'biblionumber'};
 153             push @results, $result->{'title'};
 154         }
 155     }
 156     return @results;
 157 }
 158
 159 =head2 SimpleSearch
 160
 161 ( $error, $results, $total_hits ) = SimpleSearch( $query, $offset, $max_results, [@servers] );
 162
 163 This function provides a simple search API on the bibliographic catalog
 164
 165 =over 2
 166
 167 =item C<input arg:>
 168
 169     * $query can be a simple keyword or a complete CCL query
 170     * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
 171     * $offset - If present, represents the number of records at the beggining to omit. Defaults to 0
 172     * $max_results - if present, determines the maximum number of records to fetch. undef is All. defaults to undef.
 173
 174
 175 =item C<Output:>
 176
 177     * $error is a empty unless an error is detected
 178     * \@results is an array of records.
 179     * $total_hits is the number of hits that would have been returned with no limit
 180
 181 =item C<usage in the script:>
 182
 183 =back
 184
 185 my ( $error, $marcresults, $total_hits ) = SimpleSearch($query);
 186
 187 if (defined $error) {
 188     $template->param(query_error => $error);
 189     warn "error: ".$error;
 190     output_html_with_http_headers $input, $cookie, $template->output;
 191     exit;
 192 }
 193
 194 my $hits = scalar @$marcresults;
 195 my @results;
 196
 197 for my $i (0..$hits) {
 198     my %resultsloop;
 199     my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]);
 200     my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,'');
 201
 202     #build the hash for the template.
 203     $resultsloop{highlight}       = ($i % 2)?(1):(0);
 204     $resultsloop{title}           = $biblio->{'title'};
 205     $resultsloop{subtitle}        = $biblio->{'subtitle'};
 206     $resultsloop{biblionumber}    = $biblio->{'biblionumber'};
 207     $resultsloop{author}          = $biblio->{'author'};
 208     $resultsloop{publishercode}   = $biblio->{'publishercode'};
 209     $resultsloop{publicationyear} = $biblio->{'publicationyear'};
 210
 211     push @results, \%resultsloop;
 212 }
 213
 214 $template->param(result=>\@results);
 215
 216 =cut
 217
 218 sub SimpleSearch {
 219     my ( $query, $offset, $max_results, $servers )  = @_;
 220
 221     if ( C4::Context->preference('NoZebra') ) {
 222         my $result = NZorder( NZanalyse($query) )->{'biblioserver'};
 223         my $search_result =
 224           (      $result->{hits}
 225               && $result->{hits} > 0 ? $result->{'RECORDS'} : [] );
 226         return ( undef, $search_result, scalar($search_result) );
 227     }
 228     else {
 229         # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
 230         my @servers = defined ( $servers ) ? @$servers : ( "biblioserver" );
 231         my @results;
 232         my @zoom_queries;
 233         my @tmpresults;
 234         my @zconns;
 235         my $total_hits;
 236         return ( "No query entered", undef, undef ) unless $query;
 237
 238         # Initialize & Search Zebra
 239         for ( my $i = 0 ; $i < @servers ; $i++ ) {
 240             eval {
 241                 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 242                 $zoom_queries[$i] = new ZOOM::Query::CCL2RPN( $query, $zconns[$i]);
 243                 $tmpresults[$i] = $zconns[$i]->search( $zoom_queries[$i] );
 244
 245                 # error handling
 246                 my $error =
 247                     $zconns[$i]->errmsg() . " ("
 248                   . $zconns[$i]->errcode() . ") "
 249                   . $zconns[$i]->addinfo() . " "
 250                   . $zconns[$i]->diagset();
 251
 252                 return ( $error, undef, undef ) if $zconns[$i]->errcode();
 253             };
 254             if ($@) {
 255
 256                 # caught a ZOOM::Exception
 257                 my $error =
 258                     $@->message() . " ("
 259                   . $@->code() . ") "
 260                   . $@->addinfo() . " "
 261                   . $@->diagset();
 262                 warn $error;
 263                 return ( $error, undef, undef );
 264             }
 265         }
 266         while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 267             my $event = $zconns[ $i - 1 ]->last_event();
 268             if ( $event == ZOOM::Event::ZEND ) {
 269
 270                 my $first_record = defined( $offset ) ? $offset+1 : 1;
 271                 my $hits = $tmpresults[ $i - 1 ]->size();
 272                 $total_hits += $hits;
 273                 my $last_record = $hits;
 274                 if ( defined $max_results && $offset + $max_results < $hits ) {
 275                     $last_record  = $offset + $max_results;
 276                 }
 277
 278                 for my $j ( $first_record..$last_record ) {
 279                     my $record = $tmpresults[ $i - 1 ]->record( $j-1 )->raw(); # 0 indexed
 280                     push @results, $record;
 281                 }
 282             }
 283         }
 284
 285         foreach my $result (@tmpresults) {
 286             $result->destroy();
 287         }
 288         foreach my $zoom_query (@zoom_queries) {
 289             $zoom_query->destroy();
 290         }
 291
 292         return ( undef, \@results, $total_hits );
 293     }
 294 }
 295
 296 =head2 getRecords
 297
 298 ( undef, $results_hashref, \@facets_loop ) = getRecords (
 299
 300         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 301         $results_per_page, $offset,       $expanded_facet, $branches,
 302         $query_type,       $scan
 303     );
 304
 305 The all singing, all dancing, multi-server, asynchronous, scanning,
 306 searching, record nabbing, facet-building
 307
 308 See verbse embedded documentation.
 309
 310 =cut
 311
 312 sub getRecords {
 313     my (
 314         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 315         $results_per_page, $offset,       $expanded_facet, $branches,
 316         $query_type,       $scan
 317     ) = @_;
 318
 319     my @servers = @$servers_ref;
 320     my @sort_by = @$sort_by_ref;
 321
 322     # Initialize variables for the ZOOM connection and results object
 323     my $zconn;
 324     my @zconns;
 325     my @results;
 326     my $results_hashref = ();
 327
 328     # Initialize variables for the faceted results objects
 329     my $facets_counter = ();
 330     my $facets_info    = ();
 331     my $facets         = getFacets();
 332
 333     my @facets_loop
 334       ;    # stores the ref to array of hashes for template facets loop
 335
 336     ### LOOP THROUGH THE SERVERS
 337     for ( my $i = 0 ; $i < @servers ; $i++ ) {
 338         $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 339
 340 # perform the search, create the results objects
 341 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
 342         my $query_to_use = ($servers[$i] =~ /biblioserver/) ? $koha_query : $simple_query;
 343
 344         #$query_to_use = $simple_query if $scan;
 345         warn $simple_query if ( $scan and $DEBUG );
 346
 347         # Check if we've got a query_type defined, if so, use it
 348         eval {
 349             if ($query_type)
 350             {
 351                 if ( $query_type =~ /^ccl/ ) {
 352                     $query_to_use =~
 353                       s/\:/\=/g;    # change : to = last minute (FIXME)
 354                     $results[$i] =
 355                       $zconns[$i]->search(
 356                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 357                       );
 358                 }
 359                 elsif ( $query_type =~ /^cql/ ) {
 360                     $results[$i] =
 361                       $zconns[$i]->search(
 362                         new ZOOM::Query::CQL( $query_to_use, $zconns[$i] ) );
 363                 }
 364                 elsif ( $query_type =~ /^pqf/ ) {
 365                     $results[$i] =
 366                       $zconns[$i]->search(
 367                         new ZOOM::Query::PQF( $query_to_use, $zconns[$i] ) );
 368                 }
 369             }
 370             else {
 371                 if ($scan) {
 372                     $results[$i] =
 373                       $zconns[$i]->scan(
 374                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 375                       );
 376                 }
 377                 else {
 378                     $results[$i] =
 379                       $zconns[$i]->search(
 380                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 381                       );
 382                 }
 383             }
 384         };
 385         if ($@) {
 386             warn "WARNING: query problem with $query_to_use " . $@;
 387         }
 388
 389         # Concatenate the sort_by limits and pass them to the results object
 390         # Note: sort will override rank
 391         my $sort_by;
 392         foreach my $sort (@sort_by) {
 393             if ( $sort eq "author_az" ) {
 394                 $sort_by .= "1=1003 <i ";
 395             }
 396             elsif ( $sort eq "author_za" ) {
 397                 $sort_by .= "1=1003 >i ";
 398             }
 399             elsif ( $sort eq "popularity_asc" ) {
 400                 $sort_by .= "1=9003 <i ";
 401             }
 402             elsif ( $sort eq "popularity_dsc" ) {
 403                 $sort_by .= "1=9003 >i ";
 404             }
 405             elsif ( $sort eq "call_number_asc" ) {
 406                 $sort_by .= "1=20  <i ";
 407             }
 408             elsif ( $sort eq "call_number_dsc" ) {
 409                 $sort_by .= "1=20 >i ";
 410             }
 411             elsif ( $sort eq "pubdate_asc" ) {
 412                 $sort_by .= "1=31 <i ";
 413             }
 414             elsif ( $sort eq "pubdate_dsc" ) {
 415                 $sort_by .= "1=31 >i ";
 416             }
 417             elsif ( $sort eq "acqdate_asc" ) {
 418                 $sort_by .= "1=32 <i ";
 419             }
 420             elsif ( $sort eq "acqdate_dsc" ) {
 421                 $sort_by .= "1=32 >i ";
 422             }
 423             elsif ( $sort eq "title_az" ) {
 424                 $sort_by .= "1=4 <i ";
 425             }
 426             elsif ( $sort eq "title_za" ) {
 427                 $sort_by .= "1=4 >i ";
 428             }
 429         }
 430         if ($sort_by) {
 431             if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
 432                 warn "WARNING sort $sort_by failed";
 433             }
 434         }
 435     }    # finished looping through servers
 436
 437     # The big moment: asynchronously retrieve results from all servers
 438     while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 439         my $ev = $zconns[ $i - 1 ]->last_event();
 440         if ( $ev == ZOOM::Event::ZEND ) {
 441             next unless $results[ $i - 1 ];
 442             my $size = $results[ $i - 1 ]->size();
 443             if ( $size > 0 ) {
 444                 my $results_hash;
 445
 446                 # loop through the results
 447                 $results_hash->{'hits'} = $size;
 448                 my $times;
 449                 if ( $offset + $results_per_page <= $size ) {
 450                     $times = $offset + $results_per_page;
 451                 }
 452                 else {
 453                     $times = $size;
 454                 }
 455                 for ( my $j = $offset ; $j < $times ; $j++ ) {
 456                     my $records_hash;
 457                     my $record;
 458                     my $facet_record;
 459
 460                     ## Check if it's an index scan
 461                     if ($scan) {
 462                         my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
 463
 464                  # here we create a minimal MARC record and hand it off to the
 465                  # template just like a normal result ... perhaps not ideal, but
 466                  # it works for now
 467                         my $tmprecord = MARC::Record->new();
 468                         $tmprecord->encoding('UTF-8');
 469                         my $tmptitle;
 470                         my $tmpauthor;
 471
 472                 # the minimal record in author/title (depending on MARC flavour)
 473                         if (C4::Context->preference("marcflavour") eq "UNIMARC") {
 474                             $tmptitle = MARC::Field->new('200',' ',' ', a => $term, f => $occ);
 475                             $tmprecord->append_fields($tmptitle);
 476                         } else {
 477                             $tmptitle  = MARC::Field->new('245',' ',' ', a => $term,);
 478                             $tmpauthor = MARC::Field->new('100',' ',' ', a => $occ,);
 479                             $tmprecord->append_fields($tmptitle);
 480                             $tmprecord->append_fields($tmpauthor);
 481                         }
 482                         $results_hash->{'RECORDS'}[$j] = $tmprecord->as_usmarc();
 483                     }
 484
 485                     # not an index scan
 486                     else {
 487                         $record = $results[ $i - 1 ]->record($j)->raw();
 488
 489                         # warn "RECORD $j:".$record;
 490                         $results_hash->{'RECORDS'}[$j] = $record;
 491
 492             # Fill the facets while we're looping, but only for the biblioserver
 493                         $facet_record = MARC::Record->new_from_usmarc($record)
 494                           if $servers[ $i - 1 ] =~ /biblioserver/;
 495
 496                     #warn $servers[$i-1]."\n".$record; #.$facet_record->title();
 497                         if ($facet_record) {
 498                             for ( my $k = 0 ; $k <= @$facets ; $k++ ) {
 499                                 ($facets->[$k]) or next;
 500                                 my @fields = map {$facet_record->field($_)} @{$facets->[$k]->{'tags'}} ;
 501                                 for my $field (@fields) {
 502                                     my @subfields = $field->subfields();
 503                                     for my $subfield (@subfields) {
 504                                         my ( $code, $data ) = @$subfield;
 505                                         ($code eq $facets->[$k]->{'subfield'}) or next;
 506                                         $facets_counter->{ $facets->[$k]->{'link_value'} }->{$data}++;
 507                                     }
 508                                 }
 509                                 $facets_info->{ $facets->[$k]->{'link_value'} }->{'label_value'} =
 510                                     $facets->[$k]->{'label_value'};
 511                                 $facets_info->{ $facets->[$k]->{'link_value'} }->{'expanded'} =
 512                                     $facets->[$k]->{'expanded'};
 513                             }
 514                         }
 515                     }
 516                 }
 517                 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
 518             }
 519
 520             # warn "connection ", $i-1, ": $size hits";
 521             # warn $results[$i-1]->record(0)->render() if $size > 0;
 522
 523             # BUILD FACETS
 524             if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
 525                 for my $link_value (
 526                     sort { $facets_counter->{$b} <=> $facets_counter->{$a} }
 527                     keys %$facets_counter )
 528                 {
 529                     my $expandable;
 530                     my $number_of_facets;
 531                     my @this_facets_array;
 532                     for my $one_facet (
 533                         sort {
 534                             $facets_counter->{$link_value}
 535                               ->{$b} <=> $facets_counter->{$link_value}->{$a}
 536                         } keys %{ $facets_counter->{$link_value} }
 537                       )
 538                     {
 539                         $number_of_facets++;
 540                         if (   ( $number_of_facets < 6 )
 541                             || ( $expanded_facet eq $link_value )
 542                             || ( $facets_info->{$link_value}->{'expanded'} ) )
 543                         {
 544
 545                       # Sanitize the link value ), ( will cause errors with CCL,
 546                             my $facet_link_value = $one_facet;
 547                             $facet_link_value =~ s/(\(|\))/ /g;
 548
 549                             # fix the length that will display in the label,
 550                             my $facet_label_value = $one_facet;
 551                             $facet_label_value =
 552                               substr( $one_facet, 0, 20 ) . "..."
 553                               unless length($facet_label_value) <= 20;
 554
 555                             # if it's a branch, label by the name, not the code,
 556                             if ( $link_value =~ /branch/ ) {
 557                                 $facet_label_value =
 558                                   $branches->{$one_facet}->{'branchname'};
 559                             }
 560
 561                 # but we're down with the whole label being in the link's title.
 562                             my $facet_title_value = $one_facet;
 563
 564                             push @this_facets_array,
 565                               (
 566                                 {
 567                                     facet_count =>
 568                                       $facets_counter->{$link_value}
 569                                       ->{$one_facet},
 570                                     facet_label_value => $facet_label_value,
 571                                     facet_title_value => $facet_title_value,
 572                                     facet_link_value  => $facet_link_value,
 573                                     type_link_value   => $link_value,
 574                                 },
 575                               );
 576                         }
 577                     }
 578
 579                     # handle expanded option
 580                     unless ( $facets_info->{$link_value}->{'expanded'} ) {
 581                         $expandable = 1
 582                           if ( ( $number_of_facets > 6 )
 583                             && ( $expanded_facet ne $link_value ) );
 584                     }
 585                     push @facets_loop,
 586                       (
 587                         {
 588                             type_link_value => $link_value,
 589                             type_id         => $link_value . "_id",
 590                             "type_label_" . $facets_info->{$link_value}->{'label_value'} => 1,
 591                             facets     => \@this_facets_array,
 592                             expandable => $expandable,
 593                             expand     => $link_value,
 594                         }
 595                       ) unless ( ($facets_info->{$link_value}->{'label_value'} =~ /Libraries/) and (C4::Context->preference('singleBranchMode')) );
 596                 }
 597             }
 598         }
 599     }
 600     return ( undef, $results_hashref, \@facets_loop );
 601 }
 602
 603 sub pazGetRecords {
 604     my (
 605         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 606         $results_per_page, $offset,       $expanded_facet, $branches,
 607         $query_type,       $scan
 608     ) = @_;
 609
 610     my $paz = C4::Search::PazPar2->new(C4::Context->config('pazpar2url'));
 611     $paz->init();
 612     $paz->search($simple_query);
 613     sleep 1;
 614
 615     # do results
 616     my $results_hashref = {};
 617     my $stats = XMLin($paz->stat);
 618     my $results = XMLin($paz->show($offset, $results_per_page, 'work-title:1'), forcearray => 1);
 619
 620     # for a grouped search result, the number of hits
 621     # is the number of groups returned; 'bib_hits' will have
 622     # the total number of bibs.
 623     $results_hashref->{'biblioserver'}->{'hits'} = $results->{'merged'}->[0];
 624     $results_hashref->{'biblioserver'}->{'bib_hits'} = $stats->{'hits'};
 625
 626     HIT: foreach my $hit (@{ $results->{'hit'} }) {
 627         my $recid = $hit->{recid}->[0];
 628
 629         my $work_title = $hit->{'md-work-title'}->[0];
 630         my $work_author;
 631         if (exists $hit->{'md-work-author'}) {
 632             $work_author = $hit->{'md-work-author'}->[0];
 633         }
 634         my $group_label = (defined $work_author) ? "$work_title / $work_author" : $work_title;
 635
 636         my $result_group = {};
 637         $result_group->{'group_label'} = $group_label;
 638         $result_group->{'group_merge_key'} = $recid;
 639
 640         my $count = 1;
 641         if (exists $hit->{count}) {
 642             $count = $hit->{count}->[0];
 643         }
 644         $result_group->{'group_count'} = $count;
 645
 646         for (my $i = 0; $i < $count; $i++) {
 647             # FIXME -- may need to worry about diacritics here
 648             my $rec = $paz->record($recid, $i);
 649             push @{ $result_group->{'RECORDS'} }, $rec;
 650         }
 651
 652         push @{ $results_hashref->{'biblioserver'}->{'GROUPS'} }, $result_group;
 653     }
 654
 655     # pass through facets
 656     my $termlist_xml = $paz->termlist('author,subject');
 657     my $terms = XMLin($termlist_xml, forcearray => 1);
 658     my @facets_loop = ();
 659     #die Dumper($results);
 660 #    foreach my $list (sort keys %{ $terms->{'list'} }) {
 661 #        my @facets = ();
 662 #        foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) {
 663 #            push @facets, {
 664 #                facet_label_value => $facet->{'name'}->[0],
 665 #            };
 666 #        }
 667 #        push @facets_loop, ( {
 668 #            type_label => $list,
 669 #            facets => \@facets,
 670 #        } );
 671 #    }
 672
 673     return ( undef, $results_hashref, \@facets_loop );
 674 }
 675
 676 # STOPWORDS
 677 sub _remove_stopwords {
 678     my ( $operand, $index ) = @_;
 679     my @stopwords_removed;
 680
 681     # phrase and exact-qualified indexes shouldn't have stopwords removed
 682     if ( $index !~ m/phr|ext/ ) {
 683
 684 # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
 685 #       we use IsAlpha unicode definition, to deal correctly with diacritics.
 686 #       otherwise, a French word like "leçon" woudl be split into "le" "çon", "le"
 687 #       is a stopword, we'd get "çon" and wouldn't find anything...
 688         foreach ( keys %{ C4::Context->stopwords } ) {
 689             next if ( $_ =~ /(and|or|not)/ );    # don't remove operators
 690             if ( $operand =~
 691                 /(\P{IsAlpha}$_\P{IsAlpha}|^$_\P{IsAlpha}|\P{IsAlpha}$_$|^$_$)/ )
 692             {
 693                 $operand =~ s/\P{IsAlpha}$_\P{IsAlpha}/ /gi;
 694                 $operand =~ s/^$_\P{IsAlpha}/ /gi;
 695                 $operand =~ s/\P{IsAlpha}$_$/ /gi;
 696                                 $operand =~ s/$1//gi;
 697                 push @stopwords_removed, $_;
 698             }
 699         }
 700     }
 701     return ( $operand, \@stopwords_removed );
 702 }
 703
 704 # TRUNCATION
 705 sub _detect_truncation {
 706     my ( $operand, $index ) = @_;
 707     my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
 708         @regexpr );
 709     $operand =~ s/^ //g;
 710     my @wordlist = split( /\s/, $operand );
 711     foreach my $word (@wordlist) {
 712         if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
 713             push @rightlefttruncated, $word;
 714         }
 715         elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
 716             push @lefttruncated, $word;
 717         }
 718         elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
 719             push @righttruncated, $word;
 720         }
 721         elsif ( index( $word, "*" ) < 0 ) {
 722             push @nontruncated, $word;
 723         }
 724         else {
 725             push @regexpr, $word;
 726         }
 727     }
 728     return (
 729         \@nontruncated,       \@righttruncated, \@lefttruncated,
 730         \@rightlefttruncated, \@regexpr
 731     );
 732 }
 733
 734 # STEMMING
 735 sub _build_stemmed_operand {
 736     my ($operand) = @_;
 737     my $stemmed_operand;
 738
 739     # If operand contains a digit, it is almost certainly an identifier, and should
 740     # not be stemmed.  This is particularly relevant for ISBNs and ISSNs, which
 741     # can contain the letter "X" - for example, _build_stemmend_operand would reduce
 742     # "014100018X" to "x ", which for a MARC21 database would bring up irrelevant
 743     # results (e.g., "23 x 29 cm." from the 300$c).  Bug 2098.
 744     return $operand if $operand =~ /\d/;
 745
 746 # FIXME: the locale should be set based on the user's language and/or search choice
 747     my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
 748
 749 # FIXME: these should be stored in the db so the librarian can modify the behavior
 750     $stemmer->add_exceptions(
 751         {
 752             'and' => 'and',
 753             'or'  => 'or',
 754             'not' => 'not',
 755         }
 756     );
 757     my @words = split( / /, $operand );
 758     my $stems = $stemmer->stem(@words);
 759     for my $stem (@$stems) {
 760         $stemmed_operand .= "$stem";
 761         $stemmed_operand .= "?"
 762           unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
 763         $stemmed_operand .= " ";
 764     }
 765     warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
 766     return $stemmed_operand;
 767 }
 768
 769 # FIELD WEIGHTING
 770 sub _build_weighted_query {
 771
 772 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
 773 # pretty well but could work much better if we had a smarter query parser
 774     my ( $operand, $stemmed_operand, $index ) = @_;
 775     my $stemming      = C4::Context->preference("QueryStemming")     || 0;
 776     my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
 777     my $fuzzy_enabled = C4::Context->preference("QueryFuzzy")        || 0;
 778
 779     my $weighted_query .= "(rk=(";    # Specifies that we're applying rank
 780
 781     # Keyword, or, no index specified
 782     if ( ( $index eq 'kw' ) || ( !$index ) ) {
 783         $weighted_query .=
 784           "Title-cover,ext,r1=\"$operand\"";    # exact title-cover
 785         $weighted_query .= " or ti,ext,r2=\"$operand\"";    # exact title
 786         $weighted_query .= " or ti,phr,r3=\"$operand\"";    # phrase title
 787           #$weighted_query .= " or any,ext,r4=$operand";               # exact any
 788           #$weighted_query .=" or kw,wrdl,r5=\"$operand\"";            # word list any
 789         $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
 790           if $fuzzy_enabled;    # add fuzzy, word list
 791         $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
 792           if ( $stemming and $stemmed_operand )
 793           ;                     # add stemming, right truncation
 794         $weighted_query .= " or wrdl,r9=\"$operand\"";
 795
 796         # embedded sorting: 0 a-z; 1 z-a
 797         # $weighted_query .= ") or (sort1,aut=1";
 798     }
 799
 800     # Barcode searches should skip this process
 801     elsif ( $index eq 'bc' ) {
 802         $weighted_query .= "bc=\"$operand\"";
 803     }
 804
 805     # Authority-number searches should skip this process
 806     elsif ( $index eq 'an' ) {
 807         $weighted_query .= "an=\"$operand\"";
 808     }
 809
 810     # If the index already has more than one qualifier, wrap the operand
 811     # in quotes and pass it back (assumption is that the user knows what they
 812     # are doing and won't appreciate us mucking up their query
 813     elsif ( $index =~ ',' ) {
 814         $weighted_query .= " $index=\"$operand\"";
 815     }
 816
 817     #TODO: build better cases based on specific search indexes
 818     else {
 819         $weighted_query .= " $index,ext,r1=\"$operand\"";    # exact index
 820           #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
 821         $weighted_query .= " or $index,phr,r3=\"$operand\"";    # phrase index
 822         $weighted_query .=
 823           " or $index,rt,wrdl,r3=\"$operand\"";    # word list index
 824     }
 825
 826     $weighted_query .= "))";                       # close rank specification
 827     return $weighted_query;
 828 }
 829
 830 =head2 buildQuery
 831
 832 ( $error, $query,
 833 $simple_query, $query_cgi,
 834 $query_desc, $limit,
 835 $limit_cgi, $limit_desc,
 836 $stopwords_removed, $query_type ) = getRecords ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
 837
 838 Build queries and limits in CCL, CGI, Human,
 839 handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
 840
 841 See verbose embedded documentation.
 842
 843
 844 =cut
 845
 846 sub buildQuery {
 847     my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_;
 848
 849     warn "---------\nEnter buildQuery\n---------" if $DEBUG;
 850
 851     # dereference
 852     my @operators = @$operators if $operators;
 853     my @indexes   = @$indexes   if $indexes;
 854     my @operands  = @$operands  if $operands;
 855     my @limits    = @$limits    if $limits;
 856     my @sort_by   = @$sort_by   if $sort_by;
 857
 858     my $stemming         = C4::Context->preference("QueryStemming")        || 0;
 859     my $auto_truncation  = C4::Context->preference("QueryAutoTruncate")    || 0;
 860     my $weight_fields    = C4::Context->preference("QueryWeightFields")    || 0;
 861     my $fuzzy_enabled    = C4::Context->preference("QueryFuzzy")           || 0;
 862     my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
 863
 864     # no stemming/weight/fuzzy in NoZebra
 865     if ( C4::Context->preference("NoZebra") ) {
 866         $stemming      = 0;
 867         $weight_fields = 0;
 868         $fuzzy_enabled = 0;
 869     }
 870
 871     my $query        = $operands[0];
 872     my $simple_query = $operands[0];
 873
 874     # initialize the variables we're passing back
 875     my $query_cgi;
 876     my $query_desc;
 877     my $query_type;
 878
 879     my $limit;
 880     my $limit_cgi;
 881     my $limit_desc;
 882
 883     my $stopwords_removed;    # flag to determine if stopwords have been removed
 884
 885 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
 886 # DIAGNOSTIC ONLY!!
 887     if ( $query =~ /^ccl=/ ) {
 888         return ( undef, $', $', $', $', '', '', '', '', 'ccl' );
 889     }
 890     if ( $query =~ /^cql=/ ) {
 891         return ( undef, $', $', $', $', '', '', '', '', 'cql' );
 892     }
 893     if ( $query =~ /^pqf=/ ) {
 894         return ( undef, $', $', $', $', '', '', '', '', 'pqf' );
 895     }
 896
 897     # pass nested queries directly
 898     # FIXME: need better handling of some of these variables in this case
 899     if ( $query =~ /(\(|\))/ ) {
 900         return (
 901             undef,              $query, $simple_query, $query_cgi,
 902             $query,             $limit, $limit_cgi,    $limit_desc,
 903             $stopwords_removed, 'ccl'
 904         );
 905     }
 906
 907 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
 908 # query operands and indexes and add stemming, truncation, field weighting, etc.
 909 # Once we do so, we'll end up with a value in $query, just like if we had an
 910 # incoming $query from the user
 911     else {
 912         $query = ""
 913           ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
 914         my $previous_operand
 915           ;    # a flag used to keep track if there was a previous query
 916                # if there was, we can apply the current operator
 917                # for every operand
 918         for ( my $i = 0 ; $i <= @operands ; $i++ ) {
 919
 920             # COMBINE OPERANDS, INDEXES AND OPERATORS
 921             if ( $operands[$i] ) {
 922
 923               # A flag to determine whether or not to add the index to the query
 924                 my $indexes_set;
 925
 926 # If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling
 927                 if ( $operands[$i] =~ /(:|=)/ || $scan ) {
 928                     $weight_fields    = 0;
 929                     $stemming         = 0;
 930                     $remove_stopwords = 0;
 931                 }
 932                 my $operand = $operands[$i];
 933                 my $index   = $indexes[$i];
 934
 935                 # Add index-specific attributes
 936                 # Date of Publication
 937                 if ( $index eq 'yr' ) {
 938                     $index .= ",st-numeric";
 939 #                     $indexes_set++;
 940                                         $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
 941                 }
 942
 943                 # Date of Acquisition
 944                 elsif ( $index eq 'acqdate' ) {
 945                     $index .= ",st-date-normalized";
 946 #                     $indexes_set++;
 947                                         $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
 948                 }
 949                 # ISBN,ISSN,Standard Number, don't need special treatment
 950                 elsif ( $index eq 'nb' || $index eq 'ns' ) {
 951 #                     $indexes_set++;
 952                     (
 953                         $stemming,      $auto_truncation,
 954                         $weight_fields, $fuzzy_enabled,
 955                         $remove_stopwords
 956                     ) = ( 0, 0, 0, 0, 0 );
 957
 958                 }
 959                 # Set default structure attribute (word list)
 960                 my $struct_attr;
 961                 unless ( $indexes_set || !$index || $index =~ /(st-|phr|ext|wrdl)/ ) {
 962                     $struct_attr = ",wrdl";
 963                 }
 964
 965                 # Some helpful index variants
 966                 my $index_plus       = $index . $struct_attr . ":" if $index;
 967                 my $index_plus_comma = $index . $struct_attr . "," if $index;
 968
 969                 # Remove Stopwords
 970                 if ($remove_stopwords) {
 971                     ( $operand, $stopwords_removed ) =
 972                       _remove_stopwords( $operand, $index );
 973                     warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
 974                     warn "REMOVED STOPWORDS: @$stopwords_removed"
 975                       if ( $stopwords_removed && $DEBUG );
 976                 }
 977
 978                 # Detect Truncation
 979                 my $truncated_operand;
 980                 my( $nontruncated, $righttruncated, $lefttruncated,
 981                     $rightlefttruncated, $regexpr
 982                 ) = _detect_truncation( $operand, $index );
 983                 warn
 984 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
 985                   if $DEBUG;
 986
 987                 # Apply Truncation
 988                 if (
 989                     scalar(@$righttruncated) + scalar(@$lefttruncated) +
 990                     scalar(@$rightlefttruncated) > 0 )
 991                 {
 992
 993                # Don't field weight or add the index to the query, we do it here
 994                     $indexes_set = 1;
 995                     undef $weight_fields;
 996                     my $previous_truncation_operand;
 997                     if ( scalar(@$nontruncated) > 0 ) {
 998                         $truncated_operand .= "$index_plus @$nontruncated ";
 999                         $previous_truncation_operand = 1;
1000                     }
1001                     if ( scalar(@$righttruncated) > 0 ) {
1002                         $truncated_operand .= "and "
1003                           if $previous_truncation_operand;
1004                         $truncated_operand .=
1005                           "$index_plus_comma" . "rtrn:@$righttruncated ";
1006                         $previous_truncation_operand = 1;
1007                     }
1008                     if ( scalar(@$lefttruncated) > 0 ) {
1009                         $truncated_operand .= "and "
1010                           if $previous_truncation_operand;
1011                         $truncated_operand .=
1012                           "$index_plus_comma" . "ltrn:@$lefttruncated ";
1013                         $previous_truncation_operand = 1;
1014                     }
1015                     if ( scalar(@$rightlefttruncated) > 0 ) {
1016                         $truncated_operand .= "and "
1017                           if $previous_truncation_operand;
1018                         $truncated_operand .=
1019                           "$index_plus_comma" . "rltrn:@$rightlefttruncated ";
1020                         $previous_truncation_operand = 1;
1021                     }
1022                 }
1023                 $operand = $truncated_operand if $truncated_operand;
1024                 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
1025
1026                 # Handle Stemming
1027                 my $stemmed_operand;
1028                 $stemmed_operand = _build_stemmed_operand($operand)
1029                   if $stemming;
1030                 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
1031
1032                 # Handle Field Weighting
1033                 my $weighted_operand;
1034                 $weighted_operand =
1035                   _build_weighted_query( $operand, $stemmed_operand, $index )
1036                   if $weight_fields;
1037                 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
1038                 $operand = $weighted_operand if $weight_fields;
1039                 $indexes_set = 1 if $weight_fields;
1040
1041                 # If there's a previous operand, we need to add an operator
1042                 if ($previous_operand) {
1043
1044                     # User-specified operator
1045                     if ( $operators[ $i - 1 ] ) {
1046                         $query     .= " $operators[$i-1] ";
1047                         $query     .= " $index_plus " unless $indexes_set;
1048                         $query     .= " $operand";
1049                         $query_cgi .= "&op=$operators[$i-1]";
1050                         $query_cgi .= "&idx=$index" if $index;
1051                         $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1052                         $query_desc .=
1053                           " $operators[$i-1] $index_plus $operands[$i]";
1054                     }
1055
1056                     # Default operator is and
1057                     else {
1058                         $query      .= " and ";
1059                         $query      .= "$index_plus " unless $indexes_set;
1060                         $query      .= "$operand";
1061                         $query_cgi  .= "&op=and&idx=$index" if $index;
1062                         $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1063                         $query_desc .= " and $index_plus $operands[$i]";
1064                     }
1065                 }
1066
1067                 # There isn't a pervious operand, don't need an operator
1068                 else {
1069
1070                     # Field-weighted queries already have indexes set
1071                     $query .= " $index_plus " unless $indexes_set;
1072                     $query .= $operand;
1073                     $query_desc .= " $index_plus $operands[$i]";
1074                     $query_cgi  .= "&idx=$index" if $index;
1075                     $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1076                     $previous_operand = 1;
1077                 }
1078             }    #/if $operands
1079         }    # /for
1080     }
1081     warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
1082
1083     # add limits
1084     my $group_OR_limits;
1085     my $availability_limit;
1086     foreach my $this_limit (@limits) {
1087         if ( $this_limit =~ /available/ ) {
1088
1089 # 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1090 # In English:
1091 # all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1092             $availability_limit .=
1093 "( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and (lost,st-numeric=0) )"; #or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )";
1094             $limit_cgi  .= "&limit=available";
1095             $limit_desc .= "";
1096         }
1097
1098         # group_OR_limits, prefixed by mc-
1099         # OR every member of the group
1100         elsif ( $this_limit =~ /mc/ ) {
1101             $group_OR_limits .= " or " if $group_OR_limits;
1102             $limit_desc      .= " or " if $group_OR_limits;
1103             $group_OR_limits .= "$this_limit";
1104             $limit_cgi       .= "&limit=$this_limit";
1105             $limit_desc      .= " $this_limit";
1106         }
1107
1108         # Regular old limits
1109         else {
1110             if ($this_limit){
1111                 $limit .= " and " if $limit || $query;
1112                 $limit      .= "$this_limit";
1113                 $limit_cgi  .= "&limit=$this_limit";
1114                 $limit_desc .= " $this_limit";
1115             }
1116         }
1117     }
1118     if ($group_OR_limits) {
1119         $limit .= " and " if ( $query || $limit );
1120         $limit .= "($group_OR_limits)";
1121     }
1122     if ($availability_limit) {
1123         $limit .= " and " if ( $query || $limit );
1124         $limit .= "($availability_limit)";
1125     }
1126
1127     # Normalize the query and limit strings
1128     $query =~ s/:/=/g;
1129     $limit =~ s/:/=/g;
1130     for ( $query, $query_desc, $limit, $limit_desc ) {
1131         $_ =~ s/  / /g;    # remove extra spaces
1132         $_ =~ s/^ //g;     # remove any beginning spaces
1133         $_ =~ s/ $//g;     # remove any ending spaces
1134         $_ =~ s/==/=/g;    # remove double == from query
1135     }
1136     $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1137
1138     for ($query_cgi,$simple_query) {
1139         $_ =~ s/"//g;
1140     }
1141     # append the limit to the query
1142     $query .= " " . $limit;
1143
1144     # Warnings if DEBUG
1145     if ($DEBUG) {
1146         warn "QUERY:" . $query;
1147         warn "QUERY CGI:" . $query_cgi;
1148         warn "QUERY DESC:" . $query_desc;
1149         warn "LIMIT:" . $limit;
1150         warn "LIMIT CGI:" . $limit_cgi;
1151         warn "LIMIT DESC:" . $limit_desc;
1152         warn "---------\nLeave buildQuery\n---------";
1153     }
1154     return (
1155         undef,              $query, $simple_query, $query_cgi,
1156         $query_desc,        $limit, $limit_cgi,    $limit_desc,
1157         $stopwords_removed, $query_type
1158     );
1159 }
1160
1161 =head2 searchResults
1162
1163 Format results in a form suitable for passing to the template
1164
1165 =cut
1166
1167 # IMO this subroutine is pretty messy still -- it's responsible for
1168 # building the HTML output for the template
1169 sub searchResults {
1170     my ( $searchdesc, $hits, $results_per_page, $offset, $scan, @marcresults ) = @_;
1171     my $dbh = C4::Context->dbh;
1172     my $even = 1;
1173     my @newresults;
1174
1175     # add search-term highlighting via <span>s on the search terms
1176     my $span_terms_hashref;
1177     for my $span_term ( split( / /, $searchdesc ) ) {
1178         $span_term =~ s/(.*=|\)|\(|\+|\.|\*)//g;
1179         $span_terms_hashref->{$span_term}++;
1180     }
1181
1182     #Build branchnames hash
1183     #find branchname
1184     #get branch information.....
1185     my %branches;
1186     my $bsth =
1187       $dbh->prepare("SELECT branchcode,branchname FROM branches")
1188       ;    # FIXME : use C4::Koha::GetBranches
1189     $bsth->execute();
1190     while ( my $bdata = $bsth->fetchrow_hashref ) {
1191         $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'};
1192     }
1193 # FIXME - We build an authorised values hash here, using the default framework
1194 # though it is possible to have different authvals for different fws.
1195
1196     my $shelflocations =GetKohaAuthorisedValues('items.location','');
1197
1198     # get notforloan authorised value list (see $shelflocations  FIXME)
1199     my $notforloan_authorised_value = GetAuthValCode('items.notforloan','');
1200
1201     #Build itemtype hash
1202     #find itemtype & itemtype image
1203     my %itemtypes;
1204     $bsth =
1205       $dbh->prepare(
1206         "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes"
1207       );
1208     $bsth->execute();
1209     while ( my $bdata = $bsth->fetchrow_hashref ) {
1210                 foreach (qw(description imageurl summary notforloan)) {
1211                 $itemtypes{ $bdata->{'itemtype'} }->{$_} = $bdata->{$_};
1212                 }
1213     }
1214
1215     #search item field code
1216     my $sth =
1217       $dbh->prepare(
1218 "SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'"
1219       );
1220     $sth->execute;
1221     my ($itemtag) = $sth->fetchrow;
1222
1223     ## find column names of items related to MARC
1224     my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items");
1225     $sth2->execute;
1226     my %subfieldstosearch;
1227     while ( ( my $column ) = $sth2->fetchrow ) {
1228         my ( $tagfield, $tagsubfield ) =
1229           &GetMarcFromKohaField( "items." . $column, "" );
1230         $subfieldstosearch{$column} = $tagsubfield;
1231     }
1232
1233     # handle which records to actually retrieve
1234     my $times;
1235     if ( $hits && $offset + $results_per_page <= $hits ) {
1236         $times = $offset + $results_per_page;
1237     }
1238     else {
1239         $times = $hits;  # FIXME: if $hits is undefined, why do we want to equal it?
1240     }
1241
1242     # loop through all of the records we've retrieved
1243     for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
1244         my $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
1245         my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' );
1246         $oldbiblio->{subtitle} = C4::Biblio::get_koha_field_from_marc('bibliosubtitle', 'subtitle', $marcrecord, '');
1247         $oldbiblio->{result_number} = $i + 1;
1248
1249         # add imageurl to itemtype if there is one
1250         $oldbiblio->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
1251
1252         $oldbiblio->{'authorised_value_images'}  = C4::Items::get_authorised_value_images( C4::Biblio::get_biblio_authorised_values( $oldbiblio->{'biblionumber'}, $marcrecord ) );
1253         (my $aisbn) = $oldbiblio->{isbn} =~ /([\d-]*[X]*)/;
1254         $aisbn =~ s/-//g;
1255         $oldbiblio->{amazonisbn} = $aisbn;
1256                 $oldbiblio->{description} = $itemtypes{ $oldbiblio->{itemtype} }->{description};
1257  # Build summary if there is one (the summary is defined in the itemtypes table)
1258  # FIXME: is this used anywhere, I think it can be commented out? -- JF
1259         if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
1260             my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1261             my @fields  = $marcrecord->fields();
1262             foreach my $field (@fields) {
1263                 my $tag      = $field->tag();
1264                 my $tagvalue = $field->as_string();
1265                 if (! utf8::is_utf8($tagvalue)) {
1266                     utf8::decode($tagvalue);
1267                 }
1268
1269                 $summary =~
1270                   s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
1271                 unless ( $tag < 10 ) {
1272                     my @subf = $field->subfields;
1273                     for my $i ( 0 .. $#subf ) {
1274                         my $subfieldcode  = $subf[$i][0];
1275                         my $subfieldvalue = $subf[$i][1];
1276                         if (! utf8::is_utf8($subfieldvalue)) {
1277                             utf8::decode($subfieldvalue);
1278                         }
1279                         my $tagsubf       = $tag . $subfieldcode;
1280                         $summary =~
1281 s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
1282                     }
1283                 }
1284             }
1285             # FIXME: yuk
1286             $summary =~ s/\[(.*?)]//g;
1287             $summary =~ s/\n/<br\/>/g;
1288             $oldbiblio->{summary} = $summary;
1289         }
1290
1291         # save an author with no <span> tag, for the <a href=search.pl?q=<!--tmpl_var name="author"-->> link
1292         $oldbiblio->{'author_nospan'} = $oldbiblio->{'author'};
1293         $oldbiblio->{'title_nospan'} = $oldbiblio->{'title'};
1294         $oldbiblio->{'subtitle_nospan'} = $oldbiblio->{'subtitle'};
1295         # Add search-term highlighting to the whole record where they match using <span>s
1296         if (C4::Context->preference("OpacHighlightedWords")){
1297             my $searchhighlightblob;
1298             for my $highlight_field ( $marcrecord->fields ) {
1299
1300     # FIXME: need to skip title, subtitle, author, etc., as they are handled below
1301                 next if $highlight_field->tag() =~ /(^00)/;    # skip fixed fields
1302                 for my $subfield ($highlight_field->subfields()) {
1303                     my $match;
1304                     next if $subfield->[0] eq '9';
1305                     my $field = $subfield->[1];
1306                     for my $term ( keys %$span_terms_hashref ) {
1307                         if ( ( $field =~ /$term/i ) && (( length($term) > 3 ) || ($field =~ / $term /i)) ) {
1308                             $field =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1309                         $match++;
1310                         }
1311                     }
1312                     $searchhighlightblob .= $field . " ... " if $match;
1313                 }
1314
1315             }
1316             $searchhighlightblob = ' ... '.$searchhighlightblob if $searchhighlightblob;
1317             $oldbiblio->{'searchhighlightblob'} = $searchhighlightblob;
1318         }
1319
1320         # Add search-term highlighting to the title, subtitle, etc. fields
1321         for my $term ( keys %$span_terms_hashref ) {
1322             my $old_term = $term;
1323             if ( length($term) > 3 ) {
1324                 $term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\]|\\|\*)//g;
1325                                 foreach(qw(title subtitle author publishercode place pages notes size)) {
1326                         $oldbiblio->{$_} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1327                                 }
1328             }
1329         }
1330
1331         ($i % 2) and $oldbiblio->{'toggle'} = 1;
1332
1333         # Pull out the items fields
1334         my @fields = $marcrecord->field($itemtag);
1335
1336         # Setting item statuses for display
1337         my @available_items_loop;
1338         my @onloan_items_loop;
1339         my @other_items_loop;
1340
1341         my $available_items;
1342         my $onloan_items;
1343         my $other_items;
1344
1345         my $ordered_count         = 0;
1346         my $available_count       = 0;
1347         my $onloan_count          = 0;
1348         my $longoverdue_count     = 0;
1349         my $other_count           = 0;
1350         my $wthdrawn_count        = 0;
1351         my $itemlost_count        = 0;
1352         my $itembinding_count     = 0;
1353         my $itemdamaged_count     = 0;
1354         my $item_in_transit_count = 0;
1355         my $can_place_holds       = 0;
1356         my $items_count           = scalar(@fields);
1357         my $maxitems =
1358           ( C4::Context->preference('maxItemsinSearchResults') )
1359           ? C4::Context->preference('maxItemsinSearchResults') - 1
1360           : 1;
1361
1362         # loop through every item
1363         foreach my $field (@fields) {
1364             my $item;
1365
1366             # populate the items hash
1367             foreach my $code ( keys %subfieldstosearch ) {
1368                 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1369             }
1370                         my $hbranch     = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'homebranch'    : 'holdingbranch';
1371                         my $otherbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'holdingbranch' : 'homebranch';
1372             # set item's branch name, use HomeOrHoldingBranch syspref first, fall back to the other one
1373             if ($item->{$hbranch}) {
1374                 $item->{'branchname'} = $branches{$item->{$hbranch}};
1375             }
1376             elsif ($item->{$otherbranch}) {     # Last resort
1377                 $item->{'branchname'} = $branches{$item->{$otherbranch}};
1378             }
1379
1380                         my $prefix = $item->{$hbranch} . '--' . $item->{location} . $item->{itype} . $item->{itemcallnumber};
1381 # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
1382             if ( $item->{onloan} ) {
1383                 $onloan_count++;
1384                                 my $key = $prefix . $item->{due_date};
1385                                 $onloan_items->{$key}->{due_date} = format_date($item->{onloan});
1386                                 $onloan_items->{$key}->{count}++ if $item->{$hbranch};
1387                                 $onloan_items->{$key}->{branchname} = $item->{branchname};
1388                                 $onloan_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1389                                 $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber};
1390                                 $onloan_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1391                                 $onloan_items->{$key}->{barcode} = $item->{barcode};
1392                 # if something's checked out and lost, mark it as 'long overdue'
1393                 if ( $item->{itemlost} ) {
1394                     $onloan_items->{$prefix}->{longoverdue}++;
1395                     $longoverdue_count++;
1396                 } else {        # can place holds as long as item isn't lost
1397                     $can_place_holds = 1;
1398                 }
1399             }
1400
1401          # items not on loan, but still unavailable ( lost, withdrawn, damaged )
1402             else {
1403
1404                 # item is on order
1405                 if ( $item->{notforloan} == -1 ) {
1406                     $ordered_count++;
1407                 }
1408
1409                 # is item in transit?
1410                 my $transfertwhen = '';
1411                 my ($transfertfrom, $transfertto);
1412
1413                 unless ($item->{wthdrawn}
1414                         || $item->{itemlost}
1415                         || $item->{damaged}
1416                         || $item->{notforloan}
1417                         || $items_count > 20) {
1418
1419                     # A couple heuristics to limit how many times
1420                     # we query the database for item transfer information, sacrificing
1421                     # accuracy in some cases for speed;
1422                     #
1423                     # 1. don't query if item has one of the other statuses
1424                     # 2. don't check transit status if the bib has
1425                     #    more than 20 items
1426                     #
1427                     # FIXME: to avoid having the query the database like this, and to make
1428                     #        the in transit status count as unavailable for search limiting,
1429                     #        should map transit status to record indexed in Zebra.
1430                     #
1431                     ($transfertwhen, $transfertfrom, $transfertto) = C4::Circulation::GetTransfers($item->{itemnumber});
1432                 }
1433
1434                 # item is withdrawn, lost or damaged
1435                 if (   $item->{wthdrawn}
1436                     || $item->{itemlost}
1437                     || $item->{damaged}
1438                     || $item->{notforloan}
1439                     || ($transfertwhen ne ''))
1440                 {
1441                     $wthdrawn_count++        if $item->{wthdrawn};
1442                     $itemlost_count++        if $item->{itemlost};
1443                     $itemdamaged_count++     if $item->{damaged};
1444                     $item_in_transit_count++ if $transfertwhen ne '';
1445                     $item->{status} = $item->{wthdrawn} . "-" . $item->{itemlost} . "-" . $item->{damaged} . "-" . $item->{notforloan};
1446                     $other_count++;
1447
1448                                         my $key = $prefix . $item->{status};
1449                                         foreach (qw(wthdrawn itemlost damaged branchname itemcallnumber)) {
1450                         $other_items->{$key}->{$_} = $item->{$_};
1451                                         }
1452                     $other_items->{$key}->{intransit} = ($transfertwhen ne '') ? 1 : 0;
1453                                         $other_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1454                                         $other_items->{$key}->{count}++ if $item->{$hbranch};
1455                                         $other_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1456                                         $other_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1457                                         $other_items->{$key}->{barcode} = $item->{barcode};
1458                 }
1459                 # item is available
1460                 else {
1461                     $can_place_holds = 1;
1462                     $available_count++;
1463                                         $available_items->{$prefix}->{count}++ if $item->{$hbranch};
1464                                         foreach (qw(branchname itemcallnumber barcode)) {
1465                         $available_items->{$prefix}->{$_} = $item->{$_};
1466                                         }
1467                                         $available_items->{$prefix}->{location} = $shelflocations->{ $item->{location} };
1468                                         $available_items->{$prefix}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1469                 }
1470             }
1471         }    # notforloan, item level and biblioitem level
1472         my ( $availableitemscount, $onloanitemscount, $otheritemscount );
1473         $maxitems =
1474           ( C4::Context->preference('maxItemsinSearchResults') )
1475           ? C4::Context->preference('maxItemsinSearchResults') - 1
1476           : 1;
1477         for my $key ( sort keys %$onloan_items ) {
1478             (++$onloanitemscount > $maxitems) and last;
1479             push @onloan_items_loop, $onloan_items->{$key};
1480         }
1481         for my $key ( sort keys %$other_items ) {
1482             (++$otheritemscount > $maxitems) and last;
1483             push @other_items_loop, $other_items->{$key};
1484         }
1485         for my $key ( sort keys %$available_items ) {
1486             (++$availableitemscount > $maxitems) and last;
1487             push @available_items_loop, $available_items->{$key}
1488         }
1489
1490         # XSLT processing of some stuff
1491         if (C4::Context->preference("XSLTResultsDisplay") && !$scan) {
1492             my $newxmlrecord = XSLTParse4Display($oldbiblio->{biblionumber}, $marcrecord, C4::Context->config('opachtdocs')."/prog/en/xslt/MARC21slim2OPACResults.xsl");
1493             $oldbiblio->{XSLTResultsRecord} = $newxmlrecord;
1494         }
1495
1496         # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
1497         $can_place_holds = 0
1498           if $itemtypes{ $oldbiblio->{itemtype} }->{notforloan};
1499         $oldbiblio->{norequests} = 1 unless $can_place_holds;
1500         $oldbiblio->{itemsplural}          = 1 if $items_count > 1;
1501         $oldbiblio->{items_count}          = $items_count;
1502         $oldbiblio->{available_items_loop} = \@available_items_loop;
1503         $oldbiblio->{onloan_items_loop}    = \@onloan_items_loop;
1504         $oldbiblio->{other_items_loop}     = \@other_items_loop;
1505         $oldbiblio->{availablecount}       = $available_count;
1506         $oldbiblio->{availableplural}      = 1 if $available_count > 1;
1507         $oldbiblio->{onloancount}          = $onloan_count;
1508         $oldbiblio->{onloanplural}         = 1 if $onloan_count > 1;
1509         $oldbiblio->{othercount}           = $other_count;
1510         $oldbiblio->{otherplural}          = 1 if $other_count > 1;
1511         $oldbiblio->{wthdrawncount}        = $wthdrawn_count;
1512         $oldbiblio->{itemlostcount}        = $itemlost_count;
1513         $oldbiblio->{damagedcount}         = $itemdamaged_count;
1514         $oldbiblio->{intransitcount}       = $item_in_transit_count;
1515         $oldbiblio->{orderedcount}         = $ordered_count;
1516         $oldbiblio->{isbn} =~
1517           s/-//g;    # deleting - in isbn to enable amazon content
1518         push( @newresults, $oldbiblio );
1519     }
1520     return @newresults;
1521 }
1522
1523 #----------------------------------------------------------------------
1524 #
1525 # Non-Zebra GetRecords#
1526 #----------------------------------------------------------------------
1527
1528 =head2 NZgetRecords
1529
1530   NZgetRecords has the same API as zera getRecords, even if some parameters are not managed
1531
1532 =cut
1533
1534 sub NZgetRecords {
1535     my (
1536         $query,            $simple_query, $sort_by_ref,    $servers_ref,
1537         $results_per_page, $offset,       $expanded_facet, $branches,
1538         $query_type,       $scan
1539     ) = @_;
1540     warn "query =$query" if $DEBUG;
1541     my $result = NZanalyse($query);
1542     warn "results =$result" if $DEBUG;
1543     return ( undef,
1544         NZorder( $result, @$sort_by_ref[0], $results_per_page, $offset ),
1545         undef );
1546 }
1547
1548 =head2 NZanalyse
1549
1550   NZanalyse : get a CQL string as parameter, and returns a list of biblionumber;title,biblionumber;title,...
1551   the list is built from an inverted index in the nozebra SQL table
1552   note that title is here only for convenience : the sorting will be very fast when requested on title
1553   if the sorting is requested on something else, we will have to reread all results, and that may be longer.
1554
1555 =cut
1556
1557 sub NZanalyse {
1558     my ( $string, $server ) = @_;
1559 #     warn "---------"       if $DEBUG;
1560     warn " NZanalyse" if $DEBUG;
1561 #     warn "---------"       if $DEBUG;
1562
1563  # $server contains biblioserver or authorities, depending on what we search on.
1564  #warn "querying : $string on $server";
1565     $server = 'biblioserver' unless $server;
1566
1567 # if we have a ", replace the content to discard temporarily any and/or/not inside
1568     my $commacontent;
1569     if ( $string =~ /"/ ) {
1570         $string =~ s/"(.*?)"/__X__/;
1571         $commacontent = $1;
1572         warn "commacontent : $commacontent" if $DEBUG;
1573     }
1574
1575 # split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y"
1576 # then, call again NZanalyse with $left and $right
1577 # (recursive until we find a leaf (=> something without and/or/not)
1578 # delete repeated operator... Would then go in infinite loop
1579     while ( $string =~ s/( and| or| not| AND| OR| NOT)\1/$1/g ) {
1580     }
1581
1582     #process parenthesis before.
1583     if ( $string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/ ) {
1584         my $left     = $1;
1585         my $right    = $4;
1586         my $operator = lc($3);   # FIXME: and/or/not are operators, not operands
1587         warn
1588 "dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right"
1589           if $DEBUG;
1590         my $leftresult = NZanalyse( $left, $server );
1591         if ($operator) {
1592             my $rightresult = NZanalyse( $right, $server );
1593
1594             # OK, we have the results for right and left part of the query
1595             # depending of operand, intersect, union or exclude both lists
1596             # to get a result list
1597             if ( $operator eq ' and ' ) {
1598                 return NZoperatorAND($leftresult,$rightresult);
1599             }
1600             elsif ( $operator eq ' or ' ) {
1601
1602                 # just merge the 2 strings
1603                 return $leftresult . $rightresult;
1604             }
1605             elsif ( $operator eq ' not ' ) {
1606                 return NZoperatorNOT($leftresult,$rightresult);
1607             }
1608         }
1609         else {
1610 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1611             return $leftresult;
1612         }
1613     }
1614     warn "string :" . $string if $DEBUG;
1615     my $left = "";
1616     my $right = "";
1617     my $operator = "";
1618     if ($string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/) {
1619         $left     = $1;
1620         $right    = $3;
1621         $operator = lc($2);    # FIXME: and/or/not are operators, not operands
1622     }
1623     warn "no parenthesis. left : $left operator: $operator right: $right"
1624       if $DEBUG;
1625
1626     # it's not a leaf, we have a and/or/not
1627     if ($operator) {
1628
1629         # reintroduce comma content if needed
1630         $right =~ s/__X__/"$commacontent"/ if $commacontent;
1631         $left  =~ s/__X__/"$commacontent"/ if $commacontent;
1632         warn "node : $left / $operator / $right\n" if $DEBUG;
1633         my $leftresult  = NZanalyse( $left,  $server );
1634         my $rightresult = NZanalyse( $right, $server );
1635         warn " leftresult : $leftresult" if $DEBUG;
1636         warn " rightresult : $rightresult" if $DEBUG;
1637         # OK, we have the results for right and left part of the query
1638         # depending of operand, intersect, union or exclude both lists
1639         # to get a result list
1640         if ( $operator eq ' and ' ) {
1641             warn "NZAND";
1642             return NZoperatorAND($leftresult,$rightresult);
1643         }
1644         elsif ( $operator eq ' or ' ) {
1645
1646             # just merge the 2 strings
1647             return $leftresult . $rightresult;
1648         }
1649         elsif ( $operator eq ' not ' ) {
1650             return NZoperatorNOT($leftresult,$rightresult);
1651         }
1652         else {
1653
1654 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1655             die "error : operand unknown : $operator for $string";
1656         }
1657
1658         # it's a leaf, do the real SQL query and return the result
1659     }
1660     else {
1661         $string =~ s/__X__/"$commacontent"/ if $commacontent;
1662         $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g;
1663         #remove trailing blank at the beginning
1664         $string =~ s/^ //g;
1665         warn "leaf:$string" if $DEBUG;
1666
1667         # parse the string in in operator/operand/value again
1668         my $left = "";
1669         my $operator = "";
1670         my $right = "";
1671         if ($string =~ /(.*)(>=|<=)(.*)/) {
1672             $left     = $1;
1673             $operator = $2;
1674             $right    = $3;
1675         } else {
1676             $left = $string;
1677         }
1678 #         warn "handling leaf... left:$left operator:$operator right:$right"
1679 #           if $DEBUG;
1680         unless ($operator) {
1681             if ($string =~ /(.*)(>|<|=)(.*)/) {
1682                 $left     = $1;
1683                 $operator = $2;
1684                 $right    = $3;
1685                 warn
1686     "handling unless (operator)... left:$left operator:$operator right:$right"
1687                 if $DEBUG;
1688             } else {
1689                 $left = $string;
1690             }
1691         }
1692         my $results;
1693
1694 # strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr...
1695         $left =~ s/ .*$//;
1696
1697         # automatic replace for short operators
1698         $left = 'title'            if $left =~ '^ti$';
1699         $left = 'author'           if $left =~ '^au$';
1700         $left = 'publisher'        if $left =~ '^pb$';
1701         $left = 'subject'          if $left =~ '^su$';
1702         $left = 'koha-Auth-Number' if $left =~ '^an$';
1703         $left = 'keyword'          if $left =~ '^kw$';
1704         warn "handling leaf... left:$left operator:$operator right:$right" if $DEBUG;
1705         if ( $operator && $left ne 'keyword' ) {
1706
1707             #do a specific search
1708             my $dbh = C4::Context->dbh;
1709             $operator = 'LIKE' if $operator eq '=' and $right =~ /%/;
1710             my $sth =
1711               $dbh->prepare(
1712 "SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?"
1713               );
1714             warn "$left / $operator / $right\n" if $DEBUG;
1715
1716             # split each word, query the DB and build the biblionumbers result
1717             #sanitizing leftpart
1718             $left =~ s/^\s+|\s+$//;
1719             foreach ( split / /, $right ) {
1720                 my $biblionumbers;
1721                 $_ =~ s/^\s+|\s+$//;
1722                 next unless $_;
1723                 warn "EXECUTE : $server, $left, $_" if $DEBUG;
1724                 $sth->execute( $server, $left, $_ )
1725                   or warn "execute failed: $!";
1726                 while ( my ( $line, $value ) = $sth->fetchrow ) {
1727
1728 # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
1729 # otherwise, fill the result
1730                     $biblionumbers .= $line
1731                       unless ( $right =~ /^\d+$/ && $value =~ /\D/ );
1732                     warn "result : $value "
1733                       . ( $right  =~ /\d/ ) . "=="
1734                       . ( $value =~ /\D/?$line:"" ) if $DEBUG;         #= $line";
1735                 }
1736
1737 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1738                 if ($results) {
1739                     warn "NZAND" if $DEBUG;
1740                     $results = NZoperatorAND($biblionumbers,$results);
1741                 }
1742                 else {
1743                     $results = $biblionumbers;
1744                 }
1745             }
1746         }
1747         else {
1748
1749       #do a complete search (all indexes), if index='kw' do complete search too.
1750             my $dbh = C4::Context->dbh;
1751             my $sth =
1752               $dbh->prepare(
1753 "SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?"
1754               );
1755
1756             # split each word, query the DB and build the biblionumbers result
1757             foreach ( split / /, $string ) {
1758                 next if C4::Context->stopwords->{ uc($_) };   # skip if stopword
1759                 warn "search on all indexes on $_" if $DEBUG;
1760                 my $biblionumbers;
1761                 next unless $_;
1762                 $sth->execute( $server, $_ );
1763                 while ( my $line = $sth->fetchrow ) {
1764                     $biblionumbers .= $line;
1765                 }
1766
1767 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1768                 if ($results) {
1769                     $results = NZoperatorAND($biblionumbers,$results);
1770                 }
1771                 else {
1772                     warn "NEW RES for $_ = $biblionumbers" if $DEBUG;
1773                     $results = $biblionumbers;
1774                 }
1775             }
1776         }
1777         warn "return : $results for LEAF : $string" if $DEBUG;
1778         return $results;
1779     }
1780     warn "---------\nLeave NZanalyse\n---------" if $DEBUG;
1781 }
1782
1783 sub NZoperatorAND{
1784     my ($rightresult, $leftresult)=@_;
1785
1786     my @leftresult = split /;/, $leftresult;
1787     warn " @leftresult / $rightresult \n" if $DEBUG;
1788
1789     #             my @rightresult = split /;/,$leftresult;
1790     my $finalresult;
1791
1792 # parse the left results, and if the biblionumber exist in the right result, save it in finalresult
1793 # the result is stored twice, to have the same weight for AND than OR.
1794 # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130
1795 # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64
1796     foreach (@leftresult) {
1797         my $value = $_;
1798         my $countvalue;
1799         ( $value, $countvalue ) = ( $1, $2 ) if ($value=~/(.*)-(\d+)$/);
1800         if ( $rightresult =~ /\Q$value\E-(\d+);/ ) {
1801             $countvalue = ( $1 > $countvalue ? $countvalue : $1 );
1802             $finalresult .=
1803                 "$value-$countvalue;$value-$countvalue;";
1804         }
1805     }
1806     warn "NZAND DONE : $finalresult \n" if $DEBUG;
1807     return $finalresult;
1808 }
1809
1810 sub NZoperatorOR{
1811     my ($rightresult, $leftresult)=@_;
1812     return $rightresult.$leftresult;
1813 }
1814
1815 sub NZoperatorNOT{
1816     my ($leftresult, $rightresult)=@_;
1817
1818     my @leftresult = split /;/, $leftresult;
1819
1820     #             my @rightresult = split /;/,$leftresult;
1821     my $finalresult;
1822     foreach (@leftresult) {
1823         my $value=$_;
1824         $value=$1 if $value=~m/(.*)-\d+$/;
1825         unless ($rightresult =~ "$value-") {
1826             $finalresult .= "$_;";
1827         }
1828     }
1829     return $finalresult;
1830 }
1831
1832 =head2 NZorder
1833
1834   $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset);
1835
1836   TODO :: Description
1837
1838 =cut
1839
1840 sub NZorder {
1841     my ( $biblionumbers, $ordering, $results_per_page, $offset ) = @_;
1842     warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG;
1843
1844     # order title asc by default
1845     #     $ordering = '1=36 <i' unless $ordering;
1846     $results_per_page = 20 unless $results_per_page;
1847     $offset           = 0  unless $offset;
1848     my $dbh = C4::Context->dbh;
1849
1850     #
1851     # order by POPULARITY
1852     #
1853     if ( $ordering =~ /popularity/ ) {
1854         my %result;
1855         my %popularity;
1856
1857         # popularity is not in MARC record, it's builded from a specific query
1858         my $sth =
1859           $dbh->prepare("select sum(issues) from items where biblionumber=?");
1860         foreach ( split /;/, $biblionumbers ) {
1861             my ( $biblionumber, $title ) = split /,/, $_;
1862             $result{$biblionumber} = GetMarcBiblio($biblionumber);
1863             $sth->execute($biblionumber);
1864             my $popularity = $sth->fetchrow || 0;
1865
1866 # hint : the key is popularity.title because we can have
1867 # many results with the same popularity. In this cas, sub-ordering is done by title
1868 # we also have biblionumber to avoid bug for 2 biblios with the same title & popularity
1869 # (un-frequent, I agree, but we won't forget anything that way ;-)
1870             $popularity{ sprintf( "%10d", $popularity ) . $title
1871                   . $biblionumber } = $biblionumber;
1872         }
1873
1874     # sort the hash and return the same structure as GetRecords (Zebra querying)
1875         my $result_hash;
1876         my $numbers = 0;
1877         if ( $ordering eq 'popularity_dsc' ) {    # sort popularity DESC
1878             foreach my $key ( sort { $b cmp $a } ( keys %popularity ) ) {
1879                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1880                   $result{ $popularity{$key} }->as_usmarc();
1881             }
1882         }
1883         else {                                    # sort popularity ASC
1884             foreach my $key ( sort ( keys %popularity ) ) {
1885                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1886                   $result{ $popularity{$key} }->as_usmarc();
1887             }
1888         }
1889         my $finalresult = ();
1890         $result_hash->{'hits'}         = $numbers;
1891         $finalresult->{'biblioserver'} = $result_hash;
1892         return $finalresult;
1893
1894         #
1895         # ORDER BY author
1896         #
1897     }
1898     elsif ( $ordering =~ /author/ ) {
1899         my %result;
1900         foreach ( split /;/, $biblionumbers ) {
1901             my ( $biblionumber, $title ) = split /,/, $_;
1902             my $record = GetMarcBiblio($biblionumber);
1903             my $author;
1904             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1905                 $author = $record->subfield( '200', 'f' );
1906                 $author = $record->subfield( '700', 'a' ) unless $author;
1907             }
1908             else {
1909                 $author = $record->subfield( '100', 'a' );
1910             }
1911
1912 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1913 # and we don't want to get only 1 result for each of them !!!
1914             $result{ $author . $biblionumber } = $record;
1915         }
1916
1917     # sort the hash and return the same structure as GetRecords (Zebra querying)
1918         my $result_hash;
1919         my $numbers = 0;
1920         if ( $ordering eq 'author_za' ) {    # sort by author desc
1921             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1922                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1923                   $result{$key}->as_usmarc();
1924             }
1925         }
1926         else {                               # sort by author ASC
1927             foreach my $key ( sort ( keys %result ) ) {
1928                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1929                   $result{$key}->as_usmarc();
1930             }
1931         }
1932         my $finalresult = ();
1933         $result_hash->{'hits'}         = $numbers;
1934         $finalresult->{'biblioserver'} = $result_hash;
1935         return $finalresult;
1936
1937         #
1938         # ORDER BY callnumber
1939         #
1940     }
1941     elsif ( $ordering =~ /callnumber/ ) {
1942         my %result;
1943         foreach ( split /;/, $biblionumbers ) {
1944             my ( $biblionumber, $title ) = split /,/, $_;
1945             my $record = GetMarcBiblio($biblionumber);
1946             my $callnumber;
1947             my ( $callnumber_tag, $callnumber_subfield ) =
1948               GetMarcFromKohaField( 'items.itemcallnumber','' );
1949             ( $callnumber_tag, $callnumber_subfield ) =
1950               GetMarcFromKohaField('biblioitems.callnumber','')
1951               unless $callnumber_tag;
1952             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1953                 $callnumber = $record->subfield( '200', 'f' );
1954             }
1955             else {
1956                 $callnumber = $record->subfield( '100', 'a' );
1957             }
1958
1959 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1960 # and we don't want to get only 1 result for each of them !!!
1961             $result{ $callnumber . $biblionumber } = $record;
1962         }
1963
1964     # sort the hash and return the same structure as GetRecords (Zebra querying)
1965         my $result_hash;
1966         my $numbers = 0;
1967         if ( $ordering eq 'call_number_dsc' ) {    # sort by title desc
1968             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1969                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1970                   $result{$key}->as_usmarc();
1971             }
1972         }
1973         else {                                     # sort by title ASC
1974             foreach my $key ( sort { $a cmp $b } ( keys %result ) ) {
1975                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1976                   $result{$key}->as_usmarc();
1977             }
1978         }
1979         my $finalresult = ();
1980         $result_hash->{'hits'}         = $numbers;
1981         $finalresult->{'biblioserver'} = $result_hash;
1982         return $finalresult;
1983     }
1984     elsif ( $ordering =~ /pubdate/ ) {             #pub year
1985         my %result;
1986         foreach ( split /;/, $biblionumbers ) {
1987             my ( $biblionumber, $title ) = split /,/, $_;
1988             my $record = GetMarcBiblio($biblionumber);
1989             my ( $publicationyear_tag, $publicationyear_subfield ) =
1990               GetMarcFromKohaField( 'biblioitems.publicationyear', '' );
1991             my $publicationyear =
1992               $record->subfield( $publicationyear_tag,
1993                 $publicationyear_subfield );
1994
1995 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1996 # and we don't want to get only 1 result for each of them !!!
1997             $result{ $publicationyear . $biblionumber } = $record;
1998         }
1999
2000     # sort the hash and return the same structure as GetRecords (Zebra querying)
2001         my $result_hash;
2002         my $numbers = 0;
2003         if ( $ordering eq 'pubdate_dsc' ) {    # sort by pubyear desc
2004             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2005                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2006                   $result{$key}->as_usmarc();
2007             }
2008         }
2009         else {                                 # sort by pub year ASC
2010             foreach my $key ( sort ( keys %result ) ) {
2011                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2012                   $result{$key}->as_usmarc();
2013             }
2014         }
2015         my $finalresult = ();
2016         $result_hash->{'hits'}         = $numbers;
2017         $finalresult->{'biblioserver'} = $result_hash;
2018         return $finalresult;
2019
2020         #
2021         # ORDER BY title
2022         #
2023     }
2024     elsif ( $ordering =~ /title/ ) {
2025
2026 # the title is in the biblionumbers string, so we just need to build a hash, sort it and return
2027         my %result;
2028         foreach ( split /;/, $biblionumbers ) {
2029             my ( $biblionumber, $title ) = split /,/, $_;
2030
2031 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2032 # and we don't want to get only 1 result for each of them !!!
2033 # hint & speed improvement : we can order without reading the record
2034 # so order, and read records only for the requested page !
2035             $result{ $title . $biblionumber } = $biblionumber;
2036         }
2037
2038     # sort the hash and return the same structure as GetRecords (Zebra querying)
2039         my $result_hash;
2040         my $numbers = 0;
2041         if ( $ordering eq 'title_az' ) {    # sort by title desc
2042             foreach my $key ( sort ( keys %result ) ) {
2043                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2044             }
2045         }
2046         else {                              # sort by title ASC
2047             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2048                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2049             }
2050         }
2051
2052         # limit the $results_per_page to result size if it's more
2053         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2054
2055         # for the requested page, replace biblionumber by the complete record
2056         # speed improvement : avoid reading too much things
2057         for (
2058             my $counter = $offset ;
2059             $counter <= $offset + $results_per_page ;
2060             $counter++
2061           )
2062         {
2063             $result_hash->{'RECORDS'}[$counter] =
2064               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc;
2065         }
2066         my $finalresult = ();
2067         $result_hash->{'hits'}         = $numbers;
2068         $finalresult->{'biblioserver'} = $result_hash;
2069         return $finalresult;
2070     }
2071     else {
2072
2073 #
2074 # order by ranking
2075 #
2076 # we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking
2077         my %result;
2078         my %count_ranking;
2079         foreach ( split /;/, $biblionumbers ) {
2080             my ( $biblionumber, $title ) = split /,/, $_;
2081             $title =~ /(.*)-(\d)/;
2082
2083             # get weight
2084             my $ranking = $2;
2085
2086 # note that we + the ranking because ranking is calculated on weight of EACH term requested.
2087 # if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N
2088 # biblio N has ranking = 6
2089             $count_ranking{$biblionumber} += $ranking;
2090         }
2091
2092 # build the result by "inverting" the count_ranking hash
2093 # hing : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead
2094 #         warn "counting";
2095         foreach ( keys %count_ranking ) {
2096             $result{ sprintf( "%10d", $count_ranking{$_} ) . '-' . $_ } = $_;
2097         }
2098
2099     # sort the hash and return the same structure as GetRecords (Zebra querying)
2100         my $result_hash;
2101         my $numbers = 0;
2102         foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2103             $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2104         }
2105
2106         # limit the $results_per_page to result size if it's more
2107         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2108
2109         # for the requested page, replace biblionumber by the complete record
2110         # speed improvement : avoid reading too much things
2111         for (
2112             my $counter = $offset ;
2113             $counter <= $offset + $results_per_page ;
2114             $counter++
2115           )
2116         {
2117             $result_hash->{'RECORDS'}[$counter] =
2118               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc
2119               if $result_hash->{'RECORDS'}[$counter];
2120         }
2121         my $finalresult = ();
2122         $result_hash->{'hits'}         = $numbers;
2123         $finalresult->{'biblioserver'} = $result_hash;
2124         return $finalresult;
2125     }
2126 }
2127
2128 END { }    # module clean-up code here (global destructor)
2129
2130 1;
2131 __END__
2132
2133 =head1 AUTHOR
2134
2135 Koha Developement team <info@koha.org>
2136
2137 =cut