C4/Search.pm

   1 package C4::Search;
   2
   3 # This file is part of Koha.
   4 #
   5 # Koha is free software; you can redistribute it and/or modify it under the
   6 # terms of the GNU General Public License as published by the Free Software
   7 # Foundation; either version 2 of the License, or (at your option) any later
   8 # version.
   9 #
  10 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
  11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12 # A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU General Public License along with
  15 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  16 # Suite 330, Boston, MA  02111-1307 USA
  17
  18 use strict;
  19 # use warnings; # FIXME
  20 require Exporter;
  21 use C4::Context;
  22 use C4::Biblio;    # GetMarcFromKohaField
  23 use C4::Koha;      # getFacets
  24 use Lingua::Stem;
  25 use C4::Search::PazPar2;
  26 use XML::Simple;
  27 use C4::Dates qw(format_date);
  28 use C4::XSLT;
  29
  30 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
  31
  32 # set the version for version checking
  33 BEGIN {
  34     $VERSION = 3.01;
  35     $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
  36 }
  37
  38 =head1 NAME
  39
  40 C4::Search - Functions for searching the Koha catalog.
  41
  42 =head1 SYNOPSIS
  43
  44 See opac/opac-search.pl or catalogue/search.pl for example of usage
  45
  46 =head1 DESCRIPTION
  47
  48 This module provides searching functions for Koha's bibliographic databases
  49
  50 =head1 FUNCTIONS
  51
  52 =cut
  53
  54 @ISA    = qw(Exporter);
  55 @EXPORT = qw(
  56   &findseealso
  57   &FindDuplicate
  58   &SimpleSearch
  59   &searchResults
  60   &getRecords
  61   &buildQuery
  62   &NZgetRecords
  63 );
  64
  65 # make all your functions, whether exported or not;
  66
  67 =head2 findseealso($dbh,$fields);
  68
  69 C<$dbh> is a link to the DB handler.
  70
  71 use C4::Context;
  72 my $dbh =C4::Context->dbh;
  73
  74 C<$fields> is a reference to the fields array
  75
  76 This function modifies the @$fields array and adds related fields to search on.
  77
  78 FIXME: this function is probably deprecated in Koha 3
  79
  80 =cut
  81
  82 sub findseealso {
  83     my ( $dbh, $fields ) = @_;
  84     my $tagslib = GetMarcStructure(1);
  85     for ( my $i = 0 ; $i <= $#{$fields} ; $i++ ) {
  86         my ($tag)      = substr( @$fields[$i], 1, 3 );
  87         my ($subfield) = substr( @$fields[$i], 4, 1 );
  88         @$fields[$i] .= ',' . $tagslib->{$tag}->{$subfield}->{seealso}
  89           if ( $tagslib->{$tag}->{$subfield}->{seealso} );
  90     }
  91 }
  92
  93 =head2 FindDuplicate
  94
  95 ($biblionumber,$biblionumber,$title) = FindDuplicate($record);
  96
  97 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
  98
  99 =cut
 100
 101 sub FindDuplicate {
 102     my ($record) = @_;
 103     my $dbh = C4::Context->dbh;
 104     my $result = TransformMarcToKoha( $dbh, $record, '' );
 105     my $sth;
 106     my $query;
 107     my $search;
 108     my $type;
 109     my ( $biblionumber, $title );
 110
 111     # search duplicate on ISBN, easy and fast..
 112     # ... normalize first
 113     if ( $result->{isbn} ) {
 114         $result->{isbn} =~ s/\(.*$//;
 115         $result->{isbn} =~ s/\s+$//;
 116         $query = "isbn=$result->{isbn}";
 117     }
 118     else {
 119         $result->{title} =~ s /\\//g;
 120         $result->{title} =~ s /\"//g;
 121         $result->{title} =~ s /\(//g;
 122         $result->{title} =~ s /\)//g;
 123
 124         # FIXME: instead of removing operators, could just do
 125         # quotes around the value
 126         $result->{title} =~ s/(and|or|not)//g;
 127         $query = "ti,ext=$result->{title}";
 128         $query .= " and itemtype=$result->{itemtype}"
 129           if ( $result->{itemtype} );
 130         if   ( $result->{author} ) {
 131             $result->{author} =~ s /\\//g;
 132             $result->{author} =~ s /\"//g;
 133             $result->{author} =~ s /\(//g;
 134             $result->{author} =~ s /\)//g;
 135
 136             # remove valid operators
 137             $result->{author} =~ s/(and|or|not)//g;
 138             $query .= " and au,ext=$result->{author}";
 139         }
 140     }
 141
 142     # FIXME: add error handling
 143     my ( $error, $searchresults ) = SimpleSearch($query); # FIXME :: hardcoded !
 144     my @results;
 145     foreach my $possible_duplicate_record (@$searchresults) {
 146         my $marcrecord =
 147           MARC::Record->new_from_usmarc($possible_duplicate_record);
 148         my $result = TransformMarcToKoha( $dbh, $marcrecord, '' );
 149
 150         # FIXME :: why 2 $biblionumber ?
 151         if ($result) {
 152             push @results, $result->{'biblionumber'};
 153             push @results, $result->{'title'};
 154         }
 155     }
 156     return @results;
 157 }
 158
 159 =head2 SimpleSearch
 160
 161 ( $error, $results, $total_hits ) = SimpleSearch( $query, $offset, $max_results, [@servers] );
 162
 163 This function provides a simple search API on the bibliographic catalog
 164
 165 =over 2
 166
 167 =item C<input arg:>
 168
 169     * $query can be a simple keyword or a complete CCL query
 170     * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
 171     * $offset - If present, represents the number of records at the beggining to omit. Defaults to 0
 172     * $max_results - if present, determines the maximum number of records to fetch. undef is All. defaults to undef.
 173
 174
 175 =item C<Output:>
 176
 177     * $error is a empty unless an error is detected
 178     * \@results is an array of records.
 179     * $total_hits is the number of hits that would have been returned with no limit
 180
 181 =item C<usage in the script:>
 182
 183 =back
 184
 185 my ( $error, $marcresults, $total_hits ) = SimpleSearch($query);
 186
 187 if (defined $error) {
 188     $template->param(query_error => $error);
 189     warn "error: ".$error;
 190     output_html_with_http_headers $input, $cookie, $template->output;
 191     exit;
 192 }
 193
 194 my $hits = scalar @$marcresults;
 195 my @results;
 196
 197 for my $i (0..$hits) {
 198     my %resultsloop;
 199     my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]);
 200     my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,'');
 201
 202     #build the hash for the template.
 203     $resultsloop{highlight}       = ($i % 2)?(1):(0);
 204     $resultsloop{title}           = $biblio->{'title'};
 205     $resultsloop{subtitle}        = $biblio->{'subtitle'};
 206     $resultsloop{biblionumber}    = $biblio->{'biblionumber'};
 207     $resultsloop{author}          = $biblio->{'author'};
 208     $resultsloop{publishercode}   = $biblio->{'publishercode'};
 209     $resultsloop{publicationyear} = $biblio->{'publicationyear'};
 210
 211     push @results, \%resultsloop;
 212 }
 213
 214 $template->param(result=>\@results);
 215
 216 =cut
 217
 218 sub SimpleSearch {
 219     my ( $query, $offset, $max_results, $servers )  = @_;
 220
 221     if ( C4::Context->preference('NoZebra') ) {
 222         my $result = NZorder( NZanalyse($query) )->{'biblioserver'};
 223         my $search_result =
 224           (      $result->{hits}
 225               && $result->{hits} > 0 ? $result->{'RECORDS'} : [] );
 226         return ( undef, $search_result, scalar($search_result) );
 227     }
 228     else {
 229         # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
 230         my @servers = defined ( $servers ) ? @$servers : ( "biblioserver" );
 231         my @results;
 232         my @zoom_queries;
 233         my @tmpresults;
 234         my @zconns;
 235         my $total_hits;
 236         return ( "No query entered", undef, undef ) unless $query;
 237
 238         # Initialize & Search Zebra
 239         for ( my $i = 0 ; $i < @servers ; $i++ ) {
 240             eval {
 241                 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 242                 $zoom_queries[$i] = new ZOOM::Query::CCL2RPN( $query, $zconns[$i]);
 243                 $tmpresults[$i] = $zconns[$i]->search( $zoom_queries[$i] );
 244
 245                 # error handling
 246                 my $error =
 247                     $zconns[$i]->errmsg() . " ("
 248                   . $zconns[$i]->errcode() . ") "
 249                   . $zconns[$i]->addinfo() . " "
 250                   . $zconns[$i]->diagset();
 251
 252                 return ( $error, undef, undef ) if $zconns[$i]->errcode();
 253             };
 254             if ($@) {
 255
 256                 # caught a ZOOM::Exception
 257                 my $error =
 258                     $@->message() . " ("
 259                   . $@->code() . ") "
 260                   . $@->addinfo() . " "
 261                   . $@->diagset();
 262                 warn $error;
 263                 return ( $error, undef, undef );
 264             }
 265         }
 266         while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 267             my $event = $zconns[ $i - 1 ]->last_event();
 268             if ( $event == ZOOM::Event::ZEND ) {
 269
 270                 my $first_record = defined( $offset ) ? $offset+1 : 1;
 271                 my $hits = $tmpresults[ $i - 1 ]->size();
 272                 $total_hits += $hits;
 273                 my $last_record = $hits;
 274                 if ( defined $max_results && $offset + $max_results < $hits ) {
 275                     $last_record  = $offset + $max_results;
 276                 }
 277
 278                 for my $j ( $first_record..$last_record ) {
 279                     my $record = $tmpresults[ $i - 1 ]->record( $j-1 )->raw(); # 0 indexed
 280                     push @results, $record;
 281                 }
 282             }
 283         }
 284
 285         foreach my $result (@tmpresults) {
 286             $result->destroy();
 287         }
 288         foreach my $zoom_query (@zoom_queries) {
 289             $zoom_query->destroy();
 290         }
 291
 292         return ( undef, \@results, $total_hits );
 293     }
 294 }
 295
 296 =head2 getRecords
 297
 298 ( undef, $results_hashref, \@facets_loop ) = getRecords (
 299
 300         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 301         $results_per_page, $offset,       $expanded_facet, $branches,
 302         $query_type,       $scan
 303     );
 304
 305 The all singing, all dancing, multi-server, asynchronous, scanning,
 306 searching, record nabbing, facet-building
 307
 308 See verbse embedded documentation.
 309
 310 =cut
 311
 312 sub getRecords {
 313     my (
 314         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 315         $results_per_page, $offset,       $expanded_facet, $branches,
 316         $query_type,       $scan
 317     ) = @_;
 318
 319     my @servers = @$servers_ref;
 320     my @sort_by = @$sort_by_ref;
 321
 322     # Initialize variables for the ZOOM connection and results object
 323     my $zconn;
 324     my @zconns;
 325     my @results;
 326     my $results_hashref = ();
 327
 328     # Initialize variables for the faceted results objects
 329     my $facets_counter = ();
 330     my $facets_info    = ();
 331     my $facets         = getFacets();
 332
 333     my @facets_loop
 334       ;    # stores the ref to array of hashes for template facets loop
 335
 336     ### LOOP THROUGH THE SERVERS
 337     for ( my $i = 0 ; $i < @servers ; $i++ ) {
 338         $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 339
 340 # perform the search, create the results objects
 341 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
 342         my $query_to_use = ($servers[$i] =~ /biblioserver/) ? $koha_query : $simple_query;
 343
 344         #$query_to_use = $simple_query if $scan;
 345         warn $simple_query if ( $scan and $DEBUG );
 346
 347         # Check if we've got a query_type defined, if so, use it
 348         eval {
 349             if ($query_type)
 350             {
 351                 if ( $query_type =~ /^ccl/ ) {
 352                     $query_to_use =~
 353                       s/\:/\=/g;    # change : to = last minute (FIXME)
 354                     $results[$i] =
 355                       $zconns[$i]->search(
 356                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 357                       );
 358                 }
 359                 elsif ( $query_type =~ /^cql/ ) {
 360                     $results[$i] =
 361                       $zconns[$i]->search(
 362                         new ZOOM::Query::CQL( $query_to_use, $zconns[$i] ) );
 363                 }
 364                 elsif ( $query_type =~ /^pqf/ ) {
 365                     $results[$i] =
 366                       $zconns[$i]->search(
 367                         new ZOOM::Query::PQF( $query_to_use, $zconns[$i] ) );
 368                 }
 369             }
 370             else {
 371                 if ($scan) {
 372                     $results[$i] =
 373                       $zconns[$i]->scan(
 374                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 375                       );
 376                 }
 377                 else {
 378                     $results[$i] =
 379                       $zconns[$i]->search(
 380                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 381                       );
 382                 }
 383             }
 384         };
 385         if ($@) {
 386             warn "WARNING: query problem with $query_to_use " . $@;
 387         }
 388
 389         # Concatenate the sort_by limits and pass them to the results object
 390         # Note: sort will override rank
 391         my $sort_by;
 392         foreach my $sort (@sort_by) {
 393             if ( $sort eq "author_az" ) {
 394                 $sort_by .= "1=1003 <i ";
 395             }
 396             elsif ( $sort eq "author_za" ) {
 397                 $sort_by .= "1=1003 >i ";
 398             }
 399             elsif ( $sort eq "popularity_asc" ) {
 400                 $sort_by .= "1=9003 <i ";
 401             }
 402             elsif ( $sort eq "popularity_dsc" ) {
 403                 $sort_by .= "1=9003 >i ";
 404             }
 405             elsif ( $sort eq "call_number_asc" ) {
 406                 $sort_by .= "1=20  <i ";
 407             }
 408             elsif ( $sort eq "call_number_dsc" ) {
 409                 $sort_by .= "1=20 >i ";
 410             }
 411             elsif ( $sort eq "pubdate_asc" ) {
 412                 $sort_by .= "1=31 <i ";
 413             }
 414             elsif ( $sort eq "pubdate_dsc" ) {
 415                 $sort_by .= "1=31 >i ";
 416             }
 417             elsif ( $sort eq "acqdate_asc" ) {
 418                 $sort_by .= "1=32 <i ";
 419             }
 420             elsif ( $sort eq "acqdate_dsc" ) {
 421                 $sort_by .= "1=32 >i ";
 422             }
 423             elsif ( $sort eq "title_az" ) {
 424                 $sort_by .= "1=4 <i ";
 425             }
 426             elsif ( $sort eq "title_za" ) {
 427                 $sort_by .= "1=4 >i ";
 428             }
 429         }
 430         if ($sort_by) {
 431             if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
 432                 warn "WARNING sort $sort_by failed";
 433             }
 434         }
 435     }    # finished looping through servers
 436
 437     # The big moment: asynchronously retrieve results from all servers
 438     while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 439         my $ev = $zconns[ $i - 1 ]->last_event();
 440         if ( $ev == ZOOM::Event::ZEND ) {
 441             next unless $results[ $i - 1 ];
 442             my $size = $results[ $i - 1 ]->size();
 443             if ( $size > 0 ) {
 444                 my $results_hash;
 445
 446                 # loop through the results
 447                 $results_hash->{'hits'} = $size;
 448                 my $times;
 449                 if ( $offset + $results_per_page <= $size ) {
 450                     $times = $offset + $results_per_page;
 451                 }
 452                 else {
 453                     $times = $size;
 454                 }
 455                 for ( my $j = $offset ; $j < $times ; $j++ ) {
 456                     my $records_hash;
 457                     my $record;
 458                     my $facet_record;
 459
 460                     ## Check if it's an index scan
 461                     if ($scan) {
 462                         my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
 463
 464                  # here we create a minimal MARC record and hand it off to the
 465                  # template just like a normal result ... perhaps not ideal, but
 466                  # it works for now
 467                         my $tmprecord = MARC::Record->new();
 468                         $tmprecord->encoding('UTF-8');
 469                         my $tmptitle;
 470                         my $tmpauthor;
 471
 472                 # the minimal record in author/title (depending on MARC flavour)
 473                         if (C4::Context->preference("marcflavour") eq "UNIMARC") {
 474                             $tmptitle = MARC::Field->new('200',' ',' ', a => $term, f => $occ);
 475                             $tmprecord->append_fields($tmptitle);
 476                         } else {
 477                             $tmptitle  = MARC::Field->new('245',' ',' ', a => $term,);
 478                             $tmpauthor = MARC::Field->new('100',' ',' ', a => $occ,);
 479                             $tmprecord->append_fields($tmptitle);
 480                             $tmprecord->append_fields($tmpauthor);
 481                         }
 482                         $results_hash->{'RECORDS'}[$j] = $tmprecord->as_usmarc();
 483                     }
 484
 485                     # not an index scan
 486                     else {
 487                         $record = $results[ $i - 1 ]->record($j)->raw();
 488
 489                         # warn "RECORD $j:".$record;
 490                         $results_hash->{'RECORDS'}[$j] = $record;
 491
 492             # Fill the facets while we're looping, but only for the biblioserver
 493                         $facet_record = MARC::Record->new_from_usmarc($record)
 494                           if $servers[ $i - 1 ] =~ /biblioserver/;
 495
 496                     #warn $servers[$i-1]."\n".$record; #.$facet_record->title();
 497                         if ($facet_record) {
 498                             for ( my $k = 0 ; $k <= @$facets ; $k++ ) {
 499                                 ($facets->[$k]) or next;
 500                                 my @fields = map {$facet_record->field($_)} @{$facets->[$k]->{'tags'}} ;
 501                                 for my $field (@fields) {
 502                                     my @subfields = $field->subfields();
 503                                     for my $subfield (@subfields) {
 504                                         my ( $code, $data ) = @$subfield;
 505                                         ($code eq $facets->[$k]->{'subfield'}) or next;
 506                                         $facets_counter->{ $facets->[$k]->{'link_value'} }->{$data}++;
 507                                     }
 508                                 }
 509                                 $facets_info->{ $facets->[$k]->{'link_value'} }->{'label_value'} =
 510                                     $facets->[$k]->{'label_value'};
 511                                 $facets_info->{ $facets->[$k]->{'link_value'} }->{'expanded'} =
 512                                     $facets->[$k]->{'expanded'};
 513                             }
 514                         }
 515                     }
 516                 }
 517                 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
 518             }
 519
 520             # warn "connection ", $i-1, ": $size hits";
 521             # warn $results[$i-1]->record(0)->render() if $size > 0;
 522
 523             # BUILD FACETS
 524             if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
 525                 for my $link_value (
 526                     sort { $facets_counter->{$b} <=> $facets_counter->{$a} }
 527                     keys %$facets_counter )
 528                 {
 529                     my $expandable;
 530                     my $number_of_facets;
 531                     my @this_facets_array;
 532                     for my $one_facet (
 533                         sort {
 534                             $facets_counter->{$link_value}
 535                               ->{$b} <=> $facets_counter->{$link_value}->{$a}
 536                         } keys %{ $facets_counter->{$link_value} }
 537                       )
 538                     {
 539                         $number_of_facets++;
 540                         if (   ( $number_of_facets < 6 )
 541                             || ( $expanded_facet eq $link_value )
 542                             || ( $facets_info->{$link_value}->{'expanded'} ) )
 543                         {
 544
 545                       # Sanitize the link value ), ( will cause errors with CCL,
 546                             my $facet_link_value = $one_facet;
 547                             $facet_link_value =~ s/(\(|\))/ /g;
 548
 549                             # fix the length that will display in the label,
 550                             my $facet_label_value = $one_facet;
 551                             $facet_label_value =
 552                               substr( $one_facet, 0, 20 ) . "..."
 553                               unless length($facet_label_value) <= 20;
 554
 555                             # if it's a branch, label by the name, not the code,
 556                             if ( $link_value =~ /branch/ ) {
 557                                 $facet_label_value =
 558                                   $branches->{$one_facet}->{'branchname'};
 559                             }
 560
 561                 # but we're down with the whole label being in the link's title.
 562                             my $facet_title_value = $one_facet;
 563
 564                             push @this_facets_array,
 565                               (
 566                                 {
 567                                     facet_count =>
 568                                       $facets_counter->{$link_value}
 569                                       ->{$one_facet},
 570                                     facet_label_value => $facet_label_value,
 571                                     facet_title_value => $facet_title_value,
 572                                     facet_link_value  => $facet_link_value,
 573                                     type_link_value   => $link_value,
 574                                 },
 575                               );
 576                         }
 577                     }
 578
 579                     # handle expanded option
 580                     unless ( $facets_info->{$link_value}->{'expanded'} ) {
 581                         $expandable = 1
 582                           if ( ( $number_of_facets > 6 )
 583                             && ( $expanded_facet ne $link_value ) );
 584                     }
 585                     push @facets_loop,
 586                       (
 587                         {
 588                             type_link_value => $link_value,
 589                             type_id         => $link_value . "_id",
 590                             "type_label_" . $facets_info->{$link_value}->{'label_value'} => 1,
 591                             facets     => \@this_facets_array,
 592                             expandable => $expandable,
 593                             expand     => $link_value,
 594                         }
 595                       ) unless ( ($facets_info->{$link_value}->{'label_value'} =~ /Libraries/) and (C4::Context->preference('singleBranchMode')) );
 596                 }
 597             }
 598         }
 599     }
 600     return ( undef, $results_hashref, \@facets_loop );
 601 }
 602
 603 sub pazGetRecords {
 604     my (
 605         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 606         $results_per_page, $offset,       $expanded_facet, $branches,
 607         $query_type,       $scan
 608     ) = @_;
 609
 610     my $paz = C4::Search::PazPar2->new(C4::Context->config('pazpar2url'));
 611     $paz->init();
 612     $paz->search($simple_query);
 613     sleep 1;
 614
 615     # do results
 616     my $results_hashref = {};
 617     my $stats = XMLin($paz->stat);
 618     my $results = XMLin($paz->show($offset, $results_per_page, 'work-title:1'), forcearray => 1);
 619
 620     # for a grouped search result, the number of hits
 621     # is the number of groups returned; 'bib_hits' will have
 622     # the total number of bibs.
 623     $results_hashref->{'biblioserver'}->{'hits'} = $results->{'merged'}->[0];
 624     $results_hashref->{'biblioserver'}->{'bib_hits'} = $stats->{'hits'};
 625
 626     HIT: foreach my $hit (@{ $results->{'hit'} }) {
 627         my $recid = $hit->{recid}->[0];
 628
 629         my $work_title = $hit->{'md-work-title'}->[0];
 630         my $work_author;
 631         if (exists $hit->{'md-work-author'}) {
 632             $work_author = $hit->{'md-work-author'}->[0];
 633         }
 634         my $group_label = (defined $work_author) ? "$work_title / $work_author" : $work_title;
 635
 636         my $result_group = {};
 637         $result_group->{'group_label'} = $group_label;
 638         $result_group->{'group_merge_key'} = $recid;
 639
 640         my $count = 1;
 641         if (exists $hit->{count}) {
 642             $count = $hit->{count}->[0];
 643         }
 644         $result_group->{'group_count'} = $count;
 645
 646         for (my $i = 0; $i < $count; $i++) {
 647             # FIXME -- may need to worry about diacritics here
 648             my $rec = $paz->record($recid, $i);
 649             push @{ $result_group->{'RECORDS'} }, $rec;
 650         }
 651
 652         push @{ $results_hashref->{'biblioserver'}->{'GROUPS'} }, $result_group;
 653     }
 654
 655     # pass through facets
 656     my $termlist_xml = $paz->termlist('author,subject');
 657     my $terms = XMLin($termlist_xml, forcearray => 1);
 658     my @facets_loop = ();
 659     #die Dumper($results);
 660 #    foreach my $list (sort keys %{ $terms->{'list'} }) {
 661 #        my @facets = ();
 662 #        foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) {
 663 #            push @facets, {
 664 #                facet_label_value => $facet->{'name'}->[0],
 665 #            };
 666 #        }
 667 #        push @facets_loop, ( {
 668 #            type_label => $list,
 669 #            facets => \@facets,
 670 #        } );
 671 #    }
 672
 673     return ( undef, $results_hashref, \@facets_loop );
 674 }
 675
 676 # STOPWORDS
 677 sub _remove_stopwords {
 678     my ( $operand, $index ) = @_;
 679     my @stopwords_removed;
 680
 681     # phrase and exact-qualified indexes shouldn't have stopwords removed
 682     if ( $index !~ m/phr|ext/ ) {
 683
 684 # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
 685 #       we use IsAlpha unicode definition, to deal correctly with diacritics.
 686 #       otherwise, a French word like "leçon" woudl be split into "le" "çon", "le"
 687 #       is a stopword, we'd get "çon" and wouldn't find anything...
 688         foreach ( keys %{ C4::Context->stopwords } ) {
 689             next if ( $_ =~ /(and|or|not)/ );    # don't remove operators
 690             if ( $operand =~
 691                 /(\P{IsAlpha}$_\P{IsAlpha}|^$_\P{IsAlpha}|\P{IsAlpha}$_$|^$_$)/ )
 692             {
 693                 $operand =~ s/\P{IsAlpha}$_\P{IsAlpha}/ /gi;
 694                 $operand =~ s/^$_\P{IsAlpha}/ /gi;
 695                 $operand =~ s/\P{IsAlpha}$_$/ /gi;
 696                                 $operand =~ s/$1//gi;
 697                 push @stopwords_removed, $_;
 698             }
 699         }
 700     }
 701     return ( $operand, \@stopwords_removed );
 702 }
 703
 704 # TRUNCATION
 705 sub _detect_truncation {
 706     my ( $operand, $index ) = @_;
 707     my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
 708         @regexpr );
 709     $operand =~ s/^ //g;
 710     my @wordlist = split( /\s/, $operand );
 711     foreach my $word (@wordlist) {
 712         if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
 713             push @rightlefttruncated, $word;
 714         }
 715         elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
 716             push @lefttruncated, $word;
 717         }
 718         elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
 719             push @righttruncated, $word;
 720         }
 721         elsif ( index( $word, "*" ) < 0 ) {
 722             push @nontruncated, $word;
 723         }
 724         else {
 725             push @regexpr, $word;
 726         }
 727     }
 728     return (
 729         \@nontruncated,       \@righttruncated, \@lefttruncated,
 730         \@rightlefttruncated, \@regexpr
 731     );
 732 }
 733
 734 # STEMMING
 735 sub _build_stemmed_operand {
 736     my ($operand) = @_;
 737     my $stemmed_operand;
 738
 739     # If operand contains a digit, it is almost certainly an identifier, and should
 740     # not be stemmed.  This is particularly relevant for ISBNs and ISSNs, which
 741     # can contain the letter "X" - for example, _build_stemmend_operand would reduce
 742     # "014100018X" to "x ", which for a MARC21 database would bring up irrelevant
 743     # results (e.g., "23 x 29 cm." from the 300$c).  Bug 2098.
 744     return $operand if $operand =~ /\d/;
 745
 746 # FIXME: the locale should be set based on the user's language and/or search choice
 747     my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
 748
 749 # FIXME: these should be stored in the db so the librarian can modify the behavior
 750     $stemmer->add_exceptions(
 751         {
 752             'and' => 'and',
 753             'or'  => 'or',
 754             'not' => 'not',
 755         }
 756     );
 757     my @words = split( / /, $operand );
 758     my $stems = $stemmer->stem(@words);
 759     for my $stem (@$stems) {
 760         $stemmed_operand .= "$stem";
 761         $stemmed_operand .= "?"
 762           unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
 763         $stemmed_operand .= " ";
 764     }
 765     warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
 766     return $stemmed_operand;
 767 }
 768
 769 # FIELD WEIGHTING
 770 sub _build_weighted_query {
 771
 772 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
 773 # pretty well but could work much better if we had a smarter query parser
 774     my ( $operand, $stemmed_operand, $index ) = @_;
 775     my $stemming      = C4::Context->preference("QueryStemming")     || 0;
 776     my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
 777     my $fuzzy_enabled = C4::Context->preference("QueryFuzzy")        || 0;
 778
 779     my $weighted_query .= "(rk=(";    # Specifies that we're applying rank
 780
 781     # Keyword, or, no index specified
 782     if ( ( $index eq 'kw' ) || ( !$index ) ) {
 783         $weighted_query .=
 784           "Title-cover,ext,r1=\"$operand\"";    # exact title-cover
 785         $weighted_query .= " or ti,ext,r2=\"$operand\"";    # exact title
 786         $weighted_query .= " or ti,phr,r3=\"$operand\"";    # phrase title
 787           #$weighted_query .= " or any,ext,r4=$operand";               # exact any
 788           #$weighted_query .=" or kw,wrdl,r5=\"$operand\"";            # word list any
 789         $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
 790           if $fuzzy_enabled;    # add fuzzy, word list
 791         $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
 792           if ( $stemming and $stemmed_operand )
 793           ;                     # add stemming, right truncation
 794         $weighted_query .= " or wrdl,r9=\"$operand\"";
 795
 796         # embedded sorting: 0 a-z; 1 z-a
 797         # $weighted_query .= ") or (sort1,aut=1";
 798     }
 799
 800     # Barcode searches should skip this process
 801     elsif ( $index eq 'bc' ) {
 802         $weighted_query .= "bc=\"$operand\"";
 803     }
 804
 805     # Authority-number searches should skip this process
 806     elsif ( $index eq 'an' ) {
 807         $weighted_query .= "an=\"$operand\"";
 808     }
 809
 810     # If the index already has more than one qualifier, wrap the operand
 811     # in quotes and pass it back (assumption is that the user knows what they
 812     # are doing and won't appreciate us mucking up their query
 813     elsif ( $index =~ ',' ) {
 814         $weighted_query .= " $index=\"$operand\"";
 815     }
 816
 817     #TODO: build better cases based on specific search indexes
 818     else {
 819         $weighted_query .= " $index,ext,r1=\"$operand\"";    # exact index
 820           #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
 821         $weighted_query .= " or $index,phr,r3=\"$operand\"";    # phrase index
 822         $weighted_query .=
 823           " or $index,rt,wrdl,r3=\"$operand\"";    # word list index
 824     }
 825
 826     $weighted_query .= "))";                       # close rank specification
 827     return $weighted_query;
 828 }
 829
 830 =head2 buildQuery
 831
 832 ( $error, $query,
 833 $simple_query, $query_cgi,
 834 $query_desc, $limit,
 835 $limit_cgi, $limit_desc,
 836 $stopwords_removed, $query_type ) = getRecords ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
 837
 838 Build queries and limits in CCL, CGI, Human,
 839 handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
 840
 841 See verbose embedded documentation.
 842
 843
 844 =cut
 845
 846 sub buildQuery {
 847     my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_;
 848
 849     warn "---------\nEnter buildQuery\n---------" if $DEBUG;
 850
 851     # dereference
 852     my @operators = @$operators if $operators;
 853     my @indexes   = @$indexes   if $indexes;
 854     my @operands  = @$operands  if $operands;
 855     my @limits    = @$limits    if $limits;
 856     my @sort_by   = @$sort_by   if $sort_by;
 857
 858     my $stemming         = C4::Context->preference("QueryStemming")        || 0;
 859     my $auto_truncation  = C4::Context->preference("QueryAutoTruncate")    || 0;
 860     my $weight_fields    = C4::Context->preference("QueryWeightFields")    || 0;
 861     my $fuzzy_enabled    = C4::Context->preference("QueryFuzzy")           || 0;
 862     my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
 863
 864     # no stemming/weight/fuzzy in NoZebra
 865     if ( C4::Context->preference("NoZebra") ) {
 866         $stemming      = 0;
 867         $weight_fields = 0;
 868         $fuzzy_enabled = 0;
 869     }
 870
 871     my $query        = $operands[0];
 872     my $simple_query = $operands[0];
 873
 874     # initialize the variables we're passing back
 875     my $query_cgi;
 876     my $query_desc;
 877     my $query_type;
 878
 879     my $limit;
 880     my $limit_cgi;
 881     my $limit_desc;
 882
 883     my $stopwords_removed;    # flag to determine if stopwords have been removed
 884
 885 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
 886 # DIAGNOSTIC ONLY!!
 887     if ( $query =~ /^ccl=/ ) {
 888         return ( undef, $', $', $', $', '', '', '', '', 'ccl' );
 889     }
 890     if ( $query =~ /^cql=/ ) {
 891         return ( undef, $', $', $', $', '', '', '', '', 'cql' );
 892     }
 893     if ( $query =~ /^pqf=/ ) {
 894         return ( undef, $', $', $', $', '', '', '', '', 'pqf' );
 895     }
 896
 897     # pass nested queries directly
 898     # FIXME: need better handling of some of these variables in this case
 899     if ( $query =~ /(\(|\))/ ) {
 900         return (
 901             undef,              $query, $simple_query, $query_cgi,
 902             $query,             $limit, $limit_cgi,    $limit_desc,
 903             $stopwords_removed, 'ccl'
 904         );
 905     }
 906
 907 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
 908 # query operands and indexes and add stemming, truncation, field weighting, etc.
 909 # Once we do so, we'll end up with a value in $query, just like if we had an
 910 # incoming $query from the user
 911     else {
 912         $query = ""
 913           ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
 914         my $previous_operand
 915           ;    # a flag used to keep track if there was a previous query
 916                # if there was, we can apply the current operator
 917                # for every operand
 918         for ( my $i = 0 ; $i <= @operands ; $i++ ) {
 919
 920             # COMBINE OPERANDS, INDEXES AND OPERATORS
 921             if ( $operands[$i] ) {
 922
 923               # A flag to determine whether or not to add the index to the query
 924                 my $indexes_set;
 925
 926 # If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling
 927                 if ( $operands[$i] =~ /(:|=)/ || $scan ) {
 928                     $weight_fields    = 0;
 929                     $stemming         = 0;
 930                     $remove_stopwords = 0;
 931                 }
 932                 my $operand = $operands[$i];
 933                 my $index   = $indexes[$i];
 934
 935                 # Add index-specific attributes
 936                 # Date of Publication
 937                 if ( $index eq 'yr' ) {
 938                     $index .= ",st-numeric";
 939 #                     $indexes_set++;
 940                                         $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
 941                 }
 942
 943                 # Date of Acquisition
 944                 elsif ( $index eq 'acqdate' ) {
 945                     $index .= ",st-date-normalized";
 946 #                     $indexes_set++;
 947                                         $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
 948                 }
 949                 # ISBN,ISSN,Standard Number, don't need special treatment
 950                 elsif ( $index eq 'nb' || $index eq 'ns' ) {
 951 #                     $indexes_set++;
 952                     (
 953                         $stemming,      $auto_truncation,
 954                         $weight_fields, $fuzzy_enabled,
 955                         $remove_stopwords
 956                     ) = ( 0, 0, 0, 0, 0 );
 957
 958                 }
 959                 # Set default structure attribute (word list)
 960                 my $struct_attr;
 961                 unless ( $indexes_set || !$index || $index =~ /(st-|phr|ext|wrdl)/ ) {
 962                     $struct_attr = ",wrdl";
 963                 }
 964
 965                 # Some helpful index variants
 966                 my $index_plus       = $index . $struct_attr . ":" if $index;
 967                 my $index_plus_comma = $index . $struct_attr . "," if $index;
 968                 if ($auto_truncation){
 969 #                                       FIXME Auto Truncation is only valid for LTR languages
 970 #                                       use C4::Output;
 971 #                                       use C4::Languages qw(regex_lang_subtags get_bidi);
 972 #                               $lang = $query->cookie('KohaOpacLanguage') if (defined $query && $query->cookie('KohaOpacLanguage'));
 973 #                                   my $current_lang = regex_lang_subtags($lang);
 974 #                                   my $bidi;
 975 #                                   $bidi = get_bidi($current_lang->{script}) if $current_lang->{script};
 976                                         $index_plus_comma .= "rtrn:";
 977                                 }
 978
 979                 # Remove Stopwords
 980                 if ($remove_stopwords) {
 981                     ( $operand, $stopwords_removed ) =
 982                       _remove_stopwords( $operand, $index );
 983                     warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
 984                     warn "REMOVED STOPWORDS: @$stopwords_removed"
 985                       if ( $stopwords_removed && $DEBUG );
 986                 }
 987
 988                 # Detect Truncation
 989                 my $truncated_operand;
 990                 my( $nontruncated, $righttruncated, $lefttruncated,
 991                     $rightlefttruncated, $regexpr
 992                 ) = _detect_truncation( $operand, $index );
 993                 warn
 994 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
 995                   if $DEBUG;
 996
 997                 # Apply Truncation
 998                 if (
 999                     scalar(@$righttruncated) + scalar(@$lefttruncated) +
1000                     scalar(@$rightlefttruncated) > 0 )
1001                 {
1002
1003                # Don't field weight or add the index to the query, we do it here
1004                     $indexes_set = 1;
1005                     undef $weight_fields;
1006                     my $previous_truncation_operand;
1007                     if ( scalar(@$nontruncated) > 0 ) {
1008                         $truncated_operand .= "$index_plus @$nontruncated ";
1009                         $previous_truncation_operand = 1;
1010                     }
1011                     if ( scalar(@$righttruncated) > 0 ) {
1012                         $truncated_operand .= "and "
1013                           if $previous_truncation_operand;
1014                         $truncated_operand .=
1015                           "$index_plus_comma" . "rtrn:@$righttruncated ";
1016                         $previous_truncation_operand = 1;
1017                     }
1018                     if ( scalar(@$lefttruncated) > 0 ) {
1019                         $truncated_operand .= "and "
1020                           if $previous_truncation_operand;
1021                         $truncated_operand .=
1022                           "$index_plus_comma" . "ltrn:@$lefttruncated ";
1023                         $previous_truncation_operand = 1;
1024                     }
1025                     if ( scalar(@$rightlefttruncated) > 0 ) {
1026                         $truncated_operand .= "and "
1027                           if $previous_truncation_operand;
1028                         $truncated_operand .=
1029                           "$index_plus_comma" . "rltrn:@$rightlefttruncated ";
1030                         $previous_truncation_operand = 1;
1031                     }
1032                 }
1033                 $operand = $truncated_operand if $truncated_operand;
1034                 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
1035
1036                 # Handle Stemming
1037                 my $stemmed_operand;
1038                 $stemmed_operand = _build_stemmed_operand($operand)
1039                   if $stemming;
1040                 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
1041
1042                 # Handle Field Weighting
1043                 my $weighted_operand;
1044                 $weighted_operand =
1045                   _build_weighted_query( $operand, $stemmed_operand, $index )
1046                   if $weight_fields;
1047                 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
1048                 $operand = $weighted_operand if $weight_fields;
1049                 $indexes_set = 1 if $weight_fields;
1050
1051                 # If there's a previous operand, we need to add an operator
1052                 if ($previous_operand) {
1053
1054                     # User-specified operator
1055                     if ( $operators[ $i - 1 ] ) {
1056                         $query     .= " $operators[$i-1] ";
1057                         $query     .= " $index_plus " unless $indexes_set;
1058                         $query     .= " $operand";
1059                         $query_cgi .= "&op=$operators[$i-1]";
1060                         $query_cgi .= "&idx=$index" if $index;
1061                         $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1062                         $query_desc .=
1063                           " $operators[$i-1] $index_plus $operands[$i]";
1064                     }
1065
1066                     # Default operator is and
1067                     else {
1068                         $query      .= " and ";
1069                         $query      .= "$index_plus " unless $indexes_set;
1070                         $query      .= "$operand";
1071                         $query_cgi  .= "&op=and&idx=$index" if $index;
1072                         $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1073                         $query_desc .= " and $index_plus $operands[$i]";
1074                     }
1075                 }
1076
1077                 # There isn't a pervious operand, don't need an operator
1078                 else {
1079
1080                     # Field-weighted queries already have indexes set
1081                     $query .= " $index_plus " unless $indexes_set;
1082                     $query .= $operand;
1083                     $query_desc .= " $index_plus $operands[$i]";
1084                     $query_cgi  .= "&idx=$index" if $index;
1085                     $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1086                     $previous_operand = 1;
1087                 }
1088             }    #/if $operands
1089         }    # /for
1090     }
1091     warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
1092
1093     # add limits
1094     my $group_OR_limits;
1095     my $availability_limit;
1096     foreach my $this_limit (@limits) {
1097         if ( $this_limit =~ /available/ ) {
1098
1099 # 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1100 # In English:
1101 # all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1102             $availability_limit .=
1103 "( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and (lost,st-numeric=0) )"; #or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )";
1104             $limit_cgi  .= "&limit=available";
1105             $limit_desc .= "";
1106         }
1107
1108         # group_OR_limits, prefixed by mc-
1109         # OR every member of the group
1110         elsif ( $this_limit =~ /mc/ ) {
1111             $group_OR_limits .= " or " if $group_OR_limits;
1112             $limit_desc      .= " or " if $group_OR_limits;
1113             $group_OR_limits .= "$this_limit";
1114             $limit_cgi       .= "&limit=$this_limit";
1115             $limit_desc      .= " $this_limit";
1116         }
1117
1118         # Regular old limits
1119         else {
1120             if ($this_limit){
1121                 $limit .= " and " if $limit || $query;
1122                 $limit      .= "$this_limit";
1123                 $limit_cgi  .= "&limit=$this_limit";
1124                 $limit_desc .= " $this_limit";
1125             }
1126         }
1127     }
1128     if ($group_OR_limits) {
1129         $limit .= " and " if ( $query || $limit );
1130         $limit .= "($group_OR_limits)";
1131     }
1132     if ($availability_limit) {
1133         $limit .= " and " if ( $query || $limit );
1134         $limit .= "($availability_limit)";
1135     }
1136
1137     # Normalize the query and limit strings
1138     $query =~ s/:/=/g;
1139     $limit =~ s/:/=/g;
1140     for ( $query, $query_desc, $limit, $limit_desc ) {
1141         $_ =~ s/  / /g;    # remove extra spaces
1142         $_ =~ s/^ //g;     # remove any beginning spaces
1143         $_ =~ s/ $//g;     # remove any ending spaces
1144         $_ =~ s/==/=/g;    # remove double == from query
1145     }
1146     $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1147
1148     for ($query_cgi,$simple_query) {
1149         $_ =~ s/"//g;
1150     }
1151     # append the limit to the query
1152     $query .= " " . $limit;
1153
1154     # Warnings if DEBUG
1155     if ($DEBUG) {
1156         warn "QUERY:" . $query;
1157         warn "QUERY CGI:" . $query_cgi;
1158         warn "QUERY DESC:" . $query_desc;
1159         warn "LIMIT:" . $limit;
1160         warn "LIMIT CGI:" . $limit_cgi;
1161         warn "LIMIT DESC:" . $limit_desc;
1162         warn "---------\nLeave buildQuery\n---------";
1163     }
1164     return (
1165         undef,              $query, $simple_query, $query_cgi,
1166         $query_desc,        $limit, $limit_cgi,    $limit_desc,
1167         $stopwords_removed, $query_type
1168     );
1169 }
1170
1171 =head2 searchResults
1172
1173 Format results in a form suitable for passing to the template
1174
1175 =cut
1176
1177 # IMO this subroutine is pretty messy still -- it's responsible for
1178 # building the HTML output for the template
1179 sub searchResults {
1180     my ( $searchdesc, $hits, $results_per_page, $offset, $scan, @marcresults ) = @_;
1181     my $dbh = C4::Context->dbh;
1182     my $even = 1;
1183     my @newresults;
1184
1185     # add search-term highlighting via <span>s on the search terms
1186     my $span_terms_hashref;
1187     for my $span_term ( split( / /, $searchdesc ) ) {
1188         $span_term =~ s/(.*=|\)|\(|\+|\.|\*)//g;
1189         $span_terms_hashref->{$span_term}++;
1190     }
1191
1192     #Build branchnames hash
1193     #find branchname
1194     #get branch information.....
1195     my %branches;
1196     my $bsth =
1197       $dbh->prepare("SELECT branchcode,branchname FROM branches")
1198       ;    # FIXME : use C4::Koha::GetBranches
1199     $bsth->execute();
1200     while ( my $bdata = $bsth->fetchrow_hashref ) {
1201         $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'};
1202     }
1203 # FIXME - We build an authorised values hash here, using the default framework
1204 # though it is possible to have different authvals for different fws.
1205
1206     my $shelflocations =GetKohaAuthorisedValues('items.location','');
1207
1208     # get notforloan authorised value list (see $shelflocations  FIXME)
1209     my $notforloan_authorised_value = GetAuthValCode('items.notforloan','');
1210
1211     #Build itemtype hash
1212     #find itemtype & itemtype image
1213     my %itemtypes;
1214     $bsth =
1215       $dbh->prepare(
1216         "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes"
1217       );
1218     $bsth->execute();
1219     while ( my $bdata = $bsth->fetchrow_hashref ) {
1220                 foreach (qw(description imageurl summary notforloan)) {
1221                 $itemtypes{ $bdata->{'itemtype'} }->{$_} = $bdata->{$_};
1222                 }
1223     }
1224
1225     #search item field code
1226     my $sth =
1227       $dbh->prepare(
1228 "SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'"
1229       );
1230     $sth->execute;
1231     my ($itemtag) = $sth->fetchrow;
1232
1233     ## find column names of items related to MARC
1234     my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items");
1235     $sth2->execute;
1236     my %subfieldstosearch;
1237     while ( ( my $column ) = $sth2->fetchrow ) {
1238         my ( $tagfield, $tagsubfield ) =
1239           &GetMarcFromKohaField( "items." . $column, "" );
1240         $subfieldstosearch{$column} = $tagsubfield;
1241     }
1242
1243     # handle which records to actually retrieve
1244     my $times;
1245     if ( $hits && $offset + $results_per_page <= $hits ) {
1246         $times = $offset + $results_per_page;
1247     }
1248     else {
1249         $times = $hits;  # FIXME: if $hits is undefined, why do we want to equal it?
1250     }
1251
1252     # loop through all of the records we've retrieved
1253     for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
1254         my $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
1255         my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' );
1256         $oldbiblio->{subtitle} = C4::Biblio::get_koha_field_from_marc('bibliosubtitle', 'subtitle', $marcrecord, '');
1257         $oldbiblio->{result_number} = $i + 1;
1258
1259         # add imageurl to itemtype if there is one
1260         $oldbiblio->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
1261
1262         $oldbiblio->{'authorised_value_images'}  = C4::Items::get_authorised_value_images( C4::Biblio::get_biblio_authorised_values( $oldbiblio->{'biblionumber'}, $marcrecord ) );
1263         (my $aisbn) = $oldbiblio->{isbn} =~ /([\d-]*[X]*)/;
1264         $aisbn =~ s/-//g;
1265         $oldbiblio->{amazonisbn} = $aisbn;
1266                 $oldbiblio->{description} = $itemtypes{ $oldbiblio->{itemtype} }->{description};
1267  # Build summary if there is one (the summary is defined in the itemtypes table)
1268  # FIXME: is this used anywhere, I think it can be commented out? -- JF
1269         if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
1270             my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1271             my @fields  = $marcrecord->fields();
1272             foreach my $field (@fields) {
1273                 my $tag      = $field->tag();
1274                 my $tagvalue = $field->as_string();
1275                 if (! utf8::is_utf8($tagvalue)) {
1276                     utf8::decode($tagvalue);
1277                 }
1278
1279                 $summary =~
1280                   s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
1281                 unless ( $tag < 10 ) {
1282                     my @subf = $field->subfields;
1283                     for my $i ( 0 .. $#subf ) {
1284                         my $subfieldcode  = $subf[$i][0];
1285                         my $subfieldvalue = $subf[$i][1];
1286                         if (! utf8::is_utf8($subfieldvalue)) {
1287                             utf8::decode($subfieldvalue);
1288                         }
1289                         my $tagsubf       = $tag . $subfieldcode;
1290                         $summary =~
1291 s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
1292                     }
1293                 }
1294             }
1295             # FIXME: yuk
1296             $summary =~ s/\[(.*?)]//g;
1297             $summary =~ s/\n/<br\/>/g;
1298             $oldbiblio->{summary} = $summary;
1299         }
1300
1301         # save an author with no <span> tag, for the <a href=search.pl?q=<!--tmpl_var name="author"-->> link
1302         $oldbiblio->{'author_nospan'} = $oldbiblio->{'author'};
1303         $oldbiblio->{'title_nospan'} = $oldbiblio->{'title'};
1304         $oldbiblio->{'subtitle_nospan'} = $oldbiblio->{'subtitle'};
1305         # Add search-term highlighting to the whole record where they match using <span>s
1306         if (C4::Context->preference("OpacHighlightedWords")){
1307             my $searchhighlightblob;
1308             for my $highlight_field ( $marcrecord->fields ) {
1309
1310     # FIXME: need to skip title, subtitle, author, etc., as they are handled below
1311                 next if $highlight_field->tag() =~ /(^00)/;    # skip fixed fields
1312                 for my $subfield ($highlight_field->subfields()) {
1313                     my $match;
1314                     next if $subfield->[0] eq '9';
1315                     my $field = $subfield->[1];
1316                     for my $term ( keys %$span_terms_hashref ) {
1317                         if ( ( $field =~ /$term/i ) && (( length($term) > 3 ) || ($field =~ / $term /i)) ) {
1318                             $field =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1319                         $match++;
1320                         }
1321                     }
1322                     $searchhighlightblob .= $field . " ... " if $match;
1323                 }
1324
1325             }
1326             $searchhighlightblob = ' ... '.$searchhighlightblob if $searchhighlightblob;
1327             $oldbiblio->{'searchhighlightblob'} = $searchhighlightblob;
1328         }
1329
1330         # Add search-term highlighting to the title, subtitle, etc. fields
1331         for my $term ( keys %$span_terms_hashref ) {
1332             my $old_term = $term;
1333             if ( length($term) > 3 ) {
1334                 $term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\]|\\|\*)//g;
1335                                 foreach(qw(title subtitle author publishercode place pages notes size)) {
1336                         $oldbiblio->{$_} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1337                                 }
1338             }
1339         }
1340
1341         ($i % 2) and $oldbiblio->{'toggle'} = 1;
1342
1343         # Pull out the items fields
1344         my @fields = $marcrecord->field($itemtag);
1345
1346         # Setting item statuses for display
1347         my @available_items_loop;
1348         my @onloan_items_loop;
1349         my @other_items_loop;
1350
1351         my $available_items;
1352         my $onloan_items;
1353         my $other_items;
1354
1355         my $ordered_count         = 0;
1356         my $available_count       = 0;
1357         my $onloan_count          = 0;
1358         my $longoverdue_count     = 0;
1359         my $other_count           = 0;
1360         my $wthdrawn_count        = 0;
1361         my $itemlost_count        = 0;
1362         my $itembinding_count     = 0;
1363         my $itemdamaged_count     = 0;
1364         my $item_in_transit_count = 0;
1365         my $can_place_holds       = 0;
1366         my $items_count           = scalar(@fields);
1367         my $maxitems =
1368           ( C4::Context->preference('maxItemsinSearchResults') )
1369           ? C4::Context->preference('maxItemsinSearchResults') - 1
1370           : 1;
1371
1372         # loop through every item
1373         foreach my $field (@fields) {
1374             my $item;
1375
1376             # populate the items hash
1377             foreach my $code ( keys %subfieldstosearch ) {
1378                 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1379             }
1380                         my $hbranch     = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'homebranch'    : 'holdingbranch';
1381                         my $otherbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'holdingbranch' : 'homebranch';
1382             # set item's branch name, use HomeOrHoldingBranch syspref first, fall back to the other one
1383             if ($item->{$hbranch}) {
1384                 $item->{'branchname'} = $branches{$item->{$hbranch}};
1385             }
1386             elsif ($item->{$otherbranch}) {     # Last resort
1387                 $item->{'branchname'} = $branches{$item->{$otherbranch}};
1388             }
1389
1390                         my $prefix = $item->{$hbranch} . '--' . $item->{location} . $item->{itype} . $item->{itemcallnumber};
1391 # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
1392             if ( $item->{onloan} ) {
1393                 $onloan_count++;
1394                                 my $key = $prefix . $item->{due_date};
1395                                 $onloan_items->{$key}->{due_date} = format_date($item->{onloan});
1396                                 $onloan_items->{$key}->{count}++ if $item->{$hbranch};
1397                                 $onloan_items->{$key}->{branchname} = $item->{branchname};
1398                                 $onloan_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1399                                 $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber};
1400                                 $onloan_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1401                                 $onloan_items->{$key}->{barcode} = $item->{barcode};
1402                 # if something's checked out and lost, mark it as 'long overdue'
1403                 if ( $item->{itemlost} ) {
1404                     $onloan_items->{$prefix}->{longoverdue}++;
1405                     $longoverdue_count++;
1406                 } else {        # can place holds as long as item isn't lost
1407                     $can_place_holds = 1;
1408                 }
1409             }
1410
1411          # items not on loan, but still unavailable ( lost, withdrawn, damaged )
1412             else {
1413
1414                 # item is on order
1415                 if ( $item->{notforloan} == -1 ) {
1416                     $ordered_count++;
1417                 }
1418
1419                 # is item in transit?
1420                 my $transfertwhen = '';
1421                 my ($transfertfrom, $transfertto);
1422
1423                 unless ($item->{wthdrawn}
1424                         || $item->{itemlost}
1425                         || $item->{damaged}
1426                         || $item->{notforloan}
1427                         || $items_count > 20) {
1428
1429                     # A couple heuristics to limit how many times
1430                     # we query the database for item transfer information, sacrificing
1431                     # accuracy in some cases for speed;
1432                     #
1433                     # 1. don't query if item has one of the other statuses
1434                     # 2. don't check transit status if the bib has
1435                     #    more than 20 items
1436                     #
1437                     # FIXME: to avoid having the query the database like this, and to make
1438                     #        the in transit status count as unavailable for search limiting,
1439                     #        should map transit status to record indexed in Zebra.
1440                     #
1441                     ($transfertwhen, $transfertfrom, $transfertto) = C4::Circulation::GetTransfers($item->{itemnumber});
1442                 }
1443
1444                 # item is withdrawn, lost or damaged
1445                 if (   $item->{wthdrawn}
1446                     || $item->{itemlost}
1447                     || $item->{damaged}
1448                     || $item->{notforloan}
1449                     || ($transfertwhen ne ''))
1450                 {
1451                     $wthdrawn_count++        if $item->{wthdrawn};
1452                     $itemlost_count++        if $item->{itemlost};
1453                     $itemdamaged_count++     if $item->{damaged};
1454                     $item_in_transit_count++ if $transfertwhen ne '';
1455                     $item->{status} = $item->{wthdrawn} . "-" . $item->{itemlost} . "-" . $item->{damaged} . "-" . $item->{notforloan};
1456                     $other_count++;
1457
1458                                         my $key = $prefix . $item->{status};
1459                                         foreach (qw(wthdrawn itemlost damaged branchname itemcallnumber)) {
1460                         $other_items->{$key}->{$_} = $item->{$_};
1461                                         }
1462                     $other_items->{$key}->{intransit} = ($transfertwhen ne '') ? 1 : 0;
1463                                         $other_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1464                                         $other_items->{$key}->{count}++ if $item->{$hbranch};
1465                                         $other_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1466                                         $other_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1467                                         $other_items->{$key}->{barcode} = $item->{barcode};
1468                 }
1469                 # item is available
1470                 else {
1471                     $can_place_holds = 1;
1472                     $available_count++;
1473                                         $available_items->{$prefix}->{count}++ if $item->{$hbranch};
1474                                         foreach (qw(branchname itemcallnumber barcode)) {
1475                         $available_items->{$prefix}->{$_} = $item->{$_};
1476                                         }
1477                                         $available_items->{$prefix}->{location} = $shelflocations->{ $item->{location} };
1478                                         $available_items->{$prefix}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1479                 }
1480             }
1481         }    # notforloan, item level and biblioitem level
1482         my ( $availableitemscount, $onloanitemscount, $otheritemscount );
1483         $maxitems =
1484           ( C4::Context->preference('maxItemsinSearchResults') )
1485           ? C4::Context->preference('maxItemsinSearchResults') - 1
1486           : 1;
1487         for my $key ( sort keys %$onloan_items ) {
1488             (++$onloanitemscount > $maxitems) and last;
1489             push @onloan_items_loop, $onloan_items->{$key};
1490         }
1491         for my $key ( sort keys %$other_items ) {
1492             (++$otheritemscount > $maxitems) and last;
1493             push @other_items_loop, $other_items->{$key};
1494         }
1495         for my $key ( sort keys %$available_items ) {
1496             (++$availableitemscount > $maxitems) and last;
1497             push @available_items_loop, $available_items->{$key}
1498         }
1499
1500         # XSLT processing of some stuff
1501         if (C4::Context->preference("XSLTResultsDisplay") && !$scan) {
1502             my $newxmlrecord = XSLTParse4Display($oldbiblio->{biblionumber}, $marcrecord, C4::Context->config('opachtdocs')."/prog/en/xslt/MARC21slim2OPACResults.xsl");
1503             $oldbiblio->{XSLTResultsRecord} = $newxmlrecord;
1504         }
1505
1506         # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
1507         $can_place_holds = 0
1508           if $itemtypes{ $oldbiblio->{itemtype} }->{notforloan};
1509         $oldbiblio->{norequests} = 1 unless $can_place_holds;
1510         $oldbiblio->{itemsplural}          = 1 if $items_count > 1;
1511         $oldbiblio->{items_count}          = $items_count;
1512         $oldbiblio->{available_items_loop} = \@available_items_loop;
1513         $oldbiblio->{onloan_items_loop}    = \@onloan_items_loop;
1514         $oldbiblio->{other_items_loop}     = \@other_items_loop;
1515         $oldbiblio->{availablecount}       = $available_count;
1516         $oldbiblio->{availableplural}      = 1 if $available_count > 1;
1517         $oldbiblio->{onloancount}          = $onloan_count;
1518         $oldbiblio->{onloanplural}         = 1 if $onloan_count > 1;
1519         $oldbiblio->{othercount}           = $other_count;
1520         $oldbiblio->{otherplural}          = 1 if $other_count > 1;
1521         $oldbiblio->{wthdrawncount}        = $wthdrawn_count;
1522         $oldbiblio->{itemlostcount}        = $itemlost_count;
1523         $oldbiblio->{damagedcount}         = $itemdamaged_count;
1524         $oldbiblio->{intransitcount}       = $item_in_transit_count;
1525         $oldbiblio->{orderedcount}         = $ordered_count;
1526         $oldbiblio->{isbn} =~
1527           s/-//g;    # deleting - in isbn to enable amazon content
1528         push( @newresults, $oldbiblio );
1529     }
1530     return @newresults;
1531 }
1532
1533 #----------------------------------------------------------------------
1534 #
1535 # Non-Zebra GetRecords#
1536 #----------------------------------------------------------------------
1537
1538 =head2 NZgetRecords
1539
1540   NZgetRecords has the same API as zera getRecords, even if some parameters are not managed
1541
1542 =cut
1543
1544 sub NZgetRecords {
1545     my (
1546         $query,            $simple_query, $sort_by_ref,    $servers_ref,
1547         $results_per_page, $offset,       $expanded_facet, $branches,
1548         $query_type,       $scan
1549     ) = @_;
1550     warn "query =$query" if $DEBUG;
1551     my $result = NZanalyse($query);
1552     warn "results =$result" if $DEBUG;
1553     return ( undef,
1554         NZorder( $result, @$sort_by_ref[0], $results_per_page, $offset ),
1555         undef );
1556 }
1557
1558 =head2 NZanalyse
1559
1560   NZanalyse : get a CQL string as parameter, and returns a list of biblionumber;title,biblionumber;title,...
1561   the list is built from an inverted index in the nozebra SQL table
1562   note that title is here only for convenience : the sorting will be very fast when requested on title
1563   if the sorting is requested on something else, we will have to reread all results, and that may be longer.
1564
1565 =cut
1566
1567 sub NZanalyse {
1568     my ( $string, $server ) = @_;
1569 #     warn "---------"       if $DEBUG;
1570     warn " NZanalyse" if $DEBUG;
1571 #     warn "---------"       if $DEBUG;
1572
1573  # $server contains biblioserver or authorities, depending on what we search on.
1574  #warn "querying : $string on $server";
1575     $server = 'biblioserver' unless $server;
1576
1577 # if we have a ", replace the content to discard temporarily any and/or/not inside
1578     my $commacontent;
1579     if ( $string =~ /"/ ) {
1580         $string =~ s/"(.*?)"/__X__/;
1581         $commacontent = $1;
1582         warn "commacontent : $commacontent" if $DEBUG;
1583     }
1584
1585 # split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y"
1586 # then, call again NZanalyse with $left and $right
1587 # (recursive until we find a leaf (=> something without and/or/not)
1588 # delete repeated operator... Would then go in infinite loop
1589     while ( $string =~ s/( and| or| not| AND| OR| NOT)\1/$1/g ) {
1590     }
1591
1592     #process parenthesis before.
1593     if ( $string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/ ) {
1594         my $left     = $1;
1595         my $right    = $4;
1596         my $operator = lc($3);   # FIXME: and/or/not are operators, not operands
1597         warn
1598 "dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right"
1599           if $DEBUG;
1600         my $leftresult = NZanalyse( $left, $server );
1601         if ($operator) {
1602             my $rightresult = NZanalyse( $right, $server );
1603
1604             # OK, we have the results for right and left part of the query
1605             # depending of operand, intersect, union or exclude both lists
1606             # to get a result list
1607             if ( $operator eq ' and ' ) {
1608                 return NZoperatorAND($leftresult,$rightresult);
1609             }
1610             elsif ( $operator eq ' or ' ) {
1611
1612                 # just merge the 2 strings
1613                 return $leftresult . $rightresult;
1614             }
1615             elsif ( $operator eq ' not ' ) {
1616                 return NZoperatorNOT($leftresult,$rightresult);
1617             }
1618         }
1619         else {
1620 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1621             return $leftresult;
1622         }
1623     }
1624     warn "string :" . $string if $DEBUG;
1625     my $left = "";
1626     my $right = "";
1627     my $operator = "";
1628     if ($string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/) {
1629         $left     = $1;
1630         $right    = $3;
1631         $operator = lc($2);    # FIXME: and/or/not are operators, not operands
1632     }
1633     warn "no parenthesis. left : $left operator: $operator right: $right"
1634       if $DEBUG;
1635
1636     # it's not a leaf, we have a and/or/not
1637     if ($operator) {
1638
1639         # reintroduce comma content if needed
1640         $right =~ s/__X__/"$commacontent"/ if $commacontent;
1641         $left  =~ s/__X__/"$commacontent"/ if $commacontent;
1642         warn "node : $left / $operator / $right\n" if $DEBUG;
1643         my $leftresult  = NZanalyse( $left,  $server );
1644         my $rightresult = NZanalyse( $right, $server );
1645         warn " leftresult : $leftresult" if $DEBUG;
1646         warn " rightresult : $rightresult" if $DEBUG;
1647         # OK, we have the results for right and left part of the query
1648         # depending of operand, intersect, union or exclude both lists
1649         # to get a result list
1650         if ( $operator eq ' and ' ) {
1651             warn "NZAND";
1652             return NZoperatorAND($leftresult,$rightresult);
1653         }
1654         elsif ( $operator eq ' or ' ) {
1655
1656             # just merge the 2 strings
1657             return $leftresult . $rightresult;
1658         }
1659         elsif ( $operator eq ' not ' ) {
1660             return NZoperatorNOT($leftresult,$rightresult);
1661         }
1662         else {
1663
1664 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1665             die "error : operand unknown : $operator for $string";
1666         }
1667
1668         # it's a leaf, do the real SQL query and return the result
1669     }
1670     else {
1671         $string =~ s/__X__/"$commacontent"/ if $commacontent;
1672         $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g;
1673         #remove trailing blank at the beginning
1674         $string =~ s/^ //g;
1675         warn "leaf:$string" if $DEBUG;
1676
1677         # parse the string in in operator/operand/value again
1678         my $left = "";
1679         my $operator = "";
1680         my $right = "";
1681         if ($string =~ /(.*)(>=|<=)(.*)/) {
1682             $left     = $1;
1683             $operator = $2;
1684             $right    = $3;
1685         } else {
1686             $left = $string;
1687         }
1688 #         warn "handling leaf... left:$left operator:$operator right:$right"
1689 #           if $DEBUG;
1690         unless ($operator) {
1691             if ($string =~ /(.*)(>|<|=)(.*)/) {
1692                 $left     = $1;
1693                 $operator = $2;
1694                 $right    = $3;
1695                 warn
1696     "handling unless (operator)... left:$left operator:$operator right:$right"
1697                 if $DEBUG;
1698             } else {
1699                 $left = $string;
1700             }
1701         }
1702         my $results;
1703
1704 # strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr...
1705         $left =~ s/ .*$//;
1706
1707         # automatic replace for short operators
1708         $left = 'title'            if $left =~ '^ti$';
1709         $left = 'author'           if $left =~ '^au$';
1710         $left = 'publisher'        if $left =~ '^pb$';
1711         $left = 'subject'          if $left =~ '^su$';
1712         $left = 'koha-Auth-Number' if $left =~ '^an$';
1713         $left = 'keyword'          if $left =~ '^kw$';
1714         warn "handling leaf... left:$left operator:$operator right:$right" if $DEBUG;
1715         if ( $operator && $left ne 'keyword' ) {
1716
1717             #do a specific search
1718             my $dbh = C4::Context->dbh;
1719             $operator = 'LIKE' if $operator eq '=' and $right =~ /%/;
1720             my $sth =
1721               $dbh->prepare(
1722 "SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?"
1723               );
1724             warn "$left / $operator / $right\n" if $DEBUG;
1725
1726             # split each word, query the DB and build the biblionumbers result
1727             #sanitizing leftpart
1728             $left =~ s/^\s+|\s+$//;
1729             foreach ( split / /, $right ) {
1730                 my $biblionumbers;
1731                 $_ =~ s/^\s+|\s+$//;
1732                 next unless $_;
1733                 warn "EXECUTE : $server, $left, $_" if $DEBUG;
1734                 $sth->execute( $server, $left, $_ )
1735                   or warn "execute failed: $!";
1736                 while ( my ( $line, $value ) = $sth->fetchrow ) {
1737
1738 # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
1739 # otherwise, fill the result
1740                     $biblionumbers .= $line
1741                       unless ( $right =~ /^\d+$/ && $value =~ /\D/ );
1742                     warn "result : $value "
1743                       . ( $right  =~ /\d/ ) . "=="
1744                       . ( $value =~ /\D/?$line:"" ) if $DEBUG;         #= $line";
1745                 }
1746
1747 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1748                 if ($results) {
1749                     warn "NZAND" if $DEBUG;
1750                     $results = NZoperatorAND($biblionumbers,$results);
1751                 }
1752                 else {
1753                     $results = $biblionumbers;
1754                 }
1755             }
1756         }
1757         else {
1758
1759       #do a complete search (all indexes), if index='kw' do complete search too.
1760             my $dbh = C4::Context->dbh;
1761             my $sth =
1762               $dbh->prepare(
1763 "SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?"
1764               );
1765
1766             # split each word, query the DB and build the biblionumbers result
1767             foreach ( split / /, $string ) {
1768                 next if C4::Context->stopwords->{ uc($_) };   # skip if stopword
1769                 warn "search on all indexes on $_" if $DEBUG;
1770                 my $biblionumbers;
1771                 next unless $_;
1772                 $sth->execute( $server, $_ );
1773                 while ( my $line = $sth->fetchrow ) {
1774                     $biblionumbers .= $line;
1775                 }
1776
1777 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1778                 if ($results) {
1779                     $results = NZoperatorAND($biblionumbers,$results);
1780                 }
1781                 else {
1782                     warn "NEW RES for $_ = $biblionumbers" if $DEBUG;
1783                     $results = $biblionumbers;
1784                 }
1785             }
1786         }
1787         warn "return : $results for LEAF : $string" if $DEBUG;
1788         return $results;
1789     }
1790     warn "---------\nLeave NZanalyse\n---------" if $DEBUG;
1791 }
1792
1793 sub NZoperatorAND{
1794     my ($rightresult, $leftresult)=@_;
1795
1796     my @leftresult = split /;/, $leftresult;
1797     warn " @leftresult / $rightresult \n" if $DEBUG;
1798
1799     #             my @rightresult = split /;/,$leftresult;
1800     my $finalresult;
1801
1802 # parse the left results, and if the biblionumber exist in the right result, save it in finalresult
1803 # the result is stored twice, to have the same weight for AND than OR.
1804 # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130
1805 # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64
1806     foreach (@leftresult) {
1807         my $value = $_;
1808         my $countvalue;
1809         ( $value, $countvalue ) = ( $1, $2 ) if ($value=~/(.*)-(\d+)$/);
1810         if ( $rightresult =~ /\Q$value\E-(\d+);/ ) {
1811             $countvalue = ( $1 > $countvalue ? $countvalue : $1 );
1812             $finalresult .=
1813                 "$value-$countvalue;$value-$countvalue;";
1814         }
1815     }
1816     warn "NZAND DONE : $finalresult \n" if $DEBUG;
1817     return $finalresult;
1818 }
1819
1820 sub NZoperatorOR{
1821     my ($rightresult, $leftresult)=@_;
1822     return $rightresult.$leftresult;
1823 }
1824
1825 sub NZoperatorNOT{
1826     my ($leftresult, $rightresult)=@_;
1827
1828     my @leftresult = split /;/, $leftresult;
1829
1830     #             my @rightresult = split /;/,$leftresult;
1831     my $finalresult;
1832     foreach (@leftresult) {
1833         my $value=$_;
1834         $value=$1 if $value=~m/(.*)-\d+$/;
1835         unless ($rightresult =~ "$value-") {
1836             $finalresult .= "$_;";
1837         }
1838     }
1839     return $finalresult;
1840 }
1841
1842 =head2 NZorder
1843
1844   $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset);
1845
1846   TODO :: Description
1847
1848 =cut
1849
1850 sub NZorder {
1851     my ( $biblionumbers, $ordering, $results_per_page, $offset ) = @_;
1852     warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG;
1853
1854     # order title asc by default
1855     #     $ordering = '1=36 <i' unless $ordering;
1856     $results_per_page = 20 unless $results_per_page;
1857     $offset           = 0  unless $offset;
1858     my $dbh = C4::Context->dbh;
1859
1860     #
1861     # order by POPULARITY
1862     #
1863     if ( $ordering =~ /popularity/ ) {
1864         my %result;
1865         my %popularity;
1866
1867         # popularity is not in MARC record, it's builded from a specific query
1868         my $sth =
1869           $dbh->prepare("select sum(issues) from items where biblionumber=?");
1870         foreach ( split /;/, $biblionumbers ) {
1871             my ( $biblionumber, $title ) = split /,/, $_;
1872             $result{$biblionumber} = GetMarcBiblio($biblionumber);
1873             $sth->execute($biblionumber);
1874             my $popularity = $sth->fetchrow || 0;
1875
1876 # hint : the key is popularity.title because we can have
1877 # many results with the same popularity. In this cas, sub-ordering is done by title
1878 # we also have biblionumber to avoid bug for 2 biblios with the same title & popularity
1879 # (un-frequent, I agree, but we won't forget anything that way ;-)
1880             $popularity{ sprintf( "%10d", $popularity ) . $title
1881                   . $biblionumber } = $biblionumber;
1882         }
1883
1884     # sort the hash and return the same structure as GetRecords (Zebra querying)
1885         my $result_hash;
1886         my $numbers = 0;
1887         if ( $ordering eq 'popularity_dsc' ) {    # sort popularity DESC
1888             foreach my $key ( sort { $b cmp $a } ( keys %popularity ) ) {
1889                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1890                   $result{ $popularity{$key} }->as_usmarc();
1891             }
1892         }
1893         else {                                    # sort popularity ASC
1894             foreach my $key ( sort ( keys %popularity ) ) {
1895                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1896                   $result{ $popularity{$key} }->as_usmarc();
1897             }
1898         }
1899         my $finalresult = ();
1900         $result_hash->{'hits'}         = $numbers;
1901         $finalresult->{'biblioserver'} = $result_hash;
1902         return $finalresult;
1903
1904         #
1905         # ORDER BY author
1906         #
1907     }
1908     elsif ( $ordering =~ /author/ ) {
1909         my %result;
1910         foreach ( split /;/, $biblionumbers ) {
1911             my ( $biblionumber, $title ) = split /,/, $_;
1912             my $record = GetMarcBiblio($biblionumber);
1913             my $author;
1914             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1915                 $author = $record->subfield( '200', 'f' );
1916                 $author = $record->subfield( '700', 'a' ) unless $author;
1917             }
1918             else {
1919                 $author = $record->subfield( '100', 'a' );
1920             }
1921
1922 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1923 # and we don't want to get only 1 result for each of them !!!
1924             $result{ $author . $biblionumber } = $record;
1925         }
1926
1927     # sort the hash and return the same structure as GetRecords (Zebra querying)
1928         my $result_hash;
1929         my $numbers = 0;
1930         if ( $ordering eq 'author_za' ) {    # sort by author desc
1931             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1932                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1933                   $result{$key}->as_usmarc();
1934             }
1935         }
1936         else {                               # sort by author ASC
1937             foreach my $key ( sort ( keys %result ) ) {
1938                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1939                   $result{$key}->as_usmarc();
1940             }
1941         }
1942         my $finalresult = ();
1943         $result_hash->{'hits'}         = $numbers;
1944         $finalresult->{'biblioserver'} = $result_hash;
1945         return $finalresult;
1946
1947         #
1948         # ORDER BY callnumber
1949         #
1950     }
1951     elsif ( $ordering =~ /callnumber/ ) {
1952         my %result;
1953         foreach ( split /;/, $biblionumbers ) {
1954             my ( $biblionumber, $title ) = split /,/, $_;
1955             my $record = GetMarcBiblio($biblionumber);
1956             my $callnumber;
1957             my ( $callnumber_tag, $callnumber_subfield ) =
1958               GetMarcFromKohaField( 'items.itemcallnumber','' );
1959             ( $callnumber_tag, $callnumber_subfield ) =
1960               GetMarcFromKohaField('biblioitems.callnumber','')
1961               unless $callnumber_tag;
1962             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1963                 $callnumber = $record->subfield( '200', 'f' );
1964             }
1965             else {
1966                 $callnumber = $record->subfield( '100', 'a' );
1967             }
1968
1969 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1970 # and we don't want to get only 1 result for each of them !!!
1971             $result{ $callnumber . $biblionumber } = $record;
1972         }
1973
1974     # sort the hash and return the same structure as GetRecords (Zebra querying)
1975         my $result_hash;
1976         my $numbers = 0;
1977         if ( $ordering eq 'call_number_dsc' ) {    # sort by title desc
1978             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1979                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1980                   $result{$key}->as_usmarc();
1981             }
1982         }
1983         else {                                     # sort by title ASC
1984             foreach my $key ( sort { $a cmp $b } ( keys %result ) ) {
1985                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1986                   $result{$key}->as_usmarc();
1987             }
1988         }
1989         my $finalresult = ();
1990         $result_hash->{'hits'}         = $numbers;
1991         $finalresult->{'biblioserver'} = $result_hash;
1992         return $finalresult;
1993     }
1994     elsif ( $ordering =~ /pubdate/ ) {             #pub year
1995         my %result;
1996         foreach ( split /;/, $biblionumbers ) {
1997             my ( $biblionumber, $title ) = split /,/, $_;
1998             my $record = GetMarcBiblio($biblionumber);
1999             my ( $publicationyear_tag, $publicationyear_subfield ) =
2000               GetMarcFromKohaField( 'biblioitems.publicationyear', '' );
2001             my $publicationyear =
2002               $record->subfield( $publicationyear_tag,
2003                 $publicationyear_subfield );
2004
2005 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2006 # and we don't want to get only 1 result for each of them !!!
2007             $result{ $publicationyear . $biblionumber } = $record;
2008         }
2009
2010     # sort the hash and return the same structure as GetRecords (Zebra querying)
2011         my $result_hash;
2012         my $numbers = 0;
2013         if ( $ordering eq 'pubdate_dsc' ) {    # sort by pubyear desc
2014             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2015                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2016                   $result{$key}->as_usmarc();
2017             }
2018         }
2019         else {                                 # sort by pub year ASC
2020             foreach my $key ( sort ( keys %result ) ) {
2021                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2022                   $result{$key}->as_usmarc();
2023             }
2024         }
2025         my $finalresult = ();
2026         $result_hash->{'hits'}         = $numbers;
2027         $finalresult->{'biblioserver'} = $result_hash;
2028         return $finalresult;
2029
2030         #
2031         # ORDER BY title
2032         #
2033     }
2034     elsif ( $ordering =~ /title/ ) {
2035
2036 # the title is in the biblionumbers string, so we just need to build a hash, sort it and return
2037         my %result;
2038         foreach ( split /;/, $biblionumbers ) {
2039             my ( $biblionumber, $title ) = split /,/, $_;
2040
2041 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2042 # and we don't want to get only 1 result for each of them !!!
2043 # hint & speed improvement : we can order without reading the record
2044 # so order, and read records only for the requested page !
2045             $result{ $title . $biblionumber } = $biblionumber;
2046         }
2047
2048     # sort the hash and return the same structure as GetRecords (Zebra querying)
2049         my $result_hash;
2050         my $numbers = 0;
2051         if ( $ordering eq 'title_az' ) {    # sort by title desc
2052             foreach my $key ( sort ( keys %result ) ) {
2053                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2054             }
2055         }
2056         else {                              # sort by title ASC
2057             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2058                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2059             }
2060         }
2061
2062         # limit the $results_per_page to result size if it's more
2063         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2064
2065         # for the requested page, replace biblionumber by the complete record
2066         # speed improvement : avoid reading too much things
2067         for (
2068             my $counter = $offset ;
2069             $counter <= $offset + $results_per_page ;
2070             $counter++
2071           )
2072         {
2073             $result_hash->{'RECORDS'}[$counter] =
2074               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc;
2075         }
2076         my $finalresult = ();
2077         $result_hash->{'hits'}         = $numbers;
2078         $finalresult->{'biblioserver'} = $result_hash;
2079         return $finalresult;
2080     }
2081     else {
2082
2083 #
2084 # order by ranking
2085 #
2086 # we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking
2087         my %result;
2088         my %count_ranking;
2089         foreach ( split /;/, $biblionumbers ) {
2090             my ( $biblionumber, $title ) = split /,/, $_;
2091             $title =~ /(.*)-(\d)/;
2092
2093             # get weight
2094             my $ranking = $2;
2095
2096 # note that we + the ranking because ranking is calculated on weight of EACH term requested.
2097 # if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N
2098 # biblio N has ranking = 6
2099             $count_ranking{$biblionumber} += $ranking;
2100         }
2101
2102 # build the result by "inverting" the count_ranking hash
2103 # hing : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead
2104 #         warn "counting";
2105         foreach ( keys %count_ranking ) {
2106             $result{ sprintf( "%10d", $count_ranking{$_} ) . '-' . $_ } = $_;
2107         }
2108
2109     # sort the hash and return the same structure as GetRecords (Zebra querying)
2110         my $result_hash;
2111         my $numbers = 0;
2112         foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2113             $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2114         }
2115
2116         # limit the $results_per_page to result size if it's more
2117         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2118
2119         # for the requested page, replace biblionumber by the complete record
2120         # speed improvement : avoid reading too much things
2121         for (
2122             my $counter = $offset ;
2123             $counter <= $offset + $results_per_page ;
2124             $counter++
2125           )
2126         {
2127             $result_hash->{'RECORDS'}[$counter] =
2128               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc
2129               if $result_hash->{'RECORDS'}[$counter];
2130         }
2131         my $finalresult = ();
2132         $result_hash->{'hits'}         = $numbers;
2133         $finalresult->{'biblioserver'} = $result_hash;
2134         return $finalresult;
2135     }
2136 }
2137
2138 END { }    # module clean-up code here (global destructor)
2139
2140 1;
2141 __END__
2142
2143 =head1 AUTHOR
2144
2145 Koha Developement team <info@koha.org>
2146
2147 =cut