C4/Search.pm

   1 package C4::Search;
   2
   3 # This file is part of Koha.
   4 #
   5 # Koha is free software; you can redistribute it and/or modify it under the
   6 # terms of the GNU General Public License as published by the Free Software
   7 # Foundation; either version 2 of the License, or (at your option) any later
   8 # version.
   9 #
  10 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
  11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12 # A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU General Public License along with
  15 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  16 # Suite 330, Boston, MA  02111-1307 USA
  17
  18 use strict;
  19 # use warnings; # FIXME
  20 require Exporter;
  21 use C4::Context;
  22 use C4::Biblio;    # GetMarcFromKohaField
  23 use C4::Koha;      # getFacets
  24 use Lingua::Stem;
  25 use C4::Search::PazPar2;
  26 use XML::Simple;
  27 use C4::Dates qw(format_date);
  28 use C4::XSLT;
  29
  30 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
  31
  32 # set the version for version checking
  33 BEGIN {
  34     $VERSION = 3.01;
  35     $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
  36 }
  37
  38 =head1 NAME
  39
  40 C4::Search - Functions for searching the Koha catalog.
  41
  42 =head1 SYNOPSIS
  43
  44 See opac/opac-search.pl or catalogue/search.pl for example of usage
  45
  46 =head1 DESCRIPTION
  47
  48 This module provides searching functions for Koha's bibliographic databases
  49
  50 =head1 FUNCTIONS
  51
  52 =cut
  53
  54 @ISA    = qw(Exporter);
  55 @EXPORT = qw(
  56   &findseealso
  57   &FindDuplicate
  58   &SimpleSearch
  59   &searchResults
  60   &getRecords
  61   &buildQuery
  62   &NZgetRecords
  63 );
  64
  65 # make all your functions, whether exported or not;
  66
  67 =head2 findseealso($dbh,$fields);
  68
  69 C<$dbh> is a link to the DB handler.
  70
  71 use C4::Context;
  72 my $dbh =C4::Context->dbh;
  73
  74 C<$fields> is a reference to the fields array
  75
  76 This function modifies the @$fields array and adds related fields to search on.
  77
  78 FIXME: this function is probably deprecated in Koha 3
  79
  80 =cut
  81
  82 sub findseealso {
  83     my ( $dbh, $fields ) = @_;
  84     my $tagslib = GetMarcStructure(1);
  85     for ( my $i = 0 ; $i <= $#{$fields} ; $i++ ) {
  86         my ($tag)      = substr( @$fields[$i], 1, 3 );
  87         my ($subfield) = substr( @$fields[$i], 4, 1 );
  88         @$fields[$i] .= ',' . $tagslib->{$tag}->{$subfield}->{seealso}
  89           if ( $tagslib->{$tag}->{$subfield}->{seealso} );
  90     }
  91 }
  92
  93 =head2 FindDuplicate
  94
  95 ($biblionumber,$biblionumber,$title) = FindDuplicate($record);
  96
  97 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
  98
  99 =cut
 100
 101 sub FindDuplicate {
 102     my ($record) = @_;
 103     my $dbh = C4::Context->dbh;
 104     my $result = TransformMarcToKoha( $dbh, $record, '' );
 105     my $sth;
 106     my $query;
 107     my $search;
 108     my $type;
 109     my ( $biblionumber, $title );
 110
 111     # search duplicate on ISBN, easy and fast..
 112     # ... normalize first
 113     if ( $result->{isbn} ) {
 114         $result->{isbn} =~ s/\(.*$//;
 115         $result->{isbn} =~ s/\s+$//;
 116         $query = "isbn=$result->{isbn}";
 117     }
 118     else {
 119         $result->{title} =~ s /\\//g;
 120         $result->{title} =~ s /\"//g;
 121         $result->{title} =~ s /\(//g;
 122         $result->{title} =~ s /\)//g;
 123
 124         # FIXME: instead of removing operators, could just do
 125         # quotes around the value
 126         $result->{title} =~ s/(and|or|not)//g;
 127         $query = "ti,ext=$result->{title}";
 128         $query .= " and itemtype=$result->{itemtype}"
 129           if ( $result->{itemtype} );
 130         if   ( $result->{author} ) {
 131             $result->{author} =~ s /\\//g;
 132             $result->{author} =~ s /\"//g;
 133             $result->{author} =~ s /\(//g;
 134             $result->{author} =~ s /\)//g;
 135
 136             # remove valid operators
 137             $result->{author} =~ s/(and|or|not)//g;
 138             $query .= " and au,ext=$result->{author}";
 139         }
 140     }
 141
 142     # FIXME: add error handling
 143     my ( $error, $searchresults ) = SimpleSearch($query); # FIXME :: hardcoded !
 144     my @results;
 145     foreach my $possible_duplicate_record (@$searchresults) {
 146         my $marcrecord =
 147           MARC::Record->new_from_usmarc($possible_duplicate_record);
 148         my $result = TransformMarcToKoha( $dbh, $marcrecord, '' );
 149
 150         # FIXME :: why 2 $biblionumber ?
 151         if ($result) {
 152             push @results, $result->{'biblionumber'};
 153             push @results, $result->{'title'};
 154         }
 155     }
 156     return @results;
 157 }
 158
 159 =head2 SimpleSearch
 160
 161 ( $error, $results, $total_hits ) = SimpleSearch( $query, $offset, $max_results, [@servers] );
 162
 163 This function provides a simple search API on the bibliographic catalog
 164
 165 =over 2
 166
 167 =item C<input arg:>
 168
 169     * $query can be a simple keyword or a complete CCL query
 170     * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
 171     * $offset - If present, represents the number of records at the beggining to omit. Defaults to 0
 172     * $max_results - if present, determines the maximum number of records to fetch. undef is All. defaults to undef.
 173
 174
 175 =item C<Output:>
 176
 177     * $error is a empty unless an error is detected
 178     * \@results is an array of records.
 179     * $total_hits is the number of hits that would have been returned with no limit
 180
 181 =item C<usage in the script:>
 182
 183 =back
 184
 185 my ( $error, $marcresults, $total_hits ) = SimpleSearch($query);
 186
 187 if (defined $error) {
 188     $template->param(query_error => $error);
 189     warn "error: ".$error;
 190     output_html_with_http_headers $input, $cookie, $template->output;
 191     exit;
 192 }
 193
 194 my $hits = scalar @$marcresults;
 195 my @results;
 196
 197 for my $i (0..$hits) {
 198     my %resultsloop;
 199     my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]);
 200     my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,'');
 201
 202     #build the hash for the template.
 203     $resultsloop{highlight}       = ($i % 2)?(1):(0);
 204     $resultsloop{title}           = $biblio->{'title'};
 205     $resultsloop{subtitle}        = $biblio->{'subtitle'};
 206     $resultsloop{biblionumber}    = $biblio->{'biblionumber'};
 207     $resultsloop{author}          = $biblio->{'author'};
 208     $resultsloop{publishercode}   = $biblio->{'publishercode'};
 209     $resultsloop{publicationyear} = $biblio->{'publicationyear'};
 210
 211     push @results, \%resultsloop;
 212 }
 213
 214 $template->param(result=>\@results);
 215
 216 =cut
 217
 218 sub SimpleSearch {
 219     my ( $query, $offset, $max_results, $servers )  = @_;
 220
 221     if ( C4::Context->preference('NoZebra') ) {
 222         my $result = NZorder( NZanalyse($query) )->{'biblioserver'};
 223         my $search_result =
 224           (      $result->{hits}
 225               && $result->{hits} > 0 ? $result->{'RECORDS'} : [] );
 226         return ( undef, $search_result, scalar($result->{hits}) );
 227     }
 228     else {
 229         # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
 230         my @servers = defined ( $servers ) ? @$servers : ( "biblioserver" );
 231         my @results;
 232         my @zoom_queries;
 233         my @tmpresults;
 234         my @zconns;
 235         my $total_hits;
 236         return ( "No query entered", undef, undef ) unless $query;
 237
 238         # Initialize & Search Zebra
 239         for ( my $i = 0 ; $i < @servers ; $i++ ) {
 240             eval {
 241                 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 242                 $zoom_queries[$i] = new ZOOM::Query::CCL2RPN( $query, $zconns[$i]);
 243                 $tmpresults[$i] = $zconns[$i]->search( $zoom_queries[$i] );
 244
 245                 # error handling
 246                 my $error =
 247                     $zconns[$i]->errmsg() . " ("
 248                   . $zconns[$i]->errcode() . ") "
 249                   . $zconns[$i]->addinfo() . " "
 250                   . $zconns[$i]->diagset();
 251
 252                 return ( $error, undef, undef ) if $zconns[$i]->errcode();
 253             };
 254             if ($@) {
 255
 256                 # caught a ZOOM::Exception
 257                 my $error =
 258                     $@->message() . " ("
 259                   . $@->code() . ") "
 260                   . $@->addinfo() . " "
 261                   . $@->diagset();
 262                 warn $error;
 263                 return ( $error, undef, undef );
 264             }
 265         }
 266         while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 267             my $event = $zconns[ $i - 1 ]->last_event();
 268             if ( $event == ZOOM::Event::ZEND ) {
 269
 270                 my $first_record = defined( $offset ) ? $offset+1 : 1;
 271                 my $hits = $tmpresults[ $i - 1 ]->size();
 272                 $total_hits += $hits;
 273                 my $last_record = $hits;
 274                 if ( defined $max_results && $offset + $max_results < $hits ) {
 275                     $last_record  = $offset + $max_results;
 276                 }
 277
 278                 for my $j ( $first_record..$last_record ) {
 279                     my $record = $tmpresults[ $i - 1 ]->record( $j-1 )->raw(); # 0 indexed
 280                     push @results, $record;
 281                 }
 282             }
 283         }
 284
 285         foreach my $result (@tmpresults) {
 286             $result->destroy();
 287         }
 288         foreach my $zoom_query (@zoom_queries) {
 289             $zoom_query->destroy();
 290         }
 291
 292         return ( undef, \@results, $total_hits );
 293     }
 294 }
 295
 296 =head2 getRecords
 297
 298 ( undef, $results_hashref, \@facets_loop ) = getRecords (
 299
 300         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 301         $results_per_page, $offset,       $expanded_facet, $branches,
 302         $query_type,       $scan
 303     );
 304
 305 The all singing, all dancing, multi-server, asynchronous, scanning,
 306 searching, record nabbing, facet-building
 307
 308 See verbse embedded documentation.
 309
 310 =cut
 311
 312 sub getRecords {
 313     my (
 314         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 315         $results_per_page, $offset,       $expanded_facet, $branches,
 316         $query_type,       $scan
 317     ) = @_;
 318
 319     my @servers = @$servers_ref;
 320     my @sort_by = @$sort_by_ref;
 321
 322     # Initialize variables for the ZOOM connection and results object
 323     my $zconn;
 324     my @zconns;
 325     my @results;
 326     my $results_hashref = ();
 327
 328     # Initialize variables for the faceted results objects
 329     my $facets_counter = ();
 330     my $facets_info    = ();
 331     my $facets         = getFacets();
 332
 333     my @facets_loop
 334       ;    # stores the ref to array of hashes for template facets loop
 335
 336     ### LOOP THROUGH THE SERVERS
 337     for ( my $i = 0 ; $i < @servers ; $i++ ) {
 338         $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 339
 340 # perform the search, create the results objects
 341 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
 342         my $query_to_use = ($servers[$i] =~ /biblioserver/) ? $koha_query : $simple_query;
 343
 344         #$query_to_use = $simple_query if $scan;
 345         warn $simple_query if ( $scan and $DEBUG );
 346
 347         # Check if we've got a query_type defined, if so, use it
 348         eval {
 349             if ($query_type)
 350             {
 351                 if ( $query_type =~ /^ccl/ ) {
 352                     $query_to_use =~
 353                       s/\:/\=/g;    # change : to = last minute (FIXME)
 354                     $results[$i] =
 355                       $zconns[$i]->search(
 356                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 357                       );
 358                 }
 359                 elsif ( $query_type =~ /^cql/ ) {
 360                     $results[$i] =
 361                       $zconns[$i]->search(
 362                         new ZOOM::Query::CQL( $query_to_use, $zconns[$i] ) );
 363                 }
 364                 elsif ( $query_type =~ /^pqf/ ) {
 365                     $results[$i] =
 366                       $zconns[$i]->search(
 367                         new ZOOM::Query::PQF( $query_to_use, $zconns[$i] ) );
 368                 }
 369             }
 370             else {
 371                 if ($scan) {
 372                     $results[$i] =
 373                       $zconns[$i]->scan(
 374                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 375                       );
 376                 }
 377                 else {
 378                     $results[$i] =
 379                       $zconns[$i]->search(
 380                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 381                       );
 382                 }
 383             }
 384         };
 385         if ($@) {
 386             warn "WARNING: query problem with $query_to_use " . $@;
 387         }
 388
 389         # Concatenate the sort_by limits and pass them to the results object
 390         # Note: sort will override rank
 391         my $sort_by;
 392         foreach my $sort (@sort_by) {
 393             if ( $sort eq "author_az" ) {
 394                 $sort_by .= "1=1003 <i ";
 395             }
 396             elsif ( $sort eq "author_za" ) {
 397                 $sort_by .= "1=1003 >i ";
 398             }
 399             elsif ( $sort eq "popularity_asc" ) {
 400                 $sort_by .= "1=9003 <i ";
 401             }
 402             elsif ( $sort eq "popularity_dsc" ) {
 403                 $sort_by .= "1=9003 >i ";
 404             }
 405             elsif ( $sort eq "call_number_asc" ) {
 406                 $sort_by .= "1=20  <i ";
 407             }
 408             elsif ( $sort eq "call_number_dsc" ) {
 409                 $sort_by .= "1=20 >i ";
 410             }
 411             elsif ( $sort eq "pubdate_asc" ) {
 412                 $sort_by .= "1=31 <i ";
 413             }
 414             elsif ( $sort eq "pubdate_dsc" ) {
 415                 $sort_by .= "1=31 >i ";
 416             }
 417             elsif ( $sort eq "acqdate_asc" ) {
 418                 $sort_by .= "1=32 <i ";
 419             }
 420             elsif ( $sort eq "acqdate_dsc" ) {
 421                 $sort_by .= "1=32 >i ";
 422             }
 423             elsif ( $sort eq "title_az" ) {
 424                 $sort_by .= "1=4 <i ";
 425             }
 426             elsif ( $sort eq "title_za" ) {
 427                 $sort_by .= "1=4 >i ";
 428             }
 429             else {
 430                 warn "Ignoring unrecognized sort '$sort' requested" if $sort_by;
 431             }
 432         }
 433         if ($sort_by) {
 434             if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
 435                 warn "WARNING sort $sort_by failed";
 436             }
 437         }
 438     }    # finished looping through servers
 439
 440     # The big moment: asynchronously retrieve results from all servers
 441     while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 442         my $ev = $zconns[ $i - 1 ]->last_event();
 443         if ( $ev == ZOOM::Event::ZEND ) {
 444             next unless $results[ $i - 1 ];
 445             my $size = $results[ $i - 1 ]->size();
 446             if ( $size > 0 ) {
 447                 my $results_hash;
 448
 449                 # loop through the results
 450                 $results_hash->{'hits'} = $size;
 451                 my $times;
 452                 if ( $offset + $results_per_page <= $size ) {
 453                     $times = $offset + $results_per_page;
 454                 }
 455                 else {
 456                     $times = $size;
 457                 }
 458                 for ( my $j = $offset ; $j < $times ; $j++ ) {
 459                     my $records_hash;
 460                     my $record;
 461                     my $facet_record;
 462
 463                     ## Check if it's an index scan
 464                     if ($scan) {
 465                         my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
 466
 467                  # here we create a minimal MARC record and hand it off to the
 468                  # template just like a normal result ... perhaps not ideal, but
 469                  # it works for now
 470                         my $tmprecord = MARC::Record->new();
 471                         $tmprecord->encoding('UTF-8');
 472                         my $tmptitle;
 473                         my $tmpauthor;
 474
 475                 # the minimal record in author/title (depending on MARC flavour)
 476                         if (C4::Context->preference("marcflavour") eq "UNIMARC") {
 477                             $tmptitle = MARC::Field->new('200',' ',' ', a => $term, f => $occ);
 478                             $tmprecord->append_fields($tmptitle);
 479                         } else {
 480                             $tmptitle  = MARC::Field->new('245',' ',' ', a => $term,);
 481                             $tmpauthor = MARC::Field->new('100',' ',' ', a => $occ,);
 482                             $tmprecord->append_fields($tmptitle);
 483                             $tmprecord->append_fields($tmpauthor);
 484                         }
 485                         $results_hash->{'RECORDS'}[$j] = $tmprecord->as_usmarc();
 486                     }
 487
 488                     # not an index scan
 489                     else {
 490                         $record = $results[ $i - 1 ]->record($j)->raw();
 491
 492                         # warn "RECORD $j:".$record;
 493                         $results_hash->{'RECORDS'}[$j] = $record;
 494
 495             # Fill the facets while we're looping, but only for the biblioserver
 496                         $facet_record = MARC::Record->new_from_usmarc($record)
 497                           if $servers[ $i - 1 ] =~ /biblioserver/;
 498
 499                     #warn $servers[$i-1]."\n".$record; #.$facet_record->title();
 500                         if ($facet_record) {
 501                             for ( my $k = 0 ; $k <= @$facets ; $k++ ) {
 502                                 ($facets->[$k]) or next;
 503                                 my @fields = map {$facet_record->field($_)} @{$facets->[$k]->{'tags'}} ;
 504                                 for my $field (@fields) {
 505                                     my @subfields = $field->subfields();
 506                                     for my $subfield (@subfields) {
 507                                         my ( $code, $data ) = @$subfield;
 508                                         ($code eq $facets->[$k]->{'subfield'}) or next;
 509                                         $facets_counter->{ $facets->[$k]->{'link_value'} }->{$data}++;
 510                                     }
 511                                 }
 512                                 $facets_info->{ $facets->[$k]->{'link_value'} }->{'label_value'} =
 513                                     $facets->[$k]->{'label_value'};
 514                                 $facets_info->{ $facets->[$k]->{'link_value'} }->{'expanded'} =
 515                                     $facets->[$k]->{'expanded'};
 516                             }
 517                         }
 518                     }
 519                 }
 520                 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
 521             }
 522
 523             # warn "connection ", $i-1, ": $size hits";
 524             # warn $results[$i-1]->record(0)->render() if $size > 0;
 525
 526             # BUILD FACETS
 527             if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
 528                 for my $link_value (
 529                     sort { $facets_counter->{$b} <=> $facets_counter->{$a} }
 530                     keys %$facets_counter )
 531                 {
 532                     my $expandable;
 533                     my $number_of_facets;
 534                     my @this_facets_array;
 535                     for my $one_facet (
 536                         sort {
 537                             $facets_counter->{$link_value}
 538                               ->{$b} <=> $facets_counter->{$link_value}->{$a}
 539                         } keys %{ $facets_counter->{$link_value} }
 540                       )
 541                     {
 542                         $number_of_facets++;
 543                         if (   ( $number_of_facets < 6 )
 544                             || ( $expanded_facet eq $link_value )
 545                             || ( $facets_info->{$link_value}->{'expanded'} ) )
 546                         {
 547
 548                       # Sanitize the link value ), ( will cause errors with CCL,
 549                             my $facet_link_value = $one_facet;
 550                             $facet_link_value =~ s/(\(|\))/ /g;
 551
 552                             # fix the length that will display in the label,
 553                             my $facet_label_value = $one_facet;
 554                             $facet_label_value =
 555                               substr( $one_facet, 0, 20 ) . "..."
 556                               unless length($facet_label_value) <= 20;
 557
 558                             # if it's a branch, label by the name, not the code,
 559                             if ( $link_value =~ /branch/ ) {
 560                                 $facet_label_value =
 561                                   $branches->{$one_facet}->{'branchname'};
 562                             }
 563
 564                 # but we're down with the whole label being in the link's title.
 565                             my $facet_title_value = $one_facet;
 566
 567                             push @this_facets_array,
 568                               (
 569                                 {
 570                                     facet_count =>
 571                                       $facets_counter->{$link_value}
 572                                       ->{$one_facet},
 573                                     facet_label_value => $facet_label_value,
 574                                     facet_title_value => $facet_title_value,
 575                                     facet_link_value  => $facet_link_value,
 576                                     type_link_value   => $link_value,
 577                                 },
 578                               );
 579                         }
 580                     }
 581
 582                     # handle expanded option
 583                     unless ( $facets_info->{$link_value}->{'expanded'} ) {
 584                         $expandable = 1
 585                           if ( ( $number_of_facets > 6 )
 586                             && ( $expanded_facet ne $link_value ) );
 587                     }
 588                     push @facets_loop,
 589                       (
 590                         {
 591                             type_link_value => $link_value,
 592                             type_id         => $link_value . "_id",
 593                             "type_label_" . $facets_info->{$link_value}->{'label_value'} => 1,
 594                             facets     => \@this_facets_array,
 595                             expandable => $expandable,
 596                             expand     => $link_value,
 597                         }
 598                       ) unless ( ($facets_info->{$link_value}->{'label_value'} =~ /Libraries/) and (C4::Context->preference('singleBranchMode')) );
 599                 }
 600             }
 601         }
 602     }
 603     return ( undef, $results_hashref, \@facets_loop );
 604 }
 605
 606 sub pazGetRecords {
 607     my (
 608         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 609         $results_per_page, $offset,       $expanded_facet, $branches,
 610         $query_type,       $scan
 611     ) = @_;
 612
 613     my $paz = C4::Search::PazPar2->new(C4::Context->config('pazpar2url'));
 614     $paz->init();
 615     $paz->search($simple_query);
 616     sleep 1;
 617
 618     # do results
 619     my $results_hashref = {};
 620     my $stats = XMLin($paz->stat);
 621     my $results = XMLin($paz->show($offset, $results_per_page, 'work-title:1'), forcearray => 1);
 622
 623     # for a grouped search result, the number of hits
 624     # is the number of groups returned; 'bib_hits' will have
 625     # the total number of bibs.
 626     $results_hashref->{'biblioserver'}->{'hits'} = $results->{'merged'}->[0];
 627     $results_hashref->{'biblioserver'}->{'bib_hits'} = $stats->{'hits'};
 628
 629     HIT: foreach my $hit (@{ $results->{'hit'} }) {
 630         my $recid = $hit->{recid}->[0];
 631
 632         my $work_title = $hit->{'md-work-title'}->[0];
 633         my $work_author;
 634         if (exists $hit->{'md-work-author'}) {
 635             $work_author = $hit->{'md-work-author'}->[0];
 636         }
 637         my $group_label = (defined $work_author) ? "$work_title / $work_author" : $work_title;
 638
 639         my $result_group = {};
 640         $result_group->{'group_label'} = $group_label;
 641         $result_group->{'group_merge_key'} = $recid;
 642
 643         my $count = 1;
 644         if (exists $hit->{count}) {
 645             $count = $hit->{count}->[0];
 646         }
 647         $result_group->{'group_count'} = $count;
 648
 649         for (my $i = 0; $i < $count; $i++) {
 650             # FIXME -- may need to worry about diacritics here
 651             my $rec = $paz->record($recid, $i);
 652             push @{ $result_group->{'RECORDS'} }, $rec;
 653         }
 654
 655         push @{ $results_hashref->{'biblioserver'}->{'GROUPS'} }, $result_group;
 656     }
 657
 658     # pass through facets
 659     my $termlist_xml = $paz->termlist('author,subject');
 660     my $terms = XMLin($termlist_xml, forcearray => 1);
 661     my @facets_loop = ();
 662     #die Dumper($results);
 663 #    foreach my $list (sort keys %{ $terms->{'list'} }) {
 664 #        my @facets = ();
 665 #        foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) {
 666 #            push @facets, {
 667 #                facet_label_value => $facet->{'name'}->[0],
 668 #            };
 669 #        }
 670 #        push @facets_loop, ( {
 671 #            type_label => $list,
 672 #            facets => \@facets,
 673 #        } );
 674 #    }
 675
 676     return ( undef, $results_hashref, \@facets_loop );
 677 }
 678
 679 # STOPWORDS
 680 sub _remove_stopwords {
 681     my ( $operand, $index ) = @_;
 682     my @stopwords_removed;
 683
 684     # phrase and exact-qualified indexes shouldn't have stopwords removed
 685     if ( $index !~ m/phr|ext/ ) {
 686
 687 # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
 688 #       we use IsAlpha unicode definition, to deal correctly with diacritics.
 689 #       otherwise, a French word like "leçon" woudl be split into "le" "çon", "le"
 690 #       is a stopword, we'd get "çon" and wouldn't find anything...
 691         foreach ( keys %{ C4::Context->stopwords } ) {
 692             next if ( $_ =~ /(and|or|not)/ );    # don't remove operators
 693             if ( $operand =~
 694                 /(\P{IsAlpha}$_\P{IsAlpha}|^$_\P{IsAlpha}|\P{IsAlpha}$_$|^$_$)/ )
 695             {
 696                 $operand =~ s/\P{IsAlpha}$_\P{IsAlpha}/ /gi;
 697                 $operand =~ s/^$_\P{IsAlpha}/ /gi;
 698                 $operand =~ s/\P{IsAlpha}$_$/ /gi;
 699                                 $operand =~ s/$1//gi;
 700                 push @stopwords_removed, $_;
 701             }
 702         }
 703     }
 704     return ( $operand, \@stopwords_removed );
 705 }
 706
 707 # TRUNCATION
 708 sub _detect_truncation {
 709     my ( $operand, $index ) = @_;
 710     my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
 711         @regexpr );
 712     $operand =~ s/^ //g;
 713     my @wordlist = split( /\s/, $operand );
 714     foreach my $word (@wordlist) {
 715         if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
 716             push @rightlefttruncated, $word;
 717         }
 718         elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
 719             push @lefttruncated, $word;
 720         }
 721         elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
 722             push @righttruncated, $word;
 723         }
 724         elsif ( index( $word, "*" ) < 0 ) {
 725             push @nontruncated, $word;
 726         }
 727         else {
 728             push @regexpr, $word;
 729         }
 730     }
 731     return (
 732         \@nontruncated,       \@righttruncated, \@lefttruncated,
 733         \@rightlefttruncated, \@regexpr
 734     );
 735 }
 736
 737 # STEMMING
 738 sub _build_stemmed_operand {
 739     my ($operand) = @_;
 740     my $stemmed_operand;
 741
 742     # If operand contains a digit, it is almost certainly an identifier, and should
 743     # not be stemmed.  This is particularly relevant for ISBNs and ISSNs, which
 744     # can contain the letter "X" - for example, _build_stemmend_operand would reduce
 745     # "014100018X" to "x ", which for a MARC21 database would bring up irrelevant
 746     # results (e.g., "23 x 29 cm." from the 300$c).  Bug 2098.
 747     return $operand if $operand =~ /\d/;
 748
 749 # FIXME: the locale should be set based on the user's language and/or search choice
 750     my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
 751
 752 # FIXME: these should be stored in the db so the librarian can modify the behavior
 753     $stemmer->add_exceptions(
 754         {
 755             'and' => 'and',
 756             'or'  => 'or',
 757             'not' => 'not',
 758         }
 759     );
 760     my @words = split( / /, $operand );
 761     my $stems = $stemmer->stem(@words);
 762     for my $stem (@$stems) {
 763         $stemmed_operand .= "$stem";
 764         $stemmed_operand .= "?"
 765           unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
 766         $stemmed_operand .= " ";
 767     }
 768     warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
 769     return $stemmed_operand;
 770 }
 771
 772 # FIELD WEIGHTING
 773 sub _build_weighted_query {
 774
 775 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
 776 # pretty well but could work much better if we had a smarter query parser
 777     my ( $operand, $stemmed_operand, $index ) = @_;
 778     my $stemming      = C4::Context->preference("QueryStemming")     || 0;
 779     my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
 780     my $fuzzy_enabled = C4::Context->preference("QueryFuzzy")        || 0;
 781
 782     my $weighted_query .= "(rk=(";    # Specifies that we're applying rank
 783
 784     # Keyword, or, no index specified
 785     if ( ( $index eq 'kw' ) || ( !$index ) ) {
 786         $weighted_query .=
 787           "Title-cover,ext,r1=\"$operand\"";    # exact title-cover
 788         $weighted_query .= " or ti,ext,r2=\"$operand\"";    # exact title
 789         $weighted_query .= " or ti,phr,r3=\"$operand\"";    # phrase title
 790           #$weighted_query .= " or any,ext,r4=$operand";               # exact any
 791           #$weighted_query .=" or kw,wrdl,r5=\"$operand\"";            # word list any
 792         $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
 793           if $fuzzy_enabled;    # add fuzzy, word list
 794         $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
 795           if ( $stemming and $stemmed_operand )
 796           ;                     # add stemming, right truncation
 797         $weighted_query .= " or wrdl,r9=\"$operand\"";
 798
 799         # embedded sorting: 0 a-z; 1 z-a
 800         # $weighted_query .= ") or (sort1,aut=1";
 801     }
 802
 803     # Barcode searches should skip this process
 804     elsif ( $index eq 'bc' ) {
 805         $weighted_query .= "bc=\"$operand\"";
 806     }
 807
 808     # Authority-number searches should skip this process
 809     elsif ( $index eq 'an' ) {
 810         $weighted_query .= "an=\"$operand\"";
 811     }
 812
 813     # If the index already has more than one qualifier, wrap the operand
 814     # in quotes and pass it back (assumption is that the user knows what they
 815     # are doing and won't appreciate us mucking up their query
 816     elsif ( $index =~ ',' ) {
 817         $weighted_query .= " $index=\"$operand\"";
 818     }
 819
 820     #TODO: build better cases based on specific search indexes
 821     else {
 822         $weighted_query .= " $index,ext,r1=\"$operand\"";    # exact index
 823           #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
 824         $weighted_query .= " or $index,phr,r3=\"$operand\"";    # phrase index
 825         $weighted_query .=
 826           " or $index,rt,wrdl,r3=\"$operand\"";    # word list index
 827     }
 828
 829     $weighted_query .= "))";                       # close rank specification
 830     return $weighted_query;
 831 }
 832
 833 =head2 buildQuery
 834
 835 ( $error, $query,
 836 $simple_query, $query_cgi,
 837 $query_desc, $limit,
 838 $limit_cgi, $limit_desc,
 839 $stopwords_removed, $query_type ) = getRecords ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
 840
 841 Build queries and limits in CCL, CGI, Human,
 842 handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
 843
 844 See verbose embedded documentation.
 845
 846
 847 =cut
 848
 849 sub buildQuery {
 850     my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_;
 851
 852     warn "---------\nEnter buildQuery\n---------" if $DEBUG;
 853
 854     # dereference
 855     my @operators = @$operators if $operators;
 856     my @indexes   = @$indexes   if $indexes;
 857     my @operands  = @$operands  if $operands;
 858     my @limits    = @$limits    if $limits;
 859     my @sort_by   = @$sort_by   if $sort_by;
 860
 861     my $stemming         = C4::Context->preference("QueryStemming")        || 0;
 862     my $auto_truncation  = C4::Context->preference("QueryAutoTruncate")    || 0;
 863     my $weight_fields    = C4::Context->preference("QueryWeightFields")    || 0;
 864     my $fuzzy_enabled    = C4::Context->preference("QueryFuzzy")           || 0;
 865     my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
 866
 867     # no stemming/weight/fuzzy in NoZebra
 868     if ( C4::Context->preference("NoZebra") ) {
 869         $stemming      = 0;
 870         $weight_fields = 0;
 871         $fuzzy_enabled = 0;
 872     }
 873
 874     my $query        = $operands[0];
 875     my $simple_query = $operands[0];
 876
 877     # initialize the variables we're passing back
 878     my $query_cgi;
 879     my $query_desc;
 880     my $query_type;
 881
 882     my $limit;
 883     my $limit_cgi;
 884     my $limit_desc;
 885
 886     my $stopwords_removed;    # flag to determine if stopwords have been removed
 887
 888 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
 889 # DIAGNOSTIC ONLY!!
 890     if ( $query =~ /^ccl=/ ) {
 891         return ( undef, $', $', "q=ccl=$'", $', '', '', '', '', 'ccl' );
 892     }
 893     if ( $query =~ /^cql=/ ) {
 894         return ( undef, $', $', "q=cql=$'", $', '', '', '', '', 'cql' );
 895     }
 896     if ( $query =~ /^pqf=/ ) {
 897         return ( undef, $', $', "q=pqf=$'", $', '', '', '', '', 'pqf' );
 898     }
 899
 900     # pass nested queries directly
 901     # FIXME: need better handling of some of these variables in this case
 902     if ( $query =~ /(\(|\))/ ) {
 903         return (
 904             undef,              $query, $simple_query, $query_cgi,
 905             $query,             $limit, $limit_cgi,    $limit_desc,
 906             $stopwords_removed, 'ccl'
 907         );
 908     }
 909
 910 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
 911 # query operands and indexes and add stemming, truncation, field weighting, etc.
 912 # Once we do so, we'll end up with a value in $query, just like if we had an
 913 # incoming $query from the user
 914     else {
 915         $query = ""
 916           ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
 917         my $previous_operand
 918           ;    # a flag used to keep track if there was a previous query
 919                # if there was, we can apply the current operator
 920                # for every operand
 921         for ( my $i = 0 ; $i <= @operands ; $i++ ) {
 922
 923             # COMBINE OPERANDS, INDEXES AND OPERATORS
 924             if ( $operands[$i] ) {
 925
 926               # A flag to determine whether or not to add the index to the query
 927                 my $indexes_set;
 928
 929 # If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling
 930                 if ( $operands[$i] =~ /(:|=)/ || $scan ) {
 931                     $weight_fields    = 0;
 932                     $stemming         = 0;
 933                     $remove_stopwords = 0;
 934                 }
 935                 my $operand = $operands[$i];
 936                 my $index   = $indexes[$i];
 937
 938                 # Add index-specific attributes
 939                 # Date of Publication
 940                 if ( $index eq 'yr' ) {
 941                     $index .= ",st-numeric";
 942 #                     $indexes_set++;
 943                                         $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
 944                 }
 945
 946                 # Date of Acquisition
 947                 elsif ( $index eq 'acqdate' ) {
 948                     $index .= ",st-date-normalized";
 949 #                     $indexes_set++;
 950                                         $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
 951                 }
 952                 # ISBN,ISSN,Standard Number, don't need special treatment
 953                 elsif ( $index eq 'nb' || $index eq 'ns' ) {
 954 #                     $indexes_set++;
 955                     (
 956                         $stemming,      $auto_truncation,
 957                         $weight_fields, $fuzzy_enabled,
 958                         $remove_stopwords
 959                     ) = ( 0, 0, 0, 0, 0 );
 960
 961                 }
 962                 # Set default structure attribute (word list)
 963                 my $struct_attr;
 964                 unless ( $indexes_set || !$index || $index =~ /(st-|phr|ext|wrdl)/ ) {
 965                     $struct_attr = ",wrdl";
 966                 }
 967
 968                 # Some helpful index variants
 969                 my $index_plus       = $index . $struct_attr . ":" if $index;
 970                 my $index_plus_comma = $index . $struct_attr . "," if $index;
 971                 if ($auto_truncation){
 972 #                                       FIXME Auto Truncation is only valid for LTR languages
 973 #                                       use C4::Output;
 974 #                                       use C4::Languages qw(regex_lang_subtags get_bidi);
 975 #                               $lang = $query->cookie('KohaOpacLanguage') if (defined $query && $query->cookie('KohaOpacLanguage'));
 976 #                                   my $current_lang = regex_lang_subtags($lang);
 977 #                                   my $bidi;
 978 #                                   $bidi = get_bidi($current_lang->{script}) if $current_lang->{script};
 979                                         $index_plus_comma .= "rtrn:";
 980                                 }
 981
 982                 # Remove Stopwords
 983                 if ($remove_stopwords) {
 984                     ( $operand, $stopwords_removed ) =
 985                       _remove_stopwords( $operand, $index );
 986                     warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
 987                     warn "REMOVED STOPWORDS: @$stopwords_removed"
 988                       if ( $stopwords_removed && $DEBUG );
 989                 }
 990
 991                 # Detect Truncation
 992                 my $truncated_operand;
 993                 my( $nontruncated, $righttruncated, $lefttruncated,
 994                     $rightlefttruncated, $regexpr
 995                 ) = _detect_truncation( $operand, $index );
 996                 warn
 997 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
 998                   if $DEBUG;
 999
1000                 # Apply Truncation
1001                 if (
1002                     scalar(@$righttruncated) + scalar(@$lefttruncated) +
1003                     scalar(@$rightlefttruncated) > 0 )
1004                 {
1005
1006                # Don't field weight or add the index to the query, we do it here
1007                     $indexes_set = 1;
1008                     undef $weight_fields;
1009                     my $previous_truncation_operand;
1010                     if ( scalar(@$nontruncated) > 0 ) {
1011                         $truncated_operand .= "$index_plus @$nontruncated ";
1012                         $previous_truncation_operand = 1;
1013                     }
1014                     if ( scalar(@$righttruncated) > 0 ) {
1015                         $truncated_operand .= "and "
1016                           if $previous_truncation_operand;
1017                         $truncated_operand .=
1018                           "$index_plus_comma" . "rtrn:@$righttruncated ";
1019                         $previous_truncation_operand = 1;
1020                     }
1021                     if ( scalar(@$lefttruncated) > 0 ) {
1022                         $truncated_operand .= "and "
1023                           if $previous_truncation_operand;
1024                         $truncated_operand .=
1025                           "$index_plus_comma" . "ltrn:@$lefttruncated ";
1026                         $previous_truncation_operand = 1;
1027                     }
1028                     if ( scalar(@$rightlefttruncated) > 0 ) {
1029                         $truncated_operand .= "and "
1030                           if $previous_truncation_operand;
1031                         $truncated_operand .=
1032                           "$index_plus_comma" . "rltrn:@$rightlefttruncated ";
1033                         $previous_truncation_operand = 1;
1034                     }
1035                 }
1036                 $operand = $truncated_operand if $truncated_operand;
1037                 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
1038
1039                 # Handle Stemming
1040                 my $stemmed_operand;
1041                 $stemmed_operand = _build_stemmed_operand($operand)
1042                   if $stemming;
1043                 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
1044
1045                 # Handle Field Weighting
1046                 my $weighted_operand;
1047                 $weighted_operand =
1048                   _build_weighted_query( $operand, $stemmed_operand, $index )
1049                   if $weight_fields;
1050                 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
1051                 $operand = $weighted_operand if $weight_fields;
1052                 $indexes_set = 1 if $weight_fields;
1053
1054                 # If there's a previous operand, we need to add an operator
1055                 if ($previous_operand) {
1056
1057                     # User-specified operator
1058                     if ( $operators[ $i - 1 ] ) {
1059                         $query     .= " $operators[$i-1] ";
1060                         $query     .= " $index_plus " unless $indexes_set;
1061                         $query     .= " $operand";
1062                         $query_cgi .= "&op=$operators[$i-1]";
1063                         $query_cgi .= "&idx=$index" if $index;
1064                         $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1065                         $query_desc .=
1066                           " $operators[$i-1] $index_plus $operands[$i]";
1067                     }
1068
1069                     # Default operator is and
1070                     else {
1071                         $query      .= " and ";
1072                         $query      .= "$index_plus " unless $indexes_set;
1073                         $query      .= "$operand";
1074                         $query_cgi  .= "&op=and&idx=$index" if $index;
1075                         $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1076                         $query_desc .= " and $index_plus $operands[$i]";
1077                     }
1078                 }
1079
1080                 # There isn't a pervious operand, don't need an operator
1081                 else {
1082
1083                     # Field-weighted queries already have indexes set
1084                     $query .= " $index_plus " unless $indexes_set;
1085                     $query .= $operand;
1086                     $query_desc .= " $index_plus $operands[$i]";
1087                     $query_cgi  .= "&idx=$index" if $index;
1088                     $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1089                     $previous_operand = 1;
1090                 }
1091             }    #/if $operands
1092         }    # /for
1093     }
1094     warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
1095
1096     # add limits
1097     my $group_OR_limits;
1098     my $availability_limit;
1099     foreach my $this_limit (@limits) {
1100         if ( $this_limit =~ /available/ ) {
1101
1102 # 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1103 # In English:
1104 # all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1105             $availability_limit .=
1106 "( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and (lost,st-numeric=0) )"; #or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )";
1107             $limit_cgi  .= "&limit=available";
1108             $limit_desc .= "";
1109         }
1110
1111         # group_OR_limits, prefixed by mc-
1112         # OR every member of the group
1113         elsif ( $this_limit =~ /mc/ ) {
1114             $group_OR_limits .= " or " if $group_OR_limits;
1115             $limit_desc      .= " or " if $group_OR_limits;
1116             $group_OR_limits .= "$this_limit";
1117             $limit_cgi       .= "&limit=$this_limit";
1118             $limit_desc      .= " $this_limit";
1119         }
1120
1121         # Regular old limits
1122         else {
1123             if ($this_limit){
1124                 $limit .= " and " if $limit || $query;
1125                 $limit      .= "$this_limit";
1126                 $limit_cgi  .= "&limit=$this_limit";
1127                 $limit_desc .= " $this_limit";
1128             }
1129         }
1130     }
1131     if ($group_OR_limits) {
1132         $limit .= " and " if ( $query || $limit );
1133         $limit .= "($group_OR_limits)";
1134     }
1135     if ($availability_limit) {
1136         $limit .= " and " if ( $query || $limit );
1137         $limit .= "($availability_limit)";
1138     }
1139
1140     # Normalize the query and limit strings
1141     $query =~ s/:/=/g;
1142     $limit =~ s/:/=/g;
1143     for ( $query, $query_desc, $limit, $limit_desc ) {
1144         $_ =~ s/  / /g;    # remove extra spaces
1145         $_ =~ s/^ //g;     # remove any beginning spaces
1146         $_ =~ s/ $//g;     # remove any ending spaces
1147         $_ =~ s/==/=/g;    # remove double == from query
1148     }
1149     $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1150
1151     for ($query_cgi,$simple_query) {
1152         $_ =~ s/"//g;
1153     }
1154     # append the limit to the query
1155     $query .= " " . $limit;
1156
1157     # Warnings if DEBUG
1158     if ($DEBUG) {
1159         warn "QUERY:" . $query;
1160         warn "QUERY CGI:" . $query_cgi;
1161         warn "QUERY DESC:" . $query_desc;
1162         warn "LIMIT:" . $limit;
1163         warn "LIMIT CGI:" . $limit_cgi;
1164         warn "LIMIT DESC:" . $limit_desc;
1165         warn "---------\nLeave buildQuery\n---------";
1166     }
1167     return (
1168         undef,              $query, $simple_query, $query_cgi,
1169         $query_desc,        $limit, $limit_cgi,    $limit_desc,
1170         $stopwords_removed, $query_type
1171     );
1172 }
1173
1174 =head2 searchResults
1175
1176 Format results in a form suitable for passing to the template
1177
1178 =cut
1179
1180 # IMO this subroutine is pretty messy still -- it's responsible for
1181 # building the HTML output for the template
1182 sub searchResults {
1183     my ( $searchdesc, $hits, $results_per_page, $offset, $scan, @marcresults ) = @_;
1184     my $dbh = C4::Context->dbh;
1185     my $even = 1;
1186     my @newresults;
1187
1188     # add search-term highlighting via <span>s on the search terms
1189     my $span_terms_hashref;
1190     for my $span_term ( split( / /, $searchdesc ) ) {
1191         $span_term =~ s/(.*=|\)|\(|\+|\.|\*)//g;
1192         $span_terms_hashref->{$span_term}++;
1193     }
1194
1195     #Build branchnames hash
1196     #find branchname
1197     #get branch information.....
1198     my %branches;
1199     my $bsth =
1200       $dbh->prepare("SELECT branchcode,branchname FROM branches")
1201       ;    # FIXME : use C4::Koha::GetBranches
1202     $bsth->execute();
1203     while ( my $bdata = $bsth->fetchrow_hashref ) {
1204         $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'};
1205     }
1206 # FIXME - We build an authorised values hash here, using the default framework
1207 # though it is possible to have different authvals for different fws.
1208
1209     my $shelflocations =GetKohaAuthorisedValues('items.location','');
1210
1211     # get notforloan authorised value list (see $shelflocations  FIXME)
1212     my $notforloan_authorised_value = GetAuthValCode('items.notforloan','');
1213
1214     #Build itemtype hash
1215     #find itemtype & itemtype image
1216     my %itemtypes;
1217     $bsth =
1218       $dbh->prepare(
1219         "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes"
1220       );
1221     $bsth->execute();
1222     while ( my $bdata = $bsth->fetchrow_hashref ) {
1223                 foreach (qw(description imageurl summary notforloan)) {
1224                 $itemtypes{ $bdata->{'itemtype'} }->{$_} = $bdata->{$_};
1225                 }
1226     }
1227
1228     #search item field code
1229     my $sth =
1230       $dbh->prepare(
1231 "SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'"
1232       );
1233     $sth->execute;
1234     my ($itemtag) = $sth->fetchrow;
1235
1236     ## find column names of items related to MARC
1237     my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items");
1238     $sth2->execute;
1239     my %subfieldstosearch;
1240     while ( ( my $column ) = $sth2->fetchrow ) {
1241         my ( $tagfield, $tagsubfield ) =
1242           &GetMarcFromKohaField( "items." . $column, "" );
1243         $subfieldstosearch{$column} = $tagsubfield;
1244     }
1245
1246     # handle which records to actually retrieve
1247     my $times;
1248     if ( $hits && $offset + $results_per_page <= $hits ) {
1249         $times = $offset + $results_per_page;
1250     }
1251     else {
1252         $times = $hits;  # FIXME: if $hits is undefined, why do we want to equal it?
1253     }
1254
1255     # loop through all of the records we've retrieved
1256     for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
1257         my $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
1258         my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' );
1259         $oldbiblio->{subtitle} = C4::Biblio::get_koha_field_from_marc('bibliosubtitle', 'subtitle', $marcrecord, '');
1260         $oldbiblio->{result_number} = $i + 1;
1261
1262         # add imageurl to itemtype if there is one
1263         $oldbiblio->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
1264
1265         $oldbiblio->{'authorised_value_images'}  = C4::Items::get_authorised_value_images( C4::Biblio::get_biblio_authorised_values( $oldbiblio->{'biblionumber'}, $marcrecord ) );
1266         (my $aisbn) = $oldbiblio->{isbn} =~ /([\d-]*[X]*)/;
1267         $aisbn =~ s/-//g;
1268         $oldbiblio->{amazonisbn} = $aisbn;
1269                 $oldbiblio->{description} = $itemtypes{ $oldbiblio->{itemtype} }->{description};
1270  # Build summary if there is one (the summary is defined in the itemtypes table)
1271  # FIXME: is this used anywhere, I think it can be commented out? -- JF
1272         if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
1273             my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1274             my @fields  = $marcrecord->fields();
1275             foreach my $field (@fields) {
1276                 my $tag      = $field->tag();
1277                 my $tagvalue = $field->as_string();
1278                 if (! utf8::is_utf8($tagvalue)) {
1279                     utf8::decode($tagvalue);
1280                 }
1281
1282                 $summary =~
1283                   s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
1284                 unless ( $tag < 10 ) {
1285                     my @subf = $field->subfields;
1286                     for my $i ( 0 .. $#subf ) {
1287                         my $subfieldcode  = $subf[$i][0];
1288                         my $subfieldvalue = $subf[$i][1];
1289                         if (! utf8::is_utf8($subfieldvalue)) {
1290                             utf8::decode($subfieldvalue);
1291                         }
1292                         my $tagsubf       = $tag . $subfieldcode;
1293                         $summary =~
1294 s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
1295                     }
1296                 }
1297             }
1298             # FIXME: yuk
1299             $summary =~ s/\[(.*?)]//g;
1300             $summary =~ s/\n/<br\/>/g;
1301             $oldbiblio->{summary} = $summary;
1302         }
1303
1304         # save an author with no <span> tag, for the <a href=search.pl?q=<!--tmpl_var name="author"-->> link
1305         $oldbiblio->{'author_nospan'} = $oldbiblio->{'author'};
1306         $oldbiblio->{'title_nospan'} = $oldbiblio->{'title'};
1307         $oldbiblio->{'subtitle_nospan'} = $oldbiblio->{'subtitle'};
1308         # Add search-term highlighting to the whole record where they match using <span>s
1309         if (C4::Context->preference("OpacHighlightedWords")){
1310             my $searchhighlightblob;
1311             for my $highlight_field ( $marcrecord->fields ) {
1312
1313     # FIXME: need to skip title, subtitle, author, etc., as they are handled below
1314                 next if $highlight_field->tag() =~ /(^00)/;    # skip fixed fields
1315                 for my $subfield ($highlight_field->subfields()) {
1316                     my $match;
1317                     next if $subfield->[0] eq '9';
1318                     my $field = $subfield->[1];
1319                     for my $term ( keys %$span_terms_hashref ) {
1320                         if ( ( $field =~ /$term/i ) && (( length($term) > 3 ) || ($field =~ / $term /i)) ) {
1321                             $field =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1322                         $match++;
1323                         }
1324                     }
1325                     $searchhighlightblob .= $field . " ... " if $match;
1326                 }
1327
1328             }
1329             $searchhighlightblob = ' ... '.$searchhighlightblob if $searchhighlightblob;
1330             $oldbiblio->{'searchhighlightblob'} = $searchhighlightblob;
1331         }
1332
1333         # Add search-term highlighting to the title, subtitle, etc. fields
1334         for my $term ( keys %$span_terms_hashref ) {
1335             my $old_term = $term;
1336             if ( length($term) > 3 ) {
1337                 $term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\]|\\|\*)//g;
1338                                 foreach(qw(title subtitle author publishercode place pages notes size)) {
1339                         $oldbiblio->{$_} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1340                                 }
1341             }
1342         }
1343
1344         ($i % 2) and $oldbiblio->{'toggle'} = 1;
1345
1346         # Pull out the items fields
1347         my @fields = $marcrecord->field($itemtag);
1348
1349         # Setting item statuses for display
1350         my @available_items_loop;
1351         my @onloan_items_loop;
1352         my @notforloan_items_loop;
1353         my @other_items_loop;
1354
1355         my $available_items;
1356         my $onloan_items;
1357         my $notforloan_items;
1358         my $other_items;
1359
1360         my $ordered_count         = 0;
1361         my $available_count       = 0;
1362         my $onloan_count          = 0;
1363         my $notforloan_count      = 0;
1364         my $longoverdue_count     = 0;
1365         my $other_count           = 0;
1366         my $wthdrawn_count        = 0;
1367         my $itemlost_count        = 0;
1368         my $itembinding_count     = 0;
1369         my $itemdamaged_count     = 0;
1370         my $item_in_transit_count = 0;
1371         my $can_place_holds       = 0;
1372         my $items_count           = scalar(@fields);
1373         my $maxitems =
1374           ( C4::Context->preference('maxItemsinSearchResults') )
1375           ? C4::Context->preference('maxItemsinSearchResults') - 1
1376           : 1;
1377
1378         # loop through every item
1379         foreach my $field (@fields) {
1380             my $item;
1381
1382             # populate the items hash
1383             foreach my $code ( keys %subfieldstosearch ) {
1384                 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1385             }
1386                         my $hbranch     = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'homebranch'    : 'holdingbranch';
1387                         my $otherbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'holdingbranch' : 'homebranch';
1388             # set item's branch name, use HomeOrHoldingBranch syspref first, fall back to the other one
1389             if ($item->{$hbranch}) {
1390                 $item->{'branchname'} = $branches{$item->{$hbranch}};
1391             }
1392             elsif ($item->{$otherbranch}) {     # Last resort
1393                 $item->{'branchname'} = $branches{$item->{$otherbranch}};
1394             }
1395
1396                         my $prefix = $item->{$hbranch} . '--' . $item->{location} . $item->{itype} . $item->{itemcallnumber};
1397 # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
1398             if ( $item->{onloan} ) {
1399                 $onloan_count++;
1400                                 my $key = $prefix . $item->{due_date};
1401                                 $onloan_items->{$key}->{due_date} = format_date($item->{onloan});
1402                                 $onloan_items->{$key}->{count}++ if $item->{$hbranch};
1403                                 $onloan_items->{$key}->{branchname} = $item->{branchname};
1404                                 $onloan_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1405                                 $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber};
1406                                 $onloan_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1407                                 $onloan_items->{$key}->{barcode} = $item->{barcode};
1408                 # if something's checked out and lost, mark it as 'long overdue'
1409                 if ( $item->{itemlost} ) {
1410                     $onloan_items->{$prefix}->{longoverdue}++;
1411                     $longoverdue_count++;
1412                 } else {        # can place holds as long as item isn't lost
1413                     $can_place_holds = 1;
1414                 }
1415             }
1416
1417          # items not on loan, but still unavailable ( lost, withdrawn, damaged )
1418             else {
1419
1420                 # item is on order
1421                 if ( $item->{notforloan} == -1 ) {
1422                     $ordered_count++;
1423                 }
1424
1425                 # is item in transit?
1426                 my $transfertwhen = '';
1427                 my ($transfertfrom, $transfertto);
1428
1429                 unless ($item->{wthdrawn}
1430                         || $item->{itemlost}
1431                         || $item->{damaged}
1432                         || $item->{notforloan}
1433                         || $items_count > 20) {
1434
1435                     # A couple heuristics to limit how many times
1436                     # we query the database for item transfer information, sacrificing
1437                     # accuracy in some cases for speed;
1438                     #
1439                     # 1. don't query if item has one of the other statuses
1440                     # 2. don't check transit status if the bib has
1441                     #    more than 20 items
1442                     #
1443                     # FIXME: to avoid having the query the database like this, and to make
1444                     #        the in transit status count as unavailable for search limiting,
1445                     #        should map transit status to record indexed in Zebra.
1446                     #
1447                     ($transfertwhen, $transfertfrom, $transfertto) = C4::Circulation::GetTransfers($item->{itemnumber});
1448                 }
1449
1450                 # item is withdrawn, lost or damaged
1451                 if (   $item->{wthdrawn}
1452                     || $item->{itemlost}
1453                     || $item->{damaged}
1454                     || $item->{notforloan}
1455                     || ($transfertwhen ne ''))
1456                 {
1457                     $wthdrawn_count++        if $item->{wthdrawn};
1458                     $itemlost_count++        if $item->{itemlost};
1459                     $itemdamaged_count++     if $item->{damaged};
1460                     $item_in_transit_count++ if $transfertwhen ne '';
1461                     $item->{status} = $item->{wthdrawn} . "-" . $item->{itemlost} . "-" . $item->{damaged} . "-" . $item->{notforloan};
1462
1463                                         my $key = $prefix . $item->{status};
1464
1465                                         foreach (qw(wthdrawn itemlost damaged branchname itemcallnumber)) {
1466                                             if($item->{notforloan} == 1){
1467                                                 $notforloan_items->{$key}->{$_} = $item->{$_};
1468                                             }else{
1469                            $other_items->{$key}->{$_} = $item->{$_};
1470                                             }
1471                                         }
1472
1473                                         if($item->{notforloan} == 1){
1474                         $notforloan_count++;
1475
1476                         $notforloan_items->{$key}->{intransit} = ($transfertwhen ne '') ? 1 : 0;
1477                                         $notforloan_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1478                                         $notforloan_items->{$key}->{count}++ if $item->{$hbranch};
1479                                         $notforloan_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1480                                         $notforloan_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1481                                         $notforloan_items->{$key}->{barcode} = $item->{barcode};
1482                     }else{
1483                         $other_count++;
1484
1485                         $other_items->{$key}->{intransit} = ($transfertwhen ne '') ? 1 : 0;
1486                                         $other_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1487                                         $other_items->{$key}->{count}++ if $item->{$hbranch};
1488                                         $other_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1489                                         $other_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1490                                         $other_items->{$key}->{barcode} = $item->{barcode};
1491                     }
1492
1493                 }
1494                 # item is available
1495                 else {
1496                     $can_place_holds = 1;
1497                     $available_count++;
1498                                         $available_items->{$prefix}->{count}++ if $item->{$hbranch};
1499                                         foreach (qw(branchname itemcallnumber barcode)) {
1500                         $available_items->{$prefix}->{$_} = $item->{$_};
1501                                         }
1502                                         $available_items->{$prefix}->{location} = $shelflocations->{ $item->{location} };
1503                                         $available_items->{$prefix}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1504                 }
1505             }
1506         }    # notforloan, item level and biblioitem level
1507         my ( $availableitemscount, $onloanitemscount, $notforloanitemscount,$otheritemscount );
1508         $maxitems =
1509           ( C4::Context->preference('maxItemsinSearchResults') )
1510           ? C4::Context->preference('maxItemsinSearchResults') - 1
1511           : 1;
1512         for my $key ( sort keys %$onloan_items ) {
1513             (++$onloanitemscount > $maxitems) and last;
1514             push @onloan_items_loop, $onloan_items->{$key};
1515         }
1516         for my $key ( sort keys %$other_items ) {
1517             (++$otheritemscount > $maxitems) and last;
1518             push @other_items_loop, $other_items->{$key};
1519         }
1520         for my $key ( sort keys %$notforloan_items ) {
1521             (++$notforloanitemscount > $maxitems) and last;
1522             push @notforloan_items_loop, $notforloan_items->{$key};
1523         }
1524         for my $key ( sort keys %$available_items ) {
1525             (++$availableitemscount > $maxitems) and last;
1526             push @available_items_loop, $available_items->{$key}
1527         }
1528
1529         # XSLT processing of some stuff
1530         if (C4::Context->preference("XSLTResultsDisplay") && !$scan) {
1531             my $newxmlrecord = XSLTParse4Display($oldbiblio->{biblionumber}, $marcrecord, C4::Context->config('opachtdocs')."/prog/en/xslt/MARC21slim2OPACResults.xsl");
1532             $oldbiblio->{XSLTResultsRecord} = $newxmlrecord;
1533         }
1534
1535         # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
1536         $can_place_holds = 0 if $itemtypes{ $oldbiblio->{itemtype} }->{notforloan};
1537         $oldbiblio->{norequests} = 1 unless $can_place_holds;
1538         $oldbiblio->{itemsplural}          = 1 if $items_count > 1;
1539         $oldbiblio->{items_count}          = $items_count;
1540         $oldbiblio->{available_items_loop} = \@available_items_loop;
1541         $oldbiblio->{notforloan_items_loop}= \@notforloan_items_loop;
1542         $oldbiblio->{onloan_items_loop}    = \@onloan_items_loop;
1543         $oldbiblio->{other_items_loop}     = \@other_items_loop;
1544         $oldbiblio->{availablecount}       = $available_count;
1545         $oldbiblio->{availableplural}      = 1 if $available_count > 1;
1546         $oldbiblio->{onloancount}          = $onloan_count;
1547         $oldbiblio->{onloanplural}         = 1 if $onloan_count > 1;
1548         $oldbiblio->{notforloancount}      = $notforloan_count;
1549         $oldbiblio->{othercount}           = $other_count;
1550         $oldbiblio->{otherplural}          = 1 if $other_count > 1;
1551         $oldbiblio->{wthdrawncount}        = $wthdrawn_count;
1552         $oldbiblio->{itemlostcount}        = $itemlost_count;
1553         $oldbiblio->{damagedcount}         = $itemdamaged_count;
1554         $oldbiblio->{intransitcount}       = $item_in_transit_count;
1555         $oldbiblio->{orderedcount}         = $ordered_count;
1556         $oldbiblio->{isbn} =~
1557           s/-//g;    # deleting - in isbn to enable amazon content
1558         push( @newresults, $oldbiblio );
1559     }
1560     return @newresults;
1561 }
1562
1563 #----------------------------------------------------------------------
1564 #
1565 # Non-Zebra GetRecords#
1566 #----------------------------------------------------------------------
1567
1568 =head2 NZgetRecords
1569
1570   NZgetRecords has the same API as zera getRecords, even if some parameters are not managed
1571
1572 =cut
1573
1574 sub NZgetRecords {
1575     my (
1576         $query,            $simple_query, $sort_by_ref,    $servers_ref,
1577         $results_per_page, $offset,       $expanded_facet, $branches,
1578         $query_type,       $scan
1579     ) = @_;
1580     warn "query =$query" if $DEBUG;
1581     my $result = NZanalyse($query);
1582     warn "results =$result" if $DEBUG;
1583     return ( undef,
1584         NZorder( $result, @$sort_by_ref[0], $results_per_page, $offset ),
1585         undef );
1586 }
1587
1588 =head2 NZanalyse
1589
1590   NZanalyse : get a CQL string as parameter, and returns a list of biblionumber;title,biblionumber;title,...
1591   the list is built from an inverted index in the nozebra SQL table
1592   note that title is here only for convenience : the sorting will be very fast when requested on title
1593   if the sorting is requested on something else, we will have to reread all results, and that may be longer.
1594
1595 =cut
1596
1597 sub NZanalyse {
1598     my ( $string, $server ) = @_;
1599 #     warn "---------"       if $DEBUG;
1600     warn " NZanalyse" if $DEBUG;
1601 #     warn "---------"       if $DEBUG;
1602
1603  # $server contains biblioserver or authorities, depending on what we search on.
1604  #warn "querying : $string on $server";
1605     $server = 'biblioserver' unless $server;
1606
1607 # if we have a ", replace the content to discard temporarily any and/or/not inside
1608     my $commacontent;
1609     if ( $string =~ /"/ ) {
1610         $string =~ s/"(.*?)"/__X__/;
1611         $commacontent = $1;
1612         warn "commacontent : $commacontent" if $DEBUG;
1613     }
1614
1615 # split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y"
1616 # then, call again NZanalyse with $left and $right
1617 # (recursive until we find a leaf (=> something without and/or/not)
1618 # delete repeated operator... Would then go in infinite loop
1619     while ( $string =~ s/( and| or| not| AND| OR| NOT)\1/$1/g ) {
1620     }
1621
1622     #process parenthesis before.
1623     if ( $string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/ ) {
1624         my $left     = $1;
1625         my $right    = $4;
1626         my $operator = lc($3);   # FIXME: and/or/not are operators, not operands
1627         warn
1628 "dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right"
1629           if $DEBUG;
1630         my $leftresult = NZanalyse( $left, $server );
1631         if ($operator) {
1632             my $rightresult = NZanalyse( $right, $server );
1633
1634             # OK, we have the results for right and left part of the query
1635             # depending of operand, intersect, union or exclude both lists
1636             # to get a result list
1637             if ( $operator eq ' and ' ) {
1638                 return NZoperatorAND($leftresult,$rightresult);
1639             }
1640             elsif ( $operator eq ' or ' ) {
1641
1642                 # just merge the 2 strings
1643                 return $leftresult . $rightresult;
1644             }
1645             elsif ( $operator eq ' not ' ) {
1646                 return NZoperatorNOT($leftresult,$rightresult);
1647             }
1648         }
1649         else {
1650 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1651             return $leftresult;
1652         }
1653     }
1654     warn "string :" . $string if $DEBUG;
1655     my $left = "";
1656     my $right = "";
1657     my $operator = "";
1658     if ($string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/) {
1659         $left     = $1;
1660         $right    = $3;
1661         $operator = lc($2);    # FIXME: and/or/not are operators, not operands
1662     }
1663     warn "no parenthesis. left : $left operator: $operator right: $right"
1664       if $DEBUG;
1665
1666     # it's not a leaf, we have a and/or/not
1667     if ($operator) {
1668
1669         # reintroduce comma content if needed
1670         $right =~ s/__X__/"$commacontent"/ if $commacontent;
1671         $left  =~ s/__X__/"$commacontent"/ if $commacontent;
1672         warn "node : $left / $operator / $right\n" if $DEBUG;
1673         my $leftresult  = NZanalyse( $left,  $server );
1674         my $rightresult = NZanalyse( $right, $server );
1675         warn " leftresult : $leftresult" if $DEBUG;
1676         warn " rightresult : $rightresult" if $DEBUG;
1677         # OK, we have the results for right and left part of the query
1678         # depending of operand, intersect, union or exclude both lists
1679         # to get a result list
1680         if ( $operator eq ' and ' ) {
1681             warn "NZAND";
1682             return NZoperatorAND($leftresult,$rightresult);
1683         }
1684         elsif ( $operator eq ' or ' ) {
1685
1686             # just merge the 2 strings
1687             return $leftresult . $rightresult;
1688         }
1689         elsif ( $operator eq ' not ' ) {
1690             return NZoperatorNOT($leftresult,$rightresult);
1691         }
1692         else {
1693
1694 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1695             die "error : operand unknown : $operator for $string";
1696         }
1697
1698         # it's a leaf, do the real SQL query and return the result
1699     }
1700     else {
1701         $string =~ s/__X__/"$commacontent"/ if $commacontent;
1702         $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g;
1703         #remove trailing blank at the beginning
1704         $string =~ s/^ //g;
1705         warn "leaf:$string" if $DEBUG;
1706
1707         # parse the string in in operator/operand/value again
1708         my $left = "";
1709         my $operator = "";
1710         my $right = "";
1711         if ($string =~ /(.*)(>=|<=)(.*)/) {
1712             $left     = $1;
1713             $operator = $2;
1714             $right    = $3;
1715         } else {
1716             $left = $string;
1717         }
1718 #         warn "handling leaf... left:$left operator:$operator right:$right"
1719 #           if $DEBUG;
1720         unless ($operator) {
1721             if ($string =~ /(.*)(>|<|=)(.*)/) {
1722                 $left     = $1;
1723                 $operator = $2;
1724                 $right    = $3;
1725                 warn
1726     "handling unless (operator)... left:$left operator:$operator right:$right"
1727                 if $DEBUG;
1728             } else {
1729                 $left = $string;
1730             }
1731         }
1732         my $results;
1733
1734 # strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr...
1735         $left =~ s/ .*$//;
1736
1737         # automatic replace for short operators
1738         $left = 'title'            if $left =~ '^ti$';
1739         $left = 'author'           if $left =~ '^au$';
1740         $left = 'publisher'        if $left =~ '^pb$';
1741         $left = 'subject'          if $left =~ '^su$';
1742         $left = 'koha-Auth-Number' if $left =~ '^an$';
1743         $left = 'keyword'          if $left =~ '^kw$';
1744         warn "handling leaf... left:$left operator:$operator right:$right" if $DEBUG;
1745         if ( $operator && $left ne 'keyword' ) {
1746
1747             #do a specific search
1748             my $dbh = C4::Context->dbh;
1749             $operator = 'LIKE' if $operator eq '=' and $right =~ /%/;
1750             my $sth =
1751               $dbh->prepare(
1752 "SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?"
1753               );
1754             warn "$left / $operator / $right\n" if $DEBUG;
1755
1756             # split each word, query the DB and build the biblionumbers result
1757             #sanitizing leftpart
1758             $left =~ s/^\s+|\s+$//;
1759             foreach ( split / /, $right ) {
1760                 my $biblionumbers;
1761                 $_ =~ s/^\s+|\s+$//;
1762                 next unless $_;
1763                 warn "EXECUTE : $server, $left, $_" if $DEBUG;
1764                 $sth->execute( $server, $left, $_ )
1765                   or warn "execute failed: $!";
1766                 while ( my ( $line, $value ) = $sth->fetchrow ) {
1767
1768 # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
1769 # otherwise, fill the result
1770                     $biblionumbers .= $line
1771                       unless ( $right =~ /^\d+$/ && $value =~ /\D/ );
1772                     warn "result : $value "
1773                       . ( $right  =~ /\d/ ) . "=="
1774                       . ( $value =~ /\D/?$line:"" ) if $DEBUG;         #= $line";
1775                 }
1776
1777 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1778                 if ($results) {
1779                     warn "NZAND" if $DEBUG;
1780                     $results = NZoperatorAND($biblionumbers,$results);
1781                 }
1782                 else {
1783                     $results = $biblionumbers;
1784                 }
1785             }
1786         }
1787         else {
1788
1789       #do a complete search (all indexes), if index='kw' do complete search too.
1790             my $dbh = C4::Context->dbh;
1791             my $sth =
1792               $dbh->prepare(
1793 "SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?"
1794               );
1795
1796             # split each word, query the DB and build the biblionumbers result
1797             foreach ( split / /, $string ) {
1798                 next if C4::Context->stopwords->{ uc($_) };   # skip if stopword
1799                 warn "search on all indexes on $_" if $DEBUG;
1800                 my $biblionumbers;
1801                 next unless $_;
1802                 $sth->execute( $server, $_ );
1803                 while ( my $line = $sth->fetchrow ) {
1804                     $biblionumbers .= $line;
1805                 }
1806
1807 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1808                 if ($results) {
1809                     $results = NZoperatorAND($biblionumbers,$results);
1810                 }
1811                 else {
1812                     warn "NEW RES for $_ = $biblionumbers" if $DEBUG;
1813                     $results = $biblionumbers;
1814                 }
1815             }
1816         }
1817         warn "return : $results for LEAF : $string" if $DEBUG;
1818         return $results;
1819     }
1820     warn "---------\nLeave NZanalyse\n---------" if $DEBUG;
1821 }
1822
1823 sub NZoperatorAND{
1824     my ($rightresult, $leftresult)=@_;
1825
1826     my @leftresult = split /;/, $leftresult;
1827     warn " @leftresult / $rightresult \n" if $DEBUG;
1828
1829     #             my @rightresult = split /;/,$leftresult;
1830     my $finalresult;
1831
1832 # parse the left results, and if the biblionumber exist in the right result, save it in finalresult
1833 # the result is stored twice, to have the same weight for AND than OR.
1834 # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130
1835 # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64
1836     foreach (@leftresult) {
1837         my $value = $_;
1838         my $countvalue;
1839         ( $value, $countvalue ) = ( $1, $2 ) if ($value=~/(.*)-(\d+)$/);
1840         if ( $rightresult =~ /\Q$value\E-(\d+);/ ) {
1841             $countvalue = ( $1 > $countvalue ? $countvalue : $1 );
1842             $finalresult .=
1843                 "$value-$countvalue;$value-$countvalue;";
1844         }
1845     }
1846     warn "NZAND DONE : $finalresult \n" if $DEBUG;
1847     return $finalresult;
1848 }
1849
1850 sub NZoperatorOR{
1851     my ($rightresult, $leftresult)=@_;
1852     return $rightresult.$leftresult;
1853 }
1854
1855 sub NZoperatorNOT{
1856     my ($leftresult, $rightresult)=@_;
1857
1858     my @leftresult = split /;/, $leftresult;
1859
1860     #             my @rightresult = split /;/,$leftresult;
1861     my $finalresult;
1862     foreach (@leftresult) {
1863         my $value=$_;
1864         $value=$1 if $value=~m/(.*)-\d+$/;
1865         unless ($rightresult =~ "$value-") {
1866             $finalresult .= "$_;";
1867         }
1868     }
1869     return $finalresult;
1870 }
1871
1872 =head2 NZorder
1873
1874   $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset);
1875
1876   TODO :: Description
1877
1878 =cut
1879
1880 sub NZorder {
1881     my ( $biblionumbers, $ordering, $results_per_page, $offset ) = @_;
1882     warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG;
1883
1884     # order title asc by default
1885     #     $ordering = '1=36 <i' unless $ordering;
1886     $results_per_page = 20 unless $results_per_page;
1887     $offset           = 0  unless $offset;
1888     my $dbh = C4::Context->dbh;
1889
1890     #
1891     # order by POPULARITY
1892     #
1893     if ( $ordering =~ /popularity/ ) {
1894         my %result;
1895         my %popularity;
1896
1897         # popularity is not in MARC record, it's builded from a specific query
1898         my $sth =
1899           $dbh->prepare("select sum(issues) from items where biblionumber=?");
1900         foreach ( split /;/, $biblionumbers ) {
1901             my ( $biblionumber, $title ) = split /,/, $_;
1902             $result{$biblionumber} = GetMarcBiblio($biblionumber);
1903             $sth->execute($biblionumber);
1904             my $popularity = $sth->fetchrow || 0;
1905
1906 # hint : the key is popularity.title because we can have
1907 # many results with the same popularity. In this cas, sub-ordering is done by title
1908 # we also have biblionumber to avoid bug for 2 biblios with the same title & popularity
1909 # (un-frequent, I agree, but we won't forget anything that way ;-)
1910             $popularity{ sprintf( "%10d", $popularity ) . $title
1911                   . $biblionumber } = $biblionumber;
1912         }
1913
1914     # sort the hash and return the same structure as GetRecords (Zebra querying)
1915         my $result_hash;
1916         my $numbers = 0;
1917         if ( $ordering eq 'popularity_dsc' ) {    # sort popularity DESC
1918             foreach my $key ( sort { $b cmp $a } ( keys %popularity ) ) {
1919                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1920                   $result{ $popularity{$key} }->as_usmarc();
1921             }
1922         }
1923         else {                                    # sort popularity ASC
1924             foreach my $key ( sort ( keys %popularity ) ) {
1925                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1926                   $result{ $popularity{$key} }->as_usmarc();
1927             }
1928         }
1929         my $finalresult = ();
1930         $result_hash->{'hits'}         = $numbers;
1931         $finalresult->{'biblioserver'} = $result_hash;
1932         return $finalresult;
1933
1934         #
1935         # ORDER BY author
1936         #
1937     }
1938     elsif ( $ordering =~ /author/ ) {
1939         my %result;
1940         foreach ( split /;/, $biblionumbers ) {
1941             my ( $biblionumber, $title ) = split /,/, $_;
1942             my $record = GetMarcBiblio($biblionumber);
1943             my $author;
1944             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1945                 $author = $record->subfield( '200', 'f' );
1946                 $author = $record->subfield( '700', 'a' ) unless $author;
1947             }
1948             else {
1949                 $author = $record->subfield( '100', 'a' );
1950             }
1951
1952 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1953 # and we don't want to get only 1 result for each of them !!!
1954             $result{ $author . $biblionumber } = $record;
1955         }
1956
1957     # sort the hash and return the same structure as GetRecords (Zebra querying)
1958         my $result_hash;
1959         my $numbers = 0;
1960         if ( $ordering eq 'author_za' ) {    # sort by author desc
1961             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1962                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1963                   $result{$key}->as_usmarc();
1964             }
1965         }
1966         else {                               # sort by author ASC
1967             foreach my $key ( sort ( keys %result ) ) {
1968                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1969                   $result{$key}->as_usmarc();
1970             }
1971         }
1972         my $finalresult = ();
1973         $result_hash->{'hits'}         = $numbers;
1974         $finalresult->{'biblioserver'} = $result_hash;
1975         return $finalresult;
1976
1977         #
1978         # ORDER BY callnumber
1979         #
1980     }
1981     elsif ( $ordering =~ /callnumber/ ) {
1982         my %result;
1983         foreach ( split /;/, $biblionumbers ) {
1984             my ( $biblionumber, $title ) = split /,/, $_;
1985             my $record = GetMarcBiblio($biblionumber);
1986             my $callnumber;
1987             my ( $callnumber_tag, $callnumber_subfield ) =
1988               GetMarcFromKohaField( 'items.itemcallnumber','' );
1989             ( $callnumber_tag, $callnumber_subfield ) =
1990               GetMarcFromKohaField('biblioitems.callnumber','')
1991               unless $callnumber_tag;
1992             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1993                 $callnumber = $record->subfield( '200', 'f' );
1994             }
1995             else {
1996                 $callnumber = $record->subfield( '100', 'a' );
1997             }
1998
1999 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2000 # and we don't want to get only 1 result for each of them !!!
2001             $result{ $callnumber . $biblionumber } = $record;
2002         }
2003
2004     # sort the hash and return the same structure as GetRecords (Zebra querying)
2005         my $result_hash;
2006         my $numbers = 0;
2007         if ( $ordering eq 'call_number_dsc' ) {    # sort by title desc
2008             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2009                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2010                   $result{$key}->as_usmarc();
2011             }
2012         }
2013         else {                                     # sort by title ASC
2014             foreach my $key ( sort { $a cmp $b } ( keys %result ) ) {
2015                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2016                   $result{$key}->as_usmarc();
2017             }
2018         }
2019         my $finalresult = ();
2020         $result_hash->{'hits'}         = $numbers;
2021         $finalresult->{'biblioserver'} = $result_hash;
2022         return $finalresult;
2023     }
2024     elsif ( $ordering =~ /pubdate/ ) {             #pub year
2025         my %result;
2026         foreach ( split /;/, $biblionumbers ) {
2027             my ( $biblionumber, $title ) = split /,/, $_;
2028             my $record = GetMarcBiblio($biblionumber);
2029             my ( $publicationyear_tag, $publicationyear_subfield ) =
2030               GetMarcFromKohaField( 'biblioitems.publicationyear', '' );
2031             my $publicationyear =
2032               $record->subfield( $publicationyear_tag,
2033                 $publicationyear_subfield );
2034
2035 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2036 # and we don't want to get only 1 result for each of them !!!
2037             $result{ $publicationyear . $biblionumber } = $record;
2038         }
2039
2040     # sort the hash and return the same structure as GetRecords (Zebra querying)
2041         my $result_hash;
2042         my $numbers = 0;
2043         if ( $ordering eq 'pubdate_dsc' ) {    # sort by pubyear desc
2044             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2045                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2046                   $result{$key}->as_usmarc();
2047             }
2048         }
2049         else {                                 # sort by pub year ASC
2050             foreach my $key ( sort ( keys %result ) ) {
2051                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2052                   $result{$key}->as_usmarc();
2053             }
2054         }
2055         my $finalresult = ();
2056         $result_hash->{'hits'}         = $numbers;
2057         $finalresult->{'biblioserver'} = $result_hash;
2058         return $finalresult;
2059
2060         #
2061         # ORDER BY title
2062         #
2063     }
2064     elsif ( $ordering =~ /title/ ) {
2065
2066 # the title is in the biblionumbers string, so we just need to build a hash, sort it and return
2067         my %result;
2068         foreach ( split /;/, $biblionumbers ) {
2069             my ( $biblionumber, $title ) = split /,/, $_;
2070
2071 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2072 # and we don't want to get only 1 result for each of them !!!
2073 # hint & speed improvement : we can order without reading the record
2074 # so order, and read records only for the requested page !
2075             $result{ $title . $biblionumber } = $biblionumber;
2076         }
2077
2078     # sort the hash and return the same structure as GetRecords (Zebra querying)
2079         my $result_hash;
2080         my $numbers = 0;
2081         if ( $ordering eq 'title_az' ) {    # sort by title desc
2082             foreach my $key ( sort ( keys %result ) ) {
2083                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2084             }
2085         }
2086         else {                              # sort by title ASC
2087             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2088                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2089             }
2090         }
2091
2092         # limit the $results_per_page to result size if it's more
2093         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2094
2095         # for the requested page, replace biblionumber by the complete record
2096         # speed improvement : avoid reading too much things
2097         for (
2098             my $counter = $offset ;
2099             $counter <= $offset + $results_per_page ;
2100             $counter++
2101           )
2102         {
2103             $result_hash->{'RECORDS'}[$counter] =
2104               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc;
2105         }
2106         my $finalresult = ();
2107         $result_hash->{'hits'}         = $numbers;
2108         $finalresult->{'biblioserver'} = $result_hash;
2109         return $finalresult;
2110     }
2111     else {
2112
2113 #
2114 # order by ranking
2115 #
2116 # we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking
2117         my %result;
2118         my %count_ranking;
2119         foreach ( split /;/, $biblionumbers ) {
2120             my ( $biblionumber, $title ) = split /,/, $_;
2121             $title =~ /(.*)-(\d)/;
2122
2123             # get weight
2124             my $ranking = $2;
2125
2126 # note that we + the ranking because ranking is calculated on weight of EACH term requested.
2127 # if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N
2128 # biblio N has ranking = 6
2129             $count_ranking{$biblionumber} += $ranking;
2130         }
2131
2132 # build the result by "inverting" the count_ranking hash
2133 # hing : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead
2134 #         warn "counting";
2135         foreach ( keys %count_ranking ) {
2136             $result{ sprintf( "%10d", $count_ranking{$_} ) . '-' . $_ } = $_;
2137         }
2138
2139     # sort the hash and return the same structure as GetRecords (Zebra querying)
2140         my $result_hash;
2141         my $numbers = 0;
2142         foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2143             $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2144         }
2145
2146         # limit the $results_per_page to result size if it's more
2147         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2148
2149         # for the requested page, replace biblionumber by the complete record
2150         # speed improvement : avoid reading too much things
2151         for (
2152             my $counter = $offset ;
2153             $counter <= $offset + $results_per_page ;
2154             $counter++
2155           )
2156         {
2157             $result_hash->{'RECORDS'}[$counter] =
2158               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc
2159               if $result_hash->{'RECORDS'}[$counter];
2160         }
2161         my $finalresult = ();
2162         $result_hash->{'hits'}         = $numbers;
2163         $finalresult->{'biblioserver'} = $result_hash;
2164         return $finalresult;
2165     }
2166 }
2167
2168 END { }    # module clean-up code here (global destructor)
2169
2170 1;
2171 __END__
2172
2173 =head1 AUTHOR
2174
2175 Koha Developement team <info@koha.org>
2176
2177 =cut