C4/Search.pm

   1 package C4::Search;
   2
   3 # This file is part of Koha.
   4 #
   5 # Koha is free software; you can redistribute it and/or modify it under the
   6 # terms of the GNU General Public License as published by the Free Software
   7 # Foundation; either version 2 of the License, or (at your option) any later
   8 # version.
   9 #
  10 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
  11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12 # A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU General Public License along with
  15 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  16 # Suite 330, Boston, MA  02111-1307 USA
  17
  18 use strict;
  19 # use warnings; # FIXME
  20 require Exporter;
  21 use C4::Context;
  22 use C4::Biblio;    # GetMarcFromKohaField, GetBiblioData
  23 use C4::Koha;      # getFacets
  24 use Lingua::Stem;
  25 use C4::Search::PazPar2;
  26 use XML::Simple;
  27 use C4::Dates qw(format_date);
  28 use C4::XSLT;
  29 use C4::Branch;
  30 use URI::Escape;
  31
  32 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
  33
  34 # set the version for version checking
  35 BEGIN {
  36     $VERSION = 3.01;
  37     $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
  38 }
  39
  40 =head1 NAME
  41
  42 C4::Search - Functions for searching the Koha catalog.
  43
  44 =head1 SYNOPSIS
  45
  46 See opac/opac-search.pl or catalogue/search.pl for example of usage
  47
  48 =head1 DESCRIPTION
  49
  50 This module provides searching functions for Koha's bibliographic databases
  51
  52 =head1 FUNCTIONS
  53
  54 =cut
  55
  56 @ISA    = qw(Exporter);
  57 @EXPORT = qw(
  58   &FindDuplicate
  59   &SimpleSearch
  60   &searchResults
  61   &getRecords
  62   &buildQuery
  63   &NZgetRecords
  64 );
  65
  66 # make all your functions, whether exported or not;
  67
  68 =head2 FindDuplicate
  69
   70 ($biblionumber1, $title1, $biblionumber2, $title2, ...) = FindDuplicate($record);
  71
  72 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
  73
  74 =cut
  75
  76 sub FindDuplicate {
  77     my ($record) = @_;
  78     my $dbh = C4::Context->dbh;
  79     my $result = TransformMarcToKoha( $dbh, $record, '' );
  80     my $sth;
  81     my $query;
  82     my $search;
  83     my $type;
  84     my ( $biblionumber, $title );
  85
  86     # search duplicate on ISBN, easy and fast..
  87     # ... normalize first
  88     if ( $result->{isbn} ) {
  89         $result->{isbn} =~ s/\(.*$//;
  90         $result->{isbn} =~ s/\s+$//;
  91         $query = "isbn=$result->{isbn}";
  92     }
  93     else {
  94         $result->{title} =~ s /\\//g;
  95         $result->{title} =~ s /\"//g;
  96         $result->{title} =~ s /\(//g;
  97         $result->{title} =~ s /\)//g;
  98
  99         # FIXME: instead of removing operators, could just do
 100         # quotes around the value
 101         $result->{title} =~ s/(and|or|not)//g;
 102         $query = "ti,ext=$result->{title}";
 103         $query .= " and itemtype=$result->{itemtype}"
 104           if ( $result->{itemtype} );
 105         if   ( $result->{author} ) {
 106             $result->{author} =~ s /\\//g;
 107             $result->{author} =~ s /\"//g;
 108             $result->{author} =~ s /\(//g;
 109             $result->{author} =~ s /\)//g;
 110
 111             # remove valid operators
 112             $result->{author} =~ s/(and|or|not)//g;
 113             $query .= " and au,ext=$result->{author}";
 114         }
 115     }
 116
 117     # FIXME: add error handling
 118     my ( $error, $searchresults ) = SimpleSearch($query); # FIXME :: hardcoded !
 119     my @results;
 120     foreach my $possible_duplicate_record (@$searchresults) {
 121         my $marcrecord =
 122           MARC::Record->new_from_usmarc($possible_duplicate_record);
 123         my $result = TransformMarcToKoha( $dbh, $marcrecord, '' );
 124
 125         # FIXME :: why 2 $biblionumber ?
 126         if ($result) {
 127             push @results, $result->{'biblionumber'};
 128             push @results, $result->{'title'};
 129         }
 130     }
 131     return @results;
 132 }
 133
 134 =head2 SimpleSearch
 135
 136 ( $error, $results, $total_hits ) = SimpleSearch( $query, $offset, $max_results, [@servers] );
 137
 138 This function provides a simple search API on the bibliographic catalog
 139
 140 =over 2
 141
 142 =item C<input arg:>
 143
 144     * $query can be a simple keyword or a complete CCL query
 145     * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
  146     * $offset - If present, represents the number of records at the beginning to omit. Defaults to 0
 147     * $max_results - if present, determines the maximum number of records to fetch. undef is All. defaults to undef.
 148
 149
 150 =item C<Output:>
 151
  152     * $error is undef unless an error is detected
 153     * \@results is an array of records.
 154     * $total_hits is the number of hits that would have been returned with no limit
 155
 156 =item C<usage in the script:>
 157
 158 =back
 159
 160 my ( $error, $marcresults, $total_hits ) = SimpleSearch($query);
 161
 162 if (defined $error) {
 163     $template->param(query_error => $error);
 164     warn "error: ".$error;
 165     output_html_with_http_headers $input, $cookie, $template->output;
 166     exit;
 167 }
 168
 169 my $hits = scalar @$marcresults;
 170 my @results;
 171
  172 for my $i (0 .. $hits - 1) {
 173     my %resultsloop;
 174     my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]);
 175     my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,'');
 176
 177     #build the hash for the template.
 178     $resultsloop{title}           = $biblio->{'title'};
 179     $resultsloop{subtitle}        = $biblio->{'subtitle'};
 180     $resultsloop{biblionumber}    = $biblio->{'biblionumber'};
 181     $resultsloop{author}          = $biblio->{'author'};
 182     $resultsloop{publishercode}   = $biblio->{'publishercode'};
 183     $resultsloop{publicationyear} = $biblio->{'publicationyear'};
 184
 185     push @results, \%resultsloop;
 186 }
 187
 188 $template->param(result=>\@results);
 189
 190 =cut
 191
 192 sub SimpleSearch {
 193     my ( $query, $offset, $max_results, $servers )  = @_;
 194
 195     if ( C4::Context->preference('NoZebra') ) {
 196         my $result = NZorder( NZanalyse($query) )->{'biblioserver'};
 197         my $search_result =
 198           (      $result->{hits}
 199               && $result->{hits} > 0 ? $result->{'RECORDS'} : [] );
 200         return ( undef, $search_result, scalar($result->{hits}) );
 201     }
 202     else {
 203         # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
 204         my @servers = defined ( $servers ) ? @$servers : ( "biblioserver" );
 205         my @results;
 206         my @zoom_queries;
 207         my @tmpresults;
 208         my @zconns;
 209         my $total_hits;
 210         return ( "No query entered", undef, undef ) unless $query;
 211
 212         # Initialize & Search Zebra
 213         for ( my $i = 0 ; $i < @servers ; $i++ ) {
 214             eval {
 215                 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 216                 $zoom_queries[$i] = new ZOOM::Query::CCL2RPN( $query, $zconns[$i]);
 217                 $tmpresults[$i] = $zconns[$i]->search( $zoom_queries[$i] );
 218
 219                 # error handling
 220                 my $error =
 221                     $zconns[$i]->errmsg() . " ("
 222                   . $zconns[$i]->errcode() . ") "
 223                   . $zconns[$i]->addinfo() . " "
 224                   . $zconns[$i]->diagset();
 225
 226                 return ( $error, undef, undef ) if $zconns[$i]->errcode();
 227             };
 228             if ($@) {
 229
 230                 # caught a ZOOM::Exception
 231                 my $error =
 232                     $@->message() . " ("
 233                   . $@->code() . ") "
 234                   . $@->addinfo() . " "
 235                   . $@->diagset();
 236                 warn $error;
 237                 return ( $error, undef, undef );
 238             }
 239         }
 240         while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 241             my $event = $zconns[ $i - 1 ]->last_event();
 242             if ( $event == ZOOM::Event::ZEND ) {
 243
 244                 my $first_record = defined( $offset ) ? $offset+1 : 1;
 245                 my $hits = $tmpresults[ $i - 1 ]->size();
 246                 $total_hits += $hits;
 247                 my $last_record = $hits;
 248                 if ( defined $max_results && $offset + $max_results < $hits ) {
 249                     $last_record  = $offset + $max_results;
 250                 }
 251
 252                 for my $j ( $first_record..$last_record ) {
 253                     my $record = $tmpresults[ $i - 1 ]->record( $j-1 )->raw(); # 0 indexed
 254                     push @results, $record;
 255                 }
 256             }
 257         }
 258
 259         foreach my $result (@tmpresults) {
 260             $result->destroy();
 261         }
 262         foreach my $zoom_query (@zoom_queries) {
 263             $zoom_query->destroy();
 264         }
 265
 266         return ( undef, \@results, $total_hits );
 267     }
 268 }
 269
 270 =head2 getRecords
 271
 272 ( undef, $results_hashref, \@facets_loop ) = getRecords (
 273
 274         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 275         $results_per_page, $offset,       $expanded_facet, $branches,
 276         $query_type,       $scan
 277     );
 278
 279 The all singing, all dancing, multi-server, asynchronous, scanning,
 280 searching, record nabbing, facet-building
 281
  282 See verbose embedded documentation.
 283
 284 =cut
 285
 286 sub getRecords {
 287     my (
 288         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 289         $results_per_page, $offset,       $expanded_facet, $branches,
 290         $query_type,       $scan
 291     ) = @_;
 292
 293     my @servers = @$servers_ref;
 294     my @sort_by = @$sort_by_ref;
 295
 296     # Initialize variables for the ZOOM connection and results object
 297     my $zconn;
 298     my @zconns;
 299     my @results;
 300     my $results_hashref = ();
 301
 302     # Initialize variables for the faceted results objects
 303     my $facets_counter = ();
 304     my $facets_info    = ();
 305     my $facets         = getFacets();
 306
 307     my @facets_loop
 308       ;    # stores the ref to array of hashes for template facets loop
 309
 310     ### LOOP THROUGH THE SERVERS
 311     for ( my $i = 0 ; $i < @servers ; $i++ ) {
 312         $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 313
 314 # perform the search, create the results objects
 315 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
 316         my $query_to_use = ($servers[$i] =~ /biblioserver/) ? $koha_query : $simple_query;
 317
 318         #$query_to_use = $simple_query if $scan;
 319         warn $simple_query if ( $scan and $DEBUG );
 320
 321         # Check if we've got a query_type defined, if so, use it
 322         eval {
 323             if ($query_type) {
 324                 if ($query_type =~ /^ccl/) {
 325                     $query_to_use =~ s/\:/\=/g;    # change : to = last minute (FIXME)
 326                     $results[$i] = $zconns[$i]->search(new ZOOM::Query::CCL2RPN($query_to_use, $zconns[$i]));
 327                 } elsif ($query_type =~ /^cql/) {
 328                     $results[$i] = $zconns[$i]->search(new ZOOM::Query::CQL($query_to_use, $zconns[$i]));
 329                 } elsif ($query_type =~ /^pqf/) {
 330                     $results[$i] = $zconns[$i]->search(new ZOOM::Query::PQF($query_to_use, $zconns[$i]));
 331                 } else {
 332                     warn "Unknown query_type '$query_type'.  Results undetermined.";
 333                 }
 334             } elsif ($scan) {
 335                     $results[$i] = $zconns[$i]->scan(  new ZOOM::Query::CCL2RPN($query_to_use, $zconns[$i]));
 336             } else {
 337                     $results[$i] = $zconns[$i]->search(new ZOOM::Query::CCL2RPN($query_to_use, $zconns[$i]));
 338             }
 339         };
 340         if ($@) {
 341             warn "WARNING: query problem with $query_to_use " . $@;
 342         }
 343
 344         # Concatenate the sort_by limits and pass them to the results object
 345         # Note: sort will override rank
 346         my $sort_by;
 347         foreach my $sort (@sort_by) {
 348             if ( $sort eq "author_az" ) {
 349                 $sort_by .= "1=1003 <i ";
 350             }
 351             elsif ( $sort eq "author_za" ) {
 352                 $sort_by .= "1=1003 >i ";
 353             }
 354             elsif ( $sort eq "popularity_asc" ) {
 355                 $sort_by .= "1=9003 <i ";
 356             }
 357             elsif ( $sort eq "popularity_dsc" ) {
 358                 $sort_by .= "1=9003 >i ";
 359             }
 360             elsif ( $sort eq "call_number_asc" ) {
 361                 $sort_by .= "1=20  <i ";
 362             }
 363             elsif ( $sort eq "call_number_dsc" ) {
 364                 $sort_by .= "1=20 >i ";
 365             }
 366             elsif ( $sort eq "pubdate_asc" ) {
 367                 $sort_by .= "1=31 <i ";
 368             }
 369             elsif ( $sort eq "pubdate_dsc" ) {
 370                 $sort_by .= "1=31 >i ";
 371             }
 372             elsif ( $sort eq "acqdate_asc" ) {
 373                 $sort_by .= "1=32 <i ";
 374             }
 375             elsif ( $sort eq "acqdate_dsc" ) {
 376                 $sort_by .= "1=32 >i ";
 377             }
 378             elsif ( $sort eq "title_az" ) {
 379                 $sort_by .= "1=4 <i ";
 380             }
 381             elsif ( $sort eq "title_za" ) {
 382                 $sort_by .= "1=4 >i ";
 383             }
 384             else {
 385                 warn "Ignoring unrecognized sort '$sort' requested" if $sort_by;
 386             }
 387         }
 388         if ($sort_by) {
 389             if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
 390                 warn "WARNING sort $sort_by failed";
 391             }
 392         }
 393     }    # finished looping through servers
 394
 395     # The big moment: asynchronously retrieve results from all servers
 396     while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 397         my $ev = $zconns[ $i - 1 ]->last_event();
 398         if ( $ev == ZOOM::Event::ZEND ) {
 399             next unless $results[ $i - 1 ];
 400             my $size = $results[ $i - 1 ]->size();
 401             if ( $size > 0 ) {
 402                 my $results_hash;
 403
 404                 # loop through the results
 405                 $results_hash->{'hits'} = $size;
 406                 my $times;
 407                 if ( $offset + $results_per_page <= $size ) {
 408                     $times = $offset + $results_per_page;
 409                 }
 410                 else {
 411                     $times = $size;
 412                 }
 413                 for ( my $j = $offset ; $j < $times ; $j++ ) {
 414                     my $records_hash;
 415                     my $record;
 416                     my $facet_record;
 417
 418                     ## Check if it's an index scan
 419                     if ($scan) {
 420                         my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
 421
 422                  # here we create a minimal MARC record and hand it off to the
 423                  # template just like a normal result ... perhaps not ideal, but
 424                  # it works for now
 425                         my $tmprecord = MARC::Record->new();
 426                         $tmprecord->encoding('UTF-8');
 427                         my $tmptitle;
 428                         my $tmpauthor;
 429
 430                 # the minimal record in author/title (depending on MARC flavour)
 431                         if (C4::Context->preference("marcflavour") eq "UNIMARC") {
 432                             $tmptitle = MARC::Field->new('200',' ',' ', a => $term, f => $occ);
 433                             $tmprecord->append_fields($tmptitle);
 434                         } else {
 435                             $tmptitle  = MARC::Field->new('245',' ',' ', a => $term,);
 436                             $tmpauthor = MARC::Field->new('100',' ',' ', a => $occ,);
 437                             $tmprecord->append_fields($tmptitle);
 438                             $tmprecord->append_fields($tmpauthor);
 439                         }
 440                         $results_hash->{'RECORDS'}[$j] = $tmprecord->as_usmarc();
 441                     }
 442
 443                     # not an index scan
 444                     else {
 445                         $record = $results[ $i - 1 ]->record($j)->raw();
 446
 447                         # warn "RECORD $j:".$record;
 448                         $results_hash->{'RECORDS'}[$j] = $record;
 449
 450             # Fill the facets while we're looping, but only for the biblioserver
 451                         $facet_record = MARC::Record->new_from_usmarc($record)
 452                           if $servers[ $i - 1 ] =~ /biblioserver/;
 453
 454                     #warn $servers[$i-1]."\n".$record; #.$facet_record->title();
 455                         if ($facet_record) {
 456                             for ( my $k = 0 ; $k <= @$facets ; $k++ ) {
 457                                 ($facets->[$k]) or next;
 458                                 my @fields = map {$facet_record->field($_)} @{$facets->[$k]->{'tags'}} ;
 459                                 for my $field (@fields) {
 460                                     my @subfields = $field->subfields();
 461                                     for my $subfield (@subfields) {
 462                                         my ( $code, $data ) = @$subfield;
 463                                         ($code eq $facets->[$k]->{'subfield'}) or next;
 464                                         $facets_counter->{ $facets->[$k]->{'link_value'} }->{$data}++;
 465                                     }
 466                                 }
 467                                 $facets_info->{ $facets->[$k]->{'link_value'} }->{'label_value'} =
 468                                     $facets->[$k]->{'label_value'};
 469                                 $facets_info->{ $facets->[$k]->{'link_value'} }->{'expanded'} =
 470                                     $facets->[$k]->{'expanded'};
 471                             }
 472                         }
 473                     }
 474                 }
 475                 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
 476             }
 477
 478             # warn "connection ", $i-1, ": $size hits";
 479             # warn $results[$i-1]->record(0)->render() if $size > 0;
 480
 481             # BUILD FACETS
 482             if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
 483                 for my $link_value (
 484                     sort { $facets_counter->{$b} <=> $facets_counter->{$a} }
 485                         keys %$facets_counter )
 486                 {
 487                     my $expandable;
 488                     my $number_of_facets;
 489                     my @this_facets_array;
 490                     for my $one_facet (
 491                         sort {
 492                              $facets_counter->{$link_value}->{$b}
 493                          <=> $facets_counter->{$link_value}->{$a}
 494                         } keys %{ $facets_counter->{$link_value} }
 495                       )
 496                     {
 497                         $number_of_facets++;
 498                         if (   ( $number_of_facets < 6 )
 499                             || ( $expanded_facet eq $link_value )
 500                             || ( $facets_info->{$link_value}->{'expanded'} ) )
 501                         {
 502
 503                       # Sanitize the link value ), ( will cause errors with CCL,
 504                             my $facet_link_value = $one_facet;
 505                             $facet_link_value =~ s/(\(|\))/ /g;
 506
 507                             # fix the length that will display in the label,
 508                             my $facet_label_value = $one_facet;
 509                             $facet_label_value =
 510                               substr( $one_facet, 0, 20 ) . "..."
 511                               unless length($facet_label_value) <= 20;
 512
 513                             # if it's a branch, label by the name, not the code,
 514                             if ( $link_value =~ /branch/ ) {
 515                                 $facet_label_value =
 516                                   $branches->{$one_facet}->{'branchname'};
 517                             }
 518
 519                             # but we're down with the whole label being in the link's title.
 520                             push @this_facets_array, {
 521                                 facet_count       => $facets_counter->{$link_value}->{$one_facet},
 522                                 facet_label_value => $facet_label_value,
 523                                 facet_title_value => $one_facet,
 524                                 facet_link_value  => $facet_link_value,
 525                                 type_link_value   => $link_value,
 526                             };
 527                         }
 528                     }
 529
 530                     # handle expanded option
 531                     unless ( $facets_info->{$link_value}->{'expanded'} ) {
 532                         $expandable = 1
 533                           if ( ( $number_of_facets > 6 )
 534                             && ( $expanded_facet ne $link_value ) );
 535                     }
 536                     push @facets_loop, {
 537                         type_link_value => $link_value,
 538                         type_id         => $link_value . "_id",
 539                         "type_label_" . $facets_info->{$link_value}->{'label_value'} => 1,
 540                         facets     => \@this_facets_array,
 541                         expandable => $expandable,
 542                         expand     => $link_value,
 543                     } unless ( ($facets_info->{$link_value}->{'label_value'} =~ /Libraries/) and (C4::Context->preference('singleBranchMode')) );
 544                 }
 545             }
 546         }
 547     }
 548     return ( undef, $results_hashref, \@facets_loop );
 549 }
 550
 551 sub pazGetRecords {
 552     my (
 553         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 554         $results_per_page, $offset,       $expanded_facet, $branches,
 555         $query_type,       $scan
 556     ) = @_;
 557
 558     my $paz = C4::Search::PazPar2->new(C4::Context->config('pazpar2url'));
 559     $paz->init();
 560     $paz->search($simple_query);
 561     sleep 1;
 562
 563     # do results
 564     my $results_hashref = {};
 565     my $stats = XMLin($paz->stat);
 566     my $results = XMLin($paz->show($offset, $results_per_page, 'work-title:1'), forcearray => 1);
 567
 568     # for a grouped search result, the number of hits
 569     # is the number of groups returned; 'bib_hits' will have
 570     # the total number of bibs.
 571     $results_hashref->{'biblioserver'}->{'hits'} = $results->{'merged'}->[0];
 572     $results_hashref->{'biblioserver'}->{'bib_hits'} = $stats->{'hits'};
 573
 574     HIT: foreach my $hit (@{ $results->{'hit'} }) {
 575         my $recid = $hit->{recid}->[0];
 576
 577         my $work_title = $hit->{'md-work-title'}->[0];
 578         my $work_author;
 579         if (exists $hit->{'md-work-author'}) {
 580             $work_author = $hit->{'md-work-author'}->[0];
 581         }
 582         my $group_label = (defined $work_author) ? "$work_title / $work_author" : $work_title;
 583
 584         my $result_group = {};
 585         $result_group->{'group_label'} = $group_label;
 586         $result_group->{'group_merge_key'} = $recid;
 587
 588         my $count = 1;
 589         if (exists $hit->{count}) {
 590             $count = $hit->{count}->[0];
 591         }
 592         $result_group->{'group_count'} = $count;
 593
 594         for (my $i = 0; $i < $count; $i++) {
 595             # FIXME -- may need to worry about diacritics here
 596             my $rec = $paz->record($recid, $i);
 597             push @{ $result_group->{'RECORDS'} }, $rec;
 598         }
 599
 600         push @{ $results_hashref->{'biblioserver'}->{'GROUPS'} }, $result_group;
 601     }
 602
 603     # pass through facets
 604     my $termlist_xml = $paz->termlist('author,subject');
 605     my $terms = XMLin($termlist_xml, forcearray => 1);
 606     my @facets_loop = ();
 607     #die Dumper($results);
 608 #    foreach my $list (sort keys %{ $terms->{'list'} }) {
 609 #        my @facets = ();
 610 #        foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) {
 611 #            push @facets, {
 612 #                facet_label_value => $facet->{'name'}->[0],
 613 #            };
 614 #        }
 615 #        push @facets_loop, ( {
 616 #            type_label => $list,
 617 #            facets => \@facets,
 618 #        } );
 619 #    }
 620
 621     return ( undef, $results_hashref, \@facets_loop );
 622 }
 623
 624 # STOPWORDS
 625 sub _remove_stopwords {
 626     my ( $operand, $index ) = @_;
 627     my @stopwords_removed;
 628
 629     # phrase and exact-qualified indexes shouldn't have stopwords removed
 630     if ( $index !~ m/phr|ext/ ) {
 631
 632 # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
 633 #       we use IsAlpha unicode definition, to deal correctly with diacritics.
 634 #       otherwise, a French word like "leçon" woudl be split into "le" "çon", "le"
 635 #       is a stopword, we'd get "çon" and wouldn't find anything...
 636                 foreach ( keys %{ C4::Context->stopwords } ) {
 637                         next if ( $_ =~ /(and|or|not)/ );    # don't remove operators
 638                         if ( my ($matched) = ($operand =~
 639                                 /(\P{IsAlnum}\Q$_\E\P{IsAlnum}|^\Q$_\E\P{IsAlnum}|\P{IsAlnum}\Q$_\E$|^\Q$_\E$)/gi) )
 640                         {
 641                                 $operand =~ s/\Q$matched\E/ /gi;
 642                                 push @stopwords_removed, $_;
 643                         }
 644                 }
 645         }
 646     return ( $operand, \@stopwords_removed );
 647 }
 648
 649 # TRUNCATION
 650 sub _detect_truncation {
 651     my ( $operand, $index ) = @_;
 652     my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
 653         @regexpr );
 654     $operand =~ s/^ //g;
 655     my @wordlist = split( /\s/, $operand );
 656     foreach my $word (@wordlist) {
 657         if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
 658             push @rightlefttruncated, $word;
 659         }
 660         elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
 661             push @lefttruncated, $word;
 662         }
 663         elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
 664             push @righttruncated, $word;
 665         }
 666         elsif ( index( $word, "*" ) < 0 ) {
 667             push @nontruncated, $word;
 668         }
 669         else {
 670             push @regexpr, $word;
 671         }
 672     }
 673     return (
 674         \@nontruncated,       \@righttruncated, \@lefttruncated,
 675         \@rightlefttruncated, \@regexpr
 676     );
 677 }
 678
 679 # STEMMING
 680 sub _build_stemmed_operand {
 681     my ($operand) = @_;
 682     my $stemmed_operand;
 683
 684     # If operand contains a digit, it is almost certainly an identifier, and should
 685     # not be stemmed.  This is particularly relevant for ISBNs and ISSNs, which
 686     # can contain the letter "X" - for example, _build_stemmend_operand would reduce
 687     # "014100018X" to "x ", which for a MARC21 database would bring up irrelevant
 688     # results (e.g., "23 x 29 cm." from the 300$c).  Bug 2098.
 689     return $operand if $operand =~ /\d/;
 690
 691 # FIXME: the locale should be set based on the user's language and/or search choice
 692     my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
 693
 694 # FIXME: these should be stored in the db so the librarian can modify the behavior
 695     $stemmer->add_exceptions(
 696         {
 697             'and' => 'and',
 698             'or'  => 'or',
 699             'not' => 'not',
 700         }
 701     );
 702     my @words = split( / /, $operand );
 703     my $stems = $stemmer->stem(@words);
 704     for my $stem (@$stems) {
 705         $stemmed_operand .= "$stem";
 706         $stemmed_operand .= "?"
 707           unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
 708         $stemmed_operand .= " ";
 709     }
 710     warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
 711     return $stemmed_operand;
 712 }
 713
 714 # FIELD WEIGHTING
 715 sub _build_weighted_query {
 716
 717 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
 718 # pretty well but could work much better if we had a smarter query parser
 719     my ( $operand, $stemmed_operand, $index ) = @_;
 720     my $stemming      = C4::Context->preference("QueryStemming")     || 0;
 721     my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
 722     my $fuzzy_enabled = C4::Context->preference("QueryFuzzy")        || 0;
 723
 724     my $weighted_query .= "(rk=(";    # Specifies that we're applying rank
 725
 726     # Keyword, or, no index specified
 727     if ( ( $index eq 'kw' ) || ( !$index ) ) {
 728         $weighted_query .=
 729           "Title-cover,ext,r1=\"$operand\"";    # exact title-cover
 730         $weighted_query .= " or ti,ext,r2=\"$operand\"";    # exact title
 731         $weighted_query .= " or ti,phr,r3=\"$operand\"";    # phrase title
 732           #$weighted_query .= " or any,ext,r4=$operand";               # exact any
 733           #$weighted_query .=" or kw,wrdl,r5=\"$operand\"";            # word list any
 734         $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
 735           if $fuzzy_enabled;    # add fuzzy, word list
 736         $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
 737           if ( $stemming and $stemmed_operand )
 738           ;                     # add stemming, right truncation
 739         $weighted_query .= " or wrdl,r9=\"$operand\"";
 740
 741         # embedded sorting: 0 a-z; 1 z-a
 742         # $weighted_query .= ") or (sort1,aut=1";
 743     }
 744
 745     # Barcode searches should skip this process
 746     elsif ( $index eq 'bc' ) {
 747         $weighted_query .= "bc=\"$operand\"";
 748     }
 749
 750     # Authority-number searches should skip this process
 751     elsif ( $index eq 'an' ) {
 752         $weighted_query .= "an=\"$operand\"";
 753     }
 754
 755     # If the index already has more than one qualifier, wrap the operand
 756     # in quotes and pass it back (assumption is that the user knows what they
 757     # are doing and won't appreciate us mucking up their query
 758     elsif ( $index =~ ',' ) {
 759         $weighted_query .= " $index=\"$operand\"";
 760     }
 761
 762     #TODO: build better cases based on specific search indexes
 763     else {
 764         $weighted_query .= " $index,ext,r1=\"$operand\"";    # exact index
 765           #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
 766         $weighted_query .= " or $index,phr,r3=\"$operand\"";    # phrase index
 767         $weighted_query .=
 768           " or $index,rt,wrdl,r3=\"$operand\"";    # word list index
 769     }
 770
 771     $weighted_query .= "))";                       # close rank specification
 772     return $weighted_query;
 773 }
 774
 775 =head2 getIndexes
 776
 777 Return a reference to an array of the available index names.
 778
 779 =cut
 780
 781 sub getIndexes{
 782     my @indexes = (
 783                     # biblio indexes
 784                     'ab',
 785                     'Abstract',
 786                     'acqdate',
 787                     'allrecords',
 788                     'an',
 789                     'Any',
 790                     'at',
 791                     'au',
 792                     'aub',
 793                     'aud',
 794                     'audience',
 795                     'auo',
 796                     'aut',
 797                     'Author',
 798                     'Author-in-order ',
 799                     'Author-personal-bibliography',
 800                     'Authority-Number',
 801                     'authtype',
 802                     'bc',
 803                     'biblionumber',
 804                     'bio',
 805                     'biography',
 806                     'callnum',
 807                     'cfn',
 808                     'Chronological-subdivision',
 809                     'cn-bib-source',
 810                     'cn-bib-sort',
 811                     'cn-class',
 812                     'cn-item',
 813                     'cn-prefix',
 814                     'cn-suffix',
 815                     'cpn',
 816                     'Code-institution',
 817                     'Conference-name',
 818                     'Conference-name-heading',
 819                     'Conference-name-see',
 820                     'Conference-name-seealso',
 821                     'Content-type',
 822                     'Control-number',
 823                     'copydate',
 824                     'Corporate-name',
 825                     'Corporate-name-heading',
 826                     'Corporate-name-see',
 827                     'Corporate-name-seealso',
 828                     'ctype',
 829                     'date-entered-on-file',
 830                     'Date-of-acquisition',
 831                     'Date-of-publication',
 832                     'Dewey-classification',
 833                     'extent',
 834                     'fic',
 835                     'fiction',
 836                     'Form-subdivision',
 837                     'format',
 838                     'Geographic-subdivision',
 839                     'he',
 840                     'Heading',
 841                     'Heading-use-main-or-added-entry',
 842                     'Heading-use-series-added-entry ',
 843                     'Heading-use-subject-added-entry',
 844                     'Host-item',
 845                     'id-other',
 846                     'Illustration-code',
 847                     'ISBN',
 848                     'ISSN',
 849                     'itemtype',
 850                     'kw',
 851                     'Koha-Auth-Number',
 852                     'l-format',
 853                     'language',
 854                     'lc-card',
 855                     'LC-card-number',
 856                     'lcn',
 857                     'llength',
 858                     'ln',
 859                     'Local-classification',
 860                     'Local-number',
 861                     'Match-heading',
 862                     'Match-heading-see-from',
 863                     'Material-type',
 864                     'mc-itemtype',
 865                     'mc-rtype',
 866                     'mus',
 867                     'Name-geographic',
 868                     'Name-geographic-heading',
 869                     'Name-geographic-see',
 870                     'Name-geographic-seealso',
 871                     'nb',
 872                     'Note',
 873                     'ns',
 874                     'nt',
 875                     'pb',
 876                     'Personal-name',
 877                     'Personal-name-heading',
 878                     'Personal-name-see',
 879                     'Personal-name-seealso',
 880                     'pl',
 881                     'Place-publication',
 882                     'pn',
 883                     'popularity',
 884                     'pubdate',
 885                     'Publisher',
 886                     'Record-type',
 887                     'rtype',
 888                     'se',
 889                     'See',
 890                     'See-also',
 891                     'sn',
 892                     'Stock-number',
 893                     'su',
 894                     'Subject',
 895                     'Subject-heading-thesaurus',
 896                     'Subject-name-personal',
 897                     'Subject-subdivision',
 898                     'Summary',
 899                     'Suppress',
 900                     'su-geo',
 901                     'su-na',
 902                     'su-to',
 903                     'su-ut',
 904                     'ut',
 905                     'Term-genre-form',
 906                     'Term-genre-form-heading',
 907                     'Term-genre-form-see',
 908                     'Term-genre-form-seealso',
 909                     'ti',
 910                     'Title',
 911                     'Title-cover',
 912                     'Title-series',
 913                     'Title-uniform',
 914                     'Title-uniform-heading',
 915                     'Title-uniform-see',
 916                     'Title-uniform-seealso',
 917                     'totalissues',
 918                     'yr',
 919
 920                     # items indexes
 921                     'acqsource',
 922                     'barcode',
 923                     'bc',
 924                     'branch',
 925                     'ccode',
 926                     'classification-source',
 927                     'cn-sort',
 928                     'coded-location-qualifier',
 929                     'copynumber',
 930                     'damaged',
 931                     'datelastborrowed',
 932                     'datelastseen',
 933                     'holdingbranch',
 934                     'homebranch',
 935                     'issues',
 936                     'itemnumber',
 937                     'itype',
 938                     'Local-classification',
 939                     'location',
 940                     'lost',
 941                     'materials-specified',
 942                     'mc-ccode',
 943                     'mc-itype',
 944                     'mc-loc',
 945                     'notforloan',
 946                     'onloan',
 947                     'price',
 948                     'renewals',
 949                     'replacementprice',
 950                     'replacementpricedate',
 951                     'reserves',
 952                     'restricted',
 953                     'stack',
 954                     'uri',
 955                     'withdrawn',
 956
 957                     # subject related
 958                   );
 959
 960     return \@indexes;
 961 }
 962
 963 =head2 buildQuery
 964
 965 ( $error, $query,
 966 $simple_query, $query_cgi,
 967 $query_desc, $limit,
 968 $limit_cgi, $limit_desc,
 969 $stopwords_removed, $query_type ) = buildQuery ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
 970
 971 Build queries and limits in CCL, CGI, Human,
 972 handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
 973
 974 See verbose embedded documentation.
 975
 976
 977 =cut
 978
 979 sub buildQuery {
 980     my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_;
 981
 982     warn "---------\nEnter buildQuery\n---------" if $DEBUG;
 983
 984     # dereference
 985     my @operators = $operators ? @$operators : ();
 986     my @indexes   = $indexes   ? @$indexes   : ();
 987     my @operands  = $operands  ? @$operands  : ();
 988     my @limits    = $limits    ? @$limits    : ();
 989     my @sort_by   = $sort_by   ? @$sort_by   : ();
 990
 991     my $stemming         = C4::Context->preference("QueryStemming")        || 0;
 992     my $auto_truncation  = C4::Context->preference("QueryAutoTruncate")    || 0;
 993     my $weight_fields    = C4::Context->preference("QueryWeightFields")    || 0;
 994     my $fuzzy_enabled    = C4::Context->preference("QueryFuzzy")           || 0;
 995     my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
 996
 997     # no stemming/weight/fuzzy in NoZebra
 998     if ( C4::Context->preference("NoZebra") ) {
 999         $stemming      = 0;
1000         $weight_fields = 0;
1001         $fuzzy_enabled = 0;
1002     }
1003
1004     my $query        = $operands[0];
1005     my $simple_query = $operands[0];
1006
1007     # initialize the variables we're passing back
1008     my $query_cgi;
1009     my $query_desc;
1010     my $query_type;
1011
1012     my $limit;
1013     my $limit_cgi;
1014     my $limit_desc;
1015
1016     my $stopwords_removed;    # flag to determine if stopwords have been removed
1017
1018     my $cclq;
1019     my $cclindexes = getIndexes();
1020     if( $query !~ /\s*ccl=/ ){
1021         for my $index (@$cclindexes){
1022             if($query =~ /($index)(,?\w)*:/){
1023                 $cclq = 1;
1024             }
1025         }
1026         $query = "ccl=$query" if($cclq);
1027     }
1028
1029 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
1030 # DIAGNOSTIC ONLY!!
1031     if ( $query =~ /^ccl=/ ) {
1032         return ( undef, $', $', "q=ccl=$'", $', '', '', '', '', 'ccl' );
1033     }
1034     if ( $query =~ /^cql=/ ) {
1035         return ( undef, $', $', "q=cql=$'", $', '', '', '', '', 'cql' );
1036     }
1037     if ( $query =~ /^pqf=/ ) {
1038         return ( undef, $', $', "q=pqf=$'", $', '', '', '', '', 'pqf' );
1039     }
1040
1041     # pass nested queries directly
1042     # FIXME: need better handling of some of these variables in this case
1043     if ( $query =~ /(\(|\))/ ) {
1044         return (
1045             undef,              $query, $simple_query, $query_cgi,
1046             $query,             $limit, $limit_cgi,    $limit_desc,
1047             $stopwords_removed, 'ccl'
1048         );
1049     }
1050
1051 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
1052 # query operands and indexes and add stemming, truncation, field weighting, etc.
1053 # Once we do so, we'll end up with a value in $query, just like if we had an
1054 # incoming $query from the user
1055     else {
1056         $query = ""
1057           ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
1058         my $previous_operand
1059           ;    # a flag used to keep track if there was a previous query
1060                # if there was, we can apply the current operator
1061                # for every operand
1062         for ( my $i = 0 ; $i <= @operands ; $i++ ) {
1063
1064             # COMBINE OPERANDS, INDEXES AND OPERATORS
1065             if ( $operands[$i] ) {
1066
1067               # A flag to determine whether or not to add the index to the query
1068                 my $indexes_set;
1069
1070 # If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling
1071                 if ( $operands[$i] =~ /(:|=)/ || $scan ) {
1072                     $weight_fields    = 0;
1073                     $stemming         = 0;
1074                     $remove_stopwords = 0;
1075                 }
1076                 my $operand = $operands[$i];
1077                 my $index   = $indexes[$i];
1078
1079                 # Add index-specific attributes
1080                 # Date of Publication
1081                 if ( $index eq 'yr' ) {
1082                     $index .= ",st-numeric";
1083 #                     $indexes_set++;
1084                                         $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
1085                 }
1086
1087                 # Date of Acquisition
1088                 elsif ( $index eq 'acqdate' ) {
1089                     $index .= ",st-date-normalized";
1090 #                     $indexes_set++;
1091                                         $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
1092                 }
1093                 # ISBN,ISSN,Standard Number, don't need special treatment
1094                 elsif ( $index eq 'nb' || $index eq 'ns' ) {
1095 #                     $indexes_set++;
1096                     (
1097                         $stemming,      $auto_truncation,
1098                         $weight_fields, $fuzzy_enabled,
1099                         $remove_stopwords
1100                     ) = ( 0, 0, 0, 0, 0 );
1101
1102                 }
1103
1104                 if(not $index){
1105                     $index = 'kw';
1106                 }
1107
1108                 # Set default structure attribute (word list)
1109                 my $struct_attr;
1110                 unless ( $indexes_set || !$index || $index =~ /(st-|phr|ext|wrdl)/ ) {
1111                     $struct_attr = ",wrdl";
1112                 }
1113
1114                 # Some helpful index variants
1115                 my $index_plus       = $index . $struct_attr . ":" if $index;
1116                 my $index_plus_comma = $index . $struct_attr . "," if $index;
1117
1118                 # Remove Stopwords
1119                 if ($remove_stopwords) {
1120                     ( $operand, $stopwords_removed ) =
1121                       _remove_stopwords( $operand, $index );
1122                     warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
1123                     warn "REMOVED STOPWORDS: @$stopwords_removed"
1124                       if ( $stopwords_removed && $DEBUG );
1125                 }
1126
1127                 if ($auto_truncation){
1128                                         #FIXME only valid with LTR scripts
1129                                         $operand=join(" ",map{
1130                                                                                         "$_*"
1131                                                                              }split (/\s+/,$operand));
1132                         warn $operand if $DEBUG;
1133                                 }
1134
1135                 # Detect Truncation
1136                 my $truncated_operand;
1137                 my( $nontruncated, $righttruncated, $lefttruncated,
1138                     $rightlefttruncated, $regexpr
1139                 ) = _detect_truncation( $operand, $index );
1140                 warn
1141 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
1142                   if $DEBUG;
1143
1144                 # Apply Truncation
1145                 if (
1146                     scalar(@$righttruncated) + scalar(@$lefttruncated) +
1147                     scalar(@$rightlefttruncated) > 0 )
1148                 {
1149
1150                # Don't field weight or add the index to the query, we do it here
1151                     $indexes_set = 1;
1152                     undef $weight_fields;
1153                     my $previous_truncation_operand;
1154                     if (scalar @$nontruncated) {
1155                         $truncated_operand .= "$index_plus @$nontruncated ";
1156                         $previous_truncation_operand = 1;
1157                     }
1158                     if (scalar @$righttruncated) {
1159                         $truncated_operand .= "and " if $previous_truncation_operand;
1160                         $truncated_operand .= $index_plus_comma . "rtrn:@$righttruncated ";
1161                         $previous_truncation_operand = 1;
1162                     }
1163                     if (scalar @$lefttruncated) {
1164                         $truncated_operand .= "and " if $previous_truncation_operand;
1165                         $truncated_operand .= $index_plus_comma . "ltrn:@$lefttruncated ";
1166                         $previous_truncation_operand = 1;
1167                     }
1168                     if (scalar @$rightlefttruncated) {
1169                         $truncated_operand .= "and " if $previous_truncation_operand;
1170                         $truncated_operand .= $index_plus_comma . "rltrn:@$rightlefttruncated ";
1171                         $previous_truncation_operand = 1;
1172                     }
1173                 }
1174                 $operand = $truncated_operand if $truncated_operand;
1175                 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
1176
1177                 # Handle Stemming
1178                 my $stemmed_operand;
1179                 $stemmed_operand = _build_stemmed_operand($operand) if $stemming;
1180
1181                 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
1182
1183                 # Handle Field Weighting
1184                 my $weighted_operand;
1185                 if ($weight_fields) {
1186                     $weighted_operand = _build_weighted_query( $operand, $stemmed_operand, $index );
1187                     $operand = $weighted_operand;
1188                     $indexes_set = 1;
1189                 }
1190
1191                 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
1192
1193                 # If there's a previous operand, we need to add an operator
1194                 if ($previous_operand) {
1195
1196                     # User-specified operator
1197                     if ( $operators[ $i - 1 ] ) {
1198                         $query     .= " $operators[$i-1] ";
1199                         $query     .= " $index_plus " unless $indexes_set;
1200                         $query     .= " $operand";
1201                         $query_cgi .= "&op=$operators[$i-1]";
1202                         $query_cgi .= "&idx=$index" if $index;
1203                         $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1204                         $query_desc .=
1205                           " $operators[$i-1] $index_plus $operands[$i]";
1206                     }
1207
1208                     # Default operator is and
1209                     else {
1210                         $query      .= " and ";
1211                         $query      .= "$index_plus " unless $indexes_set;
1212                         $query      .= "$operand";
1213                         $query_cgi  .= "&op=and&idx=$index" if $index;
1214                         $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1215                         $query_desc .= " and $index_plus $operands[$i]";
1216                     }
1217                 }
1218
1219                 # There isn't a pervious operand, don't need an operator
1220                 else {
1221
1222                     # Field-weighted queries already have indexes set
1223                     $query .= " $index_plus " unless $indexes_set;
1224                     $query .= $operand;
1225                     $query_desc .= " $index_plus $operands[$i]";
1226                     $query_cgi  .= "&idx=$index" if $index;
1227                     $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1228                     $previous_operand = 1;
1229                 }
1230             }    #/if $operands
1231         }    # /for
1232     }
1233     warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
1234
1235     # add limits
1236     my $group_OR_limits;
1237     my $availability_limit;
1238     foreach my $this_limit (@limits) {
1239         if ( $this_limit =~ /available/ ) {
1240
1241 # 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1242 # In English:
1243 # all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1244             $availability_limit .=
1245 "( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and (lost,st-numeric=0) )"; #or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )";
1246             $limit_cgi  .= "&limit=available";
1247             $limit_desc .= "";
1248         }
1249
1250         # group_OR_limits, prefixed by mc-
1251         # OR every member of the group
1252         elsif ( $this_limit =~ /mc/ ) {
1253             $group_OR_limits .= " or " if $group_OR_limits;
1254             $limit_desc      .= " or " if $group_OR_limits;
1255             $group_OR_limits .= "$this_limit";
1256             $limit_cgi       .= "&limit=$this_limit";
1257             $limit_desc      .= " $this_limit";
1258         }
1259
1260         # Regular old limits
1261         else {
1262             $limit .= " and " if $limit || $query;
1263             $limit      .= "$this_limit";
1264             $limit_cgi  .= "&limit=$this_limit";
1265             if ($this_limit =~ /^branch:(.+)/) {
1266                 my $branchcode = $1;
1267                 my $branchname = GetBranchName($branchcode);
1268                 if (defined $branchname) {
1269                     $limit_desc .= " branch:$branchname";
1270                 } else {
1271                     $limit_desc .= " $this_limit";
1272                 }
1273             } else {
1274                 $limit_desc .= " $this_limit";
1275             }
1276         }
1277     }
1278     if ($group_OR_limits) {
1279         $limit .= " and " if ( $query || $limit );
1280         $limit .= "($group_OR_limits)";
1281     }
1282     if ($availability_limit) {
1283         $limit .= " and " if ( $query || $limit );
1284         $limit .= "($availability_limit)";
1285     }
1286
1287     # Normalize the query and limit strings
1288     $query =~ s/:/=/g;
1289     $limit =~ s/:/=/g;
1290     for ( $query, $query_desc, $limit, $limit_desc ) {
1291         s/  / /g;    # remove extra spaces
1292         s/^ //g;     # remove any beginning spaces
1293         s/ $//g;     # remove any ending spaces
1294         s/==/=/g;    # remove double == from query
1295     }
1296     $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1297
1298     for ($query_cgi,$simple_query) {
1299         s/"//g;
1300     }
1301     # append the limit to the query
1302     $query .= " " . $limit;
1303
1304     # Warnings if DEBUG
1305     if ($DEBUG) {
1306         warn "QUERY:" . $query;
1307         warn "QUERY CGI:" . $query_cgi;
1308         warn "QUERY DESC:" . $query_desc;
1309         warn "LIMIT:" . $limit;
1310         warn "LIMIT CGI:" . $limit_cgi;
1311         warn "LIMIT DESC:" . $limit_desc;
1312         warn "---------\nLeave buildQuery\n---------";
1313     }
1314     return (
1315         undef,              $query, $simple_query, $query_cgi,
1316         $query_desc,        $limit, $limit_cgi,    $limit_desc,
1317         $stopwords_removed, $query_type
1318     );
1319 }
1320
1321 =head2 searchResults
1322
1323 Format results in a form suitable for passing to the template
1324
1325 =cut
1326
1327 # IMO this subroutine is pretty messy still -- it's responsible for
1328 # building the HTML output for the template
1329 sub searchResults {
1330     my ( $searchdesc, $hits, $results_per_page, $offset, $scan, @marcresults ) = @_;
1331     my $dbh = C4::Context->dbh;
1332     my @newresults;
1333
1334     #Build branchnames hash
1335     #find branchname
1336     #get branch information.....
1337     my %branches;
1338     my $bsth =$dbh->prepare("SELECT branchcode,branchname FROM branches"); # FIXME : use C4::Branch::GetBranches
1339     $bsth->execute();
1340     while ( my $bdata = $bsth->fetchrow_hashref ) {
1341         $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'};
1342     }
1343 # FIXME - We build an authorised values hash here, using the default framework
1344 # though it is possible to have different authvals for different fws.
1345
1346     my $shelflocations =GetKohaAuthorisedValues('items.location','');
1347
1348     # get notforloan authorised value list (see $shelflocations  FIXME)
1349     my $notforloan_authorised_value = GetAuthValCode('items.notforloan','');
1350
1351     #Build itemtype hash
1352     #find itemtype & itemtype image
1353     my %itemtypes;
1354     $bsth =
1355       $dbh->prepare(
1356         "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes"
1357       );
1358     $bsth->execute();
1359     while ( my $bdata = $bsth->fetchrow_hashref ) {
1360                 foreach (qw(description imageurl summary notforloan)) {
1361                 $itemtypes{ $bdata->{'itemtype'} }->{$_} = $bdata->{$_};
1362                 }
1363     }
1364
1365     #search item field code
1366     my $sth =
1367       $dbh->prepare(
1368 "SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'"
1369       );
1370     $sth->execute;
1371     my ($itemtag) = $sth->fetchrow;
1372
1373     ## find column names of items related to MARC
1374     my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items");
1375     $sth2->execute;
1376     my %subfieldstosearch;
1377     while ( ( my $column ) = $sth2->fetchrow ) {
1378         my ( $tagfield, $tagsubfield ) =
1379           &GetMarcFromKohaField( "items." . $column, "" );
1380         $subfieldstosearch{$column} = $tagsubfield;
1381     }
1382
1383     # handle which records to actually retrieve
1384     my $times;
1385     if ( $hits && $offset + $results_per_page <= $hits ) {
1386         $times = $offset + $results_per_page;
1387     }
1388     else {
1389         $times = $hits;  # FIXME: if $hits is undefined, why do we want to equal it?
1390     }
1391     my $marcflavour = C4::Context->preference("marcflavour");
1392
1393     # We get the biblionumber position in MARC
1394     my ($bibliotag,$bibliosubf)=GetMarcFromKohaField('biblio.biblionumber','');
1395     my $fw = '';
1396
1397     # loop through all of the records we've retrieved
1398     for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
1399         my $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
1400
1401         if(not $scan){
1402             if ($bibliotag<10){
1403                 $biblionumber = $marcrecord->field($bibliotag)->data;
1404             }else{
1405                 $biblionumber = $marcrecord->subfield($bibliotag,$bibliosubf);
1406             }
1407             $fw = GetFrameworkCode($biblionumber);
1408         }
1409
1410         my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, $fw );
1411         $oldbiblio->{subtitle} = GetRecordValue('subtitle', $marcrecord, $fw);
1412         $oldbiblio->{result_number} = $i + 1;
1413
1414         # add imageurl to itemtype if there is one
1415         $oldbiblio->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
1416
1417         $oldbiblio->{'authorised_value_images'}  = C4::Items::get_authorised_value_images( C4::Biblio::get_biblio_authorised_values( $oldbiblio->{'biblionumber'}, $marcrecord ) );
1418         (my $aisbn) = $oldbiblio->{isbn} =~ /([\d-]*[X]*)/;
1419         $aisbn =~ s/-//g;
1420         $oldbiblio->{amazonisbn} = $aisbn;
1421         $oldbiblio->{description} = $itemtypes{ $oldbiblio->{itemtype} }->{description};
1422         $oldbiblio->{normalized_upc} = GetNormalizedUPC($marcrecord,$marcflavour);
1423         $oldbiblio->{normalized_ean} = GetNormalizedEAN($marcrecord,$marcflavour);
1424         $oldbiblio->{normalized_oclc} = GetNormalizedOCLCNumber($marcrecord,$marcflavour);
1425         $oldbiblio->{normalized_isbn} = GetNormalizedISBN(undef,$marcrecord,$marcflavour);
1426         $oldbiblio->{content_identifier_exists} = 1 if ($oldbiblio->{normalized_isbn} or $oldbiblio->{normalized_oclc} or $oldbiblio->{normalized_ean} or $oldbiblio->{normalized_upc});
1427         $oldbiblio->{edition} = $oldbiblio->{editionstatement};
1428         $oldbiblio->{description} = $itemtypes{ $oldbiblio->{itemtype} }->{description};
1429  # Build summary if there is one (the summary is defined in the itemtypes table)
1430  # FIXME: is this used anywhere, I think it can be commented out? -- JF
1431         if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
1432             my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1433             my @fields  = $marcrecord->fields();
1434             foreach my $field (@fields) {
1435                 my $tag      = $field->tag();
1436                 my $tagvalue = $field->as_string();
1437                 if (! utf8::is_utf8($tagvalue)) {
1438                     utf8::decode($tagvalue);
1439                 }
1440
1441                 $summary =~
1442                   s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
1443                 unless ( $tag < 10 ) {
1444                     my @subf = $field->subfields;
1445                     for my $i ( 0 .. $#subf ) {
1446                         my $subfieldcode  = $subf[$i][0];
1447                         my $subfieldvalue = $subf[$i][1];
1448                         if (! utf8::is_utf8($subfieldvalue)) {
1449                             utf8::decode($subfieldvalue);
1450                         }
1451                         my $tagsubf       = $tag . $subfieldcode;
1452                         $summary =~
1453 s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
1454                     }
1455                 }
1456             }
1457             # FIXME: yuk
1458             $summary =~ s/\[(.*?)]//g;
1459             $summary =~ s/\n/<br\/>/g;
1460             $oldbiblio->{summary} = $summary;
1461         }
1462
1463         # Pull out the items fields
1464         my @fields = $marcrecord->field($itemtag);
1465
1466         # Setting item statuses for display
1467         my @available_items_loop;
1468         my @onloan_items_loop;
1469         my @notforloan_items_loop;
1470         my @other_items_loop;
1471
1472         my $available_items;
1473         my $onloan_items;
1474         my $notforloan_items;
1475         my $other_items;
1476
1477         my $ordered_count         = 0;
1478         my $available_count       = 0;
1479         my $onloan_count          = 0;
1480         my $notforloan_count      = 0;
1481         my $longoverdue_count     = 0;
1482         my $other_count           = 0;
1483         my $wthdrawn_count        = 0;
1484         my $itemlost_count        = 0;
1485         my $itembinding_count     = 0;
1486         my $itemdamaged_count     = 0;
1487         my $item_in_transit_count = 0;
1488         my $can_place_holds       = 0;
1489         my $items_count           = scalar(@fields);
1490         my $maxitems =
1491           ( C4::Context->preference('maxItemsinSearchResults') )
1492           ? C4::Context->preference('maxItemsinSearchResults') - 1
1493           : 1;
1494
1495         # loop through every item
1496         foreach my $field (@fields) {
1497             my $item;
1498
1499             # populate the items hash
1500             foreach my $code ( keys %subfieldstosearch ) {
1501                 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1502             }
1503                         my $hbranch     = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'homebranch'    : 'holdingbranch';
1504                         my $otherbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'holdingbranch' : 'homebranch';
1505             # set item's branch name, use HomeOrHoldingBranch syspref first, fall back to the other one
1506             if ($item->{$hbranch}) {
1507                 $item->{'branchname'} = $branches{$item->{$hbranch}};
1508             }
1509             elsif ($item->{$otherbranch}) {     # Last resort
1510                 $item->{'branchname'} = $branches{$item->{$otherbranch}};
1511             }
1512
1513             ($item->{'reserved'}) = C4::Reserves::CheckReserves($item->{itemnumber});
1514
1515                         my $prefix = $item->{$hbranch} . '--' . $item->{location} . $item->{itype} . $item->{itemcallnumber};
1516 # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
1517             if ( $item->{onloan} or $item->{reserved} ) {
1518                 $onloan_count++;
1519                                 my $key = $prefix . $item->{onloan} . $item->{barcode};
1520                                 $onloan_items->{$key}->{due_date} = format_date($item->{onloan});
1521                                 $onloan_items->{$key}->{count}++ if $item->{$hbranch};
1522                                 $onloan_items->{$key}->{branchname} = $item->{branchname};
1523                                 $onloan_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1524                                 $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber};
1525                                 $onloan_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1526                                 $onloan_items->{$key}->{barcode} = $item->{barcode};
1527                 # if something's checked out and lost, mark it as 'long overdue'
1528                 if ( $item->{itemlost} ) {
1529                     $onloan_items->{$prefix}->{longoverdue}++;
1530                     $longoverdue_count++;
1531                 } else {        # can place holds as long as item isn't lost
1532                     $can_place_holds = 1;
1533                 }
1534             }
1535
1536          # items not on loan, but still unavailable ( lost, withdrawn, damaged )
1537             else {
1538
1539                 # item is on order
1540                 if ( $item->{notforloan} == -1 ) {
1541                     $ordered_count++;
1542                 }
1543
1544                 # is item in transit?
1545                 my $transfertwhen = '';
1546                 my ($transfertfrom, $transfertto);
1547
1548                 unless ($item->{wthdrawn}
1549                         || $item->{itemlost}
1550                         || $item->{damaged}
1551                         || $item->{notforloan}
1552                         || $items_count > 20) {
1553
1554                     # A couple heuristics to limit how many times
1555                     # we query the database for item transfer information, sacrificing
1556                     # accuracy in some cases for speed;
1557                     #
1558                     # 1. don't query if item has one of the other statuses
1559                     # 2. don't check transit status if the bib has
1560                     #    more than 20 items
1561                     #
1562                     # FIXME: to avoid having the query the database like this, and to make
1563                     #        the in transit status count as unavailable for search limiting,
1564                     #        should map transit status to record indexed in Zebra.
1565                     #
1566                     ($transfertwhen, $transfertfrom, $transfertto) = C4::Circulation::GetTransfers($item->{itemnumber});
1567                 }
1568
1569                 # item is withdrawn, lost or damaged
1570                 if (   $item->{wthdrawn}
1571                     || $item->{itemlost}
1572                     || $item->{damaged}
1573                     || $item->{notforloan}
1574                     || $item->{reserved}
1575                     || ($transfertwhen ne ''))
1576                 {
1577                     $wthdrawn_count++        if $item->{wthdrawn};
1578                     $itemlost_count++        if $item->{itemlost};
1579                     $itemdamaged_count++     if $item->{damaged};
1580                     $item_in_transit_count++ if $transfertwhen ne '';
1581                     $item->{status} = $item->{wthdrawn} . "-" . $item->{itemlost} . "-" . $item->{damaged} . "-" . $item->{notforloan};
1582
1583                                         my $key = $prefix . $item->{status};
1584
1585                                         foreach (qw(wthdrawn itemlost damaged branchname itemcallnumber)) {
1586                                             if($item->{notforloan} == 1){
1587                                                 $notforloan_items->{$key}->{$_} = $item->{$_};
1588                                             }else{
1589                            $other_items->{$key}->{$_} = $item->{$_};
1590                                             }
1591                                         }
1592                                         if($item->{notforloan} == 1){
1593                         $notforloan_count++;
1594
1595                         $notforloan_items->{$key}->{intransit} = ($transfertwhen ne '') ? 1 : 0;
1596                                         $notforloan_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1597                                         $notforloan_items->{$key}->{count}++ if $item->{$hbranch};
1598                                         $notforloan_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1599                                         $notforloan_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1600                                         $notforloan_items->{$key}->{barcode} = $item->{barcode};
1601                     }else{
1602                         $other_count++;
1603
1604                         $other_items->{$key}->{intransit} = ($transfertwhen ne '') ? 1 : 0;
1605                                         $other_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1606                                         $other_items->{$key}->{count}++ if $item->{$hbranch};
1607                                         $other_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1608                                         $other_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1609                                         $other_items->{$key}->{barcode} = $item->{barcode};
1610                     }
1611
1612                 }
1613                 # item is available
1614                 else {
1615                     $can_place_holds = 1;
1616                     $available_count++;
1617                                         $available_items->{$prefix}->{count}++ if $item->{$hbranch};
1618                                         foreach (qw(branchname itemcallnumber barcode)) {
1619                         $available_items->{$prefix}->{$_} = $item->{$_};
1620                                         }
1621                                         $available_items->{$prefix}->{location} = $shelflocations->{ $item->{location} };
1622                                         $available_items->{$prefix}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1623                 }
1624             }
1625         }    # notforloan, item level and biblioitem level
1626         my ( $availableitemscount, $onloanitemscount, $notforloanitemscount,$otheritemscount );
1627         $maxitems =
1628           ( C4::Context->preference('maxItemsinSearchResults') )
1629           ? C4::Context->preference('maxItemsinSearchResults') - 1
1630           : 1;
1631         for my $key ( sort keys %$onloan_items ) {
1632             (++$onloanitemscount > $maxitems) and last;
1633             push @onloan_items_loop, $onloan_items->{$key};
1634         }
1635         for my $key ( sort keys %$other_items ) {
1636             (++$otheritemscount > $maxitems) and last;
1637             push @other_items_loop, $other_items->{$key};
1638         }
1639         for my $key ( sort keys %$notforloan_items ) {
1640             (++$notforloanitemscount > $maxitems) and last;
1641             push @notforloan_items_loop, $notforloan_items->{$key};
1642         }
1643         for my $key ( sort keys %$available_items ) {
1644             (++$availableitemscount > $maxitems) and last;
1645             push @available_items_loop, $available_items->{$key}
1646         }
1647
1648         # XSLT processing of some stuff
1649         if (C4::Context->preference("XSLTResultsDisplay") && !$scan) {
1650             $oldbiblio->{XSLTResultsRecord} = XSLTParse4Display(
1651                 $oldbiblio->{biblionumber}, $marcrecord, 'Results' );
1652         }
1653
1654         # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
1655         $can_place_holds = 0 if $itemtypes{ $oldbiblio->{itemtype} }->{notforloan};
1656         $oldbiblio->{norequests} = 1 unless $can_place_holds;
1657         $oldbiblio->{itemsplural}          = 1 if $items_count > 1;
1658         $oldbiblio->{items_count}          = $items_count;
1659         $oldbiblio->{available_items_loop} = \@available_items_loop;
1660         $oldbiblio->{notforloan_items_loop}= \@notforloan_items_loop;
1661         $oldbiblio->{onloan_items_loop}    = \@onloan_items_loop;
1662         $oldbiblio->{other_items_loop}     = \@other_items_loop;
1663         $oldbiblio->{availablecount}       = $available_count;
1664         $oldbiblio->{availableplural}      = 1 if $available_count > 1;
1665         $oldbiblio->{onloancount}          = $onloan_count;
1666         $oldbiblio->{onloanplural}         = 1 if $onloan_count > 1;
1667         $oldbiblio->{notforloancount}      = $notforloan_count;
1668         $oldbiblio->{othercount}           = $other_count;
1669         $oldbiblio->{otherplural}          = 1 if $other_count > 1;
1670         $oldbiblio->{wthdrawncount}        = $wthdrawn_count;
1671         $oldbiblio->{itemlostcount}        = $itemlost_count;
1672         $oldbiblio->{damagedcount}         = $itemdamaged_count;
1673         $oldbiblio->{intransitcount}       = $item_in_transit_count;
1674         $oldbiblio->{orderedcount}         = $ordered_count;
1675         $oldbiblio->{isbn} =~
1676           s/-//g;    # deleting - in isbn to enable amazon content
1677         push( @newresults, $oldbiblio );
1678     }
1679     return @newresults;
1680 }
1681
1682 #----------------------------------------------------------------------
1683 #
1684 # Non-Zebra GetRecords#
1685 #----------------------------------------------------------------------
1686
=head2 NZgetRecords

  my ( $error, $results, $facets ) = NZgetRecords(
      $query,            $simple_query, $sort_by_ref,    $servers_ref,
      $results_per_page, $offset,       $expanded_facet, $branches,
      $query_type,       $scan
  );

NZgetRecords has the same API as the Zebra getRecords, even if some
parameters are not managed: only C<$query>, the first element of
C<$sort_by_ref>, C<$results_per_page> and C<$offset> are used.

The query is resolved by NZanalyse and the result is handed to NZorder.
The return value is a three-element list whose first and last elements are
always undef and whose middle element is whatever NZorder returns.

=cut

sub NZgetRecords {

    # The full positional list is kept so the signature mirrors getRecords,
    # even though most of these parameters are ignored here.
    my (
        $query,            $simple_query, $sort_by_ref,    $servers_ref,
        $results_per_page, $offset,       $expanded_facet, $branches,
        $query_type,       $scan
    ) = @_;
    warn "query =$query" if $DEBUG;
    my $result = NZanalyse($query);
    warn "results =$result" if $DEBUG;

    # Only the first requested sort criterion is honoured. Use an element
    # access (not a one-element slice) and tolerate a missing sort list.
    my $ordering = $sort_by_ref ? $sort_by_ref->[0] : undef;

    # NOTE(review): the undef slots presumably stand for the error and facets
    # values of getRecords -- confirm against the callers.
    return ( undef,
        NZorder( $result, $ordering, $results_per_page, $offset ),
        undef );
}
1706
=head2 NZanalyse

  my $results = NZanalyse( $string, $server );

Takes a CQL-like query string and returns the matching entries as a single
string. The string is the concatenation of the C<biblionumbers> values read
from the C<nozebra> SQL table (an inverted index); the other NZ* functions
treat it as C<;>-terminated entries of the form C<biblionumber,title-N>.

The title is there only for convenience: sorting is very fast when requested
on title. If the sorting is requested on something else, all results have to
be reread, and that may take longer.

C<$server> selects the C<server> column value to search on and defaults to
C<biblioserver>.

The query is split recursively on C< and >, C< or > and C< not > (either
case), a leading parenthesised group being handled first. Each leaf is then
looked up either in one named index (C<index=value>, also C<E<gt>>,
C<E<lt>>, C<E<gt>=>, C<E<lt>=>) or, for a bare term or the C<kw>/C<keyword>
index, in all indexes.

=cut

sub NZanalyse {
    my ( $string, $server ) = @_;
    warn " NZanalyse" if $DEBUG;

    # $server contains biblioserver or authorities, depending on what we search on.
    $server = 'biblioserver' unless $server;

    # If there is a quoted phrase, replace the first one by a placeholder to
    # discard temporarily any and/or/not inside it. The phrase is remembered
    # only when the substitution really happened: reading $1 after a failed
    # match would pick up a stale capture from an earlier match.
    my $commacontent;
    if ( $string =~ s/"(.*?)"/__X__/ ) {
        $commacontent = $1;
        warn "commacontent : $commacontent" if $DEBUG;
    }

    # Delete repeated operators ("x and and y"): they would otherwise send
    # the recursion below into an infinite loop.
    1 while $string =~ s/( and| or| not| AND| OR| NOT)\1/$1/g;

    # Process a leading parenthesised group first.
    if ( $string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/ ) {
        my ( $left, $operator, $right ) = ( $1, $3, $4 );
        $operator = defined $operator ? lc $operator : '';
        warn
"dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right"
          if $DEBUG;

        # Put the quoted phrase back before recursing, exactly as the
        # non-parenthesis branch does; otherwise the leaf would end up
        # searching for the literal placeholder.
        if ( defined $commacontent ) {
            for my $part ( $left, $right ) {
                $part =~ s/__X__/"$commacontent"/ if defined $part;
            }
        }

        my $leftresult = NZanalyse( $left, $server );

        # Nothing follows the group: its result is the answer.
        return $leftresult unless $operator;

        my $rightresult = NZanalyse( $right, $server );
        return _NZcombineResults( $operator, $leftresult, $rightresult,
            $string );
    }
    warn "string :" . $string if $DEBUG;

    # Split the query string in 3 parts : X AND Y means : $left="X",
    # $operator="AND" and $right="Y".
    my ( $left, $operator, $right ) = ( "", "", "" );
    if ( $string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/ ) {
        ( $left, $operator, $right ) = ( $1, lc($2), $3 );
    }
    warn "no parenthesis. left : $left operator: $operator right: $right"
      if $DEBUG;

    # It's not a leaf, we have a and/or/not: recurse on both sides.
    if ($operator) {

        # reintroduce the quoted phrase if needed
        if ( defined $commacontent ) {
            $right =~ s/__X__/"$commacontent"/;
            $left  =~ s/__X__/"$commacontent"/;
        }
        warn "node : $left / $operator / $right\n" if $DEBUG;
        my $leftresult  = NZanalyse( $left,  $server );
        my $rightresult = NZanalyse( $right, $server );
        warn " leftresult : $leftresult"   if $DEBUG;
        warn " rightresult : $rightresult" if $DEBUG;
        return _NZcombineResults( $operator, $leftresult, $rightresult,
            $string );
    }

    # It's a leaf: do the real SQL query and return the result.
    $string =~ s/__X__/"$commacontent"/ if defined $commacontent;
    return _NZanalyseLeaf( $string, $server );
}

# Combine the results of the two sides of a query node according to the
# (lower-cased, space-padded) operator: intersect, union or exclude.
sub _NZcombineResults {
    my ( $operator, $leftresult, $rightresult, $string ) = @_;

    if ( $operator eq ' and ' ) {
        warn "NZAND" if $DEBUG;
        return NZoperatorAND( $leftresult, $rightresult );
    }
    elsif ( $operator eq ' or ' ) {

        # just merge the 2 strings
        return $leftresult . $rightresult;
    }
    elsif ( $operator eq ' not ' ) {
        return NZoperatorNOT( $leftresult, $rightresult );
    }

# this error is impossible, because of the regexp that isolates the operator, but just in case...
    die "error : operand unknown : $operator for $string";
}

# Resolve one leaf of the query (no and/or/not left in it) against the
# nozebra table and return the concatenated biblionumbers values.
sub _NZanalyseLeaf {
    my ( $string, $server ) = @_;

    # Punctuation is replaced by blanks, then one leading blank is removed.
    $string =~ s/[-.?,;!'()\[\]{}"&+*\/]/ /g;
    $string =~ s/^ //;
    warn "leaf:$string" if $DEBUG;

    # Parse the string in index/operator/value. Two-character comparison
    # operators are tried first so that ">=" is not read as ">" or "=".
    my ( $left, $operator, $right ) = ( $string, "", "" );
    if (   $string =~ /(.*)(>=|<=)(.*)/
        or $string =~ /(.*)(>|<|=)(.*)/ )
    {
        ( $left, $operator, $right ) = ( $1, $2, $3 );
    }

# strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr...
    $left =~ s/ .*$//;

    # automatic replace for short index names
    my %index_for = (
        ti => 'title',
        au => 'author',
        pb => 'publisher',
        su => 'subject',
        an => 'koha-Auth-Number',
        kw => 'keyword',
        mc => 'itemtype',    # Fix for Bug 2599 - Search limits not working for NoZebra
    );
    $left = $index_for{$left} if exists $index_for{$left};
    warn "handling leaf... left:$left operator:$operator right:$right" if $DEBUG;

    my $dbh = C4::Context->dbh;
    my $results;
    if ( $operator && $left ne 'keyword' ) {

        # Do a specific search on one index. $operator is safe to
        # interpolate: it is either one of the comparison operators matched
        # above or the literal LIKE.
        $operator = 'LIKE' if $operator eq '=' and $right =~ /%/;
        my $sth = $dbh->prepare(
"SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?"
        );
        warn "$left / $operator / $right\n" if $DEBUG;

        # sanitizing leftpart (both ends)
        $left =~ s/^\s+|\s+$//g;

        # split each word, query the DB and build the biblionumbers result
        foreach my $word ( split / /, $right ) {
            my $biblionumbers;
            $word =~ s/^\s+|\s+$//g;
            next unless $word;
            warn "EXECUTE : $server, $left, $word" if $DEBUG;
            $sth->execute( $server, $left, $word )
              or warn "execute failed: " . $sth->errstr;
            while ( my ( $line, $value ) = $sth->fetchrow ) {

# if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
# otherwise, fill the result
                $biblionumbers .= $line
                  unless ( $right =~ /^\d+$/ && $value =~ /\D/ );
                warn "result : $value "
                  . ( $right =~ /\d/ ) . "=="
                  . ( $value =~ /\D/ ? $line : "" )
                  if $DEBUG;
            }

# do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
# NOTE(review): an empty $results is treated like "no list yet", so a word
# without any hit is ignored instead of emptying the result -- confirm
# whether that is intended.
            if ($results) {
                warn "NZAND" if $DEBUG;
                $results = NZoperatorAND( $biblionumbers, $results );
            }
            else {
                $results = $biblionumbers;
            }
        }
    }
    else {

      #do a complete search (all indexes), if index='kw' do complete search too.
        my $sth = $dbh->prepare(
"SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?"
        );

        # split each word, query the DB and build the biblionumbers result
        foreach my $word ( split / /, $string ) {
            next if C4::Context->stopwords->{ uc($word) };   # skip if stopword
            warn "search on all indexes on $word" if $DEBUG;
            my $biblionumbers;
            next unless $word;
            $sth->execute( $server, $word );
            while ( my $line = $sth->fetchrow ) {
                $biblionumbers .= $line;
            }

# do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
            if ($results) {
                $results = NZoperatorAND( $biblionumbers, $results );
            }
            else {
                warn "NEW RES for $word = $biblionumbers" if $DEBUG;
                $results = $biblionumbers;
            }
        }
    }
    warn "return : $results for LEAF : $string" if $DEBUG;
    return $results;
}
1936
=head2 NZoperatorAND

  my $finalresult = NZoperatorAND( $rightresult, $leftresult );

Intersects two result strings made of C<;>-terminated C<value-count>
entries. Every entry of the second list whose value is also present in the
first list is kept, with the smaller of the two counts, and is stored twice
to have the same weight for AND as for OR. The output follows the order of
the second list.

Returns undef when nothing is common to both lists.

=cut

sub NZoperatorAND {
    my ( $rightresult, $leftresult ) = @_;

    my @leftresult = defined $leftresult ? split( /;/, $leftresult ) : ();
    warn " @leftresult / $rightresult \n" if $DEBUG;

    # Index the first list by value for exact lookups. A substring search in
    # the raw string would let "1,foo" match inside "11,foo-3;". When a value
    # occurs several times, its first count wins.
    my %count_for;
    if ( defined $rightresult ) {
        foreach my $entry ( split /;/, $rightresult ) {
            next unless $entry =~ /^(.*)-(\d+)$/;
            my ( $value, $count ) = ( $1, $2 );
            $count_for{$value} = $count unless exists $count_for{$value};
        }
    }

    my $finalresult;
    foreach my $entry (@leftresult) {
        my ( $value, $countvalue ) = ( $entry, undef );
        ( $value, $countvalue ) = ( $1, $2 ) if $entry =~ /(.*)-(\d+)$/;
        next unless exists $count_for{$value};

        # keep the smaller count; an entry without a count takes the other one
        my $othercount = $count_for{$value};
        $countvalue = $othercount
          unless defined $countvalue && $othercount > $countvalue;

        $finalresult .= "$value-$countvalue;$value-$countvalue;";
    }
    warn "NZAND DONE : $finalresult \n" if $DEBUG;
    return $finalresult;
}
1963
=head2 NZoperatorOR

  my $finalresult = NZoperatorOR( $rightresult, $leftresult );

Union of two result strings: the first argument immediately followed by the
second one. Nothing is de-duplicated.

=cut

sub NZoperatorOR {
    my ( $first, $second ) = @_;
    return join '', $first, $second;
}
1968
=head2 NZoperatorNOT

  my $finalresult = NZoperatorNOT( $leftresult, $rightresult );

Removes from the first result string every entry whose value (the part in
front of the trailing C<-count>) is also present in the second one. Both
strings are made of C<;>-terminated C<value-count> entries; the kept entries
are returned unchanged and in their original order.

Returns undef when nothing is left.

=cut

sub NZoperatorNOT {
    my ( $leftresult, $rightresult ) = @_;

    # Index the values to exclude for exact lookups. The value holds a free
    # text title, so using it as a pattern against the raw string would
    # mis-match or die on regex metacharacters ("c++") and would let "1,a"
    # match inside "11,a-2;".
    my %excluded;
    if ( defined $rightresult ) {
        foreach my $entry ( split /;/, $rightresult ) {
            $excluded{$1} = 1 if $entry =~ /^(.*)-\d+$/;
        }
    }

    my $finalresult;
    return $finalresult unless defined $leftresult;
    foreach my $entry ( split /;/, $leftresult ) {
        my $value = $entry;
        $value = $1 if $entry =~ m/(.*)-\d+$/;
        next if $excluded{$value};
        $finalresult .= "$entry;";
    }
    return $finalresult;
}
1985
1986 =head2 NZorder
1987
1988   $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset);
1989
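  Sorts the result string built by NZanalyse according to $ordering
  (popularity, author, callnumber, pubdate or title, among others) and
  returns a hashref with the same structure as getRecords (Zebra querying):
  $finalresult->{'biblioserver'}->{'RECORDS'} holds the sorted records and
  $finalresult->{'biblioserver'}->{'hits'} their number.
  $results_per_page defaults to 20 and $offset to 0.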
1991
1992 =cut
1993
1994 sub NZorder {
1995     my ( $biblionumbers, $ordering, $results_per_page, $offset ) = @_;
1996     warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG;
1997
1998     # order title asc by default
1999     #     $ordering = '1=36 <i' unless $ordering;
2000     $results_per_page = 20 unless $results_per_page;
2001     $offset           = 0  unless $offset;
2002     my $dbh = C4::Context->dbh;
2003
2004     #
2005     # order by POPULARITY
2006     #
2007     if ( $ordering =~ /popularity/ ) {
2008         my %result;
2009         my %popularity;
2010
2011         # popularity is not in MARC record, it's builded from a specific query
2012         my $sth =
2013           $dbh->prepare("select sum(issues) from items where biblionumber=?");
2014         foreach ( split /;/, $biblionumbers ) {
2015             my ( $biblionumber, $title ) = split /,/, $_;
2016             $result{$biblionumber} = GetMarcBiblio($biblionumber);
2017             $sth->execute($biblionumber);
2018             my $popularity = $sth->fetchrow || 0;
2019
2020 # hint : the key is popularity.title because we can have
2021 # many results with the same popularity. In this case, sub-ordering is done by title
2022 # we also have biblionumber to avoid bug for 2 biblios with the same title & popularity
2023 # (un-frequent, I agree, but we won't forget anything that way ;-)
2024             $popularity{ sprintf( "%10d", $popularity ) . $title
2025                   . $biblionumber } = $biblionumber;
2026         }
2027
2028     # sort the hash and return the same structure as GetRecords (Zebra querying)
2029         my $result_hash;
2030         my $numbers = 0;
2031         if ( $ordering eq 'popularity_dsc' ) {    # sort popularity DESC
2032             foreach my $key ( sort { $b cmp $a } ( keys %popularity ) ) {
2033                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2034                   $result{ $popularity{$key} }->as_usmarc();
2035             }
2036         }
2037         else {                                    # sort popularity ASC
2038             foreach my $key ( sort ( keys %popularity ) ) {
2039                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2040                   $result{ $popularity{$key} }->as_usmarc();
2041             }
2042         }
2043         my $finalresult = ();
2044         $result_hash->{'hits'}         = $numbers;
2045         $finalresult->{'biblioserver'} = $result_hash;
2046         return $finalresult;
2047
2048         #
2049         # ORDER BY author
2050         #
2051     }
2052     elsif ( $ordering =~ /author/ ) {
2053         my %result;
2054         foreach ( split /;/, $biblionumbers ) {
2055             my ( $biblionumber, $title ) = split /,/, $_;
2056             my $record = GetMarcBiblio($biblionumber);
2057             my $author;
2058             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
2059                 $author = $record->subfield( '200', 'f' );
2060                 $author = $record->subfield( '700', 'a' ) unless $author;
2061             }
2062             else {
2063                 $author = $record->subfield( '100', 'a' );
2064             }
2065
2066 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2067 # and we don't want to get only 1 result for each of them !!!
2068             $result{ $author . $biblionumber } = $record;
2069         }
2070
2071     # sort the hash and return the same structure as GetRecords (Zebra querying)
2072         my $result_hash;
2073         my $numbers = 0;
2074         if ( $ordering eq 'author_za' ) {    # sort by author desc
2075             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2076                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2077                   $result{$key}->as_usmarc();
2078             }
2079         }
2080         else {                               # sort by author ASC
2081             foreach my $key ( sort ( keys %result ) ) {
2082                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2083                   $result{$key}->as_usmarc();
2084             }
2085         }
2086         my $finalresult = ();
2087         $result_hash->{'hits'}         = $numbers;
2088         $finalresult->{'biblioserver'} = $result_hash;
2089         return $finalresult;
2090
2091         #
2092         # ORDER BY callnumber
2093         #
2094     }
2095     elsif ( $ordering =~ /callnumber/ ) {
2096         my %result;
2097         foreach ( split /;/, $biblionumbers ) {
2098             my ( $biblionumber, $title ) = split /,/, $_;
2099             my $record = GetMarcBiblio($biblionumber);
2100             my $callnumber;
2101             my ( $callnumber_tag, $callnumber_subfield ) =
2102               GetMarcFromKohaField( 'items.itemcallnumber','' );
2103             ( $callnumber_tag, $callnumber_subfield ) =
2104               GetMarcFromKohaField('biblioitems.callnumber','')
2105               unless $callnumber_tag;
2106             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
2107                 $callnumber = $record->subfield( '200', 'f' );
2108             }
2109             else {
2110                 $callnumber = $record->subfield( '100', 'a' );
2111             }
2112
2113 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2114 # and we don't want to get only 1 result for each of them !!!
2115             $result{ $callnumber . $biblionumber } = $record;
2116         }
2117
2118     # sort the hash and return the same structure as GetRecords (Zebra querying)
2119         my $result_hash;
2120         my $numbers = 0;
2121         if ( $ordering eq 'call_number_dsc' ) {    # sort by title desc
2122             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2123                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2124                   $result{$key}->as_usmarc();
2125             }
2126         }
2127         else {                                     # sort by title ASC
2128             foreach my $key ( sort { $a cmp $b } ( keys %result ) ) {
2129                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2130                   $result{$key}->as_usmarc();
2131             }
2132         }
2133         my $finalresult = ();
2134         $result_hash->{'hits'}         = $numbers;
2135         $finalresult->{'biblioserver'} = $result_hash;
2136         return $finalresult;
2137     }
2138     elsif ( $ordering =~ /pubdate/ ) {             #pub year
2139         my %result;
2140         foreach ( split /;/, $biblionumbers ) {
2141             my ( $biblionumber, $title ) = split /,/, $_;
2142             my $record = GetMarcBiblio($biblionumber);
2143             my ( $publicationyear_tag, $publicationyear_subfield ) =
2144               GetMarcFromKohaField( 'biblioitems.publicationyear', '' );
2145             my $publicationyear =
2146               $record->subfield( $publicationyear_tag,
2147                 $publicationyear_subfield );
2148
2149 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2150 # and we don't want to get only 1 result for each of them !!!
2151             $result{ $publicationyear . $biblionumber } = $record;
2152         }
2153
2154     # sort the hash and return the same structure as GetRecords (Zebra querying)
2155         my $result_hash;
2156         my $numbers = 0;
2157         if ( $ordering eq 'pubdate_dsc' ) {    # sort by pubyear desc
2158             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2159                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2160                   $result{$key}->as_usmarc();
2161             }
2162         }
2163         else {                                 # sort by pub year ASC
2164             foreach my $key ( sort ( keys %result ) ) {
2165                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2166                   $result{$key}->as_usmarc();
2167             }
2168         }
2169         my $finalresult = ();
2170         $result_hash->{'hits'}         = $numbers;
2171         $finalresult->{'biblioserver'} = $result_hash;
2172         return $finalresult;
2173
2174         #
2175         # ORDER BY title
2176         #
2177     }
2178     elsif ( $ordering =~ /title/ ) {
2179
2180 # the title is in the biblionumbers string, so we just need to build a hash, sort it and return
2181         my %result;
2182         foreach ( split /;/, $biblionumbers ) {
2183             my ( $biblionumber, $title ) = split /,/, $_;
2184
2185 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2186 # and we don't want to get only 1 result for each of them !!!
2187 # hint & speed improvement : we can order without reading the record
2188 # so order, and read records only for the requested page !
2189             $result{ $title . $biblionumber } = $biblionumber;
2190         }
2191
2192     # sort the hash and return the same structure as GetRecords (Zebra querying)
2193         my $result_hash;
2194         my $numbers = 0;
2195         if ( $ordering eq 'title_az' ) {    # sort by title desc
2196             foreach my $key ( sort ( keys %result ) ) {
2197                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2198             }
2199         }
2200         else {                              # sort by title ASC
2201             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2202                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2203             }
2204         }
2205
2206         # limit the $results_per_page to result size if it's more
2207         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2208
2209         # for the requested page, replace biblionumber by the complete record
2210         # speed improvement : avoid reading too much things
2211         for (
2212             my $counter = $offset ;
2213             $counter <= $offset + $results_per_page ;
2214             $counter++
2215           )
2216         {
2217             $result_hash->{'RECORDS'}[$counter] =
2218               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc;
2219         }
2220         my $finalresult = ();
2221         $result_hash->{'hits'}         = $numbers;
2222         $finalresult->{'biblioserver'} = $result_hash;
2223         return $finalresult;
2224     }
2225     else {
2226
2227 #
2228 # order by ranking
2229 #
2230 # we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking
2231         my %result;
2232         my %count_ranking;
2233         foreach ( split /;/, $biblionumbers ) {
2234             my ( $biblionumber, $title ) = split /,/, $_;
2235             $title =~ /(.*)-(\d)/;
2236
2237             # get weight
2238             my $ranking = $2;
2239
2240 # note that we + the ranking because ranking is calculated on weight of EACH term requested.
2241 # if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N
2242 # biblio N has ranking = 6
2243             $count_ranking{$biblionumber} += $ranking;
2244         }
2245
2246 # build the result by "inverting" the count_ranking hash
2247 # hing : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead
2248 #         warn "counting";
2249         foreach ( keys %count_ranking ) {
2250             $result{ sprintf( "%10d", $count_ranking{$_} ) . '-' . $_ } = $_;
2251         }
2252
2253     # sort the hash and return the same structure as GetRecords (Zebra querying)
2254         my $result_hash;
2255         my $numbers = 0;
2256         foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2257             $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2258         }
2259
2260         # limit the $results_per_page to result size if it's more
2261         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2262
2263         # for the requested page, replace biblionumber by the complete record
2264         # speed improvement : avoid reading too much things
2265         for (
2266             my $counter = $offset ;
2267             $counter <= $offset + $results_per_page ;
2268             $counter++
2269           )
2270         {
2271             $result_hash->{'RECORDS'}[$counter] =
2272               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc
2273               if $result_hash->{'RECORDS'}[$counter];
2274         }
2275         my $finalresult = ();
2276         $result_hash->{'hits'}         = $numbers;
2277         $finalresult->{'biblioserver'} = $result_hash;
2278         return $finalresult;
2279     }
2280 }
2281
2282 =head2 enabled_staff_search_views
2283
2284 %hash = enabled_staff_search_views()
2285
2286 This function returns a hash that contains three flags obtained from the system
2287 preferences, used to determine whether a particular staff search results view
2288 is enabled.
2289
2290 =over 2
2291
2292 =item C<Output arg:>
2293
2294     * $hash{can_view_MARC} is true only if the MARC view is enabled
2295     * $hash{can_view_ISBD} is true only if the ISBD view is enabled
2296     * $hash{can_view_labeledMARC} is true only if the Labeled MARC view is enabled
2297
2298 =item C<usage in the script:>
2299
2300 =back
2301
2302 $template->param ( C4::Search::enabled_staff_search_views );
2303
2304 =cut
2305
sub enabled_staff_search_views {

    # Flat key/value list pairing each template flag with the system
    # preference that drives it. The preference lookups are kept inside
    # the returned list so that they are evaluated exactly where the
    # caller's context puts them.
    return (
        'can_view_MARC',        C4::Context->preference('viewMARC'),
        'can_view_ISBD',        C4::Context->preference('viewISBD'),
        'can_view_labeledMARC', C4::Context->preference('viewLabeledMARC'),
    );
}
2314
2315
2316 =head2 z3950_search_args
2317
2318 $arrayref = z3950_search_args($matchpoints)
2319
2320 This function returns an array reference that contains the search parameters to be
2321 passed to the Z39.50 search script (z3950_search.pl). The array elements
2322 are hash refs whose keys are name, value and encvalue, and whose values are the
2323 name of a search parameter, the value of that search parameter and the URL encoded
2324 value of that parameter.
2325
2326 The search parameter names are lccn, isbn, issn, title, author, dewey and subject.
2327
2328 The search parameter values are obtained from the bibliographic record whose
2329 data is in a hash reference in $matchpoints, as returned by Biblio::GetBiblioData().
2330
2331 If $matchpoints is a scalar, it is assumed to be an unnamed query descriptor, e.g.
2332 a general purpose search argument. In this case, the returned array contains only one
2333 entry: its name is 'title' and its value and encvalue are derived from $matchpoints.
2334
2335 If a search parameter value is undefined or empty, it is not included in the returned
2336 array.
2337
2338 The returned array reference may be passed directly to the template parameters.
2339
2340 =over 2
2341
2342 =item C<Output arg:>
2343
2344     * $array containing hash refs as described above
2345
2346 =item C<usage in the script:>
2347
2348 =back
2349
2350 $data = Biblio::GetBiblioData($bibno);
2351 $template->param ( MYLOOP => C4::Search::z3950_search_args($data) )
2352
2353 *OR*
2354
2355 $template->param ( MYLOOP => C4::Search::z3950_search_args($searchscalar) )
2356
2357 =cut
2358
sub z3950_search_args {

    # Builds the parameter list handed to the Z39.50 search script.
    #
    # Argument: either a hash reference of bibliographic data (keys such as
    #           lccn, isbn, issn, title, author, dewey, subject) or a plain
    #           scalar holding a general-purpose query.
    # Returns:  an array reference of hash refs with the keys name, value and
    #           encvalue (the URL-encoded value), in the fixed field order
    #           used below.
    my $bibrec = shift;

    # A plain scalar is searched as a title.
    $bibrec = { title => $bibrec } if !ref $bibrec;

    my @search_args;
    for my $field (qw/ lccn isbn issn title author dewey subject /) {
        my $value = $bibrec->{$field};

        # Leave out parameters that are undefined or empty; a literal "0"
        # is a real value and is kept.
        next if !defined $value || $value eq '';

        # Encode only values that are actually returned, so the escaper is
        # never handed undef.
        push @search_args,
          {
            name     => $field,
            value    => $value,
            encvalue => URI::Escape::uri_escape_utf8($value),
          };
    }

    return \@search_args;
}
2370
2371
2372 END { }    # placeholder for module clean-up code, run at interpreter exit (global destructor); intentionally empty
2373
2374 1;    # a module file must end with a true value so that use/require succeeds
2375 __END__
2376
2377 =head1 AUTHOR
2378
2379 Koha Development team <info@koha.org>
2380
2381 =cut