C4/Search.pm

   1 package C4::Search;
   2
   3 # This file is part of Koha.
   4 #
   5 # Koha is free software; you can redistribute it and/or modify it under the
   6 # terms of the GNU General Public License as published by the Free Software
   7 # Foundation; either version 2 of the License, or (at your option) any later
   8 # version.
   9 #
  10 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
  11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12 # A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU General Public License along with
  15 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  16 # Suite 330, Boston, MA  02111-1307 USA
  17
  18 use strict;
  19 # use warnings; # FIXME
  20 require Exporter;
  21 use C4::Context;
  22 use C4::Biblio;    # GetMarcFromKohaField, GetBiblioData
  23 use C4::Koha;      # getFacets
  24 use Lingua::Stem;
  25 use C4::Search::PazPar2;
  26 use XML::Simple;
  27 use C4::Dates qw(format_date);
  28 use C4::XSLT;
  29 use C4::Branch;
  30 use URI::Escape;
  31
  32 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
  33
  34 # set the version for version checking
  35 BEGIN {
  36     $VERSION = 3.01;
  37     $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
  38 }
  39
  40 =head1 NAME
  41
  42 C4::Search - Functions for searching the Koha catalog.
  43
  44 =head1 SYNOPSIS
  45
  46 See opac/opac-search.pl or catalogue/search.pl for example of usage
  47
  48 =head1 DESCRIPTION
  49
  50 This module provides searching functions for Koha's bibliographic databases
  51
  52 =head1 FUNCTIONS
  53
  54 =cut
  55
  56 @ISA    = qw(Exporter);
  57 @EXPORT = qw(
  58   &FindDuplicate
  59   &SimpleSearch
  60   &searchResults
  61   &getRecords
  62   &buildQuery
  63   &NZgetRecords
  64 );
  65
  66 # make all your functions, whether exported or not;
  67
  68 =head2 FindDuplicate
  69
   70 @duplicates = FindDuplicate($record);   # ( $biblionumber1, $title1, $biblionumber2, $title2, ... )
  71
  72 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
  73
  74 =cut
  75
  76 sub FindDuplicate {
  77     my ($record) = @_;
  78     my $dbh = C4::Context->dbh;
  79     my $result = TransformMarcToKoha( $dbh, $record, '' );
  80     my $sth;
  81     my $query;
  82     my $search;
  83     my $type;
  84     my ( $biblionumber, $title );
  85
  86     # search duplicate on ISBN, easy and fast..
  87     # ... normalize first
  88     if ( $result->{isbn} ) {
  89         $result->{isbn} =~ s/\(.*$//;
  90         $result->{isbn} =~ s/\s+$//;
  91         $query = "isbn=$result->{isbn}";
  92     }
  93     else {
  94         $result->{title} =~ s /\\//g;
  95         $result->{title} =~ s /\"//g;
  96         $result->{title} =~ s /\(//g;
  97         $result->{title} =~ s /\)//g;
  98
  99         # FIXME: instead of removing operators, could just do
 100         # quotes around the value
 101         $result->{title} =~ s/(and|or|not)//g;
 102         $query = "ti,ext=$result->{title}";
 103         $query .= " and itemtype=$result->{itemtype}"
 104           if ( $result->{itemtype} );
 105         if   ( $result->{author} ) {
 106             $result->{author} =~ s /\\//g;
 107             $result->{author} =~ s /\"//g;
 108             $result->{author} =~ s /\(//g;
 109             $result->{author} =~ s /\)//g;
 110
 111             # remove valid operators
 112             $result->{author} =~ s/(and|or|not)//g;
 113             $query .= " and au,ext=$result->{author}";
 114         }
 115     }
 116
 117     # FIXME: add error handling
 118     my ( $error, $searchresults ) = SimpleSearch($query); # FIXME :: hardcoded !
 119     my @results;
 120     foreach my $possible_duplicate_record (@$searchresults) {
 121         my $marcrecord =
 122           MARC::Record->new_from_usmarc($possible_duplicate_record);
 123         my $result = TransformMarcToKoha( $dbh, $marcrecord, '' );
 124
 125         # FIXME :: why 2 $biblionumber ?
 126         if ($result) {
 127             push @results, $result->{'biblionumber'};
 128             push @results, $result->{'title'};
 129         }
 130     }
 131     return @results;
 132 }
 133
 134 =head2 SimpleSearch
 135
 136 ( $error, $results, $total_hits ) = SimpleSearch( $query, $offset, $max_results, [@servers] );
 137
 138 This function provides a simple search API on the bibliographic catalog
 139
 140 =over 2
 141
 142 =item C<input arg:>
 143
 144     * $query can be a simple keyword or a complete CCL query
 145     * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
  146     * $offset - If present, represents the number of records at the beginning to omit. Defaults to 0
 147     * $max_results - if present, determines the maximum number of records to fetch. undef is All. defaults to undef.
 148
 149
 150 =item C<Output:>
 151
  152     * $error is undef unless an error is detected
 153     * \@results is an array of records.
 154     * $total_hits is the number of hits that would have been returned with no limit
 155
 156 =item C<usage in the script:>
 157
 158 =back
 159
 160 my ( $error, $marcresults, $total_hits ) = SimpleSearch($query);
 161
 162 if (defined $error) {
 163     $template->param(query_error => $error);
 164     warn "error: ".$error;
 165     output_html_with_http_headers $input, $cookie, $template->output;
 166     exit;
 167 }
 168
 169 my $hits = scalar @$marcresults;
 170 my @results;
 171
  172 for my $i (0 .. $hits - 1) {
 173     my %resultsloop;
 174     my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]);
 175     my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,'');
 176
 177     #build the hash for the template.
 178     $resultsloop{title}           = $biblio->{'title'};
 179     $resultsloop{subtitle}        = $biblio->{'subtitle'};
 180     $resultsloop{biblionumber}    = $biblio->{'biblionumber'};
 181     $resultsloop{author}          = $biblio->{'author'};
 182     $resultsloop{publishercode}   = $biblio->{'publishercode'};
 183     $resultsloop{publicationyear} = $biblio->{'publicationyear'};
 184
 185     push @results, \%resultsloop;
 186 }
 187
 188 $template->param(result=>\@results);
 189
 190 =cut
 191
 192 sub SimpleSearch {
 193     my ( $query, $offset, $max_results, $servers )  = @_;
 194
 195     if ( C4::Context->preference('NoZebra') ) {
 196         my $result = NZorder( NZanalyse($query) )->{'biblioserver'};
 197         my $search_result =
 198           (      $result->{hits}
 199               && $result->{hits} > 0 ? $result->{'RECORDS'} : [] );
 200         return ( undef, $search_result, scalar($result->{hits}) );
 201     }
 202     else {
 203         # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
 204         my @servers = defined ( $servers ) ? @$servers : ( "biblioserver" );
 205         my @results;
 206         my @zoom_queries;
 207         my @tmpresults;
 208         my @zconns;
 209         my $total_hits;
 210         return ( "No query entered", undef, undef ) unless $query;
 211
 212         # Initialize & Search Zebra
 213         for ( my $i = 0 ; $i < @servers ; $i++ ) {
 214             eval {
 215                 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 216                 $zoom_queries[$i] = new ZOOM::Query::CCL2RPN( $query, $zconns[$i]);
 217                 $tmpresults[$i] = $zconns[$i]->search( $zoom_queries[$i] );
 218
 219                 # error handling
 220                 my $error =
 221                     $zconns[$i]->errmsg() . " ("
 222                   . $zconns[$i]->errcode() . ") "
 223                   . $zconns[$i]->addinfo() . " "
 224                   . $zconns[$i]->diagset();
 225
 226                 return ( $error, undef, undef ) if $zconns[$i]->errcode();
 227             };
 228             if ($@) {
 229
 230                 # caught a ZOOM::Exception
 231                 my $error =
 232                     $@->message() . " ("
 233                   . $@->code() . ") "
 234                   . $@->addinfo() . " "
 235                   . $@->diagset();
 236                 warn $error;
 237                 return ( $error, undef, undef );
 238             }
 239         }
 240         while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 241             my $event = $zconns[ $i - 1 ]->last_event();
 242             if ( $event == ZOOM::Event::ZEND ) {
 243
 244                 my $first_record = defined( $offset ) ? $offset+1 : 1;
 245                 my $hits = $tmpresults[ $i - 1 ]->size();
 246                 $total_hits += $hits;
 247                 my $last_record = $hits;
 248                 if ( defined $max_results && $offset + $max_results < $hits ) {
 249                     $last_record  = $offset + $max_results;
 250                 }
 251
 252                 for my $j ( $first_record..$last_record ) {
 253                     my $record = $tmpresults[ $i - 1 ]->record( $j-1 )->raw(); # 0 indexed
 254                     push @results, $record;
 255                 }
 256             }
 257         }
 258
 259         foreach my $result (@tmpresults) {
 260             $result->destroy();
 261         }
 262         foreach my $zoom_query (@zoom_queries) {
 263             $zoom_query->destroy();
 264         }
 265
 266         return ( undef, \@results, $total_hits );
 267     }
 268 }
 269
 270 =head2 getRecords
 271
 272 ( undef, $results_hashref, \@facets_loop ) = getRecords (
 273
 274         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 275         $results_per_page, $offset,       $expanded_facet, $branches,
 276         $query_type,       $scan
 277     );
 278
 279 The all singing, all dancing, multi-server, asynchronous, scanning,
 280 searching, record nabbing, facet-building
 281
  282 See verbose embedded documentation.
 283
 284 =cut
 285
 286 sub getRecords {
 287     my (
 288         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 289         $results_per_page, $offset,       $expanded_facet, $branches,
 290         $query_type,       $scan
 291     ) = @_;
 292
 293     my @servers = @$servers_ref;
 294     my @sort_by = @$sort_by_ref;
 295
 296     # Initialize variables for the ZOOM connection and results object
 297     my $zconn;
 298     my @zconns;
 299     my @results;
 300     my $results_hashref = ();
 301
 302     # Initialize variables for the faceted results objects
 303     my $facets_counter = ();
 304     my $facets_info    = ();
 305     my $facets         = getFacets();
 306
 307     my @facets_loop
 308       ;    # stores the ref to array of hashes for template facets loop
 309
 310     ### LOOP THROUGH THE SERVERS
 311     for ( my $i = 0 ; $i < @servers ; $i++ ) {
 312         $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 313
 314 # perform the search, create the results objects
 315 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
 316         my $query_to_use = ($servers[$i] =~ /biblioserver/) ? $koha_query : $simple_query;
 317
 318         #$query_to_use = $simple_query if $scan;
 319         warn $simple_query if ( $scan and $DEBUG );
 320
 321         # Check if we've got a query_type defined, if so, use it
 322         eval {
 323             if ($query_type) {
 324                 if ($query_type =~ /^ccl/) {
 325                     $query_to_use =~ s/\:/\=/g;    # change : to = last minute (FIXME)
 326                     $results[$i] = $zconns[$i]->search(new ZOOM::Query::CCL2RPN($query_to_use, $zconns[$i]));
 327                 } elsif ($query_type =~ /^cql/) {
 328                     $results[$i] = $zconns[$i]->search(new ZOOM::Query::CQL($query_to_use, $zconns[$i]));
 329                 } elsif ($query_type =~ /^pqf/) {
 330                     $results[$i] = $zconns[$i]->search(new ZOOM::Query::PQF($query_to_use, $zconns[$i]));
 331                 } else {
 332                     warn "Unknown query_type '$query_type'.  Results undetermined.";
 333                 }
 334             } elsif ($scan) {
 335                     $results[$i] = $zconns[$i]->scan(  new ZOOM::Query::CCL2RPN($query_to_use, $zconns[$i]));
 336             } else {
 337                     $results[$i] = $zconns[$i]->search(new ZOOM::Query::CCL2RPN($query_to_use, $zconns[$i]));
 338             }
 339         };
 340         if ($@) {
 341             warn "WARNING: query problem with $query_to_use " . $@;
 342         }
 343
 344         # Concatenate the sort_by limits and pass them to the results object
 345         # Note: sort will override rank
 346         my $sort_by;
 347         foreach my $sort (@sort_by) {
 348             if ( $sort eq "author_az" ) {
 349                 $sort_by .= "1=1003 <i ";
 350             }
 351             elsif ( $sort eq "author_za" ) {
 352                 $sort_by .= "1=1003 >i ";
 353             }
 354             elsif ( $sort eq "popularity_asc" ) {
 355                 $sort_by .= "1=9003 <i ";
 356             }
 357             elsif ( $sort eq "popularity_dsc" ) {
 358                 $sort_by .= "1=9003 >i ";
 359             }
 360             elsif ( $sort eq "call_number_asc" ) {
 361                 $sort_by .= "1=20  <i ";
 362             }
 363             elsif ( $sort eq "call_number_dsc" ) {
 364                 $sort_by .= "1=20 >i ";
 365             }
 366             elsif ( $sort eq "pubdate_asc" ) {
 367                 $sort_by .= "1=31 <i ";
 368             }
 369             elsif ( $sort eq "pubdate_dsc" ) {
 370                 $sort_by .= "1=31 >i ";
 371             }
 372             elsif ( $sort eq "acqdate_asc" ) {
 373                 $sort_by .= "1=32 <i ";
 374             }
 375             elsif ( $sort eq "acqdate_dsc" ) {
 376                 $sort_by .= "1=32 >i ";
 377             }
 378             elsif ( $sort eq "title_az" ) {
 379                 $sort_by .= "1=4 <i ";
 380             }
 381             elsif ( $sort eq "title_za" ) {
 382                 $sort_by .= "1=4 >i ";
 383             }
 384             else {
 385                 warn "Ignoring unrecognized sort '$sort' requested" if $sort_by;
 386             }
 387         }
 388         if ($sort_by) {
 389             if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
 390                 warn "WARNING sort $sort_by failed";
 391             }
 392         }
 393     }    # finished looping through servers
 394
 395     # The big moment: asynchronously retrieve results from all servers
 396     while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 397         my $ev = $zconns[ $i - 1 ]->last_event();
 398         if ( $ev == ZOOM::Event::ZEND ) {
 399             next unless $results[ $i - 1 ];
 400             my $size = $results[ $i - 1 ]->size();
 401             if ( $size > 0 ) {
 402                 my $results_hash;
 403
 404                 # loop through the results
 405                 $results_hash->{'hits'} = $size;
 406                 my $times;
 407                 if ( $offset + $results_per_page <= $size ) {
 408                     $times = $offset + $results_per_page;
 409                 }
 410                 else {
 411                     $times = $size;
 412                 }
 413                 for ( my $j = $offset ; $j < $times ; $j++ ) {
 414                     my $records_hash;
 415                     my $record;
 416                     my $facet_record;
 417
 418                     ## Check if it's an index scan
 419                     if ($scan) {
 420                         my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
 421
 422                  # here we create a minimal MARC record and hand it off to the
 423                  # template just like a normal result ... perhaps not ideal, but
 424                  # it works for now
 425                         my $tmprecord = MARC::Record->new();
 426                         $tmprecord->encoding('UTF-8');
 427                         my $tmptitle;
 428                         my $tmpauthor;
 429
 430                 # the minimal record in author/title (depending on MARC flavour)
 431                         if (C4::Context->preference("marcflavour") eq "UNIMARC") {
 432                             $tmptitle = MARC::Field->new('200',' ',' ', a => $term, f => $occ);
 433                             $tmprecord->append_fields($tmptitle);
 434                         } else {
 435                             $tmptitle  = MARC::Field->new('245',' ',' ', a => $term,);
 436                             $tmpauthor = MARC::Field->new('100',' ',' ', a => $occ,);
 437                             $tmprecord->append_fields($tmptitle);
 438                             $tmprecord->append_fields($tmpauthor);
 439                         }
 440                         $results_hash->{'RECORDS'}[$j] = $tmprecord->as_usmarc();
 441                     }
 442
 443                     # not an index scan
 444                     else {
 445                         $record = $results[ $i - 1 ]->record($j)->raw();
 446
 447                         # warn "RECORD $j:".$record;
 448                         $results_hash->{'RECORDS'}[$j] = $record;
 449
 450             # Fill the facets while we're looping, but only for the biblioserver
 451                         $facet_record = MARC::Record->new_from_usmarc($record)
 452                           if $servers[ $i - 1 ] =~ /biblioserver/;
 453
 454                     #warn $servers[$i-1]."\n".$record; #.$facet_record->title();
 455                         if ($facet_record) {
 456                             for ( my $k = 0 ; $k <= @$facets ; $k++ ) {
 457                                 ($facets->[$k]) or next;
 458                                 my @fields = map {$facet_record->field($_)} @{$facets->[$k]->{'tags'}} ;
 459                                 for my $field (@fields) {
 460                                     my @subfields = $field->subfields();
 461                                     for my $subfield (@subfields) {
 462                                         my ( $code, $data ) = @$subfield;
 463                                         ($code eq $facets->[$k]->{'subfield'}) or next;
 464                                         $facets_counter->{ $facets->[$k]->{'link_value'} }->{$data}++;
 465                                     }
 466                                 }
 467                                 $facets_info->{ $facets->[$k]->{'link_value'} }->{'label_value'} =
 468                                     $facets->[$k]->{'label_value'};
 469                                 $facets_info->{ $facets->[$k]->{'link_value'} }->{'expanded'} =
 470                                     $facets->[$k]->{'expanded'};
 471                             }
 472                         }
 473                     }
 474                 }
 475                 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
 476             }
 477
 478             # warn "connection ", $i-1, ": $size hits";
 479             # warn $results[$i-1]->record(0)->render() if $size > 0;
 480
 481             # BUILD FACETS
 482             if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
 483                 for my $link_value (
 484                     sort { $facets_counter->{$b} <=> $facets_counter->{$a} }
 485                         keys %$facets_counter )
 486                 {
 487                     my $expandable;
 488                     my $number_of_facets;
 489                     my @this_facets_array;
 490                     for my $one_facet (
 491                         sort {
 492                              $facets_counter->{$link_value}->{$b}
 493                          <=> $facets_counter->{$link_value}->{$a}
 494                         } keys %{ $facets_counter->{$link_value} }
 495                       )
 496                     {
 497                         $number_of_facets++;
 498                         if (   ( $number_of_facets < 6 )
 499                             || ( $expanded_facet eq $link_value )
 500                             || ( $facets_info->{$link_value}->{'expanded'} ) )
 501                         {
 502
 503                       # Sanitize the link value ), ( will cause errors with CCL,
 504                             my $facet_link_value = $one_facet;
 505                             $facet_link_value =~ s/(\(|\))/ /g;
 506
 507                             # fix the length that will display in the label,
 508                             my $facet_label_value = $one_facet;
 509                             $facet_label_value =
 510                               substr( $one_facet, 0, 20 ) . "..."
 511                               unless length($facet_label_value) <= 20;
 512
 513                             # if it's a branch, label by the name, not the code,
 514                             if ( $link_value =~ /branch/ ) {
 515                                 $facet_label_value =
 516                                   $branches->{$one_facet}->{'branchname'};
 517                             }
 518
 519                             # but we're down with the whole label being in the link's title.
 520                             push @this_facets_array, {
 521                                 facet_count       => $facets_counter->{$link_value}->{$one_facet},
 522                                 facet_label_value => $facet_label_value,
 523                                 facet_title_value => $one_facet,
 524                                 facet_link_value  => $facet_link_value,
 525                                 type_link_value   => $link_value,
 526                             };
 527                         }
 528                     }
 529
 530                     # handle expanded option
 531                     unless ( $facets_info->{$link_value}->{'expanded'} ) {
 532                         $expandable = 1
 533                           if ( ( $number_of_facets > 6 )
 534                             && ( $expanded_facet ne $link_value ) );
 535                     }
 536                     push @facets_loop, {
 537                         type_link_value => $link_value,
 538                         type_id         => $link_value . "_id",
 539                         "type_label_" . $facets_info->{$link_value}->{'label_value'} => 1,
 540                         facets     => \@this_facets_array,
 541                         expandable => $expandable,
 542                         expand     => $link_value,
 543                     } unless ( ($facets_info->{$link_value}->{'label_value'} =~ /Libraries/) and (C4::Context->preference('singleBranchMode')) );
 544                 }
 545             }
 546         }
 547     }
 548     return ( undef, $results_hashref, \@facets_loop );
 549 }
 550
 551 sub pazGetRecords {
 552     my (
 553         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 554         $results_per_page, $offset,       $expanded_facet, $branches,
 555         $query_type,       $scan
 556     ) = @_;
 557
 558     my $paz = C4::Search::PazPar2->new(C4::Context->config('pazpar2url'));
 559     $paz->init();
 560     $paz->search($simple_query);
 561     sleep 1;
 562
 563     # do results
 564     my $results_hashref = {};
 565     my $stats = XMLin($paz->stat);
 566     my $results = XMLin($paz->show($offset, $results_per_page, 'work-title:1'), forcearray => 1);
 567
 568     # for a grouped search result, the number of hits
 569     # is the number of groups returned; 'bib_hits' will have
 570     # the total number of bibs.
 571     $results_hashref->{'biblioserver'}->{'hits'} = $results->{'merged'}->[0];
 572     $results_hashref->{'biblioserver'}->{'bib_hits'} = $stats->{'hits'};
 573
 574     HIT: foreach my $hit (@{ $results->{'hit'} }) {
 575         my $recid = $hit->{recid}->[0];
 576
 577         my $work_title = $hit->{'md-work-title'}->[0];
 578         my $work_author;
 579         if (exists $hit->{'md-work-author'}) {
 580             $work_author = $hit->{'md-work-author'}->[0];
 581         }
 582         my $group_label = (defined $work_author) ? "$work_title / $work_author" : $work_title;
 583
 584         my $result_group = {};
 585         $result_group->{'group_label'} = $group_label;
 586         $result_group->{'group_merge_key'} = $recid;
 587
 588         my $count = 1;
 589         if (exists $hit->{count}) {
 590             $count = $hit->{count}->[0];
 591         }
 592         $result_group->{'group_count'} = $count;
 593
 594         for (my $i = 0; $i < $count; $i++) {
 595             # FIXME -- may need to worry about diacritics here
 596             my $rec = $paz->record($recid, $i);
 597             push @{ $result_group->{'RECORDS'} }, $rec;
 598         }
 599
 600         push @{ $results_hashref->{'biblioserver'}->{'GROUPS'} }, $result_group;
 601     }
 602
 603     # pass through facets
 604     my $termlist_xml = $paz->termlist('author,subject');
 605     my $terms = XMLin($termlist_xml, forcearray => 1);
 606     my @facets_loop = ();
 607     #die Dumper($results);
 608 #    foreach my $list (sort keys %{ $terms->{'list'} }) {
 609 #        my @facets = ();
 610 #        foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) {
 611 #            push @facets, {
 612 #                facet_label_value => $facet->{'name'}->[0],
 613 #            };
 614 #        }
 615 #        push @facets_loop, ( {
 616 #            type_label => $list,
 617 #            facets => \@facets,
 618 #        } );
 619 #    }
 620
 621     return ( undef, $results_hashref, \@facets_loop );
 622 }
 623
 624 # STOPWORDS
 625 sub _remove_stopwords {
 626     my ( $operand, $index ) = @_;
 627     my @stopwords_removed;
 628
 629     # phrase and exact-qualified indexes shouldn't have stopwords removed
 630     if ( $index !~ m/phr|ext/ ) {
 631
 632 # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
 633 #       we use IsAlpha unicode definition, to deal correctly with diacritics.
 634 #       otherwise, a French word like "leçon" woudl be split into "le" "çon", "le"
 635 #       is a stopword, we'd get "çon" and wouldn't find anything...
 636                 foreach ( keys %{ C4::Context->stopwords } ) {
 637                         next if ( $_ =~ /(and|or|not)/ );    # don't remove operators
 638                         if ( my ($matched) = ($operand =~
 639                                 /(\P{IsAlnum}\Q$_\E\P{IsAlnum}|^\Q$_\E\P{IsAlnum}|\P{IsAlnum}\Q$_\E$|^\Q$_\E$)/gi) )
 640                         {
 641                                 $operand =~ s/\Q$matched\E/ /gi;
 642                                 push @stopwords_removed, $_;
 643                         }
 644                 }
 645         }
 646     return ( $operand, \@stopwords_removed );
 647 }
 648
 649 # TRUNCATION
 650 sub _detect_truncation {
 651     my ( $operand, $index ) = @_;
 652     my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
 653         @regexpr );
 654     $operand =~ s/^ //g;
 655     my @wordlist = split( /\s/, $operand );
 656     foreach my $word (@wordlist) {
 657         if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
 658             push @rightlefttruncated, $word;
 659         }
 660         elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
 661             push @lefttruncated, $word;
 662         }
 663         elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
 664             push @righttruncated, $word;
 665         }
 666         elsif ( index( $word, "*" ) < 0 ) {
 667             push @nontruncated, $word;
 668         }
 669         else {
 670             push @regexpr, $word;
 671         }
 672     }
 673     return (
 674         \@nontruncated,       \@righttruncated, \@lefttruncated,
 675         \@rightlefttruncated, \@regexpr
 676     );
 677 }
 678
 679 # STEMMING
 680 sub _build_stemmed_operand {
 681     my ($operand) = @_;
 682     my $stemmed_operand;
 683
 684     # If operand contains a digit, it is almost certainly an identifier, and should
 685     # not be stemmed.  This is particularly relevant for ISBNs and ISSNs, which
 686     # can contain the letter "X" - for example, _build_stemmend_operand would reduce
 687     # "014100018X" to "x ", which for a MARC21 database would bring up irrelevant
 688     # results (e.g., "23 x 29 cm." from the 300$c).  Bug 2098.
 689     return $operand if $operand =~ /\d/;
 690
 691 # FIXME: the locale should be set based on the user's language and/or search choice
 692     my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
 693
 694 # FIXME: these should be stored in the db so the librarian can modify the behavior
 695     $stemmer->add_exceptions(
 696         {
 697             'and' => 'and',
 698             'or'  => 'or',
 699             'not' => 'not',
 700         }
 701     );
 702     my @words = split( / /, $operand );
 703     my $stems = $stemmer->stem(@words);
 704     for my $stem (@$stems) {
 705         $stemmed_operand .= "$stem";
 706         $stemmed_operand .= "?"
 707           unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
 708         $stemmed_operand .= " ";
 709     }
 710     warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
 711     return $stemmed_operand;
 712 }
 713
 714 # FIELD WEIGHTING
 715 sub _build_weighted_query {
 716
 717 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
 718 # pretty well but could work much better if we had a smarter query parser
 719     my ( $operand, $stemmed_operand, $index ) = @_;
 720     my $stemming      = C4::Context->preference("QueryStemming")     || 0;
 721     my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
 722     my $fuzzy_enabled = C4::Context->preference("QueryFuzzy")        || 0;
 723
 724     my $weighted_query .= "(rk=(";    # Specifies that we're applying rank
 725
 726     # Keyword, or, no index specified
 727     if ( ( $index eq 'kw' ) || ( !$index ) ) {
 728         $weighted_query .=
 729           "Title-cover,ext,r1=\"$operand\"";    # exact title-cover
 730         $weighted_query .= " or ti,ext,r2=\"$operand\"";    # exact title
 731         $weighted_query .= " or ti,phr,r3=\"$operand\"";    # phrase title
 732           #$weighted_query .= " or any,ext,r4=$operand";               # exact any
 733           #$weighted_query .=" or kw,wrdl,r5=\"$operand\"";            # word list any
 734         $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
 735           if $fuzzy_enabled;    # add fuzzy, word list
 736         $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
 737           if ( $stemming and $stemmed_operand )
 738           ;                     # add stemming, right truncation
 739         $weighted_query .= " or wrdl,r9=\"$operand\"";
 740
 741         # embedded sorting: 0 a-z; 1 z-a
 742         # $weighted_query .= ") or (sort1,aut=1";
 743     }
 744
 745     # Barcode searches should skip this process
 746     elsif ( $index eq 'bc' ) {
 747         $weighted_query .= "bc=\"$operand\"";
 748     }
 749
 750     # Authority-number searches should skip this process
 751     elsif ( $index eq 'an' ) {
 752         $weighted_query .= "an=\"$operand\"";
 753     }
 754
 755     # If the index already has more than one qualifier, wrap the operand
 756     # in quotes and pass it back (assumption is that the user knows what they
 757     # are doing and won't appreciate us mucking up their query
 758     elsif ( $index =~ ',' ) {
 759         $weighted_query .= " $index=\"$operand\"";
 760     }
 761
 762     #TODO: build better cases based on specific search indexes
 763     else {
 764         $weighted_query .= " $index,ext,r1=\"$operand\"";    # exact index
 765           #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
 766         $weighted_query .= " or $index,phr,r3=\"$operand\"";    # phrase index
 767         $weighted_query .=
 768           " or $index,rt,wrdl,r3=\"$operand\"";    # word list index
 769     }
 770
 771     $weighted_query .= "))";                       # close rank specification
 772     return $weighted_query;
 773 }
 774
 775 =head2 getIndexes
 776
 777 Return a reference to an array containing the names of the available indexes.
 778
 779 =cut
 780
 781 sub getIndexes{
 782     my @indexes = (
 783                     # biblio indexes
 784                     'ab',
 785                     'Abstract',
 786                     'acqdate',
 787                     'allrecords',
 788                     'an',
 789                     'Any',
 790                     'at',
 791                     'au',
 792                     'aub',
 793                     'aud',
 794                     'audience',
 795                     'auo',
 796                     'aut',
 797                     'Author',
 798                     'Author-in-order ',
 799                     'Author-personal-bibliography',
 800                     'Authority-Number',
 801                     'authtype',
 802                     'bc',
 803                     'biblionumber',
 804                     'bio',
 805                     'biography',
 806                     'callnum',
 807                     'cfn',
 808                     'Chronological-subdivision',
 809                     'cn-bib-source',
 810                     'cn-bib-sort',
 811                     'cn-class',
 812                     'cn-item',
 813                     'cn-prefix',
 814                     'cn-suffix',
 815                     'cpn',
 816                     'Code-institution',
 817                     'Conference-name',
 818                     'Conference-name-heading',
 819                     'Conference-name-see',
 820                     'Conference-name-seealso',
 821                     'Content-type',
 822                     'Control-number',
 823                     'copydate',
 824                     'Corporate-name',
 825                     'Corporate-name-heading',
 826                     'Corporate-name-see',
 827                     'Corporate-name-seealso',
 828                     'ctype',
 829                     'date-entered-on-file',
 830                     'Date-of-acquisition',
 831                     'Date-of-publication',
 832                     'Dewey-classification',
 833                     'extent',
 834                     'fic',
 835                     'fiction',
 836                     'Form-subdivision',
 837                     'format',
 838                     'Geographic-subdivision',
 839                     'he',
 840                     'Heading',
 841                     'Heading-use-main-or-added-entry',
 842                     'Heading-use-series-added-entry ',
 843                     'Heading-use-subject-added-entry',
 844                     'Host-item',
 845                     'id-other',
 846                     'Illustration-code',
 847                     'ISBN',
 848                     'ISSN',
 849                     'itemtype',
 850                     'kw',
 851                     'Koha-Auth-Number',
 852                     'l-format',
 853                     'language',
 854                     'lc-card',
 855                     'LC-card-number',
 856                     'lcn',
 857                     'llength',
 858                     'ln',
 859                     'Local-classification',
 860                     'Local-number',
 861                     'Match-heading',
 862                     'Match-heading-see-from',
 863                     'Material-type',
 864                     'mc-itemtype',
 865                     'mc-rtype',
 866                     'mus',
 867                     'Name-geographic',
 868                     'Name-geographic-heading',
 869                     'Name-geographic-see',
 870                     'Name-geographic-seealso',
 871                     'nb',
 872                     'Note',
 873                     'ns',
 874                     'nt',
 875                     'pb',
 876                     'Personal-name',
 877                     'Personal-name-heading',
 878                     'Personal-name-see',
 879                     'Personal-name-seealso',
 880                     'pl',
 881                     'Place-publication',
 882                     'pn',
 883                     'popularity',
 884                     'pubdate',
 885                     'Publisher',
 886                     'Record-type',
 887                     'rtype',
 888                     'se',
 889                     'See',
 890                     'See-also',
 891                     'sn',
 892                     'Stock-number',
 893                     'su',
 894                     'Subject',
 895                     'Subject-heading-thesaurus',
 896                     'Subject-name-personal',
 897                     'Subject-subdivision',
 898                     'Summary',
 899                     'Suppress',
 900                     'su-geo',
 901                     'su-na',
 902                     'su-to',
 903                     'su-ut',
 904                     'ut',
 905                     'Term-genre-form',
 906                     'Term-genre-form-heading',
 907                     'Term-genre-form-see',
 908                     'Term-genre-form-seealso',
 909                     'ti',
 910                     'Title',
 911                     'Title-cover',
 912                     'Title-series',
 913                     'Title-uniform',
 914                     'Title-uniform-heading',
 915                     'Title-uniform-see',
 916                     'Title-uniform-seealso',
 917                     'totalissues',
 918                     'yr',
 919
 920                     # items indexes
 921                     'acqsource',
 922                     'barcode',
 923                     'bc',
 924                     'branch',
 925                     'ccode',
 926                     'classification-source',
 927                     'cn-sort',
 928                     'coded-location-qualifier',
 929                     'copynumber',
 930                     'damaged',
 931                     'datelastborrowed',
 932                     'datelastseen',
 933                     'holdingbranch',
 934                     'homebranch',
 935                     'issues',
 936                     'itemnumber',
 937                     'itype',
 938                     'Local-classification',
 939                     'location',
 940                     'lost',
 941                     'materials-specified',
 942                     'mc-ccode',
 943                     'mc-itype',
 944                     'mc-loc',
 945                     'notforloan',
 946                     'onloan',
 947                     'price',
 948                     'renewals',
 949                     'replacementprice',
 950                     'replacementpricedate',
 951                     'reserves',
 952                     'restricted',
 953                     'stack',
 954                     'uri',
 955                     'withdrawn',
 956
 957                     # subject related
 958                   );
 959
 960     return \@indexes;
 961 }
 962
 963 =head2 buildQuery
 964
 965 ( $error, $query,
 966 $simple_query, $query_cgi,
 967 $query_desc, $limit,
 968 $limit_cgi, $limit_desc,
 969 $stopwords_removed, $query_type ) = buildQuery ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
 970
 971 Build queries and limits in CCL, CGI, Human,
 972 handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
 973
 974 See verbose embedded documentation.
 975
 976
 977 =cut
 978
 979 sub buildQuery {
 980     my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_;
 981
 982     warn "---------\nEnter buildQuery\n---------" if $DEBUG;
 983
 984     # dereference
 985     my @operators = $operators ? @$operators : ();
 986     my @indexes   = $indexes   ? @$indexes   : ();
 987     my @operands  = $operands  ? @$operands  : ();
 988     my @limits    = $limits    ? @$limits    : ();
 989     my @sort_by   = $sort_by   ? @$sort_by   : ();
 990
 991     my $stemming         = C4::Context->preference("QueryStemming")        || 0;
 992     my $auto_truncation  = C4::Context->preference("QueryAutoTruncate")    || 0;
 993     my $weight_fields    = C4::Context->preference("QueryWeightFields")    || 0;
 994     my $fuzzy_enabled    = C4::Context->preference("QueryFuzzy")           || 0;
 995     my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
 996
 997     # no stemming/weight/fuzzy in NoZebra
 998     if ( C4::Context->preference("NoZebra") ) {
 999         $stemming      = 0;
1000         $weight_fields = 0;
1001         $fuzzy_enabled = 0;
1002     }
1003
1004     my $query        = $operands[0];
1005     my $simple_query = $operands[0];
1006
1007     # initialize the variables we're passing back
1008     my $query_cgi;
1009     my $query_desc;
1010     my $query_type;
1011
1012     my $limit;
1013     my $limit_cgi;
1014     my $limit_desc;
1015
1016     my $stopwords_removed;    # flag to determine if stopwords have been removed
1017
1018     my $cclq;
1019     my $cclindexes = getIndexes();
1020     if( $query !~ /\s*ccl=/ ){
1021         for my $index (@$cclindexes){
1022             if($query =~ /($index)(,?\w)*:/){
1023                 $cclq = 1;
1024             }
1025         }
1026         $query = "ccl=$query" if($cclq);
1027     }
1028
1029 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
1030 # DIAGNOSTIC ONLY!!
1031     if ( $query =~ /^ccl=/ ) {
1032         return ( undef, $', $', "q=ccl=$'", $', '', '', '', '', 'ccl' );
1033     }
1034     if ( $query =~ /^cql=/ ) {
1035         return ( undef, $', $', "q=cql=$'", $', '', '', '', '', 'cql' );
1036     }
1037     if ( $query =~ /^pqf=/ ) {
1038         return ( undef, $', $', "q=pqf=$'", $', '', '', '', '', 'pqf' );
1039     }
1040
1041     # pass nested queries directly
1042     # FIXME: need better handling of some of these variables in this case
1043     if ( $query =~ /(\(|\))/ ) {
1044         return (
1045             undef,              $query, $simple_query, $query_cgi,
1046             $query,             $limit, $limit_cgi,    $limit_desc,
1047             $stopwords_removed, 'ccl'
1048         );
1049     }
1050
1051 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
1052 # query operands and indexes and add stemming, truncation, field weighting, etc.
1053 # Once we do so, we'll end up with a value in $query, just like if we had an
1054 # incoming $query from the user
1055     else {
1056         $query = ""
1057           ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
1058         my $previous_operand
1059           ;    # a flag used to keep track if there was a previous query
1060                # if there was, we can apply the current operator
1061                # for every operand
1062         for ( my $i = 0 ; $i <= @operands ; $i++ ) {
1063
1064             # COMBINE OPERANDS, INDEXES AND OPERATORS
1065             if ( $operands[$i] ) {
1066
1067               # A flag to determine whether or not to add the index to the query
1068                 my $indexes_set;
1069
1070 # If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling
1071                 if ( $operands[$i] =~ /(:|=)/ || $scan ) {
1072                     $weight_fields    = 0;
1073                     $stemming         = 0;
1074                     $remove_stopwords = 0;
1075                 }
1076                 my $operand = $operands[$i];
1077                 my $index   = $indexes[$i];
1078
1079                 # Add index-specific attributes
1080                 # Date of Publication
1081                 if ( $index eq 'yr' ) {
1082                     $index .= ",st-numeric";
1083 #                     $indexes_set++;
1084                                         $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
1085                 }
1086
1087                 # Date of Acquisition
1088                 elsif ( $index eq 'acqdate' ) {
1089                     $index .= ",st-date-normalized";
1090 #                     $indexes_set++;
1091                                         $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
1092                 }
1093                 # ISBN,ISSN,Standard Number, don't need special treatment
1094                 elsif ( $index eq 'nb' || $index eq 'ns' ) {
1095 #                     $indexes_set++;
1096                     (
1097                         $stemming,      $auto_truncation,
1098                         $weight_fields, $fuzzy_enabled,
1099                         $remove_stopwords
1100                     ) = ( 0, 0, 0, 0, 0 );
1101
1102                 }
1103
1104                 if(not $index){
1105                     $index = 'kw';
1106                 }
1107
1108                 # Set default structure attribute (word list)
1109                 my $struct_attr;
1110                 unless ( $indexes_set || !$index || $index =~ /(st-|phr|ext|wrdl)/ ) {
1111                     $struct_attr = ",wrdl";
1112                 }
1113
1114                 # Some helpful index variants
1115                 my $index_plus       = $index . $struct_attr . ":" if $index;
1116                 my $index_plus_comma = $index . $struct_attr . "," if $index;
1117
1118                 # Remove Stopwords
1119                 if ($remove_stopwords) {
1120                     ( $operand, $stopwords_removed ) =
1121                       _remove_stopwords( $operand, $index );
1122                     warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
1123                     warn "REMOVED STOPWORDS: @$stopwords_removed"
1124                       if ( $stopwords_removed && $DEBUG );
1125                 }
1126
1127                 if ($auto_truncation){
1128                                         unless ( $index =~ /(st-|phr|ext)/ ) {
1129                                                 #FIXME only valid with LTR scripts
1130                                                 $operand=join(" ",map{
1131                                                                                                 "$_*"
1132                                                                                          }split (/\s+/,$operand));
1133                                                 warn $operand if $DEBUG;
1134                                         }
1135                                 }
1136
1137                 # Detect Truncation
1138                 my $truncated_operand;
1139                 my( $nontruncated, $righttruncated, $lefttruncated,
1140                     $rightlefttruncated, $regexpr
1141                 ) = _detect_truncation( $operand, $index );
1142                 warn
1143 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
1144                   if $DEBUG;
1145
1146                 # Apply Truncation
1147                 if (
1148                     scalar(@$righttruncated) + scalar(@$lefttruncated) +
1149                     scalar(@$rightlefttruncated) > 0 )
1150                 {
1151
1152                # Don't field weight or add the index to the query, we do it here
1153                     $indexes_set = 1;
1154                     undef $weight_fields;
1155                     my $previous_truncation_operand;
1156                     if (scalar @$nontruncated) {
1157                         $truncated_operand .= "$index_plus @$nontruncated ";
1158                         $previous_truncation_operand = 1;
1159                     }
1160                     if (scalar @$righttruncated) {
1161                         $truncated_operand .= "and " if $previous_truncation_operand;
1162                         $truncated_operand .= $index_plus_comma . "rtrn:@$righttruncated ";
1163                         $previous_truncation_operand = 1;
1164                     }
1165                     if (scalar @$lefttruncated) {
1166                         $truncated_operand .= "and " if $previous_truncation_operand;
1167                         $truncated_operand .= $index_plus_comma . "ltrn:@$lefttruncated ";
1168                         $previous_truncation_operand = 1;
1169                     }
1170                     if (scalar @$rightlefttruncated) {
1171                         $truncated_operand .= "and " if $previous_truncation_operand;
1172                         $truncated_operand .= $index_plus_comma . "rltrn:@$rightlefttruncated ";
1173                         $previous_truncation_operand = 1;
1174                     }
1175                 }
1176                 $operand = $truncated_operand if $truncated_operand;
1177                 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
1178
1179                 # Handle Stemming
1180                 my $stemmed_operand;
1181                 $stemmed_operand = _build_stemmed_operand($operand) if $stemming;
1182
1183                 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
1184
1185                 # Handle Field Weighting
1186                 my $weighted_operand;
1187                 if ($weight_fields) {
1188                     $weighted_operand = _build_weighted_query( $operand, $stemmed_operand, $index );
1189                     $operand = $weighted_operand;
1190                     $indexes_set = 1;
1191                 }
1192
1193                 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
1194
1195                 # If there's a previous operand, we need to add an operator
1196                 if ($previous_operand) {
1197
1198                     # User-specified operator
1199                     if ( $operators[ $i - 1 ] ) {
1200                         $query     .= " $operators[$i-1] ";
1201                         $query     .= " $index_plus " unless $indexes_set;
1202                         $query     .= " $operand";
1203                         $query_cgi .= "&op=$operators[$i-1]";
1204                         $query_cgi .= "&idx=$index" if $index;
1205                         $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1206                         $query_desc .=
1207                           " $operators[$i-1] $index_plus $operands[$i]";
1208                     }
1209
1210                     # Default operator is and
1211                     else {
1212                         $query      .= " and ";
1213                         $query      .= "$index_plus " unless $indexes_set;
1214                         $query      .= "$operand";
1215                         $query_cgi  .= "&op=and&idx=$index" if $index;
1216                         $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1217                         $query_desc .= " and $index_plus $operands[$i]";
1218                     }
1219                 }
1220
1221                 # There isn't a pervious operand, don't need an operator
1222                 else {
1223
1224                     # Field-weighted queries already have indexes set
1225                     $query .= " $index_plus " unless $indexes_set;
1226                     $query .= $operand;
1227                     $query_desc .= " $index_plus $operands[$i]";
1228                     $query_cgi  .= "&idx=$index" if $index;
1229                     $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1230                     $previous_operand = 1;
1231                 }
1232             }    #/if $operands
1233         }    # /for
1234     }
1235     warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
1236
1237     # add limits
1238     my $group_OR_limits;
1239     my $availability_limit;
1240     foreach my $this_limit (@limits) {
1241         if ( $this_limit =~ /available/ ) {
1242
1243 # 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1244 # In English:
1245 # all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1246             $availability_limit .=
1247 "( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and (lost,st-numeric=0) )"; #or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )";
1248             $limit_cgi  .= "&limit=available";
1249             $limit_desc .= "";
1250         }
1251
1252         # group_OR_limits, prefixed by mc-
1253         # OR every member of the group
1254         elsif ( $this_limit =~ /mc/ ) {
1255             $group_OR_limits .= " or " if $group_OR_limits;
1256             $limit_desc      .= " or " if $group_OR_limits;
1257             $group_OR_limits .= "$this_limit";
1258             $limit_cgi       .= "&limit=$this_limit";
1259             $limit_desc      .= " $this_limit";
1260         }
1261
1262         # Regular old limits
1263         else {
1264             $limit .= " and " if $limit || $query;
1265             $limit      .= "$this_limit";
1266             $limit_cgi  .= "&limit=$this_limit";
1267             if ($this_limit =~ /^branch:(.+)/) {
1268                 my $branchcode = $1;
1269                 my $branchname = GetBranchName($branchcode);
1270                 if (defined $branchname) {
1271                     $limit_desc .= " branch:$branchname";
1272                 } else {
1273                     $limit_desc .= " $this_limit";
1274                 }
1275             } else {
1276                 $limit_desc .= " $this_limit";
1277             }
1278         }
1279     }
1280     if ($group_OR_limits) {
1281         $limit .= " and " if ( $query || $limit );
1282         $limit .= "($group_OR_limits)";
1283     }
1284     if ($availability_limit) {
1285         $limit .= " and " if ( $query || $limit );
1286         $limit .= "($availability_limit)";
1287     }
1288
1289     # Normalize the query and limit strings
1290     $query =~ s/:/=/g;
1291     $limit =~ s/:/=/g;
1292     for ( $query, $query_desc, $limit, $limit_desc ) {
1293         s/  / /g;    # remove extra spaces
1294         s/^ //g;     # remove any beginning spaces
1295         s/ $//g;     # remove any ending spaces
1296         s/==/=/g;    # remove double == from query
1297     }
1298     $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1299
1300     for ($query_cgi,$simple_query) {
1301         s/"//g;
1302     }
1303     # append the limit to the query
1304     $query .= " " . $limit;
1305
1306     # Warnings if DEBUG
1307     if ($DEBUG) {
1308         warn "QUERY:" . $query;
1309         warn "QUERY CGI:" . $query_cgi;
1310         warn "QUERY DESC:" . $query_desc;
1311         warn "LIMIT:" . $limit;
1312         warn "LIMIT CGI:" . $limit_cgi;
1313         warn "LIMIT DESC:" . $limit_desc;
1314         warn "---------\nLeave buildQuery\n---------";
1315     }
1316     return (
1317         undef,              $query, $simple_query, $query_cgi,
1318         $query_desc,        $limit, $limit_cgi,    $limit_desc,
1319         $stopwords_removed, $query_type
1320     );
1321 }
1322
1323 =head2 searchResults
1324
1325 Format results in a form suitable for passing to the template
1326
1327 =cut
1328
1329 # IMO this subroutine is pretty messy still -- it's responsible for
1330 # building the HTML output for the template
1331 sub searchResults {
1332     my ( $searchdesc, $hits, $results_per_page, $offset, $scan, @marcresults ) = @_;
1333     my $dbh = C4::Context->dbh;
1334     my @newresults;
1335
1336     #Build branchnames hash
1337     #find branchname
1338     #get branch information.....
1339     my %branches;
1340     my $bsth =$dbh->prepare("SELECT branchcode,branchname FROM branches"); # FIXME : use C4::Branch::GetBranches
1341     $bsth->execute();
1342     while ( my $bdata = $bsth->fetchrow_hashref ) {
1343         $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'};
1344     }
1345 # FIXME - We build an authorised values hash here, using the default framework
1346 # though it is possible to have different authvals for different fws.
1347
1348     my $shelflocations =GetKohaAuthorisedValues('items.location','');
1349
1350     # get notforloan authorised value list (see $shelflocations  FIXME)
1351     my $notforloan_authorised_value = GetAuthValCode('items.notforloan','');
1352
1353     #Build itemtype hash
1354     #find itemtype & itemtype image
1355     my %itemtypes;
1356     $bsth =
1357       $dbh->prepare(
1358         "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes"
1359       );
1360     $bsth->execute();
1361     while ( my $bdata = $bsth->fetchrow_hashref ) {
1362                 foreach (qw(description imageurl summary notforloan)) {
1363                 $itemtypes{ $bdata->{'itemtype'} }->{$_} = $bdata->{$_};
1364                 }
1365     }
1366
1367     #search item field code
1368     my $sth =
1369       $dbh->prepare(
1370 "SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'"
1371       );
1372     $sth->execute;
1373     my ($itemtag) = $sth->fetchrow;
1374
1375     ## find column names of items related to MARC
1376     my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items");
1377     $sth2->execute;
1378     my %subfieldstosearch;
1379     while ( ( my $column ) = $sth2->fetchrow ) {
1380         my ( $tagfield, $tagsubfield ) =
1381           &GetMarcFromKohaField( "items." . $column, "" );
1382         $subfieldstosearch{$column} = $tagsubfield;
1383     }
1384
1385     # handle which records to actually retrieve
1386     my $times;
1387     if ( $hits && $offset + $results_per_page <= $hits ) {
1388         $times = $offset + $results_per_page;
1389     }
1390     else {
1391         $times = $hits;  # FIXME: if $hits is undefined, why do we want to equal it?
1392     }
1393     my $marcflavour = C4::Context->preference("marcflavour");
1394
1395     # We get the biblionumber position in MARC
1396     my ($bibliotag,$bibliosubf)=GetMarcFromKohaField('biblio.biblionumber','');
1397     my $fw = '';
1398
1399     # loop through all of the records we've retrieved
1400     for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
1401         my $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
1402                 my $biblionumber;
1403
1404         if(not $scan){
1405             if ($bibliotag<10){
1406                 $biblionumber = $marcrecord->field($bibliotag)->data;
1407             }else{
1408                 $biblionumber = $marcrecord->subfield($bibliotag,$bibliosubf);
1409             }
1410             $fw = GetFrameworkCode($biblionumber);
1411         }
1412
1413         my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, $fw );
1414         $oldbiblio->{subtitle} = GetRecordValue('subtitle', $marcrecord, $fw);
1415         $oldbiblio->{result_number} = $i + 1;
1416
1417         # add imageurl to itemtype if there is one
1418         $oldbiblio->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
1419
1420         $oldbiblio->{'authorised_value_images'}  = C4::Items::get_authorised_value_images( C4::Biblio::get_biblio_authorised_values( $oldbiblio->{'biblionumber'}, $marcrecord ) );
1421         (my $aisbn) = $oldbiblio->{isbn} =~ /([\d-]*[X]*)/;
1422         $aisbn =~ s/-//g;
1423         $oldbiblio->{amazonisbn} = $aisbn;
1424         $oldbiblio->{description} = $itemtypes{ $oldbiblio->{itemtype} }->{description};
1425         $oldbiblio->{normalized_upc} = GetNormalizedUPC($marcrecord,$marcflavour);
1426         $oldbiblio->{normalized_ean} = GetNormalizedEAN($marcrecord,$marcflavour);
1427         $oldbiblio->{normalized_oclc} = GetNormalizedOCLCNumber($marcrecord,$marcflavour);
1428         $oldbiblio->{normalized_isbn} = GetNormalizedISBN(undef,$marcrecord,$marcflavour);
1429         $oldbiblio->{content_identifier_exists} = 1 if ($oldbiblio->{normalized_isbn} or $oldbiblio->{normalized_oclc} or $oldbiblio->{normalized_ean} or $oldbiblio->{normalized_upc});
1430         $oldbiblio->{edition} = $oldbiblio->{editionstatement};
1431         $oldbiblio->{description} = $itemtypes{ $oldbiblio->{itemtype} }->{description};
1432  # Build summary if there is one (the summary is defined in the itemtypes table)
1433  # FIXME: is this used anywhere, I think it can be commented out? -- JF
1434         if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
1435             my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1436             my @fields  = $marcrecord->fields();
1437             foreach my $field (@fields) {
1438                 my $tag      = $field->tag();
1439                 my $tagvalue = $field->as_string();
1440                 if (! utf8::is_utf8($tagvalue)) {
1441                     utf8::decode($tagvalue);
1442                 }
1443
1444                 $summary =~
1445                   s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
1446                 unless ( $tag < 10 ) {
1447                     my @subf = $field->subfields;
1448                     for my $i ( 0 .. $#subf ) {
1449                         my $subfieldcode  = $subf[$i][0];
1450                         my $subfieldvalue = $subf[$i][1];
1451                         if (! utf8::is_utf8($subfieldvalue)) {
1452                             utf8::decode($subfieldvalue);
1453                         }
1454                         my $tagsubf       = $tag . $subfieldcode;
1455                         $summary =~
1456 s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
1457                     }
1458                 }
1459             }
1460             # FIXME: yuk
1461             $summary =~ s/\[(.*?)]//g;
1462             $summary =~ s/\n/<br\/>/g;
1463             $oldbiblio->{summary} = $summary;
1464         }
1465
1466         # Pull out the items fields
1467         my @fields = $marcrecord->field($itemtag);
1468
1469         # Setting item statuses for display
1470         my @available_items_loop;
1471         my @onloan_items_loop;
1472         my @notforloan_items_loop;
1473         my @other_items_loop;
1474
1475         my $available_items;
1476         my $onloan_items;
1477         my $notforloan_items;
1478         my $other_items;
1479
1480         my $ordered_count         = 0;
1481         my $available_count       = 0;
1482         my $onloan_count          = 0;
1483         my $notforloan_count      = 0;
1484         my $longoverdue_count     = 0;
1485         my $other_count           = 0;
1486         my $wthdrawn_count        = 0;
1487         my $itemlost_count        = 0;
1488         my $itembinding_count     = 0;
1489         my $itemdamaged_count     = 0;
1490         my $item_in_transit_count = 0;
1491         my $can_place_holds       = 0;
1492         my $items_count           = scalar(@fields);
1493         my $maxitems =
1494           ( C4::Context->preference('maxItemsinSearchResults') )
1495           ? C4::Context->preference('maxItemsinSearchResults') - 1
1496           : 1;
1497
1498         # loop through every item
1499         foreach my $field (@fields) {
1500             my $item;
1501
1502             # populate the items hash
1503             foreach my $code ( keys %subfieldstosearch ) {
1504                 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1505             }
1506                         my $hbranch     = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'homebranch'    : 'holdingbranch';
1507                         my $otherbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'holdingbranch' : 'homebranch';
1508             # set item's branch name, use HomeOrHoldingBranch syspref first, fall back to the other one
1509             if ($item->{$hbranch}) {
1510                 $item->{'branchname'} = $branches{$item->{$hbranch}};
1511             }
1512             elsif ($item->{$otherbranch}) {     # Last resort
1513                 $item->{'branchname'} = $branches{$item->{$otherbranch}};
1514             }
1515
1516             ($item->{'reserved'}) = C4::Reserves::CheckReserves($item->{itemnumber});
1517
1518                         my $prefix = $item->{$hbranch} . '--' . $item->{location} . $item->{itype} . $item->{itemcallnumber};
1519 # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
1520             if ( $item->{onloan} or $item->{reserved} ) {
1521                 $onloan_count++;
1522                                 my $key = $prefix . $item->{onloan} . $item->{barcode};
1523                                 $onloan_items->{$key}->{due_date} = format_date($item->{onloan});
1524                                 $onloan_items->{$key}->{count}++ if $item->{$hbranch};
1525                                 $onloan_items->{$key}->{branchname} = $item->{branchname};
1526                                 $onloan_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1527                                 $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber};
1528                                 $onloan_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1529                                 $onloan_items->{$key}->{barcode} = $item->{barcode};
1530                 # if something's checked out and lost, mark it as 'long overdue'
1531                 if ( $item->{itemlost} ) {
1532                     $onloan_items->{$prefix}->{longoverdue}++;
1533                     $longoverdue_count++;
1534                 } else {        # can place holds as long as item isn't lost
1535                     $can_place_holds = 1;
1536                 }
1537             }
1538
1539          # items not on loan, but still unavailable ( lost, withdrawn, damaged )
1540             else {
1541
1542                 # item is on order
1543                 if ( $item->{notforloan} == -1 ) {
1544                     $ordered_count++;
1545                 }
1546
1547                 # is item in transit?
1548                 my $transfertwhen = '';
1549                 my ($transfertfrom, $transfertto);
1550
1551                 unless ($item->{wthdrawn}
1552                         || $item->{itemlost}
1553                         || $item->{damaged}
1554                         || $item->{notforloan}
1555                         || $items_count > 20) {
1556
1557                     # A couple heuristics to limit how many times
1558                     # we query the database for item transfer information, sacrificing
1559                     # accuracy in some cases for speed;
1560                     #
1561                     # 1. don't query if item has one of the other statuses
1562                     # 2. don't check transit status if the bib has
1563                     #    more than 20 items
1564                     #
1565                     # FIXME: to avoid having the query the database like this, and to make
1566                     #        the in transit status count as unavailable for search limiting,
1567                     #        should map transit status to record indexed in Zebra.
1568                     #
1569                     ($transfertwhen, $transfertfrom, $transfertto) = C4::Circulation::GetTransfers($item->{itemnumber});
1570                 }
1571
1572                 # item is withdrawn, lost or damaged
1573                 if (   $item->{wthdrawn}
1574                     || $item->{itemlost}
1575                     || $item->{damaged}
1576                     || $item->{notforloan}
1577                     || $item->{reserved}
1578                     || ($transfertwhen ne ''))
1579                 {
1580                     $wthdrawn_count++        if $item->{wthdrawn};
1581                     $itemlost_count++        if $item->{itemlost};
1582                     $itemdamaged_count++     if $item->{damaged};
1583                     $item_in_transit_count++ if $transfertwhen ne '';
1584                     $item->{status} = $item->{wthdrawn} . "-" . $item->{itemlost} . "-" . $item->{damaged} . "-" . $item->{notforloan};
1585
1586                                         my $key = $prefix . $item->{status};
1587
1588                                         foreach (qw(wthdrawn itemlost damaged branchname itemcallnumber)) {
1589                                             if($item->{notforloan} == 1){
1590                                                 $notforloan_items->{$key}->{$_} = $item->{$_};
1591                                             }else{
1592                            $other_items->{$key}->{$_} = $item->{$_};
1593                                             }
1594                                         }
1595                                         if($item->{notforloan} == 1){
1596                         $notforloan_count++;
1597
1598                         $notforloan_items->{$key}->{intransit} = ($transfertwhen ne '') ? 1 : 0;
1599                                         $notforloan_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1600                                         $notforloan_items->{$key}->{count}++ if $item->{$hbranch};
1601                                         $notforloan_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1602                                         $notforloan_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1603                                         $notforloan_items->{$key}->{barcode} = $item->{barcode};
1604                     }else{
1605                         $other_count++;
1606
1607                         $other_items->{$key}->{intransit} = ($transfertwhen ne '') ? 1 : 0;
1608                                         $other_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1609                                         $other_items->{$key}->{count}++ if $item->{$hbranch};
1610                                         $other_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1611                                         $other_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1612                                         $other_items->{$key}->{barcode} = $item->{barcode};
1613                     }
1614
1615                 }
1616                 # item is available
1617                 else {
1618                     $can_place_holds = 1;
1619                     $available_count++;
1620                                         $available_items->{$prefix}->{count}++ if $item->{$hbranch};
1621                                         foreach (qw(branchname itemcallnumber barcode)) {
1622                         $available_items->{$prefix}->{$_} = $item->{$_};
1623                                         }
1624                                         $available_items->{$prefix}->{location} = $shelflocations->{ $item->{location} };
1625                                         $available_items->{$prefix}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1626                 }
1627             }
1628         }    # notforloan, item level and biblioitem level
1629         my ( $availableitemscount, $onloanitemscount, $notforloanitemscount,$otheritemscount );
1630         $maxitems =
1631           ( C4::Context->preference('maxItemsinSearchResults') )
1632           ? C4::Context->preference('maxItemsinSearchResults') - 1
1633           : 1;
1634         for my $key ( sort keys %$onloan_items ) {
1635             (++$onloanitemscount > $maxitems) and last;
1636             push @onloan_items_loop, $onloan_items->{$key};
1637         }
1638         for my $key ( sort keys %$other_items ) {
1639             (++$otheritemscount > $maxitems) and last;
1640             push @other_items_loop, $other_items->{$key};
1641         }
1642         for my $key ( sort keys %$notforloan_items ) {
1643             (++$notforloanitemscount > $maxitems) and last;
1644             push @notforloan_items_loop, $notforloan_items->{$key};
1645         }
1646         for my $key ( sort keys %$available_items ) {
1647             (++$availableitemscount > $maxitems) and last;
1648             push @available_items_loop, $available_items->{$key}
1649         }
1650
1651         # XSLT processing of some stuff
1652         if (C4::Context->preference("XSLTResultsDisplay") && !$scan) {
1653             $oldbiblio->{XSLTResultsRecord} = XSLTParse4Display(
1654                 $oldbiblio->{biblionumber}, $marcrecord, 'Results' );
1655         }
1656
1657         # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
1658         $can_place_holds = 0 if $itemtypes{ $oldbiblio->{itemtype} }->{notforloan};
1659         $oldbiblio->{norequests} = 1 unless $can_place_holds;
1660         $oldbiblio->{itemsplural}          = 1 if $items_count > 1;
1661         $oldbiblio->{items_count}          = $items_count;
1662         $oldbiblio->{available_items_loop} = \@available_items_loop;
1663         $oldbiblio->{notforloan_items_loop}= \@notforloan_items_loop;
1664         $oldbiblio->{onloan_items_loop}    = \@onloan_items_loop;
1665         $oldbiblio->{other_items_loop}     = \@other_items_loop;
1666         $oldbiblio->{availablecount}       = $available_count;
1667         $oldbiblio->{availableplural}      = 1 if $available_count > 1;
1668         $oldbiblio->{onloancount}          = $onloan_count;
1669         $oldbiblio->{onloanplural}         = 1 if $onloan_count > 1;
1670         $oldbiblio->{notforloancount}      = $notforloan_count;
1671         $oldbiblio->{othercount}           = $other_count;
1672         $oldbiblio->{otherplural}          = 1 if $other_count > 1;
1673         $oldbiblio->{wthdrawncount}        = $wthdrawn_count;
1674         $oldbiblio->{itemlostcount}        = $itemlost_count;
1675         $oldbiblio->{damagedcount}         = $itemdamaged_count;
1676         $oldbiblio->{intransitcount}       = $item_in_transit_count;
1677         $oldbiblio->{orderedcount}         = $ordered_count;
1678         $oldbiblio->{isbn} =~
1679           s/-//g;    # deleting - in isbn to enable amazon content
1680         push( @newresults, $oldbiblio );
1681     }
1682     return @newresults;
1683 }
1684
1685 #----------------------------------------------------------------------
1686 #
1687 # Non-Zebra GetRecords#
1688 #----------------------------------------------------------------------
1689
1690 =head2 NZgetRecords
1691
  NZgetRecords has the same API as Zebra's getRecords, although some of the parameters are ignored
1693
1694 =cut
1695
sub NZgetRecords {

    # The argument list mirrors getRecords so both can be called the same way.
    # Only $query, $sort_by_ref, $results_per_page and $offset are used here;
    # the remaining arguments are accepted and ignored.
    my (
        $query,            $simple_query, $sort_by_ref,    $servers_ref,
        $results_per_page, $offset,       $expanded_facet, $branches,
        $query_type,       $scan
    ) = @_;
    warn "query =$query" if $DEBUG;

    # NZanalyse returns the matching entries as a single string
    my $result = NZanalyse($query);
    warn "results =$result" if $DEBUG;

    # Only the first requested sort order is honoured. Read it as a scalar
    # element (not a one-element slice) and cope with a missing sort list.
    my $ordering = $sort_by_ref ? $sort_by_ref->[0] : undef;

    # Returns a three-element list whose first and last elements are undef;
    # the middle one is whatever NZorder returns.
    return ( undef,
        NZorder( $result, $ordering, $results_per_page, $offset ),
        undef );
}
1709
1710 =head2 NZanalyse
1711
  NZanalyse : takes a CQL string as parameter, and returns a list of results in the form biblionumber,title-count;biblionumber,title-count;...
  The list is built from an inverted index stored in the nozebra SQL table.
  Note that the title is included only for convenience: it makes sorting very fast when the sort is requested on title.
  If the sort is requested on anything else, all the results have to be read again, which may take longer.
1716
1717 =cut
1718
sub NZanalyse {

    # Recursively evaluate a query string against the nozebra inverted index.
    #
    #   $string : the query (may contain and/or/not, parentheses, one quoted phrase)
    #   $server : index set to search; defaults to 'biblioserver'
    #
    # Returns the matching entries concatenated in one string (entries are
    # separated by ';'), or undef when nothing matched.
    my ( $string, $server ) = @_;
    warn " NZanalyse" if $DEBUG;

    # $server contains biblioserver or authorities, depending on what we search on.
    $server = 'biblioserver' unless $server;

    # If the query holds a quoted phrase, temporarily replace the first one by
    # a placeholder so that any and/or/not inside it is not taken for an operator.
    # The capture is read only when the substitution really happened: with a
    # lone '"' in the query, $1 would otherwise hold a stale, unrelated value.
    my $commacontent;
    if ( $string =~ s/"(.*?)"/__X__/ ) {
        $commacontent = $1;
        warn "commacontent : $commacontent" if $DEBUG;
    }

    # Split the query string in 3 parts : X AND Y means : $left="X", $operator="AND" and $right="Y"
    # then call NZanalyse again with $left and $right
    # (recursive until we find a leaf, i.e. something without and/or/not).
    # Repeated operators are collapsed first, otherwise we would loop forever.
    while ( $string =~ s/( and| or| not| AND| OR| NOT)\1/$1/g ) {
    }

    # A leading parenthesised group is processed first.
    if ( $string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/ ) {
        my $left     = $1;
        my $right    = $4;
        my $operator = lc($3);

        # Put the quoted phrase back before recursing: the recursive call has
        # no knowledge of the placeholder and would search for "__X__" itself.
        if ( defined $commacontent ) {
            $left =~ s/__X__/"$commacontent"/;
            $right =~ s/__X__/"$commacontent"/ if defined $right;
        }
        warn
"dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right"
          if $DEBUG;
        my $leftresult = NZanalyse( $left, $server );
        if ($operator) {
            my $rightresult = NZanalyse( $right, $server );

            # OK, we have the results for right and left part of the query
            # depending on the operator, intersect, union or exclude both lists
            # to get a result list
            if ( $operator eq ' and ' ) {
                return NZoperatorAND($leftresult,$rightresult);
            }
            elsif ( $operator eq ' or ' ) {

                # just merge the 2 strings
                return $leftresult . $rightresult;
            }
            elsif ( $operator eq ' not ' ) {
                return NZoperatorNOT($leftresult,$rightresult);
            }
        }
        else {

            # nothing follows the parenthesised group: its result is the answer
            return $leftresult;
        }
    }
    warn "string :" . $string if $DEBUG;
    my $left = "";
    my $right = "";
    my $operator = "";
    if ($string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/) {
        $left     = $1;
        $right    = $3;
        $operator = lc($2);
    }
    warn "no parenthesis. left : $left operator: $operator right: $right"
      if $DEBUG;

    # it's not a leaf, we have a and/or/not
    if ($operator) {

        # reintroduce the quoted phrase if needed
        if ( defined $commacontent ) {
            $right =~ s/__X__/"$commacontent"/;
            $left  =~ s/__X__/"$commacontent"/;
        }
        warn "node : $left / $operator / $right\n" if $DEBUG;
        my $leftresult  = NZanalyse( $left,  $server );
        my $rightresult = NZanalyse( $right, $server );
        warn " leftresult : $leftresult" if $DEBUG;
        warn " rightresult : $rightresult" if $DEBUG;

        # OK, we have the results for right and left part of the query
        # depending on the operator, intersect, union or exclude both lists
        # to get a result list
        if ( $operator eq ' and ' ) {
            warn "NZAND" if $DEBUG;
            return NZoperatorAND($leftresult,$rightresult);
        }
        elsif ( $operator eq ' or ' ) {

            # just merge the 2 strings
            return $leftresult . $rightresult;
        }
        elsif ( $operator eq ' not ' ) {
            return NZoperatorNOT($leftresult,$rightresult);
        }
        else {

# this error is impossible, because of the regexp that isolates the operator, but just in case...
            die "error : operand unknown : $operator for $string";
        }
    }

    # it's a leaf, do the real SQL query and return the result
    else {
        $string =~ s/__X__/"$commacontent"/ if defined $commacontent;

        # punctuation is replaced by blanks
        $string =~ s/[-.?,;!'()\[\]{}"&+*\/]/ /g;

        # remove one leading blank
        $string =~ s/^ //;
        warn "leaf:$string" if $DEBUG;

        # parse the string into index/operator/value;
        # two-character comparison operators are tried first
        my $left = "";
        my $operator = "";
        my $right = "";
        if ($string =~ /(.*)(>=|<=)(.*)/) {
            $left     = $1;
            $operator = $2;
            $right    = $3;
        } else {
            $left = $string;
        }
        unless ($operator) {
            if ($string =~ /(.*)(>|<|=)(.*)/) {
                $left     = $1;
                $operator = $2;
                $right    = $3;
                warn
    "handling unless (operator)... left:$left operator:$operator right:$right"
                if $DEBUG;
            } else {
                $left = $string;
            }
        }
        my $results;

# strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr...
        $left =~ s/ .*$//;

        # automatic replace for short index names
        $left = 'title'            if $left =~ /^ti$/;
        $left = 'author'           if $left =~ /^au$/;
        $left = 'publisher'        if $left =~ /^pb$/;
        $left = 'subject'          if $left =~ /^su$/;
        $left = 'koha-Auth-Number' if $left =~ /^an$/;
        $left = 'keyword'          if $left =~ /^kw$/;
        $left = 'itemtype'         if $left =~ /^mc$/; # Fix for Bug 2599 - Search limits not working for NoZebra
        warn "handling leaf... left:$left operator:$operator right:$right" if $DEBUG;
        my $dbh = C4::Context->dbh;
        if ( $operator && $left ne 'keyword' ) {

            # do a specific search on one index.
            # $operator can only be one of >=, <=, >, <, = (from the regexps
            # above) or LIKE, so interpolating it in the SQL is safe.
            $operator = 'LIKE' if $operator eq '=' and $right =~ /%/;
            my $sth = $dbh->prepare(
"SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?"
            );
            warn "$left / $operator / $right\n" if $DEBUG;

            # sanitize the index name: trim both ends (needs /g, a single
            # substitution would only trim one side)
            $left =~ s/^\s+|\s+$//g;

            # split each word, query the DB and build the biblionumbers result
            foreach my $word ( split / /, $right ) {
                my $biblionumbers;
                $word =~ s/^\s+|\s+$//g;
                next unless $word;
                warn "EXECUTE : $server, $left, $word" if $DEBUG;

                # DBI reports failures through errstr, not through $!
                $sth->execute( $server, $left, $word )
                  or warn "execute failed: " . $sth->errstr;
                while ( my ( $line, $value ) = $sth->fetchrow ) {

# if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
# otherwise, fill the result
                    $biblionumbers .= $line
                      unless ( $right =~ /^\d+$/ && $value =~ /\D/ );
                    warn "result : $value "
                      . ( $right  =~ /\d/ ) . "=="
                      . ( $value =~ /\D/?$line:"" ) if $DEBUG;
                }

# do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
                if ($results) {
                    warn "NZAND" if $DEBUG;
                    $results = NZoperatorAND($biblionumbers,$results);
                } else {
                    $results = $biblionumbers;
                }
            }
        }
        else {

            # do a complete search (all indexes); index 'keyword' ends up here too.
            # NOTE(review): when an operator was parsed for a keyword search,
            # the words are still taken from the whole $string (index name
            # included) rather than from $right - confirm this is intended.
            my $sth = $dbh->prepare(
"SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?"
            );

            # split each word, query the DB and build the biblionumbers result
            foreach my $word ( split / /, $string ) {
                next if C4::Context->stopwords->{ uc($word) };   # skip if stopword
                warn "search on all indexes on $word" if $DEBUG;
                my $biblionumbers;
                next unless $word;
                $sth->execute( $server, $word );
                while ( my $line = $sth->fetchrow ) {
                    $biblionumbers .= $line;
                }

# do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
                if ($results) {
                    $results = NZoperatorAND($biblionumbers,$results);
                }
                else {
                    warn "NEW RES for $word = $biblionumbers" if $DEBUG;
                    $results = $biblionumbers;
                }
            }
        }
        warn "return : $results for LEAF : $string" if $DEBUG;
        return $results;
    }
    warn "---------\nLeave NZanalyse\n---------" if $DEBUG;
}
1939
sub NZoperatorAND{

    # Intersect two result strings made of "value-count;" entries.
    #
    # Every entry of $leftresult whose value is also present in $rightresult
    # is kept, with the smaller of the two counts, and is emitted twice so
    # that AND results weigh the same as OR results.
    # example : TWO : 61-2;64-1;121-1; / TOWER : 61-1;64-1;130-1;
    # result  : 61-1;61-1;64-1;64-1;
    #
    # Returns the resulting string, or undef when the intersection is empty.
    my ($rightresult, $leftresult)=@_;

    my @leftresult = defined $leftresult ? split( /;/, $leftresult ) : ();
    warn " @leftresult / $rightresult \n" if $DEBUG;

    # Index the other list by value (first occurrence wins). An exact hash
    # lookup replaces the former regex scan of the whole string, which
    # matched substrings too: "61,x" was found inside "161,x-1;".
    my %count_in_right;
    if ( defined $rightresult ) {
        foreach my $entry ( split /;/, $rightresult ) {
            next unless $entry =~ /(.*)-(\d+)$/;
            $count_in_right{$1} = $2 unless exists $count_in_right{$1};
        }
    }

    my $finalresult;
    foreach my $entry (@leftresult) {
        my $value = $entry;
        my $countvalue;
        ( $value, $countvalue ) = ( $1, $2 ) if ( $entry =~ /(.*)-(\d+)$/ );
        next unless exists $count_in_right{$value};

        # keep the smaller count; an entry without its own count takes the other one
        my $othercount = $count_in_right{$value};
        $countvalue = $othercount
          if !defined $countvalue || $othercount < $countvalue;
        $finalresult .=
            "$value-$countvalue;$value-$countvalue;";
    }
    warn "NZAND DONE : $finalresult \n" if $DEBUG;
    return $finalresult;
}
1966
sub NZoperatorOR{

    # Union of two result strings: the first list immediately followed by
    # the second one, without any de-duplication.
    my ($first_list, $second_list)=@_;
    return join q{}, $first_list, $second_list;
}
1971
sub NZoperatorNOT{

    # Exclusion of two result strings made of "value-count;" entries:
    # returns the entries of $leftresult whose value does not appear in
    # $rightresult, unchanged and in their original order, or undef when
    # nothing is left.
    my ($leftresult, $rightresult)=@_;

    # Values to exclude, compared literally. The former test interpolated
    # the value in an unanchored regex: titles holding regex metacharacters
    # could abort the search, and "61,x" was excluded by "161,x-1;".
    my %excluded;
    if ( defined $rightresult ) {
        foreach my $entry ( split /;/, $rightresult ) {
            my $value = $entry;
            $value = $1 if $value =~ m/(.*)-\d+$/;
            $excluded{$value} = 1;
        }
    }

    my $finalresult;
    return $finalresult unless defined $leftresult;
    foreach my $entry ( split /;/, $leftresult ) {
        next unless length $entry;    # ignore empty entries (";;")
        my $value = $entry;
        $value = $1 if $value =~ m/(.*)-\d+$/;
        $finalresult .= "$entry;" unless $excluded{$value};
    }
    return $finalresult;
}
1988
1989 =head2 NZorder
1990
1991   $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset);
1992
1993   TODO :: Description
1994
1995 =cut
1996
1997 sub NZorder {
1998     my ( $biblionumbers, $ordering, $results_per_page, $offset ) = @_;
1999     warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG;
2000
2001     # order title asc by default
2002     #     $ordering = '1=36 <i' unless $ordering;
2003     $results_per_page = 20 unless $results_per_page;
2004     $offset           = 0  unless $offset;
2005     my $dbh = C4::Context->dbh;
2006
2007     #
2008     # order by POPULARITY
2009     #
2010     if ( $ordering =~ /popularity/ ) {
2011         my %result;
2012         my %popularity;
2013
2014         # popularity is not in MARC record, it's builded from a specific query
2015         my $sth =
2016           $dbh->prepare("select sum(issues) from items where biblionumber=?");
2017         foreach ( split /;/, $biblionumbers ) {
2018             my ( $biblionumber, $title ) = split /,/, $_;
2019             $result{$biblionumber} = GetMarcBiblio($biblionumber);
2020             $sth->execute($biblionumber);
2021             my $popularity = $sth->fetchrow || 0;
2022
2023 # hint : the key is popularity.title because we can have
2024 # many results with the same popularity. In this case, sub-ordering is done by title
2025 # we also have biblionumber to avoid bug for 2 biblios with the same title & popularity
2026 # (un-frequent, I agree, but we won't forget anything that way ;-)
2027             $popularity{ sprintf( "%10d", $popularity ) . $title
2028                   . $biblionumber } = $biblionumber;
2029         }
2030
2031     # sort the hash and return the same structure as GetRecords (Zebra querying)
2032         my $result_hash;
2033         my $numbers = 0;
2034         if ( $ordering eq 'popularity_dsc' ) {    # sort popularity DESC
2035             foreach my $key ( sort { $b cmp $a } ( keys %popularity ) ) {
2036                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2037                   $result{ $popularity{$key} }->as_usmarc();
2038             }
2039         }
2040         else {                                    # sort popularity ASC
2041             foreach my $key ( sort ( keys %popularity ) ) {
2042                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2043                   $result{ $popularity{$key} }->as_usmarc();
2044             }
2045         }
2046         my $finalresult = ();
2047         $result_hash->{'hits'}         = $numbers;
2048         $finalresult->{'biblioserver'} = $result_hash;
2049         return $finalresult;
2050
2051         #
2052         # ORDER BY author
2053         #
2054     }
2055     elsif ( $ordering =~ /author/ ) {
2056         my %result;
2057         foreach ( split /;/, $biblionumbers ) {
2058             my ( $biblionumber, $title ) = split /,/, $_;
2059             my $record = GetMarcBiblio($biblionumber);
2060             my $author;
2061             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
2062                 $author = $record->subfield( '200', 'f' );
2063                 $author = $record->subfield( '700', 'a' ) unless $author;
2064             }
2065             else {
2066                 $author = $record->subfield( '100', 'a' );
2067             }
2068
2069 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2070 # and we don't want to get only 1 result for each of them !!!
2071             $result{ $author . $biblionumber } = $record;
2072         }
2073
2074     # sort the hash and return the same structure as GetRecords (Zebra querying)
2075         my $result_hash;
2076         my $numbers = 0;
2077         if ( $ordering eq 'author_za' ) {    # sort by author desc
2078             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2079                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2080                   $result{$key}->as_usmarc();
2081             }
2082         }
2083         else {                               # sort by author ASC
2084             foreach my $key ( sort ( keys %result ) ) {
2085                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2086                   $result{$key}->as_usmarc();
2087             }
2088         }
2089         my $finalresult = ();
2090         $result_hash->{'hits'}         = $numbers;
2091         $finalresult->{'biblioserver'} = $result_hash;
2092         return $finalresult;
2093
2094         #
2095         # ORDER BY callnumber
2096         #
2097     }
2098     elsif ( $ordering =~ /callnumber/ ) {
2099         my %result;
2100         foreach ( split /;/, $biblionumbers ) {
2101             my ( $biblionumber, $title ) = split /,/, $_;
2102             my $record = GetMarcBiblio($biblionumber);
2103             my $callnumber;
2104             my ( $callnumber_tag, $callnumber_subfield ) =
2105               GetMarcFromKohaField( 'items.itemcallnumber','' );
2106             ( $callnumber_tag, $callnumber_subfield ) =
2107               GetMarcFromKohaField('biblioitems.callnumber','')
2108               unless $callnumber_tag;
2109             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
2110                 $callnumber = $record->subfield( '200', 'f' );
2111             }
2112             else {
2113                 $callnumber = $record->subfield( '100', 'a' );
2114             }
2115
2116 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2117 # and we don't want to get only 1 result for each of them !!!
2118             $result{ $callnumber . $biblionumber } = $record;
2119         }
2120
2121     # sort the hash and return the same structure as GetRecords (Zebra querying)
2122         my $result_hash;
2123         my $numbers = 0;
2124         if ( $ordering eq 'call_number_dsc' ) {    # sort by title desc
2125             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2126                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2127                   $result{$key}->as_usmarc();
2128             }
2129         }
2130         else {                                     # sort by title ASC
2131             foreach my $key ( sort { $a cmp $b } ( keys %result ) ) {
2132                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2133                   $result{$key}->as_usmarc();
2134             }
2135         }
2136         my $finalresult = ();
2137         $result_hash->{'hits'}         = $numbers;
2138         $finalresult->{'biblioserver'} = $result_hash;
2139         return $finalresult;
2140     }
2141     elsif ( $ordering =~ /pubdate/ ) {             #pub year
2142         my %result;
2143         foreach ( split /;/, $biblionumbers ) {
2144             my ( $biblionumber, $title ) = split /,/, $_;
2145             my $record = GetMarcBiblio($biblionumber);
2146             my ( $publicationyear_tag, $publicationyear_subfield ) =
2147               GetMarcFromKohaField( 'biblioitems.publicationyear', '' );
2148             my $publicationyear =
2149               $record->subfield( $publicationyear_tag,
2150                 $publicationyear_subfield );
2151
2152 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2153 # and we don't want to get only 1 result for each of them !!!
2154             $result{ $publicationyear . $biblionumber } = $record;
2155         }
2156
2157     # sort the hash and return the same structure as GetRecords (Zebra querying)
2158         my $result_hash;
2159         my $numbers = 0;
2160         if ( $ordering eq 'pubdate_dsc' ) {    # sort by pubyear desc
2161             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2162                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2163                   $result{$key}->as_usmarc();
2164             }
2165         }
2166         else {                                 # sort by pub year ASC
2167             foreach my $key ( sort ( keys %result ) ) {
2168                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2169                   $result{$key}->as_usmarc();
2170             }
2171         }
2172         my $finalresult = ();
2173         $result_hash->{'hits'}         = $numbers;
2174         $finalresult->{'biblioserver'} = $result_hash;
2175         return $finalresult;
2176
2177         #
2178         # ORDER BY title
2179         #
2180     }
2181     elsif ( $ordering =~ /title/ ) {
2182
2183 # the title is in the biblionumbers string, so we just need to build a hash, sort it and return
2184         my %result;
2185         foreach ( split /;/, $biblionumbers ) {
2186             my ( $biblionumber, $title ) = split /,/, $_;
2187
2188 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2189 # and we don't want to get only 1 result for each of them !!!
2190 # hint & speed improvement : we can order without reading the record
2191 # so order, and read records only for the requested page !
2192             $result{ $title . $biblionumber } = $biblionumber;
2193         }
2194
2195     # sort the hash and return the same structure as GetRecords (Zebra querying)
2196         my $result_hash;
2197         my $numbers = 0;
2198         if ( $ordering eq 'title_az' ) {    # sort by title desc
2199             foreach my $key ( sort ( keys %result ) ) {
2200                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2201             }
2202         }
2203         else {                              # sort by title ASC
2204             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2205                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2206             }
2207         }
2208
2209         # limit the $results_per_page to result size if it's more
2210         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2211
2212         # for the requested page, replace biblionumber by the complete record
2213         # speed improvement : avoid reading too much things
2214         for (
2215             my $counter = $offset ;
2216             $counter <= $offset + $results_per_page ;
2217             $counter++
2218           )
2219         {
2220             $result_hash->{'RECORDS'}[$counter] =
2221               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc;
2222         }
2223         my $finalresult = ();
2224         $result_hash->{'hits'}         = $numbers;
2225         $finalresult->{'biblioserver'} = $result_hash;
2226         return $finalresult;
2227     }
2228     else {
2229
2230 #
2231 # order by ranking
2232 #
2233 # we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking
2234         my %result;
2235         my %count_ranking;
2236         foreach ( split /;/, $biblionumbers ) {
2237             my ( $biblionumber, $title ) = split /,/, $_;
2238             $title =~ /(.*)-(\d)/;
2239
2240             # get weight
2241             my $ranking = $2;
2242
2243 # note that we + the ranking because ranking is calculated on weight of EACH term requested.
2244 # if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N
2245 # biblio N has ranking = 6
2246             $count_ranking{$biblionumber} += $ranking;
2247         }
2248
2249 # build the result by "inverting" the count_ranking hash
2250 # hing : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead
2251 #         warn "counting";
2252         foreach ( keys %count_ranking ) {
2253             $result{ sprintf( "%10d", $count_ranking{$_} ) . '-' . $_ } = $_;
2254         }
2255
2256     # sort the hash and return the same structure as GetRecords (Zebra querying)
2257         my $result_hash;
2258         my $numbers = 0;
2259         foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2260             $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2261         }
2262
2263         # limit the $results_per_page to result size if it's more
2264         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2265
2266         # for the requested page, replace biblionumber by the complete record
2267         # speed improvement : avoid reading too much things
2268         for (
2269             my $counter = $offset ;
2270             $counter <= $offset + $results_per_page ;
2271             $counter++
2272           )
2273         {
2274             $result_hash->{'RECORDS'}[$counter] =
2275               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc
2276               if $result_hash->{'RECORDS'}[$counter];
2277         }
2278         my $finalresult = ();
2279         $result_hash->{'hits'}         = $numbers;
2280         $finalresult->{'biblioserver'} = $result_hash;
2281         return $finalresult;
2282     }
2283 }
2284
2285 =head2 enabled_staff_search_views
2286
2287 %hash = enabled_staff_search_views()
2288
2289 This function returns a hash that contains three flags obtained from the system
2290 preferences, used to determine whether a particular staff search results view
2291 is enabled.
2292
2293 =over 2
2294
2295 =item C<Output arg:>
2296
2297     * $hash{can_view_MARC} is true only if the MARC view is enabled
2298     * $hash{can_view_ISBD} is true only if the ISBD view is enabled
2299     * $hash{can_view_labeledMARC} is true only if the Labeled MARC view is enabled
2300
2301 =item C<usage in the script:>
2302
2303 =back
2304
2305 $template->param ( C4::Search::enabled_staff_search_views );
2306
2307 =cut
2308
# Report which staff search result views are switched on.
#
# Takes no arguments. Returns a flat list of three key/value pairs
# (can_view_MARC, can_view_ISBD, can_view_labeledMARC), so the result can be
# assigned to a hash or handed straight to $template->param(). Each value is
# whatever C4::Context->preference() reports for the matching system
# preference (viewMARC, viewISBD, viewLabeledMARC).
sub enabled_staff_search_views {

    # Every lookup is forced into scalar context. Inside a list, a lookup that
    # yielded an empty list would silently drop its value and shift all the
    # following key/value pairs out of alignment; scalar() guarantees exactly
    # one value (possibly undef) per key.
    return (
        can_view_MARC        => scalar( C4::Context->preference('viewMARC') ),           # MARC view
        can_view_ISBD        => scalar( C4::Context->preference('viewISBD') ),           # ISBD view
        can_view_labeledMARC => scalar( C4::Context->preference('viewLabeledMARC') ),    # Labeled MARC view
    );
}
2317
2318
2319 =head2 z3950_search_args
2320
2321 $arrayref = z3950_search_args($matchpoints)
2322
2323 This function returns an array reference that contains the search parameters to be
2324 passed to the Z39.50 search script (z3950_search.pl). The array elements
2325 are hash refs whose keys are name, value and encvalue, and whose values are the
2326 name of a search parameter, the value of that search parameter and the URL encoded
2327 value of that parameter.
2328
2329 The search parameter names are lccn, isbn, issn, title, author, dewey and subject.
2330
2331 The search parameter values are obtained from the bibliographic record whose
2332 data is in a hash reference in $matchpoints, as returned by Biblio::GetBiblioData().
2333
2334 If $matchpoints is a scalar, it is assumed to be an unnamed query descriptor, e.g.
2335 a general purpose search argument. In this case, the returned array contains only
2336 one entry: its name is 'title' and its value and encvalue are derived from $matchpoints.
2337
2338 If a search parameter value is undefined or empty, it is not included in the returned
2339 array.
2340
2341 The returned array reference may be passed directly to the template parameters.
2342
2343 =over 2
2344
2345 =item C<Output arg:>
2346
2347     * $array containing hash refs as described above
2348
2349 =item C<usage in the script:>
2350
2351 =back
2352
2353 $data = Biblio::GetBiblioData($bibno);
2354 $template->param ( MYLOOP => C4::Search::z3950_search_args($data) )
2355
2356 *OR*
2357
2358 $template->param ( MYLOOP => C4::Search::z3950_search_args($searchscalar) )
2359
2360 =cut
2361
# Build the list of search parameters handed to the Z39.50 search script.
#
# Argument: either a hash reference of bibliographic data (keys such as
# lccn, isbn, issn, title, author, dewey, subject) or a plain scalar, which
# is treated as a title search term.
#
# Returns an array reference of hash references, one per usable field, each
# holding name (the field name), value (the raw value) and encvalue (the
# value URL-encoded with URI::Escape::uri_escape_utf8). Entries appear in the
# fixed field order listed in the loop below.
sub z3950_search_args {
    my $bibrec = shift;

    # A non-reference argument is a general-purpose query: search it as a title.
    $bibrec = { title => $bibrec } if !ref $bibrec;

    my $array = [];
    for my $field (qw/ lccn isbn issn title author dewey subject /) {
        my $value = $bibrec->{$field};

        # Undefined or empty values carry no search information, so they are
        # left out entirely (a literal 0 is kept). Checking first also avoids
        # URL-encoding values that would be thrown away.
        next if !defined $value || $value eq '';

        push @$array,
          {
            name     => $field,
            value    => $value,
            encvalue => URI::Escape::uri_escape_utf8($value),
          };
    }
    return $array;
}
2373
2374
2375 END { }    # module clean-up code here (global destructor)
2376
2377 1;
2378 __END__
2379
2380 =head1 AUTHOR
2381
2382 Koha Development team <info@koha.org>
2383
2384 =cut