C4/Search.pm

   1 package C4::Search;
   2
   3 # This file is part of Koha.
   4 #
   5 # Koha is free software; you can redistribute it and/or modify it under the
   6 # terms of the GNU General Public License as published by the Free Software
   7 # Foundation; either version 2 of the License, or (at your option) any later
   8 # version.
   9 #
  10 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
  11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12 # A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU General Public License along with
  15 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  16 # Suite 330, Boston, MA  02111-1307 USA
  17
  18 use strict;
  19 # use warnings; # FIXME
  20 require Exporter;
  21 use C4::Context;
  22 use C4::Biblio;    # GetMarcFromKohaField, GetBiblioData
  23 use C4::Koha;      # getFacets
  24 use C4::Search::PazPar2;
  25 use XML::Simple;
  26 use C4::Dates qw(format_date);
  27 use C4::XSLT;
  28 use C4::Branch;
  29 use URI::Escape;
  30
  31 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
  32
  33 # set the version for version checking
  34 BEGIN {
  35     $VERSION = 3.01;
  36     $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
  37 }
  38
  39 =head1 NAME
  40
  41 C4::Search - Functions for searching the Koha catalog.
  42
  43 =head1 SYNOPSIS
  44
  45 See opac/opac-search.pl or catalogue/search.pl for example of usage
  46
  47 =head1 DESCRIPTION
  48
  49 This module provides searching functions for Koha's bibliographic databases
  50
  51 =head1 FUNCTIONS
  52
  53 =cut
  54
  55 @ISA    = qw(Exporter);
  56 @EXPORT = qw(
  57   &FindDuplicate
  58   &SimpleSearch
  59   &GetDistinctValues
  60   &searchResults
  61   &getRecords
  62   &buildQuery
  63   &NZgetRecords
  64 );
  65
  66 # make all your functions, whether exported or not;
  67
  68 =head2 GetDistinctValues($field);
  69
  70 C<$field> is a reference to the fields array
  71
  72 =cut
  73
  74 sub GetDistinctValues {
  75     my ($fieldname,$string)=@_;
  76     # returns a reference to a hash of references to branches...
  77     if ($fieldname=~/\./){
  78                         my ($table,$column)=split /\./, $fieldname;
  79                         my $dbh = C4::Context->dbh;
  80                         warn "select DISTINCT($column) as value, count(*) as cnt from $table group by lib order by $column ";
  81                         my $sth = $dbh->prepare("select DISTINCT($column) as value, count(*) as cnt from $table ".($string?" where $column like \"$string%\"":"")."group by value order by $column ");
  82                         $sth->execute;
  83                         my $elements=$sth->fetchall_arrayref({});
  84                         return $elements;
  85    }
  86    else {
  87                 $string||= qq("");
  88                 my @servers=qw<biblioserver authorityserver>;
  89                 my (@zconns,@results);
  90         for ( my $i = 0 ; $i < @servers ; $i++ ) {
  91                 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
  92                         $results[$i] =
  93                       $zconns[$i]->scan(
  94                         ZOOM::Query::CCL2RPN->new( qq"$fieldname $string", $zconns[$i])
  95                       );
  96                 }
  97                 # The big moment: asynchronously retrieve results from all servers
  98                 my @elements;
  99                 while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 100                         my $ev = $zconns[ $i - 1 ]->last_event();
 101                         if ( $ev == ZOOM::Event::ZEND ) {
 102                                 next unless $results[ $i - 1 ];
 103                                 my $size = $results[ $i - 1 ]->size();
 104                                 if ( $size > 0 ) {
 105                       for (my $j=0;$j<$size;$j++){
 106                                                 my %hashscan;
 107                                                 @hashscan{qw(value cnt)}=$results[ $i - 1 ]->display_term($j);
 108                                                 push @elements, \%hashscan;
 109                                           }
 110                                 }
 111                         }
 112                 }
 113                 return \@elements;
 114    }
 115 }
 116
 117 =head2 FindDuplicate
 118
 119 ($biblionumber,$biblionumber,$title) = FindDuplicate($record);
 120
 121 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
 122
 123 =cut
 124
 125 sub FindDuplicate {
 126     my ($record) = @_;
 127     my $dbh = C4::Context->dbh;
 128     my $result = TransformMarcToKoha( $dbh, $record, '' );
 129     my $sth;
 130     my $query;
 131     my $search;
 132     my $type;
 133     my ( $biblionumber, $title );
 134
 135     # search duplicate on ISBN, easy and fast..
 136     # ... normalize first
 137     if ( $result->{isbn} ) {
 138         $result->{isbn} =~ s/\(.*$//;
 139         $result->{isbn} =~ s/\s+$//;
 140         $query = "isbn=$result->{isbn}";
 141     }
 142     else {
 143         $result->{title} =~ s /\\//g;
 144         $result->{title} =~ s /\"//g;
 145         $result->{title} =~ s /\(//g;
 146         $result->{title} =~ s /\)//g;
 147
 148         # FIXME: instead of removing operators, could just do
 149         # quotes around the value
 150         $result->{title} =~ s/(and|or|not)//g;
 151         $query = "ti,ext=$result->{title}";
 152         $query .= " and itemtype=$result->{itemtype}"
 153           if ( $result->{itemtype} );
 154         if   ( $result->{author} ) {
 155             $result->{author} =~ s /\\//g;
 156             $result->{author} =~ s /\"//g;
 157             $result->{author} =~ s /\(//g;
 158             $result->{author} =~ s /\)//g;
 159
 160             # remove valid operators
 161             $result->{author} =~ s/(and|or|not)//g;
 162             $query .= " and au,ext=$result->{author}";
 163         }
 164     }
 165
 166     # FIXME: add error handling
 167     my ( $error, $searchresults ) = SimpleSearch($query); # FIXME :: hardcoded !
 168     my @results;
 169     foreach my $possible_duplicate_record (@$searchresults) {
 170         my $marcrecord =
 171           MARC::Record->new_from_usmarc($possible_duplicate_record);
 172         my $result = TransformMarcToKoha( $dbh, $marcrecord, '' );
 173
 174         # FIXME :: why 2 $biblionumber ?
 175         if ($result) {
 176             push @results, $result->{'biblionumber'};
 177             push @results, $result->{'title'};
 178         }
 179     }
 180     return @results;
 181 }
 182
 183 =head2 SimpleSearch
 184
 185 ( $error, $results, $total_hits ) = SimpleSearch( $query, $offset, $max_results, [@servers] );
 186
 187 This function provides a simple search API on the bibliographic catalog
 188
 189 =over 2
 190
 191 =item C<input arg:>
 192
 193     * $query can be a simple keyword or a complete CCL query
 194     * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
 195     * $offset - If present, represents the number of records at the beggining to omit. Defaults to 0
 196     * $max_results - if present, determines the maximum number of records to fetch. undef is All. defaults to undef.
 197
 198
 199 =item C<Output:>
 200
 201     * $error is a empty unless an error is detected
 202     * \@results is an array of records.
 203     * $total_hits is the number of hits that would have been returned with no limit
 204
 205 =item C<usage in the script:>
 206
 207 =back
 208
 209 my ( $error, $marcresults, $total_hits ) = SimpleSearch($query);
 210
 211 if (defined $error) {
 212     $template->param(query_error => $error);
 213     warn "error: ".$error;
 214     output_html_with_http_headers $input, $cookie, $template->output;
 215     exit;
 216 }
 217
 218 my $hits = scalar @$marcresults;
 219 my @results;
 220
 221 for my $i (0..$hits) {
 222     my %resultsloop;
 223     my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]);
 224     my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,'');
 225
 226     #build the hash for the template.
 227     $resultsloop{highlight}       = ($i % 2)?(1):(0);
 228     $resultsloop{title}           = $biblio->{'title'};
 229     $resultsloop{subtitle}        = $biblio->{'subtitle'};
 230     $resultsloop{biblionumber}    = $biblio->{'biblionumber'};
 231     $resultsloop{author}          = $biblio->{'author'};
 232     $resultsloop{publishercode}   = $biblio->{'publishercode'};
 233     $resultsloop{publicationyear} = $biblio->{'publicationyear'};
 234
 235     push @results, \%resultsloop;
 236 }
 237
 238 $template->param(result=>\@results);
 239
 240 =cut
 241
 242 sub SimpleSearch {
 243     my ( $query, $offset, $max_results, $servers )  = @_;
 244
 245     if ( C4::Context->preference('NoZebra') ) {
 246         my $result = NZorder( NZanalyse($query) )->{'biblioserver'};
 247         my $search_result =
 248           (      $result->{hits}
 249               && $result->{hits} > 0 ? $result->{'RECORDS'} : [] );
 250         return ( undef, $search_result, scalar($result->{hits}) );
 251     }
 252     else {
 253         # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
 254         my @servers = defined ( $servers ) ? @$servers : ( "biblioserver" );
 255         my @results;
 256         my @zoom_queries;
 257         my @tmpresults;
 258         my @zconns;
 259         my $total_hits;
 260         return ( "No query entered", undef, undef ) unless $query;
 261
 262         # Initialize & Search Zebra
 263         for ( my $i = 0 ; $i < @servers ; $i++ ) {
 264             eval {
 265                 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 266                 $zoom_queries[$i] = new ZOOM::Query::CCL2RPN( $query, $zconns[$i]);
 267                 $tmpresults[$i] = $zconns[$i]->search( $zoom_queries[$i] );
 268
 269                 # error handling
 270                 my $error =
 271                     $zconns[$i]->errmsg() . " ("
 272                   . $zconns[$i]->errcode() . ") "
 273                   . $zconns[$i]->addinfo() . " "
 274                   . $zconns[$i]->diagset();
 275
 276                 return ( $error, undef, undef ) if $zconns[$i]->errcode();
 277             };
 278             if ($@) {
 279
 280                 # caught a ZOOM::Exception
 281                 my $error =
 282                     $@->message() . " ("
 283                   . $@->code() . ") "
 284                   . $@->addinfo() . " "
 285                   . $@->diagset();
 286                 warn $error;
 287                 return ( $error, undef, undef );
 288             }
 289         }
 290         while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 291             my $event = $zconns[ $i - 1 ]->last_event();
 292             if ( $event == ZOOM::Event::ZEND ) {
 293
 294                 my $first_record = defined( $offset ) ? $offset+1 : 1;
 295                 my $hits = $tmpresults[ $i - 1 ]->size();
 296                 $total_hits += $hits;
 297                 my $last_record = $hits;
 298                 if ( defined $max_results && $offset + $max_results < $hits ) {
 299                     $last_record  = $offset + $max_results;
 300                 }
 301
 302                 for my $j ( $first_record..$last_record ) {
 303                     my $record = $tmpresults[ $i - 1 ]->record( $j-1 )->raw(); # 0 indexed
 304                     push @results, $record;
 305                 }
 306             }
 307         }
 308
 309         foreach my $result (@tmpresults) {
 310             $result->destroy();
 311         }
 312         foreach my $zoom_query (@zoom_queries) {
 313             $zoom_query->destroy();
 314         }
 315
 316         return ( undef, \@results, $total_hits );
 317     }
 318 }
 319
 320 =head2 getRecords
 321
 322 ( undef, $results_hashref, \@facets_loop ) = getRecords (
 323
 324         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 325         $results_per_page, $offset,       $expanded_facet, $branches,
 326         $query_type,       $scan
 327     );
 328
 329 The all singing, all dancing, multi-server, asynchronous, scanning,
 330 searching, record nabbing, facet-building
 331
 332 See verbse embedded documentation.
 333
 334 =cut
 335
 336 sub getRecords {
 337     my (
 338         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 339         $results_per_page, $offset,       $expanded_facet, $branches,
 340         $query_type,       $scan
 341     ) = @_;
 342
 343     my @servers = @$servers_ref;
 344     my @sort_by = @$sort_by_ref;
 345
 346     # Initialize variables for the ZOOM connection and results object
 347     my $zconn;
 348     my @zconns;
 349     my @results;
 350     my $results_hashref = ();
 351
 352     # Initialize variables for the faceted results objects
 353     my $facets_counter = ();
 354     my $facets_info    = ();
 355     my $facets         = getFacets();
 356
 357     my @facets_loop;    # stores the ref to array of hashes for template facets loop
 358
 359     ### LOOP THROUGH THE SERVERS
 360     for ( my $i = 0 ; $i < @servers ; $i++ ) {
 361         $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 362
 363 # perform the search, create the results objects
 364 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
 365         my $query_to_use = ($servers[$i] =~ /biblioserver/) ? $koha_query : $simple_query;
 366
 367         #$query_to_use = $simple_query if $scan;
 368         warn $simple_query if ( $scan and $DEBUG );
 369
 370         # Check if we've got a query_type defined, if so, use it
 371         eval {
 372             if ($query_type)
 373             {
 374                 if ( $query_type =~ /^ccl/ ) {
 375                     $query_to_use =~
 376                       s/\:/\=/g;    # change : to = last minute (FIXME)
 377                     $results[$i] =
 378                       $zconns[$i]->search(
 379                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 380                       );
 381                 }
 382                 elsif ( $query_type =~ /^cql/ ) {
 383                     $results[$i] =
 384                       $zconns[$i]->search(
 385                         new ZOOM::Query::CQL( $query_to_use, $zconns[$i] ) );
 386                 }
 387                 elsif ( $query_type =~ /^pqf/ ) {
 388                     $results[$i] =
 389                       $zconns[$i]->search(
 390                         new ZOOM::Query::PQF( $query_to_use, $zconns[$i] ) );
 391                 }
 392             }
 393             else {
 394                 if ($scan) {
 395                     $results[$i] =
 396                       $zconns[$i]->scan(
 397                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 398                       );
 399                 }
 400                 else {
 401                     $results[$i] =
 402                       $zconns[$i]->search(
 403                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 404                       );
 405                 }
 406             }
 407         };
 408         if ($@) {
 409             warn "WARNING: query problem with $query_to_use " . $@;
 410         }
 411
 412         # Concatenate the sort_by limits and pass them to the results object
 413         # Note: sort will override rank
 414         my $sort_by;
 415         foreach my $sort (@sort_by) {
 416             if ( $sort eq "author_az" ) {
 417                 $sort_by .= "1=1003 <i ";
 418             }
 419             elsif ( $sort eq "author_za" ) {
 420                 $sort_by .= "1=1003 >i ";
 421             }
 422             elsif ( $sort eq "popularity_asc" ) {
 423                 $sort_by .= "1=9003 <i ";
 424             }
 425             elsif ( $sort eq "popularity_dsc" ) {
 426                 $sort_by .= "1=9003 >i ";
 427             }
 428             elsif ( $sort eq "call_number_asc" ) {
 429                 $sort_by .= "1=20  <i ";
 430             }
 431             elsif ( $sort eq "call_number_dsc" ) {
 432                 $sort_by .= "1=20 >i ";
 433             }
 434             elsif ( $sort eq "pubdate_asc" ) {
 435                 $sort_by .= "1=31 <i ";
 436             }
 437             elsif ( $sort eq "pubdate_dsc" ) {
 438                 $sort_by .= "1=31 >i ";
 439             }
 440             elsif ( $sort eq "acqdate_asc" ) {
 441                 $sort_by .= "1=32 <i ";
 442             }
 443             elsif ( $sort eq "acqdate_dsc" ) {
 444                 $sort_by .= "1=32 >i ";
 445             }
 446             elsif ( $sort eq "title_az" ) {
 447                 $sort_by .= "1=4 <i ";
 448             }
 449             elsif ( $sort eq "title_za" ) {
 450                 $sort_by .= "1=4 >i ";
 451             }
 452             else {
 453                 warn "Ignoring unrecognized sort '$sort' requested" if $sort_by;
 454             }
 455         }
 456         if ($sort_by) {
 457             if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
 458                 warn "WARNING sort $sort_by failed";
 459             }
 460         }
 461     }    # finished looping through servers
 462
 463     # The big moment: asynchronously retrieve results from all servers
 464     while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 465         my $ev = $zconns[ $i - 1 ]->last_event();
 466         if ( $ev == ZOOM::Event::ZEND ) {
 467             next unless $results[ $i - 1 ];
 468             my $size = $results[ $i - 1 ]->size();
 469             if ( $size > 0 ) {
 470                 my $results_hash;
 471
 472                 # loop through the results
 473                 $results_hash->{'hits'} = $size;
 474                 my $times;
 475                 if ( $offset + $results_per_page <= $size ) {
 476                     $times = $offset + $results_per_page;
 477                 }
 478                 else {
 479                     $times = $size;
 480                 }
 481                 for ( my $j = $offset ; $j < $times ; $j++ ) {
 482                     my $records_hash;
 483                     my $record;
 484                     my $facet_record;
 485
 486                     ## Check if it's an index scan
 487                     if ($scan) {
 488                         my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
 489
 490                  # here we create a minimal MARC record and hand it off to the
 491                  # template just like a normal result ... perhaps not ideal, but
 492                  # it works for now
 493                         my $tmprecord = MARC::Record->new();
 494                         $tmprecord->encoding('UTF-8');
 495                         my $tmptitle;
 496                         my $tmpauthor;
 497
 498                 # the minimal record in author/title (depending on MARC flavour)
 499                         if (C4::Context->preference("marcflavour") eq "UNIMARC") {
 500                             $tmptitle = MARC::Field->new('200',' ',' ', a => $term, f => $occ);
 501                             $tmprecord->append_fields($tmptitle);
 502                         } else {
 503                             $tmptitle  = MARC::Field->new('245',' ',' ', a => $term,);
 504                             $tmpauthor = MARC::Field->new('100',' ',' ', a => $occ,);
 505                             $tmprecord->append_fields($tmptitle);
 506                             $tmprecord->append_fields($tmpauthor);
 507                         }
 508                         $results_hash->{'RECORDS'}[$j] = $tmprecord->as_usmarc();
 509                     }
 510
 511                     # not an index scan
 512                     else {
 513                         $record = $results[ $i - 1 ]->record($j)->raw();
 514
 515                         # warn "RECORD $j:".$record;
 516                         $results_hash->{'RECORDS'}[$j] = $record;
 517
 518             # Fill the facets while we're looping, but only for the biblioserver
 519                         $facet_record = MARC::Record->new_from_usmarc($record)
 520                           if $servers[ $i - 1 ] =~ /biblioserver/;
 521
 522                     #warn $servers[$i-1]."\n".$record; #.$facet_record->title();
 523                         if ($facet_record) {
 524                             for ( my $k = 0 ; $k <= @$facets ; $k++ ) {
 525                                 ($facets->[$k]) or next;
 526                                 my @fields = map {$facet_record->field($_)} @{$facets->[$k]->{'tags'}} ;
 527                                 for my $field (@fields) {
 528                                     my @subfields = $field->subfields();
 529                                     for my $subfield (@subfields) {
 530                                         my ( $code, $data ) = @$subfield;
 531                                         ($code eq $facets->[$k]->{'subfield'}) or next;
 532                                         $facets_counter->{ $facets->[$k]->{'link_value'} }->{$data}++;
 533                                     }
 534                                 }
 535                                 $facets_info->{ $facets->[$k]->{'link_value'} }->{'label_value'} =
 536                                     $facets->[$k]->{'label_value'};
 537                                 $facets_info->{ $facets->[$k]->{'link_value'} }->{'expanded'} =
 538                                     $facets->[$k]->{'expanded'};
 539                             }
 540                         }
 541                     }
 542                 }
 543                 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
 544             }
 545
 546             # warn "connection ", $i-1, ": $size hits";
 547             # warn $results[$i-1]->record(0)->render() if $size > 0;
 548
 549             # BUILD FACETS
 550             if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
 551                 for my $link_value (
 552                     sort { $facets_counter->{$b} <=> $facets_counter->{$a} }
 553                     keys %$facets_counter )
 554                 {
 555                     my $expandable;
 556                     my $number_of_facets;
 557                     my @this_facets_array;
 558                     for my $one_facet (
 559                         sort {
 560                             $facets_counter->{$link_value}
 561                               ->{$b} <=> $facets_counter->{$link_value}->{$a}
 562                         } keys %{ $facets_counter->{$link_value} }
 563                       )
 564                     {
 565                         $number_of_facets++;
 566                         if (   ( $number_of_facets < 6 )
 567                             || ( $expanded_facet eq $link_value )
 568                             || ( $facets_info->{$link_value}->{'expanded'} ) )
 569                         {
 570
 571                       # Sanitize the link value ), ( will cause errors with CCL,
 572                             my $facet_link_value = $one_facet;
 573                             $facet_link_value =~ s/(\(|\))/ /g;
 574
 575                             # fix the length that will display in the label,
 576                             my $facet_label_value = $one_facet;
 577                             $facet_label_value =
 578                               substr( $one_facet, 0, 20 ) . "..."
 579                               unless length($facet_label_value) <= 20;
 580
 581                             # if it's a branch, label by the name, not the code,
 582                             if ( $link_value =~ /branch/ ) {
 583                                 $facet_label_value =
 584                                   $branches->{$one_facet}->{'branchname'};
 585                             }
 586
 587                 # but we're down with the whole label being in the link's title.
 588                             my $facet_title_value = $one_facet;
 589
 590                             push @this_facets_array,
 591                               (
 592                                 {
 593                                     facet_count =>
 594                                       $facets_counter->{$link_value}
 595                                       ->{$one_facet},
 596                                     facet_label_value => $facet_label_value,
 597                                     facet_title_value => $facet_title_value,
 598                                     facet_link_value  => $facet_link_value,
 599                                     type_link_value   => $link_value,
 600                                 },
 601                               );
 602                         }
 603                     }
 604
 605                     # handle expanded option
 606                     unless ( $facets_info->{$link_value}->{'expanded'} ) {
 607                         $expandable = 1
 608                           if ( ( $number_of_facets > 6 )
 609                             && ( $expanded_facet ne $link_value ) );
 610                     }
 611                     push @facets_loop,
 612                       (
 613                         {
 614                             type_link_value => $link_value,
 615                             type_id         => $link_value . "_id",
 616                             "type_label_" . $facets_info->{$link_value}->{'label_value'} => 1,
 617                             facets     => \@this_facets_array,
 618                             expandable => $expandable,
 619                             expand     => $link_value,
 620                         }
 621                       ) unless ( ($facets_info->{$link_value}->{'label_value'} =~ /Libraries/) and (C4::Context->preference('singleBranchMode')) );
 622                 }
 623             }
 624         }
 625     }
 626     return ( undef, $results_hashref, \@facets_loop );
 627 }
 628
 629 sub pazGetRecords {
 630     my (
 631         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 632         $results_per_page, $offset,       $expanded_facet, $branches,
 633         $query_type,       $scan
 634     ) = @_;
 635
 636     my $paz = C4::Search::PazPar2->new(C4::Context->config('pazpar2url'));
 637     $paz->init();
 638     $paz->search($simple_query);
 639     sleep 1;   # FIXME: WHY?
 640
 641     # do results
 642     my $results_hashref = {};
 643     my $stats = XMLin($paz->stat);
 644     my $results = XMLin($paz->show($offset, $results_per_page, 'work-title:1'), forcearray => 1);
 645
 646     # for a grouped search result, the number of hits
 647     # is the number of groups returned; 'bib_hits' will have
 648     # the total number of bibs.
 649     $results_hashref->{'biblioserver'}->{'hits'} = $results->{'merged'}->[0];
 650     $results_hashref->{'biblioserver'}->{'bib_hits'} = $stats->{'hits'};
 651
 652     HIT: foreach my $hit (@{ $results->{'hit'} }) {
 653         my $recid = $hit->{recid}->[0];
 654
 655         my $work_title = $hit->{'md-work-title'}->[0];
 656         my $work_author;
 657         if (exists $hit->{'md-work-author'}) {
 658             $work_author = $hit->{'md-work-author'}->[0];
 659         }
 660         my $group_label = (defined $work_author) ? "$work_title / $work_author" : $work_title;
 661
 662         my $result_group = {};
 663         $result_group->{'group_label'} = $group_label;
 664         $result_group->{'group_merge_key'} = $recid;
 665
 666         my $count = 1;
 667         if (exists $hit->{count}) {
 668             $count = $hit->{count}->[0];
 669         }
 670         $result_group->{'group_count'} = $count;
 671
 672         for (my $i = 0; $i < $count; $i++) {
 673             # FIXME -- may need to worry about diacritics here
 674             my $rec = $paz->record($recid, $i);
 675             push @{ $result_group->{'RECORDS'} }, $rec;
 676         }
 677
 678         push @{ $results_hashref->{'biblioserver'}->{'GROUPS'} }, $result_group;
 679     }
 680
 681     # pass through facets
 682     my $termlist_xml = $paz->termlist('author,subject');
 683     my $terms = XMLin($termlist_xml, forcearray => 1);
 684     my @facets_loop = ();
 685     #die Dumper($results);
 686 #    foreach my $list (sort keys %{ $terms->{'list'} }) {
 687 #        my @facets = ();
 688 #        foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) {
 689 #            push @facets, {
 690 #                facet_label_value => $facet->{'name'}->[0],
 691 #            };
 692 #        }
 693 #        push @facets_loop, ( {
 694 #            type_label => $list,
 695 #            facets => \@facets,
 696 #        } );
 697 #    }
 698
 699     return ( undef, $results_hashref, \@facets_loop );
 700 }
 701
 702 # STOPWORDS
 703 sub _remove_stopwords {
 704     my ( $operand, $index ) = @_;
 705     my @stopwords_removed;
 706
 707     # phrase and exact-qualified indexes shouldn't have stopwords removed
 708     if ( $index !~ m/phr|ext/ ) {
 709
 710 # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
 711 #       we use IsAlpha unicode definition, to deal correctly with diacritics.
 712 #       otherwise, a French word like "leçon" woudl be split into "le" "çon", "le"
 713 #       is a stopword, we'd get "çon" and wouldn't find anything...
 714         foreach ( keys %{ C4::Context->stopwords } ) {
 715             next if ( $_ =~ /(and|or|not)/ );    # don't remove operators
 716             if ( $operand =~
 717                 /(\P{IsAlpha}$_\P{IsAlpha}|^$_\P{IsAlpha}|\P{IsAlpha}$_$|^$_$)/ )
 718             {
 719                 $operand =~ s/\P{IsAlpha}$_\P{IsAlpha}/ /gi;
 720                 $operand =~ s/^$_\P{IsAlpha}/ /gi;
 721                 $operand =~ s/\P{IsAlpha}$_$/ /gi;
 722                                 $operand =~ s/$1//gi;
 723                 push @stopwords_removed, $_;
 724             }
 725         }
 726     }
 727     return ( $operand, \@stopwords_removed );
 728 }
 729
 730 # TRUNCATION
 731 sub _detect_truncation {
 732     my ( $operand, $index ) = @_;
 733     my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
 734         @regexpr );
 735     $operand =~ s/^ //g;
 736     my @wordlist = split( /\s/, $operand );
 737     foreach my $word (@wordlist) {
 738         if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
 739             push @rightlefttruncated, $word;
 740         }
 741         elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
 742             push @lefttruncated, $word;
 743         }
 744         elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
 745             push @righttruncated, $word;
 746         }
 747         elsif ( index( $word, "*" ) < 0 ) {
 748             push @nontruncated, $word;
 749         }
 750         else {
 751             push @regexpr, $word;
 752         }
 753     }
 754     return (
 755         \@nontruncated,       \@righttruncated, \@lefttruncated,
 756         \@rightlefttruncated, \@regexpr
 757     );
 758 }
 759
 760 # STEMMING
 761 sub _build_stemmed_operand {
 762     my ($operand, $lang) = @_;
 763     my $stemmed_operand;
 764
 765     require Lingua::Stem::Snowball;
 766
 767     # If operand contains a digit, it is almost certainly an identifier, and should
 768     # not be stemmed.  This is particularly relevant for ISBNs and ISSNs, which
 769     # can contain the letter "X" - for example, _build_stemmend_operand would reduce
 770     # "014100018X" to "x ", which for a MARC21 database would bring up irrelevant
 771     # results (e.g., "23 x 29 cm." from the 300$c).  Bug 2098.
 772     return $operand if $operand =~ /\d/;
 773
 774     my $stemmer = Lingua::Stem::Snowball->new( lang => $lang,
 775                                                encoding => "UTF-8" );
 776
 777     my @words = split( / /, $operand );
 778
 779     for my $word (@words) {
 780         my $stem = $stemmer->stem($word);
 781         $stemmed_operand .= "$stem";
 782         $stemmed_operand .= "?"
 783           unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
 784         $stemmed_operand .= " ";
 785     }
 786     warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
 787     return $stemmed_operand;
 788 }
 789
 790 # FIELD WEIGHTING
 791 sub _build_weighted_query {
 792
 793 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
 794 # pretty well but could work much better if we had a smarter query parser
 795     my ( $operand, $stemmed_operand, $index ) = @_;
 796     my $stemming      = C4::Context->preference("QueryStemming")     || 0;
 797     my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
 798     my $fuzzy_enabled = C4::Context->preference("QueryFuzzy")        || 0;
 799
 800     my $weighted_query .= "(rk=(";    # Specifies that we're applying rank
 801
 802     # Keyword, or, no index specified
 803     if ( ( $index eq 'kw' ) || ( !$index ) ) {
 804         $weighted_query .=
 805           "Title-cover,ext,r1=\"$operand\"";    # exact title-cover
 806         $weighted_query .= " or ti,ext,r2=\"$operand\"";    # exact title
 807         $weighted_query .= " or ti,phr,r3=\"$operand\"";    # phrase title
 808           #$weighted_query .= " or any,ext,r4=$operand";               # exact any
 809           #$weighted_query .=" or kw,wrdl,r5=\"$operand\"";            # word list any
 810         $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
 811           if $fuzzy_enabled;    # add fuzzy, word list
 812         $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
 813           if ( $stemming and $stemmed_operand )
 814           ;                     # add stemming, right truncation
 815         $weighted_query .= " or wrdl,r9=\"$operand\"";
 816
 817         # embedded sorting: 0 a-z; 1 z-a
 818         # $weighted_query .= ") or (sort1,aut=1";
 819     }
 820
 821     # Barcode searches should skip this process
 822     elsif ( $index eq 'bc' ) {
 823         $weighted_query .= "bc=\"$operand\"";
 824     }
 825
 826     # Authority-number searches should skip this process
 827     elsif ( $index eq 'an' ) {
 828         $weighted_query .= "an=\"$operand\"";
 829     }
 830
 831     # If the index already has more than one qualifier, wrap the operand
 832     # in quotes and pass it back (assumption is that the user knows what they
 833     # are doing and won't appreciate us mucking up their query
 834     elsif ( $index =~ ',' ) {
 835         $weighted_query .= " $index=\"$operand\"";
 836     }
 837
 838     #TODO: build better cases based on specific search indexes
 839     else {
 840         $weighted_query .= " $index,ext,r1=\"$operand\"";    # exact index
 841           #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
 842         $weighted_query .= " or $index,phr,r3=\"$operand\"";    # phrase index
 843         $weighted_query .=
 844           " or $index,rt,wrdl,r3=\"$operand\"";    # word list index
 845     }
 846
 847     $weighted_query .= "))";                       # close rank specification
 848     return $weighted_query;
 849 }
 850
 851 =head2 buildQuery
 852
 853 ( $error, $query,
 854 $simple_query, $query_cgi,
 855 $query_desc, $limit,
 856 $limit_cgi, $limit_desc,
 857 $stopwords_removed, $query_type ) = getRecords ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
 858
 859 Build queries and limits in CCL, CGI, Human,
 860 handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
 861
 862 See verbose embedded documentation.
 863
 864
 865 =cut
 866
 867 sub buildQuery {
 868     my ( $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang ) = @_;
 869
 870     warn "---------\nEnter buildQuery\n---------" if $DEBUG;
 871
 872     # dereference
 873     my @operators = @$operators if $operators;
 874     my @indexes   = @$indexes   if $indexes;
 875     my @operands  = @$operands  if $operands;
 876     my @limits    = @$limits    if $limits;
 877     my @sort_by   = @$sort_by   if $sort_by;
 878
 879     my $stemming         = C4::Context->preference("QueryStemming")        || 0;
 880     my $auto_truncation  = C4::Context->preference("QueryAutoTruncate")    || 0;
 881     my $weight_fields    = C4::Context->preference("QueryWeightFields")    || 0;
 882     my $fuzzy_enabled    = C4::Context->preference("QueryFuzzy")           || 0;
 883     my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
 884
 885     # no stemming/weight/fuzzy in NoZebra
 886     if ( C4::Context->preference("NoZebra") ) {
 887         $stemming      = 0;
 888         $weight_fields = 0;
 889         $fuzzy_enabled = 0;
 890     }
 891
 892     my $query        = $operands[0];
 893     my $simple_query = $operands[0];
 894
 895     # initialize the variables we're passing back
 896     my $query_cgi;
 897     my $query_desc;
 898     my $query_type;
 899
 900     my $limit;
 901     my $limit_cgi;
 902     my $limit_desc;
 903
 904     my $stopwords_removed;    # flag to determine if stopwords have been removed
 905
 906 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
 907 # DIAGNOSTIC ONLY!!
 908     if ( $query =~ /^ccl=/ ) {
 909         return ( undef, $', $', "q=ccl=$'", $', '', '', '', '', 'ccl' );
 910     }
 911     if ( $query =~ /^cql=/ ) {
 912         return ( undef, $', $', "q=cql=$'", $', '', '', '', '', 'cql' );
 913     }
 914     if ( $query =~ /^pqf=/ ) {
 915         return ( undef, $', $', "q=pqf=$'", $', '', '', '', '', 'pqf' );
 916     }
 917
 918     # pass nested queries directly
 919     # FIXME: need better handling of some of these variables in this case
 920     if ( $query =~ /(\(|\))/ ) {
 921         return (
 922             undef,              $query, $simple_query, $query_cgi,
 923             $query,             $limit, $limit_cgi,    $limit_desc,
 924             $stopwords_removed, 'ccl'
 925         );
 926     }
 927
 928 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
 929 # query operands and indexes and add stemming, truncation, field weighting, etc.
 930 # Once we do so, we'll end up with a value in $query, just like if we had an
 931 # incoming $query from the user
 932     else {
 933         $query = ""
 934           ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
 935         my $previous_operand
 936           ;    # a flag used to keep track if there was a previous query
 937                # if there was, we can apply the current operator
 938                # for every operand
 939         for ( my $i = 0 ; $i <= @operands ; $i++ ) {
 940
 941             # COMBINE OPERANDS, INDEXES AND OPERATORS
 942             if ( $operands[$i] ) {
 943
 944               # A flag to determine whether or not to add the index to the query
 945                 my $indexes_set;
 946
 947 # If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling
 948                 if ( $operands[$i] =~ /(:|=)/ || $scan ) {
 949                     $weight_fields    = 0;
 950                     $stemming         = 0;
 951                     $remove_stopwords = 0;
 952                 }
 953                 my $operand = $operands[$i];
 954                 my $index   = $indexes[$i];
 955
 956                 # Add index-specific attributes
 957                 # Date of Publication
 958                 if ( $index eq 'yr' ) {
 959                     $index .= ",st-numeric";
 960 #                     $indexes_set++;
 961                                         $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
 962                 }
 963
 964                 # Date of Acquisition
 965                 elsif ( $index eq 'acqdate' ) {
 966                     $index .= ",st-date-normalized";
 967 #                     $indexes_set++;
 968                                         $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
 969                 }
 970                 # ISBN,ISSN,Standard Number, don't need special treatment
 971                 elsif ( $index eq 'nb' || $index eq 'ns' ) {
 972 #                     $indexes_set++;
 973                     (
 974                         $stemming,      $auto_truncation,
 975                         $weight_fields, $fuzzy_enabled,
 976                         $remove_stopwords
 977                     ) = ( 0, 0, 0, 0, 0 );
 978
 979                 }
 980                 # Set default structure attribute (word list)
 981                 my $struct_attr;
 982                 unless ( $indexes_set || !$index || $index =~ /(st-|phr|ext|wrdl)/ ) {
 983                     $struct_attr = ",wrdl";
 984                 }
 985
 986                 # Some helpful index variants
 987                 my $index_plus       = $index . $struct_attr . ":" if $index;
 988                 my $index_plus_comma = $index . $struct_attr . "," if $index;
 989                 if ($auto_truncation){
 990 #                                       FIXME Auto Truncation is only valid for LTR languages
 991 #                                       use C4::Output;
 992 #                                       use C4::Languages qw(regex_lang_subtags get_bidi);
 993 #                               $lang = $query->cookie('KohaOpacLanguage') if (defined $query && $query->cookie('KohaOpacLanguage'));
 994 #                                   my $current_lang = regex_lang_subtags($lang);
 995 #                                   my $bidi;
 996 #                                   $bidi = get_bidi($current_lang->{script}) if $current_lang->{script};
 997                                         $index_plus_comma .= "rtrn:";
 998                                 }
 999
1000                 # Remove Stopwords
1001                 if ($remove_stopwords) {
1002                     ( $operand, $stopwords_removed ) =
1003                       _remove_stopwords( $operand, $index );
1004                     warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
1005                     warn "REMOVED STOPWORDS: @$stopwords_removed"
1006                       if ( $stopwords_removed && $DEBUG );
1007                 }
1008
1009                 # Detect Truncation
1010                 my $truncated_operand;
1011                 my( $nontruncated, $righttruncated, $lefttruncated,
1012                     $rightlefttruncated, $regexpr
1013                 ) = _detect_truncation( $operand, $index );
1014                 warn
1015 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
1016                   if $DEBUG;
1017
1018                 # Apply Truncation
1019                 if (
1020                     scalar(@$righttruncated) + scalar(@$lefttruncated) +
1021                     scalar(@$rightlefttruncated) > 0 )
1022                 {
1023
1024                # Don't field weight or add the index to the query, we do it here
1025                     $indexes_set = 1;
1026                     undef $weight_fields;
1027                     my $previous_truncation_operand;
1028                     if ( scalar(@$nontruncated) > 0 ) {
1029                         $truncated_operand .= "$index_plus @$nontruncated ";
1030                         $previous_truncation_operand = 1;
1031                     }
1032                     if ( scalar(@$righttruncated) > 0 ) {
1033                         $truncated_operand .= "and "
1034                           if $previous_truncation_operand;
1035                         $truncated_operand .=
1036                           "$index_plus_comma" . "rtrn:@$righttruncated ";
1037                         $previous_truncation_operand = 1;
1038                     }
1039                     if ( scalar(@$lefttruncated) > 0 ) {
1040                         $truncated_operand .= "and "
1041                           if $previous_truncation_operand;
1042                         $truncated_operand .=
1043                           "$index_plus_comma" . "ltrn:@$lefttruncated ";
1044                         $previous_truncation_operand = 1;
1045                     }
1046                     if ( scalar(@$rightlefttruncated) > 0 ) {
1047                         $truncated_operand .= "and "
1048                           if $previous_truncation_operand;
1049                         $truncated_operand .=
1050                           "$index_plus_comma" . "rltrn:@$rightlefttruncated ";
1051                         $previous_truncation_operand = 1;
1052                     }
1053                 }
1054                 $operand = $truncated_operand if $truncated_operand;
1055                 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
1056
1057                 # Handle Stemming
1058                 my $stemmed_operand;
1059                 $stemmed_operand = _build_stemmed_operand($operand, $lang)
1060                   if $stemming;
1061                 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
1062
1063                 # Handle Field Weighting
1064                 my $weighted_operand;
1065                 $weighted_operand =
1066                   _build_weighted_query( $operand, $stemmed_operand, $index )
1067                   if $weight_fields;
1068                 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
1069                 $operand = $weighted_operand if $weight_fields;
1070                 $indexes_set = 1 if $weight_fields;
1071
1072                 # If there's a previous operand, we need to add an operator
1073                 if ($previous_operand) {
1074
1075                     # User-specified operator
1076                     if ( $operators[ $i - 1 ] ) {
1077                         $query     .= " $operators[$i-1] ";
1078                         $query     .= " $index_plus " unless $indexes_set;
1079                         $query     .= " $operand";
1080                         $query_cgi .= "&op=$operators[$i-1]";
1081                         $query_cgi .= "&idx=$index" if $index;
1082                         $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1083                         $query_desc .=
1084                           " $operators[$i-1] $index_plus $operands[$i]";
1085                     }
1086
1087                     # Default operator is and
1088                     else {
1089                         $query      .= " and ";
1090                         $query      .= "$index_plus " unless $indexes_set;
1091                         $query      .= "$operand";
1092                         $query_cgi  .= "&op=and&idx=$index" if $index;
1093                         $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1094                         $query_desc .= " and $index_plus $operands[$i]";
1095                     }
1096                 }
1097
1098                 # There isn't a pervious operand, don't need an operator
1099                 else {
1100
1101                     # Field-weighted queries already have indexes set
1102                     $query .= " $index_plus " unless $indexes_set;
1103                     $query .= $operand;
1104                     $query_desc .= " $index_plus $operands[$i]";
1105                     $query_cgi  .= "&idx=$index" if $index;
1106                     $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1107                     $previous_operand = 1;
1108                 }
1109             }    #/if $operands
1110         }    # /for
1111     }
1112     warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
1113
1114     # add limits
1115     my $group_OR_limits;
1116     my $availability_limit;
1117     foreach my $this_limit (@limits) {
1118         if ( $this_limit =~ /available/ ) {
1119
1120 # 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1121 # In English:
1122 # all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1123             $availability_limit .=
1124 "( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and (lost,st-numeric=0) )"; #or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )";
1125             $limit_cgi  .= "&limit=available";
1126             $limit_desc .= "";
1127         }
1128
1129         # group_OR_limits, prefixed by mc-
1130         # OR every member of the group
1131         elsif ( $this_limit =~ /mc/ ) {
1132             $group_OR_limits .= " or " if $group_OR_limits;
1133             $limit_desc      .= " or " if $group_OR_limits;
1134             $group_OR_limits .= "$this_limit";
1135             $limit_cgi       .= "&limit=$this_limit";
1136             $limit_desc      .= " $this_limit";
1137         }
1138
1139         # Regular old limits
1140         else {
1141             if ($this_limit){
1142                 $limit .= " and " if $limit || $query;
1143                 $limit      .= "$this_limit";
1144                 $limit_cgi  .= "&limit=$this_limit";
1145                 $limit_desc .= " $this_limit";
1146                 if ($this_limit =~ /^branch:(.+)/) {
1147                     my $branchcode = $1;
1148                     my $branchname = GetBranchName($branchcode);
1149                     if (defined $branchname) {
1150                         $limit_desc .= " branch:$branchname";
1151                     } else {
1152                         $limit_desc .= " $this_limit";
1153                     }
1154                 } else {
1155                    $limit_desc .= " $this_limit";
1156                 }
1157
1158             }
1159         }
1160     }
1161     if ($group_OR_limits) {
1162         $limit .= " and " if ( $query || $limit );
1163         $limit .= "($group_OR_limits)";
1164     }
1165     if ($availability_limit) {
1166         $limit .= " and " if ( $query || $limit );
1167         $limit .= "($availability_limit)";
1168     }
1169
1170     # Normalize the query and limit strings
1171     $query =~ s/:/=/g;
1172     $limit =~ s/:/=/g;
1173     for ( $query, $query_desc, $limit, $limit_desc ) {
1174         $_ =~ s/  / /g;    # remove extra spaces
1175         $_ =~ s/^ //g;     # remove any beginning spaces
1176         $_ =~ s/ $//g;     # remove any ending spaces
1177         $_ =~ s/==/=/g;    # remove double == from query
1178     }
1179     $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1180
1181     for ($query_cgi,$simple_query) {
1182         $_ =~ s/"//g;
1183     }
1184     # append the limit to the query
1185     $query .= " " . $limit;
1186
1187     # Warnings if DEBUG
1188     if ($DEBUG) {
1189         warn "QUERY:" . $query;
1190         warn "QUERY CGI:" . $query_cgi;
1191         warn "QUERY DESC:" . $query_desc;
1192         warn "LIMIT:" . $limit;
1193         warn "LIMIT CGI:" . $limit_cgi;
1194         warn "LIMIT DESC:" . $limit_desc;
1195         warn "---------\nLeave buildQuery\n---------";
1196     }
1197     return (
1198         undef,              $query, $simple_query, $query_cgi,
1199         $query_desc,        $limit, $limit_cgi,    $limit_desc,
1200         $stopwords_removed, $query_type
1201     );
1202 }
1203
1204 =head2 searchResults
1205
1206 Format results in a form suitable for passing to the template
1207
1208 =cut
1209
1210 # IMO this subroutine is pretty messy still -- it's responsible for
1211 # building the HTML output for the template
1212 sub searchResults {
1213     my ( $searchdesc, $hits, $results_per_page, $offset, $scan, @marcresults ) = @_;
1214     my $dbh = C4::Context->dbh;
1215     my $even = 1;
1216     my @newresults;
1217
1218     # add search-term highlighting via <span>s on the search terms
1219     my $span_terms_hashref;
1220     for my $span_term ( split( / /, $searchdesc ) ) {
1221         $span_term =~ s/(.*=|\)|\(|\+|\.|\*)//g;
1222         $span_terms_hashref->{$span_term}++;
1223     }
1224
1225     #Build branchnames hash
1226     #find branchname
1227     #get branch information.....
1228     my %branches;
1229     my $bsth =
1230       $dbh->prepare("SELECT branchcode,branchname FROM branches")
1231       ;    # FIXME : use C4::Koha::GetBranches
1232     $bsth->execute();
1233     while ( my $bdata = $bsth->fetchrow_hashref ) {
1234         $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'};
1235     }
1236 # FIXME - We build an authorised values hash here, using the default framework
1237 # though it is possible to have different authvals for different fws.
1238
1239     my $shelflocations =GetKohaAuthorisedValues('items.location','');
1240
1241     # get notforloan authorised value list (see $shelflocations  FIXME)
1242     my $notforloan_authorised_value = GetAuthValCode('items.notforloan','');
1243
1244     #Build itemtype hash
1245     #find itemtype & itemtype image
1246     my %itemtypes;
1247     $bsth =
1248       $dbh->prepare(
1249         "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes"
1250       );
1251     $bsth->execute();
1252     while ( my $bdata = $bsth->fetchrow_hashref ) {
1253                 foreach (qw(description imageurl summary notforloan)) {
1254                 $itemtypes{ $bdata->{'itemtype'} }->{$_} = $bdata->{$_};
1255                 }
1256     }
1257
1258     #search item field code
1259     my $sth =
1260       $dbh->prepare(
1261 "SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'"
1262       );
1263     $sth->execute;
1264     my ($itemtag) = $sth->fetchrow;
1265
1266     ## find column names of items related to MARC
1267     my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items");
1268     $sth2->execute;
1269     my %subfieldstosearch;
1270     while ( ( my $column ) = $sth2->fetchrow ) {
1271         my ( $tagfield, $tagsubfield ) =
1272           &GetMarcFromKohaField( "items." . $column, "" );
1273         $subfieldstosearch{$column} = $tagsubfield;
1274     }
1275
1276     # handle which records to actually retrieve
1277     my $times;
1278     if ( $hits && $offset + $results_per_page <= $hits ) {
1279         $times = $offset + $results_per_page;
1280     }
1281     else {
1282         $times = $hits;  # FIXME: if $hits is undefined, why do we want to equal it?
1283     }
1284
1285         my $marcflavour = C4::Context->preference("marcflavour");
1286     # loop through all of the records we've retrieved
1287     for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
1288         my $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
1289         my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' );
1290         $oldbiblio->{subtitle} = C4::Biblio::get_koha_field_from_marc('bibliosubtitle', 'subtitle', $marcrecord, '');
1291         $oldbiblio->{result_number} = $i + 1;
1292
1293         # add imageurl to itemtype if there is one
1294         $oldbiblio->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
1295
1296         $oldbiblio->{'authorised_value_images'}  = C4::Items::get_authorised_value_images( C4::Biblio::get_biblio_authorised_values( $oldbiblio->{'biblionumber'}, $marcrecord ) );
1297                 $oldbiblio->{normalized_upc} = GetNormalizedUPC($marcrecord,$marcflavour);
1298                 $oldbiblio->{normalized_ean} = GetNormalizedEAN($marcrecord,$marcflavour);
1299                 $oldbiblio->{normalized_oclc} = GetNormalizedOCLCNumber($marcrecord,$marcflavour);
1300                 $oldbiblio->{normalized_isbn} = GetNormalizedISBN(undef,$marcrecord,$marcflavour);
1301                 $oldbiblio->{content_identifier_exists} = 1 if ($oldbiblio->{normalized_isbn} or $oldbiblio->{normalized_oclc} or $oldbiblio->{normalized_ean} or $oldbiblio->{normalized_upc});
1302
1303                 # edition information, if any
1304         $oldbiblio->{edition} = $oldbiblio->{editionstatement};
1305                 $oldbiblio->{description} = $itemtypes{ $oldbiblio->{itemtype} }->{description};
1306  # Build summary if there is one (the summary is defined in the itemtypes table)
1307  # FIXME: is this used anywhere, I think it can be commented out? -- JF
1308         if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
1309             my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1310             my @fields  = $marcrecord->fields();
1311             foreach my $field (@fields) {
1312                 my $tag      = $field->tag();
1313                 my $tagvalue = $field->as_string();
1314                 if (! utf8::is_utf8($tagvalue)) {
1315                     utf8::decode($tagvalue);
1316                 }
1317
1318                 $summary =~
1319                   s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
1320                 unless ( $tag < 10 ) {
1321                     my @subf = $field->subfields;
1322                     for my $i ( 0 .. $#subf ) {
1323                         my $subfieldcode  = $subf[$i][0];
1324                         my $subfieldvalue = $subf[$i][1];
1325                         if (! utf8::is_utf8($subfieldvalue)) {
1326                             utf8::decode($subfieldvalue);
1327                         }
1328                         my $tagsubf       = $tag . $subfieldcode;
1329                         $summary =~
1330 s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
1331                     }
1332                 }
1333             }
1334             # FIXME: yuk
1335             $summary =~ s/\[(.*?)]//g;
1336             $summary =~ s/\n/<br\/>/g;
1337             $oldbiblio->{summary} = $summary;
1338         }
1339
1340         # save an author with no <span> tag, for the <a href=search.pl?q=<!--tmpl_var name="author"-->> link
1341         $oldbiblio->{'author_nospan'} = $oldbiblio->{'author'};
1342         $oldbiblio->{'title_nospan'} = $oldbiblio->{'title'};
1343         $oldbiblio->{'subtitle_nospan'} = $oldbiblio->{'subtitle'};
1344         # Add search-term highlighting to the whole record where they match using <span>s
1345         if (C4::Context->preference("OpacHighlightedWords")){
1346             my $searchhighlightblob;
1347             for my $highlight_field ( $marcrecord->fields ) {
1348
1349     # FIXME: need to skip title, subtitle, author, etc., as they are handled below
1350                 next if $highlight_field->tag() =~ /(^00)/;    # skip fixed fields
1351                 for my $subfield ($highlight_field->subfields()) {
1352                     my $match;
1353                     next if $subfield->[0] eq '9';
1354                     my $field = $subfield->[1];
1355                     for my $term ( keys %$span_terms_hashref ) {
1356                         if ( ( $field =~ /$term/i ) && (( length($term) > 3 ) || ($field =~ / $term /i)) ) {
1357                             $field =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1358                         $match++;
1359                         }
1360                     }
1361                     $searchhighlightblob .= $field . " ... " if $match;
1362                 }
1363
1364             }
1365             $searchhighlightblob = ' ... '.$searchhighlightblob if $searchhighlightblob;
1366             $oldbiblio->{'searchhighlightblob'} = $searchhighlightblob;
1367         }
1368
1369         # Add search-term highlighting to the title, subtitle, etc. fields
1370         for my $term ( keys %$span_terms_hashref ) {
1371             my $old_term = $term;
1372             if ( length($term) > 3 ) {
1373                 $term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\]|\\|\*)//g;
1374                                 foreach(qw(title subtitle author publishercode place pages notes size)) {
1375                         $oldbiblio->{$_} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1376                                 }
1377             }
1378         }
1379
1380         ($i % 2) and $oldbiblio->{'toggle'} = 1;
1381
1382         # Pull out the items fields
1383         my @fields = $marcrecord->field($itemtag);
1384
1385         # Setting item statuses for display
1386         my @available_items_loop;
1387         my @onloan_items_loop;
1388         my @notforloan_items_loop;
1389         my @other_items_loop;
1390
1391         my $available_items;
1392         my $onloan_items;
1393         my $notforloan_items;
1394         my $other_items;
1395
1396         my $ordered_count         = 0;
1397         my $available_count       = 0;
1398         my $onloan_count          = 0;
1399         my $notforloan_count      = 0;
1400         my $longoverdue_count     = 0;
1401         my $other_count           = 0;
1402         my $wthdrawn_count        = 0;
1403         my $itemlost_count        = 0;
1404         my $itembinding_count     = 0;
1405         my $itemdamaged_count     = 0;
1406         my $item_in_transit_count = 0;
1407         my $can_place_holds       = 0;
1408         my $items_count           = scalar(@fields);
1409         my $maxitems =
1410           ( C4::Context->preference('maxItemsinSearchResults') )
1411           ? C4::Context->preference('maxItemsinSearchResults') - 1
1412           : 1;
1413
1414         # loop through every item
1415         foreach my $field (@fields) {
1416             my $item;
1417
1418             # populate the items hash
1419             foreach my $code ( keys %subfieldstosearch ) {
1420                 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1421             }
1422                         my $hbranch     = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'homebranch'    : 'holdingbranch';
1423                         my $otherbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'holdingbranch' : 'homebranch';
1424             # set item's branch name, use HomeOrHoldingBranch syspref first, fall back to the other one
1425             if ($item->{$hbranch}) {
1426                 $item->{'branchname'} = $branches{$item->{$hbranch}};
1427             }
1428             elsif ($item->{$otherbranch}) {     # Last resort
1429                 $item->{'branchname'} = $branches{$item->{$otherbranch}};
1430             }
1431
1432                         my $prefix = $item->{$hbranch} . '--' . $item->{location} . $item->{itype} . $item->{itemcallnumber};
1433 # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
1434             if ( $item->{onloan} ) {
1435                 $onloan_count++;
1436                                 my $key = $prefix . $item->{due_date};
1437                                 $onloan_items->{$key}->{due_date} = format_date($item->{onloan});
1438                                 $onloan_items->{$key}->{count}++ if $item->{$hbranch};
1439                                 $onloan_items->{$key}->{branchname} = $item->{branchname};
1440                                 $onloan_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1441                                 $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber};
1442                                 $onloan_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1443                                 $onloan_items->{$key}->{barcode} = $item->{barcode};
1444                 # if something's checked out and lost, mark it as 'long overdue'
1445                 if ( $item->{itemlost} ) {
1446                     $onloan_items->{$prefix}->{longoverdue}++;
1447                     $longoverdue_count++;
1448                 } else {        # can place holds as long as item isn't lost
1449                     $can_place_holds = 1;
1450                 }
1451             }
1452
1453          # items not on loan, but still unavailable ( lost, withdrawn, damaged )
1454             else {
1455
1456                 # item is on order
1457                 if ( $item->{notforloan} == -1 ) {
1458                     $ordered_count++;
1459                 }
1460
1461                 # is item in transit?
1462                 my $transfertwhen = '';
1463                 my ($transfertfrom, $transfertto);
1464
1465                 unless ($item->{wthdrawn}
1466                         || $item->{itemlost}
1467                         || $item->{damaged}
1468                         || $item->{notforloan}
1469                         || $items_count > 20) {
1470
1471                     # A couple heuristics to limit how many times
1472                     # we query the database for item transfer information, sacrificing
1473                     # accuracy in some cases for speed;
1474                     #
1475                     # 1. don't query if item has one of the other statuses
1476                     # 2. don't check transit status if the bib has
1477                     #    more than 20 items
1478                     #
1479                     # FIXME: to avoid having the query the database like this, and to make
1480                     #        the in transit status count as unavailable for search limiting,
1481                     #        should map transit status to record indexed in Zebra.
1482                     #
1483                     ($transfertwhen, $transfertfrom, $transfertto) = C4::Circulation::GetTransfers($item->{itemnumber});
1484                 }
1485
1486                 # item is withdrawn, lost or damaged
1487                 if (   $item->{wthdrawn}
1488                     || $item->{itemlost}
1489                     || $item->{damaged}
1490                     || $item->{notforloan}
1491                     || ($transfertwhen ne ''))
1492                 {
1493                     $wthdrawn_count++        if $item->{wthdrawn};
1494                     $itemlost_count++        if $item->{itemlost};
1495                     $itemdamaged_count++     if $item->{damaged};
1496                     $item_in_transit_count++ if $transfertwhen ne '';
1497                     $item->{status} = $item->{wthdrawn} . "-" . $item->{itemlost} . "-" . $item->{damaged} . "-" . $item->{notforloan};
1498                     $other_count++;
1499
1500                                         my $key = $prefix . $item->{status};
1501
1502                                         foreach (qw(wthdrawn itemlost damaged branchname itemcallnumber)) {
1503                         $other_items->{$key}->{$_} = $item->{$_};
1504                                         }
1505                     $other_items->{$key}->{intransit} = ($transfertwhen ne '') ? 1 : 0;
1506                                         $other_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1507                                         $other_items->{$key}->{count}++ if $item->{$hbranch};
1508                                         $other_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1509                                         $other_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1510                 }
1511                 # item is available
1512                 else {
1513                     $can_place_holds = 1;
1514                     $available_count++;
1515                                         $available_items->{$prefix}->{count}++ if $item->{$hbranch};
1516                                         foreach (qw(branchname itemcallnumber barcode)) {
1517                         $available_items->{$prefix}->{$_} = $item->{$_};
1518                                         }
1519                                         $available_items->{$prefix}->{location} = $shelflocations->{ $item->{location} };
1520                                         $available_items->{$prefix}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1521                 }
1522             }
1523         }    # notforloan, item level and biblioitem level
1524         my ( $availableitemscount, $onloanitemscount, $notforloanitemscount,$otheritemscount );
1525         $maxitems =
1526           ( C4::Context->preference('maxItemsinSearchResults') )
1527           ? C4::Context->preference('maxItemsinSearchResults') - 1
1528           : 1;
1529         for my $key ( sort keys %$onloan_items ) {
1530             (++$onloanitemscount > $maxitems) and last;
1531             push @onloan_items_loop, $onloan_items->{$key};
1532         }
1533         for my $key ( sort keys %$other_items ) {
1534             (++$otheritemscount > $maxitems) and last;
1535             push @other_items_loop, $other_items->{$key};
1536         }
1537         for my $key ( sort keys %$notforloan_items ) {
1538             (++$notforloanitemscount > $maxitems) and last;
1539             push @notforloan_items_loop, $notforloan_items->{$key};
1540         }
1541         for my $key ( sort keys %$available_items ) {
1542             (++$availableitemscount > $maxitems) and last;
1543             push @available_items_loop, $available_items->{$key}
1544         }
1545
1546         # XSLT processing of some stuff
1547         if (C4::Context->preference("XSLTResultsDisplay") && !$scan) {
1548             $oldbiblio->{XSLTResultsRecord} = XSLTParse4Display(
1549                 $oldbiblio->{biblionumber}, $marcrecord, 'Results' );
1550         }
1551
1552         # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
1553         $can_place_holds = 0 if $itemtypes{ $oldbiblio->{itemtype} }->{notforloan};
1554         $oldbiblio->{norequests} = 1 unless $can_place_holds;
1555         $oldbiblio->{itemsplural}          = 1 if $items_count > 1;
1556         $oldbiblio->{items_count}          = $items_count;
1557         $oldbiblio->{available_items_loop} = \@available_items_loop;
1558         $oldbiblio->{notforloan_items_loop}= \@notforloan_items_loop;
1559         $oldbiblio->{onloan_items_loop}    = \@onloan_items_loop;
1560         $oldbiblio->{other_items_loop}     = \@other_items_loop;
1561         $oldbiblio->{availablecount}       = $available_count;
1562         $oldbiblio->{availableplural}      = 1 if $available_count > 1;
1563         $oldbiblio->{onloancount}          = $onloan_count;
1564         $oldbiblio->{onloanplural}         = 1 if $onloan_count > 1;
1565         $oldbiblio->{notforloancount}      = $notforloan_count;
1566         $oldbiblio->{othercount}           = $other_count;
1567         $oldbiblio->{otherplural}          = 1 if $other_count > 1;
1568         $oldbiblio->{wthdrawncount}        = $wthdrawn_count;
1569         $oldbiblio->{itemlostcount}        = $itemlost_count;
1570         $oldbiblio->{damagedcount}         = $itemdamaged_count;
1571         $oldbiblio->{intransitcount}       = $item_in_transit_count;
1572         $oldbiblio->{orderedcount}         = $ordered_count;
1573         push( @newresults, $oldbiblio );
1574     }
1575     return @newresults;
1576 }
1577
1578 #----------------------------------------------------------------------
1579 #
1580 # Non-Zebra GetRecords#
1581 #----------------------------------------------------------------------
1582
1583 =head2 NZgetRecords
1584
1585   NZgetRecords has the same API as zera getRecords, even if some parameters are not managed
1586
1587 =cut
1588
1589 sub NZgetRecords {
1590     my (
1591         $query,            $simple_query, $sort_by_ref,    $servers_ref,
1592         $results_per_page, $offset,       $expanded_facet, $branches,
1593         $query_type,       $scan
1594     ) = @_;
1595     warn "query =$query" if $DEBUG;
1596     my $result = NZanalyse($query);
1597     warn "results =$result" if $DEBUG;
1598     return ( undef,
1599         NZorder( $result, @$sort_by_ref[0], $results_per_page, $offset ),
1600         undef );
1601 }
1602
1603 =head2 NZanalyse
1604
1605   NZanalyse : get a CQL string as parameter, and returns a list of biblionumber;title,biblionumber;title,...
1606   the list is built from an inverted index in the nozebra SQL table
1607   note that title is here only for convenience : the sorting will be very fast when requested on title
1608   if the sorting is requested on something else, we will have to reread all results, and that may be longer.
1609
1610 =cut
1611
1612 sub NZanalyse {
1613     my ( $string, $server ) = @_;
1614 #     warn "---------"       if $DEBUG;
1615     warn " NZanalyse" if $DEBUG;
1616 #     warn "---------"       if $DEBUG;
1617
1618  # $server contains biblioserver or authorities, depending on what we search on.
1619  #warn "querying : $string on $server";
1620     $server = 'biblioserver' unless $server;
1621
1622 # if we have a ", replace the content to discard temporarily any and/or/not inside
1623     my $commacontent;
1624     if ( $string =~ /"/ ) {
1625         $string =~ s/"(.*?)"/__X__/;
1626         $commacontent = $1;
1627         warn "commacontent : $commacontent" if $DEBUG;
1628     }
1629
1630 # split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y"
1631 # then, call again NZanalyse with $left and $right
1632 # (recursive until we find a leaf (=> something without and/or/not)
1633 # delete repeated operator... Would then go in infinite loop
1634     while ( $string =~ s/( and| or| not| AND| OR| NOT)\1/$1/g ) {
1635     }
1636
1637     #process parenthesis before.
1638     if ( $string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/ ) {
1639         my $left     = $1;
1640         my $right    = $4;
1641         my $operator = lc($3);   # FIXME: and/or/not are operators, not operands
1642         warn
1643 "dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right"
1644           if $DEBUG;
1645         my $leftresult = NZanalyse( $left, $server );
1646         if ($operator) {
1647             my $rightresult = NZanalyse( $right, $server );
1648
1649             # OK, we have the results for right and left part of the query
1650             # depending of operand, intersect, union or exclude both lists
1651             # to get a result list
1652             if ( $operator eq ' and ' ) {
1653                 return NZoperatorAND($leftresult,$rightresult);
1654             }
1655             elsif ( $operator eq ' or ' ) {
1656
1657                 # just merge the 2 strings
1658                 return $leftresult . $rightresult;
1659             }
1660             elsif ( $operator eq ' not ' ) {
1661                 return NZoperatorNOT($leftresult,$rightresult);
1662             }
1663         }
1664         else {
1665 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1666             return $leftresult;
1667         }
1668     }
1669     warn "string :" . $string if $DEBUG;
1670     my $left = "";
1671     my $right = "";
1672     my $operator = "";
1673     if ($string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/) {
1674         $left     = $1;
1675         $right    = $3;
1676         $operator = lc($2);    # FIXME: and/or/not are operators, not operands
1677     }
1678     warn "no parenthesis. left : $left operator: $operator right: $right"
1679       if $DEBUG;
1680
1681     # it's not a leaf, we have a and/or/not
1682     if ($operator) {
1683
1684         # reintroduce comma content if needed
1685         $right =~ s/__X__/"$commacontent"/ if $commacontent;
1686         $left  =~ s/__X__/"$commacontent"/ if $commacontent;
1687         warn "node : $left / $operator / $right\n" if $DEBUG;
1688         my $leftresult  = NZanalyse( $left,  $server );
1689         my $rightresult = NZanalyse( $right, $server );
1690         warn " leftresult : $leftresult" if $DEBUG;
1691         warn " rightresult : $rightresult" if $DEBUG;
1692         # OK, we have the results for right and left part of the query
1693         # depending of operand, intersect, union or exclude both lists
1694         # to get a result list
1695         if ( $operator eq ' and ' ) {
1696             warn "NZAND";
1697             return NZoperatorAND($leftresult,$rightresult);
1698         }
1699         elsif ( $operator eq ' or ' ) {
1700
1701             # just merge the 2 strings
1702             return $leftresult . $rightresult;
1703         }
1704         elsif ( $operator eq ' not ' ) {
1705             return NZoperatorNOT($leftresult,$rightresult);
1706         }
1707         else {
1708
1709 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1710             die "error : operand unknown : $operator for $string";
1711         }
1712
1713         # it's a leaf, do the real SQL query and return the result
1714     }
1715     else {
1716         $string =~ s/__X__/"$commacontent"/ if $commacontent;
1717         $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g;
1718         #remove trailing blank at the beginning
1719         $string =~ s/^ //g;
1720         warn "leaf:$string" if $DEBUG;
1721
1722         # parse the string in in operator/operand/value again
1723         my $left = "";
1724         my $operator = "";
1725         my $right = "";
1726         if ($string =~ /(.*)(>=|<=)(.*)/) {
1727             $left     = $1;
1728             $operator = $2;
1729             $right    = $3;
1730         } else {
1731             $left = $string;
1732         }
1733 #         warn "handling leaf... left:$left operator:$operator right:$right"
1734 #           if $DEBUG;
1735         unless ($operator) {
1736             if ($string =~ /(.*)(>|<|=)(.*)/) {
1737                 $left     = $1;
1738                 $operator = $2;
1739                 $right    = $3;
1740                 warn
1741     "handling unless (operator)... left:$left operator:$operator right:$right"
1742                 if $DEBUG;
1743             } else {
1744                 $left = $string;
1745             }
1746         }
1747         my $results;
1748
1749 # strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr...
1750         $left =~ s/ .*$//;
1751
1752         # automatic replace for short operators
1753         $left = 'title'            if $left =~ '^ti$';
1754         $left = 'author'           if $left =~ '^au$';
1755         $left = 'publisher'        if $left =~ '^pb$';
1756         $left = 'subject'          if $left =~ '^su$';
1757         $left = 'koha-Auth-Number' if $left =~ '^an$';
1758         $left = 'keyword'          if $left =~ '^kw$';
1759         warn "handling leaf... left:$left operator:$operator right:$right" if $DEBUG;
1760         if ( $operator && $left ne 'keyword' ) {
1761
1762             #do a specific search
1763             my $dbh = C4::Context->dbh;
1764             $operator = 'LIKE' if $operator eq '=' and $right =~ /%/;
1765             my $sth =
1766               $dbh->prepare(
1767 "SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?"
1768               );
1769             warn "$left / $operator / $right\n" if $DEBUG;
1770
1771             # split each word, query the DB and build the biblionumbers result
1772             #sanitizing leftpart
1773             $left =~ s/^\s+|\s+$//;
1774             foreach ( split / /, $right ) {
1775                 my $biblionumbers;
1776                 $_ =~ s/^\s+|\s+$//;
1777                 next unless $_;
1778                 warn "EXECUTE : $server, $left, $_" if $DEBUG;
1779                 $sth->execute( $server, $left, $_ )
1780                   or warn "execute failed: $!";
1781                 while ( my ( $line, $value ) = $sth->fetchrow ) {
1782
1783 # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
1784 # otherwise, fill the result
1785                     $biblionumbers .= $line
1786                       unless ( $right =~ /^\d+$/ && $value =~ /\D/ );
1787                     warn "result : $value "
1788                       . ( $right  =~ /\d/ ) . "=="
1789                       . ( $value =~ /\D/?$line:"" ) if $DEBUG;         #= $line";
1790                 }
1791
1792 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1793                 if ($results) {
1794                     warn "NZAND" if $DEBUG;
1795                     $results = NZoperatorAND($biblionumbers,$results);
1796                 }
1797                 else {
1798                     $results = $biblionumbers;
1799                 }
1800             }
1801         }
1802         else {
1803
1804       #do a complete search (all indexes), if index='kw' do complete search too.
1805             my $dbh = C4::Context->dbh;
1806             my $sth =
1807               $dbh->prepare(
1808 "SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?"
1809               );
1810
1811             # split each word, query the DB and build the biblionumbers result
1812             foreach ( split / /, $string ) {
1813                 next if C4::Context->stopwords->{ uc($_) };   # skip if stopword
1814                 warn "search on all indexes on $_" if $DEBUG;
1815                 my $biblionumbers;
1816                 next unless $_;
1817                 $sth->execute( $server, $_ );
1818                 while ( my $line = $sth->fetchrow ) {
1819                     $biblionumbers .= $line;
1820                 }
1821
1822 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1823                 if ($results) {
1824                     $results = NZoperatorAND($biblionumbers,$results);
1825                 }
1826                 else {
1827                     warn "NEW RES for $_ = $biblionumbers" if $DEBUG;
1828                     $results = $biblionumbers;
1829                 }
1830             }
1831         }
1832         warn "return : $results for LEAF : $string" if $DEBUG;
1833         return $results;
1834     }
1835     warn "---------\nLeave NZanalyse\n---------" if $DEBUG;
1836 }
1837
1838 sub NZoperatorAND{
1839     my ($rightresult, $leftresult)=@_;
1840
1841     my @leftresult = split /;/, $leftresult;
1842     warn " @leftresult / $rightresult \n" if $DEBUG;
1843
1844     #             my @rightresult = split /;/,$leftresult;
1845     my $finalresult;
1846
1847 # parse the left results, and if the biblionumber exist in the right result, save it in finalresult
1848 # the result is stored twice, to have the same weight for AND than OR.
1849 # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130
1850 # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64
1851     foreach (@leftresult) {
1852         my $value = $_;
1853         my $countvalue;
1854         ( $value, $countvalue ) = ( $1, $2 ) if ($value=~/(.*)-(\d+)$/);
1855         if ( $rightresult =~ /\Q$value\E-(\d+);/ ) {
1856             $countvalue = ( $1 > $countvalue ? $countvalue : $1 );
1857             $finalresult .=
1858                 "$value-$countvalue;$value-$countvalue;";
1859         }
1860     }
1861     warn "NZAND DONE : $finalresult \n" if $DEBUG;
1862     return $finalresult;
1863 }
1864
1865 sub NZoperatorOR{
1866     my ($rightresult, $leftresult)=@_;
1867     return $rightresult.$leftresult;
1868 }
1869
1870 sub NZoperatorNOT{
1871     my ($leftresult, $rightresult)=@_;
1872
1873     my @leftresult = split /;/, $leftresult;
1874
1875     #             my @rightresult = split /;/,$leftresult;
1876     my $finalresult;
1877     foreach (@leftresult) {
1878         my $value=$_;
1879         $value=$1 if $value=~m/(.*)-\d+$/;
1880         unless ($rightresult =~ "$value-") {
1881             $finalresult .= "$_;";
1882         }
1883     }
1884     return $finalresult;
1885 }
1886
1887 =head2 NZorder
1888
1889   $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset);
1890
1891   TODO :: Description
1892
1893 =cut
1894
1895 sub NZorder {
1896     my ( $biblionumbers, $ordering, $results_per_page, $offset ) = @_;
1897     warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG;
1898
1899     # order title asc by default
1900     #     $ordering = '1=36 <i' unless $ordering;
1901     $results_per_page = 20 unless $results_per_page;
1902     $offset           = 0  unless $offset;
1903     my $dbh = C4::Context->dbh;
1904
1905     #
1906     # order by POPULARITY
1907     #
1908     if ( $ordering =~ /popularity/ ) {
1909         my %result;
1910         my %popularity;
1911
1912         # popularity is not in MARC record, it's builded from a specific query
1913         my $sth =
1914           $dbh->prepare("select sum(issues) from items where biblionumber=?");
1915         foreach ( split /;/, $biblionumbers ) {
1916             my ( $biblionumber, $title ) = split /,/, $_;
1917             $result{$biblionumber} = GetMarcBiblio($biblionumber);
1918             $sth->execute($biblionumber);
1919             my $popularity = $sth->fetchrow || 0;
1920
1921 # hint : the key is popularity.title because we can have
1922 # many results with the same popularity. In this cas, sub-ordering is done by title
1923 # we also have biblionumber to avoid bug for 2 biblios with the same title & popularity
1924 # (un-frequent, I agree, but we won't forget anything that way ;-)
1925             $popularity{ sprintf( "%10d", $popularity ) . $title
1926                   . $biblionumber } = $biblionumber;
1927         }
1928
1929     # sort the hash and return the same structure as GetRecords (Zebra querying)
1930         my $result_hash;
1931         my $numbers = 0;
1932         if ( $ordering eq 'popularity_dsc' ) {    # sort popularity DESC
1933             foreach my $key ( sort { $b cmp $a } ( keys %popularity ) ) {
1934                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1935                   $result{ $popularity{$key} }->as_usmarc();
1936             }
1937         }
1938         else {                                    # sort popularity ASC
1939             foreach my $key ( sort ( keys %popularity ) ) {
1940                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1941                   $result{ $popularity{$key} }->as_usmarc();
1942             }
1943         }
1944         my $finalresult = ();
1945         $result_hash->{'hits'}         = $numbers;
1946         $finalresult->{'biblioserver'} = $result_hash;
1947         return $finalresult;
1948
1949         #
1950         # ORDER BY author
1951         #
1952     }
1953     elsif ( $ordering =~ /author/ ) {
1954         my %result;
1955         foreach ( split /;/, $biblionumbers ) {
1956             my ( $biblionumber, $title ) = split /,/, $_;
1957             my $record = GetMarcBiblio($biblionumber);
1958             my $author;
1959             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1960                 $author = $record->subfield( '200', 'f' );
1961                 $author = $record->subfield( '700', 'a' ) unless $author;
1962             }
1963             else {
1964                 $author = $record->subfield( '100', 'a' );
1965             }
1966
1967 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1968 # and we don't want to get only 1 result for each of them !!!
1969             $result{ $author . $biblionumber } = $record;
1970         }
1971
1972     # sort the hash and return the same structure as GetRecords (Zebra querying)
1973         my $result_hash;
1974         my $numbers = 0;
1975         if ( $ordering eq 'author_za' ) {    # sort by author desc
1976             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1977                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1978                   $result{$key}->as_usmarc();
1979             }
1980         }
1981         else {                               # sort by author ASC
1982             foreach my $key ( sort ( keys %result ) ) {
1983                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1984                   $result{$key}->as_usmarc();
1985             }
1986         }
1987         my $finalresult = ();
1988         $result_hash->{'hits'}         = $numbers;
1989         $finalresult->{'biblioserver'} = $result_hash;
1990         return $finalresult;
1991
1992         #
1993         # ORDER BY callnumber
1994         #
1995     }
1996     elsif ( $ordering =~ /callnumber/ ) {
1997         my %result;
1998         foreach ( split /;/, $biblionumbers ) {
1999             my ( $biblionumber, $title ) = split /,/, $_;
2000             my $record = GetMarcBiblio($biblionumber);
2001             my $callnumber;
2002             my $frameworkcode = GetFrameworkCode($biblionumber);
2003             my ( $callnumber_tag, $callnumber_subfield ) = GetMarcFromKohaField(  'items.itemcallnumber', $frameworkcode);
2004                ( $callnumber_tag, $callnumber_subfield ) = GetMarcFromKohaField('biblioitems.callnumber', $frameworkcode)
2005                 unless $callnumber_tag;
2006             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
2007                 $callnumber = $record->subfield( '200', 'f' );
2008             }
2009             else {
2010                 $callnumber = $record->subfield( '100', 'a' );
2011             }
2012
2013 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2014 # and we don't want to get only 1 result for each of them !!!
2015             $result{ $callnumber . $biblionumber } = $record;
2016         }
2017
2018     # sort the hash and return the same structure as GetRecords (Zebra querying)
2019         my $result_hash;
2020         my $numbers = 0;
2021         if ( $ordering eq 'call_number_dsc' ) {    # sort by title desc
2022             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2023                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2024                   $result{$key}->as_usmarc();
2025             }
2026         }
2027         else {                                     # sort by title ASC
2028             foreach my $key ( sort { $a cmp $b } ( keys %result ) ) {
2029                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2030                   $result{$key}->as_usmarc();
2031             }
2032         }
2033         my $finalresult = ();
2034         $result_hash->{'hits'}         = $numbers;
2035         $finalresult->{'biblioserver'} = $result_hash;
2036         return $finalresult;
2037     }
2038     elsif ( $ordering =~ /pubdate/ ) {             #pub year
2039         my %result;
2040         foreach ( split /;/, $biblionumbers ) {
2041             my ( $biblionumber, $title ) = split /,/, $_;
2042             my $record = GetMarcBiblio($biblionumber);
2043             my ( $publicationyear_tag, $publicationyear_subfield ) =
2044               GetMarcFromKohaField( 'biblioitems.publicationyear', '' );
2045             my $publicationyear =
2046               $record->subfield( $publicationyear_tag,
2047                 $publicationyear_subfield );
2048
2049 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2050 # and we don't want to get only 1 result for each of them !!!
2051             $result{ $publicationyear . $biblionumber } = $record;
2052         }
2053
2054     # sort the hash and return the same structure as GetRecords (Zebra querying)
2055         my $result_hash;
2056         my $numbers = 0;
2057         if ( $ordering eq 'pubdate_dsc' ) {    # sort by pubyear desc
2058             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2059                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2060                   $result{$key}->as_usmarc();
2061             }
2062         }
2063         else {                                 # sort by pub year ASC
2064             foreach my $key ( sort ( keys %result ) ) {
2065                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2066                   $result{$key}->as_usmarc();
2067             }
2068         }
2069         my $finalresult = ();
2070         $result_hash->{'hits'}         = $numbers;
2071         $finalresult->{'biblioserver'} = $result_hash;
2072         return $finalresult;
2073
2074         #
2075         # ORDER BY title
2076         #
2077     }
2078     elsif ( $ordering =~ /title/ ) {
2079
2080 # the title is in the biblionumbers string, so we just need to build a hash, sort it and return
2081         my %result;
2082         foreach ( split /;/, $biblionumbers ) {
2083             my ( $biblionumber, $title ) = split /,/, $_;
2084
2085 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2086 # and we don't want to get only 1 result for each of them !!!
2087 # hint & speed improvement : we can order without reading the record
2088 # so order, and read records only for the requested page !
2089             $result{ $title . $biblionumber } = $biblionumber;
2090         }
2091
2092     # sort the hash and return the same structure as GetRecords (Zebra querying)
2093         my $result_hash;
2094         my $numbers = 0;
2095         if ( $ordering eq 'title_az' ) {    # sort by title desc
2096             foreach my $key ( sort ( keys %result ) ) {
2097                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2098             }
2099         }
2100         else {                              # sort by title ASC
2101             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2102                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2103             }
2104         }
2105
2106         # limit the $results_per_page to result size if it's more
2107         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2108
2109         # for the requested page, replace biblionumber by the complete record
2110         # speed improvement : avoid reading too much things
2111         for (
2112             my $counter = $offset ;
2113             $counter <= $offset + $results_per_page ;
2114             $counter++
2115           )
2116         {
2117             $result_hash->{'RECORDS'}[$counter] =
2118               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc;
2119         }
2120         my $finalresult = ();
2121         $result_hash->{'hits'}         = $numbers;
2122         $finalresult->{'biblioserver'} = $result_hash;
2123         return $finalresult;
2124     }
2125     else {
2126
2127 #
2128 # order by ranking
2129 #
2130 # we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking
2131         my %result;
2132         my %count_ranking;
2133         foreach ( split /;/, $biblionumbers ) {
2134             my ( $biblionumber, $title ) = split /,/, $_;
2135             $title =~ /(.*)-(\d)/;
2136
2137             # get weight
2138             my $ranking = $2;
2139
2140 # note that we + the ranking because ranking is calculated on weight of EACH term requested.
2141 # if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N
2142 # biblio N has ranking = 6
2143             $count_ranking{$biblionumber} += $ranking;
2144         }
2145
2146 # build the result by "inverting" the count_ranking hash
2147 # hing : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead
2148 #         warn "counting";
2149         foreach ( keys %count_ranking ) {
2150             $result{ sprintf( "%10d", $count_ranking{$_} ) . '-' . $_ } = $_;
2151         }
2152
2153     # sort the hash and return the same structure as GetRecords (Zebra querying)
2154         my $result_hash;
2155         my $numbers = 0;
2156         foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2157             $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2158         }
2159
2160         # limit the $results_per_page to result size if it's more
2161         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2162
2163         # for the requested page, replace biblionumber by the complete record
2164         # speed improvement : avoid reading too much things
2165         for (
2166             my $counter = $offset ;
2167             $counter <= $offset + $results_per_page ;
2168             $counter++
2169           )
2170         {
2171             $result_hash->{'RECORDS'}[$counter] =
2172               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc
2173               if $result_hash->{'RECORDS'}[$counter];
2174         }
2175         my $finalresult = ();
2176         $result_hash->{'hits'}         = $numbers;
2177         $finalresult->{'biblioserver'} = $result_hash;
2178         return $finalresult;
2179     }
2180 }
2181
2182 =head2 enabled_staff_search_views
2183
2184 %hash = enabled_staff_search_views()
2185
2186 This function returns a hash that contains three flags obtained from the system
2187 preferences, used to determine whether a particular staff search results view
2188 is enabled.
2189
2190 =over 2
2191
2192 =item C<Output arg:>
2193
2194     * $hash{can_view_MARC} is true only if the MARC view is enabled
2195     * $hash{can_view_ISBD} is true only if the ISBD view is enabled
2196     * $hash{can_view_labeledMARC} is true only if the Labeled MARC view is enabled
2197
2198 =item C<usage in the script:>
2199
2200 =back
2201
2202 $template->param ( C4::Search::enabled_staff_search_views );
2203
2204 =cut
2205
2206 sub enabled_staff_search_views
2207 {
2208         return (
2209                 can_view_MARC                   => C4::Context->preference('viewMARC'),                 # 1 if the staff search allows the MARC view
2210                 can_view_ISBD                   => C4::Context->preference('viewISBD'),                 # 1 if the staff search allows the ISBD view
2211                 can_view_labeledMARC    => C4::Context->preference('viewLabeledMARC'),  # 1 if the staff search allows the Labeled MARC view
2212         );
2213 }
2214
2215
2216 =head2 z3950_search_args
2217
2218 $arrayref = z3950_search_args($matchpoints)
2219
2220 This function returns an array reference that contains the search parameters to be
2221 passed to the Z39.50 search script (z3950_search.pl). The array elements
2222 are hash refs whose keys are name, value and encvalue, and whose values are the
2223 name of a search parameter, the value of that search parameter and the URL encoded
2224 value of that parameter.
2225
2226 The search parameter names are lccn, isbn, issn, title, author, dewey and subject.
2227
2228 The search parameter values are obtained from the bibliographic record whose
2229 data is in a hash reference in $matchpoints, as returned by Biblio::GetBiblioData().
2230
2231 If $matchpoints is a scalar, it is assumed to be an unnamed query descriptor, e.g.
2232 a general purpose search argument. In this case, the returned array contains only
2233 entry: the key is 'title' and the value and encvalue are derived from $matchpoints.
2234
2235 If a search parameter value is undefined or empty, it is not included in the returned
2236 array.
2237
2238 The returned array reference may be passed directly to the template parameters.
2239
2240 =over 2
2241
2242 =item C<Output arg:>
2243
2244     * $array containing hash refs as described above
2245
2246 =item C<usage in the script:>
2247
2248 =back
2249
2250 $data = Biblio::GetBiblioData($bibno);
2251 $template->param ( MYLOOP => C4::Search::z3950_search_args($data) )
2252
2253 *OR*
2254
2255 $template->param ( MYLOOP => C4::Search::z3950_search_args($searchscalar) )
2256
2257 =cut
2258
2259 sub z3950_search_args {
2260     my $bibrec = shift;
2261     $bibrec = { title => $bibrec } if !ref $bibrec;
2262     my $array = [];
2263     for my $field (qw/ lccn isbn issn title author dewey subject /)
2264     {
2265         my $encvalue = URI::Escape::uri_escape_utf8($bibrec->{$field});
2266         push @$array, { name=>$field, value=>$bibrec->{$field}, encvalue=>$encvalue } if defined $bibrec->{$field};
2267     }
2268     return $array;
2269 }
2270
2271
2272 END { }    # module clean-up code here (global destructor)
2273
2274 1;
2275 __END__
2276
2277 =head1 AUTHOR
2278
2279 Koha Developement team <info@koha.org>
2280
2281 =cut