C4/Search.pm

   1 package C4::Search;
   2
   3 # This file is part of Koha.
   4 #
   5 # Koha is free software; you can redistribute it and/or modify it under the
   6 # terms of the GNU General Public License as published by the Free Software
   7 # Foundation; either version 2 of the License, or (at your option) any later
   8 # version.
   9 #
  10 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
  11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12 # A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU General Public License along with
  15 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  16 # Suite 330, Boston, MA  02111-1307 USA
  17
  18 use strict;
  19 # use warnings; # FIXME
  20 require Exporter;
  21 use C4::Context;
  22 use C4::Biblio;    # GetMarcFromKohaField
  23 use C4::Koha;      # getFacets
  24 use Lingua::Stem;
  25 use C4::Search::PazPar2;
  26 use XML::Simple;
  27 use C4::Dates qw(format_date);
  28 use C4::XSLT;
  29
  30 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
  31
  32 # set the version for version checking
  33 BEGIN {
  34     $VERSION = 3.01;
  35     $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
  36 }
  37
  38 =head1 NAME
  39
  40 C4::Search - Functions for searching the Koha catalog.
  41
  42 =head1 SYNOPSIS
  43
  44 See opac/opac-search.pl or catalogue/search.pl for example of usage
  45
  46 =head1 DESCRIPTION
  47
  48 This module provides searching functions for Koha's bibliographic databases
  49
  50 =head1 FUNCTIONS
  51
  52 =cut
  53
  54 @ISA    = qw(Exporter);
  55 @EXPORT = qw(
  56   &findseealso
  57   &FindDuplicate
  58   &SimpleSearch
  59   &searchResults
  60   &getRecords
  61   &buildQuery
  62   &NZgetRecords
  63   &ModBiblios
  64 );
  65
  66 # make all your functions, whether exported or not;
  67
  68 =head2 findseealso($dbh,$fields);
  69
  70 C<$dbh> is a link to the DB handler.
  71
  72 use C4::Context;
  73 my $dbh =C4::Context->dbh;
  74
  75 C<$fields> is a reference to the fields array
  76
  77 This function modifies the @$fields array and adds related fields to search on.
  78
  79 FIXME: this function is probably deprecated in Koha 3
  80
  81 =cut
  82
  83 sub findseealso {
  84     my ( $dbh, $fields ) = @_;
  85     my $tagslib = GetMarcStructure(1);
  86     for ( my $i = 0 ; $i <= $#{$fields} ; $i++ ) {
  87         my ($tag)      = substr( @$fields[$i], 1, 3 );
  88         my ($subfield) = substr( @$fields[$i], 4, 1 );
  89         @$fields[$i] .= ',' . $tagslib->{$tag}->{$subfield}->{seealso}
  90           if ( $tagslib->{$tag}->{$subfield}->{seealso} );
  91     }
  92 }
  93
  94 =head2 FindDuplicate
  95
  96 ($biblionumber,$biblionumber,$title) = FindDuplicate($record);
  97
  98 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
  99
 100 =cut
 101
 102 sub FindDuplicate {
 103     my ($record) = @_;
 104     my $dbh = C4::Context->dbh;
 105     my $result = TransformMarcToKoha( $dbh, $record, '' );
 106     my $sth;
 107     my $query;
 108     my $search;
 109     my $type;
 110     my ( $biblionumber, $title );
 111
 112     # search duplicate on ISBN, easy and fast..
 113     # ... normalize first
 114     if ( $result->{isbn} ) {
 115         $result->{isbn} =~ s/\(.*$//;
 116         $result->{isbn} =~ s/\s+$//;
 117         $query = "isbn=$result->{isbn}";
 118     }
 119     else {
 120         $result->{title} =~ s /\\//g;
 121         $result->{title} =~ s /\"//g;
 122         $result->{title} =~ s /\(//g;
 123         $result->{title} =~ s /\)//g;
 124
 125         # FIXME: instead of removing operators, could just do
 126         # quotes around the value
 127         $result->{title} =~ s/(and|or|not)//g;
 128         $query = "ti,ext=$result->{title}";
 129         $query .= " and itemtype=$result->{itemtype}"
 130           if ( $result->{itemtype} );
 131         if   ( $result->{author} ) {
 132             $result->{author} =~ s /\\//g;
 133             $result->{author} =~ s /\"//g;
 134             $result->{author} =~ s /\(//g;
 135             $result->{author} =~ s /\)//g;
 136
 137             # remove valid operators
 138             $result->{author} =~ s/(and|or|not)//g;
 139             $query .= " and au,ext=$result->{author}";
 140         }
 141     }
 142
 143     # FIXME: add error handling
 144     my ( $error, $searchresults ) = SimpleSearch($query); # FIXME :: hardcoded !
 145     my @results;
 146     foreach my $possible_duplicate_record (@$searchresults) {
 147         my $marcrecord =
 148           MARC::Record->new_from_usmarc($possible_duplicate_record);
 149         my $result = TransformMarcToKoha( $dbh, $marcrecord, '' );
 150
 151         # FIXME :: why 2 $biblionumber ?
 152         if ($result) {
 153             push @results, $result->{'biblionumber'};
 154             push @results, $result->{'title'};
 155         }
 156     }
 157     return @results;
 158 }
 159
 160 =head2 SimpleSearch
 161
 162 ( $error, $results, $total_hits ) = SimpleSearch( $query, $offset, $max_results, [@servers] );
 163
 164 This function provides a simple search API on the bibliographic catalog
 165
 166 =over 2
 167
 168 =item C<input arg:>
 169
 170     * $query can be a simple keyword or a complete CCL query
 171     * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
 172     * $offset - If present, represents the number of records at the beggining to omit. Defaults to 0
 173     * $max_results - if present, determines the maximum number of records to fetch. undef is All. defaults to undef.
 174
 175
 176 =item C<Output:>
 177
 178     * $error is a empty unless an error is detected
 179     * \@results is an array of records.
 180     * $total_hits is the number of hits that would have been returned with no limit
 181
 182 =item C<usage in the script:>
 183
 184 =back
 185
 186 my ( $error, $marcresults, $total_hits ) = SimpleSearch($query);
 187
 188 if (defined $error) {
 189     $template->param(query_error => $error);
 190     warn "error: ".$error;
 191     output_html_with_http_headers $input, $cookie, $template->output;
 192     exit;
 193 }
 194
 195 my $hits = scalar @$marcresults;
 196 my @results;
 197
 198 for my $i (0..$hits) {
 199     my %resultsloop;
 200     my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]);
 201     my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,'');
 202
 203     #build the hash for the template.
 204     $resultsloop{highlight}       = ($i % 2)?(1):(0);
 205     $resultsloop{title}           = $biblio->{'title'};
 206     $resultsloop{subtitle}        = $biblio->{'subtitle'};
 207     $resultsloop{biblionumber}    = $biblio->{'biblionumber'};
 208     $resultsloop{author}          = $biblio->{'author'};
 209     $resultsloop{publishercode}   = $biblio->{'publishercode'};
 210     $resultsloop{publicationyear} = $biblio->{'publicationyear'};
 211
 212     push @results, \%resultsloop;
 213 }
 214
 215 $template->param(result=>\@results);
 216
 217 =cut
 218
 219 sub SimpleSearch {
 220     my ( $query, $offset, $max_results, $servers )  = @_;
 221
 222     if ( C4::Context->preference('NoZebra') ) {
 223         my $result = NZorder( NZanalyse($query) )->{'biblioserver'};
 224         my $search_result =
 225           (      $result->{hits}
 226               && $result->{hits} > 0 ? $result->{'RECORDS'} : [] );
 227         return ( undef, $search_result, scalar($search_result) );
 228     }
 229     else {
 230         # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
 231         my @servers = defined ( $servers ) ? @$servers : ( "biblioserver" );
 232         my @results;
 233         my @zoom_queries;
 234         my @tmpresults;
 235         my @zconns;
 236         my $total_hits;
 237         return ( "No query entered", undef, undef ) unless $query;
 238
 239         # Initialize & Search Zebra
 240         for ( my $i = 0 ; $i < @servers ; $i++ ) {
 241             eval {
 242                 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 243                 $zoom_queries[$i] = new ZOOM::Query::CCL2RPN( $query, $zconns[$i]);
 244                 $tmpresults[$i] = $zconns[$i]->search( $zoom_queries[$i] );
 245
 246                 # error handling
 247                 my $error =
 248                     $zconns[$i]->errmsg() . " ("
 249                   . $zconns[$i]->errcode() . ") "
 250                   . $zconns[$i]->addinfo() . " "
 251                   . $zconns[$i]->diagset();
 252
 253                 return ( $error, undef, undef ) if $zconns[$i]->errcode();
 254             };
 255             if ($@) {
 256
 257                 # caught a ZOOM::Exception
 258                 my $error =
 259                     $@->message() . " ("
 260                   . $@->code() . ") "
 261                   . $@->addinfo() . " "
 262                   . $@->diagset();
 263                 warn $error;
 264                 return ( $error, undef, undef );
 265             }
 266         }
 267         while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 268             my $event = $zconns[ $i - 1 ]->last_event();
 269             if ( $event == ZOOM::Event::ZEND ) {
 270
 271                 my $first_record = defined( $offset ) ? $offset+1 : 1;
 272                 my $hits = $tmpresults[ $i - 1 ]->size();
 273                 $total_hits += $hits;
 274                 my $last_record = $hits;
 275                 if ( defined $max_results && $offset + $max_results < $hits ) {
 276                     $last_record  = $offset + $max_results;
 277                 }
 278
 279                 for my $j ( $first_record..$last_record ) {
 280                     my $record = $tmpresults[ $i - 1 ]->record( $j-1 )->raw(); # 0 indexed
 281                     push @results, $record;
 282                 }
 283             }
 284         }
 285
 286         foreach my $result (@tmpresults) {
 287             $result->destroy();
 288         }
 289         foreach my $zoom_query (@zoom_queries) {
 290             $zoom_query->destroy();
 291         }
 292
 293         return ( undef, \@results, $total_hits );
 294     }
 295 }
 296
 297 =head2 getRecords
 298
 299 ( undef, $results_hashref, \@facets_loop ) = getRecords (
 300
 301         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 302         $results_per_page, $offset,       $expanded_facet, $branches,
 303         $query_type,       $scan
 304     );
 305
 306 The all singing, all dancing, multi-server, asynchronous, scanning,
 307 searching, record nabbing, facet-building
 308
 309 See verbse embedded documentation.
 310
 311 =cut
 312
 313 sub getRecords {
 314     my (
 315         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 316         $results_per_page, $offset,       $expanded_facet, $branches,
 317         $query_type,       $scan
 318     ) = @_;
 319
 320     my @servers = @$servers_ref;
 321     my @sort_by = @$sort_by_ref;
 322
 323     # Initialize variables for the ZOOM connection and results object
 324     my $zconn;
 325     my @zconns;
 326     my @results;
 327     my $results_hashref = ();
 328
 329     # Initialize variables for the faceted results objects
 330     my $facets_counter = ();
 331     my $facets_info    = ();
 332     my $facets         = getFacets();
 333
 334     my @facets_loop
 335       ;    # stores the ref to array of hashes for template facets loop
 336
 337     ### LOOP THROUGH THE SERVERS
 338     for ( my $i = 0 ; $i < @servers ; $i++ ) {
 339         $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 340
 341 # perform the search, create the results objects
 342 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
 343         my $query_to_use = ($servers[$i] =~ /biblioserver/) ? $koha_query : $simple_query;
 344
 345         #$query_to_use = $simple_query if $scan;
 346         warn $simple_query if ( $scan and $DEBUG );
 347
 348         # Check if we've got a query_type defined, if so, use it
 349         eval {
 350             if ($query_type)
 351             {
 352                 if ( $query_type =~ /^ccl/ ) {
 353                     $query_to_use =~
 354                       s/\:/\=/g;    # change : to = last minute (FIXME)
 355                     $results[$i] =
 356                       $zconns[$i]->search(
 357                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 358                       );
 359                 }
 360                 elsif ( $query_type =~ /^cql/ ) {
 361                     $results[$i] =
 362                       $zconns[$i]->search(
 363                         new ZOOM::Query::CQL( $query_to_use, $zconns[$i] ) );
 364                 }
 365                 elsif ( $query_type =~ /^pqf/ ) {
 366                     $results[$i] =
 367                       $zconns[$i]->search(
 368                         new ZOOM::Query::PQF( $query_to_use, $zconns[$i] ) );
 369                 }
 370             }
 371             else {
 372                 if ($scan) {
 373                     $results[$i] =
 374                       $zconns[$i]->scan(
 375                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 376                       );
 377                 }
 378                 else {
 379                     $results[$i] =
 380                       $zconns[$i]->search(
 381                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 382                       );
 383                 }
 384             }
 385         };
 386         if ($@) {
 387             warn "WARNING: query problem with $query_to_use " . $@;
 388         }
 389
 390         # Concatenate the sort_by limits and pass them to the results object
 391         # Note: sort will override rank
 392         my $sort_by;
 393         foreach my $sort (@sort_by) {
 394             if ( $sort eq "author_az" ) {
 395                 $sort_by .= "1=1003 <i ";
 396             }
 397             elsif ( $sort eq "author_za" ) {
 398                 $sort_by .= "1=1003 >i ";
 399             }
 400             elsif ( $sort eq "popularity_asc" ) {
 401                 $sort_by .= "1=9003 <i ";
 402             }
 403             elsif ( $sort eq "popularity_dsc" ) {
 404                 $sort_by .= "1=9003 >i ";
 405             }
 406             elsif ( $sort eq "call_number_asc" ) {
 407                 $sort_by .= "1=20  <i ";
 408             }
 409             elsif ( $sort eq "call_number_dsc" ) {
 410                 $sort_by .= "1=20 >i ";
 411             }
 412             elsif ( $sort eq "pubdate_asc" ) {
 413                 $sort_by .= "1=31 <i ";
 414             }
 415             elsif ( $sort eq "pubdate_dsc" ) {
 416                 $sort_by .= "1=31 >i ";
 417             }
 418             elsif ( $sort eq "acqdate_asc" ) {
 419                 $sort_by .= "1=32 <i ";
 420             }
 421             elsif ( $sort eq "acqdate_dsc" ) {
 422                 $sort_by .= "1=32 >i ";
 423             }
 424             elsif ( $sort eq "title_az" ) {
 425                 $sort_by .= "1=4 <i ";
 426             }
 427             elsif ( $sort eq "title_za" ) {
 428                 $sort_by .= "1=4 >i ";
 429             }
 430         }
 431         if ($sort_by) {
 432             if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
 433                 warn "WARNING sort $sort_by failed";
 434             }
 435         }
 436     }    # finished looping through servers
 437
 438     # The big moment: asynchronously retrieve results from all servers
 439     while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 440         my $ev = $zconns[ $i - 1 ]->last_event();
 441         if ( $ev == ZOOM::Event::ZEND ) {
 442             next unless $results[ $i - 1 ];
 443             my $size = $results[ $i - 1 ]->size();
 444             if ( $size > 0 ) {
 445                 my $results_hash;
 446
 447                 # loop through the results
 448                 $results_hash->{'hits'} = $size;
 449                 my $times;
 450                 if ( $offset + $results_per_page <= $size ) {
 451                     $times = $offset + $results_per_page;
 452                 }
 453                 else {
 454                     $times = $size;
 455                 }
 456                 for ( my $j = $offset ; $j < $times ; $j++ ) {
 457                     my $records_hash;
 458                     my $record;
 459                     my $facet_record;
 460
 461                     ## Check if it's an index scan
 462                     if ($scan) {
 463                         my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
 464
 465                  # here we create a minimal MARC record and hand it off to the
 466                  # template just like a normal result ... perhaps not ideal, but
 467                  # it works for now
 468                         my $tmprecord = MARC::Record->new();
 469                         $tmprecord->encoding('UTF-8');
 470                         my $tmptitle;
 471                         my $tmpauthor;
 472
 473                 # the minimal record in author/title (depending on MARC flavour)
 474                         if (C4::Context->preference("marcflavour") eq "UNIMARC") {
 475                             $tmptitle = MARC::Field->new('200',' ',' ', a => $term, f => $occ);
 476                             $tmprecord->append_fields($tmptitle);
 477                         } else {
 478                             $tmptitle  = MARC::Field->new('245',' ',' ', a => $term,);
 479                             $tmpauthor = MARC::Field->new('100',' ',' ', a => $occ,);
 480                             $tmprecord->append_fields($tmptitle);
 481                             $tmprecord->append_fields($tmpauthor);
 482                         }
 483                         $results_hash->{'RECORDS'}[$j] = $tmprecord->as_usmarc();
 484                     }
 485
 486                     # not an index scan
 487                     else {
 488                         $record = $results[ $i - 1 ]->record($j)->raw();
 489
 490                         # warn "RECORD $j:".$record;
 491                         $results_hash->{'RECORDS'}[$j] = $record;
 492
 493             # Fill the facets while we're looping, but only for the biblioserver
 494                         $facet_record = MARC::Record->new_from_usmarc($record)
 495                           if $servers[ $i - 1 ] =~ /biblioserver/;
 496
 497                     #warn $servers[$i-1]."\n".$record; #.$facet_record->title();
 498                         if ($facet_record) {
 499                             for ( my $k = 0 ; $k <= @$facets ; $k++ ) {
 500                                 ($facets->[$k]) or next;
 501                                 my @fields = map {$facet_record->field($_)} @{$facets->[$k]->{'tags'}} ;
 502                                 for my $field (@fields) {
 503                                     my @subfields = $field->subfields();
 504                                     for my $subfield (@subfields) {
 505                                         my ( $code, $data ) = @$subfield;
 506                                         ($code eq $facets->[$k]->{'subfield'}) or next;
 507                                         $facets_counter->{ $facets->[$k]->{'link_value'} }->{$data}++;
 508                                     }
 509                                 }
 510                                 $facets_info->{ $facets->[$k]->{'link_value'} }->{'label_value'} =
 511                                     $facets->[$k]->{'label_value'};
 512                                 $facets_info->{ $facets->[$k]->{'link_value'} }->{'expanded'} =
 513                                     $facets->[$k]->{'expanded'};
 514                             }
 515                         }
 516                     }
 517                 }
 518                 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
 519             }
 520
 521             # warn "connection ", $i-1, ": $size hits";
 522             # warn $results[$i-1]->record(0)->render() if $size > 0;
 523
 524             # BUILD FACETS
 525             if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
 526                 for my $link_value (
 527                     sort { $facets_counter->{$b} <=> $facets_counter->{$a} }
 528                     keys %$facets_counter )
 529                 {
 530                     my $expandable;
 531                     my $number_of_facets;
 532                     my @this_facets_array;
 533                     for my $one_facet (
 534                         sort {
 535                             $facets_counter->{$link_value}
 536                               ->{$b} <=> $facets_counter->{$link_value}->{$a}
 537                         } keys %{ $facets_counter->{$link_value} }
 538                       )
 539                     {
 540                         $number_of_facets++;
 541                         if (   ( $number_of_facets < 6 )
 542                             || ( $expanded_facet eq $link_value )
 543                             || ( $facets_info->{$link_value}->{'expanded'} ) )
 544                         {
 545
 546                       # Sanitize the link value ), ( will cause errors with CCL,
 547                             my $facet_link_value = $one_facet;
 548                             $facet_link_value =~ s/(\(|\))/ /g;
 549
 550                             # fix the length that will display in the label,
 551                             my $facet_label_value = $one_facet;
 552                             $facet_label_value =
 553                               substr( $one_facet, 0, 20 ) . "..."
 554                               unless length($facet_label_value) <= 20;
 555
 556                             # if it's a branch, label by the name, not the code,
 557                             if ( $link_value =~ /branch/ ) {
 558                                 $facet_label_value =
 559                                   $branches->{$one_facet}->{'branchname'};
 560                             }
 561
 562                 # but we're down with the whole label being in the link's title.
 563                             my $facet_title_value = $one_facet;
 564
 565                             push @this_facets_array,
 566                               (
 567                                 {
 568                                     facet_count =>
 569                                       $facets_counter->{$link_value}
 570                                       ->{$one_facet},
 571                                     facet_label_value => $facet_label_value,
 572                                     facet_title_value => $facet_title_value,
 573                                     facet_link_value  => $facet_link_value,
 574                                     type_link_value   => $link_value,
 575                                 },
 576                               );
 577                         }
 578                     }
 579
 580                     # handle expanded option
 581                     unless ( $facets_info->{$link_value}->{'expanded'} ) {
 582                         $expandable = 1
 583                           if ( ( $number_of_facets > 6 )
 584                             && ( $expanded_facet ne $link_value ) );
 585                     }
 586                     push @facets_loop,
 587                       (
 588                         {
 589                             type_link_value => $link_value,
 590                             type_id         => $link_value . "_id",
 591                             "type_label_" . $facets_info->{$link_value}->{'label_value'} => 1,
 592                             facets     => \@this_facets_array,
 593                             expandable => $expandable,
 594                             expand     => $link_value,
 595                         }
 596                       ) unless ( ($facets_info->{$link_value}->{'label_value'} =~ /Libraries/) and (C4::Context->preference('singleBranchMode')) );
 597                 }
 598             }
 599         }
 600     }
 601     return ( undef, $results_hashref, \@facets_loop );
 602 }
 603
 604 sub pazGetRecords {
 605     my (
 606         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 607         $results_per_page, $offset,       $expanded_facet, $branches,
 608         $query_type,       $scan
 609     ) = @_;
 610
 611     my $paz = C4::Search::PazPar2->new(C4::Context->config('pazpar2url'));
 612     $paz->init();
 613     $paz->search($simple_query);
 614     sleep 1;
 615
 616     # do results
 617     my $results_hashref = {};
 618     my $stats = XMLin($paz->stat);
 619     my $results = XMLin($paz->show($offset, $results_per_page, 'work-title:1'), forcearray => 1);
 620
 621     # for a grouped search result, the number of hits
 622     # is the number of groups returned; 'bib_hits' will have
 623     # the total number of bibs.
 624     $results_hashref->{'biblioserver'}->{'hits'} = $results->{'merged'}->[0];
 625     $results_hashref->{'biblioserver'}->{'bib_hits'} = $stats->{'hits'};
 626
 627     HIT: foreach my $hit (@{ $results->{'hit'} }) {
 628         my $recid = $hit->{recid}->[0];
 629
 630         my $work_title = $hit->{'md-work-title'}->[0];
 631         my $work_author;
 632         if (exists $hit->{'md-work-author'}) {
 633             $work_author = $hit->{'md-work-author'}->[0];
 634         }
 635         my $group_label = (defined $work_author) ? "$work_title / $work_author" : $work_title;
 636
 637         my $result_group = {};
 638         $result_group->{'group_label'} = $group_label;
 639         $result_group->{'group_merge_key'} = $recid;
 640
 641         my $count = 1;
 642         if (exists $hit->{count}) {
 643             $count = $hit->{count}->[0];
 644         }
 645         $result_group->{'group_count'} = $count;
 646
 647         for (my $i = 0; $i < $count; $i++) {
 648             # FIXME -- may need to worry about diacritics here
 649             my $rec = $paz->record($recid, $i);
 650             push @{ $result_group->{'RECORDS'} }, $rec;
 651         }
 652
 653         push @{ $results_hashref->{'biblioserver'}->{'GROUPS'} }, $result_group;
 654     }
 655
 656     # pass through facets
 657     my $termlist_xml = $paz->termlist('author,subject');
 658     my $terms = XMLin($termlist_xml, forcearray => 1);
 659     my @facets_loop = ();
 660     #die Dumper($results);
 661 #    foreach my $list (sort keys %{ $terms->{'list'} }) {
 662 #        my @facets = ();
 663 #        foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) {
 664 #            push @facets, {
 665 #                facet_label_value => $facet->{'name'}->[0],
 666 #            };
 667 #        }
 668 #        push @facets_loop, ( {
 669 #            type_label => $list,
 670 #            facets => \@facets,
 671 #        } );
 672 #    }
 673
 674     return ( undef, $results_hashref, \@facets_loop );
 675 }
 676
 677 # STOPWORDS
 678 sub _remove_stopwords {
 679     my ( $operand, $index ) = @_;
 680     my @stopwords_removed;
 681
 682     # phrase and exact-qualified indexes shouldn't have stopwords removed
 683     if ( $index !~ m/phr|ext/ ) {
 684
 685 # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
 686 #       we use IsAlpha unicode definition, to deal correctly with diacritics.
 687 #       otherwise, a French word like "leçon" woudl be split into "le" "çon", "le"
 688 #       is a stopword, we'd get "çon" and wouldn't find anything...
 689         foreach ( keys %{ C4::Context->stopwords } ) {
 690             next if ( $_ =~ /(and|or|not)/ );    # don't remove operators
 691             if ( $operand =~
 692                 /(\P{IsAlpha}$_\P{IsAlpha}|^$_\P{IsAlpha}|\P{IsAlpha}$_$|^$_$)/ )
 693             {
 694                 $operand =~ s/\P{IsAlpha}$_\P{IsAlpha}/ /gi;
 695                 $operand =~ s/^$_\P{IsAlpha}/ /gi;
 696                 $operand =~ s/\P{IsAlpha}$_$/ /gi;
 697                                 $operand =~ s/$1//gi;
 698                 push @stopwords_removed, $_;
 699             }
 700         }
 701     }
 702     return ( $operand, \@stopwords_removed );
 703 }
 704
 705 # TRUNCATION
 706 sub _detect_truncation {
 707     my ( $operand, $index ) = @_;
 708     my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
 709         @regexpr );
 710     $operand =~ s/^ //g;
 711     my @wordlist = split( /\s/, $operand );
 712     foreach my $word (@wordlist) {
 713         if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
 714             push @rightlefttruncated, $word;
 715         }
 716         elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
 717             push @lefttruncated, $word;
 718         }
 719         elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
 720             push @righttruncated, $word;
 721         }
 722         elsif ( index( $word, "*" ) < 0 ) {
 723             push @nontruncated, $word;
 724         }
 725         else {
 726             push @regexpr, $word;
 727         }
 728     }
 729     return (
 730         \@nontruncated,       \@righttruncated, \@lefttruncated,
 731         \@rightlefttruncated, \@regexpr
 732     );
 733 }
 734
 735 # STEMMING
 736 sub _build_stemmed_operand {
 737     my ($operand) = @_;
 738     my $stemmed_operand;
 739
 740     # If operand contains a digit, it is almost certainly an identifier, and should
 741     # not be stemmed.  This is particularly relevant for ISBNs and ISSNs, which
 742     # can contain the letter "X" - for example, _build_stemmend_operand would reduce
 743     # "014100018X" to "x ", which for a MARC21 database would bring up irrelevant
 744     # results (e.g., "23 x 29 cm." from the 300$c).  Bug 2098.
 745     return $operand if $operand =~ /\d/;
 746
 747 # FIXME: the locale should be set based on the user's language and/or search choice
 748     my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
 749
 750 # FIXME: these should be stored in the db so the librarian can modify the behavior
 751     $stemmer->add_exceptions(
 752         {
 753             'and' => 'and',
 754             'or'  => 'or',
 755             'not' => 'not',
 756         }
 757     );
 758     my @words = split( / /, $operand );
 759     my $stems = $stemmer->stem(@words);
 760     for my $stem (@$stems) {
 761         $stemmed_operand .= "$stem";
 762         $stemmed_operand .= "?"
 763           unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
 764         $stemmed_operand .= " ";
 765     }
 766     warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
 767     return $stemmed_operand;
 768 }
 769
 770 # FIELD WEIGHTING
 771 sub _build_weighted_query {
 772
 773 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
 774 # pretty well but could work much better if we had a smarter query parser
 775     my ( $operand, $stemmed_operand, $index ) = @_;
 776     my $stemming      = C4::Context->preference("QueryStemming")     || 0;
 777     my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
 778     my $fuzzy_enabled = C4::Context->preference("QueryFuzzy")        || 0;
 779
 780     my $weighted_query .= "(rk=(";    # Specifies that we're applying rank
 781
 782     # Keyword, or, no index specified
 783     if ( ( $index eq 'kw' ) || ( !$index ) ) {
 784         $weighted_query .=
 785           "Title-cover,ext,r1=\"$operand\"";    # exact title-cover
 786         $weighted_query .= " or ti,ext,r2=\"$operand\"";    # exact title
 787         $weighted_query .= " or ti,phr,r3=\"$operand\"";    # phrase title
 788           #$weighted_query .= " or any,ext,r4=$operand";               # exact any
 789           #$weighted_query .=" or kw,wrdl,r5=\"$operand\"";            # word list any
 790         $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
 791           if $fuzzy_enabled;    # add fuzzy, word list
 792         $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
 793           if ( $stemming and $stemmed_operand )
 794           ;                     # add stemming, right truncation
 795         $weighted_query .= " or wrdl,r9=\"$operand\"";
 796
 797         # embedded sorting: 0 a-z; 1 z-a
 798         # $weighted_query .= ") or (sort1,aut=1";
 799     }
 800
 801     # Barcode searches should skip this process
 802     elsif ( $index eq 'bc' ) {
 803         $weighted_query .= "bc=\"$operand\"";
 804     }
 805
 806     # Authority-number searches should skip this process
 807     elsif ( $index eq 'an' ) {
 808         $weighted_query .= "an=\"$operand\"";
 809     }
 810
 811     # If the index already has more than one qualifier, wrap the operand
 812     # in quotes and pass it back (assumption is that the user knows what they
 813     # are doing and won't appreciate us mucking up their query
 814     elsif ( $index =~ ',' ) {
 815         $weighted_query .= " $index=\"$operand\"";
 816     }
 817
 818     #TODO: build better cases based on specific search indexes
 819     else {
 820         $weighted_query .= " $index,ext,r1=\"$operand\"";    # exact index
 821           #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
 822         $weighted_query .= " or $index,phr,r3=\"$operand\"";    # phrase index
 823         $weighted_query .=
 824           " or $index,rt,wrdl,r3=\"$operand\"";    # word list index
 825     }
 826
 827     $weighted_query .= "))";                       # close rank specification
 828     return $weighted_query;
 829 }
 830
 831 =head2 buildQuery
 832
 833 ( $error, $query,
 834 $simple_query, $query_cgi,
 835 $query_desc, $limit,
 836 $limit_cgi, $limit_desc,
 837 $stopwords_removed, $query_type ) = getRecords ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
 838
 839 Build queries and limits in CCL, CGI, Human,
 840 handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
 841
 842 See verbose embedded documentation.
 843
 844
 845 =cut
 846
 847 sub buildQuery {
 848     my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_;
 849
 850     warn "---------\nEnter buildQuery\n---------" if $DEBUG;
 851
 852     # dereference
 853     my @operators = @$operators if $operators;
 854     my @indexes   = @$indexes   if $indexes;
 855     my @operands  = @$operands  if $operands;
 856     my @limits    = @$limits    if $limits;
 857     my @sort_by   = @$sort_by   if $sort_by;
 858
 859     my $stemming         = C4::Context->preference("QueryStemming")        || 0;
 860     my $auto_truncation  = C4::Context->preference("QueryAutoTruncate")    || 0;
 861     my $weight_fields    = C4::Context->preference("QueryWeightFields")    || 0;
 862     my $fuzzy_enabled    = C4::Context->preference("QueryFuzzy")           || 0;
 863     my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
 864
 865     # no stemming/weight/fuzzy in NoZebra
 866     if ( C4::Context->preference("NoZebra") ) {
 867         $stemming      = 0;
 868         $weight_fields = 0;
 869         $fuzzy_enabled = 0;
 870     }
 871
 872     my $query        = $operands[0];
 873     my $simple_query = $operands[0];
 874
 875     # initialize the variables we're passing back
 876     my $query_cgi;
 877     my $query_desc;
 878     my $query_type;
 879
 880     my $limit;
 881     my $limit_cgi;
 882     my $limit_desc;
 883
 884     my $stopwords_removed;    # flag to determine if stopwords have been removed
 885
 886 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
 887 # DIAGNOSTIC ONLY!!
 888     if ( $query =~ /^ccl=/ ) {
 889         return ( undef, $', $', $', $', '', '', '', '', 'ccl' );
 890     }
 891     if ( $query =~ /^cql=/ ) {
 892         return ( undef, $', $', $', $', '', '', '', '', 'cql' );
 893     }
 894     if ( $query =~ /^pqf=/ ) {
 895         return ( undef, $', $', $', $', '', '', '', '', 'pqf' );
 896     }
 897
 898     # pass nested queries directly
 899     # FIXME: need better handling of some of these variables in this case
 900     if ( $query =~ /(\(|\))/ ) {
 901         return (
 902             undef,              $query, $simple_query, $query_cgi,
 903             $query,             $limit, $limit_cgi,    $limit_desc,
 904             $stopwords_removed, 'ccl'
 905         );
 906     }
 907
 908 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
 909 # query operands and indexes and add stemming, truncation, field weighting, etc.
 910 # Once we do so, we'll end up with a value in $query, just like if we had an
 911 # incoming $query from the user
 912     else {
 913         $query = ""
 914           ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
 915         my $previous_operand
 916           ;    # a flag used to keep track if there was a previous query
 917                # if there was, we can apply the current operator
 918                # for every operand
 919         for ( my $i = 0 ; $i <= @operands ; $i++ ) {
 920
 921             # COMBINE OPERANDS, INDEXES AND OPERATORS
 922             if ( $operands[$i] ) {
 923
 924               # A flag to determine whether or not to add the index to the query
 925                 my $indexes_set;
 926
 927 # If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling
 928                 if ( $operands[$i] =~ /(:|=)/ || $scan ) {
 929                     $weight_fields    = 0;
 930                     $stemming         = 0;
 931                     $remove_stopwords = 0;
 932                 }
 933                 my $operand = $operands[$i];
 934                 my $index   = $indexes[$i];
 935
 936                 # Add index-specific attributes
 937                 # Date of Publication
 938                 if ( $index eq 'yr' ) {
 939                     $index .= ",st-numeric";
 940 #                     $indexes_set++;
 941                                         $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
 942                 }
 943
 944                 # Date of Acquisition
 945                 elsif ( $index eq 'acqdate' ) {
 946                     $index .= ",st-date-normalized";
 947 #                     $indexes_set++;
 948                                         $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
 949                 }
 950                 # ISBN,ISSN,Standard Number, don't need special treatment
 951                 elsif ( $index eq 'nb' || $index eq 'ns' ) {
 952 #                     $indexes_set++;
 953                     (
 954                         $stemming,      $auto_truncation,
 955                         $weight_fields, $fuzzy_enabled,
 956                         $remove_stopwords
 957                     ) = ( 0, 0, 0, 0, 0 );
 958
 959                 }
 960                 # Set default structure attribute (word list)
 961                 my $struct_attr;
 962                 unless ( $indexes_set || !$index || $index =~ /(st-|phr|ext|wrdl)/ ) {
 963                     $struct_attr = ",wrdl";
 964                 }
 965
 966                 # Some helpful index variants
 967                 my $index_plus       = $index . $struct_attr . ":" if $index;
 968                 my $index_plus_comma = $index . $struct_attr . "," if $index;
 969
 970                 # Remove Stopwords
 971                 if ($remove_stopwords) {
 972                     ( $operand, $stopwords_removed ) =
 973                       _remove_stopwords( $operand, $index );
 974                     warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
 975                     warn "REMOVED STOPWORDS: @$stopwords_removed"
 976                       if ( $stopwords_removed && $DEBUG );
 977                 }
 978
 979                 # Detect Truncation
 980                 my $truncated_operand;
 981                 my( $nontruncated, $righttruncated, $lefttruncated,
 982                     $rightlefttruncated, $regexpr
 983                 ) = _detect_truncation( $operand, $index );
 984                 warn
 985 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
 986                   if $DEBUG;
 987
 988                 # Apply Truncation
 989                 if (
 990                     scalar(@$righttruncated) + scalar(@$lefttruncated) +
 991                     scalar(@$rightlefttruncated) > 0 )
 992                 {
 993
 994                # Don't field weight or add the index to the query, we do it here
 995                     $indexes_set = 1;
 996                     undef $weight_fields;
 997                     my $previous_truncation_operand;
 998                     if ( scalar(@$nontruncated) > 0 ) {
 999                         $truncated_operand .= "$index_plus @$nontruncated ";
1000                         $previous_truncation_operand = 1;
1001                     }
1002                     if ( scalar(@$righttruncated) > 0 ) {
1003                         $truncated_operand .= "and "
1004                           if $previous_truncation_operand;
1005                         $truncated_operand .=
1006                           "$index_plus_comma" . "rtrn:@$righttruncated ";
1007                         $previous_truncation_operand = 1;
1008                     }
1009                     if ( scalar(@$lefttruncated) > 0 ) {
1010                         $truncated_operand .= "and "
1011                           if $previous_truncation_operand;
1012                         $truncated_operand .=
1013                           "$index_plus_comma" . "ltrn:@$lefttruncated ";
1014                         $previous_truncation_operand = 1;
1015                     }
1016                     if ( scalar(@$rightlefttruncated) > 0 ) {
1017                         $truncated_operand .= "and "
1018                           if $previous_truncation_operand;
1019                         $truncated_operand .=
1020                           "$index_plus_comma" . "rltrn:@$rightlefttruncated ";
1021                         $previous_truncation_operand = 1;
1022                     }
1023                 }
1024                 $operand = $truncated_operand if $truncated_operand;
1025                 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
1026
1027                 # Handle Stemming
1028                 my $stemmed_operand;
1029                 $stemmed_operand = _build_stemmed_operand($operand)
1030                   if $stemming;
1031                 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
1032
1033                 # Handle Field Weighting
1034                 my $weighted_operand;
1035                 $weighted_operand =
1036                   _build_weighted_query( $operand, $stemmed_operand, $index )
1037                   if $weight_fields;
1038                 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
1039                 $operand = $weighted_operand if $weight_fields;
1040                 $indexes_set = 1 if $weight_fields;
1041
1042                 # If there's a previous operand, we need to add an operator
1043                 if ($previous_operand) {
1044
1045                     # User-specified operator
1046                     if ( $operators[ $i - 1 ] ) {
1047                         $query     .= " $operators[$i-1] ";
1048                         $query     .= " $index_plus " unless $indexes_set;
1049                         $query     .= " $operand";
1050                         $query_cgi .= "&op=$operators[$i-1]";
1051                         $query_cgi .= "&idx=$index" if $index;
1052                         $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1053                         $query_desc .=
1054                           " $operators[$i-1] $index_plus $operands[$i]";
1055                     }
1056
1057                     # Default operator is and
1058                     else {
1059                         $query      .= " and ";
1060                         $query      .= "$index_plus " unless $indexes_set;
1061                         $query      .= "$operand";
1062                         $query_cgi  .= "&op=and&idx=$index" if $index;
1063                         $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1064                         $query_desc .= " and $index_plus $operands[$i]";
1065                     }
1066                 }
1067
1068                 # There isn't a pervious operand, don't need an operator
1069                 else {
1070
1071                     # Field-weighted queries already have indexes set
1072                     $query .= " $index_plus " unless $indexes_set;
1073                     $query .= $operand;
1074                     $query_desc .= " $index_plus $operands[$i]";
1075                     $query_cgi  .= "&idx=$index" if $index;
1076                     $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1077                     $previous_operand = 1;
1078                 }
1079             }    #/if $operands
1080         }    # /for
1081     }
1082     warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
1083
1084     # add limits
1085     my $group_OR_limits;
1086     my $availability_limit;
1087     foreach my $this_limit (@limits) {
1088         if ( $this_limit =~ /available/ ) {
1089
1090 # 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1091 # In English:
1092 # all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1093             $availability_limit .=
1094 "( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and (lost,st-numeric=0) )"; #or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )";
1095             $limit_cgi  .= "&limit=available";
1096             $limit_desc .= "";
1097         }
1098
1099         # group_OR_limits, prefixed by mc-
1100         # OR every member of the group
1101         elsif ( $this_limit =~ /mc/ ) {
1102             $group_OR_limits .= " or " if $group_OR_limits;
1103             $limit_desc      .= " or " if $group_OR_limits;
1104             $group_OR_limits .= "$this_limit";
1105             $limit_cgi       .= "&limit=$this_limit";
1106             $limit_desc      .= " $this_limit";
1107         }
1108
1109         # Regular old limits
1110         else {
1111             if ($this_limit){
1112                 $limit .= " and " if $limit || $query;
1113                 $limit      .= "$this_limit";
1114                 $limit_cgi  .= "&limit=$this_limit";
1115                 $limit_desc .= " $this_limit";
1116             }
1117         }
1118     }
1119     if ($group_OR_limits) {
1120         $limit .= " and " if ( $query || $limit );
1121         $limit .= "($group_OR_limits)";
1122     }
1123     if ($availability_limit) {
1124         $limit .= " and " if ( $query || $limit );
1125         $limit .= "($availability_limit)";
1126     }
1127
1128     # Normalize the query and limit strings
1129     $query =~ s/:/=/g;
1130     $limit =~ s/:/=/g;
1131     for ( $query, $query_desc, $limit, $limit_desc ) {
1132         $_ =~ s/  / /g;    # remove extra spaces
1133         $_ =~ s/^ //g;     # remove any beginning spaces
1134         $_ =~ s/ $//g;     # remove any ending spaces
1135         $_ =~ s/==/=/g;    # remove double == from query
1136     }
1137     $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1138
1139     for ($query_cgi,$simple_query) {
1140         $_ =~ s/"//g;
1141     }
1142     # append the limit to the query
1143     $query .= " " . $limit;
1144
1145     # Warnings if DEBUG
1146     if ($DEBUG) {
1147         warn "QUERY:" . $query;
1148         warn "QUERY CGI:" . $query_cgi;
1149         warn "QUERY DESC:" . $query_desc;
1150         warn "LIMIT:" . $limit;
1151         warn "LIMIT CGI:" . $limit_cgi;
1152         warn "LIMIT DESC:" . $limit_desc;
1153         warn "---------\nLeave buildQuery\n---------";
1154     }
1155     return (
1156         undef,              $query, $simple_query, $query_cgi,
1157         $query_desc,        $limit, $limit_cgi,    $limit_desc,
1158         $stopwords_removed, $query_type
1159     );
1160 }
1161
1162 =head2 searchResults
1163
1164 Format results in a form suitable for passing to the template
1165
1166 =cut
1167
1168 # IMO this subroutine is pretty messy still -- it's responsible for
1169 # building the HTML output for the template
1170 sub searchResults {
1171     my ( $searchdesc, $hits, $results_per_page, $offset, $scan, @marcresults ) = @_;
1172     my $dbh = C4::Context->dbh;
1173     my $even = 1;
1174     my @newresults;
1175
1176     # add search-term highlighting via <span>s on the search terms
1177     my $span_terms_hashref;
1178     for my $span_term ( split( / /, $searchdesc ) ) {
1179         $span_term =~ s/(.*=|\)|\(|\+|\.|\*)//g;
1180         $span_terms_hashref->{$span_term}++;
1181     }
1182
1183     #Build branchnames hash
1184     #find branchname
1185     #get branch information.....
1186     my %branches;
1187     my $bsth =
1188       $dbh->prepare("SELECT branchcode,branchname FROM branches")
1189       ;    # FIXME : use C4::Koha::GetBranches
1190     $bsth->execute();
1191     while ( my $bdata = $bsth->fetchrow_hashref ) {
1192         $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'};
1193     }
1194 # FIXME - We build an authorised values hash here, using the default framework
1195 # though it is possible to have different authvals for different fws.
1196
1197     my $shelflocations =GetKohaAuthorisedValues('items.location','');
1198
1199     # get notforloan authorised value list (see $shelflocations  FIXME)
1200     my $notforloan_authorised_value = GetAuthValCode('items.notforloan','');
1201
1202     #Build itemtype hash
1203     #find itemtype & itemtype image
1204     my %itemtypes;
1205     $bsth =
1206       $dbh->prepare(
1207         "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes"
1208       );
1209     $bsth->execute();
1210     while ( my $bdata = $bsth->fetchrow_hashref ) {
1211                 foreach (qw(description imageurl summary notforloan)) {
1212                 $itemtypes{ $bdata->{'itemtype'} }->{$_} = $bdata->{$_};
1213                 }
1214     }
1215
1216     #search item field code
1217     my $sth =
1218       $dbh->prepare(
1219 "SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'"
1220       );
1221     $sth->execute;
1222     my ($itemtag) = $sth->fetchrow;
1223
1224     ## find column names of items related to MARC
1225     my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items");
1226     $sth2->execute;
1227     my %subfieldstosearch;
1228     while ( ( my $column ) = $sth2->fetchrow ) {
1229         my ( $tagfield, $tagsubfield ) =
1230           &GetMarcFromKohaField( "items." . $column, "" );
1231         $subfieldstosearch{$column} = $tagsubfield;
1232     }
1233
1234     # handle which records to actually retrieve
1235     my $times;
1236     if ( $hits && $offset + $results_per_page <= $hits ) {
1237         $times = $offset + $results_per_page;
1238     }
1239     else {
1240         $times = $hits;  # FIXME: if $hits is undefined, why do we want to equal it?
1241     }
1242
1243     # loop through all of the records we've retrieved
1244     for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
1245         my $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
1246         my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' );
1247         $oldbiblio->{subtitle} = C4::Biblio::get_koha_field_from_marc('bibliosubtitle', 'subtitle', $marcrecord, '');
1248         $oldbiblio->{result_number} = $i + 1;
1249
1250         # add imageurl to itemtype if there is one
1251         $oldbiblio->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
1252
1253         $oldbiblio->{'authorised_value_images'}  = C4::Items::get_authorised_value_images( C4::Biblio::get_biblio_authorised_values( $oldbiblio->{'biblionumber'}, $marcrecord ) );
1254         (my $aisbn) = $oldbiblio->{isbn} =~ /([\d-]*[X]*)/;
1255         $aisbn =~ s/-//g;
1256         $oldbiblio->{amazonisbn} = $aisbn;
1257                 $oldbiblio->{description} = $itemtypes{ $oldbiblio->{itemtype} }->{description};
1258  # Build summary if there is one (the summary is defined in the itemtypes table)
1259  # FIXME: is this used anywhere, I think it can be commented out? -- JF
1260         if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
1261             my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1262             my @fields  = $marcrecord->fields();
1263             foreach my $field (@fields) {
1264                 my $tag      = $field->tag();
1265                 my $tagvalue = $field->as_string();
1266                 if (! utf8::is_utf8($tagvalue)) {
1267                     utf8::decode($tagvalue);
1268                 }
1269
1270                 $summary =~
1271                   s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
1272                 unless ( $tag < 10 ) {
1273                     my @subf = $field->subfields;
1274                     for my $i ( 0 .. $#subf ) {
1275                         my $subfieldcode  = $subf[$i][0];
1276                         my $subfieldvalue = $subf[$i][1];
1277                         if (! utf8::is_utf8($subfieldvalue)) {
1278                             utf8::decode($subfieldvalue);
1279                         }
1280                         my $tagsubf       = $tag . $subfieldcode;
1281                         $summary =~
1282 s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
1283                     }
1284                 }
1285             }
1286             # FIXME: yuk
1287             $summary =~ s/\[(.*?)]//g;
1288             $summary =~ s/\n/<br\/>/g;
1289             $oldbiblio->{summary} = $summary;
1290         }
1291
1292         # save an author with no <span> tag, for the <a href=search.pl?q=<!--tmpl_var name="author"-->> link
1293         $oldbiblio->{'author_nospan'} = $oldbiblio->{'author'};
1294         $oldbiblio->{'title_nospan'} = $oldbiblio->{'title'};
1295         $oldbiblio->{'subtitle_nospan'} = $oldbiblio->{'subtitle'};
1296         # Add search-term highlighting to the whole record where they match using <span>s
1297         if (C4::Context->preference("OpacHighlightedWords")){
1298             my $searchhighlightblob;
1299             for my $highlight_field ( $marcrecord->fields ) {
1300
1301     # FIXME: need to skip title, subtitle, author, etc., as they are handled below
1302                 next if $highlight_field->tag() =~ /(^00)/;    # skip fixed fields
1303                 for my $subfield ($highlight_field->subfields()) {
1304                     my $match;
1305                     next if $subfield->[0] eq '9';
1306                     my $field = $subfield->[1];
1307                     for my $term ( keys %$span_terms_hashref ) {
1308                         if ( ( $field =~ /$term/i ) && (( length($term) > 3 ) || ($field =~ / $term /i)) ) {
1309                             $field =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1310                         $match++;
1311                         }
1312                     }
1313                     $searchhighlightblob .= $field . " ... " if $match;
1314                 }
1315
1316             }
1317             $searchhighlightblob = ' ... '.$searchhighlightblob if $searchhighlightblob;
1318             $oldbiblio->{'searchhighlightblob'} = $searchhighlightblob;
1319         }
1320
1321         # Add search-term highlighting to the title, subtitle, etc. fields
1322         for my $term ( keys %$span_terms_hashref ) {
1323             my $old_term = $term;
1324             if ( length($term) > 3 ) {
1325                 $term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\]|\\|\*)//g;
1326                                 foreach(qw(title subtitle author publishercode place pages notes size)) {
1327                         $oldbiblio->{$_} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1328                                 }
1329             }
1330         }
1331
1332         ($i % 2) and $oldbiblio->{'toggle'} = 1;
1333
1334         # Pull out the items fields
1335         my @fields = $marcrecord->field($itemtag);
1336
1337         # Setting item statuses for display
1338         my @available_items_loop;
1339         my @onloan_items_loop;
1340         my @other_items_loop;
1341
1342         my $available_items;
1343         my $onloan_items;
1344         my $other_items;
1345
1346         my $ordered_count         = 0;
1347         my $available_count       = 0;
1348         my $onloan_count          = 0;
1349         my $longoverdue_count     = 0;
1350         my $other_count           = 0;
1351         my $wthdrawn_count        = 0;
1352         my $itemlost_count        = 0;
1353         my $itembinding_count     = 0;
1354         my $itemdamaged_count     = 0;
1355         my $item_in_transit_count = 0;
1356         my $can_place_holds       = 0;
1357         my $items_count           = scalar(@fields);
1358         my $maxitems =
1359           ( C4::Context->preference('maxItemsinSearchResults') )
1360           ? C4::Context->preference('maxItemsinSearchResults') - 1
1361           : 1;
1362
1363         # loop through every item
1364         foreach my $field (@fields) {
1365             my $item;
1366
1367             # populate the items hash
1368             foreach my $code ( keys %subfieldstosearch ) {
1369                 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1370             }
1371                         my $hbranch     = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'homebranch'    : 'holdingbranch';
1372                         my $otherbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'holdingbranch' : 'homebranch';
1373             # set item's branch name, use HomeOrHoldingBranch syspref first, fall back to the other one
1374             if ($item->{$hbranch}) {
1375                 $item->{'branchname'} = $branches{$item->{$hbranch}};
1376             }
1377             elsif ($item->{$otherbranch}) {     # Last resort
1378                 $item->{'branchname'} = $branches{$item->{$otherbranch}};
1379             }
1380
1381                         my $prefix = $item->{$hbranch} . '--' . $item->{location} . $item->{itype} . $item->{itemcallnumber};
1382 # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
1383             if ( $item->{onloan} ) {
1384                 $onloan_count++;
1385                                 my $key = $prefix . $item->{due_date};
1386                                 $onloan_items->{$key}->{due_date} = format_date($item->{onloan});
1387                                 $onloan_items->{$key}->{count}++ if $item->{$hbranch};
1388                                 $onloan_items->{$key}->{branchname} = $item->{branchname};
1389                                 $onloan_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1390                                 $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber};
1391                                 $onloan_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1392                                 $onloan_items->{$key}->{barcode} = $item->{barcode};
1393                 # if something's checked out and lost, mark it as 'long overdue'
1394                 if ( $item->{itemlost} ) {
1395                     $onloan_items->{$prefix}->{longoverdue}++;
1396                     $longoverdue_count++;
1397                 } else {        # can place holds as long as item isn't lost
1398                     $can_place_holds = 1;
1399                 }
1400             }
1401
1402          # items not on loan, but still unavailable ( lost, withdrawn, damaged )
1403             else {
1404
1405                 # item is on order
1406                 if ( $item->{notforloan} == -1 ) {
1407                     $ordered_count++;
1408                 }
1409
1410                 # is item in transit?
1411                 my $transfertwhen = '';
1412                 my ($transfertfrom, $transfertto);
1413
1414                 unless ($item->{wthdrawn}
1415                         || $item->{itemlost}
1416                         || $item->{damaged}
1417                         || $item->{notforloan}
1418                         || $items_count > 20) {
1419
1420                     # A couple heuristics to limit how many times
1421                     # we query the database for item transfer information, sacrificing
1422                     # accuracy in some cases for speed;
1423                     #
1424                     # 1. don't query if item has one of the other statuses
1425                     # 2. don't check transit status if the bib has
1426                     #    more than 20 items
1427                     #
1428                     # FIXME: to avoid having the query the database like this, and to make
1429                     #        the in transit status count as unavailable for search limiting,
1430                     #        should map transit status to record indexed in Zebra.
1431                     #
1432                     ($transfertwhen, $transfertfrom, $transfertto) = C4::Circulation::GetTransfers($item->{itemnumber});
1433                 }
1434
1435                 # item is withdrawn, lost or damaged
1436                 if (   $item->{wthdrawn}
1437                     || $item->{itemlost}
1438                     || $item->{damaged}
1439                     || $item->{notforloan}
1440                     || ($transfertwhen ne ''))
1441                 {
1442                     $wthdrawn_count++        if $item->{wthdrawn};
1443                     $itemlost_count++        if $item->{itemlost};
1444                     $itemdamaged_count++     if $item->{damaged};
1445                     $item_in_transit_count++ if $transfertwhen ne '';
1446                     $item->{status} = $item->{wthdrawn} . "-" . $item->{itemlost} . "-" . $item->{damaged} . "-" . $item->{notforloan};
1447                     $other_count++;
1448
1449                                         my $key = $prefix . $item->{status};
1450                                         foreach (qw(wthdrawn itemlost damaged branchname itemcallnumber)) {
1451                         $other_items->{$key}->{$_} = $item->{$_};
1452                                         }
1453                     $other_items->{$key}->{intransit} = ($transfertwhen ne '') ? 1 : 0;
1454                                         $other_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1455                                         $other_items->{$key}->{count}++ if $item->{$hbranch};
1456                                         $other_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1457                                         $other_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1458                                         $other_items->{$key}->{barcode} = $item->{barcode};
1459                 }
1460                 # item is available
1461                 else {
1462                     $can_place_holds = 1;
1463                     $available_count++;
1464                                         $available_items->{$prefix}->{count}++ if $item->{$hbranch};
1465                                         foreach (qw(branchname itemcallnumber barcode)) {
1466                         $available_items->{$prefix}->{$_} = $item->{$_};
1467                                         }
1468                                         $available_items->{$prefix}->{location} = $shelflocations->{ $item->{location} };
1469                                         $available_items->{$prefix}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1470                 }
1471             }
1472         }    # notforloan, item level and biblioitem level
1473         my ( $availableitemscount, $onloanitemscount, $otheritemscount );
1474         $maxitems =
1475           ( C4::Context->preference('maxItemsinSearchResults') )
1476           ? C4::Context->preference('maxItemsinSearchResults') - 1
1477           : 1;
1478         for my $key ( sort keys %$onloan_items ) {
1479             (++$onloanitemscount > $maxitems) and last;
1480             push @onloan_items_loop, $onloan_items->{$key};
1481         }
1482         for my $key ( sort keys %$other_items ) {
1483             (++$otheritemscount > $maxitems) and last;
1484             push @other_items_loop, $other_items->{$key};
1485         }
1486         for my $key ( sort keys %$available_items ) {
1487             (++$availableitemscount > $maxitems) and last;
1488             push @available_items_loop, $available_items->{$key}
1489         }
1490
1491         # XSLT processing of some stuff
1492         if (C4::Context->preference("XSLTResultsDisplay") && !$scan) {
1493             my $newxmlrecord = XSLTParse4Display($oldbiblio->{biblionumber}, $marcrecord, C4::Context->config('opachtdocs')."/prog/en/xslt/MARC21slim2OPACResults.xsl");
1494             $oldbiblio->{XSLTResultsRecord} = $newxmlrecord;
1495         }
1496
1497         # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
1498         $can_place_holds = 0
1499           if $itemtypes{ $oldbiblio->{itemtype} }->{notforloan};
1500         $oldbiblio->{norequests} = 1 unless $can_place_holds;
1501         $oldbiblio->{itemsplural}          = 1 if $items_count > 1;
1502         $oldbiblio->{items_count}          = $items_count;
1503         $oldbiblio->{available_items_loop} = \@available_items_loop;
1504         $oldbiblio->{onloan_items_loop}    = \@onloan_items_loop;
1505         $oldbiblio->{other_items_loop}     = \@other_items_loop;
1506         $oldbiblio->{availablecount}       = $available_count;
1507         $oldbiblio->{availableplural}      = 1 if $available_count > 1;
1508         $oldbiblio->{onloancount}          = $onloan_count;
1509         $oldbiblio->{onloanplural}         = 1 if $onloan_count > 1;
1510         $oldbiblio->{othercount}           = $other_count;
1511         $oldbiblio->{otherplural}          = 1 if $other_count > 1;
1512         $oldbiblio->{wthdrawncount}        = $wthdrawn_count;
1513         $oldbiblio->{itemlostcount}        = $itemlost_count;
1514         $oldbiblio->{damagedcount}         = $itemdamaged_count;
1515         $oldbiblio->{intransitcount}       = $item_in_transit_count;
1516         $oldbiblio->{orderedcount}         = $ordered_count;
1517         $oldbiblio->{isbn} =~
1518           s/-//g;    # deleting - in isbn to enable amazon content
1519         push( @newresults, $oldbiblio );
1520     }
1521     return @newresults;
1522 }
1523
1524 #----------------------------------------------------------------------
1525 #
1526 # Non-Zebra GetRecords#
1527 #----------------------------------------------------------------------
1528
1529 =head2 NZgetRecords
1530
1531   NZgetRecords has the same API as zera getRecords, even if some parameters are not managed
1532
1533 =cut
1534
1535 sub NZgetRecords {
1536     my (
1537         $query,            $simple_query, $sort_by_ref,    $servers_ref,
1538         $results_per_page, $offset,       $expanded_facet, $branches,
1539         $query_type,       $scan
1540     ) = @_;
1541     warn "query =$query" if $DEBUG;
1542     my $result = NZanalyse($query);
1543     warn "results =$result" if $DEBUG;
1544     return ( undef,
1545         NZorder( $result, @$sort_by_ref[0], $results_per_page, $offset ),
1546         undef );
1547 }
1548
1549 =head2 NZanalyse
1550
1551   NZanalyse : get a CQL string as parameter, and returns a list of biblionumber;title,biblionumber;title,...
1552   the list is built from an inverted index in the nozebra SQL table
1553   note that title is here only for convenience : the sorting will be very fast when requested on title
1554   if the sorting is requested on something else, we will have to reread all results, and that may be longer.
1555
1556 =cut
1557
1558 sub NZanalyse {
1559     my ( $string, $server ) = @_;
1560 #     warn "---------"       if $DEBUG;
1561     warn " NZanalyse" if $DEBUG;
1562 #     warn "---------"       if $DEBUG;
1563
1564  # $server contains biblioserver or authorities, depending on what we search on.
1565  #warn "querying : $string on $server";
1566     $server = 'biblioserver' unless $server;
1567
1568 # if we have a ", replace the content to discard temporarily any and/or/not inside
1569     my $commacontent;
1570     if ( $string =~ /"/ ) {
1571         $string =~ s/"(.*?)"/__X__/;
1572         $commacontent = $1;
1573         warn "commacontent : $commacontent" if $DEBUG;
1574     }
1575
1576 # split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y"
1577 # then, call again NZanalyse with $left and $right
1578 # (recursive until we find a leaf (=> something without and/or/not)
1579 # delete repeated operator... Would then go in infinite loop
1580     while ( $string =~ s/( and| or| not| AND| OR| NOT)\1/$1/g ) {
1581     }
1582
1583     #process parenthesis before.
1584     if ( $string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/ ) {
1585         my $left     = $1;
1586         my $right    = $4;
1587         my $operator = lc($3);   # FIXME: and/or/not are operators, not operands
1588         warn
1589 "dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right"
1590           if $DEBUG;
1591         my $leftresult = NZanalyse( $left, $server );
1592         if ($operator) {
1593             my $rightresult = NZanalyse( $right, $server );
1594
1595             # OK, we have the results for right and left part of the query
1596             # depending of operand, intersect, union or exclude both lists
1597             # to get a result list
1598             if ( $operator eq ' and ' ) {
1599                 return NZoperatorAND($leftresult,$rightresult);
1600             }
1601             elsif ( $operator eq ' or ' ) {
1602
1603                 # just merge the 2 strings
1604                 return $leftresult . $rightresult;
1605             }
1606             elsif ( $operator eq ' not ' ) {
1607                 return NZoperatorNOT($leftresult,$rightresult);
1608             }
1609         }
1610         else {
1611 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1612             return $leftresult;
1613         }
1614     }
1615     warn "string :" . $string if $DEBUG;
1616     my $left = "";
1617     my $right = "";
1618     my $operator = "";
1619     if ($string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/) {
1620         $left     = $1;
1621         $right    = $3;
1622         $operator = lc($2);    # FIXME: and/or/not are operators, not operands
1623     }
1624     warn "no parenthesis. left : $left operator: $operator right: $right"
1625       if $DEBUG;
1626
1627     # it's not a leaf, we have a and/or/not
1628     if ($operator) {
1629
1630         # reintroduce comma content if needed
1631         $right =~ s/__X__/"$commacontent"/ if $commacontent;
1632         $left  =~ s/__X__/"$commacontent"/ if $commacontent;
1633         warn "node : $left / $operator / $right\n" if $DEBUG;
1634         my $leftresult  = NZanalyse( $left,  $server );
1635         my $rightresult = NZanalyse( $right, $server );
1636         warn " leftresult : $leftresult" if $DEBUG;
1637         warn " rightresult : $rightresult" if $DEBUG;
1638         # OK, we have the results for right and left part of the query
1639         # depending of operand, intersect, union or exclude both lists
1640         # to get a result list
1641         if ( $operator eq ' and ' ) {
1642             warn "NZAND";
1643             return NZoperatorAND($leftresult,$rightresult);
1644         }
1645         elsif ( $operator eq ' or ' ) {
1646
1647             # just merge the 2 strings
1648             return $leftresult . $rightresult;
1649         }
1650         elsif ( $operator eq ' not ' ) {
1651             return NZoperatorNOT($leftresult,$rightresult);
1652         }
1653         else {
1654
1655 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1656             die "error : operand unknown : $operator for $string";
1657         }
1658
1659         # it's a leaf, do the real SQL query and return the result
1660     }
1661     else {
1662         $string =~ s/__X__/"$commacontent"/ if $commacontent;
1663         $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g;
1664         #remove trailing blank at the beginning
1665         $string =~ s/^ //g;
1666         warn "leaf:$string" if $DEBUG;
1667
1668         # parse the string in in operator/operand/value again
1669         my $left = "";
1670         my $operator = "";
1671         my $right = "";
1672         if ($string =~ /(.*)(>=|<=)(.*)/) {
1673             $left     = $1;
1674             $operator = $2;
1675             $right    = $3;
1676         } else {
1677             $left = $string;
1678         }
1679 #         warn "handling leaf... left:$left operator:$operator right:$right"
1680 #           if $DEBUG;
1681         unless ($operator) {
1682             if ($string =~ /(.*)(>|<|=)(.*)/) {
1683                 $left     = $1;
1684                 $operator = $2;
1685                 $right    = $3;
1686                 warn
1687     "handling unless (operator)... left:$left operator:$operator right:$right"
1688                 if $DEBUG;
1689             } else {
1690                 $left = $string;
1691             }
1692         }
1693         my $results;
1694
1695 # strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr...
1696         $left =~ s/ .*$//;
1697
1698         # automatic replace for short operators
1699         $left = 'title'            if $left =~ '^ti$';
1700         $left = 'author'           if $left =~ '^au$';
1701         $left = 'publisher'        if $left =~ '^pb$';
1702         $left = 'subject'          if $left =~ '^su$';
1703         $left = 'koha-Auth-Number' if $left =~ '^an$';
1704         $left = 'keyword'          if $left =~ '^kw$';
1705         warn "handling leaf... left:$left operator:$operator right:$right" if $DEBUG;
1706         if ( $operator && $left ne 'keyword' ) {
1707
1708             #do a specific search
1709             my $dbh = C4::Context->dbh;
1710             $operator = 'LIKE' if $operator eq '=' and $right =~ /%/;
1711             my $sth =
1712               $dbh->prepare(
1713 "SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?"
1714               );
1715             warn "$left / $operator / $right\n" if $DEBUG;
1716
1717             # split each word, query the DB and build the biblionumbers result
1718             #sanitizing leftpart
1719             $left =~ s/^\s+|\s+$//;
1720             foreach ( split / /, $right ) {
1721                 my $biblionumbers;
1722                 $_ =~ s/^\s+|\s+$//;
1723                 next unless $_;
1724                 warn "EXECUTE : $server, $left, $_" if $DEBUG;
1725                 $sth->execute( $server, $left, $_ )
1726                   or warn "execute failed: $!";
1727                 while ( my ( $line, $value ) = $sth->fetchrow ) {
1728
1729 # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
1730 # otherwise, fill the result
1731                     $biblionumbers .= $line
1732                       unless ( $right =~ /^\d+$/ && $value =~ /\D/ );
1733                     warn "result : $value "
1734                       . ( $right  =~ /\d/ ) . "=="
1735                       . ( $value =~ /\D/?$line:"" ) if $DEBUG;         #= $line";
1736                 }
1737
1738 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1739                 if ($results) {
1740                     warn "NZAND" if $DEBUG;
1741                     $results = NZoperatorAND($biblionumbers,$results);
1742                 }
1743                 else {
1744                     $results = $biblionumbers;
1745                 }
1746             }
1747         }
1748         else {
1749
1750       #do a complete search (all indexes), if index='kw' do complete search too.
1751             my $dbh = C4::Context->dbh;
1752             my $sth =
1753               $dbh->prepare(
1754 "SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?"
1755               );
1756
1757             # split each word, query the DB and build the biblionumbers result
1758             foreach ( split / /, $string ) {
1759                 next if C4::Context->stopwords->{ uc($_) };   # skip if stopword
1760                 warn "search on all indexes on $_" if $DEBUG;
1761                 my $biblionumbers;
1762                 next unless $_;
1763                 $sth->execute( $server, $_ );
1764                 while ( my $line = $sth->fetchrow ) {
1765                     $biblionumbers .= $line;
1766                 }
1767
1768 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1769                 if ($results) {
1770                     $results = NZoperatorAND($biblionumbers,$results);
1771                 }
1772                 else {
1773                     warn "NEW RES for $_ = $biblionumbers" if $DEBUG;
1774                     $results = $biblionumbers;
1775                 }
1776             }
1777         }
1778         warn "return : $results for LEAF : $string" if $DEBUG;
1779         return $results;
1780     }
1781     warn "---------\nLeave NZanalyse\n---------" if $DEBUG;
1782 }
1783
1784 sub NZoperatorAND{
1785     my ($rightresult, $leftresult)=@_;
1786
1787     my @leftresult = split /;/, $leftresult;
1788     warn " @leftresult / $rightresult \n" if $DEBUG;
1789
1790     #             my @rightresult = split /;/,$leftresult;
1791     my $finalresult;
1792
1793 # parse the left results, and if the biblionumber exist in the right result, save it in finalresult
1794 # the result is stored twice, to have the same weight for AND than OR.
1795 # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130
1796 # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64
1797     foreach (@leftresult) {
1798         my $value = $_;
1799         my $countvalue;
1800         ( $value, $countvalue ) = ( $1, $2 ) if ($value=~/(.*)-(\d+)$/);
1801         if ( $rightresult =~ /\Q$value\E-(\d+);/ ) {
1802             $countvalue = ( $1 > $countvalue ? $countvalue : $1 );
1803             $finalresult .=
1804                 "$value-$countvalue;$value-$countvalue;";
1805         }
1806     }
1807     warn "NZAND DONE : $finalresult \n" if $DEBUG;
1808     return $finalresult;
1809 }
1810
1811 sub NZoperatorOR{
1812     my ($rightresult, $leftresult)=@_;
1813     return $rightresult.$leftresult;
1814 }
1815
1816 sub NZoperatorNOT{
1817     my ($leftresult, $rightresult)=@_;
1818
1819     my @leftresult = split /;/, $leftresult;
1820
1821     #             my @rightresult = split /;/,$leftresult;
1822     my $finalresult;
1823     foreach (@leftresult) {
1824         my $value=$_;
1825         $value=$1 if $value=~m/(.*)-\d+$/;
1826         unless ($rightresult =~ "$value-") {
1827             $finalresult .= "$_;";
1828         }
1829     }
1830     return $finalresult;
1831 }
1832
1833 =head2 NZorder
1834
1835   $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset);
1836
1837   TODO :: Description
1838
1839 =cut
1840
1841 sub NZorder {
1842     my ( $biblionumbers, $ordering, $results_per_page, $offset ) = @_;
1843     warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG;
1844
1845     # order title asc by default
1846     #     $ordering = '1=36 <i' unless $ordering;
1847     $results_per_page = 20 unless $results_per_page;
1848     $offset           = 0  unless $offset;
1849     my $dbh = C4::Context->dbh;
1850
1851     #
1852     # order by POPULARITY
1853     #
1854     if ( $ordering =~ /popularity/ ) {
1855         my %result;
1856         my %popularity;
1857
1858         # popularity is not in MARC record, it's builded from a specific query
1859         my $sth =
1860           $dbh->prepare("select sum(issues) from items where biblionumber=?");
1861         foreach ( split /;/, $biblionumbers ) {
1862             my ( $biblionumber, $title ) = split /,/, $_;
1863             $result{$biblionumber} = GetMarcBiblio($biblionumber);
1864             $sth->execute($biblionumber);
1865             my $popularity = $sth->fetchrow || 0;
1866
1867 # hint : the key is popularity.title because we can have
1868 # many results with the same popularity. In this cas, sub-ordering is done by title
1869 # we also have biblionumber to avoid bug for 2 biblios with the same title & popularity
1870 # (un-frequent, I agree, but we won't forget anything that way ;-)
1871             $popularity{ sprintf( "%10d", $popularity ) . $title
1872                   . $biblionumber } = $biblionumber;
1873         }
1874
1875     # sort the hash and return the same structure as GetRecords (Zebra querying)
1876         my $result_hash;
1877         my $numbers = 0;
1878         if ( $ordering eq 'popularity_dsc' ) {    # sort popularity DESC
1879             foreach my $key ( sort { $b cmp $a } ( keys %popularity ) ) {
1880                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1881                   $result{ $popularity{$key} }->as_usmarc();
1882             }
1883         }
1884         else {                                    # sort popularity ASC
1885             foreach my $key ( sort ( keys %popularity ) ) {
1886                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1887                   $result{ $popularity{$key} }->as_usmarc();
1888             }
1889         }
1890         my $finalresult = ();
1891         $result_hash->{'hits'}         = $numbers;
1892         $finalresult->{'biblioserver'} = $result_hash;
1893         return $finalresult;
1894
1895         #
1896         # ORDER BY author
1897         #
1898     }
1899     elsif ( $ordering =~ /author/ ) {
1900         my %result;
1901         foreach ( split /;/, $biblionumbers ) {
1902             my ( $biblionumber, $title ) = split /,/, $_;
1903             my $record = GetMarcBiblio($biblionumber);
1904             my $author;
1905             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1906                 $author = $record->subfield( '200', 'f' );
1907                 $author = $record->subfield( '700', 'a' ) unless $author;
1908             }
1909             else {
1910                 $author = $record->subfield( '100', 'a' );
1911             }
1912
1913 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1914 # and we don't want to get only 1 result for each of them !!!
1915             $result{ $author . $biblionumber } = $record;
1916         }
1917
1918     # sort the hash and return the same structure as GetRecords (Zebra querying)
1919         my $result_hash;
1920         my $numbers = 0;
1921         if ( $ordering eq 'author_za' ) {    # sort by author desc
1922             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1923                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1924                   $result{$key}->as_usmarc();
1925             }
1926         }
1927         else {                               # sort by author ASC
1928             foreach my $key ( sort ( keys %result ) ) {
1929                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1930                   $result{$key}->as_usmarc();
1931             }
1932         }
1933         my $finalresult = ();
1934         $result_hash->{'hits'}         = $numbers;
1935         $finalresult->{'biblioserver'} = $result_hash;
1936         return $finalresult;
1937
1938         #
1939         # ORDER BY callnumber
1940         #
1941     }
1942     elsif ( $ordering =~ /callnumber/ ) {
1943         my %result;
1944         foreach ( split /;/, $biblionumbers ) {
1945             my ( $biblionumber, $title ) = split /,/, $_;
1946             my $record = GetMarcBiblio($biblionumber);
1947             my $callnumber;
1948             my ( $callnumber_tag, $callnumber_subfield ) =
1949               GetMarcFromKohaField( 'items.itemcallnumber','' );
1950             ( $callnumber_tag, $callnumber_subfield ) =
1951               GetMarcFromKohaField('biblioitems.callnumber','')
1952               unless $callnumber_tag;
1953             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1954                 $callnumber = $record->subfield( '200', 'f' );
1955             }
1956             else {
1957                 $callnumber = $record->subfield( '100', 'a' );
1958             }
1959
1960 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1961 # and we don't want to get only 1 result for each of them !!!
1962             $result{ $callnumber . $biblionumber } = $record;
1963         }
1964
1965     # sort the hash and return the same structure as GetRecords (Zebra querying)
1966         my $result_hash;
1967         my $numbers = 0;
1968         if ( $ordering eq 'call_number_dsc' ) {    # sort by title desc
1969             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1970                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1971                   $result{$key}->as_usmarc();
1972             }
1973         }
1974         else {                                     # sort by title ASC
1975             foreach my $key ( sort { $a cmp $b } ( keys %result ) ) {
1976                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1977                   $result{$key}->as_usmarc();
1978             }
1979         }
1980         my $finalresult = ();
1981         $result_hash->{'hits'}         = $numbers;
1982         $finalresult->{'biblioserver'} = $result_hash;
1983         return $finalresult;
1984     }
1985     elsif ( $ordering =~ /pubdate/ ) {             #pub year
1986         my %result;
1987         foreach ( split /;/, $biblionumbers ) {
1988             my ( $biblionumber, $title ) = split /,/, $_;
1989             my $record = GetMarcBiblio($biblionumber);
1990             my ( $publicationyear_tag, $publicationyear_subfield ) =
1991               GetMarcFromKohaField( 'biblioitems.publicationyear', '' );
1992             my $publicationyear =
1993               $record->subfield( $publicationyear_tag,
1994                 $publicationyear_subfield );
1995
1996 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1997 # and we don't want to get only 1 result for each of them !!!
1998             $result{ $publicationyear . $biblionumber } = $record;
1999         }
2000
2001     # sort the hash and return the same structure as GetRecords (Zebra querying)
2002         my $result_hash;
2003         my $numbers = 0;
2004         if ( $ordering eq 'pubdate_dsc' ) {    # sort by pubyear desc
2005             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2006                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2007                   $result{$key}->as_usmarc();
2008             }
2009         }
2010         else {                                 # sort by pub year ASC
2011             foreach my $key ( sort ( keys %result ) ) {
2012                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2013                   $result{$key}->as_usmarc();
2014             }
2015         }
2016         my $finalresult = ();
2017         $result_hash->{'hits'}         = $numbers;
2018         $finalresult->{'biblioserver'} = $result_hash;
2019         return $finalresult;
2020
2021         #
2022         # ORDER BY title
2023         #
2024     }
2025     elsif ( $ordering =~ /title/ ) {
2026
2027 # the title is in the biblionumbers string, so we just need to build a hash, sort it and return
2028         my %result;
2029         foreach ( split /;/, $biblionumbers ) {
2030             my ( $biblionumber, $title ) = split /,/, $_;
2031
2032 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2033 # and we don't want to get only 1 result for each of them !!!
2034 # hint & speed improvement : we can order without reading the record
2035 # so order, and read records only for the requested page !
2036             $result{ $title . $biblionumber } = $biblionumber;
2037         }
2038
2039     # sort the hash and return the same structure as GetRecords (Zebra querying)
2040         my $result_hash;
2041         my $numbers = 0;
2042         if ( $ordering eq 'title_az' ) {    # sort by title desc
2043             foreach my $key ( sort ( keys %result ) ) {
2044                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2045             }
2046         }
2047         else {                              # sort by title ASC
2048             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2049                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2050             }
2051         }
2052
2053         # limit the $results_per_page to result size if it's more
2054         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2055
2056         # for the requested page, replace biblionumber by the complete record
2057         # speed improvement : avoid reading too much things
2058         for (
2059             my $counter = $offset ;
2060             $counter <= $offset + $results_per_page ;
2061             $counter++
2062           )
2063         {
2064             $result_hash->{'RECORDS'}[$counter] =
2065               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc;
2066         }
2067         my $finalresult = ();
2068         $result_hash->{'hits'}         = $numbers;
2069         $finalresult->{'biblioserver'} = $result_hash;
2070         return $finalresult;
2071     }
2072     else {
2073
2074 #
2075 # order by ranking
2076 #
2077 # we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking
2078         my %result;
2079         my %count_ranking;
2080         foreach ( split /;/, $biblionumbers ) {
2081             my ( $biblionumber, $title ) = split /,/, $_;
2082             $title =~ /(.*)-(\d)/;
2083
2084             # get weight
2085             my $ranking = $2;
2086
2087 # note that we + the ranking because ranking is calculated on weight of EACH term requested.
2088 # if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N
2089 # biblio N has ranking = 6
2090             $count_ranking{$biblionumber} += $ranking;
2091         }
2092
2093 # build the result by "inverting" the count_ranking hash
2094 # hing : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead
2095 #         warn "counting";
2096         foreach ( keys %count_ranking ) {
2097             $result{ sprintf( "%10d", $count_ranking{$_} ) . '-' . $_ } = $_;
2098         }
2099
2100     # sort the hash and return the same structure as GetRecords (Zebra querying)
2101         my $result_hash;
2102         my $numbers = 0;
2103         foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2104             $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2105         }
2106
2107         # limit the $results_per_page to result size if it's more
2108         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2109
2110         # for the requested page, replace biblionumber by the complete record
2111         # speed improvement : avoid reading too much things
2112         for (
2113             my $counter = $offset ;
2114             $counter <= $offset + $results_per_page ;
2115             $counter++
2116           )
2117         {
2118             $result_hash->{'RECORDS'}[$counter] =
2119               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc
2120               if $result_hash->{'RECORDS'}[$counter];
2121         }
2122         my $finalresult = ();
2123         $result_hash->{'hits'}         = $numbers;
2124         $finalresult->{'biblioserver'} = $result_hash;
2125         return $finalresult;
2126     }
2127 }
2128
2129 =head2 ModBiblios
2130
2131 ($countchanged,$listunchanged) = ModBiblios($listbiblios, $tagsubfield,$initvalue,$targetvalue,$test);
2132
2133 this function changes all the values $initvalue in subfield $tag$subfield in any record in $listbiblios
2134 test parameter if set donot perform change to records in database.
2135
2136 =over 2
2137
2138 =item C<input arg:>
2139
2140     * $listbiblios is an array ref to marcrecords to be changed
2141     * $tagsubfield is the reference of the subfield to change.
2142     * $initvalue is the value to search the record for
2143     * $targetvalue is the value to set the subfield to
2144     * $test is to be set only not to perform changes in database.
2145
2146 =item C<Output arg:>
2147     * $countchanged counts all the changes performed.
2148     * $listunchanged contains the list of all the biblionumbers of records unchanged.
2149
2150 =item C<usage in the script:>
2151
2152 =back
2153
2154 my ($countchanged, $listunchanged) = EditBiblios($results->{RECORD}, $tagsubfield,$initvalue,$targetvalue);;
2155 #If one wants to display unchanged records, you should get biblios foreach @$listunchanged
2156 $template->param(countchanged => $countchanged, loopunchanged=>$listunchanged);
2157
2158 =cut
2159
2160 sub ModBiblios {
2161     my ( $listbiblios, $tagsubfield, $initvalue, $targetvalue, $test ) = @_;
2162     my $countmatched;
2163     my @unmatched;
2164     my ( $tag, $subfield ) = ( $1, $2 )
2165       if ( $tagsubfield =~ /^(\d{1,3})([a-z0-9A-Z@])?$/ );
2166     if ( ( length($tag) < 3 ) && $subfield =~ /0-9/ ) {
2167         $tag = $tag . $subfield;
2168         undef $subfield;
2169     }
2170     my ( $bntag,   $bnsubf )   = GetMarcFromKohaField('biblio.biblionumber','');
2171     my ( $itemtag, $itemsubf ) = GetMarcFromKohaField('items.itemnumber','');
2172     if ($tag eq $itemtag) {
2173         # do not allow the embedded item tag to be
2174         # edited from here
2175         warn "Attempting to edit item tag via C4::Search::ModBiblios -- not allowed";
2176         return (0, []);
2177     }
2178     foreach my $usmarc (@$listbiblios) {
2179         my $record;
2180         $record = eval { MARC::Record->new_from_usmarc($usmarc) };
2181         my $biblionumber;
2182         if ($@) {
2183
2184             # usmarc is not a valid usmarc May be a biblionumber
2185             # FIXME - sorry, please let's figure out whether
2186             #         this function is to be passed a list of
2187             #         record numbers or a list of MARC::Record
2188             #         objects.  The former is probably better
2189             #         because the MARC records supplied by Zebra
2190             #         may be not current.
2191             $record       = GetMarcBiblio($usmarc);
2192             $biblionumber = $usmarc;
2193         }
2194         else {
2195             if ( $bntag >= 010 ) {
2196                 $biblionumber = $record->subfield( $bntag, $bnsubf );
2197             }
2198             else {
2199                 $biblionumber = $record->field($bntag)->data;
2200             }
2201         }
2202
2203         #GetBiblionumber is to be written.
2204         #Could be replaced by TransformMarcToKoha (But Would be longer)
2205         if ( $record->field($tag) ) {
2206             my $modify = 0;
2207             foreach my $field ( $record->field($tag) ) {
2208                 if ($subfield) {
2209                     if (
2210                         $field->delete_subfield(
2211                             'code'  => $subfield,
2212                             'match' => qr($initvalue)
2213                         )
2214                       )
2215                     {
2216                         $countmatched++;
2217                         $modify = 1;
2218                         $field->update( $subfield, $targetvalue )
2219                           if ($targetvalue);
2220                     }
2221                 }
2222                 else {
2223                     if ( $tag >= 010 ) {
2224                         if ( $field->delete_field($field) ) {
2225                             $countmatched++;
2226                             $modify = 1;
2227                         }
2228                     }
2229                     else {
2230                         $field->data = $targetvalue
2231                           if ( $field->data =~ qr($initvalue) );
2232                     }
2233                 }
2234             }
2235
2236             #       warn $record->as_formatted;
2237             if ($modify) {
2238                 ModBiblio( $record, $biblionumber,
2239                     GetFrameworkCode($biblionumber) )
2240                   unless ($test);
2241             }
2242             else {
2243                 push @unmatched, $biblionumber;
2244             }
2245         }
2246         else {
2247             push @unmatched, $biblionumber;
2248         }
2249     }
2250     return ( $countmatched, \@unmatched );
2251 }
2252
2253 END { }    # module clean-up code here (global destructor)
2254
2255 1;
2256 __END__
2257
2258 =head1 AUTHOR
2259
2260 Koha Developement team <info@koha.org>
2261
2262 =cut