C4/Search.pm

   1 package C4::Search;
   2
   3 # This file is part of Koha.
   4 #
   5 # Koha is free software; you can redistribute it and/or modify it under the
   6 # terms of the GNU General Public License as published by the Free Software
   7 # Foundation; either version 2 of the License, or (at your option) any later
   8 # version.
   9 #
  10 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
  11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  12 # A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
  13 #
  14 # You should have received a copy of the GNU General Public License along with
  15 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
  16 # Suite 330, Boston, MA  02111-1307 USA
  17
  18 use strict;
  19 # use warnings; # FIXME
  20 require Exporter;
  21 use C4::Context;
  22 use C4::Biblio;    # GetMarcFromKohaField
  23 use C4::Koha;      # getFacets
  24 use Lingua::Stem;
  25 use C4::Search::PazPar2;
  26 use XML::Simple;
  27 use C4::Dates qw(format_date);
  28 use C4::XSLT;
  29
  30 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
  31
  32 # set the version for version checking
  33 BEGIN {
  34     $VERSION = 3.01;
  35     $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
  36 }
  37
  38 =head1 NAME
  39
  40 C4::Search - Functions for searching the Koha catalog.
  41
  42 =head1 SYNOPSIS
  43
  44 See opac/opac-search.pl or catalogue/search.pl for example of usage
  45
  46 =head1 DESCRIPTION
  47
  48 This module provides searching functions for Koha's bibliographic databases
  49
  50 =head1 FUNCTIONS
  51
  52 =cut
  53
  54 @ISA    = qw(Exporter);
  55 @EXPORT = qw(
  56   &findseealso
  57   &FindDuplicate
  58   &SimpleSearch
  59   &searchResults
  60   &getRecords
  61   &buildQuery
  62   &NZgetRecords
  63   &ModBiblios
  64 );
  65
  66 # make all your functions, whether exported or not;
  67
  68 =head2 findseealso($dbh,$fields);
  69
  70 C<$dbh> is a link to the DB handler.
  71
  72 use C4::Context;
  73 my $dbh =C4::Context->dbh;
  74
  75 C<$fields> is a reference to the fields array
  76
  77 This function modifies the @$fields array and adds related fields to search on.
  78
  79 FIXME: this function is probably deprecated in Koha 3
  80
  81 =cut
  82
  83 sub findseealso {
  84     my ( $dbh, $fields ) = @_;
  85     my $tagslib = GetMarcStructure(1);
  86     for ( my $i = 0 ; $i <= $#{$fields} ; $i++ ) {
  87         my ($tag)      = substr( @$fields[$i], 1, 3 );
  88         my ($subfield) = substr( @$fields[$i], 4, 1 );
  89         @$fields[$i] .= ',' . $tagslib->{$tag}->{$subfield}->{seealso}
  90           if ( $tagslib->{$tag}->{$subfield}->{seealso} );
  91     }
  92 }
  93
  94 =head2 FindDuplicate
  95
  96 ($biblionumber,$biblionumber,$title) = FindDuplicate($record);
  97
  98 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
  99
 100 =cut
 101
 102 sub FindDuplicate {
 103     my ($record) = @_;
 104     my $dbh = C4::Context->dbh;
 105     my $result = TransformMarcToKoha( $dbh, $record, '' );
 106     my $sth;
 107     my $query;
 108     my $search;
 109     my $type;
 110     my ( $biblionumber, $title );
 111
 112     # search duplicate on ISBN, easy and fast..
 113     # ... normalize first
 114     if ( $result->{isbn} ) {
 115         $result->{isbn} =~ s/\(.*$//;
 116         $result->{isbn} =~ s/\s+$//;
 117         $query = "isbn=$result->{isbn}";
 118     }
 119     else {
 120         $result->{title} =~ s /\\//g;
 121         $result->{title} =~ s /\"//g;
 122         $result->{title} =~ s /\(//g;
 123         $result->{title} =~ s /\)//g;
 124
 125         # FIXME: instead of removing operators, could just do
 126         # quotes around the value
 127         $result->{title} =~ s/(and|or|not)//g;
 128         $query = "ti,ext=$result->{title}";
 129         $query .= " and itemtype=$result->{itemtype}"
 130           if ( $result->{itemtype} );
 131         if   ( $result->{author} ) {
 132             $result->{author} =~ s /\\//g;
 133             $result->{author} =~ s /\"//g;
 134             $result->{author} =~ s /\(//g;
 135             $result->{author} =~ s /\)//g;
 136
 137             # remove valid operators
 138             $result->{author} =~ s/(and|or|not)//g;
 139             $query .= " and au,ext=$result->{author}";
 140         }
 141     }
 142
 143     # FIXME: add error handling
 144     my ( $error, $searchresults ) = SimpleSearch($query); # FIXME :: hardcoded !
 145     my @results;
 146     foreach my $possible_duplicate_record (@$searchresults) {
 147         my $marcrecord =
 148           MARC::Record->new_from_usmarc($possible_duplicate_record);
 149         my $result = TransformMarcToKoha( $dbh, $marcrecord, '' );
 150
 151         # FIXME :: why 2 $biblionumber ?
 152         if ($result) {
 153             push @results, $result->{'biblionumber'};
 154             push @results, $result->{'title'};
 155         }
 156     }
 157     return @results;
 158 }
 159
 160 =head2 SimpleSearch
 161
 162 ( $error, $results, $total_hits ) = SimpleSearch( $query, $offset, $max_results, [@servers] );
 163
 164 This function provides a simple search API on the bibliographic catalog
 165
 166 =over 2
 167
 168 =item C<input arg:>
 169
 170     * $query can be a simple keyword or a complete CCL query
 171     * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
 172     * $offset - If present, represents the number of records at the beggining to omit. Defaults to 0
 173     * $max_results - if present, determines the maximum number of records to fetch. undef is All. defaults to undef.
 174
 175
 176 =item C<Output:>
 177
 178     * $error is a empty unless an error is detected
 179     * \@results is an array of records.
 180     * $total_hits is the number of hits that would have been returned with no limit
 181
 182 =item C<usage in the script:>
 183
 184 =back
 185
 186 my ( $error, $marcresults, $total_hits ) = SimpleSearch($query);
 187
 188 if (defined $error) {
 189     $template->param(query_error => $error);
 190     warn "error: ".$error;
 191     output_html_with_http_headers $input, $cookie, $template->output;
 192     exit;
 193 }
 194
 195 my $hits = scalar @$marcresults;
 196 my @results;
 197
 198 for my $i (0..$hits) {
 199     my %resultsloop;
 200     my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]);
 201     my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,'');
 202
 203     #build the hash for the template.
 204     $resultsloop{highlight}       = ($i % 2)?(1):(0);
 205     $resultsloop{title}           = $biblio->{'title'};
 206     $resultsloop{subtitle}        = $biblio->{'subtitle'};
 207     $resultsloop{biblionumber}    = $biblio->{'biblionumber'};
 208     $resultsloop{author}          = $biblio->{'author'};
 209     $resultsloop{publishercode}   = $biblio->{'publishercode'};
 210     $resultsloop{publicationyear} = $biblio->{'publicationyear'};
 211
 212     push @results, \%resultsloop;
 213 }
 214
 215 $template->param(result=>\@results);
 216
 217 =cut
 218
 219 sub SimpleSearch {
 220     my ( $query, $offset, $max_results, $servers )  = @_;
 221
 222     if ( C4::Context->preference('NoZebra') ) {
 223         my $result = NZorder( NZanalyse($query) )->{'biblioserver'};
 224         my $search_result =
 225           (      $result->{hits}
 226               && $result->{hits} > 0 ? $result->{'RECORDS'} : [] );
 227         return ( undef, $search_result, scalar($search_result) );
 228     }
 229     else {
 230         # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
 231         my @servers = defined ( $servers ) ? @$servers : ( "biblioserver" );
 232         my @results;
 233         my @zoom_queries;
 234         my @tmpresults;
 235         my @zconns;
 236         my $total_hits;
 237         return ( "No query entered", undef, undef ) unless $query;
 238
 239         # Initialize & Search Zebra
 240         for ( my $i = 0 ; $i < @servers ; $i++ ) {
 241             eval {
 242                 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 243                 $zoom_queries[$i] = new ZOOM::Query::CCL2RPN( $query, $zconns[$i]);
 244                 $tmpresults[$i] = $zconns[$i]->search( $zoom_queries[$i] );
 245
 246                 # error handling
 247                 my $error =
 248                     $zconns[$i]->errmsg() . " ("
 249                   . $zconns[$i]->errcode() . ") "
 250                   . $zconns[$i]->addinfo() . " "
 251                   . $zconns[$i]->diagset();
 252
 253                 return ( $error, undef, undef ) if $zconns[$i]->errcode();
 254             };
 255             if ($@) {
 256
 257                 # caught a ZOOM::Exception
 258                 my $error =
 259                     $@->message() . " ("
 260                   . $@->code() . ") "
 261                   . $@->addinfo() . " "
 262                   . $@->diagset();
 263                 warn $error;
 264                 return ( $error, undef, undef );
 265             }
 266         }
 267         while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 268             my $event = $zconns[ $i - 1 ]->last_event();
 269             if ( $event == ZOOM::Event::ZEND ) {
 270
 271                 my $first_record = defined( $offset ) ? $offset+1 : 1;
 272                 my $hits = $tmpresults[ $i - 1 ]->size();
 273                 $total_hits += $hits;
 274                 my $last_record = $hits;
 275                 if ( defined $max_results && $offset + $max_results < $hits ) {
 276                     $last_record  = $offset + $max_results;
 277                 }
 278
 279                 for my $j ( $first_record..$last_record ) {
 280                     my $record = $tmpresults[ $i - 1 ]->record( $j-1 )->raw(); # 0 indexed
 281                     push @results, $record;
 282                 }
 283             }
 284         }
 285
 286         foreach my $result (@tmpresults) {
 287             $result->destroy();
 288         }
 289         foreach my $zoom_query (@zoom_queries) {
 290             $zoom_query->destroy();
 291         }
 292
 293         return ( undef, \@results, $total_hits );
 294     }
 295 }
 296
 297 =head2 getRecords
 298
 299 ( undef, $results_hashref, \@facets_loop ) = getRecords (
 300
 301         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 302         $results_per_page, $offset,       $expanded_facet, $branches,
 303         $query_type,       $scan
 304     );
 305
 306 The all singing, all dancing, multi-server, asynchronous, scanning,
 307 searching, record nabbing, facet-building
 308
 309 See verbse embedded documentation.
 310
 311 =cut
 312
 313 sub getRecords {
 314     my (
 315         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 316         $results_per_page, $offset,       $expanded_facet, $branches,
 317         $query_type,       $scan
 318     ) = @_;
 319
 320     my @servers = @$servers_ref;
 321     my @sort_by = @$sort_by_ref;
 322
 323     # Initialize variables for the ZOOM connection and results object
 324     my $zconn;
 325     my @zconns;
 326     my @results;
 327     my $results_hashref = ();
 328
 329     # Initialize variables for the faceted results objects
 330     my $facets_counter = ();
 331     my $facets_info    = ();
 332     my $facets         = getFacets();
 333
 334     my @facets_loop
 335       ;    # stores the ref to array of hashes for template facets loop
 336
 337     ### LOOP THROUGH THE SERVERS
 338     for ( my $i = 0 ; $i < @servers ; $i++ ) {
 339         $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
 340
 341 # perform the search, create the results objects
 342 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
 343         my $query_to_use = ($servers[$i] =~ /biblioserver/) ? $koha_query : $simple_query;
 344
 345         #$query_to_use = $simple_query if $scan;
 346         warn $simple_query if ( $scan and $DEBUG );
 347
 348         # Check if we've got a query_type defined, if so, use it
 349         eval {
 350             if ($query_type)
 351             {
 352                 if ( $query_type =~ /^ccl/ ) {
 353                     $query_to_use =~
 354                       s/\:/\=/g;    # change : to = last minute (FIXME)
 355                     $results[$i] =
 356                       $zconns[$i]->search(
 357                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 358                       );
 359                 }
 360                 elsif ( $query_type =~ /^cql/ ) {
 361                     $results[$i] =
 362                       $zconns[$i]->search(
 363                         new ZOOM::Query::CQL( $query_to_use, $zconns[$i] ) );
 364                 }
 365                 elsif ( $query_type =~ /^pqf/ ) {
 366                     $results[$i] =
 367                       $zconns[$i]->search(
 368                         new ZOOM::Query::PQF( $query_to_use, $zconns[$i] ) );
 369                 }
 370             }
 371             else {
 372                 if ($scan) {
 373                     $results[$i] =
 374                       $zconns[$i]->scan(
 375                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 376                       );
 377                 }
 378                 else {
 379                     $results[$i] =
 380                       $zconns[$i]->search(
 381                         new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
 382                       );
 383                 }
 384             }
 385         };
 386         if ($@) {
 387             warn "WARNING: query problem with $query_to_use " . $@;
 388         }
 389
 390         # Concatenate the sort_by limits and pass them to the results object
 391         # Note: sort will override rank
 392         my $sort_by;
 393         foreach my $sort (@sort_by) {
 394             if ( $sort eq "author_az" ) {
 395                 $sort_by .= "1=1003 <i ";
 396             }
 397             elsif ( $sort eq "author_za" ) {
 398                 $sort_by .= "1=1003 >i ";
 399             }
 400             elsif ( $sort eq "popularity_asc" ) {
 401                 $sort_by .= "1=9003 <i ";
 402             }
 403             elsif ( $sort eq "popularity_dsc" ) {
 404                 $sort_by .= "1=9003 >i ";
 405             }
 406             elsif ( $sort eq "call_number_asc" ) {
 407                 $sort_by .= "1=20  <i ";
 408             }
 409             elsif ( $sort eq "call_number_dsc" ) {
 410                 $sort_by .= "1=20 >i ";
 411             }
 412             elsif ( $sort eq "pubdate_asc" ) {
 413                 $sort_by .= "1=31 <i ";
 414             }
 415             elsif ( $sort eq "pubdate_dsc" ) {
 416                 $sort_by .= "1=31 >i ";
 417             }
 418             elsif ( $sort eq "acqdate_asc" ) {
 419                 $sort_by .= "1=32 <i ";
 420             }
 421             elsif ( $sort eq "acqdate_dsc" ) {
 422                 $sort_by .= "1=32 >i ";
 423             }
 424             elsif ( $sort eq "title_az" ) {
 425                 $sort_by .= "1=4 <i ";
 426             }
 427             elsif ( $sort eq "title_za" ) {
 428                 $sort_by .= "1=4 >i ";
 429             }
 430         }
 431         if ($sort_by) {
 432             if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
 433                 warn "WARNING sort $sort_by failed";
 434             }
 435         }
 436     }    # finished looping through servers
 437
 438     # The big moment: asynchronously retrieve results from all servers
 439     while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
 440         my $ev = $zconns[ $i - 1 ]->last_event();
 441         if ( $ev == ZOOM::Event::ZEND ) {
 442             next unless $results[ $i - 1 ];
 443             my $size = $results[ $i - 1 ]->size();
 444             if ( $size > 0 ) {
 445                 my $results_hash;
 446
 447                 # loop through the results
 448                 $results_hash->{'hits'} = $size;
 449                 my $times;
 450                 if ( $offset + $results_per_page <= $size ) {
 451                     $times = $offset + $results_per_page;
 452                 }
 453                 else {
 454                     $times = $size;
 455                 }
 456                 for ( my $j = $offset ; $j < $times ; $j++ ) {
 457                     my $records_hash;
 458                     my $record;
 459                     my $facet_record;
 460
 461                     ## Check if it's an index scan
 462                     if ($scan) {
 463                         my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
 464
 465                  # here we create a minimal MARC record and hand it off to the
 466                  # template just like a normal result ... perhaps not ideal, but
 467                  # it works for now
 468                         my $tmprecord = MARC::Record->new();
 469                         $tmprecord->encoding('UTF-8');
 470                         my $tmptitle;
 471                         my $tmpauthor;
 472
 473                 # the minimal record in author/title (depending on MARC flavour)
 474                         if (C4::Context->preference("marcflavour") eq "UNIMARC") {
 475                             $tmptitle = MARC::Field->new('200',' ',' ', a => $term, f => $occ);
 476                             $tmprecord->append_fields($tmptitle);
 477                         } else {
 478                             $tmptitle  = MARC::Field->new('245',' ',' ', a => $term,);
 479                             $tmpauthor = MARC::Field->new('100',' ',' ', a => $occ,);
 480                             $tmprecord->append_fields($tmptitle);
 481                             $tmprecord->append_fields($tmpauthor);
 482                         }
 483                         $results_hash->{'RECORDS'}[$j] = $tmprecord->as_usmarc();
 484                     }
 485
 486                     # not an index scan
 487                     else {
 488                         $record = $results[ $i - 1 ]->record($j)->raw();
 489
 490                         # warn "RECORD $j:".$record;
 491                         $results_hash->{'RECORDS'}[$j] = $record;
 492
 493             # Fill the facets while we're looping, but only for the biblioserver
 494                         $facet_record = MARC::Record->new_from_usmarc($record)
 495                           if $servers[ $i - 1 ] =~ /biblioserver/;
 496
 497                     #warn $servers[$i-1]."\n".$record; #.$facet_record->title();
 498                         if ($facet_record) {
 499                             for ( my $k = 0 ; $k <= @$facets ; $k++ ) {
 500                                 ($facets->[$k]) or next;
 501                                 my @fields = map {$facet_record->field($_)} @{$facets->[$k]->{'tags'}} ;
 502                                 for my $field (@fields) {
 503                                     my @subfields = $field->subfields();
 504                                     for my $subfield (@subfields) {
 505                                         my ( $code, $data ) = @$subfield;
 506                                         ($code eq $facets->[$k]->{'subfield'}) or next;
 507                                         $facets_counter->{ $facets->[$k]->{'link_value'} }->{$data}++;
 508                                     }
 509                                 }
 510                                 $facets_info->{ $facets->[$k]->{'link_value'} }->{'label_value'} =
 511                                     $facets->[$k]->{'label_value'};
 512                                 $facets_info->{ $facets->[$k]->{'link_value'} }->{'expanded'} =
 513                                     $facets->[$k]->{'expanded'};
 514                             }
 515                         }
 516                     }
 517                 }
 518                 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
 519             }
 520
 521             # warn "connection ", $i-1, ": $size hits";
 522             # warn $results[$i-1]->record(0)->render() if $size > 0;
 523
 524             # BUILD FACETS
 525             if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
 526                 for my $link_value (
 527                     sort { $facets_counter->{$b} <=> $facets_counter->{$a} }
 528                     keys %$facets_counter )
 529                 {
 530                     my $expandable;
 531                     my $number_of_facets;
 532                     my @this_facets_array;
 533                     for my $one_facet (
 534                         sort {
 535                             $facets_counter->{$link_value}
 536                               ->{$b} <=> $facets_counter->{$link_value}->{$a}
 537                         } keys %{ $facets_counter->{$link_value} }
 538                       )
 539                     {
 540                         $number_of_facets++;
 541                         if (   ( $number_of_facets < 6 )
 542                             || ( $expanded_facet eq $link_value )
 543                             || ( $facets_info->{$link_value}->{'expanded'} ) )
 544                         {
 545
 546                       # Sanitize the link value ), ( will cause errors with CCL,
 547                             my $facet_link_value = $one_facet;
 548                             $facet_link_value =~ s/(\(|\))/ /g;
 549
 550                             # fix the length that will display in the label,
 551                             my $facet_label_value = $one_facet;
 552                             $facet_label_value =
 553                               substr( $one_facet, 0, 20 ) . "..."
 554                               unless length($facet_label_value) <= 20;
 555
 556                             # if it's a branch, label by the name, not the code,
 557                             if ( $link_value =~ /branch/ ) {
 558                                 $facet_label_value =
 559                                   $branches->{$one_facet}->{'branchname'};
 560                             }
 561
 562                 # but we're down with the whole label being in the link's title.
 563                             my $facet_title_value = $one_facet;
 564
 565                             push @this_facets_array,
 566                               (
 567                                 {
 568                                     facet_count =>
 569                                       $facets_counter->{$link_value}
 570                                       ->{$one_facet},
 571                                     facet_label_value => $facet_label_value,
 572                                     facet_title_value => $facet_title_value,
 573                                     facet_link_value  => $facet_link_value,
 574                                     type_link_value   => $link_value,
 575                                 },
 576                               );
 577                         }
 578                     }
 579
 580                     # handle expanded option
 581                     unless ( $facets_info->{$link_value}->{'expanded'} ) {
 582                         $expandable = 1
 583                           if ( ( $number_of_facets > 6 )
 584                             && ( $expanded_facet ne $link_value ) );
 585                     }
 586                     push @facets_loop,
 587                       (
 588                         {
 589                             type_link_value => $link_value,
 590                             type_id         => $link_value . "_id",
 591                             "type_label_" . $facets_info->{$link_value}->{'label_value'} => 1,
 592                             facets     => \@this_facets_array,
 593                             expandable => $expandable,
 594                             expand     => $link_value,
 595                         }
 596                       ) unless ( ($facets_info->{$link_value}->{'label_value'} =~ /Libraries/) and (C4::Context->preference('singleBranchMode')) );
 597                 }
 598             }
 599         }
 600     }
 601     return ( undef, $results_hashref, \@facets_loop );
 602 }
 603
 604 sub pazGetRecords {
 605     my (
 606         $koha_query,       $simple_query, $sort_by_ref,    $servers_ref,
 607         $results_per_page, $offset,       $expanded_facet, $branches,
 608         $query_type,       $scan
 609     ) = @_;
 610
 611     my $paz = C4::Search::PazPar2->new(C4::Context->config('pazpar2url'));
 612     $paz->init();
 613     $paz->search($simple_query);
 614     sleep 1;
 615
 616     # do results
 617     my $results_hashref = {};
 618     my $stats = XMLin($paz->stat);
 619     my $results = XMLin($paz->show($offset, $results_per_page, 'work-title:1'), forcearray => 1);
 620
 621     # for a grouped search result, the number of hits
 622     # is the number of groups returned; 'bib_hits' will have
 623     # the total number of bibs.
 624     $results_hashref->{'biblioserver'}->{'hits'} = $results->{'merged'}->[0];
 625     $results_hashref->{'biblioserver'}->{'bib_hits'} = $stats->{'hits'};
 626
 627     HIT: foreach my $hit (@{ $results->{'hit'} }) {
 628         my $recid = $hit->{recid}->[0];
 629
 630         my $work_title = $hit->{'md-work-title'}->[0];
 631         my $work_author;
 632         if (exists $hit->{'md-work-author'}) {
 633             $work_author = $hit->{'md-work-author'}->[0];
 634         }
 635         my $group_label = (defined $work_author) ? "$work_title / $work_author" : $work_title;
 636
 637         my $result_group = {};
 638         $result_group->{'group_label'} = $group_label;
 639         $result_group->{'group_merge_key'} = $recid;
 640
 641         my $count = 1;
 642         if (exists $hit->{count}) {
 643             $count = $hit->{count}->[0];
 644         }
 645         $result_group->{'group_count'} = $count;
 646
 647         for (my $i = 0; $i < $count; $i++) {
 648             # FIXME -- may need to worry about diacritics here
 649             my $rec = $paz->record($recid, $i);
 650             push @{ $result_group->{'RECORDS'} }, $rec;
 651         }
 652
 653         push @{ $results_hashref->{'biblioserver'}->{'GROUPS'} }, $result_group;
 654     }
 655
 656     # pass through facets
 657     my $termlist_xml = $paz->termlist('author,subject');
 658     my $terms = XMLin($termlist_xml, forcearray => 1);
 659     my @facets_loop = ();
 660     #die Dumper($results);
 661 #    foreach my $list (sort keys %{ $terms->{'list'} }) {
 662 #        my @facets = ();
 663 #        foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) {
 664 #            push @facets, {
 665 #                facet_label_value => $facet->{'name'}->[0],
 666 #            };
 667 #        }
 668 #        push @facets_loop, ( {
 669 #            type_label => $list,
 670 #            facets => \@facets,
 671 #        } );
 672 #    }
 673
 674     return ( undef, $results_hashref, \@facets_loop );
 675 }
 676
 677 # STOPWORDS
 678 sub _remove_stopwords {
 679     my ( $operand, $index ) = @_;
 680     my @stopwords_removed;
 681
 682     # phrase and exact-qualified indexes shouldn't have stopwords removed
 683     if ( $index !~ m/phr|ext/ ) {
 684
 685 # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
 686 #       we use IsAlpha unicode definition, to deal correctly with diacritics.
 687 #       otherwise, a French word like "leçon" woudl be split into "le" "çon", "le"
 688 #       is a stopword, we'd get "çon" and wouldn't find anything...
 689         foreach ( keys %{ C4::Context->stopwords } ) {
 690             next if ( $_ =~ /(and|or|not)/ );    # don't remove operators
 691             if ( $operand =~
 692                 /(\P{IsAlpha}$_\P{IsAlpha}|^$_\P{IsAlpha}|\P{IsAlpha}$_$|^$_$)/ )
 693             {
 694                 $operand =~ s/\P{IsAlpha}$_\P{IsAlpha}/ /gi;
 695                 $operand =~ s/^$_\P{IsAlpha}/ /gi;
 696                 $operand =~ s/\P{IsAlpha}$_$/ /gi;
 697                                 $operand =~ s/$1//gi;
 698                 push @stopwords_removed, $_;
 699             }
 700         }
 701     }
 702     return ( $operand, \@stopwords_removed );
 703 }
 704
 705 # TRUNCATION
 706 sub _detect_truncation {
 707     my ( $operand, $index ) = @_;
 708     my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
 709         @regexpr );
 710     $operand =~ s/^ //g;
 711     my @wordlist = split( /\s/, $operand );
 712     foreach my $word (@wordlist) {
 713         if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
 714             push @rightlefttruncated, $word;
 715         }
 716         elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
 717             push @lefttruncated, $word;
 718         }
 719         elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
 720             push @righttruncated, $word;
 721         }
 722         elsif ( index( $word, "*" ) < 0 ) {
 723             push @nontruncated, $word;
 724         }
 725         else {
 726             push @regexpr, $word;
 727         }
 728     }
 729     return (
 730         \@nontruncated,       \@righttruncated, \@lefttruncated,
 731         \@rightlefttruncated, \@regexpr
 732     );
 733 }
 734
 735 # STEMMING
 736 sub _build_stemmed_operand {
 737     my ($operand) = @_;
 738     my $stemmed_operand;
 739
 740     # If operand contains a digit, it is almost certainly an identifier, and should
 741     # not be stemmed.  This is particularly relevant for ISBNs and ISSNs, which
 742     # can contain the letter "X" - for example, _build_stemmend_operand would reduce
 743     # "014100018X" to "x ", which for a MARC21 database would bring up irrelevant
 744     # results (e.g., "23 x 29 cm." from the 300$c).  Bug 2098.
 745     return $operand if $operand =~ /\d/;
 746
 747 # FIXME: the locale should be set based on the user's language and/or search choice
 748     my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
 749
 750 # FIXME: these should be stored in the db so the librarian can modify the behavior
 751     $stemmer->add_exceptions(
 752         {
 753             'and' => 'and',
 754             'or'  => 'or',
 755             'not' => 'not',
 756         }
 757     );
 758     my @words = split( / /, $operand );
 759     my $stems = $stemmer->stem(@words);
 760     for my $stem (@$stems) {
 761         $stemmed_operand .= "$stem";
 762         $stemmed_operand .= "?"
 763           unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
 764         $stemmed_operand .= " ";
 765     }
 766     warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
 767     return $stemmed_operand;
 768 }
 769
 770 # FIELD WEIGHTING
 771 sub _build_weighted_query {
 772
 773 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
 774 # pretty well but could work much better if we had a smarter query parser
 775     my ( $operand, $stemmed_operand, $index ) = @_;
 776     my $stemming      = C4::Context->preference("QueryStemming")     || 0;
 777     my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
 778     my $fuzzy_enabled = C4::Context->preference("QueryFuzzy")        || 0;
 779
 780     my $weighted_query .= "(rk=(";    # Specifies that we're applying rank
 781
 782     # Keyword, or, no index specified
 783     if ( ( $index eq 'kw' ) || ( !$index ) ) {
 784         $weighted_query .=
 785           "Title-cover,ext,r1=\"$operand\"";    # exact title-cover
 786         $weighted_query .= " or ti,ext,r2=\"$operand\"";    # exact title
 787         $weighted_query .= " or ti,phr,r3=\"$operand\"";    # phrase title
 788           #$weighted_query .= " or any,ext,r4=$operand";               # exact any
 789           #$weighted_query .=" or kw,wrdl,r5=\"$operand\"";            # word list any
 790         $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
 791           if $fuzzy_enabled;    # add fuzzy, word list
 792         $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
 793           if ( $stemming and $stemmed_operand )
 794           ;                     # add stemming, right truncation
 795         $weighted_query .= " or wrdl,r9=\"$operand\"";
 796
 797         # embedded sorting: 0 a-z; 1 z-a
 798         # $weighted_query .= ") or (sort1,aut=1";
 799     }
 800
 801     # Barcode searches should skip this process
 802     elsif ( $index eq 'bc' ) {
 803         $weighted_query .= "bc=\"$operand\"";
 804     }
 805
 806     # Authority-number searches should skip this process
 807     elsif ( $index eq 'an' ) {
 808         $weighted_query .= "an=\"$operand\"";
 809     }
 810
 811     # If the index already has more than one qualifier, wrap the operand
 812     # in quotes and pass it back (assumption is that the user knows what they
 813     # are doing and won't appreciate us mucking up their query
 814     elsif ( $index =~ ',' ) {
 815         $weighted_query .= " $index=\"$operand\"";
 816     }
 817
 818     #TODO: build better cases based on specific search indexes
 819     else {
 820         $weighted_query .= " $index,ext,r1=\"$operand\"";    # exact index
 821           #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
 822         $weighted_query .= " or $index,phr,r3=\"$operand\"";    # phrase index
 823         $weighted_query .=
 824           " or $index,rt,wrdl,r3=\"$operand\"";    # word list index
 825     }
 826
 827     $weighted_query .= "))";                       # close rank specification
 828     return $weighted_query;
 829 }
 830
 831 =head2 buildQuery
 832
 833 ( $error, $query,
 834 $simple_query, $query_cgi,
 835 $query_desc, $limit,
 836 $limit_cgi, $limit_desc,
 837 $stopwords_removed, $query_type ) = getRecords ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
 838
 839 Build queries and limits in CCL, CGI, Human,
 840 handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
 841
 842 See verbose embedded documentation.
 843
 844
 845 =cut
 846
 847 sub buildQuery {
 848     my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_;
 849
 850     warn "---------\nEnter buildQuery\n---------" if $DEBUG;
 851
 852     # dereference
 853     my @operators = @$operators if $operators;
 854     my @indexes   = @$indexes   if $indexes;
 855     my @operands  = @$operands  if $operands;
 856     my @limits    = @$limits    if $limits;
 857     my @sort_by   = @$sort_by   if $sort_by;
 858
 859     my $stemming         = C4::Context->preference("QueryStemming")        || 0;
 860     my $auto_truncation  = C4::Context->preference("QueryAutoTruncate")    || 0;
 861     my $weight_fields    = C4::Context->preference("QueryWeightFields")    || 0;
 862     my $fuzzy_enabled    = C4::Context->preference("QueryFuzzy")           || 0;
 863     my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
 864
 865     # no stemming/weight/fuzzy in NoZebra
 866     if ( C4::Context->preference("NoZebra") ) {
 867         $stemming      = 0;
 868         $weight_fields = 0;
 869         $fuzzy_enabled = 0;
 870     }
 871
 872     my $query        = $operands[0];
 873     my $simple_query = $operands[0];
 874
 875     # initialize the variables we're passing back
 876     my $query_cgi;
 877     my $query_desc;
 878     my $query_type;
 879
 880     my $limit;
 881     my $limit_cgi;
 882     my $limit_desc;
 883
 884     my $stopwords_removed;    # flag to determine if stopwords have been removed
 885
 886 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
 887 # DIAGNOSTIC ONLY!!
 888     if ( $query =~ /^ccl=/ ) {
 889         return ( undef, $', $', $', $', '', '', '', '', 'ccl' );
 890     }
 891     if ( $query =~ /^cql=/ ) {
 892         return ( undef, $', $', $', $', '', '', '', '', 'cql' );
 893     }
 894     if ( $query =~ /^pqf=/ ) {
 895         return ( undef, $', $', $', $', '', '', '', '', 'pqf' );
 896     }
 897
 898     # pass nested queries directly
 899     # FIXME: need better handling of some of these variables in this case
 900     if ( $query =~ /(\(|\))/ ) {
 901         return (
 902             undef,              $query, $simple_query, $query_cgi,
 903             $query,             $limit, $limit_cgi,    $limit_desc,
 904             $stopwords_removed, 'ccl'
 905         );
 906     }
 907
 908 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
 909 # query operands and indexes and add stemming, truncation, field weighting, etc.
 910 # Once we do so, we'll end up with a value in $query, just like if we had an
 911 # incoming $query from the user
 912     else {
 913         $query = ""
 914           ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
 915         my $previous_operand
 916           ;    # a flag used to keep track if there was a previous query
 917                # if there was, we can apply the current operator
 918                # for every operand
 919         for ( my $i = 0 ; $i <= @operands ; $i++ ) {
 920
 921             # COMBINE OPERANDS, INDEXES AND OPERATORS
 922             if ( $operands[$i] ) {
 923
 924               # A flag to determine whether or not to add the index to the query
 925                 my $indexes_set;
 926
 927 # If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling
 928                 if ( $operands[$i] =~ /(:|=)/ || $scan ) {
 929                     $weight_fields    = 0;
 930                     $stemming         = 0;
 931                     $remove_stopwords = 0;
 932                 }
 933                 my $operand = $operands[$i];
 934                 my $index   = $indexes[$i];
 935
 936                 # Add index-specific attributes
 937                 # Date of Publication
 938                 if ( $index eq 'yr' ) {
 939                     $index .= ",st-numeric";
 940 #                     $indexes_set++;
 941                                         $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
 942                 }
 943
 944                 # Date of Acquisition
 945                 elsif ( $index eq 'acqdate' ) {
 946                     $index .= ",st-date-normalized";
 947 #                     $indexes_set++;
 948                                         $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
 949                 }
 950                 # ISBN,ISSN,Standard Number, don't need special treatment
 951                 elsif ( $index eq 'nb' || $index eq 'ns' ) {
 952 #                     $indexes_set++;
 953                     (
 954                         $stemming,      $auto_truncation,
 955                         $weight_fields, $fuzzy_enabled,
 956                         $remove_stopwords
 957                     ) = ( 0, 0, 0, 0, 0 );
 958
 959                 }
 960                 # Set default structure attribute (word list)
 961                 my $struct_attr;
 962                 unless ( $indexes_set || !$index || $index =~ /(st-|phr|ext|wrdl)/ ) {
 963                     $struct_attr = ",wrdl";
 964                 }
 965
 966                 # Some helpful index variants
 967                 my $index_plus       = $index . $struct_attr . ":" if $index;
 968                 my $index_plus_comma = $index . $struct_attr . "," if $index;
 969
 970                 # Remove Stopwords
 971                 if ($remove_stopwords) {
 972                     ( $operand, $stopwords_removed ) =
 973                       _remove_stopwords( $operand, $index );
 974                     warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
 975                     warn "REMOVED STOPWORDS: @$stopwords_removed"
 976                       if ( $stopwords_removed && $DEBUG );
 977                 }
 978
 979                 # Detect Truncation
 980                 my $truncated_operand;
 981                 my( $nontruncated, $righttruncated, $lefttruncated,
 982                     $rightlefttruncated, $regexpr
 983                 ) = _detect_truncation( $operand, $index );
 984                 warn
 985 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
 986                   if $DEBUG;
 987
 988                 # Apply Truncation
 989                 if (
 990                     scalar(@$righttruncated) + scalar(@$lefttruncated) +
 991                     scalar(@$rightlefttruncated) > 0 )
 992                 {
 993
 994                # Don't field weight or add the index to the query, we do it here
 995                     $indexes_set = 1;
 996                     undef $weight_fields;
 997                     my $previous_truncation_operand;
 998                     if ( scalar(@$nontruncated) > 0 ) {
 999                         $truncated_operand .= "$index_plus @$nontruncated ";
1000                         $previous_truncation_operand = 1;
1001                     }
1002                     if ( scalar(@$righttruncated) > 0 ) {
1003                         $truncated_operand .= "and "
1004                           if $previous_truncation_operand;
1005                         $truncated_operand .=
1006                           "$index_plus_comma" . "rtrn:@$righttruncated ";
1007                         $previous_truncation_operand = 1;
1008                     }
1009                     if ( scalar(@$lefttruncated) > 0 ) {
1010                         $truncated_operand .= "and "
1011                           if $previous_truncation_operand;
1012                         $truncated_operand .=
1013                           "$index_plus_comma" . "ltrn:@$lefttruncated ";
1014                         $previous_truncation_operand = 1;
1015                     }
1016                     if ( scalar(@$rightlefttruncated) > 0 ) {
1017                         $truncated_operand .= "and "
1018                           if $previous_truncation_operand;
1019                         $truncated_operand .=
1020                           "$index_plus_comma" . "rltrn:@$rightlefttruncated ";
1021                         $previous_truncation_operand = 1;
1022                     }
1023                 }
1024                 $operand = $truncated_operand if $truncated_operand;
1025                 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
1026
1027                 # Handle Stemming
1028                 my $stemmed_operand;
1029                 $stemmed_operand = _build_stemmed_operand($operand)
1030                   if $stemming;
1031                 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
1032
1033                 # Handle Field Weighting
1034                 my $weighted_operand;
1035                 $weighted_operand =
1036                   _build_weighted_query( $operand, $stemmed_operand, $index )
1037                   if $weight_fields;
1038                 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
1039                 $operand = $weighted_operand if $weight_fields;
1040                 $indexes_set = 1 if $weight_fields;
1041
1042                 # If there's a previous operand, we need to add an operator
1043                 if ($previous_operand) {
1044
1045                     # User-specified operator
1046                     if ( $operators[ $i - 1 ] ) {
1047                         $query     .= " $operators[$i-1] ";
1048                         $query     .= " $index_plus " unless $indexes_set;
1049                         $query     .= " $operand";
1050                         $query_cgi .= "&op=$operators[$i-1]";
1051                         $query_cgi .= "&idx=$index" if $index;
1052                         $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1053                         $query_desc .=
1054                           " $operators[$i-1] $index_plus $operands[$i]";
1055                     }
1056
1057                     # Default operator is and
1058                     else {
1059                         $query      .= " and ";
1060                         $query      .= "$index_plus " unless $indexes_set;
1061                         $query      .= "$operand";
1062                         $query_cgi  .= "&op=and&idx=$index" if $index;
1063                         $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1064                         $query_desc .= " and $index_plus $operands[$i]";
1065                     }
1066                 }
1067
1068                 # There isn't a pervious operand, don't need an operator
1069                 else {
1070
1071                     # Field-weighted queries already have indexes set
1072                     $query .= " $index_plus " unless $indexes_set;
1073                     $query .= $operand;
1074                     $query_desc .= " $index_plus $operands[$i]";
1075                     $query_cgi  .= "&idx=$index" if $index;
1076                     $query_cgi  .= "&q=$operands[$i]" if $operands[$i];
1077                     $previous_operand = 1;
1078                 }
1079             }    #/if $operands
1080         }    # /for
1081     }
1082     warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
1083
1084     # add limits
1085     my $group_OR_limits;
1086     my $availability_limit;
1087     foreach my $this_limit (@limits) {
1088         if ( $this_limit =~ /available/ ) {
1089
1090 # 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1091 # In English:
1092 # all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1093             $availability_limit .=
1094 "( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and (lost,st-numeric=0) )"; #or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )";
1095             $limit_cgi  .= "&limit=available";
1096             $limit_desc .= "";
1097         }
1098
1099         # group_OR_limits, prefixed by mc-
1100         # OR every member of the group
1101         elsif ( $this_limit =~ /mc/ ) {
1102             $group_OR_limits .= " or " if $group_OR_limits;
1103             $limit_desc      .= " or " if $group_OR_limits;
1104             $group_OR_limits .= "$this_limit";
1105             $limit_cgi       .= "&limit=$this_limit";
1106             $limit_desc      .= " $this_limit";
1107         }
1108
1109         # Regular old limits
1110         else {
1111             if ($this_limit){
1112                 $limit .= " and " if $limit || $query;
1113                 $limit      .= "$this_limit";
1114                 $limit_cgi  .= "&limit=$this_limit";
1115                 $limit_desc .= " $this_limit";
1116             }
1117         }
1118     }
1119     if ($group_OR_limits) {
1120         $limit .= " and " if ( $query || $limit );
1121         $limit .= "($group_OR_limits)";
1122     }
1123     if ($availability_limit) {
1124         $limit .= " and " if ( $query || $limit );
1125         $limit .= "($availability_limit)";
1126     }
1127
1128     # Normalize the query and limit strings
1129     $query =~ s/:/=/g;
1130     $limit =~ s/:/=/g;
1131     for ( $query, $query_desc, $limit, $limit_desc ) {
1132         $_ =~ s/  / /g;    # remove extra spaces
1133         $_ =~ s/^ //g;     # remove any beginning spaces
1134         $_ =~ s/ $//g;     # remove any ending spaces
1135         $_ =~ s/==/=/g;    # remove double == from query
1136     }
1137     $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1138
1139     for ($query_cgi,$simple_query) {
1140         $_ =~ s/"//g;
1141     }
1142     # append the limit to the query
1143     $query .= " " . $limit;
1144
1145     # Warnings if DEBUG
1146     if ($DEBUG) {
1147         warn "QUERY:" . $query;
1148         warn "QUERY CGI:" . $query_cgi;
1149         warn "QUERY DESC:" . $query_desc;
1150         warn "LIMIT:" . $limit;
1151         warn "LIMIT CGI:" . $limit_cgi;
1152         warn "LIMIT DESC:" . $limit_desc;
1153         warn "---------\nLeave buildQuery\n---------";
1154     }
1155     return (
1156         undef,              $query, $simple_query, $query_cgi,
1157         $query_desc,        $limit, $limit_cgi,    $limit_desc,
1158         $stopwords_removed, $query_type
1159     );
1160 }
1161
1162 =head2 searchResults
1163
1164 Format results in a form suitable for passing to the template
1165
1166 =cut
1167
1168 # IMO this subroutine is pretty messy still -- it's responsible for
1169 # building the HTML output for the template
1170 sub searchResults {
1171     my ( $searchdesc, $hits, $results_per_page, $offset, $scan, @marcresults ) = @_;
1172     my $dbh = C4::Context->dbh;
1173     my $even = 1;
1174     my @newresults;
1175
1176     # add search-term highlighting via <span>s on the search terms
1177     my $span_terms_hashref;
1178     for my $span_term ( split( / /, $searchdesc ) ) {
1179         $span_term =~ s/(.*=|\)|\(|\+|\.|\*)//g;
1180         $span_terms_hashref->{$span_term}++;
1181     }
1182
1183     #Build branchnames hash
1184     #find branchname
1185     #get branch information.....
1186     my %branches;
1187     my $bsth =
1188       $dbh->prepare("SELECT branchcode,branchname FROM branches")
1189       ;    # FIXME : use C4::Koha::GetBranches
1190     $bsth->execute();
1191     while ( my $bdata = $bsth->fetchrow_hashref ) {
1192         $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'};
1193     }
1194 # FIXME - We build an authorised values hash here, using the default framework
1195 # though it is possible to have different authvals for different fws.
1196
1197     my $shelflocations =GetKohaAuthorisedValues('items.location','');
1198
1199     # get notforloan authorised value list (see $shelflocations  FIXME)
1200     my $notforloan_authorised_value = GetAuthValCode('items.notforloan','');
1201
1202     #Build itemtype hash
1203     #find itemtype & itemtype image
1204     my %itemtypes;
1205     $bsth =
1206       $dbh->prepare(
1207         "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes"
1208       );
1209     $bsth->execute();
1210     while ( my $bdata = $bsth->fetchrow_hashref ) {
1211                 foreach (qw(description imageurl summary notforloan)) {
1212                 $itemtypes{ $bdata->{'itemtype'} }->{$_} = $bdata->{$_};
1213                 }
1214     }
1215
1216     #search item field code
1217     my $sth =
1218       $dbh->prepare(
1219 "SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'"
1220       );
1221     $sth->execute;
1222     my ($itemtag) = $sth->fetchrow;
1223
1224     ## find column names of items related to MARC
1225     my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items");
1226     $sth2->execute;
1227     my %subfieldstosearch;
1228     while ( ( my $column ) = $sth2->fetchrow ) {
1229         my ( $tagfield, $tagsubfield ) =
1230           &GetMarcFromKohaField( "items." . $column, "" );
1231         $subfieldstosearch{$column} = $tagsubfield;
1232     }
1233
1234     # handle which records to actually retrieve
1235     my $times;
1236     if ( $hits && $offset + $results_per_page <= $hits ) {
1237         $times = $offset + $results_per_page;
1238     }
1239     else {
1240         $times = $hits;  # FIXME: if $hits is undefined, why do we want to equal it?
1241     }
1242
1243     # loop through all of the records we've retrieved
1244     for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
1245         my $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
1246         my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' );
1247         $oldbiblio->{subtitle} = C4::Biblio::get_koha_field_from_marc('bibliosubtitle', 'subtitle', $marcrecord, '');
1248         $oldbiblio->{result_number} = $i + 1;
1249
1250         # add imageurl to itemtype if there is one
1251         $oldbiblio->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
1252
1253         $oldbiblio->{'authorised_value_images'}  = C4::Items::get_authorised_value_images( C4::Biblio::get_biblio_authorised_values( $oldbiblio->{'biblionumber'}, $marcrecord ) );
1254         (my $aisbn) = $oldbiblio->{isbn} =~ /([\d-]*[X]*)/;
1255         $aisbn =~ s/-//g;
1256         $oldbiblio->{amazonisbn} = $aisbn;
1257                 $oldbiblio->{description} = $itemtypes{ $oldbiblio->{itemtype} }->{description};
1258  # Build summary if there is one (the summary is defined in the itemtypes table)
1259  # FIXME: is this used anywhere, I think it can be commented out? -- JF
1260         if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
1261             my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1262             my @fields  = $marcrecord->fields();
1263             foreach my $field (@fields) {
1264                 my $tag      = $field->tag();
1265                 my $tagvalue = $field->as_string();
1266                 if (! utf8::is_utf8($tagvalue)) {
1267                     utf8::decode($tagvalue);
1268                 }
1269
1270                 $summary =~
1271                   s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
1272                 unless ( $tag < 10 ) {
1273                     my @subf = $field->subfields;
1274                     for my $i ( 0 .. $#subf ) {
1275                         my $subfieldcode  = $subf[$i][0];
1276                         my $subfieldvalue = $subf[$i][1];
1277                         if (! utf8::is_utf8($subfieldvalue)) {
1278                             utf8::decode($subfieldvalue);
1279                         }
1280                         my $tagsubf       = $tag . $subfieldcode;
1281                         $summary =~
1282 s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
1283                     }
1284                 }
1285             }
1286             # FIXME: yuk
1287             $summary =~ s/\[(.*?)]//g;
1288             $summary =~ s/\n/<br\/>/g;
1289             $oldbiblio->{summary} = $summary;
1290         }
1291
1292         # save an author with no <span> tag, for the <a href=search.pl?q=<!--tmpl_var name="author"-->> link
1293         $oldbiblio->{'author_nospan'} = $oldbiblio->{'author'};
1294         $oldbiblio->{'title_nospan'} = $oldbiblio->{'title'};
1295         # Add search-term highlighting to the whole record where they match using <span>s
1296         if (C4::Context->preference("OpacHighlightedWords")){
1297             my $searchhighlightblob;
1298             for my $highlight_field ( $marcrecord->fields ) {
1299
1300     # FIXME: need to skip title, subtitle, author, etc., as they are handled below
1301                 next if $highlight_field->tag() =~ /(^00)/;    # skip fixed fields
1302                 for my $subfield ($highlight_field->subfields()) {
1303                     my $match;
1304                     next if $subfield->[0] eq '9';
1305                     my $field = $subfield->[1];
1306                     for my $term ( keys %$span_terms_hashref ) {
1307                         if ( ( $field =~ /$term/i ) && (( length($term) > 3 ) || ($field =~ / $term /i)) ) {
1308                             $field =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1309                         $match++;
1310                         }
1311                     }
1312                     $searchhighlightblob .= $field . " ... " if $match;
1313                 }
1314
1315             }
1316             $searchhighlightblob = ' ... '.$searchhighlightblob if $searchhighlightblob;
1317             $oldbiblio->{'searchhighlightblob'} = $searchhighlightblob;
1318         }
1319
1320         # Add search-term highlighting to the title, subtitle, etc. fields
1321         for my $term ( keys %$span_terms_hashref ) {
1322             my $old_term = $term;
1323             if ( length($term) > 3 ) {
1324                 $term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\]|\\|\*)//g;
1325                                 foreach(qw(title subtitle author publishercode place pages notes size)) {
1326                         $oldbiblio->{$_} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1327                                 }
1328             }
1329         }
1330
1331         ($i % 2) and $oldbiblio->{'toggle'} = 1;
1332
1333         # Pull out the items fields
1334         my @fields = $marcrecord->field($itemtag);
1335
1336         # Setting item statuses for display
1337         my @available_items_loop;
1338         my @onloan_items_loop;
1339         my @other_items_loop;
1340
1341         my $available_items;
1342         my $onloan_items;
1343         my $other_items;
1344
1345         my $ordered_count         = 0;
1346         my $available_count       = 0;
1347         my $onloan_count          = 0;
1348         my $longoverdue_count     = 0;
1349         my $other_count           = 0;
1350         my $wthdrawn_count        = 0;
1351         my $itemlost_count        = 0;
1352         my $itembinding_count     = 0;
1353         my $itemdamaged_count     = 0;
1354         my $item_in_transit_count = 0;
1355         my $can_place_holds       = 0;
1356         my $items_count           = scalar(@fields);
1357         my $maxitems =
1358           ( C4::Context->preference('maxItemsinSearchResults') )
1359           ? C4::Context->preference('maxItemsinSearchResults') - 1
1360           : 1;
1361
1362         # loop through every item
1363         foreach my $field (@fields) {
1364             my $item;
1365
1366             # populate the items hash
1367             foreach my $code ( keys %subfieldstosearch ) {
1368                 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1369             }
1370                         my $hbranch     = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'homebranch'    : 'holdingbranch';
1371                         my $otherbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'holdingbranch' : 'homebranch';
1372             # set item's branch name, use HomeOrHoldingBranch syspref first, fall back to the other one
1373             if ($item->{$hbranch}) {
1374                 $item->{'branchname'} = $branches{$item->{$hbranch}};
1375             }
1376             elsif ($item->{$otherbranch}) {     # Last resort
1377                 $item->{'branchname'} = $branches{$item->{$otherbranch}};
1378             }
1379
1380                         my $prefix = $item->{$hbranch} . '--' . $item->{location} . $item->{itype} . $item->{itemcallnumber};
1381 # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
1382             if ( $item->{onloan} ) {
1383                 $onloan_count++;
1384                                 my $key = $prefix . $item->{due_date};
1385                                 $onloan_items->{$key}->{due_date} = format_date($item->{onloan});
1386                                 $onloan_items->{$key}->{count}++ if $item->{$hbranch};
1387                                 $onloan_items->{$key}->{branchname} = $item->{branchname};
1388                                 $onloan_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1389                                 $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber};
1390                                 $onloan_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1391                                 $onloan_items->{$key}->{barcode} = $item->{barcode};
1392                 # if something's checked out and lost, mark it as 'long overdue'
1393                 if ( $item->{itemlost} ) {
1394                     $onloan_items->{$prefix}->{longoverdue}++;
1395                     $longoverdue_count++;
1396                 } else {        # can place holds as long as item isn't lost
1397                     $can_place_holds = 1;
1398                 }
1399             }
1400
1401          # items not on loan, but still unavailable ( lost, withdrawn, damaged )
1402             else {
1403
1404                 # item is on order
1405                 if ( $item->{notforloan} == -1 ) {
1406                     $ordered_count++;
1407                 }
1408
1409                 # is item in transit?
1410                 my $transfertwhen = '';
1411                 my ($transfertfrom, $transfertto);
1412
1413                 unless ($item->{wthdrawn}
1414                         || $item->{itemlost}
1415                         || $item->{damaged}
1416                         || $item->{notforloan}
1417                         || $items_count > 20) {
1418
1419                     # A couple heuristics to limit how many times
1420                     # we query the database for item transfer information, sacrificing
1421                     # accuracy in some cases for speed;
1422                     #
1423                     # 1. don't query if item has one of the other statuses
1424                     # 2. don't check transit status if the bib has
1425                     #    more than 20 items
1426                     #
1427                     # FIXME: to avoid having the query the database like this, and to make
1428                     #        the in transit status count as unavailable for search limiting,
1429                     #        should map transit status to record indexed in Zebra.
1430                     #
1431                     ($transfertwhen, $transfertfrom, $transfertto) = C4::Circulation::GetTransfers($item->{itemnumber});
1432                 }
1433
1434                 # item is withdrawn, lost or damaged
1435                 if (   $item->{wthdrawn}
1436                     || $item->{itemlost}
1437                     || $item->{damaged}
1438                     || $item->{notforloan}
1439                     || ($transfertwhen ne ''))
1440                 {
1441                     $wthdrawn_count++        if $item->{wthdrawn};
1442                     $itemlost_count++        if $item->{itemlost};
1443                     $itemdamaged_count++     if $item->{damaged};
1444                     $item_in_transit_count++ if $transfertwhen ne '';
1445                     $item->{status} = $item->{wthdrawn} . "-" . $item->{itemlost} . "-" . $item->{damaged} . "-" . $item->{notforloan};
1446                     $other_count++;
1447
1448                                         my $key = $prefix . $item->{status};
1449                                         foreach (qw(wthdrawn itemlost damaged branchname itemcallnumber)) {
1450                         $other_items->{$key}->{$_} = $item->{$_};
1451                                         }
1452                     $other_items->{$key}->{intransit} = ($transfertwhen ne '') ? 1 : 0;
1453                                         $other_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1454                                         $other_items->{$key}->{count}++ if $item->{$hbranch};
1455                                         $other_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1456                                         $other_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1457                                         $other_items->{$key}->{barcode} = $item->{barcode};
1458                 }
1459                 # item is available
1460                 else {
1461                     $can_place_holds = 1;
1462                     $available_count++;
1463                                         $available_items->{$prefix}->{count}++ if $item->{$hbranch};
1464                                         foreach (qw(branchname itemcallnumber barcode)) {
1465                         $available_items->{$prefix}->{$_} = $item->{$_};
1466                                         }
1467                                         $available_items->{$prefix}->{location} = $shelflocations->{ $item->{location} };
1468                                         $available_items->{$prefix}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1469                 }
1470             }
1471         }    # notforloan, item level and biblioitem level
1472         my ( $availableitemscount, $onloanitemscount, $otheritemscount );
1473         $maxitems =
1474           ( C4::Context->preference('maxItemsinSearchResults') )
1475           ? C4::Context->preference('maxItemsinSearchResults') - 1
1476           : 1;
1477         for my $key ( sort keys %$onloan_items ) {
1478             (++$onloanitemscount > $maxitems) and last;
1479             push @onloan_items_loop, $onloan_items->{$key};
1480         }
1481         for my $key ( sort keys %$other_items ) {
1482             (++$otheritemscount > $maxitems) and last;
1483             push @other_items_loop, $other_items->{$key};
1484         }
1485         for my $key ( sort keys %$available_items ) {
1486             (++$availableitemscount > $maxitems) and last;
1487             push @available_items_loop, $available_items->{$key}
1488         }
1489
1490         # XSLT processing of some stuff
1491         if (C4::Context->preference("XSLTResultsDisplay") && !$scan) {
1492             my $newxmlrecord = XSLTParse4Display($oldbiblio->{biblionumber}, $marcrecord, C4::Context->config('opachtdocs')."/prog/en/xslt/MARC21slim2OPACResults.xsl");
1493             $oldbiblio->{XSLTResultsRecord} = $newxmlrecord;
1494         }
1495
1496         # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
1497         $can_place_holds = 0
1498           if $itemtypes{ $oldbiblio->{itemtype} }->{notforloan};
1499         $oldbiblio->{norequests} = 1 unless $can_place_holds;
1500         $oldbiblio->{itemsplural}          = 1 if $items_count > 1;
1501         $oldbiblio->{items_count}          = $items_count;
1502         $oldbiblio->{available_items_loop} = \@available_items_loop;
1503         $oldbiblio->{onloan_items_loop}    = \@onloan_items_loop;
1504         $oldbiblio->{other_items_loop}     = \@other_items_loop;
1505         $oldbiblio->{availablecount}       = $available_count;
1506         $oldbiblio->{availableplural}      = 1 if $available_count > 1;
1507         $oldbiblio->{onloancount}          = $onloan_count;
1508         $oldbiblio->{onloanplural}         = 1 if $onloan_count > 1;
1509         $oldbiblio->{othercount}           = $other_count;
1510         $oldbiblio->{otherplural}          = 1 if $other_count > 1;
1511         $oldbiblio->{wthdrawncount}        = $wthdrawn_count;
1512         $oldbiblio->{itemlostcount}        = $itemlost_count;
1513         $oldbiblio->{damagedcount}         = $itemdamaged_count;
1514         $oldbiblio->{intransitcount}       = $item_in_transit_count;
1515         $oldbiblio->{orderedcount}         = $ordered_count;
1516         $oldbiblio->{isbn} =~
1517           s/-//g;    # deleting - in isbn to enable amazon content
1518         push( @newresults, $oldbiblio );
1519     }
1520     return @newresults;
1521 }
1522
1523 #----------------------------------------------------------------------
1524 #
1525 # Non-Zebra GetRecords#
1526 #----------------------------------------------------------------------
1527
1528 =head2 NZgetRecords
1529
1530   NZgetRecords has the same API as zera getRecords, even if some parameters are not managed
1531
1532 =cut
1533
1534 sub NZgetRecords {
1535     my (
1536         $query,            $simple_query, $sort_by_ref,    $servers_ref,
1537         $results_per_page, $offset,       $expanded_facet, $branches,
1538         $query_type,       $scan
1539     ) = @_;
1540     warn "query =$query" if $DEBUG;
1541     my $result = NZanalyse($query);
1542     warn "results =$result" if $DEBUG;
1543     return ( undef,
1544         NZorder( $result, @$sort_by_ref[0], $results_per_page, $offset ),
1545         undef );
1546 }
1547
1548 =head2 NZanalyse
1549
1550   NZanalyse : get a CQL string as parameter, and returns a list of biblionumber;title,biblionumber;title,...
1551   the list is built from an inverted index in the nozebra SQL table
1552   note that title is here only for convenience : the sorting will be very fast when requested on title
1553   if the sorting is requested on something else, we will have to reread all results, and that may be longer.
1554
1555 =cut
1556
1557 sub NZanalyse {
1558     my ( $string, $server ) = @_;
1559 #     warn "---------"       if $DEBUG;
1560     warn " NZanalyse" if $DEBUG;
1561 #     warn "---------"       if $DEBUG;
1562
1563  # $server contains biblioserver or authorities, depending on what we search on.
1564  #warn "querying : $string on $server";
1565     $server = 'biblioserver' unless $server;
1566
1567 # if we have a ", replace the content to discard temporarily any and/or/not inside
1568     my $commacontent;
1569     if ( $string =~ /"/ ) {
1570         $string =~ s/"(.*?)"/__X__/;
1571         $commacontent = $1;
1572         warn "commacontent : $commacontent" if $DEBUG;
1573     }
1574
1575 # split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y"
1576 # then, call again NZanalyse with $left and $right
1577 # (recursive until we find a leaf (=> something without and/or/not)
1578 # delete repeated operator... Would then go in infinite loop
1579     while ( $string =~ s/( and| or| not| AND| OR| NOT)\1/$1/g ) {
1580     }
1581
1582     #process parenthesis before.
1583     if ( $string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/ ) {
1584         my $left     = $1;
1585         my $right    = $4;
1586         my $operator = lc($3);   # FIXME: and/or/not are operators, not operands
1587         warn
1588 "dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right"
1589           if $DEBUG;
1590         my $leftresult = NZanalyse( $left, $server );
1591         if ($operator) {
1592             my $rightresult = NZanalyse( $right, $server );
1593
1594             # OK, we have the results for right and left part of the query
1595             # depending of operand, intersect, union or exclude both lists
1596             # to get a result list
1597             if ( $operator eq ' and ' ) {
1598                 return NZoperatorAND($leftresult,$rightresult);
1599             }
1600             elsif ( $operator eq ' or ' ) {
1601
1602                 # just merge the 2 strings
1603                 return $leftresult . $rightresult;
1604             }
1605             elsif ( $operator eq ' not ' ) {
1606                 return NZoperatorNOT($leftresult,$rightresult);
1607             }
1608         }
1609         else {
1610 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1611             return $leftresult;
1612         }
1613     }
1614     warn "string :" . $string if $DEBUG;
1615     my $left = "";
1616     my $right = "";
1617     my $operator = "";
1618     if ($string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/) {
1619         $left     = $1;
1620         $right    = $3;
1621         $operator = lc($2);    # FIXME: and/or/not are operators, not operands
1622     }
1623     warn "no parenthesis. left : $left operator: $operator right: $right"
1624       if $DEBUG;
1625
1626     # it's not a leaf, we have a and/or/not
1627     if ($operator) {
1628
1629         # reintroduce comma content if needed
1630         $right =~ s/__X__/"$commacontent"/ if $commacontent;
1631         $left  =~ s/__X__/"$commacontent"/ if $commacontent;
1632         warn "node : $left / $operator / $right\n" if $DEBUG;
1633         my $leftresult  = NZanalyse( $left,  $server );
1634         my $rightresult = NZanalyse( $right, $server );
1635         warn " leftresult : $leftresult" if $DEBUG;
1636         warn " rightresult : $rightresult" if $DEBUG;
1637         # OK, we have the results for right and left part of the query
1638         # depending of operand, intersect, union or exclude both lists
1639         # to get a result list
1640         if ( $operator eq ' and ' ) {
1641             warn "NZAND";
1642             return NZoperatorAND($leftresult,$rightresult);
1643         }
1644         elsif ( $operator eq ' or ' ) {
1645
1646             # just merge the 2 strings
1647             return $leftresult . $rightresult;
1648         }
1649         elsif ( $operator eq ' not ' ) {
1650             return NZoperatorNOT($leftresult,$rightresult);
1651         }
1652         else {
1653
1654 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1655             die "error : operand unknown : $operator for $string";
1656         }
1657
1658         # it's a leaf, do the real SQL query and return the result
1659     }
1660     else {
1661         $string =~ s/__X__/"$commacontent"/ if $commacontent;
1662         $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g;
1663         #remove trailing blank at the beginning
1664         $string =~ s/^ //g;
1665         warn "leaf:$string" if $DEBUG;
1666
1667         # parse the string in in operator/operand/value again
1668         my $left = "";
1669         my $operator = "";
1670         my $right = "";
1671         if ($string =~ /(.*)(>=|<=)(.*)/) {
1672             $left     = $1;
1673             $operator = $2;
1674             $right    = $3;
1675         } else {
1676             $left = $string;
1677         }
1678 #         warn "handling leaf... left:$left operator:$operator right:$right"
1679 #           if $DEBUG;
1680         unless ($operator) {
1681             if ($string =~ /(.*)(>|<|=)(.*)/) {
1682                 $left     = $1;
1683                 $operator = $2;
1684                 $right    = $3;
1685                 warn
1686     "handling unless (operator)... left:$left operator:$operator right:$right"
1687                 if $DEBUG;
1688             } else {
1689                 $left = $string;
1690             }
1691         }
1692         my $results;
1693
1694 # strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr...
1695         $left =~ s/ .*$//;
1696
1697         # automatic replace for short operators
1698         $left = 'title'            if $left =~ '^ti$';
1699         $left = 'author'           if $left =~ '^au$';
1700         $left = 'publisher'        if $left =~ '^pb$';
1701         $left = 'subject'          if $left =~ '^su$';
1702         $left = 'koha-Auth-Number' if $left =~ '^an$';
1703         $left = 'keyword'          if $left =~ '^kw$';
1704         warn "handling leaf... left:$left operator:$operator right:$right" if $DEBUG;
1705         if ( $operator && $left ne 'keyword' ) {
1706
1707             #do a specific search
1708             my $dbh = C4::Context->dbh;
1709             $operator = 'LIKE' if $operator eq '=' and $right =~ /%/;
1710             my $sth =
1711               $dbh->prepare(
1712 "SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?"
1713               );
1714             warn "$left / $operator / $right\n" if $DEBUG;
1715
1716             # split each word, query the DB and build the biblionumbers result
1717             #sanitizing leftpart
1718             $left =~ s/^\s+|\s+$//;
1719             foreach ( split / /, $right ) {
1720                 my $biblionumbers;
1721                 $_ =~ s/^\s+|\s+$//;
1722                 next unless $_;
1723                 warn "EXECUTE : $server, $left, $_" if $DEBUG;
1724                 $sth->execute( $server, $left, $_ )
1725                   or warn "execute failed: $!";
1726                 while ( my ( $line, $value ) = $sth->fetchrow ) {
1727
1728 # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
1729 # otherwise, fill the result
1730                     $biblionumbers .= $line
1731                       unless ( $right =~ /^\d+$/ && $value =~ /\D/ );
1732                     warn "result : $value "
1733                       . ( $right  =~ /\d/ ) . "=="
1734                       . ( $value =~ /\D/?$line:"" ) if $DEBUG;         #= $line";
1735                 }
1736
1737 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1738                 if ($results) {
1739                     warn "NZAND" if $DEBUG;
1740                     $results = NZoperatorAND($biblionumbers,$results);
1741                 }
1742                 else {
1743                     $results = $biblionumbers;
1744                 }
1745             }
1746         }
1747         else {
1748
1749       #do a complete search (all indexes), if index='kw' do complete search too.
1750             my $dbh = C4::Context->dbh;
1751             my $sth =
1752               $dbh->prepare(
1753 "SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?"
1754               );
1755
1756             # split each word, query the DB and build the biblionumbers result
1757             foreach ( split / /, $string ) {
1758                 next if C4::Context->stopwords->{ uc($_) };   # skip if stopword
1759                 warn "search on all indexes on $_" if $DEBUG;
1760                 my $biblionumbers;
1761                 next unless $_;
1762                 $sth->execute( $server, $_ );
1763                 while ( my $line = $sth->fetchrow ) {
1764                     $biblionumbers .= $line;
1765                 }
1766
1767 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1768                 if ($results) {
1769                     $results = NZoperatorAND($biblionumbers,$results);
1770                 }
1771                 else {
1772                     warn "NEW RES for $_ = $biblionumbers" if $DEBUG;
1773                     $results = $biblionumbers;
1774                 }
1775             }
1776         }
1777         warn "return : $results for LEAF : $string" if $DEBUG;
1778         return $results;
1779     }
1780     warn "---------\nLeave NZanalyse\n---------" if $DEBUG;
1781 }
1782
1783 sub NZoperatorAND{
1784     my ($rightresult, $leftresult)=@_;
1785
1786     my @leftresult = split /;/, $leftresult;
1787     warn " @leftresult / $rightresult \n" if $DEBUG;
1788
1789     #             my @rightresult = split /;/,$leftresult;
1790     my $finalresult;
1791
1792 # parse the left results, and if the biblionumber exist in the right result, save it in finalresult
1793 # the result is stored twice, to have the same weight for AND than OR.
1794 # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130
1795 # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64
1796     foreach (@leftresult) {
1797         my $value = $_;
1798         my $countvalue;
1799         ( $value, $countvalue ) = ( $1, $2 ) if ($value=~/(.*)-(\d+)$/);
1800         if ( $rightresult =~ /\Q$value\E-(\d+);/ ) {
1801             $countvalue = ( $1 > $countvalue ? $countvalue : $1 );
1802             $finalresult .=
1803                 "$value-$countvalue;$value-$countvalue;";
1804         }
1805     }
1806     warn "NZAND DONE : $finalresult \n" if $DEBUG;
1807     return $finalresult;
1808 }
1809
1810 sub NZoperatorOR{
1811     my ($rightresult, $leftresult)=@_;
1812     return $rightresult.$leftresult;
1813 }
1814
1815 sub NZoperatorNOT{
1816     my ($leftresult, $rightresult)=@_;
1817
1818     my @leftresult = split /;/, $leftresult;
1819
1820     #             my @rightresult = split /;/,$leftresult;
1821     my $finalresult;
1822     foreach (@leftresult) {
1823         my $value=$_;
1824         $value=$1 if $value=~m/(.*)-\d+$/;
1825         unless ($rightresult =~ "$value-") {
1826             $finalresult .= "$_;";
1827         }
1828     }
1829     return $finalresult;
1830 }
1831
1832 =head2 NZorder
1833
1834   $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset);
1835
1836   TODO :: Description
1837
1838 =cut
1839
1840 sub NZorder {
1841     my ( $biblionumbers, $ordering, $results_per_page, $offset ) = @_;
1842     warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG;
1843
1844     # order title asc by default
1845     #     $ordering = '1=36 <i' unless $ordering;
1846     $results_per_page = 20 unless $results_per_page;
1847     $offset           = 0  unless $offset;
1848     my $dbh = C4::Context->dbh;
1849
1850     #
1851     # order by POPULARITY
1852     #
1853     if ( $ordering =~ /popularity/ ) {
1854         my %result;
1855         my %popularity;
1856
1857         # popularity is not in MARC record, it's builded from a specific query
1858         my $sth =
1859           $dbh->prepare("select sum(issues) from items where biblionumber=?");
1860         foreach ( split /;/, $biblionumbers ) {
1861             my ( $biblionumber, $title ) = split /,/, $_;
1862             $result{$biblionumber} = GetMarcBiblio($biblionumber);
1863             $sth->execute($biblionumber);
1864             my $popularity = $sth->fetchrow || 0;
1865
1866 # hint : the key is popularity.title because we can have
1867 # many results with the same popularity. In this cas, sub-ordering is done by title
1868 # we also have biblionumber to avoid bug for 2 biblios with the same title & popularity
1869 # (un-frequent, I agree, but we won't forget anything that way ;-)
1870             $popularity{ sprintf( "%10d", $popularity ) . $title
1871                   . $biblionumber } = $biblionumber;
1872         }
1873
1874     # sort the hash and return the same structure as GetRecords (Zebra querying)
1875         my $result_hash;
1876         my $numbers = 0;
1877         if ( $ordering eq 'popularity_dsc' ) {    # sort popularity DESC
1878             foreach my $key ( sort { $b cmp $a } ( keys %popularity ) ) {
1879                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1880                   $result{ $popularity{$key} }->as_usmarc();
1881             }
1882         }
1883         else {                                    # sort popularity ASC
1884             foreach my $key ( sort ( keys %popularity ) ) {
1885                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1886                   $result{ $popularity{$key} }->as_usmarc();
1887             }
1888         }
1889         my $finalresult = ();
1890         $result_hash->{'hits'}         = $numbers;
1891         $finalresult->{'biblioserver'} = $result_hash;
1892         return $finalresult;
1893
1894         #
1895         # ORDER BY author
1896         #
1897     }
1898     elsif ( $ordering =~ /author/ ) {
1899         my %result;
1900         foreach ( split /;/, $biblionumbers ) {
1901             my ( $biblionumber, $title ) = split /,/, $_;
1902             my $record = GetMarcBiblio($biblionumber);
1903             my $author;
1904             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1905                 $author = $record->subfield( '200', 'f' );
1906                 $author = $record->subfield( '700', 'a' ) unless $author;
1907             }
1908             else {
1909                 $author = $record->subfield( '100', 'a' );
1910             }
1911
1912 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1913 # and we don't want to get only 1 result for each of them !!!
1914             $result{ $author . $biblionumber } = $record;
1915         }
1916
1917     # sort the hash and return the same structure as GetRecords (Zebra querying)
1918         my $result_hash;
1919         my $numbers = 0;
1920         if ( $ordering eq 'author_za' ) {    # sort by author desc
1921             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1922                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1923                   $result{$key}->as_usmarc();
1924             }
1925         }
1926         else {                               # sort by author ASC
1927             foreach my $key ( sort ( keys %result ) ) {
1928                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1929                   $result{$key}->as_usmarc();
1930             }
1931         }
1932         my $finalresult = ();
1933         $result_hash->{'hits'}         = $numbers;
1934         $finalresult->{'biblioserver'} = $result_hash;
1935         return $finalresult;
1936
1937         #
1938         # ORDER BY callnumber
1939         #
1940     }
1941     elsif ( $ordering =~ /callnumber/ ) {
1942         my %result;
1943         foreach ( split /;/, $biblionumbers ) {
1944             my ( $biblionumber, $title ) = split /,/, $_;
1945             my $record = GetMarcBiblio($biblionumber);
1946             my $callnumber;
1947             my ( $callnumber_tag, $callnumber_subfield ) =
1948               GetMarcFromKohaField( 'items.itemcallnumber','' );
1949             ( $callnumber_tag, $callnumber_subfield ) =
1950               GetMarcFromKohaField('biblioitems.callnumber','')
1951               unless $callnumber_tag;
1952             if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1953                 $callnumber = $record->subfield( '200', 'f' );
1954             }
1955             else {
1956                 $callnumber = $record->subfield( '100', 'a' );
1957             }
1958
1959 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1960 # and we don't want to get only 1 result for each of them !!!
1961             $result{ $callnumber . $biblionumber } = $record;
1962         }
1963
1964     # sort the hash and return the same structure as GetRecords (Zebra querying)
1965         my $result_hash;
1966         my $numbers = 0;
1967         if ( $ordering eq 'call_number_dsc' ) {    # sort by title desc
1968             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1969                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1970                   $result{$key}->as_usmarc();
1971             }
1972         }
1973         else {                                     # sort by title ASC
1974             foreach my $key ( sort { $a cmp $b } ( keys %result ) ) {
1975                 $result_hash->{'RECORDS'}[ $numbers++ ] =
1976                   $result{$key}->as_usmarc();
1977             }
1978         }
1979         my $finalresult = ();
1980         $result_hash->{'hits'}         = $numbers;
1981         $finalresult->{'biblioserver'} = $result_hash;
1982         return $finalresult;
1983     }
1984     elsif ( $ordering =~ /pubdate/ ) {             #pub year
1985         my %result;
1986         foreach ( split /;/, $biblionumbers ) {
1987             my ( $biblionumber, $title ) = split /,/, $_;
1988             my $record = GetMarcBiblio($biblionumber);
1989             my ( $publicationyear_tag, $publicationyear_subfield ) =
1990               GetMarcFromKohaField( 'biblioitems.publicationyear', '' );
1991             my $publicationyear =
1992               $record->subfield( $publicationyear_tag,
1993                 $publicationyear_subfield );
1994
1995 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1996 # and we don't want to get only 1 result for each of them !!!
1997             $result{ $publicationyear . $biblionumber } = $record;
1998         }
1999
2000     # sort the hash and return the same structure as GetRecords (Zebra querying)
2001         my $result_hash;
2002         my $numbers = 0;
2003         if ( $ordering eq 'pubdate_dsc' ) {    # sort by pubyear desc
2004             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2005                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2006                   $result{$key}->as_usmarc();
2007             }
2008         }
2009         else {                                 # sort by pub year ASC
2010             foreach my $key ( sort ( keys %result ) ) {
2011                 $result_hash->{'RECORDS'}[ $numbers++ ] =
2012                   $result{$key}->as_usmarc();
2013             }
2014         }
2015         my $finalresult = ();
2016         $result_hash->{'hits'}         = $numbers;
2017         $finalresult->{'biblioserver'} = $result_hash;
2018         return $finalresult;
2019
2020         #
2021         # ORDER BY title
2022         #
2023     }
2024     elsif ( $ordering =~ /title/ ) {
2025
2026 # the title is in the biblionumbers string, so we just need to build a hash, sort it and return
2027         my %result;
2028         foreach ( split /;/, $biblionumbers ) {
2029             my ( $biblionumber, $title ) = split /,/, $_;
2030
2031 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2032 # and we don't want to get only 1 result for each of them !!!
2033 # hint & speed improvement : we can order without reading the record
2034 # so order, and read records only for the requested page !
2035             $result{ $title . $biblionumber } = $biblionumber;
2036         }
2037
2038     # sort the hash and return the same structure as GetRecords (Zebra querying)
2039         my $result_hash;
2040         my $numbers = 0;
2041         if ( $ordering eq 'title_az' ) {    # sort by title desc
2042             foreach my $key ( sort ( keys %result ) ) {
2043                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2044             }
2045         }
2046         else {                              # sort by title ASC
2047             foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2048                 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2049             }
2050         }
2051
2052         # limit the $results_per_page to result size if it's more
2053         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2054
2055         # for the requested page, replace biblionumber by the complete record
2056         # speed improvement : avoid reading too much things
2057         for (
2058             my $counter = $offset ;
2059             $counter <= $offset + $results_per_page ;
2060             $counter++
2061           )
2062         {
2063             $result_hash->{'RECORDS'}[$counter] =
2064               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc;
2065         }
2066         my $finalresult = ();
2067         $result_hash->{'hits'}         = $numbers;
2068         $finalresult->{'biblioserver'} = $result_hash;
2069         return $finalresult;
2070     }
2071     else {
2072
2073 #
2074 # order by ranking
2075 #
2076 # we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking
2077         my %result;
2078         my %count_ranking;
2079         foreach ( split /;/, $biblionumbers ) {
2080             my ( $biblionumber, $title ) = split /,/, $_;
2081             $title =~ /(.*)-(\d)/;
2082
2083             # get weight
2084             my $ranking = $2;
2085
2086 # note that we + the ranking because ranking is calculated on weight of EACH term requested.
2087 # if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N
2088 # biblio N has ranking = 6
2089             $count_ranking{$biblionumber} += $ranking;
2090         }
2091
2092 # build the result by "inverting" the count_ranking hash
2093 # hing : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead
2094 #         warn "counting";
2095         foreach ( keys %count_ranking ) {
2096             $result{ sprintf( "%10d", $count_ranking{$_} ) . '-' . $_ } = $_;
2097         }
2098
2099     # sort the hash and return the same structure as GetRecords (Zebra querying)
2100         my $result_hash;
2101         my $numbers = 0;
2102         foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2103             $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2104         }
2105
2106         # limit the $results_per_page to result size if it's more
2107         $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2108
2109         # for the requested page, replace biblionumber by the complete record
2110         # speed improvement : avoid reading too much things
2111         for (
2112             my $counter = $offset ;
2113             $counter <= $offset + $results_per_page ;
2114             $counter++
2115           )
2116         {
2117             $result_hash->{'RECORDS'}[$counter] =
2118               GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc
2119               if $result_hash->{'RECORDS'}[$counter];
2120         }
2121         my $finalresult = ();
2122         $result_hash->{'hits'}         = $numbers;
2123         $finalresult->{'biblioserver'} = $result_hash;
2124         return $finalresult;
2125     }
2126 }
2127
2128 =head2 ModBiblios
2129
2130 ($countchanged,$listunchanged) = ModBiblios($listbiblios, $tagsubfield,$initvalue,$targetvalue,$test);
2131
2132 this function changes all the values $initvalue in subfield $tag$subfield in any record in $listbiblios
2133 test parameter if set donot perform change to records in database.
2134
2135 =over 2
2136
2137 =item C<input arg:>
2138
2139     * $listbiblios is an array ref to marcrecords to be changed
2140     * $tagsubfield is the reference of the subfield to change.
2141     * $initvalue is the value to search the record for
2142     * $targetvalue is the value to set the subfield to
2143     * $test is to be set only not to perform changes in database.
2144
2145 =item C<Output arg:>
2146     * $countchanged counts all the changes performed.
2147     * $listunchanged contains the list of all the biblionumbers of records unchanged.
2148
2149 =item C<usage in the script:>
2150
2151 =back
2152
2153 my ($countchanged, $listunchanged) = EditBiblios($results->{RECORD}, $tagsubfield,$initvalue,$targetvalue);;
2154 #If one wants to display unchanged records, you should get biblios foreach @$listunchanged
2155 $template->param(countchanged => $countchanged, loopunchanged=>$listunchanged);
2156
2157 =cut
2158
2159 sub ModBiblios {
2160     my ( $listbiblios, $tagsubfield, $initvalue, $targetvalue, $test ) = @_;
2161     my $countmatched;
2162     my @unmatched;
2163     my ( $tag, $subfield ) = ( $1, $2 )
2164       if ( $tagsubfield =~ /^(\d{1,3})([a-z0-9A-Z@])?$/ );
2165     if ( ( length($tag) < 3 ) && $subfield =~ /0-9/ ) {
2166         $tag = $tag . $subfield;
2167         undef $subfield;
2168     }
2169     my ( $bntag,   $bnsubf )   = GetMarcFromKohaField('biblio.biblionumber','');
2170     my ( $itemtag, $itemsubf ) = GetMarcFromKohaField('items.itemnumber','');
2171     if ($tag eq $itemtag) {
2172         # do not allow the embedded item tag to be
2173         # edited from here
2174         warn "Attempting to edit item tag via C4::Search::ModBiblios -- not allowed";
2175         return (0, []);
2176     }
2177     foreach my $usmarc (@$listbiblios) {
2178         my $record;
2179         $record = eval { MARC::Record->new_from_usmarc($usmarc) };
2180         my $biblionumber;
2181         if ($@) {
2182
2183             # usmarc is not a valid usmarc May be a biblionumber
2184             # FIXME - sorry, please let's figure out whether
2185             #         this function is to be passed a list of
2186             #         record numbers or a list of MARC::Record
2187             #         objects.  The former is probably better
2188             #         because the MARC records supplied by Zebra
2189             #         may be not current.
2190             $record       = GetMarcBiblio($usmarc);
2191             $biblionumber = $usmarc;
2192         }
2193         else {
2194             if ( $bntag >= 010 ) {
2195                 $biblionumber = $record->subfield( $bntag, $bnsubf );
2196             }
2197             else {
2198                 $biblionumber = $record->field($bntag)->data;
2199             }
2200         }
2201
2202         #GetBiblionumber is to be written.
2203         #Could be replaced by TransformMarcToKoha (But Would be longer)
2204         if ( $record->field($tag) ) {
2205             my $modify = 0;
2206             foreach my $field ( $record->field($tag) ) {
2207                 if ($subfield) {
2208                     if (
2209                         $field->delete_subfield(
2210                             'code'  => $subfield,
2211                             'match' => qr($initvalue)
2212                         )
2213                       )
2214                     {
2215                         $countmatched++;
2216                         $modify = 1;
2217                         $field->update( $subfield, $targetvalue )
2218                           if ($targetvalue);
2219                     }
2220                 }
2221                 else {
2222                     if ( $tag >= 010 ) {
2223                         if ( $field->delete_field($field) ) {
2224                             $countmatched++;
2225                             $modify = 1;
2226                         }
2227                     }
2228                     else {
2229                         $field->data = $targetvalue
2230                           if ( $field->data =~ qr($initvalue) );
2231                     }
2232                 }
2233             }
2234
2235             #       warn $record->as_formatted;
2236             if ($modify) {
2237                 ModBiblio( $record, $biblionumber,
2238                     GetFrameworkCode($biblionumber) )
2239                   unless ($test);
2240             }
2241             else {
2242                 push @unmatched, $biblionumber;
2243             }
2244         }
2245         else {
2246             push @unmatched, $biblionumber;
2247         }
2248     }
2249     return ( $countmatched, \@unmatched );
2250 }
2251
2252 END { }    # module clean-up code here (global destructor)
2253
2254 1;
2255 __END__
2256
2257 =head1 AUTHOR
2258
2259 Koha Developement team <info@koha.org>
2260
2261 =cut