3 # This file is part of Koha.
5 # Koha is free software; you can redistribute it and/or modify it under the
6 # terms of the GNU General Public License as published by the Free Software
7 # Foundation; either version 2 of the License, or (at your option) any later
10 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License along with
15 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
16 # Suite 330, Boston, MA 02111-1307 USA
19 # use warnings; # FIXME
22 use C4::Biblio; # GetMarcFromKohaField, GetBiblioData
23 use C4::Koha; # getFacets
25 use C4::Search::PazPar2;
27 use C4::Dates qw(format_date);
32 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
34 # set the version for version checking
# Debug tracing: $DEBUG is 1 when the DEBUG environment variable holds a true
# value and 0 otherwise; the "warn ... if $DEBUG" statements in this file key
# off it.
37 $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
42 C4::Search - Functions for searching the Koha catalog.
46 See opac/opac-search.pl or catalogue/search.pl for example of usage
50 This module provides searching functions for Koha's bibliographic databases
66 # make all your functions, whether exported or not;
70 ($biblionumber,$title) = FindDuplicate($record);
72 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
# Body of the duplicate finder (its "sub" line is not visible in this excerpt).
# The incoming MARC record is mapped to Koha fields with TransformMarcToKoha(),
# a search query is built from the ISBN or from title/itemtype/author, the
# query is run through SimpleSearch(), and the biblionumber and title of each
# record returned are pushed, in that order, onto @results.
78 my $dbh = C4::Context->dbh;
79 my $result = TransformMarcToKoha( $dbh, $record, '' );
84 my ( $biblionumber, $title );
86 # search duplicate on ISBN, easy and fast..
88 if ( $result->{isbn} ) {
# Normalise the ISBN: drop everything from the first "(" onwards, then any
# trailing whitespace, before using it as the search term.
89 $result->{isbn} =~ s/\(.*$//;
90 $result->{isbn} =~ s/\s+$//;
91 $query = "isbn=$result->{isbn}";
# Title-based query: backslashes, double quotes and parentheses are stripped
# from the title before it is interpolated into the query string.
94 $result->{title} =~ s /\\//g;
95 $result->{title} =~ s /\"//g;
96 $result->{title} =~ s /\(//g;
97 $result->{title} =~ s /\)//g;
99 # FIXME: instead of removing operators, could just do
100 # quotes around the value
# NOTE(review): this pattern has no word boundaries and is case-sensitive, so
# it also deletes the letters "and", "or" and "not" from inside longer words.
101 $result->{title} =~ s/(and|or|not)//g;
102 $query = "ti,ext=$result->{title}";
103 $query .= " and itemtype=$result->{itemtype}"
104 if ( $result->{itemtype} );
# The author, when present, gets the same clean-up as the title and is added
# as an extra "and" clause.
105 if ( $result->{author} ) {
106 $result->{author} =~ s /\\//g;
107 $result->{author} =~ s /\"//g;
108 $result->{author} =~ s /\(//g;
109 $result->{author} =~ s /\)//g;
111 # remove valid operators
112 $result->{author} =~ s/(and|or|not)//g;
113 $query .= " and au,ext=$result->{author}";
117 # FIXME: add error handling
# $error is captured here but not examined in the lines visible below.
118 my ( $error, $searchresults ) = SimpleSearch($query); # FIXME :: hardcoded !
# Each hit is a raw USMARC string; decode it and map it to Koha fields so the
# biblionumber and title can be read.
120 foreach my $possible_duplicate_record (@$searchresults) {
122 MARC::Record->new_from_usmarc($possible_duplicate_record);
123 my $result = TransformMarcToKoha( $dbh, $marcrecord, '' );
125 # FIXME :: why 2 $biblionumber ?
127 push @results, $result->{'biblionumber'};
128 push @results, $result->{'title'};
136 ( $error, $results, $total_hits ) = SimpleSearch( $query, $offset, $max_results, [@servers] );
138 This function provides a simple search API on the bibliographic catalog
144 * $query can be a simple keyword or a complete CCL query
145 * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
146 * $offset - If present, represents the number of records at the beginning to omit. Defaults to 0
147 * $max_results - if present, determines the maximum number of records to fetch. undef is All. defaults to undef.
152 * $error is empty unless an error is detected
153 * \@results is an array of records.
154 * $total_hits is the number of hits that would have been returned with no limit
156 =item C<usage in the script:>
160 my ( $error, $marcresults, $total_hits ) = SimpleSearch($query);
162 if (defined $error) {
163 $template->param(query_error => $error);
164 warn "error: ".$error;
165 output_html_with_http_headers $input, $cookie, $template->output;
169 my $hits = scalar @$marcresults;
172 for my $i (0..$hits - 1) {
174 my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]);
175 my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,'');
177 #build the hash for the template.
178 $resultsloop{title} = $biblio->{'title'};
179 $resultsloop{subtitle} = $biblio->{'subtitle'};
180 $resultsloop{biblionumber} = $biblio->{'biblionumber'};
181 $resultsloop{author} = $biblio->{'author'};
182 $resultsloop{publishercode} = $biblio->{'publishercode'};
183 $resultsloop{publicationyear} = $biblio->{'publicationyear'};
185 push @results, \%resultsloop;
188 $template->param(result=>\@results);
# Body of SimpleSearch (its "sub" line is not visible in this excerpt).
#
# Arguments, as unpacked on the first line:
#   $query       - the search string handed to the CCL-to-RPN query constructor
#   $offset      - number of leading records to skip; optional
#   $max_results - maximum number of records to fetch per server; optional
#   $servers     - array reference of server names; optional
#
# Every return statement visible here yields a three-element list
# ( $error, $results_ref, $total_hits ): the error slot is undef on success,
# and the other two slots are undef when an error is returned.
193 my ( $query, $offset, $max_results, $servers ) = @_;
# NoZebra mode: the query is handed to NZanalyse()/NZorder() and only the
# 'biblioserver' entry of what comes back is used.
195 if ( C4::Context->preference('NoZebra') ) {
196 my $result = NZorder( NZanalyse($query) )->{'biblioserver'};
199 && $result->{hits} > 0 ? $result->{'RECORDS'} : [] );
200 return ( undef, $search_result, scalar($result->{hits}) );
203 # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
204 my @servers = defined ( $servers ) ? @$servers : ( "biblioserver" );
# A false query (empty string, 0 or undef) is reported through the error slot.
210 return ( "No query entered", undef, undef ) unless $query;
212 # Initialize & Search Zebra
# One connection, one query object and one result set per server, kept at the
# same index in @zconns, @zoom_queries and @tmpresults.
213 for ( my $i = 0 ; $i < @servers ; $i++ ) {
215 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
216 $zoom_queries[$i] = new ZOOM::Query::CCL2RPN( $query, $zconns[$i]);
217 $tmpresults[$i] = $zconns[$i]->search( $zoom_queries[$i] );
221 $zconns[$i]->errmsg() . " ("
222 . $zconns[$i]->errcode() . ") "
223 . $zconns[$i]->addinfo() . " "
224 . $zconns[$i]->diagset();
# The message assembled above is only returned when the connection reports a
# non-zero error code.
226 return ( $error, undef, undef ) if $zconns[$i]->errcode();
230 # caught a ZOOM::Exception
234 . $@->addinfo() . " "
237 return ( $error, undef, undef );
# ZOOM::event() returns the 1-based position of the connection that produced
# an event, hence the "$i - 1" indexing; 0 ends the loop.
240 while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
241 my $event = $zconns[ $i - 1 ]->last_event();
242 if ( $event == ZOOM::Event::ZEND ) {
# $first_record and $last_record are 1-based positions in the result set.
244 my $first_record = defined( $offset ) ? $offset+1 : 1;
245 my $hits = $tmpresults[ $i - 1 ]->size();
246 $total_hits += $hits;
247 my $last_record = $hits;
# NOTE(review): when $max_results is defined but $offset is not, $offset is
# used as 0 in this arithmetic.
248 if ( defined $max_results && $offset + $max_results < $hits ) {
249 $last_record = $offset + $max_results;
252 for my $j ( $first_record..$last_record ) {
253 my $record = $tmpresults[ $i - 1 ]->record( $j-1 )->raw(); # 0 indexed
254 push @results, $record;
# Clean-up loops over the per-server result sets and query objects; the body of
# the first loop is not visible here.
259 foreach my $result (@tmpresults) {
262 foreach my $zoom_query (@zoom_queries) {
263 $zoom_query->destroy();
266 return ( undef, \@results, $total_hits );
272 ( undef, $results_hashref, \@facets_loop ) = getRecords (
274 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
275 $results_per_page, $offset, $expanded_facet, $branches,
279 The all singing, all dancing, multi-server, asynchronous, scanning,
280 searching, record nabbing, facet-building
282 See verbose embedded documentation.
# Body of getRecords (its "sub" line and part of the parameter list are not
# visible in this excerpt; $query_type and $scan used below presumably come
# from the hidden part of that list -- confirm).
#
# Overall flow, as far as the visible lines show:
#   1. for every server, open a connection, start a search (or a scan) and
#      apply the requested sort;
#   2. as each connection signals completion, copy the requested page of raw
#      records into a per-server hash and count facet values for the
#      biblioserver;
#   3. turn the facet counters into the @facets_loop structure;
#   4. return ( undef, $results_hashref, \@facets_loop ).
288 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
289 $results_per_page, $offset, $expanded_facet, $branches,
293 my @servers = @$servers_ref;
294 my @sort_by = @$sort_by_ref;
296 # Initialize variables for the ZOOM connection and results object
300 my $results_hashref = ();
302 # Initialize variables for the faceted results objects
303 my $facets_counter = ();
304 my $facets_info = ();
305 my $facets = getFacets();
308 ; # stores the ref to array of hashes for template facets loop
310 ### LOOP THROUGH THE SERVERS
311 for ( my $i = 0 ; $i < @servers ; $i++ ) {
312 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
314 # perform the search, create the results objects
315 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
316 my $query_to_use = ($servers[$i] =~ /biblioserver/) ? $koha_query : $simple_query;
318 #$query_to_use = $simple_query if $scan;
319 warn $simple_query if ( $scan and $DEBUG );
321 # Check if we've got a query_type defined, if so, use it
# The query object class is chosen from the prefix of $query_type:
# ccl -> CCL2RPN, cql -> CQL, pqf -> PQF.
324 if ($query_type =~ /^ccl/) {
325 $query_to_use =~ s/\:/\=/g; # change : to = last minute (FIXME)
326 $results[$i] = $zconns[$i]->search(new ZOOM::Query::CCL2RPN($query_to_use, $zconns[$i]));
327 } elsif ($query_type =~ /^cql/) {
328 $results[$i] = $zconns[$i]->search(new ZOOM::Query::CQL($query_to_use, $zconns[$i]));
329 } elsif ($query_type =~ /^pqf/) {
330 $results[$i] = $zconns[$i]->search(new ZOOM::Query::PQF($query_to_use, $zconns[$i]));
332 warn "Unknown query_type '$query_type'. Results undetermined.";
# Without a query type the query is treated as CCL and either scanned or
# searched (the condition selecting between the two is not visible here).
335 $results[$i] = $zconns[$i]->scan( new ZOOM::Query::CCL2RPN($query_to_use, $zconns[$i]));
337 $results[$i] = $zconns[$i]->search(new ZOOM::Query::CCL2RPN($query_to_use, $zconns[$i]));
341 warn "WARNING: query problem with $query_to_use " . $@;
344 # Concatenate the sort_by limits and pass them to the results object
345 # Note: sort will override rank
# Each recognised sort name appends "1=<attribute> <direction> " to $sort_by;
# "<i" is used for the ascending/a-z names and ">i" for the descending/z-a ones.
347 foreach my $sort (@sort_by) {
348 if ( $sort eq "author_az" ) {
349 $sort_by .= "1=1003 <i ";
351 elsif ( $sort eq "author_za" ) {
352 $sort_by .= "1=1003 >i ";
354 elsif ( $sort eq "popularity_asc" ) {
355 $sort_by .= "1=9003 <i ";
357 elsif ( $sort eq "popularity_dsc" ) {
358 $sort_by .= "1=9003 >i ";
360 elsif ( $sort eq "call_number_asc" ) {
361 $sort_by .= "1=20 <i ";
363 elsif ( $sort eq "call_number_dsc" ) {
364 $sort_by .= "1=20 >i ";
366 elsif ( $sort eq "pubdate_asc" ) {
367 $sort_by .= "1=31 <i ";
369 elsif ( $sort eq "pubdate_dsc" ) {
370 $sort_by .= "1=31 >i ";
372 elsif ( $sort eq "acqdate_asc" ) {
373 $sort_by .= "1=32 <i ";
375 elsif ( $sort eq "acqdate_dsc" ) {
376 $sort_by .= "1=32 >i ";
378 elsif ( $sort eq "title_az" ) {
379 $sort_by .= "1=4 <i ";
381 elsif ( $sort eq "title_za" ) {
382 $sort_by .= "1=4 >i ";
# NOTE(review): the warning below is only emitted when $sort_by is already
# non-empty, i.e. when an earlier sort in the list was recognised.
385 warn "Ignoring unrecognized sort '$sort' requested" if $sort_by;
389 if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
390 warn "WARNING sort $sort_by failed";
393 } # finished looping through servers
395 # The big moment: asynchronously retrieve results from all servers
# ZOOM::event() returns the 1-based position of the connection that produced
# an event, hence the "$i - 1" indexing; 0 ends the loop.
396 while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
397 my $ev = $zconns[ $i - 1 ]->last_event();
398 if ( $ev == ZOOM::Event::ZEND ) {
399 next unless $results[ $i - 1 ];
400 my $size = $results[ $i - 1 ]->size();
404 # loop through the results
405 $results_hash->{'hits'} = $size;
# $times is the exclusive upper bound of the page being fetched; only the
# full-page case is visible here.
407 if ( $offset + $results_per_page <= $size ) {
408 $times = $offset + $results_per_page;
413 for ( my $j = $offset ; $j < $times ; $j++ ) {
418 ## Check if it's an index scan
420 my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
422 # here we create a minimal MARC record and hand it off to the
423 # template just like a normal result ... perhaps not ideal, but
425 my $tmprecord = MARC::Record->new();
426 $tmprecord->encoding('UTF-8');
430 # the minimal record in author/title (depending on MARC flavour)
# UNIMARC: term and occurrence count both go into field 200 ($a and $f).
# Otherwise: the term goes into 245$a and the count into 100$a.
431 if (C4::Context->preference("marcflavour") eq "UNIMARC") {
432 $tmptitle = MARC::Field->new('200',' ',' ', a => $term, f => $occ);
433 $tmprecord->append_fields($tmptitle);
435 $tmptitle = MARC::Field->new('245',' ',' ', a => $term,);
436 $tmpauthor = MARC::Field->new('100',' ',' ', a => $occ,);
437 $tmprecord->append_fields($tmptitle);
438 $tmprecord->append_fields($tmpauthor);
440 $results_hash->{'RECORDS'}[$j] = $tmprecord->as_usmarc();
# Normal (non-scan) result: store the raw record at its absolute position $j.
445 $record = $results[ $i - 1 ]->record($j)->raw();
447 # warn "RECORD $j:".$record;
448 $results_hash->{'RECORDS'}[$j] = $record;
450 # Fill the facets while we're looping, but only for the biblioserver
451 $facet_record = MARC::Record->new_from_usmarc($record)
452 if $servers[ $i - 1 ] =~ /biblioserver/;
454 #warn $servers[$i-1]."\n".$record; #.$facet_record->title();
# NOTE(review): "<=" makes $k run one past the last index of @$facets; the
# "or next" guard on the following line skips that undefined entry.
456 for ( my $k = 0 ; $k <= @$facets ; $k++ ) {
457 ($facets->[$k]) or next;
458 my @fields = map {$facet_record->field($_)} @{$facets->[$k]->{'tags'}} ;
459 for my $field (@fields) {
460 my @subfields = $field->subfields();
461 for my $subfield (@subfields) {
462 my ( $code, $data ) = @$subfield;
463 ($code eq $facets->[$k]->{'subfield'}) or next;
# Count one occurrence of this value under the facet's link_value.
464 $facets_counter->{ $facets->[$k]->{'link_value'} }->{$data}++;
467 $facets_info->{ $facets->[$k]->{'link_value'} }->{'label_value'} =
468 $facets->[$k]->{'label_value'};
469 $facets_info->{ $facets->[$k]->{'link_value'} }->{'expanded'} =
470 $facets->[$k]->{'expanded'};
475 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
478 # warn "connection ", $i-1, ": $size hits";
479 # warn $results[$i-1]->record(0)->render() if $size > 0;
# Build the template facet structure, for the biblioserver only.
482 if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
# NOTE(review): the values of %$facets_counter are hash references (see the
# "++" above), so this numeric comparison orders by reference value, not by
# any count.
484 sort { $facets_counter->{$b} <=> $facets_counter->{$a} }
485 keys %$facets_counter )
488 my $number_of_facets;
489 my @this_facets_array;
# Within one facet type, values are ordered by descending count.
492 $facets_counter->{$link_value}->{$b}
493 <=> $facets_counter->{$link_value}->{$a}
494 } keys %{ $facets_counter->{$link_value} }
# A value is included when fewer than 6 have been seen so far, or this facet
# type is the one the caller asked to expand, or the facet is flagged expanded.
498 if ( ( $number_of_facets < 6 )
499 || ( $expanded_facet eq $link_value )
500 || ( $facets_info->{$link_value}->{'expanded'} ) )
503 # Sanitize the link value ), ( will cause errors with CCL,
504 my $facet_link_value = $one_facet;
505 $facet_link_value =~ s/(\(|\))/ /g;
507 # fix the length that will display in the label,
# Labels longer than 20 characters are cut to 20 and suffixed with "...".
508 my $facet_label_value = $one_facet;
510 substr( $one_facet, 0, 20 ) . "..."
511 unless length($facet_label_value) <= 20;
513 # if it's a branch, label by the name, not the code,
514 if ( $link_value =~ /branch/ ) {
516 $branches->{$one_facet}->{'branchname'};
519 # but we're down with the whole label being in the link's title.
520 push @this_facets_array, {
521 facet_count => $facets_counter->{$link_value}->{$one_facet},
522 facet_label_value => $facet_label_value,
523 facet_title_value => $one_facet,
524 facet_link_value => $facet_link_value,
525 type_link_value => $link_value,
530 # handle expanded option
531 unless ( $facets_info->{$link_value}->{'expanded'} ) {
533 if ( ( $number_of_facets > 6 )
534 && ( $expanded_facet ne $link_value ) );
# The entry built below is left out when its label matches /Libraries/ and
# the singleBranchMode preference is set.
537 type_link_value => $link_value,
538 type_id => $link_value . "_id",
539 "type_label_" . $facets_info->{$link_value}->{'label_value'} => 1,
540 facets => \@this_facets_array,
541 expandable => $expandable,
542 expand => $link_value,
543 } unless ( ($facets_info->{$link_value}->{'label_value'} =~ /Libraries/) and (C4::Context->preference('singleBranchMode')) );
548 return ( undef, $results_hashref, \@facets_loop );
# Body of the PazPar2-backed record fetcher (its "sub" line is not visible in
# this excerpt). Of the parameters visible below, only $simple_query, $offset
# and $results_per_page are used in the visible lines. It returns
# ( undef, $results_hashref, \@facets_loop ), where the results live under the
# 'biblioserver' key as a list of GROUPS and @facets_loop is left empty.
553 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
554 $results_per_page, $offset, $expanded_facet, $branches,
558 my $paz = C4::Search::PazPar2->new(C4::Context->config('pazpar2url'));
560 $paz->search($simple_query);
564 my $results_hashref = {};
# Both responses are parsed with XMLin; forcearray => 1 on the "show" response
# is why its elements are read with ->[0] below.
565 my $stats = XMLin($paz->stat);
566 my $results = XMLin($paz->show($offset, $results_per_page, 'work-title:1'), forcearray => 1);
568 # for a grouped search result, the number of hits
569 # is the number of groups returned; 'bib_hits' will have
570 # the total number of bibs.
571 $results_hashref->{'biblioserver'}->{'hits'} = $results->{'merged'}->[0];
572 $results_hashref->{'biblioserver'}->{'bib_hits'} = $stats->{'hits'};
# One result group per hit, labelled "title / author" when an author is
# present and "title" otherwise.
574 HIT: foreach my $hit (@{ $results->{'hit'} }) {
575 my $recid = $hit->{recid}->[0];
577 my $work_title = $hit->{'md-work-title'}->[0];
579 if (exists $hit->{'md-work-author'}) {
580 $work_author = $hit->{'md-work-author'}->[0];
582 my $group_label = (defined $work_author) ? "$work_title / $work_author" : $work_title;
584 my $result_group = {};
585 $result_group->{'group_label'} = $group_label;
586 $result_group->{'group_merge_key'} = $recid;
# The hit's own count overrides $count when present; the initial value of
# $count is set on a line not shown here.
589 if (exists $hit->{count}) {
590 $count = $hit->{count}->[0];
592 $result_group->{'group_count'} = $count;
# Fetch each member record of the group by its position within the group.
594 for (my $i = 0; $i < $count; $i++) {
595 # FIXME -- may need to worry about diacritics here
596 my $rec = $paz->record($recid, $i);
597 push @{ $result_group->{'RECORDS'} }, $rec;
600 push @{ $results_hashref->{'biblioserver'}->{'GROUPS'} }, $result_group;
603 # pass through facets
# The term list is fetched and parsed, but the code that would copy it into
# @facets_loop is commented out below, so @facets_loop stays empty.
604 my $termlist_xml = $paz->termlist('author,subject');
605 my $terms = XMLin($termlist_xml, forcearray => 1);
606 my @facets_loop = ();
607 #die Dumper($results);
608 # foreach my $list (sort keys %{ $terms->{'list'} }) {
610 # foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) {
612 # facet_label_value => $facet->{'name'}->[0],
615 # push @facets_loop, ( {
616 # type_label => $list,
617 # facets => \@facets,
621 return ( undef, $results_hashref, \@facets_loop );
# _remove_stopwords( $operand, $index )
#
# Strip the configured stopwords (the keys of C4::Context->stopwords) from a
# search operand.
#
# Parameters:
#   $operand - the search terms
#   $index   - the index qualifier(s) the operand is searched against; may be
#              undefined or empty
#
# Returns a two-element list:
#   * the operand, with each removed stopword (together with the
#     non-alphanumeric characters matched on either side of it) replaced by a
#     single space;
#   * a reference to an array holding the stopwords that were removed.
#
# Operands for phrase ("phr") or exact ("ext") qualified indexes are returned
# untouched. The boolean operators "and", "or" and "not" are never removed,
# even when they appear in the stopword list.
sub _remove_stopwords {
    my ( $operand, $index ) = @_;
    my @stopwords_removed;

    # phrase and exact-qualified indexes shouldn't have stopwords removed
    return ( $operand, \@stopwords_removed )
      if defined $index && $index =~ m/phr|ext/;

    # Remove stopwords from the operand (case insensitive). Word boundaries
    # are detected with the Unicode alphanumeric property so that diacritics
    # are handled correctly: otherwise a French word like "leçon" would be
    # split into "le" and "çon", "le" is a stopword, we'd be left with "çon"
    # and wouldn't find anything.
    foreach my $stopword ( keys %{ C4::Context->stopwords } ) {

        # Don't remove operators. The test is anchored so that stopwords which
        # merely contain an operator ("for", "nor", "another") are still
        # removed, and it ignores case because the removal below does too.
        next if $stopword =~ /\A(?:and|or|not)\z/i;

        # In list context the match returns its captures; only the first
        # occurrence is kept, and every occurrence of that exact text is then
        # replaced by a single space.
        if ( my ($matched) = ( $operand =~
/(\P{IsAlnum}\Q$stopword\E\P{IsAlnum}|^\Q$stopword\E\P{IsAlnum}|\P{IsAlnum}\Q$stopword\E$|^\Q$stopword\E$)/gi
            ) )
        {
            $operand =~ s/\Q$matched\E/ /gi;
            push @stopwords_removed, $stopword;
        }
    }
    return ( $operand, \@stopwords_removed );
}
# _detect_truncation( $operand, $index )
#
# Sort the whitespace-separated words of an operand into buckets according to
# where the truncation character "*" appears.
#
# Parameters:
#   $operand - the search terms
#   $index   - accepted for interface compatibility; not used here
#
# Returns a five-element list of array references, in this order:
#   non-truncated words        (no "*" at all)
#   right-truncated words      ("word*",  returned without the "*")
#   left-truncated words       ("*word",  returned without the "*")
#   right-and-left truncated   ("*word*", returned without the "*"s)
#   everything else containing a "*" (e.g. "wo*rd"), returned unchanged
sub _detect_truncation {
    my ( $operand, $index ) = @_;
    my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
        @regexpr );

    # split ' ' skips leading whitespace and treats a run of whitespace as
    # one separator, so no empty "words" end up in the non-truncated bucket.
    my @wordlist = split ' ', ( defined $operand ? $operand : '' );

    foreach my $word (@wordlist) {
        if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
            push @rightlefttruncated, $word;
        }
        elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
            push @lefttruncated, $word;
        }
        elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
            push @righttruncated, $word;
        }
        elsif ( index( $word, "*" ) < 0 ) {
            push @nontruncated, $word;
        }
        else {
            push @regexpr, $word;
        }
    }
    return (
        \@nontruncated,       \@righttruncated, \@lefttruncated,
        \@rightlefttruncated, \@regexpr
    );
}
# Build a stemmed, right-truncated variant of a search operand: the operand is
# split on single spaces, each word is stemmed with Lingua::Stem (EN-US locale)
# and the stems are joined back together, each followed by a space and most of
# them also by the truncation marker "?". Operands containing a digit are
# returned unchanged. The parameter unpacking and the argument passed to
# add_exceptions() are on lines not shown in this excerpt.
680 sub _build_stemmed_operand {
684 # If operand contains a digit, it is almost certainly an identifier, and should
685 # not be stemmed. This is particularly relevant for ISBNs and ISSNs, which
686 # can contain the letter "X" - for example, _build_stemmed_operand would reduce
687 # "014100018X" to "x ", which for a MARC21 database would bring up irrelevant
688 # results (e.g., "23 x 29 cm." from the 300$c). Bug 2098.
689 return $operand if $operand =~ /\d/;
691 # FIXME: the locale should be set based on the user's language and/or search choice
692 my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
694 # FIXME: these should be stored in the db so the librarian can modify the behavior
695 $stemmer->add_exceptions(
702 my @words = split( / /, $operand );
703 my $stems = $stemmer->stem(@words);
704 for my $stem (@$stems) {
705 $stemmed_operand .= "$stem";
# "?" is appended unless the stem ends in "and", "or" or "not", or is shorter
# than three characters.
# NOTE(review): the pattern is anchored only at the end, so stems that merely
# end in those letters (e.g. "author") are also left without "?".
706 $stemmed_operand .= "?"
707 unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
708 $stemmed_operand .= " ";
710 warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
711 return $stemmed_operand;
# _build_weighted_query( $operand, $stemmed_operand, $index )
#
# FIELD WEIGHTING - This is largely experimental stuff. It wraps an operand in
# a rank specification, "(rk=( ... ))", made of several alternative clauses
# joined with "or", each carrying its own relevance attribute (r1, r2, ...).
#
# Parameters:
#   $operand         - the search terms; interpolated inside double quotes
#   $stemmed_operand - stemmed form of the operand; only used for keyword
#                      searches when the QueryStemming preference is on
#   $index           - index qualifier; may be undefined or empty, in which
#                      case the keyword clauses are used
#
# Returns the weighted query string.
sub _build_weighted_query {
    my ( $operand, $stemmed_operand, $index ) = @_;
    my $stemming      = C4::Context->preference("QueryStemming") || 0;
    my $fuzzy_enabled = C4::Context->preference("QueryFuzzy")    || 0;

    my $weighted_query = "(rk=(";    # Specifies that we're applying rank

    # Keyword, or, no index specified. The emptiness test comes first so that
    # an undefined index is never compared as a string.
    if ( !$index || $index eq 'kw' ) {
        $weighted_query .=
          "Title-cover,ext,r1=\"$operand\"";    # exact title-cover
        $weighted_query .= " or ti,ext,r2=\"$operand\"";    # exact title
        $weighted_query .= " or ti,phr,r3=\"$operand\"";    # phrase title

        #$weighted_query .= " or any,ext,r4=$operand";       # exact any
        #$weighted_query .=" or kw,wrdl,r5=\"$operand\"";    # word list any

        # add fuzzy, word list
        $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
          if $fuzzy_enabled;

        # add stemming, right truncation
        $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
          if ( $stemming and $stemmed_operand );

        $weighted_query .= " or wrdl,r9=\"$operand\"";

        # embedded sorting: 0 a-z; 1 z-a
        # $weighted_query .= ") or (sort1,aut=1";
    }

    # Barcode searches should skip this process
    elsif ( $index eq 'bc' ) {
        $weighted_query .= "bc=\"$operand\"";
    }

    # Authority-number searches should skip this process
    elsif ( $index eq 'an' ) {
        $weighted_query .= "an=\"$operand\"";
    }

    # If the index already has more than one qualifier, wrap the operand
    # in quotes and pass it back (assumption is that the user knows what they
    # are doing and won't appreciate us mucking up their query
    elsif ( $index =~ ',' ) {
        $weighted_query .= " $index=\"$operand\"";
    }

    #TODO: build better cases based on specific search indexes
    else {
        $weighted_query .= " $index,ext,r1=\"$operand\"";    # exact index

        #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
        $weighted_query .= " or $index,phr,r3=\"$operand\"";    # phrase index
        $weighted_query .=
          " or $index,rt,wrdl,r3=\"$operand\"";    # word list index
    }

    $weighted_query .= "))";    # close rank specification
    return $weighted_query;
}
778 $simple_query, $query_cgi,
780 $limit_cgi, $limit_desc,
781 $stopwords_removed, $query_type ) = buildQuery ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
783 Build queries and limits in CCL, CGI, Human,
784 handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
786 See verbose embedded documentation.
# Body of buildQuery (its "sub" line is not visible in this excerpt).
#
# Arguments, as unpacked on the first line: array references $operators,
# $operands, $indexes, $limits and $sort_by (each may be false, in which case
# an empty list is used) and the flag $scan.
#
# Every return list visible here has ten slots:
#   ( undef, $query, $simple_query, $query_cgi, $query_desc,
#     $limit, $limit_cgi, $limit_desc, $stopwords_removed, $query_type )
#
# Overall flow, as far as the visible lines show:
#   1. read the query-related system preferences;
#   2. short-circuit for "ccl=", "cql=" and "pqf=" prefixed queries and for
#      queries containing parentheses;
#   3. otherwise combine operands, indexes and operators one by one, applying
#      stopword removal, truncation, stemming and field weighting;
#   4. fold the limits into $limit / $limit_cgi / $limit_desc;
#   5. normalise the strings and append the limit to the query.
792 my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_;
794 warn "---------\nEnter buildQuery\n---------" if $DEBUG;
797 my @operators = $operators ? @$operators : ();
798 my @indexes = $indexes ? @$indexes : ();
799 my @operands = $operands ? @$operands : ();
800 my @limits = $limits ? @$limits : ();
801 my @sort_by = $sort_by ? @$sort_by : ();
# Feature switches; a false or missing preference becomes 0.
803 my $stemming = C4::Context->preference("QueryStemming") || 0;
804 my $auto_truncation = C4::Context->preference("QueryAutoTruncate") || 0;
805 my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
806 my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
807 my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
809 # no stemming/weight/fuzzy in NoZebra
810 if ( C4::Context->preference("NoZebra") ) {
# Both start out as the first operand.
816 my $query = $operands[0];
817 my $simple_query = $operands[0];
819 # initialize the variables we're passing back
828 my $stopwords_removed; # flag to determine if stopwords have been removed
830 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
# $' is Perl's post-match variable: the text that follows the matched prefix,
# i.e. the query without its "ccl=" / "cql=" / "pqf=" marker.
832 if ( $query =~ /^ccl=/ ) {
833 return ( undef, $', $', "q=ccl=$'", $', '', '', '', '', 'ccl' );
835 if ( $query =~ /^cql=/ ) {
836 return ( undef, $', $', "q=cql=$'", $', '', '', '', '', 'cql' );
838 if ( $query =~ /^pqf=/ ) {
839 return ( undef, $', $', "q=pqf=$'", $', '', '', '', '', 'pqf' );
842 # pass nested queries directly
843 # FIXME: need better handling of some of these variables in this case
# Any query containing a parenthesis is returned as-is with query type 'ccl'.
844 if ( $query =~ /(\(|\))/ ) {
846 undef, $query, $simple_query, $query_cgi,
847 $query, $limit, $limit_cgi, $limit_desc,
848 $stopwords_removed, 'ccl'
852 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
853 # query operands and indexes and add stemming, truncation, field weighting, etc.
854 # Once we do so, we'll end up with a value in $query, just like if we had an
855 # incoming $query from the user
858 ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
860 ; # a flag used to keep track if there was a previous query
861 # if there was, we can apply the current operator
# NOTE(review): "<=" makes $i run one past the last operand; the
# "if ( $operands[$i] )" test below skips that undefined entry (and any false
# operand).
863 for ( my $i = 0 ; $i <= @operands ; $i++ ) {
865 # COMBINE OPERANDS, INDEXES AND OPERATORS
866 if ( $operands[$i] ) {
868 # A flag to determine whether or not to add the index to the query
871 # If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling
872 if ( $operands[$i] =~ /(:|=)/ || $scan ) {
875 $remove_stopwords = 0;
877 my $operand = $operands[$i];
878 my $index = $indexes[$i];
880 # Add index-specific attributes
881 # Date of Publication
# For the yr, acqdate, nb and ns indexes all query-rewriting features are
# switched off; yr and acqdate additionally get a structure attribute.
# NOTE(review): these switches are function-level variables, so once turned
# off they stay off for the operands that follow.
882 if ( $index eq 'yr' ) {
883 $index .= ",st-numeric";
885 $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
888 # Date of Acquisition
889 elsif ( $index eq 'acqdate' ) {
890 $index .= ",st-date-normalized";
892 $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
894 # ISBN,ISSN,Standard Number, don't need special treatment
895 elsif ( $index eq 'nb' || $index eq 'ns' ) {
898 $stemming, $auto_truncation,
899 $weight_fields, $fuzzy_enabled,
901 ) = ( 0, 0, 0, 0, 0 );
904 # Set default structure attribute (word list)
# ",wrdl" is added only when the index is set, the operand did not already
# carry an index, and the index has no st-/phr/ext/wrdl attribute.
906 unless ( $indexes_set || !$index || $index =~ /(st-|phr|ext|wrdl)/ ) {
907 $struct_attr = ",wrdl";
910 # Some helpful index variants
# "index[,attr]:" and "index[,attr]," respectively.
911 my $index_plus = $index . $struct_attr . ":" if $index;
912 my $index_plus_comma = $index . $struct_attr . "," if $index;
915 if ($remove_stopwords) {
916 ( $operand, $stopwords_removed ) =
917 _remove_stopwords( $operand, $index );
918 warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
919 warn "REMOVED STOPWORDS: @$stopwords_removed"
920 if ( $stopwords_removed && $DEBUG );
# Auto-truncation rewrites each whitespace-separated word; the per-word
# transformation inside the map block is on a line not shown here.
923 if ($auto_truncation){
924 #FIXME only valid with LTR scripts
925 $operand=join(" ",map{
927 }split (/\s+/,$operand));
928 warn $operand if $DEBUG;
932 my $truncated_operand;
933 my( $nontruncated, $righttruncated, $lefttruncated,
934 $rightlefttruncated, $regexpr
935 ) = _detect_truncation( $operand, $index );
937 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
# When at least one word carries a truncation marker, the operand is rebuilt
# as up to four index-qualified groups joined with "and": plain words,
# right-truncated (rtrn), left-truncated (ltrn) and both (rltrn).
942 scalar(@$righttruncated) + scalar(@$lefttruncated) +
943 scalar(@$rightlefttruncated) > 0 )
946 # Don't field weight or add the index to the query, we do it here
948 undef $weight_fields;
949 my $previous_truncation_operand;
950 if (scalar @$nontruncated) {
951 $truncated_operand .= "$index_plus @$nontruncated ";
952 $previous_truncation_operand = 1;
954 if (scalar @$righttruncated) {
955 $truncated_operand .= "and " if $previous_truncation_operand;
956 $truncated_operand .= $index_plus_comma . "rtrn:@$righttruncated ";
957 $previous_truncation_operand = 1;
959 if (scalar @$lefttruncated) {
960 $truncated_operand .= "and " if $previous_truncation_operand;
961 $truncated_operand .= $index_plus_comma . "ltrn:@$lefttruncated ";
962 $previous_truncation_operand = 1;
964 if (scalar @$rightlefttruncated) {
965 $truncated_operand .= "and " if $previous_truncation_operand;
966 $truncated_operand .= $index_plus_comma . "rltrn:@$rightlefttruncated ";
967 $previous_truncation_operand = 1;
970 $operand = $truncated_operand if $truncated_operand;
971 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
975 $stemmed_operand = _build_stemmed_operand($operand) if $stemming;
977 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
979 # Handle Field Weighting
980 my $weighted_operand;
981 if ($weight_fields) {
982 $weighted_operand = _build_weighted_query( $operand, $stemmed_operand, $index );
983 $operand = $weighted_operand;
987 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
989 # If there's a previous operand, we need to add an operator
# Three strings are grown in parallel: $query (for the search engine),
# $query_cgi (URL parameters) and $query_desc (human-readable description).
# The CGI and description strings use the operand as originally supplied,
# $operands[$i], not the rewritten $operand.
990 if ($previous_operand) {
992 # User-specified operator
993 if ( $operators[ $i - 1 ] ) {
994 $query .= " $operators[$i-1] ";
995 $query .= " $index_plus " unless $indexes_set;
996 $query .= " $operand";
997 $query_cgi .= "&op=$operators[$i-1]";
998 $query_cgi .= "&idx=$index" if $index;
999 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1001 " $operators[$i-1] $index_plus $operands[$i]";
1004 # Default operator is and
1007 $query .= "$index_plus " unless $indexes_set;
1008 $query .= "$operand";
1009 $query_cgi .= "&op=and&idx=$index" if $index;
1010 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1011 $query_desc .= " and $index_plus $operands[$i]";
1015 # There isn't a previous operand, don't need an operator
1018 # Field-weighted queries already have indexes set
1019 $query .= " $index_plus " unless $indexes_set;
1021 $query_desc .= " $index_plus $operands[$i]";
1022 $query_cgi .= "&idx=$index" if $index;
1023 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1024 $previous_operand = 1;
1029 warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
# Limits fall into three classes: the availability limit (matches
# /available/), grouped limits (match /mc/) that are OR-ed together, and
# regular limits that are AND-ed.
1032 my $group_OR_limits;
1033 my $availability_limit;
1034 foreach my $this_limit (@limits) {
1035 if ( $this_limit =~ /available/ ) {
1037 # 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1039 # all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1040 $availability_limit .=
1041 "( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and (lost,st-numeric=0) )"; #or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )";
1042 $limit_cgi .= "&limit=available";
1046 # group_OR_limits, prefixed by mc-
1047 # OR every member of the group
# NOTE(review): the pattern is not anchored, so any limit containing "mc"
# anywhere is treated as a grouped limit.
1048 elsif ( $this_limit =~ /mc/ ) {
1049 $group_OR_limits .= " or " if $group_OR_limits;
1050 $limit_desc .= " or " if $group_OR_limits;
1051 $group_OR_limits .= "$this_limit";
1052 $limit_cgi .= "&limit=$this_limit";
1053 $limit_desc .= " $this_limit";
1056 # Regular old limits
1058 $limit .= " and " if $limit || $query;
1059 $limit .= "$this_limit";
1060 $limit_cgi .= "&limit=$this_limit";
# For "branch:<code>" limits the description shows the branch name when
# GetBranchName() returns one, and the raw limit otherwise.
1061 if ($this_limit =~ /^branch:(.+)/) {
1062 my $branchcode = $1;
1063 my $branchname = GetBranchName($branchcode);
1064 if (defined $branchname) {
1065 $limit_desc .= " branch:$branchname";
1067 $limit_desc .= " $this_limit";
1070 $limit_desc .= " $this_limit";
# The grouped and availability limits are appended as parenthesised clauses.
1074 if ($group_OR_limits) {
1075 $limit .= " and " if ( $query || $limit );
1076 $limit .= "($group_OR_limits)";
1078 if ($availability_limit) {
1079 $limit .= " and " if ( $query || $limit );
1080 $limit .= "($availability_limit)";
1083 # Normalize the query and limit strings
# The loop aliases $_ to each variable in turn, so the substitutions modify
# the variables in place.
1086 for ( $query, $query_desc, $limit, $limit_desc ) {
# NOTE(review): as it appears here the first pattern replaces one space with
# one space; the trailing comment suggests a two-space pattern -- confirm
# against the original file.
1087 s/ / /g; # remove extra spaces
1088 s/^ //g; # remove any beginning spaces
1089 s/ $//g; # remove any ending spaces
1090 s/==/=/g; # remove double == from query
1092 $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1094 for ($query_cgi,$simple_query) {
1097 # append the limit to the query
1098 $query .= " " . $limit;
1102 warn "QUERY:" . $query;
1103 warn "QUERY CGI:" . $query_cgi;
1104 warn "QUERY DESC:" . $query_desc;
1105 warn "LIMIT:" . $limit;
1106 warn "LIMIT CGI:" . $limit_cgi;
1107 warn "LIMIT DESC:" . $limit_desc;
1108 warn "---------\nLeave buildQuery\n---------";
1111 undef, $query, $simple_query, $query_cgi,
1112 $query_desc, $limit, $limit_cgi, $limit_desc,
1113 $stopwords_removed, $query_type
1117 =head2 searchResults
1119 Format results in a form suitable for passing to the template
1123 # IMO this subroutine is pretty messy still -- it's responsible for
1124 # building the HTML output for the template
1126 my ( $searchdesc, $hits, $results_per_page, $offset, $scan, @marcresults ) = @_;
1127 my $dbh = C4::Context->dbh;
1130 #Build branchnames hash
1132 #get branch information.....
1134 my $bsth =$dbh->prepare("SELECT branchcode,branchname FROM branches"); # FIXME : use C4::Branch::GetBranches
1136 while ( my $bdata = $bsth->fetchrow_hashref ) {
1137 $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'};
1139 # FIXME - We build an authorised values hash here, using the default framework
1140 # though it is possible to have different authvals for different fws.
1142 my $shelflocations =GetKohaAuthorisedValues('items.location','');
1144 # get notforloan authorised value list (see $shelflocations FIXME)
1145 my $notforloan_authorised_value = GetAuthValCode('items.notforloan','');
1147 #Build itemtype hash
1148 #find itemtype & itemtype image
1152 "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes"
1155 while ( my $bdata = $bsth->fetchrow_hashref ) {
1156 foreach (qw(description imageurl summary notforloan)) {
1157 $itemtypes{ $bdata->{'itemtype'} }->{$_} = $bdata->{$_};
1161 #search item field code
1164 "SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'"
1167 my ($itemtag) = $sth->fetchrow;
1169 ## find column names of items related to MARC
1170 my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items");
1172 my %subfieldstosearch;
1173 while ( ( my $column ) = $sth2->fetchrow ) {
1174 my ( $tagfield, $tagsubfield ) =
1175 &GetMarcFromKohaField( "items." . $column, "" );
1176 $subfieldstosearch{$column} = $tagsubfield;
1179 # handle which records to actually retrieve
1181 if ( $hits && $offset + $results_per_page <= $hits ) {
1182 $times = $offset + $results_per_page;
1185 $times = $hits; # FIXME: if $hits is undefined, why do we want to equal it?
1187 my $marcflavour = C4::Context->preference("marcflavour");
1188 # loop through all of the records we've retrieved
1189 for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
1190 my $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
1191 my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' );
1192 $oldbiblio->{subtitle} = C4::Biblio::get_koha_field_from_marc('bibliosubtitle', 'subtitle', $marcrecord, '');
1193 $oldbiblio->{result_number} = $i + 1;
1195 # add imageurl to itemtype if there is one
1196 $oldbiblio->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
1198 $oldbiblio->{'authorised_value_images'} = C4::Items::get_authorised_value_images( C4::Biblio::get_biblio_authorised_values( $oldbiblio->{'biblionumber'}, $marcrecord ) );
1199 (my $aisbn) = $oldbiblio->{isbn} =~ /([\d-]*[X]*)/;
1201 $oldbiblio->{amazonisbn} = $aisbn;
1202 $oldbiblio->{description} = $itemtypes{ $oldbiblio->{itemtype} }->{description};
1203 $oldbiblio->{normalized_upc} = GetNormalizedUPC($marcrecord,$marcflavour);
1204 $oldbiblio->{normalized_ean} = GetNormalizedEAN($marcrecord,$marcflavour);
1205 $oldbiblio->{normalized_oclc} = GetNormalizedOCLCNumber($marcrecord,$marcflavour);
1206 $oldbiblio->{normalized_isbn} = GetNormalizedISBN(undef,$marcrecord,$marcflavour);
1207 $oldbiblio->{content_identifier_exists} = 1 if ($oldbiblio->{normalized_isbn} or $oldbiblio->{normalized_oclc} or $oldbiblio->{normalized_ean} or $oldbiblio->{normalized_upc});
1208 $oldbiblio->{edition} = $oldbiblio->{editionstatement};
1209 $oldbiblio->{description} = $itemtypes{ $oldbiblio->{itemtype} }->{description};
1210 # Build summary if there is one (the summary is defined in the itemtypes table)
1211 # FIXME: is this used anywhere, I think it can be commented out? -- JF
1212 if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
1213 my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1214 my @fields = $marcrecord->fields();
1215 foreach my $field (@fields) {
1216 my $tag = $field->tag();
1217 my $tagvalue = $field->as_string();
1218 if (! utf8::is_utf8($tagvalue)) {
1219 utf8::decode($tagvalue);
1223 s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
1224 unless ( $tag < 10 ) {
1225 my @subf = $field->subfields;
1226 for my $i ( 0 .. $#subf ) {
1227 my $subfieldcode = $subf[$i][0];
1228 my $subfieldvalue = $subf[$i][1];
1229 if (! utf8::is_utf8($subfieldvalue)) {
1230 utf8::decode($subfieldvalue);
1232 my $tagsubf = $tag . $subfieldcode;
1234 s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
1239 $summary =~ s/\[(.*?)]//g;
1240 $summary =~ s/\n/<br\/>/g;
1241 $oldbiblio->{summary} = $summary;
1244 # Pull out the items fields
1245 my @fields = $marcrecord->field($itemtag);
1247 # Setting item statuses for display
1248 my @available_items_loop;
1249 my @onloan_items_loop;
1250 my @notforloan_items_loop;
1251 my @other_items_loop;
1253 my $available_items;
1255 my $notforloan_items;
1258 my $ordered_count = 0;
1259 my $available_count = 0;
1260 my $onloan_count = 0;
1261 my $notforloan_count = 0;
1262 my $longoverdue_count = 0;
1263 my $other_count = 0;
1264 my $wthdrawn_count = 0;
1265 my $itemlost_count = 0;
1266 my $itembinding_count = 0;
1267 my $itemdamaged_count = 0;
1268 my $item_in_transit_count = 0;
1269 my $can_place_holds = 0;
1270 my $items_count = scalar(@fields);
1272 ( C4::Context->preference('maxItemsinSearchResults') )
1273 ? C4::Context->preference('maxItemsinSearchResults') - 1
1276 # loop through every item
1277 foreach my $field (@fields) {
1280 # populate the items hash
1281 foreach my $code ( keys %subfieldstosearch ) {
1282 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1284 my $hbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'homebranch' : 'holdingbranch';
1285 my $otherbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'holdingbranch' : 'homebranch';
1286 # set item's branch name, use HomeOrHoldingBranch syspref first, fall back to the other one
1287 if ($item->{$hbranch}) {
1288 $item->{'branchname'} = $branches{$item->{$hbranch}};
1290 elsif ($item->{$otherbranch}) { # Last resort
1291 $item->{'branchname'} = $branches{$item->{$otherbranch}};
1294 ($item->{'reserved'}) = C4::Reserves::CheckReserves($item->{itemnumber});
1296 my $prefix = $item->{$hbranch} . '--' . $item->{location} . $item->{itype} . $item->{itemcallnumber};
1297 # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
1298 if ( $item->{onloan} or $item->{reserved} ) {
1300 my $key = $prefix . $item->{onloan} . $item->{barcode};
1301 $onloan_items->{$key}->{due_date} = format_date($item->{onloan});
1302 $onloan_items->{$key}->{count}++ if $item->{$hbranch};
1303 $onloan_items->{$key}->{branchname} = $item->{branchname};
1304 $onloan_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1305 $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber};
1306 $onloan_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1307 $onloan_items->{$key}->{barcode} = $item->{barcode};
1308 # if something's checked out and lost, mark it as 'long overdue'
1309 if ( $item->{itemlost} ) {
1310 $onloan_items->{$prefix}->{longoverdue}++;
1311 $longoverdue_count++;
1312 } else { # can place holds as long as item isn't lost
1313 $can_place_holds = 1;
1317 # items not on loan, but still unavailable ( lost, withdrawn, damaged )
1321 if ( $item->{notforloan} == -1 ) {
1325 # is item in transit?
1326 my $transfertwhen = '';
1327 my ($transfertfrom, $transfertto);
1329 unless ($item->{wthdrawn}
1330 || $item->{itemlost}
1332 || $item->{notforloan}
1333 || $items_count > 20) {
1335 # A couple heuristics to limit how many times
1336 # we query the database for item transfer information, sacrificing
1337 # accuracy in some cases for speed;
1339 # 1. don't query if item has one of the other statuses
1340 # 2. don't check transit status if the bib has
1341 # more than 20 items
1343 # FIXME: to avoid having to query the database like this, and to make
1344 # the in transit status count as unavailable for search limiting,
1345 # should map transit status to record indexed in Zebra.
1347 ($transfertwhen, $transfertfrom, $transfertto) = C4::Circulation::GetTransfers($item->{itemnumber});
1350 # item is withdrawn, lost or damaged
1351 if ( $item->{wthdrawn}
1352 || $item->{itemlost}
1354 || $item->{notforloan}
1355 || $item->{reserved}
1356 || ($transfertwhen ne ''))
1358 $wthdrawn_count++ if $item->{wthdrawn};
1359 $itemlost_count++ if $item->{itemlost};
1360 $itemdamaged_count++ if $item->{damaged};
1361 $item_in_transit_count++ if $transfertwhen ne '';
1362 $item->{status} = $item->{wthdrawn} . "-" . $item->{itemlost} . "-" . $item->{damaged} . "-" . $item->{notforloan};
1364 my $key = $prefix . $item->{status};
1366 foreach (qw(wthdrawn itemlost damaged branchname itemcallnumber)) {
1367 if($item->{notforloan} == 1){
1368 $notforloan_items->{$key}->{$_} = $item->{$_};
1370 $other_items->{$key}->{$_} = $item->{$_};
1373 if($item->{notforloan} == 1){
1374 $notforloan_count++;
1376 $notforloan_items->{$key}->{intransit} = ($transfertwhen ne '') ? 1 : 0;
1377 $notforloan_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1378 $notforloan_items->{$key}->{count}++ if $item->{$hbranch};
1379 $notforloan_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1380 $notforloan_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1381 $notforloan_items->{$key}->{barcode} = $item->{barcode};
1385 $other_items->{$key}->{intransit} = ($transfertwhen ne '') ? 1 : 0;
1386 $other_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1387 $other_items->{$key}->{count}++ if $item->{$hbranch};
1388 $other_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1389 $other_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1390 $other_items->{$key}->{barcode} = $item->{barcode};
1396 $can_place_holds = 1;
1398 $available_items->{$prefix}->{count}++ if $item->{$hbranch};
1399 foreach (qw(branchname itemcallnumber barcode)) {
1400 $available_items->{$prefix}->{$_} = $item->{$_};
1402 $available_items->{$prefix}->{location} = $shelflocations->{ $item->{location} };
1403 $available_items->{$prefix}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1406 } # notforloan, item level and biblioitem level
1407 my ( $availableitemscount, $onloanitemscount, $notforloanitemscount,$otheritemscount );
1409 ( C4::Context->preference('maxItemsinSearchResults') )
1410 ? C4::Context->preference('maxItemsinSearchResults') - 1
1412 for my $key ( sort keys %$onloan_items ) {
1413 (++$onloanitemscount > $maxitems) and last;
1414 push @onloan_items_loop, $onloan_items->{$key};
1416 for my $key ( sort keys %$other_items ) {
1417 (++$otheritemscount > $maxitems) and last;
1418 push @other_items_loop, $other_items->{$key};
1420 for my $key ( sort keys %$notforloan_items ) {
1421 (++$notforloanitemscount > $maxitems) and last;
1422 push @notforloan_items_loop, $notforloan_items->{$key};
1424 for my $key ( sort keys %$available_items ) {
1425 (++$availableitemscount > $maxitems) and last;
1426 push @available_items_loop, $available_items->{$key}
1429 # XSLT processing of some stuff
1430 if (C4::Context->preference("XSLTResultsDisplay") && !$scan) {
1431 $oldbiblio->{XSLTResultsRecord} = XSLTParse4Display(
1432 $oldbiblio->{biblionumber}, $marcrecord, 'Results' );
1435 # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
1436 $can_place_holds = 0 if $itemtypes{ $oldbiblio->{itemtype} }->{notforloan};
1437 $oldbiblio->{norequests} = 1 unless $can_place_holds;
1438 $oldbiblio->{itemsplural} = 1 if $items_count > 1;
1439 $oldbiblio->{items_count} = $items_count;
1440 $oldbiblio->{available_items_loop} = \@available_items_loop;
1441 $oldbiblio->{notforloan_items_loop}= \@notforloan_items_loop;
1442 $oldbiblio->{onloan_items_loop} = \@onloan_items_loop;
1443 $oldbiblio->{other_items_loop} = \@other_items_loop;
1444 $oldbiblio->{availablecount} = $available_count;
1445 $oldbiblio->{availableplural} = 1 if $available_count > 1;
1446 $oldbiblio->{onloancount} = $onloan_count;
1447 $oldbiblio->{onloanplural} = 1 if $onloan_count > 1;
1448 $oldbiblio->{notforloancount} = $notforloan_count;
1449 $oldbiblio->{othercount} = $other_count;
1450 $oldbiblio->{otherplural} = 1 if $other_count > 1;
1451 $oldbiblio->{wthdrawncount} = $wthdrawn_count;
1452 $oldbiblio->{itemlostcount} = $itemlost_count;
1453 $oldbiblio->{damagedcount} = $itemdamaged_count;
1454 $oldbiblio->{intransitcount} = $item_in_transit_count;
1455 $oldbiblio->{orderedcount} = $ordered_count;
1456 $oldbiblio->{isbn} =~
1457 s/-//g; # deleting - in isbn to enable amazon content
1458 push( @newresults, $oldbiblio );
1463 #----------------------------------------------------------------------
1465 # Non-Zebra GetRecords#
1466 #----------------------------------------------------------------------
1470 NZgetRecords has the same API as zebra getRecords, even if some parameters are not managed
1476 $query, $simple_query, $sort_by_ref, $servers_ref,
1477 $results_per_page, $offset, $expanded_facet, $branches,
1480 warn "query =$query" if $DEBUG;
1481 my $result = NZanalyse($query);
1482 warn "results =$result" if $DEBUG;
1484 NZorder( $result, @$sort_by_ref[0], $results_per_page, $offset ),
1490 NZanalyse : gets a CQL string as parameter, and returns a list of biblionumber,title-weight;biblionumber,title-weight;...
1491 the list is built from an inverted index in the nozebra SQL table
1492 note that title is here only for convenience : the sorting will be very fast when requested on title
1493 if the sorting is requested on something else, we will have to reread all results, and that may be longer.
1498 my ( $string, $server ) = @_;
1499 # warn "---------" if $DEBUG;
1500 warn " NZanalyse" if $DEBUG;
1501 # warn "---------" if $DEBUG;
1503 # $server contains biblioserver or authorities, depending on what we search on.
1504 #warn "querying : $string on $server";
1505 $server = 'biblioserver' unless $server;
1507 # if we have a ", replace the content to discard temporarily any and/or/not inside
1509 if ( $string =~ /"/ ) {
1510 $string =~ s/"(.*?)"/__X__/;
1512 warn "commacontent : $commacontent" if $DEBUG;
1515 # split the query string into 3 parts : X AND Y means : $left="X", $operator="AND" and $right="Y"
1516 # then, call again NZanalyse with $left and $right
1517 # (recursive until we find a leaf (=> something without and/or/not)
1518 # delete repeated operator... Would then go in infinite loop
1519 while ( $string =~ s/( and| or| not| AND| OR| NOT)\1/$1/g ) {
1522 #process parenthesis before.
1523 if ( $string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/ ) {
1526 my $operator = lc($3); # FIXME: and/or/not are operators, not operands
1528 "dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right"
1530 my $leftresult = NZanalyse( $left, $server );
1532 my $rightresult = NZanalyse( $right, $server );
1534 # OK, we have the results for right and left part of the query
1535 # depending of operand, intersect, union or exclude both lists
1536 # to get a result list
1537 if ( $operator eq ' and ' ) {
1538 return NZoperatorAND($leftresult,$rightresult);
1540 elsif ( $operator eq ' or ' ) {
1542 # just merge the 2 strings
1543 return $leftresult . $rightresult;
1545 elsif ( $operator eq ' not ' ) {
1546 return NZoperatorNOT($leftresult,$rightresult);
1550 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1554 warn "string :" . $string if $DEBUG;
1558 if ($string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/) {
1561 $operator = lc($2); # FIXME: and/or/not are operators, not operands
1563 warn "no parenthesis. left : $left operator: $operator right: $right"
1566 # it's not a leaf, we have a and/or/not
1569 # reintroduce comma content if needed
1570 $right =~ s/__X__/"$commacontent"/ if $commacontent;
1571 $left =~ s/__X__/"$commacontent"/ if $commacontent;
1572 warn "node : $left / $operator / $right\n" if $DEBUG;
1573 my $leftresult = NZanalyse( $left, $server );
1574 my $rightresult = NZanalyse( $right, $server );
1575 warn " leftresult : $leftresult" if $DEBUG;
1576 warn " rightresult : $rightresult" if $DEBUG;
1577 # OK, we have the results for right and left part of the query
1578 # depending of operand, intersect, union or exclude both lists
1579 # to get a result list
1580 if ( $operator eq ' and ' ) {
1582 return NZoperatorAND($leftresult,$rightresult);
1584 elsif ( $operator eq ' or ' ) {
1586 # just merge the 2 strings
1587 return $leftresult . $rightresult;
1589 elsif ( $operator eq ' not ' ) {
1590 return NZoperatorNOT($leftresult,$rightresult);
1594 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1595 die "error : operand unknown : $operator for $string";
1598 # it's a leaf, do the real SQL query and return the result
1601 $string =~ s/__X__/"$commacontent"/ if $commacontent;
1602 $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g;
1603 #remove leading blank at the beginning
1605 warn "leaf:$string" if $DEBUG;
1607 # parse the string into operator/operand/value again
1611 if ($string =~ /(.*)(>=|<=)(.*)/) {
1618 # warn "handling leaf... left:$left operator:$operator right:$right"
1620 unless ($operator) {
1621 if ($string =~ /(.*)(>|<|=)(.*)/) {
1626 "handling unless (operator)... left:$left operator:$operator right:$right"
1634 # strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr...
1637 # automatic replace for short operators
1638 $left = 'title' if $left =~ '^ti$';
1639 $left = 'author' if $left =~ '^au$';
1640 $left = 'publisher' if $left =~ '^pb$';
1641 $left = 'subject' if $left =~ '^su$';
1642 $left = 'koha-Auth-Number' if $left =~ '^an$';
1643 $left = 'keyword' if $left =~ '^kw$';
1644 $left = 'itemtype' if $left =~ '^mc$'; # Fix for Bug 2599 - Search limits not working for NoZebra
1645 warn "handling leaf... left:$left operator:$operator right:$right" if $DEBUG;
1646 my $dbh = C4::Context->dbh;
1647 if ( $operator && $left ne 'keyword' ) {
1648 #do a specific search
1649 $operator = 'LIKE' if $operator eq '=' and $right =~ /%/;
1650 my $sth = $dbh->prepare(
1651 "SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?"
1653 warn "$left / $operator / $right\n" if $DEBUG;
1655 # split each word, query the DB and build the biblionumbers result
1656 #sanitizing leftpart
1657 $left =~ s/^\s+|\s+$//;
1658 foreach ( split / /, $right ) {
1660 $_ =~ s/^\s+|\s+$//;
1662 warn "EXECUTE : $server, $left, $_" if $DEBUG;
1663 $sth->execute( $server, $left, $_ )
1664 or warn "execute failed: $!";
1665 while ( my ( $line, $value ) = $sth->fetchrow ) {
1667 # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
1668 # otherwise, fill the result
1669 $biblionumbers .= $line
1670 unless ( $right =~ /^\d+$/ && $value =~ /\D/ );
1671 warn "result : $value "
1672 . ( $right =~ /\d/ ) . "=="
1673 . ( $value =~ /\D/?$line:"" ) if $DEBUG; #= $line";
1676 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1678 warn "NZAND" if $DEBUG;
1679 $results = NZoperatorAND($biblionumbers,$results);
1681 $results = $biblionumbers;
1686 #do a complete search (all indexes), if index='kw' do complete search too.
1687 my $sth = $dbh->prepare(
1688 "SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?"
1691 # split each word, query the DB and build the biblionumbers result
1692 foreach ( split / /, $string ) {
1693 next if C4::Context->stopwords->{ uc($_) }; # skip if stopword
1694 warn "search on all indexes on $_" if $DEBUG;
1697 $sth->execute( $server, $_ );
1698 while ( my $line = $sth->fetchrow ) {
1699 $biblionumbers .= $line;
1702 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1704 $results = NZoperatorAND($biblionumbers,$results);
1707 warn "NEW RES for $_ = $biblionumbers" if $DEBUG;
1708 $results = $biblionumbers;
1712 warn "return : $results for LEAF : $string" if $DEBUG;
1715 warn "---------\nLeave NZanalyse\n---------" if $DEBUG;
1719 my ($rightresult, $leftresult)=@_;
1721 my @leftresult = split /;/, $leftresult;
1722 warn " @leftresult / $rightresult \n" if $DEBUG;
1724 # my @rightresult = split /;/,$leftresult;
1727 # parse the left results, and if the biblionumber exist in the right result, save it in finalresult
1728 # the result is stored twice, to have the same weight for AND than OR.
1729 # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130
1730 # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64
1731 foreach (@leftresult) {
1734 ( $value, $countvalue ) = ( $1, $2 ) if ($value=~/(.*)-(\d+)$/);
1735 if ( $rightresult =~ /\Q$value\E-(\d+);/ ) {
1736 $countvalue = ( $1 > $countvalue ? $countvalue : $1 );
1738 "$value-$countvalue;$value-$countvalue;";
1741 warn "NZAND DONE : $finalresult \n" if $DEBUG;
1742 return $finalresult;
1746 my ($rightresult, $leftresult)=@_;
1747 return $rightresult.$leftresult;
1751 my ($leftresult, $rightresult)=@_;
1753 my @leftresult = split /;/, $leftresult;
1755 # my @rightresult = split /;/,$leftresult;
1757 foreach (@leftresult) {
1759 $value=$1 if $value=~m/(.*)-\d+$/;
1760 unless ($rightresult =~ "$value-") {
1761 $finalresult .= "$_;";
1764 return $finalresult;
1769 $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset);
1776 my ( $biblionumbers, $ordering, $results_per_page, $offset ) = @_;
1777 warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG;
1779 # order title asc by default
1780 # $ordering = '1=36 <i' unless $ordering;
1781 $results_per_page = 20 unless $results_per_page;
1782 $offset = 0 unless $offset;
1783 my $dbh = C4::Context->dbh;
1786 # order by POPULARITY
1788 if ( $ordering =~ /popularity/ ) {
1792 # popularity is not in MARC record, it's built from a specific query
1794 $dbh->prepare("select sum(issues) from items where biblionumber=?");
1795 foreach ( split /;/, $biblionumbers ) {
1796 my ( $biblionumber, $title ) = split /,/, $_;
1797 $result{$biblionumber} = GetMarcBiblio($biblionumber);
1798 $sth->execute($biblionumber);
1799 my $popularity = $sth->fetchrow || 0;
1801 # hint : the key is popularity.title because we can have
1802 # many results with the same popularity. In this case, sub-ordering is done by title
1803 # we also have biblionumber to avoid bug for 2 biblios with the same title & popularity
1804 # (un-frequent, I agree, but we won't forget anything that way ;-)
1805 $popularity{ sprintf( "%10d", $popularity ) . $title
1806 . $biblionumber } = $biblionumber;
1809 # sort the hash and return the same structure as GetRecords (Zebra querying)
1812 if ( $ordering eq 'popularity_dsc' ) { # sort popularity DESC
1813 foreach my $key ( sort { $b cmp $a } ( keys %popularity ) ) {
1814 $result_hash->{'RECORDS'}[ $numbers++ ] =
1815 $result{ $popularity{$key} }->as_usmarc();
1818 else { # sort popularity ASC
1819 foreach my $key ( sort ( keys %popularity ) ) {
1820 $result_hash->{'RECORDS'}[ $numbers++ ] =
1821 $result{ $popularity{$key} }->as_usmarc();
1824 my $finalresult = ();
1825 $result_hash->{'hits'} = $numbers;
1826 $finalresult->{'biblioserver'} = $result_hash;
1827 return $finalresult;
1833 elsif ( $ordering =~ /author/ ) {
1835 foreach ( split /;/, $biblionumbers ) {
1836 my ( $biblionumber, $title ) = split /,/, $_;
1837 my $record = GetMarcBiblio($biblionumber);
1839 if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1840 $author = $record->subfield( '200', 'f' );
1841 $author = $record->subfield( '700', 'a' ) unless $author;
1844 $author = $record->subfield( '100', 'a' );
1847 # hint : the result is sorted by author.biblionumber because we can have X biblios with the same author
1848 # and we don't want to get only 1 result for each of them !!!
1849 $result{ $author . $biblionumber } = $record;
1852 # sort the hash and return the same structure as GetRecords (Zebra querying)
1855 if ( $ordering eq 'author_za' ) { # sort by author desc
1856 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1857 $result_hash->{'RECORDS'}[ $numbers++ ] =
1858 $result{$key}->as_usmarc();
1861 else { # sort by author ASC
1862 foreach my $key ( sort ( keys %result ) ) {
1863 $result_hash->{'RECORDS'}[ $numbers++ ] =
1864 $result{$key}->as_usmarc();
1867 my $finalresult = ();
1868 $result_hash->{'hits'} = $numbers;
1869 $finalresult->{'biblioserver'} = $result_hash;
1870 return $finalresult;
1873 # ORDER BY callnumber
1876 elsif ( $ordering =~ /callnumber/ ) {
1878 foreach ( split /;/, $biblionumbers ) {
1879 my ( $biblionumber, $title ) = split /,/, $_;
1880 my $record = GetMarcBiblio($biblionumber);
1882 my ( $callnumber_tag, $callnumber_subfield ) =
1883 GetMarcFromKohaField( 'items.itemcallnumber','' );
1884 ( $callnumber_tag, $callnumber_subfield ) =
1885 GetMarcFromKohaField('biblioitems.callnumber','')
1886 unless $callnumber_tag;
1887 if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1888 $callnumber = $record->subfield( '200', 'f' );
1891 $callnumber = $record->subfield( '100', 'a' );
1894 # hint : the result is sorted by callnumber.biblionumber because we can have X biblios with the same callnumber
1895 # and we don't want to get only 1 result for each of them !!!
1896 $result{ $callnumber . $biblionumber } = $record;
1899 # sort the hash and return the same structure as GetRecords (Zebra querying)
1902 if ( $ordering eq 'call_number_dsc' ) { # sort by title desc
1903 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1904 $result_hash->{'RECORDS'}[ $numbers++ ] =
1905 $result{$key}->as_usmarc();
1908 else { # sort by title ASC
1909 foreach my $key ( sort { $a cmp $b } ( keys %result ) ) {
1910 $result_hash->{'RECORDS'}[ $numbers++ ] =
1911 $result{$key}->as_usmarc();
1914 my $finalresult = ();
1915 $result_hash->{'hits'} = $numbers;
1916 $finalresult->{'biblioserver'} = $result_hash;
1917 return $finalresult;
1919 elsif ( $ordering =~ /pubdate/ ) { #pub year
1921 foreach ( split /;/, $biblionumbers ) {
1922 my ( $biblionumber, $title ) = split /,/, $_;
1923 my $record = GetMarcBiblio($biblionumber);
1924 my ( $publicationyear_tag, $publicationyear_subfield ) =
1925 GetMarcFromKohaField( 'biblioitems.publicationyear', '' );
1926 my $publicationyear =
1927 $record->subfield( $publicationyear_tag,
1928 $publicationyear_subfield );
1930 # hint : the result is sorted by publicationyear.biblionumber because we can have X biblios with the same publication year
1931 # and we don't want to get only 1 result for each of them !!!
1932 $result{ $publicationyear . $biblionumber } = $record;
1935 # sort the hash and return the same structure as GetRecords (Zebra querying)
1938 if ( $ordering eq 'pubdate_dsc' ) { # sort by pubyear desc
1939 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1940 $result_hash->{'RECORDS'}[ $numbers++ ] =
1941 $result{$key}->as_usmarc();
1944 else { # sort by pub year ASC
1945 foreach my $key ( sort ( keys %result ) ) {
1946 $result_hash->{'RECORDS'}[ $numbers++ ] =
1947 $result{$key}->as_usmarc();
1950 my $finalresult = ();
1951 $result_hash->{'hits'} = $numbers;
1952 $finalresult->{'biblioserver'} = $result_hash;
1953 return $finalresult;
1959 elsif ( $ordering =~ /title/ ) {
1961 # the title is in the biblionumbers string, so we just need to build a hash, sort it and return
1963 foreach ( split /;/, $biblionumbers ) {
1964 my ( $biblionumber, $title ) = split /,/, $_;
1966 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1967 # and we don't want to get only 1 result for each of them !!!
1968 # hint & speed improvement : we can order without reading the record
1969 # so order, and read records only for the requested page !
1970 $result{ $title . $biblionumber } = $biblionumber;
1973 # sort the hash and return the same structure as GetRecords (Zebra querying)
1976 if ( $ordering eq 'title_az' ) { # sort by title desc
1977 foreach my $key ( sort ( keys %result ) ) {
1978 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
1981 else { # sort by title ASC
1982 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1983 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
1987 # limit the $results_per_page to result size if it's more
1988 $results_per_page = $numbers - 1 if $numbers < $results_per_page;
1990 # for the requested page, replace biblionumber by the complete record
1991 # speed improvement : avoid reading too much things
1993 my $counter = $offset ;
1994 $counter <= $offset + $results_per_page ;
1998 $result_hash->{'RECORDS'}[$counter] =
1999 GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc;
2001 my $finalresult = ();
2002 $result_hash->{'hits'} = $numbers;
2003 $finalresult->{'biblioserver'} = $result_hash;
2004 return $finalresult;
2011 # we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking
2014 foreach ( split /;/, $biblionumbers ) {
2015 my ( $biblionumber, $title ) = split /,/, $_;
2016 $title =~ /(.*)-(\d)/;
2021 # note that we + the ranking because ranking is calculated on weight of EACH term requested.
2022 # if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N
2023 # biblio N has ranking = 6
2024 $count_ranking{$biblionumber} += $ranking;
2027 # build the result by "inverting" the count_ranking hash
2028 # hint : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build a hash on concat(ranking,biblionumber) instead
2030 foreach ( keys %count_ranking ) {
2031 $result{ sprintf( "%10d", $count_ranking{$_} ) . '-' . $_ } = $_;
2034 # sort the hash and return the same structure as GetRecords (Zebra querying)
2037 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2038 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2041 # limit the $results_per_page to result size if it's more
2042 $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2044 # for the requested page, replace biblionumber by the complete record
2045 # speed improvement : avoid reading too much things
2047 my $counter = $offset ;
2048 $counter <= $offset + $results_per_page ;
2052 $result_hash->{'RECORDS'}[$counter] =
2053 GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc
2054 if $result_hash->{'RECORDS'}[$counter];
2056 my $finalresult = ();
2057 $result_hash->{'hits'} = $numbers;
2058 $finalresult->{'biblioserver'} = $result_hash;
2059 return $finalresult;
2063 =head2 enabled_staff_search_views
2065 %hash = enabled_staff_search_views()
2067 This function returns a hash that contains three flags obtained from the system
2068 preferences, used to determine whether a particular staff search results view
2073 =item C<Output arg:>
2075 * $hash{can_view_MARC} is true only if the MARC view is enabled
2076 * $hash{can_view_ISBD} is true only if the ISBD view is enabled
2077 * $hash{can_view_labeledMARC} is true only if the Labeled MARC view is enabled
2079 =item C<usage in the script:>
2083 $template->param ( C4::Search::enabled_staff_search_views );
# Report which optional staff search-result views are switched on.
#
# Takes no arguments. Returns a flat key/value list, suitable for assigning
# to a hash or for passing straight to $template->param, with one entry per
# view. Each value is whatever C4::Context->preference returns for the
# matching system preference:
#
#   can_view_MARC        <- 'viewMARC'         (MARC view)
#   can_view_ISBD        <- 'viewISBD'         (ISBD view)
#   can_view_labeledMARC <- 'viewLabeledMARC'  (Labeled MARC view)
sub enabled_staff_search_views {
    return (
        can_view_MARC        => C4::Context->preference('viewMARC'),
        can_view_ISBD        => C4::Context->preference('viewISBD'),
        can_view_labeledMARC => C4::Context->preference('viewLabeledMARC'),
    );
}
2097 =head2 z3950_search_args
2099 $arrayref = z3950_search_args($matchpoints)
2101 This function returns an array reference that contains the search parameters to be
2102 passed to the Z39.50 search script (z3950_search.pl). The array elements
2103 are hash refs whose keys are name, value and encvalue, and whose values are the
2104 name of a search parameter, the value of that search parameter and the URL encoded
2105 value of that parameter.
2107 The search parameter names are lccn, isbn, issn, title, author, dewey and subject.
2109 The search parameter values are obtained from the bibliographic record whose
2110 data is in a hash reference in $matchpoints, as returned by Biblio::GetBiblioData().
2112 If $matchpoints is a scalar, it is assumed to be an unnamed query descriptor, e.g.
2113 a general purpose search argument. In this case, the returned array contains only one
2114 entry: the key is 'title' and the value and encvalue are derived from $matchpoints.
2116 If a search parameter value is undefined or empty, it is not included in the returned
2119 The returned array reference may be passed directly to the template parameters.
2123 =item C<Output arg:>
2125 * $array containing hash refs as described above
2127 =item C<usage in the script:>
2131 $data = Biblio::GetBiblioData($bibno);
2132 $template->param ( MYLOOP => C4::Search::z3950_search_args($data) )
2136 $template->param ( MYLOOP => C4::Search::z3950_search_args($searchscalar) )
# Build the parameter list handed to the Z39.50 search script.
#
# Argument:
#   $bibrec - either a hash ref of bibliographic data keyed by field name
#             (lccn, isbn, issn, title, author, dewey, subject), or a plain
#             scalar, which is treated as a title search term.
#
# Returns an array ref of hash refs, one per search field that has a defined,
# non-empty value, always in the fixed field order listed above. Each hash ref
# has three keys:
#   name     - the search parameter name
#   value    - the raw value taken from $bibrec
#   encvalue - the same value URL-encoded with URI::Escape::uri_escape_utf8
#
# Fields whose value is undefined or the empty string are left out, as the POD
# for this function promises.
sub z3950_search_args {
    my $bibrec = shift;

    # A bare scalar is shorthand for "search on this title".
    $bibrec = { title => $bibrec } if !ref $bibrec;

    my $array = [];
    for my $field (qw/ lccn isbn issn title author dewey subject /) {
        my $value = $bibrec->{$field};

        # Filter before escaping, so an undefined value never reaches
        # uri_escape_utf8 and empty values produce no parameter. '0' is a
        # real value and is kept (length test, not truth test).
        next unless defined $value && length $value;

        push @$array, {
            name     => $field,
            value    => $value,
            encvalue => URI::Escape::uri_escape_utf8($value),
        };
    }

    return $array;
}
2153 END { } # module clean-up code here (global destructor)
2160 Koha Development team <info@koha.org>