3 # This file is part of Koha.
5 # Koha is free software; you can redistribute it and/or modify it under the
6 # terms of the GNU General Public License as published by the Free Software
7 # Foundation; either version 2 of the License, or (at your option) any later
10 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License along with
15 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
16 # Suite 330, Boston, MA 02111-1307 USA
19 # use warnings; # FIXME
22 use C4::Biblio; # GetMarcFromKohaField
23 use C4::Koha; # getFacets
25 use C4::Search::PazPar2;
27 use C4::Dates qw(format_date);
31 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
33 # set the version for version checking
36 $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
41 C4::Search - Functions for searching the Koha catalog.
45 See opac/opac-search.pl or catalogue/search.pl for example of usage
49 This module provides searching functions for Koha's bibliographic databases
65 # make all your functions, whether exported or not;
69 ($biblionumber,$biblionumber,$title) = FindDuplicate($record);
71 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
77 my $dbh = C4::Context->dbh;
78 my $result = TransformMarcToKoha( $dbh, $record, '' );
83 my ( $biblionumber, $title );
85 # search duplicate on ISBN, easy and fast..
87 if ( $result->{isbn} ) {
88 $result->{isbn} =~ s/\(.*$//;
89 $result->{isbn} =~ s/\s+$//;
90 $query = "isbn=$result->{isbn}";
93 $result->{title} =~ s /\\//g;
94 $result->{title} =~ s /\"//g;
95 $result->{title} =~ s /\(//g;
96 $result->{title} =~ s /\)//g;
98 # FIXME: instead of removing operators, could just do
99 # quotes around the value
100 $result->{title} =~ s/(and|or|not)//g;
101 $query = "ti,ext=$result->{title}";
102 $query .= " and itemtype=$result->{itemtype}"
103 if ( $result->{itemtype} );
104 if ( $result->{author} ) {
105 $result->{author} =~ s /\\//g;
106 $result->{author} =~ s /\"//g;
107 $result->{author} =~ s /\(//g;
108 $result->{author} =~ s /\)//g;
110 # remove valid operators
111 $result->{author} =~ s/(and|or|not)//g;
112 $query .= " and au,ext=$result->{author}";
116 # FIXME: add error handling
117 my ( $error, $searchresults ) = SimpleSearch($query); # FIXME :: hardcoded !
119 foreach my $possible_duplicate_record (@$searchresults) {
121 MARC::Record->new_from_usmarc($possible_duplicate_record);
122 my $result = TransformMarcToKoha( $dbh, $marcrecord, '' );
124 # FIXME :: why 2 $biblionumber ?
126 push @results, $result->{'biblionumber'};
127 push @results, $result->{'title'};
135 ( $error, $results, $total_hits ) = SimpleSearch( $query, $offset, $max_results, [@servers] );
137 This function provides a simple search API on the bibliographic catalog
143 * $query can be a simple keyword or a complete CCL query
144 * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
145 * $offset - If present, represents the number of records at the beggining to omit. Defaults to 0
146 * $max_results - if present, determines the maximum number of records to fetch. undef is All. defaults to undef.
151 * $error is a empty unless an error is detected
152 * \@results is an array of records.
153 * $total_hits is the number of hits that would have been returned with no limit
155 =item C<usage in the script:>
159 my ( $error, $marcresults, $total_hits ) = SimpleSearch($query);
161 if (defined $error) {
162 $template->param(query_error => $error);
163 warn "error: ".$error;
164 output_html_with_http_headers $input, $cookie, $template->output;
168 my $hits = scalar @$marcresults;
171 for my $i (0..$hits) {
173 my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]);
174 my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,'');
176 #build the hash for the template.
177 $resultsloop{highlight} = ($i % 2)?(1):(0);
178 $resultsloop{title} = $biblio->{'title'};
179 $resultsloop{subtitle} = $biblio->{'subtitle'};
180 $resultsloop{biblionumber} = $biblio->{'biblionumber'};
181 $resultsloop{author} = $biblio->{'author'};
182 $resultsloop{publishercode} = $biblio->{'publishercode'};
183 $resultsloop{publicationyear} = $biblio->{'publicationyear'};
185 push @results, \%resultsloop;
188 $template->param(result=>\@results);
193 my ( $query, $offset, $max_results, $servers ) = @_;
195 if ( C4::Context->preference('NoZebra') ) {
196 my $result = NZorder( NZanalyse($query) )->{'biblioserver'};
199 && $result->{hits} > 0 ? $result->{'RECORDS'} : [] );
200 return ( undef, $search_result, scalar($result->{hits}) );
203 # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
204 my @servers = defined ( $servers ) ? @$servers : ( "biblioserver" );
210 return ( "No query entered", undef, undef ) unless $query;
212 # Initialize & Search Zebra
213 for ( my $i = 0 ; $i < @servers ; $i++ ) {
215 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
216 $zoom_queries[$i] = new ZOOM::Query::CCL2RPN( $query, $zconns[$i]);
217 $tmpresults[$i] = $zconns[$i]->search( $zoom_queries[$i] );
221 $zconns[$i]->errmsg() . " ("
222 . $zconns[$i]->errcode() . ") "
223 . $zconns[$i]->addinfo() . " "
224 . $zconns[$i]->diagset();
226 return ( $error, undef, undef ) if $zconns[$i]->errcode();
230 # caught a ZOOM::Exception
234 . $@->addinfo() . " "
237 return ( $error, undef, undef );
240 while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
241 my $event = $zconns[ $i - 1 ]->last_event();
242 if ( $event == ZOOM::Event::ZEND ) {
244 my $first_record = defined( $offset ) ? $offset+1 : 1;
245 my $hits = $tmpresults[ $i - 1 ]->size();
246 $total_hits += $hits;
247 my $last_record = $hits;
248 if ( defined $max_results && $offset + $max_results < $hits ) {
249 $last_record = $offset + $max_results;
252 for my $j ( $first_record..$last_record ) {
253 my $record = $tmpresults[ $i - 1 ]->record( $j-1 )->raw(); # 0 indexed
254 push @results, $record;
259 foreach my $result (@tmpresults) {
262 foreach my $zoom_query (@zoom_queries) {
263 $zoom_query->destroy();
266 return ( undef, \@results, $total_hits );
272 ( undef, $results_hashref, \@facets_loop ) = getRecords (
274 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
275 $results_per_page, $offset, $expanded_facet, $branches,
279 The all singing, all dancing, multi-server, asynchronous, scanning,
280 searching, record nabbing, facet-building
282 See verbse embedded documentation.
288 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
289 $results_per_page, $offset, $expanded_facet, $branches,
293 my @servers = @$servers_ref;
294 my @sort_by = @$sort_by_ref;
296 # Initialize variables for the ZOOM connection and results object
300 my $results_hashref = ();
302 # Initialize variables for the faceted results objects
303 my $facets_counter = ();
304 my $facets_info = ();
305 my $facets = getFacets();
308 ; # stores the ref to array of hashes for template facets loop
310 ### LOOP THROUGH THE SERVERS
311 for ( my $i = 0 ; $i < @servers ; $i++ ) {
312 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
314 # perform the search, create the results objects
315 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
316 my $query_to_use = ($servers[$i] =~ /biblioserver/) ? $koha_query : $simple_query;
318 #$query_to_use = $simple_query if $scan;
319 warn $simple_query if ( $scan and $DEBUG );
321 # Check if we've got a query_type defined, if so, use it
325 if ( $query_type =~ /^ccl/ ) {
327 s/\:/\=/g; # change : to = last minute (FIXME)
330 new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
333 elsif ( $query_type =~ /^cql/ ) {
336 new ZOOM::Query::CQL( $query_to_use, $zconns[$i] ) );
338 elsif ( $query_type =~ /^pqf/ ) {
341 new ZOOM::Query::PQF( $query_to_use, $zconns[$i] ) );
348 new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
354 new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
360 warn "WARNING: query problem with $query_to_use " . $@;
363 # Concatenate the sort_by limits and pass them to the results object
364 # Note: sort will override rank
366 foreach my $sort (@sort_by) {
367 if ( $sort eq "author_az" ) {
368 $sort_by .= "1=1003 <i ";
370 elsif ( $sort eq "author_za" ) {
371 $sort_by .= "1=1003 >i ";
373 elsif ( $sort eq "popularity_asc" ) {
374 $sort_by .= "1=9003 <i ";
376 elsif ( $sort eq "popularity_dsc" ) {
377 $sort_by .= "1=9003 >i ";
379 elsif ( $sort eq "call_number_asc" ) {
380 $sort_by .= "1=20 <i ";
382 elsif ( $sort eq "call_number_dsc" ) {
383 $sort_by .= "1=20 >i ";
385 elsif ( $sort eq "pubdate_asc" ) {
386 $sort_by .= "1=31 <i ";
388 elsif ( $sort eq "pubdate_dsc" ) {
389 $sort_by .= "1=31 >i ";
391 elsif ( $sort eq "acqdate_asc" ) {
392 $sort_by .= "1=32 <i ";
394 elsif ( $sort eq "acqdate_dsc" ) {
395 $sort_by .= "1=32 >i ";
397 elsif ( $sort eq "title_az" ) {
398 $sort_by .= "1=4 <i ";
400 elsif ( $sort eq "title_za" ) {
401 $sort_by .= "1=4 >i ";
404 warn "Ignoring unrecognized sort '$sort' requested" if $sort_by;
408 if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
409 warn "WARNING sort $sort_by failed";
412 } # finished looping through servers
414 # The big moment: asynchronously retrieve results from all servers
415 while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
416 my $ev = $zconns[ $i - 1 ]->last_event();
417 if ( $ev == ZOOM::Event::ZEND ) {
418 next unless $results[ $i - 1 ];
419 my $size = $results[ $i - 1 ]->size();
423 # loop through the results
424 $results_hash->{'hits'} = $size;
426 if ( $offset + $results_per_page <= $size ) {
427 $times = $offset + $results_per_page;
432 for ( my $j = $offset ; $j < $times ; $j++ ) {
437 ## Check if it's an index scan
439 my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
441 # here we create a minimal MARC record and hand it off to the
442 # template just like a normal result ... perhaps not ideal, but
444 my $tmprecord = MARC::Record->new();
445 $tmprecord->encoding('UTF-8');
449 # the minimal record in author/title (depending on MARC flavour)
450 if (C4::Context->preference("marcflavour") eq "UNIMARC") {
451 $tmptitle = MARC::Field->new('200',' ',' ', a => $term, f => $occ);
452 $tmprecord->append_fields($tmptitle);
454 $tmptitle = MARC::Field->new('245',' ',' ', a => $term,);
455 $tmpauthor = MARC::Field->new('100',' ',' ', a => $occ,);
456 $tmprecord->append_fields($tmptitle);
457 $tmprecord->append_fields($tmpauthor);
459 $results_hash->{'RECORDS'}[$j] = $tmprecord->as_usmarc();
464 $record = $results[ $i - 1 ]->record($j)->raw();
466 # warn "RECORD $j:".$record;
467 $results_hash->{'RECORDS'}[$j] = $record;
469 # Fill the facets while we're looping, but only for the biblioserver
470 $facet_record = MARC::Record->new_from_usmarc($record)
471 if $servers[ $i - 1 ] =~ /biblioserver/;
473 #warn $servers[$i-1]."\n".$record; #.$facet_record->title();
475 for ( my $k = 0 ; $k <= @$facets ; $k++ ) {
476 ($facets->[$k]) or next;
477 my @fields = map {$facet_record->field($_)} @{$facets->[$k]->{'tags'}} ;
478 for my $field (@fields) {
479 my @subfields = $field->subfields();
480 for my $subfield (@subfields) {
481 my ( $code, $data ) = @$subfield;
482 ($code eq $facets->[$k]->{'subfield'}) or next;
483 $facets_counter->{ $facets->[$k]->{'link_value'} }->{$data}++;
486 $facets_info->{ $facets->[$k]->{'link_value'} }->{'label_value'} =
487 $facets->[$k]->{'label_value'};
488 $facets_info->{ $facets->[$k]->{'link_value'} }->{'expanded'} =
489 $facets->[$k]->{'expanded'};
494 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
497 # warn "connection ", $i-1, ": $size hits";
498 # warn $results[$i-1]->record(0)->render() if $size > 0;
501 if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
503 sort { $facets_counter->{$b} <=> $facets_counter->{$a} }
504 keys %$facets_counter )
507 my $number_of_facets;
508 my @this_facets_array;
511 $facets_counter->{$link_value}
512 ->{$b} <=> $facets_counter->{$link_value}->{$a}
513 } keys %{ $facets_counter->{$link_value} }
517 if ( ( $number_of_facets < 6 )
518 || ( $expanded_facet eq $link_value )
519 || ( $facets_info->{$link_value}->{'expanded'} ) )
522 # Sanitize the link value ), ( will cause errors with CCL,
523 my $facet_link_value = $one_facet;
524 $facet_link_value =~ s/(\(|\))/ /g;
526 # fix the length that will display in the label,
527 my $facet_label_value = $one_facet;
529 substr( $one_facet, 0, 20 ) . "..."
530 unless length($facet_label_value) <= 20;
532 # if it's a branch, label by the name, not the code,
533 if ( $link_value =~ /branch/ ) {
535 $branches->{$one_facet}->{'branchname'};
538 # but we're down with the whole label being in the link's title.
539 my $facet_title_value = $one_facet;
541 push @this_facets_array,
545 $facets_counter->{$link_value}
547 facet_label_value => $facet_label_value,
548 facet_title_value => $facet_title_value,
549 facet_link_value => $facet_link_value,
550 type_link_value => $link_value,
556 # handle expanded option
557 unless ( $facets_info->{$link_value}->{'expanded'} ) {
559 if ( ( $number_of_facets > 6 )
560 && ( $expanded_facet ne $link_value ) );
565 type_link_value => $link_value,
566 type_id => $link_value . "_id",
567 "type_label_" . $facets_info->{$link_value}->{'label_value'} => 1,
568 facets => \@this_facets_array,
569 expandable => $expandable,
570 expand => $link_value,
572 ) unless ( ($facets_info->{$link_value}->{'label_value'} =~ /Libraries/) and (C4::Context->preference('singleBranchMode')) );
577 return ( undef, $results_hashref, \@facets_loop );
582 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
583 $results_per_page, $offset, $expanded_facet, $branches,
587 my $paz = C4::Search::PazPar2->new(C4::Context->config('pazpar2url'));
589 $paz->search($simple_query);
593 my $results_hashref = {};
594 my $stats = XMLin($paz->stat);
595 my $results = XMLin($paz->show($offset, $results_per_page, 'work-title:1'), forcearray => 1);
597 # for a grouped search result, the number of hits
598 # is the number of groups returned; 'bib_hits' will have
599 # the total number of bibs.
600 $results_hashref->{'biblioserver'}->{'hits'} = $results->{'merged'}->[0];
601 $results_hashref->{'biblioserver'}->{'bib_hits'} = $stats->{'hits'};
603 HIT: foreach my $hit (@{ $results->{'hit'} }) {
604 my $recid = $hit->{recid}->[0];
606 my $work_title = $hit->{'md-work-title'}->[0];
608 if (exists $hit->{'md-work-author'}) {
609 $work_author = $hit->{'md-work-author'}->[0];
611 my $group_label = (defined $work_author) ? "$work_title / $work_author" : $work_title;
613 my $result_group = {};
614 $result_group->{'group_label'} = $group_label;
615 $result_group->{'group_merge_key'} = $recid;
618 if (exists $hit->{count}) {
619 $count = $hit->{count}->[0];
621 $result_group->{'group_count'} = $count;
623 for (my $i = 0; $i < $count; $i++) {
624 # FIXME -- may need to worry about diacritics here
625 my $rec = $paz->record($recid, $i);
626 push @{ $result_group->{'RECORDS'} }, $rec;
629 push @{ $results_hashref->{'biblioserver'}->{'GROUPS'} }, $result_group;
632 # pass through facets
633 my $termlist_xml = $paz->termlist('author,subject');
634 my $terms = XMLin($termlist_xml, forcearray => 1);
635 my @facets_loop = ();
636 #die Dumper($results);
637 # foreach my $list (sort keys %{ $terms->{'list'} }) {
639 # foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) {
641 # facet_label_value => $facet->{'name'}->[0],
644 # push @facets_loop, ( {
645 # type_label => $list,
646 # facets => \@facets,
650 return ( undef, $results_hashref, \@facets_loop );
654 sub _remove_stopwords {
655 my ( $operand, $index ) = @_;
656 my @stopwords_removed;
658 # phrase and exact-qualified indexes shouldn't have stopwords removed
659 if ( $index !~ m/phr|ext/ ) {
661 # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
662 # we use IsAlpha unicode definition, to deal correctly with diacritics.
663 # otherwise, a French word like "leçon" woudl be split into "le" "çon", "le"
664 # is a stopword, we'd get "çon" and wouldn't find anything...
665 foreach ( keys %{ C4::Context->stopwords } ) {
666 next if ( $_ =~ /(and|or|not)/ ); # don't remove operators
668 /(\P{IsAlpha}$_\P{IsAlpha}|^$_\P{IsAlpha}|\P{IsAlpha}$_$|^$_$)/ )
670 $operand =~ s/\P{IsAlpha}$_\P{IsAlpha}/ /gi;
671 $operand =~ s/^$_\P{IsAlpha}/ /gi;
672 $operand =~ s/\P{IsAlpha}$_$/ /gi;
673 $operand =~ s/$1//gi;
674 push @stopwords_removed, $_;
678 return ( $operand, \@stopwords_removed );
682 sub _detect_truncation {
683 my ( $operand, $index ) = @_;
684 my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
687 my @wordlist = split( /\s/, $operand );
688 foreach my $word (@wordlist) {
689 if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
690 push @rightlefttruncated, $word;
692 elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
693 push @lefttruncated, $word;
695 elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
696 push @righttruncated, $word;
698 elsif ( index( $word, "*" ) < 0 ) {
699 push @nontruncated, $word;
702 push @regexpr, $word;
706 \@nontruncated, \@righttruncated, \@lefttruncated,
707 \@rightlefttruncated, \@regexpr
712 sub _build_stemmed_operand {
716 # If operand contains a digit, it is almost certainly an identifier, and should
717 # not be stemmed. This is particularly relevant for ISBNs and ISSNs, which
718 # can contain the letter "X" - for example, _build_stemmend_operand would reduce
719 # "014100018X" to "x ", which for a MARC21 database would bring up irrelevant
720 # results (e.g., "23 x 29 cm." from the 300$c). Bug 2098.
721 return $operand if $operand =~ /\d/;
723 # FIXME: the locale should be set based on the user's language and/or search choice
724 my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
726 # FIXME: these should be stored in the db so the librarian can modify the behavior
727 $stemmer->add_exceptions(
734 my @words = split( / /, $operand );
735 my $stems = $stemmer->stem(@words);
736 for my $stem (@$stems) {
737 $stemmed_operand .= "$stem";
738 $stemmed_operand .= "?"
739 unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
740 $stemmed_operand .= " ";
742 warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
743 return $stemmed_operand;
747 sub _build_weighted_query {
749 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
750 # pretty well but could work much better if we had a smarter query parser
751 my ( $operand, $stemmed_operand, $index ) = @_;
752 my $stemming = C4::Context->preference("QueryStemming") || 0;
753 my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
754 my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
756 my $weighted_query .= "(rk=("; # Specifies that we're applying rank
758 # Keyword, or, no index specified
759 if ( ( $index eq 'kw' ) || ( !$index ) ) {
761 "Title-cover,ext,r1=\"$operand\""; # exact title-cover
762 $weighted_query .= " or ti,ext,r2=\"$operand\""; # exact title
763 $weighted_query .= " or ti,phr,r3=\"$operand\""; # phrase title
764 #$weighted_query .= " or any,ext,r4=$operand"; # exact any
765 #$weighted_query .=" or kw,wrdl,r5=\"$operand\""; # word list any
766 $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
767 if $fuzzy_enabled; # add fuzzy, word list
768 $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
769 if ( $stemming and $stemmed_operand )
770 ; # add stemming, right truncation
771 $weighted_query .= " or wrdl,r9=\"$operand\"";
773 # embedded sorting: 0 a-z; 1 z-a
774 # $weighted_query .= ") or (sort1,aut=1";
777 # Barcode searches should skip this process
778 elsif ( $index eq 'bc' ) {
779 $weighted_query .= "bc=\"$operand\"";
782 # Authority-number searches should skip this process
783 elsif ( $index eq 'an' ) {
784 $weighted_query .= "an=\"$operand\"";
787 # If the index already has more than one qualifier, wrap the operand
788 # in quotes and pass it back (assumption is that the user knows what they
789 # are doing and won't appreciate us mucking up their query
790 elsif ( $index =~ ',' ) {
791 $weighted_query .= " $index=\"$operand\"";
794 #TODO: build better cases based on specific search indexes
796 $weighted_query .= " $index,ext,r1=\"$operand\""; # exact index
797 #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
798 $weighted_query .= " or $index,phr,r3=\"$operand\""; # phrase index
800 " or $index,rt,wrdl,r3=\"$operand\""; # word list index
803 $weighted_query .= "))"; # close rank specification
804 return $weighted_query;
810 $simple_query, $query_cgi,
812 $limit_cgi, $limit_desc,
813 $stopwords_removed, $query_type ) = getRecords ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
815 Build queries and limits in CCL, CGI, Human,
816 handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
818 See verbose embedded documentation.
824 my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_;
826 warn "---------\nEnter buildQuery\n---------" if $DEBUG;
829 my @operators = @$operators if $operators;
830 my @indexes = @$indexes if $indexes;
831 my @operands = @$operands if $operands;
832 my @limits = @$limits if $limits;
833 my @sort_by = @$sort_by if $sort_by;
835 my $stemming = C4::Context->preference("QueryStemming") || 0;
836 my $auto_truncation = C4::Context->preference("QueryAutoTruncate") || 0;
837 my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
838 my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
839 my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
841 # no stemming/weight/fuzzy in NoZebra
842 if ( C4::Context->preference("NoZebra") ) {
848 my $query = $operands[0];
849 my $simple_query = $operands[0];
851 # initialize the variables we're passing back
860 my $stopwords_removed; # flag to determine if stopwords have been removed
862 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
864 if ( $query =~ /^ccl=/ ) {
865 return ( undef, $', $', "q=ccl=$'", $', '', '', '', '', 'ccl' );
867 if ( $query =~ /^cql=/ ) {
868 return ( undef, $', $', "q=cql=$'", $', '', '', '', '', 'cql' );
870 if ( $query =~ /^pqf=/ ) {
871 return ( undef, $', $', "q=pqf=$'", $', '', '', '', '', 'pqf' );
874 # pass nested queries directly
875 # FIXME: need better handling of some of these variables in this case
876 if ( $query =~ /(\(|\))/ ) {
878 undef, $query, $simple_query, $query_cgi,
879 $query, $limit, $limit_cgi, $limit_desc,
880 $stopwords_removed, 'ccl'
884 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
885 # query operands and indexes and add stemming, truncation, field weighting, etc.
886 # Once we do so, we'll end up with a value in $query, just like if we had an
887 # incoming $query from the user
890 ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
892 ; # a flag used to keep track if there was a previous query
893 # if there was, we can apply the current operator
895 for ( my $i = 0 ; $i <= @operands ; $i++ ) {
897 # COMBINE OPERANDS, INDEXES AND OPERATORS
898 if ( $operands[$i] ) {
900 # A flag to determine whether or not to add the index to the query
903 # If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling
904 if ( $operands[$i] =~ /(:|=)/ || $scan ) {
907 $remove_stopwords = 0;
909 my $operand = $operands[$i];
910 my $index = $indexes[$i];
912 # Add index-specific attributes
913 # Date of Publication
914 if ( $index eq 'yr' ) {
915 $index .= ",st-numeric";
917 $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
920 # Date of Acquisition
921 elsif ( $index eq 'acqdate' ) {
922 $index .= ",st-date-normalized";
924 $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
926 # ISBN,ISSN,Standard Number, don't need special treatment
927 elsif ( $index eq 'nb' || $index eq 'ns' ) {
930 $stemming, $auto_truncation,
931 $weight_fields, $fuzzy_enabled,
933 ) = ( 0, 0, 0, 0, 0 );
936 # Set default structure attribute (word list)
938 unless ( $indexes_set || !$index || $index =~ /(st-|phr|ext|wrdl)/ ) {
939 $struct_attr = ",wrdl";
942 # Some helpful index variants
943 my $index_plus = $index . $struct_attr . ":" if $index;
944 my $index_plus_comma = $index . $struct_attr . "," if $index;
945 if ($auto_truncation){
946 # FIXME Auto Truncation is only valid for LTR languages
948 # use C4::Languages qw(regex_lang_subtags get_bidi);
949 # $lang = $query->cookie('KohaOpacLanguage') if (defined $query && $query->cookie('KohaOpacLanguage'));
950 # my $current_lang = regex_lang_subtags($lang);
952 # $bidi = get_bidi($current_lang->{script}) if $current_lang->{script};
953 $index_plus_comma .= "rtrn:";
957 if ($remove_stopwords) {
958 ( $operand, $stopwords_removed ) =
959 _remove_stopwords( $operand, $index );
960 warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
961 warn "REMOVED STOPWORDS: @$stopwords_removed"
962 if ( $stopwords_removed && $DEBUG );
966 my $truncated_operand;
967 my( $nontruncated, $righttruncated, $lefttruncated,
968 $rightlefttruncated, $regexpr
969 ) = _detect_truncation( $operand, $index );
971 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
976 scalar(@$righttruncated) + scalar(@$lefttruncated) +
977 scalar(@$rightlefttruncated) > 0 )
980 # Don't field weight or add the index to the query, we do it here
982 undef $weight_fields;
983 my $previous_truncation_operand;
984 if ( scalar(@$nontruncated) > 0 ) {
985 $truncated_operand .= "$index_plus @$nontruncated ";
986 $previous_truncation_operand = 1;
988 if ( scalar(@$righttruncated) > 0 ) {
989 $truncated_operand .= "and "
990 if $previous_truncation_operand;
991 $truncated_operand .=
992 "$index_plus_comma" . "rtrn:@$righttruncated ";
993 $previous_truncation_operand = 1;
995 if ( scalar(@$lefttruncated) > 0 ) {
996 $truncated_operand .= "and "
997 if $previous_truncation_operand;
998 $truncated_operand .=
999 "$index_plus_comma" . "ltrn:@$lefttruncated ";
1000 $previous_truncation_operand = 1;
1002 if ( scalar(@$rightlefttruncated) > 0 ) {
1003 $truncated_operand .= "and "
1004 if $previous_truncation_operand;
1005 $truncated_operand .=
1006 "$index_plus_comma" . "rltrn:@$rightlefttruncated ";
1007 $previous_truncation_operand = 1;
1010 $operand = $truncated_operand if $truncated_operand;
1011 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
1014 my $stemmed_operand;
1015 $stemmed_operand = _build_stemmed_operand($operand)
1017 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
1019 # Handle Field Weighting
1020 my $weighted_operand;
1022 _build_weighted_query( $operand, $stemmed_operand, $index )
1024 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
1025 $operand = $weighted_operand if $weight_fields;
1026 $indexes_set = 1 if $weight_fields;
1028 # If there's a previous operand, we need to add an operator
1029 if ($previous_operand) {
1031 # User-specified operator
1032 if ( $operators[ $i - 1 ] ) {
1033 $query .= " $operators[$i-1] ";
1034 $query .= " $index_plus " unless $indexes_set;
1035 $query .= " $operand";
1036 $query_cgi .= "&op=$operators[$i-1]";
1037 $query_cgi .= "&idx=$index" if $index;
1038 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1040 " $operators[$i-1] $index_plus $operands[$i]";
1043 # Default operator is and
1046 $query .= "$index_plus " unless $indexes_set;
1047 $query .= "$operand";
1048 $query_cgi .= "&op=and&idx=$index" if $index;
1049 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1050 $query_desc .= " and $index_plus $operands[$i]";
1054 # There isn't a pervious operand, don't need an operator
1057 # Field-weighted queries already have indexes set
1058 $query .= " $index_plus " unless $indexes_set;
1060 $query_desc .= " $index_plus $operands[$i]";
1061 $query_cgi .= "&idx=$index" if $index;
1062 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1063 $previous_operand = 1;
1068 warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
1071 my $group_OR_limits;
1072 my $availability_limit;
1073 foreach my $this_limit (@limits) {
1074 if ( $this_limit =~ /available/ ) {
1076 # 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1078 # all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1079 $availability_limit .=
1080 "( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and (lost,st-numeric=0) )"; #or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )";
1081 $limit_cgi .= "&limit=available";
1085 # group_OR_limits, prefixed by mc-
1086 # OR every member of the group
1087 elsif ( $this_limit =~ /mc/ ) {
1088 $group_OR_limits .= " or " if $group_OR_limits;
1089 $limit_desc .= " or " if $group_OR_limits;
1090 $group_OR_limits .= "$this_limit";
1091 $limit_cgi .= "&limit=$this_limit";
1092 $limit_desc .= " $this_limit";
1095 # Regular old limits
1097 $limit .= " and " if $limit || $query;
1098 $limit .= "$this_limit";
1099 $limit_cgi .= "&limit=$this_limit";
1100 if ($this_limit =~ /^branch:(.+)/) {
1101 my $branchcode = $1;
1102 my $branchname = GetBranchName($branchcode);
1103 if (defined $branchname) {
1104 $limit_desc .= " branch:$branchname";
1106 $limit_desc .= " $this_limit";
1109 $limit_desc .= " $this_limit";
1113 if ($group_OR_limits) {
1114 $limit .= " and " if ( $query || $limit );
1115 $limit .= "($group_OR_limits)";
1117 if ($availability_limit) {
1118 $limit .= " and " if ( $query || $limit );
1119 $limit .= "($availability_limit)";
1122 # Normalize the query and limit strings
1125 for ( $query, $query_desc, $limit, $limit_desc ) {
1126 $_ =~ s/ / /g; # remove extra spaces
1127 $_ =~ s/^ //g; # remove any beginning spaces
1128 $_ =~ s/ $//g; # remove any ending spaces
1129 $_ =~ s/==/=/g; # remove double == from query
1131 $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1133 for ($query_cgi,$simple_query) {
1136 # append the limit to the query
1137 $query .= " " . $limit;
1141 warn "QUERY:" . $query;
1142 warn "QUERY CGI:" . $query_cgi;
1143 warn "QUERY DESC:" . $query_desc;
1144 warn "LIMIT:" . $limit;
1145 warn "LIMIT CGI:" . $limit_cgi;
1146 warn "LIMIT DESC:" . $limit_desc;
1147 warn "---------\nLeave buildQuery\n---------";
1150 undef, $query, $simple_query, $query_cgi,
1151 $query_desc, $limit, $limit_cgi, $limit_desc,
1152 $stopwords_removed, $query_type
1156 =head2 searchResults
1158 Format results in a form suitable for passing to the template
1162 # IMO this subroutine is pretty messy still -- it's responsible for
1163 # building the HTML output for the template
1165 my ( $searchdesc, $hits, $results_per_page, $offset, $scan, @marcresults ) = @_;
1166 my $dbh = C4::Context->dbh;
1170 # add search-term highlighting via <span>s on the search terms
1171 my $span_terms_hashref;
1172 for my $span_term ( split( / /, $searchdesc ) ) {
1173 $span_term =~ s/(.*=|\)|\(|\+|\.|\*)//g;
1174 $span_terms_hashref->{$span_term}++;
1177 #Build branchnames hash
1179 #get branch information.....
1182 $dbh->prepare("SELECT branchcode,branchname FROM branches")
1183 ; # FIXME : use C4::Koha::GetBranches
1185 while ( my $bdata = $bsth->fetchrow_hashref ) {
1186 $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'};
1188 # FIXME - We build an authorised values hash here, using the default framework
1189 # though it is possible to have different authvals for different fws.
1191 my $shelflocations =GetKohaAuthorisedValues('items.location','');
1193 # get notforloan authorised value list (see $shelflocations FIXME)
1194 my $notforloan_authorised_value = GetAuthValCode('items.notforloan','');
1196 #Build itemtype hash
1197 #find itemtype & itemtype image
1201 "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes"
1204 while ( my $bdata = $bsth->fetchrow_hashref ) {
1205 foreach (qw(description imageurl summary notforloan)) {
1206 $itemtypes{ $bdata->{'itemtype'} }->{$_} = $bdata->{$_};
1210 #search item field code
1213 "SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'"
1216 my ($itemtag) = $sth->fetchrow;
1218 ## find column names of items related to MARC
1219 my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items");
1221 my %subfieldstosearch;
1222 while ( ( my $column ) = $sth2->fetchrow ) {
1223 my ( $tagfield, $tagsubfield ) =
1224 &GetMarcFromKohaField( "items." . $column, "" );
1225 $subfieldstosearch{$column} = $tagsubfield;
1228 # handle which records to actually retrieve
1230 if ( $hits && $offset + $results_per_page <= $hits ) {
1231 $times = $offset + $results_per_page;
1234 $times = $hits; # FIXME: if $hits is undefined, why do we want to equal it?
1237 # loop through all of the records we've retrieved
1238 for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
1239 my $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
1240 my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' );
1241 $oldbiblio->{subtitle} = C4::Biblio::get_koha_field_from_marc('bibliosubtitle', 'subtitle', $marcrecord, '');
1242 $oldbiblio->{result_number} = $i + 1;
1244 # add imageurl to itemtype if there is one
1245 $oldbiblio->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
1247 $oldbiblio->{'authorised_value_images'} = C4::Items::get_authorised_value_images( C4::Biblio::get_biblio_authorised_values( $oldbiblio->{'biblionumber'}, $marcrecord ) );
1248 (my $aisbn) = $oldbiblio->{isbn} =~ /([\d-]*[X]*)/;
1250 $oldbiblio->{amazonisbn} = $aisbn;
1251 $oldbiblio->{description} = $itemtypes{ $oldbiblio->{itemtype} }->{description};
1252 # Build summary if there is one (the summary is defined in the itemtypes table)
1253 # FIXME: is this used anywhere, I think it can be commented out? -- JF
1254 if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
1255 my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1256 my @fields = $marcrecord->fields();
1257 foreach my $field (@fields) {
1258 my $tag = $field->tag();
1259 my $tagvalue = $field->as_string();
1260 if (! utf8::is_utf8($tagvalue)) {
1261 utf8::decode($tagvalue);
1265 s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
1266 unless ( $tag < 10 ) {
1267 my @subf = $field->subfields;
1268 for my $i ( 0 .. $#subf ) {
1269 my $subfieldcode = $subf[$i][0];
1270 my $subfieldvalue = $subf[$i][1];
1271 if (! utf8::is_utf8($subfieldvalue)) {
1272 utf8::decode($subfieldvalue);
1274 my $tagsubf = $tag . $subfieldcode;
1276 s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
1281 $summary =~ s/\[(.*?)]//g;
1282 $summary =~ s/\n/<br\/>/g;
1283 $oldbiblio->{summary} = $summary;
1286 # save an author with no <span> tag, for the <a href=search.pl?q=<!--tmpl_var name="author"-->> link
1287 $oldbiblio->{'author_nospan'} = $oldbiblio->{'author'};
1288 $oldbiblio->{'title_nospan'} = $oldbiblio->{'title'};
1289 $oldbiblio->{'subtitle_nospan'} = $oldbiblio->{'subtitle'};
1290 # Add search-term highlighting to the whole record where they match using <span>s
1291 if (C4::Context->preference("OpacHighlightedWords")){
1292 my $searchhighlightblob;
1293 for my $highlight_field ( $marcrecord->fields ) {
1295 # FIXME: need to skip title, subtitle, author, etc., as they are handled below
1296 next if $highlight_field->tag() =~ /(^00)/; # skip fixed fields
1297 for my $subfield ($highlight_field->subfields()) {
1299 next if $subfield->[0] eq '9';
1300 my $field = $subfield->[1];
1301 for my $term ( keys %$span_terms_hashref ) {
1302 if ( ( $field =~ /$term/i ) && (( length($term) > 3 ) || ($field =~ / $term /i)) ) {
1303 $field =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1307 $searchhighlightblob .= $field . " ... " if $match;
1311 $searchhighlightblob = ' ... '.$searchhighlightblob if $searchhighlightblob;
1312 $oldbiblio->{'searchhighlightblob'} = $searchhighlightblob;
1315 # Add search-term highlighting to the title, subtitle, etc. fields
1316 for my $term ( keys %$span_terms_hashref ) {
1317 my $old_term = $term;
1318 if ( length($term) > 3 ) {
1319 $term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\]|\\|\*)//g;
1320 foreach(qw(title subtitle author publishercode place pages notes size)) {
1321 $oldbiblio->{$_} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1326 ($i % 2) and $oldbiblio->{'toggle'} = 1;
1328 # Pull out the items fields
1329 my @fields = $marcrecord->field($itemtag);
1331 # Setting item statuses for display
1332 my @available_items_loop;
1333 my @onloan_items_loop;
1334 my @notforloan_items_loop;
1335 my @other_items_loop;
1337 my $available_items;
1339 my $notforloan_items;
1342 my $ordered_count = 0;
1343 my $available_count = 0;
1344 my $onloan_count = 0;
1345 my $notforloan_count = 0;
1346 my $longoverdue_count = 0;
1347 my $other_count = 0;
1348 my $wthdrawn_count = 0;
1349 my $itemlost_count = 0;
1350 my $itembinding_count = 0;
1351 my $itemdamaged_count = 0;
1352 my $item_in_transit_count = 0;
1353 my $can_place_holds = 0;
1354 my $items_count = scalar(@fields);
1356 ( C4::Context->preference('maxItemsinSearchResults') )
1357 ? C4::Context->preference('maxItemsinSearchResults') - 1
1360 # loop through every item
1361 foreach my $field (@fields) {
1364 # populate the items hash
1365 foreach my $code ( keys %subfieldstosearch ) {
1366 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1368 my $hbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'homebranch' : 'holdingbranch';
1369 my $otherbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'holdingbranch' : 'homebranch';
1370 # set item's branch name, use HomeOrHoldingBranch syspref first, fall back to the other one
1371 if ($item->{$hbranch}) {
1372 $item->{'branchname'} = $branches{$item->{$hbranch}};
1374 elsif ($item->{$otherbranch}) { # Last resort
1375 $item->{'branchname'} = $branches{$item->{$otherbranch}};
1378 my $prefix = $item->{$hbranch} . '--' . $item->{location} . $item->{itype} . $item->{itemcallnumber};
1379 # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
1380 if ( $item->{onloan} ) {
1382 my $key = $prefix . $item->{due_date};
1383 $onloan_items->{$key}->{due_date} = format_date($item->{onloan});
1384 $onloan_items->{$key}->{count}++ if $item->{$hbranch};
1385 $onloan_items->{$key}->{branchname} = $item->{branchname};
1386 $onloan_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1387 $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber};
1388 $onloan_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1389 $onloan_items->{$key}->{barcode} = $item->{barcode};
1390 # if something's checked out and lost, mark it as 'long overdue'
1391 if ( $item->{itemlost} ) {
1392 $onloan_items->{$prefix}->{longoverdue}++;
1393 $longoverdue_count++;
1394 } else { # can place holds as long as item isn't lost
1395 $can_place_holds = 1;
1399 # items not on loan, but still unavailable ( lost, withdrawn, damaged )
1403 if ( $item->{notforloan} == -1 ) {
1407 # is item in transit?
1408 my $transfertwhen = '';
1409 my ($transfertfrom, $transfertto);
1411 unless ($item->{wthdrawn}
1412 || $item->{itemlost}
1414 || $item->{notforloan}
1415 || $items_count > 20) {
1417 # A couple heuristics to limit how many times
1418 # we query the database for item transfer information, sacrificing
1419 # accuracy in some cases for speed;
1421 # 1. don't query if item has one of the other statuses
1422 # 2. don't check transit status if the bib has
1423 # more than 20 items
1425 # FIXME: to avoid having the query the database like this, and to make
1426 # the in transit status count as unavailable for search limiting,
1427 # should map transit status to record indexed in Zebra.
1429 ($transfertwhen, $transfertfrom, $transfertto) = C4::Circulation::GetTransfers($item->{itemnumber});
1432 # item is withdrawn, lost or damaged
1433 if ( $item->{wthdrawn}
1434 || $item->{itemlost}
1436 || $item->{notforloan}
1437 || ($transfertwhen ne ''))
1439 $wthdrawn_count++ if $item->{wthdrawn};
1440 $itemlost_count++ if $item->{itemlost};
1441 $itemdamaged_count++ if $item->{damaged};
1442 $item_in_transit_count++ if $transfertwhen ne '';
1443 $item->{status} = $item->{wthdrawn} . "-" . $item->{itemlost} . "-" . $item->{damaged} . "-" . $item->{notforloan};
1445 my $key = $prefix . $item->{status};
1447 foreach (qw(wthdrawn itemlost damaged branchname itemcallnumber)) {
1448 if($item->{notforloan} == 1){
1449 $notforloan_items->{$key}->{$_} = $item->{$_};
1451 $other_items->{$key}->{$_} = $item->{$_};
1454 if($item->{notforloan} == 1){
1455 $notforloan_count++;
1457 $notforloan_items->{$key}->{intransit} = ($transfertwhen ne '') ? 1 : 0;
1458 $notforloan_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1459 $notforloan_items->{$key}->{count}++ if $item->{$hbranch};
1460 $notforloan_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1461 $notforloan_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1462 $notforloan_items->{$key}->{barcode} = $item->{barcode};
1466 $other_items->{$key}->{intransit} = ($transfertwhen ne '') ? 1 : 0;
1467 $other_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1468 $other_items->{$key}->{count}++ if $item->{$hbranch};
1469 $other_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1470 $other_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1471 $other_items->{$key}->{barcode} = $item->{barcode};
1477 $can_place_holds = 1;
1479 $available_items->{$prefix}->{count}++ if $item->{$hbranch};
1480 foreach (qw(branchname itemcallnumber barcode)) {
1481 $available_items->{$prefix}->{$_} = $item->{$_};
1483 $available_items->{$prefix}->{location} = $shelflocations->{ $item->{location} };
1484 $available_items->{$prefix}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1487 } # notforloan, item level and biblioitem level
1488 my ( $availableitemscount, $onloanitemscount, $notforloanitemscount,$otheritemscount );
1490 ( C4::Context->preference('maxItemsinSearchResults') )
1491 ? C4::Context->preference('maxItemsinSearchResults') - 1
1493 for my $key ( sort keys %$onloan_items ) {
1494 (++$onloanitemscount > $maxitems) and last;
1495 push @onloan_items_loop, $onloan_items->{$key};
1497 for my $key ( sort keys %$other_items ) {
1498 (++$otheritemscount > $maxitems) and last;
1499 push @other_items_loop, $other_items->{$key};
1501 for my $key ( sort keys %$notforloan_items ) {
1502 (++$notforloanitemscount > $maxitems) and last;
1503 push @notforloan_items_loop, $notforloan_items->{$key};
1505 for my $key ( sort keys %$available_items ) {
1506 (++$availableitemscount > $maxitems) and last;
1507 push @available_items_loop, $available_items->{$key}
1510 # XSLT processing of some stuff
1511 if (C4::Context->preference("XSLTResultsDisplay") && !$scan) {
1512 $oldbiblio->{XSLTResultsRecord} = XSLTParse4Display(
1513 $oldbiblio->{biblionumber}, $marcrecord, 'Results' );
1516 # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
1517 $can_place_holds = 0 if $itemtypes{ $oldbiblio->{itemtype} }->{notforloan};
1518 $oldbiblio->{norequests} = 1 unless $can_place_holds;
1519 $oldbiblio->{itemsplural} = 1 if $items_count > 1;
1520 $oldbiblio->{items_count} = $items_count;
1521 $oldbiblio->{available_items_loop} = \@available_items_loop;
1522 $oldbiblio->{notforloan_items_loop}= \@notforloan_items_loop;
1523 $oldbiblio->{onloan_items_loop} = \@onloan_items_loop;
1524 $oldbiblio->{other_items_loop} = \@other_items_loop;
1525 $oldbiblio->{availablecount} = $available_count;
1526 $oldbiblio->{availableplural} = 1 if $available_count > 1;
1527 $oldbiblio->{onloancount} = $onloan_count;
1528 $oldbiblio->{onloanplural} = 1 if $onloan_count > 1;
1529 $oldbiblio->{notforloancount} = $notforloan_count;
1530 $oldbiblio->{othercount} = $other_count;
1531 $oldbiblio->{otherplural} = 1 if $other_count > 1;
1532 $oldbiblio->{wthdrawncount} = $wthdrawn_count;
1533 $oldbiblio->{itemlostcount} = $itemlost_count;
1534 $oldbiblio->{damagedcount} = $itemdamaged_count;
1535 $oldbiblio->{intransitcount} = $item_in_transit_count;
1536 $oldbiblio->{orderedcount} = $ordered_count;
1537 $oldbiblio->{isbn} =~
1538 s/-//g; # deleting - in isbn to enable amazon content
1539 push( @newresults, $oldbiblio );
1544 #----------------------------------------------------------------------
1546 # Non-Zebra GetRecords#
1547 #----------------------------------------------------------------------
1551 NZgetRecords has the same API as zera getRecords, even if some parameters are not managed
1557 $query, $simple_query, $sort_by_ref, $servers_ref,
1558 $results_per_page, $offset, $expanded_facet, $branches,
1561 warn "query =$query" if $DEBUG;
1562 my $result = NZanalyse($query);
1563 warn "results =$result" if $DEBUG;
1565 NZorder( $result, @$sort_by_ref[0], $results_per_page, $offset ),
1571 NZanalyse : get a CQL string as parameter, and returns a list of biblionumber;title,biblionumber;title,...
1572 the list is built from an inverted index in the nozebra SQL table
1573 note that title is here only for convenience : the sorting will be very fast when requested on title
1574 if the sorting is requested on something else, we will have to reread all results, and that may be longer.
1579 my ( $string, $server ) = @_;
1580 # warn "---------" if $DEBUG;
1581 warn " NZanalyse" if $DEBUG;
1582 # warn "---------" if $DEBUG;
1584 # $server contains biblioserver or authorities, depending on what we search on.
1585 #warn "querying : $string on $server";
1586 $server = 'biblioserver' unless $server;
1588 # if we have a ", replace the content to discard temporarily any and/or/not inside
1590 if ( $string =~ /"/ ) {
1591 $string =~ s/"(.*?)"/__X__/;
1593 warn "commacontent : $commacontent" if $DEBUG;
1596 # split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y"
1597 # then, call again NZanalyse with $left and $right
1598 # (recursive until we find a leaf (=> something without and/or/not)
1599 # delete repeated operator... Would then go in infinite loop
1600 while ( $string =~ s/( and| or| not| AND| OR| NOT)\1/$1/g ) {
1603 #process parenthesis before.
1604 if ( $string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/ ) {
1607 my $operator = lc($3); # FIXME: and/or/not are operators, not operands
1609 "dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right"
1611 my $leftresult = NZanalyse( $left, $server );
1613 my $rightresult = NZanalyse( $right, $server );
1615 # OK, we have the results for right and left part of the query
1616 # depending of operand, intersect, union or exclude both lists
1617 # to get a result list
1618 if ( $operator eq ' and ' ) {
1619 return NZoperatorAND($leftresult,$rightresult);
1621 elsif ( $operator eq ' or ' ) {
1623 # just merge the 2 strings
1624 return $leftresult . $rightresult;
1626 elsif ( $operator eq ' not ' ) {
1627 return NZoperatorNOT($leftresult,$rightresult);
1631 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1635 warn "string :" . $string if $DEBUG;
1639 if ($string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/) {
1642 $operator = lc($2); # FIXME: and/or/not are operators, not operands
1644 warn "no parenthesis. left : $left operator: $operator right: $right"
1647 # it's not a leaf, we have a and/or/not
1650 # reintroduce comma content if needed
1651 $right =~ s/__X__/"$commacontent"/ if $commacontent;
1652 $left =~ s/__X__/"$commacontent"/ if $commacontent;
1653 warn "node : $left / $operator / $right\n" if $DEBUG;
1654 my $leftresult = NZanalyse( $left, $server );
1655 my $rightresult = NZanalyse( $right, $server );
1656 warn " leftresult : $leftresult" if $DEBUG;
1657 warn " rightresult : $rightresult" if $DEBUG;
1658 # OK, we have the results for right and left part of the query
1659 # depending of operand, intersect, union or exclude both lists
1660 # to get a result list
1661 if ( $operator eq ' and ' ) {
1663 return NZoperatorAND($leftresult,$rightresult);
1665 elsif ( $operator eq ' or ' ) {
1667 # just merge the 2 strings
1668 return $leftresult . $rightresult;
1670 elsif ( $operator eq ' not ' ) {
1671 return NZoperatorNOT($leftresult,$rightresult);
1675 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1676 die "error : operand unknown : $operator for $string";
1679 # it's a leaf, do the real SQL query and return the result
1682 $string =~ s/__X__/"$commacontent"/ if $commacontent;
1683 $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g;
1684 #remove trailing blank at the beginning
1686 warn "leaf:$string" if $DEBUG;
1688 # parse the string in in operator/operand/value again
1692 if ($string =~ /(.*)(>=|<=)(.*)/) {
1699 # warn "handling leaf... left:$left operator:$operator right:$right"
1701 unless ($operator) {
1702 if ($string =~ /(.*)(>|<|=)(.*)/) {
1707 "handling unless (operator)... left:$left operator:$operator right:$right"
1715 # strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr...
1718 # automatic replace for short operators
1719 $left = 'title' if $left =~ '^ti$';
1720 $left = 'author' if $left =~ '^au$';
1721 $left = 'publisher' if $left =~ '^pb$';
1722 $left = 'subject' if $left =~ '^su$';
1723 $left = 'koha-Auth-Number' if $left =~ '^an$';
1724 $left = 'keyword' if $left =~ '^kw$';
1725 $left = 'itemtype' if $left =~ '^mc$'; # Fix for Bug 2599 - Search limits not working for NoZebra
1726 warn "handling leaf... left:$left operator:$operator right:$right" if $DEBUG;
1727 if ( $operator && $left ne 'keyword' ) {
1729 #do a specific search
1730 my $dbh = C4::Context->dbh;
1731 $operator = 'LIKE' if $operator eq '=' and $right =~ /%/;
1734 "SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?"
1736 warn "$left / $operator / $right\n" if $DEBUG;
1738 # split each word, query the DB and build the biblionumbers result
1739 #sanitizing leftpart
1740 $left =~ s/^\s+|\s+$//;
1741 foreach ( split / /, $right ) {
1743 $_ =~ s/^\s+|\s+$//;
1745 warn "EXECUTE : $server, $left, $_" if $DEBUG;
1746 $sth->execute( $server, $left, $_ )
1747 or warn "execute failed: $!";
1748 while ( my ( $line, $value ) = $sth->fetchrow ) {
1750 # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
1751 # otherwise, fill the result
1752 $biblionumbers .= $line
1753 unless ( $right =~ /^\d+$/ && $value =~ /\D/ );
1754 warn "result : $value "
1755 . ( $right =~ /\d/ ) . "=="
1756 . ( $value =~ /\D/?$line:"" ) if $DEBUG; #= $line";
1759 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1761 warn "NZAND" if $DEBUG;
1762 $results = NZoperatorAND($biblionumbers,$results);
1765 $results = $biblionumbers;
1771 #do a complete search (all indexes), if index='kw' do complete search too.
1772 my $dbh = C4::Context->dbh;
1775 "SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?"
1778 # split each word, query the DB and build the biblionumbers result
1779 foreach ( split / /, $string ) {
1780 next if C4::Context->stopwords->{ uc($_) }; # skip if stopword
1781 warn "search on all indexes on $_" if $DEBUG;
1784 $sth->execute( $server, $_ );
1785 while ( my $line = $sth->fetchrow ) {
1786 $biblionumbers .= $line;
1789 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1791 $results = NZoperatorAND($biblionumbers,$results);
1794 warn "NEW RES for $_ = $biblionumbers" if $DEBUG;
1795 $results = $biblionumbers;
1799 warn "return : $results for LEAF : $string" if $DEBUG;
1802 warn "---------\nLeave NZanalyse\n---------" if $DEBUG;
1806 my ($rightresult, $leftresult)=@_;
1808 my @leftresult = split /;/, $leftresult;
1809 warn " @leftresult / $rightresult \n" if $DEBUG;
1811 # my @rightresult = split /;/,$leftresult;
1814 # parse the left results, and if the biblionumber exist in the right result, save it in finalresult
1815 # the result is stored twice, to have the same weight for AND than OR.
1816 # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130
1817 # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64
1818 foreach (@leftresult) {
1821 ( $value, $countvalue ) = ( $1, $2 ) if ($value=~/(.*)-(\d+)$/);
1822 if ( $rightresult =~ /\Q$value\E-(\d+);/ ) {
1823 $countvalue = ( $1 > $countvalue ? $countvalue : $1 );
1825 "$value-$countvalue;$value-$countvalue;";
1828 warn "NZAND DONE : $finalresult \n" if $DEBUG;
1829 return $finalresult;
1833 my ($rightresult, $leftresult)=@_;
1834 return $rightresult.$leftresult;
1838 my ($leftresult, $rightresult)=@_;
1840 my @leftresult = split /;/, $leftresult;
1842 # my @rightresult = split /;/,$leftresult;
1844 foreach (@leftresult) {
1846 $value=$1 if $value=~m/(.*)-\d+$/;
1847 unless ($rightresult =~ "$value-") {
1848 $finalresult .= "$_;";
1851 return $finalresult;
1856 $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset);
1863 my ( $biblionumbers, $ordering, $results_per_page, $offset ) = @_;
1864 warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG;
1866 # order title asc by default
1867 # $ordering = '1=36 <i' unless $ordering;
1868 $results_per_page = 20 unless $results_per_page;
1869 $offset = 0 unless $offset;
1870 my $dbh = C4::Context->dbh;
1873 # order by POPULARITY
1875 if ( $ordering =~ /popularity/ ) {
1879 # popularity is not in MARC record, it's builded from a specific query
1881 $dbh->prepare("select sum(issues) from items where biblionumber=?");
1882 foreach ( split /;/, $biblionumbers ) {
1883 my ( $biblionumber, $title ) = split /,/, $_;
1884 $result{$biblionumber} = GetMarcBiblio($biblionumber);
1885 $sth->execute($biblionumber);
1886 my $popularity = $sth->fetchrow || 0;
1888 # hint : the key is popularity.title because we can have
1889 # many results with the same popularity. In this cas, sub-ordering is done by title
1890 # we also have biblionumber to avoid bug for 2 biblios with the same title & popularity
1891 # (un-frequent, I agree, but we won't forget anything that way ;-)
1892 $popularity{ sprintf( "%10d", $popularity ) . $title
1893 . $biblionumber } = $biblionumber;
1896 # sort the hash and return the same structure as GetRecords (Zebra querying)
1899 if ( $ordering eq 'popularity_dsc' ) { # sort popularity DESC
1900 foreach my $key ( sort { $b cmp $a } ( keys %popularity ) ) {
1901 $result_hash->{'RECORDS'}[ $numbers++ ] =
1902 $result{ $popularity{$key} }->as_usmarc();
1905 else { # sort popularity ASC
1906 foreach my $key ( sort ( keys %popularity ) ) {
1907 $result_hash->{'RECORDS'}[ $numbers++ ] =
1908 $result{ $popularity{$key} }->as_usmarc();
1911 my $finalresult = ();
1912 $result_hash->{'hits'} = $numbers;
1913 $finalresult->{'biblioserver'} = $result_hash;
1914 return $finalresult;
1920 elsif ( $ordering =~ /author/ ) {
1922 foreach ( split /;/, $biblionumbers ) {
1923 my ( $biblionumber, $title ) = split /,/, $_;
1924 my $record = GetMarcBiblio($biblionumber);
1926 if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1927 $author = $record->subfield( '200', 'f' );
1928 $author = $record->subfield( '700', 'a' ) unless $author;
1931 $author = $record->subfield( '100', 'a' );
1934 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1935 # and we don't want to get only 1 result for each of them !!!
1936 $result{ $author . $biblionumber } = $record;
1939 # sort the hash and return the same structure as GetRecords (Zebra querying)
1942 if ( $ordering eq 'author_za' ) { # sort by author desc
1943 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1944 $result_hash->{'RECORDS'}[ $numbers++ ] =
1945 $result{$key}->as_usmarc();
1948 else { # sort by author ASC
1949 foreach my $key ( sort ( keys %result ) ) {
1950 $result_hash->{'RECORDS'}[ $numbers++ ] =
1951 $result{$key}->as_usmarc();
1954 my $finalresult = ();
1955 $result_hash->{'hits'} = $numbers;
1956 $finalresult->{'biblioserver'} = $result_hash;
1957 return $finalresult;
1960 # ORDER BY callnumber
1963 elsif ( $ordering =~ /callnumber/ ) {
1965 foreach ( split /;/, $biblionumbers ) {
1966 my ( $biblionumber, $title ) = split /,/, $_;
1967 my $record = GetMarcBiblio($biblionumber);
1969 my ( $callnumber_tag, $callnumber_subfield ) =
1970 GetMarcFromKohaField( 'items.itemcallnumber','' );
1971 ( $callnumber_tag, $callnumber_subfield ) =
1972 GetMarcFromKohaField('biblioitems.callnumber','')
1973 unless $callnumber_tag;
1974 if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1975 $callnumber = $record->subfield( '200', 'f' );
1978 $callnumber = $record->subfield( '100', 'a' );
1981 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1982 # and we don't want to get only 1 result for each of them !!!
1983 $result{ $callnumber . $biblionumber } = $record;
1986 # sort the hash and return the same structure as GetRecords (Zebra querying)
1989 if ( $ordering eq 'call_number_dsc' ) { # sort by title desc
1990 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1991 $result_hash->{'RECORDS'}[ $numbers++ ] =
1992 $result{$key}->as_usmarc();
1995 else { # sort by title ASC
1996 foreach my $key ( sort { $a cmp $b } ( keys %result ) ) {
1997 $result_hash->{'RECORDS'}[ $numbers++ ] =
1998 $result{$key}->as_usmarc();
2001 my $finalresult = ();
2002 $result_hash->{'hits'} = $numbers;
2003 $finalresult->{'biblioserver'} = $result_hash;
2004 return $finalresult;
2006 elsif ( $ordering =~ /pubdate/ ) { #pub year
2008 foreach ( split /;/, $biblionumbers ) {
2009 my ( $biblionumber, $title ) = split /,/, $_;
2010 my $record = GetMarcBiblio($biblionumber);
2011 my ( $publicationyear_tag, $publicationyear_subfield ) =
2012 GetMarcFromKohaField( 'biblioitems.publicationyear', '' );
2013 my $publicationyear =
2014 $record->subfield( $publicationyear_tag,
2015 $publicationyear_subfield );
2017 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2018 # and we don't want to get only 1 result for each of them !!!
2019 $result{ $publicationyear . $biblionumber } = $record;
2022 # sort the hash and return the same structure as GetRecords (Zebra querying)
2025 if ( $ordering eq 'pubdate_dsc' ) { # sort by pubyear desc
2026 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2027 $result_hash->{'RECORDS'}[ $numbers++ ] =
2028 $result{$key}->as_usmarc();
2031 else { # sort by pub year ASC
2032 foreach my $key ( sort ( keys %result ) ) {
2033 $result_hash->{'RECORDS'}[ $numbers++ ] =
2034 $result{$key}->as_usmarc();
2037 my $finalresult = ();
2038 $result_hash->{'hits'} = $numbers;
2039 $finalresult->{'biblioserver'} = $result_hash;
2040 return $finalresult;
2046 elsif ( $ordering =~ /title/ ) {
2048 # the title is in the biblionumbers string, so we just need to build a hash, sort it and return
2050 foreach ( split /;/, $biblionumbers ) {
2051 my ( $biblionumber, $title ) = split /,/, $_;
2053 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2054 # and we don't want to get only 1 result for each of them !!!
2055 # hint & speed improvement : we can order without reading the record
2056 # so order, and read records only for the requested page !
2057 $result{ $title . $biblionumber } = $biblionumber;
2060 # sort the hash and return the same structure as GetRecords (Zebra querying)
2063 if ( $ordering eq 'title_az' ) { # sort by title desc
2064 foreach my $key ( sort ( keys %result ) ) {
2065 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2068 else { # sort by title ASC
2069 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2070 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2074 # limit the $results_per_page to result size if it's more
2075 $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2077 # for the requested page, replace biblionumber by the complete record
2078 # speed improvement : avoid reading too much things
2080 my $counter = $offset ;
2081 $counter <= $offset + $results_per_page ;
2085 $result_hash->{'RECORDS'}[$counter] =
2086 GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc;
2088 my $finalresult = ();
2089 $result_hash->{'hits'} = $numbers;
2090 $finalresult->{'biblioserver'} = $result_hash;
2091 return $finalresult;
2098 # we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking
2101 foreach ( split /;/, $biblionumbers ) {
2102 my ( $biblionumber, $title ) = split /,/, $_;
2103 $title =~ /(.*)-(\d)/;
2108 # note that we + the ranking because ranking is calculated on weight of EACH term requested.
2109 # if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N
2110 # biblio N has ranking = 6
2111 $count_ranking{$biblionumber} += $ranking;
2114 # build the result by "inverting" the count_ranking hash
2115 # hing : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead
2117 foreach ( keys %count_ranking ) {
2118 $result{ sprintf( "%10d", $count_ranking{$_} ) . '-' . $_ } = $_;
2121 # sort the hash and return the same structure as GetRecords (Zebra querying)
2124 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2125 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2128 # limit the $results_per_page to result size if it's more
2129 $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2131 # for the requested page, replace biblionumber by the complete record
2132 # speed improvement : avoid reading too much things
2134 my $counter = $offset ;
2135 $counter <= $offset + $results_per_page ;
2139 $result_hash->{'RECORDS'}[$counter] =
2140 GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc
2141 if $result_hash->{'RECORDS'}[$counter];
2143 my $finalresult = ();
2144 $result_hash->{'hits'} = $numbers;
2145 $finalresult->{'biblioserver'} = $result_hash;
2146 return $finalresult;
2150 END { } # module clean-up code here (global destructor)
2157 Koha Developement team <info@koha.org>