3 # This file is part of Koha.
5 # Koha is free software; you can redistribute it and/or modify it under the
6 # terms of the GNU General Public License as published by the Free Software
7 # Foundation; either version 2 of the License, or (at your option) any later
10 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License along with
15 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
16 # Suite 330, Boston, MA 02111-1307 USA
19 # use warnings; # FIXME
22 use C4::Biblio; # GetMarcFromKohaField
23 use C4::Koha; # getFacets
25 use C4::Search::PazPar2;
27 use C4::Dates qw(format_date);
30 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
32 # set the version for version checking
35 $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
40 C4::Search - Functions for searching the Koha catalog.
44 See opac/opac-search.pl or catalogue/search.pl for example of usage
48 This module provides searching functions for Koha's bibliographic databases
65 # make all your functions, whether exported or not;
67 =head2 findseealso($dbh,$fields);
69 C<$dbh> is a link to the DB handler.
72 my $dbh =C4::Context->dbh;
74 C<$fields> is a reference to the fields array
76 This function modifies the @$fields array and adds related fields to search on.
78 FIXME: this function is probably deprecated in Koha 3
# Body of findseealso. NOTE(review): the sub line and the closing braces are
# not present in this listing.
# For every entry of @$fields, the tag is taken as the three characters
# starting at offset 1 and the subfield code as the single character at
# offset 4. When the structure returned by GetMarcStructure(1) holds a true
# seealso value for that tag and subfield, that value is appended to the
# entry in place, separated by a comma.
# $dbh is unpacked but not used in the lines visible here.
83 my ( $dbh, $fields ) = @_;
84 my $tagslib = GetMarcStructure(1);
85 for ( my $i = 0 ; $i <= $#{$fields} ; $i++ ) {
86 my ($tag) = substr( @$fields[$i], 1, 3 );
87 my ($subfield) = substr( @$fields[$i], 4, 1 );
88 @$fields[$i] .= ',' . $tagslib->{$tag}->{$subfield}->{seealso}
89 if ( $tagslib->{$tag}->{$subfield}->{seealso} );
95 ($biblionumber,$biblionumber,$title) = FindDuplicate($record);
97 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
# Body of FindDuplicate. NOTE(review): the sub line, the unpacking of $record,
# the declarations of $query and @results, and several braces are not present
# in this listing.
# The MARC record is converted to a Koha field hash, a CCL query is built from
# it (by ISBN when one is present, otherwise by title plus optional itemtype
# and author), and the query is run through SimpleSearch. For each hit the
# biblionumber and title are pushed onto @results.
103 my $dbh = C4::Context->dbh;
104 my $result = TransformMarcToKoha( $dbh, $record, '' );
109 my ( $biblionumber, $title );
111 # search duplicate on ISBN, easy and fast..
112 # ... normalize first
113 if ( $result->{isbn} ) {
# Drop everything from the first opening parenthesis onward, then trailing whitespace.
114 $result->{isbn} =~ s/\(.*$//;
115 $result->{isbn} =~ s/\s+$//;
116 $query = "isbn=$result->{isbn}";
# Title branch: strip backslashes, double quotes and parentheses before
# interpolating the title into the query.
119 $result->{title} =~ s /\\//g;
120 $result->{title} =~ s /\"//g;
121 $result->{title} =~ s /\(//g;
122 $result->{title} =~ s /\)//g;
124 # FIXME: instead of removing operators, could just do
125 # quotes around the value
# NOTE(review): the pattern below has no word boundaries and no /i flag, so it
# also removes these letter sequences from inside longer lowercase words.
126 $result->{title} =~ s/(and|or|not)//g;
127 $query = "ti,ext=$result->{title}";
128 $query .= " and itemtype=$result->{itemtype}"
129 if ( $result->{itemtype} );
130 if ( $result->{author} ) {
131 $result->{author} =~ s /\\//g;
132 $result->{author} =~ s /\"//g;
133 $result->{author} =~ s /\(//g;
134 $result->{author} =~ s /\)//g;
136 # remove valid operators
137 $result->{author} =~ s/(and|or|not)//g;
138 $query .= " and au,ext=$result->{author}";
142 # FIXME: add error handling
# $error is captured here but not inspected in the lines visible in this listing.
143 my ( $error, $searchresults ) = SimpleSearch($query); # FIXME :: hardcoded !
145 foreach my $possible_duplicate_record (@$searchresults) {
147 MARC::Record->new_from_usmarc($possible_duplicate_record);
148 my $result = TransformMarcToKoha( $dbh, $marcrecord, '' );
150 # FIXME :: why 2 $biblionumber ?
152 push @results, $result->{'biblionumber'};
153 push @results, $result->{'title'};
161 ( $error, $results, $total_hits ) = SimpleSearch( $query, $offset, $max_results, [@servers] );
163 This function provides a simple search API on the bibliographic catalog
169 * $query can be a simple keyword or a complete CCL query
170 * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
171 * $offset - If present, represents the number of records at the beginning to omit. Defaults to 0
172 * $max_results - if present, determines the maximum number of records to fetch. undef is All. defaults to undef.
177 * $error is undef unless an error is detected
178 * \@results is an array of records.
179 * $total_hits is the number of hits that would have been returned with no limit
181 =item C<usage in the script:>
185 my ( $error, $marcresults, $total_hits ) = SimpleSearch($query);
187 if (defined $error) {
188 $template->param(query_error => $error);
189 warn "error: ".$error;
190 output_html_with_http_headers $input, $cookie, $template->output;
194 my $hits = scalar @$marcresults;
197 for my $i (0..$hits) {
199 my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]);
200 my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,'');
202 #build the hash for the template.
203 $resultsloop{highlight} = ($i % 2)?(1):(0);
204 $resultsloop{title} = $biblio->{'title'};
205 $resultsloop{subtitle} = $biblio->{'subtitle'};
206 $resultsloop{biblionumber} = $biblio->{'biblionumber'};
207 $resultsloop{author} = $biblio->{'author'};
208 $resultsloop{publishercode} = $biblio->{'publishercode'};
209 $resultsloop{publicationyear} = $biblio->{'publicationyear'};
211 push @results, \%resultsloop;
214 $template->param(result=>\@results);
# Body of SimpleSearch. NOTE(review): the sub line, several declarations
# (@results, @zconns, @zoom_queries, @tmpresults, $total_hits), the eval
# wrapper and most closing braces are not present in this listing.
# Returns a three-element list: error (undef on success), a reference to the
# array of raw records, and the total hit count.
219 my ( $query, $offset, $max_results, $servers ) = @_;
# NoZebra path: the result for biblioserver is taken from NZorder/NZanalyse.
221 if ( C4::Context->preference('NoZebra') ) {
222 my $result = NZorder( NZanalyse($query) )->{'biblioserver'};
225 && $result->{hits} > 0 ? $result->{'RECORDS'} : [] );
# NOTE(review): $search_result appears to hold an array reference (RECORDS or
# an empty anonymous array), so scalar() here yields the reference itself
# rather than an element count. Confirm what callers expect as third value.
226 return ( undef, $search_result, scalar($search_result) );
229 # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
230 my @servers = defined ( $servers ) ? @$servers : ( "biblioserver" );
# An empty or false query is rejected only on the Zebra path; the NoZebra
# branch above returns before this check.
236 return ( "No query entered", undef, undef ) unless $query;
238 # Initialize & Search Zebra
239 for ( my $i = 0 ; $i < @servers ; $i++ ) {
241 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
242 $zoom_queries[$i] = new ZOOM::Query::CCL2RPN( $query, $zconns[$i]);
243 $tmpresults[$i] = $zconns[$i]->search( $zoom_queries[$i] );
# The error string is assembled from the connection diagnostics and returned
# only when errcode() is true.
247 $zconns[$i]->errmsg() . " ("
248 . $zconns[$i]->errcode() . ") "
249 . $zconns[$i]->addinfo() . " "
250 . $zconns[$i]->diagset();
252 return ( $error, undef, undef ) if $zconns[$i]->errcode();
256 # caught a ZOOM::Exception
260 . $@->addinfo() . " "
263 return ( $error, undef, undef );
# The value returned by ZOOM::event is used as a 1-based position in @zconns,
# hence the $i - 1 subscripts below; 0 ends the loop.
266 while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
267 my $event = $zconns[ $i - 1 ]->last_event();
268 if ( $event == ZOOM::Event::ZEND ) {
# $first_record and $last_record are 1-based and inclusive; the record fetch
# below converts to 0-based with $j-1.
270 my $first_record = defined( $offset ) ? $offset+1 : 1;
271 my $hits = $tmpresults[ $i - 1 ]->size();
272 $total_hits += $hits;
273 my $last_record = $hits;
# $offset may be undef here when only $max_results was supplied; it then
# counts as 0 in the arithmetic.
274 if ( defined $max_results && $offset + $max_results < $hits ) {
275 $last_record = $offset + $max_results;
278 for my $j ( $first_record..$last_record ) {
279 my $record = $tmpresults[ $i - 1 ]->record( $j-1 )->raw(); # 0 indexed
280 push @results, $record;
# Cleanup: the body of the result-set loop is not visible in this listing;
# each query object is destroyed explicitly.
285 foreach my $result (@tmpresults) {
288 foreach my $zoom_query (@zoom_queries) {
289 $zoom_query->destroy();
292 return ( undef, \@results, $total_hits );
298 ( undef, $results_hashref, \@facets_loop ) = getRecords (
300 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
301 $results_per_page, $offset, $expanded_facet, $branches,
305 The all singing, all dancing, multi-server, asynchronous, scanning,
306 searching, record nabbing, facet-building
308 See verbose embedded documentation.
# Body of getRecords. NOTE(review): the sub line, the start and end of the
# parameter list, many declarations (@zconns, @results, $sort_by, $times,
# $record, $facet_record, @facets_loop, ...) and most closing braces are not
# present in this listing, so the comments below describe only what the
# visible lines show.
# Overall flow: open one connection per server and start a search on it,
# apply the requested sort, then collect records as each connection reports
# ZEND, counting facet values for the biblioserver along the way.
# Returns ( undef, $results_hashref, \@facets_loop ).
314 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
315 $results_per_page, $offset, $expanded_facet, $branches,
319 my @servers = @$servers_ref;
320 my @sort_by = @$sort_by_ref;
322 # Initialize variables for the ZOOM connection and results object
326 my $results_hashref = ();
328 # Initialize variables for the faceted results objects
329 my $facets_counter = ();
330 my $facets_info = ();
331 my $facets = getFacets();
334 ; # stores the ref to array of hashes for template facets loop
336 ### LOOP THROUGH THE SERVERS
337 for ( my $i = 0 ; $i < @servers ; $i++ ) {
338 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
340 # perform the search, create the results objects
341 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
342 my $query_to_use = ($servers[$i] =~ /biblioserver/) ? $koha_query : $simple_query;
344 #$query_to_use = $simple_query if $scan;
345 warn $simple_query if ( $scan and $DEBUG );
347 # Check if we've got a query_type defined, if so, use it
# The query object class is chosen from the prefix of $query_type: ccl, cql
# or pqf. The two further CCL2RPN constructions below belong to branches
# whose conditions are not visible in this listing.
351 if ( $query_type =~ /^ccl/ ) {
353 s/\:/\=/g; # change : to = last minute (FIXME)
356 new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
359 elsif ( $query_type =~ /^cql/ ) {
362 new ZOOM::Query::CQL( $query_to_use, $zconns[$i] ) );
364 elsif ( $query_type =~ /^pqf/ ) {
367 new ZOOM::Query::PQF( $query_to_use, $zconns[$i] ) );
374 new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
380 new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
386 warn "WARNING: query problem with $query_to_use " . $@;
389 # Concatenate the sort_by limits and pass them to the results object
390 # Note: sort will override rank
# Each recognised sort key appends one sort spec (attribute plus direction
# flag) to $sort_by; unrecognised keys add nothing in the visible branches.
392 foreach my $sort (@sort_by) {
393 if ( $sort eq "author_az" ) {
394 $sort_by .= "1=1003 <i ";
396 elsif ( $sort eq "author_za" ) {
397 $sort_by .= "1=1003 >i ";
399 elsif ( $sort eq "popularity_asc" ) {
400 $sort_by .= "1=9003 <i ";
402 elsif ( $sort eq "popularity_dsc" ) {
403 $sort_by .= "1=9003 >i ";
405 elsif ( $sort eq "call_number_asc" ) {
406 $sort_by .= "1=20 <i ";
408 elsif ( $sort eq "call_number_dsc" ) {
409 $sort_by .= "1=20 >i ";
411 elsif ( $sort eq "pubdate_asc" ) {
412 $sort_by .= "1=31 <i ";
414 elsif ( $sort eq "pubdate_dsc" ) {
415 $sort_by .= "1=31 >i ";
417 elsif ( $sort eq "acqdate_asc" ) {
418 $sort_by .= "1=32 <i ";
420 elsif ( $sort eq "acqdate_dsc" ) {
421 $sort_by .= "1=32 >i ";
423 elsif ( $sort eq "title_az" ) {
424 $sort_by .= "1=4 <i ";
426 elsif ( $sort eq "title_za" ) {
427 $sort_by .= "1=4 >i ";
# A negative return from sort is only reported with a warning; processing continues.
431 if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
432 warn "WARNING sort $sort_by failed";
435 } # finished looping through servers
437 # The big moment: asynchronously retrieve results from all servers
# The value returned by ZOOM::event is used as a 1-based position, hence the
# $i - 1 subscripts on @zconns, @results and @servers below.
438 while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
439 my $ev = $zconns[ $i - 1 ]->last_event();
440 if ( $ev == ZOOM::Event::ZEND ) {
441 next unless $results[ $i - 1 ];
442 my $size = $results[ $i - 1 ]->size();
446 # loop through the results
447 $results_hash->{'hits'} = $size;
# $times is the exclusive upper bound of the page; the branch that handles a
# final short page is not visible in this listing.
449 if ( $offset + $results_per_page <= $size ) {
450 $times = $offset + $results_per_page;
455 for ( my $j = $offset ; $j < $times ; $j++ ) {
460 ## Check if it's an index scan
462 my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
464 # here we create a minimal MARC record and hand it off to the
465 # template just like a normal result ... perhaps not ideal, but
467 my $tmprecord = MARC::Record->new();
468 $tmprecord->encoding('UTF-8');
472 # the minimal record in author/title (depending on MARC flavour)
# UNIMARC: term and occurrence count both go into field 200 (a and f).
# Otherwise: the term goes into 245 a and the occurrence count into 100 a.
473 if (C4::Context->preference("marcflavour") eq "UNIMARC") {
474 $tmptitle = MARC::Field->new('200',' ',' ', a => $term, f => $occ);
475 $tmprecord->append_fields($tmptitle);
477 $tmptitle = MARC::Field->new('245',' ',' ', a => $term,);
478 $tmpauthor = MARC::Field->new('100',' ',' ', a => $occ,);
479 $tmprecord->append_fields($tmptitle);
480 $tmprecord->append_fields($tmpauthor);
482 $results_hash->{'RECORDS'}[$j] = $tmprecord->as_usmarc();
487 $record = $results[ $i - 1 ]->record($j)->raw();
489 # warn "RECORD $j:".$record;
490 $results_hash->{'RECORDS'}[$j] = $record;
492 # Fill the facets while we're looping, but only for the biblioserver
493 $facet_record = MARC::Record->new_from_usmarc($record)
494 if $servers[ $i - 1 ] =~ /biblioserver/;
496 #warn $servers[$i-1]."\n".$record; #.$facet_record->title();
# NOTE(review): the bound below runs one index past the last facet; the
# guard on the next line skips that undefined slot.
498 for ( my $k = 0 ; $k <= @$facets ; $k++ ) {
499 ($facets->[$k]) or next;
500 my @fields = map {$facet_record->field($_)} @{$facets->[$k]->{'tags'}} ;
501 for my $field (@fields) {
502 my @subfields = $field->subfields();
503 for my $subfield (@subfields) {
504 my ( $code, $data ) = @$subfield;
505 ($code eq $facets->[$k]->{'subfield'}) or next;
# Counts are kept per facet link value and then per subfield content.
506 $facets_counter->{ $facets->[$k]->{'link_value'} }->{$data}++;
509 $facets_info->{ $facets->[$k]->{'link_value'} }->{'label_value'} =
510 $facets->[$k]->{'label_value'};
511 $facets_info->{ $facets->[$k]->{'link_value'} }->{'expanded'} =
512 $facets->[$k]->{'expanded'};
517 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
520 # warn "connection ", $i-1, ": $size hits";
521 # warn $results[$i-1]->record(0)->render() if $size > 0;
# Build the facet structures for the template (biblioserver only).
# NOTE(review): the first sort below compares the top-level entries of
# $facets_counter numerically, and those entries are hash references (see the
# increment above), so the resulting order of facet groups is not meaningful.
524 if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
526 sort { $facets_counter->{$b} <=> $facets_counter->{$a} }
527 keys %$facets_counter )
530 my $number_of_facets;
531 my @this_facets_array;
# Within one facet group, values are ordered by descending count.
534 $facets_counter->{$link_value}
535 ->{$b} <=> $facets_counter->{$link_value}->{$a}
536 } keys %{ $facets_counter->{$link_value} }
# A value is emitted when fewer than 6 have been seen, or when this group is
# the expanded one. The increment of $number_of_facets is not visible here.
540 if ( ( $number_of_facets < 6 )
541 || ( $expanded_facet eq $link_value )
542 || ( $facets_info->{$link_value}->{'expanded'} ) )
545 # Sanitize the link value ), ( will cause errors with CCL,
546 my $facet_link_value = $one_facet;
547 $facet_link_value =~ s/(\(|\))/ /g;
549 # fix the length that will display in the label,
# Labels longer than 20 characters are cut to 20 and suffixed with three dots.
550 my $facet_label_value = $one_facet;
552 substr( $one_facet, 0, 20 ) . "..."
553 unless length($facet_label_value) <= 20;
555 # if it's a branch, label by the name, not the code,
556 if ( $link_value =~ /branch/ ) {
558 $branches->{$one_facet}->{'branchname'};
561 # but we're down with the whole label being in the link's title.
562 my $facet_title_value = $one_facet;
564 push @this_facets_array,
568 $facets_counter->{$link_value}
570 facet_label_value => $facet_label_value,
571 facet_title_value => $facet_title_value,
572 facet_link_value => $facet_link_value,
573 type_link_value => $link_value,
579 # handle expanded option
580 unless ( $facets_info->{$link_value}->{'expanded'} ) {
582 if ( ( $number_of_facets > 6 )
583 && ( $expanded_facet ne $link_value ) );
588 type_link_value => $link_value,
589 type_id => $link_value . "_id",
590 "type_label_" . $facets_info->{$link_value}->{'label_value'} => 1,
591 facets => \@this_facets_array,
592 expandable => $expandable,
593 expand => $link_value,
# The group is left out when its label matches Libraries and the
# singleBranchMode preference is set.
595 ) unless ( ($facets_info->{$link_value}->{'label_value'} =~ /Libraries/) and (C4::Context->preference('singleBranchMode')) );
600 return ( undef, $results_hashref, \@facets_loop );
# Body of the PazPar2-backed search routine. NOTE(review): the sub line (and
# therefore the routine name), the start and end of the parameter list, and
# the closing braces are not present in this listing.
# Runs $simple_query through a C4::Search::PazPar2 client, then builds one
# result group per returned hit, each holding the records fetched for that
# hit. Returns ( undef, $results_hashref, \@facets_loop ).
605 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
606 $results_per_page, $offset, $expanded_facet, $branches,
610 my $paz = C4::Search::PazPar2->new(C4::Context->config('pazpar2url'));
612 $paz->search($simple_query);
616 my $results_hashref = {};
617 my $stats = XMLin($paz->stat);
618 my $results = XMLin($paz->show($offset, $results_per_page, 'work-title:1'), forcearray => 1);
620 # for a grouped search result, the number of hits
621 # is the number of groups returned; 'bib_hits' will have
622 # the total number of bibs.
623 $results_hashref->{'biblioserver'}->{'hits'} = $results->{'merged'}->[0];
624 $results_hashref->{'biblioserver'}->{'bib_hits'} = $stats->{'hits'};
626 HIT: foreach my $hit (@{ $results->{'hit'} }) {
627 my $recid = $hit->{recid}->[0];
# The group label is the work title, followed by a slash and the work author
# when the hit carries one.
629 my $work_title = $hit->{'md-work-title'}->[0];
631 if (exists $hit->{'md-work-author'}) {
632 $work_author = $hit->{'md-work-author'}->[0];
634 my $group_label = (defined $work_author) ? "$work_title / $work_author" : $work_title;
636 my $result_group = {};
637 $result_group->{'group_label'} = $group_label;
638 $result_group->{'group_merge_key'} = $recid;
# $count is declared on a line not shown here; its value when the hit has no
# count element cannot be determined from this listing.
641 if (exists $hit->{count}) {
642 $count = $hit->{count}->[0];
644 $result_group->{'group_count'} = $count;
# Fetch each member record of the group by record id and position.
646 for (my $i = 0; $i < $count; $i++) {
647 # FIXME -- may need to worry about diacritics here
648 my $rec = $paz->record($recid, $i);
649 push @{ $result_group->{'RECORDS'} }, $rec;
652 push @{ $results_hashref->{'biblioserver'}->{'GROUPS'} }, $result_group;
655 # pass through facets
# The term list is fetched and parsed, but the code that would fill
# @facets_loop is commented out below, so an empty list is returned.
656 my $termlist_xml = $paz->termlist('author,subject');
657 my $terms = XMLin($termlist_xml, forcearray => 1);
658 my @facets_loop = ();
659 #die Dumper($results);
660 # foreach my $list (sort keys %{ $terms->{'list'} }) {
662 # foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) {
664 # facet_label_value => $facet->{'name'}->[0],
667 # push @facets_loop, ( {
668 # type_label => $list,
669 # facets => \@facets,
673 return ( undef, $results_hashref, \@facets_loop );
# _remove_stopwords( $operand, $index )
# Removes the stopwords listed in C4::Context->stopwords from $operand,
# unless $index names a phrase or exact index (matches phr or ext).
# Returns ( $operand, \@stopwords_removed ).
# NOTE(review): several lines of this sub (the start of the inner match and
# the closing braces) are not present in this listing.
677 sub _remove_stopwords {
678 my ( $operand, $index ) = @_;
679 my @stopwords_removed;
681 # phrase and exact-qualified indexes shouldn't have stopwords removed
682 if ( $index !~ m/phr|ext/ ) {
684 # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
685 # we use IsAlpha unicode definition, to deal correctly with diacritics.
686 # otherwise, a French word like "leçon" would be split into "le" "çon", "le"
687 # is a stopword, we'd get "çon" and wouldn't find anything...
688 foreach ( keys %{ C4::Context->stopwords } ) {
# NOTE(review): this test is unanchored, so any stopword that merely contains
# one of these letter sequences is also skipped.
689 next if ( $_ =~ /(and|or|not)/ ); # don't remove operators
# NOTE(review): the stopword is interpolated into the patterns below without
# escaping; a stopword containing regex metacharacters would change the pattern.
691 /(\P{IsAlpha}$_\P{IsAlpha}|^$_\P{IsAlpha}|\P{IsAlpha}$_$|^$_$)/ )
693 $operand =~ s/\P{IsAlpha}$_\P{IsAlpha}/ /gi;
694 $operand =~ s/^$_\P{IsAlpha}/ /gi;
695 $operand =~ s/\P{IsAlpha}$_$/ /gi;
# NOTE(review): $1 presumably still refers to the capture of the match begun
# above (its first line is not visible here); confirm it is not reset by the
# intervening substitutions.
696 $operand =~ s/$1//gi;
697 push @stopwords_removed, $_;
701 return ( $operand, \@stopwords_removed );
# _detect_truncation( $operand, $index )
# Splits $operand on single whitespace characters and sorts each word into
# one of five lists according to where asterisks appear:
#   asterisk at both ends   -> @rightlefttruncated
#   asterisk at the start   -> @lefttruncated
#   asterisk at the end     -> @righttruncated
#   no asterisk at all      -> @nontruncated
#   anything else           -> @regexpr
# For the first three cases the substitution strips the asterisks, so the
# stored word is bare. The five lists are handed back as array references.
# $index is unpacked but not used in the lines visible here.
# NOTE(review): the end of the declaration list, the final else line and the
# return keyword are not present in this listing.
705 sub _detect_truncation {
706 my ( $operand, $index ) = @_;
707 my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
# Splitting on a single whitespace character means consecutive spaces yield
# empty words, which fall into the no-asterisk case below.
710 my @wordlist = split( /\s/, $operand );
711 foreach my $word (@wordlist) {
712 if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
713 push @rightlefttruncated, $word;
715 elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
716 push @lefttruncated, $word;
718 elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
719 push @righttruncated, $word;
721 elsif ( index( $word, "*" ) < 0 ) {
722 push @nontruncated, $word;
725 push @regexpr, $word;
729 \@nontruncated, \@righttruncated, \@lefttruncated,
730 \@rightlefttruncated, \@regexpr
# _build_stemmed_operand
# Builds a stemmed version of an operand: each space-separated word is
# stemmed with Lingua::Stem (EN-US locale) and the stems are joined with a
# trailing space each. Operands containing a digit are returned unchanged.
# NOTE(review): the line unpacking the arguments, the declaration of
# $stemmed_operand, the exception list and the closing braces are not present
# in this listing.
735 sub _build_stemmed_operand {
739 # If operand contains a digit, it is almost certainly an identifier, and should
740 # not be stemmed. This is particularly relevant for ISBNs and ISSNs, which
741 # can contain the letter "X" - for example, _build_stemmed_operand would reduce
742 # "014100018X" to "x ", which for a MARC21 database would bring up irrelevant
743 # results (e.g., "23 x 29 cm." from the 300$c). Bug 2098.
744 return $operand if $operand =~ /\d/;
746 # FIXME: the locale should be set based on the user's language and/or search choice
747 my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
749 # FIXME: these should be stored in the db so the librarian can modify the behavior
750 $stemmer->add_exceptions(
757 my @words = split( / /, $operand );
758 my $stems = $stemmer->stem(@words);
759 for my $stem (@$stems) {
760 $stemmed_operand .= "$stem";
# A question mark is appended to each stem unless the stem ends in and, or,
# or not, or is shorter than three characters.
# NOTE(review): the test only anchors at the end, so it also matches ordinary
# stems that happen to end in those letters.
761 $stemmed_operand .= "?"
762 unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
763 $stemmed_operand .= " ";
765 warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
766 return $stemmed_operand;
# _build_weighted_query( $operand, $stemmed_operand, $index )
# Wraps $operand in a rank specification, opening with (rk=( and closing with
# two parentheses, and returns the resulting string. The clauses placed
# inside depend on $index:
#   kw or empty       -> title-cover/title clauses, plus optional fuzzy and
#                        stemmed clauses, plus a word-list clause
#   bc                -> a plain barcode clause
#   an                -> a plain authority-number clause
#   contains a comma  -> the index as given with the quoted operand
#   anything else     -> exact, phrase and word-list clauses on that index
# NOTE(review): some assignment lines, the final else line and the closing
# braces are not present in this listing.
770 sub _build_weighted_query {
772 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
773 # pretty well but could work much better if we had a smarter query parser
774 my ( $operand, $stemmed_operand, $index ) = @_;
# $weight_fields is read here but not referenced again in the visible lines.
775 my $stemming = C4::Context->preference("QueryStemming") || 0;
776 my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
777 my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
779 my $weighted_query .= "(rk=("; # Specifies that we're applying rank
781 # Keyword, or, no index specified
782 if ( ( $index eq 'kw' ) || ( !$index ) ) {
784 "Title-cover,ext,r1=\"$operand\""; # exact title-cover
785 $weighted_query .= " or ti,ext,r2=\"$operand\""; # exact title
786 $weighted_query .= " or ti,phr,r3=\"$operand\""; # phrase title
787 #$weighted_query .= " or any,ext,r4=$operand"; # exact any
788 #$weighted_query .=" or kw,wrdl,r5=\"$operand\""; # word list any
789 $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
790 if $fuzzy_enabled; # add fuzzy, word list
791 $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
792 if ( $stemming and $stemmed_operand )
793 ; # add stemming, right truncation
794 $weighted_query .= " or wrdl,r9=\"$operand\"";
796 # embedded sorting: 0 a-z; 1 z-a
797 # $weighted_query .= ") or (sort1,aut=1";
800 # Barcode searches should skip this process
801 elsif ( $index eq 'bc' ) {
802 $weighted_query .= "bc=\"$operand\"";
805 # Authority-number searches should skip this process
806 elsif ( $index eq 'an' ) {
807 $weighted_query .= "an=\"$operand\"";
810 # If the index already has more than one qualifier, wrap the operand
811 # in quotes and pass it back (assumption is that the user knows what they
812 # are doing and won't appreciate us mucking up their query
# The right-hand side of the match below is a plain string used as a pattern,
# so the test is true whenever $index contains a comma.
813 elsif ( $index =~ ',' ) {
814 $weighted_query .= " $index=\"$operand\"";
817 #TODO: build better cases based on specific search indexes
819 $weighted_query .= " $index,ext,r1=\"$operand\""; # exact index
820 #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
821 $weighted_query .= " or $index,phr,r3=\"$operand\""; # phrase index
823 " or $index,rt,wrdl,r3=\"$operand\""; # word list index
826 $weighted_query .= "))"; # close rank specification
827 return $weighted_query;
833 $simple_query, $query_cgi,
835 $limit_cgi, $limit_desc,
836 $stopwords_removed, $query_type ) = buildQuery ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
838 Build queries and limits in CCL, CGI, Human,
839 handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
841 See verbose embedded documentation.
# Body of buildQuery. NOTE(review): the sub line, many declarations ($limit,
# $limit_cgi, $limit_desc, $query_cgi, $query_desc, $query_type,
# $indexes_set, $struct_attr, $previous_operand, ...), several conditions and
# most closing braces are not present in this listing, so the comments below
# describe only what the visible lines show.
# Every visible return hands back a ten-element list: error (always undef
# here), query, simple query, query CGI string, query description, limit,
# limit CGI string, limit description, removed stopwords, and query type.
847 my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_;
849 warn "---------\nEnter buildQuery\n---------" if $DEBUG;
# NOTE(review): each declaration below combines my with a statement modifier;
# perlsyn documents the behaviour of that combination as undefined when the
# condition is false.
852 my @operators = @$operators if $operators;
853 my @indexes = @$indexes if $indexes;
854 my @operands = @$operands if $operands;
855 my @limits = @$limits if $limits;
856 my @sort_by = @$sort_by if $sort_by;
858 my $stemming = C4::Context->preference("QueryStemming") || 0;
859 my $auto_truncation = C4::Context->preference("QueryAutoTruncate") || 0;
860 my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
861 my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
862 my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
864 # no stemming/weight/fuzzy in NoZebra
865 if ( C4::Context->preference("NoZebra") ) {
871 my $query = $operands[0];
872 my $simple_query = $operands[0];
874 # initialize the variables we're passing back
883 my $stopwords_removed; # flag to determine if stopwords have been removed
885 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
# In the three returns below the postmatch variable supplies the text that
# follows the ccl=, cql= or pqf= prefix of the first operand.
887 if ( $query =~ /^ccl=/ ) {
888 return ( undef, $', $', $', $', '', '', '', '', 'ccl' );
890 if ( $query =~ /^cql=/ ) {
891 return ( undef, $', $', $', $', '', '', '', '', 'cql' );
893 if ( $query =~ /^pqf=/ ) {
894 return ( undef, $', $', $', $', '', '', '', '', 'pqf' );
897 # pass nested queries directly
898 # FIXME: need better handling of some of these variables in this case
899 if ( $query =~ /(\(|\))/ ) {
901 undef, $query, $simple_query, $query_cgi,
902 $query, $limit, $limit_cgi, $limit_desc,
903 $stopwords_removed, 'ccl'
907 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
908 # query operands and indexes and add stemming, truncation, field weighting, etc.
909 # Once we do so, we'll end up with a value in $query, just like if we had an
910 # incoming $query from the user
913 ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
915 ; # a flag used to keep track if there was a previous query
916 # if there was, we can apply the current operator
# NOTE(review): the bound below runs one index past the last operand; the
# truth test on the next line skips that undefined slot (and any empty or
# zero operand).
918 for ( my $i = 0 ; $i <= @operands ; $i++ ) {
920 # COMBINE OPERANDS, INDEXES AND OPERATORS
921 if ( $operands[$i] ) {
923 # A flag to determine whether or not to add the index to the query
926 # If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling
927 if ( $operands[$i] =~ /(:|=)/ || $scan ) {
930 $remove_stopwords = 0;
932 my $operand = $operands[$i];
933 my $index = $indexes[$i];
935 # Add index-specific attributes
936 # Date of Publication
# The feature flags reset in the branches below are not restored inside the
# visible lines, so they appear to stay off for later operands as well.
# TODO confirm against the full source.
937 if ( $index eq 'yr' ) {
938 $index .= ",st-numeric";
940 $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
943 # Date of Acquisition
944 elsif ( $index eq 'acqdate' ) {
945 $index .= ",st-date-normalized";
947 $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
949 # ISBN,ISSN,Standard Number, don't need special treatment
950 elsif ( $index eq 'nb' || $index eq 'ns' ) {
953 $stemming, $auto_truncation,
954 $weight_fields, $fuzzy_enabled,
956 ) = ( 0, 0, 0, 0, 0 );
959 # Set default structure attribute (word list)
961 unless ( $indexes_set || !$index || $index =~ /(st-|phr|ext|wrdl)/ ) {
962 $struct_attr = ",wrdl";
965 # Some helpful index variants
# $index_plus ends in a colon and $index_plus_comma ends in a comma, so that
# further attributes can be appended to the latter.
966 my $index_plus = $index . $struct_attr . ":" if $index;
967 my $index_plus_comma = $index . $struct_attr . "," if $index;
968 if ($auto_truncation){
969 # FIXME Auto Truncation is only valid for LTR languages
971 # use C4::Languages qw(regex_lang_subtags get_bidi);
972 # $lang = $query->cookie('KohaOpacLanguage') if (defined $query && $query->cookie('KohaOpacLanguage'));
973 # my $current_lang = regex_lang_subtags($lang);
975 # $bidi = get_bidi($current_lang->{script}) if $current_lang->{script};
976 $index_plus_comma .= "rtrn:";
980 if ($remove_stopwords) {
981 ( $operand, $stopwords_removed ) =
982 _remove_stopwords( $operand, $index );
983 warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
984 warn "REMOVED STOPWORDS: @$stopwords_removed"
985 if ( $stopwords_removed && $DEBUG );
989 my $truncated_operand;
990 my( $nontruncated, $righttruncated, $lefttruncated,
991 $rightlefttruncated, $regexpr
992 ) = _detect_truncation( $operand, $index );
994 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
999 scalar(@$righttruncated) + scalar(@$lefttruncated) +
1000 scalar(@$rightlefttruncated) > 0 )
1003 # Don't field weight or add the index to the query, we do it here
1005 undef $weight_fields;
# The truncated operand is assembled from up to four parts (plain, right,
# left, both), joined with and.
1006 my $previous_truncation_operand;
1007 if ( scalar(@$nontruncated) > 0 ) {
1008 $truncated_operand .= "$index_plus @$nontruncated ";
1009 $previous_truncation_operand = 1;
1011 if ( scalar(@$righttruncated) > 0 ) {
1012 $truncated_operand .= "and "
1013 if $previous_truncation_operand;
1014 $truncated_operand .=
1015 "$index_plus_comma" . "rtrn:@$righttruncated ";
1016 $previous_truncation_operand = 1;
1018 if ( scalar(@$lefttruncated) > 0 ) {
1019 $truncated_operand .= "and "
1020 if $previous_truncation_operand;
1021 $truncated_operand .=
1022 "$index_plus_comma" . "ltrn:@$lefttruncated ";
1023 $previous_truncation_operand = 1;
1025 if ( scalar(@$rightlefttruncated) > 0 ) {
1026 $truncated_operand .= "and "
1027 if $previous_truncation_operand;
1028 $truncated_operand .=
1029 "$index_plus_comma" . "rltrn:@$rightlefttruncated ";
1030 $previous_truncation_operand = 1;
1033 $operand = $truncated_operand if $truncated_operand;
1034 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
1037 my $stemmed_operand;
1038 $stemmed_operand = _build_stemmed_operand($operand)
1040 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
1042 # Handle Field Weighting
1043 my $weighted_operand;
1045 _build_weighted_query( $operand, $stemmed_operand, $index )
1047 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
1048 $operand = $weighted_operand if $weight_fields;
1049 $indexes_set = 1 if $weight_fields;
1051 # If there's a previous operand, we need to add an operator
1052 if ($previous_operand) {
1054 # User-specified operator
# $query receives the processed operand, while $query_cgi and $query_desc
# receive the operand as originally submitted.
1055 if ( $operators[ $i - 1 ] ) {
1056 $query .= " $operators[$i-1] ";
1057 $query .= " $index_plus " unless $indexes_set;
1058 $query .= " $operand";
1059 $query_cgi .= "&op=$operators[$i-1]";
1060 $query_cgi .= "&idx=$index" if $index;
1061 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1063 " $operators[$i-1] $index_plus $operands[$i]";
1066 # Default operator is and
1069 $query .= "$index_plus " unless $indexes_set;
1070 $query .= "$operand";
1071 $query_cgi .= "&op=and&idx=$index" if $index;
1072 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1073 $query_desc .= " and $index_plus $operands[$i]";
1077 # There isn't a previous operand, don't need an operator
1080 # Field-weighted queries already have indexes set
1081 $query .= " $index_plus " unless $indexes_set;
1083 $query_desc .= " $index_plus $operands[$i]";
1084 $query_cgi .= "&idx=$index" if $index;
1085 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1086 $previous_operand = 1;
1091 warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
# Limits fall into three classes: the availability limit, the group limits
# (matching mc) that are joined with or, and the remaining limits joined with and.
1094 my $group_OR_limits;
1095 my $availability_limit;
1096 foreach my $this_limit (@limits) {
1097 if ( $this_limit =~ /available/ ) {
1099 # 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1101 # all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1102 $availability_limit .=
1103 "( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and (lost,st-numeric=0) )"; #or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )";
1104 $limit_cgi .= "&limit=available";
1108 # group_OR_limits, prefixed by mc-
1109 # OR every member of the group
# NOTE(review): the test is unanchored, so any limit containing mc anywhere
# is treated as a group limit.
1110 elsif ( $this_limit =~ /mc/ ) {
1111 $group_OR_limits .= " or " if $group_OR_limits;
1112 $limit_desc .= " or " if $group_OR_limits;
1113 $group_OR_limits .= "$this_limit";
1114 $limit_cgi .= "&limit=$this_limit";
1115 $limit_desc .= " $this_limit";
1118 # Regular old limits
1121 $limit .= " and " if $limit || $query;
1122 $limit .= "$this_limit";
1123 $limit_cgi .= "&limit=$this_limit";
1124 $limit_desc .= " $this_limit";
1128 if ($group_OR_limits) {
1129 $limit .= " and " if ( $query || $limit );
1130 $limit .= "($group_OR_limits)";
1132 if ($availability_limit) {
1133 $limit .= " and " if ( $query || $limit );
1134 $limit .= "($availability_limit)";
1137 # Normalize the query and limit strings
1140 for ( $query, $query_desc, $limit, $limit_desc ) {
# NOTE(review): as shown in this listing the first substitution replaces one
# space with one space; the listing has presumably collapsed a double space in
# the pattern. Verify against the original file.
1141 $_ =~ s/ / /g; # remove extra spaces
1142 $_ =~ s/^ //g; # remove any beginning spaces
1143 $_ =~ s/ $//g; # remove any ending spaces
1144 $_ =~ s/==/=/g; # remove double == from query
1146 $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1148 for ($query_cgi,$simple_query) {
1151 # append the limit to the query
1152 $query .= " " . $limit;
# Diagnostic dump. NOTE(review): presumably guarded by a debug condition on a
# line not present in this listing.
1156 warn "QUERY:" . $query;
1157 warn "QUERY CGI:" . $query_cgi;
1158 warn "QUERY DESC:" . $query_desc;
1159 warn "LIMIT:" . $limit;
1160 warn "LIMIT CGI:" . $limit_cgi;
1161 warn "LIMIT DESC:" . $limit_desc;
1162 warn "---------\nLeave buildQuery\n---------";
1165 undef, $query, $simple_query, $query_cgi,
1166 $query_desc, $limit, $limit_cgi, $limit_desc,
1167 $stopwords_removed, $query_type
1171 =head2 searchResults
1173 Format results in a form suitable for passing to the template
# searchResults: turns raw MARC search hits into a list of per-biblio hashes
# ($oldbiblio) that are pushed onto @newresults for the template.
# Arguments, as unpacked below: $searchdesc (space-separated search description,
# used for term highlighting), $hits, $results_per_page, $offset, $scan and the
# list of raw MARC records (@marcresults).
# NOTE(review): the `sub` line, several declarations and the closing braces are
# not visible in this excerpt; the name is taken from the POD heading above.
1177 # IMO this subroutine is pretty messy still -- it's responsible for
1178 # building the HTML output for the template
1180 my ( $searchdesc, $hits, $results_per_page, $offset, $scan, @marcresults ) = @_;
1181 my $dbh = C4::Context->dbh;
1185 # add search-term highlighting via <span>s on the search terms
1186 my $span_terms_hashref;
1187 for my $span_term ( split( / /, $searchdesc ) ) {
# drop any "index=" prefix and the characters ) ( + . * from the term
1188 $span_term =~ s/(.*=|\)|\(|\+|\.|\*)//g;
1189 $span_terms_hashref->{$span_term}++;
1192 #Build branchnames hash
1194 #get branch information.....
1197 $dbh->prepare("SELECT branchcode,branchname FROM branches")
1198 ; # FIXME : use C4::Koha::GetBranches
1200 while ( my $bdata = $bsth->fetchrow_hashref ) {
1201 $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'};
1203 # FIXME - We build an authorised values hash here, using the default framework
1204 # though it is possible to have different authvals for different fws.
1206 my $shelflocations =GetKohaAuthorisedValues('items.location','');
1208 # get notforloan authorised value list (see $shelflocations FIXME)
1209 my $notforloan_authorised_value = GetAuthValCode('items.notforloan','');
1211 #Build itemtype hash
1212 #find itemtype & itemtype image
1216 "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes"
1219 while ( my $bdata = $bsth->fetchrow_hashref ) {
1220 foreach (qw(description imageurl summary notforloan)) {
1221 $itemtypes{ $bdata->{'itemtype'} }->{$_} = $bdata->{$_};
1225 #search item field code
1228 "SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'"
1231 my ($itemtag) = $sth->fetchrow;
1233 ## find column names of items related to MARC
1234 my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items");
# maps each items column name to the MARC subfield code it is stored in
1236 my %subfieldstosearch;
1237 while ( ( my $column ) = $sth2->fetchrow ) {
1238 my ( $tagfield, $tagsubfield ) =
1239 &GetMarcFromKohaField( "items." . $column, "" );
1240 $subfieldstosearch{$column} = $tagsubfield;
1243 # handle which records to actually retrieve
# $times is the exclusive upper index for the record loop below: one full page
# past $offset when that many hits exist, otherwise $hits
1245 if ( $hits && $offset + $results_per_page <= $hits ) {
1246 $times = $offset + $results_per_page;
1249 $times = $hits; # FIXME: if $hits is undefined, why do we want to equal it?
1252 # loop through all of the records we've retrieved
1253 for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
1254 my $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
1255 my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' );
1256 $oldbiblio->{subtitle} = C4::Biblio::get_koha_field_from_marc('bibliosubtitle', 'subtitle', $marcrecord, '');
1257 $oldbiblio->{result_number} = $i + 1;
1259 # add imageurl to itemtype if there is one
1260 $oldbiblio->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
1262 $oldbiblio->{'authorised_value_images'} = C4::Items::get_authorised_value_images( C4::Biblio::get_biblio_authorised_values( $oldbiblio->{'biblionumber'}, $marcrecord ) );
# keep the leading run of digits/hyphens (plus any X that follows) of the ISBN
1263 (my $aisbn) = $oldbiblio->{isbn} =~ /([\d-]*[X]*)/;
1265 $oldbiblio->{amazonisbn} = $aisbn;
1266 $oldbiblio->{description} = $itemtypes{ $oldbiblio->{itemtype} }->{description};
1267 # Build summary if there is one (the summary is defined in the itemtypes table)
1268 # FIXME: is this used anywhere, I think it can be commented out? -- JF
1269 if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
1270 my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1271 my @fields = $marcrecord->fields();
1272 foreach my $field (@fields) {
1273 my $tag = $field->tag();
1274 my $tagvalue = $field->as_string();
1275 if (! utf8::is_utf8($tagvalue)) {
1276 utf8::decode($tagvalue);
1280 s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
# subfields are only expanded for tags that are not numerically below 10
1281 unless ( $tag < 10 ) {
1282 my @subf = $field->subfields;
1283 for my $i ( 0 .. $#subf ) {
1284 my $subfieldcode = $subf[$i][0];
1285 my $subfieldvalue = $subf[$i][1];
1286 if (! utf8::is_utf8($subfieldvalue)) {
1287 utf8::decode($subfieldvalue);
1289 my $tagsubf = $tag . $subfieldcode;
1291 s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
# remove any [..] placeholders that were left, then turn newlines into <br/>
1296 $summary =~ s/\[(.*?)]//g;
1297 $summary =~ s/\n/<br\/>/g;
1298 $oldbiblio->{summary} = $summary;
1301 # save an author with no <span> tag, for the <a href=search.pl?q=<!--tmpl_var name="author"-->> link
1302 $oldbiblio->{'author_nospan'} = $oldbiblio->{'author'};
1303 $oldbiblio->{'title_nospan'} = $oldbiblio->{'title'};
1304 $oldbiblio->{'subtitle_nospan'} = $oldbiblio->{'subtitle'};
1305 # Add search-term highlighting to the whole record where they match using <span>s
1306 if (C4::Context->preference("OpacHighlightedWords")){
1307 my $searchhighlightblob;
1308 for my $highlight_field ( $marcrecord->fields ) {
1310 # FIXME: need to skip title, subtitle, author, etc., as they are handled below
1311 next if $highlight_field->tag() =~ /(^00)/; # skip fixed fields
1312 for my $subfield ($highlight_field->subfields()) {
1314 next if $subfield->[0] eq '9';
1315 my $field = $subfield->[1];
1316 for my $term ( keys %$span_terms_hashref ) {
# NOTE(review): $term is interpolated into the patterns below without \Q...\E;
# only the characters stripped when the term hash was built are removed.
1317 if ( ( $field =~ /$term/i ) && (( length($term) > 3 ) || ($field =~ / $term /i)) ) {
1318 $field =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1322 $searchhighlightblob .= $field . " ... " if $match;
1326 $searchhighlightblob = ' ... '.$searchhighlightblob if $searchhighlightblob;
1327 $oldbiblio->{'searchhighlightblob'} = $searchhighlightblob;
1330 # Add search-term highlighting to the title, subtitle, etc. fields
1331 for my $term ( keys %$span_terms_hashref ) {
1332 my $old_term = $term;
# only terms longer than 3 characters are highlighted in these fields
1333 if ( length($term) > 3 ) {
1334 $term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\]|\\|\*)//g;
1335 foreach(qw(title subtitle author publishercode place pages notes size)) {
1336 $oldbiblio->{$_} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
# 'toggle' is set on every record with an odd index $i
1341 ($i % 2) and $oldbiblio->{'toggle'} = 1;
1343 # Pull out the items fields
1344 my @fields = $marcrecord->field($itemtag);
1346 # Setting item statuses for display
1347 my @available_items_loop;
1348 my @onloan_items_loop;
1349 my @other_items_loop;
1351 my $available_items;
1355 my $ordered_count = 0;
1356 my $available_count = 0;
1357 my $onloan_count = 0;
1358 my $longoverdue_count = 0;
1359 my $other_count = 0;
1360 my $wthdrawn_count = 0;
1361 my $itemlost_count = 0;
1362 my $itembinding_count = 0;
1363 my $itemdamaged_count = 0;
1364 my $item_in_transit_count = 0;
1365 my $can_place_holds = 0;
1366 my $items_count = scalar(@fields);
1368 ( C4::Context->preference('maxItemsinSearchResults') )
1369 ? C4::Context->preference('maxItemsinSearchResults') - 1
1372 # loop through every item
1373 foreach my $field (@fields) {
1376 # populate the items hash
1377 foreach my $code ( keys %subfieldstosearch ) {
1378 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1380 my $hbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'homebranch' : 'holdingbranch';
1381 my $otherbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'holdingbranch' : 'homebranch';
1382 # set item's branch name, use HomeOrHoldingBranch syspref first, fall back to the other one
1383 if ($item->{$hbranch}) {
1384 $item->{'branchname'} = $branches{$item->{$hbranch}};
1386 elsif ($item->{$otherbranch}) { # Last resort
1387 $item->{'branchname'} = $branches{$item->{$otherbranch}};
# grouping key prefix: branch code, location, item type and call number
1390 my $prefix = $item->{$hbranch} . '--' . $item->{location} . $item->{itype} . $item->{itemcallnumber};
1391 # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
1392 if ( $item->{onloan} ) {
# NOTE(review): the key appends $item->{due_date}, while the due date stored
# below is formatted from $item->{onloan} -- confirm due_date is ever populated.
1394 my $key = $prefix . $item->{due_date};
1395 $onloan_items->{$key}->{due_date} = format_date($item->{onloan});
1396 $onloan_items->{$key}->{count}++ if $item->{$hbranch};
1397 $onloan_items->{$key}->{branchname} = $item->{branchname};
1398 $onloan_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1399 $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber};
1400 $onloan_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1401 $onloan_items->{$key}->{barcode} = $item->{barcode};
1402 # if something's checked out and lost, mark it as 'long overdue'
1403 if ( $item->{itemlost} ) {
# NOTE(review): keyed on $prefix here, not $key like the assignments above --
# confirm this is intended.
1404 $onloan_items->{$prefix}->{longoverdue}++;
1405 $longoverdue_count++;
1406 } else { # can place holds as long as item isn't lost
1407 $can_place_holds = 1;
1411 # items not on loan, but still unavailable ( lost, withdrawn, damaged )
1415 if ( $item->{notforloan} == -1 ) {
1419 # is item in transit?
1420 my $transfertwhen = '';
1421 my ($transfertfrom, $transfertto);
1423 unless ($item->{wthdrawn}
1424 || $item->{itemlost}
1426 || $item->{notforloan}
1427 || $items_count > 20) {
1429 # A couple heuristics to limit how many times
1430 # we query the database for item transfer information, sacrificing
1431 # accuracy in some cases for speed;
1433 # 1. don't query if item has one of the other statuses
1434 # 2. don't check transit status if the bib has
1435 # more than 20 items
1437 # FIXME: to avoid having the query the database like this, and to make
1438 # the in transit status count as unavailable for search limiting,
1439 # should map transit status to record indexed in Zebra.
1441 ($transfertwhen, $transfertfrom, $transfertto) = C4::Circulation::GetTransfers($item->{itemnumber});
1444 # item is withdrawn, lost or damaged
1445 if ( $item->{wthdrawn}
1446 || $item->{itemlost}
1448 || $item->{notforloan}
1449 || ($transfertwhen ne ''))
1451 $wthdrawn_count++ if $item->{wthdrawn};
1452 $itemlost_count++ if $item->{itemlost};
1453 $itemdamaged_count++ if $item->{damaged};
1454 $item_in_transit_count++ if $transfertwhen ne '';
# status string "wthdrawn-itemlost-damaged-notforloan", appended to the grouping key
1455 $item->{status} = $item->{wthdrawn} . "-" . $item->{itemlost} . "-" . $item->{damaged} . "-" . $item->{notforloan};
1458 my $key = $prefix . $item->{status};
1459 foreach (qw(wthdrawn itemlost damaged branchname itemcallnumber)) {
1460 $other_items->{$key}->{$_} = $item->{$_};
1462 $other_items->{$key}->{intransit} = ($transfertwhen ne '') ? 1 : 0;
1463 $other_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1464 $other_items->{$key}->{count}++ if $item->{$hbranch};
1465 $other_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1466 $other_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1467 $other_items->{$key}->{barcode} = $item->{barcode};
1471 $can_place_holds = 1;
1473 $available_items->{$prefix}->{count}++ if $item->{$hbranch};
1474 foreach (qw(branchname itemcallnumber barcode)) {
1475 $available_items->{$prefix}->{$_} = $item->{$_};
1477 $available_items->{$prefix}->{location} = $shelflocations->{ $item->{location} };
1478 $available_items->{$prefix}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1481 } # notforloan, item level and biblioitem level
1482 my ( $availableitemscount, $onloanitemscount, $otheritemscount );
# NOTE(review): same maxItemsinSearchResults expression as the one evaluated
# before the item loop earlier in this sub.
1484 ( C4::Context->preference('maxItemsinSearchResults') )
1485 ? C4::Context->preference('maxItemsinSearchResults') - 1
# copy each group into its display loop in sorted key order, stopping once the
# running count exceeds $maxitems
1487 for my $key ( sort keys %$onloan_items ) {
1488 (++$onloanitemscount > $maxitems) and last;
1489 push @onloan_items_loop, $onloan_items->{$key};
1491 for my $key ( sort keys %$other_items ) {
1492 (++$otheritemscount > $maxitems) and last;
1493 push @other_items_loop, $other_items->{$key};
1495 for my $key ( sort keys %$available_items ) {
1496 (++$availableitemscount > $maxitems) and last;
1497 push @available_items_loop, $available_items->{$key}
1500 # XSLT processing of some stuff
1501 if (C4::Context->preference("XSLTResultsDisplay") && !$scan) {
1502 my $newxmlrecord = XSLTParse4Display($oldbiblio->{biblionumber}, $marcrecord, C4::Context->config('opachtdocs')."/prog/en/xslt/MARC21slim2OPACResults.xsl");
1503 $oldbiblio->{XSLTResultsRecord} = $newxmlrecord;
1506 # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
1507 $can_place_holds = 0
1508 if $itemtypes{ $oldbiblio->{itemtype} }->{notforloan};
1509 $oldbiblio->{norequests} = 1 unless $can_place_holds;
1510 $oldbiblio->{itemsplural} = 1 if $items_count > 1;
1511 $oldbiblio->{items_count} = $items_count;
1512 $oldbiblio->{available_items_loop} = \@available_items_loop;
1513 $oldbiblio->{onloan_items_loop} = \@onloan_items_loop;
1514 $oldbiblio->{other_items_loop} = \@other_items_loop;
1515 $oldbiblio->{availablecount} = $available_count;
1516 $oldbiblio->{availableplural} = 1 if $available_count > 1;
1517 $oldbiblio->{onloancount} = $onloan_count;
1518 $oldbiblio->{onloanplural} = 1 if $onloan_count > 1;
1519 $oldbiblio->{othercount} = $other_count;
1520 $oldbiblio->{otherplural} = 1 if $other_count > 1;
1521 $oldbiblio->{wthdrawncount} = $wthdrawn_count;
1522 $oldbiblio->{itemlostcount} = $itemlost_count;
1523 $oldbiblio->{damagedcount} = $itemdamaged_count;
1524 $oldbiblio->{intransitcount} = $item_in_transit_count;
1525 $oldbiblio->{orderedcount} = $ordered_count;
1526 $oldbiblio->{isbn} =~
1527 s/-//g; # deleting - in isbn to enable amazon content
1528 push( @newresults, $oldbiblio );
1533 #----------------------------------------------------------------------
1535 # Non-Zebra GetRecords#
1536 #----------------------------------------------------------------------
1540 NZgetRecords has the same API as zebra getRecords, even if some parameters are not managed
# NZgetRecords: non-Zebra counterpart of getRecords. Runs $query through
# NZanalyse and passes the resulting string, the first entry of @$sort_by_ref
# and the paging values ($results_per_page, $offset) to NZorder.
# NOTE(review): the `sub` line and the rest of the argument list / return
# statement are not visible in this excerpt.
1546 $query, $simple_query, $sort_by_ref, $servers_ref,
1547 $results_per_page, $offset, $expanded_facet, $branches,
1550 warn "query =$query" if $DEBUG;
1551 my $result = NZanalyse($query);
1552 warn "results =$result" if $DEBUG;
1554 NZorder( $result, @$sort_by_ref[0], $results_per_page, $offset ),
1560 NZanalyse : get a CQL string as parameter, and returns a list of biblionumber;title,biblionumber;title,...
1561 the list is built from an inverted index in the nozebra SQL table
1562 note that title is here only for convenience : the sorting will be very fast when requested on title
1563 if the sorting is requested on something else, we will have to reread all results, and that may be longer.
# NZanalyse: recursively evaluates a query string against the nozebra table.
# Arguments: $string (the query) and $server (defaults to 'biblioserver').
# The string is split on and/or/not into a left and right part, each part is
# analysed recursively, and the two result strings are combined; a part with
# no operator is a leaf that is looked up with SQL.
# NOTE(review): the `sub` line and several assignments (e.g. of $left, $right,
# $commacontent) are not visible in this excerpt.
1568 my ( $string, $server ) = @_;
1569 # warn "---------" if $DEBUG;
1570 warn " NZanalyse" if $DEBUG;
1571 # warn "---------" if $DEBUG;
1573 # $server contains biblioserver or authorities, depending on what we search on.
1574 #warn "querying : $string on $server";
1575 $server = 'biblioserver' unless $server;
1577 # if we have a ", replace the content to discard temporarily any and/or/not inside
1579 if ( $string =~ /"/ ) {
# no /g: only the first quoted span is replaced by the __X__ placeholder
1580 $string =~ s/"(.*?)"/__X__/;
1582 warn "commacontent : $commacontent" if $DEBUG;
1585 # split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y"
1586 # then, call again NZanalyse with $left and $right
1587 # (recursive until we find a leaf (=> something without and/or/not)
1588 # delete repeated operator... Would then go in infinite loop
1589 while ( $string =~ s/( and| or| not| AND| OR| NOT)\1/$1/g ) {
1592 #process parenthesis before.
1593 if ( $string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/ ) {
1596 my $operator = lc($3); # FIXME: and/or/not are operators, not operands
1598 "dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right"
1600 my $leftresult = NZanalyse( $left, $server );
1602 my $rightresult = NZanalyse( $right, $server );
1604 # OK, we have the results for right and left part of the query
1605 # depending of operand, intersect, union or exclude both lists
1606 # to get a result list
1607 if ( $operator eq ' and ' ) {
1608 return NZoperatorAND($leftresult,$rightresult);
1610 elsif ( $operator eq ' or ' ) {
1612 # just merge the 2 strings
1613 return $leftresult . $rightresult;
1615 elsif ( $operator eq ' not ' ) {
1616 return NZoperatorNOT($leftresult,$rightresult);
1620 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1624 warn "string :" . $string if $DEBUG;
# non-greedy (.*?) on the left: the split happens at the first operator found
1628 if ($string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/) {
1631 $operator = lc($2); # FIXME: and/or/not are operators, not operands
1633 warn "no parenthesis. left : $left operator: $operator right: $right"
1636 # it's not a leaf, we have a and/or/not
1639 # reintroduce comma content if needed
1640 $right =~ s/__X__/"$commacontent"/ if $commacontent;
1641 $left =~ s/__X__/"$commacontent"/ if $commacontent;
1642 warn "node : $left / $operator / $right\n" if $DEBUG;
1643 my $leftresult = NZanalyse( $left, $server );
1644 my $rightresult = NZanalyse( $right, $server );
1645 warn " leftresult : $leftresult" if $DEBUG;
1646 warn " rightresult : $rightresult" if $DEBUG;
1647 # OK, we have the results for right and left part of the query
1648 # depending of operand, intersect, union or exclude both lists
1649 # to get a result list
1650 if ( $operator eq ' and ' ) {
1652 return NZoperatorAND($leftresult,$rightresult);
1654 elsif ( $operator eq ' or ' ) {
1656 # just merge the 2 strings
1657 return $leftresult . $rightresult;
1659 elsif ( $operator eq ' not ' ) {
1660 return NZoperatorNOT($leftresult,$rightresult);
1664 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1665 die "error : operand unknown : $operator for $string";
1668 # it's a leaf, do the real SQL query and return the result
1671 $string =~ s/__X__/"$commacontent"/ if $commacontent;
# replace punctuation characters with a space
1672 $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g;
1673 #remove trailing blank at the beginning
1675 warn "leaf:$string" if $DEBUG;
1677 # parse the string in in operator/operand/value again
# two-character comparison operators are tried first, then > < =
1681 if ($string =~ /(.*)(>=|<=)(.*)/) {
1688 # warn "handling leaf... left:$left operator:$operator right:$right"
1690 unless ($operator) {
1691 if ($string =~ /(.*)(>|<|=)(.*)/) {
1696 "handling unless (operator)... left:$left operator:$operator right:$right"
1704 # strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr...
1707 # automatic replace for short operators
1708 $left = 'title' if $left =~ '^ti$';
1709 $left = 'author' if $left =~ '^au$';
1710 $left = 'publisher' if $left =~ '^pb$';
1711 $left = 'subject' if $left =~ '^su$';
1712 $left = 'koha-Auth-Number' if $left =~ '^an$';
1713 $left = 'keyword' if $left =~ '^kw$';
1714 warn "handling leaf... left:$left operator:$operator right:$right" if $DEBUG;
1715 if ( $operator && $left ne 'keyword' ) {
1717 #do a specific search
1718 my $dbh = C4::Context->dbh;
# a '%' in the value switches an equality test to a LIKE match
1719 $operator = 'LIKE' if $operator eq '=' and $right =~ /%/;
# NOTE(review): $operator is interpolated into the SQL text (the values are
# bound). In the visible lines it only comes from the >= <= > < = patterns
# above or is set to 'LIKE' -- confirm there is no other source.
1722 "SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?"
1724 warn "$left / $operator / $right\n" if $DEBUG;
1726 # split each word, query the DB and build the biblionumbers result
1727 #sanitizing leftpart
# NOTE(review): without /g this substitution runs once, so it removes leading
# whitespace OR trailing whitespace (whichever matches first), not both.
1728 $left =~ s/^\s+|\s+$//;
1729 foreach ( split / /, $right ) {
1731 $_ =~ s/^\s+|\s+$//;
1733 warn "EXECUTE : $server, $left, $_" if $DEBUG;
1734 $sth->execute( $server, $left, $_ )
1735 or warn "execute failed: $!";
1736 while ( my ( $line, $value ) = $sth->fetchrow ) {
1738 # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
1739 # otherwise, fill the result
1740 $biblionumbers .= $line
1741 unless ( $right =~ /^\d+$/ && $value =~ /\D/ );
1742 warn "result : $value "
1743 . ( $right =~ /\d/ ) . "=="
1744 . ( $value =~ /\D/?$line:"" ) if $DEBUG; #= $line";
1747 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1749 warn "NZAND" if $DEBUG;
1750 $results = NZoperatorAND($biblionumbers,$results);
1753 $results = $biblionumbers;
1759 #do a complete search (all indexes), if index='kw' do complete search too.
1760 my $dbh = C4::Context->dbh;
1763 "SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?"
1766 # split each word, query the DB and build the biblionumbers result
1767 foreach ( split / /, $string ) {
1768 next if C4::Context->stopwords->{ uc($_) }; # skip if stopword
1769 warn "search on all indexes on $_" if $DEBUG;
1772 $sth->execute( $server, $_ );
1773 while ( my $line = $sth->fetchrow ) {
1774 $biblionumbers .= $line;
1777 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1779 $results = NZoperatorAND($biblionumbers,$results);
1782 warn "NEW RES for $_ = $biblionumbers" if $DEBUG;
1783 $results = $biblionumbers;
1787 warn "return : $results for LEAF : $string" if $DEBUG;
1790 warn "---------\nLeave NZanalyse\n---------" if $DEBUG;
# Intersection of two result strings. Each argument is a ';'-separated list of
# "value-count" entries; entries of the second argument whose value also
# appears in the first are kept, each written twice.
# NOTE(review): the `sub` line is not visible in this excerpt; presumably this
# is NZoperatorAND, given the "NZAND DONE" debug message below.
# NOTE(review): the first positional argument is named $rightresult, the
# reverse of the NZoperatorAND($leftresult,$rightresult) call order in NZanalyse.
1794 my ($rightresult, $leftresult)=@_;
1796 my @leftresult = split /;/, $leftresult;
1797 warn " @leftresult / $rightresult \n" if $DEBUG;
1799 # my @rightresult = split /;/,$leftresult;
1802 # parse the left results, and if the biblionumber exist in the right result, save it in finalresult
1803 # the result is stored twice, to have the same weight for AND than OR.
1804 # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130
1805 # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64
1806 foreach (@leftresult) {
# split "value-count" into its two parts
1809 ( $value, $countvalue ) = ( $1, $2 ) if ($value=~/(.*)-(\d+)$/);
1810 if ( $rightresult =~ /\Q$value\E-(\d+);/ ) {
# keep the smaller of the two counts
1811 $countvalue = ( $1 > $countvalue ? $countvalue : $1 );
1813 "$value-$countvalue;$value-$countvalue;";
1816 warn "NZAND DONE : $finalresult \n" if $DEBUG;
1817 return $finalresult;
# Union of two result strings: the two arguments are simply concatenated.
# NOTE(review): the `sub` line is not visible in this excerpt; by its position
# between the AND and NOT helpers this is presumably the OR helper -- confirm.
1821 my ($rightresult, $leftresult)=@_;
1822 return $rightresult.$leftresult;
# Exclusion: returns the ';'-separated entries of $leftresult whose value part
# does not match anywhere in $rightresult.
# NOTE(review): the `sub` line is not visible in this excerpt; presumably this
# is NZoperatorNOT, which NZanalyse calls with ($leftresult,$rightresult).
1826 my ($leftresult, $rightresult)=@_;
1828 my @leftresult = split /;/, $leftresult;
1830 # my @rightresult = split /;/,$leftresult;
1832 foreach (@leftresult) {
# drop the trailing "-count" from the entry
1834 $value=$1 if $value=~m/(.*)-\d+$/;
# NOTE(review): "$value-" is used as an unescaped, unanchored pattern (the AND
# helper uses \Q...\E), so regex metacharacters in $value are live and a value
# can match as a suffix of a longer one -- confirm this is acceptable.
1835 unless ($rightresult =~ "$value-") {
1836 $finalresult .= "$_;";
1839 return $finalresult;
1844 $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset);
# NZorder: orders the ';'-separated result string in $biblionumbers according
# to $ordering (popularity, author, callnumber, pubdate, title; anything else
# falls through to ranking) and returns a hash reference of the form
# { biblioserver => { RECORDS => [...], hits => N } }.
# $results_per_page defaults to 20 and $offset to 0.
# In the title and ranking branches only the entries of the requested page are
# replaced by full records; the other RECORDS entries stay biblionumbers.
# NOTE(review): the `sub` line and several declarations are not visible in
# this excerpt.
1851 my ( $biblionumbers, $ordering, $results_per_page, $offset ) = @_;
1852 warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG;
1854 # order title asc by default
1855 # $ordering = '1=36 <i' unless $ordering;
1856 $results_per_page = 20 unless $results_per_page;
1857 $offset = 0 unless $offset;
1858 my $dbh = C4::Context->dbh;
1861 # order by POPULARITY
1863 if ( $ordering =~ /popularity/ ) {
1867 # popularity is not in MARC record, it's built from a specific query
1869 $dbh->prepare("select sum(issues) from items where biblionumber=?");
1870 foreach ( split /;/, $biblionumbers ) {
1871 my ( $biblionumber, $title ) = split /,/, $_;
1872 $result{$biblionumber} = GetMarcBiblio($biblionumber);
1873 $sth->execute($biblionumber);
1874 my $popularity = $sth->fetchrow || 0;
1876 # hint : the key is popularity.title because we can have
1877 # many results with the same popularity. In this case, sub-ordering is done by title
1878 # we also have biblionumber to avoid bug for 2 biblios with the same title & popularity
1879 # (un-frequent, I agree, but we won't forget anything that way ;-)
# "%10d" right-aligns the count so the string sort below orders it numerically
1880 $popularity{ sprintf( "%10d", $popularity ) . $title
1881 . $biblionumber } = $biblionumber;
1884 # sort the hash and return the same structure as GetRecords (Zebra querying)
1887 if ( $ordering eq 'popularity_dsc' ) { # sort popularity DESC
1888 foreach my $key ( sort { $b cmp $a } ( keys %popularity ) ) {
1889 $result_hash->{'RECORDS'}[ $numbers++ ] =
1890 $result{ $popularity{$key} }->as_usmarc();
1893 else { # sort popularity ASC
1894 foreach my $key ( sort ( keys %popularity ) ) {
1895 $result_hash->{'RECORDS'}[ $numbers++ ] =
1896 $result{ $popularity{$key} }->as_usmarc();
1899 my $finalresult = ();
1900 $result_hash->{'hits'} = $numbers;
1901 $finalresult->{'biblioserver'} = $result_hash;
1902 return $finalresult;
1908 elsif ( $ordering =~ /author/ ) {
1910 foreach ( split /;/, $biblionumbers ) {
1911 my ( $biblionumber, $title ) = split /,/, $_;
1912 my $record = GetMarcBiblio($biblionumber);
1914 if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1915 $author = $record->subfield( '200', 'f' );
1916 $author = $record->subfield( '700', 'a' ) unless $author;
1919 $author = $record->subfield( '100', 'a' );
1922 # hint : the result is keyed by author.biblionumber because we can have X biblios with the same author
1923 # and we don't want to get only 1 result for each of them !!!
1924 $result{ $author . $biblionumber } = $record;
1927 # sort the hash and return the same structure as GetRecords (Zebra querying)
1930 if ( $ordering eq 'author_za' ) { # sort by author desc
1931 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1932 $result_hash->{'RECORDS'}[ $numbers++ ] =
1933 $result{$key}->as_usmarc();
1936 else { # sort by author ASC
1937 foreach my $key ( sort ( keys %result ) ) {
1938 $result_hash->{'RECORDS'}[ $numbers++ ] =
1939 $result{$key}->as_usmarc();
1942 my $finalresult = ();
1943 $result_hash->{'hits'} = $numbers;
1944 $finalresult->{'biblioserver'} = $result_hash;
1945 return $finalresult;
1948 # ORDER BY callnumber
1951 elsif ( $ordering =~ /callnumber/ ) {
1953 foreach ( split /;/, $biblionumbers ) {
1954 my ( $biblionumber, $title ) = split /,/, $_;
1955 my $record = GetMarcBiblio($biblionumber);
1957 my ( $callnumber_tag, $callnumber_subfield ) =
1958 GetMarcFromKohaField( 'items.itemcallnumber','' );
1959 ( $callnumber_tag, $callnumber_subfield ) =
1960 GetMarcFromKohaField('biblioitems.callnumber','')
1961 unless $callnumber_tag;
# NOTE(review): $callnumber is read from 200$f / 100$a, the same subfields the
# author branch uses; the $callnumber_tag / $callnumber_subfield looked up just
# above are not used in the visible lines -- looks like a copy/paste slip.
1962 if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1963 $callnumber = $record->subfield( '200', 'f' );
1966 $callnumber = $record->subfield( '100', 'a' );
1969 # hint : the result is keyed by callnumber.biblionumber because we can have X biblios with the same value
1970 # and we don't want to get only 1 result for each of them !!!
1971 $result{ $callnumber . $biblionumber } = $record;
1974 # sort the hash and return the same structure as GetRecords (Zebra querying)
# NOTE(review): 'call_number_dsc' does not contain "callnumber", so it cannot
# satisfy both the /callnumber/ test guarding this branch and this eq test.
1977 if ( $ordering eq 'call_number_dsc' ) { # sort by call number desc
1978 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1979 $result_hash->{'RECORDS'}[ $numbers++ ] =
1980 $result{$key}->as_usmarc();
1983 else { # sort by call number ASC
1984 foreach my $key ( sort { $a cmp $b } ( keys %result ) ) {
1985 $result_hash->{'RECORDS'}[ $numbers++ ] =
1986 $result{$key}->as_usmarc();
1989 my $finalresult = ();
1990 $result_hash->{'hits'} = $numbers;
1991 $finalresult->{'biblioserver'} = $result_hash;
1992 return $finalresult;
1994 elsif ( $ordering =~ /pubdate/ ) { #pub year
1996 foreach ( split /;/, $biblionumbers ) {
1997 my ( $biblionumber, $title ) = split /,/, $_;
1998 my $record = GetMarcBiblio($biblionumber);
1999 my ( $publicationyear_tag, $publicationyear_subfield ) =
2000 GetMarcFromKohaField( 'biblioitems.publicationyear', '' );
2001 my $publicationyear =
2002 $record->subfield( $publicationyear_tag,
2003 $publicationyear_subfield );
2005 # hint : the result is keyed by publicationyear.biblionumber because we can have X biblios with the same year
2006 # and we don't want to get only 1 result for each of them !!!
2007 $result{ $publicationyear . $biblionumber } = $record;
2010 # sort the hash and return the same structure as GetRecords (Zebra querying)
2013 if ( $ordering eq 'pubdate_dsc' ) { # sort by pubyear desc
2014 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2015 $result_hash->{'RECORDS'}[ $numbers++ ] =
2016 $result{$key}->as_usmarc();
2019 else { # sort by pub year ASC
2020 foreach my $key ( sort ( keys %result ) ) {
2021 $result_hash->{'RECORDS'}[ $numbers++ ] =
2022 $result{$key}->as_usmarc();
2025 my $finalresult = ();
2026 $result_hash->{'hits'} = $numbers;
2027 $finalresult->{'biblioserver'} = $result_hash;
2028 return $finalresult;
2034 elsif ( $ordering =~ /title/ ) {
2036 # the title is in the biblionumbers string, so we just need to build a hash, sort it and return
2038 foreach ( split /;/, $biblionumbers ) {
2039 my ( $biblionumber, $title ) = split /,/, $_;
2041 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2042 # and we don't want to get only 1 result for each of them !!!
2043 # hint & speed improvement : we can order without reading the record
2044 # so order, and read records only for the requested page !
2045 $result{ $title . $biblionumber } = $biblionumber;
2048 # sort the hash and return the same structure as GetRecords (Zebra querying)
2051 if ( $ordering eq 'title_az' ) { # sort by title ASC (default string sort)
2052 foreach my $key ( sort ( keys %result ) ) {
2053 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2056 else { # sort by title DESC
2057 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2058 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2062 # limit the $results_per_page to result size if it's more
2063 $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2065 # for the requested page, replace biblionumber by the complete record
2066 # speed improvement : avoid reading too much things
# NOTE(review): unlike the ranking branch further down, this assignment has no
# "if $result_hash->{'RECORDS'}[$counter]" guard, and the loop bound is
# inclusive ($counter <= $offset + $results_per_page) -- confirm $counter
# cannot run past the end of RECORDS when $offset is non-zero.
2068 my $counter = $offset ;
2069 $counter <= $offset + $results_per_page ;
2073 $result_hash->{'RECORDS'}[$counter] =
2074 GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc;
2076 my $finalresult = ();
2077 $result_hash->{'hits'} = $numbers;
2078 $finalresult->{'biblioserver'} = $result_hash;
2079 return $finalresult;
2086 # we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking
2089 foreach ( split /;/, $biblionumbers ) {
2090 my ( $biblionumber, $title ) = split /,/, $_;
# NOTE(review): (\d) captures a single digit after the "-"; presumably the
# capture becomes $ranking in lines not visible here -- confirm multi-digit
# weights are handled as intended.
2091 $title =~ /(.*)-(\d)/;
2096 # note that we + the ranking because ranking is calculated on weight of EACH term requested.
2097 # if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N
2098 # biblio N has ranking = 6
2099 $count_ranking{$biblionumber} += $ranking;
2102 # build the result by "inverting" the count_ranking hash
2103 # hint : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead
2105 foreach ( keys %count_ranking ) {
2106 $result{ sprintf( "%10d", $count_ranking{$_} ) . '-' . $_ } = $_;
2109 # sort the hash and return the same structure as GetRecords (Zebra querying)
# descending string sort: highest ranking first
2112 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2113 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2116 # limit the $results_per_page to result size if it's more
2117 $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2119 # for the requested page, replace biblionumber by the complete record
2120 # speed improvement : avoid reading too much things
2122 my $counter = $offset ;
2123 $counter <= $offset + $results_per_page ;
2127 $result_hash->{'RECORDS'}[$counter] =
2128 GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc
2129 if $result_hash->{'RECORDS'}[$counter];
2131 my $finalresult = ();
2132 $result_hash->{'hits'} = $numbers;
2133 $finalresult->{'biblioserver'} = $result_hash;
2134 return $finalresult;
# Empty END block: no clean-up is performed when the interpreter exits.
2138 END { } # module clean-up code here (global destructor)
2145 Koha Development team <info@koha.org>