3 # This file is part of Koha.
5 # Koha is free software; you can redistribute it and/or modify it under the
6 # terms of the GNU General Public License as published by the Free Software
7 # Foundation; either version 2 of the License, or (at your option) any later
10 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License along with
15 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
16 # Suite 330, Boston, MA 02111-1307 USA
19 # use warnings; # FIXME
22 use C4::Biblio; # GetMarcFromKohaField
23 use C4::Koha; # getFacets
25 use C4::Search::PazPar2;
27 use C4::Dates qw(format_date);
31 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
33 # set the version for version checking
36 $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
41 C4::Search - Functions for searching the Koha catalog.
45 See opac/opac-search.pl or catalogue/search.pl for example of usage
49 This module provides searching functions for Koha's bibliographic databases
65 # make all your functions, whether exported or not;
69 ($biblionumber,$biblionumber,$title) = FindDuplicate($record);
71 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
77 my $dbh = C4::Context->dbh;
78 my $result = TransformMarcToKoha( $dbh, $record, '' );
83 my ( $biblionumber, $title );
85 # search duplicate on ISBN, easy and fast..
87 if ( $result->{isbn} ) {
88 $result->{isbn} =~ s/\(.*$//;
89 $result->{isbn} =~ s/\s+$//;
90 $query = "isbn=$result->{isbn}";
93 $result->{title} =~ s /\\//g;
94 $result->{title} =~ s /\"//g;
95 $result->{title} =~ s /\(//g;
96 $result->{title} =~ s /\)//g;
98 # FIXME: instead of removing operators, could just do
99 # quotes around the value
100 $result->{title} =~ s/(and|or|not)//g;
101 $query = "ti,ext=$result->{title}";
102 $query .= " and itemtype=$result->{itemtype}"
103 if ( $result->{itemtype} );
104 if ( $result->{author} ) {
105 $result->{author} =~ s /\\//g;
106 $result->{author} =~ s /\"//g;
107 $result->{author} =~ s /\(//g;
108 $result->{author} =~ s /\)//g;
110 # remove valid operators
111 $result->{author} =~ s/(and|or|not)//g;
112 $query .= " and au,ext=$result->{author}";
116 # FIXME: add error handling
117 my ( $error, $searchresults ) = SimpleSearch($query); # FIXME :: hardcoded !
119 foreach my $possible_duplicate_record (@$searchresults) {
121 MARC::Record->new_from_usmarc($possible_duplicate_record);
122 my $result = TransformMarcToKoha( $dbh, $marcrecord, '' );
124 # FIXME :: why 2 $biblionumber ?
126 push @results, $result->{'biblionumber'};
127 push @results, $result->{'title'};
135 ( $error, $results, $total_hits ) = SimpleSearch( $query, $offset, $max_results, [@servers] );
137 This function provides a simple search API on the bibliographic catalog
143 * $query can be a simple keyword or a complete CCL query
144 * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
145 * $offset - If present, represents the number of records at the beggining to omit. Defaults to 0
146 * $max_results - if present, determines the maximum number of records to fetch. undef is All. defaults to undef.
151 * $error is a empty unless an error is detected
152 * \@results is an array of records.
153 * $total_hits is the number of hits that would have been returned with no limit
155 =item C<usage in the script:>
159 my ( $error, $marcresults, $total_hits ) = SimpleSearch($query);
161 if (defined $error) {
162 $template->param(query_error => $error);
163 warn "error: ".$error;
164 output_html_with_http_headers $input, $cookie, $template->output;
168 my $hits = scalar @$marcresults;
171 for my $i (0..$hits) {
173 my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]);
174 my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,'');
176 #build the hash for the template.
177 $resultsloop{highlight} = ($i % 2)?(1):(0);
178 $resultsloop{title} = $biblio->{'title'};
179 $resultsloop{subtitle} = $biblio->{'subtitle'};
180 $resultsloop{biblionumber} = $biblio->{'biblionumber'};
181 $resultsloop{author} = $biblio->{'author'};
182 $resultsloop{publishercode} = $biblio->{'publishercode'};
183 $resultsloop{publicationyear} = $biblio->{'publicationyear'};
185 push @results, \%resultsloop;
188 $template->param(result=>\@results);
193 my ( $query, $offset, $max_results, $servers ) = @_;
195 if ( C4::Context->preference('NoZebra') ) {
196 my $result = NZorder( NZanalyse($query) )->{'biblioserver'};
199 && $result->{hits} > 0 ? $result->{'RECORDS'} : [] );
200 return ( undef, $search_result, scalar($result->{hits}) );
203 # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
204 my @servers = defined ( $servers ) ? @$servers : ( "biblioserver" );
210 return ( "No query entered", undef, undef ) unless $query;
212 # Initialize & Search Zebra
213 for ( my $i = 0 ; $i < @servers ; $i++ ) {
215 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
216 $zoom_queries[$i] = new ZOOM::Query::CCL2RPN( $query, $zconns[$i]);
217 $tmpresults[$i] = $zconns[$i]->search( $zoom_queries[$i] );
221 $zconns[$i]->errmsg() . " ("
222 . $zconns[$i]->errcode() . ") "
223 . $zconns[$i]->addinfo() . " "
224 . $zconns[$i]->diagset();
226 return ( $error, undef, undef ) if $zconns[$i]->errcode();
230 # caught a ZOOM::Exception
234 . $@->addinfo() . " "
237 return ( $error, undef, undef );
240 while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
241 my $event = $zconns[ $i - 1 ]->last_event();
242 if ( $event == ZOOM::Event::ZEND ) {
244 my $first_record = defined( $offset ) ? $offset+1 : 1;
245 my $hits = $tmpresults[ $i - 1 ]->size();
246 $total_hits += $hits;
247 my $last_record = $hits;
248 if ( defined $max_results && $offset + $max_results < $hits ) {
249 $last_record = $offset + $max_results;
252 for my $j ( $first_record..$last_record ) {
253 my $record = $tmpresults[ $i - 1 ]->record( $j-1 )->raw(); # 0 indexed
254 push @results, $record;
259 foreach my $result (@tmpresults) {
262 foreach my $zoom_query (@zoom_queries) {
263 $zoom_query->destroy();
266 return ( undef, \@results, $total_hits );
272 ( undef, $results_hashref, \@facets_loop ) = getRecords (
274 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
275 $results_per_page, $offset, $expanded_facet, $branches,
279 The all singing, all dancing, multi-server, asynchronous, scanning,
280 searching, record nabbing, facet-building
282 See verbse embedded documentation.
288 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
289 $results_per_page, $offset, $expanded_facet, $branches,
293 my @servers = @$servers_ref;
294 my @sort_by = @$sort_by_ref;
296 # Initialize variables for the ZOOM connection and results object
300 my $results_hashref = ();
302 # Initialize variables for the faceted results objects
303 my $facets_counter = ();
304 my $facets_info = ();
305 my $facets = getFacets();
308 ; # stores the ref to array of hashes for template facets loop
310 ### LOOP THROUGH THE SERVERS
311 for ( my $i = 0 ; $i < @servers ; $i++ ) {
312 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
314 # perform the search, create the results objects
315 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
316 my $query_to_use = ($servers[$i] =~ /biblioserver/) ? $koha_query : $simple_query;
318 #$query_to_use = $simple_query if $scan;
319 warn $simple_query if ( $scan and $DEBUG );
321 # Check if we've got a query_type defined, if so, use it
325 if ( $query_type =~ /^ccl/ ) {
327 s/\:/\=/g; # change : to = last minute (FIXME)
330 new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
333 elsif ( $query_type =~ /^cql/ ) {
336 new ZOOM::Query::CQL( $query_to_use, $zconns[$i] ) );
338 elsif ( $query_type =~ /^pqf/ ) {
341 new ZOOM::Query::PQF( $query_to_use, $zconns[$i] ) );
348 new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
354 new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
360 warn "WARNING: query problem with $query_to_use " . $@;
363 # Concatenate the sort_by limits and pass them to the results object
364 # Note: sort will override rank
366 foreach my $sort (@sort_by) {
367 if ( $sort eq "author_az" ) {
368 $sort_by .= "1=1003 <i ";
370 elsif ( $sort eq "author_za" ) {
371 $sort_by .= "1=1003 >i ";
373 elsif ( $sort eq "popularity_asc" ) {
374 $sort_by .= "1=9003 <i ";
376 elsif ( $sort eq "popularity_dsc" ) {
377 $sort_by .= "1=9003 >i ";
379 elsif ( $sort eq "call_number_asc" ) {
380 $sort_by .= "1=20 <i ";
382 elsif ( $sort eq "call_number_dsc" ) {
383 $sort_by .= "1=20 >i ";
385 elsif ( $sort eq "pubdate_asc" ) {
386 $sort_by .= "1=31 <i ";
388 elsif ( $sort eq "pubdate_dsc" ) {
389 $sort_by .= "1=31 >i ";
391 elsif ( $sort eq "acqdate_asc" ) {
392 $sort_by .= "1=32 <i ";
394 elsif ( $sort eq "acqdate_dsc" ) {
395 $sort_by .= "1=32 >i ";
397 elsif ( $sort eq "title_az" ) {
398 $sort_by .= "1=4 <i ";
400 elsif ( $sort eq "title_za" ) {
401 $sort_by .= "1=4 >i ";
404 warn "Ignoring unrecognized sort '$sort' requested" if $sort_by;
408 if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
409 warn "WARNING sort $sort_by failed";
412 } # finished looping through servers
414 # The big moment: asynchronously retrieve results from all servers
415 while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
416 my $ev = $zconns[ $i - 1 ]->last_event();
417 if ( $ev == ZOOM::Event::ZEND ) {
418 next unless $results[ $i - 1 ];
419 my $size = $results[ $i - 1 ]->size();
423 # loop through the results
424 $results_hash->{'hits'} = $size;
426 if ( $offset + $results_per_page <= $size ) {
427 $times = $offset + $results_per_page;
432 for ( my $j = $offset ; $j < $times ; $j++ ) {
437 ## Check if it's an index scan
439 my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
441 # here we create a minimal MARC record and hand it off to the
442 # template just like a normal result ... perhaps not ideal, but
444 my $tmprecord = MARC::Record->new();
445 $tmprecord->encoding('UTF-8');
449 # the minimal record in author/title (depending on MARC flavour)
450 if (C4::Context->preference("marcflavour") eq "UNIMARC") {
451 $tmptitle = MARC::Field->new('200',' ',' ', a => $term, f => $occ);
452 $tmprecord->append_fields($tmptitle);
454 $tmptitle = MARC::Field->new('245',' ',' ', a => $term,);
455 $tmpauthor = MARC::Field->new('100',' ',' ', a => $occ,);
456 $tmprecord->append_fields($tmptitle);
457 $tmprecord->append_fields($tmpauthor);
459 $results_hash->{'RECORDS'}[$j] = $tmprecord->as_usmarc();
464 $record = $results[ $i - 1 ]->record($j)->raw();
466 # warn "RECORD $j:".$record;
467 $results_hash->{'RECORDS'}[$j] = $record;
469 # Fill the facets while we're looping, but only for the biblioserver
470 $facet_record = MARC::Record->new_from_usmarc($record)
471 if $servers[ $i - 1 ] =~ /biblioserver/;
473 #warn $servers[$i-1]."\n".$record; #.$facet_record->title();
475 for ( my $k = 0 ; $k <= @$facets ; $k++ ) {
476 ($facets->[$k]) or next;
477 my @fields = map {$facet_record->field($_)} @{$facets->[$k]->{'tags'}} ;
478 for my $field (@fields) {
479 my @subfields = $field->subfields();
480 for my $subfield (@subfields) {
481 my ( $code, $data ) = @$subfield;
482 ($code eq $facets->[$k]->{'subfield'}) or next;
483 $facets_counter->{ $facets->[$k]->{'link_value'} }->{$data}++;
486 $facets_info->{ $facets->[$k]->{'link_value'} }->{'label_value'} =
487 $facets->[$k]->{'label_value'};
488 $facets_info->{ $facets->[$k]->{'link_value'} }->{'expanded'} =
489 $facets->[$k]->{'expanded'};
494 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
497 # warn "connection ", $i-1, ": $size hits";
498 # warn $results[$i-1]->record(0)->render() if $size > 0;
501 if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
503 sort { $facets_counter->{$b} <=> $facets_counter->{$a} }
504 keys %$facets_counter )
507 my $number_of_facets;
508 my @this_facets_array;
511 $facets_counter->{$link_value}
512 ->{$b} <=> $facets_counter->{$link_value}->{$a}
513 } keys %{ $facets_counter->{$link_value} }
517 if ( ( $number_of_facets < 6 )
518 || ( $expanded_facet eq $link_value )
519 || ( $facets_info->{$link_value}->{'expanded'} ) )
522 # Sanitize the link value ), ( will cause errors with CCL,
523 my $facet_link_value = $one_facet;
524 $facet_link_value =~ s/(\(|\))/ /g;
526 # fix the length that will display in the label,
527 my $facet_label_value = $one_facet;
529 substr( $one_facet, 0, 20 ) . "..."
530 unless length($facet_label_value) <= 20;
532 # if it's a branch, label by the name, not the code,
533 if ( $link_value =~ /branch/ ) {
535 $branches->{$one_facet}->{'branchname'};
538 # but we're down with the whole label being in the link's title.
539 my $facet_title_value = $one_facet;
541 push @this_facets_array,
545 $facets_counter->{$link_value}
547 facet_label_value => $facet_label_value,
548 facet_title_value => $facet_title_value,
549 facet_link_value => $facet_link_value,
550 type_link_value => $link_value,
556 # handle expanded option
557 unless ( $facets_info->{$link_value}->{'expanded'} ) {
559 if ( ( $number_of_facets > 6 )
560 && ( $expanded_facet ne $link_value ) );
565 type_link_value => $link_value,
566 type_id => $link_value . "_id",
567 "type_label_" . $facets_info->{$link_value}->{'label_value'} => 1,
568 facets => \@this_facets_array,
569 expandable => $expandable,
570 expand => $link_value,
572 ) unless ( ($facets_info->{$link_value}->{'label_value'} =~ /Libraries/) and (C4::Context->preference('singleBranchMode')) );
577 return ( undef, $results_hashref, \@facets_loop );
582 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
583 $results_per_page, $offset, $expanded_facet, $branches,
587 my $paz = C4::Search::PazPar2->new(C4::Context->config('pazpar2url'));
589 $paz->search($simple_query);
593 my $results_hashref = {};
594 my $stats = XMLin($paz->stat);
595 my $results = XMLin($paz->show($offset, $results_per_page, 'work-title:1'), forcearray => 1);
597 # for a grouped search result, the number of hits
598 # is the number of groups returned; 'bib_hits' will have
599 # the total number of bibs.
600 $results_hashref->{'biblioserver'}->{'hits'} = $results->{'merged'}->[0];
601 $results_hashref->{'biblioserver'}->{'bib_hits'} = $stats->{'hits'};
603 HIT: foreach my $hit (@{ $results->{'hit'} }) {
604 my $recid = $hit->{recid}->[0];
606 my $work_title = $hit->{'md-work-title'}->[0];
608 if (exists $hit->{'md-work-author'}) {
609 $work_author = $hit->{'md-work-author'}->[0];
611 my $group_label = (defined $work_author) ? "$work_title / $work_author" : $work_title;
613 my $result_group = {};
614 $result_group->{'group_label'} = $group_label;
615 $result_group->{'group_merge_key'} = $recid;
618 if (exists $hit->{count}) {
619 $count = $hit->{count}->[0];
621 $result_group->{'group_count'} = $count;
623 for (my $i = 0; $i < $count; $i++) {
624 # FIXME -- may need to worry about diacritics here
625 my $rec = $paz->record($recid, $i);
626 push @{ $result_group->{'RECORDS'} }, $rec;
629 push @{ $results_hashref->{'biblioserver'}->{'GROUPS'} }, $result_group;
632 # pass through facets
633 my $termlist_xml = $paz->termlist('author,subject');
634 my $terms = XMLin($termlist_xml, forcearray => 1);
635 my @facets_loop = ();
636 #die Dumper($results);
637 # foreach my $list (sort keys %{ $terms->{'list'} }) {
639 # foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) {
641 # facet_label_value => $facet->{'name'}->[0],
644 # push @facets_loop, ( {
645 # type_label => $list,
646 # facets => \@facets,
650 return ( undef, $results_hashref, \@facets_loop );
654 sub _remove_stopwords {
655 my ( $operand, $index ) = @_;
656 my @stopwords_removed;
658 # phrase and exact-qualified indexes shouldn't have stopwords removed
659 if ( $index !~ m/phr|ext/ ) {
661 # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
662 # we use IsAlpha unicode definition, to deal correctly with diacritics.
663 # otherwise, a French word like "leçon" woudl be split into "le" "çon", "le"
664 # is a stopword, we'd get "çon" and wouldn't find anything...
665 foreach ( keys %{ C4::Context->stopwords } ) {
666 next if ( $_ =~ /(and|or|not)/ ); # don't remove operators
668 /(\P{IsAlpha}$_\P{IsAlpha}|^$_\P{IsAlpha}|\P{IsAlpha}$_$|^$_$)/ )
670 $operand =~ s/\P{IsAlpha}$_\P{IsAlpha}/ /gi;
671 $operand =~ s/^$_\P{IsAlpha}/ /gi;
672 $operand =~ s/\P{IsAlpha}$_$/ /gi;
673 $operand =~ s/$1//gi;
674 push @stopwords_removed, $_;
678 return ( $operand, \@stopwords_removed );
682 sub _detect_truncation {
683 my ( $operand, $index ) = @_;
684 my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
687 my @wordlist = split( /\s/, $operand );
688 foreach my $word (@wordlist) {
689 if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
690 push @rightlefttruncated, $word;
692 elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
693 push @lefttruncated, $word;
695 elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
696 push @righttruncated, $word;
698 elsif ( index( $word, "*" ) < 0 ) {
699 push @nontruncated, $word;
702 push @regexpr, $word;
706 \@nontruncated, \@righttruncated, \@lefttruncated,
707 \@rightlefttruncated, \@regexpr
712 sub _build_stemmed_operand {
716 # If operand contains a digit, it is almost certainly an identifier, and should
717 # not be stemmed. This is particularly relevant for ISBNs and ISSNs, which
718 # can contain the letter "X" - for example, _build_stemmend_operand would reduce
719 # "014100018X" to "x ", which for a MARC21 database would bring up irrelevant
720 # results (e.g., "23 x 29 cm." from the 300$c). Bug 2098.
721 return $operand if $operand =~ /\d/;
723 # FIXME: the locale should be set based on the user's language and/or search choice
724 my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
726 # FIXME: these should be stored in the db so the librarian can modify the behavior
727 $stemmer->add_exceptions(
734 my @words = split( / /, $operand );
735 my $stems = $stemmer->stem(@words);
736 for my $stem (@$stems) {
737 $stemmed_operand .= "$stem";
738 $stemmed_operand .= "?"
739 unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
740 $stemmed_operand .= " ";
742 warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
743 return $stemmed_operand;
747 sub _build_weighted_query {
749 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
750 # pretty well but could work much better if we had a smarter query parser
751 my ( $operand, $stemmed_operand, $index ) = @_;
752 my $stemming = C4::Context->preference("QueryStemming") || 0;
753 my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
754 my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
756 my $weighted_query .= "(rk=("; # Specifies that we're applying rank
758 # Keyword, or, no index specified
759 if ( ( $index eq 'kw' ) || ( !$index ) ) {
761 "Title-cover,ext,r1=\"$operand\""; # exact title-cover
762 $weighted_query .= " or ti,ext,r2=\"$operand\""; # exact title
763 $weighted_query .= " or ti,phr,r3=\"$operand\""; # phrase title
764 #$weighted_query .= " or any,ext,r4=$operand"; # exact any
765 #$weighted_query .=" or kw,wrdl,r5=\"$operand\""; # word list any
766 $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
767 if $fuzzy_enabled; # add fuzzy, word list
768 $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
769 if ( $stemming and $stemmed_operand )
770 ; # add stemming, right truncation
771 $weighted_query .= " or wrdl,r9=\"$operand\"";
773 # embedded sorting: 0 a-z; 1 z-a
774 # $weighted_query .= ") or (sort1,aut=1";
777 # Barcode searches should skip this process
778 elsif ( $index eq 'bc' ) {
779 $weighted_query .= "bc=\"$operand\"";
782 # Authority-number searches should skip this process
783 elsif ( $index eq 'an' ) {
784 $weighted_query .= "an=\"$operand\"";
787 # If the index already has more than one qualifier, wrap the operand
788 # in quotes and pass it back (assumption is that the user knows what they
789 # are doing and won't appreciate us mucking up their query
790 elsif ( $index =~ ',' ) {
791 $weighted_query .= " $index=\"$operand\"";
794 #TODO: build better cases based on specific search indexes
796 $weighted_query .= " $index,ext,r1=\"$operand\""; # exact index
797 #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
798 $weighted_query .= " or $index,phr,r3=\"$operand\""; # phrase index
800 " or $index,rt,wrdl,r3=\"$operand\""; # word list index
803 $weighted_query .= "))"; # close rank specification
804 return $weighted_query;
810 $simple_query, $query_cgi,
812 $limit_cgi, $limit_desc,
813 $stopwords_removed, $query_type ) = getRecords ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
815 Build queries and limits in CCL, CGI, Human,
816 handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
818 See verbose embedded documentation.
824 my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_;
826 warn "---------\nEnter buildQuery\n---------" if $DEBUG;
829 my @operators = @$operators if $operators;
830 my @indexes = @$indexes if $indexes;
831 my @operands = @$operands if $operands;
832 my @limits = @$limits if $limits;
833 my @sort_by = @$sort_by if $sort_by;
835 my $stemming = C4::Context->preference("QueryStemming") || 0;
836 my $auto_truncation = C4::Context->preference("QueryAutoTruncate") || 0;
837 my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
838 my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
839 my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
841 # no stemming/weight/fuzzy in NoZebra
842 if ( C4::Context->preference("NoZebra") ) {
848 my $query = $operands[0];
849 my $simple_query = $operands[0];
851 # initialize the variables we're passing back
860 my $stopwords_removed; # flag to determine if stopwords have been removed
862 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
864 if ( $query =~ /^ccl=/ ) {
865 return ( undef, $', $', "q=ccl=$'", $', '', '', '', '', 'ccl' );
867 if ( $query =~ /^cql=/ ) {
868 return ( undef, $', $', "q=cql=$'", $', '', '', '', '', 'cql' );
870 if ( $query =~ /^pqf=/ ) {
871 return ( undef, $', $', "q=pqf=$'", $', '', '', '', '', 'pqf' );
874 # pass nested queries directly
875 # FIXME: need better handling of some of these variables in this case
876 if ( $query =~ /(\(|\))/ ) {
878 undef, $query, $simple_query, $query_cgi,
879 $query, $limit, $limit_cgi, $limit_desc,
880 $stopwords_removed, 'ccl'
884 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
885 # query operands and indexes and add stemming, truncation, field weighting, etc.
886 # Once we do so, we'll end up with a value in $query, just like if we had an
887 # incoming $query from the user
890 ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
892 ; # a flag used to keep track if there was a previous query
893 # if there was, we can apply the current operator
895 for ( my $i = 0 ; $i <= @operands ; $i++ ) {
897 # COMBINE OPERANDS, INDEXES AND OPERATORS
898 if ( $operands[$i] ) {
900 # A flag to determine whether or not to add the index to the query
903 # If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling
904 if ( $operands[$i] =~ /(:|=)/ || $scan ) {
907 $remove_stopwords = 0;
909 my $operand = $operands[$i];
910 my $index = $indexes[$i];
912 # Add index-specific attributes
913 # Date of Publication
914 if ( $index eq 'yr' ) {
915 $index .= ",st-numeric";
917 $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
920 # Date of Acquisition
921 elsif ( $index eq 'acqdate' ) {
922 $index .= ",st-date-normalized";
924 $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
926 # ISBN,ISSN,Standard Number, don't need special treatment
927 elsif ( $index eq 'nb' || $index eq 'ns' ) {
930 $stemming, $auto_truncation,
931 $weight_fields, $fuzzy_enabled,
933 ) = ( 0, 0, 0, 0, 0 );
936 # Set default structure attribute (word list)
938 unless ( $indexes_set || !$index || $index =~ /(st-|phr|ext|wrdl)/ ) {
939 $struct_attr = ",wrdl";
942 # Some helpful index variants
943 my $index_plus = $index . $struct_attr . ":" if $index;
944 my $index_plus_comma = $index . $struct_attr . "," if $index;
945 if ($auto_truncation){
946 # FIXME Auto Truncation is only valid for LTR languages
948 # use C4::Languages qw(regex_lang_subtags get_bidi);
949 # $lang = $query->cookie('KohaOpacLanguage') if (defined $query && $query->cookie('KohaOpacLanguage'));
950 # my $current_lang = regex_lang_subtags($lang);
952 # $bidi = get_bidi($current_lang->{script}) if $current_lang->{script};
953 $index_plus_comma .= "rtrn:";
957 if ($remove_stopwords) {
958 ( $operand, $stopwords_removed ) =
959 _remove_stopwords( $operand, $index );
960 warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
961 warn "REMOVED STOPWORDS: @$stopwords_removed"
962 if ( $stopwords_removed && $DEBUG );
966 my $truncated_operand;
967 my( $nontruncated, $righttruncated, $lefttruncated,
968 $rightlefttruncated, $regexpr
969 ) = _detect_truncation( $operand, $index );
971 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
976 scalar(@$righttruncated) + scalar(@$lefttruncated) +
977 scalar(@$rightlefttruncated) > 0 )
980 # Don't field weight or add the index to the query, we do it here
982 undef $weight_fields;
983 my $previous_truncation_operand;
984 if ( scalar(@$nontruncated) > 0 ) {
985 $truncated_operand .= "$index_plus @$nontruncated ";
986 $previous_truncation_operand = 1;
988 if ( scalar(@$righttruncated) > 0 ) {
989 $truncated_operand .= "and "
990 if $previous_truncation_operand;
991 $truncated_operand .=
992 "$index_plus_comma" . "rtrn:@$righttruncated ";
993 $previous_truncation_operand = 1;
995 if ( scalar(@$lefttruncated) > 0 ) {
996 $truncated_operand .= "and "
997 if $previous_truncation_operand;
998 $truncated_operand .=
999 "$index_plus_comma" . "ltrn:@$lefttruncated ";
1000 $previous_truncation_operand = 1;
1002 if ( scalar(@$rightlefttruncated) > 0 ) {
1003 $truncated_operand .= "and "
1004 if $previous_truncation_operand;
1005 $truncated_operand .=
1006 "$index_plus_comma" . "rltrn:@$rightlefttruncated ";
1007 $previous_truncation_operand = 1;
1010 $operand = $truncated_operand if $truncated_operand;
1011 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
1014 my $stemmed_operand;
1015 $stemmed_operand = _build_stemmed_operand($operand)
1017 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
1019 # Handle Field Weighting
1020 my $weighted_operand;
1022 _build_weighted_query( $operand, $stemmed_operand, $index )
1024 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
1025 $operand = $weighted_operand if $weight_fields;
1026 $indexes_set = 1 if $weight_fields;
1028 # If there's a previous operand, we need to add an operator
1029 if ($previous_operand) {
1031 # User-specified operator
1032 if ( $operators[ $i - 1 ] ) {
1033 $query .= " $operators[$i-1] ";
1034 $query .= " $index_plus " unless $indexes_set;
1035 $query .= " $operand";
1036 $query_cgi .= "&op=$operators[$i-1]";
1037 $query_cgi .= "&idx=$index" if $index;
1038 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1040 " $operators[$i-1] $index_plus $operands[$i]";
1043 # Default operator is and
1046 $query .= "$index_plus " unless $indexes_set;
1047 $query .= "$operand";
1048 $query_cgi .= "&op=and&idx=$index" if $index;
1049 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1050 $query_desc .= " and $index_plus $operands[$i]";
1054 # There isn't a pervious operand, don't need an operator
1057 # Field-weighted queries already have indexes set
1058 $query .= " $index_plus " unless $indexes_set;
1060 $query_desc .= " $index_plus $operands[$i]";
1061 $query_cgi .= "&idx=$index" if $index;
1062 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1063 $previous_operand = 1;
1068 warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
1071 my $group_OR_limits;
1072 my $availability_limit;
1073 foreach my $this_limit (@limits) {
1074 if ( $this_limit =~ /available/ ) {
1076 # 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1078 # all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1079 $availability_limit .=
1080 "( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and (lost,st-numeric=0) )"; #or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )";
1081 $limit_cgi .= "&limit=available";
1085 # group_OR_limits, prefixed by mc-
1086 # OR every member of the group
1087 elsif ( $this_limit =~ /mc/ ) {
1088 $group_OR_limits .= " or " if $group_OR_limits;
1089 $limit_desc .= " or " if $group_OR_limits;
1090 $group_OR_limits .= "$this_limit";
1091 $limit_cgi .= "&limit=$this_limit";
1092 $limit_desc .= " $this_limit";
1095 # Regular old limits
1097 $limit .= " and " if $limit || $query;
1098 $limit .= "$this_limit";
1099 $limit_cgi .= "&limit=$this_limit";
1100 if ($this_limit =~ /^branch:(.+)/) {
1101 my $branchcode = $1;
1102 my $branchname = GetBranchName($branchcode);
1103 if (defined $branchname) {
1104 $limit_desc .= " branch:$branchname";
1106 $limit_desc .= " $this_limit";
1109 $limit_desc .= " $this_limit";
1113 if ($group_OR_limits) {
1114 $limit .= " and " if ( $query || $limit );
1115 $limit .= "($group_OR_limits)";
1117 if ($availability_limit) {
1118 $limit .= " and " if ( $query || $limit );
1119 $limit .= "($availability_limit)";
1122 # Normalize the query and limit strings
1125 for ( $query, $query_desc, $limit, $limit_desc ) {
1126 $_ =~ s/ / /g; # remove extra spaces
1127 $_ =~ s/^ //g; # remove any beginning spaces
1128 $_ =~ s/ $//g; # remove any ending spaces
1129 $_ =~ s/==/=/g; # remove double == from query
1131 $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1133 for ($query_cgi,$simple_query) {
1136 # append the limit to the query
1137 $query .= " " . $limit;
1141 warn "QUERY:" . $query;
1142 warn "QUERY CGI:" . $query_cgi;
1143 warn "QUERY DESC:" . $query_desc;
1144 warn "LIMIT:" . $limit;
1145 warn "LIMIT CGI:" . $limit_cgi;
1146 warn "LIMIT DESC:" . $limit_desc;
1147 warn "---------\nLeave buildQuery\n---------";
1150 undef, $query, $simple_query, $query_cgi,
1151 $query_desc, $limit, $limit_cgi, $limit_desc,
1152 $stopwords_removed, $query_type
1156 =head2 searchResults
1158 Format results in a form suitable for passing to the template
1162 # IMO this subroutine is pretty messy still -- it's responsible for
1163 # building the HTML output for the template
1165 my ( $searchdesc, $hits, $results_per_page, $offset, $scan, @marcresults ) = @_;
1166 my $dbh = C4::Context->dbh;
1170 # add search-term highlighting via <span>s on the search terms
1171 my $span_terms_hashref;
1172 for my $span_term ( split( / /, $searchdesc ) ) {
1173 $span_term =~ s/(.*=|\)|\(|\+|\.|\*)//g;
1174 $span_terms_hashref->{$span_term}++;
1177 #Build branchnames hash
1179 #get branch information.....
1182 $dbh->prepare("SELECT branchcode,branchname FROM branches")
1183 ; # FIXME : use C4::Koha::GetBranches
1185 while ( my $bdata = $bsth->fetchrow_hashref ) {
1186 $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'};
1188 # FIXME - We build an authorised values hash here, using the default framework
1189 # though it is possible to have different authvals for different fws.
1191 my $shelflocations =GetKohaAuthorisedValues('items.location','');
1193 # get notforloan authorised value list (see $shelflocations FIXME)
1194 my $notforloan_authorised_value = GetAuthValCode('items.notforloan','');
1196 #Build itemtype hash
1197 #find itemtype & itemtype image
1201 "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes"
1204 while ( my $bdata = $bsth->fetchrow_hashref ) {
1205 foreach (qw(description imageurl summary notforloan)) {
1206 $itemtypes{ $bdata->{'itemtype'} }->{$_} = $bdata->{$_};
1210 #search item field code
1213 "SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'"
1216 my ($itemtag) = $sth->fetchrow;
1218 ## find column names of items related to MARC
1219 my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items");
1221 my %subfieldstosearch;
1222 while ( ( my $column ) = $sth2->fetchrow ) {
1223 my ( $tagfield, $tagsubfield ) =
1224 &GetMarcFromKohaField( "items." . $column, "" );
1225 $subfieldstosearch{$column} = $tagsubfield;
1228 # handle which records to actually retrieve
1230 if ( $hits && $offset + $results_per_page <= $hits ) {
1231 $times = $offset + $results_per_page;
1234 $times = $hits; # FIXME: if $hits is undefined, why do we want to equal it?
1237 # loop through all of the records we've retrieved
1238 for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
1239 my $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
1240 my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' );
1241 $oldbiblio->{subtitle} = C4::Biblio::get_koha_field_from_marc('bibliosubtitle', 'subtitle', $marcrecord, '');
1242 $oldbiblio->{result_number} = $i + 1;
1244 # add imageurl to itemtype if there is one
1245 $oldbiblio->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
1247 $oldbiblio->{'authorised_value_images'} = C4::Items::get_authorised_value_images( C4::Biblio::get_biblio_authorised_values( $oldbiblio->{'biblionumber'}, $marcrecord ) );
1248 (my $aisbn) = $oldbiblio->{isbn} =~ /([\d-]*[X]*)/;
1250 $oldbiblio->{amazonisbn} = $aisbn;
1251 $oldbiblio->{description} = $itemtypes{ $oldbiblio->{itemtype} }->{description};
1252 # Build summary if there is one (the summary is defined in the itemtypes table)
1253 # FIXME: is this used anywhere, I think it can be commented out? -- JF
1254 if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
1255 my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1256 my @fields = $marcrecord->fields();
1257 foreach my $field (@fields) {
1258 my $tag = $field->tag();
1259 my $tagvalue = $field->as_string();
1260 if (! utf8::is_utf8($tagvalue)) {
1261 utf8::decode($tagvalue);
1265 s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
1266 unless ( $tag < 10 ) {
1267 my @subf = $field->subfields;
1268 for my $i ( 0 .. $#subf ) {
1269 my $subfieldcode = $subf[$i][0];
1270 my $subfieldvalue = $subf[$i][1];
1271 if (! utf8::is_utf8($subfieldvalue)) {
1272 utf8::decode($subfieldvalue);
1274 my $tagsubf = $tag . $subfieldcode;
1276 s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
1281 $summary =~ s/\[(.*?)]//g;
1282 $summary =~ s/\n/<br\/>/g;
1283 $oldbiblio->{summary} = $summary;
1286 # save an author with no <span> tag, for the <a href=search.pl?q=<!--tmpl_var name="author"-->> link
1287 $oldbiblio->{'author_nospan'} = $oldbiblio->{'author'};
1288 $oldbiblio->{'title_nospan'} = $oldbiblio->{'title'};
1289 $oldbiblio->{'subtitle_nospan'} = $oldbiblio->{'subtitle'};
1290 # Add search-term highlighting to the whole record where they match using <span>s
1291 if (C4::Context->preference("OpacHighlightedWords")){
1292 my $searchhighlightblob;
1293 for my $highlight_field ( $marcrecord->fields ) {
1295 # FIXME: need to skip title, subtitle, author, etc., as they are handled below
1296 next if $highlight_field->tag() =~ /(^00)/; # skip fixed fields
1297 for my $subfield ($highlight_field->subfields()) {
1299 next if $subfield->[0] eq '9';
1300 my $field = $subfield->[1];
1301 for my $term ( keys %$span_terms_hashref ) {
1302 if ( ( $field =~ /$term/i ) && (( length($term) > 3 ) || ($field =~ / $term /i)) ) {
1303 $field =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1307 $searchhighlightblob .= $field . " ... " if $match;
1311 $searchhighlightblob = ' ... '.$searchhighlightblob if $searchhighlightblob;
1312 $oldbiblio->{'searchhighlightblob'} = $searchhighlightblob;
1315 # Add search-term highlighting to the title, subtitle, etc. fields
1316 for my $term ( keys %$span_terms_hashref ) {
1317 my $old_term = $term;
1318 if ( length($term) > 3 ) {
1319 $term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\]|\\|\*)//g;
1320 foreach(qw(title subtitle author publishercode place pages notes size)) {
1321 $oldbiblio->{$_} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1326 ($i % 2) and $oldbiblio->{'toggle'} = 1;
1328 # Pull out the items fields
1329 my @fields = $marcrecord->field($itemtag);
1331 # Setting item statuses for display
1332 my @available_items_loop;
1333 my @onloan_items_loop;
1334 my @notforloan_items_loop;
1335 my @other_items_loop;
1337 my $available_items;
1339 my $notforloan_items;
1342 my $ordered_count = 0;
1343 my $available_count = 0;
1344 my $onloan_count = 0;
1345 my $notforloan_count = 0;
1346 my $longoverdue_count = 0;
1347 my $other_count = 0;
1348 my $wthdrawn_count = 0;
1349 my $itemlost_count = 0;
1350 my $itembinding_count = 0;
1351 my $itemdamaged_count = 0;
1352 my $item_in_transit_count = 0;
1353 my $can_place_holds = 0;
1354 my $items_count = scalar(@fields);
1356 ( C4::Context->preference('maxItemsinSearchResults') )
1357 ? C4::Context->preference('maxItemsinSearchResults') - 1
1360 # loop through every item
1361 foreach my $field (@fields) {
1364 # populate the items hash
1365 foreach my $code ( keys %subfieldstosearch ) {
1366 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1368 my $hbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'homebranch' : 'holdingbranch';
1369 my $otherbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'holdingbranch' : 'homebranch';
1370 # set item's branch name, use HomeOrHoldingBranch syspref first, fall back to the other one
1371 if ($item->{$hbranch}) {
1372 $item->{'branchname'} = $branches{$item->{$hbranch}};
1374 elsif ($item->{$otherbranch}) { # Last resort
1375 $item->{'branchname'} = $branches{$item->{$otherbranch}};
1378 my $prefix = $item->{$hbranch} . '--' . $item->{location} . $item->{itype} . $item->{itemcallnumber};
1379 # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
1380 if ( $item->{onloan} ) {
1382 my $key = $prefix . $item->{due_date};
1383 $onloan_items->{$key}->{due_date} = format_date($item->{onloan});
1384 $onloan_items->{$key}->{count}++ if $item->{$hbranch};
1385 $onloan_items->{$key}->{branchname} = $item->{branchname};
1386 $onloan_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1387 $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber};
1388 $onloan_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1389 $onloan_items->{$key}->{barcode} = $item->{barcode};
1390 # if something's checked out and lost, mark it as 'long overdue'
1391 if ( $item->{itemlost} ) {
1392 $onloan_items->{$prefix}->{longoverdue}++;
1393 $longoverdue_count++;
1394 } else { # can place holds as long as item isn't lost
1395 $can_place_holds = 1;
1399 # items not on loan, but still unavailable ( lost, withdrawn, damaged )
1403 if ( $item->{notforloan} == -1 ) {
1407 # is item in transit?
1408 my $transfertwhen = '';
1409 my ($transfertfrom, $transfertto);
1411 unless ($item->{wthdrawn}
1412 || $item->{itemlost}
1414 || $item->{notforloan}
1415 || $items_count > 20) {
1417 # A couple heuristics to limit how many times
1418 # we query the database for item transfer information, sacrificing
1419 # accuracy in some cases for speed;
1421 # 1. don't query if item has one of the other statuses
1422 # 2. don't check transit status if the bib has
1423 # more than 20 items
1425 # FIXME: to avoid having the query the database like this, and to make
1426 # the in transit status count as unavailable for search limiting,
1427 # should map transit status to record indexed in Zebra.
1429 ($transfertwhen, $transfertfrom, $transfertto) = C4::Circulation::GetTransfers($item->{itemnumber});
1432 # item is withdrawn, lost or damaged
1433 if ( $item->{wthdrawn}
1434 || $item->{itemlost}
1436 || $item->{notforloan}
1437 || ($transfertwhen ne ''))
1439 $wthdrawn_count++ if $item->{wthdrawn};
1440 $itemlost_count++ if $item->{itemlost};
1441 $itemdamaged_count++ if $item->{damaged};
1442 $item_in_transit_count++ if $transfertwhen ne '';
1443 $item->{status} = $item->{wthdrawn} . "-" . $item->{itemlost} . "-" . $item->{damaged} . "-" . $item->{notforloan};
1445 my $key = $prefix . $item->{status};
1447 foreach (qw(wthdrawn itemlost damaged branchname itemcallnumber)) {
1448 if($item->{notforloan} == 1){
1449 $notforloan_items->{$key}->{$_} = $item->{$_};
1451 $other_items->{$key}->{$_} = $item->{$_};
1454 if($item->{notforloan} == 1){
1455 $notforloan_count++;
1457 $notforloan_items->{$key}->{intransit} = ($transfertwhen ne '') ? 1 : 0;
1458 $notforloan_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1459 $notforloan_items->{$key}->{count}++ if $item->{$hbranch};
1460 $notforloan_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1461 $notforloan_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1462 $notforloan_items->{$key}->{barcode} = $item->{barcode};
1466 $other_items->{$key}->{intransit} = ($transfertwhen ne '') ? 1 : 0;
1467 $other_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1468 $other_items->{$key}->{count}++ if $item->{$hbranch};
1469 $other_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1470 $other_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1471 $other_items->{$key}->{barcode} = $item->{barcode};
1477 $can_place_holds = 1;
1479 $available_items->{$prefix}->{count}++ if $item->{$hbranch};
1480 foreach (qw(branchname itemcallnumber barcode)) {
1481 $available_items->{$prefix}->{$_} = $item->{$_};
1483 $available_items->{$prefix}->{location} = $shelflocations->{ $item->{location} };
1484 $available_items->{$prefix}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1487 } # notforloan, item level and biblioitem level
1488 my ( $availableitemscount, $onloanitemscount, $notforloanitemscount,$otheritemscount );
1490 ( C4::Context->preference('maxItemsinSearchResults') )
1491 ? C4::Context->preference('maxItemsinSearchResults') - 1
1493 for my $key ( sort keys %$onloan_items ) {
1494 (++$onloanitemscount > $maxitems) and last;
1495 push @onloan_items_loop, $onloan_items->{$key};
1497 for my $key ( sort keys %$other_items ) {
1498 (++$otheritemscount > $maxitems) and last;
1499 push @other_items_loop, $other_items->{$key};
1501 for my $key ( sort keys %$notforloan_items ) {
1502 (++$notforloanitemscount > $maxitems) and last;
1503 push @notforloan_items_loop, $notforloan_items->{$key};
1505 for my $key ( sort keys %$available_items ) {
1506 (++$availableitemscount > $maxitems) and last;
1507 push @available_items_loop, $available_items->{$key}
1510 # XSLT processing of some stuff
1511 if (C4::Context->preference("XSLTResultsDisplay") && !$scan) {
1512 $oldbiblio->{XSLTResultsRecord} = XSLTParse4Display(
1513 $oldbiblio->{biblionumber}, $marcrecord, 'Results' );
1516 # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
1517 $can_place_holds = 0 if $itemtypes{ $oldbiblio->{itemtype} }->{notforloan};
1518 $oldbiblio->{norequests} = 1 unless $can_place_holds;
1519 $oldbiblio->{itemsplural} = 1 if $items_count > 1;
1520 $oldbiblio->{items_count} = $items_count;
1521 $oldbiblio->{available_items_loop} = \@available_items_loop;
1522 $oldbiblio->{notforloan_items_loop}= \@notforloan_items_loop;
1523 $oldbiblio->{onloan_items_loop} = \@onloan_items_loop;
1524 $oldbiblio->{other_items_loop} = \@other_items_loop;
1525 $oldbiblio->{availablecount} = $available_count;
1526 $oldbiblio->{availableplural} = 1 if $available_count > 1;
1527 $oldbiblio->{onloancount} = $onloan_count;
1528 $oldbiblio->{onloanplural} = 1 if $onloan_count > 1;
1529 $oldbiblio->{notforloancount} = $notforloan_count;
1530 $oldbiblio->{othercount} = $other_count;
1531 $oldbiblio->{otherplural} = 1 if $other_count > 1;
1532 $oldbiblio->{wthdrawncount} = $wthdrawn_count;
1533 $oldbiblio->{itemlostcount} = $itemlost_count;
1534 $oldbiblio->{damagedcount} = $itemdamaged_count;
1535 $oldbiblio->{intransitcount} = $item_in_transit_count;
1536 $oldbiblio->{orderedcount} = $ordered_count;
1537 $oldbiblio->{isbn} =~
1538 s/-//g; # deleting - in isbn to enable amazon content
1539 push( @newresults, $oldbiblio );
1544 #----------------------------------------------------------------------
1546 # Non-Zebra GetRecords#
1547 #----------------------------------------------------------------------
1551 NZgetRecords has the same API as zera getRecords, even if some parameters are not managed
1557 $query, $simple_query, $sort_by_ref, $servers_ref,
1558 $results_per_page, $offset, $expanded_facet, $branches,
1561 warn "query =$query" if $DEBUG;
1562 my $result = NZanalyse($query);
1563 warn "results =$result" if $DEBUG;
1565 NZorder( $result, @$sort_by_ref[0], $results_per_page, $offset ),
1571 NZanalyse : get a CQL string as parameter, and returns a list of biblionumber;title,biblionumber;title,...
1572 the list is built from an inverted index in the nozebra SQL table
1573 note that title is here only for convenience : the sorting will be very fast when requested on title
1574 if the sorting is requested on something else, we will have to reread all results, and that may be longer.
1579 my ( $string, $server ) = @_;
1580 # warn "---------" if $DEBUG;
1581 warn " NZanalyse" if $DEBUG;
1582 # warn "---------" if $DEBUG;
1584 # $server contains biblioserver or authorities, depending on what we search on.
1585 #warn "querying : $string on $server";
1586 $server = 'biblioserver' unless $server;
1588 # if we have a ", replace the content to discard temporarily any and/or/not inside
1590 if ( $string =~ /"/ ) {
1591 $string =~ s/"(.*?)"/__X__/;
1593 warn "commacontent : $commacontent" if $DEBUG;
1596 # split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y"
1597 # then, call again NZanalyse with $left and $right
1598 # (recursive until we find a leaf (=> something without and/or/not)
1599 # delete repeated operator... Would then go in infinite loop
1600 while ( $string =~ s/( and| or| not| AND| OR| NOT)\1/$1/g ) {
1603 #process parenthesis before.
1604 if ( $string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/ ) {
1607 my $operator = lc($3); # FIXME: and/or/not are operators, not operands
1609 "dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right"
1611 my $leftresult = NZanalyse( $left, $server );
1613 my $rightresult = NZanalyse( $right, $server );
1615 # OK, we have the results for right and left part of the query
1616 # depending of operand, intersect, union or exclude both lists
1617 # to get a result list
1618 if ( $operator eq ' and ' ) {
1619 return NZoperatorAND($leftresult,$rightresult);
1621 elsif ( $operator eq ' or ' ) {
1623 # just merge the 2 strings
1624 return $leftresult . $rightresult;
1626 elsif ( $operator eq ' not ' ) {
1627 return NZoperatorNOT($leftresult,$rightresult);
1631 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1635 warn "string :" . $string if $DEBUG;
1639 if ($string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/) {
1642 $operator = lc($2); # FIXME: and/or/not are operators, not operands
1644 warn "no parenthesis. left : $left operator: $operator right: $right"
1647 # it's not a leaf, we have a and/or/not
1650 # reintroduce comma content if needed
1651 $right =~ s/__X__/"$commacontent"/ if $commacontent;
1652 $left =~ s/__X__/"$commacontent"/ if $commacontent;
1653 warn "node : $left / $operator / $right\n" if $DEBUG;
1654 my $leftresult = NZanalyse( $left, $server );
1655 my $rightresult = NZanalyse( $right, $server );
1656 warn " leftresult : $leftresult" if $DEBUG;
1657 warn " rightresult : $rightresult" if $DEBUG;
1658 # OK, we have the results for right and left part of the query
1659 # depending of operand, intersect, union or exclude both lists
1660 # to get a result list
1661 if ( $operator eq ' and ' ) {
1663 return NZoperatorAND($leftresult,$rightresult);
1665 elsif ( $operator eq ' or ' ) {
1667 # just merge the 2 strings
1668 return $leftresult . $rightresult;
1670 elsif ( $operator eq ' not ' ) {
1671 return NZoperatorNOT($leftresult,$rightresult);
1675 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1676 die "error : operand unknown : $operator for $string";
1679 # it's a leaf, do the real SQL query and return the result
1682 $string =~ s/__X__/"$commacontent"/ if $commacontent;
1683 $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g;
1684 #remove trailing blank at the beginning
1686 warn "leaf:$string" if $DEBUG;
1688 # parse the string in in operator/operand/value again
1692 if ($string =~ /(.*)(>=|<=)(.*)/) {
1699 # warn "handling leaf... left:$left operator:$operator right:$right"
1701 unless ($operator) {
1702 if ($string =~ /(.*)(>|<|=)(.*)/) {
1707 "handling unless (operator)... left:$left operator:$operator right:$right"
1715 # strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr...
1718 # automatic replace for short operators
1719 $left = 'title' if $left =~ '^ti$';
1720 $left = 'author' if $left =~ '^au$';
1721 $left = 'publisher' if $left =~ '^pb$';
1722 $left = 'subject' if $left =~ '^su$';
1723 $left = 'koha-Auth-Number' if $left =~ '^an$';
1724 $left = 'keyword' if $left =~ '^kw$';
1725 warn "handling leaf... left:$left operator:$operator right:$right" if $DEBUG;
1726 if ( $operator && $left ne 'keyword' ) {
1728 #do a specific search
1729 my $dbh = C4::Context->dbh;
1730 $operator = 'LIKE' if $operator eq '=' and $right =~ /%/;
1733 "SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?"
1735 warn "$left / $operator / $right\n" if $DEBUG;
1737 # split each word, query the DB and build the biblionumbers result
1738 #sanitizing leftpart
1739 $left =~ s/^\s+|\s+$//;
1740 foreach ( split / /, $right ) {
1742 $_ =~ s/^\s+|\s+$//;
1744 warn "EXECUTE : $server, $left, $_" if $DEBUG;
1745 $sth->execute( $server, $left, $_ )
1746 or warn "execute failed: $!";
1747 while ( my ( $line, $value ) = $sth->fetchrow ) {
1749 # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
1750 # otherwise, fill the result
1751 $biblionumbers .= $line
1752 unless ( $right =~ /^\d+$/ && $value =~ /\D/ );
1753 warn "result : $value "
1754 . ( $right =~ /\d/ ) . "=="
1755 . ( $value =~ /\D/?$line:"" ) if $DEBUG; #= $line";
1758 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1760 warn "NZAND" if $DEBUG;
1761 $results = NZoperatorAND($biblionumbers,$results);
1764 $results = $biblionumbers;
1770 #do a complete search (all indexes), if index='kw' do complete search too.
1771 my $dbh = C4::Context->dbh;
1774 "SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?"
1777 # split each word, query the DB and build the biblionumbers result
1778 foreach ( split / /, $string ) {
1779 next if C4::Context->stopwords->{ uc($_) }; # skip if stopword
1780 warn "search on all indexes on $_" if $DEBUG;
1783 $sth->execute( $server, $_ );
1784 while ( my $line = $sth->fetchrow ) {
1785 $biblionumbers .= $line;
1788 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1790 $results = NZoperatorAND($biblionumbers,$results);
1793 warn "NEW RES for $_ = $biblionumbers" if $DEBUG;
1794 $results = $biblionumbers;
1798 warn "return : $results for LEAF : $string" if $DEBUG;
1801 warn "---------\nLeave NZanalyse\n---------" if $DEBUG;
1805 my ($rightresult, $leftresult)=@_;
1807 my @leftresult = split /;/, $leftresult;
1808 warn " @leftresult / $rightresult \n" if $DEBUG;
1810 # my @rightresult = split /;/,$leftresult;
1813 # parse the left results, and if the biblionumber exist in the right result, save it in finalresult
1814 # the result is stored twice, to have the same weight for AND than OR.
1815 # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130
1816 # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64
1817 foreach (@leftresult) {
1820 ( $value, $countvalue ) = ( $1, $2 ) if ($value=~/(.*)-(\d+)$/);
1821 if ( $rightresult =~ /\Q$value\E-(\d+);/ ) {
1822 $countvalue = ( $1 > $countvalue ? $countvalue : $1 );
1824 "$value-$countvalue;$value-$countvalue;";
1827 warn "NZAND DONE : $finalresult \n" if $DEBUG;
1828 return $finalresult;
1832 my ($rightresult, $leftresult)=@_;
1833 return $rightresult.$leftresult;
1837 my ($leftresult, $rightresult)=@_;
1839 my @leftresult = split /;/, $leftresult;
1841 # my @rightresult = split /;/,$leftresult;
1843 foreach (@leftresult) {
1845 $value=$1 if $value=~m/(.*)-\d+$/;
1846 unless ($rightresult =~ "$value-") {
1847 $finalresult .= "$_;";
1850 return $finalresult;
1855 $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset);
1862 my ( $biblionumbers, $ordering, $results_per_page, $offset ) = @_;
1863 warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG;
1865 # order title asc by default
1866 # $ordering = '1=36 <i' unless $ordering;
1867 $results_per_page = 20 unless $results_per_page;
1868 $offset = 0 unless $offset;
1869 my $dbh = C4::Context->dbh;
1872 # order by POPULARITY
1874 if ( $ordering =~ /popularity/ ) {
1878 # popularity is not in MARC record, it's builded from a specific query
1880 $dbh->prepare("select sum(issues) from items where biblionumber=?");
1881 foreach ( split /;/, $biblionumbers ) {
1882 my ( $biblionumber, $title ) = split /,/, $_;
1883 $result{$biblionumber} = GetMarcBiblio($biblionumber);
1884 $sth->execute($biblionumber);
1885 my $popularity = $sth->fetchrow || 0;
1887 # hint : the key is popularity.title because we can have
1888 # many results with the same popularity. In this cas, sub-ordering is done by title
1889 # we also have biblionumber to avoid bug for 2 biblios with the same title & popularity
1890 # (un-frequent, I agree, but we won't forget anything that way ;-)
1891 $popularity{ sprintf( "%10d", $popularity ) . $title
1892 . $biblionumber } = $biblionumber;
1895 # sort the hash and return the same structure as GetRecords (Zebra querying)
1898 if ( $ordering eq 'popularity_dsc' ) { # sort popularity DESC
1899 foreach my $key ( sort { $b cmp $a } ( keys %popularity ) ) {
1900 $result_hash->{'RECORDS'}[ $numbers++ ] =
1901 $result{ $popularity{$key} }->as_usmarc();
1904 else { # sort popularity ASC
1905 foreach my $key ( sort ( keys %popularity ) ) {
1906 $result_hash->{'RECORDS'}[ $numbers++ ] =
1907 $result{ $popularity{$key} }->as_usmarc();
1910 my $finalresult = ();
1911 $result_hash->{'hits'} = $numbers;
1912 $finalresult->{'biblioserver'} = $result_hash;
1913 return $finalresult;
1919 elsif ( $ordering =~ /author/ ) {
1921 foreach ( split /;/, $biblionumbers ) {
1922 my ( $biblionumber, $title ) = split /,/, $_;
1923 my $record = GetMarcBiblio($biblionumber);
1925 if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1926 $author = $record->subfield( '200', 'f' );
1927 $author = $record->subfield( '700', 'a' ) unless $author;
1930 $author = $record->subfield( '100', 'a' );
1933 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1934 # and we don't want to get only 1 result for each of them !!!
1935 $result{ $author . $biblionumber } = $record;
1938 # sort the hash and return the same structure as GetRecords (Zebra querying)
1941 if ( $ordering eq 'author_za' ) { # sort by author desc
1942 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1943 $result_hash->{'RECORDS'}[ $numbers++ ] =
1944 $result{$key}->as_usmarc();
1947 else { # sort by author ASC
1948 foreach my $key ( sort ( keys %result ) ) {
1949 $result_hash->{'RECORDS'}[ $numbers++ ] =
1950 $result{$key}->as_usmarc();
1953 my $finalresult = ();
1954 $result_hash->{'hits'} = $numbers;
1955 $finalresult->{'biblioserver'} = $result_hash;
1956 return $finalresult;
1959 # ORDER BY callnumber
1962 elsif ( $ordering =~ /callnumber/ ) {
1964 foreach ( split /;/, $biblionumbers ) {
1965 my ( $biblionumber, $title ) = split /,/, $_;
1966 my $record = GetMarcBiblio($biblionumber);
1968 my ( $callnumber_tag, $callnumber_subfield ) =
1969 GetMarcFromKohaField( 'items.itemcallnumber','' );
1970 ( $callnumber_tag, $callnumber_subfield ) =
1971 GetMarcFromKohaField('biblioitems.callnumber','')
1972 unless $callnumber_tag;
1973 if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1974 $callnumber = $record->subfield( '200', 'f' );
1977 $callnumber = $record->subfield( '100', 'a' );
1980 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1981 # and we don't want to get only 1 result for each of them !!!
1982 $result{ $callnumber . $biblionumber } = $record;
1985 # sort the hash and return the same structure as GetRecords (Zebra querying)
1988 if ( $ordering eq 'call_number_dsc' ) { # sort by title desc
1989 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1990 $result_hash->{'RECORDS'}[ $numbers++ ] =
1991 $result{$key}->as_usmarc();
1994 else { # sort by title ASC
1995 foreach my $key ( sort { $a cmp $b } ( keys %result ) ) {
1996 $result_hash->{'RECORDS'}[ $numbers++ ] =
1997 $result{$key}->as_usmarc();
2000 my $finalresult = ();
2001 $result_hash->{'hits'} = $numbers;
2002 $finalresult->{'biblioserver'} = $result_hash;
2003 return $finalresult;
2005 elsif ( $ordering =~ /pubdate/ ) { #pub year
2007 foreach ( split /;/, $biblionumbers ) {
2008 my ( $biblionumber, $title ) = split /,/, $_;
2009 my $record = GetMarcBiblio($biblionumber);
2010 my ( $publicationyear_tag, $publicationyear_subfield ) =
2011 GetMarcFromKohaField( 'biblioitems.publicationyear', '' );
2012 my $publicationyear =
2013 $record->subfield( $publicationyear_tag,
2014 $publicationyear_subfield );
2016 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2017 # and we don't want to get only 1 result for each of them !!!
2018 $result{ $publicationyear . $biblionumber } = $record;
2021 # sort the hash and return the same structure as GetRecords (Zebra querying)
2024 if ( $ordering eq 'pubdate_dsc' ) { # sort by pubyear desc
2025 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2026 $result_hash->{'RECORDS'}[ $numbers++ ] =
2027 $result{$key}->as_usmarc();
2030 else { # sort by pub year ASC
2031 foreach my $key ( sort ( keys %result ) ) {
2032 $result_hash->{'RECORDS'}[ $numbers++ ] =
2033 $result{$key}->as_usmarc();
2036 my $finalresult = ();
2037 $result_hash->{'hits'} = $numbers;
2038 $finalresult->{'biblioserver'} = $result_hash;
2039 return $finalresult;
2045 elsif ( $ordering =~ /title/ ) {
2047 # the title is in the biblionumbers string, so we just need to build a hash, sort it and return
2049 foreach ( split /;/, $biblionumbers ) {
2050 my ( $biblionumber, $title ) = split /,/, $_;
2052 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2053 # and we don't want to get only 1 result for each of them !!!
2054 # hint & speed improvement : we can order without reading the record
2055 # so order, and read records only for the requested page !
2056 $result{ $title . $biblionumber } = $biblionumber;
2059 # sort the hash and return the same structure as GetRecords (Zebra querying)
2062 if ( $ordering eq 'title_az' ) { # sort by title desc
2063 foreach my $key ( sort ( keys %result ) ) {
2064 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2067 else { # sort by title ASC
2068 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2069 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2073 # limit the $results_per_page to result size if it's more
2074 $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2076 # for the requested page, replace biblionumber by the complete record
2077 # speed improvement : avoid reading too much things
2079 my $counter = $offset ;
2080 $counter <= $offset + $results_per_page ;
2084 $result_hash->{'RECORDS'}[$counter] =
2085 GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc;
2087 my $finalresult = ();
2088 $result_hash->{'hits'} = $numbers;
2089 $finalresult->{'biblioserver'} = $result_hash;
2090 return $finalresult;
2097 # we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking
2100 foreach ( split /;/, $biblionumbers ) {
2101 my ( $biblionumber, $title ) = split /,/, $_;
2102 $title =~ /(.*)-(\d)/;
2107 # note that we + the ranking because ranking is calculated on weight of EACH term requested.
2108 # if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N
2109 # biblio N has ranking = 6
2110 $count_ranking{$biblionumber} += $ranking;
2113 # build the result by "inverting" the count_ranking hash
2114 # hing : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead
2116 foreach ( keys %count_ranking ) {
2117 $result{ sprintf( "%10d", $count_ranking{$_} ) . '-' . $_ } = $_;
2120 # sort the hash and return the same structure as GetRecords (Zebra querying)
2123 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2124 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2127 # limit the $results_per_page to result size if it's more
2128 $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2130 # for the requested page, replace biblionumber by the complete record
2131 # speed improvement : avoid reading too much things
2133 my $counter = $offset ;
2134 $counter <= $offset + $results_per_page ;
2138 $result_hash->{'RECORDS'}[$counter] =
2139 GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc
2140 if $result_hash->{'RECORDS'}[$counter];
2142 my $finalresult = ();
2143 $result_hash->{'hits'} = $numbers;
2144 $finalresult->{'biblioserver'} = $result_hash;
2145 return $finalresult;
2149 END { } # module clean-up code here (global destructor)
2156 Koha Developement team <info@koha.org>