3 # This file is part of Koha.
5 # Koha is free software; you can redistribute it and/or modify it under the
6 # terms of the GNU General Public License as published by the Free Software
7 # Foundation; either version 2 of the License, or (at your option) any later
10 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
14 # You should have received a copy of the GNU General Public License along with
15 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
16 # Suite 330, Boston, MA 02111-1307 USA
19 # use warnings; # FIXME
22 use C4::Biblio; # GetMarcFromKohaField
23 use C4::Koha; # getFacets
25 use C4::Search::PazPar2;
27 use C4::Dates qw(format_date);
30 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
32 # set the version for version checking
35 $DEBUG = ($ENV{DEBUG}) ? 1 : 0;
40 C4::Search - Functions for searching the Koha catalog.
44 See opac/opac-search.pl or catalogue/search.pl for example of usage
48 This module provides searching functions for Koha's bibliographic databases
64 # make all your functions, whether exported or not;
68 ($biblionumber,$biblionumber,$title) = FindDuplicate($record);
70 This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm
76 my $dbh = C4::Context->dbh;
77 my $result = TransformMarcToKoha( $dbh, $record, '' );
82 my ( $biblionumber, $title );
84 # search duplicate on ISBN, easy and fast..
86 if ( $result->{isbn} ) {
87 $result->{isbn} =~ s/\(.*$//;
88 $result->{isbn} =~ s/\s+$//;
89 $query = "isbn=$result->{isbn}";
92 $result->{title} =~ s /\\//g;
93 $result->{title} =~ s /\"//g;
94 $result->{title} =~ s /\(//g;
95 $result->{title} =~ s /\)//g;
97 # FIXME: instead of removing operators, could just do
98 # quotes around the value
99 $result->{title} =~ s/(and|or|not)//g;
100 $query = "ti,ext=$result->{title}";
101 $query .= " and itemtype=$result->{itemtype}"
102 if ( $result->{itemtype} );
103 if ( $result->{author} ) {
104 $result->{author} =~ s /\\//g;
105 $result->{author} =~ s /\"//g;
106 $result->{author} =~ s /\(//g;
107 $result->{author} =~ s /\)//g;
109 # remove valid operators
110 $result->{author} =~ s/(and|or|not)//g;
111 $query .= " and au,ext=$result->{author}";
115 # FIXME: add error handling
116 my ( $error, $searchresults ) = SimpleSearch($query); # FIXME :: hardcoded !
118 foreach my $possible_duplicate_record (@$searchresults) {
120 MARC::Record->new_from_usmarc($possible_duplicate_record);
121 my $result = TransformMarcToKoha( $dbh, $marcrecord, '' );
123 # FIXME :: why 2 $biblionumber ?
125 push @results, $result->{'biblionumber'};
126 push @results, $result->{'title'};
134 ( $error, $results, $total_hits ) = SimpleSearch( $query, $offset, $max_results, [@servers] );
136 This function provides a simple search API on the bibliographic catalog
142 * $query can be a simple keyword or a complete CCL query
143 * @servers is optional. Defaults to biblioserver as found in koha-conf.xml
144 * $offset - If present, represents the number of records at the beggining to omit. Defaults to 0
145 * $max_results - if present, determines the maximum number of records to fetch. undef is All. defaults to undef.
150 * $error is a empty unless an error is detected
151 * \@results is an array of records.
152 * $total_hits is the number of hits that would have been returned with no limit
154 =item C<usage in the script:>
158 my ( $error, $marcresults, $total_hits ) = SimpleSearch($query);
160 if (defined $error) {
161 $template->param(query_error => $error);
162 warn "error: ".$error;
163 output_html_with_http_headers $input, $cookie, $template->output;
167 my $hits = scalar @$marcresults;
170 for my $i (0..$hits) {
172 my $marcrecord = MARC::File::USMARC::decode($marcresults->[$i]);
173 my $biblio = TransformMarcToKoha(C4::Context->dbh,$marcrecord,'');
175 #build the hash for the template.
176 $resultsloop{highlight} = ($i % 2)?(1):(0);
177 $resultsloop{title} = $biblio->{'title'};
178 $resultsloop{subtitle} = $biblio->{'subtitle'};
179 $resultsloop{biblionumber} = $biblio->{'biblionumber'};
180 $resultsloop{author} = $biblio->{'author'};
181 $resultsloop{publishercode} = $biblio->{'publishercode'};
182 $resultsloop{publicationyear} = $biblio->{'publicationyear'};
184 push @results, \%resultsloop;
187 $template->param(result=>\@results);
192 my ( $query, $offset, $max_results, $servers ) = @_;
194 if ( C4::Context->preference('NoZebra') ) {
195 my $result = NZorder( NZanalyse($query) )->{'biblioserver'};
198 && $result->{hits} > 0 ? $result->{'RECORDS'} : [] );
199 return ( undef, $search_result, scalar($result->{hits}) );
202 # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too.
203 my @servers = defined ( $servers ) ? @$servers : ( "biblioserver" );
209 return ( "No query entered", undef, undef ) unless $query;
211 # Initialize & Search Zebra
212 for ( my $i = 0 ; $i < @servers ; $i++ ) {
214 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
215 $zoom_queries[$i] = new ZOOM::Query::CCL2RPN( $query, $zconns[$i]);
216 $tmpresults[$i] = $zconns[$i]->search( $zoom_queries[$i] );
220 $zconns[$i]->errmsg() . " ("
221 . $zconns[$i]->errcode() . ") "
222 . $zconns[$i]->addinfo() . " "
223 . $zconns[$i]->diagset();
225 return ( $error, undef, undef ) if $zconns[$i]->errcode();
229 # caught a ZOOM::Exception
233 . $@->addinfo() . " "
236 return ( $error, undef, undef );
239 while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
240 my $event = $zconns[ $i - 1 ]->last_event();
241 if ( $event == ZOOM::Event::ZEND ) {
243 my $first_record = defined( $offset ) ? $offset+1 : 1;
244 my $hits = $tmpresults[ $i - 1 ]->size();
245 $total_hits += $hits;
246 my $last_record = $hits;
247 if ( defined $max_results && $offset + $max_results < $hits ) {
248 $last_record = $offset + $max_results;
251 for my $j ( $first_record..$last_record ) {
252 my $record = $tmpresults[ $i - 1 ]->record( $j-1 )->raw(); # 0 indexed
253 push @results, $record;
258 foreach my $result (@tmpresults) {
261 foreach my $zoom_query (@zoom_queries) {
262 $zoom_query->destroy();
265 return ( undef, \@results, $total_hits );
271 ( undef, $results_hashref, \@facets_loop ) = getRecords (
273 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
274 $results_per_page, $offset, $expanded_facet, $branches,
278 The all singing, all dancing, multi-server, asynchronous, scanning,
279 searching, record nabbing, facet-building
281 See verbse embedded documentation.
287 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
288 $results_per_page, $offset, $expanded_facet, $branches,
292 my @servers = @$servers_ref;
293 my @sort_by = @$sort_by_ref;
295 # Initialize variables for the ZOOM connection and results object
299 my $results_hashref = ();
301 # Initialize variables for the faceted results objects
302 my $facets_counter = ();
303 my $facets_info = ();
304 my $facets = getFacets();
307 ; # stores the ref to array of hashes for template facets loop
309 ### LOOP THROUGH THE SERVERS
310 for ( my $i = 0 ; $i < @servers ; $i++ ) {
311 $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 );
313 # perform the search, create the results objects
314 # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query
315 my $query_to_use = ($servers[$i] =~ /biblioserver/) ? $koha_query : $simple_query;
317 #$query_to_use = $simple_query if $scan;
318 warn $simple_query if ( $scan and $DEBUG );
320 # Check if we've got a query_type defined, if so, use it
324 if ( $query_type =~ /^ccl/ ) {
326 s/\:/\=/g; # change : to = last minute (FIXME)
329 new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
332 elsif ( $query_type =~ /^cql/ ) {
335 new ZOOM::Query::CQL( $query_to_use, $zconns[$i] ) );
337 elsif ( $query_type =~ /^pqf/ ) {
340 new ZOOM::Query::PQF( $query_to_use, $zconns[$i] ) );
347 new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
353 new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] )
359 warn "WARNING: query problem with $query_to_use " . $@;
362 # Concatenate the sort_by limits and pass them to the results object
363 # Note: sort will override rank
365 foreach my $sort (@sort_by) {
366 if ( $sort eq "author_az" ) {
367 $sort_by .= "1=1003 <i ";
369 elsif ( $sort eq "author_za" ) {
370 $sort_by .= "1=1003 >i ";
372 elsif ( $sort eq "popularity_asc" ) {
373 $sort_by .= "1=9003 <i ";
375 elsif ( $sort eq "popularity_dsc" ) {
376 $sort_by .= "1=9003 >i ";
378 elsif ( $sort eq "call_number_asc" ) {
379 $sort_by .= "1=20 <i ";
381 elsif ( $sort eq "call_number_dsc" ) {
382 $sort_by .= "1=20 >i ";
384 elsif ( $sort eq "pubdate_asc" ) {
385 $sort_by .= "1=31 <i ";
387 elsif ( $sort eq "pubdate_dsc" ) {
388 $sort_by .= "1=31 >i ";
390 elsif ( $sort eq "acqdate_asc" ) {
391 $sort_by .= "1=32 <i ";
393 elsif ( $sort eq "acqdate_dsc" ) {
394 $sort_by .= "1=32 >i ";
396 elsif ( $sort eq "title_az" ) {
397 $sort_by .= "1=4 <i ";
399 elsif ( $sort eq "title_za" ) {
400 $sort_by .= "1=4 >i ";
403 warn "Ignoring unrecognized sort '$sort' requested" if $sort_by;
407 if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) {
408 warn "WARNING sort $sort_by failed";
411 } # finished looping through servers
413 # The big moment: asynchronously retrieve results from all servers
414 while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) {
415 my $ev = $zconns[ $i - 1 ]->last_event();
416 if ( $ev == ZOOM::Event::ZEND ) {
417 next unless $results[ $i - 1 ];
418 my $size = $results[ $i - 1 ]->size();
422 # loop through the results
423 $results_hash->{'hits'} = $size;
425 if ( $offset + $results_per_page <= $size ) {
426 $times = $offset + $results_per_page;
431 for ( my $j = $offset ; $j < $times ; $j++ ) {
436 ## Check if it's an index scan
438 my ( $term, $occ ) = $results[ $i - 1 ]->term($j);
440 # here we create a minimal MARC record and hand it off to the
441 # template just like a normal result ... perhaps not ideal, but
443 my $tmprecord = MARC::Record->new();
444 $tmprecord->encoding('UTF-8');
448 # the minimal record in author/title (depending on MARC flavour)
449 if (C4::Context->preference("marcflavour") eq "UNIMARC") {
450 $tmptitle = MARC::Field->new('200',' ',' ', a => $term, f => $occ);
451 $tmprecord->append_fields($tmptitle);
453 $tmptitle = MARC::Field->new('245',' ',' ', a => $term,);
454 $tmpauthor = MARC::Field->new('100',' ',' ', a => $occ,);
455 $tmprecord->append_fields($tmptitle);
456 $tmprecord->append_fields($tmpauthor);
458 $results_hash->{'RECORDS'}[$j] = $tmprecord->as_usmarc();
463 $record = $results[ $i - 1 ]->record($j)->raw();
465 # warn "RECORD $j:".$record;
466 $results_hash->{'RECORDS'}[$j] = $record;
468 # Fill the facets while we're looping, but only for the biblioserver
469 $facet_record = MARC::Record->new_from_usmarc($record)
470 if $servers[ $i - 1 ] =~ /biblioserver/;
472 #warn $servers[$i-1]."\n".$record; #.$facet_record->title();
474 for ( my $k = 0 ; $k <= @$facets ; $k++ ) {
475 ($facets->[$k]) or next;
476 my @fields = map {$facet_record->field($_)} @{$facets->[$k]->{'tags'}} ;
477 for my $field (@fields) {
478 my @subfields = $field->subfields();
479 for my $subfield (@subfields) {
480 my ( $code, $data ) = @$subfield;
481 ($code eq $facets->[$k]->{'subfield'}) or next;
482 $facets_counter->{ $facets->[$k]->{'link_value'} }->{$data}++;
485 $facets_info->{ $facets->[$k]->{'link_value'} }->{'label_value'} =
486 $facets->[$k]->{'label_value'};
487 $facets_info->{ $facets->[$k]->{'link_value'} }->{'expanded'} =
488 $facets->[$k]->{'expanded'};
493 $results_hashref->{ $servers[ $i - 1 ] } = $results_hash;
496 # warn "connection ", $i-1, ": $size hits";
497 # warn $results[$i-1]->record(0)->render() if $size > 0;
500 if ( $servers[ $i - 1 ] =~ /biblioserver/ ) {
502 sort { $facets_counter->{$b} <=> $facets_counter->{$a} }
503 keys %$facets_counter )
506 my $number_of_facets;
507 my @this_facets_array;
510 $facets_counter->{$link_value}
511 ->{$b} <=> $facets_counter->{$link_value}->{$a}
512 } keys %{ $facets_counter->{$link_value} }
516 if ( ( $number_of_facets < 6 )
517 || ( $expanded_facet eq $link_value )
518 || ( $facets_info->{$link_value}->{'expanded'} ) )
521 # Sanitize the link value ), ( will cause errors with CCL,
522 my $facet_link_value = $one_facet;
523 $facet_link_value =~ s/(\(|\))/ /g;
525 # fix the length that will display in the label,
526 my $facet_label_value = $one_facet;
528 substr( $one_facet, 0, 20 ) . "..."
529 unless length($facet_label_value) <= 20;
531 # if it's a branch, label by the name, not the code,
532 if ( $link_value =~ /branch/ ) {
534 $branches->{$one_facet}->{'branchname'};
537 # but we're down with the whole label being in the link's title.
538 my $facet_title_value = $one_facet;
540 push @this_facets_array,
544 $facets_counter->{$link_value}
546 facet_label_value => $facet_label_value,
547 facet_title_value => $facet_title_value,
548 facet_link_value => $facet_link_value,
549 type_link_value => $link_value,
555 # handle expanded option
556 unless ( $facets_info->{$link_value}->{'expanded'} ) {
558 if ( ( $number_of_facets > 6 )
559 && ( $expanded_facet ne $link_value ) );
564 type_link_value => $link_value,
565 type_id => $link_value . "_id",
566 "type_label_" . $facets_info->{$link_value}->{'label_value'} => 1,
567 facets => \@this_facets_array,
568 expandable => $expandable,
569 expand => $link_value,
571 ) unless ( ($facets_info->{$link_value}->{'label_value'} =~ /Libraries/) and (C4::Context->preference('singleBranchMode')) );
576 return ( undef, $results_hashref, \@facets_loop );
581 $koha_query, $simple_query, $sort_by_ref, $servers_ref,
582 $results_per_page, $offset, $expanded_facet, $branches,
586 my $paz = C4::Search::PazPar2->new(C4::Context->config('pazpar2url'));
588 $paz->search($simple_query);
592 my $results_hashref = {};
593 my $stats = XMLin($paz->stat);
594 my $results = XMLin($paz->show($offset, $results_per_page, 'work-title:1'), forcearray => 1);
596 # for a grouped search result, the number of hits
597 # is the number of groups returned; 'bib_hits' will have
598 # the total number of bibs.
599 $results_hashref->{'biblioserver'}->{'hits'} = $results->{'merged'}->[0];
600 $results_hashref->{'biblioserver'}->{'bib_hits'} = $stats->{'hits'};
602 HIT: foreach my $hit (@{ $results->{'hit'} }) {
603 my $recid = $hit->{recid}->[0];
605 my $work_title = $hit->{'md-work-title'}->[0];
607 if (exists $hit->{'md-work-author'}) {
608 $work_author = $hit->{'md-work-author'}->[0];
610 my $group_label = (defined $work_author) ? "$work_title / $work_author" : $work_title;
612 my $result_group = {};
613 $result_group->{'group_label'} = $group_label;
614 $result_group->{'group_merge_key'} = $recid;
617 if (exists $hit->{count}) {
618 $count = $hit->{count}->[0];
620 $result_group->{'group_count'} = $count;
622 for (my $i = 0; $i < $count; $i++) {
623 # FIXME -- may need to worry about diacritics here
624 my $rec = $paz->record($recid, $i);
625 push @{ $result_group->{'RECORDS'} }, $rec;
628 push @{ $results_hashref->{'biblioserver'}->{'GROUPS'} }, $result_group;
631 # pass through facets
632 my $termlist_xml = $paz->termlist('author,subject');
633 my $terms = XMLin($termlist_xml, forcearray => 1);
634 my @facets_loop = ();
635 #die Dumper($results);
636 # foreach my $list (sort keys %{ $terms->{'list'} }) {
638 # foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) {
640 # facet_label_value => $facet->{'name'}->[0],
643 # push @facets_loop, ( {
644 # type_label => $list,
645 # facets => \@facets,
649 return ( undef, $results_hashref, \@facets_loop );
653 sub _remove_stopwords {
654 my ( $operand, $index ) = @_;
655 my @stopwords_removed;
657 # phrase and exact-qualified indexes shouldn't have stopwords removed
658 if ( $index !~ m/phr|ext/ ) {
660 # remove stopwords from operand : parse all stopwords & remove them (case insensitive)
661 # we use IsAlpha unicode definition, to deal correctly with diacritics.
662 # otherwise, a French word like "leçon" woudl be split into "le" "çon", "le"
663 # is a stopword, we'd get "çon" and wouldn't find anything...
664 foreach ( keys %{ C4::Context->stopwords } ) {
665 next if ( $_ =~ /(and|or|not)/ ); # don't remove operators
667 /(\P{IsAlpha}$_\P{IsAlpha}|^$_\P{IsAlpha}|\P{IsAlpha}$_$|^$_$)/ )
669 $operand =~ s/\P{IsAlpha}$_\P{IsAlpha}/ /gi;
670 $operand =~ s/^$_\P{IsAlpha}/ /gi;
671 $operand =~ s/\P{IsAlpha}$_$/ /gi;
672 $operand =~ s/$1//gi;
673 push @stopwords_removed, $_;
677 return ( $operand, \@stopwords_removed );
681 sub _detect_truncation {
682 my ( $operand, $index ) = @_;
683 my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated,
686 my @wordlist = split( /\s/, $operand );
687 foreach my $word (@wordlist) {
688 if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) {
689 push @rightlefttruncated, $word;
691 elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) {
692 push @lefttruncated, $word;
694 elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) {
695 push @righttruncated, $word;
697 elsif ( index( $word, "*" ) < 0 ) {
698 push @nontruncated, $word;
701 push @regexpr, $word;
705 \@nontruncated, \@righttruncated, \@lefttruncated,
706 \@rightlefttruncated, \@regexpr
711 sub _build_stemmed_operand {
715 # If operand contains a digit, it is almost certainly an identifier, and should
716 # not be stemmed. This is particularly relevant for ISBNs and ISSNs, which
717 # can contain the letter "X" - for example, _build_stemmend_operand would reduce
718 # "014100018X" to "x ", which for a MARC21 database would bring up irrelevant
719 # results (e.g., "23 x 29 cm." from the 300$c). Bug 2098.
720 return $operand if $operand =~ /\d/;
722 # FIXME: the locale should be set based on the user's language and/or search choice
723 my $stemmer = Lingua::Stem->new( -locale => 'EN-US' );
725 # FIXME: these should be stored in the db so the librarian can modify the behavior
726 $stemmer->add_exceptions(
733 my @words = split( / /, $operand );
734 my $stems = $stemmer->stem(@words);
735 for my $stem (@$stems) {
736 $stemmed_operand .= "$stem";
737 $stemmed_operand .= "?"
738 unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 );
739 $stemmed_operand .= " ";
741 warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG;
742 return $stemmed_operand;
746 sub _build_weighted_query {
748 # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works
749 # pretty well but could work much better if we had a smarter query parser
750 my ( $operand, $stemmed_operand, $index ) = @_;
751 my $stemming = C4::Context->preference("QueryStemming") || 0;
752 my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
753 my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
755 my $weighted_query .= "(rk=("; # Specifies that we're applying rank
757 # Keyword, or, no index specified
758 if ( ( $index eq 'kw' ) || ( !$index ) ) {
760 "Title-cover,ext,r1=\"$operand\""; # exact title-cover
761 $weighted_query .= " or ti,ext,r2=\"$operand\""; # exact title
762 $weighted_query .= " or ti,phr,r3=\"$operand\""; # phrase title
763 #$weighted_query .= " or any,ext,r4=$operand"; # exact any
764 #$weighted_query .=" or kw,wrdl,r5=\"$operand\""; # word list any
765 $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\""
766 if $fuzzy_enabled; # add fuzzy, word list
767 $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\""
768 if ( $stemming and $stemmed_operand )
769 ; # add stemming, right truncation
770 $weighted_query .= " or wrdl,r9=\"$operand\"";
772 # embedded sorting: 0 a-z; 1 z-a
773 # $weighted_query .= ") or (sort1,aut=1";
776 # Barcode searches should skip this process
777 elsif ( $index eq 'bc' ) {
778 $weighted_query .= "bc=\"$operand\"";
781 # Authority-number searches should skip this process
782 elsif ( $index eq 'an' ) {
783 $weighted_query .= "an=\"$operand\"";
786 # If the index already has more than one qualifier, wrap the operand
787 # in quotes and pass it back (assumption is that the user knows what they
788 # are doing and won't appreciate us mucking up their query
789 elsif ( $index =~ ',' ) {
790 $weighted_query .= " $index=\"$operand\"";
793 #TODO: build better cases based on specific search indexes
795 $weighted_query .= " $index,ext,r1=\"$operand\""; # exact index
796 #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)";
797 $weighted_query .= " or $index,phr,r3=\"$operand\""; # phrase index
799 " or $index,rt,wrdl,r3=\"$operand\""; # word list index
802 $weighted_query .= "))"; # close rank specification
803 return $weighted_query;
809 $simple_query, $query_cgi,
811 $limit_cgi, $limit_desc,
812 $stopwords_removed, $query_type ) = getRecords ( $operators, $operands, $indexes, $limits, $sort_by, $scan);
814 Build queries and limits in CCL, CGI, Human,
815 handle truncation, stemming, field weighting, stopwords, fuzziness, etc.
817 See verbose embedded documentation.
823 my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_;
825 warn "---------\nEnter buildQuery\n---------" if $DEBUG;
828 my @operators = @$operators if $operators;
829 my @indexes = @$indexes if $indexes;
830 my @operands = @$operands if $operands;
831 my @limits = @$limits if $limits;
832 my @sort_by = @$sort_by if $sort_by;
834 my $stemming = C4::Context->preference("QueryStemming") || 0;
835 my $auto_truncation = C4::Context->preference("QueryAutoTruncate") || 0;
836 my $weight_fields = C4::Context->preference("QueryWeightFields") || 0;
837 my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0;
838 my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0;
840 # no stemming/weight/fuzzy in NoZebra
841 if ( C4::Context->preference("NoZebra") ) {
847 my $query = $operands[0];
848 my $simple_query = $operands[0];
850 # initialize the variables we're passing back
859 my $stopwords_removed; # flag to determine if stopwords have been removed
861 # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
863 if ( $query =~ /^ccl=/ ) {
864 return ( undef, $', $', "q=ccl=$'", $', '', '', '', '', 'ccl' );
866 if ( $query =~ /^cql=/ ) {
867 return ( undef, $', $', "q=cql=$'", $', '', '', '', '', 'cql' );
869 if ( $query =~ /^pqf=/ ) {
870 return ( undef, $', $', "q=pqf=$'", $', '', '', '', '', 'pqf' );
873 # pass nested queries directly
874 # FIXME: need better handling of some of these variables in this case
875 if ( $query =~ /(\(|\))/ ) {
877 undef, $query, $simple_query, $query_cgi,
878 $query, $limit, $limit_cgi, $limit_desc,
879 $stopwords_removed, 'ccl'
883 # Form-based queries are non-nested and fixed depth, so we can easily modify the incoming
884 # query operands and indexes and add stemming, truncation, field weighting, etc.
885 # Once we do so, we'll end up with a value in $query, just like if we had an
886 # incoming $query from the user
889 ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query
891 ; # a flag used to keep track if there was a previous query
892 # if there was, we can apply the current operator
894 for ( my $i = 0 ; $i <= @operands ; $i++ ) {
896 # COMBINE OPERANDS, INDEXES AND OPERATORS
897 if ( $operands[$i] ) {
899 # A flag to determine whether or not to add the index to the query
902 # If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling
903 if ( $operands[$i] =~ /(:|=)/ || $scan ) {
906 $remove_stopwords = 0;
908 my $operand = $operands[$i];
909 my $index = $indexes[$i];
911 # Add index-specific attributes
912 # Date of Publication
913 if ( $index eq 'yr' ) {
914 $index .= ",st-numeric";
916 $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
919 # Date of Acquisition
920 elsif ( $index eq 'acqdate' ) {
921 $index .= ",st-date-normalized";
923 $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0;
925 # ISBN,ISSN,Standard Number, don't need special treatment
926 elsif ( $index eq 'nb' || $index eq 'ns' ) {
929 $stemming, $auto_truncation,
930 $weight_fields, $fuzzy_enabled,
932 ) = ( 0, 0, 0, 0, 0 );
935 # Set default structure attribute (word list)
937 unless ( $indexes_set || !$index || $index =~ /(st-|phr|ext|wrdl)/ ) {
938 $struct_attr = ",wrdl";
941 # Some helpful index variants
942 my $index_plus = $index . $struct_attr . ":" if $index;
943 my $index_plus_comma = $index . $struct_attr . "," if $index;
944 if ($auto_truncation){
945 # FIXME Auto Truncation is only valid for LTR languages
947 # use C4::Languages qw(regex_lang_subtags get_bidi);
948 # $lang = $query->cookie('KohaOpacLanguage') if (defined $query && $query->cookie('KohaOpacLanguage'));
949 # my $current_lang = regex_lang_subtags($lang);
951 # $bidi = get_bidi($current_lang->{script}) if $current_lang->{script};
952 $index_plus_comma .= "rtrn:";
956 if ($remove_stopwords) {
957 ( $operand, $stopwords_removed ) =
958 _remove_stopwords( $operand, $index );
959 warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG;
960 warn "REMOVED STOPWORDS: @$stopwords_removed"
961 if ( $stopwords_removed && $DEBUG );
965 my $truncated_operand;
966 my( $nontruncated, $righttruncated, $lefttruncated,
967 $rightlefttruncated, $regexpr
968 ) = _detect_truncation( $operand, $index );
970 "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<"
975 scalar(@$righttruncated) + scalar(@$lefttruncated) +
976 scalar(@$rightlefttruncated) > 0 )
979 # Don't field weight or add the index to the query, we do it here
981 undef $weight_fields;
982 my $previous_truncation_operand;
983 if ( scalar(@$nontruncated) > 0 ) {
984 $truncated_operand .= "$index_plus @$nontruncated ";
985 $previous_truncation_operand = 1;
987 if ( scalar(@$righttruncated) > 0 ) {
988 $truncated_operand .= "and "
989 if $previous_truncation_operand;
990 $truncated_operand .=
991 "$index_plus_comma" . "rtrn:@$righttruncated ";
992 $previous_truncation_operand = 1;
994 if ( scalar(@$lefttruncated) > 0 ) {
995 $truncated_operand .= "and "
996 if $previous_truncation_operand;
997 $truncated_operand .=
998 "$index_plus_comma" . "ltrn:@$lefttruncated ";
999 $previous_truncation_operand = 1;
1001 if ( scalar(@$rightlefttruncated) > 0 ) {
1002 $truncated_operand .= "and "
1003 if $previous_truncation_operand;
1004 $truncated_operand .=
1005 "$index_plus_comma" . "rltrn:@$rightlefttruncated ";
1006 $previous_truncation_operand = 1;
1009 $operand = $truncated_operand if $truncated_operand;
1010 warn "TRUNCATED OPERAND: >$truncated_operand<" if $DEBUG;
1013 my $stemmed_operand;
1014 $stemmed_operand = _build_stemmed_operand($operand)
1016 warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG;
1018 # Handle Field Weighting
1019 my $weighted_operand;
1021 _build_weighted_query( $operand, $stemmed_operand, $index )
1023 warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG;
1024 $operand = $weighted_operand if $weight_fields;
1025 $indexes_set = 1 if $weight_fields;
1027 # If there's a previous operand, we need to add an operator
1028 if ($previous_operand) {
1030 # User-specified operator
1031 if ( $operators[ $i - 1 ] ) {
1032 $query .= " $operators[$i-1] ";
1033 $query .= " $index_plus " unless $indexes_set;
1034 $query .= " $operand";
1035 $query_cgi .= "&op=$operators[$i-1]";
1036 $query_cgi .= "&idx=$index" if $index;
1037 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1039 " $operators[$i-1] $index_plus $operands[$i]";
1042 # Default operator is and
1045 $query .= "$index_plus " unless $indexes_set;
1046 $query .= "$operand";
1047 $query_cgi .= "&op=and&idx=$index" if $index;
1048 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1049 $query_desc .= " and $index_plus $operands[$i]";
1053 # There isn't a pervious operand, don't need an operator
1056 # Field-weighted queries already have indexes set
1057 $query .= " $index_plus " unless $indexes_set;
1059 $query_desc .= " $index_plus $operands[$i]";
1060 $query_cgi .= "&idx=$index" if $index;
1061 $query_cgi .= "&q=$operands[$i]" if $operands[$i];
1062 $previous_operand = 1;
1067 warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG;
1070 my $group_OR_limits;
1071 my $availability_limit;
1072 foreach my $this_limit (@limits) {
1073 if ( $this_limit =~ /available/ ) {
1075 # 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0)
1077 # all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0
1078 $availability_limit .=
1079 "( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and (lost,st-numeric=0) )"; #or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )";
1080 $limit_cgi .= "&limit=available";
1084 # group_OR_limits, prefixed by mc-
1085 # OR every member of the group
1086 elsif ( $this_limit =~ /mc/ ) {
1087 $group_OR_limits .= " or " if $group_OR_limits;
1088 $limit_desc .= " or " if $group_OR_limits;
1089 $group_OR_limits .= "$this_limit";
1090 $limit_cgi .= "&limit=$this_limit";
1091 $limit_desc .= " $this_limit";
1094 # Regular old limits
1097 $limit .= " and " if $limit || $query;
1098 $limit .= "$this_limit";
1099 $limit_cgi .= "&limit=$this_limit";
1100 $limit_desc .= " $this_limit";
1104 if ($group_OR_limits) {
1105 $limit .= " and " if ( $query || $limit );
1106 $limit .= "($group_OR_limits)";
1108 if ($availability_limit) {
1109 $limit .= " and " if ( $query || $limit );
1110 $limit .= "($availability_limit)";
1113 # Normalize the query and limit strings
1116 for ( $query, $query_desc, $limit, $limit_desc ) {
1117 $_ =~ s/ / /g; # remove extra spaces
1118 $_ =~ s/^ //g; # remove any beginning spaces
1119 $_ =~ s/ $//g; # remove any ending spaces
1120 $_ =~ s/==/=/g; # remove double == from query
1122 $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi
1124 for ($query_cgi,$simple_query) {
1127 # append the limit to the query
1128 $query .= " " . $limit;
1132 warn "QUERY:" . $query;
1133 warn "QUERY CGI:" . $query_cgi;
1134 warn "QUERY DESC:" . $query_desc;
1135 warn "LIMIT:" . $limit;
1136 warn "LIMIT CGI:" . $limit_cgi;
1137 warn "LIMIT DESC:" . $limit_desc;
1138 warn "---------\nLeave buildQuery\n---------";
1141 undef, $query, $simple_query, $query_cgi,
1142 $query_desc, $limit, $limit_cgi, $limit_desc,
1143 $stopwords_removed, $query_type
1147 =head2 searchResults
1149 Format results in a form suitable for passing to the template
1153 # IMO this subroutine is pretty messy still -- it's responsible for
1154 # building the HTML output for the template
1156 my ( $searchdesc, $hits, $results_per_page, $offset, $scan, @marcresults ) = @_;
1157 my $dbh = C4::Context->dbh;
1161 # add search-term highlighting via <span>s on the search terms
1162 my $span_terms_hashref;
1163 for my $span_term ( split( / /, $searchdesc ) ) {
1164 $span_term =~ s/(.*=|\)|\(|\+|\.|\*)//g;
1165 $span_terms_hashref->{$span_term}++;
1168 #Build branchnames hash
1170 #get branch information.....
1173 $dbh->prepare("SELECT branchcode,branchname FROM branches")
1174 ; # FIXME : use C4::Koha::GetBranches
1176 while ( my $bdata = $bsth->fetchrow_hashref ) {
1177 $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'};
1179 # FIXME - We build an authorised values hash here, using the default framework
1180 # though it is possible to have different authvals for different fws.
1182 my $shelflocations =GetKohaAuthorisedValues('items.location','');
1184 # get notforloan authorised value list (see $shelflocations FIXME)
1185 my $notforloan_authorised_value = GetAuthValCode('items.notforloan','');
1187 #Build itemtype hash
1188 #find itemtype & itemtype image
1192 "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes"
1195 while ( my $bdata = $bsth->fetchrow_hashref ) {
1196 foreach (qw(description imageurl summary notforloan)) {
1197 $itemtypes{ $bdata->{'itemtype'} }->{$_} = $bdata->{$_};
1201 #search item field code
1204 "SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'"
1207 my ($itemtag) = $sth->fetchrow;
1209 ## find column names of items related to MARC
1210 my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items");
1212 my %subfieldstosearch;
1213 while ( ( my $column ) = $sth2->fetchrow ) {
1214 my ( $tagfield, $tagsubfield ) =
1215 &GetMarcFromKohaField( "items." . $column, "" );
1216 $subfieldstosearch{$column} = $tagsubfield;
1219 # handle which records to actually retrieve
1221 if ( $hits && $offset + $results_per_page <= $hits ) {
1222 $times = $offset + $results_per_page;
1225 $times = $hits; # FIXME: if $hits is undefined, why do we want to equal it?
1228 # loop through all of the records we've retrieved
1229 for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
1230 my $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
1231 my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' );
1232 $oldbiblio->{subtitle} = C4::Biblio::get_koha_field_from_marc('bibliosubtitle', 'subtitle', $marcrecord, '');
1233 $oldbiblio->{result_number} = $i + 1;
1235 # add imageurl to itemtype if there is one
1236 $oldbiblio->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} );
1238 $oldbiblio->{'authorised_value_images'} = C4::Items::get_authorised_value_images( C4::Biblio::get_biblio_authorised_values( $oldbiblio->{'biblionumber'}, $marcrecord ) );
1239 (my $aisbn) = $oldbiblio->{isbn} =~ /([\d-]*[X]*)/;
1241 $oldbiblio->{amazonisbn} = $aisbn;
1242 $oldbiblio->{description} = $itemtypes{ $oldbiblio->{itemtype} }->{description};
1243 # Build summary if there is one (the summary is defined in the itemtypes table)
1244 # FIXME: is this used anywhere, I think it can be commented out? -- JF
1245 if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) {
1246 my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary};
1247 my @fields = $marcrecord->fields();
1248 foreach my $field (@fields) {
1249 my $tag = $field->tag();
1250 my $tagvalue = $field->as_string();
1251 if (! utf8::is_utf8($tagvalue)) {
1252 utf8::decode($tagvalue);
1256 s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g;
1257 unless ( $tag < 10 ) {
1258 my @subf = $field->subfields;
1259 for my $i ( 0 .. $#subf ) {
1260 my $subfieldcode = $subf[$i][0];
1261 my $subfieldvalue = $subf[$i][1];
1262 if (! utf8::is_utf8($subfieldvalue)) {
1263 utf8::decode($subfieldvalue);
1265 my $tagsubf = $tag . $subfieldcode;
1267 s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g;
1272 $summary =~ s/\[(.*?)]//g;
1273 $summary =~ s/\n/<br\/>/g;
1274 $oldbiblio->{summary} = $summary;
1277 # save an author with no <span> tag, for the <a href=search.pl?q=<!--tmpl_var name="author"-->> link
1278 $oldbiblio->{'author_nospan'} = $oldbiblio->{'author'};
1279 $oldbiblio->{'title_nospan'} = $oldbiblio->{'title'};
1280 $oldbiblio->{'subtitle_nospan'} = $oldbiblio->{'subtitle'};
1281 # Add search-term highlighting to the whole record where they match using <span>s
1282 if (C4::Context->preference("OpacHighlightedWords")){
1283 my $searchhighlightblob;
1284 for my $highlight_field ( $marcrecord->fields ) {
1286 # FIXME: need to skip title, subtitle, author, etc., as they are handled below
1287 next if $highlight_field->tag() =~ /(^00)/; # skip fixed fields
1288 for my $subfield ($highlight_field->subfields()) {
1290 next if $subfield->[0] eq '9';
1291 my $field = $subfield->[1];
1292 for my $term ( keys %$span_terms_hashref ) {
1293 if ( ( $field =~ /$term/i ) && (( length($term) > 3 ) || ($field =~ / $term /i)) ) {
1294 $field =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1298 $searchhighlightblob .= $field . " ... " if $match;
1302 $searchhighlightblob = ' ... '.$searchhighlightblob if $searchhighlightblob;
1303 $oldbiblio->{'searchhighlightblob'} = $searchhighlightblob;
1306 # Add search-term highlighting to the title, subtitle, etc. fields
1307 for my $term ( keys %$span_terms_hashref ) {
1308 my $old_term = $term;
1309 if ( length($term) > 3 ) {
1310 $term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\]|\\|\*)//g;
1311 foreach(qw(title subtitle author publishercode place pages notes size)) {
1312 $oldbiblio->{$_} =~ s/$term/<span class=\"term\">$&<\/span>/gi;
1317 ($i % 2) and $oldbiblio->{'toggle'} = 1;
1319 # Pull out the items fields
1320 my @fields = $marcrecord->field($itemtag);
1322 # Setting item statuses for display
1323 my @available_items_loop;
1324 my @onloan_items_loop;
1325 my @notforloan_items_loop;
1326 my @other_items_loop;
1328 my $available_items;
1330 my $notforloan_items;
1333 my $ordered_count = 0;
1334 my $available_count = 0;
1335 my $onloan_count = 0;
1336 my $notforloan_count = 0;
1337 my $longoverdue_count = 0;
1338 my $other_count = 0;
1339 my $wthdrawn_count = 0;
1340 my $itemlost_count = 0;
1341 my $itembinding_count = 0;
1342 my $itemdamaged_count = 0;
1343 my $item_in_transit_count = 0;
1344 my $can_place_holds = 0;
1345 my $items_count = scalar(@fields);
1347 ( C4::Context->preference('maxItemsinSearchResults') )
1348 ? C4::Context->preference('maxItemsinSearchResults') - 1
1351 # loop through every item
1352 foreach my $field (@fields) {
1355 # populate the items hash
1356 foreach my $code ( keys %subfieldstosearch ) {
1357 $item->{$code} = $field->subfield( $subfieldstosearch{$code} );
1359 my $hbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'homebranch' : 'holdingbranch';
1360 my $otherbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'holdingbranch' : 'homebranch';
1361 # set item's branch name, use HomeOrHoldingBranch syspref first, fall back to the other one
1362 if ($item->{$hbranch}) {
1363 $item->{'branchname'} = $branches{$item->{$hbranch}};
1365 elsif ($item->{$otherbranch}) { # Last resort
1366 $item->{'branchname'} = $branches{$item->{$otherbranch}};
1369 my $prefix = $item->{$hbranch} . '--' . $item->{location} . $item->{itype} . $item->{itemcallnumber};
1370 # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item
1371 if ( $item->{onloan} ) {
1373 my $key = $prefix . $item->{due_date};
1374 $onloan_items->{$key}->{due_date} = format_date($item->{onloan});
1375 $onloan_items->{$key}->{count}++ if $item->{$hbranch};
1376 $onloan_items->{$key}->{branchname} = $item->{branchname};
1377 $onloan_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1378 $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber};
1379 $onloan_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1380 $onloan_items->{$key}->{barcode} = $item->{barcode};
1381 # if something's checked out and lost, mark it as 'long overdue'
1382 if ( $item->{itemlost} ) {
1383 $onloan_items->{$prefix}->{longoverdue}++;
1384 $longoverdue_count++;
1385 } else { # can place holds as long as item isn't lost
1386 $can_place_holds = 1;
1390 # items not on loan, but still unavailable ( lost, withdrawn, damaged )
1394 if ( $item->{notforloan} == -1 ) {
1398 # is item in transit?
1399 my $transfertwhen = '';
1400 my ($transfertfrom, $transfertto);
1402 unless ($item->{wthdrawn}
1403 || $item->{itemlost}
1405 || $item->{notforloan}
1406 || $items_count > 20) {
1408 # A couple heuristics to limit how many times
1409 # we query the database for item transfer information, sacrificing
1410 # accuracy in some cases for speed;
1412 # 1. don't query if item has one of the other statuses
1413 # 2. don't check transit status if the bib has
1414 # more than 20 items
1416 # FIXME: to avoid having the query the database like this, and to make
1417 # the in transit status count as unavailable for search limiting,
1418 # should map transit status to record indexed in Zebra.
1420 ($transfertwhen, $transfertfrom, $transfertto) = C4::Circulation::GetTransfers($item->{itemnumber});
1423 # item is withdrawn, lost or damaged
1424 if ( $item->{wthdrawn}
1425 || $item->{itemlost}
1427 || $item->{notforloan}
1428 || ($transfertwhen ne ''))
1430 $wthdrawn_count++ if $item->{wthdrawn};
1431 $itemlost_count++ if $item->{itemlost};
1432 $itemdamaged_count++ if $item->{damaged};
1433 $item_in_transit_count++ if $transfertwhen ne '';
1434 $item->{status} = $item->{wthdrawn} . "-" . $item->{itemlost} . "-" . $item->{damaged} . "-" . $item->{notforloan};
1436 my $key = $prefix . $item->{status};
1438 foreach (qw(wthdrawn itemlost damaged branchname itemcallnumber)) {
1439 if($item->{notforloan} == 1){
1440 $notforloan_items->{$key}->{$_} = $item->{$_};
1442 $other_items->{$key}->{$_} = $item->{$_};
1445 if($item->{notforloan} == 1){
1446 $notforloan_count++;
1448 $notforloan_items->{$key}->{intransit} = ($transfertwhen ne '') ? 1 : 0;
1449 $notforloan_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1450 $notforloan_items->{$key}->{count}++ if $item->{$hbranch};
1451 $notforloan_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1452 $notforloan_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1453 $notforloan_items->{$key}->{barcode} = $item->{barcode};
1457 $other_items->{$key}->{intransit} = ($transfertwhen ne '') ? 1 : 0;
1458 $other_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value;
1459 $other_items->{$key}->{count}++ if $item->{$hbranch};
1460 $other_items->{$key}->{location} = $shelflocations->{ $item->{location} };
1461 $other_items->{$key}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1462 $other_items->{$key}->{barcode} = $item->{barcode};
1468 $can_place_holds = 1;
1470 $available_items->{$prefix}->{count}++ if $item->{$hbranch};
1471 foreach (qw(branchname itemcallnumber barcode)) {
1472 $available_items->{$prefix}->{$_} = $item->{$_};
1474 $available_items->{$prefix}->{location} = $shelflocations->{ $item->{location} };
1475 $available_items->{$prefix}->{imageurl} = getitemtypeimagelocation( 'opac', $itemtypes{ $item->{itype} }->{imageurl} );
1478 } # notforloan, item level and biblioitem level
1479 my ( $availableitemscount, $onloanitemscount, $notforloanitemscount,$otheritemscount );
1481 ( C4::Context->preference('maxItemsinSearchResults') )
1482 ? C4::Context->preference('maxItemsinSearchResults') - 1
1484 for my $key ( sort keys %$onloan_items ) {
1485 (++$onloanitemscount > $maxitems) and last;
1486 push @onloan_items_loop, $onloan_items->{$key};
1488 for my $key ( sort keys %$other_items ) {
1489 (++$otheritemscount > $maxitems) and last;
1490 push @other_items_loop, $other_items->{$key};
1492 for my $key ( sort keys %$notforloan_items ) {
1493 (++$notforloanitemscount > $maxitems) and last;
1494 push @notforloan_items_loop, $notforloan_items->{$key};
1496 for my $key ( sort keys %$available_items ) {
1497 (++$availableitemscount > $maxitems) and last;
1498 push @available_items_loop, $available_items->{$key}
1501 # XSLT processing of some stuff
1502 if (C4::Context->preference("XSLTResultsDisplay") && !$scan) {
1503 $oldbiblio->{XSLTResultsRecord} = XSLTParse4Display(
1504 $oldbiblio->{biblionumber}, $marcrecord, 'Results' );
1507 # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items
1508 $can_place_holds = 0 if $itemtypes{ $oldbiblio->{itemtype} }->{notforloan};
1509 $oldbiblio->{norequests} = 1 unless $can_place_holds;
1510 $oldbiblio->{itemsplural} = 1 if $items_count > 1;
1511 $oldbiblio->{items_count} = $items_count;
1512 $oldbiblio->{available_items_loop} = \@available_items_loop;
1513 $oldbiblio->{notforloan_items_loop}= \@notforloan_items_loop;
1514 $oldbiblio->{onloan_items_loop} = \@onloan_items_loop;
1515 $oldbiblio->{other_items_loop} = \@other_items_loop;
1516 $oldbiblio->{availablecount} = $available_count;
1517 $oldbiblio->{availableplural} = 1 if $available_count > 1;
1518 $oldbiblio->{onloancount} = $onloan_count;
1519 $oldbiblio->{onloanplural} = 1 if $onloan_count > 1;
1520 $oldbiblio->{notforloancount} = $notforloan_count;
1521 $oldbiblio->{othercount} = $other_count;
1522 $oldbiblio->{otherplural} = 1 if $other_count > 1;
1523 $oldbiblio->{wthdrawncount} = $wthdrawn_count;
1524 $oldbiblio->{itemlostcount} = $itemlost_count;
1525 $oldbiblio->{damagedcount} = $itemdamaged_count;
1526 $oldbiblio->{intransitcount} = $item_in_transit_count;
1527 $oldbiblio->{orderedcount} = $ordered_count;
1528 $oldbiblio->{isbn} =~
1529 s/-//g; # deleting - in isbn to enable amazon content
1530 push( @newresults, $oldbiblio );
1535 #----------------------------------------------------------------------
1537 # Non-Zebra GetRecords#
1538 #----------------------------------------------------------------------
1542 NZgetRecords has the same API as zera getRecords, even if some parameters are not managed
1548 $query, $simple_query, $sort_by_ref, $servers_ref,
1549 $results_per_page, $offset, $expanded_facet, $branches,
1552 warn "query =$query" if $DEBUG;
1553 my $result = NZanalyse($query);
1554 warn "results =$result" if $DEBUG;
1556 NZorder( $result, @$sort_by_ref[0], $results_per_page, $offset ),
1562 NZanalyse : get a CQL string as parameter, and returns a list of biblionumber;title,biblionumber;title,...
1563 the list is built from an inverted index in the nozebra SQL table
1564 note that title is here only for convenience : the sorting will be very fast when requested on title
1565 if the sorting is requested on something else, we will have to reread all results, and that may be longer.
1570 my ( $string, $server ) = @_;
1571 # warn "---------" if $DEBUG;
1572 warn " NZanalyse" if $DEBUG;
1573 # warn "---------" if $DEBUG;
1575 # $server contains biblioserver or authorities, depending on what we search on.
1576 #warn "querying : $string on $server";
1577 $server = 'biblioserver' unless $server;
1579 # if we have a ", replace the content to discard temporarily any and/or/not inside
1581 if ( $string =~ /"/ ) {
1582 $string =~ s/"(.*?)"/__X__/;
1584 warn "commacontent : $commacontent" if $DEBUG;
1587 # split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y"
1588 # then, call again NZanalyse with $left and $right
1589 # (recursive until we find a leaf (=> something without and/or/not)
1590 # delete repeated operator... Would then go in infinite loop
1591 while ( $string =~ s/( and| or| not| AND| OR| NOT)\1/$1/g ) {
1594 #process parenthesis before.
1595 if ( $string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/ ) {
1598 my $operator = lc($3); # FIXME: and/or/not are operators, not operands
1600 "dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right"
1602 my $leftresult = NZanalyse( $left, $server );
1604 my $rightresult = NZanalyse( $right, $server );
1606 # OK, we have the results for right and left part of the query
1607 # depending of operand, intersect, union or exclude both lists
1608 # to get a result list
1609 if ( $operator eq ' and ' ) {
1610 return NZoperatorAND($leftresult,$rightresult);
1612 elsif ( $operator eq ' or ' ) {
1614 # just merge the 2 strings
1615 return $leftresult . $rightresult;
1617 elsif ( $operator eq ' not ' ) {
1618 return NZoperatorNOT($leftresult,$rightresult);
1622 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1626 warn "string :" . $string if $DEBUG;
1630 if ($string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/) {
1633 $operator = lc($2); # FIXME: and/or/not are operators, not operands
1635 warn "no parenthesis. left : $left operator: $operator right: $right"
1638 # it's not a leaf, we have a and/or/not
1641 # reintroduce comma content if needed
1642 $right =~ s/__X__/"$commacontent"/ if $commacontent;
1643 $left =~ s/__X__/"$commacontent"/ if $commacontent;
1644 warn "node : $left / $operator / $right\n" if $DEBUG;
1645 my $leftresult = NZanalyse( $left, $server );
1646 my $rightresult = NZanalyse( $right, $server );
1647 warn " leftresult : $leftresult" if $DEBUG;
1648 warn " rightresult : $rightresult" if $DEBUG;
1649 # OK, we have the results for right and left part of the query
1650 # depending of operand, intersect, union or exclude both lists
1651 # to get a result list
1652 if ( $operator eq ' and ' ) {
1654 return NZoperatorAND($leftresult,$rightresult);
1656 elsif ( $operator eq ' or ' ) {
1658 # just merge the 2 strings
1659 return $leftresult . $rightresult;
1661 elsif ( $operator eq ' not ' ) {
1662 return NZoperatorNOT($leftresult,$rightresult);
1666 # this error is impossible, because of the regexp that isolate the operand, but just in case...
1667 die "error : operand unknown : $operator for $string";
1670 # it's a leaf, do the real SQL query and return the result
1673 $string =~ s/__X__/"$commacontent"/ if $commacontent;
1674 $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g;
1675 #remove trailing blank at the beginning
1677 warn "leaf:$string" if $DEBUG;
1679 # parse the string in in operator/operand/value again
1683 if ($string =~ /(.*)(>=|<=)(.*)/) {
1690 # warn "handling leaf... left:$left operator:$operator right:$right"
1692 unless ($operator) {
1693 if ($string =~ /(.*)(>|<|=)(.*)/) {
1698 "handling unless (operator)... left:$left operator:$operator right:$right"
1706 # strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr...
1709 # automatic replace for short operators
1710 $left = 'title' if $left =~ '^ti$';
1711 $left = 'author' if $left =~ '^au$';
1712 $left = 'publisher' if $left =~ '^pb$';
1713 $left = 'subject' if $left =~ '^su$';
1714 $left = 'koha-Auth-Number' if $left =~ '^an$';
1715 $left = 'keyword' if $left =~ '^kw$';
1716 warn "handling leaf... left:$left operator:$operator right:$right" if $DEBUG;
1717 if ( $operator && $left ne 'keyword' ) {
1719 #do a specific search
1720 my $dbh = C4::Context->dbh;
1721 $operator = 'LIKE' if $operator eq '=' and $right =~ /%/;
1724 "SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?"
1726 warn "$left / $operator / $right\n" if $DEBUG;
1728 # split each word, query the DB and build the biblionumbers result
1729 #sanitizing leftpart
1730 $left =~ s/^\s+|\s+$//;
1731 foreach ( split / /, $right ) {
1733 $_ =~ s/^\s+|\s+$//;
1735 warn "EXECUTE : $server, $left, $_" if $DEBUG;
1736 $sth->execute( $server, $left, $_ )
1737 or warn "execute failed: $!";
1738 while ( my ( $line, $value ) = $sth->fetchrow ) {
1740 # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <)
1741 # otherwise, fill the result
1742 $biblionumbers .= $line
1743 unless ( $right =~ /^\d+$/ && $value =~ /\D/ );
1744 warn "result : $value "
1745 . ( $right =~ /\d/ ) . "=="
1746 . ( $value =~ /\D/?$line:"" ) if $DEBUG; #= $line";
1749 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1751 warn "NZAND" if $DEBUG;
1752 $results = NZoperatorAND($biblionumbers,$results);
1755 $results = $biblionumbers;
1761 #do a complete search (all indexes), if index='kw' do complete search too.
1762 my $dbh = C4::Context->dbh;
1765 "SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?"
1768 # split each word, query the DB and build the biblionumbers result
1769 foreach ( split / /, $string ) {
1770 next if C4::Context->stopwords->{ uc($_) }; # skip if stopword
1771 warn "search on all indexes on $_" if $DEBUG;
1774 $sth->execute( $server, $_ );
1775 while ( my $line = $sth->fetchrow ) {
1776 $biblionumbers .= $line;
1779 # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list
1781 $results = NZoperatorAND($biblionumbers,$results);
1784 warn "NEW RES for $_ = $biblionumbers" if $DEBUG;
1785 $results = $biblionumbers;
1789 warn "return : $results for LEAF : $string" if $DEBUG;
1792 warn "---------\nLeave NZanalyse\n---------" if $DEBUG;
1796 my ($rightresult, $leftresult)=@_;
1798 my @leftresult = split /;/, $leftresult;
1799 warn " @leftresult / $rightresult \n" if $DEBUG;
1801 # my @rightresult = split /;/,$leftresult;
1804 # parse the left results, and if the biblionumber exist in the right result, save it in finalresult
1805 # the result is stored twice, to have the same weight for AND than OR.
1806 # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130
1807 # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64
1808 foreach (@leftresult) {
1811 ( $value, $countvalue ) = ( $1, $2 ) if ($value=~/(.*)-(\d+)$/);
1812 if ( $rightresult =~ /\Q$value\E-(\d+);/ ) {
1813 $countvalue = ( $1 > $countvalue ? $countvalue : $1 );
1815 "$value-$countvalue;$value-$countvalue;";
1818 warn "NZAND DONE : $finalresult \n" if $DEBUG;
1819 return $finalresult;
1823 my ($rightresult, $leftresult)=@_;
1824 return $rightresult.$leftresult;
1828 my ($leftresult, $rightresult)=@_;
1830 my @leftresult = split /;/, $leftresult;
1832 # my @rightresult = split /;/,$leftresult;
1834 foreach (@leftresult) {
1836 $value=$1 if $value=~m/(.*)-\d+$/;
1837 unless ($rightresult =~ "$value-") {
1838 $finalresult .= "$_;";
1841 return $finalresult;
1846 $finalresult = NZorder($biblionumbers, $ordering,$results_per_page,$offset);
1853 my ( $biblionumbers, $ordering, $results_per_page, $offset ) = @_;
1854 warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG;
1856 # order title asc by default
1857 # $ordering = '1=36 <i' unless $ordering;
1858 $results_per_page = 20 unless $results_per_page;
1859 $offset = 0 unless $offset;
1860 my $dbh = C4::Context->dbh;
1863 # order by POPULARITY
1865 if ( $ordering =~ /popularity/ ) {
1869 # popularity is not in MARC record, it's builded from a specific query
1871 $dbh->prepare("select sum(issues) from items where biblionumber=?");
1872 foreach ( split /;/, $biblionumbers ) {
1873 my ( $biblionumber, $title ) = split /,/, $_;
1874 $result{$biblionumber} = GetMarcBiblio($biblionumber);
1875 $sth->execute($biblionumber);
1876 my $popularity = $sth->fetchrow || 0;
1878 # hint : the key is popularity.title because we can have
1879 # many results with the same popularity. In this cas, sub-ordering is done by title
1880 # we also have biblionumber to avoid bug for 2 biblios with the same title & popularity
1881 # (un-frequent, I agree, but we won't forget anything that way ;-)
1882 $popularity{ sprintf( "%10d", $popularity ) . $title
1883 . $biblionumber } = $biblionumber;
1886 # sort the hash and return the same structure as GetRecords (Zebra querying)
1889 if ( $ordering eq 'popularity_dsc' ) { # sort popularity DESC
1890 foreach my $key ( sort { $b cmp $a } ( keys %popularity ) ) {
1891 $result_hash->{'RECORDS'}[ $numbers++ ] =
1892 $result{ $popularity{$key} }->as_usmarc();
1895 else { # sort popularity ASC
1896 foreach my $key ( sort ( keys %popularity ) ) {
1897 $result_hash->{'RECORDS'}[ $numbers++ ] =
1898 $result{ $popularity{$key} }->as_usmarc();
1901 my $finalresult = ();
1902 $result_hash->{'hits'} = $numbers;
1903 $finalresult->{'biblioserver'} = $result_hash;
1904 return $finalresult;
1910 elsif ( $ordering =~ /author/ ) {
1912 foreach ( split /;/, $biblionumbers ) {
1913 my ( $biblionumber, $title ) = split /,/, $_;
1914 my $record = GetMarcBiblio($biblionumber);
1916 if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1917 $author = $record->subfield( '200', 'f' );
1918 $author = $record->subfield( '700', 'a' ) unless $author;
1921 $author = $record->subfield( '100', 'a' );
1924 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1925 # and we don't want to get only 1 result for each of them !!!
1926 $result{ $author . $biblionumber } = $record;
1929 # sort the hash and return the same structure as GetRecords (Zebra querying)
1932 if ( $ordering eq 'author_za' ) { # sort by author desc
1933 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1934 $result_hash->{'RECORDS'}[ $numbers++ ] =
1935 $result{$key}->as_usmarc();
1938 else { # sort by author ASC
1939 foreach my $key ( sort ( keys %result ) ) {
1940 $result_hash->{'RECORDS'}[ $numbers++ ] =
1941 $result{$key}->as_usmarc();
1944 my $finalresult = ();
1945 $result_hash->{'hits'} = $numbers;
1946 $finalresult->{'biblioserver'} = $result_hash;
1947 return $finalresult;
1950 # ORDER BY callnumber
1953 elsif ( $ordering =~ /callnumber/ ) {
1955 foreach ( split /;/, $biblionumbers ) {
1956 my ( $biblionumber, $title ) = split /,/, $_;
1957 my $record = GetMarcBiblio($biblionumber);
1959 my ( $callnumber_tag, $callnumber_subfield ) =
1960 GetMarcFromKohaField( 'items.itemcallnumber','' );
1961 ( $callnumber_tag, $callnumber_subfield ) =
1962 GetMarcFromKohaField('biblioitems.callnumber','')
1963 unless $callnumber_tag;
1964 if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) {
1965 $callnumber = $record->subfield( '200', 'f' );
1968 $callnumber = $record->subfield( '100', 'a' );
1971 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
1972 # and we don't want to get only 1 result for each of them !!!
1973 $result{ $callnumber . $biblionumber } = $record;
1976 # sort the hash and return the same structure as GetRecords (Zebra querying)
1979 if ( $ordering eq 'call_number_dsc' ) { # sort by title desc
1980 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
1981 $result_hash->{'RECORDS'}[ $numbers++ ] =
1982 $result{$key}->as_usmarc();
1985 else { # sort by title ASC
1986 foreach my $key ( sort { $a cmp $b } ( keys %result ) ) {
1987 $result_hash->{'RECORDS'}[ $numbers++ ] =
1988 $result{$key}->as_usmarc();
1991 my $finalresult = ();
1992 $result_hash->{'hits'} = $numbers;
1993 $finalresult->{'biblioserver'} = $result_hash;
1994 return $finalresult;
1996 elsif ( $ordering =~ /pubdate/ ) { #pub year
1998 foreach ( split /;/, $biblionumbers ) {
1999 my ( $biblionumber, $title ) = split /,/, $_;
2000 my $record = GetMarcBiblio($biblionumber);
2001 my ( $publicationyear_tag, $publicationyear_subfield ) =
2002 GetMarcFromKohaField( 'biblioitems.publicationyear', '' );
2003 my $publicationyear =
2004 $record->subfield( $publicationyear_tag,
2005 $publicationyear_subfield );
2007 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2008 # and we don't want to get only 1 result for each of them !!!
2009 $result{ $publicationyear . $biblionumber } = $record;
2012 # sort the hash and return the same structure as GetRecords (Zebra querying)
2015 if ( $ordering eq 'pubdate_dsc' ) { # sort by pubyear desc
2016 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2017 $result_hash->{'RECORDS'}[ $numbers++ ] =
2018 $result{$key}->as_usmarc();
2021 else { # sort by pub year ASC
2022 foreach my $key ( sort ( keys %result ) ) {
2023 $result_hash->{'RECORDS'}[ $numbers++ ] =
2024 $result{$key}->as_usmarc();
2027 my $finalresult = ();
2028 $result_hash->{'hits'} = $numbers;
2029 $finalresult->{'biblioserver'} = $result_hash;
2030 return $finalresult;
2036 elsif ( $ordering =~ /title/ ) {
2038 # the title is in the biblionumbers string, so we just need to build a hash, sort it and return
2040 foreach ( split /;/, $biblionumbers ) {
2041 my ( $biblionumber, $title ) = split /,/, $_;
2043 # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title
2044 # and we don't want to get only 1 result for each of them !!!
2045 # hint & speed improvement : we can order without reading the record
2046 # so order, and read records only for the requested page !
2047 $result{ $title . $biblionumber } = $biblionumber;
2050 # sort the hash and return the same structure as GetRecords (Zebra querying)
2053 if ( $ordering eq 'title_az' ) { # sort by title desc
2054 foreach my $key ( sort ( keys %result ) ) {
2055 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2058 else { # sort by title ASC
2059 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2060 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2064 # limit the $results_per_page to result size if it's more
2065 $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2067 # for the requested page, replace biblionumber by the complete record
2068 # speed improvement : avoid reading too much things
2070 my $counter = $offset ;
2071 $counter <= $offset + $results_per_page ;
2075 $result_hash->{'RECORDS'}[$counter] =
2076 GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc;
2078 my $finalresult = ();
2079 $result_hash->{'hits'} = $numbers;
2080 $finalresult->{'biblioserver'} = $result_hash;
2081 return $finalresult;
2088 # we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking
2091 foreach ( split /;/, $biblionumbers ) {
2092 my ( $biblionumber, $title ) = split /,/, $_;
2093 $title =~ /(.*)-(\d)/;
2098 # note that we + the ranking because ranking is calculated on weight of EACH term requested.
2099 # if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N
2100 # biblio N has ranking = 6
2101 $count_ranking{$biblionumber} += $ranking;
2104 # build the result by "inverting" the count_ranking hash
2105 # hing : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead
2107 foreach ( keys %count_ranking ) {
2108 $result{ sprintf( "%10d", $count_ranking{$_} ) . '-' . $_ } = $_;
2111 # sort the hash and return the same structure as GetRecords (Zebra querying)
2114 foreach my $key ( sort { $b cmp $a } ( keys %result ) ) {
2115 $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key};
2118 # limit the $results_per_page to result size if it's more
2119 $results_per_page = $numbers - 1 if $numbers < $results_per_page;
2121 # for the requested page, replace biblionumber by the complete record
2122 # speed improvement : avoid reading too much things
2124 my $counter = $offset ;
2125 $counter <= $offset + $results_per_page ;
2129 $result_hash->{'RECORDS'}[$counter] =
2130 GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc
2131 if $result_hash->{'RECORDS'}[$counter];
2133 my $finalresult = ();
2134 $result_hash->{'hits'} = $numbers;
2135 $finalresult->{'biblioserver'} = $result_hash;
2136 return $finalresult;
2140 END { } # module clean-up code here (global destructor)
2147 Koha Developement team <info@koha.org>