X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=C4%2FSearch.pm;h=fcbb660f428cb4373146c91ce0ebb768a2c56418;hb=39f9b9ddb0359428e4379a90711eb9dc9961be59;hp=c9f256032403479d3dca70f2df4fe54af68d9945;hpb=85cfe0647ac700cf9f00357978f74e8409e72c99;p=koha.git diff --git a/C4/Search.pm b/C4/Search.pm index c9f2560324..fcbb660f42 100644 --- a/C4/Search.pm +++ b/C4/Search.pm @@ -2,18 +2,18 @@ package C4::Search; # This file is part of Koha. # -# Koha is free software; you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the Free Software -# Foundation; either version 2 of the License, or (at your option) any later -# version. +# Koha is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. # -# Koha is distributed in the hope that it will be useful, but WITHOUT ANY -# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR -# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# Koha is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. # -# You should have received a copy of the GNU General Public License along with -# Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place, -# Suite 330, Boston, MA 02111-1307 USA +# You should have received a copy of the GNU General Public License +# along with Koha; if not, see . use strict; #use warnings; FIXME - Bug 2505 @@ -21,22 +21,22 @@ require Exporter; use C4::Context; use C4::Biblio; # GetMarcFromKohaField, GetBiblioData use C4::Koha; # getFacets +use Koha::DateUtils; use Lingua::Stem; use C4::Search::PazPar2; use XML::Simple; -use C4::Dates qw(format_date); use C4::Members qw(GetHideLostItemsPreference); use C4::XSLT; use C4::Branch; use C4::Reserves; # GetReserveStatus use C4::Debug; use C4::Charset; +use Koha::Libraries; use YAML; use URI::Escape; use Business::ISBN; use MARC::Record; use MARC::Field; -use utf8; use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG); # set the version for version checking @@ -68,10 +68,8 @@ This module provides searching functions for Koha's bibliographic databases &searchResults &getRecords &buildQuery - &AddSearchHistory &GetDistinctValues &enabled_staff_search_views - &PurgeSearchHistory ); # make all your functions, whether exported or not; @@ -144,8 +142,11 @@ sub FindDuplicate { my @results; if (!defined $error) { foreach my $possible_duplicate_record (@{$searchresults}) { - my $marcrecord = - MARC::Record->new_from_usmarc($possible_duplicate_record); + my $marcrecord = new_record_from_zebra( + 'biblioserver', + $possible_duplicate_record + ); + my $result = TransformMarcToKoha( $dbh, $marcrecord, '' ); # FIXME :: why 2 $biblionumber ? @@ -170,7 +171,7 @@ This function provides a simple search API on the bibliographic catalog * $query can be a simple keyword or a complete CCL query * @servers is optional. Defaults to biblioserver as found in koha-conf.xml - * $offset - If present, represents the number of records at the beggining to omit. Defaults to 0 + * $offset - If present, represents the number of records at the beginning to omit. Defaults to 0 * $max_results - if present, determines the maximum number of records to fetch. undef is All. defaults to undef. @@ -289,10 +290,11 @@ sub SimpleSearch { } for my $j ( $first_record .. $last_record ) { - my $record = + my $record = eval { $tmpresults[ $i - 1 ]->record( $j - 1 )->raw() ; # 0 indexed - push @{$results}, $record; + }; + push @{$results}, $record if defined $record; } } ); @@ -337,10 +339,9 @@ sub getRecords { my $results_hashref = (); # Initialize variables for the faceted results objects - my $facets_counter = (); - my $facets_info = (); + my $facets_counter = {}; + my $facets_info = {}; my $facets = getFacets(); - my $facets_maxrecs = C4::Context->preference('maxRecordsForFacets')||20; my @facets_loop; # stores the ref to array of hashes for template facets loop @@ -422,7 +423,7 @@ sub getRecords { warn "Ignoring unrecognized sort '$sort' requested" if $sort_by; } } - if ($sort_by && !$scan) { + if ( $sort_by && !$scan && $results[$i] ) { if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) { warn "WARNING sort $sort_by failed"; } @@ -446,13 +447,14 @@ sub getRecords { else { $times = $size; } + for ( my $j = $offset ; $j < $times ; $j++ ) { my $records_hash; my $record; ## Check if it's an index scan if ($scan) { - my ( $term, $occ ) = $results[ $i - 1 ]->term($j); + my ( $term, $occ ) = $results[ $i - 1 ]->display_term($j); # here we create a minimal MARC record and hand it off to the # template just like a normal result ... perhaps not ideal, but @@ -488,7 +490,6 @@ sub getRecords { # not an index scan else { $record = $results[ $i - 1 ]->record($j)->raw(); - # warn "RECORD $j:".$record; $results_hash->{'RECORDS'}[$j] = $record; } @@ -496,58 +497,13 @@ sub getRecords { } $results_hashref->{ $servers[ $i - 1 ] } = $results_hash; -# Fill the facets while we're looping, but only for the biblioserver and not for a scan + # Fill the facets while we're looping, but only for the + # biblioserver and not for a scan if ( !$scan && $servers[ $i - 1 ] =~ /biblioserver/ ) { - - my $jmax = - $size > $facets_maxrecs ? $facets_maxrecs : $size; - for my $facet (@$facets) { - for ( my $j = 0 ; $j < $jmax ; $j++ ) { - my $render_record = - $results[ $i - 1 ]->record($j)->render(); - my @used_datas = (); - foreach my $tag ( @{ $facet->{tags} } ) { - - # avoid first line - my $tag_num = substr( $tag, 0, 3 ); - my $letters = substr( $tag, 3 ); - my $field_pattern = - '\n' . $tag_num . ' ([^z][^\n]+)'; - $field_pattern = '\n' . $tag_num . ' ([^\n]+)' - if ( int($tag_num) < 10 ); - my @field_tokens = - ( $render_record =~ /$field_pattern/g ); - foreach my $field_token (@field_tokens) { - my @subf = ( $field_token =~ - /\$([a-zA-Z0-9]) ([^\$]+)/g ); - my @values; - for ( my $i = 0 ; $i < @subf ; $i += 2 ) { - if ( $letters =~ $subf[$i] ) { - my $value = $subf[ $i + 1 ]; - $value =~ s/^ *//; - $value =~ s/ *$//; - push @values, $value; - } - } - my $data = join( $facet->{sep}, @values ); - unless ( $data ~~ @used_datas ) { - $facets_counter->{ $facet->{idx} } - ->{$data}++; - push @used_datas, $data; - } - } # fields - } # field codes - } # records - $facets_info->{ $facet->{idx} }->{label_value} = - $facet->{label}; - $facets_info->{ $facet->{idx} }->{expanded} = - $facet->{expanded}; - } # facets + $facets_counter = GetFacets( $results[ $i - 1 ] ); + $facets_info = _get_facets_info( $facets ); } - # warn "connection ", $i-1, ": $size hits"; - # warn $results[$i-1]->record(0)->render() if $size > 0; - # BUILD FACETS if ( $servers[ $i - 1 ] =~ /biblioserver/ ) { for my $link_value ( @@ -567,7 +523,7 @@ sub getRecords { ) { $number_of_facets++; - if ( ( $number_of_facets < 6 ) + if ( ( $number_of_facets <= 5 ) || ( $expanded_facet eq $link_value ) || ( $facets_info->{$link_value}->{'expanded'} ) ) @@ -615,7 +571,7 @@ sub getRecords { { $facet_label_value = $itemtypes->{$one_facet} - ->{'description'}; + ->{translated_description}; } } @@ -644,7 +600,7 @@ sub getRecords { # handle expanded option unless ( $facets_info->{$link_value}->{'expanded'} ) { $expandable = 1 - if ( ( $number_of_facets > 6 ) + if ( ( $number_of_facets > 5 ) && ( $expanded_facet ne $link_value ) ); } push @facets_loop, @@ -663,7 +619,7 @@ sub getRecords { $facets_info->{$link_value}->{'label_value'} =~ /Libraries/ ) - and ( C4::Context->preference('singleBranchMode') ) + and ( Koha::Libraries->search->count == 1 ) ); } } @@ -672,6 +628,217 @@ sub getRecords { return ( undef, $results_hashref, \@facets_loop ); } +sub GetFacets { + + my $rs = shift; + my $facets; + + my $indexing_mode = C4::Context->config('zebra_bib_index_mode') // 'dom'; + my $use_zebra_facets = C4::Context->config('use_zebra_facets') // 0; + + if ( $indexing_mode eq 'dom' && + $use_zebra_facets ) { + $facets = _get_facets_from_zebra( $rs ); + } else { + $facets = _get_facets_from_records( $rs ); + } + + return $facets; +} + +sub _get_facets_from_records { + + my $rs = shift; + + my $facets_maxrecs = C4::Context->preference('maxRecordsForFacets') // 20; + my $facets_config = getFacets(); + my $facets = {}; + my $size = $rs->size(); + my $jmax = $size > $facets_maxrecs + ? $facets_maxrecs + : $size; + + for ( my $j = 0 ; $j < $jmax ; $j++ ) { + + my $marc_record = new_record_from_zebra ( + 'biblioserver', + $rs->record( $j )->raw() + ); + + if ( ! defined $marc_record ) { + warn "ERROR DECODING RECORD - $@: " . + $rs->record( $j )->raw(); + next; + } + + _get_facets_data_from_record( $marc_record, $facets_config, $facets ); + } + + return $facets; +} + +=head2 _get_facets_data_from_record + + C4::Search::_get_facets_data_from_record( $marc_record, $facets, $facets_counter ); + +Internal function that extracts facets information from a MARC::Record object +and populates $facets_counter for using in getRecords. + +$facets is expected to be filled with C4::Koha::getFacets output (i.e. the configured +facets for Zebra). + +=cut + +sub _get_facets_data_from_record { + + my ( $marc_record, $facets, $facets_counter ) = @_; + + for my $facet (@$facets) { + + my @used_datas = (); + + foreach my $tag ( @{ $facet->{ tags } } ) { + + # tag number is the first three digits + my $tag_num = substr( $tag, 0, 3 ); + # subfields are the remainder + my $subfield_letters = substr( $tag, 3 ); + + my @fields = $marc_record->field( $tag_num ); + foreach my $field (@fields) { + # If $field->indicator(1) eq 'z', it means it is a 'see from' + # field introduced because of IncludeSeeFromInSearches, so skip it + next if $field->indicator(1) eq 'z'; + + my $data = $field->as_string( $subfield_letters, $facet->{ sep } ); + + unless ( grep { /^\Q$data\E$/ } @used_datas ) { + push @used_datas, $data; + $facets_counter->{ $facet->{ idx } }->{ $data }++; + } + } + } + } +} + +=head2 _get_facets_from_zebra + + my $facets = _get_facets_from_zebra( $result_set ) + +Retrieves facets for a specified result set. It loops through the facets defined +in C4::Koha::getFacets and returns a hash with the following structure: + + { facet_idx => { + facet_value => count + }, + ... + } + +=cut + +sub _get_facets_from_zebra { + + my $rs = shift; + + # save current elementSetName + my $elementSetName = $rs->option( 'elementSetName' ); + + my $facets_loop = getFacets(); + my $facets_data = {}; + # loop through defined facets and fill the facets hashref + foreach my $facet ( @$facets_loop ) { + + my $idx = $facet->{ idx }; + my $sep = $facet->{ sep }; + my $facet_values = _get_facet_from_result_set( $idx, $rs, $sep ); + if ( $facet_values ) { + # we've actually got a result + $facets_data->{ $idx } = $facet_values; + } + } + # set elementSetName to its previous value to avoid side effects + $rs->option( elementSetName => $elementSetName ); + + return $facets_data; +} + +=head2 _get_facet_from_result_set + + my $facet_values = + C4::Search::_get_facet_from_result_set( $facet_idx, $result_set, $sep ) + +Internal function that extracts facet information for a specific index ($facet_idx) and +returns a hash containing facet values and count: + + { + $facet_value => $count , + ... + } + +Warning: this function has the side effect of changing the elementSetName for the result +set. It is a helper function for the main loop, which takes care of backing it up for +restoring. + +=cut + +sub _get_facet_from_result_set { + + my $facet_idx = shift; + my $rs = shift; + my $sep = shift; + + my $internal_sep = '<*>'; + my $facetMaxCount = C4::Context->preference('FacetMaxCount') // 20; + + return if ( ! defined $facet_idx || ! defined $rs ); + # zebra's facet element, untokenized index + my $facet_element = 'zebra::facet::' . $facet_idx . ':0:' . $facetMaxCount; + # configure zebra results for retrieving the desired facet + $rs->option( elementSetName => $facet_element ); + # get the facet record from result set + my $facet = $rs->record( 0 )->raw; + # if the facet has no restuls... + return if !defined $facet; + # TODO: benchmark DOM vs. SAX performance + my $facet_dom = XML::LibXML->load_xml( + string => ($facet) + ); + my @terms = $facet_dom->getElementsByTagName('term'); + return if ! @terms; + + my $facets = {}; + foreach my $term ( @terms ) { + my $facet_value = $term->textContent; + $facet_value =~ s/\Q$internal_sep\E/$sep/ if defined $sep; + $facets->{ $facet_value } = $term->getAttribute( 'occur' ); + } + + return $facets; +} + +=head2 _get_facets_info + + my $facets_info = C4::Search::_get_facets_info( $facets ) + +Internal function that extracts facets information and properly builds +the data structure needed to render facet labels. + +=cut + +sub _get_facets_info { + + my $facets = shift; + + my $facets_info = {}; + + for my $facet ( @$facets ) { + $facets_info->{ $facet->{ idx } }->{ label_value } = $facet->{ label }; + $facets_info->{ $facet->{ idx } }->{ expanded } = $facet->{ expanded }; + } + + return $facets_info; +} + sub pazGetRecords { my ( $koha_query, $simple_query, $sort_by_ref, $servers_ref, @@ -745,32 +912,6 @@ sub pazGetRecords { return ( undef, $results_hashref, \@facets_loop ); } -# STOPWORDS -sub _remove_stopwords { - my ( $operand, $index ) = @_; - my @stopwords_removed; - - # phrase and exact-qualified indexes shouldn't have stopwords removed - if ( $index !~ m/,(phr|ext)/ ) { - -# remove stopwords from operand : parse all stopwords & remove them (case insensitive) -# we use IsAlpha unicode definition, to deal correctly with diacritics. -# otherwise, a French word like "leçon" woudl be split into "le" "çon", "le" -# is a stopword, we'd get "çon" and wouldn't find anything... -# - foreach ( keys %{ C4::Context->stopwords } ) { - next if ( $_ =~ /(and|or|not)/ ); # don't remove operators - if ( my ($matched) = ($operand =~ - /([^\X\p{isAlnum}]\Q$_\E[^\X\p{isAlnum}]|[^\X\p{isAlnum}]\Q$_\E$|^\Q$_\E[^\X\p{isAlnum}])/gi)) - { - $operand =~ s/\Q$matched\E/ /gi; - push @stopwords_removed, $_; - } - } - } - return ( $operand, \@stopwords_removed ); -} - # TRUNCATION sub _detect_truncation { my ( $operand, $index ) = @_; @@ -847,6 +988,7 @@ sub _build_weighted_query { my $stemming = C4::Context->preference("QueryStemming") || 0; my $weight_fields = C4::Context->preference("QueryWeightFields") || 0; my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0; + $operand =~ s/"/ /g; # Bug 7518: searches with quotation marks don't work my $weighted_query .= "(rk=("; # Specifies that we're applying rank @@ -856,6 +998,7 @@ sub _build_weighted_query { "Title-cover,ext,r1=\"$operand\""; # exact title-cover $weighted_query .= " or ti,ext,r2=\"$operand\""; # exact title $weighted_query .= " or Title-cover,phr,r3=\"$operand\""; # phrase title + $weighted_query .= " or ti,wrdl,r4=\"$operand\""; # words in title #$weighted_query .= " or any,ext,r4=$operand"; # exact any #$weighted_query .=" or kw,wrdl,r5=\"$operand\""; # word list any $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\"" @@ -958,10 +1101,15 @@ sub getIndexes{ 'Corporate-name-seealso', 'Country-publication', 'ctype', + 'curriculum', 'date-entered-on-file', 'Date-of-acquisition', 'Date-of-publication', + 'Date-time-last-modified', 'Dewey-classification', + 'Dissertation-information', + 'diss', + 'dtlm', 'EAN', 'extent', 'fic', @@ -977,6 +1125,8 @@ sub getIndexes{ 'Host-item', 'id-other', 'Illustration-code', + 'Index-term-genre', + 'Index-term-uncontrolled', 'ISBN', 'isbn', 'ISSN', @@ -986,11 +1136,15 @@ sub getIndexes{ 'Koha-Auth-Number', 'l-format', 'language', + 'language-original', 'lc-card', 'LC-card-number', 'lcn', + 'lex', 'llength', 'ln', + 'ln-audio', + 'ln-subtitle', 'Local-classification', 'Local-number', 'Match-heading', @@ -1021,6 +1175,8 @@ sub getIndexes{ 'popularity', 'pubdate', 'Publisher', + 'Provider', + 'pv', 'Record-control-number', 'rcn', 'Record-type', @@ -1042,7 +1198,6 @@ sub getIndexes{ 'su-to', 'su-ut', 'ut', - 'UPC', 'Term-genre-form', 'Term-genre-form-heading', 'Term-genre-form-see', @@ -1051,7 +1206,6 @@ sub getIndexes{ 'Title', 'Title-cover', 'Title-series', - 'Title-host', 'Title-uniform', 'Title-uniform-heading', 'Title-uniform-see', @@ -1086,6 +1240,7 @@ sub getIndexes{ 'mc-itype', 'mc-loc', 'notforloan', + 'Number-local-acquisition', 'onloan', 'price', 'renewals', @@ -1188,7 +1343,10 @@ sub parseQuery { next unless $operands[$ii]; $query .= $operators[ $ii - 1 ] eq 'or' ? ' || ' : ' && ' if ($query); - if ( $indexes[$ii] =~ m/su-/ ) { + if ( $operands[$ii] =~ /^[^"]\W*[-|_\w]*:\w.*[^"]$/ ) { + $query .= $operands[$ii]; + } + elsif ( $indexes[$ii] =~ m/su-/ ) { $query .= $indexes[$ii] . '(' . $operands[$ii] . ')'; } else { @@ -1235,10 +1393,10 @@ sub parseQuery { $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, -$stopwords_removed, $query_type ) = buildQuery ( $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang); +$query_type ) = buildQuery ( $operators, $operands, $indexes, $limits, $sort_by, $scan, $lang); Build queries and limits in CCL, CGI, Human, -handle truncation, stemming, field weighting, stopwords, fuzziness, etc. +handle truncation, stemming, field weighting, fuzziness, etc. See verbose embedded documentation. @@ -1264,7 +1422,6 @@ sub buildQuery { my $auto_truncation = C4::Context->preference("QueryAutoTruncate") || 0; my $weight_fields = C4::Context->preference("QueryWeightFields") || 0; my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0; - my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0; my $query = $operands[0]; my $simple_query = $operands[0]; @@ -1277,11 +1434,9 @@ sub buildQuery { my $limit_cgi; my $limit_desc; - my $stopwords_removed; # flag to determine if stopwords have been removed - my $cclq = 0; my $cclindexes = getIndexes(); - if ( $query !~ /\s*ccl=/ ) { + if ( $query !~ /\s*(ccl=|pqf=|cql=)/ ) { while ( !$cclq && $query =~ /(?:^|\W)([\w-]+)(,[\w-]+)*[:=]/g ) { my $dx = lc($1); $cclq = grep { lc($_) eq $dx } @$cclindexes; @@ -1299,19 +1454,19 @@ sub buildQuery { if ( @limits ) { $q .= ' and '.join(' and ', @limits); } - return ( undef, $q, $q, "q=ccl=".uri_escape($q), $q, '', '', '', '', 'ccl' ); + return ( undef, $q, $q, "q=ccl=".uri_escape_utf8($q), $q, '', '', '', 'ccl' ); } if ( $query =~ /^cql=/ ) { - return ( undef, $', $', "q=cql=".uri_escape($'), $', '', '', '', '', 'cql' ); + return ( undef, $', $', "q=cql=".uri_escape_utf8($'), $', '', '', '', 'cql' ); } if ( $query =~ /^pqf=/ ) { if ($query_desc) { - $query_cgi = "q=".uri_escape($query_desc); + $query_cgi = "q=".uri_escape_utf8($query_desc); } else { $query_desc = $'; - $query_cgi = "q=pqf=".uri_escape($'); + $query_cgi = "q=pqf=".uri_escape_utf8($'); } - return ( undef, $', $', $query_cgi, $query_desc, '', '', '', '', 'pqf' ); + return ( undef, $', $', $query_cgi, $query_desc, '', '', '', 'pqf' ); } # pass nested queries directly @@ -1322,7 +1477,7 @@ sub buildQuery { # return ( # undef, $query, $simple_query, $query_cgi, # $query, $limit, $limit_cgi, $limit_desc, -# $stopwords_removed, 'ccl' +# 'ccl' # ); # } @@ -1346,11 +1501,10 @@ sub buildQuery { # A flag to determine whether or not to add the index to the query my $indexes_set; -# If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling +# If the user is sophisticated enough to specify an index, turn off field weighting, and stemming handling if ( $operands[$i] =~ /\w(:|=)/ || $scan ) { $weight_fields = 0; $stemming = 0; - $remove_stopwords = 0; } else { $operands[$i] =~ s/\?/{?}/g; # need to escape question marks } @@ -1358,27 +1512,43 @@ sub buildQuery { my $index = $indexes[$i]; # Add index-specific attributes + + #Afaik, this 'yr' condition will only ever be met in the staff client advanced search + #for "Publication date", since typing 'yr:YYYY' into the search box produces a CCL query, + #which is processed higher up in this sub. Other than that, year searches are typically + #handled as limits which are not processed her either. + # Date of Publication - if ( $index eq 'yr' ) { - $index .= ",st-numeric"; - $indexes_set++; - $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0; + if ( $index =~ /yr/ ) { + #weight_fields/relevance search causes errors with date ranges + #In the case of YYYY-, it will only return records with a 'yr' of YYYY (not the range) + #In the case of YYYY-YYYY, it will return no results + $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = 0; } # Date of Acquisition - elsif ( $index eq 'acqdate' ) { - $index .= ",st-date-normalized"; - $indexes_set++; - $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0; + elsif ( $index =~ /acqdate/ ) { + #stemming and auto_truncation would have zero impact since it already is YYYY-MM-DD format + #Weight_fields probably SHOULD be turned OFF, otherwise you'll get records floating to the + #top of the results just because they have lots of item records matching that date. + #Fuzzy actually only applies during _build_weighted_query, and is reset there anyway, so + #irrelevant here + $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = 0; } # ISBN,ISSN,Standard Number, don't need special treatment elsif ( $index eq 'nb' || $index eq 'ns' ) { ( $stemming, $auto_truncation, - $weight_fields, $fuzzy_enabled, - $remove_stopwords - ) = ( 0, 0, 0, 0, 0 ); - + $weight_fields, $fuzzy_enabled + ) = ( 0, 0, 0, 0 ); + + if ( $index eq 'nb' ) { + if ( C4::Context->preference("SearchWithISBNVariations") ) { + my @isbns = C4::Koha::GetVariationsOfISBN( $operand ); + $operands[$i] = $operand = '(nb=' . join(' OR nb=', @isbns) . ')'; + $indexes[$i] = $index = ''; + } + } } if(not $index){ @@ -1395,15 +1565,6 @@ sub buildQuery { my $index_plus = $index . $struct_attr . ':'; my $index_plus_comma = $index . $struct_attr . ','; - # Remove Stopwords - if ($remove_stopwords) { - ( $operand, $stopwords_removed ) = - _remove_stopwords( $operand, $index ); - warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG; - warn "REMOVED STOPWORDS: @$stopwords_removed" - if ( $stopwords_removed && $DEBUG ); - } - if ($auto_truncation){ unless ( $index =~ /,(st-|phr|ext)/ ) { #FIXME only valid with LTR scripts @@ -1473,43 +1634,19 @@ sub buildQuery { warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG; - # If there's a previous operand, we need to add an operator - if ($previous_operand) { - - # User-specified operator - if ( $operators[ $i - 1 ] ) { - $query .= " $operators[$i-1] "; - $query .= " $index_plus " unless $indexes_set; - $query .= " $operand"; - $query_cgi .= "&op=".uri_escape($operators[$i-1]); - $query_cgi .= "&idx=".uri_escape($index) if $index; - $query_cgi .= "&q=".uri_escape($operands[$i]) if $operands[$i]; - $query_desc .= - " $operators[$i-1] $index_plus $operands[$i]"; - } - - # Default operator is and - else { - $query .= " and "; - $query .= "$index_plus " unless $indexes_set; - $query .= "$operand"; - $query_cgi .= "&op=and&idx=".uri_escape($index) if $index; - $query_cgi .= "&q=".uri_escape($operands[$i]) if $operands[$i]; - $query_desc .= " and $index_plus $operands[$i]"; - } - } - - # There isn't a pervious operand, don't need an operator - else { + ($query,$query_cgi,$query_desc,$previous_operand) = _build_initial_query({ + query => $query, + query_cgi => $query_cgi, + query_desc => $query_desc, + operator => ($operators[ $i - 1 ]) ? $operators[ $i - 1 ] : '', + parsed_operand => $operand, + original_operand => ($operands[$i]) ? $operands[$i] : '', + index => $index, + index_plus => $index_plus, + indexes_set => $indexes_set, + previous_operand => $previous_operand, + }); - # Field-weighted queries already have indexes set - $query .= " $index_plus " unless $indexes_set; - $query .= $operand; - $query_desc .= " $index_plus $operands[$i]"; - $query_cgi .= "&idx=".uri_escape($index) if $index; - $query_cgi .= "&q=".uri_escape($operands[$i]) if $operands[$i]; - $previous_operand = 1; - } } #/if $operands } # /for } @@ -1538,13 +1675,13 @@ sub buildQuery { if ( $k !~ /mc-i(tem)?type/ ) { # in case the mc-ccode value has complicating chars like ()'s inside it we wrap in quotes $this_limit =~ tr/"//d; - $this_limit = $k.":\"".$v."\""; + $this_limit = $k.":'".$v."'"; } $group_OR_limits{$k} .= " or " if $group_OR_limits{$k}; $limit_desc .= " or " if $group_OR_limits{$k}; $group_OR_limits{$k} .= "$this_limit"; - $limit_cgi .= "&limit=$this_limit"; + $limit_cgi .= "&limit=" . uri_escape_utf8($this_limit); $limit_desc .= " $this_limit"; } @@ -1552,7 +1689,7 @@ sub buildQuery { else { $limit .= " and " if $limit || $query; $limit .= "$this_limit"; - $limit_cgi .= "&limit=$this_limit"; + $limit_cgi .= "&limit=" . uri_escape_utf8($this_limit); if ($this_limit =~ /^branch:(.+)/) { my $branchcode = $1; my $branchname = GetBranchName($branchcode); @@ -1579,9 +1716,13 @@ sub buildQuery { # This is flawed , means we can't search anything with : in it # if user wants to do ccl or cql, start the query with that # $query =~ s/:/=/g; + #NOTE: We use several several different regexps here as you can't have variable length lookback assertions $query =~ s/(?<=(ti|au|pb|su|an|kw|mc|nb|ns)):/=/g; $query =~ s/(?<=(wrdl)):/=/g; $query =~ s/(?<=(trn|phr)):/=/g; + $query =~ s/(?<=(st-numeric)):/=/g; + $query =~ s/(?<=(st-year)):/=/g; + $query =~ s/(?<=(st-date-normalized)):/=/g; $limit =~ s/:/=/g; for ( $query, $query_desc, $limit, $limit_desc ) { s/ +/ /g; # remove extra spaces @@ -1610,10 +1751,46 @@ sub buildQuery { return ( undef, $query, $simple_query, $query_cgi, $query_desc, $limit, $limit_cgi, $limit_desc, - $stopwords_removed, $query_type + $query_type ); } +=head2 _build_initial_query + + ($query, $query_cgi, $query_desc, $previous_operand) = _build_initial_query($initial_query_params); + + Build a section of the initial query containing indexes, operators, and operands. + +=cut + +sub _build_initial_query { + my ($params) = @_; + + my $operator = ""; + if ($params->{previous_operand}){ + #If there is a previous operand, add a supplied operator or the default 'and' + $operator = ($params->{operator}) ? " ".($params->{operator})." " : ' and '; + } + + #NOTE: indexes_set is typically set when doing truncation or field weighting + my $operand = ($params->{indexes_set}) ? $params->{parsed_operand} : $params->{index_plus}.$params->{parsed_operand}; + + #e.g. "kw,wrdl:test" + #e.g. " and kw,wrdl:test" + $params->{query} .= $operator . $operand; + + $params->{query_cgi} .= "&op=".uri_escape_utf8($operator) if $operator; + $params->{query_cgi} .= "&idx=".uri_escape_utf8($params->{index}) if $params->{index}; + $params->{query_cgi} .= "&q=".uri_escape_utf8($params->{original_operand}) if $params->{original_operand}; + + #e.g. " and kw,wrdl: test" + $params->{query_desc} .= $operator . $params->{index_plus} . " " . $params->{original_operand}; + + $params->{previous_operand} = 1 unless $params->{previous_operand}; #If there is no previous operand, mark this as one + + return ($params->{query}, $params->{query_cgi}, $params->{query_desc}, $params->{previous_operand}); +} + =head2 searchResults my @search_results = searchResults($search_context, $searchdesc, $hits, @@ -1657,31 +1834,21 @@ sub searchResults { # get notforloan authorised value list (see $shelflocations FIXME) my $notforloan_authorised_value = GetAuthValCode('items.notforloan',''); - #Build itemtype hash - #find itemtype & itemtype image - my %itemtypes; - $bsth = - $dbh->prepare( - "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes" - ); - $bsth->execute(); - while ( my $bdata = $bsth->fetchrow_hashref ) { - foreach (qw(description imageurl summary notforloan)) { - $itemtypes{ $bdata->{'itemtype'} }->{$_} = $bdata->{$_}; - } - } + #Get itemtype hash + my %itemtypes = %{ GetItemTypes() }; #search item field code my ($itemtag, undef) = &GetMarcFromKohaField( "items.itemnumber", "" ); ## find column names of items related to MARC - my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items"); - $sth2->execute; my %subfieldstosearch; - while ( ( my $column ) = $sth2->fetchrow ) { + my @columns = Koha::Database->new()->schema()->resultset('Item')->result_source->columns; + for my $column ( @columns ) { my ( $tagfield, $tagsubfield ) = &GetMarcFromKohaField( "items." . $column, "" ); - $subfieldstosearch{$column} = $tagsubfield; + if ( defined $tagsubfield ) { + $subfieldstosearch{$column} = $tagsubfield; + } } # handle which records to actually retrieve @@ -1693,16 +1860,28 @@ sub searchResults { $times = $hits; # FIXME: if $hits is undefined, why do we want to equal it? } - my $marcflavour = C4::Context->preference("marcflavour"); + my $marcflavour = C4::Context->preference("marcflavour"); # We get the biblionumber position in MARC my ($bibliotag,$bibliosubf)=GetMarcFromKohaField('biblio.biblionumber',''); # loop through all of the records we've retrieved for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) { - my $marcrecord = eval { MARC::File::USMARC::decode( $marcresults->[$i] ); }; - if ( $@ ) { - warn "ERROR DECODING RECORD - $@: " . $marcresults->[$i]; - next; + + my $marcrecord; + if ($scan) { + # For Scan searches we built USMARC data + $marcrecord = MARC::Record->new_from_usmarc( $marcresults->[$i]); + } else { + # Normal search, render from Zebra's output + $marcrecord = new_record_from_zebra( + 'biblioserver', + $marcresults->[$i] + ); + + if ( ! defined $marcrecord ) { + warn "ERROR DECODING RECORD - $@: " . $marcresults->[$i]; + next; + } } my $fw = $scan @@ -1710,6 +1889,8 @@ sub searchResults { : $bibliotag < 10 ? GetFrameworkCode($marcrecord->field($bibliotag)->data) : GetFrameworkCode($marcrecord->subfield($bibliotag,$bibliosubf)); + + SetUTF8Flag($marcrecord); my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, $fw ); $oldbiblio->{subtitle} = GetRecordValue('subtitle', $marcrecord, $fw); $oldbiblio->{result_number} = $i + 1; @@ -1726,7 +1907,7 @@ sub searchResults { # edition information, if any $oldbiblio->{edition} = $oldbiblio->{editionstatement}; - $oldbiblio->{description} = $itemtypes{ $oldbiblio->{itemtype} }->{description}; + $oldbiblio->{description} = $itemtypes{ $oldbiblio->{itemtype} }->{translated_description}; # Build summary if there is one (the summary is defined in the itemtypes table) # FIXME: is this used anywhere, I think it can be commented out? -- JF if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) { @@ -1760,12 +1941,7 @@ sub searchResults { if($marcrecord->field($1)){ my @repl = $marcrecord->field($1)->subfield($2); my $subfieldvalue = $repl[$i]; - - if (! utf8::is_utf8($subfieldvalue)) { - utf8::decode($subfieldvalue); - } - - $newline =~ s/\[$tag\]/$subfieldvalue/g; + $newline =~ s/\[$tag\]/$subfieldvalue/g; } } $newsummary .= "$newline\n"; @@ -1829,6 +2005,7 @@ sub searchResults { my $item_in_transit_count = 0; my $can_place_holds = 0; my $item_onhold_count = 0; + my $notforloan_count = 0; my $items_count = scalar(@fields); my $maxitems_pref = C4::Context->preference('maxItemsinSearchResults'); my $maxitems = $maxitems_pref ? $maxitems_pref - 1 : 1; @@ -1842,7 +2019,7 @@ sub searchResults { foreach my $code ( keys %subfieldstosearch ) { $item->{$code} = $field->subfield( $subfieldstosearch{$code} ); } - $item->{description} = $itemtypes{ $item->{itype} }{description}; + $item->{description} = $itemtypes{ $item->{itype} }{translated_description}; # OPAC hidden items if ($is_opac) { @@ -1860,8 +2037,8 @@ sub searchResults { } } - my $hbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'homebranch' : 'holdingbranch'; - my $otherbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'holdingbranch' : 'homebranch'; + my $hbranch = C4::Context->preference('StaffSearchResultsDisplayBranch'); + my $otherbranch = $hbranch eq 'homebranch' ? 'holdingbranch' : 'homebranch'; # set item's branch name, use HomeOrHoldingBranch syspref first, fall back to the other one if ($item->{$hbranch}) { @@ -1874,21 +2051,26 @@ sub searchResults { my $prefix = $item->{$hbranch} . '--' . $item->{location} . $item->{itype} . $item->{itemcallnumber}; # For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item my $userenv = C4::Context->userenv; - if ( $item->{onloan} && !(C4::Members::GetHideLostItemsPreference($userenv->{'number'}) && $item->{itemlost}) ) { + if ( $item->{onloan} + && !( C4::Members::GetHideLostItemsPreference( $userenv->{'number'} ) && $item->{itemlost} ) ) + { $onloan_count++; - my $key = $prefix . $item->{onloan} . $item->{barcode}; - $onloan_items->{$key}->{due_date} = format_date($item->{onloan}); - $onloan_items->{$key}->{count}++ if $item->{$hbranch}; - $onloan_items->{$key}->{branchname} = $item->{branchname}; - $onloan_items->{$key}->{location} = $shelflocations->{ $item->{location} }; - $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber}; - $onloan_items->{$key}->{description} = $item->{description}; - $onloan_items->{$key}->{imageurl} = getitemtypeimagelocation( $search_context, $itemtypes{ $item->{itype} }->{imageurl} ); + my $key = $prefix . $item->{onloan} . $item->{barcode}; + $onloan_items->{$key}->{due_date} = output_pref( { dt => dt_from_string( $item->{onloan} ), dateonly => 1 } ); + $onloan_items->{$key}->{count}++ if $item->{$hbranch}; + $onloan_items->{$key}->{branchname} = $item->{branchname}; + $onloan_items->{$key}->{location} = $shelflocations->{ $item->{location} }; + $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber}; + $onloan_items->{$key}->{description} = $item->{description}; + $onloan_items->{$key}->{imageurl} = + getitemtypeimagelocation( $search_context, $itemtypes{ $item->{itype} }->{imageurl} ); + # if something's checked out and lost, mark it as 'long overdue' if ( $item->{itemlost} ) { - $onloan_items->{$prefix}->{longoverdue}++; + $onloan_items->{$key}->{longoverdue}++; $longoverdue_count++; - } else { # can place holds as long as item isn't lost + } + else { # can place holds as long as item isn't lost $can_place_holds = 1; } } @@ -1896,9 +2078,13 @@ sub searchResults { # items not on loan, but still unavailable ( lost, withdrawn, damaged ) else { + $item->{notforloan}=1 if !$item->{notforloan} && $itemtypes{ C4::Context->preference("item-level_itypes")? $item->{itype}: $oldbiblio->{itemtype} }->{notforloan}; + # item is on order if ( $item->{notforloan} < 0 ) { $ordered_count++; + } elsif ( $item->{notforloan} > 0 ) { + $notforloan_count++; } # is item in transit? @@ -1912,7 +2098,8 @@ sub searchResults { || $item->{itemlost} || $item->{damaged} || $item->{notforloan} - || $items_count > 20) { + || ( C4::Context->preference('MaxSearchResultsItemsPerRecordStatusCheck') + && $items_count > C4::Context->preference('MaxSearchResultsItemsPerRecordStatusCheck') ) ) { # A couple heuristics to limit how many times # we query the database for item transfer information, sacrificing @@ -1927,7 +2114,7 @@ sub searchResults { # should map transit status to record indexed in Zebra. # ($transfertwhen, $transfertfrom, $transfertto) = C4::Circulation::GetTransfers($item->{itemnumber}); - $reservestatus = C4::Reserves::GetReserveStatus( $item->{itemnumber}, $oldbiblio->{biblionumber} ); + $reservestatus = C4::Reserves::GetReserveStatus( $item->{itemnumber} ); } # item is withdrawn, lost, damaged, not for loan, reserved or in transit @@ -2005,13 +2192,10 @@ sub searchResults { } # XSLT processing of some stuff - use C4::Charset; - SetUTF8Flag($marcrecord); - warn $marcrecord->as_formatted if $DEBUG; - my $interface = $search_context eq 'opac' ? 'OPAC' : ''; - if (!$scan && C4::Context->preference($interface . "XSLTResultsDisplay")) { + my $interface = $search_context eq 'opac' ? 'OPAC' : ''; + if (!$scan && C4::Context->preference($interface . "XSLTResultsDisplay")) { $oldbiblio->{XSLTResultsRecord} = XSLTParse4Display($oldbiblio->{biblionumber}, $marcrecord, $interface."XSLTResultsDisplay", 1, \@hiddenitems); - # the last parameter tells Koha to clean up the problematic ampersand entities that Zebra outputs + # the last parameter tells Koha to clean up the problematic ampersand entities that Zebra outputs } # if biblio level itypes are used and itemtype is notforloan, it can't be reserved either @@ -2038,6 +2222,7 @@ sub searchResults { $oldbiblio->{intransitcount} = $item_in_transit_count; $oldbiblio->{onholdcount} = $item_onhold_count; $oldbiblio->{orderedcount} = $ordered_count; + $oldbiblio->{notforloancount} = $notforloan_count; if (C4::Context->preference("AlternateHoldingsField") && $items_count == 0) { my $fieldspec = C4::Context->preference("AlternateHoldingsField"); @@ -2195,42 +2380,13 @@ sub enabled_staff_search_views ); } -sub AddSearchHistory{ - my ($borrowernumber,$session,$query_desc,$query_cgi, $total)=@_; - my $dbh = C4::Context->dbh; - - # Add the request the user just made - my $sql = "INSERT INTO search_history(userid, sessionid, query_desc, query_cgi, total, time) VALUES(?, ?, ?, ?, ?, NOW())"; - my $sth = $dbh->prepare($sql); - $sth->execute($borrowernumber, $session, $query_desc, $query_cgi, $total); - return $dbh->last_insert_id(undef, 'search_history', undef,undef,undef); -} - -sub GetSearchHistory{ - my ($borrowernumber,$session)=@_; - my $dbh = C4::Context->dbh; - - # Add the request the user just made - my $query = "SELECT FROM search_history WHERE (userid=? OR sessionid=?)"; - my $sth = $dbh->prepare($query); - $sth->execute($borrowernumber, $session); - return $sth->fetchall_hashref({}); -} - -sub PurgeSearchHistory{ - my ($pSearchhistory)=@_; - my $dbh = C4::Context->dbh; - my $sth = $dbh->prepare("DELETE FROM search_history WHERE time < DATE_SUB( NOW(), INTERVAL ? DAY )"); - $sth->execute($pSearchhistory) or die $dbh->errstr; -} - =head2 z3950_search_args $arrayref = z3950_search_args($matchpoints) This function returns an array reference that contains the search parameters to be passed to the Z39.50 search script (z3950_search.pl). The array elements -are hash refs whose keys are name, value and encvalue, and whose values are the +are hash refs whose keys are name and value, and whose values are the name of a search parameter, the value of that search parameter and the URL encoded value of that parameter. @@ -2241,7 +2397,7 @@ data is in a hash reference in $matchpoints, as returned by Biblio::GetBiblioDat If $matchpoints is a scalar, it is assumed to be an unnamed query descriptor, e.g. a general purpose search argument. In this case, the returned array contains only -entry: the key is 'title' and the value and encvalue are derived from $matchpoints. +entry: the key is 'title' and the value is derived from $matchpoints. If a search parameter value is undefined or empty, it is not included in the returned array. @@ -2269,11 +2425,18 @@ $template->param ( MYLOOP => C4::Search::z3950_search_args($searchscalar) ) sub z3950_search_args { my $bibrec = shift; - my $isbn = Business::ISBN->new($bibrec); + + my $isbn_string = ref( $bibrec ) ? $bibrec->{title} : $bibrec; + my $isbn = Business::ISBN->new( $isbn_string ); if (defined $isbn && $isbn->is_valid) { - $bibrec = { isbn => $bibrec } if !ref $bibrec; + if ( ref($bibrec) ) { + $bibrec->{isbn} = $isbn_string; + $bibrec->{title} = undef; + } else { + $bibrec = { isbn => $isbn_string }; + } } else { $bibrec = { title => $bibrec } if !ref $bibrec; @@ -2281,8 +2444,8 @@ sub z3950_search_args { my $array = []; for my $field (qw/ lccn isbn issn title author dewey subject /) { - my $encvalue = URI::Escape::uri_escape_utf8($bibrec->{$field}); - push @$array, { name=>$field, value=>$bibrec->{$field}, encvalue=>$encvalue } if defined $bibrec->{$field}; + push @$array, { name => $field, value => $bibrec->{$field} } + if defined $bibrec->{$field}; } return $array; } @@ -2365,6 +2528,43 @@ sub _ZOOM_event_loop { } } +=head2 new_record_from_zebra + +Given raw data from a Zebra result set, return a MARC::Record object + +This helper function is needed to take into account all the involved +system preferences and configuration variables to properly create the +MARC::Record object. + +If we are using GRS-1, then the raw data we get from Zebra should be USMARC +data. If we are using DOM, then it has to be MARCXML. + +=cut + +sub new_record_from_zebra { + + my $server = shift; + my $raw_data = shift; + # Set the default indexing modes + my $index_mode = ( $server eq 'biblioserver' ) + ? C4::Context->config('zebra_bib_index_mode') // 'dom' + : C4::Context->config('zebra_auth_index_mode') // 'dom'; + + my $marc_record = eval { + if ( $index_mode eq 'dom' ) { + MARC::Record->new_from_xml( $raw_data, 'UTF-8' ); + } else { + MARC::Record->new_from_usmarc( $raw_data ); + } + }; + + if ($@) { + return; + } else { + return $marc_record; + } + +} END { } # module clean-up code here (global destructor)