X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=C4%2FSearch.pm;h=45a4a069a397cc22d5234f156f5441ac1ea42e1b;hb=97f8b3abfff5acefe8a5b948b3c63ef7e5cb851e;hp=fde3f36afef2d38142eec3fe0785dfe2bf22d506;hpb=a4c187e895ec22976cda9362f0bb2ba0d6c27a33;p=koha.git diff --git a/C4/Search.pm b/C4/Search.pm index fde3f36afe..45a4a069a3 100644 --- a/C4/Search.pm +++ b/C4/Search.pm @@ -21,13 +21,18 @@ use C4::Context; use C4::Biblio; # GetMarcFromKohaField use C4::Koha; # getFacets use Lingua::Stem; -use C4::Date; +use C4::Search::PazPar2; +use XML::Simple; +use C4::Dates qw(format_date); +use C4::XSLT; -use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); +use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG); # set the version for version checking -$VERSION = 3.00; -$DEBUG=1; +BEGIN { + $VERSION = 3.01; + $DEBUG = ($ENV{DEBUG}) ? 1 : 0; +} =head1 NAME @@ -35,11 +40,11 @@ C4::Search - Functions for searching the Koha catalog. =head1 SYNOPSIS -see opac/opac-search.pl or catalogue/search.pl for example of usage +See opac/opac-search.pl or catalogue/search.pl for example of usage =head1 DESCRIPTION -This module provides the searching facilities for the Koha into a zebra catalog. +This module provides searching functions for Koha's bibliographic databases =head1 FUNCTIONS @@ -47,9 +52,9 @@ This module provides the searching facilities for the Koha into a zebra catalog. @ISA = qw(Exporter); @EXPORT = qw( - &SimpleSearch &findseealso &FindDuplicate + &SimpleSearch &searchResults &getRecords &buildQuery @@ -68,13 +73,15 @@ my $dbh =C4::Context->dbh; C<$fields> is a reference to the fields array -This function modify the @$fields array and add related fields to search on. +This function modifies the @$fields array and adds related fields to search on. + +FIXME: this function is probably deprecated in Koha 3 =cut sub findseealso { my ( $dbh, $fields ) = @_; - my $tagslib = GetMarcStructure( 1 ); + my $tagslib = GetMarcStructure(1); for ( my $i = 0 ; $i <= $#{$fields} ; $i++ ) { my ($tag) = substr( @$fields[$i], 1, 3 ); my ($subfield) = substr( @$fields[$i], 4, 1 ); @@ -87,6 +94,8 @@ sub findseealso { ($biblionumber,$biblionumber,$title) = FindDuplicate($record); +This function attempts to find duplicate records using a hard-coded, fairly simplistic algorithm + =cut sub FindDuplicate { @@ -103,10 +112,7 @@ sub FindDuplicate { # ... normalize first if ( $result->{isbn} ) { $result->{isbn} =~ s/\(.*$//; - $result->{isbn} =~ s/\s+$//; - } - #$search->{'avoidquerylog'}=1; - if ( $result->{isbn} ) { + $result->{isbn} =~ s/\s+$//; $query = "isbn=$result->{isbn}"; } else { @@ -114,53 +120,61 @@ sub FindDuplicate { $result->{title} =~ s /\"//g; $result->{title} =~ s /\(//g; $result->{title} =~ s /\)//g; - # remove valid operators + + # FIXME: instead of removing operators, could just do + # quotes around the value $result->{title} =~ s/(and|or|not)//g; $query = "ti,ext=$result->{title}"; - $query .= " and mt=$result->{itemtype}" if ($result->{itemtype}); - if ($result->{author}){ - $result->{author} =~ s /\\//g; - $result->{author} =~ s /\"//g; - $result->{author} =~ s /\(//g; - $result->{author} =~ s /\)//g; - # remove valid operators - $result->{author} =~ s/(and|or|not)//g; - $query .= " and au,ext=$result->{author}"; - } + $query .= " and itemtype=$result->{itemtype}" + if ( $result->{itemtype} ); + if ( $result->{author} ) { + $result->{author} =~ s /\\//g; + $result->{author} =~ s /\"//g; + $result->{author} =~ s /\(//g; + $result->{author} =~ s /\)//g; + + # remove valid operators + $result->{author} =~ s/(and|or|not)//g; + $query .= " and au,ext=$result->{author}"; + } } - my ($error,$searchresults) = - SimpleSearch($query); # FIXME :: hardcoded ! + + # FIXME: add error handling + my ( $error, $searchresults ) = SimpleSearch($query); # FIXME :: hardcoded ! my @results; foreach my $possible_duplicate_record (@$searchresults) { my $marcrecord = MARC::Record->new_from_usmarc($possible_duplicate_record); my $result = TransformMarcToKoha( $dbh, $marcrecord, '' ); - + # FIXME :: why 2 $biblionumber ? - if ($result){ - push @results, $result->{'biblionumber'}; - push @results, $result->{'title'}; + if ($result) { + push @results, $result->{'biblionumber'}; + push @results, $result->{'title'}; } } - return @results; + return @results; } =head2 SimpleSearch -($error,$results) = SimpleSearch($query,@servers); +($error,$results) = SimpleSearch( $query, $offset, $max_results, [ @servers ] ); -this function performs a simple search on the catalog using zoom. +This function provides a simple search API on the bibliographic catalog =over 2 =item C - * $query could be a simple keyword or a complete CCL query wich is depending on your ccl file. - * @servers is optionnal. default one is read on koha.xml + * $query can be a simple keyword or a complete CCL query + * @servers is optional. Defaults to biblioserver as found in koha-conf.xml + * $offset - If present, represents the number of records at the beggining to omit. Defaults to 0 + * $max_results - if present, determines the maximum number of records to fetch. undef is All. defaults to undef. + =item C - * $error is a string which containt the description error if there is one. Else it's empty. - * \@results is an array of marc record. + * $error is a empty unless an error is detected + * \@results is an array of records. =item C @@ -194,125 +208,151 @@ for(my $i=0;$i<$hits;$i++) { push @results, \%resultsloop; } + $template->param(result=>\@results); =cut sub SimpleSearch { - my $query = shift; - if (C4::Context->preference('NoZebra')) { - my $result = NZorder(NZanalyse($query))->{'biblioserver'}->{'RECORDS'}; - return (undef,$result); - } else { - my @servers = @_; + my ( $query, $offset, $max_results, $servers ) = @_; + + if ( C4::Context->preference('NoZebra') ) { + my $result = NZorder( NZanalyse($query) )->{'biblioserver'}; + my $search_result = + ( $result->{hits} + && $result->{hits} > 0 ? $result->{'RECORDS'} : [] ); + return ( undef, $search_result, scalar($search_result) ); + } + else { + # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too. + my @servers = defined ( $servers ) ? @$servers : ( "biblioserver" ); my @results; my @tmpresults; my @zconns; - return ( "No query entered", undef ) unless $query; - - #@servers = (C4::Context->config("biblioserver")) unless @servers; - @servers = - ("biblioserver") unless @servers - ; # FIXME hardcoded value. See catalog/search.pl & opac-search.pl too. - - # Connect & Search + my $total_hits; + return ( "No query entered", undef, undef ) unless $query; + + # Initialize & Search Zebra for ( my $i = 0 ; $i < @servers ; $i++ ) { - $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 ); - $tmpresults[$i] = - $zconns[$i] - ->search( new ZOOM::Query::CCL2RPN( $query, $zconns[$i] ) ); - - # getting error message if one occured. - my $error = - $zconns[$i]->errmsg() . " (" - . $zconns[$i]->errcode() . ") " - . $zconns[$i]->addinfo() . " " - . $zconns[$i]->diagset(); - - return ( $error, undef ) if $zconns[$i]->errcode(); + eval { + $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 ); + $tmpresults[$i] = + $zconns[$i] + ->search( new ZOOM::Query::CCL2RPN( $query, $zconns[$i] ) ); + + # error handling + my $error = + $zconns[$i]->errmsg() . " (" + . $zconns[$i]->errcode() . ") " + . $zconns[$i]->addinfo() . " " + . $zconns[$i]->diagset(); + + return ( $error, undef, undef ) if $zconns[$i]->errcode(); + }; + if ($@) { + + # caught a ZOOM::Exception + my $error = + $@->message() . " (" + . $@->code() . ") " + . $@->addinfo() . " " + . $@->diagset(); + warn $error; + return ( $error, undef, undef ); + } } - my $hits; - my $ev; while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) { - $ev = $zconns[ $i - 1 ]->last_event(); - if ( $ev == ZOOM::Event::ZEND ) { - $hits = $tmpresults[ $i - 1 ]->size(); - } - if ( $hits > 0 ) { - for ( my $j = 0 ; $j < $hits ; $j++ ) { - my $record = $tmpresults[ $i - 1 ]->record($j)->raw(); + my $event = $zconns[ $i - 1 ]->last_event(); + if ( $event == ZOOM::Event::ZEND ) { + + my $first_record = defined( $offset ) ? $offset+1 : 1; + my $hits = $tmpresults[ $i - 1 ]->size(); + $total_hits += $hits; + my $last_record = $hits; + if ( defined $max_results && $offset + $max_results < $hits ) { + $last_record = $offset + $max_results; + } + + for my $j ( $first_record..$last_record ) { + my $record = $tmpresults[ $i - 1 ]->record( $j-1 )->raw(); # 0 indexed push @results, $record; } } } - return ( undef, \@results ); + + return ( undef, \@results, $total_hits ); } } -# performs the search +=head2 getRecords + +( undef, $results_hashref, \@facets_loop ) = getRecords ( + + $koha_query, $simple_query, $sort_by_ref, $servers_ref, + $results_per_page, $offset, $expanded_facet, $branches, + $query_type, $scan + ); + +The all singing, all dancing, multi-server, asynchronous, scanning, +searching, record nabbing, facet-building + +See verbse embedded documentation. + +=cut + sub getRecords { my ( - $koha_query, $simple_query, $sort_by_ref, - $servers_ref, $results_per_page, $offset, - $expanded_facet, $branches, $query_type, - $scan + $koha_query, $simple_query, $sort_by_ref, $servers_ref, + $results_per_page, $offset, $expanded_facet, $branches, + $query_type, $scan ) = @_; -# warn "Query : $koha_query"; + my @servers = @$servers_ref; my @sort_by = @$sort_by_ref; - # create the zoom connection and query object + # Initialize variables for the ZOOM connection and results object my $zconn; my @zconns; my @results; my $results_hashref = (); - ### FACETED RESULTS + # Initialize variables for the faceted results objects my $facets_counter = (); my $facets_info = (); my $facets = getFacets(); - #### INITIALIZE SOME VARS USED CREATE THE FACETED RESULTS - my @facets_loop; # stores the ref to array of hashes for template + my @facets_loop + ; # stores the ref to array of hashes for template facets loop + + ### LOOP THROUGH THE SERVERS for ( my $i = 0 ; $i < @servers ; $i++ ) { $zconns[$i] = C4::Context->Zconn( $servers[$i], 1 ); # perform the search, create the results objects # if this is a local search, use the $koha-query, if it's a federated one, use the federated-query - my $query_to_use; - if ( $servers[$i] =~ /biblioserver/ ) { - $query_to_use = $koha_query; - } - else { - $query_to_use = $simple_query; - } + my $query_to_use = ($servers[$i] =~ /biblioserver/) ? $koha_query : $simple_query; - $query_to_use = $simple_query if $scan; + #$query_to_use = $simple_query if $scan; + warn $simple_query if ( $scan and $DEBUG ); - # check if we've got a query_type defined + # Check if we've got a query_type defined, if so, use it eval { if ($query_type) { if ( $query_type =~ /^ccl/ ) { $query_to_use =~ s/\:/\=/g; # change : to = last minute (FIXME) - - # warn "CCL : $query_to_use"; $results[$i] = $zconns[$i]->search( new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] ) ); } elsif ( $query_type =~ /^cql/ ) { - - # warn "CQL : $query_to_use"; $results[$i] = $zconns[$i]->search( new ZOOM::Query::CQL( $query_to_use, $zconns[$i] ) ); } elsif ( $query_type =~ /^pqf/ ) { - - # warn "PQF : $query_to_use"; $results[$i] = $zconns[$i]->search( new ZOOM::Query::PQF( $query_to_use, $zconns[$i] ) ); @@ -320,14 +360,12 @@ sub getRecords { } else { if ($scan) { - warn "preparing to scan:$query_to_use"; $results[$i] = $zconns[$i]->scan( new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] ) ); } else { - # warn "LAST : $query_to_use"; $results[$i] = $zconns[$i]->search( new ZOOM::Query::CCL2RPN( $query_to_use, $zconns[$i] ) @@ -339,59 +377,63 @@ sub getRecords { warn "WARNING: query problem with $query_to_use " . $@; } - # concatenate the sort_by limits and pass them to the results object + # Concatenate the sort_by limits and pass them to the results object + # Note: sort will override rank my $sort_by; foreach my $sort (@sort_by) { - if ($sort eq "author_az") { - $sort_by.="1=1003 sort( "yaz", $sort_by ) < 0) { + if ( $results[$i]->sort( "yaz", $sort_by ) < 0 ) { warn "WARNING sort $sort_by failed"; } } - } + } # finished looping through servers + + # The big moment: asynchronously retrieve results from all servers while ( ( my $i = ZOOM::event( \@zconns ) ) != 0 ) { my $ev = $zconns[ $i - 1 ]->last_event(); if ( $ev == ZOOM::Event::ZEND ) { + next unless $results[ $i - 1 ]; my $size = $results[ $i - 1 ]->size(); if ( $size > 0 ) { my $results_hash; - #$results_hash->{'server'} = $servers[$i-1]; + # loop through the results $results_hash->{'hits'} = $size; my $times; @@ -401,75 +443,78 @@ sub getRecords { else { $times = $size; } - for ( my $j = $offset ; $j < $times ; $j++ ) - { #(($offset+$count<=$size) ? ($offset+$count):$size) ; $j++){ + for ( my $j = $offset ; $j < $times ; $j++ ) { my $records_hash; my $record; my $facet_record; - ## This is just an index scan + + ## Check if it's an index scan if ($scan) { my ( $term, $occ ) = $results[ $i - 1 ]->term($j); + # here we create a minimal MARC record and hand it off to the # template just like a normal result ... perhaps not ideal, but # it works for now my $tmprecord = MARC::Record->new(); $tmprecord->encoding('UTF-8'); my $tmptitle; - - # srote the minimal record in author/title (depending on MARC flavour) - if ( C4::Context->preference("marcflavour") eq - "UNIMARC" ) - { - $tmptitle = MARC::Field->new( - '200', ' ', ' ', - a => $term, - f => $occ - ); - } - else { - $tmptitle = MARC::Field->new( - '245', ' ', ' ', - a => $term, - b => $occ - ); + my $tmpauthor; + + # the minimal record in author/title (depending on MARC flavour) + if (C4::Context->preference("marcflavour") eq "UNIMARC") { + $tmptitle = MARC::Field->new('200',' ',' ', a => $term, f => $occ); + $tmprecord->append_fields($tmptitle); + } else { + $tmptitle = MARC::Field->new('245',' ',' ', a => $term,); + $tmpauthor = MARC::Field->new('100',' ',' ', a => $occ,); + $tmprecord->append_fields($tmptitle); + $tmprecord->append_fields($tmpauthor); } - $tmprecord->append_fields($tmptitle); - $results_hash->{'RECORDS'}[$j] = - $tmprecord->as_usmarc(); + $results_hash->{'RECORDS'}[$j] = $tmprecord->as_usmarc(); } + + # not an index scan else { $record = $results[ $i - 1 ]->record($j)->raw(); - #warn "RECORD $j:".$record; - $results_hash->{'RECORDS'}[$j] = - $record; # making a reference to a hash - # Fill the facets while we're looping - $facet_record = MARC::Record->new_from_usmarc($record); - - #warn $servers[$i-1].$facet_record->title(); - for ( my $k = 0 ; $k <= @$facets ; $k++ ) { - if ( $facets->[$k] ) { - my @fields; - for my $tag ( @{ $facets->[$k]->{'tags'} } ) { - push @fields, $facet_record->field($tag); - } - for my $field (@fields) { - my @subfields = $field->subfields(); - for my $subfield (@subfields) { - my ( $code, $data ) = @$subfield; - if ( $code eq - $facets->[$k]->{'subfield'} ) - { - $facets_counter->{ $facets->[$k] - ->{'link_value'} }->{$data}++; + # warn "RECORD $j:".$record; + $results_hash->{'RECORDS'}[$j] = $record; + + # Fill the facets while we're looping, but only for the biblioserver + $facet_record = MARC::Record->new_from_usmarc($record) + if $servers[ $i - 1 ] =~ /biblioserver/; + + #warn $servers[$i-1]."\n".$record; #.$facet_record->title(); + if ($facet_record) { + for ( my $k = 0 ; $k <= @$facets ; $k++ ) { + + if ( $facets->[$k] ) { + my @fields; + for my $tag ( @{ $facets->[$k]->{'tags'} } ) + { + push @fields, + $facet_record->field($tag); + } + for my $field (@fields) { + my @subfields = $field->subfields(); + for my $subfield (@subfields) { + my ( $code, $data ) = @$subfield; + if ( $code eq + $facets->[$k]->{'subfield'} ) + { + $facets_counter->{ $facets->[$k] + ->{'link_value'} } + ->{$data}++; + } } } + $facets_info->{ $facets->[$k] + ->{'link_value'} }->{'label_value'} = + $facets->[$k]->{'label_value'}; + $facets_info->{ $facets->[$k] + ->{'link_value'} }->{'expanded'} = + $facets->[$k]->{'expanded'}; } - $facets_info->{ $facets->[$k]->{'link_value'} } - ->{'label_value'} = - $facets->[$k]->{'label_value'}; - $facets_info->{ $facets->[$k]->{'link_value'} } - ->{'expanded'} = $facets->[$k]->{'expanded'}; } } } @@ -477,289 +522,502 @@ sub getRecords { $results_hashref->{ $servers[ $i - 1 ] } = $results_hash; } - #print "connection ", $i-1, ": $size hits"; - #print $results[$i-1]->record(0)->render() if $size > 0; + # warn "connection ", $i-1, ": $size hits"; + # warn $results[$i-1]->record(0)->render() if $size > 0; + # BUILD FACETS - for my $link_value ( - sort { $facets_counter->{$b} <=> $facets_counter->{$a} } - keys %$facets_counter - ) - { - my $expandable; - my $number_of_facets; - my @this_facets_array; - for my $one_facet ( - sort { - $facets_counter->{$link_value} - ->{$b} <=> $facets_counter->{$link_value}->{$a} - } keys %{ $facets_counter->{$link_value} } - ) + if ( $servers[ $i - 1 ] =~ /biblioserver/ ) { + for my $link_value ( + sort { $facets_counter->{$b} <=> $facets_counter->{$a} } + keys %$facets_counter ) { - $number_of_facets++; - if ( ( $number_of_facets < 6 ) - || ( $expanded_facet eq $link_value ) - || ( $facets_info->{$link_value}->{'expanded'} ) ) + my $expandable; + my $number_of_facets; + my @this_facets_array; + for my $one_facet ( + sort { + $facets_counter->{$link_value} + ->{$b} <=> $facets_counter->{$link_value}->{$a} + } keys %{ $facets_counter->{$link_value} } + ) { + $number_of_facets++; + if ( ( $number_of_facets < 6 ) + || ( $expanded_facet eq $link_value ) + || ( $facets_info->{$link_value}->{'expanded'} ) ) + { - # sanitize the link value ), ( will cause errors with CCL - my $facet_link_value = $one_facet; - $facet_link_value =~ s/(\(|\))/ /g; - - # fix the length that will display in the label - my $facet_label_value = $one_facet; - $facet_label_value = substr( $one_facet, 0, 20 ) . "..." - unless length($facet_label_value) <= 20; + # Sanitize the link value ), ( will cause errors with CCL, + my $facet_link_value = $one_facet; + $facet_link_value =~ s/(\(|\))/ /g; - # well, if it's a branch, label by the name, not the code - if ( $link_value =~ /branch/ ) { + # fix the length that will display in the label, + my $facet_label_value = $one_facet; $facet_label_value = - $branches->{$one_facet}->{'branchname'}; + substr( $one_facet, 0, 20 ) . "..." + unless length($facet_label_value) <= 20; + + # if it's a branch, label by the name, not the code, + if ( $link_value =~ /branch/ ) { + $facet_label_value = + $branches->{$one_facet}->{'branchname'}; + } + + # but we're down with the whole label being in the link's title. + my $facet_title_value = $one_facet; + + push @this_facets_array, + ( + { + facet_count => + $facets_counter->{$link_value} + ->{$one_facet}, + facet_label_value => $facet_label_value, + facet_title_value => $facet_title_value, + facet_link_value => $facet_link_value, + type_link_value => $link_value, + }, + ); } + } - # but we're down with the whole label being in the link's title - my $facet_title_value = $one_facet; - - push @this_facets_array, - ( - { - facet_count => - $facets_counter->{$link_value}->{$one_facet}, - facet_label_value => $facet_label_value, - facet_title_value => $facet_title_value, - facet_link_value => $facet_link_value, - type_link_value => $link_value, - }, - ); + # handle expanded option + unless ( $facets_info->{$link_value}->{'expanded'} ) { + $expandable = 1 + if ( ( $number_of_facets > 6 ) + && ( $expanded_facet ne $link_value ) ); } + push @facets_loop, + ( + { + type_link_value => $link_value, + type_id => $link_value . "_id", + type_label => + $facets_info->{$link_value}->{'label_value'}, + facets => \@this_facets_array, + expandable => $expandable, + expand => $link_value, + } + ); } - unless ( $facets_info->{$link_value}->{'expanded'} ) { - $expandable = 1 - if ( ( $number_of_facets > 6 ) - && ( $expanded_facet ne $link_value ) ); - } - push @facets_loop, - ( - { - type_link_value => $link_value, - type_id => $link_value . "_id", - type_label => - $facets_info->{$link_value}->{'label_value'}, - facets => \@this_facets_array, - expandable => $expandable, - expand => $link_value, - } - ); } } } return ( undef, $results_hashref, \@facets_loop ); } +sub pazGetRecords { + my ( + $koha_query, $simple_query, $sort_by_ref, $servers_ref, + $results_per_page, $offset, $expanded_facet, $branches, + $query_type, $scan + ) = @_; + + my $paz = C4::Search::PazPar2->new(C4::Context->config('pazpar2url')); + $paz->init(); + $paz->search($simple_query); + sleep 1; + + # do results + my $results_hashref = {}; + my $stats = XMLin($paz->stat); + my $results = XMLin($paz->show($offset, $results_per_page, 'work-title:1'), forcearray => 1); + + # for a grouped search result, the number of hits + # is the number of groups returned; 'bib_hits' will have + # the total number of bibs. + $results_hashref->{'biblioserver'}->{'hits'} = $results->{'merged'}->[0]; + $results_hashref->{'biblioserver'}->{'bib_hits'} = $stats->{'hits'}; + + HIT: foreach my $hit (@{ $results->{'hit'} }) { + my $recid = $hit->{recid}->[0]; + + my $work_title = $hit->{'md-work-title'}->[0]; + my $work_author; + if (exists $hit->{'md-work-author'}) { + $work_author = $hit->{'md-work-author'}->[0]; + } + my $group_label = (defined $work_author) ? "$work_title / $work_author" : $work_title; + + my $result_group = {}; + $result_group->{'group_label'} = $group_label; + $result_group->{'group_merge_key'} = $recid; + + my $count = 1; + if (exists $hit->{count}) { + $count = $hit->{count}->[0]; + } + $result_group->{'group_count'} = $count; + + for (my $i = 0; $i < $count; $i++) { + # FIXME -- may need to worry about diacritics here + my $rec = $paz->record($recid, $i); + push @{ $result_group->{'RECORDS'} }, $rec; + } + + push @{ $results_hashref->{'biblioserver'}->{'GROUPS'} }, $result_group; + } + + # pass through facets + my $termlist_xml = $paz->termlist('author,subject'); + my $terms = XMLin($termlist_xml, forcearray => 1); + my @facets_loop = (); + #die Dumper($results); +# foreach my $list (sort keys %{ $terms->{'list'} }) { +# my @facets = (); +# foreach my $facet (sort @{ $terms->{'list'}->{$list}->{'term'} } ) { +# push @facets, { +# facet_label_value => $facet->{'name'}->[0], +# }; +# } +# push @facets_loop, ( { +# type_label => $list, +# facets => \@facets, +# } ); +# } + + return ( undef, $results_hashref, \@facets_loop ); +} + # STOPWORDS sub _remove_stopwords { - my ($operand,$index) = @_; - # phrase and exact-qualified indexes shoudln't have stopwords removed - if ($index!~m/phr|ext/){ - # remove stopwords from operand : parse all stopwords & remove them (case insensitive) - # we use IsAlpha unicode definition, to deal correctly with diacritics. - # otherwise, a french word like "leçon" woudl be split into "le" "çon", le - # is an empty word, we get "çon" and wouldn't find anything... - foreach (keys %{C4::Context->stopwords}) { - next if ($_ =~/(and|or|not)/); # don't remove operators - $operand=~ s/\P{IsAlpha}$_\P{IsAlpha}/ /i; - $operand=~ s/^$_\P{IsAlpha}/ /i; - $operand=~ s/\P{IsAlpha}$_$/ /i; + my ( $operand, $index ) = @_; + my @stopwords_removed; + + # phrase and exact-qualified indexes shouldn't have stopwords removed + if ( $index !~ m/phr|ext/ ) { + +# remove stopwords from operand : parse all stopwords & remove them (case insensitive) +# we use IsAlpha unicode definition, to deal correctly with diacritics. +# otherwise, a French word like "leçon" woudl be split into "le" "çon", "le" +# is a stopword, we'd get "çon" and wouldn't find anything... + foreach ( keys %{ C4::Context->stopwords } ) { + next if ( $_ =~ /(and|or|not)/ ); # don't remove operators + if ( $operand =~ + /(\P{IsAlpha}$_\P{IsAlpha}|^$_\P{IsAlpha}|\P{IsAlpha}$_$)/ ) + { + $operand =~ s/\P{IsAlpha}$_\P{IsAlpha}/ /gi; + $operand =~ s/^$_\P{IsAlpha}/ /gi; + $operand =~ s/\P{IsAlpha}$_$/ /gi; + push @stopwords_removed, $_; + } } } - return $operand; + return ( $operand, \@stopwords_removed ); } # TRUNCATION sub _detect_truncation { - my ($operand,$index) = @_; - my (@nontruncated,@righttruncated,@lefttruncated,@rightlefttruncated,@regexpr); - $operand =~s/^ //g; - my @wordlist= split (/\s/,$operand); - foreach my $word (@wordlist){ - if ($word=~s/^\*([^\*]+)\*$/$1/){ - push @rightlefttruncated,$word; - } - elsif($word=~s/^\*([^\*]+)$/$1/){ - push @lefttruncated,$word; - } - elsif ($word=~s/^([^\*]+)\*$/$1/){ - push @righttruncated,$word; - } - elsif (index($word,"*")<0){ - push @nontruncated,$word; + my ( $operand, $index ) = @_; + my ( @nontruncated, @righttruncated, @lefttruncated, @rightlefttruncated, + @regexpr ); + $operand =~ s/^ //g; + my @wordlist = split( /\s/, $operand ); + foreach my $word (@wordlist) { + if ( $word =~ s/^\*([^\*]+)\*$/$1/ ) { + push @rightlefttruncated, $word; + } + elsif ( $word =~ s/^\*([^\*]+)$/$1/ ) { + push @lefttruncated, $word; + } + elsif ( $word =~ s/^([^\*]+)\*$/$1/ ) { + push @righttruncated, $word; + } + elsif ( index( $word, "*" ) < 0 ) { + push @nontruncated, $word; } else { - push @regexpr,$word; + push @regexpr, $word; } } - return (\@nontruncated,\@righttruncated,\@lefttruncated,\@rightlefttruncated,\@regexpr); + return ( + \@nontruncated, \@righttruncated, \@lefttruncated, + \@rightlefttruncated, \@regexpr + ); } +# STEMMING sub _build_stemmed_operand { my ($operand) = @_; my $stemmed_operand; - # FIXME: the locale should be set based on the user's language and/or search choice + +# FIXME: the locale should be set based on the user's language and/or search choice my $stemmer = Lingua::Stem->new( -locale => 'EN-US' ); - # FIXME: these should be stored in the db so the librarian can modify the behavior + +# FIXME: these should be stored in the db so the librarian can modify the behavior $stemmer->add_exceptions( - { - 'and' => 'and', - 'or' => 'or', - 'not' => 'not', - } - - ); + { + 'and' => 'and', + 'or' => 'or', + 'not' => 'not', + } + ); my @words = split( / /, $operand ); my $stems = $stemmer->stem(@words); for my $stem (@$stems) { - $stemmed_operand .= "$stem"; - $stemmed_operand .= "?" unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 ); - $stemmed_operand .= " "; + $stemmed_operand .= "$stem"; + $stemmed_operand .= "?" + unless ( $stem =~ /(and$|or$|not$)/ ) || ( length($stem) < 3 ); + $stemmed_operand .= " "; } - #warn "STEMMED OPERAND: $stemmed_operand"; + warn "STEMMED OPERAND: $stemmed_operand" if $DEBUG; return $stemmed_operand; } +# FIELD WEIGHTING sub _build_weighted_query { - # FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works - # pretty well but will work much better when we have an actual query parser - my ($operand,$stemmed_operand,$index) = @_; + +# FIELD WEIGHTING - This is largely experimental stuff. What I'm committing works +# pretty well but could work much better if we had a smarter query parser + my ( $operand, $stemmed_operand, $index ) = @_; my $stemming = C4::Context->preference("QueryStemming") || 0; my $weight_fields = C4::Context->preference("QueryWeightFields") || 0; - my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0; + my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0; - my $weighted_query .= "(rk=("; # Specifies that we're applying rank + my $weighted_query .= "(rk=("; # Specifies that we're applying rank # Keyword, or, no index specified if ( ( $index eq 'kw' ) || ( !$index ) ) { - $weighted_query .= "Title-cover,ext,r1=\"$operand\""; # exact title-cover - $weighted_query .= " or ti,ext,r2=\"$operand\""; # exact title - $weighted_query .= " or ti,phr,r3=\"$operand\""; # phrase title - #$weighted_query .= " or any,ext,r4=$operand"; # exact any - #$weighted_query .=" or kw,wrdl,r5=\"$operand\""; # word list any - $weighted_query .= " or wrd,fuzzy,r8=\"$operand\"" if $fuzzy_enabled; # add fuzzy, word list - $weighted_query .= " or wrd,right-Truncation,r9=\"$stemmed_operand\"" if ($stemming and $stemmed_operand); # add stemming, right truncation - # embedded sorting: 0 a-z; 1 z-a - # $weighted_query .= ") or (sort1,aut=1"; + $weighted_query .= + "Title-cover,ext,r1=\"$operand\""; # exact title-cover + $weighted_query .= " or ti,ext,r2=\"$operand\""; # exact title + $weighted_query .= " or ti,phr,r3=\"$operand\""; # phrase title + #$weighted_query .= " or any,ext,r4=$operand"; # exact any + #$weighted_query .=" or kw,wrdl,r5=\"$operand\""; # word list any + $weighted_query .= " or wrdl,fuzzy,r8=\"$operand\"" + if $fuzzy_enabled; # add fuzzy, word list + $weighted_query .= " or wrdl,right-Truncation,r9=\"$stemmed_operand\"" + if ( $stemming and $stemmed_operand ) + ; # add stemming, right truncation + $weighted_query .= " or wrdl,r9=\"$operand\""; + + # embedded sorting: 0 a-z; 1 z-a + # $weighted_query .= ") or (sort1,aut=1"; } - # if the index already has more than one qualifier, just wrap the operand - # in quotes and pass it back - elsif ($index =~ ',') { - $weighted_query .=" $index=\"$operand\""; + + # Barcode searches should skip this process + elsif ( $index eq 'bc' ) { + $weighted_query .= "bc=\"$operand\""; } + + # Authority-number searches should skip this process + elsif ( $index eq 'an' ) { + $weighted_query .= "an=\"$operand\""; + } + + # If the index already has more than one qualifier, wrap the operand + # in quotes and pass it back (assumption is that the user knows what they + # are doing and won't appreciate us mucking up their query + elsif ( $index =~ ',' ) { + $weighted_query .= " $index=\"$operand\""; + } + #TODO: build better cases based on specific search indexes else { - $weighted_query .= " $index,ext,r1=\"$operand\""; # exact index - #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)"; - $weighted_query .= " or $index,phr,r3=\"$operand\""; # phrase index - $weighted_query .= " or $index,rt,wrd,r3=\"$operand\""; # word list index + $weighted_query .= " $index,ext,r1=\"$operand\""; # exact index + #$weighted_query .= " or (title-sort-az=0 or $index,startswithnt,st-word,r3=$operand #)"; + $weighted_query .= " or $index,phr,r3=\"$operand\""; # phrase index + $weighted_query .= + " or $index,rt,wrdl,r3=\"$operand\""; # word list index } - $weighted_query .= "))"; # close rank specification + + $weighted_query .= "))"; # close rank specification return $weighted_query; } -# build the query itself +=head2 buildQuery + +( $error, $query, +$simple_query, $query_cgi, +$query_desc, $limit, +$limit_cgi, $limit_desc, +$stopwords_removed, $query_type ) = getRecords ( $operators, $operands, $indexes, $limits, $sort_by, $scan); + +Build queries and limits in CCL, CGI, Human, +handle truncation, stemming, field weighting, stopwords, fuzziness, etc. + +See verbose embedded documentation. + + +=cut + sub buildQuery { - my ( $operators, $operands, $indexes, $limits, $sort_by ) = @_; + my ( $operators, $operands, $indexes, $limits, $sort_by, $scan ) = @_; + + warn "---------\nEnter buildQuery\n---------" if $DEBUG; + # dereference my @operators = @$operators if $operators; my @indexes = @$indexes if $indexes; my @operands = @$operands if $operands; my @limits = @$limits if $limits; my @sort_by = @$sort_by if $sort_by; - my $stemming = C4::Context->preference("QueryStemming") || 0; - my $weight_fields = C4::Context->preference("QueryWeightFields") || 0; - my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0; + my $stemming = C4::Context->preference("QueryStemming") || 0; + my $auto_truncation = C4::Context->preference("QueryAutoTruncate") || 0; + my $weight_fields = C4::Context->preference("QueryWeightFields") || 0; + my $fuzzy_enabled = C4::Context->preference("QueryFuzzy") || 0; + my $remove_stopwords = C4::Context->preference("QueryRemoveStopwords") || 0; + + # no stemming/weight/fuzzy in NoZebra + if ( C4::Context->preference("NoZebra") ) { + $stemming = 0; + $weight_fields = 0; + $fuzzy_enabled = 0; + } - my $query = $operands[0]; - my $simple_query = $operands[0]; - my $query_cgi; - my $query_search_desc; + my $query = $operands[0]; + my $simple_query = $operands[0]; - my $limit; - my $limit_cgi; - my $limit_desc; + # initialize the variables we're passing back + my $query_cgi; + my $query_desc; + my $query_type; -# STEP I: determine if this is a form-based / simple query or if it's complex (if complex, -# pass it off to zebra directly) + my $limit; + my $limit_cgi; + my $limit_desc; -# check if this is a known query language query, if it is, return immediately, -# the user is responsible for constructing valid syntax: + my $stopwords_removed; # flag to determine if stopwords have been removed + +# for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps +# DIAGNOSTIC ONLY!! if ( $query =~ /^ccl=/ ) { - return ( undef, $', $', $', '', '', '', 'ccl' ); + return ( undef, $', $', $', $', '', '', '', '', 'ccl' ); } if ( $query =~ /^cql=/ ) { - return ( undef, $', $', $', '', '', '', 'cql' ); + return ( undef, $', $', $', $', '', '', '', '', 'cql' ); } if ( $query =~ /^pqf=/ ) { - return ( undef, $', $', $', '', '', '', 'pqf' ); + return ( undef, $', $', $', $', '', '', '', '', 'pqf' ); } -# FIXME: this is bound to be broken now - if ( $query =~ /(\(|\)|:|=)/ ) { # sorry, too complex, assume CCL - return ( undef, $query, $query_cgi, $query_search_desc, $limit, $limit_cgi, $limit_desc, 'ccl' ); + # pass nested queries directly + # FIXME: need better handling of some of these variables in this case + if ( $query =~ /(\(|\))/ ) { + return ( + undef, $query, $simple_query, $query_cgi, + $query, $limit, $limit_cgi, $limit_desc, + $stopwords_removed, 'ccl' + ); } -# form-based queries are limited to non-nested at a specific depth, so we can easily -# modify the incoming query operands and indexes to do stemming and field weighting +# Form-based queries are non-nested and fixed depth, so we can easily modify the incoming +# query operands and indexes and add stemming, truncation, field weighting, etc. # Once we do so, we'll end up with a value in $query, just like if we had an # incoming $query from the user else { - $query = ""; # clear it out so we can populate properly with field-weighted stemmed query - my $previous_operand; # a flag used to keep track if there was a previous query - # if there was, we can apply the current operator - # for every operand + $query = "" + ; # clear it out so we can populate properly with field-weighted, stemmed, etc. query + my $previous_operand + ; # a flag used to keep track if there was a previous query + # if there was, we can apply the current operator + # for every operand for ( my $i = 0 ; $i <= @operands ; $i++ ) { # COMBINE OPERANDS, INDEXES AND OPERATORS if ( $operands[$i] ) { + + # A flag to determine whether or not to add the index to the query + my $indexes_set; + +# If the user is sophisticated enough to specify an index, turn off field weighting, stemming, and stopword handling + if ( $operands[$i] =~ /(:|=)/ || $scan ) { + $weight_fields = 0; + $stemming = 0; + $remove_stopwords = 0; + } my $operand = $operands[$i]; my $index = $indexes[$i]; - # if there's no index, don't use one, it will throw a CCL error - my $index_plus = "$index:" if $index; - my $index_plus_comma="$index," if $index; + # Add index-specific attributes + # Date of Publication + if ( $index eq 'yr' ) { + $index .= ",st-numeric"; + $indexes_set++; + $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0; + } - # Remove Stopwords - $operand = _remove_stopwords($operand,$index); - warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG; + # Date of Acquisition + elsif ( $index eq 'acqdate' ) { + $index .= ",st-date-normalized"; + $indexes_set++; + $stemming = $auto_truncation = $weight_fields = $fuzzy_enabled = $remove_stopwords = 0; + } + # ISBN,ISSN,Standard Number, don't need special treatment + elsif ( $index eq 'nb' || $index eq 'ns' ) { + $indexes_set++; + ( + $stemming, $auto_truncation, + $weight_fields, $fuzzy_enabled, + $remove_stopwords + ) = ( 0, 0, 0, 0, 0 ); - my $indexes_set; + } + # Set default structure attribute (word list) + my $struct_attr; + unless ( $indexes_set || !$index || $index =~ /(st-|phr|ext|wrdl)/ ) { + $struct_attr = ",wrdl"; + } + + # Some helpful index variants + my $index_plus = $index . $struct_attr . ":" if $index; + my $index_plus_comma = $index . $struct_attr . "," if $index; + + # Remove Stopwords + if ($remove_stopwords) { + ( $operand, $stopwords_removed ) = + _remove_stopwords( $operand, $index ); + warn "OPERAND w/out STOPWORDS: >$operand<" if $DEBUG; + warn "REMOVED STOPWORDS: @$stopwords_removed" + if ( $stopwords_removed && $DEBUG ); + } # Detect Truncation - my ($nontruncated,$righttruncated,$lefttruncated,$rightlefttruncated,$regexpr); + my ( $nontruncated, $righttruncated, $lefttruncated, + $rightlefttruncated, $regexpr ); my $truncated_operand; - ($nontruncated,$righttruncated,$lefttruncated,$rightlefttruncated,$regexpr) = _detect_truncation($operand,$index); - warn "TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<" if $DEBUG; + ( + $nontruncated, $righttruncated, $lefttruncated, + $rightlefttruncated, $regexpr + ) = _detect_truncation( $operand, $index ); + warn +"TRUNCATION: NON:>@$nontruncated< RIGHT:>@$righttruncated< LEFT:>@$lefttruncated< RIGHTLEFT:>@$rightlefttruncated< REGEX:>@$regexpr<" + if $DEBUG; + # Apply Truncation - # Problem is when build_weights gets ahold if this is wraps in quotes which breaks the truncation :/ - if (scalar(@$righttruncated)+scalar(@$lefttruncated)+scalar(@$rightlefttruncated)>0){ + if ( + scalar(@$righttruncated) + scalar(@$lefttruncated) + + scalar(@$rightlefttruncated) > 0 ) + { + + # Don't field weight or add the index to the query, we do it here $indexes_set = 1; undef $weight_fields; my $previous_truncation_operand; - if (scalar(@$nontruncated)>0) { - $truncated_operand.= "$index_plus @$nontruncated "; + if ( scalar(@$nontruncated) > 0 ) { + $truncated_operand .= "$index_plus @$nontruncated "; $previous_truncation_operand = 1; } - if (scalar(@$righttruncated)>0){ - $truncated_operand .= "and " if $previous_truncation_operand; - $truncated_operand .= "$index_plus_comma"."rtrn:@$righttruncated "; + if ( scalar(@$righttruncated) > 0 ) { + $truncated_operand .= "and " + if $previous_truncation_operand; + $truncated_operand .= + "$index_plus_comma" . "rtrn:@$righttruncated "; $previous_truncation_operand = 1; } - if (scalar(@$lefttruncated)>0){ - $truncated_operand .= "and " if $previous_truncation_operand; - $truncated_operand .= "$index_plus_comma"."ltrn:@$lefttruncated "; + if ( scalar(@$lefttruncated) > 0 ) { + $truncated_operand .= "and " + if $previous_truncation_operand; + $truncated_operand .= + "$index_plus_comma" . "ltrn:@$lefttruncated "; $previous_truncation_operand = 1; } - if (scalar(@$rightlefttruncated)>0){ - $truncated_operand .= "and " if $previous_truncation_operand; - $truncated_operand .= "$index_plus_comma"."rltrn:@$rightlefttruncated "; + if ( scalar(@$rightlefttruncated) > 0 ) { + $truncated_operand .= "and " + if $previous_truncation_operand; + $truncated_operand .= + "$index_plus_comma" . "rltrn:@$rightlefttruncated "; $previous_truncation_operand = 1; } } @@ -768,12 +1026,15 @@ sub buildQuery { # Handle Stemming my $stemmed_operand; - $stemmed_operand = _build_stemmed_operand($operand) if $stemming; + $stemmed_operand = _build_stemmed_operand($operand) + if $stemming; warn "STEMMED OPERAND: >$stemmed_operand<" if $DEBUG; # Handle Field Weighting my $weighted_operand; - $weighted_operand = _build_weighted_query($operand,$stemmed_operand,$index) if $weight_fields; + $weighted_operand = + _build_weighted_query( $operand, $stemmed_operand, $index ) + if $weight_fields; warn "FIELD WEIGHTED OPERAND: >$weighted_operand<" if $DEBUG; $operand = $weighted_operand if $weight_fields; $indexes_set = 1 if $weight_fields; @@ -781,32 +1042,38 @@ sub buildQuery { # If there's a previous operand, we need to add an operator if ($previous_operand) { - # user-specified operator - if ( $operators[$i-1] ) { - $query .= " $operators[$i-1] "; - $query .= " $index_plus " unless $indexes_set; - $query .= " $operand"; - $query_cgi .=""; - $query_search_desc .=" $operators[$i-1] $index_plus $operands[$i]"; + # User-specified operator + if ( $operators[ $i - 1 ] ) { + $query .= " $operators[$i-1] "; + $query .= " $index_plus " unless $indexes_set; + $query .= " $operand"; + $query_cgi .= "&op=$operators[$i-1]"; + $query_cgi .= "&idx=$index" if $index; + $query_cgi .= "&q=$operands[$i]" if $operands[$i]; + $query_desc .= + " $operators[$i-1] $index_plus $operands[$i]"; } - # the default operator is and + # Default operator is and else { - $query .= " and "; - $query .= "$index_plus " unless $indexes_set; - $query .= "$operand"; - $query_cgi .=""; - $query_search_desc .= " and $index_plus $operands[$i]"; + $query .= " and "; + $query .= "$index_plus " unless $indexes_set; + $query .= "$operand"; + $query_cgi .= "&op=and&idx=$index" if $index; + $query_cgi .= "&q=$operands[$i]" if $operands[$i]; + $query_desc .= " and $index_plus $operands[$i]"; } } - else { - # field-weighted queries already have indexes set - $query .=" $index_plus " unless $indexes_set; - $query .= $operand; - $query_search_desc .= " $index_plus $operands[$i]"; - $query_cgi.=""; + # There isn't a pervious operand, don't need an operator + else { + # Field-weighted queries already have indexes set + $query .= " $index_plus " unless $indexes_set; + $query .= $operand; + $query_desc .= " $index_plus $operands[$i]"; + $query_cgi .= "&idx=$index" if $index; + $query_cgi .= "&q=$operands[$i]" if $operands[$i]; $previous_operand = 1; } } #/if $operands @@ -815,74 +1082,103 @@ sub buildQuery { warn "QUERY BEFORE LIMITS: >$query<" if $DEBUG; # add limits - my $group_OR_limits; + my $group_OR_limits; + my $availability_limit; foreach my $this_limit (@limits) { if ( $this_limit =~ /available/ ) { - # FIXME: switch to zebra search for null values - $limit .= " (($query and datedue=0000-00-00) or ($query and datedue=0000-00-00 not lost=1) or ($query and datedue=0000-00-00 not lost=2))"; - $limit_cgi .= "&limit=available"; - $limit_desc .=""; + +# 'available' is defined as (items.onloan is NULL) and (items.itemlost = 0) +# In English: +# all records not indexed in the onloan register (zebra) and all records with a value of lost equal to 0 + $availability_limit .= +"( ( allrecords,AlwaysMatches='' not onloan,AlwaysMatches='') and (lost,st-numeric=0) )"; #or ( allrecords,AlwaysMatches='' not lost,AlwaysMatches='')) )"; + $limit_cgi .= "&limit=available"; + $limit_desc .= ""; } - # these are treated as OR + # group_OR_limits, prefixed by mc- + # OR every member of the group elsif ( $this_limit =~ /mc/ ) { $group_OR_limits .= " or " if $group_OR_limits; - $group_OR_limits .= "$this_limit"; - $limit_cgi .="&limit=$this_limit"; - $limit_desc .= " or $this_limit"; + $limit_desc .= " or " if $group_OR_limits; + $group_OR_limits .= "$this_limit"; + $limit_cgi .= "&limit=$this_limit"; + $limit_desc .= " $this_limit"; } - # regular old limits - else { - $limit .= " and " if $limit || $query; - $limit .= "$this_limit"; - $limit_cgi .="&limit=$this_limit"; - $limit_desc .=" and $this_limit"; - } + # Regular old limits + else { + $limit .= " and " if $limit || $query; + $limit .= "$this_limit"; + $limit_cgi .= "&limit=$this_limit"; + $limit_desc .= " $this_limit"; + } + } + if ($group_OR_limits) { + $limit .= " and " if ( $query || $limit ); + $limit .= "($group_OR_limits)"; + } + if ($availability_limit) { + $limit .= " and " if ( $query || $limit ); + $limit .= "($availability_limit)"; + } + + # Normalize the query and limit strings + $query =~ s/:/=/g; + $limit =~ s/:/=/g; + for ( $query, $query_desc, $limit, $limit_desc ) { + $_ =~ s/ / /g; # remove extra spaces + $_ =~ s/^ //g; # remove any beginning spaces + $_ =~ s/ $//g; # remove any ending spaces + $_ =~ s/==/=/g; # remove double == from query + } + $query_cgi =~ s/^&//; # remove unnecessary & from beginning of the query cgi + + for ($query_cgi,$simple_query) { + $_ =~ s/"//g; } - if ($group_OR_limits) { - $limit.=" and " if ($query || $limit ); - $limit.="($group_OR_limits)"; - } - # normalize the strings - for ($query, $query_search_desc, $limit, $limit_desc) { - $_ =~ s/ / /g; # remove extra spaces - $_ =~ s/^ //g; # remove any beginning spaces - $_ =~ s/ $//g; # remove any beginning spaces - $_ =~ s/:/=/g; # causes probs for server - $_ =~ s/==/=/g; # remove double == from query - - } - - # append the limit to the query - $query .= $limit; - - warn "QUERY:".$query if $DEBUG; - warn "QUERY CGI:".$query_cgi if $DEBUG; - warn "QUERY DESC:".$query_search_desc if $DEBUG; - warn "LIMIT:".$limit if $DEBUG; - warn "LIMIT CGI:".$limit_cgi if $DEBUG; - warn "LIMIT DESC:".$limit_desc if $DEBUG; - - return ( undef, $query,$simple_query,$query_cgi,$query_search_desc,$limit,$limit_cgi,$limit_desc ); + # append the limit to the query + $query .= " " . $limit; + + # Warnings if DEBUG + if ($DEBUG) { + warn "QUERY:" . $query; + warn "QUERY CGI:" . $query_cgi; + warn "QUERY DESC:" . $query_desc; + warn "LIMIT:" . $limit; + warn "LIMIT CGI:" . $limit_cgi; + warn "LIMIT DESC:" . $limit_desc; + warn "---------\nLeave buildQuery\n---------"; + } + return ( + undef, $query, $simple_query, $query_cgi, + $query_desc, $limit, $limit_cgi, $limit_desc, + $stopwords_removed, $query_type + ); } +=head2 searchResults + +Format results in a form suitable for passing to the template + +=cut + # IMO this subroutine is pretty messy still -- it's responsible for # building the HTML output for the template sub searchResults { my ( $searchdesc, $hits, $results_per_page, $offset, @marcresults ) = @_; - my $dbh = C4::Context->dbh; - my $toggle; my $even = 1; my @newresults; + + # add search-term highlighting via s on the search terms my $span_terms_hashref; for my $span_term ( split( / /, $searchdesc ) ) { - $span_term =~ s/(.*=|\)|\(|\+|\.)//g; + $span_term =~ s/(.*=|\)|\(|\+|\.|\*)//g; $span_terms_hashref->{$span_term}++; } - #Build brancnames hash + #Build branchnames hash #find branchname #get branch information..... my %branches; @@ -893,31 +1189,38 @@ sub searchResults { while ( my $bdata = $bsth->fetchrow_hashref ) { $branches{ $bdata->{'branchcode'} } = $bdata->{'branchname'}; } +# FIXME - We build an authorised values hash here, using the default framework +# though it is possible to have different authvals for different fws. + + my $shelflocations =GetKohaAuthorisedValues('items.location',''); + + # get notforloan authorised value list (see $shelflocations FIXME) + my $notforloan_authorised_value = GetAuthValCode('items.notforloan',''); #Build itemtype hash #find itemtype & itemtype image my %itemtypes; $bsth = - $dbh->prepare("SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes"); + $dbh->prepare( + "SELECT itemtype,description,imageurl,summary,notforloan FROM itemtypes" + ); $bsth->execute(); while ( my $bdata = $bsth->fetchrow_hashref ) { - $itemtypes{ $bdata->{'itemtype'} }->{description} = - $bdata->{'description'}; - $itemtypes{ $bdata->{'itemtype'} }->{imageurl} = $bdata->{'imageurl'}; - $itemtypes{ $bdata->{'itemtype'} }->{summary} = $bdata->{'summary'}; - $itemtypes{ $bdata->{'itemtype'} }->{notforloan} = $bdata->{'notforloan'}; + foreach (qw(description imageurl summary notforloan)) { + $itemtypes{ $bdata->{'itemtype'} }->{$_} = $bdata->{$_}; + } } #search item field code my $sth = $dbh->prepare( -"select tagfield from marc_subfield_structure where kohafield like 'items.itemnumber'" +"SELECT tagfield FROM marc_subfield_structure WHERE kohafield LIKE 'items.itemnumber'" ); $sth->execute; my ($itemtag) = $sth->fetchrow; ## find column names of items related to MARC - my $sth2 = $dbh->prepare("SHOW COLUMNS from items"); + my $sth2 = $dbh->prepare("SHOW COLUMNS FROM items"); $sth2->execute; my %subfieldstosearch; while ( ( my $column ) = $sth2->fetchrow ) { @@ -925,182 +1228,267 @@ sub searchResults { &GetMarcFromKohaField( "items." . $column, "" ); $subfieldstosearch{$column} = $tagsubfield; } - my $times; + # handle which records to actually retrieve + my $times; if ( $hits && $offset + $results_per_page <= $hits ) { $times = $offset + $results_per_page; } else { - $times = $hits; + $times = $hits; # FIXME: if $hits is undefined, why do we want to equal it? } + # loop through all of the records we've retrieved for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) { - my $marcrecord; - $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] ); + my $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] ); my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, '' ); - # add image url if there is one + $oldbiblio->{result_number} = $i + 1; + + # add imageurl to itemtype if there is one if ( $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} =~ /^http:/ ) { $oldbiblio->{imageurl} = $itemtypes{ $oldbiblio->{itemtype} }->{imageurl}; - $oldbiblio->{description} = - $itemtypes{ $oldbiblio->{itemtype} }->{description}; - } - else { + } else { $oldbiblio->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} if ( $itemtypes{ $oldbiblio->{itemtype} }->{imageurl} ); - $oldbiblio->{description} = - $itemtypes{ $oldbiblio->{itemtype} }->{description}; } - # - # build summary if there is one (the summary is defined in itemtypes table - # - if ($itemtypes{ $oldbiblio->{itemtype} }->{summary}) { + my $biblio_authorised_value_images = C4::Items::get_authorised_value_images( C4::Biblio::get_biblio_authorised_values( $oldbiblio->{biblionumber} ) ); + $oldbiblio->{authorised_value_images} = $biblio_authorised_value_images; + my $aisbn = $oldbiblio->{'isbn'}; + $aisbn =~ /(\d*[X]*)/; + $oldbiblio->{amazonisbn} = $1; + $oldbiblio->{description} = $itemtypes{ $oldbiblio->{itemtype} }->{description}; + # Build summary if there is one (the summary is defined in the itemtypes table) + # FIXME: is this used anywhere, I think it can be commented out? -- JF + if ( $itemtypes{ $oldbiblio->{itemtype} }->{summary} ) { my $summary = $itemtypes{ $oldbiblio->{itemtype} }->{summary}; - my @fields = $marcrecord->fields(); + my @fields = $marcrecord->fields(); foreach my $field (@fields) { - my $tag = $field->tag(); + my $tag = $field->tag(); my $tagvalue = $field->as_string(); - $summary =~ s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g; - unless ($tag<10) { + $summary =~ + s/\[(.?.?.?.?)$tag\*(.*?)]/$1$tagvalue$2\[$1$tag$2]/g; + unless ( $tag < 10 ) { my @subf = $field->subfields; - for my $i (0..$#subf) { - my $subfieldcode = $subf[$i][0]; + for my $i ( 0 .. $#subf ) { + my $subfieldcode = $subf[$i][0]; my $subfieldvalue = $subf[$i][1]; - my $tagsubf = $tag.$subfieldcode; - $summary =~ s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g; + my $tagsubf = $tag . $subfieldcode; + $summary =~ +s/\[(.?.?.?.?)$tagsubf(.*?)]/$1$subfieldvalue$2\[$1$tagsubf$2]/g; } } } + # FIXME: yuk $summary =~ s/\[(.*?)]//g; - $summary =~ s/\n/
/g; + $summary =~ s/\n//g; $oldbiblio->{summary} = $summary; } - # add spans to search term in results for search term highlighting - foreach my $term ( keys %$span_terms_hashref ) { + + # save an author with no tag, for the > link + $oldbiblio->{'author_nospan'} = $oldbiblio->{'author'}; + $oldbiblio->{'title_nospan'} = $oldbiblio->{'title'}; + # Add search-term highlighting to the whole record where they match using s + if (C4::Context->preference("OpacHighlightedWords")){ + my $searchhighlightblob; + for my $highlight_field ( $marcrecord->fields ) { + + # FIXME: need to skip title, subtitle, author, etc., as they are handled below + next if $highlight_field->tag() =~ /(^00)/; # skip fixed fields + for my $subfield ($highlight_field->subfields()) { + my $match; + next if $subfield->[0] eq '9'; + my $field = $subfield->[1]; + for my $term ( keys %$span_terms_hashref ) { + if ( ( $field =~ /$term/i ) && (( length($term) > 3 ) || ($field =~ / $term /i)) ) { + $field =~ s/$term/$&<\/span>/gi; + $match++; + } + } + $searchhighlightblob .= $field . " ... " if $match; + } + + } + $searchhighlightblob = ' ... '.$searchhighlightblob if $searchhighlightblob; + $oldbiblio->{'searchhighlightblob'} = $searchhighlightblob; + } + + # Add search-term highlighting to the title, subtitle, etc. fields + for my $term ( keys %$span_terms_hashref ) { my $old_term = $term; if ( length($term) > 3 ) { - $term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\])//g; - $term =~ s/\\//g; - $term =~ s/\*//g; - - #FIXME: is there a better way to do this? - $oldbiblio->{'title'} =~ s/$term/$&<\/span>/gi; - $oldbiblio->{'subtitle'} =~ - s/$term/$&<\/span>/gi; - - $oldbiblio->{'author'} =~ s/$term/$&<\/span>/gi; - $oldbiblio->{'publishercode'} =~ s/$term/$&<\/span>/gi; - $oldbiblio->{'place'} =~ s/$term/$&<\/span>/gi; - $oldbiblio->{'pages'} =~ s/$term/$&<\/span>/gi; - $oldbiblio->{'notes'} =~ s/$term/$&<\/span>/gi; - $oldbiblio->{'size'} =~ s/$term/$&<\/span>/gi; + $term =~ s/(.*=|\)|\(|\+|\.|\?|\[|\]|\\|\*)//g; + foreach(qw(title subtitle author publishercode place pages notes size)) { + $oldbiblio->{$_} =~ s/$term/$&<\/span>/gi; + } } } - if ( $i % 2 ) { - $toggle = "#ffffcc"; - } - else { - $toggle = "white"; - } - $oldbiblio->{'toggle'} = $toggle; + ($i % 2) and $oldbiblio->{'toggle'} = 1; + + # Pull out the items fields my @fields = $marcrecord->field($itemtag); - my @items_loop; - my $items; + + # Setting item statuses for display + my @available_items_loop; + my @onloan_items_loop; + my @other_items_loop; + + my $available_items; + my $onloan_items; + my $other_items; + my $ordered_count = 0; + my $available_count = 0; my $onloan_count = 0; + my $longoverdue_count = 0; + my $other_count = 0; my $wthdrawn_count = 0; my $itemlost_count = 0; - my $norequests = 1; - - # - # check the loan status of the item : - # it is not stored in the MARC record, for pref (zebra reindexing) - # reason. Thus, we have to get the status from a specific SQL query - # - my $sth_issue = $dbh->prepare(" - SELECT date_due,returndate - FROM issues - WHERE itemnumber=? AND returndate IS NULL"); - my $items_count=scalar(@fields); + my $itembinding_count = 0; + my $itemdamaged_count = 0; + my $can_place_holds = 0; + my $items_count = scalar(@fields); + my $items_counter; + my $maxitems = + ( C4::Context->preference('maxItemsinSearchResults') ) + ? C4::Context->preference('maxItemsinSearchResults') - 1 + : 1; + + # loop through every item foreach my $field (@fields) { my $item; + $items_counter++; + + # populate the items hash foreach my $code ( keys %subfieldstosearch ) { $item->{$code} = $field->subfield( $subfieldstosearch{$code} ); } - $sth_issue->execute($item->{itemnumber}); - $item->{due_date} = format_date($sth_issue->fetchrow); - $item->{onloan} = 1 if $item->{due_date}; - # at least one item can be reserved : suppose no - $norequests = 1; - if ( $item->{wthdrawn} ) { - $wthdrawn_count++; - $items->{ $item->{'homebranch'}.'--'.$item->{'itemcallnumber'} }->{unavailable}=1; - $items->{ $item->{'homebranch'}.'--'.$item->{'itemcallnumber'} }->{wthdrawn}=1; - } - elsif ( $item->{itemlost} ) { - $itemlost_count++; - $items->{ $item->{'homebranch'}.'--'.$item->{'itemcallnumber'} }->{unavailable}=1; - $items->{ $item->{'homebranch'}.'--'.$item->{'itemcallnumber'} }->{itemlost}=1; + my $hbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'homebranch' : 'holdingbranch'; + my $otherbranch = C4::Context->preference('HomeOrHoldingBranch') eq 'homebranch' ? 'holdingbranch' : 'homebranch'; + # set item's branch name, use HomeOrHoldingBranch syspref first, fall back to the other one + if ($item->{$hbranch}) { + $item->{'branchname'} = $branches{$item->{$hbranch}}; } - unless ( $item->{notforloan}) { - # OK, this one can be issued, so at least one can be reserved - $norequests = 0; + elsif ($item->{$otherbranch}) { # Last resort + $item->{'branchname'} = $branches{$item->{$otherbranch}}; } - if ( ( $item->{onloan} ) && ( $item->{onloan} != '0000-00-00' ) ) - { - $items->{ $item->{'homebranch'}.'--'.$item->{'itemcallnumber'} }->{unavailable}=1; - $items->{ $item->{'homebranch'}.'--'.$item->{'itemcallnumber'} }->{onloancount} = 1; - $items->{ $item->{'homebranch'}.'--'.$item->{'itemcallnumber'} }->{due_date} = $item->{due_date}; + + my $prefix = $item->{$hbranch} . '--' . $item->{location} . $item->{itype} . $item->{itemcallnumber}; +# For each grouping of items (onloan, available, unavailable), we build a key to store relevant info about that item + if ( $item->{onloan} ) { $onloan_count++; - } - if ( $item->{'homebranch'} ) { - $items->{ $item->{'homebranch'}.'--'.$item->{'itemcallnumber'} }->{count}++; + my $key = $prefix . $item->{due_date}; + $onloan_items->{$key}->{due_date} = format_date($item->{onloan}); + $onloan_items->{$key}->{count}++ if $item->{homebranch}; + $onloan_items->{$key}->{branchname} = $item->{branchname}; + $onloan_items->{$key}->{location} = $shelflocations->{ $item->{location} }; + $onloan_items->{$key}->{itemcallnumber} = $item->{itemcallnumber}; + $onloan_items->{$key}->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $item->{itype} }->{imageurl}; + # if something's checked out and lost, mark it as 'long overdue' + if ( $item->{itemlost} ) { + $onloan_items->{$prefix}->{longoverdue}++; + $longoverdue_count++; + } else { # can place holds as long as item isn't lost + $can_place_holds = 1; + } } - # Last resort - elsif ( $item->{'holdingbranch'} ) { - $items->{ $item->{'holdingbranch'} }->{count}++; + # items not on loan, but still unavailable ( lost, withdrawn, damaged ) + else { + + # item is on order + if ( $item->{notforloan} == -1 ) { + $ordered_count++; + } + + # item is withdrawn, lost or damaged + if ( $item->{wthdrawn} + || $item->{itemlost} + || $item->{damaged} + || $item->{notforloan} ) + { + $wthdrawn_count++ if $item->{wthdrawn}; + $itemlost_count++ if $item->{itemlost}; + $itemdamaged_count++ if $item->{damaged}; + $item->{status} = $item->{wthdrawn} . "-" . $item->{itemlost} . "-" . $item->{damaged} . "-" . $item->{notforloan}; + $other_count++; + + my $key = $prefix . $item->{status}; + foreach (qw(wthdrawn itemlost damaged branchname itemcallnumber)) { + $other_items->{$key}->{$_} = $item->{$_}; + } + $other_items->{$key}->{notforloan} = GetAuthorisedValueDesc('','',$item->{notforloan},'','',$notforloan_authorised_value) if $notforloan_authorised_value; + $other_items->{$key}->{count}++ if $item->{homebranch}; + $other_items->{$key}->{location} = $shelflocations->{ $item->{location} }; + $other_items->{$key}->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $item->{itype} }->{imageurl}; + } + # item is available + else { + $can_place_holds = 1; + $available_count++; + $available_items->{$prefix}->{count}++ if $item->{homebranch}; + foreach (qw(branchname itemcallnumber)) { + $available_items->{$prefix}->{$_} = $item->{$_}; + } + $available_items->{$prefix}->{location} = $shelflocations->{ $item->{location} }; + $available_items->{$prefix}->{imageurl} = getitemtypeimagesrc() . "/" . $itemtypes{ $item->{itype} }->{imageurl}; + } } - $items->{ $item->{'homebranch'}.'--'.$item->{'itemcallnumber'} }->{itemcallnumber} = $item->{itemcallnumber}; - $items->{ $item->{'homebranch'}.'--'.$item->{'itemcallnumber'} }->{location} = $item->{location}; - $items->{ $item->{'homebranch'}.'--'.$item->{'itemcallnumber'} }->{branchcode} = $item->{homebranch}; } # notforloan, item level and biblioitem level + my ( $availableitemscount, $onloanitemscount, $otheritemscount ); + $maxitems = + ( C4::Context->preference('maxItemsinSearchResults') ) + ? C4::Context->preference('maxItemsinSearchResults') - 1 + : 1; + for my $key ( sort keys %$onloan_items ) { + (++$onloanitemscount > $maxitems) and last; + push @onloan_items_loop, $onloan_items->{$key}; + } + for my $key ( sort keys %$other_items ) { + (++$otheritemscount > $maxitems) and last; + push @other_items_loop, $other_items->{$key}; + } + for my $key ( sort keys %$available_items ) { + (++$availableitemscount > $maxitems) and last; + push @available_items_loop, $available_items->{$key} + } - # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items - $norequests = 1 if $itemtypes{$oldbiblio->{itemtype}}->{notforloan}; - - for my $key ( sort keys %$items ) { - my $this_item = { - branchname => $branches{$items->{$key}->{branchcode}}, - branchcode => $items->{$key}->{branchcode}, - count => $items->{$key}->{count}, - itemcallnumber => $items->{$key}->{itemcallnumber}, - location => $items->{$key}->{location}, - onloancount => $items->{$key}->{onloancount}, - due_date => $items->{$key}->{due_date}, - wthdrawn => $items->{$key}->{wthdrawn}, - lost => $items->{$key}->{itemlost}, - }; - push @items_loop, $this_item; + # XSLT processing of some stuff + if (C4::Context->preference("XSLTResultsDisplay") ) { + my $newxmlrecord = XSLTParse4Display($oldbiblio->{biblionumber},C4::Context->config('opachtdocs')."/prog/en/xslt/MARC21slim2OPACResults.xsl"); + $oldbiblio->{XSLTResultsRecord} = $newxmlrecord; } - $oldbiblio->{norequests} = $norequests; - $oldbiblio->{items_count} = $items_count; - $oldbiblio->{items_loop} = \@items_loop; - $oldbiblio->{onloancount} = $onloan_count; - $oldbiblio->{wthdrawncount} = $wthdrawn_count; - $oldbiblio->{itemlostcount} = $itemlost_count; - $oldbiblio->{orderedcount} = $ordered_count; - $oldbiblio->{isbn} =~ s/-//g; # deleting - in isbn to enable amazon content + + # last check for norequest : if itemtype is notforloan, it can't be reserved either, whatever the items + $can_place_holds = 0 + if $itemtypes{ $oldbiblio->{itemtype} }->{notforloan}; + $oldbiblio->{norequests} = 1 unless $can_place_holds; + $oldbiblio->{itemsplural} = 1 if $items_count > 1; + $oldbiblio->{items_count} = $items_count; + $oldbiblio->{available_items_loop} = \@available_items_loop; + $oldbiblio->{onloan_items_loop} = \@onloan_items_loop; + $oldbiblio->{other_items_loop} = \@other_items_loop; + $oldbiblio->{availablecount} = $available_count; + $oldbiblio->{availableplural} = 1 if $available_count > 1; + $oldbiblio->{onloancount} = $onloan_count; + $oldbiblio->{onloanplural} = 1 if $onloan_count > 1; + $oldbiblio->{othercount} = $other_count; + $oldbiblio->{otherplural} = 1 if $other_count > 1; + $oldbiblio->{wthdrawncount} = $wthdrawn_count; + $oldbiblio->{itemlostcount} = $itemlost_count; + $oldbiblio->{damagedcount} = $itemdamaged_count; + $oldbiblio->{orderedcount} = $ordered_count; + $oldbiblio->{isbn} =~ + s/-//g; # deleting - in isbn to enable amazon content + $oldbiblio->{'authorised_value_images'} = C4::Items::get_authorised_value_images( C4::Biblio::get_biblio_authorised_values( $oldbiblio->{'biblionumber'} ) ); push( @newresults, $oldbiblio ); } return @newresults; } - - #---------------------------------------------------------------------- # # Non-Zebra GetRecords# @@ -1114,187 +1502,287 @@ sub searchResults { sub NZgetRecords { my ( - $koha_query, $simple_query, $sort_by_ref, - $servers_ref, $results_per_page, $offset, - $expanded_facet, $branches, $query_type, - $scan + $query, $simple_query, $sort_by_ref, $servers_ref, + $results_per_page, $offset, $expanded_facet, $branches, + $query_type, $scan ) = @_; - my $result = NZanalyse($koha_query); - return (undef,NZorder($result,@$sort_by_ref[0],$results_per_page,$offset),undef); + warn "query =$query" if $DEBUG; + my $result = NZanalyse($query); + warn "results =$result" if $DEBUG; + return ( undef, + NZorder( $result, @$sort_by_ref[0], $results_per_page, $offset ), + undef ); } =head2 NZanalyse NZanalyse : get a CQL string as parameter, and returns a list of biblionumber;title,biblionumber;title,... - the list is builded from inverted index in nozebra SQL table + the list is built from an inverted index in the nozebra SQL table note that title is here only for convenience : the sorting will be very fast when requested on title if the sorting is requested on something else, we will have to reread all results, and that may be longer. =cut sub NZanalyse { - my ($string,$server) = @_; - # $server contains biblioserver or authorities, depending on what we search on. - #warn "querying : $string on $server"; - $server='biblioserver' unless $server; - # if we have a ", replace the content to discard temporarily any and/or/not inside + my ( $string, $server ) = @_; +# warn "---------" if $DEBUG; + warn " NZanalyse" if $DEBUG; +# warn "---------" if $DEBUG; + + # $server contains biblioserver or authorities, depending on what we search on. + #warn "querying : $string on $server"; + $server = 'biblioserver' unless $server; + +# if we have a ", replace the content to discard temporarily any and/or/not inside my $commacontent; - if ($string =~/"/) { + if ( $string =~ /"/ ) { $string =~ s/"(.*?)"/__X__/; $commacontent = $1; -# print "commacontent : $commacontent\n"; + warn "commacontent : $commacontent" if $DEBUG; + } + +# split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y" +# then, call again NZanalyse with $left and $right +# (recursive until we find a leaf (=> something without and/or/not) +# delete repeated operator... Would then go in infinite loop + while ( $string =~ s/( and| or| not| AND| OR| NOT)\1/$1/g ) { + } + + #process parenthesis before. + if ( $string =~ /^\s*\((.*)\)(( and | or | not | AND | OR | NOT )(.*))?/ ) { + my $left = $1; + my $right = $4; + my $operator = lc($3); # FIXME: and/or/not are operators, not operands + warn +"dealing w/parenthesis before recursive sub call. left :$left operator:$operator right:$right" + if $DEBUG; + my $leftresult = NZanalyse( $left, $server ); + if ($operator) { + my $rightresult = NZanalyse( $right, $server ); + + # OK, we have the results for right and left part of the query + # depending of operand, intersect, union or exclude both lists + # to get a result list + if ( $operator eq ' and ' ) { + return NZoperatorAND($leftresult,$rightresult); + } + elsif ( $operator eq ' or ' ) { + + # just merge the 2 strings + return $leftresult . $rightresult; + } + elsif ( $operator eq ' not ' ) { + return NZoperatorNOT($leftresult,$rightresult); + } + } + else { +# this error is impossible, because of the regexp that isolate the operand, but just in case... + return $leftresult; + } } - # split the query string in 3 parts : X AND Y means : $left="X", $operand="AND" and $right="Y" - # then, call again NZanalyse with $left and $right - # (recursive until we find a leaf (=> something without and/or/not) - $string =~ /(.*)( and | or | not | AND | OR | NOT )(.*)/; - my $left = $1; - my $right = $3; - my $operand = lc($2); + warn "string :" . $string if $DEBUG; + $string =~ /(.*?)( and | or | not | AND | OR | NOT )(.*)/; + my $left = $1; + my $right = $3; + my $operator = lc($2); # FIXME: and/or/not are operators, not operands + warn "no parenthesis. left : $left operator: $operator right: $right" + if $DEBUG; + # it's not a leaf, we have a and/or/not - if ($operand) { + if ($operator) { + # reintroduce comma content if needed $right =~ s/__X__/"$commacontent"/ if $commacontent; - $left =~ s/__X__/"$commacontent"/ if $commacontent; -# warn "node : $left / $operand / $right\n"; - my $leftresult = NZanalyse($left,$server); - my $rightresult = NZanalyse($right,$server); + $left =~ s/__X__/"$commacontent"/ if $commacontent; + warn "node : $left / $operator / $right\n" if $DEBUG; + my $leftresult = NZanalyse( $left, $server ); + my $rightresult = NZanalyse( $right, $server ); + warn " leftresult : $leftresult" if $DEBUG; + warn " rightresult : $rightresult" if $DEBUG; # OK, we have the results for right and left part of the query # depending of operand, intersect, union or exclude both lists # to get a result list - if ($operand eq ' and ') { - my @leftresult = split /;/, $leftresult; -# my @rightresult = split /;/,$leftresult; - my $finalresult; - # parse the left results, and if the biblionumber exist in the right result, save it in finalresult - # the result is stored twice, to have the same weight for AND than OR. - # example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130 - # result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64 - foreach (@leftresult) { - if ($rightresult =~ "$_;") { - $finalresult .= "$_;$_;"; - } - } - return $finalresult; - } elsif ($operand eq ' or ') { + if ( $operator eq ' and ' ) { + warn "NZAND"; + return NZoperatorAND($leftresult,$rightresult); + } + elsif ( $operator eq ' or ' ) { + # just merge the 2 strings - return $leftresult.$rightresult; - } elsif ($operand eq ' not ') { - my @leftresult = split /;/, $leftresult; -# my @rightresult = split /;/,$leftresult; - my $finalresult; - foreach (@leftresult) { - unless ($rightresult =~ "$_;") { - $finalresult .= "$_;"; - } - } - return $finalresult; - } else { - # this error is impossible, because of the regexp that isolate the operand, but just in case... - die "error : operand unknown : $operand for $string"; + return $leftresult . $rightresult; + } + elsif ( $operator eq ' not ' ) { + return NZoperatorNOT($leftresult,$rightresult); + } + else { + +# this error is impossible, because of the regexp that isolate the operand, but just in case... + die "error : operand unknown : $operator for $string"; } - # it's a leaf, do the real SQL query and return the result - } else { - $string =~ s/__X__/"$commacontent"/ if $commacontent; + + # it's a leaf, do the real SQL query and return the result + } + else { + $string =~ s/__X__/"$commacontent"/ if $commacontent; $string =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|&|\+|\*|\// /g; -# warn "leaf : $string\n"; + #remove trailing blank at the beginning + $string =~ s/^ //g; + warn "leaf:$string" if $DEBUG; + # parse the string in in operator/operand/value again $string =~ /(.*)(>=|<=)(.*)/; - my $left = $1; + my $left = $1; my $operator = $2; - my $right = $3; + my $right = $3; +# warn "handling leaf... left:$left operator:$operator right:$right" +# if $DEBUG; unless ($operator) { $string =~ /(.*)(>|<|=)(.*)/; - $left = $1; + $left = $1; $operator = $2; - $right = $3; + $right = $3; + warn +"handling unless (operator)... left:$left operator:$operator right:$right" + if $DEBUG; } my $results; + +# strip adv, zebra keywords, currently not handled in nozebra: wrdl, ext, phr... + $left =~ s/ .*$//; + # automatic replace for short operators - $left='title' if $left =~ '^ti'; - $left='author' if $left =~ '^au'; - $left='publisher' if $left =~ '^pb'; - $left='subject' if $left =~ '^su'; - $left='koha-Auth-Number' if $left =~ '^an'; - $left='keyword' if $left =~ '^kw'; - if ($operator) { + $left = 'title' if $left =~ '^ti$'; + $left = 'author' if $left =~ '^au$'; + $left = 'publisher' if $left =~ '^pb$'; + $left = 'subject' if $left =~ '^su$'; + $left = 'koha-Auth-Number' if $left =~ '^an$'; + $left = 'keyword' if $left =~ '^kw$'; + warn "handling leaf... left:$left operator:$operator right:$right" if $DEBUG; + if ( $operator && $left ne 'keyword' ) { + #do a specific search my $dbh = C4::Context->dbh; - $operator='LIKE' if $operator eq '=' and $right=~ /%/; - my $sth = $dbh->prepare("SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?"); - warn "$left / $operator / $right\n"; + $operator = 'LIKE' if $operator eq '=' and $right =~ /%/; + my $sth = + $dbh->prepare( +"SELECT biblionumbers,value FROM nozebra WHERE server=? AND indexname=? AND value $operator ?" + ); + warn "$left / $operator / $right\n" if $DEBUG; + # split each word, query the DB and build the biblionumbers result - foreach (split / /,$right) { - my ($biblionumbers,$value); + #sanitizing leftpart + $left =~ s/^\s+|\s+$//; + foreach ( split / /, $right ) { + my $biblionumbers; + $_ =~ s/^\s+|\s+$//; next unless $_; - warn "EXECUTE : $server, $left, $_"; - $sth->execute($server, $left, $_); - while (my ($line,$value) = $sth->fetchrow) { - # if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <) - # otherwise, fill the result - $biblionumbers .= $line unless ($right =~ /\d/ && $value =~ /\D/); - warn "result : $value ". ($right =~ /\d/) . "==".(!$value =~ /\d/) ;#= $line"; + warn "EXECUTE : $server, $left, $_" if $DEBUG; + $sth->execute( $server, $left, $_ ) + or warn "execute failed: $!"; + while ( my ( $line, $value ) = $sth->fetchrow ) { + +# if we are dealing with a numeric value, use only numeric results (in case of >=, <=, > or <) +# otherwise, fill the result + $biblionumbers .= $line + unless ( $right =~ /^\d+$/ && $value =~ /\D/ ); + warn "result : $value " + . ( $right =~ /\d/ ) . "==" + . ( $value =~ /\D/?$line:"" ) if $DEBUG; #= $line"; } - # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list + +# do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list if ($results) { - my @leftresult = split /;/, $biblionumbers; - my $temp; - foreach my $entry (@leftresult) { # $_ contains biblionumber,title-weight - # remove weight at the end - my $cleaned = $entry; - $cleaned =~ s/-\d*$//; - # if the entry already in the hash, take it & increase weight -# warn "===== $cleaned ====="; - if ($results =~ "$cleaned") { - $temp .= "$entry;$entry;"; -# warn "INCLUDING $entry"; - } - } - $results = $temp; - } else { + warn "NZAND" if $DEBUG; + $results = NZoperatorAND($biblionumbers,$results); + } + else { $results = $biblionumbers; } } - } else { - #do a complete search (all indexes) + } + else { + + #do a complete search (all indexes), if index='kw' do complete search too. my $dbh = C4::Context->dbh; - my $sth = $dbh->prepare("SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?"); + my $sth = + $dbh->prepare( +"SELECT biblionumbers FROM nozebra WHERE server=? AND value LIKE ?" + ); + # split each word, query the DB and build the biblionumbers result - foreach (split / /,$string) { - next if C4::Context->stopwords->{uc($_)}; # skip if stopword - #warn "search on all indexes on $_"; + foreach ( split / /, $string ) { + next if C4::Context->stopwords->{ uc($_) }; # skip if stopword + warn "search on all indexes on $_" if $DEBUG; my $biblionumbers; next unless $_; - $sth->execute($server, $_); - while (my $line = $sth->fetchrow) { + $sth->execute( $server, $_ ); + while ( my $line = $sth->fetchrow ) { $biblionumbers .= $line; } - # do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list + +# do a AND with existing list if there is one, otherwise, use the biblionumbers list as 1st result list if ($results) { -# warn "RES for $_ = $biblionumbers"; - my @leftresult = split /;/, $biblionumbers; - my $temp; - foreach my $entry (@leftresult) { # $_ contains biblionumber,title-weight - # remove weight at the end - my $cleaned = $entry; - $cleaned =~ s/-\d*$//; - # if the entry already in the hash, take it & increase weight -# warn "===== $cleaned ====="; - if ($results =~ "$cleaned") { - $temp .= "$entry;$entry;"; -# warn "INCLUDING $entry"; - } - } - $results = $temp; - } else { -# warn "NEW RES for $_ = $biblionumbers"; + $results = NZoperatorAND($biblionumbers,$results); + } + else { + warn "NEW RES for $_ = $biblionumbers" if $DEBUG; $results = $biblionumbers; } } } -# warn "return : $results for LEAF : $string"; + warn "return : $results for LEAF : $string" if $DEBUG; return $results; } + warn "---------\nLeave NZanalyse\n---------" if $DEBUG; +} + +sub NZoperatorAND{ + my ($rightresult, $leftresult)=@_; + + my @leftresult = split /;/, $leftresult; + warn " @leftresult / $rightresult \n" if $DEBUG; + + # my @rightresult = split /;/,$leftresult; + my $finalresult; + +# parse the left results, and if the biblionumber exist in the right result, save it in finalresult +# the result is stored twice, to have the same weight for AND than OR. +# example : TWO : 61,61,64,121 (two is twice in the biblio #61) / TOWER : 61,64,130 +# result : 61,61,61,61,64,64 for two AND tower : 61 has more weight than 64 + foreach (@leftresult) { + my $value = $_; + my $countvalue; + ( $value, $countvalue ) = ( $1, $2 ) if ($value=~/(.*)-(\d+)$/); + if ( $rightresult =~ /\Q$value\E-(\d+);/ ) { + $countvalue = ( $1 > $countvalue ? $countvalue : $1 ); + $finalresult .= + "$value-$countvalue;$value-$countvalue;"; + } + } + warn "NZAND DONE : $finalresult \n" if $DEBUG; + return $finalresult; +} + +sub NZoperatorOR{ + my ($rightresult, $leftresult)=@_; + return $rightresult.$leftresult; +} + +sub NZoperatorNOT{ + my ($rightresult, $leftresult)=@_; + + my @leftresult = split /;/, $leftresult; + + # my @rightresult = split /;/,$leftresult; + my $finalresult; + foreach (@leftresult) { + my $value=$_; + $value=$1 if $value=~m/(.*)-\d+$/; + unless ($rightresult =~ "$value-") { + $finalresult .= "$_;"; + } + } + return $finalresult; } =head2 NZorder @@ -1305,226 +1793,294 @@ sub NZanalyse { =cut - sub NZorder { - my ($biblionumbers, $ordering,$results_per_page,$offset) = @_; + my ( $biblionumbers, $ordering, $results_per_page, $offset ) = @_; + warn "biblionumbers = $biblionumbers and ordering = $ordering\n" if $DEBUG; + # order title asc by default -# $ordering = '1=36 dbh; + # # order by POPULARITY # - if ($ordering =~ /popularity/) { + if ( $ordering =~ /popularity/ ) { my %result; my %popularity; + # popularity is not in MARC record, it's builded from a specific query - my $sth = $dbh->prepare("select sum(issues) from items where biblionumber=?"); - foreach (split /;/,$biblionumbers) { - my ($biblionumber,$title) = split /,/,$_; - $result{$biblionumber}=GetMarcBiblio($biblionumber); + my $sth = + $dbh->prepare("select sum(issues) from items where biblionumber=?"); + foreach ( split /;/, $biblionumbers ) { + my ( $biblionumber, $title ) = split /,/, $_; + $result{$biblionumber} = GetMarcBiblio($biblionumber); $sth->execute($biblionumber); - my $popularity= $sth->fetchrow ||0; - # hint : the key is popularity.title because we can have - # many results with the same popularity. In this cas, sub-ordering is done by title - # we also have biblionumber to avoid bug for 2 biblios with the same title & popularity - # (un-frequent, I agree, but we won't forget anything that way ;-) - $popularity{sprintf("%10d",$popularity).$title.$biblionumber} = $biblionumber; + my $popularity = $sth->fetchrow || 0; + +# hint : the key is popularity.title because we can have +# many results with the same popularity. In this cas, sub-ordering is done by title +# we also have biblionumber to avoid bug for 2 biblios with the same title & popularity +# (un-frequent, I agree, but we won't forget anything that way ;-) + $popularity{ sprintf( "%10d", $popularity ) . $title + . $biblionumber } = $biblionumber; } - # sort the hash and return the same structure as GetRecords (Zebra querying) + + # sort the hash and return the same structure as GetRecords (Zebra querying) my $result_hash; - my $numbers=0; - if ($ordering eq 'popularity_dsc') { # sort popularity DESC - foreach my $key (sort {$b cmp $a} (keys %popularity)) { - $result_hash->{'RECORDS'}[$numbers++] = $result{$popularity{$key}}->as_usmarc(); + my $numbers = 0; + if ( $ordering eq 'popularity_dsc' ) { # sort popularity DESC + foreach my $key ( sort { $b cmp $a } ( keys %popularity ) ) { + $result_hash->{'RECORDS'}[ $numbers++ ] = + $result{ $popularity{$key} }->as_usmarc(); } - } else { # sort popularity ASC - foreach my $key (sort (keys %popularity)) { - $result_hash->{'RECORDS'}[$numbers++] = $result{$popularity{$key}}->as_usmarc(); + } + else { # sort popularity ASC + foreach my $key ( sort ( keys %popularity ) ) { + $result_hash->{'RECORDS'}[ $numbers++ ] = + $result{ $popularity{$key} }->as_usmarc(); } } - my $finalresult=(); - $result_hash->{'hits'} = $numbers; + my $finalresult = (); + $result_hash->{'hits'} = $numbers; $finalresult->{'biblioserver'} = $result_hash; return $finalresult; - # - # ORDER BY author - # - } elsif ($ordering =~/author/){ + + # + # ORDER BY author + # + } + elsif ( $ordering =~ /author/ ) { my %result; - foreach (split /;/,$biblionumbers) { - my ($biblionumber,$title) = split /,/,$_; - my $record=GetMarcBiblio($biblionumber); + foreach ( split /;/, $biblionumbers ) { + my ( $biblionumber, $title ) = split /,/, $_; + my $record = GetMarcBiblio($biblionumber); my $author; - if (C4::Context->preference('marcflavour') eq 'UNIMARC') { - $author=$record->subfield('200','f'); - $author=$record->subfield('700','a') unless $author; - } else { - $author=$record->subfield('100','a'); + if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) { + $author = $record->subfield( '200', 'f' ); + $author = $record->subfield( '700', 'a' ) unless $author; } - # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title - # and we don't want to get only 1 result for each of them !!! - $result{$author.$biblionumber}=$record; + else { + $author = $record->subfield( '100', 'a' ); + } + +# hint : the result is sorted by title.biblionumber because we can have X biblios with the same title +# and we don't want to get only 1 result for each of them !!! + $result{ $author . $biblionumber } = $record; } - # sort the hash and return the same structure as GetRecords (Zebra querying) + + # sort the hash and return the same structure as GetRecords (Zebra querying) my $result_hash; - my $numbers=0; - if ($ordering eq 'author_za') { # sort by author desc - foreach my $key (sort { $b cmp $a } (keys %result)) { - $result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc(); + my $numbers = 0; + if ( $ordering eq 'author_za' ) { # sort by author desc + foreach my $key ( sort { $b cmp $a } ( keys %result ) ) { + $result_hash->{'RECORDS'}[ $numbers++ ] = + $result{$key}->as_usmarc(); } - } else { # sort by author ASC - foreach my $key (sort (keys %result)) { - $result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc(); + } + else { # sort by author ASC + foreach my $key ( sort ( keys %result ) ) { + $result_hash->{'RECORDS'}[ $numbers++ ] = + $result{$key}->as_usmarc(); } } - my $finalresult=(); - $result_hash->{'hits'} = $numbers; + my $finalresult = (); + $result_hash->{'hits'} = $numbers; $finalresult->{'biblioserver'} = $result_hash; return $finalresult; - # - # ORDER BY callnumber - # - } elsif ($ordering =~/callnumber/){ + + # + # ORDER BY callnumber + # + } + elsif ( $ordering =~ /callnumber/ ) { my %result; - foreach (split /;/,$biblionumbers) { - my ($biblionumber,$title) = split /,/,$_; - my $record=GetMarcBiblio($biblionumber); + foreach ( split /;/, $biblionumbers ) { + my ( $biblionumber, $title ) = split /,/, $_; + my $record = GetMarcBiblio($biblionumber); my $callnumber; - my ($callnumber_tag,$callnumber_subfield)=GetMarcFromKohaField($dbh,'items.itemcallnumber'); - ($callnumber_tag,$callnumber_subfield)= GetMarcFromKohaField('biblioitems.callnumber') unless $callnumber_tag; - if (C4::Context->preference('marcflavour') eq 'UNIMARC') { - $callnumber=$record->subfield('200','f'); - } else { - $callnumber=$record->subfield('100','a'); + my ( $callnumber_tag, $callnumber_subfield ) = + GetMarcFromKohaField( $dbh, 'items.itemcallnumber' ); + ( $callnumber_tag, $callnumber_subfield ) = + GetMarcFromKohaField('biblioitems.callnumber') + unless $callnumber_tag; + if ( C4::Context->preference('marcflavour') eq 'UNIMARC' ) { + $callnumber = $record->subfield( '200', 'f' ); } - # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title - # and we don't want to get only 1 result for each of them !!! - $result{$callnumber.$biblionumber}=$record; + else { + $callnumber = $record->subfield( '100', 'a' ); + } + +# hint : the result is sorted by title.biblionumber because we can have X biblios with the same title +# and we don't want to get only 1 result for each of them !!! + $result{ $callnumber . $biblionumber } = $record; } - # sort the hash and return the same structure as GetRecords (Zebra querying) + + # sort the hash and return the same structure as GetRecords (Zebra querying) my $result_hash; - my $numbers=0; - if ($ordering eq 'call_number_dsc') { # sort by title desc - foreach my $key (sort { $b cmp $a } (keys %result)) { - $result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc(); + my $numbers = 0; + if ( $ordering eq 'call_number_dsc' ) { # sort by title desc + foreach my $key ( sort { $b cmp $a } ( keys %result ) ) { + $result_hash->{'RECORDS'}[ $numbers++ ] = + $result{$key}->as_usmarc(); } - } else { # sort by title ASC - foreach my $key (sort { $a cmp $b } (keys %result)) { - $result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc(); + } + else { # sort by title ASC + foreach my $key ( sort { $a cmp $b } ( keys %result ) ) { + $result_hash->{'RECORDS'}[ $numbers++ ] = + $result{$key}->as_usmarc(); } } - my $finalresult=(); - $result_hash->{'hits'} = $numbers; + my $finalresult = (); + $result_hash->{'hits'} = $numbers; $finalresult->{'biblioserver'} = $result_hash; return $finalresult; - } elsif ($ordering =~ /pubdate/){ #pub year + } + elsif ( $ordering =~ /pubdate/ ) { #pub year my %result; - foreach (split /;/,$biblionumbers) { - my ($biblionumber,$title) = split /,/,$_; - my $record=GetMarcBiblio($biblionumber); - my ($publicationyear_tag,$publicationyear_subfield)=GetMarcFromKohaField('biblioitems.publicationyear',''); - my $publicationyear=$record->subfield($publicationyear_tag,$publicationyear_subfield); - # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title - # and we don't want to get only 1 result for each of them !!! - $result{$publicationyear.$biblionumber}=$record; + foreach ( split /;/, $biblionumbers ) { + my ( $biblionumber, $title ) = split /,/, $_; + my $record = GetMarcBiblio($biblionumber); + my ( $publicationyear_tag, $publicationyear_subfield ) = + GetMarcFromKohaField( 'biblioitems.publicationyear', '' ); + my $publicationyear = + $record->subfield( $publicationyear_tag, + $publicationyear_subfield ); + +# hint : the result is sorted by title.biblionumber because we can have X biblios with the same title +# and we don't want to get only 1 result for each of them !!! + $result{ $publicationyear . $biblionumber } = $record; } - # sort the hash and return the same structure as GetRecords (Zebra querying) + + # sort the hash and return the same structure as GetRecords (Zebra querying) my $result_hash; - my $numbers=0; - if ($ordering eq 'pubdate_dsc') { # sort by pubyear desc - foreach my $key (sort { $b cmp $a } (keys %result)) { - $result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc(); + my $numbers = 0; + if ( $ordering eq 'pubdate_dsc' ) { # sort by pubyear desc + foreach my $key ( sort { $b cmp $a } ( keys %result ) ) { + $result_hash->{'RECORDS'}[ $numbers++ ] = + $result{$key}->as_usmarc(); } - } else { # sort by pub year ASC - foreach my $key (sort (keys %result)) { - $result_hash->{'RECORDS'}[$numbers++] = $result{$key}->as_usmarc(); + } + else { # sort by pub year ASC + foreach my $key ( sort ( keys %result ) ) { + $result_hash->{'RECORDS'}[ $numbers++ ] = + $result{$key}->as_usmarc(); } } - my $finalresult=(); - $result_hash->{'hits'} = $numbers; + my $finalresult = (); + $result_hash->{'hits'} = $numbers; $finalresult->{'biblioserver'} = $result_hash; return $finalresult; - # - # ORDER BY title - # - } elsif ($ordering =~ /title/) { - # the title is in the biblionumbers string, so we just need to build a hash, sort it and return + + # + # ORDER BY title + # + } + elsif ( $ordering =~ /title/ ) { + +# the title is in the biblionumbers string, so we just need to build a hash, sort it and return my %result; - foreach (split /;/,$biblionumbers) { - my ($biblionumber,$title) = split /,/,$_; - # hint : the result is sorted by title.biblionumber because we can have X biblios with the same title - # and we don't want to get only 1 result for each of them !!! - # hint & speed improvement : we can order without reading the record - # so order, and read records only for the requested page ! - $result{$title.$biblionumber}=$biblionumber; + foreach ( split /;/, $biblionumbers ) { + my ( $biblionumber, $title ) = split /,/, $_; + +# hint : the result is sorted by title.biblionumber because we can have X biblios with the same title +# and we don't want to get only 1 result for each of them !!! +# hint & speed improvement : we can order without reading the record +# so order, and read records only for the requested page ! + $result{ $title . $biblionumber } = $biblionumber; } - # sort the hash and return the same structure as GetRecords (Zebra querying) + + # sort the hash and return the same structure as GetRecords (Zebra querying) my $result_hash; - my $numbers=0; - if ($ordering eq 'title_az') { # sort by title desc - foreach my $key (sort (keys %result)) { - $result_hash->{'RECORDS'}[$numbers++] = $result{$key}; + my $numbers = 0; + if ( $ordering eq 'title_az' ) { # sort by title desc + foreach my $key ( sort ( keys %result ) ) { + $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key}; } - } else { # sort by title ASC - foreach my $key (sort { $b cmp $a } (keys %result)) { - $result_hash->{'RECORDS'}[$numbers++] = $result{$key}; + } + else { # sort by title ASC + foreach my $key ( sort { $b cmp $a } ( keys %result ) ) { + $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key}; } } + # limit the $results_per_page to result size if it's more - $results_per_page = $numbers-1 if $numbers < $results_per_page; + $results_per_page = $numbers - 1 if $numbers < $results_per_page; + # for the requested page, replace biblionumber by the complete record # speed improvement : avoid reading too much things - for (my $counter=$offset;$counter<=$offset+$results_per_page;$counter++) { - $result_hash->{'RECORDS'}[$counter] = GetMarcBiblio($result_hash->{'RECORDS'}[$counter])->as_usmarc; + for ( + my $counter = $offset ; + $counter <= $offset + $results_per_page ; + $counter++ + ) + { + $result_hash->{'RECORDS'}[$counter] = + GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc; } - my $finalresult=(); - $result_hash->{'hits'} = $numbers; + my $finalresult = (); + $result_hash->{'hits'} = $numbers; $finalresult->{'biblioserver'} = $result_hash; return $finalresult; - } else { - # - # order by ranking - # - # we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking + } + else { + +# +# order by ranking +# +# we need 2 hashes to order by ranking : the 1st one to count the ranking, the 2nd to order by ranking my %result; my %count_ranking; - foreach (split /;/,$biblionumbers) { - my ($biblionumber,$title) = split /,/,$_; + foreach ( split /;/, $biblionumbers ) { + my ( $biblionumber, $title ) = split /,/, $_; $title =~ /(.*)-(\d)/; - # get weight - my $ranking =$2; - # note that we + the ranking because ranking is calculated on weight of EACH term requested. - # if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N - # biblio N has ranking = 6 + + # get weight + my $ranking = $2; + +# note that we + the ranking because ranking is calculated on weight of EACH term requested. +# if we ask for "two towers", and "two" has weight 2 in biblio N, and "towers" has weight 4 in biblio N +# biblio N has ranking = 6 $count_ranking{$biblionumber} += $ranking; } - # build the result by "inverting" the count_ranking hash - # hing : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead + +# build the result by "inverting" the count_ranking hash +# hing : as usual, we don't order by ranking only, to avoid having only 1 result for each rank. We build an hash on concat(ranking,biblionumber) instead # warn "counting"; - foreach (keys %count_ranking) { - $result{sprintf("%10d",$count_ranking{$_}).'-'.$_} = $_; + foreach ( keys %count_ranking ) { + $result{ sprintf( "%10d", $count_ranking{$_} ) . '-' . $_ } = $_; } - # sort the hash and return the same structure as GetRecords (Zebra querying) + + # sort the hash and return the same structure as GetRecords (Zebra querying) my $result_hash; - my $numbers=0; - foreach my $key (sort {$b cmp $a} (keys %result)) { - $result_hash->{'RECORDS'}[$numbers++] = $result{$key}; - } + my $numbers = 0; + foreach my $key ( sort { $b cmp $a } ( keys %result ) ) { + $result_hash->{'RECORDS'}[ $numbers++ ] = $result{$key}; + } + # limit the $results_per_page to result size if it's more - $results_per_page = $numbers-1 if $numbers < $results_per_page; + $results_per_page = $numbers - 1 if $numbers < $results_per_page; + # for the requested page, replace biblionumber by the complete record # speed improvement : avoid reading too much things - for (my $counter=$offset;$counter<=$offset+$results_per_page;$counter++) { - $result_hash->{'RECORDS'}[$counter] = GetMarcBiblio($result_hash->{'RECORDS'}[$counter])->as_usmarc if $result_hash->{'RECORDS'}[$counter]; + for ( + my $counter = $offset ; + $counter <= $offset + $results_per_page ; + $counter++ + ) + { + $result_hash->{'RECORDS'}[$counter] = + GetMarcBiblio( $result_hash->{'RECORDS'}[$counter] )->as_usmarc + if $result_hash->{'RECORDS'}[$counter]; } - my $finalresult=(); - $result_hash->{'hits'} = $numbers; + my $finalresult = (); + $result_hash->{'hits'} = $numbers; $finalresult->{'biblioserver'} = $result_hash; return $finalresult; } } + =head2 ModBiblios ($countchanged,$listunchanged) = ModBiblios($listbiblios, $tagsubfield,$initvalue,$targetvalue,$test); @@ -1556,71 +2112,97 @@ $template->param(countchanged => $countchanged, loopunchanged=>$listunchanged); =cut -sub ModBiblios{ - my ($listbiblios,$tagsubfield,$initvalue,$targetvalue,$test)=@_; - my $countmatched; - my @unmatched; - my ($tag,$subfield)=($1,$2) if ($tagsubfield=~/^(\d{1,3})([a-z0-9A-Z@])?$/); - if ((length($tag)<3)&& $subfield=~/0-9/){ - $tag=$tag.$subfield; - undef $subfield; - } - my ($bntag,$bnsubf) = GetMarcFromKohaField('biblio.biblionumber'); - my ($itemtag,$itemsubf) = GetMarcFromKohaField('items.itemnumber'); - foreach my $usmarc (@$listbiblios){ - my $record; - $record=eval{MARC::Record->new_from_usmarc($usmarc)}; - my $biblionumber; - if ($@){ - # usmarc is not a valid usmarc May be a biblionumber - if ($tag eq $itemtag){ - my $bib=GetBiblioFromItemNumber($usmarc); - $record=GetMarcItem($bib->{'biblionumber'},$usmarc) ; - $biblionumber=$bib->{'biblionumber'}; - } else { - $record=GetMarcBiblio($usmarc); - $biblionumber=$usmarc; - } - } else { - if ($bntag >= 010){ - $biblionumber = $record->subfield($bntag,$bnsubf); - }else { - $biblionumber=$record->field($bntag)->data; - } - } - #GetBiblionumber is to be written. - #Could be replaced by TransformMarcToKoha (But Would be longer) - if ($record->field($tag)){ - my $modify=0; - foreach my $field ($record->field($tag)){ - if ($subfield){ - if ($field->delete_subfield('code' =>$subfield,'match'=>qr($initvalue))){ - $countmatched++; - $modify=1; - $field->update($subfield,$targetvalue) if ($targetvalue); - } - } else { - if ($tag >= 010){ - if ($field->delete_field($field)){ - $countmatched++; - $modify=1; +sub ModBiblios { + my ( $listbiblios, $tagsubfield, $initvalue, $targetvalue, $test ) = @_; + my $countmatched; + my @unmatched; + my ( $tag, $subfield ) = ( $1, $2 ) + if ( $tagsubfield =~ /^(\d{1,3})([a-z0-9A-Z@])?$/ ); + if ( ( length($tag) < 3 ) && $subfield =~ /0-9/ ) { + $tag = $tag . $subfield; + undef $subfield; + } + my ( $bntag, $bnsubf ) = GetMarcFromKohaField('biblio.biblionumber'); + my ( $itemtag, $itemsubf ) = GetMarcFromKohaField('items.itemnumber'); + if ($tag eq $itemtag) { + # do not allow the embedded item tag to be + # edited from here + warn "Attempting to edit item tag via C4::Search::ModBiblios -- not allowed"; + return (0, []); + } + foreach my $usmarc (@$listbiblios) { + my $record; + $record = eval { MARC::Record->new_from_usmarc($usmarc) }; + my $biblionumber; + if ($@) { + + # usmarc is not a valid usmarc May be a biblionumber + # FIXME - sorry, please let's figure out whether + # this function is to be passed a list of + # record numbers or a list of MARC::Record + # objects. The former is probably better + # because the MARC records supplied by Zebra + # may be not current. + $record = GetMarcBiblio($usmarc); + $biblionumber = $usmarc; + } + else { + if ( $bntag >= 010 ) { + $biblionumber = $record->subfield( $bntag, $bnsubf ); + } + else { + $biblionumber = $record->field($bntag)->data; + } + } + + #GetBiblionumber is to be written. + #Could be replaced by TransformMarcToKoha (But Would be longer) + if ( $record->field($tag) ) { + my $modify = 0; + foreach my $field ( $record->field($tag) ) { + if ($subfield) { + if ( + $field->delete_subfield( + 'code' => $subfield, + 'match' => qr($initvalue) + ) + ) + { + $countmatched++; + $modify = 1; + $field->update( $subfield, $targetvalue ) + if ($targetvalue); + } + } + else { + if ( $tag >= 010 ) { + if ( $field->delete_field($field) ) { + $countmatched++; + $modify = 1; + } + } + else { + $field->data = $targetvalue + if ( $field->data =~ qr($initvalue) ); + } + } + } + + # warn $record->as_formatted; + if ($modify) { + ModBiblio( $record, $biblionumber, + GetFrameworkCode($biblionumber) ) + unless ($test); } - } else { - $field->data=$targetvalue if ($field->data=~qr($initvalue)); - } - } - } -# warn $record->as_formatted; - if ($modify){ - ModBiblio($record,$biblionumber,GetFrameworkCode($biblionumber)) unless ($test); - } else { - push @unmatched, $biblionumber; - } - } else { - push @unmatched, $biblionumber; + else { + push @unmatched, $biblionumber; + } + } + else { + push @unmatched, $biblionumber; + } } - } - return ($countmatched,\@unmatched); + return ( $countmatched, \@unmatched ); } END { } # module clean-up code here (global destructor)