followup : auto_truncation 3287252c0

[koha.git] / C4 / Search.pm
diff --git a/C4/Search.pm b/C4/Search.pm

index 2c50850..a468821 100644 (file)
--- a/C4/Search.pm
+++ b/C4/Search.pm
@@ -27,6 +27,8 @@ use XML::Simple;
  use C4::Dates qw(format_date);
  use C4::XSLT;
  use C4::Branch;
+use C4::Debug;
+use YAML;
  use URI::Escape;
  
  use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $DEBUG);
@@ -633,10 +635,12 @@ sub _remove_stopwords {
  #       we use IsAlpha unicode definition, to deal correctly with diacritics.
  #       otherwise, a French word like "leçon" woudl be split into "le" "çon", "le"
  #       is a stopword, we'd get "çon" and wouldn't find anything...
+#       
                 foreach ( keys %{ C4::Context->stopwords } ) {
                         next if ( $_ =~ /(and|or|not)/ );    # don't remove operators
+                       $debug && warn "$_ Dump($operand)";
                         if ( my ($matched) = ($operand =~
-                               /(\P{IsAlnum}\Q$_\E\P{IsAlnum}|^\Q$_\E\P{IsAlnum}|\P{IsAlnum}\Q$_\E$|^\Q$_\E$)/gi) )
+                               /([^\X\p{isAlnum}]\Q$_\E[^\X\p{isAlnum}]|[^\X\p{isAlnum}]\Q$_\E$|^\Q$_\E[^\X\p{isAlnum}])/gi))
                         {
                                 $operand =~ s/\Q$matched\E/ /gi;
                                 push @stopwords_removed, $_;
@@ -772,6 +776,194 @@ sub _build_weighted_query {
      return $weighted_query;
  }
  
+=head2 getIndexes
+
+Return a reference to an array of the index names (biblio indexes first,
+then item indexes).  buildQuery matches queries against these names to
+detect CCL, so each name must be spelled without stray whitespace.
+
+=cut
+
+sub getIndexes {
+    my @indexes = (
+                    # biblio indexes
+                    'ab',
+                    'Abstract',
+                    'acqdate',
+                    'allrecords',
+                    'an',
+                    'Any',
+                    'at',
+                    'au',
+                    'aub',
+                    'aud',
+                    'audience',
+                    'auo',
+                    'aut',
+                    'Author',
+                    'Author-in-order',
+                    'Author-personal-bibliography',
+                    'Authority-Number',
+                    'authtype',
+                    'bc',
+                    'biblionumber',
+                    'bio',
+                    'biography',
+                    'callnum',
+                    'cfn',
+                    'Chronological-subdivision',
+                    'cn-bib-source',
+                    'cn-bib-sort',
+                    'cn-class',
+                    'cn-item',
+                    'cn-prefix',
+                    'cn-suffix',
+                    'cpn',
+                    'Code-institution',
+                    'Conference-name',
+                    'Conference-name-heading',
+                    'Conference-name-see',
+                    'Conference-name-seealso',
+                    'Content-type',
+                    'Control-number',
+                    'copydate',
+                    'Corporate-name',
+                    'Corporate-name-heading',
+                    'Corporate-name-see',
+                    'Corporate-name-seealso',
+                    'ctype',
+                    'date-entered-on-file',
+                    'Date-of-acquisition',
+                    'Date-of-publication',
+                    'Dewey-classification',
+                    'extent',
+                    'fic',
+                    'fiction',
+                    'Form-subdivision',
+                    'format',
+                    'Geographic-subdivision',
+                    'he',
+                    'Heading',
+                    'Heading-use-main-or-added-entry',
+                    'Heading-use-series-added-entry',
+                    'Heading-use-subject-added-entry',
+                    'Host-item',
+                    'id-other',
+                    'Illustration-code',
+                    'ISBN',
+                    'ISSN',
+                    'itemtype',
+                    'kw',
+                    'Koha-Auth-Number',
+                    'l-format',
+                    'language',
+                    'lc-card',
+                    'LC-card-number',
+                    'lcn',
+                    'llength',
+                    'ln',
+                    'Local-classification',
+                    'Local-number',
+                    'Match-heading',
+                    'Match-heading-see-from',
+                    'Material-type',
+                    'mc-itemtype',
+                    'mc-rtype',
+                    'mus',
+                    'Name-geographic',
+                    'Name-geographic-heading',
+                    'Name-geographic-see',
+                    'Name-geographic-seealso',
+                    'nb',
+                    'Note',
+                    'ns',
+                    'nt',
+                    'pb',
+                    'Personal-name',
+                    'Personal-name-heading',
+                    'Personal-name-see',
+                    'Personal-name-seealso',
+                    'pl',
+                    'Place-publication',
+                    'pn',
+                    'popularity',
+                    'pubdate',
+                    'Publisher',
+                    'Record-type',
+                    'rtype',
+                    'se',
+                    'See',
+                    'See-also',
+                    'sn',
+                    'Stock-number',
+                    'su',
+                    'Subject',
+                    'Subject-heading-thesaurus',
+                    'Subject-name-personal',
+                    'Subject-subdivision',
+                    'Summary',
+                    'Suppress',
+                    'su-geo',
+                    'su-na',
+                    'su-to',
+                    'su-ut',
+                    'ut',
+                    'Term-genre-form',
+                    'Term-genre-form-heading',
+                    'Term-genre-form-see',
+                    'Term-genre-form-seealso',
+                    'ti',
+                    'Title',
+                    'Title-cover',
+                    'Title-series',
+                    'Title-uniform',
+                    'Title-uniform-heading',
+                    'Title-uniform-see',
+                    'Title-uniform-seealso',
+                    'totalissues',
+                    'yr',
+
+                    # items indexes ('bc' and 'Local-classification' are already listed above)
+                    'acqsource',
+                    'barcode',
+                    'branch',
+                    'ccode',
+                    'classification-source',
+                    'cn-sort',
+                    'coded-location-qualifier',
+                    'copynumber',
+                    'damaged',
+                    'datelastborrowed',
+                    'datelastseen',
+                    'holdingbranch',
+                    'homebranch',
+                    'issues',
+                    'itemnumber',
+                    'itype',
+                    'location',
+                    'lost',
+                    'materials-specified',
+                    'mc-ccode',
+                    'mc-itype',
+                    'mc-loc',
+                    'notforloan',
+                    'onloan',
+                    'price',
+                    'renewals',
+                    'replacementprice',
+                    'replacementpricedate',
+                    'reserves',
+                    'restricted',
+                    'stack',
+                    'uri',
+                    'withdrawn',
+
+                    # subject related
+                  );
+
+    return \@indexes;
+}
+
  =head2 buildQuery
  
  ( $error, $query,
@@ -808,9 +1000,10 @@ sub buildQuery {
  
      # no stemming/weight/fuzzy in NoZebra
      if ( C4::Context->preference("NoZebra") ) {
-        $stemming      = 0;
-        $weight_fields = 0;
-        $fuzzy_enabled = 0;
+        $stemming         = 0;
+        $weight_fields    = 0;
+        $fuzzy_enabled    = 0;
+       $auto_truncation  = 0;
      }
  
      my $query        = $operands[0];
@@ -827,6 +1020,17 @@ sub buildQuery {
  
      my $stopwords_removed;    # flag to determine if stopwords have been removed
  
+    my $cclq;
+    my $cclindexes = getIndexes();
+    if( $query !~ /\s*ccl=/ ){
+        for my $index (@$cclindexes){
+            if($query =~ /($index)(,?\w)*[:=]/){
+                $cclq = 1;
+            }
+        }
+        $query = "ccl=$query" if($cclq);
+    }
+
  # for handling ccl, cql, pqf queries in diagnostic mode, skip the rest of the steps
  # DIAGNOSTIC ONLY!!
      if ( $query =~ /^ccl=/ ) {
@@ -901,6 +1105,11 @@ sub buildQuery {
                      ) = ( 0, 0, 0, 0, 0 );
  
                  }
+                
+                if(not $index){
+                    $index = 'kw';
+                }
+                
                  # Set default structure attribute (word list)
                  my $struct_attr;
                  unless ( $indexes_set || !$index || $index =~ /(st-|phr|ext|wrdl)/ ) {
@@ -921,11 +1130,13 @@ sub buildQuery {
                  }
  
                  if ($auto_truncation){
-                                       #FIXME only valid with LTR scripts
-                                       $operand=join(" ",map{ 
-                                                                                       "$_*" 
-                                                                            }split (/\s+/,$operand));
-                       warn $operand if $DEBUG;
+                                       unless ( $index =~ /(st-|phr|ext)/ ) {
+                                               #FIXME only valid with LTR scripts
+                                               $operand=join(" ",map{ 
+                                                                                       (index($_,"*")>0?"$_":"$_*")
+                                                                                        }split (/\s+/,$operand));
+                                               warn $operand if $DEBUG;
+                                       }
                                 }
  
                  # Detect Truncation
@@ -1188,16 +1399,20 @@ sub searchResults {
  
      # We get the biblionumber position in MARC 
      my ($bibliotag,$bibliosubf)=GetMarcFromKohaField('biblio.biblionumber','');
-    my $fw;
+    my $fw = '';
      
      # loop through all of the records we've retrieved
      for ( my $i = $offset ; $i <= $times - 1 ; $i++ ) {
          my $marcrecord = MARC::File::USMARC::decode( $marcresults[$i] );
+               my $biblionumber;
          
-        if ($bibliotag<10){
-            $fw = GetFrameworkCode($marcrecord->field($bibliotag)->data);
-        }else{
-            $fw = GetFrameworkCode($marcrecord->subfield($bibliotag,$bibliosubf));
+        if(not $scan){
+            if ($bibliotag<10){
+                $biblionumber = $marcrecord->field($bibliotag)->data;
+            }else{
+                $biblionumber = $marcrecord->subfield($bibliotag,$bibliosubf);
+            } 
+            $fw = GetFrameworkCode($biblionumber);
          }
          
          my $oldbiblio = TransformMarcToKoha( $dbh, $marcrecord, $fw );