NoZebra fixes: removing \r and \n when indexing

[koha.git] / C4 / Biblio.pm
diff --git a/C4/Biblio.pm b/C4/Biblio.pm

index 2a4649c..b07073e 100755 (executable)
--- a/C4/Biblio.pm
+++ b/C4/Biblio.pm
@@ -30,6 +30,7 @@ use C4::Branch;
  use C4::Dates qw/format_date/;
  use C4::Log; # logaction
  use C4::ClassSource;
+use C4::Charset;
  
  use vars qw($VERSION @ISA @EXPORT);
  
@@ -232,6 +233,9 @@ sub AddBiblio {
  
      _koha_marc_update_bib_ids($record, $frameworkcode, $biblionumber, $biblioitemnumber);
  
+    # update MARC subfield that stores biblioitems.cn_sort
+    _koha_marc_update_biblioitem_cn_sort($record, $olddata, $frameworkcode);
+    
      # now add the record
      $biblionumber = ModBiblioMarc( $record, $biblionumber, $frameworkcode ) unless $defer_marc_save;
        
@@ -290,12 +294,15 @@ sub ModBiblio {
      $sth->finish();
      _koha_marc_update_bib_ids($record, $frameworkcode, $biblionumber, $biblioitemnumber);
  
-    # update the MARC record (that now contains biblio and items) with the new record data
-    &ModBiblioMarc( $record, $biblionumber, $frameworkcode );
-    
      # load the koha-table data object
      my $oldbiblio = TransformMarcToKoha( $dbh, $record, $frameworkcode );
  
+    # update MARC subfield that stores biblioitems.cn_sort
+    _koha_marc_update_biblioitem_cn_sort($record, $oldbiblio, $frameworkcode);
+
+    # update the MARC record (that now contains biblio and items) with the new record data
+    &ModBiblioMarc( $record, $biblionumber, $frameworkcode );
+    
      # modify the other koha tables
      _koha_modify_biblio( $dbh, $oldbiblio, $frameworkcode );
      _koha_modify_biblioitem_nonmarc( $dbh, $oldbiblio );
@@ -815,14 +822,9 @@ sub GetMarcBiblio {
      my $sth          =
        $dbh->prepare("SELECT marcxml FROM biblioitems WHERE biblionumber=? ");
      $sth->execute($biblionumber);
-     my ($marcxml) = $sth->fetchrow;
+    my $row = $sth->fetchrow_hashref;
+    my $marcxml = StripNonXmlChars($row->{'marcxml'});
       MARC::File::XML->default_record_format(C4::Context->preference('marcflavour'));
-     $marcxml =~ s/\x1e//g;
-     $marcxml =~ s/\x1f//g;
-     $marcxml =~ s/\x1d//g;
-     $marcxml =~ s/\x0f//g;
-     $marcxml =~ s/\x0c//g;  
-#   warn $marcxml;
      my $record = MARC::Record->new();
      if ($marcxml) {
          $record = eval {MARC::Record::new_from_xml( $marcxml, "utf8", C4::Context->preference('marcflavour'))};
@@ -2093,9 +2095,20 @@ sub ModZebra {
          #
          # we use zebra, just fill zebraqueue table
          #
-        my $sth=$dbh->prepare("INSERT INTO zebraqueue  (biblio_auth_number,server,operation) VALUES(?,?,?)");
-        $sth->execute($biblionumber,$server,$op);
-        $sth->finish;
+        my $check_sql = "SELECT COUNT(*) FROM zebraqueue 
+                         WHERE server = ?
+                         AND   biblio_auth_number = ?
+                         AND   operation = ?
+                         AND   done = 0";
+        my $check_sth = $dbh->prepare_cached($check_sql);
+        $check_sth->execute($server, $biblionumber, $op);
+        my ($count) = $check_sth->fetchrow_array;
+        $check_sth->finish();
+        if ($count == 0) {
+            my $sth=$dbh->prepare("INSERT INTO zebraqueue  (biblio_auth_number,server,operation) VALUES(?,?,?)");
+            $sth->execute($biblionumber,$server,$op);
+            $sth->finish;
+        }
      }
  }
  
@@ -2256,7 +2269,7 @@ sub _AddBiblioNoZebra {
      }
  
      # remove blancks comma (that could cause problem when decoding the string for CQL retrieval) and regexp specific values
-    $title =~ s/ |,|;|\[|\]|\(|\)|\*|-|'|=//g;
+    $title =~ s/ |\.|,|;|\[|\]|\(|\)|\*|-|'|:|=|\r|\n//g;
      # limit to 10 char, should be enough, and limit the DB size
      $title = substr($title,0,10);
      #parse each field
@@ -2268,6 +2281,7 @@ sub _AddBiblioNoZebra {
              my $tag = $field->tag();
              my $subfieldcode = $subfield->[0];
              my $indexed=0;
+            warn "INDEXING :".$subfield->[1];
              # check each index to see if the subfield is stored somewhere
              # otherwise, store it in __RAW__ index
              foreach my $key (keys %index) {
@@ -2276,7 +2290,7 @@ sub _AddBiblioNoZebra {
                      $indexed=1;
                      my $line= lc $subfield->[1];
                      # remove meaningless value in the field...
-                    $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=|:/ /g;
+                    $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=|:|\r|\n/ /g;
                      # ... and split in words
                      foreach (split / /,$line) {
                          next unless $_; # skip  empty values (multiple spaces)
@@ -2309,7 +2323,7 @@ sub _AddBiblioNoZebra {
              # the subfield is not indexed, store it in __RAW__ index anyway
              unless ($indexed) {
                  my $line= lc $subfield->[1];
-                $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=|:/ /g;
+                $line =~ s/-|\.|\?|,|;|!|'|\(|\)|\[|\]|{|}|"|<|>|&|\+|\*|\/|=|:|\r|\n/ /g;
                  # ... and split in words
                  foreach (split / /,$line) {
                      next unless $_; # skip  empty values (multiple spaces)
@@ -2456,6 +2470,44 @@ sub _koha_marc_update_bib_ids {
      }
  }
  
+=head2 _koha_marc_update_biblioitem_cn_sort
+
+=over 4
+
+_koha_marc_update_biblioitem_cn_sort($marc, $biblioitem, $frameworkcode);
+
+=back
+
+Given a MARC bib record and the biblioitem hash, update the
+subfield that contains a copy of the value of biblioitems.cn_sort.
+
+=cut
+
+sub _koha_marc_update_biblioitem_cn_sort {
+    my ($marc, $biblioitem, $frameworkcode) = @_;
+
+    # Nothing to do unless the framework maps biblioitems.cn_sort to a MARC tag.
+    my ($biblioitem_tag, $biblioitem_subfield) = GetMarcFromKohaField("biblioitems.cn_sort", $frameworkcode);
+    return unless $biblioitem_tag;
+
+    # NOTE(review): cn_source is read under a table-qualified key, unlike
+    # cn_class and cn_item -- confirm this matches the hash the callers pass.
+    my ($cn_sort) = GetClassSort($biblioitem->{'biblioitems.cn_source'}, $biblioitem->{'cn_class'}, $biblioitem->{'cn_item'});
+    # One emptiness test for both branches; defined() avoids a warning on undef.
+    my $has_cn_sort = defined($cn_sort) && $cn_sort ne '';
+
+    if (my $field = $marc->field($biblioitem_tag)) {
+        # Drop any stale copy of cn_sort before storing the current value.
+        $field->delete_subfield(code => $biblioitem_subfield);
+        $field->add_subfields($biblioitem_subfield => $cn_sort) if $has_cn_sort;
+    } elsif ($has_cn_sort) {
+        # No biblioitem tag is present in the MARC record (an odd combination
+        # of events), so create the field to hold the sort key.
+        $marc->insert_grouped_field(MARC::Field->new($biblioitem_tag, ' ', ' ', $biblioitem_subfield => $cn_sort));
+    }
+    return;
+}
+
  =head2 _koha_add_biblio
  
  =over 4