Bug 19893: Remove serialization format setting

author David Gustafsson <david.gustafsson@ub.gu.se>

Mon, 28 May 2018 14:03:32 +0000 (16:03 +0200)

committer Nick Clemens <nick@bywatersolutions.com>

Fri, 16 Nov 2018 11:04:57 +0000 (11:04 +0000)
author David Gustafsson <david.gustafsson@ub.gu.se>
Mon, 28 May 2018 14:03:32 +0000 (16:03 +0200)
committer Nick Clemens <nick@bywatersolutions.com>
Fri, 16 Nov 2018 11:04:57 +0000 (11:04 +0000)
diff --git a/Koha/SearchEngine/Elasticsearch.pm b/Koha/SearchEngine/Elasticsearch.pm

index 93d9213..32ba39b 100644 (file)
--- a/Koha/SearchEngine/Elasticsearch.pm
+++ b/Koha/SearchEngine/Elasticsearch.pm
@@ -379,11 +379,26 @@ sub marc_records_to_documents {
          }
          # TODO: Perhaps should check if $records_document non empty, but really should never be the case
          $record->encoding('UTF-8');
-        if ($serialization_format eq 'base64ISO2709') {
+        my @warnings;
+        {
+            # Temporarily capture all warnings raised while serializing (MARC::Record carps when the record length exceeds 99999)
+            local $SIG{__WARN__} = sub {
+                push @warnings, $_[0];
+            };
              $record_document->{'marc_data'} = encode_base64(encode('UTF-8', $record->as_usmarc()));
          }
-        else {
+        if (@warnings) {
+            # Re-emit the captured warnings unless the leader shows the record length limit ('99999') was hit; either way, fall back to MARCXML
+            unless (substr($record->leader(), 0, 5) eq '99999') {
+                foreach my $warning (@warnings) {
+                    carp($warning);
+                }
+            }
              $record_document->{'marc_data'} = $record->as_xml_record($marcflavour);
+            $record_document->{'marc_format'} = 'MARCXML';
+        }
+        else {
+            $record_document->{'marc_format'} = 'base64ISO2709';
          }
          my $id = $record->subfield('999', 'c');
          push @record_documents, [$id, $record_document];
diff --git a/Koha/SearchEngine/Elasticsearch/Search.pm b/Koha/SearchEngine/Elasticsearch/Search.pm

index 3bd27ea..718eb06 100644 (file)
--- a/Koha/SearchEngine/Elasticsearch/Search.pm
+++ b/Koha/SearchEngine/Elasticsearch/Search.pm
@@ -369,11 +369,14 @@ sub decode_record_from_result {
      # Result is passed in as array, will get flattened
      # and first element will be $result
      my ( $self, $result ) = @_;
-    if (C4::Context->preference('ElasticsearchMARCSerializationFormat') eq 'MARCXML') {
+    if ($result->{marc_format} eq 'base64ISO2709') {
+        return MARC::Record->new_from_usmarc(decode_base64($result->{marc_data}));
+    }
+    elsif ($result->{marc_format} eq 'MARCXML') {
          return MARC::Record->new_from_xml($result->{marc_data}, 'UTF-8', uc C4::Context->preference('marcflavour'));
      }
      else {
-        return MARC::Record->new_from_usmarc(decode_base64($result->{marc_data}));
+        die("Missing marc_format field in Elasticsearch result");
      }
  }
  
diff --git a/admin/searchengine/elasticsearch/field_config.yaml b/admin/searchengine/elasticsearch/field_config.yaml

index 0d60cd8..0baddf3 100644 (file)
--- a/admin/searchengine/elasticsearch/field_config.yaml
+++ b/admin/searchengine/elasticsearch/field_config.yaml
@@ -10,6 +10,12 @@ general:
        type: text
        analyzer: keyword
        index: false
+    marc_format:
+      store: true
+      type: text
+      analyzer: keyword
+      index: false
+
  # Search fields
  search:
    boolean:
diff --git a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/admin.pref b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/admin.pref

index 95bd765..464c8c8 100644 (file)
--- a/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/admin.pref
+++ b/koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/admin.pref
@@ -427,11 +427,3 @@ Administration:
                choices:
                  Zebra: Zebra
                  Elasticsearch: Elasticsearch
-        -
-            - "Use"
-            - pref: ElasticsearchMARCSerializationFormat
-              default: MARCXML
-              choices:
-                MARCXML: MARCXML
-                base64ISO2709: base64ISO2709
-            - "as serialization format for MARC records stored in Elasticsearch index. base64ISO2709 is faster and will use less space but have a maximum record length which could cause issues with very large records."
author	David Gustafsson <david.gustafsson@ub.gu.se>
	Mon, 28 May 2018 14:03:32 +0000 (16:03 +0200)
committer	Nick Clemens <nick@bywatersolutions.com>
	Fri, 16 Nov 2018 11:04:57 +0000 (11:04 +0000)
Koha/SearchEngine/Elasticsearch.pm		patch \| blob \| history
Koha/SearchEngine/Elasticsearch/Search.pm		patch \| blob \| history
admin/searchengine/elasticsearch/field_config.yaml		patch \| blob \| history
koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/admin.pref		patch \| blob \| history