Bug 19893: Remove serialization format setting
author: David Gustafsson <david.gustafsson@ub.gu.se>
Mon, 28 May 2018 14:03:32 +0000 (16:03 +0200)
committer: Nick Clemens <nick@bywatersolutions.com>
Fri, 16 Nov 2018 11:04:57 +0000 (11:04 +0000)
Default to base64-encoded binary MARC (ISO 2709), with a MARCXML
fallback if the record exceeds the maximum size.

Sponsored-by: Gothenburg University Library
Signed-off-by: Ere Maijala <ere.maijala@helsinki.fi>
Signed-off-by: Martin Renvoize <martin.renvoize@ptfs-europe.com>
Signed-off-by: Nick Clemens <nick@bywatersolutions.com>
Koha/SearchEngine/Elasticsearch.pm
Koha/SearchEngine/Elasticsearch/Search.pm
admin/searchengine/elasticsearch/field_config.yaml
koha-tmpl/intranet-tmpl/prog/en/modules/admin/preferences/admin.pref

index 93d9213..32ba39b 100644 (file)
@@ -379,11 +379,26 @@ sub marc_records_to_documents {
         }
         # TODO: Perhaps should check if $records_document non empty, but really should never be the case
         $record->encoding('UTF-8');
-        if ($serialization_format eq 'base64ISO2709') {
+        my @warnings;
+        {
+            # Temporarily intercept all warn signals (MARC::Record carps when record length > 99999)
+            local $SIG{__WARN__} = sub {
+                push @warnings, $_[0];
+            };
             $record_document->{'marc_data'} = encode_base64(encode('UTF-8', $record->as_usmarc()));
         }
-        else {
+        if (@warnings) {
+            # Suppress warnings if record length exceeded
+            unless (substr($record->leader(), 0, 5) eq '99999') {
+                foreach my $warning (@warnings) {
+                    carp($warning);
+                }
+            }
             $record_document->{'marc_data'} = $record->as_xml_record($marcflavour);
+            $record_document->{'marc_format'} = 'MARCXML';
+        }
+        else {
+            $record_document->{'marc_format'} = 'base64ISO2709';
         }
         my $id = $record->subfield('999', 'c');
         push @record_documents, [$id, $record_document];
index 3bd27ea..718eb06 100644 (file)
@@ -369,11 +369,14 @@ sub decode_record_from_result {
     # Result is passed in as array, will get flattened
     # and first element will be $result
     my ( $self, $result ) = @_;
-    if (C4::Context->preference('ElasticsearchMARCSerializationFormat') eq 'MARCXML') {
+    if ($result->{marc_format} eq 'base64ISO2709') {
+        return MARC::Record->new_from_usmarc(decode_base64($result->{marc_data}));
+    }
+    elsif ($result->{marc_format} eq 'MARCXML') {
         return MARC::Record->new_from_xml($result->{marc_data}, 'UTF-8', uc C4::Context->preference('marcflavour'));
     }
     else {
-        return MARC::Record->new_from_usmarc(decode_base64($result->{marc_data}));
+        die("Missing marc_format field in Elasticsearch result");
     }
 }
 
index 0d60cd8..0baddf3 100644 (file)
@@ -10,6 +10,12 @@ general:
       type: text
       analyzer: keyword
       index: false
+    marc_format:
+      store: true
+      type: text
+      analyzer: keyword
+      index: false
+
 # Search fields
 search:
   boolean:
index 95bd765..464c8c8 100644 (file)
@@ -427,11 +427,3 @@ Administration:
               choices:
                 Zebra: Zebra
                 Elasticsearch: Elasticsearch
-        -
-            - "Use"
-            - pref: ElasticsearchMARCSerializationFormat
-              default: MARCXML
-              choices:
-                MARCXML: MARCXML
-                base64ISO2709: base64ISO2709
-            - "as serialization format for MARC records stored in Elasticsearch index. base64ISO2709 is faster and will use less space but have a maximum record length which could cause issues with very large records."