Bug 17255 - Upgrade Elastic Search code to work with version 2.4+ - rebased wip
author Olli-Antti Kivilahti <olli-antti.kivilahti@jns.fi>
Mon, 9 Jan 2017 16:17:48 +0000 (18:17 +0200)
committer Kyle M Hall <kyle@bywatersolutions.com>
Tue, 7 Feb 2017 17:17:53 +0000 (17:17 +0000)
-Changed deprecated facets to aggregations
-Fixed boolean datatypes not allowing analyzers to be specified
-Fixed deprecated '_id' to 'es_id'. Now the ES-index has the correct id==biblionumber

ZE TEST PLAN

1. Reset Zebra index since facets are hard coded to dynamic search_marc_mappings.
2. perl misc/search_tools/rebuild_elastic_search.pl
3. Fetch all indexed records and the facet for subject__facet

curl -XGET localhost:9200/koha_biblios/data/_search?pretty -d '{
  "aggregations": {
    "my_agg": {
      "terms": {
        "field": "subject__facet"
      }
    }
  }
}'

Signed-off-by: Nick Clemens <nick@bywatersolutions.com>
Signed-off-by: Tomas Cohen Arazi <tomascohen@theke.io>
Signed-off-by: Kyle M Hall <kyle@bywatersolutions.com>
Koha/SearchEngine/Elasticsearch.pm
Koha/SearchEngine/Elasticsearch/Indexer.pm
Koha/SearchEngine/Elasticsearch/QueryBuilder.pm
Koha/SearchEngine/Elasticsearch/Search.pm

index 35a6540..f8b904c 100644 (file)
@@ -115,6 +115,8 @@ sub get_elasticsearch_params {
       if ( !$es->{index_name} );
     # Append the name of this particular index to our namespace
     $es->{index_name} .= '_' . $self->index;
+
+    $es->{key_prefix} = 'es_';
     return $es;
 }
 
@@ -145,7 +147,11 @@ sub get_elasticsearch_settings {
                     analyser_standard => {
                         tokenizer => 'standard',
                         filter    => ['lowercase'],
-                    }
+                    },
+                    default => {
+                        tokenizer => 'keyword',
+                        filter    => ['lowercase'],
+                    },
                 },
             }
         }
@@ -174,11 +180,6 @@ sub get_elasticsearch_mappings {
                     include_in_all => JSON::false,
                     type           => "string",
                 },
-                '_all.phrase' => {
-                    search_analyzer => "analyser_phrase",
-                    index_analyzer  => "analyser_phrase",
-                    type            => "string",
-                },
             }
         }
     };
@@ -188,6 +189,7 @@ sub get_elasticsearch_mappings {
         sub {
             my ( $name, $type, $facet, $suggestible, $sort, $marc_type ) = @_;
             return if $marc_type ne $marcflavour;
+
             # TODO if this gets any sort of complexity to it, it should
             # be broken out into its own function.
 
@@ -197,25 +199,14 @@ sub get_elasticsearch_mappings {
               $type eq 'boolean'
               ? 'boolean'
               : 'string';
-            $mappings->{data}{properties}{$name} = {
-                search_analyzer => "analyser_standard",
-                index_analyzer  => "analyser_standard",
-                type            => $es_type,
-                fields          => {
-                    phrase => {
-                        search_analyzer => "analyser_phrase",
-                        index_analyzer  => "analyser_phrase",
-                        type            => "string",
-                        copy_to         => "_all.phrase",
-                    },
-                    raw => {
-                        "type" => "string",
-                        "index" => "not_analyzed",
-                    }
-                },
-            };
-            $mappings->{data}{properties}{$name}{null_value} = 0
-              if $type eq 'boolean';
+
+            if ($es_type eq 'boolean') {
+                $mappings->{data}{properties}{$name} = _elasticsearch_mapping_for_boolean( $name, $es_type, $facet, $suggestible, $sort, $marc_type );
+                return; #Boolean cannot have facets nor sorting nor suggestions
+            } else {
+                $mappings->{data}{properties}{$name} = _elasticsearch_mapping_for_default( $name, $es_type, $facet, $suggestible, $sort, $marc_type );
+            }
+
             if ($facet) {
                 $mappings->{data}{properties}{ $name . '__facet' } = {
                     type  => "string",
@@ -225,7 +216,7 @@ sub get_elasticsearch_mappings {
             if ($suggestible) {
                 $mappings->{data}{properties}{ $name . '__suggestion' } = {
                     type => 'completion',
-                    index_analyzer => 'simple',
+                    analyzer => 'simple',
                     search_analyzer => 'simple',
                 };
             }
@@ -234,13 +225,13 @@ sub get_elasticsearch_mappings {
             if (defined $sort) {
                 $mappings->{data}{properties}{ $name . '__sort' } = {
                     search_analyzer => "analyser_phrase",
-                    index_analyzer  => "analyser_phrase",
+                    analyzer  => "analyser_phrase",
                     type            => "string",
                     include_in_all  => JSON::false,
                     fields          => {
                         phrase => {
                             search_analyzer => "analyser_phrase",
-                            index_analyzer  => "analyser_phrase",
+                            analyzer  => "analyser_phrase",
                             type            => "string",
                         },
                     },
@@ -253,6 +244,44 @@ sub get_elasticsearch_mappings {
     return $mappings;
 }
 
+=head2 _elasticsearch_mapping_for_*
+
+Get the ES mappings for the given data type or a special mapping case
+
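+Each helper receives the same parameters that $self->_foreach_mapping() passes to its callback, except that the type argument is the already-resolved ES type, and returns the field mapping as a hashref.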
+
+=cut
+
+sub _elasticsearch_mapping_for_boolean { # Returns the ES property mapping (hashref) for a boolean search field.
+    my ( $name, $type, $facet, $suggestible, $sort, $marc_type ) = @_; # Full callback-style argument list is accepted, but only $type is read below.
+
+    return { # Deliberately no analyzer keys: per the commit message, boolean datatypes do not allow analyzers to be specified.
+        type            => $type, # The caller only reaches this helper when the resolved ES type is 'boolean'.
+        null_value      => 0, # NOTE(review): ES substitutes this for explicit null values; 0 is presumably coerced to false - confirm for the target ES version.
+    };
+}
+
+sub _elasticsearch_mapping_for_default { # Returns the ES property mapping (hashref) for any non-boolean search field.
+    my ( $name, $type, $facet, $suggestible, $sort, $marc_type ) = @_; # Full callback-style argument list is accepted, but only $type is read below.
+
+    return {
+        search_analyzer => "analyser_standard",
+        analyzer        => "analyser_standard", # 'analyzer' takes the place of the 'index_analyzer' key used by the inline mapping this helper replaces.
+        type            => $type, # Resolved ES type supplied by the caller.
+        fields          => { # Sub-field definitions nested under the main field.
+            phrase => { # Same value analysed with the phrase analyser; the previous copy_to '_all.phrase' is no longer set.
+                search_analyzer => "analyser_phrase",
+                analyzer        => "analyser_phrase",
+                type            => "string",
+            },
+            raw => { # Unanalysed copy of the value.
+                type    => "string",
+                index   => "not_analyzed",
+            }
+        },
+    };
+}
+
 sub reset_elasticsearch_mappings {
     my $mappings_yaml = C4::Context->config('intranetdir') . '/admin/searchengine/elasticsearch/mappings.yaml';
     my $indexes = LoadFile( $mappings_yaml );
@@ -294,6 +323,7 @@ sub get_fixer_rules {
 
     my $marcflavour = lc C4::Context->preference('marcflavour');
     my @rules;
+
     $self->_foreach_mapping(
         sub {
             my ( $name, $type, $facet, $suggestible, $sort, $marc_type, $marc_field ) = @_;
@@ -311,7 +341,8 @@ sub get_fixer_rules {
             }
             if ($suggestible) {
                 push @rules,
-"marc_map('$marc_field','${name}__suggestion.input.\$append', $options)";
+                    #"marc_map('$marc_field','${name}__suggestion.input.\$append', $options)"; #must not have nested data structures in .input
+                    "marc_map('$marc_field','${name}__suggestion.input.\$append')";
             }
             if ( $type eq 'boolean' ) {
 
@@ -334,6 +365,8 @@ sub get_fixer_rules {
             }
         }
     );
+
+    push @rules, "move_field(_id,es_id)"; #Also you must set the Catmandu::Store::ElasticSearch->new(key_prefix: 'es_');
     return \@rules;
 }
 
index b35378f..172580a 100644 (file)
@@ -77,6 +77,7 @@ sub update_index {
             )
         );
     }
+    my $array_ref = $from->to_array;
     $self->store->bag->add_many($from);
     $self->store->bag->commit;
     return 1;
index 53de283..d0aa916 100644 (file)
@@ -111,7 +111,7 @@ sub build_query {
 
     # See _convert_facets in Search.pm for how these get turned into
     # things that Koha can use.
-    $res->{facets} = {
+    $res->{aggregations} = {
         author   => { terms => { field => "author__facet" } },
         subject  => { terms => { field => "subject__facet" } },
         itype    => { terms => { field => "itype__facet" } },
@@ -120,7 +120,7 @@ sub build_query {
         se       => { terms => { field => "se__facet" } },
     };
     if ( my $ef = $options{expanded_facet} ) {
-        $res->{facets}{$ef}{terms}{size} = C4::Context->preference('FacetMaxCount');
+        $res->{aggregations}{$ef}{terms}{size} = C4::Context->preference('FacetMaxCount');
     };
     return $res;
 }
index d1ccd52..6e3ca3b 100644 (file)
@@ -167,7 +167,7 @@ sub search_compat {
     my %result;
     $result{biblioserver}{hits} = $results->total;
     $result{biblioserver}{RECORDS} = \@records;
-    return (undef, \%result, $self->_convert_facets($results->{facets}, $expanded_facet));
+    return (undef, \%result, $self->_convert_facets($results->{aggregations}, $expanded_facet));
 }
 
 =head2 search_auth_compat
@@ -430,15 +430,15 @@ sub _convert_facets {
             type_id    => $type . '_id',
             expand     => $type,
             expandable => ( $type ne $exp_facet )
-              && ( @{ $data->{terms} } > $limit ),
+              && ( @{ $data->{buckets} } > $limit ),
             "type_label_$type_to_label{$type}{label}" => 1,
             type_link_value                    => $type,
             order      => $type_to_label{$type}{order},
         };
-        $limit = @{ $data->{terms} } if ( $limit > @{ $data->{terms} } );
-        foreach my $term ( @{ $data->{terms} }[ 0 .. $limit - 1 ] ) {
-            my $t = $term->{term};
-            my $c = $term->{count};
+        $limit = @{ $data->{buckets} } if ( $limit > @{ $data->{buckets} } );
+        foreach my $term ( @{ $data->{buckets} }[ 0 .. $limit - 1 ] ) {
+            my $t = $term->{key};
+            my $c = $term->{doc_count};
             my $label;
             if ( exists( $special{$type} ) ) {
                 $label = $special{$type}->{$t} // $t;