# This inserts records from a Koha database into Elasticsearch
5 # Copyright 2014 Catalyst IT
7 # This file is part of Koha.
9 # Koha is free software; you can redistribute it and/or modify it under the
10 # terms of the GNU General Public License as published by the Free Software
11 # Foundation; either version 3 of the License, or (at your option) any later
14 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
15 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
16 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License along
19 # with Koha; if not, write to the Free Software Foundation, Inc.,
20 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 rebuild_elastic_search.pl - inserts records from a Koha database into Elasticsearch
28 B<rebuild_elastic_search.pl>
29 [B<-c|--commit>=C<count>]
40 =item B<-c|--commit>=C<count>
42 Specify how many records will be batched up before they're added to Elasticsearch.
43 Higher should be faster, but will cause more RAM usage. Default is 5000.
47 Delete the index and recreate it before indexing.
49 =item B<-a|--authorities>
51 Index the authorities only. Combining this with B<-b> is the same as
52 specifying neither and so both get indexed.
56 Index the biblios only. Combining this with B<-a> is the same as
57 specifying neither and so both get indexed.
=item B<-bn|--bnumber>=C<number>

Only index the record with the supplied biblionumber, mostly for testing
purposes. May be repeated. The same numbers are also applied to authorities
(as authids), so when using this option you probably want to combine it with
either B<-a> or B<-b> rather than indexing both at once.
By default, this program only emits warnings and errors. This option makes it
report its progress as well. Repeat the option to make the output even more
detailed, which is particularly useful when debugging.
84 use Koha::ElasticSearch::Indexer;
90 use Data::Dumper; # TODO remove
# Option state filled in by GetOptions below. The two index selectors are left
# undefined on purpose so that "neither was given" can be detected afterwards.
my ($delete, $help, $man);
my ($index_biblios, $index_authorities);

# NOTE(review): $commit, $verbose and @biblionumbers are bound here, but their
# declarations are not visible in the reviewed extract -- confirm they are
# declared (with their defaults) before this call.
GetOptions(
    'c|commit=i'    => \$commit,
    'd|delete'      => \$delete,
    'a|authorities' => \$index_authorities,
    'b|biblios'     => \$index_biblios,
    'bn|bnumber=i'  => \@biblionumbers,

    # Incremental ("+") rather than negatable ("!"): the documentation tells
    # users to repeat -v for more detail, and _log() is called with levels
    # above 1, which a plain on/off flag could never reach.
    'v|verbose+'    => \$verbose,

    # NOTE(review): the specs that populate $help and $man are not visible in
    # the reviewed extract; these two follow the pod2usage calls below --
    # confirm the option names against the full file.
    'h|help'        => \$help,
    'man'           => \$man,
);

# Default is to do both
unless ($index_authorities || $index_biblios) {
    $index_authorities = $index_biblios = 1;
}

# Short usage for --help, the full manual page for --man.
pod2usage(1) if $help;
pod2usage( -exitstatus => 0, -verbose => 2 ) if $man;
# Index bibliographic records when requested: either only the ids collected in
# @biblionumbers, or everything the biblio iterator provides. The record source
# is handed to do_reindex() as $next.
# NOTE(review): the lines that wrap the statements below into the $next closure
# (and the else-branch closure around $records) are not visible in the reviewed
# extract; the notes here assume the return statements belong to that closure.
118 if ($index_biblios) {
119 _log(1, "Indexing biblios\n");
120 if (@biblionumbers) {
# Each call consumes one id from the shared @biblionumbers array.
122 my $r = shift @biblionumbers;
# No ids left: signal the end of the input.
123 return () unless defined $r;
# NOTE(review): do_reindex() calls $next in scalar context and then invokes
# idnumber() and record() on the result. A two-element list returned in scalar
# context evaluates to its last element, so $r is discarded -- confirm that
# what get_marc_biblio returns provides both methods.
124 return ($r, Koha::Biblio->get_marc_biblio($r, item_data => 1));
127 my $records = Koha::Biblio->get_all_biblios_iterator();
132 do_reindex($next, $Koha::ElasticSearch::BIBLIOS_INDEX);
# Same procedure for authority records, with the supplied numbers treated as
# authids.
134 if ($index_authorities) {
135 _log(1, "Indexing authorities\n");
# NOTE(review): the biblio pass above shifts ids off @biblionumbers, so when
# both record types are indexed in one run this test appears to see an empty
# array and fall through to a full authority reindex -- confirm, and consider
# iterating over a copy of the list instead.
136 if (@biblionumbers) {
138 my $r = shift @biblionumbers;
139 return () unless defined $r;
# NOTE(review): "my $a" masks the $a used by sort comparison blocks, and the
# result of get_from_authid is dereferenced without a check -- presumably an
# unknown authid makes the next line die; verify and rename.
140 my $a = Koha::Authority->get_from_authid($r);
# NOTE(review): same list-versus-scalar concern as in the biblio branch.
141 return ($r, $a->record);
144 my $records = Koha::Authority->get_all_authorities_iterator();
149 do_reindex($next, $Koha::ElasticSearch::AUTHORITIES_INDEX);
# Feed every record produced by $next into the Elasticsearch index $index_name.
#
#   do_reindex($next, $index_name);
#
# $next is a code reference. Each call must return the next entry to index --
# an object providing idnumber() and record() -- or a false value once the
# source is exhausted. Entries are handed to the indexer in batches of $commit
# (the --commit setting); a final partial batch is flushed at the end.
#
# NOTE(review): the reviewed extract omits several lines of this sub (its
# header, the guard around drop_index, the $count bookkeeping and the buffer
# reset after a commit). They are reconstructed here from the surrounding code
# and documentation -- confirm against the full file.
sub do_reindex {
    my ( $next, $index_name ) = @_;

    my $indexer = Koha::ElasticSearch::Indexer->new( { index => $index_name } );

    if ($delete) {

        # We know it's safe to not recreate the indexer because update_index
        # hasn't been called yet.
        $indexer->drop_index();
    }

    my $count        = 0;
    my $commit_count = $commit;
    my ( @id_buffer, @commit_buffer );

    # The loop variable gets its own name; the previous code re-declared
    # $record inside the loop body, hiding the iterator entry behind the
    # record extracted from it.
    while ( my $entry = $next->() ) {
        push @id_buffer,     $entry->idnumber;
        push @commit_buffer, $entry->record;
        $count++;

        if ( !( --$commit_count ) ) {
            _log( 2, "Committing...\n" );
            $indexer->update_index( \@id_buffer, \@commit_buffer );
            $commit_count  = $commit;
            @id_buffer     = ();
            @commit_buffer = ();
        }
    }

    # Flush the last, partial batch. Skipped when nothing is pending (no
    # records at all, or a record count that is an exact multiple of the batch
    # size) so the indexer is never asked to process an empty batch.
    $indexer->update_index( \@id_buffer, \@commit_buffer ) if @id_buffer;
    _log( 1, "$count records indexed.\n" );
    return;
}
# Output progress information.
#
#   _log($level, $msg);
#
# Prints $msg when the verbosity setting ($verbose) is $level or more, so a
# higher $level marks a more detailed message. Nothing is appended to $msg;
# callers supply their own trailing newline.
sub _log {
    my ( $level, $msg ) = @_;

    # The comparison used to be "<=", which printed every message at the
    # default verbosity and fewer messages as verbosity increased.
    print $msg if $verbose >= $level;
    return;
}