#!/usr/bin/perl
-use strict;
-#use warnings; FIXME - Bug 2505
+# This file is part of Koha.
+#
+# Koha is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# Koha is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Koha; if not, see <http://www.gnu.org/licenses>.
+
+use Modern::Perl;
use C4::Context;
use Getopt::Long;
use C4::AuthoritiesMarc;
use C4::Items;
use Koha::RecordProcessor;
+use Koha::Caches;
use XML::LibXML;
use constant LOCK_FILENAME => 'rebuild..LCK';
my $reset;
my $biblios;
my $authorities;
-my $noxml;
+my $as_xml;
my $noshadow;
my $want_help;
-my $as_xml;
my $process_zebraqueue;
my $process_zebraqueue_skip_deletes;
my $do_not_clear_zebraqueue;
my $run_user = (getpwuid($<))[0];
my $wait_for_lock = 0;
my $use_flock;
+my $table = 'biblioitems';
+my $is_memcached = Koha::Caches->get_instance->memcached_cache;
my $verbose_logging = 0;
my $zebraidx_log_opt = " -v none,fatal,warn ";
'I|skip-index' => \$skip_index,
'nosanitize' => \$nosanitize,
'b' => \$biblios,
- 'noxml' => \$noxml,
'w' => \$noshadow,
'a' => \$authorities,
'h|help' => \$want_help,
'v+' => \$verbose_logging,
'run-as-root' => \$run_as_root,
'wait-for-lock' => \$wait_for_lock,
+ 't|table:s' => \$table,
);
if (not $result or $want_help) {
exit 0;
}
+if ( $as_xml ) {
+ warn "Warning: You passed -x which is already the default and is now deprecated\n";
+ undef $as_xml; # Should not be used later
+}
+
if( not defined $run_as_root and $run_user eq 'root') {
my $msg = "Warning: You are running this script as the user 'root'.\n";
$msg .= "If this is intentional you must explicitly specify this using the -run-as-root switch\n";
die $msg;
}
-if ( !$as_xml and $nosanitize ) {
- my $msg = "Cannot specify both -no_xml and -nosanitize\n";
- $msg .= "Please do '$0 --help' to see usage.\n";
- die $msg;
-}
-
if ($process_zebraqueue and ($skip_export or $reset)) {
my $msg = "Cannot specify -r or -s if -z is specified\n";
$msg .= "Please do '$0 --help' to see usage.\n";
die $msg;
}
-if ($reset) {
- $noshadow = 1;
-}
-
-if ($noshadow) {
- $noshadow = ' -n ';
-}
-
if ($daemon_mode) {
# incompatible flags handled above: help, reset, and do_not_clear_zebraqueue
if ($skip_export or $keep_export or $skip_index or
$msg .= "Please do '$0 --help' to see usage.\n";
die $msg;
}
+ unless ($is_memcached) {
+ warn "Warning: script running in daemon mode, without recommended caching system (memcached).\n";
+ }
$authorities = 1;
$biblios = 1;
$process_zebraqueue = 1;
die $msg;
}
+# Tables that may be named with -t|--table as the source of the
+# biblionumbers to index.
+our @tables_allowed_for_select = ( 'biblioitems', 'items', 'biblio' );
+
+# Validate $table by exact string comparison. Interpolating the value into
+# a pattern (/^$table$/) lets regex metacharacters through: 'biblio|x'
+# becomes /^biblio|x$/ and '.*' matches every entry, so such values would
+# be accepted even though they are not table names.
+unless ( grep { $_ eq $table } @tables_allowed_for_select ) {
+    die "Cannot specify -t|--table with value '$table'. Only "
+      . ( join ', ', @tables_allowed_for_select )
+      . " are allowed.";
+}
+
# -v is for verbose, which seems backwards here because of how logging is set
# on the CLI of zebraidx. It works this way. The default is to not log much
my $authorityserverdir = C4::Context->zebraconfig('authorityserver')->{directory};
my $kohadir = C4::Context->config('intranetdir');
-my $bib_index_mode = C4::Context->config('zebra_bib_index_mode') || 'grs1';
-my $auth_index_mode = C4::Context->config('zebra_auth_index_mode') || 'dom';
+my $bib_index_mode = C4::Context->config('zebra_bib_index_mode') // 'dom';
+my $auth_index_mode = C4::Context->config('zebra_auth_index_mode') // 'dom';
+
+my ($biblionumbertagfield,$biblionumbertagsubfield) = C4::Biblio::GetMarcFromKohaField("biblio.biblionumber","");
+my ($biblioitemnumbertagfield,$biblioitemnumbertagsubfield) = C4::Biblio::GetMarcFromKohaField("biblioitems.biblioitemnumber","");
-my $dbh = C4::Context->dbh;
-my ($biblionumbertagfield,$biblionumbertagsubfield) = &GetMarcFromKohaField("biblio.biblionumber","");
-my ($biblioitemnumbertagfield,$biblioitemnumbertagsubfield) = &GetMarcFromKohaField("biblioitems.biblioitemnumber","");
+my $marcxml_open = q{<?xml version="1.0" encoding="UTF-8"?>
+<collection xmlns="http://www.loc.gov/MARC21/slim">
+}; # Header printed once at the start of each exported_records file: XML declaration plus the opening <collection> root.
+
+my $marcxml_close = q{
+</collection>
+}; # Footer printed once after the last exported record, closing the <collection> root.
# Protect again simultaneous update of the zebra index by using a lock file.
-# Create our own lock directory if its missing. This shouild be created
-# by koha-zebra-ctl.sh or at system installation. If the desired directory
+# Create our own lock directory if it is missing. This should be created
+# by koha-zebra-ctl.sh or at system installation. If the desired directory
# does not exist and cannot be created, we fall back on /tmp - which will
# always work.
}
my $tester = XML::LibXML->new();
+my $dbh;
# The main work is done here by calling do_one_pass(). We have added locking
# avoid race conditions between full rebuilds and incremental updates either from
while (1) {
# For incremental updates, skip the update if the updates are locked
if (_flock($LockFH, LOCK_EX|LOCK_NB)) {
- do_one_pass() if ( zebraqueue_not_empty() );
+ eval {
+ $dbh = C4::Context->dbh;
+ if( zebraqueue_not_empty() ) {
+ Koha::Caches->flush_L1_caches() if $is_memcached;
+ do_one_pass();
+ }
+ };
+ if ($@ && $verbose_logging) {
+ warn "Warning : $@\n";
+ }
_flock($LockFH, LOCK_UN);
}
sleep $daemon_sleep;
# all one-off invocations
my $lock_mode = ($wait_for_lock) ? LOCK_EX : LOCK_EX|LOCK_NB;
if (_flock($LockFH, $lock_mode)) {
+ $dbh = C4::Context->dbh;
do_one_pass();
_flock($LockFH, LOCK_UN);
} else {
sub do_one_pass {
if ($authorities) {
- index_records('authority', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $authorityserverdir);
+ index_records('authority', $directory, $skip_export, $skip_index, $process_zebraqueue, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $authorityserverdir);
} else {
print "skipping authorities\n" if ( $verbose_logging );
}
if ($biblios) {
- index_records('biblio', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $biblioserverdir);
+ index_records('biblio', $directory, $skip_export, $skip_index, $process_zebraqueue, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $biblioserverdir);
} else {
print "skipping biblios\n" if ( $verbose_logging );
}
} # ---------- end of subroutine check_zebra_dirs ----------
sub index_records {
- my ($record_type, $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $server_dir) = @_;
+ my ($record_type, $directory, $skip_export, $skip_index, $process_zebraqueue, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $server_dir) = @_;
my $num_records_exported = 0;
my $records_deleted = {};
unless ( $process_zebraqueue_skip_deletes ) {
$entries = select_zebraqueue_records($record_type, 'deleted');
mkdir "$directory/del_$record_type" unless (-d "$directory/del_$record_type");
- $records_deleted = generate_deleted_marc_records($record_type, $entries, "$directory/del_$record_type", $as_xml);
+ $records_deleted = generate_deleted_marc_records($record_type, $entries, "$directory/del_$record_type");
mark_zebraqueue_batch_done($entries);
}
$entries = select_zebraqueue_records($record_type, 'updated');
mkdir "$directory/upd_$record_type" unless (-d "$directory/upd_$record_type");
- $num_records_exported = export_marc_records_from_list($record_type,$entries, "$directory/upd_$record_type", $as_xml, $noxml, $records_deleted);
+ $num_records_exported = export_marc_records_from_list($record_type,$entries, "$directory/upd_$record_type", $records_deleted);
mark_zebraqueue_batch_done($entries);
} else {
my $sth = select_all_records($record_type);
- $num_records_exported = export_marc_records_from_sth($record_type, $sth, "$directory/$record_type", $as_xml, $noxml, $nosanitize);
+ $num_records_exported = export_marc_records_from_sth($record_type, $sth, "$directory/$record_type", $nosanitize);
unless ($do_not_clear_zebraqueue) {
mark_all_zebraqueue_done($record_type);
}
print "REINDEXING zebra\n";
print "====================\n";
}
- my $record_fmt = ($as_xml) ? 'marcxml' : 'iso2709' ;
+ my $record_fmt = 'marcxml';
if ($process_zebraqueue) {
do_indexing($record_type, 'adelete', "$directory/del_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
if %$records_deleted;
}
sub select_all_biblios {
- my $strsth = qq{ SELECT biblionumber FROM biblioitems };
+ $table = 'biblioitems'
+ unless grep { /^$table$/ } @tables_allowed_for_select;
+ my $strsth = qq{ SELECT biblionumber FROM $table };
$strsth.=qq{ WHERE $where } if ($where);
$strsth.=qq{ LIMIT $length } if ($length && !$offset);
$strsth.=qq{ LIMIT $offset,$length } if ($offset);
return $sth;
}
-sub include_xml_wrapper {
- my $as_xml = shift;
- my $record_type = shift;
-
- return 0 unless $as_xml;
- return 1 if $record_type eq 'biblio' and $bib_index_mode eq 'dom';
- return 1 if $record_type eq 'authority' and $auth_index_mode eq 'dom';
- return 0;
-
-}
-
sub export_marc_records_from_sth {
- my ($record_type, $sth, $directory, $as_xml, $noxml, $nosanitize) = @_;
+ my ($record_type, $sth, $directory, $nosanitize) = @_;
my $num_exported = 0;
open my $fh, '>:encoding(UTF-8) ', "$directory/exported_records" or die $!;
- if (include_xml_wrapper($as_xml, $record_type)) {
- # include XML declaration and root element
- print {$fh} '<?xml version="1.0" encoding="UTF-8"?><collection>';
- }
+
+ print {$fh} $marcxml_open;
+
my $i = 0;
- my ( $itemtag, $itemsubfield ) = GetMarcFromKohaField("items.itemnumber",'');
+ my ( $itemtag, $itemsubfield ) = C4::Biblio::GetMarcFromKohaField("items.itemnumber",'');
while (my ($record_number) = $sth->fetchrow_array) {
print "." if ( $verbose_logging );
print "\r$i" unless ($i++ %100 or !$verbose_logging);
}
next;
}
- my ($marc) = get_corrected_marc_record($record_type, $record_number, $noxml);
+ my ($marc) = get_corrected_marc_record($record_type, $record_number);
if (defined $marc) {
eval {
- my $rec;
- if ($as_xml) {
- $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
- eval {
- my $doc = $tester->parse_string($rec);
- };
- if ($@) {
- die "invalid XML: $@";
- }
- $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
- } else {
- $rec = $marc->as_usmarc();
+ my $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
+ eval {
+ my $doc = $tester->parse_string($rec);
+ };
+ if ($@) {
+ die "invalid XML: $@";
}
+ $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
print {$fh} $rec;
$num_exported++;
};
if ($@) {
- warn "Error exporting record $record_number ($record_type) ".($noxml ? "not XML" : "XML");
+ warn "Error exporting record $record_number ($record_type) XML";
warn "... specific error is $@" if $verbose_logging;
}
}
}
print "\nRecords exported: $num_exported\n" if ( $verbose_logging );
- print {$fh} '</collection>' if (include_xml_wrapper($as_xml, $record_type));
+ print {$fh} $marcxml_close;
+
close $fh;
return $num_exported;
}
sub export_marc_records_from_list {
- my ($record_type, $entries, $directory, $as_xml, $noxml, $records_deleted) = @_;
+ my ($record_type, $entries, $directory, $records_deleted) = @_;
my $num_exported = 0;
open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!;
- if (include_xml_wrapper($as_xml, $record_type)) {
- # include XML declaration and root element
- print {$fh} '<?xml version="1.0" encoding="UTF-8"?><collection>';
- }
+
+ print {$fh} $marcxml_open;
+
my $i = 0;
# Skip any deleted records. We check for this anyway, but this reduces error spam
@$entries ) {
print "." if ( $verbose_logging );
print "\r$i" unless ($i++ %100 or !$verbose_logging);
- my ($marc) = get_corrected_marc_record($record_type, $record_number, $noxml);
+ my ($marc) = get_corrected_marc_record($record_type, $record_number);
if (defined $marc) {
eval {
- my $rec;
- if ($as_xml) {
- $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
- $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
- } else {
- $rec = $marc->as_usmarc();
- }
+ my $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
+ $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
print {$fh} $rec;
$num_exported++;
};
if ($@) {
- warn "Error exporting record $record_number ($record_type) ".($noxml ? "not XML" : "XML");
+ warn "Error exporting record $record_number ($record_type) XML";
}
}
}
print "\nRecords exported: $num_exported\n" if ( $verbose_logging );
- print {$fh} '</collection>' if (include_xml_wrapper($as_xml, $record_type));
+
+ print {$fh} $marcxml_close;
+
close $fh;
return $num_exported;
}
sub generate_deleted_marc_records {
- my ($record_type, $entries, $directory, $as_xml) = @_;
+
+ my ($record_type, $entries, $directory) = @_;
my $records_deleted = {};
open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!;
- if (include_xml_wrapper($as_xml, $record_type)) {
- # include XML declaration and root element
- print {$fh} '<?xml version="1.0" encoding="UTF-8"?><collection>';
- }
+
+ print {$fh} $marcxml_open;
+
my $i = 0;
foreach my $record_number (map { $_->{biblio_auth_number} } @$entries ) {
print "\r$i" unless ($i++ %100 or !$verbose_logging);
fix_unimarc_100($marc);
}
- my $rec;
- if ($as_xml) {
- $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
- $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
- } else {
- $rec = $marc->as_usmarc();
- }
+ my $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
+ # Remove the record's XML header
+ $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
print {$fh} $rec;
$records_deleted->{$record_number} = 1;
}
print "\nRecords exported: $i\n" if ( $verbose_logging );
- print {$fh} '</collection>' if (include_xml_wrapper($as_xml, $record_type));
- close $fh;
- return $records_deleted;
+ print {$fh} $marcxml_close;
+ close $fh;
+ return $records_deleted;
}
sub get_corrected_marc_record {
- my ($record_type, $record_number, $noxml) = @_;
+ my ( $record_type, $record_number ) = @_;
- my $marc = get_raw_marc_record($record_type, $record_number, $noxml);
+ my $marc = get_raw_marc_record( $record_type, $record_number );
- if (defined $marc) {
+ if ( defined $marc ) {
fix_leader($marc);
- if ($record_type eq 'authority') {
- fix_authority_id($marc, $record_number);
- } elsif ($record_type eq 'biblio' && C4::Context->preference('IncludeSeeFromInSearches')) {
- my $normalizer = Koha::RecordProcessor->new( { filters => 'EmbedSeeFromHeadings' } );
+ if ( $record_type eq 'authority' ) {
+ fix_authority_id( $marc, $record_number );
+ }
+ elsif ( $record_type eq 'biblio' ) {
+
+ my @filters;
+ push @filters, 'EmbedItemsAvailability';
+ push @filters, 'EmbedSeeFromHeadings'
+ if C4::Context->preference('IncludeSeeFromInSearches');
+
+ my $normalizer = Koha::RecordProcessor->new( { filters => \@filters } );
$marc = $normalizer->process($marc);
}
- if (C4::Context->preference("marcflavour") eq "UNIMARC") {
+ if ( C4::Context->preference("marcflavour") eq "UNIMARC" ) {
fix_unimarc_100($marc);
}
}
}
sub get_raw_marc_record {
- my ($record_type, $record_number, $noxml) = @_;
+ my ($record_type, $record_number) = @_;
my $marc;
if ($record_type eq 'biblio') {
- if ($noxml) {
- my $fetch_sth = $dbh->prepare_cached("SELECT marc FROM biblioitems WHERE biblionumber = ?");
- $fetch_sth->execute($record_number);
- if (my ($blob) = $fetch_sth->fetchrow_array) {
- $marc = MARC::Record->new_from_usmarc($blob);
- unless ($marc) {
- warn "error creating MARC::Record from $blob";
- }
- }
- # failure to find a bib is not a problem -
- # a delete could have been done before
- # trying to process a record update
-
- $fetch_sth->finish();
- return unless $marc;
- } else {
- eval { $marc = GetMarcBiblio($record_number, 1); };
- if ($@ || !$marc) {
- # here we do warn since catching an exception
- # means that the bib was found but failed
- # to be parsed
- warn "error retrieving biblio $record_number";
- return;
- }
+ eval { $marc = C4::Biblio::GetMarcBiblio({ biblionumber => $record_number, embed_items => 1 }); };
+ if ($@ || !$marc) {
+ # here we do warn since catching an exception
+ # means that the bib was found but failed
+ # to be parsed
+ warn "error retrieving biblio $record_number";
+ return;
}
} else {
eval { $marc = GetAuthority($record_number); };
my $marc = shift;
my $string;
- if ( length($marc->subfield( 100, "a" )) == 36 ) {
+ my $length_100a = length($marc->subfield( 100, "a" ));
+ if ( $length_100a and $length_100a == 36 ) {
$string = $marc->subfield( 100, "a" );
my $f100 = $marc->field(100);
$marc->delete_field($f100);
$string = sprintf( "%-*s", 35, $string );
}
substr( $string, 22, 6, "frey50" );
- unless ( length($marc->subfield( 100, "a" )) == 36 ) {
+ $length_100a = length($marc->subfield( 100, "a" ));
+ unless ( $length_100a and $length_100a == 36 ) {
$marc->delete_field($marc->field(100));
$marc->insert_grouped_field(MARC::Field->new( 100, "", "", "a" => $string ));
}
my $zebra_config = C4::Context->zebraconfig($zebra_server)->{'config'};
my $zebra_db_dir = C4::Context->zebraconfig($zebra_server)->{'directory'};
+ $noshadow //= '';
+
+ if ($noshadow or $reset_index) {
+ $noshadow = '-n';
+ }
+
system("zebraidx -c $zebra_config $zebraidx_log_opt -g $record_format -d $zebra_db_name init") if $reset_index;
system("zebraidx -c $zebra_config $zebraidx_log_opt $noshadow -g $record_format -d $zebra_db_name $op $record_dir");
system("zebraidx -c $zebra_config $zebraidx_log_opt -g $record_format -d $zebra_db_name commit") unless $noshadow;
-
}
sub _flock {
already exported the records
in a previous run.
- -noxml index from ISO MARC blob
- instead of MARC XML. This
- option is recommended only
- for advanced user.
-
- -x export and index as xml instead of is02709 (biblios only).
- use this if you might have records > 99,999 chars,
-
-nosanitize export biblio/authority records directly from DB marcxml
field without sanitizing records. It speed up
dump process but could fail if DB contains badly
to wait for the lock to free and then continue
processing the rebuild request,
+ --table specify a table (can be items, biblioitems or biblio) to retrieve biblionumber to index.
+ biblioitems is the default value.
+
--help or -h show this message.
_USAGE_
}