X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=misc%2Fmigration_tools%2Frebuild_zebra.pl;h=45c074a63a081695466668830fa3fd20d24f445f;hb=7a178fd262dfd88c11461247ec90f2e7ee676616;hp=1621e84b9bd7d4adf8236a8a16f2b589fcfa4e99;hpb=949fcd04bad3d7f13a48081cdfdd1bc11510a401;p=koha.git diff --git a/misc/migration_tools/rebuild_zebra.pl b/misc/migration_tools/rebuild_zebra.pl index 1621e84b9b..45c074a63a 100755 --- a/misc/migration_tools/rebuild_zebra.pl +++ b/misc/migration_tools/rebuild_zebra.pl @@ -5,13 +5,17 @@ use strict; use C4::Context; use Getopt::Long; +use Fcntl qw(:flock); use File::Temp qw/ tempdir /; use File::Path; use C4::Biblio; use C4::AuthoritiesMarc; use C4::Items; +use Koha::RecordProcessor; +use XML::LibXML; + +use constant LOCK_FILENAME => 'rebuild..LCK'; -# # script that checks zebradir structure & create directories & mandatory files if needed # # @@ -20,6 +24,8 @@ $|=1; # flushes output # If the cron job starts us in an unreadable dir, we will break without # this. chdir $ENV{HOME} if (!(-r '.')); +my $daemon_mode; +my $daemon_sleep = 5; my $directory; my $nosanitize; my $skip_export; @@ -30,46 +36,57 @@ my $biblios; my $authorities; my $noxml; my $noshadow; -my $do_munge; my $want_help; my $as_xml; my $process_zebraqueue; +my $process_zebraqueue_skip_deletes; my $do_not_clear_zebraqueue; my $length; my $where; my $offset; +my $run_as_root; +my $run_user = (getpwuid($<))[0]; +my $wait_for_lock = 0; +my $use_flock; +my $table = 'biblioitems'; + my $verbose_logging = 0; my $zebraidx_log_opt = " -v none,fatal,warn "; my $result = GetOptions( + 'daemon' => \$daemon_mode, + 'sleep:i' => \$daemon_sleep, 'd:s' => \$directory, 'r|reset' => \$reset, 's' => \$skip_export, 'k' => \$keep_export, - 'I|skip-index' => \$skip_index, + 'I|skip-index' => \$skip_index, 'nosanitize' => \$nosanitize, 'b' => \$biblios, 'noxml' => \$noxml, 'w' => \$noshadow, - 'munge-config' => \$do_munge, 'a' => \$authorities, 'h|help' => \$want_help, - 'x' => \$as_xml, + 'x' => \$as_xml, 'y' => \$do_not_clear_zebraqueue, 'z' => \$process_zebraqueue, - 'where:s' => \$where, - 'length:i' => \$length, + 'skip-deletes' => \$process_zebraqueue_skip_deletes, + 'where:s' => \$where, + 'length:i' => \$length, 'offset:i' => \$offset, - 'v+' => \$verbose_logging, + 'v+' => \$verbose_logging, + 'run-as-root' => \$run_as_root, + 'wait-for-lock' => \$wait_for_lock, + 't|table:s' => \$table, ); - if (not $result or $want_help) { print_usage(); exit 0; } -if (not $biblios and not $authorities) { - my $msg = "Must specify -b or -a to reindex bibs or authorities\n"; +if( not defined $run_as_root and $run_user eq 'root') { + my $msg = "Warning: You are running this script as the user 'root'.\n"; + $msg .= "If this is intentional you must explicitly specify this using the -run-as-root switch\n"; $msg .= "Please do '$0 --help' to see usage.\n"; die $msg; } @@ -92,10 +109,41 @@ if ($process_zebraqueue and $do_not_clear_zebraqueue) { die $msg; } +if ($reset) { + $noshadow = 1; +} + if ($noshadow) { $noshadow = ' -n '; } +if ($daemon_mode) { + # incompatible flags handled above: help, reset, and do_not_clear_zebraqueue + if ($skip_export or $keep_export or $skip_index or + $where or $length or $offset) { + my $msg = "Cannot specify -s, -k, -I, -where, -length, or -offset with -daemon.\n"; + $msg .= "Please do '$0 --help' to see usage.\n"; + die $msg; + } + $authorities = 1; + $biblios = 1; + $process_zebraqueue = 1; +} + +if (not $biblios and not $authorities) { + my $msg = "Must specify -b or -a to reindex bibs or authorities\n"; + $msg .= "Please do '$0 --help' to see usage.\n"; + die $msg; +} + +our @tables_allowed_for_select = ( 'biblioitems', 'items', 'biblio' ); +unless ( grep { /^$table$/ } @tables_allowed_for_select ) { + die "Cannot specify -t|--table with value '$table'. Only " + . ( join ', ', @tables_allowed_for_select ) + . " are allowed."; +} + + # -v is for verbose, which seems backwards here because of how logging is set # on the CLI of zebraidx. It works this way. The default is to not log much if ($verbose_logging >= 2) { @@ -106,45 +154,98 @@ my $use_tempdir = 0; unless ($directory) { $use_tempdir = 1; $directory = tempdir(CLEANUP => ($keep_export ? 0 : 1)); -} +} my $biblioserverdir = C4::Context->zebraconfig('biblioserver')->{directory}; my $authorityserverdir = C4::Context->zebraconfig('authorityserver')->{directory}; my $kohadir = C4::Context->config('intranetdir'); -my $bib_index_mode = C4::Context->config('zebra_bib_index_mode') || 'grs1'; -my $auth_index_mode = C4::Context->config('zebra_auth_index_mode') || 'dom'; +my $bib_index_mode = C4::Context->config('zebra_bib_index_mode') // 'dom'; +my $auth_index_mode = C4::Context->config('zebra_auth_index_mode') // 'dom'; my $dbh = C4::Context->dbh; my ($biblionumbertagfield,$biblionumbertagsubfield) = &GetMarcFromKohaField("biblio.biblionumber",""); my ($biblioitemnumbertagfield,$biblioitemnumbertagsubfield) = &GetMarcFromKohaField("biblioitems.biblioitemnumber",""); +my $marcxml_open = q{ + +}; + +my $marcxml_close = q{ + +}; + +# Protect again simultaneous update of the zebra index by using a lock file. +# Create our own lock directory if its missing. This shouild be created +# by koha-zebra-ctl.sh or at system installation. If the desired directory +# does not exist and cannot be created, we fall back on /tmp - which will +# always work. + +my ($lockfile, $LockFH); +foreach ( + C4::Context->config("zebra_lockdir"), + '/var/lock/zebra_' . C4::Context->config('database'), + '/tmp/zebra_' . C4::Context->config('database') +) { + #we try three possibilities (we really want to lock :) + next if !$_; + ($LockFH, $lockfile) = _create_lockfile($_.'/rebuild'); + last if defined $LockFH; +} +if( !defined $LockFH ) { + print "WARNING: Could not create lock file $lockfile: $!\n"; + print "Please check your koha-conf.xml for ZEBRA_LOCKDIR.\n"; + print "Verify file permissions for it too.\n"; + $use_flock = 0; # we disable file locking now and will continue + # without it + # note that this mimics old behavior (before we used + # the lockfile) +}; + if ( $verbose_logging ) { print "Zebra configuration information\n"; print "================================\n"; print "Zebra biblio directory = $biblioserverdir\n"; print "Zebra authorities directory = $authorityserverdir\n"; print "Koha directory = $kohadir\n"; + print "Lockfile = $lockfile\n" if $lockfile; print "BIBLIONUMBER in : $biblionumbertagfield\$$biblionumbertagsubfield\n"; print "BIBLIOITEMNUMBER in : $biblioitemnumbertagfield\$$biblioitemnumbertagsubfield\n"; print "================================\n"; } -if ($do_munge) { - munge_config(); -} +my $tester = XML::LibXML->new(); -if ($authorities) { - index_records('authority', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $authorityserverdir); -} else { - print "skipping authorities\n" if ( $verbose_logging ); -} - -if ($biblios) { - index_records('biblio', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $biblioserverdir); +# The main work is done here by calling do_one_pass(). We have added locking +# avoid race conditions between full rebuilds and incremental updates either from +# daemon mode or periodic invocation from cron. The race can lead to an updated +# record being overwritten by a rebuild if the update is applied after the export +# by the rebuild and before the rebuild finishes (more likely to affect large +# catalogs). +# +# We have chosen to exit immediately by default if we cannot obtain the lock +# to prevent the potential for a infinite backlog from cron invocations, but an +# option (wait-for-lock) is provided to let the program wait for the lock. +# See http://bugs.koha-community.org/bugzilla3/show_bug.cgi?id=11078 for details. +if ($daemon_mode) { + while (1) { + # For incremental updates, skip the update if the updates are locked + if (_flock($LockFH, LOCK_EX|LOCK_NB)) { + do_one_pass() if ( zebraqueue_not_empty() ); + _flock($LockFH, LOCK_UN); + } + sleep $daemon_sleep; + } } else { - print "skipping biblios\n" if ( $verbose_logging ); + # all one-off invocations + my $lock_mode = ($wait_for_lock) ? LOCK_EX : LOCK_EX|LOCK_NB; + if (_flock($LockFH, $lock_mode)) { + do_one_pass(); + _flock($LockFH, LOCK_UN); + } else { + print "Skipping rebuild/update because flock failed on $lockfile: $!\n"; + } } @@ -174,33 +275,69 @@ if ($keep_export) { } } +sub do_one_pass { + if ($authorities) { + index_records('authority', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $authorityserverdir); + } else { + print "skipping authorities\n" if ( $verbose_logging ); + } + + if ($biblios) { + index_records('biblio', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $biblioserverdir); + } else { + print "skipping biblios\n" if ( $verbose_logging ); + } +} + +# Check the zebra update queue and return true if there are records to process +# This routine will handle each of -ab, -a, or -b, but in practice we force +# -ab when in daemon mode. +sub zebraqueue_not_empty { + my $where_str; + + if ($authorities && $biblios) { + $where_str = 'done = 0;'; + } elsif ($biblios) { + $where_str = 'server = "biblioserver" AND done = 0;'; + } else { + $where_str = 'server = "authorityserver" AND done = 0;'; + } + my $query = + $dbh->prepare('SELECT COUNT(*) FROM zebraqueue WHERE ' . $where_str ); + + $query->execute; + my $count = $query->fetchrow_arrayref->[0]; + print "queued records: $count\n" if $verbose_logging > 0; + return $count > 0; +} + # This checks to see if the zebra directories exist under the provided path. # If they don't, then zebra is likely to spit the dummy. This returns true # if the directories had to be created, false otherwise. sub check_zebra_dirs { - my ($base) = shift() . '/'; - my $needed_repairing = 0; - my @dirs = ( '', 'key', 'register', 'shadow', 'tmp' ); - foreach my $dir (@dirs) { - my $bdir = $base . $dir; + my ($base) = shift() . '/'; + my $needed_repairing = 0; + my @dirs = ( '', 'key', 'register', 'shadow', 'tmp' ); + foreach my $dir (@dirs) { + my $bdir = $base . $dir; if (! -d $bdir) { - $needed_repairing = 1; - mkdir $bdir || die "Unable to create '$bdir': $!\n"; - print "$0: needed to create '$bdir'\n"; + $needed_repairing = 1; + mkdir $bdir || die "Unable to create '$bdir': $!\n"; + print "$0: needed to create '$bdir'\n"; } } return $needed_repairing; -} # ---------- end of subroutine check_zebra_dirs ---------- +} # ---------- end of subroutine check_zebra_dirs ---------- sub index_records { my ($record_type, $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $server_dir) = @_; my $num_records_exported = 0; - my $records_deleted; + my $records_deleted = {}; my $need_reset = check_zebra_dirs($server_dir); if ($need_reset) { - print "$0: found broken zebra server directories: forcing a rebuild\n"; - $reset = 1; + print "$0: found broken zebra server directories: forcing a rebuild\n"; + $reset = 1; } if ($skip_export && $verbose_logging) { print "====================\n"; @@ -215,15 +352,20 @@ sub index_records { mkdir "$directory" unless (-d $directory); mkdir "$directory/$record_type" unless (-d "$directory/$record_type"); if ($process_zebraqueue) { - my $entries = select_zebraqueue_records($record_type, 'deleted'); - mkdir "$directory/del_$record_type" unless (-d "$directory/del_$record_type"); - $records_deleted = generate_deleted_marc_records($record_type, $entries, "$directory/del_$record_type", $as_xml); - mark_zebraqueue_batch_done($entries); + my $entries; + + unless ( $process_zebraqueue_skip_deletes ) { + $entries = select_zebraqueue_records($record_type, 'deleted'); + mkdir "$directory/del_$record_type" unless (-d "$directory/del_$record_type"); + $records_deleted = generate_deleted_marc_records($record_type, $entries, "$directory/del_$record_type", $as_xml); + mark_zebraqueue_batch_done($entries); + } + $entries = select_zebraqueue_records($record_type, 'updated'); mkdir "$directory/upd_$record_type" unless (-d "$directory/upd_$record_type"); - $num_records_exported = export_marc_records_from_list($record_type, - $entries, "$directory/upd_$record_type", $as_xml, $noxml, $records_deleted); + $num_records_exported = export_marc_records_from_list($record_type,$entries, "$directory/upd_$record_type", $as_xml, $noxml, $records_deleted); mark_zebraqueue_batch_done($entries); + } else { my $sth = select_all_records($record_type); $num_records_exported = export_marc_records_from_sth($record_type, $sth, "$directory/$record_type", $as_xml, $noxml, $nosanitize); @@ -250,7 +392,7 @@ sub index_records { } my $record_fmt = ($as_xml) ? 'marcxml' : 'iso2709' ; if ($process_zebraqueue) { - do_indexing($record_type, 'delete', "$directory/del_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt) + do_indexing($record_type, 'adelete', "$directory/del_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt) if %$records_deleted; do_indexing($record_type, 'update', "$directory/upd_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt) if $num_records_exported; @@ -268,7 +410,7 @@ sub select_zebraqueue_records { my $server = ($record_type eq 'biblio') ? 'biblioserver' : 'authorityserver'; my $op = ($update_type eq 'deleted') ? 'recordDelete' : 'specialUpdate'; - my $sth = $dbh->prepare("SELECT id, biblio_auth_number + my $sth = $dbh->prepare("SELECT id, biblio_auth_number FROM zebraqueue WHERE server = ? AND operation = ? @@ -317,7 +459,9 @@ sub select_all_authorities { } sub select_all_biblios { - my $strsth = qq{ SELECT biblionumber FROM biblioitems }; + $table = 'biblioitems' + unless grep { /^$table$/ } @tables_allowed_for_select; + my $strsth = qq{ SELECT biblionumber FROM $table }; $strsth.=qq{ WHERE $where } if ($where); $strsth.=qq{ LIMIT $length } if ($length && !$offset); $strsth.=qq{ LIMIT $offset,$length } if ($offset); @@ -342,10 +486,10 @@ sub export_marc_records_from_sth { my $num_exported = 0; open my $fh, '>:encoding(UTF-8) ', "$directory/exported_records" or die $!; - if (include_xml_wrapper($as_xml, $record_type)) { - # include XML declaration and root element - print {$fh} ''; - } + + print {$fh} $marcxml_open + if include_xml_wrapper($as_xml, $record_type); + my $i = 0; my ( $itemtag, $itemsubfield ) = GetMarcFromKohaField("items.itemnumber",''); while (my ($record_number) = $sth->fetchrow_array) { @@ -362,7 +506,7 @@ sub export_marc_records_from_sth { $record->encoding('UTF-8'); my @itemsrecord; foreach my $item (@items){ - my $record = Item2Marc($item, $record_number); + my $record = Item2Marc($item, $record_number); push @itemsrecord, $record->field($itemtag); } $record->insert_fields_ordered(@itemsrecord); @@ -372,8 +516,18 @@ sub export_marc_records_from_sth { substr($itemsxml, index($itemsxml, "\n", 0) + 10); } } + # extra test to ensure that result is valid XML; otherwise + # Zebra won't parse it in DOM mode + eval { + my $doc = $tester->parse_string($marcxml); + }; + if ($@) { + warn "Error exporting record $record_number ($record_type): $@\n"; + next; + } if ( $marcxml ) { - print {$fh} $marcxml if $marcxml; + $marcxml =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!; + print {$fh} $marcxml; $num_exported++; } next; @@ -384,6 +538,12 @@ sub export_marc_records_from_sth { my $rec; if ($as_xml) { $rec = $marc->as_xml_record(C4::Context->preference('marcflavour')); + eval { + my $doc = $tester->parse_string($rec); + }; + if ($@) { + die "invalid XML: $@"; + } $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!; } else { $rec = $marc->as_usmarc(); @@ -392,12 +552,14 @@ sub export_marc_records_from_sth { $num_exported++; }; if ($@) { - warn "Error exporting record $record_number ($record_type) ".($noxml ? "not XML" : "XML"); + warn "Error exporting record $record_number ($record_type) ".($noxml ? "not XML" : "XML"); + warn "... specific error is $@" if $verbose_logging; } } } print "\nRecords exported: $num_exported\n" if ( $verbose_logging ); - print {$fh} '' if (include_xml_wrapper($as_xml, $record_type)); + print {$fh} $marcxml_close + if include_xml_wrapper($as_xml, $record_type); close $fh; return $num_exported; } @@ -407,10 +569,10 @@ sub export_marc_records_from_list { my $num_exported = 0; open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!; - if (include_xml_wrapper($as_xml, $record_type)) { - # include XML declaration and root element - print {$fh} ''; - } + + print {$fh} $marcxml_open + if include_xml_wrapper($as_xml, $record_type); + my $i = 0; # Skip any deleted records. We check for this anyway, but this reduces error spam @@ -436,11 +598,13 @@ sub export_marc_records_from_list { if ($@) { warn "Error exporting record $record_number ($record_type) ".($noxml ? "not XML" : "XML"); } - $num_exported++; } } print "\nRecords exported: $num_exported\n" if ( $verbose_logging ); - print {$fh} '' if (include_xml_wrapper($as_xml, $record_type)); + + print {$fh} $marcxml_close + if include_xml_wrapper($as_xml, $record_type); + close $fh; return $num_exported; } @@ -450,10 +614,10 @@ sub generate_deleted_marc_records { my $records_deleted = {}; open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!; - if (include_xml_wrapper($as_xml, $record_type)) { - # include XML declaration and root element - print {$fh} ''; - } + + print {$fh} $marcxml_open + if include_xml_wrapper($as_xml, $record_type); + my $i = 0; foreach my $record_number (map { $_->{biblio_auth_number} } @$entries ) { print "\r$i" unless ($i++ %100 or !$verbose_logging); @@ -481,22 +645,28 @@ sub generate_deleted_marc_records { $records_deleted->{$record_number} = 1; } print "\nRecords exported: $i\n" if ( $verbose_logging ); - print {$fh} '' if (include_xml_wrapper($as_xml, $record_type)); + + print {$fh} $marcxml_close + if include_xml_wrapper($as_xml, $record_type); + close $fh; return $records_deleted; - + } sub get_corrected_marc_record { my ($record_type, $record_number, $noxml) = @_; - my $marc = get_raw_marc_record($record_type, $record_number, $noxml); + my $marc = get_raw_marc_record($record_type, $record_number, $noxml); if (defined $marc) { fix_leader($marc); if ($record_type eq 'authority') { fix_authority_id($marc, $record_number); + } elsif ($record_type eq 'biblio' && C4::Context->preference('IncludeSeeFromInSearches')) { + my $normalizer = Koha::RecordProcessor->new( { filters => 'EmbedSeeFromHeadings' } ); + $marc = $normalizer->process($marc); } if (C4::Context->preference("marcflavour") eq "UNIMARC") { fix_unimarc_100($marc); @@ -508,8 +678,8 @@ sub get_corrected_marc_record { sub get_raw_marc_record { my ($record_type, $record_number, $noxml) = @_; - - my $marc; + + my $marc; if ($record_type eq 'biblio') { if ($noxml) { my $fetch_sth = $dbh->prepare_cached("SELECT marc FROM biblioitems WHERE biblionumber = ?"); @@ -552,7 +722,7 @@ sub fix_leader { # force them to be recalculated correct when # the $marc->as_usmarc() or $marc->as_xml() is called. # But why is this necessary? It would be a serious bug - # in MARC::Record (definitely) and MARC::File::XML (arguably) + # in MARC::Record (definitely) and MARC::File::XML (arguably) # if they are emitting incorrect leader values. my $marc = shift; @@ -571,7 +741,7 @@ sub fix_biblio_ids { my $biblioitemnumber; if (@_) { $biblioitemnumber = shift; - } else { + } else { my $sth = $dbh->prepare( "SELECT biblioitemnumber FROM biblioitems WHERE biblionumber=?"); $sth->execute($biblionumber); @@ -589,7 +759,7 @@ sub fix_biblio_ids { # present in the MARC::Record object ought to be part of GetMarcBiblio. # # On the other hand, this better for now than what rebuild_zebra.pl used to - # do, which was duplicate the code for inserting the biblionumber + # do, which was duplicate the code for inserting the biblionumber # and biblioitemnumber C4::Biblio::_koha_marc_update_bib_ids($marc, '', $biblionumber, $biblioitemnumber); @@ -643,27 +813,83 @@ sub do_indexing { } +sub _flock { + # test if flock is present; if so, use it; if not, return true + # op refers to the official flock operations including LOCK_EX, + # LOCK_UN, etc. + # combining LOCK_EX with LOCK_NB returns immediately + my ($fh, $op)= @_; + if( !defined($use_flock) ) { + #check if flock is present; if not, you will have a fatal error + my $lock_acquired = eval { flock($fh, $op) }; + # assuming that $fh and $op are fine(..), an undef $lock_acquired + # means no flock + $use_flock = defined($lock_acquired) ? 1 : 0; + print "Warning: flock could not be used!\n" if $verbose_logging && !$use_flock; + return 1 if !$use_flock; + return $lock_acquired; + } else { + return 1 if !$use_flock; + return flock($fh, $op); + } +} + +sub _create_lockfile { #returns undef on failure + my $dir= shift; + unless (-d $dir) { + eval { mkpath($dir, 0, oct(755)) }; + return if $@; + } + return if !open my $fh, q{>}, $dir.'/'.LOCK_FILENAME; + return ( $fh, $dir.'/'.LOCK_FILENAME ); +} + sub print_usage { print <<_USAGE_; $0: reindex MARC bibs and/or authorities in Zebra. Use this batch job to reindex all biblio or authority -records in your Koha database. This job is useful -only if you are using Zebra; if you are using the 'NoZebra' -mode, this job should not be used. +records in your Koha database. Parameters: + -b index bibliographic records -a index authority records + -daemon Run in daemon mode. The program will loop checking + for entries on the zebraqueue table, processing + them incrementally if present, and then sleep + for a few seconds before repeating the process + Checking the zebraqueue table is done with a cheap + SQL query. This allows for near realtime update of + the zebra search index with low system overhead. + Use -sleep to control the checking interval. + + Daemon mode implies -z, -a, -b. The program will + refuse to start if options are present that do not + make sense while running as an incremental update + daemon (e.g. -r or -offset). + + -sleep 10 Seconds to sleep between checks of the zebraqueue + table in daemon mode. The default is 5 seconds. + -z select only updated and deleted records marked in the zebraqueue table. Cannot be used with -r or -s. + --skip-deletes only select record updates, not record + deletions, to avoid potential excessive + I/O when zebraidx processes deletions. + If this option is used for normal indexing, + a cronjob should be set up to run + rebuild_zebra.pl -z without --skip-deletes + during off hours. + Only effective with -z. + -r clear Zebra index before - adding records to index + adding records to index. Implies -w. -d Temporary directory for indexing. If not specified, one is automatically @@ -674,7 +900,7 @@ Parameters: -k Do not delete export directory. -s Skip export. Used if you have - already exported the records + already exported the records in a previous run. -noxml index from ISO MARC blob @@ -684,7 +910,7 @@ Parameters: -x export and index as xml instead of is02709 (biblios only). use this if you might have records > 99,999 chars, - + -nosanitize export biblio/authority records directly from DB marcxml field without sanitizing records. It speed up dump process but could fail if DB contains badly @@ -696,10 +922,10 @@ Parameters: after doing batch indexing, zebraqueue should be marked done for the affected record type(s) so that a running zebraqueue_daemon doesn't try to reindex - the same records - specify -y to override this. + the same records - specify -y to override this. Cannot be used with -z. - -v increase the amount of logging. Normally only + -v increase the amount of logging. Normally only warnings and errors from the indexing are shown. Use log level 2 (-v -v) to include all Zebra logs. @@ -710,382 +936,17 @@ Parameters: --where let you specify a WHERE query, like itemtype='BOOK' or something like that - --munge-config Deprecated option to try - to fix Zebra config files. - --help or -h show this message. -_USAGE_ -} + --run-as-root explicitily allow script to run as 'root' user -# FIXME: the following routines are deprecated and -# will be removed once it is determined whether -# a script to fix Zebra configuration files is -# actually needed. -sub munge_config { -# -# creating zebra-biblios.cfg depending on system -# + --wait-for-lock when not running in daemon mode, the default + behavior is to abort a rebuild if the rebuild + lock is busy. This option will cause the program + to wait for the lock to free and then continue + processing the rebuild request, -# getting zebraidx directory -my $zebraidxdir; -foreach (qw(/usr/local/bin/zebraidx - /opt/bin/zebraidx - /usr/bin/zebraidx - )) { - if ( -f $_ ) { - $zebraidxdir=$_; - } -} - -unless ($zebraidxdir) { - print qq| - ERROR: could not find zebraidx directory - ERROR: Either zebra is not installed, - ERROR: or it's in a directory I don't checked. - ERROR: do a which zebraidx and edit this file to add the result you get -|; - exit; -} -$zebraidxdir =~ s/\/bin\/.*//; -print "Info : zebra is in $zebraidxdir \n"; - -# getting modules directory -my $modulesdir; -foreach (qw(/usr/local/lib/idzebra-2.0/modules/mod-grs-xml.so - /usr/local/lib/idzebra/modules/mod-grs-xml.so - /usr/lib/idzebra/modules/mod-grs-xml.so - /usr/lib/idzebra-2.0/modules/mod-grs-xml.so - )) { - if ( -f $_ ) { - $modulesdir=$_; - } -} - -unless ($modulesdir) { - print qq| - ERROR: could not find mod-grs-xml.so directory - ERROR: Either zebra is not properly compiled (libxml2 is not setup and you don t have mod-grs-xml.so, - ERROR: or it's in a directory I don't checked. - ERROR: find where mod-grs-xml.so is and edit this file to add the result you get -|; - exit; -} -$modulesdir =~ s/\/modules\/.*//; -print "Info: zebra modules dir : $modulesdir\n"; - -# getting tab directory -my $tabdir; -foreach (qw(/usr/local/share/idzebra/tab/explain.att - /usr/local/share/idzebra-2.0/tab/explain.att - /usr/share/idzebra/tab/explain.att - /usr/share/idzebra-2.0/tab/explain.att - )) { - if ( -f $_ ) { - $tabdir=$_; - } -} - -unless ($tabdir) { - print qq| - ERROR: could not find explain.att directory - ERROR: Either zebra is not properly compiled, - ERROR: or it's in a directory I don't checked. - ERROR: find where explain.att is and edit this file to add the result you get -|; - exit; -} -$tabdir =~ s/\/tab\/.*//; -print "Info: tab dir : $tabdir\n"; - -# -# AUTHORITIES creating directory structure -# -my $created_dir_or_file = 0; -if ($authorities) { - if ( $verbose_logging ) { - print "====================\n"; - print "checking directories & files for authorities\n"; - print "====================\n"; - } - unless (-d "$authorityserverdir") { - system("mkdir -p $authorityserverdir"); - print "Info: created $authorityserverdir\n"; - $created_dir_or_file++; - } - unless (-d "$authorityserverdir/lock") { - mkdir "$authorityserverdir/lock"; - print "Info: created $authorityserverdir/lock\n"; - $created_dir_or_file++; - } - unless (-d "$authorityserverdir/register") { - mkdir "$authorityserverdir/register"; - print "Info: created $authorityserverdir/register\n"; - $created_dir_or_file++; - } - unless (-d "$authorityserverdir/shadow") { - mkdir "$authorityserverdir/shadow"; - print "Info: created $authorityserverdir/shadow\n"; - $created_dir_or_file++; - } - unless (-d "$authorityserverdir/tab") { - mkdir "$authorityserverdir/tab"; - print "Info: created $authorityserverdir/tab\n"; - $created_dir_or_file++; - } - unless (-d "$authorityserverdir/key") { - mkdir "$authorityserverdir/key"; - print "Info: created $authorityserverdir/key\n"; - $created_dir_or_file++; - } - - unless (-d "$authorityserverdir/etc") { - mkdir "$authorityserverdir/etc"; - print "Info: created $authorityserverdir/etc\n"; - $created_dir_or_file++; - } - - # - # AUTHORITIES : copying mandatory files - # - # the record model, depending on marc flavour - unless (-f "$authorityserverdir/tab/record.abs") { - if (C4::Context->preference("marcflavour") eq "UNIMARC") { - system("cp -f $kohadir/etc/zebradb/marc_defs/unimarc/authorities/record.abs $authorityserverdir/tab/record.abs"); - print "Info: copied record.abs for UNIMARC\n"; - } else { - system("cp -f $kohadir/etc/zebradb/marc_defs/marc21/authorities/record.abs $authorityserverdir/tab/record.abs"); - print "Info: copied record.abs for USMARC\n"; - } - $created_dir_or_file++; - } - unless (-f "$authorityserverdir/tab/sort-string-utf.chr") { - system("cp -f $kohadir/etc/zebradb/lang_defs/fr/sort-string-utf.chr $authorityserverdir/tab/sort-string-utf.chr"); - print "Info: copied sort-string-utf.chr\n"; - $created_dir_or_file++; - } - unless (-f "$authorityserverdir/tab/word-phrase-utf.chr") { - system("cp -f $kohadir/etc/zebradb/lang_defs/fr/sort-string-utf.chr $authorityserverdir/tab/word-phrase-utf.chr"); - print "Info: copied word-phase-utf.chr\n"; - $created_dir_or_file++; - } - unless (-f "$authorityserverdir/tab/auth1.att") { - system("cp -f $kohadir/etc/zebradb/authorities/etc/bib1.att $authorityserverdir/tab/auth1.att"); - print "Info: copied auth1.att\n"; - $created_dir_or_file++; - } - unless (-f "$authorityserverdir/tab/default.idx") { - system("cp -f $kohadir/etc/zebradb/etc/default.idx $authorityserverdir/tab/default.idx"); - print "Info: copied default.idx\n"; - $created_dir_or_file++; - } - - unless (-f "$authorityserverdir/etc/ccl.properties") { -# system("cp -f $kohadir/etc/zebradb/ccl.properties ".C4::Context->zebraconfig('authorityserver')->{ccl2rpn}); - system("cp -f $kohadir/etc/zebradb/ccl.properties $authorityserverdir/etc/ccl.properties"); - print "Info: copied ccl.properties\n"; - $created_dir_or_file++; - } - unless (-f "$authorityserverdir/etc/pqf.properties") { -# system("cp -f $kohadir/etc/zebradb/pqf.properties ".C4::Context->zebraconfig('authorityserver')->{ccl2rpn}); - system("cp -f $kohadir/etc/zebradb/pqf.properties $authorityserverdir/etc/pqf.properties"); - print "Info: copied pqf.properties\n"; - $created_dir_or_file++; - } - - # - # AUTHORITIES : copying mandatory files - # - unless (-f C4::Context->zebraconfig('authorityserver')->{config}) { - open my $zd, '>:encoding(UTF-8)' ,C4::Context->zebraconfig('authorityserver')->{config}; - print {$zd} " -# generated by KOHA/misc/migration_tools/rebuild_zebra.pl -profilePath:\${srcdir:-.}:$authorityserverdir/tab/:$tabdir/tab/:\${srcdir:-.}/tab/ - -encoding: UTF-8 -# Files that describe the attribute sets supported. -attset: auth1.att -attset: explain.att -attset: gils.att - -modulePath:$modulesdir/modules/ -# Specify record type -iso2709.recordType:grs.marcxml.record -recordType:grs.xml -recordId: (auth1,Local-Number) -storeKeys:1 -storeData:1 - - -# Lock File Area -lockDir: $authorityserverdir/lock -perm.anonymous:r -perm.kohaadmin:rw -register: $authorityserverdir/register:4G -shadow: $authorityserverdir/shadow:4G - -# Temp File area for result sets -setTmpDir: $authorityserverdir/tmp - -# Temp File area for index program -keyTmpDir: $authorityserverdir/key - -# Approx. Memory usage during indexing -memMax: 40M -rank:rank-1 - "; - print "Info: creating zebra-authorities.cfg\n"; - $created_dir_or_file++; - } - - if ($created_dir_or_file) { - print "Info: created : $created_dir_or_file directories & files\n"; - } else { - print "Info: file & directories OK\n"; - } - -} -if ($biblios) { - if ( $verbose_logging ) { - print "====================\n"; - print "checking directories & files for biblios\n"; - print "====================\n"; - } + --table specify a table (can be items, biblioitems or biblio) to retrieve biblionumber to index. + biblioitems is the default value. - # - # BIBLIOS : creating directory structure - # - unless (-d "$biblioserverdir") { - system("mkdir -p $biblioserverdir"); - print "Info: created $biblioserverdir\n"; - $created_dir_or_file++; - } - unless (-d "$biblioserverdir/lock") { - mkdir "$biblioserverdir/lock"; - print "Info: created $biblioserverdir/lock\n"; - $created_dir_or_file++; - } - unless (-d "$biblioserverdir/register") { - mkdir "$biblioserverdir/register"; - print "Info: created $biblioserverdir/register\n"; - $created_dir_or_file++; - } - unless (-d "$biblioserverdir/shadow") { - mkdir "$biblioserverdir/shadow"; - print "Info: created $biblioserverdir/shadow\n"; - $created_dir_or_file++; - } - unless (-d "$biblioserverdir/tab") { - mkdir "$biblioserverdir/tab"; - print "Info: created $biblioserverdir/tab\n"; - $created_dir_or_file++; - } - unless (-d "$biblioserverdir/key") { - mkdir "$biblioserverdir/key"; - print "Info: created $biblioserverdir/key\n"; - $created_dir_or_file++; - } - unless (-d "$biblioserverdir/etc") { - mkdir "$biblioserverdir/etc"; - print "Info: created $biblioserverdir/etc\n"; - $created_dir_or_file++; - } - - # - # BIBLIOS : copying mandatory files - # - # the record model, depending on marc flavour - unless (-f "$biblioserverdir/tab/record.abs") { - if (C4::Context->preference("marcflavour") eq "UNIMARC") { - system("cp -f $kohadir/etc/zebradb/marc_defs/unimarc/biblios/record.abs $biblioserverdir/tab/record.abs"); - print "Info: copied record.abs for UNIMARC\n"; - } else { - system("cp -f $kohadir/etc/zebradb/marc_defs/marc21/biblios/record.abs $biblioserverdir/tab/record.abs"); - print "Info: copied record.abs for USMARC\n"; - } - $created_dir_or_file++; - } - unless (-f "$biblioserverdir/tab/sort-string-utf.chr") { - system("cp -f $kohadir/etc/zebradb/lang_defs/fr/sort-string-utf.chr $biblioserverdir/tab/sort-string-utf.chr"); - print "Info: copied sort-string-utf.chr\n"; - $created_dir_or_file++; - } - unless (-f "$biblioserverdir/tab/word-phrase-utf.chr") { - system("cp -f $kohadir/etc/zebradb/lang_defs/fr/sort-string-utf.chr $biblioserverdir/tab/word-phrase-utf.chr"); - print "Info: copied word-phase-utf.chr\n"; - $created_dir_or_file++; - } - unless (-f "$biblioserverdir/tab/bib1.att") { - system("cp -f $kohadir/etc/zebradb/biblios/etc/bib1.att $biblioserverdir/tab/bib1.att"); - print "Info: copied bib1.att\n"; - $created_dir_or_file++; - } - unless (-f "$biblioserverdir/tab/default.idx") { - system("cp -f $kohadir/etc/zebradb/etc/default.idx $biblioserverdir/tab/default.idx"); - print "Info: copied default.idx\n"; - $created_dir_or_file++; - } - unless (-f "$biblioserverdir/etc/ccl.properties") { -# system("cp -f $kohadir/etc/zebradb/ccl.properties ".C4::Context->zebraconfig('biblioserver')->{ccl2rpn}); - system("cp -f $kohadir/etc/zebradb/ccl.properties $biblioserverdir/etc/ccl.properties"); - print "Info: copied ccl.properties\n"; - $created_dir_or_file++; - } - unless (-f "$biblioserverdir/etc/pqf.properties") { -# system("cp -f $kohadir/etc/zebradb/pqf.properties ".C4::Context->zebraconfig('biblioserver')->{ccl2rpn}); - system("cp -f $kohadir/etc/zebradb/pqf.properties $biblioserverdir/etc/pqf.properties"); - print "Info: copied pqf.properties\n"; - $created_dir_or_file++; - } - - # - # BIBLIOS : copying mandatory files - # - unless (-f C4::Context->zebraconfig('biblioserver')->{config}) { - open my $zd, '>:encoding(UTF-8)', C4::Context->zebraconfig('biblioserver')->{config}; - print {$zd} " -# generated by KOHA/misc/migrtion_tools/rebuild_zebra.pl -profilePath:\${srcdir:-.}:$biblioserverdir/tab/:$tabdir/tab/:\${srcdir:-.}/tab/ - -encoding: UTF-8 -# Files that describe the attribute sets supported. -attset:bib1.att -attset:explain.att -attset:gils.att - -modulePath:$modulesdir/modules/ -# Specify record type -iso2709.recordType:grs.marcxml.record -recordType:grs.xml -recordId: (bib1,Local-Number) -storeKeys:1 -storeData:1 - - -# Lock File Area -lockDir: $biblioserverdir/lock -perm.anonymous:r -perm.kohaadmin:rw -register: $biblioserverdir/register:4G -shadow: $biblioserverdir/shadow:4G - -# Temp File area for result sets -setTmpDir: $biblioserverdir/tmp - -# Temp File area for index program -keyTmpDir: $biblioserverdir/key - -# Approx. Memory usage during indexing -memMax: 40M -rank:rank-1 - "; - print "Info: creating zebra-biblios.cfg\n"; - $created_dir_or_file++; - } - - if ($created_dir_or_file) { - print "Info: created : $created_dir_or_file directories & files\n"; - } else { - print "Info: file & directories OK\n"; - } - -} + --help or -h show this message. +_USAGE_ }