X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=misc%2Fmigration_tools%2Frebuild_zebra.pl;h=14fea3ce8f545ab3e6ffd0430de351f3a1c9857c;hb=ef86a77801ea42eb1f703a77f53123ccb924a9fe;hp=1621e84b9bd7d4adf8236a8a16f2b589fcfa4e99;hpb=226af0485d72031a0f63d06269f6b8e78fab15db;p=koha.git diff --git a/misc/migration_tools/rebuild_zebra.pl b/misc/migration_tools/rebuild_zebra.pl index 1621e84b9b..14fea3ce8f 100755 --- a/misc/migration_tools/rebuild_zebra.pl +++ b/misc/migration_tools/rebuild_zebra.pl @@ -10,8 +10,9 @@ use File::Path; use C4::Biblio; use C4::AuthoritiesMarc; use C4::Items; +use Koha::RecordProcessor; +use XML::LibXML; -# # script that checks zebradir structure & create directories & mandatory files if needed # # @@ -38,6 +39,9 @@ my $do_not_clear_zebraqueue; my $length; my $where; my $offset; +my $run_as_root; +my $run_user = (getpwuid($<))[0]; + my $verbose_logging = 0; my $zebraidx_log_opt = " -v none,fatal,warn "; my $result = GetOptions( @@ -45,7 +49,7 @@ my $result = GetOptions( 'r|reset' => \$reset, 's' => \$skip_export, 'k' => \$keep_export, - 'I|skip-index' => \$skip_index, + 'I|skip-index' => \$skip_index, 'nosanitize' => \$nosanitize, 'b' => \$biblios, 'noxml' => \$noxml, @@ -60,14 +64,21 @@ my $result = GetOptions( 'length:i' => \$length, 'offset:i' => \$offset, 'v+' => \$verbose_logging, + 'run-as-root' => \$run_as_root, ); - if (not $result or $want_help) { print_usage(); exit 0; } +if( not defined $run_as_root and $run_user eq 'root') { + my $msg = "Warning: You are running this script as the user 'root'.\n"; + $msg .= "If this is intentional you must explicitly specify this using the -run-as-root switch\n"; + $msg .= "Please do '$0 --help' to see usage.\n"; + die $msg; +} + if (not $biblios and not $authorities) { my $msg = "Must specify -b or -a to reindex bibs or authorities\n"; $msg .= "Please do '$0 --help' to see usage.\n"; @@ -92,6 +103,10 @@ if ($process_zebraqueue and $do_not_clear_zebraqueue) { die $msg; } +if ($reset) { + $noshadow = 1; +} + if ($noshadow) { $noshadow = ' -n '; } @@ -135,6 +150,8 @@ if ($do_munge) { munge_config(); } +my $tester = XML::LibXML->new(); + if ($authorities) { index_records('authority', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $authorityserverdir); } else { @@ -250,7 +267,7 @@ sub index_records { } my $record_fmt = ($as_xml) ? 'marcxml' : 'iso2709' ; if ($process_zebraqueue) { - do_indexing($record_type, 'delete', "$directory/del_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt) + do_indexing($record_type, 'adelete', "$directory/del_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt) if %$records_deleted; do_indexing($record_type, 'update', "$directory/upd_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt) if $num_records_exported; @@ -372,8 +389,18 @@ sub export_marc_records_from_sth { substr($itemsxml, index($itemsxml, "\n", 0) + 10); } } + # extra test to ensure that result is valid XML; otherwise + # Zebra won't parse it in DOM mode + eval { + my $doc = $tester->parse_string($marcxml); + }; + if ($@) { + warn "Error exporting record $record_number ($record_type): $@\n"; + next; + } if ( $marcxml ) { - print {$fh} $marcxml if $marcxml; + $marcxml =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!; + print {$fh} $marcxml; $num_exported++; } next; @@ -384,6 +411,12 @@ sub export_marc_records_from_sth { my $rec; if ($as_xml) { $rec = $marc->as_xml_record(C4::Context->preference('marcflavour')); + eval { + my $doc = $tester->parse_string($rec); + }; + if ($@) { + die "invalid XML: $@"; + } $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!; } else { $rec = $marc->as_usmarc(); @@ -392,7 +425,8 @@ sub export_marc_records_from_sth { $num_exported++; }; if ($@) { - warn "Error exporting record $record_number ($record_type) ".($noxml ? "not XML" : "XML"); + warn "Error exporting record $record_number ($record_type) ".($noxml ? "not XML" : "XML"); + warn "... specific error is $@" if $verbose_logging; } } } @@ -436,7 +470,6 @@ sub export_marc_records_from_list { if ($@) { warn "Error exporting record $record_number ($record_type) ".($noxml ? "not XML" : "XML"); } - $num_exported++; } } print "\nRecords exported: $num_exported\n" if ( $verbose_logging ); @@ -497,6 +530,9 @@ sub get_corrected_marc_record { fix_leader($marc); if ($record_type eq 'authority') { fix_authority_id($marc, $record_number); + } elsif ($record_type eq 'biblio' && C4::Context->preference('IncludeSeeFromInSearches')) { + my $normalizer = Koha::RecordProcessor->new( { filters => 'EmbedSeeFromHeadings' } ); + $marc = $normalizer->process($marc); } if (C4::Context->preference("marcflavour") eq "UNIMARC") { fix_unimarc_100($marc); @@ -648,11 +684,10 @@ sub print_usage { $0: reindex MARC bibs and/or authorities in Zebra. Use this batch job to reindex all biblio or authority -records in your Koha database. This job is useful -only if you are using Zebra; if you are using the 'NoZebra' -mode, this job should not be used. +records in your Koha database. Parameters: + -b index bibliographic records -a index authority records @@ -663,7 +698,7 @@ Parameters: or -s. -r clear Zebra index before - adding records to index + adding records to index. Implies -w. -d Temporary directory for indexing. If not specified, one is automatically @@ -712,6 +747,9 @@ Parameters: --munge-config Deprecated option to try to fix Zebra config files. + + --run-as-root explicitily allow script to run as 'root' user + --help or -h show this message. _USAGE_ }