my $nosanitize;
my $skip_export;
my $keep_export;
+my $skip_index;
my $reset;
my $biblios;
my $authorities;
'r|reset' => \$reset,
's' => \$skip_export,
'k' => \$keep_export,
+ 'I|skip-index' => \$skip_index,
'nosanitize' => \$nosanitize,
'b' => \$biblios,
'noxml' => \$noxml,
}
if ($authorities) {
- index_records('authority', $directory, $skip_export, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $authorityserverdir);
+ index_records('authority', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $authorityserverdir);
} else {
print "skipping authorities\n" if ( $verbose_logging );
}
if ($biblios) {
- index_records('biblio', $directory, $skip_export, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $biblioserverdir);
+ index_records('biblio', $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $biblioserverdir);
} else {
print "skipping biblios\n" if ( $verbose_logging );
}
} # ---------- end of subroutine check_zebra_dirs ----------
sub index_records {
- my ($record_type, $directory, $skip_export, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $server_dir) = @_;
+ my ($record_type, $directory, $skip_export, $skip_index, $process_zebraqueue, $as_xml, $noxml, $nosanitize, $do_not_clear_zebraqueue, $verbose_logging, $zebraidx_log_opt, $server_dir) = @_;
my $num_records_exported = 0;
my $records_deleted;
}
}
}
-
+
#
# and reindexing everything
#
- if ( $verbose_logging ) {
- print "====================\n";
- print "REINDEXING zebra\n";
- print "====================\n";
- }
- my $record_fmt = ($as_xml) ? 'marcxml' : 'iso2709' ;
- if ($process_zebraqueue) {
- do_indexing($record_type, 'delete', "$directory/del_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
- if %$records_deleted;
- do_indexing($record_type, 'update', "$directory/upd_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
- if $num_records_exported;
+ if ($skip_index) {
+ if ($verbose_logging) {
+ print "====================\n";
+ print "SKIPPING $record_type indexing\n";
+ print "====================\n";
+ }
} else {
- do_indexing($record_type, 'update', "$directory/$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
- if ($num_records_exported or $skip_export);
+ if ( $verbose_logging ) {
+ print "====================\n";
+ print "REINDEXING zebra\n";
+ print "====================\n";
+ }
+ my $record_fmt = ($as_xml) ? 'marcxml' : 'iso2709' ;
+ if ($process_zebraqueue) {
+ do_indexing($record_type, 'delete', "$directory/del_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
+ if %$records_deleted;
+ do_indexing($record_type, 'update', "$directory/upd_$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
+ if $num_records_exported;
+ } else {
+ do_indexing($record_type, 'update', "$directory/$record_type", $reset, $noshadow, $record_fmt, $zebraidx_log_opt)
+ if ($num_records_exported or $skip_export);
+ }
}
}
--- /dev/null
+#!/bin/sh
+
+# Print the command-line help for this script on standard output.
+usage() {
+    local scriptname
+    # Quote $0 so that a script path containing spaces is handled correctly.
+    scriptname=$(basename "$0")
+    cat <<EOF
+$scriptname
+
+Index Koha records by chunks. It is useful when some records cause errors and
+stop the indexing process. With this script, if indexing of one chunk fails,
+the chunk is split into two or more smaller chunks, and indexing continues on
+these chunks. rebuild_zebra.pl is called only once to export records.
+Splitting and indexing are handled by this script (using yaz-marcdump and
+zebraidx).
+
+Usage:
+$scriptname -l X [-t type] [-o X] [-s X] [-d /export/dir] [-L /log/dir] [-r] [-f]
+$scriptname -h
+
+    -o | --offset       Offset parameter of rebuild_zebra.pl
+    -l | --length       Length parameter of rebuild_zebra.pl (mandatory)
+    -s | --chunks-size  Initial chunk size (number of records indexed at once)
+    -d | --export-dir   Where rebuild_zebra.pl will export data
+    -L | --log-dir      Log directory
+    -r | --remove-logs  Clean log directory before start
+    -t | --type         Record type ('biblios' or 'authorities')
+    -f | --force        Don't ask for confirmation before start
+    -h | --help         Display this help message
+EOF
+}
+
+# indexfile FILE CHUNKSSIZE
+#
+# Split FILE into chunks of CHUNKSSIZE records with yaz-marcdump, then index
+# each chunk with zebraidx. A chunk counts as indexed when the zebraidx log
+# contains "Records: N", N being the number of records found in the chunk; the
+# update is then committed. Otherwise the chunk is split again with half the
+# chunk size. Once the chunk size drops below 1 the file is reported with
+# "Fail on file ..." and skipped.
+#
+# Reads the globals YAZMARCDUMP, ZEBRAIDX, CONFIGFILE, TYPE and LOGDIR.
+# Chunk paths are still iterated by word splitting, so FILE must not contain
+# whitespace.
+indexfile() {
+    local file="$1"
+    local chunkssize="$2"
+    # Keep working variables local: this function calls itself recursively.
+    local prefix dir files chunkfile size logfile
+
+    if [ "$chunkssize" -lt 1 ]; then
+        echo "Fail on file $file"
+        return
+    fi
+
+    prefix="${file}_${chunkssize}_"
+    echo "Splitting file in chunks of $chunkssize records"
+    "$YAZMARCDUMP" -n -s "$prefix" -C "$chunkssize" "$file"
+
+    dir=$(dirname "$prefix")
+    # The pattern is quoted so the shell cannot glob-expand "[0-9]+" before
+    # find receives it.
+    files=$(find "$dir" -regex "${prefix}[0-9]+" | sort | tr '\n' ' ')
+    if [ -z "$files" ]; then
+        echo "No chunk produced from $file; nothing to index"
+        return
+    fi
+
+    for chunkfile in $files; do
+        echo "Indexing $chunkfile"
+        # Number of records in the chunk, counted from yaz-marcdump's
+        # per-record comments.
+        size=$("$YAZMARCDUMP" -p "$chunkfile" | grep -c '<!-- Record [0-9]\+ offset .* -->')
+        logfile="$LOGDIR/zebraidx.$(basename "$chunkfile").log"
+        "$ZEBRAIDX" -c "$CONFIGFILE" -d "$TYPE" -g iso2709 update "$chunkfile" >"$logfile" 2>&1
+        if grep -q "Records: $size" "$logfile" 2>/dev/null; then
+            "$ZEBRAIDX" -c "$CONFIGFILE" -d "$TYPE" -g iso2709 commit >>"$logfile" 2>&1
+        else
+            echo "Indexing failed. Split file and continue..."
+            indexfile "$chunkfile" $((chunkssize / 2))
+        fi
+    done
+}
+
+OFFSET=0
+LENGTH=
+CHUNKSSIZE=10000
+EXPORTDIR=/tmp/rebuild/export
+LOGDIR=/tmp/rebuild/logs
+RMLOGS=no
+NOCONFIRM=no
+TYPE=biblios
+HELP=no
+
+# Get parameters
+while [ $1 ]; do
+ case $1 in
+ -o | --offset )
+ shift
+ OFFSET=$1
+ ;;
+ -l | --length )
+ shift
+ LENGTH=$1
+ ;;
+ -s | --chunks-size )
+ shift
+ CHUNKSSIZE=$1
+ ;;
+ -d | --export-dir )
+ shift
+ EXPORTDIR=$1
+ ;;
+ -L | --log-dir )
+ shift
+ LOGDIR=$1
+ ;;
+ -r | --remove-logs )
+ RMLOGS=yes
+ ;;
+ -t | --type )
+ shift
+ TYPE=$1
+ ;;
+ -f | --force )
+ NOCONFIRM=yes
+ ;;
+ -h | --help)
+ HELP=yes
+ ;;
+ * )
+ usage
+ exit 1
+ esac
+ shift
+done
+
+# Show the help and stop when it was requested.
+if [ "$HELP" = "yes" ]; then
+    usage
+    exit 0
+fi
+
+if [ -z "$LENGTH" ]; then
+    echo "--length parameter is mandatory"
+    exit 1
+fi
+
+# Map the record type to the matching rebuild_zebra.pl switch; an unknown
+# type falls back to biblios.
+TYPESWITCH=
+case "$TYPE" in
+    biblios )
+        TYPESWITCH=-b
+        ;;
+    authorities )
+        TYPESWITCH=-a
+        ;;
+    * )
+        echo "'$TYPE' is an unknown type. Defaulting to 'biblios'"
+        TYPESWITCH=-b
+        TYPE=biblios
+        ;;
+esac
+
+# Locate the external tools. "command -v" is the POSIX way to look up a
+# command; "which" is not guaranteed to be installed.
+ZEBRAIDX=$(command -v zebraidx)
+if [ -z "$ZEBRAIDX" ]; then
+    echo "zebraidx not found"
+    exit 1
+fi
+
+YAZMARCDUMP=$(command -v yaz-marcdump)
+if [ -z "$YAZMARCDUMP" ]; then
+    echo "yaz-marcdump not found"
+    exit 1
+fi
+
+# rebuild_zebra.pl is expected in the same directory as this script.
+REBUILDZEBRA="$(dirname "$0")/rebuild_zebra.pl"
+if [ ! -f "$REBUILDZEBRA" ]; then
+    echo "$REBUILDZEBRA: file not found"
+    exit 1
+fi
+
+# Show the effective configuration before doing anything.
+echo ""
+echo "Configuration"
+echo "========================================================================="
+echo "Start at offset: $OFFSET"
+echo "Total number of records to index: $LENGTH"
+echo "Initial chunk size: $CHUNKSSIZE"
+echo "Export directory: $EXPORTDIR"
+echo "Log directory: $LOGDIR"
+echo "Remove logs before start? $RMLOGS"
+echo "Type of record: $TYPE"
+echo "-------------------------------------------------------------------------"
+echo "zebraidx path: $ZEBRAIDX"
+echo "yaz-marcdump path: $YAZMARCDUMP"
+echo "rebuild_zebra path: $REBUILDZEBRA"
+echo "========================================================================="
+
+# Ask for confirmation unless --force was given. An empty answer means yes.
+# The answer is matched case-insensitively because the prompt advertises an
+# uppercase "Y".
+if [ "$NOCONFIRM" != "yes" ]; then
+    # printf instead of "echo -n", whose behaviour differs between shells.
+    printf '%s' "Confirm ? [Y/n] "
+    read -r response
+    case "$response" in
+        "" | [yY] | [yY][eE][sS] )
+            ;;
+        * )
+            exit 0
+            ;;
+    esac
+fi
+
+# Create the export and log directories. Paths are quoted so directories
+# whose names contain spaces work.
+if ! mkdir -p "$EXPORTDIR"; then
+    echo "Failed to create directory $EXPORTDIR. Aborting."
+    exit 1
+fi
+
+if ! mkdir -p "$LOGDIR"; then
+    echo "Failed to create directory $LOGDIR. Aborting."
+    exit 1
+fi
+
+# Remove the logs of previous runs when --remove-logs was given.
+if [ "$RMLOGS" = "yes" ]; then
+    rm -f "$LOGDIR"/*.log
+fi
+
+REBUILDZEBRA_CMD="$REBUILDZEBRA $TYPESWITCH -v -k -d $EXPORTDIR --offset $OFFSET --length $LENGTH --skip-index"
+echo "\n$REBUILDZEBRA_CMD"
+$REBUILDZEBRA_CMD
+
+EXPORTFILE=
+case $TYPE in
+ biblios )
+ EXPORTFILE="$EXPORTDIR/biblio/exported_records"
+ ;;
+ authorities )
+ EXPORTFILE="$EXPORTDIR/authority/exported_records"
+ ;;
+ * )
+ echo "Error: TYPE '$TYPE' is not supported"
+ exit 1
+esac
+
+CONFIGFILE="$(dirname $KOHA_CONF)/zebradb/zebra-$TYPE.cfg"
+
+
+indexfile $EXPORTFILE $CHUNKSSIZE