# Help text of the script. The lines that follow the assignment read as the
# body of a usage message (presumably emitted from a here-document whose
# opening line is not visible in this excerpt -- confirm).
# NOTE(review): $0 is unquoted, so a script path containing whitespace would
# be word-split before basename sees it; "$0" would be safer.
4 local scriptname=$(basename $0)
8 Index Koha records by chunks. It is useful when a record causes errors and
9 stops the indexing process. With this script, if indexing of one chunk fails,
10 that chunk is split into two or more chunks, and indexing continues on these chunks.
11 rebuild_zebra.pl is called only once to export records. Splitting and indexing
12 is handled by this script (using yaz-marcdump and zebraidx).
15 $scriptname [-t type] [-l X] [-o X] [-s X] [-d /export/dir] [-L /log/dir] [-r] [-f] [--reset-index]
18 -o | --offset Offset parameter of rebuild_zebra.pl.
20 -l | --length Length parameter of rebuild_zebra.pl. If omitted, the
21 length is automatically calculated to index all
23 -s | --chunks-size Initial chunk size (number of records indexed at once)
25 -d | --export-dir Where rebuild_zebra.pl will export data
27 -L | --log-dir Log directory
29 -r | --remove-logs Clean log directory before start
31 -t | --type Record type ('biblios' or 'authorities')
33 -f | --force Don't ask for confirmation before start
34 -h | --help Display this help message
35 --reset-index Reset Zebra index for 'type'
# Perl fragment (presumably the body of the splitfile helper called from the
# indexing routine -- confirm). The shell values of prefix and file are spliced
# in by closing and reopening the surrounding single-quoted shell string, so
# comments inside this fragment must not contain quote characters.
44 my $prefix = '"\"$prefix\""';
# $i is the index appended to the chunk file name; $count is incremented for
# every line that starts with the closing record tag.
46 my ($i,$count) = (0,0);
# NOTE(review): none of the open() calls below check for failure.
47 open(my $fh, "<", '"\"$file\""');
# Chunk files are named with the prefix followed by an index that is
# zero-padded to at least two digits.
48 open(my $out, ">", sprintf("$prefix%02d", $i));
53 open($out, ">", sprintf("$prefix%02d", $i));
# One record is counted per line beginning with </record>.
57 $count++ if ($line =~ m|^</record>|);
# The requested chunk size has been reached (the handling that follows is
# not visible in this excerpt).
58 if ($count == $size) {
# Fragment of the indexing routine (called as "indexfile FILE CHUNKSIZE").
# The routine calls itself with half the chunk size when a chunk fails, so a
# chunk size below 1 is reached once a single-record chunk has failed.
# NOTE(review): $chunkssize is unquoted; an empty value would make this test
# a syntax error instead of a clean failure.
73 if [ $chunkssize -lt 1 ]; then
74 echo "Fail on file $file"
# Chunk files are written next to the input file and named
# <file>_<chunk size>_<index>.
77 local prefix="${file}_${chunkssize}_"
78 echo "Splitting file in chunks of $chunkssize records"
79 splitfile $file $prefix $chunkssize
# NOTE(review): dir (and size, logfile, chunkfile below) are assigned without
# "local" in the visible lines although this routine is recursive -- confirm
# that no value is relied upon after the recursive call returns.
81 dir=$(dirname $prefix)
# NOTE(review): $prefix is used unquoted as a find regex, so shell globbing
# and regex metacharacters in the path can change what is matched. The result
# is a space-joined list that the for loop word-splits, so paths containing
# whitespace would be broken up.
82 local files="$(find $dir -regex $prefix[0-9]+ | sort | tr '\n' ' ')"
83 for chunkfile in $files; do
84 echo "Indexing $chunkfile"
# Number of records in this chunk, compared below with what zebraidx reports.
85 size=$(grep '^</record>' $chunkfile | wc -l)
86 logfile="$LOGDIR/zebraidx.$(basename $chunkfile).log"
87 ZEBRAIDX_CMD="$ZEBRAIDX -c $CONFIGFILE -d $TYPE -g marcxml update $chunkfile"
88 $ZEBRAIDX_CMD >$logfile 2>&1
# Success is detected by finding "Records: <size>" in the zebraidx log (the
# test on the grep exit status is not visible in this excerpt).
89 grep "Records: $size" $logfile >/dev/null 2>&1
91 echo "Indexing failed. Split file and continue..."
# Retry this chunk with half the chunk size (integer division).
92 indexfile $chunkfile $(($chunkssize/2))
# Run the zebraidx "commit" command; its output is appended to the same log.
94 ZEBRAIDX_CMD="$ZEBRAIDX -c $CONFIGFILE -d $TYPE -g marcxml commit"
95 $ZEBRAIDX_CMD >> $logfile 2>&1
# Export and log directories (presumably defaults that -d and -L override;
# the option parsing is not visible in this excerpt).
# NOTE(review): fixed, predictable paths under /tmp can collide with or be
# pre-created by other users on a shared host.
104 EXPORTDIR=/tmp/rebuild/export
105 LOGDIR=/tmp/rebuild/logs
# Sanity checks on options, environment and required tools.
# NOTE(review): the [ ] tests in this section use unquoted variables ($HELP,
# $KOHA_CONF, $PERL5LIB, $PERL, $LENGTH, $ZEBRAIDX, $REBUILDZEBRA); a value
# containing whitespace would break the test syntax. "-z" on an empty unquoted
# value only works because [ -z ] with no operand happens to be true.
158 if [ $HELP = "yes" ]; then
163 if [ -z $KOHA_CONF ]; then
164 echo "KOHA_CONF is not set"
168 if [ -z $PERL5LIB ]; then
169 echo "PERL5LIB is not set"
# SQL table used by the record count query further down.
183 SQLTABLE="auth_header"
186 echo "'$TYPE' is an unknown type. Defaulting to 'biblios'"
193 if [ -z $PERL ]; then
194 echo "perl not found"
# No explicit length given: the Perl snippet below counts the rows of
# $SQLTABLE through C4::Context (only part of the snippet is visible here).
198 if [ -z $LENGTH ]; then
201 my ($count) = C4::Context->dbh->selectrow_array(qq{
202 SELECT COUNT(*) FROM '"$SQLTABLE"'
# Locate zebraidx via PATH.
208 ZEBRAIDX=`which zebraidx`
209 if [ -z $ZEBRAIDX ]; then
210 echo "zebraidx not found"
# rebuild_zebra.pl is expected in the same directory as this script.
214 REBUILDZEBRA="`dirname $0`/rebuild_zebra.pl"
215 if [ ! -f $REBUILDZEBRA ]; then
216 echo "$REBUILDZEBRA: file not found"
# Print a summary of the effective settings and the resolved tool paths.
222 echo "========================================================================="
223 echo "KOHA_CONF: $KOHA_CONF"
224 echo "PERL5LIB: $PERL5LIB"
225 echo "-------------------------------------------------------------------------"
226 echo "Start at offset: $OFFSET"
227 echo "Total number of records to index: $LENGTH"
228 echo "Initial chunk size: $CHUNKSSIZE"
229 echo "Export directory: $EXPORTDIR"
230 echo "Log directory: $LOGDIR"
231 echo "Remove logs before start? $RMLOGS"
232 echo "Type of record: $TYPE"
233 echo "Reset index before start? $RESETINDEX"
234 echo "-------------------------------------------------------------------------"
235 echo "zebraidx path: $ZEBRAIDX"
236 echo "rebuild_zebra path: $REBUILDZEBRA"
237 echo "perl path: $PERL"
238 echo "========================================================================="
# Ask for confirmation unless NOCONFIRM is "yes" (presumably set by
# -f | --force -- confirm against the option parsing).
240 if [ $NOCONFIRM != "yes" ]; then
242 echo -n "Confirm ? [Y/n] "
# An empty answer passes this test untouched; any non-empty answer other than
# "yes" or "y" enters the branch.
# NOTE(review): the prompt advertises [Y/n], yet an uppercase "Y" also enters
# this branch -- presumably the hidden body sets confirm to "n", which would
# make "Y" a refusal; confirm. $response is unquoted, so a multi-word answer
# breaks the [ ] syntax.
244 if [ $response ] && [ $response != "yes" ] && [ $response != "y" ]; then
248 if [ $confirm = "n" ]; then
# $? is the status of a command not visible in this excerpt (presumably the
# creation of $EXPORTDIR, judging by the message).
254 if [ $? -ne 0 ]; then
255 echo "Failed to create directory $EXPORTDIR. Aborting."
# Same check for a hidden command, presumably the creation of $LOGDIR.
260 if [ $? -ne 0 ]; then
261 echo "Failed to create directory $LOGDIR. Aborting."
# Optional log cleanup (presumably driven by -r | --remove-logs; the cleanup
# command itself is not visible here).
265 if [ $RMLOGS = "yes" ]; then
# Export step: rebuild_zebra.pl is run with --skip-index so that it only
# exports records into $EXPORTDIR; indexing is done by this script.
269 REBUILDZEBRA_CMD="$REBUILDZEBRA $TYPESWITCH -v -x -k -d $EXPORTDIR --offset $OFFSET --length $LENGTH --skip-index"
# NOTE(review): whether "\n" becomes a newline depends on the shell's echo
# (dash expands it; the bash builtin prints it literally unless -e or
# xpg_echo is used). printf would behave the same everywhere.
270 echo "\n$REBUILDZEBRA_CMD"
# Path of the exported records file for each record type (the conditions
# selecting between them are not visible in this excerpt).
276 EXPORTFILE="$EXPORTDIR/biblio/exported_records"
279 EXPORTFILE="$EXPORTDIR/authority/exported_records"
282 echo "Error: TYPE '$TYPE' is not supported"
# The Zebra configuration file is looked up in the zebradb directory that
# sits next to the KOHA_CONF file.
286 CONFIGFILE="$(dirname $KOHA_CONF)/zebradb/zebra-$TYPE.cfg"
# Optional reset: runs the zebraidx "init" command before indexing.
288 if [ $RESETINDEX = "yes" ]; then
289 RESETINDEX_CMD="$ZEBRAIDX -c $CONFIGFILE init"
# NOTE(review): same echo "\n" portability caveat as for the export command.
290 echo "\n$RESETINDEX_CMD"
# Index the exported file, starting with the initial chunk size.
295 indexfile $EXPORTFILE $CHUNKSSIZE