4 local scriptname=$(basename $0)
8 Index Koha records by chunks. It is useful when some record causes errors and
9 stops the indexing process. With this script, if indexing of one chunk fails,
10 that chunk is split into two or more chunks, and indexing continues on them.
11 rebuild_zebra.pl is called only once to export records. Splitting and indexing
12 are handled by this script (using yaz-marcdump and zebraidx).
15 $scriptname -t type -l X [-o X] [-s X] [-d /export/dir] [-L /log/dir] [-r] [-f]
18 -o | --offset Offset parameter of rebuild_zebra.pl
19 -l | --length Length parameter of rebuild_zebra.pl
20 -s | --chunks-size Initial chunk size (number of records indexed at once)
21 -d | --export-dir Where rebuild_zebra.pl will export data
22 -L | --log-dir Log directory
23 -r | --remove-logs Clean log directory before start
24 -t | --type Record type ('biblios' or 'authorities')
25 -f | --force Don't ask for confirmation before start
26 -h | --help Display this help message
34 if [ $chunkssize -lt 1 ]; then
35 echo "Fail on file $file"
38 local prefix="${file}_${chunkssize}_"
39 echo "Splitting file in chunks of $chunkssize records"
40 YAZMARCDUMP_CMD="$YAZMARCDUMP -n -s $prefix -C $chunkssize $file"
43 dir=$(dirname $prefix)
44 local files="$(find $dir -regex $prefix[0-9]+ | sort | tr '\n' ' ')"
45 for chunkfile in $files; do
46 echo "Indexing $chunkfile"
47 size=$($YAZMARCDUMP -p $chunkfile | grep '<!-- Record [0-9]\+ offset .* -->' | wc -l)
48 logfile="$LOGDIR/zebraidx.$(basename $chunkfile).log"
49 ZEBRAIDX_CMD="$ZEBRAIDX -c $CONFIGFILE -d $TYPE -g iso2709 update $chunkfile"
50 $ZEBRAIDX_CMD >$logfile 2>&1
51 grep "Records: $size" $logfile >/dev/null 2>&1
53 echo "Indexing failed. Split file and continue..."
54 indexfile $chunkfile $(($chunkssize/2))
56 ZEBRAIDX_CMD="$ZEBRAIDX -c $CONFIGFILE -d $TYPE -g iso2709 commit"
57 $ZEBRAIDX_CMD >> $logfile 2>&1
66 EXPORTDIR=/tmp/rebuild/export
67 LOGDIR=/tmp/rebuild/logs
116 if [ $HELP = "yes" ]; then
121 if [ -z $LENGTH ]; then
122 echo "--length parameter is mandatory"
135 echo "'$TYPE' is an unknown type. Defaulting to 'biblios'"
140 ZEBRAIDX=`which zebraidx`
141 if [ -z $ZEBRAIDX ]; then
142 echo "zebraidx not found"
146 YAZMARCDUMP=`which yaz-marcdump`
147 if [ -z $YAZMARCDUMP ]; then
148 echo "yaz-marcdump not found"
152 REBUILDZEBRA="`dirname $0`/rebuild_zebra.pl"
153 if [ ! -f $REBUILDZEBRA ]; then
154 echo "$REBUILDZEBRA: file not found"
160 echo "========================================================================="
161 echo "Start at offset: $OFFSET"
162 echo "Total number of records to index: $LENGTH"
163 echo "Initial chunk size: $CHUNKSSIZE"
164 echo "Export directory: $EXPORTDIR"
165 echo "Log directory: $LOGDIR"
166 echo "Remove logs before start? $RMLOGS"
167 echo "Type of record: $TYPE"
168 echo "-------------------------------------------------------------------------"
169 echo "zebraidx path: $ZEBRAIDX"
170 echo "yaz-marcdump path: $YAZMARCDUMP"
171 echo "rebuild_zebra path: $REBUILDZEBRA"
172 echo "========================================================================="
174 if [ $NOCONFIRM != "yes" ]; then
176 echo -n "Confirm ? [Y/n] "
178 if [ $response ] && [ $response != "yes" ] && [ $response != "y" ]; then
182 if [ $confirm = "n" ]; then
188 if [ $? -ne 0 ]; then
189 echo "Failed to create directory $EXPORTDIR. Aborting."
194 if [ $? -ne 0 ]; then
195 echo "Failed to create directory $LOGDIR. Aborting."
199 if [ $RMLOGS = "yes" ]; then
203 REBUILDZEBRA_CMD="$REBUILDZEBRA $TYPESWITCH -v -k -d $EXPORTDIR --offset $OFFSET --length $LENGTH --skip-index"
204 echo "\n$REBUILDZEBRA_CMD"
210 EXPORTFILE="$EXPORTDIR/biblio/exported_records"
213 EXPORTFILE="$EXPORTDIR/authority/exported_records"
216 echo "Error: TYPE '$TYPE' is not supported"
220 CONFIGFILE="$(dirname $KOHA_CONF)/zebradb/zebra-$TYPE.cfg"
223 indexfile $EXPORTFILE $CHUNKSSIZE