7 use File::Temp qw/ tempdir /;
10 use C4::AuthoritiesMarc;
13 # script that checks the zebradir structure and creates directories & mandatory files if needed
17 $|=1; # flushes output
19 # limit for database dumping
31 my $result = GetOptions(
39 'munge-config' => \$do_munge,
41 'h|help' => \$want_help,
46 if (not $result or $want_help) {
51 if (not $biblios and not $authorities) {
52 my $msg = "Must specify -b or -a to reindex bibs or authorites\n";
53 $msg .= "Please do '$0 --help' to see usage.\n";
63 $directory = tempdir(CLEANUP => ($keep_export ? 0 : 1));
67 my $biblioserverdir = C4::Context->zebraconfig('biblioserver')->{directory};
68 my $authorityserverdir = C4::Context->zebraconfig('authorityserver')->{directory};
70 my $kohadir = C4::Context->config('intranetdir');
71 my $dbh = C4::Context->dbh;
72 my ($biblionumbertagfield,$biblionumbertagsubfield) = &GetMarcFromKohaField("biblio.biblionumber","");
73 my ($biblioitemnumbertagfield,$biblioitemnumbertagsubfield) = &GetMarcFromKohaField("biblioitems.biblioitemnumber","");
75 print "Zebra configuration information\n";
76 print "================================\n";
77 print "Zebra biblio directory = $biblioserverdir\n";
78 print "Zebra authorities directory = $authorityserverdir\n";
79 print "Koha directory = $kohadir\n";
80 print "BIBLIONUMBER in : $biblionumbertagfield\$$biblionumbertagsubfield\n";
81 print "BIBLIOITEMNUMBER in : $biblioitemnumbertagfield\$$biblioitemnumbertagsubfield\n";
82 print "================================\n";
90 # exporting authorities
93 print "====================\n";
94 print "SKIPPING authorities export\n";
95 print "====================\n";
97 print "====================\n";
98 print "exporting authorities\n";
99 print "====================\n";
100 mkdir "$directory" unless (-d $directory);
101 mkdir "$directory/authorities" unless (-d "$directory/authorities");
102 my $dbh=C4::Context->dbh;
104 $sth=$dbh->prepare("select authid,marc from auth_header");
106 export_marc_records('authority', $sth, "$directory/authorities", $as_xml, $noxml);
110 # and reindexing everything
112 print "====================\n";
113 print "REINDEXING zebra\n";
114 print "====================\n";
115 my $record_fmt = ($as_xml) ? 'marcxml' : 'iso2709' ;
116 do_indexing('authority', 'update', "$directory/authorities", $reset, $noshadow, $record_fmt);
118 print "skipping authorities\n";
120 #################################################################################################################
122 #################################################################################################################
129 print "====================\n";
130 print "SKIPPING biblio export\n";
131 print "====================\n";
133 print "====================\n";
134 print "exporting biblios\n";
135 print "====================\n";
136 mkdir "$directory" unless (-d $directory);
137 mkdir "$directory/biblios" unless (-d "$directory/biblios");
138 my $dbh=C4::Context->dbh;
139 my $sth = $dbh->prepare("SELECT biblionumber FROM biblioitems ORDER BY biblionumber");
141 export_marc_records('biblio', $sth, "$directory/biblios", $as_xml, $noxml);
145 # and reindexing everything
147 print "====================\n";
148 print "REINDEXING zebra\n";
149 print "====================\n";
150 my $record_fmt = ($as_xml) ? 'marcxml' : 'iso2709' ;
151 do_indexing('biblio', 'update', "$directory/biblios", $reset, $noshadow, $record_fmt);
153 print "skipping biblios\n";
156 print "====================\n";
158 print "====================\n";
160 print "NOTHING cleaned : the export $directory has been kept.\n";
161 print "You can re-run this script with the -s ";
163 print " and -d $directory parameters";
168 print "if you just want to rebuild zebra after changing the record.abs\n";
169 print "or another zebra config file\n";
171 unless ($use_tempdir) {
172 # if we're using a temporary directory
173 # created by File::Temp, it will be removed
175 rmtree($directory, 0, 1);
176 print "directory $directory deleted\n";
# export_marc_records($record_type, $sth, $directory, $as_xml, $noxml)
#
# Iterates over the rows of $sth and writes the serialized MARC record for
# each row to the file "$directory/exported_records".
#
#   $record_type - forwarded to get_corrected_marc_record(); the callers in
#                  this file pass 'authority' or 'biblio'
#   $sth         - statement handle; the first column of each fetched row is
#                  used as the record number
#   $directory   - directory in which "exported_records" is opened for writing
#   $as_xml      - true: serialize with as_xml_record(); false: as_usmarc()
#   $noxml       - forwarded to get_corrected_marc_record()
#
# Dies with $! when the output file cannot be opened.
# NOTE(review): some lines of this sub are not visible in this excerpt (for
# example the declaration of $i and the closing braces) -- confirm against
# the full file.
180 sub export_marc_records {
181 my ($record_type, $sth, $directory, $as_xml, $noxml) = @_;
183 open (OUT, ">:utf8 ", "$directory/exported_records") or die $!;
185 while (my ($record_number) = $sth->fetchrow_array) {
# Progress display: the counter is reprinted whenever the value of $i before
# the increment is a multiple of 100.
187 print "\r$i" unless ($i++ %100);
188 my ($marc) = get_corrected_marc_record($record_type, $record_number, $noxml);
190 # FIXME - when more than one record is exported and $as_xml is true,
191 # the output file is not valid XML - it's just multiple <record> elements
192 # strung together with no single root element. zebraidx doesn't seem
193 # to care, though, at least if you're using the GRS-1 filter. It does
194 # care if you're using the DOM filter, which requires valid XML file(s).
195 print OUT ($as_xml) ? $marc->as_xml_record() : $marc->as_usmarc();
198 print "\nRecords exported: $i\n";
# get_corrected_marc_record($record_type, $record_number, $noxml)
#
# Fetches a record through get_raw_marc_record() and runs the fix-up
# routines called below on it.
# For 'biblio' records, a false result from fix_biblio_ids() makes this sub
# return nothing (bare return).
# NOTE(review): several lines of this sub are not visible in this excerpt
# (the branch line that guards fix_authority_id, any check on $marc, and the
# final return) -- confirm against the full file before relying on this summary.
202 sub get_corrected_marc_record {
203 my ($record_type, $record_number, $noxml) = @_;
205 my $marc = get_raw_marc_record($record_type, $record_number, $noxml);
209 if ($record_type eq 'biblio') {
210 my $succeeded = fix_biblio_ids($marc, $record_number);
211 return unless $succeeded;
# presumably the non-biblio (authority) branch; the 'else' line is not visible here
213 fix_authority_id($marc, $record_number);
# when the "marcflavour" preference is UNIMARC the record is also passed to fix_unimarc_100()
215 if (C4::Context->preference("marcflavour") eq "UNIMARC") {
216 fix_unimarc_100($marc);
# get_raw_marc_record($record_type, $record_number, $noxml)
#
# Loads one MARC record into $marc. Three retrieval paths are visible:
#   - biblio, read directly from biblioitems.marc and parsed with
#     MARC::Record->new_from_usmarc()
#   - biblio, via GetMarcBiblio() wrapped in eval
#   - authority, via GetAuthority() wrapped in eval
# Each path emits a warning naming the record number on failure.
# NOTE(review): the conditions selecting between these paths (presumably
# $noxml for the two biblio paths), the declaration of $marc and the return
# statement are not visible in this excerpt -- confirm against the full file.
223 sub get_raw_marc_record {
224 my ($record_type, $record_number, $noxml) = @_;
227 if ($record_type eq 'biblio') {
# direct path: fetch the stored MARC blob for this biblionumber
229 my $fetch_sth = $dbh->prepare_cached("SELECT marc FROM biblioitems WHERE biblionumber = ?");
230 $fetch_sth->execute($record_number);
231 if (my ($blob) = $fetch_sth->fetchrow_array) {
232 $marc = MARC::Record->new_from_usmarc($blob);
234 warn "failed to retrieve biblio $record_number";
236 $fetch_sth->finish();
# API path: eval keeps an exception from GetMarcBiblio from aborting the run
238 eval { $marc = GetMarcBiblio($record_number); };
240 warn "failed to retrieve biblio $record_number";
# authority path: same eval-and-warn pattern using GetAuthority
245 eval { $marc = GetAuthority($record_number); };
247 warn "failed to retrieve authority $record_number";
255 # FIXME - this routine is suspect
256 # It blanks the Leader/00-05 and Leader/12-16 to
257 # force them to be recalculated correctly when
258 # the $marc->as_usmarc() or $marc->as_xml() is called.
259 # But why is this necessary? It would be a serious bug
260 # in MARC::Record (definitely) and MARC::File::XML (arguably)
261 # if they are emitting incorrect leader values.
264 my $leader = $marc->leader;
265 substr($leader, 0, 5) = ' ';
266 substr($leader, 10, 7) = '22 ';
267 $marc->leader(substr($leader, 0, 24));
# Looks up the biblioitemnumber for the given biblionumber and writes both
# ids into the MARC record via C4::Biblio::_koha_marc_update_bib_ids().
# NOTE(review): the sub header, the origin of $dbh and the return statements
# are not visible in this excerpt; the caller get_corrected_marc_record()
# treats the return value as a success flag -- confirm against the full file.
271 # FIXME - it is essential to ensure that the biblionumber is present,
272 # otherwise, Zebra will choke on the record. However, this
273 # logic belongs in the relevant C4::Biblio APIs.
274 my ($marc, $biblionumber) = @_;
276 my $sth = $dbh->prepare(
277 "SELECT biblioitemnumber FROM biblioitems WHERE biblionumber=?");
278 $sth->execute($biblionumber);
279 my ($biblioitemnumber) = $sth->fetchrow_array;
# a missing or false biblioitemnumber is reported with a warning
281 unless ($biblioitemnumber) {
282 warn "failed to get biblioitemnumber for biblio $biblionumber";
286 # FIXME - this is cheating on two levels
287 # 1. C4::Biblio::_koha_marc_update_bib_ids is meant to be an internal function
288 # 2. Making sure that the biblionumber and biblioitemnumber are correct and
289 # present in the MARC::Record object ought to be part of GetMarcBiblio.
291 # On the other hand, this is better for now than what rebuild_zebra.pl used to
292 # do, which was to duplicate the code for inserting the biblionumber
293 # and biblioitemnumber
294 C4::Biblio::_koha_marc_update_bib_ids($marc, '', $biblionumber, $biblioitemnumber);
# fix_authority_id($marc, $authid)
#
# Makes sure control field 001 of $marc holds $authid: when the current 001
# value differs, a message plus the formatted record is printed, the existing
# 001 field is deleted and a new 001 containing $authid is inserted.
299 sub fix_authority_id {
300 # FIXME - as with fix_biblio_ids, the authid must be present
301 # for Zebra's sake. However, this really belongs
302 # in C4::AuthoritiesMarc.
303 my ($marc, $authid) = @_;
# NOTE(review): data() is called on the result of field('001') without first
# checking that the field exists; presumably this dies for a record that has
# no 001 at all -- confirm against MARC::Record's field() documentation.
304 unless ($marc->field('001')->data() eq $authid){
305 print "$authid don't exist for this authority :".$marc->as_formatted;
306 $marc->delete_field($marc->field('001'));
307 $marc->insert_fields_ordered(MARC::Field->new('001',$authid));
# fix_unimarc_100($marc)
#
# Rewrites subfield 100$a of $marc using a 35-character string.
# NOTE(review): the lines that unpack $marc, declare $string and separate the
# two branches below are not visible in this excerpt; the per-line notes
# describe only what each visible statement does.
311 sub fix_unimarc_100 {
312 # FIXME - again, if this is necessary, it belongs in C4::AuthoritiesMarc.
# an existing 100$a of exactly 35 characters is kept as $string and its field removed
316 if ( length($marc->subfield( 100, "a" )) == 35 ) {
317 $string = $marc->subfield( 100, "a" );
318 my $f100 = $marc->field(100);
319 $marc->delete_field($f100);
# presumably the fallback branch: start from today's date as YYYYMMDD ...
322 $string = POSIX::strftime( "%Y%m%d", localtime );
# ... and left-justify it, padding with spaces to 35 characters
324 $string = sprintf( "%-*s", 35, $string );
# overwrite the 6 characters starting at offset 22 with "frey50"
# (presumably UNIMARC language/charset codes -- TODO confirm against the field 100 spec)
326 substr( $string, 22, 6, "frey50" );
# unless a 35-character 100$a is present now, drop field 100 and insert a new one holding $string
327 unless ( length($marc->subfield( 100, "a" )) == 35 ) {
328 $marc->delete_field($marc->field(100));
329 $marc->insert_grouped_field(MARC::Field->new( 100, "", "", "a" => $string ));
# Runs zebraidx for one record type: an optional "init", then the requested
# $op on $record_dir, then "commit" unless $noshadow is set.
#   $record_type   - 'biblio' selects biblioserver/biblios; any other value
#                    selects authorityserver/authorities
#   $op            - zebraidx command applied to $record_dir (callers in this
#                    file pass 'update')
#   $record_dir    - directory handed to zebraidx
#   $reset_index   - true: run "init" first
#   $noshadow      - interpolated verbatim into the update command, so it is
#                    presumably either empty or a zebraidx option string -- TODO confirm
#   $record_format - value given to zebraidx -g
# NOTE(review): the sub header is not visible in this excerpt; the name
# do_indexing is inferred from the call sites that pass these six arguments.
334 my ($record_type, $op, $record_dir, $reset_index, $noshadow, $record_format) = @_;
336 my $zebra_server = ($record_type eq 'biblio') ? 'biblioserver' : 'authorityserver';
337 my $zebra_db_name = ($record_type eq 'biblio') ? 'biblios' : 'authorities';
338 my $zebra_config = C4::Context->zebraconfig($zebra_server)->{'config'};
# $zebra_db_dir is not used in any of the visible lines below
339 my $zebra_db_dir = C4::Context->zebraconfig($zebra_server)->{'directory'};
# NOTE(review): each command is one interpolated string, so Perl hands it to
# the shell when it contains shell metacharacters; the return status of
# system() is not checked. Consider the list form of system() and checking $?.
341 system("zebraidx -c $zebra_config -g $record_format -d $zebra_db_name init") if $reset_index;
342 system("zebraidx -c $zebra_config $noshadow -g $record_format -d $zebra_db_name $op $record_dir");
343 system("zebraidx -c $zebra_config -g $record_format -d $zebra_db_name commit") unless $noshadow;
349 $0: reindex MARC bibs and/or authorities in Zebra.
351 Use this batch job to reindex all biblio or authority
352 records in your Koha database. This job is useful
353 only if you are using Zebra; if you are using the 'NoZebra'
354 mode, this job should not be used.
357 -b index bibliographic records
359 -a index authority records
361 -r clear Zebra index before
362 adding records to index
364 -d Temporary directory for indexing.
365 If not specified, one is automatically
366 created. The export directory
367 is automatically deleted unless
368 you supply the -k switch.
370 -k Do not delete export directory.
372 -s Skip export. Used if you have
373 already exported the records
376 -noxml index from ISO MARC blob
377 instead of MARC XML. This
378 option is recommended only
381 -x export and index as xml instead of is02709 (biblios only).
382 use this if you might have records > 99,999 chars,
384 -w skip shadow indexing for this batch
386 -munge-config Deprecated option to try
387 to fix Zebra config files.
388 --help or -h show this message.
392 # FIXME: the following routines are deprecated and
393 # will be removed once it is determined whether
394 # a script to fix Zebra configuration files is
398 # creating zebra-biblios.cfg depending on system
401 # getting zebraidx directory
403 foreach (qw(/usr/local/bin/zebraidx
412 unless ($zebraidxdir) {
414 ERROR: could not find zebraidx directory
415 ERROR: Either zebra is not installed,
416 ERROR: or it's in a directory I don't checked.
417 ERROR: do a which zebraidx and edit this file to add the result you get
421 $zebraidxdir =~ s/\/bin\/.*//;
422 print "Info : zebra is in $zebraidxdir \n";
424 # getting modules directory
426 foreach (qw(/usr/local/lib/idzebra-2.0/modules/mod-grs-xml.so
427 /usr/local/lib/idzebra/modules/mod-grs-xml.so
428 /usr/lib/idzebra/modules/mod-grs-xml.so
429 /usr/lib/idzebra-2.0/modules/mod-grs-xml.so
436 unless ($modulesdir) {
438 ERROR: could not find mod-grs-xml.so directory
439 ERROR: Either zebra is not properly compiled (libxml2 is not setup and you don t have mod-grs-xml.so,
440 ERROR: or it's in a directory I don't checked.
441 ERROR: find where mod-grs-xml.so is and edit this file to add the result you get
445 $modulesdir =~ s/\/modules\/.*//;
446 print "Info: zebra modules dir : $modulesdir\n";
448 # getting tab directory
450 foreach (qw(/usr/local/share/idzebra/tab/explain.att
451 /usr/local/share/idzebra-2.0/tab/explain.att
452 /usr/share/idzebra/tab/explain.att
453 /usr/share/idzebra-2.0/tab/explain.att
462 ERROR: could not find explain.att directory
463 ERROR: Either zebra is not properly compiled,
464 ERROR: or it's in a directory I don't checked.
465 ERROR: find where explain.att is and edit this file to add the result you get
469 $tabdir =~ s/\/tab\/.*//;
470 print "Info: tab dir : $tabdir\n";
473 # AUTHORITIES creating directory structure
475 my $created_dir_or_file = 0;
477 print "====================\n";
478 print "checking directories & files for authorities\n";
479 print "====================\n";
480 unless (-d "$authorityserverdir") {
481 system("mkdir -p $authorityserverdir");
482 print "Info: created $authorityserverdir\n";
483 $created_dir_or_file++;
485 unless (-d "$authorityserverdir/lock") {
486 mkdir "$authorityserverdir/lock";
487 print "Info: created $authorityserverdir/lock\n";
488 $created_dir_or_file++;
490 unless (-d "$authorityserverdir/register") {
491 mkdir "$authorityserverdir/register";
492 print "Info: created $authorityserverdir/register\n";
493 $created_dir_or_file++;
495 unless (-d "$authorityserverdir/shadow") {
496 mkdir "$authorityserverdir/shadow";
497 print "Info: created $authorityserverdir/shadow\n";
498 $created_dir_or_file++;
500 unless (-d "$authorityserverdir/tab") {
501 mkdir "$authorityserverdir/tab";
502 print "Info: created $authorityserverdir/tab\n";
503 $created_dir_or_file++;
505 unless (-d "$authorityserverdir/key") {
506 mkdir "$authorityserverdir/key";
507 print "Info: created $authorityserverdir/key\n";
508 $created_dir_or_file++;
511 unless (-d "$authorityserverdir/etc") {
512 mkdir "$authorityserverdir/etc";
513 print "Info: created $authorityserverdir/etc\n";
514 $created_dir_or_file++;
518 # AUTHORITIES : copying mandatory files
520 # the record model, depending on marc flavour
521 unless (-f "$authorityserverdir/tab/record.abs") {
522 if (C4::Context->preference("marcflavour") eq "UNIMARC") {
523 system("cp -f $kohadir/etc/zebradb/marc_defs/unimarc/authorities/record.abs $authorityserverdir/tab/record.abs");
524 print "Info: copied record.abs for UNIMARC\n";
526 system("cp -f $kohadir/etc/zebradb/marc_defs/marc21/authorities/record.abs $authorityserverdir/tab/record.abs");
527 print "Info: copied record.abs for USMARC\n";
529 $created_dir_or_file++;
531 unless (-f "$authorityserverdir/tab/sort-string-utf.chr") {
532 system("cp -f $kohadir/etc/zebradb/lang_defs/fr/sort-string-utf.chr $authorityserverdir/tab/sort-string-utf.chr");
533 print "Info: copied sort-string-utf.chr\n";
534 $created_dir_or_file++;
536 unless (-f "$authorityserverdir/tab/word-phrase-utf.chr") {
537 system("cp -f $kohadir/etc/zebradb/lang_defs/fr/sort-string-utf.chr $authorityserverdir/tab/word-phrase-utf.chr");
538 print "Info: copied word-phase-utf.chr\n";
539 $created_dir_or_file++;
541 unless (-f "$authorityserverdir/tab/auth1.att") {
542 system("cp -f $kohadir/etc/zebradb/authorities/etc/bib1.att $authorityserverdir/tab/auth1.att");
543 print "Info: copied auth1.att\n";
544 $created_dir_or_file++;
546 unless (-f "$authorityserverdir/tab/default.idx") {
547 system("cp -f $kohadir/etc/zebradb/etc/default.idx $authorityserverdir/tab/default.idx");
548 print "Info: copied default.idx\n";
549 $created_dir_or_file++;
552 unless (-f "$authorityserverdir/etc/ccl.properties") {
553 # system("cp -f $kohadir/etc/zebradb/ccl.properties ".C4::Context->zebraconfig('authorityserver')->{ccl2rpn});
554 system("cp -f $kohadir/etc/zebradb/ccl.properties $authorityserverdir/etc/ccl.properties");
555 print "Info: copied ccl.properties\n";
556 $created_dir_or_file++;
558 unless (-f "$authorityserverdir/etc/pqf.properties") {
559 # system("cp -f $kohadir/etc/zebradb/pqf.properties ".C4::Context->zebraconfig('authorityserver')->{ccl2rpn});
560 system("cp -f $kohadir/etc/zebradb/pqf.properties $authorityserverdir/etc/pqf.properties");
561 print "Info: copied pqf.properties\n";
562 $created_dir_or_file++;
566 # AUTHORITIES : creating the authorityserver Zebra config file if it does not exist
568 unless (-f C4::Context->zebraconfig('authorityserver')->{config}) {
569 open ZD,">:utf8 ",C4::Context->zebraconfig('authorityserver')->{config};
571 # generated by KOHA/misc/migration_tools/rebuild_zebra.pl
572 profilePath:\${srcdir:-.}:$authorityserverdir/tab/:$tabdir/tab/:\${srcdir:-.}/tab/
575 # Files that describe the attribute sets supported.
580 modulePath:$modulesdir/modules/
581 # Specify record type
582 iso2709.recordType:grs.marcxml.record
584 recordId: (auth1,Local-Number)
590 lockDir: $authorityserverdir/lock
593 register: $authorityserverdir/register:4G
594 shadow: $authorityserverdir/shadow:4G
596 # Temp File area for result sets
597 setTmpDir: $authorityserverdir/tmp
599 # Temp File area for index program
600 keyTmpDir: $authorityserverdir/key
602 # Approx. Memory usage during indexing
606 print "Info: creating zebra-authorities.cfg\n";
607 $created_dir_or_file++;
610 if ($created_dir_or_file) {
611 print "Info: created : $created_dir_or_file directories & files\n";
613 print "Info: file & directories OK\n";
618 print "====================\n";
619 print "checking directories & files for biblios\n";
620 print "====================\n";
623 # BIBLIOS : creating directory structure
625 unless (-d "$biblioserverdir") {
626 system("mkdir -p $biblioserverdir");
627 print "Info: created $biblioserverdir\n";
628 $created_dir_or_file++;
630 unless (-d "$biblioserverdir/lock") {
631 mkdir "$biblioserverdir/lock";
632 print "Info: created $biblioserverdir/lock\n";
633 $created_dir_or_file++;
635 unless (-d "$biblioserverdir/register") {
636 mkdir "$biblioserverdir/register";
637 print "Info: created $biblioserverdir/register\n";
638 $created_dir_or_file++;
640 unless (-d "$biblioserverdir/shadow") {
641 mkdir "$biblioserverdir/shadow";
642 print "Info: created $biblioserverdir/shadow\n";
643 $created_dir_or_file++;
645 unless (-d "$biblioserverdir/tab") {
646 mkdir "$biblioserverdir/tab";
647 print "Info: created $biblioserverdir/tab\n";
648 $created_dir_or_file++;
650 unless (-d "$biblioserverdir/key") {
651 mkdir "$biblioserverdir/key";
652 print "Info: created $biblioserverdir/key\n";
653 $created_dir_or_file++;
655 unless (-d "$biblioserverdir/etc") {
656 mkdir "$biblioserverdir/etc";
657 print "Info: created $biblioserverdir/etc\n";
658 $created_dir_or_file++;
662 # BIBLIOS : copying mandatory files
664 # the record model, depending on marc flavour
665 unless (-f "$biblioserverdir/tab/record.abs") {
666 if (C4::Context->preference("marcflavour") eq "UNIMARC") {
667 system("cp -f $kohadir/etc/zebradb/marc_defs/unimarc/biblios/record.abs $biblioserverdir/tab/record.abs");
668 print "Info: copied record.abs for UNIMARC\n";
670 system("cp -f $kohadir/etc/zebradb/marc_defs/marc21/biblios/record.abs $biblioserverdir/tab/record.abs");
671 print "Info: copied record.abs for USMARC\n";
673 $created_dir_or_file++;
675 unless (-f "$biblioserverdir/tab/sort-string-utf.chr") {
676 system("cp -f $kohadir/etc/zebradb/lang_defs/fr/sort-string-utf.chr $biblioserverdir/tab/sort-string-utf.chr");
677 print "Info: copied sort-string-utf.chr\n";
678 $created_dir_or_file++;
680 unless (-f "$biblioserverdir/tab/word-phrase-utf.chr") {
681 system("cp -f $kohadir/etc/zebradb/lang_defs/fr/sort-string-utf.chr $biblioserverdir/tab/word-phrase-utf.chr");
682 print "Info: copied word-phase-utf.chr\n";
683 $created_dir_or_file++;
685 unless (-f "$biblioserverdir/tab/bib1.att") {
686 system("cp -f $kohadir/etc/zebradb/biblios/etc/bib1.att $biblioserverdir/tab/bib1.att");
687 print "Info: copied bib1.att\n";
688 $created_dir_or_file++;
690 unless (-f "$biblioserverdir/tab/default.idx") {
691 system("cp -f $kohadir/etc/zebradb/etc/default.idx $biblioserverdir/tab/default.idx");
692 print "Info: copied default.idx\n";
693 $created_dir_or_file++;
695 unless (-f "$biblioserverdir/etc/ccl.properties") {
696 # system("cp -f $kohadir/etc/zebradb/ccl.properties ".C4::Context->zebraconfig('biblioserver')->{ccl2rpn});
697 system("cp -f $kohadir/etc/zebradb/ccl.properties $biblioserverdir/etc/ccl.properties");
698 print "Info: copied ccl.properties\n";
699 $created_dir_or_file++;
701 unless (-f "$biblioserverdir/etc/pqf.properties") {
702 # system("cp -f $kohadir/etc/zebradb/pqf.properties ".C4::Context->zebraconfig('biblioserver')->{ccl2rpn});
703 system("cp -f $kohadir/etc/zebradb/pqf.properties $biblioserverdir/etc/pqf.properties");
704 print "Info: copied pqf.properties\n";
705 $created_dir_or_file++;
709 # BIBLIOS : creating the biblioserver Zebra config file if it does not exist
711 unless (-f C4::Context->zebraconfig('biblioserver')->{config}) {
712 open ZD,">:utf8 ",C4::Context->zebraconfig('biblioserver')->{config};
714 # generated by KOHA/misc/migrtion_tools/rebuild_zebra.pl
715 profilePath:\${srcdir:-.}:$biblioserverdir/tab/:$tabdir/tab/:\${srcdir:-.}/tab/
718 # Files that describe the attribute sets supported.
723 modulePath:$modulesdir/modules/
724 # Specify record type
725 iso2709.recordType:grs.marcxml.record
727 recordId: (bib1,Local-Number)
733 lockDir: $biblioserverdir/lock
736 register: $biblioserverdir/register:4G
737 shadow: $biblioserverdir/shadow:4G
739 # Temp File area for result sets
740 setTmpDir: $biblioserverdir/tmp
742 # Temp File area for index program
743 keyTmpDir: $biblioserverdir/key
745 # Approx. Memory usage during indexing
749 print "Info: creating zebra-biblios.cfg\n";
750 $created_dir_or_file++;
753 if ($created_dir_or_file) {
754 print "Info: created : $created_dir_or_file directories & files\n";
756 print "Info: file & directories OK\n";