my $as_xml;
my $process_zebraqueue;
my $do_not_clear_zebraqueue;
-my $verbose_logging;
+my $length;
+my $where;
+my $offset;
+my $verbose_logging = 0;
my $zebraidx_log_opt = " -v none,fatal,warn ";
my $result = GetOptions(
'd:s' => \$directory,
'x' => \$as_xml,
'y' => \$do_not_clear_zebraqueue,
'z' => \$process_zebraqueue,
- 'v' => \$verbose_logging,
+ 'where:s' => \$where,
+ 'length:i' => \$length,
+ 'offset:i' => \$offset,
+ 'v+' => \$verbose_logging,
);
die $msg;
}
-if ($authorities and $as_xml) {
- my $msg = "Cannot specify both -a and -x\n";
- $msg .= "Please do '$0 --help' to see usage.\n";
- die $msg;
-}
-
if ( !$as_xml and $nosanitize ) {
my $msg = "Cannot specify both -no_xml and -nosanitize\n";
$msg .= "Please do '$0 --help' to see usage.\n";
# -v is for verbose, which seems backwards here because of how logging is set
# on the CLI of zebraidx. It works this way. The default is to not log much
-if ($verbose_logging) {
- $zebraidx_log_opt = '';
+if ($verbose_logging >= 2) {
+ $zebraidx_log_opt = '-v none,fatal,warn,all';
}
my $use_tempdir = 0;
my $authorityserverdir = C4::Context->zebraconfig('authorityserver')->{directory};
my $kohadir = C4::Context->config('intranetdir');
+my $bib_index_mode = C4::Context->config('zebra_bib_index_mode') || 'grs1';
+my $auth_index_mode = C4::Context->config('zebra_auth_index_mode') || 'dom';
+
my $dbh = C4::Context->dbh;
my ($biblionumbertagfield,$biblionumbertagsubfield) = &GetMarcFromKohaField("biblio.biblionumber","");
my ($biblioitemnumbertagfield,$biblioitemnumbertagsubfield) = &GetMarcFromKohaField("biblioitems.biblioitemnumber","");
sub check_zebra_dirs {
my ($base) = shift() . '/';
my $needed_repairing = 0;
- my @dirs = ( '', 'key', 'register', 'shadow' );
+ my @dirs = ( '', 'key', 'register', 'shadow', 'tmp' );
foreach my $dir (@dirs) {
my $bdir = $base . $dir;
if (! -d $bdir) {
}
sub select_all_authorities {
- my $sth = $dbh->prepare("SELECT authid FROM auth_header");
+ my $strsth=qq{SELECT authid FROM auth_header};
+ $strsth.=qq{ WHERE $where } if ($where);
+ $strsth.=qq{ LIMIT $length } if ($length && !$offset);
+ $strsth.=qq{ LIMIT $offset,$length } if ($length && $offset);
+ my $sth = $dbh->prepare($strsth);
$sth->execute();
return $sth;
}
sub select_all_biblios {
- my $sth = $dbh->prepare("SELECT biblionumber FROM biblioitems ORDER BY biblionumber");
+ # Returns an executed statement handle yielding biblionumbers, optionally
+ # filtered by --where and paged by --offset/--length.
+ my $strsth = qq{ SELECT biblionumber FROM biblioitems };
+ $strsth.=qq{ WHERE $where } if ($where);
+ $strsth.=qq{ LIMIT $length } if ($length && !$offset);
+ # --offset is only honoured together with --length (same rule as
+ # select_all_authorities); alone it would produce an invalid "LIMIT n,".
+ $strsth.=qq{ LIMIT $offset,$length } if ($length && $offset);
+ my $sth = $dbh->prepare($strsth);
$sth->execute();
return $sth;
}
+sub include_xml_wrapper {
+ my $as_xml = shift;
+ my $record_type = shift;
+
+ return 0 unless $as_xml;
+ return 1 if $record_type eq 'biblio' and $bib_index_mode eq 'dom';
+ return 1 if $record_type eq 'authority' and $auth_index_mode eq 'dom';
+ return 0;
+
+}
+
sub export_marc_records_from_sth {
my ($record_type, $sth, $directory, $as_xml, $noxml, $nosanitize) = @_;
my $num_exported = 0;
- open (OUT, ">:utf8 ", "$directory/exported_records") or die $!;
+ open my $fh, '>:encoding(UTF-8) ', "$directory/exported_records" or die $!;
+ if (include_xml_wrapper($as_xml, $record_type)) {
+ # include XML declaration and root element
+ print {$fh} '<?xml version="1.0" encoding="UTF-8"?><collection>';
+ }
my $i = 0;
my ( $itemtag, $itemsubfield ) = GetMarcFromKohaField("items.itemnumber",'');
while (my ($record_number) = $sth->fetchrow_array) {
my @items = GetItemsInfo($record_number);
if (@items){
my $record = MARC::Record->new;
+ $record->encoding('UTF-8');
my @itemsrecord;
foreach my $item (@items){
my $record = Item2Marc($item, $record_number);
push @itemsrecord, $record->field($itemtag);
}
$record->insert_fields_ordered(@itemsrecord);
- my $itemsxml=$record->as_xml_record();
- my $searchstring = '<record>\n';
- my $index = index($itemsxml, '<record>\n', 0);
- $itemsxml = substr($itemsxml, $index + length($searchstring));
- $searchstring = '</record>';
- $marcxml = substr($marcxml, 0, index($marcxml, $searchstring));
- $marcxml .= $itemsxml;
+ my $itemsxml = $record->as_xml_record();
+ $marcxml =
+ substr($marcxml, 0, length($marcxml)-10) .
+ substr($itemsxml, index($itemsxml, "</leader>\n", 0) + 10);
}
}
if ( $marcxml ) {
- print OUT $marcxml if $marcxml;
+ print {$fh} $marcxml if $marcxml;
$num_exported++;
}
next;
}
my ($marc) = get_corrected_marc_record($record_type, $record_number, $noxml);
if (defined $marc) {
- # FIXME - when more than one record is exported and $as_xml is true,
- # the output file is not valid XML - it's just multiple <record> elements
- # strung together with no single root element. zebraidx doesn't seem
- # to care, though, at least if you're using the GRS-1 filter. It does
- # care if you're using the DOM filter, which requires valid XML file(s).
eval {
- print OUT ($as_xml) ? $marc->as_xml_record(C4::Context->preference('marcflavour')) : $marc->as_usmarc();
+ my $rec;
+ if ($as_xml) {
+ $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
+ $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
+ } else {
+ $rec = $marc->as_usmarc();
+ }
+ print {$fh} $rec;
$num_exported++;
};
if ($@) {
}
}
print "\nRecords exported: $num_exported\n" if ( $verbose_logging );
- close OUT;
+ print {$fh} '</collection>' if (include_xml_wrapper($as_xml, $record_type));
+ close $fh;
return $num_exported;
}
my ($record_type, $entries, $directory, $as_xml, $noxml, $records_deleted) = @_;
my $num_exported = 0;
- open (OUT, ">:utf8 ", "$directory/exported_records") or die $!;
+ open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!;
+ if (include_xml_wrapper($as_xml, $record_type)) {
+ # include XML declaration and root element
+ print {$fh} '<?xml version="1.0" encoding="UTF-8"?><collection>';
+ }
my $i = 0;
# Skip any deleted records. We check for this anyway, but this reduces error spam
print "\r$i" unless ($i++ %100 or !$verbose_logging);
my ($marc) = get_corrected_marc_record($record_type, $record_number, $noxml);
if (defined $marc) {
- # FIXME - when more than one record is exported and $as_xml is true,
- # the output file is not valid XML - it's just multiple <record> elements
- # strung together with no single root element. zebraidx doesn't seem
- # to care, though, at least if you're using the GRS-1 filter. It does
- # care if you're using the DOM filter, which requires valid XML file(s).
- print OUT ($as_xml) ? $marc->as_xml_record(C4::Context->preference('marcflavour')) : $marc->as_usmarc();
+ eval {
+ my $rec;
+ if ($as_xml) {
+ $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
+ $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
+ } else {
+ $rec = $marc->as_usmarc();
+ }
+ print {$fh} $rec;
+ # Count a record only here, once it has actually been written.
+ $num_exported++;
+ };
+ if ($@) {
+ warn "Error exporting record $record_number ($record_type) ".($noxml ? "not XML" : "XML");
+ }
- $num_exported++;
}
}
print "\nRecords exported: $num_exported\n" if ( $verbose_logging );
- close OUT;
+ print {$fh} '</collection>' if (include_xml_wrapper($as_xml, $record_type));
+ close $fh;
return $num_exported;
}
my ($record_type, $entries, $directory, $as_xml) = @_;
my $records_deleted = {};
- open (OUT, ">:utf8 ", "$directory/exported_records") or die $!;
+ open my $fh, '>:encoding(UTF-8)', "$directory/exported_records" or die $!;
+ if (include_xml_wrapper($as_xml, $record_type)) {
+ # include XML declaration and root element
+ print {$fh} '<?xml version="1.0" encoding="UTF-8"?><collection>';
+ }
my $i = 0;
foreach my $record_number (map { $_->{biblio_auth_number} } @$entries ) {
print "\r$i" unless ($i++ %100 or !$verbose_logging);
fix_unimarc_100($marc);
}
- print OUT ($as_xml) ? $marc->as_xml_record(C4::Context->preference("marcflavour")) : $marc->as_usmarc();
+ my $rec;
+ if ($as_xml) {
+ $rec = $marc->as_xml_record(C4::Context->preference('marcflavour'));
+ $rec =~ s!<\?xml version="1.0" encoding="UTF-8"\?>\n!!;
+ } else {
+ $rec = $marc->as_usmarc();
+ }
+ print {$fh} $rec;
$records_deleted->{$record_number} = 1;
}
print "\nRecords exported: $i\n" if ( $verbose_logging );
- close OUT;
+ print {$fh} '</collection>' if (include_xml_wrapper($as_xml, $record_type));
+ close $fh;
return $records_deleted;
if (defined $marc) {
fix_leader($marc);
- if ($record_type eq 'biblio') {
- my $succeeded = fix_biblio_ids($marc, $record_number);
- return unless $succeeded;
- } else {
+ if ($record_type eq 'authority') {
fix_authority_id($marc, $record_number);
}
if (C4::Context->preference("marcflavour") eq "UNIMARC") {
$fetch_sth->finish();
return unless $marc;
} else {
- eval { $marc = GetMarcBiblio($record_number); };
+ eval { $marc = GetMarcBiblio($record_number, 1); };
if ($@ || !$marc) {
# here we do warn since catching an exception
# means that the bib was found but failed
return;
}
}
- # ITEM
- C4::Biblio::EmbedItemsInMarcBiblio($marc, $record_number);
} else {
eval { $marc = GetAuthority($record_number); };
if ($@) {
my $marc = shift;
my $string;
- if ( length($marc->subfield( 100, "a" )) == 35 ) {
+ if ( length($marc->subfield( 100, "a" )) == 36 ) {
$string = $marc->subfield( 100, "a" );
my $f100 = $marc->field(100);
$marc->delete_field($f100);
$string = sprintf( "%-*s", 35, $string );
}
substr( $string, 22, 6, "frey50" );
- unless ( length($marc->subfield( 100, "a" )) == 35 ) {
+ unless ( length($marc->subfield( 100, "a" )) == 36 ) {
$marc->delete_field($marc->field(100));
$marc->insert_grouped_field(MARC::Field->new( 100, "", "", "a" => $string ));
}
-v increase the amount of logging. Normally only
warnings and errors from the indexing are shown.
+ Use log level 2 (-v -v) to include all Zebra logs.
+
+ --length 1234 how many biblio you want to export
+ --offset 1243 offset you want to start to
+ example: --offset 500 --length=500 will result in a LIMIT 500,500 (exporting 500 records, starting after the 500th one)
+ note that the numbers are NOT related to biblionumber, that's the intended behaviour.
+ --where let you specify a WHERE query, like itemtype='BOOK'
+ or something like that
- -munge-config Deprecated option to try
+ --munge-config Deprecated option to try
to fix Zebra config files.
--help or -h show this message.
_USAGE_
# AUTHORITIES : copying mandatory files
#
unless (-f C4::Context->zebraconfig('authorityserver')->{config}) {
- open ZD,">:utf8 ",C4::Context->zebraconfig('authorityserver')->{config};
- print ZD "
+ open my $zd, '>:encoding(UTF-8)' ,C4::Context->zebraconfig('authorityserver')->{config};
+ print {$zd} "
# generated by KOHA/misc/migration_tools/rebuild_zebra.pl
profilePath:\${srcdir:-.}:$authorityserverdir/tab/:$tabdir/tab/:\${srcdir:-.}/tab/
# BIBLIOS : copying mandatory files
#
unless (-f C4::Context->zebraconfig('biblioserver')->{config}) {
- open ZD,">:utf8 ",C4::Context->zebraconfig('biblioserver')->{config};
- print ZD "
+ open my $zd, '>:encoding(UTF-8)', C4::Context->zebraconfig('biblioserver')->{config};
+ print {$zd} "
# generated by KOHA/misc/migrtion_tools/rebuild_zebra.pl
profilePath:\${srcdir:-.}:$biblioserverdir/tab/:$tabdir/tab/:\${srcdir:-.}/tab/