# Import an iso2709 file into Koha 3
use strict;
-#use warnings;
+use warnings;
#use diagnostics;
BEGIN {
# find Koha's Perl modules
use C4::Context;
use C4::Biblio;
+use C4::Koha;
use C4::Charset;
use C4::Items;
use Unicode::Normalize;
use IO::File;
binmode(STDOUT, ":utf8");
-
my ( $input_marc_file, $number, $offset) = ('',0,0);
-my ($version, $delete, $test_parameter, $skip_marc8_conversion, $char_encoding, $verbose, $commit, $fk_off,$format);
+my ($version, $delete, $test_parameter, $skip_marc8_conversion, $char_encoding, $verbose, $commit, $fk_off,$format,$biblios,$authorities,$keepids,$match, $isbn_check, $logfile);
my ($sourcetag,$sourcesubfield,$idmapfl);
$|=1;
'v:s' => \$verbose,
'fk' => \$fk_off,
'm:s' => \$format,
+ 'l:s' => \$logfile,
+ 'k|keepids:s' => \$keepids,
+ 'b|biblios' => \$biblios,
+ 'a|authorities' => \$authorities,
+ 'match=s@' => \$match,
+ 'i|isbn' => \$isbn_check,
'x:s' => \$sourcetag,
'y:s' => \$sourcesubfield,
'idmap:s' => \$idmapfl,
);
+$biblios=!$authorities||$biblios;
if ($version || ($input_marc_file eq '')) {
print <<EOF
n the number of records to import. If missing, all the file is imported
o file offset before importing, ie number of records to skip.
commit the number of records to wait before performing a 'commit' operation
+ l file logs actions done for each record and their status into file
t test mode: parses the file, saying what he would do, but doing nothing.
s skip automatic conversion of MARC-8 to UTF-8. This option is
provided for debugging.
d delete EVERYTHING related to biblio in koha-DB before import. Tables:
biblio, biblioitems, titems
m format, MARCXML or ISO2709 (defaults to ISO2709)
+ keepids field store ids in field (usefull for authorities, where 001 contains the authid for Koha, that can contain a very valuable info for authorities coming from LOC or BNF. useless for biblios probably)
+ b|biblios type of import : bibliographic records
+ a|authorities type of import : authority records
+ match matchindex,fieldtomatch matchpoint to use to deduplicate
+ fieldtomatch can be either 001 to 999
+ or field and list of subfields as such 100abcde
+ i|isbn if set, a search will be done on isbn, and, if the same isbn is found, the biblio is not added. It's another
+ method to deduplicate.
+ match & i can be both set.
x source bib tag for reporting the source bib number
y source subfield for reporting the source bib number
idmap file for the koha bib and source id
-
+ keepids store ids in 009 (usefull for authorities, where 001 contains the authid for Koha, that can contain a very valuable info for authorities coming from LOC or BNF. useless for biblios probably)
+ b|biblios type of import : bibliographic records
+ a|authorities type of import : authority records
+ match matchindex,fieldtomatch matchpoint to use to deduplicate
+ fieldtomatch can be either 001 to 999
+ or field and list of subfields as such 100abcde
+ i|isbn if set, a search will be done on isbn, and, if the same isbn is found, the biblio is not added. It's another
+ method to deduplicate.
+ match & i can be both set.
IMPORTANT: don't use this script before you've entered and checked your MARC
parameters tables twice (or more!). Otherwise, the import won't work
correctly and you will get invalid data.
if ($delete) {
- print "deleting biblios\n";
- $dbh->do("truncate biblio");
- $dbh->do("truncate biblioitems");
- $dbh->do("truncate items");
+ if ($biblios){
+ print "deleting biblios\n";
+ $dbh->do("truncate biblio");
+ $dbh->do("truncate biblioitems");
+ $dbh->do("truncate items");
+ }
+ else {
+ print "deleting authorities\n";
+ $dbh->do("truncate auth_header");
+ }
$dbh->do("truncate zebraqueue");
}
# extract the records, not using regexes to look
# for <record>.*</record>.
$MARC::File::XML::_load_args{BinaryEncoding} = 'utf-8';
+ my $recordformat= ($marcFlavour eq "MARC21"?"USMARC":uc($marcFlavour));
+#UNIMARC Authorities have a different way to manage encoding than UNIMARC biblios.
+ $recordformat=$recordformat."AUTH" if ($authorities and $marcFlavour ne "MARC21");
+ $MARC::File::XML::_load_args{RecordFormat} = $recordformat;
$batch = MARC::Batch->new( 'XML', $fh );
} else {
$batch = MARC::Batch->new( 'USMARC', $fh );
$batch->next() while ($offset--);
}
+my ($tagid,$subfieldid);
+if ($authorities){
+ $tagid='001';
+}
+else {
+ ( $tagid, $subfieldid ) =
+ GetMarcFromKohaField( "biblio.biblionumber", '' );
+ $tagid||="001";
+}
+
+# the SQL query to search on isbn
+my $sth_isbn = $dbh->prepare("SELECT biblionumber,biblioitemnumber FROM biblioitems WHERE isbn=?");
+
$dbh->{AutoCommit} = 0;
+my $loghandle;
+if ($logfile){
+ $loghandle= IO::File->new($logfile,"w") ;
+ print $loghandle "id;operation;status\n";
+}
RECORD: while ( ) {
my $record;
+ # get records
eval { $record = $batch->next() };
if ( $@ ) {
print "Bad MARC record: skipped\n";
# C4::Charset::MarcToUTF8Record) because it doesn't use MARC::Batch.
next;
}
+ # skip if we get an empty record (that is MARC valid, but will result in AddBiblio failure
last unless ( $record );
$i++;
print ".";
print "\r$i" unless $i % 100;
+ # transcode the record to UTF8 if needed & applicable.
if ($record->encoding() eq 'MARC-8' and not $skip_marc8_conversion) {
# FIXME update condition
my ($guessed_charset, $charset_errors);
- ($record, $guessed_charset, $charset_errors) = MarcToUTF8Record($record, $marcFlavour);
+ ($record, $guessed_charset, $charset_errors) = MarcToUTF8Record($record, $marcFlavour.(($authorities and $marcFlavour ne "MARC21")?'AUTH':''));
if ($guessed_charset eq 'failed') {
warn "ERROR: failed to perform character conversion for record $i\n";
next RECORD;
}
}
-
- unless ($test_parameter) {
- my ( $biblionumber, $biblioitemnumber, $itemnumbers_ref, $errors_ref );
- eval { ( $biblionumber, $biblioitemnumber ) = AddBiblio($record, '', { defer_marc_save => 1 }) };
- if ( $@ ) {
- warn "ERROR: Adding biblio $biblionumber failed: $@\n";
- next RECORD;
- }
- if (defined $idmapfl) {
- if ($sourcetag lt '010'){
- if ($record->field($sourcetag)){
- my $source = $record->field($sourcetag)->data();
- printf(IDMAP "%s|%s\n",$source,$biblionumber);
+ my $isbn;
+ # remove trailing - in isbn (only for biblios, of course)
+ if ($biblios) {
+ if ($marcFlavour eq 'UNIMARC') {
+ if (my $f010 = $record->field('010')) {
+ $isbn = $f010->subfield('a');
+ $isbn =~ s/-//g;
+ $f010->update('a' => $isbn);
}
- } else {
- my $source=$record->subfield($sourcetag,$sourcesubfield);
- printf(IDMAP "%s|%s\n",$source,$biblionumber);
- }
+ } else {
+ if (my $f020 = $record->field('020')) {
+ $isbn = $f020->subfield('a');
+ $isbn =~ s/-//g;
+ $f020->update('a' => $isbn);
+ }
+ }
+ }
+ my $id;
+ # search for duplicates (based on Local-number)
+ if ($match){
+ require C4::Search;
+ my $query=build_query($match,$record);
+ my $server=($authorities?'authorityserver':'biblioserver');
+ my ($error, $results,$totalhits)=C4::Search::SimpleSearch( $query, 0, 3, [$server] );
+ die "unable to search the database for duplicates : $error" if (defined $error);
+ warn "$query $server : $totalhits";
+ if ($results && scalar(@$results)==1){
+ my $marcrecord = MARC::File::USMARC::decode($results->[0]);
+ $id=GetRecordId($marcrecord,$tagid,$subfieldid);
+ }
+ elsif ($results && scalar(@$results)>1){
+ warn "more than one match for $query";
+ }
+ else {
+ warn "nomatch for $query";
}
-
- eval { ( $itemnumbers_ref, $errors_ref ) = AddItemBatchFromMarc( $record, $biblionumber, $biblioitemnumber, '' ); };
- if ( $@ ) {
- warn "ERROR: Adding items to bib $biblionumber failed: $@\n";
- # if we failed because of an exception, assume that
- # the MARC columns in biblioitems were not set.
- ModBiblioMarc( $record, $biblionumber, '' );
- next RECORD;
- }
- if ($#{ $errors_ref } > -1) {
- report_item_errors($biblionumber, $errors_ref);
+ }
+ my $originalid;
+ if ($keepids){
+ $originalid=GetRecordId($record,$tagid,$subfieldid);
+ if ($originalid){
+ my $storeidfield;
+ if (length($keepids)==3){
+ $storeidfield=MARC::Field->new($keepids,$originalid);
+ }
+ else {
+ $storeidfield=MARC::Field->new(substr($keepids,0,3),"","",substr($keepids,3,1),$originalid);
+ }
+ $record->insert_fields_ordered($storeidfield);
+ $record->delete_field($record->field($tagid));
+ }
+ }
+ unless ($test_parameter) {
+ if ($authorities){
+ use C4::AuthoritiesMarc;
+ my $authtypecode=GuessAuthTypeCode($record);
+ my $authid= ($id?$id:GuessAuthId($record));
+ if ($authid && GetAuthority($authid)){
+ ## Authority has an id and is in database : Replace
+ eval { ( $authid ) = ModAuthority($authid,$record, $authtypecode) };
+ if ($@){
+ warn "Problem with authority $authid Cannot Modify";
+ printlog({id=>$originalid||$id||$authid, op=>"edit",status=>"ERROR"}) if ($logfile);
+ }
+ else{
+ printlog({id=>$originalid||$id||$authid, op=>"edit",status=>"ok"}) if ($logfile);
+ }
+ }
+ elsif (defined $authid) {
+ ## An authid is defined but no authority in database : add
+ eval { ( $authid ) = AddAuthority($record,$authid, $authtypecode) };
+ if ($@){
+ warn "Problem with authority $authid Cannot Add";
+ printlog({id=>$originalid||$id||$authid, op=>"insert",status=>"ERROR"}) if ($logfile);
+ }
+ else{
+ printlog({id=>$originalid||$id||$authid, op=>"insert",status=>"ok"}) if ($logfile);
+ }
+ }
+ else {
+ ## True insert in database
+ eval { ( $authid ) = AddAuthority($record,"", $authtypecode) };
+ if ($@){
+ warn "Problem with authority $authid Cannot Add";
+ printlog({id=>$originalid||$id||$authid, op=>"insert",status=>"ERROR"}) if ($logfile);
+ }
+ else{
+ printlog({id=>$originalid||$id||$authid, op=>"insert",status=>"ok"}) if ($logfile);
+ }
+ }
+ }
+ else {
+ my ( $biblionumber, $biblioitemnumber, $itemnumbers_ref, $errors_ref );
+ $biblionumber = $id;
+ # check for duplicate, based on ISBN (skip it if we already have found a duplicate with match parameter
+ if (!$biblionumber && $isbn_check && $isbn) {
+ # warn "search ISBN : $isbn";
+ $sth_isbn->execute($isbn);
+ ($biblionumber,$biblioitemnumber) = $sth_isbn->fetchrow;
+ }
+ if (defined $idmapfl) {
+ if ($sourcetag < "010"){
+ if ($record->field($sourcetag)){
+ my $source = $record->field($sourcetag)->data();
+ printf(IDMAP "%s|%s\n",$source,$biblionumber);
+ }
+ } else {
+ my $source=$record->subfield($sourcetag,$sourcesubfield);
+ printf(IDMAP "%s|%s\n",$source,$biblionumber);
+ }
+ }
+ # create biblio, unless we already have it ( either match or isbn )
+ unless ($biblionumber) {
+ eval { ( $biblionumber, $biblioitemnumber ) = AddBiblio($record, '', { defer_marc_save => 1 }) };
+ }
+ if ( $@ ) {
+ warn "ERROR: Adding biblio $biblionumber failed: $@\n";
+ printlog({id=>$id||$originalid||$biblionumber, op=>"insert",status=>"ERROR"}) if ($logfile);
+ next RECORD;
+ }
+ else{
+ printlog({id=>$id||$originalid||$biblionumber, op=>"insert",status=>"ok"}) if ($logfile);
+ }
+ eval { ( $itemnumbers_ref, $errors_ref ) = AddItemBatchFromMarc( $record, $biblionumber, $biblioitemnumber, '' ); };
+ if ( $@ ) {
+ warn "ERROR: Adding items to bib $biblionumber failed: $@\n";
+ printlog({id=>$id||$originalid||$biblionumber, op=>"insertitem",status=>"ERROR"}) if ($logfile);
+ # if we failed because of an exception, assume that
+ # the MARC columns in biblioitems were not set.
+ ModBiblioMarc( $record, $biblionumber, '' );
+ next RECORD;
+ }
+ else{
+ printlog({id=>$id||$originalid||$biblionumber, op=>"insert",status=>"ok"}) if ($logfile);
+ }
+ if ($#{ $errors_ref } > -1) {
+ report_item_errors($biblionumber, $errors_ref);
+ }
}
-
$dbh->commit() if (0 == $i % $commitnum);
}
last if $i == $number;
$dbh->commit();
+
if ($fk_off) {
$dbh->do("SET FOREIGN_KEY_CHECKS = 1");
}
my $timeneeded = gettimeofday - $starttime;
print "\n$i MARC records done in $timeneeded seconds\n";
-
+if ($logfile){
+ print $loghandle "file : $input_marc_file\n";
+ print $loghandle "$i MARC records done in $timeneeded seconds\n";
+ $loghandle->close;
+}
exit 0;
+# Return the identifier stored in a MARC record.
+#   $marcrecord - record object queried through field() and subfield()
+#   $tag        - tag holding the identifier
+#   $subfield   - subfield code, consulted only when $tag is "010" or above
+# Tags that sort before "010" are read through data(); any other tag is read
+# through subfield() and needs a true $subfield. Returns undef when nothing
+# can be read.
+sub GetRecordId{
+    my ($marcrecord, $tag, $subfield) = @_;
+    my $notfound;
+    if ($tag lt "010"){
+        my $controlfield = $marcrecord->field($tag);
+        return $controlfield ? $controlfield->data() : $notfound;
+    }
+    return $notfound unless $subfield;
+    return $notfound unless $marcrecord->field($tag);
+    return $marcrecord->subfield($tag,$subfield);
+}
+# Build the duplicate-search query for a record.
+#   $match  - array reference of matchpoint strings
+#   $record - record handed unchanged to build_simplequery()
+# Each matchpoint is turned into a clause by build_simplequery(); empty
+# clauses are dropped and the remaining ones are joined with " and ".
+sub build_query {
+    my ($match, $record) = @_;
+    my @clauses = grep { length($_) > 0 }
+                  map  { scalar build_simplequery($_, $record) } @$match;
+    return join(" and ", @clauses);
+}
+# Build the search clause for one matchpoint.
+#   $element - matchpoint written "index,fieldspec"; fieldspec is a three-digit
+#              tag optionally followed by subfield codes (for example 100abcde)
+#   $record  - record object queried through field()
+# Produces one `index,wrdl="value"` term per occurrence of the tag whose
+# as_string() value is non-empty, joined with " and ". Returns an empty string
+# when the matchpoint carries no usable tag or nothing in the record matches.
+sub build_simplequery {
+    my ($element, $record) = @_;
+    my ($index, $recorddata) = split /,/, $element;
+    # Declare unconditionally: "my ... if COND" has undefined behaviour
+    # according to perlsyn, so the assignment is done in a real if block.
+    my ($tag, $subfields);
+    if (defined $recorddata && $recorddata =~ /(\d{3})(.*)/) {
+        ($tag, $subfields) = ($1, $2);
+    }
+    # A matchpoint without a tag cannot be searched; the empty string makes
+    # build_query() drop it.
+    return "" unless defined $tag;
+    my @searchstrings;
+    foreach my $field ($record->field($tag)) {
+        my $value = $field->as_string("$subfields");
+        next unless defined $value && length($value) > 0;
+        push @searchstrings, "$index,wrdl=\"".$value."\"";
+    }
+    return join(" and ", @searchstrings);
+}
sub report_item_errors {
my $biblionumber = shift;
my $errors_ref = shift;
print $msg, "\n";
}
}
+# Append one "id;op;status" line to the log file.
+#   $logelements - hash reference read for the keys id, op and status
+# Writes to the file-level $loghandle. Does nothing when no log handle is
+# open; undefined values are written as empty columns so that a missing id
+# does not raise "uninitialized value" warnings.
+sub printlog{
+    my $logelements=shift;
+    return unless $loghandle;
+    my @columns = map { defined $_ ? $_ : "" } @$logelements{qw<id op status>};
+    print {$loghandle} join(";",@columns),"\n";
+}