X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=misc%2Fmigration_tools%2Fbulkmarcimport.pl;h=a9fee95d1489bb3230f9a6a8174eea4ac4d683ab;hb=b2b16fb8cf4bf89862eba6fc14d507dc79de4db7;hp=2f18d5266e1a2159ea29f289cb11191c3dd1ee6f;hpb=12ac5e3d8908254d0b4c1d4399b895722821ea19;p=koha.git diff --git a/misc/migration_tools/bulkmarcimport.pl b/misc/migration_tools/bulkmarcimport.pl index 2f18d5266e..a9fee95d14 100755 --- a/misc/migration_tools/bulkmarcimport.pl +++ b/misc/migration_tools/bulkmarcimport.pl @@ -30,10 +30,11 @@ use Getopt::Long; use IO::File; use Pod::Usage; -binmode(STDOUT, ":utf8"); +binmode STDOUT, ':encoding(UTF-8)'; my ( $input_marc_file, $number, $offset) = ('',0,0); my ($version, $delete, $test_parameter, $skip_marc8_conversion, $char_encoding, $verbose, $commit, $fk_off,$format,$biblios,$authorities,$keepids,$match, $isbn_check, $logfile); -my ($sourcetag,$sourcesubfield,$idmapfl); +my $cleanisbn = 1; +my ($sourcetag,$sourcesubfield,$idmapfl, $dedup_barcode); $|=1; @@ -59,6 +60,8 @@ GetOptions( 'x:s' => \$sourcetag, 'y:s' => \$sourcesubfield, 'idmap:s' => \$idmapfl, + 'cleanisbn!' => \$cleanisbn, + 'dedupbarcode' => \$dedup_barcode, ); $biblios=!$authorities||$biblios; @@ -176,7 +179,6 @@ RECORD: while ( ) { # from because we don't have access to the original blob. Note # that the staging import can deal with this condition (via # C4::Charset::MarcToUTF8Record) because it doesn't use MARC::Batch. - $i++; next; } # skip if we get an empty record (that is MARC valid, but will result in AddBiblio failure @@ -197,19 +199,13 @@ RECORD: while ( ) { } my $isbn; # remove trailing - in isbn (only for biblios, of course) - if ($biblios) { - if ($marcFlavour eq 'UNIMARC') { - if (my $f010 = $record->field('010')) { - $isbn = $f010->subfield('a'); - $isbn =~ s/-//g; - $f010->update('a' => $isbn); - } - } else { - if (my $f020 = $record->field('020')) { - $isbn = $f020->subfield('a'); - $isbn =~ s/-//g; - $f020->update('a' => $isbn); - } + if ($biblios && $cleanisbn) { + my $tag = $marcFlavour eq 'UNIMARC' ? '010' : '020'; + my $field = $record->field($tag); + my $isbn = $field && $field->subfield('a'); + if ( $isbn ) { + $isbn =~ s/-//g; + $field->update('a' => $isbn); } } my $id; @@ -221,15 +217,15 @@ RECORD: while ( ) { my ($error, $results,$totalhits)=C4::Search::SimpleSearch( $query, 0, 3, [$server] ); die "unable to search the database for duplicates : $error" if (defined $error); #warn "$query $server : $totalhits"; - if ($results && scalar(@$results)==1){ + if ( @{$results} == 1 ){ my $marcrecord = MARC::File::USMARC::decode($results->[0]); $id=GetRecordId($marcrecord,$tagid,$subfieldid); } - elsif ($results && scalar(@$results)>1){ - $debug && warn "more than one match for $query"; + elsif ( @{$results} > 1){ + $debug && warn "more than one match for $query"; } else { - $debug && warn "nomatch for $query"; + $debug && warn "nomatch for $query"; } } my $originalid; @@ -323,18 +319,67 @@ RECORD: while ( ) { printlog({id=>$id||$originalid||$biblionumber, op=>"insert",status=>"ok"}) if ($logfile); } eval { ( $itemnumbers_ref, $errors_ref ) = AddItemBatchFromMarc( $record, $biblionumber, $biblioitemnumber, '' ); }; - if ( $@ ) { - warn "ERROR: Adding items to bib $biblionumber failed: $@\n"; + my $error_adding = $@; + # Work on a clone so that if there are real errors, we can maybe + # fix them up later. + my $clone_record = $record->clone(); + C4::Biblio::_strip_item_fields($clone_record, ''); + # This sets the marc fields if there was an error, and also calls + # defer_marc_save. + ModBiblioMarc( $clone_record, $biblionumber, '' ); + if ( $error_adding ) { + warn "ERROR: Adding items to bib $biblionumber failed: $error_adding"; printlog({id=>$id||$originalid||$biblionumber, op=>"insertitem",status=>"ERROR"}) if ($logfile); # if we failed because of an exception, assume that # the MARC columns in biblioitems were not set. - ModBiblioMarc( $record, $biblionumber, '' ); next RECORD; - } + } else{ printlog({id=>$id||$originalid||$biblionumber, op=>"insert",status=>"ok"}) if ($logfile); } - if ($#{ $errors_ref } > -1) { + if ($dedup_barcode && grep { exists $_->{error_code} && $_->{error_code} eq 'duplicate_barcode' } @$errors_ref) { + # Find the record called 'barcode' + my ($tag, $sub) = C4::Biblio::GetMarcFromKohaField('items.barcode', ''); + # Now remove any items that didn't have a duplicate_barcode error, + # erase the barcodes on items that did, and re-add those items. + my %dupes; + foreach my $i (0 .. $#{$errors_ref}) { + my $ref = $errors_ref->[$i]; + if ($ref && ($ref->{error_code} eq 'duplicate_barcode')) { + $dupes{$ref->{item_sequence}} = 1; + # Delete the error message because we're going to + # retry this one. + delete $errors_ref->[$i]; + } + } + my $seq = 0; + foreach my $field ($record->field($tag)) { + $seq++; + if ($dupes{$seq}) { + # Here we remove the barcode + $field->delete_subfield(code => $sub); + } else { + # otherwise we delete the field because we don't want + # two of them + $record->delete_fields($field); + } + } + # Now re-add the record as before, adding errors to the prev list + my $more_errors; + eval { ( $itemnumbers_ref, $more_errors ) = AddItemBatchFromMarc( $record, $biblionumber, $biblioitemnumber, '' ); }; + if ( $@ ) { + warn "ERROR: Adding items to bib $biblionumber failed: $@\n"; + printlog({id=>$id||$originalid||$biblionumber, op=>"insertitem",status=>"ERROR"}) if ($logfile); + # if we failed because of an exception, assume that + # the MARC columns in biblioitems were not set. + ModBiblioMarc( $record, $biblionumber, '' ); + next RECORD; + } else { + printlog({id=>$id||$originalid||$biblionumber, op=>"insert",status=>"ok"}) if ($logfile); + } + push @$errors_ref, @{ $more_errors }; + } + if ($#{ $errors_ref } > -1) { report_item_errors($biblionumber, $errors_ref); } } @@ -405,6 +450,7 @@ sub report_item_errors { my $errors_ref = shift; foreach my $error (@{ $errors_ref }) { + next if !$error; my $msg = "Item not added (bib $biblionumber, item tag #$error->{'item_sequence'}, barcode $error->{'item_barcode'}): "; my $error_code = $error->{'error_code'}; $error_code =~ s/_/ /g; @@ -518,6 +564,11 @@ If set, a search will be done on isbn, and, if the same isbn is found, the biblio is not added. It's another method to deduplicate. B<-match> & B<-isbn> can be both set. +=item B<-cleanisbn> + +Clean ISBN fields from entering biblio records, ie removes hyphens. By default, +ISBN are cleaned. --nocleanisbn will keep ISBN unchanged. + =item B<-x>=I Source bib I for reporting the source bib number @@ -536,6 +587,13 @@ Store ids in 009 (usefull for authorities, where 001 contains the authid for Koha, that can contain a very valuable info for authorities coming from LOC or BNF. useless for biblios probably) +=item B<-dedupbarcode> + +If set, whenever a duplicate barcode is detected, it is removed and the attempt +to add the record is retried, thereby giving the record a blank barcode. This +is useful when something has set barcodes to be a biblio ID, or similar +(usually other software.) + =back =cut