X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=misc%2Fstage_file.pl;h=ad0b7267aba607af8e7dca3a7676e2330e7796a6;hb=7b5c8cbf079725381341cea5494717d58735ca9b;hp=4ce16b59e15d2ad493c527004280bb769d0c4ab0;hpb=7ad5e203da0a7aa5436366b0735ea24dc0ac2d41;p=koha.git diff --git a/misc/stage_file.pl b/misc/stage_file.pl index 4ce16b59e1..ad0b7267ab 100755 --- a/misc/stage_file.pl +++ b/misc/stage_file.pl @@ -37,21 +37,27 @@ $| = 1; # command-line parameters my $record_type = "biblio"; -my $encoding = ""; +my $encoding = "UTF-8"; my $authorities = 0; my $match = 0; my $add_items = 0; my $input_file = ""; my $batch_comment = ""; my $want_help = 0; -my $no_replace ; +my $no_replace; +my $format = 'ISO2709'; +my $no_create; +my $item_action = 'always_add'; my $result = GetOptions( 'encoding:s' => \$encoding, 'file:s' => \$input_file, + 'format:s' => \$format, 'match|match-bibs:s' => \$match, 'add-items' => \$add_items, + 'item-action:s' => \$item_action, 'no-replace' => \$no_replace, + 'no-create' => \$no_create, 'comment:s' => \$batch_comment, 'authorities' => \$authorities, 'h|help' => \$want_help @@ -59,14 +65,15 @@ my $result = GetOptions( $record_type = 'auth' if ($authorities); -if ($encoding eq "") { - $encoding = "utf8"; -} - if (not $result or $input_file eq "" or $want_help) { print_usage(); exit 0; } +if ( $format !~ /^(MARCXML|ISO2709)$/i ) { + print "\n --format must be MARCXML or ISO2709\n"; + print_usage(); + exit 0; +} unless (-r $input_file) { die "$0: cannot open input file $input_file: $!\n"; @@ -74,40 +81,50 @@ unless (-r $input_file) { my $dbh = C4::Context->dbh; $dbh->{AutoCommit} = 0; -process_batch($input_file, $record_type, $match, $add_items, $batch_comment); +process_batch({ + format => $format, + input_file => $input_file, + record_type => $record_type, + match => $match, + add_items => $add_items, + batch_comment => $batch_comment, + encoding => $encoding, + no_replace => $no_replace, + no_create => $no_create, + item_action => $item_action, +}); $dbh->commit(); exit 0; sub process_batch { - my ($input_file, $record_type, $match, $add_items, $batch_comment) = @_; - - open IN, "<$input_file" or die "$0: cannot open input file $input_file: $!\n"; - my $marc_records = ""; - $/ = "\035"; - my $num_input_records = 0; - while () { - s/^\s+//; - s/\s+$//; - next unless $_; # skip if record has only whitespace, as might occur - # if file includes newlines between each MARC record - $marc_records .= $_; # FIXME - this sort of string concatenation - # is probably rather inefficient - $num_input_records++; + my ( $params ) = @_; #Possible params are: format input_file record_type match add_items batch_comment encoding no_replace no_create item_action + my $format = $params->{format} // ''; + my $record_type = $params->{record_type} // 'biblio'; + + my ( $errors, $marc_records ); + if( $format eq 'ISO2709' ) { + ( $errors, $marc_records ) = C4::ImportBatch::RecordsFromISO2709File( + $params->{input_file}, $record_type, $params->{encoding} ); + } elsif( $format eq 'MARCXML' ) { + ( $errors, $marc_records ) = C4::ImportBatch::RecordsFromMARCXMLFile( + $params->{input_file}, $params->{encoding} ); } - close IN; + warn ( join ',', @$errors ) if @$errors; + my $num_input_records = ($marc_records) ? scalar(@$marc_records) : 0; print "... staging MARC records -- please wait\n"; + #FIXME: We should really allow the use of marc modification frameworks and to_marc plugins here if possible my ($batch_id, $num_valid_records, $num_items, @import_errors) = - BatchStageMarcRecords($record_type, $encoding, $marc_records, $input_file, $batch_comment, '', $add_items, 0, + BatchStageMarcRecords($record_type, $params->{encoding}, $marc_records, $params->{input_file}, undef, undef, $params->{batch_comment}, '', $params->{add_items}, 0, 100, \&print_progress_and_commit); print "... finished staging MARC records\n"; my $num_with_matches = 0; - if ($match) { - my $matcher = C4::Matcher->fetch($match) ; + if ( $params->{match} ) { + my $matcher = C4::Matcher->fetch( $params->{match} ); if (defined $matcher) { - SetImportBatchMatcher($batch_id, $match); + SetImportBatchMatcher( $batch_id, $params->{match} ); } elsif ($record_type eq 'biblio') { $matcher = C4::Matcher->new($record_type); $matcher->add_simple_matchpoint('isbn', 1000, '020', 'a', -1, 0, ''); @@ -115,9 +132,9 @@ sub process_batch { '245', 'a', -1, 0, ''); } # set default record overlay behavior - SetImportBatchOverlayAction($batch_id, ($no_replace) ? 'ignore' : 'replace'); - SetImportBatchNoMatchAction($batch_id, 'create_new'); - SetImportBatchItemAction($batch_id, 'always_add'); + SetImportBatchOverlayAction( $batch_id, $params->{no_replace} ? 'ignore' : 'replace' ); + SetImportBatchNoMatchAction( $batch_id, $params->{no_create} ? 'ignore' : 'create_new' ); + SetImportBatchItemAction( $batch_id, $params->{item_action} ); print "... looking for matches with records already in database\n"; $num_with_matches = BatchFindDuplicates($batch_id, $matcher, 10, 100, \&print_progress_and_commit); print "... finished looking for matches\n"; @@ -128,19 +145,19 @@ sub process_batch { MARC record staging report ------------------------------------ -Input file: $input_file +Input file: $params->{input_file} Record type: $record_type Number of input records: $num_input_records Number of valid records: $num_valid_records Number of invalid records: $num_invalid_records _SUMMARY_ - if ($match) { + if( $params->{match} ) { print "Number of records matched: $num_with_matches\n"; } else { print "Incoming records not matched against existing records (--match option not supplied)\n"; } if ($record_type eq 'biblio') { - if ($add_items) { + if ( $params->{add_items} ) { print "Number of items parsed: $num_items\n"; } else { print "No items parsed (--add-items option not supplied)\n"; @@ -174,9 +191,12 @@ records into the main Koha database. Parameters: --file name of input MARC bib file --authorities stage authority records instead of bibs - --encoding encoding of MARC records, default is utf8. + --encoding encoding of MARC records, default is UTF-8. Other possible options are: MARC-8, ISO_5426, ISO_6937, ISO_8859-1, EUC-KR + --format The MARC transport format to use? + Defaults to ISO2709. + Available values, MARCXML, ISO2709. --match use this option to match records in the file with records already in the database for future overlay. @@ -186,8 +206,14 @@ Parameters: --add-items use this option to specify that item data is embedded in the MARC bibs and should be parsed. + --item-action action to take if --add-items is specifed; + choices are 'always_add', + 'add_only_for_matches', 'add_only_for_new', + 'ignore', or 'replace' --no-replace overlay action for record: default is to replace extant with the imported record. + --no-create nomatch action for record: default is to + create new record with imported record. --comment optional comment to describe the record batch; if the comment has spaces in it, surround the