use XML::LibXML;
use C4::Biblio; #marc2bibtex
use C4::Csv; #marc2csv
-use Text::CSV; #marc2csv
+use C4::Koha; #marc2csv
+use YAML; #marcrecords2csv
+use Text::CSV::Encoded; #marc2csv
use vars qw($VERSION @ISA @EXPORT);
&marc2modsxml
&marc2bibtex
&marc2csv
-
&html2marcxml
&html2marc
&changeEncoding
}
-=head2 marc2csv - Convert from UNIMARC to CSV
+=head2 marc2csv - Convert several records from UNIMARC to CSV
+Pre and postprocessing can be done through a YAML file
=over 4
-my ($csv) = marc2csv($record, $csvprofileid);
+my ($csv) = marc2csv($biblios, $csvprofileid);
Returns a CSV scalar
=over 2
-C<$record> - a MARC::Record object
+C<$biblios> - a reference to an array of biblionumbers
C<$csvprofileid> - the id of the CSV profile to use for the export (see export_format.export_format_id and the GetCsvProfiles function in C4::Csv)
=back
=cut
+sub marc2csv {
+    my ($biblios, $id) = @_;
+    my $output;
+    my $csv = Text::CSV::Encoded->new();
+
+    # Optional per-profile YAML file. Its documents are read, in order, as
+    # three Perl snippets: preprocessing, postprocessing and field processing.
+    # NOTE(review): the path is relative to the current working directory --
+    # confirm every caller runs from a directory that is a sibling of tools/.
+    my $configfile = "../tools/csv-profiles/$id.yaml";
+    my ($preprocess, $postprocess, $fieldprocessing);
+
+    # The profile id becomes part of a file name and the file content is
+    # executed below, so the file is only loaded when the id cannot contain a
+    # path separator or a dot (no "../" traversal to an arbitrary YAML file).
+    if (defined $id && $id =~ /\A[\w-]+\z/ && -e $configfile) {
+        ($preprocess, $postprocess, $fieldprocessing) = YAML::LoadFile($configfile);
+    }
+
+    # Preprocessing
+    # SECURITY: this is a string eval of code read from disk. The snippet runs
+    # in this scope (it can see $biblios, $id, $output, $csv, ...), which is
+    # why the local variable names here must stay as they are.
+    if ($preprocess) {
+        eval $preprocess;
+        # A broken snippet must not abort the export, but it must not fail silently either.
+        warn "marc2csv: preprocessing of CSV profile $id failed: $@" if $@;
+    }
-sub marc2csv {
- my ($record, $id, $header) = @_;
+
+    # Headers are requested for the first record only.
+    my $firstpass = 1;
+    foreach my $biblio (@$biblios) {
+        $output .= marcrecord2csv($biblio, $id, $firstpass, $csv, $fieldprocessing);
+        $firstpass = 0;
+    }
+
+    # Postprocessing (same string-eval caveats as the preprocessing step)
+    if ($postprocess) {
+        eval $postprocess;
+        warn "marc2csv: postprocessing of CSV profile $id failed: $@" if $@;
+    }
+
+    return $output;
+}
+
+
+=head2 marcrecord2csv - Convert a single record from UNIMARC to CSV
+
+=over 4
+
+my ($output) = marcrecord2csv($biblio, $csvprofileid, $header, $csv, $fieldprocessing);
+
+Returns a CSV scalar
+
+=over 2
+
+C<$biblio> - a biblionumber
+
+C<$csvprofileid> - the id of the CSV profile to use for the export (see export_format.export_format_id and the GetCsvProfiles function in C4::Csv)
+
+C<$header> - true if the headers are to be printed (typically at first pass)
+
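+C<$csv> - an already initialised Text::CSV::Encoded object
+
+C<$fieldprocessing> - optional string of Perl code, evaluated with eval for each exported field occurrence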
+
+=back
+
+=back
+
+=cut
+
+
+sub marcrecord2csv {
+ my ($biblio, $id, $header, $csv, $fieldprocessing) = @_;
my $output;
- my $csv = Text::CSV->new();
- # Get the information about the csv profile
- my $marcfieldslist = GetMarcFieldsForCsv($id);
+ # Getting the record
+ my $record = GetMarcBiblio($biblio);
+
+ # Getting the framework
+ my $frameworkcode = GetFrameworkCode($biblio);
+
+ # Getting information about the csv profile
+ my $profile = GetCsvProfile($id);
+
+ # Getting output encoding
+ my $encoding = $profile->{encoding} || 'utf8';
+
+ # Getting separators
+ my $csvseparator = $profile->{csv_separator} || ',';
+ my $fieldseparator = $profile->{field_separator} || '#';
+ my $subfieldseparator = $profile->{subfield_separator} || '|';
+
+ # TODO: Be more generic (in case we have to handle other protected chars or more separators)
+ if ($csvseparator eq '\t') { $csvseparator = "\t" }
+ if ($fieldseparator eq '\t') { $fieldseparator = "\t" }
+ if ($subfieldseparator eq '\t') { $subfieldseparator = "\t" }
+
+ $csv->encoding_out($encoding) if ($encoding ne 'utf8');
+ $csv->sep_char($csvseparator);
+
+ # Getting the marcfields
+ my $marcfieldslist = $profile->{marcfields};
# Getting the marcfields as an array
- my @marcfields = split('\|', $marcfieldslist);
+ my @marcfieldsarray = split('\|', $marcfieldslist);
+
+ # Separating the marcfields from the user-supplied headers
+ my @marcfields;
+ foreach (@marcfieldsarray) {
+ my @result = split('=', $_);
+ if (scalar(@result) == 2) {
+ push @marcfields, { header => $result[0], field => $result[1] };
+ } else {
+ push @marcfields, { field => $result[0] }
+ }
+ }
# If we have to insert the headers
if ($header) {
my @marcfieldsheaders;
-
my $dbh = C4::Context->dbh;
# For each field or subfield
foreach (@marcfields) {
- # We get the matching tag name
- if (index($_, '$') > 0) {
- my ($fieldtag, $subfieldtag) = split('\$', $_);
- my $query = "SELECT liblibrarian FROM marc_subfield_structure WHERE tagfield=? AND tagsubfield=?";
- my $sth = $dbh->prepare($query);
- $sth->execute($fieldtag, $subfieldtag);
- my @results = $sth->fetchrow_array();
- push @marcfieldsheaders, @results[0];
+
+ my $field = $_->{field};
+
+ # If we have a user-supplied header, we use it
+ if (exists $_->{header}) {
+ push @marcfieldsheaders, $_->{header};
} else {
- my $query = "SELECT liblibrarian FROM marc_tag_structure WHERE tagfield=?";
- my $sth = $dbh->prepare($query);
- $sth->execute($_);
- my @results = $sth->fetchrow_array();
- push @marcfieldsheaders, @results[0];
+ # If not, we get the matching tag name from koha
+ if (index($field, '$') > 0) {
+ my ($fieldtag, $subfieldtag) = split('\$', $field);
+ my $query = "SELECT liblibrarian FROM marc_subfield_structure WHERE tagfield=? AND tagsubfield=?";
+ my $sth = $dbh->prepare($query);
+ $sth->execute($fieldtag, $subfieldtag);
+ my @results = $sth->fetchrow_array();
+ push @marcfieldsheaders, $results[0];
+ } else {
+ my $query = "SELECT liblibrarian FROM marc_tag_structure WHERE tagfield=?";
+ my $sth = $dbh->prepare($query);
+ $sth->execute($field);
+ my @results = $sth->fetchrow_array();
+ push @marcfieldsheaders, $results[0];
+ }
}
}
$csv->combine(@marcfieldsheaders);
# For each marcfield to export
my @fieldstab;
- foreach my $marcfield (@marcfields) {
+ foreach (@marcfields) {
+ my $marcfield = $_->{field};
# If it is a subfield
if (index($marcfield, '$') > 0) {
my ($fieldtag, $subfieldtag) = split('\$', $marcfield);
# We take every matching subfield
my @subfields = $field->subfield($subfieldtag);
foreach my $subfield (@subfields) {
- push @tmpfields, $subfield;
+
+ # Getting authorised value
+ my $authvalues = GetKohaAuthorisedValuesFromField($fieldtag, $subfieldtag, $frameworkcode, undef);
+ push @tmpfields, (defined $authvalues->{$subfield}) ? $authvalues->{$subfield} : $subfield;
}
}
- push (@fieldstab, join(',', @tmpfields));
+ push (@fieldstab, join($subfieldseparator, @tmpfields));
# Or a field
} else {
my @fields = ($record->field($marcfield));
- push (@fieldstab, join(',', map($_->as_string(), @fields)));
+ my $authvalues = GetKohaAuthorisedValuesFromField($marcfield, undef, $frameworkcode, undef);
+
+ my @valuesarray;
+ foreach (@fields) {
+ my $value;
+
+ # Getting authorised value
+ $value = defined $authvalues->{$_->as_string} ? $authvalues->{$_->as_string} : $_->as_string;
+
+ # Field processing
+ eval $fieldprocessing if ($fieldprocessing);
+
+ push @valuesarray, $value;
+ }
+ push (@fieldstab, join($fieldseparator, @valuesarray));
}
};
return ($error,$newrecord);
}
+=head2 marc2bibtex - Convert from MARC21 and UNIMARC to BibTex
+
+=over 4
+
+my ($bibtex) = marc2bibtex($record, $id);
+
+Returns a BibTex scalar
+
+=over 2
+
+C<$record> - a MARC::Record object
+
+C<$id> - an id for the BibTex record (might be the biblionumber)
+
+=back
+
+=back
+
+=cut
+
+
+sub marc2bibtex {
+    my ($record, $id) = @_;
+
+    # Read the flavour once: it drives both the author lookup and the
+    # choice of the tag/subfield map below.
+    my $marcflavour = C4::Context->preference("marcflavour");
+
+    # Authors
+    # The structure returned by GetMarcAuthors is walked as a list of hashes
+    # whose values are array references of hashes; every non-empty {value}
+    # found that way is joined with " and ".
+    my $marcauthors = GetMarcAuthors($record, $marcflavour);
+    my $author;
+    for my $authors ( map { map { @$_ } values %$_ } @$marcauthors ) {
+        next unless $$authors{value};
+        $author .= " and " if $author;
+        $author .= $$authors{value};
+    }
+
+    # Defining the conversion hash according to the marcflavour
+    my %bh;
+    if ($marcflavour eq "UNIMARC") {
+
+        # FIXME, TODO : handle repeatable fields
+        # TODO : handle more types of documents
+
+        # Unimarc to bibtex hash
+        %bh = (
+
+            # Mandatory
+            author    => $author,
+            title     => $record->subfield("200", "a") || "",
+            editor    => $record->subfield("210", "g") || "",
+            publisher => $record->subfield("210", "c") || "",
+            year      => $record->subfield("210", "d") || $record->subfield("210", "h") || "",
+
+            # Optional
+            volume  => $record->subfield("200", "v") || "",
+            series  => $record->subfield("225", "a") || "",
+            address => $record->subfield("210", "a") || "",
+            edition => $record->subfield("205", "a") || "",
+            note    => $record->subfield("300", "a") || "",
+            url     => $record->subfield("856", "u") || ""
+        );
+    } else {
+
+        # Marc21 to bibtex hash
+        %bh = (
+
+            # Mandatory
+            author    => $author,
+            title     => $record->subfield("245", "a") || "",
+            editor    => $record->subfield("260", "f") || "",
+            publisher => $record->subfield("260", "b") || "",
+            year      => $record->subfield("260", "c") || $record->subfield("260", "g") || "",
+
+            # Optional
+            # unimarc to marc21 specification says not to convert 200$v to marc21
+            series  => $record->subfield("490", "a") || "",
+            address => $record->subfield("260", "a") || "",
+            edition => $record->subfield("250", "a") || "",
+            note    => $record->subfield("500", "a") || "",
+            url     => $record->subfield("856", "u") || ""
+        );
+    }
+
+    # Only non-empty fields are written. The keys are sorted because hash
+    # order is unspecified: without it the same record could produce a
+    # differently ordered entry from one run to the next.
+    # NOTE(review): values are written between double quotes without any
+    # escaping -- confirm whether a '"' inside a subfield needs handling.
+    my @entries = map { qq(\t$_ = "$bh{$_}") } grep { $bh{$_} } sort keys %bh;
+
+    my $tex = "\@book{";
+    $tex .= join(",\n", $id, @entries);
+    $tex .= "\n}\n";
+
+    return $tex;
+}
+
+
+
+
=head1 INTERNAL FUNCTIONS
=head2 _entity_encode - Entity-encode an array of strings