use C4::Search;
use C4::AuthoritiesMarc::MARC21;
use C4::AuthoritiesMarc::UNIMARC;
+use C4::Charset;
use vars qw($VERSION @ISA @EXPORT);
my $sth=$dbh->prepare("select authtypecode, marcxml from auth_header where authid=?");
$sth->execute($authid);
my ($authtypecode, $marcxml) = $sth->fetchrow;
- my $record=MARC::Record->new_from_xml($marcxml,'UTF-8',(C4::Context->preference("marcflavour") eq "UNIMARC"?"UNIMARCAUTH":C4::Context->preference("marcflavour")));
+ my $record=MARC::Record->new_from_xml(StripNonXmlChars($marcxml),'UTF-8',
+ (C4::Context->preference("marcflavour") eq "UNIMARC"?"UNIMARCAUTH":C4::Context->preference("marcflavour")));
$record->encoding('UTF-8');
if (C4::Context->preference("marcflavour") eq "MARC21") {
my ($auth_type_tag, $auth_type_subfield) = get_auth_type_location($authtypecode);
use C4::Dates qw/format_date/;
use C4::Log; # logaction
use C4::ClassSource;
+use C4::Charset;
use vars qw($VERSION @ISA @EXPORT);
my $sth =
$dbh->prepare("SELECT marcxml FROM biblioitems WHERE biblionumber=? ");
$sth->execute($biblionumber);
- my ($marcxml) = $sth->fetchrow;
+ my $row = $sth->fetchrow_hashref;
+ my $marcxml = StripNonXmlChars($row->{'marcxml'});
MARC::File::XML->default_record_format(C4::Context->preference('marcflavour'));
- $marcxml =~ s/\x1e//g;
- $marcxml =~ s/\x1f//g;
- $marcxml =~ s/\x1d//g;
- $marcxml =~ s/\x0f//g;
- $marcxml =~ s/\x0c//g;
-# warn $marcxml;
my $record = MARC::Record->new();
if ($marcxml) {
$record = eval {MARC::Record::new_from_xml( $marcxml, "utf8", C4::Context->preference('marcflavour'))};
IsStringUTF8ish
MarcToUTF8Record
SetMarcUnicodeFlag
+ StripNonXmlChars
);
}
}
}
+=head2 StripNonXmlChars
+
+=over 4
+
+my $new_str = StripNonXmlChars($old_str);
+
+=back
+
+Given a string, return a copy with the
+characters that are illegal in XML
+removed.
+
+This function exists to work around a problem
+that can occur with badly-encoded MARC records.
+Specifically, if a UTF-8 MARC record also
+has escape (\x1b) characters, MARC::File::XML
+will let the escape characters pass through
+when as_xml() or as_xml_record() is called. The
+problem is that the escape character is not
+legal in well-formed XML documents, so when
+MARC::File::XML attempts to parse such a record,
+the XML parser will fail.
+
+Stripping such characters will allow a
+MARC::Record->new_from_xml()
+to work, at the possible risk of some data loss.
+
+=cut
+
+# StripNonXmlChars($str)
+#
+# Return a copy of $str with every character that is not permitted in an
+# XML 1.0 document removed.  The caller's variable is not modified.
+#
+#   $str    - string to clean; may be undef or empty
+#   returns - the cleaned copy; undef or empty input is returned unchanged
+sub StripNonXmlChars {
+    my $str = shift;
+
+    # Callers pass values fetched straight from the database, which are
+    # undef when no row matched.  Hand those back untouched rather than
+    # running a substitution on undef (which warns under 'use warnings').
+    return $str unless defined $str and length $str;
+
+    # The negated class lists what is kept: tab, LF, CR, and the ranges
+    # U+0020-U+D7FF, U+E000-U+FFFD and U+10000-U+10FFFF.  Everything else
+    # (e.g. \x1b and the other C0 control characters) is deleted.
+    $str =~ s/[^\x09\x0A\x0D\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]//g;
+    return $str;
+}
=head1 INTERNAL FUNCTIONS
# remove item fields so that they don't get
# added again if record is reverted
- my $old_marc = MARC::Record->new_from_xml($oldxml, 'UTF-8', $rowref->{'encoding'});
+ my $old_marc = MARC::Record->new_from_xml(StripNonXmlChars($oldxml), 'UTF-8', $rowref->{'encoding'});
foreach my $item_field ($old_marc->field($item_tag)) {
$old_marc->delete_field($item_field);
}
$sth->bind_param(1, $import_record_id);
$sth->execute();
while (my $row = $sth->fetchrow_hashref()) {
- my $item_marc = MARC::Record->new_from_xml($row->{'marcxml'}, 'UTF-8', $row->{'encoding'});
+ my $item_marc = MARC::Record->new_from_xml(StripNonXmlChars($row->{'marcxml'}), 'UTF-8', $row->{'encoding'});
# FIXME - duplicate barcode check needs to become part of AddItemFromMarc()
my $item = TransformMarcToKoha($dbh, $item_marc);
my $duplicate_barcode = exists($item->{'barcode'}) && GetItemnumberFromBarcode($item->{'barcode'});
}
} else {
$num_reverted++;
- my $old_record = MARC::Record->new_from_xml($rowref->{'marcxml_old'}, 'UTF-8', $rowref->{'encoding'});
+ my $old_record = MARC::Record->new_from_xml(StripNonXmlChars($rowref->{'marcxml_old'}), 'UTF-8', $rowref->{'encoding'});
my $biblionumber = $rowref->{'matched_biblionumber'};
my ($count, $oldbiblio) = GetBiblio($biblionumber);
$num_items_deleted += BatchRevertItems($rowref->{'import_record_id'}, $rowref->{'matched_biblionumber'});
use C4::Log;
use C4::Branch;
require C4::Reserves;
+use C4::Charset;
use vars qw($VERSION @ISA @EXPORT);
my $xml = shift;
return unless defined $xml and $xml ne "";
- my $marc = MARC::Record->new_from_xml($xml, 'UTF-8', C4::Context->preference("marcflavour"));
+ my $marc = MARC::Record->new_from_xml(StripNonXmlChars($xml), 'UTF-8', C4::Context->preference("marcflavour"));
my $unlinked_subfields = [];
my @fields = $marc->fields();
if ($#fields > -1) {