# Suite 330, Boston, MA 02111-1307 USA
use strict;
+use warnings;
+
use MARC::Charset qw/marc8_to_utf8/;
use Text::Iconv;
+use C4::Debug;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
BEGIN {
# set the version for version checking
- $VERSION = 3.00;
+ $VERSION = 3.01;
require Exporter;
@ISA = qw(Exporter);
@EXPORT = qw(
my $marc = shift;
my $marc_flavour = shift;
my $source_encoding = shift;
-
my $marc_record;
my $marc_blob_is_utf8 = 0;
if (ref($marc) eq 'MARC::Record') {
$marc =~ s/^\s+//;
$marc =~ s/\s+$//;
$marc_blob_is_utf8 = IsStringUTF8ish($marc);
- $marc_record = MARC::Record->new_from_usmarc($marc);
+ eval {
+ $marc_record = MARC::Record->new_from_usmarc($marc);
+ };
+ if ($@) {
+ # if we fail the first time, one likely problem
+ # is that we have a MARC21 record that says that it's
+ # UTF-8 (Leader/09 = 'a') but contains non-UTF-8 characters.
+ # We'll try parsing it again.
+ substr($marc, 9, 1) = ' ';
+ eval {
+ $marc_record = MARC::Record->new_from_usmarc($marc);
+ };
+ if ($@) {
+ # it's hopeless; return an empty MARC::Record
+ return MARC::Record->new(), 'failed', ['could not parse MARC blob'];
+ }
+ }
}
# If we do not know the source encoding, try some guesses
} else {
if ($marc_flavour eq 'MARC21') {
return _default_marc21_charconv_to_utf8($marc_record, $marc_flavour);
- } elsif ($marc_flavour eq 'UNIMARC') {
+ } elsif ($marc_flavour =~/UNIMARC/) {
return _default_unimarc_charconv_to_utf8($marc_record, $marc_flavour);
} else {
return _default_marc21_charconv_to_utf8($marc_record, $marc_flavour);
@errors = _marc_iso5426_to_utf8($marc_record, $marc_flavour);
} else {
# assume any other character encoding is for Text::Iconv
- @errors = _marc_to_utf8_via_text_iconv($marc_record, $marc_flavour, 'iso-8859-1');
+ @errors = _marc_to_utf8_via_text_iconv($marc_record, $marc_flavour, $source_encoding);
}
if (@errors) {
# SetMarcUnicodeFlag($marc_record, $marc_flavour)
#
# Flags $marc_record as UTF-8 encoded, in the way the given MARC flavour
# records that fact.  The record is modified in place.
#
#   $marc_record  - record object; must support encoding(), leader(),
#                   field(), subfield() and insert_grouped_field().
#   $marc_flavour - flavour name:
#       'MARC21'            Leader/09 is set to 'a'.
#       matches /UNIMARC/   the string in 100$a gets the eight characters at
#                           the encoding offset overwritten with "frey50  ".
#                           Flavours that also match /AUTH/ use a 21-character
#                           string with the offset at 9; all others use 36
#                           characters and offset 22.
#       anything else       only a warning is emitted.
#
# Callers should not rely on the return value (it is whatever the last
# statement of the branch taken evaluates to).
sub SetMarcUnicodeFlag {
    my $marc_record  = shift;
    my $marc_flavour = shift;    # || C4::Context->preference("marcflavour");

    # Treat a missing flavour as unrecognized without tripping
    # "uninitialized value" warnings in the comparisons below.
    $marc_flavour = '' unless defined $marc_flavour;

    $marc_record->encoding('UTF-8');
    if ( $marc_flavour eq 'MARC21' ) {
        my $leader = $marc_record->leader();
        substr( $leader, 9, 1 ) = 'a';
        $marc_record->leader($leader);
    }
    elsif ( $marc_flavour =~ /UNIMARC/ ) {
        my ( $subflength, $encodingposition ) =
          ( $marc_flavour =~ /AUTH/ ? ( 21, 9 ) : ( 36, 22 ) );

        # Reuse the existing 100$a only when it has exactly the expected
        # length; otherwise start over from today's date padded with spaces.
        my $string = $marc_record->subfield( 100, "a" );
        unless ( defined $string && length($string) == $subflength ) {
            require POSIX;    # strftime is called fully qualified below
            $string = sprintf( "%-*s", $subflength,
                POSIX::strftime( "%Y%m%d", localtime ) );
        }

        # The replacement must be exactly 8 characters so the fixed-length
        # string keeps its length.
        # NOTE(review): presumably language 'fre' + transliteration 'y' +
        # character set '50  ' -- confirm against the UNIMARC manual.
        substr( $string, $encodingposition, 8, "frey50  " );

        # Decide on the presence of the field, not on the truthiness of the
        # subfield value: a 100 field with a missing, empty or "0" $a must be
        # updated in place rather than joined by a second 100 field.
        if ( my $field = $marc_record->field('100') ) {
            $field->update( a => $string );
        }
        else {
            $marc_record->insert_grouped_field(
                MARC::Field->new( 100, '', '', "a" => $string ) );
        }
        $debug && warn "encodage: ", substr( $marc_record->subfield(100, 'a'), $encodingposition, 8 );
    }
    else {
        warn "Unrecognized marcflavour: $marc_flavour";
    }
}
# StripNonXmlChars($str)
#
# Returns a copy of $str from which every character outside the allowed set
# has been deleted.  The allowed set is TAB, LF, CR and the code point ranges
# U+0020-U+D7FF, U+E000-U+FFFD and U+10000-U+10FFFF (this matches the Char
# production of XML 1.0).  An undefined or empty argument yields "".
sub StripNonXmlChars {
    my ($text) = @_;

    # Nothing to clean: normalise undef to the empty string.
    return "" unless defined($text) && $text ne "";

    $text =~ s/[^\x09\x0A\x0D\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]//g;
    return $text;
}
=cut
-sub char_decode5426 {
- my ( $string) = @_;
- my $result;
+
my %chars;
$chars{0xb0}=0x0101;#3/0ayn[ain]
$chars{0xb1}=0x0623;#3/1alif/hamzah[alefwithhamzaabove]
# 5/14 right half of ligature sign
# 5/15 right half of double tilde
# map {printf "%x :%x\n",$_,$chars{$_};}keys %chars;
+
+sub char_decode5426 {
+ my ( $string) = @_;
+ my $result;
+
my @data = unpack("C*", $string);
my @characters;
my $length=scalar(@data);