X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=C4%2FCharset.pm;h=2968b5ee1a4d7653d2e85b644be5ed96b18b2538;hb=9c5e40e4923179bf0b2b630d3e09797dc4c1fdb0;hp=a4e6b716f8e5b44d251c0cebb1475e827ec9ac88;hpb=aef1dd15fbe37a8a9c30ba4b38f7ecd6c1fea54d;p=koha.git diff --git a/C4/Charset.pm b/C4/Charset.pm index a4e6b716f8..2968b5ee1a 100644 --- a/C4/Charset.pm +++ b/C4/Charset.pm @@ -29,7 +29,7 @@ use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); BEGIN { # set the version for version checking - $VERSION = 3.01; + $VERSION = 3.07.00.049; require Exporter; @ISA = qw(Exporter); @EXPORT = qw( @@ -39,6 +39,7 @@ BEGIN { SetUTF8Flag SetMarcUnicodeFlag StripNonXmlChars + nsb_clean ); } @@ -130,23 +131,26 @@ But since it handles charset, and MARC::Record, it finds its way in that package =cut sub SetUTF8Flag{ - my ($record, $nfd)=@_; - return unless ($record && $record->fields()); - foreach my $field ($record->fields()){ - if ($field->tag()>=10){ - my @subfields; - foreach my $subfield ($field->subfields()){ - push @subfields,($$subfield[0],NormalizeString($$subfield[1],$nfd)); - } - my $newfield=MARC::Field->new( - $field->tag(), - $field->indicator(1), - $field->indicator(2), - @subfields - ); - $field->replace_with($newfield); - } - } + my ($record, $nfd)=@_; + return unless ($record && $record->fields()); + foreach my $field ($record->fields()){ + if ($field->tag()>=10){ + my @subfields; + foreach my $subfield ($field->subfields()){ + push @subfields,($$subfield[0],NormalizeString($$subfield[1],$nfd)); + } + eval { + my $newfield=MARC::Field->new( + $field->tag(), + $field->indicator(1), + $field->indicator(2), + @subfields + ); + $field->replace_with($newfield); + }; + warn "ERROR occurred in SetUTF8Flag $@" if $@; + } + } } =head2 NormalizeString @@ -323,8 +327,10 @@ sub SetMarcUnicodeFlag { substr($leader, 9, 1) = 'a'; $marc_record->leader($leader); } elsif ($marc_flavour =~/UNIMARC/) { + my $defaultlanguage = C4::Context->preference("UNIMARCField100Language"); + $defaultlanguage = "fre" if (!$defaultlanguage || length($defaultlanguage) != 3); my $string; - my ($subflength,$encodingposition)=($marc_flavour=~/AUTH/?(21,9):(36,22)); + my ($subflength,$encodingposition)=($marc_flavour=~/AUTH/?(21,12):(36,25)); $string=$marc_record->subfield( 100, "a" ); if (defined $string && length($string)==$subflength) { $string = substr $string, 0,$subflength if (length($string)>$subflength); @@ -332,9 +338,10 @@ sub SetMarcUnicodeFlag { else { $string = POSIX::strftime( "%Y%m%d", localtime ); $string =~ s/\-//g; - $string = sprintf( "%-*s", $subflength, $string ); + $string = sprintf( "%-*s", $subflength, $string ); + substr ( $string, ($encodingposition - 3), 3, $defaultlanguage); } - substr( $string, $encodingposition, 8, "frey50 " ); + substr( $string, $encodingposition, 3, "y50" ); if ( $marc_record->subfield( 100, "a" ) ) { $marc_record->field('100')->update(a=>$string); } @@ -342,7 +349,7 @@ sub SetMarcUnicodeFlag { $marc_record->insert_grouped_field( MARC::Field->new( 100, '', '', "a" => $string ) ); } - $debug && warn "encodage: ", substr( $marc_record->subfield(100, 'a'), $encodingposition, 8 ); + $debug && warn "encodage: ", substr( $marc_record->subfield(100, 'a'), $encodingposition, 3 ); } else { warn "Unrecognized marcflavour: $marc_flavour"; } @@ -382,6 +389,40 @@ sub StripNonXmlChars { return $str; } + + +=head2 nsb_clean + +=over 4 + +nsb_clean($string); + +=back + +Removes Non Sorting Block characters + +=cut +sub nsb_clean { + my $NSB = '\x88' ; # NSB : begin Non Sorting Block + my $NSE = '\x89' ; # NSE : Non Sorting Block end + my $NSB2 = '\x98' ; # NSB : begin Non Sorting Block + my $NSE2 = '\x9C' ; # NSE : Non Sorting Block end + my $C2 = '\xC2' ; # What is this char ? It is sometimes left by the regexp after removing NSB / NSE + + # handles non sorting blocks + my ($string) = @_ ; + $_ = $string ; + s/$NSB//g ; + s/$NSE//g ; + s/$NSB2//g ; + s/$NSE2//g ; + s/$C2//g ; + $string = $_ ; + + return($string) ; +} + + =head1 INTERNAL FUNCTIONS =head2 _default_marc21_charconv_to_utf8