Bug 10572: Add phone to message_transport_types table for new installs

[koha.git] / C4 / Charset.pm
diff --git a/C4/Charset.pm b/C4/Charset.pm

index b3b4b72..f8ddd63 100644 (file)
--- a/C4/Charset.pm
+++ b/C4/Charset.pm
@@ -29,15 +29,17 @@ use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  
  BEGIN {
      # set the version for version checking
  
  BEGIN {
      # set the version for version checking
-    $VERSION = 3.01;
+    $VERSION = 3.07.00.049;
      require Exporter;
      @ISA    = qw(Exporter);
      @EXPORT = qw(
      require Exporter;
      @ISA    = qw(Exporter);
      @EXPORT = qw(
+        NormalizeString
          IsStringUTF8ish
          MarcToUTF8Record
          SetUTF8Flag
          SetMarcUnicodeFlag
          StripNonXmlChars
          IsStringUTF8ish
          MarcToUTF8Record
          SetUTF8Flag
          SetMarcUnicodeFlag
          StripNonXmlChars
+        nsb_clean
      );
  }
  
      );
  }
  
@@ -111,7 +113,7 @@ sub IsStringUTF8ish {
  
  =head2 SetUTF8Flag
  
  
  =head2 SetUTF8Flag
  
-  my $marc_record = SetUTF8Flag($marc_record);
+  my $marc_record = SetUTF8Flag($marc_record, $nfd);
  
  This function sets the PERL UTF8 flag for data.
  It is required when using new_from_usmarc 
  
  This function sets the PERL UTF8 flag for data.
  It is required when using new_from_usmarc 
@@ -119,6 +121,8 @@ since MARC::File::USMARC does not handle PERL UTF8 setting.
  When editing unicode marc records fields and subfields, you
  would end up in double encoding without using this function. 
  
  When editing unicode marc records fields and subfields, you
  would end up in double encoding without using this function. 
  
+If $nfd is set, string normalization will use NFD instead of NFC
+
  FIXME
  In my opinion, this function belongs to MARC::Record and not
  to this package.
  FIXME
  In my opinion, this function belongs to MARC::Record and not
  to this package.
@@ -127,13 +131,13 @@ But since it handles charset, and MARC::Record, it finds its way in that package
  =cut
  
  sub SetUTF8Flag{
  =cut
  
  sub SetUTF8Flag{
-       my ($record)=@_;
+       my ($record, $nfd)=@_;
         return unless ($record && $record->fields());
         foreach my $field ($record->fields()){
                 if ($field->tag()>=10){
                         my @subfields;
                         foreach my $subfield ($field->subfields()){
         return unless ($record && $record->fields());
         foreach my $field ($record->fields()){
                 if ($field->tag()>=10){
                         my @subfields;
                         foreach my $subfield ($field->subfields()){
-                               push @subfields,($$subfield[0],NormalizeString($$subfield[1]));
+                               push @subfields,($$subfield[0],NormalizeString($$subfield[1],$nfd));
                         }
                         my $newfield=MARC::Field->new(
                                                         $field->tag(),
                         }
                         my $newfield=MARC::Field->new(
                                                         $field->tag(),
@@ -148,27 +152,28 @@ sub SetUTF8Flag{
  
  =head2 NormalizeString
  
  
  =head2 NormalizeString
  
-    my $normalized_string=NormalizeString($string);
+    my $normalized_string=NormalizeString($string,$nfd,$transform);
  
  Given a string
  
  Given a string
-
-nfc : If you want to set NFC and not NFD
+nfd : If you want to set NFD and not NFC
  transform : If you expect all the signs to be removed
  transform : If you expect all the signs to be removed
+
  Sets the PERL UTF8 Flag on your initial data if need be
  Sets the PERL UTF8 Flag on your initial data if need be
-and applies cleaning if required 
+and applies cleaning if required
  
  
-Returns a utf8 NFD normalized string
+Returns a utf8 normalized string (NFC by default, NFD if $nfd is set)
  
  Sample code :
  
  Sample code :
-    my $string=NormalizeString ("l'ornithoptère");
-    #results into ornithoptère in NFD form and sets UTF8 Flag
+   my $string=NormalizeString ("l'ornithoptère");
+   #results into ornithoptère in NFC form and sets UTF8 Flag
  
  =cut
  
  
  =cut
  
+
  sub NormalizeString{
  sub NormalizeString{
-       my ($string,$nfc,$transform)=@_;
+       my ($string,$nfd,$transform)=@_;
         utf8::decode($string) unless (utf8::is_utf8($string));
         utf8::decode($string) unless (utf8::is_utf8($string));
-       if ($nfc){
+       if ($nfd){
                 $string= NFD($string);
         }
         else {
                 $string= NFD($string);
         }
         else {
@@ -246,20 +251,20 @@ sub MarcToUTF8Record {
      # If we do not know the source encoding, try some guesses
      # as follows:
      #   1. Record is UTF-8 already.
      # If we do not know the source encoding, try some guesses
      # as follows:
      #   1. Record is UTF-8 already.
-    #   2. If MARC flavor is MARC21, then
+    #   2. If MARC flavor is MARC21 or NORMARC, then
      #      a. record is MARC-8
      #      b. record is ISO-8859-1
      #   3. If MARC flavor is UNIMARC, then
      if (not defined $source_encoding) {
          if ($marc_blob_is_utf8) {
      #      a. record is MARC-8
      #      b. record is ISO-8859-1
      #   3. If MARC flavor is UNIMARC, then
      if (not defined $source_encoding) {
          if ($marc_blob_is_utf8) {
-            # note that for MARC21 we are not bothering to check
+            # note that for MARC21/NORMARC we are not bothering to check
              # if the Leader/09 is set to 'a' or not -- because
              # of problems with various ILSs (including Koha in the
              # past, alas), this just is not trustworthy.
              SetMarcUnicodeFlag($marc_record, $marc_flavour);
              return $marc_record, 'UTF-8', [];
          } else {
              # if the Leader/09 is set to 'a' or not -- because
              # of problems with various ILSs (including Koha in the
              # past, alas), this just is not trustworthy.
              SetMarcUnicodeFlag($marc_record, $marc_flavour);
              return $marc_record, 'UTF-8', [];
          } else {
-            if ($marc_flavour eq 'MARC21') {
+            if ($marc_flavour eq 'MARC21' || $marc_flavour eq 'NORMARC') {
                  return _default_marc21_charconv_to_utf8($marc_record, $marc_flavour);
              } elsif ($marc_flavour =~/UNIMARC/) {
                  return _default_unimarc_charconv_to_utf8($marc_record, $marc_flavour);
                  return _default_marc21_charconv_to_utf8($marc_record, $marc_flavour);
              } elsif ($marc_flavour =~/UNIMARC/) {
                  return _default_unimarc_charconv_to_utf8($marc_record, $marc_flavour);
@@ -314,13 +319,15 @@ sub SetMarcUnicodeFlag {
      my $marc_flavour = shift; # || C4::Context->preference("marcflavour");
  
      $marc_record->encoding('UTF-8');
      my $marc_flavour = shift; # || C4::Context->preference("marcflavour");
  
      $marc_record->encoding('UTF-8');
-    if ($marc_flavour eq 'MARC21') {
+    if ($marc_flavour eq 'MARC21' || $marc_flavour eq 'NORMARC') {
          my $leader = $marc_record->leader();
          substr($leader, 9, 1) = 'a';
          $marc_record->leader($leader); 
      } elsif ($marc_flavour =~/UNIMARC/) {
          my $leader = $marc_record->leader();
          substr($leader, 9, 1) = 'a';
          $marc_record->leader($leader); 
      } elsif ($marc_flavour =~/UNIMARC/) {
+       my $defaultlanguage = C4::Context->preference("UNIMARCField100Language");
+        $defaultlanguage = "fre" if (!$defaultlanguage || length($defaultlanguage) != 3);
          my $string; 
          my $string; 
-               my ($subflength,$encodingposition)=($marc_flavour=~/AUTH/?(21,9):(36,22));
+               my ($subflength,$encodingposition)=($marc_flavour=~/AUTH/?(21,12):(36,25));
                 $string=$marc_record->subfield( 100, "a" );
          if (defined $string && length($string)==$subflength) { 
                         $string = substr $string, 0,$subflength if (length($string)>$subflength);
                 $string=$marc_record->subfield( 100, "a" );
          if (defined $string && length($string)==$subflength) { 
                         $string = substr $string, 0,$subflength if (length($string)>$subflength);
@@ -328,9 +335,10 @@ sub SetMarcUnicodeFlag {
          else { 
              $string = POSIX::strftime( "%Y%m%d", localtime ); 
              $string =~ s/\-//g; 
          else { 
              $string = POSIX::strftime( "%Y%m%d", localtime ); 
              $string =~ s/\-//g; 
-            $string = sprintf( "%-*s", $subflength, $string ); 
+            $string = sprintf( "%-*s", $subflength, $string );
+           substr ( $string, ($encodingposition - 3), 3, $defaultlanguage);
          } 
          } 
-        substr( $string, $encodingposition, 8, "frey50  " ); 
+        substr( $string, $encodingposition, 3, "y50" );
          if ( $marc_record->subfield( 100, "a" ) ) { 
                         $marc_record->field('100')->update(a=>$string);
                 }
          if ( $marc_record->subfield( 100, "a" ) ) { 
                         $marc_record->field('100')->update(a=>$string);
                 }
@@ -338,7 +346,7 @@ sub SetMarcUnicodeFlag {
              $marc_record->insert_grouped_field( 
                  MARC::Field->new( 100, '', '', "a" => $string ) ); 
          }
              $marc_record->insert_grouped_field( 
                  MARC::Field->new( 100, '', '', "a" => $string ) ); 
          }
-               $debug && warn "encodage: ", substr( $marc_record->subfield(100, 'a'), $encodingposition, 8 );
+               $debug && warn "encodage: ", substr( $marc_record->subfield(100, 'a'), $encodingposition, 3 );
      } else {
          warn "Unrecognized marcflavour: $marc_flavour";
      }
      } else {
          warn "Unrecognized marcflavour: $marc_flavour";
      }
@@ -378,6 +386,40 @@ sub StripNonXmlChars {
      return $str;
  }
  
      return $str;
  }
  
+
+
+=head2 nsb_clean
+
+=over 4
+
+nsb_clean($string);
+
+=back
+
+Removes the Non Sorting Block begin/end marker characters (NSB / NSE) from $string and returns the cleaned string
+
+=cut
+sub nsb_clean {
+    # Strip Non Sorting Block delimiters from a string.
+    #
+    # Parameters:
+    #   $string - the text to clean (may be undef)
+    #
+    # Returns:
+    #   a copy of $string with every occurrence of the following characters
+    #   removed; the argument itself is not modified:
+    #     \x88 / \x89 : NSB / NSE (begin / end of Non Sorting Block)
+    #     \x98 / \x9C : alternate NSB / NSE markers
+    #     \xC2        : stray character sometimes left behind once the
+    #                   markers above have been removed
+    #   undef is returned unchanged.
+    #
+    # NOTE(review): \xC2 is the lead byte of the two-byte UTF-8 encoding of
+    # the markers above, which presumably explains why it is left over on
+    # undecoded input -- confirm. On an already-decoded string this also
+    # removes U+00C2.
+    my ($string) = @_;
+
+    # Nothing to strip; also avoids "uninitialized value" warnings.
+    return $string unless defined $string;
+
+    # Work on the lexical copy instead of the global $_, so the caller's $_
+    # is never clobbered (and we never die when $_ is aliased to a
+    # read-only value).
+    $string =~ s/[\x88\x89\x98\x9C\xC2]//g;
+
+    return $string;
+}
+
+
  =head1 INTERNAL FUNCTIONS
  
  =head2 _default_marc21_charconv_to_utf8
  =head1 INTERNAL FUNCTIONS
  
  =head2 _default_marc21_charconv_to_utf8