Bug 22330: (QA follow-up) Remove duplicate use lines, combine and sort remaining lines

[koha.git] / C4 / Charset.pm
diff --git a/C4/Charset.pm b/C4/Charset.pm

index 6bfd935..4dff14c 100644 (file)
--- a/C4/Charset.pm
+++ b/C4/Charset.pm
@@ -4,33 +4,31 @@ package C4::Charset;
  #
  # This file is part of Koha.
  #
-# Koha is free software; you can redistribute it and/or modify it under the
-# terms of the GNU General Public License as published by the Free Software
-# Foundation; either version 2 of the License, or (at your option) any later
-# version.
+# Koha is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
  #
-# Koha is distributed in the hope that it will be useful, but WITHOUT ANY
-# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
-# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+# Koha is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
  #
-# You should have received a copy of the GNU General Public License along
-# with Koha; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+# You should have received a copy of the GNU General Public License
+# along with Koha; if not, see <http://www.gnu.org/licenses>.
  
  use strict;
  use warnings;
  
  use MARC::Charset qw/marc8_to_utf8/;
  use Text::Iconv;
-use C4::Context;
  use C4::Debug;
  use Unicode::Normalize;
+use Encode qw( decode encode is_utf8 );
  
-use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
+use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  
  BEGIN {
-    # set the version for version checking
-    $VERSION = 3.07.00.049;
      require Exporter;
      @ISA    = qw(Exporter);
      @EXPORT = qw(
@@ -111,8 +109,8 @@ will assume that this situation occur does not very often.
  sub IsStringUTF8ish {
      my $str = shift;
  
-    return 1 if utf8::is_utf8($str);
-    return utf8::decode($str);
+    return 1 if Encode::is_utf8($str);
+    return utf8::decode( $str );
  }
  
  =head2 SetUTF8Flag
@@ -180,7 +178,7 @@ Sample code :
  sub NormalizeString{
         my ($string,$nfd,$transform)=@_;
      return $string unless defined($string); # force scalar context return.
-       utf8::decode($string) unless (utf8::is_utf8($string));
+    $string = Encode::decode('UTF-8', $string) unless (Encode::is_utf8($string));
         if ($nfd){
                 $string= NFD($string);
         }
@@ -332,6 +330,7 @@ sub SetMarcUnicodeFlag {
          substr($leader, 9, 1) = 'a';
          $marc_record->leader($leader); 
      } elsif ($marc_flavour =~/UNIMARC/) {
+        require C4::Context;
         my $defaultlanguage = C4::Context->preference("UNIMARCField100Language");
          $defaultlanguage = "fre" if (!$defaultlanguage || length($defaultlanguage) != 3);
          my $string; 
@@ -764,7 +763,7 @@ where the eight bit is set) octet with the Unicode
  replacement character.  This is meant as a last-ditch
  method, and would be best used as part of a UI that
  lets a cataloguer pick various character conversions
-until he or she finds the right one.
+until they find the right one.
  
  =cut
  
@@ -803,6 +802,9 @@ Converts a string from ISO-5426 to UTF-8.
  
  
  my %chars;
+
+####
+## 0xb
  $chars{0xb0}=0x0101;#3/0ayn[ain]
  $chars{0xb1}=0x0623;#3/1alif/hamzah[alefwithhamzaabove]
  #$chars{0xb2}=0x00e0;#'à';
@@ -811,15 +813,47 @@ $chars{0xb2}=0x00e0;#3/2leftlowsinglequotationmark
  $chars{0xb3}=0x00e7;#3/2leftlowsinglequotationmark
  # $chars{0xb4}='è';
  $chars{0xb4}=0x00e8;
-$chars{0xbd}=0x02b9;
-$chars{0xbe}=0x02ba;
  # $chars{0xb5}='é';
  $chars{0xb5}=0x00e9;
+$chars{0xb6}=0x2021; # double dagger
+$chars{0xb7}=0x00b7; # middle dot
+$chars{0xb8}=0x2033; # double prime
+$chars{0xb9}=0x2019; # right single quotation mark
+$chars{0xba}=0x201d; # right double quotation mark
+$chars{0xbb}=0x00bb; # right-pointing double angle quotation mark
+$chars{0xbc}=0x266f; # music sharp sign
+$chars{0xbd}=0x02b9; # modifier letter prime
+$chars{0xbe}=0x02ba; # modifier letter double prime
+$chars{0xbf}=0x00bf; # inverted question mark
+
+####
+## 0xe
+$chars{0xe1}=0x00c6; # latin capital letter ae
+$chars{0xe2}=0x0110; # latin capital letter d with stroke
+$chars{0xe6}=0x0132; # latin capital ligature ij
+$chars{0xe8}=0x0141; # latin capital letter l with stroke
+$chars{0xe9}=0x00d8; # latin capital letter o with stroke
+$chars{0xea}=0x0152; # latin capital ligature oe
+$chars{0xec}=0x00de; # latin capital letter thorn
+
+####
+## 0xf
+$chars{0xf1}=0x00e6; # latin small letter ae
+$chars{0xf2}=0x0111; # latin small letter d with stroke
+$chars{0xf3}=0x00f0; # latin small letter eth
+$chars{0xf5}=0x0131; # latin small letter dotless i
+$chars{0xf6}=0x0133; # latin small ligature ij
+$chars{0xf8}=0x0142; # latin small letter l with stroke
+$chars{0xf9}=0x00f8; # latin small letter o with stroke
+$chars{0xfa}=0x0153; # latin small ligature oe
+$chars{0xfb}=0x00df; # latin small letter sharp s
+$chars{0xfc}=0x00fe; # latin small letter thorn
+
+####
+## Others
  $chars{0x97}=0x003c;#3/2leftlowsinglequotationmark
  $chars{0x98}=0x003e;#3/2leftlowsinglequotationmark
-$chars{0xfa}=0x0153; #oe
-$chars{0xea}=0x0152; #oe
-$chars{0x81d1}=0x00b0;
+#$chars{0x81d1}=0x00b0; # FIXME useless
  
  ####
  ## combined characters iso5426
@@ -1121,8 +1155,8 @@ $chars{0xd375}=0x0173; # small u with ogonek
  $chars{0xd441}=0x1e00; # capital a with ring below
  $chars{0xd461}=0x1e01; # small a with ring below
          # 5/5 half circle below
-$chars{0xf948}=0x1e2a; # capital h with breve below
-$chars{0xf968}=0x1e2b; # small h with breve below
+$chars{0xd548}=0x1e2a; # capital h with breve below
+$chars{0xd568}=0x1e2b; # small h with breve below
          # 5/6 dot below
  $chars{0xd641}=0x1ea0; # capital a with dot below
  $chars{0xd642}=0x1e04; # capital b with dot below