changes to support UTF-8 encoding from
author Dobrica Pavlinusic <dpavlin@rot13.org>
Wed, 29 Sep 2004 17:22:24 +0000 (17:22 +0000)
committer Dobrica Pavlinusic <dpavlin@rot13.org>
Wed, 29 Sep 2004 17:22:24 +0000 (17:22 +0000)
Spreadsheet::ParseExcel::FmtDefault.

You will have to modify line 69 of that module from
return pack('C*', unpack('n*', $sTxt));
to the following, which returns UTF-8:
return pack('U*', unpack('n*', $sTxt));

git-svn-id: file:///home/dpavlin/private/svn/webpac/trunk@488 13eb9ef6-21d5-0310-b721-a9d68796d827

all2xml.pl
parse_format.pm

index c3388f1..5031206 100755 (executable)
@@ -813,7 +813,11 @@ print STDERR "using: $type...\n";
                        for(my $iC = $oWorksheet->{MinCol} ; defined $oWorksheet->{MaxCol} && $iC <= $oWorksheet->{MaxCol} ; $iC++) {
                                my $cell = $oWorksheet->{Cells}[$iR][$iC];
                                if ($cell) {
                        for(my $iC = $oWorksheet->{MinCol} ; defined $oWorksheet->{MaxCol} && $iC <= $oWorksheet->{MaxCol} ; $iC++) {
                                my $cell = $oWorksheet->{Cells}[$iR][$iC];
                                if ($cell) {
-                                       $row->{int2col($iC)} = $cell->Value;
+                                       # This conversion is a kludge.
+                                       # Files from Excel could have
+                                       # characters which don't fit into
+                                       # the destination encoding.
+                                       $row->{int2col($iC)} = $utf2cp->convert($cell->Value) || $cell->Value;
                                }
                        }
 
                                }
                        }
 
index 6a9bfb8..e348dde 100644 (file)
@@ -194,7 +194,14 @@ sub parse_excel_format {
        my $format = shift;
        my $row = shift;
        my $i = shift;
        my $format = shift;
        my $row = shift;
        my $i = shift;
-       my $codepage = shift;
+       #my $codepage = shift;
+       #
+       # data already arrives in UTF-8 due to a change in
+       # Spreadsheet::ParseExcel::FmtDefault line 69 from
+       #       return pack('C*', unpack('n*', $sTxt));
+       # to the following, which returns UTF-8:
+       #       return pack('U*', unpack('n*', $sTxt));
+       #
 
        return if ($i > 0);     # Excel doesn't support repeatable fields
 
 
        return if ($i > 0);     # Excel doesn't support repeatable fields
 
@@ -215,9 +222,6 @@ sub parse_excel_format {
 #print STDERR "--$1-> $format -[",length($format),"] ";
                        if ($row->{$1}) {
                                my $tmp = $row->{$1};
 #print STDERR "--$1-> $format -[",length($format),"] ";
                        if ($row->{$1}) {
                                my $tmp = $row->{$1};
-                               if ($codepage) {
-                                       $tmp = $codepage->convert($tmp) || warn "excel: $1 '$tmp' can't convert";
-                               }
                                $display .= $prefix . $tmp;
                                $swish .= $tmp." ";
 #print STDERR " == $tmp";
                                $display .= $prefix . $tmp;
                                $swish .= $tmp." ";
 #print STDERR " == $tmp";