From fc619af8ede79fde1f53aca67ab9bbbcf84c645f Mon Sep 17 00:00:00 2001 From: Dobrica Pavlinusic Date: Wed, 29 Sep 2004 17:22:24 +0000 Subject: [PATCH] changes to support UTF-8 encoding from Spreadsheet::ParseExcel::FmtDefault. You will have to modify line 69 from return pack('C*', unpack('n*', $sTxt)); to the following, which returns utf-8: return pack('U*', unpack('n*', $sTxt)); git-svn-id: file:///home/dpavlin/private/svn/webpac/trunk@488 13eb9ef6-21d5-0310-b721-a9d68796d827 --- all2xml.pl | 6 +++++- parse_format.pm | 12 ++++++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/all2xml.pl b/all2xml.pl index c3388f1..5031206 100755 --- a/all2xml.pl +++ b/all2xml.pl @@ -813,7 +813,11 @@ print STDERR "using: $type...\n"; for(my $iC = $oWorksheet->{MinCol} ; defined $oWorksheet->{MaxCol} && $iC <= $oWorksheet->{MaxCol} ; $iC++) { my $cell = $oWorksheet->{Cells}[$iR][$iC]; if ($cell) { - $row->{int2col($iC)} = $cell->Value; + # this conversion is a kludge. + # Files from Excel could have + # characters which don't fit into + # destination encoding. + $row->{int2col($iC)} = $utf2cp->convert($cell->Value) || $cell->Value; } } diff --git a/parse_format.pm b/parse_format.pm index 6a9bfb8..e348dde 100644 --- a/parse_format.pm +++ b/parse_format.pm @@ -194,7 +194,14 @@ sub parse_excel_format { my $format = shift; my $row = shift; my $i = shift; - my $codepage = shift; + #my $codepage = shift; + # + # data already comes in utf-8 due to change in + # Spreadsheet::ParseExcel::FmtDefault line 69 from + # return pack('C*', unpack('n*', $sTxt)); + # to the following, which returns utf-8: + # return pack('U*', unpack('n*', $sTxt)); + # return if ($i > 0); # Excel doesn't support repeatable fields @@ -215,9 +222,6 @@ sub parse_excel_format { #print STDERR "--$1-> $format -[",length($format),"] "; if ($row->{$1}) { my $tmp = $row->{$1}; - if ($codepage) { - $tmp = $codepage->convert($tmp) || warn "excel: $1 '$tmp' can't convert"; - } $display .= $prefix . 
$tmp; $swish .= $tmp." "; #print STDERR " == $tmp"; -- 2.20.1