changes to support UTF-8 encoding from
[webpac] / all2xml.pl
index 11e49d6..5031206 100755 (executable)
@@ -168,6 +168,8 @@ sub data2xml {
                        ($s,$se,$d,$i) = (0,1,0,0);
                } elsif (lc($type) =~ /^lookup/) {
                        ($s,$se,$d,$i,$il) = (0,1,0,0,1);
+               } elsif ($type) {
+                       print STDERR "WARNING: unknown type: $type\n";
                }
                return ($s,$se,$d,$i,$il);
        }
@@ -637,6 +639,10 @@ foreach my $database ($cfg->Sections) {
        my $lookup_file = $cfg -> val($database, 'lookup_newfile'); # optional
        if ($lookup_file) {
                #tie %lhash, 'GDBM_File', $lookup_file, &GDBM_NEWDB, 0644;
+               if (! -e $lookup_file) {
+                       open(LOOKUP, "> $lookup_file") || die "can't create $lookup_file': $!";
+                       close(LOOKUP);
+               }
                tie %lhash, 'TDB_File', $lookup_file, TDB_CLEAR_IF_FIRST, O_RDWR, 0644;
                print STDERR "creating lookup file '$lookup_file'\n";
                # delete memory cache for lookup file
@@ -807,7 +813,11 @@ print STDERR "using: $type...\n";
                        for(my $iC = $oWorksheet->{MinCol} ; defined $oWorksheet->{MaxCol} && $iC <= $oWorksheet->{MaxCol} ; $iC++) {
                                my $cell = $oWorksheet->{Cells}[$iR][$iC];
                                if ($cell) {
-                                       $row->{int2col($iC)} = $cell->Value;
+                                       # This conversion is a kludge.
+                                       # Files from Excel could have
+                                       # characters which don't fit into
+                                       # the destination encoding.
+                                       $row->{int2col($iC)} = $utf2cp->convert($cell->Value) || $cell->Value;
                                }
                        }