3 # perl/encoding.pl </usr/lib/tcl8.3/encoding/cp850.enc
4 # for x in /usr/lib/tcl8.3/encoding/*; do perl/encoding.pl <$x; done >encodings
6 # read a Tcl encoding file (see man Tcl_GetEncoding)
7 # spit out a perl translator
12 ($enc) = /: ([^,]+),/;
15 /^S/ || die "not a single byte encoding: got '$_'";
17 ($fallback,$symbol,$pages) = split /\s+/;
18 1 == $pages || die "not a single byte encoding: got $fallback,$symbol,$pages";
19 $fallback = hex $fallback;
20 $fbchr = 31<$fallback && $fallback<127
21 ? chr($fallback) : sprintf('\x%02x', $fallback);
22 print STDERR "fallback character is $fallback ($fbchr)\n";
26 hex && die "base is not 00 but '$_'";
27 for $n (0..7) { # check for ASCII
28 @v = unpack( 'n*', pack( 'H64', scalar(<STDIN>) ) );
29 15 == $#v || die "length is ".$#v;
30 # print STDERR join(',',@v), "\n";
32 $v[$_] == 16*$n + $_ || die "bad val $v[$_] at row $n pos $_";
36 for (0..127) { $rv[$_] = $fbchr; }
38 @v = unpack( 'n*', pack( 'H64', scalar(<STDIN>) ) );
39 15 == $#v || die "length is ".$#v;
40 print STDERR join(',',@v), "\n";
45 $tr .= sprintf('\x%02x',$v[$_]);
46 $rv[$v[$_]-128] = sprintf('\x%02x',16*$n + $_) if 127 < $v[$_];
50 print "# convert $enc to Latin1\n";
51 print 'y/\x80-\xff/',$tr,"/;\n";
53 print "# convert Latin1 to $enc\n";
54 print 'y/\x80-\xff/',$rv,"/;\n";
56 # Latin1 roundtrip test
57 $_ = $test = pack( 'C*', (160..255) );
58 eval 'y/\x80-\xff/'.$rv.'/';
59 print STDERR "Latin1 160..255 in encoding $enc\n";
61 print STDERR 160+16*$n,': ',join(',',unpack('C*',substr($_,16*$n,16))), "\n";
63 print STDERR 'test: ',$test,"\n";
64 eval 'y/\x80-\xff/'.$tr.'/';
65 print STDERR "trip: ",$_,"\n";
66 print STDERR "Latin1 160..255 after roundtrip through $enc\n";
68 print STDERR 160+16*$n,': ',join(',',unpack('C*',substr($_,16*$n,16))), "\n";