Attempt to make encoding detection slightly more accurate -- rule out Shift_JIS in...
author srowen <srowen@59b500cc-1b3d-0410-9834-0bbf25fbcc57>
Sat, 8 Nov 2008 14:25:36 +0000 (14:25 +0000)
committer srowen <srowen@59b500cc-1b3d-0410-9834-0bbf25fbcc57>
Sat, 8 Nov 2008 14:25:36 +0000 (14:25 +0000)
git-svn-id: http://zxing.googlecode.com/svn/trunk@677 59b500cc-1b3d-0410-9834-0bbf25fbcc57

core/src/com/google/zxing/qrcode/decoder/DecodedBitStreamParser.java

index f948980..668b0d8 100644 (file)
@@ -265,7 +265,12 @@ final class DecodedBitStreamParser {
       }
       if (value >= 0xA1 && value <= 0xDF) {
         // count the number of characters that might be a Shift_JIS single-byte Katakana character
-        maybeSingleByteKatakanaCount++;
+        if (!lastWasPossibleDoubleByteStart) {
+          maybeSingleByteKatakanaCount++;
+        }
+      }
+      if (!lastWasPossibleDoubleByteStart && ((value >= 0xF0 && value <= 0xFF) || value == 0x80 || value == 0xA0)) {
+        canBeShiftJIS = false;
       }
       if (((value >= 0x81 && value <= 0x9F) || (value >= 0xE0 && value <= 0xEF)) && i < length - 1) {
         // These start double-byte characters in Shift_JIS. Let's see if it's followed by a valid
@@ -288,6 +293,8 @@ final class DecodedBitStreamParser {
           // There is some conflicting information out there about which bytes can follow which in
           // double-byte Shift_JIS characters. The rule above seems to be the one that matches practice.
         }
+      } else {
+        lastWasPossibleDoubleByteStart = false;
       }
     }
     // Distinguishing Shift_JIS and ISO-8859-1 can be a little tough. The crude heuristic is: