package com.google.zxing.qrcode.decoder;
+import com.google.zxing.DecodeHintType;
import com.google.zxing.ReaderException;
import com.google.zxing.common.BitSource;
import com.google.zxing.common.CharacterSetECI;
import com.google.zxing.common.DecoderResult;
import java.io.UnsupportedEncodingException;
+import java.util.Hashtable;
import java.util.Vector;
/**
private DecodedBitStreamParser() {
}
- static DecoderResult decode(byte[] bytes, Version version) throws ReaderException {
+ static DecoderResult decode(byte[] bytes, Version version, ErrorCorrectionLevel ecLevel, Hashtable hints)
+ throws ReaderException {
BitSource bits = new BitSource(bytes);
- StringBuffer result = new StringBuffer();
+ StringBuffer result = new StringBuffer(50);
CharacterSetECI currentCharacterSetECI = null;
boolean fc1InEffect = false;
Vector byteSegments = new Vector(1);
bits.readBits(16);
} else if (mode.equals(Mode.ECI)) {
// Count doesn't apply to ECI
- try {
- int value = parseECIValue(bits);
- currentCharacterSetECI = CharacterSetECI.getCharacterSetECIByValue(value);
- } catch (IllegalArgumentException iae) {
+ int value = parseECIValue(bits);
+ currentCharacterSetECI = CharacterSetECI.getCharacterSetECIByValue(value);
+ if (currentCharacterSetECI == null) {
throw ReaderException.getInstance();
}
} else {
} else if (mode.equals(Mode.ALPHANUMERIC)) {
decodeAlphanumericSegment(bits, result, count, fc1InEffect);
} else if (mode.equals(Mode.BYTE)) {
- decodeByteSegment(bits, result, count, currentCharacterSetECI, byteSegments);
+ decodeByteSegment(bits, result, count, currentCharacterSetECI, byteSegments, hints);
} else if (mode.equals(Mode.KANJI)) {
decodeKanjiSegment(bits, result, count);
} else {
}
} while (!mode.equals(Mode.TERMINATOR));
- return new DecoderResult(bytes, result.toString(), byteSegments.isEmpty() ? null : byteSegments);
+ return new DecoderResult(bytes, result.toString(), byteSegments.isEmpty() ? null : byteSegments, ecLevel);
}
private static void decodeKanjiSegment(BitSource bits,
StringBuffer result,
int count,
CharacterSetECI currentCharacterSetECI,
- Vector byteSegments) throws ReaderException {
+ Vector byteSegments,
+ Hashtable hints) throws ReaderException {
byte[] readBytes = new byte[count];
if (count << 3 > bits.available()) {
throw ReaderException.getInstance();
// upon decoding. I have seen ISO-8859-1 used as well as
// Shift_JIS -- without anything like an ECI designator to
// give a hint.
- encoding = guessEncoding(readBytes);
+ encoding = guessEncoding(readBytes, hints);
} else {
encoding = currentCharacterSetECI.getEncodingName();
}
}
}
- private static String guessEncoding(byte[] bytes) {
+ private static String guessEncoding(byte[] bytes, Hashtable hints) {
+ if (hints != null) {
+ String characterSet = (String) hints.get(DecodeHintType.CHARACTER_SET);
+ if (characterSet != null) {
+ return characterSet;
+ }
+ }
if (ASSUME_SHIFT_JIS) {
return SHIFT_JIS;
}
int length = bytes.length;
boolean canBeISO88591 = true;
boolean canBeShiftJIS = true;
- boolean sawDoubleByteStart = false;
+ int maybeDoubleByteCount = 0;
int maybeSingleByteKatakanaCount = 0;
boolean sawLatin1Supplement = false;
boolean lastWasPossibleDoubleByteStart = false;
for (int i = 0; i < length && (canBeISO88591 || canBeShiftJIS); i++) {
int value = bytes[i] & 0xFF;
- if (value == 0xC2 || value == 0xC3 && i < length - 1) {
+ if ((value == 0xC2 || value == 0xC3) && i < length - 1) {
// This is really a poor hack. The slightly more exotic characters people might want to put in
// a QR Code, by which I mean the Latin-1 supplement characters (e.g. u-umlaut) have encodings
// that start with 0xC2 followed by [0xA0,0xBF], or start with 0xC3 followed by [0x80,0xBF].
if (!lastWasPossibleDoubleByteStart && ((value >= 0xF0 && value <= 0xFF) || value == 0x80 || value == 0xA0)) {
canBeShiftJIS = false;
}
- if (((value >= 0x81 && value <= 0x9F) || (value >= 0xE0 && value <= 0xEF)) && i < length - 1) {
+ if (((value >= 0x81 && value <= 0x9F) || (value >= 0xE0 && value <= 0xEF))) {
// These start double-byte characters in Shift_JIS. Let's see if it's followed by a valid
// second byte.
- sawDoubleByteStart = true;
if (lastWasPossibleDoubleByteStart) {
// If we just checked this and the last byte for being a valid double-byte
// char, don't check starting on this byte. If this and the last byte
// ... otherwise do check to see if this plus the next byte form a valid
// double byte pair encoding a character.
lastWasPossibleDoubleByteStart = true;
- int nextValue = bytes[i + 1] & 0xFF;
- if (nextValue < 0x40 || nextValue > 0xFC) {
+ if (i >= bytes.length - 1) {
canBeShiftJIS = false;
+ } else {
+ int nextValue = bytes[i + 1] & 0xFF;
+ if (nextValue < 0x40 || nextValue > 0xFC) {
+ canBeShiftJIS = false;
+ } else {
+ maybeDoubleByteCount++;
+ }
+ // There is some conflicting information out there about which bytes can follow which in
+ // double-byte Shift_JIS characters. The rule above seems to be the one that matches practice.
}
- // There is some conflicting information out there about which bytes can follow which in
- // double-byte Shift_JIS characters. The rule above seems to be the one that matches practice.
}
} else {
lastWasPossibleDoubleByteStart = false;
}
// Distinguishing Shift_JIS and ISO-8859-1 can be a little tough. The crude heuristic is:
// - If we saw
- // - at least one byte that starts a double-byte value (bytes that are rare in ISO-8859-1), or
+ // - at least three bytes that start a double-byte value (bytes that are rare in ISO-8859-1), or
// - over 5% of bytes that could be single-byte Katakana (also rare in ISO-8859-1),
// - and, saw no sequences that are invalid in Shift_JIS, then we conclude Shift_JIS
- if (canBeShiftJIS && (sawDoubleByteStart || 20 * maybeSingleByteKatakanaCount > length)) {
+ if (canBeShiftJIS && (maybeDoubleByteCount >= 3 || 20 * maybeSingleByteKatakanaCount > length)) {
return SHIFT_JIS;
}
// Otherwise, we default to ISO-8859-1 unless we know it can't be