/*
 * Copyright 2007 ZXing authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 package com.google.zxing.qrcode.decoder;
19 import com.google.zxing.FormatException;
20 import com.google.zxing.common.BitSource;
21 import com.google.zxing.common.CharacterSetECI;
22 import com.google.zxing.common.DecoderResult;
23 import com.google.zxing.common.StringUtils;
25 import java.io.UnsupportedEncodingException;
26 import java.util.Hashtable;
27 import java.util.Vector;
30 * <p>QR Codes can encode text as bits in one of several modes, and can use multiple modes
31 * in one QR Code. This class decodes the bits back into text.</p>
33 * <p>See ISO 18004:2006, 6.4.3 - 6.4.7</p>
37 final class DecodedBitStreamParser {
40 * See ISO 18004:2006, 6.4.4 Table 5
42 private static final char[] ALPHANUMERIC_CHARS = {
43 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B',
44 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
45 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
46 ' ', '$', '%', '*', '+', '-', '.', '/', ':'
49 private DecodedBitStreamParser() {
52 static DecoderResult decode(byte[] bytes, Version version, ErrorCorrectionLevel ecLevel, Hashtable hints)
53 throws FormatException {
54 BitSource bits = new BitSource(bytes);
55 StringBuffer result = new StringBuffer(50);
56 CharacterSetECI currentCharacterSetECI = null;
57 boolean fc1InEffect = false;
58 Vector byteSegments = new Vector(1);
61 // While still another segment to read...
62 if (bits.available() < 4) {
63 // OK, assume we're done. Really, a TERMINATOR mode should have been recorded here
64 mode = Mode.TERMINATOR;
67 mode = Mode.forBits(bits.readBits(4)); // mode is encoded by 4 bits
68 } catch (IllegalArgumentException iae) {
69 throw FormatException.getFormatInstance();
72 if (!mode.equals(Mode.TERMINATOR)) {
73 if (mode.equals(Mode.FNC1_FIRST_POSITION) || mode.equals(Mode.FNC1_SECOND_POSITION)) {
74 // We do little with FNC1 except alter the parsed result a bit according to the spec
76 } else if (mode.equals(Mode.STRUCTURED_APPEND)) {
77 // not really supported; all we do is ignore it
78 // Read next 8 bits (symbol sequence #) and 8 bits (parity data), then continue
80 } else if (mode.equals(Mode.ECI)) {
81 // Count doesn't apply to ECI
82 int value = parseECIValue(bits);
83 currentCharacterSetECI = CharacterSetECI.getCharacterSetECIByValue(value);
84 if (currentCharacterSetECI == null) {
85 throw FormatException.getFormatInstance();
88 // How many characters will follow, encoded in this mode?
89 int count = bits.readBits(mode.getCharacterCountBits(version));
90 if (mode.equals(Mode.NUMERIC)) {
91 decodeNumericSegment(bits, result, count);
92 } else if (mode.equals(Mode.ALPHANUMERIC)) {
93 decodeAlphanumericSegment(bits, result, count, fc1InEffect);
94 } else if (mode.equals(Mode.BYTE)) {
95 decodeByteSegment(bits, result, count, currentCharacterSetECI, byteSegments, hints);
96 } else if (mode.equals(Mode.KANJI)) {
97 decodeKanjiSegment(bits, result, count);
99 throw FormatException.getFormatInstance();
103 } while (!mode.equals(Mode.TERMINATOR));
105 return new DecoderResult(bytes, result.toString(), byteSegments.isEmpty() ? null : byteSegments, ecLevel);
108 private static void decodeKanjiSegment(BitSource bits,
110 int count) throws FormatException {
111 // Each character will require 2 bytes. Read the characters as 2-byte pairs
112 // and decode as Shift_JIS afterwards
113 byte[] buffer = new byte[2 * count];
116 // Each 13 bits encodes a 2-byte character
117 int twoBytes = bits.readBits(13);
118 int assembledTwoBytes = ((twoBytes / 0x0C0) << 8) | (twoBytes % 0x0C0);
119 if (assembledTwoBytes < 0x01F00) {
120 // In the 0x8140 to 0x9FFC range
121 assembledTwoBytes += 0x08140;
123 // In the 0xE040 to 0xEBBF range
124 assembledTwoBytes += 0x0C140;
126 buffer[offset] = (byte) (assembledTwoBytes >> 8);
127 buffer[offset + 1] = (byte) assembledTwoBytes;
131 // Shift_JIS may not be supported in some environments:
133 result.append(new String(buffer, StringUtils.SHIFT_JIS));
134 } catch (UnsupportedEncodingException uee) {
135 throw FormatException.getFormatInstance();
139 private static void decodeByteSegment(BitSource bits,
142 CharacterSetECI currentCharacterSetECI,
144 Hashtable hints) throws FormatException {
145 byte[] readBytes = new byte[count];
146 if (count << 3 > bits.available()) {
147 throw FormatException.getFormatInstance();
149 for (int i = 0; i < count; i++) {
150 readBytes[i] = (byte) bits.readBits(8);
153 if (currentCharacterSetECI == null) {
154 // The spec isn't clear on this mode; see
155 // section 6.4.5: t does not say which encoding to assuming
156 // upon decoding. I have seen ISO-8859-1 used as well as
157 // Shift_JIS -- without anything like an ECI designator to
159 encoding = StringUtils.guessEncoding(readBytes, hints);
161 encoding = currentCharacterSetECI.getEncodingName();
164 result.append(new String(readBytes, encoding));
165 } catch (UnsupportedEncodingException uce) {
166 throw FormatException.getFormatInstance();
168 byteSegments.addElement(readBytes);
171 private static void decodeAlphanumericSegment(BitSource bits,
174 boolean fc1InEffect) {
175 // Read two characters at a time
176 int start = result.length();
178 int nextTwoCharsBits = bits.readBits(11);
179 result.append(ALPHANUMERIC_CHARS[nextTwoCharsBits / 45]);
180 result.append(ALPHANUMERIC_CHARS[nextTwoCharsBits % 45]);
184 // special case: one character left
185 result.append(ALPHANUMERIC_CHARS[bits.readBits(6)]);
187 // See section 6.4.8.1, 6.4.8.2
189 // We need to massage the result a bit if in an FNC1 mode:
190 for (int i = start; i < result.length(); i++) {
191 if (result.charAt(i) == '%') {
192 if (i < result.length() - 1 && result.charAt(i + 1) == '%') {
193 // %% is rendered as %
194 result.deleteCharAt(i + 1);
196 // In alpha mode, % should be converted to FNC1 separator 0x1D
197 result.setCharAt(i, (char) 0x1D);
204 private static void decodeNumericSegment(BitSource bits,
206 int count) throws FormatException {
207 // Read three digits at a time
209 // Each 10 bits encodes three digits
210 int threeDigitsBits = bits.readBits(10);
211 if (threeDigitsBits >= 1000) {
212 throw FormatException.getFormatInstance();
214 result.append(ALPHANUMERIC_CHARS[threeDigitsBits / 100]);
215 result.append(ALPHANUMERIC_CHARS[(threeDigitsBits / 10) % 10]);
216 result.append(ALPHANUMERIC_CHARS[threeDigitsBits % 10]);
220 // Two digits left over to read, encoded in 7 bits
221 int twoDigitsBits = bits.readBits(7);
222 if (twoDigitsBits >= 100) {
223 throw FormatException.getFormatInstance();
225 result.append(ALPHANUMERIC_CHARS[twoDigitsBits / 10]);
226 result.append(ALPHANUMERIC_CHARS[twoDigitsBits % 10]);
227 } else if (count == 1) {
228 // One digit left over to read
229 int digitBits = bits.readBits(4);
230 if (digitBits >= 10) {
231 throw FormatException.getFormatInstance();
233 result.append(ALPHANUMERIC_CHARS[digitBits]);
237 private static int parseECIValue(BitSource bits) {
238 int firstByte = bits.readBits(8);
239 if ((firstByte & 0x80) == 0) {
241 return firstByte & 0x7F;
242 } else if ((firstByte & 0xC0) == 0x80) {
244 int secondByte = bits.readBits(8);
245 return ((firstByte & 0x3F) << 8) | secondByte;
246 } else if ((firstByte & 0xE0) == 0xC0) {
248 int secondThirdBytes = bits.readBits(16);
249 return ((firstByte & 0x1F) << 16) | secondThirdBytes;
251 throw new IllegalArgumentException("Bad ECI bits starting with byte " + firstByte);