/*
 * Copyright 2008 ZXing authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 package com.google.zxing.datamatrix.decoder;
19 import com.google.zxing.FormatException;
20 import com.google.zxing.common.BitSource;
21 import com.google.zxing.common.DecoderResult;
23 import java.io.UnsupportedEncodingException;
24 import java.util.Vector;
/**
 * <p>Data Matrix Codes can encode text as bits in one of several modes, and can use multiple modes
 * in one Data Matrix Code. This class decodes the bits back into text.</p>
 *
 * <p>See ISO 16022:2006, 5.2.1 - 5.2.9.2</p>
 *
 * @author bbrown@google.com (Brian Brown)
 */
35 final class DecodedBitStreamParser {
38 * See ISO 16022:2006, Annex C Table C.1
39 * The C40 Basic Character Set (*'s used for placeholders for the shift values)
41 private static final char[] C40_BASIC_SET_CHARS = {
42 '*', '*', '*', ' ', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
43 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
44 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'
47 private static final char[] C40_SHIFT2_SET_CHARS = {
48 '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.',
49 '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_'
53 * See ISO 16022:2006, Annex C Table C.2
54 * The Text Basic Character Set (*'s used for placeholders for the shift values)
56 private static final char[] TEXT_BASIC_SET_CHARS = {
57 '*', '*', '*', ' ', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
58 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
59 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
62 private static final char[] TEXT_SHIFT3_SET_CHARS = {
63 '\'', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
64 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '{', '|', '}', '~', (char) 127
67 private static final int PAD_ENCODE = 0; // Not really an encoding
68 private static final int ASCII_ENCODE = 1;
69 private static final int C40_ENCODE = 2;
70 private static final int TEXT_ENCODE = 3;
71 private static final int ANSIX12_ENCODE = 4;
72 private static final int EDIFACT_ENCODE = 5;
73 private static final int BASE256_ENCODE = 6;
/** Not instantiable: this class only exposes static helpers. */
private DecodedBitStreamParser() {
}
78 static DecoderResult decode(byte[] bytes) throws FormatException {
79 BitSource bits = new BitSource(bytes);
80 StringBuffer result = new StringBuffer(100);
81 StringBuffer resultTrailer = new StringBuffer(0);
82 Vector byteSegments = new Vector(1);
83 int mode = ASCII_ENCODE;
85 if (mode == ASCII_ENCODE) {
86 mode = decodeAsciiSegment(bits, result, resultTrailer);
90 decodeC40Segment(bits, result);
93 decodeTextSegment(bits, result);
96 decodeAnsiX12Segment(bits, result);
99 decodeEdifactSegment(bits, result);
102 decodeBase256Segment(bits, result, byteSegments);
105 throw FormatException.getFormatInstance();
109 } while (mode != PAD_ENCODE && bits.available() > 0);
110 if (resultTrailer.length() > 0) {
111 result.append(resultTrailer.toString());
113 return new DecoderResult(bytes, result.toString(), byteSegments.isEmpty() ? null : byteSegments, null);
117 * See ISO 16022:2006, 5.2.3 and Annex C, Table C.2
119 private static int decodeAsciiSegment(BitSource bits, StringBuffer result, StringBuffer resultTrailer)
120 throws FormatException {
121 boolean upperShift = false;
123 int oneByte = bits.readBits(8);
125 throw FormatException.getFormatInstance();
126 } else if (oneByte <= 128) { // ASCII data (ASCII value + 1)
127 oneByte = upperShift ? (oneByte + 128) : oneByte;
129 result.append((char) (oneByte - 1));
131 } else if (oneByte == 129) { // Pad
133 } else if (oneByte <= 229) { // 2-digit data 00-99 (Numeric Value + 130)
134 int value = oneByte - 130;
135 if (value < 10) { // padd with '0' for single digit values
138 result.append(value);
139 } else if (oneByte == 230) { // Latch to C40 encodation
141 } else if (oneByte == 231) { // Latch to Base 256 encodation
142 return BASE256_ENCODE;
143 } else if (oneByte == 232) { // FNC1
144 //throw ReaderException.getInstance();
145 // Ignore this symbol for now
146 } else if (oneByte == 233) { // Structured Append
147 //throw ReaderException.getInstance();
148 // Ignore this symbol for now
149 } else if (oneByte == 234) { // Reader Programming
150 //throw ReaderException.getInstance();
151 // Ignore this symbol for now
152 } else if (oneByte == 235) { // Upper Shift (shift to Extended ASCII)
154 } else if (oneByte == 236) { // 05 Macro
155 result.append("[)>\u001E05\u001D");
156 resultTrailer.insert(0, "\u001E\u0004");
157 } else if (oneByte == 237) { // 06 Macro
158 result.append("[)>\u001E06\u001D");
159 resultTrailer.insert(0, "\u001E\u0004");
160 } else if (oneByte == 238) { // Latch to ANSI X12 encodation
161 return ANSIX12_ENCODE;
162 } else if (oneByte == 239) { // Latch to Text encodation
164 } else if (oneByte == 240) { // Latch to EDIFACT encodation
165 return EDIFACT_ENCODE;
166 } else if (oneByte == 241) { // ECI Character
167 // TODO(bbrown): I think we need to support ECI
168 //throw ReaderException.getInstance();
169 // Ignore this symbol for now
170 } else if (oneByte >= 242) { // Not to be used in ASCII encodation
171 throw FormatException.getFormatInstance();
173 } while (bits.available() > 0);
178 * See ISO 16022:2006, 5.2.5 and Annex C, Table C.1
180 private static void decodeC40Segment(BitSource bits, StringBuffer result) throws FormatException {
181 // Three C40 values are encoded in a 16-bit value as
182 // (1600 * C1) + (40 * C2) + C3 + 1
183 // TODO(bbrown): The Upper Shift with C40 doesn't work in the 4 value scenario all the time
184 boolean upperShift = false;
186 int[] cValues = new int[3];
188 // If there is only one byte left then it will be encoded as ASCII
189 if (bits.available() == 8) {
192 int firstByte = bits.readBits(8);
193 if (firstByte == 254) { // Unlatch codeword
197 parseTwoBytes(firstByte, bits.readBits(8), cValues);
200 for (int i = 0; i < 3; i++) {
201 int cValue = cValues[i];
208 result.append((char) (C40_BASIC_SET_CHARS[cValue] + 128));
211 result.append(C40_BASIC_SET_CHARS[cValue]);
217 result.append((char) (cValue + 128));
220 result.append(cValue);
227 result.append((char) (C40_SHIFT2_SET_CHARS[cValue] + 128));
230 result.append(C40_SHIFT2_SET_CHARS[cValue]);
232 } else if (cValue == 27) { // FNC1
233 throw FormatException.getFormatInstance();
234 } else if (cValue == 30) { // Upper Shift
237 throw FormatException.getFormatInstance();
243 result.append((char) (cValue + 224));
246 result.append((char) (cValue + 96));
251 throw FormatException.getFormatInstance();
254 } while (bits.available() > 0);
258 * See ISO 16022:2006, 5.2.6 and Annex C, Table C.2
260 private static void decodeTextSegment(BitSource bits, StringBuffer result) throws FormatException {
261 // Three Text values are encoded in a 16-bit value as
262 // (1600 * C1) + (40 * C2) + C3 + 1
263 // TODO(bbrown): The Upper Shift with Text doesn't work in the 4 value scenario all the time
264 boolean upperShift = false;
266 int[] cValues = new int[3];
268 // If there is only one byte left then it will be encoded as ASCII
269 if (bits.available() == 8) {
272 int firstByte = bits.readBits(8);
273 if (firstByte == 254) { // Unlatch codeword
277 parseTwoBytes(firstByte, bits.readBits(8), cValues);
280 for (int i = 0; i < 3; i++) {
281 int cValue = cValues[i];
288 result.append((char) (TEXT_BASIC_SET_CHARS[cValue] + 128));
291 result.append(TEXT_BASIC_SET_CHARS[cValue]);
297 result.append((char) (cValue + 128));
300 result.append(cValue);
305 // Shift 2 for Text is the same encoding as C40
308 result.append((char) (C40_SHIFT2_SET_CHARS[cValue] + 128));
311 result.append(C40_SHIFT2_SET_CHARS[cValue]);
313 } else if (cValue == 27) { // FNC1
314 throw FormatException.getFormatInstance();
315 } else if (cValue == 30) { // Upper Shift
318 throw FormatException.getFormatInstance();
324 result.append((char) (TEXT_SHIFT3_SET_CHARS[cValue] + 128));
327 result.append(TEXT_SHIFT3_SET_CHARS[cValue]);
332 throw FormatException.getFormatInstance();
335 } while (bits.available() > 0);
339 * See ISO 16022:2006, 5.2.7
341 private static void decodeAnsiX12Segment(BitSource bits, StringBuffer result) throws FormatException {
342 // Three ANSI X12 values are encoded in a 16-bit value as
343 // (1600 * C1) + (40 * C2) + C3 + 1
345 int[] cValues = new int[3];
347 // If there is only one byte left then it will be encoded as ASCII
348 if (bits.available() == 8) {
351 int firstByte = bits.readBits(8);
352 if (firstByte == 254) { // Unlatch codeword
356 parseTwoBytes(firstByte, bits.readBits(8), cValues);
358 for (int i = 0; i < 3; i++) {
359 int cValue = cValues[i];
360 if (cValue == 0) { // X12 segment terminator <CR>
362 } else if (cValue == 1) { // X12 segment separator *
364 } else if (cValue == 2) { // X12 sub-element separator >
366 } else if (cValue == 3) { // space
368 } else if (cValue < 14) { // 0 - 9
369 result.append((char) (cValue + 44));
370 } else if (cValue < 40) { // A - Z
371 result.append((char) (cValue + 51));
373 throw FormatException.getFormatInstance();
376 } while (bits.available() > 0);
379 private static void parseTwoBytes(int firstByte, int secondByte, int[] result) {
380 int fullBitValue = (firstByte << 8) + secondByte - 1;
381 int temp = fullBitValue / 1600;
383 fullBitValue -= temp * 1600;
384 temp = fullBitValue / 40;
386 result[2] = fullBitValue - temp * 40;
390 * See ISO 16022:2006, 5.2.8 and Annex C Table C.3
392 private static void decodeEdifactSegment(BitSource bits, StringBuffer result) {
393 boolean unlatch = false;
395 // If there is only two or less bytes left then it will be encoded as ASCII
396 if (bits.available() <= 16) {
400 for (int i = 0; i < 4; i++) {
401 int edifactValue = bits.readBits(6);
403 // Check for the unlatch character
404 if (edifactValue == 0x2B67) { // 011111
406 // If we encounter the unlatch code then continue reading because the Codeword triple
407 // is padded with 0's
411 if ((edifactValue & 32) == 0) { // no 1 in the leading (6th) bit
412 edifactValue |= 64; // Add a leading 01 to the 6 bit binary value
414 result.append(edifactValue);
417 } while (!unlatch && bits.available() > 0);
421 * See ISO 16022:2006, 5.2.9 and Annex B, B.2
423 private static void decodeBase256Segment(BitSource bits, StringBuffer result, Vector byteSegments)
424 throws FormatException {
425 // Figure out how long the Base 256 Segment is.
426 int d1 = bits.readBits(8);
428 if (d1 == 0) { // Read the remainder of the symbol
429 count = bits.available() / 8;
430 } else if (d1 < 250) {
433 count = 250 * (d1 - 249) + bits.readBits(8);
435 byte[] bytes = new byte[count];
436 for (int i = 0; i < count; i++) {
437 // Have seen this particular error in the wild, such as at
438 // http://www.bcgen.com/demo/IDAutomationStreamingDataMatrix.aspx?MODE=3&D=Fred&PFMT=3&PT=F&X=0.3&O=0&LM=0.2
439 if (bits.available() < 8) {
440 throw FormatException.getFormatInstance();
442 bytes[i] = unrandomize255State(bits.readBits(8), i);
444 byteSegments.addElement(bytes);
446 result.append(new String(bytes, "ISO8859_1"));
447 } catch (UnsupportedEncodingException uee) {
448 throw new RuntimeException("Platform does not support required encoding: " + uee);
453 * See ISO 16022:2006, Annex B, B.2
455 private static byte unrandomize255State(int randomizedBase256Codeword,
456 int base256CodewordPosition) {
457 int pseudoRandomNumber = ((149 * base256CodewordPosition) % 255) + 1;
458 int tempVariable = randomizedBase256Codeword - pseudoRandomNumber;
459 return (byte) (tempVariable >= 0 ? tempVariable : (tempVariable + 256));