2 * Copyright 2008 Google Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 package com.google.zxing.datamatrix.decoder;
19 import com.google.zxing.ReaderException;
20 import com.google.zxing.common.BitSource;
23 * <p>Data Matrix Codes can encode text as bits in one of several modes, and can use multiple modes
24 * in one Data Matrix Code. This class decodes the bits back into text.</p>
26 * <p>See ISO 16022:2006, 5.2.1 - 5.2.9.2</p>
28 * @author bbrown@google.com (Brian Brown)
30 final class DecodedBitStreamParser {
33 * See ISO 16022:2006, Annex C Table C.1
34 * The C40 Basic Character Set (*'s used for placeholders for the shift values)
36 private static final char[] C40_BASIC_SET_CHARS = new char[]{
37 '*', '*', '*', ' ', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
38 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
39 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'
42 private static final char[] C40_SHIFT2_SET_CHARS = new char[]{
43 '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.',
44 '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_'
48 * See ISO 16022:2006, Annex C Table C.2
49 * The Text Basic Character Set (*'s used for placeholders for the shift values)
51 private static final char[] TEXT_BASIC_SET_CHARS = new char[]{
52 '*', '*', '*', ' ', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
53 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
54 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
57 private static final char[] TEXT_SHIFT3_SET_CHARS = new char[]{
58 '\'', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
59 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '{', '|', '}', '~', 127
62 static final int PAD_ENCODE = 0; // Not really an encoding
63 static final int ASCII_ENCODE = 1;
64 static final int C40_ENCODE = 2;
65 static final int TEXT_ENCODE = 3;
66 static final int ANSIX12_ENCODE = 4;
67 static final int EDIFACT_ENCODE = 5;
68 static final int BASE256_ENCODE = 6;
/**
 * Private so that the parser cannot be instantiated from outside this class.
 */
private DecodedBitStreamParser() {
  // Intentionally empty.
}
73 static String decode(byte[] bytes) throws ReaderException {
74 BitSource bits = new BitSource(bytes);
75 StringBuffer result = new StringBuffer();
77 int mode = ASCII_ENCODE;
79 if (mode != PAD_ENCODE) {
80 if (mode == ASCII_ENCODE) {
81 mode = decodeAsciiSegment(bits, result);
82 } else if (mode == C40_ENCODE) {
83 mode = decodeC40Segment(bits, result);
84 } else if (mode == TEXT_ENCODE) {
85 mode = decodeTextSegment(bits, result);
86 } else if (mode == ANSIX12_ENCODE) {
87 mode = decodeAnsiX12Segment(bits, result);
88 } else if (mode == EDIFACT_ENCODE) {
89 mode = decodeEdifactSegment(bits, result);
90 } else if (mode == BASE256_ENCODE) {
91 mode = decodeBase256Segment(bits, result);
93 throw new ReaderException("Unsupported mode indicator");
96 } while (mode != PAD_ENCODE && bits.available() > 0);
98 return result.toString();
102 * See ISO 16022:2006, 5.2.3 and Annex C, Table C.2
104 private static int decodeAsciiSegment(BitSource bits,
105 StringBuffer result) throws ReaderException {
106 boolean upperShift = false;
108 char oneByte = (char) bits.readBits(8);
109 if (oneByte == '\0') {
110 // TODO(bbrown): I think this would be a bug, not sure
111 throw new ReaderException("0 is an invalid ASCII codeword");
112 } else if (oneByte <= 128) { // ASCII data (ASCII value + 1)
113 oneByte = upperShift ? (char) (oneByte + 128) : oneByte;
115 result.append((char)(oneByte - 1));
117 } else if (oneByte == 129) { // Pad
119 } else if (oneByte <= 229) { // 2-digit data 00-99 (Numeric Value + 130)
120 // TODO(bbrown): Iassume there is some easier way to do this:
121 if (oneByte - 130 < 10) {
124 result.append(Integer.toString(oneByte - 130));
125 } else if (oneByte == 230) { // Latch to C40 encodation
127 } else if (oneByte == 231) { // Latch to Base 256 encodation
128 return BASE256_ENCODE;
129 } else if (oneByte == 232) { // FNC1
130 throw new ReaderException("Currently not supporting FNC1");
131 } else if (oneByte == 233) { // Structured Append
132 throw new ReaderException("Currently not supporting Structured Append");
133 } else if (oneByte == 234) { // Reader Programming
134 throw new ReaderException("Currently not supporting Reader Programming");
135 } else if (oneByte == 235) { // Upper Shift (shift to Extended ASCII)
137 } else if (oneByte == 236) { // 05 Macro
138 throw new ReaderException("Currently not supporting 05 Macro");
139 } else if (oneByte == 237) { // 06 Macro
140 throw new ReaderException("Currently not supporting 06 Macro");
141 } else if (oneByte == 238) { // Latch to ANSI X12 encodation
142 return ANSIX12_ENCODE;
143 } else if (oneByte == 239) { // Latch to Text encodation
145 } else if (oneByte == 240) { // Latch to EDIFACT encodation
146 return EDIFACT_ENCODE;
147 } else if (oneByte == 241) { // ECI Character
148 // TODO(bbrown): I think we need to support ECI
149 throw new ReaderException("Currently not supporting ECI Character");
150 } else if (oneByte >= 242) { // Not to be used in ASCII encodation
151 throw new ReaderException(Integer.toString(oneByte) + " should not be used in ASCII encodation");
153 } while (bits.available() > 0);
158 * See ISO 16022:2006, 5.2.5 and Annex C, Table C.1
160 private static int decodeC40Segment(BitSource bits,
161 StringBuffer result) throws ReaderException {
162 // Three C40 values are encoded in a 16-bit value as
163 // (1600 * C1) + (40 * C2) + C3 + 1
165 // TODO(bbrown): The Upper Shift with C40 doesn't work in the 4 value scenario all the time
166 boolean upperShift = false;
169 // If there is only one byte left then it will be encoded as ASCII
170 if (bits.available() == 8) {
174 char firstByte = (char) bits.readBits(8);
176 if (firstByte == 254) { // Unlatch codeword
180 int fullBitValue = (firstByte << 8) + bits.readBits(8) - 1;
182 char[] cValues = new char[3];
183 cValues[0] = (char) (fullBitValue / 1600);
184 fullBitValue -= cValues[0] * 1600;
185 cValues[1] = (char) (fullBitValue / 40);
186 fullBitValue -= cValues[1] * 40;
187 cValues[2] = (char) fullBitValue;
189 for (int i = 0; i < 3; i++) {
191 if (cValues[i] == 0) { // Shift 1
194 } else if (cValues[i] == 1) { // Shift 2
197 } else if (cValues[i] == 2) { // Shift 3
202 result.append((char)(C40_BASIC_SET_CHARS[cValues[i]] + 128));
205 result.append(C40_BASIC_SET_CHARS[cValues[i]]);
207 } else if (shift == 1) {
209 result.append((char) (cValues[i] + 128));
212 result.append(cValues[i]);
214 } else if (shift == 2) {
215 if (cValues[i] < 27) {
217 result.append((char)(C40_SHIFT2_SET_CHARS[cValues[i]] + 128));
220 result.append(C40_SHIFT2_SET_CHARS[cValues[i]]);
222 } else if (cValues[i] == 27) { // FNC1
223 throw new ReaderException("Currently not supporting FNC1");
224 } else if (cValues[i] == 30) { // Upper Shirt
227 throw new ReaderException(Integer.toString(cValues[i]) + " is not valid in the C40 Shift 2 set");
229 } else if (shift == 3) {
231 result.append((char) (cValues[i] + 224));
234 result.append((char) cValues[i] + 96);
237 throw new ReaderException("Invalid shift value");
240 } while (bits.available() > 0);
245 * See ISO 16022:2006, 5.2.6 and Annex C, Table C.2
247 private static int decodeTextSegment(BitSource bits,
248 StringBuffer result) throws ReaderException {
249 // Three Text values are encoded in a 16-bit value as
250 // (1600 * C1) + (40 * C2) + C3 + 1
252 // TODO(bbrown): The Upper Shift with Text doesn't work in the 4 value scenario all the time
253 boolean upperShift = false;
256 // If there is only one byte left then it will be encoded as ASCII
257 if (bits.available() == 8) {
261 char firstByte = (char) bits.readBits(8);
263 if (firstByte == 254) { // Unlatch codeword
267 int fullBitValue = (firstByte << 8) + bits.readBits(8) - 1;
269 char[] cValues = new char[3];
270 cValues[0] = (char) (fullBitValue / 1600);
271 fullBitValue -= cValues[0] * 1600;
272 cValues[1] = (char) (fullBitValue / 40);
273 fullBitValue -= cValues[1] * 40;
274 cValues[2] = (char) fullBitValue;
276 for (int i = 0; i < 3; i++) {
278 if (cValues[i] == 0) { // Shift 1
281 } else if (cValues[i] == 1) { // Shift 2
284 } else if (cValues[i] == 2) { // Shift 3
289 result.append((char)(TEXT_BASIC_SET_CHARS[cValues[i]] + 128));
292 result.append(TEXT_BASIC_SET_CHARS[cValues[i]]);
294 } else if (shift == 1) {
296 result.append((char) (cValues[i] + 128));
299 result.append((char) cValues[i]);
301 } else if (shift == 2) {
302 // Shift 2 for Text is the same encoding as C40
303 if (cValues[i] < 27) {
305 result.append((char)(C40_SHIFT2_SET_CHARS[cValues[i]] + 128));
308 result.append(C40_SHIFT2_SET_CHARS[cValues[i]]);
310 } else if (cValues[i] == 27) { // FNC1
311 throw new ReaderException("Currently not supporting FNC1");
312 } else if (cValues[i] == 30) { // Upper Shirt
315 throw new ReaderException(Integer.toString(cValues[i]) + " is not valid in the C40 Shift 2 set");
317 } else if (shift == 3) {
319 result.append((char)(TEXT_SHIFT3_SET_CHARS[cValues[i]] + 128));
322 result.append(TEXT_SHIFT3_SET_CHARS[cValues[i]]);
325 throw new ReaderException("Invalid shift value");
328 } while (bits.available() > 0);
333 * See ISO 16022:2006, 5.2.7
335 private static int decodeAnsiX12Segment(BitSource bits,
336 StringBuffer result) throws ReaderException {
337 // Three ANSI X12 values are encoded in a 16-bit value as
338 // (1600 * C1) + (40 * C2) + C3 + 1
341 // If there is only one byte left then it will be encoded as ASCII
342 if (bits.available() == 8) {
346 char firstByte = (char) bits.readBits(8);
348 if (firstByte == 254) { // Unlatch codeword
352 int fullBitValue = (firstByte << 8) + bits.readBits(8) - 1;
354 char[] cValues = new char[3];
355 cValues[0] = (char) (fullBitValue / 1600);
356 fullBitValue -= cValues[0] * 1600;
357 cValues[1] = (char) (fullBitValue / 40);
358 fullBitValue -= cValues[1] * 40;
359 cValues[2] = (char) fullBitValue;
361 for (int i = 0; i < 3; i++) {
362 // TODO(bbrown): These really aren't X12 symbols, we are converting to ASCII chars
363 if (cValues[i] == 0) { // X12 segment terminator <CR>
364 result.append("<CR>");
365 } else if (cValues[i] == 1) { // X12 segment separator *
367 } else if (cValues[i] == 2) { // X12 sub-element separator >
369 } else if (cValues[i] == 3) { // space
371 } else if (cValues[i] < 14) { // 0 - 9
372 result.append((char) (cValues[i] + 44));
373 } else if (cValues[i] < 40) { // A - Z
374 result.append((char) (cValues[i] + 51));
376 throw new ReaderException(Integer.toString(cValues[i]) + " is not valid in the ANSI X12 set");
379 } while (bits.available() > 0);
385 * See ISO 16022:2006, 5.2.8 and Annex C Table C.3
387 private static int decodeEdifactSegment(BitSource bits,
388 StringBuffer result) throws ReaderException {
389 boolean unlatch = false;
391 // If there is only two or less bytes left then it will be encoded as ASCII
392 if (bits.available() <= 16) {
396 for (int i = 0; i < 4; i++) {
397 char edifactValue = (char) bits.readBits(6);
399 // Check for the unlatch character
400 if (edifactValue == 0x2B67) { // 011111
402 // If we encounter the unlatch code then continue reading because the Codeword triple
403 // is padded with 0's
407 if ((edifactValue & 32) == 0) { // no 1 in the leading (6th) bit
408 edifactValue |= 64; // Add a leading 01 to the 6 bit binary value
410 result.append((char) edifactValue);
413 } while (!unlatch && bits.available() > 0);
419 * See ISO 16022:2006, 5.2.9 and Annex B, B.2
421 private static int decodeBase256Segment(BitSource bits,
422 StringBuffer result) throws ReaderException {
423 // Figure out how long the Base 256 Segment is.
424 char d1 = (char) bits.readBits(8);
426 if (d1 == 0) { // Read the remainder of the symbol
427 count = bits.available() / 8;
428 } else if (d1 < 250) {
431 count = 250 * (d1 - 249) + bits.readBits(8);
433 char[] readBytes = new char[count];
434 for (int i = 0; i < count; i++) {
435 result.append(unrandomize255State((char) bits.readBits(8), count));
442 * See ISO 16022:2006, Annex B, B.2
444 private static char unrandomize255State(char randomizedBase256Codeword,
445 int base256CodewordPosition) {
446 char pseudoRandomNumber = (char) (((149 * base256CodewordPosition) % 255) + 1);
447 int tempVariable = randomizedBase256Codeword - pseudoRandomNumber;
448 if (tempVariable >= 0) {
449 return (char) tempVariable;
451 return (char) (tempVariable + 256);