/*
 * Copyright 2008 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 package com.google.zxing.datamatrix.decoder;
19 import com.google.zxing.ReaderException;
20 import com.google.zxing.common.BitSource;
21 import java.io.UnsupportedEncodingException;
24 * <p>Data Matrix Codes can encode text as bits in one of several modes, and can use multiple modes
25 * in one Data Matrix Code. This class decodes the bits back into text.</p>
27 * <p>See ISO 16022:2006, 5.2.1 - 5.2.9.2</p>
29 * @author bbrown@google.com (Brian Brown)
31 final class DecodedBitStreamParser {
34 * See ISO 16022:2006, Annex C Table C.1
35 * The C40 Basic Character Set (*'s used for placeholders for the shift values)
37 private static final char[] C40_BASIC_SET_CHARS = new char[]{
38 '*', '*', '*', ' ', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
39 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
40 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'
43 private static final char[] C40_SHIFT2_SET_CHARS = new char[]{
44 '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.',
45 '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_'
49 * See ISO 16022:2006, Annex C Table C.2
50 * The Text Basic Character Set (*'s used for placeholders for the shift values)
52 private static final char[] TEXT_BASIC_SET_CHARS = new char[]{
53 '*', '*', '*', ' ', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
54 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
55 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
58 private static final char[] TEXT_SHIFT3_SET_CHARS = new char[]{
59 '\'', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
60 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '{', '|', '}', '~', 127
63 static final int PAD_ENCODE = 0; // Not really an encoding
64 static final int ASCII_ENCODE = 1;
65 static final int C40_ENCODE = 2;
66 static final int TEXT_ENCODE = 3;
67 static final int ANSIX12_ENCODE = 4;
68 static final int EDIFACT_ENCODE = 5;
69 static final int BASE256_ENCODE = 6;
71 private DecodedBitStreamParser() {
74 static String decode(byte[] bytes) throws ReaderException {
75 BitSource bits = new BitSource(bytes);
76 StringBuffer result = new StringBuffer();
78 int mode = ASCII_ENCODE;
80 if (mode != PAD_ENCODE) {
81 if (mode == ASCII_ENCODE) {
82 mode = decodeAsciiSegment(bits, result);
83 } else if (mode == C40_ENCODE) {
84 mode = decodeC40Segment(bits, result);
85 } else if (mode == TEXT_ENCODE) {
86 mode = decodeTextSegment(bits, result);
87 } else if (mode == ANSIX12_ENCODE) {
88 mode = decodeAnsiX12Segment(bits, result);
89 } else if (mode == EDIFACT_ENCODE) {
90 mode = decodeEdifactSegment(bits, result);
91 } else if (mode == BASE256_ENCODE) {
92 mode = decodeBase256Segment(bits, result);
94 throw new ReaderException("Unsupported mode indicator");
97 } while (mode != PAD_ENCODE && bits.available() > 0);
99 return result.toString();
103 * See ISO 16022:2006, 5.2.3 and Annex C, Table C.2
105 private static int decodeAsciiSegment(BitSource bits,
106 StringBuffer result) throws ReaderException {
108 boolean upperShift = false;
109 int bytesProcessed = 0;
111 oneByte = (char) bits.readBits(8);
113 // TODO(bbrown): I think this would be a bug, not sure
114 throw new ReaderException("0 is an invalid ASCII codeword");
115 } else if (oneByte <= 128) { // ASCII data (ASCII value + 1)
116 oneByte = upperShift ? (char) (oneByte + 128) : oneByte;
118 result.append((char)(oneByte - 1));
120 } else if (oneByte == 129) { // Pad
122 } else if (oneByte <= 229) { // 2-digit data 00-99 (Numeric Value + 130)
123 // TODO(bbrown): Iassume there is some easier way to do this:
124 if (oneByte - 130 < 10) {
127 result.append(Integer.toString(oneByte - 130));
128 } else if (oneByte == 230) { // Latch to C40 encodation
130 } else if (oneByte == 231) { // Latch to Base 256 encodation
131 return BASE256_ENCODE;
132 } else if (oneByte == 232) { // FNC1
133 throw new ReaderException("Currently not supporting FNC1");
134 } else if (oneByte == 233) { // Structured Append
135 throw new ReaderException("Currently not supporting Structured Append");
136 } else if (oneByte == 234) { // Reader Programming
137 throw new ReaderException("Currently not supporting Reader Programming");
138 } else if (oneByte == 235) { // Upper Shift (shift to Extended ASCII)
140 } else if (oneByte == 236) { // 05 Macro
141 throw new ReaderException("Currently not supporting 05 Macro");
142 } else if (oneByte == 237) { // 06 Macro
143 throw new ReaderException("Currently not supporting 06 Macro");
144 } else if (oneByte == 238) { // Latch to ANSI X12 encodation
145 return ANSIX12_ENCODE;
146 } else if (oneByte == 239) { // Latch to Text encodation
148 } else if (oneByte == 240) { // Latch to EDIFACT encodation
149 return EDIFACT_ENCODE;
150 } else if (oneByte == 241) { // ECI Character
151 // TODO(bbrown): I think we need to support ECI
152 throw new ReaderException("Currently not supporting ECI Character");
153 } else if (oneByte >= 242) { // Not to be used in ASCII encodation
154 throw new ReaderException(Integer.toString(oneByte) + " should not be used in ASCII encodation");
156 } while (bits.available() > 0);
161 * See ISO 16022:2006, 5.2.5 and Annex C, Table C.1
163 private static int decodeC40Segment(BitSource bits,
164 StringBuffer result) throws ReaderException {
165 // Three C40 values are encoded in a 16-bit value as
166 // (1600 * C1) + (40 * C2) + C3 + 1
169 // TODO(bbrown): The Upper Shift with C40 doesn't work in the 4 value scenario all the time
170 boolean upperShift = false;
173 // If there is only one byte left then it will be encoded as ASCII
174 if (bits.available() == 8) {
178 firstByte = (char) bits.readBits(8);
180 if (firstByte == 254) { // Unlatch codeword
184 int fullBitValue = firstByte * 256 + bits.readBits(8) - 1;
186 char[] CValues = new char[3];
187 CValues[0] = (char) (fullBitValue / 1600);
188 fullBitValue -= CValues[0] * 1600;
189 CValues[1] = (char) (fullBitValue / 40);
190 fullBitValue -= CValues[1] * 40;
191 CValues[2] = (char) (fullBitValue);
193 for (int i = 0; i < 3; i++) {
195 if (CValues[i] == 0) { // Shift 1
198 } else if (CValues[i] == 1) { // Shift 2
201 } else if (CValues[i] == 2) { // Shift 3
206 result.append((char)(C40_BASIC_SET_CHARS[CValues[i]] + 128));
209 result.append(C40_BASIC_SET_CHARS[CValues[i]]);
211 } else if (shift == 1) {
213 result.append((char) (CValues[i] + 128));
216 result.append((char) CValues[i]);
218 } else if (shift == 2) {
219 if (CValues[i] < 27) {
221 result.append((char)(C40_SHIFT2_SET_CHARS[CValues[i]] + 128));
224 result.append(C40_SHIFT2_SET_CHARS[CValues[i]]);
226 } else if (CValues[i] == 27) { // FNC1
227 throw new ReaderException("Currently not supporting FNC1");
228 } else if (CValues[i] == 30) { // Upper Shirt
231 throw new ReaderException(Integer.toString(CValues[i]) + " is not valid in the C40 Shift 2 set");
233 } else if (shift == 3) {
235 result.append((char) (CValues[i] + 224));
238 result.append((char) CValues[i] + 96);
241 throw new ReaderException("Invalid shift value");
244 } while (bits.available() > 0);
249 * See ISO 16022:2006, 5.2.6 and Annex C, Table C.2
251 private static int decodeTextSegment(BitSource bits,
252 StringBuffer result) throws ReaderException {
253 // Three Text values are encoded in a 16-bit value as
254 // (1600 * C1) + (40 * C2) + C3 + 1
257 // TODO(bbrown): The Upper Shift with Text doesn't work in the 4 value scenario all the time
258 boolean upperShift = false;
261 // If there is only one byte left then it will be encoded as ASCII
262 if (bits.available() == 8) {
266 firstByte = (char) bits.readBits(8);
268 if (firstByte == 254) { // Unlatch codeword
272 int fullBitValue = firstByte * 256 + bits.readBits(8) - 1;
274 char[] CValues = new char[3];
275 CValues[0] = (char) (fullBitValue / 1600);
276 fullBitValue -= CValues[0] * 1600;
277 CValues[1] = (char) (fullBitValue / 40);
278 fullBitValue -= CValues[1] * 40;
279 CValues[2] = (char) (fullBitValue);
281 for (int i = 0; i < 3; i++) {
283 if (CValues[i] == 0) { // Shift 1
286 } else if (CValues[i] == 1) { // Shift 2
289 } else if (CValues[i] == 2) { // Shift 3
294 result.append((char)(TEXT_BASIC_SET_CHARS[CValues[i]] + 128));
297 result.append(TEXT_BASIC_SET_CHARS[CValues[i]]);
299 } else if (shift == 1) {
301 result.append((char) (CValues[i] + 128));
304 result.append((char) CValues[i]);
306 } else if (shift == 2) {
307 // Shift 2 for Text is the same encoding as C40
308 if (CValues[i] < 27) {
310 result.append((char)(C40_SHIFT2_SET_CHARS[CValues[i]] + 128));
313 result.append(C40_SHIFT2_SET_CHARS[CValues[i]]);
315 } else if (CValues[i] == 27) { // FNC1
316 throw new ReaderException("Currently not supporting FNC1");
317 } else if (CValues[i] == 30) { // Upper Shirt
320 throw new ReaderException(Integer.toString(CValues[i]) + " is not valid in the C40 Shift 2 set");
322 } else if (shift == 3) {
324 result.append((char)(TEXT_SHIFT3_SET_CHARS[CValues[i]] + 128));
327 result.append(TEXT_SHIFT3_SET_CHARS[CValues[i]]);
330 throw new ReaderException("Invalid shift value");
333 } while (bits.available() > 0);
338 * See ISO 16022:2006, 5.2.7
340 private static int decodeAnsiX12Segment(BitSource bits,
341 StringBuffer result) throws ReaderException {
342 // Three ANSI X12 values are encoded in a 16-bit value as
343 // (1600 * C1) + (40 * C2) + C3 + 1
347 // If there is only one byte left then it will be encoded as ASCII
348 if (bits.available() == 8) {
352 firstByte = (char) bits.readBits(8);
354 if (firstByte == 254) { // Unlatch codeword
358 int fullBitValue = firstByte * 256 + bits.readBits(8) - 1;
360 char[] CValues = new char[3];
361 CValues[0] = (char) (fullBitValue / 1600);
362 fullBitValue -= CValues[0] * 1600;
363 CValues[1] = (char) (fullBitValue / 40);
364 fullBitValue -= CValues[1] * 40;
365 CValues[2] = (char) (fullBitValue);
367 for (int i = 0; i < 3; i++) {
368 // TODO(bbrown): These really aren't X12 symbols, we are converting to ASCII chars
369 if (CValues[i] == 0) { // X12 segment terminator <CR>
370 result.append("<CR>");
371 } else if (CValues[i] == 1) { // X12 segment separator *
373 } else if (CValues[i] == 2) { // X12 sub-element separator >
375 } else if (CValues[i] == 3) { // space
377 } else if (CValues[i] < 14) { // 0 - 9
378 result.append((char) (CValues[i] + 44));
379 } else if (CValues[i] < 40) { // A - Z
380 result.append((char) (CValues[i] + 51));
382 throw new ReaderException(Integer.toString(CValues[i]) + " is not valid in the ANSI X12 set");
385 } while (bits.available() > 0);
391 * See ISO 16022:2006, 5.2.8 and Annex C Table C.3
393 private static int decodeEdifactSegment(BitSource bits,
394 StringBuffer result) throws ReaderException {
395 boolean unlatch = false;
397 // If there is only two or less bytes left then it will be encoded as ASCII
398 if (bits.available() <= 16) {
403 for (int i = 0; i < 4; i++) {
404 edifactValue = (char) bits.readBits(6);
406 // Check for the unlatch character
407 if (edifactValue == 0x2B67) { // 011111
409 // If we encounter the unlatch code then continue reading because the Codeword triple
410 // is padded with 0's
414 if ((edifactValue & 32) == 0) { // no 1 in the leading (6th) bit
415 edifactValue |= 64; // Add a leading 01 to the 6 bit binary value
417 result.append((char) edifactValue);
420 } while (!unlatch && bits.available() > 0);
426 * See ISO 16022:2006, 5.2.9 and Annex B, B.2
428 private static int decodeBase256Segment(BitSource bits,
429 StringBuffer result) throws ReaderException {
430 // Figure out how long the Base 256 Segment is.
431 char d1 = (char) bits.readBits(8);
433 if (d1 == 0) { // Read the remainder of the symbol
434 count = bits.available() / 8;
435 } else if (d1 < 250) {
438 count = 250 * (d1 - 249) + bits.readBits(8);
440 char[] readBytes = new char[count];
441 for (int i = 0; i < count; i++) {
442 result.append((char)unrandomize255State((char) bits.readBits(8), count));
449 * See ISO 16022:2006, Annex B, B.2
451 private static char unrandomize255State(char randomizedBase256Codeword,
452 int base256CodewordPosition) {
453 char pseudoRandomNumber = (char) (((149 * base256CodewordPosition) % 255) + 1);
454 int tempVariable = randomizedBase256Codeword - pseudoRandomNumber;
455 if (tempVariable >= 0) {
456 return (char) tempVariable;
458 return (char) (tempVariable + 256);