2 * Copyright 2008 ZXing authors
\r
4 * Licensed under the Apache License, Version 2.0 (the "License");
\r
5 * you may not use this file except in compliance with the License.
\r
6 * You may obtain a copy of the License at
\r
8 * http://www.apache.org/licenses/LICENSE-2.0
\r
10 * Unless required by applicable law or agreed to in writing, software
\r
11 * distributed under the License is distributed on an "AS IS" BASIS,
\r
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
\r
13 * See the License for the specific language governing permissions and
\r
14 * limitations under the License.
\r
18 using System.Collections.Generic;
\r
21 using com.google.zxing.common;
\r
23 namespace com.google.zxing.datamatrix.decoder
\r
 * <p>Decodes the data codewords of a Data Matrix Code into text. The codewords are read as a
 * sequence of segments, each written in one of the ASCII, C40, Text, ANSI X12, EDIFACT or
 * Base 256 encodation schemes; every scheme is handled by its own decode method in this class.</p>
\r
30 * @author bbrown@google.com (Brian Brown)
\r
32 public sealed class DecodedBitStreamParser
\r
35 * See ISO 16022:2006, Annex C Table C.1
\r
36 * The C40 Basic Character Set (*'s used for placeholders for the shift values)
\r
// C40 basic set, indexed by C40 value. Indices 0-2 are '*' placeholders: those values are the
// Shift 1 / Shift 2 / Shift 3 selectors and are never looked up in this table.
private static readonly char[] C40_BASIC_SET_CHARS = {
    '*', '*', '*', ' ', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
    'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'
};

// Shift 2 set (values 0-26). Shared by the C40 and Text decoders.
private static readonly char[] C40_SHIFT2_SET_CHARS = {
    '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.',
    '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_'
};

/**
 * See ISO 16022:2006, Annex C Table C.2
 * The Text Basic Character Set (*'s used for placeholders for the shift values)
 */
private static readonly char[] TEXT_BASIC_SET_CHARS = {
    '*', '*', '*', ' ', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
    'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
};

// Text Shift 3 set (values 0-31). Value 0 is the grave accent (ASCII 96), mirroring the C40
// Shift 3 mapping (value + 96); it was previously an apostrophe, which is already in Shift 2.
private static readonly char[] TEXT_SHIFT3_SET_CHARS = {
    '`', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
    'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '{', '|', '}', '~', (char) 127
};

// Encodation modes used to drive the decode loop.
private const int PAD_ENCODE = 0; // Not really an encoding
private const int ASCII_ENCODE = 1;
private const int C40_ENCODE = 2;
private const int TEXT_ENCODE = 3;
private const int ANSIX12_ENCODE = 4;
private const int EDIFACT_ENCODE = 5;
private const int BASE256_ENCODE = 6;

// Not instantiable: every member of this class is static.
private DecodedBitStreamParser() {
}
\r
/**
 * Decodes the given codewords into a DecoderResult.
 *
 * Decoding starts in ASCII mode. decodeAsciiSegment reports which mode to use next; after any
 * non-ASCII segment has been decoded the mode falls back to ASCII. The loop ends when the pad
 * mode is reported or no bits remain. Text collected in the trailer buffer is appended last.
 *
 * @param bytes the codewords to decode
 * @return a DecoderResult built from the input bytes, the decoded text and the list of
 *         Base 256 byte segments (null when no such segment was decoded)
 * @throws ReaderException if the mode is not one of the known encodation modes, or if a
 *         segment decoder rejects its input
 */
public static DecoderResult decode(sbyte[] bytes) {
    BitSource source = new BitSource(bytes);
    StringBuilder text = new StringBuilder();
    StringBuilder trailer = new StringBuilder(0);
    System.Collections.ArrayList base256Segments = new System.Collections.ArrayList(1);

    int mode = ASCII_ENCODE;
    do {
        switch (mode) {
            case ASCII_ENCODE:
                // The ASCII decoder itself decides which mode comes next.
                mode = decodeAsciiSegment(source, text, trailer);
                break;
            case C40_ENCODE:
                decodeC40Segment(source, text);
                mode = ASCII_ENCODE;
                break;
            case TEXT_ENCODE:
                decodeTextSegment(source, text);
                mode = ASCII_ENCODE;
                break;
            case ANSIX12_ENCODE:
                decodeAnsiX12Segment(source, text);
                mode = ASCII_ENCODE;
                break;
            case EDIFACT_ENCODE:
                decodeEdifactSegment(source, text);
                mode = ASCII_ENCODE;
                break;
            case BASE256_ENCODE:
                decodeBase256Segment(source, text, base256Segments);
                mode = ASCII_ENCODE;
                break;
            default:
                throw new ReaderException();
        }
    } while (mode != PAD_ENCODE && source.available() > 0);

    if (trailer.Length > 0) {
        text.Append(trailer);
    }
    return new DecoderResult(bytes, text.ToString(), base256Segments.Count == 0 ? null : base256Segments);
}
\r
114 * See ISO 16022:2006, 5.2.3 and Annex C, Table C.2
\r
/**
 * Decodes codewords in ASCII encodation until a latch, a pad, a plain ASCII character or the
 * end of the bit stream is reached.
 *
 * @param bits source of the codewords; read 8 bits at a time
 * @param result receives the decoded characters
 * @param resultTrailer receives the macro trailer, inserted at its front, when an 05/06 macro
 *        codeword is seen
 * @return the mode the caller should decode next: ASCII_ENCODE after a plain ASCII character or
 *         when the bits run out, PAD_ENCODE on a pad codeword, otherwise the latched mode
 * @throws ReaderException on codeword 0, on the unsupported codewords 232-234 and 241, and on
 *         any codeword of 242 or above
 */
private static int decodeAsciiSegment(BitSource bits, StringBuilder result, StringBuilder resultTrailer)
{
    bool extendedShift = false;
    do {
        int codeword = bits.readBits(8);
        if (codeword == 0) {
            throw new ReaderException();
        }
        if (codeword <= 128) {
            // ASCII data (ASCII value + 1); a pending upper shift moves it up by 128.
            int ascii = codeword - 1;
            if (extendedShift) {
                ascii += 128;
            }
            result.Append((char) ascii);
            return ASCII_ENCODE;
        }
        if (codeword >= 130 && codeword <= 229) {
            // 2-digit data 00-99 (numeric value + 130), zero-padded to two digits.
            int pair = codeword - 130;
            if (pair < 10) {
                result.Append('0');
            }
            result.Append(pair);
            continue;
        }
        switch (codeword) {
            case 129: // Pad
                return PAD_ENCODE;
            case 230: // Latch to C40 encodation
                return C40_ENCODE;
            case 231: // Latch to Base 256 encodation
                return BASE256_ENCODE;
            case 235: // Upper Shift (shift to Extended ASCII)
                extendedShift = true;
                break;
            case 236: // 05 Macro
                result.Append("[)>\u001E05\u001D");
                resultTrailer.Insert(0, "\u001E\u0004");
                break;
            case 237: // 06 Macro
                result.Append("[)>\u001E06\u001D");
                resultTrailer.Insert(0, "\u001E\u0004");
                break;
            case 238: // Latch to ANSI X12 encodation
                return ANSIX12_ENCODE;
            case 239: // Latch to Text encodation
                return TEXT_ENCODE;
            case 240: // Latch to EDIFACT encodation
                return EDIFACT_ENCODE;
            default:
                // 232 FNC1, 233 Structured Append, 234 Reader Programming and 241 ECI are not
                // supported; 242 and above are not to be used in ASCII encodation.
                // TODO(bbrown): I think we need to support ECI
                throw new ReaderException();
        }
    } while (bits.available() > 0);
    return ASCII_ENCODE;
}
\r
171 * See ISO 16022:2006, 5.2.5 and Annex C, Table C.1
\r
/**
 * Decodes a segment in C40 encodation, appending the decoded characters to result.
 *
 * Codewords are consumed two at a time and unpacked into three C40 values by parseTwoBytes.
 * Decoding stops when exactly one byte remains, when the unlatch codeword (254) is read, or
 * when no bits are left.
 *
 * @param bits source of the codewords
 * @param result receives the decoded characters
 * @throws ReaderException on FNC1, on an undefined Shift 2 value, or on a value outside the
 *         basic character set
 */
private static void decodeC40Segment(BitSource bits, StringBuilder result) {
    // Three C40 values are encoded in a 16-bit value as
    // (1600 * C1) + (40 * C2) + C3 + 1
    // TODO(bbrown): The Upper Shift with C40 doesn't work in the 4 value scenario all the time
    bool upperShift = false;

    int[] cValues = new int[3];
    do {
        // If there is only one byte left then it will be encoded as ASCII
        if (bits.available() == 8) {
            return;
        }
        int firstByte = bits.readBits(8);
        if (firstByte == 254) { // Unlatch codeword
            return;
        }

        parseTwoBytes(firstByte, bits.readBits(8), cValues);

        int shift = 0;
        for (int i = 0; i < 3; i++) {
            int cValue = cValues[i];
            switch (shift) {
                case 0:
                    if (cValue < 3) {
                        // Values 0-2 select Shift 1-3 for the next value.
                        shift = cValue + 1;
                    } else if (cValue < C40_BASIC_SET_CHARS.Length) {
                        char basic = C40_BASIC_SET_CHARS[cValue];
                        result.Append(upperShift ? (char) (basic + 128) : basic);
                        upperShift = false;
                    } else {
                        // parseTwoBytes can yield 40 for the first value of an out-of-range
                        // codeword pair; report it as a read failure, not an index error.
                        throw new ReaderException();
                    }
                    break;
                case 1:
                    // Shift 1: the value is the character code itself. Cast to char so the
                    // character is appended rather than the number's decimal digits.
                    result.Append(upperShift ? (char) (cValue + 128) : (char) cValue);
                    upperShift = false;
                    shift = 0;
                    break;
                case 2:
                    if (cValue < 27) {
                        char shifted = C40_SHIFT2_SET_CHARS[cValue];
                        result.Append(upperShift ? (char) (shifted + 128) : shifted);
                        upperShift = false;
                    } else if (cValue == 27) { // FNC1
                        throw new ReaderException();
                    } else if (cValue == 30) { // Upper Shift
                        upperShift = true;
                    } else {
                        throw new ReaderException();
                    }
                    shift = 0;
                    break;
                case 3:
                    // Shift 3: value + 96, or value + 224 under upper shift.
                    result.Append(upperShift ? (char) (cValue + 224) : (char) (cValue + 96));
                    upperShift = false;
                    shift = 0;
                    break;
                default:
                    throw new ReaderException();
            }
        }
    } while (bits.available() > 0);
}
\r
251 * See ISO 16022:2006, 5.2.6 and Annex C, Table C.2
\r
/**
 * Decodes a segment in Text encodation, appending the decoded characters to result.
 *
 * Codewords are consumed two at a time and unpacked into three Text values by parseTwoBytes.
 * Decoding stops when exactly one byte remains, when the unlatch codeword (254) is read, or
 * when no bits are left.
 *
 * @param bits source of the codewords
 * @param result receives the decoded characters
 * @throws ReaderException on FNC1, on an undefined Shift 2 value, or on a value outside the
 *         basic or Shift 3 character set
 */
private static void decodeTextSegment(BitSource bits, StringBuilder result) {
    // Three Text values are encoded in a 16-bit value as
    // (1600 * C1) + (40 * C2) + C3 + 1
    // TODO(bbrown): The Upper Shift with Text doesn't work in the 4 value scenario all the time
    bool upperShift = false;

    int[] cValues = new int[3];
    do {
        // If there is only one byte left then it will be encoded as ASCII
        if (bits.available() == 8) {
            return;
        }
        int firstByte = bits.readBits(8);
        if (firstByte == 254) { // Unlatch codeword
            return;
        }

        parseTwoBytes(firstByte, bits.readBits(8), cValues);

        int shift = 0;
        for (int i = 0; i < 3; i++) {
            int cValue = cValues[i];
            switch (shift) {
                case 0:
                    if (cValue < 3) {
                        // Values 0-2 select Shift 1-3 for the next value.
                        shift = cValue + 1;
                    } else if (cValue < TEXT_BASIC_SET_CHARS.Length) {
                        char basic = TEXT_BASIC_SET_CHARS[cValue];
                        result.Append(upperShift ? (char) (basic + 128) : basic);
                        upperShift = false;
                    } else {
                        // parseTwoBytes can yield 40 for the first value of an out-of-range
                        // codeword pair; report it as a read failure, not an index error.
                        throw new ReaderException();
                    }
                    break;
                case 1:
                    // Shift 1: the value is the character code itself. Cast to char so the
                    // character is appended rather than the number's decimal digits.
                    result.Append(upperShift ? (char) (cValue + 128) : (char) cValue);
                    upperShift = false;
                    shift = 0;
                    break;
                case 2:
                    // Shift 2 for Text is the same encoding as C40
                    if (cValue < 27) {
                        char shifted = C40_SHIFT2_SET_CHARS[cValue];
                        result.Append(upperShift ? (char) (shifted + 128) : shifted);
                        upperShift = false;
                    } else if (cValue == 27) { // FNC1
                        throw new ReaderException();
                    } else if (cValue == 30) { // Upper Shift
                        upperShift = true;
                    } else {
                        throw new ReaderException();
                    }
                    shift = 0;
                    break;
                case 3:
                    // The Shift 3 table is shorter than the 0-39 value range, so guard the
                    // lookup instead of letting it fail with an index error.
                    if (cValue < 0 || cValue >= TEXT_SHIFT3_SET_CHARS.Length) {
                        throw new ReaderException();
                    }
                    char shift3 = TEXT_SHIFT3_SET_CHARS[cValue];
                    result.Append(upperShift ? (char) (shift3 + 128) : shift3);
                    upperShift = false;
                    shift = 0;
                    break;
                default:
                    throw new ReaderException();
            }
        }
    } while (bits.available() > 0);
}
\r
332 * See ISO 16022:2006, 5.2.7
\r
/**
 * Decodes a segment in ANSI X12 encodation, appending the decoded characters to result.
 *
 * Each pair of codewords packs three X12 values as (1600 * C1) + (40 * C2) + C3 + 1.
 * Decoding stops when exactly one byte remains, when the unlatch codeword (254) is read, or
 * when no bits are left.
 *
 * @param bits source of the codewords
 * @param result receives the decoded characters
 * @throws ReaderException if an unpacked value is 40 or greater
 */
private static void decodeAnsiX12Segment(BitSource bits, StringBuilder result) {
    int[] triple = new int[3];
    do {
        // A single remaining byte is encoded as ASCII, so leave it for the caller.
        if (bits.available() == 8) {
            return;
        }
        int leadByte = bits.readBits(8);
        if (leadByte == 254) { // Unlatch codeword
            return;
        }

        parseTwoBytes(leadByte, bits.readBits(8), triple);

        foreach (int x12Value in triple) {
            switch (x12Value) {
                case 0: // X12 segment terminator <CR>
                    result.Append('\r');
                    break;
                case 1: // X12 segment separator *
                    result.Append('*');
                    break;
                case 2: // X12 sub-element separator >
                    result.Append('>');
                    break;
                case 3: // space
                    result.Append(' ');
                    break;
                default:
                    if (x12Value < 14) { // 0 - 9
                        result.Append((char) (x12Value + 44));
                    } else if (x12Value < 40) { // A - Z
                        result.Append((char) (x12Value + 51));
                    } else {
                        throw new ReaderException();
                    }
                    break;
            }
        }
    } while (bits.available() > 0);
}
\r
/**
 * Unpacks two codewords into three values.
 *
 * The codewords form the 16-bit number (1600 * C1) + (40 * C2) + C3 + 1; after removing
 * the + 1, C1, C2 and C3 are stored in result[0], result[1] and result[2].
 *
 * @param firstByte high-order codeword
 * @param secondByte low-order codeword
 * @param result array of at least three elements that receives the unpacked values
 */
private static void parseTwoBytes(int firstByte, int secondByte, int[] result) {
    int packed = (firstByte << 8) + secondByte - 1;

    int high = packed / 1600;
    int remainder = packed - high * 1600;
    int middle = remainder / 40;

    result[0] = high;
    result[1] = middle;
    result[2] = remainder - middle * 40;
}
\r
383 * See ISO 16022:2006, 5.2.8 and Annex C Table C.3
\r
/**
 * Decodes a segment in EDIFACT encodation, appending the decoded characters to result.
 *
 * Values are read 6 bits at a time, four per group. Decoding stops when two or fewer bytes
 * remain, once the group containing the unlatch value has been read, or when no bits are left.
 *
 * @param bits source of the 6-bit values
 * @param result receives the decoded characters
 */
private static void decodeEdifactSegment(BitSource bits, StringBuilder result) {
    bool unlatch = false;
    do {
        // If there is only two or less bytes left then it will be encoded as ASCII
        if (bits.available() <= 16) {
            return;
        }

        for (int i = 0; i < 4; i++) {
            int edifactValue = bits.readBits(6);

            // Check for the unlatch character. The value is only 6 bits wide, so it has to be
            // compared with 0x1F; the previous constant 0x2B67 could never match.
            if (edifactValue == 0x1F) { // 011111
                unlatch = true;
                // If we encounter the unlatch code then continue reading because the Codeword triple
                // is padded with 0's
            }

            if (!unlatch) {
                if ((edifactValue & 32) == 0) { // no 1 in the leading (6th) bit
                    edifactValue |= 64; // Add a leading 01 to the 6 bit binary value
                }
                // Cast to char so the character is appended, not the number's decimal digits.
                result.Append((char) edifactValue);
            }
        }
    } while (!unlatch && bits.available() > 0);
}
\r
414 * See ISO 16022:2006, 5.2.9 and Annex B, B.2
\r
/**
 * Decodes a segment in Base 256 encodation.
 *
 * The first codeword gives the length: 0 means the rest of the symbol, a value below 250 is
 * the byte count itself, and anything else combines with the following codeword as
 * 250 * (d1 - 249) + d2. Each data codeword is passed through unrandomize255State together
 * with its index in the segment. The resulting bytes are added to byteSegments and also
 * appended to result as ISO-8859-1 text.
 *
 * @param bits source of the codewords
 * @param result receives the bytes decoded as ISO-8859-1 text
 * @param byteSegments receives the raw byte array of this segment
 * @throws NotSupportedException if obtaining or using the ISO-8859-1 encoding fails; the
 *         original exception is attached as the inner exception
 */
private static void decodeBase256Segment(BitSource bits, StringBuilder result, System.Collections.ArrayList byteSegments) {
    // Figure out how long the Base 256 Segment is.
    int d1 = bits.readBits(8);
    int count;
    if (d1 == 0) { // Read the remainder of the symbol
        count = bits.available() / 8;
    } else if (d1 < 250) {
        count = d1;
    } else {
        count = 250 * (d1 - 249) + bits.readBits(8);
    }
    byte[] bytes = new byte[count];
    for (int i = 0; i < count; i++) {
        bytes[i] = unrandomize255State(bits.readBits(8), i);
    }
    byteSegments.Add(bytes);
    try {
        result.Append(System.Text.Encoding.GetEncoding("iso-8859-1").GetString(bytes));
    } catch (Exception uee) {
        // Keep the original failure as the inner exception so its stack trace survives, and
        // throw a specific type rather than a bare Exception.
        throw new NotSupportedException("Platform does not support required encoding: " + uee, uee);
    }
}
\r
440 * See ISO 16022:2006, Annex B, B.2
\r
/**
 * Removes the 255-state randomisation from a Base 256 codeword.
 *
 * The pseudo-random number ((149 * position) % 255) + 1 is subtracted from the codeword;
 * a negative difference is wrapped by adding 256.
 *
 * @param randomizedBase256Codeword the codeword as read from the symbol
 * @param base256CodewordPosition the position used to derive the pseudo-random number
 * @return the unrandomised value as a byte
 */
private static byte unrandomize255State(int randomizedBase256Codeword,
                                        int base256CodewordPosition) {
    int mask = (149 * base256CodewordPosition) % 255 + 1;
    int plain = randomizedBase256Codeword - mask;
    if (plain < 0) {
        plain += 256;
    }
    return (byte) plain;
}
\r