/*
* Copyright 2008 ZXing authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
\r
17 using ReaderException = com.google.zxing.ReaderException;
\r
18 using BitSource = com.google.zxing.common.BitSource;
\r
19 using DecoderResult = com.google.zxing.common.DecoderResult;
\r
20 namespace com.google.zxing.datamatrix.decoder
\r
/// <summary> <p>Data Matrix Codes can encode text as bits in one of several modes, and can use multiple modes
/// in one Data Matrix Code. This class decodes the bits back into text.</p>
///
/// <p>See ISO 16022:2006, 5.2.1 - 5.2.9.2</p>
/// </summary>
/// <author> bbrown@google.com (Brian Brown) </author>
/// <author> Sean Owen </author>
/// <author> www.Redivivus.in (suraj.supekar@redivivus.in) - Ported from ZXING Java Source </author>
\r
35 sealed class DecodedBitStreamParser
\r
/// <summary> See ISO 16022:2006, Annex C Table C.1
/// The C40 Basic Character Set (*'s used for placeholders for the shift values)
/// </summary>
private static readonly char[] C40_BASIC_SET_CHARS = new char[]
{
    '*', '*', '*', ' ', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'
};

/// <summary> The C40 Shift 2 set (values 0-26). The Text decoder indexes this same table for its Shift 2 set. </summary>
private static readonly char[] C40_SHIFT2_SET_CHARS = new char[]
{
    '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/',
    ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_'
};

/// <summary> See ISO 16022:2006, Annex C Table C.2
/// The Text Basic Character Set (*'s used for placeholders for the shift values)
/// </summary>
private static readonly char[] TEXT_BASIC_SET_CHARS = new char[]
{
    '*', '*', '*', ' ', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
};

/// <summary> The Text Shift 3 set (values 0-31). </summary>
// Value 0 is the grave accent (ASCII 96), not the apostrophe: the C40 decoder maps its
// Shift 3 values to (value + 96), and the tail of this table ('{' .. DEL) is 123..127,
// i.e. also (value + 96). Entries 1-26 are upper case because Text swaps the letter cases.
private static readonly char[] TEXT_SHIFT3_SET_CHARS = new char[]
{
    '`', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
    'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    '{', '|', '}', '~', (char) 127
};

// Encodation modes. decodeAsciiSegment returns one of these to tell decode() which
// segment decoder to run next.
private const int PAD_ENCODE = 0; // Not really an encoding
private const int ASCII_ENCODE = 1;
private const int C40_ENCODE = 2;
private const int TEXT_ENCODE = 3;
private const int ANSIX12_ENCODE = 4;
private const int EDIFACT_ENCODE = 5;
private const int BASE256_ENCODE = 6;
\r
/// <summary> Private so the class cannot be instantiated from outside; every other member is static. </summary>
private DecodedBitStreamParser()
{
}
\r
/// <summary> Decodes a run of Data Matrix data codewords into text.
/// Decoding always starts in ASCII encodation; the ASCII decoder reports which
/// encodation was latched, the matching segment decoder runs once, and control
/// returns to ASCII. The loop ends on a pad codeword or when no bits remain.
/// </summary>
/// <param name="bytes">the data codewords to decode</param>
/// <returns> a DecoderResult built from the input bytes, the decoded text and the
/// Base 256 byte segments (null when no Base 256 segment was seen)
/// </returns>
/// <exception cref="ReaderException">if an unknown mode or an invalid codeword is met</exception>
internal static DecoderResult decode(sbyte[] bytes)
{
    BitSource bits = new BitSource(bytes);
    System.Text.StringBuilder result = new System.Text.StringBuilder(100);
    System.Text.StringBuilder resultTrailer = new System.Text.StringBuilder(0);
    System.Collections.ArrayList byteSegments = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(1));

    int mode = ASCII_ENCODE;
    bool keepGoing = true;
    while (keepGoing)
    {
        switch (mode)
        {
            case ASCII_ENCODE:
                // The ASCII decoder tells us which encodation comes next.
                mode = decodeAsciiSegment(bits, result, resultTrailer);
                break;

            case C40_ENCODE:
                decodeC40Segment(bits, result);
                mode = ASCII_ENCODE;
                break;

            case TEXT_ENCODE:
                decodeTextSegment(bits, result);
                mode = ASCII_ENCODE;
                break;

            case ANSIX12_ENCODE:
                decodeAnsiX12Segment(bits, result);
                mode = ASCII_ENCODE;
                break;

            case EDIFACT_ENCODE:
                decodeEdifactSegment(bits, result);
                mode = ASCII_ENCODE;
                break;

            case BASE256_ENCODE:
                decodeBase256Segment(bits, result, byteSegments);
                mode = ASCII_ENCODE;
                break;

            default:
                throw ReaderException.Instance;
        }

        keepGoing = mode != PAD_ENCODE && bits.available() > 0;
    }

    // Macro codewords queue a trailer that belongs after all decoded data.
    if (resultTrailer.Length > 0)
    {
        result.Append(resultTrailer.ToString());
    }

    System.Collections.ArrayList reportedSegments = null;
    if (byteSegments.Count != 0)
    {
        reportedSegments = byteSegments;
    }
    return new DecoderResult(bytes, result.ToString(), reportedSegments, null);
}
\r
/// <summary> See ISO 16022:2006, 5.2.3 and Annex C, Table C.2
/// Reads ASCII-encodation codewords until one of them ends the segment.
/// </summary>
/// <param name="bits">source of the 8-bit codewords</param>
/// <param name="result">receives the decoded characters</param>
/// <param name="resultTrailer">receives the macro trailer, inserted at its front</param>
/// <returns> the encodation to continue with: ASCII_ENCODE after a plain ASCII character
/// or when the bits run out, PAD_ENCODE on a pad codeword, otherwise the latched mode
/// </returns>
/// <exception cref="ReaderException">on codeword 0 or on codewords 242 and above</exception>
private static int decodeAsciiSegment(BitSource bits, System.Text.StringBuilder result, System.Text.StringBuilder resultTrailer)
{
    bool upperShift = false;
    do
    {
        int codeword = bits.readBits(8);

        if (codeword == 0)
        {
            throw ReaderException.Instance;
        }

        if (codeword <= 128)
        {
            // ASCII data (ASCII value + 1); a pending Upper Shift moves it into 128..255.
            int charCode = codeword - 1;
            if (upperShift)
            {
                charCode += 128;
            }
            result.Append((char) charCode);
            return ASCII_ENCODE;
        }

        if (codeword >= 130 && codeword <= 229)
        {
            // 2-digit data 00-99 (Numeric Value + 130)
            int twoDigits = codeword - 130;
            if (twoDigits < 10)
            {
                // pad with '0' for single digit values
                result.Append('0');
            }
            result.Append(twoDigits);
            continue;
        }

        switch (codeword)
        {
            case 129: // Pad
                return PAD_ENCODE;

            case 230: // Latch to C40 encodation
                return C40_ENCODE;

            case 231: // Latch to Base 256 encodation
                return BASE256_ENCODE;

            case 232: // FNC1
            case 233: // Structured Append
            case 234: // Reader Programming
                // Ignore these symbols for now
                break;

            case 235: // Upper Shift (shift to Extended ASCII)
                upperShift = true;
                break;

            case 236: // 05 Macro
                result.Append("[)>\u001E05\u001D");
                resultTrailer.Insert(0, "\u001E\u0004");
                break;

            case 237: // 06 Macro
                result.Append("[)>\u001E06\u001D");
                resultTrailer.Insert(0, "\u001E\u0004");
                break;

            case 238: // Latch to ANSI X12 encodation
                return ANSIX12_ENCODE;

            case 239: // Latch to Text encodation
                return TEXT_ENCODE;

            case 240: // Latch to EDIFACT encodation
                return EDIFACT_ENCODE;

            case 241: // ECI Character
                // TODO(bbrown): I think we need to support ECI
                // Ignore this symbol for now
                break;

            default:
                // 242 and above: not to be used in ASCII encodation
                throw ReaderException.Instance;
        }
    }
    while (bits.available() > 0);

    return ASCII_ENCODE;
}
\r
/// <summary> See ISO 16022:2006, 5.2.5 and Annex C, Table C.1
/// Decodes C40 codeword pairs until the unlatch codeword (254), a single trailing
/// byte, or the end of the bit source.
/// </summary>
/// <param name="bits">source of the 8-bit codewords</param>
/// <param name="result">receives the decoded characters</param>
/// <exception cref="ReaderException">on FNC1, on an unknown Shift 2 value, or on a
/// value that falls outside the character table of the active set
/// </exception>
private static void decodeC40Segment(BitSource bits, System.Text.StringBuilder result)
{
    // Three C40 values are encoded in a 16-bit value as
    // (1600 * C1) + (40 * C2) + C3 + 1
    // TODO(bbrown): The Upper Shift with C40 doesn't work in the 4 value scenario all the time
    bool upperShift = false;

    int[] cValues = new int[3];
    do
    {
        // If there is only one byte left then it will be encoded as ASCII
        if (bits.available() == 8)
        {
            return;
        }
        int firstByte = bits.readBits(8);
        if (firstByte == 254)
        {
            // Unlatch codeword
            return;
        }

        parseTwoBytes(firstByte, bits.readBits(8), cValues);

        // The shift state starts over for every codeword pair.
        int shift = 0;
        for (int i = 0; i < 3; i++)
        {
            int cValue = cValues[i];
            switch (shift)
            {
                case 0:
                    if (cValue < 3)
                    {
                        // Values 0..2 select Shift 1..3 for the next value.
                        shift = cValue + 1;
                    }
                    else if (cValue < C40_BASIC_SET_CHARS.Length)
                    {
                        if (upperShift)
                        {
                            result.Append((char) (C40_BASIC_SET_CHARS[cValue] + 128));
                            upperShift = false;
                        }
                        else
                        {
                            result.Append(C40_BASIC_SET_CHARS[cValue]);
                        }
                    }
                    else
                    {
                        // parseTwoBytes can yield 40 for C1; report it as bad data
                        // instead of letting the array index fail.
                        throw ReaderException.Instance;
                    }
                    break;

                case 1:
                    // Shift 1: the value is the character code itself.
                    if (upperShift)
                    {
                        result.Append((char) (cValue + 128));
                        upperShift = false;
                    }
                    else
                    {
                        // Cast is required: Append(int) would append the decimal digits.
                        result.Append((char) cValue);
                    }
                    shift = 0;
                    break;

                case 2:
                    if (cValue >= 0 && cValue < C40_SHIFT2_SET_CHARS.Length)
                    {
                        if (upperShift)
                        {
                            result.Append((char) (C40_SHIFT2_SET_CHARS[cValue] + 128));
                            upperShift = false;
                        }
                        else
                        {
                            result.Append(C40_SHIFT2_SET_CHARS[cValue]);
                        }
                    }
                    else if (cValue == 27)
                    {
                        // FNC1
                        throw ReaderException.Instance;
                    }
                    else if (cValue == 30)
                    {
                        // Upper Shift
                        upperShift = true;
                    }
                    else
                    {
                        throw ReaderException.Instance;
                    }
                    shift = 0;
                    break;

                case 3:
                    // Shift 3: value 0 maps to character code 96.
                    if (upperShift)
                    {
                        result.Append((char) (cValue + 224));
                        upperShift = false;
                    }
                    else
                    {
                        result.Append((char) (cValue + 96));
                    }
                    shift = 0;
                    break;

                default:
                    throw ReaderException.Instance;
            }
        }
    }
    while (bits.available() > 0);
}
\r
/// <summary> See ISO 16022:2006, 5.2.6 and Annex C, Table C.2
/// Decodes Text codeword pairs until the unlatch codeword (254), a single trailing
/// byte, or the end of the bit source.
/// </summary>
/// <param name="bits">source of the 8-bit codewords</param>
/// <param name="result">receives the decoded characters</param>
/// <exception cref="ReaderException">on FNC1, on an unknown Shift 2 value, or on a
/// value that falls outside the character table of the active set
/// </exception>
private static void decodeTextSegment(BitSource bits, System.Text.StringBuilder result)
{
    // Three Text values are encoded in a 16-bit value as
    // (1600 * C1) + (40 * C2) + C3 + 1
    // TODO(bbrown): The Upper Shift with Text doesn't work in the 4 value scenario all the time
    bool upperShift = false;

    int[] cValues = new int[3];
    do
    {
        // If there is only one byte left then it will be encoded as ASCII
        if (bits.available() == 8)
        {
            return;
        }
        int firstByte = bits.readBits(8);
        if (firstByte == 254)
        {
            // Unlatch codeword
            return;
        }

        parseTwoBytes(firstByte, bits.readBits(8), cValues);

        // The shift state starts over for every codeword pair.
        int shift = 0;
        for (int i = 0; i < 3; i++)
        {
            int cValue = cValues[i];
            switch (shift)
            {
                case 0:
                    if (cValue < 3)
                    {
                        // Values 0..2 select Shift 1..3 for the next value.
                        shift = cValue + 1;
                    }
                    else if (cValue < TEXT_BASIC_SET_CHARS.Length)
                    {
                        if (upperShift)
                        {
                            result.Append((char) (TEXT_BASIC_SET_CHARS[cValue] + 128));
                            upperShift = false;
                        }
                        else
                        {
                            result.Append(TEXT_BASIC_SET_CHARS[cValue]);
                        }
                    }
                    else
                    {
                        // parseTwoBytes can yield 40 for C1; report it as bad data
                        // instead of letting the array index fail.
                        throw ReaderException.Instance;
                    }
                    break;

                case 1:
                    // Shift 1: the value is the character code itself.
                    if (upperShift)
                    {
                        result.Append((char) (cValue + 128));
                        upperShift = false;
                    }
                    else
                    {
                        // Cast is required: Append(int) would append the decimal digits.
                        result.Append((char) cValue);
                    }
                    shift = 0;
                    break;

                case 2:
                    // Shift 2 for Text is the same encoding as C40
                    if (cValue >= 0 && cValue < C40_SHIFT2_SET_CHARS.Length)
                    {
                        if (upperShift)
                        {
                            result.Append((char) (C40_SHIFT2_SET_CHARS[cValue] + 128));
                            upperShift = false;
                        }
                        else
                        {
                            result.Append(C40_SHIFT2_SET_CHARS[cValue]);
                        }
                    }
                    else if (cValue == 27)
                    {
                        // FNC1
                        throw ReaderException.Instance;
                    }
                    else if (cValue == 30)
                    {
                        // Upper Shift
                        upperShift = true;
                    }
                    else
                    {
                        throw ReaderException.Instance;
                    }
                    shift = 0;
                    break;

                case 3:
                    // The Shift 3 table is shorter than the 0..39 value range, so guard the index.
                    if (cValue >= 0 && cValue < TEXT_SHIFT3_SET_CHARS.Length)
                    {
                        if (upperShift)
                        {
                            result.Append((char) (TEXT_SHIFT3_SET_CHARS[cValue] + 128));
                            upperShift = false;
                        }
                        else
                        {
                            result.Append(TEXT_SHIFT3_SET_CHARS[cValue]);
                        }
                    }
                    else
                    {
                        throw ReaderException.Instance;
                    }
                    shift = 0;
                    break;

                default:
                    throw ReaderException.Instance;
            }
        }
    }
    while (bits.available() > 0);
}
\r
/// <summary> See ISO 16022:2006, 5.2.7
/// Decodes ANSI X12 codeword pairs until the unlatch codeword (254), a single
/// trailing byte, or the end of the bit source.
/// </summary>
/// <param name="bits">source of the 8-bit codewords</param>
/// <param name="result">receives the decoded characters</param>
/// <exception cref="ReaderException">if a decoded value is 40 or above</exception>
private static void decodeAnsiX12Segment(BitSource bits, System.Text.StringBuilder result)
{
    // Three ANSI X12 values are encoded in a 16-bit value as
    // (1600 * C1) + (40 * C2) + C3 + 1

    int[] triple = new int[3];
    do
    {
        // If there is only one byte left then it will be encoded as ASCII
        if (bits.available() == 8)
        {
            return;
        }

        int leadByte = bits.readBits(8);
        if (leadByte == 254)
        {
            // Unlatch codeword
            return;
        }

        parseTwoBytes(leadByte, bits.readBits(8), triple);

        foreach (int x12Value in triple)
        {
            switch (x12Value)
            {
                case 0:
                    // X12 segment terminator <CR>
                    result.Append('\r');
                    break;

                case 1:
                    // X12 segment separator *
                    result.Append('*');
                    break;

                case 2:
                    // X12 sub-element separator >
                    result.Append('>');
                    break;

                case 3:
                    // space
                    result.Append(' ');
                    break;

                default:
                    if (x12Value < 14)
                    {
                        // 0 - 9
                        result.Append((char) (x12Value + 44));
                    }
                    else if (x12Value < 40)
                    {
                        // A - Z
                        result.Append((char) (x12Value + 51));
                    }
                    else
                    {
                        throw ReaderException.Instance;
                    }
                    break;
            }
        }
    }
    while (bits.available() > 0);
}
\r
/// <summary> Splits a codeword pair into its three packed values.
/// The pair encodes (1600 * C1) + (40 * C2) + C3 + 1 as a 16-bit number.
/// </summary>
/// <param name="firstByte">high-order codeword of the pair</param>
/// <param name="secondByte">low-order codeword of the pair</param>
/// <param name="result">receives C1, C2 and C3 at indexes 0, 1 and 2</param>
private static void parseTwoBytes(int firstByte, int secondByte, int[] result)
{
    // Undo the "+ 1" of the packing formula.
    int packed = ((firstByte << 8) + secondByte) - 1;
    int belowC1 = packed % 1600;

    result[0] = packed / 1600;
    result[1] = belowC1 / 40;
    result[2] = belowC1 % 40;
}
\r
/// <summary> See ISO 16022:2006, 5.2.8 and Annex C Table C.3
/// Decodes EDIFACT data, four 6-bit values at a time, until the unlatch value is
/// read, at most two bytes remain, or the bit source is exhausted.
/// </summary>
/// <param name="bits">source of the 6-bit EDIFACT values</param>
/// <param name="result">receives the decoded characters</param>
private static void decodeEdifactSegment(BitSource bits, System.Text.StringBuilder result)
{
    // 011111 is the unlatch value. The previous comparison against 0x2B67 could never
    // match a 6-bit read, so the segment was never left through the unlatch path.
    const int UNLATCH_VALUE = 0x1F;

    bool unlatch = false;
    do
    {
        // If there is only two or less bytes left then it will be encoded as ASCII
        if (bits.available() <= 16)
        {
            return;
        }

        for (int i = 0; i < 4; i++)
        {
            int edifactValue = bits.readBits(6);

            // Check for the unlatch character
            if (edifactValue == UNLATCH_VALUE)
            {
                unlatch = true;
                // If we encounter the unlatch code then continue reading because the Codeword triple
                // is padded with 0's
                // NOTE(review): this consumes the rest of the 3-byte group; confirm against
                // the specification whether only the rest of the current byte is padding.
            }

            if (!unlatch)
            {
                if ((edifactValue & 32) == 0)
                {
                    // no 1 in the leading (6th) bit
                    edifactValue |= 64; // Add a leading 01 to the 6 bit binary value
                }
                // Cast is required: Append(int) would append the decimal digits.
                result.Append((char) edifactValue);
            }
        }
    }
    while (!unlatch && bits.available() > 0);
}
\r
/// <summary> See ISO 16022:2006, 5.2.9 and Annex B, B.2
/// Reads one Base 256 segment: a length field followed by that many randomized
/// bytes. The bytes are recorded as a byte segment and also appended to the text,
/// decoded as ISO-8859-1.
/// </summary>
/// <param name="bits">source of the 8-bit codewords</param>
/// <param name="result">receives the segment decoded as ISO-8859-1 text</param>
/// <param name="byteSegments">receives the raw bytes of the segment as one byte[] entry</param>
/// <exception cref="ReaderException">if the declared length runs past the available data</exception>
/// <exception cref="System.SystemException">if the platform lacks the ISO-8859-1 encoding</exception>
private static void decodeBase256Segment(BitSource bits, System.Text.StringBuilder result, System.Collections.ArrayList byteSegments)
{
    // Figure out how long the Base 256 Segment is.
    int d1 = bits.readBits(8);
    int count;
    if (d1 == 0)
    {
        // Read the remainder of the symbol
        count = bits.available() / 8;
    }
    else if (d1 < 250)
    {
        count = d1;
    }
    else
    {
        // Two-byte length field
        if (bits.available() < 8)
        {
            throw ReaderException.Instance;
        }
        count = 250 * (d1 - 249) + bits.readBits(8);
    }

    sbyte[] bytes = new sbyte[count];
    for (int i = 0; i < count; i++)
    {
        // A length field larger than the remaining data is bad input, not a crash.
        if (bits.available() < 8)
        {
            throw ReaderException.Instance;
        }
        // NOTE(review): the position passed here is the index within the segment and the
        // length byte is used as read; confirm against Annex B.2 whether the symbol-wide
        // codeword position (and an unrandomized length) is required.
        bytes[i] = unrandomize255State(bits.readBits(8), i);
    }

    // Convert once and reuse the same array for the segment list and the text.
    byte[] rawBytes = SupportClass.ToByteArray(bytes);
    byteSegments.Add(rawBytes);

    try
    {
        // "ISO8859_1" is the Java charset name; .NET registers this encoding as "ISO-8859-1".
        result.Append(System.Text.Encoding.GetEncoding("ISO-8859-1").GetString(rawBytes));
    }
    catch (System.ArgumentException ae)
    {
        // GetEncoding reports an unknown name with ArgumentException, not IOException.
        throw new System.SystemException("Platform does not support required encoding: " + ae);
    }
    catch (System.NotSupportedException nse)
    {
        throw new System.SystemException("Platform does not support required encoding: " + nse);
    }
}
\r
/// <summary> See ISO 16022:2006, Annex B, B.2
/// Removes the position-dependent pseudo-random offset from a Base 256 codeword.
/// </summary>
/// <param name="randomizedBase256Codeword">the codeword as read from the symbol</param>
/// <param name="base256CodewordPosition">the position used to derive the offset</param>
/// <returns> the original value, wrapped into 0..255 and cast to sbyte </returns>
private static sbyte unrandomize255State(int randomizedBase256Codeword, int base256CodewordPosition)
{
    // The offset is always in 1..255 for non-negative positions.
    int offset = ((149 * base256CodewordPosition) % 255) + 1;

    int plain = randomizedBase256Codeword - offset;
    if (plain < 0)
    {
        plain += 256;
    }
    return (sbyte) plain;
}
\r