csharp/datamatrix/decoder/DecodedBitStreamParser.cs

   1 /*\r
   2  * Copyright 2008 ZXing authors\r
   3  *\r
   4  * Licensed under the Apache License, Version 2.0 (the "License");\r
   5  * you may not use this file except in compliance with the License.\r
   6  * You may obtain a copy of the License at\r
   7  *\r
   8  *      http://www.apache.org/licenses/LICENSE-2.0\r
   9  *\r
  10  * Unless required by applicable law or agreed to in writing, software\r
  11  * distributed under the License is distributed on an "AS IS" BASIS,\r
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
  13  * See the License for the specific language governing permissions and\r
  14  * limitations under the License.\r
  15  */\r
  16 \r
  17 using System;\r
  18 using System.Collections.Generic;\r
  19 using System.Linq;\r
  20 using System.Text;\r
  21 using com.google.zxing.common; \r
  22 \r
  23 namespace com.google.zxing.datamatrix.decoder\r
  24 {\r
    /**
     * <p>Data Matrix Codes can encode text as bits in one of several modes, and can use multiple
     * modes in one Data Matrix Code. This class decodes the codeword bytes back into text,
     * switching between the ASCII, C40, Text, ANSI X12, EDIFACT and Base 256 encodations as the
     * latch codewords in the stream dictate.</p>
     *
     * <p>See ISO 16022:2006, 5.2.1 - 5.2.9</p>
     *
     * @author bbrown@google.com (Brian Brown)
     */
  32     public sealed class DecodedBitStreamParser\r
  33     {\r
  34            /**\r
  35            * See ISO 16022:2006, Annex C Table C.1\r
  36            * The C40 Basic Character Set (*'s used for placeholders for the shift values)\r
  37            */\r
  38           private static  char[] C40_BASIC_SET_CHARS = {\r
  39               '*', '*', '*', ' ', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',\r
  40               'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',\r
  41               'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'\r
  42           };\r
  43           \r
  44           private static  char[] C40_SHIFT2_SET_CHARS = {\r
  45             '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.',\r
  46             '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_'\r
  47                 };\r
  48           \r
  49           /**\r
  50            * See ISO 16022:2006, Annex C Table C.2\r
  51            * The Text Basic Character Set (*'s used for placeholders for the shift values)\r
  52            */\r
  53           private static  char[] TEXT_BASIC_SET_CHARS = {\r
  54             '*', '*', '*', ' ', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',\r
  55             'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',\r
  56             'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'\r
  57           };\r
  58           \r
  59           private static  char[] TEXT_SHIFT3_SET_CHARS = {\r
  60             '\'', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',\r
  61             'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '{', '|', '}', '~', (char) 127\r
  62           };\r
  63           \r
  64           private const  int PAD_ENCODE = 0;  // Not really an encoding\r
  65           private const int ASCII_ENCODE = 1;\r
  66           private const int C40_ENCODE = 2;\r
  67           private const int TEXT_ENCODE = 3;\r
  68           private const int ANSIX12_ENCODE = 4;\r
  69           private const int EDIFACT_ENCODE = 5;\r
  70           private const int BASE256_ENCODE = 6;\r
  71 \r
  72           private DecodedBitStreamParser() {\r
  73           }\r
  74 \r
  75           public static DecoderResult decode(sbyte[] bytes) {\r
  76             BitSource bits = new BitSource(bytes);\r
  77             StringBuilder result = new StringBuilder();\r
  78             StringBuilder resultTrailer = new StringBuilder(0);\r
  79             System.Collections.ArrayList byteSegments = new System.Collections.ArrayList(1);\r
  80             int mode = ASCII_ENCODE;\r
  81             do {\r
  82               if (mode == ASCII_ENCODE) {\r
  83                 mode = decodeAsciiSegment(bits, result, resultTrailer);\r
  84               } else {\r
  85                 switch (mode) {\r
  86                   case C40_ENCODE:\r
  87                     decodeC40Segment(bits, result);\r
  88                     break;\r
  89                   case TEXT_ENCODE:\r
  90                     decodeTextSegment(bits, result);\r
  91                     break;\r
  92                   case ANSIX12_ENCODE:\r
  93                     decodeAnsiX12Segment(bits, result);\r
  94                     break;\r
  95                   case EDIFACT_ENCODE:\r
  96                     decodeEdifactSegment(bits, result);\r
  97                     break;\r
  98                   case BASE256_ENCODE:\r
  99                     decodeBase256Segment(bits, result, byteSegments);\r
 100                     break;\r
 101                   default:\r
 102                     throw new ReaderException();\r
 103                 }\r
 104                 mode = ASCII_ENCODE;\r
 105               }\r
 106             } while (mode != PAD_ENCODE && bits.available() > 0);\r
 107             if (resultTrailer.Length > 0) {\r
 108               result.Append(resultTrailer);\r
 109             }\r
 110             return new DecoderResult(bytes, result.ToString(), int.Equals(byteSegments.Count,0) ? null : byteSegments);\r
 111           }\r
 112           \r
 113           /**\r
 114            * See ISO 16022:2006, 5.2.3 and Annex C, Table C.2\r
 115            */\r
 116           private static int decodeAsciiSegment(BitSource bits, StringBuilder result, StringBuilder resultTrailer)\r
 117               {\r
 118             bool upperShift = false;\r
 119             do {\r
 120               int oneByte = bits.readBits(8);\r
 121               if (oneByte == 0) {\r
 122                         throw new ReaderException();\r
 123                     } else if (oneByte <= 128) {  // ASCII data (ASCII value + 1)\r
 124                         oneByte = upperShift ? (oneByte + 128) : oneByte;\r
 125                         upperShift = false;\r
 126                         result.Append((char) (oneByte - 1));\r
 127                         return ASCII_ENCODE;\r
 128                     } else if (oneByte == 129) {  // Pad\r
 129                         return PAD_ENCODE;\r
 130                     } else if (oneByte <= 229) {  // 2-digit data 00-99 (Numeric Value + 130)\r
 131                       int value = oneByte - 130;\r
 132                       if (value < 10) { // padd with '0' for single digit values\r
 133                         result.Append('0');\r
 134                       }\r
 135                         result.Append(value);\r
 136                     } else if (oneByte == 230) {  // Latch to C40 encodation\r
 137                         return C40_ENCODE;\r
 138                     } else if (oneByte == 231) {  // Latch to Base 256 encodation\r
 139                         return BASE256_ENCODE;\r
 140                     } else if (oneByte == 232) {  // FNC1\r
 141                         throw new ReaderException();\r
 142                     } else if (oneByte == 233) {  // Structured Append\r
 143                         throw new ReaderException();\r
 144                     } else if (oneByte == 234) {  // Reader Programming\r
 145                         throw new ReaderException();\r
 146                     } else if (oneByte == 235) {  // Upper Shift (shift to Extended ASCII)\r
 147                         upperShift = true;\r
 148                     } else if (oneByte == 236) {  // 05 Macro\r
 149                 result.Append("[)>\u001E05\u001D");\r
 150                 resultTrailer.Insert(0, "\u001E\u0004");\r
 151               } else if (oneByte == 237) {  // 06 Macro\r
 152                         result.Append("[)>\u001E06\u001D");\r
 153                 resultTrailer.Insert(0, "\u001E\u0004");\r
 154                     } else if (oneByte == 238) {  // Latch to ANSI X12 encodation\r
 155                         return ANSIX12_ENCODE;\r
 156                     } else if (oneByte == 239) {  // Latch to Text encodation\r
 157                         return TEXT_ENCODE;\r
 158                     } else if (oneByte == 240) {  // Latch to EDIFACT encodation\r
 159                         return EDIFACT_ENCODE;\r
 160                     } else if (oneByte == 241) {  // ECI Character\r
 161                         // TODO(bbrown): I think we need to support ECI\r
 162                         throw new ReaderException();\r
 163                     } else if (oneByte >= 242) {  // Not to be used in ASCII encodation\r
 164                         throw new ReaderException();\r
 165                     }\r
 166             } while (bits.available() > 0);\r
 167             return ASCII_ENCODE;\r
 168           }\r
 169 \r
 170           /**\r
 171            * See ISO 16022:2006, 5.2.5 and Annex C, Table C.1\r
 172            */\r
 173           private static void decodeC40Segment(BitSource bits, StringBuilder result) {\r
 174             // Three C40 values are encoded in a 16-bit value as\r
 175             // (1600 * C1) + (40 * C2) + C3 + 1\r
 176             // TODO(bbrown): The Upper Shift with C40 doesn't work in the 4 value scenario all the time\r
 177             bool upperShift = false;\r
 178 \r
 179             int[] cValues = new int[3];\r
 180             do {\r
 181               // If there is only one byte left then it will be encoded as ASCII\r
 182               if (bits.available() == 8) {\r
 183                 return;\r
 184               }\r
 185               int firstByte = bits.readBits(8);\r
 186               if (firstByte == 254) {  // Unlatch codeword\r
 187                 return;\r
 188               }\r
 189 \r
 190               parseTwoBytes(firstByte, bits.readBits(8), cValues);\r
 191 \r
 192               int shift = 0;\r
 193               for (int i = 0; i < 3; i++) {\r
 194                 int cValue = cValues[i];\r
 195                 switch (shift) {\r
 196                   case 0:\r
 197                     if (cValue < 3) {\r
 198                       shift = cValue + 1;\r
 199                     } else {\r
 200                       if (upperShift) {\r
 201                         result.Append((char) (C40_BASIC_SET_CHARS[cValue] + 128));\r
 202                         upperShift = false;\r
 203                       } else {\r
 204                         result.Append(C40_BASIC_SET_CHARS[cValue]);\r
 205                       }\r
 206                     }\r
 207                     break;\r
 208                   case 1:\r
 209                     if (upperShift) {\r
 210                       result.Append((char) (cValue + 128));\r
 211                       upperShift = false;\r
 212                     } else {\r
 213                       result.Append(cValue);\r
 214                     }\r
 215                     shift = 0;\r
 216                     break;\r
 217                   case 2:\r
 218                     if (cValue < 27) {\r
 219                       if (upperShift) {\r
 220                         result.Append((char) (C40_SHIFT2_SET_CHARS[cValue] + 128));\r
 221                         upperShift = false;\r
 222                       } else {\r
 223                         result.Append(C40_SHIFT2_SET_CHARS[cValue]);\r
 224                       }\r
 225                     } else if (cValue == 27) {  // FNC1\r
 226                       throw new ReaderException();\r
 227                     } else if (cValue == 30) {  // Upper Shift\r
 228                       upperShift = true;\r
 229                     } else {\r
 230                       throw new ReaderException();\r
 231                     }\r
 232                     shift = 0;\r
 233                     break;\r
 234                   case 3:\r
 235                     if (upperShift) {\r
 236                       result.Append((char) (cValue + 224));\r
 237                       upperShift = false;\r
 238                     } else {\r
 239                       result.Append((char) (cValue + 96));\r
 240                     }\r
 241                     shift = 0;\r
 242                     break;\r
 243                   default:\r
 244                     throw new ReaderException();\r
 245                 }\r
 246               }\r
 247             } while (bits.available() > 0);\r
 248           }\r
 249           \r
 250           /**\r
 251            * See ISO 16022:2006, 5.2.6 and Annex C, Table C.2\r
 252            */\r
 253           private static void decodeTextSegment(BitSource bits, StringBuilder result) {\r
 254             // Three Text values are encoded in a 16-bit value as\r
 255             // (1600 * C1) + (40 * C2) + C3 + 1\r
 256             // TODO(bbrown): The Upper Shift with Text doesn't work in the 4 value scenario all the time\r
 257             bool upperShift = false;\r
 258 \r
 259             int[] cValues = new int[3];\r
 260             do {\r
 261               // If there is only one byte left then it will be encoded as ASCII\r
 262               if (bits.available() == 8) {\r
 263                 return;\r
 264               }\r
 265               int firstByte = bits.readBits(8);\r
 266               if (firstByte == 254) {  // Unlatch codeword\r
 267                 return;\r
 268               }\r
 269 \r
 270               parseTwoBytes(firstByte, bits.readBits(8), cValues);\r
 271 \r
 272               int shift = 0;\r
 273               for (int i = 0; i < 3; i++) {\r
 274                 int cValue = cValues[i];\r
 275                 switch (shift) {\r
 276                   case 0:\r
 277                     if (cValue < 3) {\r
 278                       shift = cValue + 1;\r
 279                     } else {\r
 280                       if (upperShift) {\r
 281                         result.Append((char) (TEXT_BASIC_SET_CHARS[cValue] + 128));\r
 282                         upperShift = false;\r
 283                       } else {\r
 284                         result.Append(TEXT_BASIC_SET_CHARS[cValue]);\r
 285                       }\r
 286                     }\r
 287                     break;\r
 288                   case 1:\r
 289                     if (upperShift) {\r
 290                       result.Append((char) (cValue + 128));\r
 291                       upperShift = false;\r
 292                     } else {\r
 293                       result.Append(cValue);\r
 294                     }\r
 295                     shift = 0;\r
 296                     break;\r
 297                   case 2:\r
 298                     // Shift 2 for Text is the same encoding as C40\r
 299                     if (cValue < 27) {\r
 300                       if (upperShift) {\r
 301                         result.Append((char) (C40_SHIFT2_SET_CHARS[cValue] + 128));\r
 302                         upperShift = false;\r
 303                       } else {\r
 304                         result.Append(C40_SHIFT2_SET_CHARS[cValue]);\r
 305                       }\r
 306                     } else if (cValue == 27) {  // FNC1\r
 307                       throw new ReaderException();\r
 308                     } else if (cValue == 30) {  // Upper Shift\r
 309                       upperShift = true;\r
 310                     } else {\r
 311                       throw new ReaderException();\r
 312                     }\r
 313                     shift = 0;\r
 314                     break;\r
 315                   case 3:\r
 316                     if (upperShift) {\r
 317                       result.Append((char) (TEXT_SHIFT3_SET_CHARS[cValue] + 128));\r
 318                       upperShift = false;\r
 319                     } else {\r
 320                       result.Append(TEXT_SHIFT3_SET_CHARS[cValue]);\r
 321                     }\r
 322                     shift = 0;\r
 323                     break;\r
 324                   default:\r
 325                     throw new ReaderException();\r
 326                 }\r
 327               }\r
 328             } while (bits.available() > 0);\r
 329           }\r
 330           \r
 331           /**\r
 332            * See ISO 16022:2006, 5.2.7\r
 333            */\r
 334           private static void decodeAnsiX12Segment(BitSource bits, StringBuilder result) {\r
 335             // Three ANSI X12 values are encoded in a 16-bit value as\r
 336             // (1600 * C1) + (40 * C2) + C3 + 1\r
 337 \r
 338             int[] cValues = new int[3];\r
 339             do {\r
 340               // If there is only one byte left then it will be encoded as ASCII\r
 341               if (bits.available() == 8) {\r
 342                 return;\r
 343               }\r
 344               int firstByte = bits.readBits(8);\r
 345               if (firstByte == 254) {  // Unlatch codeword\r
 346                 return;\r
 347               }\r
 348 \r
 349               parseTwoBytes(firstByte, bits.readBits(8), cValues);\r
 350 \r
 351               for (int i = 0; i < 3; i++) {\r
 352                 int cValue = cValues[i];\r
 353                 if (cValue == 0) {  // X12 segment terminator <CR>\r
 354                   result.Append('\r');\r
 355                 } else if (cValue == 1) {  // X12 segment separator *\r
 356                   result.Append('*');\r
 357                 } else if (cValue == 2) {  // X12 sub-element separator >\r
 358                   result.Append('>');\r
 359                 } else if (cValue == 3) {  // space\r
 360                   result.Append(' ');\r
 361                 } else if (cValue < 14) {  // 0 - 9\r
 362                   result.Append((char) (cValue + 44));\r
 363                 } else if (cValue < 40) {  // A - Z\r
 364                   result.Append((char) (cValue + 51));\r
 365                 } else {\r
 366                   throw new ReaderException();\r
 367                 }\r
 368               }\r
 369             } while (bits.available() > 0);\r
 370           }\r
 371 \r
 372           private static void parseTwoBytes(int firstByte, int secondByte, int[] result) {\r
 373             int fullBitValue = (firstByte << 8) + secondByte - 1;\r
 374             int temp = fullBitValue / 1600;\r
 375             result[0] = temp;\r
 376             fullBitValue -= temp * 1600;\r
 377             temp = fullBitValue / 40;\r
 378             result[1] = temp;\r
 379             result[2] = fullBitValue - temp * 40;\r
 380           }\r
 381           \r
 382           /**\r
 383            * See ISO 16022:2006, 5.2.8 and Annex C Table C.3\r
 384            */\r
 385           private static void decodeEdifactSegment(BitSource bits, StringBuilder result) {\r
 386             bool unlatch = false;\r
 387             do {\r
 388               // If there is only two or less bytes left then it will be encoded as ASCII\r
 389               if (bits.available() <= 16) {\r
 390                 return;\r
 391               }\r
 392 \r
 393               for (int i = 0; i < 4; i++) {\r
 394                 int edifactValue = bits.readBits(6);\r
 395 \r
 396                 // Check for the unlatch character\r
 397                 if (edifactValue == 0x2B67) {  // 011111\r
 398                   unlatch = true;\r
 399                   // If we encounter the unlatch code then continue reading because the Codeword triple\r
 400                   // is padded with 0's\r
 401                 }\r
 402                 \r
 403                 if (!unlatch) {\r
 404                   if ((edifactValue & 32) == 0) {  // no 1 in the leading (6th) bit\r
 405                     edifactValue |= 64;  // Add a leading 01 to the 6 bit binary value\r
 406                   }\r
 407                   result.Append(edifactValue);\r
 408                 }\r
 409               }\r
 410             } while (!unlatch && bits.available() > 0);\r
 411           }\r
 412           \r
 413           /**\r
 414            * See ISO 16022:2006, 5.2.9 and Annex B, B.2\r
 415            */\r
 416           private static void decodeBase256Segment(BitSource bits, StringBuilder result, System.Collections.ArrayList byteSegments) {\r
 417             // Figure out how long the Base 256 Segment is.\r
 418             int d1 = bits.readBits(8);\r
 419             int count;\r
 420             if (d1 == 0) {  // Read the remainder of the symbol\r
 421               count = bits.available() / 8;\r
 422             } else if (d1 < 250) {\r
 423               count = d1;\r
 424             } else {\r
 425               count = 250 * (d1 - 249) + bits.readBits(8);\r
 426             }\r
 427             byte[] bytes = new byte[count];\r
 428             for (int i = 0; i < count; i++) {\r
 429               bytes[i] = unrandomize255State(bits.readBits(8), i);\r
 430             }\r
 431             byteSegments.Add(bytes);\r
 432             try {\r
 433                 result.Append(System.Text.Encoding.GetEncoding("iso-8859-1").GetString(bytes));\r
 434             } catch (Exception uee) {\r
 435               throw new Exception("Platform does not support required encoding: " + uee);\r
 436             }\r
 437           }\r
 438           \r
 439           /**\r
 440            * See ISO 16022:2006, Annex B, B.2\r
 441            */\r
 442           private static byte unrandomize255State(int randomizedBase256Codeword,\r
 443                                                   int base256CodewordPosition) {\r
 444             int pseudoRandomNumber = ((149 * base256CodewordPosition) % 255) + 1;\r
 445             int tempVariable = randomizedBase256Codeword - pseudoRandomNumber;\r
 446             return (byte) (tempVariable >= 0 ? tempVariable : (tempVariable + 256));\r
 447           }\r
 448     }\r
 449 }\r