core/src/com/google/zxing/datamatrix/decoder/DecodedBitStreamParser.java

   1 /*
   2  * Copyright 2008 Google Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 package com.google.zxing.datamatrix.decoder;
  18
  19 import com.google.zxing.ReaderException;
  20 import com.google.zxing.common.BitSource;
  21
  22 /**
  23  * <p>Data Matrix Codes can encode text as bits in one of several modes, and can use multiple modes
  24  * in one Data Matrix Code. This class decodes the bits back into text.</p>
  25  *
  26  * <p>See ISO 16022:2006, 5.2.1 - 5.2.9.2</p>
  27  *
  28  * @author bbrown@google.com (Brian Brown)
  29  */
  30 final class DecodedBitStreamParser {
  31
  32   /**
  33    * See ISO 16022:2006, Annex C Table C.1
  34    * The C40 Basic Character Set (*'s used for placeholders for the shift values)
  35    */
  36   private static final char[] C40_BASIC_SET_CHARS = new char[]{
  37       '*', '*', '*', ' ', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
  38       'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
  39       'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'
  40   };
  41
  42   private static final char[] C40_SHIFT2_SET_CHARS = new char[]{
  43     '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.',
  44     '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_'
  45         };
  46
  47   /**
  48    * See ISO 16022:2006, Annex C Table C.2
  49    * The Text Basic Character Set (*'s used for placeholders for the shift values)
  50    */
  51   private static final char[] TEXT_BASIC_SET_CHARS = new char[]{
  52     '*', '*', '*', ' ', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
  53     'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
  54     'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
  55   };
  56
  57   private static final char[] TEXT_SHIFT3_SET_CHARS = new char[]{
  58     '\'', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
  59     'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '{', '|', '}', '~', 127
  60   };
  61
  62   static final int PAD_ENCODE = 0;  // Not really an encoding
  63   static final int ASCII_ENCODE = 1;
  64   static final int C40_ENCODE = 2;
  65   static final int TEXT_ENCODE = 3;
  66   static final int ANSIX12_ENCODE = 4;
  67   static final int EDIFACT_ENCODE = 5;
  68   static final int BASE256_ENCODE = 6;
  69
  70   private DecodedBitStreamParser() {
  71   }
  72
  73   static String decode(byte[] bytes) throws ReaderException {
  74     BitSource bits = new BitSource(bytes);
  75     StringBuffer result = new StringBuffer();
  76
  77     int mode = ASCII_ENCODE;
  78     do {
  79       if (mode != PAD_ENCODE) {
  80         if (mode == ASCII_ENCODE) {
  81           mode = decodeAsciiSegment(bits, result);
  82         } else if (mode == C40_ENCODE) {
  83           mode = decodeC40Segment(bits, result);
  84         } else if (mode == TEXT_ENCODE) {
  85           mode = decodeTextSegment(bits, result);
  86         } else if (mode == ANSIX12_ENCODE) {
  87           mode = decodeAnsiX12Segment(bits, result);
  88         } else if (mode == EDIFACT_ENCODE) {
  89           mode = decodeEdifactSegment(bits, result);
  90         } else if (mode == BASE256_ENCODE) {
  91           mode = decodeBase256Segment(bits, result);
  92         } else {
  93           throw new ReaderException("Unsupported mode indicator");
  94         }
  95       }
  96     } while (mode != PAD_ENCODE && bits.available() > 0);
  97
  98     return result.toString();
  99   }
 100
 101   /**
 102   * See ISO 16022:2006, 5.2.3 and Annex C, Table C.2
 103   */
 104   private static int decodeAsciiSegment(BitSource bits,
 105                                         StringBuffer result) throws ReaderException {
 106     boolean upperShift = false;
 107     do {
 108       char oneByte = (char) bits.readBits(8);
 109       if (oneByte == '\0') {
 110                 // TODO(bbrown): I think this would be a bug, not sure
 111                 throw new ReaderException("0 is an invalid ASCII codeword");
 112             } else if (oneByte <= 128) {  // ASCII data (ASCII value + 1)
 113                 oneByte = upperShift ? (char) (oneByte + 128) : oneByte;
 114                 upperShift = false;
 115                 result.append((char)(oneByte - 1));
 116                 return ASCII_ENCODE;
 117             } else if (oneByte == 129) {  // Pad
 118                 return PAD_ENCODE;
 119             } else if (oneByte <= 229) {  // 2-digit data 00-99 (Numeric Value + 130)
 120               // TODO(bbrown): Iassume there is some easier way to do this:
 121               if (oneByte - 130 < 10) {
 122                 result.append('0');
 123               }
 124                 result.append(Integer.toString(oneByte - 130));
 125             } else if (oneByte == 230) {  // Latch to C40 encodation
 126                 return C40_ENCODE;
 127             } else if (oneByte == 231) {  // Latch to Base 256 encodation
 128                 return BASE256_ENCODE;
 129             } else if (oneByte == 232) {  // FNC1
 130                 throw new ReaderException("Currently not supporting FNC1");
 131             } else if (oneByte == 233) {  // Structured Append
 132                 throw new ReaderException("Currently not supporting Structured Append");
 133             } else if (oneByte == 234) {  // Reader Programming
 134                 throw new ReaderException("Currently not supporting Reader Programming");
 135             } else if (oneByte == 235) {  // Upper Shift (shift to Extended ASCII)
 136                 upperShift = true;
 137             } else if (oneByte == 236) {  // 05 Macro
 138                 throw new ReaderException("Currently not supporting 05 Macro");
 139             } else if (oneByte == 237) {  // 06 Macro
 140                 throw new ReaderException("Currently not supporting 06 Macro");
 141             } else if (oneByte == 238) {  // Latch to ANSI X12 encodation
 142                 return ANSIX12_ENCODE;
 143             } else if (oneByte == 239) {  // Latch to Text encodation
 144                 return TEXT_ENCODE;
 145             } else if (oneByte == 240) {  // Latch to EDIFACT encodation
 146                 return EDIFACT_ENCODE;
 147             } else if (oneByte == 241) {  // ECI Character
 148                 // TODO(bbrown): I think we need to support ECI
 149                 throw new ReaderException("Currently not supporting ECI Character");
 150             } else if (oneByte >= 242) {  // Not to be used in ASCII encodation
 151                 throw new ReaderException(Integer.toString(oneByte) + " should not be used in ASCII encodation");
 152             }
 153     } while (bits.available() > 0);
 154     return ASCII_ENCODE;
 155   }
 156
 157   /**
 158   * See ISO 16022:2006, 5.2.5 and Annex C, Table C.1
 159   */
 160   private static int decodeC40Segment(BitSource bits,
 161                                       StringBuffer result) throws ReaderException {
 162     // Three C40 values are encoded in a 16-bit value as
 163     // (1600 * C1) + (40 * C2) + C3 + 1
 164     int shift = 0;
 165     // TODO(bbrown): The Upper Shift with C40 doesn't work in the 4 value scenario all the time
 166     boolean upperShift = false;
 167
 168     do {
 169       // If there is only one byte left then it will be encoded as ASCII
 170       if (bits.available() == 8) {
 171         return ASCII_ENCODE;
 172       }
 173
 174       char firstByte = (char) bits.readBits(8);
 175
 176       if (firstByte == 254) {  // Unlatch codeword
 177         return ASCII_ENCODE;
 178       }
 179
 180       int fullBitValue = (firstByte << 8) + bits.readBits(8) - 1;
 181
 182       char[] cValues = new char[3];
 183       cValues[0] = (char) (fullBitValue / 1600);
 184       fullBitValue -= cValues[0] * 1600;
 185       cValues[1] = (char) (fullBitValue / 40);
 186       fullBitValue -= cValues[1] * 40;
 187       cValues[2] = (char) fullBitValue;
 188
 189       for (int i = 0; i < 3; i++) {
 190         if (shift == 0) {
 191           if (cValues[i] == 0) {  // Shift 1
 192             shift = 1;
 193             continue;
 194           } else if (cValues[i] == 1) {  // Shift 2
 195             shift = 2;
 196             continue;
 197           } else if (cValues[i] == 2) {  // Shift 3
 198             shift = 3;
 199             continue;
 200           }
 201           if (upperShift) {
 202             result.append((char)(C40_BASIC_SET_CHARS[cValues[i]] + 128));
 203             upperShift = false;
 204           } else {
 205             result.append(C40_BASIC_SET_CHARS[cValues[i]]);
 206           }
 207         } else if (shift == 1) {
 208           if (upperShift) {
 209             result.append((char) (cValues[i] + 128));
 210             upperShift = false;
 211           } else {
 212             result.append(cValues[i]);
 213           }
 214         } else if (shift == 2) {
 215           if (cValues[i] < 27) {
 216             if(upperShift) {
 217               result.append((char)(C40_SHIFT2_SET_CHARS[cValues[i]] + 128));
 218               upperShift = false;
 219             } else {
 220               result.append(C40_SHIFT2_SET_CHARS[cValues[i]]);
 221             }
 222           } else if (cValues[i] == 27) {  // FNC1
 223             throw new ReaderException("Currently not supporting FNC1");
 224           } else if (cValues[i] == 30) {  // Upper Shirt
 225             upperShift = true;
 226           } else {
 227             throw new ReaderException(Integer.toString(cValues[i]) + " is not valid in the C40 Shift 2 set");
 228           }
 229         } else if (shift == 3) {
 230           if (upperShift) {
 231             result.append((char) (cValues[i] + 224));
 232             upperShift = false;
 233           } else {
 234             result.append((char) cValues[i] + 96);
 235           }
 236         } else {
 237           throw new ReaderException("Invalid shift value");
 238         }
 239       }
 240     } while (bits.available() > 0);
 241     return ASCII_ENCODE;
 242   }
 243
 244   /**
 245   * See ISO 16022:2006, 5.2.6 and Annex C, Table C.2
 246   */
 247   private static int decodeTextSegment(BitSource bits,
 248                                        StringBuffer result) throws ReaderException {
 249     // Three Text values are encoded in a 16-bit value as
 250     // (1600 * C1) + (40 * C2) + C3 + 1
 251     int shift = 0;
 252     // TODO(bbrown): The Upper Shift with Text doesn't work in the 4 value scenario all the time
 253     boolean upperShift = false;
 254
 255     do {
 256       // If there is only one byte left then it will be encoded as ASCII
 257       if (bits.available() == 8) {
 258         return ASCII_ENCODE;
 259       }
 260
 261       char firstByte = (char) bits.readBits(8);
 262
 263       if (firstByte == 254) {  // Unlatch codeword
 264         return ASCII_ENCODE;
 265       }
 266
 267       int fullBitValue = (firstByte << 8) + bits.readBits(8) - 1;
 268
 269       char[] cValues = new char[3];
 270       cValues[0] = (char) (fullBitValue / 1600);
 271       fullBitValue -= cValues[0] * 1600;
 272       cValues[1] = (char) (fullBitValue / 40);
 273       fullBitValue -= cValues[1] * 40;
 274       cValues[2] = (char) fullBitValue;
 275
 276       for (int i = 0; i < 3; i++) {
 277         if (shift == 0) {
 278           if (cValues[i] == 0) {  // Shift 1
 279             shift = 1;
 280             continue;
 281           } else if (cValues[i] == 1) {  // Shift 2
 282             shift = 2;
 283             continue;
 284           } else if (cValues[i] == 2) {  // Shift 3
 285             shift = 3;
 286             continue;
 287           }
 288           if (upperShift) {
 289             result.append((char)(TEXT_BASIC_SET_CHARS[cValues[i]] + 128));
 290             upperShift = false;
 291           } else {
 292             result.append(TEXT_BASIC_SET_CHARS[cValues[i]]);
 293           }
 294         } else if (shift == 1) {
 295           if (upperShift) {
 296             result.append((char) (cValues[i] + 128));
 297             upperShift = false;
 298           } else {
 299             result.append((char) cValues[i]);
 300           }
 301         } else if (shift == 2) {
 302           // Shift 2 for Text is the same encoding as C40
 303           if (cValues[i] < 27) {
 304             if(upperShift) {
 305               result.append((char)(C40_SHIFT2_SET_CHARS[cValues[i]] + 128));
 306               upperShift = false;
 307             } else {
 308               result.append(C40_SHIFT2_SET_CHARS[cValues[i]]);
 309             }
 310           } else if (cValues[i] == 27) {  // FNC1
 311             throw new ReaderException("Currently not supporting FNC1");
 312           } else if (cValues[i] == 30) {  // Upper Shirt
 313             upperShift = true;
 314           } else {
 315             throw new ReaderException(Integer.toString(cValues[i]) + " is not valid in the C40 Shift 2 set");
 316           }
 317         } else if (shift == 3) {
 318           if (upperShift) {
 319             result.append((char)(TEXT_SHIFT3_SET_CHARS[cValues[i]] + 128));
 320             upperShift = false;
 321           } else {
 322             result.append(TEXT_SHIFT3_SET_CHARS[cValues[i]]);
 323           }
 324         } else {
 325           throw new ReaderException("Invalid shift value");
 326         }
 327       }
 328     } while (bits.available() > 0);
 329     return ASCII_ENCODE;
 330   }
 331
 332   /**
 333   * See ISO 16022:2006, 5.2.7
 334   */
 335   private static int decodeAnsiX12Segment(BitSource bits,
 336                                           StringBuffer result) throws ReaderException {
 337     // Three ANSI X12 values are encoded in a 16-bit value as
 338     // (1600 * C1) + (40 * C2) + C3 + 1
 339
 340     do {
 341       // If there is only one byte left then it will be encoded as ASCII
 342       if (bits.available() == 8) {
 343         return ASCII_ENCODE;
 344       }
 345
 346       char firstByte = (char) bits.readBits(8);
 347
 348       if (firstByte == 254) {  // Unlatch codeword
 349         return ASCII_ENCODE;
 350       }
 351
 352       int fullBitValue = (firstByte << 8) + bits.readBits(8) - 1;
 353
 354       char[] cValues = new char[3];
 355       cValues[0] = (char) (fullBitValue / 1600);
 356       fullBitValue -= cValues[0] * 1600;
 357       cValues[1] = (char) (fullBitValue / 40);
 358       fullBitValue -= cValues[1] * 40;
 359       cValues[2] = (char) fullBitValue;
 360
 361       for (int i = 0; i < 3; i++) {
 362         // TODO(bbrown): These really aren't X12 symbols, we are converting to ASCII chars
 363         if (cValues[i] == 0) {  // X12 segment terminator <CR>
 364           result.append("<CR>");
 365         } else if (cValues[i] == 1) {  // X12 segment separator *
 366           result.append('*');
 367         } else if (cValues[i] == 2) {  // X12 sub-element separator >
 368           result.append('>');
 369         } else if (cValues[i] == 3) {  // space
 370           result.append(' ');
 371         } else if (cValues[i] < 14) {  // 0 - 9
 372           result.append((char) (cValues[i] + 44));
 373         } else if (cValues[i] < 40) {  // A - Z
 374           result.append((char) (cValues[i] + 51));
 375         } else {
 376           throw new ReaderException(Integer.toString(cValues[i]) + " is not valid in the ANSI X12 set");
 377         }
 378       }
 379     } while (bits.available() > 0);
 380
 381     return ASCII_ENCODE;
 382   }
 383
 384   /**
 385   * See ISO 16022:2006, 5.2.8 and Annex C Table C.3
 386   */
 387   private static int decodeEdifactSegment(BitSource bits,
 388                                           StringBuffer result) throws ReaderException {
 389     boolean unlatch = false;
 390     do {
 391       // If there is only two or less bytes left then it will be encoded as ASCII
 392       if (bits.available() <= 16) {
 393         return ASCII_ENCODE;
 394       }
 395
 396       for (int i = 0; i < 4; i++) {
 397         char edifactValue = (char) bits.readBits(6);
 398
 399         // Check for the unlatch character
 400         if (edifactValue == 0x2B67) {  // 011111
 401           unlatch = true;
 402           // If we encounter the unlatch code then continue reading because the Codeword triple
 403           // is padded with 0's
 404         }
 405
 406         if (!unlatch) {
 407           if ((edifactValue & 32) == 0) {  // no 1 in the leading (6th) bit
 408             edifactValue |= 64;  // Add a leading 01 to the 6 bit binary value
 409           }
 410           result.append((char) edifactValue);
 411         }
 412       }
 413     } while (!unlatch && bits.available() > 0);
 414
 415     return ASCII_ENCODE;
 416   }
 417
 418   /**
 419   * See ISO 16022:2006, 5.2.9 and Annex B, B.2
 420   */
 421   private static int decodeBase256Segment(BitSource bits,
 422                                           StringBuffer result) throws ReaderException {
 423     // Figure out how long the Base 256 Segment is.
 424     char d1 = (char) bits.readBits(8);
 425     int count;
 426     if (d1 == 0) {  // Read the remainder of the symbol
 427       count = bits.available() / 8;
 428     } else if (d1 < 250) {
 429       count = d1;
 430     } else {
 431       count = 250 * (d1 - 249) + bits.readBits(8);
 432     }
 433     char[] readBytes = new char[count];
 434     for (int i = 0; i < count; i++) {
 435       result.append(unrandomize255State((char) bits.readBits(8), count));
 436     }
 437
 438     return ASCII_ENCODE;
 439   }
 440
 441   /**
 442   * See ISO 16022:2006, Annex B, B.2
 443   */
 444   private static char unrandomize255State(char randomizedBase256Codeword,
 445                                           int base256CodewordPosition) {
 446     char pseudoRandomNumber = (char) (((149 * base256CodewordPosition) % 255) + 1);
 447     int tempVariable = randomizedBase256Codeword - pseudoRandomNumber;
 448     if (tempVariable >= 0) {
 449       return (char) tempVariable;
 450     } else {
 451       return (char) (tempVariable + 256);
 452     }
 453   }
 454
 455 }