core/src/com/google/zxing/datamatrix/decoder/DecodedBitStreamParser.java

   1 /*
   2  * Copyright 2008 ZXing authors
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 package com.google.zxing.datamatrix.decoder;
  18
  19 import com.google.zxing.ReaderException;
  20 import com.google.zxing.common.BitSource;
  21
  22 /**
  23  * <p>Data Matrix Codes can encode text as bits in one of several modes, and can use multiple modes
  24  * in one Data Matrix Code. This class decodes the bits back into text.</p>
  25  *
  26  * <p>See ISO 16022:2006, 5.2.1 - 5.2.9.2</p>
  27  *
  28  * @author bbrown@google.com (Brian Brown)
  29  * @author Sean Owen
  30  */
  31 final class DecodedBitStreamParser {
  32
  33   /**
  34    * See ISO 16022:2006, Annex C Table C.1
  35    * The C40 Basic Character Set (*'s used for placeholders for the shift values)
  36    */
  37   private static final char[] C40_BASIC_SET_CHARS = {
  38       '*', '*', '*', ' ', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
  39       'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
  40       'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'
  41   };
  42
  43   private static final char[] C40_SHIFT2_SET_CHARS = {
  44     '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.',
  45     '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_'
  46         };
  47
  48   /**
  49    * See ISO 16022:2006, Annex C Table C.2
  50    * The Text Basic Character Set (*'s used for placeholders for the shift values)
  51    */
  52   private static final char[] TEXT_BASIC_SET_CHARS = {
  53     '*', '*', '*', ' ', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
  54     'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
  55     'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
  56   };
  57
  58   private static final char[] TEXT_SHIFT3_SET_CHARS = {
  59     '\'', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
  60     'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '{', '|', '}', '~', (char) 127
  61   };
  62
  63   private static final int PAD_ENCODE = 0;  // Not really an encoding
  64   private static final int ASCII_ENCODE = 1;
  65   private static final int C40_ENCODE = 2;
  66   private static final int TEXT_ENCODE = 3;
  67   private static final int ANSIX12_ENCODE = 4;
  68   private static final int EDIFACT_ENCODE = 5;
  69   private static final int BASE256_ENCODE = 6;
  70
  71   private DecodedBitStreamParser() {
  72   }
  73
  74   static String decode(byte[] bytes) throws ReaderException {
  75     BitSource bits = new BitSource(bytes);
  76     StringBuffer result = new StringBuffer();
  77     StringBuffer resultTrailer = new StringBuffer(0);
  78     int mode = ASCII_ENCODE;
  79     do {
  80       if (mode == ASCII_ENCODE) {
  81         mode = decodeAsciiSegment(bits, result, resultTrailer);
  82       } else {
  83         switch (mode) {
  84           case C40_ENCODE:
  85             decodeC40Segment(bits, result);
  86             break;
  87           case TEXT_ENCODE:
  88             decodeTextSegment(bits, result);
  89             break;
  90           case ANSIX12_ENCODE:
  91             decodeAnsiX12Segment(bits, result);
  92             break;
  93           case EDIFACT_ENCODE:
  94             decodeEdifactSegment(bits, result);
  95             break;
  96           case BASE256_ENCODE:
  97             decodeBase256Segment(bits, result);
  98             break;
  99           default:
 100             throw new ReaderException("Unsupported mode indicator");
 101         }
 102         mode = ASCII_ENCODE;
 103       }
 104     } while (mode != PAD_ENCODE && bits.available() > 0);
 105     if (resultTrailer.length() > 0) {
 106       result.append(resultTrailer);
 107     }
 108     return result.toString();
 109   }
 110
 111   /**
 112    * See ISO 16022:2006, 5.2.3 and Annex C, Table C.2
 113    */
 114   private static int decodeAsciiSegment(BitSource bits, StringBuffer result, StringBuffer resultTrailer)
 115       throws ReaderException {
 116     boolean upperShift = false;
 117     do {
 118       int oneByte = bits.readBits(8);
 119       if (oneByte == 0) {
 120                 throw new ReaderException("0 is an invalid ASCII codeword");
 121             } else if (oneByte <= 128) {  // ASCII data (ASCII value + 1)
 122                 oneByte = upperShift ? (oneByte + 128) : oneByte;
 123                 upperShift = false;
 124                 result.append((char) (oneByte - 1));
 125                 return ASCII_ENCODE;
 126             } else if (oneByte == 129) {  // Pad
 127                 return PAD_ENCODE;
 128             } else if (oneByte <= 229) {  // 2-digit data 00-99 (Numeric Value + 130)
 129               int value = oneByte - 130;
 130               if (value < 10) { // padd with '0' for single digit values
 131                 result.append('0');
 132               }
 133                 result.append(value);
 134             } else if (oneByte == 230) {  // Latch to C40 encodation
 135                 return C40_ENCODE;
 136             } else if (oneByte == 231) {  // Latch to Base 256 encodation
 137                 return BASE256_ENCODE;
 138             } else if (oneByte == 232) {  // FNC1
 139                 throw new ReaderException("Currently not supporting FNC1");
 140             } else if (oneByte == 233) {  // Structured Append
 141                 throw new ReaderException("Currently not supporting Structured Append");
 142             } else if (oneByte == 234) {  // Reader Programming
 143                 throw new ReaderException("Currently not supporting Reader Programming");
 144             } else if (oneByte == 235) {  // Upper Shift (shift to Extended ASCII)
 145                 upperShift = true;
 146             } else if (oneByte == 236) {  // 05 Macro
 147         result.append("[)>\u001E05\u001D");
 148         resultTrailer.insert(0, "\u001E\u0004");
 149       } else if (oneByte == 237) {  // 06 Macro
 150                 result.append("[)>\u001E06\u001D");
 151         resultTrailer.insert(0, "\u001E\u0004");
 152             } else if (oneByte == 238) {  // Latch to ANSI X12 encodation
 153                 return ANSIX12_ENCODE;
 154             } else if (oneByte == 239) {  // Latch to Text encodation
 155                 return TEXT_ENCODE;
 156             } else if (oneByte == 240) {  // Latch to EDIFACT encodation
 157                 return EDIFACT_ENCODE;
 158             } else if (oneByte == 241) {  // ECI Character
 159                 // TODO(bbrown): I think we need to support ECI
 160                 throw new ReaderException("Currently not supporting ECI Character");
 161             } else if (oneByte >= 242) {  // Not to be used in ASCII encodation
 162                 throw new ReaderException(oneByte + " should not be used in ASCII encodation");
 163             }
 164     } while (bits.available() > 0);
 165     return ASCII_ENCODE;
 166   }
 167
 168   /**
 169    * See ISO 16022:2006, 5.2.5 and Annex C, Table C.1
 170    */
 171   private static void decodeC40Segment(BitSource bits, StringBuffer result) throws ReaderException {
 172     // Three C40 values are encoded in a 16-bit value as
 173     // (1600 * C1) + (40 * C2) + C3 + 1
 174     // TODO(bbrown): The Upper Shift with C40 doesn't work in the 4 value scenario all the time
 175     boolean upperShift = false;
 176
 177     int[] cValues = new int[3];
 178     do {
 179       // If there is only one byte left then it will be encoded as ASCII
 180       if (bits.available() == 8) {
 181         return;
 182       }
 183       int firstByte = bits.readBits(8);
 184       if (firstByte == 254) {  // Unlatch codeword
 185         return;
 186       }
 187
 188       parseTwoBytes(firstByte, bits.readBits(8), cValues);
 189
 190       int shift = 0;
 191       for (int i = 0; i < 3; i++) {
 192         int cValue = cValues[i];
 193         switch (shift) {
 194           case 0:
 195             if (cValue < 3) {
 196               shift = cValue + 1;
 197             } else {
 198               if (upperShift) {
 199                 result.append((char) (C40_BASIC_SET_CHARS[cValue] + 128));
 200                 upperShift = false;
 201               } else {
 202                 result.append(C40_BASIC_SET_CHARS[cValue]);
 203               }
 204             }
 205             break;
 206           case 1:
 207             if (upperShift) {
 208               result.append((char) (cValue + 128));
 209               upperShift = false;
 210             } else {
 211               result.append(cValue);
 212             }
 213             shift = 0;
 214             break;
 215           case 2:
 216             if (cValue < 27) {
 217               if (upperShift) {
 218                 result.append((char) (C40_SHIFT2_SET_CHARS[cValue] + 128));
 219                 upperShift = false;
 220               } else {
 221                 result.append(C40_SHIFT2_SET_CHARS[cValue]);
 222               }
 223             } else if (cValue == 27) {  // FNC1
 224               throw new ReaderException("Currently not supporting FNC1");
 225             } else if (cValue == 30) {  // Upper Shift
 226               upperShift = true;
 227             } else {
 228               throw new ReaderException(cValue + " is not valid in the C40 Shift 2 set");
 229             }
 230             shift = 0;
 231             break;
 232           case 3:
 233             if (upperShift) {
 234               result.append((char) (cValue + 224));
 235               upperShift = false;
 236             } else {
 237               result.append((char) (cValue + 96));
 238             }
 239             shift = 0;
 240             break;
 241           default:
 242             throw new ReaderException("Invalid shift value");
 243         }
 244       }
 245     } while (bits.available() > 0);
 246   }
 247
 248   /**
 249    * See ISO 16022:2006, 5.2.6 and Annex C, Table C.2
 250    */
 251   private static void decodeTextSegment(BitSource bits, StringBuffer result) throws ReaderException {
 252     // Three Text values are encoded in a 16-bit value as
 253     // (1600 * C1) + (40 * C2) + C3 + 1
 254     // TODO(bbrown): The Upper Shift with Text doesn't work in the 4 value scenario all the time
 255     boolean upperShift = false;
 256
 257     int[] cValues = new int[3];
 258     do {
 259       // If there is only one byte left then it will be encoded as ASCII
 260       if (bits.available() == 8) {
 261         return;
 262       }
 263       int firstByte = bits.readBits(8);
 264       if (firstByte == 254) {  // Unlatch codeword
 265         return;
 266       }
 267
 268       parseTwoBytes(firstByte, bits.readBits(8), cValues);
 269
 270       int shift = 0;
 271       for (int i = 0; i < 3; i++) {
 272         int cValue = cValues[i];
 273         switch (shift) {
 274           case 0:
 275             if (cValue < 3) {
 276               shift = cValue + 1;
 277             } else {
 278               if (upperShift) {
 279                 result.append((char) (TEXT_BASIC_SET_CHARS[cValue] + 128));
 280                 upperShift = false;
 281               } else {
 282                 result.append(TEXT_BASIC_SET_CHARS[cValue]);
 283               }
 284             }
 285             break;
 286           case 1:
 287             if (upperShift) {
 288               result.append((char) (cValue + 128));
 289               upperShift = false;
 290             } else {
 291               result.append(cValue);
 292             }
 293             shift = 0;
 294             break;
 295           case 2:
 296             // Shift 2 for Text is the same encoding as C40
 297             if (cValue < 27) {
 298               if (upperShift) {
 299                 result.append((char) (C40_SHIFT2_SET_CHARS[cValue] + 128));
 300                 upperShift = false;
 301               } else {
 302                 result.append(C40_SHIFT2_SET_CHARS[cValue]);
 303               }
 304             } else if (cValue == 27) {  // FNC1
 305               throw new ReaderException("Currently not supporting FNC1");
 306             } else if (cValue == 30) {  // Upper Shift
 307               upperShift = true;
 308             } else {
 309               throw new ReaderException(cValue + " is not valid in the C40 Shift 2 set");
 310             }
 311             shift = 0;
 312             break;
 313           case 3:
 314             if (upperShift) {
 315               result.append((char) (TEXT_SHIFT3_SET_CHARS[cValue] + 128));
 316               upperShift = false;
 317             } else {
 318               result.append(TEXT_SHIFT3_SET_CHARS[cValue]);
 319             }
 320             shift = 0;
 321             break;
 322           default:
 323             throw new ReaderException("Invalid shift value");
 324         }
 325       }
 326     } while (bits.available() > 0);
 327   }
 328
 329   /**
 330    * See ISO 16022:2006, 5.2.7
 331    */
 332   private static void decodeAnsiX12Segment(BitSource bits, StringBuffer result) throws ReaderException {
 333     // Three ANSI X12 values are encoded in a 16-bit value as
 334     // (1600 * C1) + (40 * C2) + C3 + 1
 335
 336     int[] cValues = new int[3];
 337     do {
 338       // If there is only one byte left then it will be encoded as ASCII
 339       if (bits.available() == 8) {
 340         return;
 341       }
 342       int firstByte = bits.readBits(8);
 343       if (firstByte == 254) {  // Unlatch codeword
 344         return;
 345       }
 346
 347       parseTwoBytes(firstByte, bits.readBits(8), cValues);
 348
 349       for (int i = 0; i < 3; i++) {
 350         int cValue = cValues[i];
 351         if (cValue == 0) {  // X12 segment terminator <CR>
 352           result.append('\r');
 353         } else if (cValue == 1) {  // X12 segment separator *
 354           result.append('*');
 355         } else if (cValue == 2) {  // X12 sub-element separator >
 356           result.append('>');
 357         } else if (cValue == 3) {  // space
 358           result.append(' ');
 359         } else if (cValue < 14) {  // 0 - 9
 360           result.append((char) (cValue + 44));
 361         } else if (cValue < 40) {  // A - Z
 362           result.append((char) (cValue + 51));
 363         } else {
 364           throw new ReaderException(cValue + " is not valid in the ANSI X12 set");
 365         }
 366       }
 367     } while (bits.available() > 0);
 368   }
 369
 370   private static void parseTwoBytes(int firstByte, int secondByte, int[] result) {
 371     int fullBitValue = (firstByte << 8) + secondByte - 1;
 372     int temp = fullBitValue / 1600;
 373     result[0] = temp;
 374     fullBitValue -= temp * 1600;
 375     temp = fullBitValue / 40;
 376     result[1] = temp;
 377     result[2] = fullBitValue - temp * 40;
 378   }
 379
 380   /**
 381    * See ISO 16022:2006, 5.2.8 and Annex C Table C.3
 382    */
 383   private static void decodeEdifactSegment(BitSource bits, StringBuffer result) {
 384     boolean unlatch = false;
 385     do {
 386       // If there is only two or less bytes left then it will be encoded as ASCII
 387       if (bits.available() <= 16) {
 388         return;
 389       }
 390
 391       for (int i = 0; i < 4; i++) {
 392         int edifactValue = bits.readBits(6);
 393
 394         // Check for the unlatch character
 395         if (edifactValue == 0x2B67) {  // 011111
 396           unlatch = true;
 397           // If we encounter the unlatch code then continue reading because the Codeword triple
 398           // is padded with 0's
 399         }
 400
 401         if (!unlatch) {
 402           if ((edifactValue & 32) == 0) {  // no 1 in the leading (6th) bit
 403             edifactValue |= 64;  // Add a leading 01 to the 6 bit binary value
 404           }
 405           result.append(edifactValue);
 406         }
 407       }
 408     } while (!unlatch && bits.available() > 0);
 409   }
 410
 411   /**
 412    * See ISO 16022:2006, 5.2.9 and Annex B, B.2
 413    */
 414   private static void decodeBase256Segment(BitSource bits, StringBuffer result) {
 415     // Figure out how long the Base 256 Segment is.
 416     int d1 = bits.readBits(8);
 417     int count;
 418     if (d1 == 0) {  // Read the remainder of the symbol
 419       count = bits.available() / 8;
 420     } else if (d1 < 250) {
 421       count = d1;
 422     } else {
 423       count = 250 * (d1 - 249) + bits.readBits(8);
 424     }
 425     for (int i = 0; i < count; i++) {
 426       result.append(unrandomize255State(bits.readBits(8), count));
 427     }
 428   }
 429
 430   /**
 431    * See ISO 16022:2006, Annex B, B.2
 432    */
 433   private static char unrandomize255State(int randomizedBase256Codeword,
 434                                           int base256CodewordPosition) {
 435     int pseudoRandomNumber = ((149 * base256CodewordPosition) % 255) + 1;
 436     int tempVariable = randomizedBase256Codeword - pseudoRandomNumber;
 437     if (tempVariable >= 0) {
 438       return (char) tempVariable;
 439     } else {
 440       return (char) (tempVariable + 256);
 441     }
 442   }
 443
 444 }