core/src/com/google/zxing/datamatrix/decoder/DecodedBitStreamParser.java

   1 /*
   2  * Copyright 2008 Google Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 package com.google.zxing.datamatrix.decoder;
  18
  19 import com.google.zxing.ReaderException;
  20 import com.google.zxing.common.BitSource;
  21 import java.io.UnsupportedEncodingException;
  22
  23 /**
  24  * <p>Data Matrix Codes can encode text as bits in one of several modes, and can use multiple modes
  25  * in one Data Matrix Code. This class decodes the bits back into text.</p>
  26  *
  27  * <p>See ISO 16022:2006, 5.2.1 - 5.2.9.2</p>
  28  *
  29  * @author bbrown@google.com (Brian Brown)
  30  */
  31 final class DecodedBitStreamParser {
  32
  33   /**
  34    * See ISO 16022:2006, Annex C Table C.1
  35    * The C40 Basic Character Set (*'s used for placeholders for the shift values)
  36    */
  37   private static final char[] C40_BASIC_SET_CHARS = new char[]{
  38       '*', '*', '*', ' ', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
  39       'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
  40       'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'
  41   };
  42
  43   private static final char[] C40_SHIFT2_SET_CHARS = new char[]{
  44     '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.',
  45     '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_'
  46         };
  47
  48   /**
  49    * See ISO 16022:2006, Annex C Table C.2
  50    * The Text Basic Character Set (*'s used for placeholders for the shift values)
  51    */
  52   private static final char[] TEXT_BASIC_SET_CHARS = new char[]{
  53     '*', '*', '*', ' ', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
  54     'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
  55     'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
  56   };
  57
  58   private static final char[] TEXT_SHIFT3_SET_CHARS = new char[]{
  59     '\'', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
  60     'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '{', '|', '}', '~', 127
  61   };
  62
  63   static final int PAD_ENCODE = 0;  // Not really an encoding
  64   static final int ASCII_ENCODE = 1;
  65   static final int C40_ENCODE = 2;
  66   static final int TEXT_ENCODE = 3;
  67   static final int ANSIX12_ENCODE = 4;
  68   static final int EDIFACT_ENCODE = 5;
  69   static final int BASE256_ENCODE = 6;
  70
  71   private DecodedBitStreamParser() {
  72   }
  73
  74   static String decode(byte[] bytes) throws ReaderException {
  75     BitSource bits = new BitSource(bytes);
  76     StringBuffer result = new StringBuffer();
  77
  78     int mode = ASCII_ENCODE;
  79     do {
  80       if (mode != PAD_ENCODE) {
  81         if (mode == ASCII_ENCODE) {
  82           mode = decodeAsciiSegment(bits, result);
  83         } else if (mode == C40_ENCODE) {
  84           mode = decodeC40Segment(bits, result);
  85         } else if (mode == TEXT_ENCODE) {
  86           mode = decodeTextSegment(bits, result);
  87         } else if (mode == ANSIX12_ENCODE) {
  88           mode = decodeAnsiX12Segment(bits, result);
  89         } else if (mode == EDIFACT_ENCODE) {
  90           mode = decodeEdifactSegment(bits, result);
  91         } else if (mode == BASE256_ENCODE) {
  92           mode = decodeBase256Segment(bits, result);
  93         } else {
  94           throw new ReaderException("Unsupported mode indicator");
  95         }
  96       }
  97     } while (mode != PAD_ENCODE && bits.available() > 0);
  98
  99     return result.toString();
 100   }
 101
 102   /**
 103   * See ISO 16022:2006, 5.2.3 and Annex C, Table C.2
 104   */
 105   private static int decodeAsciiSegment(BitSource bits,
 106                                         StringBuffer result) throws ReaderException {
 107     char oneByte;
 108     boolean upperShift = false;
 109     int bytesProcessed = 0;
 110     do {
 111             oneByte = (char) bits.readBits(8);
 112             if (oneByte == 0) {
 113                 // TODO(bbrown): I think this would be a bug, not sure
 114                 throw new ReaderException("0 is an invalid ASCII codeword");
 115             } else if (oneByte <= 128) {  // ASCII data (ASCII value + 1)
 116                 oneByte = upperShift ? (char) (oneByte + 128) : oneByte;
 117                 upperShift = false;
 118                 result.append((char)(oneByte - 1));
 119                 return ASCII_ENCODE;
 120             } else if (oneByte == 129) {  // Pad
 121                 return PAD_ENCODE;
 122             } else if (oneByte <= 229) {  // 2-digit data 00-99 (Numeric Value + 130)
 123               // TODO(bbrown): Iassume there is some easier way to do this:
 124               if (oneByte - 130 < 10) {
 125                 result.append('0');
 126               }
 127                 result.append(Integer.toString(oneByte - 130));
 128             } else if (oneByte == 230) {  // Latch to C40 encodation
 129                 return C40_ENCODE;
 130             } else if (oneByte == 231) {  // Latch to Base 256 encodation
 131                 return BASE256_ENCODE;
 132             } else if (oneByte == 232) {  // FNC1
 133                 throw new ReaderException("Currently not supporting FNC1");
 134             } else if (oneByte == 233) {  // Structured Append
 135                 throw new ReaderException("Currently not supporting Structured Append");
 136             } else if (oneByte == 234) {  // Reader Programming
 137                 throw new ReaderException("Currently not supporting Reader Programming");
 138             } else if (oneByte == 235) {  // Upper Shift (shift to Extended ASCII)
 139                 upperShift = true;
 140             } else if (oneByte == 236) {  // 05 Macro
 141                 throw new ReaderException("Currently not supporting 05 Macro");
 142             } else if (oneByte == 237) {  // 06 Macro
 143                 throw new ReaderException("Currently not supporting 06 Macro");
 144             } else if (oneByte == 238) {  // Latch to ANSI X12 encodation
 145                 return ANSIX12_ENCODE;
 146             } else if (oneByte == 239) {  // Latch to Text encodation
 147                 return TEXT_ENCODE;
 148             } else if (oneByte == 240) {  // Latch to EDIFACT encodation
 149                 return EDIFACT_ENCODE;
 150             } else if (oneByte == 241) {  // ECI Character
 151                 // TODO(bbrown): I think we need to support ECI
 152                 throw new ReaderException("Currently not supporting ECI Character");
 153             } else if (oneByte >= 242) {  // Not to be used in ASCII encodation
 154                 throw new ReaderException(Integer.toString(oneByte) + " should not be used in ASCII encodation");
 155             }
 156     } while (bits.available() > 0);
 157     return ASCII_ENCODE;
 158   }
 159
 160   /**
 161   * See ISO 16022:2006, 5.2.5 and Annex C, Table C.1
 162   */
 163   private static int decodeC40Segment(BitSource bits,
 164                                       StringBuffer result) throws ReaderException {
 165     // Three C40 values are encoded in a 16-bit value as
 166     // (1600 * C1) + (40 * C2) + C3 + 1
 167     char firstByte;
 168     int shift = 0;
 169     // TODO(bbrown): The Upper Shift with C40 doesn't work in the 4 value scenario all the time
 170     boolean upperShift = false;
 171
 172     do {
 173       // If there is only one byte left then it will be encoded as ASCII
 174       if (bits.available() == 8) {
 175         return ASCII_ENCODE;
 176       }
 177
 178       firstByte = (char) bits.readBits(8);
 179
 180       if (firstByte == 254) {  // Unlatch codeword
 181         return ASCII_ENCODE;
 182       }
 183
 184       int fullBitValue = firstByte * 256 + bits.readBits(8) - 1;
 185
 186       char[] CValues = new char[3];
 187       CValues[0] = (char) (fullBitValue / 1600);
 188       fullBitValue -= CValues[0] * 1600;
 189       CValues[1] = (char) (fullBitValue / 40);
 190       fullBitValue -= CValues[1] * 40;
 191       CValues[2] = (char) (fullBitValue);
 192
 193       for (int i = 0; i < 3; i++) {
 194         if (shift == 0) {
 195           if (CValues[i] == 0) {  // Shift 1
 196             shift = 1;
 197             continue;
 198           } else if (CValues[i] == 1) {  // Shift 2
 199             shift = 2;
 200             continue;
 201           } else if (CValues[i] == 2) {  // Shift 3
 202             shift = 3;
 203             continue;
 204           }
 205           if (upperShift) {
 206             result.append((char)(C40_BASIC_SET_CHARS[CValues[i]] + 128));
 207             upperShift = false;
 208           } else {
 209             result.append(C40_BASIC_SET_CHARS[CValues[i]]);
 210           }
 211         } else if (shift == 1) {
 212           if (upperShift) {
 213             result.append((char) (CValues[i] + 128));
 214             upperShift = false;
 215           } else {
 216             result.append((char) CValues[i]);
 217           }
 218         } else if (shift == 2) {
 219           if (CValues[i] < 27) {
 220             if(upperShift) {
 221               result.append((char)(C40_SHIFT2_SET_CHARS[CValues[i]] + 128));
 222               upperShift = false;
 223             } else {
 224               result.append(C40_SHIFT2_SET_CHARS[CValues[i]]);
 225             }
 226           } else if (CValues[i] == 27) {  // FNC1
 227             throw new ReaderException("Currently not supporting FNC1");
 228           } else if (CValues[i] == 30) {  // Upper Shirt
 229             upperShift = true;
 230           } else {
 231             throw new ReaderException(Integer.toString(CValues[i]) + " is not valid in the C40 Shift 2 set");
 232           }
 233         } else if (shift == 3) {
 234           if (upperShift) {
 235             result.append((char) (CValues[i] + 224));
 236             upperShift = false;
 237           } else {
 238             result.append((char) CValues[i] + 96);
 239           }
 240         } else {
 241           throw new ReaderException("Invalid shift value");
 242         }
 243       }
 244     } while (bits.available() > 0);
 245     return ASCII_ENCODE;
 246   }
 247
 248   /**
 249   * See ISO 16022:2006, 5.2.6 and Annex C, Table C.2
 250   */
 251   private static int decodeTextSegment(BitSource bits,
 252                                        StringBuffer result) throws ReaderException {
 253     // Three Text values are encoded in a 16-bit value as
 254     // (1600 * C1) + (40 * C2) + C3 + 1
 255     char firstByte;
 256     int shift = 0;
 257     // TODO(bbrown): The Upper Shift with Text doesn't work in the 4 value scenario all the time
 258     boolean upperShift = false;
 259
 260     do {
 261       // If there is only one byte left then it will be encoded as ASCII
 262       if (bits.available() == 8) {
 263         return ASCII_ENCODE;
 264       }
 265
 266       firstByte = (char) bits.readBits(8);
 267
 268       if (firstByte == 254) {  // Unlatch codeword
 269         return ASCII_ENCODE;
 270       }
 271
 272       int fullBitValue = firstByte * 256 + bits.readBits(8) - 1;
 273
 274       char[] CValues = new char[3];
 275       CValues[0] = (char) (fullBitValue / 1600);
 276       fullBitValue -= CValues[0] * 1600;
 277       CValues[1] = (char) (fullBitValue / 40);
 278       fullBitValue -= CValues[1] * 40;
 279       CValues[2] = (char) (fullBitValue);
 280
 281       for (int i = 0; i < 3; i++) {
 282         if (shift == 0) {
 283           if (CValues[i] == 0) {  // Shift 1
 284             shift = 1;
 285             continue;
 286           } else if (CValues[i] == 1) {  // Shift 2
 287             shift = 2;
 288             continue;
 289           } else if (CValues[i] == 2) {  // Shift 3
 290             shift = 3;
 291             continue;
 292           }
 293           if (upperShift) {
 294             result.append((char)(TEXT_BASIC_SET_CHARS[CValues[i]] + 128));
 295             upperShift = false;
 296           } else {
 297             result.append(TEXT_BASIC_SET_CHARS[CValues[i]]);
 298           }
 299         } else if (shift == 1) {
 300           if (upperShift) {
 301             result.append((char) (CValues[i] + 128));
 302             upperShift = false;
 303           } else {
 304             result.append((char) CValues[i]);
 305           }
 306         } else if (shift == 2) {
 307           // Shift 2 for Text is the same encoding as C40
 308           if (CValues[i] < 27) {
 309             if(upperShift) {
 310               result.append((char)(C40_SHIFT2_SET_CHARS[CValues[i]] + 128));
 311               upperShift = false;
 312             } else {
 313               result.append(C40_SHIFT2_SET_CHARS[CValues[i]]);
 314             }
 315           } else if (CValues[i] == 27) {  // FNC1
 316             throw new ReaderException("Currently not supporting FNC1");
 317           } else if (CValues[i] == 30) {  // Upper Shirt
 318             upperShift = true;
 319           } else {
 320             throw new ReaderException(Integer.toString(CValues[i]) + " is not valid in the C40 Shift 2 set");
 321           }
 322         } else if (shift == 3) {
 323           if (upperShift) {
 324             result.append((char)(TEXT_SHIFT3_SET_CHARS[CValues[i]] + 128));
 325             upperShift = false;
 326           } else {
 327             result.append(TEXT_SHIFT3_SET_CHARS[CValues[i]]);
 328           }
 329         } else {
 330           throw new ReaderException("Invalid shift value");
 331         }
 332       }
 333     } while (bits.available() > 0);
 334     return ASCII_ENCODE;
 335   }
 336
 337   /**
 338   * See ISO 16022:2006, 5.2.7
 339   */
 340   private static int decodeAnsiX12Segment(BitSource bits,
 341                                           StringBuffer result) throws ReaderException {
 342     // Three ANSI X12 values are encoded in a 16-bit value as
 343     // (1600 * C1) + (40 * C2) + C3 + 1
 344     char firstByte;
 345
 346     do {
 347       // If there is only one byte left then it will be encoded as ASCII
 348       if (bits.available() == 8) {
 349         return ASCII_ENCODE;
 350       }
 351
 352       firstByte = (char) bits.readBits(8);
 353
 354       if (firstByte == 254) {  // Unlatch codeword
 355         return ASCII_ENCODE;
 356       }
 357
 358       int fullBitValue = firstByte * 256 + bits.readBits(8) - 1;
 359
 360       char[] CValues = new char[3];
 361       CValues[0] = (char) (fullBitValue / 1600);
 362       fullBitValue -= CValues[0] * 1600;
 363       CValues[1] = (char) (fullBitValue / 40);
 364       fullBitValue -= CValues[1] * 40;
 365       CValues[2] = (char) (fullBitValue);
 366
 367       for (int i = 0; i < 3; i++) {
 368         // TODO(bbrown): These really aren't X12 symbols, we are converting to ASCII chars
 369         if (CValues[i] == 0) {  // X12 segment terminator <CR>
 370           result.append("<CR>");
 371         } else if (CValues[i] == 1) {  // X12 segment separator *
 372           result.append('*');
 373         } else if (CValues[i] == 2) {  // X12 sub-element separator >
 374           result.append('>');
 375         } else if (CValues[i] == 3) {  // space
 376           result.append(' ');
 377         } else if (CValues[i] < 14) {  // 0 - 9
 378           result.append((char) (CValues[i] + 44));
 379         } else if (CValues[i] < 40) {  // A - Z
 380           result.append((char) (CValues[i] + 51));
 381         } else {
 382           throw new ReaderException(Integer.toString(CValues[i]) + " is not valid in the ANSI X12 set");
 383         }
 384       }
 385     } while (bits.available() > 0);
 386
 387     return ASCII_ENCODE;
 388   }
 389
 390   /**
 391   * See ISO 16022:2006, 5.2.8 and Annex C Table C.3
 392   */
 393   private static int decodeEdifactSegment(BitSource bits,
 394                                           StringBuffer result) throws ReaderException {
 395     boolean unlatch = false;
 396     do {
 397       // If there is only two or less bytes left then it will be encoded as ASCII
 398       if (bits.available() <= 16) {
 399         return ASCII_ENCODE;
 400       }
 401
 402       char edifactValue;
 403       for (int i = 0; i < 4; i++) {
 404         edifactValue = (char) bits.readBits(6);
 405
 406         // Check for the unlatch character
 407         if (edifactValue == 0x2B67) {  // 011111
 408           unlatch = true;
 409           // If we encounter the unlatch code then continue reading because the Codeword triple
 410           // is padded with 0's
 411         }
 412
 413         if (!unlatch) {
 414           if ((edifactValue & 32) == 0) {  // no 1 in the leading (6th) bit
 415             edifactValue |= 64;  // Add a leading 01 to the 6 bit binary value
 416           }
 417           result.append((char) edifactValue);
 418         }
 419       }
 420     } while (!unlatch && bits.available() > 0);
 421
 422     return ASCII_ENCODE;
 423   }
 424
 425   /**
 426   * See ISO 16022:2006, 5.2.9 and Annex B, B.2
 427   */
 428   private static int decodeBase256Segment(BitSource bits,
 429                                           StringBuffer result) throws ReaderException {
 430     // Figure out how long the Base 256 Segment is.
 431     char d1 = (char) bits.readBits(8);
 432     int count = 0;
 433     if (d1 == 0) {  // Read the remainder of the symbol
 434       count = bits.available() / 8;
 435     } else if (d1 < 250) {
 436       count = d1;
 437     } else {
 438       count = 250 * (d1 - 249) + bits.readBits(8);
 439     }
 440     char[] readBytes = new char[count];
 441     for (int i = 0; i < count; i++) {
 442       result.append((char)unrandomize255State((char) bits.readBits(8), count));
 443     }
 444
 445     return ASCII_ENCODE;
 446   }
 447
 448   /**
 449   * See ISO 16022:2006, Annex B, B.2
 450   */
 451   private static char unrandomize255State(char randomizedBase256Codeword,
 452                                           int base256CodewordPosition) {
 453     char pseudoRandomNumber = (char) (((149 * base256CodewordPosition) % 255) + 1);
 454     int tempVariable = randomizedBase256Codeword - pseudoRandomNumber;
 455     if (tempVariable >= 0) {
 456       return (char) tempVariable;
 457     } else {
 458       return (char) (tempVariable + 256);
 459     }
 460   }
 461
 462 }