core/src/com/google/zxing/datamatrix/decoder/DecodedBitStreamParser.java

   1 /*
   2  * Copyright 2008 ZXing authors
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 package com.google.zxing.datamatrix.decoder;
  18
  19 import com.google.zxing.FormatException;
  20 import com.google.zxing.common.BitSource;
  21 import com.google.zxing.common.DecoderResult;
  22
  23 import java.io.UnsupportedEncodingException;
  24 import java.util.Vector;
  25
  26 /**
  27  * <p>Data Matrix Codes can encode text as bits in one of several modes, and can use multiple modes
  28  * in one Data Matrix Code. This class decodes the bits back into text.</p>
  29  *
  30  * <p>See ISO 16022:2006, 5.2.1 - 5.2.9.2</p>
  31  *
  32  * @author bbrown@google.com (Brian Brown)
  33  * @author Sean Owen
  34  */
  35 final class DecodedBitStreamParser {
  36
  37   /**
  38    * See ISO 16022:2006, Annex C Table C.1
  39    * The C40 Basic Character Set (*'s used for placeholders for the shift values)
  40    */
  41   private static final char[] C40_BASIC_SET_CHARS = {
  42       '*', '*', '*', ' ', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
  43       'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
  44       'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'
  45   };
  46
  47   private static final char[] C40_SHIFT2_SET_CHARS = {
  48     '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.',
  49     '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_'
  50   };
  51
  52   /**
  53    * See ISO 16022:2006, Annex C Table C.2
  54    * The Text Basic Character Set (*'s used for placeholders for the shift values)
  55    */
  56   private static final char[] TEXT_BASIC_SET_CHARS = {
  57     '*', '*', '*', ' ', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
  58     'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
  59     'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
  60   };
  61
  62   private static final char[] TEXT_SHIFT3_SET_CHARS = {
  63     '\'', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
  64     'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '{', '|', '}', '~', (char) 127
  65   };
  66
  67   private static final int PAD_ENCODE = 0;  // Not really an encoding
  68   private static final int ASCII_ENCODE = 1;
  69   private static final int C40_ENCODE = 2;
  70   private static final int TEXT_ENCODE = 3;
  71   private static final int ANSIX12_ENCODE = 4;
  72   private static final int EDIFACT_ENCODE = 5;
  73   private static final int BASE256_ENCODE = 6;
  74
  75   private DecodedBitStreamParser() {
  76   }
  77
  78   static DecoderResult decode(byte[] bytes) throws FormatException {
  79     BitSource bits = new BitSource(bytes);
  80     StringBuffer result = new StringBuffer(100);
  81     StringBuffer resultTrailer = new StringBuffer(0);
  82     Vector byteSegments = new Vector(1);
  83     int mode = ASCII_ENCODE;
  84     do {
  85       if (mode == ASCII_ENCODE) {
  86         mode = decodeAsciiSegment(bits, result, resultTrailer);
  87       } else {
  88         switch (mode) {
  89           case C40_ENCODE:
  90             decodeC40Segment(bits, result);
  91             break;
  92           case TEXT_ENCODE:
  93             decodeTextSegment(bits, result);
  94             break;
  95           case ANSIX12_ENCODE:
  96             decodeAnsiX12Segment(bits, result);
  97             break;
  98           case EDIFACT_ENCODE:
  99             decodeEdifactSegment(bits, result);
 100             break;
 101           case BASE256_ENCODE:
 102             decodeBase256Segment(bits, result, byteSegments);
 103             break;
 104           default:
 105             throw FormatException.getFormatInstance();
 106         }
 107         mode = ASCII_ENCODE;
 108       }
 109     } while (mode != PAD_ENCODE && bits.available() > 0);
 110     if (resultTrailer.length() > 0) {
 111       result.append(resultTrailer.toString());
 112     }
 113     return new DecoderResult(bytes, result.toString(), byteSegments.isEmpty() ? null : byteSegments, null);
 114   }
 115
 116   /**
 117    * See ISO 16022:2006, 5.2.3 and Annex C, Table C.2
 118    */
 119   private static int decodeAsciiSegment(BitSource bits, StringBuffer result, StringBuffer resultTrailer)
 120       throws FormatException {
 121     boolean upperShift = false;
 122     do {
 123       int oneByte = bits.readBits(8);
 124       if (oneByte == 0) {
 125         throw FormatException.getFormatInstance();
 126       } else if (oneByte <= 128) {  // ASCII data (ASCII value + 1)
 127         oneByte = upperShift ? (oneByte + 128) : oneByte;
 128         upperShift = false;
 129         result.append((char) (oneByte - 1));
 130         return ASCII_ENCODE;
 131       } else if (oneByte == 129) {  // Pad
 132         return PAD_ENCODE;
 133       } else if (oneByte <= 229) {  // 2-digit data 00-99 (Numeric Value + 130)
 134         int value = oneByte - 130;
 135         if (value < 10) { // padd with '0' for single digit values
 136           result.append('0');
 137         }
 138         result.append(value);
 139       } else if (oneByte == 230) {  // Latch to C40 encodation
 140         return C40_ENCODE;
 141       } else if (oneByte == 231) {  // Latch to Base 256 encodation
 142         return BASE256_ENCODE;
 143       } else if (oneByte == 232) {  // FNC1
 144         //throw ReaderException.getInstance();
 145         // Ignore this symbol for now
 146       } else if (oneByte == 233) {  // Structured Append
 147         //throw ReaderException.getInstance();
 148         // Ignore this symbol for now
 149       } else if (oneByte == 234) {  // Reader Programming
 150         //throw ReaderException.getInstance();
 151         // Ignore this symbol for now
 152       } else if (oneByte == 235) {  // Upper Shift (shift to Extended ASCII)
 153         upperShift = true;
 154       } else if (oneByte == 236) {  // 05 Macro
 155         result.append("[)>\u001E05\u001D");
 156         resultTrailer.insert(0, "\u001E\u0004");
 157       } else if (oneByte == 237) {  // 06 Macro
 158         result.append("[)>\u001E06\u001D");
 159         resultTrailer.insert(0, "\u001E\u0004");
 160       } else if (oneByte == 238) {  // Latch to ANSI X12 encodation
 161         return ANSIX12_ENCODE;
 162       } else if (oneByte == 239) {  // Latch to Text encodation
 163         return TEXT_ENCODE;
 164       } else if (oneByte == 240) {  // Latch to EDIFACT encodation
 165         return EDIFACT_ENCODE;
 166       } else if (oneByte == 241) {  // ECI Character
 167         // TODO(bbrown): I think we need to support ECI
 168         //throw ReaderException.getInstance();
 169         // Ignore this symbol for now
 170       } else if (oneByte >= 242) {  // Not to be used in ASCII encodation
 171         throw FormatException.getFormatInstance();
 172       }
 173     } while (bits.available() > 0);
 174     return ASCII_ENCODE;
 175   }
 176
 177   /**
 178    * See ISO 16022:2006, 5.2.5 and Annex C, Table C.1
 179    */
 180   private static void decodeC40Segment(BitSource bits, StringBuffer result) throws FormatException {
 181     // Three C40 values are encoded in a 16-bit value as
 182     // (1600 * C1) + (40 * C2) + C3 + 1
 183     // TODO(bbrown): The Upper Shift with C40 doesn't work in the 4 value scenario all the time
 184     boolean upperShift = false;
 185
 186     int[] cValues = new int[3];
 187     do {
 188       // If there is only one byte left then it will be encoded as ASCII
 189       if (bits.available() == 8) {
 190         return;
 191       }
 192       int firstByte = bits.readBits(8);
 193       if (firstByte == 254) {  // Unlatch codeword
 194         return;
 195       }
 196
 197       parseTwoBytes(firstByte, bits.readBits(8), cValues);
 198
 199       int shift = 0;
 200       for (int i = 0; i < 3; i++) {
 201         int cValue = cValues[i];
 202         switch (shift) {
 203           case 0:
 204             if (cValue < 3) {
 205               shift = cValue + 1;
 206             } else {
 207               if (upperShift) {
 208                 result.append((char) (C40_BASIC_SET_CHARS[cValue] + 128));
 209                 upperShift = false;
 210               } else {
 211                 result.append(C40_BASIC_SET_CHARS[cValue]);
 212               }
 213             }
 214             break;
 215           case 1:
 216             if (upperShift) {
 217               result.append((char) (cValue + 128));
 218               upperShift = false;
 219             } else {
 220               result.append(cValue);
 221             }
 222             shift = 0;
 223             break;
 224           case 2:
 225             if (cValue < 27) {
 226               if (upperShift) {
 227                 result.append((char) (C40_SHIFT2_SET_CHARS[cValue] + 128));
 228                 upperShift = false;
 229               } else {
 230                 result.append(C40_SHIFT2_SET_CHARS[cValue]);
 231               }
 232             } else if (cValue == 27) {  // FNC1
 233               throw FormatException.getFormatInstance();
 234             } else if (cValue == 30) {  // Upper Shift
 235               upperShift = true;
 236             } else {
 237               throw FormatException.getFormatInstance();
 238             }
 239             shift = 0;
 240             break;
 241           case 3:
 242             if (upperShift) {
 243               result.append((char) (cValue + 224));
 244               upperShift = false;
 245             } else {
 246               result.append((char) (cValue + 96));
 247             }
 248             shift = 0;
 249             break;
 250           default:
 251             throw FormatException.getFormatInstance();
 252         }
 253       }
 254     } while (bits.available() > 0);
 255   }
 256
 257   /**
 258    * See ISO 16022:2006, 5.2.6 and Annex C, Table C.2
 259    */
 260   private static void decodeTextSegment(BitSource bits, StringBuffer result) throws FormatException {
 261     // Three Text values are encoded in a 16-bit value as
 262     // (1600 * C1) + (40 * C2) + C3 + 1
 263     // TODO(bbrown): The Upper Shift with Text doesn't work in the 4 value scenario all the time
 264     boolean upperShift = false;
 265
 266     int[] cValues = new int[3];
 267     do {
 268       // If there is only one byte left then it will be encoded as ASCII
 269       if (bits.available() == 8) {
 270         return;
 271       }
 272       int firstByte = bits.readBits(8);
 273       if (firstByte == 254) {  // Unlatch codeword
 274         return;
 275       }
 276
 277       parseTwoBytes(firstByte, bits.readBits(8), cValues);
 278
 279       int shift = 0;
 280       for (int i = 0; i < 3; i++) {
 281         int cValue = cValues[i];
 282         switch (shift) {
 283           case 0:
 284             if (cValue < 3) {
 285               shift = cValue + 1;
 286             } else {
 287               if (upperShift) {
 288                 result.append((char) (TEXT_BASIC_SET_CHARS[cValue] + 128));
 289                 upperShift = false;
 290               } else {
 291                 result.append(TEXT_BASIC_SET_CHARS[cValue]);
 292               }
 293             }
 294             break;
 295           case 1:
 296             if (upperShift) {
 297               result.append((char) (cValue + 128));
 298               upperShift = false;
 299             } else {
 300               result.append(cValue);
 301             }
 302             shift = 0;
 303             break;
 304           case 2:
 305             // Shift 2 for Text is the same encoding as C40
 306             if (cValue < 27) {
 307               if (upperShift) {
 308                 result.append((char) (C40_SHIFT2_SET_CHARS[cValue] + 128));
 309                 upperShift = false;
 310               } else {
 311                 result.append(C40_SHIFT2_SET_CHARS[cValue]);
 312               }
 313             } else if (cValue == 27) {  // FNC1
 314               throw FormatException.getFormatInstance();
 315             } else if (cValue == 30) {  // Upper Shift
 316               upperShift = true;
 317             } else {
 318               throw FormatException.getFormatInstance();
 319             }
 320             shift = 0;
 321             break;
 322           case 3:
 323             if (upperShift) {
 324               result.append((char) (TEXT_SHIFT3_SET_CHARS[cValue] + 128));
 325               upperShift = false;
 326             } else {
 327               result.append(TEXT_SHIFT3_SET_CHARS[cValue]);
 328             }
 329             shift = 0;
 330             break;
 331           default:
 332             throw FormatException.getFormatInstance();
 333         }
 334       }
 335     } while (bits.available() > 0);
 336   }
 337
 338   /**
 339    * See ISO 16022:2006, 5.2.7
 340    */
 341   private static void decodeAnsiX12Segment(BitSource bits, StringBuffer result) throws FormatException {
 342     // Three ANSI X12 values are encoded in a 16-bit value as
 343     // (1600 * C1) + (40 * C2) + C3 + 1
 344
 345     int[] cValues = new int[3];
 346     do {
 347       // If there is only one byte left then it will be encoded as ASCII
 348       if (bits.available() == 8) {
 349         return;
 350       }
 351       int firstByte = bits.readBits(8);
 352       if (firstByte == 254) {  // Unlatch codeword
 353         return;
 354       }
 355
 356       parseTwoBytes(firstByte, bits.readBits(8), cValues);
 357
 358       for (int i = 0; i < 3; i++) {
 359         int cValue = cValues[i];
 360         if (cValue == 0) {  // X12 segment terminator <CR>
 361           result.append('\r');
 362         } else if (cValue == 1) {  // X12 segment separator *
 363           result.append('*');
 364         } else if (cValue == 2) {  // X12 sub-element separator >
 365           result.append('>');
 366         } else if (cValue == 3) {  // space
 367           result.append(' ');
 368         } else if (cValue < 14) {  // 0 - 9
 369           result.append((char) (cValue + 44));
 370         } else if (cValue < 40) {  // A - Z
 371           result.append((char) (cValue + 51));
 372         } else {
 373           throw FormatException.getFormatInstance();
 374         }
 375       }
 376     } while (bits.available() > 0);
 377   }
 378
 379   private static void parseTwoBytes(int firstByte, int secondByte, int[] result) {
 380     int fullBitValue = (firstByte << 8) + secondByte - 1;
 381     int temp = fullBitValue / 1600;
 382     result[0] = temp;
 383     fullBitValue -= temp * 1600;
 384     temp = fullBitValue / 40;
 385     result[1] = temp;
 386     result[2] = fullBitValue - temp * 40;
 387   }
 388
 389   /**
 390    * See ISO 16022:2006, 5.2.8 and Annex C Table C.3
 391    */
 392   private static void decodeEdifactSegment(BitSource bits, StringBuffer result) {
 393     boolean unlatch = false;
 394     do {
 395       // If there is only two or less bytes left then it will be encoded as ASCII
 396       if (bits.available() <= 16) {
 397         return;
 398       }
 399
 400       for (int i = 0; i < 4; i++) {
 401         int edifactValue = bits.readBits(6);
 402
 403         // Check for the unlatch character
 404         if (edifactValue == 0x2B67) {  // 011111
 405           unlatch = true;
 406           // If we encounter the unlatch code then continue reading because the Codeword triple
 407           // is padded with 0's
 408         }
 409
 410         if (!unlatch) {
 411           if ((edifactValue & 32) == 0) {  // no 1 in the leading (6th) bit
 412             edifactValue |= 64;  // Add a leading 01 to the 6 bit binary value
 413           }
 414           result.append(edifactValue);
 415         }
 416       }
 417     } while (!unlatch && bits.available() > 0);
 418   }
 419
 420   /**
 421    * See ISO 16022:2006, 5.2.9 and Annex B, B.2
 422    */
 423   private static void decodeBase256Segment(BitSource bits, StringBuffer result, Vector byteSegments)
 424       throws FormatException {
 425     // Figure out how long the Base 256 Segment is.
 426     int d1 = bits.readBits(8);
 427     int count;
 428     if (d1 == 0) {  // Read the remainder of the symbol
 429       count = bits.available() / 8;
 430     } else if (d1 < 250) {
 431       count = d1;
 432     } else {
 433       count = 250 * (d1 - 249) + bits.readBits(8);
 434     }
 435     byte[] bytes = new byte[count];
 436     for (int i = 0; i < count; i++) {
 437       // Have seen this particular error in the wild, such as at
 438       // http://www.bcgen.com/demo/IDAutomationStreamingDataMatrix.aspx?MODE=3&D=Fred&PFMT=3&PT=F&X=0.3&O=0&LM=0.2
 439       if (bits.available() < 8) {
 440         throw FormatException.getFormatInstance();
 441       }
 442       bytes[i] = unrandomize255State(bits.readBits(8), i);
 443     }
 444     byteSegments.addElement(bytes);
 445     try {
 446       result.append(new String(bytes, "ISO8859_1"));
 447     } catch (UnsupportedEncodingException uee) {
 448       throw new RuntimeException("Platform does not support required encoding: " + uee);
 449     }
 450   }
 451
 452   /**
 453    * See ISO 16022:2006, Annex B, B.2
 454    */
 455   private static byte unrandomize255State(int randomizedBase256Codeword,
 456                                           int base256CodewordPosition) {
 457     int pseudoRandomNumber = ((149 * base256CodewordPosition) % 255) + 1;
 458     int tempVariable = randomizedBase256Codeword - pseudoRandomNumber;
 459     return (byte) (tempVariable >= 0 ? tempVariable : (tempVariable + 256));
 460   }
 461
 462 }