core/src/com/google/zxing/datamatrix/decoder/DecodedBitStreamParser.java

   1 /*
   2  * Copyright 2008 ZXing authors
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 package com.google.zxing.datamatrix.decoder;
  18
  19 import com.google.zxing.ReaderException;
  20 import com.google.zxing.common.BitSource;
  21 import com.google.zxing.common.DecoderResult;
  22
  23 import java.util.Vector;
  24 import java.io.UnsupportedEncodingException;
  25
  26 /**
  27  * <p>Data Matrix Codes can encode text as bits in one of several modes, and can use multiple modes
  28  * in one Data Matrix Code. This class decodes the bits back into text.</p>
  29  *
  30  * <p>See ISO 16022:2006, 5.2.1 - 5.2.9.2</p>
  31  *
  32  * @author bbrown@google.com (Brian Brown)
  33  * @author Sean Owen
  34  */
  35 final class DecodedBitStreamParser {
  36
  37   /**
  38    * See ISO 16022:2006, Annex C Table C.1
  39    * The C40 Basic Character Set (*'s used for placeholders for the shift values)
  40    */
  41   private static final char[] C40_BASIC_SET_CHARS = {
  42       '*', '*', '*', ' ', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
  43       'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
  44       'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'
  45   };
  46
  47   private static final char[] C40_SHIFT2_SET_CHARS = {
  48     '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.',
  49     '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_'
  50         };
  51
  52   /**
  53    * See ISO 16022:2006, Annex C Table C.2
  54    * The Text Basic Character Set (*'s used for placeholders for the shift values)
  55    */
  56   private static final char[] TEXT_BASIC_SET_CHARS = {
  57     '*', '*', '*', ' ', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
  58     'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
  59     'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
  60   };
  61
  62   private static final char[] TEXT_SHIFT3_SET_CHARS = {
  63     '\'', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
  64     'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '{', '|', '}', '~', (char) 127
  65   };
  66
  // Encodation modes. decode() starts in ASCII_ENCODE; decodeAsciiSegment() returns one of
  // these values to tell decode() which segment decoder to run next.
  private static final int PAD_ENCODE = 0;  // Not really an encoding: a pad codeword ends decoding
  private static final int ASCII_ENCODE = 1;
  private static final int C40_ENCODE = 2;
  private static final int TEXT_ENCODE = 3;
  private static final int ANSIX12_ENCODE = 4;
  private static final int EDIFACT_ENCODE = 5;
  private static final int BASE256_ENCODE = 6;
  74
  /** Not instantiable: this class only offers static methods. */
  private DecodedBitStreamParser() {
  }
  77
  78   static DecoderResult decode(byte[] bytes) throws ReaderException {
  79     BitSource bits = new BitSource(bytes);
  80     StringBuffer result = new StringBuffer();
  81     StringBuffer resultTrailer = new StringBuffer(0);
  82     Vector byteSegments = new Vector(1);
  83     int mode = ASCII_ENCODE;
  84     do {
  85       if (mode == ASCII_ENCODE) {
  86         mode = decodeAsciiSegment(bits, result, resultTrailer);
  87       } else {
  88         switch (mode) {
  89           case C40_ENCODE:
  90             decodeC40Segment(bits, result);
  91             break;
  92           case TEXT_ENCODE:
  93             decodeTextSegment(bits, result);
  94             break;
  95           case ANSIX12_ENCODE:
  96             decodeAnsiX12Segment(bits, result);
  97             break;
  98           case EDIFACT_ENCODE:
  99             decodeEdifactSegment(bits, result);
 100             break;
 101           case BASE256_ENCODE:
 102             decodeBase256Segment(bits, result, byteSegments);
 103             break;
 104           default:
 105             throw new ReaderException("Unsupported mode indicator");
 106         }
 107         mode = ASCII_ENCODE;
 108       }
 109     } while (mode != PAD_ENCODE && bits.available() > 0);
 110     if (resultTrailer.length() > 0) {
 111       result.append(resultTrailer);
 112     }
 113     return new DecoderResult(bytes, result.toString(), byteSegments.isEmpty() ? null : byteSegments);
 114   }
 115
 116   /**
 117    * See ISO 16022:2006, 5.2.3 and Annex C, Table C.2
 118    */
 119   private static int decodeAsciiSegment(BitSource bits, StringBuffer result, StringBuffer resultTrailer)
 120       throws ReaderException {
 121     boolean upperShift = false;
 122     do {
 123       int oneByte = bits.readBits(8);
 124       if (oneByte == 0) {
 125                 throw new ReaderException("0 is an invalid ASCII codeword");
 126             } else if (oneByte <= 128) {  // ASCII data (ASCII value + 1)
 127                 oneByte = upperShift ? (oneByte + 128) : oneByte;
 128                 upperShift = false;
 129                 result.append((char) (oneByte - 1));
 130                 return ASCII_ENCODE;
 131             } else if (oneByte == 129) {  // Pad
 132                 return PAD_ENCODE;
 133             } else if (oneByte <= 229) {  // 2-digit data 00-99 (Numeric Value + 130)
 134               int value = oneByte - 130;
 135               if (value < 10) { // padd with '0' for single digit values
 136                 result.append('0');
 137               }
 138                 result.append(value);
 139             } else if (oneByte == 230) {  // Latch to C40 encodation
 140                 return C40_ENCODE;
 141             } else if (oneByte == 231) {  // Latch to Base 256 encodation
 142                 return BASE256_ENCODE;
 143             } else if (oneByte == 232) {  // FNC1
 144                 throw new ReaderException("Currently not supporting FNC1");
 145             } else if (oneByte == 233) {  // Structured Append
 146                 throw new ReaderException("Currently not supporting Structured Append");
 147             } else if (oneByte == 234) {  // Reader Programming
 148                 throw new ReaderException("Currently not supporting Reader Programming");
 149             } else if (oneByte == 235) {  // Upper Shift (shift to Extended ASCII)
 150                 upperShift = true;
 151             } else if (oneByte == 236) {  // 05 Macro
 152         result.append("[)>\u001E05\u001D");
 153         resultTrailer.insert(0, "\u001E\u0004");
 154       } else if (oneByte == 237) {  // 06 Macro
 155                 result.append("[)>\u001E06\u001D");
 156         resultTrailer.insert(0, "\u001E\u0004");
 157             } else if (oneByte == 238) {  // Latch to ANSI X12 encodation
 158                 return ANSIX12_ENCODE;
 159             } else if (oneByte == 239) {  // Latch to Text encodation
 160                 return TEXT_ENCODE;
 161             } else if (oneByte == 240) {  // Latch to EDIFACT encodation
 162                 return EDIFACT_ENCODE;
 163             } else if (oneByte == 241) {  // ECI Character
 164                 // TODO(bbrown): I think we need to support ECI
 165                 throw new ReaderException("Currently not supporting ECI Character");
 166             } else if (oneByte >= 242) {  // Not to be used in ASCII encodation
 167                 throw new ReaderException(oneByte + " should not be used in ASCII encodation");
 168             }
 169     } while (bits.available() > 0);
 170     return ASCII_ENCODE;
 171   }
 172
 173   /**
 174    * See ISO 16022:2006, 5.2.5 and Annex C, Table C.1
 175    */
 176   private static void decodeC40Segment(BitSource bits, StringBuffer result) throws ReaderException {
 177     // Three C40 values are encoded in a 16-bit value as
 178     // (1600 * C1) + (40 * C2) + C3 + 1
 179     // TODO(bbrown): The Upper Shift with C40 doesn't work in the 4 value scenario all the time
 180     boolean upperShift = false;
 181
 182     int[] cValues = new int[3];
 183     do {
 184       // If there is only one byte left then it will be encoded as ASCII
 185       if (bits.available() == 8) {
 186         return;
 187       }
 188       int firstByte = bits.readBits(8);
 189       if (firstByte == 254) {  // Unlatch codeword
 190         return;
 191       }
 192
 193       parseTwoBytes(firstByte, bits.readBits(8), cValues);
 194
 195       int shift = 0;
 196       for (int i = 0; i < 3; i++) {
 197         int cValue = cValues[i];
 198         switch (shift) {
 199           case 0:
 200             if (cValue < 3) {
 201               shift = cValue + 1;
 202             } else {
 203               if (upperShift) {
 204                 result.append((char) (C40_BASIC_SET_CHARS[cValue] + 128));
 205                 upperShift = false;
 206               } else {
 207                 result.append(C40_BASIC_SET_CHARS[cValue]);
 208               }
 209             }
 210             break;
 211           case 1:
 212             if (upperShift) {
 213               result.append((char) (cValue + 128));
 214               upperShift = false;
 215             } else {
 216               result.append(cValue);
 217             }
 218             shift = 0;
 219             break;
 220           case 2:
 221             if (cValue < 27) {
 222               if (upperShift) {
 223                 result.append((char) (C40_SHIFT2_SET_CHARS[cValue] + 128));
 224                 upperShift = false;
 225               } else {
 226                 result.append(C40_SHIFT2_SET_CHARS[cValue]);
 227               }
 228             } else if (cValue == 27) {  // FNC1
 229               throw new ReaderException("Currently not supporting FNC1");
 230             } else if (cValue == 30) {  // Upper Shift
 231               upperShift = true;
 232             } else {
 233               throw new ReaderException(cValue + " is not valid in the C40 Shift 2 set");
 234             }
 235             shift = 0;
 236             break;
 237           case 3:
 238             if (upperShift) {
 239               result.append((char) (cValue + 224));
 240               upperShift = false;
 241             } else {
 242               result.append((char) (cValue + 96));
 243             }
 244             shift = 0;
 245             break;
 246           default:
 247             throw new ReaderException("Invalid shift value");
 248         }
 249       }
 250     } while (bits.available() > 0);
 251   }
 252
 253   /**
 254    * See ISO 16022:2006, 5.2.6 and Annex C, Table C.2
 255    */
 256   private static void decodeTextSegment(BitSource bits, StringBuffer result) throws ReaderException {
 257     // Three Text values are encoded in a 16-bit value as
 258     // (1600 * C1) + (40 * C2) + C3 + 1
 259     // TODO(bbrown): The Upper Shift with Text doesn't work in the 4 value scenario all the time
 260     boolean upperShift = false;
 261
 262     int[] cValues = new int[3];
 263     do {
 264       // If there is only one byte left then it will be encoded as ASCII
 265       if (bits.available() == 8) {
 266         return;
 267       }
 268       int firstByte = bits.readBits(8);
 269       if (firstByte == 254) {  // Unlatch codeword
 270         return;
 271       }
 272
 273       parseTwoBytes(firstByte, bits.readBits(8), cValues);
 274
 275       int shift = 0;
 276       for (int i = 0; i < 3; i++) {
 277         int cValue = cValues[i];
 278         switch (shift) {
 279           case 0:
 280             if (cValue < 3) {
 281               shift = cValue + 1;
 282             } else {
 283               if (upperShift) {
 284                 result.append((char) (TEXT_BASIC_SET_CHARS[cValue] + 128));
 285                 upperShift = false;
 286               } else {
 287                 result.append(TEXT_BASIC_SET_CHARS[cValue]);
 288               }
 289             }
 290             break;
 291           case 1:
 292             if (upperShift) {
 293               result.append((char) (cValue + 128));
 294               upperShift = false;
 295             } else {
 296               result.append(cValue);
 297             }
 298             shift = 0;
 299             break;
 300           case 2:
 301             // Shift 2 for Text is the same encoding as C40
 302             if (cValue < 27) {
 303               if (upperShift) {
 304                 result.append((char) (C40_SHIFT2_SET_CHARS[cValue] + 128));
 305                 upperShift = false;
 306               } else {
 307                 result.append(C40_SHIFT2_SET_CHARS[cValue]);
 308               }
 309             } else if (cValue == 27) {  // FNC1
 310               throw new ReaderException("Currently not supporting FNC1");
 311             } else if (cValue == 30) {  // Upper Shift
 312               upperShift = true;
 313             } else {
 314               throw new ReaderException(cValue + " is not valid in the C40 Shift 2 set");
 315             }
 316             shift = 0;
 317             break;
 318           case 3:
 319             if (upperShift) {
 320               result.append((char) (TEXT_SHIFT3_SET_CHARS[cValue] + 128));
 321               upperShift = false;
 322             } else {
 323               result.append(TEXT_SHIFT3_SET_CHARS[cValue]);
 324             }
 325             shift = 0;
 326             break;
 327           default:
 328             throw new ReaderException("Invalid shift value");
 329         }
 330       }
 331     } while (bits.available() > 0);
 332   }
 333
 334   /**
 335    * See ISO 16022:2006, 5.2.7
 336    */
 337   private static void decodeAnsiX12Segment(BitSource bits, StringBuffer result) throws ReaderException {
 338     // Three ANSI X12 values are encoded in a 16-bit value as
 339     // (1600 * C1) + (40 * C2) + C3 + 1
 340
 341     int[] cValues = new int[3];
 342     do {
 343       // If there is only one byte left then it will be encoded as ASCII
 344       if (bits.available() == 8) {
 345         return;
 346       }
 347       int firstByte = bits.readBits(8);
 348       if (firstByte == 254) {  // Unlatch codeword
 349         return;
 350       }
 351
 352       parseTwoBytes(firstByte, bits.readBits(8), cValues);
 353
 354       for (int i = 0; i < 3; i++) {
 355         int cValue = cValues[i];
 356         if (cValue == 0) {  // X12 segment terminator <CR>
 357           result.append('\r');
 358         } else if (cValue == 1) {  // X12 segment separator *
 359           result.append('*');
 360         } else if (cValue == 2) {  // X12 sub-element separator >
 361           result.append('>');
 362         } else if (cValue == 3) {  // space
 363           result.append(' ');
 364         } else if (cValue < 14) {  // 0 - 9
 365           result.append((char) (cValue + 44));
 366         } else if (cValue < 40) {  // A - Z
 367           result.append((char) (cValue + 51));
 368         } else {
 369           throw new ReaderException(cValue + " is not valid in the ANSI X12 set");
 370         }
 371       }
 372     } while (bits.available() > 0);
 373   }
 374
 375   private static void parseTwoBytes(int firstByte, int secondByte, int[] result) {
 376     int fullBitValue = (firstByte << 8) + secondByte - 1;
 377     int temp = fullBitValue / 1600;
 378     result[0] = temp;
 379     fullBitValue -= temp * 1600;
 380     temp = fullBitValue / 40;
 381     result[1] = temp;
 382     result[2] = fullBitValue - temp * 40;
 383   }
 384
 385   /**
 386    * See ISO 16022:2006, 5.2.8 and Annex C Table C.3
 387    */
 388   private static void decodeEdifactSegment(BitSource bits, StringBuffer result) {
 389     boolean unlatch = false;
 390     do {
 391       // If there is only two or less bytes left then it will be encoded as ASCII
 392       if (bits.available() <= 16) {
 393         return;
 394       }
 395
 396       for (int i = 0; i < 4; i++) {
 397         int edifactValue = bits.readBits(6);
 398
 399         // Check for the unlatch character
 400         if (edifactValue == 0x2B67) {  // 011111
 401           unlatch = true;
 402           // If we encounter the unlatch code then continue reading because the Codeword triple
 403           // is padded with 0's
 404         }
 405
 406         if (!unlatch) {
 407           if ((edifactValue & 32) == 0) {  // no 1 in the leading (6th) bit
 408             edifactValue |= 64;  // Add a leading 01 to the 6 bit binary value
 409           }
 410           result.append(edifactValue);
 411         }
 412       }
 413     } while (!unlatch && bits.available() > 0);
 414   }
 415
 416   /**
 417    * See ISO 16022:2006, 5.2.9 and Annex B, B.2
 418    */
 419   private static void decodeBase256Segment(BitSource bits, StringBuffer result, Vector byteSegments) {
 420     // Figure out how long the Base 256 Segment is.
 421     int d1 = bits.readBits(8);
 422     int count;
 423     if (d1 == 0) {  // Read the remainder of the symbol
 424       count = bits.available() / 8;
 425     } else if (d1 < 250) {
 426       count = d1;
 427     } else {
 428       count = 250 * (d1 - 249) + bits.readBits(8);
 429     }
 430     byte[] bytes = new byte[count];
 431     for (int i = 0; i < count; i++) {
 432       bytes[i] = unrandomize255State(bits.readBits(8), i);
 433     }
 434     byteSegments.addElement(bytes);
 435     try {
 436       result.append(new String(bytes, "ISO8859_1"));
 437     } catch (UnsupportedEncodingException uee) {
 438       throw new RuntimeException("Platform does not support required encoding: " + uee);
 439     }
 440   }
 441
 442   /**
 443    * See ISO 16022:2006, Annex B, B.2
 444    */
 445   private static byte unrandomize255State(int randomizedBase256Codeword,
 446                                           int base256CodewordPosition) {
 447     int pseudoRandomNumber = ((149 * base256CodewordPosition) % 255) + 1;
 448     int tempVariable = randomizedBase256Codeword - pseudoRandomNumber;
 449     return (byte) (tempVariable >= 0 ? tempVariable : (tempVariable + 256));
 450   }
 451
 452 }