Add to result the raw, but parsed, bytes of byte segments in 2D barcodes

[zxing.git] / core / src / com / google / zxing / datamatrix / decoder / DecodedBitStreamParser.java
diff --git a/core/src/com/google/zxing/datamatrix/decoder/DecodedBitStreamParser.java b/core/src/com/google/zxing/datamatrix/decoder/DecodedBitStreamParser.java

index c2c42c3..d5b0394 100644 (file)
--- a/core/src/com/google/zxing/datamatrix/decoder/DecodedBitStreamParser.java
+++ b/core/src/com/google/zxing/datamatrix/decoder/DecodedBitStreamParser.java
@@ -18,6 +18,10 @@ package com.google.zxing.datamatrix.decoder;
  
  import com.google.zxing.ReaderException;
  import com.google.zxing.common.BitSource;
+import com.google.zxing.common.DecoderResult;
+
+import java.util.Vector;
+import java.io.UnsupportedEncodingException;
  
  /**
   * <p>Data Matrix Codes can encode text as bits in one of several modes, and can use multiple modes
@@ -26,6 +30,7 @@ import com.google.zxing.common.BitSource;
   * <p>See ISO 16022:2006, 5.2.1 - 5.2.9.2</p>
   *
   * @author bbrown@google.com (Brian Brown)
+ * @author Sean Owen
   */
  final class DecodedBitStreamParser {
  
@@ -56,7 +61,7 @@ final class DecodedBitStreamParser {
    
    private static final char[] TEXT_SHIFT3_SET_CHARS = {
      '\'', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
-    'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '{', '|', '}', '~', 127
+    'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '{', '|', '}', '~', (char) 127
    };
    
    private static final int PAD_ENCODE = 0;  // Not really an encoding
@@ -70,58 +75,67 @@ final class DecodedBitStreamParser {
    private DecodedBitStreamParser() {
    }
  
-  static String decode(byte[] bytes) throws ReaderException {
+  static DecoderResult decode(byte[] bytes) throws ReaderException {
      BitSource bits = new BitSource(bytes);
      StringBuffer result = new StringBuffer();
-    
+    StringBuffer resultTrailer = new StringBuffer(0);
+    Vector byteSegments = new Vector(1);
      int mode = ASCII_ENCODE;
      do {
-      if (mode != PAD_ENCODE) {
-        if (mode == ASCII_ENCODE) {
-          mode = decodeAsciiSegment(bits, result);
-        } else if (mode == C40_ENCODE) {
-          mode = decodeC40Segment(bits, result);
-        } else if (mode == TEXT_ENCODE) {
-          mode = decodeTextSegment(bits, result);
-        } else if (mode == ANSIX12_ENCODE) {
-          mode = decodeAnsiX12Segment(bits, result);
-        } else if (mode == EDIFACT_ENCODE) {
-          mode = decodeEdifactSegment(bits, result);
-        } else if (mode == BASE256_ENCODE) {
-          mode = decodeBase256Segment(bits, result);
-        } else {
-          throw new ReaderException("Unsupported mode indicator");
+      if (mode == ASCII_ENCODE) {
+        mode = decodeAsciiSegment(bits, result, resultTrailer);
+      } else {
+        switch (mode) {
+          case C40_ENCODE:
+            decodeC40Segment(bits, result);
+            break;
+          case TEXT_ENCODE:
+            decodeTextSegment(bits, result);
+            break;
+          case ANSIX12_ENCODE:
+            decodeAnsiX12Segment(bits, result);
+            break;
+          case EDIFACT_ENCODE:
+            decodeEdifactSegment(bits, result);
+            break;
+          case BASE256_ENCODE:
+            decodeBase256Segment(bits, result, byteSegments);
+            break;
+          default:
+            throw new ReaderException("Unsupported mode indicator");
          }
+        mode = ASCII_ENCODE;
        }
      } while (mode != PAD_ENCODE && bits.available() > 0);
-
-    return result.toString();
+    if (resultTrailer.length() > 0) {
+      result.append(resultTrailer);
+    }
+    return new DecoderResult(bytes, result.toString(), byteSegments.isEmpty() ? null : byteSegments);
    }
    
    /**
-  * See ISO 16022:2006, 5.2.3 and Annex C, Table C.2
-  */
-  private static int decodeAsciiSegment(BitSource bits,
-                                        StringBuffer result) throws ReaderException {
+   * See ISO 16022:2006, 5.2.3 and Annex C, Table C.2
+   */
+  private static int decodeAsciiSegment(BitSource bits, StringBuffer result, StringBuffer resultTrailer)
+      throws ReaderException {
      boolean upperShift = false;
      do {
-      char oneByte = (char) bits.readBits(8);
-      if (oneByte == '\0') {
-               // TODO(bbrown): I think this would be a bug, not sure
+      int oneByte = bits.readBits(8);
+      if (oneByte == 0) {
                 throw new ReaderException("0 is an invalid ASCII codeword");
             } else if (oneByte <= 128) {  // ASCII data (ASCII value + 1)
-               oneByte = upperShift ? (char) (oneByte + 128) : oneByte;
+               oneByte = upperShift ? (oneByte + 128) : oneByte;
                 upperShift = false;
-               result.append((char)(oneByte - 1));
+               result.append((char) (oneByte - 1));
                 return ASCII_ENCODE;
             } else if (oneByte == 129) {  // Pad
                 return PAD_ENCODE;
             } else if (oneByte <= 229) {  // 2-digit data 00-99 (Numeric Value + 130)
-             // TODO(bbrown): Iassume there is some easier way to do this:
-             if (oneByte - 130 < 10) {
+             int value = oneByte - 130;
+             if (value < 10) { // pad with '0' for single digit values
                 result.append('0');
               }
-               result.append(Integer.toString(oneByte - 130));
+               result.append(value);
             } else if (oneByte == 230) {  // Latch to C40 encodation
                 return C40_ENCODE;
             } else if (oneByte == 231) {  // Latch to Base 256 encodation
@@ -135,9 +149,11 @@ final class DecodedBitStreamParser {
             } else if (oneByte == 235) {  // Upper Shift (shift to Extended ASCII)
                 upperShift = true;
             } else if (oneByte == 236) {  // 05 Macro
-               throw new ReaderException("Currently not supporting 05 Macro");
-           } else if (oneByte == 237) {  // 06 Macro
-               throw new ReaderException("Currently not supporting 06 Macro");
+        result.append("[)>\u001E05\u001D");
+        resultTrailer.insert(0, "\u001E\u0004");
+      } else if (oneByte == 237) {  // 06 Macro
+               result.append("[)>\u001E06\u001D");
+        resultTrailer.insert(0, "\u001E\u0004");
             } else if (oneByte == 238) {  // Latch to ANSI X12 encodation
                 return ANSIX12_ENCODE;
             } else if (oneByte == 239) {  // Latch to Text encodation
@@ -148,252 +164,237 @@ final class DecodedBitStreamParser {
                 // TODO(bbrown): I think we need to support ECI
                 throw new ReaderException("Currently not supporting ECI Character");
             } else if (oneByte >= 242) {  // Not to be used in ASCII encodation
-               throw new ReaderException(Integer.toString(oneByte) + " should not be used in ASCII encodation");
+               throw new ReaderException(oneByte + " should not be used in ASCII encodation");
             }
      } while (bits.available() > 0);
      return ASCII_ENCODE;
    }
  
    /**
-  * See ISO 16022:2006, 5.2.5 and Annex C, Table C.1
-  */
-  private static int decodeC40Segment(BitSource bits,
-                                      StringBuffer result) throws ReaderException {
+   * See ISO 16022:2006, 5.2.5 and Annex C, Table C.1
+   */
+  private static void decodeC40Segment(BitSource bits, StringBuffer result) throws ReaderException {
      // Three C40 values are encoded in a 16-bit value as
      // (1600 * C1) + (40 * C2) + C3 + 1
-    int shift = 0;
      // TODO(bbrown): The Upper Shift with C40 doesn't work in the 4 value scenario all the time
      boolean upperShift = false;
  
+    int[] cValues = new int[3];
      do {
        // If there is only one byte left then it will be encoded as ASCII
        if (bits.available() == 8) {
-        return ASCII_ENCODE;
+        return;
        }
-
-      char firstByte = (char) bits.readBits(8);
-
+      int firstByte = bits.readBits(8);
        if (firstByte == 254) {  // Unlatch codeword
-        return ASCII_ENCODE;
+        return;
        }
  
-      int fullBitValue = (firstByte << 8) + bits.readBits(8) - 1;
-
-      char[] cValues = new char[3];
-      cValues[0] = (char) (fullBitValue / 1600);
-      fullBitValue -= cValues[0] * 1600;
-      cValues[1] = (char) (fullBitValue / 40);
-      fullBitValue -= cValues[1] * 40;
-      cValues[2] = (char) fullBitValue;
+      parseTwoBytes(firstByte, bits.readBits(8), cValues);
  
+      int shift = 0;
        for (int i = 0; i < 3; i++) {
-        if (shift == 0) {
-          if (cValues[i] == 0) {  // Shift 1
-            shift = 1;
-            continue;
-          } else if (cValues[i] == 1) {  // Shift 2
-            shift = 2;
-            continue;
-          } else if (cValues[i] == 2) {  // Shift 3
-            shift = 3;
-            continue;
-          }
-          if (upperShift) {
-            result.append((char)(C40_BASIC_SET_CHARS[cValues[i]] + 128));
-            upperShift = false;
-          } else {
-            result.append(C40_BASIC_SET_CHARS[cValues[i]]);
-          }
-        } else if (shift == 1) {
-          if (upperShift) {
-            result.append((char) (cValues[i] + 128));
-            upperShift = false;
-          } else {
-            result.append(cValues[i]);
-          }
-        } else if (shift == 2) {
-          if (cValues[i] < 27) {
-            if(upperShift) {
-              result.append((char)(C40_SHIFT2_SET_CHARS[cValues[i]] + 128));
+        int cValue = cValues[i];
+        switch (shift) {
+          case 0:
+            if (cValue < 3) {
+              shift = cValue + 1;
+            } else {
+              if (upperShift) {
+                result.append((char) (C40_BASIC_SET_CHARS[cValue] + 128));
+                upperShift = false;
+              } else {
+                result.append(C40_BASIC_SET_CHARS[cValue]);
+              }
+            }
+            break;
+          case 1:
+            if (upperShift) {
+              result.append((char) (cValue + 128));
                upperShift = false;
              } else {
-              result.append(C40_SHIFT2_SET_CHARS[cValues[i]]);
+              result.append((char) cValue);  // cValue is an int: cast so the char is appended, not its decimal digits
              }
-          } else if (cValues[i] == 27) {  // FNC1
-            throw new ReaderException("Currently not supporting FNC1");
-          } else if (cValues[i] == 30) {  // Upper Shirt
-            upperShift = true;
-          } else {
-            throw new ReaderException(Integer.toString(cValues[i]) + " is not valid in the C40 Shift 2 set");
-          }
-        } else if (shift == 3) {
-          if (upperShift) {
-            result.append((char) (cValues[i] + 224));
-            upperShift = false;
-          } else {
-            result.append((char) (cValues[i] + 96));
-          }
-        } else {
-          throw new ReaderException("Invalid shift value");
+            shift = 0;
+            break;
+          case 2:
+            if (cValue < 27) {
+              if (upperShift) {
+                result.append((char) (C40_SHIFT2_SET_CHARS[cValue] + 128));
+                upperShift = false;
+              } else {
+                result.append(C40_SHIFT2_SET_CHARS[cValue]);
+              }
+            } else if (cValue == 27) {  // FNC1
+              throw new ReaderException("Currently not supporting FNC1");
+            } else if (cValue == 30) {  // Upper Shift
+              upperShift = true;
+            } else {
+              throw new ReaderException(cValue + " is not valid in the C40 Shift 2 set");
+            }
+            shift = 0;
+            break;
+          case 3:
+            if (upperShift) {
+              result.append((char) (cValue + 224));
+              upperShift = false;
+            } else {
+              result.append((char) (cValue + 96));
+            }
+            shift = 0;
+            break;
+          default:
+            throw new ReaderException("Invalid shift value");
          }
        }
      } while (bits.available() > 0);
-    return ASCII_ENCODE;
    }
    
    /**
-  * See ISO 16022:2006, 5.2.6 and Annex C, Table C.2
-  */
-  private static int decodeTextSegment(BitSource bits,
-                                       StringBuffer result) throws ReaderException {
+   * See ISO 16022:2006, 5.2.6 and Annex C, Table C.2
+   */
+  private static void decodeTextSegment(BitSource bits, StringBuffer result) throws ReaderException {
      // Three Text values are encoded in a 16-bit value as
      // (1600 * C1) + (40 * C2) + C3 + 1
-    int shift = 0;
      // TODO(bbrown): The Upper Shift with Text doesn't work in the 4 value scenario all the time
      boolean upperShift = false;
  
+    int[] cValues = new int[3];
      do {
        // If there is only one byte left then it will be encoded as ASCII
        if (bits.available() == 8) {
-        return ASCII_ENCODE;
+        return;
        }
-
-      char firstByte = (char) bits.readBits(8);
-
+      int firstByte = bits.readBits(8);
        if (firstByte == 254) {  // Unlatch codeword
-        return ASCII_ENCODE;
+        return;
        }
  
-      int fullBitValue = (firstByte << 8) + bits.readBits(8) - 1;
-
-      char[] cValues = new char[3];
-      cValues[0] = (char) (fullBitValue / 1600);
-      fullBitValue -= cValues[0] * 1600;
-      cValues[1] = (char) (fullBitValue / 40);
-      fullBitValue -= cValues[1] * 40;
-      cValues[2] = (char) fullBitValue;
+      parseTwoBytes(firstByte, bits.readBits(8), cValues);
  
+      int shift = 0;
        for (int i = 0; i < 3; i++) {
-        if (shift == 0) {
-          if (cValues[i] == 0) {  // Shift 1
-            shift = 1;
-            continue;
-          } else if (cValues[i] == 1) {  // Shift 2
-            shift = 2;
-            continue;
-          } else if (cValues[i] == 2) {  // Shift 3
-            shift = 3;
-            continue;
-          }
-          if (upperShift) {
-            result.append((char)(TEXT_BASIC_SET_CHARS[cValues[i]] + 128));
-            upperShift = false;
-          } else {
-            result.append(TEXT_BASIC_SET_CHARS[cValues[i]]);
-          }
-        } else if (shift == 1) {
-          if (upperShift) {
-            result.append((char) (cValues[i] + 128));
-            upperShift = false;
-          } else {
-            result.append(cValues[i]);
-          }
-        } else if (shift == 2) {
-          // Shift 2 for Text is the same encoding as C40
-          if (cValues[i] < 27) {
-            if(upperShift) {
-              result.append((char)(C40_SHIFT2_SET_CHARS[cValues[i]] + 128));
+        int cValue = cValues[i];
+        switch (shift) {
+          case 0:
+            if (cValue < 3) {
+              shift = cValue + 1;
+            } else {
+              if (upperShift) {
+                result.append((char) (TEXT_BASIC_SET_CHARS[cValue] + 128));
+                upperShift = false;
+              } else {
+                result.append(TEXT_BASIC_SET_CHARS[cValue]);
+              }
+            }
+            break;
+          case 1:
+            if (upperShift) {
+              result.append((char) (cValue + 128));
                upperShift = false;
              } else {
-              result.append(C40_SHIFT2_SET_CHARS[cValues[i]]);
+              result.append((char) cValue);  // cValue is an int: cast so the char is appended, not its decimal digits
              }
-          } else if (cValues[i] == 27) {  // FNC1
-            throw new ReaderException("Currently not supporting FNC1");
-          } else if (cValues[i] == 30) {  // Upper Shirt
-            upperShift = true;
-          } else {
-            throw new ReaderException(Integer.toString(cValues[i]) + " is not valid in the C40 Shift 2 set");
-          }
-        } else if (shift == 3) {
-          if (upperShift) {
-            result.append((char)(TEXT_SHIFT3_SET_CHARS[cValues[i]] + 128));
-            upperShift = false;
-          } else {
-            result.append(TEXT_SHIFT3_SET_CHARS[cValues[i]]);
-          }
-        } else {
-          throw new ReaderException("Invalid shift value");
+            shift = 0;
+            break;
+          case 2:
+            // Shift 2 for Text is the same encoding as C40
+            if (cValue < 27) {
+              if (upperShift) {
+                result.append((char) (C40_SHIFT2_SET_CHARS[cValue] + 128));
+                upperShift = false;
+              } else {
+                result.append(C40_SHIFT2_SET_CHARS[cValue]);
+              }
+            } else if (cValue == 27) {  // FNC1
+              throw new ReaderException("Currently not supporting FNC1");
+            } else if (cValue == 30) {  // Upper Shift
+              upperShift = true;
+            } else {
+              throw new ReaderException(cValue + " is not valid in the C40 Shift 2 set");
+            }
+            shift = 0;
+            break;
+          case 3:
+            if (upperShift) {
+              result.append((char) (TEXT_SHIFT3_SET_CHARS[cValue] + 128));
+              upperShift = false;
+            } else {
+              result.append(TEXT_SHIFT3_SET_CHARS[cValue]);
+            }
+            shift = 0;
+            break;
+          default:
+            throw new ReaderException("Invalid shift value");
          }
        }
      } while (bits.available() > 0);
-    return ASCII_ENCODE;
    }
    
    /**
-  * See ISO 16022:2006, 5.2.7
-  */
-  private static int decodeAnsiX12Segment(BitSource bits,
-                                          StringBuffer result) throws ReaderException {
+   * See ISO 16022:2006, 5.2.7
+   */
+  private static void decodeAnsiX12Segment(BitSource bits, StringBuffer result) throws ReaderException {
      // Three ANSI X12 values are encoded in a 16-bit value as
      // (1600 * C1) + (40 * C2) + C3 + 1
  
+    int[] cValues = new int[3];
      do {
        // If there is only one byte left then it will be encoded as ASCII
        if (bits.available() == 8) {
-        return ASCII_ENCODE;
+        return;
        }
-
-      char firstByte = (char) bits.readBits(8);
-
+      int firstByte = bits.readBits(8);
        if (firstByte == 254) {  // Unlatch codeword
-        return ASCII_ENCODE;
+        return;
        }
  
-      int fullBitValue = (firstByte << 8) + bits.readBits(8) - 1;
-
-      char[] cValues = new char[3];
-      cValues[0] = (char) (fullBitValue / 1600);
-      fullBitValue -= cValues[0] * 1600;
-      cValues[1] = (char) (fullBitValue / 40);
-      fullBitValue -= cValues[1] * 40;
-      cValues[2] = (char) fullBitValue;
+      parseTwoBytes(firstByte, bits.readBits(8), cValues);
  
        for (int i = 0; i < 3; i++) {
-        // TODO(bbrown): These really aren't X12 symbols, we are converting to ASCII chars
-        if (cValues[i] == 0) {  // X12 segment terminator <CR>
-          result.append("<CR>");
-        } else if (cValues[i] == 1) {  // X12 segment separator *
+        int cValue = cValues[i];
+        if (cValue == 0) {  // X12 segment terminator <CR>
+          result.append('\r');
+        } else if (cValue == 1) {  // X12 segment separator *
            result.append('*');
-        } else if (cValues[i] == 2) {  // X12 sub-element separator >
+        } else if (cValue == 2) {  // X12 sub-element separator >
            result.append('>');
-        } else if (cValues[i] == 3) {  // space
+        } else if (cValue == 3) {  // space
            result.append(' ');
-        } else if (cValues[i] < 14) {  // 0 - 9
-          result.append((char) (cValues[i] + 44));
-        } else if (cValues[i] < 40) {  // A - Z
-          result.append((char) (cValues[i] + 51));
+        } else if (cValue < 14) {  // 0 - 9
+          result.append((char) (cValue + 44));
+        } else if (cValue < 40) {  // A - Z
+          result.append((char) (cValue + 51));
          } else {
-          throw new ReaderException(Integer.toString(cValues[i]) + " is not valid in the ANSI X12 set");
+          throw new ReaderException(cValue + " is not valid in the ANSI X12 set");
          }
        }
      } while (bits.available() > 0);
-    
-    return ASCII_ENCODE;
+  }
+
+  private static void parseTwoBytes(int firstByte, int secondByte, int[] result) {
+    int fullBitValue = (firstByte << 8) + secondByte - 1;
+    int temp = fullBitValue / 1600;
+    result[0] = temp;
+    fullBitValue -= temp * 1600;
+    temp = fullBitValue / 40;
+    result[1] = temp;
+    result[2] = fullBitValue - temp * 40;
    }
    
    /**
-  * See ISO 16022:2006, 5.2.8 and Annex C Table C.3
-  */
-  private static int decodeEdifactSegment(BitSource bits, StringBuffer result) {
+   * See ISO 16022:2006, 5.2.8 and Annex C Table C.3
+   */
+  private static void decodeEdifactSegment(BitSource bits, StringBuffer result) {
      boolean unlatch = false;
      do {
        // If there is only two or less bytes left then it will be encoded as ASCII
        if (bits.available() <= 16) {
-        return ASCII_ENCODE;
+        return;
        }
  
        for (int i = 0; i < 4; i++) {
-        char edifactValue = (char) bits.readBits(6);
+        int edifactValue = bits.readBits(6);
  
          // Check for the unlatch character
          if (edifactValue == 0x2B67) {  // 011111
@@ -410,16 +411,14 @@ final class DecodedBitStreamParser {
          }
        }
      } while (!unlatch && bits.available() > 0);
-
-    return ASCII_ENCODE;
    }
    
    /**
-  * See ISO 16022:2006, 5.2.9 and Annex B, B.2
-  */
-  private static int decodeBase256Segment(BitSource bits, StringBuffer result) {
+   * See ISO 16022:2006, 5.2.9 and Annex B, B.2
+   */
+  private static void decodeBase256Segment(BitSource bits, StringBuffer result, Vector byteSegments) {
      // Figure out how long the Base 256 Segment is.
-    char d1 = (char) bits.readBits(8);
+    int d1 = bits.readBits(8);
      int count;
      if (d1 == 0) {  // Read the remainder of the symbol
        count = bits.available() / 8;
@@ -428,25 +427,26 @@ final class DecodedBitStreamParser {
      } else {
        count = 250 * (d1 - 249) + bits.readBits(8);
      }
+    byte[] bytes = new byte[count];
      for (int i = 0; i < count; i++) {
-      result.append(unrandomize255State((char) bits.readBits(8), count));
+      bytes[i] = unrandomize255State(bits.readBits(8), i);
+    }
+    byteSegments.addElement(bytes);
+    try {
+      result.append(new String(bytes, "ISO8859_1"));
+    } catch (UnsupportedEncodingException uee) {
+      throw new RuntimeException("Platform does not support required encoding: " + uee);
      }
-    
-    return ASCII_ENCODE;
    }
    
    /**
-  * See ISO 16022:2006, Annex B, B.2
-  */
-  private static char unrandomize255State(char randomizedBase256Codeword,
+   * See ISO 16022:2006, Annex B, B.2
+   */
+  private static byte unrandomize255State(int randomizedBase256Codeword,
                                            int base256CodewordPosition) {
-    char pseudoRandomNumber = (char) (((149 * base256CodewordPosition) % 255) + 1);
+    int pseudoRandomNumber = ((149 * base256CodewordPosition) % 255) + 1;
      int tempVariable = randomizedBase256Codeword - pseudoRandomNumber;
-    if (tempVariable >= 0) {
-      return (char) tempVariable;
-    } else {
-      return (char) (tempVariable + 256);
-    }
+    return (byte) (tempVariable >= 0 ? tempVariable : (tempVariable + 256));
    }
    
  }