Go ahead and enable Code 93 in android
[zxing.git] / csharp / qrcode / decoder / DecodedBitStreamParser.cs
1 /*\r
2 * Copyright 2007 ZXing authors\r
3 *\r
4 * Licensed under the Apache License, Version 2.0 (the "License");\r
5 * you may not use this file except in compliance with the License.\r
6 * You may obtain a copy of the License at\r
7 *\r
8 *      http://www.apache.org/licenses/LICENSE-2.0\r
9 *\r
10 * Unless required by applicable law or agreed to in writing, software\r
11 * distributed under the License is distributed on an "AS IS" BASIS,\r
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
13 * See the License for the specific language governing permissions and\r
14 * limitations under the License.\r
15 */\r
16 using System;\r
17 using ReaderException = com.google.zxing.ReaderException;\r
18 using BitSource = com.google.zxing.common.BitSource;\r
19 using CharacterSetECI = com.google.zxing.common.CharacterSetECI;\r
20 using DecoderResult = com.google.zxing.common.DecoderResult;\r
21 namespace com.google.zxing.qrcode.decoder\r
22 {\r
23         \r
24         /// <summary> <p>QR Codes can encode text as bits in one of several modes, and can use multiple modes\r
25         /// in one QR Code. This class decodes the bits back into text.</p>\r
26         /// \r
27         /// <p>See ISO 18004:2006, 6.4.3 - 6.4.7</p>\r
28         /// \r
29         /// </summary>\r
30         /// <author>  Sean Owen\r
31         /// </author>\r
32         /// <author>www.Redivivus.in (suraj.supekar@redivivus.in) - Ported from ZXING Java Source \r
33         /// </author>\r
34         sealed class DecodedBitStreamParser\r
35         {\r
36                 \r
/// <summary>Characters of the alphanumeric mode, indexed by their encoded value
/// (45 entries). See ISO 18004:2006, 6.4.4 Table 5.</summary>
private static readonly char[] ALPHANUMERIC_CHARS = new char[]{'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', ' ', '$', '%', '*', '+', '-', '.', '/', ':'};

/// <summary>Encoding name passed to Encoding.GetEncoding for Shift_JIS text.</summary>
private const System.String SHIFT_JIS = "SJIS";

/// <summary>EUC-JP name; only referenced by the disabled platform-default
/// detection in the static constructor.</summary>
private const System.String EUC_JP = "EUC_JP";

/// <summary>When true, guessEncoding always answers Shift_JIS. Assigned once in the
/// static constructor.</summary>
private static readonly bool ASSUME_SHIFT_JIS;

/// <summary>Encoding name passed to Encoding.GetEncoding for UTF-8 text.</summary>
// The Java alias "UTF8" is not a registered .NET encoding name and makes
// Encoding.GetEncoding throw; "UTF-8" is the name .NET recognises.
private const System.String UTF8 = "UTF-8";

// Added during the Java to C# port (Redivivus.in, 30/01/2010).
/// <summary>Encoding name passed to Encoding.GetEncoding for Latin-1 text.</summary>
private const System.String ISO88591 = "ISO-8859-1";
48                 \r
/// <summary>Private so the class cannot be instantiated; every member of this
/// class is static.</summary>
private DecodedBitStreamParser() { }
52                 \r
/// <summary>Decodes the data codewords of a QR Code into text by reading one segment
/// after another (mode indicator, then mode-specific payload) until a terminator
/// is found or fewer than four bits remain.</summary>
/// <param name="bytes">Codeword bytes to parse; also handed to the result unchanged.</param>
/// <param name="version">Symbol version, used to look up the width of each segment's
/// character-count field.</param>
/// <param name="ecLevel">Error correction level, handed to the result unchanged.</param>
/// <returns>A DecoderResult holding the decoded text and the raw byte segments
/// (null when no byte segment was present).</returns>
/// <exception cref="ReaderException">The bit stream contains an invalid mode, an unknown
/// or malformed ECI designator, or an otherwise undecodable segment.</exception>
internal static DecoderResult decode(sbyte[] bytes, Version version, ErrorCorrectionLevel ecLevel)
{
    BitSource bits = new BitSource(bytes);
    System.Text.StringBuilder result = new System.Text.StringBuilder(50);
    System.Collections.ArrayList byteSegments = System.Collections.ArrayList.Synchronized(new System.Collections.ArrayList(1));
    try
    {
        CharacterSetECI currentCharacterSetECI = null;
        bool fc1InEffect = false;
        Mode mode;
        do
        {
            // While still another segment to read...
            if (bits.available() < 4)
            {
                // OK, assume we're done. Really, a TERMINATOR mode should have been recorded here
                mode = Mode.TERMINATOR;
            }
            else
            {
                mode = Mode.forBits(bits.readBits(4)); // mode is encoded by 4 bits
            }

            if (!mode.Equals(Mode.TERMINATOR))
            {
                if (mode.Equals(Mode.FNC1_FIRST_POSITION) || mode.Equals(Mode.FNC1_SECOND_POSITION))
                {
                    // We do little with FNC1 except alter the parsed result a bit according to the spec
                    fc1InEffect = true;
                }
                else if (mode.Equals(Mode.STRUCTURED_APPEND))
                {
                    // Not really supported; all we do is ignore it.
                    // Read next 8 bits (symbol sequence #) and 8 bits (parity data), then continue
                    bits.readBits(16);
                }
                else if (mode.Equals(Mode.ECI))
                {
                    // Count doesn't apply to ECI
                    int eciValue = parseECIValue(bits);
                    currentCharacterSetECI = CharacterSetECI.getCharacterSetECIByValue(eciValue);
                    if (currentCharacterSetECI == null)
                    {
                        throw ReaderException.Instance;
                    }
                }
                else
                {
                    // How many characters will follow, encoded in this mode?
                    int count = bits.readBits(mode.getCharacterCountBits(version));
                    if (mode.Equals(Mode.NUMERIC))
                    {
                        decodeNumericSegment(bits, result, count);
                    }
                    else if (mode.Equals(Mode.ALPHANUMERIC))
                    {
                        decodeAlphanumericSegment(bits, result, count, fc1InEffect);
                    }
                    else if (mode.Equals(Mode.BYTE))
                    {
                        decodeByteSegment(bits, result, count, currentCharacterSetECI, byteSegments);
                    }
                    else if (mode.Equals(Mode.KANJI))
                    {
                        decodeKanjiSegment(bits, result, count);
                    }
                    else
                    {
                        throw ReaderException.Instance;
                    }
                }
            }
        }
        while (!mode.Equals(Mode.TERMINATOR));
    }
    catch (System.ArgumentException)
    {
        // Raised for an unknown mode indicator and by parseECIValue for a malformed
        // ECI designator (presumably also by BitSource.readBits on a bad request --
        // TODO confirm). All of these mean the symbol is undecodable, so report them
        // the same way as every other decoding failure.
        throw ReaderException.Instance;
    }

    return new DecoderResult(bytes, result.ToString(), (byteSegments.Count == 0) ? null : byteSegments, ecLevel);
}
134                 \r
/// <summary>Decodes a Kanji-mode segment: each character is stored in 13 bits, is
/// expanded back to its two-byte Shift_JIS code, and the decoded text is appended
/// to <paramref name="result"/>.</summary>
/// <param name="bits">Bit stream positioned at the first character of the segment.</param>
/// <param name="result">Receives the decoded characters.</param>
/// <param name="count">Number of Kanji characters in the segment.</param>
/// <exception cref="ReaderException">Fewer than 13 * count bits remain, or the
/// Shift_JIS encoding is not available.</exception>
private static void decodeKanjiSegment(BitSource bits, System.Text.StringBuilder result, int count)
{
    // Refuse a count the stream cannot satisfy before allocating or reading anything.
    if (count * 13 > bits.available())
    {
        throw ReaderException.Instance;
    }

    // Each character will require 2 bytes. Read the characters as 2-byte pairs
    // and decode as Shift_JIS afterwards
    sbyte[] buffer = new sbyte[2 * count];
    int offset = 0;
    while (count > 0)
    {
        // Each 13 bits encodes a 2-byte character
        int twoBytes = bits.readBits(13);
        int assembledTwoBytes = ((twoBytes / 0x0C0) << 8) | (twoBytes % 0x0C0);
        if (assembledTwoBytes < 0x01F00)
        {
            // In the 0x8140 to 0x9FFC range
            assembledTwoBytes += 0x08140;
        }
        else
        {
            // In the 0xE040 to 0xEBBF range
            assembledTwoBytes += 0x0C140;
        }
        buffer[offset] = (sbyte) (assembledTwoBytes >> 8);
        buffer[offset + 1] = (sbyte) assembledTwoBytes;
        offset += 2;
        count--;
    }

    // Shift_JIS may not be supported in some environments. Encoding.GetEncoding
    // signals that with ArgumentException (NotSupportedException is handled too,
    // for platforms that lack the code page); it never throws IOException.
    try
    {
        result.Append(System.Text.Encoding.GetEncoding(SHIFT_JIS).GetString(SupportClass.ToByteArray(buffer)));
    }
    catch (System.ArgumentException)
    {
        throw ReaderException.Instance;
    }
    catch (System.NotSupportedException)
    {
        throw ReaderException.Instance;
    }
}
172                 \r
/// <summary>Decodes a byte-mode segment: reads <paramref name="count"/> bytes, converts
/// them to text with the encoding named by the current ECI (or a guessed one when no
/// ECI is in effect), appends the text to <paramref name="result"/> and records the raw
/// bytes in <paramref name="byteSegments"/>.</summary>
/// <param name="bits">Bit stream positioned at the first byte of the segment.</param>
/// <param name="result">Receives the decoded text.</param>
/// <param name="count">Number of bytes in the segment.</param>
/// <param name="currentCharacterSetECI">Character set selected by a preceding ECI
/// segment, or null when none was seen.</param>
/// <param name="byteSegments">List that receives the segment's raw bytes.</param>
/// <exception cref="ReaderException">Fewer than 8 * count bits remain, or the chosen
/// encoding is not available.</exception>
private static void decodeByteSegment(BitSource bits, System.Text.StringBuilder result, int count, CharacterSetECI currentCharacterSetECI, System.Collections.ArrayList byteSegments)
{
    // Validate the declared length before allocating a buffer for it.
    if (count << 3 > bits.available())
    {
        throw ReaderException.Instance;
    }

    sbyte[] readBytes = new sbyte[count];
    for (int i = 0; i < count; i++)
    {
        readBytes[i] = (sbyte) bits.readBits(8);
    }

    System.String encoding;
    if (currentCharacterSetECI == null)
    {
        // The spec isn't clear on this mode; see section 6.4.5: it does not say
        // which encoding to assume upon decoding. I have seen ISO-8859-1 used as
        // well as Shift_JIS -- without anything like an ECI designator to give a hint.
        encoding = guessEncoding(readBytes);
    }
    else
    {
        encoding = currentCharacterSetECI.EncodingName;
    }

    // Encoding.GetEncoding reports an unknown or unsupported name with
    // ArgumentException (NotSupportedException is handled too, for platforms that
    // lack the code page); it never throws IOException.
    try
    {
        result.Append(System.Text.Encoding.GetEncoding(encoding).GetString(SupportClass.ToByteArray(readBytes)));
    }
    catch (System.ArgumentException)
    {
        throw ReaderException.Instance;
    }
    catch (System.NotSupportedException)
    {
        throw ReaderException.Instance;
    }
    byteSegments.Add(SupportClass.ToByteArray(readBytes));
}
209                 \r
/// <summary>Decodes an alphanumeric-mode segment: characters are packed two per 11 bits
/// (value = first * 45 + second) with a trailing single character in 6 bits. When an
/// FNC1 mode is in effect, "%%" in the decoded text becomes "%" and a lone "%" becomes
/// the separator character 0x1D.</summary>
/// <param name="bits">Bit stream positioned at the first character of the segment.</param>
/// <param name="result">Receives the decoded characters.</param>
/// <param name="count">Number of characters in the segment.</param>
/// <param name="fc1InEffect">True when an FNC1 mode indicator preceded this segment.</param>
/// <exception cref="ReaderException">Not enough bits remain, or an encoded value lies
/// outside the 45-entry alphanumeric table.</exception>
private static void decodeAlphanumericSegment(BitSource bits, System.Text.StringBuilder result, int count, bool fc1InEffect)
{
    int start = result.Length;

    // Read two characters at a time
    while (count > 1)
    {
        if (bits.available() < 11)
        {
            throw ReaderException.Instance;
        }
        int nextTwoCharsBits = bits.readBits(11);
        // 11 bits hold up to 2047, so the quotient can reach 45 -- one past the table.
        int firstIndex = nextTwoCharsBits / 45;
        if (firstIndex >= ALPHANUMERIC_CHARS.Length)
        {
            throw ReaderException.Instance;
        }
        result.Append(ALPHANUMERIC_CHARS[firstIndex]);
        result.Append(ALPHANUMERIC_CHARS[nextTwoCharsBits % 45]);
        count -= 2;
    }

    if (count == 1)
    {
        // special case: one character left
        if (bits.available() < 6)
        {
            throw ReaderException.Instance;
        }
        // 6 bits hold up to 63, which also exceeds the table.
        int lastIndex = bits.readBits(6);
        if (lastIndex >= ALPHANUMERIC_CHARS.Length)
        {
            throw ReaderException.Instance;
        }
        result.Append(ALPHANUMERIC_CHARS[lastIndex]);
    }

    // See section 6.4.8.1, 6.4.8.2
    if (fc1InEffect)
    {
        // We need to massage the result a bit if in an FNC1 mode:
        for (int i = start; i < result.Length; i++)
        {
            if (result[i] == '%')
            {
                if (i < result.Length - 1 && result[i + 1] == '%')
                {
                    // %% is rendered as %
                    result.Remove(i + 1, 1);
                }
                else
                {
                    // In alpha mode, % should be converted to FNC1 separator 0x1D
                    result[i] = (char) 0x1D;
                }
            }
        }
    }
}
248                 \r
/// <summary>Decodes a numeric-mode segment: digits are packed three per 10 bits, with a
/// remainder of two digits in 7 bits or one digit in 4 bits.</summary>
/// <param name="bits">Bit stream positioned at the first digit group of the segment.</param>
/// <param name="result">Receives the decoded digits.</param>
/// <param name="count">Number of digits in the segment.</param>
/// <exception cref="ReaderException">Not enough bits remain for the next group, or a
/// group's value is too large for its number of digits.</exception>
private static void decodeNumericSegment(BitSource bits, System.Text.StringBuilder result, int count)
{
    // Read three digits at a time
    while (count >= 3)
    {
        // Each 10 bits encodes three digits
        if (bits.available() < 10)
        {
            throw ReaderException.Instance;
        }
        int threeDigitsBits = bits.readBits(10);
        if (threeDigitsBits >= 1000)
        {
            throw ReaderException.Instance;
        }
        result.Append(ALPHANUMERIC_CHARS[threeDigitsBits / 100]);
        result.Append(ALPHANUMERIC_CHARS[(threeDigitsBits / 10) % 10]);
        result.Append(ALPHANUMERIC_CHARS[threeDigitsBits % 10]);
        count -= 3;
    }

    if (count == 2)
    {
        // Two digits left over to read, encoded in 7 bits
        if (bits.available() < 7)
        {
            throw ReaderException.Instance;
        }
        int twoDigitsBits = bits.readBits(7);
        if (twoDigitsBits >= 100)
        {
            throw ReaderException.Instance;
        }
        result.Append(ALPHANUMERIC_CHARS[twoDigitsBits / 10]);
        result.Append(ALPHANUMERIC_CHARS[twoDigitsBits % 10]);
    }
    else if (count == 1)
    {
        // One digit left over to read
        if (bits.available() < 4)
        {
            throw ReaderException.Instance;
        }
        int digitBits = bits.readBits(4);
        if (digitBits >= 10)
        {
            throw ReaderException.Instance;
        }
        result.Append(ALPHANUMERIC_CHARS[digitBits]);
    }
}
287                 \r
/// <summary>Heuristically picks the text encoding of a byte segment that carries no ECI
/// designator. Only Shift_JIS, ISO-8859-1 and UTF-8 are considered.</summary>
/// <param name="bytes">Raw bytes of the segment.</param>
/// <returns>One of the SHIFT_JIS, ISO88591 or UTF8 encoding names.</returns>
private static System.String guessEncoding(sbyte[] bytes)
{
    if (ASSUME_SHIFT_JIS)
    {
        return SHIFT_JIS;
    }

    // A leading UTF-8 byte order mark (EF BB BF) settles the question.
    if (bytes.Length > 3)
    {
        if (bytes[0] == (sbyte) SupportClass.Identity(0xEF) && bytes[1] == (sbyte) SupportClass.Identity(0xBB) && bytes[2] == (sbyte) SupportClass.Identity(0xBF))
        {
            return UTF8;
        }
    }

    // ISO-8859-1 should not contain bytes in 0x80 - 0x9F, whereas Shift_JIS uses that
    // range for the first byte of a two-byte character. A plausible Shift_JIS second
    // byte after such a lead byte counts as evidence for Shift_JIS; anything that fits
    // neither encoding leaves UTF-8 as the (risky) fallback.
    int total = bytes.Length;
    bool latin1Possible = true;
    bool shiftJisPossible = true;
    bool utf8Latin1PairSeen = false;
    bool previousStartedPair = false;
    int pairCount = 0;
    int katakanaCount = 0;

    int index = 0;
    while (index < total && (latin1Possible || shiftJisPossible))
    {
        int current = bytes[index] & 0xFF;
        bool hasNext = index < total - 1;

        // Poor man's UTF-8 detection: Latin-1 supplement characters (e.g. u-umlaut)
        // are encoded as 0xC2 + [0xA0,0xBF] or 0xC3 + [0x80,0xBF].
        if (hasNext && (current == 0xC2 || current == 0xC3))
        {
            int following = bytes[index + 1] & 0xFF;
            int lowestTrail = (current == 0xC2) ? 0xA0 : 0x80;
            if (following >= lowestTrail && following <= 0xBF)
            {
                utf8Latin1PairSeen = true;
            }
        }

        if (current >= 0x7F && current <= 0x9F)
        {
            latin1Possible = false;
        }

        if (!previousStartedPair)
        {
            // Possibly a single-byte Katakana character in Shift_JIS.
            if (current >= 0xA1 && current <= 0xDF)
            {
                katakanaCount++;
            }
            // Values that never occur outside the second byte of a pair in Shift_JIS.
            if (current >= 0xF0 || current == 0x80 || current == 0xA0)
            {
                shiftJisPossible = false;
            }
        }

        bool isLeadByte = (current >= 0x81 && current <= 0x9F) || (current >= 0xE0 && current <= 0xEF);
        if (!isLeadByte || previousStartedPair)
        {
            // Either an ordinary byte, or the second half of the pair examined on the
            // previous iteration -- in both cases it does not open a new pair.
            previousStartedPair = false;
        }
        else
        {
            // Possible start of a double-byte character: inspect the byte after it.
            previousStartedPair = true;
            if (!hasNext)
            {
                shiftJisPossible = false;
            }
            else
            {
                // There is conflicting information about which bytes may follow a lead
                // byte; the range [0x40,0xFC] seems to be the one that matches practice.
                int trail = bytes[index + 1] & 0xFF;
                if (trail >= 0x40 && trail <= 0xFC)
                {
                    pairCount++;
                }
                else
                {
                    shiftJisPossible = false;
                }
            }
        }

        index++;
    }

    // Crude tie-breaker between Shift_JIS and ISO-8859-1: with no sequence invalid in
    // Shift_JIS, at least three double-byte starts or more than 5% possible
    // single-byte Katakana (both rare in ISO-8859-1) mean Shift_JIS.
    if (shiftJisPossible && (pairCount >= 3 || 20 * katakanaCount > total))
    {
        return SHIFT_JIS;
    }

    // Otherwise default to ISO-8859-1 unless the data rules it out.
    if (latin1Possible && !utf8Latin1PairSeen)
    {
        return ISO88591;
    }

    // Otherwise take a wild guess with UTF-8.
    return UTF8;
}
402                 \r
/// <summary>Reads an ECI designator, which occupies one, two or three bytes depending on
/// the leading bits of its first byte (0xxxxxxx, 10xxxxxx or 110xxxxx).</summary>
/// <param name="bits">Bit stream positioned at the first byte of the designator.</param>
/// <returns>The ECI value assembled from the payload bits.</returns>
/// <exception cref="System.ArgumentException">The first byte matches none of the three
/// prefixes.</exception>
private static int parseECIValue(BitSource bits)
{
    int lead = bits.readBits(8);

    // 0xxxxxxx: the value fits in this single byte.
    if ((lead & 0x80) == 0)
    {
        return lead & 0x7F;
    }

    // 10xxxxxx: six bits here plus one more byte.
    if ((lead & 0xC0) == 0x80)
    {
        int highSix = (lead & 0x3F) << 8;
        return highSix | bits.readBits(8);
    }

    // 110xxxxx: five bits here plus two more bytes.
    if ((lead & 0xE0) == 0xC0)
    {
        int highFive = (lead & 0x1F) << 16;
        return highFive | bits.readBits(16);
    }

    throw new System.ArgumentException("Bad ECI bits starting with byte " + lead);
}
/// <summary>Initialises ASSUME_SHIFT_JIS.</summary>
static DecodedBitStreamParser()
{
    // The Java original set this flag when the platform's "file.encoding" property
    // named SJIS or EUC_JP. That lookup was commented out during the C# port
    // (Redivivus.in, 30/01/2010), so Shift_JIS is never assumed up front.
    ASSUME_SHIFT_JIS = false;
}
436         }\r
437 }