+ // Distinguishing Shift_JIS and ISO-8859-1 can be a little tough. The crude heuristic is:
+ // - If we saw either
+ //   - at least one byte that starts a double-byte value (such bytes are rare in ISO-8859-1), or
+ //   - more than 5% of bytes that could be single-byte Katakana (also rare in ISO-8859-1),
+ // - and we saw no sequences that are invalid in Shift_JIS, then we conclude Shift_JIS
+ if ((sawDoubleByteStart || 20 * maybeSingleByteKatakanaCount > length) && canBeShiftJIS) {
+ return SHIFT_JIS;
+ }
+ // Otherwise, we default to ISO-8859-1 unless we know it can't be
+ if (canBeISO88591) {
+ return ISO88591;
+ }
+ // Otherwise, we take a wild guess with UTF-8
+ return UTF8;