fs/udf/unicode.c

   1 /*
   2  * unicode.c
   3  *
   4  * PURPOSE
   5  *      Routines for converting between UTF-8 and OSTA Compressed Unicode.
   6  *      Also handles filename mangling
   7  *
   8  * DESCRIPTION
   9  *      OSTA Compressed Unicode is explained in the OSTA UDF specification.
  10  *              http://www.osta.org/
  11  *      UTF-8 is explained in IETF RFC 3629 (which obsoletes RFC 2279).
  12  *              https://www.rfc-editor.org/rfc/rfc3629
  13  *
  14  * CONTACTS
  15  *      E-mail regarding any portion of the Linux UDF file system should be
  16  *      directed to the development team's mailing list (run by majordomo):
  17  *              linux_udf@hpesjro.fc.hp.com
  18  *
  19  * COPYRIGHT
  20  *      This file is distributed under the terms of the GNU General Public
  21  *      License (GPL). Copies of the GPL can be obtained from:
  22  *              ftp://prep.ai.mit.edu/pub/gnu/GPL
  23  *      Each contributing author retains all rights to their own work.
  24  */
  25
  26 #include "udfdecl.h"
  27
  28 #include <linux/kernel.h>
  29 #include <linux/string.h>       /* for memset */
  30 #include <linux/nls.h>
  31 #include <linux/udf_fs.h>
  32
  33 #include "udf_sb.h"
  34
  35 int udf_ustr_to_dchars(uint8_t *dest, const struct ustr *src, int strlen)
  36 {
  37         if ( (!dest) || (!src) || (!strlen) || (src->u_len > strlen) )
  38                 return 0;
  39         memcpy(dest+1, src->u_name, src->u_len);
  40         dest[0] = src->u_cmpID;
  41         return src->u_len + 1;
  42 }
  43
  44 int udf_ustr_to_char(uint8_t *dest, const struct ustr *src, int strlen)
  45 {
  46         if ( (!dest) || (!src) || (!strlen) || (src->u_len >= strlen) )
  47                 return 0;
  48         memcpy(dest, src->u_name, src->u_len);
  49         return src->u_len;
  50 }
  51
  52 int udf_ustr_to_dstring(dstring *dest, const struct ustr *src, int dlength)
  53 {
  54         if ( udf_ustr_to_dchars(dest, src, dlength-1) )
  55         {
  56                 dest[dlength-1] = src->u_len + 1;
  57                 return dlength;
  58         }
  59         else
  60                 return 0;
  61 }
  62
  63 int udf_dchars_to_ustr(struct ustr *dest, const uint8_t *src, int strlen)
  64 {
  65         if ( (!dest) || (!src) || (!strlen) || (strlen > UDF_NAME_LEN) )
  66                 return 0;
  67         memset(dest, 0, sizeof(struct ustr));
  68         memcpy(dest->u_name, src+1, strlen-1);
  69         dest->u_cmpID = src[0];
  70         dest->u_len = strlen-1;
  71         return strlen-1;
  72 }
  73
  74 int udf_char_to_ustr(struct ustr *dest, const uint8_t *src, int strlen)
  75 {
  76         if ( (!dest) || (!src) || (!strlen) || (strlen >= UDF_NAME_LEN) )
  77                 return 0;
  78         memset(dest, 0, sizeof(struct ustr));
  79         memcpy(dest->u_name, src, strlen);
  80         dest->u_cmpID = 0x08;
  81         dest->u_len = strlen;
  82         return strlen;
  83 }
  84
  85
  86 int udf_dstring_to_ustr(struct ustr *dest, const dstring *src, int dlength)
  87 {
  88         if ( dlength && udf_dchars_to_ustr(dest, src, src[dlength-1]) )
  89                 return dlength;
  90         else
  91                 return 0;
  92 }
  93
  94 /*
  95  * udf_build_ustr
  96  */
  97 int udf_build_ustr(struct ustr *dest, dstring *ptr, int size)
  98 {
  99         int usesize;
 100
 101         if ( (!dest) || (!ptr) || (!size) )
 102                 return -1;
 103
 104         memset(dest, 0, sizeof(struct ustr));
 105         usesize= (size > UDF_NAME_LEN) ? UDF_NAME_LEN : size;
 106         dest->u_cmpID=ptr[0];
 107         dest->u_len=ptr[size-1];
 108         memcpy(dest->u_name, ptr+1, usesize-1);
 109         return 0;
 110 }
 111
 112 /*
 113  * udf_build_ustr_exact
 114  */
 115 int udf_build_ustr_exact(struct ustr *dest, dstring *ptr, int exactsize)
 116 {
 117         if ( (!dest) || (!ptr) || (!exactsize) )
 118                 return -1;
 119
 120         memset(dest, 0, sizeof(struct ustr));
 121         dest->u_cmpID=ptr[0];
 122         dest->u_len=exactsize-1;
 123         memcpy(dest->u_name, ptr+1, exactsize-1);
 124         return 0;
 125 }
 126
 127 /*
 128  * udf_ocu_to_utf8
 129  *
 130  * PURPOSE
 131  *      Convert OSTA Compressed Unicode to the UTF-8 equivalent.
 132  *
 133  * DESCRIPTION
 134  *      This routine is only called by udf_filldir().
 135  *
 136  * PRE-CONDITIONS
 137  *      utf                     Pointer to UTF-8 output buffer.
 138  *      ocu                     Pointer to OSTA Compressed Unicode input buffer
 139  *                              of size UDF_NAME_LEN bytes.
 140  *                              both of type "struct ustr *"
 141  *
 142  * POST-CONDITIONS
 143  *      <return>                Zero on success.
 144  *
 145  * HISTORY
 146  *      November 12, 1997 - Andrew E. Mileski
 147  *      Written, tested, and released.
 148  */
 149 int udf_CS0toUTF8(struct ustr *utf_o, struct ustr *ocu_i)
 150 {
 151         uint8_t *ocu;
 152         uint32_t c;
 153         uint8_t cmp_id, ocu_len;
 154         int i;
 155
 156         ocu = ocu_i->u_name;
 157
 158         ocu_len = ocu_i->u_len;
 159         cmp_id = ocu_i->u_cmpID;
 160         utf_o->u_len = 0;
 161
 162         if (ocu_len == 0)
 163         {
 164                 memset(utf_o, 0, sizeof(struct ustr));
 165                 utf_o->u_cmpID = 0;
 166                 utf_o->u_len = 0;
 167                 return 0;
 168         }
 169
 170         if ((cmp_id != 8) && (cmp_id != 16))
 171         {
 172                 printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n", cmp_id, ocu_i->u_name);
 173                 return 0;
 174         }
 175
 176         for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN-3)) ;)
 177         {
 178
 179                 /* Expand OSTA compressed Unicode to Unicode */
 180                 c = ocu[i++];
 181                 if (cmp_id == 16)
 182                         c = (c << 8) | ocu[i++];
 183
 184                 /* Compress Unicode to UTF-8 */
 185                 if (c < 0x80U)
 186                         utf_o->u_name[utf_o->u_len++] = (uint8_t)c;
 187                 else if (c < 0x800U)
 188                 {
 189                         utf_o->u_name[utf_o->u_len++] = (uint8_t)(0xc0 | (c >> 6));
 190                         utf_o->u_name[utf_o->u_len++] = (uint8_t)(0x80 | (c & 0x3f));
 191                 }
 192                 else
 193                 {
 194                         utf_o->u_name[utf_o->u_len++] = (uint8_t)(0xe0 | (c >> 12));
 195                         utf_o->u_name[utf_o->u_len++] = (uint8_t)(0x80 | ((c >> 6) & 0x3f));
 196                         utf_o->u_name[utf_o->u_len++] = (uint8_t)(0x80 | (c & 0x3f));
 197                 }
 198         }
 199         utf_o->u_cmpID=8;
 200
 201         return utf_o->u_len;
 202 }
 203
 204 /*
 205  *
 206  * udf_utf8_to_ocu
 207  *
 208  * PURPOSE
 209  *      Convert UTF-8 to the OSTA Compressed Unicode equivalent.
 210  *
 211  * DESCRIPTION
 212  *      This routine is only called by udf_lookup().
 213  *
 214  * PRE-CONDITIONS
 215  *      ocu                     Pointer to OSTA Compressed Unicode output
 216  *                              buffer of size UDF_NAME_LEN bytes.
 217  *      utf                     Pointer to UTF-8 input buffer.
 218  *      utf_len                 Length of UTF-8 input buffer in bytes.
 219  *
 220  * POST-CONDITIONS
 221  *      <return>                Zero on success.
 222  *
 223  * HISTORY
 224  *      November 12, 1997 - Andrew E. Mileski
 225  *      Written, tested, and released.
 226  */
 227 int udf_UTF8toCS0(dstring *ocu, struct ustr *utf, int length)
 228 {
 229         unsigned c, i, max_val, utf_char;
 230         int utf_cnt;
 231         int u_len = 0;
 232
 233         memset(ocu, 0, sizeof(dstring) * length);
 234         ocu[0] = 8;
 235         max_val = 0xffU;
 236
 237 try_again:
 238         utf_char = 0U;
 239         utf_cnt = 0U;
 240         for (i = 0U; i < utf->u_len; i++)
 241         {
 242                 c = (uint8_t)utf->u_name[i];
 243
 244                 /* Complete a multi-byte UTF-8 character */
 245                 if (utf_cnt)
 246                 {
 247                         utf_char = (utf_char << 6) | (c & 0x3fU);
 248                         if (--utf_cnt)
 249                                 continue;
 250                 }
 251                 else
 252                 {
 253                         /* Check for a multi-byte UTF-8 character */
 254                         if (c & 0x80U)
 255                         {
 256                                 /* Start a multi-byte UTF-8 character */
 257                                 if ((c & 0xe0U) == 0xc0U)
 258                                 {
 259                                         utf_char = c & 0x1fU;
 260                                         utf_cnt = 1;
 261                                 }
 262                                 else if ((c & 0xf0U) == 0xe0U)
 263                                 {
 264                                         utf_char = c & 0x0fU;
 265                                         utf_cnt = 2;
 266                                 }
 267                                 else if ((c & 0xf8U) == 0xf0U)
 268                                 {
 269                                         utf_char = c & 0x07U;
 270                                         utf_cnt = 3;
 271                                 }
 272                                 else if ((c & 0xfcU) == 0xf8U)
 273                                 {
 274                                         utf_char = c & 0x03U;
 275                                         utf_cnt = 4;
 276                                 }
 277                                 else if ((c & 0xfeU) == 0xfcU)
 278                                 {
 279                                         utf_char = c & 0x01U;
 280                                         utf_cnt = 5;
 281                                 }
 282                                 else
 283                                         goto error_out;
 284                                 continue;
 285                         } else
 286                                 /* Single byte UTF-8 character (most common) */
 287                                 utf_char = c;
 288                 }
 289
 290                 /* Choose no compression if necessary */
 291                 if (utf_char > max_val)
 292                 {
 293                         if ( 0xffU == max_val )
 294                         {
 295                                 max_val = 0xffffU;
 296                                 ocu[0] = (uint8_t)0x10U;
 297                                 goto try_again;
 298                         }
 299                         goto error_out;
 300                 }
 301
 302                 if (max_val == 0xffffU)
 303                 {
 304                         ocu[++u_len] = (uint8_t)(utf_char >> 8);
 305                 }
 306                 ocu[++u_len] = (uint8_t)(utf_char & 0xffU);
 307         }
 308
 309
 310         if (utf_cnt)
 311         {
 312 error_out:
 313                 printk(KERN_ERR "udf: bad UTF-8 character\n");
 314                 return 0;
 315         }
 316
 317         ocu[length - 1] = (uint8_t)u_len + 1;
 318         return u_len + 1;
 319 }
 320
 321 int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o, struct ustr *ocu_i)
 322 {
 323         uint8_t *ocu;
 324         uint32_t c;
 325         uint8_t cmp_id, ocu_len;
 326         int i;
 327
 328         ocu = ocu_i->u_name;
 329
 330         ocu_len = ocu_i->u_len;
 331         cmp_id = ocu_i->u_cmpID;
 332         utf_o->u_len = 0;
 333
 334         if (ocu_len == 0)
 335         {
 336                 memset(utf_o, 0, sizeof(struct ustr));
 337                 utf_o->u_cmpID = 0;
 338                 utf_o->u_len = 0;
 339                 return 0;
 340         }
 341
 342         if ((cmp_id != 8) && (cmp_id != 16))
 343         {
 344                 printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n", cmp_id, ocu_i->u_name);
 345                 return 0;
 346         }
 347
 348         for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN-3)) ;)
 349         {
 350                 /* Expand OSTA compressed Unicode to Unicode */
 351                 c = ocu[i++];
 352                 if (cmp_id == 16)
 353                         c = (c << 8) | ocu[i++];
 354
 355                 utf_o->u_len += nls->uni2char(c, &utf_o->u_name[utf_o->u_len],
 356                         UDF_NAME_LEN - utf_o->u_len);
 357         }
 358         utf_o->u_cmpID=8;
 359
 360         return utf_o->u_len;
 361 }
 362
 363 int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni, int length)
 364 {
 365         unsigned len, i, max_val;
 366         uint16_t uni_char;
 367         int uni_cnt;
 368         int u_len = 0;
 369
 370         memset(ocu, 0, sizeof(dstring) * length);
 371         ocu[0] = 8;
 372         max_val = 0xffU;
 373
 374 try_again:
 375         uni_char = 0U;
 376         uni_cnt = 0U;
 377         for (i = 0U; i < uni->u_len; i++)
 378         {
 379                 len = nls->char2uni(&uni->u_name[i], uni->u_len-i, &uni_char);
 380
 381                 if (len == 2 && max_val == 0xff)
 382                 {
 383                         max_val = 0xffffU;
 384                         ocu[0] = (uint8_t)0x10U;
 385                         goto try_again;
 386                 }
 387
 388                 if (max_val == 0xffffU)
 389                 {
 390                         ocu[++u_len] = (uint8_t)(uni_char >> 8);
 391                         i++;
 392                 }
 393                 ocu[++u_len] = (uint8_t)(uni_char & 0xffU);
 394         }
 395
 396         ocu[length - 1] = (uint8_t)u_len + 1;
 397         return u_len + 1;
 398 }
 399
 400 int udf_get_filename(struct super_block *sb, uint8_t *sname, uint8_t *dname, int flen)
 401 {
 402         struct ustr filename, unifilename;
 403         int len;
 404
 405         if (udf_build_ustr_exact(&unifilename, sname, flen))
 406         {
 407                 return 0;
 408         }
 409
 410         if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8))
 411         {
 412                 if (!udf_CS0toUTF8(&filename, &unifilename) )
 413                 {
 414                         udf_debug("Failed in udf_get_filename: sname = %s\n", sname);
 415                         return 0;
 416                 }
 417         }
 418         else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP))
 419         {
 420                 if (!udf_CS0toNLS(UDF_SB(sb)->s_nls_map, &filename, &unifilename) )
 421                 {
 422                         udf_debug("Failed in udf_get_filename: sname = %s\n", sname);
 423                         return 0;
 424                 }
 425         }
 426         else
 427                 return 0;
 428
 429         if ((len = udf_translate_to_linux(dname, filename.u_name, filename.u_len,
 430                 unifilename.u_name, unifilename.u_len)))
 431         {
 432                 return len;
 433         }
 434         return 0;
 435 }
 436
 437 #define ILLEGAL_CHAR_MARK       '_'
 438 #define EXT_MARK                        '.'
 439 #define CRC_MARK                        '#'
 440 #define EXT_SIZE                        5
 441
 442 int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName, int udfLen, uint8_t *fidName, int fidNameLen)
 443 {
 444         int index, newIndex = 0, needsCRC = 0;
 445         int extIndex = 0, newExtIndex = 0, hasExt = 0;
 446         unsigned short valueCRC;
 447         uint8_t curr;
 448         const uint8_t hexChar[] = "0123456789ABCDEF";
 449
 450         if (udfName[0] == '.' && (udfLen == 1 ||
 451                 (udfLen == 2 && udfName[1] == '.')))
 452         {
 453                 needsCRC = 1;
 454                 newIndex = udfLen;
 455                 memcpy(newName, udfName, udfLen);
 456         }
 457         else
 458         {
 459                 for (index = 0; index < udfLen; index++)
 460                 {
 461                         curr = udfName[index];
 462                         if (curr == '/' || curr == 0)
 463                         {
 464                                 needsCRC = 1;
 465                                 curr = ILLEGAL_CHAR_MARK;
 466                                 while (index+1 < udfLen && (udfName[index+1] == '/' ||
 467                                         udfName[index+1] == 0))
 468                                         index++;
 469                         }
 470                         if (curr == EXT_MARK && (udfLen - index - 1) <= EXT_SIZE)
 471                         {
 472                                 if (udfLen == index + 1)
 473                                         hasExt = 0;
 474                                 else
 475                                 {
 476                                         hasExt = 1;
 477                                         extIndex = index;
 478                                         newExtIndex = newIndex;
 479                                 }
 480                         }
 481                         if (newIndex < 256)
 482                                 newName[newIndex++] = curr;
 483                         else
 484                                 needsCRC = 1;
 485                 }
 486         }
 487         if (needsCRC)
 488         {
 489                 uint8_t ext[EXT_SIZE];
 490                 int localExtIndex = 0;
 491
 492                 if (hasExt)
 493                 {
 494                         int maxFilenameLen;
 495                         for(index = 0; index<EXT_SIZE && extIndex + index +1 < udfLen;
 496                                 index++ )
 497                         {
 498                                 curr = udfName[extIndex + index + 1];
 499
 500                                 if (curr == '/' || curr == 0)
 501                                 {
 502                                         needsCRC = 1;
 503                                         curr = ILLEGAL_CHAR_MARK;
 504                                         while(extIndex + index + 2 < udfLen && (index + 1 < EXT_SIZE
 505                                                 && (udfName[extIndex + index + 2] == '/' ||
 506                                                         udfName[extIndex + index + 2] == 0)))
 507                                                 index++;
 508                                 }
 509                                 ext[localExtIndex++] = curr;
 510                         }
 511                         maxFilenameLen = 250 - localExtIndex;
 512                         if (newIndex > maxFilenameLen)
 513                                 newIndex = maxFilenameLen;
 514                         else
 515                                 newIndex = newExtIndex;
 516                 }
 517                 else if (newIndex > 250)
 518                         newIndex = 250;
 519                 newName[newIndex++] = CRC_MARK;
 520                 valueCRC = udf_crc(fidName, fidNameLen, 0);
 521                 newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12];
 522                 newName[newIndex++] = hexChar[(valueCRC & 0x0f00) >> 8];
 523                 newName[newIndex++] = hexChar[(valueCRC & 0x00f0) >> 4];
 524                 newName[newIndex++] = hexChar[(valueCRC & 0x000f)];
 525
 526                 if (hasExt)
 527                 {
 528                         newName[newIndex++] = EXT_MARK;
 529                         for (index = 0;index < localExtIndex ;index++ )
 530                                 newName[newIndex++] = ext[index];
 531                 }
 532         }
 533         return newIndex;
 534 }