import of ftp.dlink.com/GPL/DSMG-600_reB/ppclinux.tar.gz
[linux-2.4.21-pre4.git] / fs / udf / unicode.c
1 /*
2  * unicode.c
3  *
4  * PURPOSE
5  *      Routines for converting between UTF-8 and OSTA Compressed Unicode.
6  *      Also handles filename mangling
7  *
8  * DESCRIPTION
9  *      OSTA Compressed Unicode is explained in the OSTA UDF specification.
10  *              http://www.osta.org/
11  *      UTF-8 is explained in IETF RFC 3629.
12  *              https://www.rfc-editor.org/rfc/rfc3629.txt
13  *
14  * CONTACTS
15  *      E-mail regarding any portion of the Linux UDF file system should be
16  *      directed to the development team's mailing list (run by majordomo):
17  *              linux_udf@hpesjro.fc.hp.com
18  *
19  * COPYRIGHT
20  *      This file is distributed under the terms of the GNU General Public
21  *      License (GPL). Copies of the GPL can be obtained from:
22  *              ftp://prep.ai.mit.edu/pub/gnu/GPL
23  *      Each contributing author retains all rights to their own work.
24  */
25
26 #include "udfdecl.h"
27
28 #include <linux/kernel.h>
29 #include <linux/string.h>       /* for memset */
30 #include <linux/nls.h>
31 #include <linux/udf_fs.h>
32
33 #include "udf_sb.h"
34
35 int udf_ustr_to_dchars(uint8_t *dest, const struct ustr *src, int strlen)
36 {
37         if ( (!dest) || (!src) || (!strlen) || (src->u_len > strlen) )
38                 return 0;
39         memcpy(dest+1, src->u_name, src->u_len);
40         dest[0] = src->u_cmpID;
41         return src->u_len + 1;
42 }
43
44 int udf_ustr_to_char(uint8_t *dest, const struct ustr *src, int strlen)
45 {
46         if ( (!dest) || (!src) || (!strlen) || (src->u_len >= strlen) )
47                 return 0;
48         memcpy(dest, src->u_name, src->u_len);
49         return src->u_len;
50 }
51
52 int udf_ustr_to_dstring(dstring *dest, const struct ustr *src, int dlength)
53 {
54         if ( udf_ustr_to_dchars(dest, src, dlength-1) )
55         {
56                 dest[dlength-1] = src->u_len + 1;
57                 return dlength;
58         }
59         else
60                 return 0;
61 }
62
63 int udf_dchars_to_ustr(struct ustr *dest, const uint8_t *src, int strlen)
64 {
65         if ( (!dest) || (!src) || (!strlen) || (strlen > UDF_NAME_LEN) )
66                 return 0;
67         memset(dest, 0, sizeof(struct ustr));
68         memcpy(dest->u_name, src+1, strlen-1);
69         dest->u_cmpID = src[0];
70         dest->u_len = strlen-1;
71         return strlen-1;
72 }
73
74 int udf_char_to_ustr(struct ustr *dest, const uint8_t *src, int strlen)
75 {
76         if ( (!dest) || (!src) || (!strlen) || (strlen >= UDF_NAME_LEN) )
77                 return 0;
78         memset(dest, 0, sizeof(struct ustr));
79         memcpy(dest->u_name, src, strlen);
80         dest->u_cmpID = 0x08;
81         dest->u_len = strlen;
82         return strlen;
83 }
84
85
86 int udf_dstring_to_ustr(struct ustr *dest, const dstring *src, int dlength)
87 {
88         if ( dlength && udf_dchars_to_ustr(dest, src, src[dlength-1]) )
89                 return dlength;
90         else
91                 return 0;
92 }
93
94 /*
95  * udf_build_ustr
96  */
97 int udf_build_ustr(struct ustr *dest, dstring *ptr, int size)
98 {
99         int usesize;
100
101         if ( (!dest) || (!ptr) || (!size) )
102                 return -1;
103
104         memset(dest, 0, sizeof(struct ustr));
105         usesize= (size > UDF_NAME_LEN) ? UDF_NAME_LEN : size;
106         dest->u_cmpID=ptr[0];
107         dest->u_len=ptr[size-1];
108         memcpy(dest->u_name, ptr+1, usesize-1);
109         return 0;
110 }
111
112 /*
113  * udf_build_ustr_exact
114  */
115 int udf_build_ustr_exact(struct ustr *dest, dstring *ptr, int exactsize)
116 {
117         if ( (!dest) || (!ptr) || (!exactsize) )
118                 return -1;
119
120         memset(dest, 0, sizeof(struct ustr));
121         dest->u_cmpID=ptr[0];
122         dest->u_len=exactsize-1;
123         memcpy(dest->u_name, ptr+1, exactsize-1);
124         return 0;
125 }
126
127 /*
128  * udf_ocu_to_utf8
129  *
130  * PURPOSE
131  *      Convert OSTA Compressed Unicode to the UTF-8 equivalent.
132  *
133  * DESCRIPTION
134  *      This routine is only called by udf_filldir().
135  *
136  * PRE-CONDITIONS
137  *      utf                     Pointer to UTF-8 output buffer.
138  *      ocu                     Pointer to OSTA Compressed Unicode input buffer
139  *                              of size UDF_NAME_LEN bytes.
140  *                              both of type "struct ustr *"
141  *
142  * POST-CONDITIONS
143  *      <return>                Zero on success.
144  *
145  * HISTORY
146  *      November 12, 1997 - Andrew E. Mileski
147  *      Written, tested, and released.
148  */
149 int udf_CS0toUTF8(struct ustr *utf_o, struct ustr *ocu_i)
150 {
151         uint8_t *ocu;
152         uint32_t c;
153         uint8_t cmp_id, ocu_len;
154         int i;
155
156         ocu = ocu_i->u_name;
157
158         ocu_len = ocu_i->u_len;
159         cmp_id = ocu_i->u_cmpID;
160         utf_o->u_len = 0;
161
162         if (ocu_len == 0)
163         {
164                 memset(utf_o, 0, sizeof(struct ustr));
165                 utf_o->u_cmpID = 0;
166                 utf_o->u_len = 0;
167                 return 0;
168         }
169
170         if ((cmp_id != 8) && (cmp_id != 16))
171         {
172                 printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n", cmp_id, ocu_i->u_name);
173                 return 0;
174         }
175
176         for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN-3)) ;)
177         {
178
179                 /* Expand OSTA compressed Unicode to Unicode */
180                 c = ocu[i++];
181                 if (cmp_id == 16)
182                         c = (c << 8) | ocu[i++];
183
184                 /* Compress Unicode to UTF-8 */
185                 if (c < 0x80U)
186                         utf_o->u_name[utf_o->u_len++] = (uint8_t)c;
187                 else if (c < 0x800U)
188                 {
189                         utf_o->u_name[utf_o->u_len++] = (uint8_t)(0xc0 | (c >> 6));
190                         utf_o->u_name[utf_o->u_len++] = (uint8_t)(0x80 | (c & 0x3f));
191                 }
192                 else
193                 {
194                         utf_o->u_name[utf_o->u_len++] = (uint8_t)(0xe0 | (c >> 12));
195                         utf_o->u_name[utf_o->u_len++] = (uint8_t)(0x80 | ((c >> 6) & 0x3f));
196                         utf_o->u_name[utf_o->u_len++] = (uint8_t)(0x80 | (c & 0x3f));
197                 }
198         }
199         utf_o->u_cmpID=8;
200
201         return utf_o->u_len;
202 }
203
204 /*
205  *
206  * udf_utf8_to_ocu
207  *
208  * PURPOSE
209  *      Convert UTF-8 to the OSTA Compressed Unicode equivalent.
210  *
211  * DESCRIPTION
212  *      This routine is only called by udf_lookup().
213  *
214  * PRE-CONDITIONS
215  *      ocu                     Pointer to OSTA Compressed Unicode output
216  *                              buffer of size UDF_NAME_LEN bytes.
217  *      utf                     Pointer to UTF-8 input buffer.
218  *      utf_len                 Length of UTF-8 input buffer in bytes.
219  *
220  * POST-CONDITIONS
221  *      <return>                Zero on success.
222  *
223  * HISTORY
224  *      November 12, 1997 - Andrew E. Mileski
225  *      Written, tested, and released.
226  */
227 int udf_UTF8toCS0(dstring *ocu, struct ustr *utf, int length)
228 {
229         unsigned c, i, max_val, utf_char;
230         int utf_cnt;
231         int u_len = 0;
232
233         memset(ocu, 0, sizeof(dstring) * length);
234         ocu[0] = 8;
235         max_val = 0xffU;
236
237 try_again:
238         utf_char = 0U;
239         utf_cnt = 0U;
240         for (i = 0U; i < utf->u_len; i++)
241         {
242                 c = (uint8_t)utf->u_name[i];
243
244                 /* Complete a multi-byte UTF-8 character */
245                 if (utf_cnt)
246                 {
247                         utf_char = (utf_char << 6) | (c & 0x3fU);
248                         if (--utf_cnt)
249                                 continue;
250                 }
251                 else
252                 {
253                         /* Check for a multi-byte UTF-8 character */
254                         if (c & 0x80U)
255                         {
256                                 /* Start a multi-byte UTF-8 character */
257                                 if ((c & 0xe0U) == 0xc0U)
258                                 {
259                                         utf_char = c & 0x1fU;
260                                         utf_cnt = 1;
261                                 }
262                                 else if ((c & 0xf0U) == 0xe0U)
263                                 {
264                                         utf_char = c & 0x0fU;
265                                         utf_cnt = 2;
266                                 }
267                                 else if ((c & 0xf8U) == 0xf0U)
268                                 {
269                                         utf_char = c & 0x07U;
270                                         utf_cnt = 3;
271                                 }
272                                 else if ((c & 0xfcU) == 0xf8U)
273                                 {
274                                         utf_char = c & 0x03U;
275                                         utf_cnt = 4;
276                                 }
277                                 else if ((c & 0xfeU) == 0xfcU)
278                                 {
279                                         utf_char = c & 0x01U;
280                                         utf_cnt = 5;
281                                 }
282                                 else
283                                         goto error_out;
284                                 continue;
285                         } else
286                                 /* Single byte UTF-8 character (most common) */
287                                 utf_char = c;
288                 }
289
290                 /* Choose no compression if necessary */
291                 if (utf_char > max_val)
292                 {
293                         if ( 0xffU == max_val )
294                         {
295                                 max_val = 0xffffU;
296                                 ocu[0] = (uint8_t)0x10U;
297                                 goto try_again;
298                         }
299                         goto error_out;
300                 }
301
302                 if (max_val == 0xffffU)
303                 {
304                         ocu[++u_len] = (uint8_t)(utf_char >> 8);
305                 }
306                 ocu[++u_len] = (uint8_t)(utf_char & 0xffU);
307         }
308
309
310         if (utf_cnt)
311         {
312 error_out:
313                 printk(KERN_ERR "udf: bad UTF-8 character\n");
314                 return 0;
315         }
316
317         ocu[length - 1] = (uint8_t)u_len + 1;
318         return u_len + 1;
319 }
320
321 int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o, struct ustr *ocu_i)
322 {
323         uint8_t *ocu;
324         uint32_t c;
325         uint8_t cmp_id, ocu_len;
326         int i;
327
328         ocu = ocu_i->u_name;
329
330         ocu_len = ocu_i->u_len;
331         cmp_id = ocu_i->u_cmpID;
332         utf_o->u_len = 0;
333
334         if (ocu_len == 0)
335         {
336                 memset(utf_o, 0, sizeof(struct ustr));
337                 utf_o->u_cmpID = 0;
338                 utf_o->u_len = 0;
339                 return 0;
340         }
341
342         if ((cmp_id != 8) && (cmp_id != 16))
343         {
344                 printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n", cmp_id, ocu_i->u_name);
345                 return 0;
346         }
347
348         for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN-3)) ;)
349         {
350                 /* Expand OSTA compressed Unicode to Unicode */
351                 c = ocu[i++];
352                 if (cmp_id == 16)
353                         c = (c << 8) | ocu[i++];
354
355                 utf_o->u_len += nls->uni2char(c, &utf_o->u_name[utf_o->u_len], 
356                         UDF_NAME_LEN - utf_o->u_len);
357         }
358         utf_o->u_cmpID=8;
359
360         return utf_o->u_len;
361 }
362
363 int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni, int length)
364 {
365         unsigned len, i, max_val;
366         uint16_t uni_char;
367         int uni_cnt;
368         int u_len = 0;
369
370         memset(ocu, 0, sizeof(dstring) * length);
371         ocu[0] = 8;
372         max_val = 0xffU;
373
374 try_again:
375         uni_char = 0U;
376         uni_cnt = 0U;
377         for (i = 0U; i < uni->u_len; i++)
378         {
379                 len = nls->char2uni(&uni->u_name[i], uni->u_len-i, &uni_char);
380
381                 if (len == 2 && max_val == 0xff)
382                 {
383                         max_val = 0xffffU;
384                         ocu[0] = (uint8_t)0x10U;
385                         goto try_again;
386                 }
387                 
388                 if (max_val == 0xffffU)
389                 {
390                         ocu[++u_len] = (uint8_t)(uni_char >> 8);
391                         i++;
392                 }
393                 ocu[++u_len] = (uint8_t)(uni_char & 0xffU);
394         }
395
396         ocu[length - 1] = (uint8_t)u_len + 1;
397         return u_len + 1;
398 }
399
400 int udf_get_filename(struct super_block *sb, uint8_t *sname, uint8_t *dname, int flen)
401 {
402         struct ustr filename, unifilename;
403         int len;
404
405         if (udf_build_ustr_exact(&unifilename, sname, flen))
406         {
407                 return 0;
408         }
409
410         if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8))
411         {
412                 if (!udf_CS0toUTF8(&filename, &unifilename) )
413                 {
414                         udf_debug("Failed in udf_get_filename: sname = %s\n", sname);
415                         return 0;
416                 }
417         }
418         else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP))
419         {
420                 if (!udf_CS0toNLS(UDF_SB(sb)->s_nls_map, &filename, &unifilename) )
421                 {
422                         udf_debug("Failed in udf_get_filename: sname = %s\n", sname);
423                         return 0;
424                 }
425         }
426         else
427                 return 0;
428
429         if ((len = udf_translate_to_linux(dname, filename.u_name, filename.u_len,
430                 unifilename.u_name, unifilename.u_len)))
431         {
432                 return len;
433         }
434         return 0;
435 }
436
437 #define ILLEGAL_CHAR_MARK       '_'
438 #define EXT_MARK                        '.'
439 #define CRC_MARK                        '#'
440 #define EXT_SIZE                        5
441
442 int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName, int udfLen, uint8_t *fidName, int fidNameLen)
443 {
444         int index, newIndex = 0, needsCRC = 0;  
445         int extIndex = 0, newExtIndex = 0, hasExt = 0;
446         unsigned short valueCRC;
447         uint8_t curr;
448         const uint8_t hexChar[] = "0123456789ABCDEF";
449
450         if (udfName[0] == '.' && (udfLen == 1 ||
451                 (udfLen == 2 && udfName[1] == '.')))
452         {
453                 needsCRC = 1;
454                 newIndex = udfLen;
455                 memcpy(newName, udfName, udfLen);
456         }
457         else
458         {       
459                 for (index = 0; index < udfLen; index++)
460                 {
461                         curr = udfName[index];
462                         if (curr == '/' || curr == 0)
463                         {
464                                 needsCRC = 1;
465                                 curr = ILLEGAL_CHAR_MARK;
466                                 while (index+1 < udfLen && (udfName[index+1] == '/' ||
467                                         udfName[index+1] == 0))
468                                         index++;
469                         }
470                         if (curr == EXT_MARK && (udfLen - index - 1) <= EXT_SIZE)
471                         {
472                                 if (udfLen == index + 1)
473                                         hasExt = 0;
474                                 else
475                                 {
476                                         hasExt = 1;
477                                         extIndex = index;
478                                         newExtIndex = newIndex;
479                                 }
480                         }
481                         if (newIndex < 256)
482                                 newName[newIndex++] = curr;
483                         else
484                                 needsCRC = 1;
485                 }
486         }
487         if (needsCRC)
488         {
489                 uint8_t ext[EXT_SIZE];
490                 int localExtIndex = 0;
491
492                 if (hasExt)
493                 {
494                         int maxFilenameLen;
495                         for(index = 0; index<EXT_SIZE && extIndex + index +1 < udfLen;
496                                 index++ )
497                         {
498                                 curr = udfName[extIndex + index + 1];
499
500                                 if (curr == '/' || curr == 0)
501                                 {
502                                         needsCRC = 1;
503                                         curr = ILLEGAL_CHAR_MARK;
504                                         while(extIndex + index + 2 < udfLen && (index + 1 < EXT_SIZE
505                                                 && (udfName[extIndex + index + 2] == '/' ||
506                                                         udfName[extIndex + index + 2] == 0)))
507                                                 index++;
508                                 }
509                                 ext[localExtIndex++] = curr;
510                         }
511                         maxFilenameLen = 250 - localExtIndex;
512                         if (newIndex > maxFilenameLen)
513                                 newIndex = maxFilenameLen;
514                         else
515                                 newIndex = newExtIndex;
516                 }
517                 else if (newIndex > 250)
518                         newIndex = 250;
519                 newName[newIndex++] = CRC_MARK;
520                 valueCRC = udf_crc(fidName, fidNameLen, 0);
521                 newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12];
522                 newName[newIndex++] = hexChar[(valueCRC & 0x0f00) >> 8];
523                 newName[newIndex++] = hexChar[(valueCRC & 0x00f0) >> 4];
524                 newName[newIndex++] = hexChar[(valueCRC & 0x000f)];
525
526                 if (hasExt)
527                 {
528                         newName[newIndex++] = EXT_MARK;
529                         for (index = 0;index < localExtIndex ;index++ )
530                                 newName[newIndex++] = ext[index];
531                 }
532         }
533         return newIndex;
534 }