Alan Stern 74675a5850 NLS: update handling of Unicode
This patch (as1239) updates the kernel's treatment of Unicode.  The
character-set conversion routines are well behind the current state of
the Unicode specification: They don't recognize the existence of code
points beyond plane 0 or of surrogate pairs in the UTF-16 encoding.

The old wchar_t 16-bit type is retained because it's still used in
lots of places.  This shouldn't cause any new problems; if a
conversion now results in an invalid 16-bit code then before it must
have yielded an undefined code.

Difficult-to-read names like "utf_mbstowcs" are replaced with more
transparent names like "utf8s_to_utf16s" and the ordering of the
parameters is rationalized (buffer lengths come immediate after the
pointers they refer to, and the inputs precede the outputs).
Fortunately the low-level conversion routines are used in only a few
places; the interfaces to the higher-level uni2char and char2uni
methods have been left unchanged.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
2009-06-15 21:44:43 -07:00

70 lines
1.3 KiB
C

/*
* linux/fs/isofs/joliet.c
*
* (C) 1996 Gordon Chaffee
*
* Joliet: Microsoft's Unicode extensions to iso9660
*/
#include <linux/types.h>
#include <linux/nls.h>
#include "isofs.h"
/*
* Convert Unicode 16 to UTF-8 or ASCII.
*/
static int
uni16_to_x8(unsigned char *ascii, __be16 *uni, int len, struct nls_table *nls)
{
__be16 *ip, ch;
unsigned char *op;
ip = uni;
op = ascii;
while ((ch = get_unaligned(ip)) && len) {
int llen;
llen = nls->uni2char(be16_to_cpu(ch), op, NLS_MAX_CHARSET_SIZE);
if (llen > 0)
op += llen;
else
*op++ = '?';
ip++;
len--;
}
*op = 0;
return (op - ascii);
}
int
get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, struct inode * inode)
{
unsigned char utf8;
struct nls_table *nls;
unsigned char len = 0;
utf8 = ISOFS_SB(inode->i_sb)->s_utf8;
nls = ISOFS_SB(inode->i_sb)->s_nls_iocharset;
if (utf8) {
len = utf16s_to_utf8s((const wchar_t *) de->name,
de->name_len[0] >> 1, UTF16_BIG_ENDIAN,
outname, PAGE_SIZE);
} else {
len = uni16_to_x8(outname, (__be16 *) de->name,
de->name_len[0] >> 1, nls);
}
if ((len > 2) && (outname[len-2] == ';') && (outname[len-1] == '1'))
len -= 2;
/*
* Windows doesn't like periods at the end of a name,
* so neither do we
*/
while (len >= 2 && (outname[len-1] == '.'))
len--;
return len;
}