mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-18 02:46:06 +00:00
74675a5850
This patch (as1239) updates the kernel's treatment of Unicode. The character-set conversion routines are well behind the current state of the Unicode specification: They don't recognize the existence of code points beyond plane 0 or of surrogate pairs in the UTF-16 encoding. The old wchar_t 16-bit type is retained because it's still used in lots of places. This shouldn't cause any new problems; if a conversion now results in an invalid 16-bit code then before it must have yielded an undefined code. Difficult-to-read names like "utf_mbstowcs" are replaced with more transparent names like "utf8s_to_utf16s" and the ordering of the parameters is rationalized (buffer lengths come immediately after the pointers they refer to, and the inputs precede the outputs). Fortunately the low-level conversion routines are used in only a few places; the interfaces to the higher-level uni2char and char2uni methods have been left unchanged. Signed-off-by: Alan Stern <stern@rowland.harvard.edu> Acked-by: Clemens Ladisch <clemens@ladisch.de> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
70 lines
1.3 KiB
C
70 lines
1.3 KiB
C
/*
|
|
* linux/fs/isofs/joliet.c
|
|
*
|
|
* (C) 1996 Gordon Chaffee
|
|
*
|
|
* Joliet: Microsoft's Unicode extensions to iso9660
|
|
*/
|
|
|
|
#include <linux/types.h>
|
|
#include <linux/nls.h>
|
|
#include "isofs.h"
|
|
|
|
/*
|
|
* Convert Unicode 16 to UTF-8 or ASCII.
|
|
*/
|
|
static int
|
|
uni16_to_x8(unsigned char *ascii, __be16 *uni, int len, struct nls_table *nls)
|
|
{
|
|
__be16 *ip, ch;
|
|
unsigned char *op;
|
|
|
|
ip = uni;
|
|
op = ascii;
|
|
|
|
while ((ch = get_unaligned(ip)) && len) {
|
|
int llen;
|
|
llen = nls->uni2char(be16_to_cpu(ch), op, NLS_MAX_CHARSET_SIZE);
|
|
if (llen > 0)
|
|
op += llen;
|
|
else
|
|
*op++ = '?';
|
|
ip++;
|
|
|
|
len--;
|
|
}
|
|
*op = 0;
|
|
return (op - ascii);
|
|
}
|
|
|
|
int
|
|
get_joliet_filename(struct iso_directory_record * de, unsigned char *outname, struct inode * inode)
|
|
{
|
|
unsigned char utf8;
|
|
struct nls_table *nls;
|
|
unsigned char len = 0;
|
|
|
|
utf8 = ISOFS_SB(inode->i_sb)->s_utf8;
|
|
nls = ISOFS_SB(inode->i_sb)->s_nls_iocharset;
|
|
|
|
if (utf8) {
|
|
len = utf16s_to_utf8s((const wchar_t *) de->name,
|
|
de->name_len[0] >> 1, UTF16_BIG_ENDIAN,
|
|
outname, PAGE_SIZE);
|
|
} else {
|
|
len = uni16_to_x8(outname, (__be16 *) de->name,
|
|
de->name_len[0] >> 1, nls);
|
|
}
|
|
if ((len > 2) && (outname[len-2] == ';') && (outname[len-1] == '1'))
|
|
len -= 2;
|
|
|
|
/*
|
|
* Windows doesn't like periods at the end of a name,
|
|
* so neither do we
|
|
*/
|
|
while (len >= 2 && (outname[len-1] == '.'))
|
|
len--;
|
|
|
|
return len;
|
|
}
|