mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-09 23:39:18 +00:00
efi/libstub: Add UTF-8 decoding to efi_puts
In order to be able to use the UTF-16 support added to vsprintf in the previous commit, enhance efi_puts to decode UTF-8 into UTF-16. Invalid UTF-8 encodings are passed through unchanged. Signed-off-by: Arvind Sankar <nivedita@alum.mit.edu> Link: https://lore.kernel.org/r/20200518190716.751506-22-nivedita@alum.mit.edu Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
This commit is contained in:
parent
d850a2ff91
commit
4b75bd363d
@ -36,17 +36,74 @@ void efi_char16_puts(efi_char16_t *str)
|
||||
output_string, str);
|
||||
}
|
||||
|
||||
static
|
||||
u32 utf8_to_utf32(const u8 **s8)
|
||||
{
|
||||
u32 c32;
|
||||
u8 c0, cx;
|
||||
size_t clen, i;
|
||||
|
||||
c0 = cx = *(*s8)++;
|
||||
/*
|
||||
* The position of the most-significant 0 bit gives us the length of
|
||||
* a multi-octet encoding.
|
||||
*/
|
||||
for (clen = 0; cx & 0x80; ++clen)
|
||||
cx <<= 1;
|
||||
/*
|
||||
* If the 0 bit is in position 8, this is a valid single-octet
|
||||
* encoding. If the 0 bit is in position 7 or positions 1-3, the
|
||||
* encoding is invalid.
|
||||
* In either case, we just return the first octet.
|
||||
*/
|
||||
if (clen < 2 || clen > 4)
|
||||
return c0;
|
||||
/* Get the bits from the first octet. */
|
||||
c32 = cx >> clen--;
|
||||
for (i = 0; i < clen; ++i) {
|
||||
/* Trailing octets must have 10 in most significant bits. */
|
||||
cx = (*s8)[i] ^ 0x80;
|
||||
if (cx & 0xc0)
|
||||
return c0;
|
||||
c32 = (c32 << 6) | cx;
|
||||
}
|
||||
/*
|
||||
* Check for validity:
|
||||
* - The character must be in the Unicode range.
|
||||
* - It must not be a surrogate.
|
||||
* - It must be encoded using the correct number of octets.
|
||||
*/
|
||||
if (c32 > 0x10ffff ||
|
||||
(c32 & 0xf800) == 0xd800 ||
|
||||
clen != (c32 >= 0x80) + (c32 >= 0x800) + (c32 >= 0x10000))
|
||||
return c0;
|
||||
*s8 += clen;
|
||||
return c32;
|
||||
}
|
||||
|
||||
void efi_puts(const char *str)
|
||||
{
|
||||
efi_char16_t buf[128];
|
||||
size_t pos = 0, lim = ARRAY_SIZE(buf);
|
||||
const u8 *s8 = (const u8 *)str;
|
||||
u32 c32;
|
||||
|
||||
while (*str) {
|
||||
if (*str == '\n')
|
||||
while (*s8) {
|
||||
if (*s8 == '\n')
|
||||
buf[pos++] = L'\r';
|
||||
/* Cast to unsigned char to avoid sign-extension */
|
||||
buf[pos++] = (unsigned char)(*str++);
|
||||
if (*str == '\0' || pos >= lim - 2) {
|
||||
c32 = utf8_to_utf32(&s8);
|
||||
if (c32 < 0x10000) {
|
||||
/* Characters in plane 0 use a single word. */
|
||||
buf[pos++] = c32;
|
||||
} else {
|
||||
/*
|
||||
* Characters in other planes encode into a surrogate
|
||||
* pair.
|
||||
*/
|
||||
buf[pos++] = (0xd800 - (0x10000 >> 10)) + (c32 >> 10);
|
||||
buf[pos++] = 0xdc00 + (c32 & 0x3ff);
|
||||
}
|
||||
if (*s8 == '\0' || pos >= lim - 2) {
|
||||
buf[pos] = L'\0';
|
||||
efi_char16_puts(buf);
|
||||
pos = 0;
|
||||
|
Loading…
x
Reference in New Issue
Block a user