diff options
author | Arvind Sankar | 2020-05-18 15:07:15 -0400 |
---|---|---|
committer | Ard Biesheuvel | 2020-05-20 19:09:20 +0200 |
commit | 15c316bcbc23b777eae4e21f129526190aa20af1 (patch) | |
tree | 503a287a15d51fbcc7621f72fe5f447bd6cfa07d /drivers/firmware/efi/libstub | |
parent | a713979e443d4036e76e13bb3d30e9fa248915aa (diff) |
efi/libstub: Get the exact UTF-8 length
efi_convert_cmdline currently overestimates the length of the equivalent
UTF-8 encoding.
snprintf can now be used to do the conversion to UTF-8; however, it does
not have a way to specify the size of the UTF-16 string, only the size
of the resulting UTF-8 string. So in order to use it, we need to
precalculate the exact UTF-8 size.
Signed-off-by: Arvind Sankar <nivedita@alum.mit.edu>
Link: https://lore.kernel.org/r/20200518190716.751506-24-nivedita@alum.mit.edu
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Diffstat (limited to 'drivers/firmware/efi/libstub')
-rw-r--r-- | drivers/firmware/efi/libstub/efi-stub-helper.c | 44 |
1 files changed, 32 insertions, 12 deletions
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index 0d0007355c1e..4d544f395403 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -206,15 +206,6 @@ efi_status_t efi_parse_options(char const *cmdline)
 }
 
 /*
- * Get the number of UTF-8 bytes corresponding to an UTF-16 character.
- * This overestimates for surrogates, but that is okay.
- */
-static int efi_utf8_bytes(u16 c)
-{
-	return 1 + (c >= 0x80) + (c >= 0x800);
-}
-
-/*
  * Convert an UTF-16 string, not necessarily null terminated, to UTF-8.
  */
 static u8 *efi_utf16_to_utf8(u8 *dst, const u16 *src, int n)
@@ -274,10 +265,39 @@ char *efi_convert_cmdline(efi_loaded_image_t *image,
 
 	if (options) {
 		s2 = options;
-		while (*s2 && *s2 != '\n'
-		       && options_chars < load_options_chars) {
-			options_bytes += efi_utf8_bytes(*s2++);
+		while (options_chars < load_options_chars) {
+			u16 c = *s2++;
+
+			if (c == L'\0' || c == L'\n')
+				break;
+			/*
+			 * Get the number of UTF-8 bytes corresponding to a
+			 * UTF-16 character.
+			 * The first part handles everything in the BMP.
+			 */
+			options_bytes += 1 + (c >= 0x80) + (c >= 0x800);
 			options_chars++;
+			/*
+			 * Add one more byte for valid surrogate pairs. Invalid
+			 * surrogates will be replaced with 0xfffd and take up
+			 * only 3 bytes.
+			 */
+			if ((c & 0xfc00) == 0xd800) {
+				/*
+				 * If the very last word is a high surrogate,
+				 * we must ignore it since we can't access the
+				 * low surrogate.
+				 */
+				if (options_chars == load_options_chars) {
+					options_bytes -= 3;
+					options_chars--;
+					break;
+				} else if ((*s2 & 0xfc00) == 0xdc00) {
+					options_bytes++;
+					options_chars++;
+					s2++;
+				}
+			}
 		}
 	}
 