utf8: automatically determine length of string if SIZE_MAX is specified

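Let's make utf8_to_utf16() and utf16_to_utf8() a bit nicer to use by
adding a shortcut for the common case of NUL-terminated input: if
SIZE_MAX is passed as the length, the functions determine the length of
the string themselves.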

This is particularly relevant for utf16_to_utf8(), since its length
parameter is specified in bytes and the multiplication by 2 is easy to
forget.
This commit is contained in:
Lennart Poettering
2023-08-14 13:15:52 +02:00
parent b56ee69233
commit ba091282e5
6 changed files with 30 additions and 9 deletions

View File

@@ -265,7 +265,7 @@ finish:
int efi_set_variable_string(const char *variable, const char *value) {
_cleanup_free_ char16_t *u16 = NULL;
u16 = utf8_to_utf16(value, strlen(value));
u16 = utf8_to_utf16(value, SIZE_MAX);
if (!u16)
return -ENOMEM;

View File

@@ -389,11 +389,23 @@ char *utf16_to_utf8(const char16_t *s, size_t length /* bytes! */) {
const uint8_t *f;
char *r, *t;
if (length == 0)
return new0(char, 1);
assert(s);
if (length == SIZE_MAX) {
length = char16_strlen(s);
if (length > SIZE_MAX/2)
return NULL; /* overflow */
length *= 2;
}
/* Input length is in bytes, i.e. the shortest possible character takes 2 bytes. Each unicode character may
* take up to 4 bytes in UTF-8. Let's also account for a trailing NUL byte. */
if (length * 2 < length)
if (length > (SIZE_MAX - 1) / 2)
return NULL; /* overflow */
r = new(char, length * 2 + 1);
@@ -463,8 +475,17 @@ char16_t *utf8_to_utf16(const char *s, size_t length) {
char16_t *n, *p;
int r;
if (length == 0)
return new0(char16_t, 1);
assert(s);
if (length == SIZE_MAX)
length = strlen(s);
if (length > SIZE_MAX - 1)
return NULL; /* overflow */
n = new(char16_t, length + 1);
if (!n)
return NULL;

View File

@@ -34,7 +34,7 @@ static int parse_timeout(const char *arg1, char16_t **ret_timeout, size_t *ret_t
xsprintf(utf8, USEC_FMT, MIN(timeout / USEC_PER_SEC, UINT32_MAX));
encoded = utf8_to_utf16(utf8, strlen(utf8));
encoded = utf8_to_utf16(utf8, SIZE_MAX);
if (!encoded)
return log_oom();
@@ -69,7 +69,7 @@ static int parse_loader_entry_target_arg(const char *arg1, char16_t **ret_target
} else if (arg1[0] == '@' && !streq(arg1, "@saved"))
return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unsupported special entry identifier: %s", arg1);
else {
encoded = utf8_to_utf16(arg1, strlen(arg1));
encoded = utf8_to_utf16(arg1, SIZE_MAX);
if (!encoded)
return log_oom();

View File

@@ -152,7 +152,7 @@ TEST(argv_bcds) {
char16_t *title = get_bcd_title(bcd, len);
if (title) {
_cleanup_free_ char *title_utf8 = utf16_to_utf8(title, char16_strlen(title) * 2);
_cleanup_free_ char *title_utf8 = utf16_to_utf8(title, SIZE_MAX);
log_info("%s: \"%s\"", saved_argv[i], title_utf8);
} else
log_info("%s: Bad BCD", saved_argv[i]);

View File

@@ -273,7 +273,7 @@ Architecture gpt_partition_type_uuid_to_arch(sd_id128_t id) {
int gpt_partition_label_valid(const char *s) {
_cleanup_free_ char16_t *recoded = NULL;
recoded = utf8_to_utf16(s, strlen(s));
recoded = utf8_to_utf16(s, SIZE_MAX);
if (!recoded)
return -ENOMEM;

View File

@@ -183,7 +183,7 @@ TEST(utf16_to_utf8) {
assert_se(b);
free(a);
a = utf16_to_utf8(b, char16_strlen(b) * 2);
a = utf16_to_utf8(b, SIZE_MAX);
assert_se(a);
assert_se(strlen(a) == sizeof(utf8));
assert_se(memcmp(a, utf8, sizeof(utf8)) == 0);
@@ -218,10 +218,10 @@ TEST(utf8_to_utf16) {
_cleanup_free_ char16_t *a = NULL;
_cleanup_free_ char *b = NULL;
a = utf8_to_utf16(p, strlen(p));
a = utf8_to_utf16(p, SIZE_MAX);
assert_se(a);
b = utf16_to_utf8(a, char16_strlen(a) * 2);
b = utf16_to_utf8(a, SIZE_MAX);
assert_se(b);
assert_se(streq(p, b));
}