utf8: automatically determine length of string if SIZE_MAX is specified

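Let's make utf8_to_utf16() and utf16_to_utf8() a bit nicer to use by adding shortcuts for common cases: if SIZE_MAX is passed as the length, the length of the input string is determined automatically, and if 0 is passed, an empty string is returned. This is particularly relevant for utf16_to_utf8(), since its length parameter is in bytes rather than characters and the multiplication by 2 is easy to forget.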
2026-03-06 15:02:31 -08:00 · 2023-08-14 13:15:52 +02:00
parent b56ee69233
commit ba091282e5
6 changed files with 30 additions and 9 deletions
--- a/src/basic/efivars.c
+++ b/src/basic/efivars.c
@@ -265,7 +265,7 @@ finish:
 int efi_set_variable_string(const char *variable, const char *value) {
        _cleanup_free_ char16_t *u16 = NULL;

-        u16 = utf8_to_utf16(value, strlen(value));
+        u16 = utf8_to_utf16(value, SIZE_MAX);
        if (!u16)
                return -ENOMEM;

--- a/src/basic/utf8.c
+++ b/src/basic/utf8.c
@@ -389,11 +389,23 @@ char *utf16_to_utf8(const char16_t *s, size_t length /* bytes! */) {
        const uint8_t *f;
        char *r, *t;

+        if (length == 0)
+                return new0(char, 1);
+
        assert(s);

+        if (length == SIZE_MAX) {
+                length = char16_strlen(s);
+
+                if (length > SIZE_MAX/2)
+                        return NULL; /* overflow */
+
+                length *= 2;
+        }
+
        /* Input length is in bytes, i.e. the shortest possible character takes 2 bytes. Each unicode character may
         * take up to 4 bytes in UTF-8. Let's also account for a trailing NUL byte. */
-        if (length * 2 < length)
+        if (length > (SIZE_MAX - 1) / 2)
                return NULL; /* overflow */

        r = new(char, length * 2 + 1);
@@ -463,8 +475,17 @@ char16_t *utf8_to_utf16(const char *s, size_t length) {
        char16_t *n, *p;
        int r;

+        if (length == 0)
+                return new0(char16_t, 1);
+
        assert(s);

+        if (length == SIZE_MAX)
+                length = strlen(s);
+
+        if (length > SIZE_MAX - 1)
+                return NULL; /* overflow */
+
        n = new(char16_t, length + 1);
        if (!n)
                return NULL;
--- a/src/boot/bootctl-set-efivar.c
+++ b/src/boot/bootctl-set-efivar.c
@@ -34,7 +34,7 @@ static int parse_timeout(const char *arg1, char16_t **ret_timeout, size_t *ret_t

        xsprintf(utf8, USEC_FMT, MIN(timeout / USEC_PER_SEC, UINT32_MAX));

-        encoded = utf8_to_utf16(utf8, strlen(utf8));
+        encoded = utf8_to_utf16(utf8, SIZE_MAX);
        if (!encoded)
                return log_oom();

@@ -69,7 +69,7 @@ static int parse_loader_entry_target_arg(const char *arg1, char16_t **ret_target
        } else if (arg1[0] == '@' && !streq(arg1, "@saved"))
                return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Unsupported special entry identifier: %s", arg1);
        else {
-                encoded = utf8_to_utf16(arg1, strlen(arg1));
+                encoded = utf8_to_utf16(arg1, SIZE_MAX);
                if (!encoded)
                        return log_oom();

--- a/src/boot/efi/test-bcd.c
+++ b/src/boot/efi/test-bcd.c
@@ -152,7 +152,7 @@ TEST(argv_bcds) {

                char16_t *title = get_bcd_title(bcd, len);
                if (title) {
-                        _cleanup_free_ char *title_utf8 = utf16_to_utf8(title, char16_strlen(title) * 2);
+                        _cleanup_free_ char *title_utf8 = utf16_to_utf8(title, SIZE_MAX);
                        log_info("%s: \"%s\"", saved_argv[i], title_utf8);
                } else
                        log_info("%s: Bad BCD", saved_argv[i]);
--- a/src/shared/gpt.c
+++ b/src/shared/gpt.c
@@ -273,7 +273,7 @@ Architecture gpt_partition_type_uuid_to_arch(sd_id128_t id) {
 int gpt_partition_label_valid(const char *s) {
        _cleanup_free_ char16_t *recoded = NULL;

-        recoded = utf8_to_utf16(s, strlen(s));
+        recoded = utf8_to_utf16(s, SIZE_MAX);
        if (!recoded)
                return -ENOMEM;

--- a/src/test/test-utf8.c
+++ b/src/test/test-utf8.c
@@ -183,7 +183,7 @@ TEST(utf16_to_utf8) {
        assert_se(b);

        free(a);
-        a = utf16_to_utf8(b, char16_strlen(b) * 2);
+        a = utf16_to_utf8(b, SIZE_MAX);
        assert_se(a);
        assert_se(strlen(a) == sizeof(utf8));
        assert_se(memcmp(a, utf8, sizeof(utf8)) == 0);
@@ -218,10 +218,10 @@ TEST(utf8_to_utf16) {
                _cleanup_free_ char16_t *a = NULL;
                _cleanup_free_ char *b = NULL;

-                a = utf8_to_utf16(p, strlen(p));
+                a = utf8_to_utf16(p, SIZE_MAX);
                assert_se(a);

-                b = utf16_to_utf8(a, char16_strlen(a) * 2);
+                b = utf16_to_utf8(a, SIZE_MAX);
                assert_se(b);
                assert_se(streq(p, b));
        }