From 449d530700aef1ec65c3677747889b8eea49bc12 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 10 Dec 2019 21:31:54 +0100 Subject: [PATCH 01/13] makefs: simplify SPDX header --- src/partition/makefs.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/partition/makefs.c b/src/partition/makefs.c index ee4907f73f..951989cbb6 100644 --- a/src/partition/makefs.c +++ b/src/partition/makefs.c @@ -1,6 +1,4 @@ -/*** - SPDX-License-Identifier: LGPL-2.1+ -***/ +/* SPDX-License-Identifier: LGPL-2.1+ */ #include #include From 1293a168f16d16682e1b1628dddb67b62dc02c64 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 10 Dec 2019 11:32:59 +0100 Subject: [PATCH 02/13] id128: move make_v4_uuid into id128-util.h to make it generally useful --- src/libsystemd/sd-id128/id128-util.c | 13 +++++++++++++ src/libsystemd/sd-id128/id128-util.h | 2 ++ src/libsystemd/sd-id128/sd-id128.c | 17 ++--------------- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/src/libsystemd/sd-id128/id128-util.c b/src/libsystemd/sd-id128/id128-util.c index 985872b82d..335f22b920 100644 --- a/src/libsystemd/sd-id128/id128-util.c +++ b/src/libsystemd/sd-id128/id128-util.c @@ -190,4 +190,17 @@ int id128_compare_func(const sd_id128_t *a, const sd_id128_t *b) { return memcmp(a, b, 16); } +sd_id128_t id128_make_v4_uuid(sd_id128_t id) { + /* Stolen from generate_random_uuid() of drivers/char/random.c + * in the kernel sources */ + + /* Set UUID version to 4 --- truly random generation */ + id.bytes[6] = (id.bytes[6] & 0x0F) | 0x40; + + /* Set the UUID variant to DCE */ + id.bytes[8] = (id.bytes[8] & 0x3F) | 0x80; + + return id; +} + DEFINE_HASH_OPS(id128_hash_ops, sd_id128_t, id128_hash_func, id128_compare_func); diff --git a/src/libsystemd/sd-id128/id128-util.h b/src/libsystemd/sd-id128/id128-util.h index fe0149a8aa..1901bf119f 100644 --- a/src/libsystemd/sd-id128/id128-util.h +++ b/src/libsystemd/sd-id128/id128-util.h @@ -30,3 +30,5 @@ int id128_write(const 
char *p, Id128Format f, sd_id128_t id, bool do_sync); void id128_hash_func(const sd_id128_t *p, struct siphash *state); int id128_compare_func(const sd_id128_t *a, const sd_id128_t *b) _pure_; extern const struct hash_ops id128_hash_ops; + +sd_id128_t id128_make_v4_uuid(sd_id128_t id); diff --git a/src/libsystemd/sd-id128/sd-id128.c b/src/libsystemd/sd-id128/sd-id128.c index b331a6b432..9b38ef0c56 100644 --- a/src/libsystemd/sd-id128/sd-id128.c +++ b/src/libsystemd/sd-id128/sd-id128.c @@ -250,19 +250,6 @@ _public_ int sd_id128_get_invocation(sd_id128_t *ret) { return 0; } -static sd_id128_t make_v4_uuid(sd_id128_t id) { - /* Stolen from generate_random_uuid() of drivers/char/random.c - * in the kernel sources */ - - /* Set UUID version to 4 --- truly random generation */ - id.bytes[6] = (id.bytes[6] & 0x0F) | 0x40; - - /* Set the UUID variant to DCE */ - id.bytes[8] = (id.bytes[8] & 0x3F) | 0x80; - - return id; -} - _public_ int sd_id128_randomize(sd_id128_t *ret) { sd_id128_t t; int r; @@ -279,7 +266,7 @@ _public_ int sd_id128_randomize(sd_id128_t *ret) { * only guarantee this for newly generated UUIDs, not for * pre-existing ones. 
*/ - *ret = make_v4_uuid(t); + *ret = id128_make_v4_uuid(t); return 0; } @@ -306,7 +293,7 @@ static int get_app_specific(sd_id128_t base, sd_id128_t app_id, sd_id128_t *ret) /* We chop off the trailing 16 bytes */ memcpy(&result, p, MIN(khash_get_size(h), sizeof(result))); - *ret = make_v4_uuid(result); + *ret = id128_make_v4_uuid(result); return 0; } From 137688dff466a2e85585f796bb2c3f43d871d05c Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 10 Dec 2019 21:28:16 +0100 Subject: [PATCH 03/13] format-table: add support for formatting uuids/id128 values --- src/shared/format-table.c | 52 ++++++++++++++++++++++++++++++++++++++- src/shared/format-table.h | 2 ++ 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/src/shared/format-table.c b/src/shared/format-table.c index d7cb976757..4250130464 100644 --- a/src/shared/format-table.c +++ b/src/shared/format-table.c @@ -4,12 +4,15 @@ #include #include +#include "sd-id128.h" + #include "alloc-util.h" #include "fd-util.h" #include "fileio.h" #include "format-table.h" #include "format-util.h" #include "gunicode.h" +#include "id128-util.h" #include "in-addr-util.h" #include "locale-util.h" #include "memory-util.h" @@ -94,6 +97,7 @@ typedef struct TableData { int percent; /* we use 'int' as datatype for percent values in order to match the result of parse_percent() */ int ifindex; union in_addr_union address; + sd_id128_t id128; /* … add more here as we start supporting more cell data types … */ }; } TableData; @@ -289,6 +293,10 @@ static size_t table_data_size(TableDataType type, const void *data) { case TABLE_IN6_ADDR: return sizeof(struct in6_addr); + case TABLE_UUID: + case TABLE_ID128: + return sizeof(sd_id128_t); + default: assert_not_reached("Uh? 
Unexpected cell type"); } @@ -335,7 +343,6 @@ static bool table_data_matches( k = table_data_size(type, data); l = table_data_size(d->type, d->data); - if (k != l) return false; @@ -778,6 +785,7 @@ int table_add_many_internal(Table *t, TableDataType first_type, ...) { int ifindex; bool b; union in_addr_union address; + sd_id128_t id128; } buffer; switch (type) { @@ -901,6 +909,12 @@ int table_add_many_internal(Table *t, TableDataType first_type, ...) { data = &buffer.address.in6; break; + case TABLE_UUID: + case TABLE_ID128: + buffer.id128 = va_arg(ap, sd_id128_t); + data = &buffer.id128; + break; + case TABLE_SET_MINIMUM_WIDTH: { size_t w = va_arg(ap, size_t); @@ -1137,6 +1151,10 @@ static int cell_data_compare(TableData *a, size_t index_a, TableData *b, size_t case TABLE_IN6_ADDR: return memcmp(&a->address.in6, &b->address.in6, FAMILY_ADDRESS_SIZE(AF_INET6)); + case TABLE_UUID: + case TABLE_ID128: + return memcmp(&a->id128, &b->id128, sizeof(sd_id128_t)); + default: ; } @@ -1451,6 +1469,28 @@ static const char *table_data_format(Table *t, TableData *d) { break; } + case TABLE_ID128: { + char *p; + + p = new(char, SD_ID128_STRING_MAX); + if (!p) + return NULL; + + d->formatted = sd_id128_to_string(d->id128, p); + break; + } + + case TABLE_UUID: { + char *p; + + p = new(char, ID128_UUID_STRING_MAX); + if (!p) + return NULL; + + d->formatted = id128_to_uuid_string(d->id128, p); + break; + } + default: assert_not_reached("Unexpected type?"); } @@ -2155,6 +2195,16 @@ static int table_data_to_json(TableData *d, JsonVariant **ret) { case TABLE_IN6_ADDR: return json_variant_new_array_bytes(ret, &d->address, FAMILY_ADDRESS_SIZE(AF_INET6)); + case TABLE_ID128: { + char buf[SD_ID128_STRING_MAX]; + return json_variant_new_string(ret, sd_id128_to_string(d->id128, buf)); + } + + case TABLE_UUID: { + char buf[ID128_UUID_STRING_MAX]; + return json_variant_new_string(ret, id128_to_uuid_string(d->id128, buf)); + } + default: return -EINVAL; } diff --git a/src/shared/format-table.h 
b/src/shared/format-table.h index fa7a2bd6d6..870a29d385 100644 --- a/src/shared/format-table.h +++ b/src/shared/format-table.h @@ -35,6 +35,8 @@ typedef enum TableDataType { TABLE_IFINDEX, TABLE_IN_ADDR, /* Takes a union in_addr_union (or a struct in_addr) */ TABLE_IN6_ADDR, /* Takes a union in_addr_union (or a struct in6_addr) */ + TABLE_ID128, + TABLE_UUID, _TABLE_DATA_TYPE_MAX, /* The following are not really data types, but commands for table_add_cell_many() to make changes to From 1d2a1a0cb808fafa4d5e4b818a56e234040d8cf2 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 10 Dec 2019 21:30:09 +0100 Subject: [PATCH 04/13] locale-util: add block drawing special glyphs --- src/basic/locale-util.c | 4 ++++ src/basic/locale-util.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/src/basic/locale-util.c b/src/basic/locale-util.c index dc62511935..d2da6e58d7 100644 --- a/src/basic/locale-util.c +++ b/src/basic/locale-util.c @@ -345,6 +345,8 @@ const char *special_glyph(SpecialGlyph code) { [SPECIAL_GLYPH_MU] = "u", [SPECIAL_GLYPH_CHECK_MARK] = "+", [SPECIAL_GLYPH_CROSS_MARK] = "-", + [SPECIAL_GLYPH_LIGHT_SHADE] = "-", + [SPECIAL_GLYPH_DARK_SHADE] = "X", [SPECIAL_GLYPH_ARROW] = "->", [SPECIAL_GLYPH_ELLIPSIS] = "...", [SPECIAL_GLYPH_ECSTATIC_SMILEY] = ":-]", @@ -371,6 +373,8 @@ const char *special_glyph(SpecialGlyph code) { [SPECIAL_GLYPH_MU] = "\316\274", /* μ (actually called: GREEK SMALL LETTER MU) */ [SPECIAL_GLYPH_CHECK_MARK] = "\342\234\223", /* ✓ */ [SPECIAL_GLYPH_CROSS_MARK] = "\342\234\227", /* ✗ (actually called: BALLOT X) */ + [SPECIAL_GLYPH_LIGHT_SHADE] = "\342\226\221", /* ░ */ + [SPECIAL_GLYPH_DARK_SHADE] = "\342\226\223", /* ▒ */ /* Single glyph in Unicode, two in ASCII */ [SPECIAL_GLYPH_ARROW] = "\342\206\222", /* → (actually called: RIGHTWARDS ARROW) */ diff --git a/src/basic/locale-util.h b/src/basic/locale-util.h index 1df8ac4cb0..79242a3e76 100644 --- a/src/basic/locale-util.h +++ b/src/basic/locale-util.h @@ -51,6 +51,8 @@ typedef 
enum { SPECIAL_GLYPH_CROSS_MARK, SPECIAL_GLYPH_ARROW, SPECIAL_GLYPH_ELLIPSIS, + SPECIAL_GLYPH_LIGHT_SHADE, + SPECIAL_GLYPH_DARK_SHADE, _SPECIAL_GLYPH_FIRST_SMILEY, SPECIAL_GLYPH_ECSTATIC_SMILEY = _SPECIAL_GLYPH_FIRST_SMILEY, SPECIAL_GLYPH_HAPPY_SMILEY, From 7e70f2cb0e43c8c4e4eddb508d8b0f83116f4871 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 10 Dec 2019 21:26:47 +0100 Subject: [PATCH 05/13] =?UTF-8?q?locale-util:=20add=20special=20glyph=20?= =?UTF-8?q?=CE=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/basic/locale-util.c | 2 ++ src/basic/locale-util.h | 1 + 2 files changed, 3 insertions(+) diff --git a/src/basic/locale-util.c b/src/basic/locale-util.c index d2da6e58d7..96151ffbf8 100644 --- a/src/basic/locale-util.c +++ b/src/basic/locale-util.c @@ -347,6 +347,7 @@ const char *special_glyph(SpecialGlyph code) { [SPECIAL_GLYPH_CROSS_MARK] = "-", [SPECIAL_GLYPH_LIGHT_SHADE] = "-", [SPECIAL_GLYPH_DARK_SHADE] = "X", + [SPECIAL_GLYPH_SIGMA] = "S", [SPECIAL_GLYPH_ARROW] = "->", [SPECIAL_GLYPH_ELLIPSIS] = "...", [SPECIAL_GLYPH_ECSTATIC_SMILEY] = ":-]", @@ -375,6 +376,7 @@ const char *special_glyph(SpecialGlyph code) { [SPECIAL_GLYPH_CROSS_MARK] = "\342\234\227", /* ✗ (actually called: BALLOT X) */ [SPECIAL_GLYPH_LIGHT_SHADE] = "\342\226\221", /* ░ */ [SPECIAL_GLYPH_DARK_SHADE] = "\342\226\223", /* ▒ */ + [SPECIAL_GLYPH_SIGMA] = "\316\243", /* Σ */ /* Single glyph in Unicode, two in ASCII */ [SPECIAL_GLYPH_ARROW] = "\342\206\222", /* → (actually called: RIGHTWARDS ARROW) */ diff --git a/src/basic/locale-util.h b/src/basic/locale-util.h index 79242a3e76..cefc4e7f0e 100644 --- a/src/basic/locale-util.h +++ b/src/basic/locale-util.h @@ -53,6 +53,7 @@ typedef enum { SPECIAL_GLYPH_ELLIPSIS, SPECIAL_GLYPH_LIGHT_SHADE, SPECIAL_GLYPH_DARK_SHADE, + SPECIAL_GLYPH_SIGMA, _SPECIAL_GLYPH_FIRST_SMILEY, SPECIAL_GLYPH_ECSTATIC_SMILEY = _SPECIAL_GLYPH_FIRST_SMILEY, SPECIAL_GLYPH_HAPPY_SMILEY, From 
b57ebc6004bbe512546bc30366256d15da20219a Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 10 Dec 2019 21:32:10 +0100 Subject: [PATCH 06/13] conf-parser: add parser for 32bit signed integers --- src/shared/conf-parser.c | 1 + src/shared/conf-parser.h | 1 + 2 files changed, 2 insertions(+) diff --git a/src/shared/conf-parser.c b/src/shared/conf-parser.c index cb20279dda..bb8a00c568 100644 --- a/src/shared/conf-parser.c +++ b/src/shared/conf-parser.c @@ -515,6 +515,7 @@ DEFINE_PARSER(long, long, safe_atoli); DEFINE_PARSER(uint8, uint8_t, safe_atou8); DEFINE_PARSER(uint16, uint16_t, safe_atou16); DEFINE_PARSER(uint32, uint32_t, safe_atou32); +DEFINE_PARSER(int32, int32_t, safe_atoi32); DEFINE_PARSER(uint64, uint64_t, safe_atou64); DEFINE_PARSER(unsigned, unsigned, safe_atou); DEFINE_PARSER(double, double, safe_atod); diff --git a/src/shared/conf-parser.h b/src/shared/conf-parser.h index 287620ad71..3a07bec303 100644 --- a/src/shared/conf-parser.h +++ b/src/shared/conf-parser.h @@ -115,6 +115,7 @@ CONFIG_PARSER_PROTOTYPE(config_parse_long); CONFIG_PARSER_PROTOTYPE(config_parse_uint8); CONFIG_PARSER_PROTOTYPE(config_parse_uint16); CONFIG_PARSER_PROTOTYPE(config_parse_uint32); +CONFIG_PARSER_PROTOTYPE(config_parse_int32); CONFIG_PARSER_PROTOTYPE(config_parse_uint64); CONFIG_PARSER_PROTOTYPE(config_parse_double); CONFIG_PARSER_PROTOTYPE(config_parse_iec_size); From e594a3b154bd06c535a934a1cc7231b1ef76df73 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 10 Dec 2019 21:31:41 +0100 Subject: [PATCH 07/13] repart: add new systemd-repart tool Fixes: #14052 --- meson.build | 40 + meson_options.txt | 4 + src/partition/meson.build | 5 + src/partition/repart.c | 3096 +++++++++++++++++++++++++++++++++++++ 4 files changed, 3145 insertions(+) create mode 100644 src/partition/meson.build create mode 100644 src/partition/repart.c diff --git a/meson.build b/meson.build index 848140bb03..54820d3f6a 100644 --- a/meson.build +++ b/meson.build @@ -873,6 +873,17 @@ 
endif libmount = dependency('mount', version : fuzzer_build ? '>= 0' : '>= 2.30') +want_libfdisk = get_option('fdisk') +if want_libfdisk != 'false' and not skip_deps + libfdisk = dependency('fdisk', + required : want_libfdisk == 'true') + have = libfdisk.found() +else + have = false + libfdisk = [] +endif +conf.set10('HAVE_LIBFDISK', have) + want_seccomp = get_option('seccomp') if want_seccomp != 'false' and not skip_deps libseccomp = dependency('libseccomp', @@ -1279,6 +1290,18 @@ conf.set('DEFAULT_DNS_OVER_TLS_MODE', 'DNS_OVER_TLS_' + default_dns_over_tls.underscorify().to_upper()) substs.set('DEFAULT_DNS_OVER_TLS_MODE', default_dns_over_tls) +want_repart = get_option('repart') +if want_repart != 'false' + have = (conf.get('HAVE_OPENSSL') == 1 and + conf.get('HAVE_LIBFDISK') == 1) + if want_repart == 'true' and not have + error('repart support was requested, but dependencies are not available') + endif +else + have = false +endif +conf.set10('ENABLE_REPART', have) + want_importd = get_option('importd') if want_importd != 'false' have = (conf.get('HAVE_LIBCURL') == 1 and @@ -1535,6 +1558,7 @@ subdir('src/coredump') subdir('src/pstore') subdir('src/hostname') subdir('src/import') +subdir('src/partition') subdir('src/kernel-install') subdir('src/locale') subdir('src/machine') @@ -2381,6 +2405,21 @@ if conf.get('ENABLE_BINFMT') == 1 mkdir_p.format(join_paths(sysconfdir, 'binfmt.d'))) endif +if conf.get('ENABLE_REPART') == 1 + executable('systemd-repart', + systemd_repart_sources, + include_directories : includes, + link_with : [libshared], + dependencies : [threads, + libcryptsetup, + libblkid, + libfdisk, + libopenssl], + install_rpath : rootlibexecdir, + install : true, + install_dir : rootbindir) +endif + if conf.get('ENABLE_VCONSOLE') == 1 executable('systemd-vconsole-setup', 'src/vconsole/vconsole-setup.c', @@ -3276,6 +3315,7 @@ foreach tuple : [ ['libiptc'], ['elfutils'], ['binfmt'], + ['repart'], ['vconsole'], ['quotacheck'], ['tmpfiles'], diff --git 
a/meson_options.txt b/meson_options.txt index 6736240f39..e512d25480 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -80,6 +80,8 @@ option('environment-d', type : 'boolean', description : 'support for environment.d') option('binfmt', type : 'boolean', description : 'support for custom binary formats') +option('repart', type : 'combo', choices : ['auto', 'true', 'false'], + description : 'install the systemd-repart tool') option('coredump', type : 'boolean', description : 'install the coredump handler') option('pstore', type : 'boolean', @@ -260,6 +262,8 @@ option('audit', type : 'combo', choices : ['auto', 'true', 'false'], description : 'libaudit support') option('blkid', type : 'combo', choices : ['auto', 'true', 'false'], description : 'libblkid support') +option('fdisk', type : 'combo', choices : ['auto', 'true', 'false'], + description : 'libfdisk support') option('kmod', type : 'combo', choices : ['auto', 'true', 'false'], description : 'support for loadable modules') option('pam', type : 'combo', choices : ['auto', 'true', 'false'], diff --git a/src/partition/meson.build b/src/partition/meson.build new file mode 100644 index 0000000000..d0c111a473 --- /dev/null +++ b/src/partition/meson.build @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: LGPL-2.1+ + +systemd_repart_sources = files(''' + repart.c +'''.split()) diff --git a/src/partition/repart.c b/src/partition/repart.c new file mode 100644 index 0000000000..9844de5961 --- /dev/null +++ b/src/partition/repart.c @@ -0,0 +1,3096 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#if HAVE_VALGRIND_MEMCHECK_H +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "sd-id128.h" + +#include "alloc-util.h" +#include "blkid-util.h" +#include "blockdev-util.h" +#include "btrfs-util.h" +#include "conf-files.h" +#include "conf-parser.h" +#include "def.h" +#include "efivars.h" +#include "errno-util.h" +#include "fd-util.h" +#include 
"format-table.h" +#include "format-util.h" +#include "fs-util.h" +#include "gpt.h" +#include "id128-util.h" +#include "list.h" +#include "locale-util.h" +#include "main-func.h" +#include "parse-util.h" +#include "path-util.h" +#include "pretty-print.h" +#include "proc-cmdline.h" +#include "sort-util.h" +#include "stat-util.h" +#include "stdio-util.h" +#include "string-util.h" +#include "strv.h" +#include "terminal-util.h" +#include "utf8.h" + +/* Note: When growing and placing new partitions we always align to 4K sector size. It's how newer hard disks + * are designed, and if everything is aligned to that performance is best. And for older hard disks with 512B + * sector size devices were generally assumed to have an even number of sectors, hence at the worst we'll + * waste 3K per partition, which is probably fine. */ + +static enum { + EMPTY_REFUSE, /* refuse empty disks, never create a partition table */ + EMPTY_ALLOW, /* allow empty disks, create partition table if necessary */ + EMPTY_REQUIRE, /* require an empty disk, create a partition table */ + EMPTY_FORCE, /* make disk empty, erase everything, create a partition table always */ +} arg_empty = EMPTY_REFUSE; + +static bool arg_dry_run = true; +static const char *arg_node = NULL; +static char *arg_root = NULL; +static char *arg_definitions = NULL; +static bool arg_discard = true; +static bool arg_can_factory_reset = false; +static int arg_factory_reset = -1; +static sd_id128_t arg_seed = SD_ID128_NULL; +static bool arg_randomize = false; +static int arg_pretty = -1; + +STATIC_DESTRUCTOR_REGISTER(arg_root, freep); +STATIC_DESTRUCTOR_REGISTER(arg_definitions, freep); + +typedef struct Partition Partition; +typedef struct FreeArea FreeArea; +typedef struct Context Context; + +struct Partition { + char *definition_path; + + sd_id128_t type_uuid; + sd_id128_t current_uuid, new_uuid; + char *current_label, *new_label; + + bool dropped; + bool factory_reset; + int32_t priority; + + uint32_t weight, padding_weight; 
+ + uint64_t current_size, new_size; + uint64_t size_min, size_max; + + uint64_t current_padding, new_padding; + uint64_t padding_min, padding_max; + + uint64_t partno; + uint64_t offset; + + struct fdisk_partition *current_partition; + struct fdisk_partition *new_partition; + FreeArea *padding_area; + FreeArea *allocated_to_area; + + LIST_FIELDS(Partition, partitions); +}; + +#define PARTITION_IS_FOREIGN(p) (!(p)->definition_path) +#define PARTITION_EXISTS(p) (!!(p)->current_partition) + +struct FreeArea { + Partition *after; + uint64_t size; + uint64_t allocated; +}; + +struct Context { + LIST_HEAD(Partition, partitions); + size_t n_partitions; + + FreeArea **free_areas; + size_t n_free_areas, n_allocated_free_areas; + + uint64_t start, end, total; + + struct fdisk_context *fdisk_context; + + sd_id128_t seed; +}; + +static uint64_t round_down_size(uint64_t v, uint64_t p) { + return (v / p) * p; +} + +static uint64_t round_up_size(uint64_t v, uint64_t p) { + + v = DIV_ROUND_UP(v, p); + + if (v > UINT64_MAX / p) + return UINT64_MAX; /* overflow */ + + return v * p; +} + +static Partition *partition_new(void) { + Partition *p; + + p = new(Partition, 1); + if (!p) + return NULL; + + *p = (Partition) { + .weight = 1000, + .padding_weight = 0, + .current_size = UINT64_MAX, + .new_size = UINT64_MAX, + .size_min = UINT64_MAX, + .size_max = UINT64_MAX, + .current_padding = UINT64_MAX, + .new_padding = UINT64_MAX, + .padding_min = UINT64_MAX, + .padding_max = UINT64_MAX, + .partno = UINT64_MAX, + .offset = UINT64_MAX, + }; + + return p; +} + +static Partition* partition_free(Partition *p) { + if (!p) + return NULL; + + free(p->current_label); + free(p->new_label); + free(p->definition_path); + + if (p->current_partition) + fdisk_unref_partition(p->current_partition); + if (p->new_partition) + fdisk_unref_partition(p->new_partition); + + return mfree(p); +} + +static Partition* partition_unlink_and_free(Context *context, Partition *p) { + if (!p) + return NULL; + + 
LIST_REMOVE(partitions, context->partitions, p); + + assert(context->n_partitions > 0); + context->n_partitions--; + + return partition_free(p); +} + +DEFINE_TRIVIAL_CLEANUP_FUNC(Partition*, partition_free); + +static Context *context_new(sd_id128_t seed) { + Context *context; + + context = new(Context, 1); + if (!context) + return NULL; + + *context = (Context) { + .start = UINT64_MAX, + .end = UINT64_MAX, + .total = UINT64_MAX, + .seed = seed, + }; + + return context; +} + +static void context_free_free_areas(Context *context) { + assert(context); + + for (size_t i = 0; i < context->n_free_areas; i++) + free(context->free_areas[i]); + + context->free_areas = mfree(context->free_areas); + context->n_free_areas = 0; + context->n_allocated_free_areas = 0; +} + +static Context *context_free(Context *context) { + if (!context) + return NULL; + + while (context->partitions) + partition_unlink_and_free(context, context->partitions); + assert(context->n_partitions == 0); + + context_free_free_areas(context); + + if (context->fdisk_context) + fdisk_unref_context(context->fdisk_context); + + return mfree(context); +} + +DEFINE_TRIVIAL_CLEANUP_FUNC(Context*, context_free); + +static int context_add_free_area( + Context *context, + uint64_t size, + Partition *after) { + + FreeArea *a; + + assert(context); + assert(!after || !after->padding_area); + + if (!GREEDY_REALLOC(context->free_areas, context->n_allocated_free_areas, context->n_free_areas + 1)) + return -ENOMEM; + + a = new(FreeArea, 1); + if (!a) + return -ENOMEM; + + *a = (FreeArea) { + .size = size, + .after = after, + }; + + context->free_areas[context->n_free_areas++] = a; + + if (after) + after->padding_area = a; + + return 0; +} + +static bool context_drop_one_priority(Context *context) { + int32_t priority = 0; + Partition *p; + bool exists = false; + + LIST_FOREACH(partitions, p, context->partitions) { + if (p->dropped) + continue; + if (p->priority < priority) + continue; + if (p->priority == priority) { + 
exists = exists || PARTITION_EXISTS(p); + continue; + } + + priority = p->priority; + exists = PARTITION_EXISTS(p); + } + + /* Refuse to drop partitions with 0 or negative priorities or partitions of priorities that have at + * least one existing partition */ + if (priority <= 0 || exists) + return false; + + LIST_FOREACH(partitions, p, context->partitions) { + if (p->priority < priority) + continue; + + if (p->dropped) + continue; + + p->dropped = true; + log_info("Can't fit partition %s of priority %" PRIi32 ", dropping.", p->definition_path, p->priority); + } + + return true; +} + +static uint64_t partition_min_size(const Partition *p) { + uint64_t sz; + + /* Calculate the disk space we really need at minimum for this partition. If the partition already + * exists the current size is what we really need. If it doesn't exist yet refuse to allocate less + * than 4K. */ + + if (PARTITION_IS_FOREIGN(p)) { + /* Don't allow changing size of partitions not managed by us */ + assert(p->current_size != UINT64_MAX); + return p->current_size; + } + + sz = p->current_size != UINT64_MAX ? p->current_size : 4096; + if (p->size_min != UINT64_MAX) + return MAX(p->size_min, sz); + + return sz; +} + +static uint64_t partition_max_size(const Partition *p) { + /* Calculate how large the partition may become at max. This is generally the configured maximum + * size, except when it already exists and is larger than that. In that case it's the existing size, + * since we never want to shrink partitions. */ + + if (PARTITION_IS_FOREIGN(p)) { + /* Don't allow changing size of partitions not managed by us */ + assert(p->current_size != UINT64_MAX); + return p->current_size; + } + + if (p->current_size != UINT64_MAX) + return MAX(p->current_size, p->size_max); + + return p->size_max; +} + +static uint64_t partition_min_size_with_padding(const Partition *p) { + uint64_t sz; + + /* Calculate the disk space we need for this partition plus any free space coming after it. 
This + * takes user configured padding into account as well as any additional whitespace needed to align + * the next partition to 4K again. */ + + sz = partition_min_size(p); + + if (p->padding_min != UINT64_MAX) + sz += p->padding_min; + + if (PARTITION_EXISTS(p)) { + /* If the partition wasn't aligned, add extra space so that any we might add will be aligned */ + assert(p->offset != UINT64_MAX); + return round_up_size(p->offset + sz, 4096) - p->offset; + } + + /* If this is a new partition we'll place it aligned, hence we just need to round up the required size here */ + return round_up_size(sz, 4096); +} + +static uint64_t free_area_available(const FreeArea *a) { + assert(a); + + /* Determines how much of this free area is not allocated yet */ + + assert(a->size >= a->allocated); + return a->size - a->allocated; +} + +static uint64_t free_area_available_for_new_partitions(const FreeArea *a) { + uint64_t avail; + + /* Similar to free_area_available(), but takes into account that the required size and padding of the + * preceeding partition is honoured. 
*/ + + avail = free_area_available(a); + if (a->after) { + uint64_t need, space; + + need = partition_min_size_with_padding(a->after); + + assert(a->after->offset != UINT64_MAX); + assert(a->after->current_size != UINT64_MAX); + + space = round_up_size(a->after->offset + a->after->current_size, 4096) - a->after->offset + avail; + if (need >= space) + return 0; + + return space - need; + } + + return avail; +} + +static int free_area_compare(FreeArea *const *a, FreeArea *const*b) { + return CMP(free_area_available_for_new_partitions(*a), + free_area_available_for_new_partitions(*b)); +} + +static uint64_t charge_size(uint64_t total, uint64_t amount) { + uint64_t rounded; + + assert(amount <= total); + + /* Subtract the specified amount from total, rounding up to multiple of 4K if there's room */ + rounded = round_up_size(amount, 4096); + if (rounded >= total) + return 0; + + return total - rounded; +} + +static uint64_t charge_weight(uint64_t total, uint64_t amount) { + assert(amount <= total); + return total - amount; +} + +static bool context_allocate_partitions(Context *context) { + Partition *p; + + assert(context); + + /* A simple first-fit algorithm: for every partition the free areas are re-sorted with free_area_compare() + * and scanned front to back. NOTE(review): CMP() presumably sorts ascending, i.e. smallest area first, not in decreasing order — confirm. */ + + LIST_FOREACH(partitions, p, context->partitions) { + bool fits = false; + uint64_t required; + FreeArea *a = NULL; + + /* Skip partitions we already dropped or that already exist */ + if (p->dropped || PARTITION_EXISTS(p)) + continue; + + /* Sort by the space still available for new partitions */ + typesafe_qsort(context->free_areas, context->n_free_areas, free_area_compare); + + /* How much do we need to fit? */ + required = partition_min_size_with_padding(p); + assert(required % 4096 == 0); + + for (size_t i = 0; i < context->n_free_areas; i++) { + a = context->free_areas[i]; + + if (free_area_available_for_new_partitions(a) >= required) { + fits = true; + break; + } + } + + if (!fits) + return false; /* 😢 Oh no! We can't fit this partition into any free area! 
*/ + + /* Assign the partition to this free area */ + p->allocated_to_area = a; + + /* Budget the minimal partition size */ + a->allocated += required; + } + + return true; +} + +static int context_sum_weights(Context *context, FreeArea *a, uint64_t *ret) { + uint64_t weight_sum = 0; + Partition *p; + + assert(context); + assert(a); + assert(ret); + + /* Determine the sum of the weights of all partitions placed in or before the specified free area */ + + LIST_FOREACH(partitions, p, context->partitions) { + if (p->padding_area != a && p->allocated_to_area != a) + continue; + + if (p->weight > UINT64_MAX - weight_sum) + goto overflow_sum; + weight_sum += p->weight; + + if (p->padding_weight > UINT64_MAX - weight_sum) + goto overflow_sum; + weight_sum += p->padding_weight; + } + + *ret = weight_sum; + return 0; + +overflow_sum: + return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW), "Combined weight of partition exceeds unsigned 64bit range, refusing."); +} + +static int scale_by_weight(uint64_t value, uint64_t weight, uint64_t weight_sum, uint64_t *ret) { + assert(weight_sum >= weight); + assert(ret); + + if (weight == 0) { + *ret = 0; + return 0; + } + + if (value > UINT64_MAX / weight) + return log_error_errno(SYNTHETIC_ERRNO(EOVERFLOW), "Scaling by weight of partition exceeds unsigned 64bit range, refusing."); + + *ret = value * weight / weight_sum; + return 0; +} + +typedef enum GrowPartitionPhase { + /* The first phase: we charge partitions which need more (according to constraints) than their weight-based share. */ + PHASE_OVERCHARGE, + + /* The second phase: we charge partitions which need less (according to constraints) than their weight-based share. 
*/ + PHASE_UNDERCHARGE, + + /* The third phase: we distribute what remains among the remaining partitions, according to the weights */ + PHASE_DISTRIBUTE, +} GrowPartitionPhase; + +static int context_grow_partitions_phase( + Context *context, + FreeArea *a, + GrowPartitionPhase phase, + uint64_t *span, + uint64_t *weight_sum) { + + Partition *p; + int r; + + assert(context); + assert(a); + + /* Now let's look at the intended weights and adjust them taking the minimum space assignments into + * account. i.e. if a partition has a small weight but a high minimum space value set it should not + * get any additional room from the left-overs. Similarly, if two partitions have the same weight they + * should get the same space if possible, even if one has a smaller minimum size than the other. */ + LIST_FOREACH(partitions, p, context->partitions) { + + /* Look only at partitions associated with this free area, i.e. immediately + * preceding it, or allocated into it */ + if (p->allocated_to_area != a && p->padding_area != a) + continue; + + if (p->new_size == UINT64_MAX) { + bool charge = false, try_again = false; + uint64_t share, rsz, xsz; + + /* Calculate how much space this partition needs if everyone would get + * the weight based share */ + r = scale_by_weight(*span, p->weight, *weight_sum, &share); + if (r < 0) + return r; + + rsz = partition_min_size(p); + xsz = partition_max_size(p); + + if (phase == PHASE_OVERCHARGE && rsz > share) { + /* This partition needs more than its calculated share. Let's assign + * it that, and take this partition out of all calculations and start + * again. */ + + p->new_size = rsz; + charge = try_again = true; + + } else if (phase == PHASE_UNDERCHARGE && xsz != UINT64_MAX && xsz < share) { + /* This partition accepts less than its calculated + * share. Let's assign it that, and take this partition out + * of all calculations and start again. 
*/ + + p->new_size = xsz; + charge = try_again = true; + + } else if (phase == PHASE_DISTRIBUTE) { + /* This partition can accept its calculated share. Let's + * assign it. There's no need to restart things here since + * assigning this shouldn't impact the shares of the other + * partitions. */ + + if (PARTITION_IS_FOREIGN(p)) + /* Never change the size of foreign partitions (i.e. those we don't manage) */ + p->new_size = p->current_size; + else + p->new_size = MAX(round_down_size(share, 4096), rsz); + + charge = true; + } + + if (charge) { + *span = charge_size(*span, p->new_size); + *weight_sum = charge_weight(*weight_sum, p->weight); + } + + if (try_again) + return 0; /* try again */ + } + + if (p->new_padding == UINT64_MAX) { + bool charge = false, try_again = false; + uint64_t share; + + r = scale_by_weight(*span, p->padding_weight, *weight_sum, &share); + if (r < 0) + return r; + + if (phase == PHASE_OVERCHARGE && p->padding_min != UINT64_MAX && p->padding_min > share) { + p->new_padding = p->padding_min; + charge = try_again = true; + } else if (phase == PHASE_UNDERCHARGE && p->padding_max != UINT64_MAX && p->padding_max < share) { + p->new_padding = p->padding_max; + charge = try_again = true; + } else if (phase == PHASE_DISTRIBUTE) { + + p->new_padding = round_down_size(share, 4096); + if (p->padding_min != UINT64_MAX && p->new_padding < p->padding_min) + p->new_padding = p->padding_min; + + charge = true; + } + + if (charge) { + *span = charge_size(*span, p->new_padding); + *weight_sum = charge_weight(*weight_sum, p->padding_weight); + } + + if (try_again) + return 0; /* try again */ + } + } + + return 1; /* done */ +} + +static int context_grow_partitions_on_free_area(Context *context, FreeArea *a) { + uint64_t weight_sum = 0, span; + int r; + + assert(context); + assert(a); + + r = context_sum_weights(context, a, &weight_sum); + if (r < 0) + return r; + + /* Let's calculate the total area covered by this free area and the partition before it */ + span = 
a->size; + if (a->after) { + assert(a->after->offset != UINT64_MAX); + assert(a->after->current_size != UINT64_MAX); + + span += round_up_size(a->after->offset + a->after->current_size, 4096) - a->after->offset; + } + + GrowPartitionPhase phase = PHASE_OVERCHARGE; + for (;;) { + r = context_grow_partitions_phase(context, a, phase, &span, &weight_sum); + if (r < 0) + return r; + if (r == 0) /* not done yet, re-run this phase */ + continue; + + if (phase == PHASE_OVERCHARGE) + phase = PHASE_UNDERCHARGE; + else if (phase == PHASE_UNDERCHARGE) + phase = PHASE_DISTRIBUTE; + else if (phase == PHASE_DISTRIBUTE) + break; + } + + /* We still have space left over? Donate to preceding partition if we have one */ + if (span > 0 && a->after && !PARTITION_IS_FOREIGN(a->after)) { + uint64_t m, xsz; + + assert(a->after->new_size != UINT64_MAX); + m = a->after->new_size + span; + + xsz = partition_max_size(a->after); + if (xsz != UINT64_MAX && m > xsz) + m = xsz; + + span = charge_size(span, m - a->after->new_size); + a->after->new_size = m; + } + + /* What? Even still some space left (maybe because there was no preceding partition, or it had a + * size limit), then let's donate it to whoever wants it. */ + if (span > 0) { + Partition *p; + + LIST_FOREACH(partitions, p, context->partitions) { + uint64_t m, xsz; + + if (p->allocated_to_area != a) + continue; + + if (PARTITION_IS_FOREIGN(p)) + continue; + + assert(p->new_size != UINT64_MAX); + m = p->new_size + span; + + /* Clamp to the maximum size of the partition we are growing, i.e. p itself; a->after + * may be NULL in this branch and its limit does not apply to p anyway. */ + xsz = partition_max_size(p); + if (xsz != UINT64_MAX && m > xsz) + m = xsz; + + span = charge_size(span, m - p->new_size); + p->new_size = m; + + if (span == 0) + break; + } + } + + /* Yuck, still no one? 
Then make it padding */ + if (span > 0 && a->after) { + assert(a->after->new_padding != UINT64_MAX); + a->after->new_padding += span; + } + + return 0; +} + +static int context_grow_partitions(Context *context) { + Partition *p; + int r; + + assert(context); + + for (size_t i = 0; i < context->n_free_areas; i++) { + r = context_grow_partitions_on_free_area(context, context->free_areas[i]); + if (r < 0) + return r; + } + + /* All existing partitions that have no free space after them can't change size */ + LIST_FOREACH(partitions, p, context->partitions) { + if (p->dropped) + continue; + + if (!PARTITION_EXISTS(p) || p->padding_area) { + /* The algorithm above must have initialized this already */ + assert(p->new_size != UINT64_MAX); + continue; + } + + assert(p->new_size == UINT64_MAX); + p->new_size = p->current_size; + + assert(p->new_padding == UINT64_MAX); + p->new_padding = p->current_padding; + } + + return 0; +} + +static void context_place_partitions(Context *context) { + uint64_t partno = 0; + Partition *p; + + assert(context); + + /* Determine next partition number to assign */ + LIST_FOREACH(partitions, p, context->partitions) { + if (!PARTITION_EXISTS(p)) + continue; + + assert(p->partno != UINT64_MAX); + if (p->partno >= partno) + partno = p->partno + 1; + } + + for (size_t i = 0; i < context->n_free_areas; i++) { + FreeArea *a = context->free_areas[i]; + uint64_t start, left; + + if (a->after) { + assert(a->after->offset != UINT64_MAX); + assert(a->after->new_size != UINT64_MAX); + assert(a->after->new_padding != UINT64_MAX); + + start = a->after->offset + a->after->new_size + a->after->new_padding; + } else + start = context->start; + + start = round_up_size(start, 4096); + left = a->size; + + LIST_FOREACH(partitions, p, context->partitions) { + if (p->allocated_to_area != a) + continue; + + p->offset = start; + p->partno = partno++; + + assert(left >= p->new_size); + start += p->new_size; + left -= p->new_size; + + assert(left >= p->new_padding); + 
start += p->new_padding; + left -= p->new_padding; + } + } +} + +typedef struct GptPartitionType { + sd_id128_t uuid; + const char *name; +} GptPartitionType; + +static const GptPartitionType gpt_partition_type_table[] = { + { GPT_ROOT_X86, "root-x86" }, + { GPT_ROOT_X86_VERITY, "root-x86-verity" }, + { GPT_ROOT_X86_64, "root-x86-64" }, + { GPT_ROOT_X86_64_VERITY, "root-x86-64-verity" }, + { GPT_ROOT_ARM, "root-arm" }, + { GPT_ROOT_ARM_VERITY, "root-arm-verity" }, + { GPT_ROOT_ARM_64, "root-arm64" }, + { GPT_ROOT_ARM_64_VERITY, "root-arm64-verity" }, + { GPT_ROOT_IA64, "root-ia64" }, + { GPT_ROOT_IA64_VERITY, "root-ia64-verity" }, +#ifdef GPT_ROOT_NATIVE + { GPT_ROOT_NATIVE, "root" }, + { GPT_ROOT_NATIVE_VERITY, "root-verity" }, +#endif +#ifdef GPT_ROOT_SECONDARY + { GPT_ROOT_SECONDARY, "root-secondary" }, + { GPT_ROOT_SECONDARY_VERITY, "root-secondary-verity" }, +#endif + { GPT_ESP, "esp" }, + { GPT_XBOOTLDR, "xbootldr" }, + { GPT_SWAP, "swap" }, + { GPT_HOME, "home" }, + { GPT_SRV, "srv" }, + { GPT_VAR, "var" }, + { GPT_TMP, "tmp" }, + { GPT_LINUX_GENERIC, "linux-generic", }, +}; + +static const char *gpt_partition_type_uuid_to_string(sd_id128_t id) { + for (size_t i = 0; i < ELEMENTSOF(gpt_partition_type_table); i++) + if (sd_id128_equal(id, gpt_partition_type_table[i].uuid)) + return gpt_partition_type_table[i].name; + + return NULL; +} + +static const char *gpt_partition_type_uuid_to_string_harder( + sd_id128_t id, + char buffer[static ID128_UUID_STRING_MAX]) { + + const char *s; + + assert(buffer); + + s = gpt_partition_type_uuid_to_string(id); + if (s) + return s; + + return id128_to_uuid_string(id, buffer); +} + +static int gpt_partition_type_uuid_from_string(const char *s, sd_id128_t *ret) { + assert(s); + assert(ret); + + for (size_t i = 0; i < ELEMENTSOF(gpt_partition_type_table); i++) + if (streq(s, gpt_partition_type_table[i].name)) { + *ret = gpt_partition_type_table[i].uuid; + return 0; + } + + return sd_id128_from_string(s, ret); +} + +static int 
config_parse_type( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + sd_id128_t *type_uuid = data; + int r; + + assert(rvalue); + assert(type_uuid); + + r = gpt_partition_type_uuid_from_string(rvalue, type_uuid); + if (r < 0) + return log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse partition type: %s", rvalue); + + return 0; +} + +static int config_parse_label( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + _cleanup_free_ char16_t *recoded = NULL; + char **label = data; + int r; + + assert(rvalue); + assert(label); + + if (!utf8_is_valid(rvalue)) { + log_syntax(unit, LOG_WARNING, filename, line, 0, + "Partition label not valid UTF-8, ignoring: %s", rvalue); + return 0; + } + + recoded = utf8_to_utf16(rvalue, strlen(rvalue)); + if (!recoded) + return log_oom(); + + if (char16_strlen(recoded) > 36) { + log_syntax(unit, LOG_WARNING, filename, line, 0, + "Partition label too long for GPT table, ignoring: %s", rvalue); + return 0; + } + + r = free_and_strdup(label, rvalue); + if (r < 0) + return log_oom(); + + return 0; +} + +static int config_parse_weight( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + uint32_t *priority = data, v; + int r; + + assert(rvalue); + assert(priority); + + r = safe_atou32(rvalue, &v); + if (r < 0) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse weight value, ignoring: %s", rvalue); + return 0; + } + + /* The accepted maximum is 1000*1000 = 1000000; the message below must state the same bound */ + if (v > 1000U*1000U) { + log_syntax(unit, LOG_WARNING, filename, line, r, + "Weight needs to be in range 0…1000000, ignoring: %" PRIu32, v); + 
return 0; + } + + *priority = v; + return 0; +} + +static int config_parse_size4096( + const char *unit, + const char *filename, + unsigned line, + const char *section, + unsigned section_line, + const char *lvalue, + int ltype, + const char *rvalue, + void *data, + void *userdata) { + + uint64_t *sz = data, parsed; + int r; + + assert(rvalue); + assert(data); + + r = parse_size(rvalue, 1024, &parsed); + if (r < 0) + return log_syntax(unit, LOG_WARNING, filename, line, r, + "Failed to parse size value: %s", rvalue); + + if (ltype > 0) + *sz = round_up_size(parsed, 4096); + else if (ltype < 0) + *sz = round_down_size(parsed, 4096); + else + *sz = parsed; + + if (*sz != parsed) + log_syntax(unit, LOG_NOTICE, filename, line, r, "Rounded %s= size %" PRIu64 " → %" PRIu64 ", a multiple of 4096.", lvalue, parsed, *sz); + + return 0; +} + +static int partition_read_definition(Partition *p, const char *path) { + + ConfigTableItem table[] = { + { "Partition", "Type", config_parse_type, 0, &p->type_uuid }, + { "Partition", "Label", config_parse_label, 0, &p->new_label }, + { "Partition", "Priority", config_parse_int32, 0, &p->priority }, + { "Partition", "Weight", config_parse_weight, 0, &p->weight }, + { "Partition", "PaddingWeight", config_parse_weight, 0, &p->padding_weight }, + { "Partition", "SizeMinBytes", config_parse_size4096, 1, &p->size_min }, + { "Partition", "SizeMaxBytes", config_parse_size4096, -1, &p->size_max }, + { "Partition", "PaddingMinBytes", config_parse_size4096, 1, &p->padding_min }, + { "Partition", "PaddingMaxBytes", config_parse_size4096, -1, &p->padding_max }, + { "Partition", "FactoryReset", config_parse_bool, 0, &p->factory_reset }, + {} + }; + int r; + + r = config_parse(NULL, path, NULL, "Partition\0", config_item_table_lookup, table, CONFIG_PARSE_WARN, p); + if (r < 0) + return r; + + if (p->size_min != UINT64_MAX && p->size_max != UINT64_MAX && p->size_min > p->size_max) + return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL), + 
"SizeMinBytes= larger than SizeMaxBytes=, refusing."); + + if (p->padding_min != UINT64_MAX && p->padding_max != UINT64_MAX && p->padding_min > p->padding_max) + return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL), + "PaddingMinBytes= larger than PaddingMaxBytes=, refusing."); + + if (sd_id128_is_null(p->type_uuid)) + return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL), + "Type= not defined, refusing."); + + return 0; +} + +static int context_read_definitions( + Context *context, + const char *directory, + const char *root) { + + _cleanup_strv_free_ char **files = NULL; + Partition *last = NULL; + char **f; + int r; + + assert(context); + + if (directory) + r = conf_files_list_strv(&files, ".conf", NULL, CONF_FILES_REGULAR|CONF_FILES_FILTER_MASKED, (const char**) STRV_MAKE(directory)); + else + r = conf_files_list_strv(&files, ".conf", root, CONF_FILES_REGULAR|CONF_FILES_FILTER_MASKED, (const char**) CONF_PATHS_STRV("repart.d")); + if (r < 0) + return log_error_errno(r, "Failed to enumerate *.conf files: %m"); + + STRV_FOREACH(f, files) { + _cleanup_(partition_freep) Partition *p = NULL; + + p = partition_new(); + if (!p) + return log_oom(); + + p->definition_path = strdup(*f); + if (!p->definition_path) + return log_oom(); + + r = partition_read_definition(p, *f); + if (r < 0) + return r; + + LIST_INSERT_AFTER(partitions, context->partitions, last, p); + last = TAKE_PTR(p); + context->n_partitions++; + } + + return 0; +} + +DEFINE_TRIVIAL_CLEANUP_FUNC(struct fdisk_context*, fdisk_unref_context); +DEFINE_TRIVIAL_CLEANUP_FUNC(struct fdisk_partition*, fdisk_unref_partition); +DEFINE_TRIVIAL_CLEANUP_FUNC(struct fdisk_parttype*, fdisk_unref_parttype); +DEFINE_TRIVIAL_CLEANUP_FUNC(struct fdisk_table*, fdisk_unref_table); + +static int determine_current_padding( + struct fdisk_context *c, + struct fdisk_table *t, + struct fdisk_partition *p, + uint64_t *ret) { + + size_t n_partitions; + uint64_t offset, next = UINT64_MAX; + + assert(c); + 
assert(t); + assert(p); + + if (!fdisk_partition_has_end(p)) + return log_error_errno(SYNTHETIC_ERRNO(EIO), "Partition has no end!"); + + offset = fdisk_partition_get_end(p); + assert(offset < UINT64_MAX / 512); + offset *= 512; + + n_partitions = fdisk_table_get_nents(t); + for (size_t i = 0; i < n_partitions; i++) { + struct fdisk_partition *q; + uint64_t start; + + q = fdisk_table_get_partition(t, i); + if (!q) + return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to read partition metadata: %m"); + + if (fdisk_partition_is_used(q) <= 0) + continue; + + if (!fdisk_partition_has_start(q)) + continue; + + start = fdisk_partition_get_start(q); + assert(start < UINT64_MAX / 512); + start *= 512; + + if (start >= offset && (next == UINT64_MAX || next > start)) + next = start; + } + + if (next == UINT64_MAX) { + /* No later partition? In that case check the end of the usable area */ + next = fdisk_get_last_lba(c); + assert(next < UINT64_MAX); + next++; /* The last LBA is one sector before the end */ + + assert(next < UINT64_MAX / 512); + next *= 512; + + if (offset > next) + return log_error_errno(SYNTHETIC_ERRNO(EIO), "Partition end beyond disk end."); + } + + assert(next >= offset); + offset = round_up_size(offset, 4096); + next = round_down_size(next, 4096); + + if (next >= offset) /* Check again, rounding might have fucked things up */ + *ret = next - offset; + else + *ret = 0; + + return 0; +} + +static int fdisk_ask_cb(struct fdisk_context *c, struct fdisk_ask *ask, void *data) { + _cleanup_free_ char *ids = NULL; + int r; + + if (fdisk_ask_get_type(ask) != FDISK_ASKTYPE_STRING) + return -EINVAL; + + ids = new(char, ID128_UUID_STRING_MAX); + if (!ids) + return -ENOMEM; + + r = fdisk_ask_string_set_result(ask, id128_to_uuid_string(*(sd_id128_t*) data, ids)); + if (r < 0) + return r; + + TAKE_PTR(ids); + return 0; +} + +static int fdisk_set_disklabel_id_by_uuid(struct fdisk_context *c, sd_id128_t id) { + int r; + + r = fdisk_set_ask(c, fdisk_ask_cb, &id); + if (r 
< 0) + return r; + + r = fdisk_set_disklabel_id(c); + if (r < 0) + return r; + + return fdisk_set_ask(c, NULL, NULL); +} + +#define DISK_UUID_TOKEN "disk-uuid" + +static int disk_acquire_uuid(Context *context, sd_id128_t *ret) { + union { + unsigned char md[SHA256_DIGEST_LENGTH]; + sd_id128_t id; + } result; + + assert(context); + assert(ret); + + /* Calculate the HMAC-SHA256 of the string "disk-uuid", keyed off the machine ID. We use the machine + * ID as key (and not as cleartext!) since it's the machine ID we don't want to leak. */ + + if (!HMAC(EVP_sha256(), + &context->seed, sizeof(context->seed), + (const unsigned char*) DISK_UUID_TOKEN, strlen(DISK_UUID_TOKEN), + result.md, NULL)) + return log_error_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE), "HMAC-SHA256 calculation failed."); + + /* Take the first half, mark it as v4 UUID */ + assert_cc(sizeof(result.md) == sizeof(result.id) * 2); + *ret = id128_make_v4_uuid(result.id); + return 0; +} + +static int context_load_partition_table(Context *context, const char *node) { + _cleanup_(fdisk_unref_contextp) struct fdisk_context *c = NULL; + _cleanup_(fdisk_unref_tablep) struct fdisk_table *t = NULL; + uint64_t left_boundary = UINT64_MAX, first_lba, last_lba, nsectors; + _cleanup_free_ char *disk_uuid_string = NULL; + bool from_scratch = false; + sd_id128_t disk_uuid; + size_t n_partitions; + int r; + + assert(context); + assert(node); + + c = fdisk_new_context(); + if (!c) + return log_oom(); + + r = fdisk_assign_device(c, node, arg_dry_run); + if (r < 0) + return log_error_errno(r, "Failed to open device: %m"); + + /* Tell udev not to interfere while we are processing the device */ + if (flock(fdisk_get_devfd(c), arg_dry_run ? 
LOCK_SH : LOCK_EX) < 0) + return log_error_errno(errno, "Failed to lock block device: %m"); + + switch (arg_empty) { + + case EMPTY_REFUSE: + /* Refuse empty disks, insist on an existing GPT partition table */ + if (!fdisk_is_labeltype(c, FDISK_DISKLABEL_GPT)) + return log_notice_errno(SYNTHETIC_ERRNO(EHWPOISON), "Disk %s has no GPT disk label, not repartitioning.", node); + + break; + + case EMPTY_REQUIRE: + /* Require an empty disk, refuse any existing partition table */ + r = fdisk_has_label(c); + if (r < 0) + return log_error_errno(r, "Failed to determine whether disk %s has a disk label: %m", node); + if (r > 0) + return log_notice_errno(SYNTHETIC_ERRNO(EHWPOISON), "Disk %s already has a disk label, refusing.", node); + + from_scratch = true; + break; + + case EMPTY_ALLOW: + /* Allow both an empty disk and an existing partition table, but only GPT */ + r = fdisk_has_label(c); + if (r < 0) + return log_error_errno(r, "Failed to determine whether disk %s has a disk label: %m", node); + if (r > 0) { + if (!fdisk_is_labeltype(c, FDISK_DISKLABEL_GPT)) + return log_notice_errno(SYNTHETIC_ERRNO(EHWPOISON), "Disk %s has non-GPT disk label, not repartitioning.", node); + } else + from_scratch = true; + + break; + + case EMPTY_FORCE: + /* Always reinitiaize the disk, don't consider what there was on the disk before */ + from_scratch = true; + break; + } + + if (from_scratch) { + r = fdisk_enable_wipe(c, true); + if (r < 0) + return log_error_errno(r, "Failed to enable wiping of disk signature: %m"); + + r = fdisk_create_disklabel(c, "gpt"); + if (r < 0) + return log_error_errno(r, "Failed to create GPT disk label: %m"); + + r = disk_acquire_uuid(context, &disk_uuid); + if (r < 0) + return log_error_errno(r, "Failed to acquire disk GPT uuid: %m"); + + r = fdisk_set_disklabel_id_by_uuid(c, disk_uuid); + if (r < 0) + return log_error_errno(r, "Failed to set GPT disk label: %m"); + + goto add_initial_free_area; + } + + r = fdisk_get_disklabel_id(c, &disk_uuid_string); + if 
(r < 0) + return log_error_errno(r, "Failed to get current GPT disk label UUID: %m"); + + r = sd_id128_from_string(disk_uuid_string, &disk_uuid); + if (r < 0) + return log_error_errno(r, "Failed to parse current GPT disk label UUID: %m"); + + if (sd_id128_is_null(disk_uuid)) { + r = disk_acquire_uuid(context, &disk_uuid); + if (r < 0) + return log_error_errno(r, "Failed to acquire disk GPT uuid: %m"); + + /* Apply the UUID derived above; fdisk_set_disklabel_id() alone takes no UUID argument and + * so would not use disk_uuid. Same call as in the from-scratch path. */ + r = fdisk_set_disklabel_id_by_uuid(c, disk_uuid); + if (r < 0) + return log_error_errno(r, "Failed to set GPT disk label: %m"); + } + + r = fdisk_get_partitions(c, &t); + if (r < 0) + return log_error_errno(r, "Failed to acquire partition table: %m"); + + n_partitions = fdisk_table_get_nents(t); + for (size_t i = 0; i < n_partitions; i++) { + _cleanup_free_ char *label_copy = NULL; + Partition *pp, *last = NULL; + struct fdisk_partition *p; + struct fdisk_parttype *pt; + const char *pts, *ids, *label; + uint64_t sz, start; + bool found = false; + sd_id128_t ptid, id; + size_t partno; + + p = fdisk_table_get_partition(t, i); + if (!p) + return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to read partition metadata: %m"); + + if (fdisk_partition_is_used(p) <= 0) + continue; + + if (fdisk_partition_has_start(p) <= 0 || + fdisk_partition_has_size(p) <= 0 || + fdisk_partition_has_partno(p) <= 0) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Found a partition without a position, size or number."); + + pt = fdisk_partition_get_type(p); + if (!pt) + return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to acquire type of partition: %m"); + + pts = fdisk_parttype_get_string(pt); + if (!pts) + return log_error_errno(SYNTHETIC_ERRNO(EIO), "Failed to acquire type of partition as string: %m"); + + r = sd_id128_from_string(pts, &ptid); + if (r < 0) + return log_error_errno(r, "Failed to parse partition type UUID %s: %m", pts); + + ids = fdisk_partition_get_uuid(p); + if (!ids) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), "Found a partition without a UUID."); + + r = 
sd_id128_from_string(ids, &id); + if (r < 0) + return log_error_errno(r, "Failed to parse partition UUID %s: %m", ids); + + label = fdisk_partition_get_name(p); + if (!isempty(label)) { + label_copy = strdup(label); + if (!label_copy) + return log_oom(); + } + + sz = fdisk_partition_get_size(p); + assert_se(sz <= UINT64_MAX/512); + sz *= 512; + + start = fdisk_partition_get_start(p); + assert_se(start <= UINT64_MAX/512); + start *= 512; + + partno = fdisk_partition_get_partno(p); + + if (left_boundary == UINT64_MAX || left_boundary > start) + left_boundary = start; + + /* Assign this existing partition to the first partition of the right type that doesn't have + * an existing one assigned yet. */ + LIST_FOREACH(partitions, pp, context->partitions) { + last = pp; + + if (!sd_id128_equal(pp->type_uuid, ptid)) + continue; + + if (!pp->current_partition) { + pp->current_uuid = id; + pp->current_size = sz; + pp->offset = start; + pp->partno = partno; + pp->current_label = TAKE_PTR(label_copy); + + pp->current_partition = p; + fdisk_ref_partition(p); + + r = determine_current_padding(c, t, p, &pp->current_padding); + if (r < 0) + return r; + + if (pp->current_padding > 0) { + r = context_add_free_area(context, pp->current_padding, pp); + if (r < 0) + return r; + } + + found = true; + break; + } + } + + /* If we have no matching definition, create a new one. 
*/ + if (!found) { + _cleanup_(partition_freep) Partition *np = NULL; + + np = partition_new(); + if (!np) + return log_oom(); + + np->current_uuid = id; + np->type_uuid = ptid; + np->current_size = sz; + np->offset = start; + np->partno = partno; + np->current_label = TAKE_PTR(label_copy); + + np->current_partition = p; + fdisk_ref_partition(p); + + r = determine_current_padding(c, t, p, &np->current_padding); + if (r < 0) + return r; + + if (np->current_padding > 0) { + r = context_add_free_area(context, np->current_padding, np); + if (r < 0) + return r; + } + + LIST_INSERT_AFTER(partitions, context->partitions, last, TAKE_PTR(np)); + context->n_partitions++; + } + } + +add_initial_free_area: + nsectors = fdisk_get_nsectors(c); + assert(nsectors <= UINT64_MAX/512); + nsectors *= 512; + + first_lba = fdisk_get_first_lba(c); + assert(first_lba <= UINT64_MAX/512); + first_lba *= 512; + + last_lba = fdisk_get_last_lba(c); + assert(last_lba < UINT64_MAX); + last_lba++; + assert(last_lba <= UINT64_MAX/512); + last_lba *= 512; + + assert(last_lba >= first_lba); + + if (left_boundary == UINT64_MAX) { + /* No partitions at all? Then the whole disk is up for grabs. 
*/ + + first_lba = round_up_size(first_lba, 4096); + last_lba = round_down_size(last_lba, 4096); + + if (last_lba > first_lba) { + r = context_add_free_area(context, last_lba - first_lba, NULL); + if (r < 0) + return r; + } + } else { + /* Add space left of first partition */ + assert(left_boundary >= first_lba); + + first_lba = round_up_size(first_lba, 4096); + left_boundary = round_down_size(left_boundary, 4096); + last_lba = round_down_size(last_lba, 4096); + + if (left_boundary > first_lba) { + r = context_add_free_area(context, left_boundary - first_lba, NULL); + if (r < 0) + return r; + } + } + + context->start = first_lba; + context->end = last_lba; + context->total = nsectors; + context->fdisk_context = TAKE_PTR(c); + + return from_scratch; +} + +static void context_unload_partition_table(Context *context) { + Partition *p, *next; + + assert(context); + + LIST_FOREACH_SAFE(partitions, p, next, context->partitions) { + + /* Entirely remove partitions that have no configuration */ + if (PARTITION_IS_FOREIGN(p)) { + partition_unlink_and_free(context, p); + continue; + } + + /* Otherwise drop all data we read off the block device and everything we might have + * calculated based on it */ + + p->dropped = false; + p->current_size = UINT64_MAX; + p->new_size = UINT64_MAX; + p->current_padding = UINT64_MAX; + p->new_padding = UINT64_MAX; + p->partno = UINT64_MAX; + p->offset = UINT64_MAX; + + if (p->current_partition) { + fdisk_unref_partition(p->current_partition); + p->current_partition = NULL; + } + + if (p->new_partition) { + fdisk_unref_partition(p->new_partition); + p->new_partition = NULL; + } + + p->padding_area = NULL; + p->allocated_to_area = NULL; + + p->current_uuid = p->new_uuid = SD_ID128_NULL; + } + + context->start = UINT64_MAX; + context->end = UINT64_MAX; + context->total = UINT64_MAX; + + if (context->fdisk_context) { + fdisk_unref_context(context->fdisk_context); + context->fdisk_context = NULL; + } + + context_free_free_areas(context); +} + 
+static int format_size_change(uint64_t from, uint64_t to, char **ret) { + char format_buffer1[FORMAT_BYTES_MAX], format_buffer2[FORMAT_BYTES_MAX], *buf; + + if (from != UINT64_MAX) + format_bytes(format_buffer1, sizeof(format_buffer1), from); + if (to != UINT64_MAX) + format_bytes(format_buffer2, sizeof(format_buffer2), to); + + if (from != UINT64_MAX) { + if (from == to || to == UINT64_MAX) + buf = strdup(format_buffer1); + else + buf = strjoin(format_buffer1, " ", special_glyph(SPECIAL_GLYPH_ARROW), " ", format_buffer2); + } else if (to != UINT64_MAX) + buf = strjoin(special_glyph(SPECIAL_GLYPH_ARROW), " ", format_buffer2); + else { + *ret = NULL; + return 0; + } + + if (!buf) + return log_oom(); + + *ret = TAKE_PTR(buf); + return 1; +} + +static const char *partition_label(const Partition *p) { + assert(p); + + if (p->new_label) + return p->new_label; + + if (p->current_label) + return p->current_label; + + return gpt_partition_type_uuid_to_string(p->type_uuid); +} + +static int context_dump_partitions(Context *context, const char *node) { + _cleanup_(table_unrefp) Table *t = NULL; + uint64_t sum_padding = 0, sum_size = 0; + Partition *p; + int r; + + t = table_new("type", "label", "uuid", "file", "node", "offset", "raw size", "size", "raw padding", "padding"); + if (!t) + return log_oom(); + + if (!DEBUG_LOGGING) + (void) table_set_display(t, 0, 1, 2, 3, 4, 7, 9, (size_t) -1); + + (void) table_set_align_percent(t, table_get_cell(t, 0, 4), 100); + (void) table_set_align_percent(t, table_get_cell(t, 0, 5), 100); + + LIST_FOREACH(partitions, p, context->partitions) { + _cleanup_free_ char *size_change = NULL, *padding_change = NULL, *partname = NULL; + char uuid_buffer[ID128_UUID_STRING_MAX]; + const char *label; + + if (p->dropped) + continue; + + label = partition_label(p); + partname = p->partno != UINT64_MAX ? 
fdisk_partname(node, p->partno+1) : NULL; + + r = format_size_change(p->current_size, p->new_size, &size_change); + if (r < 0) + return r; + + r = format_size_change(p->current_padding, p->new_padding, &padding_change); + if (r < 0) + return r; + + if (p->new_size != UINT64_MAX) + sum_size += p->new_size; + if (p->new_padding != UINT64_MAX) + sum_padding += p->new_padding; + + r = table_add_many( + t, + TABLE_STRING, gpt_partition_type_uuid_to_string_harder(p->type_uuid, uuid_buffer), + TABLE_STRING, label ?: "-", TABLE_SET_COLOR, label ? NULL : ansi_grey(), + TABLE_UUID, sd_id128_is_null(p->new_uuid) ? p->current_uuid : p->new_uuid, + TABLE_STRING, p->definition_path ? basename(p->definition_path) : "-", TABLE_SET_COLOR, p->definition_path ? NULL : ansi_grey(), + TABLE_STRING, partname ?: "no", TABLE_SET_COLOR, partname ? NULL : ansi_highlight(), + TABLE_UINT64, p->offset, + TABLE_UINT64, p->new_size, + TABLE_STRING, size_change, TABLE_SET_COLOR, !p->partitions_next && sum_size > 0 ? ansi_underline() : NULL, + TABLE_UINT64, p->new_padding, + TABLE_STRING, padding_change, TABLE_SET_COLOR, !p->partitions_next && sum_padding > 0 ? 
ansi_underline() : NULL); + if (r < 0) + return log_error_errno(r, "Failed to add row to table: %m"); + } + + if (sum_padding > 0 || sum_size > 0) { + char s[FORMAT_BYTES_MAX]; + const char *a, *b; + + a = strjoina(special_glyph(SPECIAL_GLYPH_SIGMA), " = ", format_bytes(s, sizeof(s), sum_size)); + b = strjoina(special_glyph(SPECIAL_GLYPH_SIGMA), " = ", format_bytes(s, sizeof(s), sum_padding)); + + r = table_add_many( + t, + TABLE_EMPTY, + TABLE_EMPTY, + TABLE_EMPTY, + TABLE_EMPTY, + TABLE_EMPTY, + TABLE_EMPTY, + TABLE_EMPTY, + TABLE_STRING, a, + TABLE_EMPTY, + TABLE_STRING, b); + if (r < 0) + return log_error_errno(r, "Failed to add row to table: %m"); + } + + r = table_print(t, stdout); + if (r < 0) + return log_error_errno(r, "Failed to dump table: %m"); + + return 0; +} + +static void context_bar_char_process_partition( + Context *context, + Partition *bar[], + size_t n, + Partition *p, + size_t *ret_start) { + + uint64_t from, to, total; + size_t x, y; + + assert(context); + assert(bar); + assert(n > 0); + assert(p); + + if (p->dropped) + return; + + assert(p->offset != UINT64_MAX); + assert(p->new_size != UINT64_MAX); + + from = p->offset; + to = from + p->new_size; + + assert(context->end >= context->start); + total = context->end - context->start; + + assert(from >= context->start); + assert(from <= context->end); + x = (from - context->start) * n / total; + + assert(to >= context->start); + assert(to <= context->end); + y = (to - context->start) * n / total; + + assert(x <= y); + assert(y <= n); + + for (size_t i = x; i < y; i++) + bar[i] = p; + + *ret_start = x; +} + +static int partition_hint(const Partition *p, const char *node, char **ret) { + _cleanup_free_ char *buf = NULL; + char ids[ID128_UUID_STRING_MAX]; + const char *label; + sd_id128_t id; + + /* Tries really hard to find a suitable description for this partition */ + + if (p->definition_path) { + buf = strdup(basename(p->definition_path)); + goto done; + } + + label = partition_label(p); + if 
(!isempty(label)) { + buf = strdup(label); + goto done; + } + + if (p->partno != UINT64_MAX) { + buf = fdisk_partname(node, p->partno+1); + goto done; + } + + if (!sd_id128_is_null(p->new_uuid)) + id = p->new_uuid; + else if (!sd_id128_is_null(p->current_uuid)) + id = p->current_uuid; + else + id = p->type_uuid; + + buf = strdup(id128_to_uuid_string(id, ids)); + +done: + if (!buf) + return -ENOMEM; + + *ret = TAKE_PTR(buf); + return 0; +} + +static int context_dump_partition_bar(Context *context, const char *node) { + _cleanup_free_ Partition **bar = NULL; + _cleanup_free_ size_t *start_array = NULL; + Partition *p, *last = NULL; + bool z = false; + size_t c, j = 0; + + assert((c = columns()) >= 2); + c -= 2; /* We do not use the leftmost and rightmost character cell */ + + bar = new0(Partition*, c); + if (!bar) + return log_oom(); + + start_array = new(size_t, context->n_partitions); + if (!start_array) + return log_oom(); + + LIST_FOREACH(partitions, p, context->partitions) + context_bar_char_process_partition(context, bar, c, p, start_array + j++); + + putc(' ', stdout); + + for (size_t i = 0; i < c; i++) { + if (bar[i]) { + if (last != bar[i]) + z = !z; + + fputs(z ? 
ansi_green() : ansi_yellow(), stdout); + fputs(special_glyph(SPECIAL_GLYPH_DARK_SHADE), stdout); + } else { + fputs(ansi_normal(), stdout); + fputs(special_glyph(SPECIAL_GLYPH_LIGHT_SHADE), stdout); + } + + last = bar[i]; + } + + fputs(ansi_normal(), stdout); + putc('\n', stdout); + + for (size_t i = 0; i < context->n_partitions; i++) { + _cleanup_free_ char **line = NULL; + + line = new0(char*, c); + if (!line) + return log_oom(); + + j = 0; + LIST_FOREACH(partitions, p, context->partitions) { + _cleanup_free_ char *d = NULL; + j++; + + if (i < context->n_partitions - j) { + + if (line[start_array[j-1]]) { + const char *e; + + /* Upgrade final corner to the right with a branch to the right */ + e = startswith(line[start_array[j-1]], special_glyph(SPECIAL_GLYPH_TREE_RIGHT)); + if (e) { + d = strjoin(special_glyph(SPECIAL_GLYPH_TREE_BRANCH), e); + if (!d) + return log_oom(); + } + } + + if (!d) { + d = strdup(special_glyph(SPECIAL_GLYPH_TREE_VERTICAL)); + if (!d) + return log_oom(); + } + + } else if (i == context->n_partitions - j) { + _cleanup_free_ char *hint = NULL; + + (void) partition_hint(p, node, &hint); + + if (streq_ptr(line[start_array[j-1]], special_glyph(SPECIAL_GLYPH_TREE_VERTICAL))) + d = strjoin(special_glyph(SPECIAL_GLYPH_TREE_BRANCH), " ", strna(hint)); + else + d = strjoin(special_glyph(SPECIAL_GLYPH_TREE_RIGHT), " ", strna(hint)); + + if (!d) + return log_oom(); + } + + if (d) + free_and_replace(line[start_array[j-1]], d); + } + + putc(' ', stdout); + + j = 0; + while (j < c) { + if (line[j]) { + fputs(line[j], stdout); + j += utf8_console_width(line[j]); + } else { + putc(' ', stdout); + j++; + } + } + + putc('\n', stdout); + + for (j = 0; j < c; j++) + free(line[j]); + } + + return 0; +} + +static bool context_changed(const Context *context) { + Partition *p; + + LIST_FOREACH(partitions, p, context->partitions) { + if (p->dropped) + continue; + + if (p->allocated_to_area) + return true; + + if (p->new_size != p->current_size) + return true; + } 
+ + return false; +} + +static int context_wipe_partition(Context *context, Partition *p) { + _cleanup_(blkid_free_probep) blkid_probe probe = NULL; + int r; + + assert(context); + assert(p); + assert(!PARTITION_EXISTS(p)); /* Safety check: never wipe existing partitions */ + + probe = blkid_new_probe(); + if (!probe) + return log_oom(); + + assert(p->offset != UINT64_MAX); + assert(p->new_size != UINT64_MAX); + + errno = 0; + r = blkid_probe_set_device(probe, fdisk_get_devfd(context->fdisk_context), p->offset, p->new_size); + if (r < 0) + return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to allocate device probe for partition %" PRIu64 ".", p->partno); + + errno = 0; + if (blkid_probe_enable_superblocks(probe, true) < 0 || + blkid_probe_set_superblocks_flags(probe, BLKID_SUBLKS_MAGIC|BLKID_SUBLKS_BADCSUM) < 0 || + blkid_probe_enable_partitions(probe, true) < 0 || + blkid_probe_set_partitions_flags(probe, BLKID_PARTS_MAGIC) < 0) + return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to enable superblock and partition probing for partition %" PRIu64 ".", p->partno); + + for (;;) { + errno = 0; + r = blkid_do_probe(probe); + if (r < 0) + return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to probe for file systems."); + if (r > 0) + break; + + errno = 0; + if (blkid_do_wipe(probe, false) < 0) + return log_error_errno(errno ?: SYNTHETIC_ERRNO(EIO), "Failed to wipe file system signature."); + } + + log_info("Successfully wiped file system signatures from partition %" PRIu64 ".", p->partno); + return 0; +} + +static int context_discard_range(Context *context, uint64_t offset, uint64_t size) { + struct stat st; + int fd; + + assert(context); + assert(offset != UINT64_MAX); + assert(size != UINT64_MAX); + + if (size <= 0) + return 0; + + fd = fdisk_get_devfd(context->fdisk_context); + assert(fd >= 0); + + if (fstat(fd, &st) < 0) + return -errno; + + if (S_ISREG(st.st_mode)) { + if (fallocate(fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, 
offset, size) < 0) { + if (ERRNO_IS_NOT_SUPPORTED(errno)) + return -EOPNOTSUPP; + + return -errno; + } + + return 1; + } + + if (S_ISBLK(st.st_mode)) { + uint64_t range[2], end; + + range[0] = round_up_size(offset, 512); + + end = offset + size; + if (end <= range[0]) + return 0; + + range[1] = round_down_size(end - range[0], 512); + if (range[1] <= 0) + return 0; + + if (ioctl(fd, BLKDISCARD, range) < 0) { + if (ERRNO_IS_NOT_SUPPORTED(errno)) + return -EOPNOTSUPP; + + return -errno; + } + + return 1; + } + + return -EOPNOTSUPP; +} + +static int context_discard_partition(Context *context, Partition *p) { + int r; + + assert(context); + assert(p); + + assert(p->offset != UINT64_MAX); + assert(p->new_size != UINT64_MAX); + assert(!PARTITION_EXISTS(p)); /* Safety check: never discard existing partitions */ + + if (!arg_discard) + return 0; + + r = context_discard_range(context, p->offset, p->new_size); + if (r == -EOPNOTSUPP) { + log_info("Storage does not support discarding, not discarding data in new partition %" PRIu64 ".", p->partno); + return 0; + } + if (r == 0) { + log_info("Partition %" PRIu64 " too short for discard, skipping.", p->partno); + return 0; + } + if (r < 0) + return log_error_errno(r, "Failed to discard data for new partition %" PRIu64 ".", p->partno); + + log_info("Successfully discarded data from partition %" PRIu64 ".", p->partno); + return 1; +} + +static int context_discard_gap_after(Context *context, Partition *p) { + uint64_t gap, next = UINT64_MAX; + Partition *q; + int r; + + assert(context); + assert(!p || (p->offset != UINT64_MAX && p->new_size != UINT64_MAX)); + + if (p) + gap = p->offset + p->new_size; + else + gap = context->start; + + LIST_FOREACH(partitions, q, context->partitions) { + if (q->dropped) + continue; + + assert(q->offset != UINT64_MAX); + assert(q->new_size != UINT64_MAX); + + if (q->offset < gap) + continue; + + if (next == UINT64_MAX || q->offset < next) + next = q->offset; + } + + if (next == UINT64_MAX) { + next = 
context->end; + if (gap > next) + return log_error_errno(SYNTHETIC_ERRNO(EIO), "Partition end beyond disk end."); + } + + assert(next >= gap); + r = context_discard_range(context, gap, next - gap); + if (r == -EOPNOTSUPP) { + if (p) + log_info("Storage does not support discarding, not discarding gap after partition %" PRIu64 ".", p->partno); + else + log_info("Storage does not support discarding, not discarding gap at beginning of disk."); + return 0; + } + if (r == 0) /* Too short */ + return 0; + if (r < 0) { + if (p) + return log_error_errno(r, "Failed to discard gap after partition %" PRIu64 ".", p->partno); + else + return log_error_errno(r, "Failed to discard gap at beginning of disk."); + } + + if (p) + log_info("Successfully discarded gap after partition %" PRIu64 ".", p->partno); + else + log_info("Successfully discarded gap at beginning of disk."); + + return 0; +} + +static int context_wipe_and_discard(Context *context, bool from_scratch) { + Partition *p; + int r; + + assert(context); + + /* Wipe and discard the contents of all partitions we are about to create. We skip the discarding if + * we were supposed to start from scratch anyway, as in that case we just discard the whole block + * device in one go early on. 
*/ + + LIST_FOREACH(partitions, p, context->partitions) { + + if (!p->allocated_to_area) + continue; + + if (!from_scratch) { + r = context_discard_partition(context, p); + if (r < 0) + return r; + } + + r = context_wipe_partition(context, p); + if (r < 0) + return r; + + if (!from_scratch) { + r = context_discard_gap_after(context, p); + if (r < 0) + return r; + } + } + + if (!from_scratch) { + r = context_discard_gap_after(context, NULL); + if (r < 0) + return r; + } + + return 0; +} + +static int partition_acquire_uuid(Context *context, Partition *p, sd_id128_t *ret) { + struct { + sd_id128_t type_uuid; + uint64_t counter; + } _packed_ plaintext = {}; + union { + unsigned char md[SHA256_DIGEST_LENGTH]; + sd_id128_t id; + } result; + + uint64_t k = 0; + Partition *q; + int r; + + assert(context); + assert(p); + assert(ret); + + /* Calculate a good UUID for the indicated partition. We want a certain degree of reproducibility, + * hence we won't generate the UUIDs randomly. Instead we use a cryptographic hash (precisely: + * HMAC-SHA256) to derive them from a single seed. The seed is generally the machine ID of the + * installation we are processing, but if random behaviour is desired can be random, too. We use the + * seed value as key for the HMAC (since the machine ID is something we generally don't want to leak) + * and the partition type as plaintext. The partition type is suffixed with a counter (only for the + * second and later partition of the same type) if we have more than one partition of the same + * type. 
Or in other words: + * + * With: + * SEED := /etc/machine-id + * + * If first partition instance of type TYPE_UUID: + * PARTITION_UUID := HMAC-SHA256(SEED, TYPE_UUID) + * + * For all later partition instances of type TYPE_UUID with INSTANCE being the LE64 encoded instance number: + * PARTITION_UUID := HMAC-SHA256(SEED, TYPE_UUID || INSTANCE) + */ + + LIST_FOREACH(partitions, q, context->partitions) { + if (p == q) + break; + + if (!sd_id128_equal(p->type_uuid, q->type_uuid)) + continue; + + k++; + } + + plaintext.type_uuid = p->type_uuid; + plaintext.counter = htole64(k); + + if (!HMAC(EVP_sha256(), + &context->seed, sizeof(context->seed), + (const unsigned char*) &plaintext, k == 0 ? sizeof(sd_id128_t) : sizeof(plaintext), + result.md, NULL)) + return log_error_errno(SYNTHETIC_ERRNO(ENOTRECOVERABLE), "SHA256 calculation failed."); + + /* Take the first half, mark it as v4 UUID */ + assert_cc(sizeof(result.md) == sizeof(result.id) * 2); + result.id = id128_make_v4_uuid(result.id); + + /* Ensure this partition UUID is actually unique, and there's no remaining partition from an earlier run? 
*/ + LIST_FOREACH(partitions, q, context->partitions) { + if (p == q) + continue; + + if (sd_id128_equal(q->current_uuid, result.id) || + sd_id128_equal(q->new_uuid, result.id)) { + log_warning("Partition UUID calculated from seed for partition %" PRIu64 " exists already, reverting to randomized UUID.", p->partno); + + r = sd_id128_randomize(&result.id); + if (r < 0) + return log_error_errno(r, "Failed to generate randomized UUID: %m"); + + break; + } + } + + *ret = result.id; + return 0; +} + +static int partition_acquire_label(Context *context, Partition *p, char **ret) { + _cleanup_free_ char *label = NULL; + const char *prefix; + unsigned k = 1; + + assert(context); + assert(p); + assert(ret); + + prefix = gpt_partition_type_uuid_to_string(p->type_uuid); + if (!prefix) + prefix = "linux"; + + for (;;) { + const char *ll = label ?: prefix; + bool retry = false; + Partition *q; + + LIST_FOREACH(partitions, q, context->partitions) { + if (p == q) + break; + + if (streq_ptr(ll, q->current_label) || + streq_ptr(ll, q->new_label)) { + retry = true; + break; + } + } + + if (!retry) + break; + + label = mfree(label); + + + if (asprintf(&label, "%s-%u", prefix, ++k) < 0) + return log_oom(); + } + + if (!label) { + label = strdup(prefix); + if (!label) + return log_oom(); + } + + *ret = TAKE_PTR(label); + return 0; +} + +static int context_acquire_partition_uuids_and_labels(Context *context) { + Partition *p; + int r; + + assert(context); + + LIST_FOREACH(partitions, p, context->partitions) { + assert(sd_id128_is_null(p->new_uuid)); + assert(!p->new_label); + + /* Never touch foreign partitions */ + if (PARTITION_IS_FOREIGN(p)) { + p->new_uuid = p->current_uuid; + + if (p->current_label) { + p->new_label = strdup(p->current_label); + if (!p->new_label) + return log_oom(); + } + + continue; + } + + if (!sd_id128_is_null(p->current_uuid)) + p->new_uuid = p->current_uuid; /* Never change initialized UUIDs */ + else { + r = partition_acquire_uuid(context, p, &p->new_uuid); + 
if (r < 0) + return r; + } + + if (!isempty(p->current_label)) { + p->new_label = strdup(p->current_label); /* never change initialized labels */ + if (!p->new_label) + return log_oom(); + } else { + r = partition_acquire_label(context, p, &p->new_label); + if (r < 0) + return r; + } + } + + return 0; +} + +static int device_kernel_partitions_supported(int fd) { + struct loop_info64 info; + struct stat st; + + assert(fd >= 0); + + if (fstat(fd, &st) < 0) + return log_error_errno(errno, "Failed to fstat() image file: %m"); + if (!S_ISBLK(st.st_mode)) + return false; + + if (ioctl(fd, LOOP_GET_STATUS64, &info) < 0) { + + if (ERRNO_IS_NOT_SUPPORTED(errno) || errno == EINVAL) + return true; /* not a loopback device, let's assume partitions are supported */ + + return log_error_errno(errno, "Failed to issue LOOP_GET_STATUS64 on block device: %m"); + } + +#if HAVE_VALGRIND_MEMCHECK_H + /* Valgrind currently doesn't know LOOP_GET_STATUS64. Remove this once it does */ + VALGRIND_MAKE_MEM_DEFINED(&info, sizeof(info)); +#endif + + return FLAGS_SET(info.lo_flags, LO_FLAGS_PARTSCAN); +} + +static int context_write_partition_table( + Context *context, + const char *node, + bool from_scratch) { + + _cleanup_(fdisk_unref_tablep) struct fdisk_table *original_table = NULL; + int capable, r; + Partition *p; + + assert(context); + + if (arg_pretty > 0 || + (arg_pretty < 0 && isatty(STDOUT_FILENO) > 0)) { + + if (context->n_partitions == 0) + puts("Empty partition table."); + else + (void) context_dump_partitions(context, node); + + putc('\n', stdout); + + (void) context_dump_partition_bar(context, node); + putc('\n', stdout); + fflush(stdout); + } + + if (!from_scratch && !context_changed(context)) { + log_info("No changes."); + return 0; + } + + if (arg_dry_run) { + log_notice("Refusing to repartition, please re-run with --dry-run=no."); + return 0; + } + + log_info("Applying changes."); + + if (from_scratch) { + r = context_discard_range(context, 0, context->total); + if (r == 
-EOPNOTSUPP) + log_info("Storage does not support discarding, not discarding entire block device data."); + else if (r < 0) + return log_error_errno(r, "Failed to discard entire block device: %m"); + else if (r > 0) + log_info("Discarded entire block device."); + } + + r = fdisk_get_partitions(context->fdisk_context, &original_table); + if (r < 0) + return log_error_errno(r, "Failed to acquire partition table: %m"); + + /* Wipe fs signatures and discard sectors where the new partitions are going to be placed and in the + * gaps between partitions, just to be sure. */ + r = context_wipe_and_discard(context, from_scratch); + if (r < 0) + return r; + + LIST_FOREACH(partitions, p, context->partitions) { + if (p->dropped) + continue; + + assert(p->new_size != UINT64_MAX); + assert(p->offset != UINT64_MAX); + assert(p->partno != UINT64_MAX); + + if (PARTITION_EXISTS(p)) { + bool changed = false; + + assert(p->current_partition); + + if (p->new_size != p->current_size) { + assert(p->new_size >= p->current_size); + assert(p->new_size % 512 == 0); + + r = fdisk_partition_size_explicit(p->current_partition, true); + if (r < 0) + return log_error_errno(r, "Failed to enable explicit sizing: %m"); + + r = fdisk_partition_set_size(p->current_partition, p->new_size / 512); + if (r < 0) + return log_error_errno(r, "Failed to grow partition: %m"); + + log_info("Growing existing partition %" PRIu64 ".", p->partno); + changed = true; + } + + if (!sd_id128_equal(p->new_uuid, p->current_uuid)) { + char buf[ID128_UUID_STRING_MAX]; + + assert(!sd_id128_is_null(p->new_uuid)); + + r = fdisk_partition_set_uuid(p->current_partition, id128_to_uuid_string(p->new_uuid, buf)); + if (r < 0) + return log_error_errno(r, "Failed to set partition UUID: %m"); + + log_info("Initializing UUID of existing partition %" PRIu64 ".", p->partno); + changed = true; + } + + if (!streq_ptr(p->new_label, p->current_label)) { + assert(!isempty(p->new_label)); + + r = fdisk_partition_set_name(p->current_partition, 
p->new_label); + if (r < 0) + return log_error_errno(r, "Failed to set partition label: %m"); + + log_info("Setting partition label of existing partition %" PRIu64 ".", p->partno); + changed = true; + } + + if (changed) { + assert(!PARTITION_IS_FOREIGN(p)); /* never touch foreign partitions */ + + r = fdisk_set_partition(context->fdisk_context, p->partno, p->current_partition); + if (r < 0) + return log_error_errno(r, "Failed to update partition: %m"); + } + } else { + _cleanup_(fdisk_unref_partitionp) struct fdisk_partition *q = NULL; + _cleanup_(fdisk_unref_parttypep) struct fdisk_parttype *t = NULL; + char ids[ID128_UUID_STRING_MAX]; + + assert(!p->new_partition); + assert(p->offset % 512 == 0); + assert(p->new_size % 512 == 0); + assert(!sd_id128_is_null(p->new_uuid)); + assert(!isempty(p->new_label)); + + t = fdisk_new_parttype(); + if (!t) + return log_oom(); + + r = fdisk_parttype_set_typestr(t, id128_to_uuid_string(p->type_uuid, ids)); + if (r < 0) + return log_error_errno(r, "Failed to initialize partition type: %m"); + + q = fdisk_new_partition(); + if (!q) + return log_oom(); + + r = fdisk_partition_set_type(q, t); + if (r < 0) + return log_error_errno(r, "Failed to set partition type: %m"); + + r = fdisk_partition_size_explicit(q, true); + if (r < 0) + return log_error_errno(r, "Failed to enable explicit sizing: %m"); + + r = fdisk_partition_set_start(q, p->offset / 512); + if (r < 0) + return log_error_errno(r, "Failed to position partition: %m"); + + r = fdisk_partition_set_size(q, p->new_size / 512); + if (r < 0) + return log_error_errno(r, "Failed to grow partition: %m"); + + r = fdisk_partition_set_partno(q, p->partno); + if (r < 0) + return log_error_errno(r, "Failed to set partition number: %m"); + + r = fdisk_partition_set_uuid(q, id128_to_uuid_string(p->new_uuid, ids)); + if (r < 0) + return log_error_errno(r, "Failed to set partition UUID: %m"); + + r = fdisk_partition_set_name(q, p->new_label); + if (r < 0) + return log_error_errno(r, "Failed 
to set partition label: %m"); + + log_info("Creating new partition %" PRIu64 ".", p->partno); + + r = fdisk_add_partition(context->fdisk_context, q, NULL); + if (r < 0) + return log_error_errno(r, "Failed to add partition: %m"); + + assert(!p->new_partition); + p->new_partition = TAKE_PTR(q); + } + } + + log_info("Writing new partition table."); + + r = fdisk_write_disklabel(context->fdisk_context); + if (r < 0) + return log_error_errno(r, "Failed to write partition table: %m"); + + capable = device_kernel_partitions_supported(fdisk_get_devfd(context->fdisk_context)); + if (capable < 0) + return capable; + if (capable > 0) { + log_info("Telling kernel to reread partition table."); + + if (from_scratch) + r = fdisk_reread_partition_table(context->fdisk_context); + else + r = fdisk_reread_changes(context->fdisk_context, original_table); + if (r < 0) + return log_error_errno(r, "Failed to reread partition table: %m"); + } else + log_notice("Not telling kernel to reread partition table, because selected image does not support kernel partition block devices."); + + log_info("All done."); + + return 0; +} + +static int context_read_seed(Context *context, const char *root) { + int r; + + assert(context); + + if (!sd_id128_is_null(context->seed)) + return 0; + + if (!arg_randomize) { + _cleanup_close_ int fd = -1; + + fd = chase_symlinks_and_open("/etc/machine-id", root, CHASE_PREFIX_ROOT, O_RDONLY|O_CLOEXEC, NULL); + if (fd == -ENOENT) + log_info("No machine ID set, using randomized partition UUIDs."); + else if (fd < 0) + return log_error_errno(fd, "Failed to determine machine ID of image: %m"); + else { + r = id128_read_fd(fd, ID128_PLAIN, &context->seed); + if (r == -ENOMEDIUM) + log_info("No machine ID set, using randomized partition UUIDs."); + else if (r < 0) + return log_error_errno(r, "Failed to parse machine ID of image: %m"); + else + return 0; + } + } + + r = sd_id128_randomize(&context->seed); + if (r < 0) + return log_error_errno(r, "Failed to generate randomized 
seed: %m"); + + return 0; +} + +static int context_factory_reset(Context *context, bool from_scratch) { + Partition *p; + size_t n = 0; + int r; + + assert(context); + + if (arg_factory_reset <= 0) + return 0; + + if (from_scratch) /* Nothing to reset if we start from scratch */ + return 0; + + if (arg_dry_run) { + log_notice("Refusing to factory reset, please re-run with --dry-run=no."); + return 0; + } + + log_info("Applying factory reset."); + + LIST_FOREACH(partitions, p, context->partitions) { + + if (!p->factory_reset || !PARTITION_EXISTS(p)) + continue; + + assert(p->partno != UINT64_MAX); + + log_info("Removing partition %" PRIu64 " for factory reset.", p->partno); + + r = fdisk_delete_partition(context->fdisk_context, p->partno); + if (r < 0) + return log_error_errno(r, "Failed to remove partition %" PRIu64 ": %m", p->partno); + + n++; + } + + if (n == 0) { + log_info("Factory reset requested, but no partitions to delete found."); + return 0; + } + + r = fdisk_write_disklabel(context->fdisk_context); + if (r < 0) + return log_error_errno(r, "Failed to write disk label: %m"); + + log_info("Successfully deleted %zu partitions.", n); + return 1; +} + +static int context_can_factory_reset(Context *context) { + Partition *p; + + assert(context); + + LIST_FOREACH(partitions, p, context->partitions) + if (p->factory_reset && PARTITION_EXISTS(p)) + return true; + + return false; +} + +static int help(void) { + _cleanup_free_ char *link = NULL; + int r; + + r = terminal_urlify_man("systemd-repart", "1", &link); + if (r < 0) + return log_oom(); + + printf("%s [OPTIONS...] 
[DEVICE]\n" + "\n%sGrow and add partitions to partition table.%s\n\n" + " -h --help Show this help\n" + " --version Show package version\n" + " --dry-run=BOOL Whether to run dry-run operation\n" + " --empty=MODE One of refuse, allow, require, force; controls how to\n" + " handle empty disks lacking partition table\n" + " --discard=BOOL Whether to discard backing blocks for new partitions\n" + " --pretty=BOOL Whether to show pretty summary before executing operation\n" + " --factory-reset=BOOL Whether to remove data partitions before recreating\n" + " them\n" + " --can-factory-reset Test whether factory reset is defined\n" + " --root=PATH Operate relative to root path\n" + " --definitions=DIR Find partitions in specified directory\n" + " --seed=UUID 128bit seed UUID to derive all UUIDs from\n" + "\nSee the %s for details.\n" + , program_invocation_short_name + , ansi_highlight(), ansi_normal() + , link + ); + + return 0; +} + +static int parse_argv(int argc, char *argv[]) { + + enum { + ARG_VERSION = 0x100, + ARG_DRY_RUN, + ARG_EMPTY, + ARG_DISCARD, + ARG_FACTORY_RESET, + ARG_CAN_FACTORY_RESET, + ARG_ROOT, + ARG_SEED, + ARG_PRETTY, + ARG_DEFINITIONS, + }; + + static const struct option options[] = { + { "help", no_argument, NULL, 'h' }, + { "version", no_argument, NULL, ARG_VERSION }, + { "dry-run", required_argument, NULL, ARG_DRY_RUN }, + { "empty", required_argument, NULL, ARG_EMPTY }, + { "discard", required_argument, NULL, ARG_DISCARD }, + { "factory-reset", required_argument, NULL, ARG_FACTORY_RESET }, + { "can-factory-reset", no_argument, NULL, ARG_CAN_FACTORY_RESET }, + { "root", required_argument, NULL, ARG_ROOT }, + { "seed", required_argument, NULL, ARG_SEED }, + { "pretty", required_argument, NULL, ARG_PRETTY }, + { "definitions", required_argument, NULL, ARG_DEFINITIONS }, + {} + }; + + int c, r; + + assert(argc >= 0); + assert(argv); + + while ((c = getopt_long(argc, argv, "h", options, NULL)) >= 0) + + switch (c) { + + case 'h': + return help(); + + 
case ARG_VERSION: + return version(); + + case ARG_DRY_RUN: + r = parse_boolean(optarg); + if (r < 0) + return log_error_errno(r, "Failed to parse --dry-run= parameter: %s", optarg); + + arg_dry_run = r; + break; + + case ARG_EMPTY: + if (isempty(optarg) || streq(optarg, "refuse")) + arg_empty = EMPTY_REFUSE; + else if (streq(optarg, "allow")) + arg_empty = EMPTY_ALLOW; + else if (streq(optarg, "require")) + arg_empty = EMPTY_REQUIRE; + else if (streq(optarg, "force")) + arg_empty = EMPTY_FORCE; + else + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Failed to parse --empty= parameter: %s", optarg); + break; + + case ARG_DISCARD: + r = parse_boolean(optarg); + if (r < 0) + return log_error_errno(r, "Failed to parse --discard= parameter: %s", optarg); + + arg_discard = r; + break; + + case ARG_FACTORY_RESET: + r = parse_boolean(optarg); + if (r < 0) + return log_error_errno(r, "Failed to parse --factory-reset= parameter: %s", optarg); + + arg_factory_reset = r; + break; + + case ARG_CAN_FACTORY_RESET: + arg_can_factory_reset = true; + break; + + case ARG_ROOT: + r = parse_path_argument_and_warn(optarg, false, &arg_root); + if (r < 0) + return r; + break; + + case ARG_SEED: + if (isempty(optarg)) { + arg_seed = SD_ID128_NULL; + arg_randomize = false; + } else if (streq(optarg, "random")) + arg_randomize = true; + else { + r = sd_id128_from_string(optarg, &arg_seed); + if (r < 0) + return log_error_errno(r, "Failed to parse seed: %s", optarg); + + arg_randomize = false; + } + + break; + + case ARG_PRETTY: + r = parse_boolean(optarg); + if (r < 0) + return log_error_errno(r, "Failed to parse --pretty= parameter: %s", optarg); + + arg_pretty = r; + break; + + case ARG_DEFINITIONS: + r = parse_path_argument_and_warn(optarg, false, &arg_definitions); + if (r < 0) + return r; + break; + + case '?': + return -EINVAL; + + default: + assert_not_reached("Unhandled option"); + } + + if (argc - optind > 1) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Expected at most 
one argument, the path to the block device."); + + if (arg_factory_reset > 0 && IN_SET(arg_empty, EMPTY_FORCE, EMPTY_REQUIRE)) + return log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Combination of --factory-reset=yes and --empty=force/--empty=require is invalid."); + + if (arg_can_factory_reset) + arg_dry_run = true; + + arg_node = argc > optind ? argv[optind] : NULL; + return 1; +} + +static int parse_proc_cmdline_factory_reset(void) { + bool b; + int r; + + if (arg_factory_reset >= 0) /* Never override what is specified on the process command line */ + return 0; + + if (!in_initrd()) /* Never honour kernel command line factory reset request outside of the initrd */ + return 0; + + r = proc_cmdline_get_bool("systemd.factory_reset", &b); + if (r < 0) + return log_error_errno(r, "Failed to parse systemd.factory_reset kernel command line argument: %m"); + if (r > 0) { + arg_factory_reset = b; + + if (b) + log_notice("Honouring factory reset requested via kernel command line."); + } + + return 0; +} + +static int parse_efi_variable_factory_reset(void) { + _cleanup_free_ char *value = NULL; + int r; + + if (arg_factory_reset >= 0) /* Never override what is specified on the process command line */ + return 0; + + if (!in_initrd()) /* Never honour EFI variable factory reset request outside of the initrd */ + return 0; + + r = efi_get_variable_string(EFI_VENDOR_SYSTEMD, "FactoryReset", &value); + if (r == -ENOENT || ERRNO_IS_NOT_SUPPORTED(r)) + return 0; + if (r < 0) + return log_error_errno(r, "Failed to read EFI variable FactoryReset: %m"); + + r = parse_boolean(value); + if (r < 0) + return log_error_errno(r, "Failed to parse EFI variable FactoryReset: %m"); + + arg_factory_reset = r; + if (r) + log_notice("Honouring factory reset requested via EFI variable FactoryReset."); + + return 0; +} + +static int remove_efi_variable_factory_reset(void) { + int r; + + r = efi_set_variable(EFI_VENDOR_SYSTEMD, "FactoryReset", NULL, 0); + if (r == -ENOENT || 
ERRNO_IS_NOT_SUPPORTED(r)) + return 0; + if (r < 0) + return log_error_errno(r, "Failed to remove EFI variable FactoryReset: %m"); + + log_info("Successfully unset EFI variable FactoryReset."); + return 0; +} + +static int acquire_root_devno(const char *p, int mode, char **ret) { + _cleanup_close_ int fd = -1; + struct stat st; + dev_t devno; + int r; + + fd = open(p, mode); + if (fd < 0) + return -errno; + + if (fstat(fd, &st) < 0) + return -errno; + + if (S_ISREG(st.st_mode)) { + char *s; + + s = strdup(p); + if (!s) + return log_oom(); + + *ret = s; + return 0; + } + + if (S_ISBLK(st.st_mode)) + devno = st.st_rdev; + else if (S_ISDIR(st.st_mode)) { + + devno = st.st_dev; + + if (major(st.st_dev) == 0) { + r = btrfs_get_block_device_fd(fd, &devno); + if (r == -ENOTTY) /* not btrfs */ + return -ENODEV; + if (r < 0) + return r; + } + + } else + return -ENOTBLK; + + /* From dm-crypt to backing partition */ + r = block_get_originating(devno, &devno); + if (r < 0) + log_debug_errno(r, "Failed to find underlying block device for '%s', ignoring: %m", p); + + /* From partition to whole disk containing it */ + r = block_get_whole_disk(devno, &devno); + if (r < 0) + log_debug_errno(r, "Failed to find whole disk block device for '%s', ignoring: %m", p); + + return device_path_make_canonical(S_IFBLK, devno, ret); +} + +static int find_root(char **ret) { + const char *t; + int r; + + if (arg_node) { + r = acquire_root_devno(arg_node, O_RDONLY|O_CLOEXEC, ret); + if (r < 0) + return log_error_errno(r, "Failed to determine backing device of %s: %m", arg_node); + + return 0; + } + + /* Let's search for the root device. We look for two cases here: first in /, and then in /usr. 
The + * latter we check for cases where / is a tmpfs and only /usr is an actual persistent block device + * (think: volatile setups) */ + + FOREACH_STRING(t, "/", "/usr") { + _cleanup_free_ char *j = NULL; + const char *p; + + if (in_initrd()) { + j = path_join("/sysroot", t); + if (!j) + return log_oom(); + + p = j; + } else + p = t; + + r = acquire_root_devno(p, O_RDONLY|O_DIRECTORY|O_CLOEXEC, ret); + if (r < 0) { + if (r != -ENODEV) + return log_error_errno(r, "Failed to determine backing device of %s: %m", p); + } else + return 0; + } + + return log_error_errno(SYNTHETIC_ERRNO(ENODEV), "Failed to discover root block device."); +} + +static int run(int argc, char *argv[]) { + _cleanup_(context_freep) Context* context = NULL; + _cleanup_free_ char *node = NULL; + bool from_scratch; + int r; + + log_show_color(true); + log_parse_environment(); + log_open(); + + if (in_initrd()) { + /* Default to operation on /sysroot when invoked in the initrd! */ + arg_root = strdup("/sysroot"); + if (!arg_root) + return log_oom(); + } + + r = parse_argv(argc, argv); + if (r <= 0) + return r; + + r = parse_proc_cmdline_factory_reset(); + if (r < 0) + return r; + + r = parse_efi_variable_factory_reset(); + if (r < 0) + return r; + + r = find_root(&node); + if (r < 0) + return r; + + context = context_new(arg_seed); + if (!context) + return log_oom(); + + r = context_read_definitions(context, arg_definitions, arg_root); + if (r < 0) + return r; + + r = context_load_partition_table(context, node); + if (r == -EHWPOISON) + return 77; /* Special return value which means "Not GPT, so not doing anything". This isn't + * really an error when called at boot. 
*/ + if (r < 0) + return r; + from_scratch = r > 0; /* Starting from scratch */ + + if (arg_can_factory_reset) { + r = context_can_factory_reset(context); + if (r < 0) + return r; + if (r == 0) + return EXIT_FAILURE; + + return 0; + } + + r = context_factory_reset(context, from_scratch); + if (r < 0) + return r; + if (r > 0) { + /* We actually did a factory reset! */ + r = remove_efi_variable_factory_reset(); + if (r < 0) + return r; + + /* Reload the reduced partition table */ + context_unload_partition_table(context); + r = context_load_partition_table(context, node); + if (r < 0) + return r; + } + +#if 0 + (void) context_dump_partitions(context, node); + putchar('\n'); +#endif + + r = context_read_seed(context, arg_root); + if (r < 0) + return r; + + /* First try to fit new partitions in, dropping by priority until it fits */ + for (;;) { + if (context_allocate_partitions(context)) + break; /* Success! */ + + if (!context_drop_one_priority(context)) + return log_error_errno(SYNTHETIC_ERRNO(ENOSPC), + "Can't fit requested partitions into free space, refusing."); + } + + /* Now assign free space according to the weight logic */ + r = context_grow_partitions(context); + if (r < 0) + return r; + + /* Now calculate where each partition gets placed */ + context_place_partitions(context); + + /* Make sure each partition has a unique UUID and unique label */ + r = context_acquire_partition_uuids_and_labels(context); + if (r < 0) + return r; + + r = context_write_partition_table(context, node, from_scratch); + if (r < 0) + return r; + + return 0; +} + +DEFINE_MAIN_FUNCTION_WITH_POSITIVE_FAILURE(run); From 64db6f3644c3f889eb4f961f5d7dfebce1fc37ef Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 12 Dec 2019 17:05:56 +0100 Subject: [PATCH 08/13] mkosi: modernize Fedora minimization had some effect, hence add some required dependencies explicitly so that we work anyway. Also, the libtool is not used anymore, drop it. 
Finally, the mkosi output format is called gpt_ext4 now, not raw_ext4 anymore. --- .mkosi/mkosi.fedora | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/.mkosi/mkosi.fedora b/.mkosi/mkosi.fedora index 911908cb77..dbd9b4df99 100644 --- a/.mkosi/mkosi.fedora +++ b/.mkosi/mkosi.fedora @@ -8,7 +8,7 @@ Distribution=fedora Release=31 [Output] -Format=raw_btrfs +Format=gpt_ext4 Bootable=yes KernelCommandLine=printk.devkmsg=on @@ -38,19 +38,20 @@ BuildPackages= libblkid-devel libcap-devel libcurl-devel + libfdisk-devel libgcrypt-devel libidn2-devel libmicrohttpd-devel libmount-devel libseccomp-devel libselinux-devel - libtool libxkbcommon-devel libxslt lz4 lz4-devel m4 meson + openssl-devel pam-devel pcre2-devel pkgconfig @@ -58,10 +59,17 @@ BuildPackages= python3-lxml qrencode-devel tree + valgrind-devel xz-devel Packages= + coreutils + cryptsetup-libs + kmod-libs libidn2 + libseccomp + procps-ng + util-linux BuildDirectory=mkosi.builddir Cache=mkosi.cache From 29ee6541a41424a90bb41786647b1a8375478fd4 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 12 Dec 2019 17:06:06 +0100 Subject: [PATCH 09/13] units: add unit file for systemd-repart to automatically run at boot --- units/meson.build | 2 ++ units/systemd-repart.service.in | 25 +++++++++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 units/systemd-repart.service.in diff --git a/units/meson.build b/units/meson.build index f7653c920c..581f44f99e 100644 --- a/units/meson.build +++ b/units/meson.build @@ -220,6 +220,8 @@ in_units = [ 'multi-user.target.wants/'], ['systemd-vconsole-setup.service', 'ENABLE_VCONSOLE'], ['systemd-volatile-root.service', ''], + ['systemd-repart.service', 'ENABLE_REPART', + 'sysinit.target.wants/ initrd-root-fs.target.wants/'], ['user-runtime-dir@.service', ''], ['user@.service', ''], ] diff --git a/units/systemd-repart.service.in b/units/systemd-repart.service.in new file mode 100644 index 0000000000..7ce6aefd29 --- /dev/null +++ 
b/units/systemd-repart.service.in @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: LGPL-2.1+ +# +# This file is part of systemd. +# +# systemd is free software; you can redistribute it and/or modify it +# under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation; either version 2.1 of the License, or +# (at your option) any later version. + +[Unit] +Description=Repartition Root Disk +Documentation=man:systemd-repart.service(8) +DefaultDependencies=no +Conflicts=shutdown.target +After=sysroot.mount +Before=initrd-root-fs.target shutdown.target +ConditionVirtualization=!container + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=@rootbindir@/systemd-repart --dry-run=no + +# The tool returns 77 if there's no GPT partition table pre-existing +SuccessExitStatus=77 From 2f62a8c688091ab1ccaef0700af440d11febd6ea Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 16 Dec 2019 17:32:44 +0100 Subject: [PATCH 10/13] test: add repart test --- test/TEST-45-REPART/Makefile | 1 + test/TEST-45-REPART/test.sh | 37 +++++++++ test/TEST-45-REPART/testsuite.sh | 124 +++++++++++++++++++++++++++++++ 3 files changed, 162 insertions(+) create mode 120000 test/TEST-45-REPART/Makefile create mode 100755 test/TEST-45-REPART/test.sh create mode 100755 test/TEST-45-REPART/testsuite.sh diff --git a/test/TEST-45-REPART/Makefile b/test/TEST-45-REPART/Makefile new file mode 120000 index 0000000000..e9f93b1104 --- /dev/null +++ b/test/TEST-45-REPART/Makefile @@ -0,0 +1 @@ +../TEST-01-BASIC/Makefile \ No newline at end of file diff --git a/test/TEST-45-REPART/test.sh b/test/TEST-45-REPART/test.sh new file mode 100755 index 0000000000..cdec7c4bdc --- /dev/null +++ b/test/TEST-45-REPART/test.sh @@ -0,0 +1,37 @@ +#!/bin/bash +set -e +TEST_DESCRIPTION="test systemd-repart" + +. 
$TEST_BASE_DIR/test-functions + +test_setup() { + create_empty_image_rootdir + + ( + LOG_LEVEL=5 + eval $(udevadm info --export --query=env --name=${LOOPDEV}p2) + + setup_basic_environment + + mask_supporting_services + dracut_install truncate sfdisk + + # setup the testsuite service + cat >$initdir/etc/systemd/system/testsuite.service < /testok + exit 0 +fi + +systemd-analyze log-level debug + +truncate -s 1G /tmp/zzz + +SEED=e2a40bf9-73f1-4278-9160-49c031e7aef8 + +systemd-repart /tmp/zzz --empty=force --dry-run=no --seed=$SEED + +sfdisk -d /tmp/zzz > /tmp/empty + +cmp /tmp/empty - < /tmp/definitions/root.conf < /tmp/definitions/home.conf < /tmp/definitions/swap.conf < /tmp/populated + +cmp /tmp/populated - < /tmp/definitions/swap.conf < /tmp/definitions/extra.conf < /tmp/populated2 + +cmp /tmp/populated2 - < /tmp/populated3 + +cmp /tmp/populated3 - < /testok + +exit 0 From 917cc8082bbd1d380ddf7cdc8ae40606a7de0bfd Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 17 Dec 2019 13:47:21 +0100 Subject: [PATCH 11/13] man: document systemd-repart --- man/repart.d.xml | 388 ++++++++++++++++++++++++++++++++ man/rules/meson.build | 2 + man/systemd-makefs@.service.xml | 1 + man/systemd-repart.xml | 269 ++++++++++++++++++++++ 4 files changed, 660 insertions(+) create mode 100644 man/repart.d.xml create mode 100644 man/systemd-repart.xml diff --git a/man/repart.d.xml b/man/repart.d.xml new file mode 100644 index 0000000000..2fe74193d3 --- /dev/null +++ b/man/repart.d.xml @@ -0,0 +1,388 @@ + + + + + + repart.d + systemd + + + + repart.d + 5 + + + + repart.d + Partition Definition Files for Automatic Boot-Time Repartitioning + + + + /etc/repart.d/*.conf +/run/repart.d/*.conf +/usr/lib/repart.d/*.conf + + + + + Description + + repart.d/*.conf files describe basic properties of partitions of block + devices of the local system. They may be used to declare types, names and sizes of partitions that shall + exist. 
The + systemd-repart8 + service reads these files and attempts to add new partitions currently missing and enlarge existing + partitions according to these definitions. Operation is generally incremental, i.e. when applied, what + exists already is left intact, and partitions are never shrunk, moved or deleted. + + These definition files are useful for implementing operating system images that are prepared and + delivered with minimally sized images (for example lacking any state or swap partitions), and which on + first boot automatically take possession of any remaining disk space following a few basic rules. + + Currently, support for partition definition files is only implemented for GPT partition + tables. + + Partition files are generally matched against any partitions already existing on disk in a simple + algorithm: the partition files are sorted by their filename (ignoring the directory prefix), and then + compared in order against existing partitions matching the same partition type UUID. Specifically, the + first existing partition with a specific partition type UUID is assigned the first definition file with + the same partition type UUID, and the second existing partition with a specific type UUID the second + partition file with the same type UUID, and so on. Any left-over partition files that have no matching + existing partition are assumed to define new partitions that shall be created. Such partitions are + appended to the end of the partition table, in the order defined by their names utilizing the first + partition slot greater than the highest slot number currently in use. Any existing partitions that have + no matching partition file are left as they are. + + Note that these partition definition files do not describe the contents of the partitions, such as + the file system used. Separate mechanisms, such as + systemd-growfs8 and + systemd-makefs may be used to initialize or grow the file systems inside of these + partitions.
+ + + + [Partition] Section Options + + + + Type= + + The GPT partition type UUID to match. This may be a GPT partition type UUID such as + 4f68bce3-e8cd-4db1-96e7-fbcaf984b709, or one of the following special + identifiers: + + + GPT partition type identifiers + + + + + + + + Identifier + Explanation + + + + + + esp + EFI System Partition + + + + xbootldr + Extended Boot Loader Partition + + + + swap + Swap partition + + + + home + Home (/home/) partition + + + + srv + Server data (/srv/) partition + + + + var + Variable data (/var/) partition + + + + tmp + Temporary data (/var/tmp/) partition + + + + linux-generic + Generic Linux file system partition + + + + root + Root file system partition type appropriate for the local architecture (an alias for an architecture root file system partition type listed below, e.g. root-x86-64) + + + + root-verity + Verity data for the root file system partition for the local architecture + + + + root-secondary + Root file system partition of the secondary architecture of the local architecture; usually the matching 32bit architecture for the local 64bit architecture) + + + + root-secondary-verity + Verity data for the root file system partition of the secondary architecture + + + + root-x86 + Root file system partition for the x86 (32bit, aka i386) architecture + + + + root-x86-verity + Verity data for the x86 (32bit) root file system partition + + + + root-x86-64 + Root file system partition for the x86_64 (64bit, aka amd64) architecture + + + + root-x86-64-verity + Verity data for the x86_64 (64bit) root file system partition + + + + root-arm + Root file system partition for the ARM (32bit) architecture + + + + root-arm-verity + Verity data for the ARM (32bit) root file system partition + + + + root-arm64 + Root file system partition for the ARM (64bit, aka aarch64) architecture + + + + root-arm64-verity + Verity data for the ARM (64bit, aka aarch64) root file system partition + + + + root-ia64 + Root file system partition for 
the ia64 architecture + + + + root-ia64-verity + Verity data for the ia64 root file system partition + + + +
+ + This setting defaults to linux-generic. + + Most of the partition type UUIDs listed above are defined in the Discoverable Partitions + Specification.
+
+ + + Label= + + The textual label to assign to the partition if none is assigned yet. Note that this + setting is not used for matching. It is also not used when a label is already set for an existing + partition. It is thus only used when a partition is newly created or when an existing one had no + label set (that is: an empty label). If not specified a label derived from the partition type is + automatically used. + + + + Priority= + + A numeric priority to assign to this partition, in the range -2147483648…2147483647, + with smaller values indicating higher priority, and higher values indicating smaller priority. This + priority is used in case the configured size constraints on the defined partitions do not permit + fitting all partitions onto the available disk space. If the partitions do not fit, the highest + numeric partition priority of all defined partitions is determined, and all defined partitions with + this priority are removed from the list of new partitions to create (which may be multiple, if the + same priority is used for multiple partitions). The fitting algorithm is then tried again. If the + partitions still do not fit, the now highest numeric partition priority is determined, and the + matching partitions removed too, and so on. Partitions of a priority of 0 or lower are never + removed. If all partitions with a priority above 0 are removed and the partitions still do not fit on + the device the operation fails. Note that this priority has no effect on ordering partitions, for + that use the alphabetical order of the filenames of the partition definition files. Defaults to + 0. + + + + Weight= + + A numeric weight to assign to this partition in the range 0…1000000.
Available disk + space is assigned to the defined partitions according to their relative weights (subject to the size + constraints configured with SizeMinBytes=, SizeMaxBytes=), so + that a partition with weight 2000 gets double the space as one with weight 1000, and a partition with + weight 333 a third of that. Defaults to 1000. + + The Weight= setting is used to distribute available disk space in an + "elastic" fashion, based on the disk size and existing partitions. If a partition shall have a fixed + size use both SizeMinBytes= and SizeMaxBytes= with the same + value in order to fixate the size to one value, in which case the weight has no + effect. + + + + PaddingWeight= + + Similar to Weight= but sets a weight for the free space after the + partition (the "padding"). When distributing available space the weights of all partitions and all + defined paddings are summed, and then each partition and padding gets the fraction defined by its + weight. Defaults to 0, i.e. by default no padding is applied. + + Padding is useful if empty space shall be left for later additions or a safety margin at the + end of the device or between partitions. + + + + SizeMinBytes= + SizeMaxBytes= + + Specifies minimum and maximum size constraints in bytes. Takes the usual K, M, G, T, + … suffixes (to the base of 1024). If SizeMinBytes= is specified the partition is + created at or grown to at least the specified size. If SizeMaxBytes= is specified + the partition is created at or grown to at most the specified size. The precise size is determined + through the weight value configured with Weight=, see above. When + SizeMinBytes= is set equal to SizeMaxBytes= the configured + weight has no effect as the partition is explicitly sized to the specified fixed value.
Note that + partitions are never created smaller than 4096 bytes, and since partitions are never shrunk the + previous size of the partition (in case the partition already exists) is also enforced as lower bound + for the new size. The values should be specified as multiples of 4096 bytes, and are rounded upwards + (in case of SizeMinBytes=) or downwards (in case of + SizeMaxBytes=) otherwise. If the backing device does not provide enough space to + fulfill the constraints placing the partition will fail. For partitions that shall be created, + depending on the setting of Priority= (see above) the partition might be dropped + and the placing algorithm restarted. By default no size constraints are set. + + + + PaddingMinBytes= + PaddingMaxBytes= + + Specifies minimum and maximum size constraints in bytes for the free space after the + partition (the "padding"). Semantics are similar to SizeMinBytes= and + SizeMaxBytes=, except that unlike partition sizes free space can be shrunk and can + be as small as zero. By default no size constraints on padding are set, so that only + PaddingWeight= determines the size of the padding applied. + + + + FactoryReset= + + Takes a boolean argument. If specified the partition is marked for removal during a + factory reset operation. This functionality is useful to implement schemes where images can be reset + into their original state by removing partitions and creating them anew. Defaults to off. + +
+
+ + + Examples + + + Grow the root partition to the full disk size at first boot + + With the following file the root partition is automatically grown to the full disk if possible during boot. + + # /usr/lib/repart.d/50-root.conf +[Partition] +Type=root + + + + + Create a swap and home partition automatically on boot, if missing + + The home partition gets all available disk space while the swap partition gets 1G at most and 64M + at least. We set a priority > 0 on the swap partition to ensure the swap partition is not used if not + enough space is available. For every three bytes assigned to the home partition the swap partition gets + assigned one. + + # /usr/lib/repart.d/60-home.conf +[Partition] +Type=home + + + # /usr/lib/repart.d/70-swap.conf +[Partition] +Type=swap +SizeMinBytes=64M +SizeMaxBytes=1G +Priority=1 +Weight=333 + + + + + Create B partitions in an A/B Verity setup, if missing + + Let's say the vendor intends to update OS images in an A/B setup, i.e. with two root partitions + (and two matching Verity partitions) that shall be used alternatingly during upgrades. To minimize + image sizes the original image is shipped only with one root and one Verity partition (the "A" set), + and the second root and Verity partitions (the "B" set) shall be created on first boot on the free + space on the medium. + + # /usr/lib/repart.d/50-root.conf +[Partition] +Type=root +SizeMinBytes=512M +SizeMaxBytes=512M + + + # /usr/lib/repart.d/60-root-verity.conf +[Partition] +Type=root-verity +SizeMinBytes=64M +SizeMaxBytes=64M + + + The definitions above cover the "A" set of root partition (of a fixed 512M size) and Verity + partition for the root partition (of a fixed 64M size). Let's use symlinks to create the "B" set of + partitions, since after all they shall have the same properties and sizes as the "A" set. 
+ +# ln -s 50-root.conf /usr/lib/repart.d/70-root-b.conf +# ln -s 60-root-verity.conf /usr/lib/repart.d/80-root-verity-b.conf + + + + + + + See Also + + systemd1, + systemd-repart8, + sfdisk8 + + + +
diff --git a/man/rules/meson.build b/man/rules/meson.build index 90376da775..3dc0a045a7 100644 --- a/man/rules/meson.build +++ b/man/rules/meson.build @@ -47,6 +47,7 @@ manpages = [ ['pam_systemd', '8', [], 'HAVE_PAM'], ['portablectl', '1', [], 'ENABLE_PORTABLED'], ['pstore.conf', '5', ['pstore.conf.d'], 'ENABLE_PSTORE'], + ['repart.d', '5', [], ''], ['resolvectl', '1', ['resolvconf'], 'ENABLE_RESOLVE'], ['resolved.conf', '5', ['resolved.conf.d'], 'ENABLE_RESOLVE'], ['runlevel', '8', [], ''], @@ -769,6 +770,7 @@ manpages = [ 'ENABLE_RANDOMSEED'], ['systemd-rc-local-generator', '8', [], ''], ['systemd-remount-fs.service', '8', ['systemd-remount-fs'], ''], + ['systemd-repart', '8', ['systemd-repart.service'], ''], ['systemd-resolved.service', '8', ['systemd-resolved'], 'ENABLE_RESOLVE'], ['systemd-rfkill.service', '8', diff --git a/man/systemd-makefs@.service.xml b/man/systemd-makefs@.service.xml index 8514af67bc..d07d90315a 100644 --- a/man/systemd-makefs@.service.xml +++ b/man/systemd-makefs@.service.xml @@ -80,6 +80,7 @@ systemd1, systemd.mount8, systemd-fstab-generator8, + systemd-repart8, mkfs.btrfs8, mkfs.cramfs8, mkfs.ext48, diff --git a/man/systemd-repart.xml b/man/systemd-repart.xml new file mode 100644 index 0000000000..cffcb5403a --- /dev/null +++ b/man/systemd-repart.xml @@ -0,0 +1,269 @@ + + + + + + + + systemd-repart + systemd + + + + systemd-repart + 8 + + + + systemd-repart + systemd-repart.service + Automatically grow and add partitions + + + + + systemd-repart + OPTIONS + BLOCKDEVICE + + + systemd-repart.service + + + + Description + + systemd-repart grows and adds partitions to a partition table, based on the + configuration files described in + repart.d5. + + + If invoked with no arguments, it operates on the block device backing the root file system partition + of the OS, thus growing and adding partitions of the booted OS image itself. When called in the initial + RAM disk it operates on the block device backing /sysroot/ instead, i.e. 
on the + block device the system will soon transition into. The systemd-repart.service + service is generally run at boot in the initial RAM disk, in order to augment the partition table of the + OS before its partitions are mounted. systemd-repart (mostly) operates in a purely + incremental mode: it only grows existing and adds new partitions; it does not shrink, delete or move + existing partitions. The service is intended to be run on every boot, but when it detects that the + partition table already matches the installed repart.d/*.conf configuration + files, it executes no operation. + + systemd-repart is intended to be used when deploying OS images, to automatically + adjust them to the system they are running on, during first boot. This way the deployed image can be + minimal in size and may be augmented automatically at boot when needed, taking possession of disk space + available but not yet used. Specifically the following use cases are among those covered: + + + The root partition may be grown to cover the whole available disk space + A /home/, swap or /srv partition can be added in + A second (or third, …) root partition may be added in, to cover A/B style setups + where a second version of the root file system is alternatingly used for implementing update + schemes. The deployed image would carry only a single partition ("A") but on first boot a second + partition ("B") for this purpose is automatically created. + + + The algorithm executed by systemd-repart is roughly as follows: + + + The repart.d/*.conf configuration files are loaded and parsed, + and ordered by filename (without the directory prefix). + + The partition table already existing on the block device is loaded and + parsed. + + The existing partitions in the partition table are matched up with the + repart.d/*.conf files by GPT partition type UUID. The first existing partition + of a specific type is assigned the first configuration file declaring the same type.
The second + existing partition of a specific type is then assigned the second configuration file declaring the same + type, and so on. After this iterative assigning is complete any left-over existing partitions that have + no matching configuration file are considered "foreign" and left as they are. And any configuration + files for which no partition currently exists are understood as a request to create such a + partition. + + Taking the size constraints and weights declared in the configuration files into + account, all partitions that shall be created are now allocated to the disk, taking up all free space, + always respecting the size and padding requests. Similar, existing partitions that are determined to + grow are grown. New partitions are always appended to the end of the existing partition table, taking + the first partition table slot whose index is greater than the indexes of all existing + partitions. Partition table slots are never reordered and thus partition numbers are ensured to remain + stable. Note that this allocation happens in RAM only, the partition table on disk is not updated + yet. + + All existing partitions for which configuration files exist and which currently have no + GPT partition label set will be assigned a label, either explicitly configured in the configuration or + (if that's missing) derived automatically from the partition type. The same is done for all partitions + that are newly created. These assignments are done in RAM only, too, the disk is not updated + yet. + + Similarly, all existing partitions for which configuration files exist and which + currently have an all-zero identifying UUID will be assigned a new UUID. This UUID is cryptographically + hashed from a common seed value together with the partition type UUID (and a counter in case multiple + partitions of the same type are defined), see below. The same is done for all partitions that are + created anew. 
These assignments are done in RAM only, too, the disk is not updated + yet. + + Similarly, if the disk's volume UUID is all zeroes it is also initialized, also + cryptographically hashed from the same common seed value. Also, in RAM only, too. + + The disk space assigned to new partitions (i.e. what was previously considered free + space but is no longer) is now erased. Specifically, all file system signatures are removed, and if the + device supports it the BLKDISCARD I/O control command is issued to inform the + hardware that the space is empty now. In addition any "padding" between partitions and at the end of + the device is similarly erased. + + The new partition table is finally written to disk. The kernel is asked to reread the + partition table. + + + As an exception to the normally strictly incremental operation, when called in a special "factory + reset" mode systemd-repart may also be used to erase select existing partitions to + reset an installation back to vendor defaults. This mode of operation is used when either the + switch is passed on the tool's command line, or the + option specified on the kernel command line, or the + FactoryReset EFI variable (vendor UUID + 8cf2644b-4b0b-428f-9387-6d876050dc67) is set to "yes". It alters the algorithm above + slightly: between the 3rd and the 4th step above any partition marked explicitly via the + FactoryReset= boolean is deleted, and the algorithm restarted, thus immediately + re-creating these partitions anew empty. + + Note that systemd-repart only changes partition tables, it does not create or + resize any file systems within these partitions. A separate mechanism should be used for that, for + example + systemd-growfs8 and + systemd-makefs. + + The UUIDs identifying the new partitions created (or assigned to existing partitions that have no + UUID yet), as well as the disk as a whole are hashed cryptographically from a common seed value.
This + seed value is usually the + machine-id5 of the + system, so that the machine ID reproducibly determines the UUIDs assigned to all partitions. If the + machine ID cannot be read (or the user passes , see below) the seed is + generated randomly instead, so that the partition UUIDs are also effectively random. The seed value may + also be set explicitly, formatted as UUID via the option. By hashing these UUIDs + from a common seed images prepared with this tool become reproducible and the result of the algorithm + above deterministic. + + + + Options + + The following options are understood: + + + + + Takes a boolean. If this switch is not specified is + the implied default. Controls whether systemd-repart executes the requested + re-partition operations or whether it should only show what it would do. Unless + is specified systemd-repart will not actually + touch the device's partition table. + + + + + Takes one of refuse, allow, + require or force. Controls how to operate on block devices that + are entirely empty, i.e. carry no partition table/disk label yet. If this switch is not specified the + implied default is refuse. + + If refuse systemd-repart requires that the block device + it shall operate on already carries a partition table and refuses operation if none is found. If + allow the command will extend an existing partition table or create a new one if + none exists. If require the command will create a new partition table if none + exists so far, and refuse operation if one already exists. If force it will create + a fresh partition table unconditionally, erasing the disk fully in effect. If + force no existing partitions will be taken into account or survive the + operation. Hence: use with care, this is a great way to lose all your data. + + + + + + Takes a boolean. If this switch is not specified is + the implied default. 
Controls whether to issue the BLKDISCARD I/O control + command on the space taken up by any added partitions or on the space in between them. Usually, it's + a good idea to issue this request since it tells the underlying hardware that the covered blocks + shall be considered empty, improving performance. + + + + + + Takes a boolean. If this switch is not specified is + the implied default. Controls whether to operate in "factory reset" mode, see above. If set to true + this will remove all existing partitions marked with FactoryReset= set to yes + early while executing the re-partitioning algorithm. Use with care, this is a great way to lose all + your data. Note that partition files need to explicitly turn FactoryReset= on, as + the option defaults to off. If no partitions are marked for factory reset this switch has no + effect. Note that there are two other methods to request factory reset operation: via the kernel + command line and via an EFI variable, see above. + + + + + + If this switch is specified the disk is not re-partitioned. Instead it is determined + if any existing partitions are marked with FactoryReset=. If there are, the tool + will exit with exit status zero, otherwise non-zero. This switch may be used to quickly determine + whether the running system supports a factory reset mechanism built on + systemd-repart. + + + + + + Takes a path to a directory to use as root file system when searching for + repart.d/*.conf files and for the machine ID file to use as seed. By default + when invoked on the regular system this defaults to the host's root file system + /. If invoked from the initial RAM disk this defaults to + /sysroot/, so that the tool operates on the configuration and machine ID stored + in the root file system later transitioned into itself. + + + + + + Takes a UUID as argument or the special value random.
If a UUID + is specified the UUIDs to assign to partitions and the partition table itself are derived via + cryptographic hashing from it. If not specified it is attempted to read the machine ID from the host + (or more precisely, the root directory configured via ) and use it as seed + instead, falling back to a randomized seed otherwise. Use to force a + randomized seed. Explicitly specifying the seed may be used to generate strictly reproducible + partition tables. + + + + + + Takes a boolean argument. If this switch is not specified, it defaults to on when + called from an interactive terminal and off otherwise. Controls whether to show a user friendly table + and graphic illustrating the changes applied. + + + + + + Takes a file system path. If specified the *.conf files are directly + read from the specified directory instead of searching in + /usr/lib/repart.d/*.conf, /etc/repart.d/*.conf, + /run/repart.d/*.conf. + + + + + + + + + See Also + + systemd1, + repart.d5, + machine-id5 + + + + From e65f29b4c6e82722bf80be441f74ec41488ba8b0 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 17 Dec 2019 19:19:02 +0100 Subject: [PATCH 12/13] ci: add dependencies for repart + cryptsetup's pkcs#11 support Let's make sure we can test systemd-repart properly (And while we are at it, also test cryptsetup-pkcs#11) --- .lgtm.yml | 3 +++ fuzzbuzz.yaml | 1 + semaphoreci/semaphore-runner.sh | 2 ++ travis-ci/managers/debian.sh | 5 ++++- travis-ci/managers/fedora.sh | 5 ++++- travis-ci/managers/fuzzbuzz.sh | 2 ++ travis-ci/managers/fuzzit.sh | 2 ++ 7 files changed, 18 insertions(+), 2 deletions(-) diff --git a/.lgtm.yml b/.lgtm.yml index 5948d8c2bc..51ec50e3b1 100644 --- a/.lgtm.yml +++ b/.lgtm.yml @@ -5,6 +5,9 @@ extraction: - python3-pip - python3-setuptools - python3-wheel + - libfdisk-dev + - libp11-kit-dev + - libssl-dev after_prepare: - pip3 install meson - export PATH="$HOME/.local/bin/:$PATH" diff --git a/fuzzbuzz.yaml b/fuzzbuzz.yaml index 18c70e3555..6dfdd81237
100644 --- a/fuzzbuzz.yaml +++ b/fuzzbuzz.yaml @@ -5,6 +5,7 @@ setup: - sudo apt-get update -y - sudo apt-get build-dep -y systemd - sudo apt-get install -y python3-pip +- sudo apt-get install -y libfdisk-dev libp11-kit-dev libssl-dev # FIXME: temporarily pin the meson version as 0.53 doesn't work with older # python 3.5 # # See: https://github.com/mesonbuild/meson/issues/6427 diff --git a/semaphoreci/semaphore-runner.sh b/semaphoreci/semaphore-runner.sh index 504ada59ee..a5d8bc26a1 100755 --- a/semaphoreci/semaphore-runner.sh +++ b/semaphoreci/semaphore-runner.sh @@ -35,6 +35,8 @@ while [ -z "\$(ip route list 0/0)" ]; do sleep 1; done apt-get -q --allow-releaseinfo-change update apt-get -y dist-upgrade apt-get install -y eatmydata +# The following three are needed as long as these deps are not covered by Debian's own packaging +apt-get install -y libfdisk-dev libp11-kit-dev libssl-dev apt-get purge --auto-remove -y unattended-upgrades systemctl unmask systemd-networkd systemctl enable systemd-networkd diff --git a/travis-ci/managers/debian.sh b/travis-ci/managers/debian.sh index 6a6923fbc8..dedddab3b5 100755 --- a/travis-ci/managers/debian.sh +++ b/travis-ci/managers/debian.sh @@ -18,7 +18,10 @@ REPO_ROOT="${REPO_ROOT:-$PWD}" ADDITIONAL_DEPS=(python3-libevdev python3-pyparsing clang - perl) + perl + libfdisk-dev + libp11-kit-dev + libssl-dev) function info() { echo -e "\033[33;1m$1\033[0m" diff --git a/travis-ci/managers/fedora.sh b/travis-ci/managers/fedora.sh index e07b4938df..dbd484c25c 100755 --- a/travis-ci/managers/fedora.sh +++ b/travis-ci/managers/fedora.sh @@ -23,7 +23,10 @@ ADDITIONAL_DEPS=(dnf-plugins-core libubsan clang llvm - perl) + perl + libfdisk-devel + openssl-devel + p11-kit-devel) function info() { echo -e "\033[33;1m$1\033[0m" diff --git a/travis-ci/managers/fuzzbuzz.sh b/travis-ci/managers/fuzzbuzz.sh index 1541b46652..21b736cb60 100755 --- a/travis-ci/managers/fuzzbuzz.sh +++ b/travis-ci/managers/fuzzbuzz.sh @@ -10,6 +10,8 @@ sudo bash -c
"echo 'deb-src http://archive.ubuntu.com/ubuntu/ xenial main restri sudo apt-get update -y sudo apt-get build-dep systemd -y sudo apt-get install -y ninja-build python3-pip python3-setuptools quota +# The following should be dropped when debian packaging has been updated to include them +sudo apt-get install -y libfdisk-dev libp11-kit-dev libssl-dev # FIXME: temporarily pin the meson version as 0.53 doesn't work with older python 3.5 # See: https://github.com/mesonbuild/meson/issues/6427 pip3 install meson==0.52.1 diff --git a/travis-ci/managers/fuzzit.sh b/travis-ci/managers/fuzzit.sh index 376761e20c..044d126e15 100755 --- a/travis-ci/managers/fuzzit.sh +++ b/travis-ci/managers/fuzzit.sh @@ -14,6 +14,8 @@ sudo bash -c "echo 'deb-src http://archive.ubuntu.com/ubuntu/ xenial main restri sudo apt-get update -y sudo apt-get build-dep systemd -y sudo apt-get install -y ninja-build python3-pip python3-setuptools +# The following should be dropped when debian packaging has been updated to include them +sudo apt-get install -y libfdisk-dev libp11-kit-dev libssl-dev # FIXME: temporarily pin the meson version as 0.53 doesn't work with older python 3.5 # See: https://github.com/mesonbuild/meson/issues/6427 pip3 install meson==0.52.1 From 492f91d8c6c39d339e525ac5c58a0a796bd36c72 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Tue, 10 Dec 2019 23:47:06 +0100 Subject: [PATCH 13/13] update TODO --- TODO | 59 ++++++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 48 insertions(+), 11 deletions(-) diff --git a/TODO b/TODO index ce37869ce1..d77f5ffb87 100644 --- a/TODO +++ b/TODO @@ -19,6 +19,11 @@ Janitorial Clean-ups: Features: +* bootctl: + - teach it to prepare an ESP wholesale, i.e. 
with mkfs.vfat invocation + - teach it to copy in unified kernel images and maybe type #1 boot loader spec entries from host + - make it operate on loopback files, dissecting enough to find ESP to operate on + * when dissecting images, warn about unrecognized partition flags * honour specifiers in unit files that resolve to some very basic @@ -27,10 +32,53 @@ Features: * socket units: allow creating a udev monitor socket with ListenDevices= or so, with matches, then actviate app thorugh that passing socket oveer +* unify on openssl: + - port sd_id128_get_machine_app_specific() over from khash + - port resolved over from libgcrypt (DNSSEC code) + - port journald + fsprg over from libgcrypt + - port importd over from libgcrypt + - when that's done: kill khash.c + - when that's done: kill gnutls support in resolved + * kill zenata, all hail weblate? +* when we resize disks (homed?) always round up to 4K sectors, not 512K + +* add growvol and makevol options for /etc/crypttab, similar to + x-systemd.growfs and x-systemd.makefs. + +* hook up the TPM to /etc/crypttab, with a new option that is similar to the + new PKCS#11 option in crypttab, and allows unlocking a LUKS volume via a key + unsealed from the TPM. Optionally, if TPM is not available fall back to + TPM-less mode, and set up linear DM mapping instead (inspired by kpartx), so + that the device paths stay the same, regardless if crypto is used or not. + * move discoverable partitions spec into markdown and our tree +* systemd-repart: by default generate minimized partition tables (i.e. tables + that only cover the space actually used, excluding any free space at the + end), in order to maximize dd'ability. Requires libfdisk work, see + https://github.com/karelzak/util-linux/issues/907 + +* systemd-repart: optionally, allow specifying a path to initialize new + partitions from, i.e. an fs image file or a source device node. 
This would + then turn systemd-repart into a simple installer: with a few .repart files + you could replicate the host system on another device. + +* systemd-repart: MBR partition table support. Care needs to be taken regarding + Type=, so that partition definitions can sanely apply to both the GPT and the + MBR case. Idea: accept syntax "Type=gpt:home mbr:0x83" for setting the types + for the two partition types explicitly. And provide an internal mapping so + that "Type=linux-generic" maps to the right types for both partition tables + automatically. + +* systemd-repart: allow sizing partitions as factor of available RAM, so that + we can reasonably size swap partitions for hibernation. + +* when switching root from initrd to host, set the machine_id env var so that + if the host has no machine ID set yet we continue to use the random one the + initrd had set. + * sd-event: add native support for P_ALL waitid() watching, then move PID 1 to it fo reaping assigned but unknown children. This needs to some special care to operate somewhat sensibly in light of priorities: P_ALL will return @@ -177,13 +225,6 @@ Features: * introduce per-unit (i.e. per-slice, per-service) journal log size limits. -* optionally, if a per-partition GPT flag is set for the root/home/… partitions - format the partition on next boot and unset the flag, in order to implement - factory reset. also, add a second flag that simply indicates whether such a - scheme is supported. then, add a tool (or maybe beef up systemd-dissect) to - show state of these flags, and optionally trigger such a factory reset on - next boot by setting the flag. 
- * sd-boot: automatically load EFI modules from some drop-in dir, so that people can add in file system drivers and such @@ -397,10 +438,6 @@ Features: yogas can be recognized as "convertible" too, even if they predate the DMI "convertible" form factor -* Maybe add a small tool invoked early at boot, that adds in or resizes - partitions automatically, to be used when the media used is actually larger - than the image written onto it is. - * Maybe add PrivatePIDs= as new unit setting, and do minimal PID namespacing after all. Be strict however, only support the equivalent of nspawn's --as-pid2 switch, and sanely proxy sd_notify() messages dropping stuff such