Merge pull request #13097 from poettering/mount-state-fix

Scan /proc/self/mountinfo before waitid() handling
2026-03-06 15:02:31 -08:00 · 2019-07-18 17:33:20 +02:00
parent f4c961169c 9ddaa3e459
commit deeabb45ae
2 changed files with 72 additions and 32 deletions
--- a/src/core/mount.c
+++ b/src/core/mount.c
@@ -50,6 +50,7 @@ static const UnitActiveState state_translation_table[_MOUNT_STATE_MAX] = {

 static int mount_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata);
 static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata);
+static int mount_process_proc_self_mountinfo(Manager *m);

 static bool MOUNT_STATE_WITH_PROCESS(MountState state) {
        return IN_SET(state,
@@ -232,7 +233,7 @@ _pure_ static MountParameters* get_mount_parameters(Mount *m) {
        return get_mount_parameters_fragment(m);
 }

-static int update_parameters_proc_self_mount_info(
+static int update_parameters_proc_self_mountinfo(
                Mount *m,
                const char *what,
                const char *options,
@@ -1280,6 +1281,22 @@ static void mount_sigchld_event(Unit *u, pid_t pid, int code, int status) {
        if (pid != m->control_pid)
                return;

+        /* So here's the thing, we really want to know before /usr/bin/mount or /usr/bin/umount exit whether
+         * they established/remove a mount. This is important when mounting, but even more so when unmounting
+         * since we need to deal with nested mounts and otherwise cannot safely determine whether to repeat
+         * the unmounts. In theory, the kernel fires /proc/self/mountinfo changes off before returning from
+         * the mount() or umount() syscalls, and thus we should see the changes to the proc file before we
+         * process the waitid() for the /usr/bin/(u)mount processes. However, this is unfortunately racy: we
+         * have to waitid() for processes using P_ALL (since we need to reap unexpected children that got
+         * reparented to PID 1), but when using P_ALL we might end up reaping processes that terminated just
+         * instants ago, i.e. already after our last event loop iteration (i.e. after the last point we might
+         * have noticed /proc/self/mountinfo events via epoll). This means event loop priorities for
+         * processing SIGCHLD vs. /proc/self/mountinfo IO events are not as relevant as we want. To fix that
+         * race, let's explicitly scan /proc/self/mountinfo before we start processing /usr/bin/(u)mount
+         * dying. It's ugly, but it makes our ordering systematic again, and makes sure we always see
+         * /proc/self/mountinfo changes before our mount/umount exits. */
+        (void) mount_process_proc_self_mountinfo(u->manager);
+
        m->control_pid = 0;

        if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL))
@@ -1468,7 +1485,7 @@ static int mount_setup_new_unit(
        if (r < 0)
                return r;

-        r = update_parameters_proc_self_mount_info(MOUNT(u), what, options, fstype);
+        r = update_parameters_proc_self_mountinfo(MOUNT(u), what, options, fstype);
        if (r < 0)
                return r;

@@ -1506,7 +1523,7 @@ static int mount_setup_existing_unit(
                        return -ENOMEM;
        }

-        r = update_parameters_proc_self_mount_info(MOUNT(u), what, options, fstype);
+        r = update_parameters_proc_self_mountinfo(MOUNT(u), what, options, fstype);
        if (r < 0)
                return r;
        if (r > 0)
@@ -1751,39 +1768,41 @@ fail:
        mount_shutdown(m);
 }

-static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+static int drain_libmount(Manager *m) {
+        bool rescan = false;
+        int r;
+
+        assert(m);
+
+        /* Drain all events and verify that the event is valid.
+         *
+         * Note that libmount also monitors /run/mount mkdir if the directory does not exist yet. The mkdir
+         * may generate event which is irrelevant for us.
+         *
+         * error: r < 0; valid: r == 0, false positive: r == 1 */
+        do {
+                r = mnt_monitor_next_change(m->mount_monitor, NULL, NULL);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to drain libmount events: %m");
+                if (r == 0)
+                        rescan = true;
+        } while (r == 0);
+
+        return rescan;
+}
+
+static int mount_process_proc_self_mountinfo(Manager *m) {
        _cleanup_set_free_free_ Set *around = NULL, *gone = NULL;
-        Manager *m = userdata;
        const char *what;
        Iterator i;
        Unit *u;
        int r;

        assert(m);
-        assert(revents & EPOLLIN);

-        if (fd == mnt_monitor_get_fd(m->mount_monitor)) {
-                bool rescan = false;
-
-                /* Drain all events and verify that the event is valid.
-                 *
-                 * Note that libmount also monitors /run/mount mkdir if the
-                 * directory does not exist yet. The mkdir may generate event
-                 * which is irrelevant for us.
-                 *
-                 * error: r < 0; valid: r == 0, false positive: rc == 1 */
-                do {
-                        r = mnt_monitor_next_change(m->mount_monitor, NULL, NULL);
-                        if (r == 0)
-                                rescan = true;
-                        else if (r < 0)
-                                return log_error_errno(r, "Failed to drain libmount events: %m");
-                } while (r == 0);
-
-                log_debug("libmount event [rescan: %s]", yes_no(rescan));
-                if (!rescan)
-                        return 0;
-        }
+        r = drain_libmount(m);
+        if (r <= 0)
+                return r;

        r = mount_load_proc_self_mountinfo(m, true);
        if (r < 0) {
@@ -1815,7 +1834,7 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
                        }

                        mount->from_proc_self_mountinfo = false;
-                        assert_se(update_parameters_proc_self_mount_info(mount, NULL, NULL, NULL) >= 0);
+                        assert_se(update_parameters_proc_self_mountinfo(mount, NULL, NULL, NULL) >= 0);

                        switch (mount->state) {

@@ -1884,6 +1903,15 @@ static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents,
        return 0;
 }

+static int mount_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+        Manager *m = userdata;
+
+        assert(m);
+        assert(revents & EPOLLIN);
+
+        return mount_process_proc_self_mountinfo(m);
+}
+
 static void mount_reset_failed(Unit *u) {
        Mount *m = MOUNT(u);

--- a/src/core/swap.c
+++ b/src/core/swap.c
@@ -43,6 +43,7 @@ static const UnitActiveState state_translation_table[_SWAP_STATE_MAX] = {

 static int swap_dispatch_timer(sd_event_source *source, usec_t usec, void *userdata);
 static int swap_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata);
+static int swap_process_proc_swaps(Manager *m);

 static bool SWAP_STATE_WITH_PROCESS(SwapState state) {
        return IN_SET(state,
@@ -1014,6 +1015,10 @@ static void swap_sigchld_event(Unit *u, pid_t pid, int code, int status) {
        if (pid != s->control_pid)
                return;

+        /* Let's scan /proc/swaps before we process SIGCHLD. For the reasoning see the similar code in
+         * mount.c */
+        (void) swap_process_proc_swaps(u->manager);
+
        s->control_pid = 0;

        if (is_clean_exit(code, status, EXIT_CLEAN_COMMAND, NULL))
@@ -1149,13 +1154,11 @@ static int swap_load_proc_swaps(Manager *m, bool set_flags) {
        return 0;
 }

-static int swap_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
-        Manager *m = userdata;
+static int swap_process_proc_swaps(Manager *m) {
        Unit *u;
        int r;

        assert(m);
-        assert(revents & EPOLLPRI);

        r = swap_load_proc_swaps(m, true);
        if (r < 0) {
@@ -1230,6 +1233,15 @@ static int swap_dispatch_io(sd_event_source *source, int fd, uint32_t revents, v
        return 1;
 }

+static int swap_dispatch_io(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
+        Manager *m = userdata;
+
+        assert(m);
+        assert(revents & EPOLLPRI);
+
+        return swap_process_proc_swaps(m);
+}
+
 static Unit *swap_following(Unit *u) {
        Swap *s = SWAP(u);
        Swap *other, *first = NULL;