From 83f3bf4b6f345c71505a5def3794672f3deea49b Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 24 Aug 2022 11:10:05 +0200 Subject: [PATCH 1/5] shutdown: rebreak all comments to coding style No actual change of words. --- src/shutdown/umount.c | 66 ++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 39 deletions(-) diff --git a/src/shutdown/umount.c b/src/shutdown/umount.c index 1e691379a4..9e9888bce8 100644 --- a/src/shutdown/umount.c +++ b/src/shutdown/umount.c @@ -96,41 +96,33 @@ int mount_points_list_get(const char *mountinfo, MountPoint **head) { fstype = mnt_fs_get_fstype(fs); - /* Combine the generic VFS options with the FS-specific - * options. Duplicates are not a problem here, because the only - * options that should come up twice are typically ro/rw, which - * are turned into MS_RDONLY or the inversion of it. + /* Combine the generic VFS options with the FS-specific options. Duplicates are not a problem + * here, because the only options that should come up twice are typically ro/rw, which are + * turned into MS_RDONLY or the inversion of it. * - * Even if there are duplicates later in mount_option_mangle() - * they shouldn't hurt anyways as they override each other. - */ + * Even if there are duplicates later in mount_option_mangle() they shouldn't hurt anyways as + * they override each other. */ if (!strextend_with_separator(&options, ",", mnt_fs_get_vfs_options(fs))) return log_oom(); if (!strextend_with_separator(&options, ",", mnt_fs_get_fs_options(fs))) return log_oom(); - /* Ignore mount points we can't unmount because they - * are API or because we are keeping them open (like - * /dev/console). Also, ignore all mounts below API - * file systems, since they are likely virtual too, - * and hence not worth spending time on. Also, in - * unprivileged containers we might lack the rights to - * unmount these things, hence don't bother. */ + /* Ignore mount points we can't unmount because they are API or because we are keeping them + * open (like /dev/console). Also, ignore all mounts below API file systems, since they are + * likely virtual too, and hence not worth spending time on. Also, in unprivileged containers + * we might lack the rights to unmount these things, hence don't bother. */ if (mount_point_is_api(path) || mount_point_ignore(path) || PATH_STARTSWITH_SET(path, "/dev", "/sys", "/proc")) continue; - /* If we are in a container, don't attempt to - * read-only mount anything as that brings no real - * benefits, but might confuse the host, as we remount - * the superblock here, not the bind mount. + /* If we are in a container, don't attempt to read-only mount anything as that brings no real + * benefits, but might confuse the host, as we remount the superblock here, not the bind + * mount. * - * If the filesystem is a network fs, also skip the - * remount. It brings no value (we cannot leave - * a "dirty fs") and could hang if the network is down. - * Note that umount2() is more careful and will not - * hang because of the network being down. */ + * If the filesystem is a network fs, also skip the remount. It brings no value (we cannot + * leave a "dirty fs") and could hang if the network is down. Note that umount2() is more + * careful and will not hang because of the network being down. */ try_remount_ro = detect_container() <= 0 && !fstype_is_network(fstype) && !fstype_is_api_vfs(fstype) && @@ -138,10 +130,9 @@ int mount_points_list_get(const char *mountinfo, MountPoint **head) { !fstab_test_yes_no_option(options, "ro\0rw\0"); if (try_remount_ro) { - /* mount(2) states that mount flags and options need to be exactly the same - * as they were when the filesystem was mounted, except for the desired - * changes. So we reconstruct both here and adjust them for the later - * remount call too. */ + /* mount(2) states that mount flags and options need to be exactly the same as they + * were when the filesystem was mounted, except for the desired changes. So we + * reconstruct both here and adjust them for the later remount call too. */ r = mnt_fs_get_propagation(fs, &remount_flags); if (r < 0) { @@ -377,8 +368,8 @@ static int md_list_get(MountPoint **head) { continue; } - /* MD "containers" are a special type of MD devices, used for external metadata. - * Since it doesn't provide RAID functionality in itself we don't need to stop it. */ + /* MD "containers" are a special type of MD devices, used for external metadata. Since it + * doesn't provide RAID functionality in itself we don't need to stop it. */ if (streq(md_level, "container")) continue; @@ -635,13 +626,11 @@ static int umount_with_timeout(MountPoint *m, bool last_try) { if (r == 0) { log_info("Unmounting '%s'.", m->path); - /* Start the mount operation here in the child Using MNT_FORCE - * causes some filesystems (e.g. FUSE and NFS and other network - * filesystems) to abort any pending requests and return -EIO - * rather than blocking indefinitely. If the filesysten is - * "busy", this may allow processes to die, thus making the - * filesystem less busy so the unmount might succeed (rather - * than return EBUSY). */ + /* Start the mount operation here in the child Using MNT_FORCE causes some filesystems + * (e.g. FUSE and NFS and other network filesystems) to abort any pending requests and return + * -EIO rather than blocking indefinitely. If the filesysten is "busy", this may allow + * processes to die, thus making the filesystem less busy so the unmount might succeed + * (rather than return EBUSY). */ r = RET_NERRNO(umount2(m->path, MNT_FORCE)); if (r < 0) { log_full_errno(last_try ? LOG_ERR : LOG_INFO, r, "Failed to unmount %s: %m", m->path); @@ -855,9 +844,8 @@ int umount_all(bool *changed, bool last_try) { assert(changed); - /* Retry umount, until nothing can be umounted anymore. Mounts are - * processed in order, newest first. The retries are needed when - * an old mount has been moved, to a path inside a newer mount. */ + /* Retry umount, until nothing can be umounted anymore. Mounts are processed in order, newest + * first. The retries are needed when an old mount has been moved, to a path inside a newer mount. */ do { umount_changed = false; From efc90b98142889d443a86c18304f1b13ebda9ddb Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 24 Aug 2022 11:16:40 +0200 Subject: [PATCH 2/5] umount: use structured initialization --- src/shutdown/umount.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/shutdown/umount.c b/src/shutdown/umount.c index 9e9888bce8..6d133d5761 100644 --- a/src/shutdown/umount.c +++ b/src/shutdown/umount.c @@ -151,17 +151,21 @@ int mount_points_list_get(const char *mountinfo, MountPoint **head) { remount_flags = (remount_flags|MS_REMOUNT|MS_RDONLY) & ~MS_BIND; } - m = new0(MountPoint, 1); + m = new(MountPoint, 1); if (!m) return log_oom(); + *m = (MountPoint) { + .remount_options = remount_options, + .remount_flags = remount_flags, + .try_remount_ro = try_remount_ro, + }; + m->path = strdup(path); if (!m->path) return log_oom(); - m->remount_options = TAKE_PTR(remount_options); - m->remount_flags = remount_flags; - m->try_remount_ro = try_remount_ro; + TAKE_PTR(remount_options); LIST_PREPEND(mount_point, *head, TAKE_PTR(m)); } From c905aaf68607754a2aa131854544f5b44a378874 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 24 Aug 2022 11:16:55 +0200 Subject: [PATCH 3/5] umount: minor modernizations --- src/shutdown/umount.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/shutdown/umount.c b/src/shutdown/umount.c index 6d133d5761..2fd1b464e4 100644 --- a/src/shutdown/umount.c +++ b/src/shutdown/umount.c @@ -76,16 +76,15 @@ int mount_points_list_get(const char *mountinfo, MountPoint **head) { return log_error_errno(r, "Failed to parse %s: %m", mountinfo ?: "/proc/self/mountinfo"); for (;;) { + _cleanup_free_ char *options = NULL, *remount_options = NULL; struct libmnt_fs *fs; const char *path, *fstype; - _cleanup_free_ char *options = NULL; unsigned long remount_flags = 0u; - _cleanup_free_ char *remount_options = NULL; bool try_remount_ro; _cleanup_free_ MountPoint *m = NULL; r = mnt_table_next_fs(table, iter, &fs); - if (r == 1) + if (r == 1) /* EOF */ break; if (r < 0) return log_error_errno(r, "Failed to get next entry from %s: %m", mountinfo ?: "/proc/self/mountinfo"); @@ -197,7 +196,7 @@ int swap_list_get(const char *swaps, MountPoint **head) { const char *source; r = mnt_table_next_fs(t, i, &fs); - if (r == 1) + if (r == 1) /* EOF */ break; if (r < 0) return log_error_errno(r, "Failed to get next entry from %s: %m", swaps ?: "/proc/swaps"); From f11efcbeb17781a3e03596fe6b1016fbb5c31360 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 24 Aug 2022 11:23:41 +0200 Subject: [PATCH 4/5] umount: unmount profcs/sysfs/.. lazily Alternative for: df48b430a4a85f923eaecb3fadf9c514692d2082 --- src/shutdown/umount.c | 25 +++++++++++-------------- src/shutdown/umount.h | 3 ++- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/src/shutdown/umount.c b/src/shutdown/umount.c index 2fd1b464e4..7ddeb67e97 100644 --- a/src/shutdown/umount.c +++ b/src/shutdown/umount.c @@ -80,7 +80,7 @@ int mount_points_list_get(const char *mountinfo, MountPoint **head) { struct libmnt_fs *fs; const char *path, *fstype; unsigned long remount_flags = 0u; - bool try_remount_ro; + bool try_remount_ro, is_api_vfs; _cleanup_free_ MountPoint *m = NULL; r = mnt_table_next_fs(table, iter, &fs); @@ -115,6 +115,8 @@ int mount_points_list_get(const char *mountinfo, MountPoint **head) { PATH_STARTSWITH_SET(path, "/dev", "/sys", "/proc")) continue; + is_api_vfs = fstype_is_api_vfs(fstype); + /* If we are in a container, don't attempt to read-only mount anything as that brings no real * benefits, but might confuse the host, as we remount the superblock here, not the bind * mount. @@ -124,7 +126,7 @@ int mount_points_list_get(const char *mountinfo, MountPoint **head) { * careful and will not hang because of the network being down. */ try_remount_ro = detect_container() <= 0 && !fstype_is_network(fstype) && - !fstype_is_api_vfs(fstype) && + !is_api_vfs && !fstype_is_ro(fstype) && !fstab_test_yes_no_option(options, "ro\0rw\0"); @@ -158,6 +160,10 @@ int mount_points_list_get(const char *mountinfo, MountPoint **head) { .remount_options = remount_options, .remount_flags = remount_flags, .try_remount_ro = try_remount_ro, + + /* Unmount sysfs/procfs/… lazily, since syncing doesn't matter there, and it's OK if + * something keeps an fd open to it. */ + .umount_lazily = is_api_vfs, }; m->path = strdup(path); @@ -634,23 +640,14 @@ static int umount_with_timeout(MountPoint *m, bool last_try) { * -EIO rather than blocking indefinitely. If the filesysten is "busy", this may allow * processes to die, thus making the filesystem less busy so the unmount might succeed * (rather than return EBUSY). */ - r = RET_NERRNO(umount2(m->path, MNT_FORCE)); + r = RET_NERRNO(umount2(m->path, + UMOUNT_NOFOLLOW | /* Don't follow symlinks: this should never happen unless our mount list was wrong */ + (m->umount_lazily ? MNT_DETACH : MNT_FORCE))); if (r < 0) { log_full_errno(last_try ? LOG_ERR : LOG_INFO, r, "Failed to unmount %s: %m", m->path); if (r == -EBUSY && last_try) log_umount_blockers(m->path); - - /* If API filesystems under /oldroot cannot be unmounted we can still lazily unmount - * them to unblock /oldroot. They serve no function to us anymore and should be - * memory-only and hence safe to unmount like this. */ - if (in_initrd() && - PATH_STARTSWITH_SET(m->path, "/oldroot/dev", "/oldroot/proc", "/oldroot/sys")) { - log_info("Lazily unmounting '%s' instead.", m->path); - r = umount2(m->path, MNT_FORCE | MNT_DETACH); - if (r < 0) - log_error_errno(errno, "Failed to lazily unmount %s: %m", m->path); - } } _exit(r < 0 ? EXIT_FAILURE : EXIT_SUCCESS); diff --git a/src/shutdown/umount.h b/src/shutdown/umount.h index 618b754011..a4154c9099 100644 --- a/src/shutdown/umount.h +++ b/src/shutdown/umount.h @@ -18,7 +18,8 @@ typedef struct MountPoint { char *path; char *remount_options; unsigned long remount_flags; - bool try_remount_ro; + bool try_remount_ro:1; + bool umount_lazily:1; dev_t devnum; LIST_FIELDS(struct MountPoint, mount_point); } MountPoint; From 05768ae36b4af0c18b4837fa6041fd66a05f5e36 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 24 Aug 2022 11:55:14 +0200 Subject: [PATCH 5/5] shutdown: rework log_umount_blockers() a bit Let's go directly from opening /proc/ to opening /proc/$PID/fd/ instead of indirectly via opening /proc/$PID/ first. Saves a syscall. Also, add error logging about all unexpected errors. Finally, drop redundant denylist for /proc/, /sys/, /dev/ prefix checking, should be redundant, given the ealier check against the 'mnt' prefix. --- src/shutdown/umount.c | 78 +++++++++++++++++++++++++------------------ 1 file changed, 46 insertions(+), 32 deletions(-) diff --git a/src/shutdown/umount.c b/src/shutdown/umount.c index 7ddeb67e97..e650b82170 100644 --- a/src/shutdown/umount.c +++ b/src/shutdown/umount.c @@ -529,11 +529,12 @@ static bool nonunmountable_path(const char *path) { } static void log_umount_blockers(const char *mnt) { + _cleanup_free_ char *blockers = NULL; + int r; + _cleanup_closedir_ DIR *dir = opendir("/proc"); if (!dir) - return (void) log_warning_errno(errno, "opendir(/proc) failed: %m"); - - _cleanup_free_ char *blockers = NULL; + return (void) log_warning_errno(errno, "Failed to open /proc/: %m"); FOREACH_DIRENT_ALL(de, dir, break) { if (!IN_SET(de->d_type, DT_DIR, DT_UNKNOWN)) @@ -543,37 +544,50 @@ static void log_umount_blockers(const char *mnt) { if (parse_pid(de->d_name, &pid) < 0) continue; - _cleanup_closedir_ DIR *pid_dir = xopendirat(dirfd(dir), de->d_name, 0); - if (!pid_dir) + _cleanup_free_ char *fdp = path_join(de->d_name, "fd"); + if (!fdp) + return (void) log_oom(); + + _cleanup_closedir_ DIR *fd_dir = xopendirat(dirfd(dir), fdp, 0); + if (!fd_dir) { + if (errno != ENOENT) /* process gone by now? */ + log_debug_errno(errno, "Failed to open /proc/%s/, ignoring: %m",fdp); continue; - - _cleanup_closedir_ DIR *fd_dir = xopendirat(dirfd(pid_dir), "fd", 0); - if (!fd_dir) - continue; - - FOREACH_DIRENT(fd_de, fd_dir, break) { - _cleanup_free_ char *open_file = NULL, *comm = NULL; - - if (readlinkat_malloc(dirfd(fd_dir), fd_de->d_name, &open_file) < 0) - continue; - - if (!path_startswith(open_file, mnt)) - continue; - - if (PATH_STARTSWITH_SET(open_file, "/dev", "/sys", "/proc")) - continue; - - if (get_process_comm(pid, &comm) < 0) - continue; - - if (!strextend_with_separator(&blockers, ", ", comm)) - return (void) log_oom(); - - if (!strextend(&blockers, "(", de->d_name, ")")) - return (void) log_oom(); - - break; } + + bool culprit = false; + FOREACH_DIRENT(fd_de, fd_dir, break) { + _cleanup_free_ char *open_file = NULL; + + r = readlinkat_malloc(dirfd(fd_dir), fd_de->d_name, &open_file); + if (r < 0) { + if (r != -ENOENT) /* fd closed by now */ + log_debug_errno(r, "Failed to read link /proc/%s/%s, ignoring: %m", fdp, fd_de->d_name); + continue; + } + + if (path_startswith(open_file, mnt)) { + culprit = true; + break; + } + } + + if (!culprit) + continue; + + _cleanup_free_ char *comm = NULL; + r = get_process_comm(pid, &comm); + if (r < 0) { + if (r != -ESRCH) /* process gone by now */ + log_debug_errno(r, "Failed to read process name of PID " PID_FMT ": %m", pid); + continue; + } + + if (!strextend_with_separator(&blockers, ", ", comm)) + return (void) log_oom(); + + if (!strextend(&blockers, "(", de->d_name, ")")) + return (void) log_oom(); } if (blockers)