mirror of
https://github.com/Dasharo/systemd.git
synced 2026-03-06 15:02:31 -08:00
Merge pull request #24425 from poettering/shutdown-lazily
shutdown: lazily umount all API vfs
This commit is contained in:
@@ -76,16 +76,15 @@ int mount_points_list_get(const char *mountinfo, MountPoint **head) {
|
||||
return log_error_errno(r, "Failed to parse %s: %m", mountinfo ?: "/proc/self/mountinfo");
|
||||
|
||||
for (;;) {
|
||||
_cleanup_free_ char *options = NULL, *remount_options = NULL;
|
||||
struct libmnt_fs *fs;
|
||||
const char *path, *fstype;
|
||||
_cleanup_free_ char *options = NULL;
|
||||
unsigned long remount_flags = 0u;
|
||||
_cleanup_free_ char *remount_options = NULL;
|
||||
bool try_remount_ro;
|
||||
bool try_remount_ro, is_api_vfs;
|
||||
_cleanup_free_ MountPoint *m = NULL;
|
||||
|
||||
r = mnt_table_next_fs(table, iter, &fs);
|
||||
if (r == 1)
|
||||
if (r == 1) /* EOF */
|
||||
break;
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to get next entry from %s: %m", mountinfo ?: "/proc/self/mountinfo");
|
||||
@@ -96,52 +95,45 @@ int mount_points_list_get(const char *mountinfo, MountPoint **head) {
|
||||
|
||||
fstype = mnt_fs_get_fstype(fs);
|
||||
|
||||
/* Combine the generic VFS options with the FS-specific
|
||||
* options. Duplicates are not a problem here, because the only
|
||||
* options that should come up twice are typically ro/rw, which
|
||||
* are turned into MS_RDONLY or the inversion of it.
|
||||
/* Combine the generic VFS options with the FS-specific options. Duplicates are not a problem
|
||||
* here, because the only options that should come up twice are typically ro/rw, which are
|
||||
* turned into MS_RDONLY or the inversion of it.
|
||||
*
|
||||
* Even if there are duplicates later in mount_option_mangle()
|
||||
* they shouldn't hurt anyways as they override each other.
|
||||
*/
|
||||
* Even if there are duplicates later in mount_option_mangle() they shouldn't hurt anyways as
|
||||
* they override each other. */
|
||||
if (!strextend_with_separator(&options, ",", mnt_fs_get_vfs_options(fs)))
|
||||
return log_oom();
|
||||
if (!strextend_with_separator(&options, ",", mnt_fs_get_fs_options(fs)))
|
||||
return log_oom();
|
||||
|
||||
/* Ignore mount points we can't unmount because they
|
||||
* are API or because we are keeping them open (like
|
||||
* /dev/console). Also, ignore all mounts below API
|
||||
* file systems, since they are likely virtual too,
|
||||
* and hence not worth spending time on. Also, in
|
||||
* unprivileged containers we might lack the rights to
|
||||
* unmount these things, hence don't bother. */
|
||||
/* Ignore mount points we can't unmount because they are API or because we are keeping them
|
||||
* open (like /dev/console). Also, ignore all mounts below API file systems, since they are
|
||||
* likely virtual too, and hence not worth spending time on. Also, in unprivileged containers
|
||||
* we might lack the rights to unmount these things, hence don't bother. */
|
||||
if (mount_point_is_api(path) ||
|
||||
mount_point_ignore(path) ||
|
||||
PATH_STARTSWITH_SET(path, "/dev", "/sys", "/proc"))
|
||||
continue;
|
||||
|
||||
/* If we are in a container, don't attempt to
|
||||
* read-only mount anything as that brings no real
|
||||
* benefits, but might confuse the host, as we remount
|
||||
* the superblock here, not the bind mount.
|
||||
is_api_vfs = fstype_is_api_vfs(fstype);
|
||||
|
||||
/* If we are in a container, don't attempt to read-only mount anything as that brings no real
|
||||
* benefits, but might confuse the host, as we remount the superblock here, not the bind
|
||||
* mount.
|
||||
*
|
||||
* If the filesystem is a network fs, also skip the
|
||||
* remount. It brings no value (we cannot leave
|
||||
* a "dirty fs") and could hang if the network is down.
|
||||
* Note that umount2() is more careful and will not
|
||||
* hang because of the network being down. */
|
||||
* If the filesystem is a network fs, also skip the remount. It brings no value (we cannot
|
||||
* leave a "dirty fs") and could hang if the network is down. Note that umount2() is more
|
||||
* careful and will not hang because of the network being down. */
|
||||
try_remount_ro = detect_container() <= 0 &&
|
||||
!fstype_is_network(fstype) &&
|
||||
!fstype_is_api_vfs(fstype) &&
|
||||
!is_api_vfs &&
|
||||
!fstype_is_ro(fstype) &&
|
||||
!fstab_test_yes_no_option(options, "ro\0rw\0");
|
||||
|
||||
if (try_remount_ro) {
|
||||
/* mount(2) states that mount flags and options need to be exactly the same
|
||||
* as they were when the filesystem was mounted, except for the desired
|
||||
* changes. So we reconstruct both here and adjust them for the later
|
||||
* remount call too. */
|
||||
/* mount(2) states that mount flags and options need to be exactly the same as they
|
||||
* were when the filesystem was mounted, except for the desired changes. So we
|
||||
* reconstruct both here and adjust them for the later remount call too. */
|
||||
|
||||
r = mnt_fs_get_propagation(fs, &remount_flags);
|
||||
if (r < 0) {
|
||||
@@ -160,17 +152,25 @@ int mount_points_list_get(const char *mountinfo, MountPoint **head) {
|
||||
remount_flags = (remount_flags|MS_REMOUNT|MS_RDONLY) & ~MS_BIND;
|
||||
}
|
||||
|
||||
m = new0(MountPoint, 1);
|
||||
m = new(MountPoint, 1);
|
||||
if (!m)
|
||||
return log_oom();
|
||||
|
||||
*m = (MountPoint) {
|
||||
.remount_options = remount_options,
|
||||
.remount_flags = remount_flags,
|
||||
.try_remount_ro = try_remount_ro,
|
||||
|
||||
/* Unmount sysfs/procfs/… lazily, since syncing doesn't matter there, and it's OK if
|
||||
* something keeps an fd open to it. */
|
||||
.umount_lazily = is_api_vfs,
|
||||
};
|
||||
|
||||
m->path = strdup(path);
|
||||
if (!m->path)
|
||||
return log_oom();
|
||||
|
||||
m->remount_options = TAKE_PTR(remount_options);
|
||||
m->remount_flags = remount_flags;
|
||||
m->try_remount_ro = try_remount_ro;
|
||||
TAKE_PTR(remount_options);
|
||||
|
||||
LIST_PREPEND(mount_point, *head, TAKE_PTR(m));
|
||||
}
|
||||
@@ -202,7 +202,7 @@ int swap_list_get(const char *swaps, MountPoint **head) {
|
||||
const char *source;
|
||||
|
||||
r = mnt_table_next_fs(t, i, &fs);
|
||||
if (r == 1)
|
||||
if (r == 1) /* EOF */
|
||||
break;
|
||||
if (r < 0)
|
||||
return log_error_errno(r, "Failed to get next entry from %s: %m", swaps ?: "/proc/swaps");
|
||||
@@ -377,8 +377,8 @@ static int md_list_get(MountPoint **head) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* MD "containers" are a special type of MD devices, used for external metadata.
|
||||
* Since it doesn't provide RAID functionality in itself we don't need to stop it. */
|
||||
/* MD "containers" are a special type of MD devices, used for external metadata. Since it
|
||||
* doesn't provide RAID functionality in itself we don't need to stop it. */
|
||||
if (streq(md_level, "container"))
|
||||
continue;
|
||||
|
||||
@@ -529,11 +529,12 @@ static bool nonunmountable_path(const char *path) {
|
||||
}
|
||||
|
||||
static void log_umount_blockers(const char *mnt) {
|
||||
_cleanup_free_ char *blockers = NULL;
|
||||
int r;
|
||||
|
||||
_cleanup_closedir_ DIR *dir = opendir("/proc");
|
||||
if (!dir)
|
||||
return (void) log_warning_errno(errno, "opendir(/proc) failed: %m");
|
||||
|
||||
_cleanup_free_ char *blockers = NULL;
|
||||
return (void) log_warning_errno(errno, "Failed to open /proc/: %m");
|
||||
|
||||
FOREACH_DIRENT_ALL(de, dir, break) {
|
||||
if (!IN_SET(de->d_type, DT_DIR, DT_UNKNOWN))
|
||||
@@ -543,37 +544,50 @@ static void log_umount_blockers(const char *mnt) {
|
||||
if (parse_pid(de->d_name, &pid) < 0)
|
||||
continue;
|
||||
|
||||
_cleanup_closedir_ DIR *pid_dir = xopendirat(dirfd(dir), de->d_name, 0);
|
||||
if (!pid_dir)
|
||||
_cleanup_free_ char *fdp = path_join(de->d_name, "fd");
|
||||
if (!fdp)
|
||||
return (void) log_oom();
|
||||
|
||||
_cleanup_closedir_ DIR *fd_dir = xopendirat(dirfd(dir), fdp, 0);
|
||||
if (!fd_dir) {
|
||||
if (errno != ENOENT) /* process gone by now? */
|
||||
log_debug_errno(errno, "Failed to open /proc/%s/, ignoring: %m",fdp);
|
||||
continue;
|
||||
|
||||
_cleanup_closedir_ DIR *fd_dir = xopendirat(dirfd(pid_dir), "fd", 0);
|
||||
if (!fd_dir)
|
||||
continue;
|
||||
|
||||
FOREACH_DIRENT(fd_de, fd_dir, break) {
|
||||
_cleanup_free_ char *open_file = NULL, *comm = NULL;
|
||||
|
||||
if (readlinkat_malloc(dirfd(fd_dir), fd_de->d_name, &open_file) < 0)
|
||||
continue;
|
||||
|
||||
if (!path_startswith(open_file, mnt))
|
||||
continue;
|
||||
|
||||
if (PATH_STARTSWITH_SET(open_file, "/dev", "/sys", "/proc"))
|
||||
continue;
|
||||
|
||||
if (get_process_comm(pid, &comm) < 0)
|
||||
continue;
|
||||
|
||||
if (!strextend_with_separator(&blockers, ", ", comm))
|
||||
return (void) log_oom();
|
||||
|
||||
if (!strextend(&blockers, "(", de->d_name, ")"))
|
||||
return (void) log_oom();
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
bool culprit = false;
|
||||
FOREACH_DIRENT(fd_de, fd_dir, break) {
|
||||
_cleanup_free_ char *open_file = NULL;
|
||||
|
||||
r = readlinkat_malloc(dirfd(fd_dir), fd_de->d_name, &open_file);
|
||||
if (r < 0) {
|
||||
if (r != -ENOENT) /* fd closed by now */
|
||||
log_debug_errno(r, "Failed to read link /proc/%s/%s, ignoring: %m", fdp, fd_de->d_name);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (path_startswith(open_file, mnt)) {
|
||||
culprit = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!culprit)
|
||||
continue;
|
||||
|
||||
_cleanup_free_ char *comm = NULL;
|
||||
r = get_process_comm(pid, &comm);
|
||||
if (r < 0) {
|
||||
if (r != -ESRCH) /* process gone by now */
|
||||
log_debug_errno(r, "Failed to read process name of PID " PID_FMT ": %m", pid);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!strextend_with_separator(&blockers, ", ", comm))
|
||||
return (void) log_oom();
|
||||
|
||||
if (!strextend(&blockers, "(", de->d_name, ")"))
|
||||
return (void) log_oom();
|
||||
}
|
||||
|
||||
if (blockers)
|
||||
@@ -635,30 +649,19 @@ static int umount_with_timeout(MountPoint *m, bool last_try) {
|
||||
if (r == 0) {
|
||||
log_info("Unmounting '%s'.", m->path);
|
||||
|
||||
/* Start the mount operation here in the child Using MNT_FORCE
|
||||
* causes some filesystems (e.g. FUSE and NFS and other network
|
||||
* filesystems) to abort any pending requests and return -EIO
|
||||
* rather than blocking indefinitely. If the filesysten is
|
||||
* "busy", this may allow processes to die, thus making the
|
||||
* filesystem less busy so the unmount might succeed (rather
|
||||
* than return EBUSY). */
|
||||
r = RET_NERRNO(umount2(m->path, MNT_FORCE));
|
||||
/* Start the mount operation here in the child Using MNT_FORCE causes some filesystems
|
||||
* (e.g. FUSE and NFS and other network filesystems) to abort any pending requests and return
|
||||
* -EIO rather than blocking indefinitely. If the filesysten is "busy", this may allow
|
||||
* processes to die, thus making the filesystem less busy so the unmount might succeed
|
||||
* (rather than return EBUSY). */
|
||||
r = RET_NERRNO(umount2(m->path,
|
||||
UMOUNT_NOFOLLOW | /* Don't follow symlinks: this should never happen unless our mount list was wrong */
|
||||
(m->umount_lazily ? MNT_DETACH : MNT_FORCE)));
|
||||
if (r < 0) {
|
||||
log_full_errno(last_try ? LOG_ERR : LOG_INFO, r, "Failed to unmount %s: %m", m->path);
|
||||
|
||||
if (r == -EBUSY && last_try)
|
||||
log_umount_blockers(m->path);
|
||||
|
||||
/* If API filesystems under /oldroot cannot be unmounted we can still lazily unmount
|
||||
* them to unblock /oldroot. They serve no function to us anymore and should be
|
||||
* memory-only and hence safe to unmount like this. */
|
||||
if (in_initrd() &&
|
||||
PATH_STARTSWITH_SET(m->path, "/oldroot/dev", "/oldroot/proc", "/oldroot/sys")) {
|
||||
log_info("Lazily unmounting '%s' instead.", m->path);
|
||||
r = umount2(m->path, MNT_FORCE | MNT_DETACH);
|
||||
if (r < 0)
|
||||
log_error_errno(errno, "Failed to lazily unmount %s: %m", m->path);
|
||||
}
|
||||
}
|
||||
|
||||
_exit(r < 0 ? EXIT_FAILURE : EXIT_SUCCESS);
|
||||
@@ -855,9 +858,8 @@ int umount_all(bool *changed, bool last_try) {
|
||||
|
||||
assert(changed);
|
||||
|
||||
/* Retry umount, until nothing can be umounted anymore. Mounts are
|
||||
* processed in order, newest first. The retries are needed when
|
||||
* an old mount has been moved, to a path inside a newer mount. */
|
||||
/* Retry umount, until nothing can be umounted anymore. Mounts are processed in order, newest
|
||||
* first. The retries are needed when an old mount has been moved, to a path inside a newer mount. */
|
||||
do {
|
||||
umount_changed = false;
|
||||
|
||||
|
||||
@@ -18,7 +18,8 @@ typedef struct MountPoint {
|
||||
char *path;
|
||||
char *remount_options;
|
||||
unsigned long remount_flags;
|
||||
bool try_remount_ro;
|
||||
bool try_remount_ro:1;
|
||||
bool umount_lazily:1;
|
||||
dev_t devnum;
|
||||
LIST_FIELDS(struct MountPoint, mount_point);
|
||||
} MountPoint;
|
||||
|
||||
Reference in New Issue
Block a user