diff --git a/src/core/mount-setup.c b/src/core/mount-setup.c index 7ef5b127ad..5dfcb6158a 100644 --- a/src/core/mount-setup.c +++ b/src/core/mount-setup.c @@ -536,7 +536,7 @@ int mount_setup(bool loaded_policy) { /* Also create /run/systemd/inaccessible nodes, so that we always have something to mount inaccessible nodes * from. */ - (void) make_inaccessible_nodes(NULL, UID_INVALID, GID_INVALID); + (void) make_inaccessible_nodes("/run/systemd", UID_INVALID, GID_INVALID); return 0; } diff --git a/src/core/namespace.c b/src/core/namespace.c index 104e96193d..fee4c98096 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -12,6 +12,7 @@ #include "base-filesystem.h" #include "dev-setup.h" #include "fd-util.h" +#include "format-util.h" #include "fs-util.h" #include "label.h" #include "loop-util.h" @@ -905,6 +906,7 @@ static int apply_mount( const char *root_directory, MountEntry *m) { + _cleanup_free_ char *inaccessible = NULL; bool rbind = true, make = false; const char *what; int r; @@ -916,6 +918,8 @@ static int apply_mount( switch (m->mode) { case INACCESSIBLE: { + _cleanup_free_ char *tmp = NULL; + const char *runtime_dir; struct stat target; /* First, get rid of everything that is below if there @@ -930,10 +934,20 @@ static int apply_mount( return log_debug_errno(errno, "Failed to lstat() %s to determine what to mount over it: %m", mount_entry_path(m)); } - what = mode_to_inaccessible_node(target.st_mode); - if (!what) + if (geteuid() == 0) + runtime_dir = "/run/systemd"; + else { + if (asprintf(&tmp, "/run/user/"UID_FMT, geteuid()) < 0) + return log_oom(); + + runtime_dir = tmp; + } + + r = mode_to_inaccessible_node(runtime_dir, target.st_mode, &inaccessible); + if (r < 0) return log_debug_errno(SYNTHETIC_ERRNO(ELOOP), "File type not supported for inaccessible mounts. 
Note that symlinks are not allowed"); + what = inaccessible; break; } diff --git a/src/login/user-runtime-dir.c b/src/login/user-runtime-dir.c index c8a5625605..1f98898b69 100644 --- a/src/login/user-runtime-dir.c +++ b/src/login/user-runtime-dir.c @@ -6,6 +6,7 @@ #include "sd-bus.h" #include "bus-error.h" +#include "dev-setup.h" #include "fs-util.h" #include "format-util.h" #include "label.h" @@ -91,6 +92,8 @@ static int user_mkdir_runtime_path( log_warning_errno(r, "Failed to fix label of \"%s\", ignoring: %m", runtime_path); } + /* Set up inaccessible nodes now so they're available if we decide to use them with user namespaces. */ + (void) make_inaccessible_nodes(runtime_path, uid, gid); return 0; fail: diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c index 0fb83a4ff3..f423f62590 100644 --- a/src/nspawn/nspawn-mount.c +++ b/src/nspawn/nspawn-mount.c @@ -883,8 +883,7 @@ static int mount_overlay(const char *dest, CustomMount *m) { } static int mount_inaccessible(const char *dest, CustomMount *m) { - _cleanup_free_ char *where = NULL; - const char *source; + _cleanup_free_ char *where = NULL, *source = NULL; struct stat st; int r; @@ -897,7 +896,9 @@ static int mount_inaccessible(const char *dest, CustomMount *m) { return m->graceful ? 0 : r; } - assert_se(source = mode_to_inaccessible_node(st.st_mode)); + r = mode_to_inaccessible_node("/run/systemd", st.st_mode, &source); + if (r < 0) + return m->graceful ? 0 : r; r = mount_verbose(m->graceful ? 
LOG_DEBUG : LOG_ERR, source, where, NULL, MS_BIND, NULL); if (r < 0) diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index 27ea592158..9113f6e323 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -3252,6 +3252,7 @@ static int outer_child( int netns_fd) { _cleanup_close_ int fd = -1; + const char *p; pid_t pid; ssize_t l; int r; @@ -3447,7 +3448,9 @@ static int outer_child( return r; (void) dev_setup(directory, arg_uid_shift, arg_uid_shift); - (void) make_inaccessible_nodes(directory, arg_uid_shift, arg_uid_shift); + + p = prefix_roota(directory, "/run/systemd"); + (void) make_inaccessible_nodes(p, arg_uid_shift, arg_uid_shift); r = setup_pts(directory); if (r < 0) diff --git a/src/shared/dev-setup.c b/src/shared/dev-setup.c index 071ff7b30c..4bce8b167b 100644 --- a/src/shared/dev-setup.c +++ b/src/shared/dev-setup.c @@ -61,20 +61,20 @@ int make_inaccessible_nodes(const char *root, uid_t uid, gid_t gid) { const char *name; mode_t mode; } table[] = { - { "/run/systemd", S_IFDIR | 0755 }, - { "/run/systemd/inaccessible", S_IFDIR | 0000 }, - { "/run/systemd/inaccessible/reg", S_IFREG | 0000 }, - { "/run/systemd/inaccessible/dir", S_IFDIR | 0000 }, - { "/run/systemd/inaccessible/fifo", S_IFIFO | 0000 }, - { "/run/systemd/inaccessible/sock", S_IFSOCK | 0000 }, + { "", S_IFDIR | 0755 }, + { "/inaccessible", S_IFDIR | 0000 }, + { "/inaccessible/reg", S_IFREG | 0000 }, + { "/inaccessible/dir", S_IFDIR | 0000 }, + { "/inaccessible/fifo", S_IFIFO | 0000 }, + { "/inaccessible/sock", S_IFSOCK | 0000 }, /* The following two are likely to fail if we lack the privs for it (for example in an userns * environment, if CAP_SYS_MKNOD is missing, or if a device node policy prohibit major/minor of 0 * device nodes to be created). But that's entirely fine. 
Consumers of these files should carry - * fallback to use a different node then, for example /run/systemd/inaccessible/sock, which is close + * fallback to use a different node then, for example /inaccessible/sock, which is close * enough in behaviour and semantics for most uses. */ - { "/run/systemd/inaccessible/chr", S_IFCHR | 0000 }, - { "/run/systemd/inaccessible/blk", S_IFBLK | 0000 }, + { "/inaccessible/chr", S_IFCHR | 0000 }, + { "/inaccessible/blk", S_IFBLK | 0000 }, }; _cleanup_umask_ mode_t u; diff --git a/src/shared/mount-util.c b/src/shared/mount-util.c index d316e81aa2..95d7ea9691 100644 --- a/src/shared/mount-util.c +++ b/src/shared/mount-util.c @@ -339,38 +339,72 @@ int repeat_unmount(const char *path, int flags) { } } -const char* mode_to_inaccessible_node(mode_t mode) { +int mode_to_inaccessible_node(const char *runtime_dir, mode_t mode, char **dest) { /* This function maps a node type to a corresponding inaccessible file node. These nodes are created during * early boot by PID 1. In some cases we lacked the privs to create the character and block devices (maybe * because we run in an userns environment, or miss CAP_SYS_MKNOD, or run with a devices policy that excludes * device nodes with major and minor of 0), but that's fine, in that case we use an AF_UNIX file node instead, * which is not the same, but close enough for most uses. And most importantly, the kernel allows bind mounts * from socket nodes to any non-directory file nodes, and that's the most important thing that matters. 
*/ + _cleanup_free_ char *d = NULL; + const char *node = NULL; + char *tmp; + + assert(dest); switch(mode & S_IFMT) { case S_IFREG: - return "/run/systemd/inaccessible/reg"; + node = "/inaccessible/reg"; + break; case S_IFDIR: - return "/run/systemd/inaccessible/dir"; + node = "/inaccessible/dir"; + break; case S_IFCHR: - if (access("/run/systemd/inaccessible/chr", F_OK) == 0) - return "/run/systemd/inaccessible/chr"; - return "/run/systemd/inaccessible/sock"; + d = path_join(runtime_dir, "/inaccessible/chr"); + if (!d) + return log_oom(); + + if (access(d, F_OK) == 0) { + *dest = TAKE_PTR(d); + return 0; + } + + node = "/inaccessible/sock"; + break; case S_IFBLK: - if (access("/run/systemd/inaccessible/blk", F_OK) == 0) - return "/run/systemd/inaccessible/blk"; - return "/run/systemd/inaccessible/sock"; + d = path_join(runtime_dir, "/inaccessible/blk"); + if (!d) + return log_oom(); + + if (access(d, F_OK) == 0) { + *dest = TAKE_PTR(d); + return 0; + } + + node = "/inaccessible/sock"; + break; case S_IFIFO: - return "/run/systemd/inaccessible/fifo"; + node = "/inaccessible/fifo"; + break; case S_IFSOCK: - return "/run/systemd/inaccessible/sock"; + node = "/inaccessible/sock"; + break; } - return NULL; + + if (!node) + return -EINVAL; + + tmp = path_join(runtime_dir, node); + if (!tmp) + return log_oom(); + + *dest = tmp; + return 0; } #define FLAG(name) (flags & name ? 
STRINGIFY(name) "|" : "") diff --git a/src/shared/mount-util.h b/src/shared/mount-util.h index 8649fca39b..9a8d073631 100644 --- a/src/shared/mount-util.h +++ b/src/shared/mount-util.h @@ -31,4 +31,4 @@ int mount_option_mangle( unsigned long *ret_mount_flags, char **ret_remaining_options); -const char* mode_to_inaccessible_node(mode_t mode); +int mode_to_inaccessible_node(const char *runtime_dir, mode_t mode, char **dest); diff --git a/src/test/test-dev-setup.c b/src/test/test-dev-setup.c index 9414ea6c3e..d991fe5200 100644 --- a/src/test/test-dev-setup.c +++ b/src/test/test-dev-setup.c @@ -20,7 +20,8 @@ int main(int argc, char *argv[]) { f = prefix_roota(p, "/run"); assert_se(mkdir(f, 0755) >= 0); - assert_se(make_inaccessible_nodes(p, 1, 1) >= 0); + f = prefix_roota(p, "/run/systemd"); + assert_se(make_inaccessible_nodes(f, 1, 1) >= 0); f = prefix_roota(p, "/run/systemd/inaccessible/reg"); assert_se(stat(f, &st) >= 0); diff --git a/test/TEST-43-PRIVATEUSER-UNPRIV/test.sh b/test/TEST-43-PRIVATEUSER-UNPRIV/test.sh index 23904a3c3f..49d61c6a7f 100755 --- a/test/TEST-43-PRIVATEUSER-UNPRIV/test.sh +++ b/test/TEST-43-PRIVATEUSER-UNPRIV/test.sh @@ -11,6 +11,7 @@ test_setup() { eval $(udevadm info --export --query=env --name=${LOOPDEV}p2) setup_basic_environment + inst_binary stat mask_supporting_services diff --git a/test/TEST-43-PRIVATEUSER-UNPRIV/testsuite.sh b/test/TEST-43-PRIVATEUSER-UNPRIV/testsuite.sh index 3cabd78574..cd61dc95c9 100755 --- a/test/TEST-43-PRIVATEUSER-UNPRIV/testsuite.sh +++ b/test/TEST-43-PRIVATEUSER-UNPRIV/testsuite.sh @@ -46,6 +46,15 @@ runas nobody systemd-run --user --unit=test-protect-home-tmpfs \ -p PrivateUsers=yes -p ProtectHome=tmpfs \ -P test ! 
-e /home/nobody +# Confirm that home, /root, and /run/user are inaccessible under "yes" +runas nobody systemd-run --user --unit=test-protect-home-yes \ + -p PrivateUsers=yes -p ProtectHome=yes \ + -P bash -ec ' + test "$(stat -c %a /home)" = "0" + test "$(stat -c %a /root)" = "0" + test "$(stat -c %a /run/user)" = "0" + ' + # Confirm we cannot change groups because we only have one mapping in the user # namespace (no CAP_SETGID in the parent namespace to write the additional # mapping of the user supplied group and thus cannot change groups to an