diff --git a/man/systemd-nspawn.xml b/man/systemd-nspawn.xml
index eb411102bc..9bc4bb7c44 100644
--- a/man/systemd-nspawn.xml
+++ b/man/systemd-nspawn.xml
@@ -133,6 +133,75 @@
The following options are understood:
+
+
+
+
+
+ Turns off any status output by the tool
+ itself. When this switch is used, the only output from nspawn
+ will be the console output of the container OS
+ itself.
+
+
+
+ MODE
+
+ Controls whether
+ systemd-nspawn shall search for and use
+ additional per-container settings from
+ .nspawn files. Takes a boolean or the
+ special values or
+ .
+
+ If enabled (the default), a settings file named after the
+ machine (as specified with the
+ setting, or derived from the directory or image file name)
+ with the suffix .nspawn is searched in
+ /etc/systemd/nspawn/ and
+ /run/systemd/nspawn/. If it is found
+ there, its settings are read and used. If it is not found
+ there, it is subsequently searched in the same directory as the
+ image file or in the immediate parent of the root directory of
+ the container. In this case, if the file is found, its settings
+ will be also read and used, but potentially unsafe settings
+ are ignored. Note that in both these cases, settings on the
+ command line take precedence over the corresponding settings
+ from loaded .nspawn files, if both are
+ specified. Unsafe settings are considered all settings that
+ elevate the container's privileges or grant access to
+ additional resources such as files or directories of the
+ host. For details about the format and contents of
+ .nspawn files, consult
+ systemd.nspawn5.
+
+ If this option is set to , the
+ file is searched, read and used the same way, however, the order of
+ precedence is reversed: settings read from the
+ .nspawn file will take precedence over
+ the corresponding command line options, if both are
+ specified.
+
+ If this option is set to , the
+ file is searched, read and used the same way, but regardless
+ of being found in /etc/systemd/nspawn/,
+ /run/systemd/nspawn/ or next to the image
+ file or container root directory, all settings will take
+ effect, however, command line arguments still take precedence
+ over corresponding settings.
+
+ If disabled, no .nspawn file is read
+ and no settings except the ones on the command line are in
+ effect.
+
+
+
+
+
+ Image Options
+
+
+
@@ -247,6 +316,66 @@
from the OCI runtime JSON data (but data passed on the command line takes precedence).
+
+
+
+ Mount the container's root file system (and any other file systems contained in the container
+ image) read-only. This has no effect on additional mounts made with ,
+ and similar options. This mode is implied if the container image file or directory is
+ marked read-only itself. It is also implied if is used. In this case the container
+ image on disk is strictly read-only, while changes are permitted but kept non-persistently in memory only. For
+ further details, see below.
+
+
+
+
+ MODE
+
+ Boots the container in volatile mode. When no mode parameter is passed or when mode is
+ specified as , full volatile mode is enabled. This means the root directory is mounted as a
+ mostly unpopulated tmpfs instance, and /usr/ from the OS tree is
+ mounted into it in read-only mode (the system thus starts up with read-only OS image, but pristine state and
+ configuration, any changes are lost on shutdown). When the mode parameter is specified as
+ , the OS tree is mounted read-only, but /var/ is mounted as a
+ writable tmpfs instance into it (the system thus starts up with read-only OS resources and
+ configuration, but pristine state, and any changes to the latter are lost on shutdown). When the mode parameter
+ is specified as the read-only root file system is combined with a writable
+ tmpfs instance through overlayfs, so that it appears as it normally
+ would, but any changes are applied to the temporary file system only and lost when the container is
+ terminated. When the mode parameter is specified as (the default), the whole OS tree is
+ made available writable (unless is specified, see above).
+
+ Note that if one of the volatile modes is chosen, its effect is limited to the root file system (or
+ /var/ in case of ), and any other mounts placed in the hierarchy are
+ unaffected — regardless if they are established automatically (e.g. the EFI system partition that might be
+ mounted to /efi/ or /boot/) or explicitly (e.g. through an additional
+ command line option such as , see below). This means, even if
+ is used changes to /efi/ or
+ /boot/ are prohibited in case such a partition exists in the container image operated on,
+ and even if is used the hypothetical file /etc/foobar is
+ potentially writable if is used to mount it from outside the read-only
+ container /etc directory.
+
+ The option is closely related to this setting, and provides similar
+ behaviour by making a temporary, ephemeral copy of the whole OS image and executing that. For further details,
+ see above.
+
+ The and options provide similar functionality, but
+ for specific sub-directories of the OS image only. For details, see below.
+
+ This option provides similar functionality for containers as the systemd.volatile=
+ kernel command line switch provides for host systems. See
+ kernel-command-line7 for
+ details.
+
+ Note that setting this option to or will only work correctly
+ with operating systems in the container that can boot up with only /usr mounted, and are
+ able to automatically populate /var, and also /etc in case of
+ --volatile=yes. The option does not require any particular
+ preparations in the OS, but do note that overlayfs behaviour differs from regular file
+ systems in a number of ways, and hence compatibility is limited.
+
+
@@ -262,6 +391,27 @@
used, also as formatted hexadecimal characters.
+
+
+
+ Pivot the specified directory to / inside the container, and either unmount the
+ container's old root, or pivot it to another specified directory. Takes one of: a path argument — in which case the
+ specified path will be pivoted to / and the old root will be unmounted; or a colon-separated pair
+ of new root path and pivot destination for the old root. The new root path will be pivoted to /,
+ and the old / will be pivoted to the other directory. Both paths must be absolute, and are resolved
+ in the container's file system namespace.
+
+ This is for containers which have several bootable directories in them; for example, several
+ OSTree deployments. It emulates the behavior of
+ the boot loader and initial RAM disk which normally select which directory to mount as the root and start the
+ container's PID 1 in.
+
+
+
+
+ Execution Options
+
+
@@ -336,19 +486,14 @@
-
+
+
- Pivot the specified directory to / inside the container, and either unmount the
- container's old root, or pivot it to another specified directory. Takes one of: a path argument — in which case the
- specified path will be pivoted to / and the old root will be unmounted; or a colon-separated pair
- of new root path and pivot destination for the old root. The new root path will be pivoted to /,
- and the old / will be pivoted to the other directory. Both paths must be absolute, and are resolved
- in the container's file system namespace.
-
- This is for containers which have several bootable directories in them; for example, several
- OSTree deployments. It emulates the behavior of
- the boot loader and initial RAM disk which normally select which directory to mount as the root and start the
- container's PID 1 in.
+ Specifies an environment variable assignment
+ to pass to the init process in the container, in the format
+ NAME=VALUE. This may be used to override
+ the default variables or to set additional variables. This
+ parameter may be used more than once.
@@ -362,6 +507,36 @@
destructive operations only.
+
+
+
+ Specify the process signal to send to the container's PID 1 when nspawn itself receives
+ SIGTERM, in order to trigger an orderly shutdown of the container. Defaults to
+ SIGRTMIN+3 if is used (on systemd-compatible init systems
+ SIGRTMIN+3 triggers an orderly shutdown). If is not used and this
+ option is not specified the container's processes are terminated abruptly via SIGKILL. For
+ a list of valid signals, see signal7.
+
+
+
+
+
+ Configures support for notifications from the container's init process.
+ takes a boolean ( and ).
+ With option systemd-nspawn notifies systemd
+ with a READY=1 message when the init process is created.
+ With option systemd-nspawn waits for the
+ READY=1 message from the init process in the container
+ before sending its own to systemd. For more details about notifications
+ see sd_notify3.
+
+
+
+
+ System Identity Options
+
+
@@ -406,7 +581,12 @@
/etc/machine-id in the container is
unpopulated.
+
+
+ Property Options
+
+
@@ -427,6 +607,42 @@
+
+
+
+ Controls whether the container is registered with
+ systemd-machined8. Takes a
+ boolean argument, which defaults to yes. This option should be enabled when the container
+ runs a full Operating System (more specifically: a system and service manager as PID 1), and is useful to
+ ensure that the container is accessible via
+ machinectl1 and shown by
+ tools such as ps1. If the container
+ does not run a service manager, it is recommended to set this option to
+ no.
+
+
+
+
+
+ Instead of creating a transient scope unit to run the container in, simply use the service or
+ scope unit systemd-nspawn has been invoked in. If is set
+ this unit is registered with
+ systemd-machined8. This
+ switch should be used if systemd-nspawn is invoked from within a service unit, and the
+ service unit's sole purpose is to run a single systemd-nspawn container. This option is not
+ available if run from a user session.
+ Note that passing disables the effect of and
+ . Use and in
+ combination to disable any kind of unit allocation or registration with
+ systemd-machined.
+
+
+
+
+ User Namespacing Options
+
+
@@ -519,6 +735,13 @@
+
+
+
+ Networking Options
+
+
+
@@ -535,23 +758,6 @@
-
-
-
- Takes the path to a file representing a kernel
- network namespace that the container shall run in. The specified path
- should refer to a (possibly bind-mounted) network namespace file, as
- exposed by the kernel below /proc/$PID/ns/net.
- This makes the container enter the given network namespace. One of the
- typical use cases is to give a network namespace under
- /run/netns created by ip-netns8,
- for example, .
- Note that this option cannot be used together with other
- network-related options, such as
- or .
-
-
@@ -689,6 +895,23 @@
+
+
+
+ Takes the path to a file representing a kernel
+ network namespace that the container shall run in. The specified path
+ should refer to a (possibly bind-mounted) network namespace file, as
+ exposed by the kernel below /proc/$PID/ns/net.
+ This makes the container enter the given network namespace. One of the
+ typical use cases is to give a network namespace under
+ /run/netns created by ip-netns8,
+ for example, .
+ Note that this option cannot be used together with other
+ network-related options, such as
+ or .
+
+
@@ -707,26 +930,12 @@
,
.
+
-
-
-
-
- Sets the SELinux security context to be used
- to label processes in the container.
-
-
-
-
-
-
-
- Sets the SELinux security context to be used
- to label files in the virtual API file systems in the
- container.
-
-
+
+ Security Options
+
@@ -780,6 +989,31 @@
capabilities are passed using the --capabilities=.
+
+
+
+
+ Sets the SELinux security context to be used
+ to label processes in the container.
+
+
+
+
+
+
+
+ Sets the SELinux security context to be used
+ to label files in the virtual API file systems in the
+ container.
+
+
+
+
+
+ Resource Options
+
+
+
@@ -825,58 +1059,23 @@
-
+
- Specify the process signal to send to the container's PID 1 when nspawn itself receives
- SIGTERM, in order to trigger an orderly shutdown of the container. Defaults to
- SIGRTMIN+3 if is used (on systemd-compatible init systems
- SIGRTMIN+3 triggers an orderly shutdown). If is not used and this
- option is not specified the container's processes are terminated abruptly via SIGKILL. For
- a list of valid signals, see signal7.
+ Control the architecture ("personality")
+ reported by
+ uname2
+ in the container. Currently, only x86 and
+ x86-64 are supported. This is useful when
+ running a 32-bit container on a 64-bit host. If this setting
+ is not used, the personality reported in the container is the
+ same as the one reported on the host.
+
-
-
-
- Control whether the container's journal shall
- be made visible to the host system. If enabled, allows viewing
- the container's journal files from the host (but not vice
- versa). Takes one of no,
- host, try-host,
- guest, try-guest,
- auto. If no, the journal
- is not linked. If host, the journal files
- are stored on the host file system (beneath
- /var/log/journal/machine-id)
- and the subdirectory is bind-mounted into the container at the
- same location. If guest, the journal files
- are stored on the guest file system (beneath
- /var/log/journal/machine-id)
- and the subdirectory is symlinked into the host at the same
- location. try-host and
- try-guest do the same but do not fail if
- the host does not have persistent journaling enabled. If
- auto (the default), and the right
- subdirectory of /var/log/journal exists,
- it will be bind mounted into the container. If the
- subdirectory does not exist, no linking is performed.
- Effectively, booting a container once with
- guest or host will link
- the journal persistently if further on the default of
- auto is used.
-
- Note that is the default if the
- systemd-nspawn@.service template unit file is used.
-
-
-
-
-
- Equivalent to
- .
-
+
+ Integration Options
+
@@ -926,16 +1125,53 @@
-
+
- Mount the container's root file system (and any other file systems container in the container
- image) read-only. This has no effect on additional mounts made with ,
- and similar options. This mode is implied if the container image file or directory is
- marked read-only itself. It is also implied if is used. In this case the container
- image on disk is strictly read-only, while changes are permitted but kept non-persistently in memory only. For
- further details, see below.
+ Control whether the container's journal shall
+ be made visible to the host system. If enabled, allows viewing
+ the container's journal files from the host (but not vice
+ versa). Takes one of no,
+ host, try-host,
+ guest, try-guest,
+ auto. If no, the journal
+ is not linked. If host, the journal files
+ are stored on the host file system (beneath
+ /var/log/journal/machine-id)
+ and the subdirectory is bind-mounted into the container at the
+ same location. If guest, the journal files
+ are stored on the guest file system (beneath
+ /var/log/journal/machine-id)
+ and the subdirectory is symlinked into the host at the same
+ location. try-host and
+ try-guest do the same but do not fail if
+ the host does not have persistent journaling enabled. If
+ auto (the default), and the right
+ subdirectory of /var/log/journal exists,
+ it will be bind mounted into the container. If the
+ subdirectory does not exist, no linking is performed.
+ Effectively, booting a container once with
+ guest or host will link
+ the journal persistently if further on the default of
+ auto is used.
+
+ Note that is the default if the
+ systemd-nspawn@.service template unit file is used.
+
+
+
+ Equivalent to
+ .
+
+
+
+
+
+ Mount Options
+
+
+
@@ -1044,65 +1280,15 @@
this switch.
Note that this option cannot be used to replace the root file system of the container with an overlay
- file system. However, the option described below provides similar functionality,
+ file system. However, the option described above provides similar functionality,
with a focus on implementing stateless operating system images.
+
-
-
-
-
- Specifies an environment variable assignment
- to pass to the init process in the container, in the format
- NAME=VALUE. This may be used to override
- the default variables or to set additional variables. This
- parameter may be used more than once.
-
-
-
-
-
- Controls whether the container is registered with
- systemd-machined8. Takes a
- boolean argument, which defaults to yes. This option should be enabled when the container
- runs a full Operating System (more specifically: a system and service manager as PID 1), and is useful to
- ensure that the container is accessible via
- machinectl1 and shown by
- tools such as ps1. If the container
- does not run a service manager, it is recommended to set this option to
- no.
-
-
-
-
-
- Instead of creating a transient scope unit to run the container in, simply use the service or
- scope unit systemd-nspawn has been invoked in. If is set
- this unit is registered with
- systemd-machined8. This
- switch should be used if systemd-nspawn is invoked from within a service unit, and the
- service unit's sole purpose is to run a single systemd-nspawn container. This option is not
- available if run from a user session.
- Note that passing disables the effect of and
- . Use and in
- combination to disable any kind of unit allocation or registration with
- systemd-machined.
-
-
-
-
-
- Control the architecture ("personality")
- reported by
- uname2
- in the container. Currently, only x86 and
- x86-64 are supported. This is useful when
- running a 32-bit container on a 64-bit host. If this setting
- is not used, the personality reported in the container is the
- same as the one reported on the host.
-
+
+ Input/Output Options
+ MODE
@@ -1139,133 +1325,10 @@
Equivalent to .
-
-
-
-
- Turns off any status output by the tool
- itself. When this switch is used, the only output from nspawn
- will be the console output of the container OS
- itself.
-
-
-
-
- MODE
-
- Boots the container in volatile mode. When no mode parameter is passed or when mode is
- specified as , full volatile mode is enabled. This means the root directory is mounted as a
- mostly unpopulated tmpfs instance, and /usr/ from the OS tree is
- mounted into it in read-only mode (the system thus starts up with read-only OS image, but pristine state and
- configuration, any changes are lost on shutdown). When the mode parameter is specified as
- , the OS tree is mounted read-only, but /var/ is mounted as a
- writable tmpfs instance into it (the system thus starts up with read-only OS resources and
- configuration, but pristine state, and any changes to the latter are lost on shutdown). When the mode parameter
- is specified as the read-only root file system is combined with a writable
- tmpfs instance through overlayfs, so that it appears at it normally
- would, but any changes are applied to the temporary file system only and lost when the container is
- terminated. When the mode parameter is specified as (the default), the whole OS tree is
- made available writable (unless is specified, see above).
-
- Note that if one of the volatile modes is chosen, its effect is limited to the root file system (or
- /var/ in case of ), and any other mounts placed in the hierarchy are
- unaffected — regardless if they are established automatically (e.g. the EFI system partition that might be
- mounted to /efi/ or /boot/) or explicitly (e.g. through an additional
- command line option such as , see above). This means, even if
- is used changes to /efi/ or
- /boot/ are prohibited in case such a partition exists in the container image operated on,
- and even if is used the hypothetical file /etc/foobar is
- potentially writable if if used to mount it from outside the read-only
- container /etc directory.
-
- The option is closely related to this setting, and provides similar
- behaviour by making a temporary, ephemeral copy of the whole OS image and executing that. For further details,
- see above.
-
- The and options provide similar functionality, but
- for specific sub-directories of the OS image only. For details, see above.
-
- This option provides similar functionality for containers as the systemd.volatile=
- kernel command line switch provides for host systems. See
- kernel-command-line7 for
- details.
-
- Note that setting this option to or will only work correctly
- with operating systems in the container that can boot up with only /usr mounted, and are
- able to automatically populate /var, and also /etc in case of
- --volatile=yes. The option does not require any particular
- preparations in the OS, but do note that overlayfs behaviour differs from regular file
- systems in a number of ways, and hence compatibility is limited.
-
-
-
- MODE
-
- Controls whether
- systemd-nspawn shall search for and use
- additional per-container settings from
- .nspawn files. Takes a boolean or the
- special values or
- .
-
- If enabled (the default), a settings file named after the
- machine (as specified with the
- setting, or derived from the directory or image file name)
- with the suffix .nspawn is searched in
- /etc/systemd/nspawn/ and
- /run/systemd/nspawn/. If it is found
- there, its settings are read and used. If it is not found
- there, it is subsequently searched in the same directory as the
- image file or in the immediate parent of the root directory of
- the container. In this case, if the file is found, its settings
- will be also read and used, but potentially unsafe settings
- are ignored. Note that in both these cases, settings on the
- command line take precedence over the corresponding settings
- from loaded .nspawn files, if both are
- specified. Unsafe settings are considered all settings that
- elevate the container's privileges or grant access to
- additional resources such as files or directories of the
- host. For details about the format and contents of
- .nspawn files, consult
- systemd.nspawn5.
-
- If this option is set to , the
- file is searched, read and used the same way, however, the order of
- precedence is reversed: settings read from the
- .nspawn file will take precedence over
- the corresponding command line options, if both are
- specified.
-
- If this option is set to , the
- file is searched, read and used the same way, but regardless
- of being found in /etc/systemd/nspawn/,
- /run/systemd/nspawn/ or next to the image
- file or container root directory, all settings will take
- effect, however, command line arguments still take precedence
- over corresponding settings.
-
- If disabled, no .nspawn file is read
- and no settings except the ones on the command line are in
- effect.
-
-
-
-
-
- Configures support for notifications from the container's init process.
- takes a boolean ( and ).
- With option systemd-nspawn notifies systemd
- with a READY=1 message when the init process is created.
- With option systemd-nspawn waits for the
- READY=1 message from the init process in the container
- before sending its own to systemd. For more details about notifications
- see sd_notify3).
-
-
-
+
diff --git a/src/basic/capability-util.c b/src/basic/capability-util.c
index 99628f6260..0035352954 100644
--- a/src/basic/capability-util.c
+++ b/src/basic/capability-util.c
@@ -28,8 +28,8 @@ int have_effective_cap(int value) {
if (cap_get_flag(cap, value, CAP_EFFECTIVE, &fv) < 0)
return -errno;
- else
- return fv == CAP_SET;
+
+ return fv == CAP_SET;
}
unsigned long cap_last_cap(void) {
@@ -50,7 +50,7 @@ unsigned long cap_last_cap(void) {
if (p > 63) /* Safety for the future: if one day the kernel learns more than 64 caps,
* then we are in trouble (since we, as much userspace and kernel space
- * store capability masks in uint64_t types. Let's hence protect
+ * store capability masks in uint64_t types). Let's hence protect
* ourselves against that and always cap at 63 for now. */
p = 63;
@@ -61,7 +61,7 @@ unsigned long cap_last_cap(void) {
}
/* fall back to syscall-probing for pre linux-3.2 */
- p = (unsigned long) CAP_LAST_CAP;
+ p = MIN((unsigned long) CAP_LAST_CAP, 63U);
if (prctl(PR_CAPBSET_READ, p) < 0) {
@@ -107,13 +107,13 @@ int capability_update_inherited_set(cap_t caps, uint64_t set) {
}
int capability_ambient_set_apply(uint64_t set, bool also_inherit) {
- unsigned long i;
_cleanup_cap_free_ cap_t caps = NULL;
+ unsigned long i;
+ int r;
/* Add the capabilities to the ambient set. */
if (also_inherit) {
- int r;
caps = cap_get_proc();
if (!caps)
return -errno;
@@ -272,16 +272,12 @@ int capability_bounding_set_drop_usermode(uint64_t keep) {
}
int drop_privileges(uid_t uid, gid_t gid, uint64_t keep_capabilities) {
- _cleanup_cap_free_ cap_t d = NULL;
- unsigned i, j = 0;
int r;
- /* Unfortunately we cannot leave privilege dropping to PID 1
- * here, since we want to run as user but want to keep some
- * capabilities. Since file capabilities have been introduced
- * this cannot be done across exec() anymore, unless our
- * binary has the capability configured in the file system,
- * which we want to avoid. */
+ /* Unfortunately we cannot leave privilege dropping to PID 1 here, since we want to run as user but
+ * want to keep some capabilities. Since file capabilities have been introduced this cannot be done
+ * across exec() anymore, unless our binary has the capability configured in the file system, which
+ * we want to avoid. */
if (setresgid(gid, gid, gid) < 0)
return log_error_errno(errno, "Failed to change group ID: %m");
@@ -290,7 +286,9 @@ int drop_privileges(uid_t uid, gid_t gid, uint64_t keep_capabilities) {
if (r < 0)
return log_error_errno(r, "Failed to drop auxiliary groups list: %m");
- /* Ensure we keep the permitted caps across the setresuid() */
+ /* Ensure we keep the permitted caps across the setresuid(). Note that we do this even if we actually
+ * don't want to keep any capabilities, since we want to be able to drop them from the bounding set
+ * too, and we can only do that if we have capabilities. */
if (prctl(PR_SET_KEEPCAPS, 1) < 0)
return log_error_errno(errno, "Failed to enable keep capabilities flag: %m");
@@ -300,18 +298,21 @@ int drop_privileges(uid_t uid, gid_t gid, uint64_t keep_capabilities) {
if (prctl(PR_SET_KEEPCAPS, 0) < 0)
return log_error_errno(errno, "Failed to disable keep capabilities flag: %m");
- /* Drop all caps from the bounding set, except the ones we want */
+ /* Drop all caps from the bounding set (as well as the inheritable/permitted/effective sets), except
+ * the ones we want to keep */
r = capability_bounding_set_drop(keep_capabilities, true);
if (r < 0)
return log_error_errno(r, "Failed to drop capabilities: %m");
/* Now upgrade the permitted caps we still kept to effective caps */
- d = cap_init();
- if (!d)
- return log_oom();
-
- if (keep_capabilities) {
+ if (keep_capabilities != 0) {
cap_value_t bits[u64log2(keep_capabilities) + 1];
+ _cleanup_cap_free_ cap_t d = NULL;
+ unsigned i, j = 0;
+
+ d = cap_init();
+ if (!d)
+ return log_oom();
for (i = 0; i < ELEMENTSOF(bits); i++)
if (keep_capabilities & (1ULL << i))
@@ -320,7 +321,7 @@ int drop_privileges(uid_t uid, gid_t gid, uint64_t keep_capabilities) {
/* use enough bits */
assert(i == 64 || (keep_capabilities >> i) == 0);
/* don't use too many bits */
- assert(keep_capabilities & (1ULL << (i - 1)));
+ assert(keep_capabilities & (UINT64_C(1) << (i - 1)));
if (cap_set_flag(d, CAP_EFFECTIVE, j, bits, CAP_SET) < 0 ||
cap_set_flag(d, CAP_PERMITTED, j, bits, CAP_SET) < 0)
@@ -515,8 +516,8 @@ int capability_quintet_enforce(const CapabilityQuintet *q) {
/* Now, let's enforce the caps for the first time. Note that this is where we acquire
* caps in any of the sets we currently don't have. We have to do this before
- * droppoing the bounding caps below, since at that point we can never acquire new
- * caps in inherited/permitted/effective anymore, but only lose them.*/
+ * dropping the bounding caps below, since at that point we can never acquire new
+ * caps in inherited/permitted/effective anymore, but only lose them. */
if (cap_set_proc(modified ?: c) < 0)
return -errno;
}
diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c
index cf093cd0a2..a3447d948a 100644
--- a/src/nspawn/nspawn-mount.c
+++ b/src/nspawn/nspawn-mount.c
@@ -847,8 +847,10 @@ static int mount_inaccessible(const char *dest, CustomMount *m) {
return m->graceful ? 0 : r;
r = mount_verbose(m->graceful ? LOG_DEBUG : LOG_ERR, NULL, where, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, NULL);
- if (r < 0)
+ if (r < 0) {
+ umount_verbose(where);
return m->graceful ? 0 : r;
+ }
return 0;
}
diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c
index b2cf419484..0cb207528a 100644
--- a/src/nspawn/nspawn.c
+++ b/src/nspawn/nspawn.c
@@ -238,11 +238,13 @@ static int help(void) {
if (r < 0)
return log_oom();
- printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
+ printf("%1$s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n"
"Spawn a command or OS in a light-weight container.\n\n"
" -h --help Show this help\n"
" --version Print version string\n"
" -q --quiet Do not show status information\n"
+ " --settings=BOOLEAN Load additional settings from .nspawn file\n\n"
+ "%3$sImage:%4$s\n"
" -D --directory=PATH Root directory for the container\n"
" --template=PATH Initialize root directory from template directory,\n"
" if missing\n"
@@ -250,22 +252,35 @@ static int help(void) {
" remove it after exit\n"
" -i --image=PATH File system device or disk image for the container\n"
" --oci-bundle=PATH OCI bundle directory\n"
+ " --read-only Mount the root directory read-only\n"
+ " --volatile[=MODE] Run the system in volatile mode\n"
" --root-hash=HASH Specify verity root hash\n"
+ " --pivot-root=PATH[:PATH]\n"
+ " Pivot root to given directory in the container\n\n"
+ "%3$sExecution:%4$s\n"
" -a --as-pid2 Maintain a stub init as PID1, invoke binary as PID2\n"
" -b --boot Boot up full system (i.e. invoke init)\n"
" --chdir=PATH Set working directory in the container\n"
- " --pivot-root=PATH[:PATH]\n"
- " Pivot root to given directory in the container\n"
- " -u --user=USER Run the command under specified user or uid\n"
+ " -E --setenv=NAME=VALUE Pass an environment variable to PID 1\n"
+ " -u --user=USER Run the command under specified user or UID\n"
+ " --kill-signal=SIGNAL Select signal to use for shutting down PID 1\n"
+ " --notify-ready=BOOLEAN Receive notifications from the child init process\n\n"
+ "%3$sSystem Identity:%4$s\n"
" -M --machine=NAME Set the machine name for the container\n"
" --hostname=NAME Override the hostname for the container\n"
- " --uuid=UUID Set a specific machine UUID for the container\n"
+ " --uuid=UUID Set a specific machine UUID for the container\n\n"
+ "%3$sProperties:%4$s\n"
" -S --slice=SLICE Place the container in the specified slice\n"
" --property=NAME=VALUE Set scope unit property\n"
+ " --register=BOOLEAN Register container as machine\n"
+ " --keep-unit Do not register a scope for the machine, reuse\n"
+ " the service unit nspawn is running in\n\n"
+ "%3$sUser Namespacing:%4$s\n"
" -U --private-users=pick Run within user namespace, autoselect UID/GID range\n"
" --private-users[=UIDBASE[:NUIDS]]\n"
" Similar, but with user configured UID/GID range\n"
- " --private-users-chown Adjust OS tree ownership to private UID/GID range\n"
+ " --private-users-chown Adjust OS tree ownership to private UID/GID range\n\n"
+ "%3$sNetworking:%4$s\n"
" --private-network Disable network in container\n"
" --network-interface=INTERFACE\n"
" Assign an existing network interface to the\n"
@@ -290,29 +305,33 @@ static int help(void) {
" Set network namespace to the one represented by\n"
" the specified kernel namespace file node\n"
" -p --port=[PROTOCOL:]HOSTPORT[:CONTAINERPORT]\n"
- " Expose a container IP port on the host\n"
+ " Expose a container IP port on the host\n\n"
+ "%3$sSecurity:%4$s\n"
+ " --capability=CAP In addition to the default, retain specified\n"
+ " capability\n"
+ " --drop-capability=CAP Drop the specified capability from the default set\n"
+ " --no-new-privileges Set PR_SET_NO_NEW_PRIVS flag for container payload\n"
+ " --system-call-filter=LIST|~LIST\n"
+ " Permit/prohibit specific system calls\n"
" -Z --selinux-context=SECLABEL\n"
" Set the SELinux security context to be used by\n"
" processes in the container\n"
" -L --selinux-apifs-context=SECLABEL\n"
" Set the SELinux security context to be used by\n"
- " API/tmpfs file systems in the container\n"
- " --capability=CAP In addition to the default, retain specified\n"
- " capability\n"
- " --drop-capability=CAP Drop the specified capability from the default set\n"
- " --system-call-filter=LIST|~LIST\n"
- " Permit/prohibit specific system calls\n"
+ " API/tmpfs file systems in the container\n\n"
+ "%3$sResources:%4$s\n"
" --rlimit=NAME=LIMIT Set a resource limit for the payload\n"
" --oom-score-adjust=VALUE\n"
" Adjust the OOM score value for the payload\n"
" --cpu-affinity=CPUS Adjust the CPU affinity of the container\n"
- " --kill-signal=SIGNAL Select signal to use for shutting down PID 1\n"
- " --link-journal=MODE Link up guest journal, one of no, auto, guest, \n"
- " host, try-guest, try-host\n"
- " -j Equivalent to --link-journal=try-guest\n"
+ " --personality=ARCH Pick personality for this container\n\n"
+ "%3$sIntegration:%4$s\n"
" --resolv-conf=MODE Select mode of /etc/resolv.conf initialization\n"
" --timezone=MODE Select mode of /etc/localtime initialization\n"
- " --read-only Mount the root directory read-only\n"
+ " --link-journal=MODE Link up guest journal, one of no, auto, guest, \n"
+ " host, try-guest, try-host\n"
+ " -j Equivalent to --link-journal=try-guest\n\n"
+ "%3$sMounts:%4$s\n"
" --bind=PATH[:PATH[:OPTIONS]]\n"
" Bind mount a file or directory from the host into\n"
" the container\n"
@@ -325,21 +344,15 @@ static int help(void) {
" Create an overlay mount from the host to \n"
" the container\n"
" --overlay-ro=PATH[:PATH...]:PATH\n"
- " Similar, but creates a read-only overlay mount\n"
- " -E --setenv=NAME=VALUE Pass an environment variable to PID 1\n"
- " --register=BOOLEAN Register container as machine\n"
- " --keep-unit Do not register a scope for the machine, reuse\n"
- " the service unit nspawn is running in\n"
- " --volatile[=MODE] Run the system in volatile mode\n"
- " --settings=BOOLEAN Load additional settings from .nspawn file\n"
- " --notify-ready=BOOLEAN Receive notifications from the child init process\n"
+ " Similar, but creates a read-only overlay mount\n\n"
+ "%3$sInput/Output:%4$s\n"
" --console=MODE Select how stdin/stdout/stderr and /dev/console are\n"
" set up for the container.\n"
" -P --pipe Equivalent to --console=pipe\n"
- "\nSee the %s for details.\n"
+ "\nSee the %2$s for details.\n"
, program_invocation_short_name
, link
- );
+ , ansi_underline(), ansi_normal());
return 0;
}
diff --git a/src/test/test-capability.c b/src/test/test-capability.c
index 325d7c8cc2..6b2de66bb7 100644
--- a/src/test/test-capability.c
+++ b/src/test/test-capability.c
@@ -215,11 +215,32 @@ static void test_set_ambient_caps(void) {
assert_se(prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_CHOWN, 0, 0) == 1);
}
+static void test_ensure_cap_64bit(void) {
+ _cleanup_free_ char *content = NULL;
+ unsigned long p = 0;
+ int r;
+
+ r = read_one_line_file("/proc/sys/kernel/cap_last_cap", &content);
+ if (r == -ENOENT) /* kernel pre 3.2 */
+ return;
+ assert_se(r >= 0);
+
+ assert_se(safe_atolu(content, &p) >= 0);
+
+ /* If caps don't fit into 64bit anymore, we have a problem, fail the test. */
+ assert_se(p <= 63);
+
+ /* Also check for the header definition */
+ assert_se(CAP_LAST_CAP <= 63);
+}
+
int main(int argc, char *argv[]) {
bool run_ambient;
test_setup_logging(LOG_INFO);
+ test_ensure_cap_64bit();
+
test_last_cap_file();
test_last_cap_probe();