diff --git a/man/systemd-nspawn.xml b/man/systemd-nspawn.xml index eb411102bc..9bc4bb7c44 100644 --- a/man/systemd-nspawn.xml +++ b/man/systemd-nspawn.xml @@ -133,6 +133,75 @@ The following options are understood: + + + + + + Turns off any status output by the tool + itself. When this switch is used, the only output from nspawn + will be the console output of the container OS + itself. + + + + MODE + + Controls whether + systemd-nspawn shall search for and use + additional per-container settings from + .nspawn files. Takes a boolean or the + special values or + . + + If enabled (the default), a settings file named after the + machine (as specified with the + setting, or derived from the directory or image file name) + with the suffix .nspawn is searched in + /etc/systemd/nspawn/ and + /run/systemd/nspawn/. If it is found + there, its settings are read and used. If it is not found + there, it is subsequently searched in the same directory as the + image file or in the immediate parent of the root directory of + the container. In this case, if the file is found, its settings + will be also read and used, but potentially unsafe settings + are ignored. Note that in both these cases, settings on the + command line take precedence over the corresponding settings + from loaded .nspawn files, if both are + specified. Unsafe settings are considered all settings that + elevate the container's privileges or grant access to + additional resources such as files or directories of the + host. For details about the format and contents of + .nspawn files, consult + systemd.nspawn5. + + If this option is set to , the + file is searched, read and used the same way, however, the order of + precedence is reversed: settings read from the + .nspawn file will take precedence over + the corresponding command line options, if both are + specified. 
+ + If this option is set to , the + file is searched, read and used the same way, but regardless + of being found in /etc/systemd/nspawn/, + /run/systemd/nspawn/ or next to the image + file or container root directory, all settings will take + effect, however, command line arguments still take precedence + over corresponding settings. + + If disabled, no .nspawn file is read + and no settings except the ones on the command line are in + effect. + + + + + + Image Options + + + @@ -247,6 +316,66 @@ from the OCI runtime JSON data (but data passed on the command line takes precedence). + + + + Mount the container's root file system (and any other file systems contained in the container + image) read-only. This has no effect on additional mounts made with , + and similar options. This mode is implied if the container image file or directory is + marked read-only itself. It is also implied if is used. In this case the container + image on disk is strictly read-only, while changes are permitted but kept non-persistently in memory only. For + further details, see below. + + + + + MODE + + Boots the container in volatile mode. When no mode parameter is passed or when mode is + specified as , full volatile mode is enabled. This means the root directory is mounted as a + mostly unpopulated tmpfs instance, and /usr/ from the OS tree is + mounted into it in read-only mode (the system thus starts up with read-only OS image, but pristine state and + configuration, any changes are lost on shutdown). When the mode parameter is specified as + , the OS tree is mounted read-only, but /var/ is mounted as a + writable tmpfs instance into it (the system thus starts up with read-only OS resources and + configuration, but pristine state, and any changes to the latter are lost on shutdown). 
When the mode parameter + is specified as the read-only root file system is combined with a writable + tmpfs instance through overlayfs, so that it appears as it normally + would, but any changes are applied to the temporary file system only and lost when the container is + terminated. When the mode parameter is specified as (the default), the whole OS tree is + made available writable (unless is specified, see above). + + Note that if one of the volatile modes is chosen, its effect is limited to the root file system (or + /var/ in case of ), and any other mounts placed in the hierarchy are + unaffected — regardless if they are established automatically (e.g. the EFI system partition that might be + mounted to /efi/ or /boot/) or explicitly (e.g. through an additional + command line option such as , see below). This means, even if + is used changes to /efi/ or + /boot/ are prohibited in case such a partition exists in the container image operated on, + and even if is used the hypothetical file /etc/foobar is + potentially writable if is used to mount it from outside the read-only + container /etc directory. + + The option is closely related to this setting, and provides similar + behaviour by making a temporary, ephemeral copy of the whole OS image and executing that. For further details, + see above. + + The and options provide similar functionality, but + for specific sub-directories of the OS image only. For details, see below. + + This option provides similar functionality for containers as the systemd.volatile= + kernel command line switch provides for host systems. See + kernel-command-line7 for + details. + + Note that setting this option to or will only work correctly + with operating systems in the container that can boot up with only /usr mounted, and are + able to automatically populate /var, and also /etc in case of + --volatile=yes. 
The option does not require any particular + preparations in the OS, but do note that overlayfs behaviour differs from regular file + systems in a number of ways, and hence compatibility is limited. + + @@ -262,6 +391,27 @@ used, also as formatted hexadecimal characters. + + + + Pivot the specified directory to / inside the container, and either unmount the + container's old root, or pivot it to another specified directory. Takes one of: a path argument — in which case the + specified path will be pivoted to / and the old root will be unmounted; or a colon-separated pair + of new root path and pivot destination for the old root. The new root path will be pivoted to /, + and the old / will be pivoted to the other directory. Both paths must be absolute, and are resolved + in the container's file system namespace. + + This is for containers which have several bootable directories in them; for example, several + OSTree deployments. It emulates the behavior of + the boot loader and initial RAM disk which normally select which directory to mount as the root and start the + container's PID 1 in. + + + + + Execution Options + + @@ -336,19 +486,14 @@ - + + - Pivot the specified directory to / inside the container, and either unmount the - container's old root, or pivot it to another specified directory. Takes one of: a path argument — in which case the - specified path will be pivoted to / and the old root will be unmounted; or a colon-separated pair - of new root path and pivot destination for the old root. The new root path will be pivoted to /, - and the old / will be pivoted to the other directory. Both paths must be absolute, and are resolved - in the container's file system namespace. - - This is for containers which have several bootable directories in them; for example, several - OSTree deployments. It emulates the behavior of - the boot loader and initial RAM disk which normally select which directory to mount as the root and start the - container's PID 1 in. 
+ Specifies an environment variable assignment + to pass to the init process in the container, in the format + NAME=VALUE. This may be used to override + the default variables or to set additional variables. This + parameter may be used more than once. @@ -362,6 +507,36 @@ destructive operations only. + + + + Specify the process signal to send to the container's PID 1 when nspawn itself receives + SIGTERM, in order to trigger an orderly shutdown of the container. Defaults to + SIGRTMIN+3 if is used (on systemd-compatible init systems + SIGRTMIN+3 triggers an orderly shutdown). If is not used and this + option is not specified the container's processes are terminated abruptly via SIGKILL. For + a list of valid signals, see signal7. + + + + + + Configures support for notifications from the container's init process. + takes a boolean ( and ). + With option systemd-nspawn notifies systemd + with a READY=1 message when the init process is created. + With option systemd-nspawn waits for the + READY=1 message from the init process in the container + before sending its own to systemd. For more details about notifications + see sd_notify3. + + + + + System Identity Options + + @@ -406,7 +581,12 @@ /etc/machine-id in the container is unpopulated. + + + Property Options + + @@ -427,6 +607,42 @@ + + + + Controls whether the container is registered with + systemd-machined8. Takes a + boolean argument, which defaults to yes. This option should be enabled when the container + runs a full Operating System (more specifically: a system and service manager as PID 1), and is useful to + ensure that the container is accessible via + machinectl1 and shown by + tools such as ps1. If the container + does not run a service manager, it is recommended to set this option to + no. + + + + + + Instead of creating a transient scope unit to run the container in, simply use the service or + scope unit systemd-nspawn has been invoked in. 
If is set + this unit is registered with + systemd-machined8. This + switch should be used if systemd-nspawn is invoked from within a service unit, and the + service unit's sole purpose is to run a single systemd-nspawn container. This option is not + available if run from a user session. + Note that passing disables the effect of and + . Use and in + combination to disable any kind of unit allocation or registration with + systemd-machined. + + + + + User Namespacing Options + + @@ -519,6 +735,13 @@ + + + + Networking Options + + + @@ -535,23 +758,6 @@ - - - - Takes the path to a file representing a kernel - network namespace that the container shall run in. The specified path - should refer to a (possibly bind-mounted) network namespace file, as - exposed by the kernel below /proc/$PID/ns/net. - This makes the container enter the given network namespace. One of the - typical use cases is to give a network namespace under - /run/netns created by ip-netns8, - for example, . - Note that this option cannot be used together with other - network-related options, such as - or . - - @@ -689,6 +895,23 @@ + + + + Takes the path to a file representing a kernel + network namespace that the container shall run in. The specified path + should refer to a (possibly bind-mounted) network namespace file, as + exposed by the kernel below /proc/$PID/ns/net. + This makes the container enter the given network namespace. One of the + typical use cases is to give a network namespace under + /run/netns created by ip-netns8, + for example, . + Note that this option cannot be used together with other + network-related options, such as + or . + + @@ -707,26 +930,12 @@ , . + - - - - - Sets the SELinux security context to be used - to label processes in the container. - - - - - - - - Sets the SELinux security context to be used - to label files in the virtual API file systems in the - container. - - + + Security Options + @@ -780,6 +989,31 @@ capabilities are passed using the --capabilities=. 
+ + + + + Sets the SELinux security context to be used + to label processes in the container. + + + + + + + + Sets the SELinux security context to be used + to label files in the virtual API file systems in the + container. + + + + + + Resource Options + + + @@ -825,58 +1059,23 @@ - + - Specify the process signal to send to the container's PID 1 when nspawn itself receives - SIGTERM, in order to trigger an orderly shutdown of the container. Defaults to - SIGRTMIN+3 if is used (on systemd-compatible init systems - SIGRTMIN+3 triggers an orderly shutdown). If is not used and this - option is not specified the container's processes are terminated abruptly via SIGKILL. For - a list of valid signals, see signal7. + Control the architecture ("personality") + reported by + uname2 + in the container. Currently, only x86 and + x86-64 are supported. This is useful when + running a 32-bit container on a 64-bit host. If this setting + is not used, the personality reported in the container is the + same as the one reported on the host. + - - - - Control whether the container's journal shall - be made visible to the host system. If enabled, allows viewing - the container's journal files from the host (but not vice - versa). Takes one of no, - host, try-host, - guest, try-guest, - auto. If no, the journal - is not linked. If host, the journal files - are stored on the host file system (beneath - /var/log/journal/machine-id) - and the subdirectory is bind-mounted into the container at the - same location. If guest, the journal files - are stored on the guest file system (beneath - /var/log/journal/machine-id) - and the subdirectory is symlinked into the host at the same - location. try-host and - try-guest do the same but do not fail if - the host does not have persistent journaling enabled. If - auto (the default), and the right - subdirectory of /var/log/journal exists, - it will be bind mounted into the container. If the - subdirectory does not exist, no linking is performed. 
- Effectively, booting a container once with - guest or host will link - the journal persistently if further on the default of - auto is used. - - Note that is the default if the - systemd-nspawn@.service template unit file is used. - - - - - - Equivalent to - . - + + Integration Options + @@ -926,16 +1125,53 @@ - + - Mount the container's root file system (and any other file systems container in the container - image) read-only. This has no effect on additional mounts made with , - and similar options. This mode is implied if the container image file or directory is - marked read-only itself. It is also implied if is used. In this case the container - image on disk is strictly read-only, while changes are permitted but kept non-persistently in memory only. For - further details, see below. + Control whether the container's journal shall + be made visible to the host system. If enabled, allows viewing + the container's journal files from the host (but not vice + versa). Takes one of no, + host, try-host, + guest, try-guest, + auto. If no, the journal + is not linked. If host, the journal files + are stored on the host file system (beneath + /var/log/journal/machine-id) + and the subdirectory is bind-mounted into the container at the + same location. If guest, the journal files + are stored on the guest file system (beneath + /var/log/journal/machine-id) + and the subdirectory is symlinked into the host at the same + location. try-host and + try-guest do the same but do not fail if + the host does not have persistent journaling enabled. If + auto (the default), and the right + subdirectory of /var/log/journal exists, + it will be bind mounted into the container. If the + subdirectory does not exist, no linking is performed. + Effectively, booting a container once with + guest or host will link + the journal persistently if further on the default of + auto is used. + + Note that is the default if the + systemd-nspawn@.service template unit file is used. 
+ + + + Equivalent to + . + + + + + + Mount Options + + + @@ -1044,65 +1280,15 @@ this switch. Note that this option cannot be used to replace the root file system of the container with an overlay - file system. However, the option described below provides similar functionality, + file system. However, the option described above provides similar functionality, with a focus on implementing stateless operating system images. + - - - - - Specifies an environment variable assignment - to pass to the init process in the container, in the format - NAME=VALUE. This may be used to override - the default variables or to set additional variables. This - parameter may be used more than once. - - - - - - Controls whether the container is registered with - systemd-machined8. Takes a - boolean argument, which defaults to yes. This option should be enabled when the container - runs a full Operating System (more specifically: a system and service manager as PID 1), and is useful to - ensure that the container is accessible via - machinectl1 and shown by - tools such as ps1. If the container - does not run a service manager, it is recommended to set this option to - no. - - - - - - Instead of creating a transient scope unit to run the container in, simply use the service or - scope unit systemd-nspawn has been invoked in. If is set - this unit is registered with - systemd-machined8. This - switch should be used if systemd-nspawn is invoked from within a service unit, and the - service unit's sole purpose is to run a single systemd-nspawn container. This option is not - available if run from a user session. - Note that passing disables the effect of and - . Use and in - combination to disable any kind of unit allocation or registration with - systemd-machined. - - - - - - Control the architecture ("personality") - reported by - uname2 - in the container. Currently, only x86 and - x86-64 are supported. This is useful when - running a 32-bit container on a 64-bit host. 
If this setting - is not used, the personality reported in the container is the - same as the one reported on the host. - + + Input/Output Options + MODE @@ -1139,133 +1325,10 @@ Equivalent to . - - - - - Turns off any status output by the tool - itself. When this switch is used, the only output from nspawn - will be the console output of the container OS - itself. - - - - - MODE - - Boots the container in volatile mode. When no mode parameter is passed or when mode is - specified as , full volatile mode is enabled. This means the root directory is mounted as a - mostly unpopulated tmpfs instance, and /usr/ from the OS tree is - mounted into it in read-only mode (the system thus starts up with read-only OS image, but pristine state and - configuration, any changes are lost on shutdown). When the mode parameter is specified as - , the OS tree is mounted read-only, but /var/ is mounted as a - writable tmpfs instance into it (the system thus starts up with read-only OS resources and - configuration, but pristine state, and any changes to the latter are lost on shutdown). When the mode parameter - is specified as the read-only root file system is combined with a writable - tmpfs instance through overlayfs, so that it appears at it normally - would, but any changes are applied to the temporary file system only and lost when the container is - terminated. When the mode parameter is specified as (the default), the whole OS tree is - made available writable (unless is specified, see above). - - Note that if one of the volatile modes is chosen, its effect is limited to the root file system (or - /var/ in case of ), and any other mounts placed in the hierarchy are - unaffected — regardless if they are established automatically (e.g. the EFI system partition that might be - mounted to /efi/ or /boot/) or explicitly (e.g. through an additional - command line option such as , see above). 
This means, even if - is used changes to /efi/ or - /boot/ are prohibited in case such a partition exists in the container image operated on, - and even if is used the hypothetical file /etc/foobar is - potentially writable if if used to mount it from outside the read-only - container /etc directory. - - The option is closely related to this setting, and provides similar - behaviour by making a temporary, ephemeral copy of the whole OS image and executing that. For further details, - see above. - - The and options provide similar functionality, but - for specific sub-directories of the OS image only. For details, see above. - - This option provides similar functionality for containers as the systemd.volatile= - kernel command line switch provides for host systems. See - kernel-command-line7 for - details. - - Note that setting this option to or will only work correctly - with operating systems in the container that can boot up with only /usr mounted, and are - able to automatically populate /var, and also /etc in case of - --volatile=yes. The option does not require any particular - preparations in the OS, but do note that overlayfs behaviour differs from regular file - systems in a number of ways, and hence compatibility is limited. - - - - MODE - - Controls whether - systemd-nspawn shall search for and use - additional per-container settings from - .nspawn files. Takes a boolean or the - special values or - . - - If enabled (the default), a settings file named after the - machine (as specified with the - setting, or derived from the directory or image file name) - with the suffix .nspawn is searched in - /etc/systemd/nspawn/ and - /run/systemd/nspawn/. If it is found - there, its settings are read and used. If it is not found - there, it is subsequently searched in the same directory as the - image file or in the immediate parent of the root directory of - the container. 
In this case, if the file is found, its settings - will be also read and used, but potentially unsafe settings - are ignored. Note that in both these cases, settings on the - command line take precedence over the corresponding settings - from loaded .nspawn files, if both are - specified. Unsafe settings are considered all settings that - elevate the container's privileges or grant access to - additional resources such as files or directories of the - host. For details about the format and contents of - .nspawn files, consult - systemd.nspawn5. - - If this option is set to , the - file is searched, read and used the same way, however, the order of - precedence is reversed: settings read from the - .nspawn file will take precedence over - the corresponding command line options, if both are - specified. - - If this option is set to , the - file is searched, read and used the same way, but regardless - of being found in /etc/systemd/nspawn/, - /run/systemd/nspawn/ or next to the image - file or container root directory, all settings will take - effect, however, command line arguments still take precedence - over corresponding settings. - - If disabled, no .nspawn file is read - and no settings except the ones on the command line are in - effect. - - - - - - Configures support for notifications from the container's init process. - takes a boolean ( and ). - With option systemd-nspawn notifies systemd - with a READY=1 message when the init process is created. - With option systemd-nspawn waits for the - READY=1 message from the init process in the container - before sending its own to systemd. For more details about notifications - see sd_notify3). 
- - - + diff --git a/src/basic/capability-util.c b/src/basic/capability-util.c index 99628f6260..0035352954 100644 --- a/src/basic/capability-util.c +++ b/src/basic/capability-util.c @@ -28,8 +28,8 @@ int have_effective_cap(int value) { if (cap_get_flag(cap, value, CAP_EFFECTIVE, &fv) < 0) return -errno; - else - return fv == CAP_SET; + + return fv == CAP_SET; } unsigned long cap_last_cap(void) { @@ -50,7 +50,7 @@ unsigned long cap_last_cap(void) { if (p > 63) /* Safety for the future: if one day the kernel learns more than 64 caps, * then we are in trouble (since we, as much userspace and kernel space - * store capability masks in uint64_t types. Let's hence protect + * store capability masks in uint64_t types). Let's hence protect * ourselves against that and always cap at 63 for now. */ p = 63; @@ -61,7 +61,7 @@ unsigned long cap_last_cap(void) { } /* fall back to syscall-probing for pre linux-3.2 */ - p = (unsigned long) CAP_LAST_CAP; + p = MIN((unsigned long) CAP_LAST_CAP, 63U); if (prctl(PR_CAPBSET_READ, p) < 0) { @@ -107,13 +107,13 @@ int capability_update_inherited_set(cap_t caps, uint64_t set) { } int capability_ambient_set_apply(uint64_t set, bool also_inherit) { - unsigned long i; _cleanup_cap_free_ cap_t caps = NULL; + unsigned long i; + int r; /* Add the capabilities to the ambient set. */ if (also_inherit) { - int r; caps = cap_get_proc(); if (!caps) return -errno; @@ -272,16 +272,12 @@ int capability_bounding_set_drop_usermode(uint64_t keep) { } int drop_privileges(uid_t uid, gid_t gid, uint64_t keep_capabilities) { - _cleanup_cap_free_ cap_t d = NULL; - unsigned i, j = 0; int r; - /* Unfortunately we cannot leave privilege dropping to PID 1 - * here, since we want to run as user but want to keep some - * capabilities. Since file capabilities have been introduced - * this cannot be done across exec() anymore, unless our - * binary has the capability configured in the file system, - * which we want to avoid. 
*/ + /* Unfortunately we cannot leave privilege dropping to PID 1 here, since we want to run as user but + * want to keep some capabilities. Since file capabilities have been introduced this cannot be done + * across exec() anymore, unless our binary has the capability configured in the file system, which + * we want to avoid. */ if (setresgid(gid, gid, gid) < 0) return log_error_errno(errno, "Failed to change group ID: %m"); @@ -290,7 +286,9 @@ int drop_privileges(uid_t uid, gid_t gid, uint64_t keep_capabilities) { if (r < 0) return log_error_errno(r, "Failed to drop auxiliary groups list: %m"); - /* Ensure we keep the permitted caps across the setresuid() */ + /* Ensure we keep the permitted caps across the setresuid(). Note that we do this even if we actually + * don't want to keep any capabilities, since we want to be able to drop them from the bounding set + * too, and we can only do that if we have capabilities. */ if (prctl(PR_SET_KEEPCAPS, 1) < 0) return log_error_errno(errno, "Failed to enable keep capabilities flag: %m"); @@ -300,18 +298,21 @@ int drop_privileges(uid_t uid, gid_t gid, uint64_t keep_capabilities) { if (prctl(PR_SET_KEEPCAPS, 0) < 0) return log_error_errno(errno, "Failed to disable keep capabilities flag: %m"); - /* Drop all caps from the bounding set, except the ones we want */ + /* Drop all caps from the bounding set (as well as the inheritable/permitted/effective sets), except + * the ones we want to keep */ r = capability_bounding_set_drop(keep_capabilities, true); if (r < 0) return log_error_errno(r, "Failed to drop capabilities: %m"); /* Now upgrade the permitted caps we still kept to effective caps */ - d = cap_init(); - if (!d) - return log_oom(); - - if (keep_capabilities) { + if (keep_capabilities != 0) { cap_value_t bits[u64log2(keep_capabilities) + 1]; + _cleanup_cap_free_ cap_t d = NULL; + unsigned i, j = 0; + + d = cap_init(); + if (!d) + return log_oom(); for (i = 0; i < ELEMENTSOF(bits); i++) if (keep_capabilities & (1ULL << 
i)) @@ -320,7 +321,7 @@ int drop_privileges(uid_t uid, gid_t gid, uint64_t keep_capabilities) { /* use enough bits */ assert(i == 64 || (keep_capabilities >> i) == 0); /* don't use too many bits */ - assert(keep_capabilities & (1ULL << (i - 1))); + assert(keep_capabilities & (UINT64_C(1) << (i - 1))); if (cap_set_flag(d, CAP_EFFECTIVE, j, bits, CAP_SET) < 0 || cap_set_flag(d, CAP_PERMITTED, j, bits, CAP_SET) < 0) @@ -515,8 +516,8 @@ int capability_quintet_enforce(const CapabilityQuintet *q) { /* Now, let's enforce the caps for the first time. Note that this is where we acquire * caps in any of the sets we currently don't have. We have to do this before - * droppoing the bounding caps below, since at that point we can never acquire new - * caps in inherited/permitted/effective anymore, but only lose them.*/ + * dropping the bounding caps below, since at that point we can never acquire new + * caps in inherited/permitted/effective anymore, but only lose them. */ if (cap_set_proc(modified ?: c) < 0) return -errno; } diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c index cf093cd0a2..a3447d948a 100644 --- a/src/nspawn/nspawn-mount.c +++ b/src/nspawn/nspawn-mount.c @@ -847,8 +847,10 @@ static int mount_inaccessible(const char *dest, CustomMount *m) { return m->graceful ? 0 : r; r = mount_verbose(m->graceful ? LOG_DEBUG : LOG_ERR, NULL, where, NULL, MS_BIND|MS_RDONLY|MS_REMOUNT, NULL); - if (r < 0) + if (r < 0) { + umount_verbose(where); return m->graceful ? 0 : r; + } return 0; } diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index b2cf419484..0cb207528a 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -238,11 +238,13 @@ static int help(void) { if (r < 0) return log_oom(); - printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n" + printf("%1$s [OPTIONS...] 
[PATH] [ARGUMENTS...]\n\n" "Spawn a command or OS in a light-weight container.\n\n" " -h --help Show this help\n" " --version Print version string\n" " -q --quiet Do not show status information\n" + " --settings=BOOLEAN Load additional settings from .nspawn file\n\n" + "%3$sImage:%4$s\n" " -D --directory=PATH Root directory for the container\n" " --template=PATH Initialize root directory from template directory,\n" " if missing\n" @@ -250,22 +252,35 @@ static int help(void) { " remove it after exit\n" " -i --image=PATH File system device or disk image for the container\n" " --oci-bundle=PATH OCI bundle directory\n" + " --read-only Mount the root directory read-only\n" + " --volatile[=MODE] Run the system in volatile mode\n" " --root-hash=HASH Specify verity root hash\n" + " --pivot-root=PATH[:PATH]\n" + " Pivot root to given directory in the container\n\n" + "%3$sExecution:%4$s\n" " -a --as-pid2 Maintain a stub init as PID1, invoke binary as PID2\n" " -b --boot Boot up full system (i.e. 
invoke init)\n" " --chdir=PATH Set working directory in the container\n" - " --pivot-root=PATH[:PATH]\n" - " Pivot root to given directory in the container\n" - " -u --user=USER Run the command under specified user or uid\n" + " -E --setenv=NAME=VALUE Pass an environment variable to PID 1\n" + " -u --user=USER Run the command under specified user or UID\n" + " --kill-signal=SIGNAL Select signal to use for shutting down PID 1\n" + " --notify-ready=BOOLEAN Receive notifications from the child init process\n\n" + "%3$sSystem Identity:%4$s\n" " -M --machine=NAME Set the machine name for the container\n" " --hostname=NAME Override the hostname for the container\n" - " --uuid=UUID Set a specific machine UUID for the container\n" + " --uuid=UUID Set a specific machine UUID for the container\n\n" + "%3$sProperties:%4$s\n" " -S --slice=SLICE Place the container in the specified slice\n" " --property=NAME=VALUE Set scope unit property\n" + " --register=BOOLEAN Register container as machine\n" + " --keep-unit Do not register a scope for the machine, reuse\n" + " the service unit nspawn is running in\n\n" + "%3$sUser Namespacing:%4$s\n" " -U --private-users=pick Run within user namespace, autoselect UID/GID range\n" " --private-users[=UIDBASE[:NUIDS]]\n" " Similar, but with user configured UID/GID range\n" - " --private-users-chown Adjust OS tree ownership to private UID/GID range\n" + " --private-users-chown Adjust OS tree ownership to private UID/GID range\n\n" + "%3$sNetworking:%4$s\n" " --private-network Disable network in container\n" " --network-interface=INTERFACE\n" " Assign an existing network interface to the\n" @@ -290,29 +305,33 @@ static int help(void) { " Set network namespace to the one represented by\n" " the specified kernel namespace file node\n" " -p --port=[PROTOCOL:]HOSTPORT[:CONTAINERPORT]\n" - " Expose a container IP port on the host\n" + " Expose a container IP port on the host\n\n" + "%3$sSecurity:%4$s\n" + " --capability=CAP In addition to the 
default, retain specified\n" + " capability\n" + " --drop-capability=CAP Drop the specified capability from the default set\n" + " --no-new-privileges Set PR_SET_NO_NEW_PRIVS flag for container payload\n" + " --system-call-filter=LIST|~LIST\n" + " Permit/prohibit specific system calls\n" " -Z --selinux-context=SECLABEL\n" " Set the SELinux security context to be used by\n" " processes in the container\n" " -L --selinux-apifs-context=SECLABEL\n" " Set the SELinux security context to be used by\n" - " API/tmpfs file systems in the container\n" - " --capability=CAP In addition to the default, retain specified\n" - " capability\n" - " --drop-capability=CAP Drop the specified capability from the default set\n" - " --system-call-filter=LIST|~LIST\n" - " Permit/prohibit specific system calls\n" + " API/tmpfs file systems in the container\n\n" + "%3$sResources:%4$s\n" " --rlimit=NAME=LIMIT Set a resource limit for the payload\n" " --oom-score-adjust=VALUE\n" " Adjust the OOM score value for the payload\n" " --cpu-affinity=CPUS Adjust the CPU affinity of the container\n" - " --kill-signal=SIGNAL Select signal to use for shutting down PID 1\n" - " --link-journal=MODE Link up guest journal, one of no, auto, guest, \n" - " host, try-guest, try-host\n" - " -j Equivalent to --link-journal=try-guest\n" + " --personality=ARCH Pick personality for this container\n\n" + "%3$sIntegration:%4$s\n" " --resolv-conf=MODE Select mode of /etc/resolv.conf initialization\n" " --timezone=MODE Select mode of /etc/localtime initialization\n" - " --read-only Mount the root directory read-only\n" + " --link-journal=MODE Link up guest journal, one of no, auto, guest, \n" + " host, try-guest, try-host\n" + " -j Equivalent to --link-journal=try-guest\n\n" + "%3$sMounts:%4$s\n" " --bind=PATH[:PATH[:OPTIONS]]\n" " Bind mount a file or directory from the host into\n" " the container\n" @@ -325,21 +344,15 @@ static int help(void) { " Create an overlay mount from the host to \n" " the container\n" " 
--overlay-ro=PATH[:PATH...]:PATH\n" - " Similar, but creates a read-only overlay mount\n" - " -E --setenv=NAME=VALUE Pass an environment variable to PID 1\n" - " --register=BOOLEAN Register container as machine\n" - " --keep-unit Do not register a scope for the machine, reuse\n" - " the service unit nspawn is running in\n" - " --volatile[=MODE] Run the system in volatile mode\n" - " --settings=BOOLEAN Load additional settings from .nspawn file\n" - " --notify-ready=BOOLEAN Receive notifications from the child init process\n" + " Similar, but creates a read-only overlay mount\n\n" + "%3$sInput/Output:%4$s\n" " --console=MODE Select how stdin/stdout/stderr and /dev/console are\n" " set up for the container.\n" " -P --pipe Equivalent to --console=pipe\n" - "\nSee the %s for details.\n" + "\nSee the %2$s for details.\n" , program_invocation_short_name , link - ); + , ansi_underline(), ansi_normal()); return 0; } diff --git a/src/test/test-capability.c b/src/test/test-capability.c index 325d7c8cc2..6b2de66bb7 100644 --- a/src/test/test-capability.c +++ b/src/test/test-capability.c @@ -215,11 +215,32 @@ static void test_set_ambient_caps(void) { assert_se(prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_CHOWN, 0, 0) == 1); } +static void test_ensure_cap_64bit(void) { + _cleanup_free_ char *content = NULL; + unsigned long p = 0; + int r; + + r = read_one_line_file("/proc/sys/kernel/cap_last_cap", &content); + if (r == -ENOENT) /* kernel pre 3.2 */ + return; + assert_se(r >= 0); + + assert_se(safe_atolu(content, &p) >= 0); + + /* If caps don't fit into 64bit anymore, we have a problem, fail the test. */ + assert_se(p <= 63); + + /* Also check for the header definition */ + assert_se(CAP_LAST_CAP <= 63); +} + int main(int argc, char *argv[]) { bool run_ambient; test_setup_logging(LOG_INFO); + test_ensure_cap_64bit(); + test_last_cap_file(); test_last_cap_probe();