core/namespace: mount new sysfs when new network namespace is requested

Previously, even when a mount namespace was created, the host's sysfs was
still used, especially with RootDirectory= or RootImage=. Thus, service
processes could still access the properties of the network interfaces in the
main network namespace through sysfs.

With this change, sysfs is remounted with the new network namespace tag,
except when PrivateMounts= is explicitly disabled. Hence, the properties of
the network interfaces in the main network namespace can no longer be
accessed by service processes through sysfs.

Fixes #26422.
This commit is contained in:
Yu Watanabe
2023-02-17 13:50:17 +09:00
parent 277b269e25
commit c2da3bf237
4 changed files with 18 additions and 0 deletions

View File

@@ -1663,6 +1663,10 @@ BindReadOnlyPaths=/var/lib/systemd</programlisting>
not available), and the unit should be written in a way that does not solely rely on this setting for not available), and the unit should be written in a way that does not solely rely on this setting for
security.</para> security.</para>
<para>When this option is enabled, <varname>PrivateMounts=</varname> is implied unless it is
explicitly disabled, and <filename>/sys</filename> will be remounted to associate it with the new
network namespace.</para>
<para>When this option is used on a socket unit any sockets bound on behalf of this unit will be <para>When this option is used on a socket unit any sockets bound on behalf of this unit will be
bound within a private network namespace. This may be combined with bound within a private network namespace. This may be combined with
<varname>JoinsNamespaceOf=</varname> to listen on sockets inside of network namespaces of other <varname>JoinsNamespaceOf=</varname> to listen on sockets inside of network namespaces of other
@@ -1684,6 +1688,10 @@ BindReadOnlyPaths=/var/lib/systemd</programlisting>
<varname>NetworkNamespacePath=</varname> configured, as otherwise the network namespace of those <varname>NetworkNamespacePath=</varname> configured, as otherwise the network namespace of those
units is reused.</para> units is reused.</para>
<para>When this option is enabled, <varname>PrivateMounts=</varname> is implied unless it is
explicitly disabled, and <filename>/sys</filename> will be remounted to associate it with the new
network namespace.</para>
<para>When this option is used on a socket unit any sockets bound on behalf of this unit will be <para>When this option is used on a socket unit any sockets bound on behalf of this unit will be
bound within the specified network namespace.</para> bound within the specified network namespace.</para>

View File

@@ -2075,6 +2075,7 @@ bool exec_needs_mount_namespace(
if (context->private_devices || if (context->private_devices ||
context->private_mounts > 0 || context->private_mounts > 0 ||
(context->private_mounts < 0 && exec_needs_network_namespace(context)) ||
context->protect_system != PROTECT_SYSTEM_NO || context->protect_system != PROTECT_SYSTEM_NO ||
context->protect_home != PROTECT_HOME_NO || context->protect_home != PROTECT_HOME_NO ||
context->protect_kernel_tunables || context->protect_kernel_tunables ||
@@ -3606,6 +3607,7 @@ static int apply_mount_namespace(
.protect_system = context->protect_system, .protect_system = context->protect_system,
.protect_proc = context->protect_proc, .protect_proc = context->protect_proc,
.proc_subset = context->proc_subset, .proc_subset = context->proc_subset,
.private_network = exec_needs_network_namespace(context),
.private_ipc = exec_needs_ipc_namespace(context), .private_ipc = exec_needs_ipc_namespace(context),
/* If NNP is on, we can turn on MS_NOSUID, since it won't have any effect anymore. */ /* If NNP is on, we can turn on MS_NOSUID, since it won't have any effect anymore. */
.mount_nosuid = context->no_new_privileges && !mac_selinux_use(), .mount_nosuid = context->no_new_privileges && !mac_selinux_use(),

View File

@@ -1748,6 +1748,7 @@ static size_t namespace_calculate_mounts(
!!log_namespace + !!log_namespace +
setup_propagate + /* /run/systemd/incoming */ setup_propagate + /* /run/systemd/incoming */
!!notify_socket + !!notify_socket +
ns_info->private_network + /* /sys */
ns_info->private_ipc; /* /dev/mqueue */ ns_info->private_ipc; /* /dev/mqueue */
} }
@@ -2354,6 +2355,12 @@ int setup_namespace(
}; };
} }
if (ns_info->private_network)
*(m++) = (MountEntry) {
.path_const = "/sys",
.mode = PRIVATE_SYSFS,
};
if (ns_info->private_ipc) if (ns_info->private_ipc)
*(m++) = (MountEntry) { *(m++) = (MountEntry) {
.path_const = "/dev/mqueue", .path_const = "/dev/mqueue",

View File

@@ -61,6 +61,7 @@ struct NamespaceInfo {
bool protect_kernel_logs; bool protect_kernel_logs;
bool mount_apivfs; bool mount_apivfs;
bool protect_hostname; bool protect_hostname;
bool private_network;
bool private_ipc; bool private_ipc;
bool mount_nosuid; bool mount_nosuid;
ProtectHome protect_home; ProtectHome protect_home;