From 620dbdd2489515696d53b90c061208b43b65aafa Mon Sep 17 00:00:00 2001 From: Kevin Kuehler Date: Mon, 4 Nov 2019 17:17:01 -0800 Subject: [PATCH 1/9] shared: Add ProtectKernelLogs property Add seccomp_protect_syslog, which adds a filter rule for the syslog system call. --- src/shared/bus-unit-util.c | 4 ++-- src/shared/seccomp-util.c | 32 ++++++++++++++++++++++++++++++++ src/shared/seccomp-util.h | 1 + 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/src/shared/bus-unit-util.c b/src/shared/bus-unit-util.c index c9f352f796..29dd89d3c1 100644 --- a/src/shared/bus-unit-util.c +++ b/src/shared/bus-unit-util.c @@ -818,8 +818,8 @@ static int bus_append_execute_property(sd_bus_message *m, const char *field, con "PrivateDevices", "PrivateNetwork", "PrivateUsers", "PrivateMounts", "NoNewPrivileges", "SyslogLevelPrefix", "MemoryDenyWriteExecute", "RestrictRealtime", "DynamicUser", "RemoveIPC", "ProtectKernelTunables", "ProtectKernelModules", - "ProtectControlGroups", "MountAPIVFS", "CPUSchedulingResetOnFork", "LockPersonality", - "ProtectHostname", "RestrictSUIDSGID")) + "ProtectKernelLogs", "ProtectControlGroups", "MountAPIVFS", "CPUSchedulingResetOnFork", + "LockPersonality", "ProtectHostname", "RestrictSUIDSGID")) return bus_append_parse_boolean(m, field, eq); if (STR_IN_SET(field, diff --git a/src/shared/seccomp-util.c b/src/shared/seccomp-util.c index 22110d0d73..bd4f004cc8 100644 --- a/src/shared/seccomp-util.c +++ b/src/shared/seccomp-util.c @@ -1281,6 +1281,38 @@ int seccomp_protect_sysctl(void) { return 0; } +int seccomp_protect_syslog(void) { + uint32_t arch; + int r; + + SECCOMP_FOREACH_LOCAL_ARCH(arch) { + _cleanup_(seccomp_releasep) scmp_filter_ctx seccomp = NULL; + + r = seccomp_init_for_arch(&seccomp, arch, SCMP_ACT_ALLOW); + if (r < 0) + return r; + + r = seccomp_rule_add_exact( + seccomp, + SCMP_ACT_ERRNO(EPERM), + SCMP_SYS(syslog), + 0); + + if (r < 0) { + log_debug_errno(r, "Failed to add syslog() rule for architecture %s, skipping: %m",
seccomp_arch_to_string(arch)); + continue; + } + + r = seccomp_load(seccomp); + if (ERRNO_IS_SECCOMP_FATAL(r)) + return r; + if (r < 0) + log_debug_errno(r, "Failed to install syslog protection rules for architecture %s, skipping: %m", seccomp_arch_to_string(arch)); + } + + return 0; +} + int seccomp_restrict_address_families(Set *address_families, bool whitelist) { uint32_t arch; int r; diff --git a/src/shared/seccomp-util.h b/src/shared/seccomp-util.h index b29082a488..0b48e74a87 100644 --- a/src/shared/seccomp-util.h +++ b/src/shared/seccomp-util.h @@ -82,6 +82,7 @@ int seccomp_parse_syscall_filter( int seccomp_restrict_archs(Set *archs); int seccomp_restrict_namespaces(unsigned long retain); int seccomp_protect_sysctl(void); +int seccomp_protect_syslog(void); int seccomp_restrict_address_families(Set *address_families, bool whitelist); int seccomp_restrict_realtime(void); int seccomp_memory_deny_write_execute(void); From 84703040186de5b4b90f5c41fe4db7f7a5ada05e Mon Sep 17 00:00:00 2001 From: Kevin Kuehler Date: Mon, 4 Nov 2019 17:18:42 -0800 Subject: [PATCH 2/9] core: Add ProtectKernelLogs If seccomp is enabled, install a seccomp filter (seccomp_protect_syslog()) that makes the syslog() system call fail with EPERM. Also drop CAP_SYSLOG from the capability bounding set.
--- src/core/dbus-execute.c | 4 ++++ src/core/execute.c | 22 ++++++++++++++++++++++ src/core/execute.h | 1 + src/core/load-fragment-gperf.gperf.m4 | 1 + src/core/unit.c | 3 +++ 5 files changed, 31 insertions(+) diff --git a/src/core/dbus-execute.c b/src/core/dbus-execute.c index 185fc6a368..13ff6f489a 100644 --- a/src/core/dbus-execute.c +++ b/src/core/dbus-execute.c @@ -783,6 +783,7 @@ const sd_bus_vtable bus_exec_vtable[] = { SD_BUS_PROPERTY("PrivateDevices", "b", bus_property_get_bool, offsetof(ExecContext, private_devices), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("ProtectKernelTunables", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_tunables), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("ProtectKernelModules", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_modules), SD_BUS_VTABLE_PROPERTY_CONST), + SD_BUS_PROPERTY("ProtectKernelLogs", "b", bus_property_get_bool, offsetof(ExecContext, protect_kernel_logs), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("ProtectControlGroups", "b", bus_property_get_bool, offsetof(ExecContext, protect_control_groups), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PrivateNetwork", "b", bus_property_get_bool, offsetof(ExecContext, private_network), SD_BUS_VTABLE_PROPERTY_CONST), SD_BUS_PROPERTY("PrivateUsers", "b", bus_property_get_bool, offsetof(ExecContext, private_users), SD_BUS_VTABLE_PROPERTY_CONST), @@ -1274,6 +1275,9 @@ int bus_exec_context_set_transient_property( if (streq(name, "ProtectKernelModules")) return bus_set_transient_bool(u, name, &c->protect_kernel_modules, message, flags, error); + if (streq(name, "ProtectKernelLogs")) + return bus_set_transient_bool(u, name, &c->protect_kernel_logs, message, flags, error); + if (streq(name, "ProtectControlGroups")) return bus_set_transient_bool(u, name, &c->protect_control_groups, message, flags, error); diff --git a/src/core/execute.c b/src/core/execute.c index 1c22c3d80e..89c485a19a 100644 --- a/src/core/execute.c +++ 
b/src/core/execute.c @@ -1396,6 +1396,7 @@ static bool context_has_no_new_privileges(const ExecContext *c) { exec_context_restrict_namespaces_set(c) || c->protect_kernel_tunables || c->protect_kernel_modules || + c->protect_kernel_logs || c->private_devices || context_has_syscall_filters(c) || !set_isempty(c->syscall_archs) || @@ -1542,6 +1543,19 @@ static int apply_protect_kernel_modules(const Unit *u, const ExecContext *c) { return seccomp_load_syscall_filter_set(SCMP_ACT_ALLOW, syscall_filter_sets + SYSCALL_FILTER_SET_MODULE, SCMP_ACT_ERRNO(EPERM), false); } +static int apply_protect_kernel_logs(const Unit *u, const ExecContext *c) { + assert(u); + assert(c); + + if (!c->protect_kernel_logs) + return 0; + + if (skip_seccomp_unavailable(u, "ProtectKernelLogs=")) + return 0; + + return seccomp_protect_syslog(); +} + static int apply_private_devices(const Unit *u, const ExecContext *c) { assert(u); assert(c); @@ -3679,6 +3693,12 @@ static int exec_child( return log_unit_error_errno(unit, r, "Failed to apply module loading restrictions: %m"); } + r = apply_protect_kernel_logs(unit, context); + if (r < 0) { + *exit_status = EXIT_SECCOMP; + return log_unit_error_errno(unit, r, "Failed to apply kernel log restrictions: %m"); + } + r = apply_private_devices(unit, context); if (r < 0) { *exit_status = EXIT_SECCOMP; @@ -4318,6 +4338,7 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) { "%sPrivateDevices: %s\n" "%sProtectKernelTunables: %s\n" "%sProtectKernelModules: %s\n" + "%sProtectKernelLogs: %s\n" "%sProtectControlGroups: %s\n" "%sPrivateNetwork: %s\n" "%sPrivateUsers: %s\n" @@ -4338,6 +4359,7 @@ void exec_context_dump(const ExecContext *c, FILE* f, const char *prefix) { prefix, yes_no(c->private_devices), prefix, yes_no(c->protect_kernel_tunables), prefix, yes_no(c->protect_kernel_modules), + prefix, yes_no(c->protect_kernel_logs), prefix, yes_no(c->protect_control_groups), prefix, yes_no(c->private_network), prefix, 
yes_no(c->private_users), diff --git a/src/core/execute.h b/src/core/execute.h index 2508c6d668..c923b1fa21 100644 --- a/src/core/execute.h +++ b/src/core/execute.h @@ -257,6 +257,7 @@ struct ExecContext { bool private_mounts; bool protect_kernel_tunables; bool protect_kernel_modules; + bool protect_kernel_logs; bool protect_control_groups; ProtectSystem protect_system; ProtectHome protect_home; diff --git a/src/core/load-fragment-gperf.gperf.m4 b/src/core/load-fragment-gperf.gperf.m4 index 58c1970d05..42fc4eaac9 100644 --- a/src/core/load-fragment-gperf.gperf.m4 +++ b/src/core/load-fragment-gperf.gperf.m4 @@ -115,6 +115,7 @@ $1.PrivateTmp, config_parse_bool, 0, $1.PrivateDevices, config_parse_bool, 0, offsetof($1, exec_context.private_devices) $1.ProtectKernelTunables, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_tunables) $1.ProtectKernelModules, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_modules) +$1.ProtectKernelLogs, config_parse_bool, 0, offsetof($1, exec_context.protect_kernel_logs) $1.ProtectControlGroups, config_parse_bool, 0, offsetof($1, exec_context.protect_control_groups) $1.NetworkNamespacePath, config_parse_unit_path_printf, 0, offsetof($1, exec_context.network_namespace_path) $1.PrivateNetwork, config_parse_bool, 0, offsetof($1, exec_context.private_network) diff --git a/src/core/unit.c b/src/core/unit.c index 5f2ca44701..c2722a15da 100644 --- a/src/core/unit.c +++ b/src/core/unit.c @@ -4269,6 +4269,9 @@ int unit_patch_contexts(Unit *u) { if (ec->protect_kernel_modules) ec->capability_bounding_set &= ~(UINT64_C(1) << CAP_SYS_MODULE); + if (ec->protect_kernel_logs) + ec->capability_bounding_set &= ~(UINT64_C(1) << CAP_SYSLOG); + if (ec->dynamic_user) { if (!ec->user) { r = user_from_unit_name(u, &ec->user); From 07cab0f72b084644f12dc3083f880514250590fe Mon Sep 17 00:00:00 2001 From: Kevin Kuehler Date: Mon, 4 Nov 2019 17:20:47 -0800 Subject: [PATCH 3/9] tests: Add capability tests for ProtectKernelLogs --- 
test/meson.build | 2 ++ .../exec-protectkernellogs-no-capabilities.service | 7 +++++++ .../exec-protectkernellogs-yes-capabilities.service | 7 +++++++ 3 files changed, 16 insertions(+) create mode 100644 test/test-execute/exec-protectkernellogs-no-capabilities.service create mode 100644 test/test-execute/exec-protectkernellogs-yes-capabilities.service diff --git a/test/meson.build b/test/meson.build index 24ab4d1dee..eeb3c5f8f0 100644 --- a/test/meson.build +++ b/test/meson.build @@ -109,6 +109,8 @@ test_data_files = ''' test-execute/exec-privatetmp-no.service test-execute/exec-privatetmp-yes.service test-execute/exec-protecthome-tmpfs-vs-protectsystem-strict.service + test-execute/exec-protectkernellogs-yes-capabilities.service + test-execute/exec-protectkernellogs-no-capabilities.service test-execute/exec-protectkernelmodules-no-capabilities.service test-execute/exec-protectkernelmodules-yes-capabilities.service test-execute/exec-protectkernelmodules-yes-mount-propagation.service diff --git a/test/test-execute/exec-protectkernellogs-no-capabilities.service b/test/test-execute/exec-protectkernellogs-no-capabilities.service new file mode 100644 index 0000000000..f0c7d4d685 --- /dev/null +++ b/test/test-execute/exec-protectkernellogs-no-capabilities.service @@ -0,0 +1,7 @@ +[Unit] +Description=Test CAP_SYSLOG for ProtectKernelLogs=no + +[Service] +ProtectKernelLogs=no +ExecStart=/bin/sh -x -c 'capsh --print | grep cap_syslog' +Type=oneshot diff --git a/test/test-execute/exec-protectkernellogs-yes-capabilities.service b/test/test-execute/exec-protectkernellogs-yes-capabilities.service new file mode 100644 index 0000000000..803ba7d552 --- /dev/null +++ b/test/test-execute/exec-protectkernellogs-yes-capabilities.service @@ -0,0 +1,7 @@ +[Unit] +Description=Test CAP_SYSLOG for ProtectKernelLogs=yes + +[Service] +ProtectKernelLogs=yes +ExecStart=/bin/sh -x -c '! 
capsh --print | grep cap_syslog' +Type=oneshot From 94a7b2759d39fcfed1381ac324cc24a83ec4d1be Mon Sep 17 00:00:00 2001 From: Kevin Kuehler Date: Sun, 10 Nov 2019 01:17:01 -0800 Subject: [PATCH 4/9] core: ProtectKernelLogs= mask kmsg in /proc and /dev Block access to /dev/kmsg and /proc/kmsg when ProtectKernelLogs is set. --- src/core/execute.c | 2 ++ src/core/namespace.c | 15 ++++++++++++++- src/core/namespace.h | 1 + 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/core/execute.c b/src/core/execute.c index 89c485a19a..8ab4b18dc7 100644 --- a/src/core/execute.c +++ b/src/core/execute.c @@ -1872,6 +1872,7 @@ static bool exec_needs_mount_namespace( context->protect_home != PROTECT_HOME_NO || context->protect_kernel_tunables || context->protect_kernel_modules || + context->protect_kernel_logs || context->protect_control_groups) return true; @@ -2507,6 +2508,7 @@ static int apply_mount_namespace( .protect_control_groups = context->protect_control_groups, .protect_kernel_tunables = context->protect_kernel_tunables, .protect_kernel_modules = context->protect_kernel_modules, + .protect_kernel_logs = context->protect_kernel_logs, .protect_hostname = context->protect_hostname, .mount_apivfs = context->mount_apivfs, .private_mounts = context->private_mounts, diff --git a/src/core/namespace.c b/src/core/namespace.c index df0455b7eb..bbb372459b 100644 --- a/src/core/namespace.c +++ b/src/core/namespace.c @@ -109,6 +109,12 @@ static const MountEntry protect_kernel_modules_table[] = { { "/usr/lib/modules", INACCESSIBLE, true }, }; +/* ProtectKernelLogs= option */ +static const MountEntry protect_kernel_logs_table[] = { + { "/proc/kmsg", INACCESSIBLE, true }, + { "/dev/kmsg", INACCESSIBLE, true }, +}; + /* * ProtectHome=read-only table, protect $HOME and $XDG_RUNTIME_DIR and rest of * system should be protected by ProtectSystem= @@ -1147,8 +1153,9 @@ static size_t namespace_calculate_mounts( n_temporary_filesystems + ns_info->private_dev +
(ns_info->protect_kernel_tunables ? ELEMENTSOF(protect_kernel_tunables_table) : 0) + - (ns_info->protect_control_groups ? 1 : 0) + (ns_info->protect_kernel_modules ? ELEMENTSOF(protect_kernel_modules_table) : 0) + + (ns_info->protect_kernel_logs ? ELEMENTSOF(protect_kernel_logs_table) : 0) + + (ns_info->protect_control_groups ? 1 : 0) + protect_home_cnt + protect_system_cnt + (ns_info->protect_hostname ? 2 : 0) + (namespace_info_mount_apivfs(ns_info) ? ELEMENTSOF(apivfs_table) : 0); @@ -1319,6 +1326,12 @@ int setup_namespace( goto finish; } + if (ns_info->protect_kernel_logs) { + r = append_static_mounts(&m, protect_kernel_logs_table, ELEMENTSOF(protect_kernel_logs_table), ns_info->ignore_protect_paths); + if (r < 0) + goto finish; + } + if (ns_info->protect_control_groups) { *(m++) = (MountEntry) { .path_const = "/sys/fs/cgroup", diff --git a/src/core/namespace.h b/src/core/namespace.h index e5cd8e5313..60a6abcd45 100644 --- a/src/core/namespace.h +++ b/src/core/namespace.h @@ -51,6 +51,7 @@ struct NamespaceInfo { bool protect_control_groups:1; bool protect_kernel_tunables:1; bool protect_kernel_modules:1; + bool protect_kernel_logs:1; bool mount_apivfs:1; bool protect_hostname:1; }; From 97d05f3b709c39acebc53749b1d9eb29a24690b1 Mon Sep 17 00:00:00 2001 From: Kevin Kuehler Date: Sun, 10 Nov 2019 20:37:20 -0800 Subject: [PATCH 5/9] test/test-seccomp: add test_protect_syslog --- src/test/test-seccomp.c | 43 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/src/test/test-seccomp.c b/src/test/test-seccomp.c index 018c20f8be..ca3f37344a 100644 --- a/src/test/test-seccomp.c +++ b/src/test/test-seccomp.c @@ -322,6 +322,48 @@ static void test_protect_sysctl(void) { assert_se(wait_for_terminate_and_check("sysctlseccomp", pid, WAIT_LOG) == EXIT_SUCCESS); } +static void test_protect_syslog(void) { + pid_t pid; + + log_info("/* %s */", __func__); + + if (!is_seccomp_available()) { + log_notice("Seccomp not available, skipping %s", 
__func__); + return; + } + if (geteuid() != 0) { + log_notice("Not root, skipping %s", __func__); + return; + } + + /* in containers syslog() is likely missing anyway */ + if (detect_container() > 0) { + log_notice("Testing in container, skipping %s", __func__); + return; + } + + pid = fork(); + assert_se(pid >= 0); + + if (pid == 0) { +#if defined __NR_syslog && __NR_syslog > 0 + assert_se(syscall(__NR_syslog, -1, NULL, 0) < 0); + assert_se(errno == EINVAL); +#endif + + assert_se(seccomp_protect_syslog() >= 0); + +#if defined __NR_syslog && __NR_syslog > 0 + assert_se(syscall(__NR_syslog, 0, 0, 0) < 0); + assert_se(errno == EPERM); +#endif + + _exit(EXIT_SUCCESS); + } + + assert_se(wait_for_terminate_and_check("syslogseccomp", pid, WAIT_LOG) == EXIT_SUCCESS); +} + static void test_restrict_address_families(void) { pid_t pid; @@ -982,6 +1024,7 @@ int main(int argc, char *argv[]) { test_filter_sets_ordered(); test_restrict_namespace(); test_protect_sysctl(); + test_protect_syslog(); test_restrict_address_families(); test_restrict_realtime(); test_memory_deny_write_execute_mmap(); From d916e35b9f1cd03c02ca8acc34f56a156dcc5868 Mon Sep 17 00:00:00 2001 From: Kevin Kuehler Date: Sun, 10 Nov 2019 21:27:29 -0800 Subject: [PATCH 6/9] man: Add description for ProtectKernelLogs= --- man/systemd.exec.xml | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/man/systemd.exec.xml b/man/systemd.exec.xml index 8701005e6b..93fc9e95a6 100644 --- a/man/systemd.exec.xml +++ b/man/systemd.exec.xml @@ -402,11 +402,11 @@ CapabilityBoundingSet=~CAP_B CAP_C SystemCallFilter=, SystemCallArchitectures=, RestrictAddressFamilies=, RestrictNamespaces=, PrivateDevices=, ProtectKernelTunables=, - ProtectKernelModules=, MemoryDenyWriteExecute=, - RestrictRealtime=, RestrictSUIDSGID=, - DynamicUser= or LockPersonality= are specified. Note that even - if this setting is overridden by them, systemctl show shows the original value of - this setting. 
Also see ProtectKernelModules=, ProtectKernelLogs=, + MemoryDenyWriteExecute=, RestrictRealtime=, + RestrictSUIDSGID=, DynamicUser= or LockPersonality= + are specified. Note that even if this setting is overridden by them, systemctl show shows the + original value of this setting. Also see No New Privileges Flag. @@ -1321,6 +1321,22 @@ BindReadOnlyPaths=/var/lib/systemd + + ProtectKernelLogs= + + Takes a boolean argument. If true, access to the kernel log ring buffer will be denied. It is + recommended to turn this on for most services that do not need to read from or write to the kernel log ring + buffer. Enabling this option removes CAP_SYSLOG from the capability bounding set for this + unit, and installs a system call filter to block the + syslog2 + system call (not to be confused with the libc API + syslog3 + for userspace logging). The kernel exposes its log buffer to userspace via /dev/kmsg and + /proc/kmsg. If enabled, these are made inaccessible to all the processes in the unit. + + + + ProtectControlGroups= @@ -1772,8 +1788,8 @@ SystemCallErrorNumber=EPERM mappings. Specifically these are the options PrivateTmp=, PrivateDevices=, ProtectSystem=, ProtectHome=, ProtectKernelTunables=, ProtectControlGroups=, - ReadOnlyPaths=, InaccessiblePaths= and - ReadWritePaths=. + ProtectKernelLogs=, ReadOnlyPaths=, + InaccessiblePaths= and ReadWritePaths=. 
From 806aea3879ca86355af24a7c36cdbf7432b0c7c7 Mon Sep 17 00:00:00 2001 From: Kevin Kuehler Date: Wed, 13 Nov 2019 16:38:33 -0800 Subject: [PATCH 7/9] test-namespace: Add test for ProtectKernelLogs= --- src/test/test-namespace.c | 73 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 68 insertions(+), 5 deletions(-) diff --git a/src/test/test-namespace.c b/src/test/test-namespace.c index 73ad2b25dd..41ea733b7e 100644 --- a/src/test/test-namespace.c +++ b/src/test/test-namespace.c @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: LGPL-2.1+ */ +#include #include #include @@ -9,7 +10,9 @@ #include "process-util.h" #include "string-util.h" #include "tests.h" +#include "user-util.h" #include "util.h" +#include "virt.h" static void test_tmpdir(const char *id, const char *A, const char *B) { _cleanup_free_ char *a, *b; @@ -48,14 +51,16 @@ static void test_tmpdir(const char *id, const char *A, const char *B) { assert_se(rmdir(b) >= 0); } -static int test_netns(void) { +static void test_netns(void) { _cleanup_close_pair_ int s[2] = { -1, -1 }; pid_t pid1, pid2, pid3; int r, n = 0; siginfo_t si; - if (geteuid() > 0) - return log_tests_skipped("not root"); + if (geteuid() > 0) { + (void) log_tests_skipped("not root"); + return; + } assert_se(socketpair(AF_UNIX, SOCK_DGRAM, 0, s) >= 0); @@ -102,7 +107,62 @@ static int test_netns(void) { n += si.si_status; assert_se(n == 1); - return EXIT_SUCCESS; +} + +static void test_protect_kernel_logs(void) { + int r; + pid_t pid; + static const NamespaceInfo ns_info = { + .protect_kernel_logs = true, + }; + + if (geteuid() > 0) { + (void) log_tests_skipped("not root"); + return; + } + + /* In a container we likely don't have access to /dev/kmsg */ + if (detect_container() > 0) { + (void) log_tests_skipped("in container"); + return; + } + + + pid = fork(); + assert_se(pid >= 0); + + if (pid == 0) { + _cleanup_close_ int fd = -1; + + fd = open("/dev/kmsg", O_RDONLY | O_CLOEXEC); + assert_se(fd >= 0); + + r = setup_namespace(NULL, + NULL, +
&ns_info, + NULL, + NULL, + NULL, + NULL, + NULL, 0, + NULL, 0, + NULL, + NULL, + PROTECT_HOME_NO, + PROTECT_SYSTEM_NO, + 0, + 0, + NULL); + assert_se(r == 0); + + assert_se(setresuid(UID_NOBODY, UID_NOBODY, UID_NOBODY) >= 0); + assert_se(open("/dev/kmsg", O_RDONLY | O_CLOEXEC) < 0); + assert_se(errno == EACCES); + + _exit(EXIT_SUCCESS); + } + + assert_se(wait_for_terminate_and_check("ns-kernellogs", pid, WAIT_LOG) == EXIT_SUCCESS); } int main(int argc, char *argv[]) { @@ -133,5 +193,8 @@ int main(int argc, char *argv[]) { test_tmpdir("sys-devices-pci0000:00-0000:00:1a.0-usb3-3\\x2d1-3\\x2d1:1.0-bluetooth-hci0.device", z, zz); - return test_netns(); + test_netns(); + test_protect_kernel_logs(); + + return EXIT_SUCCESS; } From 6168ae5840bf206b1d1f88d5173fb292230f56a8 Mon Sep 17 00:00:00 2001 From: Kevin Kuehler Date: Wed, 13 Nov 2019 16:56:23 -0800 Subject: [PATCH 8/9] units: set ProtectKernelLogs=yes on relevant units We set ProtectKernelLogs=yes on all long running services except for udevd, since it accesses /dev/kmsg, and journald, since it calls syslog and accesses /dev/kmsg.
--- units/systemd-coredump@.service.in | 1 + units/systemd-hostnamed.service.in | 1 + units/systemd-journal-gatewayd.service.in | 1 + units/systemd-journal-remote.service.in | 1 + units/systemd-journal-upload.service.in | 1 + units/systemd-localed.service.in | 1 + units/systemd-logind.service.in | 1 + units/systemd-machined.service.in | 1 + units/systemd-networkd.service.in | 1 + units/systemd-portabled.service.in | 1 + units/systemd-resolved.service.in | 1 + units/systemd-timedated.service.in | 1 + units/systemd-timesyncd.service.in | 1 + 13 files changed, 13 insertions(+) diff --git a/units/systemd-coredump@.service.in b/units/systemd-coredump@.service.in index afb2ab9d17..951faa62a1 100644 --- a/units/systemd-coredump@.service.in +++ b/units/systemd-coredump@.service.in @@ -32,6 +32,7 @@ ProtectHome=yes ProtectHostname=yes ProtectKernelModules=yes ProtectKernelTunables=yes +ProtectKernelLogs=yes ProtectSystem=strict RestrictAddressFamilies=AF_UNIX RestrictNamespaces=yes diff --git a/units/systemd-hostnamed.service.in b/units/systemd-hostnamed.service.in index 1fbbafdd6f..1365d749ca 100644 --- a/units/systemd-hostnamed.service.in +++ b/units/systemd-hostnamed.service.in @@ -27,6 +27,7 @@ ProtectControlGroups=yes ProtectHome=yes ProtectKernelModules=yes ProtectKernelTunables=yes +ProtectKernelLogs=yes ProtectSystem=strict ReadWritePaths=/etc RestrictAddressFamilies=AF_UNIX diff --git a/units/systemd-journal-gatewayd.service.in b/units/systemd-journal-gatewayd.service.in index 50f774512b..8071395e68 100644 --- a/units/systemd-journal-gatewayd.service.in +++ b/units/systemd-journal-gatewayd.service.in @@ -24,6 +24,7 @@ ProtectHome=yes ProtectHostname=yes ProtectKernelModules=yes ProtectKernelTunables=yes +ProtectKernelLogs=yes RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6 RestrictNamespaces=yes RestrictRealtime=yes diff --git a/units/systemd-journal-remote.service.in b/units/systemd-journal-remote.service.in index 7f5238802f..6181d15d77 100644 --- 
a/units/systemd-journal-remote.service.in +++ b/units/systemd-journal-remote.service.in @@ -26,6 +26,7 @@ ProtectHome=yes ProtectHostname=yes ProtectKernelModules=yes ProtectKernelTunables=yes +ProtectKernelLogs=yes ProtectSystem=strict RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6 RestrictNamespaces=yes diff --git a/units/systemd-journal-upload.service.in b/units/systemd-journal-upload.service.in index 33ef3b8dca..2f1cce8518 100644 --- a/units/systemd-journal-upload.service.in +++ b/units/systemd-journal-upload.service.in @@ -24,6 +24,7 @@ ProtectHome=yes ProtectHostname=yes ProtectKernelModules=yes ProtectKernelTunables=yes +ProtectKernelLogs=yes RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6 RestrictNamespaces=yes RestrictRealtime=yes diff --git a/units/systemd-localed.service.in b/units/systemd-localed.service.in index f9a81fa8dd..10ecff5184 100644 --- a/units/systemd-localed.service.in +++ b/units/systemd-localed.service.in @@ -28,6 +28,7 @@ ProtectHome=yes ProtectHostname=yes ProtectKernelModules=yes ProtectKernelTunables=yes +ProtectKernelLogs=yes ProtectSystem=strict ReadWritePaths=/etc RestrictAddressFamilies=AF_UNIX diff --git a/units/systemd-logind.service.in b/units/systemd-logind.service.in index ef802a4e6f..ccbe631586 100644 --- a/units/systemd-logind.service.in +++ b/units/systemd-logind.service.in @@ -41,6 +41,7 @@ ProtectControlGroups=yes ProtectHome=yes ProtectHostname=yes ProtectKernelModules=yes +ProtectKernelLogs=yes ProtectSystem=strict ReadWritePaths=/etc /run Restart=always diff --git a/units/systemd-machined.service.in b/units/systemd-machined.service.in index 3db0281f81..fa344d487d 100644 --- a/units/systemd-machined.service.in +++ b/units/systemd-machined.service.in @@ -24,6 +24,7 @@ LockPersonality=yes MemoryDenyWriteExecute=yes NoNewPrivileges=yes ProtectHostname=yes +ProtectKernelLogs=yes RestrictAddressFamilies=AF_UNIX AF_NETLINK AF_INET AF_INET6 RestrictRealtime=yes SystemCallArchitectures=native diff --git 
a/units/systemd-networkd.service.in b/units/systemd-networkd.service.in index ed985f64fa..01931665a4 100644 --- a/units/systemd-networkd.service.in +++ b/units/systemd-networkd.service.in @@ -29,6 +29,7 @@ NoNewPrivileges=yes ProtectControlGroups=yes ProtectHome=yes ProtectKernelModules=yes +ProtectKernelLogs=yes ProtectSystem=strict Restart=on-failure RestartSec=0 diff --git a/units/systemd-portabled.service.in b/units/systemd-portabled.service.in index fb79f454fd..3051fbd3d0 100644 --- a/units/systemd-portabled.service.in +++ b/units/systemd-portabled.service.in @@ -18,6 +18,7 @@ BusName=org.freedesktop.portable1 CapabilityBoundingSet=CAP_KILL CAP_SYS_PTRACE CAP_SYS_ADMIN CAP_SETGID CAP_SYS_CHROOT CAP_DAC_READ_SEARCH CAP_DAC_OVERRIDE CAP_CHOWN CAP_FOWNER CAP_FSETID CAP_MKNOD MemoryDenyWriteExecute=yes ProtectHostname=yes +ProtectKernelLogs=yes RestrictRealtime=yes RestrictAddressFamilies=AF_UNIX AF_NETLINK AF_INET AF_INET6 SystemCallFilter=@system-service @mount diff --git a/units/systemd-resolved.service.in b/units/systemd-resolved.service.in index 22cb202363..f73697832c 100644 --- a/units/systemd-resolved.service.in +++ b/units/systemd-resolved.service.in @@ -32,6 +32,7 @@ ProtectControlGroups=yes ProtectHome=yes ProtectKernelModules=yes ProtectKernelTunables=yes +ProtectKernelLogs=yes ProtectSystem=strict Restart=always RestartSec=0 diff --git a/units/systemd-timedated.service.in b/units/systemd-timedated.service.in index 819cb4dba2..87859f4aef 100644 --- a/units/systemd-timedated.service.in +++ b/units/systemd-timedated.service.in @@ -27,6 +27,7 @@ ProtectHome=yes ProtectHostname=yes ProtectKernelModules=yes ProtectKernelTunables=yes +ProtectKernelLogs=yes ProtectSystem=strict ReadWritePaths=/etc RestrictAddressFamilies=AF_UNIX diff --git a/units/systemd-timesyncd.service.in b/units/systemd-timesyncd.service.in index 1a866fcc7a..f0486a70ab 100644 --- a/units/systemd-timesyncd.service.in +++ b/units/systemd-timesyncd.service.in @@ -32,6 +32,7 @@ 
ProtectHome=yes ProtectHostname=yes ProtectKernelModules=yes ProtectKernelTunables=yes +ProtectKernelLogs=yes ProtectSystem=strict Restart=always RestartSec=0 From 82dce83b19d70e27eb6e50238c3a0a9ac1aa35f9 Mon Sep 17 00:00:00 2001 From: Kevin Kuehler Date: Wed, 13 Nov 2019 17:37:05 -0800 Subject: [PATCH 9/9] systemd-analyze: Add ProtectKernelLogs to security --- src/analyze/analyze-security.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/analyze/analyze-security.c b/src/analyze/analyze-security.c index 18d1ce2895..79b099a409 100644 --- a/src/analyze/analyze-security.c +++ b/src/analyze/analyze-security.c @@ -64,6 +64,7 @@ struct security_info { bool protect_control_groups; bool protect_kernel_modules; bool protect_kernel_tunables; + bool protect_kernel_logs; char *protect_home; char *protect_system; @@ -772,6 +773,16 @@ static const struct security_assessor security_assessor_table[] = { .assess = assess_bool, .offset = offsetof(struct security_info, protect_kernel_tunables), }, + { + .id = "ProtectKernelLogs=", + .description_good = "Service cannot read from or write to the kernel log ring buffer", + .description_bad = "Service may read from or write to the kernel log ring buffer", + .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#ProtectKernelLogs=", + .weight = 1000, + .range = 1, + .assess = assess_bool, + .offset = offsetof(struct security_info, protect_kernel_logs), + }, { .id = "ProtectHome=", .url = "https://www.freedesktop.org/software/systemd/man/systemd.exec.html#ProtectHome=", @@ -1906,6 +1917,7 @@ static int acquire_security_info(sd_bus *bus, const char *name, struct security_ { "ProtectHostname", "b", NULL, offsetof(struct security_info, protect_hostname) }, { "ProtectKernelModules", "b", NULL, offsetof(struct security_info, protect_kernel_modules) }, { "ProtectKernelTunables", "b", NULL, offsetof(struct security_info, protect_kernel_tunables) }, + { "ProtectKernelLogs", "b", NULL, offsetof(struct 
security_info, protect_kernel_logs) }, { "ProtectSystem", "s", NULL, offsetof(struct security_info, protect_system) }, { "RemoveIPC", "b", NULL, offsetof(struct security_info, remove_ipc) }, { "RestrictAddressFamilies", "(bas)", property_read_restrict_address_families, 0 }, @@ -1980,6 +1992,9 @@ static int acquire_security_info(sd_bus *bus, const char *name, struct security_ if (info->protect_kernel_modules) info->capability_bounding_set &= ~(UINT64_C(1) << CAP_SYS_MODULE); + if (info->protect_kernel_logs) + info->capability_bounding_set &= ~(UINT64_C(1) << CAP_SYSLOG); + if (info->private_devices) info->capability_bounding_set &= ~((UINT64_C(1) << CAP_MKNOD) | (UINT64_C(1) << CAP_SYS_RAWIO));