From c16460cf781c8fe1a122cb83ad5427a014a236eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Thu, 16 Jan 2020 15:51:44 +0100 Subject: [PATCH 1/7] shared/sysctl-util: add missing header one_zero() is used later in the header... --- src/shared/sysctl-util.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shared/sysctl-util.h b/src/shared/sysctl-util.h index cb30a97752..dce0151519 100644 --- a/src/shared/sysctl-util.h +++ b/src/shared/sysctl-util.h @@ -6,7 +6,7 @@ #include "macro.h" #include "stdio-util.h" -#include "util.h" +#include "string-util.h" char *sysctl_normalize(char *s); int sysctl_read(const char *property, char **value); From f3b136a4847a0993e2dc1197779160dca4da6dac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Thu, 16 Jan 2020 15:53:57 +0100 Subject: [PATCH 2/7] shared/sysctl-util: normalize repeated slashes or dots to a single value We use those strings as hash keys. While writing "a...b" looks strange, "a///b" does not look so strange. Both syntaxes would actually result in the value being correctly written to the file, but they would confuse our de-deplication over keys. So let's normalize. Output also becomes nicer. Add test. --- src/shared/sysctl-util.c | 28 +++++++++++++---------- src/test/meson.build | 4 ++++ src/test/test-sysctl-util.c | 44 +++++++++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 11 deletions(-) create mode 100644 src/test/test-sysctl-util.c diff --git a/src/shared/sysctl-util.c b/src/shared/sysctl-util.c index 12fb3ef7ea..8543dbd2d0 100644 --- a/src/shared/sysctl-util.c +++ b/src/shared/sysctl-util.c @@ -9,6 +9,7 @@ #include "fileio.h" #include "log.h" #include "macro.h" +#include "path-util.h" #include "string-util.h" #include "sysctl-util.h" @@ -16,22 +17,27 @@ char *sysctl_normalize(char *s) { char *n; n = strpbrk(s, "/."); + /* If the first separator is a slash, the path is * assumed to be normalized and slashes remain slashes * and dots remains dots. */ - if (!n || *n == '/') - return s; - /* Otherwise, dots become slashes and slashes become - * dots. Fun. */ - while (n) { - if (*n == '.') - *n = '/'; - else - *n = '.'; + if (n && *n == '.') + /* Dots become slashes and slashes become dots. Fun. */ + do { + if (*n == '.') + *n = '/'; + else + *n = '.'; - n = strpbrk(n + 1, "/."); - } + n = strpbrk(n + 1, "/."); + } while (n); + + path_simplify(s, true); + + /* Kill the leading slash, but keep the first character of the string in the same place. */ + if (*s == '/' && *(s+1)) + memmove(s, s+1, strlen(s)); return s; } diff --git a/src/test/meson.build b/src/test/meson.build index 5df1366561..b79b197c16 100644 --- a/src/test/meson.build +++ b/src/test/meson.build @@ -325,6 +325,10 @@ tests += [ [], []], + [['src/test/test-sysctl-util.c'], + [], + []], + [['src/test/test-user-util.c'], [], []], diff --git a/src/test/test-sysctl-util.c b/src/test/test-sysctl-util.c new file mode 100644 index 0000000000..2b957dd4d6 --- /dev/null +++ b/src/test/test-sysctl-util.c @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include "strv.h" +#include "sysctl-util.h" +#include "tests.h" + +static const char* cases[] = { + "a.b.c", "a/b/c", + "a/b/c", "a/b/c", + "a/b.c/d", "a/b.c/d", + "a.b/c.d", "a/b.c/d", + + "net.ipv4.conf.enp3s0/200.forwarding", "net/ipv4/conf/enp3s0.200/forwarding", + "net/ipv4/conf/enp3s0.200/forwarding", "net/ipv4/conf/enp3s0.200/forwarding", + + "a...b...c", "a/b/c", + "a///b///c", "a/b/c", + ".a...b...c", "a/b/c", + "/a///b///c", "a/b/c", + NULL, +}; + +static void test_sysctl_normalize(void) { + log_info("/* %s */", __func__); + + const char **s, **expected; + STRV_FOREACH_PAIR(s, expected, cases) { + _cleanup_free_ char *t; + + assert_se(t = strdup(*s)); + assert_se(sysctl_normalize(t) == t); + + log_info("\"%s\" → \"%s\", expected \"%s\"", *s, t, *expected); + assert_se(streq(t, *expected)); + } +} + +int main(int argc, char *argv[]) { + test_setup_logging(LOG_INFO); + + test_sysctl_normalize(); + + return 0; +} From fa2111bd3ed2fc436a392a8ce9c3fe58bc2ba527 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Thu, 16 Jan 2020 19:39:18 +0100 Subject: [PATCH 3/7] man: document logging downgrade in systemctl Fixup for 32458cc968. --- man/sysctl.d.xml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/man/sysctl.d.xml b/man/sysctl.d.xml index 32084ee8d6..71a236b50b 100644 --- a/man/sysctl.d.xml +++ b/man/sysctl.d.xml @@ -59,11 +59,12 @@ /proc/sys/net/ipv4/conf/enp3s0.200/forwarding. - Any access permission errors and attempts to write variables not defined on the local system are - logged, but do not cause the service to fail. Moreover, if a variable assignment is prefixed with a - single - character, failure to set the variable will be logged, but will not cause the - service to fail. All other errors when setting variables cause the service to return failure at the end - (other variables are still processed). + Any access permission errors and attempts to write variables not present on the local system are + logged, but do not cause the service to fail. Debug log level is used, which means that the message will + not show up at all by default. Moreover, if a variable assignment is prefixed with a single + - character, any failure to set the variable will be logged at debug level, but will + not cause the service to fail. All other errors when setting variables are logged with higher priority + and cause the service to return failure at the end (other variables are still processed). The settings configured with sysctl.d files will be applied early on boot. The network From def94437934bc83355528e6ca1e75e706d90118e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Thu, 16 Jan 2020 19:17:47 +0100 Subject: [PATCH 4/7] Revert "sysctl: always write net.ipv4.conf.all.xyz= in addition to net.ipv4.conf.default.xyz=" This reverts commits 1836bf9e1d70240c8079e4db4312309f4f1f91fd and 9fefb9e3cdebcefa681672423d23ccc72ae6c165. The race is reintroduced, and will be fixed later. --- sysctl.d/50-default.conf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sysctl.d/50-default.conf b/sysctl.d/50-default.conf index 41bd1f9183..c22d690de4 100644 --- a/sysctl.d/50-default.conf +++ b/sysctl.d/50-default.conf @@ -22,13 +22,13 @@ kernel.sysrq = 16 kernel.core_uses_pid = 1 # Source route verification -net.ipv4.conf.all.rp_filter = 2 +net.ipv4.conf.default.rp_filter = 2 # Do not accept source routing -net.ipv4.conf.all.accept_source_route = 0 +net.ipv4.conf.default.accept_source_route = 0 # Promote secondary addresses when the primary address is removed -net.ipv4.conf.all.promote_secondaries = 1 +net.ipv4.conf.default.promote_secondaries = 1 # ping(8) without CAP_NET_ADMIN and CAP_NET_RAW # The upper limit is set to 2^31-1. Values greater than that get rejected by From 02d89f9a623afcd8ce8333d5936c37c10b131988 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Thu, 30 Jan 2020 10:16:48 +0100 Subject: [PATCH 5/7] man: add syntax quickhelp to sysctl.d(5) --- man/sysctl.d.xml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/man/sysctl.d.xml b/man/sysctl.d.xml index 71a236b50b..9f3303a630 100644 --- a/man/sysctl.d.xml +++ b/man/sysctl.d.xml @@ -24,6 +24,13 @@ /etc/sysctl.d/*.conf /run/sysctl.d/*.conf /usr/lib/sysctl.d/*.conf + + key.name.under.proc.sys = some value +key/name/under/proc/sys = some value +key/middle.part.with.dots/foo = 123 +key.middle/part/with/dots.foo = 123 +-key.that.will.not.fail = value + From e0f424790d3dbde136a29a7fa4c2777c2e3fd695 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Thu, 16 Jan 2020 19:38:21 +0100 Subject: [PATCH 6/7] sysctl: add glob syntax to sysctl.d files This is intended for net.*.conf.*.foo files. Setting just "default" is not very useful because any interfaces present before systemd-sysctl is invoked are not affected. Setting "all" is too harsh, because the kernel takes the stronger of the device-specific setting and the "all" value, so effectively having a weaker setting for specific interfaces is not possible. Let's add a way in which can set "default" first and then all the others without "all". --- man/sysctl.d.xml | 58 ++++++++---- src/sysctl/sysctl.c | 210 +++++++++++++++++++++++++++++--------------- 2 files changed, 177 insertions(+), 91 deletions(-) diff --git a/man/sysctl.d.xml b/man/sysctl.d.xml index 9f3303a630..4df71e21eb 100644 --- a/man/sysctl.d.xml +++ b/man/sysctl.d.xml @@ -30,6 +30,9 @@ key/name/under/proc/sys = some value key/middle.part.with.dots/foo = 123 key.middle/part/with/dots.foo = 123 -key.that.will.not.fail = value +key.pattern.*.with.glob = whatever +-key.pattern.excluded.with.glob +key.pattern.overriden.with.glob = custom @@ -51,20 +54,20 @@ key.middle/part/with/dots.foo = 123 first non-whitespace character is # or ; are ignored. - Note that either / or - . may be used as separators within sysctl - variable names. If the first separator is a slash, remaining - slashes and dots are left intact. If the first separator is a dot, - dots and slashes are interchanged. - kernel.domainname=foo and - kernel/domainname=foo are equivalent and will - cause foo to be written to + Note that either / or . may be used as separators within + sysctl variable names. If the first separator is a slash, remaining slashes and dots are left intact. If + the first separator is a dot, dots and slashes are interchanged. + kernel.domainname=foo and kernel/domainname=foo are equivalent and + will cause foo to be written to /proc/sys/kernel/domainname. Either net.ipv4.conf.enp3s0/200.forwarding or - net/ipv4/conf/enp3s0.200/forwarding may be used - to refer to - /proc/sys/net/ipv4/conf/enp3s0.200/forwarding. - + net/ipv4/conf/enp3s0.200/forwarding may be used to refer to + /proc/sys/net/ipv4/conf/enp3s0.200/forwarding. A glob + glob7 pattern may be + used to write the same value to all matching keys. Keys for which an explicit pattern exists will be + excluded from any glob matching. In addition, a key may be explicitly excluded from being set by any + matching glob patterns by specifying the key name prefixed with a - character and not + followed by =, see SYNOPSIS. Any access permission errors and attempts to write variables not present on the local system are logged, but do not cause the service to fail. Debug log level is used, which means that the message will @@ -73,13 +76,10 @@ key.middle/part/with/dots.foo = 123 not cause the service to fail. All other errors when setting variables are logged with higher priority and cause the service to return failure at the end (other variables are still processed). - The settings configured with sysctl.d - files will be applied early on boot. The network - interface-specific options will also be applied individually for - each network interface as it shows up in the system. (More - specifically, net.ipv4.conf.*, - net.ipv6.conf.*, - net.ipv4.neigh.* and + The settings configured with sysctl.d files will be applied early on boot. The + network interface-specific options will also be applied individually for each network interface as it + shows up in the system. (More specifically, net.ipv4.conf.*, + net.ipv6.conf.*, net.ipv4.neigh.* and net.ipv6.neigh.*). Many sysctl parameters only become available when certain @@ -156,6 +156,26 @@ net.bridge.bridge-nf-call-arptables = 0 (starting with kernel 3.18), so simply not loading the module is sufficient to avoid filtering. + + + Set network routing properties for all interfaces + /etc/systemd/20-rp_filter.conf: + + net.ipv4.conf.default.rp_filter = 2 +net.ipv4.conf.*.rp_filter = 2 +-net.ipv4.conf.all.rp_filter +net.ipv4.conf.hub0.rp_filter = 1 + + + The key will be set to "2" for all interfaces, except "hub0". We set + net.ipv4.conf.default.rp_filter first, so any interfaces which are added + later will get this value (this also covers any interfaces detected while we're + running). The glob matches any interfaces which were detected earlier. The glob + will also match net.ipv4.conf.all.rp_filter, which we don't want to set at all, so + it is explicitly excluded. And "hub0" is excluded from the glob because it has an explicit setting. + + + diff --git a/src/sysctl/sysctl.c b/src/sysctl/sysctl.c index afef0a222b..bbcf0c4323 100644 --- a/src/sysctl/sysctl.c +++ b/src/sysctl/sysctl.c @@ -14,6 +14,7 @@ #include "errno-util.h" #include "fd-util.h" #include "fileio.h" +#include "glob-util.h" #include "hashmap.h" #include "log.h" #include "main-func.h" @@ -49,63 +50,6 @@ static Option *option_free(Option *o) { DEFINE_TRIVIAL_CLEANUP_FUNC(Option*, option_free); DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(option_hash_ops, char, string_hash_func, string_compare_func, Option, option_free); -static Option *option_new( - const char *key, - const char *value, - bool ignore_failure) { - - _cleanup_(option_freep) Option *o = NULL; - - assert(key); - assert(value); - - o = new(Option, 1); - if (!o) - return NULL; - - *o = (Option) { - .key = strdup(key), - .value = strdup(value), - .ignore_failure = ignore_failure, - }; - - if (!o->key || !o->value) - return NULL; - - return TAKE_PTR(o); -} - -static int apply_all(OrderedHashmap *sysctl_options) { - Option *option; - Iterator i; - int r = 0; - - ORDERED_HASHMAP_FOREACH(option, sysctl_options, i) { - int k; - - k = sysctl_write(option->key, option->value); - if (k < 0) { - /* If the sysctl is not available in the kernel or we are running with reduced - * privileges and cannot write it, then log about the issue, and proceed without - * failing. (EROFS is treated as a permission problem here, since that's how - * container managers usually protected their sysctls.) In all other cases log an - * error and make the tool fail. */ - - if (option->ignore_failure || k == -EROFS || ERRNO_IS_PRIVILEGE(k)) - log_debug_errno(k, "Couldn't write '%s' to '%s', ignoring: %m", option->value, option->key); - else if (k == -ENOENT) - log_info_errno(k, "Couldn't write '%s' to '%s', ignoring: %m", option->value, option->key); - else { - log_error_errno(k, "Couldn't write '%s' to '%s': %m", option->value, option->key); - if (r == 0) - r = k; - } - } - } - - return r; -} - static bool test_prefix(const char *p) { char **i; @@ -118,6 +62,7 @@ static bool test_prefix(const char *p) { t = path_startswith(*i, "/proc/sys/"); if (!t) t = *i; + if (path_startswith(p, t)) return true; } @@ -125,6 +70,117 @@ static bool test_prefix(const char *p) { return false; } +static Option *option_new( + const char *key, + const char *value, + bool ignore_failure) { + + _cleanup_(option_freep) Option *o = NULL; + + assert(key); + + o = new(Option, 1); + if (!o) + return NULL; + + *o = (Option) { + .key = strdup(key), + .value = value ? strdup(value) : NULL, + .ignore_failure = ignore_failure, + }; + + if (!o->key) + return NULL; + if (value && !o->value) + return NULL; + + return TAKE_PTR(o); +} + +static int sysctl_write_or_warn(const char *key, const char *value, bool ignore_failure) { + int r; + + r = sysctl_write(key, value); + if (r < 0) { + /* If the sysctl is not available in the kernel or we are running with reduced privileges and + * cannot write it, then log about the issue, and proceed without failing. (EROFS is treated + * as a permission problem here, since that's how container managers usually protected their + * sysctls.) In all other cases log an error and make the tool fail. */ + if (ignore_failure || r == -EROFS || ERRNO_IS_PRIVILEGE(r)) + log_debug_errno(r, "Couldn't write '%s' to '%s', ignoring: %m", value, key); + else if (r == -ENOENT) + log_info_errno(r, "Couldn't write '%s' to '%s', ignoring: %m", value, key); + else + return log_error_errno(r, "Couldn't write '%s' to '%s': %m", value, key); + } + + return 0; +} + +static int apply_all(OrderedHashmap *sysctl_options) { + Option *option; + Iterator i; + int r = 0; + + ORDERED_HASHMAP_FOREACH(option, sysctl_options, i) { + int k; + + /* Ignore "negative match" options, they are there only to exclude stuff from globs. */ + if (!option->value) + continue; + + if (string_is_glob(option->key)) { + _cleanup_strv_free_ char **paths = NULL; + _cleanup_free_ char *pattern = NULL; + char **s; + + pattern = path_join("/proc/sys", option->key); + if (!pattern) + return log_oom(); + + k = glob_extend(&paths, pattern); + if (k < 0) { + if (option->ignore_failure || ERRNO_IS_PRIVILEGE(r)) + log_debug_errno(k, "Failed to resolve glob '%s', ignoring: %m", + option->key); + else { + log_error_errno(k, "Couldn't resolve glob '%s': %m", + option->key); + if (r == 0) + r = k; + } + + } else if (strv_isempty(paths)) + log_debug("No match for glob: %s", option->key); + + STRV_FOREACH(s, paths) { + const char *key; + + assert_se(key = path_startswith(*s, "/proc/sys")); + + if (!test_prefix(key)) + continue; + + if (ordered_hashmap_contains(sysctl_options, key)) { + log_info("Not setting %s (explicit setting exists).", key); + continue; + } + + k = sysctl_write_or_warn(key, option->value, option->ignore_failure); + if (r == 0) + r = k; + } + + } else { + k = sysctl_write_or_warn(option->key, option->value, option->ignore_failure); + if (r == 0) + r = k; + } + } + + return r; +} + static int parse_file(OrderedHashmap **sysctl_options, const char *path, bool ignore_enoent) { _cleanup_fclose_ FILE *f = NULL; unsigned c = 0; @@ -144,7 +200,7 @@ static int parse_file(OrderedHashmap **sysctl_options, const char *path, bool ig for (;;) { _cleanup_(option_freep) Option *new_option = NULL; _cleanup_free_ char *l = NULL; - bool ignore_failure; + bool ignore_failure = false; Option *existing; char *p, *value; int k; @@ -165,25 +221,35 @@ static int parse_file(OrderedHashmap **sysctl_options, const char *path, bool ig continue; value = strchr(p, '='); - if (!value) { - log_syntax(NULL, LOG_WARNING, path, c, 0, "Line is not an assignment, ignoring: %s", p); - if (r == 0) - r = -EINVAL; - continue; + if (value) { + if (p[0] == '-') { + ignore_failure = true; + p++; + } + + *value = 0; + value++; + value = strstrip(value); + + } else { + if (p[0] == '-') + /* We have a "negative match" option. Let's continue with value==NULL. */ + p++; + else { + log_syntax(NULL, LOG_WARNING, path, c, 0, + "Line is not an assignment, ignoring: %s", p); + if (r == 0) + r = -EINVAL; + continue; + } } - *value = 0; - value++; - p = strstrip(p); - ignore_failure = p[0] == '-'; - if (ignore_failure) - p++; - p = sysctl_normalize(p); - value = strstrip(value); - if (!test_prefix(p)) + /* We can't filter out globs at this point, we'll need to do that later. */ + if (!string_is_glob(p) && + !test_prefix(p)) continue; if (ordered_hashmap_ensure_allocated(sysctl_options, &option_hash_ops) < 0) From 5d4fc0e665a3639f92ac880896c56f9533441307 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Thu, 30 Jan 2020 10:41:31 +0100 Subject: [PATCH 7/7] sysctl: set ipv4 settings in a race-free way Fixes #6282. This solution is a bit busy, but we close the race without setting *.all.*, so it is still possible to set a different setting for particular interfaces. Setting just "default" is not very useful because any interfaces present before systemd-sysctl is invoked are not affected. Setting "all" is too harsh, because the kernel takes the stronger of the device-specific setting and the "all" value, so effectively having a weaker setting for specific interfaces is not possible. --- sysctl.d/50-default.conf | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sysctl.d/50-default.conf b/sysctl.d/50-default.conf index c22d690de4..14378b24af 100644 --- a/sysctl.d/50-default.conf +++ b/sysctl.d/50-default.conf @@ -23,12 +23,18 @@ kernel.core_uses_pid = 1 # Source route verification net.ipv4.conf.default.rp_filter = 2 +net.ipv4.conf.*.rp_filter = 2 +-net.ipv4.conf.all.rp_filter # Do not accept source routing net.ipv4.conf.default.accept_source_route = 0 +net.ipv4.conf.*.accept_source_route = 0 +-net.ipv4.conf.all.accept_source_route # Promote secondary addresses when the primary address is removed net.ipv4.conf.default.promote_secondaries = 1 +net.ipv4.conf.*.promote_secondaries = 1 +-net.ipv4.conf.all.promote_secondaries # ping(8) without CAP_NET_ADMIN and CAP_NET_RAW # The upper limit is set to 2^31-1. Values greater than that get rejected by