diff --git a/man/sysctl.d.xml b/man/sysctl.d.xml index 32084ee8d6..4df71e21eb 100644 --- a/man/sysctl.d.xml +++ b/man/sysctl.d.xml @@ -24,6 +24,16 @@ /etc/sysctl.d/*.conf /run/sysctl.d/*.conf /usr/lib/sysctl.d/*.conf + + key.name.under.proc.sys = some value +key/name/under/proc/sys = some value +key/middle.part.with.dots/foo = 123 +key.middle/part/with/dots.foo = 123 +-key.that.will.not.fail = value +key.pattern.*.with.glob = whatever +-key.pattern.excluded.with.glob +key.pattern.overriden.with.glob = custom + @@ -44,34 +54,32 @@ first non-whitespace character is # or ; are ignored. - Note that either / or - . may be used as separators within sysctl - variable names. If the first separator is a slash, remaining - slashes and dots are left intact. If the first separator is a dot, - dots and slashes are interchanged. - kernel.domainname=foo and - kernel/domainname=foo are equivalent and will - cause foo to be written to + Note that either / or . may be used as separators within + sysctl variable names. If the first separator is a slash, remaining slashes and dots are left intact. If + the first separator is a dot, dots and slashes are interchanged. + kernel.domainname=foo and kernel/domainname=foo are equivalent and + will cause foo to be written to /proc/sys/kernel/domainname. Either net.ipv4.conf.enp3s0/200.forwarding or - net/ipv4/conf/enp3s0.200/forwarding may be used - to refer to - /proc/sys/net/ipv4/conf/enp3s0.200/forwarding. - + net/ipv4/conf/enp3s0.200/forwarding may be used to refer to + /proc/sys/net/ipv4/conf/enp3s0.200/forwarding. A glob + glob7 pattern may be + used to write the same value to all matching keys. Keys for which an explicit pattern exists will be + excluded from any glob matching. In addition, a key may be explicitly excluded from being set by any + matching glob patterns by specifying the key name prefixed with a - character and not + followed by =, see SYNOPSIS. - Any access permission errors and attempts to write variables not defined on the local system are - logged, but do not cause the service to fail. Moreover, if a variable assignment is prefixed with a - single - character, failure to set the variable will be logged, but will not cause the - service to fail. All other errors when setting variables cause the service to return failure at the end - (other variables are still processed). + Any access permission errors and attempts to write variables not present on the local system are + logged, but do not cause the service to fail. Debug log level is used, which means that the message will + not show up at all by default. Moreover, if a variable assignment is prefixed with a single + - character, any failure to set the variable will be logged at debug level, but will + not cause the service to fail. All other errors when setting variables are logged with higher priority + and cause the service to return failure at the end (other variables are still processed). - The settings configured with sysctl.d - files will be applied early on boot. The network - interface-specific options will also be applied individually for - each network interface as it shows up in the system. (More - specifically, net.ipv4.conf.*, - net.ipv6.conf.*, - net.ipv4.neigh.* and + The settings configured with sysctl.d files will be applied early on boot. The + network interface-specific options will also be applied individually for each network interface as it + shows up in the system. (More specifically, net.ipv4.conf.*, + net.ipv6.conf.*, net.ipv4.neigh.* and net.ipv6.neigh.*). Many sysctl parameters only become available when certain @@ -148,6 +156,26 @@ net.bridge.bridge-nf-call-arptables = 0 (starting with kernel 3.18), so simply not loading the module is sufficient to avoid filtering. + + + Set network routing properties for all interfaces + /etc/systemd/20-rp_filter.conf: + + net.ipv4.conf.default.rp_filter = 2 +net.ipv4.conf.*.rp_filter = 2 +-net.ipv4.conf.all.rp_filter +net.ipv4.conf.hub0.rp_filter = 1 + + + The key will be set to "2" for all interfaces, except "hub0". We set + net.ipv4.conf.default.rp_filter first, so any interfaces which are added + later will get this value (this also covers any interfaces detected while we're + running). The glob matches any interfaces which were detected earlier. The glob + will also match net.ipv4.conf.all.rp_filter, which we don't want to set at all, so + it is explicitly excluded. And "hub0" is excluded from the glob because it has an explicit setting. + + + diff --git a/src/shared/sysctl-util.c b/src/shared/sysctl-util.c index 12fb3ef7ea..8543dbd2d0 100644 --- a/src/shared/sysctl-util.c +++ b/src/shared/sysctl-util.c @@ -9,6 +9,7 @@ #include "fileio.h" #include "log.h" #include "macro.h" +#include "path-util.h" #include "string-util.h" #include "sysctl-util.h" @@ -16,22 +17,27 @@ char *sysctl_normalize(char *s) { char *n; n = strpbrk(s, "/."); + /* If the first separator is a slash, the path is * assumed to be normalized and slashes remain slashes * and dots remains dots. */ - if (!n || *n == '/') - return s; - /* Otherwise, dots become slashes and slashes become - * dots. Fun. */ - while (n) { - if (*n == '.') - *n = '/'; - else - *n = '.'; + if (n && *n == '.') + /* Dots become slashes and slashes become dots. Fun. */ + do { + if (*n == '.') + *n = '/'; + else + *n = '.'; - n = strpbrk(n + 1, "/."); - } + n = strpbrk(n + 1, "/."); + } while (n); + + path_simplify(s, true); + + /* Kill the leading slash, but keep the first character of the string in the same place. */ + if (*s == '/' && *(s+1)) + memmove(s, s+1, strlen(s)); return s; } diff --git a/src/shared/sysctl-util.h b/src/shared/sysctl-util.h index cb30a97752..dce0151519 100644 --- a/src/shared/sysctl-util.h +++ b/src/shared/sysctl-util.h @@ -6,7 +6,7 @@ #include "macro.h" #include "stdio-util.h" -#include "util.h" +#include "string-util.h" char *sysctl_normalize(char *s); int sysctl_read(const char *property, char **value); diff --git a/src/sysctl/sysctl.c b/src/sysctl/sysctl.c index afef0a222b..bbcf0c4323 100644 --- a/src/sysctl/sysctl.c +++ b/src/sysctl/sysctl.c @@ -14,6 +14,7 @@ #include "errno-util.h" #include "fd-util.h" #include "fileio.h" +#include "glob-util.h" #include "hashmap.h" #include "log.h" #include "main-func.h" @@ -49,63 +50,6 @@ static Option *option_free(Option *o) { DEFINE_TRIVIAL_CLEANUP_FUNC(Option*, option_free); DEFINE_HASH_OPS_WITH_VALUE_DESTRUCTOR(option_hash_ops, char, string_hash_func, string_compare_func, Option, option_free); -static Option *option_new( - const char *key, - const char *value, - bool ignore_failure) { - - _cleanup_(option_freep) Option *o = NULL; - - assert(key); - assert(value); - - o = new(Option, 1); - if (!o) - return NULL; - - *o = (Option) { - .key = strdup(key), - .value = strdup(value), - .ignore_failure = ignore_failure, - }; - - if (!o->key || !o->value) - return NULL; - - return TAKE_PTR(o); -} - -static int apply_all(OrderedHashmap *sysctl_options) { - Option *option; - Iterator i; - int r = 0; - - ORDERED_HASHMAP_FOREACH(option, sysctl_options, i) { - int k; - - k = sysctl_write(option->key, option->value); - if (k < 0) { - /* If the sysctl is not available in the kernel or we are running with reduced - * privileges and cannot write it, then log about the issue, and proceed without - * failing. (EROFS is treated as a permission problem here, since that's how - * container managers usually protected their sysctls.) In all other cases log an - * error and make the tool fail. */ - - if (option->ignore_failure || k == -EROFS || ERRNO_IS_PRIVILEGE(k)) - log_debug_errno(k, "Couldn't write '%s' to '%s', ignoring: %m", option->value, option->key); - else if (k == -ENOENT) - log_info_errno(k, "Couldn't write '%s' to '%s', ignoring: %m", option->value, option->key); - else { - log_error_errno(k, "Couldn't write '%s' to '%s': %m", option->value, option->key); - if (r == 0) - r = k; - } - } - } - - return r; -} - static bool test_prefix(const char *p) { char **i; @@ -118,6 +62,7 @@ static bool test_prefix(const char *p) { t = path_startswith(*i, "/proc/sys/"); if (!t) t = *i; + if (path_startswith(p, t)) return true; } @@ -125,6 +70,117 @@ static bool test_prefix(const char *p) { return false; } +static Option *option_new( + const char *key, + const char *value, + bool ignore_failure) { + + _cleanup_(option_freep) Option *o = NULL; + + assert(key); + + o = new(Option, 1); + if (!o) + return NULL; + + *o = (Option) { + .key = strdup(key), + .value = value ? strdup(value) : NULL, + .ignore_failure = ignore_failure, + }; + + if (!o->key) + return NULL; + if (value && !o->value) + return NULL; + + return TAKE_PTR(o); +} + +static int sysctl_write_or_warn(const char *key, const char *value, bool ignore_failure) { + int r; + + r = sysctl_write(key, value); + if (r < 0) { + /* If the sysctl is not available in the kernel or we are running with reduced privileges and + * cannot write it, then log about the issue, and proceed without failing. (EROFS is treated + * as a permission problem here, since that's how container managers usually protected their + * sysctls.) In all other cases log an error and make the tool fail. */ + if (ignore_failure || r == -EROFS || ERRNO_IS_PRIVILEGE(r)) + log_debug_errno(r, "Couldn't write '%s' to '%s', ignoring: %m", value, key); + else if (r == -ENOENT) + log_info_errno(r, "Couldn't write '%s' to '%s', ignoring: %m", value, key); + else + return log_error_errno(r, "Couldn't write '%s' to '%s': %m", value, key); + } + + return 0; +} + +static int apply_all(OrderedHashmap *sysctl_options) { + Option *option; + Iterator i; + int r = 0; + + ORDERED_HASHMAP_FOREACH(option, sysctl_options, i) { + int k; + + /* Ignore "negative match" options, they are there only to exclude stuff from globs. */ + if (!option->value) + continue; + + if (string_is_glob(option->key)) { + _cleanup_strv_free_ char **paths = NULL; + _cleanup_free_ char *pattern = NULL; + char **s; + + pattern = path_join("/proc/sys", option->key); + if (!pattern) + return log_oom(); + + k = glob_extend(&paths, pattern); + if (k < 0) { + if (option->ignore_failure || ERRNO_IS_PRIVILEGE(r)) + log_debug_errno(k, "Failed to resolve glob '%s', ignoring: %m", + option->key); + else { + log_error_errno(k, "Couldn't resolve glob '%s': %m", + option->key); + if (r == 0) + r = k; + } + + } else if (strv_isempty(paths)) + log_debug("No match for glob: %s", option->key); + + STRV_FOREACH(s, paths) { + const char *key; + + assert_se(key = path_startswith(*s, "/proc/sys")); + + if (!test_prefix(key)) + continue; + + if (ordered_hashmap_contains(sysctl_options, key)) { + log_info("Not setting %s (explicit setting exists).", key); + continue; + } + + k = sysctl_write_or_warn(key, option->value, option->ignore_failure); + if (r == 0) + r = k; + } + + } else { + k = sysctl_write_or_warn(option->key, option->value, option->ignore_failure); + if (r == 0) + r = k; + } + } + + return r; +} + static int parse_file(OrderedHashmap **sysctl_options, const char *path, bool ignore_enoent) { _cleanup_fclose_ FILE *f = NULL; unsigned c = 0; @@ -144,7 +200,7 @@ static int parse_file(OrderedHashmap **sysctl_options, const char *path, bool ig for (;;) { _cleanup_(option_freep) Option *new_option = NULL; _cleanup_free_ char *l = NULL; - bool ignore_failure; + bool ignore_failure = false; Option *existing; char *p, *value; int k; @@ -165,25 +221,35 @@ static int parse_file(OrderedHashmap **sysctl_options, const char *path, bool ig continue; value = strchr(p, '='); - if (!value) { - log_syntax(NULL, LOG_WARNING, path, c, 0, "Line is not an assignment, ignoring: %s", p); - if (r == 0) - r = -EINVAL; - continue; + if (value) { + if (p[0] == '-') { + ignore_failure = true; + p++; + } + + *value = 0; + value++; + value = strstrip(value); + + } else { + if (p[0] == '-') + /* We have a "negative match" option. Let's continue with value==NULL. */ + p++; + else { + log_syntax(NULL, LOG_WARNING, path, c, 0, + "Line is not an assignment, ignoring: %s", p); + if (r == 0) + r = -EINVAL; + continue; + } } - *value = 0; - value++; - p = strstrip(p); - ignore_failure = p[0] == '-'; - if (ignore_failure) - p++; - p = sysctl_normalize(p); - value = strstrip(value); - if (!test_prefix(p)) + /* We can't filter out globs at this point, we'll need to do that later. */ + if (!string_is_glob(p) && + !test_prefix(p)) continue; if (ordered_hashmap_ensure_allocated(sysctl_options, &option_hash_ops) < 0) diff --git a/src/test/meson.build b/src/test/meson.build index 5df1366561..b79b197c16 100644 --- a/src/test/meson.build +++ b/src/test/meson.build @@ -325,6 +325,10 @@ tests += [ [], []], + [['src/test/test-sysctl-util.c'], + [], + []], + [['src/test/test-user-util.c'], [], []], diff --git a/src/test/test-sysctl-util.c b/src/test/test-sysctl-util.c new file mode 100644 index 0000000000..2b957dd4d6 --- /dev/null +++ b/src/test/test-sysctl-util.c @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include "strv.h" +#include "sysctl-util.h" +#include "tests.h" + +static const char* cases[] = { + "a.b.c", "a/b/c", + "a/b/c", "a/b/c", + "a/b.c/d", "a/b.c/d", + "a.b/c.d", "a/b.c/d", + + "net.ipv4.conf.enp3s0/200.forwarding", "net/ipv4/conf/enp3s0.200/forwarding", + "net/ipv4/conf/enp3s0.200/forwarding", "net/ipv4/conf/enp3s0.200/forwarding", + + "a...b...c", "a/b/c", + "a///b///c", "a/b/c", + ".a...b...c", "a/b/c", + "/a///b///c", "a/b/c", + NULL, +}; + +static void test_sysctl_normalize(void) { + log_info("/* %s */", __func__); + + const char **s, **expected; + STRV_FOREACH_PAIR(s, expected, cases) { + _cleanup_free_ char *t; + + assert_se(t = strdup(*s)); + assert_se(sysctl_normalize(t) == t); + + log_info("\"%s\" → \"%s\", expected \"%s\"", *s, t, *expected); + assert_se(streq(t, *expected)); + } +} + +int main(int argc, char *argv[]) { + test_setup_logging(LOG_INFO); + + test_sysctl_normalize(); + + return 0; +} diff --git a/sysctl.d/50-default.conf b/sysctl.d/50-default.conf index 41bd1f9183..14378b24af 100644 --- a/sysctl.d/50-default.conf +++ b/sysctl.d/50-default.conf @@ -22,13 +22,19 @@ kernel.sysrq = 16 kernel.core_uses_pid = 1 # Source route verification -net.ipv4.conf.all.rp_filter = 2 +net.ipv4.conf.default.rp_filter = 2 +net.ipv4.conf.*.rp_filter = 2 +-net.ipv4.conf.all.rp_filter # Do not accept source routing -net.ipv4.conf.all.accept_source_route = 0 +net.ipv4.conf.default.accept_source_route = 0 +net.ipv4.conf.*.accept_source_route = 0 +-net.ipv4.conf.all.accept_source_route # Promote secondary addresses when the primary address is removed -net.ipv4.conf.all.promote_secondaries = 1 +net.ipv4.conf.default.promote_secondaries = 1 +net.ipv4.conf.*.promote_secondaries = 1 +-net.ipv4.conf.all.promote_secondaries # ping(8) without CAP_NET_ADMIN and CAP_NET_RAW # The upper limit is set to 2^31-1. Values greater than that get rejected by