manager: add socket for receiving handoff timestamps from forked children

This adds an AF_UNIX socket pair to the manager that we can collect
handoff timestamp messages on.

The idea is that forked off children send a datagram with a timestamp
and we use its sender PID to match it against the right forked off
process.

This part only implements the receiving side: a socket is created, and
listened on. Received datagrams are parsed, verified and then dispatched
to the interested units.
This commit is contained in:
Lennart Poettering
2024-04-23 23:17:49 +02:00
parent 33fc8b0d95
commit 817062e621
4 changed files with 151 additions and 17 deletions

View File

@@ -148,6 +148,12 @@ int manager_serialize(
return r;
}
if (m->handoff_timestamp_fds[0] >= 0) {
r = serialize_fd_many(f, fds, "handoff-timestamp-fds", m->handoff_timestamp_fds, 2);
if (r < 0)
return r;
}
(void) serialize_ratelimit(f, "dump-ratelimit", &m->dump_ratelimit);
(void) serialize_ratelimit(f, "reload-reexec-ratelimit", &m->reload_reexec_ratelimit);

View File

@@ -56,6 +56,7 @@
#include "inotify-util.h"
#include "install.h"
#include "io-util.h"
#include "iovec-util.h"
#include "label-util.h"
#include "load-fragment.h"
#include "locale-setup.h"
@@ -124,6 +125,7 @@ static int manager_dispatch_signal_fd(sd_event_source *source, int fd, uint32_t
static int manager_dispatch_time_change_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
static int manager_dispatch_idle_pipe_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
static int manager_dispatch_user_lookup_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
static int manager_dispatch_handoff_timestamp_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata);
static int manager_dispatch_jobs_in_progress(sd_event_source *source, usec_t usec, void *userdata);
static int manager_dispatch_run_queue(sd_event_source *source, void *userdata);
static int manager_dispatch_sigchld(sd_event_source *source, void *userdata);
@@ -904,6 +906,7 @@ int manager_new(RuntimeScope runtime_scope, ManagerTestRunFlags test_run_flags,
.cgroups_agent_fd = -EBADF,
.signal_fd = -EBADF,
.user_lookup_fds = EBADF_PAIR,
.handoff_timestamp_fds = EBADF_PAIR,
.private_listen_fd = -EBADF,
.dev_autofs_fd = -EBADF,
.cgroup_inotify_fd = -EBADF,
@@ -1253,6 +1256,49 @@ static int manager_setup_user_lookup_fd(Manager *m) {
return 0;
}
static int manager_setup_handoff_timestamp_fd(Manager *m) {
int r;
assert(m);
/* Set up the socket pair used for for passing timestamps back when the executor processes we fork
* off invokes execve(), i.e. when we hand off control to our payload processes. */
if (m->handoff_timestamp_fds[0] < 0) {
m->handoff_timestamp_event_source = sd_event_source_disable_unref(m->handoff_timestamp_event_source);
safe_close_pair(m->handoff_timestamp_fds);
if (socketpair(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0, m->handoff_timestamp_fds) < 0)
return log_error_errno(errno, "Failed to allocate handoff timestamp socket: %m");
/* Make sure children never have to block */
(void) fd_increase_rxbuf(m->handoff_timestamp_fds[0], NOTIFY_RCVBUF_SIZE);
r = setsockopt_int(m->handoff_timestamp_fds[0], SOL_SOCKET, SO_PASSCRED, true);
if (r < 0)
return log_error_errno(r, "SO_PASSCRED failed: %m");
/* Mark the receiving socket as O_NONBLOCK (but leave sending side as-is) */
r = fd_nonblock(m->handoff_timestamp_fds[0], true);
if (r < 0)
return log_error_errno(r, "Failed to make handoff timestamp socket O_NONBLOCK: %m");
}
if (!m->handoff_timestamp_event_source) {
r = sd_event_add_io(m->event, &m->handoff_timestamp_event_source, m->handoff_timestamp_fds[0], EPOLLIN, manager_dispatch_handoff_timestamp_fd, m);
if (r < 0)
return log_error_errno(errno, "Failed to allocate handoff timestamp event source: %m");
r = sd_event_source_set_priority(m->handoff_timestamp_event_source, EVENT_PRIORITY_HANDOFF_TIMESTAMP);
if (r < 0)
return log_error_errno(errno, "Failed to set priority of handoff timestamp event source: %m");
(void) sd_event_source_set_description(m->handoff_timestamp_event_source, "handoff-timestamp");
}
return 0;
}
static unsigned manager_dispatch_cleanup_queue(Manager *m) {
Unit *u;
unsigned n = 0;
@@ -1667,12 +1713,14 @@ Manager* manager_free(Manager *m) {
sd_event_source_unref(m->jobs_in_progress_event_source);
sd_event_source_unref(m->run_queue_event_source);
sd_event_source_unref(m->user_lookup_event_source);
sd_event_source_unref(m->handoff_timestamp_event_source);
sd_event_source_unref(m->memory_pressure_event_source);
safe_close(m->signal_fd);
safe_close(m->notify_fd);
safe_close(m->cgroups_agent_fd);
safe_close_pair(m->user_lookup_fds);
safe_close_pair(m->handoff_timestamp_fds);
manager_close_ask_password(m);
@@ -2013,6 +2061,11 @@ int manager_startup(Manager *m, FILE *serialization, FDSet *fds, const char *roo
/* This shouldn't fail, except if things are really broken. */
return r;
r = manager_setup_handoff_timestamp_fd(m);
if (r < 0)
/* This shouldn't fail, except if things are really broken. */
return r;
/* Connect to the bus if we are good for it */
manager_setup_bus(m);
@@ -3655,6 +3708,7 @@ int manager_reload(Manager *m) {
(void) manager_setup_notify(m);
(void) manager_setup_cgroups_agent(m);
(void) manager_setup_user_lookup_fd(m);
(void) manager_setup_handoff_timestamp_fd(m);
/* Third, fire things up! */
manager_coldplug(m);
@@ -4830,6 +4884,73 @@ static int manager_dispatch_user_lookup_fd(sd_event_source *source, int fd, uint
return 0;
}
static int manager_dispatch_handoff_timestamp_fd(sd_event_source *source, int fd, uint32_t revents, void *userdata) {
Manager *m = ASSERT_PTR(userdata);
usec_t ts[2] = {};
CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(struct ucred))) control;
struct msghdr msghdr = {
.msg_iov = &IOVEC_MAKE(ts, sizeof(ts)),
.msg_iovlen = 1,
.msg_control = &control,
.msg_controllen = sizeof(control),
};
ssize_t n;
assert(source);
n = recvmsg_safe(m->handoff_timestamp_fds[0], &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC|MSG_TRUNC);
if (ERRNO_IS_NEG_TRANSIENT(n))
return 0; /* Spurious wakeup, try again */
if (n == -EXFULL) {
log_warning("Got message with truncated control, ignoring.");
return 0;
}
if (n < 0)
return log_error_errno(n, "Failed to receive handoff timestamp message: %m");
if (msghdr.msg_flags & MSG_TRUNC) {
log_warning("Got truncated handoff timestamp message, ignoring.");
return 0;
}
if (n != sizeof(ts)) {
log_warning("Got handoff timestamp message of unexpected size %zi (expected %zu), ignoring.", n, sizeof(ts));
return 0;
}
struct ucred *ucred = CMSG_FIND_DATA(&msghdr, SOL_SOCKET, SCM_CREDENTIALS, struct ucred);
if (!ucred || !pid_is_valid(ucred->pid)) {
log_warning("Received notify message without valid credentials. Ignoring.");
return 0;
}
log_debug("Got handoff timestamp event for PID " PID_FMT ".", ucred->pid);
_cleanup_free_ Unit **units = NULL;
int n_units = manager_get_units_for_pidref(m, &PIDREF_MAKE_FROM_PID(ucred->pid), &units);
if (n_units < 0) {
log_warning_errno(n_units, "Unable to determine units for PID " PID_FMT ", ignoring: %m", ucred->pid);
return 0;
}
if (n_units == 0) {
log_debug("Got handoff timestamp for process " PID_FMT " we are not interested in, ignoring.", ucred->pid);
return 0;
}
dual_timestamp dt = {
.realtime = ts[0],
.monotonic = ts[1],
};
FOREACH_ARRAY(u, units, n_units) {
if (!UNIT_VTABLE(*u)->notify_handoff_timestamp)
continue;
UNIT_VTABLE(*u)->notify_handoff_timestamp(*u, ucred, &dt);
}
return 0;
}
void manager_ref_console(Manager *m) {
assert(m);

View File

@@ -286,6 +286,9 @@ struct Manager {
int user_lookup_fds[2];
sd_event_source *user_lookup_event_source;
int handoff_timestamp_fds[2];
sd_event_source *handoff_timestamp_event_source;
RuntimeScope runtime_scope;
LookupPaths lookup_paths;
@@ -656,22 +659,23 @@ void unit_defaults_done(UnitDefaults *defaults);
enum {
/* most important … */
EVENT_PRIORITY_USER_LOOKUP = SD_EVENT_PRIORITY_NORMAL-10,
EVENT_PRIORITY_MOUNT_TABLE = SD_EVENT_PRIORITY_NORMAL-9,
EVENT_PRIORITY_SWAP_TABLE = SD_EVENT_PRIORITY_NORMAL-9,
EVENT_PRIORITY_CGROUP_AGENT = SD_EVENT_PRIORITY_NORMAL-8, /* cgroupv1 */
EVENT_PRIORITY_CGROUP_INOTIFY = SD_EVENT_PRIORITY_NORMAL-8, /* cgroupv2 */
EVENT_PRIORITY_CGROUP_OOM = SD_EVENT_PRIORITY_NORMAL-7,
EVENT_PRIORITY_EXEC_FD = SD_EVENT_PRIORITY_NORMAL-6,
EVENT_PRIORITY_NOTIFY = SD_EVENT_PRIORITY_NORMAL-5,
EVENT_PRIORITY_SIGCHLD = SD_EVENT_PRIORITY_NORMAL-4,
EVENT_PRIORITY_SIGNALS = SD_EVENT_PRIORITY_NORMAL-3,
EVENT_PRIORITY_CGROUP_EMPTY = SD_EVENT_PRIORITY_NORMAL-2,
EVENT_PRIORITY_TIME_CHANGE = SD_EVENT_PRIORITY_NORMAL-1,
EVENT_PRIORITY_TIME_ZONE = SD_EVENT_PRIORITY_NORMAL-1,
EVENT_PRIORITY_IPC = SD_EVENT_PRIORITY_NORMAL,
EVENT_PRIORITY_REWATCH_PIDS = SD_EVENT_PRIORITY_IDLE,
EVENT_PRIORITY_SERVICE_WATCHDOG = SD_EVENT_PRIORITY_IDLE+1,
EVENT_PRIORITY_RUN_QUEUE = SD_EVENT_PRIORITY_IDLE+2,
EVENT_PRIORITY_USER_LOOKUP = SD_EVENT_PRIORITY_NORMAL-11,
EVENT_PRIORITY_MOUNT_TABLE = SD_EVENT_PRIORITY_NORMAL-10,
EVENT_PRIORITY_SWAP_TABLE = SD_EVENT_PRIORITY_NORMAL-10,
EVENT_PRIORITY_CGROUP_AGENT = SD_EVENT_PRIORITY_NORMAL-9, /* cgroupv1 */
EVENT_PRIORITY_CGROUP_INOTIFY = SD_EVENT_PRIORITY_NORMAL-9, /* cgroupv2 */
EVENT_PRIORITY_CGROUP_OOM = SD_EVENT_PRIORITY_NORMAL-8,
EVENT_PRIORITY_HANDOFF_TIMESTAMP = SD_EVENT_PRIORITY_NORMAL-7,
EVENT_PRIORITY_EXEC_FD = SD_EVENT_PRIORITY_NORMAL-6,
EVENT_PRIORITY_NOTIFY = SD_EVENT_PRIORITY_NORMAL-5,
EVENT_PRIORITY_SIGCHLD = SD_EVENT_PRIORITY_NORMAL-4,
EVENT_PRIORITY_SIGNALS = SD_EVENT_PRIORITY_NORMAL-3,
EVENT_PRIORITY_CGROUP_EMPTY = SD_EVENT_PRIORITY_NORMAL-2,
EVENT_PRIORITY_TIME_CHANGE = SD_EVENT_PRIORITY_NORMAL-1,
EVENT_PRIORITY_TIME_ZONE = SD_EVENT_PRIORITY_NORMAL-1,
EVENT_PRIORITY_IPC = SD_EVENT_PRIORITY_NORMAL,
EVENT_PRIORITY_REWATCH_PIDS = SD_EVENT_PRIORITY_IDLE,
EVENT_PRIORITY_SERVICE_WATCHDOG = SD_EVENT_PRIORITY_IDLE+1,
EVENT_PRIORITY_RUN_QUEUE = SD_EVENT_PRIORITY_IDLE+2,
/* … to least important */
};

View File

@@ -632,6 +632,9 @@ typedef struct UnitVTable {
/* Called whenever a process of this unit sends us a message */
void (*notify_message)(Unit *u, const struct ucred *ucred, char * const *tags, FDSet *fds);
/* Called whenever we learn a handoff timestamp */
void (*notify_handoff_timestamp)(Unit *u, const struct ucred *ucred, const dual_timestamp *ts);
/* Called whenever a name this Unit registered for comes or goes away. */
void (*bus_name_owner_change)(Unit *u, const char *new_owner);