snapd/tests/lib/tools/tests.session

#!/bin/bash -e

# Print the command-line synopsis of tests.session to stdout, one line per
# supported invocation form.
show_help() {
	printf '%s\n' \
		"usage: tests.session exec [-u USER] [-p PID_FILE] [--] <CMD>" \
		"       tests.session prepare | restore [-u USER | -u USER1,USER2,...]" \
		"       tests.session kill-leaked" \
		"       tests.session dump" \
		"       tests.session has-system-systemd-and-dbus" \
		"       tests.session has-session-systemd-and-dbus"
}

# Entry point: parse the command line, then either carry out one of the
# maintenance actions (kill-leaked, prepare, restore, dump, has-*) or run a
# command as the selected user inside a transient systemd service.
#
# Arguments: the script's command line. Options (-u USER, -p PID_FILE) are
#   only recognised before "exec" or "--"; everything after either of them is
#   taken verbatim as the command to run. When no action is named, the
#   remaining arguments are run exactly as with "exec".
# Globals read: TESTSTMP, SNAPD_DEBUG, SNAP_CONFINE_DEBUG, SPREAD_SYSTEM.
# Exit status: the function always terminates the shell. 0 on success,
#   non-zero on usage errors, when not running as root, or when the requested
#   action or command fails.
main() {
	# The script relies on errexit throughout. Enable it here as well, since
	# the "-e" on the shebang line is not applied when the file is started as
	# "bash tests.session ...".
	set -e

	if [ $# -eq 0 ]; then
		show_help
		exit 1
	fi
	local user=root
	local pid_file=
	local action=
	while [ $# -gt 0 ]; do
		case "$1" in
			-h|--help)
				show_help
				exit 0
				;;
			-p)
				if [ $# -eq 1 ]; then
					echo "tests.session: option -p requires an argument" >&2
					exit 1
				fi
				pid_file="$2"
				shift 2
				;;
			--)
				shift
				break
				;;
			kill-leaked)
				action=kill-leaked
				shift
				;;
			prepare)
				action=prepare
				shift
				;;
			restore)
				action=restore
				shift
				;;
			dump)
				action=dump
				shift
				;;
			has-system-systemd-and-dbus)
				action=has-system-systemd-and-dbus
				shift
				;;
			has-session-systemd-and-dbus)
				action=has-session-systemd-and-dbus
				shift
				;;
			exec)
				action="exec"
				shift
				# remaining arguments are the command to execute
				break
				;;
			-u)
				if [ $# -eq 1 ]; then
					echo "tests.session: option -u requires an argument" >&2
					exit 1
				fi
				user="$2"
				shift 2
				;;
			-*)
				echo "tests.session: unsupported argument $1" >&2
				exit 1
				;;
			*)
				echo "tests.session: unsupported action $1" >&2
				exit 1
				;;
		esac
	done

	if [ "$(id -u)" -ne 0 ]; then
		echo "tests.session needs to be invoked as root" >&2
		exit 1
	fi

	# Every arm below exits. Only "exec" (or no action at all) falls through
	# to the command execution code further down.
	case "$action" in
		kill-leaked)
			# Work around a bug in older versions of logind that leak closing session
			# without a process inhabiting it anymore. In the case we've observed there's a
			# session that is being closed with a missing process.
			for session_id in $(loginctl --no-legend | awk '{ print $1 }'); do
				# It would be easier to use "loginctl show-session --value" but we cannot rely on it.
				if [ "$(loginctl show-session "$session_id" --property=State | cut -d = -f 2-)" = closing ]; then
					leader=$(loginctl show-session "$session_id" --property=Leader | cut -d = -f 2-)
					if [ ! -e "/proc/$leader" ]; then
						loginctl kill-session "$session_id"
					fi
				fi
			done

			exit 0
			;;
		prepare)
			# Try to enable linger for the selected user(s).
			for u in $(echo "$user" | tr ',' ' '); do
				if ! loginctl enable-linger "$u"; then
					echo "tests.session requires external fix for https://github.com/systemd/systemd/issues/12401" >&2
					exit 1
				fi
			done

			for u in $(echo "$user" | tr ',' ' '); do
				# We've enabled linger, now let's explicitly start the
				# default.target. This ensures that test code does not need to pay
				# extra attention to synchronization.
				#
				# Because some systems do not support systemd --user (see
				# tests/main/tests.session-support for details), check if this is
				# expected to work ahead of trying.
				if tests.session -u "$u" exec systemctl --user is-enabled default.target >/dev/null; then
					tests.session -u "$u" exec systemctl --user start default.target
				fi
			done

			exit 0
			;;
		restore)
			# Disable linger for the selected user(s).
			for u in $(echo "$user" | tr ',' ' '); do

				loginctl disable-linger "$u"
				# If the user is not root, also stop their user slice and ensure
				# their XDG_RUNTIME_DIR goes away. Currently doing this for root is
				# impossible, because spread logs into a test system over ssh and
				# gets a root user slice, at minimum.
				#
				# If spread ever changes, so that the tests are executed as system
				# services, then this condition could be removed.
				if [ "$u" != root ]; then
					uid="$(id -u "$u")"

					# Unmount gvfs-fuse and the document portal explicitly.  FUSE
					# file-systems can stay mounted in a broken state even if their
					# corresponding process goes away. This can happen if we stop
					# the slice and the cleanup is not perfectly graceful.
					umount "/run/user/$uid/gvfs" || true
					umount "/run/user/$uid/doc" || true

					# See user@.service(5) for discussion about user-UID.slice and
					# user@UID.service and why both are used. Here we stop the
					# slice as that encompasses both user@UID.service and any
					# existing sessions of that user.
					systemctl stop "user-$uid.slice"

					# On Ubuntu 16.04 and Debian 9 stopping the slice above is
					# insufficient to stop the service responsible for
					# /run/user/UID *immediately*. This seems to be related to
					# *absence* of user-session-dir@.service (template service)
					# which is responsible for this operation later on (and where
					# it works more reliably).
					#
					# Give the system some time to clean things up.
					retry -n 3 --wait 3 test ! -e "/run/user/$uid"
					if [ -e "/run/user/$uid" ]; then
						echo "tests.session: /run/user/$uid still exists" >&2
						exit 1
					fi
				fi
			done

			if [ -e /run/tests.session-core16.workaround ]; then
				rm /run/tests.session-core16.workaround
				# Undo changes to make /var/lib/systemd/ writable.
				umount -l /var/lib/systemd
			fi

			# Make sure there are neither stopping nor abandoned user sessions
			systemctl restart systemd-logind.service

			exit 0
			;;
		dump)
			echo "Active sessions:"
			for session_id in $(loginctl list-sessions --no-legend | awk '{ print($1) }'); do
				echo "Details of session $session_id"
				loginctl show-session "$session_id"
			done
			exit 0
			;;
		has-system-systemd-and-dbus)
			#  Ubuntu 14.04 with deputy systemd does not connect to DBus
			#  and systemd-shim is really responding to "systemd" requests.
			#
			# Each unit file is looked up under /lib and /usr/lib; for
			# dbus.service the dbus-broker.service variant under /lib is accepted
			# as well. A failed check exits directly from a brace group, so the
			# outcome does not depend on errexit being active.
			test -n "$(command -v busctl)" || { echo "no busctl"; exit 1; }
			test -f /lib/systemd/system/default.target || test -f /usr/lib/systemd/system/default.target || { echo "no system default.target"; exit 1; }
			test -f /lib/systemd/system/dbus.socket || test -f /usr/lib/systemd/system/dbus.socket || { echo "no system dbus.socket"; exit 1; }
			test -f /lib/systemd/system/dbus.service || test -f /usr/lib/systemd/system/dbus.service || test -f /lib/systemd/system/dbus-broker.service || { echo "no system dbus{,-broker}.service"; exit 1; }
			echo "ok"
			exit 0
			;;
		has-session-systemd-and-dbus)
			#  CentOS 7 and derivatives disabled systemd --user
			#  https://bugs.centos.org/view.php?id=8767
			#
			# Same lookup scheme as for the system instance, applied to the
			# user unit directories.
			test -n "$(command -v busctl)" || { echo "no busctl"; exit 1; }
			test -f /lib/systemd/user/default.target || test -f /usr/lib/systemd/user/default.target || { echo "no user default.target"; exit 1; }
			test -f /lib/systemd/user/dbus.socket || test -f /usr/lib/systemd/user/dbus.socket || { echo "no user dbus.socket"; exit 1; }
			test -f /lib/systemd/user/dbus.service || test -f /usr/lib/systemd/user/dbus.service || test -f /lib/systemd/user/dbus-broker.service || { echo "no user dbus{,-broker}.service"; exit 1; }
			echo "ok"
			exit 0
			;;
	esac

	if [ -z "$(command -v busctl)" ]; then
		echo "tests.session requires busctl" >&2
		exit 1
	fi

	# This fixes a bug in some older Debian systems where /root/.profile contains
	# unconditional invocation of mesg, which on non-tty shells prints "mesg
	# ttyname failed inappropriate ioctl for device" which pollutes output from
	# invoked programs.
	# TODO: move this to spread wide project setup.
	test -f /root/.profile && sed -i -e 's/mesg n .*true/tty -s \&\& mesg n/g' /root/.profile

	# unit_name and tmp_dir are deliberately not declared local: they are
	# expanded later from inside trap handlers.
	read -r uuid < /proc/sys/kernel/random/uuid
	unit_name="tests.session-$uuid.service"
	# NOTE(review): TESTSTMP is presumably provided by the test environment;
	# when it is unset the directory ends up directly under "/".
	tmp_dir="$TESTSTMP/tests.session-$uuid"
	mkdir "$tmp_dir"
	trap 'rm -rf -- "$tmp_dir"' EXIT

	# Generate a small wrapper script that optionally records its own PID
	# into the requested file and then execs the shell-quoted command.
	(
		echo "#!/bin/sh"
		test -n "$pid_file" && echo "echo \$$ >\"$pid_file\""
		printf "exec "
		for arg in "$@"; do
			printf '%q ' "$arg"
		done
		echo
	)>"$tmp_dir/exec"
	chmod +x "$tmp_dir/exec"

	local systemd_ver
	local selinux_context_arg=""
	systemd_ver="$(systemctl --version | awk '/systemd [0-9]+/ { print $2 }' | cut -f1 -d"~")"

	# When systemd version is higher than 252 we use systemd-run to execute
	# In other scenarios we use the older approach: busctl with the dbus monitor
	if [ "$systemd_ver" -gt 252 ]; then
		if os.query is-fedora || os.query is-centos; then
			selinux_context_arg="--property SELinuxContext=unconfined_u:unconfined_r:unconfined_t:s0"
		fi

		# It is needed to have '|| true' to make sure in case systemd-run fails,
		# the command systemctl reset-failed is called
		# shellcheck disable=SC2086
		systemd-run \
			--unit="$unit_name" \
			--description "tests.session running $* as $user" \
			--quiet \
			--wait \
			--pipe \
			--service-type simple \
			--setenv TERM=xterm-256color \
			--property "Environment=SNAPD_DEBUG=$SNAPD_DEBUG SNAP_CONFINE_DEBUG=$SNAP_CONFINE_DEBUG" \
			$selinux_context_arg \
			"$(command -v runuser)" -l "$user" - -c "exec $tmp_dir/exec" || true

		# Prevent accumulation of failed sessions. The exit status is derived
		# from the state of the unit: 1 when it is still known to systemd and
		# marked failed, 0 otherwise.
		if systemctl cat "$unit_name" &>/dev/null; then
			if systemctl is-failed "$unit_name"; then
				systemctl reset-failed "$unit_name"
				exit 1
			else
				exit 0
			fi
		else
			exit 0
		fi
	fi

	mkfifo -m 0666 "$tmp_dir/result.pipe" "$tmp_dir/stdin.pipe" "$tmp_dir/stdout.pipe" "$tmp_dir/stderr.pipe"
	# Use busctl to spawn a command. The command is wrapped in shell, runuser -l
	# and redirects to capture output. Sadly busctl doesn't support passing file
	# descriptors https://github.com/systemd/systemd/issues/14954 As a workaround
	# we pass a set of pipes. This is good for non-interactive work.
	# NOTE: /dev/stdin is provided explicitly to trigger special behavior in bash,
	# as otherwise background tasks are started with /dev/null for stdin.
	cat </dev/stdin >"$tmp_dir/stdin.pipe" &
	cat_stdin_pid=$!
	cat <"$tmp_dir/stdout.pipe" >&1 &
	cat_stdout_pid=$!
	cat <"$tmp_dir/stderr.pipe" >&2 &
	cat_stderr_pid=$!

	mkfifo -m 0666 "$tmp_dir/dbus-monitor.pipe" "$tmp_dir/ready.pipe"
	monitor_expr="type='signal', sender='org.freedesktop.systemd1', interface='org.freedesktop.systemd1.Manager', path='/org/freedesktop/systemd1', member='JobRemoved'"
	stdbuf -oL dbus-monitor --system --monitor "$monitor_expr" > "$tmp_dir/dbus-monitor.pipe" 2>"$tmp_dir/dbus-monitor.stderr" &
	dbus_monitor_pid=$!

	awk_expr="
BEGIN {
	found=0;
	ready=0;
}
# Once we get the NameAcquired message we are sure dbus-monitor is connected
# and will receive JobRemoved once it is sent. The reason we are getting this
# message despite the filter we establish above is that it is sent directly to
# us, which bypasses DBus filter expressions.
/member=NameAcquired/ {
	if (!ready) {
		ready=1;
		print ready > \"$tmp_dir/ready.pipe\";
		close(\"$tmp_dir/ready.pipe\");
	}
}
# This part matches an argument to JobRemoved that contains the name of the
# tests.session-xxx.service name we picked earlier. Once we see this we are sure
# the job is gone.
/   string \"$unit_name\"/ {
	found=1;
	print \"found service file\";
	fflush();
	next;
}
# This matches any string argument but only takes effect once we found our
# JobRemoved message. The order of arguments to JobRemoved is such that the
# immediate successor of the service name is the result word. This is
# translated to a pass / fail exit code from tests.session. Scanning this part
# terminates awk.
/string \".*\"/ {
	if (found==1) {
		print \"found result\";
		print \$2 > \"$tmp_dir/result.pipe\";
		fflush(\"\");
		close(\"$tmp_dir/result.pipe\");
		exit;
	}
}
"
	awk -W interactive "$awk_expr" <"$tmp_dir/dbus-monitor.pipe" >"$tmp_dir/awk.log" 2>&1 &
	awk_pid=$!

	# Wait for dbus-monitor to start.
	cat "$tmp_dir/ready.pipe" >/dev/null

	# We want to provide a SELinuxContext but systemd in older SELinux-using distributions
	# does not recognize this attribute. See https://github.com/systemd/systemd/blob/master/NEWS#L6402
	# The typical user context as reported by id -Z is: unconfined_u:unconfined_r:unconfined_t:s0
	selinux_context_arg=
	case $SPREAD_SYSTEM in
		fedora-*|centos-*)
			if [ "$systemd_ver" -gt 219 ]; then
				selinux_context_arg="SELinuxContext s unconfined_u:unconfined_r:unconfined_t:s0"
			fi
			;;
	esac

	# The number passed after "fail" is the count of unit properties that
	# follow: four fixed ones plus the optional SELinuxContext.
	# NOTE: This shellcheck directive is for the $selinux_context_arg expansion below.
	# shellcheck disable=SC2086
	busctl \
		--allow-interactive-authorization=no --quiet \
		--system \
		-- \
		call \
		 org.freedesktop.systemd1 \
		/org/freedesktop/systemd1 \
		 org.freedesktop.systemd1.Manager \
		StartTransientUnit "ssa(sv)a(sa(sv))" \
		"$unit_name" fail $(( 4 + $(test -n "$selinux_context_arg" && echo 1 || echo 0))) \
			Description s "tests.session running $* as $user" \
			Type s oneshot \
			Environment as 1 TERM=xterm-256color \
			$selinux_context_arg \
			ExecStart "a(sasb)" 1 \
				"$(command -v runuser)" 6 "$(command -v runuser)" -l "$user" - -c "SNAPD_DEBUG=$SNAPD_DEBUG SNAP_CONFINE_DEBUG=$SNAP_CONFINE_DEBUG exec $tmp_dir/exec <$tmp_dir/stdin.pipe >$tmp_dir/stdout.pipe 2>$tmp_dir/stderr.pipe" false \
		0
	# This is done so that we can configure file redirects. Once Ubuntu 16.04 is no
	# longer supported we can use Standard{Input,Output,Error}=file:path property
	# of systemd, or perhaps even StandardInputFileDescriptor and pass a file
	# descriptor to the FIFOs we open in the script above.

	# Wait for the service to terminate. The trigger is a successful read
	# from the result FIFO. In case we are signaled in one of the familiar
	# ways, kill the started service with the same signal. The reading occurs
	# in a loop as the signal will first interrupt the read process, which will
	# fail and return nothing.
	trap 'systemctl kill --signal=INT "$unit_name"' INT
	trap 'systemctl kill --signal=TERM "$unit_name"' TERM
	trap 'systemctl kill --signal=QUIT "$unit_name"' QUIT
	result=""
	for sig in INT TERM QUIT; do
		result=$(cat "$tmp_dir/result.pipe" 2>/dev/null) && break
		trap - "$sig"
	done
	trap - INT TERM QUIT

	# Kill dbus-monitor that otherwise runs until it notices the pipe is no longer
	# connected, which happens after a longer while. Redirect stderr to /dev/null
	# to avoid upsetting tests which are sensitive to stderr, e.g. tests/main/document-portal-activation
	kill "$dbus_monitor_pid" 2>/dev/null || true
	wait "$dbus_monitor_pid" 2>/dev/null || true
	wait "$awk_pid"

	wait "$cat_stdin_pid"
	wait "$cat_stdout_pid"
	wait "$cat_stderr_pid"

	# Prevent accumulation of failed sessions.
	if [ "$result" != '"done"' ]; then
		systemctl reset-failed "$unit_name"
	fi

	# Translate the job result (still wrapped in the double quotes printed by
	# dbus-monitor) into the exit status. Anything other than "done" is a
	# failure, including results that are not listed explicitly and an empty
	# result; previously those fell through and were reported as success.
	case "$result" in
		'"done"')
			exit 0
			;;
		'"failed"'|'"canceled"')
			exit 1
			;;
		*)
			echo "tests.session: unexpected job result: ${result:-<none>}" >&2
			exit 1
			;;
	esac
}

# Run the tool, passing the script's command-line arguments through unchanged.
main "$@"