Files
snapd/daemon/daemon.go
Maciej Borzecki 26e8fb456b daemon: fix data race accessing requestedRestart
Data race was picked up in the tests:

==================
WARNING: DATA RACE
Write at 0x00c00041a648 by goroutine 175:
  github.com/snapcore/snapd/daemon.(*Daemon).HandleRestart()
      /home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/daemon/daemon.go:446 +0x198
  github.com/snapcore/snapd/overlord/restart.(*RestartManager).handleRestart()
      /home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/overlord/restart/restart.go:242 +0x10e
  github.com/snapcore/snapd/overlord/restart.Request()
      /home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/overlord/restart/restart.go:348 +0xd0
  github.com/snapcore/snapd/overlord/standby.(*StandbyOpinions).Start.func1()
      /home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/overlord/standby/standby.go:102 +0xe4

Previous read at 0x00c00041a648 by goroutine 171:
  github.com/snapcore/snapd/daemon.(*Daemon).Stop()
      /home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/daemon/daemon.go:531 +0x211
  github.com/snapcore/snapd/daemon.(*daemonSuite).TestRestartIntoSocketModeNoNewChanges()
      /home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/daemon/daemon_test.go:1372 +0x3ed
  runtime.call16()
      /snap/go/10630/src/runtime/asm_amd64.s:770 +0x42
  reflect.Value.Call()
      /snap/go/10630/src/reflect/value.go:380 +0xb5
  gopkg.in/check%2ev1.(*suiteRunner).forkTest.func1()
      /home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/vendor/gopkg.in/check.v1/check.go:775 +0x9c5
  gopkg.in/check%2ev1.(*suiteRunner).forkCall.func1()
      /home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/vendor/gopkg.in/check.v1/check.go:669 +0xe9

Goroutine 175 (running) created at:
  github.com/snapcore/snapd/overlord/standby.(*StandbyOpinions).Start()
      /home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/overlord/standby/standby.go:96 +0xfc
  github.com/snapcore/snapd/daemon.(*Daemon).initStandbyHandling()
      /home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/daemon/daemon.go:320 +0x69c
  github.com/snapcore/snapd/daemon.(*Daemon).Start()
      /home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/daemon/daemon.go:379 +0x93d
  github.com/snapcore/snapd/daemon.(*daemonSuite).TestRestartIntoSocketModeNoNewChanges()
      /home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/daemon/daemon_test.go:1359 +0x134
  runtime.call16()
      /snap/go/10630/src/runtime/asm_amd64.s:770 +0x42
  reflect.Value.Call()
      /snap/go/10630/src/reflect/value.go:380 +0xb5
  gopkg.in/check%2ev1.(*suiteRunner).forkTest.func1()
      /home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/vendor/gopkg.in/check.v1/check.go:775 +0x9c5
  gopkg.in/check%2ev1.(*suiteRunner).forkCall.func1()
      /home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/vendor/gopkg.in/check.v1/check.go:669 +0xe9

Goroutine 171 (running) created at:
  gopkg.in/check%2ev1.(*suiteRunner).forkCall()
      /home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/vendor/gopkg.in/check.v1/check.go:666 +0x5ba
  gopkg.in/check%2ev1.(*suiteRunner).forkTest()
      /home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/vendor/gopkg.in/check.v1/check.go:757 +0x155
  gopkg.in/check%2ev1.(*suiteRunner).runTest()
      /home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/vendor/gopkg.in/check.v1/check.go:812 +0x419
  gopkg.in/check%2ev1.(*suiteRunner).run()
      /home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/vendor/gopkg.in/check.v1/check.go:618 +0x3c6
  gopkg.in/check%2ev1.Run()
      /home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/vendor/gopkg.in/check.v1/run.go:92 +0x44
  gopkg.in/check%2ev1.RunAll()
      /home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/vendor/gopkg.in/check.v1/run.go:84 +0x124
  gopkg.in/check%2ev1.TestingT()
      /home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/vendor/gopkg.in/check.v1/run.go:72 +0x5d3
  github.com/snapcore/snapd/daemon.Test()
      /home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/daemon/daemon_test.go:63 +0x26
  testing.tRunner()
      /snap/go/10630/src/testing/testing.go:1689 +0x21e
  testing.(*T).Run.gowrap1()
      /snap/go/10630/src/testing/testing.go:1742 +0x44
==================
OK: 682 passed
--- FAIL: Test (71.79s)
    testing.go:1398: race detected during execution of test
FAIL
FAIL	github.com/snapcore/snapd/daemon	72.143s

Signed-off-by: Maciej Borzecki <maciej.borzecki@canonical.com>
2024-07-03 12:06:11 +01:00

747 lines
20 KiB
Go

// -*- Mode: Go; indent-tabs-mode: t -*-
/*
* Copyright (C) 2015-2021 Canonical Ltd
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 3 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package daemon
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"net"
"net/http"
"os"
"os/signal"
"strings"
"sync"
"time"
"github.com/gorilla/mux"
"gopkg.in/tomb.v2"
"github.com/snapcore/snapd/boot"
"github.com/snapcore/snapd/dirs"
"github.com/snapcore/snapd/logger"
"github.com/snapcore/snapd/netutil"
"github.com/snapcore/snapd/osutil"
"github.com/snapcore/snapd/overlord"
"github.com/snapcore/snapd/overlord/auth"
"github.com/snapcore/snapd/overlord/restart"
"github.com/snapcore/snapd/overlord/snapstate"
"github.com/snapcore/snapd/overlord/standby"
"github.com/snapcore/snapd/overlord/state"
"github.com/snapcore/snapd/snapdenv"
"github.com/snapcore/snapd/store"
"github.com/snapcore/snapd/systemd"
)
// ErrRestartSocket is returned by Daemon.Stop when the daemon stopped in
// order to wait for socket activation.
var ErrRestartSocket = fmt.Errorf("daemon stop requested to wait for socket activation")
// ErrNoFailureRecoveryNeeded is returned by Daemon.Start when the overlord
// start-up fails with snapstate.ErrUnexpectedRuntimeRestart.
var ErrNoFailureRecoveryNeeded = fmt.Errorf("no failure recovery needed")
// systemdSdNotify is the function used to send state notifications to
// systemd. NOTE(review): presumably a variable so that tests can replace
// it - confirm.
var systemdSdNotify = systemd.SdNotify
// Human readable descriptions of the different kinds of restart.
// NOTE(review): not referenced in this part of the file, presumably used
// when building the maintenance information for clients - confirm.
const (
	daemonRestartMsg = "daemon is restarting"
	systemRestartMsg = "system is restarting"
	systemHaltMsg = "system is halting"
	systemPoweroffMsg = "system is powering off"
	socketRestartMsg = "daemon is stopping to wait for socket activation"
)
// A Daemon listens for requests and routes them to the right command
type Daemon struct {
	// NOTE(review): Version is not set in this part of the file, presumably
	// filled in by the caller - confirm.
	Version string
	// overlord and state are set by New; state is also set by
	// RebootDidNotHappen when the overlord could not be created
	overlord *overlord.Overlord
	state *state.State
	// listeners obtained in Init; snapListener is optional and may be nil
	snapdListener net.Listener
	snapListener net.Listener
	// connTracker and serve are created by Start
	connTracker *connTracker
	serve *http.Server
	// tomb tracks the goroutines serving the listeners, it is killed by
	// HandleRestart and Stop
	tomb tomb.Tomb
	// router is built by addRoutes from the api commands
	router *mux.Router
	// standbyOpinions is created and started by initStandbyHandling
	standbyOpinions *standby.StandbyOpinions
	// set to what kind of restart was requested (if any)
	requestedRestart restart.RestartType
	// reboot info needed to handle reboots
	rebootInfo *boot.RebootInfo
	// set to remember that we need to exit the daemon in a way that
	// prevents systemd from restarting it
	restartSocket bool
	// degradedErr is set when the daemon is in degraded mode
	degradedErr error
	// expectedRebootDidNotHappen is set by New when creating the overlord
	// failed with errExpectedReboot; Start and Stop then only deal with
	// scheduling and waiting for the system restart
	expectedRebootDidNotHappen bool
	// mu is held by HandleRestart while it writes requestedRestart,
	// rebootInfo and restartSocket, and by Stop while it takes a snapshot
	// of them
	mu sync.Mutex
	// cancel cancels the context used as base context of the requests, it
	// is set by Start and called by Stop
	cancel func()
}
// A ResponseFunc handles one of the individual verbs for a method.
//
// Command.ServeHTTP calls it with the Command being served, the incoming
// request and the user obtained from the request, and serves the Response
// it returns. NOTE(review): the user is presumably nil when the request
// carries no valid authentication - confirm against userFromRequest.
type ResponseFunc func(*Command, *http.Request, *auth.UserState) Response
// A Command routes a request to an individual per-verb ResponseFunc
type Command struct {
	// Path is the route of the command; PathPrefix, when not empty, is
	// used instead to register the command for a whole path prefix
	Path string
	PathPrefix string
	// Per-verb handlers; a request for a verb whose handler is nil is
	// answered with "method not allowed"
	GET ResponseFunc
	PUT ResponseFunc
	POST ResponseFunc
	// Access control.
	// ReadAccess is checked for GET requests, WriteAccess for PUT and
	// POST requests
	ReadAccess accessChecker
	WriteAccess accessChecker
	// d is the daemon serving the command, set by Daemon.addRoutes
	d *Daemon
}
// ServeHTTP implements http.Handler: it resolves the user and the peer
// credentials of the request, picks the handler and access checker matching
// the HTTP method, enforces the access check and serves the response
// produced by the handler. Structured responses are decorated with the
// pending restart (maintenance) information and, unless they are errors,
// with the warnings summary.
func (c *Command) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	daemon := c.d
	st := daemon.state

	// TODO Look at the error and fail if there's an attempt to authenticate with invalid data.
	st.Lock()
	user, _ := userFromRequest(st, r)
	st.Unlock()

	// in degraded mode only GET requests are served
	if degradedErr := daemon.degradedErr; degradedErr != nil && r.Method != http.MethodGet {
		InternalError(degradedErr.Error()).ServeHTTP(w, r)
		return
	}

	ucred, err := ucrednetGet(r.RemoteAddr)
	if !(err == nil || err == errNoID) {
		logger.Noticef("unexpected error when attempting to get UID: %s", err)
		InternalError(err.Error()).ServeHTTP(w, r)
		return
	}

	r = r.WithContext(store.WithClientUserAgent(r.Context(), r))

	var (
		handler ResponseFunc
		checker accessChecker
	)
	switch r.Method {
	case http.MethodGet:
		handler, checker = c.GET, c.ReadAccess
	case http.MethodPut:
		handler, checker = c.PUT, c.WriteAccess
	case http.MethodPost:
		handler, checker = c.POST, c.WriteAccess
	}
	if handler == nil {
		MethodNotAllowed("method %q not allowed", r.Method).ServeHTTP(w, r)
		return
	}

	if denied := checker.CheckAccess(daemon, r, ucred, user); denied != nil {
		denied.ServeHTTP(w, r)
		return
	}

	rsp := handler(c, r, user)
	structured, ok := rsp.(StructuredResponse)
	if !ok {
		// nothing to decorate, serve as is
		rsp.ServeHTTP(w, r)
		return
	}

	rjson := structured.JSON()

	st.Lock()
	_, pendingRestart := restart.Pending(st)
	st.Unlock()
	rjson.addMaintenanceFromRestartType(pendingRestart)

	if rjson.Type != ResponseTypeError {
		st.Lock()
		warnCount, warnStamp := st.WarningsSummary()
		st.Unlock()
		rjson.addWarningCount(warnCount, warnStamp)
	}

	// serve the updated serialisation
	rsp = rjson
	rsp.ServeHTTP(w, r)
}
// wrappedWriter wraps an http.ResponseWriter and remembers the status code
// of the response so that it can be logged after the request was served.
type wrappedWriter struct {
	w http.ResponseWriter
	// s is the status code of the response, 0 as long as neither
	// WriteHeader nor Write were called
	s int
}

// Header returns the header map of the wrapped writer.
func (w *wrappedWriter) Header() http.Header {
	return w.w.Header()
}

// Write passes the data to the wrapped writer. When the handler did not
// call WriteHeader first, net/http implicitly sends a 200 OK, so that
// status is recorded instead of leaving the logged status at 0.
func (w *wrappedWriter) Write(bs []byte) (int, error) {
	if w.s == 0 {
		w.s = http.StatusOK
	}
	return w.w.Write(bs)
}

// WriteHeader forwards the status code to the wrapped writer and records
// it.
func (w *wrappedWriter) WriteHeader(s int) {
	w.w.WriteHeader(s)
	w.s = s
}

// Flush flushes the wrapped writer if it supports flushing, otherwise it
// does nothing.
func (w *wrappedWriter) Flush() {
	if f, ok := w.w.(http.Flusher); ok {
		f.Flush()
	}
}
// logit returns a handler that serves requests with the given handler and
// afterwards logs, at debug level, the remote address, method, URL, time
// taken and status code. Requests whose URL contains "/changes/" are not
// logged.
func logit(handler http.Handler) http.Handler {
	serveAndLog := func(w http.ResponseWriter, r *http.Request) {
		recorder := &wrappedWriter{w: w}
		start := time.Now()
		handler.ServeHTTP(recorder, r)
		elapsed := time.Since(start)

		if strings.Contains(r.URL.String(), "/changes/") {
			return
		}
		logger.Debugf("%s %s %s %s %d", r.RemoteAddr, r.Method, r.URL, elapsed, recorder.s)
	}
	return http.HandlerFunc(serveAndLog)
}
// Init prepares the Daemon for serving: it obtains the listeners for the
// snapd and snap sockets and registers the API routes.
// It must not be called more than once.
func (d *Daemon) Init() error {
	activated, err := netutil.ActivationListeners()
	if err != nil {
		return err
	}

	// the snapd socket is mandatory, give up without it
	snapdListener, err := netutil.GetListener(dirs.SnapdSocket, activated)
	if err != nil {
		return fmt.Errorf("when trying to listen on %s: %v", dirs.SnapdSocket, err)
	}
	d.snapdListener = &ucrednetListener{Listener: snapdListener}

	// the snap socket is optional, failing to get it is only logged
	snapListener, err := netutil.GetListener(dirs.SnapSocket, activated)
	if err != nil {
		logger.Debugf("cannot get listener for %q: %v", dirs.SnapSocket, err)
	} else {
		d.snapListener = &ucrednetListener{Listener: snapListener}
	}

	d.addRoutes()

	logger.Noticef("started %v.", snapdenv.UserAgent())

	return nil
}
// SetDegradedMode puts the daemon into a degraded mode. In this mode
// it will return the error given in the "err" argument for commands
// that are not pure HTTP GETs.
//
// This is useful to report errors to the client when the daemon
// cannot work because e.g. a snapd squashfs precondition check failed
// or the system is out of diskspace.
//
// When the system is fine again, calling "SetDegradedMode(nil)" is enough
// to put the daemon into full operation again.
//
// The error is stored without taking any lock and is read by
// Command.ServeHTTP for every request. NOTE(review): confirm that callers
// only switch the mode while no requests are being served, otherwise the
// accesses are not synchronized.
func (d *Daemon) SetDegradedMode(err error) {
	d.degradedErr = err
}
// addRoutes creates the router of the daemon and registers every command
// of the api list with it, either under its exact path or, when set,
// under its path prefix. Each command is also pointed back at the daemon.
func (d *Daemon) addRoutes() {
	router := mux.NewRouter()
	d.router = router

	for _, cmd := range api {
		cmd.d = d
		if cmd.PathPrefix != "" {
			router.PathPrefix(cmd.PathPrefix).Handler(cmd).Name(cmd.PathPrefix)
			continue
		}
		router.Handle(cmd.Path, cmd).Name(cmd.Path)
	}

	// also maybe add a /favicon.ico handler...

	router.NotFoundHandler = NotFound("not found")
}
var (
	// shutdownTimeout bounds how long Stop waits for the HTTP server to
	// shut down gracefully; after that the remaining connections are
	// closed.
	shutdownTimeout = 25 * time.Second
)
// connTracker remembers the connections that are new or active, it is
// fed by the connection state callback of the HTTP server.
type connTracker struct {
	mu    sync.Mutex
	conns map[net.Conn]struct{}
}

// CanStandby reports whether no connection is currently tracked.
func (ct *connTracker) CanStandby() bool {
	ct.mu.Lock()
	idle := len(ct.conns) == 0
	ct.mu.Unlock()
	return idle
}

// trackConn records conn when it enters the new or active state and
// forgets it on any other state.
func (ct *connTracker) trackConn(conn net.Conn, state http.ConnState) {
	ct.mu.Lock()
	defer ct.mu.Unlock()

	switch state {
	case http.StateNew, http.StateActive:
		ct.conns[conn] = struct{}{}
	default:
		// this includes hijacked connections which are ignored, if we
		// do things with websockets we'll need custom shutdown handling
		// for them
		delete(ct.conns, conn)
	}
}
// initStandbyHandling sets up the standby opinions of the daemon from the
// connection tracker, the overlord, the snap manager and the device
// manager, and starts them.
func (d *Daemon) initStandbyHandling() {
	opinions := standby.New(d.state)
	opinions.AddOpinion(d.connTracker)
	opinions.AddOpinion(d.overlord)
	opinions.AddOpinion(d.overlord.SnapManager())
	opinions.AddOpinion(d.overlord.DeviceManager())

	d.standbyOpinions = opinions
	opinions.Start()
}
// Start the Daemon. Takes a context which will be used as the base request
// context in the embedded http.Server.
//
// When New found that an expected reboot did not happen, Start only kills
// the tomb and notifies systemd that the daemon is ready; the reboot is
// then handled by Stop. Otherwise it starts up the overlord, sets up the
// HTTP server and standby handling, removes the maintenance file, starts
// the overlord loop and begins serving the listeners.
//
// It returns ErrNoFailureRecoveryNeeded when the overlord start-up fails
// with snapstate.ErrUnexpectedRuntimeRestart, and otherwise the first
// error encountered. It panics when the daemon has no overlord.
func (d *Daemon) Start(ctx context.Context) (err error) {
	if d.expectedRebootDidNotHappen {
		// we need to schedule and wait for a system restart
		d.tomb.Kill(nil)
		// avoid systemd killing us again while we wait
		systemdSdNotify("READY=1")
		return nil
	}
	if d.overlord == nil {
		panic("internal error: no Overlord")
	}
	ctx, cancel := context.WithCancel(ctx)
	// remembered so that Stop can cancel the context of the requests
	d.cancel = cancel
	// err is a named result so that the deferred function observes the
	// error Start returns with
	defer func() {
		// cancel the context on any errors
		if err != nil {
			cancel()
		}
	}()
	to, reasoning, err := d.overlord.StartupTimeout()
	if err != nil {
		return err
	}
	if to > 0 {
		// EXTEND_TIMEOUT_USEC takes microseconds
		to = to.Round(time.Microsecond)
		us := to.Nanoseconds() / 1000
		logger.Noticef("adjusting startup timeout by %v (%s)", to, reasoning)
		systemdSdNotify(fmt.Sprintf("EXTEND_TIMEOUT_USEC=%d", us))
	}
	// now perform expensive overlord/manages initialization
	if err := d.overlord.StartUp(); err != nil {
		if errors.Is(err, snapstate.ErrUnexpectedRuntimeRestart) {
			logger.Noticef("detected failure recovery context, but no recovery needed")
			return ErrNoFailureRecoveryNeeded
		}
		return err
	}
	d.connTracker = &connTracker{conns: make(map[net.Conn]struct{})}
	d.serve = &http.Server{
		Handler: logit(d.router),
		ConnState: d.connTracker.trackConn,
		BaseContext: func(net.Listener) context.Context {
			// requests will use the context provided to Start, as
			// the caller will likely cancel it when appropriate
			// thus canceling any outstanding requests to the snapd
			// API
			return ctx
		},
	}
	// enable standby handling
	d.initStandbyHandling()
	// before serving actual connections remove the maintenance.json file as we
	// are no longer down for maintenance, this state most closely corresponds
	// to restart.RestartUnset
	if err := d.updateMaintenanceFile(restart.RestartUnset); err != nil {
		return err
	}
	// the loop runs in its own goroutine
	d.overlord.Loop()
	d.tomb.Go(func() error {
		// the optional snap socket is served from a second goroutine
		// tracked by the same tomb
		if d.snapListener != nil {
			d.tomb.Go(func() error {
				// an error from Serve only counts when it is not the
				// regular "server closed" one and the tomb is still alive
				if err := d.serve.Serve(d.snapListener); err != http.ErrServerClosed && d.tomb.Err() == tomb.ErrStillAlive {
					return err
				}
				return nil
			})
		}
		if err := d.serve.Serve(d.snapdListener); err != http.ErrServerClosed && d.tomb.Err() == tomb.ErrStillAlive {
			return err
		}
		return nil
	})
	// notify systemd that we are ready
	systemdSdNotify("READY=1")
	return nil
}
// HandleRestart implements overlord.RestartBehavior.
//
// With the daemon mutex held it remembers the reboot information and,
// for the known restart types, the kind of restart that was requested so
// that Stop can act on it. For system restarts, halts and power-offs a
// slow fallback reboot is scheduled right away. In all cases the tomb of
// the daemon is killed at the end.
func (d *Daemon) HandleRestart(t restart.RestartType, rebootInfo *boot.RebootInfo) {
	d.mu.Lock()
	defer d.mu.Unlock()

	d.rebootInfo = rebootInfo

	var (
		// action of the fallback reboot, only meaningful with needsFallback
		fallback      boot.RebootAction
		needsFallback bool
		// whether the restart type is saved for Stop, which uses it to
		// write out a maintenance.json
		remember = true
	)

	// die when asked to restart (systemd should get us back up!) etc
	switch t {
	case restart.RestartDaemon:
		// nothing but remembering the restart type
	case restart.RestartSystem, restart.RestartSystemNow:
		fallback, needsFallback = boot.RebootReboot, true
	case restart.RestartSystemHaltNow:
		fallback, needsFallback = boot.RebootHalt, true
	case restart.RestartSystemPoweroffNow:
		fallback, needsFallback = boot.RebootPoweroff, true
	case restart.RestartSocket:
		d.restartSocket = true
	case restart.StopDaemon:
		remember = false
		logger.Noticef("stopping snapd as requested")
	default:
		remember = false
		logger.Noticef("internal error: restart handler called with unknown restart type: %v", t)
	}

	if needsFallback {
		// try to schedule a fallback slow reboot already here
		// in case we get stuck shutting down
		if err := reboot(fallback, rebootWaitTimeout, rebootInfo); err != nil {
			logger.Noticef("%s", err)
		}
	}
	if remember {
		d.requestedRestart = t
	}

	d.tomb.Kill(nil)
}
var (
	// rebootNoticeWait is how long Stop keeps the snapd socket open after
	// it started shutting down (less the time spent waiting for hooks),
	// to allow polling clients to access the daemon
	rebootNoticeWait = 3 * time.Second
	// rebootWaitTimeout is the delay of the fallback reboot scheduled by
	// HandleRestart and the time Stop waits for a requested system
	// restart to happen
	rebootWaitTimeout = 10 * time.Minute
	// rebootRetryWaitTimeout is the time Stop waits for the system
	// restart when retrying after an expected reboot did not happen
	rebootRetryWaitTimeout = 5 * time.Minute
	// rebootMaxAttempts is how many times RebootDidNotHappen lets the
	// system restart be retried before giving up
	rebootMaxAttempts = 3
)
// updateMaintenanceFile brings the maintenance.json file in line with the
// given restart type: for restart.RestartUnset the file is removed (it
// not existing is fine), for anything else the maintenance information of
// the restart type is written out atomically as JSON.
func (d *Daemon) updateMaintenanceFile(rst restart.RestartType) error {
	if rst != restart.RestartUnset {
		payload, err := json.Marshal(maintenanceForRestartType(rst))
		if err != nil {
			return err
		}
		return osutil.AtomicWrite(dirs.SnapdMaintenanceFile, bytes.NewBuffer(payload), 0644, 0)
	}

	// no restart pending, the file only needs to go away; a file that
	// does not exist is not an error
	if err := os.Remove(dirs.SnapdMaintenanceFile); err != nil && !os.IsNotExist(err) {
		return err
	}
	return nil
}
// Stop shuts down the Daemon.
//
// When an expected reboot did not happen (see New) it only schedules the
// system restart again and waits for it. Otherwise it cancels the request
// context, writes the maintenance file for the requested restart, stops
// the hooks and the listeners, shuts down the HTTP server and the
// overlord and finally, depending on the restart that was requested via
// HandleRestart, reboots the system, returns ErrRestartSocket or returns
// nil.
//
// sigCh is handed to doReboot, which stops delivering signals to it and
// closes it while waiting for the reboot; it may be nil.
//
// The restart state written by HandleRestart is only accessed with the
// daemon mutex held, the decisions taken here are based on a single
// snapshot of it.
func (d *Daemon) Stop(sigCh chan<- os.Signal) error {
	// we need to schedule/wait for a system restart again
	if d.expectedRebootDidNotHappen {
		// make the reboot retry immediate
		immediateReboot := true
		// TODO: we do not know the RebootInfo from the previous snapd
		// instance. Passing nil for the moment, but maybe we should
		// cache to disk and recover at this point. In any case, it is
		// expected that the reboot will not be harmful even if
		// RebootInfo is unknown, and that things will end up in a
		// kernel refresh failure, that can be retried later.
		return d.doReboot(sigCh, restart.RestartSystem, nil, immediateReboot, rebootRetryWaitTimeout)
	}
	if d.overlord == nil {
		return fmt.Errorf("internal error: no Overlord")
	}

	if d.cancel != nil {
		d.cancel()
	}

	d.tomb.Kill(nil)

	// check the state associated with a potential restart with the lock to
	// prevent races
	d.mu.Lock()
	restartType := d.requestedRestart
	restartSocket := d.restartSocket
	rebootInfo := d.rebootInfo
	d.mu.Unlock()

	// needsFullShutdown is whether the entire system will
	// shutdown or not as a consequence of this request
	needsFullShutdown := false
	switch restartType {
	case restart.RestartSystem, restart.RestartSystemNow, restart.RestartSystemHaltNow, restart.RestartSystemPoweroffNow:
		needsFullShutdown = true
	}
	immediateShutdown := false
	switch restartType {
	case restart.RestartSystemNow, restart.RestartSystemHaltNow, restart.RestartSystemPoweroffNow:
		immediateShutdown = true
	}

	// before not accepting any new client connections we need to write the
	// maintenance.json file for potential clients to see after the daemon stops
	// responding so they can read it correctly and handle the maintenance
	if err := d.updateMaintenanceFile(restartType); err != nil {
		logger.Noticef("error writing maintenance file: %v", err)
	}

	// take a timestamp before shutting down the snap listener, and
	// use the time we may spend on waiting for hooks against the shutdown
	// delay.
	ts := time.Now()
	if d.snapListener != nil {
		// stop running hooks first
		// and do it more gracefully if we are restarting
		hookMgr := d.overlord.HookManager()
		d.state.Lock()
		ok, _ := restart.Pending(d.state)
		d.state.Unlock()
		if ok {
			logger.Noticef("gracefully waiting for running hooks")
			hookMgr.GracefullyWaitRunningHooks()
			logger.Noticef("done waiting for running hooks")
		}
		hookMgr.StopHooks()
		d.snapListener.Close()
	}
	timeSpent := time.Since(ts)

	// When shutting down the snapd listener wait until the rebootNoticeWait
	// period has passed before snapdListener is closed to allow polling
	// clients to access the daemon. For testing we disable this unless SNAPD_SHUTDOWN_DELAY
	// has been set, to avoid incurring this wait for every daemon restart which happens
	// quite often in testing.
	if !snapdenv.Testing() || osutil.GetenvBool("SNAPD_SHUTDOWN_DELAY") {
		time.Sleep(rebootNoticeWait - timeSpent)
	}
	d.snapdListener.Close()
	d.standbyOpinions.Stop()

	// We're using the background context here because the tomb's
	// context will likely already have been cancelled when we are
	// called.
	ctx, cancel := context.WithTimeout(context.Background(), shutdownTimeout)
	d.tomb.Kill(d.serve.Shutdown(ctx))
	cancel()

	if !needsFullShutdown {
		// tell systemd that we are stopping
		systemdSdNotify("STOPPING=1")
	}

	// At this point we processed all open requests (and
	// stopped accepting new requests) - before going into
	// socket activated mode we need to check if any of
	// those open requests resulted in something that
	// prevents us from going into socket activation mode.
	//
	// If this is the case we do a "normal" snapd restart
	// to process the new changes.
	if restartSocket && !d.standbyOpinions.CanStandby() {
		// HandleRestart writes the flag with the lock held, do the same
		d.mu.Lock()
		d.restartSocket = false
		d.mu.Unlock()
	}
	d.overlord.Stop()

	if err := d.tomb.Wait(); err != nil {
		if err == context.DeadlineExceeded {
			logger.Noticef("WARNING: cannot gracefully shut down in-flight snapd API activity within: %v", shutdownTimeout)
			// the process is shutting down anyway, so we may just
			// as well close the active connections right now
			d.serve.Close()
		} else {
			// do not stop the shutdown even if the tomb errors
			// because we already scheduled a slow shutdown and
			// exiting here will just restart snapd (via systemd)
			// which will lead to confusing results.
			if needsFullShutdown {
				logger.Noticef("WARNING: cannot stop daemon: %v", err)
			} else {
				return err
			}
		}
	}

	if needsFullShutdown {
		// use the snapshot taken under the lock so that the reboot action
		// agrees with needsFullShutdown and immediateShutdown
		return d.doReboot(sigCh, restartType, rebootInfo, immediateShutdown, rebootWaitTimeout)
	}

	d.mu.Lock()
	restartSocket = d.restartSocket
	d.mu.Unlock()
	if restartSocket {
		return ErrRestartSocket
	}

	return nil
}
// rebootDelay returns how long to wait before the system reboot.
//
// When a reboot time was already stored in the state under
// "daemon-system-restart-at", the time remaining until then is returned.
// Otherwise the delay is zero for an immediate reboot and one minute
// otherwise, the latter being overridable with a valid duration in the
// SNAPD_REBOOT_DELAY environment variable; the resulting reboot time is
// stored in the state. The state lock is taken for the duration of the
// call.
func (d *Daemon) rebootDelay(immediate bool) (time.Duration, error) {
	d.state.Lock()
	defer d.state.Unlock()

	now := time.Now()

	var scheduledAt time.Time
	err := d.state.Get("daemon-system-restart-at", &scheduledAt)
	switch {
	case err == nil:
		// a reboot had already been scheduled, stick to its time
		return scheduledAt.Sub(now), nil
	case !errors.Is(err, state.ErrNoState):
		return 0, err
	}

	var delay time.Duration
	if !immediate {
		delay = 1 * time.Minute
		// for tests
		if override := os.Getenv("SNAPD_REBOOT_DELAY"); override != "" {
			if parsed, perr := time.ParseDuration(override); perr == nil {
				delay = parsed
			}
		}
	}

	d.state.Set("daemon-system-restart-at", now.Add(delay))
	return delay, nil
}
// doReboot requests the reboot, halt or power-off matching the restart
// type rst, after the delay computed by rebootDelay, and then waits up to
// waitTimeout for it to happen.
//
// When sigCh is not nil, signal delivery to it is stopped; if a signal is
// already queued the function returns nil right away, otherwise the
// channel is closed. If the function is still running after waitTimeout
// an error is returned, as the expected action did not take place.
func (d *Daemon) doReboot(sigCh chan<- os.Signal, rst restart.RestartType, rbi *boot.RebootInfo, immediate bool, waitTimeout time.Duration) error {
	delay, err := d.rebootDelay(immediate)
	if err != nil {
		return err
	}

	// anything but halt and power-off is a plain reboot
	action := boot.RebootReboot
	if rst == restart.RestartSystemHaltNow {
		action = boot.RebootHalt
	} else if rst == restart.RestartSystemPoweroffNow {
		action = boot.RebootPoweroff
	}

	// ask for shutdown and wait for it to happen.
	// if we exit snapd will be restarted by systemd
	err = reboot(action, delay, rbi)
	if err != nil {
		return err
	}

	// wait for reboot to happen
	logger.Noticef("Waiting for %s", action)
	if sigCh != nil {
		signal.Stop(sigCh)
		pending := len(sigCh)
		if pending > 0 {
			// a signal arrived in between
			return nil
		}
		close(sigCh)
	}

	time.Sleep(waitTimeout)
	return fmt.Errorf("expected %s did not happen", action)
}
// reboot is the function used by HandleRestart and doReboot to request a
// reboot, halt or power-off of the system after a delay.
// NOTE(review): presumably a variable so that tests can replace it -
// confirm.
var reboot = boot.Reboot
// Dying is a tomb-ish thing: it returns the Dying channel of the tomb of
// the daemon, the tomb that HandleRestart and Stop kill.
func (d *Daemon) Dying() <-chan struct{} {
	return d.tomb.Dying()
}
// clearReboot resets the reboot bookkeeping kept in the state by the
// daemon: the scheduled reboot time and the count of reboot attempts.
func clearReboot(st *state.State) {
	for _, key := range []string{
		"daemon-system-restart-at",
		"daemon-system-restart-tentative",
	} {
		st.Set(key, nil)
	}
}
// RebootAsExpected implements part of overlord.RestartBehavior.
//
// It clears the reboot bookkeeping of the daemon from the state (see
// clearReboot) and always returns nil.
// NOTE(review): st is presumably expected to be locked by the caller -
// confirm.
func (d *Daemon) RebootAsExpected(st *state.State) error {
	clearReboot(st)
	return nil
}
// errExpectedReboot is returned by RebootDidNotHappen while the system
// restart is still to be retried; New checks for it to create a daemon
// that only deals with the reboot.
var errExpectedReboot = errors.New("expected reboot did not happen")
// RebootDidNotHappen implements part of overlord.RestartBehavior.
//
// It counts the attempt in the state under
// "daemon-system-restart-tentative". While no more than rebootMaxAttempts
// attempts were made, the state is kept on the daemon and
// errExpectedReboot is returned so that the system restart is tried
// again. After that it gives up: the reboot bookkeeping is cleared and
// nil is returned so that snapd proceeds normally.
func (d *Daemon) RebootDidNotHappen(st *state.State) error {
	var tentative int
	if err := st.Get("daemon-system-restart-tentative", &tentative); err != nil && !errors.Is(err, state.ErrNoState) {
		return err
	}
	tentative++

	if tentative <= rebootMaxAttempts {
		st.Set("daemon-system-restart-tentative", tentative)
		d.state = st
		logger.Noticef("snapd was restarted while a system restart was expected, snapd will try to schedule and wait for a system restart again (attempt %d/%d)", tentative, rebootMaxAttempts)
		return errExpectedReboot
	}

	// giving up, proceed normally, some in-progress refresh
	// might get rolled back!!
	restart.ClearReboot(st)
	clearReboot(st)
	logger.Noticef("snapd was restarted while a system restart was expected, snapd retried to schedule and waited again for a system restart %d times and is giving up", rebootMaxAttempts)
	return nil
}
// New creates a Daemon together with its overlord.
//
// When creating the overlord fails with errExpectedReboot (an expected
// system restart did not happen, see RebootDidNotHappen) a Daemon without
// overlord is returned that only schedules and waits for the system
// restart again. Any other error is returned as is.
func New() (*Daemon, error) {
	d := &Daemon{}
	ovld, err := overlord.New(d)
	// errors.Is, as used for the other sentinel errors in this file, also
	// recognizes the sentinel should it get wrapped on the way
	if errors.Is(err, errExpectedReboot) {
		// we proceed without overlord until we reach Stop
		// where we will schedule and wait again for a system restart.
		// ATM we cannot do that in New because we need to satisfy
		// systemd notify mechanisms.
		d.expectedRebootDidNotHappen = true
		return d, nil
	}
	if err != nil {
		return nil, err
	}
	d.overlord = ovld
	d.state = ovld.State()
	return d, nil
}