mirror of
https://github.com/token2/snapd.git
synced 2026-03-13 11:15:47 -07:00
Data race was picked up in the tests:
==================
WARNING: DATA RACE
Write at 0x00c00041a648 by goroutine 175:
github.com/snapcore/snapd/daemon.(*Daemon).HandleRestart()
/home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/daemon/daemon.go:446 +0x198
github.com/snapcore/snapd/overlord/restart.(*RestartManager).handleRestart()
/home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/overlord/restart/restart.go:242 +0x10e
github.com/snapcore/snapd/overlord/restart.Request()
/home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/overlord/restart/restart.go:348 +0xd0
github.com/snapcore/snapd/overlord/standby.(*StandbyOpinions).Start.func1()
/home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/overlord/standby/standby.go:102 +0xe4
Previous read at 0x00c00041a648 by goroutine 171:
github.com/snapcore/snapd/daemon.(*Daemon).Stop()
/home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/daemon/daemon.go:531 +0x211
github.com/snapcore/snapd/daemon.(*daemonSuite).TestRestartIntoSocketModeNoNewChanges()
/home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/daemon/daemon_test.go:1372 +0x3ed
runtime.call16()
/snap/go/10630/src/runtime/asm_amd64.s:770 +0x42
reflect.Value.Call()
/snap/go/10630/src/reflect/value.go:380 +0xb5
gopkg.in/check%2ev1.(*suiteRunner).forkTest.func1()
/home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/vendor/gopkg.in/check.v1/check.go:775 +0x9c5
gopkg.in/check%2ev1.(*suiteRunner).forkCall.func1()
/home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/vendor/gopkg.in/check.v1/check.go:669 +0xe9
Goroutine 175 (running) created at:
github.com/snapcore/snapd/overlord/standby.(*StandbyOpinions).Start()
/home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/overlord/standby/standby.go:96 +0xfc
github.com/snapcore/snapd/daemon.(*Daemon).initStandbyHandling()
/home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/daemon/daemon.go:320 +0x69c
github.com/snapcore/snapd/daemon.(*Daemon).Start()
/home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/daemon/daemon.go:379 +0x93d
github.com/snapcore/snapd/daemon.(*daemonSuite).TestRestartIntoSocketModeNoNewChanges()
/home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/daemon/daemon_test.go:1359 +0x134
runtime.call16()
/snap/go/10630/src/runtime/asm_amd64.s:770 +0x42
reflect.Value.Call()
/snap/go/10630/src/reflect/value.go:380 +0xb5
gopkg.in/check%2ev1.(*suiteRunner).forkTest.func1()
/home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/vendor/gopkg.in/check.v1/check.go:775 +0x9c5
gopkg.in/check%2ev1.(*suiteRunner).forkCall.func1()
/home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/vendor/gopkg.in/check.v1/check.go:669 +0xe9
Goroutine 171 (running) created at:
gopkg.in/check%2ev1.(*suiteRunner).forkCall()
/home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/vendor/gopkg.in/check.v1/check.go:666 +0x5ba
gopkg.in/check%2ev1.(*suiteRunner).forkTest()
/home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/vendor/gopkg.in/check.v1/check.go:757 +0x155
gopkg.in/check%2ev1.(*suiteRunner).runTest()
/home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/vendor/gopkg.in/check.v1/check.go:812 +0x419
gopkg.in/check%2ev1.(*suiteRunner).run()
/home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/vendor/gopkg.in/check.v1/check.go:618 +0x3c6
gopkg.in/check%2ev1.Run()
/home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/vendor/gopkg.in/check.v1/run.go:92 +0x44
gopkg.in/check%2ev1.RunAll()
/home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/vendor/gopkg.in/check.v1/run.go:84 +0x124
gopkg.in/check%2ev1.TestingT()
/home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/vendor/gopkg.in/check.v1/run.go:72 +0x5d3
github.com/snapcore/snapd/daemon.Test()
/home/runner/work/snapd/snapd/src/github.com/snapcore/snapd/daemon/daemon_test.go:63 +0x26
testing.tRunner()
/snap/go/10630/src/testing/testing.go:1689 +0x21e
testing.(*T).Run.gowrap1()
/snap/go/10630/src/testing/testing.go:1742 +0x44
==================
OK: 682 passed
--- FAIL: Test (71.79s)
testing.go:1398: race detected during execution of test
FAIL
FAIL github.com/snapcore/snapd/daemon 72.143s
Signed-off-by: Maciej Borzecki <maciej.borzecki@canonical.com>
747 lines
20 KiB
Go
747 lines
20 KiB
Go
// -*- Mode: Go; indent-tabs-mode: t -*-
|
|
|
|
/*
|
|
* Copyright (C) 2015-2021 Canonical Ltd
|
|
*
|
|
* This program is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License version 3 as
|
|
* published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*
|
|
*/
|
|
|
|
package daemon
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"net"
|
|
"net/http"
|
|
"os"
|
|
"os/signal"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/gorilla/mux"
|
|
"gopkg.in/tomb.v2"
|
|
|
|
"github.com/snapcore/snapd/boot"
|
|
"github.com/snapcore/snapd/dirs"
|
|
"github.com/snapcore/snapd/logger"
|
|
"github.com/snapcore/snapd/netutil"
|
|
"github.com/snapcore/snapd/osutil"
|
|
"github.com/snapcore/snapd/overlord"
|
|
"github.com/snapcore/snapd/overlord/auth"
|
|
"github.com/snapcore/snapd/overlord/restart"
|
|
"github.com/snapcore/snapd/overlord/snapstate"
|
|
"github.com/snapcore/snapd/overlord/standby"
|
|
"github.com/snapcore/snapd/overlord/state"
|
|
"github.com/snapcore/snapd/snapdenv"
|
|
"github.com/snapcore/snapd/store"
|
|
"github.com/snapcore/snapd/systemd"
|
|
)
|
|
|
|
var ErrRestartSocket = fmt.Errorf("daemon stop requested to wait for socket activation")
|
|
var ErrNoFailureRecoveryNeeded = fmt.Errorf("no failure recovery needed")
|
|
|
|
var systemdSdNotify = systemd.SdNotify
|
|
|
|
const (
|
|
daemonRestartMsg = "daemon is restarting"
|
|
systemRestartMsg = "system is restarting"
|
|
systemHaltMsg = "system is halting"
|
|
systemPoweroffMsg = "system is powering off"
|
|
socketRestartMsg = "daemon is stopping to wait for socket activation"
|
|
)
|
|
|
|
// A Daemon listens for requests and routes them to the right command
|
|
type Daemon struct {
|
|
Version string
|
|
overlord *overlord.Overlord
|
|
state *state.State
|
|
snapdListener net.Listener
|
|
snapListener net.Listener
|
|
connTracker *connTracker
|
|
serve *http.Server
|
|
tomb tomb.Tomb
|
|
router *mux.Router
|
|
standbyOpinions *standby.StandbyOpinions
|
|
|
|
// set to what kind of restart was requested (if any)
|
|
requestedRestart restart.RestartType
|
|
// reboot info needed to handle reboots
|
|
rebootInfo *boot.RebootInfo
|
|
// set to remember that we need to exit the daemon in a way that
|
|
// prevents systemd from restarting it
|
|
restartSocket bool
|
|
// degradedErr is set when the daemon is in degraded mode
|
|
degradedErr error
|
|
|
|
expectedRebootDidNotHappen bool
|
|
|
|
mu sync.Mutex
|
|
cancel func()
|
|
}
|
|
|
|
// A ResponseFunc handles one of the individual verbs for a method
|
|
type ResponseFunc func(*Command, *http.Request, *auth.UserState) Response
|
|
|
|
// A Command routes a request to an individual per-verb ResponseFunc
|
|
type Command struct {
|
|
Path string
|
|
PathPrefix string
|
|
//
|
|
GET ResponseFunc
|
|
PUT ResponseFunc
|
|
POST ResponseFunc
|
|
|
|
// Access control.
|
|
ReadAccess accessChecker
|
|
WriteAccess accessChecker
|
|
|
|
d *Daemon
|
|
}
|
|
|
|
func (c *Command) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
|
st := c.d.state
|
|
st.Lock()
|
|
// TODO Look at the error and fail if there's an attempt to authenticate with invalid data.
|
|
user, _ := userFromRequest(st, r)
|
|
st.Unlock()
|
|
|
|
// check if we are in degradedMode
|
|
if c.d.degradedErr != nil && r.Method != "GET" {
|
|
InternalError(c.d.degradedErr.Error()).ServeHTTP(w, r)
|
|
return
|
|
}
|
|
|
|
ucred, err := ucrednetGet(r.RemoteAddr)
|
|
if err != nil && err != errNoID {
|
|
logger.Noticef("unexpected error when attempting to get UID: %s", err)
|
|
InternalError(err.Error()).ServeHTTP(w, r)
|
|
return
|
|
}
|
|
|
|
ctx := store.WithClientUserAgent(r.Context(), r)
|
|
r = r.WithContext(ctx)
|
|
|
|
var rspf ResponseFunc
|
|
var access accessChecker
|
|
|
|
switch r.Method {
|
|
case "GET":
|
|
rspf = c.GET
|
|
access = c.ReadAccess
|
|
case "PUT":
|
|
rspf = c.PUT
|
|
access = c.WriteAccess
|
|
case "POST":
|
|
rspf = c.POST
|
|
access = c.WriteAccess
|
|
}
|
|
|
|
if rspf == nil {
|
|
MethodNotAllowed("method %q not allowed", r.Method).ServeHTTP(w, r)
|
|
return
|
|
}
|
|
|
|
if rspe := access.CheckAccess(c.d, r, ucred, user); rspe != nil {
|
|
rspe.ServeHTTP(w, r)
|
|
return
|
|
}
|
|
|
|
rsp := rspf(c, r, user)
|
|
|
|
if srsp, ok := rsp.(StructuredResponse); ok {
|
|
rjson := srsp.JSON()
|
|
|
|
st.Lock()
|
|
_, rst := restart.Pending(st)
|
|
st.Unlock()
|
|
rjson.addMaintenanceFromRestartType(rst)
|
|
|
|
if rjson.Type != ResponseTypeError {
|
|
st.Lock()
|
|
count, stamp := st.WarningsSummary()
|
|
st.Unlock()
|
|
rjson.addWarningCount(count, stamp)
|
|
}
|
|
|
|
// serve the updated serialisation
|
|
rsp = rjson
|
|
}
|
|
|
|
rsp.ServeHTTP(w, r)
|
|
}
|
|
|
|
type wrappedWriter struct {
|
|
w http.ResponseWriter
|
|
s int
|
|
}
|
|
|
|
func (w *wrappedWriter) Header() http.Header {
|
|
return w.w.Header()
|
|
}
|
|
|
|
func (w *wrappedWriter) Write(bs []byte) (int, error) {
|
|
return w.w.Write(bs)
|
|
}
|
|
|
|
func (w *wrappedWriter) WriteHeader(s int) {
|
|
w.w.WriteHeader(s)
|
|
w.s = s
|
|
}
|
|
|
|
func (w *wrappedWriter) Flush() {
|
|
if f, ok := w.w.(http.Flusher); ok {
|
|
f.Flush()
|
|
}
|
|
}
|
|
|
|
func logit(handler http.Handler) http.Handler {
|
|
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
ww := &wrappedWriter{w: w}
|
|
t0 := time.Now()
|
|
handler.ServeHTTP(ww, r)
|
|
t := time.Since(t0)
|
|
url := r.URL.String()
|
|
if !strings.Contains(url, "/changes/") {
|
|
logger.Debugf("%s %s %s %s %d", r.RemoteAddr, r.Method, r.URL, t, ww.s)
|
|
}
|
|
})
|
|
}
|
|
|
|
// Init sets up the Daemon's internal workings.
|
|
// Don't call more than once.
|
|
func (d *Daemon) Init() error {
|
|
listenerMap, err := netutil.ActivationListeners()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// The SnapdSocket is required -- without it, die.
|
|
if listener, err := netutil.GetListener(dirs.SnapdSocket, listenerMap); err == nil {
|
|
d.snapdListener = &ucrednetListener{Listener: listener}
|
|
} else {
|
|
return fmt.Errorf("when trying to listen on %s: %v", dirs.SnapdSocket, err)
|
|
}
|
|
|
|
if listener, err := netutil.GetListener(dirs.SnapSocket, listenerMap); err == nil {
|
|
// This listener may also be nil if that socket wasn't among
|
|
// the listeners, so check it before using it.
|
|
d.snapListener = &ucrednetListener{Listener: listener}
|
|
} else {
|
|
logger.Debugf("cannot get listener for %q: %v", dirs.SnapSocket, err)
|
|
}
|
|
|
|
d.addRoutes()
|
|
|
|
logger.Noticef("started %v.", snapdenv.UserAgent())
|
|
|
|
return nil
|
|
}
|
|
|
|
// SetDegradedMode puts the daemon into a degraded mode. In this mode
|
|
// it will return the error given in the "err" argument for commands
|
|
// that are not pure HTTP GETs.
|
|
//
|
|
// This is useful to report errors to the client when the daemon
|
|
// cannot work because e.g. a snapd squashfs precondition check failed
|
|
// or the system is out of diskspace.
|
|
//
|
|
// When the system is fine again, calling "SetDegradedMode(nil)" is enough
|
|
// to put the daemon into full operation again.
|
|
func (d *Daemon) SetDegradedMode(err error) {
|
|
d.degradedErr = err
|
|
}
|
|
|
|
func (d *Daemon) addRoutes() {
|
|
d.router = mux.NewRouter()
|
|
|
|
for _, c := range api {
|
|
c.d = d
|
|
if c.PathPrefix == "" {
|
|
d.router.Handle(c.Path, c).Name(c.Path)
|
|
} else {
|
|
d.router.PathPrefix(c.PathPrefix).Handler(c).Name(c.PathPrefix)
|
|
}
|
|
}
|
|
|
|
// also maybe add a /favicon.ico handler...
|
|
|
|
d.router.NotFoundHandler = NotFound("not found")
|
|
}
|
|
|
|
var (
|
|
shutdownTimeout = 25 * time.Second
|
|
)
|
|
|
|
type connTracker struct {
|
|
mu sync.Mutex
|
|
conns map[net.Conn]struct{}
|
|
}
|
|
|
|
func (ct *connTracker) CanStandby() bool {
|
|
ct.mu.Lock()
|
|
defer ct.mu.Unlock()
|
|
|
|
return len(ct.conns) == 0
|
|
}
|
|
|
|
func (ct *connTracker) trackConn(conn net.Conn, state http.ConnState) {
|
|
ct.mu.Lock()
|
|
defer ct.mu.Unlock()
|
|
// we ignore hijacked connections, if we do things with websockets
|
|
// we'll need custom shutdown handling for them
|
|
if state == http.StateNew || state == http.StateActive {
|
|
ct.conns[conn] = struct{}{}
|
|
} else {
|
|
delete(ct.conns, conn)
|
|
}
|
|
}
|
|
|
|
func (d *Daemon) initStandbyHandling() {
|
|
d.standbyOpinions = standby.New(d.state)
|
|
d.standbyOpinions.AddOpinion(d.connTracker)
|
|
d.standbyOpinions.AddOpinion(d.overlord)
|
|
d.standbyOpinions.AddOpinion(d.overlord.SnapManager())
|
|
d.standbyOpinions.AddOpinion(d.overlord.DeviceManager())
|
|
d.standbyOpinions.Start()
|
|
}
|
|
|
|
// Start the Daemon. Takes a context which will be used as the base request
|
|
// context in the embedded http.Server.
|
|
func (d *Daemon) Start(ctx context.Context) (err error) {
|
|
if d.expectedRebootDidNotHappen {
|
|
// we need to schedule and wait for a system restart
|
|
d.tomb.Kill(nil)
|
|
// avoid systemd killing us again while we wait
|
|
systemdSdNotify("READY=1")
|
|
return nil
|
|
}
|
|
if d.overlord == nil {
|
|
panic("internal error: no Overlord")
|
|
}
|
|
|
|
ctx, cancel := context.WithCancel(ctx)
|
|
d.cancel = cancel
|
|
defer func() {
|
|
// cancel the context on any errors
|
|
if err != nil {
|
|
cancel()
|
|
}
|
|
}()
|
|
|
|
to, reasoning, err := d.overlord.StartupTimeout()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if to > 0 {
|
|
to = to.Round(time.Microsecond)
|
|
us := to.Nanoseconds() / 1000
|
|
logger.Noticef("adjusting startup timeout by %v (%s)", to, reasoning)
|
|
systemdSdNotify(fmt.Sprintf("EXTEND_TIMEOUT_USEC=%d", us))
|
|
}
|
|
// now perform expensive overlord/manages initialization
|
|
if err := d.overlord.StartUp(); err != nil {
|
|
if errors.Is(err, snapstate.ErrUnexpectedRuntimeRestart) {
|
|
logger.Noticef("detected failure recovery context, but no recovery needed")
|
|
return ErrNoFailureRecoveryNeeded
|
|
}
|
|
return err
|
|
}
|
|
|
|
d.connTracker = &connTracker{conns: make(map[net.Conn]struct{})}
|
|
d.serve = &http.Server{
|
|
Handler: logit(d.router),
|
|
ConnState: d.connTracker.trackConn,
|
|
BaseContext: func(net.Listener) context.Context {
|
|
// requests will use the context provided to Start, as
|
|
// the caller will likely cancel it when appropriate
|
|
// thus canceling any outstanding requests to the snapd
|
|
// API
|
|
return ctx
|
|
},
|
|
}
|
|
|
|
// enable standby handling
|
|
d.initStandbyHandling()
|
|
|
|
// before serving actual connections remove the maintenance.json file as we
|
|
// are no longer down for maintenance, this state most closely corresponds
|
|
// to restart.RestartUnset
|
|
if err := d.updateMaintenanceFile(restart.RestartUnset); err != nil {
|
|
return err
|
|
}
|
|
|
|
// the loop runs in its own goroutine
|
|
d.overlord.Loop()
|
|
|
|
d.tomb.Go(func() error {
|
|
if d.snapListener != nil {
|
|
d.tomb.Go(func() error {
|
|
if err := d.serve.Serve(d.snapListener); err != http.ErrServerClosed && d.tomb.Err() == tomb.ErrStillAlive {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
})
|
|
}
|
|
|
|
if err := d.serve.Serve(d.snapdListener); err != http.ErrServerClosed && d.tomb.Err() == tomb.ErrStillAlive {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
})
|
|
|
|
// notify systemd that we are ready
|
|
systemdSdNotify("READY=1")
|
|
return nil
|
|
}
|
|
|
|
// HandleRestart implements overlord.RestartBehavior.
|
|
func (d *Daemon) HandleRestart(t restart.RestartType, rebootInfo *boot.RebootInfo) {
|
|
d.mu.Lock()
|
|
defer d.mu.Unlock()
|
|
|
|
scheduleFallback := func(a boot.RebootAction) {
|
|
if err := reboot(a, rebootWaitTimeout, rebootInfo); err != nil {
|
|
logger.Noticef("%s", err)
|
|
}
|
|
}
|
|
d.rebootInfo = rebootInfo
|
|
|
|
// die when asked to restart (systemd should get us back up!) etc
|
|
switch t {
|
|
case restart.RestartDaemon:
|
|
// save the restart kind to write out a maintenance.json in a bit
|
|
d.requestedRestart = t
|
|
case restart.RestartSystem, restart.RestartSystemNow:
|
|
// try to schedule a fallback slow reboot already here
|
|
// in case we get stuck shutting down
|
|
|
|
// save the restart kind to write out a maintenance.json in a bit
|
|
scheduleFallback(boot.RebootReboot)
|
|
d.requestedRestart = t
|
|
case restart.RestartSystemHaltNow:
|
|
scheduleFallback(boot.RebootHalt)
|
|
d.requestedRestart = t
|
|
case restart.RestartSystemPoweroffNow:
|
|
scheduleFallback(boot.RebootPoweroff)
|
|
d.requestedRestart = t
|
|
case restart.RestartSocket:
|
|
// save the restart kind to write out a maintenance.json in a bit
|
|
d.requestedRestart = t
|
|
d.restartSocket = true
|
|
case restart.StopDaemon:
|
|
logger.Noticef("stopping snapd as requested")
|
|
default:
|
|
logger.Noticef("internal error: restart handler called with unknown restart type: %v", t)
|
|
}
|
|
|
|
d.tomb.Kill(nil)
|
|
}
|
|
|
|
var (
|
|
rebootNoticeWait = 3 * time.Second
|
|
rebootWaitTimeout = 10 * time.Minute
|
|
rebootRetryWaitTimeout = 5 * time.Minute
|
|
rebootMaxAttempts = 3
|
|
)
|
|
|
|
func (d *Daemon) updateMaintenanceFile(rst restart.RestartType) error {
|
|
// for unset restart, just remove the maintenance.json file
|
|
if rst == restart.RestartUnset {
|
|
err := os.Remove(dirs.SnapdMaintenanceFile)
|
|
// only return err if the error was something other than the file not
|
|
// existing
|
|
if err != nil && !os.IsNotExist(err) {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// otherwise marshal and write it out appropriately
|
|
b, err := json.Marshal(maintenanceForRestartType(rst))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
return osutil.AtomicWrite(dirs.SnapdMaintenanceFile, bytes.NewBuffer(b), 0644, 0)
|
|
}
|
|
|
|
// Stop shuts down the Daemon
|
|
func (d *Daemon) Stop(sigCh chan<- os.Signal) error {
|
|
// we need to schedule/wait for a system restart again
|
|
if d.expectedRebootDidNotHappen {
|
|
// make the reboot retry immediate
|
|
immediateReboot := true
|
|
// TODO: we do not know the RebootInfo from the previous snapd
|
|
// instance. Passing nil for the moment, but maybe we should
|
|
// cache to disk and recover at this point. In any case, it is
|
|
// expected that the reboot will not be harmful even if
|
|
// RebootInfo is unknown, and that things will end up in a
|
|
// kernel refresh failure, that can be retried later.
|
|
return d.doReboot(sigCh, restart.RestartSystem, nil, immediateReboot, rebootRetryWaitTimeout)
|
|
}
|
|
if d.overlord == nil {
|
|
return fmt.Errorf("internal error: no Overlord")
|
|
}
|
|
|
|
if d.cancel != nil {
|
|
d.cancel()
|
|
}
|
|
|
|
d.tomb.Kill(nil)
|
|
|
|
// check the state associated with a potential restart with the lock to
|
|
// prevent races
|
|
d.mu.Lock()
|
|
// needsFullShutdown is whether the entire system will
|
|
// shutdown or not as a consequence of this request
|
|
needsFullShutdown := false
|
|
restartType := d.requestedRestart
|
|
switch restartType {
|
|
case restart.RestartSystem, restart.RestartSystemNow, restart.RestartSystemHaltNow, restart.RestartSystemPoweroffNow:
|
|
needsFullShutdown = true
|
|
}
|
|
immediateShutdown := false
|
|
switch restartType {
|
|
case restart.RestartSystemNow, restart.RestartSystemHaltNow, restart.RestartSystemPoweroffNow:
|
|
immediateShutdown = true
|
|
}
|
|
restartSocket := d.restartSocket
|
|
rebootInfo := d.rebootInfo
|
|
d.mu.Unlock()
|
|
|
|
// before not accepting any new client connections we need to write the
|
|
// maintenance.json file for potential clients to see after the daemon stops
|
|
// responding so they can read it correctly and handle the maintenance
|
|
if err := d.updateMaintenanceFile(restartType); err != nil {
|
|
logger.Noticef("error writing maintenance file: %v", err)
|
|
}
|
|
|
|
// take a timestamp before shutting down the snap listener, and
|
|
// use the time we may spend on waiting for hooks against the shutdown
|
|
// delay.
|
|
ts := time.Now()
|
|
if d.snapListener != nil {
|
|
// stop running hooks first
|
|
// and do it more gracefully if we are restarting
|
|
hookMgr := d.overlord.HookManager()
|
|
d.state.Lock()
|
|
ok, _ := restart.Pending(d.state)
|
|
d.state.Unlock()
|
|
if ok {
|
|
logger.Noticef("gracefully waiting for running hooks")
|
|
hookMgr.GracefullyWaitRunningHooks()
|
|
logger.Noticef("done waiting for running hooks")
|
|
}
|
|
hookMgr.StopHooks()
|
|
d.snapListener.Close()
|
|
}
|
|
timeSpent := time.Since(ts)
|
|
|
|
// When shutting down the snapd listener wait until the rebootNoticeWait
|
|
// period has passed before snapdListener is closed to allow polling
|
|
// clients to access the daemon. For testing we disable this unless SNAPD_SHUTDOWN_DELAY
|
|
// has been set, to avoid incurring this wait for every daemon restart which happens
|
|
// quite often in testing.
|
|
if !snapdenv.Testing() || osutil.GetenvBool("SNAPD_SHUTDOWN_DELAY") {
|
|
time.Sleep(rebootNoticeWait - timeSpent)
|
|
}
|
|
d.snapdListener.Close()
|
|
d.standbyOpinions.Stop()
|
|
|
|
// We're using the background context here because the tomb's
|
|
// context will likely already have been cancelled when we are
|
|
// called.
|
|
ctx, cancel := context.WithTimeout(context.Background(), shutdownTimeout)
|
|
d.tomb.Kill(d.serve.Shutdown(ctx))
|
|
cancel()
|
|
|
|
if !needsFullShutdown {
|
|
// tell systemd that we are stopping
|
|
systemdSdNotify("STOPPING=1")
|
|
}
|
|
|
|
if restartSocket {
|
|
// At this point we processed all open requests (and
|
|
// stopped accepting new requests) - before going into
|
|
// socket activated mode we need to check if any of
|
|
// those open requests resulted in something that
|
|
// prevents us from going into socket activation mode.
|
|
//
|
|
// If this is the case we do a "normal" snapd restart
|
|
// to process the new changes.
|
|
if !d.standbyOpinions.CanStandby() {
|
|
d.restartSocket = false
|
|
}
|
|
}
|
|
d.overlord.Stop()
|
|
|
|
if err := d.tomb.Wait(); err != nil {
|
|
if err == context.DeadlineExceeded {
|
|
logger.Noticef("WARNING: cannot gracefully shut down in-flight snapd API activity within: %v", shutdownTimeout)
|
|
// the process is shutting down anyway, so we may just
|
|
// as well close the active connections right now
|
|
d.serve.Close()
|
|
} else {
|
|
// do not stop the shutdown even if the tomb errors
|
|
// because we already scheduled a slow shutdown and
|
|
// exiting here will just restart snapd (via systemd)
|
|
// which will lead to confusing results.
|
|
if needsFullShutdown {
|
|
logger.Noticef("WARNING: cannot stop daemon: %v", err)
|
|
} else {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
|
|
if needsFullShutdown {
|
|
return d.doReboot(sigCh, d.requestedRestart, rebootInfo, immediateShutdown, rebootWaitTimeout)
|
|
}
|
|
|
|
if d.restartSocket {
|
|
return ErrRestartSocket
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (d *Daemon) rebootDelay(immediate bool) (time.Duration, error) {
|
|
d.state.Lock()
|
|
defer d.state.Unlock()
|
|
now := time.Now()
|
|
// see whether a reboot had already been scheduled
|
|
var rebootAt time.Time
|
|
err := d.state.Get("daemon-system-restart-at", &rebootAt)
|
|
if err != nil && !errors.Is(err, state.ErrNoState) {
|
|
return 0, err
|
|
}
|
|
rebootDelay := 1 * time.Minute
|
|
if immediate {
|
|
rebootDelay = 0
|
|
}
|
|
if err == nil {
|
|
rebootDelay = rebootAt.Sub(now)
|
|
} else {
|
|
ovr := os.Getenv("SNAPD_REBOOT_DELAY") // for tests
|
|
if ovr != "" && !immediate {
|
|
d, err := time.ParseDuration(ovr)
|
|
if err == nil {
|
|
rebootDelay = d
|
|
}
|
|
}
|
|
rebootAt = now.Add(rebootDelay)
|
|
d.state.Set("daemon-system-restart-at", rebootAt)
|
|
}
|
|
return rebootDelay, nil
|
|
}
|
|
|
|
func (d *Daemon) doReboot(sigCh chan<- os.Signal, rst restart.RestartType, rbi *boot.RebootInfo, immediate bool, waitTimeout time.Duration) error {
|
|
rebootDelay, err := d.rebootDelay(immediate)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
action := boot.RebootReboot
|
|
switch rst {
|
|
case restart.RestartSystemHaltNow:
|
|
action = boot.RebootHalt
|
|
case restart.RestartSystemPoweroffNow:
|
|
action = boot.RebootPoweroff
|
|
}
|
|
// ask for shutdown and wait for it to happen.
|
|
// if we exit snapd will be restarted by systemd
|
|
if err := reboot(action, rebootDelay, rbi); err != nil {
|
|
return err
|
|
}
|
|
// wait for reboot to happen
|
|
logger.Noticef("Waiting for %s", action)
|
|
if sigCh != nil {
|
|
signal.Stop(sigCh)
|
|
if len(sigCh) > 0 {
|
|
// a signal arrived in between
|
|
return nil
|
|
}
|
|
close(sigCh)
|
|
}
|
|
time.Sleep(waitTimeout)
|
|
return fmt.Errorf("expected %s did not happen", action)
|
|
}
|
|
|
|
var reboot = boot.Reboot
|
|
|
|
// Dying is a tomb-ish thing
|
|
func (d *Daemon) Dying() <-chan struct{} {
|
|
return d.tomb.Dying()
|
|
}
|
|
|
|
func clearReboot(st *state.State) {
|
|
st.Set("daemon-system-restart-at", nil)
|
|
st.Set("daemon-system-restart-tentative", nil)
|
|
}
|
|
|
|
// RebootAsExpected implements part of overlord.RestartBehavior.
|
|
func (d *Daemon) RebootAsExpected(st *state.State) error {
|
|
clearReboot(st)
|
|
return nil
|
|
}
|
|
|
|
var errExpectedReboot = errors.New("expected reboot did not happen")
|
|
|
|
// RebootDidNotHappen implements part of overlord.RestartBehavior.
|
|
func (d *Daemon) RebootDidNotHappen(st *state.State) error {
|
|
var attempt int
|
|
err := st.Get("daemon-system-restart-tentative", &attempt)
|
|
if err != nil && !errors.Is(err, state.ErrNoState) {
|
|
return err
|
|
}
|
|
attempt++
|
|
if attempt > rebootMaxAttempts {
|
|
// giving up, proceed normally, some in-progress refresh
|
|
// might get rolled back!!
|
|
restart.ClearReboot(st)
|
|
clearReboot(st)
|
|
logger.Noticef("snapd was restarted while a system restart was expected, snapd retried to schedule and waited again for a system restart %d times and is giving up", rebootMaxAttempts)
|
|
return nil
|
|
}
|
|
st.Set("daemon-system-restart-tentative", attempt)
|
|
d.state = st
|
|
logger.Noticef("snapd was restarted while a system restart was expected, snapd will try to schedule and wait for a system restart again (attempt %d/%d)", attempt, rebootMaxAttempts)
|
|
return errExpectedReboot
|
|
}
|
|
|
|
// New Daemon
|
|
func New() (*Daemon, error) {
|
|
d := &Daemon{}
|
|
ovld, err := overlord.New(d)
|
|
if err == errExpectedReboot {
|
|
// we proceed without overlord until we reach Stop
|
|
// where we will schedule and wait again for a system restart.
|
|
// ATM we cannot do that in New because we need to satisfy
|
|
// systemd notify mechanisms.
|
|
d.expectedRebootDidNotHappen = true
|
|
return d, nil
|
|
}
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
d.overlord = ovld
|
|
d.state = ovld.State()
|
|
return d, nil
|
|
}
|