Files
gvisor/pkg/sentry/kernel/kernel_restore.go
2025-01-29 21:16:51 -08:00

195 lines
5.4 KiB
Go

// Copyright 2024 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package kernel
import (
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sync"
)
// Saver is an interface for saving the kernel.
//
// An implementation is installed with Kernel.SetSaver and retrieved with
// Kernel.Saver.
type Saver interface {
// SaveAsync presumably starts a save without waiting for it to finish and
// returns an error if it could not be started; the exact contract is defined
// by the implementation (not visible in this file).
SaveAsync() error
// SpecEnviron presumably returns the environment variables from the spec of
// the container named containerName; confirm against the implementation.
SpecEnviron(containerName string) []string
}
// CheckpointGeneration stores information about the last checkpoint taken.
//
// Values of this type are handed to checkpoint waiters and returned by value
// from Kernel.CheckpointGen.
//
// +stateify savable
type CheckpointGeneration struct {
// Count is incremented every time a checkpoint is triggered, even if the
// checkpoint failed. In this file it is bumped by both OnCheckpointAttempt
// and OnRestoreDone.
Count uint32
// Restore indicates if the current instance resumed after the checkpoint or
// it was restored from a checkpoint. OnRestoreDone sets it to true and
// OnCheckpointAttempt sets it to false.
Restore bool
}
// AddStateToCheckpoint records v under key in the kernel's additional
// checkpoint state, replacing any value previously stored under the same key.
// The backing map is created on first use. The update is performed while
// holding checkpointMu.
func (k *Kernel) AddStateToCheckpoint(key, v any) {
	k.checkpointMu.Lock()
	defer k.checkpointMu.Unlock()

	if k.additionalCheckpointState != nil {
		k.additionalCheckpointState[key] = v
		return
	}
	// First entry: allocate the map together with its initial pair.
	k.additionalCheckpointState = map[any]any{key: v}
}
// PopCheckpointState removes the entry stored under key from the additional
// checkpoint state and returns its value. It returns nil when no entry exists
// for key, so a stored nil value cannot be told apart from a missing one. The
// lookup and removal are performed while holding checkpointMu.
func (k *Kernel) PopCheckpointState(key any) any {
	k.checkpointMu.Lock()
	defer k.checkpointMu.Unlock()

	val, found := k.additionalCheckpointState[key]
	if !found {
		return nil
	}
	delete(k.additionalCheckpointState, key)
	return val
}
// SetSaver installs s as the kernel's Saver, replacing any previous one.
// The assignment is performed while holding checkpointMu.
func (k *Kernel) SetSaver(s Saver) {
	k.checkpointMu.Lock()
	k.saver = s
	k.checkpointMu.Unlock()
}
// Saver returns the Saver most recently installed with SetSaver, or nil if
// none has been set. The field is read while holding checkpointMu.
func (k *Kernel) Saver() Saver {
	k.checkpointMu.Lock()
	current := k.saver
	k.checkpointMu.Unlock()
	return current
}
// CheckpointGen returns a copy of the kernel's current checkpoint generation,
// taken while holding checkpointMu.
func (k *Kernel) CheckpointGen() CheckpointGeneration {
	k.checkpointMu.Lock()
	snapshot := k.checkpointGen
	k.checkpointMu.Unlock()
	return snapshot
}
// OnRestoreDone is called to notify the kernel that a checkpoint restore has been
// completed successfully.
//
// It advances the checkpoint generation, marks it as restored, and then
// notifies every registered checkpoint waiter whose target count has been
// reached, passing a nil error.
func (k *Kernel) OnRestoreDone() {
	k.checkpointMu.Lock()
	k.checkpointGen.Count++
	k.checkpointGen.Restore = true
	// Snapshot the generation so that waiters can be signalled after the lock
	// is dropped.
	gen := k.checkpointGen
	k.checkpointMu.Unlock()

	// Signal without holding checkpointMu. CheckpointWaitable.Register holds
	// the waitable's mutex while calling CheckpointGen (which takes
	// checkpointMu); signalling with checkpointMu held would acquire the two
	// locks in the opposite order and could deadlock against a concurrent
	// Register. It would also deadlock any callback that calls back into the
	// kernel's checkpoint accessors.
	k.CheckpointWait.signal(gen, nil)
}
// OnCheckpointAttempt is called when a checkpoint attempt is completed. err is
// any checkpoint errors that may have occurred.
//
// The outcome is logged, the checkpoint generation is advanced (for failed
// attempts too) with Restore cleared, and every registered checkpoint waiter
// whose target count has been reached is notified with err.
func (k *Kernel) OnCheckpointAttempt(err error) {
	if err == nil {
		log.Infof("Checkpoint completed successfully.")
	} else {
		log.Warningf("Checkpoint attempt failed with error: %v", err)
	}

	k.checkpointMu.Lock()
	k.checkpointGen.Count++
	k.checkpointGen.Restore = false
	// Snapshot the generation so that waiters can be signalled after the lock
	// is dropped.
	gen := k.checkpointGen
	k.checkpointMu.Unlock()

	// Signal without holding checkpointMu. CheckpointWaitable.Register holds
	// the waitable's mutex while calling CheckpointGen (which takes
	// checkpointMu); signalling with checkpointMu held would acquire the two
	// locks in the opposite order and could deadlock against a concurrent
	// Register (for example one issued by WaitForCheckpoint).
	k.CheckpointWait.signal(gen, err)
}
// WaitForCheckpoint blocks until the checkpoint generation advances past the
// value observed on entry, then returns the error delivered with that
// notification. A nil result therefore means either that the next checkpoint
// attempt succeeded or that a restore completed (OnRestoreDone also advances
// the generation and signals with a nil error).
func (k *Kernel) WaitForCheckpoint() error {
	// Buffered so the callback never blocks, even if it fires from inside
	// Register before this goroutine starts receiving.
	result := make(chan error, 1)
	target := k.CheckpointGen().Count + 1
	waiter := k.CheckpointWait.Register(func(_ CheckpointGeneration, err error) {
		result <- err
	}, target)
	defer k.CheckpointWait.Unregister(waiter)
	return <-result
}
// checkpointWaiter is a single registration held by a CheckpointWaitable. A
// pointer to it is used both as the key in the waiter set and as the opaque
// handle returned by Register and accepted by Unregister.
type checkpointWaiter struct {
// count indicates the checkpoint generation that this waiter is interested in.
// The waiter is notified once the generation count is greater than or equal
// to this value.
count uint32
// callback is the function that will be called when the checkpoint generation
// reaches the desired count. It is set to nil after the callback is called,
// which prevents it from being invoked more than once.
callback func(CheckpointGeneration, error)
}
// CheckpointWaitable is a waitable object that waits for a
// checkpoint to complete.
//
// Callers add themselves with Register and remove themselves with Unregister;
// the kernel notifies them through signal.
//
// +stateify savable
type CheckpointWaitable struct {
// k is the kernel whose checkpoint generation Register consults.
k *Kernel
// mu protects waiters. It is held by Register, Unregister and signal,
// including while callbacks are invoked.
mu sync.Mutex `state:"nosave"`
// Don't save the waiters, because they are repopulated after restore. It also
// allows for external entities to wait for the checkpoint.
// The map is allocated lazily by Register and reset to nil by Unregister
// once it becomes empty.
waiters map[*checkpointWaiter]struct{} `state:"nosave"`
}
// Register adds cb as a waiter that is invoked once the checkpoint generation
// count reaches or exceeds count. If the kernel's generation is already at or
// beyond count, cb is invoked immediately (with a nil error) before Register
// returns. cb is called at most once and runs with the waitable's mutex held.
//
// The returned value is an opaque handle that must later be passed to
// Unregister.
func (w *CheckpointWaitable) Register(cb func(CheckpointGeneration, error), count uint32) any {
	w.mu.Lock()
	defer w.mu.Unlock()

	if w.waiters == nil {
		w.waiters = make(map[*checkpointWaiter]struct{})
	}
	entry := &checkpointWaiter{count: count, callback: cb}
	w.waiters[entry] = struct{}{}

	gen := w.k.CheckpointGen()
	if gen.Count < count {
		// Target generation not reached yet; signal will notify later.
		return entry
	}
	// The requested generation has already happened: notify right away and
	// clear the callback so that signal does not fire it a second time.
	entry.callback(gen, nil)
	entry.callback = nil
	return entry
}
// Unregister removes the waiter identified by key, which must be a handle
// previously returned by Register (any other dynamic type causes a panic).
// It must be called even if the waiter's callback has already been invoked,
// since notification does not remove the waiter from the set. The waiter set
// is released once it becomes empty.
func (w *CheckpointWaitable) Unregister(key any) {
	w.mu.Lock()
	defer w.mu.Unlock()

	entry := key.(*checkpointWaiter)
	delete(w.waiters, entry)
	if len(w.waiters) > 0 {
		return
	}
	w.waiters = nil
}
// signal notifies every registered waiter that has not been notified yet and
// whose target count is at most gen.Count, passing gen and err to its
// callback. Each notified waiter's callback is then cleared so it cannot fire
// again. Callbacks run with the waitable's mutex held.
func (w *CheckpointWaitable) signal(gen CheckpointGeneration, err error) {
	w.mu.Lock()
	defer w.mu.Unlock()

	for entry := range w.waiters {
		// Skip waiters that were already notified or are waiting for a later
		// generation.
		if entry.callback == nil || entry.count > gen.Count {
			continue
		}
		entry.callback(gen, err)
		entry.callback = nil
	}
}