mirror of
https://github.com/netbirdio/gvisor.git
synced 2026-05-22 17:12:49 -07:00
ddfbb50ecc
This avoids racily accessing task fields. Reported-by: syzbot+f6ec7db6c2c12de97f29@syzkaller.appspotmail.com PiperOrigin-RevId: 652976375
290 lines
7.8 KiB
Go
290 lines
7.8 KiB
Go
// Copyright 2021 The gVisor Authors.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package kernel
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"sort"
|
|
"strings"
|
|
|
|
"gvisor.dev/gvisor/pkg/errors/linuxerr"
|
|
"gvisor.dev/gvisor/pkg/log"
|
|
)
|
|
|
|
// EnterInitialCgroups moves t into an initial set of cgroups.
|
|
// If initCgroups is not nil, the new task will be placed in the specified cgroups.
|
|
// Otherwise, if parent is not nil, the new task will be placed in the parent's cgroups.
|
|
// If neither is specified, the new task will be in the root cgroups.
|
|
//
|
|
// This is analogous to Linux's kernel/cgroup/cgroup.c:cgroup_css_set_fork().
|
|
//
|
|
// Precondition: t isn't in any cgroups yet, t.cgroups is empty.
|
|
func (t *Task) EnterInitialCgroups(parent *Task, initCgroups map[Cgroup]struct{}) {
|
|
var inherit map[Cgroup]struct{}
|
|
if initCgroups != nil {
|
|
inherit = initCgroups
|
|
} else if parent != nil {
|
|
parent.mu.Lock()
|
|
defer parent.mu.Unlock()
|
|
inherit = parent.cgroups
|
|
}
|
|
joinSet := t.k.cgroupRegistry.computeInitialGroups(inherit)
|
|
|
|
t.mu.NestedLock(taskLockChild)
|
|
defer t.mu.NestedUnlock(taskLockChild)
|
|
// Transfer ownership of joinSet refs to the task's cgset.
|
|
t.cgroups = joinSet
|
|
for c := range t.cgroups {
|
|
// Since t isn't in any cgroup yet, we can skip the check against
|
|
// existing cgroups.
|
|
c.Enter(t)
|
|
t.SetMemCgIDFromCgroup(c)
|
|
}
|
|
}
|
|
|
|
// SetMemCgID sets the given memory cgroup id to the task.
|
|
func (t *Task) SetMemCgID(memCgID uint32) {
|
|
t.memCgID.Store(memCgID)
|
|
}
|
|
|
|
// SetMemCgIDFromCgroup sets the id of the given memory cgroup to the task.
|
|
func (t *Task) SetMemCgIDFromCgroup(cg Cgroup) {
|
|
for _, ctl := range cg.Controllers() {
|
|
if ctl.Type() == CgroupControllerMemory {
|
|
t.SetMemCgID(cg.ID())
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// ResetMemCgIDFromCgroup sets the memory cgroup id to zero, if the task has
|
|
// a memory cgroup.
|
|
func (t *Task) ResetMemCgIDFromCgroup(cg Cgroup) {
|
|
for _, ctl := range cg.Controllers() {
|
|
if ctl.Type() == CgroupControllerMemory {
|
|
t.SetMemCgID(0)
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// EnterCgroup moves t into c.
|
|
func (t *Task) EnterCgroup(c Cgroup) error {
|
|
newControllers := make(map[CgroupControllerType]struct{})
|
|
for _, ctl := range c.Controllers() {
|
|
newControllers[ctl.Type()] = struct{}{}
|
|
}
|
|
|
|
t.mu.Lock()
|
|
defer t.mu.Unlock()
|
|
|
|
for oldCG := range t.cgroups {
|
|
if oldCG.HierarchyID() == c.HierarchyID() {
|
|
log.Warningf("Cannot enter new cgroup %v due to conflicting controllers. Try migrate instead?", c)
|
|
return linuxerr.EBUSY
|
|
}
|
|
}
|
|
|
|
// No migration required.
|
|
t.enterCgroupLocked(c)
|
|
|
|
return nil
|
|
}
|
|
|
|
// +checklocks:t.mu
|
|
func (t *Task) enterCgroupLocked(c Cgroup) {
|
|
c.IncRef()
|
|
t.cgroups[c] = struct{}{}
|
|
c.Enter(t)
|
|
t.SetMemCgIDFromCgroup(c)
|
|
}
|
|
|
|
// +checklocks:t.mu
|
|
func (t *Task) enterCgroupIfNotYetLocked(c Cgroup) {
|
|
if _, ok := t.cgroups[c]; ok {
|
|
return
|
|
}
|
|
t.enterCgroupLocked(c)
|
|
}
|
|
|
|
// LeaveCgroups removes t out from all its cgroups.
|
|
func (t *Task) LeaveCgroups() {
|
|
t.tg.pidns.owner.mu.Lock() // Prevent migration.
|
|
t.mu.Lock()
|
|
cgs := t.cgroups
|
|
t.cgroups = nil
|
|
for c := range cgs {
|
|
c.Leave(t)
|
|
}
|
|
t.SetMemCgID(0)
|
|
t.mu.Unlock()
|
|
t.tg.pidns.owner.mu.Unlock()
|
|
|
|
for c := range cgs {
|
|
c.decRef()
|
|
}
|
|
}
|
|
|
|
// +checklocks:t.mu
|
|
func (t *Task) findCgroupWithMatchingHierarchyLocked(other Cgroup) (Cgroup, bool) {
|
|
for c := range t.cgroups {
|
|
if c.HierarchyID() != other.HierarchyID() {
|
|
continue
|
|
}
|
|
return c, true
|
|
}
|
|
return Cgroup{}, false
|
|
}
|
|
|
|
// CgroupPrepareMigrate starts a cgroup migration for this task to dst. The
|
|
// migration must be completed through the returned context.
|
|
func (t *Task) CgroupPrepareMigrate(dst Cgroup) (*CgroupMigrationContext, error) {
|
|
t.mu.Lock()
|
|
defer t.mu.Unlock()
|
|
src, found := t.findCgroupWithMatchingHierarchyLocked(dst)
|
|
if !found {
|
|
log.Warningf("Cannot migrate to cgroup %v since task not currently in target hierarchy %v", dst, dst.HierarchyID())
|
|
return nil, linuxerr.EINVAL
|
|
}
|
|
if err := dst.PrepareMigrate(t, &src); err != nil {
|
|
return nil, err
|
|
}
|
|
return &CgroupMigrationContext{
|
|
src: src,
|
|
dst: dst,
|
|
t: t,
|
|
}, nil
|
|
}
|
|
|
|
// MigrateCgroup migrates all tasks in tg to the dst cgroup. Either all tasks
|
|
// are migrated, or none are. Atomicity of migrations wrt cgroup membership
|
|
// (i.e. a task can't switch cgroups mid-migration due to another migration) is
|
|
// guaranteed because migrations are serialized by TaskSet.mu.
|
|
func (tg *ThreadGroup) MigrateCgroup(dst Cgroup) error {
|
|
tg.pidns.owner.mu.RLock()
|
|
defer tg.pidns.owner.mu.RUnlock()
|
|
|
|
var ctxs []*CgroupMigrationContext
|
|
|
|
// Prepare migrations. On partial failure, abort.
|
|
for t := tg.tasks.Front(); t != nil; t = t.Next() {
|
|
ctx, err := t.CgroupPrepareMigrate(dst)
|
|
if err != nil {
|
|
// Rollback.
|
|
for _, ctx := range ctxs {
|
|
ctx.Abort()
|
|
}
|
|
return err
|
|
}
|
|
ctxs = append(ctxs, ctx)
|
|
}
|
|
|
|
// All migrations are now guaranteed to succeed.
|
|
|
|
for _, ctx := range ctxs {
|
|
ctx.Commit()
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// MigrateCgroup migrates this task to the dst cgroup.
|
|
func (t *Task) MigrateCgroup(dst Cgroup) error {
|
|
t.tg.pidns.owner.mu.RLock()
|
|
defer t.tg.pidns.owner.mu.RUnlock()
|
|
|
|
ctx, err := t.CgroupPrepareMigrate(dst)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
ctx.Commit()
|
|
return nil
|
|
}
|
|
|
|
// TaskCgroupEntry is one line of /proc/<pid>/cgroup in structured form. It is
// used to format a cgroup for display.
type TaskCgroupEntry struct {
	// HierarchyID is the ID of the hierarchy the cgroup is on.
	HierarchyID uint32 `json:"hierarchy_id"`

	// Controllers is a comma-separated list: an optional "name=<name>" item
	// for a named hierarchy, followed by the attached controller types.
	Controllers string `json:"controllers,omitempty"`

	// Path is the cgroup's path as reported by the cgroup itself.
	Path string `json:"path,omitempty"`
}
|
|
|
|
// GetCgroupEntries generates the contents of /proc/<pid>/cgroup as
|
|
// a TaskCgroupEntry array.
|
|
func (t *Task) GetCgroupEntries() []TaskCgroupEntry {
|
|
t.mu.Lock()
|
|
defer t.mu.Unlock()
|
|
|
|
cgEntries := make([]TaskCgroupEntry, 0, len(t.cgroups))
|
|
for c := range t.cgroups {
|
|
ctls := c.Controllers()
|
|
ctlNames := make([]string, 0, len(ctls))
|
|
|
|
// We're guaranteed to have a valid name, a non-empty controller list,
|
|
// or both.
|
|
|
|
// Explicit hierarchy name, if any.
|
|
if name := c.Name(); name != "" {
|
|
ctlNames = append(ctlNames, fmt.Sprintf("name=%s", name))
|
|
}
|
|
|
|
// Controllers attached to this hierarchy, if any.
|
|
for _, ctl := range ctls {
|
|
ctlNames = append(ctlNames, string(ctl.Type()))
|
|
}
|
|
|
|
cgEntries = append(cgEntries, TaskCgroupEntry{
|
|
HierarchyID: c.HierarchyID(),
|
|
Controllers: strings.Join(ctlNames, ","),
|
|
Path: c.Path(),
|
|
})
|
|
}
|
|
|
|
sort.Slice(cgEntries, func(i, j int) bool { return cgEntries[i].HierarchyID > cgEntries[j].HierarchyID })
|
|
return cgEntries
|
|
}
|
|
|
|
// GenerateProcTaskCgroup writes the contents of /proc/<pid>/cgroup for t to buf.
|
|
func (t *Task) GenerateProcTaskCgroup(buf *bytes.Buffer) {
|
|
cgEntries := t.GetCgroupEntries()
|
|
for _, cgE := range cgEntries {
|
|
fmt.Fprintf(buf, "%d:%s:%s\n", cgE.HierarchyID, cgE.Controllers, cgE.Path)
|
|
}
|
|
}
|
|
|
|
// +checklocks:t.mu
|
|
func (t *Task) chargeLocked(target *Task, ctl CgroupControllerType, res CgroupResourceType, value int64) (bool, Cgroup, error) {
|
|
// Due to the uniqueness of controllers on hierarchies, at most one cgroup
|
|
// in t.cgroups will match.
|
|
for c := range t.cgroups {
|
|
err := c.Charge(target, c.Dentry, ctl, res, value)
|
|
if err == nil {
|
|
c.IncRef()
|
|
}
|
|
return err == nil, c, err
|
|
}
|
|
return false, Cgroup{}, nil
|
|
}
|
|
|
|
// ChargeFor charges t's cgroup on behalf of some other task. Returns
|
|
// the cgroup that's charged if any. Returned cgroup has an extra ref
|
|
// that's transferred to the caller.
|
|
func (t *Task) ChargeFor(other *Task, ctl CgroupControllerType, res CgroupResourceType, value int64) (bool, Cgroup, error) {
|
|
t.mu.Lock()
|
|
defer t.mu.Unlock()
|
|
return t.chargeLocked(other, ctl, res, value)
|
|
}
|