Files
Nicolas Lacasse ddfbb50ecc Don't log the task in CgroupPrepareMigrate warning.
This avoids racily accessing task fields.

Reported-by: syzbot+f6ec7db6c2c12de97f29@syzkaller.appspotmail.com
PiperOrigin-RevId: 652976375
2024-07-16 14:25:44 -07:00

290 lines
7.8 KiB
Go

// Copyright 2021 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package kernel
import (
"bytes"
"fmt"
"sort"
"strings"
"gvisor.dev/gvisor/pkg/errors/linuxerr"
"gvisor.dev/gvisor/pkg/log"
)
// EnterInitialCgroups moves t into an initial set of cgroups.
// If initCgroups is not nil, the new task will be placed in the specified cgroups.
// Otherwise, if parent is not nil, the new task will be placed in the parent's cgroups.
// If neither is specified, the new task will be in the root cgroups.
//
// This is analogous to Linux's kernel/cgroup/cgroup.c:cgroup_css_set_fork().
//
// Precondition: t isn't in any cgroups yet, t.cgroups is empty.
func (t *Task) EnterInitialCgroups(parent *Task, initCgroups map[Cgroup]struct{}) {
var inherit map[Cgroup]struct{}
if initCgroups != nil {
inherit = initCgroups
} else if parent != nil {
parent.mu.Lock()
defer parent.mu.Unlock()
inherit = parent.cgroups
}
joinSet := t.k.cgroupRegistry.computeInitialGroups(inherit)
t.mu.NestedLock(taskLockChild)
defer t.mu.NestedUnlock(taskLockChild)
// Transfer ownership of joinSet refs to the task's cgset.
t.cgroups = joinSet
for c := range t.cgroups {
// Since t isn't in any cgroup yet, we can skip the check against
// existing cgroups.
c.Enter(t)
t.SetMemCgIDFromCgroup(c)
}
}
// SetMemCgID sets the given memory cgroup id to the task.
func (t *Task) SetMemCgID(memCgID uint32) {
t.memCgID.Store(memCgID)
}
// SetMemCgIDFromCgroup sets the id of the given memory cgroup to the task.
func (t *Task) SetMemCgIDFromCgroup(cg Cgroup) {
for _, ctl := range cg.Controllers() {
if ctl.Type() == CgroupControllerMemory {
t.SetMemCgID(cg.ID())
return
}
}
}
// ResetMemCgIDFromCgroup sets the memory cgroup id to zero, if the task has
// a memory cgroup.
func (t *Task) ResetMemCgIDFromCgroup(cg Cgroup) {
for _, ctl := range cg.Controllers() {
if ctl.Type() == CgroupControllerMemory {
t.SetMemCgID(0)
return
}
}
}
// EnterCgroup moves t into c.
func (t *Task) EnterCgroup(c Cgroup) error {
newControllers := make(map[CgroupControllerType]struct{})
for _, ctl := range c.Controllers() {
newControllers[ctl.Type()] = struct{}{}
}
t.mu.Lock()
defer t.mu.Unlock()
for oldCG := range t.cgroups {
if oldCG.HierarchyID() == c.HierarchyID() {
log.Warningf("Cannot enter new cgroup %v due to conflicting controllers. Try migrate instead?", c)
return linuxerr.EBUSY
}
}
// No migration required.
t.enterCgroupLocked(c)
return nil
}
// +checklocks:t.mu
func (t *Task) enterCgroupLocked(c Cgroup) {
c.IncRef()
t.cgroups[c] = struct{}{}
c.Enter(t)
t.SetMemCgIDFromCgroup(c)
}
// +checklocks:t.mu
func (t *Task) enterCgroupIfNotYetLocked(c Cgroup) {
if _, ok := t.cgroups[c]; ok {
return
}
t.enterCgroupLocked(c)
}
// LeaveCgroups removes t out from all its cgroups.
func (t *Task) LeaveCgroups() {
t.tg.pidns.owner.mu.Lock() // Prevent migration.
t.mu.Lock()
cgs := t.cgroups
t.cgroups = nil
for c := range cgs {
c.Leave(t)
}
t.SetMemCgID(0)
t.mu.Unlock()
t.tg.pidns.owner.mu.Unlock()
for c := range cgs {
c.decRef()
}
}
// +checklocks:t.mu
func (t *Task) findCgroupWithMatchingHierarchyLocked(other Cgroup) (Cgroup, bool) {
for c := range t.cgroups {
if c.HierarchyID() != other.HierarchyID() {
continue
}
return c, true
}
return Cgroup{}, false
}
// CgroupPrepareMigrate starts a cgroup migration for this task to dst. The
// migration must be completed through the returned context.
func (t *Task) CgroupPrepareMigrate(dst Cgroup) (*CgroupMigrationContext, error) {
t.mu.Lock()
defer t.mu.Unlock()
src, found := t.findCgroupWithMatchingHierarchyLocked(dst)
if !found {
log.Warningf("Cannot migrate to cgroup %v since task not currently in target hierarchy %v", dst, dst.HierarchyID())
return nil, linuxerr.EINVAL
}
if err := dst.PrepareMigrate(t, &src); err != nil {
return nil, err
}
return &CgroupMigrationContext{
src: src,
dst: dst,
t: t,
}, nil
}
// MigrateCgroup migrates all tasks in tg to the dst cgroup. Either all tasks
// are migrated, or none are. Atomicity of migrations wrt cgroup membership
// (i.e. a task can't switch cgroups mid-migration due to another migration) is
// guaranteed because migrations are serialized by TaskSet.mu.
func (tg *ThreadGroup) MigrateCgroup(dst Cgroup) error {
tg.pidns.owner.mu.RLock()
defer tg.pidns.owner.mu.RUnlock()
var ctxs []*CgroupMigrationContext
// Prepare migrations. On partial failure, abort.
for t := tg.tasks.Front(); t != nil; t = t.Next() {
ctx, err := t.CgroupPrepareMigrate(dst)
if err != nil {
// Rollback.
for _, ctx := range ctxs {
ctx.Abort()
}
return err
}
ctxs = append(ctxs, ctx)
}
// All migrations are now guaranteed to succeed.
for _, ctx := range ctxs {
ctx.Commit()
}
return nil
}
// MigrateCgroup migrates this task to the dst cgroup.
func (t *Task) MigrateCgroup(dst Cgroup) error {
t.tg.pidns.owner.mu.RLock()
defer t.tg.pidns.owner.mu.RUnlock()
ctx, err := t.CgroupPrepareMigrate(dst)
if err != nil {
return err
}
ctx.Commit()
return nil
}
// TaskCgroupEntry represents a line in /proc/<pid>/cgroup, and is used to
// format a cgroup for display.
type TaskCgroupEntry struct {
HierarchyID uint32 `json:"hierarchy_id"`
Controllers string `json:"controllers,omitempty"`
Path string `json:"path,omitempty"`
}
// GetCgroupEntries generates the contents of /proc/<pid>/cgroup as
// a TaskCgroupEntry array.
func (t *Task) GetCgroupEntries() []TaskCgroupEntry {
t.mu.Lock()
defer t.mu.Unlock()
cgEntries := make([]TaskCgroupEntry, 0, len(t.cgroups))
for c := range t.cgroups {
ctls := c.Controllers()
ctlNames := make([]string, 0, len(ctls))
// We're guaranteed to have a valid name, a non-empty controller list,
// or both.
// Explicit hierarchy name, if any.
if name := c.Name(); name != "" {
ctlNames = append(ctlNames, fmt.Sprintf("name=%s", name))
}
// Controllers attached to this hierarchy, if any.
for _, ctl := range ctls {
ctlNames = append(ctlNames, string(ctl.Type()))
}
cgEntries = append(cgEntries, TaskCgroupEntry{
HierarchyID: c.HierarchyID(),
Controllers: strings.Join(ctlNames, ","),
Path: c.Path(),
})
}
sort.Slice(cgEntries, func(i, j int) bool { return cgEntries[i].HierarchyID > cgEntries[j].HierarchyID })
return cgEntries
}
// GenerateProcTaskCgroup writes the contents of /proc/<pid>/cgroup for t to buf.
func (t *Task) GenerateProcTaskCgroup(buf *bytes.Buffer) {
cgEntries := t.GetCgroupEntries()
for _, cgE := range cgEntries {
fmt.Fprintf(buf, "%d:%s:%s\n", cgE.HierarchyID, cgE.Controllers, cgE.Path)
}
}
// +checklocks:t.mu
func (t *Task) chargeLocked(target *Task, ctl CgroupControllerType, res CgroupResourceType, value int64) (bool, Cgroup, error) {
// Due to the uniqueness of controllers on hierarchies, at most one cgroup
// in t.cgroups will match.
for c := range t.cgroups {
err := c.Charge(target, c.Dentry, ctl, res, value)
if err == nil {
c.IncRef()
}
return err == nil, c, err
}
return false, Cgroup{}, nil
}
// ChargeFor charges t's cgroup on behalf of some other task. Returns
// the cgroup that's charged if any. Returned cgroup has an extra ref
// that's transferred to the caller.
func (t *Task) ChargeFor(other *Task, ctl CgroupControllerType, res CgroupResourceType, value int64) (bool, Cgroup, error) {
t.mu.Lock()
defer t.mu.Unlock()
return t.chargeLocked(other, ctl, res, value)
}