Enable leak checkers for runsc tests

Updates #4572

PiperOrigin-RevId: 597307765
This commit is contained in:
Andrei Vagin
2024-01-10 11:27:35 -08:00
committed by gVisor bot
parent 94e83c674e
commit 5b33e4a3d8
10 changed files with 120 additions and 15 deletions
+1 -1
View File
@@ -66,7 +66,7 @@ type fdTracker struct {
// DonateFD implements Communicator.DonateFD.
func (d *fdTracker) DonateFD(fd int) {
// Try to make the FD non-blocking.
if err := unix.SetNonblock(fd, true); err != nil {
if err := unix.SetNonblock(fd, true); err != nil && err != unix.EBADF {
// This may fail if fd was opened with O_PATH, because fcntl(F_SETFL) fails
// with EBADF on O_PATH FDs.
log.Warningf("DonateFD: unix.SetNonblock() failed on FD %d: %v", fd, err)
+5 -3
View File
@@ -98,7 +98,7 @@ func (r *RegistryImpl) Get(ctx context.Context, name string, access mq.AccessTyp
return nil, false, linuxerr.EACCES
}
fd, err := r.newFD(qInode.queue, qInode, access, block, flags)
fd, err := r.newFD(ctx, qInode.queue, qInode, access, block, flags)
if err != nil {
return nil, false, err
}
@@ -113,7 +113,7 @@ func (r *RegistryImpl) New(ctx context.Context, name string, q *mq.Queue, access
if err != nil {
return nil, err
}
return r.newFD(q, qInode, access, block, flags)
return r.newFD(ctx, q, qInode, access, block, flags)
}
// Unlink implements mq.RegistryImpl.Unlink.
@@ -128,6 +128,7 @@ func (r *RegistryImpl) Unlink(ctx context.Context, name string) error {
if err != nil {
return err
}
defer inode.DecRef(ctx)
return root.Unlink(ctx, name, inode)
}
@@ -138,7 +139,7 @@ func (r *RegistryImpl) Destroy(ctx context.Context) {
}
// newFD returns a new file description created using the given queue and inode.
func (r *RegistryImpl) newFD(q *mq.Queue, inode *queueInode, access mq.AccessType, block bool, flags uint32) (*vfs.FileDescription, error) {
func (r *RegistryImpl) newFD(ctx context.Context, q *mq.Queue, inode *queueInode, access mq.AccessType, block bool, flags uint32) (*vfs.FileDescription, error) {
view, err := mq.NewView(q, access, block)
if err != nil {
return nil, err
@@ -146,6 +147,7 @@ func (r *RegistryImpl) newFD(q *mq.Queue, inode *queueInode, access mq.AccessTyp
var dentry kernfs.Dentry
dentry.Init(&r.fs.Filesystem, inode)
defer dentry.DecRef(ctx)
fd := &queueFD{queue: view}
err = fd.Init(r.mount, &dentry, inode.queue, inode.Locks(), flags)
+1
View File
@@ -404,6 +404,7 @@ func (r *CgroupRegistry) FindCgroup(ctx context.Context, ctype CgroupControllerT
if vfsfs == nil {
return Cgroup{}, fmt.Errorf("controller not active")
}
defer vfsfs.DecRef(ctx)
rootCG := vfsfs.Impl().(cgroupFS).RootCgroup()
+4 -2
View File
@@ -1027,6 +1027,8 @@ func (k *Kernel) CreateProcess(args CreateProcessArgs) (*ThreadGroup, ThreadID,
// A task with no parent starts out with no session keyring.
SessionKeyring: nil,
}
config.UTSNamespace.IncRef()
config.IPCNamespace.IncRef()
config.NetworkNamespace.IncRef()
t, err := k.tasks.NewTask(ctx, config)
if err != nil {
@@ -1381,13 +1383,11 @@ func (k *Kernel) RootUserNamespace() *auth.UserNamespace {
// RootUTSNamespace returns the root UTSNamespace.
func (k *Kernel) RootUTSNamespace() *UTSNamespace {
k.rootUTSNamespace.IncRef()
return k.rootUTSNamespace
}
// RootIPCNamespace takes a reference and returns the root IPCNamespace.
func (k *Kernel) RootIPCNamespace() *IPCNamespace {
k.rootIPCNamespace.IncRef()
return k.rootIPCNamespace
}
@@ -1750,6 +1750,8 @@ func (k *Kernel) Release() {
k.timekeeper.Destroy()
k.vdso.Release(ctx)
k.RootNetworkNamespace().DecRef(ctx)
k.rootIPCNamespace.DecRef(ctx)
k.rootUTSNamespace.DecRef(ctx)
k.cleaupDevGofers()
}
+1
View File
@@ -170,6 +170,7 @@ func (r *Registry) FindOrCreate(ctx context.Context, opts OpenOpts, mode linux.F
if opts.Create && opts.Exclusive {
// "Both O_CREAT and O_EXCL were specified in oflag, but a queue
// with this name already exists."
fd.DecRef(ctx)
return nil, linuxerr.EEXIST
}
return fd, nil
+1
View File
@@ -57,6 +57,7 @@ func MqOpen(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr,
if err != nil {
return 0, nil, err
}
defer queue.DecRef(t)
fd, err := t.NewFDFrom(0, queue, kernel.FDFlags{
CloseOnExec: flag&linux.O_CLOEXEC != 0,
+17 -4
View File
@@ -618,6 +618,16 @@ func (l *Loader) Destroy() {
}
l.watchdog.Stop()
ctx := l.k.SupervisorContext()
for _, m := range l.sharedMounts {
m.DecRef(ctx)
}
for _, m := range l.cgroupMounts {
m.mount.DecRef(ctx)
m.root.DecRef(ctx)
m.fs.DecRef(ctx)
}
// Stop the control server. This will indirectly stop any
// long-running control operations that are in flight, e.g.
// profiling operations.
@@ -650,6 +660,8 @@ func (l *Loader) Destroy() {
}
l.stopProfiling()
// Check all references.
refs.OnExit()
}
func createPlatform(conf *config.Config, deviceFile *os.File) (platform.Platform, error) {
@@ -1007,6 +1019,11 @@ func (l *Loader) createContainerProcess(info *containerInfo) (*kernel.ThreadGrou
if err := setupContainerVFS(ctx, info, mntr, &info.procArgs); err != nil {
return nil, nil, err
}
defer func() {
for cg := range info.procArgs.InitialCgroups {
cg.Dentry.DecRef(ctx)
}
}()
// Add the HOME environment variable if it is not already set.
info.procArgs.Envv, err = user.MaybeAddExecUserHome(ctx, info.procArgs.MountNamespace,
@@ -1229,7 +1246,6 @@ func (l *Loader) waitContainer(cid string, waitStatus *uint32) error {
// sandbox is killed by a signal after the ContMgrWait request is completed.
if l.root.procArgs.ContainerID == cid {
// All sentry-created resources should have been released at this point.
refs.DoLeakCheck()
_ = coverage.Report()
}
return nil
@@ -1289,9 +1305,6 @@ func (l *Loader) WaitExit() linux.WaitStatus {
// Wait for container.
l.k.WaitExited()
// Check all references.
refs.OnExit()
return l.k.GlobalInit().ExitStatus()
}
+8
View File
@@ -76,6 +76,7 @@ def _syscall_test(
one_sandbox = True,
fusefs = False,
directfs = False,
leak_check = False,
**kwargs):
# Prepend "runsc" to non-native platform names.
full_platform = platform if platform == "native" else "runsc_" + platform
@@ -151,6 +152,7 @@ def _syscall_test(
"--one-sandbox=" + str(one_sandbox),
"--iouring=" + str(iouring),
"--directfs=" + str(directfs),
"--leak-check=" + str(leak_check),
]
# Trace points are platform agnostic, so enable them for ptrace only.
@@ -185,6 +187,7 @@ def syscall_test(
one_sandbox = True,
iouring = False,
allow_native = True,
leak_check = True,
debug = True,
container = None,
tags = None,
@@ -244,6 +247,7 @@ def syscall_test(
debug = debug,
container = container,
one_sandbox = one_sandbox,
leak_check = leak_check,
**kwargs
)
@@ -261,6 +265,7 @@ def syscall_test(
container = container,
one_sandbox = one_sandbox,
overlay = True,
leak_check = leak_check,
**kwargs
)
if add_hostinet:
@@ -277,6 +282,7 @@ def syscall_test(
iouring = iouring,
container = container,
one_sandbox = one_sandbox,
leak_check = leak_check,
**kwargs
)
if not use_tmpfs:
@@ -294,6 +300,7 @@ def syscall_test(
container = container,
one_sandbox = one_sandbox,
file_access = "shared",
leak_check = leak_check,
**kwargs
)
if add_fusefs:
@@ -309,5 +316,6 @@ def syscall_test(
debug = debug,
container = container,
one_sandbox = one_sandbox,
leak_check = leak_check,
**kwargs
)
+76 -5
View File
@@ -17,6 +17,7 @@
package main
import (
"bufio"
"bytes"
"encoding/json"
"flag"
@@ -63,10 +64,8 @@ var (
addHostConnector = flag.Bool("add-host-connector", false, "create goroutines that connect to bound UDS that will be created by sandbox")
addHostFIFO = flag.Bool("add-host-fifo", false, "expose a tree of FIFO to test communication with the host")
ioUring = flag.Bool("iouring", false, "Enables IO_URING API for asynchronous I/O")
// TODO(gvisor.dev/issue/4572): properly support leak checking for runsc, and
// set to true as the default for the test runner.
leakCheck = flag.Bool("leak-check", false, "check for reference leaks")
waitForPid = flag.Duration("delay-for-debugger", 0, "Print out the sandbox PID and wait for the specified duration to start the test. This is useful for attaching a debugger to the runsc-sandbox process.")
leakCheck = flag.Bool("leak-check", false, "check for reference leaks")
waitForPid = flag.Duration("delay-for-debugger", 0, "Print out the sandbox PID and wait for the specified duration to start the test. This is useful for attaching a debugger to the runsc-sandbox process.")
)
const (
@@ -279,6 +278,7 @@ func runRunsc(tc *gtest.TestCase, spec *specs.Spec) error {
}
testLogDir := ""
runscLogDir := ""
if undeclaredOutputsDir, ok := unix.Getenv("TEST_UNDECLARED_OUTPUTS_DIR"); ok {
// Create log directory dedicated for this test.
testLogDir = filepath.Join(undeclaredOutputsDir, strings.Replace(name, "/", "_", -1))
@@ -290,8 +290,9 @@ func runRunsc(tc *gtest.TestCase, spec *specs.Spec) error {
return fmt.Errorf("could not create temp dir: %v", err)
}
debugLogDir += "/"
runscLogDir = debugLogDir + "/runsc.log"
log.Infof("runsc logs: %s", debugLogDir)
args = append(args, "-debug-log", debugLogDir)
args = append(args, "-debug-log", runscLogDir)
args = append(args, "-coverage-report", debugLogDir)
// Default -log sends messages to stderr which makes reading the test log
@@ -429,6 +430,76 @@ func runRunsc(tc *gtest.TestCase, spec *specs.Spec) error {
}
}
if err == nil && len(testLogDir) > 0 {
var warningsFound []string
f, err := os.Open(runscLogDir)
if err != nil {
return err
}
scanner := bufio.NewScanner(f)
for scanner.Scan() {
// This is a trivial match for Google's log file format.
line := scanner.Text()
if len(line) >= 5 && line[:5] == "panic" {
warningsFound = append(warningsFound, strings.TrimSpace(line))
}
if len(line) >= 2 && (line[0] == 'E' || line[0] == 'W') && (line[1] >= '0' && line[1] <= '9') {
// Ignore a basic set of warnings that we've
// determined to be fine. We want these to stay
// as warnings, even if they are constant.
switch {
// Reasonable warnings, allowed during tests.
case strings.Contains(line, "Will try waiting on the sandbox process instead."):
case strings.Contains(line, "lisafs: batch closing FDs"):
case strings.Contains(line, "This is only safe in tests!"):
case strings.Contains(line, "Capability \"checkpoint_restore\" is not permitted, dropping it."):
case strings.Contains(line, "syscall filters less restrictive!"):
case strings.Contains(line, "Getdent64: skipping file"):
// Capability "perfmon" is not permitted, dropping it.
case strings.Contains(line, "is not permitted, dropping it."):
case strings.Contains(line, "sndPrepopulatedMsg failed"):
case strings.Contains(line, "PR_SET_NO_NEW_PRIVS is assumed to always be set."):
case strings.Contains(line, "TSC snapshot unavailable"):
case strings.Contains(line, "copy up failed to copy up contents"):
case strings.Contains(line, "populate failed for"):
case strings.Contains(line, "ASAN is enabled: syscall filters less restrictive"):
case strings.Contains(line, "MSAN is enabled: syscall filters less restrictive"):
case strings.Contains(line, "TSAN is enabled: syscall filters less restrictive"):
case strings.Contains(line, "Optional feature EnablePCID not supported"):
case strings.Contains(line, "Optional feature EnableSMEP not supported"):
case strings.Contains(line, "Optional feature EnableVPID not supported"):
case strings.Contains(line, "Optional feature GMPWithVPID not supported"):
case strings.Contains(line, "Optional feature ValidateGMPPF not supported"):
case strings.Contains(line, "Pass-through networking enabled"):
// Expected in some tests that create files as 0755,
// e.g. /gvisor/test/syscalls/linux/exec.cc
case strings.Contains(line, "Opened a writable executable"):
// Expected in some tests, e.g. /gvisor/test/syscalls/linux/sysret.cc
case strings.Contains(line, "invalid rip for 64 bit mode"):
// Ignore clock frequency adjustment messages.
case strings.Contains(line, "adjusted frequency from"):
// FIXME(b/70990997): URPC error: possible race?
case strings.Contains(line, "urpc: error decoding: bad file descriptor"):
// FIXME(b/147228315): GVISOR_PREEMPTION_INTERRUPT not yet supported on AMD.
case strings.Contains(line, "Optional feature PreemptionInterrupt not supported"):
// Ignore denied dirty timestamp writebacks. They occur because,
// in tests, the gofer doesn't have permission to change atime.
case strings.Contains(line, "gofer.dentry.destroyLocked: failed to close file with write dirty timestamps: operation not permitted"):
case strings.Contains(line, "Tsetattrclunk failed, losing FID"):
// gsys_get_timekeeping_params hasn't been implemented for ARM.
case strings.Contains(line, "Error retrieving TSC snapshot, unable to save TSC: function not implemented"):
default:
warningsFound = append(warningsFound, strings.TrimSpace(line))
}
}
}
if len(warningsFound) > 0 {
return fmt.Errorf("%s", warningsFound)
}
// If the test passed, then we erase the log directory. This speeds up
// uploading logs in continuous integration & saves on disk space.
os.RemoveAll(testLogDir)
+6
View File
@@ -122,6 +122,8 @@ syscall_test(
syscall_test(
add_host_connector = True,
add_hostinet = True,
# TODO(b/318948806): lisafs.BoundSocketFD is leaked.
leak_check = False,
one_sandbox = False,
test = "//test/syscalls/linux:bind_external_test",
# Shared mode tests replace /tmp which hides the files created for
@@ -132,6 +134,8 @@ syscall_test(
syscall_test(
add_host_uds = True,
add_hostinet = True,
# TODO(b/318948806): lisafs.BoundSocketFD is leaked.
leak_check = False,
one_sandbox = False,
test = "//test/syscalls/linux:connect_external_test",
# Shared mode tests replace /tmp which hides the files created for
@@ -1096,6 +1100,8 @@ syscall_test(
)
syscall_test(
# TODO(b/318948806): tun.tunEndpoint is leaked.
leak_check = False,
test = "//test/syscalls/linux:tuntap_test",
)