Files
Jing Chen a093ad0450 Simplify and format gVisor codebase.
The changes are just output of `gofmt -s -w .`.
2024-10-13 00:50:32 -07:00

178 lines
5.4 KiB
Go

// Copyright 2020 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package systrap
import (
"fmt"
"golang.org/x/sys/unix"
"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/bpf"
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/seccomp"
"gvisor.dev/gvisor/pkg/sentry/arch"
"gvisor.dev/gvisor/pkg/sentry/memmap"
"gvisor.dev/gvisor/pkg/sentry/platform/systrap/sysmsg"
)
// sysmsgThread describes a sysmsg stub thread which isn't traced
// and communicates with the Sentry via the sysmsg protocol.
//
// This type of thread is used to execute user processes.
type sysmsgThread struct {
// subproc is a link to the subprocess which is used to call native
// system calls. Its memoryFile is the file that mapStack maps the sysmsg
// stack from.
subproc *subprocess
// thread is a thread identifier. The mapping helpers issue their mmap
// system calls through it, and Debugf forwards log messages to it.
thread *thread
// msg is a pointer to a shared sysmsg structure in the Sentry address
// space which is used to communicate with the thread. Debugf appends it
// to every debug message.
msg *sysmsg.Msg
// context is the last context that ran on this thread.
context *platformContext
// stackRange is a sysmsg stack in the memory file. mapStack uses its
// start as the file offset and its length as the mapping size.
stackRange memmap.FileRange
// fpuStateToMsgOffset is the offset of a thread fpu state relative to sysmsg.
fpuStateToMsgOffset uint64
}
// sysmsgPerThreadMemAddr returns the address of this thread's sysmsg stack in
// the thread address space.
//
// Per-thread regions are laid out back to back starting at stubSysmsgStack,
// each sysmsg.PerThreadMemSize bytes long and indexed by the thread's
// sysmsgStackID.
func (p *sysmsgThread) sysmsgPerThreadMemAddr() uintptr {
	offset := sysmsg.PerThreadMemSize * uintptr(p.thread.sysmsgStackID)
	return stubSysmsgStack + offset
}
// mapStack maps the sysmsg stack into the thread address space at addr.
//
// The mapping is a shared, fixed-address mapping of stackRange from the
// subprocess memory file. It is always readable and additionally writable
// unless readOnly is set. The mmap system call is executed via the thread,
// and its error, if any, is returned.
func (p *sysmsgThread) mapStack(addr uintptr, readOnly bool) error {
	const mapFlags = unix.MAP_SHARED | unix.MAP_FILE | unix.MAP_FIXED

	prot := uintptr(unix.PROT_READ | unix.PROT_WRITE)
	if readOnly {
		prot = unix.PROT_READ
	}

	t := p.thread
	length := uintptr(p.stackRange.Length())
	fd := uintptr(p.subproc.memoryFile.FD())
	offset := uintptr(p.stackRange.Start)

	if _, err := t.syscallIgnoreInterrupt(&t.initRegs, unix.SYS_MMAP,
		arch.SyscallArgument{Value: addr},
		arch.SyscallArgument{Value: length},
		arch.SyscallArgument{Value: prot},
		arch.SyscallArgument{Value: mapFlags},
		arch.SyscallArgument{Value: fd},
		arch.SyscallArgument{Value: offset},
	); err != nil {
		return err
	}
	return nil
}
// mapPrivateStack maps a private stack of the given size into the thread
// address space at addr.
//
// The mapping is anonymous, private, fixed-address and read-write; the file
// descriptor and offset arguments of mmap are passed as zero. The mmap system
// call is executed via the thread, and its error, if any, is returned.
func (p *sysmsgThread) mapPrivateStack(addr uintptr, size uintptr) error {
	const (
		mapProt  = unix.PROT_READ | unix.PROT_WRITE
		mapFlags = unix.MAP_PRIVATE | unix.MAP_ANONYMOUS | unix.MAP_FIXED
	)

	t := p.thread
	if _, err := t.syscallIgnoreInterrupt(&t.initRegs, unix.SYS_MMAP,
		arch.SyscallArgument{Value: addr},
		arch.SyscallArgument{Value: size},
		arch.SyscallArgument{Value: uintptr(mapProt)},
		arch.SyscallArgument{Value: mapFlags},
		arch.SyscallArgument{Value: 0},
		arch.SyscallArgument{Value: 0},
	); err != nil {
		return err
	}
	return nil
}
// Debugf logs a debug message through the underlying thread, followed by
// ": " and the current contents of the shared sysmsg structure.
//
// format and v follow the fmt.Sprintf conventions. Nothing is formatted when
// debug logging is disabled.
func (p *sysmsgThread) Debugf(format string, v ...any) {
	if !log.IsLogging(log.Debug) {
		return
	}
	// Render the caller's message on its own and hand both pieces over as
	// arguments to a constant format. Splicing the already rendered sysmsg
	// state into the format string would let any '%' in it be interpreted
	// as a formatting verb.
	p.thread.Debugf("%s: %s", fmt.Sprintf(format, v...), p.msg)
}
// sysmsgSyscallNotifyRules builds a seccomp program which reports every
// exit_group system call with SECCOMP_RET_USER_NOTIF and allows everything
// else, including calls made with an unexpected architecture.
//
// It panics if the program can't be built.
func sysmsgSyscallNotifyRules() []bpf.Instruction {
	notifyOnExitGroup := seccomp.RuleSet{
		Rules: seccomp.MakeSyscallRules(map[uintptr]seccomp.SyscallRule{
			unix.SYS_EXIT_GROUP: seccomp.MatchAll{},
		}),
		Action: linux.SECCOMP_RET_USER_NOTIF,
	}
	opts := seccomp.ProgramOptions{
		DefaultAction: linux.SECCOMP_RET_ALLOW,
		BadArchAction: linux.SECCOMP_RET_ALLOW,
	}

	program, _, err := seccomp.BuildProgram([]seccomp.RuleSet{notifyOnExitGroup}, opts)
	if err != nil {
		panic(fmt.Sprintf("failed to build rules for sysmsg threads: %v", err))
	}
	return program
}
// sysmsgThreadRules builds the seccomp program for sysmsg threads.
//
// On top of the architecture-specific rules added by
// appendSysThreadArchSeccompRules, it allows futex (FUTEX_WAKE of one waiter
// and FUTEX_WAIT), rt_sigreturn and sched_yield, each only when the
// instruction pointer is greater than stubStart. Any other system call, or a
// call made with an unexpected architecture, results in SECCOMP_RET_TRAP.
//
// It panics if the program can't be built.
func sysmsgThreadRules(stubStart uintptr) []bpf.Instruction {
	// Matchers shared by the rules below.
	aboveStub := seccomp.GreaterThan(stubStart)
	zero := seccomp.EqualTo(0)
	anyValue := seccomp.AnyValue{}

	// anyArgsFromStub accepts arbitrary arguments as long as the call is
	// issued from above stubStart.
	anyArgsFromStub := func() seccomp.PerArg {
		return seccomp.PerArg{
			anyValue,
			anyValue,
			anyValue,
			anyValue,
			anyValue,
			anyValue,
			aboveStub, // rip
		}
	}

	// Allow instructions from the sysmsg code stub, which is limited by one page.
	stubRules := seccomp.RuleSet{
		Rules: seccomp.MakeSyscallRules(map[uintptr]seccomp.SyscallRule{
			unix.SYS_FUTEX: seccomp.Or{
				seccomp.PerArg{
					aboveStub,
					seccomp.EqualTo(linux.FUTEX_WAKE),
					seccomp.EqualTo(1),
					zero,
					zero,
					zero,
					aboveStub, // rip
				},
				seccomp.PerArg{
					aboveStub,
					seccomp.EqualTo(linux.FUTEX_WAIT),
					anyValue,
					zero,
					zero,
					zero,
					aboveStub, // rip
				},
			},
			unix.SYS_RT_SIGRETURN: anyArgsFromStub(),
			unix.SYS_SCHED_YIELD:  anyArgsFromStub(),
		}),
		Action: linux.SECCOMP_RET_ALLOW,
	}

	ruleSets := appendSysThreadArchSeccompRules([]seccomp.RuleSet{})
	ruleSets = append(ruleSets, stubRules)

	opts := seccomp.ProgramOptions{
		DefaultAction: linux.SECCOMP_RET_TRAP,
		BadArchAction: linux.SECCOMP_RET_TRAP,
	}
	program, _, err := seccomp.BuildProgram(ruleSets, opts)
	if err != nil {
		panic(fmt.Sprintf("failed to build rules for sysmsg threads: %v", err))
	}
	return program
}