2019-04-29 14:25:05 -07:00
// Copyright 2018 The gVisor Authors.
2018-04-27 10:37:02 -07:00
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
2018-11-28 14:00:54 -08:00
"context"
2018-04-27 10:37:02 -07:00
"encoding/json"
"fmt"
"io/ioutil"
"os"
"os/exec"
2018-07-25 09:10:32 -07:00
"path/filepath"
2018-04-27 10:37:02 -07:00
"strconv"
"strings"
"syscall"
"time"
"github.com/google/subcommands"
specs "github.com/opencontainers/runtime-spec/specs-go"
2019-06-13 16:49:09 -07:00
"gvisor.dev/gvisor/pkg/log"
"gvisor.dev/gvisor/pkg/sentry/control"
"gvisor.dev/gvisor/pkg/sentry/kernel/auth"
"gvisor.dev/gvisor/pkg/urpc"
"gvisor.dev/gvisor/runsc/boot"
"gvisor.dev/gvisor/runsc/console"
"gvisor.dev/gvisor/runsc/container"
2020-02-10 13:56:17 -08:00
"gvisor.dev/gvisor/runsc/flag"
2019-06-13 16:49:09 -07:00
"gvisor.dev/gvisor/runsc/specutils"
2018-04-27 10:37:02 -07:00
)
// Exec implements subcommands.Command for the "exec" command.
type Exec struct {
cwd string
env stringSlice
// user contains the UID and GID with which to run the new process.
2018-09-12 15:22:24 -07:00
user user
extraKGIDs stringSlice
caps stringSlice
detach bool
processPath string
pidFile string
internalPidFile string
2018-08-24 17:42:30 -07:00
// consoleSocket is the path to an AF_UNIX socket which will receive a
// file descriptor referencing the master end of the console's
// pseudoterminal.
consoleSocket string
2018-04-27 10:37:02 -07:00
}
// Name implements subcommands.Command.Name. It returns the literal
// subcommand name "exec".
func (*Exec) Name() string {
	const name = "exec"
	return name
}
// Synopsis implements subcommands.Command.Synopsis. It returns a one-line
// description of the command.
func (*Exec) Synopsis() string {
	const synopsis = "execute new process inside the container"
	return synopsis
}
// Usage implements subcommands.Command.Usage. It returns the help text for the
// "exec" command: the two accepted invocation forms (an explicit command, or a
// process.json supplied through the process flag), an example, and a trailing
// "OPTIONS:" header.
func ( * Exec ) Usage ( ) string {
return ` exec [command options] <container-id> <command> [command options] || --process process.json <container-id>
Where "<container-id>" is the name for the instance of the container and
"<command>" is the command to be executed in the container.
"<command>" can't be empty unless a "-process" flag provided.
EXAMPLE:
If the container is configured to run /bin/ps the following will
output a list of processes running in the container:
# runc exec <container-id> ps
OPTIONS:
`
}
// SetFlags implements subcommands.Command.SetFlags.
func ( ex * Exec ) SetFlags ( f * flag . FlagSet ) {
f . StringVar ( & ex . cwd , "cwd" , "" , "current working directory" )
f . Var ( & ex . env , "env" , "set environment variables (e.g. '-env PATH=/bin -env TERM=xterm')" )
f . Var ( & ex . user , "user" , "UID (format: <uid>[:<gid>])" )
f . Var ( & ex . extraKGIDs , "additional-gids" , "additional gids" )
f . Var ( & ex . caps , "cap" , "add a capability to the bounding set for the process" )
f . BoolVar ( & ex . detach , "detach" , false , "detach from the container's process" )
f . StringVar ( & ex . processPath , "process" , "" , "path to the process.json" )
2018-05-15 10:17:19 -07:00
f . StringVar ( & ex . pidFile , "pid-file" , "" , "filename that the container pid will be written to" )
2018-09-12 15:22:24 -07:00
f . StringVar ( & ex . internalPidFile , "internal-pid-file" , "" , "filename that the container-internal pid will be written to" )
2018-08-24 17:42:30 -07:00
f . StringVar ( & ex . consoleSocket , "console-socket" , "" , "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal" )
2018-04-27 10:37:02 -07:00
}
// Execute implements subcommands.Command.Execute. It starts a process in an
2018-05-15 10:17:19 -07:00
// already created container.
2018-04-27 10:37:02 -07:00
func ( ex * Exec ) Execute ( _ context . Context , f * flag . FlagSet , args ... interface { } ) subcommands . ExitStatus {
2019-10-01 11:48:24 -07:00
conf := args [ 0 ] . ( * boot . Config )
e , id , err := ex . parseArgs ( f , conf . EnableRaw )
2018-04-27 10:37:02 -07:00
if err != nil {
2019-01-18 17:35:09 -08:00
Fatalf ( "parsing process spec: %v" , err )
2018-04-27 10:37:02 -07:00
}
waitStatus := args [ 1 ] . ( * syscall . WaitStatus )
2018-05-15 10:17:19 -07:00
c , err := container . Load ( conf . RootDir , id )
2018-04-27 10:37:02 -07:00
if err != nil {
2019-01-18 17:35:09 -08:00
Fatalf ( "loading sandbox: %v" , err )
2018-04-27 10:37:02 -07:00
}
2019-10-01 11:48:24 -07:00
log . Debugf ( "Exec arguments: %+v" , e )
log . Debugf ( "Exec capablities: %+v" , e . Capabilities )
2018-09-05 18:31:37 -07:00
// Replace empty settings with defaults from container.
2018-04-27 10:37:02 -07:00
if e . WorkingDirectory == "" {
2018-05-15 10:17:19 -07:00
e . WorkingDirectory = c . Spec . Process . Cwd
2018-04-27 10:37:02 -07:00
}
if e . Envv == nil {
2018-05-15 10:17:19 -07:00
e . Envv , err = resolveEnvs ( c . Spec . Process . Env , ex . env )
2018-04-27 10:37:02 -07:00
if err != nil {
2019-01-18 17:35:09 -08:00
Fatalf ( "getting environment variables: %v" , err )
2018-04-27 10:37:02 -07:00
}
}
2019-09-23 17:04:45 -07:00
2018-09-05 18:31:37 -07:00
if e . Capabilities == nil {
2019-10-01 11:48:24 -07:00
e . Capabilities , err = specutils . Capabilities ( conf . EnableRaw , c . Spec . Process . Capabilities )
2018-09-05 18:31:37 -07:00
if err != nil {
2019-01-18 17:35:09 -08:00
Fatalf ( "creating capabilities: %v" , err )
2018-09-05 18:31:37 -07:00
}
2019-10-01 11:48:24 -07:00
log . Infof ( "Using exec capabilities from container: %+v" , e . Capabilities )
2018-09-05 18:31:37 -07:00
}
2018-04-27 10:37:02 -07:00
// containerd expects an actual process to represent the container being
// executed. If detach was specified, starts a child in non-detach mode,
// write the child's PID to the pid file. So when the container returns, the
// child process will also return and signal containerd.
2018-05-02 17:39:12 -07:00
if ex . detach {
2019-06-06 10:48:19 -07:00
return ex . execChildAndWait ( waitStatus )
2018-04-27 10:37:02 -07:00
}
2019-06-06 10:48:19 -07:00
return ex . exec ( c , e , waitStatus )
}
2018-04-27 10:37:02 -07:00
2019-06-06 10:48:19 -07:00
func ( ex * Exec ) exec ( c * container . Container , e * control . ExecArgs , waitStatus * syscall . WaitStatus ) subcommands . ExitStatus {
2018-09-12 15:22:24 -07:00
// Start the new process and get it pid.
pid , err := c . Execute ( e )
2018-04-27 10:37:02 -07:00
if err != nil {
2019-06-06 10:48:19 -07:00
return Errorf ( "executing processes for container: %v" , err )
2018-04-27 10:37:02 -07:00
}
2018-09-12 15:22:24 -07:00
2018-10-01 22:05:41 -07:00
if e . StdioIsPty {
// Forward signals sent to this process to the foreground
// process in the sandbox.
stopForwarding := c . ForwardSignals ( pid , true /* fgProcess */ )
defer stopForwarding ( )
}
2018-09-12 15:22:24 -07:00
// Write the sandbox-internal pid if required.
if ex . internalPidFile != "" {
pidStr := [ ] byte ( strconv . Itoa ( int ( pid ) ) )
if err := ioutil . WriteFile ( ex . internalPidFile , pidStr , 0644 ) ; err != nil {
2019-06-06 10:48:19 -07:00
return Errorf ( "writing internal pid file %q: %v" , ex . internalPidFile , err )
2018-09-12 15:22:24 -07:00
}
}
2019-06-06 10:48:19 -07:00
// Generate the pid file after the internal pid file is generated, so that
// users can safely assume that the internal pid file is ready after
// `runsc exec -d` returns.
2018-11-12 11:11:47 -08:00
if ex . pidFile != "" {
if err := ioutil . WriteFile ( ex . pidFile , [ ] byte ( strconv . Itoa ( os . Getpid ( ) ) ) , 0644 ) ; err != nil {
2019-06-06 10:48:19 -07:00
return Errorf ( "writing pid file: %v" , err )
2018-11-12 11:11:47 -08:00
}
}
2018-09-12 15:22:24 -07:00
// Wait for the process to exit.
2019-06-03 18:14:52 -07:00
ws , err := c . WaitPID ( pid )
2018-09-12 15:22:24 -07:00
if err != nil {
2019-06-06 10:48:19 -07:00
return Errorf ( "waiting on pid %d: %v" , pid , err )
2018-09-12 15:22:24 -07:00
}
2018-04-27 10:37:02 -07:00
* waitStatus = ws
return subcommands . ExitSuccess
}
2019-06-06 10:48:19 -07:00
func ( ex * Exec ) execChildAndWait ( waitStatus * syscall . WaitStatus ) subcommands . ExitStatus {
2018-05-03 21:08:38 -07:00
var args [ ] string
2019-06-03 18:14:52 -07:00
for _ , a := range os . Args [ 1 : ] {
if ! strings . Contains ( a , "detach" ) {
args = append ( args , a )
}
}
2018-07-25 09:10:32 -07:00
2019-06-06 10:48:19 -07:00
// The command needs to write a pid file so that execChildAndWait can tell
2018-07-25 09:10:32 -07:00
// when it has started. If no pid-file was provided, we should use a
// filename in a temp directory.
pidFile := ex . pidFile
if pidFile == "" {
tmpDir , err := ioutil . TempDir ( "" , "exec-pid-" )
if err != nil {
2019-01-18 17:35:09 -08:00
Fatalf ( "creating TempDir: %v" , err )
2018-07-25 09:10:32 -07:00
}
defer os . RemoveAll ( tmpDir )
pidFile = filepath . Join ( tmpDir , "pid" )
args = append ( args , "--pid-file=" + pidFile )
}
2019-06-03 18:14:52 -07:00
cmd := exec . Command ( specutils . ExePath , args ... )
2019-01-22 16:45:45 -08:00
cmd . Args [ 0 ] = "runsc-exec"
2018-08-24 17:42:30 -07:00
// Exec stdio defaults to current process stdio.
2018-05-03 21:08:38 -07:00
cmd . Stdin = os . Stdin
cmd . Stdout = os . Stdout
cmd . Stderr = os . Stderr
2018-08-24 17:42:30 -07:00
// If the console control socket file is provided, then create a new
2018-10-01 22:05:41 -07:00
// pty master/slave pair and set the TTY on the sandbox process.
2018-08-24 17:42:30 -07:00
if ex . consoleSocket != "" {
2019-06-03 18:14:52 -07:00
// Create a new TTY pair and send the master on the provided socket.
2018-08-24 17:42:30 -07:00
tty , err := console . NewWithSocket ( ex . consoleSocket )
if err != nil {
2019-01-18 17:35:09 -08:00
Fatalf ( "setting up console with socket %q: %v" , ex . consoleSocket , err )
2018-08-24 17:42:30 -07:00
}
defer tty . Close ( )
2018-10-01 22:05:41 -07:00
// Set stdio to the new TTY slave.
2018-08-24 17:42:30 -07:00
cmd . Stdin = tty
cmd . Stdout = tty
cmd . Stderr = tty
cmd . SysProcAttr = & syscall . SysProcAttr {
Setsid : true ,
Setctty : true ,
2019-06-26 11:26:18 -07:00
// The Ctty FD must be the FD in the child process's FD
// table. Since we set cmd.Stdin/Stdout/Stderr to the
// tty FD, we can use any of 0, 1, or 2 here.
// See https://github.com/golang/go/issues/29458.
Ctty : 0 ,
2018-08-24 17:42:30 -07:00
}
}
2018-05-03 21:08:38 -07:00
if err := cmd . Start ( ) ; err != nil {
Fatalf ( "failure to start child exec process, err: %v" , err )
}
2019-06-03 18:14:52 -07:00
log . Infof ( "Started child (PID: %d) to exec and wait: %s %s" , cmd . Process . Pid , specutils . ExePath , args )
2018-05-03 21:08:38 -07:00
// Wait for PID file to ensure that child process has started. Otherwise,
// '--process' file is deleted as soon as this process returns and the child
// may fail to read it.
ready := func ( ) ( bool , error ) {
2018-09-26 17:40:01 -07:00
pidb , err := ioutil . ReadFile ( pidFile )
2018-05-03 21:08:38 -07:00
if err == nil {
2018-09-26 17:40:01 -07:00
// File appeared, check whether pid is fully written.
pid , err := strconv . Atoi ( string ( pidb ) )
if err != nil {
return false , nil
}
return pid == cmd . Process . Pid , nil
2018-05-03 21:08:38 -07:00
}
if pe , ok := err . ( * os . PathError ) ; ! ok || pe . Err != syscall . ENOENT {
return false , err
}
// No file yet, continue to wait...
return false , nil
}
if err := specutils . WaitForReady ( cmd . Process . Pid , 10 * time . Second , ready ) ; err != nil {
2019-06-06 10:48:19 -07:00
// Don't log fatal error here, otherwise it will override the error logged
// by the child process that has failed to start.
log . Warningf ( "Unexpected error waiting for PID file, err: %v" , err )
return subcommands . ExitFailure
2018-05-03 21:08:38 -07:00
}
* waitStatus = 0
return subcommands . ExitSuccess
}
2018-04-27 10:37:02 -07:00
// parseArgs parses exec information from the command line or a JSON file
// depending on whether the --process flag was used. Returns an ExecArgs and
2018-05-15 10:17:19 -07:00
// the ID of the container to be used.
2019-10-01 11:48:24 -07:00
func ( ex * Exec ) parseArgs ( f * flag . FlagSet , enableRaw bool ) ( * control . ExecArgs , string , error ) {
2018-04-27 10:37:02 -07:00
if ex . processPath == "" {
// Requires at least a container ID and command.
if f . NArg ( ) < 2 {
f . Usage ( )
return nil , "" , fmt . Errorf ( "both a container-id and command are required" )
}
2019-10-01 11:48:24 -07:00
e , err := ex . argsFromCLI ( f . Args ( ) [ 1 : ] , enableRaw )
2018-04-27 10:37:02 -07:00
return e , f . Arg ( 0 ) , err
}
// Requires only the container ID.
if f . NArg ( ) != 1 {
f . Usage ( )
return nil , "" , fmt . Errorf ( "a container-id is required" )
}
2019-10-01 11:48:24 -07:00
e , err := ex . argsFromProcessFile ( enableRaw )
2018-04-27 10:37:02 -07:00
return e , f . Arg ( 0 ) , err
}
2019-10-01 11:48:24 -07:00
func ( ex * Exec ) argsFromCLI ( argv [ ] string , enableRaw bool ) ( * control . ExecArgs , error ) {
2018-04-27 10:37:02 -07:00
extraKGIDs := make ( [ ] auth . KGID , 0 , len ( ex . extraKGIDs ) )
for _ , s := range ex . extraKGIDs {
kgid , err := strconv . Atoi ( s )
if err != nil {
2019-01-18 17:35:09 -08:00
Fatalf ( "parsing GID: %s, %v" , s , err )
2018-04-27 10:37:02 -07:00
}
extraKGIDs = append ( extraKGIDs , auth . KGID ( kgid ) )
}
2018-09-05 18:31:37 -07:00
var caps * auth . TaskCapabilities
if len ( ex . caps ) > 0 {
var err error
2019-10-01 11:48:24 -07:00
caps , err = capabilities ( ex . caps , enableRaw )
2018-09-05 18:31:37 -07:00
if err != nil {
return nil , fmt . Errorf ( "capabilities error: %v" , err )
}
2018-04-27 10:37:02 -07:00
}
return & control . ExecArgs {
Argv : argv ,
WorkingDirectory : ex . cwd ,
KUID : ex . user . kuid ,
KGID : ex . user . kgid ,
ExtraKGIDs : extraKGIDs ,
Capabilities : caps ,
2018-08-24 17:42:30 -07:00
StdioIsPty : ex . consoleSocket != "" ,
FilePayload : urpc . FilePayload { [ ] * os . File { os . Stdin , os . Stdout , os . Stderr } } ,
2018-04-27 10:37:02 -07:00
} , nil
}
2019-10-01 11:48:24 -07:00
func ( ex * Exec ) argsFromProcessFile ( enableRaw bool ) ( * control . ExecArgs , error ) {
2018-04-27 10:37:02 -07:00
f , err := os . Open ( ex . processPath )
if err != nil {
return nil , fmt . Errorf ( "error opening process file: %s, %v" , ex . processPath , err )
}
defer f . Close ( )
var p specs . Process
if err := json . NewDecoder ( f ) . Decode ( & p ) ; err != nil {
return nil , fmt . Errorf ( "error parsing process file: %s, %v" , ex . processPath , err )
}
2019-10-01 11:48:24 -07:00
return argsFromProcess ( & p , enableRaw )
2018-04-27 10:37:02 -07:00
}
// argsFromProcess performs all the non-IO conversion from the Process struct
// to ExecArgs.
2019-10-01 11:48:24 -07:00
func argsFromProcess ( p * specs . Process , enableRaw bool ) ( * control . ExecArgs , error ) {
2018-04-27 10:37:02 -07:00
// Create capabilities.
2018-09-05 18:31:37 -07:00
var caps * auth . TaskCapabilities
if p . Capabilities != nil {
var err error
2019-10-01 11:48:24 -07:00
// Starting from Docker 19, capabilities are explicitly set for exec (instead
// of nil like before). So we can't distinguish 'exec' from
// 'exec --privileged', as both specify CAP_NET_RAW. Therefore, filter
// CAP_NET_RAW in the same way as container start.
caps , err = specutils . Capabilities ( enableRaw , p . Capabilities )
2018-09-05 18:31:37 -07:00
if err != nil {
return nil , fmt . Errorf ( "error creating capabilities: %v" , err )
}
2018-04-27 10:37:02 -07:00
}
// Convert the spec's additional GIDs to KGIDs.
extraKGIDs := make ( [ ] auth . KGID , 0 , len ( p . User . AdditionalGids ) )
for _ , GID := range p . User . AdditionalGids {
extraKGIDs = append ( extraKGIDs , auth . KGID ( GID ) )
}
return & control . ExecArgs {
Argv : p . Args ,
Envv : p . Env ,
WorkingDirectory : p . Cwd ,
KUID : auth . KUID ( p . User . UID ) ,
KGID : auth . KGID ( p . User . GID ) ,
ExtraKGIDs : extraKGIDs ,
Capabilities : caps ,
2018-08-24 17:42:30 -07:00
StdioIsPty : p . Terminal ,
FilePayload : urpc . FilePayload { Files : [ ] * os . File { os . Stdin , os . Stdout , os . Stderr } } ,
2018-04-27 10:37:02 -07:00
} , nil
}
// resolveEnvs transforms lists of environment variables into a single list of
// environment variables. If a variable is defined multiple times, the last
// value is used.
//
// Every entry must have the form NAME=VALUE; an entry without "=" yields an
// error. The result is deterministic: each variable appears once, at the
// position where its name was first seen, carrying its last value. The
// returned slice is never nil on success.
func resolveEnvs(envs ...[]string) ([]string, error) {
	// values holds the latest value per name; names remembers the order in
	// which names were first encountered so the output does not depend on map
	// iteration order.
	values := make(map[string]string)
	var names []string
	for _, env := range envs {
		for _, str := range env {
			parts := strings.SplitN(str, "=", 2)
			if len(parts) != 2 {
				return nil, fmt.Errorf("invalid variable: %s", str)
			}
			name, value := parts[0], parts[1]
			if _, seen := values[name]; !seen {
				names = append(names, name)
			}
			values[name] = value
		}
	}

	// Reassemble into a list of environment variables of the form
	// NAME=VALUE.
	env := make([]string, 0, len(names))
	for _, name := range names {
		env = append(env, name+"="+values[name])
	}
	return env, nil
}
// capabilities takes a list of capabilities as strings and returns an
// auth.TaskCapabilities struct with those capabilities in every capability set.
// This mimics runc's behavior.
2019-10-01 11:48:24 -07:00
func capabilities ( cs [ ] string , enableRaw bool ) ( * auth . TaskCapabilities , error ) {
2018-04-27 10:37:02 -07:00
var specCaps specs . LinuxCapabilities
for _ , cap := range cs {
specCaps . Ambient = append ( specCaps . Ambient , cap )
specCaps . Bounding = append ( specCaps . Bounding , cap )
specCaps . Effective = append ( specCaps . Effective , cap )
specCaps . Inheritable = append ( specCaps . Inheritable , cap )
specCaps . Permitted = append ( specCaps . Permitted , cap )
}
2019-10-01 11:48:24 -07:00
// Starting from Docker 19, capabilities are explicitly set for exec (instead
// of nil like before). So we can't distinguish 'exec' from
// 'exec --privileged', as both specify CAP_NET_RAW. Therefore, filter
// CAP_NET_RAW in the same way as container start.
return specutils . Capabilities ( enableRaw , & specCaps )
2018-04-27 10:37:02 -07:00
}
// stringSlice allows a flag to be used multiple times, where each occurrence
// adds a value to the flag. For example, a flag called "x" could be invoked
// via "runsc exec -x foo -x bar", and the corresponding stringSlice would be
// {"foo", "bar"}.
type stringSlice []string

// String implements flag.Value.String. It renders the collected values in the
// default slice format, e.g. "[foo bar]".
func (ss *stringSlice) String() string {
	return fmt.Sprint(*ss)
}

// Get implements flag.Value.Get. It returns the receiver itself, i.e. a
// *stringSlice.
func (ss *stringSlice) Get() interface{} {
	return ss
}

// Set implements flag.Value.Set. It appends s to the collected values and
// never fails.
func (ss *stringSlice) Set(s string) error {
	values := *ss
	*ss = append(values, s)
	return nil
}
// user allows -user to convey a UID and, optionally, a GID separated by a
// colon.
type user struct {
	kuid auth.KUID
	kgid auth.KGID
}

// String implements flag.Value.String. It renders both IDs with their field
// names.
func (u *user) String() string {
	return fmt.Sprintf("%+v", *u)
}

// Get implements flag.Value.Get. It returns the receiver itself, i.e. a *user.
func (u *user) Get() interface{} {
	return u
}

// Set implements flag.Value.Set. It parses s in the form <uid>[:<gid>]. Both
// IDs must be unsigned decimal numbers that fit in 32 bits; anything else is
// rejected with an error. The GID is left untouched when s has no ":<gid>"
// part, and u is not modified at all when parsing fails.
func (u *user) Set(s string) error {
	parts := strings.SplitN(s, ":", 2)

	// Parse as unsigned 32-bit so that negative or oversized values are
	// rejected instead of silently wrapping in the conversions below.
	// NOTE(review): assumes auth.KUID/auth.KGID are 32-bit unsigned IDs —
	// confirm.
	kuid, err := strconv.ParseUint(parts[0], 10, 32)
	if err != nil {
		return fmt.Errorf("couldn't parse UID: %s", parts[0])
	}

	hasGID := len(parts) > 1
	var kgid uint64
	if hasGID {
		kgid, err = strconv.ParseUint(parts[1], 10, 32)
		if err != nil {
			return fmt.Errorf("couldn't parse GID: %s", parts[1])
		}
	}

	// Commit only after everything parsed successfully.
	u.kuid = auth.KUID(kuid)
	if hasGID {
		u.kgid = auth.KGID(kgid)
	}
	return nil
}