exec_unix.mx raw

   1  // Copyright 2009 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  //go:build unix
   6  
   7  // Fork, exec, wait, etc.
   8  
   9  package syscall
  10  
  11  import (
  12  	errorspkg "errors"
  13  	"internal/bytealg"
  14  	"runtime"
  15  	"sync"
  16  	"unsafe"
  17  )
  18  
  19  // ForkLock is used to synchronize creation of new file descriptors
  20  // with fork.
  21  //
  22  // We want the child in a fork/exec sequence to inherit only the
  23  // file descriptors we intend. To do that, we mark all file
  24  // descriptors close-on-exec and then, in the child, explicitly
  25  // unmark the ones we want the exec'ed program to keep.
  26  // Unix doesn't make this easy: there is, in general, no way to
  27  // allocate a new file descriptor close-on-exec. Instead you
  28  // have to allocate the descriptor and then mark it close-on-exec.
  29  // If a fork happens between those two events, the child's exec
  30  // will inherit an unwanted file descriptor.
  31  //
  32  // This lock solves that race: the create new fd/mark close-on-exec
  33  // operation is done holding ForkLock for reading, and the fork itself
  34  // is done holding ForkLock for writing. At least, that's the idea.
  35  // There are some complications.
  36  //
  37  // Some system calls that create new file descriptors can block
  38  // for arbitrarily long times: open on a hung NFS server or named
  39  // pipe, accept on a socket, and so on. We can't reasonably grab
  40  // the lock across those operations.
  41  //
  42  // It is worse to inherit some file descriptors than others.
  43  // If a non-malicious child accidentally inherits an open ordinary file,
  44  // that's not a big deal. On the other hand, if a long-lived child
  45  // accidentally inherits the write end of a pipe, then the reader
  46  // of that pipe will not see EOF until that child exits, potentially
  47  // causing the parent program to hang. This is a common problem
  48  // in threaded C programs that use popen.
  49  //
  50  // Luckily, the file descriptors that are most important not to
  51  // inherit are not the ones that can take an arbitrarily long time
  52  // to create: pipe returns instantly, and the net package uses
  53  // non-blocking I/O to accept on a listening socket.
  54  // The rules for which file descriptor-creating operations use the
  55  // ForkLock are as follows:
  56  //
  57  //   - [Pipe]. Use pipe2 if available. Otherwise, does not block,
  58  //     so use ForkLock.
  59  //   - [Socket]. Use SOCK_CLOEXEC if available. Otherwise, does not
  60  //     block, so use ForkLock.
  61  //   - [Open]. Use [O_CLOEXEC] if available. Otherwise, may block,
  62  //     so live with the race.
  63  //   - [Dup]. Use [F_DUPFD_CLOEXEC] or dup3 if available. Otherwise,
  64  //     does not block, so use ForkLock.
  65  var ForkLock sync.RWMutex
  66  
  67  // StringSlicePtr converts a slice of strings to a slice of pointers
  68  // to NUL-terminated byte arrays. If any string contains a NUL byte
  69  // this function panics instead of returning an error.
  70  //
  71  // Deprecated: Use [SlicePtrFromStrings] instead.
  72  func StringSlicePtr(ss []string) []*byte {
  73  	bb := make([]*byte, len(ss)+1)
  74  	for i := 0; i < len(ss); i++ {
  75  		bb[i] = StringBytePtr(ss[i])
  76  	}
  77  	bb[len(ss)] = nil
  78  	return bb
  79  }
  80  
  81  // SlicePtrFromStrings converts a slice of strings to a slice of
  82  // pointers to NUL-terminated byte arrays. If any string contains
  83  // a NUL byte, it returns (nil, [EINVAL]).
  84  func SlicePtrFromStrings(ss []string) ([]*byte, error) {
  85  	n := 0
  86  	for _, s := range ss {
  87  		if bytealg.IndexByteString(s, 0) != -1 {
  88  			return nil, EINVAL
  89  		}
  90  		n += len(s) + 1 // +1 for NUL
  91  	}
  92  	bb := make([]*byte, len(ss)+1)
  93  	b := make([]byte, n)
  94  	n = 0
  95  	for i, s := range ss {
  96  		bb[i] = &b[n]
  97  		copy(b[n:], s)
  98  		n += len(s) + 1
  99  	}
 100  	return bb, nil
 101  }
 102  
 103  func CloseOnExec(fd int) { fcntl(fd, F_SETFD, FD_CLOEXEC) }
 104  
 105  func SetNonblock(fd int, nonblocking bool) (err error) {
 106  	flag, err := fcntl(fd, F_GETFL, 0)
 107  	if err != nil {
 108  		return err
 109  	}
 110  	if (flag&O_NONBLOCK != 0) == nonblocking {
 111  		return nil
 112  	}
 113  	if nonblocking {
 114  		flag |= O_NONBLOCK
 115  	} else {
 116  		flag &^= O_NONBLOCK
 117  	}
 118  	_, err = fcntl(fd, F_SETFL, flag)
 119  	return err
 120  }
 121  
// Credential holds user and group identities to be assumed
// by a child process started by [StartProcess].
//
// NOTE(review): the code that applies these values to the child is
// OS-specific and not part of this file.
type Credential struct {
	Uid         uint32   // User ID.
	Gid         uint32   // Group ID.
	Groups      []uint32 // Supplementary group IDs.
	NoSetGroups bool     // If true, don't set supplementary groups.
}
 130  
// ProcAttr holds attributes that will be applied to a new process started
// by [StartProcess].
//
// A nil *ProcAttr is accepted by forkExec and treated as the zero value.
type ProcAttr struct {
	Dir   string    // Current working directory. If empty, no directory is passed to the child setup.
	Env   []string  // Environment. An entry containing a NUL byte makes the call fail with EINVAL.
	Files []uintptr // File descriptors. With Sys.Setctty, Sys.Ctty must be less than len(Files).
	Sys   *SysProcAttr // OS-specific attributes. If nil, a zero SysProcAttr is used.
}
 139  
// zeroProcAttr and zeroSysProcAttr are zero-valued defaults that forkExec
// substitutes when the caller passes a nil *ProcAttr or leaves
// ProcAttr.Sys nil, so the rest of the code never has to nil-check them.
var zeroProcAttr ProcAttr
var zeroSysProcAttr SysProcAttr
 142  
// forkExec is the shared implementation behind [ForkExec] and
// [StartProcess]. It converts the arguments to C form, validates the
// controlling-terminal settings in attr.Sys, and then forks and execs
// the child via forkAndExecInChild while holding the fork lock.
//
// A pipe is used to learn whether the child's exec succeeded: the child
// is handed the write end (p[1]); the parent reads from p[0]. Reading
// EOF means the pipe was closed on exec, i.e. the exec succeeded.
// Reading a full Errno means the child reported a failure.
//
// On success it returns the child's pid and a nil error. On any failure
// it returns pid 0 and a non-nil error. A nil attr, or a nil attr.Sys,
// is replaced by a zero value.
func forkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) {
	var p [2]int // status pipe: p[0] is read by the parent, p[1] is passed to the child
	var n int
	var err1 Errno
	var wstatus WaitStatus

	if attr == nil {
		attr = &zeroProcAttr
	}
	sys := attr.Sys
	if sys == nil {
		sys = &zeroSysProcAttr
	}

	// Convert args to C form.
	argv0p, err := BytePtrFromString(argv0)
	if err != nil {
		return 0, err
	}
	argvp, err := SlicePtrFromStrings(argv)
	if err != nil {
		return 0, err
	}
	envvp, err := SlicePtrFromStrings(attr.Env)
	if err != nil {
		return 0, err
	}

	// On FreeBSD and DragonFly, when argv[0] is longer than argv0, pass
	// the argv0 pointer as the child's argv[0] instead.
	// NOTE(review): the motivation for this substitution is not stated
	// in this file.
	if (runtime.GOOS == "freebsd" || runtime.GOOS == "dragonfly") && len(argv) > 0 && len(argv[0]) > len(argv0) {
		argvp[0] = argv0p
	}

	// chroot and dir stay nil when the corresponding string is empty.
	var chroot *byte
	if sys.Chroot != "" {
		chroot, err = BytePtrFromString(sys.Chroot)
		if err != nil {
			return 0, err
		}
	}
	var dir *byte
	if attr.Dir != "" {
		dir, err = BytePtrFromString(attr.Dir)
		if err != nil {
			return 0, err
		}
	}

	// Both Setctty and Foreground use the Ctty field,
	// but they give it slightly different meanings.
	if sys.Setctty && sys.Foreground {
		return 0, errorspkg.New("both Setctty and Foreground set in SysProcAttr")
	}
	if sys.Setctty && sys.Ctty >= len(attr.Files) {
		return 0, errorspkg.New("Setctty set but Ctty not valid in child")
	}

	// The fork lock is held from here until the fork has happened (or
	// failed); every return path below releases it exactly once.
	acquireForkLock()

	// Allocate child status pipe close on exec.
	if err = forkExecPipe(p[:]); err != nil {
		releaseForkLock()
		return 0, err
	}

	// Kick off child.
	pid, err1 = forkAndExecInChild(argv0p, argvp, envvp, chroot, dir, attr, sys, p[1])
	if err1 != 0 {
		// The fork itself failed: close both pipe ends before unlocking.
		Close(p[0])
		Close(p[1])
		releaseForkLock()
		return 0, Errno(err1)
	}
	releaseForkLock()

	// Read child error status from pipe.
	// The parent's copy of the write end is closed first so that the
	// read below can observe EOF.
	Close(p[1])
	for {
		// Read directly into err1; retry if interrupted by a signal.
		n, err = readlen(p[0], (*byte)(unsafe.Pointer(&err1)), int(unsafe.Sizeof(err1)))
		if err != EINTR {
			break
		}
	}
	Close(p[0])
	if err != nil || n != 0 {
		// A full-sized read carries the child's Errno; it takes
		// precedence over any read error.
		if n == int(unsafe.Sizeof(err1)) {
			err = Errno(err1)
		}
		// A short, non-empty read without a read error is reported
		// as EPIPE.
		if err == nil {
			err = EPIPE
		}

		// Child failed; wait for it to exit, to make sure
		// the zombies don't accumulate.
		// This err1 shadows the outer Errno; apart from retrying on
		// EINTR, the result of Wait4 is not inspected.
		_, err1 := Wait4(pid, &wstatus, 0, nil)
		for err1 == EINTR {
			_, err1 = Wait4(pid, &wstatus, 0, nil)
		}

		// OS-specific cleanup on failure.
		forkAndExecFailureCleanup(attr, sys)

		return 0, err
	}

	// Read got EOF, so pipe closed on exec, so exec succeeded.
	return pid, nil
}
 250  
 251  // Combination of fork and exec, careful to be thread safe.
 252  func ForkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) {
 253  	return forkExec(argv0, argv, attr)
 254  }
 255  
 256  // StartProcess wraps [ForkExec] for package os.
 257  func StartProcess(argv0 string, argv []string, attr *ProcAttr) (pid int, handle uintptr, err error) {
 258  	pid, err = forkExec(argv0, argv, attr)
 259  	return pid, 0, err
 260  }
 261  
// Implemented in runtime package.
//
// Exec calls runtime_BeforeExec after converting its arguments and
// before attempting the execve, and runtime_AfterExec once the execve
// attempt has returned.
func runtime_BeforeExec()
func runtime_AfterExec()
 265  
// execveLibc is non-nil on OS using libc syscall, set to execve in exec_libc.go; this
// avoids a build dependency for other platforms.
// Exec calls it when GOOS is solaris, illumos, or aix.
var execveLibc func(path uintptr, argv uintptr, envp uintptr) Errno

// execveDarwin is called by Exec when GOOS is darwin or ios.
// NOTE(review): presumably assigned in a Darwin-specific file, like
// execveLibc; the assignment is not in this file.
var execveDarwin func(path *byte, argv **byte, envp **byte) error

// execveOpenBSD is called by Exec when GOOS is openbsd and GOARCH is
// not mips64.
// NOTE(review): presumably assigned in an OpenBSD-specific file; the
// assignment is not in this file.
var execveOpenBSD func(path *byte, argv **byte, envp **byte) error
 271  
 272  // Exec invokes the execve(2) system call.
 273  func Exec(argv0 string, argv []string, envv []string) (err error) {
 274  	argv0p, err := BytePtrFromString(argv0)
 275  	if err != nil {
 276  		return err
 277  	}
 278  	argvp, err := SlicePtrFromStrings(argv)
 279  	if err != nil {
 280  		return err
 281  	}
 282  	envvp, err := SlicePtrFromStrings(envv)
 283  	if err != nil {
 284  		return err
 285  	}
 286  	runtime_BeforeExec()
 287  
 288  	rlim := origRlimitNofile.Load()
 289  	if rlim != nil {
 290  		Setrlimit(RLIMIT_NOFILE, rlim)
 291  	}
 292  
 293  	var err1 error
 294  	if runtime.GOOS == "solaris" || runtime.GOOS == "illumos" || runtime.GOOS == "aix" {
 295  		// RawSyscall should never be used on Solaris, illumos, or AIX.
 296  		err1 = execveLibc(
 297  			uintptr(unsafe.Pointer(argv0p)),
 298  			uintptr(unsafe.Pointer(&argvp[0])),
 299  			uintptr(unsafe.Pointer(&envvp[0])))
 300  	} else if runtime.GOOS == "darwin" || runtime.GOOS == "ios" {
 301  		// Similarly on Darwin.
 302  		err1 = execveDarwin(argv0p, &argvp[0], &envvp[0])
 303  	} else if runtime.GOOS == "openbsd" && runtime.GOARCH != "mips64" {
 304  		// Similarly on OpenBSD.
 305  		err1 = execveOpenBSD(argv0p, &argvp[0], &envvp[0])
 306  	} else {
 307  		_, _, err1 = RawSyscall(SYS_EXECVE,
 308  			uintptr(unsafe.Pointer(argv0p)),
 309  			uintptr(unsafe.Pointer(&argvp[0])),
 310  			uintptr(unsafe.Pointer(&envvp[0])))
 311  	}
 312  	runtime_AfterExec()
 313  	return err1
 314  }
 315