1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 5 //go:build unix
6 7 // Fork, exec, wait, etc.
8 9 package syscall
10 11 import (
12 errorspkg "errors"
13 "internal/bytealg"
14 "runtime"
15 "sync"
16 "unsafe"
17 )
// ForkLock is used to synchronize creation of new file descriptors
// with fork.
//
// We want the child in a fork/exec sequence to inherit only the
// file descriptors we intend. To do that, we mark all file
// descriptors close-on-exec and then, in the child, explicitly
// unmark the ones we want the exec'ed program to keep.
// Unix doesn't make this easy: there is, in general, no way to
// allocate a new file descriptor close-on-exec. Instead you
// have to allocate the descriptor and then mark it close-on-exec.
// If a fork happens between those two events, the child's exec
// will inherit an unwanted file descriptor.
//
// This lock solves that race: the two-step operation of creating a
// new fd and marking it close-on-exec is done holding ForkLock for
// reading, and the fork itself is done holding ForkLock for writing.
// At least, that's the idea. There are some complications.
//
// Some system calls that create new file descriptors can block
// for arbitrarily long times: open on a hung NFS server or named
// pipe, accept on a socket, and so on. We can't reasonably grab
// the lock across those operations.
//
// It is worse to inherit some file descriptors than others.
// If a non-malicious child accidentally inherits an open ordinary file,
// that's not a big deal. On the other hand, if a long-lived child
// accidentally inherits the write end of a pipe, then the reader
// of that pipe will not see EOF until that child exits, potentially
// causing the parent program to hang. This is a common problem
// in threaded C programs that use popen.
//
// Luckily, the file descriptors that are most important not to
// inherit are not the ones that can take an arbitrarily long time
// to create: pipe returns instantly, and the net package uses
// non-blocking I/O to accept on a listening socket.
// The rules for which file descriptor-creating operations use the
// ForkLock are as follows:
//
//   - [Pipe]. Use pipe2 if available. Otherwise, does not block,
//     so use ForkLock.
//   - [Socket]. Use SOCK_CLOEXEC if available. Otherwise, does not
//     block, so use ForkLock.
//   - [Open]. Use [O_CLOEXEC] if available. Otherwise, may block,
//     so live with the race.
//   - [Dup]. Use [F_DUPFD_CLOEXEC] or dup3 if available. Otherwise,
//     does not block, so use ForkLock.
var ForkLock sync.RWMutex
66 67 // StringSlicePtr converts a slice of strings to a slice of pointers
68 // to NUL-terminated byte arrays. If any string contains a NUL byte
69 // this function panics instead of returning an error.
70 //
71 // Deprecated: Use [SlicePtrFromStrings] instead.
72 func StringSlicePtr(ss []string) []*byte {
73 bb := make([]*byte, len(ss)+1)
74 for i := 0; i < len(ss); i++ {
75 bb[i] = StringBytePtr(ss[i])
76 }
77 bb[len(ss)] = nil
78 return bb
79 }
80 81 // SlicePtrFromStrings converts a slice of strings to a slice of
82 // pointers to NUL-terminated byte arrays. If any string contains
83 // a NUL byte, it returns (nil, [EINVAL]).
84 func SlicePtrFromStrings(ss []string) ([]*byte, error) {
85 n := 0
86 for _, s := range ss {
87 if bytealg.IndexByteString(s, 0) != -1 {
88 return nil, EINVAL
89 }
90 n += len(s) + 1 // +1 for NUL
91 }
92 bb := make([]*byte, len(ss)+1)
93 b := make([]byte, n)
94 n = 0
95 for i, s := range ss {
96 bb[i] = &b[n]
97 copy(b[n:], s)
98 n += len(s) + 1
99 }
100 return bb, nil
101 }
102 103 func CloseOnExec(fd int) { fcntl(fd, F_SETFD, FD_CLOEXEC) }
104 105 func SetNonblock(fd int, nonblocking bool) (err error) {
106 flag, err := fcntl(fd, F_GETFL, 0)
107 if err != nil {
108 return err
109 }
110 if (flag&O_NONBLOCK != 0) == nonblocking {
111 return nil
112 }
113 if nonblocking {
114 flag |= O_NONBLOCK
115 } else {
116 flag &^= O_NONBLOCK
117 }
118 _, err = fcntl(fd, F_SETFL, flag)
119 return err
120 }
// Credential holds user and group identities to be assumed
// by a child process started by [StartProcess].
//
// NOTE(review): nothing in this file reads these fields; presumably they
// are consumed through SysProcAttr by the OS-specific child setup — confirm.
type Credential struct {
	Uid         uint32   // User ID.
	Gid         uint32   // Group ID.
	Groups      []uint32 // Supplementary group IDs.
	NoSetGroups bool     // If true, don't set supplementary groups.
}
// ProcAttr holds attributes that will be applied to a new process started
// by [StartProcess].
//
// A nil *ProcAttr handed to forkExec is treated as a zero ProcAttr.
type ProcAttr struct {
	// Dir is the current working directory. When it is empty, forkExec
	// passes a nil directory pointer on to the child setup.
	Dir string
	// Env is the environment. forkExec converts it with SlicePtrFromStrings,
	// so an entry containing a NUL byte makes the start fail with EINVAL.
	Env []string
	// Files is the list of file descriptors. When Sys.Setctty is set,
	// Sys.Ctty must be smaller than len(Files).
	Files []uintptr
	// Sys holds the OS-specific attributes; nil is treated as a zero
	// SysProcAttr by forkExec.
	Sys *SysProcAttr
}
139 140 var zeroProcAttr ProcAttr
141 var zeroSysProcAttr SysProcAttr
142 143 func forkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) {
144 var p [2]int
145 var n int
146 var err1 Errno
147 var wstatus WaitStatus
148 149 if attr == nil {
150 attr = &zeroProcAttr
151 }
152 sys := attr.Sys
153 if sys == nil {
154 sys = &zeroSysProcAttr
155 }
156 157 // Convert args to C form.
158 argv0p, err := BytePtrFromString(argv0)
159 if err != nil {
160 return 0, err
161 }
162 argvp, err := SlicePtrFromStrings(argv)
163 if err != nil {
164 return 0, err
165 }
166 envvp, err := SlicePtrFromStrings(attr.Env)
167 if err != nil {
168 return 0, err
169 }
170 171 if (runtime.GOOS == "freebsd" || runtime.GOOS == "dragonfly") && len(argv) > 0 && len(argv[0]) > len(argv0) {
172 argvp[0] = argv0p
173 }
174 175 var chroot *byte
176 if sys.Chroot != "" {
177 chroot, err = BytePtrFromString(sys.Chroot)
178 if err != nil {
179 return 0, err
180 }
181 }
182 var dir *byte
183 if attr.Dir != "" {
184 dir, err = BytePtrFromString(attr.Dir)
185 if err != nil {
186 return 0, err
187 }
188 }
189 190 // Both Setctty and Foreground use the Ctty field,
191 // but they give it slightly different meanings.
192 if sys.Setctty && sys.Foreground {
193 return 0, errorspkg.New("both Setctty and Foreground set in SysProcAttr")
194 }
195 if sys.Setctty && sys.Ctty >= len(attr.Files) {
196 return 0, errorspkg.New("Setctty set but Ctty not valid in child")
197 }
198 199 acquireForkLock()
200 201 // Allocate child status pipe close on exec.
202 if err = forkExecPipe(p[:]); err != nil {
203 releaseForkLock()
204 return 0, err
205 }
206 207 // Kick off child.
208 pid, err1 = forkAndExecInChild(argv0p, argvp, envvp, chroot, dir, attr, sys, p[1])
209 if err1 != 0 {
210 Close(p[0])
211 Close(p[1])
212 releaseForkLock()
213 return 0, Errno(err1)
214 }
215 releaseForkLock()
216 217 // Read child error status from pipe.
218 Close(p[1])
219 for {
220 n, err = readlen(p[0], (*byte)(unsafe.Pointer(&err1)), int(unsafe.Sizeof(err1)))
221 if err != EINTR {
222 break
223 }
224 }
225 Close(p[0])
226 if err != nil || n != 0 {
227 if n == int(unsafe.Sizeof(err1)) {
228 err = Errno(err1)
229 }
230 if err == nil {
231 err = EPIPE
232 }
233 234 // Child failed; wait for it to exit, to make sure
235 // the zombies don't accumulate.
236 _, err1 := Wait4(pid, &wstatus, 0, nil)
237 for err1 == EINTR {
238 _, err1 = Wait4(pid, &wstatus, 0, nil)
239 }
240 241 // OS-specific cleanup on failure.
242 forkAndExecFailureCleanup(attr, sys)
243 244 return 0, err
245 }
246 247 // Read got EOF, so pipe closed on exec, so exec succeeded.
248 return pid, nil
249 }
250 251 // Combination of fork and exec, careful to be thread safe.
252 func ForkExec(argv0 string, argv []string, attr *ProcAttr) (pid int, err error) {
253 return forkExec(argv0, argv, attr)
254 }
255 256 // StartProcess wraps [ForkExec] for package os.
257 func StartProcess(argv0 string, argv []string, attr *ProcAttr) (pid int, handle uintptr, err error) {
258 pid, err = forkExec(argv0, argv, attr)
259 return pid, 0, err
260 }
// runtime_BeforeExec and runtime_AfterExec are implemented in the runtime
// package. Exec calls runtime_BeforeExec before issuing the execve call and
// runtime_AfterExec once that call has returned.
func runtime_BeforeExec()
func runtime_AfterExec()
265 266 // execveLibc is non-nil on OS using libc syscall, set to execve in exec_libc.go; this
267 // avoids a build dependency for other platforms.
268 var execveLibc func(path uintptr, argv uintptr, envp uintptr) Errno
269 var execveDarwin func(path *byte, argv **byte, envp **byte) error
270 var execveOpenBSD func(path *byte, argv **byte, envp **byte) error
271 272 // Exec invokes the execve(2) system call.
273 func Exec(argv0 string, argv []string, envv []string) (err error) {
274 argv0p, err := BytePtrFromString(argv0)
275 if err != nil {
276 return err
277 }
278 argvp, err := SlicePtrFromStrings(argv)
279 if err != nil {
280 return err
281 }
282 envvp, err := SlicePtrFromStrings(envv)
283 if err != nil {
284 return err
285 }
286 runtime_BeforeExec()
287 288 rlim := origRlimitNofile.Load()
289 if rlim != nil {
290 Setrlimit(RLIMIT_NOFILE, rlim)
291 }
292 293 var err1 error
294 if runtime.GOOS == "solaris" || runtime.GOOS == "illumos" || runtime.GOOS == "aix" {
295 // RawSyscall should never be used on Solaris, illumos, or AIX.
296 err1 = execveLibc(
297 uintptr(unsafe.Pointer(argv0p)),
298 uintptr(unsafe.Pointer(&argvp[0])),
299 uintptr(unsafe.Pointer(&envvp[0])))
300 } else if runtime.GOOS == "darwin" || runtime.GOOS == "ios" {
301 // Similarly on Darwin.
302 err1 = execveDarwin(argv0p, &argvp[0], &envvp[0])
303 } else if runtime.GOOS == "openbsd" && runtime.GOARCH != "mips64" {
304 // Similarly on OpenBSD.
305 err1 = execveOpenBSD(argv0p, &argvp[0], &envvp[0])
306 } else {
307 _, _, err1 = RawSyscall(SYS_EXECVE,
308 uintptr(unsafe.Pointer(argv0p)),
309 uintptr(unsafe.Pointer(&argvp[0])),
310 uintptr(unsafe.Pointer(&envvp[0])))
311 }
312 runtime_AfterExec()
313 return err1
314 }
315