// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4 5 package syscall
6 7 import (
8 "runtime"
9 "unsafe"
10 )
11 12 type SysProcAttr struct {
13 Chroot string // Chroot.
14 Credential *Credential // Credential.
15 Ptrace bool // Enable tracing.
16 Setsid bool // Create session.
17 // Setpgid sets the process group ID of the child to Pgid,
18 // or, if Pgid == 0, to the new child's process ID.
19 Setpgid bool
20 // Setctty sets the controlling terminal of the child to
21 // file descriptor Ctty. Ctty must be a descriptor number
22 // in the child process: an index into ProcAttr.Files.
23 // This is only meaningful if Setsid is true.
24 Setctty bool
25 Noctty bool // Detach fd 0 from controlling terminal
26 Ctty int // Controlling TTY fd
27 // Foreground places the child process group in the foreground.
28 // This implies Setpgid. The Ctty field must be set to
29 // the descriptor of the controlling TTY.
30 // Unlike Setctty, in this case Ctty must be a descriptor
31 // number in the parent process.
32 Foreground bool
33 Pgid int // Child's process group ID if Setpgid.
34 Pdeathsig Signal // Signal that the process will get when its parent dies (Linux and FreeBSD only)
35 Jail int // Jail to which the child process is attached (FreeBSD only).
36 }
const (
	// _P_PID is passed as the first (id type) argument of the procctl
	// system call when the parent-death signal is installed; it is used
	// together with an id of 0.
	_P_PID = 0

	// _PROC_PDEATHSIG_CTL is the procctl command used to install
	// SysProcAttr.Pdeathsig as the parent-death signal.
	_PROC_PDEATHSIG_CTL = 11
)
// The following hooks have no Go body in this file; they are implemented
// in the runtime package.

// runtime_BeforeFork is called immediately before the fork system call.
func runtime_BeforeFork()

// runtime_AfterFork is called in the parent once fork has returned, on both
// the failure path and the success path.
func runtime_AfterFork()

// runtime_AfterForkInChild is called in the child after the process group
// and foreground setup; per its call site it restores the signal mask.
func runtime_AfterForkInChild()
48 49 // Fork, dup fd onto 0..len(fd), and exec(argv0, argvv, envv) in child.
50 // If a dup or exec fails, write the errno error to pipe.
51 // (Pipe is close-on-exec so if exec succeeds, it will be closed.)
52 // In the child, this function must not acquire any locks, because
53 // they might have been locked at the time of the fork. This means
54 // no rescheduling, no malloc calls, and no new stack segments.
55 // For the same reason compiler does not race instrument it.
56 // The calls to RawSyscall are okay because they are assembly
57 // functions that do not grow the stack.
58 //
59 //go:norace
60 func forkAndExecInChild(argv0 *byte, argv, envv []*byte, chroot, dir *byte, attr *ProcAttr, sys *SysProcAttr, pipe int) (pid int, err Errno) {
61 // Declare all variables at top in case any
62 // declarations require heap allocation (e.g., err1).
63 var (
64 r1 uintptr
65 err1 Errno
66 nextfd int
67 i int
68 pgrp _C_int
69 cred *Credential
70 ngroups, groups uintptr
71 upid uintptr
72 )
73 74 rlim := origRlimitNofile.Load()
75 76 // Record parent PID so child can test if it has died.
77 ppid, _, _ := RawSyscall(SYS_GETPID, 0, 0, 0)
78 79 // guard against side effects of shuffling fds below.
80 // Make sure that nextfd is beyond any currently open files so
81 // that we can't run the risk of overwriting any of them.
82 fd := make([]int, len(attr.Files))
83 nextfd = len(attr.Files)
84 for i, ufd := range attr.Files {
85 if nextfd < int(ufd) {
86 nextfd = int(ufd)
87 }
88 fd[i] = int(ufd)
89 }
90 nextfd++
91 92 // About to call fork.
93 // No more allocation or calls of non-assembly functions.
94 runtime_BeforeFork()
95 r1, _, err1 = RawSyscall(SYS_FORK, 0, 0, 0)
96 if err1 != 0 {
97 runtime_AfterFork()
98 return 0, err1
99 }
100 101 if r1 != 0 {
102 // parent; return PID
103 runtime_AfterFork()
104 return int(r1), 0
105 }
106 107 // Fork succeeded, now in child.
108 109 // Attach to the given jail, if any. The system call also changes the
110 // process' root and working directories to the jail's path directory.
111 if sys.Jail > 0 {
112 _, _, err1 = RawSyscall(SYS_JAIL_ATTACH, uintptr(sys.Jail), 0, 0)
113 if err1 != 0 {
114 goto childerror
115 }
116 }
117 118 // Enable tracing if requested.
119 if sys.Ptrace {
120 _, _, err1 = RawSyscall(SYS_PTRACE, uintptr(PTRACE_TRACEME), 0, 0)
121 if err1 != 0 {
122 goto childerror
123 }
124 }
125 126 // Session ID
127 if sys.Setsid {
128 _, _, err1 = RawSyscall(SYS_SETSID, 0, 0, 0)
129 if err1 != 0 {
130 goto childerror
131 }
132 }
133 134 // Set process group
135 if sys.Setpgid || sys.Foreground {
136 // Place child in process group.
137 _, _, err1 = RawSyscall(SYS_SETPGID, 0, uintptr(sys.Pgid), 0)
138 if err1 != 0 {
139 goto childerror
140 }
141 }
142 143 if sys.Foreground {
144 // This should really be pid_t, however _C_int (aka int32) is
145 // generally equivalent.
146 pgrp = _C_int(sys.Pgid)
147 if pgrp == 0 {
148 r1, _, err1 = RawSyscall(SYS_GETPID, 0, 0, 0)
149 if err1 != 0 {
150 goto childerror
151 }
152 153 pgrp = _C_int(r1)
154 }
155 156 // Place process group in foreground.
157 _, _, err1 = RawSyscall(SYS_IOCTL, uintptr(sys.Ctty), uintptr(TIOCSPGRP), uintptr(unsafe.Pointer(&pgrp)))
158 if err1 != 0 {
159 goto childerror
160 }
161 }
162 163 // Restore the signal mask. We do this after TIOCSPGRP to avoid
164 // having the kernel send a SIGTTOU signal to the process group.
165 runtime_AfterForkInChild()
166 167 // Chroot
168 if chroot != nil {
169 _, _, err1 = RawSyscall(SYS_CHROOT, uintptr(unsafe.Pointer(chroot)), 0, 0)
170 if err1 != 0 {
171 goto childerror
172 }
173 }
174 175 // User and groups
176 if cred = sys.Credential; cred != nil {
177 ngroups = uintptr(len(cred.Groups))
178 groups = uintptr(0)
179 if ngroups > 0 {
180 groups = uintptr(unsafe.Pointer(&cred.Groups[0]))
181 }
182 if !cred.NoSetGroups {
183 _, _, err1 = RawSyscall(SYS_SETGROUPS, ngroups, groups, 0)
184 if err1 != 0 {
185 goto childerror
186 }
187 }
188 _, _, err1 = RawSyscall(SYS_SETGID, uintptr(cred.Gid), 0, 0)
189 if err1 != 0 {
190 goto childerror
191 }
192 _, _, err1 = RawSyscall(SYS_SETUID, uintptr(cred.Uid), 0, 0)
193 if err1 != 0 {
194 goto childerror
195 }
196 }
197 198 // Chdir
199 if dir != nil {
200 _, _, err1 = RawSyscall(SYS_CHDIR, uintptr(unsafe.Pointer(dir)), 0, 0)
201 if err1 != 0 {
202 goto childerror
203 }
204 }
205 206 // Parent death signal
207 if sys.Pdeathsig != 0 {
208 switch runtime.GOARCH {
209 case "386", "arm":
210 _, _, err1 = RawSyscall6(SYS_PROCCTL, _P_PID, 0, 0, _PROC_PDEATHSIG_CTL, uintptr(unsafe.Pointer(&sys.Pdeathsig)), 0)
211 default:
212 _, _, err1 = RawSyscall6(SYS_PROCCTL, _P_PID, 0, _PROC_PDEATHSIG_CTL, uintptr(unsafe.Pointer(&sys.Pdeathsig)), 0, 0)
213 }
214 if err1 != 0 {
215 goto childerror
216 }
217 218 // Signal self if parent is already dead. This might cause a
219 // duplicate signal in rare cases, but it won't matter when
220 // using SIGKILL.
221 r1, _, _ = RawSyscall(SYS_GETPPID, 0, 0, 0)
222 if r1 != ppid {
223 upid, _, _ = RawSyscall(SYS_GETPID, 0, 0, 0)
224 _, _, err1 = RawSyscall(SYS_KILL, upid, uintptr(sys.Pdeathsig), 0)
225 if err1 != 0 {
226 goto childerror
227 }
228 }
229 }
230 231 // Pass 1: look for fd[i] < i and move those up above len(fd)
232 // so that pass 2 won't stomp on an fd it needs later.
233 if pipe < nextfd {
234 _, _, err1 = RawSyscall(SYS_FCNTL, uintptr(pipe), F_DUP2FD_CLOEXEC, uintptr(nextfd))
235 if err1 != 0 {
236 goto childerror
237 }
238 pipe = nextfd
239 nextfd++
240 }
241 for i = 0; i < len(fd); i++ {
242 if fd[i] >= 0 && fd[i] < i {
243 if nextfd == pipe { // don't stomp on pipe
244 nextfd++
245 }
246 _, _, err1 = RawSyscall(SYS_FCNTL, uintptr(fd[i]), F_DUP2FD_CLOEXEC, uintptr(nextfd))
247 if err1 != 0 {
248 goto childerror
249 }
250 fd[i] = nextfd
251 nextfd++
252 }
253 }
254 255 // Pass 2: dup fd[i] down onto i.
256 for i = 0; i < len(fd); i++ {
257 if fd[i] == -1 {
258 RawSyscall(SYS_CLOSE, uintptr(i), 0, 0)
259 continue
260 }
261 if fd[i] == i {
262 // dup2(i, i) won't clear close-on-exec flag on Linux,
263 // probably not elsewhere either.
264 _, _, err1 = RawSyscall(SYS_FCNTL, uintptr(fd[i]), F_SETFD, 0)
265 if err1 != 0 {
266 goto childerror
267 }
268 continue
269 }
270 // The new fd is created NOT close-on-exec,
271 // which is exactly what we want.
272 _, _, err1 = RawSyscall(SYS_DUP2, uintptr(fd[i]), uintptr(i), 0)
273 if err1 != 0 {
274 goto childerror
275 }
276 }
277 278 // By convention, we don't close-on-exec the fds we are
279 // started with, so if len(fd) < 3, close 0, 1, 2 as needed.
280 // Programs that know they inherit fds >= 3 will need
281 // to set them close-on-exec.
282 for i = len(fd); i < 3; i++ {
283 RawSyscall(SYS_CLOSE, uintptr(i), 0, 0)
284 }
285 286 // Detach fd 0 from tty
287 if sys.Noctty {
288 _, _, err1 = RawSyscall(SYS_IOCTL, 0, uintptr(TIOCNOTTY), 0)
289 if err1 != 0 {
290 goto childerror
291 }
292 }
293 294 // Set the controlling TTY to Ctty
295 if sys.Setctty {
296 _, _, err1 = RawSyscall(SYS_IOCTL, uintptr(sys.Ctty), uintptr(TIOCSCTTY), 0)
297 if err1 != 0 {
298 goto childerror
299 }
300 }
301 302 // Restore original rlimit.
303 if rlim != nil {
304 RawSyscall(SYS_SETRLIMIT, uintptr(RLIMIT_NOFILE), uintptr(unsafe.Pointer(rlim)), 0)
305 }
306 307 // Time to exec.
308 _, _, err1 = RawSyscall(SYS_EXECVE,
309 uintptr(unsafe.Pointer(argv0)),
310 uintptr(unsafe.Pointer(&argv[0])),
311 uintptr(unsafe.Pointer(&envv[0])))
312 313 childerror:
314 // send error code on pipe
315 RawSyscall(SYS_WRITE, uintptr(pipe), uintptr(unsafe.Pointer(&err1)), unsafe.Sizeof(err1))
316 for {
317 RawSyscall(SYS_EXIT, 253, 0, 0)
318 }
319 }
320 321 // forkAndExecFailureCleanup cleans up after an exec failure.
322 func forkAndExecFailureCleanup(attr *ProcAttr, sys *SysProcAttr) {
323 // Nothing to do.
324 }
325