fd_unix.mx raw
1 // Copyright 2017 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build unix || (js && wasm) || wasip1
6
7 package poll
8
9 import (
10 "internal/itoa"
11 "internal/syscall/unix"
12 "io"
13 "sync/atomic"
14 "syscall"
15 )
16
// FD is a file descriptor. The net and os packages use this type as a
// field of a larger type representing a network connection or OS file.
type FD struct {
	// Lock sysfd and serialize access to Read and Write methods.
	// Also carries the reference count manipulated by incref/decref.
	fdmu fdMutex

	// System file descriptor. Immutable until Close.
	// destroy resets it to -1 once the descriptor has been released.
	Sysfd int

	// Platform dependent state of the file descriptor.
	SysFile

	// I/O poller.
	pd pollDesc

	// Semaphore signaled when file is closed.
	// Released by destroy and acquired by Close.
	csema uint32

	// Non-zero if this file has been set to blocking mode.
	// Set by Init (non-pollable or poller init failure) and by SetBlocking.
	isBlocking uint32

	// Whether this is a streaming descriptor, as opposed to a
	// packet-based descriptor like a UDP socket. Immutable.
	// Stream descriptors have single reads and writes capped at maxRW.
	IsStream bool

	// Whether a zero byte read indicates EOF. This is false for a
	// message based socket connection.
	ZeroReadIsEOF bool

	// Whether this is a file rather than a network socket.
	// Set by Init when the net argument is "file".
	isFile bool
}
49
50 // Init initializes the FD. The Sysfd field should already be set.
51 // This can be called multiple times on a single FD.
52 // The net argument is a network name from the net package (e.g., "tcp"),
53 // or "file".
54 // Set pollable to true if fd should be managed by runtime netpoll.
55 func (fd *FD) Init(net []byte, pollable bool) error {
56 fd.SysFile.init()
57
58 // We don't actually care about the various network types.
59 if net == "file" {
60 fd.isFile = true
61 }
62 if !pollable {
63 fd.isBlocking = 1
64 return nil
65 }
66 err := fd.pd.init(fd)
67 if err != nil {
68 // If we could not initialize the runtime poller,
69 // assume we are using blocking mode.
70 fd.isBlocking = 1
71 }
72 return err
73 }
74
75 // Destroy closes the file descriptor. This is called when there are
76 // no remaining references.
77 func (fd *FD) destroy() error {
78 // Poller may want to unregister fd in readiness notification mechanism,
79 // so this must be executed before CloseFunc.
80 fd.pd.close()
81
82 err := fd.SysFile.destroy(fd.Sysfd)
83
84 fd.Sysfd = -1
85 runtime_Semrelease(&fd.csema)
86 return err
87 }
88
89 // Close closes the FD. The underlying file descriptor is closed by the
90 // destroy method when there are no remaining references.
91 func (fd *FD) Close() error {
92 if !fd.fdmu.increfAndClose() {
93 return errClosing(fd.isFile)
94 }
95
96 // Unblock any I/O. Once it all unblocks and returns,
97 // so that it cannot be referring to fd.sysfd anymore,
98 // the final decref will close fd.sysfd. This should happen
99 // fairly quickly, since all the I/O is non-blocking, and any
100 // attempts to block in the pollDesc will return errClosing(fd.isFile).
101 fd.pd.evict()
102
103 // The call to decref will call destroy if there are no other
104 // references.
105 err := fd.decref()
106
107 // Wait until the descriptor is closed. If this was the only
108 // reference, it is already closed. Only wait if the file has
109 // not been set to blocking mode, as otherwise any current I/O
110 // may be blocking, and that would block the Close.
111 // No need for an atomic read of isBlocking, increfAndClose means
112 // we have exclusive access to fd.
113 if fd.isBlocking == 0 {
114 runtime_Semacquire(&fd.csema)
115 }
116
117 return err
118 }
119
120 // SetBlocking puts the file into blocking mode.
121 func (fd *FD) SetBlocking() error {
122 if err := fd.incref(); err != nil {
123 return err
124 }
125 defer fd.decref()
126 // Atomic store so that concurrent calls to SetBlocking
127 // do not cause a race condition. isBlocking only ever goes
128 // from 0 to 1 so there is no real race here.
129 atomic.StoreUint32(&fd.isBlocking, 1)
130 return syscall.SetNonblock(fd.Sysfd, false)
131 }
132
// maxRW is the largest number of bytes passed to a single read or
// write call on a stream descriptor.
//
// Darwin and FreeBSD can't read or write 2GB+ files at a time,
// even on 64-bit systems.
// The same is true of socket implementations on many systems.
// See golang.org/issue/7812 and golang.org/issue/16266.
// Use 1GB instead of, say, 2GB-1, to keep subsequent reads aligned.
const maxRW = 1 << 30
139
140 // Read implements io.Reader.
141 func (fd *FD) Read(p []byte) (int, error) {
142 if err := fd.readLock(); err != nil {
143 return 0, err
144 }
145 defer fd.readUnlock()
146 if len(p) == 0 {
147 // If the caller wanted a zero byte read, return immediately
148 // without trying (but after acquiring the readLock).
149 // Otherwise syscall.Read returns 0, nil which looks like
150 // io.EOF.
151 // TODO(bradfitz): make it wait for readability? (Issue 15735)
152 return 0, nil
153 }
154 if err := fd.pd.prepareRead(fd.isFile); err != nil {
155 return 0, err
156 }
157 if fd.IsStream && len(p) > maxRW {
158 p = p[:maxRW]
159 }
160 for {
161 n, err := ignoringEINTRIO(syscall.Read, fd.Sysfd, p)
162 if err != nil {
163 n = 0
164 if err == syscall.EAGAIN && fd.pd.pollable() {
165 if err = fd.pd.waitRead(fd.isFile); err == nil {
166 continue
167 }
168 }
169 }
170 err = fd.eofError(n, err)
171 return n, err
172 }
173 }
174
175 // Pread wraps the pread system call.
176 func (fd *FD) Pread(p []byte, off int64) (int, error) {
177 // Call incref, not readLock, because since pread specifies the
178 // offset it is independent from other reads.
179 // Similarly, using the poller doesn't make sense for pread.
180 if err := fd.incref(); err != nil {
181 return 0, err
182 }
183 if fd.IsStream && len(p) > maxRW {
184 p = p[:maxRW]
185 }
186 n, err := ignoringEINTR2(func() (int, error) {
187 return syscall.Pread(fd.Sysfd, p, off)
188 })
189 if err != nil {
190 n = 0
191 }
192 fd.decref()
193 err = fd.eofError(n, err)
194 return n, err
195 }
196
197 // ReadFrom wraps the recvfrom network call.
198 func (fd *FD) ReadFrom(p []byte) (int, syscall.Sockaddr, error) {
199 if err := fd.readLock(); err != nil {
200 return 0, nil, err
201 }
202 defer fd.readUnlock()
203 if err := fd.pd.prepareRead(fd.isFile); err != nil {
204 return 0, nil, err
205 }
206 for {
207 n, sa, err := syscall.Recvfrom(fd.Sysfd, p, 0)
208 if err != nil {
209 if err == syscall.EINTR {
210 continue
211 }
212 n = 0
213 if err == syscall.EAGAIN && fd.pd.pollable() {
214 if err = fd.pd.waitRead(fd.isFile); err == nil {
215 continue
216 }
217 }
218 }
219 err = fd.eofError(n, err)
220 return n, sa, err
221 }
222 }
223
224 // ReadFromInet4 wraps the recvfrom network call for IPv4.
225 func (fd *FD) ReadFromInet4(p []byte, from *syscall.SockaddrInet4) (int, error) {
226 if err := fd.readLock(); err != nil {
227 return 0, err
228 }
229 defer fd.readUnlock()
230 if err := fd.pd.prepareRead(fd.isFile); err != nil {
231 return 0, err
232 }
233 for {
234 n, err := unix.RecvfromInet4(fd.Sysfd, p, 0, from)
235 if err != nil {
236 if err == syscall.EINTR {
237 continue
238 }
239 n = 0
240 if err == syscall.EAGAIN && fd.pd.pollable() {
241 if err = fd.pd.waitRead(fd.isFile); err == nil {
242 continue
243 }
244 }
245 }
246 err = fd.eofError(n, err)
247 return n, err
248 }
249 }
250
251 // ReadFromInet6 wraps the recvfrom network call for IPv6.
252 func (fd *FD) ReadFromInet6(p []byte, from *syscall.SockaddrInet6) (int, error) {
253 if err := fd.readLock(); err != nil {
254 return 0, err
255 }
256 defer fd.readUnlock()
257 if err := fd.pd.prepareRead(fd.isFile); err != nil {
258 return 0, err
259 }
260 for {
261 n, err := unix.RecvfromInet6(fd.Sysfd, p, 0, from)
262 if err != nil {
263 if err == syscall.EINTR {
264 continue
265 }
266 n = 0
267 if err == syscall.EAGAIN && fd.pd.pollable() {
268 if err = fd.pd.waitRead(fd.isFile); err == nil {
269 continue
270 }
271 }
272 }
273 err = fd.eofError(n, err)
274 return n, err
275 }
276 }
277
278 // ReadMsg wraps the recvmsg network call.
279 func (fd *FD) ReadMsg(p []byte, oob []byte, flags int) (int, int, int, syscall.Sockaddr, error) {
280 if err := fd.readLock(); err != nil {
281 return 0, 0, 0, nil, err
282 }
283 defer fd.readUnlock()
284 if err := fd.pd.prepareRead(fd.isFile); err != nil {
285 return 0, 0, 0, nil, err
286 }
287 for {
288 n, oobn, sysflags, sa, err := syscall.Recvmsg(fd.Sysfd, p, oob, flags)
289 if err != nil {
290 if err == syscall.EINTR {
291 continue
292 }
293 // TODO(dfc) should n and oobn be set to 0
294 if err == syscall.EAGAIN && fd.pd.pollable() {
295 if err = fd.pd.waitRead(fd.isFile); err == nil {
296 continue
297 }
298 }
299 }
300 err = fd.eofError(n, err)
301 return n, oobn, sysflags, sa, err
302 }
303 }
304
305 // ReadMsgInet4 is ReadMsg, but specialized for syscall.SockaddrInet4.
306 func (fd *FD) ReadMsgInet4(p []byte, oob []byte, flags int, sa4 *syscall.SockaddrInet4) (int, int, int, error) {
307 if err := fd.readLock(); err != nil {
308 return 0, 0, 0, err
309 }
310 defer fd.readUnlock()
311 if err := fd.pd.prepareRead(fd.isFile); err != nil {
312 return 0, 0, 0, err
313 }
314 for {
315 n, oobn, sysflags, err := unix.RecvmsgInet4(fd.Sysfd, p, oob, flags, sa4)
316 if err != nil {
317 if err == syscall.EINTR {
318 continue
319 }
320 // TODO(dfc) should n and oobn be set to 0
321 if err == syscall.EAGAIN && fd.pd.pollable() {
322 if err = fd.pd.waitRead(fd.isFile); err == nil {
323 continue
324 }
325 }
326 }
327 err = fd.eofError(n, err)
328 return n, oobn, sysflags, err
329 }
330 }
331
332 // ReadMsgInet6 is ReadMsg, but specialized for syscall.SockaddrInet6.
333 func (fd *FD) ReadMsgInet6(p []byte, oob []byte, flags int, sa6 *syscall.SockaddrInet6) (int, int, int, error) {
334 if err := fd.readLock(); err != nil {
335 return 0, 0, 0, err
336 }
337 defer fd.readUnlock()
338 if err := fd.pd.prepareRead(fd.isFile); err != nil {
339 return 0, 0, 0, err
340 }
341 for {
342 n, oobn, sysflags, err := unix.RecvmsgInet6(fd.Sysfd, p, oob, flags, sa6)
343 if err != nil {
344 if err == syscall.EINTR {
345 continue
346 }
347 // TODO(dfc) should n and oobn be set to 0
348 if err == syscall.EAGAIN && fd.pd.pollable() {
349 if err = fd.pd.waitRead(fd.isFile); err == nil {
350 continue
351 }
352 }
353 }
354 err = fd.eofError(n, err)
355 return n, oobn, sysflags, err
356 }
357 }
358
359 // Write implements io.Writer.
360 func (fd *FD) Write(p []byte) (int, error) {
361 if err := fd.writeLock(); err != nil {
362 return 0, err
363 }
364 defer fd.writeUnlock()
365 if err := fd.pd.prepareWrite(fd.isFile); err != nil {
366 return 0, err
367 }
368 var nn int
369 for {
370 max := len(p)
371 if fd.IsStream && max-nn > maxRW {
372 max = nn + maxRW
373 }
374 n, err := ignoringEINTRIO(syscall.Write, fd.Sysfd, p[nn:max])
375 if n > 0 {
376 if n > max-nn {
377 // This can reportedly happen when using
378 // some VPN software. Issue #61060.
379 // If we don't check this we will panic
380 // with slice bounds out of range.
381 // Use a more informative panic.
382 panic("invalid return from write: got " + itoa.Itoa(n) + " from a write of " + itoa.Itoa(max-nn))
383 }
384 nn += n
385 }
386 if nn == len(p) {
387 return nn, err
388 }
389 if err == syscall.EAGAIN && fd.pd.pollable() {
390 if err = fd.pd.waitWrite(fd.isFile); err == nil {
391 continue
392 }
393 }
394 if err != nil {
395 return nn, err
396 }
397 if n == 0 {
398 return nn, io.ErrUnexpectedEOF
399 }
400 }
401 }
402
403 // Pwrite wraps the pwrite system call.
404 func (fd *FD) Pwrite(p []byte, off int64) (int, error) {
405 // Call incref, not writeLock, because since pwrite specifies the
406 // offset it is independent from other writes.
407 // Similarly, using the poller doesn't make sense for pwrite.
408 if err := fd.incref(); err != nil {
409 return 0, err
410 }
411 defer fd.decref()
412 var nn int
413 for {
414 max := len(p)
415 if fd.IsStream && max-nn > maxRW {
416 max = nn + maxRW
417 }
418 n, err := syscall.Pwrite(fd.Sysfd, p[nn:max], off+int64(nn))
419 if err == syscall.EINTR {
420 continue
421 }
422 if n > 0 {
423 nn += n
424 }
425 if nn == len(p) {
426 return nn, err
427 }
428 if err != nil {
429 return nn, err
430 }
431 if n == 0 {
432 return nn, io.ErrUnexpectedEOF
433 }
434 }
435 }
436
437 // WriteToInet4 wraps the sendto network call for IPv4 addresses.
438 func (fd *FD) WriteToInet4(p []byte, sa *syscall.SockaddrInet4) (int, error) {
439 if err := fd.writeLock(); err != nil {
440 return 0, err
441 }
442 defer fd.writeUnlock()
443 if err := fd.pd.prepareWrite(fd.isFile); err != nil {
444 return 0, err
445 }
446 for {
447 err := unix.SendtoInet4(fd.Sysfd, p, 0, sa)
448 if err == syscall.EINTR {
449 continue
450 }
451 if err == syscall.EAGAIN && fd.pd.pollable() {
452 if err = fd.pd.waitWrite(fd.isFile); err == nil {
453 continue
454 }
455 }
456 if err != nil {
457 return 0, err
458 }
459 return len(p), nil
460 }
461 }
462
463 // WriteToInet6 wraps the sendto network call for IPv6 addresses.
464 func (fd *FD) WriteToInet6(p []byte, sa *syscall.SockaddrInet6) (int, error) {
465 if err := fd.writeLock(); err != nil {
466 return 0, err
467 }
468 defer fd.writeUnlock()
469 if err := fd.pd.prepareWrite(fd.isFile); err != nil {
470 return 0, err
471 }
472 for {
473 err := unix.SendtoInet6(fd.Sysfd, p, 0, sa)
474 if err == syscall.EINTR {
475 continue
476 }
477 if err == syscall.EAGAIN && fd.pd.pollable() {
478 if err = fd.pd.waitWrite(fd.isFile); err == nil {
479 continue
480 }
481 }
482 if err != nil {
483 return 0, err
484 }
485 return len(p), nil
486 }
487 }
488
489 // WriteTo wraps the sendto network call.
490 func (fd *FD) WriteTo(p []byte, sa syscall.Sockaddr) (int, error) {
491 if err := fd.writeLock(); err != nil {
492 return 0, err
493 }
494 defer fd.writeUnlock()
495 if err := fd.pd.prepareWrite(fd.isFile); err != nil {
496 return 0, err
497 }
498 for {
499 err := syscall.Sendto(fd.Sysfd, p, 0, sa)
500 if err == syscall.EINTR {
501 continue
502 }
503 if err == syscall.EAGAIN && fd.pd.pollable() {
504 if err = fd.pd.waitWrite(fd.isFile); err == nil {
505 continue
506 }
507 }
508 if err != nil {
509 return 0, err
510 }
511 return len(p), nil
512 }
513 }
514
515 // WriteMsg wraps the sendmsg network call.
516 func (fd *FD) WriteMsg(p []byte, oob []byte, sa syscall.Sockaddr) (int, int, error) {
517 if err := fd.writeLock(); err != nil {
518 return 0, 0, err
519 }
520 defer fd.writeUnlock()
521 if err := fd.pd.prepareWrite(fd.isFile); err != nil {
522 return 0, 0, err
523 }
524 for {
525 n, err := syscall.SendmsgN(fd.Sysfd, p, oob, sa, 0)
526 if err == syscall.EINTR {
527 continue
528 }
529 if err == syscall.EAGAIN && fd.pd.pollable() {
530 if err = fd.pd.waitWrite(fd.isFile); err == nil {
531 continue
532 }
533 }
534 if err != nil {
535 return n, 0, err
536 }
537 return n, len(oob), err
538 }
539 }
540
541 // WriteMsgInet4 is WriteMsg specialized for syscall.SockaddrInet4.
542 func (fd *FD) WriteMsgInet4(p []byte, oob []byte, sa *syscall.SockaddrInet4) (int, int, error) {
543 if err := fd.writeLock(); err != nil {
544 return 0, 0, err
545 }
546 defer fd.writeUnlock()
547 if err := fd.pd.prepareWrite(fd.isFile); err != nil {
548 return 0, 0, err
549 }
550 for {
551 n, err := unix.SendmsgNInet4(fd.Sysfd, p, oob, sa, 0)
552 if err == syscall.EINTR {
553 continue
554 }
555 if err == syscall.EAGAIN && fd.pd.pollable() {
556 if err = fd.pd.waitWrite(fd.isFile); err == nil {
557 continue
558 }
559 }
560 if err != nil {
561 return n, 0, err
562 }
563 return n, len(oob), err
564 }
565 }
566
567 // WriteMsgInet6 is WriteMsg specialized for syscall.SockaddrInet6.
568 func (fd *FD) WriteMsgInet6(p []byte, oob []byte, sa *syscall.SockaddrInet6) (int, int, error) {
569 if err := fd.writeLock(); err != nil {
570 return 0, 0, err
571 }
572 defer fd.writeUnlock()
573 if err := fd.pd.prepareWrite(fd.isFile); err != nil {
574 return 0, 0, err
575 }
576 for {
577 n, err := unix.SendmsgNInet6(fd.Sysfd, p, oob, sa, 0)
578 if err == syscall.EINTR {
579 continue
580 }
581 if err == syscall.EAGAIN && fd.pd.pollable() {
582 if err = fd.pd.waitWrite(fd.isFile); err == nil {
583 continue
584 }
585 }
586 if err != nil {
587 return n, 0, err
588 }
589 return n, len(oob), err
590 }
591 }
592
593 // Accept wraps the accept network call.
594 func (fd *FD) Accept() (int, syscall.Sockaddr, []byte, error) {
595 if err := fd.readLock(); err != nil {
596 return -1, nil, "", err
597 }
598 defer fd.readUnlock()
599
600 if err := fd.pd.prepareRead(fd.isFile); err != nil {
601 return -1, nil, "", err
602 }
603 for {
604 s, rsa, errcall, err := accept(fd.Sysfd)
605 if err == nil {
606 return s, rsa, "", err
607 }
608 switch err {
609 case syscall.EINTR:
610 continue
611 case syscall.EAGAIN:
612 if fd.pd.pollable() {
613 if err = fd.pd.waitRead(fd.isFile); err == nil {
614 continue
615 }
616 }
617 case syscall.ECONNABORTED:
618 // This means that a socket on the listen
619 // queue was closed before we Accept()ed it;
620 // it's a silly error, so try again.
621 continue
622 }
623 return -1, nil, errcall, err
624 }
625 }
626
627 // Fchmod wraps syscall.Fchmod.
628 func (fd *FD) Fchmod(mode uint32) error {
629 if err := fd.incref(); err != nil {
630 return err
631 }
632 defer fd.decref()
633 return ignoringEINTR(func() error {
634 return syscall.Fchmod(fd.Sysfd, mode)
635 })
636 }
637
638 // Fstat wraps syscall.Fstat
639 func (fd *FD) Fstat(s *syscall.Stat_t) error {
640 if err := fd.incref(); err != nil {
641 return err
642 }
643 defer fd.decref()
644 return ignoringEINTR(func() error {
645 return syscall.Fstat(fd.Sysfd, s)
646 })
647 }
648
// dupCloexecUnsupported reports whether F_DUPFD_CLOEXEC has been found
// to be unsupported by the kernel. It starts out false and is set to
// true by DupCloseOnExec the first time fcntl fails with EINVAL or
// ENOSYS, after which the fcntl attempt is skipped.
var dupCloexecUnsupported atomic.Bool
651
652 // DupCloseOnExec dups fd and marks it close-on-exec.
653 func DupCloseOnExec(fd int) (int, []byte, error) {
654 if syscall.F_DUPFD_CLOEXEC != 0 && !dupCloexecUnsupported.Load() {
655 r0, err := unix.Fcntl(fd, syscall.F_DUPFD_CLOEXEC, 0)
656 if err == nil {
657 return r0, "", nil
658 }
659 switch err {
660 case syscall.EINVAL, syscall.ENOSYS:
661 // Old kernel, or js/wasm (which returns
662 // ENOSYS). Fall back to the portable way from
663 // now on.
664 dupCloexecUnsupported.Store(true)
665 default:
666 return -1, "fcntl", err
667 }
668 }
669 return dupCloseOnExecOld(fd)
670 }
671
672 // Dup duplicates the file descriptor.
673 func (fd *FD) Dup() (int, []byte, error) {
674 if err := fd.incref(); err != nil {
675 return -1, "", err
676 }
677 defer fd.decref()
678 return DupCloseOnExec(fd.Sysfd)
679 }
680
681 // On Unix variants only, expose the IO event for the net code.
682
683 // WaitWrite waits until data can be written to fd.
684 func (fd *FD) WaitWrite() error {
685 return fd.pd.waitWrite(fd.isFile)
686 }
687
688 // WriteOnce is for testing only. It makes a single write call.
689 func (fd *FD) WriteOnce(p []byte) (int, error) {
690 if err := fd.writeLock(); err != nil {
691 return 0, err
692 }
693 defer fd.writeUnlock()
694 return ignoringEINTRIO(syscall.Write, fd.Sysfd, p)
695 }
696
697 // RawRead invokes the user-defined function f for a read operation.
698 func (fd *FD) RawRead(f func(uintptr) bool) error {
699 if err := fd.readLock(); err != nil {
700 return err
701 }
702 defer fd.readUnlock()
703 if err := fd.pd.prepareRead(fd.isFile); err != nil {
704 return err
705 }
706 for {
707 if f(uintptr(fd.Sysfd)) {
708 return nil
709 }
710 if err := fd.pd.waitRead(fd.isFile); err != nil {
711 return err
712 }
713 }
714 }
715
716 // RawWrite invokes the user-defined function f for a write operation.
717 func (fd *FD) RawWrite(f func(uintptr) bool) error {
718 if err := fd.writeLock(); err != nil {
719 return err
720 }
721 defer fd.writeUnlock()
722 if err := fd.pd.prepareWrite(fd.isFile); err != nil {
723 return err
724 }
725 for {
726 if f(uintptr(fd.Sysfd)) {
727 return nil
728 }
729 if err := fd.pd.waitWrite(fd.isFile); err != nil {
730 return err
731 }
732 }
733 }
734
// ignoringEINTRIO is like ignoringEINTR, but just for IO calls.
// It calls fn(fd, p) repeatedly for as long as fn fails with
// syscall.EINTR and returns the results of the first call that does not.
func ignoringEINTRIO(fn func(fd int, p []byte) (int, error), fd int, p []byte) (int, error) {
	n, err := fn(fd, p)
	for err == syscall.EINTR {
		n, err = fn(fd, p)
	}
	return n, err
}
744