1 // Copyright 2018 The gVisor Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 15 // Package tcpip provides the interfaces and related types that users of the
16 // tcpip stack will use in order to create endpoints used to send and receive
17 // data over the network stack.
18 //
19 // The starting point is the creation and configuration of a stack. A stack can
20 // be created by calling the New() function of the tcpip/stack/stack package;
21 // configuring a stack involves creating NICs (via calls to Stack.CreateNIC()),
22 // adding network addresses (via calls to Stack.AddProtocolAddress()), and
23 // setting a route table (via a call to Stack.SetRouteTable()).
24 //
25 // Once a stack is configured, endpoints can be created by calling
26 // Stack.NewEndpoint(). Such endpoints can be used to send/receive data, connect
27 // to peers, listen for connections, accept connections, etc., depending on the
28 // transport protocol selected.
29 package tcpip
30 31 import (
32 "bytes"
33 "errors"
34 "fmt"
35 "io"
36 "math"
37 "math/bits"
38 "net"
39 "reflect"
40 "strconv"
41 "strings"
42 "time"
43 44 "gvisor.dev/gvisor/pkg/atomicbitops"
45 "gvisor.dev/gvisor/pkg/rand"
46 "gvisor.dev/gvisor/pkg/sync"
47 "gvisor.dev/gvisor/pkg/waiter"
48 )
// These values are declared locally because using the header package here
// would cause an import cycle.
const (
	// ipv4AddressSize is the length in bytes of an IPv4 address.
	ipv4AddressSize = 4
	// ipv4ProtocolNumber is the network protocol number for IPv4.
	ipv4ProtocolNumber = 0x0800
	// ipv6AddressSize is the length in bytes of an IPv6 address.
	ipv6AddressSize = 16
	// ipv6ProtocolNumber is the network protocol number for IPv6.
	ipv6ProtocolNumber = 0x86dd
)
const (
	// LinkAddressSize is the size of a MAC address, in bytes.
	LinkAddressSize = 6
)
// Known IP addresses.
//
// These are exported, mutable byte slices; callers must treat them as
// read-only because every user shares the same backing storage.
var (
	// IPv4Zero is the all-zero address with the length of an IPv4 address
	// (4 bytes).
	IPv4Zero = []byte{0, 0, 0, 0}
	// IPv6Zero is the all-zero address with the length of an IPv6 address
	// (16 bytes).
	IPv6Zero = []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
)
// Errors related to Subnet.
var (
	// errSubnetLengthMismatch reports that the address and the mask of a
	// subnet do not have the same length.
	errSubnetLengthMismatch = errors.New("subnet length of address and mask differ")
	// errSubnetAddressMasked reports that the subnet address has bits set in
	// positions that the mask clears.
	errSubnetAddressMasked = errors.New("subnet address has bits set outside the mask")
)
// ErrSaveRejection indicates a failed save due to unsupported networking state.
// This type of error is only used for save logic.
type ErrSaveRejection struct {
	// Err is the underlying reason the save was rejected.
	Err error
}

// Error returns a sensible description of the save rejection error.
//
// When Err is set, its message is appended after a ": " separator. A nil Err
// is tolerated so that formatting a rejection that was built without a cause
// does not panic; in that case only the generic description is returned.
func (e *ErrSaveRejection) Error() string {
	const description = "save rejected due to unsupported networking state"
	if e.Err == nil {
		return description
	}
	return description + ": " + e.Err.Error()
}
// MonotonicTime is a monotonic clock reading, stored as a nanosecond count.
//
// +stateify savable
type MonotonicTime struct {
	nanoseconds int64
}

// String implements Stringer. The reading is rendered as its nanosecond count
// in base 10.
func (mt MonotonicTime) String() string {
	const decimal = 10
	return strconv.FormatInt(mt.nanoseconds, decimal)
}

// MonotonicTimeInfinite returns the monotonic timestamp as far away in the
// future as possible, i.e. the largest value the nanosecond counter can hold.
func MonotonicTimeInfinite() MonotonicTime {
	var farthest MonotonicTime
	farthest.nanoseconds = math.MaxInt64
	return farthest
}

// Before reports whether the monotonic clock reading mt is strictly earlier
// than u.
func (mt MonotonicTime) Before(u MonotonicTime) bool {
	return u.nanoseconds > mt.nanoseconds
}

// After reports whether the monotonic clock reading mt is strictly later
// than u.
func (mt MonotonicTime) After(u MonotonicTime) bool {
	return u.nanoseconds < mt.nanoseconds
}

// Add returns the monotonic clock reading mt+d.
//
// The arithmetic is delegated to the time package: mt is mapped onto a
// time.Time relative to the Unix epoch, shifted by d, and converted back to a
// nanosecond offset from that same epoch.
func (mt MonotonicTime) Add(d time.Duration) MonotonicTime {
	epoch := time.Unix(0, 0)
	shifted := time.Unix(0, mt.nanoseconds).Add(d)
	return MonotonicTime{nanoseconds: shifted.Sub(epoch).Nanoseconds()}
}

// Sub returns the duration mt-u. If the result exceeds the maximum (or minimum)
// value that can be stored in a Duration, the maximum (or minimum) duration
// will be returned. To compute t-d for a duration d, use t.Add(-d).
func (mt MonotonicTime) Sub(u MonotonicTime) time.Duration {
	minuend := time.Unix(0, mt.nanoseconds)
	subtrahend := time.Unix(0, u.nanoseconds)
	return minuend.Sub(subtrahend)
}

// Milliseconds returns the time in milliseconds, discarding any fractional
// millisecond (integer division).
func (mt MonotonicTime) Milliseconds() int64 {
	const nanosPerMilli = 1e6
	return mt.nanoseconds / nanosPerMilli
}
// A Clock provides the current time and schedules work for execution.
//
// Times returned by a Clock should always be used for application-visible
// time. Only monotonic times should be used for netstack internal timekeeping.
type Clock interface {
	// Now returns the current local time.
	Now() time.Time

	// NowMonotonic returns the current monotonic clock reading.
	NowMonotonic() MonotonicTime

	// AfterFunc waits for the duration d to elapse and then calls f in its own
	// goroutine. It returns a Timer that can be used to cancel the call using
	// its Stop method.
	AfterFunc(d time.Duration, f func()) Timer
}
// Timer represents a single event. A Timer must be created with
// Clock.AfterFunc.
type Timer interface {
	// Stop prevents the Timer from firing. It returns true if the call stops the
	// timer, false if the timer has already expired or been stopped.
	//
	// If Stop returns false, then the timer has already expired and the function
	// f of Clock.AfterFunc(d, f) has been started in its own goroutine; Stop
	// does not wait for f to complete before returning. If the caller needs to
	// know whether f is completed, it must coordinate with f explicitly.
	Stop() bool

	// Reset changes the timer to expire after duration d.
	//
	// Reset should be invoked only on stopped or expired timers. If the timer is
	// known to have expired, Reset can be used directly. Otherwise, the caller
	// must coordinate with the function f of Clock.AfterFunc(d, f).
	Reset(d time.Duration)
}
// Address represents the address of a network node. The bytes are held in a
// fixed 16-byte array, of which only the first length bytes are part of the
// address; because the type contains no slices it is comparable with ==.
//
// +stateify savable
type Address struct {
	// addr is the backing storage for the address bytes.
	addr [16]byte
	// length is the number of leading bytes of addr that are in use.
	length int
}
178 179 // AddrFrom4 converts addr to an Address.
180 func AddrFrom4(addr [4]byte) Address {
181 ret := Address{
182 length: 4,
183 }
184 // It's guaranteed that copy will return 4.
185 copy(ret.addr[:], addr[:])
186 return ret
187 }
188 189 // AddrFrom4Slice converts addr to an Address. It panics if len(addr) != 4.
190 func AddrFrom4Slice(addr []byte) Address {
191 if len(addr) != 4 {
192 panic(fmt.Sprintf("bad address length for address %v", addr))
193 }
194 ret := Address{
195 length: 4,
196 }
197 // It's guaranteed that copy will return 4.
198 copy(ret.addr[:], addr)
199 return ret
200 }
201 202 // AddrFrom16 converts addr to an Address.
203 func AddrFrom16(addr [16]byte) Address {
204 ret := Address{
205 length: 16,
206 }
207 // It's guaranteed that copy will return 16.
208 copy(ret.addr[:], addr[:])
209 return ret
210 }
211 212 // AddrFrom16Slice converts addr to an Address. It panics if len(addr) != 16.
213 func AddrFrom16Slice(addr []byte) Address {
214 if len(addr) != 16 {
215 panic(fmt.Sprintf("bad address length for address %v", addr))
216 }
217 ret := Address{
218 length: 16,
219 }
220 // It's guaranteed that copy will return 16.
221 copy(ret.addr[:], addr)
222 return ret
223 }
224 225 // AddrFromSlice converts addr to an Address. It returns the Address zero value
226 // if len(addr) != 4 or 16.
227 func AddrFromSlice(addr []byte) Address {
228 switch len(addr) {
229 case ipv4AddressSize:
230 return AddrFrom4Slice(addr)
231 case ipv6AddressSize:
232 return AddrFrom16Slice(addr)
233 }
234 return Address{}
235 }
236 237 // As4 returns a as a 4 byte array. It panics if the address length is not 4.
238 func (a Address) As4() [4]byte {
239 if a.Len() != 4 {
240 panic(fmt.Sprintf("bad address length for address %v", a.addr))
241 }
242 return [4]byte(a.addr[:4])
243 }
244 245 // As16 returns a as a 16 byte array. It panics if the address length is not 16.
246 func (a Address) As16() [16]byte {
247 if a.Len() != 16 {
248 panic(fmt.Sprintf("bad address length for address %v", a.addr))
249 }
250 return [16]byte(a.addr[:16])
251 }
// AsSlice returns a as a byte slice. Callers should be careful as it can
// return a window into existing memory.
//
// The result is a slice of the receiver's own backing array covering its
// first length bytes; no copy is made, so writes through the returned slice
// modify *a.
//
// +checkescape
func (a *Address) AsSlice() []byte {
	return a.addr[:a.length]
}
260 261 // BitLen returns the length in bits of a.
262 func (a Address) BitLen() int {
263 return a.Len() * 8
264 }
// Len returns the length in bytes of a, i.e. how many leading bytes of the
// backing array belong to the address. The zero Address has length 0.
func (a Address) Len() int {
	return a.length
}
270 271 // WithPrefix returns the address with a prefix that represents a point subnet.
272 func (a Address) WithPrefix() AddressWithPrefix {
273 return AddressWithPrefix{
274 Address: a,
275 PrefixLen: a.BitLen(),
276 }
277 }
278 279 // Unspecified returns true if the address is unspecified.
280 func (a Address) Unspecified() bool {
281 for _, b := range a.addr {
282 if b != 0 {
283 return false
284 }
285 }
286 return true
287 }
// Equal returns whether a and other are equal. It exists for use by the cmp
// library.
//
// The comparison is plain struct equality, so both the stored bytes and the
// length must match.
func (a Address) Equal(other Address) bool {
	return a == other
}
294 295 // MatchingPrefix returns the matching prefix length in bits.
296 //
297 // Panics if b and a have different lengths.
298 func (a Address) MatchingPrefix(b Address) uint8 {
299 const bitsInAByte = 8
300 301 if a.Len() != b.Len() {
302 panic(fmt.Sprintf("addresses %s and %s do not have the same length", a, b))
303 }
304 305 var prefix uint8
306 for i := 0; i < a.length; i++ {
307 aByte := a.addr[i]
308 bByte := b.addr[i]
309 310 if aByte == bByte {
311 prefix += bitsInAByte
312 continue
313 }
314 315 // Count the remaining matching bits in the byte from MSbit to LSBbit.
316 mask := uint8(1) << (bitsInAByte - 1)
317 for {
318 if aByte&mask == bByte&mask {
319 prefix++
320 mask >>= 1
321 continue
322 }
323 324 break
325 }
326 327 break
328 }
329 330 return prefix
331 }
// AddressMask is a bitmask for an address. The mask bytes are held in a fixed
// 16-byte array, of which only the first length bytes are part of the mask.
//
// +stateify savable
type AddressMask struct {
	// mask is the backing storage for the mask bytes.
	mask [16]byte
	// length is the number of leading bytes of mask that are in use.
	length int
}

// MaskFrom returns a Mask based on str, using the bytes of str as the mask.
//
// MaskFrom may allocate, and so should not be in hot paths.
//
// NOTE(review): the recorded length is len(str) even though at most 16 bytes
// are copied, so a longer input yields a mask whose slicing methods panic.
// Callers are presumably expected to pass at most 16 bytes.
func MaskFrom(str string) AddressMask {
	mask := AddressMask{length: len(str)}
	copy(mask.mask[:], str)
	return mask
}

// MaskFromBytes returns a Mask based on bs. The bytes are copied, so the
// returned mask does not alias bs.
//
// NOTE(review): as with MaskFrom, the recorded length is len(bs) while at most
// 16 bytes are copied; inputs are presumably never longer than 16 bytes.
func MaskFromBytes(bs []byte) AddressMask {
	mask := AddressMask{length: len(bs)}
	copy(mask.mask[:], bs)
	return mask
}

// String implements Stringer. The mask is rendered as lowercase hexadecimal,
// two digits per mask byte.
func (m AddressMask) String() string {
	// Only the in-use prefix is formatted; the rest of the backing array is
	// storage padding and not part of the mask.
	return fmt.Sprintf("%x", m.mask[:m.length])
}

// AsSlice returns m as a byte slice. Callers should be careful as it can
// return a window into existing memory.
func (m *AddressMask) AsSlice() []byte {
	return []byte(m.mask[:m.length])
}

// BitLen returns the length of the mask in bits.
func (m AddressMask) BitLen() int {
	return m.length * 8
}

// Len returns the length of the mask in bytes.
func (m AddressMask) Len() int {
	return m.length
}

// Prefix returns the number of bits before the first host bit.
//
// It is computed by summing, over every in-use byte, the count of leading one
// bits in that byte, which equals the prefix length for a contiguous mask.
func (m AddressMask) Prefix() int {
	p := 0
	for _, b := range m.mask[:m.length] {
		// Leading ones of b are the leading zeros of its complement.
		p += bits.LeadingZeros8(^b)
	}
	return p
}

// Equal returns whether m and other are equal. It exists for use by the cmp
// library.
func (m AddressMask) Equal(other AddressMask) bool {
	return m == other
}
392 393 // Subnet is a subnet defined by its address and mask.
394 //
395 // +stateify savable
396 type Subnet struct {
397 address Address
398 mask AddressMask
399 }
400 401 // NewSubnet creates a new Subnet, checking that the address and mask are the same length.
402 func NewSubnet(a Address, m AddressMask) (Subnet, error) {
403 if a.Len() != m.Len() {
404 return Subnet{}, errSubnetLengthMismatch
405 }
406 for i := 0; i < a.Len(); i++ {
407 if a.addr[i]&^m.mask[i] != 0 {
408 return Subnet{}, errSubnetAddressMasked
409 }
410 }
411 return Subnet{a, m}, nil
412 }
413 414 // String implements Stringer.
415 func (s Subnet) String() string {
416 return fmt.Sprintf("%s/%d", s.ID(), s.Prefix())
417 }
418 419 // Contains returns true iff the address is of the same length and matches the
420 // subnet address and mask.
421 func (s *Subnet) Contains(a Address) bool {
422 if a.Len() != s.address.Len() {
423 return false
424 }
425 for i := 0; i < a.Len(); i++ {
426 if a.addr[i]&s.mask.mask[i] != s.address.addr[i] {
427 return false
428 }
429 }
430 return true
431 }
432 433 // ID returns the subnet ID.
434 func (s *Subnet) ID() Address {
435 return s.address
436 }
437 438 // Bits returns the number of ones (network bits) and zeros (host bits) in the
439 // subnet mask.
440 func (s *Subnet) Bits() (ones int, zeros int) {
441 ones = s.mask.Prefix()
442 return ones, s.mask.BitLen() - ones
443 }
444 445 // Prefix returns the number of bits before the first host bit.
446 func (s *Subnet) Prefix() int {
447 return s.mask.Prefix()
448 }
449 450 // Mask returns the subnet mask.
451 func (s *Subnet) Mask() AddressMask {
452 return s.mask
453 }
454 455 // Broadcast returns the subnet's broadcast address.
456 func (s *Subnet) Broadcast() Address {
457 addrCopy := s.address
458 for i := 0; i < addrCopy.Len(); i++ {
459 addrCopy.addr[i] |= ^s.mask.mask[i]
460 }
461 return addrCopy
462 }
463 464 // IsBroadcast returns true if the address is considered a broadcast address.
465 func (s *Subnet) IsBroadcast(address Address) bool {
466 // Only IPv4 supports the notion of a broadcast address.
467 if address.Len() != ipv4AddressSize {
468 return false
469 }
470 471 // Normally, we would just compare address with the subnet's broadcast
472 // address but there is an exception where a simple comparison is not
473 // correct. This exception is for /31 and /32 IPv4 subnets where all
474 // addresses are considered valid host addresses.
475 //
476 // For /31 subnets, the case is easy. RFC 3021 Section 2.1 states that
477 // both addresses in a /31 subnet "MUST be interpreted as host addresses."
478 //
479 // For /32, the case is a bit more vague. RFC 3021 makes no mention of /32
480 // subnets. However, the same reasoning applies - if an exception is not
481 // made, then there do not exist any host addresses in a /32 subnet. RFC
482 // 4632 Section 3.1 also vaguely implies this interpretation by referring
483 // to addresses in /32 subnets as "host routes."
484 return s.Prefix() <= 30 && s.Broadcast() == address
485 }
486 487 // Equal returns true if this Subnet is equal to the given Subnet.
488 func (s Subnet) Equal(o Subnet) bool {
489 // If this changes, update Route.Equal accordingly.
490 return s == o
491 }
// NICID is a number that uniquely identifies a NIC (network interface).
type NICID int32
// ShutdownFlags represents flags that can be passed to the Shutdown() method
// of the Endpoint interface.
type ShutdownFlags int

// Values of the flags that can be passed to the Shutdown() method. They can
// be OR'ed together. Each flag occupies its own bit: ShutdownRead is 1 and
// ShutdownWrite is 2.
const (
	ShutdownRead ShutdownFlags = 1 << iota
	ShutdownWrite
)
// PacketType is used to indicate the destination of the packet.
type PacketType uint8

// Values of PacketType, numbered consecutively from zero.
const (
	// PacketHost indicates a packet addressed to the local host.
	PacketHost PacketType = iota

	// PacketOtherHost indicates an outgoing packet addressed to
	// another host caught by a NIC in promiscuous mode.
	PacketOtherHost

	// PacketOutgoing for a packet originating from the local host
	// that is looped back to a packet socket.
	PacketOutgoing

	// PacketBroadcast indicates a link layer broadcast packet.
	PacketBroadcast

	// PacketMulticast indicates a link layer multicast packet.
	PacketMulticast
)
// FullAddress represents a full transport node address, as required by the
// Connect() and Bind() methods.
//
// +stateify savable
type FullAddress struct {
	// NIC is the ID of the NIC this address refers to.
	//
	// This may not be used by all endpoint types.
	NIC NICID

	// Addr is the network address.
	Addr Address

	// Port is the transport port.
	//
	// This may not be used by all endpoint types.
	Port uint16

	// LinkAddr is the link layer address.
	LinkAddr LinkAddress
}
// Payloader is an interface that provides data.
//
// This interface allows the endpoint to request the amount of data it needs
// based on internal buffers without exposing them.
type Payloader interface {
	io.Reader

	// Len returns the number of bytes of the unread portion of the
	// Reader.
	Len() int
}

// Compile-time checks that the standard in-memory readers satisfy Payloader.
var _ Payloader = (*bytes.Buffer)(nil)
var _ Payloader = (*bytes.Reader)(nil)
var _ io.Writer = (*SliceWriter)(nil)

// SliceWriter implements io.Writer for slices. Data is written into the
// slice's existing elements, and the slice is advanced past what was written
// so the next Write continues where this one stopped.
type SliceWriter []byte

// Write implements io.Writer.Write.
//
// It copies as much of b as fits into the remaining slice and returns the
// number of bytes copied. io.ErrShortWrite is returned when b did not fit
// completely.
func (s *SliceWriter) Write(b []byte) (int, error) {
	dst := *s
	written := copy(dst, b)
	// Drop the consumed region so the receiver only exposes free space.
	*s = dst[written:]
	if written < len(b) {
		return written, io.ErrShortWrite
	}
	return written, nil
}
var _ io.Writer = (*LimitedWriter)(nil)

// A LimitedWriter writes to W but limits the amount of data copied to just N
// bytes. Each call to Write updates N to reflect the new amount remaining.
type LimitedWriter struct {
	// W is the underlying writer that receives the (possibly truncated) data.
	W io.Writer
	// N is the number of bytes that may still be written to W.
	N int64
}

// Write implements io.Writer.Write.
//
// At most N bytes of p are forwarded to W. If W reports no error but fewer
// than len(p) bytes were written, io.ErrShortWrite is returned. N is reduced
// by the number of bytes W accepted.
//
// A non-positive N means the budget is exhausted: nothing from p is forwarded.
// This also covers a negative N, which would otherwise panic on an invalid
// slice bound.
func (l *LimitedWriter) Write(p []byte) (int, error) {
	pLen := int64(len(p))
	switch {
	case l.N <= 0:
		p = p[:0]
	case pLen > l.N:
		p = p[:l.N]
	}
	n, err := l.W.Write(p)
	n64 := int64(n)
	// Compare against the caller's original length so that truncation by the
	// limit is reported as a short write.
	if err == nil && n64 != pLen {
		err = io.ErrShortWrite
	}
	l.N -= n64
	return n, err
}
// SendableControlMessages contains socket control messages that can be written.
//
// +stateify savable
type SendableControlMessages struct {
	// HasTTL indicates whether TTL is valid/set.
	HasTTL bool

	// TTL is the IPv4 Time To Live of the associated packet.
	TTL uint8

	// HasHopLimit indicates whether HopLimit is valid/set.
	HasHopLimit bool

	// HopLimit is the IPv6 Hop Limit of the associated packet.
	HopLimit uint8

	// HasIPv6PacketInfo indicates whether IPv6PacketInfo is set.
	HasIPv6PacketInfo bool

	// IPv6PacketInfo holds interface and address data on an incoming packet.
	//
	// NOTE(review): the wording says "incoming" although this struct describes
	// messages that can be written — confirm the intended direction.
	IPv6PacketInfo IPv6PacketInfo
}
// ReceivableControlMessages contains socket control messages that can be
// received.
//
// +stateify savable
type ReceivableControlMessages struct {
	// Timestamp is the time that the last packet used to create the read data
	// was received. It is only meaningful when HasTimestamp is set.
	Timestamp time.Time `state:".(int64)"`

	// HasInq indicates whether Inq is valid/set.
	HasInq bool

	// Inq is the number of bytes ready to be received.
	Inq int32

	// HasTOS indicates whether TOS is valid/set.
	HasTOS bool

	// TOS is the IPv4 type of service of the associated packet.
	TOS uint8

	// HasTTL indicates whether TTL is valid/set.
	HasTTL bool

	// TTL is the IPv4 Time To Live of the associated packet.
	TTL uint8

	// HasHopLimit indicates whether HopLimit is valid/set.
	HasHopLimit bool

	// HopLimit is the IPv6 Hop Limit of the associated packet.
	HopLimit uint8

	// HasTimestamp indicates whether Timestamp is valid/set.
	HasTimestamp bool

	// HasTClass indicates whether TClass is valid/set.
	HasTClass bool

	// TClass is the IPv6 traffic class of the associated packet.
	TClass uint32

	// HasIPPacketInfo indicates whether PacketInfo is set.
	HasIPPacketInfo bool

	// PacketInfo holds interface and address data on an incoming packet.
	PacketInfo IPPacketInfo

	// HasIPv6PacketInfo indicates whether IPv6PacketInfo is set.
	HasIPv6PacketInfo bool

	// IPv6PacketInfo holds interface and address data on an incoming packet.
	IPv6PacketInfo IPv6PacketInfo

	// HasOriginalDstAddress indicates whether OriginalDstAddress is set.
	HasOriginalDstAddress bool

	// OriginalDstAddress holds the original destination address and port of
	// the incoming packet.
	OriginalDstAddress FullAddress

	// SockErr is the dequeued socket error on recvmsg(MSG_ERRQUEUE).
	SockErr *SockError
}
// PacketOwner is used to get UID and GID of the packet.
type PacketOwner interface {
	// KUID returns KUID of the packet.
	KUID() uint32

	// KGID returns KGID of the packet.
	KGID() uint32
}
// ReadOptions contains options for Endpoint.Read.
type ReadOptions struct {
	// Peek indicates whether this read is a peek.
	Peek bool

	// NeedRemoteAddr indicates whether to return the remote address, if
	// supported.
	NeedRemoteAddr bool

	// NeedLinkPacketInfo indicates whether to return the link-layer information,
	// if supported.
	NeedLinkPacketInfo bool
}
// ReadResult represents result for a successful Endpoint.Read.
type ReadResult struct {
	// Count is the number of bytes received and written to the buffer.
	Count int

	// Total is the number of bytes of the received packet. This can be used to
	// determine whether the read is truncated.
	Total int

	// ControlMessages is the control messages received.
	ControlMessages ReceivableControlMessages

	// RemoteAddr is the remote address if ReadOptions.NeedRemoteAddr is true.
	RemoteAddr FullAddress

	// LinkPacketInfo is the link-layer information of the received packet if
	// ReadOptions.NeedLinkPacketInfo is true.
	LinkPacketInfo LinkPacketInfo
}
// Endpoint is the interface implemented by transport protocols (e.g., tcp, udp)
// that exposes functionality like read, write, connect, etc. to users of the
// networking stack.
type Endpoint interface {
	// Close puts the endpoint in a closed state and frees all resources
	// associated with it. Close initiates the teardown process, the
	// Endpoint may not be fully closed when Close returns.
	Close()

	// Abort initiates an expedited endpoint teardown. As compared to
	// Close, Abort prioritizes closing the Endpoint quickly over cleanly.
	// Abort is best effort; implementing Abort with Close is acceptable.
	Abort()

	// Read reads data from the endpoint and optionally writes to dst.
	//
	// This method does not block if there is no data pending; in this case,
	// ErrWouldBlock is returned.
	//
	// If non-zero number of bytes are successfully read and written to dst, err
	// must be nil. Otherwise, if dst failed to write anything, ErrBadBuffer
	// should be returned.
	Read(io.Writer, ReadOptions) (ReadResult, Error)

	// Write writes data to the endpoint's peer. This method does not block if
	// the data cannot be written.
	//
	// Unlike io.Writer.Write, Endpoint.Write transfers ownership of any bytes
	// successfully written to the Endpoint. That is, if a call to
	// Write(SlicePayload{data}) returns (n, err), it may retain data[:n], and
	// the caller should not use data[:n] after Write returns.
	//
	// Note that unlike io.Writer.Write, it is not an error for Write to
	// perform a partial write (if n > 0, no error may be returned). Only
	// stream (TCP) Endpoints may return partial writes, and even then only
	// in the case where writing additional data would block. Other Endpoints
	// will either write the entire message or return an error.
	Write(Payloader, WriteOptions) (int64, Error)

	// Connect connects the endpoint to its peer. Specifying a NIC is
	// optional.
	//
	// There are three classes of return values:
	//	nil -- the attempt to connect succeeded.
	//	ErrConnectStarted/ErrAlreadyConnecting -- the connect attempt started
	//		but hasn't completed yet. In this case, the caller must call Connect
	//		or GetSockOpt(ErrorOption) when the endpoint becomes writable to
	//		get the actual result. The first call to Connect after the socket has
	//		connected returns nil. Calling connect again results in ErrAlreadyConnected.
	//	Anything else -- the attempt to connect failed.
	//
	// If address.Addr is empty, this means that Endpoint has to be
	// disconnected if this is supported, otherwise
	// ErrAddressFamilyNotSupported must be returned.
	Connect(address FullAddress) Error

	// Disconnect disconnects the endpoint from its peer.
	Disconnect() Error

	// Shutdown closes the read and/or write end of the endpoint connection
	// to its peer.
	Shutdown(flags ShutdownFlags) Error

	// Listen puts the endpoint in "listen" mode, which allows it to accept
	// new connections.
	Listen(backlog int) Error

	// Accept returns a new endpoint if a peer has established a connection
	// to an endpoint previously set to listen mode. This method does not
	// block if no new connections are available.
	//
	// The returned Queue is the wait queue for the newly created endpoint.
	//
	// If peerAddr is not nil then it is populated with the peer address of the
	// returned endpoint.
	Accept(peerAddr *FullAddress) (Endpoint, *waiter.Queue, Error)

	// Bind binds the endpoint to a specific local address and port.
	// Specifying a NIC is optional.
	Bind(address FullAddress) Error

	// GetLocalAddress returns the address to which the endpoint is bound.
	GetLocalAddress() (FullAddress, Error)

	// GetRemoteAddress returns the address to which the endpoint is
	// connected.
	GetRemoteAddress() (FullAddress, Error)

	// Readiness returns the current readiness of the endpoint. For example,
	// if waiter.EventIn is set, the endpoint is immediately readable.
	Readiness(mask waiter.EventMask) waiter.EventMask

	// SetSockOpt sets a socket option.
	SetSockOpt(opt SettableSocketOption) Error

	// SetSockOptInt sets a socket option, for simple cases where a value
	// has the int type.
	SetSockOptInt(opt SockOptInt, v int) Error

	// GetSockOpt gets a socket option.
	GetSockOpt(opt GettableSocketOption) Error

	// GetSockOptInt gets a socket option for simple cases where a return
	// value has the int type.
	GetSockOptInt(SockOptInt) (int, Error)

	// State returns a socket's lifecycle state. The returned value is
	// protocol-specific and is primarily used for diagnostics.
	State() uint32

	// ModerateRecvBuf should be called every time data is copied to the user
	// space. This allows for dynamic tuning of recv buffer space for a
	// given socket.
	//
	// NOTE: This method is a no-op for sockets other than TCP.
	ModerateRecvBuf(copied int)

	// Info returns a copy to the transport endpoint info.
	Info() EndpointInfo

	// Stats returns a reference to the endpoint stats.
	Stats() EndpointStats

	// SetOwner sets the task owner to the endpoint owner.
	SetOwner(owner PacketOwner)

	// LastError clears and returns the last error reported by the endpoint.
	LastError() Error

	// SocketOptions returns the structure which contains all the socket
	// level options.
	SocketOptions() *SocketOptions
}
// EndpointWithPreflight is the interface implemented by endpoints that need
// to expose the `Preflight` method for preparing the endpoint prior to
// calling `Write`.
type EndpointWithPreflight interface {
	// Preflight prepares the endpoint for writes using the provided
	// WriteOptions, returning an error if the options were incompatible with
	// the endpoint's current state.
	Preflight(WriteOptions) Error
}
// LinkPacketInfo holds Link layer information for a received packet.
//
// +stateify savable
type LinkPacketInfo struct {
	// Protocol is the NetworkProtocolNumber for the packet.
	Protocol NetworkProtocolNumber

	// PktType is used to indicate the destination of the packet.
	PktType PacketType
}
// EndpointInfo is the interface implemented by each endpoint info struct.
type EndpointInfo interface {
	// IsEndpointInfo is an empty method to implement the tcpip.EndpointInfo
	// marker interface. It carries no behavior of its own.
	IsEndpointInfo()
}
// EndpointStats is the interface implemented by each endpoint stats struct.
type EndpointStats interface {
	// IsEndpointStats is an empty method to implement the tcpip.EndpointStats
	// marker interface. It carries no behavior of its own.
	IsEndpointStats()
}
// WriteOptions contains options for Endpoint.Write.
type WriteOptions struct {
	// If To is not nil, write to the given address instead of the endpoint's
	// peer.
	To *FullAddress

	// More has the same semantics as Linux's MSG_MORE.
	More bool

	// EndOfRecord has the same semantics as Linux's MSG_EOR.
	EndOfRecord bool

	// Atomic means that all data fetched from Payloader must be written to the
	// endpoint. If Atomic is false, then data fetched from the Payloader may be
	// discarded if available endpoint buffer space is insufficient.
	Atomic bool

	// ControlMessages contains optional overrides used when writing a packet.
	ControlMessages SendableControlMessages
}
// SockOptInt represents socket options whose values have the int type.
type SockOptInt int

// Values of SockOptInt, numbered consecutively from zero in declaration
// order.
const (
	// KeepaliveCountOption is used by SetSockOptInt/GetSockOptInt to
	// specify the number of un-ACKed TCP keepalives that will be sent
	// before the connection is closed.
	KeepaliveCountOption SockOptInt = iota

	// IPv4TOSOption is used by SetSockOptInt/GetSockOptInt to specify TOS
	// for all subsequent outgoing IPv4 packets from the endpoint.
	IPv4TOSOption

	// IPv6TrafficClassOption is used by SetSockOptInt/GetSockOptInt to
	// specify TOS for all subsequent outgoing IPv6 packets from the
	// endpoint.
	IPv6TrafficClassOption

	// MaxSegOption is used by SetSockOptInt/GetSockOptInt to set/get the
	// current Maximum Segment Size(MSS) value as specified using the
	// TCP_MAXSEG option.
	MaxSegOption

	// MTUDiscoverOption is used to set/get the path MTU discovery setting.
	//
	// NOTE: Setting this option to any other value than PMTUDiscoveryDont
	// is not supported and will fail as such, and getting this option will
	// always return PMTUDiscoveryDont.
	MTUDiscoverOption

	// MulticastTTLOption is used by SetSockOptInt/GetSockOptInt to control
	// the default TTL value for multicast messages. The default is 1.
	MulticastTTLOption

	// ReceiveQueueSizeOption is used in GetSockOptInt to specify that the
	// number of unread bytes in the input buffer should be returned.
	ReceiveQueueSizeOption

	// SendQueueSizeOption is used in GetSockOptInt to specify that the
	// number of unread bytes in the output buffer should be returned.
	SendQueueSizeOption

	// IPv4TTLOption is used by SetSockOptInt/GetSockOptInt to control the default
	// TTL value for unicast messages.
	//
	// The default is configured by DefaultTTLOption. A UseDefaultIPv4TTL value
	// configures the endpoint to use the default.
	IPv4TTLOption

	// IPv6HopLimitOption is used by SetSockOptInt/GetSockOptInt to control the
	// default hop limit value for unicast messages.
	//
	// The default is configured by DefaultTTLOption. A UseDefaultIPv6HopLimit
	// value configures the endpoint to use the default.
	IPv6HopLimitOption

	// TCPSynCountOption is used by SetSockOptInt/GetSockOptInt to specify
	// the number of SYN retransmits that TCP should send before aborting
	// the attempt to connect. It cannot exceed 255.
	//
	// NOTE: This option is currently only stubbed out and is a no-op.
	TCPSynCountOption

	// TCPWindowClampOption is used by SetSockOptInt/GetSockOptInt to bound
	// the size of the advertised window to this value.
	//
	// NOTE: This option is currently only stubbed out and is a no-op.
	TCPWindowClampOption

	// IPv6Checksum is used to request the stack to populate and validate the IPv6
	// checksum for transport level headers.
	IPv6Checksum

	// PacketMMapVersionOption is used to set the packet mmap version.
	PacketMMapVersionOption

	// PacketMMapReserveOption is used to set the packet mmap reserved space
	// between the aligned header and the payload.
	PacketMMapReserveOption
)
// Sentinel values for the per-endpoint TTL / hop limit options.
const (
	// UseDefaultIPv4TTL, when supplied as the IPv4TTLOption value, makes the
	// endpoint fall back to the TTL currently configured on the IPv4 protocol
	// (see DefaultTTLOption).
	UseDefaultIPv4TTL = 0

	// UseDefaultIPv6HopLimit, when supplied as the IPv6HopLimitOption value,
	// makes the endpoint fall back to the hop limit currently configured on
	// the IPv6 protocol (see DefaultTTLOption).
	UseDefaultIPv6HopLimit = -1
)
// PMTUDStrategy selects which kind of path MTU discovery (PMTUD) is performed.
type PMTUDStrategy int

// Settings accepted by MTUDiscoverOption.
const (
	// PMTUDiscoveryWant makes the MTUDiscoverOption defer to per-route
	// settings.
	PMTUDiscoveryWant PMTUDStrategy = iota

	// PMTUDiscoveryDont makes the MTUDiscoverOption turn path MTU discovery
	// off.
	PMTUDiscoveryDont

	// PMTUDiscoveryDo makes the MTUDiscoverOption always perform path MTU
	// discovery.
	PMTUDiscoveryDo

	// PMTUDiscoveryProbe makes the MTUDiscoverOption set DF while ignoring
	// the path MTU.
	PMTUDiscoveryProbe
)
// GettableNetworkProtocolOption marks the network protocol options that can
// be queried. It carries no behavior beyond its unexported marker method.
type GettableNetworkProtocolOption interface {
	isGettableNetworkProtocolOption()
}
// SettableNetworkProtocolOption marks the network protocol options that can
// be set. It carries no behavior beyond its unexported marker method.
type SettableNetworkProtocolOption interface {
	isSettableNetworkProtocolOption()
}
// DefaultTTLOption carries a default TTL; it is the option type passed to
// stack.(*Stack).NetworkProtocolOption for that purpose.
type DefaultTTLOption uint8

// isGettableNetworkProtocolOption marks *DefaultTTLOption as queryable.
func (*DefaultTTLOption) isGettableNetworkProtocolOption() {}

// isSettableNetworkProtocolOption marks *DefaultTTLOption as settable.
func (*DefaultTTLOption) isSettableNetworkProtocolOption() {}
// GettableTransportProtocolOption marks the transport protocol options that
// can be queried. It carries no behavior beyond its unexported marker method.
type GettableTransportProtocolOption interface {
	isGettableTransportProtocolOption()
}
// SettableTransportProtocolOption marks the transport protocol options that
// can be set. It carries no behavior beyond its unexported marker method.
type SettableTransportProtocolOption interface {
	isSettableTransportProtocolOption()
}
// TCPSACKEnabled enables/disables the SACK option for TCP.
//
// See: https://tools.ietf.org/html/rfc2018.
type TCPSACKEnabled bool

// isGettableTransportProtocolOption marks *TCPSACKEnabled as queryable.
func (*TCPSACKEnabled) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption marks *TCPSACKEnabled as settable.
func (*TCPSACKEnabled) isSettableTransportProtocolOption() {}
// TCPRecovery is the loss detection algorithm used by TCP.
type TCPRecovery int32

// isGettableTransportProtocolOption marks *TCPRecovery as queryable.
func (*TCPRecovery) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption marks *TCPRecovery as settable.
func (*TCPRecovery) isSettableTransportProtocolOption() {}
// TCPAlwaysUseSynCookies indicates that syncookies are used unconditionally.
type TCPAlwaysUseSynCookies bool

// isGettableTransportProtocolOption marks *TCPAlwaysUseSynCookies as
// queryable.
func (*TCPAlwaysUseSynCookies) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption marks *TCPAlwaysUseSynCookies as settable.
func (*TCPAlwaysUseSynCookies) isSettableTransportProtocolOption() {}
1095 1096 const (
1097 // TCPRACKLossDetection indicates RACK is used for loss detection and
1098 // recovery.
1099 TCPRACKLossDetection TCPRecovery = 1 << iota
1100 1101 // TCPRACKStaticReoWnd indicates the reordering window should not be
1102 // adjusted when DSACK is received.
1103 TCPRACKStaticReoWnd
1104 1105 // TCPRACKNoDupTh indicates RACK should not consider the classic three
1106 // duplicate acknowledgements rule to mark the segments as lost. This
1107 // is used when reordering is not detected.
1108 TCPRACKNoDupTh
1109 )
// TCPDelayEnabled turns Nagle's algorithm in TCP on or off.
type TCPDelayEnabled bool

// isGettableTransportProtocolOption marks *TCPDelayEnabled as queryable.
func (*TCPDelayEnabled) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption marks *TCPDelayEnabled as settable.
func (*TCPDelayEnabled) isSettableTransportProtocolOption() {}
// TCPSendBufferSizeRangeOption describes the send buffer size range for TCP.
//
// +stateify savable
type TCPSendBufferSizeRangeOption struct {
	// Min is the lower end of the range.
	Min int

	// Default is the default value within the range.
	Default int

	// Max is the upper end of the range.
	Max int
}

// isGettableTransportProtocolOption marks *TCPSendBufferSizeRangeOption as
// queryable.
func (*TCPSendBufferSizeRangeOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption marks *TCPSendBufferSizeRangeOption as
// settable.
func (*TCPSendBufferSizeRangeOption) isSettableTransportProtocolOption() {}
// TCPReceiveBufferSizeRangeOption describes the receive buffer size range for
// TCP.
//
// +stateify savable
type TCPReceiveBufferSizeRangeOption struct {
	// Min is the lower end of the range.
	Min int

	// Default is the default value within the range.
	Default int

	// Max is the upper end of the range.
	Max int
}

// isGettableTransportProtocolOption marks *TCPReceiveBufferSizeRangeOption as
// queryable.
func (*TCPReceiveBufferSizeRangeOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption marks *TCPReceiveBufferSizeRangeOption as
// settable.
func (*TCPReceiveBufferSizeRangeOption) isSettableTransportProtocolOption() {}
// TCPAvailableCongestionControlOption holds the supported congestion control
// algorithms for TCP.
type TCPAvailableCongestionControlOption string

// isGettableTransportProtocolOption marks
// *TCPAvailableCongestionControlOption as queryable.
func (*TCPAvailableCongestionControlOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption marks
// *TCPAvailableCongestionControlOption as settable.
func (*TCPAvailableCongestionControlOption) isSettableTransportProtocolOption() {}
// TCPModerateReceiveBufferOption turns receive buffer moderation for TCP on
// or off.
type TCPModerateReceiveBufferOption bool

// isGettableTransportProtocolOption marks *TCPModerateReceiveBufferOption as
// queryable.
func (*TCPModerateReceiveBufferOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption marks *TCPModerateReceiveBufferOption as
// settable.
func (*TCPModerateReceiveBufferOption) isSettableTransportProtocolOption() {}
// GettableSocketOption marks the socket options that can be queried. It
// carries no behavior beyond its unexported marker method.
type GettableSocketOption interface {
	isGettableSocketOption()
}
// SettableSocketOption marks the socket options that can be configured. It
// carries no behavior beyond its unexported marker method.
type SettableSocketOption interface {
	isSettableSocketOption()
}
// ICMPv6Filter is a filter over ICMPv6 types.
//
// +stateify savable
type ICMPv6Filter struct {
	// DenyType is a bitmap recording which ICMP types should be blocked.
	//
	// The ICMPv6 type field is 8 bits wide, so at most 256 distinct types
	// exist; 8 words of 32 bits give one bit per type.
	DenyType [8]uint32
}

// ShouldDeny reports whether the bit for icmpType is set in DenyType, i.e.
// whether that ICMPv6 type should be denied.
func (f *ICMPv6Filter) ShouldDeny(icmpType uint8) bool {
	const bitsPerWord = 32
	// The quotient selects the 32-bit word, the remainder the bit inside it.
	word := f.DenyType[icmpType/bitsPerWord]
	mask := uint32(1) << (icmpType % bitsPerWord)
	return word&mask != 0
}

// isGettableSocketOption marks *ICMPv6Filter as queryable.
func (*ICMPv6Filter) isGettableSocketOption() {}

// isSettableSocketOption marks *ICMPv6Filter as settable.
func (*ICMPv6Filter) isSettableSocketOption() {}
// TpacketReq mirrors the tpacket_req structure described in
// https://www.kernel.org/doc/Documentation/networking/packet_mmap.txt
//
// +stateify savable
type TpacketReq struct {
	// TpBlockSize corresponds to tp_block_size.
	TpBlockSize uint32

	// TpBlockNr corresponds to tp_block_nr.
	TpBlockNr uint32

	// TpFrameSize corresponds to tp_frame_size.
	TpFrameSize uint32

	// TpFrameNr corresponds to tp_frame_nr.
	TpFrameNr uint32
}

// isSettableSocketOption marks *TpacketReq as settable.
func (*TpacketReq) isSettableSocketOption() {}
// TpacketStats holds the statistics for a packet_mmap ring buffer, as found
// in <linux/if_packet.h>.
//
// +stateify savable
type TpacketStats struct {
	// Packets is the packet counter.
	Packets uint32

	// Dropped is the dropped-packet counter.
	Dropped uint32
}

// isGettableSocketOption marks *TpacketStats as queryable.
func (*TpacketStats) isGettableSocketOption() {}
// EndpointState is the state an endpoint is in.
type EndpointState uint8
// CongestionControlState is the congestion control state a TCP sender is
// currently in.
type CongestionControlState int

// Possible CongestionControlState values.
const (
	// Open means the sender is receiving acks in order and no loss,
	// dupACKs, etc. have been detected.
	Open CongestionControlState = iota

	// RTORecovery means an RTO has occurred and the sender has entered an
	// RTO based recovery phase.
	RTORecovery

	// FastRecovery means the sender has entered FastRecovery after
	// receiving nDupAck's. This state is entered only when SACK is not in
	// use.
	FastRecovery

	// SACKRecovery means the sender has entered SACK based recovery.
	SACKRecovery

	// Disorder means the sender received either some SACK blocks or
	// dupACKs.
	Disorder
)
1244 1245 // TCPInfoOption is used by GetSockOpt to expose TCP statistics.
1246 type TCPInfoOption struct {
1247 // RTT is the smoothed round trip time.
1248 RTT time.Duration
1249 1250 // RTTVar is the round trip time variation.
1251 RTTVar time.Duration
1252 1253 // RTO is the retransmission timeout for the endpoint.
1254 RTO time.Duration
1255 1256 // State is the current endpoint protocol state.
1257 State EndpointState
1258 1259 // CcState is the congestion control state.
1260 CcState CongestionControlState
1261 1262 // SndCwnd is the congestion window, in packets.
1263 SndCwnd uint32
1264 1265 // SndSsthresh is the threshold between slow start and congestion
1266 // avoidance.
1267 SndSsthresh uint32
1268 1269 // ReorderSeen indicates if reordering is seen in the endpoint.
1270 ReorderSeen bool
1271 }
1272 1273 func (*TCPInfoOption) isGettableSocketOption() {}
// KeepaliveIdleOption is the SetSockOpt/GetSockOpt option giving how long a
// connection must stay idle before the first TCP keepalive packet is sent.
// After that point KeepaliveIntervalOption applies instead.
type KeepaliveIdleOption time.Duration

// isGettableSocketOption marks *KeepaliveIdleOption as queryable.
func (*KeepaliveIdleOption) isGettableSocketOption() {}

// isSettableSocketOption marks *KeepaliveIdleOption as settable.
func (*KeepaliveIdleOption) isSettableSocketOption() {}
// KeepaliveIntervalOption is the SetSockOpt/GetSockOpt option giving the
// interval between TCP keepalive packets.
type KeepaliveIntervalOption time.Duration

// isGettableSocketOption marks *KeepaliveIntervalOption as queryable.
func (*KeepaliveIntervalOption) isGettableSocketOption() {}

// isSettableSocketOption marks *KeepaliveIntervalOption as settable.
func (*KeepaliveIntervalOption) isSettableSocketOption() {}
// TCPUserTimeoutOption is the SetSockOpt/GetSockOpt option carrying a
// user-specified timeout for a given TCP connection.
//
// See RFC 5482 for details.
type TCPUserTimeoutOption time.Duration

// isGettableSocketOption marks *TCPUserTimeoutOption as queryable.
func (*TCPUserTimeoutOption) isGettableSocketOption() {}

// isSettableSocketOption marks *TCPUserTimeoutOption as settable.
func (*TCPUserTimeoutOption) isSettableSocketOption() {}
// CongestionControlOption is the SetSockOpt/GetSockOpt option used to set or
// get the current congestion control algorithm. It is usable both as a socket
// option and as a transport protocol option.
type CongestionControlOption string

// isGettableSocketOption marks *CongestionControlOption as a queryable socket
// option.
func (*CongestionControlOption) isGettableSocketOption() {}

// isSettableSocketOption marks *CongestionControlOption as a settable socket
// option.
func (*CongestionControlOption) isSettableSocketOption() {}

// isGettableTransportProtocolOption marks *CongestionControlOption as a
// queryable transport protocol option.
func (*CongestionControlOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption marks *CongestionControlOption as a
// settable transport protocol option.
func (*CongestionControlOption) isSettableTransportProtocolOption() {}
// TCPLingerTimeoutOption is the SetSockOpt/GetSockOpt option used to set or
// get the maximum time a socket lingers in the TCP_FIN_WAIT_2 state before it
// is marked closed. It is usable both as a socket option and as a transport
// protocol option.
type TCPLingerTimeoutOption time.Duration

// isGettableSocketOption marks *TCPLingerTimeoutOption as a queryable socket
// option.
func (*TCPLingerTimeoutOption) isGettableSocketOption() {}

// isSettableSocketOption marks *TCPLingerTimeoutOption as a settable socket
// option.
func (*TCPLingerTimeoutOption) isSettableSocketOption() {}

// isGettableTransportProtocolOption marks *TCPLingerTimeoutOption as a
// queryable transport protocol option.
func (*TCPLingerTimeoutOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption marks *TCPLingerTimeoutOption as a
// settable transport protocol option.
func (*TCPLingerTimeoutOption) isSettableTransportProtocolOption() {}
// TCPTimeWaitTimeoutOption is the SetSockOpt/GetSockOpt option used to set or
// get the maximum time a socket lingers in the TIME_WAIT state before it is
// marked closed. It is usable both as a socket option and as a transport
// protocol option.
type TCPTimeWaitTimeoutOption time.Duration

// isGettableSocketOption marks *TCPTimeWaitTimeoutOption as a queryable socket
// option.
func (*TCPTimeWaitTimeoutOption) isGettableSocketOption() {}

// isSettableSocketOption marks *TCPTimeWaitTimeoutOption as a settable socket
// option.
func (*TCPTimeWaitTimeoutOption) isSettableSocketOption() {}

// isGettableTransportProtocolOption marks *TCPTimeWaitTimeoutOption as a
// queryable transport protocol option.
func (*TCPTimeWaitTimeoutOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption marks *TCPTimeWaitTimeoutOption as a
// settable transport protocol option.
func (*TCPTimeWaitTimeoutOption) isSettableTransportProtocolOption() {}
// TCPDeferAcceptOption is the SetSockOpt/GetSockOpt option that lets an
// accept return a completed connection only once there is data to be read.
// This usually means the listening socket drops the final ACK of a handshake,
// up to the specified timeout, until a segment carrying data arrives.
type TCPDeferAcceptOption time.Duration

// isGettableSocketOption marks *TCPDeferAcceptOption as queryable.
func (*TCPDeferAcceptOption) isGettableSocketOption() {}

// isSettableSocketOption marks *TCPDeferAcceptOption as settable.
func (*TCPDeferAcceptOption) isSettableSocketOption() {}
// TCPMinRTOOption is the SetSockOpt/GetSockOpt option that allows overriding
// the default MinRTO used by the Stack. It is usable both as a socket option
// and as a transport protocol option.
type TCPMinRTOOption time.Duration

// isGettableSocketOption marks *TCPMinRTOOption as a queryable socket option.
func (*TCPMinRTOOption) isGettableSocketOption() {}

// isSettableSocketOption marks *TCPMinRTOOption as a settable socket option.
func (*TCPMinRTOOption) isSettableSocketOption() {}

// isGettableTransportProtocolOption marks *TCPMinRTOOption as a queryable
// transport protocol option.
func (*TCPMinRTOOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption marks *TCPMinRTOOption as a settable
// transport protocol option.
func (*TCPMinRTOOption) isSettableTransportProtocolOption() {}
// TCPMaxRTOOption is the SetSockOpt/GetSockOpt option that allows overriding
// the default MaxRTO used by the Stack. It is usable both as a socket option
// and as a transport protocol option.
type TCPMaxRTOOption time.Duration

// isGettableSocketOption marks *TCPMaxRTOOption as a queryable socket option.
func (*TCPMaxRTOOption) isGettableSocketOption() {}

// isSettableSocketOption marks *TCPMaxRTOOption as a settable socket option.
func (*TCPMaxRTOOption) isSettableSocketOption() {}

// isGettableTransportProtocolOption marks *TCPMaxRTOOption as a queryable
// transport protocol option.
func (*TCPMaxRTOOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption marks *TCPMaxRTOOption as a settable
// transport protocol option.
func (*TCPMaxRTOOption) isSettableTransportProtocolOption() {}
// TCPMaxRetriesOption is the SetSockOpt/GetSockOpt option used to set or get
// the maximum number of retransmits after which the connection is timed out.
// It is usable both as a socket option and as a transport protocol option.
type TCPMaxRetriesOption uint64

// isGettableSocketOption marks *TCPMaxRetriesOption as a queryable socket
// option.
func (*TCPMaxRetriesOption) isGettableSocketOption() {}

// isSettableSocketOption marks *TCPMaxRetriesOption as a settable socket
// option.
func (*TCPMaxRetriesOption) isSettableSocketOption() {}

// isGettableTransportProtocolOption marks *TCPMaxRetriesOption as a queryable
// transport protocol option.
func (*TCPMaxRetriesOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption marks *TCPMaxRetriesOption as a settable
// transport protocol option.
func (*TCPMaxRetriesOption) isSettableTransportProtocolOption() {}
// TCPSynRetriesOption is the SetSockOpt/GetSockOpt option specifying the
// stack-wide default for how many times a SYN is retransmitted before a
// connect is aborted. It is usable both as a socket option and as a transport
// protocol option.
type TCPSynRetriesOption uint8

// isGettableSocketOption marks *TCPSynRetriesOption as a queryable socket
// option.
func (*TCPSynRetriesOption) isGettableSocketOption() {}

// isSettableSocketOption marks *TCPSynRetriesOption as a settable socket
// option.
func (*TCPSynRetriesOption) isSettableSocketOption() {}

// isGettableTransportProtocolOption marks *TCPSynRetriesOption as a queryable
// transport protocol option.
func (*TCPSynRetriesOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption marks *TCPSynRetriesOption as a settable
// transport protocol option.
func (*TCPSynRetriesOption) isSettableTransportProtocolOption() {}
1396 1397 // MulticastInterfaceOption is used by SetSockOpt/GetSockOpt to specify a
1398 // default interface for multicast.
1399 type MulticastInterfaceOption struct {
1400 NIC NICID
1401 InterfaceAddr Address
1402 }
1403 1404 func (*MulticastInterfaceOption) isGettableSocketOption() {}
1405 1406 func (*MulticastInterfaceOption) isSettableSocketOption() {}
1407 1408 // MembershipOption is used to identify a multicast membership on an interface.
1409 type MembershipOption struct {
1410 NIC NICID
1411 InterfaceAddr Address
1412 MulticastAddr Address
1413 }
1414 1415 // AddMembershipOption identifies a multicast group to join on some interface.
1416 type AddMembershipOption MembershipOption
1417 1418 func (*AddMembershipOption) isSettableSocketOption() {}
1419 1420 // RemoveMembershipOption identifies a multicast group to leave on some
1421 // interface.
1422 type RemoveMembershipOption MembershipOption
1423 1424 func (*RemoveMembershipOption) isSettableSocketOption() {}
// SocketDetachFilterOption is the SetSockOpt option that detaches a classic
// BPF filter previously attached to a given endpoint.
type SocketDetachFilterOption int

// isSettableSocketOption marks *SocketDetachFilterOption as settable.
func (*SocketDetachFilterOption) isSettableSocketOption() {}
1431 1432 // OriginalDestinationOption is used to get the original destination address
1433 // and port of a redirected packet.
1434 type OriginalDestinationOption FullAddress
1435 1436 func (*OriginalDestinationOption) isGettableSocketOption() {}
// TCPTimeWaitReuseOption is used by stack.(*Stack).TransportProtocolOption to
// specify whether the stack may reuse, for new connections, a port bound by
// an endpoint in TIME-WAIT when doing so is safe from the protocol viewpoint.
type TCPTimeWaitReuseOption uint8

// isGettableSocketOption marks *TCPTimeWaitReuseOption as a queryable socket
// option.
func (*TCPTimeWaitReuseOption) isGettableSocketOption() {}

// isSettableSocketOption marks *TCPTimeWaitReuseOption as a settable socket
// option.
func (*TCPTimeWaitReuseOption) isSettableSocketOption() {}

// isGettableTransportProtocolOption marks *TCPTimeWaitReuseOption as a
// queryable transport protocol option.
func (*TCPTimeWaitReuseOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption marks *TCPTimeWaitReuseOption as a
// settable transport protocol option.
func (*TCPTimeWaitReuseOption) isSettableTransportProtocolOption() {}

// Possible TCPTimeWaitReuseOption values.
const (
	// TCPTimeWaitReuseDisabled indicates that a port bound by an endpoint in
	// TIME-WAIT cannot be reused for new connections.
	TCPTimeWaitReuseDisabled TCPTimeWaitReuseOption = iota

	// TCPTimeWaitReuseGlobal indicates that a port bound by an endpoint in
	// TIME-WAIT can be reused for new connections irrespective of the
	// src/dest addresses.
	TCPTimeWaitReuseGlobal

	// TCPTimeWaitReuseLoopbackOnly indicates that a port bound by an
	// endpoint in TIME-WAIT can only be reused if the connection was a
	// connection over loopback, i.e. src/dest addresses are loopback
	// addresses.
	TCPTimeWaitReuseLoopbackOnly
)
// LingerOption is the SetSockOpt/GetSockOpt option used to set or get how
// long a socket lingers before returning from Close.
//
// +marshal
// +stateify savable
type LingerOption struct {
	// Enabled is the on/off switch of the option.
	Enabled bool

	// Timeout is the linger duration.
	Timeout time.Duration
}
1475 1476 // IPPacketInfo is the message structure for IP_PKTINFO.
1477 //
1478 // +stateify savable
1479 type IPPacketInfo struct {
1480 // NIC is the ID of the NIC to be used.
1481 NIC NICID
1482 1483 // LocalAddr is the local address.
1484 LocalAddr Address
1485 1486 // DestinationAddr is the destination address found in the IP header.
1487 DestinationAddr Address
1488 }
1489 1490 // IPv6PacketInfo is the message structure for IPV6_PKTINFO.
1491 //
1492 // +stateify savable
1493 type IPv6PacketInfo struct {
1494 Addr Address
1495 NIC NICID
1496 }
// SendBufferSizeOption is used with stack.(*Stack).Option/SetOption to get or
// set the default, minimum and maximum send buffer sizes.
//
// +stateify savable
type SendBufferSizeOption struct {
	// Min is the smallest allowed send buffer size.
	Min int

	// Default is the send buffer size used by default.
	Default int

	// Max is the largest allowed send buffer size.
	Max int
}
// ReceiveBufferSizeOption is used with stack.(*Stack).Option/SetOption to get
// or set the default, minimum and maximum receive buffer sizes.
//
// +stateify savable
type ReceiveBufferSizeOption struct {
	// Min is the smallest allowed receive buffer size.
	Min int

	// Default is the receive buffer size used by default.
	Default int

	// Max is the largest allowed receive buffer size.
	Max int
}
1527 1528 // GetSendBufferLimits is used to get the send buffer size limits.
1529 type GetSendBufferLimits func(StackHandler) SendBufferSizeOption
1530 1531 // GetStackSendBufferLimits is used to get default, min and max send buffer size.
1532 func GetStackSendBufferLimits(so StackHandler) SendBufferSizeOption {
1533 var ss SendBufferSizeOption
1534 if err := so.Option(&ss); err != nil {
1535 panic(fmt.Sprintf("s.Option(%#v) = %s", ss, err))
1536 }
1537 return ss
1538 }
1539 1540 // GetReceiveBufferLimits is used to get the send buffer size limits.
1541 type GetReceiveBufferLimits func(StackHandler) ReceiveBufferSizeOption
1542 1543 // GetStackReceiveBufferLimits is used to get default, min and max send buffer size.
1544 func GetStackReceiveBufferLimits(so StackHandler) ReceiveBufferSizeOption {
1545 var ss ReceiveBufferSizeOption
1546 if err := so.Option(&ss); err != nil {
1547 panic(fmt.Sprintf("s.Option(%#v) = %s", ss, err))
1548 }
1549 return ss
1550 }
1551 1552 // Route is a row in the routing table. It specifies through which NIC (and
1553 // gateway) sets of packets should be routed. A row is considered viable if the
1554 // masked target address matches the destination address in the row.
1555 //
1556 // +stateify savable
1557 type Route struct {
1558 RouteEntry
1559 1560 // Destination must contain the target address for this row to be viable.
1561 Destination Subnet
1562 1563 // Gateway is the gateway to be used if this row is viable.
1564 Gateway Address
1565 1566 // NIC is the id of the nic to be used if this row is viable.
1567 NIC NICID
1568 1569 // SourceHint indicates a preferred source address to use when NICs
1570 // have multiple addresses.
1571 SourceHint Address
1572 1573 // MTU is the maximum transmission unit to use for this route.
1574 // If MTU is 0, this field is ignored and the MTU of the NIC for which this route
1575 // is configured is used for egress packets.
1576 MTU uint32
1577 }
1578 1579 // String implements the fmt.Stringer interface.
1580 func (r Route) String() string {
1581 var out strings.Builder
1582 _, _ = fmt.Fprintf(&out, "%s", r.Destination)
1583 if r.Gateway.length > 0 {
1584 _, _ = fmt.Fprintf(&out, " via %s", r.Gateway)
1585 }
1586 _, _ = fmt.Fprintf(&out, " nic %d", r.NIC)
1587 return out.String()
1588 }
1589 1590 // Equal returns true if the given Route is equal to this Route.
1591 func (r Route) Equal(to Route) bool {
1592 // NOTE: This relies on the fact that r.Destination == to.Destination
1593 return r.Destination.Equal(to.Destination) && r.NIC == to.NIC
1594 }
// TransportProtocolNumber is the number identifying a transport protocol.
type TransportProtocolNumber uint32
// NetworkProtocolNumber is the EtherType that identifies a network protocol
// in an Ethernet frame.
//
// See: https://www.iana.org/assignments/ieee-802-numbers/ieee-802-numbers.xhtml
type NetworkProtocolNumber uint32
1604 1605 // A StatCounter keeps track of a statistic.
1606 //
1607 // +stateify savable
1608 type StatCounter struct {
1609 count atomicbitops.Uint64
1610 }
1611 1612 // Increment adds one to the counter.
1613 func (s *StatCounter) Increment() {
1614 s.IncrementBy(1)
1615 }
1616 1617 // Decrement minuses one to the counter.
1618 func (s *StatCounter) Decrement() {
1619 s.IncrementBy(^uint64(0))
1620 }
1621 1622 // Value returns the current value of the counter.
1623 func (s *StatCounter) Value() uint64 {
1624 return s.count.Load()
1625 }
1626 1627 // IncrementBy increments the counter by v.
1628 func (s *StatCounter) IncrementBy(v uint64) {
1629 s.count.Add(v)
1630 }
1631 1632 func (s *StatCounter) String() string {
1633 return strconv.FormatUint(s.Value(), 10)
1634 }
1635 1636 // A MultiCounterStat keeps track of two counters at once.
1637 //
1638 // +stateify savable
1639 type MultiCounterStat struct {
1640 a *StatCounter
1641 b *StatCounter
1642 }
1643 1644 // Init sets both internal counters to point to a and b.
1645 func (m *MultiCounterStat) Init(a, b *StatCounter) {
1646 m.a = a
1647 m.b = b
1648 }
1649 1650 // Increment adds one to the counters.
1651 func (m *MultiCounterStat) Increment() {
1652 m.a.Increment()
1653 m.b.Increment()
1654 }
1655 1656 // IncrementBy increments the counters by v.
1657 func (m *MultiCounterStat) IncrementBy(v uint64) {
1658 m.a.IncrementBy(v)
1659 m.b.IncrementBy(v)
1660 }
1661 1662 // ICMPv4PacketStats enumerates counts for all ICMPv4 packet types.
1663 //
1664 // +stateify savable
1665 type ICMPv4PacketStats struct {
1666 // LINT.IfChange(ICMPv4PacketStats)
1667 1668 // EchoRequest is the number of ICMPv4 echo packets counted.
1669 EchoRequest *StatCounter
1670 1671 // EchoReply is the number of ICMPv4 echo reply packets counted.
1672 EchoReply *StatCounter
1673 1674 // DstUnreachable is the number of ICMPv4 destination unreachable packets
1675 // counted.
1676 DstUnreachable *StatCounter
1677 1678 // SrcQuench is the number of ICMPv4 source quench packets counted.
1679 SrcQuench *StatCounter
1680 1681 // Redirect is the number of ICMPv4 redirect packets counted.
1682 Redirect *StatCounter
1683 1684 // TimeExceeded is the number of ICMPv4 time exceeded packets counted.
1685 TimeExceeded *StatCounter
1686 1687 // ParamProblem is the number of ICMPv4 parameter problem packets counted.
1688 ParamProblem *StatCounter
1689 1690 // Timestamp is the number of ICMPv4 timestamp packets counted.
1691 Timestamp *StatCounter
1692 1693 // TimestampReply is the number of ICMPv4 timestamp reply packets counted.
1694 TimestampReply *StatCounter
1695 1696 // InfoRequest is the number of ICMPv4 information request packets counted.
1697 InfoRequest *StatCounter
1698 1699 // InfoReply is the number of ICMPv4 information reply packets counted.
1700 InfoReply *StatCounter
1701 1702 // LINT.ThenChange(network/ipv4/stats.go:multiCounterICMPv4PacketStats)
1703 }
1704 1705 // ICMPv4SentPacketStats collects outbound ICMPv4-specific stats.
1706 //
1707 // +stateify savable
1708 type ICMPv4SentPacketStats struct {
1709 // LINT.IfChange(ICMPv4SentPacketStats)
1710 1711 ICMPv4PacketStats
1712 1713 // Dropped is the number of ICMPv4 packets dropped due to link layer errors.
1714 Dropped *StatCounter
1715 1716 // RateLimited is the number of ICMPv4 packets dropped due to rate limit being
1717 // exceeded.
1718 RateLimited *StatCounter
1719 1720 // LINT.ThenChange(network/ipv4/stats.go:multiCounterICMPv4SentPacketStats)
1721 }
1722 1723 // ICMPv4ReceivedPacketStats collects inbound ICMPv4-specific stats.
1724 //
1725 // +stateify savable
1726 type ICMPv4ReceivedPacketStats struct {
1727 // LINT.IfChange(ICMPv4ReceivedPacketStats)
1728 1729 ICMPv4PacketStats
1730 1731 // Invalid is the number of invalid ICMPv4 packets received.
1732 Invalid *StatCounter
1733 1734 // LINT.ThenChange(network/ipv4/stats.go:multiCounterICMPv4ReceivedPacketStats)
1735 }
1736 1737 // ICMPv4Stats collects ICMPv4-specific stats.
1738 //
1739 // +stateify savable
1740 type ICMPv4Stats struct {
1741 // LINT.IfChange(ICMPv4Stats)
1742 1743 // PacketsSent contains statistics about sent packets.
1744 PacketsSent ICMPv4SentPacketStats
1745 1746 // PacketsReceived contains statistics about received packets.
1747 PacketsReceived ICMPv4ReceivedPacketStats
1748 1749 // LINT.ThenChange(network/ipv4/stats.go:multiCounterICMPv4Stats)
1750 }
1751 1752 // ICMPv6PacketStats enumerates counts for all ICMPv6 packet types.
1753 //
1754 // +stateify savable
1755 type ICMPv6PacketStats struct {
1756 // LINT.IfChange(ICMPv6PacketStats)
1757 1758 // EchoRequest is the number of ICMPv6 echo request packets counted.
1759 EchoRequest *StatCounter
1760 1761 // EchoReply is the number of ICMPv6 echo reply packets counted.
1762 EchoReply *StatCounter
1763 1764 // DstUnreachable is the number of ICMPv6 destination unreachable packets
1765 // counted.
1766 DstUnreachable *StatCounter
1767 1768 // PacketTooBig is the number of ICMPv6 packet too big packets counted.
1769 PacketTooBig *StatCounter
1770 1771 // TimeExceeded is the number of ICMPv6 time exceeded packets counted.
1772 TimeExceeded *StatCounter
1773 1774 // ParamProblem is the number of ICMPv6 parameter problem packets counted.
1775 ParamProblem *StatCounter
1776 1777 // RouterSolicit is the number of ICMPv6 router solicit packets counted.
1778 RouterSolicit *StatCounter
1779 1780 // RouterAdvert is the number of ICMPv6 router advert packets counted.
1781 RouterAdvert *StatCounter
1782 1783 // NeighborSolicit is the number of ICMPv6 neighbor solicit packets counted.
1784 NeighborSolicit *StatCounter
1785 1786 // NeighborAdvert is the number of ICMPv6 neighbor advert packets counted.
1787 NeighborAdvert *StatCounter
1788 1789 // RedirectMsg is the number of ICMPv6 redirect message packets counted.
1790 RedirectMsg *StatCounter
1791 1792 // MulticastListenerQuery is the number of Multicast Listener Query messages
1793 // counted.
1794 MulticastListenerQuery *StatCounter
1795 1796 // MulticastListenerReport is the number of Multicast Listener Report messages
1797 // counted.
1798 MulticastListenerReport *StatCounter
1799 1800 // MulticastListenerReportV2 is the number of Multicast Listener Report
1801 // messages counted.
1802 MulticastListenerReportV2 *StatCounter
1803 1804 // MulticastListenerDone is the number of Multicast Listener Done messages
1805 // counted.
1806 MulticastListenerDone *StatCounter
1807 1808 // LINT.ThenChange(network/ipv6/stats.go:multiCounterICMPv6PacketStats)
1809 }
1810 1811 // ICMPv6SentPacketStats collects outbound ICMPv6-specific stats.
1812 //
1813 // +stateify savable
1814 type ICMPv6SentPacketStats struct {
1815 // LINT.IfChange(ICMPv6SentPacketStats)
1816 1817 ICMPv6PacketStats
1818 1819 // Dropped is the number of ICMPv6 packets dropped due to link layer errors.
1820 Dropped *StatCounter
1821 1822 // RateLimited is the number of ICMPv6 packets dropped due to rate limit being
1823 // exceeded.
1824 RateLimited *StatCounter
1825 1826 // LINT.ThenChange(network/ipv6/stats.go:multiCounterICMPv6SentPacketStats)
1827 }
1828 1829 // ICMPv6ReceivedPacketStats collects inbound ICMPv6-specific stats.
1830 //
1831 // +stateify savable
1832 type ICMPv6ReceivedPacketStats struct {
1833 // LINT.IfChange(ICMPv6ReceivedPacketStats)
1834 1835 ICMPv6PacketStats
1836 1837 // Unrecognized is the number of ICMPv6 packets received that the transport
1838 // layer does not know how to parse.
1839 Unrecognized *StatCounter
1840 1841 // Invalid is the number of invalid ICMPv6 packets received.
1842 Invalid *StatCounter
1843 1844 // RouterOnlyPacketsDroppedByHost is the number of ICMPv6 packets dropped due
1845 // to being router-specific packets.
1846 RouterOnlyPacketsDroppedByHost *StatCounter
1847 1848 // LINT.ThenChange(network/ipv6/stats.go:multiCounterICMPv6ReceivedPacketStats)
1849 }
1850 1851 // ICMPv6Stats collects ICMPv6-specific stats.
1852 //
1853 // +stateify savable
1854 type ICMPv6Stats struct {
1855 // LINT.IfChange(ICMPv6Stats)
1856 1857 // PacketsSent contains statistics about sent packets.
1858 PacketsSent ICMPv6SentPacketStats
1859 1860 // PacketsReceived contains statistics about received packets.
1861 PacketsReceived ICMPv6ReceivedPacketStats
1862 1863 // LINT.ThenChange(network/ipv6/stats.go:multiCounterICMPv6Stats)
1864 }
1865 1866 // ICMPStats collects ICMP-specific stats (both v4 and v6).
1867 //
1868 // +stateify savable
1869 type ICMPStats struct {
1870 // V4 contains the ICMPv4-specifics stats.
1871 V4 ICMPv4Stats
1872 1873 // V6 contains the ICMPv4-specifics stats.
1874 V6 ICMPv6Stats
1875 }
1876 1877 // IGMPPacketStats enumerates counts for all IGMP packet types.
1878 //
1879 // +stateify savable
1880 type IGMPPacketStats struct {
1881 // LINT.IfChange(IGMPPacketStats)
1882 1883 // MembershipQuery is the number of Membership Query messages counted.
1884 MembershipQuery *StatCounter
1885 1886 // V1MembershipReport is the number of Version 1 Membership Report messages
1887 // counted.
1888 V1MembershipReport *StatCounter
1889 1890 // V2MembershipReport is the number of Version 2 Membership Report messages
1891 // counted.
1892 V2MembershipReport *StatCounter
1893 1894 // V3MembershipReport is the number of Version 3 Membership Report messages
1895 // counted.
1896 V3MembershipReport *StatCounter
1897 1898 // LeaveGroup is the number of Leave Group messages counted.
1899 LeaveGroup *StatCounter
1900 1901 // LINT.ThenChange(network/ipv4/stats.go:multiCounterIGMPPacketStats)
1902 }
// IGMPSentPacketStats collects outbound IGMP-specific stats.
//
// +stateify savable
type IGMPSentPacketStats struct {
	// LINT.IfChange(IGMPSentPacketStats)

	// IGMPPacketStats is embedded to provide the per-message-type counters.
	IGMPPacketStats

	// Dropped is the number of IGMP packets dropped.
	Dropped *StatCounter

	// LINT.ThenChange(network/ipv4/stats.go:multiCounterIGMPSentPacketStats)
}
// IGMPReceivedPacketStats collects inbound IGMP-specific stats.
//
// +stateify savable
type IGMPReceivedPacketStats struct {
	// LINT.IfChange(IGMPReceivedPacketStats)

	// IGMPPacketStats is embedded to provide the per-message-type counters.
	IGMPPacketStats

	// Invalid is the number of invalid IGMP packets received.
	Invalid *StatCounter

	// ChecksumErrors is the number of IGMP packets dropped due to bad checksums.
	ChecksumErrors *StatCounter

	// Unrecognized is the number of unrecognized messages counted; these are
	// silently ignored for forward-compatibility.
	Unrecognized *StatCounter

	// LINT.ThenChange(network/ipv4/stats.go:multiCounterIGMPReceivedPacketStats)
}
// IGMPStats collects IGMP-specific stats, grouped by packet direction.
//
// +stateify savable
type IGMPStats struct {
	// LINT.IfChange(IGMPStats)

	// PacketsSent contains statistics about sent IGMP packets.
	PacketsSent IGMPSentPacketStats

	// PacketsReceived contains statistics about received IGMP packets.
	PacketsReceived IGMPReceivedPacketStats

	// LINT.ThenChange(network/ipv4/stats.go:multiCounterIGMPStats)
}
// IPForwardingStats collects stats related to IP forwarding (both v4 and v6).
//
// Every field is a packet count.
//
// +stateify savable
type IPForwardingStats struct {
	// LINT.IfChange(IPForwardingStats)

	// Unrouteable is the number of IP packets received which were dropped
	// because a route to their destination could not be constructed.
	Unrouteable *StatCounter

	// ExhaustedTTL is the number of IP packets received which were dropped
	// because their TTL was exhausted.
	ExhaustedTTL *StatCounter

	// InitializingSource is the number of IP packets which were dropped
	// because they contained a source address that may only be used on the local
	// network as part of initialization work.
	InitializingSource *StatCounter

	// LinkLocalSource is the number of IP packets which were dropped
	// because they contained a link-local source address.
	LinkLocalSource *StatCounter

	// LinkLocalDestination is the number of IP packets which were dropped
	// because they contained a link-local destination address.
	LinkLocalDestination *StatCounter

	// PacketTooBig is the number of IP packets which were dropped because they
	// were too big for the outgoing MTU.
	PacketTooBig *StatCounter

	// HostUnreachable is the number of IP packets received which could not be
	// successfully forwarded due to an unresolvable next hop.
	HostUnreachable *StatCounter

	// ExtensionHeaderProblem is the number of IP packets which were dropped
	// because of a problem encountered when processing an IPv6 extension
	// header.
	ExtensionHeaderProblem *StatCounter

	// UnexpectedMulticastInputInterface is the number of multicast packets that
	// were received on an interface that did not match the corresponding route's
	// expected input interface.
	UnexpectedMulticastInputInterface *StatCounter

	// UnknownOutputEndpoint is the number of packets that could not be forwarded
	// because the output endpoint could not be found.
	UnknownOutputEndpoint *StatCounter

	// NoMulticastPendingQueueBufferSpace is the number of multicast packets that
	// were dropped due to insufficient buffer space in the pending packet queue.
	NoMulticastPendingQueueBufferSpace *StatCounter

	// OutgoingDeviceNoBufferSpace is the number of packets that were dropped due
	// to insufficient space in the outgoing device.
	OutgoingDeviceNoBufferSpace *StatCounter

	// Errors is the number of IP packets received which could not be
	// successfully forwarded.
	Errors *StatCounter

	// OutgoingDeviceClosedForSend is the number of packets that were dropped due
	// to the outgoing device being closed for send.
	OutgoingDeviceClosedForSend *StatCounter

	// LINT.ThenChange(network/internal/ip/stats.go:MultiCounterIPForwardingStats)
}
// IPStats collects IP-specific stats (both v4 and v6).
//
// +stateify savable
type IPStats struct {
	// LINT.IfChange(IPStats)

	// PacketsReceived is the number of IP packets received from the link layer.
	PacketsReceived *StatCounter

	// ValidPacketsReceived is the number of valid IP packets that reached the IP
	// layer.
	ValidPacketsReceived *StatCounter

	// DisabledPacketsReceived is the number of IP packets received from the link
	// layer when the IP layer is disabled.
	DisabledPacketsReceived *StatCounter

	// InvalidDestinationAddressesReceived is the number of IP packets received
	// with an unknown or invalid destination address.
	InvalidDestinationAddressesReceived *StatCounter

	// InvalidSourceAddressesReceived is the number of IP packets received with a
	// source address that should never have been received on the wire.
	InvalidSourceAddressesReceived *StatCounter

	// PacketsDelivered is the number of incoming IP packets that are successfully
	// delivered to the transport layer.
	PacketsDelivered *StatCounter

	// PacketsSent is the number of IP packets sent via WritePacket.
	PacketsSent *StatCounter

	// OutgoingPacketErrors is the number of IP packets which failed to write to a
	// link-layer endpoint.
	OutgoingPacketErrors *StatCounter

	// MalformedPacketsReceived is the number of IP packets that were dropped due
	// to the IP packet header failing validation checks.
	MalformedPacketsReceived *StatCounter

	// MalformedFragmentsReceived is the number of IP fragments that were dropped
	// due to the fragment failing validation checks.
	MalformedFragmentsReceived *StatCounter

	// IPTablesPreroutingDropped is the number of IP packets dropped in the
	// Prerouting chain.
	IPTablesPreroutingDropped *StatCounter

	// IPTablesInputDropped is the number of IP packets dropped in the Input
	// chain.
	IPTablesInputDropped *StatCounter

	// IPTablesForwardDropped is the number of IP packets dropped in the Forward
	// chain.
	IPTablesForwardDropped *StatCounter

	// IPTablesOutputDropped is the number of IP packets dropped in the Output
	// chain.
	IPTablesOutputDropped *StatCounter

	// IPTablesPostroutingDropped is the number of IP packets dropped in the
	// Postrouting chain.
	IPTablesPostroutingDropped *StatCounter

	// TODO(https://gvisor.dev/issues/5529): Move the IPv4-only option stats out
	// of IPStats.

	// OptionTimestampReceived is the number of Timestamp options seen.
	OptionTimestampReceived *StatCounter

	// OptionRecordRouteReceived is the number of Record Route options seen.
	OptionRecordRouteReceived *StatCounter

	// OptionRouterAlertReceived is the number of Router Alert options seen.
	OptionRouterAlertReceived *StatCounter

	// OptionUnknownReceived is the number of unknown IP options seen.
	OptionUnknownReceived *StatCounter

	// Forwarding collects stats related to IP forwarding.
	Forwarding IPForwardingStats

	// LINT.ThenChange(network/internal/ip/stats.go:MultiCounterIPStats)
}
// ARPStats collects ARP-specific stats.
//
// +stateify savable
type ARPStats struct {
	// LINT.IfChange(ARPStats)

	// PacketsReceived is the number of ARP packets received from the link layer.
	PacketsReceived *StatCounter

	// DisabledPacketsReceived is the number of ARP packets received from the link
	// layer when the ARP layer is disabled.
	DisabledPacketsReceived *StatCounter

	// MalformedPacketsReceived is the number of ARP packets that were dropped due
	// to being malformed.
	MalformedPacketsReceived *StatCounter

	// RequestsReceived is the number of ARP requests received.
	RequestsReceived *StatCounter

	// RequestsReceivedUnknownTargetAddress is the number of ARP requests that
	// were targeted to an interface different from the one it was received on.
	RequestsReceivedUnknownTargetAddress *StatCounter

	// OutgoingRequestInterfaceHasNoLocalAddressErrors is the number of failures
	// to send an ARP request because the interface has no network address
	// assigned to it.
	OutgoingRequestInterfaceHasNoLocalAddressErrors *StatCounter

	// OutgoingRequestBadLocalAddressErrors is the number of failures to send an
	// ARP request with a bad local address.
	OutgoingRequestBadLocalAddressErrors *StatCounter

	// OutgoingRequestsDropped is the number of ARP requests which failed to write
	// to a link-layer endpoint.
	OutgoingRequestsDropped *StatCounter

	// OutgoingRequestsSent is the number of ARP requests successfully written to
	// a link-layer endpoint.
	OutgoingRequestsSent *StatCounter

	// RepliesReceived is the number of ARP replies received.
	RepliesReceived *StatCounter

	// OutgoingRepliesDropped is the number of ARP replies which failed to write
	// to a link-layer endpoint.
	OutgoingRepliesDropped *StatCounter

	// OutgoingRepliesSent is the number of ARP replies successfully written to a
	// link-layer endpoint.
	OutgoingRepliesSent *StatCounter

	// LINT.ThenChange(network/arp/stats.go:multiCounterARPStats)
}
// TCPStats collects TCP-specific stats.
//
// +stateify savable
type TCPStats struct {
	// ActiveConnectionOpenings is the number of connections opened
	// successfully via Connect.
	ActiveConnectionOpenings *StatCounter

	// PassiveConnectionOpenings is the number of connections opened
	// successfully via Listen.
	PassiveConnectionOpenings *StatCounter

	// CurrentEstablished is the number of TCP connections for which the
	// current state is ESTABLISHED.
	CurrentEstablished *StatCounter

	// CurrentConnected is the number of TCP connections that
	// are in connected state.
	CurrentConnected *StatCounter

	// EstablishedResets is the number of times TCP connections have made
	// a direct transition to the CLOSED state from either the
	// ESTABLISHED state or the CLOSE-WAIT state.
	EstablishedResets *StatCounter

	// EstablishedClosed is the number of times established TCP connections
	// made a transition to CLOSED state.
	EstablishedClosed *StatCounter

	// EstablishedTimedout is the number of times an established connection
	// was reset because of keep-alive time out.
	EstablishedTimedout *StatCounter

	// ListenOverflowSynDrop is the number of times the listen queue overflowed
	// and a SYN was dropped.
	ListenOverflowSynDrop *StatCounter

	// ListenOverflowAckDrop is the number of times the final ACK
	// in the handshake was dropped due to overflow.
	ListenOverflowAckDrop *StatCounter

	// ListenOverflowSynCookieSent is the number of times a SYN cookie was sent.
	ListenOverflowSynCookieSent *StatCounter

	// ListenOverflowSynCookieRcvd is the number of times a valid SYN
	// cookie was received.
	ListenOverflowSynCookieRcvd *StatCounter

	// ListenOverflowInvalidSynCookieRcvd is the number of times an invalid SYN
	// cookie was received.
	ListenOverflowInvalidSynCookieRcvd *StatCounter

	// FailedConnectionAttempts is the number of calls to Connect or Listen
	// (active and passive openings, respectively) that end in an error.
	FailedConnectionAttempts *StatCounter

	// ValidSegmentsReceived is the number of TCP segments received that
	// the transport layer successfully parsed.
	ValidSegmentsReceived *StatCounter

	// InvalidSegmentsReceived is the number of TCP segments received that
	// the transport layer could not parse.
	InvalidSegmentsReceived *StatCounter

	// SegmentsSent is the number of TCP segments sent.
	SegmentsSent *StatCounter

	// SegmentSendErrors is the number of TCP segments that failed to be sent.
	SegmentSendErrors *StatCounter

	// ResetsSent is the number of TCP resets sent.
	ResetsSent *StatCounter

	// ResetsReceived is the number of TCP resets received.
	ResetsReceived *StatCounter

	// Retransmits is the number of TCP segments retransmitted.
	Retransmits *StatCounter

	// FastRecovery is the number of times Fast Recovery was used to
	// recover from packet loss.
	FastRecovery *StatCounter

	// SACKRecovery is the number of times SACK Recovery was used to
	// recover from packet loss.
	SACKRecovery *StatCounter

	// TLPRecovery is the number of times recovery was accomplished by the tail
	// loss probe.
	TLPRecovery *StatCounter

	// SlowStartRetransmits is the number of segments retransmitted in slow
	// start.
	SlowStartRetransmits *StatCounter

	// FastRetransmit is the number of segments retransmitted in fast
	// recovery.
	FastRetransmit *StatCounter

	// Timeouts is the number of times the RTO expired.
	Timeouts *StatCounter

	// ChecksumErrors is the number of segments dropped due to bad checksums.
	ChecksumErrors *StatCounter

	// FailedPortReservations is the number of times TCP failed to reserve
	// a port.
	FailedPortReservations *StatCounter

	// SegmentsAckedWithDSACK is the number of segments acknowledged with
	// DSACK.
	SegmentsAckedWithDSACK *StatCounter

	// SpuriousRecovery is the number of times the connection entered loss
	// recovery spuriously.
	SpuriousRecovery *StatCounter

	// SpuriousRTORecovery is the number of spurious RTOs.
	SpuriousRTORecovery *StatCounter

	// ForwardMaxInFlightDrop is the number of connection requests that are
	// dropped due to exceeding the maximum number of in-flight connection
	// requests.
	ForwardMaxInFlightDrop *StatCounter
}
// UDPStats collects UDP-specific stats.
//
// +stateify savable
type UDPStats struct {
	// PacketsReceived is the number of UDP datagrams received via
	// HandlePacket.
	PacketsReceived *StatCounter

	// UnknownPortErrors is the number of incoming UDP datagrams dropped
	// because they did not have a known destination port.
	UnknownPortErrors *StatCounter

	// ReceiveBufferErrors is the number of incoming UDP datagrams dropped
	// due to the receiving buffer being in an invalid state.
	ReceiveBufferErrors *StatCounter

	// MalformedPacketsReceived is the number of incoming UDP datagrams
	// dropped due to the UDP header being in a malformed state.
	MalformedPacketsReceived *StatCounter

	// PacketsSent is the number of UDP datagrams sent via sendUDP.
	PacketsSent *StatCounter

	// PacketSendErrors is the number of datagrams that failed to be sent.
	PacketSendErrors *StatCounter

	// ChecksumErrors is the number of datagrams dropped due to bad checksums.
	ChecksumErrors *StatCounter
}
// NICNeighborStats holds metrics for the neighbor table.
//
// +stateify savable
type NICNeighborStats struct {
	// LINT.IfChange(NICNeighborStats)

	// UnreachableEntryLookups is the number of lookups performed on an entry in
	// Unreachable state.
	UnreachableEntryLookups *StatCounter

	// DroppedConfirmationForNoninitiatedNeighbor is the number of neighbor
	// responses that were dropped because they didn't match an entry in the
	// cache.
	DroppedConfirmationForNoninitiatedNeighbor *StatCounter

	// DroppedInvalidLinkAddressConfirmations is the number of neighbor
	// responses that were ignored because they had an invalid source link-layer
	// address.
	DroppedInvalidLinkAddressConfirmations *StatCounter

	// LINT.ThenChange(stack/nic_stats.go:multiCounterNICNeighborStats)
}
// NICPacketStats holds basic packet statistics: a packet count and a byte
// count.
//
// +stateify savable
type NICPacketStats struct {
	// LINT.IfChange(NICPacketStats)

	// Packets is the number of packets counted.
	Packets *StatCounter

	// Bytes is the number of bytes counted.
	Bytes *StatCounter

	// LINT.ThenChange(stack/nic_stats.go:multiCounterNICPacketStats)
}
// IntegralStatCounterMap holds a map associating integral keys with
// StatCounters.
//
// +stateify savable
type IntegralStatCounterMap struct {
	// mu protects counterMap. It is excluded from saved state.
	mu sync.RWMutex `state:"nosave"`
	// counterMap holds the counter registered for each key. Init allocates it.
	//
	// +checklocks:mu
	counterMap map[uint64]*StatCounter
}
2364 2365 // Keys returns all keys present in the map.
2366 func (m *IntegralStatCounterMap) Keys() []uint64 {
2367 m.mu.RLock()
2368 defer m.mu.RUnlock()
2369 var keys []uint64
2370 for k := range m.counterMap {
2371 keys = append(keys, k)
2372 }
2373 return keys
2374 }
2375 2376 // Get returns the counter mapped by the provided key.
2377 func (m *IntegralStatCounterMap) Get(key uint64) (*StatCounter, bool) {
2378 m.mu.RLock()
2379 defer m.mu.RUnlock()
2380 counter, ok := m.counterMap[key]
2381 return counter, ok
2382 }
2383 2384 // Init initializes the map.
2385 func (m *IntegralStatCounterMap) Init() {
2386 m.mu.Lock()
2387 defer m.mu.Unlock()
2388 m.counterMap = make(map[uint64]*StatCounter)
2389 }
2390 2391 // Increment increments the counter associated with the provided key.
2392 func (m *IntegralStatCounterMap) Increment(key uint64) {
2393 m.mu.RLock()
2394 counter, ok := m.counterMap[key]
2395 m.mu.RUnlock()
2396 2397 if !ok {
2398 m.mu.Lock()
2399 counter, ok = m.counterMap[key]
2400 if !ok {
2401 counter = new(StatCounter)
2402 m.counterMap[key] = counter
2403 }
2404 m.mu.Unlock()
2405 }
2406 counter.Increment()
2407 }
// A MultiIntegralStatCounterMap keeps track of two integral counter maps at
// once.
//
// +stateify savable
type MultiIntegralStatCounterMap struct {
	// a and b are the two maps updated together by Increment. They are set by
	// Init.
	a *IntegralStatCounterMap
	b *IntegralStatCounterMap
}
2417 2418 // Init sets the internal integral counter maps to point to a and b.
2419 func (m *MultiIntegralStatCounterMap) Init(a, b *IntegralStatCounterMap) {
2420 m.a = a
2421 m.b = b
2422 }
2423 2424 // Increment increments the counter in each map corresponding to the
2425 // provided key.
2426 func (m *MultiIntegralStatCounterMap) Increment(key uint64) {
2427 m.a.Increment(key)
2428 m.b.Increment(key)
2429 }
// NICStats holds NIC statistics.
//
// +stateify savable
type NICStats struct {
	// LINT.IfChange(NICStats)

	// UnknownL3ProtocolRcvdPacketCounts records the number of packets received
	// for each unknown or unsupported network protocol number.
	UnknownL3ProtocolRcvdPacketCounts *IntegralStatCounterMap

	// UnknownL4ProtocolRcvdPacketCounts records the number of packets received
	// for each unknown or unsupported transport protocol number.
	UnknownL4ProtocolRcvdPacketCounts *IntegralStatCounterMap

	// MalformedL4RcvdPackets is the number of packets received by a NIC that
	// could not be delivered to a transport endpoint because the L4 header could
	// not be parsed.
	MalformedL4RcvdPackets *StatCounter

	// Tx contains statistics about transmitted packets.
	Tx NICPacketStats

	// TxPacketsDroppedNoBufferSpace is the number of packets dropped due to the
	// NIC not having enough buffer space to send the packet.
	//
	// Packets may be dropped with a no buffer space error when the device TX
	// queue is full.
	TxPacketsDroppedNoBufferSpace *StatCounter

	// Rx contains statistics about received packets.
	Rx NICPacketStats

	// DisabledRx contains statistics about received packets on disabled NICs.
	DisabledRx NICPacketStats

	// Neighbor contains statistics about neighbor entries.
	Neighbor NICNeighborStats

	// LINT.ThenChange(stack/nic_stats.go:multiCounterNICStats)
}
2471 2472 // FillIn returns a copy of s with nil fields initialized to new StatCounters.
2473 func (s NICStats) FillIn() NICStats {
2474 InitStatCounters(reflect.ValueOf(&s).Elem())
2475 return s
2476 }
// Stats holds statistics about the networking stack.
//
// +stateify savable
type Stats struct {
	// TODO(https://gvisor.dev/issues/5986): Make the DroppedPackets stat less
	// ambiguous.

	// DroppedPackets is the number of packets dropped at the transport layer.
	DroppedPackets *StatCounter

	// NICs is an aggregation of every NIC's statistics. These should not be
	// incremented using this field, but using the relevant NIC multicounters.
	NICs NICStats

	// ICMP is an aggregation of every NetworkEndpoint's ICMP statistics (both v4
	// and v6). These should not be incremented using this field, but using the
	// relevant NetworkEndpoint ICMP multicounters.
	ICMP ICMPStats

	// IGMP is an aggregation of every NetworkEndpoint's IGMP statistics. These
	// should not be incremented using this field, but using the relevant
	// NetworkEndpoint IGMP multicounters.
	IGMP IGMPStats

	// IP is an aggregation of every NetworkEndpoint's IP statistics. These should
	// not be incremented using this field, but using the relevant NetworkEndpoint
	// IP multicounters.
	IP IPStats

	// ARP is an aggregation of every NetworkEndpoint's ARP statistics. These
	// should not be incremented using this field, but using the relevant
	// NetworkEndpoint ARP multicounters.
	ARP ARPStats

	// TCP holds TCP-specific stats.
	TCP TCPStats

	// UDP holds UDP-specific stats.
	UDP UDPStats
}
// ReceiveErrors collects packet receive errors within a transport endpoint.
//
// +stateify savable
type ReceiveErrors struct {
	// ReceiveBufferOverflow is the number of received packets dropped
	// due to the receive buffer being full.
	ReceiveBufferOverflow StatCounter

	// MalformedPacketsReceived is the number of incoming packets
	// dropped due to the packet header being in a malformed state.
	MalformedPacketsReceived StatCounter

	// ClosedReceiver is the number of received packets dropped because
	// the receiving endpoint's state was closed.
	ClosedReceiver StatCounter

	// ChecksumErrors is the number of packets dropped due to bad checksums.
	ChecksumErrors StatCounter
}
// SendErrors collects packet send errors within the transport layer for an
// endpoint.
//
// +stateify savable
type SendErrors struct {
	// SendToNetworkFailed is the number of packets that failed to be written to
	// the network endpoint.
	SendToNetworkFailed StatCounter

	// NoRoute is the number of times we failed to resolve an IP route.
	NoRoute StatCounter
}
// ReadErrors collects segment read errors from an endpoint read call.
//
// +stateify savable
type ReadErrors struct {
	// ReadClosed is the number of received packet drops because the endpoint
	// was shut down for read.
	ReadClosed StatCounter

	// InvalidEndpointState is the number of times we found the endpoint state
	// to be unexpected.
	InvalidEndpointState StatCounter

	// NotConnected is the number of times we tried to read but found that the
	// endpoint was not connected.
	NotConnected StatCounter
}
// WriteErrors collects packet write errors from an endpoint write call.
//
// +stateify savable
type WriteErrors struct {
	// WriteClosed is the number of packet drops because the endpoint
	// was shut down for write.
	WriteClosed StatCounter

	// InvalidEndpointState is the number of times we found the endpoint state
	// to be unexpected.
	InvalidEndpointState StatCounter

	// InvalidArgs is the number of times invalid input arguments were
	// provided for an endpoint Write call.
	InvalidArgs StatCounter
}
// TransportEndpointStats collects statistics about the endpoint.
//
// Its counters are held by value (StatCounter rather than *StatCounter).
//
// +stateify savable
type TransportEndpointStats struct {
	// PacketsReceived is the number of successful packet receives.
	PacketsReceived StatCounter

	// PacketsSent is the number of successful packet sends.
	PacketsSent StatCounter

	// ReceiveErrors collects packet receive errors within transport layer.
	ReceiveErrors ReceiveErrors

	// ReadErrors collects packet read errors from an endpoint read call.
	ReadErrors ReadErrors

	// SendErrors collects packet send errors within the transport layer.
	SendErrors SendErrors

	// WriteErrors collects packet write errors from an endpoint write call.
	WriteErrors WriteErrors
}
// IsEndpointStats is an empty method to implement the tcpip.EndpointStats
// marker interface. It has no effect when called.
func (*TransportEndpointStats) IsEndpointStats() {}
2612 2613 // InitStatCounters initializes v's fields with nil StatCounter fields to new
2614 // StatCounters.
2615 func InitStatCounters(v reflect.Value) {
2616 for i := 0; i < v.NumField(); i++ {
2617 v := v.Field(i)
2618 if s, ok := v.Addr().Interface().(**StatCounter); ok {
2619 if *s == nil {
2620 *s = new(StatCounter)
2621 }
2622 } else if s, ok := v.Addr().Interface().(**IntegralStatCounterMap); ok {
2623 if *s == nil {
2624 *s = new(IntegralStatCounterMap)
2625 (*s).Init()
2626 }
2627 } else {
2628 InitStatCounters(v)
2629 }
2630 }
2631 }
2632 2633 // FillIn returns a copy of s with nil fields initialized to new StatCounters.
2634 func (s Stats) FillIn() Stats {
2635 InitStatCounters(reflect.ValueOf(&s).Elem())
2636 return s
2637 }
2638 2639 // Clone clones a copy of the TransportEndpointStats into dst by atomically
2640 // reading each field.
2641 func (src *TransportEndpointStats) Clone(dst *TransportEndpointStats) {
2642 clone(reflect.ValueOf(dst).Elem(), reflect.ValueOf(src).Elem())
2643 }
2644 2645 func clone(dst reflect.Value, src reflect.Value) {
2646 for i := 0; i < dst.NumField(); i++ {
2647 d := dst.Field(i)
2648 s := src.Field(i)
2649 if c, ok := s.Addr().Interface().(*StatCounter); ok {
2650 d.Addr().Interface().(*StatCounter).IncrementBy(c.Value())
2651 } else {
2652 clone(d, s)
2653 }
2654 }
2655 }
2656 2657 // String implements the fmt.Stringer interface.
2658 func (a Address) String() string {
2659 switch l := a.Len(); l {
2660 case 4:
2661 return fmt.Sprintf("%d.%d.%d.%d", int(a.addr[0]), int(a.addr[1]), int(a.addr[2]), int(a.addr[3]))
2662 case 16:
2663 // Find the longest subsequence of hexadecimal zeros.
2664 start, end := -1, -1
2665 for i := 0; i < a.Len(); i += 2 {
2666 j := i
2667 for j < a.Len() && a.addr[j] == 0 && a.addr[j+1] == 0 {
2668 j += 2
2669 }
2670 if j > i+2 && j-i > end-start {
2671 start, end = i, j
2672 }
2673 }
2674 2675 var b strings.Builder
2676 for i := 0; i < a.Len(); i += 2 {
2677 if i == start {
2678 b.WriteString("::")
2679 i = end
2680 if end >= a.Len() {
2681 break
2682 }
2683 } else if i > 0 {
2684 b.WriteByte(':')
2685 }
2686 v := uint16(a.addr[i+0])<<8 | uint16(a.addr[i+1])
2687 if v == 0 {
2688 b.WriteByte('0')
2689 } else {
2690 const digits = "0123456789abcdef"
2691 for i := uint(3); i < 4; i-- {
2692 if v := v >> (i * 4); v != 0 {
2693 b.WriteByte(digits[v&0xf])
2694 }
2695 }
2696 }
2697 }
2698 return b.String()
2699 default:
2700 return fmt.Sprintf("%x", a.addr[:l])
2701 }
2702 }
2703 2704 // To4 converts the IPv4 address to a 4-byte representation.
2705 // If the address is not an IPv4 address, To4 returns the empty Address.
2706 func (a Address) To4() Address {
2707 const (
2708 ipv4len = 4
2709 ipv6len = 16
2710 )
2711 if a.Len() == ipv4len {
2712 return a
2713 }
2714 if a.Len() == ipv6len &&
2715 isZeros(a.addr[:10]) &&
2716 a.addr[10] == 0xff &&
2717 a.addr[11] == 0xff {
2718 return AddrFrom4Slice(a.addr[12:16])
2719 }
2720 return Address{}
2721 }
// isZeros reports whether every byte of addr is zero. An empty or nil slice
// is reported as all zeros.
func isZeros(addr []byte) bool {
	var seen byte
	for i := range addr {
		seen |= addr[i]
	}
	return seen == 0
}
// LinkAddress is a byte slice cast as a string that represents a link address.
// It is typically a 6-byte MAC address; each byte of the string is one raw
// address byte, not a printable character.
type LinkAddress string
2736 2737 // String implements the fmt.Stringer interface.
2738 func (a LinkAddress) String() string {
2739 switch len(a) {
2740 case 6:
2741 return fmt.Sprintf("%02x:%02x:%02x:%02x:%02x:%02x", a[0], a[1], a[2], a[3], a[4], a[5])
2742 default:
2743 return fmt.Sprintf("%x", []byte(a))
2744 }
2745 }
2746 2747 // ParseMACAddress parses an IEEE 802 address.
2748 //
2749 // It must be in the format aa:bb:cc:dd:ee:ff or aa-bb-cc-dd-ee-ff.
2750 func ParseMACAddress(s string) (LinkAddress, error) {
2751 parts := strings.FieldsFunc(s, func(c rune) bool {
2752 return c == ':' || c == '-'
2753 })
2754 if len(parts) != LinkAddressSize {
2755 return "", fmt.Errorf("inconsistent parts: %s", s)
2756 }
2757 addr := make([]byte, 0, len(parts))
2758 for _, part := range parts {
2759 u, err := strconv.ParseUint(part, 16, 8)
2760 if err != nil {
2761 return "", fmt.Errorf("invalid hex digits: %s", s)
2762 }
2763 addr = append(addr, byte(u))
2764 }
2765 return LinkAddress(addr), nil
2766 }
2767 2768 // GetRandMacAddr returns a mac address that can be used for local virtual devices.
2769 func GetRandMacAddr() LinkAddress {
2770 mac := make(net.HardwareAddr, LinkAddressSize)
2771 rand.Read(mac) // Fill with random data.
2772 mac[0] &^= 0x1 // Clear multicast bit.
2773 mac[0] |= 0x2 // Set local assignment bit (IEEE802).
2774 return LinkAddress(mac)
2775 }
// AddressWithPrefix is an address with its subnet prefix length.
//
// +stateify savable
type AddressWithPrefix struct {
	// Address is a network address.
	Address Address

	// PrefixLen is the subnet prefix length, in bits.
	PrefixLen int
}
2787 2788 // String implements the fmt.Stringer interface.
2789 func (a AddressWithPrefix) String() string {
2790 return fmt.Sprintf("%s/%d", a.Address, a.PrefixLen)
2791 }
2792 2793 // Subnet converts the address and prefix into a Subnet value and returns it.
2794 func (a AddressWithPrefix) Subnet() Subnet {
2795 addrLen := a.Address.length
2796 if a.PrefixLen <= 0 {
2797 return Subnet{
2798 address: Address{length: addrLen},
2799 mask: AddressMask{length: addrLen},
2800 }
2801 }
2802 if a.PrefixLen >= addrLen*8 {
2803 sub := Subnet{
2804 address: a.Address,
2805 mask: AddressMask{length: addrLen},
2806 }
2807 for i := 0; i < addrLen; i++ {
2808 sub.mask.mask[i] = 0xff
2809 }
2810 return sub
2811 }
2812 2813 sa := Address{length: addrLen}
2814 sm := AddressMask{length: addrLen}
2815 n := uint(a.PrefixLen)
2816 for i := 0; i < addrLen; i++ {
2817 if n >= 8 {
2818 sa.addr[i] = a.Address.addr[i]
2819 sm.mask[i] = 0xff
2820 n -= 8
2821 continue
2822 }
2823 sm.mask[i] = ^byte(0xff >> n)
2824 sa.addr[i] = a.Address.addr[i] & sm.mask[i]
2825 n = 0
2826 }
2827 2828 // For extra caution, call NewSubnet rather than directly creating the Subnet
2829 // value. If that fails it indicates a serious bug in this code, so panic is
2830 // in order.
2831 s, err := NewSubnet(sa, sm)
2832 if err != nil {
2833 panic("invalid subnet: " + err.Error())
2834 }
2835 return s
2836 }
// ProtocolAddress is an address and the network protocol it is associated
// with.
//
// +stateify savable
type ProtocolAddress struct {
	// Protocol is the network protocol number of the address.
	Protocol NetworkProtocolNumber

	// AddressWithPrefix is a network address with its subnet prefix length.
	AddressWithPrefix AddressWithPrefix
}
var (
	// danglingEndpointsMu protects access to danglingEndpoints.
	danglingEndpointsMu sync.Mutex

	// danglingEndpoints tracks all dangling endpoints no longer owned by the
	// app. It is used as a set: only the keys carry information.
	danglingEndpoints = make(map[Endpoint]struct{})
)
2857 2858 // GetDanglingEndpoints returns all dangling endpoints.
2859 func GetDanglingEndpoints() []Endpoint {
2860 danglingEndpointsMu.Lock()
2861 es := make([]Endpoint, 0, len(danglingEndpoints))
2862 for e := range danglingEndpoints {
2863 es = append(es, e)
2864 }
2865 danglingEndpointsMu.Unlock()
2866 return es
2867 }
2868 2869 // ReleaseDanglingEndpoints clears out all all reference counted objects held by
2870 // dangling endpoints.
2871 func ReleaseDanglingEndpoints() {
2872 // Get the dangling endpoints first to avoid locking around Release(), which
2873 // can cause a lock inversion with endpoint.mu and danglingEndpointsMu.
2874 // Calling Release on a dangling endpoint that has been deleted is a noop.
2875 eps := GetDanglingEndpoints()
2876 for _, ep := range eps {
2877 ep.Abort()
2878 }
2879 }
2880 2881 // AddDanglingEndpoint adds a dangling endpoint.
2882 func AddDanglingEndpoint(e Endpoint) {
2883 danglingEndpointsMu.Lock()
2884 danglingEndpoints[e] = struct{}{}
2885 danglingEndpointsMu.Unlock()
2886 }
2887 2888 // DeleteDanglingEndpoint removes a dangling endpoint.
2889 func DeleteDanglingEndpoint(e Endpoint) {
2890 danglingEndpointsMu.Lock()
2891 delete(danglingEndpoints, e)
2892 danglingEndpointsMu.Unlock()
2893 }
// AsyncLoading is the global barrier for asynchronous endpoint loading
// activities.
//
// NOTE(review): presumably loaders register with Add/Done and consumers call
// Wait before relying on loaded endpoints; confirm against the callers.
var AsyncLoading sync.WaitGroup
2898