tcpip.go raw

   1  // Copyright 2018 The gVisor Authors.
   2  //
   3  // Licensed under the Apache License, Version 2.0 (the "License");
   4  // you may not use this file except in compliance with the License.
   5  // You may obtain a copy of the License at
   6  //
   7  //     http://www.apache.org/licenses/LICENSE-2.0
   8  //
   9  // Unless required by applicable law or agreed to in writing, software
  10  // distributed under the License is distributed on an "AS IS" BASIS,
  11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12  // See the License for the specific language governing permissions and
  13  // limitations under the License.
  14  
  15  // Package tcpip provides the interfaces and related types that users of the
  16  // tcpip stack will use in order to create endpoints used to send and receive
  17  // data over the network stack.
  18  //
  19  // The starting point is the creation and configuration of a stack. A stack can
  20  // be created by calling the New() function of the tcpip/stack/stack package;
  21  // configuring a stack involves creating NICs (via calls to Stack.CreateNIC()),
  22  // adding network addresses (via calls to Stack.AddProtocolAddress()), and
  23  // setting a route table (via a call to Stack.SetRouteTable()).
  24  //
  25  // Once a stack is configured, endpoints can be created by calling
  26  // Stack.NewEndpoint(). Such endpoints can be used to send/receive data, connect
  27  // to peers, listen for connections, accept connections, etc., depending on the
  28  // transport protocol selected.
  29  package tcpip
  30  
  31  import (
  32  	"bytes"
  33  	"errors"
  34  	"fmt"
  35  	"io"
  36  	"math"
  37  	"math/bits"
  38  	"net"
  39  	"reflect"
  40  	"strconv"
  41  	"strings"
  42  	"time"
  43  
  44  	"gvisor.dev/gvisor/pkg/atomicbitops"
  45  	"gvisor.dev/gvisor/pkg/rand"
  46  	"gvisor.dev/gvisor/pkg/sync"
  47  	"gvisor.dev/gvisor/pkg/waiter"
  48  )
  49  
// Local copies of the IPv4/IPv6 address sizes (in bytes) and network protocol
// numbers. They are redeclared here because using the header package would
// cause an import cycle.
//
// NOTE(review): 0x0800 and 0x86dd look like the IPv4/IPv6 EtherType values;
// confirm they stay in sync with the header package.
const (
	ipv4AddressSize    = 4
	ipv4ProtocolNumber = 0x0800
	ipv6AddressSize    = 16
	ipv6ProtocolNumber = 0x86dd
)
  57  
const (
	// LinkAddressSize is the size, in bytes, of a MAC address.
	LinkAddressSize = 6
)
  62  
// Well-known all-zero IP addresses, expressed as raw byte slices.
//
// IPv4Zero is four zero bytes and IPv6Zero is sixteen zero bytes. These are
// ordinary package-level slices, so their contents are shared by every user;
// NOTE(review): callers presumably must treat them as read-only.
var (
	IPv4Zero = []byte{0, 0, 0, 0}
	IPv6Zero = []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
)
  68  
// Errors related to Subnet. Both are returned by NewSubnet.
//
// errSubnetLengthMismatch is returned when the address and the mask have
// different lengths; errSubnetAddressMasked is returned when the address has a
// bit set in a position where the mask bit is clear.
var (
	errSubnetLengthMismatch = errors.New("subnet length of address and mask differ")
	errSubnetAddressMasked  = errors.New("subnet address has bits set outside the mask")
)
  74  
  75  // ErrSaveRejection indicates a failed save due to unsupported networking state.
  76  // This type of errors is only used for save logic.
  77  type ErrSaveRejection struct {
  78  	Err error
  79  }
  80  
  81  // Error returns a sensible description of the save rejection error.
  82  func (e *ErrSaveRejection) Error() string {
  83  	return "save rejected due to unsupported networking state: " + e.Err.Error()
  84  }
  85  
  86  // MonotonicTime is a monotonic clock reading.
  87  //
  88  // +stateify savable
  89  type MonotonicTime struct {
  90  	nanoseconds int64
  91  }
  92  
  93  // String implements Stringer.
  94  func (mt MonotonicTime) String() string {
  95  	return strconv.FormatInt(mt.nanoseconds, 10)
  96  }
  97  
  98  // MonotonicTimeInfinite returns the monotonic timestamp as far away in the
  99  // future as possible.
 100  func MonotonicTimeInfinite() MonotonicTime {
 101  	return MonotonicTime{nanoseconds: math.MaxInt64}
 102  }
 103  
 104  // Before reports whether the monotonic clock reading mt is before u.
 105  func (mt MonotonicTime) Before(u MonotonicTime) bool {
 106  	return mt.nanoseconds < u.nanoseconds
 107  }
 108  
 109  // After reports whether the monotonic clock reading mt is after u.
 110  func (mt MonotonicTime) After(u MonotonicTime) bool {
 111  	return mt.nanoseconds > u.nanoseconds
 112  }
 113  
 114  // Add returns the monotonic clock reading mt+d.
 115  func (mt MonotonicTime) Add(d time.Duration) MonotonicTime {
 116  	return MonotonicTime{
 117  		nanoseconds: time.Unix(0, mt.nanoseconds).Add(d).Sub(time.Unix(0, 0)).Nanoseconds(),
 118  	}
 119  }
 120  
 121  // Sub returns the duration mt-u. If the result exceeds the maximum (or minimum)
 122  // value that can be stored in a Duration, the maximum (or minimum) duration
 123  // will be returned. To compute t-d for a duration d, use t.Add(-d).
 124  func (mt MonotonicTime) Sub(u MonotonicTime) time.Duration {
 125  	return time.Unix(0, mt.nanoseconds).Sub(time.Unix(0, u.nanoseconds))
 126  }
 127  
 128  // Milliseconds returns the time in milliseconds.
 129  func (mt MonotonicTime) Milliseconds() int64 {
 130  	return mt.nanoseconds / 1e6
 131  }
 132  
// A Clock provides the current time and schedules work for execution.
//
// Times returned by a Clock should always be used for application-visible
// time. Only monotonic times should be used for netstack internal timekeeping.
type Clock interface {
	// Now returns the current local time as a time.Time.
	Now() time.Time

	// NowMonotonic returns the current monotonic clock reading.
	NowMonotonic() MonotonicTime

	// AfterFunc waits for the duration d to elapse and then calls f in its own
	// goroutine. It returns a Timer that can be used to cancel the call using
	// its Stop method.
	AfterFunc(d time.Duration, f func()) Timer
}
 149  
// Timer represents a single event. A Timer must be created with
// Clock.AfterFunc.
type Timer interface {
	// Stop prevents the Timer from firing. It returns true if the call stops the
	// timer, false if the timer has already expired or been stopped.
	//
	// If Stop returns false because the timer has already expired, the function
	// f of Clock.AfterFunc(d, f) has been started in its own goroutine; Stop
	// does not wait for f to complete before returning. If the caller needs to
	// know whether f is completed, it must coordinate with f explicitly.
	Stop() bool

	// Reset changes the timer to expire after duration d.
	//
	// Reset should be invoked only on stopped or expired timers. If the timer is
	// known to have expired, Reset can be used directly. Otherwise, the caller
	// must coordinate with the function f of Clock.AfterFunc(d, f).
	Reset(d time.Duration)
}
 169  
 170  // Address is a byte slice cast as a string that represents the address of a
 171  // network node. Or, in the case of unix endpoints, it may represent a path.
 172  //
 173  // +stateify savable
 174  type Address struct {
 175  	addr   [16]byte
 176  	length int
 177  }
 178  
 179  // AddrFrom4 converts addr to an Address.
 180  func AddrFrom4(addr [4]byte) Address {
 181  	ret := Address{
 182  		length: 4,
 183  	}
 184  	// It's guaranteed that copy will return 4.
 185  	copy(ret.addr[:], addr[:])
 186  	return ret
 187  }
 188  
 189  // AddrFrom4Slice converts addr to an Address. It panics if len(addr) != 4.
 190  func AddrFrom4Slice(addr []byte) Address {
 191  	if len(addr) != 4 {
 192  		panic(fmt.Sprintf("bad address length for address %v", addr))
 193  	}
 194  	ret := Address{
 195  		length: 4,
 196  	}
 197  	// It's guaranteed that copy will return 4.
 198  	copy(ret.addr[:], addr)
 199  	return ret
 200  }
 201  
 202  // AddrFrom16 converts addr to an Address.
 203  func AddrFrom16(addr [16]byte) Address {
 204  	ret := Address{
 205  		length: 16,
 206  	}
 207  	// It's guaranteed that copy will return 16.
 208  	copy(ret.addr[:], addr[:])
 209  	return ret
 210  }
 211  
 212  // AddrFrom16Slice converts addr to an Address. It panics if len(addr) != 16.
 213  func AddrFrom16Slice(addr []byte) Address {
 214  	if len(addr) != 16 {
 215  		panic(fmt.Sprintf("bad address length for address %v", addr))
 216  	}
 217  	ret := Address{
 218  		length: 16,
 219  	}
 220  	// It's guaranteed that copy will return 16.
 221  	copy(ret.addr[:], addr)
 222  	return ret
 223  }
 224  
 225  // AddrFromSlice converts addr to an Address. It returns the Address zero value
 226  // if len(addr) != 4 or 16.
 227  func AddrFromSlice(addr []byte) Address {
 228  	switch len(addr) {
 229  	case ipv4AddressSize:
 230  		return AddrFrom4Slice(addr)
 231  	case ipv6AddressSize:
 232  		return AddrFrom16Slice(addr)
 233  	}
 234  	return Address{}
 235  }
 236  
 237  // As4 returns a as a 4 byte array. It panics if the address length is not 4.
 238  func (a Address) As4() [4]byte {
 239  	if a.Len() != 4 {
 240  		panic(fmt.Sprintf("bad address length for address %v", a.addr))
 241  	}
 242  	return [4]byte(a.addr[:4])
 243  }
 244  
 245  // As16 returns a as a 16 byte array. It panics if the address length is not 16.
 246  func (a Address) As16() [16]byte {
 247  	if a.Len() != 16 {
 248  		panic(fmt.Sprintf("bad address length for address %v", a.addr))
 249  	}
 250  	return [16]byte(a.addr[:16])
 251  }
 252  
 253  // AsSlice returns a as a byte slice. Callers should be careful as it can
 254  // return a window into existing memory.
 255  //
 256  // +checkescape
 257  func (a *Address) AsSlice() []byte {
 258  	return a.addr[:a.length]
 259  }
 260  
 261  // BitLen returns the length in bits of a.
 262  func (a Address) BitLen() int {
 263  	return a.Len() * 8
 264  }
 265  
 266  // Len returns the length in bytes of a.
 267  func (a Address) Len() int {
 268  	return a.length
 269  }
 270  
 271  // WithPrefix returns the address with a prefix that represents a point subnet.
 272  func (a Address) WithPrefix() AddressWithPrefix {
 273  	return AddressWithPrefix{
 274  		Address:   a,
 275  		PrefixLen: a.BitLen(),
 276  	}
 277  }
 278  
 279  // Unspecified returns true if the address is unspecified.
 280  func (a Address) Unspecified() bool {
 281  	for _, b := range a.addr {
 282  		if b != 0 {
 283  			return false
 284  		}
 285  	}
 286  	return true
 287  }
 288  
 289  // Equal returns whether a and other are equal. It exists for use by the cmp
 290  // library.
 291  func (a Address) Equal(other Address) bool {
 292  	return a == other
 293  }
 294  
 295  // MatchingPrefix returns the matching prefix length in bits.
 296  //
 297  // Panics if b and a have different lengths.
 298  func (a Address) MatchingPrefix(b Address) uint8 {
 299  	const bitsInAByte = 8
 300  
 301  	if a.Len() != b.Len() {
 302  		panic(fmt.Sprintf("addresses %s and %s do not have the same length", a, b))
 303  	}
 304  
 305  	var prefix uint8
 306  	for i := 0; i < a.length; i++ {
 307  		aByte := a.addr[i]
 308  		bByte := b.addr[i]
 309  
 310  		if aByte == bByte {
 311  			prefix += bitsInAByte
 312  			continue
 313  		}
 314  
 315  		// Count the remaining matching bits in the byte from MSbit to LSBbit.
 316  		mask := uint8(1) << (bitsInAByte - 1)
 317  		for {
 318  			if aByte&mask == bByte&mask {
 319  				prefix++
 320  				mask >>= 1
 321  				continue
 322  			}
 323  
 324  			break
 325  		}
 326  
 327  		break
 328  	}
 329  
 330  	return prefix
 331  }
 332  
 333  // AddressMask is a bitmask for an address.
 334  //
 335  // +stateify savable
 336  type AddressMask struct {
 337  	mask   [16]byte
 338  	length int
 339  }
 340  
 341  // MaskFrom returns a Mask based on str.
 342  //
 343  // MaskFrom may allocate, and so should not be in hot paths.
 344  func MaskFrom(str string) AddressMask {
 345  	mask := AddressMask{length: len(str)}
 346  	copy(mask.mask[:], str)
 347  	return mask
 348  }
 349  
 350  // MaskFromBytes returns a Mask based on bs.
 351  func MaskFromBytes(bs []byte) AddressMask {
 352  	mask := AddressMask{length: len(bs)}
 353  	copy(mask.mask[:], bs)
 354  	return mask
 355  }
 356  
 357  // String implements Stringer.
 358  func (m AddressMask) String() string {
 359  	return fmt.Sprintf("%x", m.mask)
 360  }
 361  
 362  // AsSlice returns a as a byte slice. Callers should be careful as it can
 363  // return a window into existing memory.
 364  func (m *AddressMask) AsSlice() []byte {
 365  	return []byte(m.mask[:m.length])
 366  }
 367  
 368  // BitLen returns the length of the mask in bits.
 369  func (m AddressMask) BitLen() int {
 370  	return m.length * 8
 371  }
 372  
 373  // Len returns the length of the mask in bytes.
 374  func (m AddressMask) Len() int {
 375  	return m.length
 376  }
 377  
 378  // Prefix returns the number of bits before the first host bit.
 379  func (m AddressMask) Prefix() int {
 380  	p := 0
 381  	for _, b := range m.mask[:m.length] {
 382  		p += bits.LeadingZeros8(^b)
 383  	}
 384  	return p
 385  }
 386  
 387  // Equal returns whether m and other are equal. It exists for use by the cmp
 388  // library.
 389  func (m AddressMask) Equal(other AddressMask) bool {
 390  	return m == other
 391  }
 392  
 393  // Subnet is a subnet defined by its address and mask.
 394  //
 395  // +stateify savable
 396  type Subnet struct {
 397  	address Address
 398  	mask    AddressMask
 399  }
 400  
 401  // NewSubnet creates a new Subnet, checking that the address and mask are the same length.
 402  func NewSubnet(a Address, m AddressMask) (Subnet, error) {
 403  	if a.Len() != m.Len() {
 404  		return Subnet{}, errSubnetLengthMismatch
 405  	}
 406  	for i := 0; i < a.Len(); i++ {
 407  		if a.addr[i]&^m.mask[i] != 0 {
 408  			return Subnet{}, errSubnetAddressMasked
 409  		}
 410  	}
 411  	return Subnet{a, m}, nil
 412  }
 413  
 414  // String implements Stringer.
 415  func (s Subnet) String() string {
 416  	return fmt.Sprintf("%s/%d", s.ID(), s.Prefix())
 417  }
 418  
 419  // Contains returns true iff the address is of the same length and matches the
 420  // subnet address and mask.
 421  func (s *Subnet) Contains(a Address) bool {
 422  	if a.Len() != s.address.Len() {
 423  		return false
 424  	}
 425  	for i := 0; i < a.Len(); i++ {
 426  		if a.addr[i]&s.mask.mask[i] != s.address.addr[i] {
 427  			return false
 428  		}
 429  	}
 430  	return true
 431  }
 432  
 433  // ID returns the subnet ID.
 434  func (s *Subnet) ID() Address {
 435  	return s.address
 436  }
 437  
 438  // Bits returns the number of ones (network bits) and zeros (host bits) in the
 439  // subnet mask.
 440  func (s *Subnet) Bits() (ones int, zeros int) {
 441  	ones = s.mask.Prefix()
 442  	return ones, s.mask.BitLen() - ones
 443  }
 444  
 445  // Prefix returns the number of bits before the first host bit.
 446  func (s *Subnet) Prefix() int {
 447  	return s.mask.Prefix()
 448  }
 449  
 450  // Mask returns the subnet mask.
 451  func (s *Subnet) Mask() AddressMask {
 452  	return s.mask
 453  }
 454  
 455  // Broadcast returns the subnet's broadcast address.
 456  func (s *Subnet) Broadcast() Address {
 457  	addrCopy := s.address
 458  	for i := 0; i < addrCopy.Len(); i++ {
 459  		addrCopy.addr[i] |= ^s.mask.mask[i]
 460  	}
 461  	return addrCopy
 462  }
 463  
 464  // IsBroadcast returns true if the address is considered a broadcast address.
 465  func (s *Subnet) IsBroadcast(address Address) bool {
 466  	// Only IPv4 supports the notion of a broadcast address.
 467  	if address.Len() != ipv4AddressSize {
 468  		return false
 469  	}
 470  
 471  	// Normally, we would just compare address with the subnet's broadcast
 472  	// address but there is an exception where a simple comparison is not
 473  	// correct. This exception is for /31 and /32 IPv4 subnets where all
 474  	// addresses are considered valid host addresses.
 475  	//
 476  	// For /31 subnets, the case is easy. RFC 3021 Section 2.1 states that
 477  	// both addresses in a /31 subnet "MUST be interpreted as host addresses."
 478  	//
 479  	// For /32, the case is a bit more vague. RFC 3021 makes no mention of /32
 480  	// subnets. However, the same reasoning applies - if an exception is not
 481  	// made, then there do not exist any host addresses in a /32 subnet. RFC
 482  	// 4632 Section 3.1 also vaguely implies this interpretation by referring
 483  	// to addresses in /32 subnets as "host routes."
 484  	return s.Prefix() <= 30 && s.Broadcast() == address
 485  }
 486  
 487  // Equal returns true if this Subnet is equal to the given Subnet.
 488  func (s Subnet) Equal(o Subnet) bool {
 489  	// If this changes, update Route.Equal accordingly.
 490  	return s == o
 491  }
 492  
// NICID is a number that uniquely identifies a NIC. It is a signed 32-bit
// integer.
type NICID int32
 495  
// ShutdownFlags represents flags that can be passed to the Shutdown() method
// of the Endpoint interface.
type ShutdownFlags int

// Values of the flags that can be passed to the Shutdown() method. They can
// be OR'ed together: each flag occupies its own bit (ShutdownRead is 1 and
// ShutdownWrite is 2).
const (
	ShutdownRead ShutdownFlags = 1 << iota
	ShutdownWrite
)
 506  
// PacketType is used to indicate the destination of the packet.
type PacketType uint8

// Values of PacketType. They are numbered consecutively starting at zero, so
// the zero value of PacketType is PacketHost.
const (
	// PacketHost indicates a packet addressed to the local host.
	PacketHost PacketType = iota

	// PacketOtherHost indicates an outgoing packet addressed to
	// another host caught by a NIC in promiscuous mode.
	PacketOtherHost

	// PacketOutgoing for a packet originating from the local host
	// that is looped back to a packet socket.
	PacketOutgoing

	// PacketBroadcast indicates a link layer broadcast packet.
	PacketBroadcast

	// PacketMulticast indicates a link layer multicast packet.
	PacketMulticast
)
 528  
// FullAddress represents a full transport node address, as required by the
// Connect() and Bind() methods. It combines a NIC identifier, a network
// address, a transport port and a link layer address.
//
// +stateify savable
type FullAddress struct {
	// NIC is the ID of the NIC this address refers to.
	//
	// This may not be used by all endpoint types.
	NIC NICID

	// Addr is the network address.
	Addr Address

	// Port is the transport port.
	//
	// This may not be used by all endpoint types.
	Port uint16

	// LinkAddr is the link layer address.
	LinkAddr LinkAddress
}
 550  
// Payloader is an interface that provides data.
//
// This interface allows the endpoint to request the amount of data it needs
// based on internal buffers without exposing them.
type Payloader interface {
	io.Reader

	// Len returns the number of bytes of the unread portion of the
	// Reader.
	Len() int
}

// Compile-time checks that the standard library's *bytes.Buffer and
// *bytes.Reader satisfy Payloader.
var _ Payloader = (*bytes.Buffer)(nil)
var _ Payloader = (*bytes.Reader)(nil)
 565  
 566  var _ io.Writer = (*SliceWriter)(nil)
 567  
 568  // SliceWriter implements io.Writer for slices.
 569  type SliceWriter []byte
 570  
 571  // Write implements io.Writer.Write.
 572  func (s *SliceWriter) Write(b []byte) (int, error) {
 573  	n := copy(*s, b)
 574  	*s = (*s)[n:]
 575  	var err error
 576  	if n != len(b) {
 577  		err = io.ErrShortWrite
 578  	}
 579  	return n, err
 580  }
 581  
 582  var _ io.Writer = (*LimitedWriter)(nil)
 583  
 584  // A LimitedWriter writes to W but limits the amount of data copied to just N
 585  // bytes. Each call to Write updates N to reflect the new amount remaining.
 586  type LimitedWriter struct {
 587  	W io.Writer
 588  	N int64
 589  }
 590  
 591  func (l *LimitedWriter) Write(p []byte) (int, error) {
 592  	pLen := int64(len(p))
 593  	if pLen > l.N {
 594  		p = p[:l.N]
 595  	}
 596  	n, err := l.W.Write(p)
 597  	n64 := int64(n)
 598  	if err == nil && n64 != pLen {
 599  		err = io.ErrShortWrite
 600  	}
 601  	l.N -= n64
 602  	return n, err
 603  }
 604  
// SendableControlMessages contains socket control messages that can be written.
//
// Each value field is paired with a Has* flag; the value is only meaningful
// when its flag is set.
//
// +stateify savable
type SendableControlMessages struct {
	// HasTTL indicates whether TTL is valid/set.
	HasTTL bool

	// TTL is the IPv4 Time To Live of the associated packet.
	TTL uint8

	// HasHopLimit indicates whether HopLimit is valid/set.
	HasHopLimit bool

	// HopLimit is the IPv6 Hop Limit of the associated packet.
	HopLimit uint8

	// HasIPv6PacketInfo indicates whether IPv6PacketInfo is set.
	HasIPv6PacketInfo bool

	// IPv6PacketInfo holds interface and address data on an incoming packet.
	//
	// NOTE(review): the wording says "incoming" although this struct describes
	// messages to be written; confirm the intended direction.
	IPv6PacketInfo IPv6PacketInfo
}
 627  
// ReceivableControlMessages contains socket control messages that can be
// received.
//
// Most value fields are paired with a Has* flag; such a value is only
// meaningful when its flag is set.
//
// +stateify savable
type ReceivableControlMessages struct {
	// Timestamp is the time that the last packet used to create the read data
	// was received. It is only valid when HasTimestamp is set.
	Timestamp time.Time `state:".(int64)"`

	// HasInq indicates whether Inq is valid/set.
	HasInq bool

	// Inq is the number of bytes ready to be received.
	Inq int32

	// HasTOS indicates whether TOS is valid/set.
	HasTOS bool

	// TOS is the IPv4 type of service of the associated packet.
	TOS uint8

	// HasTTL indicates whether TTL is valid/set.
	HasTTL bool

	// TTL is the IPv4 Time To Live of the associated packet.
	TTL uint8

	// HasHopLimit indicates whether HopLimit is valid/set.
	HasHopLimit bool

	// HopLimit is the IPv6 Hop Limit of the associated packet.
	HopLimit uint8

	// HasTimestamp indicates whether Timestamp is valid/set.
	HasTimestamp bool

	// HasTClass indicates whether TClass is valid/set.
	HasTClass bool

	// TClass is the IPv6 traffic class of the associated packet.
	TClass uint32

	// HasIPPacketInfo indicates whether PacketInfo is set.
	HasIPPacketInfo bool

	// PacketInfo holds interface and address data on an incoming packet.
	PacketInfo IPPacketInfo

	// HasIPv6PacketInfo indicates whether IPv6PacketInfo is set.
	HasIPv6PacketInfo bool

	// IPv6PacketInfo holds interface and address data on an incoming packet.
	IPv6PacketInfo IPv6PacketInfo

	// HasOriginalDstAddress indicates whether OriginalDstAddress is
	// set.
	HasOriginalDstAddress bool

	// OriginalDstAddress holds the original destination address
	// and port of the incoming packet.
	OriginalDstAddress FullAddress

	// SockErr is the dequeued socket error on recvmsg(MSG_ERRQUEUE).
	SockErr *SockError
}
 693  
// PacketOwner is used to get UID and GID of the packet.
type PacketOwner interface {
	// KUID returns KUID of the packet as an unsigned 32-bit value.
	KUID() uint32

	// KGID returns KGID of the packet as an unsigned 32-bit value.
	KGID() uint32
}
 702  
// ReadOptions contains options for Endpoint.Read. The zero value requests a
// plain (non-peek) read with no additional information returned.
type ReadOptions struct {
	// Peek indicates whether this read is a peek.
	Peek bool

	// NeedRemoteAddr indicates whether to return the remote address, if
	// supported.
	NeedRemoteAddr bool

	// NeedLinkPacketInfo indicates whether to return the link-layer information,
	// if supported.
	NeedLinkPacketInfo bool
}
 716  
// ReadResult represents result for a successful Endpoint.Read.
type ReadResult struct {
	// Count is the number of bytes received and written to the buffer.
	Count int

	// Total is the number of bytes of the received packet. This can be used to
	// determine whether the read is truncated.
	Total int

	// ControlMessages is the control messages received.
	ControlMessages ReceivableControlMessages

	// RemoteAddr is the remote address if ReadOptions.NeedRemoteAddr is true.
	RemoteAddr FullAddress

	// LinkPacketInfo is the link-layer information of the received packet if
	// ReadOptions.NeedLinkPacketInfo is true.
	LinkPacketInfo LinkPacketInfo
}
 736  
// Endpoint is the interface implemented by transport protocols (e.g., tcp, udp)
// that exposes functionality like read, write, connect, etc. to users of the
// networking stack.
type Endpoint interface {
	// Close puts the endpoint in a closed state and frees all resources
	// associated with it. Close initiates the teardown process, the
	// Endpoint may not be fully closed when Close returns.
	Close()

	// Abort initiates an expedited endpoint teardown. As compared to
	// Close, Abort prioritizes closing the Endpoint quickly over cleanly.
	// Abort is best effort; implementing Abort with Close is acceptable.
	Abort()

	// Read reads data from the endpoint and optionally writes to dst.
	//
	// This method does not block if there is no data pending; in this case,
	// ErrWouldBlock is returned.
	//
	// If non-zero number of bytes are successfully read and written to dst, err
	// must be nil. Otherwise, if dst failed to write anything, ErrBadBuffer
	// should be returned.
	Read(io.Writer, ReadOptions) (ReadResult, Error)

	// Write writes data to the endpoint's peer. This method does not block if
	// the data cannot be written.
	//
	// Unlike io.Writer.Write, Endpoint.Write transfers ownership of any bytes
	// successfully written to the Endpoint. That is, if a call to
	// Write(SlicePayload{data}) returns (n, err), it may retain data[:n], and
	// the caller should not use data[:n] after Write returns.
	//
	// Note that unlike io.Writer.Write, it is not an error for Write to
	// perform a partial write (if n > 0, no error may be returned). Only
	// stream (TCP) Endpoints may return partial writes, and even then only
	// in the case where writing additional data would block. Other Endpoints
	// will either write the entire message or return an error.
	Write(Payloader, WriteOptions) (int64, Error)

	// Connect connects the endpoint to its peer. Specifying a NIC is
	// optional.
	//
	// There are three classes of return values:
	//	nil -- the attempt to connect succeeded.
	//	ErrConnectStarted/ErrAlreadyConnecting -- the connect attempt started
	//		but hasn't completed yet. In this case, the caller must call Connect
	//		or GetSockOpt(ErrorOption) when the endpoint becomes writable to
	//		get the actual result. The first call to Connect after the socket has
	//		connected returns nil. Calling connect again results in ErrAlreadyConnected.
	//	Anything else -- the attempt to connect failed.
	//
	// If address.Addr is empty, this means that Endpoint has to be
	// disconnected if this is supported, otherwise
	// ErrAddressFamilyNotSupported must be returned.
	Connect(address FullAddress) Error

	// Disconnect disconnects the endpoint from its peer.
	Disconnect() Error

	// Shutdown closes the read and/or write end of the endpoint connection
	// to its peer, as selected by flags.
	Shutdown(flags ShutdownFlags) Error

	// Listen puts the endpoint in "listen" mode, which allows it to accept
	// new connections.
	Listen(backlog int) Error

	// Accept returns a new endpoint if a peer has established a connection
	// to an endpoint previously set to listen mode. This method does not
	// block if no new connections are available.
	//
	// The returned Queue is the wait queue for the newly created endpoint.
	//
	// If peerAddr is not nil then it is populated with the peer address of the
	// returned endpoint.
	Accept(peerAddr *FullAddress) (Endpoint, *waiter.Queue, Error)

	// Bind binds the endpoint to a specific local address and port.
	// Specifying a NIC is optional.
	Bind(address FullAddress) Error

	// GetLocalAddress returns the address to which the endpoint is bound.
	GetLocalAddress() (FullAddress, Error)

	// GetRemoteAddress returns the address to which the endpoint is
	// connected.
	GetRemoteAddress() (FullAddress, Error)

	// Readiness returns the current readiness of the endpoint. For example,
	// if waiter.EventIn is set, the endpoint is immediately readable.
	Readiness(mask waiter.EventMask) waiter.EventMask

	// SetSockOpt sets a socket option.
	SetSockOpt(opt SettableSocketOption) Error

	// SetSockOptInt sets a socket option, for simple cases where a value
	// has the int type.
	SetSockOptInt(opt SockOptInt, v int) Error

	// GetSockOpt gets a socket option.
	GetSockOpt(opt GettableSocketOption) Error

	// GetSockOptInt gets a socket option for simple cases where a return
	// value has the int type.
	GetSockOptInt(SockOptInt) (int, Error)

	// State returns a socket's lifecycle state. The returned value is
	// protocol-specific and is primarily used for diagnostics.
	State() uint32

	// ModerateRecvBuf should be called every time data is copied to the user
	// space. This allows for dynamic tuning of recv buffer space for a
	// given socket.
	//
	// NOTE: This method is a no-op for sockets other than TCP.
	ModerateRecvBuf(copied int)

	// Info returns a copy of the transport endpoint info.
	Info() EndpointInfo

	// Stats returns a reference to the endpoint stats.
	Stats() EndpointStats

	// SetOwner sets the task owner to the endpoint owner.
	SetOwner(owner PacketOwner)

	// LastError clears and returns the last error reported by the endpoint.
	LastError() Error

	// SocketOptions returns the structure which contains all the socket
	// level options.
	SocketOptions() *SocketOptions
}
 870  
// EndpointWithPreflight is the interface implemented by endpoints that need
// to expose the `Preflight` method for preparing the endpoint prior to
// calling `Write`.
type EndpointWithPreflight interface {
	// Preflight prepares the endpoint for writes using the provided
	// WriteOptions, returning an error if the options were incompatible with
	// the endpoint's current state.
	Preflight(WriteOptions) Error
}
 880  
// LinkPacketInfo holds Link layer information for a received packet.
//
// +stateify savable
type LinkPacketInfo struct {
	// Protocol is the NetworkProtocolNumber for the packet.
	Protocol NetworkProtocolNumber

	// PktType is used to indicate the destination of the packet (see
	// PacketType).
	PktType PacketType
}
 891  
// EndpointInfo is the interface implemented by each endpoint info struct.
// It is the type returned by Endpoint.Info.
type EndpointInfo interface {
	// IsEndpointInfo is an empty method to implement the tcpip.EndpointInfo
	// marker interface.
	IsEndpointInfo()
}
 898  
// EndpointStats is the interface implemented by each endpoint stats struct.
// It is the type returned by Endpoint.Stats.
type EndpointStats interface {
	// IsEndpointStats is an empty method to implement the tcpip.EndpointStats
	// marker interface.
	IsEndpointStats()
}
 905  
// WriteOptions contains options for Endpoint.Write. It is also the argument
// taken by EndpointWithPreflight.Preflight.
type WriteOptions struct {
	// If To is not nil, write to the given address instead of the endpoint's
	// peer.
	To *FullAddress

	// More has the same semantics as Linux's MSG_MORE.
	More bool

	// EndOfRecord has the same semantics as Linux's MSG_EOR.
	EndOfRecord bool

	// Atomic means that all data fetched from Payloader must be written to the
	// endpoint. If Atomic is false, then data fetched from the Payloader may be
	// discarded if available endpoint buffer space is insufficient.
	Atomic bool

	// ControlMessages contains optional overrides used when writing a packet.
	ControlMessages SendableControlMessages
}
 926  
// SockOptInt represents socket options whose values have the int type.
type SockOptInt int

// The SockOptInt values below are assigned sequentially with iota, starting
// at zero for KeepaliveCountOption.
const (
	// KeepaliveCountOption is used by SetSockOptInt/GetSockOptInt to
	// specify the number of un-ACKed TCP keepalives that will be sent
	// before the connection is closed.
	KeepaliveCountOption SockOptInt = iota

	// IPv4TOSOption is used by SetSockOptInt/GetSockOptInt to specify TOS
	// for all subsequent outgoing IPv4 packets from the endpoint.
	IPv4TOSOption

	// IPv6TrafficClassOption is used by SetSockOptInt/GetSockOptInt to
	// specify TOS for all subsequent outgoing IPv6 packets from the
	// endpoint.
	IPv6TrafficClassOption

	// MaxSegOption is used by SetSockOptInt/GetSockOptInt to set/get the
	// current Maximum Segment Size (MSS) value as specified using the
	// TCP_MAXSEG option.
	MaxSegOption

	// MTUDiscoverOption is used to set/get the path MTU discovery setting.
	//
	// NOTE: Setting this option to any value other than PMTUDiscoveryDont
	// is not supported and will fail as such, and getting this option will
	// always return PMTUDiscoveryDont.
	MTUDiscoverOption

	// MulticastTTLOption is used by SetSockOptInt/GetSockOptInt to control
	// the default TTL value for multicast messages. The default is 1.
	MulticastTTLOption

	// ReceiveQueueSizeOption is used in GetSockOptInt to specify that the
	// number of unread bytes in the input buffer should be returned.
	ReceiveQueueSizeOption

	// SendQueueSizeOption is used in GetSockOptInt to specify that the
	// number of unread bytes in the output buffer should be returned.
	SendQueueSizeOption

	// IPv4TTLOption is used by SetSockOptInt/GetSockOptInt to control the default
	// TTL value for unicast messages.
	//
	// The default is configured by DefaultTTLOption. A UseDefaultIPv4TTL value
	// configures the endpoint to use the default.
	IPv4TTLOption

	// IPv6HopLimitOption is used by SetSockOptInt/GetSockOptInt to control the
	// default hop limit value for unicast messages.
	//
	// The default is configured by DefaultTTLOption. A UseDefaultIPv6HopLimit
	// value configures the endpoint to use the default.
	IPv6HopLimitOption

	// TCPSynCountOption is used by SetSockOptInt/GetSockOptInt to specify
	// the number of SYN retransmits that TCP should send before aborting
	// the attempt to connect. It cannot exceed 255.
	//
	// NOTE: This option is currently only stubbed out and is a no-op.
	TCPSynCountOption

	// TCPWindowClampOption is used by SetSockOptInt/GetSockOptInt to bound
	// the size of the advertised window to this value.
	//
	// NOTE: This option is currently only stubbed out and is a no-op.
	TCPWindowClampOption

	// IPv6Checksum is used to request the stack to populate and validate the IPv6
	// checksum for transport level headers.
	IPv6Checksum

	// PacketMMapVersionOption is used to set the packet mmap version.
	PacketMMapVersionOption

	// PacketMMapReserveOption is used to set the packet mmap reserved space
	// between the aligned header and the payload.
	PacketMMapReserveOption
)
1007  
// Sentinel option values that select the protocol-wide default instead of a
// per-endpoint value.
const (
	// UseDefaultIPv4TTL is the IPv4TTLOption value that configures an endpoint to
	// use the default TTL currently configured by the IPv4 protocol (see
	// DefaultTTLOption).
	UseDefaultIPv4TTL = 0

	// UseDefaultIPv6HopLimit is the IPv6HopLimitOption value that configures an
	// endpoint to use the default hop limit currently configured by the IPv6
	// protocol (see DefaultTTLOption).
	UseDefaultIPv6HopLimit = -1
)
1019  
// PMTUDStrategy is the kind of path MTU discovery (PMTUD) to perform.
type PMTUDStrategy int

// The PMTUDStrategy values are assigned sequentially with iota, starting at
// zero for PMTUDiscoveryWant.
const (
	// PMTUDiscoveryWant is a setting of the MTUDiscoverOption to use
	// per-route settings.
	PMTUDiscoveryWant PMTUDStrategy = iota

	// PMTUDiscoveryDont is a setting of the MTUDiscoverOption to disable
	// path MTU discovery.
	PMTUDiscoveryDont

	// PMTUDiscoveryDo is a setting of the MTUDiscoverOption to always do
	// path MTU discovery.
	PMTUDiscoveryDo

	// PMTUDiscoveryProbe is a setting of the MTUDiscoverOption to set DF
	// but ignore path MTU.
	PMTUDiscoveryProbe
)
1040  
// GettableNetworkProtocolOption is a marker interface for network protocol
// options that may be queried.
type GettableNetworkProtocolOption interface {
	// isGettableNetworkProtocolOption is unexported, so only types declared in
	// this package can satisfy the interface.
	isGettableNetworkProtocolOption()
}
1046  
// SettableNetworkProtocolOption is a marker interface for network protocol
// options that may be set.
type SettableNetworkProtocolOption interface {
	// isSettableNetworkProtocolOption is unexported, so only types declared in
	// this package can satisfy the interface.
	isSettableNetworkProtocolOption()
}
1052  
// DefaultTTLOption is used by stack.(*Stack).NetworkProtocolOption to specify
// a default TTL.
type DefaultTTLOption uint8

// isGettableNetworkProtocolOption implements GettableNetworkProtocolOption.
func (*DefaultTTLOption) isGettableNetworkProtocolOption() {}

// isSettableNetworkProtocolOption implements SettableNetworkProtocolOption.
func (*DefaultTTLOption) isSettableNetworkProtocolOption() {}
1060  
// GettableTransportProtocolOption is a marker interface for transport protocol
// options that may be queried.
type GettableTransportProtocolOption interface {
	// isGettableTransportProtocolOption is unexported, so only types declared
	// in this package can satisfy the interface.
	isGettableTransportProtocolOption()
}
1066  
// SettableTransportProtocolOption is a marker interface for transport protocol
// options that may be set.
type SettableTransportProtocolOption interface {
	// isSettableTransportProtocolOption is unexported, so only types declared
	// in this package can satisfy the interface.
	isSettableTransportProtocolOption()
}
1072  
// TCPSACKEnabled indicates whether the SACK option is enabled for TCP.
//
// See: https://tools.ietf.org/html/rfc2018.
type TCPSACKEnabled bool

// isGettableTransportProtocolOption implements GettableTransportProtocolOption.
func (*TCPSACKEnabled) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption implements SettableTransportProtocolOption.
func (*TCPSACKEnabled) isSettableTransportProtocolOption() {}
1081  
// TCPRecovery is the loss detection algorithm used by TCP.
type TCPRecovery int32

// isGettableTransportProtocolOption implements GettableTransportProtocolOption.
func (*TCPRecovery) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption implements SettableTransportProtocolOption.
func (*TCPRecovery) isSettableTransportProtocolOption() {}
1088  
// TCPAlwaysUseSynCookies indicates unconditional usage of syncookies.
type TCPAlwaysUseSynCookies bool

// isGettableTransportProtocolOption implements GettableTransportProtocolOption.
func (*TCPAlwaysUseSynCookies) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption implements SettableTransportProtocolOption.
func (*TCPAlwaysUseSynCookies) isSettableTransportProtocolOption() {}
1095  
// TCPRecovery values. Each constant is a distinct bit (1 << iota), so the
// values are 1, 2 and 4 respectively.
const (
	// TCPRACKLossDetection indicates RACK is used for loss detection and
	// recovery.
	TCPRACKLossDetection TCPRecovery = 1 << iota

	// TCPRACKStaticReoWnd indicates the reordering window should not be
	// adjusted when DSACK is received.
	TCPRACKStaticReoWnd

	// TCPRACKNoDupTh indicates RACK should not consider the classic three
	// duplicate acknowledgements rule to mark the segments as lost. This
	// is used when reordering is not detected.
	TCPRACKNoDupTh
)
1110  
// TCPDelayEnabled enables/disables Nagle's algorithm in TCP.
type TCPDelayEnabled bool

// isGettableTransportProtocolOption implements GettableTransportProtocolOption.
func (*TCPDelayEnabled) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption implements SettableTransportProtocolOption.
func (*TCPDelayEnabled) isSettableTransportProtocolOption() {}
1117  
// TCPSendBufferSizeRangeOption is the send buffer size range for TCP,
// expressed as Min, Default and Max sizes.
//
// +stateify savable
type TCPSendBufferSizeRangeOption struct {
	Min     int
	Default int
	Max     int
}

// isGettableTransportProtocolOption implements GettableTransportProtocolOption.
func (*TCPSendBufferSizeRangeOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption implements SettableTransportProtocolOption.
func (*TCPSendBufferSizeRangeOption) isSettableTransportProtocolOption() {}
1130  
// TCPReceiveBufferSizeRangeOption is the receive buffer size range for TCP,
// expressed as Min, Default and Max sizes.
//
// +stateify savable
type TCPReceiveBufferSizeRangeOption struct {
	Min     int
	Default int
	Max     int
}

// isGettableTransportProtocolOption implements GettableTransportProtocolOption.
func (*TCPReceiveBufferSizeRangeOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption implements SettableTransportProtocolOption.
func (*TCPReceiveBufferSizeRangeOption) isSettableTransportProtocolOption() {}
1143  
// TCPAvailableCongestionControlOption is the supported congestion control
// algorithms for TCP.
type TCPAvailableCongestionControlOption string

// isGettableTransportProtocolOption implements GettableTransportProtocolOption.
func (*TCPAvailableCongestionControlOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption implements SettableTransportProtocolOption.
func (*TCPAvailableCongestionControlOption) isSettableTransportProtocolOption() {}
1151  
// TCPModerateReceiveBufferOption enables/disables receive buffer moderation
// for TCP.
type TCPModerateReceiveBufferOption bool

// isGettableTransportProtocolOption implements GettableTransportProtocolOption.
func (*TCPModerateReceiveBufferOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption implements SettableTransportProtocolOption.
func (*TCPModerateReceiveBufferOption) isSettableTransportProtocolOption() {}
1159  
// GettableSocketOption is a marker interface for socket options that may be
// queried.
type GettableSocketOption interface {
	// isGettableSocketOption is unexported, so only types declared in this
	// package can satisfy the interface.
	isGettableSocketOption()
}
1165  
// SettableSocketOption is a marker interface for socket options that may be
// configured.
type SettableSocketOption interface {
	// isSettableSocketOption is unexported, so only types declared in this
	// package can satisfy the interface.
	isSettableSocketOption()
}
1171  
// ICMPv6Filter specifies a filter for ICMPv6 types.
//
// +stateify savable
type ICMPv6Filter struct {
	// DenyType is a 256-bit bitmap, stored as eight 32-bit words, with one bit
	// per possible value of the 8-bit ICMPv6 type field. A set bit means the
	// corresponding type should be blocked.
	//
	// The bit for type t is bit (t % 32) of DenyType[t / 32].
	DenyType [8]uint32
}

// ShouldDeny reports whether the given ICMPv6 type should be denied, i.e.
// whether its bit is set in DenyType.
func (f *ICMPv6Filter) ShouldDeny(icmpType uint8) bool {
	const bitsPerWord = 32
	// Select the word holding this type's bit, then test that single bit.
	word := f.DenyType[icmpType/bitsPerWord]
	mask := uint32(1) << (icmpType % bitsPerWord)
	return word&mask != 0
}

// isGettableSocketOption implements GettableSocketOption.
func (*ICMPv6Filter) isGettableSocketOption() {}

// isSettableSocketOption implements SettableSocketOption.
func (*ICMPv6Filter) isSettableSocketOption() {}
1194  
// TpacketReq is the tpacket_req structure as described in
// https://www.kernel.org/doc/Documentation/networking/packet_mmap.txt
//
// +stateify savable
type TpacketReq struct {
	TpBlockSize uint32
	TpBlockNr   uint32
	TpFrameSize uint32
	TpFrameNr   uint32
}

// isSettableSocketOption implements SettableSocketOption.
func (*TpacketReq) isSettableSocketOption() {}
1207  
// TpacketStats is the statistics for a packet_mmap ring buffer from
// <linux/if_packet.h>.
//
// +stateify savable
type TpacketStats struct {
	Packets uint32
	Dropped uint32
}

// isGettableSocketOption implements GettableSocketOption.
func (*TpacketStats) isGettableSocketOption() {}
1218  
// EndpointState represents the state of an endpoint. It is reported, for
// example, in TCPInfoOption.State.
type EndpointState uint8
1221  
// CongestionControlState indicates the current congestion control state for
// a TCP sender.
type CongestionControlState int

// The CongestionControlState values are assigned sequentially with iota,
// starting at zero for Open.
const (
	// Open indicates that the sender is receiving ACKs in order and
	// no loss, dupACKs, etc. have been detected.
	Open CongestionControlState = iota
	// RTORecovery indicates that an RTO has occurred and the sender
	// has entered an RTO based recovery phase.
	RTORecovery
	// FastRecovery indicates that the sender has entered FastRecovery
	// based on receiving nDupAck's. This state is entered only when
	// SACK is not in use.
	FastRecovery
	// SACKRecovery indicates that the sender has entered SACK based
	// recovery.
	SACKRecovery
	// Disorder indicates the sender either received some SACK blocks
	// or dupACKs.
	Disorder
)
1244  
// TCPInfoOption is used by GetSockOpt to expose TCP statistics.
type TCPInfoOption struct {
	// RTT is the smoothed round trip time.
	RTT time.Duration

	// RTTVar is the round trip time variation.
	RTTVar time.Duration

	// RTO is the retransmission timeout for the endpoint.
	RTO time.Duration

	// State is the current endpoint protocol state.
	State EndpointState

	// CcState is the congestion control state.
	CcState CongestionControlState

	// SndCwnd is the congestion window, in packets.
	SndCwnd uint32

	// SndSsthresh is the threshold between slow start and congestion
	// avoidance.
	SndSsthresh uint32

	// ReorderSeen indicates if reordering is seen in the endpoint.
	ReorderSeen bool
}

// isGettableSocketOption implements GettableSocketOption.
func (*TCPInfoOption) isGettableSocketOption() {}
1274  
// KeepaliveIdleOption is used by SetSockOpt/GetSockOpt to specify the time a
// connection must remain idle before the first TCP keepalive packet is sent.
// Once this time is reached, KeepaliveIntervalOption is used instead.
type KeepaliveIdleOption time.Duration

// isGettableSocketOption implements GettableSocketOption.
func (*KeepaliveIdleOption) isGettableSocketOption() {}

// isSettableSocketOption implements SettableSocketOption.
func (*KeepaliveIdleOption) isSettableSocketOption() {}
1283  
// KeepaliveIntervalOption is used by SetSockOpt/GetSockOpt to specify the
// interval between sending TCP keepalive packets.
type KeepaliveIntervalOption time.Duration

// isGettableSocketOption implements GettableSocketOption.
func (*KeepaliveIntervalOption) isGettableSocketOption() {}

// isSettableSocketOption implements SettableSocketOption.
func (*KeepaliveIntervalOption) isSettableSocketOption() {}
1291  
// TCPUserTimeoutOption is used by SetSockOpt/GetSockOpt to specify a
// user-specified timeout for a given TCP connection.
//
// See RFC 5482 for details.
type TCPUserTimeoutOption time.Duration

// isGettableSocketOption implements GettableSocketOption.
func (*TCPUserTimeoutOption) isGettableSocketOption() {}

// isSettableSocketOption implements SettableSocketOption.
func (*TCPUserTimeoutOption) isSettableSocketOption() {}
1300  
// CongestionControlOption is used by SetSockOpt/GetSockOpt to set/get
// the current congestion control algorithm.
//
// It is both a socket option and a transport protocol option.
type CongestionControlOption string

// isGettableSocketOption implements GettableSocketOption.
func (*CongestionControlOption) isGettableSocketOption() {}

// isSettableSocketOption implements SettableSocketOption.
func (*CongestionControlOption) isSettableSocketOption() {}

// isGettableTransportProtocolOption implements GettableTransportProtocolOption.
func (*CongestionControlOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption implements SettableTransportProtocolOption.
func (*CongestionControlOption) isSettableTransportProtocolOption() {}
1312  
// TCPLingerTimeoutOption is used by SetSockOpt/GetSockOpt to set/get the
// maximum duration for which a socket lingers in the TCP_FIN_WAIT_2 state
// before being marked closed.
//
// It is both a socket option and a transport protocol option.
type TCPLingerTimeoutOption time.Duration

// isGettableSocketOption implements GettableSocketOption.
func (*TCPLingerTimeoutOption) isGettableSocketOption() {}

// isSettableSocketOption implements SettableSocketOption.
func (*TCPLingerTimeoutOption) isSettableSocketOption() {}

// isGettableTransportProtocolOption implements GettableTransportProtocolOption.
func (*TCPLingerTimeoutOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption implements SettableTransportProtocolOption.
func (*TCPLingerTimeoutOption) isSettableTransportProtocolOption() {}
1325  
// TCPTimeWaitTimeoutOption is used by SetSockOpt/GetSockOpt to set/get the
// maximum duration for which a socket lingers in the TIME_WAIT state
// before being marked closed.
//
// It is both a socket option and a transport protocol option.
type TCPTimeWaitTimeoutOption time.Duration

// isGettableSocketOption implements GettableSocketOption.
func (*TCPTimeWaitTimeoutOption) isGettableSocketOption() {}

// isSettableSocketOption implements SettableSocketOption.
func (*TCPTimeWaitTimeoutOption) isSettableSocketOption() {}

// isGettableTransportProtocolOption implements GettableTransportProtocolOption.
func (*TCPTimeWaitTimeoutOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption implements SettableTransportProtocolOption.
func (*TCPTimeWaitTimeoutOption) isSettableTransportProtocolOption() {}
1338  
// TCPDeferAcceptOption is used by SetSockOpt/GetSockOpt to allow an
// accept to return a completed connection only when there is data to be
// read. This usually means the listening socket will drop the final ACK
// of a handshake, up to the specified timeout, until a segment with data
// arrives.
type TCPDeferAcceptOption time.Duration

// isGettableSocketOption implements GettableSocketOption.
func (*TCPDeferAcceptOption) isGettableSocketOption() {}

// isSettableSocketOption implements SettableSocketOption.
func (*TCPDeferAcceptOption) isSettableSocketOption() {}
1348  
// TCPMinRTOOption is used by SetSockOpt/GetSockOpt to allow overriding the
// default MinRTO used by the Stack.
//
// It is both a socket option and a transport protocol option.
type TCPMinRTOOption time.Duration

// isGettableSocketOption implements GettableSocketOption.
func (*TCPMinRTOOption) isGettableSocketOption() {}

// isSettableSocketOption implements SettableSocketOption.
func (*TCPMinRTOOption) isSettableSocketOption() {}

// isGettableTransportProtocolOption implements GettableTransportProtocolOption.
func (*TCPMinRTOOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption implements SettableTransportProtocolOption.
func (*TCPMinRTOOption) isSettableTransportProtocolOption() {}
1360  
// TCPMaxRTOOption is used by SetSockOpt/GetSockOpt to allow overriding the
// default MaxRTO used by the Stack.
//
// It is both a socket option and a transport protocol option.
type TCPMaxRTOOption time.Duration

// isGettableSocketOption implements GettableSocketOption.
func (*TCPMaxRTOOption) isGettableSocketOption() {}

// isSettableSocketOption implements SettableSocketOption.
func (*TCPMaxRTOOption) isSettableSocketOption() {}

// isGettableTransportProtocolOption implements GettableTransportProtocolOption.
func (*TCPMaxRTOOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption implements SettableTransportProtocolOption.
func (*TCPMaxRTOOption) isSettableTransportProtocolOption() {}
1372  
// TCPMaxRetriesOption is used by SetSockOpt/GetSockOpt to set/get the
// maximum number of retransmits after which we time out the connection.
//
// It is both a socket option and a transport protocol option.
type TCPMaxRetriesOption uint64

// isGettableSocketOption implements GettableSocketOption.
func (*TCPMaxRetriesOption) isGettableSocketOption() {}

// isSettableSocketOption implements SettableSocketOption.
func (*TCPMaxRetriesOption) isSettableSocketOption() {}

// isGettableTransportProtocolOption implements GettableTransportProtocolOption.
func (*TCPMaxRetriesOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption implements SettableTransportProtocolOption.
func (*TCPMaxRetriesOption) isSettableTransportProtocolOption() {}
1384  
// TCPSynRetriesOption is used by SetSockOpt/GetSockOpt to specify the
// stack-wide default for the number of times a SYN is retransmitted before
// aborting a connect.
//
// It is both a socket option and a transport protocol option.
type TCPSynRetriesOption uint8

// isGettableSocketOption implements GettableSocketOption.
func (*TCPSynRetriesOption) isGettableSocketOption() {}

// isSettableSocketOption implements SettableSocketOption.
func (*TCPSynRetriesOption) isSettableSocketOption() {}

// isGettableTransportProtocolOption implements GettableTransportProtocolOption.
func (*TCPSynRetriesOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption implements SettableTransportProtocolOption.
func (*TCPSynRetriesOption) isSettableTransportProtocolOption() {}
1396  
// MulticastInterfaceOption is used by SetSockOpt/GetSockOpt to specify a
// default interface for multicast. The interface is described by a NIC ID
// and an interface address.
type MulticastInterfaceOption struct {
	NIC           NICID
	InterfaceAddr Address
}

// isGettableSocketOption implements GettableSocketOption.
func (*MulticastInterfaceOption) isGettableSocketOption() {}

// isSettableSocketOption implements SettableSocketOption.
func (*MulticastInterfaceOption) isSettableSocketOption() {}
1407  
// MembershipOption is used to identify a multicast membership on an interface.
//
// It is the underlying type of AddMembershipOption and
// RemoveMembershipOption.
type MembershipOption struct {
	NIC           NICID
	InterfaceAddr Address
	MulticastAddr Address
}
1414  
// AddMembershipOption identifies a multicast group to join on some interface.
type AddMembershipOption MembershipOption

// isSettableSocketOption implements SettableSocketOption.
func (*AddMembershipOption) isSettableSocketOption() {}
1419  
// RemoveMembershipOption identifies a multicast group to leave on some
// interface.
type RemoveMembershipOption MembershipOption

// isSettableSocketOption implements SettableSocketOption.
func (*RemoveMembershipOption) isSettableSocketOption() {}
1425  
// SocketDetachFilterOption is used by SetSockOpt to detach a previously
// attached classic BPF filter on a given endpoint.
type SocketDetachFilterOption int

// isSettableSocketOption implements SettableSocketOption.
func (*SocketDetachFilterOption) isSettableSocketOption() {}
1431  
// OriginalDestinationOption is used to get the original destination address
// and port of a redirected packet.
type OriginalDestinationOption FullAddress

// isGettableSocketOption implements GettableSocketOption.
func (*OriginalDestinationOption) isGettableSocketOption() {}
1437  
// TCPTimeWaitReuseOption is used by stack.(*Stack).TransportProtocolOption to
// specify whether the stack can reuse the port bound by an endpoint in
// TIME-WAIT for new connections when it is safe from a protocol viewpoint.
type TCPTimeWaitReuseOption uint8

// isGettableSocketOption implements GettableSocketOption.
func (*TCPTimeWaitReuseOption) isGettableSocketOption() {}

// isSettableSocketOption implements SettableSocketOption.
func (*TCPTimeWaitReuseOption) isSettableSocketOption() {}

// isGettableTransportProtocolOption implements GettableTransportProtocolOption.
func (*TCPTimeWaitReuseOption) isGettableTransportProtocolOption() {}

// isSettableTransportProtocolOption implements SettableTransportProtocolOption.
func (*TCPTimeWaitReuseOption) isSettableTransportProtocolOption() {}

// The TCPTimeWaitReuseOption values are assigned sequentially with iota,
// starting at zero for TCPTimeWaitReuseDisabled.
const (
	// TCPTimeWaitReuseDisabled indicates that a port bound by an endpoint in
	// TIME-WAIT cannot be reused for new connections.
	TCPTimeWaitReuseDisabled TCPTimeWaitReuseOption = iota

	// TCPTimeWaitReuseGlobal indicates that a port bound by an endpoint in
	// TIME-WAIT can be reused for new connections irrespective of the src/dest
	// addresses.
	TCPTimeWaitReuseGlobal

	// TCPTimeWaitReuseLoopbackOnly indicates that a port bound by an endpoint
	// in TIME-WAIT can only be reused if the connection was over loopback,
	// i.e. the src/dest addresses are loopback addresses.
	TCPTimeWaitReuseLoopbackOnly
)
1465  
// LingerOption is used by SetSockOpt/GetSockOpt to set/get the
// duration for which a socket lingers before returning from Close.
//
// +marshal
// +stateify savable
type LingerOption struct {
	Enabled bool
	Timeout time.Duration
}
1475  
// IPPacketInfo is the message structure for IP_PKTINFO.
//
// +stateify savable
type IPPacketInfo struct {
	// NIC is the ID of the NIC to be used.
	NIC NICID

	// LocalAddr is the local address.
	LocalAddr Address

	// DestinationAddr is the destination address found in the IP header.
	DestinationAddr Address
}
1489  
// IPv6PacketInfo is the message structure for IPV6_PKTINFO. It carries an
// address and a NIC ID.
//
// +stateify savable
type IPv6PacketInfo struct {
	Addr Address
	NIC  NICID
}
1497  
// SendBufferSizeOption is used by stack.(*Stack).Option/SetOption to
// get/set the default, min and max send buffer sizes.
//
// +stateify savable
type SendBufferSizeOption struct {
	// Min is the minimum size for the send buffer.
	Min int

	// Default is the default size for the send buffer.
	Default int

	// Max is the maximum size for the send buffer.
	Max int
}
1512  
// ReceiveBufferSizeOption is used by stack.(*Stack).Option/SetOption to
// get/set the default, min and max receive buffer sizes.
//
// +stateify savable
type ReceiveBufferSizeOption struct {
	// Min is the minimum size for the receive buffer.
	Min int

	// Default is the default size for the receive buffer.
	Default int

	// Max is the maximum size for the receive buffer.
	Max int
}
1527  
// GetSendBufferLimits is the type of a function that returns the send buffer
// size limits for the given StackHandler. GetStackSendBufferLimits has this
// signature.
type GetSendBufferLimits func(StackHandler) SendBufferSizeOption
1530  
1531  // GetStackSendBufferLimits is used to get default, min and max send buffer size.
1532  func GetStackSendBufferLimits(so StackHandler) SendBufferSizeOption {
1533  	var ss SendBufferSizeOption
1534  	if err := so.Option(&ss); err != nil {
1535  		panic(fmt.Sprintf("s.Option(%#v) = %s", ss, err))
1536  	}
1537  	return ss
1538  }
1539  
// GetReceiveBufferLimits is the type of a function that returns the receive
// buffer size limits for the given StackHandler. GetStackReceiveBufferLimits
// has this signature.
type GetReceiveBufferLimits func(StackHandler) ReceiveBufferSizeOption
1542  
1543  // GetStackReceiveBufferLimits is used to get default, min and max send buffer size.
1544  func GetStackReceiveBufferLimits(so StackHandler) ReceiveBufferSizeOption {
1545  	var ss ReceiveBufferSizeOption
1546  	if err := so.Option(&ss); err != nil {
1547  		panic(fmt.Sprintf("s.Option(%#v) = %s", ss, err))
1548  	}
1549  	return ss
1550  }
1551  
// Route is a row in the routing table. It specifies through which NIC (and
// gateway) sets of packets should be routed. A row is considered viable if the
// masked target address matches the destination address in the row.
//
// +stateify savable
type Route struct {
	RouteEntry

	// Destination must contain the target address for this row to be viable.
	Destination Subnet

	// Gateway is the gateway to be used if this row is viable.
	Gateway Address

	// NIC is the ID of the NIC to be used if this row is viable.
	NIC NICID

	// SourceHint indicates a preferred source address to use when NICs
	// have multiple addresses.
	SourceHint Address

	// MTU is the maximum transmission unit to use for this route.
	// If MTU is 0, this field is ignored and the MTU of the NIC for which this
	// route is configured is used for egress packets.
	MTU uint32
}
1578  
1579  // String implements the fmt.Stringer interface.
1580  func (r Route) String() string {
1581  	var out strings.Builder
1582  	_, _ = fmt.Fprintf(&out, "%s", r.Destination)
1583  	if r.Gateway.length > 0 {
1584  		_, _ = fmt.Fprintf(&out, " via %s", r.Gateway)
1585  	}
1586  	_, _ = fmt.Fprintf(&out, " nic %d", r.NIC)
1587  	return out.String()
1588  }
1589  
1590  // Equal returns true if the given Route is equal to this Route.
1591  func (r Route) Equal(to Route) bool {
1592  	// NOTE: This relies on the fact that r.Destination == to.Destination
1593  	return r.Destination.Equal(to.Destination) && r.NIC == to.NIC
1594  }
1595  
// TransportProtocolNumber is the number of a transport protocol.
type TransportProtocolNumber uint32
1598  
// NetworkProtocolNumber is the EtherType of a network protocol in an Ethernet
// frame.
//
// See: https://www.iana.org/assignments/ieee-802-numbers/ieee-802-numbers.xhtml
type NetworkProtocolNumber uint32
1604  
1605  // A StatCounter keeps track of a statistic.
1606  //
1607  // +stateify savable
1608  type StatCounter struct {
1609  	count atomicbitops.Uint64
1610  }
1611  
1612  // Increment adds one to the counter.
1613  func (s *StatCounter) Increment() {
1614  	s.IncrementBy(1)
1615  }
1616  
1617  // Decrement minuses one to the counter.
1618  func (s *StatCounter) Decrement() {
1619  	s.IncrementBy(^uint64(0))
1620  }
1621  
1622  // Value returns the current value of the counter.
1623  func (s *StatCounter) Value() uint64 {
1624  	return s.count.Load()
1625  }
1626  
1627  // IncrementBy increments the counter by v.
1628  func (s *StatCounter) IncrementBy(v uint64) {
1629  	s.count.Add(v)
1630  }
1631  
1632  func (s *StatCounter) String() string {
1633  	return strconv.FormatUint(s.Value(), 10)
1634  }
1635  
1636  // A MultiCounterStat keeps track of two counters at once.
1637  //
1638  // +stateify savable
1639  type MultiCounterStat struct {
1640  	a *StatCounter
1641  	b *StatCounter
1642  }
1643  
1644  // Init sets both internal counters to point to a and b.
1645  func (m *MultiCounterStat) Init(a, b *StatCounter) {
1646  	m.a = a
1647  	m.b = b
1648  }
1649  
1650  // Increment adds one to the counters.
1651  func (m *MultiCounterStat) Increment() {
1652  	m.a.Increment()
1653  	m.b.Increment()
1654  }
1655  
1656  // IncrementBy increments the counters by v.
1657  func (m *MultiCounterStat) IncrementBy(v uint64) {
1658  	m.a.IncrementBy(v)
1659  	m.b.IncrementBy(v)
1660  }
1661  
// ICMPv4PacketStats enumerates counts for all ICMPv4 packet types.
//
// +stateify savable
type ICMPv4PacketStats struct {
	// LINT.IfChange(ICMPv4PacketStats)

	// EchoRequest is the number of ICMPv4 echo request packets counted.
	EchoRequest *StatCounter

	// EchoReply is the number of ICMPv4 echo reply packets counted.
	EchoReply *StatCounter

	// DstUnreachable is the number of ICMPv4 destination unreachable packets
	// counted.
	DstUnreachable *StatCounter

	// SrcQuench is the number of ICMPv4 source quench packets counted.
	SrcQuench *StatCounter

	// Redirect is the number of ICMPv4 redirect packets counted.
	Redirect *StatCounter

	// TimeExceeded is the number of ICMPv4 time exceeded packets counted.
	TimeExceeded *StatCounter

	// ParamProblem is the number of ICMPv4 parameter problem packets counted.
	ParamProblem *StatCounter

	// Timestamp is the number of ICMPv4 timestamp packets counted.
	Timestamp *StatCounter

	// TimestampReply is the number of ICMPv4 timestamp reply packets counted.
	TimestampReply *StatCounter

	// InfoRequest is the number of ICMPv4 information request packets counted.
	InfoRequest *StatCounter

	// InfoReply is the number of ICMPv4 information reply packets counted.
	InfoReply *StatCounter

	// LINT.ThenChange(network/ipv4/stats.go:multiCounterICMPv4PacketStats)
}
1704  
// ICMPv4SentPacketStats collects outbound ICMPv4-specific stats.
//
// It embeds ICMPv4PacketStats for the per-type counts and adds counters for
// packets that were not sent.
//
// +stateify savable
type ICMPv4SentPacketStats struct {
	// LINT.IfChange(ICMPv4SentPacketStats)

	ICMPv4PacketStats

	// Dropped is the number of ICMPv4 packets dropped due to link layer errors.
	Dropped *StatCounter

	// RateLimited is the number of ICMPv4 packets dropped due to the rate limit
	// being exceeded.
	RateLimited *StatCounter

	// LINT.ThenChange(network/ipv4/stats.go:multiCounterICMPv4SentPacketStats)
}
1722  
// ICMPv4ReceivedPacketStats collects inbound ICMPv4-specific stats.
//
// It embeds ICMPv4PacketStats for the per-type counts and adds a counter for
// invalid packets.
//
// +stateify savable
type ICMPv4ReceivedPacketStats struct {
	// LINT.IfChange(ICMPv4ReceivedPacketStats)

	ICMPv4PacketStats

	// Invalid is the number of invalid ICMPv4 packets received.
	Invalid *StatCounter

	// LINT.ThenChange(network/ipv4/stats.go:multiCounterICMPv4ReceivedPacketStats)
}
1736  
// ICMPv4Stats collects ICMPv4-specific stats, split into sent and received
// packets.
//
// +stateify savable
type ICMPv4Stats struct {
	// LINT.IfChange(ICMPv4Stats)

	// PacketsSent contains statistics about sent packets.
	PacketsSent ICMPv4SentPacketStats

	// PacketsReceived contains statistics about received packets.
	PacketsReceived ICMPv4ReceivedPacketStats

	// LINT.ThenChange(network/ipv4/stats.go:multiCounterICMPv4Stats)
}
1751  
// ICMPv6PacketStats enumerates counts for all ICMPv6 packet types.
//
// +stateify savable
type ICMPv6PacketStats struct {
	// LINT.IfChange(ICMPv6PacketStats)

	// EchoRequest is the number of ICMPv6 echo request packets counted.
	EchoRequest *StatCounter

	// EchoReply is the number of ICMPv6 echo reply packets counted.
	EchoReply *StatCounter

	// DstUnreachable is the number of ICMPv6 destination unreachable packets
	// counted.
	DstUnreachable *StatCounter

	// PacketTooBig is the number of ICMPv6 packet too big packets counted.
	PacketTooBig *StatCounter

	// TimeExceeded is the number of ICMPv6 time exceeded packets counted.
	TimeExceeded *StatCounter

	// ParamProblem is the number of ICMPv6 parameter problem packets counted.
	ParamProblem *StatCounter

	// RouterSolicit is the number of ICMPv6 router solicit packets counted.
	RouterSolicit *StatCounter

	// RouterAdvert is the number of ICMPv6 router advert packets counted.
	RouterAdvert *StatCounter

	// NeighborSolicit is the number of ICMPv6 neighbor solicit packets counted.
	NeighborSolicit *StatCounter

	// NeighborAdvert is the number of ICMPv6 neighbor advert packets counted.
	NeighborAdvert *StatCounter

	// RedirectMsg is the number of ICMPv6 redirect message packets counted.
	RedirectMsg *StatCounter

	// MulticastListenerQuery is the number of Multicast Listener Query messages
	// counted.
	MulticastListenerQuery *StatCounter

	// MulticastListenerReport is the number of Multicast Listener Report messages
	// counted.
	MulticastListenerReport *StatCounter

	// MulticastListenerReportV2 is the number of Multicast Listener Report
	// Version 2 messages counted.
	MulticastListenerReportV2 *StatCounter

	// MulticastListenerDone is the number of Multicast Listener Done messages
	// counted.
	MulticastListenerDone *StatCounter

	// LINT.ThenChange(network/ipv6/stats.go:multiCounterICMPv6PacketStats)
}
1810  
// ICMPv6SentPacketStats collects outbound ICMPv6-specific stats.
//
// It extends the embedded ICMPv6PacketStats with counters for packets that
// were dropped instead of being sent.
//
// +stateify savable
type ICMPv6SentPacketStats struct {
	// LINT.IfChange(ICMPv6SentPacketStats)

	// ICMPv6PacketStats is embedded; its per-type counters are promoted onto
	// this type.
	ICMPv6PacketStats

	// Dropped is the number of ICMPv6 packets dropped due to link layer errors.
	Dropped *StatCounter

	// RateLimited is the number of ICMPv6 packets dropped due to the rate limit
	// being exceeded.
	RateLimited *StatCounter

	// LINT.ThenChange(network/ipv6/stats.go:multiCounterICMPv6SentPacketStats)
}
1828  
// ICMPv6ReceivedPacketStats collects inbound ICMPv6-specific stats.
//
// It extends the embedded ICMPv6PacketStats with counters for received
// packets that were not processed normally.
//
// +stateify savable
type ICMPv6ReceivedPacketStats struct {
	// LINT.IfChange(ICMPv6ReceivedPacketStats)

	// ICMPv6PacketStats is embedded; its per-type counters are promoted onto
	// this type.
	ICMPv6PacketStats

	// Unrecognized is the number of ICMPv6 packets received that the transport
	// layer does not know how to parse.
	Unrecognized *StatCounter

	// Invalid is the number of invalid ICMPv6 packets received.
	Invalid *StatCounter

	// RouterOnlyPacketsDroppedByHost is the number of ICMPv6 packets dropped due
	// to being router-specific packets.
	RouterOnlyPacketsDroppedByHost *StatCounter

	// LINT.ThenChange(network/ipv6/stats.go:multiCounterICMPv6ReceivedPacketStats)
}
1850  
// ICMPv6Stats collects ICMPv6-specific stats, grouped into sent and received
// packet statistics.
//
// +stateify savable
type ICMPv6Stats struct {
	// LINT.IfChange(ICMPv6Stats)

	// PacketsSent contains statistics about sent packets.
	PacketsSent ICMPv6SentPacketStats

	// PacketsReceived contains statistics about received packets.
	PacketsReceived ICMPv6ReceivedPacketStats

	// LINT.ThenChange(network/ipv6/stats.go:multiCounterICMPv6Stats)
}
1865  
// ICMPStats collects ICMP-specific stats (both v4 and v6).
//
// +stateify savable
type ICMPStats struct {
	// V4 contains the ICMPv4-specific stats.
	V4 ICMPv4Stats

	// V6 contains the ICMPv6-specific stats.
	V6 ICMPv6Stats
}
1876  
// IGMPPacketStats enumerates counts for all IGMP packet types.
//
// It is embedded by both IGMPSentPacketStats and IGMPReceivedPacketStats, so
// each counter is direction-agnostic on its own.
//
// +stateify savable
type IGMPPacketStats struct {
	// LINT.IfChange(IGMPPacketStats)

	// MembershipQuery is the number of Membership Query messages counted.
	MembershipQuery *StatCounter

	// V1MembershipReport is the number of Version 1 Membership Report messages
	// counted.
	V1MembershipReport *StatCounter

	// V2MembershipReport is the number of Version 2 Membership Report messages
	// counted.
	V2MembershipReport *StatCounter

	// V3MembershipReport is the number of Version 3 Membership Report messages
	// counted.
	V3MembershipReport *StatCounter

	// LeaveGroup is the number of Leave Group messages counted.
	LeaveGroup *StatCounter

	// LINT.ThenChange(network/ipv4/stats.go:multiCounterIGMPPacketStats)
}
1903  
// IGMPSentPacketStats collects outbound IGMP-specific stats.
//
// It extends the embedded IGMPPacketStats with a counter for dropped packets.
//
// +stateify savable
type IGMPSentPacketStats struct {
	// LINT.IfChange(IGMPSentPacketStats)

	// IGMPPacketStats is embedded; its per-type counters are promoted onto this
	// type.
	IGMPPacketStats

	// Dropped is the number of IGMP packets dropped.
	Dropped *StatCounter

	// LINT.ThenChange(network/ipv4/stats.go:multiCounterIGMPSentPacketStats)
}
1917  
// IGMPReceivedPacketStats collects inbound IGMP-specific stats.
//
// It extends the embedded IGMPPacketStats with counters for received packets
// that were invalid, failed checksum validation, or were not recognized.
//
// +stateify savable
type IGMPReceivedPacketStats struct {
	// LINT.IfChange(IGMPReceivedPacketStats)

	// IGMPPacketStats is embedded; its per-type counters are promoted onto this
	// type.
	IGMPPacketStats

	// Invalid is the number of invalid IGMP packets received.
	Invalid *StatCounter

	// ChecksumErrors is the number of IGMP packets dropped due to bad checksums.
	ChecksumErrors *StatCounter

	// Unrecognized is the number of unrecognized messages counted. These are
	// silently ignored for forward-compatibility.
	Unrecognized *StatCounter

	// LINT.ThenChange(network/ipv4/stats.go:multiCounterIGMPReceivedPacketStats)
}
1938  
// IGMPStats collects IGMP-specific stats, grouped into sent and received
// packet statistics.
//
// +stateify savable
type IGMPStats struct {
	// LINT.IfChange(IGMPStats)

	// PacketsSent contains statistics about sent packets.
	PacketsSent IGMPSentPacketStats

	// PacketsReceived contains statistics about received packets.
	PacketsReceived IGMPReceivedPacketStats

	// LINT.ThenChange(network/ipv4/stats.go:multiCounterIGMPStats)
}
1953  
// IPForwardingStats collects stats related to IP forwarding (both v4 and v6).
//
// Each field counts packets for one specific reason a packet was dropped or
// could not be forwarded; Errors is the general counter for packets that
// could not be successfully forwarded.
//
// +stateify savable
type IPForwardingStats struct {
	// LINT.IfChange(IPForwardingStats)

	// Unrouteable is the number of IP packets received which were dropped
	// because a route to their destination could not be constructed.
	Unrouteable *StatCounter

	// ExhaustedTTL is the number of IP packets received which were dropped
	// because their TTL was exhausted.
	ExhaustedTTL *StatCounter

	// InitializingSource is the number of IP packets which were dropped
	// because they contained a source address that may only be used on the local
	// network as part of initialization work.
	InitializingSource *StatCounter

	// LinkLocalSource is the number of IP packets which were dropped
	// because they contained a link-local source address.
	LinkLocalSource *StatCounter

	// LinkLocalDestination is the number of IP packets which were dropped
	// because they contained a link-local destination address.
	LinkLocalDestination *StatCounter

	// PacketTooBig is the number of IP packets which were dropped because they
	// were too big for the outgoing MTU.
	PacketTooBig *StatCounter

	// HostUnreachable is the number of IP packets received which could not be
	// successfully forwarded due to an unresolvable next hop.
	HostUnreachable *StatCounter

	// ExtensionHeaderProblem is the number of IP packets which were dropped
	// because of a problem encountered when processing an IPv6 extension
	// header.
	ExtensionHeaderProblem *StatCounter

	// UnexpectedMulticastInputInterface is the number of multicast packets that
	// were received on an interface that did not match the corresponding route's
	// expected input interface.
	UnexpectedMulticastInputInterface *StatCounter

	// UnknownOutputEndpoint is the number of packets that could not be forwarded
	// because the output endpoint could not be found.
	UnknownOutputEndpoint *StatCounter

	// NoMulticastPendingQueueBufferSpace is the number of multicast packets that
	// were dropped due to insufficient buffer space in the pending packet queue.
	NoMulticastPendingQueueBufferSpace *StatCounter

	// OutgoingDeviceNoBufferSpace is the number of packets that were dropped due
	// to insufficient space in the outgoing device.
	OutgoingDeviceNoBufferSpace *StatCounter

	// Errors is the number of IP packets received which could not be
	// successfully forwarded.
	Errors *StatCounter

	// OutgoingDeviceClosedForSend is the number of packets that were dropped due
	// to the outgoing device being closed for send.
	OutgoingDeviceClosedForSend *StatCounter

	// LINT.ThenChange(network/internal/ip/stats.go:MultiCounterIPForwardingStats)
}
2021  
// IPStats collects IP-specific stats (both v4 and v6).
//
// Forwarding-related counters are grouped separately in the Forwarding field.
//
// +stateify savable
type IPStats struct {
	// LINT.IfChange(IPStats)

	// PacketsReceived is the number of IP packets received from the link layer.
	PacketsReceived *StatCounter

	// ValidPacketsReceived is the number of valid IP packets that reached the IP
	// layer.
	ValidPacketsReceived *StatCounter

	// DisabledPacketsReceived is the number of IP packets received from the link
	// layer when the IP layer is disabled.
	DisabledPacketsReceived *StatCounter

	// InvalidDestinationAddressesReceived is the number of IP packets received
	// with an unknown or invalid destination address.
	InvalidDestinationAddressesReceived *StatCounter

	// InvalidSourceAddressesReceived is the number of IP packets received with a
	// source address that should never have been received on the wire.
	InvalidSourceAddressesReceived *StatCounter

	// PacketsDelivered is the number of incoming IP packets that are successfully
	// delivered to the transport layer.
	PacketsDelivered *StatCounter

	// PacketsSent is the number of IP packets sent via WritePacket.
	PacketsSent *StatCounter

	// OutgoingPacketErrors is the number of IP packets which failed to write to a
	// link-layer endpoint.
	OutgoingPacketErrors *StatCounter

	// MalformedPacketsReceived is the number of IP packets that were dropped due
	// to the IP packet header failing validation checks.
	MalformedPacketsReceived *StatCounter

	// MalformedFragmentsReceived is the number of IP fragments that were dropped
	// due to the fragment failing validation checks.
	MalformedFragmentsReceived *StatCounter

	// IPTablesPreroutingDropped is the number of IP packets dropped in the
	// Prerouting chain.
	IPTablesPreroutingDropped *StatCounter

	// IPTablesInputDropped is the number of IP packets dropped in the Input
	// chain.
	IPTablesInputDropped *StatCounter

	// IPTablesForwardDropped is the number of IP packets dropped in the Forward
	// chain.
	IPTablesForwardDropped *StatCounter

	// IPTablesOutputDropped is the number of IP packets dropped in the Output
	// chain.
	IPTablesOutputDropped *StatCounter

	// IPTablesPostroutingDropped is the number of IP packets dropped in the
	// Postrouting chain.
	IPTablesPostroutingDropped *StatCounter

	// TODO(https://gvisor.dev/issues/5529): Move the IPv4-only option stats out
	// of IPStats.

	// OptionTimestampReceived is the number of Timestamp options seen.
	OptionTimestampReceived *StatCounter

	// OptionRecordRouteReceived is the number of Record Route options seen.
	OptionRecordRouteReceived *StatCounter

	// OptionRouterAlertReceived is the number of Router Alert options seen.
	OptionRouterAlertReceived *StatCounter

	// OptionUnknownReceived is the number of unknown IP options seen.
	OptionUnknownReceived *StatCounter

	// Forwarding collects stats related to IP forwarding.
	Forwarding IPForwardingStats

	// LINT.ThenChange(network/internal/ip/stats.go:MultiCounterIPStats)
}
2105  
// ARPStats collects ARP-specific stats.
//
// It covers received packets, requests and replies, as well as outgoing
// requests and replies that were sent or dropped.
//
// +stateify savable
type ARPStats struct {
	// LINT.IfChange(ARPStats)

	// PacketsReceived is the number of ARP packets received from the link layer.
	PacketsReceived *StatCounter

	// DisabledPacketsReceived is the number of ARP packets received from the link
	// layer when the ARP layer is disabled.
	DisabledPacketsReceived *StatCounter

	// MalformedPacketsReceived is the number of ARP packets that were dropped due
	// to being malformed.
	MalformedPacketsReceived *StatCounter

	// RequestsReceived is the number of ARP requests received.
	RequestsReceived *StatCounter

	// RequestsReceivedUnknownTargetAddress is the number of ARP requests that
	// were targeted to an interface different from the one it was received on.
	RequestsReceivedUnknownTargetAddress *StatCounter

	// OutgoingRequestInterfaceHasNoLocalAddressErrors is the number of failures
	// to send an ARP request because the interface has no network address
	// assigned to it.
	OutgoingRequestInterfaceHasNoLocalAddressErrors *StatCounter

	// OutgoingRequestBadLocalAddressErrors is the number of failures to send an
	// ARP request with a bad local address.
	OutgoingRequestBadLocalAddressErrors *StatCounter

	// OutgoingRequestsDropped is the number of ARP requests which failed to write
	// to a link-layer endpoint.
	OutgoingRequestsDropped *StatCounter

	// OutgoingRequestsSent is the number of ARP requests successfully written to
	// a link-layer endpoint.
	OutgoingRequestsSent *StatCounter

	// RepliesReceived is the number of ARP replies received.
	RepliesReceived *StatCounter

	// OutgoingRepliesDropped is the number of ARP replies which failed to write
	// to a link-layer endpoint.
	OutgoingRepliesDropped *StatCounter

	// OutgoingRepliesSent is the number of ARP replies successfully written to a
	// link-layer endpoint.
	OutgoingRepliesSent *StatCounter

	// LINT.ThenChange(network/arp/stats.go:multiCounterARPStats)
}
2160  
// TCPStats collects TCP-specific stats.
//
// It covers connection lifecycle events, listen queue overflow handling,
// segment and reset counts, and loss recovery events.
//
// +stateify savable
type TCPStats struct {
	// ActiveConnectionOpenings is the number of connections opened
	// successfully via Connect.
	ActiveConnectionOpenings *StatCounter

	// PassiveConnectionOpenings is the number of connections opened
	// successfully via Listen.
	PassiveConnectionOpenings *StatCounter

	// CurrentEstablished is the number of TCP connections for which the
	// current state is ESTABLISHED.
	CurrentEstablished *StatCounter

	// CurrentConnected is the number of TCP connections that
	// are in connected state.
	CurrentConnected *StatCounter

	// EstablishedResets is the number of times TCP connections have made
	// a direct transition to the CLOSED state from either the
	// ESTABLISHED state or the CLOSE-WAIT state.
	EstablishedResets *StatCounter

	// EstablishedClosed is the number of times established TCP connections
	// made a transition to CLOSED state.
	EstablishedClosed *StatCounter

	// EstablishedTimedout is the number of times an established connection
	// was reset because of keep-alive time out.
	EstablishedTimedout *StatCounter

	// ListenOverflowSynDrop is the number of times the listen queue overflowed
	// and a SYN was dropped.
	ListenOverflowSynDrop *StatCounter

	// ListenOverflowAckDrop is the number of times the final ACK
	// in the handshake was dropped due to overflow.
	ListenOverflowAckDrop *StatCounter

	// ListenOverflowSynCookieSent is the number of times a SYN cookie was sent.
	ListenOverflowSynCookieSent *StatCounter

	// ListenOverflowSynCookieRcvd is the number of times a valid SYN
	// cookie was received.
	ListenOverflowSynCookieRcvd *StatCounter

	// ListenOverflowInvalidSynCookieRcvd is the number of times an invalid SYN
	// cookie was received.
	ListenOverflowInvalidSynCookieRcvd *StatCounter

	// FailedConnectionAttempts is the number of calls to Connect or Listen
	// (active and passive openings, respectively) that end in an error.
	FailedConnectionAttempts *StatCounter

	// ValidSegmentsReceived is the number of TCP segments received that
	// the transport layer successfully parsed.
	ValidSegmentsReceived *StatCounter

	// InvalidSegmentsReceived is the number of TCP segments received that
	// the transport layer could not parse.
	InvalidSegmentsReceived *StatCounter

	// SegmentsSent is the number of TCP segments sent.
	SegmentsSent *StatCounter

	// SegmentSendErrors is the number of TCP segments that failed to be sent.
	SegmentSendErrors *StatCounter

	// ResetsSent is the number of TCP resets sent.
	ResetsSent *StatCounter

	// ResetsReceived is the number of TCP resets received.
	ResetsReceived *StatCounter

	// Retransmits is the number of TCP segments retransmitted.
	Retransmits *StatCounter

	// FastRecovery is the number of times Fast Recovery was used to
	// recover from packet loss.
	FastRecovery *StatCounter

	// SACKRecovery is the number of times SACK Recovery was used to
	// recover from packet loss.
	SACKRecovery *StatCounter

	// TLPRecovery is the number of times recovery was accomplished by the tail
	// loss probe.
	TLPRecovery *StatCounter

	// SlowStartRetransmits is the number of segments retransmitted in slow
	// start.
	SlowStartRetransmits *StatCounter

	// FastRetransmit is the number of segments retransmitted in fast
	// recovery.
	FastRetransmit *StatCounter

	// Timeouts is the number of times the RTO expired.
	Timeouts *StatCounter

	// ChecksumErrors is the number of segments dropped due to bad checksums.
	ChecksumErrors *StatCounter

	// FailedPortReservations is the number of times TCP failed to reserve
	// a port.
	FailedPortReservations *StatCounter

	// SegmentsAckedWithDSACK is the number of segments acknowledged with
	// DSACK.
	SegmentsAckedWithDSACK *StatCounter

	// SpuriousRecovery is the number of times the connection entered loss
	// recovery spuriously.
	SpuriousRecovery *StatCounter

	// SpuriousRTORecovery is the number of spurious RTOs.
	SpuriousRTORecovery *StatCounter

	// ForwardMaxInFlightDrop is the number of connection requests that are
	// dropped due to exceeding the maximum number of in-flight connection
	// requests.
	ForwardMaxInFlightDrop *StatCounter
}
2286  
// UDPStats collects UDP-specific stats.
//
// It covers datagrams received and sent, along with the reasons datagrams
// were dropped or failed to be sent.
//
// +stateify savable
type UDPStats struct {
	// PacketsReceived is the number of UDP datagrams received via
	// HandlePacket.
	PacketsReceived *StatCounter

	// UnknownPortErrors is the number of incoming UDP datagrams dropped
	// because they did not have a known destination port.
	UnknownPortErrors *StatCounter

	// ReceiveBufferErrors is the number of incoming UDP datagrams dropped
	// due to the receiving buffer being in an invalid state.
	ReceiveBufferErrors *StatCounter

	// MalformedPacketsReceived is the number of incoming UDP datagrams
	// dropped due to the UDP header being in a malformed state.
	MalformedPacketsReceived *StatCounter

	// PacketsSent is the number of UDP datagrams sent via sendUDP.
	PacketsSent *StatCounter

	// PacketSendErrors is the number of datagrams that failed to be sent.
	PacketSendErrors *StatCounter

	// ChecksumErrors is the number of datagrams dropped due to bad checksums.
	ChecksumErrors *StatCounter
}
2316  
// NICNeighborStats holds metrics for the neighbor table.
//
// It counts lookups on unreachable entries and neighbor responses that were
// dropped or ignored.
//
// +stateify savable
type NICNeighborStats struct {
	// LINT.IfChange(NICNeighborStats)

	// UnreachableEntryLookups counts the number of lookups performed on an
	// entry in Unreachable state.
	UnreachableEntryLookups *StatCounter

	// DroppedConfirmationForNoninitiatedNeighbor counts the number of neighbor
	// responses that were dropped because they didn't match an entry in the
	// cache.
	DroppedConfirmationForNoninitiatedNeighbor *StatCounter

	// DroppedInvalidLinkAddressConfirmations counts the number of neighbor
	// responses that were ignored because they had an invalid source link-layer
	// address.
	DroppedInvalidLinkAddressConfirmations *StatCounter

	// LINT.ThenChange(stack/nic_stats.go:multiCounterNICNeighborStats)
}
2339  
// NICPacketStats holds basic packet statistics: a packet count and a byte
// count.
//
// +stateify savable
type NICPacketStats struct {
	// LINT.IfChange(NICPacketStats)

	// Packets is the number of packets counted.
	Packets *StatCounter

	// Bytes is the number of bytes counted.
	Bytes *StatCounter

	// LINT.ThenChange(stack/nic_stats.go:multiCounterNICPacketStats)
}
2354  
// IntegralStatCounterMap holds a map associating integral keys with
// StatCounters.
//
// The map is guarded by an internal read-write mutex, and is allocated by
// Init.
//
// +stateify savable
type IntegralStatCounterMap struct {
	// mu protects counterMap.
	mu sync.RWMutex `state:"nosave"`
	// counterMap associates each uint64 key with its counter.
	//
	// +checklocks:mu
	counterMap map[uint64]*StatCounter
}
2364  
2365  // Keys returns all keys present in the map.
2366  func (m *IntegralStatCounterMap) Keys() []uint64 {
2367  	m.mu.RLock()
2368  	defer m.mu.RUnlock()
2369  	var keys []uint64
2370  	for k := range m.counterMap {
2371  		keys = append(keys, k)
2372  	}
2373  	return keys
2374  }
2375  
2376  // Get returns the counter mapped by the provided key.
2377  func (m *IntegralStatCounterMap) Get(key uint64) (*StatCounter, bool) {
2378  	m.mu.RLock()
2379  	defer m.mu.RUnlock()
2380  	counter, ok := m.counterMap[key]
2381  	return counter, ok
2382  }
2383  
2384  // Init initializes the map.
2385  func (m *IntegralStatCounterMap) Init() {
2386  	m.mu.Lock()
2387  	defer m.mu.Unlock()
2388  	m.counterMap = make(map[uint64]*StatCounter)
2389  }
2390  
2391  // Increment increments the counter associated with the provided key.
2392  func (m *IntegralStatCounterMap) Increment(key uint64) {
2393  	m.mu.RLock()
2394  	counter, ok := m.counterMap[key]
2395  	m.mu.RUnlock()
2396  
2397  	if !ok {
2398  		m.mu.Lock()
2399  		counter, ok = m.counterMap[key]
2400  		if !ok {
2401  			counter = new(StatCounter)
2402  			m.counterMap[key] = counter
2403  		}
2404  		m.mu.Unlock()
2405  	}
2406  	counter.Increment()
2407  }
2408  
// A MultiIntegralStatCounterMap keeps track of two integral counter maps at
// once.
//
// The two maps are supplied through Init; Increment forwards to both of them.
//
// +stateify savable
type MultiIntegralStatCounterMap struct {
	// a is the first of the two tracked counter maps.
	a *IntegralStatCounterMap
	// b is the second of the two tracked counter maps.
	b *IntegralStatCounterMap
}
2417  
2418  // Init sets the internal integral counter maps to point to a and b.
2419  func (m *MultiIntegralStatCounterMap) Init(a, b *IntegralStatCounterMap) {
2420  	m.a = a
2421  	m.b = b
2422  }
2423  
2424  // Increment increments the counter in each map corresponding to the
2425  // provided key.
2426  func (m *MultiIntegralStatCounterMap) Increment(key uint64) {
2427  	m.a.Increment(key)
2428  	m.b.Increment(key)
2429  }
2430  
// NICStats holds NIC statistics.
//
// Use FillIn to obtain a copy whose nil counter fields have been initialized.
//
// +stateify savable
type NICStats struct {
	// LINT.IfChange(NICStats)

	// UnknownL3ProtocolRcvdPacketCounts records the number of packets received
	// for each unknown or unsupported network protocol number.
	UnknownL3ProtocolRcvdPacketCounts *IntegralStatCounterMap

	// UnknownL4ProtocolRcvdPacketCounts records the number of packets received
	// for each unknown or unsupported transport protocol number.
	UnknownL4ProtocolRcvdPacketCounts *IntegralStatCounterMap

	// MalformedL4RcvdPackets is the number of packets received by a NIC that
	// could not be delivered to a transport endpoint because the L4 header could
	// not be parsed.
	MalformedL4RcvdPackets *StatCounter

	// Tx contains statistics about transmitted packets.
	Tx NICPacketStats

	// TxPacketsDroppedNoBufferSpace is the number of packets dropped due to the
	// NIC not having enough buffer space to send the packet.
	//
	// Packets may be dropped with a no buffer space error when the device TX
	// queue is full.
	TxPacketsDroppedNoBufferSpace *StatCounter

	// Rx contains statistics about received packets.
	Rx NICPacketStats

	// DisabledRx contains statistics about received packets on disabled NICs.
	DisabledRx NICPacketStats

	// Neighbor contains statistics about neighbor entries.
	Neighbor NICNeighborStats

	// LINT.ThenChange(stack/nic_stats.go:multiCounterNICStats)
}
2471  
2472  // FillIn returns a copy of s with nil fields initialized to new StatCounters.
2473  func (s NICStats) FillIn() NICStats {
2474  	InitStatCounters(reflect.ValueOf(&s).Elem())
2475  	return s
2476  }
2477  
// Stats holds statistics about the networking stack.
//
// Use FillIn to obtain a copy whose nil counter fields have been initialized.
//
// +stateify savable
type Stats struct {
	// TODO(https://gvisor.dev/issues/5986): Make the DroppedPackets stat less
	// ambiguous.

	// DroppedPackets is the number of packets dropped at the transport layer.
	DroppedPackets *StatCounter

	// NICs is an aggregation of every NIC's statistics. These should not be
	// incremented using this field, but using the relevant NIC multicounters.
	NICs NICStats

	// ICMP is an aggregation of every NetworkEndpoint's ICMP statistics (both v4
	// and v6). These should not be incremented using this field, but using the
	// relevant NetworkEndpoint ICMP multicounters.
	ICMP ICMPStats

	// IGMP is an aggregation of every NetworkEndpoint's IGMP statistics. These
	// should not be incremented using this field, but using the relevant
	// NetworkEndpoint IGMP multicounters.
	IGMP IGMPStats

	// IP is an aggregation of every NetworkEndpoint's IP statistics. These should
	// not be incremented using this field, but using the relevant NetworkEndpoint
	// IP multicounters.
	IP IPStats

	// ARP is an aggregation of every NetworkEndpoint's ARP statistics. These
	// should not be incremented using this field, but using the relevant
	// NetworkEndpoint ARP multicounters.
	ARP ARPStats

	// TCP holds TCP-specific stats.
	TCP TCPStats

	// UDP holds UDP-specific stats.
	UDP UDPStats
}
2518  
// ReceiveErrors collects packet receive errors within a transport endpoint.
//
// The counters are stored by value rather than by pointer.
//
// +stateify savable
type ReceiveErrors struct {
	// ReceiveBufferOverflow is the number of received packets dropped
	// due to the receive buffer being full.
	ReceiveBufferOverflow StatCounter

	// MalformedPacketsReceived is the number of incoming packets
	// dropped due to the packet header being in a malformed state.
	MalformedPacketsReceived StatCounter

	// ClosedReceiver is the number of received packets dropped because
	// the receiving endpoint was in the closed state.
	ClosedReceiver StatCounter

	// ChecksumErrors is the number of packets dropped due to bad checksums.
	ChecksumErrors StatCounter
}
2538  
// SendErrors collects packet send errors within the transport layer for an
// endpoint.
//
// The counters are stored by value rather than by pointer.
//
// +stateify savable
type SendErrors struct {
	// SendToNetworkFailed is the number of packets that failed to be written
	// to the network endpoint.
	SendToNetworkFailed StatCounter

	// NoRoute is the number of times we failed to resolve an IP route.
	NoRoute StatCounter
}
2551  
// ReadErrors collects segment read errors from an endpoint read call.
//
// The counters are stored by value rather than by pointer.
//
// +stateify savable
type ReadErrors struct {
	// ReadClosed is the number of received packet drops because the endpoint
	// was shut down for read.
	ReadClosed StatCounter

	// InvalidEndpointState is the number of times we found the endpoint state
	// to be unexpected.
	InvalidEndpointState StatCounter

	// NotConnected is the number of times we tried to read but found that the
	// endpoint was not connected.
	NotConnected StatCounter
}
2568  
// WriteErrors collects packet write errors from an endpoint write call.
//
// The counters are stored by value rather than by pointer.
//
// +stateify savable
type WriteErrors struct {
	// WriteClosed is the number of packet drops because the endpoint
	// was shut down for write.
	WriteClosed StatCounter

	// InvalidEndpointState is the number of times we found the endpoint state
	// to be unexpected.
	InvalidEndpointState StatCounter

	// InvalidArgs is the number of times invalid input arguments were
	// provided for the endpoint Write call.
	InvalidArgs StatCounter
}
2585  
// TransportEndpointStats collects statistics about the endpoint.
//
// All counters, including those in the nested error structs, are stored by
// value. Use Clone to copy the statistics into another
// TransportEndpointStats.
//
// +stateify savable
type TransportEndpointStats struct {
	// PacketsReceived is the number of successful packet receives.
	PacketsReceived StatCounter

	// PacketsSent is the number of successful packet sends.
	PacketsSent StatCounter

	// ReceiveErrors collects packet receive errors within transport layer.
	ReceiveErrors ReceiveErrors

	// ReadErrors collects packet read errors from an endpoint read call.
	ReadErrors ReadErrors

	// SendErrors collects packet send errors within the transport layer.
	SendErrors SendErrors

	// WriteErrors collects packet write errors from an endpoint write call.
	WriteErrors WriteErrors
}
2608  
// IsEndpointStats is an empty method to implement the tcpip.EndpointStats
// marker interface.
//
// It does nothing when called; it exists only so that *TransportEndpointStats
// has the method.
func (*TransportEndpointStats) IsEndpointStats() {}
2612  
2613  // InitStatCounters initializes v's fields with nil StatCounter fields to new
2614  // StatCounters.
2615  func InitStatCounters(v reflect.Value) {
2616  	for i := 0; i < v.NumField(); i++ {
2617  		v := v.Field(i)
2618  		if s, ok := v.Addr().Interface().(**StatCounter); ok {
2619  			if *s == nil {
2620  				*s = new(StatCounter)
2621  			}
2622  		} else if s, ok := v.Addr().Interface().(**IntegralStatCounterMap); ok {
2623  			if *s == nil {
2624  				*s = new(IntegralStatCounterMap)
2625  				(*s).Init()
2626  			}
2627  		} else {
2628  			InitStatCounters(v)
2629  		}
2630  	}
2631  }
2632  
2633  // FillIn returns a copy of s with nil fields initialized to new StatCounters.
2634  func (s Stats) FillIn() Stats {
2635  	InitStatCounters(reflect.ValueOf(&s).Elem())
2636  	return s
2637  }
2638  
2639  // Clone clones a copy of the TransportEndpointStats into dst by atomically
2640  // reading each field.
2641  func (src *TransportEndpointStats) Clone(dst *TransportEndpointStats) {
2642  	clone(reflect.ValueOf(dst).Elem(), reflect.ValueOf(src).Elem())
2643  }
2644  
2645  func clone(dst reflect.Value, src reflect.Value) {
2646  	for i := 0; i < dst.NumField(); i++ {
2647  		d := dst.Field(i)
2648  		s := src.Field(i)
2649  		if c, ok := s.Addr().Interface().(*StatCounter); ok {
2650  			d.Addr().Interface().(*StatCounter).IncrementBy(c.Value())
2651  		} else {
2652  			clone(d, s)
2653  		}
2654  	}
2655  }
2656  
2657  // String implements the fmt.Stringer interface.
2658  func (a Address) String() string {
2659  	switch l := a.Len(); l {
2660  	case 4:
2661  		return fmt.Sprintf("%d.%d.%d.%d", int(a.addr[0]), int(a.addr[1]), int(a.addr[2]), int(a.addr[3]))
2662  	case 16:
2663  		// Find the longest subsequence of hexadecimal zeros.
2664  		start, end := -1, -1
2665  		for i := 0; i < a.Len(); i += 2 {
2666  			j := i
2667  			for j < a.Len() && a.addr[j] == 0 && a.addr[j+1] == 0 {
2668  				j += 2
2669  			}
2670  			if j > i+2 && j-i > end-start {
2671  				start, end = i, j
2672  			}
2673  		}
2674  
2675  		var b strings.Builder
2676  		for i := 0; i < a.Len(); i += 2 {
2677  			if i == start {
2678  				b.WriteString("::")
2679  				i = end
2680  				if end >= a.Len() {
2681  					break
2682  				}
2683  			} else if i > 0 {
2684  				b.WriteByte(':')
2685  			}
2686  			v := uint16(a.addr[i+0])<<8 | uint16(a.addr[i+1])
2687  			if v == 0 {
2688  				b.WriteByte('0')
2689  			} else {
2690  				const digits = "0123456789abcdef"
2691  				for i := uint(3); i < 4; i-- {
2692  					if v := v >> (i * 4); v != 0 {
2693  						b.WriteByte(digits[v&0xf])
2694  					}
2695  				}
2696  			}
2697  		}
2698  		return b.String()
2699  	default:
2700  		return fmt.Sprintf("%x", a.addr[:l])
2701  	}
2702  }
2703  
2704  // To4 converts the IPv4 address to a 4-byte representation.
2705  // If the address is not an IPv4 address, To4 returns the empty Address.
2706  func (a Address) To4() Address {
2707  	const (
2708  		ipv4len = 4
2709  		ipv6len = 16
2710  	)
2711  	if a.Len() == ipv4len {
2712  		return a
2713  	}
2714  	if a.Len() == ipv6len &&
2715  		isZeros(a.addr[:10]) &&
2716  		a.addr[10] == 0xff &&
2717  		a.addr[11] == 0xff {
2718  		return AddrFrom4Slice(a.addr[12:16])
2719  	}
2720  	return Address{}
2721  }
2722  
// isZeros reports whether every byte of addr is zero. An empty or nil slice
// is considered all zeros.
func isZeros(addr []byte) bool {
	// OR all bytes together; the result is zero only if each byte is zero.
	var acc byte
	for i := range addr {
		acc |= addr[i]
	}
	return acc == 0
}
2732  
// LinkAddress is a byte slice cast as a string that represents a link address.
// It is typically a 6-byte MAC address.
type LinkAddress string

// String implements the fmt.Stringer interface.
//
// A 6-byte address is rendered as colon-separated, zero-padded lowercase hex
// octets (aa:bb:cc:dd:ee:ff). An address of any other length is rendered as
// an unseparated lowercase hex dump of its bytes.
func (a LinkAddress) String() string {
	if len(a) != 6 {
		return fmt.Sprintf("%x", []byte(a))
	}
	return fmt.Sprintf("%02x:%02x:%02x:%02x:%02x:%02x", a[0], a[1], a[2], a[3], a[4], a[5])
}
2746  
2747  // ParseMACAddress parses an IEEE 802 address.
2748  //
2749  // It must be in the format aa:bb:cc:dd:ee:ff or aa-bb-cc-dd-ee-ff.
2750  func ParseMACAddress(s string) (LinkAddress, error) {
2751  	parts := strings.FieldsFunc(s, func(c rune) bool {
2752  		return c == ':' || c == '-'
2753  	})
2754  	if len(parts) != LinkAddressSize {
2755  		return "", fmt.Errorf("inconsistent parts: %s", s)
2756  	}
2757  	addr := make([]byte, 0, len(parts))
2758  	for _, part := range parts {
2759  		u, err := strconv.ParseUint(part, 16, 8)
2760  		if err != nil {
2761  			return "", fmt.Errorf("invalid hex digits: %s", s)
2762  		}
2763  		addr = append(addr, byte(u))
2764  	}
2765  	return LinkAddress(addr), nil
2766  }
2767  
2768  // GetRandMacAddr returns a mac address that can be used for local virtual devices.
2769  func GetRandMacAddr() LinkAddress {
2770  	mac := make(net.HardwareAddr, LinkAddressSize)
2771  	rand.Read(mac) // Fill with random data.
2772  	mac[0] &^= 0x1 // Clear multicast bit.
2773  	mac[0] |= 0x2  // Set local assignment bit (IEEE802).
2774  	return LinkAddress(mac)
2775  }
2776  
// AddressWithPrefix is an address with its subnet prefix length.
//
// Its String method renders it as "address/prefixlen", and Subnet derives the
// corresponding Subnet value.
//
// +stateify savable
type AddressWithPrefix struct {
	// Address is a network address.
	Address Address

	// PrefixLen is the subnet prefix length, counted in bits. Subnet treats
	// values <= 0 as an empty prefix and values >= 8 times the address length
	// as a full-length prefix.
	PrefixLen int
}
2787  
2788  // String implements the fmt.Stringer interface.
2789  func (a AddressWithPrefix) String() string {
2790  	return fmt.Sprintf("%s/%d", a.Address, a.PrefixLen)
2791  }
2792  
2793  // Subnet converts the address and prefix into a Subnet value and returns it.
2794  func (a AddressWithPrefix) Subnet() Subnet {
2795  	addrLen := a.Address.length
2796  	if a.PrefixLen <= 0 {
2797  		return Subnet{
2798  			address: Address{length: addrLen},
2799  			mask:    AddressMask{length: addrLen},
2800  		}
2801  	}
2802  	if a.PrefixLen >= addrLen*8 {
2803  		sub := Subnet{
2804  			address: a.Address,
2805  			mask:    AddressMask{length: addrLen},
2806  		}
2807  		for i := 0; i < addrLen; i++ {
2808  			sub.mask.mask[i] = 0xff
2809  		}
2810  		return sub
2811  	}
2812  
2813  	sa := Address{length: addrLen}
2814  	sm := AddressMask{length: addrLen}
2815  	n := uint(a.PrefixLen)
2816  	for i := 0; i < addrLen; i++ {
2817  		if n >= 8 {
2818  			sa.addr[i] = a.Address.addr[i]
2819  			sm.mask[i] = 0xff
2820  			n -= 8
2821  			continue
2822  		}
2823  		sm.mask[i] = ^byte(0xff >> n)
2824  		sa.addr[i] = a.Address.addr[i] & sm.mask[i]
2825  		n = 0
2826  	}
2827  
2828  	// For extra caution, call NewSubnet rather than directly creating the Subnet
2829  	// value. If that fails it indicates a serious bug in this code, so panic is
2830  	// in order.
2831  	s, err := NewSubnet(sa, sm)
2832  	if err != nil {
2833  		panic("invalid subnet: " + err.Error())
2834  	}
2835  	return s
2836  }
2837  
// ProtocolAddress is an address and the network protocol it is associated
// with. It pairs a NetworkProtocolNumber with an AddressWithPrefix.
//
// +stateify savable
type ProtocolAddress struct {
	// Protocol is the protocol of the address.
	Protocol NetworkProtocolNumber

	// AddressWithPrefix is a network address with its subnet prefix length.
	AddressWithPrefix AddressWithPrefix
}
2849  
var (
	// danglingEndpointsMu protects access to danglingEndpoints. It is taken by
	// GetDanglingEndpoints, AddDanglingEndpoint and DeleteDanglingEndpoint.
	danglingEndpointsMu sync.Mutex

	// danglingEndpoints tracks all dangling endpoints no longer owned by the app.
	// It is used as a set: only the keys carry information.
	danglingEndpoints = make(map[Endpoint]struct{})
)
2857  
2858  // GetDanglingEndpoints returns all dangling endpoints.
2859  func GetDanglingEndpoints() []Endpoint {
2860  	danglingEndpointsMu.Lock()
2861  	es := make([]Endpoint, 0, len(danglingEndpoints))
2862  	for e := range danglingEndpoints {
2863  		es = append(es, e)
2864  	}
2865  	danglingEndpointsMu.Unlock()
2866  	return es
2867  }
2868  
2869  // ReleaseDanglingEndpoints clears out all all reference counted objects held by
2870  // dangling endpoints.
2871  func ReleaseDanglingEndpoints() {
2872  	// Get the dangling endpoints first to avoid locking around Release(), which
2873  	// can cause a lock inversion with endpoint.mu and danglingEndpointsMu.
2874  	// Calling Release on a dangling endpoint that has been deleted is a noop.
2875  	eps := GetDanglingEndpoints()
2876  	for _, ep := range eps {
2877  		ep.Abort()
2878  	}
2879  }
2880  
2881  // AddDanglingEndpoint adds a dangling endpoint.
2882  func AddDanglingEndpoint(e Endpoint) {
2883  	danglingEndpointsMu.Lock()
2884  	danglingEndpoints[e] = struct{}{}
2885  	danglingEndpointsMu.Unlock()
2886  }
2887  
2888  // DeleteDanglingEndpoint removes a dangling endpoint.
2889  func DeleteDanglingEndpoint(e Endpoint) {
2890  	danglingEndpointsMu.Lock()
2891  	delete(danglingEndpoints, e)
2892  	danglingEndpointsMu.Unlock()
2893  }
2894  
// AsyncLoading is the global barrier for asynchronous endpoint loading
// activities. It is a package-level sync.WaitGroup shared by all users of
// this package.
var AsyncLoading sync.WaitGroup
2898