registration.go raw

   1  // Copyright 2018 The gVisor Authors.
   2  //
   3  // Licensed under the Apache License, Version 2.0 (the "License");
   4  // you may not use this file except in compliance with the License.
   5  // You may obtain a copy of the License at
   6  //
   7  //     http://www.apache.org/licenses/LICENSE-2.0
   8  //
   9  // Unless required by applicable law or agreed to in writing, software
  10  // distributed under the License is distributed on an "AS IS" BASIS,
  11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12  // See the License for the specific language governing permissions and
  13  // limitations under the License.
  14  
  15  package stack
  16  
  17  import (
  18  	"fmt"
  19  	"time"
  20  
  21  	"gvisor.dev/gvisor/pkg/buffer"
  22  	"gvisor.dev/gvisor/pkg/tcpip"
  23  	"gvisor.dev/gvisor/pkg/tcpip/header"
  24  	"gvisor.dev/gvisor/pkg/waiter"
  25  )
  26  
  27  // NetworkEndpointID is the identifier of a network layer protocol endpoint.
  28  // Currently the local address is sufficient because all supported protocols
  29  // (i.e., IPv4 and IPv6) have different sizes for their addresses.
  30  type NetworkEndpointID struct {
  31  	LocalAddress tcpip.Address
  32  }
  33  
  34  // TransportEndpointID is the identifier of a transport layer protocol endpoint.
  35  //
  36  // +stateify savable
  37  type TransportEndpointID struct {
  38  	// LocalPort is the local port associated with the endpoint.
  39  	LocalPort uint16
  40  
  41  	// LocalAddress is the local [network layer] address associated with
  42  	// the endpoint.
  43  	LocalAddress tcpip.Address
  44  
  45  	// RemotePort is the remote port associated with the endpoint.
  46  	RemotePort uint16
  47  
  48  	// RemoteAddress it the remote [network layer] address associated with
  49  	// the endpoint.
  50  	RemoteAddress tcpip.Address
  51  }
  52  
  53  // NetworkPacketInfo holds information about a network layer packet.
  54  //
  55  // +stateify savable
  56  type NetworkPacketInfo struct {
  57  	// LocalAddressBroadcast is true if the packet's local address is a broadcast
  58  	// address.
  59  	LocalAddressBroadcast bool
  60  
  61  	// LocalAddressTemporary is true if the packet's local address is a temporary
  62  	// address.
  63  	LocalAddressTemporary bool
  64  
  65  	// IsForwardedPacket is true if the packet is being forwarded.
  66  	IsForwardedPacket bool
  67  }
  68  
  69  // TransportErrorKind enumerates error types that are handled by the transport
  70  // layer.
  71  type TransportErrorKind int
  72  
  73  const (
  74  	// PacketTooBigTransportError indicates that a packet did not reach its
  75  	// destination because a link on the path to the destination had an MTU that
  76  	// was too small to carry the packet.
  77  	PacketTooBigTransportError TransportErrorKind = iota
  78  
  79  	// DestinationHostUnreachableTransportError indicates that the destination
  80  	// host was unreachable.
  81  	DestinationHostUnreachableTransportError
  82  
  83  	// DestinationPortUnreachableTransportError indicates that a packet reached
  84  	// the destination host, but the transport protocol was not active on the
  85  	// destination port.
  86  	DestinationPortUnreachableTransportError
  87  
  88  	// DestinationNetworkUnreachableTransportError indicates that the destination
  89  	// network was unreachable.
  90  	DestinationNetworkUnreachableTransportError
  91  
  92  	// DestinationProtoUnreachableTransportError indicates that the destination
  93  	// protocol was unreachable.
  94  	DestinationProtoUnreachableTransportError
  95  
  96  	// SourceRouteFailedTransportError indicates that the source route failed.
  97  	SourceRouteFailedTransportError
  98  
  99  	// SourceHostIsolatedTransportError indicates that the source machine is not
 100  	// on the network.
 101  	SourceHostIsolatedTransportError
 102  
 103  	// DestinationHostDownTransportError indicates that the destination host is
 104  	// down.
 105  	DestinationHostDownTransportError
 106  )
 107  
 108  // TransportError is a marker interface for errors that may be handled by the
 109  // transport layer.
 110  type TransportError interface {
 111  	tcpip.SockErrorCause
 112  
 113  	// Kind returns the type of the transport error.
 114  	Kind() TransportErrorKind
 115  }
 116  
 117  // TransportEndpoint is the interface that needs to be implemented by transport
 118  // protocol (e.g., tcp, udp) endpoints that can handle packets.
 119  type TransportEndpoint interface {
 120  	// HandlePacket is called by the stack when new packets arrive to this
 121  	// transport endpoint. It sets the packet buffer's transport header.
 122  	//
 123  	// HandlePacket may modify the packet.
 124  	HandlePacket(TransportEndpointID, *PacketBuffer)
 125  
 126  	// HandleError is called when the transport endpoint receives an error.
 127  	//
 128  	// HandleError takes may modify the packet buffer.
 129  	HandleError(TransportError, *PacketBuffer)
 130  
 131  	// Abort initiates an expedited endpoint teardown. It puts the endpoint
 132  	// in a closed state and frees all resources associated with it. This
 133  	// cleanup may happen asynchronously. Wait can be used to block on this
 134  	// asynchronous cleanup.
 135  	Abort()
 136  
 137  	// Wait waits for any worker goroutines owned by the endpoint to stop.
 138  	//
 139  	// An endpoint can be requested to stop its worker goroutines by calling
 140  	// its Close method.
 141  	//
 142  	// Wait will not block if the endpoint hasn't started any goroutines
 143  	// yet, even if it might later.
 144  	Wait()
 145  }
 146  
 147  // RawTransportEndpoint is the interface that needs to be implemented by raw
 148  // transport protocol endpoints. RawTransportEndpoints receive the entire
 149  // packet - including the network and transport headers - as delivered to
 150  // netstack.
 151  type RawTransportEndpoint interface {
 152  	// HandlePacket is called by the stack when new packets arrive to
 153  	// this transport endpoint. The packet contains all data from the link
 154  	// layer up.
 155  	//
 156  	// HandlePacket may modify the packet.
 157  	HandlePacket(*PacketBuffer)
 158  }
 159  
 160  // PacketEndpoint is the interface that needs to be implemented by packet
 161  // transport protocol endpoints. These endpoints receive link layer headers in
 162  // addition to whatever they contain (usually network and transport layer
 163  // headers and a payload).
 164  type PacketEndpoint interface {
 165  	// HandlePacket is called by the stack when new packets arrive that
 166  	// match the endpoint.
 167  	//
 168  	// Implementers should treat packet as immutable and should copy it
 169  	// before modification.
 170  	//
 171  	// linkHeader may have a length of 0, in which case the PacketEndpoint
 172  	// should construct its own ethernet header for applications.
 173  	//
 174  	// HandlePacket may modify pkt.
 175  	HandlePacket(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, pkt *PacketBuffer)
 176  }
 177  
 178  // MappablePacketEndpoint is a packet endpoint that supports forwarding its
 179  // packets to a PacketMMapEndpoint.
 180  type MappablePacketEndpoint interface {
 181  	PacketEndpoint
 182  
 183  	// GetPacketMMapOpts returns the options for initializing a PacketMMapEndpoint
 184  	// for this endpoint.
 185  	GetPacketMMapOpts(req *tcpip.TpacketReq, isRx bool) PacketMMapOpts
 186  
 187  	// SetPacketMMapEndpoint sets the PacketMMapEndpoint for this endpoint. All
 188  	// packets received by this endpoint will be forwarded to the provided
 189  	// PacketMMapEndpoint.
 190  	SetPacketMMapEndpoint(ep PacketMMapEndpoint)
 191  
 192  	// GetPacketMMapEndpoint returns the PacketMMapEndpoint for this endpoint or
 193  	// nil if there is none.
 194  	GetPacketMMapEndpoint() PacketMMapEndpoint
 195  
 196  	// HandlePacketMMapCopy is a function that is called when a packet received is
 197  	// too large for the buffer size specified for the memory mapped endpoint. In
 198  	// this case, the packet is copied and passed to the original packet endpoint.
 199  	HandlePacketMMapCopy(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, pkt *PacketBuffer)
 200  }
 201  
 202  // PacketMMapOpts are the options for initializing a PacketMMapEndpoint.
 203  //
 204  // +stateify savable
 205  type PacketMMapOpts struct {
 206  	Req            *tcpip.TpacketReq
 207  	IsRx           bool
 208  	Cooked         bool
 209  	Stack          *Stack
 210  	Wq             *waiter.Queue
 211  	PacketEndpoint MappablePacketEndpoint
 212  	Version        int
 213  	Reserve        uint32
 214  }
 215  
 216  // PacketMMapEndpoint is the interface implemented by endpoints to handle memory
 217  // mapped packets over the packet transport protocol (PACKET_MMAP).
 218  type PacketMMapEndpoint interface {
 219  	// HandlePacket is called by the stack when new packets arrive that
 220  	// match the endpoint.
 221  	//
 222  	// Implementers should treat packet as immutable and should copy it
 223  	// before modification.
 224  	//
 225  	// linkHeader may have a length of 0, in which case the PacketEndpoint
 226  	// should construct its own ethernet header for applications.
 227  	//
 228  	// HandlePacket may modify pkt.
 229  	HandlePacket(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, pkt *PacketBuffer)
 230  
 231  	// Close releases any resources associated with the endpoint.
 232  	Close()
 233  
 234  	// Readiness returns the events that the endpoint is ready for.
 235  	Readiness(mask waiter.EventMask) waiter.EventMask
 236  
 237  	// Stats returns the statistics for the endpoint that can be used for
 238  	// getsockopt(PACKET_STATISTICS).
 239  	Stats() tcpip.TpacketStats
 240  }
 241  
 242  // UnknownDestinationPacketDisposition enumerates the possible return values from
 243  // HandleUnknownDestinationPacket().
 244  type UnknownDestinationPacketDisposition int
 245  
 246  const (
 247  	// UnknownDestinationPacketMalformed denotes that the packet was malformed
 248  	// and no further processing should be attempted other than updating
 249  	// statistics.
 250  	UnknownDestinationPacketMalformed UnknownDestinationPacketDisposition = iota
 251  
 252  	// UnknownDestinationPacketUnhandled tells the caller that the packet was
 253  	// well formed but that the issue was not handled and the stack should take
 254  	// the default action.
 255  	UnknownDestinationPacketUnhandled
 256  
 257  	// UnknownDestinationPacketHandled tells the caller that it should do
 258  	// no further processing.
 259  	UnknownDestinationPacketHandled
 260  )
 261  
 262  // TransportProtocol is the interface that needs to be implemented by transport
 263  // protocols (e.g., tcp, udp) that want to be part of the networking stack.
 264  type TransportProtocol interface {
 265  	// Number returns the transport protocol number.
 266  	Number() tcpip.TransportProtocolNumber
 267  
 268  	// NewEndpoint creates a new endpoint of the transport protocol.
 269  	NewEndpoint(netProto tcpip.NetworkProtocolNumber, waitQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error)
 270  
 271  	// NewRawEndpoint creates a new raw endpoint of the transport protocol.
 272  	NewRawEndpoint(netProto tcpip.NetworkProtocolNumber, waitQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error)
 273  
 274  	// MinimumPacketSize returns the minimum valid packet size of this
 275  	// transport protocol. The stack automatically drops any packets smaller
 276  	// than this targeted at this protocol.
 277  	MinimumPacketSize() int
 278  
 279  	// ParsePorts returns the source and destination ports stored in a
 280  	// packet of this protocol.
 281  	ParsePorts(b []byte) (src, dst uint16, err tcpip.Error)
 282  
 283  	// HandleUnknownDestinationPacket handles packets targeted at this
 284  	// protocol that don't match any existing endpoint. For example,
 285  	// it is targeted at a port that has no listeners.
 286  	//
 287  	// HandleUnknownDestinationPacket may modify the packet if it handles
 288  	// the issue.
 289  	HandleUnknownDestinationPacket(TransportEndpointID, *PacketBuffer) UnknownDestinationPacketDisposition
 290  
 291  	// SetOption allows enabling/disabling protocol specific features.
 292  	// SetOption returns an error if the option is not supported or the
 293  	// provided option value is invalid.
 294  	SetOption(option tcpip.SettableTransportProtocolOption) tcpip.Error
 295  
 296  	// Option allows retrieving protocol specific option values.
 297  	// Option returns an error if the option is not supported or the
 298  	// provided option value is invalid.
 299  	Option(option tcpip.GettableTransportProtocolOption) tcpip.Error
 300  
 301  	// Close requests that any worker goroutines owned by the protocol
 302  	// stop.
 303  	Close()
 304  
 305  	// Wait waits for any worker goroutines owned by the protocol to stop.
 306  	Wait()
 307  
 308  	// Pause requests that any protocol level background workers pause.
 309  	Pause()
 310  
 311  	// Resume resumes any protocol level background workers that were
 312  	// previously paused by Pause.
 313  	Resume()
 314  
 315  	// Restore starts any protocol level background workers during restore.
 316  	Restore()
 317  
 318  	// Parse sets pkt.TransportHeader and trims pkt.Data appropriately. It does
 319  	// neither and returns false if pkt.Data is too small, i.e. pkt.Data.Size() <
 320  	// MinimumPacketSize()
 321  	Parse(pkt *PacketBuffer) (ok bool)
 322  }
 323  
 324  // TransportPacketDisposition is the result from attempting to deliver a packet
 325  // to the transport layer.
 326  type TransportPacketDisposition int
 327  
 328  const (
 329  	// TransportPacketHandled indicates that a transport packet was handled by the
 330  	// transport layer and callers need not take any further action.
 331  	TransportPacketHandled TransportPacketDisposition = iota
 332  
 333  	// TransportPacketProtocolUnreachable indicates that the transport
 334  	// protocol requested in the packet is not supported.
 335  	TransportPacketProtocolUnreachable
 336  
 337  	// TransportPacketDestinationPortUnreachable indicates that there weren't any
 338  	// listeners interested in the packet and the transport protocol has no means
 339  	// to notify the sender.
 340  	TransportPacketDestinationPortUnreachable
 341  )
 342  
 343  // TransportDispatcher contains the methods used by the network stack to deliver
 344  // packets to the appropriate transport endpoint after it has been handled by
 345  // the network layer.
 346  type TransportDispatcher interface {
 347  	// DeliverTransportPacket delivers packets to the appropriate
 348  	// transport protocol endpoint.
 349  	//
 350  	// pkt.NetworkHeader must be set before calling DeliverTransportPacket.
 351  	//
 352  	// DeliverTransportPacket may modify the packet.
 353  	DeliverTransportPacket(tcpip.TransportProtocolNumber, *PacketBuffer) TransportPacketDisposition
 354  
 355  	// DeliverTransportError delivers an error to the appropriate transport
 356  	// endpoint.
 357  	//
 358  	// DeliverTransportError may modify the packet buffer.
 359  	DeliverTransportError(local, remote tcpip.Address, _ tcpip.NetworkProtocolNumber, _ tcpip.TransportProtocolNumber, _ TransportError, _ *PacketBuffer)
 360  
 361  	// DeliverRawPacket delivers a packet to any subscribed raw sockets.
 362  	//
 363  	// DeliverRawPacket does NOT take ownership of the packet buffer.
 364  	DeliverRawPacket(tcpip.TransportProtocolNumber, *PacketBuffer)
 365  }
 366  
 367  // PacketLooping specifies where an outbound packet should be sent.
 368  type PacketLooping byte
 369  
 370  const (
 371  	// PacketOut indicates that the packet should be passed to the link
 372  	// endpoint.
 373  	PacketOut PacketLooping = 1 << iota
 374  
 375  	// PacketLoop indicates that the packet should be handled locally.
 376  	PacketLoop
 377  )
 378  
 379  // NetworkHeaderParams are the header parameters given as input by the
 380  // transport endpoint to the network.
 381  type NetworkHeaderParams struct {
 382  	// Protocol refers to the transport protocol number.
 383  	Protocol tcpip.TransportProtocolNumber
 384  
 385  	// TTL refers to Time To Live field of the IP-header.
 386  	TTL uint8
 387  
 388  	// TOS refers to TypeOfService or TrafficClass field of the IP-header.
 389  	TOS uint8
 390  
 391  	// DF indicates whether the DF bit should be set.
 392  	DF bool
 393  
 394  	// ExperimentOptionValue is a 16 bit value that is set for the IP experiment
 395  	// option headers if it is not zero.
 396  	ExperimentOptionValue uint16
 397  }
 398  
 399  // GroupAddressableEndpoint is an endpoint that supports group addressing.
 400  //
 401  // An endpoint is considered to support group addressing when one or more
 402  // endpoints may associate themselves with the same identifier (group address).
 403  type GroupAddressableEndpoint interface {
 404  	// JoinGroup joins the specified group.
 405  	JoinGroup(group tcpip.Address) tcpip.Error
 406  
 407  	// LeaveGroup attempts to leave the specified group.
 408  	LeaveGroup(group tcpip.Address) tcpip.Error
 409  
 410  	// IsInGroup returns true if the endpoint is a member of the specified group.
 411  	IsInGroup(group tcpip.Address) bool
 412  }
 413  
 414  // PrimaryEndpointBehavior is an enumeration of an AddressEndpoint's primary
 415  // behavior.
 416  type PrimaryEndpointBehavior int
 417  
 418  const (
 419  	// CanBePrimaryEndpoint indicates the endpoint can be used as a primary
 420  	// endpoint for new connections with no local address.
 421  	CanBePrimaryEndpoint PrimaryEndpointBehavior = iota
 422  
 423  	// FirstPrimaryEndpoint indicates the endpoint should be the first
 424  	// primary endpoint considered. If there are multiple endpoints with
 425  	// this behavior, they are ordered by recency.
 426  	FirstPrimaryEndpoint
 427  
 428  	// NeverPrimaryEndpoint indicates the endpoint should never be a
 429  	// primary endpoint.
 430  	NeverPrimaryEndpoint
 431  )
 432  
 433  func (peb PrimaryEndpointBehavior) String() string {
 434  	switch peb {
 435  	case CanBePrimaryEndpoint:
 436  		return "CanBePrimaryEndpoint"
 437  	case FirstPrimaryEndpoint:
 438  		return "FirstPrimaryEndpoint"
 439  	case NeverPrimaryEndpoint:
 440  		return "NeverPrimaryEndpoint"
 441  	default:
 442  		panic(fmt.Sprintf("unknown primary endpoint behavior: %d", peb))
 443  	}
 444  }
 445  
 446  // AddressConfigType is the method used to add an address.
 447  type AddressConfigType int
 448  
 449  const (
 450  	// AddressConfigStatic is a statically configured address endpoint that was
 451  	// added by some user-specified action (adding an explicit address, joining a
 452  	// multicast group).
 453  	AddressConfigStatic AddressConfigType = iota
 454  
 455  	// AddressConfigSlaac is an address endpoint added by SLAAC, as per RFC 4862
 456  	// section 5.5.3.
 457  	AddressConfigSlaac
 458  )
 459  
 460  // AddressLifetimes encodes an address' preferred and valid lifetimes, as well
 461  // as if the address is deprecated.
 462  //
 463  // +stateify savable
 464  type AddressLifetimes struct {
 465  	// Deprecated is whether the address is deprecated.
 466  	Deprecated bool
 467  
 468  	// PreferredUntil is the time at which the address will be deprecated.
 469  	//
 470  	// Note that for certain addresses, deprecating the address at the
 471  	// PreferredUntil time is not handled as a scheduled job by the stack, but
 472  	// is information provided by the owner as an indication of when it will
 473  	// deprecate the address.
 474  	//
 475  	// PreferredUntil should be ignored if Deprecated is true. If Deprecated
 476  	// is false, and PreferredUntil is the zero value, no information about
 477  	// the preferred lifetime can be inferred.
 478  	PreferredUntil tcpip.MonotonicTime
 479  
 480  	// ValidUntil is the time at which the address will be invalidated.
 481  	//
 482  	// Note that for certain addresses, invalidating the address at the
 483  	// ValidUntil time is not handled as a scheduled job by the stack, but
 484  	// is information provided by the owner as an indication of when it will
 485  	// invalidate the address.
 486  	//
 487  	// If ValidUntil is the zero value, no information about the valid lifetime
 488  	// can be inferred.
 489  	ValidUntil tcpip.MonotonicTime
 490  }
 491  
 492  // AddressProperties contains additional properties that can be configured when
 493  // adding an address.
 494  type AddressProperties struct {
 495  	PEB        PrimaryEndpointBehavior
 496  	ConfigType AddressConfigType
 497  	// Lifetimes encodes the address' lifetimes.
 498  	//
 499  	// Lifetimes.PreferredUntil and Lifetimes.ValidUntil are informational, i.e.
 500  	// the stack will not deprecated nor invalidate the address upon reaching
 501  	// these timestamps.
 502  	//
 503  	// If Lifetimes.Deprecated is true, the address will be added as deprecated.
 504  	Lifetimes AddressLifetimes
 505  	// Temporary is as defined in RFC 4941, but applies not only to addresses
 506  	// added via SLAAC, e.g. DHCPv6 can also add temporary addresses. Temporary
 507  	// addresses are short-lived and are not to be valid (or preferred)
 508  	// forever; hence the term temporary.
 509  	Temporary bool
 510  	Disp      AddressDispatcher
 511  }
 512  
 513  // AddressAssignmentState is an address' assignment state.
 514  type AddressAssignmentState int
 515  
 516  const (
 517  	_ AddressAssignmentState = iota
 518  
 519  	// AddressDisabled indicates the NIC the address is assigned to is disabled.
 520  	AddressDisabled
 521  
 522  	// AddressTentative indicates an address is yet to pass DAD (IPv4 addresses
 523  	// are never tentative).
 524  	AddressTentative
 525  
 526  	// AddressAssigned indicates an address is assigned.
 527  	AddressAssigned
 528  )
 529  
 530  func (state AddressAssignmentState) String() string {
 531  	switch state {
 532  	case AddressDisabled:
 533  		return "Disabled"
 534  	case AddressTentative:
 535  		return "Tentative"
 536  	case AddressAssigned:
 537  		return "Assigned"
 538  	default:
 539  		panic(fmt.Sprintf("unknown address assignment state: %d", state))
 540  	}
 541  }
 542  
 543  // AddressRemovalReason is the reason an address was removed.
 544  type AddressRemovalReason int
 545  
 546  const (
 547  	_ AddressRemovalReason = iota
 548  
 549  	// AddressRemovalManualAction indicates the address was removed explicitly
 550  	// using the stack API.
 551  	AddressRemovalManualAction
 552  
 553  	// AddressRemovalInterfaceRemoved indicates the address was removed because
 554  	// the NIC it is assigned to was removed.
 555  	AddressRemovalInterfaceRemoved
 556  
 557  	// AddressRemovalDADFailed indicates the address was removed because DAD
 558  	// failed.
 559  	AddressRemovalDADFailed
 560  
 561  	// AddressRemovalInvalidated indicates the address was removed because it
 562  	// was invalidated.
 563  	AddressRemovalInvalidated
 564  )
 565  
 566  func (reason AddressRemovalReason) String() string {
 567  	switch reason {
 568  	case AddressRemovalManualAction:
 569  		return "ManualAction"
 570  	case AddressRemovalInterfaceRemoved:
 571  		return "InterfaceRemoved"
 572  	case AddressRemovalDADFailed:
 573  		return "DADFailed"
 574  	case AddressRemovalInvalidated:
 575  		return "Invalidated"
 576  	default:
 577  		panic(fmt.Sprintf("unknown address removal reason: %d", reason))
 578  	}
 579  }
 580  
 581  // AddressDispatcher is the interface integrators can implement to receive
 582  // address-related events.
 583  type AddressDispatcher interface {
 584  	// OnChanged is called with an address' properties when they change.
 585  	//
 586  	// OnChanged is called once when the address is added with the initial state,
 587  	// and every time a property changes.
 588  	//
 589  	// The PreferredUntil and ValidUntil fields in AddressLifetimes must be
 590  	// considered informational, i.e. one must not consider an address to be
 591  	// deprecated/invalid even if the monotonic clock timestamp is past these
 592  	// deadlines. The Deprecated field indicates whether an address is
 593  	// preferred or not; and OnRemoved will be called when an address is
 594  	// removed due to invalidation.
 595  	OnChanged(AddressLifetimes, AddressAssignmentState)
 596  
 597  	// OnRemoved is called when an address is removed with the removal reason.
 598  	OnRemoved(AddressRemovalReason)
 599  }
 600  
 601  // AssignableAddressEndpoint is a reference counted address endpoint that may be
 602  // assigned to a NetworkEndpoint.
 603  type AssignableAddressEndpoint interface {
 604  	// AddressWithPrefix returns the endpoint's address.
 605  	AddressWithPrefix() tcpip.AddressWithPrefix
 606  
 607  	// Subnet returns the subnet of the endpoint's address.
 608  	Subnet() tcpip.Subnet
 609  
 610  	// IsAssigned returns whether or not the endpoint is considered bound
 611  	// to its NetworkEndpoint.
 612  	IsAssigned(allowExpired bool) bool
 613  
 614  	// TryIncRef tries to increment this endpoint's reference count.
 615  	//
 616  	// Returns true if it was successfully incremented. If it returns false, then
 617  	// the endpoint is considered expired and should no longer be used.
 618  	TryIncRef() bool
 619  
 620  	// DecRef decrements this endpoint's reference count.
 621  	DecRef()
 622  }
 623  
 624  // AddressEndpoint is an endpoint representing an address assigned to an
 625  // AddressableEndpoint.
 626  type AddressEndpoint interface {
 627  	AssignableAddressEndpoint
 628  
 629  	// GetKind returns the address kind for this endpoint.
 630  	GetKind() AddressKind
 631  
 632  	// SetKind sets the address kind for this endpoint.
 633  	SetKind(AddressKind)
 634  
 635  	// ConfigType returns the method used to add the address.
 636  	ConfigType() AddressConfigType
 637  
 638  	// Deprecated returns whether or not this endpoint is deprecated.
 639  	Deprecated() bool
 640  
 641  	// SetDeprecated sets this endpoint's deprecated status.
 642  	SetDeprecated(bool)
 643  
 644  	// Lifetimes returns this endpoint's lifetimes.
 645  	Lifetimes() AddressLifetimes
 646  
 647  	// SetLifetimes sets this endpoint's lifetimes.
 648  	//
 649  	// Note that setting preferred-until and valid-until times do not result in
 650  	// deprecation/invalidation jobs to be scheduled by the stack.
 651  	SetLifetimes(AddressLifetimes)
 652  
 653  	// Temporary returns whether or not this endpoint is temporary.
 654  	Temporary() bool
 655  
 656  	// RegisterDispatcher registers an address dispatcher.
 657  	//
 658  	// OnChanged will be called immediately on the provided address dispatcher
 659  	// with this endpoint's current state.
 660  	RegisterDispatcher(AddressDispatcher)
 661  }
 662  
 663  // AddressKind is the kind of an address.
 664  //
 665  // See the values of AddressKind for more details.
 666  type AddressKind int
 667  
 668  const (
 669  	// PermanentTentative is a permanent address endpoint that is not yet
 670  	// considered to be fully bound to an interface in the traditional
 671  	// sense. That is, the address is associated with a NIC, but packets
 672  	// destined to the address MUST NOT be accepted and MUST be silently
 673  	// dropped, and the address MUST NOT be used as a source address for
 674  	// outgoing packets. For IPv6, addresses are of this kind until NDP's
 675  	// Duplicate Address Detection (DAD) resolves. If DAD fails, the address
 676  	// is removed.
 677  	PermanentTentative AddressKind = iota
 678  
 679  	// Permanent is a permanent endpoint (vs. a temporary one) assigned to the
 680  	// NIC. Its reference count is biased by 1 to avoid removal when no route
 681  	// holds a reference to it. It is removed by explicitly removing the address
 682  	// from the NIC.
 683  	Permanent
 684  
 685  	// PermanentExpired is a permanent endpoint that had its address removed from
 686  	// the NIC, and it is waiting to be removed once no references to it are held.
 687  	//
 688  	// If the address is re-added before the endpoint is removed, its type
 689  	// changes back to Permanent.
 690  	PermanentExpired
 691  
 692  	// Temporary is an endpoint, created on a one-off basis to temporarily
 693  	// consider the NIC bound an an address that it is not explicitly bound to
 694  	// (such as a permanent address). Its reference count must not be biased by 1
 695  	// so that the address is removed immediately when references to it are no
 696  	// longer held.
 697  	//
 698  	// A temporary endpoint may be promoted to permanent if the address is added
 699  	// permanently.
 700  	Temporary
 701  )
 702  
 703  // IsPermanent returns true if the AddressKind represents a permanent address.
 704  func (k AddressKind) IsPermanent() bool {
 705  	switch k {
 706  	case Permanent, PermanentTentative:
 707  		return true
 708  	case Temporary, PermanentExpired:
 709  		return false
 710  	default:
 711  		panic(fmt.Sprintf("unrecognized address kind = %d", k))
 712  	}
 713  }
 714  
 715  // AddressableEndpoint is an endpoint that supports addressing.
 716  //
 717  // An endpoint is considered to support addressing when the endpoint may
 718  // associate itself with an identifier (address).
 719  type AddressableEndpoint interface {
 720  	// AddAndAcquirePermanentAddress adds the passed permanent address.
 721  	//
 722  	// Returns *tcpip.ErrDuplicateAddress if the address exists.
 723  	//
 724  	// Acquires and returns the AddressEndpoint for the added address.
 725  	AddAndAcquirePermanentAddress(addr tcpip.AddressWithPrefix, properties AddressProperties) (AddressEndpoint, tcpip.Error)
 726  
 727  	// RemovePermanentAddress removes the passed address if it is a permanent
 728  	// address.
 729  	//
 730  	// Returns *tcpip.ErrBadLocalAddress if the endpoint does not have the passed
 731  	// permanent address.
 732  	RemovePermanentAddress(addr tcpip.Address) tcpip.Error
 733  
 734  	// SetLifetimes sets an address' lifetimes (strictly informational) and
 735  	// whether it should be deprecated or preferred.
 736  	//
 737  	// Returns *tcpip.ErrBadLocalAddress if the endpoint does not have the passed
 738  	// address.
 739  	SetLifetimes(addr tcpip.Address, lifetimes AddressLifetimes) tcpip.Error
 740  
 741  	// MainAddress returns the endpoint's primary permanent address.
 742  	MainAddress() tcpip.AddressWithPrefix
 743  
 744  	// AcquireAssignedAddress returns an address endpoint for the passed address
 745  	// that is considered bound to the endpoint, optionally creating a temporary
 746  	// endpoint if requested and no existing address exists.
 747  	//
 748  	// The returned endpoint's reference count is incremented if readOnly is
 749  	// false.
 750  	//
 751  	// Returns nil if the specified address is not local to this endpoint.
 752  	AcquireAssignedAddress(localAddr tcpip.Address, allowTemp bool, tempPEB PrimaryEndpointBehavior, readOnly bool) AddressEndpoint
 753  
 754  	// AcquireOutgoingPrimaryAddress returns a primary address that may be used as
 755  	// a source address when sending packets to the passed remote address.
 756  	//
 757  	// If allowExpired is true, expired addresses may be returned.
 758  	//
 759  	// The returned endpoint's reference count is incremented.
 760  	//
 761  	// Returns nil if a primary address is not available.
 762  	AcquireOutgoingPrimaryAddress(remoteAddr, srcHint tcpip.Address, allowExpired bool) AddressEndpoint
 763  
 764  	// PrimaryAddresses returns the primary addresses.
 765  	PrimaryAddresses() []tcpip.AddressWithPrefix
 766  
 767  	// PermanentAddresses returns all the permanent addresses.
 768  	PermanentAddresses() []tcpip.AddressWithPrefix
 769  }
 770  
// NDPEndpoint is a network endpoint that supports NDP.
//
// It embeds NetworkEndpoint, so implementations must satisfy that interface
// as well.
type NDPEndpoint interface {
	NetworkEndpoint

	// InvalidateDefaultRouter invalidates a default router discovered through
	// NDP.
	//
	// NOTE(review): the argument is presumably the address of the router to
	// invalidate; confirm against implementations.
	InvalidateDefaultRouter(tcpip.Address)
}
 779  
// NetworkInterface is a network interface.
//
// It embeds NetworkLinkEndpoint and adds interface-level queries (identity,
// mode flags, addresses) as well as packet writing and neighbor handling.
type NetworkInterface interface {
	NetworkLinkEndpoint

	// ID returns the interface's ID.
	ID() tcpip.NICID

	// IsLoopback returns true if the interface is a loopback interface.
	IsLoopback() bool

	// Name returns the name of the interface.
	//
	// May return an empty string if the interface is not configured with a name.
	Name() string

	// Enabled returns true if the interface is enabled.
	Enabled() bool

	// Promiscuous returns true if the interface is in promiscuous mode.
	//
	// When in promiscuous mode, the interface should accept all packets.
	Promiscuous() bool

	// Spoofing returns true if the interface is in spoofing mode.
	//
	// When in spoofing mode, the interface should consider all addresses as
	// assigned to it.
	Spoofing() bool

	// PrimaryAddress returns the primary address associated with the interface
	// for the given network protocol.
	//
	// Selection order:
	//  1. The first non-deprecated address, if one exists.
	//  2. Otherwise, the first deprecated address, if one exists.
	//  3. Otherwise, the zero value.
	PrimaryAddress(tcpip.NetworkProtocolNumber) (tcpip.AddressWithPrefix, tcpip.Error)

	// CheckLocalAddress returns true if the address exists on the interface.
	CheckLocalAddress(tcpip.NetworkProtocolNumber, tcpip.Address) bool

	// WritePacketToRemote writes the packet to the given remote link address.
	WritePacketToRemote(tcpip.LinkAddress, *PacketBuffer) tcpip.Error

	// WritePacket writes a packet through the given route.
	//
	// WritePacket may modify the packet buffer. The packet buffer's
	// network and transport header must be set.
	WritePacket(*Route, *PacketBuffer) tcpip.Error

	// HandleNeighborProbe processes an incoming neighbor probe (e.g. ARP
	// request or NDP Neighbor Solicitation).
	//
	// HandleNeighborProbe assumes that the probe is valid for the network
	// interface the probe was received on.
	HandleNeighborProbe(tcpip.NetworkProtocolNumber, tcpip.Address, tcpip.LinkAddress) tcpip.Error

	// HandleNeighborConfirmation processes an incoming neighbor confirmation
	// (e.g. ARP reply or NDP Neighbor Advertisement).
	HandleNeighborConfirmation(tcpip.NetworkProtocolNumber, tcpip.Address, tcpip.LinkAddress, ReachabilityConfirmationFlags) tcpip.Error
}
 840  
// LinkResolvableNetworkEndpoint handles link resolution events.
type LinkResolvableNetworkEndpoint interface {
	// HandleLinkResolutionFailure is called when a link resolution failure
	// prevented the passed packet from being sent.
	HandleLinkResolutionFailure(*PacketBuffer)
}
 847  
// NetworkEndpoint is the interface that needs to be implemented by endpoints
// of network layer protocols (e.g., ipv4, ipv6).
type NetworkEndpoint interface {
	// Enable enables the endpoint.
	//
	// Must only be called when the stack is in a state that allows the endpoint
	// to send and receive packets.
	//
	// Returns *tcpip.ErrNotPermitted if the endpoint cannot be enabled.
	Enable() tcpip.Error

	// Enabled returns true if the endpoint is enabled.
	Enabled() bool

	// Disable disables the endpoint.
	Disable()

	// DefaultTTL is the default time-to-live value (or hop limit, in ipv6)
	// for this endpoint.
	DefaultTTL() uint8

	// MTU is the maximum transmission unit for this endpoint. This is
	// generally calculated as the MTU of the underlying data link endpoint
	// minus the network endpoint max header length.
	MTU() uint32

	// MaxHeaderLength returns the maximum size the network (and lower
	// level layers combined) headers can have. Higher levels use this
	// information to reserve space in the front of the packets they're
	// building.
	MaxHeaderLength() uint16

	// WritePacket writes a packet to the given destination address and
	// protocol.
	//
	// WritePacket may modify pkt. pkt.TransportHeader must have already been
	// set.
	WritePacket(r *Route, params NetworkHeaderParams, pkt *PacketBuffer) tcpip.Error

	// WriteHeaderIncludedPacket writes a packet that includes a network
	// header to the given destination address.
	//
	// WriteHeaderIncludedPacket may modify pkt.
	WriteHeaderIncludedPacket(r *Route, pkt *PacketBuffer) tcpip.Error

	// HandlePacket is called by the link layer when new packets arrive to
	// this network endpoint. It sets pkt.NetworkHeader.
	//
	// HandlePacket may modify pkt.
	HandlePacket(pkt *PacketBuffer)

	// Close is called when the endpoint is removed from a stack.
	Close()

	// NetworkProtocolNumber returns the tcpip.NetworkProtocolNumber for
	// this endpoint.
	NetworkProtocolNumber() tcpip.NetworkProtocolNumber

	// Stats returns a reference to the network endpoint stats.
	Stats() NetworkEndpointStats
}
 905  
// NetworkEndpointStats is the interface implemented by each network endpoint
// stats struct.
//
// It is a marker interface: its only method carries no behavior.
type NetworkEndpointStats interface {
	// IsNetworkEndpointStats is an empty method to implement the
	// NetworkEndpointStats marker interface.
	IsNetworkEndpointStats()
}
 913  
// IPNetworkEndpointStats is a NetworkEndpointStats that tracks IP-related
// statistics.
type IPNetworkEndpointStats interface {
	NetworkEndpointStats

	// IPStats returns the IP statistics of a network endpoint.
	//
	// The result is a pointer to a tcpip.IPStats.
	IPStats() *tcpip.IPStats
}
 922  
// ForwardingNetworkEndpoint is a network endpoint that may forward packets.
type ForwardingNetworkEndpoint interface {
	NetworkEndpoint

	// Forwarding returns the forwarding configuration.
	//
	// NOTE(review): presumably true means forwarding is enabled; confirm
	// against implementations.
	Forwarding() bool

	// SetForwarding sets the forwarding configuration.
	//
	// Returns the previous forwarding configuration.
	SetForwarding(bool) bool
}
 935  
// MulticastForwardingNetworkEndpoint is a network endpoint that may forward
// multicast packets.
//
// It extends ForwardingNetworkEndpoint with a separate multicast forwarding
// configuration.
type MulticastForwardingNetworkEndpoint interface {
	ForwardingNetworkEndpoint

	// MulticastForwarding returns true if multicast forwarding is enabled.
	// Otherwise, returns false.
	MulticastForwarding() bool

	// SetMulticastForwarding sets the multicast forwarding configuration.
	//
	// Returns the previous multicast forwarding configuration.
	SetMulticastForwarding(bool) bool
}
 950  
// NetworkProtocol is the interface that needs to be implemented by network
// protocols (e.g., ipv4, ipv6) that want to be part of the networking stack.
type NetworkProtocol interface {
	// Number returns the network protocol number.
	Number() tcpip.NetworkProtocolNumber

	// MinimumPacketSize returns the minimum valid packet size of this
	// network protocol. The stack automatically drops any packets smaller
	// than this targeted at this protocol.
	MinimumPacketSize() int

	// ParseAddresses returns the source and destination addresses stored in a
	// packet of this protocol.
	ParseAddresses(b []byte) (src, dst tcpip.Address)

	// NewEndpoint creates a new endpoint of this protocol.
	NewEndpoint(nic NetworkInterface, dispatcher TransportDispatcher) NetworkEndpoint

	// SetOption allows enabling/disabling protocol specific features.
	//
	// SetOption returns an error if the option is not supported or the
	// provided option value is invalid.
	SetOption(option tcpip.SettableNetworkProtocolOption) tcpip.Error

	// Option allows retrieving protocol specific option values.
	//
	// Option returns an error if the option is not supported or the
	// provided option value is invalid.
	Option(option tcpip.GettableNetworkProtocolOption) tcpip.Error

	// Close requests that any worker goroutines owned by the protocol
	// stop.
	Close()

	// Wait waits for any worker goroutines owned by the protocol to stop.
	Wait()

	// Parse sets pkt.NetworkHeader and trims pkt.Data appropriately. It
	// returns:
	//	- The encapsulated protocol, if present.
	//	- Whether there is an encapsulated transport protocol payload (e.g. ARP
	//		does not encapsulate anything).
	//	- Whether pkt.Data was large enough to parse and set pkt.NetworkHeader.
	Parse(pkt *PacketBuffer) (proto tcpip.TransportProtocolNumber, hasTransportHdr bool, ok bool)
}
 994  
// UnicastSourceAndMulticastDestination is a tuple that represents a unicast
// source address and a multicast destination address.
//
// +stateify savable
type UnicastSourceAndMulticastDestination struct {
	// Source represents a unicast source address.
	Source tcpip.Address
	// Destination represents a multicast destination address.
	Destination tcpip.Address
}
1005  
// MulticastRouteOutgoingInterface represents an outgoing interface in a
// multicast route.
type MulticastRouteOutgoingInterface struct {
	// ID corresponds to the outgoing NIC.
	ID tcpip.NICID

	// MinTTL represents the minimum TTL/HopLimit a multicast packet must have to
	// be sent through the outgoing interface.
	//
	// Note: a value of 0 allows all packets to be forwarded.
	MinTTL uint8
}
1018  
// MulticastRoute is a multicast route: an expected ingress interface plus the
// set of interfaces matching packets are forwarded out of.
type MulticastRoute struct {
	// ExpectedInputInterface is the interface on which packets using this route
	// are expected to ingress.
	ExpectedInputInterface tcpip.NICID

	// OutgoingInterfaces is the set of interfaces that a multicast packet should
	// be forwarded out of.
	//
	// This field should not be empty.
	OutgoingInterfaces []MulticastRouteOutgoingInterface
}
1031  
// MulticastForwardingNetworkProtocol is the interface that needs to be
// implemented by the network protocols that support multicast forwarding.
type MulticastForwardingNetworkProtocol interface {
	NetworkProtocol

	// AddMulticastRoute adds a route to the multicast routing table such that
	// packets matching the addresses will be forwarded using the provided route.
	//
	// Returns an error if the addresses or route is invalid.
	AddMulticastRoute(UnicastSourceAndMulticastDestination, MulticastRoute) tcpip.Error

	// RemoveMulticastRoute removes the route matching the provided addresses
	// from the multicast routing table.
	//
	// Returns an error if the addresses are invalid or a matching route is not
	// found.
	RemoveMulticastRoute(UnicastSourceAndMulticastDestination) tcpip.Error

	// MulticastRouteLastUsedTime returns a monotonic timestamp that
	// represents the last time that the route matching the provided addresses
	// was used or updated.
	//
	// Returns an error if the addresses are invalid or a matching route was not
	// found.
	MulticastRouteLastUsedTime(UnicastSourceAndMulticastDestination) (tcpip.MonotonicTime, tcpip.Error)

	// EnableMulticastForwarding enables multicast forwarding for the protocol.
	//
	// Returns an error if the provided multicast forwarding event dispatcher is
	// nil. Otherwise, the returned bool is true if multicast forwarding was
	// already enabled.
	EnableMulticastForwarding(MulticastForwardingEventDispatcher) (bool, tcpip.Error)

	// DisableMulticastForwarding disables multicast forwarding for the protocol.
	DisableMulticastForwarding()
}
1068  
// MulticastPacketContext is the context in which a multicast packet triggered
// a multicast forwarding event.
type MulticastPacketContext struct {
	// SourceAndDestination contains the unicast source address and the multicast
	// destination address found in the relevant multicast packet.
	SourceAndDestination UnicastSourceAndMulticastDestination
	// InputInterface is the interface on which the relevant multicast packet
	// arrived.
	InputInterface tcpip.NICID
}
1079  
// MulticastForwardingEventDispatcher is the interface that integrators should
// implement to handle multicast routing events.
type MulticastForwardingEventDispatcher interface {
	// OnMissingRoute is called when an incoming multicast packet does not match
	// any installed route.
	//
	// The packet that triggered this event may be queued so that it can be
	// transmitted once a route is installed. Even then, it may still be dropped
	// as per the routing table's GC/eviction policy.
	OnMissingRoute(MulticastPacketContext)

	// OnUnexpectedInputInterface is called when a multicast packet arrives at an
	// interface that does not match the installed route's expected input
	// interface.
	//
	// This may be an indication of a routing loop. The packet that triggered
	// this event is dropped without being forwarded.
	OnUnexpectedInputInterface(context MulticastPacketContext, expectedInputInterface tcpip.NICID)
}
1099  
// NetworkDispatcher contains the methods used by the network stack to deliver
// inbound/outbound packets to the appropriate network/packet (if any)
// endpoints.
type NetworkDispatcher interface {
	// DeliverNetworkPacket finds the appropriate network protocol endpoint
	// and hands the packet over for further processing.
	//
	// If the link-layer has a header, the packet's link header must be populated.
	//
	// DeliverNetworkPacket may modify pkt.
	DeliverNetworkPacket(protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer)

	// DeliverLinkPacket delivers a packet to any interested packet endpoints.
	//
	// This method should be called with both incoming and outgoing packets.
	//
	// If the link-layer has a header, the packet's link header must be populated.
	DeliverLinkPacket(protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer)
}
1119  
// LinkEndpointCapabilities is the type associated with the capabilities
// supported by a link-layer endpoint. It is a set of bitfields.
type LinkEndpointCapabilities uint

// The following are the supported link endpoint capabilities.
//
// Each capability other than CapabilityNone occupies a single bit. Bit 0 is
// not used by any capability; the first flag starts at bit 1.
const (
	// CapabilityNone is the empty capability set.
	CapabilityNone LinkEndpointCapabilities = 0

	// CapabilityTXChecksumOffload indicates that the link endpoint supports
	// checksum computation for outgoing packets and the stack can skip
	// computing checksums when sending packets.
	CapabilityTXChecksumOffload LinkEndpointCapabilities = 1 << 1

	// CapabilityRXChecksumOffload indicates that the link endpoint supports
	// checksum verification on received packets and that it's safe for the
	// stack to skip checksum verification.
	CapabilityRXChecksumOffload LinkEndpointCapabilities = 1 << 2

	// CapabilityResolutionRequired is a capability flag.
	//
	// NOTE(review): presumably indicates that link address resolution is
	// required before sending; confirm against users of this flag.
	CapabilityResolutionRequired LinkEndpointCapabilities = 1 << 3

	// CapabilitySaveRestore is a capability flag.
	//
	// NOTE(review): presumably indicates that the endpoint supports
	// save/restore; confirm against users of this flag.
	CapabilitySaveRestore LinkEndpointCapabilities = 1 << 4

	// CapabilityDisconnectOk is a capability flag.
	//
	// NOTE(review): semantics are not documented here; confirm against users
	// of this flag.
	CapabilityDisconnectOk LinkEndpointCapabilities = 1 << 5

	// CapabilityLoopback is a capability flag.
	//
	// NOTE(review): presumably marks a loopback link endpoint; confirm against
	// users of this flag.
	CapabilityLoopback LinkEndpointCapabilities = 1 << 6
)
1140  
// LinkWriter is an interface that supports sending packets via a data-link
// layer endpoint. It is used with QueueingDiscipline to batch writes from
// upper layer endpoints.
type LinkWriter interface {
	// WritePackets writes packets. Must not be called with an empty list of
	// packet buffers.
	//
	// Each packet must have the link-layer header set, if the link requires
	// one.
	//
	// WritePackets may modify the packet buffers, and takes ownership of the
	// PacketBufferList. It is not safe to use the PacketBufferList after a
	// call to WritePackets.
	//
	// NOTE(review): the int result is presumably the number of packets
	// written; confirm against implementations.
	WritePackets(PacketBufferList) (int, tcpip.Error)
}
1155  
// NetworkLinkEndpoint is a data-link layer that supports sending network
// layer packets.
type NetworkLinkEndpoint interface {
	// MTU is the maximum transmission unit for this endpoint. This is
	// usually dictated by the backing physical network; when such a
	// physical network doesn't exist, the limit is generally 64k, which
	// includes the maximum size of an IP packet.
	MTU() uint32

	// SetMTU updates the maximum transmission unit for the endpoint.
	SetMTU(mtu uint32)

	// MaxHeaderLength returns the maximum size the data link (and
	// lower level layers combined) headers can have. Higher levels use this
	// information to reserve space in the front of the packets they're
	// building.
	MaxHeaderLength() uint16

	// LinkAddress returns the link address (typically a MAC) of the
	// endpoint.
	LinkAddress() tcpip.LinkAddress

	// SetLinkAddress updates the endpoint's link address (typically a MAC).
	SetLinkAddress(addr tcpip.LinkAddress)

	// Capabilities returns the set of capabilities supported by the
	// endpoint.
	Capabilities() LinkEndpointCapabilities

	// Attach attaches the data link layer endpoint to the network-layer
	// dispatcher of the stack.
	//
	// Attach is called with a nil dispatcher when the endpoint's NIC is being
	// removed.
	Attach(dispatcher NetworkDispatcher)

	// IsAttached returns whether a NetworkDispatcher is attached to the
	// endpoint.
	IsAttached() bool

	// Wait waits for any worker goroutines owned by the endpoint to stop.
	//
	// For now, requesting that an endpoint's worker goroutine(s) stop is
	// implementation specific.
	//
	// Wait will not block if the endpoint hasn't started any goroutines
	// yet, even if it might later.
	Wait()

	// ARPHardwareType returns the ARPHRD_TYPE of the link endpoint.
	//
	// See:
	// https://github.com/torvalds/linux/blob/aa0c9086b40c17a7ad94425b3b70dd1fdd7497bf/include/uapi/linux/if_arp.h#L30
	ARPHardwareType() header.ARPHardwareType

	// AddHeader adds a link layer header to the packet if required.
	AddHeader(*PacketBuffer)

	// ParseHeader parses the link layer header of the packet.
	//
	// NOTE(review): the bool result presumably reports whether parsing
	// succeeded; confirm against implementations.
	ParseHeader(*PacketBuffer) bool

	// Close is called when the endpoint is removed from a stack.
	Close()

	// SetOnCloseAction sets the action that will be executed before closing the
	// endpoint. It is used to destroy a network device when its endpoint
	// is closed. Endpoints that are closed only after destroying their
	// network devices can implement this method as a no-op.
	SetOnCloseAction(func())
}
1226  
// QueueingDiscipline provides a queueing strategy for outgoing packets (e.g.,
// FIFO, LIFO, Random Early Drop etc).
type QueueingDiscipline interface {
	// WritePacket writes a packet.
	//
	// WritePacket may modify the packet buffer. The packet buffer's
	// network and transport header must be set.
	//
	// To participate in transparent bridging, a LinkEndpoint implementation
	// should call eth.Encode with header.EthernetFields.SrcAddr set to
	// pkt.EgressRoute.LocalLinkAddress if it is provided.
	WritePacket(*PacketBuffer) tcpip.Error

	// Close closes the queueing discipline.
	//
	// NOTE(review): the exact shutdown semantics (e.g. what happens to
	// packets still queued) are not specified here; confirm against
	// implementations.
	Close()
}
1242  
// LinkEndpoint is the interface implemented by data link layer protocols (e.g.,
// ethernet, loopback, raw) and used by network layer protocols to send packets
// out through the implementer's data link endpoint. When a link header exists,
// it sets each PacketBuffer's LinkHeader field before passing it up the
// stack.
//
// It is the combination of NetworkLinkEndpoint and LinkWriter.
type LinkEndpoint interface {
	NetworkLinkEndpoint
	LinkWriter
}
1252  
// InjectableLinkEndpoint is a LinkEndpoint where inbound packets are
// delivered via the InjectInbound method.
type InjectableLinkEndpoint interface {
	LinkEndpoint

	// InjectInbound injects an inbound packet.
	InjectInbound(protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer)

	// InjectOutbound writes a fully formed outbound packet directly to the
	// link.
	//
	// dest is used by endpoints with multiple raw destinations.
	InjectOutbound(dest tcpip.Address, packet *buffer.View) tcpip.Error
}
1267  
// DADResult is a marker interface for the result of a duplicate address
// detection process.
//
// Its only method is unexported, so only types declared in this package can
// implement it.
type DADResult interface {
	isDADResult()
}
1273  
1274  var _ DADResult = (*DADSucceeded)(nil)
1275  
1276  // DADSucceeded indicates DAD completed without finding any duplicate addresses.
1277  type DADSucceeded struct{}
1278  
1279  func (*DADSucceeded) isDADResult() {}
1280  
1281  var _ DADResult = (*DADError)(nil)
1282  
1283  // DADError indicates DAD hit an error.
1284  type DADError struct {
1285  	Err tcpip.Error
1286  }
1287  
1288  func (*DADError) isDADResult() {}
1289  
1290  var _ DADResult = (*DADAborted)(nil)
1291  
1292  // DADAborted indicates DAD was aborted.
1293  type DADAborted struct{}
1294  
1295  func (*DADAborted) isDADResult() {}
1296  
1297  var _ DADResult = (*DADDupAddrDetected)(nil)
1298  
1299  // DADDupAddrDetected indicates DAD detected a duplicate address.
1300  type DADDupAddrDetected struct {
1301  	// HolderLinkAddress is the link address of the node that holds the duplicate
1302  	// address.
1303  	HolderLinkAddress tcpip.LinkAddress
1304  }
1305  
1306  func (*DADDupAddrDetected) isDADResult() {}
1307  
// DADCompletionHandler is a handler for DAD completion.
//
// It is passed the DADResult of the DAD process.
type DADCompletionHandler func(DADResult)
1310  
// DADCheckAddressDisposition enumerates the possible return values from
// DAD.CheckDuplicateAddress.
//
// The zero value is not assigned to any disposition; valid values start at 1.
type DADCheckAddressDisposition int

const (
	// DADDisabled indicates that DAD is disabled.
	DADDisabled DADCheckAddressDisposition = iota + 1

	// DADStarting indicates that DAD is starting for an address.
	DADStarting

	// DADAlreadyRunning indicates that DAD was already started for an address.
	DADAlreadyRunning
)
1327  
// defaultDupAddrDetectTransmits is the default number of NDP Neighbor
// Solicitation messages to send when doing Duplicate Address Detection
// for a tentative address.
//
// Default = 1 (from RFC 4862 section 5.1)
const defaultDupAddrDetectTransmits = 1
1336  
// DADConfigurations holds configurations for duplicate address detection.
//
// +stateify savable
type DADConfigurations struct {
	// DupAddrDetectTransmits is the number of Neighbor Solicitation messages
	// to send when doing Duplicate Address Detection for a tentative address.
	//
	// Note, a value of zero effectively disables DAD.
	DupAddrDetectTransmits uint8

	// RetransmitTimer is the amount of time to wait between sending Neighbor
	// Solicitation messages.
	//
	// Must be greater than or equal to 1ms.
	RetransmitTimer time.Duration
}
1353  
1354  // DefaultDADConfigurations returns the default DAD configurations.
1355  func DefaultDADConfigurations() DADConfigurations {
1356  	return DADConfigurations{
1357  		DupAddrDetectTransmits: defaultDupAddrDetectTransmits,
1358  		RetransmitTimer:        defaultRetransmitTimer,
1359  	}
1360  }
1361  
1362  // Validate modifies the configuration with valid values. If invalid values are
1363  // present in the configurations, the corresponding default values are used
1364  // instead.
1365  func (c *DADConfigurations) Validate() {
1366  	if c.RetransmitTimer < minimumRetransmitTimer {
1367  		c.RetransmitTimer = defaultRetransmitTimer
1368  	}
1369  }
1370  
// DuplicateAddressDetector handles checking if an address is already assigned
// to some neighboring node on the link.
type DuplicateAddressDetector interface {
	// CheckDuplicateAddress checks if an address is assigned to a neighbor.
	//
	// If DAD is already being performed for the address, the handler will be
	// called with the result of the original DAD request.
	//
	// The returned DADCheckAddressDisposition describes how the request was
	// handled.
	CheckDuplicateAddress(tcpip.Address, DADCompletionHandler) DADCheckAddressDisposition

	// SetDADConfigurations sets the configurations for DAD.
	SetDADConfigurations(c DADConfigurations)

	// DuplicateAddressProtocol returns the network protocol the receiver can
	// perform duplicate address detection for.
	DuplicateAddressProtocol() tcpip.NetworkProtocolNumber
}
1387  
// LinkAddressResolver handles link address resolution for a network protocol.
type LinkAddressResolver interface {
	// LinkAddressRequest sends a request for the link address of the target
	// address. The request is broadcast on the local network if a remote link
	// address is not provided.
	LinkAddressRequest(targetAddr, localAddr tcpip.Address, remoteLinkAddr tcpip.LinkAddress) tcpip.Error

	// ResolveStaticAddress attempts to resolve an address without sending
	// requests. It either resolves the address immediately or returns the
	// empty LinkAddress.
	//
	// It can be used to resolve broadcast addresses for example.
	//
	// NOTE(review): the bool result presumably reports whether the address was
	// resolved; confirm against implementations.
	ResolveStaticAddress(addr tcpip.Address) (tcpip.LinkAddress, bool)

	// LinkAddressProtocol returns the network protocol of the
	// addresses this resolver can resolve.
	LinkAddressProtocol() tcpip.NetworkProtocolNumber
}
1406  
// RawFactory produces endpoints for writing various types of raw packets.
type RawFactory interface {
	// NewUnassociatedEndpoint produces endpoints for writing packets not
	// associated with a particular transport protocol. Such endpoints can
	// be used to write arbitrary packets that include the network header.
	NewUnassociatedEndpoint(stack *Stack, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error)

	// NewPacketEndpoint produces endpoints for reading and writing packets
	// that include network and (when cooked is false) link layer headers.
	NewPacketEndpoint(stack *Stack, cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error)
}
1418  
// GSOType is the type of GSO segments.
//
// +stateify savable
type GSOType int

// Types of gso segments.
const (
	// GSONone is the zero value of GSOType.
	GSONone GSOType = 0

	// GSOTCPv4 is a hardware GSO type.
	GSOTCPv4 GSOType = 1

	// GSOTCPv6 is a hardware GSO type.
	GSOTCPv6 GSOType = 2

	// GSOGvisor is used for gVisor GSO segments which have to be sent by
	// endpoint.WritePackets.
	GSOGvisor GSOType = 3
)
1436  
// GSO contains generic segmentation offload properties.
//
// +stateify savable
type GSO struct {
	// Type is one of GSONone, GSOTCPv4, etc.
	Type GSOType
	// NeedsCsum is set if the checksum offload is enabled.
	NeedsCsum bool
	// CsumOffset is the offset at which to place the checksum.
	//
	// NOTE(review): the reference point of this offset is not stated here;
	// confirm against users of this field.
	CsumOffset uint16

	// MSS is the maximum segment size.
	MSS uint16
	// L3HdrLen is the L3 (IP) header length.
	L3HdrLen uint16

	// MaxSize is the maximum GSO packet size.
	MaxSize uint32
}
1456  
// SupportedGSO is the type of segmentation offloading supported.
type SupportedGSO int

const (
	// GSONotSupported indicates that segmentation offloading is not supported.
	// It is the zero value of SupportedGSO.
	GSONotSupported SupportedGSO = 0

	// HostGSOSupported indicates that segmentation offloading may be performed
	// by the host. This is typically true when netstack is attached to a host
	// AF_PACKET socket, and not true when attached to a unix socket or other
	// non-networking data layer.
	HostGSOSupported SupportedGSO = 1

	// GVisorGSOSupported indicates that segmentation offloading may be performed
	// in gVisor.
	GVisorGSOSupported SupportedGSO = 2
)
1474  
// GSOEndpoint provides access to GSO properties.
type GSOEndpoint interface {
	// GSOMaxSize returns the maximum GSO packet size.
	GSOMaxSize() uint32

	// SupportedGSO returns the supported segmentation offloading, as one of
	// the SupportedGSO values.
	SupportedGSO() SupportedGSO
}
1483  
// GVisorGSOMaxSize is a maximum allowed size of a software GSO segment.
// This isn't a hard limit, because it is never set into packet headers.
const GVisorGSOMaxSize = 64 << 10 // 65536
1487