nic.go raw

   1  // Copyright 2018 The gVisor Authors.
   2  //
   3  // Licensed under the Apache License, Version 2.0 (the "License");
   4  // you may not use this file except in compliance with the License.
   5  // You may obtain a copy of the License at
   6  //
   7  //     http://www.apache.org/licenses/LICENSE-2.0
   8  //
   9  // Unless required by applicable law or agreed to in writing, software
  10  // distributed under the License is distributed on an "AS IS" BASIS,
  11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12  // See the License for the specific language governing permissions and
  13  // limitations under the License.
  14  
  15  package stack
  16  
  17  import (
  18  	"fmt"
  19  	"reflect"
  20  	"sort"
  21  
  22  	"gvisor.dev/gvisor/pkg/atomicbitops"
  23  	"gvisor.dev/gvisor/pkg/tcpip"
  24  	"gvisor.dev/gvisor/pkg/tcpip/header"
  25  )
  26  
// linkResolver pairs a LinkAddressResolver with the neighbor cache that is
// initialized against it.
//
// +stateify savable
type linkResolver struct {
	// resolver is consulted for static link address resolution and is the
	// resolver neigh is initialized with.
	resolver LinkAddressResolver

	// neigh is the neighbor cache associated with resolver.
	neigh neighborCache
}
  33  
// Compile-time checks that *nic satisfies NetworkInterface and
// NetworkDispatcher.
var _ NetworkInterface = (*nic)(nil)
var _ NetworkDispatcher = (*nic)(nil)
  36  
// nic represents a "network interface card" to which the networking stack is
// attached.
//
// +stateify savable
type nic struct {
	// NetworkLinkEndpoint is the link endpoint this NIC sits on top of; its
	// methods are promoted onto nic.
	NetworkLinkEndpoint

	// stack is the Stack this NIC was created for. id is the NIC's identifier,
	// and name and context come from the NICOptions given at creation.
	stack   *Stack
	id      tcpip.NICID
	name    string
	context NICContext

	// stats holds this NIC's counters (see makeNICStats).
	stats sharedStats

	// enableDisableMu is used to synchronize attempts to enable/disable the NIC.
	// Without this mutex, calls to enable/disable the NIC may interleave and
	// leave the NIC in an inconsistent state.
	enableDisableMu nicRWMutex `state:"nosave"`

	// The network endpoints themselves may be modified by calling the interface's
	// methods, but the map reference and entries must be constant.
	networkEndpoints          map[tcpip.NetworkProtocolNumber]NetworkEndpoint
	linkAddrResolvers         map[tcpip.NetworkProtocolNumber]*linkResolver
	duplicateAddressDetectors map[tcpip.NetworkProtocolNumber]DuplicateAddressDetector

	// enabled indicates whether the NIC is enabled.
	enabled atomicbitops.Bool

	// spoofing indicates whether the NIC is spoofing.
	spoofing atomicbitops.Bool

	// promiscuous indicates whether the NIC is promiscuous.
	promiscuous atomicbitops.Bool

	// linkResQueue holds packets that are waiting for link resolution to
	// complete.
	linkResQueue packetsPendingLinkResolution

	// packetEPsMu protects annotated fields below.
	packetEPsMu packetEPsRWMutex `state:"nosave"`

	// packetEPs is protected by the mutex, but the values contained in it are
	// not.
	//
	// +checklocks:packetEPsMu
	packetEPs map[tcpip.NetworkProtocolNumber]*packetEndpointList

	// qDisc is the queueing discipline outgoing packets are written to. When
	// none is configured, newNIC installs one that passes packets straight to
	// the link endpoint.
	qDisc QueueingDiscipline

	// deliverLinkPackets specifies whether this NIC delivers packets to
	// packet sockets. It is immutable.
	//
	// deliverLinkPackets is off by default because some users already
	// deliver link packets by explicitly calling nic.DeliverLinkPackets.
	deliverLinkPackets bool

	// Primary is the main controlling interface in a bonded setup.
	Primary *nic

	// experimentIPOptionEnabled indicates whether the NIC supports the
	// experiment IP option.
	experimentIPOptionEnabled bool
}
  99  
 100  // makeNICStats initializes the NIC statistics and associates them to the global
 101  // NIC statistics.
 102  func makeNICStats(global tcpip.NICStats) sharedStats {
 103  	var stats sharedStats
 104  	tcpip.InitStatCounters(reflect.ValueOf(&stats.local).Elem())
 105  	stats.init(&stats.local, &global)
 106  	return stats
 107  }
 108  
// packetEndpointList is a mutex-guarded list of packet endpoints.
//
// +stateify savable
type packetEndpointList struct {
	// mu guards eps.
	mu packetEndpointListRWMutex `state:"nosave"`

	// eps is protected by mu, but the contained PacketEndpoint values are not.
	//
	// +checklocks:mu
	eps []PacketEndpoint
}
 118  
 119  func (p *packetEndpointList) add(ep PacketEndpoint) {
 120  	p.mu.Lock()
 121  	defer p.mu.Unlock()
 122  	p.eps = append(p.eps, ep)
 123  }
 124  
 125  func (p *packetEndpointList) remove(ep PacketEndpoint) {
 126  	p.mu.Lock()
 127  	defer p.mu.Unlock()
 128  	for i, epOther := range p.eps {
 129  		if epOther == ep {
 130  			p.eps = append(p.eps[:i], p.eps[i+1:]...)
 131  			break
 132  		}
 133  	}
 134  }
 135  
 136  func (p *packetEndpointList) len() int {
 137  	p.mu.RLock()
 138  	defer p.mu.RUnlock()
 139  	return len(p.eps)
 140  }
 141  
 142  // forEach calls fn with each endpoints in p while holding the read lock on p.
 143  func (p *packetEndpointList) forEach(fn func(PacketEndpoint)) {
 144  	p.mu.RLock()
 145  	defer p.mu.RUnlock()
 146  	for _, ep := range p.eps {
 147  		fn(ep)
 148  	}
 149  }
 150  
// Compile-time check that *delegatingQueueingDiscipline satisfies
// QueueingDiscipline.
var _ QueueingDiscipline = (*delegatingQueueingDiscipline)(nil)
 152  
// delegatingQueueingDiscipline is a QueueingDiscipline that does no queueing
// of its own: it forwards packets to the embedded LinkWriter. newNIC uses it
// when no queueing discipline is configured.
//
// +stateify savable
type delegatingQueueingDiscipline struct {
	LinkWriter
}
 157  
// Close is a no-op.
func (*delegatingQueueingDiscipline) Close() {}
 159  
 160  // WritePacket passes the packet through to the underlying LinkWriter's WritePackets.
 161  func (qDisc *delegatingQueueingDiscipline) WritePacket(pkt *PacketBuffer) tcpip.Error {
 162  	var pkts PacketBufferList
 163  	pkts.PushBack(pkt)
 164  	_, err := qDisc.LinkWriter.WritePackets(pkts)
 165  	return err
 166  }
 167  
// newNIC returns a new NIC using the default NDP configurations from stack.
//
// It creates one network endpoint per network protocol registered on stack,
// registers link address resolvers (only when the link endpoint requires
// resolution) and duplicate address detectors for the endpoints that
// implement them, and finally attaches the NIC to the link endpoint ep.
func newNIC(stack *Stack, id tcpip.NICID, ep LinkEndpoint, opts NICOptions) *nic {
	// TODO(b/141011931): Validate a LinkEndpoint (ep) is valid. For
	// example, make sure that the link address it provides is a valid
	// unicast ethernet address.

	// If no queueing discipline was specified provide a stub implementation that
	// just delegates to the lower link endpoint.
	qDisc := opts.QDisc
	if qDisc == nil {
		qDisc = &delegatingQueueingDiscipline{LinkWriter: ep}
	}

	// TODO(b/143357959): RFC 8200 section 5 requires that IPv6 endpoints
	// observe an MTU of at least 1280 bytes. Ensure that this requirement
	// of IPv6 is supported on this endpoint's LinkEndpoint.
	nic := &nic{
		NetworkLinkEndpoint:       ep,
		stack:                     stack,
		id:                        id,
		name:                      opts.Name,
		context:                   opts.Context,
		stats:                     makeNICStats(stack.Stats().NICs),
		networkEndpoints:          make(map[tcpip.NetworkProtocolNumber]NetworkEndpoint),
		linkAddrResolvers:         make(map[tcpip.NetworkProtocolNumber]*linkResolver),
		duplicateAddressDetectors: make(map[tcpip.NetworkProtocolNumber]DuplicateAddressDetector),
		qDisc:                     qDisc,
		deliverLinkPackets:        opts.DeliverLinkPackets,
		experimentIPOptionEnabled: opts.EnableExperimentIPOption,
	}
	nic.linkResQueue.init(nic)

	// packetEPsMu is held until newNIC returns, i.e. across endpoint creation
	// and the Attach call below.
	nic.packetEPsMu.Lock()
	defer nic.packetEPsMu.Unlock()

	nic.packetEPs = make(map[tcpip.NetworkProtocolNumber]*packetEndpointList)

	// Link address resolvers are only registered when the link endpoint
	// advertises that resolution is required.
	resolutionRequired := ep.Capabilities()&CapabilityResolutionRequired != 0

	for _, netProto := range stack.networkProtocols {
		netNum := netProto.Number()
		netEP := netProto.NewEndpoint(nic, nic)
		nic.networkEndpoints[netNum] = netEP

		if resolutionRequired {
			if r, ok := netEP.(LinkAddressResolver); ok {
				l := &linkResolver{resolver: r}
				l.neigh.init(nic, r)
				// Keyed by the protocol the resolver resolves for, not by netNum.
				nic.linkAddrResolvers[r.LinkAddressProtocol()] = l
			}
		}

		if d, ok := netEP.(DuplicateAddressDetector); ok {
			nic.duplicateAddressDetectors[d.DuplicateAddressProtocol()] = d
		}
	}

	// Attach last, once all endpoints and lookup tables are populated.
	nic.NetworkLinkEndpoint.Attach(nic)

	return nic
}
 229  
 230  func (n *nic) getNetworkEndpoint(proto tcpip.NetworkProtocolNumber) NetworkEndpoint {
 231  	return n.networkEndpoints[proto]
 232  }
 233  
 234  // Enabled implements NetworkInterface.
 235  func (n *nic) Enabled() bool {
 236  	return n.enabled.Load()
 237  }
 238  
 239  // setEnabled sets the enabled status for the NIC.
 240  //
 241  // Returns true if the enabled status was updated.
 242  //
 243  // +checklocks:n.enableDisableMu
 244  func (n *nic) setEnabled(v bool) bool {
 245  	return n.enabled.Swap(v) != v
 246  }
 247  
// disable disables n.
//
// It undoes the work done by enable. It takes enableDisableMu for the duration
// of the call and delegates to disableLocked.
func (n *nic) disable() {
	n.enableDisableMu.Lock()
	defer n.enableDisableMu.Unlock()
	n.disableLocked()
}
 256  
 257  // disableLocked disables n.
 258  //
 259  // It undoes the work done by enable.
 260  //
 261  // +checklocks:n.enableDisableMu
 262  func (n *nic) disableLocked() {
 263  	if !n.Enabled() {
 264  		return
 265  	}
 266  
 267  	// TODO(gvisor.dev/issue/1491): Should Routes that are currently bound to n be
 268  	// invalidated? Currently, Routes will continue to work when a NIC is enabled
 269  	// again, and applications may not know that the underlying NIC was ever
 270  	// disabled.
 271  
 272  	for _, ep := range n.networkEndpoints {
 273  		ep.Disable()
 274  
 275  		// Clear the neighbour table (including static entries) as we cannot
 276  		// guarantee that the current neighbour table will be valid when the NIC is
 277  		// enabled again.
 278  		//
 279  		// This matches linux's behaviour at the time of writing:
 280  		// https://github.com/torvalds/linux/blob/71c061d2443814de15e177489d5cc00a4a253ef3/net/core/neighbour.c#L371
 281  		netProto := ep.NetworkProtocolNumber()
 282  		switch err := n.clearNeighbors(netProto); err.(type) {
 283  		case nil, *tcpip.ErrNotSupported:
 284  		default:
 285  			panic(fmt.Sprintf("n.clearNeighbors(%d): %s", netProto, err))
 286  		}
 287  	}
 288  
 289  	if !n.setEnabled(false) {
 290  		panic("should have only done work to disable the NIC if it was enabled")
 291  	}
 292  }
 293  
 294  // enable enables n.
 295  //
 296  // If the stack has IPv6 enabled, enable will join the IPv6 All-Nodes Multicast
 297  // address (ff02::1), start DAD for permanent addresses, and start soliciting
 298  // routers if the stack is not operating as a router. If the stack is also
 299  // configured to auto-generate a link-local address, one will be generated.
 300  func (n *nic) enable() tcpip.Error {
 301  	n.enableDisableMu.Lock()
 302  	defer n.enableDisableMu.Unlock()
 303  
 304  	if !n.setEnabled(true) {
 305  		return nil
 306  	}
 307  
 308  	for _, ep := range n.networkEndpoints {
 309  		if err := ep.Enable(); err != nil {
 310  			return err
 311  		}
 312  	}
 313  
 314  	return nil
 315  }
 316  
 317  // remove detaches NIC from the link endpoint and releases network endpoint
 318  // resources. This guarantees no packets between this NIC and the network
 319  // stack.
 320  //
 321  // It returns an action that has to be excuted after releasing the Stack lock
 322  // and any error encountered.
 323  func (n *nic) remove(closeLinkEndpoint bool) (func(), tcpip.Error) {
 324  	n.enableDisableMu.Lock()
 325  
 326  	n.disableLocked()
 327  
 328  	for _, ep := range n.networkEndpoints {
 329  		ep.Close()
 330  	}
 331  
 332  	n.enableDisableMu.Unlock()
 333  
 334  	// Drain and drop any packets pending link resolution.
 335  	// We must not hold n.enableDisableMu here.
 336  	n.linkResQueue.cancel()
 337  
 338  	var deferAct func()
 339  	// Prevent packets from going down to the link before shutting the link down.
 340  	n.qDisc.Close()
 341  	n.NetworkLinkEndpoint.Attach(nil)
 342  	if closeLinkEndpoint {
 343  		ep := n.NetworkLinkEndpoint
 344  		ep.SetOnCloseAction(nil)
 345  		// The link endpoint has to be closed without holding a
 346  		// netstack lock, because it can trigger other netstack
 347  		// operations.
 348  		deferAct = ep.Close
 349  	}
 350  
 351  	return deferAct, nil
 352  }
 353  
// setPromiscuousMode enables or disables promiscuous mode by storing enable in
// the NIC's promiscuous flag.
func (n *nic) setPromiscuousMode(enable bool) {
	n.promiscuous.Store(enable)
}
 358  
 359  // Promiscuous implements NetworkInterface.
 360  func (n *nic) Promiscuous() bool {
 361  	return n.promiscuous.Load()
 362  }
 363  
 364  // IsLoopback implements NetworkInterface.
 365  func (n *nic) IsLoopback() bool {
 366  	return n.NetworkLinkEndpoint.Capabilities()&CapabilityLoopback != 0
 367  }
 368  
 369  // WritePacket implements NetworkEndpoint.
 370  func (n *nic) WritePacket(r *Route, pkt *PacketBuffer) tcpip.Error {
 371  	routeInfo, _, err := r.resolvedFields(nil)
 372  	switch err.(type) {
 373  	case nil:
 374  		pkt.EgressRoute = routeInfo
 375  		return n.writePacket(pkt)
 376  	case *tcpip.ErrWouldBlock:
 377  		// As per relevant RFCs, we should queue packets while we wait for link
 378  		// resolution to complete.
 379  		//
 380  		// RFC 1122 section 2.3.2.2 (for IPv4):
 381  		//   The link layer SHOULD save (rather than discard) at least
 382  		//   one (the latest) packet of each set of packets destined to
 383  		//   the same unresolved IP address, and transmit the saved
 384  		//   packet when the address has been resolved.
 385  		//
 386  		// RFC 4861 section 7.2.2 (for IPv6):
 387  		//   While waiting for address resolution to complete, the sender MUST, for
 388  		//   each neighbor, retain a small queue of packets waiting for address
 389  		//   resolution to complete. The queue MUST hold at least one packet, and
 390  		//   MAY contain more. However, the number of queued packets per neighbor
 391  		//   SHOULD be limited to some small value. When a queue overflows, the new
 392  		//   arrival SHOULD replace the oldest entry. Once address resolution
 393  		//   completes, the node transmits any queued packets.
 394  		return n.linkResQueue.enqueue(r, pkt)
 395  	default:
 396  		return err
 397  	}
 398  }
 399  
 400  // WritePacketToRemote implements NetworkInterface.
 401  func (n *nic) WritePacketToRemote(remoteLinkAddr tcpip.LinkAddress, pkt *PacketBuffer) tcpip.Error {
 402  	pkt.EgressRoute = RouteInfo{
 403  		routeInfo: routeInfo{
 404  			NetProto:         pkt.NetworkProtocolNumber,
 405  			LocalLinkAddress: n.LinkAddress(),
 406  		},
 407  		RemoteLinkAddress: remoteLinkAddr,
 408  	}
 409  	return n.writePacket(pkt)
 410  }
 411  
// writePacket has the link endpoint add its header to pkt and then writes the
// packet out via writeRawPacket.
func (n *nic) writePacket(pkt *PacketBuffer) tcpip.Error {
	n.NetworkLinkEndpoint.AddHeader(pkt)
	return n.writeRawPacket(pkt)
}
 416  
 417  func (n *nic) writeRawPacketWithLinkHeaderInPayload(pkt *PacketBuffer) tcpip.Error {
 418  	if !n.NetworkLinkEndpoint.ParseHeader(pkt) {
 419  		return &tcpip.ErrMalformedHeader{}
 420  	}
 421  	return n.writeRawPacket(pkt)
 422  }
 423  
 424  func (n *nic) writeRawPacket(pkt *PacketBuffer) tcpip.Error {
 425  	// Always an outgoing packet.
 426  	pkt.PktType = tcpip.PacketOutgoing
 427  
 428  	if n.deliverLinkPackets {
 429  		n.DeliverLinkPacket(pkt.NetworkProtocolNumber, pkt)
 430  	}
 431  
 432  	if err := n.qDisc.WritePacket(pkt); err != nil {
 433  		if _, ok := err.(*tcpip.ErrNoBufferSpace); ok {
 434  			n.stats.txPacketsDroppedNoBufferSpace.Increment()
 435  		}
 436  		return err
 437  	}
 438  
 439  	n.stats.tx.packets.Increment()
 440  	n.stats.tx.bytes.IncrementBy(uint64(pkt.Size()))
 441  	return nil
 442  }
 443  
// setSpoofing enables or disables address spoofing by storing enable in the
// NIC's spoofing flag.
func (n *nic) setSpoofing(enable bool) {
	n.spoofing.Store(enable)
}
 448  
 449  // Spoofing implements NetworkInterface.
 450  func (n *nic) Spoofing() bool {
 451  	return n.spoofing.Load()
 452  }
 453  
 454  // primaryAddress returns an address that can be used to communicate with
 455  // remoteAddr.
 456  func (n *nic) primaryEndpoint(protocol tcpip.NetworkProtocolNumber, remoteAddr, srcHint tcpip.Address) AssignableAddressEndpoint {
 457  	ep := n.getNetworkEndpoint(protocol)
 458  	if ep == nil {
 459  		return nil
 460  	}
 461  
 462  	addressableEndpoint, ok := ep.(AddressableEndpoint)
 463  	if !ok {
 464  		return nil
 465  	}
 466  
 467  	return addressableEndpoint.AcquireOutgoingPrimaryAddress(remoteAddr, srcHint, n.Spoofing())
 468  }
 469  
// getAddressBehaviour selects which NIC flag getAddressOrCreateTemp consults
// when deciding whether a temporary address endpoint may be created.
type getAddressBehaviour int

const (
	// spoofing indicates that the NIC's spoofing flag should be observed when
	// getting a NIC's address endpoint.
	spoofing getAddressBehaviour = iota

	// promiscuous indicates that the NIC's promiscuous flag should be observed
	// when getting a NIC's address endpoint.
	promiscuous
)
 481  
 482  func (n *nic) getAddress(protocol tcpip.NetworkProtocolNumber, dst tcpip.Address) AssignableAddressEndpoint {
 483  	return n.getAddressOrCreateTemp(protocol, dst, CanBePrimaryEndpoint, promiscuous)
 484  }
 485  
 486  func (n *nic) hasAddress(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) bool {
 487  	ep := n.getAddressOrCreateTempInner(protocol, addr, false, NeverPrimaryEndpoint)
 488  	if ep != nil {
 489  		ep.DecRef()
 490  		return true
 491  	}
 492  
 493  	return false
 494  }
 495  
 496  // findEndpoint finds the endpoint, if any, with the given address.
 497  func (n *nic) findEndpoint(protocol tcpip.NetworkProtocolNumber, address tcpip.Address, peb PrimaryEndpointBehavior) AssignableAddressEndpoint {
 498  	return n.getAddressOrCreateTemp(protocol, address, peb, spoofing)
 499  }
 500  
 501  // getAddressEpOrCreateTemp returns the address endpoint for the given protocol
 502  // and address.
 503  //
 504  // If none exists a temporary one may be created if we are in promiscuous mode
 505  // or spoofing. Promiscuous mode will only be checked if promiscuous is true.
 506  // Similarly, spoofing will only be checked if spoofing is true.
 507  //
 508  // If the address is the IPv4 broadcast address for an endpoint's network, that
 509  // endpoint will be returned.
 510  func (n *nic) getAddressOrCreateTemp(protocol tcpip.NetworkProtocolNumber, address tcpip.Address, peb PrimaryEndpointBehavior, tempRef getAddressBehaviour) AssignableAddressEndpoint {
 511  	var spoofingOrPromiscuous bool
 512  	switch tempRef {
 513  	case spoofing:
 514  		spoofingOrPromiscuous = n.Spoofing()
 515  	case promiscuous:
 516  		spoofingOrPromiscuous = n.Promiscuous()
 517  	}
 518  	return n.getAddressOrCreateTempInner(protocol, address, spoofingOrPromiscuous, peb)
 519  }
 520  
 521  // getAddressOrCreateTempInner is like getAddressEpOrCreateTemp except a boolean
 522  // is passed to indicate whether or not we should generate temporary endpoints.
 523  func (n *nic) getAddressOrCreateTempInner(protocol tcpip.NetworkProtocolNumber, address tcpip.Address, createTemp bool, peb PrimaryEndpointBehavior) AssignableAddressEndpoint {
 524  	ep := n.getNetworkEndpoint(protocol)
 525  	if ep == nil {
 526  		return nil
 527  	}
 528  
 529  	addressableEndpoint, ok := ep.(AddressableEndpoint)
 530  	if !ok {
 531  		return nil
 532  	}
 533  
 534  	return addressableEndpoint.AcquireAssignedAddress(address, createTemp, peb, false)
 535  }
 536  
 537  // addAddress adds a new address to n, so that it starts accepting packets
 538  // targeted at the given address (and network protocol).
 539  func (n *nic) addAddress(protocolAddress tcpip.ProtocolAddress, properties AddressProperties) tcpip.Error {
 540  	ep := n.getNetworkEndpoint(protocolAddress.Protocol)
 541  	if ep == nil {
 542  		return &tcpip.ErrUnknownProtocol{}
 543  	}
 544  
 545  	addressableEndpoint, ok := ep.(AddressableEndpoint)
 546  	if !ok {
 547  		return &tcpip.ErrNotSupported{}
 548  	}
 549  
 550  	addressEndpoint, err := addressableEndpoint.AddAndAcquirePermanentAddress(protocolAddress.AddressWithPrefix, properties)
 551  	if err == nil {
 552  		// We have no need for the address endpoint.
 553  		addressEndpoint.DecRef()
 554  	}
 555  	return err
 556  }
 557  
 558  // allPermanentAddresses returns all permanent addresses associated with
 559  // this NIC.
 560  func (n *nic) allPermanentAddresses() []tcpip.ProtocolAddress {
 561  	var addrs []tcpip.ProtocolAddress
 562  	for p, ep := range n.networkEndpoints {
 563  		addressableEndpoint, ok := ep.(AddressableEndpoint)
 564  		if !ok {
 565  			continue
 566  		}
 567  
 568  		for _, a := range addressableEndpoint.PermanentAddresses() {
 569  			addrs = append(addrs, tcpip.ProtocolAddress{Protocol: p, AddressWithPrefix: a})
 570  		}
 571  	}
 572  	return addrs
 573  }
 574  
 575  // primaryAddresses returns the primary addresses associated with this NIC.
 576  func (n *nic) primaryAddresses() []tcpip.ProtocolAddress {
 577  	var addrs []tcpip.ProtocolAddress
 578  
 579  	protocolNumbers := make([]tcpip.NetworkProtocolNumber, 0, len(n.networkEndpoints))
 580  	for p := range n.networkEndpoints {
 581  		protocolNumbers = append(protocolNumbers, p)
 582  	}
 583  	// Sort the network protocol numbers so that IPv4 address is always
 584  	// added to the list before IPv6 address.
 585  	sort.Slice(protocolNumbers, func(i, j int) bool {
 586  		return protocolNumbers[i] < protocolNumbers[j]
 587  	})
 588  
 589  	for _, p := range protocolNumbers {
 590  		addressableEndpoint, ok := n.networkEndpoints[p].(AddressableEndpoint)
 591  		if !ok {
 592  			continue
 593  		}
 594  		for _, a := range addressableEndpoint.PrimaryAddresses() {
 595  			addrs = append(addrs, tcpip.ProtocolAddress{Protocol: p, AddressWithPrefix: a})
 596  		}
 597  	}
 598  	return addrs
 599  }
 600  
 601  // PrimaryAddress implements NetworkInterface.
 602  func (n *nic) PrimaryAddress(proto tcpip.NetworkProtocolNumber) (tcpip.AddressWithPrefix, tcpip.Error) {
 603  	ep := n.getNetworkEndpoint(proto)
 604  	if ep == nil {
 605  		return tcpip.AddressWithPrefix{}, &tcpip.ErrUnknownProtocol{}
 606  	}
 607  
 608  	addressableEndpoint, ok := ep.(AddressableEndpoint)
 609  	if !ok {
 610  		return tcpip.AddressWithPrefix{}, &tcpip.ErrNotSupported{}
 611  	}
 612  
 613  	return addressableEndpoint.MainAddress(), nil
 614  }
 615  
 616  // removeAddress removes an address from n.
 617  func (n *nic) removeAddress(addr tcpip.Address) tcpip.Error {
 618  	for _, ep := range n.networkEndpoints {
 619  		addressableEndpoint, ok := ep.(AddressableEndpoint)
 620  		if !ok {
 621  			continue
 622  		}
 623  
 624  		switch err := addressableEndpoint.RemovePermanentAddress(addr); err.(type) {
 625  		case *tcpip.ErrBadLocalAddress:
 626  			continue
 627  		default:
 628  			return err
 629  		}
 630  	}
 631  
 632  	return &tcpip.ErrBadLocalAddress{}
 633  }
 634  
 635  func (n *nic) setAddressLifetimes(addr tcpip.Address, lifetimes AddressLifetimes) tcpip.Error {
 636  	for _, ep := range n.networkEndpoints {
 637  		ep, ok := ep.(AddressableEndpoint)
 638  		if !ok {
 639  			continue
 640  		}
 641  
 642  		switch err := ep.SetLifetimes(addr, lifetimes); err.(type) {
 643  		case *tcpip.ErrBadLocalAddress:
 644  			continue
 645  		default:
 646  			return err
 647  		}
 648  	}
 649  
 650  	return &tcpip.ErrBadLocalAddress{}
 651  }
 652  
 653  func (n *nic) getLinkAddress(addr, localAddr tcpip.Address, protocol tcpip.NetworkProtocolNumber, onResolve func(LinkResolutionResult)) tcpip.Error {
 654  	linkRes, ok := n.linkAddrResolvers[protocol]
 655  	if !ok {
 656  		return &tcpip.ErrNotSupported{}
 657  	}
 658  
 659  	if linkAddr, ok := linkRes.resolver.ResolveStaticAddress(addr); ok {
 660  		onResolve(LinkResolutionResult{LinkAddress: linkAddr, Err: nil})
 661  		return nil
 662  	}
 663  
 664  	_, _, err := linkRes.neigh.entry(addr, localAddr, onResolve)
 665  	return err
 666  }
 667  
 668  func (n *nic) neighbors(protocol tcpip.NetworkProtocolNumber) ([]NeighborEntry, tcpip.Error) {
 669  	if linkRes, ok := n.linkAddrResolvers[protocol]; ok {
 670  		return linkRes.neigh.entries(), nil
 671  	}
 672  
 673  	return nil, &tcpip.ErrNotSupported{}
 674  }
 675  
 676  func (n *nic) addStaticNeighbor(addr tcpip.Address, protocol tcpip.NetworkProtocolNumber, linkAddress tcpip.LinkAddress) tcpip.Error {
 677  	if linkRes, ok := n.linkAddrResolvers[protocol]; ok {
 678  		linkRes.neigh.addStaticEntry(addr, linkAddress)
 679  		return nil
 680  	}
 681  
 682  	return &tcpip.ErrNotSupported{}
 683  }
 684  
 685  func (n *nic) removeNeighbor(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.Error {
 686  	if linkRes, ok := n.linkAddrResolvers[protocol]; ok {
 687  		if !linkRes.neigh.removeEntry(addr) {
 688  			return &tcpip.ErrBadAddress{}
 689  		}
 690  		return nil
 691  	}
 692  
 693  	return &tcpip.ErrNotSupported{}
 694  }
 695  
 696  func (n *nic) clearNeighbors(protocol tcpip.NetworkProtocolNumber) tcpip.Error {
 697  	if linkRes, ok := n.linkAddrResolvers[protocol]; ok {
 698  		linkRes.neigh.clear()
 699  		return nil
 700  	}
 701  
 702  	return &tcpip.ErrNotSupported{}
 703  }
 704  
 705  // joinGroup adds a new endpoint for the given multicast address, if none
 706  // exists yet. Otherwise it just increments its count.
 707  func (n *nic) joinGroup(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.Error {
 708  	// TODO(b/143102137): When implementing MLD, make sure MLD packets are
 709  	// not sent unless a valid link-local address is available for use on n
 710  	// as an MLD packet's source address must be a link-local address as
 711  	// outlined in RFC 3810 section 5.
 712  
 713  	ep := n.getNetworkEndpoint(protocol)
 714  	if ep == nil {
 715  		return &tcpip.ErrNotSupported{}
 716  	}
 717  
 718  	gep, ok := ep.(GroupAddressableEndpoint)
 719  	if !ok {
 720  		return &tcpip.ErrNotSupported{}
 721  	}
 722  
 723  	return gep.JoinGroup(addr)
 724  }
 725  
 726  // leaveGroup decrements the count for the given multicast address, and when it
 727  // reaches zero removes the endpoint for this address.
 728  func (n *nic) leaveGroup(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.Error {
 729  	ep := n.getNetworkEndpoint(protocol)
 730  	if ep == nil {
 731  		return &tcpip.ErrNotSupported{}
 732  	}
 733  
 734  	gep, ok := ep.(GroupAddressableEndpoint)
 735  	if !ok {
 736  		return &tcpip.ErrNotSupported{}
 737  	}
 738  
 739  	return gep.LeaveGroup(addr)
 740  }
 741  
 742  // isInGroup returns true if n has joined the multicast group addr.
 743  func (n *nic) isInGroup(addr tcpip.Address) bool {
 744  	for _, ep := range n.networkEndpoints {
 745  		gep, ok := ep.(GroupAddressableEndpoint)
 746  		if !ok {
 747  			continue
 748  		}
 749  
 750  		if gep.IsInGroup(addr) {
 751  			return true
 752  		}
 753  	}
 754  
 755  	return false
 756  }
 757  
 758  // DeliverNetworkPacket finds the appropriate network protocol endpoint and
 759  // hands the packet over for further processing. This function is called when
 760  // the NIC receives a packet from the link endpoint.
 761  func (n *nic) DeliverNetworkPacket(protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) {
 762  	enabled := n.Enabled()
 763  	// If the NIC is not yet enabled, don't receive any packets.
 764  	if !enabled {
 765  		n.stats.disabledRx.packets.Increment()
 766  		n.stats.disabledRx.bytes.IncrementBy(uint64(pkt.Data().Size()))
 767  		return
 768  	}
 769  
 770  	n.stats.rx.packets.Increment()
 771  	n.stats.rx.bytes.IncrementBy(uint64(pkt.Data().Size()))
 772  
 773  	networkEndpoint := n.getNetworkEndpoint(protocol)
 774  	if networkEndpoint == nil {
 775  		n.stats.unknownL3ProtocolRcvdPacketCounts.Increment(uint64(protocol))
 776  		return
 777  	}
 778  
 779  	pkt.RXChecksumValidated = n.NetworkLinkEndpoint.Capabilities()&CapabilityRXChecksumOffload != 0
 780  
 781  	if n.deliverLinkPackets {
 782  		n.DeliverLinkPacket(protocol, pkt)
 783  	}
 784  
 785  	networkEndpoint.HandlePacket(pkt)
 786  }
 787  
 788  func (n *nic) DeliverLinkPacket(protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) {
 789  	// Deliver to interested packet endpoints without holding NIC lock.
 790  	var packetEPPkt *PacketBuffer
 791  	defer func() {
 792  		if packetEPPkt != nil {
 793  			packetEPPkt.DecRef()
 794  		}
 795  	}()
 796  	deliverPacketEPs := func(ep PacketEndpoint) {
 797  		if packetEPPkt == nil {
 798  			// Packet endpoints hold the full packet.
 799  			//
 800  			// We perform a deep copy because higher-level endpoints may point to
 801  			// the middle of a view that is held by a packet endpoint. Save/Restore
 802  			// does not support overlapping slices and will panic in this case.
 803  			//
 804  			// TODO(https://gvisor.dev/issue/6517): Avoid this copy once S/R supports
 805  			// overlapping slices (e.g. by passing a shallow copy of pkt to the packet
 806  			// endpoint).
 807  			packetEPPkt = NewPacketBuffer(PacketBufferOptions{
 808  				Payload: BufferSince(pkt.LinkHeader()),
 809  			})
 810  			// If a link header was populated in the original packet buffer, then
 811  			// populate it in the packet buffer we provide to packet endpoints as
 812  			// packet endpoints inspect link headers.
 813  			packetEPPkt.LinkHeader().Consume(len(pkt.LinkHeader().Slice()))
 814  			packetEPPkt.PktType = pkt.PktType
 815  			// Assume the packet is for us if the packet type is unset.
 816  			// The packet type is set to PacketOutgoing when sending packets so
 817  			// this may only be unset for incoming packets where link endpoints
 818  			// have not set it.
 819  			if packetEPPkt.PktType == 0 {
 820  				packetEPPkt.PktType = tcpip.PacketHost
 821  			}
 822  		}
 823  
 824  		clone := packetEPPkt.Clone()
 825  		defer clone.DecRef()
 826  		ep.HandlePacket(n.id, protocol, clone)
 827  	}
 828  
 829  	n.packetEPsMu.Lock()
 830  	// Are any packet type sockets listening for this network protocol?
 831  	protoEPs, protoEPsOK := n.packetEPs[protocol]
 832  	// Other packet type sockets that are listening for all protocols.
 833  	anyEPs, anyEPsOK := n.packetEPs[header.EthernetProtocolAll]
 834  	n.packetEPsMu.Unlock()
 835  
 836  	// On Linux, only ETH_P_ALL endpoints get outbound packets.
 837  	if pkt.PktType != tcpip.PacketOutgoing && protoEPsOK {
 838  		protoEPs.forEach(deliverPacketEPs)
 839  	}
 840  	if anyEPsOK {
 841  		anyEPs.forEach(deliverPacketEPs)
 842  	}
 843  }
 844  
 845  // DeliverTransportPacket delivers the packets to the appropriate transport
 846  // protocol endpoint.
 847  func (n *nic) DeliverTransportPacket(protocol tcpip.TransportProtocolNumber, pkt *PacketBuffer) TransportPacketDisposition {
 848  	state, ok := n.stack.transportProtocols[protocol]
 849  	if !ok {
 850  		n.stats.unknownL4ProtocolRcvdPacketCounts.Increment(uint64(protocol))
 851  		return TransportPacketProtocolUnreachable
 852  	}
 853  
 854  	transProto := state.proto
 855  
 856  	if len(pkt.TransportHeader().Slice()) == 0 {
 857  		n.stats.malformedL4RcvdPackets.Increment()
 858  		return TransportPacketHandled
 859  	}
 860  
 861  	srcPort, dstPort, err := transProto.ParsePorts(pkt.TransportHeader().Slice())
 862  	if err != nil {
 863  		n.stats.malformedL4RcvdPackets.Increment()
 864  		return TransportPacketHandled
 865  	}
 866  
 867  	netProto, ok := n.stack.networkProtocols[pkt.NetworkProtocolNumber]
 868  	if !ok {
 869  		panic(fmt.Sprintf("expected network protocol = %d, have = %#v", pkt.NetworkProtocolNumber, n.stack.networkProtocolNumbers()))
 870  	}
 871  
 872  	src, dst := netProto.ParseAddresses(pkt.NetworkHeader().Slice())
 873  	id := TransportEndpointID{
 874  		LocalPort:     dstPort,
 875  		LocalAddress:  dst,
 876  		RemotePort:    srcPort,
 877  		RemoteAddress: src,
 878  	}
 879  	if n.stack.demux.deliverPacket(protocol, pkt, id) {
 880  		return TransportPacketHandled
 881  	}
 882  
 883  	// Try to deliver to per-stack default handler.
 884  	if state.defaultHandler != nil {
 885  		if state.defaultHandler(id, pkt) {
 886  			return TransportPacketHandled
 887  		}
 888  	}
 889  
 890  	// We could not find an appropriate destination for this packet so
 891  	// give the protocol specific error handler a chance to handle it.
 892  	// If it doesn't handle it then we should do so.
 893  	switch res := transProto.HandleUnknownDestinationPacket(id, pkt); res {
 894  	case UnknownDestinationPacketMalformed:
 895  		n.stats.malformedL4RcvdPackets.Increment()
 896  		return TransportPacketHandled
 897  	case UnknownDestinationPacketUnhandled:
 898  		return TransportPacketDestinationPortUnreachable
 899  	case UnknownDestinationPacketHandled:
 900  		return TransportPacketHandled
 901  	default:
 902  		panic(fmt.Sprintf("unrecognized result from HandleUnknownDestinationPacket = %d", res))
 903  	}
 904  }
 905  
 906  // DeliverTransportError implements TransportDispatcher.
 907  func (n *nic) DeliverTransportError(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, transErr TransportError, pkt *PacketBuffer) {
 908  	state, ok := n.stack.transportProtocols[trans]
 909  	if !ok {
 910  		return
 911  	}
 912  
 913  	transProto := state.proto
 914  
 915  	// ICMPv4 only guarantees that 8 bytes of the transport protocol will
 916  	// be present in the payload. We know that the ports are within the
 917  	// first 8 bytes for all known transport protocols.
 918  	transHeader, ok := pkt.Data().PullUp(8)
 919  	if !ok {
 920  		return
 921  	}
 922  
 923  	srcPort, dstPort, err := transProto.ParsePorts(transHeader)
 924  	if err != nil {
 925  		return
 926  	}
 927  
 928  	id := TransportEndpointID{srcPort, local, dstPort, remote}
 929  	if n.stack.demux.deliverError(n, net, trans, transErr, pkt, id) {
 930  		return
 931  	}
 932  }
 933  
 934  // DeliverRawPacket implements TransportDispatcher.
 935  func (n *nic) DeliverRawPacket(protocol tcpip.TransportProtocolNumber, pkt *PacketBuffer) {
 936  	// For ICMPv4 only we validate the header length for compatibility with
 937  	// raw(7) ICMP_FILTER. The same check is made in Linux here:
 938  	// https://github.com/torvalds/linux/blob/70585216/net/ipv4/raw.c#L189.
 939  	if protocol == header.ICMPv4ProtocolNumber && len(pkt.TransportHeader().Slice())+pkt.Data().Size() < header.ICMPv4MinimumSize {
 940  		return
 941  	}
 942  	n.stack.demux.deliverRawPacket(protocol, pkt)
 943  }
 944  
 945  // ID implements NetworkInterface.
 946  func (n *nic) ID() tcpip.NICID {
 947  	return n.id
 948  }
 949  
 950  // Name implements NetworkInterface.
 951  func (n *nic) Name() string {
 952  	return n.name
 953  }
 954  
 955  // nudConfigs gets the NUD configurations for n.
 956  func (n *nic) nudConfigs(protocol tcpip.NetworkProtocolNumber) (NUDConfigurations, tcpip.Error) {
 957  	if linkRes, ok := n.linkAddrResolvers[protocol]; ok {
 958  		return linkRes.neigh.config(), nil
 959  	}
 960  
 961  	return NUDConfigurations{}, &tcpip.ErrNotSupported{}
 962  }
 963  
 964  // setNUDConfigs sets the NUD configurations for n.
 965  //
 966  // Note, if c contains invalid NUD configuration values, it will be fixed to
 967  // use default values for the erroneous values.
 968  func (n *nic) setNUDConfigs(protocol tcpip.NetworkProtocolNumber, c NUDConfigurations) tcpip.Error {
 969  	if linkRes, ok := n.linkAddrResolvers[protocol]; ok {
 970  		c.resetInvalidFields()
 971  		linkRes.neigh.setConfig(c)
 972  		return nil
 973  	}
 974  
 975  	return &tcpip.ErrNotSupported{}
 976  }
 977  
 978  func (n *nic) registerPacketEndpoint(netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) {
 979  	n.packetEPsMu.Lock()
 980  	defer n.packetEPsMu.Unlock()
 981  
 982  	eps, ok := n.packetEPs[netProto]
 983  	if !ok {
 984  		eps = new(packetEndpointList)
 985  		n.packetEPs[netProto] = eps
 986  	}
 987  	eps.add(ep)
 988  }
 989  
 990  func (n *nic) unregisterPacketEndpoint(netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) {
 991  	n.packetEPsMu.Lock()
 992  	defer n.packetEPsMu.Unlock()
 993  
 994  	eps, ok := n.packetEPs[netProto]
 995  	if !ok {
 996  		return
 997  	}
 998  	eps.remove(ep)
 999  	if eps.len() == 0 {
1000  		delete(n.packetEPs, netProto)
1001  	}
1002  }
1003  
1004  // isValidForOutgoing returns true if the endpoint can be used to send out a
1005  // packet. It requires the endpoint to not be marked expired (i.e., its address
1006  // has been removed) unless the NIC is in spoofing mode, or temporary.
1007  func (n *nic) isValidForOutgoing(ep AssignableAddressEndpoint) bool {
1008  	return n.Enabled() && ep.IsAssigned(n.Spoofing())
1009  }
1010  
1011  // HandleNeighborProbe implements NetworkInterface.
1012  func (n *nic) HandleNeighborProbe(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, linkAddr tcpip.LinkAddress) tcpip.Error {
1013  	if l, ok := n.linkAddrResolvers[protocol]; ok {
1014  		l.neigh.handleProbe(addr, linkAddr)
1015  		return nil
1016  	}
1017  
1018  	return &tcpip.ErrNotSupported{}
1019  }
1020  
1021  // HandleNeighborConfirmation implements NetworkInterface.
1022  func (n *nic) HandleNeighborConfirmation(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, linkAddr tcpip.LinkAddress, flags ReachabilityConfirmationFlags) tcpip.Error {
1023  	if l, ok := n.linkAddrResolvers[protocol]; ok {
1024  		l.neigh.handleConfirmation(addr, linkAddr, flags)
1025  		return nil
1026  	}
1027  
1028  	return &tcpip.ErrNotSupported{}
1029  }
1030  
1031  // CheckLocalAddress implements NetworkInterface.
1032  func (n *nic) CheckLocalAddress(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) bool {
1033  	if n.Spoofing() {
1034  		return true
1035  	}
1036  
1037  	if addressEndpoint := n.getAddressOrCreateTempInner(protocol, addr, false /* createTemp */, NeverPrimaryEndpoint); addressEndpoint != nil {
1038  		addressEndpoint.DecRef()
1039  		return true
1040  	}
1041  
1042  	return false
1043  }
1044  
1045  func (n *nic) checkDuplicateAddress(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, h DADCompletionHandler) (DADCheckAddressDisposition, tcpip.Error) {
1046  	d, ok := n.duplicateAddressDetectors[protocol]
1047  	if !ok {
1048  		return 0, &tcpip.ErrNotSupported{}
1049  	}
1050  
1051  	return d.CheckDuplicateAddress(addr, h), nil
1052  }
1053  
1054  func (n *nic) setForwarding(protocol tcpip.NetworkProtocolNumber, enable bool) (bool, tcpip.Error) {
1055  	ep := n.getNetworkEndpoint(protocol)
1056  	if ep == nil {
1057  		return false, &tcpip.ErrUnknownProtocol{}
1058  	}
1059  
1060  	forwardingEP, ok := ep.(ForwardingNetworkEndpoint)
1061  	if !ok {
1062  		return false, &tcpip.ErrNotSupported{}
1063  	}
1064  
1065  	return forwardingEP.SetForwarding(enable), nil
1066  }
1067  
1068  func (n *nic) forwarding(protocol tcpip.NetworkProtocolNumber) (bool, tcpip.Error) {
1069  	ep := n.getNetworkEndpoint(protocol)
1070  	if ep == nil {
1071  		return false, &tcpip.ErrUnknownProtocol{}
1072  	}
1073  
1074  	forwardingEP, ok := ep.(ForwardingNetworkEndpoint)
1075  	if !ok {
1076  		return false, &tcpip.ErrNotSupported{}
1077  	}
1078  
1079  	return forwardingEP.Forwarding(), nil
1080  }
1081  
1082  func (n *nic) multicastForwardingEndpoint(protocol tcpip.NetworkProtocolNumber) (MulticastForwardingNetworkEndpoint, tcpip.Error) {
1083  	ep := n.getNetworkEndpoint(protocol)
1084  	if ep == nil {
1085  		return nil, &tcpip.ErrUnknownProtocol{}
1086  	}
1087  
1088  	forwardingEP, ok := ep.(MulticastForwardingNetworkEndpoint)
1089  	if !ok {
1090  		return nil, &tcpip.ErrNotSupported{}
1091  	}
1092  
1093  	return forwardingEP, nil
1094  }
1095  
1096  func (n *nic) setMulticastForwarding(protocol tcpip.NetworkProtocolNumber, enable bool) (bool, tcpip.Error) {
1097  	ep, err := n.multicastForwardingEndpoint(protocol)
1098  	if err != nil {
1099  		return false, err
1100  	}
1101  
1102  	return ep.SetMulticastForwarding(enable), nil
1103  }
1104  
1105  func (n *nic) multicastForwarding(protocol tcpip.NetworkProtocolNumber) (bool, tcpip.Error) {
1106  	ep, err := n.multicastForwardingEndpoint(protocol)
1107  	if err != nil {
1108  		return false, err
1109  	}
1110  
1111  	return ep.MulticastForwarding(), nil
1112  }
1113  
1114  // GetExperimentIPOptionEnabled returns whether the NIC is responsible for
1115  // passing the experiment IP option.
1116  func (n *nic) GetExperimentIPOptionEnabled() bool {
1117  	return n.experimentIPOptionEnabled
1118  }
1119  
// CoordinatorNIC represents a NetworkLinkEndpoint that can join multiple
// network devices.
type CoordinatorNIC interface {
	// AddNIC adds the specified NIC device.
	AddNIC(n *nic) tcpip.Error
	// DelNIC deletes the specified NIC device.
	DelNIC(n *nic) tcpip.Error
}
1127