nic.go raw
1 // Copyright 2018 The gVisor Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 package stack
16
17 import (
18 "fmt"
19 "reflect"
20 "sort"
21
22 "gvisor.dev/gvisor/pkg/atomicbitops"
23 "gvisor.dev/gvisor/pkg/tcpip"
24 "gvisor.dev/gvisor/pkg/tcpip/header"
25 )
26
// linkResolver pairs a network endpoint's LinkAddressResolver with the
// neighbor cache that is initialized for it.
//
// +stateify savable
type linkResolver struct {
	// resolver is the LinkAddressResolver implemented by the network endpoint.
	resolver LinkAddressResolver

	// neigh is the neighbor cache; newNIC initializes it with the NIC and
	// resolver.
	neigh neighborCache
}
33
// Compile-time checks that *nic implements NetworkInterface and
// NetworkDispatcher.
var _ NetworkInterface = (*nic)(nil)
var _ NetworkDispatcher = (*nic)(nil)
36
// nic represents a "network interface card" to which the networking stack is
// attached.
//
// +stateify savable
type nic struct {
	NetworkLinkEndpoint

	// stack is the Stack this NIC was created for.
	stack *Stack
	// id is the NIC's identifier, as returned by ID.
	id tcpip.NICID
	// name is the NIC's name, as returned by Name.
	name string
	// context is the NICContext supplied through NICOptions at creation.
	context NICContext

	// stats holds the NIC's counters; see makeNICStats.
	stats sharedStats

	// enableDisableMu is used to synchronize attempts to enable/disable the NIC.
	// Without this mutex, calls to enable/disable the NIC may interleave and
	// leave the NIC in an inconsistent state.
	enableDisableMu nicRWMutex `state:"nosave"`

	// The network endpoints themselves may be modified by calling the interface's
	// methods, but the map reference and entries must be constant.
	networkEndpoints          map[tcpip.NetworkProtocolNumber]NetworkEndpoint
	linkAddrResolvers         map[tcpip.NetworkProtocolNumber]*linkResolver
	duplicateAddressDetectors map[tcpip.NetworkProtocolNumber]DuplicateAddressDetector

	// enabled indicates whether the NIC is enabled.
	enabled atomicbitops.Bool

	// spoofing indicates whether the NIC is spoofing.
	spoofing atomicbitops.Bool

	// promiscuous indicates whether the NIC is promiscuous.
	promiscuous atomicbitops.Bool

	// linkResQueue holds packets that are waiting for link resolution to
	// complete.
	linkResQueue packetsPendingLinkResolution

	// packetEPsMu protects annotated fields below.
	packetEPsMu packetEPsRWMutex `state:"nosave"`

	// packetEPs is protected by the mutex, but the values contained in it are
	// not.
	//
	// +checklocks:packetEPsMu
	packetEPs map[tcpip.NetworkProtocolNumber]*packetEndpointList

	// qDisc is the queueing discipline outgoing packets are written to. When
	// NICOptions does not supply one, newNIC installs a stub that delegates
	// to the link endpoint.
	qDisc QueueingDiscipline

	// deliverLinkPackets specifies whether this NIC delivers packets to
	// packet sockets. It is immutable.
	//
	// deliverLinkPackets is off by default because some users already
	// deliver link packets by explicitly calling nic.DeliverLinkPackets.
	deliverLinkPackets bool

	// Primary is the main controlling interface in a bonded setup.
	Primary *nic

	// experimentIPOptionEnabled indicates whether the NIC supports the
	// experiment IP option.
	experimentIPOptionEnabled bool
}
99
100 // makeNICStats initializes the NIC statistics and associates them to the global
101 // NIC statistics.
102 func makeNICStats(global tcpip.NICStats) sharedStats {
103 var stats sharedStats
104 tcpip.InitStatCounters(reflect.ValueOf(&stats.local).Elem())
105 stats.init(&stats.local, &global)
106 return stats
107 }
108
// packetEndpointList is a mutex-guarded list of packet endpoints.
//
// +stateify savable
type packetEndpointList struct {
	// mu protects eps.
	mu packetEndpointListRWMutex `state:"nosave"`

	// eps is protected by mu, but the contained PacketEndpoint values are not.
	//
	// +checklocks:mu
	eps []PacketEndpoint
}
118
119 func (p *packetEndpointList) add(ep PacketEndpoint) {
120 p.mu.Lock()
121 defer p.mu.Unlock()
122 p.eps = append(p.eps, ep)
123 }
124
125 func (p *packetEndpointList) remove(ep PacketEndpoint) {
126 p.mu.Lock()
127 defer p.mu.Unlock()
128 for i, epOther := range p.eps {
129 if epOther == ep {
130 p.eps = append(p.eps[:i], p.eps[i+1:]...)
131 break
132 }
133 }
134 }
135
136 func (p *packetEndpointList) len() int {
137 p.mu.RLock()
138 defer p.mu.RUnlock()
139 return len(p.eps)
140 }
141
142 // forEach calls fn with each endpoints in p while holding the read lock on p.
143 func (p *packetEndpointList) forEach(fn func(PacketEndpoint)) {
144 p.mu.RLock()
145 defer p.mu.RUnlock()
146 for _, ep := range p.eps {
147 fn(ep)
148 }
149 }
150
// Compile-time check that *delegatingQueueingDiscipline implements
// QueueingDiscipline.
var _ QueueingDiscipline = (*delegatingQueueingDiscipline)(nil)

// delegatingQueueingDiscipline is the stub queueing discipline newNIC
// installs when none is supplied; it wraps a LinkWriter and passes packets
// straight through to it.
//
// +stateify savable
type delegatingQueueingDiscipline struct {
	LinkWriter
}
157
158 func (*delegatingQueueingDiscipline) Close() {}
159
160 // WritePacket passes the packet through to the underlying LinkWriter's WritePackets.
161 func (qDisc *delegatingQueueingDiscipline) WritePacket(pkt *PacketBuffer) tcpip.Error {
162 var pkts PacketBufferList
163 pkts.PushBack(pkt)
164 _, err := qDisc.LinkWriter.WritePackets(pkts)
165 return err
166 }
167
168 // newNIC returns a new NIC using the default NDP configurations from stack.
169 func newNIC(stack *Stack, id tcpip.NICID, ep LinkEndpoint, opts NICOptions) *nic {
170 // TODO(b/141011931): Validate a LinkEndpoint (ep) is valid. For
171 // example, make sure that the link address it provides is a valid
172 // unicast ethernet address.
173
174 // If no queueing discipline was specified provide a stub implementation that
175 // just delegates to the lower link endpoint.
176 qDisc := opts.QDisc
177 if qDisc == nil {
178 qDisc = &delegatingQueueingDiscipline{LinkWriter: ep}
179 }
180
181 // TODO(b/143357959): RFC 8200 section 5 requires that IPv6 endpoints
182 // observe an MTU of at least 1280 bytes. Ensure that this requirement
183 // of IPv6 is supported on this endpoint's LinkEndpoint.
184 nic := &nic{
185 NetworkLinkEndpoint: ep,
186 stack: stack,
187 id: id,
188 name: opts.Name,
189 context: opts.Context,
190 stats: makeNICStats(stack.Stats().NICs),
191 networkEndpoints: make(map[tcpip.NetworkProtocolNumber]NetworkEndpoint),
192 linkAddrResolvers: make(map[tcpip.NetworkProtocolNumber]*linkResolver),
193 duplicateAddressDetectors: make(map[tcpip.NetworkProtocolNumber]DuplicateAddressDetector),
194 qDisc: qDisc,
195 deliverLinkPackets: opts.DeliverLinkPackets,
196 experimentIPOptionEnabled: opts.EnableExperimentIPOption,
197 }
198 nic.linkResQueue.init(nic)
199
200 nic.packetEPsMu.Lock()
201 defer nic.packetEPsMu.Unlock()
202
203 nic.packetEPs = make(map[tcpip.NetworkProtocolNumber]*packetEndpointList)
204
205 resolutionRequired := ep.Capabilities()&CapabilityResolutionRequired != 0
206
207 for _, netProto := range stack.networkProtocols {
208 netNum := netProto.Number()
209 netEP := netProto.NewEndpoint(nic, nic)
210 nic.networkEndpoints[netNum] = netEP
211
212 if resolutionRequired {
213 if r, ok := netEP.(LinkAddressResolver); ok {
214 l := &linkResolver{resolver: r}
215 l.neigh.init(nic, r)
216 nic.linkAddrResolvers[r.LinkAddressProtocol()] = l
217 }
218 }
219
220 if d, ok := netEP.(DuplicateAddressDetector); ok {
221 nic.duplicateAddressDetectors[d.DuplicateAddressProtocol()] = d
222 }
223 }
224
225 nic.NetworkLinkEndpoint.Attach(nic)
226
227 return nic
228 }
229
230 func (n *nic) getNetworkEndpoint(proto tcpip.NetworkProtocolNumber) NetworkEndpoint {
231 return n.networkEndpoints[proto]
232 }
233
234 // Enabled implements NetworkInterface.
235 func (n *nic) Enabled() bool {
236 return n.enabled.Load()
237 }
238
239 // setEnabled sets the enabled status for the NIC.
240 //
241 // Returns true if the enabled status was updated.
242 //
243 // +checklocks:n.enableDisableMu
244 func (n *nic) setEnabled(v bool) bool {
245 return n.enabled.Swap(v) != v
246 }
247
248 // disable disables n.
249 //
250 // It undoes the work done by enable.
251 func (n *nic) disable() {
252 n.enableDisableMu.Lock()
253 defer n.enableDisableMu.Unlock()
254 n.disableLocked()
255 }
256
257 // disableLocked disables n.
258 //
259 // It undoes the work done by enable.
260 //
261 // +checklocks:n.enableDisableMu
262 func (n *nic) disableLocked() {
263 if !n.Enabled() {
264 return
265 }
266
267 // TODO(gvisor.dev/issue/1491): Should Routes that are currently bound to n be
268 // invalidated? Currently, Routes will continue to work when a NIC is enabled
269 // again, and applications may not know that the underlying NIC was ever
270 // disabled.
271
272 for _, ep := range n.networkEndpoints {
273 ep.Disable()
274
275 // Clear the neighbour table (including static entries) as we cannot
276 // guarantee that the current neighbour table will be valid when the NIC is
277 // enabled again.
278 //
279 // This matches linux's behaviour at the time of writing:
280 // https://github.com/torvalds/linux/blob/71c061d2443814de15e177489d5cc00a4a253ef3/net/core/neighbour.c#L371
281 netProto := ep.NetworkProtocolNumber()
282 switch err := n.clearNeighbors(netProto); err.(type) {
283 case nil, *tcpip.ErrNotSupported:
284 default:
285 panic(fmt.Sprintf("n.clearNeighbors(%d): %s", netProto, err))
286 }
287 }
288
289 if !n.setEnabled(false) {
290 panic("should have only done work to disable the NIC if it was enabled")
291 }
292 }
293
294 // enable enables n.
295 //
296 // If the stack has IPv6 enabled, enable will join the IPv6 All-Nodes Multicast
297 // address (ff02::1), start DAD for permanent addresses, and start soliciting
298 // routers if the stack is not operating as a router. If the stack is also
299 // configured to auto-generate a link-local address, one will be generated.
300 func (n *nic) enable() tcpip.Error {
301 n.enableDisableMu.Lock()
302 defer n.enableDisableMu.Unlock()
303
304 if !n.setEnabled(true) {
305 return nil
306 }
307
308 for _, ep := range n.networkEndpoints {
309 if err := ep.Enable(); err != nil {
310 return err
311 }
312 }
313
314 return nil
315 }
316
317 // remove detaches NIC from the link endpoint and releases network endpoint
318 // resources. This guarantees no packets between this NIC and the network
319 // stack.
320 //
321 // It returns an action that has to be excuted after releasing the Stack lock
322 // and any error encountered.
323 func (n *nic) remove(closeLinkEndpoint bool) (func(), tcpip.Error) {
324 n.enableDisableMu.Lock()
325
326 n.disableLocked()
327
328 for _, ep := range n.networkEndpoints {
329 ep.Close()
330 }
331
332 n.enableDisableMu.Unlock()
333
334 // Drain and drop any packets pending link resolution.
335 // We must not hold n.enableDisableMu here.
336 n.linkResQueue.cancel()
337
338 var deferAct func()
339 // Prevent packets from going down to the link before shutting the link down.
340 n.qDisc.Close()
341 n.NetworkLinkEndpoint.Attach(nil)
342 if closeLinkEndpoint {
343 ep := n.NetworkLinkEndpoint
344 ep.SetOnCloseAction(nil)
345 // The link endpoint has to be closed without holding a
346 // netstack lock, because it can trigger other netstack
347 // operations.
348 deferAct = ep.Close
349 }
350
351 return deferAct, nil
352 }
353
354 // setPromiscuousMode enables or disables promiscuous mode.
355 func (n *nic) setPromiscuousMode(enable bool) {
356 n.promiscuous.Store(enable)
357 }
358
359 // Promiscuous implements NetworkInterface.
360 func (n *nic) Promiscuous() bool {
361 return n.promiscuous.Load()
362 }
363
364 // IsLoopback implements NetworkInterface.
365 func (n *nic) IsLoopback() bool {
366 return n.NetworkLinkEndpoint.Capabilities()&CapabilityLoopback != 0
367 }
368
369 // WritePacket implements NetworkEndpoint.
370 func (n *nic) WritePacket(r *Route, pkt *PacketBuffer) tcpip.Error {
371 routeInfo, _, err := r.resolvedFields(nil)
372 switch err.(type) {
373 case nil:
374 pkt.EgressRoute = routeInfo
375 return n.writePacket(pkt)
376 case *tcpip.ErrWouldBlock:
377 // As per relevant RFCs, we should queue packets while we wait for link
378 // resolution to complete.
379 //
380 // RFC 1122 section 2.3.2.2 (for IPv4):
381 // The link layer SHOULD save (rather than discard) at least
382 // one (the latest) packet of each set of packets destined to
383 // the same unresolved IP address, and transmit the saved
384 // packet when the address has been resolved.
385 //
386 // RFC 4861 section 7.2.2 (for IPv6):
387 // While waiting for address resolution to complete, the sender MUST, for
388 // each neighbor, retain a small queue of packets waiting for address
389 // resolution to complete. The queue MUST hold at least one packet, and
390 // MAY contain more. However, the number of queued packets per neighbor
391 // SHOULD be limited to some small value. When a queue overflows, the new
392 // arrival SHOULD replace the oldest entry. Once address resolution
393 // completes, the node transmits any queued packets.
394 return n.linkResQueue.enqueue(r, pkt)
395 default:
396 return err
397 }
398 }
399
400 // WritePacketToRemote implements NetworkInterface.
401 func (n *nic) WritePacketToRemote(remoteLinkAddr tcpip.LinkAddress, pkt *PacketBuffer) tcpip.Error {
402 pkt.EgressRoute = RouteInfo{
403 routeInfo: routeInfo{
404 NetProto: pkt.NetworkProtocolNumber,
405 LocalLinkAddress: n.LinkAddress(),
406 },
407 RemoteLinkAddress: remoteLinkAddr,
408 }
409 return n.writePacket(pkt)
410 }
411
412 func (n *nic) writePacket(pkt *PacketBuffer) tcpip.Error {
413 n.NetworkLinkEndpoint.AddHeader(pkt)
414 return n.writeRawPacket(pkt)
415 }
416
417 func (n *nic) writeRawPacketWithLinkHeaderInPayload(pkt *PacketBuffer) tcpip.Error {
418 if !n.NetworkLinkEndpoint.ParseHeader(pkt) {
419 return &tcpip.ErrMalformedHeader{}
420 }
421 return n.writeRawPacket(pkt)
422 }
423
424 func (n *nic) writeRawPacket(pkt *PacketBuffer) tcpip.Error {
425 // Always an outgoing packet.
426 pkt.PktType = tcpip.PacketOutgoing
427
428 if n.deliverLinkPackets {
429 n.DeliverLinkPacket(pkt.NetworkProtocolNumber, pkt)
430 }
431
432 if err := n.qDisc.WritePacket(pkt); err != nil {
433 if _, ok := err.(*tcpip.ErrNoBufferSpace); ok {
434 n.stats.txPacketsDroppedNoBufferSpace.Increment()
435 }
436 return err
437 }
438
439 n.stats.tx.packets.Increment()
440 n.stats.tx.bytes.IncrementBy(uint64(pkt.Size()))
441 return nil
442 }
443
444 // setSpoofing enables or disables address spoofing.
445 func (n *nic) setSpoofing(enable bool) {
446 n.spoofing.Store(enable)
447 }
448
449 // Spoofing implements NetworkInterface.
450 func (n *nic) Spoofing() bool {
451 return n.spoofing.Load()
452 }
453
454 // primaryAddress returns an address that can be used to communicate with
455 // remoteAddr.
456 func (n *nic) primaryEndpoint(protocol tcpip.NetworkProtocolNumber, remoteAddr, srcHint tcpip.Address) AssignableAddressEndpoint {
457 ep := n.getNetworkEndpoint(protocol)
458 if ep == nil {
459 return nil
460 }
461
462 addressableEndpoint, ok := ep.(AddressableEndpoint)
463 if !ok {
464 return nil
465 }
466
467 return addressableEndpoint.AcquireOutgoingPrimaryAddress(remoteAddr, srcHint, n.Spoofing())
468 }
469
// getAddressBehaviour selects which NIC flag is consulted when deciding
// whether a temporary address endpoint may be created.
type getAddressBehaviour int

const (
	// spoofing means the NIC's spoofing flag is the one to observe when
	// getting a NIC's address endpoint.
	spoofing getAddressBehaviour = iota

	// promiscuous means the NIC's promiscuous flag is the one to observe
	// when getting a NIC's address endpoint.
	promiscuous
)
481
482 func (n *nic) getAddress(protocol tcpip.NetworkProtocolNumber, dst tcpip.Address) AssignableAddressEndpoint {
483 return n.getAddressOrCreateTemp(protocol, dst, CanBePrimaryEndpoint, promiscuous)
484 }
485
486 func (n *nic) hasAddress(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) bool {
487 ep := n.getAddressOrCreateTempInner(protocol, addr, false, NeverPrimaryEndpoint)
488 if ep != nil {
489 ep.DecRef()
490 return true
491 }
492
493 return false
494 }
495
496 // findEndpoint finds the endpoint, if any, with the given address.
497 func (n *nic) findEndpoint(protocol tcpip.NetworkProtocolNumber, address tcpip.Address, peb PrimaryEndpointBehavior) AssignableAddressEndpoint {
498 return n.getAddressOrCreateTemp(protocol, address, peb, spoofing)
499 }
500
501 // getAddressEpOrCreateTemp returns the address endpoint for the given protocol
502 // and address.
503 //
504 // If none exists a temporary one may be created if we are in promiscuous mode
505 // or spoofing. Promiscuous mode will only be checked if promiscuous is true.
506 // Similarly, spoofing will only be checked if spoofing is true.
507 //
508 // If the address is the IPv4 broadcast address for an endpoint's network, that
509 // endpoint will be returned.
510 func (n *nic) getAddressOrCreateTemp(protocol tcpip.NetworkProtocolNumber, address tcpip.Address, peb PrimaryEndpointBehavior, tempRef getAddressBehaviour) AssignableAddressEndpoint {
511 var spoofingOrPromiscuous bool
512 switch tempRef {
513 case spoofing:
514 spoofingOrPromiscuous = n.Spoofing()
515 case promiscuous:
516 spoofingOrPromiscuous = n.Promiscuous()
517 }
518 return n.getAddressOrCreateTempInner(protocol, address, spoofingOrPromiscuous, peb)
519 }
520
521 // getAddressOrCreateTempInner is like getAddressEpOrCreateTemp except a boolean
522 // is passed to indicate whether or not we should generate temporary endpoints.
523 func (n *nic) getAddressOrCreateTempInner(protocol tcpip.NetworkProtocolNumber, address tcpip.Address, createTemp bool, peb PrimaryEndpointBehavior) AssignableAddressEndpoint {
524 ep := n.getNetworkEndpoint(protocol)
525 if ep == nil {
526 return nil
527 }
528
529 addressableEndpoint, ok := ep.(AddressableEndpoint)
530 if !ok {
531 return nil
532 }
533
534 return addressableEndpoint.AcquireAssignedAddress(address, createTemp, peb, false)
535 }
536
537 // addAddress adds a new address to n, so that it starts accepting packets
538 // targeted at the given address (and network protocol).
539 func (n *nic) addAddress(protocolAddress tcpip.ProtocolAddress, properties AddressProperties) tcpip.Error {
540 ep := n.getNetworkEndpoint(protocolAddress.Protocol)
541 if ep == nil {
542 return &tcpip.ErrUnknownProtocol{}
543 }
544
545 addressableEndpoint, ok := ep.(AddressableEndpoint)
546 if !ok {
547 return &tcpip.ErrNotSupported{}
548 }
549
550 addressEndpoint, err := addressableEndpoint.AddAndAcquirePermanentAddress(protocolAddress.AddressWithPrefix, properties)
551 if err == nil {
552 // We have no need for the address endpoint.
553 addressEndpoint.DecRef()
554 }
555 return err
556 }
557
558 // allPermanentAddresses returns all permanent addresses associated with
559 // this NIC.
560 func (n *nic) allPermanentAddresses() []tcpip.ProtocolAddress {
561 var addrs []tcpip.ProtocolAddress
562 for p, ep := range n.networkEndpoints {
563 addressableEndpoint, ok := ep.(AddressableEndpoint)
564 if !ok {
565 continue
566 }
567
568 for _, a := range addressableEndpoint.PermanentAddresses() {
569 addrs = append(addrs, tcpip.ProtocolAddress{Protocol: p, AddressWithPrefix: a})
570 }
571 }
572 return addrs
573 }
574
575 // primaryAddresses returns the primary addresses associated with this NIC.
576 func (n *nic) primaryAddresses() []tcpip.ProtocolAddress {
577 var addrs []tcpip.ProtocolAddress
578
579 protocolNumbers := make([]tcpip.NetworkProtocolNumber, 0, len(n.networkEndpoints))
580 for p := range n.networkEndpoints {
581 protocolNumbers = append(protocolNumbers, p)
582 }
583 // Sort the network protocol numbers so that IPv4 address is always
584 // added to the list before IPv6 address.
585 sort.Slice(protocolNumbers, func(i, j int) bool {
586 return protocolNumbers[i] < protocolNumbers[j]
587 })
588
589 for _, p := range protocolNumbers {
590 addressableEndpoint, ok := n.networkEndpoints[p].(AddressableEndpoint)
591 if !ok {
592 continue
593 }
594 for _, a := range addressableEndpoint.PrimaryAddresses() {
595 addrs = append(addrs, tcpip.ProtocolAddress{Protocol: p, AddressWithPrefix: a})
596 }
597 }
598 return addrs
599 }
600
601 // PrimaryAddress implements NetworkInterface.
602 func (n *nic) PrimaryAddress(proto tcpip.NetworkProtocolNumber) (tcpip.AddressWithPrefix, tcpip.Error) {
603 ep := n.getNetworkEndpoint(proto)
604 if ep == nil {
605 return tcpip.AddressWithPrefix{}, &tcpip.ErrUnknownProtocol{}
606 }
607
608 addressableEndpoint, ok := ep.(AddressableEndpoint)
609 if !ok {
610 return tcpip.AddressWithPrefix{}, &tcpip.ErrNotSupported{}
611 }
612
613 return addressableEndpoint.MainAddress(), nil
614 }
615
616 // removeAddress removes an address from n.
617 func (n *nic) removeAddress(addr tcpip.Address) tcpip.Error {
618 for _, ep := range n.networkEndpoints {
619 addressableEndpoint, ok := ep.(AddressableEndpoint)
620 if !ok {
621 continue
622 }
623
624 switch err := addressableEndpoint.RemovePermanentAddress(addr); err.(type) {
625 case *tcpip.ErrBadLocalAddress:
626 continue
627 default:
628 return err
629 }
630 }
631
632 return &tcpip.ErrBadLocalAddress{}
633 }
634
635 func (n *nic) setAddressLifetimes(addr tcpip.Address, lifetimes AddressLifetimes) tcpip.Error {
636 for _, ep := range n.networkEndpoints {
637 ep, ok := ep.(AddressableEndpoint)
638 if !ok {
639 continue
640 }
641
642 switch err := ep.SetLifetimes(addr, lifetimes); err.(type) {
643 case *tcpip.ErrBadLocalAddress:
644 continue
645 default:
646 return err
647 }
648 }
649
650 return &tcpip.ErrBadLocalAddress{}
651 }
652
653 func (n *nic) getLinkAddress(addr, localAddr tcpip.Address, protocol tcpip.NetworkProtocolNumber, onResolve func(LinkResolutionResult)) tcpip.Error {
654 linkRes, ok := n.linkAddrResolvers[protocol]
655 if !ok {
656 return &tcpip.ErrNotSupported{}
657 }
658
659 if linkAddr, ok := linkRes.resolver.ResolveStaticAddress(addr); ok {
660 onResolve(LinkResolutionResult{LinkAddress: linkAddr, Err: nil})
661 return nil
662 }
663
664 _, _, err := linkRes.neigh.entry(addr, localAddr, onResolve)
665 return err
666 }
667
668 func (n *nic) neighbors(protocol tcpip.NetworkProtocolNumber) ([]NeighborEntry, tcpip.Error) {
669 if linkRes, ok := n.linkAddrResolvers[protocol]; ok {
670 return linkRes.neigh.entries(), nil
671 }
672
673 return nil, &tcpip.ErrNotSupported{}
674 }
675
676 func (n *nic) addStaticNeighbor(addr tcpip.Address, protocol tcpip.NetworkProtocolNumber, linkAddress tcpip.LinkAddress) tcpip.Error {
677 if linkRes, ok := n.linkAddrResolvers[protocol]; ok {
678 linkRes.neigh.addStaticEntry(addr, linkAddress)
679 return nil
680 }
681
682 return &tcpip.ErrNotSupported{}
683 }
684
685 func (n *nic) removeNeighbor(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.Error {
686 if linkRes, ok := n.linkAddrResolvers[protocol]; ok {
687 if !linkRes.neigh.removeEntry(addr) {
688 return &tcpip.ErrBadAddress{}
689 }
690 return nil
691 }
692
693 return &tcpip.ErrNotSupported{}
694 }
695
696 func (n *nic) clearNeighbors(protocol tcpip.NetworkProtocolNumber) tcpip.Error {
697 if linkRes, ok := n.linkAddrResolvers[protocol]; ok {
698 linkRes.neigh.clear()
699 return nil
700 }
701
702 return &tcpip.ErrNotSupported{}
703 }
704
705 // joinGroup adds a new endpoint for the given multicast address, if none
706 // exists yet. Otherwise it just increments its count.
707 func (n *nic) joinGroup(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.Error {
708 // TODO(b/143102137): When implementing MLD, make sure MLD packets are
709 // not sent unless a valid link-local address is available for use on n
710 // as an MLD packet's source address must be a link-local address as
711 // outlined in RFC 3810 section 5.
712
713 ep := n.getNetworkEndpoint(protocol)
714 if ep == nil {
715 return &tcpip.ErrNotSupported{}
716 }
717
718 gep, ok := ep.(GroupAddressableEndpoint)
719 if !ok {
720 return &tcpip.ErrNotSupported{}
721 }
722
723 return gep.JoinGroup(addr)
724 }
725
726 // leaveGroup decrements the count for the given multicast address, and when it
727 // reaches zero removes the endpoint for this address.
728 func (n *nic) leaveGroup(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.Error {
729 ep := n.getNetworkEndpoint(protocol)
730 if ep == nil {
731 return &tcpip.ErrNotSupported{}
732 }
733
734 gep, ok := ep.(GroupAddressableEndpoint)
735 if !ok {
736 return &tcpip.ErrNotSupported{}
737 }
738
739 return gep.LeaveGroup(addr)
740 }
741
742 // isInGroup returns true if n has joined the multicast group addr.
743 func (n *nic) isInGroup(addr tcpip.Address) bool {
744 for _, ep := range n.networkEndpoints {
745 gep, ok := ep.(GroupAddressableEndpoint)
746 if !ok {
747 continue
748 }
749
750 if gep.IsInGroup(addr) {
751 return true
752 }
753 }
754
755 return false
756 }
757
758 // DeliverNetworkPacket finds the appropriate network protocol endpoint and
759 // hands the packet over for further processing. This function is called when
760 // the NIC receives a packet from the link endpoint.
761 func (n *nic) DeliverNetworkPacket(protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) {
762 enabled := n.Enabled()
763 // If the NIC is not yet enabled, don't receive any packets.
764 if !enabled {
765 n.stats.disabledRx.packets.Increment()
766 n.stats.disabledRx.bytes.IncrementBy(uint64(pkt.Data().Size()))
767 return
768 }
769
770 n.stats.rx.packets.Increment()
771 n.stats.rx.bytes.IncrementBy(uint64(pkt.Data().Size()))
772
773 networkEndpoint := n.getNetworkEndpoint(protocol)
774 if networkEndpoint == nil {
775 n.stats.unknownL3ProtocolRcvdPacketCounts.Increment(uint64(protocol))
776 return
777 }
778
779 pkt.RXChecksumValidated = n.NetworkLinkEndpoint.Capabilities()&CapabilityRXChecksumOffload != 0
780
781 if n.deliverLinkPackets {
782 n.DeliverLinkPacket(protocol, pkt)
783 }
784
785 networkEndpoint.HandlePacket(pkt)
786 }
787
788 func (n *nic) DeliverLinkPacket(protocol tcpip.NetworkProtocolNumber, pkt *PacketBuffer) {
789 // Deliver to interested packet endpoints without holding NIC lock.
790 var packetEPPkt *PacketBuffer
791 defer func() {
792 if packetEPPkt != nil {
793 packetEPPkt.DecRef()
794 }
795 }()
796 deliverPacketEPs := func(ep PacketEndpoint) {
797 if packetEPPkt == nil {
798 // Packet endpoints hold the full packet.
799 //
800 // We perform a deep copy because higher-level endpoints may point to
801 // the middle of a view that is held by a packet endpoint. Save/Restore
802 // does not support overlapping slices and will panic in this case.
803 //
804 // TODO(https://gvisor.dev/issue/6517): Avoid this copy once S/R supports
805 // overlapping slices (e.g. by passing a shallow copy of pkt to the packet
806 // endpoint).
807 packetEPPkt = NewPacketBuffer(PacketBufferOptions{
808 Payload: BufferSince(pkt.LinkHeader()),
809 })
810 // If a link header was populated in the original packet buffer, then
811 // populate it in the packet buffer we provide to packet endpoints as
812 // packet endpoints inspect link headers.
813 packetEPPkt.LinkHeader().Consume(len(pkt.LinkHeader().Slice()))
814 packetEPPkt.PktType = pkt.PktType
815 // Assume the packet is for us if the packet type is unset.
816 // The packet type is set to PacketOutgoing when sending packets so
817 // this may only be unset for incoming packets where link endpoints
818 // have not set it.
819 if packetEPPkt.PktType == 0 {
820 packetEPPkt.PktType = tcpip.PacketHost
821 }
822 }
823
824 clone := packetEPPkt.Clone()
825 defer clone.DecRef()
826 ep.HandlePacket(n.id, protocol, clone)
827 }
828
829 n.packetEPsMu.Lock()
830 // Are any packet type sockets listening for this network protocol?
831 protoEPs, protoEPsOK := n.packetEPs[protocol]
832 // Other packet type sockets that are listening for all protocols.
833 anyEPs, anyEPsOK := n.packetEPs[header.EthernetProtocolAll]
834 n.packetEPsMu.Unlock()
835
836 // On Linux, only ETH_P_ALL endpoints get outbound packets.
837 if pkt.PktType != tcpip.PacketOutgoing && protoEPsOK {
838 protoEPs.forEach(deliverPacketEPs)
839 }
840 if anyEPsOK {
841 anyEPs.forEach(deliverPacketEPs)
842 }
843 }
844
845 // DeliverTransportPacket delivers the packets to the appropriate transport
846 // protocol endpoint.
847 func (n *nic) DeliverTransportPacket(protocol tcpip.TransportProtocolNumber, pkt *PacketBuffer) TransportPacketDisposition {
848 state, ok := n.stack.transportProtocols[protocol]
849 if !ok {
850 n.stats.unknownL4ProtocolRcvdPacketCounts.Increment(uint64(protocol))
851 return TransportPacketProtocolUnreachable
852 }
853
854 transProto := state.proto
855
856 if len(pkt.TransportHeader().Slice()) == 0 {
857 n.stats.malformedL4RcvdPackets.Increment()
858 return TransportPacketHandled
859 }
860
861 srcPort, dstPort, err := transProto.ParsePorts(pkt.TransportHeader().Slice())
862 if err != nil {
863 n.stats.malformedL4RcvdPackets.Increment()
864 return TransportPacketHandled
865 }
866
867 netProto, ok := n.stack.networkProtocols[pkt.NetworkProtocolNumber]
868 if !ok {
869 panic(fmt.Sprintf("expected network protocol = %d, have = %#v", pkt.NetworkProtocolNumber, n.stack.networkProtocolNumbers()))
870 }
871
872 src, dst := netProto.ParseAddresses(pkt.NetworkHeader().Slice())
873 id := TransportEndpointID{
874 LocalPort: dstPort,
875 LocalAddress: dst,
876 RemotePort: srcPort,
877 RemoteAddress: src,
878 }
879 if n.stack.demux.deliverPacket(protocol, pkt, id) {
880 return TransportPacketHandled
881 }
882
883 // Try to deliver to per-stack default handler.
884 if state.defaultHandler != nil {
885 if state.defaultHandler(id, pkt) {
886 return TransportPacketHandled
887 }
888 }
889
890 // We could not find an appropriate destination for this packet so
891 // give the protocol specific error handler a chance to handle it.
892 // If it doesn't handle it then we should do so.
893 switch res := transProto.HandleUnknownDestinationPacket(id, pkt); res {
894 case UnknownDestinationPacketMalformed:
895 n.stats.malformedL4RcvdPackets.Increment()
896 return TransportPacketHandled
897 case UnknownDestinationPacketUnhandled:
898 return TransportPacketDestinationPortUnreachable
899 case UnknownDestinationPacketHandled:
900 return TransportPacketHandled
901 default:
902 panic(fmt.Sprintf("unrecognized result from HandleUnknownDestinationPacket = %d", res))
903 }
904 }
905
906 // DeliverTransportError implements TransportDispatcher.
907 func (n *nic) DeliverTransportError(local, remote tcpip.Address, net tcpip.NetworkProtocolNumber, trans tcpip.TransportProtocolNumber, transErr TransportError, pkt *PacketBuffer) {
908 state, ok := n.stack.transportProtocols[trans]
909 if !ok {
910 return
911 }
912
913 transProto := state.proto
914
915 // ICMPv4 only guarantees that 8 bytes of the transport protocol will
916 // be present in the payload. We know that the ports are within the
917 // first 8 bytes for all known transport protocols.
918 transHeader, ok := pkt.Data().PullUp(8)
919 if !ok {
920 return
921 }
922
923 srcPort, dstPort, err := transProto.ParsePorts(transHeader)
924 if err != nil {
925 return
926 }
927
928 id := TransportEndpointID{srcPort, local, dstPort, remote}
929 if n.stack.demux.deliverError(n, net, trans, transErr, pkt, id) {
930 return
931 }
932 }
933
934 // DeliverRawPacket implements TransportDispatcher.
935 func (n *nic) DeliverRawPacket(protocol tcpip.TransportProtocolNumber, pkt *PacketBuffer) {
936 // For ICMPv4 only we validate the header length for compatibility with
937 // raw(7) ICMP_FILTER. The same check is made in Linux here:
938 // https://github.com/torvalds/linux/blob/70585216/net/ipv4/raw.c#L189.
939 if protocol == header.ICMPv4ProtocolNumber && len(pkt.TransportHeader().Slice())+pkt.Data().Size() < header.ICMPv4MinimumSize {
940 return
941 }
942 n.stack.demux.deliverRawPacket(protocol, pkt)
943 }
944
945 // ID implements NetworkInterface.
946 func (n *nic) ID() tcpip.NICID {
947 return n.id
948 }
949
950 // Name implements NetworkInterface.
951 func (n *nic) Name() string {
952 return n.name
953 }
954
955 // nudConfigs gets the NUD configurations for n.
956 func (n *nic) nudConfigs(protocol tcpip.NetworkProtocolNumber) (NUDConfigurations, tcpip.Error) {
957 if linkRes, ok := n.linkAddrResolvers[protocol]; ok {
958 return linkRes.neigh.config(), nil
959 }
960
961 return NUDConfigurations{}, &tcpip.ErrNotSupported{}
962 }
963
964 // setNUDConfigs sets the NUD configurations for n.
965 //
966 // Note, if c contains invalid NUD configuration values, it will be fixed to
967 // use default values for the erroneous values.
968 func (n *nic) setNUDConfigs(protocol tcpip.NetworkProtocolNumber, c NUDConfigurations) tcpip.Error {
969 if linkRes, ok := n.linkAddrResolvers[protocol]; ok {
970 c.resetInvalidFields()
971 linkRes.neigh.setConfig(c)
972 return nil
973 }
974
975 return &tcpip.ErrNotSupported{}
976 }
977
978 func (n *nic) registerPacketEndpoint(netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) {
979 n.packetEPsMu.Lock()
980 defer n.packetEPsMu.Unlock()
981
982 eps, ok := n.packetEPs[netProto]
983 if !ok {
984 eps = new(packetEndpointList)
985 n.packetEPs[netProto] = eps
986 }
987 eps.add(ep)
988 }
989
990 func (n *nic) unregisterPacketEndpoint(netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) {
991 n.packetEPsMu.Lock()
992 defer n.packetEPsMu.Unlock()
993
994 eps, ok := n.packetEPs[netProto]
995 if !ok {
996 return
997 }
998 eps.remove(ep)
999 if eps.len() == 0 {
1000 delete(n.packetEPs, netProto)
1001 }
1002 }
1003
1004 // isValidForOutgoing returns true if the endpoint can be used to send out a
1005 // packet. It requires the endpoint to not be marked expired (i.e., its address
1006 // has been removed) unless the NIC is in spoofing mode, or temporary.
1007 func (n *nic) isValidForOutgoing(ep AssignableAddressEndpoint) bool {
1008 return n.Enabled() && ep.IsAssigned(n.Spoofing())
1009 }
1010
1011 // HandleNeighborProbe implements NetworkInterface.
1012 func (n *nic) HandleNeighborProbe(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, linkAddr tcpip.LinkAddress) tcpip.Error {
1013 if l, ok := n.linkAddrResolvers[protocol]; ok {
1014 l.neigh.handleProbe(addr, linkAddr)
1015 return nil
1016 }
1017
1018 return &tcpip.ErrNotSupported{}
1019 }
1020
1021 // HandleNeighborConfirmation implements NetworkInterface.
1022 func (n *nic) HandleNeighborConfirmation(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, linkAddr tcpip.LinkAddress, flags ReachabilityConfirmationFlags) tcpip.Error {
1023 if l, ok := n.linkAddrResolvers[protocol]; ok {
1024 l.neigh.handleConfirmation(addr, linkAddr, flags)
1025 return nil
1026 }
1027
1028 return &tcpip.ErrNotSupported{}
1029 }
1030
1031 // CheckLocalAddress implements NetworkInterface.
1032 func (n *nic) CheckLocalAddress(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) bool {
1033 if n.Spoofing() {
1034 return true
1035 }
1036
1037 if addressEndpoint := n.getAddressOrCreateTempInner(protocol, addr, false /* createTemp */, NeverPrimaryEndpoint); addressEndpoint != nil {
1038 addressEndpoint.DecRef()
1039 return true
1040 }
1041
1042 return false
1043 }
1044
1045 func (n *nic) checkDuplicateAddress(protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, h DADCompletionHandler) (DADCheckAddressDisposition, tcpip.Error) {
1046 d, ok := n.duplicateAddressDetectors[protocol]
1047 if !ok {
1048 return 0, &tcpip.ErrNotSupported{}
1049 }
1050
1051 return d.CheckDuplicateAddress(addr, h), nil
1052 }
1053
1054 func (n *nic) setForwarding(protocol tcpip.NetworkProtocolNumber, enable bool) (bool, tcpip.Error) {
1055 ep := n.getNetworkEndpoint(protocol)
1056 if ep == nil {
1057 return false, &tcpip.ErrUnknownProtocol{}
1058 }
1059
1060 forwardingEP, ok := ep.(ForwardingNetworkEndpoint)
1061 if !ok {
1062 return false, &tcpip.ErrNotSupported{}
1063 }
1064
1065 return forwardingEP.SetForwarding(enable), nil
1066 }
1067
1068 func (n *nic) forwarding(protocol tcpip.NetworkProtocolNumber) (bool, tcpip.Error) {
1069 ep := n.getNetworkEndpoint(protocol)
1070 if ep == nil {
1071 return false, &tcpip.ErrUnknownProtocol{}
1072 }
1073
1074 forwardingEP, ok := ep.(ForwardingNetworkEndpoint)
1075 if !ok {
1076 return false, &tcpip.ErrNotSupported{}
1077 }
1078
1079 return forwardingEP.Forwarding(), nil
1080 }
1081
1082 func (n *nic) multicastForwardingEndpoint(protocol tcpip.NetworkProtocolNumber) (MulticastForwardingNetworkEndpoint, tcpip.Error) {
1083 ep := n.getNetworkEndpoint(protocol)
1084 if ep == nil {
1085 return nil, &tcpip.ErrUnknownProtocol{}
1086 }
1087
1088 forwardingEP, ok := ep.(MulticastForwardingNetworkEndpoint)
1089 if !ok {
1090 return nil, &tcpip.ErrNotSupported{}
1091 }
1092
1093 return forwardingEP, nil
1094 }
1095
1096 func (n *nic) setMulticastForwarding(protocol tcpip.NetworkProtocolNumber, enable bool) (bool, tcpip.Error) {
1097 ep, err := n.multicastForwardingEndpoint(protocol)
1098 if err != nil {
1099 return false, err
1100 }
1101
1102 return ep.SetMulticastForwarding(enable), nil
1103 }
1104
1105 func (n *nic) multicastForwarding(protocol tcpip.NetworkProtocolNumber) (bool, tcpip.Error) {
1106 ep, err := n.multicastForwardingEndpoint(protocol)
1107 if err != nil {
1108 return false, err
1109 }
1110
1111 return ep.MulticastForwarding(), nil
1112 }
1113
1114 // GetExperimentIPOptionEnabled returns whether the NIC is responsible for
1115 // passing the experiment IP option.
1116 func (n *nic) GetExperimentIPOptionEnabled() bool {
1117 return n.experimentIPOptionEnabled
1118 }
1119
1120 // CoordinatorNIC represents NetworkLinkEndpoint that can join multiple network devices.
1121 type CoordinatorNIC interface {
1122 // AddNIC adds the specified NIC device.
1123 AddNIC(n *nic) tcpip.Error
1124 // DelNIC deletes the specified NIC device.
1125 DelNIC(n *nic) tcpip.Error
1126 }
1127