1 // Copyright 2018 The gVisor Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 15 // Package stack provides the glue between networking protocols and the
16 // consumers of the networking stack.
17 //
18 // For consumers, the only function of interest is New(), everything else is
19 // provided by the tcpip/public package.
20 package stack
21 22 import (
23 "context"
24 "encoding/binary"
25 "fmt"
26 "io"
27 "math/rand"
28 "time"
29 30 "golang.org/x/time/rate"
31 "gvisor.dev/gvisor/pkg/atomicbitops"
32 "gvisor.dev/gvisor/pkg/buffer"
33 "gvisor.dev/gvisor/pkg/log"
34 cryptorand "gvisor.dev/gvisor/pkg/rand"
35 "gvisor.dev/gvisor/pkg/tcpip"
36 "gvisor.dev/gvisor/pkg/tcpip/header"
37 "gvisor.dev/gvisor/pkg/tcpip/ports"
38 "gvisor.dev/gvisor/pkg/waiter"
39 )
// DefaultTOS is the default type of service (TOS) value for network
// endpoints.
const DefaultTOS = 0
// transportProtocolState pairs a transport protocol registered with the stack
// with the optional per-stack default handler for that protocol.
//
// +stateify savable
type transportProtocolState struct {
	// proto is the transport protocol implementation returned by the
	// protocol's factory when the stack was created.
	proto TransportProtocol
	// defaultHandler is the per-stack default handler installed through
	// Stack.SetTransportProtocolHandler. It is tagged nosave.
	defaultHandler func(id TransportEndpointID, pkt *PacketBuffer) bool `state:"nosave"`
}
// RestoredEndpoint is an endpoint that needs to be restored.
type RestoredEndpoint interface {
	// Restore restores an endpoint. This can be used to restart background
	// workers such as protocol goroutines. This must be called after all
	// indirect dependencies of the endpoint have been restored, which
	// generally implies at the end of the restore process.
	Restore(*Stack)
}
// ResumableEndpoint is an endpoint that needs to be resumed after save.
type ResumableEndpoint interface {
	// Resume resumes an endpoint after a save has completed.
	Resume()
}
// netRawMissingLogger is the rate-limited logger (constructed with a
// time.Minute interval) that NewRawEndpoint uses to report attempts to create
// a raw socket on a stack that has no raw endpoint factory.
var netRawMissingLogger = log.BasicRateLimitedLogger(time.Minute)
// Stack is a networking stack, with all supported protocols, NICs, and route
// table.
//
// LOCK ORDERING: mu > routeMu.
//
// +stateify savable
type Stack struct {
	// transportProtocols holds the state of every transport protocol
	// registered at creation, keyed by protocol number.
	transportProtocols map[tcpip.TransportProtocolNumber]*transportProtocolState
	// networkProtocols holds every network protocol registered at creation,
	// keyed by protocol number.
	networkProtocols map[tcpip.NetworkProtocolNumber]NetworkProtocol

	// rawFactory creates raw endpoints. If nil, raw endpoints are
	// disabled. It is set during Stack creation and is immutable.
	rawFactory RawFactory
	// packetEndpointWriteSupported is copied from
	// Options.AllowPacketEndpointWrite at creation.
	packetEndpointWriteSupported bool

	// demux is the global transport demuxer created by New.
	demux *transportDemuxer

	// stats holds the stack's statistic counters, initialized from
	// Options.Stats.FillIn().
	stats tcpip.Stats

	// routeMu protects annotated fields below.
	routeMu routeStackRWMutex `state:"nosave"`

	// routeTable is a list of routes sorted by prefix length, longest (most specific) first.
	// +checklocks:routeMu
	routeTable tcpip.RouteList `state:"nosave"`

	// mu protects the annotated fields below.
	mu stackRWMutex `state:"nosave"`
	// nics holds the stack's NICs, keyed by NIC ID.
	// +checklocks:mu
	nics map[tcpip.NICID]*nic `state:"nosave"`
	// defaultForwardingEnabled is the set of network protocols for which
	// forwarding is enabled on newly created NICs.
	// +checklocks:mu
	defaultForwardingEnabled map[tcpip.NetworkProtocolNumber]struct{}

	// nicIDGen is used to generate NIC IDs.
	nicIDGen atomicbitops.Int32 `state:"nosave"`

	// cleanupEndpointsMu protects cleanupEndpoints.
	cleanupEndpointsMu cleanupEndpointsMutex `state:"nosave"`
	// +checklocks:cleanupEndpointsMu
	cleanupEndpoints map[TransportEndpoint]struct{}

	// PortManager is embedded; PortRange and SetPortRange delegate to it.
	*ports.PortManager

	// clock is used to generate user-visible times.
	clock tcpip.Clock

	// handleLocal allows non-loopback interfaces to loop packets.
	handleLocal bool

	// tables are the iptables packet filtering and manipulation rules.
	// TODO(gvisor.dev/issue/4595): S/R this field.
	tables *IPTables `state:"nosave"`

	// restoredEndpoints is a list of endpoints that need to be restored if the
	// stack is being restored.
	restoredEndpoints []RestoredEndpoint

	// resumableEndpoints is a list of endpoints that need to be resumed
	// after save.
	resumableEndpoints []ResumableEndpoint

	// icmpRateLimiter is a global rate limiter for all ICMP messages generated
	// by the stack.
	icmpRateLimiter *ICMPRateLimiter

	// seed is a one-time random value initialized at stack startup.
	//
	// TODO(gvisor.dev/issue/940): S/R this field.
	seed uint32

	// nudConfigs is the default NUD configurations used by interfaces.
	nudConfigs NUDConfigurations

	// nudDisp is the NUD event dispatcher that is used to send the netstack
	// integrator NUD related events.
	nudDisp NUDDispatcher

	// insecureRNG is an injectable pseudo random generator that can be
	// used when a random number is required. It must not be used in
	// security-sensitive contexts.
	insecureRNG *rand.Rand `state:"nosave"`

	// secureRNG is a cryptographically secure random number generator.
	secureRNG cryptorand.RNG `state:"nosave"`

	// sendBufferSize holds the min/default/max send buffer sizes for
	// endpoints other than TCP.
	sendBufferSize tcpip.SendBufferSizeOption

	// receiveBufferSize holds the min/default/max receive buffer sizes for
	// endpoints other than TCP.
	receiveBufferSize tcpip.ReceiveBufferSizeOption

	// tcpInvalidRateLimit is the maximal rate for sending duplicate
	// acknowledgements in response to incoming TCP packets that are for an existing
	// connection but that are invalid due to any of the following reasons:
	//
	//   a) out-of-window sequence number.
	//   b) out-of-window acknowledgement number.
	//   c) PAWS check failure (when implemented).
	//
	// This is required to prevent potential ACK loops.
	// Setting this to 0 will disable all rate limiting.
	tcpInvalidRateLimit time.Duration

	// tsOffsetSecret is the secret key for generating timestamp offsets
	// initialized at stack startup.
	tsOffsetSecret uint32

	// saveRestoreEnabled indicates whether the stack is saved and restored.
	saveRestoreEnabled bool
}
// NetworkProtocolFactory instantiates a network protocol. New calls each
// factory listed in Options.NetworkProtocols with the stack being created.
//
// NetworkProtocolFactory must not attempt to modify the stack, it may only
// query the stack.
type NetworkProtocolFactory func(*Stack) NetworkProtocol
// TransportProtocolFactory instantiates a transport protocol. New calls each
// factory listed in Options.TransportProtocols with the stack being created.
//
// TransportProtocolFactory must not attempt to modify the stack, it may only
// query the stack.
type TransportProtocolFactory func(*Stack) TransportProtocol
// Options contains optional Stack configuration.
type Options struct {
	// NetworkProtocols lists the network protocols to enable.
	NetworkProtocols []NetworkProtocolFactory

	// TransportProtocols lists the transport protocols to enable.
	TransportProtocols []TransportProtocolFactory

	// Clock is an optional clock used for timekeeping.
	//
	// If Clock is nil, tcpip.NewStdClock() will be used.
	Clock tcpip.Clock

	// Stats are optional statistic counters.
	Stats tcpip.Stats

	// HandleLocal indicates whether packets destined to their source
	// should be handled by the stack internally (true) or outside the
	// stack (false).
	HandleLocal bool

	// NUDConfigs is the default NUD configurations used by interfaces.
	// Invalid fields are reset by New before the configuration is used.
	NUDConfigs NUDConfigurations

	// NUDDisp is the NUD event dispatcher that an integrator can provide to
	// receive NUD related events.
	NUDDisp NUDDispatcher

	// RawFactory produces raw endpoints. Raw endpoints are enabled only if
	// this is non-nil.
	RawFactory RawFactory

	// AllowPacketEndpointWrite determines if packet endpoints support write
	// operations.
	AllowPacketEndpointWrite bool

	// RandSource is an optional source to use to generate random
	// numbers. If omitted it defaults to a Source seeded by the data
	// returned by the stack secure RNG.
	//
	// RandSource must be thread-safe.
	RandSource rand.Source

	// IPTables are the initial iptables rules. If nil, DefaultIPTables will be
	// used to construct the initial iptables rules.
	IPTables *IPTables

	// DefaultIPTables is an optional iptables rules constructor that is called
	// if IPTables is nil. If both fields are nil, New falls back to
	// DefaultTables and iptables will allow all traffic.
	DefaultIPTables func(clock tcpip.Clock, rand *rand.Rand) *IPTables

	// SecureRNG is a cryptographically secure random number generator. If
	// nil, cryptorand.Reader is used.
	SecureRNG io.Reader
}
// TransportEndpointInfo holds useful information about a transport endpoint
// which can be queried by monitoring tools.
//
// +stateify savable
type TransportEndpointInfo struct {
	// The following fields are initialized at creation time and are
	// immutable.

	NetProto   tcpip.NetworkProtocolNumber
	TransProto tcpip.TransportProtocolNumber

	// The following fields are protected by endpoint mu.

	ID TransportEndpointID
	// BindNICID and BindAddr are set via calls to Bind(). They are used to
	// reject attempts to send data or connect via a different NIC or
	// address.
	BindNICID tcpip.NICID
	BindAddr  tcpip.Address
	// RegisterNICID is the default NICID registered as a side-effect of
	// connect or datagram write.
	RegisterNICID tcpip.NICID
}
// AddrNetProtoLocked unwraps the specified address if it is a V4-mapped V6
// address and returns the network protocol number to be used to communicate
// with the specified address. It returns an error if the passed address is
// incompatible with the receiver.
//
// When bind is false and the (possibly unwrapped) address is unspecified, the
// returned address is replaced by the endpoint's local address, or by the
// loopback address of the selected protocol if the local address is
// unspecified too.
//
// Precondition: the parent endpoint mu must be held while calling this method.
func (t *TransportEndpointInfo) AddrNetProtoLocked(addr tcpip.FullAddress, v6only bool, bind bool) (tcpip.FullAddress, tcpip.NetworkProtocolNumber, tcpip.Error) {
	// Start from the endpoint's own protocol and override it based on the
	// length of the supplied address.
	netProto := t.NetProto
	switch addr.Addr.BitLen() {
	case header.IPv4AddressSizeBits:
		netProto = header.IPv4ProtocolNumber
	case header.IPv6AddressSizeBits:
		if header.IsV4MappedAddress(addr.Addr) {
			// Unwrap the V4-mapped address: keep only the trailing IPv4
			// bytes and switch to IPv4.
			netProto = header.IPv4ProtocolNumber
			addr.Addr = tcpip.AddrFrom4Slice(addr.Addr.AsSlice()[header.IPv6AddressSize-header.IPv4AddressSize:])
			// An unwrapped "any" address is normalized to the empty
			// address.
			if addr.Addr == header.IPv4Any {
				addr.Addr = tcpip.Address{}
			}
		}
	}

	// Reject addresses whose length does not match the length of the
	// endpoint's local address.
	switch t.ID.LocalAddress.BitLen() {
	case header.IPv4AddressSizeBits:
		if addr.Addr.BitLen() == header.IPv6AddressSizeBits {
			return tcpip.FullAddress{}, 0, &tcpip.ErrInvalidEndpointState{}
		}
	case header.IPv6AddressSizeBits:
		if addr.Addr.BitLen() == header.IPv4AddressSizeBits {
			return tcpip.FullAddress{}, 0, &tcpip.ErrNetworkUnreachable{}
		}
	}

	if !bind && addr.Addr.Unspecified() {
		// If the destination address isn't set, Linux sets it to the
		// source address. If a source address isn't set either, it
		// sets both to the loopback address.
		if t.ID.LocalAddress.Unspecified() {
			switch netProto {
			case header.IPv4ProtocolNumber:
				addr.Addr = header.IPv4Loopback
			case header.IPv6ProtocolNumber:
				addr.Addr = header.IPv6Loopback
			}
		} else {
			addr.Addr = t.ID.LocalAddress
		}
	}

	// Finally check that the selected protocol is usable on this endpoint.
	switch {
	case netProto == t.NetProto:
		// Same protocol as the endpoint: always allowed.
	case netProto == header.IPv4ProtocolNumber && t.NetProto == header.IPv6ProtocolNumber:
		// IPv4 over an IPv6 endpoint is allowed unless the endpoint is
		// v6-only.
		if v6only {
			return tcpip.FullAddress{}, 0, &tcpip.ErrHostUnreachable{}
		}
	default:
		return tcpip.FullAddress{}, 0, &tcpip.ErrInvalidEndpointState{}
	}

	return addr, netProto, nil
}
// IsEndpointInfo is an empty method to implement the tcpip.EndpointInfo
// marker interface. It intentionally does nothing.
func (*TransportEndpointInfo) IsEndpointInfo() {}
// New allocates a new networking stack with only the requested networking and
// transport protocols configured with default options.
//
// Unset options are defaulted: a nil Clock becomes tcpip.NewStdClock(), a nil
// SecureRNG becomes cryptorand.Reader, a nil RandSource becomes a locked
// source seeded from SecureRNG, and nil IPTables are built by DefaultIPTables
// (or DefaultTables if that is nil too).
//
// Note, opts.NUDConfigs will be fixed before being used by the Stack. That
// is, if an invalid value was provided, it will be reset to the default value.
//
// Protocol options can be changed by calling the
// SetNetworkProtocolOption/SetTransportProtocolOption methods provided by the
// stack. Please refer to individual protocol implementations as to what options
// are supported.
//
// New panics if a seed for the default random source cannot be read from
// SecureRNG.
func New(opts Options) *Stack {
	clock := opts.Clock
	if clock == nil {
		clock = tcpip.NewStdClock()
	}

	if opts.SecureRNG == nil {
		opts.SecureRNG = cryptorand.Reader
	}
	secureRNG := cryptorand.RNGFrom(opts.SecureRNG)

	randSrc := opts.RandSource
	if randSrc == nil {
		// Seed the default insecure source with 8 bytes read from the
		// secure RNG.
		var v int64
		if err := binary.Read(opts.SecureRNG, binary.LittleEndian, &v); err != nil {
			panic(err)
		}
		// Source provided by rand.NewSource is not thread-safe so
		// we wrap it in a simple thread-safe version.
		randSrc = &lockedRandomSource{src: rand.NewSource(v)}
	}
	insecureRNG := rand.New(randSrc)

	if opts.IPTables == nil {
		if opts.DefaultIPTables == nil {
			opts.DefaultIPTables = DefaultTables
		}
		opts.IPTables = opts.DefaultIPTables(clock, insecureRNG)
	}

	opts.NUDConfigs.resetInvalidFields()

	s := &Stack{
		transportProtocols:           make(map[tcpip.TransportProtocolNumber]*transportProtocolState),
		networkProtocols:             make(map[tcpip.NetworkProtocolNumber]NetworkProtocol),
		nics:                         make(map[tcpip.NICID]*nic),
		packetEndpointWriteSupported: opts.AllowPacketEndpointWrite,
		defaultForwardingEnabled:     make(map[tcpip.NetworkProtocolNumber]struct{}),
		cleanupEndpoints:             make(map[TransportEndpoint]struct{}),
		PortManager:                  ports.NewPortManager(),
		clock:                        clock,
		stats:                        opts.Stats.FillIn(),
		handleLocal:                  opts.HandleLocal,
		tables:                       opts.IPTables,
		icmpRateLimiter:              NewICMPRateLimiter(clock),
		seed:                         secureRNG.Uint32(),
		nudConfigs:                   opts.NUDConfigs,
		nudDisp:                      opts.NUDDisp,
		insecureRNG:                  insecureRNG,
		secureRNG:                    secureRNG,
		sendBufferSize: tcpip.SendBufferSizeOption{
			Min:     MinBufferSize,
			Default: DefaultBufferSize,
			Max:     DefaultMaxBufferSize,
		},
		receiveBufferSize: tcpip.ReceiveBufferSizeOption{
			Min:     MinBufferSize,
			Default: DefaultBufferSize,
			Max:     DefaultMaxBufferSize,
		},
		tcpInvalidRateLimit: defaultTCPInvalidRateLimit,
		tsOffsetSecret:      secureRNG.Uint32(),
	}

	// Add specified network protocols.
	for _, netProtoFactory := range opts.NetworkProtocols {
		netProto := netProtoFactory(s)
		s.networkProtocols[netProto.Number()] = netProto
	}

	// Add specified transport protocols.
	for _, transProtoFactory := range opts.TransportProtocols {
		transProto := transProtoFactory(s)
		s.transportProtocols[transProto.Number()] = &transportProtocolState{
			proto: transProto,
		}
	}

	// Add the factory for raw endpoints, if present.
	s.rawFactory = opts.RawFactory

	// Create the global transport demuxer.
	s.demux = newTransportDemuxer(s)

	return s
}
435 436 // NextNICID allocates the next available NIC ID and returns it.
437 func (s *Stack) NextNICID() tcpip.NICID {
438 next := s.nicIDGen.Add(1)
439 if next < 0 {
440 panic("NICID overflow")
441 }
442 return tcpip.NICID(next)
443 }
444 445 // SetNetworkProtocolOption allows configuring individual protocol level
446 // options. This method returns an error if the protocol is not supported or
447 // option is not supported by the protocol implementation or the provided value
448 // is incorrect.
449 func (s *Stack) SetNetworkProtocolOption(network tcpip.NetworkProtocolNumber, option tcpip.SettableNetworkProtocolOption) tcpip.Error {
450 netProto, ok := s.networkProtocols[network]
451 if !ok {
452 return &tcpip.ErrUnknownProtocol{}
453 }
454 return netProto.SetOption(option)
455 }
456 457 // NetworkProtocolOption allows retrieving individual protocol level option
458 // values. This method returns an error if the protocol is not supported or
459 // option is not supported by the protocol implementation. E.g.:
460 //
461 // var v ipv4.MyOption
462 // err := s.NetworkProtocolOption(tcpip.IPv4ProtocolNumber, &v)
463 // if err != nil {
464 // ...
465 // }
466 func (s *Stack) NetworkProtocolOption(network tcpip.NetworkProtocolNumber, option tcpip.GettableNetworkProtocolOption) tcpip.Error {
467 netProto, ok := s.networkProtocols[network]
468 if !ok {
469 return &tcpip.ErrUnknownProtocol{}
470 }
471 return netProto.Option(option)
472 }
473 474 // SetTransportProtocolOption allows configuring individual protocol level
475 // options. This method returns an error if the protocol is not supported or
476 // option is not supported by the protocol implementation or the provided value
477 // is incorrect.
478 func (s *Stack) SetTransportProtocolOption(transport tcpip.TransportProtocolNumber, option tcpip.SettableTransportProtocolOption) tcpip.Error {
479 transProtoState, ok := s.transportProtocols[transport]
480 if !ok {
481 return &tcpip.ErrUnknownProtocol{}
482 }
483 return transProtoState.proto.SetOption(option)
484 }
485 486 // TransportProtocolOption allows retrieving individual protocol level option
487 // values. This method returns an error if the protocol is not supported or
488 // option is not supported by the protocol implementation.
489 //
490 // var v tcp.SACKEnabled
491 // if err := s.TransportProtocolOption(tcpip.TCPProtocolNumber, &v); err != nil {
492 // ...
493 // }
494 func (s *Stack) TransportProtocolOption(transport tcpip.TransportProtocolNumber, option tcpip.GettableTransportProtocolOption) tcpip.Error {
495 transProtoState, ok := s.transportProtocols[transport]
496 if !ok {
497 return &tcpip.ErrUnknownProtocol{}
498 }
499 return transProtoState.proto.Option(option)
500 }
// SendBufSizeProto is a protocol that can return its send buffer size.
// Stack.TCPSendBufferLimits asserts that the registered TCP protocol
// implements it.
type SendBufSizeProto interface {
	// SendBufferSize returns the protocol's send buffer size range.
	SendBufferSize() tcpip.TCPSendBufferSizeRangeOption
}
506 507 // TCPSendBufferLimits returns the TCP send buffer size limit.
508 func (s *Stack) TCPSendBufferLimits() tcpip.TCPSendBufferSizeRangeOption {
509 return s.transportProtocols[header.TCPProtocolNumber].proto.(SendBufSizeProto).SendBufferSize()
510 }
511 512 // SetTransportProtocolHandler sets the per-stack default handler for the given
513 // protocol.
514 //
515 // It must be called only during initialization of the stack. Changing it as the
516 // stack is operating is not supported.
517 func (s *Stack) SetTransportProtocolHandler(p tcpip.TransportProtocolNumber, h func(TransportEndpointID, *PacketBuffer) bool) {
518 state := s.transportProtocols[p]
519 if state != nil {
520 state.defaultHandler = h
521 }
522 }
// Clock returns the Stack's clock for retrieving the current time and
// scheduling work. This is the clock selected when the stack was created:
// Options.Clock, or tcpip.NewStdClock() if that was nil.
func (s *Stack) Clock() tcpip.Clock {
	return s.clock
}
// Stats returns a mutable copy of the current stats. The stats are the ones
// initialized from Options.Stats.FillIn() when the stack was created.
//
// This is not generally exported via the public interface, but is available
// internally.
func (s *Stack) Stats() tcpip.Stats {
	return s.stats
}
537 538 // SetNICForwarding enables or disables packet forwarding on the specified NIC
539 // for the passed protocol.
540 //
541 // Returns the previous configuration on the NIC.
542 func (s *Stack) SetNICForwarding(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, enable bool) (bool, tcpip.Error) {
543 s.mu.RLock()
544 defer s.mu.RUnlock()
545 546 nic, ok := s.nics[id]
547 if !ok {
548 return false, &tcpip.ErrUnknownNICID{}
549 }
550 551 return nic.setForwarding(protocol, enable)
552 }
553 554 // NICForwarding returns the forwarding configuration for the specified NIC.
555 func (s *Stack) NICForwarding(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber) (bool, tcpip.Error) {
556 s.mu.RLock()
557 defer s.mu.RUnlock()
558 559 nic, ok := s.nics[id]
560 if !ok {
561 return false, &tcpip.ErrUnknownNICID{}
562 }
563 564 return nic.forwarding(protocol)
565 }
566 567 // SetForwardingDefaultAndAllNICs sets packet forwarding for all NICs for the
568 // passed protocol and sets the default setting for newly created NICs.
569 func (s *Stack) SetForwardingDefaultAndAllNICs(protocol tcpip.NetworkProtocolNumber, enable bool) tcpip.Error {
570 s.mu.Lock()
571 defer s.mu.Unlock()
572 573 doneOnce := false
574 for id, nic := range s.nics {
575 if _, err := nic.setForwarding(protocol, enable); err != nil {
576 // Expect forwarding to be settable on all interfaces if it was set on
577 // one.
578 if doneOnce {
579 panic(fmt.Sprintf("nic(id=%d).setForwarding(%d, %t): %s", id, protocol, enable, err))
580 }
581 582 return err
583 }
584 585 doneOnce = true
586 }
587 588 if enable {
589 s.defaultForwardingEnabled[protocol] = struct{}{}
590 } else {
591 delete(s.defaultForwardingEnabled, protocol)
592 }
593 594 return nil
595 }
596 597 // AddMulticastRoute adds a multicast route to be used for the specified
598 // addresses and protocol.
599 func (s *Stack) AddMulticastRoute(protocol tcpip.NetworkProtocolNumber, addresses UnicastSourceAndMulticastDestination, route MulticastRoute) tcpip.Error {
600 netProto, ok := s.networkProtocols[protocol]
601 if !ok {
602 return &tcpip.ErrUnknownProtocol{}
603 }
604 605 forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol)
606 if !ok {
607 return &tcpip.ErrNotSupported{}
608 }
609 610 return forwardingNetProto.AddMulticastRoute(addresses, route)
611 }
612 613 // RemoveMulticastRoute removes a multicast route that matches the specified
614 // addresses and protocol.
615 func (s *Stack) RemoveMulticastRoute(protocol tcpip.NetworkProtocolNumber, addresses UnicastSourceAndMulticastDestination) tcpip.Error {
616 netProto, ok := s.networkProtocols[protocol]
617 if !ok {
618 return &tcpip.ErrUnknownProtocol{}
619 }
620 621 forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol)
622 if !ok {
623 return &tcpip.ErrNotSupported{}
624 }
625 626 return forwardingNetProto.RemoveMulticastRoute(addresses)
627 }
628 629 // MulticastRouteLastUsedTime returns a monotonic timestamp that represents the
630 // last time that the route that matches the provided addresses and protocol
631 // was used or updated.
632 func (s *Stack) MulticastRouteLastUsedTime(protocol tcpip.NetworkProtocolNumber, addresses UnicastSourceAndMulticastDestination) (tcpip.MonotonicTime, tcpip.Error) {
633 netProto, ok := s.networkProtocols[protocol]
634 if !ok {
635 return tcpip.MonotonicTime{}, &tcpip.ErrUnknownProtocol{}
636 }
637 638 forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol)
639 if !ok {
640 return tcpip.MonotonicTime{}, &tcpip.ErrNotSupported{}
641 }
642 643 return forwardingNetProto.MulticastRouteLastUsedTime(addresses)
644 }
645 646 // EnableMulticastForwardingForProtocol enables multicast forwarding for the
647 // provided protocol.
648 //
649 // Returns true if forwarding was already enabled on the protocol.
650 // Additionally, returns an error if:
651 //
652 // - The protocol is not found.
653 // - The protocol doesn't support multicast forwarding.
654 // - The multicast forwarding event dispatcher is nil.
655 //
656 // If successful, future multicast forwarding events will be sent to the
657 // provided event dispatcher.
658 func (s *Stack) EnableMulticastForwardingForProtocol(protocol tcpip.NetworkProtocolNumber, disp MulticastForwardingEventDispatcher) (bool, tcpip.Error) {
659 netProto, ok := s.networkProtocols[protocol]
660 if !ok {
661 return false, &tcpip.ErrUnknownProtocol{}
662 }
663 664 forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol)
665 if !ok {
666 return false, &tcpip.ErrNotSupported{}
667 }
668 669 return forwardingNetProto.EnableMulticastForwarding(disp)
670 }
671 672 // DisableMulticastForwardingForProtocol disables multicast forwarding for the
673 // provided protocol.
674 //
675 // Returns an error if the provided protocol is not found or if it does not
676 // support multicast forwarding.
677 func (s *Stack) DisableMulticastForwardingForProtocol(protocol tcpip.NetworkProtocolNumber) tcpip.Error {
678 netProto, ok := s.networkProtocols[protocol]
679 if !ok {
680 return &tcpip.ErrUnknownProtocol{}
681 }
682 683 forwardingNetProto, ok := netProto.(MulticastForwardingNetworkProtocol)
684 if !ok {
685 return &tcpip.ErrNotSupported{}
686 }
687 688 forwardingNetProto.DisableMulticastForwarding()
689 return nil
690 }
691 692 // SetNICMulticastForwarding enables or disables multicast packet forwarding on
693 // the specified NIC for the passed protocol.
694 //
695 // Returns the previous configuration on the NIC.
696 func (s *Stack) SetNICMulticastForwarding(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber, enable bool) (bool, tcpip.Error) {
697 s.mu.RLock()
698 defer s.mu.RUnlock()
699 700 nic, ok := s.nics[id]
701 if !ok {
702 return false, &tcpip.ErrUnknownNICID{}
703 }
704 705 return nic.setMulticastForwarding(protocol, enable)
706 }
707 708 // NICMulticastForwarding returns the multicast forwarding configuration for
709 // the specified NIC.
710 func (s *Stack) NICMulticastForwarding(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber) (bool, tcpip.Error) {
711 s.mu.RLock()
712 defer s.mu.RUnlock()
713 714 nic, ok := s.nics[id]
715 if !ok {
716 return false, &tcpip.ErrUnknownNICID{}
717 }
718 719 return nic.multicastForwarding(protocol)
720 }
721 722 // PortRange returns the UDP and TCP inclusive range of ephemeral ports used in
723 // both IPv4 and IPv6.
724 func (s *Stack) PortRange() (uint16, uint16) {
725 return s.PortManager.PortRange()
726 }
727 728 // SetPortRange sets the UDP and TCP IPv4 and IPv6 ephemeral port range
729 // (inclusive).
730 func (s *Stack) SetPortRange(start uint16, end uint16) tcpip.Error {
731 return s.PortManager.SetPortRange(start, end)
732 }
733 734 // SetRouteTable assigns the route table to be used by this stack. It
735 // specifies which NIC to use for given destination address ranges.
736 //
737 // This method takes ownership of the table.
738 func (s *Stack) SetRouteTable(table []tcpip.Route) {
739 s.routeMu.Lock()
740 defer s.routeMu.Unlock()
741 s.routeTable.Reset()
742 for _, r := range table {
743 s.addRouteLocked(&r)
744 }
745 }
746 747 // GetRouteTable returns the route table which is currently in use.
748 func (s *Stack) GetRouteTable() []tcpip.Route {
749 s.routeMu.RLock()
750 defer s.routeMu.RUnlock()
751 table := make([]tcpip.Route, 0)
752 for r := s.routeTable.Front(); r != nil; r = r.Next() {
753 table = append(table, *r)
754 }
755 return table
756 }
// AddRoute adds a route to the route table. The route is not necessarily
// appended at the end: it is inserted before the first existing route with a
// shorter destination prefix, which keeps the table ordered by prefix length.
func (s *Stack) AddRoute(route tcpip.Route) {
	s.routeMu.Lock()
	defer s.routeMu.Unlock()
	s.addRouteLocked(&route)
}
764 765 // +checklocks:s.routeMu
766 func (s *Stack) addRouteLocked(route *tcpip.Route) {
767 routePrefix := route.Destination.Prefix()
768 n := s.routeTable.Front()
769 for ; n != nil; n = n.Next() {
770 if n.Destination.Prefix() < routePrefix {
771 s.routeTable.InsertBefore(n, route)
772 return
773 }
774 }
775 s.routeTable.PushBack(route)
776 }
777 778 // RemoveRoutes removes matching routes from the route table, it
779 // returns the number of routes that are removed.
780 func (s *Stack) RemoveRoutes(match func(tcpip.Route) bool) int {
781 s.routeMu.Lock()
782 defer s.routeMu.Unlock()
783 784 return s.removeRoutesLocked(match)
785 }
786 787 // +checklocks:s.routeMu
788 func (s *Stack) removeRoutesLocked(match func(tcpip.Route) bool) int {
789 count := 0
790 for route := s.routeTable.Front(); route != nil; {
791 next := route.Next()
792 if match(*route) {
793 s.routeTable.Remove(route)
794 count++
795 }
796 route = next
797 }
798 return count
799 }
800 801 // ReplaceRoute replaces the route in the routing table which matchse
802 // the lookup key for the routing table. If there is no match, the given
803 // route will still be added to the routing table.
804 // The lookup key consists of destination, ToS, scope and output interface.
805 func (s *Stack) ReplaceRoute(route tcpip.Route) {
806 s.routeMu.Lock()
807 defer s.routeMu.Unlock()
808 809 s.removeRoutesLocked(func(rt tcpip.Route) bool {
810 return rt.Equal(route)
811 })
812 s.addRouteLocked(&route)
813 }
814 815 // NewEndpoint creates a new transport layer endpoint of the given protocol.
816 func (s *Stack) NewEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) {
817 t, ok := s.transportProtocols[transport]
818 if !ok {
819 return nil, &tcpip.ErrUnknownProtocol{}
820 }
821 822 return t.proto.NewEndpoint(network, waiterQueue)
823 }
824 825 // NewRawEndpoint creates a new raw transport layer endpoint of the given
826 // protocol. Raw endpoints receive all traffic for a given protocol regardless
827 // of address.
828 func (s *Stack) NewRawEndpoint(transport tcpip.TransportProtocolNumber, network tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue, associated bool) (tcpip.Endpoint, tcpip.Error) {
829 if s.rawFactory == nil {
830 netRawMissingLogger.Infof("A process tried to create a raw socket, but --net-raw was not specified. Should runsc be run with --net-raw?")
831 return nil, &tcpip.ErrNotPermitted{}
832 }
833 834 if !associated {
835 return s.rawFactory.NewUnassociatedEndpoint(s, network, transport, waiterQueue)
836 }
837 838 t, ok := s.transportProtocols[transport]
839 if !ok {
840 return nil, &tcpip.ErrUnknownProtocol{}
841 }
842 843 return t.proto.NewRawEndpoint(network, waiterQueue)
844 }
845 846 // NewPacketEndpoint creates a new packet endpoint listening for the given
847 // netProto.
848 func (s *Stack) NewPacketEndpoint(cooked bool, netProto tcpip.NetworkProtocolNumber, waiterQueue *waiter.Queue) (tcpip.Endpoint, tcpip.Error) {
849 if s.rawFactory == nil {
850 return nil, &tcpip.ErrNotPermitted{}
851 }
852 853 return s.rawFactory.NewPacketEndpoint(s, cooked, netProto, waiterQueue)
854 }
// NICContext is an opaque value used to store client-supplied NIC metadata.
// It is supplied through NICOptions.Context.
type NICContext any
// NICOptions specifies the configuration of a NIC as it is being created.
// The zero value creates an enabled, unnamed NIC.
type NICOptions struct {
	// Name specifies the name of the NIC. A non-empty name must be unique
	// among the stack's NICs.
	Name string

	// Disabled specifies whether to avoid calling Attach on the passed
	// LinkEndpoint.
	Disabled bool

	// Context specifies user-defined data that will be returned in stack.NICInfo
	// for the NIC. Clients of this library can use it to add metadata that
	// should be tracked alongside a NIC, to avoid having to keep a
	// map[tcpip.NICID]metadata mirroring stack.Stack's nic map.
	Context NICContext

	// QDisc is the queue discipline to use for this NIC.
	QDisc QueueingDiscipline

	// DeliverLinkPackets specifies whether the NIC is responsible for
	// delivering raw packets to packet sockets.
	DeliverLinkPackets bool

	// EnableExperimentIPOption specifies whether the NIC is responsible for
	// passing the experiment IP option.
	EnableExperimentIPOption bool
}
886 887 // GetNICByID return a network device associated with the specified ID.
888 func (s *Stack) GetNICByID(id tcpip.NICID) (*nic, tcpip.Error) {
889 s.mu.Lock()
890 defer s.mu.Unlock()
891 892 n, ok := s.nics[id]
893 if !ok {
894 return nil, &tcpip.ErrNoSuchFile{}
895 }
896 return n, nil
897 }
898 899 // CreateNICWithOptions creates a NIC with the provided id, LinkEndpoint, and
900 // NICOptions. See the documentation on type NICOptions for details on how
901 // NICs can be configured.
902 //
903 // LinkEndpoint.Attach will be called to bind ep with a NetworkDispatcher.
904 func (s *Stack) CreateNICWithOptions(id tcpip.NICID, ep LinkEndpoint, opts NICOptions) tcpip.Error {
905 s.mu.Lock()
906 defer s.mu.Unlock()
907 908 if id == 0 {
909 return &tcpip.ErrInvalidNICID{}
910 }
911 // Make sure id is unique.
912 if _, ok := s.nics[id]; ok {
913 return &tcpip.ErrDuplicateNICID{}
914 }
915 916 // Make sure name is unique, unless unnamed.
917 if opts.Name != "" {
918 for _, n := range s.nics {
919 if n.Name() == opts.Name {
920 return &tcpip.ErrDuplicateNICID{}
921 }
922 }
923 }
924 925 n := newNIC(s, id, ep, opts)
926 for proto := range s.defaultForwardingEnabled {
927 if _, err := n.setForwarding(proto, true); err != nil {
928 panic(fmt.Sprintf("newNIC(%d, ...).setForwarding(%d, true): %s", id, proto, err))
929 }
930 }
931 s.nics[id] = n
932 ep.SetOnCloseAction(func() {
933 s.RemoveNIC(id)
934 })
935 if !opts.Disabled {
936 return n.enable()
937 }
938 939 return nil
940 }
// CreateNIC creates a NIC with the provided id and LinkEndpoint and calls
// LinkEndpoint.Attach to bind ep with a NetworkDispatcher.
//
// It is shorthand for CreateNICWithOptions with the zero NICOptions, i.e. an
// enabled, unnamed NIC.
func (s *Stack) CreateNIC(id tcpip.NICID, ep LinkEndpoint) tcpip.Error {
	return s.CreateNICWithOptions(id, ep, NICOptions{})
}
947 948 // GetLinkEndpointByName gets the link endpoint specified by name.
949 func (s *Stack) GetLinkEndpointByName(name string) LinkEndpoint {
950 s.mu.RLock()
951 defer s.mu.RUnlock()
952 for _, nic := range s.nics {
953 if nic.Name() == name {
954 linkEP, ok := nic.NetworkLinkEndpoint.(LinkEndpoint)
955 if !ok {
956 panic(fmt.Sprintf("unexpected NetworkLinkEndpoint(%#v) is not a LinkEndpoint", nic.NetworkLinkEndpoint))
957 }
958 return linkEP
959 }
960 }
961 return nil
962 }
963 964 // EnableNIC enables the given NIC so that the link-layer endpoint can start
965 // delivering packets to it.
966 func (s *Stack) EnableNIC(id tcpip.NICID) tcpip.Error {
967 s.mu.RLock()
968 defer s.mu.RUnlock()
969 970 nic, ok := s.nics[id]
971 if !ok {
972 return &tcpip.ErrUnknownNICID{}
973 }
974 975 return nic.enable()
976 }
977 978 // DisableNIC disables the given NIC.
979 func (s *Stack) DisableNIC(id tcpip.NICID) tcpip.Error {
980 s.mu.RLock()
981 defer s.mu.RUnlock()
982 983 nic, ok := s.nics[id]
984 if !ok {
985 return &tcpip.ErrUnknownNICID{}
986 }
987 988 nic.disable()
989 return nil
990 }
991 992 // CheckNIC checks if a NIC is usable.
993 func (s *Stack) CheckNIC(id tcpip.NICID) bool {
994 s.mu.RLock()
995 defer s.mu.RUnlock()
996 997 nic, ok := s.nics[id]
998 if !ok {
999 return false
1000 }
1001 1002 return nic.Enabled()
1003 }
1004 1005 // RemoveNIC removes NIC and all related routes from the network stack.
1006 func (s *Stack) RemoveNIC(id tcpip.NICID) tcpip.Error {
1007 s.mu.Lock()
1008 deferAct, err := s.removeNICLocked(id)
1009 s.mu.Unlock()
1010 if deferAct != nil {
1011 deferAct()
1012 }
1013 return err
1014 }
1015 1016 // removeNICLocked removes NIC and all related routes from the network stack.
1017 //
1018 // +checklocks:s.mu
1019 func (s *Stack) removeNICLocked(id tcpip.NICID) (func(), tcpip.Error) {
1020 nic, ok := s.nics[id]
1021 if !ok {
1022 return nil, &tcpip.ErrUnknownNICID{}
1023 }
1024 delete(s.nics, id)
1025 1026 if nic.Primary != nil {
1027 b := nic.Primary.NetworkLinkEndpoint.(CoordinatorNIC)
1028 if err := b.DelNIC(nic); err != nil {
1029 return nil, err
1030 }
1031 }
1032 1033 // Remove routes in-place. n tracks the number of routes written.
1034 s.routeMu.Lock()
1035 for r := s.routeTable.Front(); r != nil; {
1036 next := r.Next()
1037 if r.NIC == id {
1038 s.routeTable.Remove(r)
1039 }
1040 r = next
1041 }
1042 s.routeMu.Unlock()
1043 1044 return nic.remove(true /* closeLinkEndpoint */)
1045 }
1046 1047 // SetNICCoordinator sets a coordinator device.
1048 func (s *Stack) SetNICCoordinator(id tcpip.NICID, mid tcpip.NICID) tcpip.Error {
1049 s.mu.Lock()
1050 defer s.mu.Unlock()
1051 1052 nic, ok := s.nics[id]
1053 if !ok {
1054 return &tcpip.ErrUnknownNICID{}
1055 }
1056 // Setting a coordinator for a coordinator NIC is not allowed.
1057 if _, ok := nic.NetworkLinkEndpoint.(CoordinatorNIC); ok {
1058 return &tcpip.ErrNoSuchFile{}
1059 }
1060 m, ok := s.nics[mid]
1061 if !ok {
1062 return &tcpip.ErrUnknownNICID{}
1063 }
1064 b, ok := m.NetworkLinkEndpoint.(CoordinatorNIC)
1065 if !ok {
1066 return &tcpip.ErrNotSupported{}
1067 }
1068 if err := b.AddNIC(nic); err != nil {
1069 return err
1070 }
1071 nic.Primary = m
1072 return nil
1073 }
1074 1075 // SetNICAddress sets the hardware address which is identified by the nic ID.
1076 func (s *Stack) SetNICAddress(id tcpip.NICID, addr tcpip.LinkAddress) tcpip.Error {
1077 s.mu.Lock()
1078 defer s.mu.Unlock()
1079 1080 nic, ok := s.nics[id]
1081 if !ok {
1082 return &tcpip.ErrUnknownNICID{}
1083 }
1084 nic.NetworkLinkEndpoint.SetLinkAddress(addr)
1085 return nil
1086 }
1087 1088 // SetNICName sets a NIC's name.
1089 func (s *Stack) SetNICName(id tcpip.NICID, name string) tcpip.Error {
1090 s.mu.Lock()
1091 defer s.mu.Unlock()
1092 1093 nic, ok := s.nics[id]
1094 if !ok {
1095 return &tcpip.ErrUnknownNICID{}
1096 }
1097 nic.name = name
1098 return nil
1099 }
1100 1101 // SetNICMTU sets a NIC's MTU.
1102 func (s *Stack) SetNICMTU(id tcpip.NICID, mtu uint32) tcpip.Error {
1103 s.mu.Lock()
1104 defer s.mu.Unlock()
1105 1106 nic, ok := s.nics[id]
1107 if !ok {
1108 return &tcpip.ErrUnknownNICID{}
1109 }
1110 nic.NetworkLinkEndpoint.SetMTU(mtu)
1111 return nil
1112 }
// NICInfo captures the name and addresses assigned to a NIC, along with a
// snapshot of its state, statistics and forwarding configuration. Values of
// this type are produced by Stack.NICInfo.
type NICInfo struct {
	// Name is the NIC's name.
	Name string

	// LinkAddress is the link address reported by the NIC's link endpoint.
	LinkAddress tcpip.LinkAddress

	// ProtocolAddresses holds the NIC's primary addresses.
	ProtocolAddresses []tcpip.ProtocolAddress

	// Flags indicate the state of the NIC.
	Flags NICStateFlags

	// MTU is the maximum transmission unit.
	MTU uint32

	// Stats holds the NIC's local statistics.
	Stats tcpip.NICStats

	// NetworkStats holds the stats of each NetworkEndpoint bound to the NIC.
	NetworkStats map[tcpip.NetworkProtocolNumber]NetworkEndpointStats

	// Context is user-supplied data optionally supplied in CreateNICWithOptions.
	// See type NICOptions for more details.
	Context NICContext

	// ARPHardwareType holds the ARP Hardware type of the NIC. This is the
	// value sent in haType field of an ARP Request sent by this NIC and the
	// value expected in the haType field of an ARP response.
	ARPHardwareType header.ARPHardwareType

	// Forwarding holds the forwarding status for each network endpoint that
	// supports forwarding. Protocols that do not support forwarding have no
	// entry.
	Forwarding map[tcpip.NetworkProtocolNumber]bool

	// MulticastForwarding holds the forwarding status for each network endpoint
	// that supports multicast forwarding. Protocols that do not support it
	// have no entry.
	MulticastForwarding map[tcpip.NetworkProtocolNumber]bool
}
1148 1149 // HasNIC returns true if the NICID is defined in the stack.
1150 func (s *Stack) HasNIC(id tcpip.NICID) bool {
1151 s.mu.RLock()
1152 _, ok := s.nics[id]
1153 s.mu.RUnlock()
1154 return ok
1155 }
1156 1157 // NICInfo returns a map of NICIDs to their associated information.
1158 func (s *Stack) NICInfo() map[tcpip.NICID]NICInfo {
1159 s.mu.RLock()
1160 defer s.mu.RUnlock()
1161 1162 type forwardingFn func(tcpip.NetworkProtocolNumber) (bool, tcpip.Error)
1163 forwardingValue := func(forwardingFn forwardingFn, proto tcpip.NetworkProtocolNumber, nicID tcpip.NICID, fnName string) (forward bool, ok bool) {
1164 switch forwarding, err := forwardingFn(proto); err.(type) {
1165 case nil:
1166 return forwarding, true
1167 case *tcpip.ErrUnknownProtocol:
1168 panic(fmt.Sprintf("expected network protocol %d to be available on NIC %d", proto, nicID))
1169 case *tcpip.ErrNotSupported:
1170 // Not all network protocols support forwarding.
1171 default:
1172 panic(fmt.Sprintf("nic(id=%d).%s(%d): %s", nicID, fnName, proto, err))
1173 }
1174 return false, false
1175 }
1176 1177 nics := make(map[tcpip.NICID]NICInfo)
1178 for id, nic := range s.nics {
1179 flags := NICStateFlags{
1180 Up: true, // Netstack interfaces are always up.
1181 Running: nic.Enabled(),
1182 Promiscuous: nic.Promiscuous(),
1183 Loopback: nic.IsLoopback(),
1184 }
1185 1186 netStats := make(map[tcpip.NetworkProtocolNumber]NetworkEndpointStats)
1187 for proto, netEP := range nic.networkEndpoints {
1188 netStats[proto] = netEP.Stats()
1189 }
1190 1191 info := NICInfo{
1192 Name: nic.name,
1193 LinkAddress: nic.NetworkLinkEndpoint.LinkAddress(),
1194 ProtocolAddresses: nic.primaryAddresses(),
1195 Flags: flags,
1196 MTU: nic.NetworkLinkEndpoint.MTU(),
1197 Stats: nic.stats.local,
1198 NetworkStats: netStats,
1199 Context: nic.context,
1200 ARPHardwareType: nic.NetworkLinkEndpoint.ARPHardwareType(),
1201 Forwarding: make(map[tcpip.NetworkProtocolNumber]bool),
1202 MulticastForwarding: make(map[tcpip.NetworkProtocolNumber]bool),
1203 }
1204 1205 for proto := range s.networkProtocols {
1206 if forwarding, ok := forwardingValue(nic.forwarding, proto, id, "forwarding"); ok {
1207 info.Forwarding[proto] = forwarding
1208 }
1209 1210 if multicastForwarding, ok := forwardingValue(nic.multicastForwarding, proto, id, "multicastForwarding"); ok {
1211 info.MulticastForwarding[proto] = multicastForwarding
1212 }
1213 }
1214 1215 nics[id] = info
1216 }
1217 return nics
1218 }
// NICStateFlags holds information about the state of an NIC.
type NICStateFlags struct {
	// Up indicates whether the interface is running. Stack.NICInfo always
	// reports true here.
	Up bool

	// Running indicates whether resources are allocated. Stack.NICInfo sets
	// it to whether the NIC is enabled.
	Running bool

	// Promiscuous indicates whether the interface is in promiscuous mode.
	Promiscuous bool

	// Loopback indicates whether the interface is a loopback.
	Loopback bool
}
1234 1235 // AddProtocolAddress adds an address to the specified NIC, possibly with extra
1236 // properties.
1237 func (s *Stack) AddProtocolAddress(id tcpip.NICID, protocolAddress tcpip.ProtocolAddress, properties AddressProperties) tcpip.Error {
1238 s.mu.RLock()
1239 defer s.mu.RUnlock()
1240 1241 nic, ok := s.nics[id]
1242 if !ok {
1243 return &tcpip.ErrUnknownNICID{}
1244 }
1245 1246 return nic.addAddress(protocolAddress, properties)
1247 }
1248 1249 // RemoveAddress removes an existing network-layer address from the specified
1250 // NIC.
1251 func (s *Stack) RemoveAddress(id tcpip.NICID, addr tcpip.Address) tcpip.Error {
1252 s.mu.RLock()
1253 defer s.mu.RUnlock()
1254 1255 if nic, ok := s.nics[id]; ok {
1256 return nic.removeAddress(addr)
1257 }
1258 1259 return &tcpip.ErrUnknownNICID{}
1260 }
1261 1262 // SetAddressLifetimes sets informational preferred and valid lifetimes, and
1263 // whether the address should be preferred or deprecated.
1264 func (s *Stack) SetAddressLifetimes(id tcpip.NICID, addr tcpip.Address, lifetimes AddressLifetimes) tcpip.Error {
1265 s.mu.RLock()
1266 defer s.mu.RUnlock()
1267 1268 if nic, ok := s.nics[id]; ok {
1269 return nic.setAddressLifetimes(addr, lifetimes)
1270 }
1271 1272 return &tcpip.ErrUnknownNICID{}
1273 }
1274 1275 // AllAddresses returns a map of NICIDs to their protocol addresses (primary
1276 // and non-primary).
1277 func (s *Stack) AllAddresses() map[tcpip.NICID][]tcpip.ProtocolAddress {
1278 s.mu.RLock()
1279 defer s.mu.RUnlock()
1280 1281 nics := make(map[tcpip.NICID][]tcpip.ProtocolAddress)
1282 for id, nic := range s.nics {
1283 nics[id] = nic.allPermanentAddresses()
1284 }
1285 return nics
1286 }
1287 1288 // GetMainNICAddress returns the first non-deprecated primary address and prefix
1289 // for the given NIC and protocol. If no non-deprecated primary addresses exist,
1290 // a deprecated address will be returned. If no deprecated addresses exist, the
1291 // zero value will be returned.
1292 func (s *Stack) GetMainNICAddress(id tcpip.NICID, protocol tcpip.NetworkProtocolNumber) (tcpip.AddressWithPrefix, tcpip.Error) {
1293 s.mu.RLock()
1294 defer s.mu.RUnlock()
1295 1296 nic, ok := s.nics[id]
1297 if !ok {
1298 return tcpip.AddressWithPrefix{}, &tcpip.ErrUnknownNICID{}
1299 }
1300 1301 return nic.PrimaryAddress(protocol)
1302 }
1303 1304 func (s *Stack) getAddressEP(nic *nic, localAddr, remoteAddr, srcHint tcpip.Address, netProto tcpip.NetworkProtocolNumber) AssignableAddressEndpoint {
1305 if localAddr.BitLen() == 0 {
1306 return nic.primaryEndpoint(netProto, remoteAddr, srcHint)
1307 }
1308 return nic.findEndpoint(netProto, localAddr, CanBePrimaryEndpoint)
1309 }
1310 1311 // NewRouteForMulticast returns a Route that may be used to forward multicast
1312 // packets.
1313 //
1314 // Returns nil if validation fails.
1315 func (s *Stack) NewRouteForMulticast(nicID tcpip.NICID, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber) *Route {
1316 s.mu.RLock()
1317 defer s.mu.RUnlock()
1318 1319 nic, ok := s.nics[nicID]
1320 if !ok || !nic.Enabled() {
1321 return nil
1322 }
1323 1324 if addressEndpoint := s.getAddressEP(nic, tcpip.Address{} /* localAddr */, remoteAddr, tcpip.Address{} /* srcHint */, netProto); addressEndpoint != nil {
1325 return constructAndValidateRoute(netProto, addressEndpoint, nic, nic, tcpip.Address{} /* gateway */, tcpip.Address{} /* localAddr */, remoteAddr, s.handleLocal, false /* multicastLoop */, 0 /* mtu */)
1326 }
1327 return nil
1328 }
1329 1330 // findLocalRouteFromNICRLocked is like findLocalRouteRLocked but finds a route
1331 // from the specified NIC.
1332 //
1333 // +checklocksread:s.mu
1334 func (s *Stack) findLocalRouteFromNICRLocked(localAddressNIC *nic, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber) *Route {
1335 localAddressEndpoint := localAddressNIC.getAddressOrCreateTempInner(netProto, localAddr, false /* createTemp */, NeverPrimaryEndpoint)
1336 if localAddressEndpoint == nil {
1337 return nil
1338 }
1339 1340 var outgoingNIC *nic
1341 // Prefer a local route to the same interface as the local address.
1342 if localAddressNIC.hasAddress(netProto, remoteAddr) {
1343 outgoingNIC = localAddressNIC
1344 }
1345 1346 // If the remote address isn't owned by the local address's NIC, check all
1347 // NICs.
1348 if outgoingNIC == nil {
1349 for _, nic := range s.nics {
1350 if nic.hasAddress(netProto, remoteAddr) {
1351 outgoingNIC = nic
1352 break
1353 }
1354 }
1355 }
1356 1357 // If the remote address is not owned by the stack, we can't return a local
1358 // route.
1359 if outgoingNIC == nil {
1360 localAddressEndpoint.DecRef()
1361 return nil
1362 }
1363 1364 r := makeLocalRoute(
1365 netProto,
1366 localAddr,
1367 remoteAddr,
1368 outgoingNIC,
1369 localAddressNIC,
1370 localAddressEndpoint,
1371 )
1372 1373 if r.IsOutboundBroadcast() {
1374 r.Release()
1375 return nil
1376 }
1377 1378 return r
1379 }
1380 1381 // findLocalRouteRLocked returns a local route.
1382 //
1383 // A local route is a route to some remote address which the stack owns. That
1384 // is, a local route is a route where packets never have to leave the stack.
1385 //
1386 // +checklocksread:s.mu
1387 func (s *Stack) findLocalRouteRLocked(localAddressNICID tcpip.NICID, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber) *Route {
1388 if localAddr.BitLen() == 0 {
1389 localAddr = remoteAddr
1390 }
1391 1392 if localAddressNICID == 0 {
1393 for _, localAddressNIC := range s.nics {
1394 if r := s.findLocalRouteFromNICRLocked(localAddressNIC, localAddr, remoteAddr, netProto); r != nil {
1395 return r
1396 }
1397 }
1398 1399 return nil
1400 }
1401 1402 if localAddressNIC, ok := s.nics[localAddressNICID]; ok {
1403 return s.findLocalRouteFromNICRLocked(localAddressNIC, localAddr, remoteAddr, netProto)
1404 }
1405 1406 return nil
1407 }
// HandleLocal returns true if non-loopback interfaces are allowed to loop packets.
//
// It reports the stack's handleLocal setting and takes no lock.
func (s *Stack) HandleLocal() bool {
	return s.handleLocal
}
1413 1414 func isNICForwarding(nic *nic, proto tcpip.NetworkProtocolNumber) bool {
1415 switch forwarding, err := nic.forwarding(proto); err.(type) {
1416 case nil:
1417 return forwarding
1418 case *tcpip.ErrUnknownProtocol:
1419 panic(fmt.Sprintf("expected network protocol %d to be available on NIC %d", proto, nic.ID()))
1420 case *tcpip.ErrNotSupported:
1421 // Not all network protocols support forwarding.
1422 return false
1423 default:
1424 panic(fmt.Sprintf("nic(id=%d).forwarding(%d): %s", nic.ID(), proto, err))
1425 }
1426 }
1427 1428 // findRouteWithLocalAddrFromAnyInterfaceRLocked returns a route to the given
1429 // destination address, leaving through the given NIC.
1430 //
1431 // Rather than preferring to find a route that uses a local address assigned to
1432 // the outgoing interface, it finds any NIC that holds a matching local address
1433 // endpoint.
1434 //
1435 // +checklocksread:s.mu
1436 func (s *Stack) findRouteWithLocalAddrFromAnyInterfaceRLocked(outgoingNIC *nic, localAddr, remoteAddr, srcHint, gateway tcpip.Address, netProto tcpip.NetworkProtocolNumber, multicastLoop bool, mtu uint32) *Route {
1437 for _, aNIC := range s.nics {
1438 addressEndpoint := s.getAddressEP(aNIC, localAddr, remoteAddr, srcHint, netProto)
1439 if addressEndpoint == nil {
1440 continue
1441 }
1442 1443 if r := constructAndValidateRoute(netProto, addressEndpoint, aNIC /* localAddressNIC */, outgoingNIC, gateway, localAddr, remoteAddr, s.handleLocal, multicastLoop, mtu); r != nil {
1444 return r
1445 }
1446 }
1447 return nil
1448 }
// FindRoute creates a route to the given destination address, leaving through
// the given NIC and local address (if provided).
//
// If a NIC is not specified, the returned route will leave through the same
// NIC as the NIC that has the local address assigned when forwarding is
// disabled. If forwarding is enabled and the NIC is unspecified, the route may
// leave through any interface unless the route is link-local.
//
// If no local address is provided, the stack will select a local address. If no
// remote address is provided, the stack will use a remote address equal to the
// local address.
//
// Errors:
//   - ErrUnknownProtocol if netProto is not enabled in the stack.
//   - ErrBadLocalAddress if the destination is a loopback address and no
//     route could be built.
//   - ErrNetworkUnreachable or ErrHostUnreachable if no route could be built
//     for any other destination.
func (s *Stack) FindRoute(id tcpip.NICID, localAddr, remoteAddr tcpip.Address, netProto tcpip.NetworkProtocolNumber, multicastLoop bool) (*Route, tcpip.Error) {
	s.mu.RLock()
	defer s.mu.RUnlock()

	// Reject attempts to use unsupported protocols.
	if !s.CheckNetworkProtocol(netProto) {
		return nil, &tcpip.ErrUnknownProtocol{}
	}

	// Classify the destination. Broadcast, multicast, link-local and loopback
	// destinations do not need a route table entry (needRoute is false).
	isLinkLocal := header.IsV6LinkLocalUnicastAddress(remoteAddr) || header.IsV6LinkLocalMulticastAddress(remoteAddr)
	isLocalBroadcast := remoteAddr == header.IPv4Broadcast
	isMulticast := header.IsV4MulticastAddress(remoteAddr) || header.IsV6MulticastAddress(remoteAddr)
	isLoopback := header.IsV4LoopbackAddress(remoteAddr) || header.IsV6LoopbackAddress(remoteAddr)
	needRoute := !(isLocalBroadcast || isMulticast || isLinkLocal || isLoopback)

	// When local handling is enabled, first try a route that never leaves the
	// stack.
	if s.handleLocal && !isMulticast && !isLocalBroadcast {
		if r := s.findLocalRouteRLocked(id, localAddr, remoteAddr, netProto); r != nil {
			return r, nil
		}
	}

	// If the interface is specified and we do not need a route, return a route
	// through the interface if the interface is valid and enabled.
	if id != 0 && !needRoute {
		if nic, ok := s.nics[id]; ok && nic.Enabled() {
			if addressEndpoint := s.getAddressEP(nic, localAddr, remoteAddr, tcpip.Address{} /* srcHint */, netProto); addressEndpoint != nil {
				return makeRoute(
					netProto,
					tcpip.Address{}, /* gateway */
					localAddr,
					remoteAddr,
					nic, /* outgoingNIC */
					nic, /* localAddressNIC*/
					addressEndpoint,
					s.handleLocal,
					multicastLoop,
					0, /* mtu */
				), nil
			}
		}

		if isLoopback {
			return nil, &tcpip.ErrBadLocalAddress{}
		}
		return nil, &tcpip.ErrNetworkUnreachable{}
	}

	// Routes through a forwarding NIC other than the one holding the local
	// address are only considered when neither end is link-local.
	onlyGlobalAddresses := !header.IsV6LinkLocalUnicastAddress(localAddr) && !isLinkLocal

	// Find a route to the remote with the route table. The closure keeps
	// routeMu held only while the table is walked.
	var chosenRoute tcpip.Route
	if r := func() *Route {
		s.routeMu.RLock()
		defer s.routeMu.RUnlock()

		for route := s.routeTable.Front(); route != nil; route = route.Next() {
			// Skip entries that do not cover the destination. An unspecified
			// remote address matches every entry.
			if remoteAddr.BitLen() != 0 && !route.Destination.Contains(remoteAddr) {
				continue
			}

			// Skip entries whose NIC is missing or disabled.
			nic, ok := s.nics[route.NIC]
			if !ok || !nic.Enabled() {
				continue
			}

			// Preferred case: the route's NIC is acceptable to the caller and
			// also supplies the local address.
			if id == 0 || id == route.NIC {
				if addressEndpoint := s.getAddressEP(nic, localAddr, remoteAddr, route.SourceHint, netProto); addressEndpoint != nil {
					var gateway tcpip.Address
					if needRoute {
						gateway = route.Gateway
					}
					r := constructAndValidateRoute(netProto, addressEndpoint, nic /* localAddressNIC */, nic /* outgoingNIC */, gateway, localAddr, remoteAddr, s.handleLocal, multicastLoop, route.MTU)
					if r == nil {
						panic(fmt.Sprintf("non-forwarding route validation failed with route table entry = %#v, id = %d, localAddr = %s, remoteAddr = %s", route, id, localAddr, remoteAddr))
					}
					return r
				}
			}

			// If the stack has forwarding enabled, we haven't found a valid route to
			// the remote address yet, and we are routing locally generated traffic,
			// keep track of the first valid route. We keep iterating because we
			// prefer routes that let us use a local address that is assigned to the
			// outgoing interface. There is no requirement to do this from any RFC
			// but simply a choice made to better follow a strong host model which
			// the netstack follows at the time of writing.
			//
			// Note that for incoming traffic that we are forwarding (for which the
			// NIC and local address are unspecified), we do not keep iterating, as
			// there is no reason to prefer routes that let us use a local address
			// when routing forwarded (as opposed to locally-generated) traffic.
			locallyGenerated := (id != 0 || localAddr != tcpip.Address{})
			if onlyGlobalAddresses && chosenRoute.Equal(tcpip.Route{}) && isNICForwarding(nic, netProto) {
				if locallyGenerated {
					chosenRoute = *route
					continue
				}

				if r := s.findRouteWithLocalAddrFromAnyInterfaceRLocked(nic, localAddr, remoteAddr, route.SourceHint, route.Gateway, netProto, multicastLoop, route.MTU); r != nil {
					return r
				}
			}
		}

		return nil
	}(); r != nil {
		return r, nil
	}

	// Fall back to the first forwarding-capable route remembered above, if any.
	if !chosenRoute.Equal(tcpip.Route{}) {
		// At this point we know the stack has forwarding enabled since chosenRoute is
		// only set when forwarding is enabled.
		nic, ok := s.nics[chosenRoute.NIC]
		if !ok {
			// If the route's NIC was invalid, we should not have chosen the route.
			panic(fmt.Sprintf("chosen route must have a valid NIC with ID = %d", chosenRoute.NIC))
		}

		var gateway tcpip.Address
		if needRoute {
			gateway = chosenRoute.Gateway
		}

		// Use the specified NIC to get the local address endpoint.
		if id != 0 {
			if aNIC, ok := s.nics[id]; ok {
				if addressEndpoint := s.getAddressEP(aNIC, localAddr, remoteAddr, chosenRoute.SourceHint, netProto); addressEndpoint != nil {
					if r := constructAndValidateRoute(netProto, addressEndpoint, aNIC /* localAddressNIC */, nic /* outgoingNIC */, gateway, localAddr, remoteAddr, s.handleLocal, multicastLoop, chosenRoute.MTU); r != nil {
						return r, nil
					}
				}
			}

			// TODO(https://gvisor.dev/issues/8105): This should be ErrNetworkUnreachable.
			return nil, &tcpip.ErrHostUnreachable{}
		}

		if id == 0 {
			// If an interface is not specified, try to find a NIC that holds the local
			// address endpoint to construct a route.
			if r := s.findRouteWithLocalAddrFromAnyInterfaceRLocked(nic, localAddr, remoteAddr, chosenRoute.SourceHint, gateway, netProto, multicastLoop, chosenRoute.MTU); r != nil {
				return r, nil
			}
		}
	}

	if needRoute {
		// TODO(https://gvisor.dev/issues/8105): This should be ErrNetworkUnreachable.
		return nil, &tcpip.ErrHostUnreachable{}
	}
	if header.IsV6LoopbackAddress(remoteAddr) {
		return nil, &tcpip.ErrBadLocalAddress{}
	}
	// The destination needs no route table entry, yet no route could be
	// built for it.
	return nil, &tcpip.ErrNetworkUnreachable{}
}
// CheckNetworkProtocol checks if a given network protocol is enabled in the
// stack.
//
// It reports whether protocol has an entry in the stack's set of network
// protocols. The lookup itself takes no lock.
func (s *Stack) CheckNetworkProtocol(protocol tcpip.NetworkProtocolNumber) bool {
	_, ok := s.networkProtocols[protocol]
	return ok
}
1624 1625 // CheckDuplicateAddress performs duplicate address detection for the address on
1626 // the specified interface.
1627 func (s *Stack) CheckDuplicateAddress(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, h DADCompletionHandler) (DADCheckAddressDisposition, tcpip.Error) {
1628 s.mu.RLock()
1629 nic, ok := s.nics[nicID]
1630 s.mu.RUnlock()
1631 1632 if !ok {
1633 return 0, &tcpip.ErrUnknownNICID{}
1634 }
1635 1636 return nic.checkDuplicateAddress(protocol, addr, h)
1637 }
1638 1639 // CheckLocalAddress determines if the given local address exists, and if it
1640 // does, returns the id of the NIC it's bound to. Returns 0 if the address
1641 // does not exist.
1642 func (s *Stack) CheckLocalAddress(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.NICID {
1643 s.mu.RLock()
1644 defer s.mu.RUnlock()
1645 1646 // If a NIC is specified, use its NIC id.
1647 if nicID != 0 {
1648 nic, ok := s.nics[nicID]
1649 if !ok {
1650 return 0
1651 }
1652 // In IPv4, linux only checks the interface. If it matches, then it does
1653 // not bother with the address.
1654 // https://github.com/torvalds/linux/blob/15205c2829ca2cbb5ece5ceaafe1171a8470e62b/net/ipv4/igmp.c#L1829-L1837
1655 if protocol == header.IPv4ProtocolNumber {
1656 return nic.id
1657 }
1658 if nic.CheckLocalAddress(protocol, addr) {
1659 return nic.id
1660 }
1661 return 0
1662 }
1663 1664 // Go through all the NICs.
1665 for _, nic := range s.nics {
1666 if nic.CheckLocalAddress(protocol, addr) {
1667 return nic.id
1668 }
1669 }
1670 1671 return 0
1672 }
1673 1674 // SetPromiscuousMode enables or disables promiscuous mode in the given NIC.
1675 func (s *Stack) SetPromiscuousMode(nicID tcpip.NICID, enable bool) tcpip.Error {
1676 s.mu.RLock()
1677 defer s.mu.RUnlock()
1678 1679 nic, ok := s.nics[nicID]
1680 if !ok {
1681 return &tcpip.ErrUnknownNICID{}
1682 }
1683 1684 nic.setPromiscuousMode(enable)
1685 1686 return nil
1687 }
1688 1689 // SetSpoofing enables or disables address spoofing in the given NIC, allowing
1690 // endpoints to bind to any address in the NIC.
1691 func (s *Stack) SetSpoofing(nicID tcpip.NICID, enable bool) tcpip.Error {
1692 s.mu.RLock()
1693 defer s.mu.RUnlock()
1694 1695 nic, ok := s.nics[nicID]
1696 if !ok {
1697 return &tcpip.ErrUnknownNICID{}
1698 }
1699 1700 nic.setSpoofing(enable)
1701 1702 return nil
1703 }
// LinkResolutionResult is the result of a link address resolution attempt.
// It is the value handed to the onResolve callback of Stack.GetLinkAddress.
type LinkResolutionResult struct {
	// LinkAddress is the resolved link address.
	// NOTE(review): presumably only meaningful when Err is nil — confirm.
	LinkAddress tcpip.LinkAddress

	// Err is the error, if any, that the resolution attempt ended with.
	Err tcpip.Error
}
1710 1711 // GetLinkAddress finds the link address corresponding to a network address.
1712 //
1713 // Returns ErrNotSupported if the stack is not configured with a link address
1714 // resolver for the specified network protocol.
1715 //
1716 // Returns ErrWouldBlock if the link address is not readily available, along
1717 // with a notification channel for the caller to block on. Triggers address
1718 // resolution asynchronously.
1719 //
1720 // onResolve will be called either immediately, if resolution is not required,
1721 // or when address resolution is complete, with the resolved link address and
1722 // whether resolution succeeded.
1723 //
1724 // If specified, the local address must be an address local to the interface
1725 // the neighbor cache belongs to. The local address is the source address of
1726 // a packet prompting NUD/link address resolution.
1727 func (s *Stack) GetLinkAddress(nicID tcpip.NICID, addr, localAddr tcpip.Address, protocol tcpip.NetworkProtocolNumber, onResolve func(LinkResolutionResult)) tcpip.Error {
1728 s.mu.RLock()
1729 nic, ok := s.nics[nicID]
1730 s.mu.RUnlock()
1731 if !ok {
1732 return &tcpip.ErrUnknownNICID{}
1733 }
1734 1735 return nic.getLinkAddress(addr, localAddr, protocol, onResolve)
1736 }
1737 1738 // Neighbors returns all IP to MAC address associations.
1739 func (s *Stack) Neighbors(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber) ([]NeighborEntry, tcpip.Error) {
1740 s.mu.RLock()
1741 nic, ok := s.nics[nicID]
1742 s.mu.RUnlock()
1743 1744 if !ok {
1745 return nil, &tcpip.ErrUnknownNICID{}
1746 }
1747 1748 return nic.neighbors(protocol)
1749 }
1750 1751 // AddStaticNeighbor statically associates an IP address to a MAC address.
1752 func (s *Stack) AddStaticNeighbor(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address, linkAddr tcpip.LinkAddress) tcpip.Error {
1753 s.mu.RLock()
1754 nic, ok := s.nics[nicID]
1755 s.mu.RUnlock()
1756 1757 if !ok {
1758 return &tcpip.ErrUnknownNICID{}
1759 }
1760 1761 return nic.addStaticNeighbor(addr, protocol, linkAddr)
1762 }
1763 1764 // RemoveNeighbor removes an IP to MAC address association previously created
1765 // either automatically or by AddStaticNeighbor. Returns ErrBadAddress if there
1766 // is no association with the provided address.
1767 func (s *Stack) RemoveNeighbor(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) tcpip.Error {
1768 s.mu.RLock()
1769 nic, ok := s.nics[nicID]
1770 s.mu.RUnlock()
1771 1772 if !ok {
1773 return &tcpip.ErrUnknownNICID{}
1774 }
1775 1776 return nic.removeNeighbor(protocol, addr)
1777 }
1778 1779 // ClearNeighbors removes all IP to MAC address associations.
1780 func (s *Stack) ClearNeighbors(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber) tcpip.Error {
1781 s.mu.RLock()
1782 nic, ok := s.nics[nicID]
1783 s.mu.RUnlock()
1784 1785 if !ok {
1786 return &tcpip.ErrUnknownNICID{}
1787 }
1788 1789 return nic.clearNeighbors(protocol)
1790 }
// RegisterTransportEndpoint registers the given endpoint with the stack
// transport dispatcher. Received packets that match the provided id will be
// delivered to the given endpoint; specifying a nic is optional, but
// nic-specific IDs have precedence over global ones.
//
// The call is forwarded unchanged to the stack's demuxer, whose error is
// returned as is.
func (s *Stack) RegisterTransportEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) tcpip.Error {
	return s.demux.registerEndpoint(netProtos, protocol, id, ep, flags, bindToDevice)
}
// CheckRegisterTransportEndpoint checks if an endpoint can be registered with
// the stack transport dispatcher.
//
// The check is delegated to the stack's demuxer, whose error is returned as
// is.
func (s *Stack) CheckRegisterTransportEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, flags ports.Flags, bindToDevice tcpip.NICID) tcpip.Error {
	return s.demux.checkEndpoint(netProtos, protocol, id, flags, bindToDevice)
}
// UnregisterTransportEndpoint removes the endpoint with the given id from the
// stack transport dispatcher.
//
// Unlike StartTransportEndpointCleanup, it does not add the endpoint to the
// stack's set of endpoints under cleanup.
func (s *Stack) UnregisterTransportEndpoint(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) {
	s.demux.unregisterEndpoint(netProtos, protocol, id, ep, flags, bindToDevice)
}
// StartTransportEndpointCleanup removes the endpoint with the given id from
// the stack transport dispatcher. It also transitions it to the cleanup stage.
//
// The endpoint stays in the cleanup set until
// CompleteTransportEndpointCleanup is called for it.
func (s *Stack) StartTransportEndpointCleanup(netProtos []tcpip.NetworkProtocolNumber, protocol tcpip.TransportProtocolNumber, id TransportEndpointID, ep TransportEndpoint, flags ports.Flags, bindToDevice tcpip.NICID) {
	// Record the endpoint as being cleaned up. The cleanup lock is released
	// again before the demuxer is called.
	s.cleanupEndpointsMu.Lock()
	s.cleanupEndpoints[ep] = struct{}{}
	s.cleanupEndpointsMu.Unlock()

	s.demux.unregisterEndpoint(netProtos, protocol, id, ep, flags, bindToDevice)
}
1821 1822 // CompleteTransportEndpointCleanup removes the endpoint from the cleanup
1823 // stage.
1824 func (s *Stack) CompleteTransportEndpointCleanup(ep TransportEndpoint) {
1825 s.cleanupEndpointsMu.Lock()
1826 delete(s.cleanupEndpoints, ep)
1827 s.cleanupEndpointsMu.Unlock()
1828 }
1829 1830 // FindTransportEndpoint finds an endpoint that most closely matches the provided
1831 // id. If no endpoint is found it returns nil.
1832 func (s *Stack) FindTransportEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, id TransportEndpointID, nicID tcpip.NICID) TransportEndpoint {
1833 return s.demux.findTransportEndpoint(netProto, transProto, id, nicID)
1834 }
1835 1836 // RegisterRawTransportEndpoint registers the given endpoint with the stack
1837 // transport dispatcher. Received packets that match the provided transport
1838 // protocol will be delivered to the given endpoint.
1839 func (s *Stack) RegisterRawTransportEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, ep RawTransportEndpoint) tcpip.Error {
1840 return s.demux.registerRawEndpoint(netProto, transProto, ep)
1841 }
1842 1843 // UnregisterRawTransportEndpoint removes the endpoint for the transport
1844 // protocol from the stack transport dispatcher.
1845 func (s *Stack) UnregisterRawTransportEndpoint(netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber, ep RawTransportEndpoint) {
1846 s.demux.unregisterRawEndpoint(netProto, transProto, ep)
1847 }
1848 1849 // RegisterRestoredEndpoint records e as an endpoint that has been restored on
1850 // this stack.
1851 func (s *Stack) RegisterRestoredEndpoint(e RestoredEndpoint) {
1852 s.mu.Lock()
1853 defer s.mu.Unlock()
1854 1855 s.restoredEndpoints = append(s.restoredEndpoints, e)
1856 }
1857 1858 // RegisterResumableEndpoint records e as an endpoint that has to be resumed.
1859 func (s *Stack) RegisterResumableEndpoint(e ResumableEndpoint) {
1860 s.mu.Lock()
1861 defer s.mu.Unlock()
1862 1863 s.resumableEndpoints = append(s.resumableEndpoints, e)
1864 }
1865 1866 // RegisteredEndpoints returns all endpoints which are currently registered.
1867 func (s *Stack) RegisteredEndpoints() []TransportEndpoint {
1868 s.mu.Lock()
1869 defer s.mu.Unlock()
1870 1871 var es []TransportEndpoint
1872 for _, e := range s.demux.protocol {
1873 es = append(es, e.transportEndpoints()...)
1874 }
1875 return es
1876 }
1877 1878 // CleanupEndpoints returns endpoints currently in the cleanup state.
1879 func (s *Stack) CleanupEndpoints() []TransportEndpoint {
1880 s.cleanupEndpointsMu.Lock()
1881 defer s.cleanupEndpointsMu.Unlock()
1882 1883 es := make([]TransportEndpoint, 0, len(s.cleanupEndpoints))
1884 for e := range s.cleanupEndpoints {
1885 es = append(es, e)
1886 }
1887 return es
1888 }
1889 1890 // RestoreCleanupEndpoints adds endpoints to cleanup tracking. This is useful
1891 // for restoring a stack after a save.
1892 func (s *Stack) RestoreCleanupEndpoints(es []TransportEndpoint) {
1893 s.cleanupEndpointsMu.Lock()
1894 defer s.cleanupEndpointsMu.Unlock()
1895 1896 for _, e := range es {
1897 s.cleanupEndpoints[e] = struct{}{}
1898 }
1899 }
1900 1901 // Close closes all currently registered transport endpoints.
1902 //
1903 // Endpoints created or modified during this call may not get closed.
1904 func (s *Stack) Close() {
1905 for _, e := range s.RegisteredEndpoints() {
1906 e.Abort()
1907 }
1908 for _, p := range s.transportProtocols {
1909 p.proto.Close()
1910 }
1911 for _, p := range s.networkProtocols {
1912 p.Close()
1913 }
1914 }
1915 1916 // Wait waits for all transport and link endpoints to halt their worker
1917 // goroutines.
1918 //
1919 // Endpoints created or modified during this call may not get waited on.
1920 //
1921 // Note that link endpoints must be stopped via an implementation specific
1922 // mechanism.
1923 func (s *Stack) Wait() {
1924 for _, e := range s.RegisteredEndpoints() {
1925 e.Wait()
1926 }
1927 for _, e := range s.CleanupEndpoints() {
1928 e.Wait()
1929 }
1930 for _, p := range s.transportProtocols {
1931 p.proto.Wait()
1932 }
1933 for _, p := range s.networkProtocols {
1934 p.Wait()
1935 }
1936 1937 deferActs := make([]func(), 0)
1938 1939 s.mu.Lock()
1940 for id, n := range s.nics {
1941 // Remove NIC to ensure that qDisc goroutines are correctly
1942 // terminated on stack teardown.
1943 act, _ := s.removeNICLocked(id)
1944 n.NetworkLinkEndpoint.Wait()
1945 if act != nil {
1946 deferActs = append(deferActs, act)
1947 }
1948 }
1949 s.mu.Unlock()
1950 1951 for _, act := range deferActs {
1952 act()
1953 }
1954 }
1955 1956 // Destroy destroys the stack with all endpoints.
1957 func (s *Stack) Destroy() {
1958 s.Close()
1959 s.Wait()
1960 }
1961 1962 // Pause pauses any protocol level background workers.
1963 func (s *Stack) Pause() {
1964 for _, p := range s.transportProtocols {
1965 p.proto.Pause()
1966 }
1967 }
1968 1969 func (s *Stack) getNICs() map[tcpip.NICID]*nic {
1970 s.mu.RLock()
1971 defer s.mu.RUnlock()
1972 1973 nics := s.nics
1974 return nics
1975 }
1976 1977 // ReplaceConfig replaces config in the loaded stack.
1978 func (s *Stack) ReplaceConfig(st *Stack) {
1979 if st == nil {
1980 panic("stack.Stack cannot be nil when netstack s/r is enabled")
1981 }
1982 1983 // Update route table.
1984 s.SetRouteTable(st.GetRouteTable())
1985 1986 // Update NICs.
1987 nics := st.getNICs()
1988 s.mu.Lock()
1989 defer s.mu.Unlock()
1990 s.nics = make(map[tcpip.NICID]*nic)
1991 for id, nic := range nics {
1992 nic.stack = s
1993 s.nics[id] = nic
1994 _ = s.NextNICID()
1995 }
1996 s.tables = st.tables
1997 }
1998 1999 // Restore restarts the stack after a restore. This must be called after the
2000 // entire system has been restored.
2001 func (s *Stack) Restore() {
2002 // RestoredEndpoint.Restore() may call other methods on s, so we can't hold
2003 // s.mu while restoring the endpoints.
2004 s.mu.Lock()
2005 eps := s.restoredEndpoints
2006 s.restoredEndpoints = nil
2007 saveRestoreEnabled := s.saveRestoreEnabled
2008 s.mu.Unlock()
2009 for _, e := range eps {
2010 e.Restore(s)
2011 }
2012 // Now resume any protocol level background workers.
2013 for _, p := range s.transportProtocols {
2014 if saveRestoreEnabled {
2015 p.proto.Restore()
2016 } else {
2017 p.proto.Resume()
2018 }
2019 }
2020 }
2021 2022 // Resume resumes the stack after a save.
2023 func (s *Stack) Resume() {
2024 s.mu.Lock()
2025 eps := s.resumableEndpoints
2026 s.resumableEndpoints = nil
2027 s.mu.Unlock()
2028 for _, e := range eps {
2029 e.Resume()
2030 }
2031 // Now resume any protocol level background workers.
2032 for _, p := range s.transportProtocols {
2033 p.proto.Resume()
2034 }
2035 }
2036 2037 // RegisterPacketEndpoint registers ep with the stack, causing it to receive
2038 // all traffic of the specified netProto on the given NIC. If nicID is 0, it
2039 // receives traffic from every NIC.
2040 func (s *Stack) RegisterPacketEndpoint(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) tcpip.Error {
2041 s.mu.Lock()
2042 defer s.mu.Unlock()
2043 2044 // If no NIC is specified, capture on all devices.
2045 if nicID == 0 {
2046 // Register with each NIC.
2047 for _, nic := range s.nics {
2048 nic.registerPacketEndpoint(netProto, ep)
2049 }
2050 return nil
2051 }
2052 2053 // Capture on a specific device.
2054 nic, ok := s.nics[nicID]
2055 if !ok {
2056 return &tcpip.ErrUnknownNICID{}
2057 }
2058 nic.registerPacketEndpoint(netProto, ep)
2059 2060 return nil
2061 }
2062 2063 // UnregisterPacketEndpoint unregisters ep for packets of the specified
2064 // netProto from the specified NIC. If nicID is 0, ep is unregistered from all
2065 // NICs.
2066 func (s *Stack) UnregisterPacketEndpoint(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) {
2067 s.mu.Lock()
2068 defer s.mu.Unlock()
2069 s.unregisterPacketEndpointLocked(nicID, netProto, ep)
2070 }
2071 2072 // +checklocks:s.mu
2073 func (s *Stack) unregisterPacketEndpointLocked(nicID tcpip.NICID, netProto tcpip.NetworkProtocolNumber, ep PacketEndpoint) {
2074 // If no NIC is specified, unregister on all devices.
2075 if nicID == 0 {
2076 // Unregister with each NIC.
2077 for _, nic := range s.nics {
2078 nic.unregisterPacketEndpoint(netProto, ep)
2079 }
2080 return
2081 }
2082 2083 // Unregister in a single device.
2084 nic, ok := s.nics[nicID]
2085 if !ok {
2086 return
2087 }
2088 nic.unregisterPacketEndpoint(netProto, ep)
2089 }
2090 2091 // WritePacketToRemote writes a payload on the specified NIC using the provided
2092 // network protocol and remote link address.
2093 func (s *Stack) WritePacketToRemote(nicID tcpip.NICID, remote tcpip.LinkAddress, netProto tcpip.NetworkProtocolNumber, payload buffer.Buffer) tcpip.Error {
2094 s.mu.Lock()
2095 nic, ok := s.nics[nicID]
2096 s.mu.Unlock()
2097 if !ok {
2098 return &tcpip.ErrUnknownDevice{}
2099 }
2100 pkt := NewPacketBuffer(PacketBufferOptions{
2101 ReserveHeaderBytes: int(nic.MaxHeaderLength()),
2102 Payload: payload,
2103 })
2104 defer pkt.DecRef()
2105 pkt.NetworkProtocolNumber = netProto
2106 return nic.WritePacketToRemote(remote, pkt)
2107 }
2108 2109 // WriteRawPacket writes data directly to the specified NIC without adding any
2110 // headers.
2111 func (s *Stack) WriteRawPacket(nicID tcpip.NICID, proto tcpip.NetworkProtocolNumber, payload buffer.Buffer) tcpip.Error {
2112 s.mu.RLock()
2113 nic, ok := s.nics[nicID]
2114 s.mu.RUnlock()
2115 if !ok {
2116 return &tcpip.ErrUnknownNICID{}
2117 }
2118 2119 pkt := NewPacketBuffer(PacketBufferOptions{
2120 Payload: payload,
2121 })
2122 defer pkt.DecRef()
2123 pkt.NetworkProtocolNumber = proto
2124 return nic.writeRawPacketWithLinkHeaderInPayload(pkt)
2125 }
2126 2127 // NetworkProtocolInstance returns the protocol instance in the stack for the
2128 // specified network protocol. This method is public for protocol implementers
2129 // and tests to use.
2130 func (s *Stack) NetworkProtocolInstance(num tcpip.NetworkProtocolNumber) NetworkProtocol {
2131 if p, ok := s.networkProtocols[num]; ok {
2132 return p
2133 }
2134 return nil
2135 }
2136 2137 // TransportProtocolInstance returns the protocol instance in the stack for the
2138 // specified transport protocol. This method is public for protocol implementers
2139 // and tests to use.
2140 func (s *Stack) TransportProtocolInstance(num tcpip.TransportProtocolNumber) TransportProtocol {
2141 if pState, ok := s.transportProtocols[num]; ok {
2142 return pState.proto
2143 }
2144 return nil
2145 }
2146 2147 // JoinGroup joins the given multicast group on the given NIC.
2148 func (s *Stack) JoinGroup(protocol tcpip.NetworkProtocolNumber, nicID tcpip.NICID, multicastAddr tcpip.Address) tcpip.Error {
2149 s.mu.RLock()
2150 defer s.mu.RUnlock()
2151 2152 if nic, ok := s.nics[nicID]; ok {
2153 return nic.joinGroup(protocol, multicastAddr)
2154 }
2155 return &tcpip.ErrUnknownNICID{}
2156 }
2157 2158 // LeaveGroup leaves the given multicast group on the given NIC.
2159 func (s *Stack) LeaveGroup(protocol tcpip.NetworkProtocolNumber, nicID tcpip.NICID, multicastAddr tcpip.Address) tcpip.Error {
2160 s.mu.RLock()
2161 defer s.mu.RUnlock()
2162 2163 if nic, ok := s.nics[nicID]; ok {
2164 return nic.leaveGroup(protocol, multicastAddr)
2165 }
2166 return &tcpip.ErrUnknownNICID{}
2167 }
2168 2169 // IsInGroup returns true if the NIC with ID nicID has joined the multicast
2170 // group multicastAddr.
2171 func (s *Stack) IsInGroup(nicID tcpip.NICID, multicastAddr tcpip.Address) (bool, tcpip.Error) {
2172 s.mu.RLock()
2173 defer s.mu.RUnlock()
2174 2175 if nic, ok := s.nics[nicID]; ok {
2176 return nic.isInGroup(multicastAddr), nil
2177 }
2178 return false, &tcpip.ErrUnknownNICID{}
2179 }
2180 2181 // IPTables returns the stack's iptables.
2182 func (s *Stack) IPTables() *IPTables {
2183 return s.tables
2184 }
2185 2186 // ICMPLimit returns the maximum number of ICMP messages that can be sent
2187 // in one second.
2188 func (s *Stack) ICMPLimit() rate.Limit {
2189 return s.icmpRateLimiter.Limit()
2190 }
2191 2192 // SetICMPLimit sets the maximum number of ICMP messages that be sent
2193 // in one second.
2194 func (s *Stack) SetICMPLimit(newLimit rate.Limit) {
2195 s.icmpRateLimiter.SetLimit(newLimit)
2196 }
2197 2198 // ICMPBurst returns the maximum number of ICMP messages that can be sent
2199 // in a single burst.
2200 func (s *Stack) ICMPBurst() int {
2201 return s.icmpRateLimiter.Burst()
2202 }
2203 2204 // SetICMPBurst sets the maximum number of ICMP messages that can be sent
2205 // in a single burst.
2206 func (s *Stack) SetICMPBurst(burst int) {
2207 s.icmpRateLimiter.SetBurst(burst)
2208 }
2209 2210 // AllowICMPMessage returns true if we the rate limiter allows at least one
2211 // ICMP message to be sent at this instant.
2212 func (s *Stack) AllowICMPMessage() bool {
2213 return s.icmpRateLimiter.Allow()
2214 }
2215 2216 // GetNetworkEndpoint returns the NetworkEndpoint with the specified protocol
2217 // number installed on the specified NIC.
2218 func (s *Stack) GetNetworkEndpoint(nicID tcpip.NICID, proto tcpip.NetworkProtocolNumber) (NetworkEndpoint, tcpip.Error) {
2219 s.mu.Lock()
2220 defer s.mu.Unlock()
2221 2222 nic, ok := s.nics[nicID]
2223 if !ok {
2224 return nil, &tcpip.ErrUnknownNICID{}
2225 }
2226 2227 return nic.getNetworkEndpoint(proto), nil
2228 }
2229 2230 // NUDConfigurations gets the per-interface NUD configurations.
2231 func (s *Stack) NUDConfigurations(id tcpip.NICID, proto tcpip.NetworkProtocolNumber) (NUDConfigurations, tcpip.Error) {
2232 s.mu.RLock()
2233 nic, ok := s.nics[id]
2234 s.mu.RUnlock()
2235 2236 if !ok {
2237 return NUDConfigurations{}, &tcpip.ErrUnknownNICID{}
2238 }
2239 2240 return nic.nudConfigs(proto)
2241 }
2242 2243 // SetNUDConfigurations sets the per-interface NUD configurations.
2244 //
2245 // Note, if c contains invalid NUD configuration values, it will be fixed to
2246 // use default values for the erroneous values.
2247 func (s *Stack) SetNUDConfigurations(id tcpip.NICID, proto tcpip.NetworkProtocolNumber, c NUDConfigurations) tcpip.Error {
2248 s.mu.RLock()
2249 nic, ok := s.nics[id]
2250 s.mu.RUnlock()
2251 2252 if !ok {
2253 return &tcpip.ErrUnknownNICID{}
2254 }
2255 2256 return nic.setNUDConfigs(proto, c)
2257 }
2258 2259 // Seed returns a 32 bit value that can be used as a seed value.
2260 //
2261 // NOTE: The seed is generated once during stack initialization only.
2262 func (s *Stack) Seed() uint32 {
2263 return s.seed
2264 }
2265 2266 // InsecureRNG returns a reference to a pseudo random generator that can be used
2267 // to generate random numbers as required. It is not cryptographically secure
2268 // and should not be used for security sensitive work.
2269 func (s *Stack) InsecureRNG() *rand.Rand {
2270 return s.insecureRNG
2271 }
2272 2273 // SecureRNG returns the stack's cryptographically secure random number
2274 // generator.
2275 func (s *Stack) SecureRNG() cryptorand.RNG {
2276 return s.secureRNG
2277 }
2278 2279 // FindNICNameFromID returns the name of the NIC for the given NICID.
2280 func (s *Stack) FindNICNameFromID(id tcpip.NICID) string {
2281 s.mu.RLock()
2282 defer s.mu.RUnlock()
2283 2284 nic, ok := s.nics[id]
2285 if !ok {
2286 return ""
2287 }
2288 2289 return nic.Name()
2290 }
// ParseResult describes the outcome of an attempt to parse a packet.
type ParseResult int

// Possible ParseResult values.
const (
	// ParsedOK means the packet was parsed successfully.
	ParsedOK ParseResult = iota

	// UnknownTransportProtocol means the transport protocol is not known.
	UnknownTransportProtocol

	// TransportLayerParseError means the transport packet could not be
	// parsed successfully.
	TransportLayerParseError
)
2306 2307 // ParsePacketBufferTransport parses the provided packet buffer's transport
2308 // header.
2309 func (s *Stack) ParsePacketBufferTransport(protocol tcpip.TransportProtocolNumber, pkt *PacketBuffer) ParseResult {
2310 pkt.TransportProtocolNumber = protocol
2311 // Parse the transport header if present.
2312 state, ok := s.transportProtocols[protocol]
2313 if !ok {
2314 return UnknownTransportProtocol
2315 }
2316 2317 if !state.proto.Parse(pkt) {
2318 return TransportLayerParseError
2319 }
2320 2321 return ParsedOK
2322 }
2323 2324 // networkProtocolNumbers returns the network protocol numbers the stack is
2325 // configured with.
2326 func (s *Stack) networkProtocolNumbers() []tcpip.NetworkProtocolNumber {
2327 protos := make([]tcpip.NetworkProtocolNumber, 0, len(s.networkProtocols))
2328 for p := range s.networkProtocols {
2329 protos = append(protos, p)
2330 }
2331 return protos
2332 }
2333 2334 func isSubnetBroadcastOnNIC(nic *nic, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) bool {
2335 addressEndpoint := nic.getAddressOrCreateTempInner(protocol, addr, false /* createTemp */, NeverPrimaryEndpoint)
2336 if addressEndpoint == nil {
2337 return false
2338 }
2339 2340 subnet := addressEndpoint.Subnet()
2341 addressEndpoint.DecRef()
2342 return subnet.IsBroadcast(addr)
2343 }
2344 2345 // IsSubnetBroadcast returns true if the provided address is a subnet-local
2346 // broadcast address on the specified NIC and protocol.
2347 //
2348 // Returns false if the NIC is unknown or if the protocol is unknown or does
2349 // not support addressing.
2350 //
2351 // If the NIC is not specified, the stack will check all NICs.
2352 func (s *Stack) IsSubnetBroadcast(nicID tcpip.NICID, protocol tcpip.NetworkProtocolNumber, addr tcpip.Address) bool {
2353 s.mu.RLock()
2354 defer s.mu.RUnlock()
2355 2356 if nicID != 0 {
2357 nic, ok := s.nics[nicID]
2358 if !ok {
2359 return false
2360 }
2361 2362 return isSubnetBroadcastOnNIC(nic, protocol, addr)
2363 }
2364 2365 for _, nic := range s.nics {
2366 if isSubnetBroadcastOnNIC(nic, protocol, addr) {
2367 return true
2368 }
2369 }
2370 2371 return false
2372 }
2373 2374 // PacketEndpointWriteSupported returns true iff packet endpoints support write
2375 // operations.
2376 func (s *Stack) PacketEndpointWriteSupported() bool {
2377 return s.packetEndpointWriteSupported
2378 }
2379 2380 // SetNICStack moves the network device to the specified network namespace.
2381 func (s *Stack) SetNICStack(id tcpip.NICID, peer *Stack) (tcpip.NICID, tcpip.Error) {
2382 s.mu.Lock()
2383 nic, ok := s.nics[id]
2384 if !ok {
2385 s.mu.Unlock()
2386 return 0, &tcpip.ErrUnknownNICID{}
2387 }
2388 if s == peer {
2389 s.mu.Unlock()
2390 return id, nil
2391 }
2392 delete(s.nics, id)
2393 2394 // Remove routes in-place. n tracks the number of routes written.
2395 s.RemoveRoutes(func(r tcpip.Route) bool { return r.NIC == id })
2396 ne := nic.NetworkLinkEndpoint.(LinkEndpoint)
2397 deferAct, err := nic.remove(false /* closeLinkEndpoint */)
2398 s.mu.Unlock()
2399 if deferAct != nil {
2400 deferAct()
2401 }
2402 if err != nil {
2403 return 0, err
2404 }
2405 2406 id = tcpip.NICID(peer.NextNICID())
2407 return id, peer.CreateNICWithOptions(id, ne, NICOptions{Name: nic.Name()})
2408 }
2409 2410 // EnableSaveRestore marks the saveRestoreEnabled to true.
2411 func (s *Stack) EnableSaveRestore() {
2412 s.mu.Lock()
2413 defer s.mu.Unlock()
2414 2415 s.saveRestoreEnabled = true
2416 }
2417 2418 // IsSaveRestoreEnabled returns true if save restore is enabled for the stack.
2419 func (s *Stack) IsSaveRestoreEnabled() bool {
2420 s.mu.Lock()
2421 defer s.mu.Unlock()
2422 2423 return s.saveRestoreEnabled
2424 }
// contextID is the key type this package uses for context.Context.Value
// lookups.
type contextID int

// Context keys defined by this package.
const (
	// CtxRestoreStack is the Context.Value key under which the stack to be
	// used in restore is stored.
	CtxRestoreStack contextID = iota
)
2433 2434 // RestoreStackFromContext returns the stack to be used during restore.
2435 func RestoreStackFromContext(ctx context.Context) *Stack {
2436 return ctx.Value(CtxRestoreStack).(*Stack)
2437 }
2438