tcp.go raw

   1  // Copyright 2018 The gVisor Authors.
   2  //
   3  // Licensed under the Apache License, Version 2.0 (the "License");
   4  // you may not use this file except in compliance with the License.
   5  // You may obtain a copy of the License at
   6  //
   7  //     http://www.apache.org/licenses/LICENSE-2.0
   8  //
   9  // Unless required by applicable law or agreed to in writing, software
  10  // distributed under the License is distributed on an "AS IS" BASIS,
  11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12  // See the License for the specific language governing permissions and
  13  // limitations under the License.
  14  
  15  package header
  16  
  17  import (
  18  	"encoding/binary"
  19  
  20  	"github.com/google/btree"
  21  	"gvisor.dev/gvisor/pkg/tcpip"
  22  	"gvisor.dev/gvisor/pkg/tcpip/checksum"
  23  	"gvisor.dev/gvisor/pkg/tcpip/seqnum"
  24  )
  25  
// These constants are the byte offsets of the respective fields from the start
// of the TCP header. The accessor methods on TCP index the header with them;
// TCPDataOffset names the byte whose upper four bits carry the header length.
const (
	TCPSrcPortOffset   = 0
	TCPDstPortOffset   = 2
	TCPSeqNumOffset    = 4
	TCPAckNumOffset    = 8
	TCPDataOffset      = 12
	TCPFlagsOffset     = 13
	TCPWinSizeOffset   = 14
	TCPChecksumOffset  = 16
	TCPUrgentPtrOffset = 18
)
  38  
const (
	// MaxWndScale is the maximum allowed window scaling, as described in
	// RFC 1323, section 2.3, page 11. ParseSynOptions clamps larger received
	// values to it.
	MaxWndScale = 14

	// TCPMaxSACKBlocks is the maximum number of SACK blocks that can
	// be encoded in a TCP option field. EncodeSACKBlocks never writes more
	// blocks than this.
	TCPMaxSACKBlocks = 4
)
  48  
  49  // TCPFlags is the dedicated type for TCP flags.
  50  type TCPFlags uint8
  51  
  52  // Intersects returns true iff there are flags common to both f and o.
  53  func (f TCPFlags) Intersects(o TCPFlags) bool {
  54  	return f&o != 0
  55  }
  56  
  57  // Contains returns true iff all the flags in o are contained within f.
  58  func (f TCPFlags) Contains(o TCPFlags) bool {
  59  	return f&o == o
  60  }
  61  
  62  // String implements Stringer.String.
  63  func (f TCPFlags) String() string {
  64  	flagsStr := []byte("FSRPAUEC")
  65  	for i := range flagsStr {
  66  		if f&(1<<uint(i)) == 0 {
  67  			flagsStr[i] = ' '
  68  		}
  69  	}
  70  	return string(flagsStr)
  71  }
  72  
  73  // Flags that may be set in a TCP segment.
  74  const (
  75  	TCPFlagFin TCPFlags = 1 << iota
  76  	TCPFlagSyn
  77  	TCPFlagRst
  78  	TCPFlagPsh
  79  	TCPFlagAck
  80  	TCPFlagUrg
  81  	TCPFlagEce
  82  	TCPFlagCwr
  83  )
  84  
// Option kinds that may be present in a TCP segment. These are the values
// found in the first byte of each option. The parsers in this file treat EOL
// and NOP as single-byte options and expect every other option to carry a
// length byte right after the kind.
const (
	TCPOptionEOL           = 0
	TCPOptionNOP           = 1
	TCPOptionMSS           = 2
	TCPOptionWS            = 3
	TCPOptionTS            = 8
	TCPOptionSACKPermitted = 4
	TCPOptionSACK          = 5
)
  95  
// Option lengths, in bytes, of the fixed-size options. Each value is what the
// corresponding Encode*Option function writes into the option's length byte
// and is the total number of bytes the encoded option occupies.
const (
	TCPOptionMSSLength           = 4
	TCPOptionTSLength            = 10
	TCPOptionWSLength            = 3
	TCPOptionSackPermittedLength = 2
)
 103  
// TCPFields contains the fields of a TCP packet. It is used to describe the
// fields of a packet that needs to be encoded; TCP.Encode writes every field
// of this struct into a header.
type TCPFields struct {
	// SrcPort is the "source port" field of a TCP packet.
	SrcPort uint16

	// DstPort is the "destination port" field of a TCP packet.
	DstPort uint16

	// SeqNum is the "sequence number" field of a TCP packet.
	SeqNum uint32

	// AckNum is the "acknowledgement number" field of a TCP packet.
	AckNum uint32

	// DataOffset is the "data offset" field of a TCP packet. It is the length of
	// the TCP header in bytes. TCP.SetDataOffset divides it by 4 before storing
	// it, so any remainder is dropped.
	DataOffset uint8

	// Flags is the "flags" field of a TCP packet.
	Flags TCPFlags

	// WindowSize is the "window size" field of a TCP packet.
	WindowSize uint16

	// Checksum is the "checksum" field of a TCP packet.
	Checksum uint16

	// UrgentPointer is the "urgent pointer" field of a TCP packet.
	UrgentPointer uint16
}
 135  
// TCPSynOptions is used to return the parsed TCP Options in a syn
// segment. ParseSynOptions produces values of this type.
//
// +stateify savable
type TCPSynOptions struct {
	// MSS is the maximum segment size provided by the peer in the SYN.
	// ParseSynOptions sets it to TCPDefaultMSS when no MSS option is parsed.
	MSS uint16

	// WS is the window scale option provided by the peer in the SYN.
	//
	// Set to -1 if no window scale option was provided.
	WS int

	// TS is true if the timestamp option was provided in the syn/syn-ack.
	TS bool

	// TSVal is the value of the TSVal field in the timestamp option.
	TSVal uint32

	// TSEcr is the value of the TSEcr field in the timestamp option.
	// ParseSynOptions only fills it in when parsing a SYN-ACK.
	TSEcr uint32

	// SACKPermitted is true if the SACK-permitted option was provided in the
	// SYN/SYN-ACK.
	SACKPermitted bool

	// Flags if specified are set on the outgoing SYN. The SYN flag is
	// always set.
	Flags TCPFlags
}
 165  
 166  // SACKBlock represents a single contiguous SACK block.
 167  //
 168  // +stateify savable
 169  type SACKBlock struct {
 170  	// Start indicates the lowest sequence number in the block.
 171  	Start seqnum.Value
 172  
 173  	// End indicates the sequence number immediately following the last
 174  	// sequence number of this block.
 175  	End seqnum.Value
 176  }
 177  
 178  // Less returns true if r.Start < b.Start.
 179  func (r SACKBlock) Less(b btree.Item) bool {
 180  	return r.Start.LessThan(b.(SACKBlock).Start)
 181  }
 182  
 183  // Contains returns true if b is completely contained in r.
 184  func (r SACKBlock) Contains(b SACKBlock) bool {
 185  	return r.Start.LessThanEq(b.Start) && b.End.LessThanEq(r.End)
 186  }
 187  
// TCPOptions are used to parse and cache the TCP segment options for a non
// syn/syn-ack segment. ParseTCPOptions produces values of this type.
//
// +stateify savable
type TCPOptions struct {
	// TS is true if the TimeStamp option was present in the segment.
	TS bool

	// TSVal is the value in the TSVal field of the segment.
	TSVal uint32

	// TSEcr is the value in the TSEcr field of the segment.
	TSEcr uint32

	// SACKBlocks are the SACK blocks specified in the segment. It is nil
	// when the segment carried no SACK option.
	SACKBlocks []SACKBlock
}
 205  
// TCP represents a TCP header stored in a byte array. Its accessor methods
// index the slice directly at the TCP*Offset positions without checking its
// length first, so a slice that is too short makes them panic.
type TCP []byte
 208  
const (
	// TCPMinimumSize is the minimum size of a valid TCP packet, i.e. the
	// size of a TCP header without options.
	TCPMinimumSize = 20

	// TCPOptionsMaximumSize is the maximum size of TCP options.
	TCPOptionsMaximumSize = 40

	// TCPHeaderMaximumSize is the maximum header size of a TCP packet.
	TCPHeaderMaximumSize = TCPMinimumSize + TCPOptionsMaximumSize

	// TCPTotalHeaderMaximumSize is the maximum size of headers from all layers in
	// a TCP packet. It is analogous to MAX_TCP_HEADER in Linux.
	//
	// TODO(b/319936470): Investigate why this needs to be at least 140 bytes. In
	// Linux this value is at least 160, but in theory we should be able to use
	// 138. In practice anything less than 140 starts to break GSO on gVNIC
	// hardware.
	TCPTotalHeaderMaximumSize = 160

	// TCPProtocolNumber is TCP's transport protocol number.
	TCPProtocolNumber tcpip.TransportProtocolNumber = 6

	// TCPMinimumMSS is the minimum acceptable value for MSS. This is the
	// same as the value TCP_MIN_MSS defined in net/tcp.h.
	TCPMinimumMSS = IPv4MaximumHeaderSize + TCPHeaderMaximumSize + MinIPFragmentPayloadSize - IPv4MinimumSize - TCPMinimumSize

	// TCPMinimumSendMSS is the minimum value for MSS in a sender. This is the
	// same as the value TCP_MIN_SND_MSS in net/tcp.h. ParseSynOptions raises
	// smaller non-zero MSS values received from the peer to this value.
	TCPMinimumSendMSS = TCPOptionsMaximumSize + MinIPFragmentPayloadSize

	// TCPMaximumMSS is the maximum acceptable value for MSS.
	TCPMaximumMSS = 0xffff

	// TCPDefaultMSS is the MSS value that should be used if an MSS option
	// is not received from the peer. It's also the value returned by
	// TCP_MAXSEG option for a socket in an unconnected state.
	//
	// Per RFC 1122, page 85: "If an MSS option is not received at
	// connection setup, TCP MUST assume a default send MSS of 536."
	TCPDefaultMSS = 536
)
 250  
 251  // SourcePort returns the "source port" field of the TCP header.
 252  func (b TCP) SourcePort() uint16 {
 253  	return binary.BigEndian.Uint16(b[TCPSrcPortOffset:])
 254  }
 255  
 256  // DestinationPort returns the "destination port" field of the TCP header.
 257  func (b TCP) DestinationPort() uint16 {
 258  	return binary.BigEndian.Uint16(b[TCPDstPortOffset:])
 259  }
 260  
 261  // SequenceNumber returns the "sequence number" field of the TCP header.
 262  func (b TCP) SequenceNumber() uint32 {
 263  	return binary.BigEndian.Uint32(b[TCPSeqNumOffset:])
 264  }
 265  
 266  // AckNumber returns the "ack number" field of the TCP header.
 267  func (b TCP) AckNumber() uint32 {
 268  	return binary.BigEndian.Uint32(b[TCPAckNumOffset:])
 269  }
 270  
 271  // DataOffset returns the "data offset" field of the TCP header. The return
 272  // value is the length of the TCP header in bytes.
 273  func (b TCP) DataOffset() uint8 {
 274  	return (b[TCPDataOffset] >> 4) * 4
 275  }
 276  
 277  // Payload returns the data in the TCP packet.
 278  func (b TCP) Payload() []byte {
 279  	return b[b.DataOffset():]
 280  }
 281  
 282  // Flags returns the flags field of the TCP header.
 283  func (b TCP) Flags() TCPFlags {
 284  	return TCPFlags(b[TCPFlagsOffset])
 285  }
 286  
 287  // WindowSize returns the "window size" field of the TCP header.
 288  func (b TCP) WindowSize() uint16 {
 289  	return binary.BigEndian.Uint16(b[TCPWinSizeOffset:])
 290  }
 291  
 292  // Checksum returns the "checksum" field of the TCP header.
 293  func (b TCP) Checksum() uint16 {
 294  	return binary.BigEndian.Uint16(b[TCPChecksumOffset:])
 295  }
 296  
 297  // UrgentPointer returns the "urgent pointer" field of the TCP header.
 298  func (b TCP) UrgentPointer() uint16 {
 299  	return binary.BigEndian.Uint16(b[TCPUrgentPtrOffset:])
 300  }
 301  
 302  // SetSourcePort sets the "source port" field of the TCP header.
 303  func (b TCP) SetSourcePort(port uint16) {
 304  	binary.BigEndian.PutUint16(b[TCPSrcPortOffset:], port)
 305  }
 306  
 307  // SetDestinationPort sets the "destination port" field of the TCP header.
 308  func (b TCP) SetDestinationPort(port uint16) {
 309  	binary.BigEndian.PutUint16(b[TCPDstPortOffset:], port)
 310  }
 311  
 312  // SetChecksum sets the checksum field of the TCP header.
 313  func (b TCP) SetChecksum(xsum uint16) {
 314  	checksum.Put(b[TCPChecksumOffset:], xsum)
 315  }
 316  
 317  // SetDataOffset sets the data offset field of the TCP header. headerLen should
 318  // be the length of the TCP header in bytes.
 319  func (b TCP) SetDataOffset(headerLen uint8) {
 320  	b[TCPDataOffset] = (headerLen / 4) << 4
 321  }
 322  
 323  // SetSequenceNumber sets the sequence number field of the TCP header.
 324  func (b TCP) SetSequenceNumber(seqNum uint32) {
 325  	binary.BigEndian.PutUint32(b[TCPSeqNumOffset:], seqNum)
 326  }
 327  
 328  // SetAckNumber sets the ack number field of the TCP header.
 329  func (b TCP) SetAckNumber(ackNum uint32) {
 330  	binary.BigEndian.PutUint32(b[TCPAckNumOffset:], ackNum)
 331  }
 332  
// SetFlags sets the flags field of the TCP header by storing flags in the byte
// at TCPFlagsOffset. Note that it takes a raw uint8 rather than a TCPFlags
// value, so callers holding a TCPFlags must convert it.
func (b TCP) SetFlags(flags uint8) {
	b[TCPFlagsOffset] = flags
}
 337  
 338  // SetWindowSize sets the window size field of the TCP header.
 339  func (b TCP) SetWindowSize(rcvwnd uint16) {
 340  	binary.BigEndian.PutUint16(b[TCPWinSizeOffset:], rcvwnd)
 341  }
 342  
 343  // SetUrgentPointer sets the window size field of the TCP header.
 344  func (b TCP) SetUrgentPointer(urgentPointer uint16) {
 345  	binary.BigEndian.PutUint16(b[TCPUrgentPtrOffset:], urgentPointer)
 346  }
 347  
 348  // CalculateChecksum calculates the checksum of the TCP segment.
 349  // partialChecksum is the checksum of the network-layer pseudo-header
 350  // and the checksum of the segment data.
 351  func (b TCP) CalculateChecksum(partialChecksum uint16) uint16 {
 352  	// Calculate the rest of the checksum.
 353  	return checksum.Checksum(b[:b.DataOffset()], partialChecksum)
 354  }
 355  
 356  // IsChecksumValid returns true iff the TCP header's checksum is valid.
 357  func (b TCP) IsChecksumValid(src, dst tcpip.Address, payloadChecksum, payloadLength uint16) bool {
 358  	xsum := PseudoHeaderChecksum(TCPProtocolNumber, src, dst, uint16(b.DataOffset())+payloadLength)
 359  	xsum = checksum.Combine(xsum, payloadChecksum)
 360  	return b.CalculateChecksum(xsum) == 0xffff
 361  }
 362  
 363  // Options returns a slice that holds the unparsed TCP options in the segment.
 364  func (b TCP) Options() []byte {
 365  	return b[TCPMinimumSize:b.DataOffset()]
 366  }
 367  
 368  // ParsedOptions returns a TCPOptions structure which parses and caches the TCP
 369  // option values in the TCP segment. NOTE: Invoking this function repeatedly is
 370  // expensive as it reparses the options on each invocation.
 371  func (b TCP) ParsedOptions() TCPOptions {
 372  	return ParseTCPOptions(b.Options())
 373  }
 374  
 375  func (b TCP) encodeSubset(seq, ack uint32, flags TCPFlags, rcvwnd uint16) {
 376  	binary.BigEndian.PutUint32(b[TCPSeqNumOffset:], seq)
 377  	binary.BigEndian.PutUint32(b[TCPAckNumOffset:], ack)
 378  	b[TCPFlagsOffset] = uint8(flags)
 379  	binary.BigEndian.PutUint16(b[TCPWinSizeOffset:], rcvwnd)
 380  }
 381  
 382  // Encode encodes all the fields of the TCP header.
 383  func (b TCP) Encode(t *TCPFields) {
 384  	b.encodeSubset(t.SeqNum, t.AckNum, t.Flags, t.WindowSize)
 385  	b.SetSourcePort(t.SrcPort)
 386  	b.SetDestinationPort(t.DstPort)
 387  	b.SetDataOffset(t.DataOffset)
 388  	b.SetChecksum(t.Checksum)
 389  	b.SetUrgentPointer(t.UrgentPointer)
 390  }
 391  
 392  // EncodePartial updates a subset of the fields of the TCP header. It is useful
 393  // in cases when similar segments are produced.
 394  func (b TCP) EncodePartial(partialChecksum, length uint16, seqnum, acknum uint32, flags TCPFlags, rcvwnd uint16) {
 395  	// Add the total length and "flags" field contributions to the checksum.
 396  	// We don't use the flags field directly from the header because it's a
 397  	// one-byte field with an odd offset, so it would be accounted for
 398  	// incorrectly by the Checksum routine.
 399  	tmp := make([]byte, 4)
 400  	binary.BigEndian.PutUint16(tmp, length)
 401  	binary.BigEndian.PutUint16(tmp[2:], uint16(flags))
 402  	xsum := checksum.Checksum(tmp, partialChecksum)
 403  
 404  	// Encode the passed-in fields.
 405  	b.encodeSubset(seqnum, acknum, flags, rcvwnd)
 406  
 407  	// Add the contributions of the passed-in fields to the checksum.
 408  	xsum = checksum.Checksum(b[TCPSeqNumOffset:TCPSeqNumOffset+8], xsum)
 409  	xsum = checksum.Checksum(b[TCPWinSizeOffset:TCPWinSizeOffset+2], xsum)
 410  
 411  	// Encode the checksum.
 412  	b.SetChecksum(^xsum)
 413  }
 414  
 415  // SetSourcePortWithChecksumUpdate implements ChecksummableTransport.
 416  func (b TCP) SetSourcePortWithChecksumUpdate(new uint16) {
 417  	old := b.SourcePort()
 418  	b.SetSourcePort(new)
 419  	b.SetChecksum(^checksumUpdate2ByteAlignedUint16(^b.Checksum(), old, new))
 420  }
 421  
 422  // SetDestinationPortWithChecksumUpdate implements ChecksummableTransport.
 423  func (b TCP) SetDestinationPortWithChecksumUpdate(new uint16) {
 424  	old := b.DestinationPort()
 425  	b.SetDestinationPort(new)
 426  	b.SetChecksum(^checksumUpdate2ByteAlignedUint16(^b.Checksum(), old, new))
 427  }
 428  
 429  // UpdateChecksumPseudoHeaderAddress implements ChecksummableTransport.
 430  func (b TCP) UpdateChecksumPseudoHeaderAddress(old, new tcpip.Address, fullChecksum bool) {
 431  	xsum := b.Checksum()
 432  	if fullChecksum {
 433  		xsum = ^xsum
 434  	}
 435  
 436  	xsum = checksumUpdate2ByteAlignedAddress(xsum, old, new)
 437  	if fullChecksum {
 438  		xsum = ^xsum
 439  	}
 440  
 441  	b.SetChecksum(xsum)
 442  }
 443  
 444  // ParseSynOptions parses the options received in a SYN segment and returns the
 445  // relevant ones. opts should point to the option part of the TCP header.
 446  func ParseSynOptions(opts []byte, isAck bool) TCPSynOptions {
 447  	limit := len(opts)
 448  
 449  	synOpts := TCPSynOptions{
 450  		// Per RFC 1122, page 85: "If an MSS option is not received at
 451  		// connection setup, TCP MUST assume a default send MSS of 536."
 452  		MSS: TCPDefaultMSS,
 453  		// If no window scale option is specified, WS in options is
 454  		// returned as -1; this is because the absence of the option
 455  		// indicates that the we cannot use window scaling on the
 456  		// receive end either.
 457  		WS: -1,
 458  	}
 459  
 460  	for i := 0; i < limit; {
 461  		switch opts[i] {
 462  		case TCPOptionEOL:
 463  			i = limit
 464  		case TCPOptionNOP:
 465  			i++
 466  		case TCPOptionMSS:
 467  			if i+4 > limit || opts[i+1] != 4 {
 468  				return synOpts
 469  			}
 470  			mss := uint16(opts[i+2])<<8 | uint16(opts[i+3])
 471  			if mss == 0 {
 472  				return synOpts
 473  			}
 474  			synOpts.MSS = mss
 475  			if mss < TCPMinimumSendMSS {
 476  				synOpts.MSS = TCPMinimumSendMSS
 477  			}
 478  			i += 4
 479  
 480  		case TCPOptionWS:
 481  			if i+3 > limit || opts[i+1] != 3 {
 482  				return synOpts
 483  			}
 484  			ws := int(opts[i+2])
 485  			if ws > MaxWndScale {
 486  				ws = MaxWndScale
 487  			}
 488  			synOpts.WS = ws
 489  			i += 3
 490  
 491  		case TCPOptionTS:
 492  			if i+10 > limit || opts[i+1] != 10 {
 493  				return synOpts
 494  			}
 495  			synOpts.TSVal = binary.BigEndian.Uint32(opts[i+2:])
 496  			if isAck {
 497  				// If the segment is a SYN-ACK then store the Timestamp Echo Reply
 498  				// in the segment.
 499  				synOpts.TSEcr = binary.BigEndian.Uint32(opts[i+6:])
 500  			}
 501  			synOpts.TS = true
 502  			i += 10
 503  		case TCPOptionSACKPermitted:
 504  			if i+2 > limit || opts[i+1] != 2 {
 505  				return synOpts
 506  			}
 507  			synOpts.SACKPermitted = true
 508  			i += 2
 509  
 510  		default:
 511  			// We don't recognize this option, just skip over it.
 512  			if i+2 > limit {
 513  				return synOpts
 514  			}
 515  			l := int(opts[i+1])
 516  			// If the length is incorrect or if l+i overflows the
 517  			// total options length then return false.
 518  			if l < 2 || i+l > limit {
 519  				return synOpts
 520  			}
 521  			i += l
 522  		}
 523  	}
 524  
 525  	return synOpts
 526  }
 527  
 528  // ParseTCPOptions extracts and stores all known options in the provided byte
 529  // slice in a TCPOptions structure.
 530  func ParseTCPOptions(b []byte) TCPOptions {
 531  	opts := TCPOptions{}
 532  	limit := len(b)
 533  	for i := 0; i < limit; {
 534  		switch b[i] {
 535  		case TCPOptionEOL:
 536  			i = limit
 537  		case TCPOptionNOP:
 538  			i++
 539  		case TCPOptionTS:
 540  			if i+10 > limit || (b[i+1] != 10) {
 541  				return opts
 542  			}
 543  			opts.TS = true
 544  			opts.TSVal = binary.BigEndian.Uint32(b[i+2:])
 545  			opts.TSEcr = binary.BigEndian.Uint32(b[i+6:])
 546  			i += 10
 547  		case TCPOptionSACK:
 548  			if i+2 > limit {
 549  				// Malformed SACK block, just return and stop parsing.
 550  				return opts
 551  			}
 552  			sackOptionLen := int(b[i+1])
 553  			if i+sackOptionLen > limit || (sackOptionLen-2)%8 != 0 {
 554  				// Malformed SACK block, just return and stop parsing.
 555  				return opts
 556  			}
 557  			numBlocks := (sackOptionLen - 2) / 8
 558  			opts.SACKBlocks = []SACKBlock{}
 559  			for j := 0; j < numBlocks; j++ {
 560  				start := binary.BigEndian.Uint32(b[i+2+j*8:])
 561  				end := binary.BigEndian.Uint32(b[i+2+j*8+4:])
 562  				opts.SACKBlocks = append(opts.SACKBlocks, SACKBlock{
 563  					Start: seqnum.Value(start),
 564  					End:   seqnum.Value(end),
 565  				})
 566  			}
 567  			i += sackOptionLen
 568  		default:
 569  			// We don't recognize this option, just skip over it.
 570  			if i+2 > limit {
 571  				return opts
 572  			}
 573  			l := int(b[i+1])
 574  			// If the length is incorrect or if l+i overflows the
 575  			// total options length then return false.
 576  			if l < 2 || i+l > limit {
 577  				return opts
 578  			}
 579  			i += l
 580  		}
 581  	}
 582  	return opts
 583  }
 584  
 585  // EncodeMSSOption encodes the MSS TCP option with the provided MSS values in
 586  // the supplied buffer. If the provided buffer is not large enough then it just
 587  // returns without encoding anything. It returns the number of bytes written to
 588  // the provided buffer.
 589  func EncodeMSSOption(mss uint32, b []byte) int {
 590  	if len(b) < TCPOptionMSSLength {
 591  		return 0
 592  	}
 593  	b[0], b[1], b[2], b[3] = TCPOptionMSS, TCPOptionMSSLength, byte(mss>>8), byte(mss)
 594  	return TCPOptionMSSLength
 595  }
 596  
 597  // EncodeWSOption encodes the WS TCP option with the WS value in the
 598  // provided buffer. If the provided buffer is not large enough then it just
 599  // returns without encoding anything. It returns the number of bytes written to
 600  // the provided buffer.
 601  func EncodeWSOption(ws int, b []byte) int {
 602  	if len(b) < TCPOptionWSLength {
 603  		return 0
 604  	}
 605  	b[0], b[1], b[2] = TCPOptionWS, TCPOptionWSLength, uint8(ws)
 606  	return int(b[1])
 607  }
 608  
 609  // EncodeTSOption encodes the provided tsVal and tsEcr values as a TCP timestamp
 610  // option into the provided buffer. If the buffer is smaller than expected it
 611  // just returns without encoding anything. It returns the number of bytes
 612  // written to the provided buffer.
 613  func EncodeTSOption(tsVal, tsEcr uint32, b []byte) int {
 614  	if len(b) < TCPOptionTSLength {
 615  		return 0
 616  	}
 617  	b[0], b[1] = TCPOptionTS, TCPOptionTSLength
 618  	binary.BigEndian.PutUint32(b[2:], tsVal)
 619  	binary.BigEndian.PutUint32(b[6:], tsEcr)
 620  	return int(b[1])
 621  }
 622  
 623  // EncodeSACKPermittedOption encodes a SACKPermitted option into the provided
 624  // buffer. If the buffer is smaller than required it just returns without
 625  // encoding anything. It returns the number of bytes written to the provided
 626  // buffer.
 627  func EncodeSACKPermittedOption(b []byte) int {
 628  	if len(b) < TCPOptionSackPermittedLength {
 629  		return 0
 630  	}
 631  
 632  	b[0], b[1] = TCPOptionSACKPermitted, TCPOptionSackPermittedLength
 633  	return int(b[1])
 634  }
 635  
 636  // EncodeSACKBlocks encodes the provided SACK blocks as a TCP SACK option block
 637  // in the provided slice. It tries to fit in as many blocks as possible based on
 638  // number of bytes available in the provided buffer. It returns the number of
 639  // bytes written to the provided buffer.
 640  func EncodeSACKBlocks(sackBlocks []SACKBlock, b []byte) int {
 641  	if len(sackBlocks) == 0 {
 642  		return 0
 643  	}
 644  	l := len(sackBlocks)
 645  	if l > TCPMaxSACKBlocks {
 646  		l = TCPMaxSACKBlocks
 647  	}
 648  	if ll := (len(b) - 2) / 8; ll < l {
 649  		l = ll
 650  	}
 651  	if l == 0 {
 652  		// There is not enough space in the provided buffer to add
 653  		// any SACK blocks.
 654  		return 0
 655  	}
 656  	b[0] = TCPOptionSACK
 657  	b[1] = byte(l*8 + 2)
 658  	for i := 0; i < l; i++ {
 659  		binary.BigEndian.PutUint32(b[i*8+2:], uint32(sackBlocks[i].Start))
 660  		binary.BigEndian.PutUint32(b[i*8+6:], uint32(sackBlocks[i].End))
 661  	}
 662  	return int(b[1])
 663  }
 664  
 665  // EncodeNOP adds an explicit NOP to the option list.
 666  func EncodeNOP(b []byte) int {
 667  	if len(b) == 0 {
 668  		return 0
 669  	}
 670  	b[0] = TCPOptionNOP
 671  	return 1
 672  }
 673  
 674  // AddTCPOptionPadding adds the required number of TCPOptionNOP to quad align
 675  // the option buffer. It adds padding bytes after the offset specified and
 676  // returns the number of padding bytes added. The passed in options slice
 677  // must have space for the padding bytes.
 678  func AddTCPOptionPadding(options []byte, offset int) int {
 679  	paddingToAdd := -offset & 3
 680  	// Now add any padding bytes that might be required to quad align the
 681  	// options.
 682  	for i := offset; i < offset+paddingToAdd; i++ {
 683  		options[i] = TCPOptionNOP
 684  	}
 685  	return paddingToAdd
 686  }
 687  
 688  // Acceptable checks if a segment that starts at segSeq and has length segLen is
 689  // "acceptable" for arriving in a receive window that starts at rcvNxt and ends
 690  // before rcvAcc, according to the table on page 26 and 69 of RFC 793.
 691  func Acceptable(segSeq seqnum.Value, segLen seqnum.Size, rcvNxt, rcvAcc seqnum.Value) bool {
 692  	if rcvNxt == rcvAcc {
 693  		return segLen == 0 && segSeq == rcvNxt
 694  	}
 695  	if segLen == 0 {
 696  		// rcvWnd is incremented by 1 because that is Linux's behavior despite the
 697  		// RFC.
 698  		return segSeq.InRange(rcvNxt, rcvAcc.Add(1))
 699  	}
 700  	// Page 70 of RFC 793 allows packets that can be made "acceptable" by trimming
 701  	// the payload, so we'll accept any payload that overlaps the receive window.
 702  	// segSeq < rcvAcc is more correct according to RFC, however, Linux does it
 703  	// differently, it uses segSeq <= rcvAcc, we'd want to keep the same behavior
 704  	// as Linux.
 705  	return rcvNxt.LessThan(segSeq.Add(segLen)) && segSeq.LessThanEq(rcvAcc)
 706  }
 707  
 708  // TCPValid returns true if the pkt has a valid TCP header. It checks whether:
 709  //   - The data offset is too small.
 710  //   - The data offset is too large.
 711  //   - The checksum is invalid.
 712  //
 713  // TCPValid corresponds to net/netfilter/nf_conntrack_proto_tcp.c:tcp_error.
 714  func TCPValid(hdr TCP, payloadChecksum func() uint16, payloadSize uint16, srcAddr, dstAddr tcpip.Address, skipChecksumValidation bool) (csum uint16, csumValid, ok bool) {
 715  	if offset := int(hdr.DataOffset()); offset < TCPMinimumSize || offset > len(hdr) {
 716  		return
 717  	}
 718  
 719  	if skipChecksumValidation {
 720  		csumValid = true
 721  	} else {
 722  		csum = hdr.Checksum()
 723  		csumValid = hdr.IsChecksumValid(srcAddr, dstAddr, payloadChecksum(), payloadSize)
 724  	}
 725  	return csum, csumValid, true
 726  }
 727