strconv.mx raw

   1  // Copyright 2016 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package tar
   6  
   7  import (
   8  	"bytes"
   9  	"fmt"
  10  	"strconv"
  11  	"time"
  12  )
  13  
  14  // hasNUL reports whether the NUL character exists within s.
  15  func hasNUL(s string) bool {
  16  	return bytes.Contains(s, "\x00")
  17  }
  18  
  19  // isASCII reports whether the input is an ASCII C-style string.
  20  func isASCII(s string) bool {
  21  	for _, c := range s {
  22  		if c >= 0x80 || c == 0x00 {
  23  			return false
  24  		}
  25  	}
  26  	return true
  27  }
  28  
  29  // toASCII converts the input to an ASCII C-style string.
  30  // This is a best effort conversion, so invalid characters are dropped.
  31  func toASCII(s string) string {
  32  	if isASCII(s) {
  33  		return s
  34  	}
  35  	b := []byte{:0:len(s)}
  36  	for _, c := range s {
  37  		if c < 0x80 && c != 0x00 {
  38  			b = append(b, byte(c))
  39  		}
  40  	}
  41  	return string(b)
  42  }
  43  
// parser carries the error state for the header-field decoding methods
// (parseString, parseNumeric, parseOctal). Those methods record a failure
// by assigning to err rather than returning an error value.
type parser struct {
	err error // Last error seen
}
  47  
// formatter carries the error state for the header-field encoding methods
// (formatString, formatNumeric, formatOctal). Those methods record a failure
// by assigning to err rather than returning an error value.
type formatter struct {
	err error // Last error seen
}
  51  
  52  // parseString parses bytes as a NUL-terminated C-style string.
  53  // If a NUL byte is not found then the whole slice is returned as a string.
  54  func (*parser) parseString(b []byte) string {
  55  	if i := bytes.IndexByte(b, 0); i >= 0 {
  56  		return string(b[:i])
  57  	}
  58  	return string(b)
  59  }
  60  
  61  // formatString copies s into b, NUL-terminating if possible.
  62  func (f *formatter) formatString(b []byte, s string) {
  63  	if len(s) > len(b) {
  64  		f.err = ErrFieldTooLong
  65  	}
  66  	copy(b, s)
  67  	if len(s) < len(b) {
  68  		b[len(s)] = 0
  69  	}
  70  
  71  	// Some buggy readers treat regular files with a trailing slash
  72  	// in the V7 path field as a directory even though the full path
  73  	// recorded elsewhere (e.g., via PAX record) contains no trailing slash.
  74  	if len(s) > len(b) && b[len(b)-1] == '/' {
  75  		n := len(bytes.TrimRight(s[:len(b)-1], "/"))
  76  		b[n] = 0 // Replace trailing slash with NUL terminator
  77  	}
  78  }
  79  
  80  // fitsInBase256 reports whether x can be encoded into n bytes using base-256
  81  // encoding. Unlike octal encoding, base-256 encoding does not require that the
  82  // string ends with a NUL character. Thus, all n bytes are available for output.
  83  //
  84  // If operating in binary mode, this assumes strict GNU binary mode; which means
  85  // that the first byte can only be either 0x80 or 0xff. Thus, the first byte is
  86  // equivalent to the sign bit in two's complement form.
  87  func fitsInBase256(n int, x int64) bool {
  88  	binBits := uint(n-1) * 8
  89  	return n >= 9 || (x >= -1<<binBits && x < 1<<binBits)
  90  }
  91  
  92  // parseNumeric parses the input as being encoded in either base-256 or octal.
  93  // This function may return negative numbers.
  94  // If parsing fails or an integer overflow occurs, err will be set.
  95  func (p *parser) parseNumeric(b []byte) int64 {
  96  	// Check for base-256 (binary) format first.
  97  	// If the first bit is set, then all following bits constitute a two's
  98  	// complement encoded number in big-endian byte order.
  99  	if len(b) > 0 && b[0]&0x80 != 0 {
 100  		// Handling negative numbers relies on the following identity:
 101  		//	-a-1 == ^a
 102  		//
 103  		// If the number is negative, we use an inversion mask to invert the
 104  		// data bytes and treat the value as an unsigned number.
 105  		var inv byte // 0x00 if positive or zero, 0xff if negative
 106  		if b[0]&0x40 != 0 {
 107  			inv = 0xff
 108  		}
 109  
 110  		var x uint64
 111  		for i, c := range b {
 112  			c ^= inv // Inverts c only if inv is 0xff, otherwise does nothing
 113  			if i == 0 {
 114  				c &= 0x7f // Ignore signal bit in first byte
 115  			}
 116  			if (x >> 56) > 0 {
 117  				p.err = ErrHeader // Integer overflow
 118  				return 0
 119  			}
 120  			x = x<<8 | uint64(c)
 121  		}
 122  		if (x >> 63) > 0 {
 123  			p.err = ErrHeader // Integer overflow
 124  			return 0
 125  		}
 126  		if inv == 0xff {
 127  			return ^int64(x)
 128  		}
 129  		return int64(x)
 130  	}
 131  
 132  	// Normal case is base-8 (octal) format.
 133  	return p.parseOctal(b)
 134  }
 135  
 136  // formatNumeric encodes x into b using base-8 (octal) encoding if possible.
 137  // Otherwise it will attempt to use base-256 (binary) encoding.
 138  func (f *formatter) formatNumeric(b []byte, x int64) {
 139  	if fitsInOctal(len(b), x) {
 140  		f.formatOctal(b, x)
 141  		return
 142  	}
 143  
 144  	if fitsInBase256(len(b), x) {
 145  		for i := len(b) - 1; i >= 0; i-- {
 146  			b[i] = byte(x)
 147  			x >>= 8
 148  		}
 149  		b[0] |= 0x80 // Highest bit indicates binary format
 150  		return
 151  	}
 152  
 153  	f.formatOctal(b, 0) // Last resort, just write zero
 154  	f.err = ErrFieldTooLong
 155  }
 156  
 157  func (p *parser) parseOctal(b []byte) int64 {
 158  	// Because unused fields are filled with NULs, we need
 159  	// to skip leading NULs. Fields may also be padded with
 160  	// spaces or NULs.
 161  	// So we remove leading and trailing NULs and spaces to
 162  	// be sure.
 163  	b = bytes.Trim(b, " \x00")
 164  
 165  	if len(b) == 0 {
 166  		return 0
 167  	}
 168  	x, perr := strconv.ParseUint(p.parseString(b), 8, 64)
 169  	if perr != nil {
 170  		p.err = ErrHeader
 171  	}
 172  	return int64(x)
 173  }
 174  
 175  func (f *formatter) formatOctal(b []byte, x int64) {
 176  	if !fitsInOctal(len(b), x) {
 177  		x = 0 // Last resort, just write zero
 178  		f.err = ErrFieldTooLong
 179  	}
 180  
 181  	s := strconv.FormatInt(x, 8)
 182  	// Add leading zeros, but leave room for a NUL.
 183  	if n := len(b) - len(s) - 1; n > 0 {
 184  		s = bytes.Repeat("0", n) + s
 185  	}
 186  	f.formatString(b, s)
 187  }
 188  
 189  // fitsInOctal reports whether the integer x fits in a field n-bytes long
 190  // using octal encoding with the appropriate NUL terminator.
 191  func fitsInOctal(n int, x int64) bool {
 192  	octBits := uint(n-1) * 3
 193  	return x >= 0 && (n >= 22 || x < 1<<octBits)
 194  }
 195  
 196  // parsePAXTime takes a string of the form %d.%d as described in the PAX
 197  // specification. Note that this implementation allows for negative timestamps,
 198  // which is allowed for by the PAX specification, but not always portable.
 199  func parsePAXTime(s string) (time.Time, error) {
 200  	const maxNanoSecondDigits = 9
 201  
 202  	// Split string into seconds and sub-seconds parts.
 203  	ss, sn, _ := bytes.Cut(s, ".")
 204  
 205  	// Parse the seconds.
 206  	secs, err := strconv.ParseInt(ss, 10, 64)
 207  	if err != nil {
 208  		return time.Time{}, ErrHeader
 209  	}
 210  	if len(sn) == 0 {
 211  		return time.Unix(secs, 0), nil // No sub-second values
 212  	}
 213  
 214  	// Parse the nanoseconds.
 215  	if bytes.Trim(sn, "0123456789") != "" {
 216  		return time.Time{}, ErrHeader
 217  	}
 218  	if len(sn) < maxNanoSecondDigits {
 219  		sn += bytes.Repeat("0", maxNanoSecondDigits-len(sn)) // Right pad
 220  	} else {
 221  		sn = sn[:maxNanoSecondDigits] // Right truncate
 222  	}
 223  	nsecs, _ := strconv.ParseInt(sn, 10, 64) // Must succeed
 224  	if len(ss) > 0 && ss[0] == '-' {
 225  		return time.Unix(secs, -1*nsecs), nil // Negative correction
 226  	}
 227  	return time.Unix(secs, nsecs), nil
 228  }
 229  
 230  // formatPAXTime converts ts into a time of the form %d.%d as described in the
 231  // PAX specification. This function is capable of negative timestamps.
 232  func formatPAXTime(ts time.Time) (s string) {
 233  	secs, nsecs := ts.Unix(), ts.Nanosecond()
 234  	if nsecs == 0 {
 235  		return strconv.FormatInt(secs, 10)
 236  	}
 237  
 238  	// If seconds is negative, then perform correction.
 239  	sign := ""
 240  	if secs < 0 {
 241  		sign = "-"             // Remember sign
 242  		secs = -(secs + 1)     // Add a second to secs
 243  		nsecs = -(nsecs - 1e9) // Take that second away from nsecs
 244  	}
 245  	return bytes.TrimRight(fmt.Sprintf("%s%d.%09d", sign, secs, nsecs), "0")
 246  }
 247  
 248  // parsePAXRecord parses the input PAX record string into a key-value pair.
 249  // If parsing is successful, it will slice off the currently read record and
 250  // return the remainder as r.
 251  func parsePAXRecord(s string) (k, v, r string, err error) {
 252  	// The size field ends at the first space.
 253  	nStr, rest, ok := bytes.Cut(s, " ")
 254  	if !ok {
 255  		return "", "", s, ErrHeader
 256  	}
 257  
 258  	// Parse the first token as a decimal integer.
 259  	n, perr := strconv.ParseInt(nStr, 10, 0) // Intentionally parse as native int
 260  	if perr != nil || n < 5 || n > int64(len(s)) {
 261  		return "", "", s, ErrHeader
 262  	}
 263  	n -= int64(len(nStr) + 1) // convert from index in s to index in rest
 264  	if n <= 0 {
 265  		return "", "", s, ErrHeader
 266  	}
 267  
 268  	// Extract everything between the space and the final newline.
 269  	rec, nl, rem := rest[:n-1], rest[n-1:n], rest[n:]
 270  	if nl != "\n" {
 271  		return "", "", s, ErrHeader
 272  	}
 273  
 274  	// The first equals separates the key from the value.
 275  	k, v, ok = bytes.Cut(rec, "=")
 276  	if !ok {
 277  		return "", "", s, ErrHeader
 278  	}
 279  
 280  	if !validPAXRecord(k, v) {
 281  		return "", "", s, ErrHeader
 282  	}
 283  	return k, v, rem, nil
 284  }
 285  
 286  // formatPAXRecord formats a single PAX record, prefixing it with the
 287  // appropriate length.
 288  func formatPAXRecord(k, v string) (string, error) {
 289  	if !validPAXRecord(k, v) {
 290  		return "", ErrHeader
 291  	}
 292  
 293  	const padding = 3 // Extra padding for ' ', '=', and '\n'
 294  	size := len(k) + len(v) + padding
 295  	size += len(strconv.Itoa(size))
 296  	record := strconv.Itoa(size) + " " + k + "=" + v + "\n"
 297  
 298  	// Final adjustment if adding size field increased the record size.
 299  	if len(record) != size {
 300  		size = len(record)
 301  		record = strconv.Itoa(size) + " " + k + "=" + v + "\n"
 302  	}
 303  	return record, nil
 304  }
 305  
 306  // validPAXRecord reports whether the key-value pair is valid where each
 307  // record is formatted as:
 308  //
 309  //	"%d %s=%s\n" % (size, key, value)
 310  //
 311  // Keys and values should be UTF-8, but the number of bad writers out there
 312  // forces us to be a more liberal.
 313  // Thus, we only reject all keys with NUL, and only reject NULs in values
 314  // for the PAX version of the USTAR string fields.
 315  // The key must not contain an '=' character.
 316  func validPAXRecord(k, v string) bool {
 317  	if k == "" || bytes.Contains(k, "=") {
 318  		return false
 319  	}
 320  	switch k {
 321  	case paxPath, paxLinkpath, paxUname, paxGname:
 322  		return !hasNUL(v)
 323  	default:
 324  		return !hasNUL(k)
 325  	}
 326  }
 327