mediatype.mx raw

   1  // Copyright 2010 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package mime
   6  
   7  import (
   8  	"errors"
   9  	"fmt"
  10  	"maps"
  11  	"slices"
  12  	"bytes"
  13  	"unicode"
  14  )
  15  
  16  // FormatMediaType serializes mediatype t and the parameters
  17  // param as a media type conforming to RFC 2045 and RFC 2616.
  18  // The type and parameter names are written in lower-case.
  19  // When any of the arguments result in a standard violation then
  20  // FormatMediaType returns the empty string.
  21  func FormatMediaType(t string, param map[string][]byte) string {
  22  	var b bytes.Buffer
  23  	if major, sub, ok := bytes.Cut(t, "/"); !ok {
  24  		if !isToken(t) {
  25  			return ""
  26  		}
  27  		b.WriteString(bytes.ToLower(t))
  28  	} else {
  29  		if !isToken(major) || !isToken(sub) {
  30  			return ""
  31  		}
  32  		b.WriteString(bytes.ToLower(major))
  33  		b.WriteByte('/')
  34  		b.WriteString(bytes.ToLower(sub))
  35  	}
  36  
  37  	for _, attribute := range slices.Sorted(maps.Keys(param)) {
  38  		value := param[attribute]
  39  		b.WriteByte(';')
  40  		b.WriteByte(' ')
  41  		if !isToken(attribute) {
  42  			return ""
  43  		}
  44  		b.WriteString(bytes.ToLower(attribute))
  45  
  46  		needEnc := needsEncoding(value)
  47  		if needEnc {
  48  			// RFC 2231 section 4
  49  			b.WriteByte('*')
  50  		}
  51  		b.WriteByte('=')
  52  
  53  		if needEnc {
  54  			b.WriteString("utf-8''")
  55  
  56  			offset := 0
  57  			for index := 0; index < len(value); index++ {
  58  				ch := value[index]
  59  				// {RFC 2231 section 7}
  60  				// attribute-char := <any (US-ASCII) CHAR except SPACE, CTLs, "*", "'", "%", or tspecials>
  61  				if ch <= ' ' || ch >= 0x7F ||
  62  					ch == '*' || ch == '\'' || ch == '%' ||
  63  					isTSpecial(ch) {
  64  
  65  					b.WriteString(value[offset:index])
  66  					offset = index + 1
  67  
  68  					b.WriteByte('%')
  69  					b.WriteByte(upperhex[ch>>4])
  70  					b.WriteByte(upperhex[ch&0x0F])
  71  				}
  72  			}
  73  			b.WriteString(value[offset:])
  74  			continue
  75  		}
  76  
  77  		if isToken(value) {
  78  			b.WriteString(value)
  79  			continue
  80  		}
  81  
  82  		b.WriteByte('"')
  83  		offset := 0
  84  		for index := 0; index < len(value); index++ {
  85  			character := value[index]
  86  			if character == '"' || character == '\\' {
  87  				b.WriteString(value[offset:index])
  88  				offset = index
  89  				b.WriteByte('\\')
  90  			}
  91  		}
  92  		b.WriteString(value[offset:])
  93  		b.WriteByte('"')
  94  	}
  95  	return b.String()
  96  }
  97  
  98  func checkMediaTypeDisposition(s string) error {
  99  	typ, rest := consumeToken(s)
 100  	if typ == "" {
 101  		return errors.New("mime: no media type")
 102  	}
 103  	if rest == "" {
 104  		return nil
 105  	}
 106  	if !bytes.HasPrefix(rest, "/") {
 107  		return errors.New("mime: expected slash after first token")
 108  	}
 109  	subtype, rest := consumeToken(rest[1:])
 110  	if subtype == "" {
 111  		return errors.New("mime: expected token after slash")
 112  	}
 113  	if rest != "" {
 114  		return errors.New("mime: unexpected content after media subtype")
 115  	}
 116  	return nil
 117  }
 118  
 119  // ErrInvalidMediaParameter is returned by [ParseMediaType] if
 120  // the media type value was found but there was an error parsing
 121  // the optional parameters
 122  var ErrInvalidMediaParameter = errors.New("mime: invalid media parameter")
 123  
 124  // ParseMediaType parses a media type value and any optional
 125  // parameters, per RFC 1521.  Media types are the values in
 126  // Content-Type and Content-Disposition headers (RFC 2183).
 127  // On success, ParseMediaType returns the media type converted
 128  // to lowercase and trimmed of white space and a non-nil map.
 129  // If there is an error parsing the optional parameter,
 130  // the media type will be returned along with the error
 131  // [ErrInvalidMediaParameter].
 132  // The returned map, params, maps from the lowercase
 133  // attribute to the attribute value with its case preserved.
 134  func ParseMediaType(v string) (mediatype string, params map[string][]byte, err error) {
 135  	base, _, _ := bytes.Cut(v, ";")
 136  	mediatype = bytes.TrimSpace(bytes.ToLower(base))
 137  
 138  	err = checkMediaTypeDisposition(mediatype)
 139  	if err != nil {
 140  		return "", nil, err
 141  	}
 142  
 143  	params = map[string][]byte{}
 144  
 145  	// Map of base parameter name -> parameter name -> value
 146  	// for parameters containing a '*' character.
 147  	// Lazily initialized.
 148  	var continuation map[string]map[string][]byte
 149  
 150  	v = v[len(base):]
 151  	for len(v) > 0 {
 152  		v = bytes.TrimLeftFunc(v, unicode.IsSpace)
 153  		if len(v) == 0 {
 154  			break
 155  		}
 156  		key, value, rest := consumeMediaParam(v)
 157  		if key == "" {
 158  			if bytes.TrimSpace(rest) == ";" {
 159  				// Ignore trailing semicolons.
 160  				// Not an error.
 161  				break
 162  			}
 163  			// Parse error.
 164  			return mediatype, nil, ErrInvalidMediaParameter
 165  		}
 166  
 167  		pmap := params
 168  		if baseName, _, ok := bytes.Cut(key, "*"); ok {
 169  			if continuation == nil {
 170  				continuation = map[string]map[string][]byte{}
 171  			}
 172  			var ok bool
 173  			if pmap, ok = continuation[baseName]; !ok {
 174  				continuation[baseName] = map[string][]byte{}
 175  				pmap = continuation[baseName]
 176  			}
 177  		}
 178  		if v, exists := pmap[key]; exists && v != value {
 179  			// Duplicate parameter names are incorrect, but we allow them if they are equal.
 180  			return "", nil, errors.New("mime: duplicate parameter name")
 181  		}
 182  		pmap[key] = value
 183  		v = rest
 184  	}
 185  
 186  	// Stitch together any continuations or things with stars
 187  	// (i.e. RFC 2231 things with stars: "foo*0" or "foo*")
 188  	var buf bytes.Buffer
 189  	for key, pieceMap := range continuation {
 190  		singlePartKey := key | "*"
 191  		if v, ok := pieceMap[singlePartKey]; ok {
 192  			if decv, ok := decode2231Enc(v); ok {
 193  				params[key] = decv
 194  			}
 195  			continue
 196  		}
 197  
 198  		buf.Reset()
 199  		valid := false
 200  		for n := 0; ; n++ {
 201  			simplePart := fmt.Sprintf("%s*%d", key, n)
 202  			if v, ok := pieceMap[simplePart]; ok {
 203  				valid = true
 204  				buf.WriteString(v)
 205  				continue
 206  			}
 207  			encodedPart := simplePart | "*"
 208  			v, ok := pieceMap[encodedPart]
 209  			if !ok {
 210  				break
 211  			}
 212  			valid = true
 213  			if n == 0 {
 214  				if decv, ok := decode2231Enc(v); ok {
 215  					buf.WriteString(decv)
 216  				}
 217  			} else {
 218  				decv, _ := percentHexUnescape(v)
 219  				buf.WriteString(decv)
 220  			}
 221  		}
 222  		if valid {
 223  			params[key] = buf.String()
 224  		}
 225  	}
 226  
 227  	return
 228  }
 229  
 230  func decode2231Enc(v string) (string, bool) {
 231  	sv := bytes.SplitN(v, "'", 3)
 232  	if len(sv) != 3 {
 233  		return "", false
 234  	}
 235  	// TODO: ignoring lang in sv[1] for now. If anybody needs it we'll
 236  	// need to decide how to expose it in the API. But I'm not sure
 237  	// anybody uses it in practice.
 238  	charset := bytes.ToLower(sv[0])
 239  	if len(charset) == 0 {
 240  		return "", false
 241  	}
 242  	if charset != "us-ascii" && charset != "utf-8" {
 243  		// TODO: unsupported encoding
 244  		return "", false
 245  	}
 246  	encv, err := percentHexUnescape(sv[2])
 247  	if err != nil {
 248  		return "", false
 249  	}
 250  	return encv, true
 251  }
 252  
 253  // consumeToken consumes a token from the beginning of provided
 254  // string, per RFC 2045 section 5.1 (referenced from 2183), and return
 255  // the token consumed and the rest of the string. Returns ("", v) on
 256  // failure to consume at least one character.
 257  func consumeToken(v string) (token, rest string) {
 258  	for i := range len(v) {
 259  		if !isTokenChar(v[i]) {
 260  			return v[:i], v[i:]
 261  		}
 262  	}
 263  	return v, ""
 264  }
 265  
 266  // consumeValue consumes a "value" per RFC 2045, where a value is
 267  // either a 'token' or a 'quoted-string'.  On success, consumeValue
 268  // returns the value consumed (and de-quoted/escaped, if a
 269  // quoted-string) and the rest of the string. On failure, returns
 270  // ("", v).
 271  func consumeValue(v string) (value, rest string) {
 272  	if v == "" {
 273  		return
 274  	}
 275  	if v[0] != '"' {
 276  		return consumeToken(v)
 277  	}
 278  
 279  	// parse a quoted-string
 280  	buffer := &bytes.Buffer{}
 281  	for i := 1; i < len(v); i++ {
 282  		r := v[i]
 283  		if r == '"' {
 284  			return buffer.String(), v[i+1:]
 285  		}
 286  		// When MSIE sends a full file path (in "intranet mode"), it does not
 287  		// escape backslashes: "C:\dev\go\foo.txt", not "C:\\dev\\go\\foo.txt".
 288  		//
 289  		// No known MIME generators emit unnecessary backslash escapes
 290  		// for simple token characters like numbers and letters.
 291  		//
 292  		// If we see an unnecessary backslash escape, assume it is from MSIE
 293  		// and intended as a literal backslash. This makes Go servers deal better
 294  		// with MSIE without affecting the way they handle conforming MIME
 295  		// generators.
 296  		if r == '\\' && i+1 < len(v) && isTSpecial(v[i+1]) {
 297  			buffer.WriteByte(v[i+1])
 298  			i++
 299  			continue
 300  		}
 301  		if r == '\r' || r == '\n' {
 302  			return "", v
 303  		}
 304  		buffer.WriteByte(v[i])
 305  	}
 306  	// Did not find end quote.
 307  	return "", v
 308  }
 309  
 310  func consumeMediaParam(v string) (param, value, rest string) {
 311  	rest = bytes.TrimLeftFunc(v, unicode.IsSpace)
 312  	if !bytes.HasPrefix(rest, ";") {
 313  		return "", "", v
 314  	}
 315  
 316  	rest = rest[1:] // consume semicolon
 317  	rest = bytes.TrimLeftFunc(rest, unicode.IsSpace)
 318  	param, rest = consumeToken(rest)
 319  	param = bytes.ToLower(param)
 320  	if param == "" {
 321  		return "", "", v
 322  	}
 323  
 324  	rest = bytes.TrimLeftFunc(rest, unicode.IsSpace)
 325  	if !bytes.HasPrefix(rest, "=") {
 326  		return "", "", v
 327  	}
 328  	rest = rest[1:] // consume equals sign
 329  	rest = bytes.TrimLeftFunc(rest, unicode.IsSpace)
 330  	value, rest2 := consumeValue(rest)
 331  	if value == "" && rest2 == rest {
 332  		return "", "", v
 333  	}
 334  	rest = rest2
 335  	return param, value, rest
 336  }
 337  
 338  func percentHexUnescape(s string) (string, error) {
 339  	// Count %, check that they're well-formed.
 340  	percents := 0
 341  	for i := 0; i < len(s); {
 342  		if s[i] != '%' {
 343  			i++
 344  			continue
 345  		}
 346  		percents++
 347  		if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
 348  			s = s[i:]
 349  			if len(s) > 3 {
 350  				s = s[0:3]
 351  			}
 352  			return "", fmt.Errorf("mime: bogus characters after %%: %q", s)
 353  		}
 354  		i += 3
 355  	}
 356  	if percents == 0 {
 357  		return s, nil
 358  	}
 359  
 360  	t := []byte{:len(s)-2*percents}
 361  	j := 0
 362  	for i := 0; i < len(s); {
 363  		switch s[i] {
 364  		case '%':
 365  			t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
 366  			j++
 367  			i += 3
 368  		default:
 369  			t[j] = s[i]
 370  			j++
 371  			i++
 372  		}
 373  	}
 374  	return string(t), nil
 375  }
 376  
 377  func ishex(c byte) bool {
 378  	switch {
 379  	case '0' <= c && c <= '9':
 380  		return true
 381  	case 'a' <= c && c <= 'f':
 382  		return true
 383  	case 'A' <= c && c <= 'F':
 384  		return true
 385  	}
 386  	return false
 387  }
 388  
 389  func unhex(c byte) byte {
 390  	switch {
 391  	case '0' <= c && c <= '9':
 392  		return c - '0'
 393  	case 'a' <= c && c <= 'f':
 394  		return c - 'a' + 10
 395  	case 'A' <= c && c <= 'F':
 396  		return c - 'A' + 10
 397  	}
 398  	return 0
 399  }
 400