scanner.go raw

   1  package unstable
   2  
   3  import "github.com/pelletier/go-toml/v2/internal/characters"
   4  
   5  func scanFollows(b []byte, pattern string) bool {
   6  	n := len(pattern)
   7  
   8  	return len(b) >= n && string(b[:n]) == pattern
   9  }
  10  
  11  func scanFollowsMultilineBasicStringDelimiter(b []byte) bool {
  12  	return scanFollows(b, `"""`)
  13  }
  14  
  15  func scanFollowsMultilineLiteralStringDelimiter(b []byte) bool {
  16  	return scanFollows(b, `'''`)
  17  }
  18  
  19  func scanFollowsTrue(b []byte) bool {
  20  	return scanFollows(b, `true`)
  21  }
  22  
  23  func scanFollowsFalse(b []byte) bool {
  24  	return scanFollows(b, `false`)
  25  }
  26  
  27  func scanFollowsInf(b []byte) bool {
  28  	return scanFollows(b, `inf`)
  29  }
  30  
  31  func scanFollowsNan(b []byte) bool {
  32  	return scanFollows(b, `nan`)
  33  }
  34  
  35  func scanUnquotedKey(b []byte) ([]byte, []byte) {
  36  	// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
  37  	for i := 0; i < len(b); i++ {
  38  		if !isUnquotedKeyChar(b[i]) {
  39  			return b[:i], b[i:]
  40  		}
  41  	}
  42  
  43  	return b, b[len(b):]
  44  }
  45  
  46  func isUnquotedKeyChar(r byte) bool {
  47  	return (r >= 'A' && r <= 'Z') || (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '-' || r == '_'
  48  }
  49  
  50  func scanLiteralString(b []byte) ([]byte, []byte, error) {
  51  	// literal-string = apostrophe *literal-char apostrophe
  52  	// apostrophe = %x27 ; ' apostrophe
  53  	// literal-char = %x09 / %x20-26 / %x28-7E / non-ascii
  54  	for i := 1; i < len(b); {
  55  		switch b[i] {
  56  		case '\'':
  57  			return b[:i+1], b[i+1:], nil
  58  		case '\n', '\r':
  59  			return nil, nil, NewParserError(b[i:i+1], "literal strings cannot have new lines")
  60  		}
  61  		size := characters.Utf8ValidNext(b[i:])
  62  		if size == 0 {
  63  			return nil, nil, NewParserError(b[i:i+1], "invalid character")
  64  		}
  65  		i += size
  66  	}
  67  
  68  	return nil, nil, NewParserError(b[len(b):], "unterminated literal string")
  69  }
  70  
  71  func scanMultilineLiteralString(b []byte) ([]byte, []byte, error) {
  72  	// ml-literal-string = ml-literal-string-delim [ newline ] ml-literal-body
  73  	// ml-literal-string-delim
  74  	// ml-literal-string-delim = 3apostrophe
  75  	// ml-literal-body = *mll-content *( mll-quotes 1*mll-content ) [ mll-quotes ]
  76  	//
  77  	// mll-content = mll-char / newline
  78  	// mll-char = %x09 / %x20-26 / %x28-7E / non-ascii
  79  	// mll-quotes = 1*2apostrophe
  80  	for i := 3; i < len(b); {
  81  		switch b[i] {
  82  		case '\'':
  83  			if scanFollowsMultilineLiteralStringDelimiter(b[i:]) {
  84  				i += 3
  85  
  86  				// At that point we found 3 apostrophe, and i is the
  87  				// index of the byte after the third one. The scanner
  88  				// needs to be eager, because there can be an extra 2
  89  				// apostrophe that can be accepted at the end of the
  90  				// string.
  91  
  92  				if i >= len(b) || b[i] != '\'' {
  93  					return b[:i], b[i:], nil
  94  				}
  95  				i++
  96  
  97  				if i >= len(b) || b[i] != '\'' {
  98  					return b[:i], b[i:], nil
  99  				}
 100  				i++
 101  
 102  				if i < len(b) && b[i] == '\'' {
 103  					return nil, nil, NewParserError(b[i-3:i+1], "''' not allowed in multiline literal string")
 104  				}
 105  
 106  				return b[:i], b[i:], nil
 107  			}
 108  		case '\r':
 109  			if len(b) < i+2 {
 110  				return nil, nil, NewParserError(b[len(b):], `need a \n after \r`)
 111  			}
 112  			if b[i+1] != '\n' {
 113  				return nil, nil, NewParserError(b[i:i+2], `need a \n after \r`)
 114  			}
 115  			i += 2 // skip the \n
 116  			continue
 117  		}
 118  		size := characters.Utf8ValidNext(b[i:])
 119  		if size == 0 {
 120  			return nil, nil, NewParserError(b[i:i+1], "invalid character")
 121  		}
 122  		i += size
 123  	}
 124  
 125  	return nil, nil, NewParserError(b[len(b):], `multiline literal string not terminated by '''`)
 126  }
 127  
 128  func scanWindowsNewline(b []byte) ([]byte, []byte, error) {
 129  	const lenCRLF = 2
 130  	if len(b) < lenCRLF {
 131  		return nil, nil, NewParserError(b, "windows new line expected")
 132  	}
 133  
 134  	if b[1] != '\n' {
 135  		return nil, nil, NewParserError(b, `windows new line should be \r\n`)
 136  	}
 137  
 138  	return b[:lenCRLF], b[lenCRLF:], nil
 139  }
 140  
 141  func scanWhitespace(b []byte) ([]byte, []byte) {
 142  	for i := 0; i < len(b); i++ {
 143  		switch b[i] {
 144  		case ' ', '\t':
 145  			continue
 146  		default:
 147  			return b[:i], b[i:]
 148  		}
 149  	}
 150  
 151  	return b, b[len(b):]
 152  }
 153  
 154  func scanComment(b []byte) ([]byte, []byte, error) {
 155  	// comment-start-symbol = %x23 ; #
 156  	// non-ascii = %x80-D7FF / %xE000-10FFFF
 157  	// non-eol = %x09 / %x20-7F / non-ascii
 158  	//
 159  	// comment = comment-start-symbol *non-eol
 160  
 161  	for i := 1; i < len(b); {
 162  		if b[i] == '\n' {
 163  			return b[:i], b[i:], nil
 164  		}
 165  		if b[i] == '\r' {
 166  			if i+1 < len(b) && b[i+1] == '\n' {
 167  				return b[:i+1], b[i+1:], nil
 168  			}
 169  			return nil, nil, NewParserError(b[i:i+1], "invalid character in comment")
 170  		}
 171  		size := characters.Utf8ValidNext(b[i:])
 172  		if size == 0 {
 173  			return nil, nil, NewParserError(b[i:i+1], "invalid character in comment")
 174  		}
 175  
 176  		i += size
 177  	}
 178  
 179  	return b, b[len(b):], nil
 180  }
 181  
 182  func scanBasicString(b []byte) ([]byte, bool, []byte, error) {
 183  	// basic-string = quotation-mark *basic-char quotation-mark
 184  	// quotation-mark = %x22            ; "
 185  	// basic-char = basic-unescaped / escaped
 186  	// basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
 187  	// escaped = escape escape-seq-char
 188  	escaped := false
 189  	i := 1
 190  
 191  	for ; i < len(b); i++ {
 192  		switch b[i] {
 193  		case '"':
 194  			return b[:i+1], escaped, b[i+1:], nil
 195  		case '\n', '\r':
 196  			return nil, escaped, nil, NewParserError(b[i:i+1], "basic strings cannot have new lines")
 197  		case '\\':
 198  			if len(b) < i+2 {
 199  				return nil, escaped, nil, NewParserError(b[i:i+1], "need a character after \\")
 200  			}
 201  			escaped = true
 202  			i++ // skip the next character
 203  		}
 204  	}
 205  
 206  	return nil, escaped, nil, NewParserError(b[len(b):], `basic string not terminated by "`)
 207  }
 208  
 209  func scanMultilineBasicString(b []byte) ([]byte, bool, []byte, error) {
 210  	// ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
 211  	// ml-basic-string-delim
 212  	// ml-basic-string-delim = 3quotation-mark
 213  	// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
 214  	//
 215  	// mlb-content = mlb-char / newline / mlb-escaped-nl
 216  	// mlb-char = mlb-unescaped / escaped
 217  	// mlb-quotes = 1*2quotation-mark
 218  	// mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
 219  	// mlb-escaped-nl = escape ws newline *( wschar / newline )
 220  
 221  	escaped := false
 222  	i := 3
 223  
 224  	for ; i < len(b); i++ {
 225  		switch b[i] {
 226  		case '"':
 227  			if scanFollowsMultilineBasicStringDelimiter(b[i:]) {
 228  				i += 3
 229  
 230  				// At that point we found 3 apostrophe, and i is the
 231  				// index of the byte after the third one. The scanner
 232  				// needs to be eager, because there can be an extra 2
 233  				// apostrophe that can be accepted at the end of the
 234  				// string.
 235  
 236  				if i >= len(b) || b[i] != '"' {
 237  					return b[:i], escaped, b[i:], nil
 238  				}
 239  				i++
 240  
 241  				if i >= len(b) || b[i] != '"' {
 242  					return b[:i], escaped, b[i:], nil
 243  				}
 244  				i++
 245  
 246  				if i < len(b) && b[i] == '"' {
 247  					return nil, escaped, nil, NewParserError(b[i-3:i+1], `""" not allowed in multiline basic string`)
 248  				}
 249  
 250  				return b[:i], escaped, b[i:], nil
 251  			}
 252  		case '\\':
 253  			if len(b) < i+2 {
 254  				return nil, escaped, nil, NewParserError(b[len(b):], "need a character after \\")
 255  			}
 256  			escaped = true
 257  			i++ // skip the next character
 258  		case '\r':
 259  			if len(b) < i+2 {
 260  				return nil, escaped, nil, NewParserError(b[len(b):], `need a \n after \r`)
 261  			}
 262  			if b[i+1] != '\n' {
 263  				return nil, escaped, nil, NewParserError(b[i:i+2], `need a \n after \r`)
 264  			}
 265  			i++ // skip the \n
 266  		}
 267  	}
 268  
 269  	return nil, escaped, nil, NewParserError(b[len(b):], `multiline basic string not terminated by """`)
 270  }
 271