1 package toml
2 3 import (
4 "fmt"
5 "math"
6 "strconv"
7 "strings"
8 "time"
9 "unicode/utf8"
10 11 "github.com/BurntSushi/toml/internal"
12 )
// parser holds the state of one parse run: the lexer producing items, the key
// context currently in scope, and the results collected so far.
type parser struct {
	lx         *lexer   // Lexer that supplies the items consumed by next().
	context    Key      // Full key for the current hash in scope.
	currentKey string   // Base key name for everything except hashes.
	pos        Position // Current position in the TOML file.

	ordered []Key // List of keys in the order that they appear in the TOML data.

	keyInfo   map[string]keyInfo  // Map keyname → info about the TOML key.
	mapping   map[string]any      // Map keyname → key value.
	implicits map[string]struct{} // Record implicit keys (e.g. "key.group.names").
}
// keyInfo is the per-key record stored in parser.keyInfo by setType: the
// position passed to setType and the TOML type of the key's value.
type keyInfo struct {
	pos      Position
	tomlType tomlType
}
31 32 func parse(data string) (p *parser, err error) {
33 defer func() {
34 if r := recover(); r != nil {
35 if pErr, ok := r.(ParseError); ok {
36 pErr.input = data
37 err = pErr
38 return
39 }
40 panic(r)
41 }
42 }()
43 44 // Read over BOM; do this here as the lexer calls utf8.DecodeRuneInString()
45 // which mangles stuff. UTF-16 BOM isn't strictly valid, but some tools add
46 // it anyway.
47 if strings.HasPrefix(data, "\xff\xfe") || strings.HasPrefix(data, "\xfe\xff") { // UTF-16
48 data = data[2:]
49 } else if strings.HasPrefix(data, "\xef\xbb\xbf") { // UTF-8
50 data = data[3:]
51 }
52 53 // Examine first few bytes for NULL bytes; this probably means it's a UTF-16
54 // file (second byte in surrogate pair being NULL). Again, do this here to
55 // avoid having to deal with UTF-8/16 stuff in the lexer.
56 ex := 6
57 if len(data) < 6 {
58 ex = len(data)
59 }
60 if i := strings.IndexRune(data[:ex], 0); i > -1 {
61 return nil, ParseError{
62 Message: "files cannot contain NULL bytes; probably using UTF-16; TOML files must be UTF-8",
63 Position: Position{Line: 1, Col: 1, Start: i, Len: 1},
64 Line: 1,
65 input: data,
66 }
67 }
68 69 p = &parser{
70 keyInfo: make(map[string]keyInfo),
71 mapping: make(map[string]any),
72 lx: lex(data),
73 ordered: make([]Key, 0),
74 implicits: make(map[string]struct{}),
75 }
76 for {
77 item := p.next()
78 if item.typ == itemEOF {
79 break
80 }
81 p.topLevel(item)
82 }
83 84 return p, nil
85 }
86 87 func (p *parser) panicErr(it item, err error) {
88 panic(ParseError{
89 Message: err.Error(),
90 err: err,
91 Position: it.pos.withCol(p.lx.input),
92 Line: it.pos.Len,
93 LastKey: p.current(),
94 })
95 }
96 97 func (p *parser) panicItemf(it item, format string, v ...any) {
98 panic(ParseError{
99 Message: fmt.Sprintf(format, v...),
100 Position: it.pos.withCol(p.lx.input),
101 Line: it.pos.Len,
102 LastKey: p.current(),
103 })
104 }
105 106 func (p *parser) panicf(format string, v ...any) {
107 panic(ParseError{
108 Message: fmt.Sprintf(format, v...),
109 Position: p.pos.withCol(p.lx.input),
110 Line: p.pos.Line,
111 LastKey: p.current(),
112 })
113 }
114 115 func (p *parser) next() item {
116 it := p.lx.nextItem()
117 //fmt.Printf("ITEM %-18s line %-3d │ %q\n", it.typ, it.pos.Line, it.val)
118 if it.typ == itemError {
119 if it.err != nil {
120 panic(ParseError{
121 Message: it.err.Error(),
122 err: it.err,
123 Position: it.pos.withCol(p.lx.input),
124 Line: it.pos.Line,
125 LastKey: p.current(),
126 })
127 }
128 129 p.panicItemf(it, "%s", it.val)
130 }
131 return it
132 }
133 134 func (p *parser) nextPos() item {
135 it := p.next()
136 p.pos = it.pos
137 return it
138 }
139 140 func (p *parser) bug(format string, v ...any) {
141 panic(fmt.Sprintf("BUG: "+format+"\n\n", v...))
142 }
143 144 func (p *parser) expect(typ itemType) item {
145 it := p.next()
146 p.assertEqual(typ, it.typ)
147 return it
148 }
149 150 func (p *parser) assertEqual(expected, got itemType) {
151 if expected != got {
152 p.bug("Expected '%s' but got '%s'.", expected, got)
153 }
154 }
155 156 func (p *parser) topLevel(item item) {
157 switch item.typ {
158 case itemCommentStart: // # ..
159 p.expect(itemText)
160 case itemTableStart: // [ .. ]
161 name := p.nextPos()
162 163 var key Key
164 for ; name.typ != itemTableEnd && name.typ != itemEOF; name = p.next() {
165 key = append(key, p.keyString(name))
166 }
167 p.assertEqual(itemTableEnd, name.typ)
168 169 p.addContext(key, false)
170 p.setType("", tomlHash, item.pos)
171 p.ordered = append(p.ordered, key)
172 case itemArrayTableStart: // [[ .. ]]
173 name := p.nextPos()
174 175 var key Key
176 for ; name.typ != itemArrayTableEnd && name.typ != itemEOF; name = p.next() {
177 key = append(key, p.keyString(name))
178 }
179 p.assertEqual(itemArrayTableEnd, name.typ)
180 181 p.addContext(key, true)
182 p.setType("", tomlArrayHash, item.pos)
183 p.ordered = append(p.ordered, key)
184 case itemKeyStart: // key = ..
185 outerContext := p.context
186 /// Read all the key parts (e.g. 'a' and 'b' in 'a.b')
187 k := p.nextPos()
188 var key Key
189 for ; k.typ != itemKeyEnd && k.typ != itemEOF; k = p.next() {
190 key = append(key, p.keyString(k))
191 }
192 p.assertEqual(itemKeyEnd, k.typ)
193 194 /// The current key is the last part.
195 p.currentKey = key.last()
196 197 /// All the other parts (if any) are the context; need to set each part
198 /// as implicit.
199 context := key.parent()
200 for i := range context {
201 p.addImplicitContext(append(p.context, context[i:i+1]...))
202 }
203 p.ordered = append(p.ordered, p.context.add(p.currentKey))
204 205 /// Set value.
206 vItem := p.next()
207 val, typ := p.value(vItem, false)
208 p.setValue(p.currentKey, val)
209 p.setType(p.currentKey, typ, vItem.pos)
210 211 /// Remove the context we added (preserving any context from [tbl] lines).
212 p.context = outerContext
213 p.currentKey = ""
214 default:
215 p.bug("Unexpected type at top level: %s", item.typ)
216 }
217 }
218 219 // Gets a string for a key (or part of a key in a table name).
220 func (p *parser) keyString(it item) string {
221 switch it.typ {
222 case itemText:
223 return it.val
224 case itemString, itemStringEsc, itemMultilineString,
225 itemRawString, itemRawMultilineString:
226 s, _ := p.value(it, false)
227 return s.(string)
228 default:
229 p.bug("Unexpected key type: %s", it.typ)
230 }
231 panic("unreachable")
232 }
// datetimeRepl rewrites a datetime string before it is parsed in
// valueDatetime: a lower-case "z" or "t" is upper-cased and a space is
// replaced with "T".
var datetimeRepl = strings.NewReplacer(
	"z", "Z",
	"t", "T",
	" ", "T")
238 239 // value translates an expected value from the lexer into a Go value wrapped
240 // as an empty interface.
241 func (p *parser) value(it item, parentIsArray bool) (any, tomlType) {
242 switch it.typ {
243 case itemString:
244 return it.val, p.typeOfPrimitive(it)
245 case itemStringEsc:
246 return p.replaceEscapes(it, it.val), p.typeOfPrimitive(it)
247 case itemMultilineString:
248 return p.replaceEscapes(it, p.stripEscapedNewlines(stripFirstNewline(it.val))), p.typeOfPrimitive(it)
249 case itemRawString:
250 return it.val, p.typeOfPrimitive(it)
251 case itemRawMultilineString:
252 return stripFirstNewline(it.val), p.typeOfPrimitive(it)
253 case itemInteger:
254 return p.valueInteger(it)
255 case itemFloat:
256 return p.valueFloat(it)
257 case itemBool:
258 switch it.val {
259 case "true":
260 return true, p.typeOfPrimitive(it)
261 case "false":
262 return false, p.typeOfPrimitive(it)
263 default:
264 p.bug("Expected boolean value, but got '%s'.", it.val)
265 }
266 case itemDatetime:
267 return p.valueDatetime(it)
268 case itemArray:
269 return p.valueArray(it)
270 case itemInlineTableStart:
271 return p.valueInlineTable(it, parentIsArray)
272 default:
273 p.bug("Unexpected value type: %s", it.typ)
274 }
275 panic("unreachable")
276 }
277 278 func (p *parser) valueInteger(it item) (any, tomlType) {
279 if !numUnderscoresOK(it.val) {
280 p.panicItemf(it, "Invalid integer %q: underscores must be surrounded by digits", it.val)
281 }
282 if numHasLeadingZero(it.val) {
283 p.panicItemf(it, "Invalid integer %q: cannot have leading zeroes", it.val)
284 }
285 286 num, err := strconv.ParseInt(it.val, 0, 64)
287 if err != nil {
288 // Distinguish integer values. Normally, it'd be a bug if the lexer
289 // provides an invalid integer, but it's possible that the number is
290 // out of range of valid values (which the lexer cannot determine).
291 // So mark the former as a bug but the latter as a legitimate user
292 // error.
293 if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange {
294 p.panicErr(it, errParseRange{i: it.val, size: "int64"})
295 } else {
296 p.bug("Expected integer value, but got '%s'.", it.val)
297 }
298 }
299 return num, p.typeOfPrimitive(it)
300 }
301 302 func (p *parser) valueFloat(it item) (any, tomlType) {
303 parts := strings.FieldsFunc(it.val, func(r rune) bool {
304 switch r {
305 case '.', 'e', 'E':
306 return true
307 }
308 return false
309 })
310 for _, part := range parts {
311 if !numUnderscoresOK(part) {
312 p.panicItemf(it, "Invalid float %q: underscores must be surrounded by digits", it.val)
313 }
314 }
315 if len(parts) > 0 && numHasLeadingZero(parts[0]) {
316 p.panicItemf(it, "Invalid float %q: cannot have leading zeroes", it.val)
317 }
318 if !numPeriodsOK(it.val) {
319 // As a special case, numbers like '123.' or '1.e2',
320 // which are valid as far as Go/strconv are concerned,
321 // must be rejected because TOML says that a fractional
322 // part consists of '.' followed by 1+ digits.
323 p.panicItemf(it, "Invalid float %q: '.' must be followed by one or more digits", it.val)
324 }
325 val := strings.Replace(it.val, "_", "", -1)
326 signbit := false
327 if val == "+nan" || val == "-nan" {
328 signbit = val == "-nan"
329 val = "nan"
330 }
331 num, err := strconv.ParseFloat(val, 64)
332 if err != nil {
333 if e, ok := err.(*strconv.NumError); ok && e.Err == strconv.ErrRange {
334 p.panicErr(it, errParseRange{i: it.val, size: "float64"})
335 } else {
336 p.panicItemf(it, "Invalid float value: %q", it.val)
337 }
338 }
339 if signbit {
340 num = math.Copysign(num, -1)
341 }
342 return num, p.typeOfPrimitive(it)
343 }
// dtTypes lists the datetime layouts that valueDatetime tries, in this order,
// together with the location passed to time.ParseInLocation for each layout.
var dtTypes = []struct {
	fmt  string
	zone *time.Location
}{
	{time.RFC3339Nano, time.Local},
	{"2006-01-02T15:04:05.999999999", internal.LocalDatetime},
	{"2006-01-02", internal.LocalDate},
	{"15:04:05.999999999", internal.LocalTime},
	{"2006-01-02T15:04Z07:00", time.Local},
	{"2006-01-02T15:04", internal.LocalDatetime},
	{"15:04", internal.LocalTime},
}
357 358 func (p *parser) valueDatetime(it item) (any, tomlType) {
359 it.val = datetimeRepl.Replace(it.val)
360 var (
361 t time.Time
362 ok bool
363 err error
364 )
365 for _, dt := range dtTypes {
366 t, err = time.ParseInLocation(dt.fmt, it.val, dt.zone)
367 if err == nil {
368 if missingLeadingZero(it.val, dt.fmt) {
369 p.panicErr(it, errParseDate{it.val})
370 }
371 ok = true
372 break
373 }
374 }
375 if !ok {
376 p.panicErr(it, errParseDate{it.val})
377 }
378 return t, p.typeOfPrimitive(it)
379 }
// missingLeadingZero reports whether the datetime d is missing a leading zero
// somewhere, given the layout l it was parsed with.
//
// Go's time.Parse() will accept numbers without a leading zero; there isn't any
// way to require it. https://github.com/golang/go/issues/29911
//
// The check depends on the fact that the separators (- and :) should always be
// at the same location: every non-digit byte of l must appear at the same
// index in d. Scanning stops (with false) at the first '.' or 'Z' in l, since
// what follows no longer has a fixed position.
//
// If d is too short to contain a separator position at all, it is reported as
// malformed (true) rather than indexed out of range.
func missingLeadingZero(d, l string) bool {
	for i := 0; i < len(l); i++ {
		c := l[i]
		if c == '.' || c == 'Z' {
			return false
		}
		if c >= '0' && c <= '9' {
			continue
		}
		if i >= len(d) || d[i] != c {
			return true
		}
	}
	return false
}
397 398 func (p *parser) valueArray(it item) (any, tomlType) {
399 p.setType(p.currentKey, tomlArray, it.pos)
400 401 var (
402 // Initialize to a non-nil slice to make it consistent with how S = []
403 // decodes into a non-nil slice inside something like struct { S
404 // []string }. See #338
405 array = make([]any, 0, 2)
406 )
407 for it = p.next(); it.typ != itemArrayEnd; it = p.next() {
408 if it.typ == itemCommentStart {
409 p.expect(itemText)
410 continue
411 }
412 413 val, typ := p.value(it, true)
414 array = append(array, val)
415 416 // XXX: type isn't used here, we need it to record the accurate type
417 // information.
418 //
419 // Not entirely sure how to best store this; could use "key[0]",
420 // "key[1]" notation, or maybe store it on the Array type?
421 _ = typ
422 }
423 return array, tomlArray
424 }
425 426 func (p *parser) valueInlineTable(it item, parentIsArray bool) (any, tomlType) {
427 var (
428 topHash = make(map[string]any)
429 outerContext = p.context
430 outerKey = p.currentKey
431 )
432 433 p.context = append(p.context, p.currentKey)
434 prevContext := p.context
435 p.currentKey = ""
436 437 p.addImplicit(p.context)
438 p.addContext(p.context, parentIsArray)
439 440 /// Loop over all table key/value pairs.
441 for it := p.next(); it.typ != itemInlineTableEnd; it = p.next() {
442 if it.typ == itemCommentStart {
443 p.expect(itemText)
444 continue
445 }
446 447 /// Read all key parts.
448 k := p.nextPos()
449 var key Key
450 for ; k.typ != itemKeyEnd && k.typ != itemEOF; k = p.next() {
451 key = append(key, p.keyString(k))
452 }
453 p.assertEqual(itemKeyEnd, k.typ)
454 455 /// The current key is the last part.
456 p.currentKey = key.last()
457 458 /// All the other parts (if any) are the context; need to set each part
459 /// as implicit.
460 context := key.parent()
461 for i := range context {
462 p.addImplicitContext(append(p.context, context[i:i+1]...))
463 }
464 p.ordered = append(p.ordered, p.context.add(p.currentKey))
465 466 /// Set the value.
467 val, typ := p.value(p.next(), false)
468 p.setValue(p.currentKey, val)
469 p.setType(p.currentKey, typ, it.pos)
470 471 hash := topHash
472 for _, c := range context {
473 h, ok := hash[c]
474 if !ok {
475 h = make(map[string]any)
476 hash[c] = h
477 }
478 hash, ok = h.(map[string]any)
479 if !ok {
480 p.panicf("%q is not a table", p.context)
481 }
482 }
483 hash[p.currentKey] = val
484 485 /// Restore context.
486 p.context = prevContext
487 }
488 p.context = outerContext
489 p.currentKey = outerKey
490 return topHash, tomlHash
491 }
// numHasLeadingZero reports whether the number s has a leading zero. A lone
// "0", a sign followed by a lone "0", and the base prefixes 0b, 0o and 0x are
// allowed.
func numHasLeadingZero(s string) bool {
	if len(s) < 2 {
		return false
	}
	switch s[0] {
	case '0':
		// Anything after a leading 0 other than a base prefix is an error.
		return s[1] != 'b' && s[1] != 'o' && s[1] != 'x'
	case '+', '-':
		// A signed number may only start with 0 when that 0 is all there is.
		return len(s) > 2 && s[1] == '0'
	}
	return false
}
504 505 // numUnderscoresOK checks whether each underscore in s is surrounded by
506 // characters that are not underscores.
507 func numUnderscoresOK(s string) bool {
508 switch s {
509 case "nan", "+nan", "-nan", "inf", "-inf", "+inf":
510 return true
511 }
512 accept := false
513 for _, r := range s {
514 if r == '_' {
515 if !accept {
516 return false
517 }
518 }
519 520 // isHex is a superset of all the permissible characters surrounding an
521 // underscore.
522 accept = isHex(r)
523 }
524 return accept
525 }
526 527 // numPeriodsOK checks whether every period in s is followed by a digit.
528 func numPeriodsOK(s string) bool {
529 period := false
530 for _, r := range s {
531 if period && !isDigit(r) {
532 return false
533 }
534 period = r == '.'
535 }
536 return !period
537 }
538 539 // Set the current context of the parser, where the context is either a hash or
540 // an array of hashes, depending on the value of the `array` parameter.
541 //
542 // Establishing the context also makes sure that the key isn't a duplicate, and
543 // will create implicit hashes automatically.
544 func (p *parser) addContext(key Key, array bool) {
545 /// Always start at the top level and drill down for our context.
546 hashContext := p.mapping
547 keyContext := make(Key, 0, len(key)-1)
548 549 /// We only need implicit hashes for the parents.
550 for _, k := range key.parent() {
551 _, ok := hashContext[k]
552 keyContext = append(keyContext, k)
553 554 // No key? Make an implicit hash and move on.
555 if !ok {
556 p.addImplicit(keyContext)
557 hashContext[k] = make(map[string]any)
558 }
559 560 // If the hash context is actually an array of tables, then set
561 // the hash context to the last element in that array.
562 //
563 // Otherwise, it better be a table, since this MUST be a key group (by
564 // virtue of it not being the last element in a key).
565 switch t := hashContext[k].(type) {
566 case []map[string]any:
567 hashContext = t[len(t)-1]
568 case map[string]any:
569 hashContext = t
570 default:
571 p.panicf("Key '%s' was already created as a hash.", keyContext)
572 }
573 }
574 575 p.context = keyContext
576 if array {
577 // If this is the first element for this array, then allocate a new
578 // list of tables for it.
579 k := key.last()
580 if _, ok := hashContext[k]; !ok {
581 hashContext[k] = make([]map[string]any, 0, 4)
582 }
583 584 // Add a new table. But make sure the key hasn't already been used
585 // for something else.
586 if hash, ok := hashContext[k].([]map[string]any); ok {
587 hashContext[k] = append(hash, make(map[string]any))
588 } else {
589 p.panicf("Key '%s' was already created and cannot be used as an array.", key)
590 }
591 } else {
592 p.setValue(key.last(), make(map[string]any))
593 }
594 p.context = append(p.context, key.last())
595 }
596 597 // setValue sets the given key to the given value in the current context.
598 // It will make sure that the key hasn't already been defined, account for
599 // implicit key groups.
600 func (p *parser) setValue(key string, value any) {
601 var (
602 tmpHash any
603 ok bool
604 hash = p.mapping
605 keyContext = make(Key, 0, len(p.context)+1)
606 )
607 for _, k := range p.context {
608 keyContext = append(keyContext, k)
609 if tmpHash, ok = hash[k]; !ok {
610 p.bug("Context for key '%s' has not been established.", keyContext)
611 }
612 switch t := tmpHash.(type) {
613 case []map[string]any:
614 // The context is a table of hashes. Pick the most recent table
615 // defined as the current hash.
616 hash = t[len(t)-1]
617 case map[string]any:
618 hash = t
619 default:
620 p.panicf("Key '%s' has already been defined.", keyContext)
621 }
622 }
623 keyContext = append(keyContext, key)
624 625 if _, ok := hash[key]; ok {
626 // Normally redefining keys isn't allowed, but the key could have been
627 // defined implicitly and it's allowed to be redefined concretely. (See
628 // the `valid/implicit-and-explicit-after.toml` in toml-test)
629 //
630 // But we have to make sure to stop marking it as an implicit. (So that
631 // another redefinition provokes an error.)
632 //
633 // Note that since it has already been defined (as a hash), we don't
634 // want to overwrite it. So our business is done.
635 if p.isArray(keyContext) {
636 if !p.isImplicit(keyContext) {
637 if _, ok := hash[key]; ok {
638 p.panicf("Key '%s' has already been defined.", keyContext)
639 }
640 }
641 p.removeImplicit(keyContext)
642 hash[key] = value
643 return
644 }
645 if p.isImplicit(keyContext) {
646 p.removeImplicit(keyContext)
647 return
648 }
649 // Otherwise, we have a concrete key trying to override a previous key,
650 // which is *always* wrong.
651 p.panicf("Key '%s' has already been defined.", keyContext)
652 }
653 654 hash[key] = value
655 }
656 657 // setType sets the type of a particular value at a given key. It should be
658 // called immediately AFTER setValue.
659 //
660 // Note that if `key` is empty, then the type given will be applied to the
661 // current context (which is either a table or an array of tables).
662 func (p *parser) setType(key string, typ tomlType, pos Position) {
663 keyContext := make(Key, 0, len(p.context)+1)
664 keyContext = append(keyContext, p.context...)
665 if len(key) > 0 { // allow type setting for hashes
666 keyContext = append(keyContext, key)
667 }
668 // Special case to make empty keys ("" = 1) work.
669 // Without it it will set "" rather than `""`.
670 // TODO: why is this needed? And why is this only needed here?
671 if len(keyContext) == 0 {
672 keyContext = Key{""}
673 }
674 p.keyInfo[keyContext.String()] = keyInfo{tomlType: typ, pos: pos}
675 }
676 677 // Implicit keys need to be created when tables are implied in "a.b.c.d = 1" and
678 // "[a.b.c]" (the "a", "b", and "c" hashes are never created explicitly).
679 func (p *parser) addImplicit(key Key) { p.implicits[key.String()] = struct{}{} }
680 func (p *parser) removeImplicit(key Key) { delete(p.implicits, key.String()) }
681 func (p *parser) isImplicit(key Key) bool { _, ok := p.implicits[key.String()]; return ok }
682 func (p *parser) isArray(key Key) bool { return p.keyInfo[key.String()].tomlType == tomlArray }
683 func (p *parser) addImplicitContext(key Key) { p.addImplicit(key); p.addContext(key, false) }
684 685 // current returns the full key name of the current context.
686 func (p *parser) current() string {
687 if len(p.currentKey) == 0 {
688 return p.context.String()
689 }
690 if len(p.context) == 0 {
691 return p.currentKey
692 }
693 return fmt.Sprintf("%s.%s", p.context, p.currentKey)
694 }
// stripFirstNewline removes a single newline ("\n" or "\r\n") from the start
// of s, if there is one. Anything else is returned unchanged.
func stripFirstNewline(s string) string {
	switch {
	case len(s) >= 1 && s[0] == '\n':
		return s[1:]
	case len(s) >= 2 && s[:2] == "\r\n":
		return s[2:]
	default:
		return s
	}
}
705 706 // stripEscapedNewlines removes whitespace after line-ending backslashes in
707 // multiline strings.
708 //
709 // A line-ending backslash is an unescaped \ followed only by whitespace until
710 // the next newline. After a line-ending backslash, all whitespace is removed
711 // until the next non-whitespace character.
712 func (p *parser) stripEscapedNewlines(s string) string {
713 var (
714 b strings.Builder
715 i int
716 )
717 b.Grow(len(s))
718 for {
719 ix := strings.Index(s[i:], `\`)
720 if ix < 0 {
721 b.WriteString(s)
722 return b.String()
723 }
724 i += ix
725 726 if len(s) > i+1 && s[i+1] == '\\' {
727 // Escaped backslash.
728 i += 2
729 continue
730 }
731 // Scan until the next non-whitespace.
732 j := i + 1
733 whitespaceLoop:
734 for ; j < len(s); j++ {
735 switch s[j] {
736 case ' ', '\t', '\r', '\n':
737 default:
738 break whitespaceLoop
739 }
740 }
741 if j == i+1 {
742 // Not a whitespace escape.
743 i++
744 continue
745 }
746 if !strings.Contains(s[i:j], "\n") {
747 // This is not a line-ending backslash. (It's a bad escape sequence,
748 // but we can let replaceEscapes catch it.)
749 i++
750 continue
751 }
752 b.WriteString(s[:i])
753 s = s[j:]
754 i = 0
755 }
756 }
757 758 func (p *parser) replaceEscapes(it item, str string) string {
759 var (
760 b strings.Builder
761 skip = 0
762 )
763 b.Grow(len(str))
764 for i, c := range str {
765 if skip > 0 {
766 skip--
767 continue
768 }
769 if c != '\\' {
770 b.WriteRune(c)
771 continue
772 }
773 774 if i >= len(str) {
775 p.bug("Escape sequence at end of string.")
776 return ""
777 }
778 switch str[i+1] {
779 default:
780 p.bug("Expected valid escape code after \\, but got %q.", str[i+1])
781 case ' ', '\t':
782 p.panicItemf(it, "invalid escape: '\\%c'", str[i+1])
783 case 'b':
784 b.WriteByte(0x08)
785 skip = 1
786 case 't':
787 b.WriteByte(0x09)
788 skip = 1
789 case 'n':
790 b.WriteByte(0x0a)
791 skip = 1
792 case 'f':
793 b.WriteByte(0x0c)
794 skip = 1
795 case 'r':
796 b.WriteByte(0x0d)
797 skip = 1
798 case 'e':
799 b.WriteByte(0x1b)
800 skip = 1
801 case '"':
802 b.WriteByte(0x22)
803 skip = 1
804 case '\\':
805 b.WriteByte(0x5c)
806 skip = 1
807 // The lexer guarantees the correct number of characters are present;
808 // don't need to check here.
809 case 'x':
810 escaped := p.asciiEscapeToUnicode(it, str[i+2:i+4])
811 b.WriteRune(escaped)
812 skip = 3
813 case 'u':
814 escaped := p.asciiEscapeToUnicode(it, str[i+2:i+6])
815 b.WriteRune(escaped)
816 skip = 5
817 case 'U':
818 escaped := p.asciiEscapeToUnicode(it, str[i+2:i+10])
819 b.WriteRune(escaped)
820 skip = 9
821 }
822 }
823 return b.String()
824 }
825 826 func (p *parser) asciiEscapeToUnicode(it item, s string) rune {
827 hex, err := strconv.ParseUint(strings.ToLower(s), 16, 32)
828 if err != nil {
829 p.bug("Could not parse '%s' as a hexadecimal number, but the lexer claims it's OK: %s", s, err)
830 }
831 if !utf8.ValidRune(rune(hex)) {
832 p.panicItemf(it, "Escaped character '\\u%s' is not valid UTF-8.", s)
833 }
834 return rune(hex)
835 }
836