parser.go raw
1 package parser
2
3 import (
4 "fmt"
5 "io/ioutil"
6 "strings"
7
8 "github.com/goccy/go-yaml/ast"
9 "github.com/goccy/go-yaml/internal/errors"
10 "github.com/goccy/go-yaml/lexer"
11 "github.com/goccy/go-yaml/token"
12 "golang.org/x/xerrors"
13 )
14
// parser turns a token stream into AST nodes. It has no fields: all parsing
// state lives in the *context that is passed to each of its methods.
type parser struct{}
16
17 func (p *parser) parseMapping(ctx *context) (*ast.MappingNode, error) {
18 mapTk := ctx.currentToken()
19 node := ast.Mapping(mapTk, true)
20 node.SetPath(ctx.path)
21 ctx.progress(1) // skip MappingStart token
22 for ctx.next() {
23 tk := ctx.currentToken()
24 if tk.Type == token.MappingEndType {
25 node.End = tk
26 return node, nil
27 } else if tk.Type == token.CollectEntryType {
28 ctx.progress(1)
29 continue
30 }
31
32 value, err := p.parseMappingValue(ctx)
33 if err != nil {
34 return nil, errors.Wrapf(err, "failed to parse mapping value in mapping node")
35 }
36 mvnode, ok := value.(*ast.MappingValueNode)
37 if !ok {
38 return nil, errors.ErrSyntax("failed to parse flow mapping node", value.GetToken())
39 }
40 node.Values = append(node.Values, mvnode)
41 ctx.progress(1)
42 }
43 return nil, errors.ErrSyntax("unterminated flow mapping", node.GetToken())
44 }
45
46 func (p *parser) parseSequence(ctx *context) (*ast.SequenceNode, error) {
47 node := ast.Sequence(ctx.currentToken(), true)
48 node.SetPath(ctx.path)
49 ctx.progress(1) // skip SequenceStart token
50 for ctx.next() {
51 tk := ctx.currentToken()
52 if tk.Type == token.SequenceEndType {
53 node.End = tk
54 break
55 } else if tk.Type == token.CollectEntryType {
56 ctx.progress(1)
57 continue
58 }
59
60 value, err := p.parseToken(ctx.withIndex(uint(len(node.Values))), tk)
61 if err != nil {
62 return nil, errors.Wrapf(err, "failed to parse sequence value in flow sequence node")
63 }
64 node.Values = append(node.Values, value)
65 ctx.progress(1)
66 }
67 return node, nil
68 }
69
70 func (p *parser) parseTag(ctx *context) (*ast.TagNode, error) {
71 tagToken := ctx.currentToken()
72 node := ast.Tag(tagToken)
73 node.SetPath(ctx.path)
74 ctx.progress(1) // skip tag token
75 var (
76 value ast.Node
77 err error
78 )
79 switch token.ReservedTagKeyword(tagToken.Value) {
80 case token.MappingTag,
81 token.OrderedMapTag:
82 value, err = p.parseMapping(ctx)
83 case token.IntegerTag,
84 token.FloatTag,
85 token.StringTag,
86 token.BinaryTag,
87 token.TimestampTag,
88 token.NullTag:
89 typ := ctx.currentToken().Type
90 if typ == token.LiteralType || typ == token.FoldedType {
91 value, err = p.parseLiteral(ctx)
92 } else {
93 value = p.parseScalarValue(ctx.currentToken())
94 }
95 case token.SequenceTag,
96 token.SetTag:
97 err = errors.ErrSyntax(fmt.Sprintf("sorry, currently not supported %s tag", tagToken.Value), tagToken)
98 default:
99 // custom tag
100 value, err = p.parseToken(ctx, ctx.currentToken())
101 }
102 if err != nil {
103 return nil, errors.Wrapf(err, "failed to parse tag value")
104 }
105 node.Value = value
106 return node, nil
107 }
108
109 func (p *parser) removeLeftSideNewLineCharacter(src string) string {
110 // CR or LF or CRLF
111 return strings.TrimLeft(strings.TrimLeft(strings.TrimLeft(src, "\r"), "\n"), "\r\n")
112 }
113
114 func (p *parser) existsNewLineCharacter(src string) bool {
115 if strings.Index(src, "\n") > 0 {
116 return true
117 }
118 if strings.Index(src, "\r") > 0 {
119 return true
120 }
121 return false
122 }
123
124 func (p *parser) validateMapKey(tk *token.Token) error {
125 if tk.Type != token.StringType {
126 return nil
127 }
128 origin := p.removeLeftSideNewLineCharacter(tk.Origin)
129 if p.existsNewLineCharacter(origin) {
130 return errors.ErrSyntax("unexpected key name", tk)
131 }
132 return nil
133 }
134
135 func (p *parser) createNullToken(base *token.Token) *token.Token {
136 pos := *(base.Position)
137 pos.Column++
138 return token.New("null", "null", &pos)
139 }
140
141 func (p *parser) parseMapValue(ctx *context, key ast.MapKeyNode, colonToken *token.Token) (ast.Node, error) {
142 node, err := p.createMapValueNode(ctx, key, colonToken)
143 if err != nil {
144 return nil, errors.Wrapf(err, "failed to create map value node")
145 }
146 if node != nil && node.GetPath() == "" {
147 node.SetPath(ctx.path)
148 }
149 return node, nil
150 }
151
152 func (p *parser) createMapValueNode(ctx *context, key ast.MapKeyNode, colonToken *token.Token) (ast.Node, error) {
153 tk := ctx.currentToken()
154 if tk == nil {
155 nullToken := p.createNullToken(colonToken)
156 ctx.insertToken(ctx.idx, nullToken)
157 return ast.Null(nullToken), nil
158 }
159
160 if tk.Position.Column == key.GetToken().Position.Column && tk.Type == token.StringType {
161 // in this case,
162 // ----
163 // key: <value does not defined>
164 // next
165 nullToken := p.createNullToken(colonToken)
166 ctx.insertToken(ctx.idx, nullToken)
167 return ast.Null(nullToken), nil
168 }
169
170 if tk.Position.Column < key.GetToken().Position.Column {
171 // in this case,
172 // ----
173 // key: <value does not defined>
174 // next
175 nullToken := p.createNullToken(colonToken)
176 ctx.insertToken(ctx.idx, nullToken)
177 return ast.Null(nullToken), nil
178 }
179
180 value, err := p.parseToken(ctx, ctx.currentToken())
181 if err != nil {
182 return nil, errors.Wrapf(err, "failed to parse mapping 'value' node")
183 }
184 return value, nil
185 }
186
187 func (p *parser) validateMapValue(ctx *context, key, value ast.Node) error {
188 keyColumn := key.GetToken().Position.Column
189 valueColumn := value.GetToken().Position.Column
190 if keyColumn != valueColumn {
191 return nil
192 }
193 if value.Type() != ast.StringType {
194 return nil
195 }
196 ntk := ctx.nextToken()
197 if ntk == nil || (ntk.Type != token.MappingValueType && ntk.Type != token.SequenceEntryType) {
198 return errors.ErrSyntax("could not found expected ':' token", value.GetToken())
199 }
200 return nil
201 }
202
203 func (p *parser) parseMappingValue(ctx *context) (ast.Node, error) {
204 key, err := p.parseMapKey(ctx)
205 if err != nil {
206 return nil, errors.Wrapf(err, "failed to parse map key")
207 }
208 keyText := key.GetToken().Value
209 key.SetPath(ctx.withChild(keyText).path)
210 if err := p.validateMapKey(key.GetToken()); err != nil {
211 return nil, errors.Wrapf(err, "validate mapping key error")
212 }
213 ctx.progress(1) // progress to mapping value token
214 tk := ctx.currentToken() // get mapping value token
215 if tk == nil {
216 return nil, errors.ErrSyntax("unexpected map", key.GetToken())
217 }
218 ctx.progress(1) // progress to value token
219 if err := p.setSameLineCommentIfExists(ctx.withChild(keyText), key); err != nil {
220 return nil, errors.Wrapf(err, "failed to set same line comment to node")
221 }
222 if key.GetComment() != nil {
223 // if current token is comment, GetComment() is not nil.
224 // then progress to value token
225 ctx.progressIgnoreComment(1)
226 }
227
228 value, err := p.parseMapValue(ctx.withChild(keyText), key, tk)
229 if err != nil {
230 return nil, errors.Wrapf(err, "failed to parse map value")
231 }
232 if err := p.validateMapValue(ctx, key, value); err != nil {
233 return nil, errors.Wrapf(err, "failed to validate map value")
234 }
235
236 mvnode := ast.MappingValue(tk, key, value)
237 mvnode.SetPath(ctx.withChild(keyText).path)
238 node := ast.Mapping(tk, false, mvnode)
239 node.SetPath(ctx.withChild(keyText).path)
240
241 ntk := ctx.nextNotCommentToken()
242 antk := ctx.afterNextNotCommentToken()
243 for antk != nil && antk.Type == token.MappingValueType &&
244 ntk.Position.Column == key.GetToken().Position.Column {
245 ctx.progressIgnoreComment(1)
246 value, err := p.parseToken(ctx, ctx.currentToken())
247 if err != nil {
248 return nil, errors.Wrapf(err, "failed to parse mapping node")
249 }
250 switch value.Type() {
251 case ast.MappingType:
252 c := value.(*ast.MappingNode)
253 comment := c.GetComment()
254 for idx, v := range c.Values {
255 if idx == 0 && comment != nil {
256 if err := v.SetComment(comment); err != nil {
257 return nil, errors.Wrapf(err, "failed to set comment token to node")
258 }
259 }
260 node.Values = append(node.Values, v)
261 }
262 case ast.MappingValueType:
263 node.Values = append(node.Values, value.(*ast.MappingValueNode))
264 default:
265 return nil, xerrors.Errorf("failed to parse mapping value node node is %s", value.Type())
266 }
267 ntk = ctx.nextNotCommentToken()
268 antk = ctx.afterNextNotCommentToken()
269 }
270 if len(node.Values) == 1 {
271 return mvnode, nil
272 }
273 return node, nil
274 }
275
276 func (p *parser) parseSequenceEntry(ctx *context) (*ast.SequenceNode, error) {
277 tk := ctx.currentToken()
278 sequenceNode := ast.Sequence(tk, false)
279 sequenceNode.SetPath(ctx.path)
280 curColumn := tk.Position.Column
281 for tk.Type == token.SequenceEntryType {
282 ctx.progress(1) // skip sequence token
283 tk = ctx.currentToken()
284 if tk == nil {
285 return nil, errors.ErrSyntax("empty sequence entry", ctx.previousToken())
286 }
287 var comment *ast.CommentGroupNode
288 if tk.Type == token.CommentType {
289 comment = p.parseCommentOnly(ctx)
290 tk = ctx.currentToken()
291 if tk.Type != token.SequenceEntryType {
292 break
293 }
294 ctx.progress(1) // skip sequence token
295 }
296 value, err := p.parseToken(ctx.withIndex(uint(len(sequenceNode.Values))), ctx.currentToken())
297 if err != nil {
298 return nil, errors.Wrapf(err, "failed to parse sequence")
299 }
300 if comment != nil {
301 comment.SetPath(ctx.withIndex(uint(len(sequenceNode.Values))).path)
302 sequenceNode.ValueComments = append(sequenceNode.ValueComments, comment)
303 } else {
304 sequenceNode.ValueComments = append(sequenceNode.ValueComments, nil)
305 }
306 sequenceNode.Values = append(sequenceNode.Values, value)
307 tk = ctx.nextNotCommentToken()
308 if tk == nil {
309 break
310 }
311 if tk.Type != token.SequenceEntryType {
312 break
313 }
314 if tk.Position.Column != curColumn {
315 break
316 }
317 ctx.progressIgnoreComment(1)
318 }
319 return sequenceNode, nil
320 }
321
322 func (p *parser) parseAnchor(ctx *context) (*ast.AnchorNode, error) {
323 tk := ctx.currentToken()
324 anchor := ast.Anchor(tk)
325 anchor.SetPath(ctx.path)
326 ntk := ctx.nextToken()
327 if ntk == nil {
328 return nil, errors.ErrSyntax("unexpected anchor. anchor name is undefined", tk)
329 }
330 ctx.progress(1) // skip anchor token
331 name, err := p.parseToken(ctx, ctx.currentToken())
332 if err != nil {
333 return nil, errors.Wrapf(err, "failed to parser anchor name node")
334 }
335 anchor.Name = name
336 ntk = ctx.nextToken()
337 if ntk == nil {
338 return nil, errors.ErrSyntax("unexpected anchor. anchor value is undefined", ctx.currentToken())
339 }
340 ctx.progress(1)
341 value, err := p.parseToken(ctx, ctx.currentToken())
342 if err != nil {
343 return nil, errors.Wrapf(err, "failed to parser anchor name node")
344 }
345 anchor.Value = value
346 return anchor, nil
347 }
348
349 func (p *parser) parseAlias(ctx *context) (*ast.AliasNode, error) {
350 tk := ctx.currentToken()
351 alias := ast.Alias(tk)
352 alias.SetPath(ctx.path)
353 ntk := ctx.nextToken()
354 if ntk == nil {
355 return nil, errors.ErrSyntax("unexpected alias. alias name is undefined", tk)
356 }
357 ctx.progress(1) // skip alias token
358 name, err := p.parseToken(ctx, ctx.currentToken())
359 if err != nil {
360 return nil, errors.Wrapf(err, "failed to parser alias name node")
361 }
362 alias.Value = name
363 return alias, nil
364 }
365
366 func (p *parser) parseMapKey(ctx *context) (ast.MapKeyNode, error) {
367 tk := ctx.currentToken()
368 if value := p.parseScalarValue(tk); value != nil {
369 return value, nil
370 }
371 switch tk.Type {
372 case token.MergeKeyType:
373 return ast.MergeKey(tk), nil
374 case token.MappingKeyType:
375 return p.parseMappingKey(ctx)
376 }
377 return nil, errors.ErrSyntax("unexpected mapping key", tk)
378 }
379
380 func (p *parser) parseStringValue(tk *token.Token) *ast.StringNode {
381 switch tk.Type {
382 case token.StringType,
383 token.SingleQuoteType,
384 token.DoubleQuoteType:
385 return ast.String(tk)
386 }
387 return nil
388 }
389
390 func (p *parser) parseScalarValueWithComment(ctx *context, tk *token.Token) (ast.ScalarNode, error) {
391 node := p.parseScalarValue(tk)
392 if node == nil {
393 return nil, nil
394 }
395 node.SetPath(ctx.path)
396 if p.isSameLineComment(ctx.nextToken(), node) {
397 ctx.progress(1)
398 if err := p.setSameLineCommentIfExists(ctx, node); err != nil {
399 return nil, errors.Wrapf(err, "failed to set same line comment to node")
400 }
401 }
402 return node, nil
403 }
404
405 func (p *parser) parseScalarValue(tk *token.Token) ast.ScalarNode {
406 if node := p.parseStringValue(tk); node != nil {
407 return node
408 }
409 switch tk.Type {
410 case token.NullType:
411 return ast.Null(tk)
412 case token.BoolType:
413 return ast.Bool(tk)
414 case token.IntegerType,
415 token.BinaryIntegerType,
416 token.OctetIntegerType,
417 token.HexIntegerType:
418 return ast.Integer(tk)
419 case token.FloatType:
420 return ast.Float(tk)
421 case token.InfinityType:
422 return ast.Infinity(tk)
423 case token.NanType:
424 return ast.Nan(tk)
425 }
426 return nil
427 }
428
429 func (p *parser) parseDirective(ctx *context) (*ast.DirectiveNode, error) {
430 node := ast.Directive(ctx.currentToken())
431 ctx.progress(1) // skip directive token
432 value, err := p.parseToken(ctx, ctx.currentToken())
433 if err != nil {
434 return nil, errors.Wrapf(err, "failed to parse directive value")
435 }
436 node.Value = value
437 ctx.progress(1)
438 tk := ctx.currentToken()
439 if tk == nil {
440 // Since current token is nil, use the previous token to specify
441 // the syntax error location.
442 return nil, errors.ErrSyntax("unexpected directive value. document not started", ctx.previousToken())
443 }
444 if tk.Type != token.DocumentHeaderType {
445 return nil, errors.ErrSyntax("unexpected directive value. document not started", ctx.currentToken())
446 }
447 return node, nil
448 }
449
450 func (p *parser) parseLiteral(ctx *context) (*ast.LiteralNode, error) {
451 node := ast.Literal(ctx.currentToken())
452 ctx.progress(1) // skip literal/folded token
453
454 tk := ctx.currentToken()
455 var comment *ast.CommentGroupNode
456 if tk.Type == token.CommentType {
457 comment = p.parseCommentOnly(ctx)
458 comment.SetPath(ctx.path)
459 if err := node.SetComment(comment); err != nil {
460 return nil, errors.Wrapf(err, "failed to set comment to literal")
461 }
462 tk = ctx.currentToken()
463 }
464 value, err := p.parseToken(ctx, tk)
465 if err != nil {
466 return nil, errors.Wrapf(err, "failed to parse literal/folded value")
467 }
468 snode, ok := value.(*ast.StringNode)
469 if !ok {
470 return nil, errors.ErrSyntax("unexpected token. required string token", value.GetToken())
471 }
472 node.Value = snode
473 return node, nil
474 }
475
476 func (p *parser) isSameLineComment(tk *token.Token, node ast.Node) bool {
477 if tk == nil {
478 return false
479 }
480 if tk.Type != token.CommentType {
481 return false
482 }
483 return tk.Position.Line == node.GetToken().Position.Line
484 }
485
486 func (p *parser) setSameLineCommentIfExists(ctx *context, node ast.Node) error {
487 tk := ctx.currentToken()
488 if !p.isSameLineComment(tk, node) {
489 return nil
490 }
491 comment := ast.CommentGroup([]*token.Token{tk})
492 comment.SetPath(ctx.path)
493 if err := node.SetComment(comment); err != nil {
494 return errors.Wrapf(err, "failed to set comment token to ast.Node")
495 }
496 return nil
497 }
498
499 func (p *parser) parseDocument(ctx *context) (*ast.DocumentNode, error) {
500 startTk := ctx.currentToken()
501 ctx.progress(1) // skip document header token
502 body, err := p.parseToken(ctx, ctx.currentToken())
503 if err != nil {
504 return nil, errors.Wrapf(err, "failed to parse document body")
505 }
506 node := ast.Document(startTk, body)
507 if ntk := ctx.nextToken(); ntk != nil && ntk.Type == token.DocumentEndType {
508 node.End = ntk
509 ctx.progress(1)
510 }
511 return node, nil
512 }
513
514 func (p *parser) parseCommentOnly(ctx *context) *ast.CommentGroupNode {
515 commentTokens := []*token.Token{}
516 for {
517 tk := ctx.currentToken()
518 if tk == nil {
519 break
520 }
521 if tk.Type != token.CommentType {
522 break
523 }
524 commentTokens = append(commentTokens, tk)
525 ctx.progressIgnoreComment(1) // skip comment token
526 }
527 return ast.CommentGroup(commentTokens)
528 }
529
530 func (p *parser) parseComment(ctx *context) (ast.Node, error) {
531 group := p.parseCommentOnly(ctx)
532 node, err := p.parseToken(ctx, ctx.currentToken())
533 if err != nil {
534 return nil, errors.Wrapf(err, "failed to parse node after comment")
535 }
536 if node == nil {
537 return group, nil
538 }
539 group.SetPath(node.GetPath())
540 if err := node.SetComment(group); err != nil {
541 return nil, errors.Wrapf(err, "failed to set comment token to node")
542 }
543 return node, nil
544 }
545
546 func (p *parser) parseMappingKey(ctx *context) (*ast.MappingKeyNode, error) {
547 keyTk := ctx.currentToken()
548 node := ast.MappingKey(keyTk)
549 node.SetPath(ctx.path)
550 ctx.progress(1) // skip mapping key token
551 value, err := p.parseToken(ctx.withChild(keyTk.Value), ctx.currentToken())
552 if err != nil {
553 return nil, errors.Wrapf(err, "failed to parse map key")
554 }
555 node.Value = value
556 return node, nil
557 }
558
559 func (p *parser) parseToken(ctx *context, tk *token.Token) (ast.Node, error) {
560 node, err := p.createNodeFromToken(ctx, tk)
561 if err != nil {
562 return nil, errors.Wrapf(err, "failed to create node from token")
563 }
564 if node != nil && node.GetPath() == "" {
565 node.SetPath(ctx.path)
566 }
567 return node, nil
568 }
569
570 func (p *parser) createNodeFromToken(ctx *context, tk *token.Token) (ast.Node, error) {
571 if tk == nil {
572 return nil, nil
573 }
574 if tk.NextType() == token.MappingValueType {
575 node, err := p.parseMappingValue(ctx)
576 return node, err
577 }
578 node, err := p.parseScalarValueWithComment(ctx, tk)
579 if err != nil {
580 return nil, errors.Wrapf(err, "failed to parse scalar value")
581 }
582 if node != nil {
583 return node, nil
584 }
585 switch tk.Type {
586 case token.CommentType:
587 return p.parseComment(ctx)
588 case token.MappingKeyType:
589 return p.parseMappingKey(ctx)
590 case token.DocumentHeaderType:
591 return p.parseDocument(ctx)
592 case token.MappingStartType:
593 return p.parseMapping(ctx)
594 case token.SequenceStartType:
595 return p.parseSequence(ctx)
596 case token.SequenceEntryType:
597 return p.parseSequenceEntry(ctx)
598 case token.AnchorType:
599 return p.parseAnchor(ctx)
600 case token.AliasType:
601 return p.parseAlias(ctx)
602 case token.DirectiveType:
603 return p.parseDirective(ctx)
604 case token.TagType:
605 return p.parseTag(ctx)
606 case token.LiteralType, token.FoldedType:
607 return p.parseLiteral(ctx)
608 }
609 return nil, nil
610 }
611
612 func (p *parser) parse(tokens token.Tokens, mode Mode) (*ast.File, error) {
613 ctx := newContext(tokens, mode)
614 file := &ast.File{Docs: []*ast.DocumentNode{}}
615 for ctx.next() {
616 node, err := p.parseToken(ctx, ctx.currentToken())
617 if err != nil {
618 return nil, errors.Wrapf(err, "failed to parse")
619 }
620 ctx.progressIgnoreComment(1)
621 if node == nil {
622 continue
623 }
624 if doc, ok := node.(*ast.DocumentNode); ok {
625 file.Docs = append(file.Docs, doc)
626 } else {
627 file.Docs = append(file.Docs, ast.Document(nil, node))
628 }
629 }
630 return file, nil
631 }
632
// Mode is a bit-flag value accepted by Parse, ParseBytes and ParseFile; the
// parser forwards it unchanged to newContext.
type Mode uint

// Parser mode flags. Each constant occupies its own bit (1 << iota).
const (
	ParseComments Mode = 1 << iota // parse comments and add them to AST
)
638
639 // ParseBytes parse from byte slice, and returns ast.File
640 func ParseBytes(bytes []byte, mode Mode) (*ast.File, error) {
641 tokens := lexer.Tokenize(string(bytes))
642 f, err := Parse(tokens, mode)
643 if err != nil {
644 return nil, errors.Wrapf(err, "failed to parse")
645 }
646 return f, nil
647 }
648
649 // Parse parse from token instances, and returns ast.File
650 func Parse(tokens token.Tokens, mode Mode) (*ast.File, error) {
651 var p parser
652 f, err := p.parse(tokens, mode)
653 if err != nil {
654 return nil, errors.Wrapf(err, "failed to parse")
655 }
656 return f, nil
657 }
658
659 // Parse parse from filename, and returns ast.File
660 func ParseFile(filename string, mode Mode) (*ast.File, error) {
661 file, err := ioutil.ReadFile(filename)
662 if err != nil {
663 return nil, errors.Wrapf(err, "failed to read file: %s", filename)
664 }
665 f, err := ParseBytes(file, mode)
666 if err != nil {
667 return nil, errors.Wrapf(err, "failed to parse")
668 }
669 f.Name = filename
670 return f, nil
671 }
672