parser.go raw

   1  package parser
   2  
   3  import (
   4  	"errors"
   5  	"fmt"
   6  
   7  	"github.com/hashicorp/hcl/hcl/ast"
   8  	hcltoken "github.com/hashicorp/hcl/hcl/token"
   9  	"github.com/hashicorp/hcl/json/scanner"
  10  	"github.com/hashicorp/hcl/json/token"
  11  )
  12  
  13  type Parser struct {
  14  	sc *scanner.Scanner
  15  
  16  	// Last read token
  17  	tok       token.Token
  18  	commaPrev token.Token
  19  
  20  	enableTrace bool
  21  	indent      int
  22  	n           int // buffer size (max = 1)
  23  }
  24  
  25  func newParser(src []byte) *Parser {
  26  	return &Parser{
  27  		sc: scanner.New(src),
  28  	}
  29  }
  30  
  31  // Parse returns the fully parsed source and returns the abstract syntax tree.
  32  func Parse(src []byte) (*ast.File, error) {
  33  	p := newParser(src)
  34  	return p.Parse()
  35  }
  36  
  37  var errEofToken = errors.New("EOF token found")
  38  
  39  // Parse returns the fully parsed source and returns the abstract syntax tree.
  40  func (p *Parser) Parse() (*ast.File, error) {
  41  	f := &ast.File{}
  42  	var err, scerr error
  43  	p.sc.Error = func(pos token.Pos, msg string) {
  44  		scerr = fmt.Errorf("%s: %s", pos, msg)
  45  	}
  46  
  47  	// The root must be an object in JSON
  48  	object, err := p.object()
  49  	if scerr != nil {
  50  		return nil, scerr
  51  	}
  52  	if err != nil {
  53  		return nil, err
  54  	}
  55  
  56  	// We make our final node an object list so it is more HCL compatible
  57  	f.Node = object.List
  58  
  59  	// Flatten it, which finds patterns and turns them into more HCL-like
  60  	// AST trees.
  61  	flattenObjects(f.Node)
  62  
  63  	return f, nil
  64  }
  65  
  66  func (p *Parser) objectList() (*ast.ObjectList, error) {
  67  	defer un(trace(p, "ParseObjectList"))
  68  	node := &ast.ObjectList{}
  69  
  70  	for {
  71  		n, err := p.objectItem()
  72  		if err == errEofToken {
  73  			break // we are finished
  74  		}
  75  
  76  		// we don't return a nil node, because might want to use already
  77  		// collected items.
  78  		if err != nil {
  79  			return node, err
  80  		}
  81  
  82  		node.Add(n)
  83  
  84  		// Check for a followup comma. If it isn't a comma, then we're done
  85  		if tok := p.scan(); tok.Type != token.COMMA {
  86  			break
  87  		}
  88  	}
  89  
  90  	return node, nil
  91  }
  92  
  93  // objectItem parses a single object item
  94  func (p *Parser) objectItem() (*ast.ObjectItem, error) {
  95  	defer un(trace(p, "ParseObjectItem"))
  96  
  97  	keys, err := p.objectKey()
  98  	if err != nil {
  99  		return nil, err
 100  	}
 101  
 102  	o := &ast.ObjectItem{
 103  		Keys: keys,
 104  	}
 105  
 106  	switch p.tok.Type {
 107  	case token.COLON:
 108  		pos := p.tok.Pos
 109  		o.Assign = hcltoken.Pos{
 110  			Filename: pos.Filename,
 111  			Offset:   pos.Offset,
 112  			Line:     pos.Line,
 113  			Column:   pos.Column,
 114  		}
 115  
 116  		o.Val, err = p.objectValue()
 117  		if err != nil {
 118  			return nil, err
 119  		}
 120  	}
 121  
 122  	return o, nil
 123  }
 124  
 125  // objectKey parses an object key and returns a ObjectKey AST
 126  func (p *Parser) objectKey() ([]*ast.ObjectKey, error) {
 127  	keyCount := 0
 128  	keys := make([]*ast.ObjectKey, 0)
 129  
 130  	for {
 131  		tok := p.scan()
 132  		switch tok.Type {
 133  		case token.EOF:
 134  			return nil, errEofToken
 135  		case token.STRING:
 136  			keyCount++
 137  			keys = append(keys, &ast.ObjectKey{
 138  				Token: p.tok.HCLToken(),
 139  			})
 140  		case token.COLON:
 141  			// If we have a zero keycount it means that we never got
 142  			// an object key, i.e. `{ :`. This is a syntax error.
 143  			if keyCount == 0 {
 144  				return nil, fmt.Errorf("expected: STRING got: %s", p.tok.Type)
 145  			}
 146  
 147  			// Done
 148  			return keys, nil
 149  		case token.ILLEGAL:
 150  			return nil, errors.New("illegal")
 151  		default:
 152  			return nil, fmt.Errorf("expected: STRING got: %s", p.tok.Type)
 153  		}
 154  	}
 155  }
 156  
 157  // object parses any type of object, such as number, bool, string, object or
 158  // list.
 159  func (p *Parser) objectValue() (ast.Node, error) {
 160  	defer un(trace(p, "ParseObjectValue"))
 161  	tok := p.scan()
 162  
 163  	switch tok.Type {
 164  	case token.NUMBER, token.FLOAT, token.BOOL, token.NULL, token.STRING:
 165  		return p.literalType()
 166  	case token.LBRACE:
 167  		return p.objectType()
 168  	case token.LBRACK:
 169  		return p.listType()
 170  	case token.EOF:
 171  		return nil, errEofToken
 172  	}
 173  
 174  	return nil, fmt.Errorf("Expected object value, got unknown token: %+v", tok)
 175  }
 176  
 177  // object parses any type of object, such as number, bool, string, object or
 178  // list.
 179  func (p *Parser) object() (*ast.ObjectType, error) {
 180  	defer un(trace(p, "ParseType"))
 181  	tok := p.scan()
 182  
 183  	switch tok.Type {
 184  	case token.LBRACE:
 185  		return p.objectType()
 186  	case token.EOF:
 187  		return nil, errEofToken
 188  	}
 189  
 190  	return nil, fmt.Errorf("Expected object, got unknown token: %+v", tok)
 191  }
 192  
 193  // objectType parses an object type and returns a ObjectType AST
 194  func (p *Parser) objectType() (*ast.ObjectType, error) {
 195  	defer un(trace(p, "ParseObjectType"))
 196  
 197  	// we assume that the currently scanned token is a LBRACE
 198  	o := &ast.ObjectType{}
 199  
 200  	l, err := p.objectList()
 201  
 202  	// if we hit RBRACE, we are good to go (means we parsed all Items), if it's
 203  	// not a RBRACE, it's an syntax error and we just return it.
 204  	if err != nil && p.tok.Type != token.RBRACE {
 205  		return nil, err
 206  	}
 207  
 208  	o.List = l
 209  	return o, nil
 210  }
 211  
 212  // listType parses a list type and returns a ListType AST
 213  func (p *Parser) listType() (*ast.ListType, error) {
 214  	defer un(trace(p, "ParseListType"))
 215  
 216  	// we assume that the currently scanned token is a LBRACK
 217  	l := &ast.ListType{}
 218  
 219  	for {
 220  		tok := p.scan()
 221  		switch tok.Type {
 222  		case token.NUMBER, token.FLOAT, token.STRING:
 223  			node, err := p.literalType()
 224  			if err != nil {
 225  				return nil, err
 226  			}
 227  
 228  			l.Add(node)
 229  		case token.COMMA:
 230  			continue
 231  		case token.LBRACE:
 232  			node, err := p.objectType()
 233  			if err != nil {
 234  				return nil, err
 235  			}
 236  
 237  			l.Add(node)
 238  		case token.BOOL:
 239  			// TODO(arslan) should we support? not supported by HCL yet
 240  		case token.LBRACK:
 241  			// TODO(arslan) should we support nested lists? Even though it's
 242  			// written in README of HCL, it's not a part of the grammar
 243  			// (not defined in parse.y)
 244  		case token.RBRACK:
 245  			// finished
 246  			return l, nil
 247  		default:
 248  			return nil, fmt.Errorf("unexpected token while parsing list: %s", tok.Type)
 249  		}
 250  
 251  	}
 252  }
 253  
 254  // literalType parses a literal type and returns a LiteralType AST
 255  func (p *Parser) literalType() (*ast.LiteralType, error) {
 256  	defer un(trace(p, "ParseLiteral"))
 257  
 258  	return &ast.LiteralType{
 259  		Token: p.tok.HCLToken(),
 260  	}, nil
 261  }
 262  
 263  // scan returns the next token from the underlying scanner. If a token has
 264  // been unscanned then read that instead.
 265  func (p *Parser) scan() token.Token {
 266  	// If we have a token on the buffer, then return it.
 267  	if p.n != 0 {
 268  		p.n = 0
 269  		return p.tok
 270  	}
 271  
 272  	p.tok = p.sc.Scan()
 273  	return p.tok
 274  }
 275  
 276  // unscan pushes the previously read token back onto the buffer.
 277  func (p *Parser) unscan() {
 278  	p.n = 1
 279  }
 280  
 281  // ----------------------------------------------------------------------------
 282  // Parsing support
 283  
 284  func (p *Parser) printTrace(a ...interface{}) {
 285  	if !p.enableTrace {
 286  		return
 287  	}
 288  
 289  	const dots = ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . "
 290  	const n = len(dots)
 291  	fmt.Printf("%5d:%3d: ", p.tok.Pos.Line, p.tok.Pos.Column)
 292  
 293  	i := 2 * p.indent
 294  	for i > n {
 295  		fmt.Print(dots)
 296  		i -= n
 297  	}
 298  	// i <= n
 299  	fmt.Print(dots[0:i])
 300  	fmt.Println(a...)
 301  }
 302  
 303  func trace(p *Parser, msg string) *Parser {
 304  	p.printTrace(msg, "(")
 305  	p.indent++
 306  	return p
 307  }
 308  
 309  // Usage pattern: defer un(trace(p, "..."))
 310  func un(p *Parser) {
 311  	p.indent--
 312  	p.printTrace(")")
 313  }
 314