scanner.go raw

   1  // Copyright 2016 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  // This file implements scanner, a lexical tokenizer for
   6  // Go source. After initialization, consecutive calls of
   7  // next advance the scanner one token at a time.
   8  //
   9  // This file, source.go, tokens.go, and token_string.go are self-contained
  10  // (`go tool compile scanner.go source.go tokens.go token_string.go` compiles)
  11  // and thus could be made into their own package.
  12  
  13  package syntax
  14  
  15  import (
  16  	"fmt"
  17  	"io"
  18  	"unicode"
  19  	"unicode/utf8"
  20  )
  21  
  22  // The mode flags below control which comments are reported
  23  // by calling the error handler. If no flag is set, comments
  24  // are ignored.
  25  const (
  26  	comments   uint = 1 << iota // call handler for all comments
  27  	directives                  // call handler for directives only
  28  )
  29  
  30  type Scanner struct {
  31  	Source
  32  	mode   uint
  33  	nlsemi bool // if set '\n' and EOF translate to ';'
  34  
  35  	// current token, valid after calling next()
  36  	Line, Col uint32
  37  	Blank     bool // line is blank up to col
  38  	Tok       Token
  39  	Lit       string   // valid if tok is _Name, _Literal, or _Semi ("semicolon", "newline", or "EOF"); may be malformed if bad is true
  40  	Bad       bool     // valid if tok is _Literal, true if a syntax error occurred, lit may be malformed
  41  	Kind      LitKind  // valid if tok is _Literal
  42  	Op        Operator // valid if tok is _Operator, _Star, _AssignOp, or _IncOp
  43  	Prec      int32      // valid if tok is _Operator, _Star, _AssignOp, or _IncOp
  44  }
  45  
  46  func (s *Scanner) Init(src io.Reader, errh func(line, col uint32, msg string), mode uint) {
  47  	s.Source.init(src, errh)
  48  	s.mode = mode
  49  	s.nlsemi = false
  50  }
  51  
  52  // errorf reports an error at the most recently read character position.
  53  func (s *Scanner) Errorf(format string, args ...interface{}) {
  54  	s.error(fmt.Sprintf(format, args...))
  55  }
  56  
  57  // errorAtf reports an error at a byte column offset relative to the current token start.
  58  func (s *Scanner) ErrorAtf(offset int32, format string, args ...interface{}) {
  59  	s.errh(s.line, s.col+uint32(offset), fmt.Sprintf(format, args...))
  60  }
  61  
  62  // setLit sets the scanner state for a recognized _Literal token.
  63  func (s *Scanner) SetLit(kind LitKind, ok bool) {
  64  	s.nlsemi = true
  65  	s.Tok = Literal
  66  	s.Lit = string(s.segment())
  67  	s.Bad = !ok
  68  	s.Kind = kind
  69  }
  70  
  71  // next advances the scanner by reading the next token.
  72  //
  73  // If a read, source encoding, or lexical error occurs, next calls
  74  // the installed error handler with the respective error position
  75  // and message. The error message is guaranteed to be non-empty and
  76  // never starts with a '/'. The error handler must exist.
  77  //
  78  // If the scanner mode includes the comments flag and a comment
  79  // (including comments containing directives) is encountered, the
  80  // error handler is also called with each comment position and text
  81  // (including opening /* or // and closing */, but without a newline
  82  // at the end of line comments). Comment text always starts with a /
  83  // which can be used to distinguish these handler calls from errors.
  84  //
  85  // If the scanner mode includes the directives (but not the comments)
  86  // flag, only comments containing a //line, /*line, or //go: directive
  87  // are reported, in the same way as regular comments.
  88  func (s *Scanner) Next() {
  89  	nlsemi := s.nlsemi
  90  	s.nlsemi = false
  91  
  92  redo:
  93  	// skip white space
  94  	s.stop()
  95  	startLine, startCol := s.pos()
  96  	for s.ch == ' ' || s.ch == '\t' || s.ch == '\n' && !nlsemi || s.ch == '\r' {
  97  		s.nextch()
  98  	}
  99  
 100  	// token start
 101  	s.Line, s.Col = s.pos()
 102  	s.Blank = s.line > startLine || startCol == Colbase
 103  	s.start()
 104  	if IsLetter(s.ch) || s.ch >= utf8.RuneSelf && s.AtIdentChar(true) {
 105  		s.nextch()
 106  		s.Ident()
 107  		return
 108  	}
 109  
 110  	switch s.ch {
 111  	case -1:
 112  		if nlsemi {
 113  			s.Lit = "EOF"
 114  			s.Tok = Semi
 115  			break
 116  		}
 117  		s.Tok =EOF
 118  
 119  	case '\n':
 120  		s.nextch()
 121  		s.Lit = "newline"
 122  		s.Tok = Semi
 123  
 124  	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 125  		s.Number(false)
 126  
 127  	case '"':
 128  		s.stdString()
 129  
 130  	case '`':
 131  		s.rawString()
 132  
 133  	case '\'':
 134  		s.rune()
 135  
 136  	case '(':
 137  		s.nextch()
 138  		s.Tok = Lparen
 139  
 140  	case '[':
 141  		s.nextch()
 142  		s.Tok = Lbrack
 143  
 144  	case '{':
 145  		s.nextch()
 146  		s.Tok = Lbrace
 147  
 148  	case ',':
 149  		s.nextch()
 150  		s.Tok = Comma
 151  
 152  	case ';':
 153  		s.nextch()
 154  		s.Lit = "semicolon"
 155  		s.Tok = Semi
 156  
 157  	case ')':
 158  		s.nextch()
 159  		s.nlsemi = true
 160  		s.Tok = Rparen
 161  
 162  	case ']':
 163  		s.nextch()
 164  		s.nlsemi = true
 165  		s.Tok = Rbrack
 166  
 167  	case '}':
 168  		s.nextch()
 169  		s.nlsemi = true
 170  		s.Tok = Rbrace
 171  
 172  	case ':':
 173  		s.nextch()
 174  		if s.ch == '=' {
 175  			s.nextch()
 176  			s.Tok = Define
 177  			break
 178  		}
 179  		s.Tok = Colon
 180  
 181  	case '.':
 182  		s.nextch()
 183  		if IsDecimal(s.ch) {
 184  			s.Number(true)
 185  			break
 186  		}
 187  		if s.ch == '.' {
 188  			s.nextch()
 189  			if s.ch == '.' {
 190  				s.nextch()
 191  				s.Tok = DotDotDot
 192  				break
 193  			}
 194  			s.rewind() // now s.ch holds 1st '.'
 195  			s.nextch() // consume 1st '.' again
 196  		}
 197  		s.Tok = Dot
 198  
 199  	case '+':
 200  		s.nextch()
 201  		s.Op, s.Prec = Add, PrecAdd
 202  		if s.ch != '+' {
 203  			goto assignop
 204  		}
 205  		s.nextch()
 206  		s.nlsemi = true
 207  		s.Tok = IncOp
 208  
 209  	case '-':
 210  		s.nextch()
 211  		s.Op, s.Prec = Sub, PrecAdd
 212  		if s.ch != '-' {
 213  			goto assignop
 214  		}
 215  		s.nextch()
 216  		s.nlsemi = true
 217  		s.Tok = IncOp
 218  
 219  	case '*':
 220  		s.nextch()
 221  		s.Op, s.Prec = Mul, PrecMul
 222  		// don't goto assignop - want _Star token
 223  		if s.ch == '=' {
 224  			s.nextch()
 225  			s.Tok = AssignOp
 226  			break
 227  		}
 228  		s.Tok = Star
 229  
 230  	case '/':
 231  		s.nextch()
 232  		if s.ch == '/' {
 233  			s.nextch()
 234  			s.lineComment()
 235  			goto redo
 236  		}
 237  		if s.ch == '*' {
 238  			s.nextch()
 239  			s.fullComment()
 240  			if line, _ := s.pos(); line > s.Line && nlsemi {
 241  				// A multi-line comment acts like a newline;
 242  				// it translates to a ';' if nlsemi is set.
 243  				s.Lit = "newline"
 244  				s.Tok = Semi
 245  				break
 246  			}
 247  			goto redo
 248  		}
 249  		s.Op, s.Prec = Div, PrecMul
 250  		goto assignop
 251  
 252  	case '%':
 253  		s.nextch()
 254  		s.Op, s.Prec = Rem, PrecMul
 255  		goto assignop
 256  
 257  	case '&':
 258  		s.nextch()
 259  		if s.ch == '&' {
 260  			s.nextch()
 261  			s.Op, s.Prec = AndAnd, PrecAndAnd
 262  			s.Tok = OperatorType
 263  			break
 264  		}
 265  		s.Op, s.Prec = And, PrecMul
 266  		if s.ch == '^' {
 267  			s.nextch()
 268  			s.Op = AndNot
 269  		}
 270  		goto assignop
 271  
 272  	case '|':
 273  		s.nextch()
 274  		if s.ch == '|' {
 275  			s.nextch()
 276  			s.Op, s.Prec = OrOr, PrecOrOr
 277  			s.Tok = OperatorType
 278  			break
 279  		}
 280  		s.Op, s.Prec = Or, PrecAdd
 281  		goto assignop
 282  
 283  	case '^':
 284  		s.nextch()
 285  		s.Op, s.Prec = Xor, PrecAdd
 286  		goto assignop
 287  
 288  	case '<':
 289  		s.nextch()
 290  		if s.ch == '=' {
 291  			s.nextch()
 292  			s.Op, s.Prec = Leq, PrecCmp
 293  			s.Tok = OperatorType
 294  			break
 295  		}
 296  		if s.ch == '<' {
 297  			s.nextch()
 298  			s.Op, s.Prec = Shl, PrecMul
 299  			goto assignop
 300  		}
 301  		if s.ch == '-' {
 302  			s.nextch()
 303  			s.Tok = Arrow
 304  			break
 305  		}
 306  		s.Op, s.Prec = Lss, PrecCmp
 307  		s.Tok = OperatorType
 308  
 309  	case '>':
 310  		s.nextch()
 311  		if s.ch == '=' {
 312  			s.nextch()
 313  			s.Op, s.Prec = Geq, PrecCmp
 314  			s.Tok =OperatorType
 315  			break
 316  		}
 317  		if s.ch == '>' {
 318  			s.nextch()
 319  			s.Op, s.Prec = Shr, PrecMul
 320  			goto assignop
 321  		}
 322  		s.Op, s.Prec = Gtr, PrecCmp
 323  		s.Tok = OperatorType
 324  
 325  	case '=':
 326  		s.nextch()
 327  		if s.ch == '=' {
 328  			s.nextch()
 329  			s.Op, s.Prec = Eql, PrecCmp
 330  			s.Tok = OperatorType
 331  			break
 332  		}
 333  		s.Tok = Assign
 334  
 335  	case '!':
 336  		s.nextch()
 337  		if s.ch == '=' {
 338  			s.nextch()
 339  			s.Op, s.Prec = Neq, PrecCmp
 340  			s.Tok = OperatorType
 341  			break
 342  		}
 343  		s.Op, s.Prec = Not, 0
 344  		s.Tok = OperatorType
 345  
 346  	case '~':
 347  		s.nextch()
 348  		s.Op, s.Prec = Tilde, 0
 349  		s.Tok = OperatorType
 350  
 351  	default:
 352  		s.Errorf("invalid character %#U", s.ch)
 353  		s.nextch()
 354  		goto redo
 355  	}
 356  
 357  	return
 358  
 359  assignop:
 360  	if s.ch == '=' {
 361  		s.nextch()
 362  		s.Tok = AssignOp
 363  		return
 364  	}
 365  	s.Tok = OperatorType
 366  }
 367  
 368  func (s *Scanner) Ident() {
 369  	// accelerate common case (7bit ASCII)
 370  	for IsLetter(s.ch) || IsDecimal(s.ch) {
 371  		s.nextch()
 372  	}
 373  
 374  	// general case
 375  	if s.ch >= utf8.RuneSelf {
 376  		for s.AtIdentChar(false) {
 377  			s.nextch()
 378  		}
 379  	}
 380  
 381  	// possibly a keyword
 382  	lit := s.segment()
 383  	if len(lit) >= 2 {
 384  		if tok := keywordMap[Hash(lit)]; tok != 0 && TokStrFast(tok) == string(lit) {
 385  			s.nlsemi = contains(1<<Break|1<<Continue|1<<Fallthrough|1<<Return, tok)
 386  			s.Tok = tok
 387  			return
 388  		}
 389  	}
 390  
 391  	s.nlsemi = true
 392  	s.Lit = string(lit)
 393  	s.Tok = NameType
 394  }
 395  
 396  // tokStrFast is a faster version of token.String, which assumes that tok
 397  // is one of the valid tokens - and can thus skip bounds checks.
 398  func TokStrFast(tok Token) string {
 399  	return token_name[token_index[tok-1]:token_index[tok]]
 400  }
 401  
 402  func (s *Scanner) AtIdentChar(first bool) bool {
 403  	switch {
 404  	case unicode.IsLetter(s.ch) || s.ch == '_':
 405  		// ok
 406  	case unicode.IsDigit(s.ch):
 407  		if first {
 408  			s.Errorf("identifier cannot begin with digit %#U", s.ch)
 409  		}
 410  	case s.ch >= utf8.RuneSelf:
 411  		s.Errorf("invalid character %#U in identifier", s.ch)
 412  	default:
 413  		return false
 414  	}
 415  	return true
 416  }
 417  
 418  // hash is a perfect hash function for keywords.
 419  // It assumes that s has at least length 2.
 420  func Hash(s []byte) uint {
 421  	return (uint(s[0])<<4 ^ uint(s[1]) + uint(len(s))) & uint(len(keywordMap)-1)
 422  }
 423  
 424  var keywordMap [1 << 6]Token // size must be power of two
 425  
 426  func init() { Init() }
 427  
 428  func Init() {
 429  	// populate keywordMap
 430  	for tok := Break; tok <= Var; tok++ {
 431  		h := Hash([]byte(tok.String()))
 432  		if keywordMap[h] != 0 {
 433  			panic("imperfect hash")
 434  		}
 435  		keywordMap[h] = tok
 436  	}
 437  }
 438  
 439  func Lower(ch rune) rune     { return ('a' - 'A') | ch } // returns lower-case ch iff ch is ASCII letter
 440  func IsLetter(ch rune) bool  { return 'a' <= Lower(ch) && Lower(ch) <= 'z' || ch == '_' }
 441  func IsDecimal(ch rune) bool { return '0' <= ch && ch <= '9' }
 442  func IsHex(ch rune) bool     { return '0' <= ch && ch <= '9' || 'a' <= Lower(ch) && Lower(ch) <= 'f' }
 443  
 444  // Digits accepts the sequence { digit | '_' }.
 445  // If base <= 10, Digits accepts any decimal digit but records
 446  // the index (relative to the literal start) of a digit >= base
 447  // in *invalid, if *invalid < 0.
 448  // Digits returns a bitset describing whether the sequence contained
 449  // Digits (bit 0 is set), or separators '_' (bit 1 is set).
 450  func (s *Scanner) Digits(base int32, invalid *int32) (digsep int32) {
 451  	if base <= 10 {
 452  		max := rune('0' + base)
 453  		for IsDecimal(s.ch) || s.ch == '_' {
 454  			ds := int32(1)
 455  			if s.ch == '_' {
 456  				ds = 2
 457  			} else if s.ch >= max && *invalid < 0 {
 458  				_, col := s.pos()
 459  				*invalid = int32(col - s.col)
 460  			}
 461  			digsep |= ds
 462  			s.nextch()
 463  		}
 464  	} else {
 465  		for IsHex(s.ch) || s.ch == '_' {
 466  			ds := int32(1)
 467  			if s.ch == '_' {
 468  				ds = 2
 469  			}
 470  			digsep |= ds
 471  			s.nextch()
 472  		}
 473  	}
 474  	return
 475  }
 476  
 477  func (s *Scanner) Number(seenPoint bool) {
 478  	ok := true
 479  	kind := IntLit
 480  	base := int32(10)  // number base
 481  	prefix := rune(0)  // one of 0 (decimal), '0' (0-octal), 'x', 'o', or 'b'
 482  	digsep := int32(0) // bit 0: digit present, bit 1: '_' present
 483  	invalid := int32(-1) // index of invalid digit in literal, or < 0
 484  
 485  	// integer part
 486  	if !seenPoint {
 487  		if s.ch == '0' {
 488  			s.nextch()
 489  			switch Lower(s.ch) {
 490  			case 'x':
 491  				s.nextch()
 492  				base, prefix = 16, 'x'
 493  			case 'o':
 494  				s.nextch()
 495  				base, prefix = 8, 'o'
 496  			case 'b':
 497  				s.nextch()
 498  				base, prefix = 2, 'b'
 499  			default:
 500  				base, prefix = 8, '0'
 501  				digsep = 1 // leading 0
 502  			}
 503  		}
 504  		digsep |= s.Digits(base, &invalid)
 505  		if s.ch == '.' {
 506  			if prefix == 'o' || prefix == 'b' {
 507  				s.Errorf("invalid radix point in %s literal", baseName(base))
 508  				ok = false
 509  			}
 510  			s.nextch()
 511  			seenPoint = true
 512  		}
 513  	}
 514  
 515  	// fractional part
 516  	if seenPoint {
 517  		kind = FloatLit
 518  		digsep |= s.Digits(base, &invalid)
 519  	}
 520  
 521  	if digsep&1 == 0 && ok {
 522  		s.Errorf("%s literal has no digits", baseName(base))
 523  		ok = false
 524  	}
 525  
 526  	// exponent
 527  	if e := Lower(s.ch); e == 'e' || e == 'p' {
 528  		if ok {
 529  			switch {
 530  			case e == 'e' && prefix != 0 && prefix != '0':
 531  				s.Errorf("%q exponent requires decimal mantissa", s.ch)
 532  				ok = false
 533  			case e == 'p' && prefix != 'x':
 534  				s.Errorf("%q exponent requires hexadecimal mantissa", s.ch)
 535  				ok = false
 536  			}
 537  		}
 538  		s.nextch()
 539  		kind = FloatLit
 540  		if s.ch == '+' || s.ch == '-' {
 541  			s.nextch()
 542  		}
 543  		digsep = s.Digits(10, nil) | digsep&2 // don't lose sep bit
 544  		if digsep&1 == 0 && ok {
 545  			s.Errorf("exponent has no digits")
 546  			ok = false
 547  		}
 548  	} else if prefix == 'x' && kind == FloatLit && ok {
 549  		s.Errorf("hexadecimal mantissa requires a 'p' exponent")
 550  		ok = false
 551  	}
 552  
 553  	// suffix 'i'
 554  	if s.ch == 'i' {
 555  		kind = ImagLit
 556  		s.nextch()
 557  	}
 558  
 559  	s.SetLit(kind, ok) // do this now so we can use s.lit below
 560  
 561  	if kind == IntLit && invalid >= 0 && ok {
 562  		s.ErrorAtf(invalid, "invalid digit %q in %s literal", s.Lit[invalid], baseName(base))
 563  		ok = false
 564  	}
 565  
 566  	if digsep&2 != 0 && ok {
 567  		if i := invalidSep(s.Lit); i >= 0 {
 568  			s.ErrorAtf(i, "'_' must separate successive digits")
 569  			ok = false
 570  		}
 571  	}
 572  
 573  	s.Bad = !ok // correct s.bad
 574  }
 575  
 576  func baseName(base int32) string {
 577  	switch base {
 578  	case 2:
 579  		return "binary"
 580  	case 8:
 581  		return "octal"
 582  	case 10:
 583  		return "decimal"
 584  	case 16:
 585  		return "hexadecimal"
 586  	}
 587  	panic("invalid base")
 588  }
 589  
 590  // invalidSep returns the index of the first invalid separator in x, or -1.
 591  func invalidSep(x string) int32 {
 592  	x1 := ' ' // prefix char, we only care if it's 'x'
 593  	d := '.'  // digit, one of '_', '0' (a digit), or '.' (anything else)
 594  	i := int32(0)
 595  
 596  	// a prefix counts as a digit
 597  	if len(x) >= 2 && x[0] == '0' {
 598  		x1 = Lower(rune(x[1]))
 599  		if x1 == 'x' || x1 == 'o' || x1 == 'b' {
 600  			d = '0'
 601  			i = 2
 602  		}
 603  	}
 604  
 605  	// mantissa and exponent
 606  	for ; i < int32(len(x)); i++ {
 607  		p := d // previous digit
 608  		d = rune(x[i])
 609  		switch {
 610  		case d == '_':
 611  			if p != '0' {
 612  				return i
 613  			}
 614  		case IsDecimal(d) || x1 == 'x' && IsHex(d):
 615  			d = '0'
 616  		default:
 617  			if p == '_' {
 618  				return i - 1
 619  			}
 620  			d = '.'
 621  		}
 622  	}
 623  	if d == '_' {
 624  		return int32(len(x)) - 1
 625  	}
 626  
 627  	return -1
 628  }
 629  
 630  func (s *Scanner) rune() {
 631  	ok := true
 632  	s.nextch()
 633  
 634  	n := 0
 635  	for ; ; n++ {
 636  		if s.ch == '\'' {
 637  			if ok {
 638  				if n == 0 {
 639  					s.Errorf("empty rune literal or unescaped '")
 640  					ok = false
 641  				} else if n != 1 {
 642  					s.ErrorAtf(0, "more than one character in rune literal")
 643  					ok = false
 644  				}
 645  			}
 646  			s.nextch()
 647  			break
 648  		}
 649  		if s.ch == '\\' {
 650  			s.nextch()
 651  			if !s.escape('\'') {
 652  				ok = false
 653  			}
 654  			continue
 655  		}
 656  		if s.ch == '\n' {
 657  			if ok {
 658  				s.Errorf("newline in rune literal")
 659  				ok = false
 660  			}
 661  			break
 662  		}
 663  		if s.ch < 0 {
 664  			if ok {
 665  				s.ErrorAtf(0, "rune literal not terminated")
 666  				ok = false
 667  			}
 668  			break
 669  		}
 670  		s.nextch()
 671  	}
 672  
 673  	s.SetLit(RuneLit, ok)
 674  }
 675  
 676  func (s *Scanner) stdString() {
 677  	ok := true
 678  	s.nextch()
 679  
 680  	for {
 681  		if s.ch == '"' {
 682  			s.nextch()
 683  			break
 684  		}
 685  		if s.ch == '\\' {
 686  			s.nextch()
 687  			if !s.escape('"') {
 688  				ok = false
 689  			}
 690  			continue
 691  		}
 692  		if s.ch == '\n' {
 693  			s.Errorf("newline in string")
 694  			ok = false
 695  			break
 696  		}
 697  		if s.ch < 0 {
 698  			s.ErrorAtf(0, "string not terminated")
 699  			ok = false
 700  			break
 701  		}
 702  		s.nextch()
 703  	}
 704  
 705  	s.SetLit(StringLit, ok)
 706  }
 707  
 708  func (s *Scanner) rawString() {
 709  	ok := true
 710  	s.nextch()
 711  
 712  	for {
 713  		if s.ch == '`' {
 714  			s.nextch()
 715  			break
 716  		}
 717  		if s.ch < 0 {
 718  			s.ErrorAtf(0, "string not terminated")
 719  			ok = false
 720  			break
 721  		}
 722  		s.nextch()
 723  	}
 724  	// We leave CRs in the string since they are part of the
 725  	// literal (even though they are not part of the literal
 726  	// value).
 727  
 728  	s.SetLit(StringLit, ok)
 729  }
 730  
 731  func (s *Scanner) comment(text string) {
 732  	s.ErrorAtf(0, "%s", text)
 733  }
 734  
 735  func (s *Scanner) skipLine() {
 736  	// don't consume '\n' - needed for nlsemi logic
 737  	for s.ch >= 0 && s.ch != '\n' {
 738  		s.nextch()
 739  	}
 740  }
 741  
 742  func (s *Scanner) lineComment() {
 743  	// opening has already been consumed
 744  
 745  	if s.mode&comments != 0 {
 746  		s.skipLine()
 747  		s.comment(string(s.segment()))
 748  		return
 749  	}
 750  
 751  	// are we saving directives? or is this definitely not a directive?
 752  	if s.mode&directives == 0 || (s.ch != 'g' && s.ch != 'l') {
 753  		s.stop()
 754  		s.skipLine()
 755  		return
 756  	}
 757  
 758  	// recognize go: or line directives
 759  	prefix := "go:"
 760  	if s.ch == 'l' {
 761  		prefix = "line "
 762  	}
 763  
 764  	for _, r := range prefix {
 765  		if s.ch != r { s.stop(); s.skipLine(); return }
 766  		s.nextch()
 767  	}
 768  	// directive text
 769  	s.skipLine()
 770  	s.comment(string(s.segment()))
 771  }
 772  
 773  func (s *Scanner) skipComment() bool {
 774  	for s.ch >= 0 {
 775  		for s.ch == '*' {
 776  			s.nextch()
 777  			if s.ch == '/' {
 778  				s.nextch()
 779  				return true
 780  			}
 781  		}
 782  		s.nextch()
 783  	}
 784  	s.ErrorAtf(0, "comment not terminated")
 785  	return false
 786  }
 787  
 788  func (s *Scanner) fullComment() {
 789  	/* opening has already been consumed */
 790  
 791  	if s.mode&comments != 0 {
 792  		if s.skipComment() {
 793  			s.comment(string(s.segment()))
 794  		}
 795  		return
 796  	}
 797  
 798  	if s.mode&directives == 0 || s.ch != 'l' {
 799  		s.stop()
 800  		s.skipComment()
 801  		return
 802  	}
 803  
 804  	// recognize line directive
 805  	const prefix = "line "
 806  
 807  	for _, r := range prefix {
 808  		if s.ch != r { s.stop(); s.skipComment(); return }
 809  		s.nextch()
 810  	}
 811  	// directive text
 812  	if s.skipComment() {
 813  		s.comment(string(s.segment()))
 814  	}
 815  }
 816  
 817  func (s *Scanner) escape(quote rune) bool {
 818  	var n int32
 819  	var base, max uint32
 820  
 821  	switch s.ch {
 822  	case quote, 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\':
 823  		s.nextch()
 824  		return true
 825  	case '0', '1', '2', '3', '4', '5', '6', '7':
 826  		n, base, max = 3, 8, 255
 827  	case 'x':
 828  		s.nextch()
 829  		n, base, max = 2, 16, 255
 830  	case 'u':
 831  		s.nextch()
 832  		n, base, max = 4, 16, unicode.MaxRune
 833  	case 'U':
 834  		s.nextch()
 835  		n, base, max = 8, 16, unicode.MaxRune
 836  	default:
 837  		if s.ch < 0 {
 838  			return true // complain in caller about EOF
 839  		}
 840  		s.Errorf("unknown escape")
 841  		return false
 842  	}
 843  
 844  	var x uint32
 845  	for i := n; i > 0; i-- {
 846  		if s.ch < 0 {
 847  			return true // complain in caller about EOF
 848  		}
 849  		d := base
 850  		if IsDecimal(s.ch) {
 851  			d = uint32(s.ch) - '0'
 852  		} else if 'a' <= Lower(s.ch) && Lower(s.ch) <= 'f' {
 853  			d = uint32(Lower(s.ch)) - 'a' + 10
 854  		}
 855  		if d >= base {
 856  			s.Errorf("invalid character %q in %s escape", s.ch, baseName(int32(base)))
 857  			return false
 858  		}
 859  		// d < base
 860  		x = x*base + d
 861  		s.nextch()
 862  	}
 863  
 864  	if x > max && base == 8 {
 865  		s.Errorf("octal escape value %d > 255", x)
 866  		return false
 867  	}
 868  
 869  	if x > max || 0xD800 <= x && x < 0xE000 /* surrogate range */ {
 870  		s.Errorf("escape is invalid Unicode code point %#U", x)
 871  		return false
 872  	}
 873  
 874  	return true
 875  }
 876