lexer.go raw

   1  package pattern
   2  
   3  import (
   4  	"fmt"
   5  	"go/token"
   6  	"unicode"
   7  	"unicode/utf8"
   8  )
   9  
  10  type lexer struct {
  11  	f *token.File
  12  
  13  	input string
  14  	start int
  15  	pos   int
  16  	width int
  17  	items chan item
  18  }
  19  
  20  type itemType int
  21  
  22  const eof = -1
  23  
  24  const (
  25  	itemError itemType = iota
  26  	itemLeftParen
  27  	itemRightParen
  28  	itemLeftBracket
  29  	itemRightBracket
  30  	itemTypeName
  31  	itemVariable
  32  	itemAt
  33  	itemColon
  34  	itemBlank
  35  	itemString
  36  	itemEOF
  37  )
  38  
  39  func (typ itemType) String() string {
  40  	switch typ {
  41  	case itemError:
  42  		return "ERROR"
  43  	case itemLeftParen:
  44  		return "("
  45  	case itemRightParen:
  46  		return ")"
  47  	case itemLeftBracket:
  48  		return "["
  49  	case itemRightBracket:
  50  		return "]"
  51  	case itemTypeName:
  52  		return "TYPE"
  53  	case itemVariable:
  54  		return "VAR"
  55  	case itemAt:
  56  		return "@"
  57  	case itemColon:
  58  		return ":"
  59  	case itemBlank:
  60  		return "_"
  61  	case itemString:
  62  		return "STRING"
  63  	case itemEOF:
  64  		return "EOF"
  65  	default:
  66  		return fmt.Sprintf("itemType(%d)", typ)
  67  	}
  68  }
  69  
  70  type item struct {
  71  	typ itemType
  72  	val string
  73  	pos int
  74  }
  75  
  76  type stateFn func(*lexer) stateFn
  77  
  78  func (l *lexer) run() {
  79  	for state := lexStart; state != nil; {
  80  		state = state(l)
  81  	}
  82  	close(l.items)
  83  }
  84  
  85  func (l *lexer) emitValue(t itemType, value string) {
  86  	l.items <- item{t, value, l.start}
  87  	l.start = l.pos
  88  }
  89  
  90  func (l *lexer) emit(t itemType) {
  91  	l.items <- item{t, l.input[l.start:l.pos], l.start}
  92  	l.start = l.pos
  93  }
  94  
  95  func lexStart(l *lexer) stateFn {
  96  	switch r := l.next(); {
  97  	case r == eof:
  98  		l.emit(itemEOF)
  99  		return nil
 100  	case unicode.IsSpace(r):
 101  		l.ignore()
 102  	case r == '(':
 103  		l.emit(itemLeftParen)
 104  	case r == ')':
 105  		l.emit(itemRightParen)
 106  	case r == '[':
 107  		l.emit(itemLeftBracket)
 108  	case r == ']':
 109  		l.emit(itemRightBracket)
 110  	case r == '@':
 111  		l.emit(itemAt)
 112  	case r == ':':
 113  		l.emit(itemColon)
 114  	case r == '_':
 115  		l.emit(itemBlank)
 116  	case r == '"':
 117  		l.backup()
 118  		return lexString
 119  	case unicode.IsUpper(r):
 120  		l.backup()
 121  		return lexType
 122  	case unicode.IsLower(r):
 123  		l.backup()
 124  		return lexVariable
 125  	default:
 126  		return l.errorf("unexpected character %c", r)
 127  	}
 128  	return lexStart
 129  }
 130  
 131  func (l *lexer) next() (r rune) {
 132  	if l.pos >= len(l.input) {
 133  		l.width = 0
 134  		return eof
 135  	}
 136  	r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
 137  
 138  	if r == '\n' {
 139  		l.f.AddLine(l.pos)
 140  	}
 141  
 142  	l.pos += l.width
 143  
 144  	return r
 145  }
 146  
 147  func (l *lexer) ignore() {
 148  	l.start = l.pos
 149  }
 150  
 151  func (l *lexer) backup() {
 152  	l.pos -= l.width
 153  }
 154  
 155  func (l *lexer) errorf(format string, args ...interface{}) stateFn {
 156  	// TODO(dh): emit position information in errors
 157  	l.items <- item{
 158  		itemError,
 159  		fmt.Sprintf(format, args...),
 160  		l.start,
 161  	}
 162  	return nil
 163  }
 164  
 165  func isAlphaNumeric(r rune) bool {
 166  	return r >= '0' && r <= '9' ||
 167  		r >= 'a' && r <= 'z' ||
 168  		r >= 'A' && r <= 'Z'
 169  }
 170  
 171  func lexString(l *lexer) stateFn {
 172  	l.next() // skip quote
 173  	escape := false
 174  
 175  	var runes []rune
 176  	for {
 177  		switch r := l.next(); r {
 178  		case eof:
 179  			return l.errorf("unterminated string")
 180  		case '"':
 181  			if !escape {
 182  				l.emitValue(itemString, string(runes))
 183  				return lexStart
 184  			} else {
 185  				runes = append(runes, '"')
 186  				escape = false
 187  			}
 188  		case '\\':
 189  			if escape {
 190  				runes = append(runes, '\\')
 191  				escape = false
 192  			} else {
 193  				escape = true
 194  			}
 195  		default:
 196  			runes = append(runes, r)
 197  		}
 198  	}
 199  }
 200  
 201  func lexType(l *lexer) stateFn {
 202  	l.next()
 203  	for {
 204  		if !isAlphaNumeric(l.next()) {
 205  			l.backup()
 206  			l.emit(itemTypeName)
 207  			return lexStart
 208  		}
 209  	}
 210  }
 211  
 212  func lexVariable(l *lexer) stateFn {
 213  	l.next()
 214  	for {
 215  		if !isAlphaNumeric(l.next()) {
 216  			l.backup()
 217  			l.emit(itemVariable)
 218  			return lexStart
 219  		}
 220  	}
 221  }
 222