source.mx raw

   1  package main
   2  
   3  import (
   4  	"io"
   5  	"unicode/utf8"
   6  )
   7  
   8  type Source struct {
   9  	in   io.Reader
  10  	errh func(line, col uint32, msg string)
  11  
  12  	buf       []byte
  13  	ioerr     error
  14  	b, r, e   int32
  15  	line, col uint32
  16  	ch        rune
  17  	chw       int32
  18  }
  19  
// sentinel is the byte value that init and fill store at buf[e], directly
// after the valid content. Because it equals utf8.RuneSelf, the single-byte
// test in nextch (ch < sentinel) fails on it, so reaching the end of the
// buffered content always takes nextch's slower path.
const sentinel = utf8.RuneSelf
  21  
  22  func (s *Source) init(in io.Reader, errh func(line, col uint32, msg string)) {
  23  	s.in = in
  24  	s.errh = errh
  25  
  26  	if s.buf == nil {
  27  		s.buf = []byte{:nextSize(0)}
  28  	}
  29  	s.buf[0] = sentinel
  30  	s.ioerr = nil
  31  	s.b, s.r, s.e = -1, 0, 0
  32  	s.line, s.col = 0, 0
  33  	s.ch = ' '
  34  	s.chw = 0
  35  }
  36  
  37  const Linebase = 1
  38  const Colbase = 1
  39  
  40  func (s *Source) pos() (line, col uint32) {
  41  	return Linebase + s.line, Colbase + s.col
  42  }
  43  
  44  func (s *Source) Debugpos() (line, col uint32) {
  45  	return Linebase + s.line, Colbase + s.col
  46  }
  47  
  48  func (s *Source) error(msg string) {
  49  	line, col := s.pos()
  50  	s.errh(line, col, msg)
  51  }
  52  
  53  func (s *Source) start()          { s.b = s.r - s.chw }
  54  func (s *Source) stop()           { s.b = -1 }
  55  func (s *Source) segment() []byte { return s.buf[s.b : s.r-s.chw] }
  56  
  57  func (s *Source) rewind() {
  58  	if s.b < 0 {
  59  		panic("no active segment")
  60  	}
  61  	s.col -= uint32(s.r - s.b)
  62  	s.r = s.b
  63  	s.nextch()
  64  }
  65  
  66  func (s *Source) nextch() {
  67  redo:
  68  	s.col += uint32(s.chw)
  69  	if s.ch == '\n' {
  70  		s.line++
  71  		s.col = 0
  72  	}
  73  
  74  	if s.ch = rune(s.buf[s.r]); s.ch < sentinel {
  75  		s.r++
  76  		s.chw = 1
  77  		if s.ch == 0 {
  78  			s.error("invalid NUL character")
  79  			goto redo
  80  		}
  81  		return
  82  	}
  83  
  84  	for s.e-s.r < utf8.UTFMax && !utf8.FullRune(s.buf[s.r:s.e]) && s.ioerr == nil {
  85  		s.fill()
  86  	}
  87  
  88  	if s.r == s.e {
  89  		if s.ioerr != io.EOF {
  90  			s.error("I/O error: " | s.ioerr.Error())
  91  			s.ioerr = nil
  92  		}
  93  		s.ch = -1
  94  		s.chw = 0
  95  		return
  96  	}
  97  
  98  	var w int
  99  	s.ch, w = utf8.DecodeRune(s.buf[s.r:s.e])
 100  	s.chw = int32(w)
 101  	s.r += s.chw
 102  
 103  	if s.ch == utf8.RuneError && s.chw == 1 {
 104  		s.error("invalid UTF-8 encoding")
 105  		goto redo
 106  	}
 107  
 108  	const BOM = 0xfeff
 109  	if s.ch == BOM {
 110  		if s.line > 0 || s.col > 0 {
 111  			s.error("invalid BOM in the middle of the file")
 112  		}
 113  		goto redo
 114  	}
 115  }
 116  
 117  func (s *Source) fill() {
 118  	b := s.r
 119  	if s.b >= 0 {
 120  		b = s.b
 121  		s.b = 0
 122  	}
 123  	content := s.buf[b:s.e]
 124  
 125  	if len(content)*2 > len(s.buf) {
 126  		s.buf = []byte{:nextSize(int32(len(s.buf)))}
 127  		copy(s.buf, content)
 128  	} else if b > 0 {
 129  		copy(s.buf, content)
 130  	}
 131  	s.r -= b
 132  	s.e -= b
 133  
 134  	for i := 0; i < 10; i++ {
 135  		var n int32
 136  		var nn int
 137  		nn, s.ioerr = s.in.Read(s.buf[s.e : len(s.buf)-1])
 138  		n = int32(nn)
 139  		if n < 0 {
 140  			panic("negative read")
 141  		}
 142  		if n > 0 || s.ioerr != nil {
 143  			s.e += n
 144  			s.buf[s.e] = sentinel
 145  			return
 146  		}
 147  	}
 148  
 149  	s.buf[s.e] = sentinel
 150  	s.ioerr = io.ErrNoProgress
 151  }
 152  
 153  func nextSize(size int32) int32 {
 154  	const min = 4 << 10
 155  	const max = 1 << 20
 156  	if size < min {
 157  		return min
 158  	}
 159  	if size <= max {
 160  		return size << 1
 161  	}
 162  	return size + max
 163  }
 164