source.mx raw

   1  package main
   2  
   3  import (
   4  	"io"
   5  	"unicode/utf8"
   6  )
   7  
   8  type Source struct {
   9  	in   io.Reader
  10  	errh func(line, col uint32, msg string)
  11  
  12  	buf       []byte
  13  	ioerr     error
  14  	b, r, e   int32
  15  	line, col uint32
  16  	ch        rune
  17  	chw       int32
  18  }
  19  
// sentinel is the byte stored at buf[e], just past the buffered input.
// Because it equals utf8.RuneSelf, the single comparison "byte < sentinel"
// in nextch is true only for an ASCII byte that lies inside the buffered data.
const sentinel = utf8.RuneSelf
  21  
  22  func (s *Source) init(in io.Reader, errh func(line, col uint32, msg string)) {
  23  	s.in = in
  24  	s.errh = errh
  25  
  26  	if s.buf == nil {
  27  		s.buf = []byte{:nextSize(0)}
  28  	}
  29  	s.buf[0] = sentinel
  30  	s.ioerr = nil
  31  	s.b, s.r, s.e = -1, 0, 0
  32  	s.line, s.col = 0, 0
  33  	s.ch = ' '
  34  	s.chw = 0
  35  }
  36  
  37  const Linebase = 1
  38  const Colbase = 1
  39  
  40  func (s *Source) pos() (line, col uint32) {
  41  	return Linebase + s.line, Colbase + s.col
  42  }
  43  
  44  func (s *Source) Debugpos() (line, col uint32) {
  45  	return Linebase + s.line, Colbase + s.col
  46  }
  47  
  48  func (s *Source) error(msg string) {
  49  	line, col := s.pos()
  50  	s.errh(line, col, msg)
  51  }
  52  
  53  func (s *Source) start()          { s.b = s.r - s.chw }
  54  func (s *Source) stop()           { s.b = -1 }
  55  func (s *Source) segment() []byte { return s.buf[s.b : s.r-s.chw] }
  56  
  57  // segmentCopy returns a copy of the current segment that survives buffer
  58  // reallocation in fill(). In Moxie string=[]byte so string(segment()) does
  59  // NOT copy - the returned slice still aliases s.buf.
  60  func (s *Source) segmentCopy() []byte {
  61  	b := s.buf[s.b : s.r-s.chw]
  62  	c := []byte{:len(b)}
  63  	copy(c, b)
  64  	return c
  65  }
  66  
  67  func (s *Source) rewind() {
  68  	if s.b < 0 {
  69  		panic("no active segment")
  70  	}
  71  	s.col -= uint32(s.r - s.b)
  72  	s.r = s.b
  73  	s.nextch()
  74  }
  75  
  76  func (s *Source) nextch() {
  77  redo:
  78  	s.col += uint32(s.chw)
  79  	if s.ch == '\n' {
  80  		s.line++
  81  		s.col = 0
  82  	}
  83  
  84  	if s.ch = rune(s.buf[s.r]); s.ch < sentinel {
  85  		s.r++
  86  		s.chw = 1
  87  		if s.ch == 0 {
  88  			s.error("invalid NUL character")
  89  			goto redo
  90  		}
  91  		return
  92  	}
  93  
  94  	for s.e-s.r < utf8.UTFMax && !utf8.FullRune(s.buf[s.r:s.e]) && s.ioerr == nil {
  95  		s.fill()
  96  	}
  97  
  98  	if s.r == s.e {
  99  		if s.ioerr != io.EOF {
 100  			s.error("I/O error: " | s.ioerr.Error())
 101  			s.ioerr = nil
 102  		}
 103  		s.ch = -1
 104  		s.chw = 0
 105  		return
 106  	}
 107  
 108  	var w int
 109  	s.ch, w = utf8.DecodeRune(s.buf[s.r:s.e])
 110  	s.chw = int32(w)
 111  	s.r += s.chw
 112  
 113  	if s.ch == utf8.RuneError && s.chw == 1 {
 114  		s.error("invalid UTF-8 encoding")
 115  		goto redo
 116  	}
 117  
 118  	const BOM = 0xfeff
 119  	if s.ch == BOM {
 120  		if s.line > 0 || s.col > 0 {
 121  			s.error("invalid BOM in the middle of the file")
 122  		}
 123  		goto redo
 124  	}
 125  }
 126  
 127  func (s *Source) fill() {
 128  	b := s.r
 129  	if s.b >= 0 {
 130  		b = s.b
 131  		s.b = 0
 132  	}
 133  	content := s.buf[b:s.e]
 134  
 135  	if len(content)*2 > len(s.buf) {
 136  		s.buf = []byte{:nextSize(int32(len(s.buf)))}
 137  		copy(s.buf, content)
 138  	} else if b > 0 {
 139  		copy(s.buf, content)
 140  	}
 141  	s.r -= b
 142  	s.e -= b
 143  
 144  	for i := 0; i < 10; i++ {
 145  		var n int32
 146  		var nn int
 147  		nn, s.ioerr = s.in.Read(s.buf[s.e : len(s.buf)-1])
 148  		n = int32(nn)
 149  		if n < 0 {
 150  			panic("negative read")
 151  		}
 152  		if n > 0 || s.ioerr != nil {
 153  			s.e += n
 154  			s.buf[s.e] = sentinel
 155  			return
 156  		}
 157  	}
 158  
 159  	s.buf[s.e] = sentinel
 160  	s.ioerr = io.ErrNoProgress
 161  }
 162  
 163  func nextSize(size int32) int32 {
 164  	const min = 4 << 10
 165  	const max = 1 << 20
 166  	if size < min {
 167  		return min
 168  	}
 169  	if size <= max {
 170  		return size << 1
 171  	}
 172  	return size + max
 173  }
 174