source.mx raw
1 package main
2
3 import (
4 "io"
5 "unicode/utf8"
6 )
7
// Source is a buffered reader of source text that delivers one character at
// a time through nextch and tracks the position of that character.
//
// buf[0:e] holds the bytes read so far that are still needed, and buf[e]
// always holds the sentinel byte. r is the index of the next unread byte.
// b is the start of the active segment, or -1 when no segment is active.
type Source struct {
	in   io.Reader                          // supplies the raw source bytes
	errh func(line, col uint32, msg string) // error callback; receives 1-based positions

	buf   []byte // read buffer, terminated by the sentinel at buf[e]
	ioerr error  // pending I/O error from the most recent read, or nil
	b     int32  // start of the active segment in buf, or -1 if none
	r     int32  // index in buf of the next byte to read
	e     int32  // index in buf one past the last valid byte
	line  uint32 // 0-based line of ch
	col   uint32 // 0-based byte column of ch
	ch    rune   // most recently read character; -1 at end of input
	chw   int32  // width of ch in bytes; 0 at end of input
}
19
20 const sentinel = utf8.RuneSelf
21
22 func (s *Source) init(in io.Reader, errh func(line, col uint32, msg string)) {
23 s.in = in
24 s.errh = errh
25
26 if s.buf == nil {
27 s.buf = []byte{:nextSize(0)}
28 }
29 s.buf[0] = sentinel
30 s.ioerr = nil
31 s.b, s.r, s.e = -1, 0, 0
32 s.line, s.col = 0, 0
33 s.ch = ' '
34 s.chw = 0
35 }
36
// Offsets added to the internal 0-based line and column counters when a
// position is reported.
const (
	// Linebase is the number reported for the first line.
	Linebase = 1
	// Colbase is the number reported for the first column.
	Colbase = 1
)
39
40 func (s *Source) pos() (line, col uint32) {
41 return Linebase + s.line, Colbase + s.col
42 }
43
44 func (s *Source) Debugpos() (line, col uint32) {
45 return Linebase + s.line, Colbase + s.col
46 }
47
48 func (s *Source) error(msg string) {
49 line, col := s.pos()
50 s.errh(line, col, msg)
51 }
52
53 func (s *Source) start() { s.b = s.r - s.chw }
54 func (s *Source) stop() { s.b = -1 }
55 func (s *Source) segment() []byte { return s.buf[s.b : s.r-s.chw] }
56
57 // segmentCopy returns a copy of the current segment that survives buffer
58 // reallocation in fill(). In Moxie string=[]byte so string(segment()) does
59 // NOT copy - the returned slice still aliases s.buf.
60 func (s *Source) segmentCopy() []byte {
61 b := s.buf[s.b : s.r-s.chw]
62 c := []byte{:len(b)}
63 copy(c, b)
64 return c
65 }
66
67 func (s *Source) rewind() {
68 if s.b < 0 {
69 panic("no active segment")
70 }
71 s.col -= uint32(s.r - s.b)
72 s.r = s.b
73 s.nextch()
74 }
75
76 func (s *Source) nextch() {
77 redo:
78 s.col += uint32(s.chw)
79 if s.ch == '\n' {
80 s.line++
81 s.col = 0
82 }
83
84 if s.ch = rune(s.buf[s.r]); s.ch < sentinel {
85 s.r++
86 s.chw = 1
87 if s.ch == 0 {
88 s.error("invalid NUL character")
89 goto redo
90 }
91 return
92 }
93
94 for s.e-s.r < utf8.UTFMax && !utf8.FullRune(s.buf[s.r:s.e]) && s.ioerr == nil {
95 s.fill()
96 }
97
98 if s.r == s.e {
99 if s.ioerr != io.EOF {
100 s.error("I/O error: " | s.ioerr.Error())
101 s.ioerr = nil
102 }
103 s.ch = -1
104 s.chw = 0
105 return
106 }
107
108 var w int
109 s.ch, w = utf8.DecodeRune(s.buf[s.r:s.e])
110 s.chw = int32(w)
111 s.r += s.chw
112
113 if s.ch == utf8.RuneError && s.chw == 1 {
114 s.error("invalid UTF-8 encoding")
115 goto redo
116 }
117
118 const BOM = 0xfeff
119 if s.ch == BOM {
120 if s.line > 0 || s.col > 0 {
121 s.error("invalid BOM in the middle of the file")
122 }
123 goto redo
124 }
125 }
126
127 func (s *Source) fill() {
128 b := s.r
129 if s.b >= 0 {
130 b = s.b
131 s.b = 0
132 }
133 content := s.buf[b:s.e]
134
135 if len(content)*2 > len(s.buf) {
136 s.buf = []byte{:nextSize(int32(len(s.buf)))}
137 copy(s.buf, content)
138 } else if b > 0 {
139 copy(s.buf, content)
140 }
141 s.r -= b
142 s.e -= b
143
144 for i := 0; i < 10; i++ {
145 var n int32
146 var nn int
147 nn, s.ioerr = s.in.Read(s.buf[s.e : len(s.buf)-1])
148 n = int32(nn)
149 if n < 0 {
150 panic("negative read")
151 }
152 if n > 0 || s.ioerr != nil {
153 s.e += n
154 s.buf[s.e] = sentinel
155 return
156 }
157 }
158
159 s.buf[s.e] = sentinel
160 s.ioerr = io.ErrNoProgress
161 }
162
// nextSize returns the next larger size for a buffer that currently has the
// given size: the minimum size for anything smaller than it, twice the size
// up to and including the doubling limit, and the size plus the limit above
// that.
func nextSize(size int32) int32 {
	const (
		smallest    = 4 << 10 // 4 KiB: minimum buffer size
		doubleLimit = 1 << 20 // 1 MiB: largest size that is still doubled
	)
	if size >= smallest {
		if size > doubleLimit {
			return size + doubleLimit
		}
		return size << 1
	}
	return smallest
}
174