1 // Copyright 2021 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 5 package pkgbits
6 7 import (
8 "bytes"
9 "crypto/md5"
10 "encoding/binary"
11 "go/constant"
12 "io"
13 "math/big"
14 "runtime"
15 "strings"
16 )
17 18 // A PkgEncoder provides methods for encoding a package's Unified IR
19 // export data.
20 type PkgEncoder struct {
21 // version of the bitstream.
22 version Version
23 24 // elems holds the bitstream for previously encoded elements.
25 elems [numRelocs][]string
26 27 // stringsIdx maps previously encoded strings to their index within
28 // the RelocString section, to allow deduplication. That is,
29 // elems[RelocString][stringsIdx[s]] == s (if present).
30 stringsIdx map[string]Index
31 32 // syncFrames is the number of frames to write at each sync
33 // marker. A negative value means sync markers are omitted.
34 syncFrames int
35 }
36 37 // SyncMarkers reports whether pw uses sync markers.
38 func (pw *PkgEncoder) SyncMarkers() bool { return pw.syncFrames >= 0 }
39 40 // NewPkgEncoder returns an initialized PkgEncoder.
41 //
42 // syncFrames is the number of caller frames that should be serialized
43 // at Sync points. Serializing additional frames results in larger
44 // export data files, but can help diagnosing desync errors in
45 // higher-level Unified IR reader/writer code. If syncFrames is
46 // negative, then sync markers are omitted entirely.
47 func NewPkgEncoder(version Version, syncFrames int) PkgEncoder {
48 return PkgEncoder{
49 version: version,
50 stringsIdx: make(map[string]Index),
51 syncFrames: syncFrames,
52 }
53 }
54 55 // DumpTo writes the package's encoded data to out0 and returns the
56 // package fingerprint.
57 func (pw *PkgEncoder) DumpTo(out0 io.Writer) (fingerprint [8]byte) {
58 h := md5.New()
59 out := io.MultiWriter(out0, h)
60 61 writeUint32 := func(x uint32) {
62 assert(binary.Write(out, binary.LittleEndian, x) == nil)
63 }
64 65 writeUint32(uint32(pw.version))
66 67 if pw.version.Has(Flags) {
68 var flags uint32
69 if pw.SyncMarkers() {
70 flags |= flagSyncMarkers
71 }
72 writeUint32(flags)
73 }
74 75 // Write elemEndsEnds.
76 var sum uint32
77 for _, elems := range &pw.elems {
78 sum += uint32(len(elems))
79 writeUint32(sum)
80 }
81 82 // Write elemEnds.
83 sum = 0
84 for _, elems := range &pw.elems {
85 for _, elem := range elems {
86 sum += uint32(len(elem))
87 writeUint32(sum)
88 }
89 }
90 91 // Write elemData.
92 for _, elems := range &pw.elems {
93 for _, elem := range elems {
94 _, err := io.WriteString(out, elem)
95 assert(err == nil)
96 }
97 }
98 99 // Write fingerprint.
100 copy(fingerprint[:], h.Sum(nil))
101 _, err := out0.Write(fingerprint[:])
102 assert(err == nil)
103 104 return
105 }
106 107 // StringIdx adds a string value to the strings section, if not
108 // already present, and returns its index.
109 func (pw *PkgEncoder) StringIdx(s string) Index {
110 if idx, ok := pw.stringsIdx[s]; ok {
111 assert(pw.elems[RelocString][idx] == s)
112 return idx
113 }
114 115 idx := Index(len(pw.elems[RelocString]))
116 pw.elems[RelocString] = append(pw.elems[RelocString], s)
117 pw.stringsIdx[s] = idx
118 return idx
119 }
120 121 // NewEncoder returns an Encoder for a new element within the given
122 // section, and encodes the given SyncMarker as the start of the
123 // element bitstream.
124 func (pw *PkgEncoder) NewEncoder(k RelocKind, marker SyncMarker) Encoder {
125 e := pw.NewEncoderRaw(k)
126 e.Sync(marker)
127 return e
128 }
129 130 // NewEncoderRaw returns an Encoder for a new element within the given
131 // section.
132 //
133 // Most callers should use NewEncoder instead.
134 func (pw *PkgEncoder) NewEncoderRaw(k RelocKind) Encoder {
135 idx := Index(len(pw.elems[k]))
136 pw.elems[k] = append(pw.elems[k], "") // placeholder
137 138 return Encoder{
139 p: pw,
140 k: k,
141 Idx: idx,
142 }
143 }
144 145 // An Encoder provides methods for encoding an individual element's
146 // bitstream data.
147 type Encoder struct {
148 p *PkgEncoder
149 150 Relocs []RelocEnt
151 RelocMap map[RelocEnt]uint32
152 Data bytes.Buffer // accumulated element bitstream data
153 154 encodingRelocHeader bool
155 156 k RelocKind
157 Idx Index // index within relocation section
158 }
159 160 // Flush finalizes the element's bitstream and returns its Index.
161 func (w *Encoder) Flush() Index {
162 var sb strings.Builder
163 164 // Backup the data so we write the relocations at the front.
165 var tmp bytes.Buffer
166 io.Copy(&tmp, &w.Data)
167 168 // TODO(mdempsky): Consider writing these out separately so they're
169 // easier to strip, along with function bodies, so that we can prune
170 // down to just the data that's relevant to go/types.
171 if w.encodingRelocHeader {
172 panic("encodingRelocHeader already true; recursive flush?")
173 }
174 w.encodingRelocHeader = true
175 w.Sync(SyncRelocs)
176 w.Len(len(w.Relocs))
177 for _, rEnt := range w.Relocs {
178 w.Sync(SyncReloc)
179 w.Len(int(rEnt.Kind))
180 w.Len(int(rEnt.Idx))
181 }
182 183 io.Copy(&sb, &w.Data)
184 io.Copy(&sb, &tmp)
185 w.p.elems[w.k][w.Idx] = sb.String()
186 187 return w.Idx
188 }
189 190 func (w *Encoder) checkErr(err error) {
191 if err != nil {
192 panicf("unexpected encoding error: %v", err)
193 }
194 }
195 196 func (w *Encoder) rawUvarint(x uint64) {
197 var buf [binary.MaxVarintLen64]byte
198 n := binary.PutUvarint(buf[:], x)
199 _, err := w.Data.Write(buf[:n])
200 w.checkErr(err)
201 }
202 203 func (w *Encoder) rawVarint(x int64) {
204 // Zig-zag encode.
205 ux := uint64(x) << 1
206 if x < 0 {
207 ux = ^ux
208 }
209 210 w.rawUvarint(ux)
211 }
212 213 func (w *Encoder) rawReloc(r RelocKind, idx Index) int {
214 e := RelocEnt{r, idx}
215 if w.RelocMap != nil {
216 if i, ok := w.RelocMap[e]; ok {
217 return int(i)
218 }
219 } else {
220 w.RelocMap = make(map[RelocEnt]uint32)
221 }
222 223 i := len(w.Relocs)
224 w.RelocMap[e] = uint32(i)
225 w.Relocs = append(w.Relocs, e)
226 return i
227 }
228 229 func (w *Encoder) Sync(m SyncMarker) {
230 if !w.p.SyncMarkers() {
231 return
232 }
233 234 // Writing out stack frame string references requires working
235 // relocations, but writing out the relocations themselves involves
236 // sync markers. To prevent infinite recursion, we simply trim the
237 // stack frame for sync markers within the relocation header.
238 var frames []string
239 if !w.encodingRelocHeader && w.p.syncFrames > 0 {
240 pcs := make([]uintptr, w.p.syncFrames)
241 n := runtime.Callers(2, pcs)
242 frames = fmtFrames(pcs[:n]...)
243 }
244 245 // TODO(mdempsky): Save space by writing out stack frames as a
246 // linked list so we can share common stack frames.
247 w.rawUvarint(uint64(m))
248 w.rawUvarint(uint64(len(frames)))
249 for _, frame := range frames {
250 w.rawUvarint(uint64(w.rawReloc(RelocString, w.p.StringIdx(frame))))
251 }
252 }
253 254 // Bool encodes and writes a bool value into the element bitstream,
255 // and then returns the bool value.
256 //
257 // For simple, 2-alternative encodings, the idiomatic way to call Bool
258 // is something like:
259 //
260 // if w.Bool(x != 0) {
261 // // alternative #1
262 // } else {
263 // // alternative #2
264 // }
265 //
266 // For multi-alternative encodings, use Code instead.
267 func (w *Encoder) Bool(b bool) bool {
268 w.Sync(SyncBool)
269 var x byte
270 if b {
271 x = 1
272 }
273 err := w.Data.WriteByte(x)
274 w.checkErr(err)
275 return b
276 }
277 278 // Int64 encodes and writes an int64 value into the element bitstream.
279 func (w *Encoder) Int64(x int64) {
280 w.Sync(SyncInt64)
281 w.rawVarint(x)
282 }
283 284 // Uint64 encodes and writes a uint64 value into the element bitstream.
285 func (w *Encoder) Uint64(x uint64) {
286 w.Sync(SyncUint64)
287 w.rawUvarint(x)
288 }
289 290 // Len encodes and writes a non-negative int value into the element bitstream.
291 func (w *Encoder) Len(x int) { assert(x >= 0); w.Uint64(uint64(x)) }
292 293 // Int encodes and writes an int value into the element bitstream.
294 func (w *Encoder) Int(x int) { w.Int64(int64(x)) }
295 296 // Uint encodes and writes a uint value into the element bitstream.
297 func (w *Encoder) Uint(x uint) { w.Uint64(uint64(x)) }
298 299 // Reloc encodes and writes a relocation for the given (section,
300 // index) pair into the element bitstream.
301 //
302 // Note: Only the index is formally written into the element
303 // bitstream, so bitstream decoders must know from context which
304 // section an encoded relocation refers to.
305 func (w *Encoder) Reloc(r RelocKind, idx Index) {
306 w.Sync(SyncUseReloc)
307 w.Len(w.rawReloc(r, idx))
308 }
309 310 // Code encodes and writes a Code value into the element bitstream.
311 func (w *Encoder) Code(c Code) {
312 w.Sync(c.Marker())
313 w.Len(c.Value())
314 }
315 316 // String encodes and writes a string value into the element
317 // bitstream.
318 //
319 // Internally, strings are deduplicated by adding them to the strings
320 // section (if not already present), and then writing a relocation
321 // into the element bitstream.
322 func (w *Encoder) String(s string) {
323 w.StringRef(w.p.StringIdx(s))
324 }
325 326 // StringRef writes a reference to the given index, which must be a
327 // previously encoded string value.
328 func (w *Encoder) StringRef(idx Index) {
329 w.Sync(SyncString)
330 w.Reloc(RelocString, idx)
331 }
332 333 // Strings encodes and writes a variable-length slice of strings into
334 // the element bitstream.
335 func (w *Encoder) Strings(ss []string) {
336 w.Len(len(ss))
337 for _, s := range ss {
338 w.String(s)
339 }
340 }
341 342 // Value encodes and writes a constant.Value into the element
343 // bitstream.
344 func (w *Encoder) Value(val constant.Value) {
345 w.Sync(SyncValue)
346 if w.Bool(val.Kind() == constant.Complex) {
347 w.scalar(constant.Real(val))
348 w.scalar(constant.Imag(val))
349 } else {
350 w.scalar(val)
351 }
352 }
353 354 func (w *Encoder) scalar(val constant.Value) {
355 switch v := constant.Val(val).(type) {
356 default:
357 panicf("unhandled %v (%v)", val, val.Kind())
358 case bool:
359 w.Code(ValBool)
360 w.Bool(v)
361 case string:
362 w.Code(ValString)
363 w.String(v)
364 case int64:
365 w.Code(ValInt64)
366 w.Int64(v)
367 case *big.Int:
368 w.Code(ValBigInt)
369 w.bigInt(v)
370 case *big.Rat:
371 w.Code(ValBigRat)
372 w.bigInt(v.Num())
373 w.bigInt(v.Denom())
374 case *big.Float:
375 w.Code(ValBigFloat)
376 w.bigFloat(v)
377 }
378 }
379 380 func (w *Encoder) bigInt(v *big.Int) {
381 b := v.Bytes()
382 w.String(string(b)) // TODO: More efficient encoding.
383 w.Bool(v.Sign() < 0)
384 }
385 386 func (w *Encoder) bigFloat(v *big.Float) {
387 b := v.Append(nil, 'p', -1)
388 w.String(string(b)) // TODO: More efficient encoding.
389 }
390 391 // Version reports the version of the bitstream.
392 func (w *Encoder) Version() Version { return w.p.version }
393