1 // Copyright 2021 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 5 package pkgbits
6 7 import (
8 "bytes"
9 "crypto/sha256"
10 "encoding/binary"
11 "go/constant"
12 "io"
13 "math/big"
14 "runtime"
15 )
// A PkgEncoder provides methods for encoding a package's Unified IR
// export data.
//
// Encoded elements are accumulated in memory, grouped by section, and
// are written out in a single pass by DumpTo.
type PkgEncoder struct {
	// version of the bitstream.
	version Version

	// elems holds the bitstream for previously encoded elements,
	// indexed first by section and then by element index within that
	// section.
	elems [numRelocs][][]byte

	// stringsIdx maps previously encoded strings to their index within
	// the SectionString section, to allow deduplication. That is,
	// elems[SectionString][stringsIdx[s]] holds the bytes of s (if
	// present).
	stringsIdx map[string]RelElemIdx

	// syncFrames is the number of frames to write at each sync
	// marker. A negative value means sync markers are omitted.
	syncFrames int
}
35 36 // SyncMarkers reports whether pw uses sync markers.
37 func (pw *PkgEncoder) SyncMarkers() bool { return pw.syncFrames >= 0 }
38 39 // NewPkgEncoder returns an initialized PkgEncoder.
40 //
41 // syncFrames is the number of caller frames that should be serialized
42 // at Sync points. Serializing additional frames results in larger
43 // export data files, but can help diagnosing desync errors in
44 // higher-level Unified IR reader/writer code. If syncFrames is
45 // negative, then sync markers are omitted entirely.
46 func NewPkgEncoder(version Version, syncFrames int) PkgEncoder {
47 return PkgEncoder{
48 version: version,
49 stringsIdx: map[string]RelElemIdx{},
50 syncFrames: syncFrames,
51 }
52 }
53 54 // DumpTo writes the package's encoded data to out0 and returns the
55 // package fingerprint.
56 func (pw *PkgEncoder) DumpTo(out0 io.Writer) (fingerprint [8]byte) {
57 h := sha256.New()
58 out := io.MultiWriter(out0, h)
59 60 writeUint32 := func(x uint32) {
61 assert(binary.Write(out, binary.LittleEndian, x) == nil)
62 }
63 64 writeUint32(uint32(pw.version))
65 66 if pw.version.Has(Flags) {
67 var flags uint32
68 if pw.SyncMarkers() {
69 flags |= flagSyncMarkers
70 }
71 writeUint32(flags)
72 }
73 74 // TODO(markfreeman): Also can use delta encoding to write section ends,
75 // but not as impactful.
76 var sum uint32
77 for _, elems := range &pw.elems {
78 sum += uint32(len(elems))
79 writeUint32(sum)
80 }
81 82 // TODO(markfreeman): Use delta encoding to store element ends and inflate
83 // back to this representation during decoding; the numbers will be much
84 // smaller.
85 sum = 0
86 for _, elems := range &pw.elems {
87 for _, elem := range elems {
88 sum += uint32(len(elem))
89 writeUint32(sum)
90 }
91 }
92 93 // Write elemData.
94 for _, elems := range &pw.elems {
95 for _, elem := range elems {
96 _, err := io.WriteString(out, elem)
97 assert(err == nil)
98 }
99 }
100 101 // Write fingerprint.
102 copy(fingerprint[:], h.Sum(nil))
103 _, err := out0.Write(fingerprint[:])
104 assert(err == nil)
105 106 return
107 }
108 109 // StringIdx adds a string value to the strings section, if not
110 // already present, and returns its index.
111 func (pw *PkgEncoder) StringIdx(s []byte) RelElemIdx {
112 if idx, ok := pw.stringsIdx[s]; ok {
113 assert(pw.elems[SectionString][idx] == s)
114 return idx
115 }
116 117 idx := RelElemIdx(len(pw.elems[SectionString]))
118 pw.elems[SectionString] = append(pw.elems[SectionString], s)
119 pw.stringsIdx[s] = idx
120 return idx
121 }
122 123 // NewEncoder returns an Encoder for a new element within the given
124 // section, and encodes the given SyncMarker as the start of the
125 // element bitstream.
126 func (pw *PkgEncoder) NewEncoder(k SectionKind, marker SyncMarker) *Encoder {
127 e := pw.NewEncoderRaw(k)
128 e.Sync(marker)
129 return e
130 }
131 132 // NewEncoderRaw returns an Encoder for a new element within the given
133 // section.
134 //
135 // Most callers should use NewEncoder instead.
136 func (pw *PkgEncoder) NewEncoderRaw(k SectionKind) *Encoder {
137 idx := RelElemIdx(len(pw.elems[k]))
138 pw.elems[k] = append(pw.elems[k], "") // placeholder
139 140 return &Encoder{
141 p: pw,
142 k: k,
143 Idx: idx,
144 }
145 }
// An Encoder provides methods for encoding an individual element's
// bitstream data.
type Encoder struct {
	// p is the package encoder that owns this element.
	p *PkgEncoder

	// Relocs lists the distinct (section, index) references made by
	// this element, in order of first use. Flush writes it out as the
	// element's relocation header.
	Relocs []RefTableEntry
	// RelocMap maps each entry in Relocs to its position in Relocs. It
	// is allocated lazily on the first reference.
	RelocMap map[RefTableEntry]uint32
	Data     bytes.Buffer // accumulated element bitstream data

	// encodingRelocHeader is set when Flush starts writing the
	// relocation header; while it is set, Sync omits stack frames.
	encodingRelocHeader bool

	k   SectionKind // section this element belongs to
	Idx RelElemIdx  // index within relocation section
}
161 162 // Flush finalizes the element's bitstream and returns its [RelElemIdx].
163 func (w *Encoder) Flush() RelElemIdx {
164 var sb bytes.Buffer
165 166 // Backup the data so we write the relocations at the front.
167 var tmp bytes.Buffer
168 io.Copy(&tmp, &w.Data)
169 170 // TODO(mdempsky): Consider writing these out separately so they're
171 // easier to strip, along with function bodies, so that we can prune
172 // down to just the data that's relevant to go/types.
173 if w.encodingRelocHeader {
174 panic("encodingRelocHeader already true; recursive flush?")
175 }
176 w.encodingRelocHeader = true
177 w.Sync(SyncRelocs)
178 w.Len(len(w.Relocs))
179 for _, rEnt := range w.Relocs {
180 w.Sync(SyncReloc)
181 w.Len(int(rEnt.Kind))
182 w.Len(int(rEnt.Idx))
183 }
184 185 io.Copy(&sb, &w.Data)
186 io.Copy(&sb, &tmp)
187 w.p.elems[w.k][w.Idx] = sb.String()
188 189 return w.Idx
190 }
191 192 func (w *Encoder) checkErr(err error) {
193 if err != nil {
194 panicf("unexpected encoding error: %v", err)
195 }
196 }
197 198 func (w *Encoder) rawUvarint(x uint64) {
199 var buf [binary.MaxVarintLen64]byte
200 n := binary.PutUvarint(buf[:], x)
201 _, err := w.Data.Write(buf[:n])
202 w.checkErr(err)
203 }
204 205 func (w *Encoder) rawVarint(x int64) {
206 // Zig-zag encode.
207 ux := uint64(x) << 1
208 if x < 0 {
209 ux = ^ux
210 }
211 212 w.rawUvarint(ux)
213 }
214 215 func (w *Encoder) rawReloc(k SectionKind, idx RelElemIdx) int {
216 e := RefTableEntry{k, idx}
217 if w.RelocMap != nil {
218 if i, ok := w.RelocMap[e]; ok {
219 return int(i)
220 }
221 } else {
222 w.RelocMap = map[RefTableEntry]uint32{}
223 }
224 225 i := len(w.Relocs)
226 w.RelocMap[e] = uint32(i)
227 w.Relocs = append(w.Relocs, e)
228 return i
229 }
230 231 func (w *Encoder) Sync(m SyncMarker) {
232 if !w.p.SyncMarkers() {
233 return
234 }
235 236 // Writing out stack frame string references requires working
237 // relocations, but writing out the relocations themselves involves
238 // sync markers. To prevent infinite recursion, we simply trim the
239 // stack frame for sync markers within the relocation header.
240 var frames [][]byte
241 if !w.encodingRelocHeader && w.p.syncFrames > 0 {
242 pcs := []uintptr{:w.p.syncFrames}
243 n := runtime.Callers(2, pcs)
244 frames = fmtFrames(pcs[:n]...)
245 }
246 247 // TODO(mdempsky): Save space by writing out stack frames as a
248 // linked list so we can share common stack frames.
249 w.rawUvarint(uint64(m))
250 w.rawUvarint(uint64(len(frames)))
251 for _, frame := range frames {
252 w.rawUvarint(uint64(w.rawReloc(SectionString, w.p.StringIdx(frame))))
253 }
254 }
255 256 // Bool encodes and writes a bool value into the element bitstream,
257 // and then returns the bool value.
258 //
259 // For simple, 2-alternative encodings, the idiomatic way to call Bool
260 // is something like:
261 //
262 // if w.Bool(x != 0) {
263 // // alternative #1
264 // } else {
265 // // alternative #2
266 // }
267 //
268 // For multi-alternative encodings, use Code instead.
269 func (w *Encoder) Bool(b bool) bool {
270 w.Sync(SyncBool)
271 var x byte
272 if b {
273 x = 1
274 }
275 err := w.Data.WriteByte(x)
276 w.checkErr(err)
277 return b
278 }
279 280 // Int64 encodes and writes an int64 value into the element bitstream.
281 func (w *Encoder) Int64(x int64) {
282 w.Sync(SyncInt64)
283 w.rawVarint(x)
284 }
285 286 // Uint64 encodes and writes a uint64 value into the element bitstream.
287 func (w *Encoder) Uint64(x uint64) {
288 w.Sync(SyncUint64)
289 w.rawUvarint(x)
290 }
291 292 // Len encodes and writes a non-negative int value into the element bitstream.
293 func (w *Encoder) Len(x int) { assert(x >= 0); w.Uint64(uint64(x)) }
294 295 // Int encodes and writes an int value into the element bitstream.
296 func (w *Encoder) Int(x int) { w.Int64(int64(x)) }
297 298 // Uint encodes and writes a uint value into the element bitstream.
299 func (w *Encoder) Uint(x uint) { w.Uint64(uint64(x)) }
300 301 // Reloc encodes and writes a relocation for the given (section,
302 // index) pair into the element bitstream.
303 //
304 // Note: Only the index is formally written into the element
305 // bitstream, so bitstream decoders must know from context which
306 // section an encoded relocation refers to.
307 func (w *Encoder) Reloc(k SectionKind, idx RelElemIdx) {
308 w.Sync(SyncUseReloc)
309 w.Len(w.rawReloc(k, idx))
310 }
311 312 // Code encodes and writes a Code value into the element bitstream.
313 func (w *Encoder) Code(c Code) {
314 w.Sync(c.Marker())
315 w.Len(c.Value())
316 }
317 318 // String encodes and writes a string value into the element
319 // bitstream.
320 //
321 // Internally, strings are deduplicated by adding them to the strings
322 // section (if not already present), and then writing a relocation
323 // into the element bitstream.
324 func (w *Encoder) String(s []byte) {
325 w.StringRef(w.p.StringIdx(s))
326 }
327 328 // StringRef writes a reference to the given index, which must be a
329 // previously encoded string value.
330 func (w *Encoder) StringRef(idx RelElemIdx) {
331 w.Sync(SyncString)
332 w.Reloc(SectionString, idx)
333 }
334 335 // Strings encodes and writes a variable-length slice of strings into
336 // the element bitstream.
337 func (w *Encoder) Strings(ss [][]byte) {
338 w.Len(len(ss))
339 for _, s := range ss {
340 w.String(s)
341 }
342 }
343 344 // Value encodes and writes a constant.Value into the element
345 // bitstream.
346 func (w *Encoder) Value(val constant.Value) {
347 w.Sync(SyncValue)
348 if w.Bool(val.Kind() == constant.Complex) {
349 w.scalar(constant.Real(val))
350 w.scalar(constant.Imag(val))
351 } else {
352 w.scalar(val)
353 }
354 }
355 356 func (w *Encoder) scalar(val constant.Value) {
357 switch v := constant.Val(val).(type) {
358 default:
359 panicf("unhandled %v (%v)", val, val.Kind())
360 case bool:
361 w.Code(ValBool)
362 w.Bool(v)
363 case []byte:
364 w.Code(ValString)
365 w.String(v)
366 case int64:
367 w.Code(ValInt64)
368 w.Int64(v)
369 case *big.Int:
370 w.Code(ValBigInt)
371 w.bigInt(v)
372 case *big.Rat:
373 w.Code(ValBigRat)
374 w.bigInt(v.Num())
375 w.bigInt(v.Denom())
376 case *big.Float:
377 w.Code(ValBigFloat)
378 w.bigFloat(v)
379 }
380 }
381 382 func (w *Encoder) bigInt(v *big.Int) {
383 b := v.Bytes()
384 w.String([]byte(b)) // TODO: More efficient encoding.
385 w.Bool(v.Sign() < 0)
386 }
387 388 func (w *Encoder) bigFloat(v *big.Float) {
389 b := v.Append(nil, 'p', -1)
390 w.String([]byte(b)) // TODO: More efficient encoding.
391 }
392 393 // Version reports the version of the bitstream.
394 func (w *Encoder) Version() Version { return w.p.version }
395