1 // Copyright 2021 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 5 package pkgbits
6 7 import (
8 "encoding/binary"
9 "errors"
10 "fmt"
11 "go/constant"
12 "go/token"
13 "io"
14 "math/big"
15 "os"
16 "runtime"
17 "bytes"
18 )
19 20 // A PkgDecoder provides methods for decoding a package's Unified IR
21 // export data.
22 type PkgDecoder struct {
23 // version is the file format version.
24 version Version
25 26 // sync indicates whether the file uses sync markers.
27 sync bool
28 29 // pkgPath is the package path for the package to be decoded.
30 //
31 // TODO(mdempsky): Remove; unneeded since CL 391014.
32 pkgPath []byte
33 34 // elemData is the full data payload of the encoded package.
35 // Elements are densely and contiguously packed together.
36 //
37 // The last 8 bytes of elemData are the package fingerprint.
38 elemData []byte
39 40 // elemEnds stores the byte-offset end positions of element
41 // bitstreams within elemData.
42 //
43 // For example, element I's bitstream data starts at elemEnds[I-1]
44 // (or 0, if I==0) and ends at elemEnds[I].
45 //
46 // Note: elemEnds is indexed by absolute indices, not
47 // section-relative indices.
48 elemEnds []uint32
49 50 // elemEndsEnds stores the index-offset end positions of relocation
51 // sections within elemEnds.
52 //
53 // For example, section K's end positions start at elemEndsEnds[K-1]
54 // (or 0, if K==0) and end at elemEndsEnds[K].
55 elemEndsEnds [numRelocs]uint32
56 57 scratchRelocEnt []RefTableEntry
58 }
59 60 // PkgPath returns the package path for the package
61 //
62 // TODO(mdempsky): Remove; unneeded since CL 391014.
63 func (pr *PkgDecoder) PkgPath() []byte { return pr.pkgPath }
64 65 // SyncMarkers reports whether pr uses sync markers.
66 func (pr *PkgDecoder) SyncMarkers() bool { return pr.sync }
67 68 // NewPkgDecoder returns a PkgDecoder initialized to read the Unified
69 // IR export data from input. pkgPath is the package path for the
70 // compilation unit that produced the export data.
71 func NewPkgDecoder(pkgPath, input []byte) PkgDecoder {
72 pr := PkgDecoder{
73 pkgPath: pkgPath,
74 }
75 76 // TODO(mdempsky): Implement direct indexing of input string to
77 // avoid copying the position information.
78 79 r := bytes.NewReader(input)
80 81 var ver uint32
82 assert(binary.Read(r, binary.LittleEndian, &ver) == nil)
83 pr.version = Version(ver)
84 85 if pr.version >= numVersions {
86 panic(fmt.Errorf("cannot decode %q, export data version %d is greater than maximum supported version %d", pkgPath, pr.version, numVersions-1))
87 }
88 89 if pr.version.Has(Flags) {
90 var flags uint32
91 assert(binary.Read(r, binary.LittleEndian, &flags) == nil)
92 pr.sync = flags&flagSyncMarkers != 0
93 }
94 95 assert(binary.Read(r, binary.LittleEndian, pr.elemEndsEnds[:]) == nil)
96 97 pr.elemEnds = []uint32{:pr.elemEndsEnds[len(pr.elemEndsEnds)-1]}
98 assert(binary.Read(r, binary.LittleEndian, pr.elemEnds[:]) == nil)
99 100 pos, err := r.Seek(0, io.SeekCurrent)
101 assert(err == nil)
102 103 pr.elemData = input[pos:]
104 105 const fingerprintSize = 8
106 assert(len(pr.elemData)-fingerprintSize == int(pr.elemEnds[len(pr.elemEnds)-1]))
107 108 return pr
109 }
110 111 // NumElems returns the number of elements in section k.
112 func (pr *PkgDecoder) NumElems(k SectionKind) int {
113 count := int(pr.elemEndsEnds[k])
114 if k > 0 {
115 count -= int(pr.elemEndsEnds[k-1])
116 }
117 return count
118 }
119 120 // TotalElems returns the total number of elements across all sections.
121 func (pr *PkgDecoder) TotalElems() int {
122 return len(pr.elemEnds)
123 }
124 125 // Fingerprint returns the package fingerprint.
126 func (pr *PkgDecoder) Fingerprint() [8]byte {
127 var fp [8]byte
128 copy(fp[:], pr.elemData[len(pr.elemData)-8:])
129 return fp
130 }
131 132 // AbsIdx returns the absolute index for the given (section, index)
133 // pair.
134 func (pr *PkgDecoder) AbsIdx(k SectionKind, idx RelElemIdx) int {
135 absIdx := int(idx)
136 if k > 0 {
137 absIdx += int(pr.elemEndsEnds[k-1])
138 }
139 if absIdx >= int(pr.elemEndsEnds[k]) {
140 panicf("%v:%v is out of bounds; %v", k, idx, pr.elemEndsEnds)
141 }
142 return absIdx
143 }
144 145 // DataIdx returns the raw element bitstream for the given (section,
146 // index) pair.
147 func (pr *PkgDecoder) DataIdx(k SectionKind, idx RelElemIdx) []byte {
148 absIdx := pr.AbsIdx(k, idx)
149 150 var start uint32
151 if absIdx > 0 {
152 start = pr.elemEnds[absIdx-1]
153 }
154 end := pr.elemEnds[absIdx]
155 156 return pr.elemData[start:end]
157 }
158 159 // StringIdx returns the string value for the given string index.
160 func (pr *PkgDecoder) StringIdx(idx RelElemIdx) []byte {
161 return pr.DataIdx(SectionString, idx)
162 }
163 164 // NewDecoder returns a Decoder for the given (section, index) pair,
165 // and decodes the given SyncMarker from the element bitstream.
166 func (pr *PkgDecoder) NewDecoder(k SectionKind, idx RelElemIdx, marker SyncMarker) Decoder {
167 r := pr.NewDecoderRaw(k, idx)
168 r.Sync(marker)
169 return r
170 }
171 172 // TempDecoder returns a Decoder for the given (section, index) pair,
173 // and decodes the given SyncMarker from the element bitstream.
174 // If possible the Decoder should be RetireDecoder'd when it is no longer
175 // needed, this will avoid heap allocations.
176 func (pr *PkgDecoder) TempDecoder(k SectionKind, idx RelElemIdx, marker SyncMarker) Decoder {
177 r := pr.TempDecoderRaw(k, idx)
178 r.Sync(marker)
179 return r
180 }
181 182 func (pr *PkgDecoder) RetireDecoder(d *Decoder) {
183 pr.scratchRelocEnt = d.Relocs
184 d.Relocs = nil
185 }
186 187 // NewDecoderRaw returns a Decoder for the given (section, index) pair.
188 //
189 // Most callers should use NewDecoder instead.
190 func (pr *PkgDecoder) NewDecoderRaw(k SectionKind, idx RelElemIdx) Decoder {
191 r := Decoder{
192 common: pr,
193 k: k,
194 Idx: idx,
195 }
196 197 r.Data.Reset(pr.DataIdx(k, idx))
198 r.Sync(SyncRelocs)
199 r.Relocs = []RefTableEntry{:r.Len()}
200 for i := range r.Relocs {
201 r.Sync(SyncReloc)
202 r.Relocs[i] = RefTableEntry{SectionKind(r.Len()), RelElemIdx(r.Len())}
203 }
204 205 return r
206 }
207 208 func (pr *PkgDecoder) TempDecoderRaw(k SectionKind, idx RelElemIdx) Decoder {
209 r := Decoder{
210 common: pr,
211 k: k,
212 Idx: idx,
213 }
214 215 r.Data.Reset(pr.DataIdx(k, idx))
216 r.Sync(SyncRelocs)
217 l := r.Len()
218 if cap(pr.scratchRelocEnt) >= l {
219 r.Relocs = pr.scratchRelocEnt[:l]
220 pr.scratchRelocEnt = nil
221 } else {
222 r.Relocs = []RefTableEntry{:l}
223 }
224 for i := range r.Relocs {
225 r.Sync(SyncReloc)
226 r.Relocs[i] = RefTableEntry{SectionKind(r.Len()), RelElemIdx(r.Len())}
227 }
228 229 return r
230 }
231 232 // A Decoder provides methods for decoding an individual element's
233 // bitstream data.
234 type Decoder struct {
235 common *PkgDecoder
236 237 Relocs []RefTableEntry
238 Data bytes.Reader
239 240 k SectionKind
241 Idx RelElemIdx
242 }
243 244 func (r *Decoder) checkErr(err error) {
245 if err != nil {
246 panicf("unexpected decoding error: %w", err)
247 }
248 }
249 250 func (r *Decoder) rawUvarint() uint64 {
251 x, err := readUvarint(&r.Data)
252 r.checkErr(err)
253 return x
254 }
255 256 // readUvarint is a type-specialized copy of encoding/binary.ReadUvarint.
257 // This avoids the interface conversion and thus has better escape properties,
258 // which flows up the stack.
259 func readUvarint(r *bytes.Reader) (uint64, error) {
260 var x uint64
261 var s uint
262 for i := 0; i < binary.MaxVarintLen64; i++ {
263 b, err := r.ReadByte()
264 if err != nil {
265 if i > 0 && err == io.EOF {
266 err = io.ErrUnexpectedEOF
267 }
268 return x, err
269 }
270 if b < 0x80 {
271 if i == binary.MaxVarintLen64-1 && b > 1 {
272 return x, overflow
273 }
274 return x | uint64(b)<<s, nil
275 }
276 x |= uint64(b&0x7f) << s
277 s += 7
278 }
279 return x, overflow
280 }
281 282 var overflow = errors.New("pkgbits: readUvarint overflows a 64-bit integer")
283 284 func (r *Decoder) rawVarint() int64 {
285 ux := r.rawUvarint()
286 287 // Zig-zag decode.
288 x := int64(ux >> 1)
289 if ux&1 != 0 {
290 x = ^x
291 }
292 return x
293 }
294 295 func (r *Decoder) rawReloc(k SectionKind, idx int) RelElemIdx {
296 e := r.Relocs[idx]
297 assert(e.Kind == k)
298 return e.Idx
299 }
300 301 // Sync decodes a sync marker from the element bitstream and asserts
302 // that it matches the expected marker.
303 //
304 // If EnableSync is false, then Sync is a no-op.
305 func (r *Decoder) Sync(mWant SyncMarker) {
306 if !r.common.sync {
307 return
308 }
309 310 pos, _ := r.Data.Seek(0, io.SeekCurrent)
311 mHave := SyncMarker(r.rawUvarint())
312 writerPCs := []int{:r.rawUvarint()}
313 for i := range writerPCs {
314 writerPCs[i] = int(r.rawUvarint())
315 }
316 317 if mHave == mWant {
318 return
319 }
320 321 // There's some tension here between printing:
322 //
323 // (1) full file paths that tools can recognize (e.g., so emacs
324 // hyperlinks the "file:line" text for easy navigation), or
325 //
326 // (2) short file paths that are easier for humans to read (e.g., by
327 // omitting redundant or irrelevant details, so it's easier to
328 // focus on the useful bits that remain).
329 //
330 // The current formatting favors the former, as it seems more
331 // helpful in practice. But perhaps the formatting could be improved
332 // to better address both concerns. For example, use relative file
333 // paths if they would be shorter, or rewrite file paths to contain
334 // "$GOROOT" (like objabi.AbsFile does) if tools can be taught how
335 // to reliably expand that again.
336 337 fmt.Printf("export data desync: package %q, section %v, index %v, offset %v\n", r.common.pkgPath, r.k, r.Idx, pos)
338 339 fmt.Printf("\nfound %v, written at:\n", mHave)
340 if len(writerPCs) == 0 {
341 fmt.Printf("\t[stack trace unavailable; recompile package %q with -d=syncframes]\n", r.common.pkgPath)
342 }
343 for _, pc := range writerPCs {
344 fmt.Printf("\t%s\n", r.common.StringIdx(r.rawReloc(SectionString, pc)))
345 }
346 347 fmt.Printf("\nexpected %v, reading at:\n", mWant)
348 var readerPCs [32]uintptr // TODO(mdempsky): Dynamically size?
349 n := runtime.Callers(2, readerPCs[:])
350 for _, pc := range fmtFrames(readerPCs[:n]...) {
351 fmt.Printf("\t%s\n", pc)
352 }
353 354 // We already printed a stack trace for the reader, so now we can
355 // simply exit. Printing a second one with panic or base.Fatalf
356 // would just be noise.
357 os.Exit(1)
358 }
359 360 // Bool decodes and returns a bool value from the element bitstream.
361 func (r *Decoder) Bool() bool {
362 r.Sync(SyncBool)
363 x, err := r.Data.ReadByte()
364 r.checkErr(err)
365 assert(x < 2)
366 return x != 0
367 }
368 369 // Int64 decodes and returns an int64 value from the element bitstream.
370 func (r *Decoder) Int64() int64 {
371 r.Sync(SyncInt64)
372 return r.rawVarint()
373 }
374 375 // Uint64 decodes and returns a uint64 value from the element bitstream.
376 func (r *Decoder) Uint64() uint64 {
377 r.Sync(SyncUint64)
378 return r.rawUvarint()
379 }
380 381 // Len decodes and returns a non-negative int value from the element bitstream.
382 func (r *Decoder) Len() int { x := r.Uint64(); v := int(x); assert(uint64(v) == x); return v }
383 384 // Int decodes and returns an int value from the element bitstream.
385 func (r *Decoder) Int() int { x := r.Int64(); v := int(x); assert(int64(v) == x); return v }
386 387 // Uint decodes and returns a uint value from the element bitstream.
388 func (r *Decoder) Uint() uint { x := r.Uint64(); v := uint(x); assert(uint64(v) == x); return v }
389 390 // Code decodes a Code value from the element bitstream and returns
391 // its ordinal value. It's the caller's responsibility to convert the
392 // result to an appropriate Code type.
393 //
394 // TODO(mdempsky): Ideally this method would have signature "Code[T
395 // Code] T" instead, but we don't allow generic methods and the
396 // compiler can't depend on generics yet anyway.
397 func (r *Decoder) Code(mark SyncMarker) int {
398 r.Sync(mark)
399 return r.Len()
400 }
401 402 // Reloc decodes a relocation of expected section k from the element
403 // bitstream and returns an index to the referenced element.
404 func (r *Decoder) Reloc(k SectionKind) RelElemIdx {
405 r.Sync(SyncUseReloc)
406 return r.rawReloc(k, r.Len())
407 }
408 409 // String decodes and returns a string value from the element
410 // bitstream.
411 func (r *Decoder) String() string {
412 r.Sync(SyncString)
413 return r.common.StringIdx(r.Reloc(SectionString))
414 }
415 416 // Strings decodes and returns a variable-length slice of strings from
417 // the element bitstream.
418 func (r *Decoder) Strings() [][]byte {
419 res := [][]byte{:r.Len()}
420 for i := range res {
421 res[i] = r.String()
422 }
423 return res
424 }
425 426 // Value decodes and returns a constant.Value from the element
427 // bitstream.
428 func (r *Decoder) Value() constant.Value {
429 r.Sync(SyncValue)
430 isComplex := r.Bool()
431 val := r.scalar()
432 if isComplex {
433 val = constant.BinaryOp(val, token.ADD, constant.MakeImag(r.scalar()))
434 }
435 return val
436 }
437 438 func (r *Decoder) scalar() constant.Value {
439 switch tag := CodeVal(r.Code(SyncVal)); tag {
440 default:
441 panic(fmt.Errorf("unexpected scalar tag: %v", tag))
442 443 case ValBool:
444 return constant.MakeBool(r.Bool())
445 case ValString:
446 return constant.MakeString(r.String())
447 case ValInt64:
448 return constant.MakeInt64(r.Int64())
449 case ValBigInt:
450 return constant.Make(r.bigInt())
451 case ValBigRat:
452 num := r.bigInt()
453 denom := r.bigInt()
454 return constant.Make((&big.Rat{}).SetFrac(num, denom))
455 case ValBigFloat:
456 return constant.Make(r.bigFloat())
457 }
458 }
459 460 func (r *Decoder) bigInt() *big.Int {
461 v := (&big.Int{}).SetBytes([]byte(r.String()))
462 if r.Bool() {
463 v.Neg(v)
464 }
465 return v
466 }
467 468 func (r *Decoder) bigFloat() *big.Float {
469 v := (&big.Float{}).SetPrec(512)
470 assert(v.UnmarshalText([]byte(r.String())) == nil)
471 return v
472 }
473 474 // @@@ Helpers
475 476 // TODO(mdempsky): These should probably be removed. I think they're a
477 // smell that the export data format is not yet quite right.
478 479 // PeekPkgPath returns the package path for the specified package
480 // index.
481 func (pr *PkgDecoder) PeekPkgPath(idx RelElemIdx) []byte {
482 var path []byte
483 {
484 r := pr.TempDecoder(SectionPkg, idx, SyncPkgDef)
485 path = r.String()
486 pr.RetireDecoder(&r)
487 }
488 if path == "" {
489 path = pr.pkgPath
490 }
491 return path
492 }
493 494 // PeekObj returns the package path, object name, and CodeObj for the
495 // specified object index.
496 func (pr *PkgDecoder) PeekObj(idx RelElemIdx) ([]byte, []byte, CodeObj) {
497 var ridx RelElemIdx
498 var name []byte
499 var rcode int
500 {
501 r := pr.TempDecoder(SectionName, idx, SyncObject1)
502 r.Sync(SyncSym)
503 r.Sync(SyncPkg)
504 ridx = r.Reloc(SectionPkg)
505 name = r.String()
506 rcode = r.Code(SyncCodeObj)
507 pr.RetireDecoder(&r)
508 }
509 510 path := pr.PeekPkgPath(ridx)
511 assert(name != "")
512 513 tag := CodeObj(rcode)
514 515 return path, name, tag
516 }
517 518 // Version reports the version of the bitstream.
519 func (w *Decoder) Version() Version { return w.common.version }
520