decoder.mx raw

   1  // Copyright 2021 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package pkgbits
   6  
   7  import (
   8  	"encoding/binary"
   9  	"errors"
  10  	"fmt"
  11  	"go/constant"
  12  	"go/token"
  13  	"io"
  14  	"math/big"
  15  	"os"
  16  	"runtime"
  17  	"bytes"
  18  )
  19  
  20  // A PkgDecoder provides methods for decoding a package's Unified IR
  21  // export data.
  22  type PkgDecoder struct {
  23  	// version is the file format version.
  24  	version Version
  25  
  26  	// sync indicates whether the file uses sync markers.
  27  	sync bool
  28  
  29  	// pkgPath is the package path for the package to be decoded.
  30  	//
  31  	// TODO(mdempsky): Remove; unneeded since CL 391014.
  32  	pkgPath []byte
  33  
  34  	// elemData is the full data payload of the encoded package.
  35  	// Elements are densely and contiguously packed together.
  36  	//
  37  	// The last 8 bytes of elemData are the package fingerprint.
  38  	elemData []byte
  39  
  40  	// elemEnds stores the byte-offset end positions of element
  41  	// bitstreams within elemData.
  42  	//
  43  	// For example, element I's bitstream data starts at elemEnds[I-1]
  44  	// (or 0, if I==0) and ends at elemEnds[I].
  45  	//
  46  	// Note: elemEnds is indexed by absolute indices, not
  47  	// section-relative indices.
  48  	elemEnds []uint32
  49  
  50  	// elemEndsEnds stores the index-offset end positions of relocation
  51  	// sections within elemEnds.
  52  	//
  53  	// For example, section K's end positions start at elemEndsEnds[K-1]
  54  	// (or 0, if K==0) and end at elemEndsEnds[K].
  55  	elemEndsEnds [numRelocs]uint32
  56  
  57  	scratchRelocEnt []RefTableEntry
  58  }
  59  
  60  // PkgPath returns the package path for the package
  61  //
  62  // TODO(mdempsky): Remove; unneeded since CL 391014.
  63  func (pr *PkgDecoder) PkgPath() []byte { return pr.pkgPath }
  64  
  65  // SyncMarkers reports whether pr uses sync markers.
  66  func (pr *PkgDecoder) SyncMarkers() bool { return pr.sync }
  67  
  68  // NewPkgDecoder returns a PkgDecoder initialized to read the Unified
  69  // IR export data from input. pkgPath is the package path for the
  70  // compilation unit that produced the export data.
  71  func NewPkgDecoder(pkgPath, input []byte) PkgDecoder {
  72  	pr := PkgDecoder{
  73  		pkgPath: pkgPath,
  74  	}
  75  
  76  	// TODO(mdempsky): Implement direct indexing of input string to
  77  	// avoid copying the position information.
  78  
  79  	r := bytes.NewReader(input)
  80  
  81  	var ver uint32
  82  	assert(binary.Read(r, binary.LittleEndian, &ver) == nil)
  83  	pr.version = Version(ver)
  84  
  85  	if pr.version >= numVersions {
  86  		panic(fmt.Errorf("cannot decode %q, export data version %d is greater than maximum supported version %d", pkgPath, pr.version, numVersions-1))
  87  	}
  88  
  89  	if pr.version.Has(Flags) {
  90  		var flags uint32
  91  		assert(binary.Read(r, binary.LittleEndian, &flags) == nil)
  92  		pr.sync = flags&flagSyncMarkers != 0
  93  	}
  94  
  95  	assert(binary.Read(r, binary.LittleEndian, pr.elemEndsEnds[:]) == nil)
  96  
  97  	pr.elemEnds = []uint32{:pr.elemEndsEnds[len(pr.elemEndsEnds)-1]}
  98  	assert(binary.Read(r, binary.LittleEndian, pr.elemEnds[:]) == nil)
  99  
 100  	pos, err := r.Seek(0, io.SeekCurrent)
 101  	assert(err == nil)
 102  
 103  	pr.elemData = input[pos:]
 104  
 105  	const fingerprintSize = 8
 106  	assert(len(pr.elemData)-fingerprintSize == int(pr.elemEnds[len(pr.elemEnds)-1]))
 107  
 108  	return pr
 109  }
 110  
 111  // NumElems returns the number of elements in section k.
 112  func (pr *PkgDecoder) NumElems(k SectionKind) int {
 113  	count := int(pr.elemEndsEnds[k])
 114  	if k > 0 {
 115  		count -= int(pr.elemEndsEnds[k-1])
 116  	}
 117  	return count
 118  }
 119  
 120  // TotalElems returns the total number of elements across all sections.
 121  func (pr *PkgDecoder) TotalElems() int {
 122  	return len(pr.elemEnds)
 123  }
 124  
 125  // Fingerprint returns the package fingerprint.
 126  func (pr *PkgDecoder) Fingerprint() [8]byte {
 127  	var fp [8]byte
 128  	copy(fp[:], pr.elemData[len(pr.elemData)-8:])
 129  	return fp
 130  }
 131  
 132  // AbsIdx returns the absolute index for the given (section, index)
 133  // pair.
 134  func (pr *PkgDecoder) AbsIdx(k SectionKind, idx RelElemIdx) int {
 135  	absIdx := int(idx)
 136  	if k > 0 {
 137  		absIdx += int(pr.elemEndsEnds[k-1])
 138  	}
 139  	if absIdx >= int(pr.elemEndsEnds[k]) {
 140  		panicf("%v:%v is out of bounds; %v", k, idx, pr.elemEndsEnds)
 141  	}
 142  	return absIdx
 143  }
 144  
 145  // DataIdx returns the raw element bitstream for the given (section,
 146  // index) pair.
 147  func (pr *PkgDecoder) DataIdx(k SectionKind, idx RelElemIdx) []byte {
 148  	absIdx := pr.AbsIdx(k, idx)
 149  
 150  	var start uint32
 151  	if absIdx > 0 {
 152  		start = pr.elemEnds[absIdx-1]
 153  	}
 154  	end := pr.elemEnds[absIdx]
 155  
 156  	return pr.elemData[start:end]
 157  }
 158  
 159  // StringIdx returns the string value for the given string index.
 160  func (pr *PkgDecoder) StringIdx(idx RelElemIdx) []byte {
 161  	return pr.DataIdx(SectionString, idx)
 162  }
 163  
 164  // NewDecoder returns a Decoder for the given (section, index) pair,
 165  // and decodes the given SyncMarker from the element bitstream.
 166  func (pr *PkgDecoder) NewDecoder(k SectionKind, idx RelElemIdx, marker SyncMarker) Decoder {
 167  	r := pr.NewDecoderRaw(k, idx)
 168  	r.Sync(marker)
 169  	return r
 170  }
 171  
 172  // TempDecoder returns a Decoder for the given (section, index) pair,
 173  // and decodes the given SyncMarker from the element bitstream.
 174  // If possible the Decoder should be RetireDecoder'd when it is no longer
 175  // needed, this will avoid heap allocations.
 176  func (pr *PkgDecoder) TempDecoder(k SectionKind, idx RelElemIdx, marker SyncMarker) Decoder {
 177  	r := pr.TempDecoderRaw(k, idx)
 178  	r.Sync(marker)
 179  	return r
 180  }
 181  
 182  func (pr *PkgDecoder) RetireDecoder(d *Decoder) {
 183  	pr.scratchRelocEnt = d.Relocs
 184  	d.Relocs = nil
 185  }
 186  
 187  // NewDecoderRaw returns a Decoder for the given (section, index) pair.
 188  //
 189  // Most callers should use NewDecoder instead.
 190  func (pr *PkgDecoder) NewDecoderRaw(k SectionKind, idx RelElemIdx) Decoder {
 191  	r := Decoder{
 192  		common: pr,
 193  		k:      k,
 194  		Idx:    idx,
 195  	}
 196  
 197  	r.Data.Reset(pr.DataIdx(k, idx))
 198  	r.Sync(SyncRelocs)
 199  	r.Relocs = []RefTableEntry{:r.Len()}
 200  	for i := range r.Relocs {
 201  		r.Sync(SyncReloc)
 202  		r.Relocs[i] = RefTableEntry{SectionKind(r.Len()), RelElemIdx(r.Len())}
 203  	}
 204  
 205  	return r
 206  }
 207  
 208  func (pr *PkgDecoder) TempDecoderRaw(k SectionKind, idx RelElemIdx) Decoder {
 209  	r := Decoder{
 210  		common: pr,
 211  		k:      k,
 212  		Idx:    idx,
 213  	}
 214  
 215  	r.Data.Reset(pr.DataIdx(k, idx))
 216  	r.Sync(SyncRelocs)
 217  	l := r.Len()
 218  	if cap(pr.scratchRelocEnt) >= l {
 219  		r.Relocs = pr.scratchRelocEnt[:l]
 220  		pr.scratchRelocEnt = nil
 221  	} else {
 222  		r.Relocs = []RefTableEntry{:l}
 223  	}
 224  	for i := range r.Relocs {
 225  		r.Sync(SyncReloc)
 226  		r.Relocs[i] = RefTableEntry{SectionKind(r.Len()), RelElemIdx(r.Len())}
 227  	}
 228  
 229  	return r
 230  }
 231  
 232  // A Decoder provides methods for decoding an individual element's
 233  // bitstream data.
 234  type Decoder struct {
 235  	common *PkgDecoder
 236  
 237  	Relocs []RefTableEntry
 238  	Data   bytes.Reader
 239  
 240  	k   SectionKind
 241  	Idx RelElemIdx
 242  }
 243  
 244  func (r *Decoder) checkErr(err error) {
 245  	if err != nil {
 246  		panicf("unexpected decoding error: %w", err)
 247  	}
 248  }
 249  
 250  func (r *Decoder) rawUvarint() uint64 {
 251  	x, err := readUvarint(&r.Data)
 252  	r.checkErr(err)
 253  	return x
 254  }
 255  
 256  // readUvarint is a type-specialized copy of encoding/binary.ReadUvarint.
 257  // This avoids the interface conversion and thus has better escape properties,
 258  // which flows up the stack.
 259  func readUvarint(r *bytes.Reader) (uint64, error) {
 260  	var x uint64
 261  	var s uint
 262  	for i := 0; i < binary.MaxVarintLen64; i++ {
 263  		b, err := r.ReadByte()
 264  		if err != nil {
 265  			if i > 0 && err == io.EOF {
 266  				err = io.ErrUnexpectedEOF
 267  			}
 268  			return x, err
 269  		}
 270  		if b < 0x80 {
 271  			if i == binary.MaxVarintLen64-1 && b > 1 {
 272  				return x, overflow
 273  			}
 274  			return x | uint64(b)<<s, nil
 275  		}
 276  		x |= uint64(b&0x7f) << s
 277  		s += 7
 278  	}
 279  	return x, overflow
 280  }
 281  
 282  var overflow = errors.New("pkgbits: readUvarint overflows a 64-bit integer")
 283  
 284  func (r *Decoder) rawVarint() int64 {
 285  	ux := r.rawUvarint()
 286  
 287  	// Zig-zag decode.
 288  	x := int64(ux >> 1)
 289  	if ux&1 != 0 {
 290  		x = ^x
 291  	}
 292  	return x
 293  }
 294  
 295  func (r *Decoder) rawReloc(k SectionKind, idx int) RelElemIdx {
 296  	e := r.Relocs[idx]
 297  	assert(e.Kind == k)
 298  	return e.Idx
 299  }
 300  
 301  // Sync decodes a sync marker from the element bitstream and asserts
 302  // that it matches the expected marker.
 303  //
 304  // If EnableSync is false, then Sync is a no-op.
 305  func (r *Decoder) Sync(mWant SyncMarker) {
 306  	if !r.common.sync {
 307  		return
 308  	}
 309  
 310  	pos, _ := r.Data.Seek(0, io.SeekCurrent)
 311  	mHave := SyncMarker(r.rawUvarint())
 312  	writerPCs := []int{:r.rawUvarint()}
 313  	for i := range writerPCs {
 314  		writerPCs[i] = int(r.rawUvarint())
 315  	}
 316  
 317  	if mHave == mWant {
 318  		return
 319  	}
 320  
 321  	// There's some tension here between printing:
 322  	//
 323  	// (1) full file paths that tools can recognize (e.g., so emacs
 324  	//     hyperlinks the "file:line" text for easy navigation), or
 325  	//
 326  	// (2) short file paths that are easier for humans to read (e.g., by
 327  	//     omitting redundant or irrelevant details, so it's easier to
 328  	//     focus on the useful bits that remain).
 329  	//
 330  	// The current formatting favors the former, as it seems more
 331  	// helpful in practice. But perhaps the formatting could be improved
 332  	// to better address both concerns. For example, use relative file
 333  	// paths if they would be shorter, or rewrite file paths to contain
 334  	// "$GOROOT" (like objabi.AbsFile does) if tools can be taught how
 335  	// to reliably expand that again.
 336  
 337  	fmt.Printf("export data desync: package %q, section %v, index %v, offset %v\n", r.common.pkgPath, r.k, r.Idx, pos)
 338  
 339  	fmt.Printf("\nfound %v, written at:\n", mHave)
 340  	if len(writerPCs) == 0 {
 341  		fmt.Printf("\t[stack trace unavailable; recompile package %q with -d=syncframes]\n", r.common.pkgPath)
 342  	}
 343  	for _, pc := range writerPCs {
 344  		fmt.Printf("\t%s\n", r.common.StringIdx(r.rawReloc(SectionString, pc)))
 345  	}
 346  
 347  	fmt.Printf("\nexpected %v, reading at:\n", mWant)
 348  	var readerPCs [32]uintptr // TODO(mdempsky): Dynamically size?
 349  	n := runtime.Callers(2, readerPCs[:])
 350  	for _, pc := range fmtFrames(readerPCs[:n]...) {
 351  		fmt.Printf("\t%s\n", pc)
 352  	}
 353  
 354  	// We already printed a stack trace for the reader, so now we can
 355  	// simply exit. Printing a second one with panic or base.Fatalf
 356  	// would just be noise.
 357  	os.Exit(1)
 358  }
 359  
 360  // Bool decodes and returns a bool value from the element bitstream.
 361  func (r *Decoder) Bool() bool {
 362  	r.Sync(SyncBool)
 363  	x, err := r.Data.ReadByte()
 364  	r.checkErr(err)
 365  	assert(x < 2)
 366  	return x != 0
 367  }
 368  
 369  // Int64 decodes and returns an int64 value from the element bitstream.
 370  func (r *Decoder) Int64() int64 {
 371  	r.Sync(SyncInt64)
 372  	return r.rawVarint()
 373  }
 374  
 375  // Uint64 decodes and returns a uint64 value from the element bitstream.
 376  func (r *Decoder) Uint64() uint64 {
 377  	r.Sync(SyncUint64)
 378  	return r.rawUvarint()
 379  }
 380  
 381  // Len decodes and returns a non-negative int value from the element bitstream.
 382  func (r *Decoder) Len() int { x := r.Uint64(); v := int(x); assert(uint64(v) == x); return v }
 383  
 384  // Int decodes and returns an int value from the element bitstream.
 385  func (r *Decoder) Int() int { x := r.Int64(); v := int(x); assert(int64(v) == x); return v }
 386  
 387  // Uint decodes and returns a uint value from the element bitstream.
 388  func (r *Decoder) Uint() uint { x := r.Uint64(); v := uint(x); assert(uint64(v) == x); return v }
 389  
 390  // Code decodes a Code value from the element bitstream and returns
 391  // its ordinal value. It's the caller's responsibility to convert the
 392  // result to an appropriate Code type.
 393  //
 394  // TODO(mdempsky): Ideally this method would have signature "Code[T
 395  // Code] T" instead, but we don't allow generic methods and the
 396  // compiler can't depend on generics yet anyway.
 397  func (r *Decoder) Code(mark SyncMarker) int {
 398  	r.Sync(mark)
 399  	return r.Len()
 400  }
 401  
 402  // Reloc decodes a relocation of expected section k from the element
 403  // bitstream and returns an index to the referenced element.
 404  func (r *Decoder) Reloc(k SectionKind) RelElemIdx {
 405  	r.Sync(SyncUseReloc)
 406  	return r.rawReloc(k, r.Len())
 407  }
 408  
 409  // String decodes and returns a string value from the element
 410  // bitstream.
 411  func (r *Decoder) String() string {
 412  	r.Sync(SyncString)
 413  	return r.common.StringIdx(r.Reloc(SectionString))
 414  }
 415  
 416  // Strings decodes and returns a variable-length slice of strings from
 417  // the element bitstream.
 418  func (r *Decoder) Strings() [][]byte {
 419  	res := [][]byte{:r.Len()}
 420  	for i := range res {
 421  		res[i] = r.String()
 422  	}
 423  	return res
 424  }
 425  
 426  // Value decodes and returns a constant.Value from the element
 427  // bitstream.
 428  func (r *Decoder) Value() constant.Value {
 429  	r.Sync(SyncValue)
 430  	isComplex := r.Bool()
 431  	val := r.scalar()
 432  	if isComplex {
 433  		val = constant.BinaryOp(val, token.ADD, constant.MakeImag(r.scalar()))
 434  	}
 435  	return val
 436  }
 437  
 438  func (r *Decoder) scalar() constant.Value {
 439  	switch tag := CodeVal(r.Code(SyncVal)); tag {
 440  	default:
 441  		panic(fmt.Errorf("unexpected scalar tag: %v", tag))
 442  
 443  	case ValBool:
 444  		return constant.MakeBool(r.Bool())
 445  	case ValString:
 446  		return constant.MakeString(r.String())
 447  	case ValInt64:
 448  		return constant.MakeInt64(r.Int64())
 449  	case ValBigInt:
 450  		return constant.Make(r.bigInt())
 451  	case ValBigRat:
 452  		num := r.bigInt()
 453  		denom := r.bigInt()
 454  		return constant.Make((&big.Rat{}).SetFrac(num, denom))
 455  	case ValBigFloat:
 456  		return constant.Make(r.bigFloat())
 457  	}
 458  }
 459  
 460  func (r *Decoder) bigInt() *big.Int {
 461  	v := (&big.Int{}).SetBytes([]byte(r.String()))
 462  	if r.Bool() {
 463  		v.Neg(v)
 464  	}
 465  	return v
 466  }
 467  
 468  func (r *Decoder) bigFloat() *big.Float {
 469  	v := (&big.Float{}).SetPrec(512)
 470  	assert(v.UnmarshalText([]byte(r.String())) == nil)
 471  	return v
 472  }
 473  
 474  // @@@ Helpers
 475  
 476  // TODO(mdempsky): These should probably be removed. I think they're a
 477  // smell that the export data format is not yet quite right.
 478  
 479  // PeekPkgPath returns the package path for the specified package
 480  // index.
 481  func (pr *PkgDecoder) PeekPkgPath(idx RelElemIdx) []byte {
 482  	var path []byte
 483  	{
 484  		r := pr.TempDecoder(SectionPkg, idx, SyncPkgDef)
 485  		path = r.String()
 486  		pr.RetireDecoder(&r)
 487  	}
 488  	if path == "" {
 489  		path = pr.pkgPath
 490  	}
 491  	return path
 492  }
 493  
 494  // PeekObj returns the package path, object name, and CodeObj for the
 495  // specified object index.
 496  func (pr *PkgDecoder) PeekObj(idx RelElemIdx) ([]byte, []byte, CodeObj) {
 497  	var ridx RelElemIdx
 498  	var name []byte
 499  	var rcode int
 500  	{
 501  		r := pr.TempDecoder(SectionName, idx, SyncObject1)
 502  		r.Sync(SyncSym)
 503  		r.Sync(SyncPkg)
 504  		ridx = r.Reloc(SectionPkg)
 505  		name = r.String()
 506  		rcode = r.Code(SyncCodeObj)
 507  		pr.RetireDecoder(&r)
 508  	}
 509  
 510  	path := pr.PeekPkgPath(ridx)
 511  	assert(name != "")
 512  
 513  	tag := CodeObj(rcode)
 514  
 515  	return path, name, tag
 516  }
 517  
 518  // Version reports the version of the bitstream.
 519  func (w *Decoder) Version() Version { return w.common.version }
 520