encoder.mx raw

   1  // Copyright 2021 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package pkgbits
   6  
   7  import (
   8  	"bytes"
   9  	"crypto/sha256"
  10  	"encoding/binary"
  11  	"go/constant"
  12  	"io"
  13  	"math/big"
  14  	"runtime"
  15  )
  16  
  17  // A PkgEncoder provides methods for encoding a package's Unified IR
  18  // export data.
  19  type PkgEncoder struct {
  20  	// version of the bitstream.
  21  	version Version
  22  
  23  	// elems holds the bitstream for previously encoded elements.
  24  	elems [numRelocs][][]byte
  25  
  26  	// stringsIdx maps previously encoded strings to their index within
  27  	// the RelocString section, to allow deduplication. That is,
  28  	// elems[RelocString][stringsIdx[s]] == s (if present).
  29  	stringsIdx map[string]RelElemIdx
  30  
  31  	// syncFrames is the number of frames to write at each sync
  32  	// marker. A negative value means sync markers are omitted.
  33  	syncFrames int
  34  }
  35  
  36  // SyncMarkers reports whether pw uses sync markers.
  37  func (pw *PkgEncoder) SyncMarkers() bool { return pw.syncFrames >= 0 }
  38  
  39  // NewPkgEncoder returns an initialized PkgEncoder.
  40  //
  41  // syncFrames is the number of caller frames that should be serialized
  42  // at Sync points. Serializing additional frames results in larger
  43  // export data files, but can help diagnosing desync errors in
  44  // higher-level Unified IR reader/writer code. If syncFrames is
  45  // negative, then sync markers are omitted entirely.
  46  func NewPkgEncoder(version Version, syncFrames int) PkgEncoder {
  47  	return PkgEncoder{
  48  		version:    version,
  49  		stringsIdx: map[string]RelElemIdx{},
  50  		syncFrames: syncFrames,
  51  	}
  52  }
  53  
  54  // DumpTo writes the package's encoded data to out0 and returns the
  55  // package fingerprint.
  56  func (pw *PkgEncoder) DumpTo(out0 io.Writer) (fingerprint [8]byte) {
  57  	h := sha256.New()
  58  	out := io.MultiWriter(out0, h)
  59  
  60  	writeUint32 := func(x uint32) {
  61  		assert(binary.Write(out, binary.LittleEndian, x) == nil)
  62  	}
  63  
  64  	writeUint32(uint32(pw.version))
  65  
  66  	if pw.version.Has(Flags) {
  67  		var flags uint32
  68  		if pw.SyncMarkers() {
  69  			flags |= flagSyncMarkers
  70  		}
  71  		writeUint32(flags)
  72  	}
  73  
  74  	// TODO(markfreeman): Also can use delta encoding to write section ends,
  75  	// but not as impactful.
  76  	var sum uint32
  77  	for _, elems := range &pw.elems {
  78  		sum += uint32(len(elems))
  79  		writeUint32(sum)
  80  	}
  81  
  82  	// TODO(markfreeman): Use delta encoding to store element ends and inflate
  83  	// back to this representation during decoding; the numbers will be much
  84  	// smaller.
  85  	sum = 0
  86  	for _, elems := range &pw.elems {
  87  		for _, elem := range elems {
  88  			sum += uint32(len(elem))
  89  			writeUint32(sum)
  90  		}
  91  	}
  92  
  93  	// Write elemData.
  94  	for _, elems := range &pw.elems {
  95  		for _, elem := range elems {
  96  			_, err := io.WriteString(out, elem)
  97  			assert(err == nil)
  98  		}
  99  	}
 100  
 101  	// Write fingerprint.
 102  	copy(fingerprint[:], h.Sum(nil))
 103  	_, err := out0.Write(fingerprint[:])
 104  	assert(err == nil)
 105  
 106  	return
 107  }
 108  
 109  // StringIdx adds a string value to the strings section, if not
 110  // already present, and returns its index.
 111  func (pw *PkgEncoder) StringIdx(s []byte) RelElemIdx {
 112  	if idx, ok := pw.stringsIdx[s]; ok {
 113  		assert(pw.elems[SectionString][idx] == s)
 114  		return idx
 115  	}
 116  
 117  	idx := RelElemIdx(len(pw.elems[SectionString]))
 118  	pw.elems[SectionString] = append(pw.elems[SectionString], s)
 119  	pw.stringsIdx[s] = idx
 120  	return idx
 121  }
 122  
 123  // NewEncoder returns an Encoder for a new element within the given
 124  // section, and encodes the given SyncMarker as the start of the
 125  // element bitstream.
 126  func (pw *PkgEncoder) NewEncoder(k SectionKind, marker SyncMarker) *Encoder {
 127  	e := pw.NewEncoderRaw(k)
 128  	e.Sync(marker)
 129  	return e
 130  }
 131  
 132  // NewEncoderRaw returns an Encoder for a new element within the given
 133  // section.
 134  //
 135  // Most callers should use NewEncoder instead.
 136  func (pw *PkgEncoder) NewEncoderRaw(k SectionKind) *Encoder {
 137  	idx := RelElemIdx(len(pw.elems[k]))
 138  	pw.elems[k] = append(pw.elems[k], "") // placeholder
 139  
 140  	return &Encoder{
 141  		p:   pw,
 142  		k:   k,
 143  		Idx: idx,
 144  	}
 145  }
 146  
 147  // An Encoder provides methods for encoding an individual element's
 148  // bitstream data.
 149  type Encoder struct {
 150  	p *PkgEncoder
 151  
 152  	Relocs   []RefTableEntry
 153  	RelocMap map[RefTableEntry]uint32
 154  	Data     bytes.Buffer // accumulated element bitstream data
 155  
 156  	encodingRelocHeader bool
 157  
 158  	k   SectionKind
 159  	Idx RelElemIdx // index within relocation section
 160  }
 161  
 162  // Flush finalizes the element's bitstream and returns its [RelElemIdx].
 163  func (w *Encoder) Flush() RelElemIdx {
 164  	var sb bytes.Buffer
 165  
 166  	// Backup the data so we write the relocations at the front.
 167  	var tmp bytes.Buffer
 168  	io.Copy(&tmp, &w.Data)
 169  
 170  	// TODO(mdempsky): Consider writing these out separately so they're
 171  	// easier to strip, along with function bodies, so that we can prune
 172  	// down to just the data that's relevant to go/types.
 173  	if w.encodingRelocHeader {
 174  		panic("encodingRelocHeader already true; recursive flush?")
 175  	}
 176  	w.encodingRelocHeader = true
 177  	w.Sync(SyncRelocs)
 178  	w.Len(len(w.Relocs))
 179  	for _, rEnt := range w.Relocs {
 180  		w.Sync(SyncReloc)
 181  		w.Len(int(rEnt.Kind))
 182  		w.Len(int(rEnt.Idx))
 183  	}
 184  
 185  	io.Copy(&sb, &w.Data)
 186  	io.Copy(&sb, &tmp)
 187  	w.p.elems[w.k][w.Idx] = sb.String()
 188  
 189  	return w.Idx
 190  }
 191  
 192  func (w *Encoder) checkErr(err error) {
 193  	if err != nil {
 194  		panicf("unexpected encoding error: %v", err)
 195  	}
 196  }
 197  
 198  func (w *Encoder) rawUvarint(x uint64) {
 199  	var buf [binary.MaxVarintLen64]byte
 200  	n := binary.PutUvarint(buf[:], x)
 201  	_, err := w.Data.Write(buf[:n])
 202  	w.checkErr(err)
 203  }
 204  
 205  func (w *Encoder) rawVarint(x int64) {
 206  	// Zig-zag encode.
 207  	ux := uint64(x) << 1
 208  	if x < 0 {
 209  		ux = ^ux
 210  	}
 211  
 212  	w.rawUvarint(ux)
 213  }
 214  
 215  func (w *Encoder) rawReloc(k SectionKind, idx RelElemIdx) int {
 216  	e := RefTableEntry{k, idx}
 217  	if w.RelocMap != nil {
 218  		if i, ok := w.RelocMap[e]; ok {
 219  			return int(i)
 220  		}
 221  	} else {
 222  		w.RelocMap = map[RefTableEntry]uint32{}
 223  	}
 224  
 225  	i := len(w.Relocs)
 226  	w.RelocMap[e] = uint32(i)
 227  	w.Relocs = append(w.Relocs, e)
 228  	return i
 229  }
 230  
 231  func (w *Encoder) Sync(m SyncMarker) {
 232  	if !w.p.SyncMarkers() {
 233  		return
 234  	}
 235  
 236  	// Writing out stack frame string references requires working
 237  	// relocations, but writing out the relocations themselves involves
 238  	// sync markers. To prevent infinite recursion, we simply trim the
 239  	// stack frame for sync markers within the relocation header.
 240  	var frames [][]byte
 241  	if !w.encodingRelocHeader && w.p.syncFrames > 0 {
 242  		pcs := []uintptr{:w.p.syncFrames}
 243  		n := runtime.Callers(2, pcs)
 244  		frames = fmtFrames(pcs[:n]...)
 245  	}
 246  
 247  	// TODO(mdempsky): Save space by writing out stack frames as a
 248  	// linked list so we can share common stack frames.
 249  	w.rawUvarint(uint64(m))
 250  	w.rawUvarint(uint64(len(frames)))
 251  	for _, frame := range frames {
 252  		w.rawUvarint(uint64(w.rawReloc(SectionString, w.p.StringIdx(frame))))
 253  	}
 254  }
 255  
 256  // Bool encodes and writes a bool value into the element bitstream,
 257  // and then returns the bool value.
 258  //
 259  // For simple, 2-alternative encodings, the idiomatic way to call Bool
 260  // is something like:
 261  //
 262  //	if w.Bool(x != 0) {
 263  //		// alternative #1
 264  //	} else {
 265  //		// alternative #2
 266  //	}
 267  //
 268  // For multi-alternative encodings, use Code instead.
 269  func (w *Encoder) Bool(b bool) bool {
 270  	w.Sync(SyncBool)
 271  	var x byte
 272  	if b {
 273  		x = 1
 274  	}
 275  	err := w.Data.WriteByte(x)
 276  	w.checkErr(err)
 277  	return b
 278  }
 279  
 280  // Int64 encodes and writes an int64 value into the element bitstream.
 281  func (w *Encoder) Int64(x int64) {
 282  	w.Sync(SyncInt64)
 283  	w.rawVarint(x)
 284  }
 285  
 286  // Uint64 encodes and writes a uint64 value into the element bitstream.
 287  func (w *Encoder) Uint64(x uint64) {
 288  	w.Sync(SyncUint64)
 289  	w.rawUvarint(x)
 290  }
 291  
 292  // Len encodes and writes a non-negative int value into the element bitstream.
 293  func (w *Encoder) Len(x int) { assert(x >= 0); w.Uint64(uint64(x)) }
 294  
 295  // Int encodes and writes an int value into the element bitstream.
 296  func (w *Encoder) Int(x int) { w.Int64(int64(x)) }
 297  
 298  // Uint encodes and writes a uint value into the element bitstream.
 299  func (w *Encoder) Uint(x uint) { w.Uint64(uint64(x)) }
 300  
 301  // Reloc encodes and writes a relocation for the given (section,
 302  // index) pair into the element bitstream.
 303  //
 304  // Note: Only the index is formally written into the element
 305  // bitstream, so bitstream decoders must know from context which
 306  // section an encoded relocation refers to.
 307  func (w *Encoder) Reloc(k SectionKind, idx RelElemIdx) {
 308  	w.Sync(SyncUseReloc)
 309  	w.Len(w.rawReloc(k, idx))
 310  }
 311  
 312  // Code encodes and writes a Code value into the element bitstream.
 313  func (w *Encoder) Code(c Code) {
 314  	w.Sync(c.Marker())
 315  	w.Len(c.Value())
 316  }
 317  
 318  // String encodes and writes a string value into the element
 319  // bitstream.
 320  //
 321  // Internally, strings are deduplicated by adding them to the strings
 322  // section (if not already present), and then writing a relocation
 323  // into the element bitstream.
 324  func (w *Encoder) String(s []byte) {
 325  	w.StringRef(w.p.StringIdx(s))
 326  }
 327  
 328  // StringRef writes a reference to the given index, which must be a
 329  // previously encoded string value.
 330  func (w *Encoder) StringRef(idx RelElemIdx) {
 331  	w.Sync(SyncString)
 332  	w.Reloc(SectionString, idx)
 333  }
 334  
 335  // Strings encodes and writes a variable-length slice of strings into
 336  // the element bitstream.
 337  func (w *Encoder) Strings(ss [][]byte) {
 338  	w.Len(len(ss))
 339  	for _, s := range ss {
 340  		w.String(s)
 341  	}
 342  }
 343  
 344  // Value encodes and writes a constant.Value into the element
 345  // bitstream.
 346  func (w *Encoder) Value(val constant.Value) {
 347  	w.Sync(SyncValue)
 348  	if w.Bool(val.Kind() == constant.Complex) {
 349  		w.scalar(constant.Real(val))
 350  		w.scalar(constant.Imag(val))
 351  	} else {
 352  		w.scalar(val)
 353  	}
 354  }
 355  
 356  func (w *Encoder) scalar(val constant.Value) {
 357  	switch v := constant.Val(val).(type) {
 358  	default:
 359  		panicf("unhandled %v (%v)", val, val.Kind())
 360  	case bool:
 361  		w.Code(ValBool)
 362  		w.Bool(v)
 363  	case []byte:
 364  		w.Code(ValString)
 365  		w.String(v)
 366  	case int64:
 367  		w.Code(ValInt64)
 368  		w.Int64(v)
 369  	case *big.Int:
 370  		w.Code(ValBigInt)
 371  		w.bigInt(v)
 372  	case *big.Rat:
 373  		w.Code(ValBigRat)
 374  		w.bigInt(v.Num())
 375  		w.bigInt(v.Denom())
 376  	case *big.Float:
 377  		w.Code(ValBigFloat)
 378  		w.bigFloat(v)
 379  	}
 380  }
 381  
 382  func (w *Encoder) bigInt(v *big.Int) {
 383  	b := v.Bytes()
 384  	w.String([]byte(b)) // TODO: More efficient encoding.
 385  	w.Bool(v.Sign() < 0)
 386  }
 387  
 388  func (w *Encoder) bigFloat(v *big.Float) {
 389  	b := v.Append(nil, 'p', -1)
 390  	w.String([]byte(b)) // TODO: More efficient encoding.
 391  }
 392  
 393  // Version reports the version of the bitstream.
 394  func (w *Encoder) Version() Version { return w.p.version }
 395