encoder.go raw

   1  // Copyright 2021 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package pkgbits
   6  
   7  import (
   8  	"bytes"
   9  	"crypto/md5"
  10  	"encoding/binary"
  11  	"go/constant"
  12  	"io"
  13  	"math/big"
  14  	"runtime"
  15  	"strings"
  16  )
  17  
  18  // A PkgEncoder provides methods for encoding a package's Unified IR
  19  // export data.
  20  type PkgEncoder struct {
  21  	// version of the bitstream.
  22  	version Version
  23  
  24  	// elems holds the bitstream for previously encoded elements.
  25  	elems [numRelocs][]string
  26  
  27  	// stringsIdx maps previously encoded strings to their index within
  28  	// the RelocString section, to allow deduplication. That is,
  29  	// elems[RelocString][stringsIdx[s]] == s (if present).
  30  	stringsIdx map[string]Index
  31  
  32  	// syncFrames is the number of frames to write at each sync
  33  	// marker. A negative value means sync markers are omitted.
  34  	syncFrames int
  35  }
  36  
  37  // SyncMarkers reports whether pw uses sync markers.
  38  func (pw *PkgEncoder) SyncMarkers() bool { return pw.syncFrames >= 0 }
  39  
  40  // NewPkgEncoder returns an initialized PkgEncoder.
  41  //
  42  // syncFrames is the number of caller frames that should be serialized
  43  // at Sync points. Serializing additional frames results in larger
  44  // export data files, but can help diagnosing desync errors in
  45  // higher-level Unified IR reader/writer code. If syncFrames is
  46  // negative, then sync markers are omitted entirely.
  47  func NewPkgEncoder(version Version, syncFrames int) PkgEncoder {
  48  	return PkgEncoder{
  49  		version:    version,
  50  		stringsIdx: make(map[string]Index),
  51  		syncFrames: syncFrames,
  52  	}
  53  }
  54  
  55  // DumpTo writes the package's encoded data to out0 and returns the
  56  // package fingerprint.
  57  func (pw *PkgEncoder) DumpTo(out0 io.Writer) (fingerprint [8]byte) {
  58  	h := md5.New()
  59  	out := io.MultiWriter(out0, h)
  60  
  61  	writeUint32 := func(x uint32) {
  62  		assert(binary.Write(out, binary.LittleEndian, x) == nil)
  63  	}
  64  
  65  	writeUint32(uint32(pw.version))
  66  
  67  	if pw.version.Has(Flags) {
  68  		var flags uint32
  69  		if pw.SyncMarkers() {
  70  			flags |= flagSyncMarkers
  71  		}
  72  		writeUint32(flags)
  73  	}
  74  
  75  	// Write elemEndsEnds.
  76  	var sum uint32
  77  	for _, elems := range &pw.elems {
  78  		sum += uint32(len(elems))
  79  		writeUint32(sum)
  80  	}
  81  
  82  	// Write elemEnds.
  83  	sum = 0
  84  	for _, elems := range &pw.elems {
  85  		for _, elem := range elems {
  86  			sum += uint32(len(elem))
  87  			writeUint32(sum)
  88  		}
  89  	}
  90  
  91  	// Write elemData.
  92  	for _, elems := range &pw.elems {
  93  		for _, elem := range elems {
  94  			_, err := io.WriteString(out, elem)
  95  			assert(err == nil)
  96  		}
  97  	}
  98  
  99  	// Write fingerprint.
 100  	copy(fingerprint[:], h.Sum(nil))
 101  	_, err := out0.Write(fingerprint[:])
 102  	assert(err == nil)
 103  
 104  	return
 105  }
 106  
 107  // StringIdx adds a string value to the strings section, if not
 108  // already present, and returns its index.
 109  func (pw *PkgEncoder) StringIdx(s string) Index {
 110  	if idx, ok := pw.stringsIdx[s]; ok {
 111  		assert(pw.elems[RelocString][idx] == s)
 112  		return idx
 113  	}
 114  
 115  	idx := Index(len(pw.elems[RelocString]))
 116  	pw.elems[RelocString] = append(pw.elems[RelocString], s)
 117  	pw.stringsIdx[s] = idx
 118  	return idx
 119  }
 120  
 121  // NewEncoder returns an Encoder for a new element within the given
 122  // section, and encodes the given SyncMarker as the start of the
 123  // element bitstream.
 124  func (pw *PkgEncoder) NewEncoder(k RelocKind, marker SyncMarker) Encoder {
 125  	e := pw.NewEncoderRaw(k)
 126  	e.Sync(marker)
 127  	return e
 128  }
 129  
 130  // NewEncoderRaw returns an Encoder for a new element within the given
 131  // section.
 132  //
 133  // Most callers should use NewEncoder instead.
 134  func (pw *PkgEncoder) NewEncoderRaw(k RelocKind) Encoder {
 135  	idx := Index(len(pw.elems[k]))
 136  	pw.elems[k] = append(pw.elems[k], "") // placeholder
 137  
 138  	return Encoder{
 139  		p:   pw,
 140  		k:   k,
 141  		Idx: idx,
 142  	}
 143  }
 144  
 145  // An Encoder provides methods for encoding an individual element's
 146  // bitstream data.
 147  type Encoder struct {
 148  	p *PkgEncoder
 149  
 150  	Relocs   []RelocEnt
 151  	RelocMap map[RelocEnt]uint32
 152  	Data     bytes.Buffer // accumulated element bitstream data
 153  
 154  	encodingRelocHeader bool
 155  
 156  	k   RelocKind
 157  	Idx Index // index within relocation section
 158  }
 159  
 160  // Flush finalizes the element's bitstream and returns its Index.
 161  func (w *Encoder) Flush() Index {
 162  	var sb strings.Builder
 163  
 164  	// Backup the data so we write the relocations at the front.
 165  	var tmp bytes.Buffer
 166  	io.Copy(&tmp, &w.Data)
 167  
 168  	// TODO(mdempsky): Consider writing these out separately so they're
 169  	// easier to strip, along with function bodies, so that we can prune
 170  	// down to just the data that's relevant to go/types.
 171  	if w.encodingRelocHeader {
 172  		panic("encodingRelocHeader already true; recursive flush?")
 173  	}
 174  	w.encodingRelocHeader = true
 175  	w.Sync(SyncRelocs)
 176  	w.Len(len(w.Relocs))
 177  	for _, rEnt := range w.Relocs {
 178  		w.Sync(SyncReloc)
 179  		w.Len(int(rEnt.Kind))
 180  		w.Len(int(rEnt.Idx))
 181  	}
 182  
 183  	io.Copy(&sb, &w.Data)
 184  	io.Copy(&sb, &tmp)
 185  	w.p.elems[w.k][w.Idx] = sb.String()
 186  
 187  	return w.Idx
 188  }
 189  
 190  func (w *Encoder) checkErr(err error) {
 191  	if err != nil {
 192  		panicf("unexpected encoding error: %v", err)
 193  	}
 194  }
 195  
 196  func (w *Encoder) rawUvarint(x uint64) {
 197  	var buf [binary.MaxVarintLen64]byte
 198  	n := binary.PutUvarint(buf[:], x)
 199  	_, err := w.Data.Write(buf[:n])
 200  	w.checkErr(err)
 201  }
 202  
 203  func (w *Encoder) rawVarint(x int64) {
 204  	// Zig-zag encode.
 205  	ux := uint64(x) << 1
 206  	if x < 0 {
 207  		ux = ^ux
 208  	}
 209  
 210  	w.rawUvarint(ux)
 211  }
 212  
 213  func (w *Encoder) rawReloc(r RelocKind, idx Index) int {
 214  	e := RelocEnt{r, idx}
 215  	if w.RelocMap != nil {
 216  		if i, ok := w.RelocMap[e]; ok {
 217  			return int(i)
 218  		}
 219  	} else {
 220  		w.RelocMap = make(map[RelocEnt]uint32)
 221  	}
 222  
 223  	i := len(w.Relocs)
 224  	w.RelocMap[e] = uint32(i)
 225  	w.Relocs = append(w.Relocs, e)
 226  	return i
 227  }
 228  
 229  func (w *Encoder) Sync(m SyncMarker) {
 230  	if !w.p.SyncMarkers() {
 231  		return
 232  	}
 233  
 234  	// Writing out stack frame string references requires working
 235  	// relocations, but writing out the relocations themselves involves
 236  	// sync markers. To prevent infinite recursion, we simply trim the
 237  	// stack frame for sync markers within the relocation header.
 238  	var frames []string
 239  	if !w.encodingRelocHeader && w.p.syncFrames > 0 {
 240  		pcs := make([]uintptr, w.p.syncFrames)
 241  		n := runtime.Callers(2, pcs)
 242  		frames = fmtFrames(pcs[:n]...)
 243  	}
 244  
 245  	// TODO(mdempsky): Save space by writing out stack frames as a
 246  	// linked list so we can share common stack frames.
 247  	w.rawUvarint(uint64(m))
 248  	w.rawUvarint(uint64(len(frames)))
 249  	for _, frame := range frames {
 250  		w.rawUvarint(uint64(w.rawReloc(RelocString, w.p.StringIdx(frame))))
 251  	}
 252  }
 253  
 254  // Bool encodes and writes a bool value into the element bitstream,
 255  // and then returns the bool value.
 256  //
 257  // For simple, 2-alternative encodings, the idiomatic way to call Bool
 258  // is something like:
 259  //
 260  //	if w.Bool(x != 0) {
 261  //		// alternative #1
 262  //	} else {
 263  //		// alternative #2
 264  //	}
 265  //
 266  // For multi-alternative encodings, use Code instead.
 267  func (w *Encoder) Bool(b bool) bool {
 268  	w.Sync(SyncBool)
 269  	var x byte
 270  	if b {
 271  		x = 1
 272  	}
 273  	err := w.Data.WriteByte(x)
 274  	w.checkErr(err)
 275  	return b
 276  }
 277  
 278  // Int64 encodes and writes an int64 value into the element bitstream.
 279  func (w *Encoder) Int64(x int64) {
 280  	w.Sync(SyncInt64)
 281  	w.rawVarint(x)
 282  }
 283  
 284  // Uint64 encodes and writes a uint64 value into the element bitstream.
 285  func (w *Encoder) Uint64(x uint64) {
 286  	w.Sync(SyncUint64)
 287  	w.rawUvarint(x)
 288  }
 289  
 290  // Len encodes and writes a non-negative int value into the element bitstream.
 291  func (w *Encoder) Len(x int) { assert(x >= 0); w.Uint64(uint64(x)) }
 292  
 293  // Int encodes and writes an int value into the element bitstream.
 294  func (w *Encoder) Int(x int) { w.Int64(int64(x)) }
 295  
 296  // Uint encodes and writes a uint value into the element bitstream.
 297  func (w *Encoder) Uint(x uint) { w.Uint64(uint64(x)) }
 298  
 299  // Reloc encodes and writes a relocation for the given (section,
 300  // index) pair into the element bitstream.
 301  //
 302  // Note: Only the index is formally written into the element
 303  // bitstream, so bitstream decoders must know from context which
 304  // section an encoded relocation refers to.
 305  func (w *Encoder) Reloc(r RelocKind, idx Index) {
 306  	w.Sync(SyncUseReloc)
 307  	w.Len(w.rawReloc(r, idx))
 308  }
 309  
 310  // Code encodes and writes a Code value into the element bitstream.
 311  func (w *Encoder) Code(c Code) {
 312  	w.Sync(c.Marker())
 313  	w.Len(c.Value())
 314  }
 315  
 316  // String encodes and writes a string value into the element
 317  // bitstream.
 318  //
 319  // Internally, strings are deduplicated by adding them to the strings
 320  // section (if not already present), and then writing a relocation
 321  // into the element bitstream.
 322  func (w *Encoder) String(s string) {
 323  	w.StringRef(w.p.StringIdx(s))
 324  }
 325  
 326  // StringRef writes a reference to the given index, which must be a
 327  // previously encoded string value.
 328  func (w *Encoder) StringRef(idx Index) {
 329  	w.Sync(SyncString)
 330  	w.Reloc(RelocString, idx)
 331  }
 332  
 333  // Strings encodes and writes a variable-length slice of strings into
 334  // the element bitstream.
 335  func (w *Encoder) Strings(ss []string) {
 336  	w.Len(len(ss))
 337  	for _, s := range ss {
 338  		w.String(s)
 339  	}
 340  }
 341  
 342  // Value encodes and writes a constant.Value into the element
 343  // bitstream.
 344  func (w *Encoder) Value(val constant.Value) {
 345  	w.Sync(SyncValue)
 346  	if w.Bool(val.Kind() == constant.Complex) {
 347  		w.scalar(constant.Real(val))
 348  		w.scalar(constant.Imag(val))
 349  	} else {
 350  		w.scalar(val)
 351  	}
 352  }
 353  
 354  func (w *Encoder) scalar(val constant.Value) {
 355  	switch v := constant.Val(val).(type) {
 356  	default:
 357  		panicf("unhandled %v (%v)", val, val.Kind())
 358  	case bool:
 359  		w.Code(ValBool)
 360  		w.Bool(v)
 361  	case string:
 362  		w.Code(ValString)
 363  		w.String(v)
 364  	case int64:
 365  		w.Code(ValInt64)
 366  		w.Int64(v)
 367  	case *big.Int:
 368  		w.Code(ValBigInt)
 369  		w.bigInt(v)
 370  	case *big.Rat:
 371  		w.Code(ValBigRat)
 372  		w.bigInt(v.Num())
 373  		w.bigInt(v.Denom())
 374  	case *big.Float:
 375  		w.Code(ValBigFloat)
 376  		w.bigFloat(v)
 377  	}
 378  }
 379  
 380  func (w *Encoder) bigInt(v *big.Int) {
 381  	b := v.Bytes()
 382  	w.String(string(b)) // TODO: More efficient encoding.
 383  	w.Bool(v.Sign() < 0)
 384  }
 385  
 386  func (w *Encoder) bigFloat(v *big.Float) {
 387  	b := v.Append(nil, 'p', -1)
 388  	w.String(string(b)) // TODO: More efficient encoding.
 389  }
 390  
 391  // Version reports the version of the bitstream.
 392  func (w *Encoder) Version() Version { return w.p.version }
 393