1 package blockchain
2 3 import (
4 "github.com/p9c/p9/pkg/ecc"
5 "github.com/p9c/p9/pkg/txscript"
6 )
7 8 // In order to reduce the size of stored scripts, a domain specific compression algorithm is used which recognizes
9 // standard scripts and stores them using less bytes than the original script. The compression algorithm used here was
10 // obtained from Bitcoin Core, so all credits for the algorithm go to it.
11 //
12 // The general serialized format is:
13 //
14 // <script size or type><script data>
15 //
16 // Field Type Size
17 //
18 // script size or type VLQ variable
19 // script data []byte variable
20 //
21 // The specific serialized format for each recognized standard script is:
22 //
23 // - Pay-to-pubkey-hash: (21 bytes) - <0><20-byte pubkey hash>
24 //
25 // - Pay-to-script-hash: (21 bytes) - <1><20-byte script hash>
26 //
27 // - Pay-to-pubkey**: (33 bytes) - <2, 3, 4, or 5><32-byte pubkey X value>
28 // 2, 3 = compressed pubkey with bit 0 specifying the y coordinate to use
29 // 4, 5 = uncompressed pubkey with bit 0 specifying the y coordinate to use
30 //
31 // ** Only valid public keys starting with 0x02, 0x03, and 0x04 are supported.
32 //
33 // Any scripts which are not recognized as one of the aforementioned standard scripts are encoded using the general
34 // serialized format and encode the script size as the sum of the actual size of the script and the number of special
35 // cases. The following constants specify the special constants used to identify a special script type in the
36 // domain-specific compressed script encoding. NOTE: This section specifically does not use iota since these values are
37 // serialized and must be stable for long-term storage.
38 const (
39 // cstPayToPubKeyHash identifies a compressed pay-to-pubkey-hash script.
40 cstPayToPubKeyHash = 0
41 // cstPayToScriptHash identifies a compressed pay-to-script-hash script.
42 cstPayToScriptHash = 1
43 // cstPayToPubKeyComp2 identifies a compressed pay-to-pubkey script to a compressed pubkey. Bit 0 specifies which
44 // y-coordinate to use to reconstruct the full uncompressed pubkey.
45 cstPayToPubKeyComp2 = 2
46 // cstPayToPubKeyComp3 identifies a compressed pay-to-pubkey script to a compressed pubkey. Bit 0 specifies which
47 // y-coordinate to use to reconstruct the full uncompressed pubkey.
48 cstPayToPubKeyComp3 = 3
49 // cstPayToPubKeyUncomp4 identifies a compressed pay-to-pubkey script to an uncompressed pubkey. Bit 0 specifies
50 // which y-coordinate to use to reconstruct the full uncompressed pubkey.
51 cstPayToPubKeyUncomp4 = 4
52 // cstPayToPubKeyUncomp5 identifies a compressed pay-to-pubkey script to an uncompressed pubkey. Bit 0 specifies
53 // which y-coordinate to use to reconstruct the full uncompressed pubkey.
54 cstPayToPubKeyUncomp5 = 5
55 // numSpecialScripts is the number of special scripts recognized by the domain-specific script compression
56 // algorithm.
57 numSpecialScripts = 6
58 )
59 60 // In order to reduce the size of stored amounts, a domain specific compression algorithm is used which relies on there typically being a lot of zeroes at end of the amounts. The compression algorithm used here was obtained from Bitcoin Core, so all credits for the algorithm go to it. While this is simply exchanging one uint64 for another, the resulting value for typical amounts has a much smaller magnitude which results in fewer bytes when encoded as variable length quantity. For example, consider the amount of 0.1 DUO which is 10000000 satoshi. Encoding 10000000 as a VLQ would take 4 bytes while encoding the compressed value of 8 as a VLQ only takes 1 byte. Essentially the compression is achieved by splitting the value into an exponent in the range [0-9] and a digit in the range [1-9], when possible, and encoding them in a way that can be decoded. More specifically, the encoding is as follows:
61 //
62 // - 0 is 0
63 //
64 // - Find the exponent, e, as the largest power of 10 that evenly divides the value up to a maximum of 9
65 //
66 // - When e < 9, the final digit can't be 0 so store it as d and remove it by dividing the value by 10 (call the result
67 // n). The encoded value is thus: 1 + 10*(9*n + d-1) + e
68 //
69 // - When e==9, the only thing known is the amount is not 0. The encoded value is thus:
70 //
71 // 1 + 10*(n-1) + e == 10 + 10*(n-1)
72 //
73 // Example encodings:
74 //
75 // (The numbers in parenthesis are the number of bytes when serialized as a VLQ)
76 //
77 // 0 (1) -> 0 (1) * 0.00000000 DUO
78 // 1000 (2) -> 4 (1) * 0.00001000 DUO
79 // 10000 (2) -> 5 (1) * 0.00010000 DUO
80 // 12345678 (4) -> 111111101(4) * 0.12345678 DUO
81 // 50000000 (4) -> 47 (1) * 0.50000000 DUO
82 // 100000000 (4) -> 9 (1) * 1.00000000 DUO
83 // 500000000 (5) -> 49 (1) * 5.00000000 DUO
84 // 1000000000 (5) -> 10 (1) * 10.00000000 DUO
85 // -----------------------------------------------------------------------------
86 87 // compressTxOutAmount compresses the passed amount according to the domain specific compression algorithm described
88 // above.
89 func compressTxOutAmount(amount uint64) uint64 {
90 // No need to do any work if it's zero.
91 if amount == 0 {
92 return 0
93 }
94 // Find the largest power of 10 (max of 9) that evenly divides the value.
95 exponent := uint64(0)
96 for amount%10 == 0 && exponent < 9 {
97 amount /= 10
98 exponent++
99 }
100 // The compressed result for exponents less than 9 is:
101 // 1 + 10*(9*n + d-1) + e
102 if exponent < 9 {
103 lastDigit := amount % 10
104 amount /= 10
105 return 1 + 10*(9*amount+lastDigit-1) + exponent
106 }
107 // The compressed result for an exponent of 9 is:
108 // 1 + 10*(n-1) + e == 10 + 10*(n-1)
109 return 10 + 10*(amount-1)
110 }
111 112 // compressedScriptSize returns the number of bytes the passed script would take when encoded with the domain specific
113 // compression algorithm described above.
114 func compressedScriptSize(pkScript []byte) int {
115 // Pay-to-pubkey-hash script.
116 if valid, _ := isPubKeyHash(pkScript); valid {
117 return 21
118 }
119 // Pay-to-script-hash script.
120 if valid, _ := isScriptHash(pkScript); valid {
121 return 21
122 }
123 // Pay-to-pubkey (compressed or uncompressed) script.
124 if valid, _ := isPubKey(pkScript); valid {
125 return 33
126 }
127 // When none of the above special cases apply, encode the script as is preceded by the sum of its size and the
128 // number of special cases encoded as a variable length quantity.
129 return serializeSizeVLQ(uint64(len(pkScript)+numSpecialScripts)) +
130 len(pkScript)
131 }
132 133 // Compressed transaction outputs consist of an amount and a public key script both compressed using the domain specific
134 // compression algorithms previously described.
135 //
136 // The serialized format is:
137 //
138 // <compressed amount><compressed script>
139 //
140 // Field Type Size
141 //
142 // compressed amount VLQ variable
143 // compressed script []byte variable
144 145 // compressedTxOutSize returns the number of bytes the passed transaction output fields would take when encoded with the
146 // format described above.
147 func compressedTxOutSize(amount uint64, pkScript []byte) int {
148 return serializeSizeVLQ(compressTxOutAmount(amount)) +
149 compressedScriptSize(pkScript)
150 }
151 152 // decodeCompressedScriptSize treats the passed serialized bytes as a compressed script, possibly followed by other
153 // data, and returns the number of bytes it occupies taking into account the special encoding of the script size by the
154 // domain specific compression algorithm described above.
155 func decodeCompressedScriptSize(serialized []byte) int {
156 scriptSize, bytesRead := deserializeVLQ(serialized)
157 if bytesRead == 0 {
158 return 0
159 }
160 switch scriptSize {
161 case cstPayToPubKeyHash:
162 return 21
163 case cstPayToScriptHash:
164 return 21
165 case cstPayToPubKeyComp2, cstPayToPubKeyComp3, cstPayToPubKeyUncomp4,
166 cstPayToPubKeyUncomp5:
167 return 33
168 }
169 scriptSize -= numSpecialScripts
170 scriptSize += uint64(bytesRead)
171 return int(scriptSize)
172 }
173 174 // decodeCompressedTxOut decodes the passed compressed txout, possibly followed by other data, into its uncompressed
175 // amount and script and returns them along with the number of bytes they occupied prior to decompression.
176 func decodeCompressedTxOut(serialized []byte) (uint64, []byte, int, error) {
177 // Deserialize the compressed amount and ensure there are bytes remaining for the compressed script.
178 compressedAmount, bytesRead := deserializeVLQ(serialized)
179 if bytesRead >= len(serialized) {
180 return 0, nil, bytesRead, errDeserialize(
181 "unexpected end of " +
182 "data after compressed amount",
183 )
184 }
185 // Decode the compressed script size and ensure there are enough bytes left in the slice for it.
186 scriptSize := decodeCompressedScriptSize(serialized[bytesRead:])
187 if len(serialized[bytesRead:]) < scriptSize {
188 return 0, nil, bytesRead, errDeserialize(
189 "unexpected end of " +
190 "data after script size",
191 )
192 }
193 // Decompress and return the amount and script.
194 amount := decompressTxOutAmount(compressedAmount)
195 script := decompressScript(serialized[bytesRead : bytesRead+scriptSize])
196 return amount, script, bytesRead + scriptSize, nil
197 }
198 199 // decompressScript returns the original script obtained by decompressing the passed compressed script according to the
200 // domain specific compression algorithm described above. NOTE: The script parameter must already have been proven to be
201 // long enough to contain the number of bytes returned by decodeCompressedScriptSize or it will panic. This is
202 // acceptable since it is only an internal function.
203 func decompressScript(compressedPkScript []byte) []byte {
204 // In practice this function will not be called with a zero-length or nil script since the nil script encoding
205 // includes the length, however the code below assumes the length exists, so just return nil now if the function
206 // ever ends up being called with a nil script in the future.
207 if len(compressedPkScript) == 0 {
208 return nil
209 }
210 // Decode the script size and examine it for the special cases.
211 encodedScriptSize, bytesRead := deserializeVLQ(compressedPkScript)
212 switch encodedScriptSize {
213 // Pay-to-pubkey-hash script. The resulting script is:
214 //
215 // <OP_DUP><OP_HASH160><20 byte hash><OP_EQUALVERIFY><OP_CHECKSIG>
216 case cstPayToPubKeyHash:
217 pkScript := make([]byte, 25)
218 pkScript[0] = txscript.OP_DUP
219 pkScript[1] = txscript.OP_HASH160
220 pkScript[2] = txscript.OP_DATA_20
221 copy(pkScript[3:], compressedPkScript[bytesRead:bytesRead+20])
222 pkScript[23] = txscript.OP_EQUALVERIFY
223 pkScript[24] = txscript.OP_CHECKSIG
224 return pkScript
225 // Pay-to-script-hash script. The resulting script is:
226 //
227 // <OP_HASH160><20 byte script hash><OP_EQUAL>
228 case cstPayToScriptHash:
229 pkScript := make([]byte, 23)
230 pkScript[0] = txscript.OP_HASH160
231 pkScript[1] = txscript.OP_DATA_20
232 copy(pkScript[2:], compressedPkScript[bytesRead:bytesRead+20])
233 pkScript[22] = txscript.OP_EQUAL
234 return pkScript
235 // Pay-to-compressed-pubkey script. The resulting script is:
236 //
237 // <OP_DATA_33><33 byte compressed pubkey><OP_CHECKSIG>
238 case cstPayToPubKeyComp2, cstPayToPubKeyComp3:
239 pkScript := make([]byte, 35)
240 pkScript[0] = txscript.OP_DATA_33
241 pkScript[1] = byte(encodedScriptSize)
242 copy(pkScript[2:], compressedPkScript[bytesRead:bytesRead+32])
243 pkScript[34] = txscript.OP_CHECKSIG
244 return pkScript
245 // Pay-to-uncompressed-pubkey script. The resulting script is:
246 //
247 // <OP_DATA_65><65 byte uncompressed pubkey><OP_CHECKSIG>
248 case cstPayToPubKeyUncomp4, cstPayToPubKeyUncomp5:
249 // Change the leading byte to the appropriate compressed pubkey identifier (0x02 or 0x03) so it can be decoded
250 // as a compressed pubkey. This really should never fail since the encoding ensures it is valid before
251 // compressing to this type.
252 compressedKey := make([]byte, 33)
253 compressedKey[0] = byte(encodedScriptSize - 2)
254 copy(compressedKey[1:], compressedPkScript[1:])
255 key, e := ecc.ParsePubKey(compressedKey, ecc.S256())
256 if e != nil {
257 E.Ln(e)
258 return nil
259 }
260 pkScript := make([]byte, 67)
261 pkScript[0] = txscript.OP_DATA_65
262 copy(pkScript[1:], key.SerializeUncompressed())
263 pkScript[66] = txscript.OP_CHECKSIG
264 return pkScript
265 }
266 // When none of the special cases apply, the script was encoded using the general format, so reduce the script size
267 // by the number of special cases and return the unmodified script.
268 scriptSize := int(encodedScriptSize - numSpecialScripts)
269 pkScript := make([]byte, scriptSize)
270 copy(pkScript, compressedPkScript[bytesRead:bytesRead+scriptSize])
271 return pkScript
272 }
273 274 // decompressTxOutAmount returns the original amount the passed compressed amount represents according to the domain
275 // specific compression algorithm described above.
276 func decompressTxOutAmount(amount uint64) uint64 {
277 // No need to do any work if it's zero.
278 if amount == 0 {
279 return 0
280 }
281 // The decompressed amount is either of the following two equations:
282 //
283 // x = 1 + 10*(9*n + d - 1) + e
284 //
285 // x = 1 + 10*(n - 1) + 9
286 amount--
287 // The decompressed amount is now one of the following two equations:
288 //
289 // x = 10*(9*n + d - 1) + e
290 //
291 // x = 10*(n - 1) + 9
292 exponent := amount % 10
293 amount /= 10
294 // The decompressed amount is now one of the following two equations:
295 //
296 // x = 9*n + d - 1 | where e < 9
297 //
298 // x = n - 1 | where e = 9
299 n := uint64(0)
300 if exponent < 9 {
301 lastDigit := amount%9 + 1
302 amount /= 9
303 n = amount*10 + lastDigit
304 } else {
305 n = amount + 1
306 }
307 // Apply the exponent.
308 for ; exponent > 0; exponent-- {
309 n *= 10
310 }
311 return n
312 }
313 314 // deserializeVLQ deserializes the provided variable-length quantity according to the format described above. It also
315 // returns the number of bytes deserialized.
316 func deserializeVLQ(serialized []byte) (uint64, int) {
317 var n uint64
318 var size int
319 for _, val := range serialized {
320 size++
321 n = (n << 7) | uint64(val&0x7f)
322 if val&0x80 != 0x80 {
323 break
324 }
325 n++
326 }
327 return n, size
328 }
329 330 // isPubKey returns whether or not the passed public key script is a standard pay-to-pubkey script that pays to a valid
331 // compressed or uncompressed public key along with the serialized pubkey it is paying to if it is.
332 //
333 // NOTE: This function ensures the public key is actually valid since the compression algorithm requires valid pubkeys.
334 // It does not support hybrid pubkeys. This means that even if the script has the correct form for a pay-to-pubkey
335 // script, this function will only return true when it is paying to a valid compressed or uncompressed pubkey.
336 func isPubKey(script []byte) (bool, []byte) {
337 // Pay-to-compressed-pubkey script.
338 if len(script) == 35 && script[0] == txscript.OP_DATA_33 &&
339 script[34] == txscript.OP_CHECKSIG && (script[1] == 0x02 ||
340 script[1] == 0x03) {
341 // Ensure the public key is valid.
342 serializedPubKey := script[1:34]
343 _, e := ecc.ParsePubKey(serializedPubKey, ecc.S256())
344 if e == nil {
345 return true, serializedPubKey
346 }
347 }
348 // Pay-to-uncompressed-pubkey script.
349 if len(script) == 67 && script[0] == txscript.OP_DATA_65 &&
350 script[66] == txscript.OP_CHECKSIG && script[1] == 0x04 {
351 // Ensure the public key is valid.
352 serializedPubKey := script[1:66]
353 _, e := ecc.ParsePubKey(serializedPubKey, ecc.S256())
354 if e == nil {
355 return true, serializedPubKey
356 }
357 }
358 return false, nil
359 }
360 361 // isPubKeyHash returns whether or not the passed public key script is a standard pay-to-pubkey-hash script along with
362 // the pubkey hash it is paying to if it is.
363 func isPubKeyHash(script []byte) (bool, []byte) {
364 if len(script) == 25 && script[0] == txscript.OP_DUP &&
365 script[1] == txscript.OP_HASH160 &&
366 script[2] == txscript.OP_DATA_20 &&
367 script[23] == txscript.OP_EQUALVERIFY &&
368 script[24] == txscript.OP_CHECKSIG {
369 return true, script[3:23]
370 }
371 return false, nil
372 }
373 374 // isScriptHash returns whether or not the passed public key script is a standard pay-to-script-hash script along with
375 // the script hash it is paying to if it is.
376 func isScriptHash(script []byte) (bool, []byte) {
377 if len(script) == 23 && script[0] == txscript.OP_HASH160 &&
378 script[1] == txscript.OP_DATA_20 &&
379 script[22] == txscript.OP_EQUAL {
380 return true, script[2:22]
381 }
382 return false, nil
383 }
384 385 // putCompressedScript compresses the passed script according to the domain specific compression algorithm described
386 // above directly into the passed target byte slice. The target byte slice must be at least large enough to handle the
387 // number of bytes returned by the compressedScriptSize function or it will panic.
388 func putCompressedScript(target, pkScript []byte) int {
389 // Pay-to-pubkey-hash script.
390 if valid, hash := isPubKeyHash(pkScript); valid {
391 target[0] = cstPayToPubKeyHash
392 copy(target[1:21], hash)
393 return 21
394 }
395 // Pay-to-script-hash script.
396 if valid, hash := isScriptHash(pkScript); valid {
397 target[0] = cstPayToScriptHash
398 copy(target[1:21], hash)
399 return 21
400 }
401 // Pay-to-pubkey (compressed or uncompressed) script.
402 if valid, serializedPubKey := isPubKey(pkScript); valid {
403 pubKeyFormat := serializedPubKey[0]
404 switch pubKeyFormat {
405 case 0x02, 0x03:
406 target[0] = pubKeyFormat
407 copy(target[1:33], serializedPubKey[1:33])
408 return 33
409 case 0x04:
410 // Encode the oddness of the serialized pubkey into the
411 // compressed script type.
412 target[0] = pubKeyFormat | (serializedPubKey[64] & 0x01)
413 copy(target[1:33], serializedPubKey[1:33])
414 return 33
415 }
416 }
417 // When none of the above special cases apply, encode the unmodified script preceded by the sum of its size and the
418 // number of special cases encoded as a variable length quantity.
419 encodedSize := uint64(len(pkScript) + numSpecialScripts)
420 vlqSizeLen := putVLQ(target, encodedSize)
421 copy(target[vlqSizeLen:], pkScript)
422 return vlqSizeLen + len(pkScript)
423 }
424 425 // putCompressedTxOut compresses the passed amount and script according to their domain specific compression algorithms
426 // and encodes them directly into the passed target byte slice with the format described above. The target byte slice
427 // must be at least large enough to handle the number of bytes returned by the compressedTxOutSize function or it will
428 // panic.
429 func putCompressedTxOut(target []byte, amount uint64, pkScript []byte) int {
430 offset := putVLQ(target, compressTxOutAmount(amount))
431 offset += putCompressedScript(target[offset:], pkScript)
432 return offset
433 }
434 435 // putVLQ serializes the provided number to a variable-length quantity according to the format described above and
436 // returns the number of bytes of the encoded value. The result is placed directly into the passed byte slice which must
437 // be at least large enough to handle the number of bytes returned by the serializeSizeVLQ function or it will panic.
438 func putVLQ(target []byte, n uint64) int {
439 offset := 0
440 for ; ; offset++ {
441 // The high bit is set when another byte follows.
442 highBitMask := byte(0x80)
443 if offset == 0 {
444 highBitMask = 0x00
445 }
446 target[offset] = byte(n&0x7f) | highBitMask
447 if n <= 0x7f {
448 break
449 }
450 n = (n >> 7) - 1
451 }
452 // Reverse the bytes so it is MSB-encoded.
453 for i, j := 0, offset; i < j; i, j = i+1, j-1 {
454 target[i], target[j] = target[j], target[i]
455 }
456 return offset + 1
457 }
458 459 // A variable length quantity (VLQ) is an encoding that uses an arbitrary number of binary octets to represent an
460 // arbitrarily large integer. The scheme employs a most significant byte (MSB) base-128 encoding where the high bit in
461 // each byte indicates whether or not the byte is the final one. In addition, to ensure there are no redundant
462 // encodings, an offset is subtracted every time a group of 7 bits is shifted out. Therefore each integer can be
463 // represented in exactly one way, and each representation stands for exactly one integer.
464 //
465 // Another nice property of this encoding is that it provides a compact representation of values that are typically used
466 // to indicate txsizes. For example, the values 0 - 127 are represented with a single byte, 128 - 16511 with two bytes,
467 // and 16512 - 2113663 with three bytes.
468 //
469 // While the encoding allows arbitrarily large integers, it is artificially limited in this code to an unsigned 64-bit
470 // integer for efficiency purposes.
471 //
472 // Example encodings:
473 //
474 // 0 -> [0x00]
475 // 127 -> [0x7f] * Max 1-byte value
476 // 128 -> [0x80 0x00]
477 // 129 -> [0x80 0x01]
478 // 255 -> [0x80 0x7f]
479 // 256 -> [0x81 0x00]
480 // 16511 -> [0xff 0x7f] * Max 2-byte value
481 // 16512 -> [0x80 0x80 0x00]
482 // 32895 -> [0x80 0xff 0x7f]
483 // 2113663 -> [0xff 0xff 0x7f] * Max 3-byte value
484 // 270549119 -> [0xff 0xff 0xff 0x7f] * Max 4-byte value
485 // 2^64-1 -> [0x80 0xfe 0xfe 0xfe 0xfe 0xfe 0xfe 0xfe 0xfe 0x7f]
486 //
487 // References:
488 //
489 // https://en.wikipedia.org/wiki/Variable-length_quantity
490 // http://www.codecodex.com/wiki/Variable-Length_Integers
491 //
492 // -----------------------------------------------------------------------------
493 494 // serializeSizeVLQ returns the number of bytes it would take to serialize the passed number as a variable-length
495 // quantity according to the format described above.
496 func serializeSizeVLQ(n uint64) int {
497 size := 1
498 for ; n > 0x7f; n = (n >> 7) - 1 {
499 size++
500 }
501 return size
502 }
503