secalloc_chacha.mx raw

   1  //go:build darwin || (linux && !baremetal && !wasip1 && !wasm_unknown && !wasip2 && !nintendoswitch)
   2  
   3  package runtime
   4  
   5  // ChaCha12 keystream generator used by the secure allocator to fill its
   6  // noise buffer once at process init.
   7  //
   8  // This is a minimal port of github.com/aead/chacha20/chacha (generic path
   9  // only). It lives inside the runtime package because importing a separate
  10  // frand package would create a cycle: runtime → frand → encoding/binary
  11  // → errors → runtime. The chacha primitive has no external dependencies,
  12  // so it sits next to the secure allocator with no fan-out.
  13  //
  14  // Only the bare-minimum surface is exposed to secalloc.mx:
  15  //   - secChachaXORKeyStream(dst, src, nonce, key, rounds)
  16  // 8-byte nonce only, no XChaCha20, no IETF 96-bit nonce, no Cipher struct.
  17  
  18  const (
  19  	secChachaKeySize   = 32
  20  	secChachaNonceSize = 8
  21  )
  22  
  23  func _secChachaSigma() [4]uint32 {
  24  	return [4]uint32{0x61707865, 0x3320646e, 0x79622d32, 0x6b206574}
  25  }
  26  
  27  func secChachaXORKeyStream(dst, src, nonce, key []byte, rounds int) {
  28  	if rounds != 20 && rounds != 12 && rounds != 8 {
  29  		runtimePanic("secalloc: bad chacha rounds")
  30  	}
  31  	if len(key) != secChachaKeySize {
  32  		runtimePanic("secalloc: bad chacha key length")
  33  	}
  34  	if len(nonce) != secChachaNonceSize {
  35  		runtimePanic("secalloc: bad chacha nonce length")
  36  	}
  37  	if len(dst) < len(src) {
  38  		runtimePanic("secalloc: chacha dst too small")
  39  	}
  40  
  41  	var state [64]byte
  42  	var block [64]byte
  43  
  44  	sigma := _secChachaSigma()
  45  	secPutUint32LE(state[0:], sigma[0])
  46  	secPutUint32LE(state[4:], sigma[1])
  47  	secPutUint32LE(state[8:], sigma[2])
  48  	secPutUint32LE(state[12:], sigma[3])
  49  	copy(state[16:48], key)
  50  	copy(state[56:64], nonce)
  51  
  52  	for len(src) >= 64 {
  53  		secChachaBlock(&block, &state, rounds)
  54  		for i, v := range block {
  55  			dst[i] = src[i] ^ v
  56  		}
  57  		src = src[64:]
  58  		dst = dst[64:]
  59  	}
  60  	if n := len(src); n > 0 {
  61  		secChachaBlock(&block, &state, rounds)
  62  		for i := 0; i < n; i++ {
  63  			dst[i] = src[i] ^ block[i]
  64  		}
  65  	}
  66  }
  67  
  68  // secChachaBlock runs the ChaCha core on state, writes 64 bytes of
  69  // keystream to dst, and increments the 64-bit counter at state[48:56].
  70  func secChachaBlock(dst *[64]byte, state *[64]byte, rounds int) {
  71  	v00 := secLoadUint32LE(state[0:])
  72  	v01 := secLoadUint32LE(state[4:])
  73  	v02 := secLoadUint32LE(state[8:])
  74  	v03 := secLoadUint32LE(state[12:])
  75  	v04 := secLoadUint32LE(state[16:])
  76  	v05 := secLoadUint32LE(state[20:])
  77  	v06 := secLoadUint32LE(state[24:])
  78  	v07 := secLoadUint32LE(state[28:])
  79  	v08 := secLoadUint32LE(state[32:])
  80  	v09 := secLoadUint32LE(state[36:])
  81  	v10 := secLoadUint32LE(state[40:])
  82  	v11 := secLoadUint32LE(state[44:])
  83  	v12 := secLoadUint32LE(state[48:])
  84  	v13 := secLoadUint32LE(state[52:])
  85  	v14 := secLoadUint32LE(state[56:])
  86  	v15 := secLoadUint32LE(state[60:])
  87  
  88  	s00, s01, s02, s03, s04, s05, s06, s07 := v00, v01, v02, v03, v04, v05, v06, v07
  89  	s08, s09, s10, s11, s12, s13, s14, s15 := v08, v09, v10, v11, v12, v13, v14, v15
  90  
  91  	for i := 0; i < rounds; i += 2 {
  92  		// Column round.
  93  		v00 += v04
  94  		v12 ^= v00
  95  		v12 = (v12 << 16) | (v12 >> 16)
  96  		v08 += v12
  97  		v04 ^= v08
  98  		v04 = (v04 << 12) | (v04 >> 20)
  99  		v00 += v04
 100  		v12 ^= v00
 101  		v12 = (v12 << 8) | (v12 >> 24)
 102  		v08 += v12
 103  		v04 ^= v08
 104  		v04 = (v04 << 7) | (v04 >> 25)
 105  
 106  		v01 += v05
 107  		v13 ^= v01
 108  		v13 = (v13 << 16) | (v13 >> 16)
 109  		v09 += v13
 110  		v05 ^= v09
 111  		v05 = (v05 << 12) | (v05 >> 20)
 112  		v01 += v05
 113  		v13 ^= v01
 114  		v13 = (v13 << 8) | (v13 >> 24)
 115  		v09 += v13
 116  		v05 ^= v09
 117  		v05 = (v05 << 7) | (v05 >> 25)
 118  
 119  		v02 += v06
 120  		v14 ^= v02
 121  		v14 = (v14 << 16) | (v14 >> 16)
 122  		v10 += v14
 123  		v06 ^= v10
 124  		v06 = (v06 << 12) | (v06 >> 20)
 125  		v02 += v06
 126  		v14 ^= v02
 127  		v14 = (v14 << 8) | (v14 >> 24)
 128  		v10 += v14
 129  		v06 ^= v10
 130  		v06 = (v06 << 7) | (v06 >> 25)
 131  
 132  		v03 += v07
 133  		v15 ^= v03
 134  		v15 = (v15 << 16) | (v15 >> 16)
 135  		v11 += v15
 136  		v07 ^= v11
 137  		v07 = (v07 << 12) | (v07 >> 20)
 138  		v03 += v07
 139  		v15 ^= v03
 140  		v15 = (v15 << 8) | (v15 >> 24)
 141  		v11 += v15
 142  		v07 ^= v11
 143  		v07 = (v07 << 7) | (v07 >> 25)
 144  
 145  		// Diagonal round.
 146  		v00 += v05
 147  		v15 ^= v00
 148  		v15 = (v15 << 16) | (v15 >> 16)
 149  		v10 += v15
 150  		v05 ^= v10
 151  		v05 = (v05 << 12) | (v05 >> 20)
 152  		v00 += v05
 153  		v15 ^= v00
 154  		v15 = (v15 << 8) | (v15 >> 24)
 155  		v10 += v15
 156  		v05 ^= v10
 157  		v05 = (v05 << 7) | (v05 >> 25)
 158  
 159  		v01 += v06
 160  		v12 ^= v01
 161  		v12 = (v12 << 16) | (v12 >> 16)
 162  		v11 += v12
 163  		v06 ^= v11
 164  		v06 = (v06 << 12) | (v06 >> 20)
 165  		v01 += v06
 166  		v12 ^= v01
 167  		v12 = (v12 << 8) | (v12 >> 24)
 168  		v11 += v12
 169  		v06 ^= v11
 170  		v06 = (v06 << 7) | (v06 >> 25)
 171  
 172  		v02 += v07
 173  		v13 ^= v02
 174  		v13 = (v13 << 16) | (v13 >> 16)
 175  		v08 += v13
 176  		v07 ^= v08
 177  		v07 = (v07 << 12) | (v07 >> 20)
 178  		v02 += v07
 179  		v13 ^= v02
 180  		v13 = (v13 << 8) | (v13 >> 24)
 181  		v08 += v13
 182  		v07 ^= v08
 183  		v07 = (v07 << 7) | (v07 >> 25)
 184  
 185  		v03 += v04
 186  		v14 ^= v03
 187  		v14 = (v14 << 16) | (v14 >> 16)
 188  		v09 += v14
 189  		v04 ^= v09
 190  		v04 = (v04 << 12) | (v04 >> 20)
 191  		v03 += v04
 192  		v14 ^= v03
 193  		v14 = (v14 << 8) | (v14 >> 24)
 194  		v09 += v14
 195  		v04 ^= v09
 196  		v04 = (v04 << 7) | (v04 >> 25)
 197  	}
 198  
 199  	v00 += s00
 200  	v01 += s01
 201  	v02 += s02
 202  	v03 += s03
 203  	v04 += s04
 204  	v05 += s05
 205  	v06 += s06
 206  	v07 += s07
 207  	v08 += s08
 208  	v09 += s09
 209  	v10 += s10
 210  	v11 += s11
 211  	v12 += s12
 212  	v13 += s13
 213  	v14 += s14
 214  	v15 += s15
 215  
 216  	// Increment 64-bit counter at state[48:56].
 217  	s12++
 218  	secPutUint32LE(state[48:], s12)
 219  	if s12 == 0 {
 220  		s13++
 221  		secPutUint32LE(state[52:], s13)
 222  	}
 223  
 224  	secPutUint32LE(dst[0:], v00)
 225  	secPutUint32LE(dst[4:], v01)
 226  	secPutUint32LE(dst[8:], v02)
 227  	secPutUint32LE(dst[12:], v03)
 228  	secPutUint32LE(dst[16:], v04)
 229  	secPutUint32LE(dst[20:], v05)
 230  	secPutUint32LE(dst[24:], v06)
 231  	secPutUint32LE(dst[28:], v07)
 232  	secPutUint32LE(dst[32:], v08)
 233  	secPutUint32LE(dst[36:], v09)
 234  	secPutUint32LE(dst[40:], v10)
 235  	secPutUint32LE(dst[44:], v11)
 236  	secPutUint32LE(dst[48:], v12)
 237  	secPutUint32LE(dst[52:], v13)
 238  	secPutUint32LE(dst[56:], v14)
 239  	secPutUint32LE(dst[60:], v15)
 240  }
 241  
 242  func secLoadUint32LE(b []byte) uint32 {
 243  	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
 244  }
 245  
 246  func secPutUint32LE(b []byte, v uint32) {
 247  	b[0] = byte(v)
 248  	b[1] = byte(v >> 8)
 249  	b[2] = byte(v >> 16)
 250  	b[3] = byte(v >> 24)
 251  }
 252