sha256block_arm64.s raw

   1  //+build !noasm,!appengine,gc
   2  
   3  // ARM64 version of SHA256
   4  
   5  //
   6  // Minio Cloud Storage, (C) 2016 Minio, Inc.
   7  //
   8  // Licensed under the Apache License, Version 2.0 (the "License");
   9  // you may not use this file except in compliance with the License.
  10  // You may obtain a copy of the License at
  11  //
  12  //     http://www.apache.org/licenses/LICENSE-2.0
  13  //
  14  // Unless required by applicable law or agreed to in writing, software
  15  // distributed under the License is distributed on an "AS IS" BASIS,
  16  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  // See the License for the specific language governing permissions and
  18  // limitations under the License.
  19  //
  20  
  21  //
  22  // Based on implementation as found in https://github.com/jocover/sha256-armv8
  23  //
  24  // Use github.com/minio/asm2plan9s on this file to assemble ARM instructions to
  25  // their Plan9 equivalents
  26  //
  27  
  28  TEXT ·blockArmSha2(SB), 7, $0
      	// blockArmSha2 runs the SHA-256 compression function over every complete
      	// 64-byte block of the message, using the ARMv8 SHA-256 instructions.
      	// The vector instructions are emitted as raw WORD encodings; the trailing
      	// comment on each WORD is its disassembly.
      	//
      	// The Go declaration is not in this file. The frame offsets used below
      	// (0, 24, 32) are consistent with two slice arguments, presumably
      	//     func blockArmSha2(h []uint32, message []uint8)
      	// -- TODO confirm against the Go stub.
      	// NOTE(review): if both arguments are slices, go vet's asmdecl check
      	// expects the names h_base+0(FP) and message_base+24(FP); the offsets
      	// themselves would not change.
      	//
      	// TEXT flag 7 corresponds to NOPROF|DUPOK|NOSPLIT in Go's textflag.h;
      	// $0 declares a zero-byte local frame.
      	//
      	// Register use:
      	//   R0 (x0)   pointer to the 32-byte hash state (read into v0/v1, written back at the end)
      	//   R1 (x1)   message pointer, advanced by 64 bytes per iteration
      	//   R2        bytes remaining minus 64 (negative => no complete block left)
      	//   R3 (x3)   cursor into ·constants, used only while preloading
      	//   v0, v1    running hash state
      	//   v2, v3    working copy of the state for the current block
      	//   v4        copy of v2 taken before each sha256h, consumed by sha256h2
      	//   v5-v8     message schedule words (16 x 32-bit)
      	//   v9, v10   schedule words + constants, used alternately
      	//   v16-v31   the 256-byte constants table
      	//
      	// A message shorter than 64 bytes returns without writing h, and a
      	// trailing partial block (message_len % 64 bytes) is never read.
  29  	MOVD h+0(FP), R0 // R0 = pointer to the hash state
  30  	MOVD message+24(FP), R1 // R1 = pointer to the message bytes
  31  	MOVD message_len+32(FP), R2 // R2 = length of message in bytes
  32  	SUBS $64, R2 // R2 -= 64, setting flags
  33  	BMI  complete // fewer than 64 bytes: nothing to hash
  34  
  35  	// Load constants table pointer
  36  	MOVD $·constants(SB), R3
  37  
  38  	// Cache the constants table in registers v16 - v31 (four 64-byte loads,
      	// x3 post-incremented each time). The loads of the hash state into
      	// v0/v1 are interleaved with the table loads.
  39  	WORD $0x4cdf2870 // ld1	{v16.4s-v19.4s}, [x3], #64
  40  	WORD $0x4cdf7800 // ld1	{v0.4s}, [x0], #16
  41  	WORD $0x4cdf2874 // ld1	{v20.4s-v23.4s}, [x3], #64
  42  
  43  	WORD $0x4c407801 // ld1	{v1.4s}, [x0]
  44  	WORD $0x4cdf2878 // ld1	{v24.4s-v27.4s}, [x3], #64
      	// Undo the post-increment from the v0 load so x0 points at the start of
      	// the state again for the final store.
  45  	WORD $0xd1004000 // sub	x0, x0, #0x10
  46  	WORD $0x4cdf287c // ld1	{v28.4s-v31.4s}, [x3], #64
  47  
  48  loop:
  49  	// Main loop: one 64-byte message block per iteration.
      	// Load the block into v5-v8 (x1 += 64) and start the working copy
      	// v2/v3 from the running state v0/v1.
  50  	WORD $0x4cdf2025 // ld1	{v5.16b-v8.16b}, [x1], #64
  51  	WORD $0x4ea01c02 // mov	v2.16b, v0.16b
  52  	WORD $0x4ea11c23 // mov	v3.16b, v1.16b
      	// rev32 byte-swaps each 32-bit word of the block (SHA-256 message words
      	// are big-endian). The swaps for v7 and v8 are interleaved further down.
  53  	WORD $0x6e2008a5 // rev32	v5.16b, v5.16b
  54  	WORD $0x6e2008c6 // rev32	v6.16b, v6.16b
      	// From here on each "add" forms schedule words + constants for a later
      	// group of four rounds, and each mov/sha256h/sha256h2 triple performs
      	// four rounds. sha256su0/sha256su1 pairs extend the message schedule in
      	// place: each pair replaces the oldest four words held in v5-v8 with the
      	// next four. The three streams are interleaved.
  55  	WORD $0x4eb084a9 // add	v9.4s, v5.4s, v16.4s
  56  	WORD $0x6e2008e7 // rev32	v7.16b, v7.16b
  57  	WORD $0x4eb184ca // add	v10.4s, v6.4s, v17.4s
      	// Rounds 0-3 (v9). v4 keeps the pre-update v2 for sha256h2.
  58  	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
  59  	WORD $0x5e094062 // sha256h	q2, q3, v9.4s
  60  	WORD $0x5e095083 // sha256h2	q3, q4, v9.4s
  61  	WORD $0x5e2828c5 // sha256su0	v5.4s, v6.4s
  62  	WORD $0x6e200908 // rev32	v8.16b, v8.16b
  63  	WORD $0x4eb284e9 // add	v9.4s, v7.4s, v18.4s
      	// Rounds 4-7 (v10)
  64  	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
  65  	WORD $0x5e0a4062 // sha256h	q2, q3, v10.4s
  66  	WORD $0x5e0a5083 // sha256h2	q3, q4, v10.4s
  67  	WORD $0x5e2828e6 // sha256su0	v6.4s, v7.4s
  68  	WORD $0x5e0860e5 // sha256su1	v5.4s, v7.4s, v8.4s
  69  	WORD $0x4eb3850a // add	v10.4s, v8.4s, v19.4s
      	// Rounds 8-11 (v9)
  70  	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
  71  	WORD $0x5e094062 // sha256h	q2, q3, v9.4s
  72  	WORD $0x5e095083 // sha256h2	q3, q4, v9.4s
  73  	WORD $0x5e282907 // sha256su0	v7.4s, v8.4s
  74  	WORD $0x5e056106 // sha256su1	v6.4s, v8.4s, v5.4s
  75  	WORD $0x4eb484a9 // add	v9.4s, v5.4s, v20.4s
      	// Rounds 12-15 (v10)
  76  	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
  77  	WORD $0x5e0a4062 // sha256h	q2, q3, v10.4s
  78  	WORD $0x5e0a5083 // sha256h2	q3, q4, v10.4s
  79  	WORD $0x5e2828a8 // sha256su0	v8.4s, v5.4s
  80  	WORD $0x5e0660a7 // sha256su1	v7.4s, v5.4s, v6.4s
  81  	WORD $0x4eb584ca // add	v10.4s, v6.4s, v21.4s
      	// Rounds 16-19 (v9)
  82  	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
  83  	WORD $0x5e094062 // sha256h	q2, q3, v9.4s
  84  	WORD $0x5e095083 // sha256h2	q3, q4, v9.4s
  85  	WORD $0x5e2828c5 // sha256su0	v5.4s, v6.4s
  86  	WORD $0x5e0760c8 // sha256su1	v8.4s, v6.4s, v7.4s
  87  	WORD $0x4eb684e9 // add	v9.4s, v7.4s, v22.4s
      	// Rounds 20-23 (v10)
  88  	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
  89  	WORD $0x5e0a4062 // sha256h	q2, q3, v10.4s
  90  	WORD $0x5e0a5083 // sha256h2	q3, q4, v10.4s
  91  	WORD $0x5e2828e6 // sha256su0	v6.4s, v7.4s
  92  	WORD $0x5e0860e5 // sha256su1	v5.4s, v7.4s, v8.4s
  93  	WORD $0x4eb7850a // add	v10.4s, v8.4s, v23.4s
      	// Rounds 24-27 (v9)
  94  	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
  95  	WORD $0x5e094062 // sha256h	q2, q3, v9.4s
  96  	WORD $0x5e095083 // sha256h2	q3, q4, v9.4s
  97  	WORD $0x5e282907 // sha256su0	v7.4s, v8.4s
  98  	WORD $0x5e056106 // sha256su1	v6.4s, v8.4s, v5.4s
  99  	WORD $0x4eb884a9 // add	v9.4s, v5.4s, v24.4s
      	// Rounds 28-31 (v10)
 100  	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
 101  	WORD $0x5e0a4062 // sha256h	q2, q3, v10.4s
 102  	WORD $0x5e0a5083 // sha256h2	q3, q4, v10.4s
 103  	WORD $0x5e2828a8 // sha256su0	v8.4s, v5.4s
 104  	WORD $0x5e0660a7 // sha256su1	v7.4s, v5.4s, v6.4s
 105  	WORD $0x4eb984ca // add	v10.4s, v6.4s, v25.4s
      	// Rounds 32-35 (v9)
 106  	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
 107  	WORD $0x5e094062 // sha256h	q2, q3, v9.4s
 108  	WORD $0x5e095083 // sha256h2	q3, q4, v9.4s
 109  	WORD $0x5e2828c5 // sha256su0	v5.4s, v6.4s
 110  	WORD $0x5e0760c8 // sha256su1	v8.4s, v6.4s, v7.4s
 111  	WORD $0x4eba84e9 // add	v9.4s, v7.4s, v26.4s
      	// Rounds 36-39 (v10)
 112  	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
 113  	WORD $0x5e0a4062 // sha256h	q2, q3, v10.4s
 114  	WORD $0x5e0a5083 // sha256h2	q3, q4, v10.4s
 115  	WORD $0x5e2828e6 // sha256su0	v6.4s, v7.4s
 116  	WORD $0x5e0860e5 // sha256su1	v5.4s, v7.4s, v8.4s
 117  	WORD $0x4ebb850a // add	v10.4s, v8.4s, v27.4s
      	// Rounds 40-43 (v9)
 118  	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
 119  	WORD $0x5e094062 // sha256h	q2, q3, v9.4s
 120  	WORD $0x5e095083 // sha256h2	q3, q4, v9.4s
 121  	WORD $0x5e282907 // sha256su0	v7.4s, v8.4s
 122  	WORD $0x5e056106 // sha256su1	v6.4s, v8.4s, v5.4s
 123  	WORD $0x4ebc84a9 // add	v9.4s, v5.4s, v28.4s
      	// Rounds 44-47 (v10)
 124  	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
 125  	WORD $0x5e0a4062 // sha256h	q2, q3, v10.4s
 126  	WORD $0x5e0a5083 // sha256h2	q3, q4, v10.4s
 127  	WORD $0x5e2828a8 // sha256su0	v8.4s, v5.4s
 128  	WORD $0x5e0660a7 // sha256su1	v7.4s, v5.4s, v6.4s
 129  	WORD $0x4ebd84ca // add	v10.4s, v6.4s, v29.4s
      	// Rounds 48-51 (v9). Only the final sha256su1 (for v8) remains; no
      	// further sha256su0 is issued from here on.
 130  	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
 131  	WORD $0x5e094062 // sha256h	q2, q3, v9.4s
 132  	WORD $0x5e095083 // sha256h2	q3, q4, v9.4s
 133  	WORD $0x5e0760c8 // sha256su1	v8.4s, v6.4s, v7.4s
 134  	WORD $0x4ebe84e9 // add	v9.4s, v7.4s, v30.4s
      	// Rounds 52-55 (v10)
 135  	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
 136  	WORD $0x5e0a4062 // sha256h	q2, q3, v10.4s
 137  	WORD $0x5e0a5083 // sha256h2	q3, q4, v10.4s
 138  	WORD $0x4ebf850a // add	v10.4s, v8.4s, v31.4s
      	// Rounds 56-59 (v9)
 139  	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
 140  	WORD $0x5e094062 // sha256h	q2, q3, v9.4s
 141  	WORD $0x5e095083 // sha256h2	q3, q4, v9.4s
      	// Rounds 60-63 (v10)
 142  	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
 143  	WORD $0x5e0a4062 // sha256h	q2, q3, v10.4s
 144  	WORD $0x5e0a5083 // sha256h2	q3, q4, v10.4s
      	// Add the working copy back into the running state.
 145  	WORD $0x4ea38421 // add	v1.4s, v1.4s, v3.4s
 146  	WORD $0x4ea28400 // add	v0.4s, v0.4s, v2.4s
 147  
      	// R2 -= 64; keep looping while the result is non-negative, i.e. while
      	// at least one more complete block remains.
 148  	SUBS $64, R2
 149  	BPL  loop
 150  
 151  	// Store result: write the 32-byte state in v0/v1 back through x0.
 152  	WORD $0x4c00a800 // st1	{v0.4s, v1.4s}, [x0]
 153  
 154  complete:
 155  	RET
 156  
 157  // Constants table: the 64 32-bit SHA-256 round constants K[0..63].
      // Each 8-byte DATA entry packs two consecutive constants, the
      // lower-indexed one in the low 32 bits (entry 0 is K[1]:K[0] =
      // 0x71374491:0x428a2f98). On little-endian arm64 the words therefore sit
      // in memory in index order, which is how blockArmSha2 loads them into
      // v16-v31 as .4s lanes.
 158  DATA ·constants+0x0(SB)/8, $0x71374491428a2f98
 159  DATA ·constants+0x8(SB)/8, $0xe9b5dba5b5c0fbcf
 160  DATA ·constants+0x10(SB)/8, $0x59f111f13956c25b
 161  DATA ·constants+0x18(SB)/8, $0xab1c5ed5923f82a4
 162  DATA ·constants+0x20(SB)/8, $0x12835b01d807aa98
 163  DATA ·constants+0x28(SB)/8, $0x550c7dc3243185be
 164  DATA ·constants+0x30(SB)/8, $0x80deb1fe72be5d74
 165  DATA ·constants+0x38(SB)/8, $0xc19bf1749bdc06a7
 166  DATA ·constants+0x40(SB)/8, $0xefbe4786e49b69c1
 167  DATA ·constants+0x48(SB)/8, $0x240ca1cc0fc19dc6
 168  DATA ·constants+0x50(SB)/8, $0x4a7484aa2de92c6f
 169  DATA ·constants+0x58(SB)/8, $0x76f988da5cb0a9dc
 170  DATA ·constants+0x60(SB)/8, $0xa831c66d983e5152
 171  DATA ·constants+0x68(SB)/8, $0xbf597fc7b00327c8
 172  DATA ·constants+0x70(SB)/8, $0xd5a79147c6e00bf3
 173  DATA ·constants+0x78(SB)/8, $0x1429296706ca6351
 174  DATA ·constants+0x80(SB)/8, $0x2e1b213827b70a85
 175  DATA ·constants+0x88(SB)/8, $0x53380d134d2c6dfc
 176  DATA ·constants+0x90(SB)/8, $0x766a0abb650a7354
 177  DATA ·constants+0x98(SB)/8, $0x92722c8581c2c92e
 178  DATA ·constants+0xa0(SB)/8, $0xa81a664ba2bfe8a1
 179  DATA ·constants+0xa8(SB)/8, $0xc76c51a3c24b8b70
 180  DATA ·constants+0xb0(SB)/8, $0xd6990624d192e819
 181  DATA ·constants+0xb8(SB)/8, $0x106aa070f40e3585
 182  DATA ·constants+0xc0(SB)/8, $0x1e376c0819a4c116
 183  DATA ·constants+0xc8(SB)/8, $0x34b0bcb52748774c
 184  DATA ·constants+0xd0(SB)/8, $0x4ed8aa4a391c0cb3
 185  DATA ·constants+0xd8(SB)/8, $0x682e6ff35b9cca4f
 186  DATA ·constants+0xe0(SB)/8, $0x78a5636f748f82ee
 187  DATA ·constants+0xe8(SB)/8, $0x8cc7020884c87814
 188  DATA ·constants+0xf0(SB)/8, $0xa4506ceb90befffa
 189  DATA ·constants+0xf8(SB)/8, $0xc67178f2bef9a3f7
 190  
      // Flag 8 corresponds to RODATA in Go's textflag.h; $256 is the table
      // size in bytes (32 entries x 8 bytes).
 191  GLOBL ·constants(SB), 8, $256
 192  
 193