md5block_arm.s raw

   1  // Copyright 2013 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  //
   5  // ARM version of md5block.go
   6  
   7  //go:build !purego
   8  
   9  #include "textflag.h"
  10  
  11  // Register definitions: symbolic names for the ARM registers used by block.
  12  #define Rtable	R0	// Pointer to the next entries of the MD5 constants table
  13  #define Rdata	R1	// Pointer to the 64-byte chunk of data being hashed
  14  #define Ra	R2	// MD5 accumulator (first word of the digest state)
  15  #define Rb	R3	// MD5 accumulator (second word of the digest state)
  16  #define Rc	R4	// MD5 accumulator (third word of the digest state)
  17  #define Rd	R5	// MD5 accumulator (fourth word of the digest state)
  18  #define Rc0	R6	// MD5 constant; also used as scratch outside the rounds
  19  #define Rc1	R7	// MD5 constant
  20  #define Rc2	R8	// MD5 constant
  21  // R9 and R10 are forbidden, so they are skipped here.
  22  // R11 is OK provided you check that the assembler uses it in no synthetic instructions.
  23  #define Rc3	R11	// MD5 constant
  24  #define Rt0	R12	// temporary
  25  #define Rt1	R14	// temporary; holds the data word inside the ROUND macros
  26  
  27  // func block(dig *digest, p []byte)
  28  // 0(FP) is *digest
  29  // 4(FP) is p.array (struct Slice)
  30  // 8(FP) is p.len
  31  //12(FP) is p.cap (not read by this routine)
  32  //
  33  // Stack frame: 2 saved pointers + 16-word buffer + 3 outgoing argument words = 84 bytes.
  34  #define p_end	end-4(SP)	// pointer to the end of data (p.array + p.len)
  35  #define p_data	data-8(SP)	// current data pointer, saved across each loop iteration
  36  #define buf	buffer-(8+4*16)(SP)	// 16-word temporary buffer for an aligned copy of one chunk
  37  		// 3 words at 4..12(R13) for called routine parameters (the memmove arguments)
  38  
  39  TEXT	·block(SB), NOSPLIT, $84-16
      	// Hashes the bytes of p in 64-byte chunks, folding each chunk into the
      	// four words that are loaded from, and stored back to, dig.
      	// The end-of-data check sits at the bottom of the loop, so one chunk is
      	// always processed; presumably callers pass a non-zero multiple of 64
      	// bytes - TODO confirm against the Go caller.
  40  	MOVW	p+4(FP), Rdata	// pointer to the data
  41  	MOVW	p_len+8(FP), Rt0	// number of bytes
  42  	ADD	Rdata, Rt0	// Rt0 = data pointer + number of bytes
  43  	MOVW	Rt0, p_end	// pointer to end of data
  44  
  45  loop:
  46  	MOVW	Rdata, p_data	// Save Rdata; it is reloaded from p_data at the bottom of the loop
  47  	AND.S	$3, Rdata, Rt0	// Rt0 = Rdata & 3, setting flags (TST $3, Rdata not working see issue 5921)
  48  	BEQ	aligned			// aligned detected - skip copy
  49  
  50  	// Copy the unaligned source data into the aligned temporary buffer
  51  	// memmove(to=4(R13), from=8(R13), n=12(R13)) - Corrupts all registers
  52  	MOVW	$buf, Rtable	// to
  53  	MOVW	$64, Rc0		// n
  54  	MOVM.IB	[Rtable,Rdata,Rc0], (R13)	// store to, from, n in the three parameter words
  55  	BL	runtime·memmove(SB)
  56  
  57  	// Point to the local aligned copy of the data
  58  	MOVW	$buf, Rdata
  59  
  60  aligned:
  61  	// Point to the table of constants
  62  	// A PC relative add would be cheaper than this
      	// (Rtable is reloaded every iteration because the round loads below advance it.)
  63  	MOVW	$·table(SB), Rtable
  64  
  65  	// Load up initial MD5 accumulator
  66  	MOVW	dig+0(FP), Rc0	// Rc0 is free here; use it as the pointer to dig
  67  	MOVM.IA (Rc0), [Ra,Rb,Rc,Rd]	// Ra..Rd = the four words at dig
  68  
      // ROUND1 performs one step of round 1. The macro parameters Ra..Rd shadow
      // the register names so that call sites can rotate the accumulator roles.
      // index selects the data word at (index<<2)(Rdata); Rconst is the register
      // holding this step's constant. Clobbers Rt0 and Rt1.
  69  // a += (((c^d)&b)^d) + X[index] + const
  70  // a = a<<shift | a>>(32-shift) + b
  71  #define ROUND1(Ra, Rb, Rc, Rd, index, shift, Rconst) \
  72  	EOR	Rc, Rd, Rt0		; \
  73  	AND	Rb, Rt0			; \
  74  	EOR	Rd, Rt0			; \
  75  	MOVW	(index<<2)(Rdata), Rt1	; \
  76  	ADD	Rt1, Rt0			; \
  77  	ADD	Rconst, Rt0			; \
  78  	ADD	Rt0, Ra			; \
  79  	ADD	Ra@>(32-shift), Rb, Ra	;
  80  
  81  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]	// load the next four constants; .W writes the advanced pointer back to Rtable
  82  	ROUND1(Ra, Rb, Rc, Rd,  0,	7, Rc0)
  83  	ROUND1(Rd, Ra, Rb, Rc,  1, 12, Rc1)
  84  	ROUND1(Rc, Rd, Ra, Rb,  2, 17, Rc2)
  85  	ROUND1(Rb, Rc, Rd, Ra,  3, 22, Rc3)
  86  
  87  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
  88  	ROUND1(Ra, Rb, Rc, Rd,  4,	7, Rc0)
  89  	ROUND1(Rd, Ra, Rb, Rc,  5, 12, Rc1)
  90  	ROUND1(Rc, Rd, Ra, Rb,  6, 17, Rc2)
  91  	ROUND1(Rb, Rc, Rd, Ra,  7, 22, Rc3)
  92  
  93  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
  94  	ROUND1(Ra, Rb, Rc, Rd,  8,	7, Rc0)
  95  	ROUND1(Rd, Ra, Rb, Rc,  9, 12, Rc1)
  96  	ROUND1(Rc, Rd, Ra, Rb, 10, 17, Rc2)
  97  	ROUND1(Rb, Rc, Rd, Ra, 11, 22, Rc3)
  98  
  99  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
 100  	ROUND1(Ra, Rb, Rc, Rd, 12,	7, Rc0)
 101  	ROUND1(Rd, Ra, Rb, Rc, 13, 12, Rc1)
 102  	ROUND1(Rc, Rd, Ra, Rb, 14, 17, Rc2)
 103  	ROUND1(Rb, Rc, Rd, Ra, 15, 22, Rc3)
 104  
      // ROUND2 performs one step of round 2; same parameters and clobbers as ROUND1.
 105  // a += (((b^c)&d)^c) + X[index] + const
 106  // a = a<<shift | a>>(32-shift) + b
 107  #define ROUND2(Ra, Rb, Rc, Rd, index, shift, Rconst) \
 108  	EOR	Rb, Rc, Rt0		; \
 109  	AND	Rd, Rt0			; \
 110  	EOR	Rc, Rt0			; \
 111  	MOVW	(index<<2)(Rdata), Rt1	; \
 112  	ADD	Rt1, Rt0			; \
 113  	ADD	Rconst, Rt0			; \
 114  	ADD	Rt0, Ra			; \
 115  	ADD	Ra@>(32-shift), Rb, Ra	;
 116  
 117  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]	// Rtable now walks the round 2 constants
 118  	ROUND2(Ra, Rb, Rc, Rd,  1,	5, Rc0)
 119  	ROUND2(Rd, Ra, Rb, Rc,  6,	9, Rc1)
 120  	ROUND2(Rc, Rd, Ra, Rb, 11, 14, Rc2)
 121  	ROUND2(Rb, Rc, Rd, Ra,  0, 20, Rc3)
 122  
 123  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
 124  	ROUND2(Ra, Rb, Rc, Rd,  5,	5, Rc0)
 125  	ROUND2(Rd, Ra, Rb, Rc, 10,	9, Rc1)
 126  	ROUND2(Rc, Rd, Ra, Rb, 15, 14, Rc2)
 127  	ROUND2(Rb, Rc, Rd, Ra,  4, 20, Rc3)
 128  
 129  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
 130  	ROUND2(Ra, Rb, Rc, Rd,  9,	5, Rc0)
 131  	ROUND2(Rd, Ra, Rb, Rc, 14,	9, Rc1)
 132  	ROUND2(Rc, Rd, Ra, Rb,  3, 14, Rc2)
 133  	ROUND2(Rb, Rc, Rd, Ra,  8, 20, Rc3)
 134  
 135  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
 136  	ROUND2(Ra, Rb, Rc, Rd, 13,	5, Rc0)
 137  	ROUND2(Rd, Ra, Rb, Rc,  2,	9, Rc1)
 138  	ROUND2(Rc, Rd, Ra, Rb,  7, 14, Rc2)
 139  	ROUND2(Rb, Rc, Rd, Ra, 12, 20, Rc3)
 140  
      // ROUND3 performs one step of round 3; same parameters and clobbers as ROUND1.
 141  // a += (b^c^d) + X[index] + const
 142  // a = a<<shift | a>>(32-shift) + b
 143  #define ROUND3(Ra, Rb, Rc, Rd, index, shift, Rconst) \
 144  	EOR	Rb, Rc, Rt0		; \
 145  	EOR	Rd, Rt0			; \
 146  	MOVW	(index<<2)(Rdata), Rt1	; \
 147  	ADD	Rt1, Rt0			; \
 148  	ADD	Rconst, Rt0			; \
 149  	ADD	Rt0, Ra			; \
 150  	ADD	Ra@>(32-shift), Rb, Ra	;
 151  
 152  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]	// Rtable now walks the round 3 constants
 153  	ROUND3(Ra, Rb, Rc, Rd,  5,	4, Rc0)
 154  	ROUND3(Rd, Ra, Rb, Rc,  8, 11, Rc1)
 155  	ROUND3(Rc, Rd, Ra, Rb, 11, 16, Rc2)
 156  	ROUND3(Rb, Rc, Rd, Ra, 14, 23, Rc3)
 157  
 158  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
 159  	ROUND3(Ra, Rb, Rc, Rd,  1,	4, Rc0)
 160  	ROUND3(Rd, Ra, Rb, Rc,  4, 11, Rc1)
 161  	ROUND3(Rc, Rd, Ra, Rb,  7, 16, Rc2)
 162  	ROUND3(Rb, Rc, Rd, Ra, 10, 23, Rc3)
 163  
 164  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
 165  	ROUND3(Ra, Rb, Rc, Rd, 13,	4, Rc0)
 166  	ROUND3(Rd, Ra, Rb, Rc,  0, 11, Rc1)
 167  	ROUND3(Rc, Rd, Ra, Rb,  3, 16, Rc2)
 168  	ROUND3(Rb, Rc, Rd, Ra,  6, 23, Rc3)
 169  
 170  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
 171  	ROUND3(Ra, Rb, Rc, Rd,  9,	4, Rc0)
 172  	ROUND3(Rd, Ra, Rb, Rc, 12, 11, Rc1)
 173  	ROUND3(Rc, Rd, Ra, Rb, 15, 16, Rc2)
 174  	ROUND3(Rb, Rc, Rd, Ra,  2, 23, Rc3)
 175  
      // ROUND4 performs one step of round 4; same parameters and clobbers as ROUND1.
 176  // a += (c^(b|^d)) + X[index] + const
 177  // a = a<<shift | a>>(32-shift) + b
 178  #define ROUND4(Ra, Rb, Rc, Rd, index, shift, Rconst) \
 179  	MVN	Rd, Rt0			; \
 180  	ORR	Rb, Rt0			; \
 181  	EOR	Rc, Rt0			; \
 182  	MOVW	(index<<2)(Rdata), Rt1	; \
 183  	ADD	Rt1, Rt0			; \
 184  	ADD	Rconst, Rt0			; \
 185  	ADD	Rt0, Ra			; \
 186  	ADD	Ra@>(32-shift), Rb, Ra	;
 187  
 188  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]	// Rtable now walks the round 4 constants
 189  	ROUND4(Ra, Rb, Rc, Rd,  0,	6, Rc0)
 190  	ROUND4(Rd, Ra, Rb, Rc,  7, 10, Rc1)
 191  	ROUND4(Rc, Rd, Ra, Rb, 14, 15, Rc2)
 192  	ROUND4(Rb, Rc, Rd, Ra,  5, 21, Rc3)
 193  
 194  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
 195  	ROUND4(Ra, Rb, Rc, Rd, 12,	6, Rc0)
 196  	ROUND4(Rd, Ra, Rb, Rc,  3, 10, Rc1)
 197  	ROUND4(Rc, Rd, Ra, Rb, 10, 15, Rc2)
 198  	ROUND4(Rb, Rc, Rd, Ra,  1, 21, Rc3)
 199  
 200  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
 201  	ROUND4(Ra, Rb, Rc, Rd,  8,	6, Rc0)
 202  	ROUND4(Rd, Ra, Rb, Rc, 15, 10, Rc1)
 203  	ROUND4(Rc, Rd, Ra, Rb,  6, 15, Rc2)
 204  	ROUND4(Rb, Rc, Rd, Ra, 13, 21, Rc3)
 205  
 206  	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
 207  	ROUND4(Ra, Rb, Rc, Rd,  4,	6, Rc0)
 208  	ROUND4(Rd, Ra, Rb, Rc, 11, 10, Rc1)
 209  	ROUND4(Rc, Rd, Ra, Rb,  2, 15, Rc2)
 210  	ROUND4(Rb, Rc, Rd, Ra,  9, 21, Rc3)
 211  
      	// Add the accumulator values from before this chunk (still stored at dig)
      	// to the freshly computed ones, then write the sums back to dig.
 212  	MOVW	dig+0(FP), Rt0	// Rt0 = dig
 213  	MOVM.IA (Rt0), [Rc0,Rc1,Rc2,Rc3]	// the constant registers are free now; reuse them for the old state
 214  
 215  	ADD	Rc0, Ra
 216  	ADD	Rc1, Rb
 217  	ADD	Rc2, Rc
 218  	ADD	Rc3, Rd
 219  
 220  	MOVM.IA [Ra,Rb,Rc,Rd], (Rt0)	// store the updated state to dig
 221  
 222  	MOVW	p_data, Rdata	// restore the input pointer (Rdata may have pointed at buf)
 223  	MOVW	p_end, Rt0
 224  	ADD	$64, Rdata	// advance to the next 64-byte chunk
 225  	CMP	Rt0, Rdata
 226  	BLO	loop	// continue while Rdata is below p_end (unsigned compare)
 227  
 228  	RET
 229  
 230  // MD5 constants table
      // 64 words (256 bytes), 16 per round, laid out in the order in which
      // block consumes them: four at a time via MOVM.IA.W (Rtable).
 231  
 232  	// Round 1
 233  	DATA	·table+0x00(SB)/4, $0xd76aa478
 234  	DATA	·table+0x04(SB)/4, $0xe8c7b756
 235  	DATA	·table+0x08(SB)/4, $0x242070db
 236  	DATA	·table+0x0c(SB)/4, $0xc1bdceee
 237  	DATA	·table+0x10(SB)/4, $0xf57c0faf
 238  	DATA	·table+0x14(SB)/4, $0x4787c62a
 239  	DATA	·table+0x18(SB)/4, $0xa8304613
 240  	DATA	·table+0x1c(SB)/4, $0xfd469501
 241  	DATA	·table+0x20(SB)/4, $0x698098d8
 242  	DATA	·table+0x24(SB)/4, $0x8b44f7af
 243  	DATA	·table+0x28(SB)/4, $0xffff5bb1
 244  	DATA	·table+0x2c(SB)/4, $0x895cd7be
 245  	DATA	·table+0x30(SB)/4, $0x6b901122
 246  	DATA	·table+0x34(SB)/4, $0xfd987193
 247  	DATA	·table+0x38(SB)/4, $0xa679438e
 248  	DATA	·table+0x3c(SB)/4, $0x49b40821
 249  	// Round 2
 250  	DATA	·table+0x40(SB)/4, $0xf61e2562
 251  	DATA	·table+0x44(SB)/4, $0xc040b340
 252  	DATA	·table+0x48(SB)/4, $0x265e5a51
 253  	DATA	·table+0x4c(SB)/4, $0xe9b6c7aa
 254  	DATA	·table+0x50(SB)/4, $0xd62f105d
 255  	DATA	·table+0x54(SB)/4, $0x02441453
 256  	DATA	·table+0x58(SB)/4, $0xd8a1e681
 257  	DATA	·table+0x5c(SB)/4, $0xe7d3fbc8
 258  	DATA	·table+0x60(SB)/4, $0x21e1cde6
 259  	DATA	·table+0x64(SB)/4, $0xc33707d6
 260  	DATA	·table+0x68(SB)/4, $0xf4d50d87
 261  	DATA	·table+0x6c(SB)/4, $0x455a14ed
 262  	DATA	·table+0x70(SB)/4, $0xa9e3e905
 263  	DATA	·table+0x74(SB)/4, $0xfcefa3f8
 264  	DATA	·table+0x78(SB)/4, $0x676f02d9
 265  	DATA	·table+0x7c(SB)/4, $0x8d2a4c8a
 266  	// Round 3
 267  	DATA	·table+0x80(SB)/4, $0xfffa3942
 268  	DATA	·table+0x84(SB)/4, $0x8771f681
 269  	DATA	·table+0x88(SB)/4, $0x6d9d6122
 270  	DATA	·table+0x8c(SB)/4, $0xfde5380c
 271  	DATA	·table+0x90(SB)/4, $0xa4beea44
 272  	DATA	·table+0x94(SB)/4, $0x4bdecfa9
 273  	DATA	·table+0x98(SB)/4, $0xf6bb4b60
 274  	DATA	·table+0x9c(SB)/4, $0xbebfbc70
 275  	DATA	·table+0xa0(SB)/4, $0x289b7ec6
 276  	DATA	·table+0xa4(SB)/4, $0xeaa127fa
 277  	DATA	·table+0xa8(SB)/4, $0xd4ef3085
 278  	DATA	·table+0xac(SB)/4, $0x04881d05
 279  	DATA	·table+0xb0(SB)/4, $0xd9d4d039
 280  	DATA	·table+0xb4(SB)/4, $0xe6db99e5
 281  	DATA	·table+0xb8(SB)/4, $0x1fa27cf8
 282  	DATA	·table+0xbc(SB)/4, $0xc4ac5665
 283  	// Round 4
 284  	DATA	·table+0xc0(SB)/4, $0xf4292244
 285  	DATA	·table+0xc4(SB)/4, $0x432aff97
 286  	DATA	·table+0xc8(SB)/4, $0xab9423a7
 287  	DATA	·table+0xcc(SB)/4, $0xfc93a039
 288  	DATA	·table+0xd0(SB)/4, $0x655b59c3
 289  	DATA	·table+0xd4(SB)/4, $0x8f0ccc92
 290  	DATA	·table+0xd8(SB)/4, $0xffeff47d
 291  	DATA	·table+0xdc(SB)/4, $0x85845dd1
 292  	DATA	·table+0xe0(SB)/4, $0x6fa87e4f
 293  	DATA	·table+0xe4(SB)/4, $0xfe2ce6e0
 294  	DATA	·table+0xe8(SB)/4, $0xa3014314
 295  	DATA	·table+0xec(SB)/4, $0x4e0811a1
 296  	DATA	·table+0xf0(SB)/4, $0xf7537e82
 297  	DATA	·table+0xf4(SB)/4, $0xbd3af235
 298  	DATA	·table+0xf8(SB)/4, $0x2ad7d2bb
 299  	DATA	·table+0xfc(SB)/4, $0xeb86d391
 300  	// Global definition: read-only, 64 words * 4 bytes = 256 bytes.
      	// RODATA comes from textflag.h (included at the top of this file) and
      	// replaces the previous bare flag value 8.
 301  	GLOBL	·table(SB), RODATA, $256
 302