sum_ppc64x.s raw

   1  // Copyright 2019 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  //go:build gc && !purego && (ppc64 || ppc64le)
   6  
   7  #include "textflag.h"
   8  
   9  // This was ported from the amd64 implementation.
  10  
  // LE_MOVD, LE_MOVWZ and LE_MOVHZ name the load instructions used to read
  // 8-, 4- and 2-byte pieces of the message. On ppc64le they expand to the
  // plain loads; otherwise they expand to the byte-reversed loads, so that
  // message bytes are interpreted in little-endian order on both targets.
  11  #ifdef GOARCH_ppc64le
  12  #define LE_MOVD MOVD
  13  #define LE_MOVWZ MOVWZ
  14  #define LE_MOVHZ MOVHZ
  15  #else
  16  #define LE_MOVD MOVDBR
  17  #define LE_MOVWZ MOVWBR
  18  #define LE_MOVHZ MOVHBR
  19  #endif
  20  
  // POLY1305_ADD(msg, h0, h1, h2, t0, t1, t2) adds one 16-byte message block
  // to the three-word accumulator h2:h1:h0 and advances msg by 16 bytes.
  //
  //   t0 = little-endian 64-bit word at msg + R0
  //   t1 = little-endian 64-bit word at msg + R24
  //   h0 += t0;  h1 += t1 + carry;  h2 += 1 + carry
  //
  // The index registers are fixed by the macro body: the caller must have
  // loaded R24 with 8, and R0 is assumed to hold zero (NOTE(review): confirm
  // against the Go ppc64 register conventions). t2 is set to 1, which adds a
  // set bit just above the 128 bits of message data. t0, t1, t2 and the
  // carry bit are clobbered.
  //
  // Keep comments out of the continuation lines below: a // comment would
  // swallow the trailing backslash.
  21  #define POLY1305_ADD(msg, h0, h1, h2, t0, t1, t2) \
  22  	LE_MOVD (msg)( R0), t0; \
  23  	LE_MOVD (msg)(R24), t1; \
  24  	MOVD $1, t2;     \
  25  	ADDC t0, h0, h0; \
  26  	ADDE t1, h1, h1; \
  27  	ADDE t2, h2;     \
  28  	ADD  $16, msg
  29  
  // POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3, t4, t5) multiplies the
  // accumulator h2:h1:h0 by the two-word value r1:r0 and folds the product
  // back into h2:h1:h0.
  //
  // Multiply: MULLD and MULHDU give the low and high 64 bits of each partial
  // product, and the ADDC/ADDE/ADDZE chain sums them into the four words
  // t3:t2:t1:t0. h0 is overwritten with the low half of h0*r1 (its last use
  // as an input) and then reused as scratch, so the instruction order below
  // is significant. Only the low halves of the h2 products are computed, so
  // h2 is assumed small enough that h2*r0 and h2*r1 fit in 64 bits
  // (TODO confirm the bound maintained by the callers).
  //
  // Fold: the low two bits of t2 become the new h2. t3:t2 is then added to
  // t1:t0 twice -- once with those two bits cleared (AND with -4) and once
  // shifted right by two (SRD/SLD/OR) -- with carries propagated up to h2.
  // With c = t3:t2 >> 2 this adds 4*c + c = 5*c, presumably the usual
  // partial reduction modulo 2^130 - 5 used by Poly1305 (the modulus is not
  // stated in this file).
  //
  // Clobbers t0-t5 and the carry bit. Keep comments out of the continuation
  // lines below: a // comment would swallow the trailing backslash.
  30  #define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3, t4, t5) \
  31  	MULLD  r0, h0, t0;  \
  32  	MULHDU r0, h0, t1;  \
  33  	MULLD  r0, h1, t4;  \
  34  	MULHDU r0, h1, t5;  \
  35  	ADDC   t4, t1, t1;  \
  36  	MULLD  r0, h2, t2;  \
  37  	MULHDU r1, h0, t4;  \
  38  	MULLD  r1, h0, h0;  \
  39  	ADDE   t5, t2, t2;  \
  40  	ADDC   h0, t1, t1;  \
  41  	MULLD  h2, r1, t3;  \
  42  	ADDZE  t4, h0;      \
  43  	MULHDU r1, h1, t5;  \
  44  	MULLD  r1, h1, t4;  \
  45  	ADDC   t4, t2, t2;  \
  46  	ADDE   t5, t3, t3;  \
  47  	ADDC   h0, t2, t2;  \
  48  	MOVD   $-4, t4;     \
  49  	ADDZE  t3;          \
  50  	RLDICL $0, t2, $62, h2; \
  51  	AND    t2, t4, h0;  \
  52  	ADDC   t0, h0, h0;  \
  53  	ADDE   t3, t1, h1;  \
  54  	SLD    $62, t3, t4; \
  55  	SRD    $2, t2;      \
  56  	ADDZE  h2;          \
  57  	OR     t4, t2, t2;  \
  58  	SRD    $2, t3;      \
  59  	ADDC   t2, h0, h0;  \
  60  	ADDE   t3, h1, h1;  \
  61  	ADDZE  h2
  62  
  63  // func update(state *[7]uint64, msg []byte)
  //
  // update absorbs msg into the accumulator stored in state.
  //
  // State words 0-2 (h0, h1, h2) are loaded on entry and written back at
  // done; words 3-4 (r0, r1) are only read. The remaining state words are
  // not accessed here.
  //
  // Each full 16-byte block is added to h together with a bit at 2^128
  // (POLY1305_ADD) and h is then multiplied by r (POLY1305_MUL). A trailing
  // 1-15 bytes are assembled into R17:R16 with a single 1 bit placed
  // immediately above the last message byte, added to h without the extra
  // 2^128 bit, and followed by one more multiply.
  //
  // Frame $0-32: no local frame, 32 bytes of arguments (pointer + slice).
  //
  // Register use:
  //   R3       state pointer
  //   R4       pointer to the next unread message byte
  //   R5       message bytes remaining
  //   R8-R10   h0, h1, h2
  //   R11-R12  r0, r1
  //   R24      constant 8 (index of the second word in POLY1305_ADD)
  //   R14, R16-R18, R20-R23  scratch
  64  TEXT ·update(SB), $0-32
  65  	MOVD state+0(FP), R3
  66  	MOVD msg_base+8(FP), R4
  67  	MOVD msg_len+16(FP), R5
  68  
  69  	MOVD 0(R3), R8   // h0
  70  	MOVD 8(R3), R9   // h1
  71  	MOVD 16(R3), R10 // h2
  72  	MOVD 24(R3), R11 // r0
  73  	MOVD 32(R3), R12 // r1
  74  
  75  	MOVD $8, R24 // offset of the high word of a block; read by POLY1305_ADD
  76  
  77  	CMP R5, $16
  78  	BLT bytes_between_0_and_15 // no full block available
  79  
  80  loop:
  81  	POLY1305_ADD(R4, R8, R9, R10, R20, R21, R22) // h += block + 2^128; R4 += 16
  82  
  83  	PCALIGN $16
  84  multiply:
  85  	POLY1305_MUL(R8, R9, R10, R11, R12, R16, R17, R18, R14, R20, R21) // h = h*r, folded back into three words
  86  	ADD $-16, R5 // one block consumed
  87  	CMP R5, $16
  88  	BGE loop
  89  
  90  bytes_between_0_and_15:
  91  	CMP  R5, $0
  92  	BEQ  done
  93  	MOVD $0, R16 // low word of the padded final block
  94  	MOVD $0, R17 // high word of the padded final block
  95  
  96  flush_buffer: // reached by fall-through only; no branch in this function targets it
  97  	CMP R5, $8
  98  	BLE just1 // 8 bytes or fewer: nothing to load into the high word
  99  
 100  	MOVD $8, R21
 101  	SUB  R21, R5, R21 // R21 = R5 - 8, the number of bytes beyond the first 8
 102  
 103  	// Greater than 8 -- load the rightmost remaining bytes in msg
 104  	// and put into R17 (h1)
 105  	LE_MOVD (R4)(R21), R17 // the 8 bytes that end at the last message byte
 106  	MOVD $16, R22
 107  
 108  	// Find the offset to those bytes
 109  	SUB R5, R22, R22 // R22 = 16 - R5, the bytes that overlap the low word
 110  	SLD $3, R22      // converted to a bit count
 111  
 112  	// Shift to get only the bytes in msg
 113  	SRD R22, R17, R17
 114  
 115  	// Put 1 at high end
 116  	MOVD $1, R23
 117  	SLD  $3, R21       // (R5 - 8) as a bit count
 118  	SLD  R21, R23, R23 // R23 = 1 << 8*(R5 - 8)
 119  	OR   R23, R17, R17
 120  
 121  	// Remainder is 8
 122  	MOVD $8, R5
 123  
 124  just1:
 125  	CMP R5, $8
 126  	BLT less8
 127  
 128  	// Exactly 8
 129  	LE_MOVD (R4), R16
 130  
 131  	CMP R17, $0
 132  
 133  	// Check if we've already set R17; if not
 134  	// set 1 to indicate end of msg.
 135  	BNE  carry
 136  	MOVD $1, R17
 137  	BR   carry
 138  
 139  less8:
  	// Fewer than 8 bytes remain: gather them into R16 in 4-, 2- and 1-byte
  	// pieces, tracking in R22 the bit position where the next piece goes.
 140  	MOVD  $0, R16   // low word of the padded final block
 141  	MOVD  $0, R22   // shift count
 142  	CMP   R5, $4
 143  	BLT   less4
 144  	LE_MOVWZ (R4), R16
 145  	ADD   $4, R4
 146  	ADD   $-4, R5
 147  	MOVD  $32, R22
 148  
 149  less4:
 150  	CMP   R5, $2
 151  	BLT   less2
 152  	LE_MOVHZ (R4), R21
 153  	SLD   R22, R21, R21
 154  	OR    R16, R21, R16
 155  	ADD   $16, R22
 156  	ADD   $-2, R5
 157  	ADD   $2, R4
 158  
 159  less2:
 160  	CMP   R5, $0
 161  	BEQ   insert1
 162  	MOVBZ (R4), R21
 163  	SLD   R22, R21, R21
 164  	OR    R16, R21, R16
 165  	ADD   $8, R22
 166  
 167  insert1:
 168  	// Insert 1 at end of msg
 169  	MOVD $1, R21
 170  	SLD  R22, R21, R21
 171  	OR   R16, R21, R16
 172  
 173  carry:
 174  	// Add new values to h0, h1, h2
 175  	ADDC  R16, R8
 176  	ADDE  R17, R9
 177  	ADDZE R10, R10 // only the carry reaches h2; the padding bit is already in R17:R16
 178  	MOVD  $16, R5  // the ADD $-16 after multiply then leaves 0, so control reaches done
 179  	ADD   R5, R4
 180  	BR    multiply
 181  
 182  done:
 183  	// Save h0, h1, h2 in state
 184  	MOVD R8, 0(R3)
 185  	MOVD R9, 8(R3)
 186  	MOVD R10, 16(R3)
 187  	RET
 188