nat_s390x.s raw

   1  // Copyright 2016 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  //go:build !purego
   6  
   7  #include "textflag.h"
   8  
   9  // func addMulVVW1024(z, x *uint, y uint) (c uint)
      // Entry point for 1024-bit operands: fixes the word count and hands off
      // to the shared loop in addMulVVWx.
  10  TEXT ·addMulVVW1024(SB), $0-32
  11  	MOVD $16, R5         // n = 16 words of 8 bytes (16 * 64 = 1024 bits)
  12  	BR   addMulVVWx(SB)  // jump, not call: addMulVVWx reads z, x, y and writes c via FP
  13  
  14  // func addMulVVW1536(z, x *uint, y uint) (c uint)
      // Entry point for 1536-bit operands: fixes the word count and hands off
      // to the shared loop in addMulVVWx.
  15  TEXT ·addMulVVW1536(SB), $0-32
  16  	MOVD $24, R5         // n = 24 words of 8 bytes (24 * 64 = 1536 bits)
  17  	BR   addMulVVWx(SB)  // jump, not call: addMulVVWx reads z, x, y and writes c via FP
  18  
  19  // func addMulVVW2048(z, x *uint, y uint) (c uint)
      // Entry point for 2048-bit operands: fixes the word count and hands off
      // to the shared loop in addMulVVWx.
  20  TEXT ·addMulVVW2048(SB), $0-32
  21  	MOVD $32, R5         // n = 32 words of 8 bytes (32 * 64 = 2048 bits)
  22  	BR   addMulVVWx(SB)  // jump, not call: addMulVVWx reads z, x, y and writes c via FP
  23  
  24  TEXT addMulVVWx(SB), NOFRAME|NOSPLIT, $0
      // addMulVVWx is the shared loop behind addMulVVW1024, addMulVVW1536 and
      // addMulVVW2048, which load the word count n into R5 and jump here.
      // It is declared NOFRAME and reads z, x, y and writes c through FP at the
      // offsets of the entry points' signature (z+0, x+8, y+16, c+24).
      //
      // For each i in [0, n) it multiplies x[i] by y, adds z[i] and the running
      // carry c to the low word of the product, stores that low word to z[i],
      // and keeps the high word (plus the two carries) as the next c.
      //
      // NOTE(review): R11 is never written explicitly in this function. The
      // code treats it as the low 64 bits of the product after MULHDU, and only
      // loads z[i] into R10 after the multiply, so MULHDU presumably expands to
      // a sequence that uses R10/R11 as temporaries -- confirm against the
      // s390x assembler before reordering or renaming registers here.
      //
      // Register use: R2 = z, R8 = x, R9 = y, R5 = n, R12 = n rounded down to
      // even, R7 = i, R1 = i*8 (byte offset), R4 = c, R0 = 0, R6 = x[i] then
      // high product word, R10 = z[i], R11 = low product word.
  25  	MOVD z+0(FP), R2  // R2 = z (destination/accumulator words)
  26  	MOVD x+8(FP), R8  // R8 = x (multiplicand words)
  27  	MOVD y+16(FP), R9 // R9 = y (single-word multiplier)
  28  
  29  	MOVD $0, R1 // i*8 = 0
  30  	MOVD $0, R7 // i = 0
  31  	MOVD $0, R0 // make sure it's zero; used as the addend of every ADDE below
  32  	MOVD $0, R4 // c = 0
  33  
      	// R12 = n with the low bit cleared: bound for the two-words-per-pass loop.
  34  	MOVD   R5, R12
  35  	AND    $-2, R12
  36  	CMPBGE R5, $2, A6      // n >= 2: enter the unrolled loop
  37  	BR     E6              // otherwise go straight to the tail check
  38  
  39  A6:
      	// Unrolled loop: handles words i and i+1 per pass.
  40  	MOVD   (R8)(R1*1), R6  // R6 = x[i]
  41  	MULHDU R9, R6          // R6 = high word of x[i]*y (low word expected in R11, see NOTE)
  42  	MOVD   (R2)(R1*1), R10 // R10 = z[i]; loaded after the multiply (see NOTE)
  43  	ADDC   R10, R11        // low word += z[i]
  44  	ADDE   R0, R6          // high word += carry out of the add above
  45  	ADDC   R4, R11         // low word += c
  46  	ADDE   R0, R6          // high word += carry out of the add above
  47  	MOVD   R6, R4          // c = high word
  48  	MOVD   R11, (R2)(R1*1) // z[i] = low word
  49  
      	// Same step for word i+1 (byte offset +8).
  50  	MOVD   (8)(R8)(R1*1), R6  // R6 = x[i+1]
  51  	MULHDU R9, R6             // R6 = high word of x[i+1]*y
  52  	MOVD   (8)(R2)(R1*1), R10 // R10 = z[i+1]
  53  	ADDC   R10, R11           // low word += z[i+1]
  54  	ADDE   R0, R6             // high word += carry
  55  	ADDC   R4, R11            // low word += c
  56  	ADDE   R0, R6             // high word += carry
  57  	MOVD   R6, R4             // c = high word
  58  	MOVD   R11, (8)(R2)(R1*1) // z[i+1] = low word
  59  
  60  	ADD $16, R1 // i*8 += 16 (two words consumed)
  61  	ADD $2, R7  // i += 2
  62  
  63  	CMPBLT R7, R12, A6 // loop while i < (n rounded down to even)
  64  	BR     E6          // done with pairs; check for a leftover word
  65  
  66  L6:
  67  	// TODO: drop unused single-step loop.
      	// One-word-per-pass loop, reached only from E6 when i < n after the
      	// unrolled loop. With the n values set by the entry points in this file
      	// (16, 24, 32 -- all even) R12 == n, so this path is not taken.
  68  	MOVD   (R8)(R1*1), R6  // R6 = x[i]
  69  	MULHDU R9, R6          // R6 = high word of x[i]*y
  70  	MOVD   (R2)(R1*1), R10 // R10 = z[i]
  71  	ADDC   R10, R11        // low word += z[i]
  72  	ADDE   R0, R6          // high word += carry
  73  	ADDC   R4, R11         // low word += c
  74  	ADDE   R0, R6          // high word += carry
  75  	MOVD   R6, R4          // c = high word
  76  	MOVD   R11, (R2)(R1*1) // z[i] = low word
  77  
  78  	ADD $8, R1 // i*8 += 8
  79  	ADD $1, R7 // i++
  80  
  81  E6:
  82  	CMPBLT R7, R5, L6 // i < n
  83  
  84  	MOVD R4, c+24(FP) // return the final carry word
  85  	RET
  86