1 // Copyright 2025 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 5 package asmgen
6 7 // shiftVU generates lshVU and rshVU, which do
8 // z, c = x << s and z, c = x >> s, for 0 < s < _W.
9 func shiftVU(a *Asm, name []byte) {
10 // Because these routines can be called for z.Lsh(z, N) and z.Rsh(z, N),
11 // the input and output slices may be aliased at different offsets.
12 // For example (on 64-bit systems), during z.Lsh(z, 65), &z[0] == &x[1],
13 // and during z.Rsh(z, 65), &z[1] == &x[0].
14 // For left shift, we must process the slices from len(z)-1 down to 0,
15 // so that we don't overwrite a word before we need to read it.
16 // For right shift, we must process the slices from 0 up to len(z)-1.
17 // The different traversals at least make the two cases more consistent,
18 // since we're always delaying the output by one word compared
19 // to the input.
20 21 f := a.Func("func " + name + "(z, x []Word, s uint) (c Word)")
22 23 // Check for no input early, since we need to start by reading 1 word.
24 n := f.Arg("z_len")
25 a.JmpZero(n, "ret0")
26 27 // Start loop by reading first input word.
28 s := f.ArgHint("s", HintShiftCount)
29 p := f.Pipe()
30 if name == "lshVU" {
31 p.SetBackward()
32 }
33 unroll := []int{1, 4}
34 if a.Arch == Arch386 {
35 unroll = []int{1} // too few registers for more
36 p.SetUseIndexCounter()
37 }
38 p.LoadPtrs(n)
39 a.Comment("shift first word into carry")
40 prev := p.LoadN(1)[0][0]
41 42 // Decide how to shift. On systems with a wide shift (x86), use that.
43 // Otherwise, we need shift by s and negative (reverse) shift by 64-s or 32-s.
44 shift := a.Lsh
45 shiftWide := a.LshWide
46 negShift := a.Rsh
47 negShiftReg := a.RshReg
48 if name == "rshVU" {
49 shift = a.Rsh
50 shiftWide = a.RshWide
51 negShift = a.Lsh
52 negShiftReg = a.LshReg
53 }
54 if a.Arch.HasShiftWide() {
55 // Use wide shift to avoid needing negative shifts.
56 // The invariant is that prev holds the previous word (not shifted at all),
57 // to be used as input into the wide shift.
58 // After the loop finishes, prev holds the final output word to be written.
59 c := a.Reg()
60 shiftWide(s, prev, a.Imm(0), c)
61 f.StoreArg(c, "c")
62 a.Free(c)
63 a.Comment("shift remaining words")
64 p.Start(n, unroll...)
65 p.Loop(func(in [][]Reg, out [][]Reg) {
66 // We reuse the input registers as output, delayed one cycle; prev is the first output.
67 // After writing the outputs to memory, we can copy the final x value into prev
68 // for the next iteration.
69 old := prev
70 for i, x := range in[0] {
71 shiftWide(s, x, old, old)
72 out[0][i] = old
73 old = x
74 }
75 p.StoreN(out)
76 a.Mov(old, prev)
77 })
78 a.Comment("store final shifted bits")
79 shift(s, prev, prev)
80 } else {
81 // Construct values from x << s and x >> (64-s).
82 // After the first word has been processed, the invariant is that
83 // prev holds x << s, to be used as the high bits of the next output word,
84 // once we find the low bits after reading the next input word.
85 // After the loop finishes, prev holds the final output word to be written.
86 sNeg := a.Reg()
87 a.Mov(a.Imm(a.Arch.WordBits), sNeg)
88 a.Sub(s, sNeg, sNeg, SmashCarry)
89 c := a.Reg()
90 negShift(sNeg, prev, c)
91 shift(s, prev, prev)
92 f.StoreArg(c, "c")
93 a.Free(c)
94 a.Comment("shift remaining words")
95 p.Start(n, unroll...)
96 p.Loop(func(in, out [][]Reg) {
97 if a.HasRegShift() {
98 // ARM (32-bit) allows shifts in most arithmetic expressions,
99 // including OR, letting us combine the negShift and a.Or.
100 // The simplest way to manage the registers is to do StoreN for
101 // one output at a time, and since we don't use multi-register
102 // stores on ARM, that doesn't hurt us.
103 out[0] = out[0][:1]
104 for _, x := range in[0] {
105 a.Or(negShiftReg(sNeg, x), prev, prev)
106 out[0][0] = prev
107 p.StoreN(out)
108 shift(s, x, prev)
109 }
110 return
111 }
112 // We reuse the input registers as output, delayed one cycle; z0 is the first output.
113 z0 := a.Reg()
114 z := z0
115 for i, x := range in[0] {
116 negShift(sNeg, x, z)
117 a.Or(prev, z, z)
118 shift(s, x, prev)
119 out[0][i] = z
120 z = x
121 }
122 p.StoreN(out)
123 })
124 a.Comment("store final shifted bits")
125 }
126 p.StoreN([][]Reg{{prev}})
127 p.Done()
128 a.Free(s)
129 a.Ret()
130 131 // Return 0, used from above.
132 a.Label("ret0")
133 f.StoreArg(a.Imm(0), "c")
134 a.Ret()
135 }
136