1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 5 // This file provides Go implementations of elementary multi-precision
6 // arithmetic operations on word vectors. These have the suffix _g.
7 // These are needed for platforms without assembly implementations of these routines.
8 // This file also contains elementary operations that can be implemented
9 // sufficiently efficiently in Go.
10 11 package big
12 13 import (
14 "math/bits"
15 _ "unsafe" // for go:linkname
16 )
17 18 // A Word represents a single digit of a multi-precision unsigned integer.
19 type Word uint
20 21 const (
22 _S = _W / 8 // word size in bytes
23 24 _W = bits.UintSize // word size in bits
25 _B = 1 << _W // digit base
26 _M = _B - 1 // digit mask
27 )
28 29 // In these routines, it is the caller's responsibility to arrange for
30 // x, y, and z to all have the same length. We check this and panic.
31 // The assembly versions of these routines do not include that check.
32 //
33 // The check+panic also has the effect of teaching the compiler that
34 // “i in range for z” implies “i in range for x and y”, eliminating all
35 // bounds checks in loops from 0 to len(z) and vice versa.
36 37 // ----------------------------------------------------------------------------
38 // Elementary operations on words
39 //
40 // These operations are used by the vector operations below.
41 42 // z1<<_W + z0 = x*y
43 func mulWW(x, y Word) (z1, z0 Word) {
44 hi, lo := bits.Mul(uint(x), uint(y))
45 return Word(hi), Word(lo)
46 }
47 48 // z1<<_W + z0 = x*y + c
49 func mulAddWWW_g(x, y, c Word) (z1, z0 Word) {
50 hi, lo := bits.Mul(uint(x), uint(y))
51 var cc uint
52 lo, cc = bits.Add(lo, uint(c), 0)
53 return Word(hi + cc), Word(lo)
54 }
55 56 // nlz returns the number of leading zeros in x.
57 // Wraps bits.LeadingZeros call for convenience.
58 func nlz(x Word) uint {
59 return uint(bits.LeadingZeros(uint(x)))
60 }
61 62 // The resulting carry c is either 0 or 1.
63 func addVV_g(z, x, y []Word) (c Word) {
64 if len(x) != len(z) || len(y) != len(z) {
65 panic("addVV len")
66 }
67 68 for i := range z {
69 zi, cc := bits.Add(uint(x[i]), uint(y[i]), uint(c))
70 z[i] = Word(zi)
71 c = Word(cc)
72 }
73 return
74 }
75 76 // The resulting carry c is either 0 or 1.
77 func subVV_g(z, x, y []Word) (c Word) {
78 if len(x) != len(z) || len(y) != len(z) {
79 panic("subVV len")
80 }
81 82 for i := range z {
83 zi, cc := bits.Sub(uint(x[i]), uint(y[i]), uint(c))
84 z[i] = Word(zi)
85 c = Word(cc)
86 }
87 return
88 }
89 90 // addVW sets z = x + y, returning the final carry c.
91 // The behavior is undefined if len(x) != len(z).
92 // If len(z) == 0, c = y; otherwise, c is 0 or 1.
93 //
94 // addVW should be an internal detail,
95 // but widely used packages access it using linkname.
96 // Notable members of the hall of shame include:
97 // - github.com/remyoudompheng/bigfft
98 //
99 // Do not remove or change the type signature.
100 // See go.dev/issue/67401.
101 //
102 //go:linkname addVW
103 func addVW(z, x []Word, y Word) (c Word) {
104 if len(x) != len(z) {
105 panic("addVW len")
106 }
107 108 if len(z) == 0 {
109 return y
110 }
111 zi, cc := bits.Add(uint(x[0]), uint(y), 0)
112 z[0] = Word(zi)
113 if cc == 0 {
114 if &z[0] != &x[0] {
115 copy(z[1:], x[1:])
116 }
117 return 0
118 }
119 for i := 1; i < len(z); i++ {
120 xi := x[i]
121 if xi != ^Word(0) {
122 z[i] = xi + 1
123 if &z[0] != &x[0] {
124 copy(z[i+1:], x[i+1:])
125 }
126 return 0
127 }
128 z[i] = 0
129 }
130 return 1
131 }
132 133 // addVW_ref is the reference implementation for addVW, used only for testing.
134 func addVW_ref(z, x []Word, y Word) (c Word) {
135 c = y
136 for i := range z {
137 zi, cc := bits.Add(uint(x[i]), uint(c), 0)
138 z[i] = Word(zi)
139 c = Word(cc)
140 }
141 return
142 }
143 144 // subVW sets z = x - y, returning the final carry c.
145 // The behavior is undefined if len(x) != len(z).
146 // If len(z) == 0, c = y; otherwise, c is 0 or 1.
147 //
148 // subVW should be an internal detail,
149 // but widely used packages access it using linkname.
150 // Notable members of the hall of shame include:
151 // - github.com/remyoudompheng/bigfft
152 //
153 // Do not remove or change the type signature.
154 // See go.dev/issue/67401.
155 //
156 //go:linkname subVW
157 func subVW(z, x []Word, y Word) (c Word) {
158 if len(x) != len(z) {
159 panic("subVW len")
160 }
161 162 if len(z) == 0 {
163 return y
164 }
165 zi, cc := bits.Sub(uint(x[0]), uint(y), 0)
166 z[0] = Word(zi)
167 if cc == 0 {
168 if &z[0] != &x[0] {
169 copy(z[1:], x[1:])
170 }
171 return 0
172 }
173 for i := 1; i < len(z); i++ {
174 xi := x[i]
175 if xi != 0 {
176 z[i] = xi - 1
177 if &z[0] != &x[0] {
178 copy(z[i+1:], x[i+1:])
179 }
180 return 0
181 }
182 z[i] = ^Word(0)
183 }
184 return 1
185 }
186 187 // subVW_ref is the reference implementation for subVW, used only for testing.
188 func subVW_ref(z, x []Word, y Word) (c Word) {
189 c = y
190 for i := range z {
191 zi, cc := bits.Sub(uint(x[i]), uint(c), 0)
192 z[i] = Word(zi)
193 c = Word(cc)
194 }
195 return c
196 }
197 198 func lshVU_g(z, x []Word, s uint) (c Word) {
199 if len(x) != len(z) {
200 panic("lshVU len")
201 }
202 203 if s == 0 {
204 copy(z, x)
205 return
206 }
207 if len(z) == 0 {
208 return
209 }
210 s &= _W - 1 // hint to the compiler that shifts by s don't need guard code
211 ŝ := _W - s
212 ŝ &= _W - 1 // ditto
213 c = x[len(z)-1] >> ŝ
214 for i := len(z) - 1; i > 0; i-- {
215 z[i] = x[i]<<s | x[i-1]>>ŝ
216 }
217 z[0] = x[0] << s
218 return
219 }
220 221 func rshVU_g(z, x []Word, s uint) (c Word) {
222 if len(x) != len(z) {
223 panic("rshVU len")
224 }
225 226 if s == 0 {
227 copy(z, x)
228 return
229 }
230 if len(z) == 0 {
231 return
232 }
233 s &= _W - 1 // hint to the compiler that shifts by s don't need guard code
234 ŝ := _W - s
235 ŝ &= _W - 1 // ditto
236 c = x[0] << ŝ
237 for i := 1; i < len(z); i++ {
238 z[i-1] = x[i-1]>>s | x[i]<<ŝ
239 }
240 z[len(z)-1] = x[len(z)-1] >> s
241 return
242 }
243 244 func mulAddVWW_g(z, x []Word, y, r Word) (c Word) {
245 if len(x) != len(z) {
246 panic("mulAddVWW len")
247 }
248 c = r
249 for i := range z {
250 c, z[i] = mulAddWWW_g(x[i], y, c)
251 }
252 return
253 }
254 255 func addMulVVWW_g(z, x, y []Word, m, a Word) (c Word) {
256 if len(x) != len(z) || len(y) != len(z) {
257 panic("rshVU len")
258 }
259 260 c = a
261 for i := range z {
262 z1, z0 := mulAddWWW_g(y[i], m, x[i])
263 lo, cc := bits.Add(uint(z0), uint(c), 0)
264 c, z[i] = Word(cc), Word(lo)
265 c += z1
266 }
267 return
268 }
269 270 // q = ( x1 << _W + x0 - r)/y. m = floor(( _B^2 - 1 ) / d - _B). Requiring x1<y.
271 // An approximate reciprocal with a reference to "Improved Division by Invariant Integers
272 // (IEEE Transactions on Computers, 11 Jun. 2010)"
273 func divWW(x1, x0, y, m Word) (q, r Word) {
274 s := nlz(y)
275 if s != 0 {
276 x1 = x1<<s | x0>>(_W-s)
277 x0 <<= s
278 y <<= s
279 }
280 d := uint(y)
281 // We know that
282 // m = ⎣(B^2-1)/d⎦-B
283 // ⎣(B^2-1)/d⎦ = m+B
284 // (B^2-1)/d = m+B+delta1 0 <= delta1 <= (d-1)/d
285 // B^2/d = m+B+delta2 0 <= delta2 <= 1
286 // The quotient we're trying to compute is
287 // quotient = ⎣(x1*B+x0)/d⎦
288 // = ⎣(x1*B*(B^2/d)+x0*(B^2/d))/B^2⎦
289 // = ⎣(x1*B*(m+B+delta2)+x0*(m+B+delta2))/B^2⎦
290 // = ⎣(x1*m+x1*B+x0)/B + x0*m/B^2 + delta2*(x1*B+x0)/B^2⎦
291 // The latter two terms of this three-term sum are between 0 and 1.
292 // So we can compute just the first term, and we will be low by at most 2.
293 t1, t0 := bits.Mul(uint(m), uint(x1))
294 _, c := bits.Add(t0, uint(x0), 0)
295 t1, _ = bits.Add(t1, uint(x1), c)
296 // The quotient is either t1, t1+1, or t1+2.
297 // We'll try t1 and adjust if needed.
298 qq := t1
299 // compute remainder r=x-d*q.
300 dq1, dq0 := bits.Mul(d, qq)
301 r0, b := bits.Sub(uint(x0), dq0, 0)
302 r1, _ := bits.Sub(uint(x1), dq1, b)
303 // The remainder we just computed is bounded above by B+d:
304 // r = x1*B + x0 - d*q.
305 // = x1*B + x0 - d*⎣(x1*m+x1*B+x0)/B⎦
306 // = x1*B + x0 - d*((x1*m+x1*B+x0)/B-alpha) 0 <= alpha < 1
307 // = x1*B + x0 - x1*d/B*m - x1*d - x0*d/B + d*alpha
308 // = x1*B + x0 - x1*d/B*⎣(B^2-1)/d-B⎦ - x1*d - x0*d/B + d*alpha
309 // = x1*B + x0 - x1*d/B*⎣(B^2-1)/d-B⎦ - x1*d - x0*d/B + d*alpha
310 // = x1*B + x0 - x1*d/B*((B^2-1)/d-B-beta) - x1*d - x0*d/B + d*alpha 0 <= beta < 1
311 // = x1*B + x0 - x1*B + x1/B + x1*d + x1*d/B*beta - x1*d - x0*d/B + d*alpha
312 // = x0 + x1/B + x1*d/B*beta - x0*d/B + d*alpha
313 // = x0*(1-d/B) + x1*(1+d*beta)/B + d*alpha
314 // < B*(1-d/B) + d*B/B + d because x0<B (and 1-d/B>0), x1<d, 1+d*beta<=B, alpha<1
315 // = B - d + d + d
316 // = B+d
317 // So r1 can only be 0 or 1. If r1 is 1, then we know q was too small.
318 // Add 1 to q and subtract d from r. That guarantees that r is <B, so
319 // we no longer need to keep track of r1.
320 if r1 != 0 {
321 qq++
322 r0 -= d
323 }
324 // If the remainder is still too large, increment q one more time.
325 if r0 >= d {
326 qq++
327 r0 -= d
328 }
329 return Word(qq), Word(r0 >> s)
330 }
331 332 // reciprocalWord return the reciprocal of the divisor. rec = floor(( _B^2 - 1 ) / u - _B). u = d1 << nlz(d1).
333 func reciprocalWord(d1 Word) Word {
334 u := uint(d1 << nlz(d1))
335 x1 := ^u
336 x0 := uint(_M)
337 rec, _ := bits.Div(x1, x0, u) // (_B^2-1)/U-_B = (_B*(_M-C)+_M)/U
338 return Word(rec)
339 }
340