arith.mx raw

   1  // Copyright 2009 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  // This file provides Go implementations of elementary multi-precision
   6  // arithmetic operations on word vectors. These have the suffix _g.
   7  // These are needed for platforms without assembly implementations of these routines.
   8  // This file also contains elementary operations that can be implemented
   9  // sufficiently efficiently in Go.
  10  
  11  package big
  12  
  13  import (
  14  	"math/bits"
  15  	_ "unsafe" // for go:linkname
  16  )
  17  
  18  // A Word represents a single digit of a multi-precision unsigned integer.
  19  type Word uint
  20  
  21  const (
  22  	_S = _W / 8 // word size in bytes
  23  
  24  	_W = bits.UintSize // word size in bits
  25  	_B = 1 << _W       // digit base
  26  	_M = _B - 1        // digit mask
  27  )
  28  
  29  // In these routines, it is the caller's responsibility to arrange for
  30  // x, y, and z to all have the same length. We check this and panic.
  31  // The assembly versions of these routines do not include that check.
  32  //
  33  // The check+panic also has the effect of teaching the compiler that
  34  // “i in range for z” implies “i in range for x and y”, eliminating all
  35  // bounds checks in loops from 0 to len(z) and vice versa.
  36  
  37  // ----------------------------------------------------------------------------
  38  // Elementary operations on words
  39  //
  40  // These operations are used by the vector operations below.
  41  
  42  // z1<<_W + z0 = x*y
  43  func mulWW(x, y Word) (z1, z0 Word) {
  44  	hi, lo := bits.Mul(uint(x), uint(y))
  45  	return Word(hi), Word(lo)
  46  }
  47  
  48  // z1<<_W + z0 = x*y + c
  49  func mulAddWWW_g(x, y, c Word) (z1, z0 Word) {
  50  	hi, lo := bits.Mul(uint(x), uint(y))
  51  	var cc uint
  52  	lo, cc = bits.Add(lo, uint(c), 0)
  53  	return Word(hi + cc), Word(lo)
  54  }
  55  
  56  // nlz returns the number of leading zeros in x.
  57  // Wraps bits.LeadingZeros call for convenience.
  58  func nlz(x Word) uint {
  59  	return uint(bits.LeadingZeros(uint(x)))
  60  }
  61  
  62  // The resulting carry c is either 0 or 1.
  63  func addVV_g(z, x, y []Word) (c Word) {
  64  	if len(x) != len(z) || len(y) != len(z) {
  65  		panic("addVV len")
  66  	}
  67  
  68  	for i := range z {
  69  		zi, cc := bits.Add(uint(x[i]), uint(y[i]), uint(c))
  70  		z[i] = Word(zi)
  71  		c = Word(cc)
  72  	}
  73  	return
  74  }
  75  
  76  // The resulting carry c is either 0 or 1.
  77  func subVV_g(z, x, y []Word) (c Word) {
  78  	if len(x) != len(z) || len(y) != len(z) {
  79  		panic("subVV len")
  80  	}
  81  
  82  	for i := range z {
  83  		zi, cc := bits.Sub(uint(x[i]), uint(y[i]), uint(c))
  84  		z[i] = Word(zi)
  85  		c = Word(cc)
  86  	}
  87  	return
  88  }
  89  
  90  // addVW sets z = x + y, returning the final carry c.
  91  // The behavior is undefined if len(x) != len(z).
  92  // If len(z) == 0, c = y; otherwise, c is 0 or 1.
  93  //
  94  // addVW should be an internal detail,
  95  // but widely used packages access it using linkname.
  96  // Notable members of the hall of shame include:
  97  //   - github.com/remyoudompheng/bigfft
  98  //
  99  // Do not remove or change the type signature.
 100  // See go.dev/issue/67401.
 101  //
 102  //go:linkname addVW
 103  func addVW(z, x []Word, y Word) (c Word) {
 104  	if len(x) != len(z) {
 105  		panic("addVW len")
 106  	}
 107  
 108  	if len(z) == 0 {
 109  		return y
 110  	}
 111  	zi, cc := bits.Add(uint(x[0]), uint(y), 0)
 112  	z[0] = Word(zi)
 113  	if cc == 0 {
 114  		if &z[0] != &x[0] {
 115  			copy(z[1:], x[1:])
 116  		}
 117  		return 0
 118  	}
 119  	for i := 1; i < len(z); i++ {
 120  		xi := x[i]
 121  		if xi != ^Word(0) {
 122  			z[i] = xi + 1
 123  			if &z[0] != &x[0] {
 124  				copy(z[i+1:], x[i+1:])
 125  			}
 126  			return 0
 127  		}
 128  		z[i] = 0
 129  	}
 130  	return 1
 131  }
 132  
 133  // addVW_ref is the reference implementation for addVW, used only for testing.
 134  func addVW_ref(z, x []Word, y Word) (c Word) {
 135  	c = y
 136  	for i := range z {
 137  		zi, cc := bits.Add(uint(x[i]), uint(c), 0)
 138  		z[i] = Word(zi)
 139  		c = Word(cc)
 140  	}
 141  	return
 142  }
 143  
 144  // subVW sets z = x - y, returning the final carry c.
 145  // The behavior is undefined if len(x) != len(z).
 146  // If len(z) == 0, c = y; otherwise, c is 0 or 1.
 147  //
 148  // subVW should be an internal detail,
 149  // but widely used packages access it using linkname.
 150  // Notable members of the hall of shame include:
 151  //   - github.com/remyoudompheng/bigfft
 152  //
 153  // Do not remove or change the type signature.
 154  // See go.dev/issue/67401.
 155  //
 156  //go:linkname subVW
 157  func subVW(z, x []Word, y Word) (c Word) {
 158  	if len(x) != len(z) {
 159  		panic("subVW len")
 160  	}
 161  
 162  	if len(z) == 0 {
 163  		return y
 164  	}
 165  	zi, cc := bits.Sub(uint(x[0]), uint(y), 0)
 166  	z[0] = Word(zi)
 167  	if cc == 0 {
 168  		if &z[0] != &x[0] {
 169  			copy(z[1:], x[1:])
 170  		}
 171  		return 0
 172  	}
 173  	for i := 1; i < len(z); i++ {
 174  		xi := x[i]
 175  		if xi != 0 {
 176  			z[i] = xi - 1
 177  			if &z[0] != &x[0] {
 178  				copy(z[i+1:], x[i+1:])
 179  			}
 180  			return 0
 181  		}
 182  		z[i] = ^Word(0)
 183  	}
 184  	return 1
 185  }
 186  
 187  // subVW_ref is the reference implementation for subVW, used only for testing.
 188  func subVW_ref(z, x []Word, y Word) (c Word) {
 189  	c = y
 190  	for i := range z {
 191  		zi, cc := bits.Sub(uint(x[i]), uint(c), 0)
 192  		z[i] = Word(zi)
 193  		c = Word(cc)
 194  	}
 195  	return c
 196  }
 197  
 198  func lshVU_g(z, x []Word, s uint) (c Word) {
 199  	if len(x) != len(z) {
 200  		panic("lshVU len")
 201  	}
 202  
 203  	if s == 0 {
 204  		copy(z, x)
 205  		return
 206  	}
 207  	if len(z) == 0 {
 208  		return
 209  	}
 210  	s &= _W - 1 // hint to the compiler that shifts by s don't need guard code
 211  	ŝ := _W - s
 212  	ŝ &= _W - 1 // ditto
 213  	c = x[len(z)-1] >> ŝ
 214  	for i := len(z) - 1; i > 0; i-- {
 215  		z[i] = x[i]<<s | x[i-1]>>ŝ
 216  	}
 217  	z[0] = x[0] << s
 218  	return
 219  }
 220  
 221  func rshVU_g(z, x []Word, s uint) (c Word) {
 222  	if len(x) != len(z) {
 223  		panic("rshVU len")
 224  	}
 225  
 226  	if s == 0 {
 227  		copy(z, x)
 228  		return
 229  	}
 230  	if len(z) == 0 {
 231  		return
 232  	}
 233  	s &= _W - 1 // hint to the compiler that shifts by s don't need guard code
 234  	ŝ := _W - s
 235  	ŝ &= _W - 1 // ditto
 236  	c = x[0] << ŝ
 237  	for i := 1; i < len(z); i++ {
 238  		z[i-1] = x[i-1]>>s | x[i]<<ŝ
 239  	}
 240  	z[len(z)-1] = x[len(z)-1] >> s
 241  	return
 242  }
 243  
 244  func mulAddVWW_g(z, x []Word, y, r Word) (c Word) {
 245  	if len(x) != len(z) {
 246  		panic("mulAddVWW len")
 247  	}
 248  	c = r
 249  	for i := range z {
 250  		c, z[i] = mulAddWWW_g(x[i], y, c)
 251  	}
 252  	return
 253  }
 254  
 255  func addMulVVWW_g(z, x, y []Word, m, a Word) (c Word) {
 256  	if len(x) != len(z) || len(y) != len(z) {
 257  		panic("rshVU len")
 258  	}
 259  
 260  	c = a
 261  	for i := range z {
 262  		z1, z0 := mulAddWWW_g(y[i], m, x[i])
 263  		lo, cc := bits.Add(uint(z0), uint(c), 0)
 264  		c, z[i] = Word(cc), Word(lo)
 265  		c += z1
 266  	}
 267  	return
 268  }
 269  
 270  // q = ( x1 << _W + x0 - r)/y. m = floor(( _B^2 - 1 ) / d - _B). Requiring x1<y.
 271  // An approximate reciprocal with a reference to "Improved Division by Invariant Integers
 272  // (IEEE Transactions on Computers, 11 Jun. 2010)"
 273  func divWW(x1, x0, y, m Word) (q, r Word) {
 274  	s := nlz(y)
 275  	if s != 0 {
 276  		x1 = x1<<s | x0>>(_W-s)
 277  		x0 <<= s
 278  		y <<= s
 279  	}
 280  	d := uint(y)
 281  	// We know that
 282  	//   m = ⎣(B^2-1)/d⎦-B
 283  	//   ⎣(B^2-1)/d⎦ = m+B
 284  	//   (B^2-1)/d = m+B+delta1    0 <= delta1 <= (d-1)/d
 285  	//   B^2/d = m+B+delta2        0 <= delta2 <= 1
 286  	// The quotient we're trying to compute is
 287  	//   quotient = ⎣(x1*B+x0)/d⎦
 288  	//            = ⎣(x1*B*(B^2/d)+x0*(B^2/d))/B^2⎦
 289  	//            = ⎣(x1*B*(m+B+delta2)+x0*(m+B+delta2))/B^2⎦
 290  	//            = ⎣(x1*m+x1*B+x0)/B + x0*m/B^2 + delta2*(x1*B+x0)/B^2⎦
 291  	// The latter two terms of this three-term sum are between 0 and 1.
 292  	// So we can compute just the first term, and we will be low by at most 2.
 293  	t1, t0 := bits.Mul(uint(m), uint(x1))
 294  	_, c := bits.Add(t0, uint(x0), 0)
 295  	t1, _ = bits.Add(t1, uint(x1), c)
 296  	// The quotient is either t1, t1+1, or t1+2.
 297  	// We'll try t1 and adjust if needed.
 298  	qq := t1
 299  	// compute remainder r=x-d*q.
 300  	dq1, dq0 := bits.Mul(d, qq)
 301  	r0, b := bits.Sub(uint(x0), dq0, 0)
 302  	r1, _ := bits.Sub(uint(x1), dq1, b)
 303  	// The remainder we just computed is bounded above by B+d:
 304  	// r = x1*B + x0 - d*q.
 305  	//   = x1*B + x0 - d*⎣(x1*m+x1*B+x0)/B⎦
 306  	//   = x1*B + x0 - d*((x1*m+x1*B+x0)/B-alpha)                                   0 <= alpha < 1
 307  	//   = x1*B + x0 - x1*d/B*m                         - x1*d - x0*d/B + d*alpha
 308  	//   = x1*B + x0 - x1*d/B*⎣(B^2-1)/d-B⎦             - x1*d - x0*d/B + d*alpha
 309  	//   = x1*B + x0 - x1*d/B*⎣(B^2-1)/d-B⎦             - x1*d - x0*d/B + d*alpha
 310  	//   = x1*B + x0 - x1*d/B*((B^2-1)/d-B-beta)        - x1*d - x0*d/B + d*alpha   0 <= beta < 1
 311  	//   = x1*B + x0 - x1*B + x1/B + x1*d + x1*d/B*beta - x1*d - x0*d/B + d*alpha
 312  	//   =        x0        + x1/B        + x1*d/B*beta        - x0*d/B + d*alpha
 313  	//   = x0*(1-d/B) + x1*(1+d*beta)/B + d*alpha
 314  	//   <  B*(1-d/B) +  d*B/B          + d          because x0<B (and 1-d/B>0), x1<d, 1+d*beta<=B, alpha<1
 315  	//   =  B - d     +  d              + d
 316  	//   = B+d
 317  	// So r1 can only be 0 or 1. If r1 is 1, then we know q was too small.
 318  	// Add 1 to q and subtract d from r. That guarantees that r is <B, so
 319  	// we no longer need to keep track of r1.
 320  	if r1 != 0 {
 321  		qq++
 322  		r0 -= d
 323  	}
 324  	// If the remainder is still too large, increment q one more time.
 325  	if r0 >= d {
 326  		qq++
 327  		r0 -= d
 328  	}
 329  	return Word(qq), Word(r0 >> s)
 330  }
 331  
 332  // reciprocalWord return the reciprocal of the divisor. rec = floor(( _B^2 - 1 ) / u - _B). u = d1 << nlz(d1).
 333  func reciprocalWord(d1 Word) Word {
 334  	u := uint(d1 << nlz(d1))
 335  	x1 := ^u
 336  	x0 := uint(_M)
 337  	rec, _ := bits.Div(x1, x0, u) // (_B^2-1)/U-_B = (_B*(_M-C)+_M)/U
 338  	return Word(rec)
 339  }
 340