ints.go raw

   1  // Package ints is an optimised encoder for decimal numbers in ASCII format,
   2  // that simplifies and accelerates encoding and decoding decimal strings. It is
   3  // faster than strconv in part because it uses a base of 10000 and a lookup
   4  // table.
   5  package ints
   6  
   7  import (
   8  	_ "embed"
   9  	"io"
  10  
  11  	"golang.org/x/exp/constraints"
  12  	"next.orly.dev/pkg/lol/errorf"
  13  )
  14  
// Run `go generate` to regenerate (pointlessly) the base 10 lookup table of 4 decimal places per entry.
  16  //go:generate go run ./gen/.
  17  
// base10k is the lookup table embedded from base10k.txt. Marshal reads the
// four bytes at offset q*4 and uses them as the decimal digits of the group
// value q.
// NOTE(review): presumably the file holds the zero-padded strings "0000"
// through "9999" back to back — confirm against the generator in ./gen.
//
//go:embed base10k.txt
var base10k []byte
  20  
  21  const base = 10000
  22  
// T is an integer with a fast codec to decimal ASCII.
type T struct {
	// N is the wrapped value: Marshal encodes it and a successful Unmarshal
	// overwrites it.
	N uint64
}
  27  
  28  func New[V constraints.Integer](n V) *T {
  29  	return &T{uint64(n)}
  30  }
  31  
  32  // Uint64 returns the int.T as a uint64 (the base type)
  33  func (n *T) Uint64() uint64 { return n.N }
  34  
  35  // Int64 returns an int64 from the base number (may cause truncation)
  36  func (n *T) Int64() int64 { return int64(n.N) }
  37  
  38  // Uint16 returns an uint16 from the base number (may cause truncation)
  39  func (n *T) Uint16() uint16 { return uint16(n.N) }
  40  
  41  var powers = []*T{
  42  	{1},
  43  	{1_0000},
  44  	{1_0000_0000},
  45  	{1_0000_0000_0000},
  46  	{1_0000_0000_0000_0000},
  47  }
  48  
  49  const zero = '0'
  50  const nine = '9'
  51  
  52  // Marshal the int.T into a byte string.
  53  func (n *T) Marshal(dst []byte) (b []byte) {
  54  	nn := n.N
  55  	b = dst
  56  	if n.N == 0 {
  57  		b = append(b, '0')
  58  		return
  59  	}
  60  	var i int
  61  	var trimmed bool
  62  	k := len(powers)
  63  	for k > 0 {
  64  		k--
  65  		q := n.N / powers[k].N
  66  		if !trimmed && q == 0 {
  67  			continue
  68  		}
  69  		offset := q * 4
  70  		bb := base10k[offset : offset+4]
  71  		if !trimmed {
  72  			for i = range bb {
  73  				if bb[i] != '0' {
  74  					bb = bb[i:]
  75  					trimmed = true
  76  					break
  77  				}
  78  			}
  79  		}
  80  		b = append(b, bb...)
  81  		n.N = n.N - q*powers[k].N
  82  	}
  83  	n.N = nn
  84  	return
  85  }
  86  
  87  // Unmarshal reads a string, which must be a positive integer no larger than math.MaxUint64,
  88  // skipping any non-numeric content before it.
  89  //
  90  // Note that leading zeros are not considered valid, but basically no such thing as machine
  91  // generated JSON integers with leading zeroes. Until this is disproven, this is the fastest way
  92  // to read a positive json integer, and a leading zero is decoded as a zero, and the remainder
  93  // returned.
  94  func (n *T) Unmarshal(b []byte) (r []byte, err error) {
  95  	if len(b) < 1 {
  96  		err = errorf.E("zero length number")
  97  		return
  98  	}
  99  	var sLen int
 100  	if b[0] == zero {
 101  		r = b[1:]
 102  		n.N = 0
 103  		return
 104  	}
 105  	// skip non-number characters
 106  	for i, v := range b {
 107  		if v >= '0' && v <= '9' {
 108  			b = b[i:]
 109  			break
 110  		}
 111  	}
 112  	// log.I.F("%s", b)
 113  	if len(b) == 0 {
 114  		err = io.EOF
 115  		return
 116  	}
 117  	// count the digits
 118  	for ; sLen < len(b) && b[sLen] >= zero && b[sLen] <= nine && b[sLen] != ','; sLen++ {
 119  	}
 120  	// log.I.F("%s", b[:sLen])
 121  	if sLen == 0 {
 122  		err = errorf.E("zero length number")
 123  		return
 124  	}
 125  	if sLen > 20 {
 126  		err = errorf.E("too big number for uint64")
 127  		return
 128  	}
 129  	// the length of the string found
 130  	r = b[sLen:]
 131  	b = b[:sLen]
 132  	// log.I.F("\n%s\n%s", b, r)
 133  	n.N = uint64(b[0]) - zero
 134  	b = b[1:]
 135  	for _, ch := range b {
 136  		ch -= zero
 137  		n.N = n.N*10 + uint64(ch)
 138  	}
 139  	// log.I.F("%d", n.N)
 140  	return
 141  }
 142