sum_s390x.mx raw

   1  // Copyright 2018 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  //go:build gc && !purego
   6  
   7  package poly1305
   8  
   9  import (
  10  	"golang.org/x/sys/cpu"
  11  )
  12  
  13  // updateVX is the assembly implementation of Poly1305 built on vector
  14  // instructions; it absorbs msg into state. It must only be called when
  15  // the vector facility (vx) is available (callers here check cpu.S390X.HasVX).
  16  //
  17  //go:noescape
  18  func updateVX(state *macState, msg []byte)
  19  
  20  // mac is a replacement for macGeneric. It stages input in a larger buffer
  21  // and sends work that would have gone to updateGeneric to updateVX instead
  22  // whenever the vector facility is reported as installed (cpu.S390X.HasVX).
  23  //
  24  // The larger buffer is needed for good performance: the vector
  25  // implementation has a higher fixed cost per call than the generic
  26  // implementation, so Write hands it whole buffers (16*TagSize bytes) at a time.
  27  type mac struct {
  28  	macState // running state handed to updateVX/updateGeneric and read by Sum
  29
  30  	buffer [16 * TagSize]byte // pending input; size must be a multiple of block size (16)
  31  	offset int                // number of bytes at the start of buffer that hold pending input
  32  }
  33  
  34  func (h *mac) Write(p []byte) (int, error) {
  35  	nn := len(p)
  36  	if h.offset > 0 {
  37  		n := copy(h.buffer[h.offset:], p)
  38  		if h.offset+n < len(h.buffer) {
  39  			h.offset += n
  40  			return nn, nil
  41  		}
  42  		p = p[n:]
  43  		h.offset = 0
  44  		if cpu.S390X.HasVX {
  45  			updateVX(&h.macState, h.buffer[:])
  46  		} else {
  47  			updateGeneric(&h.macState, h.buffer[:])
  48  		}
  49  	}
  50  
  51  	tail := len(p) % len(h.buffer) // number of bytes to copy into buffer
  52  	body := len(p) - tail          // number of bytes to process now
  53  	if body > 0 {
  54  		if cpu.S390X.HasVX {
  55  			updateVX(&h.macState, p[:body])
  56  		} else {
  57  			updateGeneric(&h.macState, p[:body])
  58  		}
  59  	}
  60  	h.offset = copy(h.buffer[:], p[body:]) // copy tail bytes - can be 0
  61  	return nn, nil
  62  }
  63  
  64  func (h *mac) Sum(out *[TagSize]byte) {
  65  	state := h.macState
  66  	remainder := h.buffer[:h.offset]
  67  
  68  	// Use the generic implementation if we have 2 or fewer blocks left
  69  	// to sum. The vector implementation has a higher startup time.
  70  	if cpu.S390X.HasVX && len(remainder) > 2*TagSize {
  71  		updateVX(&state, remainder)
  72  	} else if len(remainder) > 0 {
  73  		updateGeneric(&state, remainder)
  74  	}
  75  	finalize(out, &state.h, &state.s)
  76  }
  77