crc32_s390x.mx raw

   1  // Copyright 2016 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package crc32
   6  
   7  import "internal/cpu"
   8  
// Thresholds that decide how much of a buffer the arch* update
// functions in this file hand to the vectorized routines.
const (
	vxMinLen    = 64 // shortest input, in bytes, passed to the vector code
	vxAlignMask = 15 // align to 16 bytes
)
  13  
// hasVX reports whether the machine has the z/Architecture
// vector facility installed and enabled. Its value is taken from
// cpu.S390X.HasVX, and every arch* function in this file either
// returns it or panics when it is false.
var hasVX = cpu.S390X.HasVX
  17  
// vectorizedCastagnoli implements CRC32 using vector instructions.
// It is defined in crc32_s390x.s.
//
// It takes the running checksum crc and the data p and returns the
// updated checksum. The only caller in this file, archUpdateCastagnoli,
// passes a slice of at least vxMinLen bytes whose length is a multiple
// of 16.
// NOTE(review): whether the assembly requires those length properties
// is not visible from this file; confirm against crc32_s390x.s.
//
//go:noescape
func vectorizedCastagnoli(crc uint32, p []byte) uint32
  23  
// vectorizedIEEE implements CRC32 using vector instructions.
// It is defined in crc32_s390x.s.
//
// It takes the running checksum crc and the data p and returns the
// updated checksum. The only caller in this file, archUpdateIEEE,
// passes a slice of at least vxMinLen bytes whose length is a multiple
// of 16.
// NOTE(review): whether the assembly requires those length properties
// is not visible from this file; confirm against crc32_s390x.s.
//
//go:noescape
func vectorizedIEEE(crc uint32, p []byte) uint32
  29  
// archAvailableCastagnoli reports whether the vectorized Castagnoli
// implementation in this file may be used. It simply returns hasVX;
// archInitCastagnoli and archUpdateCastagnoli panic when that is false.
func archAvailableCastagnoli() bool {
	return hasVX
}
  33  
// archCastagnoliTable8 is the slicing-by-8 table for the Castagnoli
// polynomial. It is set by archInitCastagnoli and read by
// archUpdateCastagnoli for the bytes the vector routine does not process.
var archCastagnoliTable8 *slicing8Table
  35  
  36  func archInitCastagnoli() {
  37  	if !hasVX {
  38  		panic("not available")
  39  	}
  40  	// We still use slicing-by-8 for small buffers.
  41  	archCastagnoliTable8 = slicingMakeTable(Castagnoli)
  42  }
  43  
  44  // archUpdateCastagnoli calculates the checksum of p using
  45  // vectorizedCastagnoli.
  46  func archUpdateCastagnoli(crc uint32, p []byte) uint32 {
  47  	if !hasVX {
  48  		panic("not available")
  49  	}
  50  	// Use vectorized function if data length is above threshold.
  51  	if len(p) >= vxMinLen {
  52  		aligned := len(p) & ^vxAlignMask
  53  		crc = vectorizedCastagnoli(crc, p[:aligned])
  54  		p = p[aligned:]
  55  	}
  56  	if len(p) == 0 {
  57  		return crc
  58  	}
  59  	return slicingUpdate(crc, archCastagnoliTable8, p)
  60  }
  61  
// archAvailableIEEE reports whether the vectorized IEEE implementation
// in this file may be used. It simply returns hasVX; archInitIEEE and
// archUpdateIEEE panic when that is false.
func archAvailableIEEE() bool {
	return hasVX
}
  65  
// archIeeeTable8 is the slicing-by-8 table for the IEEE polynomial.
// It is set by archInitIEEE and read by archUpdateIEEE for the bytes
// the vector routine does not process.
var archIeeeTable8 *slicing8Table
  67  
  68  func archInitIEEE() {
  69  	if !hasVX {
  70  		panic("not available")
  71  	}
  72  	// We still use slicing-by-8 for small buffers.
  73  	archIeeeTable8 = slicingMakeTable(IEEE)
  74  }
  75  
  76  // archUpdateIEEE calculates the checksum of p using vectorizedIEEE.
  77  func archUpdateIEEE(crc uint32, p []byte) uint32 {
  78  	if !hasVX {
  79  		panic("not available")
  80  	}
  81  	// Use vectorized function if data length is above threshold.
  82  	if len(p) >= vxMinLen {
  83  		aligned := len(p) & ^vxAlignMask
  84  		crc = vectorizedIEEE(crc, p[:aligned])
  85  		p = p[aligned:]
  86  	}
  87  	if len(p) == 0 {
  88  		return crc
  89  	}
  90  	return slicingUpdate(crc, archIeeeTable8, p)
  91  }
  92