lz4sconvert.go raw

   1  // Copyright (c) 2022 Klaus Post. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package s2
   6  
   7  import (
   8  	"encoding/binary"
   9  	"fmt"
  10  )
  11  
  12  // LZ4sConverter provides conversion from LZ4s.
  13  // (Intel modified LZ4 Blocks)
  14  // https://cdrdv2-public.intel.com/743912/743912-qat-programmers-guide-v2.0.pdf
  15  // LZ4s is a variant of LZ4 block format. LZ4s should be considered as an intermediate compressed block format.
  16  // The LZ4s format is selected when the application sets the compType to CPA_DC_LZ4S in CpaDcSessionSetupData.
  17  // The LZ4s block returned by the IntelĀ® QAT hardware can be used by an external
  18  // software post-processing to generate other compressed data formats.
  19  // The following table lists the differences between LZ4 and LZ4s block format. LZ4s block format uses
  20  // the same high-level formatting as LZ4 block format with the following encoding changes:
  21  // For Min Match of 4 bytes, Copy length value 1-15 means length 4-18 with 18 bytes adding an extra byte.
  22  // ONLY "Min match of 4 bytes" is supported.
  23  type LZ4sConverter struct {
  24  }
  25  
  26  // ConvertBlock will convert an LZ4s block and append it as an S2
  27  // block without block length to dst.
  28  // The uncompressed size is returned as well.
  29  // dst must have capacity to contain the entire compressed block.
  30  func (l *LZ4sConverter) ConvertBlock(dst, src []byte) ([]byte, int, error) {
  31  	if len(src) == 0 {
  32  		return dst, 0, nil
  33  	}
  34  	const debug = false
  35  	const inline = true
  36  	const lz4MinMatch = 3
  37  
  38  	s, d := 0, len(dst)
  39  	dst = dst[:cap(dst)]
  40  	if !debug && hasAmd64Asm {
  41  		res, sz := cvtLZ4sBlockAsm(dst[d:], src)
  42  		if res < 0 {
  43  			const (
  44  				errCorrupt     = -1
  45  				errDstTooSmall = -2
  46  			)
  47  			switch res {
  48  			case errCorrupt:
  49  				return nil, 0, ErrCorrupt
  50  			case errDstTooSmall:
  51  				return nil, 0, ErrDstTooSmall
  52  			default:
  53  				return nil, 0, fmt.Errorf("unexpected result: %d", res)
  54  			}
  55  		}
  56  		if d+sz > len(dst) {
  57  			return nil, 0, ErrDstTooSmall
  58  		}
  59  		return dst[:d+sz], res, nil
  60  	}
  61  
  62  	dLimit := len(dst) - 10
  63  	var lastOffset uint16
  64  	var uncompressed int
  65  	if debug {
  66  		fmt.Printf("convert block start: len(src): %d, len(dst):%d \n", len(src), len(dst))
  67  	}
  68  
  69  	for {
  70  		if s >= len(src) {
  71  			return dst[:d], 0, ErrCorrupt
  72  		}
  73  		// Read literal info
  74  		token := src[s]
  75  		ll := int(token >> 4)
  76  		ml := int(lz4MinMatch + (token & 0xf))
  77  
  78  		// If upper nibble is 15, literal length is extended
  79  		if token >= 0xf0 {
  80  			for {
  81  				s++
  82  				if s >= len(src) {
  83  					if debug {
  84  						fmt.Printf("error reading ll: s (%d) >= len(src) (%d)\n", s, len(src))
  85  					}
  86  					return dst[:d], 0, ErrCorrupt
  87  				}
  88  				val := src[s]
  89  				ll += int(val)
  90  				if val != 255 {
  91  					break
  92  				}
  93  			}
  94  		}
  95  		// Skip past token
  96  		if s+ll >= len(src) {
  97  			if debug {
  98  				fmt.Printf("error literals: s+ll (%d+%d) >= len(src) (%d)\n", s, ll, len(src))
  99  			}
 100  			return nil, 0, ErrCorrupt
 101  		}
 102  		s++
 103  		if ll > 0 {
 104  			if d+ll > dLimit {
 105  				return nil, 0, ErrDstTooSmall
 106  			}
 107  			if debug {
 108  				fmt.Printf("emit %d literals\n", ll)
 109  			}
 110  			d += emitLiteralGo(dst[d:], src[s:s+ll])
 111  			s += ll
 112  			uncompressed += ll
 113  		}
 114  
 115  		// Check if we are done...
 116  		if ml == lz4MinMatch {
 117  			if s == len(src) {
 118  				break
 119  			}
 120  			// 0 bytes.
 121  			continue
 122  		}
 123  		// 2 byte offset
 124  		if s >= len(src)-2 {
 125  			if debug {
 126  				fmt.Printf("s (%d) >= len(src)-2 (%d)", s, len(src)-2)
 127  			}
 128  			return nil, 0, ErrCorrupt
 129  		}
 130  		offset := binary.LittleEndian.Uint16(src[s:])
 131  		s += 2
 132  		if offset == 0 {
 133  			if debug {
 134  				fmt.Printf("error: offset 0, ml: %d, len(src)-s: %d\n", ml, len(src)-s)
 135  			}
 136  			return nil, 0, ErrCorrupt
 137  		}
 138  		if int(offset) > uncompressed {
 139  			if debug {
 140  				fmt.Printf("error: offset (%d)> uncompressed (%d)\n", offset, uncompressed)
 141  			}
 142  			return nil, 0, ErrCorrupt
 143  		}
 144  
 145  		if ml == lz4MinMatch+15 {
 146  			for {
 147  				if s >= len(src) {
 148  					if debug {
 149  						fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
 150  					}
 151  					return nil, 0, ErrCorrupt
 152  				}
 153  				val := src[s]
 154  				s++
 155  				ml += int(val)
 156  				if val != 255 {
 157  					if s >= len(src) {
 158  						if debug {
 159  							fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
 160  						}
 161  						return nil, 0, ErrCorrupt
 162  					}
 163  					break
 164  				}
 165  			}
 166  		}
 167  		if offset == lastOffset {
 168  			if debug {
 169  				fmt.Printf("emit repeat, length: %d, offset: %d\n", ml, offset)
 170  			}
 171  			if !inline {
 172  				d += emitRepeat16(dst[d:], offset, ml)
 173  			} else {
 174  				length := ml
 175  				dst := dst[d:]
 176  				for len(dst) > 5 {
 177  					// Repeat offset, make length cheaper
 178  					length -= 4
 179  					if length <= 4 {
 180  						dst[0] = uint8(length)<<2 | tagCopy1
 181  						dst[1] = 0
 182  						d += 2
 183  						break
 184  					}
 185  					if length < 8 && offset < 2048 {
 186  						// Encode WITH offset
 187  						dst[1] = uint8(offset)
 188  						dst[0] = uint8(offset>>8)<<5 | uint8(length)<<2 | tagCopy1
 189  						d += 2
 190  						break
 191  					}
 192  					if length < (1<<8)+4 {
 193  						length -= 4
 194  						dst[2] = uint8(length)
 195  						dst[1] = 0
 196  						dst[0] = 5<<2 | tagCopy1
 197  						d += 3
 198  						break
 199  					}
 200  					if length < (1<<16)+(1<<8) {
 201  						length -= 1 << 8
 202  						dst[3] = uint8(length >> 8)
 203  						dst[2] = uint8(length >> 0)
 204  						dst[1] = 0
 205  						dst[0] = 6<<2 | tagCopy1
 206  						d += 4
 207  						break
 208  					}
 209  					const maxRepeat = (1 << 24) - 1
 210  					length -= 1 << 16
 211  					left := 0
 212  					if length > maxRepeat {
 213  						left = length - maxRepeat + 4
 214  						length = maxRepeat - 4
 215  					}
 216  					dst[4] = uint8(length >> 16)
 217  					dst[3] = uint8(length >> 8)
 218  					dst[2] = uint8(length >> 0)
 219  					dst[1] = 0
 220  					dst[0] = 7<<2 | tagCopy1
 221  					if left > 0 {
 222  						d += 5 + emitRepeat16(dst[5:], offset, left)
 223  						break
 224  					}
 225  					d += 5
 226  					break
 227  				}
 228  			}
 229  		} else {
 230  			if debug {
 231  				fmt.Printf("emit copy, length: %d, offset: %d\n", ml, offset)
 232  			}
 233  			if !inline {
 234  				d += emitCopy16(dst[d:], offset, ml)
 235  			} else {
 236  				length := ml
 237  				dst := dst[d:]
 238  				for len(dst) > 5 {
 239  					// Offset no more than 2 bytes.
 240  					if length > 64 {
 241  						off := 3
 242  						if offset < 2048 {
 243  							// emit 8 bytes as tagCopy1, rest as repeats.
 244  							dst[1] = uint8(offset)
 245  							dst[0] = uint8(offset>>8)<<5 | uint8(8-4)<<2 | tagCopy1
 246  							length -= 8
 247  							off = 2
 248  						} else {
 249  							// Emit a length 60 copy, encoded as 3 bytes.
 250  							// Emit remaining as repeat value (minimum 4 bytes).
 251  							dst[2] = uint8(offset >> 8)
 252  							dst[1] = uint8(offset)
 253  							dst[0] = 59<<2 | tagCopy2
 254  							length -= 60
 255  						}
 256  						// Emit remaining as repeats, at least 4 bytes remain.
 257  						d += off + emitRepeat16(dst[off:], offset, length)
 258  						break
 259  					}
 260  					if length >= 12 || offset >= 2048 {
 261  						// Emit the remaining copy, encoded as 3 bytes.
 262  						dst[2] = uint8(offset >> 8)
 263  						dst[1] = uint8(offset)
 264  						dst[0] = uint8(length-1)<<2 | tagCopy2
 265  						d += 3
 266  						break
 267  					}
 268  					// Emit the remaining copy, encoded as 2 bytes.
 269  					dst[1] = uint8(offset)
 270  					dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
 271  					d += 2
 272  					break
 273  				}
 274  			}
 275  			lastOffset = offset
 276  		}
 277  		uncompressed += ml
 278  		if d > dLimit {
 279  			return nil, 0, ErrDstTooSmall
 280  		}
 281  	}
 282  
 283  	return dst[:d], uncompressed, nil
 284  }
 285  
 286  // ConvertBlockSnappy will convert an LZ4s block and append it
 287  // as a Snappy block without block length to dst.
 288  // The uncompressed size is returned as well.
 289  // dst must have capacity to contain the entire compressed block.
 290  func (l *LZ4sConverter) ConvertBlockSnappy(dst, src []byte) ([]byte, int, error) {
 291  	if len(src) == 0 {
 292  		return dst, 0, nil
 293  	}
 294  	const debug = false
 295  	const lz4MinMatch = 3
 296  
 297  	s, d := 0, len(dst)
 298  	dst = dst[:cap(dst)]
 299  	// Use assembly when possible
 300  	if !debug && hasAmd64Asm {
 301  		res, sz := cvtLZ4sBlockSnappyAsm(dst[d:], src)
 302  		if res < 0 {
 303  			const (
 304  				errCorrupt     = -1
 305  				errDstTooSmall = -2
 306  			)
 307  			switch res {
 308  			case errCorrupt:
 309  				return nil, 0, ErrCorrupt
 310  			case errDstTooSmall:
 311  				return nil, 0, ErrDstTooSmall
 312  			default:
 313  				return nil, 0, fmt.Errorf("unexpected result: %d", res)
 314  			}
 315  		}
 316  		if d+sz > len(dst) {
 317  			return nil, 0, ErrDstTooSmall
 318  		}
 319  		return dst[:d+sz], res, nil
 320  	}
 321  
 322  	dLimit := len(dst) - 10
 323  	var uncompressed int
 324  	if debug {
 325  		fmt.Printf("convert block start: len(src): %d, len(dst):%d \n", len(src), len(dst))
 326  	}
 327  
 328  	for {
 329  		if s >= len(src) {
 330  			return nil, 0, ErrCorrupt
 331  		}
 332  		// Read literal info
 333  		token := src[s]
 334  		ll := int(token >> 4)
 335  		ml := int(lz4MinMatch + (token & 0xf))
 336  
 337  		// If upper nibble is 15, literal length is extended
 338  		if token >= 0xf0 {
 339  			for {
 340  				s++
 341  				if s >= len(src) {
 342  					if debug {
 343  						fmt.Printf("error reading ll: s (%d) >= len(src) (%d)\n", s, len(src))
 344  					}
 345  					return nil, 0, ErrCorrupt
 346  				}
 347  				val := src[s]
 348  				ll += int(val)
 349  				if val != 255 {
 350  					break
 351  				}
 352  			}
 353  		}
 354  		// Skip past token
 355  		if s+ll >= len(src) {
 356  			if debug {
 357  				fmt.Printf("error literals: s+ll (%d+%d) >= len(src) (%d)\n", s, ll, len(src))
 358  			}
 359  			return nil, 0, ErrCorrupt
 360  		}
 361  		s++
 362  		if ll > 0 {
 363  			if d+ll > dLimit {
 364  				return nil, 0, ErrDstTooSmall
 365  			}
 366  			if debug {
 367  				fmt.Printf("emit %d literals\n", ll)
 368  			}
 369  			d += emitLiteralGo(dst[d:], src[s:s+ll])
 370  			s += ll
 371  			uncompressed += ll
 372  		}
 373  
 374  		// Check if we are done...
 375  		if ml == lz4MinMatch {
 376  			if s == len(src) {
 377  				break
 378  			}
 379  			// 0 bytes.
 380  			continue
 381  		}
 382  		// 2 byte offset
 383  		if s >= len(src)-2 {
 384  			if debug {
 385  				fmt.Printf("s (%d) >= len(src)-2 (%d)", s, len(src)-2)
 386  			}
 387  			return nil, 0, ErrCorrupt
 388  		}
 389  		offset := binary.LittleEndian.Uint16(src[s:])
 390  		s += 2
 391  		if offset == 0 {
 392  			if debug {
 393  				fmt.Printf("error: offset 0, ml: %d, len(src)-s: %d\n", ml, len(src)-s)
 394  			}
 395  			return nil, 0, ErrCorrupt
 396  		}
 397  		if int(offset) > uncompressed {
 398  			if debug {
 399  				fmt.Printf("error: offset (%d)> uncompressed (%d)\n", offset, uncompressed)
 400  			}
 401  			return nil, 0, ErrCorrupt
 402  		}
 403  
 404  		if ml == lz4MinMatch+15 {
 405  			for {
 406  				if s >= len(src) {
 407  					if debug {
 408  						fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
 409  					}
 410  					return nil, 0, ErrCorrupt
 411  				}
 412  				val := src[s]
 413  				s++
 414  				ml += int(val)
 415  				if val != 255 {
 416  					if s >= len(src) {
 417  						if debug {
 418  							fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
 419  						}
 420  						return nil, 0, ErrCorrupt
 421  					}
 422  					break
 423  				}
 424  			}
 425  		}
 426  		if debug {
 427  			fmt.Printf("emit copy, length: %d, offset: %d\n", ml, offset)
 428  		}
 429  		length := ml
 430  		// d += emitCopyNoRepeat(dst[d:], int(offset), ml)
 431  		for length > 0 {
 432  			if d >= dLimit {
 433  				return nil, 0, ErrDstTooSmall
 434  			}
 435  
 436  			// Offset no more than 2 bytes.
 437  			if length > 64 {
 438  				// Emit a length 64 copy, encoded as 3 bytes.
 439  				dst[d+2] = uint8(offset >> 8)
 440  				dst[d+1] = uint8(offset)
 441  				dst[d+0] = 63<<2 | tagCopy2
 442  				length -= 64
 443  				d += 3
 444  				continue
 445  			}
 446  			if length >= 12 || offset >= 2048 || length < 4 {
 447  				// Emit the remaining copy, encoded as 3 bytes.
 448  				dst[d+2] = uint8(offset >> 8)
 449  				dst[d+1] = uint8(offset)
 450  				dst[d+0] = uint8(length-1)<<2 | tagCopy2
 451  				d += 3
 452  				break
 453  			}
 454  			// Emit the remaining copy, encoded as 2 bytes.
 455  			dst[d+1] = uint8(offset)
 456  			dst[d+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
 457  			d += 2
 458  			break
 459  		}
 460  		uncompressed += ml
 461  		if d > dLimit {
 462  			return nil, 0, ErrDstTooSmall
 463  		}
 464  	}
 465  
 466  	return dst[:d], uncompressed, nil
 467  }
 468