fdct.mx raw

   1  // Copyright 2011 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package jpeg
   6  
   7  // This file implements a Forward Discrete Cosine Transformation.
   8  
   9  /*
  10  It is based on the code in jfdctint.c from the Independent JPEG Group,
  11  found at http://www.ijg.org/files/jpegsrc.v8c.tar.gz.
  12  
  13  The "LEGAL ISSUES" section of the README in that archive says:
  14  
  15  In plain English:
  16  
  17  1. We don't promise that this software works.  (But if you find any bugs,
  18     please let us know!)
  19  2. You can use this software for whatever you want.  You don't have to pay us.
  20  3. You may not pretend that you wrote this software.  If you use it in a
  21     program, you must acknowledge somewhere in your documentation that
  22     you've used the IJG code.
  23  
  24  In legalese:
  25  
  26  The authors make NO WARRANTY or representation, either express or implied,
  27  with respect to this software, its quality, accuracy, merchantability, or
  28  fitness for a particular purpose.  This software is provided "AS IS", and you,
  29  its user, assume the entire risk as to its quality and accuracy.
  30  
  31  This software is copyright (C) 1991-2011, Thomas G. Lane, Guido Vollbeding.
  32  All Rights Reserved except as specified below.
  33  
  34  Permission is hereby granted to use, copy, modify, and distribute this
  35  software (or portions thereof) for any purpose, without fee, subject to these
  36  conditions:
  37  (1) If any part of the source code for this software is distributed, then this
  38  README file must be included, with this copyright and no-warranty notice
  39  unaltered; and any additions, deletions, or changes to the original files
  40  must be clearly indicated in accompanying documentation.
  41  (2) If only executable code is distributed, then the accompanying
  42  documentation must state that "this software is based in part on the work of
  43  the Independent JPEG Group".
  44  (3) Permission for use of this software is granted only if the user accepts
  45  full responsibility for any undesirable consequences; the authors accept
  46  NO LIABILITY for damages of any kind.
  47  
  48  These conditions apply to any software derived from or based on the IJG code,
  49  not just to the unmodified library.  If you use our work, you ought to
  50  acknowledge us.
  51  
  52  Permission is NOT granted for the use of any IJG author's name or company name
  53  in advertising or publicity relating to this software or products derived from
  54  it.  This software may be referred to only as "the Independent JPEG Group's
  55  software".
  56  
  57  We specifically permit and encourage the use of this software as the basis of
  58  commercial products, provided that all warranty or liability claims are
  59  assumed by the product vendor.
  60  */
  61  
  62  // Trigonometric constants in 13-bit fixed point format.
  63  const (
  64  	fix_0_298631336 = 2446
  65  	fix_0_390180644 = 3196
  66  	fix_0_541196100 = 4433
  67  	fix_0_765366865 = 6270
  68  	fix_0_899976223 = 7373
  69  	fix_1_175875602 = 9633
  70  	fix_1_501321110 = 12299
  71  	fix_1_847759065 = 15137
  72  	fix_1_961570560 = 16069
  73  	fix_2_053119869 = 16819
  74  	fix_2_562915447 = 20995
  75  	fix_3_072711026 = 25172
  76  )
  77  
  78  const (
  79  	constBits     = 13
  80  	pass1Bits     = 2
  81  	centerJSample = 128
  82  )
  83  
  84  // fdct performs a forward DCT on an 8x8 block of coefficients, including a
  85  // level shift.
  86  func fdct(b *block) {
  87  	// Pass 1: process rows.
  88  	for y := 0; y < 8; y++ {
  89  		y8 := y * 8
  90  		s := b[y8 : y8+8 : y8+8] // Small cap improves performance, see https://golang.org/issue/27857
  91  		x0 := s[0]
  92  		x1 := s[1]
  93  		x2 := s[2]
  94  		x3 := s[3]
  95  		x4 := s[4]
  96  		x5 := s[5]
  97  		x6 := s[6]
  98  		x7 := s[7]
  99  
 100  		tmp0 := x0 + x7
 101  		tmp1 := x1 + x6
 102  		tmp2 := x2 + x5
 103  		tmp3 := x3 + x4
 104  
 105  		tmp10 := tmp0 + tmp3
 106  		tmp12 := tmp0 - tmp3
 107  		tmp11 := tmp1 + tmp2
 108  		tmp13 := tmp1 - tmp2
 109  
 110  		tmp0 = x0 - x7
 111  		tmp1 = x1 - x6
 112  		tmp2 = x2 - x5
 113  		tmp3 = x3 - x4
 114  
 115  		s[0] = (tmp10 + tmp11 - 8*centerJSample) << pass1Bits
 116  		s[4] = (tmp10 - tmp11) << pass1Bits
 117  		z1 := (tmp12 + tmp13) * fix_0_541196100
 118  		z1 += 1 << (constBits - pass1Bits - 1)
 119  		s[2] = (z1 + tmp12*fix_0_765366865) >> (constBits - pass1Bits)
 120  		s[6] = (z1 - tmp13*fix_1_847759065) >> (constBits - pass1Bits)
 121  
 122  		tmp10 = tmp0 + tmp3
 123  		tmp11 = tmp1 + tmp2
 124  		tmp12 = tmp0 + tmp2
 125  		tmp13 = tmp1 + tmp3
 126  		z1 = (tmp12 + tmp13) * fix_1_175875602
 127  		z1 += 1 << (constBits - pass1Bits - 1)
 128  		tmp0 *= fix_1_501321110
 129  		tmp1 *= fix_3_072711026
 130  		tmp2 *= fix_2_053119869
 131  		tmp3 *= fix_0_298631336
 132  		tmp10 *= -fix_0_899976223
 133  		tmp11 *= -fix_2_562915447
 134  		tmp12 *= -fix_0_390180644
 135  		tmp13 *= -fix_1_961570560
 136  
 137  		tmp12 += z1
 138  		tmp13 += z1
 139  		s[1] = (tmp0 + tmp10 + tmp12) >> (constBits - pass1Bits)
 140  		s[3] = (tmp1 + tmp11 + tmp13) >> (constBits - pass1Bits)
 141  		s[5] = (tmp2 + tmp11 + tmp12) >> (constBits - pass1Bits)
 142  		s[7] = (tmp3 + tmp10 + tmp13) >> (constBits - pass1Bits)
 143  	}
 144  	// Pass 2: process columns.
 145  	// We remove pass1Bits scaling, but leave results scaled up by an overall factor of 8.
 146  	for x := 0; x < 8; x++ {
 147  		tmp0 := b[0*8+x] + b[7*8+x]
 148  		tmp1 := b[1*8+x] + b[6*8+x]
 149  		tmp2 := b[2*8+x] + b[5*8+x]
 150  		tmp3 := b[3*8+x] + b[4*8+x]
 151  
 152  		tmp10 := tmp0 + tmp3 + 1<<(pass1Bits-1)
 153  		tmp12 := tmp0 - tmp3
 154  		tmp11 := tmp1 + tmp2
 155  		tmp13 := tmp1 - tmp2
 156  
 157  		tmp0 = b[0*8+x] - b[7*8+x]
 158  		tmp1 = b[1*8+x] - b[6*8+x]
 159  		tmp2 = b[2*8+x] - b[5*8+x]
 160  		tmp3 = b[3*8+x] - b[4*8+x]
 161  
 162  		b[0*8+x] = (tmp10 + tmp11) >> pass1Bits
 163  		b[4*8+x] = (tmp10 - tmp11) >> pass1Bits
 164  
 165  		z1 := (tmp12 + tmp13) * fix_0_541196100
 166  		z1 += 1 << (constBits + pass1Bits - 1)
 167  		b[2*8+x] = (z1 + tmp12*fix_0_765366865) >> (constBits + pass1Bits)
 168  		b[6*8+x] = (z1 - tmp13*fix_1_847759065) >> (constBits + pass1Bits)
 169  
 170  		tmp10 = tmp0 + tmp3
 171  		tmp11 = tmp1 + tmp2
 172  		tmp12 = tmp0 + tmp2
 173  		tmp13 = tmp1 + tmp3
 174  		z1 = (tmp12 + tmp13) * fix_1_175875602
 175  		z1 += 1 << (constBits + pass1Bits - 1)
 176  		tmp0 *= fix_1_501321110
 177  		tmp1 *= fix_3_072711026
 178  		tmp2 *= fix_2_053119869
 179  		tmp3 *= fix_0_298631336
 180  		tmp10 *= -fix_0_899976223
 181  		tmp11 *= -fix_2_562915447
 182  		tmp12 *= -fix_0_390180644
 183  		tmp13 *= -fix_1_961570560
 184  
 185  		tmp12 += z1
 186  		tmp13 += z1
 187  		b[1*8+x] = (tmp0 + tmp10 + tmp12) >> (constBits + pass1Bits)
 188  		b[3*8+x] = (tmp1 + tmp11 + tmp13) >> (constBits + pass1Bits)
 189  		b[5*8+x] = (tmp2 + tmp11 + tmp12) >> (constBits + pass1Bits)
 190  		b[7*8+x] = (tmp3 + tmp10 + tmp13) >> (constBits + pass1Bits)
 191  	}
 192  }
 193