| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include <assert.h> | 11 #include <assert.h> |
| 12 #include <math.h> | 12 #include <math.h> |
| 13 | 13 |
| 14 #include "./vpx_config.h" | 14 #include "./vpx_config.h" |
| 15 #include "./vp9_rtcd.h" | 15 #include "./vp9_rtcd.h" |
| 16 | 16 |
| 17 #include "vp9/common/vp9_blockd.h" | 17 #include "vp9/common/vp9_blockd.h" |
| 18 #include "vp9/common/vp9_idct.h" | 18 #include "vp9/common/vp9_idct.h" |
| 19 #include "vp9/common/vp9_systemdependent.h" | 19 #include "vp9/common/vp9_systemdependent.h" |
| 20 | 20 |
| 21 #include "vp9/encoder/vp9_dct.h" | 21 #include "vp9/encoder/vp9_dct.h" |
| 22 | 22 |
| 23 static INLINE int fdct_round_shift(int input) { |
| 24 int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); |
| 25 assert(INT16_MIN <= rv && rv <= INT16_MAX); |
| 26 return rv; |
| 27 } |
| 28 |
| 23 static void fdct4(const int16_t *input, int16_t *output) { | 29 static void fdct4(const int16_t *input, int16_t *output) { |
| 24 int16_t step[4]; | 30 int16_t step[4]; |
| 25 int temp1, temp2; | 31 int temp1, temp2; |
| 26 | 32 |
| 27 step[0] = input[0] + input[3]; | 33 step[0] = input[0] + input[3]; |
| 28 step[1] = input[1] + input[2]; | 34 step[1] = input[1] + input[2]; |
| 29 step[2] = input[1] - input[2]; | 35 step[2] = input[1] - input[2]; |
| 30 step[3] = input[0] - input[3]; | 36 step[3] = input[0] - input[3]; |
| 31 | 37 |
| 32 temp1 = (step[0] + step[1]) * cospi_16_64; | 38 temp1 = (step[0] + step[1]) * cospi_16_64; |
| 33 temp2 = (step[0] - step[1]) * cospi_16_64; | 39 temp2 = (step[0] - step[1]) * cospi_16_64; |
| 34 output[0] = dct_const_round_shift(temp1); | 40 output[0] = fdct_round_shift(temp1); |
| 35 output[2] = dct_const_round_shift(temp2); | 41 output[2] = fdct_round_shift(temp2); |
| 36 temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64; | 42 temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64; |
| 37 temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64; | 43 temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64; |
| 38 output[1] = dct_const_round_shift(temp1); | 44 output[1] = fdct_round_shift(temp1); |
| 39 output[3] = dct_const_round_shift(temp2); | 45 output[3] = fdct_round_shift(temp2); |
| 40 } | 46 } |
| 41 | 47 |
| 42 void vp9_fdct4x4_c(const int16_t *input, int16_t *output, int stride) { | 48 void vp9_fdct4x4_c(const int16_t *input, int16_t *output, int stride) { |
| 43 // The 2D transform is done with two passes which are actually pretty | 49 // The 2D transform is done with two passes which are actually pretty |
| 44 // similar. In the first one, we transform the columns and transpose | 50 // similar. In the first one, we transform the columns and transpose |
| 45 // the results. In the second one, we transform the rows. To achieve that, | 51 // the results. In the second one, we transform the rows. To achieve that, |
| 46 // as the first pass results are transposed, we transpose the columns (that | 52 // as the first pass results are transposed, we transpose the columns (that |
| 47 // is the transposed rows) and transpose the results (so that it goes back | 53 // is the transposed rows) and transpose the results (so that it goes back |
| 48 // in normal/row positions). | 54 // in normal/row positions). |
| 49 int pass; | 55 int pass; |
| (...skipping 23 matching lines...) Expand all Loading... |
| 73 input[2] = in[2 * 4]; | 79 input[2] = in[2 * 4]; |
| 74 input[3] = in[3 * 4]; | 80 input[3] = in[3 * 4]; |
| 75 } | 81 } |
| 76 // Transform. | 82 // Transform. |
| 77 step[0] = input[0] + input[3]; | 83 step[0] = input[0] + input[3]; |
| 78 step[1] = input[1] + input[2]; | 84 step[1] = input[1] + input[2]; |
| 79 step[2] = input[1] - input[2]; | 85 step[2] = input[1] - input[2]; |
| 80 step[3] = input[0] - input[3]; | 86 step[3] = input[0] - input[3]; |
| 81 temp1 = (step[0] + step[1]) * cospi_16_64; | 87 temp1 = (step[0] + step[1]) * cospi_16_64; |
| 82 temp2 = (step[0] - step[1]) * cospi_16_64; | 88 temp2 = (step[0] - step[1]) * cospi_16_64; |
| 83 out[0] = dct_const_round_shift(temp1); | 89 out[0] = fdct_round_shift(temp1); |
| 84 out[2] = dct_const_round_shift(temp2); | 90 out[2] = fdct_round_shift(temp2); |
| 85 temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64; | 91 temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64; |
| 86 temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64; | 92 temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64; |
| 87 out[1] = dct_const_round_shift(temp1); | 93 out[1] = fdct_round_shift(temp1); |
| 88 out[3] = dct_const_round_shift(temp2); | 94 out[3] = fdct_round_shift(temp2); |
| 89 // Do next column (which is a transposed row in second/horizontal pass) | 95 // Do next column (which is a transposed row in second/horizontal pass) |
| 90 in++; | 96 in++; |
| 91 out += 4; | 97 out += 4; |
| 92 } | 98 } |
| 93 // Setup in/out for next pass. | 99 // Setup in/out for next pass. |
| 94 in = intermediate; | 100 in = intermediate; |
| 95 out = output; | 101 out = output; |
| 96 } | 102 } |
| 97 | 103 |
| 98 { | 104 { |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 131 x1 = sinpi_3_9 * s7; | 137 x1 = sinpi_3_9 * s7; |
| 132 x2 = s1 - s3 + s6; | 138 x2 = s1 - s3 + s6; |
| 133 x3 = s4; | 139 x3 = s4; |
| 134 | 140 |
| 135 s0 = x0 + x3; | 141 s0 = x0 + x3; |
| 136 s1 = x1; | 142 s1 = x1; |
| 137 s2 = x2 - x3; | 143 s2 = x2 - x3; |
| 138 s3 = x2 - x0 + x3; | 144 s3 = x2 - x0 + x3; |
| 139 | 145 |
| 140 // 1-D transform scaling factor is sqrt(2). | 146 // 1-D transform scaling factor is sqrt(2). |
| 141 output[0] = dct_const_round_shift(s0); | 147 output[0] = fdct_round_shift(s0); |
| 142 output[1] = dct_const_round_shift(s1); | 148 output[1] = fdct_round_shift(s1); |
| 143 output[2] = dct_const_round_shift(s2); | 149 output[2] = fdct_round_shift(s2); |
| 144 output[3] = dct_const_round_shift(s3); | 150 output[3] = fdct_round_shift(s3); |
| 145 } | 151 } |
| 146 | 152 |
| 147 static const transform_2d FHT_4[] = { | 153 static const transform_2d FHT_4[] = { |
| 148 { fdct4, fdct4 }, // DCT_DCT = 0 | 154 { fdct4, fdct4 }, // DCT_DCT = 0 |
| 149 { fadst4, fdct4 }, // ADST_DCT = 1 | 155 { fadst4, fdct4 }, // ADST_DCT = 1 |
| 150 { fdct4, fadst4 }, // DCT_ADST = 2 | 156 { fdct4, fadst4 }, // DCT_ADST = 2 |
| 151 { fadst4, fadst4 } // ADST_ADST = 3 | 157 { fadst4, fadst4 } // ADST_ADST = 3 |
| 152 }; | 158 }; |
| 153 | 159 |
| 154 void vp9_short_fht4x4_c(const int16_t *input, int16_t *output, | 160 void vp9_short_fht4x4_c(const int16_t *input, int16_t *output, |
| (...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 197 | 203 |
| 198 // fdct4(step, step); | 204 // fdct4(step, step); |
| 199 x0 = s0 + s3; | 205 x0 = s0 + s3; |
| 200 x1 = s1 + s2; | 206 x1 = s1 + s2; |
| 201 x2 = s1 - s2; | 207 x2 = s1 - s2; |
| 202 x3 = s0 - s3; | 208 x3 = s0 - s3; |
| 203 t0 = (x0 + x1) * cospi_16_64; | 209 t0 = (x0 + x1) * cospi_16_64; |
| 204 t1 = (x0 - x1) * cospi_16_64; | 210 t1 = (x0 - x1) * cospi_16_64; |
| 205 t2 = x2 * cospi_24_64 + x3 * cospi_8_64; | 211 t2 = x2 * cospi_24_64 + x3 * cospi_8_64; |
| 206 t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; | 212 t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; |
| 207 output[0] = dct_const_round_shift(t0); | 213 output[0] = fdct_round_shift(t0); |
| 208 output[2] = dct_const_round_shift(t2); | 214 output[2] = fdct_round_shift(t2); |
| 209 output[4] = dct_const_round_shift(t1); | 215 output[4] = fdct_round_shift(t1); |
| 210 output[6] = dct_const_round_shift(t3); | 216 output[6] = fdct_round_shift(t3); |
| 211 | 217 |
| 212 // Stage 2 | 218 // Stage 2 |
| 213 t0 = (s6 - s5) * cospi_16_64; | 219 t0 = (s6 - s5) * cospi_16_64; |
| 214 t1 = (s6 + s5) * cospi_16_64; | 220 t1 = (s6 + s5) * cospi_16_64; |
| 215 t2 = dct_const_round_shift(t0); | 221 t2 = fdct_round_shift(t0); |
| 216 t3 = dct_const_round_shift(t1); | 222 t3 = fdct_round_shift(t1); |
| 217 | 223 |
| 218 // Stage 3 | 224 // Stage 3 |
| 219 x0 = s4 + t2; | 225 x0 = s4 + t2; |
| 220 x1 = s4 - t2; | 226 x1 = s4 - t2; |
| 221 x2 = s7 - t3; | 227 x2 = s7 - t3; |
| 222 x3 = s7 + t3; | 228 x3 = s7 + t3; |
| 223 | 229 |
| 224 // Stage 4 | 230 // Stage 4 |
| 225 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; | 231 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; |
| 226 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; | 232 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; |
| 227 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; | 233 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; |
| 228 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; | 234 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; |
| 229 output[1] = dct_const_round_shift(t0); | 235 output[1] = fdct_round_shift(t0); |
| 230 output[3] = dct_const_round_shift(t2); | 236 output[3] = fdct_round_shift(t2); |
| 231 output[5] = dct_const_round_shift(t1); | 237 output[5] = fdct_round_shift(t1); |
| 232 output[7] = dct_const_round_shift(t3); | 238 output[7] = fdct_round_shift(t3); |
| 233 } | 239 } |
| 234 | 240 |
| 235 void vp9_fdct8x8_c(const int16_t *input, int16_t *final_output, int stride) { | 241 void vp9_fdct8x8_c(const int16_t *input, int16_t *final_output, int stride) { |
| 236 int i, j; | 242 int i, j; |
| 237 int16_t intermediate[64]; | 243 int16_t intermediate[64]; |
| 238 | 244 |
| 239 // Transform columns | 245 // Transform columns |
| 240 { | 246 { |
| 241 int16_t *output = intermediate; | 247 int16_t *output = intermediate; |
| 242 /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7; | 248 /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7; |
| (...skipping 14 matching lines...) Expand all Loading... |
| 257 | 263 |
| 258 // fdct4(step, step); | 264 // fdct4(step, step); |
| 259 x0 = s0 + s3; | 265 x0 = s0 + s3; |
| 260 x1 = s1 + s2; | 266 x1 = s1 + s2; |
| 261 x2 = s1 - s2; | 267 x2 = s1 - s2; |
| 262 x3 = s0 - s3; | 268 x3 = s0 - s3; |
| 263 t0 = (x0 + x1) * cospi_16_64; | 269 t0 = (x0 + x1) * cospi_16_64; |
| 264 t1 = (x0 - x1) * cospi_16_64; | 270 t1 = (x0 - x1) * cospi_16_64; |
| 265 t2 = x2 * cospi_24_64 + x3 * cospi_8_64; | 271 t2 = x2 * cospi_24_64 + x3 * cospi_8_64; |
| 266 t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; | 272 t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; |
| 267 output[0 * 8] = dct_const_round_shift(t0); | 273 output[0 * 8] = fdct_round_shift(t0); |
| 268 output[2 * 8] = dct_const_round_shift(t2); | 274 output[2 * 8] = fdct_round_shift(t2); |
| 269 output[4 * 8] = dct_const_round_shift(t1); | 275 output[4 * 8] = fdct_round_shift(t1); |
| 270 output[6 * 8] = dct_const_round_shift(t3); | 276 output[6 * 8] = fdct_round_shift(t3); |
| 271 | 277 |
| 272 // Stage 2 | 278 // Stage 2 |
| 273 t0 = (s6 - s5) * cospi_16_64; | 279 t0 = (s6 - s5) * cospi_16_64; |
| 274 t1 = (s6 + s5) * cospi_16_64; | 280 t1 = (s6 + s5) * cospi_16_64; |
| 275 t2 = dct_const_round_shift(t0); | 281 t2 = fdct_round_shift(t0); |
| 276 t3 = dct_const_round_shift(t1); | 282 t3 = fdct_round_shift(t1); |
| 277 | 283 |
| 278 // Stage 3 | 284 // Stage 3 |
| 279 x0 = s4 + t2; | 285 x0 = s4 + t2; |
| 280 x1 = s4 - t2; | 286 x1 = s4 - t2; |
| 281 x2 = s7 - t3; | 287 x2 = s7 - t3; |
| 282 x3 = s7 + t3; | 288 x3 = s7 + t3; |
| 283 | 289 |
| 284 // Stage 4 | 290 // Stage 4 |
| 285 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; | 291 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; |
| 286 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; | 292 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; |
| 287 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; | 293 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; |
| 288 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; | 294 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; |
| 289 output[1 * 8] = dct_const_round_shift(t0); | 295 output[1 * 8] = fdct_round_shift(t0); |
| 290 output[3 * 8] = dct_const_round_shift(t2); | 296 output[3 * 8] = fdct_round_shift(t2); |
| 291 output[5 * 8] = dct_const_round_shift(t1); | 297 output[5 * 8] = fdct_round_shift(t1); |
| 292 output[7 * 8] = dct_const_round_shift(t3); | 298 output[7 * 8] = fdct_round_shift(t3); |
| 293 input++; | 299 input++; |
| 294 output++; | 300 output++; |
| 295 } | 301 } |
| 296 } | 302 } |
| 297 | 303 |
| 298 // Rows | 304 // Rows |
| 299 for (i = 0; i < 8; ++i) { | 305 for (i = 0; i < 8; ++i) { |
| 300 fdct8(&intermediate[i * 8], &final_output[i * 8]); | 306 fdct8(&intermediate[i * 8], &final_output[i * 8]); |
| 301 for (j = 0; j < 8; ++j) | 307 for (j = 0; j < 8; ++j) |
| 302 final_output[j + i * 8] /= 2; | 308 final_output[j + i * 8] /= 2; |
| (...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 381 | 387 |
| 382 // fdct4(step, step); | 388 // fdct4(step, step); |
| 383 x0 = s0 + s3; | 389 x0 = s0 + s3; |
| 384 x1 = s1 + s2; | 390 x1 = s1 + s2; |
| 385 x2 = s1 - s2; | 391 x2 = s1 - s2; |
| 386 x3 = s0 - s3; | 392 x3 = s0 - s3; |
| 387 t0 = (x0 + x1) * cospi_16_64; | 393 t0 = (x0 + x1) * cospi_16_64; |
| 388 t1 = (x0 - x1) * cospi_16_64; | 394 t1 = (x0 - x1) * cospi_16_64; |
| 389 t2 = x3 * cospi_8_64 + x2 * cospi_24_64; | 395 t2 = x3 * cospi_8_64 + x2 * cospi_24_64; |
| 390 t3 = x3 * cospi_24_64 - x2 * cospi_8_64; | 396 t3 = x3 * cospi_24_64 - x2 * cospi_8_64; |
| 391 out[0] = dct_const_round_shift(t0); | 397 out[0] = fdct_round_shift(t0); |
| 392 out[4] = dct_const_round_shift(t2); | 398 out[4] = fdct_round_shift(t2); |
| 393 out[8] = dct_const_round_shift(t1); | 399 out[8] = fdct_round_shift(t1); |
| 394 out[12] = dct_const_round_shift(t3); | 400 out[12] = fdct_round_shift(t3); |
| 395 | 401 |
| 396 // Stage 2 | 402 // Stage 2 |
| 397 t0 = (s6 - s5) * cospi_16_64; | 403 t0 = (s6 - s5) * cospi_16_64; |
| 398 t1 = (s6 + s5) * cospi_16_64; | 404 t1 = (s6 + s5) * cospi_16_64; |
| 399 t2 = dct_const_round_shift(t0); | 405 t2 = fdct_round_shift(t0); |
| 400 t3 = dct_const_round_shift(t1); | 406 t3 = fdct_round_shift(t1); |
| 401 | 407 |
| 402 // Stage 3 | 408 // Stage 3 |
| 403 x0 = s4 + t2; | 409 x0 = s4 + t2; |
| 404 x1 = s4 - t2; | 410 x1 = s4 - t2; |
| 405 x2 = s7 - t3; | 411 x2 = s7 - t3; |
| 406 x3 = s7 + t3; | 412 x3 = s7 + t3; |
| 407 | 413 |
| 408 // Stage 4 | 414 // Stage 4 |
| 409 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; | 415 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; |
| 410 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; | 416 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; |
| 411 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; | 417 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; |
| 412 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; | 418 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; |
| 413 out[2] = dct_const_round_shift(t0); | 419 out[2] = fdct_round_shift(t0); |
| 414 out[6] = dct_const_round_shift(t2); | 420 out[6] = fdct_round_shift(t2); |
| 415 out[10] = dct_const_round_shift(t1); | 421 out[10] = fdct_round_shift(t1); |
| 416 out[14] = dct_const_round_shift(t3); | 422 out[14] = fdct_round_shift(t3); |
| 417 } | 423 } |
| 418 // Work on the next eight values; step1 -> odd_results | 424 // Work on the next eight values; step1 -> odd_results |
| 419 { | 425 { |
| 420 // step 2 | 426 // step 2 |
| 421 temp1 = (step1[5] - step1[2]) * cospi_16_64; | 427 temp1 = (step1[5] - step1[2]) * cospi_16_64; |
| 422 temp2 = (step1[4] - step1[3]) * cospi_16_64; | 428 temp2 = (step1[4] - step1[3]) * cospi_16_64; |
| 423 step2[2] = dct_const_round_shift(temp1); | 429 step2[2] = fdct_round_shift(temp1); |
| 424 step2[3] = dct_const_round_shift(temp2); | 430 step2[3] = fdct_round_shift(temp2); |
| 425 temp1 = (step1[4] + step1[3]) * cospi_16_64; | 431 temp1 = (step1[4] + step1[3]) * cospi_16_64; |
| 426 temp2 = (step1[5] + step1[2]) * cospi_16_64; | 432 temp2 = (step1[5] + step1[2]) * cospi_16_64; |
| 427 step2[4] = dct_const_round_shift(temp1); | 433 step2[4] = fdct_round_shift(temp1); |
| 428 step2[5] = dct_const_round_shift(temp2); | 434 step2[5] = fdct_round_shift(temp2); |
| 429 // step 3 | 435 // step 3 |
| 430 step3[0] = step1[0] + step2[3]; | 436 step3[0] = step1[0] + step2[3]; |
| 431 step3[1] = step1[1] + step2[2]; | 437 step3[1] = step1[1] + step2[2]; |
| 432 step3[2] = step1[1] - step2[2]; | 438 step3[2] = step1[1] - step2[2]; |
| 433 step3[3] = step1[0] - step2[3]; | 439 step3[3] = step1[0] - step2[3]; |
| 434 step3[4] = step1[7] - step2[4]; | 440 step3[4] = step1[7] - step2[4]; |
| 435 step3[5] = step1[6] - step2[5]; | 441 step3[5] = step1[6] - step2[5]; |
| 436 step3[6] = step1[6] + step2[5]; | 442 step3[6] = step1[6] + step2[5]; |
| 437 step3[7] = step1[7] + step2[4]; | 443 step3[7] = step1[7] + step2[4]; |
| 438 // step 4 | 444 // step 4 |
| 439 temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64; | 445 temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64; |
| 440 temp2 = step3[2] * -cospi_24_64 - step3[5] * cospi_8_64; | 446 temp2 = step3[2] * -cospi_24_64 - step3[5] * cospi_8_64; |
| 441 step2[1] = dct_const_round_shift(temp1); | 447 step2[1] = fdct_round_shift(temp1); |
| 442 step2[2] = dct_const_round_shift(temp2); | 448 step2[2] = fdct_round_shift(temp2); |
| 443 temp1 = step3[2] * -cospi_8_64 + step3[5] * cospi_24_64; | 449 temp1 = step3[2] * -cospi_8_64 + step3[5] * cospi_24_64; |
| 444 temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64; | 450 temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64; |
| 445 step2[5] = dct_const_round_shift(temp1); | 451 step2[5] = fdct_round_shift(temp1); |
| 446 step2[6] = dct_const_round_shift(temp2); | 452 step2[6] = fdct_round_shift(temp2); |
| 447 // step 5 | 453 // step 5 |
| 448 step1[0] = step3[0] + step2[1]; | 454 step1[0] = step3[0] + step2[1]; |
| 449 step1[1] = step3[0] - step2[1]; | 455 step1[1] = step3[0] - step2[1]; |
| 450 step1[2] = step3[3] - step2[2]; | 456 step1[2] = step3[3] - step2[2]; |
| 451 step1[3] = step3[3] + step2[2]; | 457 step1[3] = step3[3] + step2[2]; |
| 452 step1[4] = step3[4] + step2[5]; | 458 step1[4] = step3[4] + step2[5]; |
| 453 step1[5] = step3[4] - step2[5]; | 459 step1[5] = step3[4] - step2[5]; |
| 454 step1[6] = step3[7] - step2[6]; | 460 step1[6] = step3[7] - step2[6]; |
| 455 step1[7] = step3[7] + step2[6]; | 461 step1[7] = step3[7] + step2[6]; |
| 456 // step 6 | 462 // step 6 |
| 457 temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64; | 463 temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64; |
| 458 temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64; | 464 temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64; |
| 459 out[1] = dct_const_round_shift(temp1); | 465 out[1] = fdct_round_shift(temp1); |
| 460 out[9] = dct_const_round_shift(temp2); | 466 out[9] = fdct_round_shift(temp2); |
| 461 temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64; | 467 temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64; |
| 462 temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64; | 468 temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64; |
| 463 out[5] = dct_const_round_shift(temp1); | 469 out[5] = fdct_round_shift(temp1); |
| 464 out[13] = dct_const_round_shift(temp2); | 470 out[13] = fdct_round_shift(temp2); |
| 465 temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64; | 471 temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64; |
| 466 temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64; | 472 temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64; |
| 467 out[3] = dct_const_round_shift(temp1); | 473 out[3] = fdct_round_shift(temp1); |
| 468 out[11] = dct_const_round_shift(temp2); | 474 out[11] = fdct_round_shift(temp2); |
| 469 temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64; | 475 temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64; |
| 470 temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64; | 476 temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64; |
| 471 out[7] = dct_const_round_shift(temp1); | 477 out[7] = fdct_round_shift(temp1); |
| 472 out[15] = dct_const_round_shift(temp2); | 478 out[15] = fdct_round_shift(temp2); |
| 473 } | 479 } |
| 474 // Do next column (which is a transposed row in second/horizontal pass) | 480 // Do next column (which is a transposed row in second/horizontal pass) |
| 475 in++; | 481 in++; |
| 476 out += 16; | 482 out += 16; |
| 477 } | 483 } |
| 478 // Setup in/out for next pass. | 484 // Setup in/out for next pass. |
| 479 in = intermediate; | 485 in = intermediate; |
| 480 out = output; | 486 out = output; |
| 481 } | 487 } |
| 482 } | 488 } |
| (...skipping 13 matching lines...) Expand all Loading... |
| 496 // stage 1 | 502 // stage 1 |
| 497 s0 = cospi_2_64 * x0 + cospi_30_64 * x1; | 503 s0 = cospi_2_64 * x0 + cospi_30_64 * x1; |
| 498 s1 = cospi_30_64 * x0 - cospi_2_64 * x1; | 504 s1 = cospi_30_64 * x0 - cospi_2_64 * x1; |
| 499 s2 = cospi_10_64 * x2 + cospi_22_64 * x3; | 505 s2 = cospi_10_64 * x2 + cospi_22_64 * x3; |
| 500 s3 = cospi_22_64 * x2 - cospi_10_64 * x3; | 506 s3 = cospi_22_64 * x2 - cospi_10_64 * x3; |
| 501 s4 = cospi_18_64 * x4 + cospi_14_64 * x5; | 507 s4 = cospi_18_64 * x4 + cospi_14_64 * x5; |
| 502 s5 = cospi_14_64 * x4 - cospi_18_64 * x5; | 508 s5 = cospi_14_64 * x4 - cospi_18_64 * x5; |
| 503 s6 = cospi_26_64 * x6 + cospi_6_64 * x7; | 509 s6 = cospi_26_64 * x6 + cospi_6_64 * x7; |
| 504 s7 = cospi_6_64 * x6 - cospi_26_64 * x7; | 510 s7 = cospi_6_64 * x6 - cospi_26_64 * x7; |
| 505 | 511 |
| 506 x0 = dct_const_round_shift(s0 + s4); | 512 x0 = fdct_round_shift(s0 + s4); |
| 507 x1 = dct_const_round_shift(s1 + s5); | 513 x1 = fdct_round_shift(s1 + s5); |
| 508 x2 = dct_const_round_shift(s2 + s6); | 514 x2 = fdct_round_shift(s2 + s6); |
| 509 x3 = dct_const_round_shift(s3 + s7); | 515 x3 = fdct_round_shift(s3 + s7); |
| 510 x4 = dct_const_round_shift(s0 - s4); | 516 x4 = fdct_round_shift(s0 - s4); |
| 511 x5 = dct_const_round_shift(s1 - s5); | 517 x5 = fdct_round_shift(s1 - s5); |
| 512 x6 = dct_const_round_shift(s2 - s6); | 518 x6 = fdct_round_shift(s2 - s6); |
| 513 x7 = dct_const_round_shift(s3 - s7); | 519 x7 = fdct_round_shift(s3 - s7); |
| 514 | 520 |
| 515 // stage 2 | 521 // stage 2 |
| 516 s0 = x0; | 522 s0 = x0; |
| 517 s1 = x1; | 523 s1 = x1; |
| 518 s2 = x2; | 524 s2 = x2; |
| 519 s3 = x3; | 525 s3 = x3; |
| 520 s4 = cospi_8_64 * x4 + cospi_24_64 * x5; | 526 s4 = cospi_8_64 * x4 + cospi_24_64 * x5; |
| 521 s5 = cospi_24_64 * x4 - cospi_8_64 * x5; | 527 s5 = cospi_24_64 * x4 - cospi_8_64 * x5; |
| 522 s6 = - cospi_24_64 * x6 + cospi_8_64 * x7; | 528 s6 = - cospi_24_64 * x6 + cospi_8_64 * x7; |
| 523 s7 = cospi_8_64 * x6 + cospi_24_64 * x7; | 529 s7 = cospi_8_64 * x6 + cospi_24_64 * x7; |
| 524 | 530 |
| 525 x0 = s0 + s2; | 531 x0 = s0 + s2; |
| 526 x1 = s1 + s3; | 532 x1 = s1 + s3; |
| 527 x2 = s0 - s2; | 533 x2 = s0 - s2; |
| 528 x3 = s1 - s3; | 534 x3 = s1 - s3; |
| 529 x4 = dct_const_round_shift(s4 + s6); | 535 x4 = fdct_round_shift(s4 + s6); |
| 530 x5 = dct_const_round_shift(s5 + s7); | 536 x5 = fdct_round_shift(s5 + s7); |
| 531 x6 = dct_const_round_shift(s4 - s6); | 537 x6 = fdct_round_shift(s4 - s6); |
| 532 x7 = dct_const_round_shift(s5 - s7); | 538 x7 = fdct_round_shift(s5 - s7); |
| 533 | 539 |
| 534 // stage 3 | 540 // stage 3 |
| 535 s2 = cospi_16_64 * (x2 + x3); | 541 s2 = cospi_16_64 * (x2 + x3); |
| 536 s3 = cospi_16_64 * (x2 - x3); | 542 s3 = cospi_16_64 * (x2 - x3); |
| 537 s6 = cospi_16_64 * (x6 + x7); | 543 s6 = cospi_16_64 * (x6 + x7); |
| 538 s7 = cospi_16_64 * (x6 - x7); | 544 s7 = cospi_16_64 * (x6 - x7); |
| 539 | 545 |
| 540 x2 = dct_const_round_shift(s2); | 546 x2 = fdct_round_shift(s2); |
| 541 x3 = dct_const_round_shift(s3); | 547 x3 = fdct_round_shift(s3); |
| 542 x6 = dct_const_round_shift(s6); | 548 x6 = fdct_round_shift(s6); |
| 543 x7 = dct_const_round_shift(s7); | 549 x7 = fdct_round_shift(s7); |
| 544 | 550 |
| 545 output[0] = x0; | 551 output[0] = x0; |
| 546 output[1] = - x4; | 552 output[1] = - x4; |
| 547 output[2] = x6; | 553 output[2] = x6; |
| 548 output[3] = - x2; | 554 output[3] = - x2; |
| 549 output[4] = x3; | 555 output[4] = x3; |
| 550 output[5] = - x7; | 556 output[5] = - x7; |
| 551 output[6] = x5; | 557 output[6] = x5; |
| 552 output[7] = - x1; | 558 output[7] = - x1; |
| 553 } | 559 } |
| (...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 686 | 692 |
| 687 // fdct4(step, step); | 693 // fdct4(step, step); |
| 688 x0 = s0 + s3; | 694 x0 = s0 + s3; |
| 689 x1 = s1 + s2; | 695 x1 = s1 + s2; |
| 690 x2 = s1 - s2; | 696 x2 = s1 - s2; |
| 691 x3 = s0 - s3; | 697 x3 = s0 - s3; |
| 692 t0 = (x0 + x1) * cospi_16_64; | 698 t0 = (x0 + x1) * cospi_16_64; |
| 693 t1 = (x0 - x1) * cospi_16_64; | 699 t1 = (x0 - x1) * cospi_16_64; |
| 694 t2 = x3 * cospi_8_64 + x2 * cospi_24_64; | 700 t2 = x3 * cospi_8_64 + x2 * cospi_24_64; |
| 695 t3 = x3 * cospi_24_64 - x2 * cospi_8_64; | 701 t3 = x3 * cospi_24_64 - x2 * cospi_8_64; |
| 696 out[0] = dct_const_round_shift(t0); | 702 out[0] = fdct_round_shift(t0); |
| 697 out[4] = dct_const_round_shift(t2); | 703 out[4] = fdct_round_shift(t2); |
| 698 out[8] = dct_const_round_shift(t1); | 704 out[8] = fdct_round_shift(t1); |
| 699 out[12] = dct_const_round_shift(t3); | 705 out[12] = fdct_round_shift(t3); |
| 700 | 706 |
| 701 // Stage 2 | 707 // Stage 2 |
| 702 t0 = (s6 - s5) * cospi_16_64; | 708 t0 = (s6 - s5) * cospi_16_64; |
| 703 t1 = (s6 + s5) * cospi_16_64; | 709 t1 = (s6 + s5) * cospi_16_64; |
| 704 t2 = dct_const_round_shift(t0); | 710 t2 = fdct_round_shift(t0); |
| 705 t3 = dct_const_round_shift(t1); | 711 t3 = fdct_round_shift(t1); |
| 706 | 712 |
| 707 // Stage 3 | 713 // Stage 3 |
| 708 x0 = s4 + t2; | 714 x0 = s4 + t2; |
| 709 x1 = s4 - t2; | 715 x1 = s4 - t2; |
| 710 x2 = s7 - t3; | 716 x2 = s7 - t3; |
| 711 x3 = s7 + t3; | 717 x3 = s7 + t3; |
| 712 | 718 |
| 713 // Stage 4 | 719 // Stage 4 |
| 714 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; | 720 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; |
| 715 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; | 721 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; |
| 716 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; | 722 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; |
| 717 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; | 723 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; |
| 718 out[2] = dct_const_round_shift(t0); | 724 out[2] = fdct_round_shift(t0); |
| 719 out[6] = dct_const_round_shift(t2); | 725 out[6] = fdct_round_shift(t2); |
| 720 out[10] = dct_const_round_shift(t1); | 726 out[10] = fdct_round_shift(t1); |
| 721 out[14] = dct_const_round_shift(t3); | 727 out[14] = fdct_round_shift(t3); |
| 722 } | 728 } |
| 723 | 729 |
| 724 // step 2 | 730 // step 2 |
| 725 temp1 = (step1[5] - step1[2]) * cospi_16_64; | 731 temp1 = (step1[5] - step1[2]) * cospi_16_64; |
| 726 temp2 = (step1[4] - step1[3]) * cospi_16_64; | 732 temp2 = (step1[4] - step1[3]) * cospi_16_64; |
| 727 step2[2] = dct_const_round_shift(temp1); | 733 step2[2] = fdct_round_shift(temp1); |
| 728 step2[3] = dct_const_round_shift(temp2); | 734 step2[3] = fdct_round_shift(temp2); |
| 729 temp1 = (step1[4] + step1[3]) * cospi_16_64; | 735 temp1 = (step1[4] + step1[3]) * cospi_16_64; |
| 730 temp2 = (step1[5] + step1[2]) * cospi_16_64; | 736 temp2 = (step1[5] + step1[2]) * cospi_16_64; |
| 731 step2[4] = dct_const_round_shift(temp1); | 737 step2[4] = fdct_round_shift(temp1); |
| 732 step2[5] = dct_const_round_shift(temp2); | 738 step2[5] = fdct_round_shift(temp2); |
| 733 | 739 |
| 734 // step 3 | 740 // step 3 |
| 735 step3[0] = step1[0] + step2[3]; | 741 step3[0] = step1[0] + step2[3]; |
| 736 step3[1] = step1[1] + step2[2]; | 742 step3[1] = step1[1] + step2[2]; |
| 737 step3[2] = step1[1] - step2[2]; | 743 step3[2] = step1[1] - step2[2]; |
| 738 step3[3] = step1[0] - step2[3]; | 744 step3[3] = step1[0] - step2[3]; |
| 739 step3[4] = step1[7] - step2[4]; | 745 step3[4] = step1[7] - step2[4]; |
| 740 step3[5] = step1[6] - step2[5]; | 746 step3[5] = step1[6] - step2[5]; |
| 741 step3[6] = step1[6] + step2[5]; | 747 step3[6] = step1[6] + step2[5]; |
| 742 step3[7] = step1[7] + step2[4]; | 748 step3[7] = step1[7] + step2[4]; |
| 743 | 749 |
| 744 // step 4 | 750 // step 4 |
| 745 temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64; | 751 temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64; |
| 746 temp2 = step3[2] * -cospi_24_64 - step3[5] * cospi_8_64; | 752 temp2 = step3[2] * -cospi_24_64 - step3[5] * cospi_8_64; |
| 747 step2[1] = dct_const_round_shift(temp1); | 753 step2[1] = fdct_round_shift(temp1); |
| 748 step2[2] = dct_const_round_shift(temp2); | 754 step2[2] = fdct_round_shift(temp2); |
| 749 temp1 = step3[2] * -cospi_8_64 + step3[5] * cospi_24_64; | 755 temp1 = step3[2] * -cospi_8_64 + step3[5] * cospi_24_64; |
| 750 temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64; | 756 temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64; |
| 751 step2[5] = dct_const_round_shift(temp1); | 757 step2[5] = fdct_round_shift(temp1); |
| 752 step2[6] = dct_const_round_shift(temp2); | 758 step2[6] = fdct_round_shift(temp2); |
| 753 | 759 |
| 754 // step 5 | 760 // step 5 |
| 755 step1[0] = step3[0] + step2[1]; | 761 step1[0] = step3[0] + step2[1]; |
| 756 step1[1] = step3[0] - step2[1]; | 762 step1[1] = step3[0] - step2[1]; |
| 757 step1[2] = step3[3] - step2[2]; | 763 step1[2] = step3[3] - step2[2]; |
| 758 step1[3] = step3[3] + step2[2]; | 764 step1[3] = step3[3] + step2[2]; |
| 759 step1[4] = step3[4] + step2[5]; | 765 step1[4] = step3[4] + step2[5]; |
| 760 step1[5] = step3[4] - step2[5]; | 766 step1[5] = step3[4] - step2[5]; |
| 761 step1[6] = step3[7] - step2[6]; | 767 step1[6] = step3[7] - step2[6]; |
| 762 step1[7] = step3[7] + step2[6]; | 768 step1[7] = step3[7] + step2[6]; |
| 763 | 769 |
| 764 // step 6 | 770 // step 6 |
| 765 temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64; | 771 temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64; |
| 766 temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64; | 772 temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64; |
| 767 out[1] = dct_const_round_shift(temp1); | 773 out[1] = fdct_round_shift(temp1); |
| 768 out[9] = dct_const_round_shift(temp2); | 774 out[9] = fdct_round_shift(temp2); |
| 769 | 775 |
| 770 temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64; | 776 temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64; |
| 771 temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64; | 777 temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64; |
| 772 out[5] = dct_const_round_shift(temp1); | 778 out[5] = fdct_round_shift(temp1); |
| 773 out[13] = dct_const_round_shift(temp2); | 779 out[13] = fdct_round_shift(temp2); |
| 774 | 780 |
| 775 temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64; | 781 temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64; |
| 776 temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64; | 782 temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64; |
| 777 out[3] = dct_const_round_shift(temp1); | 783 out[3] = fdct_round_shift(temp1); |
| 778 out[11] = dct_const_round_shift(temp2); | 784 out[11] = fdct_round_shift(temp2); |
| 779 | 785 |
| 780 temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64; | 786 temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64; |
| 781 temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64; | 787 temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64; |
| 782 out[7] = dct_const_round_shift(temp1); | 788 out[7] = fdct_round_shift(temp1); |
| 783 out[15] = dct_const_round_shift(temp2); | 789 out[15] = fdct_round_shift(temp2); |
| 784 } | 790 } |
| 785 | 791 |
| 786 static void fadst16(const int16_t *input, int16_t *output) { | 792 static void fadst16(const int16_t *input, int16_t *output) { |
| 787 int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15; | 793 int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15; |
| 788 | 794 |
| 789 int x0 = input[15]; | 795 int x0 = input[15]; |
| 790 int x1 = input[0]; | 796 int x1 = input[0]; |
| 791 int x2 = input[13]; | 797 int x2 = input[13]; |
| 792 int x3 = input[2]; | 798 int x3 = input[2]; |
| 793 int x4 = input[11]; | 799 int x4 = input[11]; |
| (...skipping 20 matching lines...) Expand all Loading... |
| 814 s7 = x6 * cospi_19_64 - x7 * cospi_13_64; | 820 s7 = x6 * cospi_19_64 - x7 * cospi_13_64; |
| 815 s8 = x8 * cospi_17_64 + x9 * cospi_15_64; | 821 s8 = x8 * cospi_17_64 + x9 * cospi_15_64; |
| 816 s9 = x8 * cospi_15_64 - x9 * cospi_17_64; | 822 s9 = x8 * cospi_15_64 - x9 * cospi_17_64; |
| 817 s10 = x10 * cospi_21_64 + x11 * cospi_11_64; | 823 s10 = x10 * cospi_21_64 + x11 * cospi_11_64; |
| 818 s11 = x10 * cospi_11_64 - x11 * cospi_21_64; | 824 s11 = x10 * cospi_11_64 - x11 * cospi_21_64; |
| 819 s12 = x12 * cospi_25_64 + x13 * cospi_7_64; | 825 s12 = x12 * cospi_25_64 + x13 * cospi_7_64; |
| 820 s13 = x12 * cospi_7_64 - x13 * cospi_25_64; | 826 s13 = x12 * cospi_7_64 - x13 * cospi_25_64; |
| 821 s14 = x14 * cospi_29_64 + x15 * cospi_3_64; | 827 s14 = x14 * cospi_29_64 + x15 * cospi_3_64; |
| 822 s15 = x14 * cospi_3_64 - x15 * cospi_29_64; | 828 s15 = x14 * cospi_3_64 - x15 * cospi_29_64; |
| 823 | 829 |
| 824 x0 = dct_const_round_shift(s0 + s8); | 830 x0 = fdct_round_shift(s0 + s8); |
| 825 x1 = dct_const_round_shift(s1 + s9); | 831 x1 = fdct_round_shift(s1 + s9); |
| 826 x2 = dct_const_round_shift(s2 + s10); | 832 x2 = fdct_round_shift(s2 + s10); |
| 827 x3 = dct_const_round_shift(s3 + s11); | 833 x3 = fdct_round_shift(s3 + s11); |
| 828 x4 = dct_const_round_shift(s4 + s12); | 834 x4 = fdct_round_shift(s4 + s12); |
| 829 x5 = dct_const_round_shift(s5 + s13); | 835 x5 = fdct_round_shift(s5 + s13); |
| 830 x6 = dct_const_round_shift(s6 + s14); | 836 x6 = fdct_round_shift(s6 + s14); |
| 831 x7 = dct_const_round_shift(s7 + s15); | 837 x7 = fdct_round_shift(s7 + s15); |
| 832 x8 = dct_const_round_shift(s0 - s8); | 838 x8 = fdct_round_shift(s0 - s8); |
| 833 x9 = dct_const_round_shift(s1 - s9); | 839 x9 = fdct_round_shift(s1 - s9); |
| 834 x10 = dct_const_round_shift(s2 - s10); | 840 x10 = fdct_round_shift(s2 - s10); |
| 835 x11 = dct_const_round_shift(s3 - s11); | 841 x11 = fdct_round_shift(s3 - s11); |
| 836 x12 = dct_const_round_shift(s4 - s12); | 842 x12 = fdct_round_shift(s4 - s12); |
| 837 x13 = dct_const_round_shift(s5 - s13); | 843 x13 = fdct_round_shift(s5 - s13); |
| 838 x14 = dct_const_round_shift(s6 - s14); | 844 x14 = fdct_round_shift(s6 - s14); |
| 839 x15 = dct_const_round_shift(s7 - s15); | 845 x15 = fdct_round_shift(s7 - s15); |
| 840 | 846 |
| 841 // stage 2 | 847 // stage 2 |
| 842 s0 = x0; | 848 s0 = x0; |
| 843 s1 = x1; | 849 s1 = x1; |
| 844 s2 = x2; | 850 s2 = x2; |
| 845 s3 = x3; | 851 s3 = x3; |
| 846 s4 = x4; | 852 s4 = x4; |
| 847 s5 = x5; | 853 s5 = x5; |
| 848 s6 = x6; | 854 s6 = x6; |
| 849 s7 = x7; | 855 s7 = x7; |
| 850 s8 = x8 * cospi_4_64 + x9 * cospi_28_64; | 856 s8 = x8 * cospi_4_64 + x9 * cospi_28_64; |
| 851 s9 = x8 * cospi_28_64 - x9 * cospi_4_64; | 857 s9 = x8 * cospi_28_64 - x9 * cospi_4_64; |
| 852 s10 = x10 * cospi_20_64 + x11 * cospi_12_64; | 858 s10 = x10 * cospi_20_64 + x11 * cospi_12_64; |
| 853 s11 = x10 * cospi_12_64 - x11 * cospi_20_64; | 859 s11 = x10 * cospi_12_64 - x11 * cospi_20_64; |
| 854 s12 = - x12 * cospi_28_64 + x13 * cospi_4_64; | 860 s12 = - x12 * cospi_28_64 + x13 * cospi_4_64; |
| 855 s13 = x12 * cospi_4_64 + x13 * cospi_28_64; | 861 s13 = x12 * cospi_4_64 + x13 * cospi_28_64; |
| 856 s14 = - x14 * cospi_12_64 + x15 * cospi_20_64; | 862 s14 = - x14 * cospi_12_64 + x15 * cospi_20_64; |
| 857 s15 = x14 * cospi_20_64 + x15 * cospi_12_64; | 863 s15 = x14 * cospi_20_64 + x15 * cospi_12_64; |
| 858 | 864 |
| 859 x0 = s0 + s4; | 865 x0 = s0 + s4; |
| 860 x1 = s1 + s5; | 866 x1 = s1 + s5; |
| 861 x2 = s2 + s6; | 867 x2 = s2 + s6; |
| 862 x3 = s3 + s7; | 868 x3 = s3 + s7; |
| 863 x4 = s0 - s4; | 869 x4 = s0 - s4; |
| 864 x5 = s1 - s5; | 870 x5 = s1 - s5; |
| 865 x6 = s2 - s6; | 871 x6 = s2 - s6; |
| 866 x7 = s3 - s7; | 872 x7 = s3 - s7; |
| 867 x8 = dct_const_round_shift(s8 + s12); | 873 x8 = fdct_round_shift(s8 + s12); |
| 868 x9 = dct_const_round_shift(s9 + s13); | 874 x9 = fdct_round_shift(s9 + s13); |
| 869 x10 = dct_const_round_shift(s10 + s14); | 875 x10 = fdct_round_shift(s10 + s14); |
| 870 x11 = dct_const_round_shift(s11 + s15); | 876 x11 = fdct_round_shift(s11 + s15); |
| 871 x12 = dct_const_round_shift(s8 - s12); | 877 x12 = fdct_round_shift(s8 - s12); |
| 872 x13 = dct_const_round_shift(s9 - s13); | 878 x13 = fdct_round_shift(s9 - s13); |
| 873 x14 = dct_const_round_shift(s10 - s14); | 879 x14 = fdct_round_shift(s10 - s14); |
| 874 x15 = dct_const_round_shift(s11 - s15); | 880 x15 = fdct_round_shift(s11 - s15); |
| 875 | 881 |
| 876 // stage 3 | 882 // stage 3 |
| 877 s0 = x0; | 883 s0 = x0; |
| 878 s1 = x1; | 884 s1 = x1; |
| 879 s2 = x2; | 885 s2 = x2; |
| 880 s3 = x3; | 886 s3 = x3; |
| 881 s4 = x4 * cospi_8_64 + x5 * cospi_24_64; | 887 s4 = x4 * cospi_8_64 + x5 * cospi_24_64; |
| 882 s5 = x4 * cospi_24_64 - x5 * cospi_8_64; | 888 s5 = x4 * cospi_24_64 - x5 * cospi_8_64; |
| 883 s6 = - x6 * cospi_24_64 + x7 * cospi_8_64; | 889 s6 = - x6 * cospi_24_64 + x7 * cospi_8_64; |
| 884 s7 = x6 * cospi_8_64 + x7 * cospi_24_64; | 890 s7 = x6 * cospi_8_64 + x7 * cospi_24_64; |
| 885 s8 = x8; | 891 s8 = x8; |
| 886 s9 = x9; | 892 s9 = x9; |
| 887 s10 = x10; | 893 s10 = x10; |
| 888 s11 = x11; | 894 s11 = x11; |
| 889 s12 = x12 * cospi_8_64 + x13 * cospi_24_64; | 895 s12 = x12 * cospi_8_64 + x13 * cospi_24_64; |
| 890 s13 = x12 * cospi_24_64 - x13 * cospi_8_64; | 896 s13 = x12 * cospi_24_64 - x13 * cospi_8_64; |
| 891 s14 = - x14 * cospi_24_64 + x15 * cospi_8_64; | 897 s14 = - x14 * cospi_24_64 + x15 * cospi_8_64; |
| 892 s15 = x14 * cospi_8_64 + x15 * cospi_24_64; | 898 s15 = x14 * cospi_8_64 + x15 * cospi_24_64; |
| 893 | 899 |
| 894 x0 = s0 + s2; | 900 x0 = s0 + s2; |
| 895 x1 = s1 + s3; | 901 x1 = s1 + s3; |
| 896 x2 = s0 - s2; | 902 x2 = s0 - s2; |
| 897 x3 = s1 - s3; | 903 x3 = s1 - s3; |
| 898 x4 = dct_const_round_shift(s4 + s6); | 904 x4 = fdct_round_shift(s4 + s6); |
| 899 x5 = dct_const_round_shift(s5 + s7); | 905 x5 = fdct_round_shift(s5 + s7); |
| 900 x6 = dct_const_round_shift(s4 - s6); | 906 x6 = fdct_round_shift(s4 - s6); |
| 901 x7 = dct_const_round_shift(s5 - s7); | 907 x7 = fdct_round_shift(s5 - s7); |
| 902 x8 = s8 + s10; | 908 x8 = s8 + s10; |
| 903 x9 = s9 + s11; | 909 x9 = s9 + s11; |
| 904 x10 = s8 - s10; | 910 x10 = s8 - s10; |
| 905 x11 = s9 - s11; | 911 x11 = s9 - s11; |
| 906 x12 = dct_const_round_shift(s12 + s14); | 912 x12 = fdct_round_shift(s12 + s14); |
| 907 x13 = dct_const_round_shift(s13 + s15); | 913 x13 = fdct_round_shift(s13 + s15); |
| 908 x14 = dct_const_round_shift(s12 - s14); | 914 x14 = fdct_round_shift(s12 - s14); |
| 909 x15 = dct_const_round_shift(s13 - s15); | 915 x15 = fdct_round_shift(s13 - s15); |
| 910 | 916 |
| 911 // stage 4 | 917 // stage 4 |
| 912 s2 = (- cospi_16_64) * (x2 + x3); | 918 s2 = (- cospi_16_64) * (x2 + x3); |
| 913 s3 = cospi_16_64 * (x2 - x3); | 919 s3 = cospi_16_64 * (x2 - x3); |
| 914 s6 = cospi_16_64 * (x6 + x7); | 920 s6 = cospi_16_64 * (x6 + x7); |
| 915 s7 = cospi_16_64 * (- x6 + x7); | 921 s7 = cospi_16_64 * (- x6 + x7); |
| 916 s10 = cospi_16_64 * (x10 + x11); | 922 s10 = cospi_16_64 * (x10 + x11); |
| 917 s11 = cospi_16_64 * (- x10 + x11); | 923 s11 = cospi_16_64 * (- x10 + x11); |
| 918 s14 = (- cospi_16_64) * (x14 + x15); | 924 s14 = (- cospi_16_64) * (x14 + x15); |
| 919 s15 = cospi_16_64 * (x14 - x15); | 925 s15 = cospi_16_64 * (x14 - x15); |
| 920 | 926 |
| 921 x2 = dct_const_round_shift(s2); | 927 x2 = fdct_round_shift(s2); |
| 922 x3 = dct_const_round_shift(s3); | 928 x3 = fdct_round_shift(s3); |
| 923 x6 = dct_const_round_shift(s6); | 929 x6 = fdct_round_shift(s6); |
| 924 x7 = dct_const_round_shift(s7); | 930 x7 = fdct_round_shift(s7); |
| 925 x10 = dct_const_round_shift(s10); | 931 x10 = fdct_round_shift(s10); |
| 926 x11 = dct_const_round_shift(s11); | 932 x11 = fdct_round_shift(s11); |
| 927 x14 = dct_const_round_shift(s14); | 933 x14 = fdct_round_shift(s14); |
| 928 x15 = dct_const_round_shift(s15); | 934 x15 = fdct_round_shift(s15); |
| 929 | 935 |
| 930 output[0] = x0; | 936 output[0] = x0; |
| 931 output[1] = - x8; | 937 output[1] = - x8; |
| 932 output[2] = x12; | 938 output[2] = x12; |
| 933 output[3] = - x4; | 939 output[3] = - x4; |
| 934 output[4] = x6; | 940 output[4] = x6; |
| 935 output[5] = x14; | 941 output[5] = x14; |
| 936 output[6] = x10; | 942 output[6] = x10; |
| 937 output[7] = x2; | 943 output[7] = x2; |
| 938 output[8] = x3; | 944 output[8] = x3; |
| (...skipping 447 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1386 vp9_short_fht8x8(input, output, stride, tx_type); | 1392 vp9_short_fht8x8(input, output, stride, tx_type); |
| 1387 } | 1393 } |
| 1388 | 1394 |
| 1389 void vp9_fht16x16(TX_TYPE tx_type, const int16_t *input, int16_t *output, | 1395 void vp9_fht16x16(TX_TYPE tx_type, const int16_t *input, int16_t *output, |
| 1390 int stride) { | 1396 int stride) { |
| 1391 if (tx_type == DCT_DCT) | 1397 if (tx_type == DCT_DCT) |
| 1392 vp9_fdct16x16(input, output, stride); | 1398 vp9_fdct16x16(input, output, stride); |
| 1393 else | 1399 else |
| 1394 vp9_short_fht16x16(input, output, stride, tx_type); | 1400 vp9_short_fht16x16(input, output, stride, tx_type); |
| 1395 } | 1401 } |
| OLD | NEW |