| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 19 matching lines...) Expand all Loading... |
| 30 tran_high_t step[4]; | 30 tran_high_t step[4]; |
| 31 tran_high_t temp1, temp2; | 31 tran_high_t temp1, temp2; |
| 32 | 32 |
| 33 step[0] = input[0] + input[3]; | 33 step[0] = input[0] + input[3]; |
| 34 step[1] = input[1] + input[2]; | 34 step[1] = input[1] + input[2]; |
| 35 step[2] = input[1] - input[2]; | 35 step[2] = input[1] - input[2]; |
| 36 step[3] = input[0] - input[3]; | 36 step[3] = input[0] - input[3]; |
| 37 | 37 |
| 38 temp1 = (step[0] + step[1]) * cospi_16_64; | 38 temp1 = (step[0] + step[1]) * cospi_16_64; |
| 39 temp2 = (step[0] - step[1]) * cospi_16_64; | 39 temp2 = (step[0] - step[1]) * cospi_16_64; |
| 40 output[0] = fdct_round_shift(temp1); | 40 output[0] = (tran_low_t)fdct_round_shift(temp1); |
| 41 output[2] = fdct_round_shift(temp2); | 41 output[2] = (tran_low_t)fdct_round_shift(temp2); |
| 42 temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64; | 42 temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64; |
| 43 temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64; | 43 temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64; |
| 44 output[1] = fdct_round_shift(temp1); | 44 output[1] = (tran_low_t)fdct_round_shift(temp1); |
| 45 output[3] = fdct_round_shift(temp2); | 45 output[3] = (tran_low_t)fdct_round_shift(temp2); |
| 46 } | 46 } |
| 47 | 47 |
| 48 void vp9_fdct4x4_1_c(const int16_t *input, tran_low_t *output, int stride) { | 48 void vp9_fdct4x4_1_c(const int16_t *input, tran_low_t *output, int stride) { |
| 49 int r, c; | 49 int r, c; |
| 50 tran_low_t sum = 0; | 50 tran_low_t sum = 0; |
| 51 for (r = 0; r < 4; ++r) | 51 for (r = 0; r < 4; ++r) |
| 52 for (c = 0; c < 4; ++c) | 52 for (c = 0; c < 4; ++c) |
| 53 sum += input[r * stride + c]; | 53 sum += input[r * stride + c]; |
| 54 | 54 |
| 55 output[0] = sum << 1; | 55 output[0] = sum << 1; |
| (...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 91 input[2] = in[2 * 4]; | 91 input[2] = in[2 * 4]; |
| 92 input[3] = in[3 * 4]; | 92 input[3] = in[3 * 4]; |
| 93 } | 93 } |
| 94 // Transform. | 94 // Transform. |
| 95 step[0] = input[0] + input[3]; | 95 step[0] = input[0] + input[3]; |
| 96 step[1] = input[1] + input[2]; | 96 step[1] = input[1] + input[2]; |
| 97 step[2] = input[1] - input[2]; | 97 step[2] = input[1] - input[2]; |
| 98 step[3] = input[0] - input[3]; | 98 step[3] = input[0] - input[3]; |
| 99 temp1 = (step[0] + step[1]) * cospi_16_64; | 99 temp1 = (step[0] + step[1]) * cospi_16_64; |
| 100 temp2 = (step[0] - step[1]) * cospi_16_64; | 100 temp2 = (step[0] - step[1]) * cospi_16_64; |
| 101 out[0] = fdct_round_shift(temp1); | 101 out[0] = (tran_low_t)fdct_round_shift(temp1); |
| 102 out[2] = fdct_round_shift(temp2); | 102 out[2] = (tran_low_t)fdct_round_shift(temp2); |
| 103 temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64; | 103 temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64; |
| 104 temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64; | 104 temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64; |
| 105 out[1] = fdct_round_shift(temp1); | 105 out[1] = (tran_low_t)fdct_round_shift(temp1); |
| 106 out[3] = fdct_round_shift(temp2); | 106 out[3] = (tran_low_t)fdct_round_shift(temp2); |
| 107 // Do next column (which is a transposed row in second/horizontal pass) | 107 // Do next column (which is a transposed row in second/horizontal pass) |
| 108 in_pass0++; | 108 in_pass0++; |
| 109 in++; | 109 in++; |
| 110 out += 4; | 110 out += 4; |
| 111 } | 111 } |
| 112 // Setup in/out for next pass. | 112 // Setup in/out for next pass. |
| 113 in = intermediate; | 113 in = intermediate; |
| 114 out = output; | 114 out = output; |
| 115 } | 115 } |
| 116 | 116 |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 150 x1 = sinpi_3_9 * s7; | 150 x1 = sinpi_3_9 * s7; |
| 151 x2 = s1 - s3 + s6; | 151 x2 = s1 - s3 + s6; |
| 152 x3 = s4; | 152 x3 = s4; |
| 153 | 153 |
| 154 s0 = x0 + x3; | 154 s0 = x0 + x3; |
| 155 s1 = x1; | 155 s1 = x1; |
| 156 s2 = x2 - x3; | 156 s2 = x2 - x3; |
| 157 s3 = x2 - x0 + x3; | 157 s3 = x2 - x0 + x3; |
| 158 | 158 |
| 159 // 1-D transform scaling factor is sqrt(2). | 159 // 1-D transform scaling factor is sqrt(2). |
| 160 output[0] = fdct_round_shift(s0); | 160 output[0] = (tran_low_t)fdct_round_shift(s0); |
| 161 output[1] = fdct_round_shift(s1); | 161 output[1] = (tran_low_t)fdct_round_shift(s1); |
| 162 output[2] = fdct_round_shift(s2); | 162 output[2] = (tran_low_t)fdct_round_shift(s2); |
| 163 output[3] = fdct_round_shift(s3); | 163 output[3] = (tran_low_t)fdct_round_shift(s3); |
| 164 } | 164 } |
| 165 | 165 |
| 166 static const transform_2d FHT_4[] = { | 166 static const transform_2d FHT_4[] = { /* pairs of 4-point 1-D transforms, one entry per transform type 0..3 (see row comments); the role of each pair member is fixed by transform_2d, which is defined outside this view */ |
| 167 { fdct4, fdct4 }, // DCT_DCT = 0 | 167 { fdct4, fdct4 }, // DCT_DCT = 0 |
| 168 { fadst4, fdct4 }, // ADST_DCT = 1 | 168 { fadst4, fdct4 }, // ADST_DCT = 1 |
| 169 { fdct4, fadst4 }, // DCT_ADST = 2 | 169 { fdct4, fadst4 }, // DCT_ADST = 2 |
| 170 { fadst4, fadst4 } // ADST_ADST = 3 | 170 { fadst4, fadst4 } // ADST_ADST = 3 |
| 171 }; | 171 }; |
| 172 | 172 |
| 173 void vp9_fht4x4_c(const int16_t *input, tran_low_t *output, | 173 void vp9_fht4x4_c(const int16_t *input, tran_low_t *output, |
| (...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 220 | 220 |
| 221 // fdct4(step, step); | 221 // fdct4(step, step); |
| 222 x0 = s0 + s3; | 222 x0 = s0 + s3; |
| 223 x1 = s1 + s2; | 223 x1 = s1 + s2; |
| 224 x2 = s1 - s2; | 224 x2 = s1 - s2; |
| 225 x3 = s0 - s3; | 225 x3 = s0 - s3; |
| 226 t0 = (x0 + x1) * cospi_16_64; | 226 t0 = (x0 + x1) * cospi_16_64; |
| 227 t1 = (x0 - x1) * cospi_16_64; | 227 t1 = (x0 - x1) * cospi_16_64; |
| 228 t2 = x2 * cospi_24_64 + x3 * cospi_8_64; | 228 t2 = x2 * cospi_24_64 + x3 * cospi_8_64; |
| 229 t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; | 229 t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; |
| 230 output[0] = fdct_round_shift(t0); | 230 output[0] = (tran_low_t)fdct_round_shift(t0); |
| 231 output[2] = fdct_round_shift(t2); | 231 output[2] = (tran_low_t)fdct_round_shift(t2); |
| 232 output[4] = fdct_round_shift(t1); | 232 output[4] = (tran_low_t)fdct_round_shift(t1); |
| 233 output[6] = fdct_round_shift(t3); | 233 output[6] = (tran_low_t)fdct_round_shift(t3); |
| 234 | 234 |
| 235 // Stage 2 | 235 // Stage 2 |
| 236 t0 = (s6 - s5) * cospi_16_64; | 236 t0 = (s6 - s5) * cospi_16_64; |
| 237 t1 = (s6 + s5) * cospi_16_64; | 237 t1 = (s6 + s5) * cospi_16_64; |
| 238 t2 = fdct_round_shift(t0); | 238 t2 = (tran_low_t)fdct_round_shift(t0); |
| 239 t3 = fdct_round_shift(t1); | 239 t3 = (tran_low_t)fdct_round_shift(t1); |
| 240 | 240 |
| 241 // Stage 3 | 241 // Stage 3 |
| 242 x0 = s4 + t2; | 242 x0 = s4 + t2; |
| 243 x1 = s4 - t2; | 243 x1 = s4 - t2; |
| 244 x2 = s7 - t3; | 244 x2 = s7 - t3; |
| 245 x3 = s7 + t3; | 245 x3 = s7 + t3; |
| 246 | 246 |
| 247 // Stage 4 | 247 // Stage 4 |
| 248 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; | 248 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; |
| 249 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; | 249 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; |
| 250 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; | 250 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; |
| 251 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; | 251 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; |
| 252 output[1] = fdct_round_shift(t0); | 252 output[1] = (tran_low_t)fdct_round_shift(t0); |
| 253 output[3] = fdct_round_shift(t2); | 253 output[3] = (tran_low_t)fdct_round_shift(t2); |
| 254 output[5] = fdct_round_shift(t1); | 254 output[5] = (tran_low_t)fdct_round_shift(t1); |
| 255 output[7] = fdct_round_shift(t3); | 255 output[7] = (tran_low_t)fdct_round_shift(t3); |
| 256 } | 256 } |
| 257 | 257 |
| 258 void vp9_fdct8x8_1_c(const int16_t *input, tran_low_t *output, int stride) { | 258 void vp9_fdct8x8_1_c(const int16_t *input, tran_low_t *output, int stride) { |
| 259 int r, c; | 259 int r, c; |
| 260 tran_low_t sum = 0; | 260 tran_low_t sum = 0; |
| 261 for (r = 0; r < 8; ++r) | 261 for (r = 0; r < 8; ++r) |
| 262 for (c = 0; c < 8; ++c) | 262 for (c = 0; c < 8; ++c) |
| 263 sum += input[r * stride + c]; | 263 sum += input[r * stride + c]; |
| 264 | 264 |
| 265 output[0] = sum; | 265 output[0] = sum; |
| (...skipping 25 matching lines...) Expand all Loading... |
| 291 | 291 |
| 292 // fdct4(step, step); | 292 // fdct4(step, step); |
| 293 x0 = s0 + s3; | 293 x0 = s0 + s3; |
| 294 x1 = s1 + s2; | 294 x1 = s1 + s2; |
| 295 x2 = s1 - s2; | 295 x2 = s1 - s2; |
| 296 x3 = s0 - s3; | 296 x3 = s0 - s3; |
| 297 t0 = (x0 + x1) * cospi_16_64; | 297 t0 = (x0 + x1) * cospi_16_64; |
| 298 t1 = (x0 - x1) * cospi_16_64; | 298 t1 = (x0 - x1) * cospi_16_64; |
| 299 t2 = x2 * cospi_24_64 + x3 * cospi_8_64; | 299 t2 = x2 * cospi_24_64 + x3 * cospi_8_64; |
| 300 t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; | 300 t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; |
| 301 output[0 * 8] = fdct_round_shift(t0); | 301 output[0 * 8] = (tran_low_t)fdct_round_shift(t0); |
| 302 output[2 * 8] = fdct_round_shift(t2); | 302 output[2 * 8] = (tran_low_t)fdct_round_shift(t2); |
| 303 output[4 * 8] = fdct_round_shift(t1); | 303 output[4 * 8] = (tran_low_t)fdct_round_shift(t1); |
| 304 output[6 * 8] = fdct_round_shift(t3); | 304 output[6 * 8] = (tran_low_t)fdct_round_shift(t3); |
| 305 | 305 |
| 306 // Stage 2 | 306 // Stage 2 |
| 307 t0 = (s6 - s5) * cospi_16_64; | 307 t0 = (s6 - s5) * cospi_16_64; |
| 308 t1 = (s6 + s5) * cospi_16_64; | 308 t1 = (s6 + s5) * cospi_16_64; |
| 309 t2 = fdct_round_shift(t0); | 309 t2 = fdct_round_shift(t0); |
| 310 t3 = fdct_round_shift(t1); | 310 t3 = fdct_round_shift(t1); |
| 311 | 311 |
| 312 // Stage 3 | 312 // Stage 3 |
| 313 x0 = s4 + t2; | 313 x0 = s4 + t2; |
| 314 x1 = s4 - t2; | 314 x1 = s4 - t2; |
| 315 x2 = s7 - t3; | 315 x2 = s7 - t3; |
| 316 x3 = s7 + t3; | 316 x3 = s7 + t3; |
| 317 | 317 |
| 318 // Stage 4 | 318 // Stage 4 |
| 319 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; | 319 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; |
| 320 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; | 320 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; |
| 321 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; | 321 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; |
| 322 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; | 322 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; |
| 323 output[1 * 8] = fdct_round_shift(t0); | 323 output[1 * 8] = (tran_low_t)fdct_round_shift(t0); |
| 324 output[3 * 8] = fdct_round_shift(t2); | 324 output[3 * 8] = (tran_low_t)fdct_round_shift(t2); |
| 325 output[5 * 8] = fdct_round_shift(t1); | 325 output[5 * 8] = (tran_low_t)fdct_round_shift(t1); |
| 326 output[7 * 8] = fdct_round_shift(t3); | 326 output[7 * 8] = (tran_low_t)fdct_round_shift(t3); |
| 327 input++; | 327 input++; |
| 328 output++; | 328 output++; |
| 329 } | 329 } |
| 330 } | 330 } |
| 331 | 331 |
| 332 // Rows | 332 // Rows |
| 333 for (i = 0; i < 8; ++i) { | 333 for (i = 0; i < 8; ++i) { |
| 334 fdct8(&intermediate[i * 8], &final_output[i * 8]); | 334 fdct8(&intermediate[i * 8], &final_output[i * 8]); |
| 335 for (j = 0; j < 8; ++j) | 335 for (j = 0; j < 8; ++j) |
| 336 final_output[j + i * 8] /= 2; | 336 final_output[j + i * 8] /= 2; |
| 337 } | 337 } |
| 338 } | 338 } |
| 339 | 339 |
| 340 void vp9_fdct8x8_quant_c(const int16_t *input, int stride, /* forward 8x8 DCT followed by quantization of the result; input is read as 8 rows spaced stride elements apart */ |
| 341 tran_low_t *coeff_ptr, intptr_t n_coeffs, /* coeff_ptr receives the 64 transform coefficients; n_coeffs is how many scan entries get quantized */ |
| 342 int skip_block, /* nonzero: the quantization loop is skipped, outputs stay zero and *eob_ptr becomes 0 */ |
| 343 const int16_t *zbin_ptr, const int16_t *round_ptr, /* zbin_ptr is unused here; round_ptr[0] is used for position 0, round_ptr[1] for every other position */ |
| 344 const int16_t *quant_ptr, /* multiplier, indexed like round_ptr; the product is shifted right by 16 */ |
| 345 const int16_t *quant_shift_ptr, /* unused here */ |
| 346 tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, /* outputs: quantized and dequantized values, n_coeffs entries each are cleared first */ |
| 347 const int16_t *dequant_ptr, /* dequantization factor, indexed like round_ptr */ |
| 348 int zbin_oq_value, uint16_t *eob_ptr, /* zbin_oq_value is unused here; *eob_ptr receives 1 + the last scan index with a nonzero quantized value */ |
| 349 const int16_t *scan, const int16_t *iscan) { /* scan[i] gives the coefficient position for scan index i; iscan is unused here */ |
| 350 int eob = -1; /* last scan index whose quantized value is nonzero; -1 means none found */ |
| 351 |
| 352 int i, j; |
| 353 tran_low_t intermediate[64]; /* column-pass output, consumed by the row pass below */ |
| 354 |
| 355 // Transform columns: each of the 8 input columns becomes one column of intermediate |
| 356 { |
| 357 tran_low_t *output = intermediate; |
| 358 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16 |
| 359 tran_high_t t0, t1, t2, t3; // needs32 |
| 360 tran_high_t x0, x1, x2, x3; // canbe16 |
| 361 |
| 362 int i; /* NOTE(review): shadows the function-scope i declared above */ |
| 363 for (i = 0; i < 8; i++) { |
| 364 // stage 1: sums and differences of rows k and 7-k, each scaled by 4 |
| 365 s0 = (input[0 * stride] + input[7 * stride]) * 4; |
| 366 s1 = (input[1 * stride] + input[6 * stride]) * 4; |
| 367 s2 = (input[2 * stride] + input[5 * stride]) * 4; |
| 368 s3 = (input[3 * stride] + input[4 * stride]) * 4; |
| 369 s4 = (input[3 * stride] - input[4 * stride]) * 4; |
| 370 s5 = (input[2 * stride] - input[5 * stride]) * 4; |
| 371 s6 = (input[1 * stride] - input[6 * stride]) * 4; |
| 372 s7 = (input[0 * stride] - input[7 * stride]) * 4; |
| 373 |
| 374 // fdct4(step, step); written out inline on s0..s3, producing the even-indexed outputs |
| 375 x0 = s0 + s3; |
| 376 x1 = s1 + s2; |
| 377 x2 = s1 - s2; |
| 378 x3 = s0 - s3; |
| 379 t0 = (x0 + x1) * cospi_16_64; |
| 380 t1 = (x0 - x1) * cospi_16_64; |
| 381 t2 = x2 * cospi_24_64 + x3 * cospi_8_64; |
| 382 t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; |
| 383 output[0 * 8] = (tran_low_t)fdct_round_shift(t0); |
| 384 output[2 * 8] = (tran_low_t)fdct_round_shift(t2); |
| 385 output[4 * 8] = (tran_low_t)fdct_round_shift(t1); |
| 386 output[6 * 8] = (tran_low_t)fdct_round_shift(t3); |
| 387 |
| 388 // Stage 2 |
| 389 t0 = (s6 - s5) * cospi_16_64; |
| 390 t1 = (s6 + s5) * cospi_16_64; |
| 391 t2 = fdct_round_shift(t0); |
| 392 t3 = fdct_round_shift(t1); |
| 393 |
| 394 // Stage 3 |
| 395 x0 = s4 + t2; |
| 396 x1 = s4 - t2; |
| 397 x2 = s7 - t3; |
| 398 x3 = s7 + t3; |
| 399 |
| 400 // Stage 4: produces the odd-indexed outputs |
| 401 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; |
| 402 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; |
| 403 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; |
| 404 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; |
| 405 output[1 * 8] = (tran_low_t)fdct_round_shift(t0); |
| 406 output[3 * 8] = (tran_low_t)fdct_round_shift(t2); |
| 407 output[5 * 8] = (tran_low_t)fdct_round_shift(t1); |
| 408 output[7 * 8] = (tran_low_t)fdct_round_shift(t3); |
| 409 input++; /* advance to the next input column */ |
| 410 output++; /* advance to the next column of intermediate */ |
| 411 } |
| 412 } |
| 413 |
| 414 // Rows: run fdct8 over each row of intermediate into coeff_ptr, then halve every result |
| 415 for (i = 0; i < 8; ++i) { |
| 416 fdct8(&intermediate[i * 8], &coeff_ptr[i * 8]); |
| 417 for (j = 0; j < 8; ++j) |
| 418 coeff_ptr[j + i * 8] /= 2; |
| 419 } |
| 420 |
| 421 // TODO(jingning) Decide the need of these arguments after the |
| 422 // quantization process is completed. |
| 423 (void)zbin_ptr; |
| 424 (void)quant_shift_ptr; |
| 425 (void)zbin_oq_value; |
| 426 (void)iscan; |
| 427 |
| 428 vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); /* both outputs start zeroed and stay that way when skip_block is set */ |
| 429 vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); |
| 430 |
| 431 if (!skip_block) { |
| 432 // Quantization pass: every one of the n_coeffs scan entries is visited in |
| 433 // scan order; this function has no zero_flag early-out. |
| 434 for (i = 0; i < n_coeffs; i++) { |
| 435 const int rc = scan[i]; /* coefficient position for scan index i */ |
| 436 const int coeff = coeff_ptr[rc]; |
| 437 const int coeff_sign = (coeff >> 31); /* 0 or -1 sign mask; assumes 32-bit int with arithmetic right shift - TODO confirm */ |
| 438 const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; /* magnitude derived from the sign mask */ |
| 439 |
| 440 int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX); /* add rounding (index 0 for position 0, index 1 otherwise); clamp() is defined elsewhere */ |
| 441 tmp = (tmp * quant_ptr[rc != 0]) >> 16; /* multiply by the quantizer entry, then shift right by 16 */ |
| 442 |
| 443 qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; /* reapply the original sign */ |
| 444 dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0]; /* dequantized value = quantized value times the dequant entry */ |
| 445 |
| 446 if (tmp) |
| 447 eob = i; /* remember the latest scan index with a nonzero quantized value */ |
| 448 } |
| 449 } |
| 450 *eob_ptr = eob + 1; /* number of scan positions up to and including the last nonzero one; 0 if none */ |
| 451 } |
| 452 |
| 340 void vp9_fdct16x16_1_c(const int16_t *input, tran_low_t *output, int stride) { | 453 void vp9_fdct16x16_1_c(const int16_t *input, tran_low_t *output, int stride) { /* writes only output[0] and output[1]; input is read as 16 rows of 16 samples spaced stride elements apart */ |
| 341 int r, c; | 454 int r, c; |
| 342 tran_low_t sum = 0; | 455 tran_low_t sum = 0; |
| 343 for (r = 0; r < 16; ++r) | 456 for (r = 0; r < 16; ++r) |
| 344 for (c = 0; c < 16; ++c) | 457 for (c = 0; c < 16; ++c) |
| 345 sum += input[r * stride + c]; | 458 sum += input[r * stride + c]; /* accumulate all 256 samples of the block */ |
| 346 | 459 |
| 347 output[0] = sum >> 1; | 460 output[0] = sum >> 1; /* block sum shifted right by one; sum may be negative, so the result relies on the platform's signed right shift */ |
| 348 output[1] = 0; | 461 output[1] = 0; /* second coefficient is explicitly cleared */ |
| 349 } | 462 } |
| (...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 427 | 540 |
| 428 // fdct4(step, step); | 541 // fdct4(step, step); |
| 429 x0 = s0 + s3; | 542 x0 = s0 + s3; |
| 430 x1 = s1 + s2; | 543 x1 = s1 + s2; |
| 431 x2 = s1 - s2; | 544 x2 = s1 - s2; |
| 432 x3 = s0 - s3; | 545 x3 = s0 - s3; |
| 433 t0 = (x0 + x1) * cospi_16_64; | 546 t0 = (x0 + x1) * cospi_16_64; |
| 434 t1 = (x0 - x1) * cospi_16_64; | 547 t1 = (x0 - x1) * cospi_16_64; |
| 435 t2 = x3 * cospi_8_64 + x2 * cospi_24_64; | 548 t2 = x3 * cospi_8_64 + x2 * cospi_24_64; |
| 436 t3 = x3 * cospi_24_64 - x2 * cospi_8_64; | 549 t3 = x3 * cospi_24_64 - x2 * cospi_8_64; |
| 437 out[0] = fdct_round_shift(t0); | 550 out[0] = (tran_low_t)fdct_round_shift(t0); |
| 438 out[4] = fdct_round_shift(t2); | 551 out[4] = (tran_low_t)fdct_round_shift(t2); |
| 439 out[8] = fdct_round_shift(t1); | 552 out[8] = (tran_low_t)fdct_round_shift(t1); |
| 440 out[12] = fdct_round_shift(t3); | 553 out[12] = (tran_low_t)fdct_round_shift(t3); |
| 441 | 554 |
| 442 // Stage 2 | 555 // Stage 2 |
| 443 t0 = (s6 - s5) * cospi_16_64; | 556 t0 = (s6 - s5) * cospi_16_64; |
| 444 t1 = (s6 + s5) * cospi_16_64; | 557 t1 = (s6 + s5) * cospi_16_64; |
| 445 t2 = fdct_round_shift(t0); | 558 t2 = fdct_round_shift(t0); |
| 446 t3 = fdct_round_shift(t1); | 559 t3 = fdct_round_shift(t1); |
| 447 | 560 |
| 448 // Stage 3 | 561 // Stage 3 |
| 449 x0 = s4 + t2; | 562 x0 = s4 + t2; |
| 450 x1 = s4 - t2; | 563 x1 = s4 - t2; |
| 451 x2 = s7 - t3; | 564 x2 = s7 - t3; |
| 452 x3 = s7 + t3; | 565 x3 = s7 + t3; |
| 453 | 566 |
| 454 // Stage 4 | 567 // Stage 4 |
| 455 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; | 568 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; |
| 456 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; | 569 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; |
| 457 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; | 570 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; |
| 458 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; | 571 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; |
| 459 out[2] = fdct_round_shift(t0); | 572 out[2] = (tran_low_t)fdct_round_shift(t0); |
| 460 out[6] = fdct_round_shift(t2); | 573 out[6] = (tran_low_t)fdct_round_shift(t2); |
| 461 out[10] = fdct_round_shift(t1); | 574 out[10] = (tran_low_t)fdct_round_shift(t1); |
| 462 out[14] = fdct_round_shift(t3); | 575 out[14] = (tran_low_t)fdct_round_shift(t3); |
| 463 } | 576 } |
| 464 // Work on the next eight values; step1 -> odd_results | 577 // Work on the next eight values; step1 -> odd_results |
| 465 { | 578 { |
| 466 // step 2 | 579 // step 2 |
| 467 temp1 = (step1[5] - step1[2]) * cospi_16_64; | 580 temp1 = (step1[5] - step1[2]) * cospi_16_64; |
| 468 temp2 = (step1[4] - step1[3]) * cospi_16_64; | 581 temp2 = (step1[4] - step1[3]) * cospi_16_64; |
| 469 step2[2] = fdct_round_shift(temp1); | 582 step2[2] = fdct_round_shift(temp1); |
| 470 step2[3] = fdct_round_shift(temp2); | 583 step2[3] = fdct_round_shift(temp2); |
| 471 temp1 = (step1[4] + step1[3]) * cospi_16_64; | 584 temp1 = (step1[4] + step1[3]) * cospi_16_64; |
| 472 temp2 = (step1[5] + step1[2]) * cospi_16_64; | 585 temp2 = (step1[5] + step1[2]) * cospi_16_64; |
| (...skipping 22 matching lines...) Expand all Loading... |
| 495 step1[1] = step3[0] - step2[1]; | 608 step1[1] = step3[0] - step2[1]; |
| 496 step1[2] = step3[3] + step2[2]; | 609 step1[2] = step3[3] + step2[2]; |
| 497 step1[3] = step3[3] - step2[2]; | 610 step1[3] = step3[3] - step2[2]; |
| 498 step1[4] = step3[4] - step2[5]; | 611 step1[4] = step3[4] - step2[5]; |
| 499 step1[5] = step3[4] + step2[5]; | 612 step1[5] = step3[4] + step2[5]; |
| 500 step1[6] = step3[7] - step2[6]; | 613 step1[6] = step3[7] - step2[6]; |
| 501 step1[7] = step3[7] + step2[6]; | 614 step1[7] = step3[7] + step2[6]; |
| 502 // step 6 | 615 // step 6 |
| 503 temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64; | 616 temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64; |
| 504 temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64; | 617 temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64; |
| 505 out[1] = fdct_round_shift(temp1); | 618 out[1] = (tran_low_t)fdct_round_shift(temp1); |
| 506 out[9] = fdct_round_shift(temp2); | 619 out[9] = (tran_low_t)fdct_round_shift(temp2); |
| 507 temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64; | 620 temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64; |
| 508 temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64; | 621 temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64; |
| 509 out[5] = fdct_round_shift(temp1); | 622 out[5] = (tran_low_t)fdct_round_shift(temp1); |
| 510 out[13] = fdct_round_shift(temp2); | 623 out[13] = (tran_low_t)fdct_round_shift(temp2); |
| 511 temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64; | 624 temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64; |
| 512 temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64; | 625 temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64; |
| 513 out[3] = fdct_round_shift(temp1); | 626 out[3] = (tran_low_t)fdct_round_shift(temp1); |
| 514 out[11] = fdct_round_shift(temp2); | 627 out[11] = (tran_low_t)fdct_round_shift(temp2); |
| 515 temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64; | 628 temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64; |
| 516 temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64; | 629 temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64; |
| 517 out[7] = fdct_round_shift(temp1); | 630 out[7] = (tran_low_t)fdct_round_shift(temp1); |
| 518 out[15] = fdct_round_shift(temp2); | 631 out[15] = (tran_low_t)fdct_round_shift(temp2); |
| 519 } | 632 } |
| 520 // Do next column (which is a transposed row in second/horizontal pass) | 633 // Do next column (which is a transposed row in second/horizontal pass) |
| 521 in++; | 634 in++; |
| 522 in_pass0++; | 635 in_pass0++; |
| 523 out += 16; | 636 out += 16; |
| 524 } | 637 } |
| 525 // Setup in/out for next pass. | 638 // Setup in/out for next pass. |
| 526 in = intermediate; | 639 in = intermediate; |
| 527 out = output; | 640 out = output; |
| 528 } | 641 } |
| (...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 582 s2 = cospi_16_64 * (x2 + x3); | 695 s2 = cospi_16_64 * (x2 + x3); |
| 583 s3 = cospi_16_64 * (x2 - x3); | 696 s3 = cospi_16_64 * (x2 - x3); |
| 584 s6 = cospi_16_64 * (x6 + x7); | 697 s6 = cospi_16_64 * (x6 + x7); |
| 585 s7 = cospi_16_64 * (x6 - x7); | 698 s7 = cospi_16_64 * (x6 - x7); |
| 586 | 699 |
| 587 x2 = fdct_round_shift(s2); | 700 x2 = fdct_round_shift(s2); |
| 588 x3 = fdct_round_shift(s3); | 701 x3 = fdct_round_shift(s3); |
| 589 x6 = fdct_round_shift(s6); | 702 x6 = fdct_round_shift(s6); |
| 590 x7 = fdct_round_shift(s7); | 703 x7 = fdct_round_shift(s7); |
| 591 | 704 |
| 592 output[0] = x0; | 705 output[0] = (tran_low_t)x0; |
| 593 output[1] = - x4; | 706 output[1] = (tran_low_t)-x4; |
| 594 output[2] = x6; | 707 output[2] = (tran_low_t)x6; |
| 595 output[3] = - x2; | 708 output[3] = (tran_low_t)-x2; |
| 596 output[4] = x3; | 709 output[4] = (tran_low_t)x3; |
| 597 output[5] = - x7; | 710 output[5] = (tran_low_t)-x7; |
| 598 output[6] = x5; | 711 output[6] = (tran_low_t)x5; |
| 599 output[7] = - x1; | 712 output[7] = (tran_low_t)-x1; |
| 600 } | 713 } |
| 601 | 714 |
| 602 static const transform_2d FHT_8[] = { | 715 static const transform_2d FHT_8[] = { /* pairs of 8-point 1-D transforms, one entry per transform type 0..3 (see row comments); the role of each pair member is fixed by transform_2d, which is defined outside this view */ |
| 603 { fdct8, fdct8 }, // DCT_DCT = 0 | 716 { fdct8, fdct8 }, // DCT_DCT = 0 |
| 604 { fadst8, fdct8 }, // ADST_DCT = 1 | 717 { fadst8, fdct8 }, // ADST_DCT = 1 |
| 605 { fdct8, fadst8 }, // DCT_ADST = 2 | 718 { fdct8, fadst8 }, // DCT_ADST = 2 |
| 606 { fadst8, fadst8 } // ADST_ADST = 3 | 719 { fadst8, fadst8 } // ADST_ADST = 3 |
| 607 }; | 720 }; |
| 608 | 721 |
| 609 void vp9_fht8x8_c(const int16_t *input, tran_low_t *output, | 722 void vp9_fht8x8_c(const int16_t *input, tran_low_t *output, |
| (...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 652 c1 = ip_pass0[2 * stride]; | 765 c1 = ip_pass0[2 * stride]; |
| 653 d1 = ip_pass0[3 * stride]; | 766 d1 = ip_pass0[3 * stride]; |
| 654 | 767 |
| 655 a1 += b1; | 768 a1 += b1; |
| 656 d1 = d1 - c1; | 769 d1 = d1 - c1; |
| 657 e1 = (a1 - d1) >> 1; | 770 e1 = (a1 - d1) >> 1; |
| 658 b1 = e1 - b1; | 771 b1 = e1 - b1; |
| 659 c1 = e1 - c1; | 772 c1 = e1 - c1; |
| 660 a1 -= c1; | 773 a1 -= c1; |
| 661 d1 += b1; | 774 d1 += b1; |
| 662 op[0] = a1; | 775 op[0] = (tran_low_t)a1; |
| 663 op[4] = c1; | 776 op[4] = (tran_low_t)c1; |
| 664 op[8] = d1; | 777 op[8] = (tran_low_t)d1; |
| 665 op[12] = b1; | 778 op[12] = (tran_low_t)b1; |
| 666 | 779 |
| 667 ip_pass0++; | 780 ip_pass0++; |
| 668 op++; | 781 op++; |
| 669 } | 782 } |
| 670 ip = output; | 783 ip = output; |
| 671 op = output; | 784 op = output; |
| 672 | 785 |
| 673 for (i = 0; i < 4; i++) { | 786 for (i = 0; i < 4; i++) { |
| 674 a1 = ip[0]; | 787 a1 = ip[0]; |
| 675 b1 = ip[1]; | 788 b1 = ip[1]; |
| 676 c1 = ip[2]; | 789 c1 = ip[2]; |
| 677 d1 = ip[3]; | 790 d1 = ip[3]; |
| 678 | 791 |
| 679 a1 += b1; | 792 a1 += b1; |
| 680 d1 -= c1; | 793 d1 -= c1; |
| 681 e1 = (a1 - d1) >> 1; | 794 e1 = (a1 - d1) >> 1; |
| 682 b1 = e1 - b1; | 795 b1 = e1 - b1; |
| 683 c1 = e1 - c1; | 796 c1 = e1 - c1; |
| 684 a1 -= c1; | 797 a1 -= c1; |
| 685 d1 += b1; | 798 d1 += b1; |
| 686 op[0] = a1 * UNIT_QUANT_FACTOR; | 799 op[0] = (tran_low_t)(a1 * UNIT_QUANT_FACTOR); |
| 687 op[1] = c1 * UNIT_QUANT_FACTOR; | 800 op[1] = (tran_low_t)(c1 * UNIT_QUANT_FACTOR); |
| 688 op[2] = d1 * UNIT_QUANT_FACTOR; | 801 op[2] = (tran_low_t)(d1 * UNIT_QUANT_FACTOR); |
| 689 op[3] = b1 * UNIT_QUANT_FACTOR; | 802 op[3] = (tran_low_t)(b1 * UNIT_QUANT_FACTOR); |
| 690 | 803 |
| 691 ip += 4; | 804 ip += 4; |
| 692 op += 4; | 805 op += 4; |
| 693 } | 806 } |
| 694 } | 807 } |
| 695 | 808 |
| 696 // Rewrote to use same algorithm as others. | 809 // Rewrote to use same algorithm as others. |
| 697 static void fdct16(const tran_low_t in[16], tran_low_t out[16]) { | 810 static void fdct16(const tran_low_t in[16], tran_low_t out[16]) { |
| 698 tran_high_t step1[8]; // canbe16 | 811 tran_high_t step1[8]; // canbe16 |
| 699 tran_high_t step2[8]; // canbe16 | 812 tran_high_t step2[8]; // canbe16 |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 738 | 851 |
| 739 // fdct4(step, step); | 852 // fdct4(step, step); |
| 740 x0 = s0 + s3; | 853 x0 = s0 + s3; |
| 741 x1 = s1 + s2; | 854 x1 = s1 + s2; |
| 742 x2 = s1 - s2; | 855 x2 = s1 - s2; |
| 743 x3 = s0 - s3; | 856 x3 = s0 - s3; |
| 744 t0 = (x0 + x1) * cospi_16_64; | 857 t0 = (x0 + x1) * cospi_16_64; |
| 745 t1 = (x0 - x1) * cospi_16_64; | 858 t1 = (x0 - x1) * cospi_16_64; |
| 746 t2 = x3 * cospi_8_64 + x2 * cospi_24_64; | 859 t2 = x3 * cospi_8_64 + x2 * cospi_24_64; |
| 747 t3 = x3 * cospi_24_64 - x2 * cospi_8_64; | 860 t3 = x3 * cospi_24_64 - x2 * cospi_8_64; |
| 748 out[0] = fdct_round_shift(t0); | 861 out[0] = (tran_low_t)fdct_round_shift(t0); |
| 749 out[4] = fdct_round_shift(t2); | 862 out[4] = (tran_low_t)fdct_round_shift(t2); |
| 750 out[8] = fdct_round_shift(t1); | 863 out[8] = (tran_low_t)fdct_round_shift(t1); |
| 751 out[12] = fdct_round_shift(t3); | 864 out[12] = (tran_low_t)fdct_round_shift(t3); |
| 752 | 865 |
| 753 // Stage 2 | 866 // Stage 2 |
| 754 t0 = (s6 - s5) * cospi_16_64; | 867 t0 = (s6 - s5) * cospi_16_64; |
| 755 t1 = (s6 + s5) * cospi_16_64; | 868 t1 = (s6 + s5) * cospi_16_64; |
| 756 t2 = fdct_round_shift(t0); | 869 t2 = fdct_round_shift(t0); |
| 757 t3 = fdct_round_shift(t1); | 870 t3 = fdct_round_shift(t1); |
| 758 | 871 |
| 759 // Stage 3 | 872 // Stage 3 |
| 760 x0 = s4 + t2; | 873 x0 = s4 + t2; |
| 761 x1 = s4 - t2; | 874 x1 = s4 - t2; |
| 762 x2 = s7 - t3; | 875 x2 = s7 - t3; |
| 763 x3 = s7 + t3; | 876 x3 = s7 + t3; |
| 764 | 877 |
| 765 // Stage 4 | 878 // Stage 4 |
| 766 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; | 879 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; |
| 767 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; | 880 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; |
| 768 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; | 881 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; |
| 769 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; | 882 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; |
| 770 out[2] = fdct_round_shift(t0); | 883 out[2] = (tran_low_t)fdct_round_shift(t0); |
| 771 out[6] = fdct_round_shift(t2); | 884 out[6] = (tran_low_t)fdct_round_shift(t2); |
| 772 out[10] = fdct_round_shift(t1); | 885 out[10] = (tran_low_t)fdct_round_shift(t1); |
| 773 out[14] = fdct_round_shift(t3); | 886 out[14] = (tran_low_t)fdct_round_shift(t3); |
| 774 } | 887 } |
| 775 | 888 |
| 776 // step 2 | 889 // step 2 |
| 777 temp1 = (step1[5] - step1[2]) * cospi_16_64; | 890 temp1 = (step1[5] - step1[2]) * cospi_16_64; |
| 778 temp2 = (step1[4] - step1[3]) * cospi_16_64; | 891 temp2 = (step1[4] - step1[3]) * cospi_16_64; |
| 779 step2[2] = fdct_round_shift(temp1); | 892 step2[2] = fdct_round_shift(temp1); |
| 780 step2[3] = fdct_round_shift(temp2); | 893 step2[3] = fdct_round_shift(temp2); |
| 781 temp1 = (step1[4] + step1[3]) * cospi_16_64; | 894 temp1 = (step1[4] + step1[3]) * cospi_16_64; |
| 782 temp2 = (step1[5] + step1[2]) * cospi_16_64; | 895 temp2 = (step1[5] + step1[2]) * cospi_16_64; |
| 783 step2[4] = fdct_round_shift(temp1); | 896 step2[4] = fdct_round_shift(temp1); |
| (...skipping 25 matching lines...) Expand all Loading... |
| 809 step1[2] = step3[3] + step2[2]; | 922 step1[2] = step3[3] + step2[2]; |
| 810 step1[3] = step3[3] - step2[2]; | 923 step1[3] = step3[3] - step2[2]; |
| 811 step1[4] = step3[4] - step2[5]; | 924 step1[4] = step3[4] - step2[5]; |
| 812 step1[5] = step3[4] + step2[5]; | 925 step1[5] = step3[4] + step2[5]; |
| 813 step1[6] = step3[7] - step2[6]; | 926 step1[6] = step3[7] - step2[6]; |
| 814 step1[7] = step3[7] + step2[6]; | 927 step1[7] = step3[7] + step2[6]; |
| 815 | 928 |
| 816 // step 6 | 929 // step 6 |
| 817 temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64; | 930 temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64; |
| 818 temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64; | 931 temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64; |
| 819 out[1] = fdct_round_shift(temp1); | 932 out[1] = (tran_low_t)fdct_round_shift(temp1); |
| 820 out[9] = fdct_round_shift(temp2); | 933 out[9] = (tran_low_t)fdct_round_shift(temp2); |
| 821 | 934 |
| 822 temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64; | 935 temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64; |
| 823 temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64; | 936 temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64; |
| 824 out[5] = fdct_round_shift(temp1); | 937 out[5] = (tran_low_t)fdct_round_shift(temp1); |
| 825 out[13] = fdct_round_shift(temp2); | 938 out[13] = (tran_low_t)fdct_round_shift(temp2); |
| 826 | 939 |
| 827 temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64; | 940 temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64; |
| 828 temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64; | 941 temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64; |
| 829 out[3] = fdct_round_shift(temp1); | 942 out[3] = (tran_low_t)fdct_round_shift(temp1); |
| 830 out[11] = fdct_round_shift(temp2); | 943 out[11] = (tran_low_t)fdct_round_shift(temp2); |
| 831 | 944 |
| 832 temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64; | 945 temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64; |
| 833 temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64; | 946 temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64; |
| 834 out[7] = fdct_round_shift(temp1); | 947 out[7] = (tran_low_t)fdct_round_shift(temp1); |
| 835 out[15] = fdct_round_shift(temp2); | 948 out[15] = (tran_low_t)fdct_round_shift(temp2); |
| 836 } | 949 } |
| 837 | 950 |
| 838 static void fadst16(const tran_low_t *input, tran_low_t *output) { | 951 static void fadst16(const tran_low_t *input, tran_low_t *output) { |
| 839 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; | 952 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; |
| 840 tran_high_t s9, s10, s11, s12, s13, s14, s15; | 953 tran_high_t s9, s10, s11, s12, s13, s14, s15; |
| 841 | 954 |
| 842 tran_high_t x0 = input[15]; | 955 tran_high_t x0 = input[15]; |
| 843 tran_high_t x1 = input[0]; | 956 tran_high_t x1 = input[0]; |
| 844 tran_high_t x2 = input[13]; | 957 tran_high_t x2 = input[13]; |
| 845 tran_high_t x3 = input[2]; | 958 tran_high_t x3 = input[2]; |
| (...skipping 127 matching lines...) |
| 973 | 1086 |
| 974 x2 = fdct_round_shift(s2); | 1087 x2 = fdct_round_shift(s2); |
| 975 x3 = fdct_round_shift(s3); | 1088 x3 = fdct_round_shift(s3); |
| 976 x6 = fdct_round_shift(s6); | 1089 x6 = fdct_round_shift(s6); |
| 977 x7 = fdct_round_shift(s7); | 1090 x7 = fdct_round_shift(s7); |
| 978 x10 = fdct_round_shift(s10); | 1091 x10 = fdct_round_shift(s10); |
| 979 x11 = fdct_round_shift(s11); | 1092 x11 = fdct_round_shift(s11); |
| 980 x14 = fdct_round_shift(s14); | 1093 x14 = fdct_round_shift(s14); |
| 981 x15 = fdct_round_shift(s15); | 1094 x15 = fdct_round_shift(s15); |
| 982 | 1095 |
| 983 output[0] = x0; | 1096 output[0] = (tran_low_t)x0; |
| 984 output[1] = - x8; | 1097 output[1] = (tran_low_t)-x8; |
| 985 output[2] = x12; | 1098 output[2] = (tran_low_t)x12; |
| 986 output[3] = - x4; | 1099 output[3] = (tran_low_t)-x4; |
| 987 output[4] = x6; | 1100 output[4] = (tran_low_t)x6; |
| 988 output[5] = x14; | 1101 output[5] = (tran_low_t)x14; |
| 989 output[6] = x10; | 1102 output[6] = (tran_low_t)x10; |
| 990 output[7] = x2; | 1103 output[7] = (tran_low_t)x2; |
| 991 output[8] = x3; | 1104 output[8] = (tran_low_t)x3; |
| 992 output[9] = x11; | 1105 output[9] = (tran_low_t)x11; |
| 993 output[10] = x15; | 1106 output[10] = (tran_low_t)x15; |
| 994 output[11] = x7; | 1107 output[11] = (tran_low_t)x7; |
| 995 output[12] = x5; | 1108 output[12] = (tran_low_t)x5; |
| 996 output[13] = - x13; | 1109 output[13] = (tran_low_t)-x13; |
| 997 output[14] = x9; | 1110 output[14] = (tran_low_t)x9; |
| 998 output[15] = - x1; | 1111 output[15] = (tran_low_t)-x1; |
| 999 } | 1112 } |
| 1000 | 1113 |
| 1001 static const transform_2d FHT_16[] = { | 1114 static const transform_2d FHT_16[] = { |
| 1002 { fdct16, fdct16 }, // DCT_DCT = 0 | 1115 { fdct16, fdct16 }, // DCT_DCT = 0 |
| 1003 { fadst16, fdct16 }, // ADST_DCT = 1 | 1116 { fadst16, fdct16 }, // ADST_DCT = 1 |
| 1004 { fdct16, fadst16 }, // DCT_ADST = 2 | 1117 { fdct16, fadst16 }, // DCT_ADST = 2 |
| 1005 { fadst16, fadst16 } // ADST_ADST = 3 | 1118 { fadst16, fadst16 } // ADST_ADST = 3 |
| 1006 }; | 1119 }; |
| 1007 | 1120 |
| 1008 void vp9_fht16x16_c(const int16_t *input, tran_low_t *output, | 1121 void vp9_fht16x16_c(const int16_t *input, tran_low_t *output, |
| (...skipping 388 matching lines...) |
| 1397 output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; | 1510 output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; |
| 1398 } | 1511 } |
| 1399 | 1512 |
| 1400 // Rows | 1513 // Rows |
| 1401 for (i = 0; i < 32; ++i) { | 1514 for (i = 0; i < 32; ++i) { |
| 1402 tran_high_t temp_in[32], temp_out[32]; | 1515 tran_high_t temp_in[32], temp_out[32]; |
| 1403 for (j = 0; j < 32; ++j) | 1516 for (j = 0; j < 32; ++j) |
| 1404 temp_in[j] = output[j + i * 32]; | 1517 temp_in[j] = output[j + i * 32]; |
| 1405 fdct32(temp_in, temp_out, 0); | 1518 fdct32(temp_in, temp_out, 0); |
| 1406 for (j = 0; j < 32; ++j) | 1519 for (j = 0; j < 32; ++j) |
| 1407 out[j + i * 32] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2; | 1520 out[j + i * 32] = |
| 1521 (tran_low_t)((temp_out[j] + 1 + (temp_out[j] < 0)) >> 2); |
| 1408 } | 1522 } |
| 1409 } | 1523 } |
| 1410 | 1524 |
| 1411 // Note that although we use dct_32_round in dct32 computation flow, | 1525 // Note that although we use dct_32_round in dct32 computation flow, |
| 1412 // this 2d fdct32x32 for rate-distortion optimization loop is operating | 1526 // this 2d fdct32x32 for rate-distortion optimization loop is operating |
| 1413 // within 16 bits precision. | 1527 // within 16 bits precision. |
| 1414 void vp9_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) { | 1528 void vp9_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) { |
| 1415 int i, j; | 1529 int i, j; |
| 1416 tran_high_t output[32 * 32]; | 1530 tran_high_t output[32 * 32]; |
| 1417 | 1531 |
| (...skipping 10 matching lines...) |
| 1428 output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; | 1542 output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; |
| 1429 } | 1543 } |
| 1430 | 1544 |
| 1431 // Rows | 1545 // Rows |
| 1432 for (i = 0; i < 32; ++i) { | 1546 for (i = 0; i < 32; ++i) { |
| 1433 tran_high_t temp_in[32], temp_out[32]; | 1547 tran_high_t temp_in[32], temp_out[32]; |
| 1434 for (j = 0; j < 32; ++j) | 1548 for (j = 0; j < 32; ++j) |
| 1435 temp_in[j] = output[j + i * 32]; | 1549 temp_in[j] = output[j + i * 32]; |
| 1436 fdct32(temp_in, temp_out, 1); | 1550 fdct32(temp_in, temp_out, 1); |
| 1437 for (j = 0; j < 32; ++j) | 1551 for (j = 0; j < 32; ++j) |
| 1438 out[j + i * 32] = temp_out[j]; | 1552 out[j + i * 32] = (tran_low_t)temp_out[j]; |
| 1439 } | 1553 } |
| 1440 } | 1554 } |
| 1441 | 1555 |
| 1442 #if CONFIG_VP9_HIGHBITDEPTH | 1556 #if CONFIG_VP9_HIGHBITDEPTH |
| 1443 void vp9_highbd_fdct4x4_c(const int16_t *input, tran_low_t *output, | 1557 void vp9_highbd_fdct4x4_c(const int16_t *input, tran_low_t *output, |
| 1444 int stride) { | 1558 int stride) { |
| 1445 vp9_fdct4x4_c(input, output, stride); | 1559 vp9_fdct4x4_c(input, output, stride); |
| 1446 } | 1560 } |
| 1447 | 1561 |
| 1448 void vp9_highbd_fht4x4_c(const int16_t *input, tran_low_t *output, | 1562 void vp9_highbd_fht4x4_c(const int16_t *input, tran_low_t *output, |
| (...skipping 43 matching lines...) |
| 1492 | 1606 |
| 1493 void vp9_highbd_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) { | 1607 void vp9_highbd_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) { |
| 1494 vp9_fdct32x32_c(input, out, stride); | 1608 vp9_fdct32x32_c(input, out, stride); |
| 1495 } | 1609 } |
| 1496 | 1610 |
| 1497 void vp9_highbd_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, | 1611 void vp9_highbd_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, |
| 1498 int stride) { | 1612 int stride) { |
| 1499 vp9_fdct32x32_rd_c(input, out, stride); | 1613 vp9_fdct32x32_rd_c(input, out, stride); |
| 1500 } | 1614 } |
| 1501 #endif // CONFIG_VP9_HIGHBITDEPTH | 1615 #endif // CONFIG_VP9_HIGHBITDEPTH |
| OLD | NEW |