OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 19 matching lines...)
30 tran_high_t step[4]; | 30 tran_high_t step[4]; |
31 tran_high_t temp1, temp2; | 31 tran_high_t temp1, temp2; |
32 | 32 |
33 step[0] = input[0] + input[3]; | 33 step[0] = input[0] + input[3]; |
34 step[1] = input[1] + input[2]; | 34 step[1] = input[1] + input[2]; |
35 step[2] = input[1] - input[2]; | 35 step[2] = input[1] - input[2]; |
36 step[3] = input[0] - input[3]; | 36 step[3] = input[0] - input[3]; |
37 | 37 |
38 temp1 = (step[0] + step[1]) * cospi_16_64; | 38 temp1 = (step[0] + step[1]) * cospi_16_64; |
39 temp2 = (step[0] - step[1]) * cospi_16_64; | 39 temp2 = (step[0] - step[1]) * cospi_16_64; |
40 output[0] = fdct_round_shift(temp1); | 40 output[0] = (tran_low_t)fdct_round_shift(temp1); |
41 output[2] = fdct_round_shift(temp2); | 41 output[2] = (tran_low_t)fdct_round_shift(temp2); |
42 temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64; | 42 temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64; |
43 temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64; | 43 temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64; |
44 output[1] = fdct_round_shift(temp1); | 44 output[1] = (tran_low_t)fdct_round_shift(temp1); |
45 output[3] = fdct_round_shift(temp2); | 45 output[3] = (tran_low_t)fdct_round_shift(temp2); |
46 } | 46 } |
47 | 47 |
48 void vp9_fdct4x4_1_c(const int16_t *input, tran_low_t *output, int stride) { | 48 void vp9_fdct4x4_1_c(const int16_t *input, tran_low_t *output, int stride) { |
49 int r, c; | 49 int r, c; |
50 tran_low_t sum = 0; | 50 tran_low_t sum = 0; |
51 for (r = 0; r < 4; ++r) | 51 for (r = 0; r < 4; ++r) |
52 for (c = 0; c < 4; ++c) | 52 for (c = 0; c < 4; ++c) |
53 sum += input[r * stride + c]; | 53 sum += input[r * stride + c]; |
54 | 54 |
55 output[0] = sum << 1; | 55 output[0] = sum << 1; |
(...skipping 35 matching lines...)
91 input[2] = in[2 * 4]; | 91 input[2] = in[2 * 4]; |
92 input[3] = in[3 * 4]; | 92 input[3] = in[3 * 4]; |
93 } | 93 } |
94 // Transform. | 94 // Transform. |
95 step[0] = input[0] + input[3]; | 95 step[0] = input[0] + input[3]; |
96 step[1] = input[1] + input[2]; | 96 step[1] = input[1] + input[2]; |
97 step[2] = input[1] - input[2]; | 97 step[2] = input[1] - input[2]; |
98 step[3] = input[0] - input[3]; | 98 step[3] = input[0] - input[3]; |
99 temp1 = (step[0] + step[1]) * cospi_16_64; | 99 temp1 = (step[0] + step[1]) * cospi_16_64; |
100 temp2 = (step[0] - step[1]) * cospi_16_64; | 100 temp2 = (step[0] - step[1]) * cospi_16_64; |
101 out[0] = fdct_round_shift(temp1); | 101 out[0] = (tran_low_t)fdct_round_shift(temp1); |
102 out[2] = fdct_round_shift(temp2); | 102 out[2] = (tran_low_t)fdct_round_shift(temp2); |
103 temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64; | 103 temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64; |
104 temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64; | 104 temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64; |
105 out[1] = fdct_round_shift(temp1); | 105 out[1] = (tran_low_t)fdct_round_shift(temp1); |
106 out[3] = fdct_round_shift(temp2); | 106 out[3] = (tran_low_t)fdct_round_shift(temp2); |
107 // Do next column (which is a transposed row in second/horizontal pass) | 107 // Do next column (which is a transposed row in second/horizontal pass) |
108 in_pass0++; | 108 in_pass0++; |
109 in++; | 109 in++; |
110 out += 4; | 110 out += 4; |
111 } | 111 } |
112 // Setup in/out for next pass. | 112 // Setup in/out for next pass. |
113 in = intermediate; | 113 in = intermediate; |
114 out = output; | 114 out = output; |
115 } | 115 } |
116 | 116 |
(...skipping 33 matching lines...)
150 x1 = sinpi_3_9 * s7; | 150 x1 = sinpi_3_9 * s7; |
151 x2 = s1 - s3 + s6; | 151 x2 = s1 - s3 + s6; |
152 x3 = s4; | 152 x3 = s4; |
153 | 153 |
154 s0 = x0 + x3; | 154 s0 = x0 + x3; |
155 s1 = x1; | 155 s1 = x1; |
156 s2 = x2 - x3; | 156 s2 = x2 - x3; |
157 s3 = x2 - x0 + x3; | 157 s3 = x2 - x0 + x3; |
158 | 158 |
159 // 1-D transform scaling factor is sqrt(2). | 159 // 1-D transform scaling factor is sqrt(2). |
160 output[0] = fdct_round_shift(s0); | 160 output[0] = (tran_low_t)fdct_round_shift(s0); |
161 output[1] = fdct_round_shift(s1); | 161 output[1] = (tran_low_t)fdct_round_shift(s1); |
162 output[2] = fdct_round_shift(s2); | 162 output[2] = (tran_low_t)fdct_round_shift(s2); |
163 output[3] = fdct_round_shift(s3); | 163 output[3] = (tran_low_t)fdct_round_shift(s3); |
164 } | 164 } |
165 | 165 |
166 static const transform_2d FHT_4[] = { | 166 static const transform_2d FHT_4[] = { |
167 { fdct4, fdct4 }, // DCT_DCT = 0 | 167 { fdct4, fdct4 }, // DCT_DCT = 0 |
168 { fadst4, fdct4 }, // ADST_DCT = 1 | 168 { fadst4, fdct4 }, // ADST_DCT = 1 |
169 { fdct4, fadst4 }, // DCT_ADST = 2 | 169 { fdct4, fadst4 }, // DCT_ADST = 2 |
170 { fadst4, fadst4 } // ADST_ADST = 3 | 170 { fadst4, fadst4 } // ADST_ADST = 3 |
171 }; | 171 }; |
172 | 172 |
173 void vp9_fht4x4_c(const int16_t *input, tran_low_t *output, | 173 void vp9_fht4x4_c(const int16_t *input, tran_low_t *output, |
(...skipping 46 matching lines...)
220 | 220 |
221 // fdct4(step, step); | 221 // fdct4(step, step); |
222 x0 = s0 + s3; | 222 x0 = s0 + s3; |
223 x1 = s1 + s2; | 223 x1 = s1 + s2; |
224 x2 = s1 - s2; | 224 x2 = s1 - s2; |
225 x3 = s0 - s3; | 225 x3 = s0 - s3; |
226 t0 = (x0 + x1) * cospi_16_64; | 226 t0 = (x0 + x1) * cospi_16_64; |
227 t1 = (x0 - x1) * cospi_16_64; | 227 t1 = (x0 - x1) * cospi_16_64; |
228 t2 = x2 * cospi_24_64 + x3 * cospi_8_64; | 228 t2 = x2 * cospi_24_64 + x3 * cospi_8_64; |
229 t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; | 229 t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; |
230 output[0] = fdct_round_shift(t0); | 230 output[0] = (tran_low_t)fdct_round_shift(t0); |
231 output[2] = fdct_round_shift(t2); | 231 output[2] = (tran_low_t)fdct_round_shift(t2); |
232 output[4] = fdct_round_shift(t1); | 232 output[4] = (tran_low_t)fdct_round_shift(t1); |
233 output[6] = fdct_round_shift(t3); | 233 output[6] = (tran_low_t)fdct_round_shift(t3); |
234 | 234 |
235 // Stage 2 | 235 // Stage 2 |
236 t0 = (s6 - s5) * cospi_16_64; | 236 t0 = (s6 - s5) * cospi_16_64; |
237 t1 = (s6 + s5) * cospi_16_64; | 237 t1 = (s6 + s5) * cospi_16_64; |
238 t2 = fdct_round_shift(t0); | 238 t2 = (tran_low_t)fdct_round_shift(t0); |
239 t3 = fdct_round_shift(t1); | 239 t3 = (tran_low_t)fdct_round_shift(t1); |
240 | 240 |
241 // Stage 3 | 241 // Stage 3 |
242 x0 = s4 + t2; | 242 x0 = s4 + t2; |
243 x1 = s4 - t2; | 243 x1 = s4 - t2; |
244 x2 = s7 - t3; | 244 x2 = s7 - t3; |
245 x3 = s7 + t3; | 245 x3 = s7 + t3; |
246 | 246 |
247 // Stage 4 | 247 // Stage 4 |
248 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; | 248 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; |
249 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; | 249 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; |
250 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; | 250 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; |
251 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; | 251 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; |
252 output[1] = fdct_round_shift(t0); | 252 output[1] = (tran_low_t)fdct_round_shift(t0); |
253 output[3] = fdct_round_shift(t2); | 253 output[3] = (tran_low_t)fdct_round_shift(t2); |
254 output[5] = fdct_round_shift(t1); | 254 output[5] = (tran_low_t)fdct_round_shift(t1); |
255 output[7] = fdct_round_shift(t3); | 255 output[7] = (tran_low_t)fdct_round_shift(t3); |
256 } | 256 } |
257 | 257 |
258 void vp9_fdct8x8_1_c(const int16_t *input, tran_low_t *output, int stride) { | 258 void vp9_fdct8x8_1_c(const int16_t *input, tran_low_t *output, int stride) { |
259 int r, c; | 259 int r, c; |
260 tran_low_t sum = 0; | 260 tran_low_t sum = 0; |
261 for (r = 0; r < 8; ++r) | 261 for (r = 0; r < 8; ++r) |
262 for (c = 0; c < 8; ++c) | 262 for (c = 0; c < 8; ++c) |
263 sum += input[r * stride + c]; | 263 sum += input[r * stride + c]; |
264 | 264 |
265 output[0] = sum; | 265 output[0] = sum; |
(...skipping 25 matching lines...)
291 | 291 |
292 // fdct4(step, step); | 292 // fdct4(step, step); |
293 x0 = s0 + s3; | 293 x0 = s0 + s3; |
294 x1 = s1 + s2; | 294 x1 = s1 + s2; |
295 x2 = s1 - s2; | 295 x2 = s1 - s2; |
296 x3 = s0 - s3; | 296 x3 = s0 - s3; |
297 t0 = (x0 + x1) * cospi_16_64; | 297 t0 = (x0 + x1) * cospi_16_64; |
298 t1 = (x0 - x1) * cospi_16_64; | 298 t1 = (x0 - x1) * cospi_16_64; |
299 t2 = x2 * cospi_24_64 + x3 * cospi_8_64; | 299 t2 = x2 * cospi_24_64 + x3 * cospi_8_64; |
300 t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; | 300 t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; |
301 output[0 * 8] = fdct_round_shift(t0); | 301 output[0 * 8] = (tran_low_t)fdct_round_shift(t0); |
302 output[2 * 8] = fdct_round_shift(t2); | 302 output[2 * 8] = (tran_low_t)fdct_round_shift(t2); |
303 output[4 * 8] = fdct_round_shift(t1); | 303 output[4 * 8] = (tran_low_t)fdct_round_shift(t1); |
304 output[6 * 8] = fdct_round_shift(t3); | 304 output[6 * 8] = (tran_low_t)fdct_round_shift(t3); |
305 | 305 |
306 // Stage 2 | 306 // Stage 2 |
307 t0 = (s6 - s5) * cospi_16_64; | 307 t0 = (s6 - s5) * cospi_16_64; |
308 t1 = (s6 + s5) * cospi_16_64; | 308 t1 = (s6 + s5) * cospi_16_64; |
309 t2 = fdct_round_shift(t0); | 309 t2 = fdct_round_shift(t0); |
310 t3 = fdct_round_shift(t1); | 310 t3 = fdct_round_shift(t1); |
311 | 311 |
312 // Stage 3 | 312 // Stage 3 |
313 x0 = s4 + t2; | 313 x0 = s4 + t2; |
314 x1 = s4 - t2; | 314 x1 = s4 - t2; |
315 x2 = s7 - t3; | 315 x2 = s7 - t3; |
316 x3 = s7 + t3; | 316 x3 = s7 + t3; |
317 | 317 |
318 // Stage 4 | 318 // Stage 4 |
319 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; | 319 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; |
320 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; | 320 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; |
321 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; | 321 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; |
322 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; | 322 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; |
323 output[1 * 8] = fdct_round_shift(t0); | 323 output[1 * 8] = (tran_low_t)fdct_round_shift(t0); |
324 output[3 * 8] = fdct_round_shift(t2); | 324 output[3 * 8] = (tran_low_t)fdct_round_shift(t2); |
325 output[5 * 8] = fdct_round_shift(t1); | 325 output[5 * 8] = (tran_low_t)fdct_round_shift(t1); |
326 output[7 * 8] = fdct_round_shift(t3); | 326 output[7 * 8] = (tran_low_t)fdct_round_shift(t3); |
327 input++; | 327 input++; |
328 output++; | 328 output++; |
329 } | 329 } |
330 } | 330 } |
331 | 331 |
332 // Rows | 332 // Rows |
333 for (i = 0; i < 8; ++i) { | 333 for (i = 0; i < 8; ++i) { |
334 fdct8(&intermediate[i * 8], &final_output[i * 8]); | 334 fdct8(&intermediate[i * 8], &final_output[i * 8]); |
335 for (j = 0; j < 8; ++j) | 335 for (j = 0; j < 8; ++j) |
336 final_output[j + i * 8] /= 2; | 336 final_output[j + i * 8] /= 2; |
337 } | 337 } |
338 } | 338 } |
339 | 339 |
| 340 void vp9_fdct8x8_quant_c(const int16_t *input, int stride, |
| 341 tran_low_t *coeff_ptr, intptr_t n_coeffs, |
| 342 int skip_block, |
| 343 const int16_t *zbin_ptr, const int16_t *round_ptr, |
| 344 const int16_t *quant_ptr, |
| 345 const int16_t *quant_shift_ptr, |
| 346 tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, |
| 347 const int16_t *dequant_ptr, |
| 348 int zbin_oq_value, uint16_t *eob_ptr, |
| 349 const int16_t *scan, const int16_t *iscan) { |
| 350 int eob = -1; |
| 351 |
| 352 int i, j; |
| 353 tran_low_t intermediate[64]; |
| 354 |
| 355 // Transform columns |
| 356 { |
| 357 tran_low_t *output = intermediate; |
| 358 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16 |
| 359 tran_high_t t0, t1, t2, t3; // needs32 |
| 360 tran_high_t x0, x1, x2, x3; // canbe16 |
| 361 |
| 362 int i; |
| 363 for (i = 0; i < 8; i++) { |
| 364 // stage 1 |
| 365 s0 = (input[0 * stride] + input[7 * stride]) * 4; |
| 366 s1 = (input[1 * stride] + input[6 * stride]) * 4; |
| 367 s2 = (input[2 * stride] + input[5 * stride]) * 4; |
| 368 s3 = (input[3 * stride] + input[4 * stride]) * 4; |
| 369 s4 = (input[3 * stride] - input[4 * stride]) * 4; |
| 370 s5 = (input[2 * stride] - input[5 * stride]) * 4; |
| 371 s6 = (input[1 * stride] - input[6 * stride]) * 4; |
| 372 s7 = (input[0 * stride] - input[7 * stride]) * 4; |
| 373 |
| 374 // fdct4(step, step); |
| 375 x0 = s0 + s3; |
| 376 x1 = s1 + s2; |
| 377 x2 = s1 - s2; |
| 378 x3 = s0 - s3; |
| 379 t0 = (x0 + x1) * cospi_16_64; |
| 380 t1 = (x0 - x1) * cospi_16_64; |
| 381 t2 = x2 * cospi_24_64 + x3 * cospi_8_64; |
| 382 t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; |
| 383 output[0 * 8] = (tran_low_t)fdct_round_shift(t0); |
| 384 output[2 * 8] = (tran_low_t)fdct_round_shift(t2); |
| 385 output[4 * 8] = (tran_low_t)fdct_round_shift(t1); |
| 386 output[6 * 8] = (tran_low_t)fdct_round_shift(t3); |
| 387 |
| 388 // Stage 2 |
| 389 t0 = (s6 - s5) * cospi_16_64; |
| 390 t1 = (s6 + s5) * cospi_16_64; |
| 391 t2 = fdct_round_shift(t0); |
| 392 t3 = fdct_round_shift(t1); |
| 393 |
| 394 // Stage 3 |
| 395 x0 = s4 + t2; |
| 396 x1 = s4 - t2; |
| 397 x2 = s7 - t3; |
| 398 x3 = s7 + t3; |
| 399 |
| 400 // Stage 4 |
| 401 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; |
| 402 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; |
| 403 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; |
| 404 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; |
| 405 output[1 * 8] = (tran_low_t)fdct_round_shift(t0); |
| 406 output[3 * 8] = (tran_low_t)fdct_round_shift(t2); |
| 407 output[5 * 8] = (tran_low_t)fdct_round_shift(t1); |
| 408 output[7 * 8] = (tran_low_t)fdct_round_shift(t3); |
| 409 input++; |
| 410 output++; |
| 411 } |
| 412 } |
| 413 |
| 414 // Rows |
| 415 for (i = 0; i < 8; ++i) { |
| 416 fdct8(&intermediate[i * 8], &coeff_ptr[i * 8]); |
| 417 for (j = 0; j < 8; ++j) |
| 418 coeff_ptr[j + i * 8] /= 2; |
| 419 } |
| 420 |
| 421 // TODO(jingning) Decide the need of these arguments after the |
| 422 // quantization process is completed. |
| 423 (void)zbin_ptr; |
| 424 (void)quant_shift_ptr; |
| 425 (void)zbin_oq_value; |
| 426 (void)iscan; |
| 427 |
| 428 vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); |
| 429 vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); |
| 430 |
| 431 if (!skip_block) { |
| 432 // Quantization pass: All coefficients with index >= zero_flag are |
| 433 // skippable. Note: zero_flag can be zero. |
| 434 for (i = 0; i < n_coeffs; i++) { |
| 435 const int rc = scan[i]; |
| 436 const int coeff = coeff_ptr[rc]; |
| 437 const int coeff_sign = (coeff >> 31); |
| 438 const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; |
| 439 |
| 440 int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX); |
| 441 tmp = (tmp * quant_ptr[rc != 0]) >> 16; |
| 442 |
| 443 qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; |
| 444 dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0]; |
| 445 |
| 446 if (tmp) |
| 447 eob = i; |
| 448 } |
| 449 } |
| 450 *eob_ptr = eob + 1; |
| 451 } |
| 452 |
340 void vp9_fdct16x16_1_c(const int16_t *input, tran_low_t *output, int stride) { | 453 void vp9_fdct16x16_1_c(const int16_t *input, tran_low_t *output, int stride) { |
341 int r, c; | 454 int r, c; |
342 tran_low_t sum = 0; | 455 tran_low_t sum = 0; |
343 for (r = 0; r < 16; ++r) | 456 for (r = 0; r < 16; ++r) |
344 for (c = 0; c < 16; ++c) | 457 for (c = 0; c < 16; ++c) |
345 sum += input[r * stride + c]; | 458 sum += input[r * stride + c]; |
346 | 459 |
347 output[0] = sum >> 1; | 460 output[0] = sum >> 1; |
348 output[1] = 0; | 461 output[1] = 0; |
349 } | 462 } |
(...skipping 77 matching lines...)
427 | 540 |
428 // fdct4(step, step); | 541 // fdct4(step, step); |
429 x0 = s0 + s3; | 542 x0 = s0 + s3; |
430 x1 = s1 + s2; | 543 x1 = s1 + s2; |
431 x2 = s1 - s2; | 544 x2 = s1 - s2; |
432 x3 = s0 - s3; | 545 x3 = s0 - s3; |
433 t0 = (x0 + x1) * cospi_16_64; | 546 t0 = (x0 + x1) * cospi_16_64; |
434 t1 = (x0 - x1) * cospi_16_64; | 547 t1 = (x0 - x1) * cospi_16_64; |
435 t2 = x3 * cospi_8_64 + x2 * cospi_24_64; | 548 t2 = x3 * cospi_8_64 + x2 * cospi_24_64; |
436 t3 = x3 * cospi_24_64 - x2 * cospi_8_64; | 549 t3 = x3 * cospi_24_64 - x2 * cospi_8_64; |
437 out[0] = fdct_round_shift(t0); | 550 out[0] = (tran_low_t)fdct_round_shift(t0); |
438 out[4] = fdct_round_shift(t2); | 551 out[4] = (tran_low_t)fdct_round_shift(t2); |
439 out[8] = fdct_round_shift(t1); | 552 out[8] = (tran_low_t)fdct_round_shift(t1); |
440 out[12] = fdct_round_shift(t3); | 553 out[12] = (tran_low_t)fdct_round_shift(t3); |
441 | 554 |
442 // Stage 2 | 555 // Stage 2 |
443 t0 = (s6 - s5) * cospi_16_64; | 556 t0 = (s6 - s5) * cospi_16_64; |
444 t1 = (s6 + s5) * cospi_16_64; | 557 t1 = (s6 + s5) * cospi_16_64; |
445 t2 = fdct_round_shift(t0); | 558 t2 = fdct_round_shift(t0); |
446 t3 = fdct_round_shift(t1); | 559 t3 = fdct_round_shift(t1); |
447 | 560 |
448 // Stage 3 | 561 // Stage 3 |
449 x0 = s4 + t2; | 562 x0 = s4 + t2; |
450 x1 = s4 - t2; | 563 x1 = s4 - t2; |
451 x2 = s7 - t3; | 564 x2 = s7 - t3; |
452 x3 = s7 + t3; | 565 x3 = s7 + t3; |
453 | 566 |
454 // Stage 4 | 567 // Stage 4 |
455 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; | 568 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; |
456 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; | 569 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; |
457 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; | 570 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; |
458 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; | 571 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; |
459 out[2] = fdct_round_shift(t0); | 572 out[2] = (tran_low_t)fdct_round_shift(t0); |
460 out[6] = fdct_round_shift(t2); | 573 out[6] = (tran_low_t)fdct_round_shift(t2); |
461 out[10] = fdct_round_shift(t1); | 574 out[10] = (tran_low_t)fdct_round_shift(t1); |
462 out[14] = fdct_round_shift(t3); | 575 out[14] = (tran_low_t)fdct_round_shift(t3); |
463 } | 576 } |
464 // Work on the next eight values; step1 -> odd_results | 577 // Work on the next eight values; step1 -> odd_results |
465 { | 578 { |
466 // step 2 | 579 // step 2 |
467 temp1 = (step1[5] - step1[2]) * cospi_16_64; | 580 temp1 = (step1[5] - step1[2]) * cospi_16_64; |
468 temp2 = (step1[4] - step1[3]) * cospi_16_64; | 581 temp2 = (step1[4] - step1[3]) * cospi_16_64; |
469 step2[2] = fdct_round_shift(temp1); | 582 step2[2] = fdct_round_shift(temp1); |
470 step2[3] = fdct_round_shift(temp2); | 583 step2[3] = fdct_round_shift(temp2); |
471 temp1 = (step1[4] + step1[3]) * cospi_16_64; | 584 temp1 = (step1[4] + step1[3]) * cospi_16_64; |
472 temp2 = (step1[5] + step1[2]) * cospi_16_64; | 585 temp2 = (step1[5] + step1[2]) * cospi_16_64; |
(...skipping 22 matching lines...)
495 step1[1] = step3[0] - step2[1]; | 608 step1[1] = step3[0] - step2[1]; |
496 step1[2] = step3[3] + step2[2]; | 609 step1[2] = step3[3] + step2[2]; |
497 step1[3] = step3[3] - step2[2]; | 610 step1[3] = step3[3] - step2[2]; |
498 step1[4] = step3[4] - step2[5]; | 611 step1[4] = step3[4] - step2[5]; |
499 step1[5] = step3[4] + step2[5]; | 612 step1[5] = step3[4] + step2[5]; |
500 step1[6] = step3[7] - step2[6]; | 613 step1[6] = step3[7] - step2[6]; |
501 step1[7] = step3[7] + step2[6]; | 614 step1[7] = step3[7] + step2[6]; |
502 // step 6 | 615 // step 6 |
503 temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64; | 616 temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64; |
504 temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64; | 617 temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64; |
505 out[1] = fdct_round_shift(temp1); | 618 out[1] = (tran_low_t)fdct_round_shift(temp1); |
506 out[9] = fdct_round_shift(temp2); | 619 out[9] = (tran_low_t)fdct_round_shift(temp2); |
507 temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64; | 620 temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64; |
508 temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64; | 621 temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64; |
509 out[5] = fdct_round_shift(temp1); | 622 out[5] = (tran_low_t)fdct_round_shift(temp1); |
510 out[13] = fdct_round_shift(temp2); | 623 out[13] = (tran_low_t)fdct_round_shift(temp2); |
511 temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64; | 624 temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64; |
512 temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64; | 625 temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64; |
513 out[3] = fdct_round_shift(temp1); | 626 out[3] = (tran_low_t)fdct_round_shift(temp1); |
514 out[11] = fdct_round_shift(temp2); | 627 out[11] = (tran_low_t)fdct_round_shift(temp2); |
515 temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64; | 628 temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64; |
516 temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64; | 629 temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64; |
517 out[7] = fdct_round_shift(temp1); | 630 out[7] = (tran_low_t)fdct_round_shift(temp1); |
518 out[15] = fdct_round_shift(temp2); | 631 out[15] = (tran_low_t)fdct_round_shift(temp2); |
519 } | 632 } |
520 // Do next column (which is a transposed row in second/horizontal pass) | 633 // Do next column (which is a transposed row in second/horizontal pass) |
521 in++; | 634 in++; |
522 in_pass0++; | 635 in_pass0++; |
523 out += 16; | 636 out += 16; |
524 } | 637 } |
525 // Setup in/out for next pass. | 638 // Setup in/out for next pass. |
526 in = intermediate; | 639 in = intermediate; |
527 out = output; | 640 out = output; |
528 } | 641 } |
(...skipping 53 matching lines...)
582 s2 = cospi_16_64 * (x2 + x3); | 695 s2 = cospi_16_64 * (x2 + x3); |
583 s3 = cospi_16_64 * (x2 - x3); | 696 s3 = cospi_16_64 * (x2 - x3); |
584 s6 = cospi_16_64 * (x6 + x7); | 697 s6 = cospi_16_64 * (x6 + x7); |
585 s7 = cospi_16_64 * (x6 - x7); | 698 s7 = cospi_16_64 * (x6 - x7); |
586 | 699 |
587 x2 = fdct_round_shift(s2); | 700 x2 = fdct_round_shift(s2); |
588 x3 = fdct_round_shift(s3); | 701 x3 = fdct_round_shift(s3); |
589 x6 = fdct_round_shift(s6); | 702 x6 = fdct_round_shift(s6); |
590 x7 = fdct_round_shift(s7); | 703 x7 = fdct_round_shift(s7); |
591 | 704 |
592 output[0] = x0; | 705 output[0] = (tran_low_t)x0; |
593 output[1] = - x4; | 706 output[1] = (tran_low_t)-x4; |
594 output[2] = x6; | 707 output[2] = (tran_low_t)x6; |
595 output[3] = - x2; | 708 output[3] = (tran_low_t)-x2; |
596 output[4] = x3; | 709 output[4] = (tran_low_t)x3; |
597 output[5] = - x7; | 710 output[5] = (tran_low_t)-x7; |
598 output[6] = x5; | 711 output[6] = (tran_low_t)x5; |
599 output[7] = - x1; | 712 output[7] = (tran_low_t)-x1; |
600 } | 713 } |
601 | 714 |
602 static const transform_2d FHT_8[] = { | 715 static const transform_2d FHT_8[] = { |
603 { fdct8, fdct8 }, // DCT_DCT = 0 | 716 { fdct8, fdct8 }, // DCT_DCT = 0 |
604 { fadst8, fdct8 }, // ADST_DCT = 1 | 717 { fadst8, fdct8 }, // ADST_DCT = 1 |
605 { fdct8, fadst8 }, // DCT_ADST = 2 | 718 { fdct8, fadst8 }, // DCT_ADST = 2 |
606 { fadst8, fadst8 } // ADST_ADST = 3 | 719 { fadst8, fadst8 } // ADST_ADST = 3 |
607 }; | 720 }; |
608 | 721 |
609 void vp9_fht8x8_c(const int16_t *input, tran_low_t *output, | 722 void vp9_fht8x8_c(const int16_t *input, tran_low_t *output, |
(...skipping 42 matching lines...)
652 c1 = ip_pass0[2 * stride]; | 765 c1 = ip_pass0[2 * stride]; |
653 d1 = ip_pass0[3 * stride]; | 766 d1 = ip_pass0[3 * stride]; |
654 | 767 |
655 a1 += b1; | 768 a1 += b1; |
656 d1 = d1 - c1; | 769 d1 = d1 - c1; |
657 e1 = (a1 - d1) >> 1; | 770 e1 = (a1 - d1) >> 1; |
658 b1 = e1 - b1; | 771 b1 = e1 - b1; |
659 c1 = e1 - c1; | 772 c1 = e1 - c1; |
660 a1 -= c1; | 773 a1 -= c1; |
661 d1 += b1; | 774 d1 += b1; |
662 op[0] = a1; | 775 op[0] = (tran_low_t)a1; |
663 op[4] = c1; | 776 op[4] = (tran_low_t)c1; |
664 op[8] = d1; | 777 op[8] = (tran_low_t)d1; |
665 op[12] = b1; | 778 op[12] = (tran_low_t)b1; |
666 | 779 |
667 ip_pass0++; | 780 ip_pass0++; |
668 op++; | 781 op++; |
669 } | 782 } |
670 ip = output; | 783 ip = output; |
671 op = output; | 784 op = output; |
672 | 785 |
673 for (i = 0; i < 4; i++) { | 786 for (i = 0; i < 4; i++) { |
674 a1 = ip[0]; | 787 a1 = ip[0]; |
675 b1 = ip[1]; | 788 b1 = ip[1]; |
676 c1 = ip[2]; | 789 c1 = ip[2]; |
677 d1 = ip[3]; | 790 d1 = ip[3]; |
678 | 791 |
679 a1 += b1; | 792 a1 += b1; |
680 d1 -= c1; | 793 d1 -= c1; |
681 e1 = (a1 - d1) >> 1; | 794 e1 = (a1 - d1) >> 1; |
682 b1 = e1 - b1; | 795 b1 = e1 - b1; |
683 c1 = e1 - c1; | 796 c1 = e1 - c1; |
684 a1 -= c1; | 797 a1 -= c1; |
685 d1 += b1; | 798 d1 += b1; |
686 op[0] = a1 * UNIT_QUANT_FACTOR; | 799 op[0] = (tran_low_t)(a1 * UNIT_QUANT_FACTOR); |
687 op[1] = c1 * UNIT_QUANT_FACTOR; | 800 op[1] = (tran_low_t)(c1 * UNIT_QUANT_FACTOR); |
688 op[2] = d1 * UNIT_QUANT_FACTOR; | 801 op[2] = (tran_low_t)(d1 * UNIT_QUANT_FACTOR); |
689 op[3] = b1 * UNIT_QUANT_FACTOR; | 802 op[3] = (tran_low_t)(b1 * UNIT_QUANT_FACTOR); |
690 | 803 |
691 ip += 4; | 804 ip += 4; |
692 op += 4; | 805 op += 4; |
693 } | 806 } |
694 } | 807 } |
695 | 808 |
696 // Rewrote to use same algorithm as others. | 809 // Rewrote to use same algorithm as others. |
697 static void fdct16(const tran_low_t in[16], tran_low_t out[16]) { | 810 static void fdct16(const tran_low_t in[16], tran_low_t out[16]) { |
698 tran_high_t step1[8]; // canbe16 | 811 tran_high_t step1[8]; // canbe16 |
699 tran_high_t step2[8]; // canbe16 | 812 tran_high_t step2[8]; // canbe16 |
(...skipping 38 matching lines...)
738 | 851 |
739 // fdct4(step, step); | 852 // fdct4(step, step); |
740 x0 = s0 + s3; | 853 x0 = s0 + s3; |
741 x1 = s1 + s2; | 854 x1 = s1 + s2; |
742 x2 = s1 - s2; | 855 x2 = s1 - s2; |
743 x3 = s0 - s3; | 856 x3 = s0 - s3; |
744 t0 = (x0 + x1) * cospi_16_64; | 857 t0 = (x0 + x1) * cospi_16_64; |
745 t1 = (x0 - x1) * cospi_16_64; | 858 t1 = (x0 - x1) * cospi_16_64; |
746 t2 = x3 * cospi_8_64 + x2 * cospi_24_64; | 859 t2 = x3 * cospi_8_64 + x2 * cospi_24_64; |
747 t3 = x3 * cospi_24_64 - x2 * cospi_8_64; | 860 t3 = x3 * cospi_24_64 - x2 * cospi_8_64; |
748 out[0] = fdct_round_shift(t0); | 861 out[0] = (tran_low_t)fdct_round_shift(t0); |
749 out[4] = fdct_round_shift(t2); | 862 out[4] = (tran_low_t)fdct_round_shift(t2); |
750 out[8] = fdct_round_shift(t1); | 863 out[8] = (tran_low_t)fdct_round_shift(t1); |
751 out[12] = fdct_round_shift(t3); | 864 out[12] = (tran_low_t)fdct_round_shift(t3); |
752 | 865 |
753 // Stage 2 | 866 // Stage 2 |
754 t0 = (s6 - s5) * cospi_16_64; | 867 t0 = (s6 - s5) * cospi_16_64; |
755 t1 = (s6 + s5) * cospi_16_64; | 868 t1 = (s6 + s5) * cospi_16_64; |
756 t2 = fdct_round_shift(t0); | 869 t2 = fdct_round_shift(t0); |
757 t3 = fdct_round_shift(t1); | 870 t3 = fdct_round_shift(t1); |
758 | 871 |
759 // Stage 3 | 872 // Stage 3 |
760 x0 = s4 + t2; | 873 x0 = s4 + t2; |
761 x1 = s4 - t2; | 874 x1 = s4 - t2; |
762 x2 = s7 - t3; | 875 x2 = s7 - t3; |
763 x3 = s7 + t3; | 876 x3 = s7 + t3; |
764 | 877 |
765 // Stage 4 | 878 // Stage 4 |
766 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; | 879 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; |
767 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; | 880 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; |
768 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; | 881 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; |
769 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; | 882 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; |
770 out[2] = fdct_round_shift(t0); | 883 out[2] = (tran_low_t)fdct_round_shift(t0); |
771 out[6] = fdct_round_shift(t2); | 884 out[6] = (tran_low_t)fdct_round_shift(t2); |
772 out[10] = fdct_round_shift(t1); | 885 out[10] = (tran_low_t)fdct_round_shift(t1); |
773 out[14] = fdct_round_shift(t3); | 886 out[14] = (tran_low_t)fdct_round_shift(t3); |
774 } | 887 } |
775 | 888 |
776 // step 2 | 889 // step 2 |
777 temp1 = (step1[5] - step1[2]) * cospi_16_64; | 890 temp1 = (step1[5] - step1[2]) * cospi_16_64; |
778 temp2 = (step1[4] - step1[3]) * cospi_16_64; | 891 temp2 = (step1[4] - step1[3]) * cospi_16_64; |
779 step2[2] = fdct_round_shift(temp1); | 892 step2[2] = fdct_round_shift(temp1); |
780 step2[3] = fdct_round_shift(temp2); | 893 step2[3] = fdct_round_shift(temp2); |
781 temp1 = (step1[4] + step1[3]) * cospi_16_64; | 894 temp1 = (step1[4] + step1[3]) * cospi_16_64; |
782 temp2 = (step1[5] + step1[2]) * cospi_16_64; | 895 temp2 = (step1[5] + step1[2]) * cospi_16_64; |
783 step2[4] = fdct_round_shift(temp1); | 896 step2[4] = fdct_round_shift(temp1); |
(...skipping 25 matching lines...)
809 step1[2] = step3[3] + step2[2]; | 922 step1[2] = step3[3] + step2[2]; |
810 step1[3] = step3[3] - step2[2]; | 923 step1[3] = step3[3] - step2[2]; |
811 step1[4] = step3[4] - step2[5]; | 924 step1[4] = step3[4] - step2[5]; |
812 step1[5] = step3[4] + step2[5]; | 925 step1[5] = step3[4] + step2[5]; |
813 step1[6] = step3[7] - step2[6]; | 926 step1[6] = step3[7] - step2[6]; |
814 step1[7] = step3[7] + step2[6]; | 927 step1[7] = step3[7] + step2[6]; |
815 | 928 |
816 // step 6 | 929 // step 6 |
817 temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64; | 930 temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64; |
818 temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64; | 931 temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64; |
819 out[1] = fdct_round_shift(temp1); | 932 out[1] = (tran_low_t)fdct_round_shift(temp1); |
820 out[9] = fdct_round_shift(temp2); | 933 out[9] = (tran_low_t)fdct_round_shift(temp2); |
821 | 934 |
822 temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64; | 935 temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64; |
823 temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64; | 936 temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64; |
824 out[5] = fdct_round_shift(temp1); | 937 out[5] = (tran_low_t)fdct_round_shift(temp1); |
825 out[13] = fdct_round_shift(temp2); | 938 out[13] = (tran_low_t)fdct_round_shift(temp2); |
826 | 939 |
827 temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64; | 940 temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64; |
828 temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64; | 941 temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64; |
829 out[3] = fdct_round_shift(temp1); | 942 out[3] = (tran_low_t)fdct_round_shift(temp1); |
830 out[11] = fdct_round_shift(temp2); | 943 out[11] = (tran_low_t)fdct_round_shift(temp2); |
831 | 944 |
832 temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64; | 945 temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64; |
833 temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64; | 946 temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64; |
834 out[7] = fdct_round_shift(temp1); | 947 out[7] = (tran_low_t)fdct_round_shift(temp1); |
835 out[15] = fdct_round_shift(temp2); | 948 out[15] = (tran_low_t)fdct_round_shift(temp2); |
836 } | 949 } |
837 | 950 |
838 static void fadst16(const tran_low_t *input, tran_low_t *output) { | 951 static void fadst16(const tran_low_t *input, tran_low_t *output) { |
839 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; | 952 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; |
840 tran_high_t s9, s10, s11, s12, s13, s14, s15; | 953 tran_high_t s9, s10, s11, s12, s13, s14, s15; |
841 | 954 |
842 tran_high_t x0 = input[15]; | 955 tran_high_t x0 = input[15]; |
843 tran_high_t x1 = input[0]; | 956 tran_high_t x1 = input[0]; |
844 tran_high_t x2 = input[13]; | 957 tran_high_t x2 = input[13]; |
845 tran_high_t x3 = input[2]; | 958 tran_high_t x3 = input[2]; |
(...skipping 127 matching lines...)
973 | 1086 |
974 x2 = fdct_round_shift(s2); | 1087 x2 = fdct_round_shift(s2); |
975 x3 = fdct_round_shift(s3); | 1088 x3 = fdct_round_shift(s3); |
976 x6 = fdct_round_shift(s6); | 1089 x6 = fdct_round_shift(s6); |
977 x7 = fdct_round_shift(s7); | 1090 x7 = fdct_round_shift(s7); |
978 x10 = fdct_round_shift(s10); | 1091 x10 = fdct_round_shift(s10); |
979 x11 = fdct_round_shift(s11); | 1092 x11 = fdct_round_shift(s11); |
980 x14 = fdct_round_shift(s14); | 1093 x14 = fdct_round_shift(s14); |
981 x15 = fdct_round_shift(s15); | 1094 x15 = fdct_round_shift(s15); |
982 | 1095 |
983 output[0] = x0; | 1096 output[0] = (tran_low_t)x0; |
984 output[1] = - x8; | 1097 output[1] = (tran_low_t)-x8; |
985 output[2] = x12; | 1098 output[2] = (tran_low_t)x12; |
986 output[3] = - x4; | 1099 output[3] = (tran_low_t)-x4; |
987 output[4] = x6; | 1100 output[4] = (tran_low_t)x6; |
988 output[5] = x14; | 1101 output[5] = (tran_low_t)x14; |
989 output[6] = x10; | 1102 output[6] = (tran_low_t)x10; |
990 output[7] = x2; | 1103 output[7] = (tran_low_t)x2; |
991 output[8] = x3; | 1104 output[8] = (tran_low_t)x3; |
992 output[9] = x11; | 1105 output[9] = (tran_low_t)x11; |
993 output[10] = x15; | 1106 output[10] = (tran_low_t)x15; |
994 output[11] = x7; | 1107 output[11] = (tran_low_t)x7; |
995 output[12] = x5; | 1108 output[12] = (tran_low_t)x5; |
996 output[13] = - x13; | 1109 output[13] = (tran_low_t)-x13; |
997 output[14] = x9; | 1110 output[14] = (tran_low_t)x9; |
998 output[15] = - x1; | 1111 output[15] = (tran_low_t)-x1; |
999 } | 1112 } |
1000 | 1113 |
1001 static const transform_2d FHT_16[] = { | 1114 static const transform_2d FHT_16[] = { |
1002 { fdct16, fdct16 }, // DCT_DCT = 0 | 1115 { fdct16, fdct16 }, // DCT_DCT = 0 |
1003 { fadst16, fdct16 }, // ADST_DCT = 1 | 1116 { fadst16, fdct16 }, // ADST_DCT = 1 |
1004 { fdct16, fadst16 }, // DCT_ADST = 2 | 1117 { fdct16, fadst16 }, // DCT_ADST = 2 |
1005 { fadst16, fadst16 } // ADST_ADST = 3 | 1118 { fadst16, fadst16 } // ADST_ADST = 3 |
1006 }; | 1119 }; |
1007 | 1120 |
1008 void vp9_fht16x16_c(const int16_t *input, tran_low_t *output, | 1121 void vp9_fht16x16_c(const int16_t *input, tran_low_t *output, |
(...skipping 388 matching lines...)
1397 output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; | 1510 output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; |
1398 } | 1511 } |
1399 | 1512 |
1400 // Rows | 1513 // Rows |
1401 for (i = 0; i < 32; ++i) { | 1514 for (i = 0; i < 32; ++i) { |
1402 tran_high_t temp_in[32], temp_out[32]; | 1515 tran_high_t temp_in[32], temp_out[32]; |
1403 for (j = 0; j < 32; ++j) | 1516 for (j = 0; j < 32; ++j) |
1404 temp_in[j] = output[j + i * 32]; | 1517 temp_in[j] = output[j + i * 32]; |
1405 fdct32(temp_in, temp_out, 0); | 1518 fdct32(temp_in, temp_out, 0); |
1406 for (j = 0; j < 32; ++j) | 1519 for (j = 0; j < 32; ++j) |
1407 out[j + i * 32] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2; | 1520 out[j + i * 32] = |
| 1521 (tran_low_t)((temp_out[j] + 1 + (temp_out[j] < 0)) >> 2); |
1408 } | 1522 } |
1409 } | 1523 } |
1410 | 1524 |
1411 // Note that although we use dct_32_round in dct32 computation flow, | 1525 // Note that although we use dct_32_round in dct32 computation flow, |
1412 // this 2d fdct32x32 for rate-distortion optimization loop is operating | 1526 // this 2d fdct32x32 for rate-distortion optimization loop is operating |
1413 // within 16 bits precision. | 1527 // within 16 bits precision. |
1414 void vp9_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) { | 1528 void vp9_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, int stride) { |
1415 int i, j; | 1529 int i, j; |
1416 tran_high_t output[32 * 32]; | 1530 tran_high_t output[32 * 32]; |
1417 | 1531 |
(...skipping 10 matching lines...)
1428 output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; | 1542 output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; |
1429 } | 1543 } |
1430 | 1544 |
1431 // Rows | 1545 // Rows |
1432 for (i = 0; i < 32; ++i) { | 1546 for (i = 0; i < 32; ++i) { |
1433 tran_high_t temp_in[32], temp_out[32]; | 1547 tran_high_t temp_in[32], temp_out[32]; |
1434 for (j = 0; j < 32; ++j) | 1548 for (j = 0; j < 32; ++j) |
1435 temp_in[j] = output[j + i * 32]; | 1549 temp_in[j] = output[j + i * 32]; |
1436 fdct32(temp_in, temp_out, 1); | 1550 fdct32(temp_in, temp_out, 1); |
1437 for (j = 0; j < 32; ++j) | 1551 for (j = 0; j < 32; ++j) |
1438 out[j + i * 32] = temp_out[j]; | 1552 out[j + i * 32] = (tran_low_t)temp_out[j]; |
1439 } | 1553 } |
1440 } | 1554 } |
1441 | 1555 |
1442 #if CONFIG_VP9_HIGHBITDEPTH | 1556 #if CONFIG_VP9_HIGHBITDEPTH |
1443 void vp9_highbd_fdct4x4_c(const int16_t *input, tran_low_t *output, | 1557 void vp9_highbd_fdct4x4_c(const int16_t *input, tran_low_t *output, |
1444 int stride) { | 1558 int stride) { |
1445 vp9_fdct4x4_c(input, output, stride); | 1559 vp9_fdct4x4_c(input, output, stride); |
1446 } | 1560 } |
1447 | 1561 |
1448 void vp9_highbd_fht4x4_c(const int16_t *input, tran_low_t *output, | 1562 void vp9_highbd_fht4x4_c(const int16_t *input, tran_low_t *output, |
(...skipping 43 matching lines...)
1492 | 1606 |
1493 void vp9_highbd_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) { | 1607 void vp9_highbd_fdct32x32_c(const int16_t *input, tran_low_t *out, int stride) { |
1494 vp9_fdct32x32_c(input, out, stride); | 1608 vp9_fdct32x32_c(input, out, stride); |
1495 } | 1609 } |
1496 | 1610 |
1497 void vp9_highbd_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, | 1611 void vp9_highbd_fdct32x32_rd_c(const int16_t *input, tran_low_t *out, |
1498 int stride) { | 1612 int stride) { |
1499 vp9_fdct32x32_rd_c(input, out, stride); | 1613 vp9_fdct32x32_rd_c(input, out, stride); |
1500 } | 1614 } |
1501 #endif // CONFIG_VP9_HIGHBITDEPTH | 1615 #endif // CONFIG_VP9_HIGHBITDEPTH |
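
Context for the (tran_low_t) casts added throughout this diff: a minimal, self-contained sketch of the coefficient types and the rounding helper whose result is being narrowed, assuming the usual libvpx definitions (a wide tran_high_t for intermediate math, a narrower tran_low_t for stored coefficients, DCT_CONST_BITS equal to 14). The real typedefs and ROUND_POWER_OF_TWO live in the library headers, so treat this as an illustration of why the narrowing is now spelled out explicitly rather than as the project's code.

#include <stdint.h>

/* Coefficient types as typically configured in libvpx: a wider type for
 * intermediate computations, a narrower type for coefficients written back
 * to memory. */
#if CONFIG_VP9_HIGHBITDEPTH
typedef int64_t tran_high_t;
typedef int32_t tran_low_t;
#else
typedef int32_t tran_high_t;
typedef int16_t tran_low_t;
#endif

#define DCT_CONST_BITS 14
#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))

/* Rounds a fixed-point product back down by DCT_CONST_BITS and returns the
 * wide type; callers that store the result into a tran_low_t buffer
 * therefore cast it explicitly, as in the NEW column above. */
static tran_high_t fdct_round_shift(tran_high_t input) {
  return ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
}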