OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #if defined(_MSC_VER) && _MSC_VER <= 1500 | 11 #if defined(_MSC_VER) && _MSC_VER <= 1500 |
12 // Need to include math.h before calling tmmintrin.h/intrin.h | 12 // Need to include math.h before calling tmmintrin.h/intrin.h |
13 // in certain versions of MSVS. | 13 // in certain versions of MSVS. |
14 #include <math.h> | 14 #include <math.h> |
15 #endif | 15 #endif |
16 #include <tmmintrin.h> // SSSE3 | 16 #include <tmmintrin.h> // SSSE3 |
17 #include "vp9/common/x86/vp9_idct_intrin_sse2.h" | 17 #include "vp9/common/x86/vp9_idct_intrin_sse2.h" |
18 | 18 |
19 void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride, | 19 void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride, |
20 int16_t* coeff_ptr, intptr_t n_coeffs, | 20 int16_t* coeff_ptr, intptr_t n_coeffs, |
21 int skip_block, const int16_t* zbin_ptr, | 21 int skip_block, const int16_t* zbin_ptr, |
22 const int16_t* round_ptr, const int16_t* quant_ptr, | 22 const int16_t* round_ptr, const int16_t* quant_ptr, |
23 const int16_t* quant_shift_ptr, | 23 const int16_t* quant_shift_ptr, |
24 int16_t* qcoeff_ptr, | 24 int16_t* qcoeff_ptr, |
25 int16_t* dqcoeff_ptr, const int16_t* dequant_ptr, | 25 int16_t* dqcoeff_ptr, const int16_t* dequant_ptr, |
26 int zbin_oq_value, uint16_t* eob_ptr, | 26 uint16_t* eob_ptr, |
27 const int16_t* scan_ptr, | 27 const int16_t* scan_ptr, |
28 const int16_t* iscan_ptr) { | 28 const int16_t* iscan_ptr) { |
29 __m128i zero; | 29 __m128i zero; |
30 int pass; | 30 int pass; |
31 // Constants | 31 // Constants |
32 // When we use them, in one case, they are all the same. In all others | 32 // When we use them, in one case, they are all the same. In all others |
33 // it's a pair of them that we need to repeat four times. This is done | 33 // it's a pair of them that we need to repeat four times. This is done |
34 // by constructing the 32 bit constant corresponding to that pair. | 34 // by constructing the 32 bit constant corresponding to that pair. |
35 const __m128i k__dual_p16_p16 = dual_set_epi16(23170, 23170); | 35 const __m128i k__dual_p16_p16 = dual_set_epi16(23170, 23170); |
36 const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); | 36 const __m128i k__cospi_p16_p16 = _mm_set1_epi16((int16_t)cospi_16_64); |
(...skipping 13 matching lines...) Expand all Loading... |
50 __m128i in4 = _mm_load_si128((const __m128i *)(input + 4 * stride)); | 50 __m128i in4 = _mm_load_si128((const __m128i *)(input + 4 * stride)); |
51 __m128i in5 = _mm_load_si128((const __m128i *)(input + 5 * stride)); | 51 __m128i in5 = _mm_load_si128((const __m128i *)(input + 5 * stride)); |
52 __m128i in6 = _mm_load_si128((const __m128i *)(input + 6 * stride)); | 52 __m128i in6 = _mm_load_si128((const __m128i *)(input + 6 * stride)); |
53 __m128i in7 = _mm_load_si128((const __m128i *)(input + 7 * stride)); | 53 __m128i in7 = _mm_load_si128((const __m128i *)(input + 7 * stride)); |
54 __m128i *in[8]; | 54 __m128i *in[8]; |
55 int index = 0; | 55 int index = 0; |
56 | 56 |
57 (void)scan_ptr; | 57 (void)scan_ptr; |
58 (void)zbin_ptr; | 58 (void)zbin_ptr; |
59 (void)quant_shift_ptr; | 59 (void)quant_shift_ptr; |
60 (void)zbin_oq_value; | |
61 (void)coeff_ptr; | 60 (void)coeff_ptr; |
62 | 61 |
63 // Pre-condition input (shift by two) | 62 // Pre-condition input (shift by two) |
64 in0 = _mm_slli_epi16(in0, 2); | 63 in0 = _mm_slli_epi16(in0, 2); |
65 in1 = _mm_slli_epi16(in1, 2); | 64 in1 = _mm_slli_epi16(in1, 2); |
66 in2 = _mm_slli_epi16(in2, 2); | 65 in2 = _mm_slli_epi16(in2, 2); |
67 in3 = _mm_slli_epi16(in3, 2); | 66 in3 = _mm_slli_epi16(in3, 2); |
68 in4 = _mm_slli_epi16(in4, 2); | 67 in4 = _mm_slli_epi16(in4, 2); |
69 in5 = _mm_slli_epi16(in5, 2); | 68 in5 = _mm_slli_epi16(in5, 2); |
70 in6 = _mm_slli_epi16(in6, 2); | 69 in6 = _mm_slli_epi16(in6, 2); |
(...skipping 418 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
489 do { | 488 do { |
490 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero); | 489 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero); |
491 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero); | 490 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero); |
492 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero); | 491 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero); |
493 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero); | 492 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero); |
494 n_coeffs += 8 * 2; | 493 n_coeffs += 8 * 2; |
495 } while (n_coeffs < 0); | 494 } while (n_coeffs < 0); |
496 *eob_ptr = 0; | 495 *eob_ptr = 0; |
497 } | 496 } |
498 } | 497 } |
OLD | NEW |