| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include <assert.h> |
| 11 #include <emmintrin.h> // SSE2 | 12 #include <emmintrin.h> // SSE2 |
| 12 #include "vp9/common/vp9_idct.h" // for cospi constants | 13 #include "vp9/common/vp9_idct.h" // for cospi constants |
| 13 #include "vp9/encoder/vp9_dct.h" | 14 #include "vp9/encoder/vp9_dct.h" |
| 14 #include "vp9/encoder/x86/vp9_dct_sse2.h" | 15 #include "vp9/encoder/x86/vp9_dct_sse2.h" |
| 15 #include "vpx_ports/mem.h" | 16 #include "vpx_ports/mem.h" |
| 16 | 17 |
| 17 void vp9_fdct4x4_1_sse2(const int16_t *input, tran_low_t *output, int stride) { | 18 void vp9_fdct4x4_1_sse2(const int16_t *input, tran_low_t *output, int stride) { |
| 18 __m128i in0, in1; | 19 __m128i in0, in1; |
| 19 __m128i tmp; | 20 __m128i tmp; |
| 20 const __m128i zero = _mm_setzero_si128(); | 21 const __m128i zero = _mm_setzero_si128(); |
| (...skipping 582 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 603 | 604 |
| 604 // AC only loop | 605 // AC only loop |
| 605 index = 2; | 606 index = 2; |
| 606 while (n_coeffs < 0) { | 607 while (n_coeffs < 0) { |
| 607 __m128i coeff0, coeff1; | 608 __m128i coeff0, coeff1; |
| 608 { | 609 { |
| 609 __m128i coeff0_sign, coeff1_sign; | 610 __m128i coeff0_sign, coeff1_sign; |
| 610 __m128i qcoeff0, qcoeff1; | 611 __m128i qcoeff0, qcoeff1; |
| 611 __m128i qtmp0, qtmp1; | 612 __m128i qtmp0, qtmp1; |
| 612 | 613 |
| 614 assert(index < (int)(sizeof(in) / sizeof(in[0])) - 1); |
| 613 coeff0 = *in[index]; | 615 coeff0 = *in[index]; |
| 614 coeff1 = *in[index + 1]; | 616 coeff1 = *in[index + 1]; |
| 615 | 617 |
| 616 // Poor man's sign extract | 618 // Poor man's sign extract |
| 617 coeff0_sign = _mm_srai_epi16(coeff0, 15); | 619 coeff0_sign = _mm_srai_epi16(coeff0, 15); |
| 618 coeff1_sign = _mm_srai_epi16(coeff1, 15); | 620 coeff1_sign = _mm_srai_epi16(coeff1, 15); |
| 619 qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign); | 621 qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign); |
| 620 qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign); | 622 qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign); |
| 621 qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); | 623 qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); |
| 622 qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); | 624 qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); |
| (...skipping 1793 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2416 | 2418 |
| 2417 #define FDCT32x32_2D vp9_highbd_fdct32x32_sse2 | 2419 #define FDCT32x32_2D vp9_highbd_fdct32x32_sse2 |
| 2418 #define FDCT32x32_HIGH_PRECISION 1 | 2420 #define FDCT32x32_HIGH_PRECISION 1 |
| 2419 #include "vp9/encoder/x86/vp9_dct32x32_sse2.c" // NOLINT | 2421 #include "vp9/encoder/x86/vp9_dct32x32_sse2.c" // NOLINT |
| 2420 #undef FDCT32x32_2D | 2422 #undef FDCT32x32_2D |
| 2421 #undef FDCT32x32_HIGH_PRECISION | 2423 #undef FDCT32x32_HIGH_PRECISION |
| 2422 | 2424 |
| 2423 #undef DCT_HIGH_BIT_DEPTH | 2425 #undef DCT_HIGH_BIT_DEPTH |
| 2424 | 2426 |
| 2425 #endif // CONFIG_VP9_HIGHBITDEPTH | 2427 #endif // CONFIG_VP9_HIGHBITDEPTH |
| OLD | NEW |