| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| | 11 #include <assert.h> |
| 11 #if defined(_MSC_VER) && _MSC_VER <= 1500 | 12 #if defined(_MSC_VER) && _MSC_VER <= 1500 |
| 12 // Need to include math.h before calling tmmintrin.h/intrin.h | 13 // Need to include math.h before calling tmmintrin.h/intrin.h |
| 13 // in certain versions of MSVS. | 14 // in certain versions of MSVS. |
| 14 #include <math.h> | 15 #include <math.h> |
| 15 #endif | 16 #endif |
| 16 #include <tmmintrin.h> // SSSE3 | 17 #include <tmmintrin.h> // SSSE3 |
| 17 #include "vp9/common/x86/vp9_idct_intrin_sse2.h" | 18 #include "vp9/common/x86/vp9_idct_intrin_sse2.h" |
| 18 | 19 |
| 19 void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride, | 20 void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride, |
| 20 int16_t* coeff_ptr, intptr_t n_coeffs, | 21 int16_t* coeff_ptr, intptr_t n_coeffs, |
| (...skipping 346 matching lines...) |
| 367 | 368 |
| 368 // AC only loop | 369 // AC only loop |
| 369 index = 2; | 370 index = 2; |
| 370 while (n_coeffs < 0) { | 371 while (n_coeffs < 0) { |
| 371 __m128i coeff0, coeff1; | 372 __m128i coeff0, coeff1; |
| 372 { | 373 { |
| 373 __m128i coeff0_sign, coeff1_sign; | 374 __m128i coeff0_sign, coeff1_sign; |
| 374 __m128i qcoeff0, qcoeff1; | 375 __m128i qcoeff0, qcoeff1; |
| 375 __m128i qtmp0, qtmp1; | 376 __m128i qtmp0, qtmp1; |
| 376 | 377 |
| | 378 assert(index < (int)(sizeof(in) / sizeof(in[0])) - 1); |
| 377 coeff0 = *in[index]; | 379 coeff0 = *in[index]; |
| 378 coeff1 = *in[index + 1]; | 380 coeff1 = *in[index + 1]; |
| 379 | 381 |
| 380 // Poor man's sign extract | 382 // Poor man's sign extract |
| 381 coeff0_sign = _mm_srai_epi16(coeff0, 15); | 383 coeff0_sign = _mm_srai_epi16(coeff0, 15); |
| 382 coeff1_sign = _mm_srai_epi16(coeff1, 15); | 384 coeff1_sign = _mm_srai_epi16(coeff1, 15); |
| 383 qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign); | 385 qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign); |
| 384 qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign); | 386 qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign); |
| 385 qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); | 387 qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); |
| 386 qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); | 388 qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); |
| (...skipping 58 matching lines...) |
| 445 do { | 447 do { |
| 446 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero); | 448 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero); |
| 447 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero); | 449 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero); |
| 448 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero); | 450 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero); |
| 449 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero); | 451 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero); |
| 450 n_coeffs += 8 * 2; | 452 n_coeffs += 8 * 2; |
| 451 } while (n_coeffs < 0); | 453 } while (n_coeffs < 0); |
| 452 *eob_ptr = 0; | 454 *eob_ptr = 0; |
| 453 } | 455 } |
| 454 } | 456 } |
| OLD | NEW |