| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include <emmintrin.h> | 11 #include <emmintrin.h> |
| 12 #include <xmmintrin.h> | 12 #include <xmmintrin.h> |
| 13 | 13 |
| 14 #include "vpx/vpx_integer.h" | 14 #include "vpx/vpx_integer.h" |
| 15 | 15 |
| 16 void vp9_quantize_b_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs, | 16 void vp9_quantize_b_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs, |
| 17 int skip_block, const int16_t* zbin_ptr, | 17 int skip_block, const int16_t* zbin_ptr, |
| 18 const int16_t* round_ptr, const int16_t* quant_ptr, | 18 const int16_t* round_ptr, const int16_t* quant_ptr, |
| 19 const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr, | 19 const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr, |
| 20 int16_t* dqcoeff_ptr, const int16_t* dequant_ptr, | 20 int16_t* dqcoeff_ptr, const int16_t* dequant_ptr, |
| 21 int zbin_oq_value, uint16_t* eob_ptr, | 21 uint16_t* eob_ptr, |
| 22 const int16_t* scan_ptr, | 22 const int16_t* scan_ptr, |
| 23 const int16_t* iscan_ptr) { | 23 const int16_t* iscan_ptr) { |
| 24 __m128i zero; | 24 __m128i zero; |
| 25 (void)scan_ptr; | 25 (void)scan_ptr; |
| 26 | 26 |
| 27 coeff_ptr += n_coeffs; | 27 coeff_ptr += n_coeffs; |
| 28 iscan_ptr += n_coeffs; | 28 iscan_ptr += n_coeffs; |
| 29 qcoeff_ptr += n_coeffs; | 29 qcoeff_ptr += n_coeffs; |
| 30 dqcoeff_ptr += n_coeffs; | 30 dqcoeff_ptr += n_coeffs; |
| 31 n_coeffs = -n_coeffs; | 31 n_coeffs = -n_coeffs; |
| 32 zero = _mm_setzero_si128(); | 32 zero = _mm_setzero_si128(); |
| 33 if (!skip_block) { | 33 if (!skip_block) { |
| 34 __m128i eob; | 34 __m128i eob; |
| 35 __m128i zbin; | 35 __m128i zbin; |
| 36 __m128i round, quant, dequant, shift; | 36 __m128i round, quant, dequant, shift; |
| 37 { | 37 { |
| 38 __m128i coeff0, coeff1; | 38 __m128i coeff0, coeff1; |
| 39 | 39 |
| 40 // Setup global values | 40 // Setup global values |
| 41 { | 41 { |
| 42 __m128i zbin_oq; | |
| 43 __m128i pw_1; | 42 __m128i pw_1; |
| 44 zbin_oq = _mm_set1_epi16(zbin_oq_value); | |
| 45 zbin = _mm_load_si128((const __m128i*)zbin_ptr); | 43 zbin = _mm_load_si128((const __m128i*)zbin_ptr); |
| 46 round = _mm_load_si128((const __m128i*)round_ptr); | 44 round = _mm_load_si128((const __m128i*)round_ptr); |
| 47 quant = _mm_load_si128((const __m128i*)quant_ptr); | 45 quant = _mm_load_si128((const __m128i*)quant_ptr); |
| 48 zbin = _mm_add_epi16(zbin, zbin_oq); | |
| 49 pw_1 = _mm_set1_epi16(1); | 46 pw_1 = _mm_set1_epi16(1); |
| 50 zbin = _mm_sub_epi16(zbin, pw_1); | 47 zbin = _mm_sub_epi16(zbin, pw_1); |
| 51 dequant = _mm_load_si128((const __m128i*)dequant_ptr); | 48 dequant = _mm_load_si128((const __m128i*)dequant_ptr); |
| 52 shift = _mm_load_si128((const __m128i*)quant_shift_ptr); | 49 shift = _mm_load_si128((const __m128i*)quant_shift_ptr); |
| 53 } | 50 } |
| 54 | 51 |
| 55 { | 52 { |
| 56 __m128i coeff0_sign, coeff1_sign; | 53 __m128i coeff0_sign, coeff1_sign; |
| 57 __m128i qcoeff0, qcoeff1; | 54 __m128i qcoeff0, qcoeff1; |
| 58 __m128i qtmp0, qtmp1; | 55 __m128i qtmp0, qtmp1; |
| (...skipping 163 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 222 } while (n_coeffs < 0); | 219 } while (n_coeffs < 0); |
| 223 *eob_ptr = 0; | 220 *eob_ptr = 0; |
| 224 } | 221 } |
| 225 } | 222 } |
| 226 | 223 |
| 227 void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs, | 224 void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs, |
| 228 int skip_block, const int16_t* zbin_ptr, | 225 int skip_block, const int16_t* zbin_ptr, |
| 229 const int16_t* round_ptr, const int16_t* quant_ptr, | 226 const int16_t* round_ptr, const int16_t* quant_ptr, |
| 230 const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr, | 227 const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr, |
| 231 int16_t* dqcoeff_ptr, const int16_t* dequant_ptr, | 228 int16_t* dqcoeff_ptr, const int16_t* dequant_ptr, |
| 232 int zbin_oq_value, uint16_t* eob_ptr, | 229 uint16_t* eob_ptr, |
| 233 const int16_t* scan_ptr, | 230 const int16_t* scan_ptr, |
| 234 const int16_t* iscan_ptr) { | 231 const int16_t* iscan_ptr) { |
| 235 __m128i zero; | 232 __m128i zero; |
| 236 (void)scan_ptr; | 233 (void)scan_ptr; |
| 237 (void)zbin_ptr; | 234 (void)zbin_ptr; |
| 238 (void)quant_shift_ptr; | 235 (void)quant_shift_ptr; |
| 239 (void)zbin_oq_value; | |
| 240 | 236 |
| 241 coeff_ptr += n_coeffs; | 237 coeff_ptr += n_coeffs; |
| 242 iscan_ptr += n_coeffs; | 238 iscan_ptr += n_coeffs; |
| 243 qcoeff_ptr += n_coeffs; | 239 qcoeff_ptr += n_coeffs; |
| 244 dqcoeff_ptr += n_coeffs; | 240 dqcoeff_ptr += n_coeffs; |
| 245 n_coeffs = -n_coeffs; | 241 n_coeffs = -n_coeffs; |
| 246 zero = _mm_setzero_si128(); | 242 zero = _mm_setzero_si128(); |
| 247 | 243 |
| 248 if (!skip_block) { | 244 if (!skip_block) { |
| 249 __m128i eob; | 245 __m128i eob; |
| (...skipping 148 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 398 do { | 394 do { |
| 399 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero); | 395 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero); |
| 400 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero); | 396 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero); |
| 401 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero); | 397 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero); |
| 402 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero); | 398 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero); |
| 403 n_coeffs += 8 * 2; | 399 n_coeffs += 8 * 2; |
| 404 } while (n_coeffs < 0); | 400 } while (n_coeffs < 0); |
| 405 *eob_ptr = 0; | 401 *eob_ptr = 0; |
| 406 } | 402 } |
| 407 } | 403 } |
| OLD | NEW |