OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include <emmintrin.h> | 11 #include <emmintrin.h> |
12 #include <xmmintrin.h> | 12 #include <xmmintrin.h> |
13 | 13 |
14 #include "vpx/vpx_integer.h" | 14 #include "vpx/vpx_integer.h" |
15 | 15 |
16 void vp9_quantize_b_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs, | 16 void vp9_quantize_b_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs, |
17 int skip_block, const int16_t* zbin_ptr, | 17 int skip_block, const int16_t* zbin_ptr, |
18 const int16_t* round_ptr, const int16_t* quant_ptr, | 18 const int16_t* round_ptr, const int16_t* quant_ptr, |
19 const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr, | 19 const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr, |
20 int16_t* dqcoeff_ptr, const int16_t* dequant_ptr, | 20 int16_t* dqcoeff_ptr, const int16_t* dequant_ptr, |
21 int zbin_oq_value, uint16_t* eob_ptr, | 21 uint16_t* eob_ptr, |
22 const int16_t* scan_ptr, | 22 const int16_t* scan_ptr, |
23 const int16_t* iscan_ptr) { | 23 const int16_t* iscan_ptr) { |
24 __m128i zero; | 24 __m128i zero; |
25 (void)scan_ptr; | 25 (void)scan_ptr; |
26 | 26 |
27 coeff_ptr += n_coeffs; | 27 coeff_ptr += n_coeffs; |
28 iscan_ptr += n_coeffs; | 28 iscan_ptr += n_coeffs; |
29 qcoeff_ptr += n_coeffs; | 29 qcoeff_ptr += n_coeffs; |
30 dqcoeff_ptr += n_coeffs; | 30 dqcoeff_ptr += n_coeffs; |
31 n_coeffs = -n_coeffs; | 31 n_coeffs = -n_coeffs; |
32 zero = _mm_setzero_si128(); | 32 zero = _mm_setzero_si128(); |
33 if (!skip_block) { | 33 if (!skip_block) { |
34 __m128i eob; | 34 __m128i eob; |
35 __m128i zbin; | 35 __m128i zbin; |
36 __m128i round, quant, dequant, shift; | 36 __m128i round, quant, dequant, shift; |
37 { | 37 { |
38 __m128i coeff0, coeff1; | 38 __m128i coeff0, coeff1; |
39 | 39 |
40 // Setup global values | 40 // Setup global values |
41 { | 41 { |
42 __m128i zbin_oq; | |
43 __m128i pw_1; | 42 __m128i pw_1; |
44 zbin_oq = _mm_set1_epi16(zbin_oq_value); | |
45 zbin = _mm_load_si128((const __m128i*)zbin_ptr); | 43 zbin = _mm_load_si128((const __m128i*)zbin_ptr); |
46 round = _mm_load_si128((const __m128i*)round_ptr); | 44 round = _mm_load_si128((const __m128i*)round_ptr); |
47 quant = _mm_load_si128((const __m128i*)quant_ptr); | 45 quant = _mm_load_si128((const __m128i*)quant_ptr); |
48 zbin = _mm_add_epi16(zbin, zbin_oq); | |
49 pw_1 = _mm_set1_epi16(1); | 46 pw_1 = _mm_set1_epi16(1); |
50 zbin = _mm_sub_epi16(zbin, pw_1); | 47 zbin = _mm_sub_epi16(zbin, pw_1); |
51 dequant = _mm_load_si128((const __m128i*)dequant_ptr); | 48 dequant = _mm_load_si128((const __m128i*)dequant_ptr); |
52 shift = _mm_load_si128((const __m128i*)quant_shift_ptr); | 49 shift = _mm_load_si128((const __m128i*)quant_shift_ptr); |
53 } | 50 } |
54 | 51 |
55 { | 52 { |
56 __m128i coeff0_sign, coeff1_sign; | 53 __m128i coeff0_sign, coeff1_sign; |
57 __m128i qcoeff0, qcoeff1; | 54 __m128i qcoeff0, qcoeff1; |
58 __m128i qtmp0, qtmp1; | 55 __m128i qtmp0, qtmp1; |
(...skipping 163 matching lines...) |
222 } while (n_coeffs < 0); | 219 } while (n_coeffs < 0); |
223 *eob_ptr = 0; | 220 *eob_ptr = 0; |
224 } | 221 } |
225 } | 222 } |
226 | 223 |
227 void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs, | 224 void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs, |
228 int skip_block, const int16_t* zbin_ptr, | 225 int skip_block, const int16_t* zbin_ptr, |
229 const int16_t* round_ptr, const int16_t* quant_ptr, | 226 const int16_t* round_ptr, const int16_t* quant_ptr, |
230 const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr, | 227 const int16_t* quant_shift_ptr, int16_t* qcoeff_ptr, |
231 int16_t* dqcoeff_ptr, const int16_t* dequant_ptr, | 228 int16_t* dqcoeff_ptr, const int16_t* dequant_ptr, |
232 int zbin_oq_value, uint16_t* eob_ptr, | 229 uint16_t* eob_ptr, |
233 const int16_t* scan_ptr, | 230 const int16_t* scan_ptr, |
234 const int16_t* iscan_ptr) { | 231 const int16_t* iscan_ptr) { |
235 __m128i zero; | 232 __m128i zero; |
236 (void)scan_ptr; | 233 (void)scan_ptr; |
237 (void)zbin_ptr; | 234 (void)zbin_ptr; |
238 (void)quant_shift_ptr; | 235 (void)quant_shift_ptr; |
239 (void)zbin_oq_value; | |
240 | 236 |
241 coeff_ptr += n_coeffs; | 237 coeff_ptr += n_coeffs; |
242 iscan_ptr += n_coeffs; | 238 iscan_ptr += n_coeffs; |
243 qcoeff_ptr += n_coeffs; | 239 qcoeff_ptr += n_coeffs; |
244 dqcoeff_ptr += n_coeffs; | 240 dqcoeff_ptr += n_coeffs; |
245 n_coeffs = -n_coeffs; | 241 n_coeffs = -n_coeffs; |
246 zero = _mm_setzero_si128(); | 242 zero = _mm_setzero_si128(); |
247 | 243 |
248 if (!skip_block) { | 244 if (!skip_block) { |
249 __m128i eob; | 245 __m128i eob; |
(...skipping 148 matching lines...) |
398 do { | 394 do { |
399 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero); | 395 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero); |
400 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero); | 396 _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero); |
401 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero); | 397 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero); |
402 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero); | 398 _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero); |
403 n_coeffs += 8 * 2; | 399 n_coeffs += 8 * 2; |
404 } while (n_coeffs < 0); | 400 } while (n_coeffs < 0); |
405 *eob_ptr = 0; | 401 *eob_ptr = 0; |
406 } | 402 } |
407 } | 403 } |
OLD | NEW |