OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include <emmintrin.h> | 11 #include <emmintrin.h> |
12 | 12 |
13 #include "vp9/common/vp9_common.h" | 13 #include "vp9/common/vp9_common.h" |
14 | 14 |
15 #if CONFIG_VP9_HIGHBITDEPTH | 15 #if CONFIG_VP9_HIGHBITDEPTH |
16 // from vp9_idct.h: typedef int32_t tran_low_t; | 16 // from vp9_idct.h: typedef int32_t tran_low_t; |
17 void vp9_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr, | 17 void vp9_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr, |
18 intptr_t count, | 18 intptr_t count, |
19 int skip_block, | 19 int skip_block, |
20 const int16_t *zbin_ptr, | 20 const int16_t *zbin_ptr, |
21 const int16_t *round_ptr, | 21 const int16_t *round_ptr, |
22 const int16_t *quant_ptr, | 22 const int16_t *quant_ptr, |
23 const int16_t *quant_shift_ptr, | 23 const int16_t *quant_shift_ptr, |
24 tran_low_t *qcoeff_ptr, | 24 tran_low_t *qcoeff_ptr, |
25 tran_low_t *dqcoeff_ptr, | 25 tran_low_t *dqcoeff_ptr, |
26 const int16_t *dequant_ptr, | 26 const int16_t *dequant_ptr, |
27 int zbin_oq_value, | |
28 uint16_t *eob_ptr, | 27 uint16_t *eob_ptr, |
29 const int16_t *scan, | 28 const int16_t *scan, |
30 const int16_t *iscan) { | 29 const int16_t *iscan) { |
31 int i, j, non_zero_regs = (int)count / 4, eob_i = -1; | 30 int i, j, non_zero_regs = (int)count / 4, eob_i = -1; |
32 __m128i zbins[2]; | 31 __m128i zbins[2]; |
33 __m128i nzbins[2]; | 32 __m128i nzbins[2]; |
34 | 33 |
35 zbins[0] = _mm_set_epi32((int)(zbin_ptr[1] + zbin_oq_value), | 34 zbins[0] = _mm_set_epi32((int)zbin_ptr[1], |
36 (int)(zbin_ptr[1] + zbin_oq_value), | 35 (int)zbin_ptr[1], |
37 (int)(zbin_ptr[1] + zbin_oq_value), | 36 (int)zbin_ptr[1], |
38 (int)(zbin_ptr[0] + zbin_oq_value)); | 37 (int)zbin_ptr[0]); |
39 zbins[1] = _mm_set1_epi32((int)(zbin_ptr[1] + zbin_oq_value)); | 38 zbins[1] = _mm_set1_epi32((int)zbin_ptr[1]); |
40 | 39 |
41 nzbins[0] = _mm_setzero_si128(); | 40 nzbins[0] = _mm_setzero_si128(); |
42 nzbins[1] = _mm_setzero_si128(); | 41 nzbins[1] = _mm_setzero_si128(); |
43 nzbins[0] = _mm_sub_epi32(nzbins[0], zbins[0]); | 42 nzbins[0] = _mm_sub_epi32(nzbins[0], zbins[0]); |
44 nzbins[1] = _mm_sub_epi32(nzbins[1], zbins[1]); | 43 nzbins[1] = _mm_sub_epi32(nzbins[1], zbins[1]); |
45 | 44 |
46 (void)scan; | 45 (void)scan; |
47 | 46 |
48 vpx_memset(qcoeff_ptr, 0, count * sizeof(*qcoeff_ptr)); | 47 vpx_memset(qcoeff_ptr, 0, count * sizeof(*qcoeff_ptr)); |
49 vpx_memset(dqcoeff_ptr, 0, count * sizeof(*dqcoeff_ptr)); | 48 vpx_memset(dqcoeff_ptr, 0, count * sizeof(*dqcoeff_ptr)); |
(...skipping 54 matching lines...)
104 void vp9_highbd_quantize_b_32x32_sse2(const tran_low_t *coeff_ptr, | 103 void vp9_highbd_quantize_b_32x32_sse2(const tran_low_t *coeff_ptr, |
105 intptr_t n_coeffs, | 104 intptr_t n_coeffs, |
106 int skip_block, | 105 int skip_block, |
107 const int16_t *zbin_ptr, | 106 const int16_t *zbin_ptr, |
108 const int16_t *round_ptr, | 107 const int16_t *round_ptr, |
109 const int16_t *quant_ptr, | 108 const int16_t *quant_ptr, |
110 const int16_t *quant_shift_ptr, | 109 const int16_t *quant_shift_ptr, |
111 tran_low_t *qcoeff_ptr, | 110 tran_low_t *qcoeff_ptr, |
112 tran_low_t *dqcoeff_ptr, | 111 tran_low_t *dqcoeff_ptr, |
113 const int16_t *dequant_ptr, | 112 const int16_t *dequant_ptr, |
114 int zbin_oq_value, | |
115 uint16_t *eob_ptr, | 113 uint16_t *eob_ptr, |
116 const int16_t *scan, | 114 const int16_t *scan, |
117 const int16_t *iscan) { | 115 const int16_t *iscan) { |
118 __m128i zbins[2]; | 116 __m128i zbins[2]; |
119 __m128i nzbins[2]; | 117 __m128i nzbins[2]; |
120 int idx = 0; | 118 int idx = 0; |
121 int idx_arr[1024]; | 119 int idx_arr[1024]; |
122 int i, eob = -1; | 120 int i, eob = -1; |
123 const int zbin0_tmp = ROUND_POWER_OF_TWO(zbin_ptr[0] + zbin_oq_value, 1); | 121 const int zbin0_tmp = ROUND_POWER_OF_TWO(zbin_ptr[0], 1); |
124 const int zbin1_tmp = ROUND_POWER_OF_TWO(zbin_ptr[1] + zbin_oq_value, 1); | 122 const int zbin1_tmp = ROUND_POWER_OF_TWO(zbin_ptr[1], 1); |
125 (void)scan; | 123 (void)scan; |
126 zbins[0] = _mm_set_epi32((zbin1_tmp + zbin_oq_value), | 124 zbins[0] = _mm_set_epi32(zbin1_tmp, |
127 (zbin1_tmp + zbin_oq_value), | 125 zbin1_tmp, |
128 (zbin1_tmp + zbin_oq_value), | 126 zbin1_tmp, |
129 (zbin0_tmp + zbin_oq_value)); | 127 zbin0_tmp); |
130 zbins[1] = _mm_set1_epi32((zbin1_tmp + zbin_oq_value)); | 128 zbins[1] = _mm_set1_epi32(zbin1_tmp); |
131 | 129 |
132 nzbins[0] = _mm_setzero_si128(); | 130 nzbins[0] = _mm_setzero_si128(); |
133 nzbins[1] = _mm_setzero_si128(); | 131 nzbins[1] = _mm_setzero_si128(); |
134 nzbins[0] = _mm_sub_epi32(nzbins[0], zbins[0]); | 132 nzbins[0] = _mm_sub_epi32(nzbins[0], zbins[0]); |
135 nzbins[1] = _mm_sub_epi32(nzbins[1], zbins[1]); | 133 nzbins[1] = _mm_sub_epi32(nzbins[1], zbins[1]); |
136 | 134 |
137 vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); | 135 vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); |
138 vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); | 136 vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); |
139 | 137 |
140 if (!skip_block) { | 138 if (!skip_block) { |
(...skipping 32 matching lines...)
173 qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; | 171 qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; |
174 dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2; | 172 dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2; |
175 | 173 |
176 if (tmp) | 174 if (tmp) |
177 eob = iscan[idx_arr[i]] > eob ? iscan[idx_arr[i]] : eob; | 175 eob = iscan[idx_arr[i]] > eob ? iscan[idx_arr[i]] : eob; |
178 } | 176 } |
179 } | 177 } |
180 *eob_ptr = eob + 1; | 178 *eob_ptr = eob + 1; |
181 } | 179 } |
182 #endif | 180 #endif |
OLD | NEW |