Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(913)

Side by Side Diff: source/libvpx/vp9/encoder/x86/vp9_highbd_quantize_intrin_sse2.c

Issue 812033011: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include <emmintrin.h> 11 #include <emmintrin.h>
12 12
13 #include "vp9/common/vp9_common.h" 13 #include "vp9/common/vp9_common.h"
14 14
15 #if CONFIG_VP9_HIGHBITDEPTH 15 #if CONFIG_VP9_HIGHBITDEPTH
16 // from vp9_idct.h: typedef int32_t tran_low_t; 16 // from vp9_idct.h: typedef int32_t tran_low_t;
17 void vp9_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr, 17 void vp9_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr,
18 intptr_t count, 18 intptr_t count,
19 int skip_block, 19 int skip_block,
20 const int16_t *zbin_ptr, 20 const int16_t *zbin_ptr,
21 const int16_t *round_ptr, 21 const int16_t *round_ptr,
22 const int16_t *quant_ptr, 22 const int16_t *quant_ptr,
23 const int16_t *quant_shift_ptr, 23 const int16_t *quant_shift_ptr,
24 tran_low_t *qcoeff_ptr, 24 tran_low_t *qcoeff_ptr,
25 tran_low_t *dqcoeff_ptr, 25 tran_low_t *dqcoeff_ptr,
26 const int16_t *dequant_ptr, 26 const int16_t *dequant_ptr,
27 int zbin_oq_value,
28 uint16_t *eob_ptr, 27 uint16_t *eob_ptr,
29 const int16_t *scan, 28 const int16_t *scan,
30 const int16_t *iscan) { 29 const int16_t *iscan) {
31 int i, j, non_zero_regs = (int)count / 4, eob_i = -1; 30 int i, j, non_zero_regs = (int)count / 4, eob_i = -1;
32 __m128i zbins[2]; 31 __m128i zbins[2];
33 __m128i nzbins[2]; 32 __m128i nzbins[2];
34 33
35 zbins[0] = _mm_set_epi32((int)(zbin_ptr[1] + zbin_oq_value), 34 zbins[0] = _mm_set_epi32((int)zbin_ptr[1],
36 (int)(zbin_ptr[1] + zbin_oq_value), 35 (int)zbin_ptr[1],
37 (int)(zbin_ptr[1] + zbin_oq_value), 36 (int)zbin_ptr[1],
38 (int)(zbin_ptr[0] + zbin_oq_value)); 37 (int)zbin_ptr[0]);
39 zbins[1] = _mm_set1_epi32((int)(zbin_ptr[1] + zbin_oq_value)); 38 zbins[1] = _mm_set1_epi32((int)zbin_ptr[1]);
40 39
41 nzbins[0] = _mm_setzero_si128(); 40 nzbins[0] = _mm_setzero_si128();
42 nzbins[1] = _mm_setzero_si128(); 41 nzbins[1] = _mm_setzero_si128();
43 nzbins[0] = _mm_sub_epi32(nzbins[0], zbins[0]); 42 nzbins[0] = _mm_sub_epi32(nzbins[0], zbins[0]);
44 nzbins[1] = _mm_sub_epi32(nzbins[1], zbins[1]); 43 nzbins[1] = _mm_sub_epi32(nzbins[1], zbins[1]);
45 44
46 (void)scan; 45 (void)scan;
47 46
48 vpx_memset(qcoeff_ptr, 0, count * sizeof(*qcoeff_ptr)); 47 vpx_memset(qcoeff_ptr, 0, count * sizeof(*qcoeff_ptr));
49 vpx_memset(dqcoeff_ptr, 0, count * sizeof(*dqcoeff_ptr)); 48 vpx_memset(dqcoeff_ptr, 0, count * sizeof(*dqcoeff_ptr));
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
104 void vp9_highbd_quantize_b_32x32_sse2(const tran_low_t *coeff_ptr, 103 void vp9_highbd_quantize_b_32x32_sse2(const tran_low_t *coeff_ptr,
105 intptr_t n_coeffs, 104 intptr_t n_coeffs,
106 int skip_block, 105 int skip_block,
107 const int16_t *zbin_ptr, 106 const int16_t *zbin_ptr,
108 const int16_t *round_ptr, 107 const int16_t *round_ptr,
109 const int16_t *quant_ptr, 108 const int16_t *quant_ptr,
110 const int16_t *quant_shift_ptr, 109 const int16_t *quant_shift_ptr,
111 tran_low_t *qcoeff_ptr, 110 tran_low_t *qcoeff_ptr,
112 tran_low_t *dqcoeff_ptr, 111 tran_low_t *dqcoeff_ptr,
113 const int16_t *dequant_ptr, 112 const int16_t *dequant_ptr,
114 int zbin_oq_value,
115 uint16_t *eob_ptr, 113 uint16_t *eob_ptr,
116 const int16_t *scan, 114 const int16_t *scan,
117 const int16_t *iscan) { 115 const int16_t *iscan) {
118 __m128i zbins[2]; 116 __m128i zbins[2];
119 __m128i nzbins[2]; 117 __m128i nzbins[2];
120 int idx = 0; 118 int idx = 0;
121 int idx_arr[1024]; 119 int idx_arr[1024];
122 int i, eob = -1; 120 int i, eob = -1;
123 const int zbin0_tmp = ROUND_POWER_OF_TWO(zbin_ptr[0] + zbin_oq_value, 1); 121 const int zbin0_tmp = ROUND_POWER_OF_TWO(zbin_ptr[0], 1);
124 const int zbin1_tmp = ROUND_POWER_OF_TWO(zbin_ptr[1] + zbin_oq_value, 1); 122 const int zbin1_tmp = ROUND_POWER_OF_TWO(zbin_ptr[1], 1);
125 (void)scan; 123 (void)scan;
126 zbins[0] = _mm_set_epi32((zbin1_tmp + zbin_oq_value), 124 zbins[0] = _mm_set_epi32(zbin1_tmp,
127 (zbin1_tmp + zbin_oq_value), 125 zbin1_tmp,
128 (zbin1_tmp + zbin_oq_value), 126 zbin1_tmp,
129 (zbin0_tmp + zbin_oq_value)); 127 zbin0_tmp);
130 zbins[1] = _mm_set1_epi32((zbin1_tmp + zbin_oq_value)); 128 zbins[1] = _mm_set1_epi32(zbin1_tmp);
131 129
132 nzbins[0] = _mm_setzero_si128(); 130 nzbins[0] = _mm_setzero_si128();
133 nzbins[1] = _mm_setzero_si128(); 131 nzbins[1] = _mm_setzero_si128();
134 nzbins[0] = _mm_sub_epi32(nzbins[0], zbins[0]); 132 nzbins[0] = _mm_sub_epi32(nzbins[0], zbins[0]);
135 nzbins[1] = _mm_sub_epi32(nzbins[1], zbins[1]); 133 nzbins[1] = _mm_sub_epi32(nzbins[1], zbins[1]);
136 134
137 vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); 135 vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
138 vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); 136 vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
139 137
140 if (!skip_block) { 138 if (!skip_block) {
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
173 qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; 171 qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
174 dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2; 172 dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
175 173
176 if (tmp) 174 if (tmp)
177 eob = iscan[idx_arr[i]] > eob ? iscan[idx_arr[i]] : eob; 175 eob = iscan[idx_arr[i]] > eob ? iscan[idx_arr[i]] : eob;
178 } 176 }
179 } 177 }
180 *eob_ptr = eob + 1; 178 *eob_ptr = eob + 1;
181 } 179 }
182 #endif 180 #endif
OLD | NEW
« no previous file with comments | « source/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c ('k') | source/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698