| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include <emmintrin.h> | 11 #include <emmintrin.h> |
| 12 | 12 |
| 13 #include "vp9/common/vp9_common.h" | 13 #include "vp9/common/vp9_common.h" |
| 14 | 14 |
| 15 #if CONFIG_VP9_HIGHBITDEPTH | 15 #if CONFIG_VP9_HIGHBITDEPTH |
| 16 // from vp9_idct.h: typedef int32_t tran_low_t; | 16 // from vp9_idct.h: typedef int32_t tran_low_t; |
| 17 void vp9_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr, | 17 void vp9_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr, |
| 18 intptr_t count, | 18 intptr_t count, |
| 19 int skip_block, | 19 int skip_block, |
| 20 const int16_t *zbin_ptr, | 20 const int16_t *zbin_ptr, |
| 21 const int16_t *round_ptr, | 21 const int16_t *round_ptr, |
| 22 const int16_t *quant_ptr, | 22 const int16_t *quant_ptr, |
| 23 const int16_t *quant_shift_ptr, | 23 const int16_t *quant_shift_ptr, |
| 24 tran_low_t *qcoeff_ptr, | 24 tran_low_t *qcoeff_ptr, |
| 25 tran_low_t *dqcoeff_ptr, | 25 tran_low_t *dqcoeff_ptr, |
| 26 const int16_t *dequant_ptr, | 26 const int16_t *dequant_ptr, |
| 27 int zbin_oq_value, | |
| 28 uint16_t *eob_ptr, | 27 uint16_t *eob_ptr, |
| 29 const int16_t *scan, | 28 const int16_t *scan, |
| 30 const int16_t *iscan) { | 29 const int16_t *iscan) { |
| 31 int i, j, non_zero_regs = (int)count / 4, eob_i = -1; | 30 int i, j, non_zero_regs = (int)count / 4, eob_i = -1; |
| 32 __m128i zbins[2]; | 31 __m128i zbins[2]; |
| 33 __m128i nzbins[2]; | 32 __m128i nzbins[2]; |
| 34 | 33 |
| 35 zbins[0] = _mm_set_epi32((int)(zbin_ptr[1] + zbin_oq_value), | 34 zbins[0] = _mm_set_epi32((int)zbin_ptr[1], |
| 36 (int)(zbin_ptr[1] + zbin_oq_value), | 35 (int)zbin_ptr[1], |
| 37 (int)(zbin_ptr[1] + zbin_oq_value), | 36 (int)zbin_ptr[1], |
| 38 (int)(zbin_ptr[0] + zbin_oq_value)); | 37 (int)zbin_ptr[0]); |
| 39 zbins[1] = _mm_set1_epi32((int)(zbin_ptr[1] + zbin_oq_value)); | 38 zbins[1] = _mm_set1_epi32((int)zbin_ptr[1]); |
| 40 | 39 |
| 41 nzbins[0] = _mm_setzero_si128(); | 40 nzbins[0] = _mm_setzero_si128(); |
| 42 nzbins[1] = _mm_setzero_si128(); | 41 nzbins[1] = _mm_setzero_si128(); |
| 43 nzbins[0] = _mm_sub_epi32(nzbins[0], zbins[0]); | 42 nzbins[0] = _mm_sub_epi32(nzbins[0], zbins[0]); |
| 44 nzbins[1] = _mm_sub_epi32(nzbins[1], zbins[1]); | 43 nzbins[1] = _mm_sub_epi32(nzbins[1], zbins[1]); |
| 45 | 44 |
| 46 (void)scan; | 45 (void)scan; |
| 47 | 46 |
| 48 vpx_memset(qcoeff_ptr, 0, count * sizeof(*qcoeff_ptr)); | 47 vpx_memset(qcoeff_ptr, 0, count * sizeof(*qcoeff_ptr)); |
| 49 vpx_memset(dqcoeff_ptr, 0, count * sizeof(*dqcoeff_ptr)); | 48 vpx_memset(dqcoeff_ptr, 0, count * sizeof(*dqcoeff_ptr)); |
| (...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 104 void vp9_highbd_quantize_b_32x32_sse2(const tran_low_t *coeff_ptr, | 103 void vp9_highbd_quantize_b_32x32_sse2(const tran_low_t *coeff_ptr, |
| 105 intptr_t n_coeffs, | 104 intptr_t n_coeffs, |
| 106 int skip_block, | 105 int skip_block, |
| 107 const int16_t *zbin_ptr, | 106 const int16_t *zbin_ptr, |
| 108 const int16_t *round_ptr, | 107 const int16_t *round_ptr, |
| 109 const int16_t *quant_ptr, | 108 const int16_t *quant_ptr, |
| 110 const int16_t *quant_shift_ptr, | 109 const int16_t *quant_shift_ptr, |
| 111 tran_low_t *qcoeff_ptr, | 110 tran_low_t *qcoeff_ptr, |
| 112 tran_low_t *dqcoeff_ptr, | 111 tran_low_t *dqcoeff_ptr, |
| 113 const int16_t *dequant_ptr, | 112 const int16_t *dequant_ptr, |
| 114 int zbin_oq_value, | |
| 115 uint16_t *eob_ptr, | 113 uint16_t *eob_ptr, |
| 116 const int16_t *scan, | 114 const int16_t *scan, |
| 117 const int16_t *iscan) { | 115 const int16_t *iscan) { |
| 118 __m128i zbins[2]; | 116 __m128i zbins[2]; |
| 119 __m128i nzbins[2]; | 117 __m128i nzbins[2]; |
| 120 int idx = 0; | 118 int idx = 0; |
| 121 int idx_arr[1024]; | 119 int idx_arr[1024]; |
| 122 int i, eob = -1; | 120 int i, eob = -1; |
| 123 const int zbin0_tmp = ROUND_POWER_OF_TWO(zbin_ptr[0] + zbin_oq_value, 1); | 121 const int zbin0_tmp = ROUND_POWER_OF_TWO(zbin_ptr[0], 1); |
| 124 const int zbin1_tmp = ROUND_POWER_OF_TWO(zbin_ptr[1] + zbin_oq_value, 1); | 122 const int zbin1_tmp = ROUND_POWER_OF_TWO(zbin_ptr[1], 1); |
| 125 (void)scan; | 123 (void)scan; |
| 126 zbins[0] = _mm_set_epi32((zbin1_tmp + zbin_oq_value), | 124 zbins[0] = _mm_set_epi32(zbin1_tmp, |
| 127 (zbin1_tmp + zbin_oq_value), | 125 zbin1_tmp, |
| 128 (zbin1_tmp + zbin_oq_value), | 126 zbin1_tmp, |
| 129 (zbin0_tmp + zbin_oq_value)); | 127 zbin0_tmp); |
| 130 zbins[1] = _mm_set1_epi32((zbin1_tmp + zbin_oq_value)); | 128 zbins[1] = _mm_set1_epi32(zbin1_tmp); |
| 131 | 129 |
| 132 nzbins[0] = _mm_setzero_si128(); | 130 nzbins[0] = _mm_setzero_si128(); |
| 133 nzbins[1] = _mm_setzero_si128(); | 131 nzbins[1] = _mm_setzero_si128(); |
| 134 nzbins[0] = _mm_sub_epi32(nzbins[0], zbins[0]); | 132 nzbins[0] = _mm_sub_epi32(nzbins[0], zbins[0]); |
| 135 nzbins[1] = _mm_sub_epi32(nzbins[1], zbins[1]); | 133 nzbins[1] = _mm_sub_epi32(nzbins[1], zbins[1]); |
| 136 | 134 |
| 137 vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); | 135 vpx_memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); |
| 138 vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); | 136 vpx_memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); |
| 139 | 137 |
| 140 if (!skip_block) { | 138 if (!skip_block) { |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 173 qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; | 171 qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; |
| 174 dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2; | 172 dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2; |
| 175 | 173 |
| 176 if (tmp) | 174 if (tmp) |
| 177 eob = iscan[idx_arr[i]] > eob ? iscan[idx_arr[i]] : eob; | 175 eob = iscan[idx_arr[i]] > eob ? iscan[idx_arr[i]] : eob; |
| 178 } | 176 } |
| 179 } | 177 } |
| 180 *eob_ptr = eob + 1; | 178 *eob_ptr = eob + 1; |
| 181 } | 179 } |
| 182 #endif | 180 #endif |
| OLD | NEW |