Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(252)

Unified Diff: source/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c

Issue 1124333011: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: only update to last nights LKGR Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: source/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c
diff --git a/source/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c b/source/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c
index 679c66e30b86c843d08d0eb248659c5ee12a304a..00abd3c49622b83e73de14618220ae15ba5f935f 100644
--- a/source/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c
+++ b/source/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c
@@ -230,6 +230,8 @@ void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
const int16_t* scan_ptr,
const int16_t* iscan_ptr) {
__m128i zero;
+ __m128i thr;
+ int16_t nzflag;
(void)scan_ptr;
(void)zbin_ptr;
(void)quant_shift_ptr;
@@ -316,6 +318,8 @@ void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
n_coeffs += 8 * 2;
}
+ thr = _mm_srai_epi16(dequant, 1);
+
// AC only loop
while (n_coeffs < 0) {
__m128i coeff0, coeff1;
@@ -335,28 +339,39 @@ void vp9_quantize_fp_sse2(const int16_t* coeff_ptr, intptr_t n_coeffs,
qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
- qcoeff0 = _mm_adds_epi16(qcoeff0, round);
- qcoeff1 = _mm_adds_epi16(qcoeff1, round);
- qtmp0 = _mm_mulhi_epi16(qcoeff0, quant);
- qtmp1 = _mm_mulhi_epi16(qcoeff1, quant);
+ nzflag = _mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff0, thr)) |
+ _mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff1, thr));
- // Reinsert signs
- qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign);
- qcoeff1 = _mm_xor_si128(qtmp1, coeff1_sign);
- qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
- qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
+ if (nzflag) {
+ qcoeff0 = _mm_adds_epi16(qcoeff0, round);
+ qcoeff1 = _mm_adds_epi16(qcoeff1, round);
+ qtmp0 = _mm_mulhi_epi16(qcoeff0, quant);
+ qtmp1 = _mm_mulhi_epi16(qcoeff1, quant);
- _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
- _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
+ // Reinsert signs
+ qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign);
+ qcoeff1 = _mm_xor_si128(qtmp1, coeff1_sign);
+ qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
+ qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
- coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
- coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
+ _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), qcoeff0);
+ _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
- _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
- _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
+ coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
+ coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
+
+ _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), coeff0);
+ _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
+ } else {
+ _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs), zero);
+ _mm_store_si128((__m128i*)(qcoeff_ptr + n_coeffs) + 1, zero);
+
+ _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs), zero);
+ _mm_store_si128((__m128i*)(dqcoeff_ptr + n_coeffs) + 1, zero);
+ }
}
- {
+ if (nzflag) {
// Scan for eob
__m128i zero_coeff0, zero_coeff1;
__m128i nzero_coeff0, nzero_coeff1;
« no previous file with comments | « source/libvpx/vp9/encoder/x86/vp9_highbd_sad_sse2.asm ('k') | source/libvpx/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698