| Index: source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c
|
| ===================================================================
|
| --- source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c (revision 292072)
|
| +++ source/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c (working copy)
|
| @@ -3573,6 +3573,7 @@
|
| int stride) {
|
| const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
|
| const __m128i final_rounding = _mm_set1_epi16(1<<5);
|
| + const __m128i zero = _mm_setzero_si128();
|
|
|
| // idct constants for each stage
|
| const __m128i stg1_0 = pair_set_epi16(cospi_31_64, -cospi_1_64);
|
| @@ -3635,7 +3636,6 @@
|
| stp2_30, stp2_31;
|
| __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
|
| int i, j, i32;
|
| - int zero_flag[2];
|
|
|
| for (i = 0; i < 4; i++) {
|
| i32 = (i << 5);
|
| @@ -3710,13 +3710,7 @@
|
| zero_idx[13] = _mm_or_si128(zero_idx[10], zero_idx[11]);
|
| zero_idx[14] = _mm_or_si128(zero_idx[12], zero_idx[13]);
|
|
|
| - zero_idx[0] = _mm_unpackhi_epi64(zero_idx[14], zero_idx[14]);
|
| - zero_idx[1] = _mm_or_si128(zero_idx[0], zero_idx[14]);
|
| - zero_idx[2] = _mm_srli_epi64(zero_idx[1], 32);
|
| - zero_flag[0] = _mm_cvtsi128_si32(zero_idx[1]);
|
| - zero_flag[1] = _mm_cvtsi128_si32(zero_idx[2]);
|
| -
|
| - if (!zero_flag[0] && !zero_flag[1]) {
|
| + if (_mm_movemask_epi8(_mm_cmpeq_epi32(zero_idx[14], zero)) == 0xFFFF) {
|
| col[i32 + 0] = _mm_setzero_si128();
|
| col[i32 + 1] = _mm_setzero_si128();
|
| col[i32 + 2] = _mm_setzero_si128();
|
| @@ -3795,7 +3789,6 @@
|
| col[i32 + 31] = _mm_sub_epi16(stp1_0, stp1_31);
|
| }
|
| for (i = 0; i < 4; i++) {
|
| - const __m128i zero = _mm_setzero_si128();
|
| // Second 1-D idct
|
| j = i << 3;
|
|
|
|
|