| Index: source/libvpx/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
|
| ===================================================================
|
| --- source/libvpx/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c (revision 290053)
|
| +++ source/libvpx/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c (working copy)
|
| @@ -67,7 +67,7 @@
|
| #define LOAD_SRC_DST \
|
| /* load source and destination */ \
|
| src_reg = _mm256_loadu_si256((__m256i const *) (src)); \
|
| - dst_reg = _mm256_load_si256((__m256i const *) (dst));
|
| + dst_reg = _mm256_loadu_si256((__m256i const *) (dst));
|
|
|
| #define AVG_NEXT_SRC(src_reg, size_stride) \
|
| src_next_reg = _mm256_loadu_si256((__m256i const *) \
|
| @@ -333,7 +333,7 @@
|
| if (y_offset == 0) {
|
| for (i = 0; i < height ; i++) {
|
| LOAD_SRC_DST
|
| - sec_reg = _mm256_load_si256((__m256i const *) (sec));
|
| + sec_reg = _mm256_loadu_si256((__m256i const *) (sec));
|
| src_reg = _mm256_avg_epu8(src_reg, sec_reg);
|
| sec+= sec_stride;
|
| // expend each byte to 2 bytes
|
| @@ -347,7 +347,7 @@
|
| for (i = 0; i < height ; i++) {
|
| LOAD_SRC_DST
|
| AVG_NEXT_SRC(src_reg, src_stride)
|
| - sec_reg = _mm256_load_si256((__m256i const *) (sec));
|
| + sec_reg = _mm256_loadu_si256((__m256i const *) (sec));
|
| src_reg = _mm256_avg_epu8(src_reg, sec_reg);
|
| sec+= sec_stride;
|
| // expend each byte to 2 bytes
|
| @@ -369,7 +369,7 @@
|
| MERGE_NEXT_SRC(src_reg, src_stride)
|
| FILTER_SRC(filter)
|
| src_reg = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
|
| - sec_reg = _mm256_load_si256((__m256i const *) (sec));
|
| + sec_reg = _mm256_loadu_si256((__m256i const *) (sec));
|
| src_reg = _mm256_avg_epu8(src_reg, sec_reg);
|
| sec+= sec_stride;
|
| MERGE_WITH_SRC(src_reg, zero_reg)
|
| @@ -385,7 +385,7 @@
|
| for (i = 0; i < height ; i++) {
|
| LOAD_SRC_DST
|
| AVG_NEXT_SRC(src_reg, 1)
|
| - sec_reg = _mm256_load_si256((__m256i const *) (sec));
|
| + sec_reg = _mm256_loadu_si256((__m256i const *) (sec));
|
| src_reg = _mm256_avg_epu8(src_reg, sec_reg);
|
| sec+= sec_stride;
|
| // expand each byte to 2 bytes
|
| @@ -409,7 +409,7 @@
|
| AVG_NEXT_SRC(src_reg, 1)
|
| // average between previous average to current average
|
| src_avg = _mm256_avg_epu8(src_avg, src_reg);
|
| - sec_reg = _mm256_load_si256((__m256i const *) (sec));
|
| + sec_reg = _mm256_loadu_si256((__m256i const *) (sec));
|
| src_avg = _mm256_avg_epu8(src_avg, sec_reg);
|
| sec+= sec_stride;
|
| // expand each byte to 2 bytes
|
| @@ -437,7 +437,7 @@
|
| MERGE_WITH_SRC(src_avg, src_reg)
|
| FILTER_SRC(filter)
|
| src_avg = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
|
| - sec_reg = _mm256_load_si256((__m256i const *) (sec));
|
| + sec_reg = _mm256_loadu_si256((__m256i const *) (sec));
|
| src_avg = _mm256_avg_epu8(src_avg, sec_reg);
|
| // expand each byte to 2 bytes
|
| MERGE_WITH_SRC(src_avg, zero_reg)
|
| @@ -459,7 +459,7 @@
|
| MERGE_NEXT_SRC(src_reg, 1)
|
| FILTER_SRC(filter)
|
| src_reg = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
|
| - sec_reg = _mm256_load_si256((__m256i const *) (sec));
|
| + sec_reg = _mm256_loadu_si256((__m256i const *) (sec));
|
| src_reg = _mm256_avg_epu8(src_reg, sec_reg);
|
| MERGE_WITH_SRC(src_reg, zero_reg)
|
| sec+= sec_stride;
|
| @@ -487,7 +487,7 @@
|
| src_reg = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
|
| // average between previous pack to the current
|
| src_pack = _mm256_avg_epu8(src_pack, src_reg);
|
| - sec_reg = _mm256_load_si256((__m256i const *) (sec));
|
| + sec_reg = _mm256_loadu_si256((__m256i const *) (sec));
|
| src_pack = _mm256_avg_epu8(src_pack, sec_reg);
|
| sec+= sec_stride;
|
| MERGE_WITH_SRC(src_pack, zero_reg)
|
| @@ -524,7 +524,7 @@
|
| // filter the source
|
| FILTER_SRC(yfilter)
|
| src_pack = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
|
| - sec_reg = _mm256_load_si256((__m256i const *) (sec));
|
| + sec_reg = _mm256_loadu_si256((__m256i const *) (sec));
|
| src_pack = _mm256_avg_epu8(src_pack, sec_reg);
|
| MERGE_WITH_SRC(src_pack, zero_reg)
|
| src_pack = src_reg;
|
|
|