Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(307)

Unified Diff: source/libvpx/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c

Issue 478033002: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « source/libvpx/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c ('k') | source/libvpx/vp9/vp9_common.mk » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: source/libvpx/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c
===================================================================
--- source/libvpx/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c (revision 290053)
+++ source/libvpx/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c (working copy)
@@ -67,7 +67,7 @@
#define LOAD_SRC_DST \
/* load source and destination */ \
src_reg = _mm256_loadu_si256((__m256i const *) (src)); \
- dst_reg = _mm256_load_si256((__m256i const *) (dst));
+ dst_reg = _mm256_loadu_si256((__m256i const *) (dst));
#define AVG_NEXT_SRC(src_reg, size_stride) \
src_next_reg = _mm256_loadu_si256((__m256i const *) \
@@ -333,7 +333,7 @@
if (y_offset == 0) {
for (i = 0; i < height ; i++) {
LOAD_SRC_DST
- sec_reg = _mm256_load_si256((__m256i const *) (sec));
+ sec_reg = _mm256_loadu_si256((__m256i const *) (sec));
src_reg = _mm256_avg_epu8(src_reg, sec_reg);
sec+= sec_stride;
// expand each byte to 2 bytes
@@ -347,7 +347,7 @@
for (i = 0; i < height ; i++) {
LOAD_SRC_DST
AVG_NEXT_SRC(src_reg, src_stride)
- sec_reg = _mm256_load_si256((__m256i const *) (sec));
+ sec_reg = _mm256_loadu_si256((__m256i const *) (sec));
src_reg = _mm256_avg_epu8(src_reg, sec_reg);
sec+= sec_stride;
// expand each byte to 2 bytes
@@ -369,7 +369,7 @@
MERGE_NEXT_SRC(src_reg, src_stride)
FILTER_SRC(filter)
src_reg = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
- sec_reg = _mm256_load_si256((__m256i const *) (sec));
+ sec_reg = _mm256_loadu_si256((__m256i const *) (sec));
src_reg = _mm256_avg_epu8(src_reg, sec_reg);
sec+= sec_stride;
MERGE_WITH_SRC(src_reg, zero_reg)
@@ -385,7 +385,7 @@
for (i = 0; i < height ; i++) {
LOAD_SRC_DST
AVG_NEXT_SRC(src_reg, 1)
- sec_reg = _mm256_load_si256((__m256i const *) (sec));
+ sec_reg = _mm256_loadu_si256((__m256i const *) (sec));
src_reg = _mm256_avg_epu8(src_reg, sec_reg);
sec+= sec_stride;
// expand each byte to 2 bytes
@@ -409,7 +409,7 @@
AVG_NEXT_SRC(src_reg, 1)
// average between previous average to current average
src_avg = _mm256_avg_epu8(src_avg, src_reg);
- sec_reg = _mm256_load_si256((__m256i const *) (sec));
+ sec_reg = _mm256_loadu_si256((__m256i const *) (sec));
src_avg = _mm256_avg_epu8(src_avg, sec_reg);
sec+= sec_stride;
// expand each byte to 2 bytes
@@ -437,7 +437,7 @@
MERGE_WITH_SRC(src_avg, src_reg)
FILTER_SRC(filter)
src_avg = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
- sec_reg = _mm256_load_si256((__m256i const *) (sec));
+ sec_reg = _mm256_loadu_si256((__m256i const *) (sec));
src_avg = _mm256_avg_epu8(src_avg, sec_reg);
// expand each byte to 2 bytes
MERGE_WITH_SRC(src_avg, zero_reg)
@@ -459,7 +459,7 @@
MERGE_NEXT_SRC(src_reg, 1)
FILTER_SRC(filter)
src_reg = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
- sec_reg = _mm256_load_si256((__m256i const *) (sec));
+ sec_reg = _mm256_loadu_si256((__m256i const *) (sec));
src_reg = _mm256_avg_epu8(src_reg, sec_reg);
MERGE_WITH_SRC(src_reg, zero_reg)
sec+= sec_stride;
@@ -487,7 +487,7 @@
src_reg = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
// average between previous pack to the current
src_pack = _mm256_avg_epu8(src_pack, src_reg);
- sec_reg = _mm256_load_si256((__m256i const *) (sec));
+ sec_reg = _mm256_loadu_si256((__m256i const *) (sec));
src_pack = _mm256_avg_epu8(src_pack, sec_reg);
sec+= sec_stride;
MERGE_WITH_SRC(src_pack, zero_reg)
@@ -524,7 +524,7 @@
// filter the source
FILTER_SRC(yfilter)
src_pack = _mm256_packus_epi16(exp_src_lo, exp_src_hi);
- sec_reg = _mm256_load_si256((__m256i const *) (sec));
+ sec_reg = _mm256_loadu_si256((__m256i const *) (sec));
src_pack = _mm256_avg_epu8(src_pack, sec_reg);
MERGE_WITH_SRC(src_pack, zero_reg)
src_pack = src_reg;
« no previous file with comments | « source/libvpx/vp9/encoder/x86/vp9_sad4d_intrin_avx2.c ('k') | source/libvpx/vp9/vp9_common.mk » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698