Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1228)

Unified Diff: source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c

Issue 484923003: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « source/libvpx/vp9/common/vp9_rtcd_defs.pl ('k') | source/libvpx/vp9/decoder/vp9_decoder.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
Index: source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c
===================================================================
--- source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c (revision 291087)
+++ source/libvpx/vp9/common/x86/vp9_subpixel_8t_intrin_avx2.c (working copy)
@@ -307,7 +307,7 @@
__m256i addFilterReg64;
__m256i srcReg32b1, srcReg32b2, srcReg32b3, srcReg32b4, srcReg32b5;
__m256i srcReg32b6, srcReg32b7, srcReg32b8, srcReg32b9, srcReg32b10;
- __m256i srcReg32b11, srcReg32b12, srcReg32b13, filtersReg32;
+ __m256i srcReg32b11, srcReg32b12, filtersReg32;
__m256i firstFilters, secondFilters, thirdFilters, forthFilters;
unsigned int i;
unsigned int src_stride, dst_stride;
@@ -409,36 +409,36 @@
// multiply 2 adjacent elements with the filter and add the result
srcReg32b10 = _mm256_maddubs_epi16(srcReg32b10, firstFilters);
srcReg32b6 = _mm256_maddubs_epi16(srcReg32b4, forthFilters);
- srcReg32b1 = _mm256_maddubs_epi16(srcReg32b1, firstFilters);
- srcReg32b8 = _mm256_maddubs_epi16(srcReg32b7, forthFilters);
// add and saturate the results together
srcReg32b10 = _mm256_adds_epi16(srcReg32b10, srcReg32b6);
- srcReg32b1 = _mm256_adds_epi16(srcReg32b1, srcReg32b8);
-
// multiply 2 adjacent elements with the filter and add the result
srcReg32b8 = _mm256_maddubs_epi16(srcReg32b11, secondFilters);
- srcReg32b6 = _mm256_maddubs_epi16(srcReg32b3, secondFilters);
-
- // multiply 2 adjacent elements with the filter and add the result
srcReg32b12 = _mm256_maddubs_epi16(srcReg32b2, thirdFilters);
- srcReg32b13 = _mm256_maddubs_epi16(srcReg32b5, thirdFilters);
-
// add and saturate the results together
srcReg32b10 = _mm256_adds_epi16(srcReg32b10,
_mm256_min_epi16(srcReg32b8, srcReg32b12));
- srcReg32b1 = _mm256_adds_epi16(srcReg32b1,
- _mm256_min_epi16(srcReg32b6, srcReg32b13));
+ srcReg32b10 = _mm256_adds_epi16(srcReg32b10,
+ _mm256_max_epi16(srcReg32b8, srcReg32b12));
+ // multiply 2 adjacent elements with the filter and add the result
+ srcReg32b1 = _mm256_maddubs_epi16(srcReg32b1, firstFilters);
+ srcReg32b6 = _mm256_maddubs_epi16(srcReg32b7, forthFilters);
+
+ srcReg32b1 = _mm256_adds_epi16(srcReg32b1, srcReg32b6);
+
+ // multiply 2 adjacent elements with the filter and add the result
+ srcReg32b8 = _mm256_maddubs_epi16(srcReg32b3, secondFilters);
+ srcReg32b12 = _mm256_maddubs_epi16(srcReg32b5, thirdFilters);
+
// add and saturate the results together
- srcReg32b10 = _mm256_adds_epi16(srcReg32b10,
- _mm256_max_epi16(srcReg32b8, srcReg32b12));
srcReg32b1 = _mm256_adds_epi16(srcReg32b1,
- _mm256_max_epi16(srcReg32b6, srcReg32b13));
+ _mm256_min_epi16(srcReg32b8, srcReg32b12));
+ srcReg32b1 = _mm256_adds_epi16(srcReg32b1,
+ _mm256_max_epi16(srcReg32b8, srcReg32b12));
-
srcReg32b10 = _mm256_adds_epi16(srcReg32b10, addFilterReg64);
srcReg32b1 = _mm256_adds_epi16(srcReg32b1, addFilterReg64);
« no previous file with comments | « source/libvpx/vp9/common/vp9_rtcd_defs.pl ('k') | source/libvpx/vp9/decoder/vp9_decoder.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698