Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(860)

Unified Diff: source/libvpx/vp9/encoder/x86/vp9_variance_avx2.c

Issue 181493009: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: source/libvpx/vp9/encoder/x86/vp9_variance_avx2.c
===================================================================
--- source/libvpx/vp9/encoder/x86/vp9_variance_avx2.c (revision 254565)
+++ source/libvpx/vp9/encoder/x86/vp9_variance_avx2.c (working copy)
@@ -42,6 +42,32 @@
int *Sum
);
+unsigned int vp9_sub_pixel_variance32xh_avx2  // declaration only; callers in this patch store the result in an int named se
+(
+ const uint8_t *src,
+ int src_stride,
+ int x_offset,
+ int y_offset,
+ const uint8_t *dst,
+ int dst_stride,
+ int height,  // callers in this patch pass 32 or 64
+ unsigned int *sse  // out: callers copy this value into their *sse_ptr (presumably sum of squared errors; confirm in the definition)
+);
+
+unsigned int vp9_sub_pixel_avg_variance32xh_avx2  // declaration only; callers in this patch store the result in an int named se
+(
+ const uint8_t *src,
+ int src_stride,
+ int x_offset,
+ int y_offset,
+ const uint8_t *dst,
+ int dst_stride,
+ const uint8_t *sec,  // extra input not taken by the non-avg variant (presumably a second predictor to average with; confirm in the definition)
+ int sec_stride,  // callers in this patch pass the same constant as height (32 or 64)
+ int height,  // callers in this patch pass 32 or 64
+ unsigned int *sseptr  // out: callers copy this value into their own *sseptr
+);
+
static void variance_avx2(const unsigned char *src_ptr, int source_stride,
const unsigned char *ref_ptr, int recon_stride,
int w, int h, unsigned int *sse, int *sum,
@@ -155,3 +181,88 @@
*sse = var;
return (var - (((int64_t)avg * avg) >> 11));
}
+
+unsigned int vp9_sub_pixel_variance64x64_avx2(const uint8_t *src,
+ int src_stride,
+ int x_offset,
+ int y_offset,
+ const uint8_t *dst,
+ int dst_stride,
+ unsigned int *sse_ptr) {  // out: receives the combined sse of both helper calls
+ // Process the first 32 elements in parallel; the helper is called with height 64.
+ unsigned int sse;
+ int se = vp9_sub_pixel_variance32xh_avx2(src, src_stride, x_offset,  // NOTE(review): helper is declared to return unsigned int; result is stored in int
+ y_offset, dst, dst_stride,
+ 64, &sse);
+ // Process the next 32 elements in parallel: same call with src and dst advanced by 32.
+ unsigned int sse2;
+ int se2 = vp9_sub_pixel_variance32xh_avx2(src + 32, src_stride,
+ x_offset, y_offset,
+ dst + 32, dst_stride,
+ 64, &sse2);
+ se += se2;  // combine the two partial results
+ sse += sse2;
+ *sse_ptr = sse;
+ return sse - (((int64_t)se * se) >> 12);  // >> 12 divides se * se by 4096 (= 64 * 64); int64_t keeps the product from overflowing int
+}
+
+unsigned int vp9_sub_pixel_variance32x32_avx2(const uint8_t *src,
+ int src_stride,
+ int x_offset,
+ int y_offset,
+ const uint8_t *dst,
+ int dst_stride,
+ unsigned int *sse_ptr) {  // out: receives the sse reported by the helper
+ // Process 32 elements in parallel; a single helper call with height 32 covers the block.
+ unsigned int sse;
+ int se = vp9_sub_pixel_variance32xh_avx2(src, src_stride, x_offset,  // NOTE(review): helper is declared to return unsigned int; result is stored in int
+ y_offset, dst, dst_stride,
+ 32, &sse);
+ *sse_ptr = sse;
+ return sse - (((int64_t)se * se) >> 10);  // >> 10 divides se * se by 1024 (= 32 * 32); int64_t keeps the product from overflowing int
+}
+
+unsigned int vp9_sub_pixel_avg_variance64x64_avx2(const uint8_t *src,
+ int src_stride,
+ int x_offset,
+ int y_offset,
+ const uint8_t *dst,
+ int dst_stride,
+ unsigned int *sseptr,  // out: receives the combined sse of both helper calls
+ const uint8_t *sec) {  // forwarded to the helper with a fixed stride of 64
+ // Process the first 32 elements in parallel.
+ unsigned int sse;
+
+ int se = vp9_sub_pixel_avg_variance32xh_avx2(src, src_stride, x_offset,  // NOTE(review): helper is declared to return unsigned int; result is stored in int
+ y_offset, dst, dst_stride,
+ sec, 64, 64, &sse);  // per the prototype's parameter order: sec_stride = 64, height = 64
+ unsigned int sse2;
+ // Process the next 32 elements in parallel: src, dst and sec are each advanced by 32.
+ int se2 = vp9_sub_pixel_avg_variance32xh_avx2(src + 32, src_stride, x_offset,
+ y_offset, dst + 32, dst_stride,
+ sec + 32, 64, 64, &sse2);
+ se += se2;  // combine the two partial results
+ sse += sse2;
+ *sseptr = sse;
+
+ return sse - (((int64_t)se * se) >> 12);  // >> 12 divides se * se by 4096 (= 64 * 64); int64_t keeps the product from overflowing int
+}
+
+unsigned int vp9_sub_pixel_avg_variance32x32_avx2(const uint8_t *src,
+ int src_stride,
+ int x_offset,
+ int y_offset,
+ const uint8_t *dst,
+ int dst_stride,
+ unsigned int *sseptr,  // out: receives the sse reported by the helper
+ const uint8_t *sec) {  // forwarded to the helper with a fixed stride of 32
+ // Process 32 elements in parallel; a single helper call with height 32 covers the block.
+ unsigned int sse;
+ int se = vp9_sub_pixel_avg_variance32xh_avx2(src, src_stride, x_offset,  // NOTE(review): helper is declared to return unsigned int; result is stored in int
+ y_offset, dst, dst_stride,
+ sec, 32, 32, &sse);  // per the prototype's parameter order: sec_stride = 32, height = 32
+ *sseptr = sse;
+ return sse - (((int64_t)se * se) >> 10);  // >> 10 divides se * se by 1024 (= 32 * 32); int64_t keeps the product from overflowing int
+}
+
+
« no previous file with comments | « source/libvpx/vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c ('k') | source/libvpx/vp9/vp9_common.mk » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698