Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(134)

Side by Side Diff: source/libvpx/vp9/encoder/x86/vp9_variance_avx2.c

Issue 1162573005: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 #include "./vp9_rtcd.h"
10 #include "./vpx_config.h" 11 #include "./vpx_config.h"
11 12
12 #include "vp9/encoder/vp9_variance.h" 13 #include "vp9/encoder/vp9_variance.h"
13 #include "vpx_ports/mem.h" 14 #include "vpx_ports/mem.h"
14 15
15 typedef void (*get_var_avx2)(const uint8_t *src, int src_stride,
16 const uint8_t *ref, int ref_stride,
17 unsigned int *sse, int *sum);
18
19 void vp9_get16x16var_avx2(const uint8_t *src, int src_stride,
20 const uint8_t *ref, int ref_stride,
21 unsigned int *sse, int *sum);
22
23 void vp9_get32x32var_avx2(const uint8_t *src, int src_stride,
24 const uint8_t *ref, int ref_stride,
25 unsigned int *sse, int *sum);
26
27 unsigned int vp9_sub_pixel_variance32xh_avx2(const uint8_t *src, int src_stride, 16 unsigned int vp9_sub_pixel_variance32xh_avx2(const uint8_t *src, int src_stride,
28 int x_offset, int y_offset, 17 int x_offset, int y_offset,
29 const uint8_t *dst, int dst_stride, 18 const uint8_t *dst, int dst_stride,
30 int height, 19 int height,
31 unsigned int *sse); 20 unsigned int *sse);
32 21
33 unsigned int vp9_sub_pixel_avg_variance32xh_avx2(const uint8_t *src, 22 unsigned int vp9_sub_pixel_avg_variance32xh_avx2(const uint8_t *src,
34 int src_stride, 23 int src_stride,
35 int x_offset, 24 int x_offset,
36 int y_offset, 25 int y_offset,
37 const uint8_t *dst, 26 const uint8_t *dst,
38 int dst_stride, 27 int dst_stride,
39 const uint8_t *sec, 28 const uint8_t *sec,
40 int sec_stride, 29 int sec_stride,
41 int height, 30 int height,
42 unsigned int *sseptr); 31 unsigned int *sseptr);
43 32
44 static void variance_avx2(const uint8_t *src, int src_stride,
45 const uint8_t *ref, int ref_stride,
46 int w, int h, unsigned int *sse, int *sum,
47 get_var_avx2 var_fn, int block_size) {
48 int i, j;
49
50 *sse = 0;
51 *sum = 0;
52
53 for (i = 0; i < h; i += 16) {
54 for (j = 0; j < w; j += block_size) {
55 unsigned int sse0;
56 int sum0;
57 var_fn(&src[src_stride * i + j], src_stride,
58 &ref[ref_stride * i + j], ref_stride, &sse0, &sum0);
59 *sse += sse0;
60 *sum += sum0;
61 }
62 }
63 }
64
65
66 unsigned int vp9_variance16x16_avx2(const uint8_t *src, int src_stride,
67 const uint8_t *ref, int ref_stride,
68 unsigned int *sse) {
69 int sum;
70 variance_avx2(src, src_stride, ref, ref_stride, 16, 16,
71 sse, &sum, vp9_get16x16var_avx2, 16);
72 return *sse - (((unsigned int)sum * sum) >> 8);
73 }
74
75 unsigned int vp9_mse16x16_avx2(const uint8_t *src, int src_stride,
76 const uint8_t *ref, int ref_stride,
77 unsigned int *sse) {
78 int sum;
79 vp9_get16x16var_avx2(src, src_stride, ref, ref_stride, sse, &sum);
80 return *sse;
81 }
82
83 unsigned int vp9_variance32x16_avx2(const uint8_t *src, int src_stride,
84 const uint8_t *ref, int ref_stride,
85 unsigned int *sse) {
86 int sum;
87 variance_avx2(src, src_stride, ref, ref_stride, 32, 16,
88 sse, &sum, vp9_get32x32var_avx2, 32);
89 return *sse - (((int64_t)sum * sum) >> 9);
90 }
91
92 unsigned int vp9_variance32x32_avx2(const uint8_t *src, int src_stride,
93 const uint8_t *ref, int ref_stride,
94 unsigned int *sse) {
95 int sum;
96 variance_avx2(src, src_stride, ref, ref_stride, 32, 32,
97 sse, &sum, vp9_get32x32var_avx2, 32);
98 return *sse - (((int64_t)sum * sum) >> 10);
99 }
100
101 unsigned int vp9_variance64x64_avx2(const uint8_t *src, int src_stride,
102 const uint8_t *ref, int ref_stride,
103 unsigned int *sse) {
104 int sum;
105 variance_avx2(src, src_stride, ref, ref_stride, 64, 64,
106 sse, &sum, vp9_get32x32var_avx2, 32);
107 return *sse - (((int64_t)sum * sum) >> 12);
108 }
109
110 unsigned int vp9_variance64x32_avx2(const uint8_t *src, int src_stride,
111 const uint8_t *ref, int ref_stride,
112 unsigned int *sse) {
113 int sum;
114 variance_avx2(src, src_stride, ref, ref_stride, 64, 32,
115 sse, &sum, vp9_get32x32var_avx2, 32);
116 return *sse - (((int64_t)sum * sum) >> 11);
117 }
118
119 unsigned int vp9_sub_pixel_variance64x64_avx2(const uint8_t *src, 33 unsigned int vp9_sub_pixel_variance64x64_avx2(const uint8_t *src,
120 int src_stride, 34 int src_stride,
121 int x_offset, 35 int x_offset,
122 int y_offset, 36 int y_offset,
123 const uint8_t *dst, 37 const uint8_t *dst,
124 int dst_stride, 38 int dst_stride,
125 unsigned int *sse) { 39 unsigned int *sse) {
126 unsigned int sse1; 40 unsigned int sse1;
127 const int se1 = vp9_sub_pixel_variance32xh_avx2(src, src_stride, x_offset, 41 const int se1 = vp9_sub_pixel_variance32xh_avx2(src, src_stride, x_offset,
128 y_offset, dst, dst_stride, 42 y_offset, dst, dst_stride,
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
181 const uint8_t *dst, 95 const uint8_t *dst,
182 int dst_stride, 96 int dst_stride,
183 unsigned int *sse, 97 unsigned int *sse,
184 const uint8_t *sec) { 98 const uint8_t *sec) {
185 // processing 32 elements in parallel 99 // processing 32 elements in parallel
186 const int se = vp9_sub_pixel_avg_variance32xh_avx2(src, src_stride, x_offset, 100 const int se = vp9_sub_pixel_avg_variance32xh_avx2(src, src_stride, x_offset,
187 y_offset, dst, dst_stride, 101 y_offset, dst, dst_stride,
188 sec, 32, 32, sse); 102 sec, 32, 32, sse);
189 return *sse - (((int64_t)se * se) >> 10); 103 return *sse - (((int64_t)se * se) >> 10);
190 } 104 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698