Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(454)

Side by Side Diff: source/libvpx/vp9/encoder/x86/vp9_variance_sse2.c

Issue 341293003: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 6 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « source/libvpx/vp9/encoder/x86/vp9_variance_mmx.c ('k') | source/libvpx/vp9/vp9_common.mk » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include "./vpx_config.h" 11 #include "./vpx_config.h"
12 12
13 #include "vp9/encoder/vp9_variance.h" 13 #include "vp9/encoder/vp9_variance.h"
14 #include "vp9/common/vp9_pragmas.h"
15 #include "vpx_ports/mem.h" 14 #include "vpx_ports/mem.h"
16 15
17 extern unsigned int vp9_get4x4var_mmx 16 typedef unsigned int (*variance_fn_t) (const unsigned char *src, int src_stride,
18 ( 17 const unsigned char *ref, int ref_stride,
19 const unsigned char *src_ptr, 18 unsigned int *sse, int *sum);
20 int source_stride,
21 const unsigned char *ref_ptr,
22 int recon_stride,
23 unsigned int *SSE,
24 int *Sum
25 );
26 19
27 unsigned int vp9_get16x16var_sse2 20 unsigned int vp9_get4x4var_mmx(const unsigned char *src, int src_stride,
28 ( 21 const unsigned char *ref, int ref_stride,
29 const unsigned char *src_ptr, 22 unsigned int *sse, int *sum);
30 int source_stride,
31 const unsigned char *ref_ptr,
32 int recon_stride,
33 unsigned int *SSE,
34 int *Sum
35 );
36 unsigned int vp9_get8x8var_sse2
37 (
38 const unsigned char *src_ptr,
39 int source_stride,
40 const unsigned char *ref_ptr,
41 int recon_stride,
42 unsigned int *SSE,
43 int *Sum
44 );
45 23
46 typedef unsigned int (*get_var_sse2) (
47 const unsigned char *src_ptr,
48 int source_stride,
49 const unsigned char *ref_ptr,
50 int recon_stride,
51 unsigned int *SSE,
52 int *Sum
53 );
54 24
55 static void variance_sse2(const unsigned char *src_ptr, int source_stride, 25 unsigned int vp9_get8x8var_sse2(const unsigned char *src, int src_stride,
56 const unsigned char *ref_ptr, int recon_stride, 26 const unsigned char *ref, int ref_stride,
57 int w, int h, unsigned int *sse, int *sum, 27 unsigned int *sse, int *sum);
58 get_var_sse2 var_fn, int block_size) { 28
59 unsigned int sse0; 29 unsigned int vp9_get16x16var_sse2(const unsigned char *src, int src_stride,
60 int sum0; 30 const unsigned char *ref, int ref_stride,
31 unsigned int *sse, int *sum);
32
33 static void variance_sse2(const unsigned char *src, int src_stride,
34 const unsigned char *ref, int ref_stride,
35 int w, int h, unsigned int *sse, int *sum,
36 variance_fn_t var_fn, int block_size) {
61 int i, j; 37 int i, j;
62 38
63 *sse = 0; 39 *sse = 0;
64 *sum = 0; 40 *sum = 0;
65 41
66 for (i = 0; i < h; i += block_size) { 42 for (i = 0; i < h; i += block_size) {
67 for (j = 0; j < w; j += block_size) { 43 for (j = 0; j < w; j += block_size) {
68 var_fn(src_ptr + source_stride * i + j, source_stride, 44 unsigned int sse0;
69 ref_ptr + recon_stride * i + j, recon_stride, &sse0, &sum0); 45 int sum0;
46 var_fn(src + src_stride * i + j, src_stride,
47 ref + ref_stride * i + j, ref_stride, &sse0, &sum0);
70 *sse += sse0; 48 *sse += sse0;
71 *sum += sum0; 49 *sum += sum0;
72 } 50 }
73 } 51 }
74 } 52 }
75 53
76 unsigned int vp9_variance4x4_sse2( 54 unsigned int vp9_variance4x4_sse2(const unsigned char *src, int src_stride,
77 const unsigned char *src_ptr, 55 const unsigned char *ref, int ref_stride,
78 int source_stride, 56 unsigned int *sse) {
79 const unsigned char *ref_ptr, 57 int sum;
80 int recon_stride, 58 variance_sse2(src, src_stride, ref, ref_stride, 4, 4,
81 unsigned int *sse) { 59 sse, &sum, vp9_get4x4var_mmx, 4);
82 unsigned int var; 60 return *sse - (((unsigned int)sum * sum) >> 4);
83 int avg;
84
85 variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4,
86 &var, &avg, vp9_get4x4var_mmx, 4);
87 *sse = var;
88 return (var - (((unsigned int)avg * avg) >> 4));
89 } 61 }
90 62
91 unsigned int vp9_variance8x4_sse2(const uint8_t *src_ptr, 63 unsigned int vp9_variance8x4_sse2(const uint8_t *src, int src_stride,
92 int source_stride, 64 const uint8_t *ref, int ref_stride,
93 const uint8_t *ref_ptr,
94 int recon_stride,
95 unsigned int *sse) { 65 unsigned int *sse) {
96 unsigned int var; 66 int sum;
97 int avg; 67 variance_sse2(src, src_stride, ref, ref_stride, 8, 4,
98 68 sse, &sum, vp9_get4x4var_mmx, 4);
99 variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4, 69 return *sse - (((unsigned int)sum * sum) >> 5);
100 &var, &avg, vp9_get4x4var_mmx, 4);
101 *sse = var;
102 return (var - (((unsigned int)avg * avg) >> 5));
103 } 70 }
104 71
105 unsigned int vp9_variance4x8_sse2(const uint8_t *src_ptr, 72 unsigned int vp9_variance4x8_sse2(const uint8_t *src, int src_stride,
106 int source_stride, 73 const uint8_t *ref, int ref_stride,
107 const uint8_t *ref_ptr,
108 int recon_stride,
109 unsigned int *sse) { 74 unsigned int *sse) {
110 unsigned int var; 75 int sum;
111 int avg; 76 variance_sse2(src, src_stride, ref, ref_stride, 4, 8,
112 77 sse, &sum, vp9_get4x4var_mmx, 4);
113 variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8, 78 return *sse - (((unsigned int)sum * sum) >> 5);
114 &var, &avg, vp9_get4x4var_mmx, 4);
115 *sse = var;
116 return (var - (((unsigned int)avg * avg) >> 5));
117 } 79 }
118 80
119 unsigned int vp9_variance8x8_sse2 81 unsigned int vp9_variance8x8_sse2(const unsigned char *src, int src_stride,
120 ( 82 const unsigned char *ref, int ref_stride,
121 const unsigned char *src_ptr, 83 unsigned int *sse) {
122 int source_stride, 84 int sum;
123 const unsigned char *ref_ptr, 85 variance_sse2(src, src_stride, ref, ref_stride, 8, 8,
124 int recon_stride, 86 sse, &sum, vp9_get8x8var_sse2, 8);
125 unsigned int *sse) { 87 return *sse - (((unsigned int)sum * sum) >> 6);
126 unsigned int var;
127 int avg;
128
129 variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8,
130 &var, &avg, vp9_get8x8var_sse2, 8);
131 *sse = var;
132 return (var - (((unsigned int)avg * avg) >> 6));
133 } 88 }
134 89
135 unsigned int vp9_variance16x8_sse2 90 unsigned int vp9_variance16x8_sse2(const unsigned char *src, int src_stride,
136 ( 91 const unsigned char *ref, int ref_stride,
137 const unsigned char *src_ptr, 92 unsigned int *sse) {
138 int source_stride, 93 int sum;
139 const unsigned char *ref_ptr, 94 variance_sse2(src, src_stride, ref, ref_stride, 16, 8,
140 int recon_stride, 95 sse, &sum, vp9_get8x8var_sse2, 8);
141 unsigned int *sse) { 96 return *sse - (((unsigned int)sum * sum) >> 7);
142 unsigned int var;
143 int avg;
144
145 variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8,
146 &var, &avg, vp9_get8x8var_sse2, 8);
147 *sse = var;
148 return (var - (((unsigned int)avg * avg) >> 7));
149 } 97 }
150 98
151 unsigned int vp9_variance8x16_sse2 99 unsigned int vp9_variance8x16_sse2(const unsigned char *src, int src_stride,
152 ( 100 const unsigned char *ref, int ref_stride,
153 const unsigned char *src_ptr, 101 unsigned int *sse) {
154 int source_stride, 102 int sum;
155 const unsigned char *ref_ptr, 103 variance_sse2(src, src_stride, ref, ref_stride, 8, 16,
156 int recon_stride, 104 sse, &sum, vp9_get8x8var_sse2, 8);
157 unsigned int *sse) { 105 return *sse - (((unsigned int)sum * sum) >> 7);
158 unsigned int var;
159 int avg;
160
161 variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16,
162 &var, &avg, vp9_get8x8var_sse2, 8);
163 *sse = var;
164 return (var - (((unsigned int)avg * avg) >> 7));
165 } 106 }
166 107
167 unsigned int vp9_variance16x16_sse2 108 unsigned int vp9_variance16x16_sse2(const unsigned char *src, int src_stride,
168 ( 109 const unsigned char *ref, int ref_stride,
169 const unsigned char *src_ptr, 110 unsigned int *sse) {
170 int source_stride, 111 int sum;
171 const unsigned char *ref_ptr, 112 variance_sse2(src, src_stride, ref, ref_stride, 16, 16,
172 int recon_stride, 113 sse, &sum, vp9_get16x16var_sse2, 16);
173 unsigned int *sse) { 114 return *sse - (((unsigned int)sum * sum) >> 8);
174 unsigned int var;
175 int avg;
176
177 variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16,
178 &var, &avg, vp9_get16x16var_sse2, 16);
179 *sse = var;
180 return (var - (((unsigned int)avg * avg) >> 8));
181 } 115 }
182 116
183 unsigned int vp9_mse16x16_sse2( 117 unsigned int vp9_mse16x16_sse2(const unsigned char *src, int src_stride,
184 const unsigned char *src_ptr, 118 const unsigned char *ref, int ref_stride,
185 int source_stride, 119 unsigned int *sse) {
186 const unsigned char *ref_ptr, 120 int sum;
187 int recon_stride, 121 vp9_get16x16var_sse2(src, src_stride, ref, ref_stride, sse, &sum);
188 unsigned int *sse) { 122 return *sse;
189 unsigned int sse0;
190 int sum0;
191 vp9_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0,
192 &sum0);
193 *sse = sse0;
194 return sse0;
195 } 123 }
196 124
197 unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr, 125 unsigned int vp9_variance32x32_sse2(const uint8_t *src, int src_stride,
198 int source_stride, 126 const uint8_t *ref, int ref_stride,
199 const uint8_t *ref_ptr,
200 int recon_stride,
201 unsigned int *sse) { 127 unsigned int *sse) {
202 unsigned int var; 128 int sum;
203 int avg; 129 variance_sse2(src, src_stride, ref, ref_stride, 32, 32,
204 130 sse, &sum, vp9_get16x16var_sse2, 16);
205 variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32, 131 return *sse - (((int64_t)sum * sum) >> 10);
206 &var, &avg, vp9_get16x16var_sse2, 16);
207 *sse = var;
208 return (var - (((int64_t)avg * avg) >> 10));
209 } 132 }
210 133
211 unsigned int vp9_variance32x16_sse2(const uint8_t *src_ptr, 134 unsigned int vp9_variance32x16_sse2(const uint8_t *src, int src_stride,
212 int source_stride, 135 const uint8_t *ref, int ref_stride,
213 const uint8_t *ref_ptr,
214 int recon_stride,
215 unsigned int *sse) { 136 unsigned int *sse) {
216 unsigned int var; 137 int sum;
217 int avg; 138 variance_sse2(src, src_stride, ref, ref_stride, 32, 16,
218 139 sse, &sum, vp9_get16x16var_sse2, 16);
219 variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16, 140 return *sse - (((int64_t)sum * sum) >> 9);
220 &var, &avg, vp9_get16x16var_sse2, 16);
221 *sse = var;
222 return (var - (((int64_t)avg * avg) >> 9));
223 } 141 }
224 142
225 unsigned int vp9_variance16x32_sse2(const uint8_t *src_ptr, 143 unsigned int vp9_variance16x32_sse2(const uint8_t *src, int src_stride,
226 int source_stride, 144 const uint8_t *ref, int ref_stride,
227 const uint8_t *ref_ptr,
228 int recon_stride,
229 unsigned int *sse) { 145 unsigned int *sse) {
230 unsigned int var; 146 int sum;
231 int avg; 147 variance_sse2(src, src_stride, ref, ref_stride, 16, 32,
232 148 sse, &sum, vp9_get16x16var_sse2, 16);
233 variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 16, 32, 149 return *sse - (((int64_t)sum * sum) >> 9);
234 &var, &avg, vp9_get16x16var_sse2, 16);
235 *sse = var;
236 return (var - (((int64_t)avg * avg) >> 9));
237 } 150 }
238 151
239 unsigned int vp9_variance64x64_sse2(const uint8_t *src_ptr, 152 unsigned int vp9_variance64x64_sse2(const uint8_t *src, int src_stride,
240 int source_stride, 153 const uint8_t *ref, int ref_stride,
241 const uint8_t *ref_ptr,
242 int recon_stride,
243 unsigned int *sse) { 154 unsigned int *sse) {
244 unsigned int var; 155 int sum;
245 int avg; 156 variance_sse2(src, src_stride, ref, ref_stride, 64, 64,
246 157 sse, &sum, vp9_get16x16var_sse2, 16);
247 variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64, 158 return *sse - (((int64_t)sum * sum) >> 12);
248 &var, &avg, vp9_get16x16var_sse2, 16);
249 *sse = var;
250 return (var - (((int64_t)avg * avg) >> 12));
251 } 159 }
252 160
253 unsigned int vp9_variance64x32_sse2(const uint8_t *src_ptr, 161 unsigned int vp9_variance64x32_sse2(const uint8_t *src, int src_stride,
254 int source_stride, 162 const uint8_t *ref, int ref_stride,
255 const uint8_t *ref_ptr,
256 int recon_stride,
257 unsigned int *sse) { 163 unsigned int *sse) {
258 unsigned int var; 164 int sum;
259 int avg; 165 variance_sse2(src, src_stride, ref, ref_stride, 64, 32,
260 166 sse, &sum, vp9_get16x16var_sse2, 16);
261 variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32, 167 return *sse - (((int64_t)sum * sum) >> 11);
262 &var, &avg, vp9_get16x16var_sse2, 16);
263 *sse = var;
264 return (var - (((int64_t)avg * avg) >> 11));
265 } 168 }
266 169
267 unsigned int vp9_variance32x64_sse2(const uint8_t *src_ptr, 170 unsigned int vp9_variance32x64_sse2(const uint8_t *src, int src_stride,
268 int source_stride, 171 const uint8_t *ref, int ref_stride,
269 const uint8_t *ref_ptr,
270 int recon_stride,
271 unsigned int *sse) { 172 unsigned int *sse) {
272 unsigned int var; 173 int sum;
273 int avg; 174 variance_sse2(src, src_stride, ref, ref_stride, 32, 64,
274 175 sse, &sum, vp9_get16x16var_sse2, 16);
275 variance_sse2(src_ptr, source_stride, ref_ptr, recon_stride, 32, 64, 176 return *sse - (((int64_t)sum * sum) >> 11);
276 &var, &avg, vp9_get16x16var_sse2, 16);
277 *sse = var;
278 return (var - (((int64_t)avg * avg) >> 11));
279 } 177 }
280 178
281 #define DECL(w, opt) \ 179 #define DECL(w, opt) \
282 int vp9_sub_pixel_variance##w##xh_##opt(const uint8_t *src, \ 180 int vp9_sub_pixel_variance##w##xh_##opt(const uint8_t *src, \
283 ptrdiff_t src_stride, \ 181 ptrdiff_t src_stride, \
284 int x_offset, int y_offset, \ 182 int x_offset, int y_offset, \
285 const uint8_t *dst, \ 183 const uint8_t *dst, \
286 ptrdiff_t dst_stride, \ 184 ptrdiff_t dst_stride, \
287 int height, unsigned int *sse) 185 int height, unsigned int *sse)
288 #define DECLS(opt1, opt2) \ 186 #define DECLS(opt1, opt2) \
(...skipping 138 matching lines...) Expand 10 before | Expand all | Expand 10 after
427 FN(8, 8, 8, 3, 3, opt1, (unsigned int)); \ 325 FN(8, 8, 8, 3, 3, opt1, (unsigned int)); \
428 FN(8, 4, 8, 3, 2, opt1, (unsigned int)); \ 326 FN(8, 4, 8, 3, 2, opt1, (unsigned int)); \
429 FN(4, 8, 4, 2, 3, opt2, (unsigned int)); \ 327 FN(4, 8, 4, 2, 3, opt2, (unsigned int)); \
430 FN(4, 4, 4, 2, 2, opt2, (unsigned int)) 328 FN(4, 4, 4, 2, 2, opt2, (unsigned int))
431 329
432 FNS(sse2, sse); 330 FNS(sse2, sse);
433 FNS(ssse3, ssse3); 331 FNS(ssse3, ssse3);
434 332
435 #undef FNS 333 #undef FNS
436 #undef FN 334 #undef FN
OLDNEW
« no previous file with comments | « source/libvpx/vp9/encoder/x86/vp9_variance_mmx.c ('k') | source/libvpx/vp9/vp9_common.mk » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698