OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | |
3 * | |
4 * Use of this source code is governed by a BSD-style license | |
5 * that can be found in the LICENSE file in the root of the source | |
6 * tree. An additional intellectual property rights grant can be found | |
7 * in the file PATENTS. All contributing project authors may | |
8 * be found in the AUTHORS file in the root of the source tree. | |
9 */ | |
10 | |
11 #include "./vp8_rtcd.h" | |
12 #include "vpx_config.h" | |
13 #include "vp8/common/variance.h" | |
14 #include "vpx_ports/mem.h" | |
15 #include "vp8/common/x86/filter_x86.h" | |
16 | |
/*
 * Filter kernels defined outside this file.  The h6 / v6 parts of the names
 * presumably stand for horizontal / vertical six-tap passes, and the suffixes
 * for the instruction set used -- confirm against the assembly sources.
 * None of the functions visible in this part of the file call them.
 */
void filter_block1d_h6_mmx(const unsigned char *src_ptr,
                           unsigned short *output_ptr,
                           unsigned int src_pixels_per_line,
                           unsigned int pixel_step,
                           unsigned int output_height,
                           unsigned int output_width,
                           short *filter);

void filter_block1d_v6_mmx(const short *src_ptr,
                           unsigned char *output_ptr,
                           unsigned int pixels_per_line,
                           unsigned int pixel_step,
                           unsigned int output_height,
                           unsigned int output_width,
                           short *filter);

void filter_block1d8_h6_sse2(const unsigned char *src_ptr,
                             unsigned short *output_ptr,
                             unsigned int src_pixels_per_line,
                             unsigned int pixel_step,
                             unsigned int output_height,
                             unsigned int output_width,
                             short *filter);

void filter_block1d8_v6_sse2(const short *src_ptr,
                             unsigned char *output_ptr,
                             unsigned int pixels_per_line,
                             unsigned int pixel_step,
                             unsigned int output_height,
                             unsigned int output_width,
                             short *filter);
21 | |
/*
 * Externally defined kernel used by vp8_sub_pixel_variance4x4_wmt.
 * It takes two pixel buffers with their strides plus one horizontal and one
 * vertical filter row, and reports its results through *sum and *sumsquared.
 */
void vp8_filter_block2d_bil4x4_var_mmx(const unsigned char *ref_ptr,
                                       int ref_pixels_per_line,
                                       const unsigned char *src_ptr,
                                       int src_pixels_per_line,
                                       const short *HFilter,
                                       const short *VFilter,
                                       int *sum,
                                       unsigned int *sumsquared);
33 | |
/*
 * Externally defined general-offset kernel.  Unlike the 4x4 MMX kernel it
 * receives the raw xoffset / yoffset values rather than filter rows, along
 * with a Height argument (callers in this file pass 8 or 16).  Results are
 * reported through *sum and *sumsquared.
 */
void vp8_filter_block2d_bil_var_sse2(const unsigned char *ref_ptr,
                                     int ref_pixels_per_line,
                                     const unsigned char *src_ptr,
                                     int src_pixels_per_line,
                                     unsigned int Height,
                                     int xoffset,
                                     int yoffset,
                                     int *sum,
                                     unsigned int *sumsquared);
/*
 * Externally defined kernels selected when an offset equals 4.  All six share
 * one signature: two pixel buffers with their strides, a Height argument
 * (callers in this file pass 8 or 16), and two out-parameters, *sum and
 * *sumsquared.  The 8x / 16x part of each name matches the block width of
 * the callers that use it.
 */

/* Used when xoffset == 4 and yoffset == 4. */
void vp8_half_horiz_vert_variance8x_h_sse2(const unsigned char *ref_ptr,
                                           int ref_pixels_per_line,
                                           const unsigned char *src_ptr,
                                           int src_pixels_per_line,
                                           unsigned int Height,
                                           int *sum,
                                           unsigned int *sumsquared);

void vp8_half_horiz_vert_variance16x_h_sse2(const unsigned char *ref_ptr,
                                            int ref_pixels_per_line,
                                            const unsigned char *src_ptr,
                                            int src_pixels_per_line,
                                            unsigned int Height,
                                            int *sum,
                                            unsigned int *sumsquared);

/* Used when xoffset == 4 and yoffset == 0. */
void vp8_half_horiz_variance8x_h_sse2(const unsigned char *ref_ptr,
                                      int ref_pixels_per_line,
                                      const unsigned char *src_ptr,
                                      int src_pixels_per_line,
                                      unsigned int Height,
                                      int *sum,
                                      unsigned int *sumsquared);

void vp8_half_horiz_variance16x_h_sse2(const unsigned char *ref_ptr,
                                       int ref_pixels_per_line,
                                       const unsigned char *src_ptr,
                                       int src_pixels_per_line,
                                       unsigned int Height,
                                       int *sum,
                                       unsigned int *sumsquared);

/* Used when xoffset == 0 and yoffset == 4. */
void vp8_half_vert_variance8x_h_sse2(const unsigned char *ref_ptr,
                                     int ref_pixels_per_line,
                                     const unsigned char *src_ptr,
                                     int src_pixels_per_line,
                                     unsigned int Height,
                                     int *sum,
                                     unsigned int *sumsquared);

void vp8_half_vert_variance16x_h_sse2(const unsigned char *ref_ptr,
                                      int ref_pixels_per_line,
                                      const unsigned char *src_ptr,
                                      int src_pixels_per_line,
                                      unsigned int Height,
                                      int *sum,
                                      unsigned int *sumsquared);
106 | |
107 unsigned int vp8_sub_pixel_variance4x4_wmt | |
108 ( | |
109 const unsigned char *src_ptr, | |
110 int src_pixels_per_line, | |
111 int xoffset, | |
112 int yoffset, | |
113 const unsigned char *dst_ptr, | |
114 int dst_pixels_per_line, | |
115 unsigned int *sse | |
116 ) | |
117 { | |
118 int xsum; | |
119 unsigned int xxsum; | |
120 vp8_filter_block2d_bil4x4_var_mmx( | |
121 src_ptr, src_pixels_per_line, | |
122 dst_ptr, dst_pixels_per_line, | |
123 vp8_bilinear_filters_x86_4[xoffset], vp8_bilinear_filters_x86_4[yoffset]
, | |
124 &xsum, &xxsum | |
125 ); | |
126 *sse = xxsum; | |
127 return (xxsum - (((unsigned int)xsum * xsum) >> 4)); | |
128 } | |
129 | |
130 | |
/*
 * Sub-pixel variance wrapper for an 8x8 block.
 *
 * Kernel selection by (xoffset, yoffset):
 *   (4, 0) -> vp8_half_horiz_variance8x_h_sse2
 *   (0, 4) -> vp8_half_vert_variance8x_h_sse2
 *   (4, 4) -> vp8_half_horiz_vert_variance8x_h_sse2
 *   other  -> vp8_filter_block2d_bil_var_sse2
 * Every kernel is called with a height of 8.
 *
 * The kernel's sum-of-squares output is stored in *sse.  The return value is
 * that sum of squares minus (sum * sum) >> 6, i.e. the squared sum divided
 * by 64.
 */
unsigned int vp8_sub_pixel_variance8x8_wmt(const unsigned char *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const unsigned char *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse)
{
    /* True only for the three offset pairs that have a dedicated kernel. */
    const int has_half_kernel =
        (xoffset == 4 && (yoffset == 0 || yoffset == 4)) ||
        (xoffset == 0 && yoffset == 4);
    int diff_sum;
    unsigned int sq_sum;

    if (!has_half_kernel) {
        vp8_filter_block2d_bil_var_sse2(src_ptr, src_pixels_per_line,
                                        dst_ptr, dst_pixels_per_line, 8,
                                        xoffset, yoffset,
                                        &diff_sum, &sq_sum);
    } else if (yoffset == 0) {
        /* (4, 0) */
        vp8_half_horiz_variance8x_h_sse2(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line, 8,
                                         &diff_sum, &sq_sum);
    } else if (xoffset == 0) {
        /* (0, 4) */
        vp8_half_vert_variance8x_h_sse2(src_ptr, src_pixels_per_line,
                                        dst_ptr, dst_pixels_per_line, 8,
                                        &diff_sum, &sq_sum);
    } else {
        /* (4, 4) */
        vp8_half_horiz_vert_variance8x_h_sse2(src_ptr, src_pixels_per_line,
                                              dst_ptr, dst_pixels_per_line, 8,
                                              &diff_sum, &sq_sum);
    }

    *sse = sq_sum;
    return sq_sum - (((unsigned int)diff_sum * diff_sum) >> 6);
}
178 | |
/*
 * Sub-pixel variance wrapper for a 16x16 block.
 *
 * Kernel selection by (xoffset, yoffset):
 *   (4, 0) -> vp8_half_horiz_variance16x_h_sse2
 *   (0, 4) -> vp8_half_vert_variance16x_h_sse2
 *   (4, 4) -> vp8_half_horiz_vert_variance16x_h_sse2
 *   other  -> vp8_filter_block2d_bil_var_sse2, run once at src_ptr / dst_ptr
 *             and once more 8 bytes further along each buffer, with the two
 *             results added together
 * Every kernel is called with a height of 16.
 *
 * The accumulated sum of squares is stored in *sse.  The return value is that
 * sum of squares minus (sum * sum) >> 8, i.e. the squared sum divided by 256.
 *
 * As the original author noted, callers could avoid this dispatch by invoking
 * the appropriate kernel themselves.
 */
unsigned int vp8_sub_pixel_variance16x16_wmt(const unsigned char *src_ptr,
                                             int src_pixels_per_line,
                                             int xoffset,
                                             int yoffset,
                                             const unsigned char *dst_ptr,
                                             int dst_pixels_per_line,
                                             unsigned int *sse)
{
    int total_sum;
    unsigned int total_sq;
    unsigned int mean_term;

    if (xoffset == 4 && yoffset == 4) {
        vp8_half_horiz_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                               dst_ptr, dst_pixels_per_line,
                                               16, &total_sum, &total_sq);
    } else if (xoffset == 4 && yoffset == 0) {
        vp8_half_horiz_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                          dst_ptr, dst_pixels_per_line,
                                          16, &total_sum, &total_sq);
    } else if (xoffset == 0 && yoffset == 4) {
        vp8_half_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         16, &total_sum, &total_sq);
    } else {
        /* Results of the second call, 8 bytes into each buffer. */
        int second_sum;
        unsigned int second_sq;

        vp8_filter_block2d_bil_var_sse2(src_ptr, src_pixels_per_line,
                                        dst_ptr, dst_pixels_per_line, 16,
                                        xoffset, yoffset,
                                        &total_sum, &total_sq);

        vp8_filter_block2d_bil_var_sse2(src_ptr + 8, src_pixels_per_line,
                                        dst_ptr + 8, dst_pixels_per_line, 16,
                                        xoffset, yoffset,
                                        &second_sum, &second_sq);

        total_sum += second_sum;
        total_sq += second_sq;
    }

    *sse = total_sq;

    mean_term = ((unsigned int)total_sum * total_sum) >> 8;
    return total_sq - mean_term;
}
240 | |
/*
 * 16x16 wrapper that reports the sum of squares instead of the variance.
 *
 * Forwards every argument to vp8_sub_pixel_variance16x16_wmt, discards the
 * value that call returns, and returns what it stored in *sse.
 */
unsigned int vp8_sub_pixel_mse16x16_wmt(const unsigned char *src_ptr,
                                        int src_pixels_per_line,
                                        int xoffset,
                                        int yoffset,
                                        const unsigned char *dst_ptr,
                                        int dst_pixels_per_line,
                                        unsigned int *sse)
{
    /* Only the side effect on *sse is wanted here. */
    (void)vp8_sub_pixel_variance16x16_wmt(src_ptr, src_pixels_per_line,
                                          xoffset, yoffset,
                                          dst_ptr, dst_pixels_per_line,
                                          sse);
    return *sse;
}
254 | |
/*
 * Sub-pixel variance wrapper for a 16-wide, 8-high block.
 *
 * Kernel selection by (xoffset, yoffset):
 *   (4, 0) -> vp8_half_horiz_variance16x_h_sse2
 *   (0, 4) -> vp8_half_vert_variance16x_h_sse2
 *   (4, 4) -> vp8_half_horiz_vert_variance16x_h_sse2
 *   other  -> vp8_filter_block2d_bil_var_sse2, run once at src_ptr / dst_ptr
 *             and once more 8 bytes further along each buffer, with the two
 *             results added together
 * Every kernel is called with a height of 8.
 *
 * The accumulated sum of squares is stored in *sse.  The return value is that
 * sum of squares minus (sum * sum) >> 7, i.e. the squared sum divided by 128.
 */
unsigned int vp8_sub_pixel_variance16x8_wmt(const unsigned char *src_ptr,
                                            int src_pixels_per_line,
                                            int xoffset,
                                            int yoffset,
                                            const unsigned char *dst_ptr,
                                            int dst_pixels_per_line,
                                            unsigned int *sse)
{
    const int x_is_half = (xoffset == 4);
    const int y_is_half = (yoffset == 4);
    int total_sum;
    unsigned int total_sq;

    if (x_is_half && y_is_half) {
        vp8_half_horiz_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                               dst_ptr, dst_pixels_per_line,
                                               8, &total_sum, &total_sq);
    } else if (x_is_half && yoffset == 0) {
        vp8_half_horiz_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                          dst_ptr, dst_pixels_per_line,
                                          8, &total_sum, &total_sq);
    } else if (y_is_half && xoffset == 0) {
        vp8_half_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         8, &total_sum, &total_sq);
    } else {
        /* Results of the second call, 8 bytes into each buffer. */
        int second_sum;
        unsigned int second_sq;

        vp8_filter_block2d_bil_var_sse2(src_ptr, src_pixels_per_line,
                                        dst_ptr, dst_pixels_per_line, 8,
                                        xoffset, yoffset,
                                        &total_sum, &total_sq);

        vp8_filter_block2d_bil_var_sse2(src_ptr + 8, src_pixels_per_line,
                                        dst_ptr + 8, dst_pixels_per_line, 8,
                                        xoffset, yoffset,
                                        &second_sum, &second_sq);

        total_sum += second_sum;
        total_sq += second_sq;
    }

    *sse = total_sq;
    return total_sq - (((unsigned int)total_sum * total_sum) >> 7);
}
311 | |
/*
 * Sub-pixel variance wrapper for an 8-wide, 16-high block.
 *
 * Kernel selection by (xoffset, yoffset):
 *   (4, 0) -> vp8_half_horiz_variance8x_h_sse2
 *   (0, 4) -> vp8_half_vert_variance8x_h_sse2
 *   (4, 4) -> vp8_half_horiz_vert_variance8x_h_sse2
 *   other  -> vp8_filter_block2d_bil_var_sse2
 * Every kernel is called with a height of 16.
 *
 * The kernel's sum-of-squares output is stored in *sse.  The return value is
 * that sum of squares minus (sum * sum) >> 7, i.e. the squared sum divided
 * by 128.
 */
unsigned int vp8_sub_pixel_variance8x16_wmt(const unsigned char *src_ptr,
                                            int src_pixels_per_line,
                                            int xoffset,
                                            int yoffset,
                                            const unsigned char *dst_ptr,
                                            int dst_pixels_per_line,
                                            unsigned int *sse)
{
    int diff_sum;
    unsigned int sq_sum;
    unsigned int mean_term;

    if (yoffset == 4 && (xoffset == 0 || xoffset == 4)) {
        /* yoffset is 4: choose between the vertical-only and the combined
         * kernel based on xoffset. */
        if (xoffset == 0) {
            vp8_half_vert_variance8x_h_sse2(src_ptr, src_pixels_per_line,
                                            dst_ptr, dst_pixels_per_line,
                                            16, &diff_sum, &sq_sum);
        } else {
            vp8_half_horiz_vert_variance8x_h_sse2(src_ptr,
                                                  src_pixels_per_line,
                                                  dst_ptr,
                                                  dst_pixels_per_line,
                                                  16, &diff_sum, &sq_sum);
        }
    } else if (xoffset == 4 && yoffset == 0) {
        vp8_half_horiz_variance8x_h_sse2(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line,
                                         16, &diff_sum, &sq_sum);
    } else {
        vp8_filter_block2d_bil_var_sse2(src_ptr, src_pixels_per_line,
                                        dst_ptr, dst_pixels_per_line, 16,
                                        xoffset, yoffset,
                                        &diff_sum, &sq_sum);
    }

    *sse = sq_sum;

    mean_term = ((unsigned int)diff_sum * diff_sum) >> 7;
    return sq_sum - mean_term;
}
359 | |
360 | |
/*
 * 16x16 variance wrapper that always uses vp8_half_horiz_variance16x_h_sse2,
 * called with a height of 16.
 *
 * The kernel's sum-of-squares output is stored in *sse.  The return value is
 * that sum of squares minus (sum * sum) >> 8, i.e. the squared sum divided
 * by 256.
 */
unsigned int vp8_variance_halfpixvar16x16_h_wmt(const unsigned char *src_ptr,
                                                int src_pixels_per_line,
                                                const unsigned char *dst_ptr,
                                                int dst_pixels_per_line,
                                                unsigned int *sse)
{
    int diff_sum;
    unsigned int sq_sum;
    unsigned int mean_term;

    vp8_half_horiz_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                      dst_ptr, dst_pixels_per_line,
                                      16, &diff_sum, &sq_sum);

    *sse = sq_sum;

    mean_term = ((unsigned int)diff_sum * diff_sum) >> 8;
    return sq_sum - mean_term;
}
379 | |
380 | |
/*
 * 16x16 variance wrapper that always uses vp8_half_vert_variance16x_h_sse2,
 * called with a height of 16.
 *
 * The kernel's sum-of-squares output is stored in *sse.  The return value is
 * that sum of squares minus (sum * sum) >> 8, i.e. the squared sum divided
 * by 256.
 */
unsigned int vp8_variance_halfpixvar16x16_v_wmt(const unsigned char *src_ptr,
                                                int src_pixels_per_line,
                                                const unsigned char *dst_ptr,
                                                int dst_pixels_per_line,
                                                unsigned int *sse)
{
    int diff_sum;
    unsigned int sq_sum;
    unsigned int mean_term;

    vp8_half_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                     dst_ptr, dst_pixels_per_line,
                                     16, &diff_sum, &sq_sum);

    *sse = sq_sum;

    mean_term = ((unsigned int)diff_sum * diff_sum) >> 8;
    return sq_sum - mean_term;
}
398 | |
399 | |
/*
 * 16x16 variance wrapper that always uses
 * vp8_half_horiz_vert_variance16x_h_sse2, called with a height of 16.
 *
 * The kernel's sum-of-squares output is stored in *sse.  The return value is
 * that sum of squares minus (sum * sum) >> 8, i.e. the squared sum divided
 * by 256.
 */
unsigned int vp8_variance_halfpixvar16x16_hv_wmt(const unsigned char *src_ptr,
                                                 int src_pixels_per_line,
                                                 const unsigned char *dst_ptr,
                                                 int dst_pixels_per_line,
                                                 unsigned int *sse)
{
    int diff_sum;
    unsigned int sq_sum;
    unsigned int mean_term;

    vp8_half_horiz_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                           dst_ptr, dst_pixels_per_line,
                                           16, &diff_sum, &sq_sum);

    *sse = sq_sum;

    mean_term = ((unsigned int)diff_sum * diff_sum) >> 8;
    return sq_sum - mean_term;
}
OLD | NEW |