Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(31)

Side by Side Diff: source/libvpx/vp9/encoder/x86/vp9_variance_sse2.c

Issue 11555023: libvpx: Add VP9 decoder. (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 8 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
Property Changes:
Added: svn:eol-style
+ LF
OLDNEW
(Empty)
1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "vpx_config.h"
12 #include "vp9/encoder/vp9_variance.h"
13 #include "vp9/common/vp9_pragmas.h"
14 #include "vpx_ports/mem.h"
15
/* Sub-pixel offset value that the sub-pixel variance functions in this file
 * compare xoffset/yoffset against to select their half-pixel code paths. */
#define HALFNDX 8

/* External filter routines; their definitions are not part of this file. */
extern void filter_block1d_h6_mmx(const unsigned char *src_ptr,
                                  unsigned short *output_ptr,
                                  unsigned int src_pixels_per_line,
                                  unsigned int pixel_step,
                                  unsigned int output_height,
                                  unsigned int output_width,
                                  short *vp7_filter);
extern void filter_block1d_v6_mmx(const short *src_ptr,
                                  unsigned char *output_ptr,
                                  unsigned int pixels_per_line,
                                  unsigned int pixel_step,
                                  unsigned int output_height,
                                  unsigned int output_width,
                                  short *vp7_filter);
extern void filter_block1d8_h6_sse2(const unsigned char *src_ptr,
                                    unsigned short *output_ptr,
                                    unsigned int src_pixels_per_line,
                                    unsigned int pixel_step,
                                    unsigned int output_height,
                                    unsigned int output_width,
                                    short *vp7_filter);
extern void filter_block1d8_v6_sse2(const short *src_ptr,
                                    unsigned char *output_ptr,
                                    unsigned int pixels_per_line,
                                    unsigned int pixel_step,
                                    unsigned int output_height,
                                    unsigned int output_width,
                                    short *vp7_filter);
22
/* 4x4 filtered-variance helper, defined outside this file. It is handed two
 * rows of the filter table (one per axis) and reports its results through
 * the sum / sumsquared out-parameters. */
extern void vp9_filter_block2d_bil4x4_var_mmx(const unsigned char *ref_ptr,
                                              int ref_pixels_per_line,
                                              const unsigned char *src_ptr,
                                              int src_pixels_per_line,
                                              const short *HFilter,
                                              const short *VFilter,
                                              int *sum,
                                              unsigned int *sumsquared);

/* 4x4 variance helper, defined outside this file. Callers in this file read
 * only the SSE and Sum out-parameters and ignore the return value. */
extern unsigned int vp9_get4x4var_mmx(const unsigned char *src_ptr,
                                      int source_stride,
                                      const unsigned char *ref_ptr,
                                      int recon_stride,
                                      unsigned int *SSE,
                                      int *Sum);
44
45 unsigned int vp9_get_mb_ss_sse2
46 (
47 const short *src_ptr
48 );
49 unsigned int vp9_get16x16var_sse2
50 (
51 const unsigned char *src_ptr,
52 int source_stride,
53 const unsigned char *ref_ptr,
54 int recon_stride,
55 unsigned int *SSE,
56 int *Sum
57 );
58 unsigned int vp9_get8x8var_sse2
59 (
60 const unsigned char *src_ptr,
61 int source_stride,
62 const unsigned char *ref_ptr,
63 int recon_stride,
64 unsigned int *SSE,
65 int *Sum
66 );
67 void vp9_filter_block2d_bil_var_sse2
68 (
69 const unsigned char *ref_ptr,
70 int ref_pixels_per_line,
71 const unsigned char *src_ptr,
72 int src_pixels_per_line,
73 unsigned int Height,
74 int xoffset,
75 int yoffset,
76 int *sum,
77 unsigned int *sumsquared
78 );
79 void vp9_half_horiz_vert_variance8x_h_sse2
80 (
81 const unsigned char *ref_ptr,
82 int ref_pixels_per_line,
83 const unsigned char *src_ptr,
84 int src_pixels_per_line,
85 unsigned int Height,
86 int *sum,
87 unsigned int *sumsquared
88 );
89 void vp9_half_horiz_vert_variance16x_h_sse2
90 (
91 const unsigned char *ref_ptr,
92 int ref_pixels_per_line,
93 const unsigned char *src_ptr,
94 int src_pixels_per_line,
95 unsigned int Height,
96 int *sum,
97 unsigned int *sumsquared
98 );
99 void vp9_half_horiz_variance8x_h_sse2
100 (
101 const unsigned char *ref_ptr,
102 int ref_pixels_per_line,
103 const unsigned char *src_ptr,
104 int src_pixels_per_line,
105 unsigned int Height,
106 int *sum,
107 unsigned int *sumsquared
108 );
109 void vp9_half_horiz_variance16x_h_sse2
110 (
111 const unsigned char *ref_ptr,
112 int ref_pixels_per_line,
113 const unsigned char *src_ptr,
114 int src_pixels_per_line,
115 unsigned int Height,
116 int *sum,
117 unsigned int *sumsquared
118 );
119 void vp9_half_vert_variance8x_h_sse2
120 (
121 const unsigned char *ref_ptr,
122 int ref_pixels_per_line,
123 const unsigned char *src_ptr,
124 int src_pixels_per_line,
125 unsigned int Height,
126 int *sum,
127 unsigned int *sumsquared
128 );
129 void vp9_half_vert_variance16x_h_sse2
130 (
131 const unsigned char *ref_ptr,
132 int ref_pixels_per_line,
133 const unsigned char *src_ptr,
134 int src_pixels_per_line,
135 unsigned int Height,
136 int *sum,
137 unsigned int *sumsquared
138 );
139
140 DECLARE_ALIGNED(16, extern const short, vp9_bilinear_filters_mmx[16][8]);
141
/* Variance of a 4x4 block.
 *
 * Obtains SSE and sum from vp9_get4x4var_mmx, stores the SSE in *sse and
 * returns SSE - sum^2 / 16 (the >> 4 divides by the 16 pixels of the block).
 */
unsigned int vp9_variance4x4_wmt(const unsigned char *src_ptr,
                                 int source_stride,
                                 const unsigned char *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  unsigned int usum;

  vp9_get4x4var_mmx(src_ptr, source_stride, ref_ptr, recon_stride,
                    &sq_err, &diff_sum);

  /* Square in unsigned arithmetic, exactly as the cast in the formula. */
  usum = (unsigned int)diff_sum;
  *sse = sq_err;
  return sq_err - ((usum * usum) >> 4);
}
156
/* Variance of an 8x8 block.
 *
 * Obtains SSE and sum from vp9_get8x8var_sse2, stores the SSE in *sse and
 * returns SSE - sum^2 / 64 (the >> 6 divides by the 64 pixels of the block).
 */
unsigned int vp9_variance8x8_wmt(const unsigned char *src_ptr,
                                 int source_stride,
                                 const unsigned char *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  unsigned int usum;

  vp9_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride,
                     &sq_err, &diff_sum);

  /* Square in unsigned arithmetic, exactly as the cast in the formula. */
  usum = (unsigned int)diff_sum;
  *sse = sq_err;
  return sq_err - ((usum * usum) >> 6);
}
172
173
/* Variance of a 16x16 block.
 *
 * Obtains SSE and sum from vp9_get16x16var_sse2, stores the SSE in *sse and
 * returns SSE - sum^2 / 256 (the >> 8 divides by the 256 pixels of the
 * block).
 */
unsigned int vp9_variance16x16_wmt(const unsigned char *src_ptr,
                                   int source_stride,
                                   const unsigned char *ref_ptr,
                                   int recon_stride,
                                   unsigned int *sse) {
  unsigned int sse0;
  int sum0;

  vp9_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride,
                       &sse0, &sum0);
  *sse = sse0;
  /* The cast makes the square an unsigned multiplication. */
  return (sse0 - (((unsigned int)sum0 * sum0) >> 8));
}
/* Sum of squared errors of a 16x16 block.
 *
 * Calls vp9_get16x16var_sse2 and returns the SSE it reports, also storing it
 * in *sse. The sum output is required by the helper's signature but is not
 * used here.
 */
unsigned int vp9_mse16x16_wmt(const unsigned char *src_ptr,
                              int source_stride,
                              const unsigned char *ref_ptr,
                              int recon_stride,
                              unsigned int *sse) {
  unsigned int sse0;
  int sum0;

  vp9_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride,
                       &sse0, &sum0);
  *sse = sse0;
  return sse0;
}
203
204
/* Variance of a 16-wide, 8-high block.
 *
 * Runs vp9_get8x8var_sse2 on the left and right 8x8 halves (the right half
 * starts 8 pixels further along each row), adds their SSE and sum values,
 * stores the combined SSE in *sse and returns SSE - sum^2 / 128 (>> 7).
 */
unsigned int vp9_variance16x8_wmt(const unsigned char *src_ptr,
                                  int source_stride,
                                  const unsigned char *ref_ptr,
                                  int recon_stride,
                                  unsigned int *sse) {
  unsigned int sse0, sse1, var;
  int sum0, sum1, avg;

  vp9_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride,
                     &sse0, &sum0);
  vp9_get8x8var_sse2(src_ptr + 8, source_stride, ref_ptr + 8, recon_stride,
                     &sse1, &sum1);

  var = sse0 + sse1;
  avg = sum0 + sum1;
  *sse = var;
  /* The cast makes the square an unsigned multiplication. */
  return (var - (((unsigned int)avg * avg) >> 7));
}
224
/* Variance of an 8-wide, 16-high block.
 *
 * Runs vp9_get8x8var_sse2 on the upper and lower 8x8 halves (the lower half
 * starts 8 rows down in each buffer, using that buffer's own stride), adds
 * their SSE and sum values, stores the combined SSE in *sse and returns
 * SSE - sum^2 / 128 (>> 7).
 */
unsigned int vp9_variance8x16_wmt(const unsigned char *src_ptr,
                                  int source_stride,
                                  const unsigned char *ref_ptr,
                                  int recon_stride,
                                  unsigned int *sse) {
  unsigned int sse0, sse1, var;
  int sum0, sum1, avg;

  vp9_get8x8var_sse2(src_ptr, source_stride, ref_ptr, recon_stride,
                     &sse0, &sum0);
  vp9_get8x8var_sse2(src_ptr + 8 * source_stride, source_stride,
                     ref_ptr + 8 * recon_stride, recon_stride,
                     &sse1, &sum1);

  var = sse0 + sse1;
  avg = sum0 + sum1;
  *sse = var;
  /* The cast makes the square an unsigned multiplication. */
  return (var - (((unsigned int)avg * avg) >> 7));
}
244
245 unsigned int vp9_sub_pixel_variance4x4_wmt
246 (
247 const unsigned char *src_ptr,
248 int src_pixels_per_line,
249 int xoffset,
250 int yoffset,
251 const unsigned char *dst_ptr,
252 int dst_pixels_per_line,
253 unsigned int *sse
254 ) {
255 int xsum;
256 unsigned int xxsum;
257 vp9_filter_block2d_bil4x4_var_mmx(
258 src_ptr, src_pixels_per_line,
259 dst_ptr, dst_pixels_per_line,
260 vp9_bilinear_filters_mmx[xoffset], vp9_bilinear_filters_mmx[yoffset],
261 &xsum, &xxsum
262 );
263 *sse = xxsum;
264 return (xxsum - (((unsigned int)xsum * xsum) >> 4));
265 }
266
267
268 unsigned int vp9_sub_pixel_variance8x8_wmt
269 (
270 const unsigned char *src_ptr,
271 int src_pixels_per_line,
272 int xoffset,
273 int yoffset,
274 const unsigned char *dst_ptr,
275 int dst_pixels_per_line,
276 unsigned int *sse
277 ) {
278 int xsum;
279 unsigned int xxsum;
280
281 if (xoffset == HALFNDX && yoffset == 0) {
282 vp9_half_horiz_variance8x_h_sse2(
283 src_ptr, src_pixels_per_line,
284 dst_ptr, dst_pixels_per_line, 8,
285 &xsum, &xxsum);
286 } else if (xoffset == 0 && yoffset == HALFNDX) {
287 vp9_half_vert_variance8x_h_sse2(
288 src_ptr, src_pixels_per_line,
289 dst_ptr, dst_pixels_per_line, 8,
290 &xsum, &xxsum);
291 } else if (xoffset == HALFNDX && yoffset == HALFNDX) {
292 vp9_half_horiz_vert_variance8x_h_sse2(
293 src_ptr, src_pixels_per_line,
294 dst_ptr, dst_pixels_per_line, 8,
295 &xsum, &xxsum);
296 } else {
297 vp9_filter_block2d_bil_var_sse2(
298 src_ptr, src_pixels_per_line,
299 dst_ptr, dst_pixels_per_line, 8,
300 xoffset, yoffset,
301 &xsum, &xxsum);
302 }
303
304 *sse = xxsum;
305 return (xxsum - (((unsigned int)xsum * xsum) >> 6));
306 }
307
308 unsigned int vp9_sub_pixel_variance16x16_wmt
309 (
310 const unsigned char *src_ptr,
311 int src_pixels_per_line,
312 int xoffset,
313 int yoffset,
314 const unsigned char *dst_ptr,
315 int dst_pixels_per_line,
316 unsigned int *sse
317 ) {
318 int xsum0, xsum1;
319 unsigned int xxsum0, xxsum1;
320
321
322 // note we could avoid these if statements if the calling function
323 // just called the appropriate functions inside.
324 if (xoffset == HALFNDX && yoffset == 0) {
325 vp9_half_horiz_variance16x_h_sse2(
326 src_ptr, src_pixels_per_line,
327 dst_ptr, dst_pixels_per_line, 16,
328 &xsum0, &xxsum0);
329 } else if (xoffset == 0 && yoffset == HALFNDX) {
330 vp9_half_vert_variance16x_h_sse2(
331 src_ptr, src_pixels_per_line,
332 dst_ptr, dst_pixels_per_line, 16,
333 &xsum0, &xxsum0);
334 } else if (xoffset == HALFNDX && yoffset == HALFNDX) {
335 vp9_half_horiz_vert_variance16x_h_sse2(
336 src_ptr, src_pixels_per_line,
337 dst_ptr, dst_pixels_per_line, 16,
338 &xsum0, &xxsum0);
339 } else {
340 vp9_filter_block2d_bil_var_sse2(
341 src_ptr, src_pixels_per_line,
342 dst_ptr, dst_pixels_per_line, 16,
343 xoffset, yoffset,
344 &xsum0, &xxsum0
345 );
346
347 vp9_filter_block2d_bil_var_sse2(
348 src_ptr + 8, src_pixels_per_line,
349 dst_ptr + 8, dst_pixels_per_line, 16,
350 xoffset, yoffset,
351 &xsum1, &xxsum1
352 );
353 xsum0 += xsum1;
354 xxsum0 += xxsum1;
355 }
356
357 *sse = xxsum0;
358 return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8));
359 }
360
/* Sub-pixel sum of squared errors of a 16x16 block.
 *
 * Delegates to vp9_sub_pixel_variance16x16_wmt for its side effect of
 * filling *sse, discards the variance it returns, and returns *sse.
 */
unsigned int vp9_sub_pixel_mse16x16_wmt(const unsigned char *src_ptr,
                                        int src_pixels_per_line,
                                        int xoffset,
                                        int yoffset,
                                        const unsigned char *dst_ptr,
                                        int dst_pixels_per_line,
                                        unsigned int *sse) {
  (void)vp9_sub_pixel_variance16x16_wmt(src_ptr, src_pixels_per_line,
                                        xoffset, yoffset,
                                        dst_ptr, dst_pixels_per_line,
                                        sse);
  return *sse;
}
373
374 unsigned int vp9_sub_pixel_variance16x8_wmt
375 (
376 const unsigned char *src_ptr,
377 int src_pixels_per_line,
378 int xoffset,
379 int yoffset,
380 const unsigned char *dst_ptr,
381 int dst_pixels_per_line,
382 unsigned int *sse
383
384 ) {
385 int xsum0, xsum1;
386 unsigned int xxsum0, xxsum1;
387
388 if (xoffset == HALFNDX && yoffset == 0) {
389 vp9_half_horiz_variance16x_h_sse2(
390 src_ptr, src_pixels_per_line,
391 dst_ptr, dst_pixels_per_line, 8,
392 &xsum0, &xxsum0);
393 } else if (xoffset == 0 && yoffset == HALFNDX) {
394 vp9_half_vert_variance16x_h_sse2(
395 src_ptr, src_pixels_per_line,
396 dst_ptr, dst_pixels_per_line, 8,
397 &xsum0, &xxsum0);
398 } else if (xoffset == HALFNDX && yoffset == HALFNDX) {
399 vp9_half_horiz_vert_variance16x_h_sse2(
400 src_ptr, src_pixels_per_line,
401 dst_ptr, dst_pixels_per_line, 8,
402 &xsum0, &xxsum0);
403 } else {
404 vp9_filter_block2d_bil_var_sse2(
405 src_ptr, src_pixels_per_line,
406 dst_ptr, dst_pixels_per_line, 8,
407 xoffset, yoffset,
408 &xsum0, &xxsum0);
409
410 vp9_filter_block2d_bil_var_sse2(
411 src_ptr + 8, src_pixels_per_line,
412 dst_ptr + 8, dst_pixels_per_line, 8,
413 xoffset, yoffset,
414 &xsum1, &xxsum1);
415 xsum0 += xsum1;
416 xxsum0 += xxsum1;
417 }
418
419 *sse = xxsum0;
420 return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 7));
421 }
422
423 unsigned int vp9_sub_pixel_variance8x16_wmt
424 (
425 const unsigned char *src_ptr,
426 int src_pixels_per_line,
427 int xoffset,
428 int yoffset,
429 const unsigned char *dst_ptr,
430 int dst_pixels_per_line,
431 unsigned int *sse
432 ) {
433 int xsum;
434 unsigned int xxsum;
435
436 if (xoffset == HALFNDX && yoffset == 0) {
437 vp9_half_horiz_variance8x_h_sse2(
438 src_ptr, src_pixels_per_line,
439 dst_ptr, dst_pixels_per_line, 16,
440 &xsum, &xxsum);
441 } else if (xoffset == 0 && yoffset == HALFNDX) {
442 vp9_half_vert_variance8x_h_sse2(
443 src_ptr, src_pixels_per_line,
444 dst_ptr, dst_pixels_per_line, 16,
445 &xsum, &xxsum);
446 } else if (xoffset == HALFNDX && yoffset == HALFNDX) {
447 vp9_half_horiz_vert_variance8x_h_sse2(
448 src_ptr, src_pixels_per_line,
449 dst_ptr, dst_pixels_per_line, 16,
450 &xsum, &xxsum);
451 } else {
452 vp9_filter_block2d_bil_var_sse2(
453 src_ptr, src_pixels_per_line,
454 dst_ptr, dst_pixels_per_line, 16,
455 xoffset, yoffset,
456 &xsum, &xxsum);
457 }
458
459 *sse = xxsum;
460 return (xxsum - (((unsigned int)xsum * xsum) >> 7));
461 }
462
463
/* 16x16 variance through the horizontal half-pixel helper.
 *
 * Calls vp9_half_horiz_variance16x_h_sse2 for 16 rows, stores the reported
 * sum of squares in *sse and returns sumsquared - sum^2 / 256 (>> 8).
 */
unsigned int vp9_variance_halfpixvar16x16_h_wmt(const unsigned char *src_ptr,
                                                int src_pixels_per_line,
                                                const unsigned char *dst_ptr,
                                                int dst_pixels_per_line,
                                                unsigned int *sse) {
  int total;
  unsigned int total_sq;
  unsigned int utotal;

  vp9_half_horiz_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                    dst_ptr, dst_pixels_per_line, 16,
                                    &total, &total_sq);

  utotal = (unsigned int)total;
  *sse = total_sq;
  return total_sq - ((utotal * utotal) >> 8);
}
481
482
/* 16x16 variance through the vertical half-pixel helper.
 *
 * Calls vp9_half_vert_variance16x_h_sse2 for 16 rows, stores the reported
 * sum of squares in *sse and returns sumsquared - sum^2 / 256 (>> 8).
 */
unsigned int vp9_variance_halfpixvar16x16_v_wmt(const unsigned char *src_ptr,
                                                int src_pixels_per_line,
                                                const unsigned char *dst_ptr,
                                                int dst_pixels_per_line,
                                                unsigned int *sse) {
  int total;
  unsigned int total_sq;
  unsigned int utotal;

  vp9_half_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                   dst_ptr, dst_pixels_per_line, 16,
                                   &total, &total_sq);

  utotal = (unsigned int)total;
  *sse = total_sq;
  return total_sq - ((utotal * utotal) >> 8);
}
499
500
/* 16x16 variance through the combined horizontal+vertical half-pixel helper.
 *
 * Calls vp9_half_horiz_vert_variance16x_h_sse2 for 16 rows, stores the
 * reported sum of squares in *sse and returns
 * sumsquared - sum^2 / 256 (>> 8).
 */
unsigned int vp9_variance_halfpixvar16x16_hv_wmt(const unsigned char *src_ptr,
                                                 int src_pixels_per_line,
                                                 const unsigned char *dst_ptr,
                                                 int dst_pixels_per_line,
                                                 unsigned int *sse) {
  int total;
  unsigned int total_sq;
  unsigned int utotal;

  vp9_half_horiz_vert_variance16x_h_sse2(src_ptr, src_pixels_per_line,
                                         dst_ptr, dst_pixels_per_line, 16,
                                         &total, &total_sq);

  utotal = (unsigned int)total;
  *sse = total_sq;
  return total_sq - ((utotal * utotal) >> 8);
}
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698