OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include "vpx_config.h" | 11 #include "./vpx_config.h" |
12 | 12 |
13 #include "vp9/encoder/vp9_variance.h" | 13 #include "vp9/encoder/vp9_variance.h" |
14 #include "vp9/common/vp9_pragmas.h" | 14 #include "vp9/common/vp9_pragmas.h" |
15 #include "vpx_ports/mem.h" | 15 #include "vpx_ports/mem.h" |
16 | 16 |
17 extern unsigned int vp9_get4x4var_mmx | 17 extern unsigned int vp9_get4x4var_mmx |
18 ( | 18 ( |
19 const unsigned char *src_ptr, | 19 const unsigned char *src_ptr, |
20 int source_stride, | 20 int source_stride, |
21 const unsigned char *ref_ptr, | 21 const unsigned char *ref_ptr, |
22 int recon_stride, | 22 int recon_stride, |
23 unsigned int *SSE, | 23 unsigned int *SSE, |
24 int *Sum | 24 int *Sum |
25 ); | 25 ); |
26 | 26 |
27 unsigned int vp9_get_mb_ss_sse2 | 27 unsigned int vp9_get_mb_ss_sse2 |
28 ( | 28 ( |
29 const short *src_ptr | 29 const int16_t *src_ptr |
30 ); | 30 ); |
31 unsigned int vp9_get16x16var_sse2 | 31 unsigned int vp9_get16x16var_sse2 |
32 ( | 32 ( |
33 const unsigned char *src_ptr, | 33 const unsigned char *src_ptr, |
34 int source_stride, | 34 int source_stride, |
35 const unsigned char *ref_ptr, | 35 const unsigned char *ref_ptr, |
36 int recon_stride, | 36 int recon_stride, |
37 unsigned int *SSE, | 37 unsigned int *SSE, |
38 int *Sum | 38 int *Sum |
39 ); | 39 ); |
(...skipping 203 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
243 *sse = var; | 243 *sse = var; |
244 return (var - (((unsigned int)avg * avg) >> 8)); | 244 return (var - (((unsigned int)avg * avg) >> 8)); |
245 } | 245 } |
246 | 246 |
247 unsigned int vp9_mse16x16_sse2( | 247 unsigned int vp9_mse16x16_sse2( |
248 const unsigned char *src_ptr, | 248 const unsigned char *src_ptr, |
249 int source_stride, | 249 int source_stride, |
250 const unsigned char *ref_ptr, | 250 const unsigned char *ref_ptr, |
251 int recon_stride, | 251 int recon_stride, |
252 unsigned int *sse) { | 252 unsigned int *sse) { |
253 | |
254 unsigned int sse0; | 253 unsigned int sse0; |
255 int sum0; | 254 int sum0; |
256 vp9_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, | 255 vp9_get16x16var_sse2(src_ptr, source_stride, ref_ptr, recon_stride, &sse0, |
257 &sum0); | 256 &sum0); |
258 *sse = sse0; | 257 *sse = sse0; |
259 return sse0; | 258 return sse0; |
260 } | 259 } |
261 | 260 |
262 unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr, | 261 unsigned int vp9_variance32x32_sse2(const uint8_t *src_ptr, |
263 int source_stride, | 262 int source_stride, |
(...skipping 136 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
400 } | 399 } |
401 | 400 |
402 #define FNS(opt1, opt2) \ | 401 #define FNS(opt1, opt2) \ |
403 FN(64, 64, 16, 6, 6, opt1, (int64_t)); \ | 402 FN(64, 64, 16, 6, 6, opt1, (int64_t)); \ |
404 FN(64, 32, 16, 6, 5, opt1, (int64_t)); \ | 403 FN(64, 32, 16, 6, 5, opt1, (int64_t)); \ |
405 FN(32, 64, 16, 5, 6, opt1, (int64_t)); \ | 404 FN(32, 64, 16, 5, 6, opt1, (int64_t)); \ |
406 FN(32, 32, 16, 5, 5, opt1, (int64_t)); \ | 405 FN(32, 32, 16, 5, 5, opt1, (int64_t)); \ |
407 FN(32, 16, 16, 5, 4, opt1, (int64_t)); \ | 406 FN(32, 16, 16, 5, 4, opt1, (int64_t)); \ |
408 FN(16, 32, 16, 4, 5, opt1, (int64_t)); \ | 407 FN(16, 32, 16, 4, 5, opt1, (int64_t)); \ |
409 FN(16, 16, 16, 4, 4, opt1, (unsigned int)); \ | 408 FN(16, 16, 16, 4, 4, opt1, (unsigned int)); \ |
410 FN(16, 8, 16, 4, 3, opt1,); \ | 409 FN(16, 8, 16, 4, 3, opt1, (unsigned int)); \ |
411 FN(8, 16, 8, 3, 4, opt1,); \ | 410 FN(8, 16, 8, 3, 4, opt1, (unsigned int)); \ |
412 FN(8, 8, 8, 3, 3, opt1,); \ | 411 FN(8, 8, 8, 3, 3, opt1, (unsigned int)); \ |
413 FN(8, 4, 8, 3, 2, opt1,); \ | 412 FN(8, 4, 8, 3, 2, opt1, (unsigned int)); \ |
414 FN(4, 8, 4, 2, 3, opt2,); \ | 413 FN(4, 8, 4, 2, 3, opt2, (unsigned int)); \ |
415 FN(4, 4, 4, 2, 2, opt2,) | 414 FN(4, 4, 4, 2, 2, opt2, (unsigned int)) |
416 | 415 |
417 FNS(sse2, sse); | 416 FNS(sse2, sse); |
418 FNS(ssse3, ssse3); | 417 FNS(ssse3, ssse3); |
419 | 418 |
420 #undef FNS | 419 #undef FNS |
421 #undef FN | 420 #undef FN |
422 | 421 |
423 #define DECL(w, opt) \ | 422 #define DECL(w, opt) \ |
424 int vp9_sub_pixel_avg_variance##w##xh_##opt(const uint8_t *src, \ | 423 int vp9_sub_pixel_avg_variance##w##xh_##opt(const uint8_t *src, \ |
425 ptrdiff_t src_stride, \ | 424 ptrdiff_t src_stride, \ |
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
480 } | 479 } |
481 | 480 |
482 #define FNS(opt1, opt2) \ | 481 #define FNS(opt1, opt2) \ |
483 FN(64, 64, 16, 6, 6, opt1, (int64_t)); \ | 482 FN(64, 64, 16, 6, 6, opt1, (int64_t)); \ |
484 FN(64, 32, 16, 6, 5, opt1, (int64_t)); \ | 483 FN(64, 32, 16, 6, 5, opt1, (int64_t)); \ |
485 FN(32, 64, 16, 5, 6, opt1, (int64_t)); \ | 484 FN(32, 64, 16, 5, 6, opt1, (int64_t)); \ |
486 FN(32, 32, 16, 5, 5, opt1, (int64_t)); \ | 485 FN(32, 32, 16, 5, 5, opt1, (int64_t)); \ |
487 FN(32, 16, 16, 5, 4, opt1, (int64_t)); \ | 486 FN(32, 16, 16, 5, 4, opt1, (int64_t)); \ |
488 FN(16, 32, 16, 4, 5, opt1, (int64_t)); \ | 487 FN(16, 32, 16, 4, 5, opt1, (int64_t)); \ |
489 FN(16, 16, 16, 4, 4, opt1, (unsigned int)); \ | 488 FN(16, 16, 16, 4, 4, opt1, (unsigned int)); \ |
490 FN(16, 8, 16, 4, 3, opt1,); \ | 489 FN(16, 8, 16, 4, 3, opt1, (unsigned int)); \ |
491 FN(8, 16, 8, 3, 4, opt1,); \ | 490 FN(8, 16, 8, 3, 4, opt1, (unsigned int)); \ |
492 FN(8, 8, 8, 3, 3, opt1,); \ | 491 FN(8, 8, 8, 3, 3, opt1, (unsigned int)); \ |
493 FN(8, 4, 8, 3, 2, opt1,); \ | 492 FN(8, 4, 8, 3, 2, opt1, (unsigned int)); \ |
494 FN(4, 8, 4, 2, 3, opt2,); \ | 493 FN(4, 8, 4, 2, 3, opt2, (unsigned int)); \ |
495 FN(4, 4, 4, 2, 2, opt2,) | 494 FN(4, 4, 4, 2, 2, opt2, (unsigned int)) |
496 | 495 |
497 FNS(sse2, sse); | 496 FNS(sse2, sse); |
498 FNS(ssse3, ssse3); | 497 FNS(ssse3, ssse3); |
499 | 498 |
500 #undef FNS | 499 #undef FNS |
501 #undef FN | 500 #undef FN |
502 | 501 |
503 unsigned int vp9_variance_halfpixvar16x16_h_sse2( | 502 unsigned int vp9_variance_halfpixvar16x16_h_sse2( |
504 const unsigned char *src_ptr, | 503 const unsigned char *src_ptr, |
505 int src_pixels_per_line, | 504 int src_pixels_per_line, |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
547 unsigned int xxsum0; | 546 unsigned int xxsum0; |
548 | 547 |
549 vp9_half_horiz_vert_variance16x_h_sse2( | 548 vp9_half_horiz_vert_variance16x_h_sse2( |
550 src_ptr, src_pixels_per_line, | 549 src_ptr, src_pixels_per_line, |
551 dst_ptr, dst_pixels_per_line, 16, | 550 dst_ptr, dst_pixels_per_line, 16, |
552 &xsum0, &xxsum0); | 551 &xsum0, &xxsum0); |
553 | 552 |
554 *sse = xxsum0; | 553 *sse = xxsum0; |
555 return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); | 554 return (xxsum0 - (((unsigned int)xsum0 * xsum0) >> 8)); |
556 } | 555 } |
OLD | NEW |