| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 296 matching lines...) |
| 307 return *sse; | 307 return *sse; |
| 308 } | 308 } |
| 309 | 309 |
| 310 unsigned int vp9_mse16x16_sse2(const uint8_t *src, int src_stride, | 310 unsigned int vp9_mse16x16_sse2(const uint8_t *src, int src_stride, |
| 311 const uint8_t *ref, int ref_stride, | 311 const uint8_t *ref, int ref_stride, |
| 312 unsigned int *sse) { | 312 unsigned int *sse) { |
| 313 vp9_variance16x16_sse2(src, src_stride, ref, ref_stride, sse); | 313 vp9_variance16x16_sse2(src, src_stride, ref, ref_stride, sse); |
| 314 return *sse; | 314 return *sse; |
| 315 } | 315 } |
| 316 | 316 |
| 317 // The two trailing unused parameters are placeholders for PIC-enabled builds. |
| 317 #define DECL(w, opt) \ | 318 #define DECL(w, opt) \ |
| 318 int vp9_sub_pixel_variance##w##xh_##opt(const uint8_t *src, \ | 319 int vp9_sub_pixel_variance##w##xh_##opt(const uint8_t *src, \ |
| 319 ptrdiff_t src_stride, \ | 320 ptrdiff_t src_stride, \ |
| 320 int x_offset, int y_offset, \ | 321 int x_offset, int y_offset, \ |
| 321 const uint8_t *dst, \ | 322 const uint8_t *dst, \ |
| 322 ptrdiff_t dst_stride, \ | 323 ptrdiff_t dst_stride, \ |
| 323 int height, unsigned int *sse) | 324 int height, unsigned int *sse, \ |
| 325 void *unused0, void *unused) |
| 324 #define DECLS(opt1, opt2) \ | 326 #define DECLS(opt1, opt2) \ |
| 325 DECL(4, opt2); \ | 327 DECL(4, opt2); \ |
| 326 DECL(8, opt1); \ | 328 DECL(8, opt1); \ |
| 327 DECL(16, opt1) | 329 DECL(16, opt1) |
| 328 | 330 |
| 329 DECLS(sse2, sse); | 331 DECLS(sse2, sse); |
| 330 DECLS(ssse3, ssse3); | 332 DECLS(ssse3, ssse3); |
| 331 #undef DECLS | 333 #undef DECLS |
| 332 #undef DECL | 334 #undef DECL |
| 333 | 335 |
| 334 #define FN(w, h, wf, wlog2, hlog2, opt, cast) \ | 336 #define FN(w, h, wf, wlog2, hlog2, opt, cast) \ |
| 335 unsigned int vp9_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src, \ | 337 unsigned int vp9_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src, \ |
| 336 int src_stride, \ | 338 int src_stride, \ |
| 337 int x_offset, \ | 339 int x_offset, \ |
| 338 int y_offset, \ | 340 int y_offset, \ |
| 339 const uint8_t *dst, \ | 341 const uint8_t *dst, \ |
| 340 int dst_stride, \ | 342 int dst_stride, \ |
| 341 unsigned int *sse_ptr) { \ | 343 unsigned int *sse_ptr) { \ |
| 342 unsigned int sse; \ | 344 unsigned int sse; \ |
| 343 int se = vp9_sub_pixel_variance##wf##xh_##opt(src, src_stride, x_offset, \ | 345 int se = vp9_sub_pixel_variance##wf##xh_##opt(src, src_stride, x_offset, \ |
| 344 y_offset, dst, dst_stride, \ | 346 y_offset, dst, dst_stride, \ |
| 345 h, &sse); \ | 347 h, &sse, NULL, NULL); \ |
| 346 if (w > wf) { \ | 348 if (w > wf) { \ |
| 347 unsigned int sse2; \ | 349 unsigned int sse2; \ |
| 348 int se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 16, src_stride, \ | 350 int se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 16, src_stride, \ |
| 349 x_offset, y_offset, \ | 351 x_offset, y_offset, \ |
| 350 dst + 16, dst_stride, \ | 352 dst + 16, dst_stride, \ |
| 351 h, &sse2); \ | 353 h, &sse2, NULL, NULL); \ |
| 352 se += se2; \ | 354 se += se2; \ |
| 353 sse += sse2; \ | 355 sse += sse2; \ |
| 354 if (w > wf * 2) { \ | 356 if (w > wf * 2) { \ |
| 355 se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 32, src_stride, \ | 357 se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 32, src_stride, \ |
| 356 x_offset, y_offset, \ | 358 x_offset, y_offset, \ |
| 357 dst + 32, dst_stride, \ | 359 dst + 32, dst_stride, \ |
| 358 h, &sse2); \ | 360 h, &sse2, NULL, NULL); \ |
| 359 se += se2; \ | 361 se += se2; \ |
| 360 sse += sse2; \ | 362 sse += sse2; \ |
| 361 se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 48, src_stride, \ | 363 se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 48, src_stride, \ |
| 362 x_offset, y_offset, \ | 364 x_offset, y_offset, \ |
| 363 dst + 48, dst_stride, \ | 365 dst + 48, dst_stride, \ |
| 364 h, &sse2); \ | 366 h, &sse2, NULL, NULL); \ |
| 365 se += se2; \ | 367 se += se2; \ |
| 366 sse += sse2; \ | 368 sse += sse2; \ |
| 367 } \ | 369 } \ |
| 368 } \ | 370 } \ |
| 369 *sse_ptr = sse; \ | 371 *sse_ptr = sse; \ |
| 370 return sse - ((cast se * se) >> (wlog2 + hlog2)); \ | 372 return sse - ((cast se * se) >> (wlog2 + hlog2)); \ |
| 371 } | 373 } |
| 372 | 374 |
| 373 #define FNS(opt1, opt2) \ | 375 #define FNS(opt1, opt2) \ |
| 374 FN(64, 64, 16, 6, 6, opt1, (int64_t)); \ | 376 FN(64, 64, 16, 6, 6, opt1, (int64_t)); \ |
| 375 FN(64, 32, 16, 6, 5, opt1, (int64_t)); \ | 377 FN(64, 32, 16, 6, 5, opt1, (int64_t)); \ |
| 376 FN(32, 64, 16, 5, 6, opt1, (int64_t)); \ | 378 FN(32, 64, 16, 5, 6, opt1, (int64_t)); \ |
| 377 FN(32, 32, 16, 5, 5, opt1, (int64_t)); \ | 379 FN(32, 32, 16, 5, 5, opt1, (int64_t)); \ |
| 378 FN(32, 16, 16, 5, 4, opt1, (int64_t)); \ | 380 FN(32, 16, 16, 5, 4, opt1, (int64_t)); \ |
| 379 FN(16, 32, 16, 4, 5, opt1, (int64_t)); \ | 381 FN(16, 32, 16, 4, 5, opt1, (int64_t)); \ |
| 380 FN(16, 16, 16, 4, 4, opt1, (unsigned int)); \ | 382 FN(16, 16, 16, 4, 4, opt1, (unsigned int)); \ |
| 381 FN(16, 8, 16, 4, 3, opt1, (unsigned int)); \ | 383 FN(16, 8, 16, 4, 3, opt1, (unsigned int)); \ |
| 382 FN(8, 16, 8, 3, 4, opt1, (unsigned int)); \ | 384 FN(8, 16, 8, 3, 4, opt1, (unsigned int)); \ |
| 383 FN(8, 8, 8, 3, 3, opt1, (unsigned int)); \ | 385 FN(8, 8, 8, 3, 3, opt1, (unsigned int)); \ |
| 384 FN(8, 4, 8, 3, 2, opt1, (unsigned int)); \ | 386 FN(8, 4, 8, 3, 2, opt1, (unsigned int)); \ |
| 385 FN(4, 8, 4, 2, 3, opt2, (unsigned int)); \ | 387 FN(4, 8, 4, 2, 3, opt2, (unsigned int)); \ |
| 386 FN(4, 4, 4, 2, 2, opt2, (unsigned int)) | 388 FN(4, 4, 4, 2, 2, opt2, (unsigned int)) |
| 387 | 389 |
| 388 FNS(sse2, sse); | 390 FNS(sse2, sse); |
| 389 FNS(ssse3, ssse3); | 391 FNS(ssse3, ssse3); |
| 390 | 392 |
| 391 #undef FNS | 393 #undef FNS |
| 392 #undef FN | 394 #undef FN |
| 393 | 395 |
| 396 // The two trailing unused parameters are placeholders for PIC-enabled builds. |
| 394 #define DECL(w, opt) \ | 397 #define DECL(w, opt) \ |
| 395 int vp9_sub_pixel_avg_variance##w##xh_##opt(const uint8_t *src, \ | 398 int vp9_sub_pixel_avg_variance##w##xh_##opt(const uint8_t *src, \ |
| 396 ptrdiff_t src_stride, \ | 399 ptrdiff_t src_stride, \ |
| 397 int x_offset, int y_offset, \ | 400 int x_offset, int y_offset, \ |
| 398 const uint8_t *dst, \ | 401 const uint8_t *dst, \ |
| 399 ptrdiff_t dst_stride, \ | 402 ptrdiff_t dst_stride, \ |
| 400 const uint8_t *sec, \ | 403 const uint8_t *sec, \ |
| 401 ptrdiff_t sec_stride, \ | 404 ptrdiff_t sec_stride, \ |
| 402 int height, unsigned int *sse) | 405 int height, unsigned int *sse, \ |
| 406 void *unused0, void *unused) |
| 403 #define DECLS(opt1, opt2) \ | 407 #define DECLS(opt1, opt2) \ |
| 404 DECL(4, opt2); \ | 408 DECL(4, opt2); \ |
| 405 DECL(8, opt1); \ | 409 DECL(8, opt1); \ |
| 406 DECL(16, opt1) | 410 DECL(16, opt1) |
| 407 | 411 |
| 408 DECLS(sse2, sse); | 412 DECLS(sse2, sse); |
| 409 DECLS(ssse3, ssse3); | 413 DECLS(ssse3, ssse3); |
| 410 #undef DECL | 414 #undef DECL |
| 411 #undef DECLS | 415 #undef DECLS |
| 412 | 416 |
| 413 #define FN(w, h, wf, wlog2, hlog2, opt, cast) \ | 417 #define FN(w, h, wf, wlog2, hlog2, opt, cast) \ |
| 414 unsigned int vp9_sub_pixel_avg_variance##w##x##h##_##opt(const uint8_t *src, \ | 418 unsigned int vp9_sub_pixel_avg_variance##w##x##h##_##opt(const uint8_t *src, \ |
| 415 int src_stride, \ | 419 int src_stride, \ |
| 416 int x_offset, \ | 420 int x_offset, \ |
| 417 int y_offset, \ | 421 int y_offset, \ |
| 418 const uint8_t *dst, \ | 422 const uint8_t *dst, \ |
| 419 int dst_stride, \ | 423 int dst_stride, \ |
| 420 unsigned int *sseptr, \ | 424 unsigned int *sseptr, \ |
| 421 const uint8_t *sec) { \ | 425 const uint8_t *sec) { \ |
| 422 unsigned int sse; \ | 426 unsigned int sse; \ |
| 423 int se = vp9_sub_pixel_avg_variance##wf##xh_##opt(src, src_stride, x_offset, \ | 427 int se = vp9_sub_pixel_avg_variance##wf##xh_##opt(src, src_stride, x_offset, \ |
| 424 y_offset, dst, dst_stride, \ | 428 y_offset, dst, dst_stride, \ |
| 425 sec, w, h, &sse); \ | 429 sec, w, h, &sse, NULL, \ |
| 430 NULL); \ |
| 426 if (w > wf) { \ | 431 if (w > wf) { \ |
| 427 unsigned int sse2; \ | 432 unsigned int sse2; \ |
| 428 int se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 16, src_stride, \ | 433 int se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 16, src_stride, \ |
| 429 x_offset, y_offset, \ | 434 x_offset, y_offset, \ |
| 430 dst + 16, dst_stride, \ | 435 dst + 16, dst_stride, \ |
| 431 sec + 16, w, h, &sse2); \ | 436 sec + 16, w, h, &sse2, \ |
| 437 NULL, NULL); \ |
| 432 se += se2; \ | 438 se += se2; \ |
| 433 sse += sse2; \ | 439 sse += sse2; \ |
| 434 if (w > wf * 2) { \ | 440 if (w > wf * 2) { \ |
| 435 se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 32, src_stride, \ | 441 se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 32, src_stride, \ |
| 436 x_offset, y_offset, \ | 442 x_offset, y_offset, \ |
| 437 dst + 32, dst_stride, \ | 443 dst + 32, dst_stride, \ |
| 438 sec + 32, w, h, &sse2); \ | 444 sec + 32, w, h, &sse2, \ |
| 445 NULL, NULL); \ |
| 439 se += se2; \ | 446 se += se2; \ |
| 440 sse += sse2; \ | 447 sse += sse2; \ |
| 441 se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 48, src_stride, \ | 448 se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 48, src_stride, \ |
| 442 x_offset, y_offset, \ | 449 x_offset, y_offset, \ |
| 443 dst + 48, dst_stride, \ | 450 dst + 48, dst_stride, \ |
| 444 sec + 48, w, h, &sse2); \ | 451 sec + 48, w, h, &sse2, \ |
| 452 NULL, NULL); \ |
| 445 se += se2; \ | 453 se += se2; \ |
| 446 sse += sse2; \ | 454 sse += sse2; \ |
| 447 } \ | 455 } \ |
| 448 } \ | 456 } \ |
| 449 *sseptr = sse; \ | 457 *sseptr = sse; \ |
| 450 return sse - ((cast se * se) >> (wlog2 + hlog2)); \ | 458 return sse - ((cast se * se) >> (wlog2 + hlog2)); \ |
| 451 } | 459 } |
| 452 | 460 |
| 453 #define FNS(opt1, opt2) \ | 461 #define FNS(opt1, opt2) \ |
| 454 FN(64, 64, 16, 6, 6, opt1, (int64_t)); \ | 462 FN(64, 64, 16, 6, 6, opt1, (int64_t)); \ |
| 455 FN(64, 32, 16, 6, 5, opt1, (int64_t)); \ | 463 FN(64, 32, 16, 6, 5, opt1, (int64_t)); \ |
| 456 FN(32, 64, 16, 5, 6, opt1, (int64_t)); \ | 464 FN(32, 64, 16, 5, 6, opt1, (int64_t)); \ |
| 457 FN(32, 32, 16, 5, 5, opt1, (int64_t)); \ | 465 FN(32, 32, 16, 5, 5, opt1, (int64_t)); \ |
| 458 FN(32, 16, 16, 5, 4, opt1, (int64_t)); \ | 466 FN(32, 16, 16, 5, 4, opt1, (int64_t)); \ |
| 459 FN(16, 32, 16, 4, 5, opt1, (int64_t)); \ | 467 FN(16, 32, 16, 4, 5, opt1, (int64_t)); \ |
| 460 FN(16, 16, 16, 4, 4, opt1, (unsigned int)); \ | 468 FN(16, 16, 16, 4, 4, opt1, (unsigned int)); \ |
| 461 FN(16, 8, 16, 4, 3, opt1, (unsigned int)); \ | 469 FN(16, 8, 16, 4, 3, opt1, (unsigned int)); \ |
| 462 FN(8, 16, 8, 3, 4, opt1, (unsigned int)); \ | 470 FN(8, 16, 8, 3, 4, opt1, (unsigned int)); \ |
| 463 FN(8, 8, 8, 3, 3, opt1, (unsigned int)); \ | 471 FN(8, 8, 8, 3, 3, opt1, (unsigned int)); \ |
| 464 FN(8, 4, 8, 3, 2, opt1, (unsigned int)); \ | 472 FN(8, 4, 8, 3, 2, opt1, (unsigned int)); \ |
| 465 FN(4, 8, 4, 2, 3, opt2, (unsigned int)); \ | 473 FN(4, 8, 4, 2, 3, opt2, (unsigned int)); \ |
| 466 FN(4, 4, 4, 2, 2, opt2, (unsigned int)) | 474 FN(4, 4, 4, 2, 2, opt2, (unsigned int)) |
| 467 | 475 |
| 468 FNS(sse2, sse); | 476 FNS(sse2, sse); |
| 469 FNS(ssse3, ssse3); | 477 FNS(ssse3, ssse3); |
| 470 | 478 |
| 471 #undef FNS | 479 #undef FNS |
| 472 #undef FN | 480 #undef FN |
| OLD | NEW |