OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 296 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
307 return *sse; | 307 return *sse; |
308 } | 308 } |
309 | 309 |
// vp9_mse16x16_sse2: thin wrapper around vp9_variance16x16_sse2().
// It forwards src/ref and their strides unchanged, ignores the value that
// vp9_variance16x16_sse2() returns, and instead returns whatever that call
// left in *sse (presumably the sum of squared errors -- confirm against the
// callee). The caller's *sse is therefore also an output of this function.
310 unsigned int vp9_mse16x16_sse2(const uint8_t *src, int src_stride, | 310 unsigned int vp9_mse16x16_sse2(const uint8_t *src, int src_stride, |
311 const uint8_t *ref, int ref_stride, | 311 const uint8_t *ref, int ref_stride, |
312 unsigned int *sse) { | 312 unsigned int *sse) { |
313 vp9_variance16x16_sse2(src, src_stride, ref, ref_stride, sse); | 313 vp9_variance16x16_sse2(src, src_stride, ref, ref_stride, sse); |
314 return *sse; | 314 return *sse; |
315 } | 315 } |
316 | 316 |
| 317 // The 2 unused parameters are placeholders for PIC enabled build. |
// DECL(w, opt) expands to a prototype only (no body) for
//   int vp9_sub_pixel_variance<w>xh_<opt>(src, src_stride, x_offset,
//                                         y_offset, dst, dst_stride,
//                                         height, sse, ...)
// The definitions are not in the visible part of this file (presumably
// assembly -- confirm). In the NEW column the prototype gains two trailing
// void* parameters (unused0, unused); the OLD column ends at `sse`.
317 #define DECL(w, opt) \ | 318 #define DECL(w, opt) \ |
318 int vp9_sub_pixel_variance##w##xh_##opt(const uint8_t *src, \ | 319 int vp9_sub_pixel_variance##w##xh_##opt(const uint8_t *src, \ |
319 ptrdiff_t src_stride, \ | 320 ptrdiff_t src_stride, \ |
320 int x_offset, int y_offset, \ | 321 int x_offset, int y_offset, \ |
321 const uint8_t *dst, \ | 322 const uint8_t *dst, \ |
322 ptrdiff_t dst_stride, \ | 323 ptrdiff_t dst_stride, \ |
323 int height, unsigned int *sse) | 324 int height, unsigned int *sse, \ |
| 325 void *unused0, void *unused) |
// DECLS(opt1, opt2) declares the three column widths used by the wrappers:
// width 4 takes the opt2 suffix, widths 8 and 16 take the opt1 suffix.
324 #define DECLS(opt1, opt2) \ | 326 #define DECLS(opt1, opt2) \ |
325 DECL(4, opt2); \ | 327 DECL(4, opt2); \ |
326 DECL(8, opt1); \ | 328 DECL(8, opt1); \ |
327 DECL(16, opt1) | 329 DECL(16, opt1) |
328 | 330 |
// First line yields ..._sse2 for widths 8/16 and ..._sse for width 4;
// second line yields ..._ssse3 for all three widths.
329 DECLS(sse2, sse); | 331 DECLS(sse2, sse); |
330 DECLS(ssse3, ssse3); | 332 DECLS(ssse3, ssse3); |
331 #undef DECLS | 333 #undef DECLS |
332 #undef DECL | 334 #undef DECL |
333 | 335 |
// FN(w, h, wf, wlog2, hlog2, opt, cast) defines
//   unsigned int vp9_sub_pixel_variance<w>x<h>_<opt>(src, src_stride,
//       x_offset, y_offset, dst, dst_stride, sse_ptr)
// on top of the wf-wide helper vp9_sub_pixel_variance<wf>xh_<opt>:
//   - the helper is always called once at column offset 0 with height h;
//   - if w > wf it is called again at src + 16 / dst + 16;
//   - if w > wf * 2 it is called twice more at offsets +32 and +48.
//   The offsets are hard-coded in steps of 16, so the multi-call paths
//   assume wf == 16.
// Each call returns a sum (se) and writes a sum of squares (sse); both are
// accumulated across the calls. The total sse is stored to *sse_ptr and the
// function returns  sse - ((cast se * se) >> (wlog2 + hlog2)).
// `cast` is pasted directly before `se * se`, so it selects the type in
// which the square is evaluated.
// OLD column: helper called with 8 arguments. NEW column: two extra NULL
// arguments are passed for the helper's trailing void* parameters.
334 #define FN(w, h, wf, wlog2, hlog2, opt, cast) \ | 336 #define FN(w, h, wf, wlog2, hlog2, opt, cast) \ |
335 unsigned int vp9_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src, \ | 337 unsigned int vp9_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src, \ |
336 int src_stride, \ | 338 int src_stride, \ |
337 int x_offset, \ | 339 int x_offset, \ |
338 int y_offset, \ | 340 int y_offset, \ |
339 const uint8_t *dst, \ | 341 const uint8_t *dst, \ |
340 int dst_stride, \ | 342 int dst_stride, \ |
341 unsigned int *sse_ptr) { \ | 343 unsigned int *sse_ptr) { \ |
342 unsigned int sse; \ | 344 unsigned int sse; \ |
343 int se = vp9_sub_pixel_variance##wf##xh_##opt(src, src_stride, x_offset, \ | 345 int se = vp9_sub_pixel_variance##wf##xh_##opt(src, src_stride, x_offset, \ |
344 y_offset, dst, dst_stride, \ | 346 y_offset, dst, dst_stride, \ |
345 h, &sse); \ | 347 h, &sse, NULL, NULL); \ |
346 if (w > wf) { \ | 348 if (w > wf) { \ |
347 unsigned int sse2; \ | 349 unsigned int sse2; \ |
348 int se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 16, src_stride, \ | 350 int se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 16, src_stride, \ |
349 x_offset, y_offset, \ | 351 x_offset, y_offset, \ |
350 dst + 16, dst_stride, \ | 352 dst + 16, dst_stride, \ |
351 h, &sse2); \ | 353 h, &sse2, NULL, NULL); \ |
352 se += se2; \ | 354 se += se2; \ |
353 sse += sse2; \ | 355 sse += sse2; \ |
354 if (w > wf * 2) { \ | 356 if (w > wf * 2) { \ |
355 se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 32, src_stride, \ | 357 se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 32, src_stride, \ |
356 x_offset, y_offset, \ | 358 x_offset, y_offset, \ |
357 dst + 32, dst_stride, \ | 359 dst + 32, dst_stride, \ |
358 h, &sse2); \ | 360 h, &sse2, NULL, NULL); \ |
359 se += se2; \ | 361 se += se2; \ |
360 sse += sse2; \ | 362 sse += sse2; \ |
361 se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 48, src_stride, \ | 363 se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 48, src_stride, \ |
362 x_offset, y_offset, \ | 364 x_offset, y_offset, \ |
363 dst + 48, dst_stride, \ | 365 dst + 48, dst_stride, \ |
364 h, &sse2); \ | 366 h, &sse2, NULL, NULL); \ |
365 se += se2; \ | 367 se += se2; \ |
366 sse += sse2; \ | 368 sse += sse2; \ |
367 } \ | 369 } \ |
368 } \ | 370 } \ |
369 *sse_ptr = sse; \ | 371 *sse_ptr = sse; \ |
370 return sse - ((cast se * se) >> (wlog2 + hlog2)); \ | 372 return sse - ((cast se * se) >> (wlog2 + hlog2)); \ |
371 } | 373 } |
372 | 374 |
// FNS(opt1, opt2) instantiates FN once per supported block size.
// Columns are (w, h, wf, wlog2, hlog2, opt, cast):
//   - wf is 16 for w = 16/32/64, 8 for w = 8 and 4 for w = 4;
//   - wlog2 / hlog2 are log2(w) / log2(h) in every row;
//   - 4-wide blocks use the opt2 suffix, all other widths use opt1;
//   - blocks of 16x32 and larger pass (int64_t) as the cast, 16x16 and
//     smaller pass (unsigned int) -- presumably because se * se can exceed
//     32 bits for the larger blocks (confirm).
373 #define FNS(opt1, opt2) \ | 375 #define FNS(opt1, opt2) \ |
374 FN(64, 64, 16, 6, 6, opt1, (int64_t)); \ | 376 FN(64, 64, 16, 6, 6, opt1, (int64_t)); \ |
375 FN(64, 32, 16, 6, 5, opt1, (int64_t)); \ | 377 FN(64, 32, 16, 6, 5, opt1, (int64_t)); \ |
376 FN(32, 64, 16, 5, 6, opt1, (int64_t)); \ | 378 FN(32, 64, 16, 5, 6, opt1, (int64_t)); \ |
377 FN(32, 32, 16, 5, 5, opt1, (int64_t)); \ | 379 FN(32, 32, 16, 5, 5, opt1, (int64_t)); \ |
378 FN(32, 16, 16, 5, 4, opt1, (int64_t)); \ | 380 FN(32, 16, 16, 5, 4, opt1, (int64_t)); \ |
379 FN(16, 32, 16, 4, 5, opt1, (int64_t)); \ | 381 FN(16, 32, 16, 4, 5, opt1, (int64_t)); \ |
380 FN(16, 16, 16, 4, 4, opt1, (unsigned int)); \ | 382 FN(16, 16, 16, 4, 4, opt1, (unsigned int)); \ |
381 FN(16, 8, 16, 4, 3, opt1, (unsigned int)); \ | 383 FN(16, 8, 16, 4, 3, opt1, (unsigned int)); \ |
382 FN(8, 16, 8, 3, 4, opt1, (unsigned int)); \ | 384 FN(8, 16, 8, 3, 4, opt1, (unsigned int)); \ |
383 FN(8, 8, 8, 3, 3, opt1, (unsigned int)); \ | 385 FN(8, 8, 8, 3, 3, opt1, (unsigned int)); \ |
384 FN(8, 4, 8, 3, 2, opt1, (unsigned int)); \ | 386 FN(8, 4, 8, 3, 2, opt1, (unsigned int)); \ |
385 FN(4, 8, 4, 2, 3, opt2, (unsigned int)); \ | 387 FN(4, 8, 4, 2, 3, opt2, (unsigned int)); \ |
386 FN(4, 4, 4, 2, 2, opt2, (unsigned int)) | 388 FN(4, 4, 4, 2, 2, opt2, (unsigned int)) |
387 | 389 |
// Emit the wrappers twice: *_sse2 (with *_sse for the 4-wide sizes) and
// *_ssse3 for every size.
388 FNS(sse2, sse); | 390 FNS(sse2, sse); |
389 FNS(ssse3, ssse3); | 391 FNS(ssse3, ssse3); |
390 | 392 |
// Drop the generator macros so the names can be reused further down.
391 #undef FNS | 393 #undef FNS |
392 #undef FN | 394 #undef FN |
393 | 395 |
| 396 // The 2 unused parameters are placeholders for PIC enabled build. |
// DECL(w, opt) expands to a prototype only (no body) for
//   int vp9_sub_pixel_avg_variance<w>xh_<opt>(src, src_stride, x_offset,
//       y_offset, dst, dst_stride, sec, sec_stride, height, sse, ...)
// Compared with the non-avg prototype it takes an extra buffer `sec` and
// its stride `sec_stride`. The definitions are not in the visible part of
// this file (presumably assembly -- confirm). In the NEW column the
// prototype gains two trailing void* parameters (unused0, unused).
394 #define DECL(w, opt) \ | 397 #define DECL(w, opt) \ |
395 int vp9_sub_pixel_avg_variance##w##xh_##opt(const uint8_t *src, \ | 398 int vp9_sub_pixel_avg_variance##w##xh_##opt(const uint8_t *src, \ |
396 ptrdiff_t src_stride, \ | 399 ptrdiff_t src_stride, \ |
397 int x_offset, int y_offset, \ | 400 int x_offset, int y_offset, \ |
398 const uint8_t *dst, \ | 401 const uint8_t *dst, \ |
399 ptrdiff_t dst_stride, \ | 402 ptrdiff_t dst_stride, \ |
400 const uint8_t *sec, \ | 403 const uint8_t *sec, \ |
401 ptrdiff_t sec_stride, \ | 404 ptrdiff_t sec_stride, \ |
402 int height, unsigned int *sse) | 405 int height, unsigned int *sse, \ |
| 406 void *unused0, void *unused) |
// DECLS(opt1, opt2): width 4 takes the opt2 suffix, widths 8 and 16 take
// the opt1 suffix.
403 #define DECLS(opt1, opt2) \ | 407 #define DECLS(opt1, opt2) \ |
404 DECL(4, opt2); \ | 408 DECL(4, opt2); \ |
405 DECL(8, opt1); \ | 409 DECL(8, opt1); \ |
406 DECL(16, opt1) | 410 DECL(16, opt1) |
407 | 411 |
// First line yields ..._sse2 for widths 8/16 and ..._sse for width 4;
// second line yields ..._ssse3 for all three widths.
408 DECLS(sse2, sse); | 412 DECLS(sse2, sse); |
409 DECLS(ssse3, ssse3); | 413 DECLS(ssse3, ssse3); |
410 #undef DECL | 414 #undef DECL |
411 #undef DECLS | 415 #undef DECLS |
412 | 416 |
// FN(w, h, wf, wlog2, hlog2, opt, cast) defines
//   unsigned int vp9_sub_pixel_avg_variance<w>x<h>_<opt>(src, src_stride,
//       x_offset, y_offset, dst, dst_stride, sseptr, sec)
// on top of the wf-wide helper vp9_sub_pixel_avg_variance<wf>xh_<opt>:
//   - the helper is always called once at column offset 0 with height h;
//   - if w > wf it is called again with src, dst and sec all advanced by 16;
//   - if w > wf * 2 it is called twice more at offsets +32 and +48.
//   The offsets are hard-coded in steps of 16, so the multi-call paths
//   assume wf == 16.
// The block width `w` is passed in the helper's sec_stride position, i.e.
// `sec` is treated as a buffer whose row pitch equals the block width.
// se and sse are accumulated across the calls; the total sse is stored to
// *sseptr and the function returns
//   sse - ((cast se * se) >> (wlog2 + hlog2)).
// OLD column: helper called with 10 arguments. NEW column: two extra NULL
// arguments are passed for the helper's trailing void* parameters.
413 #define FN(w, h, wf, wlog2, hlog2, opt, cast) \ | 417 #define FN(w, h, wf, wlog2, hlog2, opt, cast) \ |
414 unsigned int vp9_sub_pixel_avg_variance##w##x##h##_##opt(const uint8_t *src, \ | 418 unsigned int vp9_sub_pixel_avg_variance##w##x##h##_##opt(const uint8_t *src, \ |
415 int src_stride, \ | 419 int src_stride, \ |
416 int x_offset, \ | 420 int x_offset, \ |
417 int y_offset, \ | 421 int y_offset, \ |
418 const uint8_t *dst, \ | 422 const uint8_t *dst, \ |
419 int dst_stride, \ | 423 int dst_stride, \ |
420 unsigned int *sseptr, \ | 424 unsigned int *sseptr, \ |
421 const uint8_t *sec) { \ | 425 const uint8_t *sec) { \ |
422 unsigned int sse; \ | 426 unsigned int sse; \ |
423 int se = vp9_sub_pixel_avg_variance##wf##xh_##opt(src, src_stride, x_offset, \ | 427 int se = vp9_sub_pixel_avg_variance##wf##xh_##opt(src, src_stride, x_offset, \ |
424 y_offset, dst, dst_stride, \ | 428 y_offset, dst, dst_stride, \ |
425 sec, w, h, &sse); \ | 429 sec, w, h, &sse, NULL, \ |
| 430 NULL); \ |
426 if (w > wf) { \ | 431 if (w > wf) { \ |
427 unsigned int sse2; \ | 432 unsigned int sse2; \ |
428 int se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 16, src_stride, \ | 433 int se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 16, src_stride, \ |
429 x_offset, y_offset, \ | 434 x_offset, y_offset, \ |
430 dst + 16, dst_stride, \ | 435 dst + 16, dst_stride, \ |
431 sec + 16, w, h, &sse2); \ | 436 sec + 16, w, h, &sse2, \ |
| 437 NULL, NULL); \ |
432 se += se2; \ | 438 se += se2; \ |
433 sse += sse2; \ | 439 sse += sse2; \ |
434 if (w > wf * 2) { \ | 440 if (w > wf * 2) { \ |
435 se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 32, src_stride, \ | 441 se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 32, src_stride, \ |
436 x_offset, y_offset, \ | 442 x_offset, y_offset, \ |
437 dst + 32, dst_stride, \ | 443 dst + 32, dst_stride, \ |
438 sec + 32, w, h, &sse2); \ | 444 sec + 32, w, h, &sse2, \ |
| 445 NULL, NULL); \ |
439 se += se2; \ | 446 se += se2; \ |
440 sse += sse2; \ | 447 sse += sse2; \ |
441 se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 48, src_stride, \ | 448 se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 48, src_stride, \ |
442 x_offset, y_offset, \ | 449 x_offset, y_offset, \ |
443 dst + 48, dst_stride, \ | 450 dst + 48, dst_stride, \ |
444 sec + 48, w, h, &sse2); \ | 451 sec + 48, w, h, &sse2, \ |
| 452 NULL, NULL); \ |
445 se += se2; \ | 453 se += se2; \ |
446 sse += sse2; \ | 454 sse += sse2; \ |
447 } \ | 455 } \ |
448 } \ | 456 } \ |
449 *sseptr = sse; \ | 457 *sseptr = sse; \ |
450 return sse - ((cast se * se) >> (wlog2 + hlog2)); \ | 458 return sse - ((cast se * se) >> (wlog2 + hlog2)); \ |
451 } | 459 } |
452 | 460 |
// FNS(opt1, opt2) instantiates the avg-variance FN once per supported block
// size. Columns are (w, h, wf, wlog2, hlog2, opt, cast):
//   - wf is 16 for w = 16/32/64, 8 for w = 8 and 4 for w = 4;
//   - wlog2 / hlog2 are log2(w) / log2(h) in every row;
//   - 4-wide blocks use the opt2 suffix, all other widths use opt1;
//   - blocks of 16x32 and larger pass (int64_t) as the cast, 16x16 and
//     smaller pass (unsigned int) -- presumably because se * se can exceed
//     32 bits for the larger blocks (confirm).
453 #define FNS(opt1, opt2) \ | 461 #define FNS(opt1, opt2) \ |
454 FN(64, 64, 16, 6, 6, opt1, (int64_t)); \ | 462 FN(64, 64, 16, 6, 6, opt1, (int64_t)); \ |
455 FN(64, 32, 16, 6, 5, opt1, (int64_t)); \ | 463 FN(64, 32, 16, 6, 5, opt1, (int64_t)); \ |
456 FN(32, 64, 16, 5, 6, opt1, (int64_t)); \ | 464 FN(32, 64, 16, 5, 6, opt1, (int64_t)); \ |
457 FN(32, 32, 16, 5, 5, opt1, (int64_t)); \ | 465 FN(32, 32, 16, 5, 5, opt1, (int64_t)); \ |
458 FN(32, 16, 16, 5, 4, opt1, (int64_t)); \ | 466 FN(32, 16, 16, 5, 4, opt1, (int64_t)); \ |
459 FN(16, 32, 16, 4, 5, opt1, (int64_t)); \ | 467 FN(16, 32, 16, 4, 5, opt1, (int64_t)); \ |
460 FN(16, 16, 16, 4, 4, opt1, (unsigned int)); \ | 468 FN(16, 16, 16, 4, 4, opt1, (unsigned int)); \ |
461 FN(16, 8, 16, 4, 3, opt1, (unsigned int)); \ | 469 FN(16, 8, 16, 4, 3, opt1, (unsigned int)); \ |
462 FN(8, 16, 8, 3, 4, opt1, (unsigned int)); \ | 470 FN(8, 16, 8, 3, 4, opt1, (unsigned int)); \ |
463 FN(8, 8, 8, 3, 3, opt1, (unsigned int)); \ | 471 FN(8, 8, 8, 3, 3, opt1, (unsigned int)); \ |
464 FN(8, 4, 8, 3, 2, opt1, (unsigned int)); \ | 472 FN(8, 4, 8, 3, 2, opt1, (unsigned int)); \ |
465 FN(4, 8, 4, 2, 3, opt2, (unsigned int)); \ | 473 FN(4, 8, 4, 2, 3, opt2, (unsigned int)); \ |
466 FN(4, 4, 4, 2, 2, opt2, (unsigned int)) | 474 FN(4, 4, 4, 2, 2, opt2, (unsigned int)) |
467 | 475 |
// Emit the wrappers twice: *_sse2 (with *_sse for the 4-wide sizes) and
// *_ssse3 for every size.
468 FNS(sse2, sse); | 476 FNS(sse2, sse); |
469 FNS(ssse3, ssse3); | 477 FNS(ssse3, ssse3); |
470 | 478 |
// Drop the generator macros now that all wrappers have been emitted.
471 #undef FNS | 479 #undef FNS |
472 #undef FN | 480 #undef FN |
OLD | NEW |