OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 296 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
307 return *sse; | 307 return *sse; |
308 } | 308 } |
309 | 309 |
310 unsigned int vp9_mse16x16_sse2(const uint8_t *src, int src_stride, | 310 unsigned int vp9_mse16x16_sse2(const uint8_t *src, int src_stride, |
311 const uint8_t *ref, int ref_stride, | 311 const uint8_t *ref, int ref_stride, |
312 unsigned int *sse) { | 312 unsigned int *sse) { |
313 vp9_variance16x16_sse2(src, src_stride, ref, ref_stride, sse); | 313 vp9_variance16x16_sse2(src, src_stride, ref, ref_stride, sse); |
314 return *sse; | 314 return *sse; |
315 } | 315 } |
316 | 316 |
317 // The 2 unused parameters are place holders for PIC enabled build. | |
318 #define DECL(w, opt) \ | 317 #define DECL(w, opt) \ |
319 int vp9_sub_pixel_variance##w##xh_##opt(const uint8_t *src, \ | 318 int vp9_sub_pixel_variance##w##xh_##opt(const uint8_t *src, \ |
320 ptrdiff_t src_stride, \ | 319 ptrdiff_t src_stride, \ |
321 int x_offset, int y_offset, \ | 320 int x_offset, int y_offset, \ |
322 const uint8_t *dst, \ | 321 const uint8_t *dst, \ |
323 ptrdiff_t dst_stride, \ | 322 ptrdiff_t dst_stride, \ |
324 int height, unsigned int *sse, \ | 323 int height, unsigned int *sse) |
325 void *unused0, void *unused) | |
326 #define DECLS(opt1, opt2) \ | 324 #define DECLS(opt1, opt2) \ |
327 DECL(4, opt2); \ | 325 DECL(4, opt2); \ |
328 DECL(8, opt1); \ | 326 DECL(8, opt1); \ |
329 DECL(16, opt1) | 327 DECL(16, opt1) |
330 | 328 |
331 DECLS(sse2, sse); | 329 DECLS(sse2, sse); |
332 DECLS(ssse3, ssse3); | 330 DECLS(ssse3, ssse3); |
333 #undef DECLS | 331 #undef DECLS |
334 #undef DECL | 332 #undef DECL |
335 | 333 |
336 #define FN(w, h, wf, wlog2, hlog2, opt, cast) \ | 334 #define FN(w, h, wf, wlog2, hlog2, opt, cast) \ |
337 unsigned int vp9_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src, \ | 335 unsigned int vp9_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src, \ |
338 int src_stride, \ | 336 int src_stride, \ |
339 int x_offset, \ | 337 int x_offset, \ |
340 int y_offset, \ | 338 int y_offset, \ |
341 const uint8_t *dst, \ | 339 const uint8_t *dst, \ |
342 int dst_stride, \ | 340 int dst_stride, \ |
343 unsigned int *sse_ptr) { \ | 341 unsigned int *sse_ptr) { \ |
344 unsigned int sse; \ | 342 unsigned int sse; \ |
345 int se = vp9_sub_pixel_variance##wf##xh_##opt(src, src_stride, x_offset, \ | 343 int se = vp9_sub_pixel_variance##wf##xh_##opt(src, src_stride, x_offset, \ |
346 y_offset, dst, dst_stride, \ | 344 y_offset, dst, dst_stride, \ |
347 h, &sse, NULL, NULL); \ | 345 h, &sse); \ |
348 if (w > wf) { \ | 346 if (w > wf) { \ |
349 unsigned int sse2; \ | 347 unsigned int sse2; \ |
350 int se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 16, src_stride, \ | 348 int se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 16, src_stride, \ |
351 x_offset, y_offset, \ | 349 x_offset, y_offset, \ |
352 dst + 16, dst_stride, \ | 350 dst + 16, dst_stride, \ |
353 h, &sse2, NULL, NULL); \ | 351 h, &sse2); \ |
354 se += se2; \ | 352 se += se2; \ |
355 sse += sse2; \ | 353 sse += sse2; \ |
356 if (w > wf * 2) { \ | 354 if (w > wf * 2) { \ |
357 se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 32, src_stride, \ | 355 se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 32, src_stride, \ |
358 x_offset, y_offset, \ | 356 x_offset, y_offset, \ |
359 dst + 32, dst_stride, \ | 357 dst + 32, dst_stride, \ |
360 h, &sse2, NULL, NULL); \ | 358 h, &sse2); \ |
361 se += se2; \ | 359 se += se2; \ |
362 sse += sse2; \ | 360 sse += sse2; \ |
363 se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 48, src_stride, \ | 361 se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 48, src_stride, \ |
364 x_offset, y_offset, \ | 362 x_offset, y_offset, \ |
365 dst + 48, dst_stride, \ | 363 dst + 48, dst_stride, \ |
366 h, &sse2, NULL, NULL); \ | 364 h, &sse2); \ |
367 se += se2; \ | 365 se += se2; \ |
368 sse += sse2; \ | 366 sse += sse2; \ |
369 } \ | 367 } \ |
370 } \ | 368 } \ |
371 *sse_ptr = sse; \ | 369 *sse_ptr = sse; \ |
372 return sse - ((cast se * se) >> (wlog2 + hlog2)); \ | 370 return sse - ((cast se * se) >> (wlog2 + hlog2)); \ |
373 } | 371 } |
374 | 372 |
375 #define FNS(opt1, opt2) \ | 373 #define FNS(opt1, opt2) \ |
376 FN(64, 64, 16, 6, 6, opt1, (int64_t)); \ | 374 FN(64, 64, 16, 6, 6, opt1, (int64_t)); \ |
377 FN(64, 32, 16, 6, 5, opt1, (int64_t)); \ | 375 FN(64, 32, 16, 6, 5, opt1, (int64_t)); \ |
378 FN(32, 64, 16, 5, 6, opt1, (int64_t)); \ | 376 FN(32, 64, 16, 5, 6, opt1, (int64_t)); \ |
379 FN(32, 32, 16, 5, 5, opt1, (int64_t)); \ | 377 FN(32, 32, 16, 5, 5, opt1, (int64_t)); \ |
380 FN(32, 16, 16, 5, 4, opt1, (int64_t)); \ | 378 FN(32, 16, 16, 5, 4, opt1, (int64_t)); \ |
381 FN(16, 32, 16, 4, 5, opt1, (int64_t)); \ | 379 FN(16, 32, 16, 4, 5, opt1, (int64_t)); \ |
382 FN(16, 16, 16, 4, 4, opt1, (unsigned int)); \ | 380 FN(16, 16, 16, 4, 4, opt1, (unsigned int)); \ |
383 FN(16, 8, 16, 4, 3, opt1, (unsigned int)); \ | 381 FN(16, 8, 16, 4, 3, opt1, (unsigned int)); \ |
384 FN(8, 16, 8, 3, 4, opt1, (unsigned int)); \ | 382 FN(8, 16, 8, 3, 4, opt1, (unsigned int)); \ |
385 FN(8, 8, 8, 3, 3, opt1, (unsigned int)); \ | 383 FN(8, 8, 8, 3, 3, opt1, (unsigned int)); \ |
386 FN(8, 4, 8, 3, 2, opt1, (unsigned int)); \ | 384 FN(8, 4, 8, 3, 2, opt1, (unsigned int)); \ |
387 FN(4, 8, 4, 2, 3, opt2, (unsigned int)); \ | 385 FN(4, 8, 4, 2, 3, opt2, (unsigned int)); \ |
388 FN(4, 4, 4, 2, 2, opt2, (unsigned int)) | 386 FN(4, 4, 4, 2, 2, opt2, (unsigned int)) |
389 | 387 |
390 FNS(sse2, sse); | 388 FNS(sse2, sse); |
391 FNS(ssse3, ssse3); | 389 FNS(ssse3, ssse3); |
392 | 390 |
393 #undef FNS | 391 #undef FNS |
394 #undef FN | 392 #undef FN |
395 | 393 |
396 // The 2 unused parameters are place holders for PIC enabled build. | |
397 #define DECL(w, opt) \ | 394 #define DECL(w, opt) \ |
398 int vp9_sub_pixel_avg_variance##w##xh_##opt(const uint8_t *src, \ | 395 int vp9_sub_pixel_avg_variance##w##xh_##opt(const uint8_t *src, \ |
399 ptrdiff_t src_stride, \ | 396 ptrdiff_t src_stride, \ |
400 int x_offset, int y_offset, \ | 397 int x_offset, int y_offset, \ |
401 const uint8_t *dst, \ | 398 const uint8_t *dst, \ |
402 ptrdiff_t dst_stride, \ | 399 ptrdiff_t dst_stride, \ |
403 const uint8_t *sec, \ | 400 const uint8_t *sec, \ |
404 ptrdiff_t sec_stride, \ | 401 ptrdiff_t sec_stride, \ |
405 int height, unsigned int *sse, \ | 402 int height, unsigned int *sse) |
406 void *unused0, void *unused) | |
407 #define DECLS(opt1, opt2) \ | 403 #define DECLS(opt1, opt2) \ |
408 DECL(4, opt2); \ | 404 DECL(4, opt2); \ |
409 DECL(8, opt1); \ | 405 DECL(8, opt1); \ |
410 DECL(16, opt1) | 406 DECL(16, opt1) |
411 | 407 |
412 DECLS(sse2, sse); | 408 DECLS(sse2, sse); |
413 DECLS(ssse3, ssse3); | 409 DECLS(ssse3, ssse3); |
414 #undef DECL | 410 #undef DECL |
415 #undef DECLS | 411 #undef DECLS |
416 | 412 |
417 #define FN(w, h, wf, wlog2, hlog2, opt, cast) \ | 413 #define FN(w, h, wf, wlog2, hlog2, opt, cast) \ |
418 unsigned int vp9_sub_pixel_avg_variance##w##x##h##_##opt(const uint8_t *src, \ | 414 unsigned int vp9_sub_pixel_avg_variance##w##x##h##_##opt(const uint8_t *src, \ |
419 int src_stride, \ | 415 int src_stride, \ |
420 int x_offset, \ | 416 int x_offset, \ |
421 int y_offset, \ | 417 int y_offset, \ |
422 const uint8_t *dst, \ | 418 const uint8_t *dst, \ |
423 int dst_stride, \ | 419 int dst_stride, \ |
424 unsigned int *sseptr, \ | 420 unsigned int *sseptr, \ |
425 const uint8_t *sec) { \ | 421 const uint8_t *sec) { \ |
426 unsigned int sse; \ | 422 unsigned int sse; \ |
427 int se = vp9_sub_pixel_avg_variance##wf##xh_##opt(src, src_stride, x_offset, \ | 423 int se = vp9_sub_pixel_avg_variance##wf##xh_##opt(src, src_stride, x_offset, \ |
428 y_offset, dst, dst_stride, \ | 424 y_offset, dst, dst_stride, \ |
429 sec, w, h, &sse, NULL, \ | 425 sec, w, h, &sse); \ |
430 NULL); \ | |
431 if (w > wf) { \ | 426 if (w > wf) { \ |
432 unsigned int sse2; \ | 427 unsigned int sse2; \ |
433 int se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 16, src_stride, \ | 428 int se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 16, src_stride, \ |
434 x_offset, y_offset, \ | 429 x_offset, y_offset, \ |
435 dst + 16, dst_stride, \ | 430 dst + 16, dst_stride, \ |
436 sec + 16, w, h, &sse2, \ | 431 sec + 16, w, h, &sse2); \ |
437 NULL, NULL); \ | |
438 se += se2; \ | 432 se += se2; \ |
439 sse += sse2; \ | 433 sse += sse2; \ |
440 if (w > wf * 2) { \ | 434 if (w > wf * 2) { \ |
441 se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 32, src_stride, \ | 435 se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 32, src_stride, \ |
442 x_offset, y_offset, \ | 436 x_offset, y_offset, \ |
443 dst + 32, dst_stride, \ | 437 dst + 32, dst_stride, \ |
444 sec + 32, w, h, &sse2, \ | 438 sec + 32, w, h, &sse2); \ |
445 NULL, NULL); \ | |
446 se += se2; \ | 439 se += se2; \ |
447 sse += sse2; \ | 440 sse += sse2; \ |
448 se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 48, src_stride, \ | 441 se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 48, src_stride, \ |
449 x_offset, y_offset, \ | 442 x_offset, y_offset, \ |
450 dst + 48, dst_stride, \ | 443 dst + 48, dst_stride, \ |
451 sec + 48, w, h, &sse2, \ | 444 sec + 48, w, h, &sse2); \ |
452 NULL, NULL); \ | |
453 se += se2; \ | 445 se += se2; \ |
454 sse += sse2; \ | 446 sse += sse2; \ |
455 } \ | 447 } \ |
456 } \ | 448 } \ |
457 *sseptr = sse; \ | 449 *sseptr = sse; \ |
458 return sse - ((cast se * se) >> (wlog2 + hlog2)); \ | 450 return sse - ((cast se * se) >> (wlog2 + hlog2)); \ |
459 } | 451 } |
460 | 452 |
461 #define FNS(opt1, opt2) \ | 453 #define FNS(opt1, opt2) \ |
462 FN(64, 64, 16, 6, 6, opt1, (int64_t)); \ | 454 FN(64, 64, 16, 6, 6, opt1, (int64_t)); \ |
463 FN(64, 32, 16, 6, 5, opt1, (int64_t)); \ | 455 FN(64, 32, 16, 6, 5, opt1, (int64_t)); \ |
464 FN(32, 64, 16, 5, 6, opt1, (int64_t)); \ | 456 FN(32, 64, 16, 5, 6, opt1, (int64_t)); \ |
465 FN(32, 32, 16, 5, 5, opt1, (int64_t)); \ | 457 FN(32, 32, 16, 5, 5, opt1, (int64_t)); \ |
466 FN(32, 16, 16, 5, 4, opt1, (int64_t)); \ | 458 FN(32, 16, 16, 5, 4, opt1, (int64_t)); \ |
467 FN(16, 32, 16, 4, 5, opt1, (int64_t)); \ | 459 FN(16, 32, 16, 4, 5, opt1, (int64_t)); \ |
468 FN(16, 16, 16, 4, 4, opt1, (unsigned int)); \ | 460 FN(16, 16, 16, 4, 4, opt1, (unsigned int)); \ |
469 FN(16, 8, 16, 4, 3, opt1, (unsigned int)); \ | 461 FN(16, 8, 16, 4, 3, opt1, (unsigned int)); \ |
470 FN(8, 16, 8, 3, 4, opt1, (unsigned int)); \ | 462 FN(8, 16, 8, 3, 4, opt1, (unsigned int)); \ |
471 FN(8, 8, 8, 3, 3, opt1, (unsigned int)); \ | 463 FN(8, 8, 8, 3, 3, opt1, (unsigned int)); \ |
472 FN(8, 4, 8, 3, 2, opt1, (unsigned int)); \ | 464 FN(8, 4, 8, 3, 2, opt1, (unsigned int)); \ |
473 FN(4, 8, 4, 2, 3, opt2, (unsigned int)); \ | 465 FN(4, 8, 4, 2, 3, opt2, (unsigned int)); \ |
474 FN(4, 4, 4, 2, 2, opt2, (unsigned int)) | 466 FN(4, 4, 4, 2, 2, opt2, (unsigned int)) |
475 | 467 |
476 FNS(sse2, sse); | 468 FNS(sse2, sse); |
477 FNS(ssse3, ssse3); | 469 FNS(ssse3, ssse3); |
478 | 470 |
479 #undef FNS | 471 #undef FNS |
480 #undef FN | 472 #undef FN |
OLD | NEW |