Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(177)

Side by Side Diff: source/libvpx/vp9/encoder/x86/vp9_variance_sse2.c

Issue 958693004: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 296 matching lines...) Expand 10 before | Expand all | Expand 10 after
307 return *sse; 307 return *sse;
308 } 308 }
309 309
310 unsigned int vp9_mse16x16_sse2(const uint8_t *src, int src_stride, 310 unsigned int vp9_mse16x16_sse2(const uint8_t *src, int src_stride,
311 const uint8_t *ref, int ref_stride, 311 const uint8_t *ref, int ref_stride,
312 unsigned int *sse) { 312 unsigned int *sse) {
313 vp9_variance16x16_sse2(src, src_stride, ref, ref_stride, sse); 313 vp9_variance16x16_sse2(src, src_stride, ref, ref_stride, sse);
314 return *sse; 314 return *sse;
315 } 315 }
316 316
317 // The 2 unused parameters are placeholders for PIC-enabled builds.
317 #define DECL(w, opt) \ 318 #define DECL(w, opt) \
318 int vp9_sub_pixel_variance##w##xh_##opt(const uint8_t *src, \ 319 int vp9_sub_pixel_variance##w##xh_##opt(const uint8_t *src, \
319 ptrdiff_t src_stride, \ 320 ptrdiff_t src_stride, \
320 int x_offset, int y_offset, \ 321 int x_offset, int y_offset, \
321 const uint8_t *dst, \ 322 const uint8_t *dst, \
322 ptrdiff_t dst_stride, \ 323 ptrdiff_t dst_stride, \
323 int height, unsigned int *sse) 324 int height, unsigned int *sse, \
325 void *unused0, void *unused)
324 #define DECLS(opt1, opt2) \ 326 #define DECLS(opt1, opt2) \
325 DECL(4, opt2); \ 327 DECL(4, opt2); \
326 DECL(8, opt1); \ 328 DECL(8, opt1); \
327 DECL(16, opt1) 329 DECL(16, opt1)
328 330
329 DECLS(sse2, sse); 331 DECLS(sse2, sse);
330 DECLS(ssse3, ssse3); 332 DECLS(ssse3, ssse3);
331 #undef DECLS 333 #undef DECLS
332 #undef DECL 334 #undef DECL
333 335
334 #define FN(w, h, wf, wlog2, hlog2, opt, cast) \ 336 #define FN(w, h, wf, wlog2, hlog2, opt, cast) \
335 unsigned int vp9_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src, \ 337 unsigned int vp9_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src, \
336 int src_stride, \ 338 int src_stride, \
337 int x_offset, \ 339 int x_offset, \
338 int y_offset, \ 340 int y_offset, \
339 const uint8_t *dst, \ 341 const uint8_t *dst, \
340 int dst_stride, \ 342 int dst_stride, \
341 unsigned int *sse_ptr) { \ 343 unsigned int *sse_ptr) { \
342 unsigned int sse; \ 344 unsigned int sse; \
343 int se = vp9_sub_pixel_variance##wf##xh_##opt(src, src_stride, x_offset, \ 345 int se = vp9_sub_pixel_variance##wf##xh_##opt(src, src_stride, x_offset, \
344 y_offset, dst, dst_stride, \ 346 y_offset, dst, dst_stride, \
345 h, &sse); \ 347 h, &sse, NULL, NULL); \
346 if (w > wf) { \ 348 if (w > wf) { \
347 unsigned int sse2; \ 349 unsigned int sse2; \
348 int se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 16, src_stride, \ 350 int se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 16, src_stride, \
349 x_offset, y_offset, \ 351 x_offset, y_offset, \
350 dst + 16, dst_stride, \ 352 dst + 16, dst_stride, \
351 h, &sse2); \ 353 h, &sse2, NULL, NULL); \
352 se += se2; \ 354 se += se2; \
353 sse += sse2; \ 355 sse += sse2; \
354 if (w > wf * 2) { \ 356 if (w > wf * 2) { \
355 se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 32, src_stride, \ 357 se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 32, src_stride, \
356 x_offset, y_offset, \ 358 x_offset, y_offset, \
357 dst + 32, dst_stride, \ 359 dst + 32, dst_stride, \
358 h, &sse2); \ 360 h, &sse2, NULL, NULL); \
359 se += se2; \ 361 se += se2; \
360 sse += sse2; \ 362 sse += sse2; \
361 se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 48, src_stride, \ 363 se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 48, src_stride, \
362 x_offset, y_offset, \ 364 x_offset, y_offset, \
363 dst + 48, dst_stride, \ 365 dst + 48, dst_stride, \
364 h, &sse2); \ 366 h, &sse2, NULL, NULL); \
365 se += se2; \ 367 se += se2; \
366 sse += sse2; \ 368 sse += sse2; \
367 } \ 369 } \
368 } \ 370 } \
369 *sse_ptr = sse; \ 371 *sse_ptr = sse; \
370 return sse - ((cast se * se) >> (wlog2 + hlog2)); \ 372 return sse - ((cast se * se) >> (wlog2 + hlog2)); \
371 } 373 }
372 374
373 #define FNS(opt1, opt2) \ 375 #define FNS(opt1, opt2) \
374 FN(64, 64, 16, 6, 6, opt1, (int64_t)); \ 376 FN(64, 64, 16, 6, 6, opt1, (int64_t)); \
375 FN(64, 32, 16, 6, 5, opt1, (int64_t)); \ 377 FN(64, 32, 16, 6, 5, opt1, (int64_t)); \
376 FN(32, 64, 16, 5, 6, opt1, (int64_t)); \ 378 FN(32, 64, 16, 5, 6, opt1, (int64_t)); \
377 FN(32, 32, 16, 5, 5, opt1, (int64_t)); \ 379 FN(32, 32, 16, 5, 5, opt1, (int64_t)); \
378 FN(32, 16, 16, 5, 4, opt1, (int64_t)); \ 380 FN(32, 16, 16, 5, 4, opt1, (int64_t)); \
379 FN(16, 32, 16, 4, 5, opt1, (int64_t)); \ 381 FN(16, 32, 16, 4, 5, opt1, (int64_t)); \
380 FN(16, 16, 16, 4, 4, opt1, (unsigned int)); \ 382 FN(16, 16, 16, 4, 4, opt1, (unsigned int)); \
381 FN(16, 8, 16, 4, 3, opt1, (unsigned int)); \ 383 FN(16, 8, 16, 4, 3, opt1, (unsigned int)); \
382 FN(8, 16, 8, 3, 4, opt1, (unsigned int)); \ 384 FN(8, 16, 8, 3, 4, opt1, (unsigned int)); \
383 FN(8, 8, 8, 3, 3, opt1, (unsigned int)); \ 385 FN(8, 8, 8, 3, 3, opt1, (unsigned int)); \
384 FN(8, 4, 8, 3, 2, opt1, (unsigned int)); \ 386 FN(8, 4, 8, 3, 2, opt1, (unsigned int)); \
385 FN(4, 8, 4, 2, 3, opt2, (unsigned int)); \ 387 FN(4, 8, 4, 2, 3, opt2, (unsigned int)); \
386 FN(4, 4, 4, 2, 2, opt2, (unsigned int)) 388 FN(4, 4, 4, 2, 2, opt2, (unsigned int))
387 389
388 FNS(sse2, sse); 390 FNS(sse2, sse);
389 FNS(ssse3, ssse3); 391 FNS(ssse3, ssse3);
390 392
391 #undef FNS 393 #undef FNS
392 #undef FN 394 #undef FN
393 395
396 // The 2 unused parameters are placeholders for PIC-enabled builds.
394 #define DECL(w, opt) \ 397 #define DECL(w, opt) \
395 int vp9_sub_pixel_avg_variance##w##xh_##opt(const uint8_t *src, \ 398 int vp9_sub_pixel_avg_variance##w##xh_##opt(const uint8_t *src, \
396 ptrdiff_t src_stride, \ 399 ptrdiff_t src_stride, \
397 int x_offset, int y_offset, \ 400 int x_offset, int y_offset, \
398 const uint8_t *dst, \ 401 const uint8_t *dst, \
399 ptrdiff_t dst_stride, \ 402 ptrdiff_t dst_stride, \
400 const uint8_t *sec, \ 403 const uint8_t *sec, \
401 ptrdiff_t sec_stride, \ 404 ptrdiff_t sec_stride, \
402 int height, unsigned int *sse) 405 int height, unsigned int *sse, \
406 void *unused0, void *unused)
403 #define DECLS(opt1, opt2) \ 407 #define DECLS(opt1, opt2) \
404 DECL(4, opt2); \ 408 DECL(4, opt2); \
405 DECL(8, opt1); \ 409 DECL(8, opt1); \
406 DECL(16, opt1) 410 DECL(16, opt1)
407 411
408 DECLS(sse2, sse); 412 DECLS(sse2, sse);
409 DECLS(ssse3, ssse3); 413 DECLS(ssse3, ssse3);
410 #undef DECL 414 #undef DECL
411 #undef DECLS 415 #undef DECLS
412 416
413 #define FN(w, h, wf, wlog2, hlog2, opt, cast) \ 417 #define FN(w, h, wf, wlog2, hlog2, opt, cast) \
414 unsigned int vp9_sub_pixel_avg_variance##w##x##h##_##opt(const uint8_t *src, \ 418 unsigned int vp9_sub_pixel_avg_variance##w##x##h##_##opt(const uint8_t *src, \
415 int src_stride, \ 419 int src_stride, \
416 int x_offset, \ 420 int x_offset, \
417 int y_offset, \ 421 int y_offset, \
418 const uint8_t *dst, \ 422 const uint8_t *dst, \
419 int dst_stride, \ 423 int dst_stride, \
420 unsigned int *sseptr, \ 424 unsigned int *sseptr, \
421 const uint8_t *sec) { \ 425 const uint8_t *sec) { \
422 unsigned int sse; \ 426 unsigned int sse; \
423 int se = vp9_sub_pixel_avg_variance##wf##xh_##opt(src, src_stride, x_offset, \ 427 int se = vp9_sub_pixel_avg_variance##wf##xh_##opt(src, src_stride, x_offset, \
424 y_offset, dst, dst_stride, \ 428 y_offset, dst, dst_stride, \
425 sec, w, h, &sse); \ 429 sec, w, h, &sse, NULL, \
430 NULL); \
426 if (w > wf) { \ 431 if (w > wf) { \
427 unsigned int sse2; \ 432 unsigned int sse2; \
428 int se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 16, src_stride, \ 433 int se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 16, src_stride, \
429 x_offset, y_offset, \ 434 x_offset, y_offset, \
430 dst + 16, dst_stride, \ 435 dst + 16, dst_stride, \
431 sec + 16, w, h, &sse2); \ 436 sec + 16, w, h, &sse2, \
437 NULL, NULL); \
432 se += se2; \ 438 se += se2; \
433 sse += sse2; \ 439 sse += sse2; \
434 if (w > wf * 2) { \ 440 if (w > wf * 2) { \
435 se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 32, src_stride, \ 441 se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 32, src_stride, \
436 x_offset, y_offset, \ 442 x_offset, y_offset, \
437 dst + 32, dst_stride, \ 443 dst + 32, dst_stride, \
438 sec + 32, w, h, &sse2); \ 444 sec + 32, w, h, &sse2, \
445 NULL, NULL); \
439 se += se2; \ 446 se += se2; \
440 sse += sse2; \ 447 sse += sse2; \
441 se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 48, src_stride, \ 448 se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 48, src_stride, \
442 x_offset, y_offset, \ 449 x_offset, y_offset, \
443 dst + 48, dst_stride, \ 450 dst + 48, dst_stride, \
444 sec + 48, w, h, &sse2); \ 451 sec + 48, w, h, &sse2, \
452 NULL, NULL); \
445 se += se2; \ 453 se += se2; \
446 sse += sse2; \ 454 sse += sse2; \
447 } \ 455 } \
448 } \ 456 } \
449 *sseptr = sse; \ 457 *sseptr = sse; \
450 return sse - ((cast se * se) >> (wlog2 + hlog2)); \ 458 return sse - ((cast se * se) >> (wlog2 + hlog2)); \
451 } 459 }
452 460
453 #define FNS(opt1, opt2) \ 461 #define FNS(opt1, opt2) \
454 FN(64, 64, 16, 6, 6, opt1, (int64_t)); \ 462 FN(64, 64, 16, 6, 6, opt1, (int64_t)); \
455 FN(64, 32, 16, 6, 5, opt1, (int64_t)); \ 463 FN(64, 32, 16, 6, 5, opt1, (int64_t)); \
456 FN(32, 64, 16, 5, 6, opt1, (int64_t)); \ 464 FN(32, 64, 16, 5, 6, opt1, (int64_t)); \
457 FN(32, 32, 16, 5, 5, opt1, (int64_t)); \ 465 FN(32, 32, 16, 5, 5, opt1, (int64_t)); \
458 FN(32, 16, 16, 5, 4, opt1, (int64_t)); \ 466 FN(32, 16, 16, 5, 4, opt1, (int64_t)); \
459 FN(16, 32, 16, 4, 5, opt1, (int64_t)); \ 467 FN(16, 32, 16, 4, 5, opt1, (int64_t)); \
460 FN(16, 16, 16, 4, 4, opt1, (unsigned int)); \ 468 FN(16, 16, 16, 4, 4, opt1, (unsigned int)); \
461 FN(16, 8, 16, 4, 3, opt1, (unsigned int)); \ 469 FN(16, 8, 16, 4, 3, opt1, (unsigned int)); \
462 FN(8, 16, 8, 3, 4, opt1, (unsigned int)); \ 470 FN(8, 16, 8, 3, 4, opt1, (unsigned int)); \
463 FN(8, 8, 8, 3, 3, opt1, (unsigned int)); \ 471 FN(8, 8, 8, 3, 3, opt1, (unsigned int)); \
464 FN(8, 4, 8, 3, 2, opt1, (unsigned int)); \ 472 FN(8, 4, 8, 3, 2, opt1, (unsigned int)); \
465 FN(4, 8, 4, 2, 3, opt2, (unsigned int)); \ 473 FN(4, 8, 4, 2, 3, opt2, (unsigned int)); \
466 FN(4, 4, 4, 2, 2, opt2, (unsigned int)) 474 FN(4, 4, 4, 2, 2, opt2, (unsigned int))
467 475
468 FNS(sse2, sse); 476 FNS(sse2, sse);
469 FNS(ssse3, ssse3); 477 FNS(ssse3, ssse3);
470 478
471 #undef FNS 479 #undef FNS
472 #undef FN 480 #undef FN
OLDNEW
« no previous file with comments | « source/libvpx/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm ('k') | source/libvpx/vp9/vp9_common.mk » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698