Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(405)

Side by Side Diff: source/libvpx/vp9/encoder/x86/vp9_variance_sse2.c

Issue 897063002: Revert "libvpx: Pull from upstream" (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/libvpx/vp9/encoder/vp9_speed_features.c ('k') | source/libvpx/vp9/vp9_common.mk » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 296 matching lines...) Expand 10 before | Expand all | Expand 10 after
307 return *sse; 307 return *sse;
308 } 308 }
309 309
310 unsigned int vp9_mse16x16_sse2(const uint8_t *src, int src_stride, 310 unsigned int vp9_mse16x16_sse2(const uint8_t *src, int src_stride,
311 const uint8_t *ref, int ref_stride, 311 const uint8_t *ref, int ref_stride,
312 unsigned int *sse) { 312 unsigned int *sse) {
313 vp9_variance16x16_sse2(src, src_stride, ref, ref_stride, sse); 313 vp9_variance16x16_sse2(src, src_stride, ref, ref_stride, sse);
314 return *sse; 314 return *sse;
315 } 315 }
316 316
317 // The 2 unused parameters are place holders for PIC enabled build.
318 #define DECL(w, opt) \ 317 #define DECL(w, opt) \
319 int vp9_sub_pixel_variance##w##xh_##opt(const uint8_t *src, \ 318 int vp9_sub_pixel_variance##w##xh_##opt(const uint8_t *src, \
320 ptrdiff_t src_stride, \ 319 ptrdiff_t src_stride, \
321 int x_offset, int y_offset, \ 320 int x_offset, int y_offset, \
322 const uint8_t *dst, \ 321 const uint8_t *dst, \
323 ptrdiff_t dst_stride, \ 322 ptrdiff_t dst_stride, \
324 int height, unsigned int *sse, \ 323 int height, unsigned int *sse)
325 void *unused0, void *unused)
326 #define DECLS(opt1, opt2) \ 324 #define DECLS(opt1, opt2) \
327 DECL(4, opt2); \ 325 DECL(4, opt2); \
328 DECL(8, opt1); \ 326 DECL(8, opt1); \
329 DECL(16, opt1) 327 DECL(16, opt1)
330 328
331 DECLS(sse2, sse); 329 DECLS(sse2, sse);
332 DECLS(ssse3, ssse3); 330 DECLS(ssse3, ssse3);
333 #undef DECLS 331 #undef DECLS
334 #undef DECL 332 #undef DECL
335 333
336 #define FN(w, h, wf, wlog2, hlog2, opt, cast) \ 334 #define FN(w, h, wf, wlog2, hlog2, opt, cast) \
337 unsigned int vp9_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src, \ 335 unsigned int vp9_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src, \
338 int src_stride, \ 336 int src_stride, \
339 int x_offset, \ 337 int x_offset, \
340 int y_offset, \ 338 int y_offset, \
341 const uint8_t *dst, \ 339 const uint8_t *dst, \
342 int dst_stride, \ 340 int dst_stride, \
343 unsigned int *sse_ptr) { \ 341 unsigned int *sse_ptr) { \
344 unsigned int sse; \ 342 unsigned int sse; \
345 int se = vp9_sub_pixel_variance##wf##xh_##opt(src, src_stride, x_offset, \ 343 int se = vp9_sub_pixel_variance##wf##xh_##opt(src, src_stride, x_offset, \
346 y_offset, dst, dst_stride, \ 344 y_offset, dst, dst_stride, \
347 h, &sse, NULL, NULL); \ 345 h, &sse); \
348 if (w > wf) { \ 346 if (w > wf) { \
349 unsigned int sse2; \ 347 unsigned int sse2; \
350 int se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 16, src_stride, \ 348 int se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 16, src_stride, \
351 x_offset, y_offset, \ 349 x_offset, y_offset, \
352 dst + 16, dst_stride, \ 350 dst + 16, dst_stride, \
353 h, &sse2, NULL, NULL); \ 351 h, &sse2); \
354 se += se2; \ 352 se += se2; \
355 sse += sse2; \ 353 sse += sse2; \
356 if (w > wf * 2) { \ 354 if (w > wf * 2) { \
357 se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 32, src_stride, \ 355 se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 32, src_stride, \
358 x_offset, y_offset, \ 356 x_offset, y_offset, \
359 dst + 32, dst_stride, \ 357 dst + 32, dst_stride, \
360 h, &sse2, NULL, NULL); \ 358 h, &sse2); \
361 se += se2; \ 359 se += se2; \
362 sse += sse2; \ 360 sse += sse2; \
363 se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 48, src_stride, \ 361 se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 48, src_stride, \
364 x_offset, y_offset, \ 362 x_offset, y_offset, \
365 dst + 48, dst_stride, \ 363 dst + 48, dst_stride, \
366 h, &sse2, NULL, NULL); \ 364 h, &sse2); \
367 se += se2; \ 365 se += se2; \
368 sse += sse2; \ 366 sse += sse2; \
369 } \ 367 } \
370 } \ 368 } \
371 *sse_ptr = sse; \ 369 *sse_ptr = sse; \
372 return sse - ((cast se * se) >> (wlog2 + hlog2)); \ 370 return sse - ((cast se * se) >> (wlog2 + hlog2)); \
373 } 371 }
374 372
375 #define FNS(opt1, opt2) \ 373 #define FNS(opt1, opt2) \
376 FN(64, 64, 16, 6, 6, opt1, (int64_t)); \ 374 FN(64, 64, 16, 6, 6, opt1, (int64_t)); \
377 FN(64, 32, 16, 6, 5, opt1, (int64_t)); \ 375 FN(64, 32, 16, 6, 5, opt1, (int64_t)); \
378 FN(32, 64, 16, 5, 6, opt1, (int64_t)); \ 376 FN(32, 64, 16, 5, 6, opt1, (int64_t)); \
379 FN(32, 32, 16, 5, 5, opt1, (int64_t)); \ 377 FN(32, 32, 16, 5, 5, opt1, (int64_t)); \
380 FN(32, 16, 16, 5, 4, opt1, (int64_t)); \ 378 FN(32, 16, 16, 5, 4, opt1, (int64_t)); \
381 FN(16, 32, 16, 4, 5, opt1, (int64_t)); \ 379 FN(16, 32, 16, 4, 5, opt1, (int64_t)); \
382 FN(16, 16, 16, 4, 4, opt1, (unsigned int)); \ 380 FN(16, 16, 16, 4, 4, opt1, (unsigned int)); \
383 FN(16, 8, 16, 4, 3, opt1, (unsigned int)); \ 381 FN(16, 8, 16, 4, 3, opt1, (unsigned int)); \
384 FN(8, 16, 8, 3, 4, opt1, (unsigned int)); \ 382 FN(8, 16, 8, 3, 4, opt1, (unsigned int)); \
385 FN(8, 8, 8, 3, 3, opt1, (unsigned int)); \ 383 FN(8, 8, 8, 3, 3, opt1, (unsigned int)); \
386 FN(8, 4, 8, 3, 2, opt1, (unsigned int)); \ 384 FN(8, 4, 8, 3, 2, opt1, (unsigned int)); \
387 FN(4, 8, 4, 2, 3, opt2, (unsigned int)); \ 385 FN(4, 8, 4, 2, 3, opt2, (unsigned int)); \
388 FN(4, 4, 4, 2, 2, opt2, (unsigned int)) 386 FN(4, 4, 4, 2, 2, opt2, (unsigned int))
389 387
390 FNS(sse2, sse); 388 FNS(sse2, sse);
391 FNS(ssse3, ssse3); 389 FNS(ssse3, ssse3);
392 390
393 #undef FNS 391 #undef FNS
394 #undef FN 392 #undef FN
395 393
396 // The 2 unused parameters are place holders for PIC enabled build.
397 #define DECL(w, opt) \ 394 #define DECL(w, opt) \
398 int vp9_sub_pixel_avg_variance##w##xh_##opt(const uint8_t *src, \ 395 int vp9_sub_pixel_avg_variance##w##xh_##opt(const uint8_t *src, \
399 ptrdiff_t src_stride, \ 396 ptrdiff_t src_stride, \
400 int x_offset, int y_offset, \ 397 int x_offset, int y_offset, \
401 const uint8_t *dst, \ 398 const uint8_t *dst, \
402 ptrdiff_t dst_stride, \ 399 ptrdiff_t dst_stride, \
403 const uint8_t *sec, \ 400 const uint8_t *sec, \
404 ptrdiff_t sec_stride, \ 401 ptrdiff_t sec_stride, \
405 int height, unsigned int *sse, \ 402 int height, unsigned int *sse)
406 void *unused0, void *unused)
407 #define DECLS(opt1, opt2) \ 403 #define DECLS(opt1, opt2) \
408 DECL(4, opt2); \ 404 DECL(4, opt2); \
409 DECL(8, opt1); \ 405 DECL(8, opt1); \
410 DECL(16, opt1) 406 DECL(16, opt1)
411 407
412 DECLS(sse2, sse); 408 DECLS(sse2, sse);
413 DECLS(ssse3, ssse3); 409 DECLS(ssse3, ssse3);
414 #undef DECL 410 #undef DECL
415 #undef DECLS 411 #undef DECLS
416 412
417 #define FN(w, h, wf, wlog2, hlog2, opt, cast) \ 413 #define FN(w, h, wf, wlog2, hlog2, opt, cast) \
418 unsigned int vp9_sub_pixel_avg_variance##w##x##h##_##opt(const uint8_t *src, \ 414 unsigned int vp9_sub_pixel_avg_variance##w##x##h##_##opt(const uint8_t *src, \
419 int src_stride, \ 415 int src_stride, \
420 int x_offset, \ 416 int x_offset, \
421 int y_offset, \ 417 int y_offset, \
422 const uint8_t *dst, \ 418 const uint8_t *dst, \
423 int dst_stride, \ 419 int dst_stride, \
424 unsigned int *sseptr, \ 420 unsigned int *sseptr, \
425 const uint8_t *sec) { \ 421 const uint8_t *sec) { \
426 unsigned int sse; \ 422 unsigned int sse; \
427 int se = vp9_sub_pixel_avg_variance##wf##xh_##opt(src, src_stride, x_offset, \ 423 int se = vp9_sub_pixel_avg_variance##wf##xh_##opt(src, src_stride, x_offset, \
428 y_offset, dst, dst_stride, \ 424 y_offset, dst, dst_stride, \
429 sec, w, h, &sse, NULL, \ 425 sec, w, h, &sse); \
430 NULL); \
431 if (w > wf) { \ 426 if (w > wf) { \
432 unsigned int sse2; \ 427 unsigned int sse2; \
433 int se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 16, src_stride, \ 428 int se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 16, src_stride, \
434 x_offset, y_offset, \ 429 x_offset, y_offset, \
435 dst + 16, dst_stride, \ 430 dst + 16, dst_stride, \
436 sec + 16, w, h, &sse2, \ 431 sec + 16, w, h, &sse2); \
437 NULL, NULL); \
438 se += se2; \ 432 se += se2; \
439 sse += sse2; \ 433 sse += sse2; \
440 if (w > wf * 2) { \ 434 if (w > wf * 2) { \
441 se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 32, src_stride, \ 435 se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 32, src_stride, \
442 x_offset, y_offset, \ 436 x_offset, y_offset, \
443 dst + 32, dst_stride, \ 437 dst + 32, dst_stride, \
444 sec + 32, w, h, &sse2, \ 438 sec + 32, w, h, &sse2); \
445 NULL, NULL); \
446 se += se2; \ 439 se += se2; \
447 sse += sse2; \ 440 sse += sse2; \
448 se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 48, src_stride, \ 441 se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 48, src_stride, \
449 x_offset, y_offset, \ 442 x_offset, y_offset, \
450 dst + 48, dst_stride, \ 443 dst + 48, dst_stride, \
451 sec + 48, w, h, &sse2, \ 444 sec + 48, w, h, &sse2); \
452 NULL, NULL); \
453 se += se2; \ 445 se += se2; \
454 sse += sse2; \ 446 sse += sse2; \
455 } \ 447 } \
456 } \ 448 } \
457 *sseptr = sse; \ 449 *sseptr = sse; \
458 return sse - ((cast se * se) >> (wlog2 + hlog2)); \ 450 return sse - ((cast se * se) >> (wlog2 + hlog2)); \
459 } 451 }
460 452
461 #define FNS(opt1, opt2) \ 453 #define FNS(opt1, opt2) \
462 FN(64, 64, 16, 6, 6, opt1, (int64_t)); \ 454 FN(64, 64, 16, 6, 6, opt1, (int64_t)); \
463 FN(64, 32, 16, 6, 5, opt1, (int64_t)); \ 455 FN(64, 32, 16, 6, 5, opt1, (int64_t)); \
464 FN(32, 64, 16, 5, 6, opt1, (int64_t)); \ 456 FN(32, 64, 16, 5, 6, opt1, (int64_t)); \
465 FN(32, 32, 16, 5, 5, opt1, (int64_t)); \ 457 FN(32, 32, 16, 5, 5, opt1, (int64_t)); \
466 FN(32, 16, 16, 5, 4, opt1, (int64_t)); \ 458 FN(32, 16, 16, 5, 4, opt1, (int64_t)); \
467 FN(16, 32, 16, 4, 5, opt1, (int64_t)); \ 459 FN(16, 32, 16, 4, 5, opt1, (int64_t)); \
468 FN(16, 16, 16, 4, 4, opt1, (unsigned int)); \ 460 FN(16, 16, 16, 4, 4, opt1, (unsigned int)); \
469 FN(16, 8, 16, 4, 3, opt1, (unsigned int)); \ 461 FN(16, 8, 16, 4, 3, opt1, (unsigned int)); \
470 FN(8, 16, 8, 3, 4, opt1, (unsigned int)); \ 462 FN(8, 16, 8, 3, 4, opt1, (unsigned int)); \
471 FN(8, 8, 8, 3, 3, opt1, (unsigned int)); \ 463 FN(8, 8, 8, 3, 3, opt1, (unsigned int)); \
472 FN(8, 4, 8, 3, 2, opt1, (unsigned int)); \ 464 FN(8, 4, 8, 3, 2, opt1, (unsigned int)); \
473 FN(4, 8, 4, 2, 3, opt2, (unsigned int)); \ 465 FN(4, 8, 4, 2, 3, opt2, (unsigned int)); \
474 FN(4, 4, 4, 2, 2, opt2, (unsigned int)) 466 FN(4, 4, 4, 2, 2, opt2, (unsigned int))
475 467
476 FNS(sse2, sse); 468 FNS(sse2, sse);
477 FNS(ssse3, ssse3); 469 FNS(ssse3, ssse3);
478 470
479 #undef FNS 471 #undef FNS
480 #undef FN 472 #undef FN
OLD | NEW
« no previous file with comments | « source/libvpx/vp9/encoder/vp9_speed_features.c ('k') | source/libvpx/vp9/vp9_common.mk » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698