source/libvpx/vp9/encoder/x86/vp9_variance_sse2.c - Issue 897063002: Revert "libvpx: Pull from upstream"

Side by Side Diff: source/libvpx/vp9/encoder/x86/vp9_variance_sse2.c

Issue 897063002: Revert "libvpx: Pull from upstream" (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master

Patch Set: Created 5 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.	2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 296 matching lines...) Expand 10 before | Expand all | Expand 10 after
307 return *sse;	307 return *sse;

308 }	308 }

309	309

310 unsigned int vp9_mse16x16_sse2(const uint8_t *src, int src_stride,	310 unsigned int vp9_mse16x16_sse2(const uint8_t *src, int src_stride,

311 const uint8_t *ref, int ref_stride,	311 const uint8_t *ref, int ref_stride,

312 unsigned int *sse) {	312 unsigned int *sse) {

313 vp9_variance16x16_sse2(src, src_stride, ref, ref_stride, sse);	313 vp9_variance16x16_sse2(src, src_stride, ref, ref_stride, sse);

314 return *sse;	314 return *sse;

315 }	315 }

316	316

317 // The 2 unused parameters are place holders for PIC enabled build.

318 #define DECL(w, opt) \	317 #define DECL(w, opt) \

319 int vp9_sub_pixel_variance##w##xh_##opt(const uint8_t *src, \	318 int vp9_sub_pixel_variance##w##xh_##opt(const uint8_t *src, \

320 ptrdiff_t src_stride, \	319 ptrdiff_t src_stride, \

321 int x_offset, int y_offset, \	320 int x_offset, int y_offset, \

322 const uint8_t *dst, \	321 const uint8_t *dst, \

323 ptrdiff_t dst_stride, \	322 ptrdiff_t dst_stride, \

324 int height, unsigned int *sse, \	323 int height, unsigned int *sse)

325 void *unused0, void *unused)

326 #define DECLS(opt1, opt2) \	324 #define DECLS(opt1, opt2) \

327 DECL(4, opt2); \	325 DECL(4, opt2); \

328 DECL(8, opt1); \	326 DECL(8, opt1); \

329 DECL(16, opt1)	327 DECL(16, opt1)

330	328

331 DECLS(sse2, sse);	329 DECLS(sse2, sse);

332 DECLS(ssse3, ssse3);	330 DECLS(ssse3, ssse3);

333 #undef DECLS	331 #undef DECLS

334 #undef DECL	332 #undef DECL

335	333

336 #define FN(w, h, wf, wlog2, hlog2, opt, cast) \	334 #define FN(w, h, wf, wlog2, hlog2, opt, cast) \

337 unsigned int vp9_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src, \	335 unsigned int vp9_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src, \

338 int src_stride, \	336 int src_stride, \

339 int x_offset, \	337 int x_offset, \

340 int y_offset, \	338 int y_offset, \

341 const uint8_t *dst, \	339 const uint8_t *dst, \

342 int dst_stride, \	340 int dst_stride, \

343 unsigned int *sse_ptr) { \	341 unsigned int *sse_ptr) { \

344 unsigned int sse; \	342 unsigned int sse; \

345 int se = vp9_sub_pixel_variance##wf##xh_##opt(src, src_stride, x_offset, \	343 int se = vp9_sub_pixel_variance##wf##xh_##opt(src, src_stride, x_offset, \

346 y_offset, dst, dst_stride, \	344 y_offset, dst, dst_stride, \

347 h, &sse, NULL, NULL); \	345 h, &sse); \

348 if (w > wf) { \	346 if (w > wf) { \

349 unsigned int sse2; \	347 unsigned int sse2; \

350 int se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 16, src_stride, \	348 int se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 16, src_stride, \

351 x_offset, y_offset, \	349 x_offset, y_offset, \

352 dst + 16, dst_stride, \	350 dst + 16, dst_stride, \

353 h, &sse2, NULL, NULL); \	351 h, &sse2); \

354 se += se2; \	352 se += se2; \

355 sse += sse2; \	353 sse += sse2; \

356 if (w > wf * 2) { \	354 if (w > wf * 2) { \

357 se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 32, src_stride, \	355 se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 32, src_stride, \

358 x_offset, y_offset, \	356 x_offset, y_offset, \

359 dst + 32, dst_stride, \	357 dst + 32, dst_stride, \

360 h, &sse2, NULL, NULL); \	358 h, &sse2); \

361 se += se2; \	359 se += se2; \

362 sse += sse2; \	360 sse += sse2; \

363 se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 48, src_stride, \	361 se2 = vp9_sub_pixel_variance##wf##xh_##opt(src + 48, src_stride, \

364 x_offset, y_offset, \	362 x_offset, y_offset, \

365 dst + 48, dst_stride, \	363 dst + 48, dst_stride, \

366 h, &sse2, NULL, NULL); \	364 h, &sse2); \

367 se += se2; \	365 se += se2; \

368 sse += sse2; \	366 sse += sse2; \

369 } \	367 } \

370 } \	368 } \

371 *sse_ptr = sse; \	369 *sse_ptr = sse; \

372 return sse - ((cast se * se) >> (wlog2 + hlog2)); \	370 return sse - ((cast se * se) >> (wlog2 + hlog2)); \

373 }	371 }

374	372

375 #define FNS(opt1, opt2) \	373 #define FNS(opt1, opt2) \

376 FN(64, 64, 16, 6, 6, opt1, (int64_t)); \	374 FN(64, 64, 16, 6, 6, opt1, (int64_t)); \

377 FN(64, 32, 16, 6, 5, opt1, (int64_t)); \	375 FN(64, 32, 16, 6, 5, opt1, (int64_t)); \

378 FN(32, 64, 16, 5, 6, opt1, (int64_t)); \	376 FN(32, 64, 16, 5, 6, opt1, (int64_t)); \

379 FN(32, 32, 16, 5, 5, opt1, (int64_t)); \	377 FN(32, 32, 16, 5, 5, opt1, (int64_t)); \

380 FN(32, 16, 16, 5, 4, opt1, (int64_t)); \	378 FN(32, 16, 16, 5, 4, opt1, (int64_t)); \

381 FN(16, 32, 16, 4, 5, opt1, (int64_t)); \	379 FN(16, 32, 16, 4, 5, opt1, (int64_t)); \

382 FN(16, 16, 16, 4, 4, opt1, (unsigned int)); \	380 FN(16, 16, 16, 4, 4, opt1, (unsigned int)); \

383 FN(16, 8, 16, 4, 3, opt1, (unsigned int)); \	381 FN(16, 8, 16, 4, 3, opt1, (unsigned int)); \

384 FN(8, 16, 8, 3, 4, opt1, (unsigned int)); \	382 FN(8, 16, 8, 3, 4, opt1, (unsigned int)); \

385 FN(8, 8, 8, 3, 3, opt1, (unsigned int)); \	383 FN(8, 8, 8, 3, 3, opt1, (unsigned int)); \

386 FN(8, 4, 8, 3, 2, opt1, (unsigned int)); \	384 FN(8, 4, 8, 3, 2, opt1, (unsigned int)); \

387 FN(4, 8, 4, 2, 3, opt2, (unsigned int)); \	385 FN(4, 8, 4, 2, 3, opt2, (unsigned int)); \

388 FN(4, 4, 4, 2, 2, opt2, (unsigned int))	386 FN(4, 4, 4, 2, 2, opt2, (unsigned int))

389	387

390 FNS(sse2, sse);	388 FNS(sse2, sse);

391 FNS(ssse3, ssse3);	389 FNS(ssse3, ssse3);

392	390

393 #undef FNS	391 #undef FNS

394 #undef FN	392 #undef FN

395	393

396 // The 2 unused parameters are place holders for PIC enabled build.

397 #define DECL(w, opt) \	394 #define DECL(w, opt) \

398 int vp9_sub_pixel_avg_variance##w##xh_##opt(const uint8_t *src, \	395 int vp9_sub_pixel_avg_variance##w##xh_##opt(const uint8_t *src, \

399 ptrdiff_t src_stride, \	396 ptrdiff_t src_stride, \

400 int x_offset, int y_offset, \	397 int x_offset, int y_offset, \

401 const uint8_t *dst, \	398 const uint8_t *dst, \

402 ptrdiff_t dst_stride, \	399 ptrdiff_t dst_stride, \

403 const uint8_t *sec, \	400 const uint8_t *sec, \

404 ptrdiff_t sec_stride, \	401 ptrdiff_t sec_stride, \

405 int height, unsigned int *sse, \	402 int height, unsigned int *sse)

406 void *unused0, void *unused)

407 #define DECLS(opt1, opt2) \	403 #define DECLS(opt1, opt2) \

408 DECL(4, opt2); \	404 DECL(4, opt2); \

409 DECL(8, opt1); \	405 DECL(8, opt1); \

410 DECL(16, opt1)	406 DECL(16, opt1)

411	407

412 DECLS(sse2, sse);	408 DECLS(sse2, sse);

413 DECLS(ssse3, ssse3);	409 DECLS(ssse3, ssse3);

414 #undef DECL	410 #undef DECL

415 #undef DECLS	411 #undef DECLS

416	412

417 #define FN(w, h, wf, wlog2, hlog2, opt, cast) \	413 #define FN(w, h, wf, wlog2, hlog2, opt, cast) \

418 unsigned int vp9_sub_pixel_avg_variance##w##x##h##_##opt(const uint8_t *src, \	414 unsigned int vp9_sub_pixel_avg_variance##w##x##h##_##opt(const uint8_t *src, \

419 int src_stride, \	415 int src_stride, \

420 int x_offset, \	416 int x_offset, \

421 int y_offset, \	417 int y_offset, \

422 const uint8_t *dst, \	418 const uint8_t *dst, \

423 int dst_stride, \	419 int dst_stride, \

424 unsigned int *sseptr, \	420 unsigned int *sseptr, \

425 const uint8_t *sec) { \	421 const uint8_t *sec) { \

426 unsigned int sse; \	422 unsigned int sse; \

427 int se = vp9_sub_pixel_avg_variance##wf##xh_##opt(src, src_stride, x_offset, \	423 int se = vp9_sub_pixel_avg_variance##wf##xh_##opt(src, src_stride, x_offset, \

428 y_offset, dst, dst_stride, \	424 y_offset, dst, dst_stride, \

429 sec, w, h, &sse, NULL, \	425 sec, w, h, &sse); \

430 NULL); \

431 if (w > wf) { \	426 if (w > wf) { \

432 unsigned int sse2; \	427 unsigned int sse2; \

433 int se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 16, src_stride, \	428 int se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 16, src_stride, \

434 x_offset, y_offset, \	429 x_offset, y_offset, \

435 dst + 16, dst_stride, \	430 dst + 16, dst_stride, \

436 sec + 16, w, h, &sse2, \	431 sec + 16, w, h, &sse2); \

437 NULL, NULL); \

438 se += se2; \	432 se += se2; \

439 sse += sse2; \	433 sse += sse2; \

440 if (w > wf * 2) { \	434 if (w > wf * 2) { \

441 se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 32, src_stride, \	435 se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 32, src_stride, \

442 x_offset, y_offset, \	436 x_offset, y_offset, \

443 dst + 32, dst_stride, \	437 dst + 32, dst_stride, \

444 sec + 32, w, h, &sse2, \	438 sec + 32, w, h, &sse2); \

445 NULL, NULL); \

446 se += se2; \	439 se += se2; \

447 sse += sse2; \	440 sse += sse2; \

448 se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 48, src_stride, \	441 se2 = vp9_sub_pixel_avg_variance##wf##xh_##opt(src + 48, src_stride, \

449 x_offset, y_offset, \	442 x_offset, y_offset, \

450 dst + 48, dst_stride, \	443 dst + 48, dst_stride, \

451 sec + 48, w, h, &sse2, \	444 sec + 48, w, h, &sse2); \

452 NULL, NULL); \

453 se += se2; \	445 se += se2; \

454 sse += sse2; \	446 sse += sse2; \

455 } \	447 } \

456 } \	448 } \

457 *sseptr = sse; \	449 *sseptr = sse; \

458 return sse - ((cast se * se) >> (wlog2 + hlog2)); \	450 return sse - ((cast se * se) >> (wlog2 + hlog2)); \

459 }	451 }

460	452

461 #define FNS(opt1, opt2) \	453 #define FNS(opt1, opt2) \

462 FN(64, 64, 16, 6, 6, opt1, (int64_t)); \	454 FN(64, 64, 16, 6, 6, opt1, (int64_t)); \

463 FN(64, 32, 16, 6, 5, opt1, (int64_t)); \	455 FN(64, 32, 16, 6, 5, opt1, (int64_t)); \

464 FN(32, 64, 16, 5, 6, opt1, (int64_t)); \	456 FN(32, 64, 16, 5, 6, opt1, (int64_t)); \

465 FN(32, 32, 16, 5, 5, opt1, (int64_t)); \	457 FN(32, 32, 16, 5, 5, opt1, (int64_t)); \

466 FN(32, 16, 16, 5, 4, opt1, (int64_t)); \	458 FN(32, 16, 16, 5, 4, opt1, (int64_t)); \

467 FN(16, 32, 16, 4, 5, opt1, (int64_t)); \	459 FN(16, 32, 16, 4, 5, opt1, (int64_t)); \

468 FN(16, 16, 16, 4, 4, opt1, (unsigned int)); \	460 FN(16, 16, 16, 4, 4, opt1, (unsigned int)); \

469 FN(16, 8, 16, 4, 3, opt1, (unsigned int)); \	461 FN(16, 8, 16, 4, 3, opt1, (unsigned int)); \

470 FN(8, 16, 8, 3, 4, opt1, (unsigned int)); \	462 FN(8, 16, 8, 3, 4, opt1, (unsigned int)); \

471 FN(8, 8, 8, 3, 3, opt1, (unsigned int)); \	463 FN(8, 8, 8, 3, 3, opt1, (unsigned int)); \

472 FN(8, 4, 8, 3, 2, opt1, (unsigned int)); \	464 FN(8, 4, 8, 3, 2, opt1, (unsigned int)); \

473 FN(4, 8, 4, 2, 3, opt2, (unsigned int)); \	465 FN(4, 8, 4, 2, 3, opt2, (unsigned int)); \

474 FN(4, 4, 4, 2, 2, opt2, (unsigned int))	466 FN(4, 4, 4, 2, 2, opt2, (unsigned int))

475	467

476 FNS(sse2, sse);	468 FNS(sse2, sse);

477 FNS(ssse3, ssse3);	469 FNS(ssse3, ssse3);

478	470

479 #undef FNS	471 #undef FNS

480 #undef FN	472 #undef FN

OLD	NEW

« no previous file with comments | « source/libvpx/vp9/encoder/vp9_speed_features.c ('k') | source/libvpx/vp9/vp9_common.mk » ('j') | no next file with comments »