source/libvpx/vp9/common/arm/neon/vp9_reconintra_neon_asm.asm - Issue 1162573005: libvpx: Pull from upstream

Side by Side Diff: source/libvpx/vp9/common/arm/neon/vp9_reconintra_neon_asm.asm

Issue 1162573005: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master

Patch Set: Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 ;	1 ;

2 ; Copyright (c) 2014 The WebM project authors. All Rights Reserved.	2 ; Copyright (c) 2014 The WebM project authors. All Rights Reserved.

3 ;	3 ;

4 ; Use of this source code is governed by a BSD-style license	4 ; Use of this source code is governed by a BSD-style license

5 ; that can be found in the LICENSE file in the root of the source	5 ; that can be found in the LICENSE file in the root of the source

6 ; tree. An additional intellectual property rights grant can be found	6 ; tree. An additional intellectual property rights grant can be found

7 ; in the file PATENTS. All contributing project authors may	7 ; in the file PATENTS. All contributing project authors may

8 ; be found in the AUTHORS file in the root of the source tree.	8 ; be found in the AUTHORS file in the root of the source tree.

9 ;	9 ;

10	10

(...skipping 327 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
338 ; const uint8_t *above,	338 ; const uint8_t *above,

339 ; const uint8_t *left)	339 ; const uint8_t *left)

340 ; r0 uint8_t *dst	340 ; r0 uint8_t *dst

341 ; r1 ptrdiff_t y_stride	341 ; r1 ptrdiff_t y_stride

342 ; r2 const uint8_t *above	342 ; r2 const uint8_t *above

343 ; r3 const uint8_t *left	343 ; r3 const uint8_t *left

344	344

345 \|vp9_tm_predictor_8x8_neon\| PROC	345 \|vp9_tm_predictor_8x8_neon\| PROC

346 ; Load ytop_left = above[-1];	346 ; Load ytop_left = above[-1];

347 sub r12, r2, #1	347 sub r12, r2, #1

348 ldrb r12, [r12]	348 vld1.8 {d0[]}, [r12]

349 vdup.u8 d0, r12

350	349

351 ; preload 8 left	350 ; preload 8 left

352 vld1.8 {d30}, [r3]	351 vld1.8 {d30}, [r3]

353	352

354 ; Load above 8 pixels	353 ; Load above 8 pixels

355 vld1.64 {d2}, [r2]	354 vld1.64 {d2}, [r2]

356	355

357 vmovl.u8 q10, d30	356 vmovl.u8 q10, d30

358	357

359 ; Compute above - ytop_left	358 ; Compute above - ytop_left

(...skipping 51 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
411 ; const uint8_t *above,	410 ; const uint8_t *above,

412 ; const uint8_t *left)	411 ; const uint8_t *left)

413 ; r0 uint8_t *dst	412 ; r0 uint8_t *dst

414 ; r1 ptrdiff_t y_stride	413 ; r1 ptrdiff_t y_stride

415 ; r2 const uint8_t *above	414 ; r2 const uint8_t *above

416 ; r3 const uint8_t *left	415 ; r3 const uint8_t *left

417	416

418 \|vp9_tm_predictor_16x16_neon\| PROC	417 \|vp9_tm_predictor_16x16_neon\| PROC

419 ; Load ytop_left = above[-1];	418 ; Load ytop_left = above[-1];

420 sub r12, r2, #1	419 sub r12, r2, #1

421 ldrb r12, [r12]	420 vld1.8 {d0[]}, [r12]

422 vdup.u8 q0, r12

423	421

424 ; Load above 16 pixels	422 ; Load above 16 pixels

425 vld1.8 {q1}, [r2]	423 vld1.8 {q1}, [r2]

426	424

427 ; preload 8 left into d18	425 ; preload 8 left into d18

428 vld1.8 {d18}, [r3]!	426 vld1.8 {d18}, [r3]!

429	427

430 ; Compute above - ytop_left	428 ; Compute above - ytop_left

431 vsubl.u8 q2, d2, d0	429 vsubl.u8 q2, d2, d0

432 vsubl.u8 q3, d3, d1	430 vsubl.u8 q3, d3, d0

433	431

434 vmovl.u8 q10, d18	432 vmovl.u8 q10, d18

435	433

436 ; Load left row by row and compute left + (above - ytop_left)	434 ; Load left row by row and compute left + (above - ytop_left)

437 ; Process 8 rows in each single loop and loop 2 times to process 16 rows.	435 ; Process 8 rows in each single loop and loop 2 times to process 16 rows.

438 mov r2, #2	436 mov r2, #2

439	437

440 loop_16x16_neon	438 loop_16x16_neon

441 ; Process two rows.	439 ; Process two rows.

442 vdup.16 q0, d20[0]	440 vdup.16 q0, d20[0]

(...skipping 62 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
505 ; const uint8_t *above,	503 ; const uint8_t *above,

506 ; const uint8_t *left)	504 ; const uint8_t *left)

507 ; r0 uint8_t *dst	505 ; r0 uint8_t *dst

508 ; r1 ptrdiff_t y_stride	506 ; r1 ptrdiff_t y_stride

509 ; r2 const uint8_t *above	507 ; r2 const uint8_t *above

510 ; r3 const uint8_t *left	508 ; r3 const uint8_t *left

511	509

512 \|vp9_tm_predictor_32x32_neon\| PROC	510 \|vp9_tm_predictor_32x32_neon\| PROC

513 ; Load ytop_left = above[-1];	511 ; Load ytop_left = above[-1];

514 sub r12, r2, #1	512 sub r12, r2, #1

515 ldrb r12, [r12]	513 vld1.8 {d0[]}, [r12]

516 vdup.u8 q0, r12

517	514

518 ; Load above 32 pixels	515 ; Load above 32 pixels

519 vld1.8 {q1}, [r2]!	516 vld1.8 {q1}, [r2]!

520 vld1.8 {q2}, [r2]	517 vld1.8 {q2}, [r2]

521	518

522 ; preload 8 left pixels	519 ; preload 8 left pixels

523 vld1.8 {d26}, [r3]!	520 vld1.8 {d26}, [r3]!

524	521

525 ; Compute above - ytop_left	522 ; Compute above - ytop_left

526 vsubl.u8 q8, d2, d0	523 vsubl.u8 q8, d2, d0

527 vsubl.u8 q9, d3, d1	524 vsubl.u8 q9, d3, d0

528 vsubl.u8 q10, d4, d0	525 vsubl.u8 q10, d4, d0

529 vsubl.u8 q11, d5, d1	526 vsubl.u8 q11, d5, d0

530	527

531 vmovl.u8 q3, d26	528 vmovl.u8 q3, d26

532	529

533 ; Load left row by row and compute left + (above - ytop_left)	530 ; Load left row by row and compute left + (above - ytop_left)

534 ; Process 8 rows in each single loop and loop 4 times to process 32 rows.	531 ; Process 8 rows in each single loop and loop 4 times to process 32 rows.

535 mov r2, #4	532 mov r2, #4

536	533

537 loop_32x32_neon	534 loop_32x32_neon

538 ; Process two rows.	535 ; Process two rows.

539 vdup.16 q0, d6[0]	536 vdup.16 q0, d6[0]

(...skipping 85 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
625 vmovl.u8 q3, d0	622 vmovl.u8 q3, d0

626 vst1.64 {d24-d27}, [r0], r1	623 vst1.64 {d24-d27}, [r0], r1

627	624

628 subs r2, r2, #1	625 subs r2, r2, #1

629 bgt loop_32x32_neon	626 bgt loop_32x32_neon

630	627

631 bx lr	628 bx lr

632 ENDP ; \|vp9_tm_predictor_32x32_neon\|	629 ENDP ; \|vp9_tm_predictor_32x32_neon\|

633	630

634 END	631 END

OLD	NEW