| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; Copyright (c) 2014 The WebM project authors. All Rights Reserved. | 2 ; Copyright (c) 2014 The WebM project authors. All Rights Reserved. |
| 3 ; | 3 ; |
| 4 ; Use of this source code is governed by a BSD-style license | 4 ; Use of this source code is governed by a BSD-style license |
| 5 ; that can be found in the LICENSE file in the root of the source | 5 ; that can be found in the LICENSE file in the root of the source |
| 6 ; tree. An additional intellectual property rights grant can be found | 6 ; tree. An additional intellectual property rights grant can be found |
| 7 ; in the file PATENTS. All contributing project authors may | 7 ; in the file PATENTS. All contributing project authors may |
| 8 ; be found in the AUTHORS file in the root of the source tree. | 8 ; be found in the AUTHORS file in the root of the source tree. |
| 9 ; | 9 ; |
| 10 | 10 |
| (...skipping 327 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 338 ; const uint8_t *above, | 338 ; const uint8_t *above, |
| 339 ; const uint8_t *left) | 339 ; const uint8_t *left) |
| 340 ; r0 uint8_t *dst | 340 ; r0 uint8_t *dst |
| 341 ; r1 ptrdiff_t y_stride | 341 ; r1 ptrdiff_t y_stride |
| 342 ; r2 const uint8_t *above | 342 ; r2 const uint8_t *above |
| 343 ; r3 const uint8_t *left | 343 ; r3 const uint8_t *left |
| 344 | 344 |
| 345 |vp9_tm_predictor_8x8_neon| PROC | 345 |vp9_tm_predictor_8x8_neon| PROC |
| 346 ; Load ytop_left = above[-1]; | 346 ; Load ytop_left = above[-1]; |
| 347 sub r12, r2, #1 | 347 sub r12, r2, #1 |
| 348 ldrb r12, [r12] | 348 vld1.8 {d0[]}, [r12] |
| 349 vdup.u8 d0, r12 | |
| 350 | 349 |
| 351 ; preload 8 left | 350 ; preload 8 left |
| 352 vld1.8 {d30}, [r3] | 351 vld1.8 {d30}, [r3] |
| 353 | 352 |
| 354 ; Load above 8 pixels | 353 ; Load above 8 pixels |
| 355 vld1.64 {d2}, [r2] | 354 vld1.64 {d2}, [r2] |
| 356 | 355 |
| 357 vmovl.u8 q10, d30 | 356 vmovl.u8 q10, d30 |
| 358 | 357 |
| 359 ; Compute above - ytop_left | 358 ; Compute above - ytop_left |
| (...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 411 ; const uint8_t *above, | 410 ; const uint8_t *above, |
| 412 ; const uint8_t *left) | 411 ; const uint8_t *left) |
| 413 ; r0 uint8_t *dst | 412 ; r0 uint8_t *dst |
| 414 ; r1 ptrdiff_t y_stride | 413 ; r1 ptrdiff_t y_stride |
| 415 ; r2 const uint8_t *above | 414 ; r2 const uint8_t *above |
| 416 ; r3 const uint8_t *left | 415 ; r3 const uint8_t *left |
| 417 | 416 |
| 418 |vp9_tm_predictor_16x16_neon| PROC | 417 |vp9_tm_predictor_16x16_neon| PROC |
| 419 ; Load ytop_left = above[-1]; | 418 ; Load ytop_left = above[-1]; |
| 420 sub r12, r2, #1 | 419 sub r12, r2, #1 |
| 421 ldrb r12, [r12] | 420 vld1.8 {d0[]}, [r12] |
| 422 vdup.u8 q0, r12 | |
| 423 | 421 |
| 424 ; Load above 16 pixels | 422 ; Load above 16 pixels |
| 425 vld1.8 {q1}, [r2] | 423 vld1.8 {q1}, [r2] |
| 426 | 424 |
| 427 ; preload 8 left pixels into d18 | 425 ; preload 8 left pixels into d18 |
| 428 vld1.8 {d18}, [r3]! | 426 vld1.8 {d18}, [r3]! |
| 429 | 427 |
| 430 ; Compute above - ytop_left | 428 ; Compute above - ytop_left |
| 431 vsubl.u8 q2, d2, d0 | 429 vsubl.u8 q2, d2, d0 |
| 432 vsubl.u8 q3, d3, d1 | 430 vsubl.u8 q3, d3, d0 |
| 433 | 431 |
| 434 vmovl.u8 q10, d18 | 432 vmovl.u8 q10, d18 |
| 435 | 433 |
| 436 ; Load left row by row and compute left + (above - ytop_left) | 434 ; Load left row by row and compute left + (above - ytop_left) |
| 437 ; Process 8 rows in each single loop and loop 2 times to process 16 rows. | 435 ; Process 8 rows in each single loop and loop 2 times to process 16 rows. |
| 438 mov r2, #2 | 436 mov r2, #2 |
| 439 | 437 |
| 440 loop_16x16_neon | 438 loop_16x16_neon |
| 441 ; Process two rows. | 439 ; Process two rows. |
| 442 vdup.16 q0, d20[0] | 440 vdup.16 q0, d20[0] |
| (...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 505 ; const uint8_t *above, | 503 ; const uint8_t *above, |
| 506 ; const uint8_t *left) | 504 ; const uint8_t *left) |
| 507 ; r0 uint8_t *dst | 505 ; r0 uint8_t *dst |
| 508 ; r1 ptrdiff_t y_stride | 506 ; r1 ptrdiff_t y_stride |
| 509 ; r2 const uint8_t *above | 507 ; r2 const uint8_t *above |
| 510 ; r3 const uint8_t *left | 508 ; r3 const uint8_t *left |
| 511 | 509 |
| 512 |vp9_tm_predictor_32x32_neon| PROC | 510 |vp9_tm_predictor_32x32_neon| PROC |
| 513 ; Load ytop_left = above[-1]; | 511 ; Load ytop_left = above[-1]; |
| 514 sub r12, r2, #1 | 512 sub r12, r2, #1 |
| 515 ldrb r12, [r12] | 513 vld1.8 {d0[]}, [r12] |
| 516 vdup.u8 q0, r12 | |
| 517 | 514 |
| 518 ; Load above 32 pixels | 515 ; Load above 32 pixels |
| 519 vld1.8 {q1}, [r2]! | 516 vld1.8 {q1}, [r2]! |
| 520 vld1.8 {q2}, [r2] | 517 vld1.8 {q2}, [r2] |
| 521 | 518 |
| 522 ; preload 8 left pixels | 519 ; preload 8 left pixels |
| 523 vld1.8 {d26}, [r3]! | 520 vld1.8 {d26}, [r3]! |
| 524 | 521 |
| 525 ; Compute above - ytop_left | 522 ; Compute above - ytop_left |
| 526 vsubl.u8 q8, d2, d0 | 523 vsubl.u8 q8, d2, d0 |
| 527 vsubl.u8 q9, d3, d1 | 524 vsubl.u8 q9, d3, d0 |
| 528 vsubl.u8 q10, d4, d0 | 525 vsubl.u8 q10, d4, d0 |
| 529 vsubl.u8 q11, d5, d1 | 526 vsubl.u8 q11, d5, d0 |
| 530 | 527 |
| 531 vmovl.u8 q3, d26 | 528 vmovl.u8 q3, d26 |
| 532 | 529 |
| 533 ; Load left row by row and compute left + (above - ytop_left) | 530 ; Load left row by row and compute left + (above - ytop_left) |
| 534 ; Process 8 rows in each single loop and loop 4 times to process 32 rows. | 531 ; Process 8 rows in each single loop and loop 4 times to process 32 rows. |
| 535 mov r2, #4 | 532 mov r2, #4 |
| 536 | 533 |
| 537 loop_32x32_neon | 534 loop_32x32_neon |
| 538 ; Process two rows. | 535 ; Process two rows. |
| 539 vdup.16 q0, d6[0] | 536 vdup.16 q0, d6[0] |
| (...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 625 vmovl.u8 q3, d0 | 622 vmovl.u8 q3, d0 |
| 626 vst1.64 {d24-d27}, [r0], r1 | 623 vst1.64 {d24-d27}, [r0], r1 |
| 627 | 624 |
| 628 subs r2, r2, #1 | 625 subs r2, r2, #1 |
| 629 bgt loop_32x32_neon | 626 bgt loop_32x32_neon |
| 630 | 627 |
| 631 bx lr | 628 bx lr |
| 632 ENDP ; |vp9_tm_predictor_32x32_neon| | 629 ENDP ; |vp9_tm_predictor_32x32_neon| |
| 633 | 630 |
| 634 END | 631 END |
| OLD | NEW |