| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| 3 ; | 3 ; |
| 4 ; Use of this source code is governed by a BSD-style license | 4 ; Use of this source code is governed by a BSD-style license |
| 5 ; that can be found in the LICENSE file in the root of the source | 5 ; that can be found in the LICENSE file in the root of the source |
| 6 ; tree. An additional intellectual property rights grant can be found | 6 ; tree. An additional intellectual property rights grant can be found |
| 7 ; in the file PATENTS. All contributing project authors may | 7 ; in the file PATENTS. All contributing project authors may |
| 8 ; be found in the AUTHORS file in the root of the source tree. | 8 ; be found in the AUTHORS file in the root of the source tree. |
| 9 ; | 9 ; |
| 10 | 10 |
| 11 | 11 |
| 12 EXPORT |vp8_sixtap_predict16x16_neon| | 12 EXPORT |vp8_sixtap_predict16x16_neon| |
| 13 ARM | 13 ARM |
| 14 REQUIRE8 | 14 REQUIRE8 |
| 15 PRESERVE8 | 15 PRESERVE8 |
| 16 | 16 |
| 17 AREA ||.text||, CODE, READONLY, ALIGN=2 | 17 AREA ||.text||, CODE, READONLY, ALIGN=2 |
| 18 |
| 19 filter16_coeff |
| 20 DCD 0, 0, 128, 0, 0, 0, 0, 0 |
| 21 DCD 0, -6, 123, 12, -1, 0, 0, 0 |
| 22 DCD 2, -11, 108, 36, -8, 1, 0, 0 |
| 23 DCD 0, -9, 93, 50, -6, 0, 0, 0 |
| 24 DCD 3, -16, 77, 77, -16, 3, 0, 0 |
| 25 DCD 0, -6, 50, 93, -9, 0, 0, 0 |
| 26 DCD 1, -8, 36, 108, -11, 2, 0, 0 |
| 27 DCD 0, -1, 12, 123, -6, 0, 0, 0 |
| 28 |
| 18 ; r0 unsigned char *src_ptr, | 29 ; r0 unsigned char *src_ptr, |
| 19 ; r1 int src_pixels_per_line, | 30 ; r1 int src_pixels_per_line, |
| 20 ; r2 int xoffset, | 31 ; r2 int xoffset, |
| 21 ; r3 int yoffset, | 32 ; r3 int yoffset, |
| 22 ; r4 unsigned char *dst_ptr, | 33 ; r4 unsigned char *dst_ptr, |
| 23 ; stack(r5) int dst_pitch | 34 ; stack(r5) int dst_pitch |
| 24 | 35 |
| 25 ;Note: To take advantage of the 8-bit multiplication instruction in NEON, first apply abs() to | 36 ;Note: To take advantage of the 8-bit multiplication instruction in NEON, first apply abs() to |
| 26 ; filter coeffs to make them u8. Then, use vmlsl for negative coeffs. After multiplication, | 37 ; filter coeffs to make them u8. Then, use vmlsl for negative coeffs. After multiplication, |
| 27 ; the result can be negative. So, I treat the result as s16. But, since it is also possible | 38 ; the result can be negative. So, I treat the result as s16. But, since it is also possible |
| 28 ; that the result can be a large positive number (> 2^15-1), which could be confused as a | 39 ; that the result can be a large positive number (> 2^15-1), which could be confused as a |
| 29 ; negative number. To avoid that error, apply filter coeffs in the order of 0, 1, 4, 5, 2, | 40 ; negative number. To avoid that error, apply filter coeffs in the order of 0, 1, 4, 5, 2, |
| 30 ; which ensures that the result stays in s16 range. Finally, saturated add the result by | 41 ; which ensures that the result stays in s16 range. Finally, saturated add the result by |
| 31 ; applying 3rd filter coeff. The same applies to other filter functions. | 42 ; applying 3rd filter coeff. The same applies to other filter functions. |
| 32 | 43 |
| 33 |vp8_sixtap_predict16x16_neon| PROC | 44 |vp8_sixtap_predict16x16_neon| PROC |
| 34 push {r4-r5, lr} | 45 push {r4-r5, lr} |
| 35 | 46 |
| 36 ldr r12, _filter16_coeff_ | 47 adr r12, filter16_coeff |
| 37 ldr r4, [sp, #12] ;load parameters from stack | 48 ldr r4, [sp, #12] ;load parameters from stack |
| 38 ldr r5, [sp, #16] ;load parameters from stack | 49 ldr r5, [sp, #16] ;load parameters from stack |
| 39 | 50 |
| 40 cmp r2, #0 ;skip first_pass filter if xoffset=0 | 51 cmp r2, #0 ;skip first_pass filter if xoffset=0 |
| 41 beq secondpass_filter16x16_only | 52 beq secondpass_filter16x16_only |
| 42 | 53 |
| 43 add r2, r12, r2, lsl #5 ;calculate filter location | 54 add r2, r12, r2, lsl #5 ;calculate filter location |
| 44 | 55 |
| 45 cmp r3, #0 ;skip second_pass filter if yoffset=
0 | 56 cmp r3, #0 ;skip second_pass filter if yoffset=
0 |
| 46 | 57 |
| (...skipping 422 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 469 sub r4, r4, r5, lsl #4 | 480 sub r4, r4, r5, lsl #4 |
| 470 add r4, r4, #8 | 481 add r4, r4, #8 |
| 471 | 482 |
| 472 bne filt_blk2d_spo16x16_outloop_neon | 483 bne filt_blk2d_spo16x16_outloop_neon |
| 473 | 484 |
| 474 pop {r4-r5,pc} | 485 pop {r4-r5,pc} |
| 475 | 486 |
| 476 ENDP | 487 ENDP |
| 477 | 488 |
| 478 ;----------------- | 489 ;----------------- |
| 479 | |
| 480 _filter16_coeff_ | |
| 481 DCD filter16_coeff | |
| 482 filter16_coeff | |
| 483 DCD 0, 0, 128, 0, 0, 0, 0, 0 | |
| 484 DCD 0, -6, 123, 12, -1, 0, 0, 0 | |
| 485 DCD 2, -11, 108, 36, -8, 1, 0, 0 | |
| 486 DCD 0, -9, 93, 50, -6, 0, 0, 0 | |
| 487 DCD 3, -16, 77, 77, -16, 3, 0, 0 | |
| 488 DCD 0, -6, 50, 93, -9, 0, 0, 0 | |
| 489 DCD 1, -8, 36, 108, -11, 2, 0, 0 | |
| 490 DCD 0, -1, 12, 123, -6, 0, 0, 0 | |
| 491 | |
| 492 END | 490 END |
| OLD | NEW |