| OLD | NEW | 
|---|
| 1 ; | 1 ; | 
| 2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 ;  Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 
| 3 ; | 3 ; | 
| 4 ;  Use of this source code is governed by a BSD-style license | 4 ;  Use of this source code is governed by a BSD-style license | 
| 5 ;  that can be found in the LICENSE file in the root of the source | 5 ;  that can be found in the LICENSE file in the root of the source | 
| 6 ;  tree. An additional intellectual property rights grant can be found | 6 ;  tree. An additional intellectual property rights grant can be found | 
| 7 ;  in the file PATENTS.  All contributing project authors may | 7 ;  in the file PATENTS.  All contributing project authors may | 
| 8 ;  be found in the AUTHORS file in the root of the source tree. | 8 ;  be found in the AUTHORS file in the root of the source tree. | 
| 9 ; | 9 ; | 
| 10 | 10 | 
| 11 | 11 | 
| 12     EXPORT  |vp8_sixtap_predict16x16_neon| | 12     EXPORT  |vp8_sixtap_predict16x16_neon| | 
| 13     ARM | 13     ARM | 
| 14     REQUIRE8 | 14     REQUIRE8 | 
| 15     PRESERVE8 | 15     PRESERVE8 | 
| 16 | 16 | 
| 17     AREA ||.text||, CODE, READONLY, ALIGN=2 | 17     AREA ||.text||, CODE, READONLY, ALIGN=2 | 
|  | 18 | 
|  | 19 filter16_coeff | 
|  | 20     DCD     0,  0,  128,    0,   0,  0,   0,  0 | 
|  | 21     DCD     0, -6,  123,   12,  -1,  0,   0,  0 | 
|  | 22     DCD     2, -11, 108,   36,  -8,  1,   0,  0 | 
|  | 23     DCD     0, -9,   93,   50,  -6,  0,   0,  0 | 
|  | 24     DCD     3, -16,  77,   77, -16,  3,   0,  0 | 
|  | 25     DCD     0, -6,   50,   93,  -9,  0,   0,  0 | 
|  | 26     DCD     1, -8,   36,  108, -11,  2,   0,  0 | 
|  | 27     DCD     0, -1,   12,  123,  -6,   0,  0,  0 | 
|  | 28 | 
| 18 ; r0    unsigned char  *src_ptr, | 29 ; r0    unsigned char  *src_ptr, | 
| 19 ; r1    int  src_pixels_per_line, | 30 ; r1    int  src_pixels_per_line, | 
| 20 ; r2    int  xoffset, | 31 ; r2    int  xoffset, | 
| 21 ; r3    int  yoffset, | 32 ; r3    int  yoffset, | 
| 22 ; r4    unsigned char *dst_ptr, | 33 ; r4    unsigned char *dst_ptr, | 
| 23 ; stack(r5) int  dst_pitch | 34 ; stack(r5) int  dst_pitch | 
| 24 | 35 | 
| 25 ;Note: To take advantage of 8-bit multiplication instruction in NEON. First apply 
     abs() to | 36 ;Note: To take advantage of 8-bit multiplication instruction in NEON. First apply 
     abs() to | 
| 26 ; filter coeffs to make them u8. Then, use vmlsl for negative coeffs. After multi
     plication, | 37 ; filter coeffs to make them u8. Then, use vmlsl for negative coeffs. After multi
     plication, | 
| 27 ; the result can be negative. So, I treat the result as s16. But, since it is als
     o possible | 38 ; the result can be negative. So, I treat the result as s16. But, since it is als
     o possible | 
| 28 ; that the result can be a large positive number (> 2^15-1), which could be conf
     used as a | 39 ; that the result can be a large positive number (> 2^15-1), which could be conf
     used as a | 
| 29 ; negative number. To avoid that error, apply filter coeffs in the order of 0, 1,
      4, 5, 2, | 40 ; negative number. To avoid that error, apply filter coeffs in the order of 0, 1,
      4, 5, 2, | 
| 30 ; which ensures that the result stays in s16 range. Finally, saturated add the r
     esult by | 41 ; which ensures that the result stays in s16 range. Finally, saturated add the r
     esult by | 
| 31 ; applying 3rd filter coeff. The same applies to other filter functions. | 42 ; applying 3rd filter coeff. The same applies to other filter functions. | 
| 32 | 43 | 
| 33 |vp8_sixtap_predict16x16_neon| PROC | 44 |vp8_sixtap_predict16x16_neon| PROC | 
| 34     push            {r4-r5, lr} | 45     push            {r4-r5, lr} | 
| 35 | 46 | 
| 36     ldr             r12, _filter16_coeff_ | 47     adr             r12, filter16_coeff | 
| 37     ldr             r4, [sp, #12]           ;load parameters from stack | 48     ldr             r4, [sp, #12]           ;load parameters from stack | 
| 38     ldr             r5, [sp, #16]           ;load parameters from stack | 49     ldr             r5, [sp, #16]           ;load parameters from stack | 
| 39 | 50 | 
| 40     cmp             r2, #0                  ;skip first_pass filter if xoffset=0 | 51     cmp             r2, #0                  ;skip first_pass filter if xoffset=0 | 
| 41     beq             secondpass_filter16x16_only | 52     beq             secondpass_filter16x16_only | 
| 42 | 53 | 
| 43     add             r2, r12, r2, lsl #5     ;calculate filter location | 54     add             r2, r12, r2, lsl #5     ;calculate filter location | 
| 44 | 55 | 
| 45     cmp             r3, #0                  ;skip second_pass filter if yoffset=
     0 | 56     cmp             r3, #0                  ;skip second_pass filter if yoffset=
     0 | 
| 46 | 57 | 
| (...skipping 422 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 469     sub             r4, r4, r5, lsl #4 | 480     sub             r4, r4, r5, lsl #4 | 
| 470     add             r4, r4, #8 | 481     add             r4, r4, #8 | 
| 471 | 482 | 
| 472     bne filt_blk2d_spo16x16_outloop_neon | 483     bne filt_blk2d_spo16x16_outloop_neon | 
| 473 | 484 | 
| 474     pop             {r4-r5,pc} | 485     pop             {r4-r5,pc} | 
| 475 | 486 | 
| 476     ENDP | 487     ENDP | 
| 477 | 488 | 
| 478 ;----------------- | 489 ;----------------- | 
| 479 |  | 
| 480 _filter16_coeff_ |  | 
| 481     DCD     filter16_coeff |  | 
| 482 filter16_coeff |  | 
| 483     DCD     0,  0,  128,    0,   0,  0,   0,  0 |  | 
| 484     DCD     0, -6,  123,   12,  -1,  0,   0,  0 |  | 
| 485     DCD     2, -11, 108,   36,  -8,  1,   0,  0 |  | 
| 486     DCD     0, -9,   93,   50,  -6,  0,   0,  0 |  | 
| 487     DCD     3, -16,  77,   77, -16,  3,   0,  0 |  | 
| 488     DCD     0, -6,   50,   93,  -9,  0,   0,  0 |  | 
| 489     DCD     1, -8,   36,  108, -11,  2,   0,  0 |  | 
| 490     DCD     0, -1,   12,  123,  -6,   0,  0,  0 |  | 
| 491 |  | 
| 492     END | 490     END | 
| OLD | NEW | 
|---|