| OLD | NEW |
| (Empty) |
| 1 ; | |
| 2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. | |
| 3 ; | |
| 4 ; Use of this source code is governed by a BSD-style license | |
| 5 ; that can be found in the LICENSE file in the root of the source | |
| 6 ; tree. An additional intellectual property rights grant can be found | |
| 7 ; in the file PATENTS. All contributing project authors may | |
| 8 ; be found in the AUTHORS file in the root of the source tree. | |
| 9 ; | |
| 10 | |
| 11 | |
| 12 EXPORT |vp8_mse16x16_neon| | |
| 13 EXPORT |vp8_get4x4sse_cs_neon| | |
| 14 | |
| 15 ARM | |
| 16 REQUIRE8 | |
| 17 PRESERVE8 | |
| 18 | |
| 19 AREA ||.text||, CODE, READONLY, ALIGN=2 | |
;============================
; vp8_mse16x16_neon
;
; Sum of squared differences between a 16x16 block of source pixels and a
; 16x16 block of reference pixels.
;
; In:
;   r0      unsigned char *src_ptr
;   r1      int            source_stride
;   r2      unsigned char *ref_ptr
;   r3      int            recon_stride
;   [sp]    unsigned int  *sse
; Out:
;   r0      low 32 bits of the sum of squared differences
;   *sse    same value as r0
; Clobbers: r12, q0-q3, q8-q15, flags
;
; Only caller-saved NEON registers (q0-q3, q8-q15) are used, so nothing has
; to be saved on the stack and the fifth argument is still at [sp].
;
;note: in this function, sum is never used. So, that part of the calculation
;from vp8_variance() is left out and only the squared differences are
;accumulated.

|vp8_mse16x16_neon| PROC

    vmov.i8         q15, #0                     ;q15, q8, q9, q10 - sse accumulators
    vmov.i8         q8, #0
    vmov.i8         q9, #0
    vmov.i8         q10, #0

    mov             r12, #8                     ;8 iterations x 2 rows = 16 rows

mse16x16_neon_loop
    vld1.8          {q0}, [r0], r1              ;Load up source and reference
    vld1.8          {q2}, [r2], r3              ;(two 16-byte rows of each)
    vld1.8          {q1}, [r0], r1
    vld1.8          {q3}, [r2], r3

    vsubl.u8        q11, d0, d4                 ;widen to 16 bits: src - ref
    vsubl.u8        q12, d1, d5
    vsubl.u8        q13, d2, d6
    vsubl.u8        q14, d3, d7

    vmlal.s16       q15, d22, d22               ;accumulate diff * diff
    vmlal.s16       q8, d23, d23

    subs            r12, r12, #1                ;flags consumed by the bne below

    vmlal.s16       q9, d24, d24
    vmlal.s16       q10, d25, d25
    vmlal.s16       q15, d26, d26
    vmlal.s16       q8, d27, d27
    vmlal.s16       q9, d28, d28
    vmlal.s16       q10, d29, d29

    bne             mse16x16_neon_loop

    vadd.u32        q15, q15, q8                ;fold the four accumulators
    vadd.u32        q9, q9, q10

    ldr             r12, [sp]                   ;load *sse from stack

    vadd.u32        q10, q15, q9
    vpaddl.u32      q1, q10                     ;pairwise add: 4 x u32 -> 2 x u64
    vadd.u64        d0, d2, d3                  ;d0 = total

    vst1.32         {d0[0]}, [r12]              ;*sse = low 32 bits of total
    vmov.32         r0, d0[0]                   ;return the same value

    bx              lr

    ENDP
| 80 | |
| 81 | |
;=============================
; vp8_get4x4sse_cs_neon
;
; Sum of squared differences between a 4x4 block of source pixels and a
; 4x4 block of reference pixels.
;
; In:
;   r0      unsigned char *src_ptr,
;   r1      int            source_stride,
;   r2      unsigned char *ref_ptr,
;   r3      int            recon_stride
; Out:
;   r0      low 32 bits of the sum of squared differences
; Clobbers: q0-q3, q8-q15
;
; Only caller-saved NEON registers are used, so nothing is saved on the
; stack.
;
; NOTE(review): each row is fetched with an 8-byte load although only the
; first 4 bytes contribute to the result, so 8 bytes per row must be
; readable from both buffers - confirm against callers.
|vp8_get4x4sse_cs_neon| PROC

    vld1.8          {d0}, [r0], r1              ;Load up source and reference
    vld1.8          {d4}, [r2], r3              ;(8 bytes per row, 4 rows each)
    vld1.8          {d1}, [r0], r1
    vld1.8          {d5}, [r2], r3
    vld1.8          {d2}, [r0], r1
    vld1.8          {d6}, [r2], r3
    vld1.8          {d3}, [r0], r1
    vld1.8          {d7}, [r2], r3

    vsubl.u8        q11, d0, d4                 ;widen to 16 bits: src - ref
    vsubl.u8        q12, d1, d5
    vsubl.u8        q13, d2, d6
    vsubl.u8        q14, d3, d7

    vmull.s16       q15, d22, d22               ;square the low halves only,
    vmull.s16       q8, d24, d24                ;i.e. columns 0..3 of each row
    vmull.s16       q9, d26, d26
    vmull.s16       q10, d28, d28

    vadd.u32        q15, q15, q8                ;fold the four rows together
    vadd.u32        q9, q9, q10
    vadd.u32        q9, q15, q9

    vpaddl.u32      q1, q9                      ;pairwise add: 4 x u32 -> 2 x u64
    vadd.u64        d0, d2, d3                  ;d0 = total

    vmov.32         r0, d0[0]                   ;return low 32 bits of total

    bx              lr

    ENDP
| 122 | |
| 123 END | |
| OLD | NEW |