| OLD | NEW |
| (Empty) |
| 1 /*************************************************************************** | |
| 2 * Copyright (c) 2009,2010, Code Aurora Forum. All rights reserved. | |
| 3 * | |
| 4 * Use of this source code is governed by a BSD-style license that can be | |
| 5 * found in the LICENSE file. | |
| 6 ***************************************************************************/ | |
| 7 | |
| 8 .syntax unified /* use the unified ARM/Thumb assembler syntax */ | |
| 9 .code 32 /* assemble what follows as 32-bit ARM-state instructions, not Thumb */ | |
| 10 .fpu neon /* permit the NEON instructions used below (vdup, vmov, vst1) */ | |
| 11 .align 4 /* on ARM targets gas treats the operand as a power of two, i.e. 16-byte alignment */ | |
| 12 .globl memset32_neon /* make the entry point visible to the linker */ | |
| 13 .hidden memset32_neon /* ELF hidden visibility. NOTE(review): no .type memset32_neon, %function directive is visible here; confirm that callers built as Thumb still interwork correctly */ | |
| 14 | |
| 15 /* memset32_neon: store the 32-bit value in r1 to r2 consecutive words starting at r0. r0 = buffer, r1 = value, r2 = times to write (a count of 32-bit words, not bytes). Returns with bx lr. Scratch: r0, r2, r12, q0, q1 and the condition flags may be modified. */ | |
| 16 memset32_neon: | |
| 17 cmp r2, #1 /* count == 1: a single store, then return */ | |
| 18 streq r1, [r0], #4 | |
| 19 bxeq lr | |
| 20 | |
| 21 cmp r2, #4 /* signed compare: counts above 4 take the unrolled and NEON paths */ | |
| 22 bgt memset32_neon_start | |
| 23 cmp r2, #0 /* count == 0: nothing to write */ | |
| 24 bxeq lr | |
| 25 memset32_neon_small: /* one word per pass; reached with count 2..4, or with a count that the signed bgt above saw as negative */ | |
| 26 str r1, [r0], #4 /* store one word and post-increment r0 by 4 */ | |
| 27 subs r2, r2, #1 | |
| 28 bne memset32_neon_small | |
| 29 bx lr | |
| 30 memset32_neon_start: /* count is greater than 4 here */ | |
| 31 cmp r2, #16 | |
| 32 blt memset32_dropthru /* 5..15 words: go straight to the scalar store table, no NEON setup needed */ | |
| 33 vdup.32 q0, r1 /* q0 = four copies of the value */ | |
| 34 vmov q1, q0 /* q1 = q0, so the pair {q0, q1} holds 8 words (32 bytes) */ | |
| 35 cmp r2, #32 /* enter the chain below at the largest chunk size (128, 64, 32 or 16 words) that fits the count */ | |
| 36 blt memset32_16 | |
| 37 cmp r2, #64 | |
| 38 blt memset32_32 | |
| 39 cmp r2, #128 | |
| 40 blt memset32_64 | |
| 41 memset32_128: | |
| 42 movs r12, r2, lsr #7 /* r12 = count / 128 = number of 128-word chunks; nonzero because count >= 128 on this path */ | |
| 43 memset32_loop128: /* each pass issues 16 stores of 8 words = 128 words */ | |
| 44 subs r12, r12, #1 /* sets Z for the bne at the end of the pass; the loop relies on the stores in between leaving the flags unchanged */ | |
| 45 vst1.64 {q0, q1}, [r0]! /* store 32 bytes and advance r0 by 32 (writeback) */ | |
| 46 vst1.64 {q0, q1}, [r0]! | |
| 47 vst1.64 {q0, q1}, [r0]! | |
| 48 vst1.64 {q0, q1}, [r0]! | |
| 49 vst1.64 {q0, q1}, [r0]! | |
| 50 vst1.64 {q0, q1}, [r0]! | |
| 51 vst1.64 {q0, q1}, [r0]! | |
| 52 vst1.64 {q0, q1}, [r0]! | |
| 53 vst1.64 {q0, q1}, [r0]! | |
| 54 vst1.64 {q0, q1}, [r0]! | |
| 55 vst1.64 {q0, q1}, [r0]! | |
| 56 vst1.64 {q0, q1}, [r0]! | |
| 57 vst1.64 {q0, q1}, [r0]! | |
| 58 vst1.64 {q0, q1}, [r0]! | |
| 59 vst1.64 {q0, q1}, [r0]! | |
| 60 vst1.64 {q0, q1}, [r0]! | |
| 61 bne memset32_loop128 | |
| 62 ands r2, r2, #0x7f /* r2 = count mod 128 = words still to write */ | |
| 63 bxeq lr /* no remainder: done */ | |
| 64 memset32_64: /* r2 is below 128 here */ | |
| 65 movs r12, r2, lsr #6 /* zero when fewer than 64 words remain; r12 is only used for the flag */ | |
| 66 beq memset32_32 | |
| 67 vst1.64 {q0, q1}, [r0]! /* 8 stores of 8 words = 64 words */ | |
| 68 vst1.64 {q0, q1}, [r0]! | |
| 69 vst1.64 {q0, q1}, [r0]! | |
| 70 vst1.64 {q0, q1}, [r0]! | |
| 71 vst1.64 {q0, q1}, [r0]! | |
| 72 vst1.64 {q0, q1}, [r0]! | |
| 73 vst1.64 {q0, q1}, [r0]! | |
| 74 vst1.64 {q0, q1}, [r0]! | |
| 75 ands r2, r2, #0x3f /* r2 = words left after the 64-word chunk */ | |
| 76 bxeq lr | |
| 77 memset32_32: /* r2 is below 64 here */ | |
| 78 movs r12, r2, lsr #5 /* zero when fewer than 32 words remain */ | |
| 79 beq memset32_16 | |
| 80 vst1.64 {q0, q1}, [r0]! /* 4 stores of 8 words = 32 words */ | |
| 81 vst1.64 {q0, q1}, [r0]! | |
| 82 vst1.64 {q0, q1}, [r0]! | |
| 83 vst1.64 {q0, q1}, [r0]! | |
| 84 ands r2, r2, #0x1f /* r2 = words left after the 32-word chunk */ | |
| 85 bxeq lr | |
| 86 memset32_16: /* r2 is below 32 here */ | |
| 87 movs r12, r2, lsr #4 /* zero when fewer than 16 words remain */ | |
| 88 beq memset32_dropthru /* r2 is already the tail count in that case */ | |
| 89 and r2, r2, #0xf /* tail count = r2 mod 16; no early return here, a zero tail simply jumps to the bx lr at the end of the table */ | |
| 90 vst1.64 {q0, q1}, [r0]! /* 2 stores of 8 words = 16 words */ | |
| 91 vst1.64 {q0, q1}, [r0]! | |
| 92 memset32_dropthru: /* r2 = 0..15 words still to write, r0 = address of the first of them */ | |
| 93 rsb r2, r2, #15 /* r2 = 15 - remaining = number of table entries to skip */ | |
| 94 add pc, pc, r2, lsl #2 /* computed jump into the table: every entry is one 4-byte instruction, and pc reads as this instruction + 8 in ARM state, which is the first str below */ | |
| 95 nop /* padding so that the pc + 8 base is the first table entry; the add above always jumps over it */ | |
| 96 str r1, [r0, #56] /* entry point for 15 remaining words; execution falls through every entry after the one jumped to */ | |
| 97 str r1, [r0, #52] | |
| 98 str r1, [r0, #48] | |
| 99 str r1, [r0, #44] | |
| 100 str r1, [r0, #40] | |
| 101 str r1, [r0, #36] | |
| 102 str r1, [r0, #32] | |
| 103 str r1, [r0, #28] | |
| 104 str r1, [r0, #24] | |
| 105 str r1, [r0, #20] | |
| 106 str r1, [r0, #16] | |
| 107 str r1, [r0, #12] | |
| 108 str r1, [r0, #8] | |
| 109 str r1, [r0, #4] | |
| 110 str r1, [r0, #0] /* entry point for 1 remaining word */ | |
| 111 bx lr /* also the jump target when 0 words remain */ | |
| 112 | |
| 113 .end /* marks the end of the assembly source; gas does not process anything after this directive */ | |
| OLD | NEW |