OLD | NEW |
| (Empty) |
/***************************************************************************
 * Copyright (c) 2009,2010, Code Aurora Forum. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 ***************************************************************************/
7 | |
8 .syntax unified | |
9 .code 32 | |
10 .fpu neon | |
11 .align 4 | |
12 .globl memset32_neon | |
13 .hidden memset32_neon | |
14 | |
15 /* r0 = buffer, r1 = value, r2 = times to write */ | |
16 memset32_neon: | |
17 cmp r2, #1 | |
18 streq r1, [r0], #4 | |
19 bxeq lr | |
20 | |
21 cmp r2, #4 | |
22 bgt memset32_neon_start | |
23 cmp r2, #0 | |
24 bxeq lr | |
25 memset32_neon_small: | |
26 str r1, [r0], #4 | |
27 subs r2, r2, #1 | |
28 bne memset32_neon_small | |
29 bx lr | |
30 memset32_neon_start: | |
31 cmp r2, #16 | |
32 blt memset32_dropthru | |
33 vdup.32 q0, r1 | |
34 vmov q1, q0 | |
35 cmp r2, #32 | |
36 blt memset32_16 | |
37 cmp r2, #64 | |
38 blt memset32_32 | |
39 cmp r2, #128 | |
40 blt memset32_64 | |
41 memset32_128: | |
42 movs r12, r2, lsr #7 | |
43 memset32_loop128: | |
44 subs r12, r12, #1 | |
45 vst1.64 {q0, q1}, [r0]! | |
46 vst1.64 {q0, q1}, [r0]! | |
47 vst1.64 {q0, q1}, [r0]! | |
48 vst1.64 {q0, q1}, [r0]! | |
49 vst1.64 {q0, q1}, [r0]! | |
50 vst1.64 {q0, q1}, [r0]! | |
51 vst1.64 {q0, q1}, [r0]! | |
52 vst1.64 {q0, q1}, [r0]! | |
53 vst1.64 {q0, q1}, [r0]! | |
54 vst1.64 {q0, q1}, [r0]! | |
55 vst1.64 {q0, q1}, [r0]! | |
56 vst1.64 {q0, q1}, [r0]! | |
57 vst1.64 {q0, q1}, [r0]! | |
58 vst1.64 {q0, q1}, [r0]! | |
59 vst1.64 {q0, q1}, [r0]! | |
60 vst1.64 {q0, q1}, [r0]! | |
61 bne memset32_loop128 | |
62 ands r2, r2, #0x7f | |
63 bxeq lr | |
64 memset32_64: | |
65 movs r12, r2, lsr #6 | |
66 beq memset32_32 | |
67 vst1.64 {q0, q1}, [r0]! | |
68 vst1.64 {q0, q1}, [r0]! | |
69 vst1.64 {q0, q1}, [r0]! | |
70 vst1.64 {q0, q1}, [r0]! | |
71 vst1.64 {q0, q1}, [r0]! | |
72 vst1.64 {q0, q1}, [r0]! | |
73 vst1.64 {q0, q1}, [r0]! | |
74 vst1.64 {q0, q1}, [r0]! | |
75 ands r2, r2, #0x3f | |
76 bxeq lr | |
77 memset32_32: | |
78 movs r12, r2, lsr #5 | |
79 beq memset32_16 | |
80 vst1.64 {q0, q1}, [r0]! | |
81 vst1.64 {q0, q1}, [r0]! | |
82 vst1.64 {q0, q1}, [r0]! | |
83 vst1.64 {q0, q1}, [r0]! | |
84 ands r2, r2, #0x1f | |
85 bxeq lr | |
86 memset32_16: | |
87 movs r12, r2, lsr #4 | |
88 beq memset32_dropthru | |
89 and r2, r2, #0xf | |
90 vst1.64 {q0, q1}, [r0]! | |
91 vst1.64 {q0, q1}, [r0]! | |
92 memset32_dropthru: | |
93 rsb r2, r2, #15 | |
94 add pc, pc, r2, lsl #2 | |
95 nop | |
96 str r1, [r0, #56] | |
97 str r1, [r0, #52] | |
98 str r1, [r0, #48] | |
99 str r1, [r0, #44] | |
100 str r1, [r0, #40] | |
101 str r1, [r0, #36] | |
102 str r1, [r0, #32] | |
103 str r1, [r0, #28] | |
104 str r1, [r0, #24] | |
105 str r1, [r0, #20] | |
106 str r1, [r0, #16] | |
107 str r1, [r0, #12] | |
108 str r1, [r0, #8] | |
109 str r1, [r0, #4] | |
110 str r1, [r0, #0] | |
111 bx lr | |
112 | |
113 .end | |
OLD | NEW |