OLD | NEW |
1 /*************************************************************************** | 1 /*************************************************************************** |
2 * Copyright (c) 2009,2010, Code Aurora Forum. All rights reserved. | 2 * Copyright (c) 2009,2010, Code Aurora Forum. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 ***************************************************************************/ | 6 ***************************************************************************/ |
7 | 7 |
8 /*************************************************************************** | 8 /*************************************************************************** |
9 Neon memset: Attempts to do a memset with Neon registers if possible, | 9 Neon memset: Attempts to do a memset with Neon registers if possible, |
10 Inputs: | 10 Inputs: |
11 s: The buffer to write to | 11 s: The buffer to write to |
12 c: The integer data to write to the buffer | 12 c: The integer data to write to the buffer |
13 n: The size_t count. | 13 n: The size_t count. |
14 Outputs: | 14 Outputs: |
15 | 15 |
16 ***************************************************************************/ | 16 ***************************************************************************/ |
17 | 17 |
| 18 .syntax unified |
| 19 |
18 .code 32 | 20 .code 32 |
19 .fpu neon | 21 .fpu neon |
20 .align 4 | 22 .align 4 |
21 .globl memset16_neon | 23 .globl memset16_neon |
22 .func | 24 .func |
23 | 25 |
24 memset16_neon: | 26 memset16_neon: |
25 cmp r2, #0 | 27 cmp r2, #0 |
26 bxeq lr | 28 bxeq lr |
27 | 29 |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
63 * boundary. Note that this will be a multiple of 4, since we | 65 * boundary. Note that this will be a multiple of 4, since we |
64 * already are word-aligned. | 66 * already are word-aligned. |
65 */ | 67 */ |
66 rsb r12, r12, #16 | 68 rsb r12, r12, #16 |
67 sub r2, r2, r12 | 69 sub r2, r2, r12 |
68 lsls r12, r12, #29 | 70 lsls r12, r12, #29 |
69 strmi r1, [r0], #4 | 71 strmi r1, [r0], #4 |
70 strcs r1, [r0], #4 | 72 strcs r1, [r0], #4 |
71 strcs r1, [r0], #4 | 73 strcs r1, [r0], #4 |
72 lsls r12, r12, #2 | 74 lsls r12, r12, #2 |
73 strcsh r1, [r0], #2 | 75 strhcs r1, [r0], #2 |
74 memset_route: | 76 memset_route: |
75 /* | 77 /* |
76 * Decide where to route for the maximum copy sizes. Note that we | 78 * Decide where to route for the maximum copy sizes. Note that we |
77 * build q0 and q1 depending on if we'll need it, so that's | 79 * build q0 and q1 depending on if we'll need it, so that's |
78 * interwoven here as well. | 80 * interwoven here as well. |
79 */ | 81 */ |
80 vdup.u32 d0, r1 | 82 vdup.u32 d0, r1 |
81 cmp r2, #16 | 83 cmp r2, #16 |
82 blt memset_8 | 84 blt memset_8 |
83 vmov d1, d0 | 85 vmov d1, d0 |
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
134 memset_2: | 136 memset_2: |
135 cmp r2, #0 | 137 cmp r2, #0 |
136 ble memset_end | 138 ble memset_end |
137 strh r1, [r0], #2 | 139 strh r1, [r0], #2 |
138 memset_end: | 140 memset_end: |
139 pop {r0} | 141 pop {r0} |
140 bx lr | 142 bx lr |
141 | 143 |
142 .endfunc | 144 .endfunc |
143 .end | 145 .end |
OLD | NEW |