| OLD | NEW |
| 1 /*************************************************************************** | 1 /*************************************************************************** |
| 2 * Copyright (c) 2009,2010, Code Aurora Forum. All rights reserved. | 2 * Copyright (c) 2009,2010, Code Aurora Forum. All rights reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 ***************************************************************************/ | 6 ***************************************************************************/ |
| 7 | 7 |
| 8 /*************************************************************************** | 8 /*************************************************************************** |
| 9 Neon memset: Attempts to do a memset with Neon registers if possible. | 9 Neon memset: Attempts to do a memset with Neon registers if possible. |
| 10 Inputs: | 10 Inputs: |
| 11 s: The buffer to write to | 11 s: The buffer to write to |
| 12 c: The integer data to write to the buffer | 12 c: The integer data to write to the buffer |
| 13 n: The size_t count. | 13 n: The size_t count. |
| 14 Outputs: | 14 Outputs: |
| 15 | 15 |
| 16 ***************************************************************************/ | 16 ***************************************************************************/ |
| 17 | 17 |
| 18 .syntax unified |
| 19 |
| 18 .code 32 | 20 .code 32 |
| 19 .fpu neon | 21 .fpu neon |
| 20 .align 4 | 22 .align 4 |
| 21 .globl memset16_neon | 23 .globl memset16_neon |
| 22 .func | 24 .func |
| 23 | 25 |
| 24 memset16_neon: | 26 memset16_neon: |
| 25 cmp r2, #0 | 27 cmp r2, #0 |
| 26 bxeq lr | 28 bxeq lr |
| 27 | 29 |
| (...skipping 35 matching lines...) |
| 63 * boundary. Note that this will be a multiple of 4, since we | 65 * boundary. Note that this will be a multiple of 4, since we |
| 64 * already are word-aligned. | 66 * already are word-aligned. |
| 65 */ | 67 */ |
| 66 rsb r12, r12, #16 | 68 rsb r12, r12, #16 |
| 67 sub r2, r2, r12 | 69 sub r2, r2, r12 |
| 68 lsls r12, r12, #29 | 70 lsls r12, r12, #29 |
| 69 strmi r1, [r0], #4 | 71 strmi r1, [r0], #4 |
| 70 strcs r1, [r0], #4 | 72 strcs r1, [r0], #4 |
| 71 strcs r1, [r0], #4 | 73 strcs r1, [r0], #4 |
| 72 lsls r12, r12, #2 | 74 lsls r12, r12, #2 |
| 73 strcsh r1, [r0], #2 | 75 strhcs r1, [r0], #2 |
| 74 memset_route: | 76 memset_route: |
| 75 /* | 77 /* |
| 76 * Decide where to route for the maximum copy sizes. Note that we | 78 * Decide where to route for the maximum copy sizes. Note that we |
| 77 * build q0 and q1 depending on if we'll need it, so that's | 79 * build q0 and q1 depending on if we'll need it, so that's |
| 78 * interwoven here as well. | 80 * interwoven here as well. |
| 79 */ | 81 */ |
| 80 vdup.u32 d0, r1 | 82 vdup.u32 d0, r1 |
| 81 cmp r2, #16 | 83 cmp r2, #16 |
| 82 blt memset_8 | 84 blt memset_8 |
| 83 vmov d1, d0 | 85 vmov d1, d0 |
| (...skipping 50 matching lines...) |
| 134 memset_2: | 136 memset_2: |
| 135 cmp r2, #0 | 137 cmp r2, #0 |
| 136 ble memset_end | 138 ble memset_end |
| 137 strh r1, [r0], #2 | 139 strh r1, [r0], #2 |
| 138 memset_end: | 140 memset_end: |
| 139 pop {r0} | 141 pop {r0} |
| 140 bx lr | 142 bx lr |
| 141 | 143 |
| 142 .endfunc | 144 .endfunc |
| 143 .end | 145 .end |
| OLD | NEW |