/*
 * Copyright 2010 The Android Open Source Project
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

/* Changes:
 * 2010-08-11 Steve McIntyre <steve.mcintyre@arm.com>
 *    Added small changes to the two functions to make them work on the
 *    specified number of 16- or 32-bit values rather than the original
 *    code which was specified as a count of bytes. More verbose comments
 *    to aid future maintenance.
 */

16 .text | |
17 .align 4 | |
18 .syntax unified | |
19 | |
20 .global arm_memset32 | |
21 .hidden arm_memset32 | |
22 .type arm_memset32, %function | |
23 .global arm_memset16 | |
24 .hidden arm_memset16 | |
25 .type arm_memset16, %function | |
26 | |
27 /* | |
28 * Optimized memset functions for ARM. | |
29 * | |
30 * void arm_memset16(uint16_t* dst, uint16_t value, int count); | |
31 * void arm_memset32(uint32_t* dst, uint32_t value, int count); | |
32 * | |
33 */ | |
34 arm_memset16: | |
35 .fnstart | |
36 push {lr} | |
37 | |
38 /* if count is equal to zero then abort */ | |
39 teq r2, #0 | |
40 ble .Lfinish | |
41 | |
42 /* Multiply count by 2 - go from the number of 16-bit shorts | |
43 * to the number of bytes desired. */ | |
44 mov r2, r2, lsl #1 | |
45 | |
46 /* expand the data to 32 bits */ | |
47 orr r1, r1, r1, lsl #16 | |
48 | |
49 /* align to 32 bits */ | |
50 tst r0, #2 | |
51 strhne r1, [r0], #2 | |
52 subne r2, r2, #2 | |
53 | |
54 /* Now jump into the main loop below. */ | |
55 b .Lwork_32 | |
56 .fnend | |
57 | |
58 arm_memset32: | |
59 .fnstart | |
60 push {lr} | |
61 | |
62 /* if count is equal to zero then abort */ | |
63 teq r2, #0 | |
64 ble .Lfinish | |
65 | |
66 /* Multiply count by 4 - go from the number of 32-bit words to | |
67 * the number of bytes desired. */ | |
68 mov r2, r2, lsl #2 | |
69 | |
70 .Lwork_32: | |
71 /* Set up registers ready for writing them out. */ | |
72 mov ip, r1 | |
73 mov lr, r1 | |
74 | |
75 /* Try to align the destination to a cache line. Assume 32 | |
76 * byte (8 word) cache lines, it's the common case. */ | |
77 rsb r3, r0, #0 | |
78 ands r3, r3, #0x1C | |
79 beq .Laligned32 | |
80 cmp r3, r2 | |
81 andhi r3, r2, #0x1C | |
82 sub r2, r2, r3 | |
83 | |
84 /* (Optionally) write any unaligned leading bytes. | |
85 * (0-28 bytes, length in r3) */ | |
86 movs r3, r3, lsl #28 | |
87 stmiacs r0!, {r1, lr} | |
88 stmiacs r0!, {r1, lr} | |
89 stmiami r0!, {r1, lr} | |
90 movs r3, r3, lsl #2 | |
91 strcs r1, [r0], #4 | |
92 | |
93 /* Now quickly loop through the cache-aligned data. */ | |
94 .Laligned32: | |
95 mov r3, r1 | |
96 1: subs r2, r2, #32 | |
97 stmiahs r0!, {r1,r3,ip,lr} | |
98 stmiahs r0!, {r1,r3,ip,lr} | |
99 bhs 1b | |
100 add r2, r2, #32 | |
101 | |
102 /* (Optionally) store any remaining trailing bytes. | |
103 * (0-30 bytes, length in r2) */ | |
104 movs r2, r2, lsl #28 | |
105 stmiacs r0!, {r1,r3,ip,lr} | |
106 stmiami r0!, {r1,lr} | |
107 movs r2, r2, lsl #2 | |
108 strcs r1, [r0], #4 | |
109 strhmi lr, [r0], #2 | |
110 | |
111 .Lfinish: | |
112 pop {pc} | |
113 .fnend | |