/*
 * Copyright 2010 The Android Open Source Project
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

/* Changes:
 * 2010-08-11 Steve McIntyre <steve.mcintyre@arm.com>
 *    Added small changes to the two functions to make them work on the
 *    specified number of 16- or 32-bit values rather than the original
 *    code which was specified as a count of bytes. More verbose comments
 *    to aid future maintenance.
 */
| 15 | |
| 16 .text | |
| 17 .align 4 | |
| 18 .syntax unified | |
| 19 | |
| 20 .global arm_memset32 | |
| 21 .hidden arm_memset32 | |
| 22 .type arm_memset32, %function | |
| 23 .global arm_memset16 | |
| 24 .hidden arm_memset16 | |
| 25 .type arm_memset16, %function | |
| 26 | |
| 27 /* | |
| 28 * Optimized memset functions for ARM. | |
| 29 * | |
| 30 * void arm_memset16(uint16_t* dst, uint16_t value, int count); | |
| 31 * void arm_memset32(uint32_t* dst, uint32_t value, int count); | |
| 32 * | |
| 33 */ | |
/*
 * void arm_memset16(uint16_t* dst, uint16_t value, int count);
 *
 * In:  r0 = dst (2-byte aligned), r1 = fill value (low 16 bits),
 *      r2 = count of 16-bit halfwords.
 * Falls through into the shared word-store loop (.Lwork_32) inside
 * arm_memset32, entering it with r2 converted to a byte count.
 */
arm_memset16:
        .fnstart
        push    {lr}

        /* Abort if count <= 0.  Use cmp rather than teq: TEQ leaves
         * the V flag unchanged, so "ble" after TEQ could miss a
         * negative count if the caller happened to leave V set.
         * cmp r2, #0 defines V (= 0), making ble reliable. */
        cmp     r2, #0
        ble     .Lfinish

        /* Convert the count of 16-bit halfwords to a byte count. */
        mov     r2, r2, lsl #1

        /* Replicate the 16-bit value into both halves of r1. */
        orr     r1, r1, r1, lsl #16

        /* Align dst to 32 bits with a single halfword store if needed. */
        tst     r0, #2
        strhne  r1, [r0], #2
        subne   r2, r2, #2

        /* Continue in the shared main loop below. */
        b       .Lwork_32
        .fnend
| 57 | |
/*
 * void arm_memset32(uint32_t* dst, uint32_t value, int count);
 *
 * In:  r0 = dst (4-byte aligned), r1 = fill value, r2 = count of
 *      32-bit words.
 * Also hosts the shared store loop (.Lwork_32) and the common epilogue
 * (.Lfinish) used by arm_memset16, which enters with r2 already
 * converted to a byte count and r1 already replicated.
 */
arm_memset32:
        .fnstart
        push    {lr}

        /* Abort if count <= 0.  cmp (not teq) so N and V are both
         * defined and "ble" correctly catches negative counts. */
        cmp     r2, #0
        ble     .Lfinish

        /* Convert the count of 32-bit words to a byte count. */
        mov     r2, r2, lsl #2

.Lwork_32:
        /* Duplicate the fill value so 2- and 4-register stores can
         * be used below. */
        mov     ip, r1
        mov     lr, r1

        /* Align the destination to a cache line.  Assume 32-byte
         * (8-word) lines, the common case.  r3 = leading bytes
         * needed to reach alignment (0-28, multiple of 4). */
        rsb     r3, r0, #0
        ands    r3, r3, #0x1C
        beq     .Laligned32
        cmp     r3, r2
        andhi   r3, r2, #0x1C           /* total < gap: clamp to r2 */
        sub     r2, r2, r3

        /* Store the 0-28 leading bytes.  lsl #28 moves the 16s bit
         * of r3 into C and the 8s bit into N; the second lsl #2
         * exposes the 4s bit in C. */
        movs    r3, r3, lsl #28
        stmiacs r0!, {r1, lr}           /* 16 bytes (with next store) */
        stmiacs r0!, {r1, lr}
        stmiami r0!, {r1, lr}           /* 8 bytes */
        movs    r3, r3, lsl #2
        strcs   r1, [r0], #4            /* 4 bytes */

        /* Main loop: one 32-byte cache line per iteration. */
.Laligned32:
        mov     r3, r1
1:      subs    r2, r2, #32
        stmiahs r0!, {r1, r3, ip, lr}
        stmiahs r0!, {r1, r3, ip, lr}
        bhs     1b
        add     r2, r2, #32             /* undo the final over-subtract */

        /* Store the 0-30 trailing bytes, same shift trick as above;
         * the final lsl #2 also exposes the 2s bit in N for strh. */
        movs    r2, r2, lsl #28
        stmiacs r0!, {r1, r3, ip, lr}   /* 16 bytes */
        stmiami r0!, {r1, lr}           /* 8 bytes */
        movs    r2, r2, lsl #2
        strcs   r1, [r0], #4            /* 4 bytes */
        strhmi  lr, [r0], #2            /* 2 bytes */

.Lfinish:
        pop     {pc}
        .fnend