Index: src/opts/memset.arm.S |
diff --git a/src/opts/memset.arm.S b/src/opts/memset.arm.S |
deleted file mode 100644 |
index 8c82f74922809ffe7ec7e14af3651bcf2919ed35..0000000000000000000000000000000000000000 |
--- a/src/opts/memset.arm.S |
+++ /dev/null |
@@ -1,113 +0,0 @@ |
-/* |
- * Copyright 2010 The Android Open Source Project |
- * |
- * Use of this source code is governed by a BSD-style license that can be |
- * found in the LICENSE file. |
- */ |
- |
-/* Changes: |
- * 2010-08-11 Steve McIntyre <steve.mcintyre@arm.com> |
- * Added small changes to the two functions to make them work on the |
- * specified number of 16- or 32-bit values rather than the original |
- * code which was specified as a count of bytes. More verbose comments |
- * to aid future maintenance. |
- */ |
- |
- .text |
- .align 4 |
- .syntax unified |
- |
- .global arm_memset32 |
- .hidden arm_memset32 |
- .type arm_memset32, %function |
- .global arm_memset16 |
- .hidden arm_memset16 |
- .type arm_memset16, %function |
- |
-/* |
- * Optimized memset functions for ARM. |
- * |
- * void arm_memset16(uint16_t* dst, uint16_t value, int count); |
- * void arm_memset32(uint32_t* dst, uint32_t value, int count); |
- * |
- */ |
-arm_memset16: |
- .fnstart |
- push {lr} |
- |
- /* if count is equal to zero then abort */ |
- teq r2, #0 |
- ble .Lfinish |
- |
- /* Multiply count by 2 - go from the number of 16-bit shorts |
- * to the number of bytes desired. */ |
- mov r2, r2, lsl #1 |
- |
- /* expand the data to 32 bits */ |
- orr r1, r1, r1, lsl #16 |
- |
- /* align to 32 bits */ |
- tst r0, #2 |
- strhne r1, [r0], #2 |
- subne r2, r2, #2 |
- |
- /* Now jump into the main loop below. */ |
- b .Lwork_32 |
- .fnend |
- |
-arm_memset32: |
- .fnstart |
- push {lr} |
- |
- /* if count is equal to zero then abort */ |
- teq r2, #0 |
- ble .Lfinish |
- |
- /* Multiply count by 4 - go from the number of 32-bit words to |
- * the number of bytes desired. */ |
- mov r2, r2, lsl #2 |
- |
-.Lwork_32: |
- /* Set up registers ready for writing them out. */ |
- mov ip, r1 |
- mov lr, r1 |
- |
- /* Try to align the destination to a cache line. Assume 32 |
- * byte (8 word) cache lines, it's the common case. */ |
- rsb r3, r0, #0 |
- ands r3, r3, #0x1C |
- beq .Laligned32 |
- cmp r3, r2 |
- andhi r3, r2, #0x1C |
- sub r2, r2, r3 |
- |
- /* (Optionally) write any unaligned leading bytes. |
- * (0-28 bytes, length in r3) */ |
- movs r3, r3, lsl #28 |
- stmiacs r0!, {r1, lr} |
- stmiacs r0!, {r1, lr} |
- stmiami r0!, {r1, lr} |
- movs r3, r3, lsl #2 |
- strcs r1, [r0], #4 |
- |
- /* Now quickly loop through the cache-aligned data. */ |
-.Laligned32: |
- mov r3, r1 |
-1: subs r2, r2, #32 |
- stmiahs r0!, {r1,r3,ip,lr} |
- stmiahs r0!, {r1,r3,ip,lr} |
- bhs 1b |
- add r2, r2, #32 |
- |
- /* (Optionally) store any remaining trailing bytes. |
- * (0-30 bytes, length in r2) */ |
- movs r2, r2, lsl #28 |
- stmiacs r0!, {r1,r3,ip,lr} |
- stmiami r0!, {r1,lr} |
- movs r2, r2, lsl #2 |
- strcs r1, [r0], #4 |
- strhmi lr, [r0], #2 |
- |
-.Lfinish: |
- pop {pc} |
- .fnend |