Index: src/ia32/codegen-ia32.cc |
diff --git a/src/ia32/codegen-ia32.cc b/src/ia32/codegen-ia32.cc |
index 550c83d51adad7225b995218a43e0ccf2c5327dc..fc02458753613386c96680bf5e53ffefa6d5611f 100644 |
--- a/src/ia32/codegen-ia32.cc |
+++ b/src/ia32/codegen-ia32.cc |
@@ -173,21 +173,94 @@ UnaryMathFunction CreateSqrtFunction() { |
} |
-static void MemCopyWrapper(void* dest, const void* src, size_t size) { |
- memcpy(dest, src, size); |
+// Helper functions for CreateMemMoveFunction. |
+#undef __ |
+#define __ ACCESS_MASM(masm) |
+ |
+// Jump tables used by the code generated in CreateMemMoveFunction: each entry |
+// holds the absolute address of one size-specialized copy handler. |
+// Keep around global pointers to these objects so that Valgrind won't complain. |
+static size_t* medium_handlers = NULL; |
+static size_t* small_handlers = NULL; |
+ |
+ |
+// Copy direction for MemMoveEmitMainLoop: FORWARD advances src/dst after each |
+// chunk, BACKWARD moves them down before each chunk. |
+enum Direction { FORWARD, BACKWARD }; |
+// Alignment of the source pointer; MemMoveEmitMainLoop passes |
+// (alignment == ALIGNED) to movdq() for its loads from src. |
+enum Alignment { ALIGNED, UNALIGNED }; |
+ |
+// Emits the bulk part of the copy: a loop moving 64 bytes per iteration, |
+// followed by one 32-byte step and one 16-byte step that are taken when bits |
+// 0x20 and 0x10 of the byte count are set. If bit 0x10 is clear the emitted |
+// code jumps to |move_last_15|; otherwise it falls through to whatever the |
+// caller emits next. Loads from src go through movdq() with |
+// (alignment == ALIGNED) as first argument; stores to dst always use movdqa. |
+// xmm0-xmm3 are used as scratch registers. |
+// |
+// Expects registers: |
+// esi - source, aligned if alignment == ALIGNED |
+// edi - destination, always aligned |
+// ecx - count (copy size in bytes) |
+// edx - loop count (number of 64 byte chunks) |
+void MemMoveEmitMainLoop(MacroAssembler* masm, |
+ Label* move_last_15, |
+ Direction direction, |
+ Alignment alignment) { |
+ Register src = esi; |
+ Register dst = edi; |
+ Register count = ecx; |
+ Register loop_count = edx; |
+ Label loop, move_last_31, move_last_63; |
+ // No full 64-byte chunk: skip straight to the remainder handling. |
+ __ cmp(loop_count, 0); |
+ __ j(equal, &move_last_63); |
+ __ bind(&loop); |
+ // Main loop. Copy in 64 byte chunks. |
+ // BACKWARD moves the pointers down before the accesses and FORWARD moves |
+ // them up afterwards, so the 0x00..0x30 offsets are the same either way. |
+ if (direction == BACKWARD) __ sub(src, Immediate(0x40)); |
+ __ movdq(alignment == ALIGNED, xmm0, Operand(src, 0x00)); |
+ __ movdq(alignment == ALIGNED, xmm1, Operand(src, 0x10)); |
+ __ movdq(alignment == ALIGNED, xmm2, Operand(src, 0x20)); |
+ __ movdq(alignment == ALIGNED, xmm3, Operand(src, 0x30)); |
+ if (direction == FORWARD) __ add(src, Immediate(0x40)); |
+ if (direction == BACKWARD) __ sub(dst, Immediate(0x40)); |
+ __ movdqa(Operand(dst, 0x00), xmm0); |
+ __ movdqa(Operand(dst, 0x10), xmm1); |
+ __ movdqa(Operand(dst, 0x20), xmm2); |
+ __ movdqa(Operand(dst, 0x30), xmm3); |
+ if (direction == FORWARD) __ add(dst, Immediate(0x40)); |
+ __ dec(loop_count); |
+ __ j(not_zero, &loop); |
+ // At most 63 bytes left to copy. |
+ __ bind(&move_last_63); |
+ // Bit 0x20 of count set: one more 32-byte step. |
+ __ test(count, Immediate(0x20)); |
+ __ j(zero, &move_last_31); |
+ if (direction == BACKWARD) __ sub(src, Immediate(0x20)); |
+ __ movdq(alignment == ALIGNED, xmm0, Operand(src, 0x00)); |
+ __ movdq(alignment == ALIGNED, xmm1, Operand(src, 0x10)); |
+ if (direction == FORWARD) __ add(src, Immediate(0x20)); |
+ if (direction == BACKWARD) __ sub(dst, Immediate(0x20)); |
+ __ movdqa(Operand(dst, 0x00), xmm0); |
+ __ movdqa(Operand(dst, 0x10), xmm1); |
+ if (direction == FORWARD) __ add(dst, Immediate(0x20)); |
+ // At most 31 bytes left to copy. |
+ __ bind(&move_last_31); |
+ // Bit 0x10 of count set: one more 16-byte step, else leave via the |
+ // caller-supplied label. |
+ __ test(count, Immediate(0x10)); |
+ __ j(zero, move_last_15); |
+ if (direction == BACKWARD) __ sub(src, Immediate(0x10)); |
+ __ movdq(alignment == ALIGNED, xmm0, Operand(src, 0)); |
+ if (direction == FORWARD) __ add(src, Immediate(0x10)); |
+ if (direction == BACKWARD) __ sub(dst, Immediate(0x10)); |
+ __ movdqa(Operand(dst, 0), xmm0); |
+ if (direction == FORWARD) __ add(dst, Immediate(0x10)); |
+} |
+ |
+ |
+// Emits the common exit sequence of the generated routine: pops esi and then |
+// edi (the reverse of the push order used in CreateMemMoveFunction) and |
+// emits ret(0). |
+void MemMoveEmitPopAndReturn(MacroAssembler* masm) { |
+ __ pop(esi); |
+ __ pop(edi); |
+ __ ret(0); |
} |
-OS::MemCopyFunction CreateMemCopyFunction() { |
+#undef __ |
+#define __ masm. |
+ |
+ |
+// Generates an ia32 machine-code routine that copies a byte count from a |
+// source to a destination address, choosing a forward or a backward copy |
+// from the relative order of the two pointers, and returns it cast to |
+// OS::MemMoveFunction. The code is emitted into a buffer obtained from |
+// OS::Allocate; NULL is returned if that allocation fails. |
+// Side effect: when SSE2 is supported, allocates the file-level |
+// medium_handlers and small_handlers jump tables. |
+OS::MemMoveFunction CreateMemMoveFunction() { |
size_t actual_size; |
// Allocate buffer in executable space. |
- byte* buffer = static_cast<byte*>(OS::Allocate(1 * KB, |
- &actual_size, |
- true)); |
- if (buffer == NULL) return &MemCopyWrapper; |
+ byte* buffer = static_cast<byte*>(OS::Allocate(1 * KB, &actual_size, true)); |
+ if (buffer == NULL) return NULL; |
MacroAssembler masm(NULL, buffer, static_cast<int>(actual_size)); |
- // Generated code is put into a fixed, unmovable, buffer, and not into |
+ // Generated code is put into a fixed, unmovable buffer, and not into |
// the V8 heap. We can't, and don't, refer to any relocatable addresses |
// (e.g. the JavaScript nan-object). |
@@ -203,185 +276,367 @@ OS::MemCopyFunction CreateMemCopyFunction() { |
const int kSourceOffset = 2 * kPointerSize; |
const int kSizeOffset = 3 * kPointerSize; |
+ // When copying up to this many bytes, use special "small" handlers. |
+ const size_t kSmallCopySize = 8; |
+ // When copying up to this many bytes, use special "medium" handlers. |
+ const size_t kMediumCopySize = 63; |
+ // When non-overlapping region of src and dst is less than this, |
+ // use a more careful implementation (slightly slower). |
+ const size_t kMinMoveDistance = 16; |
+ // Note that these values are dictated by the implementation below, |
+ // do not just change them and hope things will work! |
+ |
int stack_offset = 0; // Update if we change the stack height. |
- if (FLAG_debug_code) { |
- __ cmp(Operand(esp, kSizeOffset + stack_offset), |
- Immediate(OS::kMinComplexMemCopy)); |
- Label ok; |
- __ j(greater_equal, &ok); |
- __ int3(); |
- __ bind(&ok); |
- } |
+ Label backward, backward_much_overlap; |
+ Label forward_much_overlap, small_size, medium_size, pop_and_return; |
+ // edi and esi are pushed here and popped again by MemMoveEmitPopAndReturn. |
+ __ push(edi); |
+ __ push(esi); |
+ stack_offset += 2 * kPointerSize; |
+ Register dst = edi; |
+ Register src = esi; |
+ Register count = ecx; |
+ Register loop_count = edx; |
+ __ mov(dst, Operand(esp, stack_offset + kDestinationOffset)); |
+ __ mov(src, Operand(esp, stack_offset + kSourceOffset)); |
+ __ mov(count, Operand(esp, stack_offset + kSizeOffset)); |
+ |
+ // Identical source and destination: nothing to copy. |
+ __ cmp(dst, src); |
+ __ j(equal, &pop_and_return); |
+ |
if (CpuFeatures::IsSupported(SSE2)) { |
- CpuFeatureScope enable(&masm, SSE2); |
- __ push(edi); |
- __ push(esi); |
- stack_offset += 2 * kPointerSize; |
- Register dst = edi; |
- Register src = esi; |
- Register count = ecx; |
- __ mov(dst, Operand(esp, stack_offset + kDestinationOffset)); |
- __ mov(src, Operand(esp, stack_offset + kSourceOffset)); |
- __ mov(count, Operand(esp, stack_offset + kSizeOffset)); |
- |
- |
- __ movdqu(xmm0, Operand(src, 0)); |
- __ movdqu(Operand(dst, 0), xmm0); |
- __ mov(edx, dst); |
- __ and_(edx, 0xF); |
- __ neg(edx); |
- __ add(edx, Immediate(16)); |
- __ add(dst, edx); |
- __ add(src, edx); |
- __ sub(count, edx); |
- |
- // edi is now aligned. Check if esi is also aligned. |
- Label unaligned_source; |
- __ test(src, Immediate(0x0F)); |
- __ j(not_zero, &unaligned_source); |
+ CpuFeatureScope sse2_scope(&masm, SSE2); |
+ __ prefetch(Operand(src, 0), 1); |
+ // Dispatch on size first; only copies larger than kMediumCopySize take |
+ // the direction-specific paths below. |
+ __ cmp(count, kSmallCopySize); |
+ __ j(below_equal, &small_size); |
+ __ cmp(count, kMediumCopySize); |
+ __ j(below_equal, &medium_size); |
+ __ cmp(dst, src); |
+ __ j(above, &backward); |
+ |
{ |
+ // |dst| is a lower address than |src|. Copy front-to-back. |
+ Label unaligned_source, move_last_15, skip_last_move; |
+ __ mov(eax, src); |
+ __ sub(eax, dst); |
+ __ cmp(eax, kMinMoveDistance); |
+ __ j(below, &forward_much_overlap); |
+ // Copy first 16 bytes. |
+ __ movdqu(xmm0, Operand(src, 0)); |
+ __ movdqu(Operand(dst, 0), xmm0); |
+ // Determine distance to alignment: 16 - (dst & 0xF). |
+ __ mov(edx, dst); |
+ __ and_(edx, 0xF); |
+ __ neg(edx); |
+ __ add(edx, Immediate(16)); |
+ __ add(dst, edx); |
+ __ add(src, edx); |
+ __ sub(count, edx); |
+ // dst is now aligned. Main copy loop. |
+ __ mov(loop_count, count); |
+ __ shr(loop_count, 6); |
+ // Check if src is also aligned. |
+ __ test(src, Immediate(0xF)); |
+ __ j(not_zero, &unaligned_source); |
// Copy loop for aligned source and destination. |
- __ mov(edx, count); |
- Register loop_count = ecx; |
- Register count = edx; |
- __ shr(loop_count, 5); |
- { |
- // Main copy loop. |
- Label loop; |
- __ bind(&loop); |
- __ prefetch(Operand(src, 0x20), 1); |
- __ movdqa(xmm0, Operand(src, 0x00)); |
- __ movdqa(xmm1, Operand(src, 0x10)); |
- __ add(src, Immediate(0x20)); |
- |
- __ movdqa(Operand(dst, 0x00), xmm0); |
- __ movdqa(Operand(dst, 0x10), xmm1); |
- __ add(dst, Immediate(0x20)); |
- |
- __ dec(loop_count); |
- __ j(not_zero, &loop); |
- } |
- |
- // At most 31 bytes to copy. |
- Label move_less_16; |
- __ test(count, Immediate(0x10)); |
- __ j(zero, &move_less_16); |
- __ movdqa(xmm0, Operand(src, 0)); |
- __ add(src, Immediate(0x10)); |
- __ movdqa(Operand(dst, 0), xmm0); |
- __ add(dst, Immediate(0x10)); |
- __ bind(&move_less_16); |
- |
+ MemMoveEmitMainLoop(&masm, &move_last_15, FORWARD, ALIGNED); |
// At most 15 bytes to copy. Copy 16 bytes at end of string. |
+ __ bind(&move_last_15); |
__ and_(count, 0xF); |
+ __ j(zero, &skip_last_move, Label::kNear); |
__ movdqu(xmm0, Operand(src, count, times_1, -0x10)); |
__ movdqu(Operand(dst, count, times_1, -0x10), xmm0); |
+ __ bind(&skip_last_move); |
+ MemMoveEmitPopAndReturn(&masm); |
- __ mov(eax, Operand(esp, stack_offset + kDestinationOffset)); |
- __ pop(esi); |
- __ pop(edi); |
- __ ret(0); |
- } |
- __ Align(16); |
- { |
// Copy loop for unaligned source and aligned destination. |
- // If source is not aligned, we can't read it as efficiently. |
__ bind(&unaligned_source); |
- __ mov(edx, ecx); |
- Register loop_count = ecx; |
- Register count = edx; |
- __ shr(loop_count, 5); |
- { |
- // Main copy loop |
- Label loop; |
- __ bind(&loop); |
- __ prefetch(Operand(src, 0x20), 1); |
- __ movdqu(xmm0, Operand(src, 0x00)); |
- __ movdqu(xmm1, Operand(src, 0x10)); |
- __ add(src, Immediate(0x20)); |
- |
- __ movdqa(Operand(dst, 0x00), xmm0); |
- __ movdqa(Operand(dst, 0x10), xmm1); |
- __ add(dst, Immediate(0x20)); |
- |
- __ dec(loop_count); |
- __ j(not_zero, &loop); |
- } |
+ MemMoveEmitMainLoop(&masm, &move_last_15, FORWARD, UNALIGNED); |
+ __ jmp(&move_last_15); |
+ |
+ // Less than kMinMoveDistance offset between dst and src. |
+ Label loop_until_aligned, last_15_much_overlap; |
+ __ bind(&loop_until_aligned); |
+ __ mov_b(eax, Operand(src, 0)); |
+ __ inc(src); |
+ __ mov_b(Operand(dst, 0), eax); |
+ __ inc(dst); |
+ __ dec(count); |
+ __ bind(&forward_much_overlap); // Entry point into this block. |
+ __ test(dst, Immediate(0xF)); |
+ __ j(not_zero, &loop_until_aligned); |
+ // dst is now aligned, src can't be. Main copy loop. |
+ __ mov(loop_count, count); |
+ __ shr(loop_count, 6); |
+ MemMoveEmitMainLoop(&masm, &last_15_much_overlap, FORWARD, UNALIGNED); |
+ __ bind(&last_15_much_overlap); |
+ __ and_(count, 0xF); |
+ __ j(zero, &pop_and_return); |
+ __ cmp(count, kSmallCopySize); |
+ __ j(below_equal, &small_size); |
+ __ jmp(&medium_size); |
+ } |
- // At most 31 bytes to copy. |
- Label move_less_16; |
- __ test(count, Immediate(0x10)); |
- __ j(zero, &move_less_16); |
+ { |
+ // |dst| is a higher address than |src|. Copy backwards. |
+ Label unaligned_source, move_first_15, skip_last_move; |
+ __ bind(&backward); |
+ // |dst| and |src| always point to the end of what's left to copy. |
+ __ add(dst, count); |
+ __ add(src, count); |
+ __ mov(eax, dst); |
+ __ sub(eax, src); |
+ __ cmp(eax, kMinMoveDistance); |
+ __ j(below, &backward_much_overlap); |
+ // Copy last 16 bytes. |
+ __ movdqu(xmm0, Operand(src, -0x10)); |
+ __ movdqu(Operand(dst, -0x10), xmm0); |
+ // Find distance to alignment: dst & 0xF |
+ __ mov(edx, dst); |
+ __ and_(edx, 0xF); |
+ __ sub(dst, edx); |
+ __ sub(src, edx); |
+ __ sub(count, edx); |
+ // dst is now aligned. Main copy loop. |
+ __ mov(loop_count, count); |
+ __ shr(loop_count, 6); |
+ // Check if src is also aligned. |
+ __ test(src, Immediate(0xF)); |
+ __ j(not_zero, &unaligned_source); |
+ // Copy loop for aligned source and destination. |
+ MemMoveEmitMainLoop(&masm, &move_first_15, BACKWARD, ALIGNED); |
+ // At most 15 bytes to copy. Copy 16 bytes at beginning of string. |
+ __ bind(&move_first_15); |
+ __ and_(count, 0xF); |
+ __ j(zero, &skip_last_move, Label::kNear); |
+ __ sub(src, count); |
+ __ sub(dst, count); |
__ movdqu(xmm0, Operand(src, 0)); |
- __ add(src, Immediate(0x10)); |
- __ movdqa(Operand(dst, 0), xmm0); |
- __ add(dst, Immediate(0x10)); |
- __ bind(&move_less_16); |
- |
- // At most 15 bytes to copy. Copy 16 bytes at end of string. |
- __ and_(count, 0x0F); |
- __ movdqu(xmm0, Operand(src, count, times_1, -0x10)); |
- __ movdqu(Operand(dst, count, times_1, -0x10), xmm0); |
+ __ movdqu(Operand(dst, 0), xmm0); |
+ __ bind(&skip_last_move); |
+ MemMoveEmitPopAndReturn(&masm); |
- __ mov(eax, Operand(esp, stack_offset + kDestinationOffset)); |
- __ pop(esi); |
- __ pop(edi); |
- __ ret(0); |
+ // Copy loop for unaligned source and aligned destination. |
+ __ bind(&unaligned_source); |
+ MemMoveEmitMainLoop(&masm, &move_first_15, BACKWARD, UNALIGNED); |
+ __ jmp(&move_first_15); |
+ |
+ // Less than kMinMoveDistance offset between dst and src. |
+ Label loop_until_aligned, first_15_much_overlap; |
+ __ bind(&loop_until_aligned); |
+ __ dec(src); |
+ __ dec(dst); |
+ __ mov_b(eax, Operand(src, 0)); |
+ __ mov_b(Operand(dst, 0), eax); |
+ __ dec(count); |
+ __ bind(&backward_much_overlap); // Entry point into this block. |
+ __ test(dst, Immediate(0xF)); |
+ __ j(not_zero, &loop_until_aligned); |
+ // dst is now aligned, src can't be. Main copy loop. |
+ __ mov(loop_count, count); |
+ __ shr(loop_count, 6); |
+ MemMoveEmitMainLoop(&masm, &first_15_much_overlap, BACKWARD, UNALIGNED); |
+ __ bind(&first_15_much_overlap); |
+ __ and_(count, 0xF); |
+ __ j(zero, &pop_and_return); |
+ // Small/medium handlers expect dst/src to point to the beginning. |
+ __ sub(dst, count); |
+ __ sub(src, count); |
+ __ cmp(count, kSmallCopySize); |
+ __ j(below_equal, &small_size); |
+ __ jmp(&medium_size); |
+ } |
+ { |
+ // Special handlers for 9 <= copy_size < 64. No assumptions about |
+ // alignment or move distance, so all reads must be unaligned and |
+ // must happen before any writes. |
+ Label f9_16, f17_32, f33_48, f49_63; |
+ |
+ __ bind(&f9_16); |
+ __ movdbl(xmm0, Operand(src, 0)); |
+ __ movdbl(xmm1, Operand(src, count, times_1, -8)); |
+ __ movdbl(Operand(dst, 0), xmm0); |
+ __ movdbl(Operand(dst, count, times_1, -8), xmm1); |
+ MemMoveEmitPopAndReturn(&masm); |
+ |
+ __ bind(&f17_32); |
+ __ movdqu(xmm0, Operand(src, 0)); |
+ __ movdqu(xmm1, Operand(src, count, times_1, -0x10)); |
+ __ movdqu(Operand(dst, 0x00), xmm0); |
+ __ movdqu(Operand(dst, count, times_1, -0x10), xmm1); |
+ MemMoveEmitPopAndReturn(&masm); |
+ |
+ __ bind(&f33_48); |
+ __ movdqu(xmm0, Operand(src, 0x00)); |
+ __ movdqu(xmm1, Operand(src, 0x10)); |
+ __ movdqu(xmm2, Operand(src, count, times_1, -0x10)); |
+ __ movdqu(Operand(dst, 0x00), xmm0); |
+ __ movdqu(Operand(dst, 0x10), xmm1); |
+ __ movdqu(Operand(dst, count, times_1, -0x10), xmm2); |
+ MemMoveEmitPopAndReturn(&masm); |
+ |
+ __ bind(&f49_63); |
+ __ movdqu(xmm0, Operand(src, 0x00)); |
+ __ movdqu(xmm1, Operand(src, 0x10)); |
+ __ movdqu(xmm2, Operand(src, 0x20)); |
+ __ movdqu(xmm3, Operand(src, count, times_1, -0x10)); |
+ __ movdqu(Operand(dst, 0x00), xmm0); |
+ __ movdqu(Operand(dst, 0x10), xmm1); |
+ __ movdqu(Operand(dst, 0x20), xmm2); |
+ __ movdqu(Operand(dst, count, times_1, -0x10), xmm3); |
+ MemMoveEmitPopAndReturn(&masm); |
+ |
+ // Table of absolute handler addresses, computed from the positions of |
+ // the labels bound above within |buffer|. |
+ medium_handlers = new size_t[4]; |
+ medium_handlers[0] = reinterpret_cast<intptr_t>(buffer) + f9_16.pos(); |
+ medium_handlers[1] = reinterpret_cast<intptr_t>(buffer) + f17_32.pos(); |
+ medium_handlers[2] = reinterpret_cast<intptr_t>(buffer) + f33_48.pos(); |
+ medium_handlers[3] = reinterpret_cast<intptr_t>(buffer) + f49_63.pos(); |
+ |
+ __ bind(&medium_size); // Entry point into this block. |
+ // Table index is (count - 1) >> 4: 0 for 9-16 bytes up to 3 for 49-63. |
+ __ mov(eax, count); |
+ __ dec(eax); |
+ __ shr(eax, 4); |
+ if (FLAG_debug_code) { |
+ Label ok; |
+ __ cmp(eax, 3); |
+ __ j(below_equal, &ok); |
+ __ int3(); |
+ __ bind(&ok); |
+ } |
+ __ mov(eax, Operand(eax, times_4, |
+ reinterpret_cast<intptr_t>(medium_handlers))); |
+ __ jmp(eax); |
+ } |
+ { |
+ // Specialized copiers for copy_size <= 8 bytes. |
+ Label f0, f1, f2, f3, f4, f5_8; |
+ __ bind(&f0); |
+ MemMoveEmitPopAndReturn(&masm); |
+ |
+ __ bind(&f1); |
+ __ mov_b(eax, Operand(src, 0)); |
+ __ mov_b(Operand(dst, 0), eax); |
+ MemMoveEmitPopAndReturn(&masm); |
+ |
+ __ bind(&f2); |
+ __ mov_w(eax, Operand(src, 0)); |
+ __ mov_w(Operand(dst, 0), eax); |
+ MemMoveEmitPopAndReturn(&masm); |
+ |
+ __ bind(&f3); |
+ __ mov_w(eax, Operand(src, 0)); |
+ __ mov_b(edx, Operand(src, 2)); |
+ __ mov_w(Operand(dst, 0), eax); |
+ __ mov_b(Operand(dst, 2), edx); |
+ MemMoveEmitPopAndReturn(&masm); |
+ |
+ __ bind(&f4); |
+ __ mov(eax, Operand(src, 0)); |
+ __ mov(Operand(dst, 0), eax); |
+ MemMoveEmitPopAndReturn(&masm); |
+ |
+ __ bind(&f5_8); |
+ // Two 4-byte moves, at offset 0 and at count - 4, cover sizes 5 to 8. |
+ __ mov(eax, Operand(src, 0)); |
+ __ mov(edx, Operand(src, count, times_1, -4)); |
+ __ mov(Operand(dst, 0), eax); |
+ __ mov(Operand(dst, count, times_1, -4), edx); |
+ MemMoveEmitPopAndReturn(&masm); |
+ |
+ // One table entry per byte count 0..8; entries 5..8 share f5_8. |
+ small_handlers = new size_t[9]; |
+ small_handlers[0] = reinterpret_cast<intptr_t>(buffer) + f0.pos(); |
+ small_handlers[1] = reinterpret_cast<intptr_t>(buffer) + f1.pos(); |
+ small_handlers[2] = reinterpret_cast<intptr_t>(buffer) + f2.pos(); |
+ small_handlers[3] = reinterpret_cast<intptr_t>(buffer) + f3.pos(); |
+ small_handlers[4] = reinterpret_cast<intptr_t>(buffer) + f4.pos(); |
+ small_handlers[5] = reinterpret_cast<intptr_t>(buffer) + f5_8.pos(); |
+ small_handlers[6] = reinterpret_cast<intptr_t>(buffer) + f5_8.pos(); |
+ small_handlers[7] = reinterpret_cast<intptr_t>(buffer) + f5_8.pos(); |
+ small_handlers[8] = reinterpret_cast<intptr_t>(buffer) + f5_8.pos(); |
+ |
+ __ bind(&small_size); // Entry point into this block. |
+ if (FLAG_debug_code) { |
+ Label ok; |
+ __ cmp(count, 8); |
+ __ j(below_equal, &ok); |
+ __ int3(); |
+ __ bind(&ok); |
+ } |
+ // count itself is the table index here. |
+ __ mov(eax, Operand(count, times_4, |
+ reinterpret_cast<intptr_t>(small_handlers))); |
+ __ jmp(eax); |
} |
- |
} else { |
- // SSE2 not supported. Unlikely to happen in practice. |
- __ push(edi); |
- __ push(esi); |
- stack_offset += 2 * kPointerSize; |
- __ cld(); |
- Register dst = edi; |
- Register src = esi; |
- Register count = ecx; |
- __ mov(dst, Operand(esp, stack_offset + kDestinationOffset)); |
- __ mov(src, Operand(esp, stack_offset + kSourceOffset)); |
- __ mov(count, Operand(esp, stack_offset + kSizeOffset)); |
- |
- // Copy the first word. |
- __ mov(eax, Operand(src, 0)); |
- __ mov(Operand(dst, 0), eax); |
- |
- // Increment src,dstso that dst is aligned. |
- __ mov(edx, dst); |
- __ and_(edx, 0x03); |
- __ neg(edx); |
- __ add(edx, Immediate(4)); // edx = 4 - (dst & 3) |
- __ add(dst, edx); |
- __ add(src, edx); |
- __ sub(count, edx); |
- // edi is now aligned, ecx holds number of remaning bytes to copy. |
- |
- __ mov(edx, count); |
- count = edx; |
- __ shr(ecx, 2); // Make word count instead of byte count. |
- __ rep_movs(); |
- |
- // At most 3 bytes left to copy. Copy 4 bytes at end of string. |
- __ and_(count, 3); |
- __ mov(eax, Operand(src, count, times_1, -4)); |
- __ mov(Operand(dst, count, times_1, -4), eax); |
- |
- __ mov(eax, Operand(esp, stack_offset + kDestinationOffset)); |
- __ pop(esi); |
- __ pop(edi); |
- __ ret(0); |
+ // No SSE2. |
+ Label forward; |
+ __ cmp(count, 0); |
+ __ j(equal, &pop_and_return); |
+ __ cmp(dst, src); |
+ __ j(above, &backward); |
+ __ jmp(&forward); |
+ { |
+ // Simple forward copier. |
+ Label forward_loop_1byte, forward_loop_4byte; |
+ __ bind(&forward_loop_4byte); |
+ __ mov(eax, Operand(src, 0)); |
+ __ sub(count, Immediate(4)); |
+ __ add(src, Immediate(4)); |
+ __ mov(Operand(dst, 0), eax); |
+ __ add(dst, Immediate(4)); |
+ __ bind(&forward); // Entry point. |
+ __ cmp(count, 3); |
+ __ j(above, &forward_loop_4byte); |
+ __ bind(&forward_loop_1byte); |
+ __ cmp(count, 0); |
+ __ j(below_equal, &pop_and_return); |
+ __ mov_b(eax, Operand(src, 0)); |
+ __ dec(count); |
+ __ inc(src); |
+ __ mov_b(Operand(dst, 0), eax); |
+ __ inc(dst); |
+ __ jmp(&forward_loop_1byte); |
+ } |
+ { |
+ // Simple backward copier. |
+ Label backward_loop_1byte, backward_loop_4byte, entry_shortcut; |
+ __ bind(&backward); |
+ __ add(src, count); |
+ __ add(dst, count); |
+ __ cmp(count, 3); |
+ __ j(below_equal, &entry_shortcut); |
+ |
+ __ bind(&backward_loop_4byte); |
+ __ sub(src, Immediate(4)); |
+ __ sub(count, Immediate(4)); |
+ __ mov(eax, Operand(src, 0)); |
+ __ sub(dst, Immediate(4)); |
+ __ mov(Operand(dst, 0), eax); |
+ __ cmp(count, 3); |
+ __ j(above, &backward_loop_4byte); |
+ __ bind(&backward_loop_1byte); |
+ __ cmp(count, 0); |
+ __ j(below_equal, &pop_and_return); |
+ __ bind(&entry_shortcut); |
+ __ dec(src); |
+ __ dec(count); |
+ __ mov_b(eax, Operand(src, 0)); |
+ __ dec(dst); |
+ __ mov_b(Operand(dst, 0), eax); |
+ __ jmp(&backward_loop_1byte); |
+ } |
} |
+ // Shared exit for every path that jumps to pop_and_return. |
+ __ bind(&pop_and_return); |
+ MemMoveEmitPopAndReturn(&masm); |
+ |
CodeDesc desc; |
masm.GetCode(&desc); |
ASSERT(!RelocInfo::RequiresRelocation(desc)); |
- |
CPU::FlushICache(buffer, actual_size); |
OS::ProtectCode(buffer, actual_size); |
- return FUNCTION_CAST<OS::MemCopyFunction>(buffer); |
+ return FUNCTION_CAST<OS::MemMoveFunction>(buffer); |
} |
+ |
#undef __ |
// ------------------------------------------------------------------------- |