Index: src/ia32/codegen-ia32.cc
diff --git a/src/ia32/codegen-ia32.cc b/src/ia32/codegen-ia32.cc
index 7d8116dcb68c3f74a4e3433005f98b8e25097f41..e8b56f7b2f39dec63d7bfcc732188c4ab40b8853 100644
--- a/src/ia32/codegen-ia32.cc
+++ b/src/ia32/codegen-ia32.cc
@@ -13494,6 +13494,197 @@ void StringCompareStub::Generate(MacroAssembler* masm) { |
__ TailCallRuntime(Runtime::kStringCompare, 2, 1); |
} |
+ |
+MemCopyFunction CreateMemCopyFunction() { |
+ size_t actual_size; |
+ byte* buffer = static_cast<byte*>(OS::Allocate(Assembler::kMinimalBufferSize, |
+ &actual_size, |
+ true)); |
+ CHECK(buffer); |
+ HandleScope handles; |
+ MacroAssembler assembler(buffer, static_cast<int>(actual_size)); |
Erik Corry
2010/06/03 20:29:49
Might as well just call this 'masm'?
Lasse Reichstein
2010/06/04 11:52:13
True. Just have to redefine __ as well.
|
+ MacroAssembler* masm = &assembler; // For the __ macro. |
+ |
+ // Generated code is put into a fixed, unmovable, buffer, and not into |
+ // the V8 heap. We can't, and don't, refer to any relocatable addresses |
+ // (e.g. the JavaScript nan-object). |
+ |
+ // 32-bit C declaration function calls pass arguments on stack. |
+ |
+ // Stack layout: |
+ // esp[12]: Third argument, size. |
+ // esp[8]: Second argument, source pointer. |
+ // esp[4]: First argument, destination pointer. |
+ // esp[0]: return address |
+ |
+ const int kDestinationOffset = 1 * kPointerSize; |
+ const int kSourceOffset = 2 * kPointerSize; |
+ const int kSizeOffset = 3 * kPointerSize; |
+ |
+ int stack_offset = 0; // Update if we change the stack height. |
+ |
+ if (FLAG_debug_code) { |
+ __ cmp(Operand(esp, kSizeOffset + stack_offset), |
+ Immediate(kMinComplexMemCopy)); |
+ Label ok; |
+ __ j(greater_equal, &ok); |
+ __ int3(); |
+ __ bind(&ok); |
+ } |
+ if (CpuFeatures::IsSupported(SSE2)) { |
+ CpuFeatures::Scope enable(SSE2); |
+ __ push(edi); |
+ __ push(esi); |
+ stack_offset += 2 * kPointerSize; |
+ __ mov(edi, Operand(esp, stack_offset + kDestinationOffset)); |
+ __ mov(esi, Operand(esp, stack_offset + kSourceOffset)); |
+ __ mov(ecx, Operand(esp, stack_offset + kSizeOffset)); |
Erik Corry
2010/06/04 07:18:10
I think the clarity of this code would benefit fro
Lasse Reichstein
2010/06/04 11:52:13
Done.
|
+ |
+ __ movdqu(xmm0, Operand(esi, 0)); |
+ __ movdqu(Operand(edi, 0), xmm0); |
+ __ mov(edx, edi); |
+ __ and_(edx, 0x0F); |
Erik Corry
2010/06/04 07:18:10
0x0F -> 0xF
Lasse Reichstein
2010/06/04 11:52:13
Done.
|
+ __ neg(edx); |
+ __ add(Operand(edx), Immediate(16)); |
+ __ add(edi, Operand(edx)); |
+ __ add(esi, Operand(edx)); |
+ __ sub(Operand(ecx), edx); |
Erik Corry
2010/06/04 07:18:10
Where do the bytes you skipped over here get copie
Lasse Reichstein
2010/06/04 11:52:13
They were copied just before. I only increase src/
|
+ |
+ // edi is now aligned. Check if esi is also aligned. |
+ Label unaligned_source; |
+ __ test(Operand(esi), Immediate(0x0F)); |
+ __ j(not_zero, &unaligned_source); |
+ { |
+ __ IncrementCounter(&Counters::memcopy_aligned, 1); |
+ // Copy loop for aligned source and destination. |
+ __ mov(edx, ecx); |
+ __ shr(ecx, 5); |
+ { |
+ // Main copy loop. |
+ Label loop; |
+ __ bind(&loop); |
+ __ prefetch(Operand(esi, 0x20), 1); |
+ __ movdqa(xmm0, Operand(esi, 0x00)); |
Erik Corry
2010/06/04 07:18:10
Apart from the dqa/dqu this seems to be duplicated
Lasse Reichstein
2010/06/04 11:52:13
Correct.
This is the fast case code where source i
|
+ __ movdqa(xmm1, Operand(esi, 0x10)); |
+ __ add(Operand(esi), Immediate(0x20)); |
+ |
+ __ movdqa(Operand(edi, 0x00), xmm0); |
+ __ movdqa(Operand(edi, 0x10), xmm1); |
+ __ add(Operand(edi), Immediate(0x20)); |
+ |
+ __ dec(ecx); |
+ __ j(not_zero, &loop); |
+ } |
+ |
+ // At most 31 bytes to copy. |
Erik Corry
2010/06/04 07:18:10
This code seems to be duplicated lower down.
Lasse Reichstein
2010/06/04 11:52:13
Not identically, the second copy uses movdqu for t
|
+ Label move_less_16; |
+ __ test(Operand(edx), Immediate(0x10)); |
+ __ j(zero, &move_less_16); |
+ __ movdqa(xmm0, Operand(esi, 0)); |
+ __ add(Operand(esi), Immediate(0x10)); |
+ __ movdqa(Operand(edi, 0), xmm0); |
+ __ add(Operand(edi), Immediate(0x10)); |
+ __ bind(&move_less_16); |
+ |
+ // At most 15 bytes to copy. Copy 16 bytes at end of string. |
+ __ and_(edx, 0x0F); |
+ __ movdqu(xmm0, Operand(esi, edx, times_1, -16)); |
+ __ movdqu(Operand(edi, edx, times_1, -16), xmm0); |
+ |
+ __ pop(esi); |
+ __ pop(edi); |
+ __ ret(0); |
+ } |
+ __ Align(16); |
+ { |
+ // Copy loop for unaligned source and aligned destination. |
+ // If source is not aligned, we can't read it as efficiently. |
+ __ bind(&unaligned_source); |
+ __ IncrementCounter(&Counters::memcopy_unaligned, 1); |
+ __ mov(edx, ecx); |
+ __ shr(ecx, 5); |
+ { |
+ // Main copy loop |
+ Label loop; |
+ __ bind(&loop); |
+ __ prefetch(Operand(esi, 0x20), 1); |
+ __ movdqu(xmm0, Operand(esi, 0x00)); |
+ __ movdqu(xmm1, Operand(esi, 0x10)); |
+ __ add(Operand(esi), Immediate(0x20)); |
+ |
+ __ movdqa(Operand(edi, 0x00), xmm0); |
+ __ movdqa(Operand(edi, 0x10), xmm1); |
+ __ add(Operand(edi), Immediate(0x20)); |
+ |
+ __ dec(ecx); |
+ __ j(not_zero, &loop); |
+ } |
+ |
+ // At most 31 bytes to copy. |
+ Label move_less_16; |
+ __ test(Operand(edx), Immediate(0x10)); |
+ __ j(zero, &move_less_16); |
+ __ movdqu(xmm0, Operand(esi, 0)); |
+ __ add(Operand(esi), Immediate(0x10)); |
+ __ movdqa(Operand(edi, 0), xmm0); |
+ __ add(Operand(edi), Immediate(0x10)); |
+ __ bind(&move_less_16); |
+ |
+ // At most 15 bytes to copy. Copy 16 bytes at end of string. |
+ __ and_(edx, 0x0F); |
+ __ movdqu(xmm0, Operand(esi, edx, times_1, -0x10)); |
+ __ movdqu(Operand(edi, edx, times_1, -0x10), xmm0); |
+ |
+ __ pop(esi); |
+ __ pop(edi); |
+ __ ret(0); |
+ } |
+ |
+ } else { |
+ __ IncrementCounter(&Counters::memcopy_noxmm, 1); |
+ // SSE2 not supported. Unlikely to happen in practice. |
+ __ push(edi); |
+ __ push(esi); |
+ stack_offset += 2 * kPointerSize; |
+ __ cld(); |
+ __ mov(edi, Operand(esp, stack_offset + kDestinationOffset)); |
+ __ mov(esi, Operand(esp, stack_offset + kSourceOffset)); |
+ __ mov(ecx, Operand(esp, stack_offset + kSizeOffset)); |
+ |
+ // Copy the first word. |
+ __ mov(eax, Operand(esi, 0)); |
+ __ mov(Operand(edi, 0), eax); |
+ |
+ // Increment esi,edi so that edi is aligned. |
+ __ mov(edx, edi); |
+ __ and_(edx, 0x03); |
+ __ neg(edx); |
+ __ add(Operand(edx), Immediate(4)); // edx = 4 - (edi & 3) |
+ __ add(edi, Operand(edx)); |
+ __ add(esi, Operand(edx)); |
+ __ sub(Operand(ecx), edx); |
+ // edi is now aligned, ecx holds number of remaning bytes to copy. |
+ __ mov(edx, ecx); |
+ __ shr(ecx, 2); // Make word count instead of byte count. |
+ |
+ __ rep_movs(); |
+ |
+ // At most 3 bytes left to copy. Copy 4 bytes at end of string. |
+ __ and_(edx, 3); |
+ __ mov(eax, Operand(esi, edx, times_1, -4)); |
+ __ mov(Operand(edi, edx, times_1, -4), eax); |
+ |
+ __ pop(esi); |
+ __ pop(edi); |
+ __ ret(0); |
+ } |
+ |
+ CodeDesc desc; |
+ assembler.GetCode(&desc); |
+ // Call the function from C++. |
+ return FUNCTION_CAST<MemCopyFunction>(buffer); |
+} |
+ |
#undef __ |
} } // namespace v8::internal |