Chromium Code Reviews

Unified Diff: src/ia32/codegen-ia32.cc

Issue 2582001: Add optimized version of memcpy on ia32. (Closed)
Patch Set: Created 10 years, 7 months ago
Index: src/ia32/codegen-ia32.cc
diff --git a/src/ia32/codegen-ia32.cc b/src/ia32/codegen-ia32.cc
index 7d8116dcb68c3f74a4e3433005f98b8e25097f41..e8b56f7b2f39dec63d7bfcc732188c4ab40b8853 100644
--- a/src/ia32/codegen-ia32.cc
+++ b/src/ia32/codegen-ia32.cc
@@ -13494,6 +13494,197 @@ void StringCompareStub::Generate(MacroAssembler* masm) {
__ TailCallRuntime(Runtime::kStringCompare, 2, 1);
}
+
+MemCopyFunction CreateMemCopyFunction() {
+ size_t actual_size;
+ byte* buffer = static_cast<byte*>(OS::Allocate(Assembler::kMinimalBufferSize,
+ &actual_size,
+ true));
+ CHECK(buffer);
+ HandleScope handles;
+ MacroAssembler assembler(buffer, static_cast<int>(actual_size));
Erik Corry 2010/06/03 20:29:49 Might as well just call this 'masm'?
Lasse Reichstein 2010/06/04 11:52:13 True. Just have to redefine __ as well.
+ MacroAssembler* masm = &assembler; // For the __ macro.
+
+ // Generated code is put into a fixed, unmovable buffer, and not into
+ // the V8 heap. We can't, and don't, refer to any relocatable addresses
+ // (e.g. the JavaScript NaN object).
+
+ // The 32-bit cdecl calling convention passes all arguments on the stack.
+
+ // Stack layout:
+ // esp[12]: Third argument, size.
+ // esp[8]: Second argument, source pointer.
+ // esp[4]: First argument, destination pointer.
+ // esp[0]: Return address.
+
+ const int kDestinationOffset = 1 * kPointerSize;
+ const int kSourceOffset = 2 * kPointerSize;
+ const int kSizeOffset = 3 * kPointerSize;
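The three stack slots above are plain 32-bit cdecl, so the finished buffer is callable through an ordinary function pointer. A sketch of the matching type, assuming MemCopyFunction is declared along these lines elsewhere in the patch:

    // Assumed shape of MemCopyFunction: arguments pushed right to left,
    // so size sits highest on the stack, matching the layout above.
    typedef void (*MemCopyFunction)(void* dest, const void* src, size_t size);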
+
+ int stack_offset = 0; // Update if we change the stack height.
+
+ if (FLAG_debug_code) {
+ __ cmp(Operand(esp, kSizeOffset + stack_offset),
+ Immediate(kMinComplexMemCopy));
+ Label ok;
+ __ j(greater_equal, &ok);
+ __ int3();
+ __ bind(&ok);
+ }
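The guard amounts to a debug-only precondition; as an analogy using V8's ASSERT macro (not what the stub emits; the int3 is the machine-level breakpoint trap for the same condition, and kMinComplexMemCopy comes from the rest of the patch):

    // The stub is only meant for copies of at least kMinComplexMemCopy
    // bytes; the generated int3 traps when that contract is violated.
    ASSERT(size >= kMinComplexMemCopy);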
+ if (CpuFeatures::IsSupported(SSE2)) {
+ CpuFeatures::Scope enable(SSE2);
+ __ push(edi);
+ __ push(esi);
+ stack_offset += 2 * kPointerSize;
+ __ mov(edi, Operand(esp, stack_offset + kDestinationOffset));
+ __ mov(esi, Operand(esp, stack_offset + kSourceOffset));
+ __ mov(ecx, Operand(esp, stack_offset + kSizeOffset));
Erik Corry 2010/06/04 07:18:10 I think the clarity of this code would benefit from …
Lasse Reichstein 2010/06/04 11:52:13 Done.
+
+ __ movdqu(xmm0, Operand(esi, 0));
+ __ movdqu(Operand(edi, 0), xmm0);
+ __ mov(edx, edi);
+ __ and_(edx, 0x0F);
Erik Corry 2010/06/04 07:18:10 0x0F -> 0xF
Lasse Reichstein 2010/06/04 11:52:13 Done.
+ __ neg(edx);
+ __ add(Operand(edx), Immediate(16));
+ __ add(edi, Operand(edx));
+ __ add(esi, Operand(edx));
+ __ sub(Operand(ecx), edx);
Erik Corry 2010/06/04 07:18:10 Where do the bytes you skipped over here get copied?
Lasse Reichstein 2010/06/04 11:52:13 They were copied just before. I only increase src/dst by at most 16 bytes, all of which the unaligned copy above has already written.
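In C++ terms the prologue under discussion does roughly the following (a minimal sketch with SSE2 intrinsics, not part of the patch; dst, src and count stand for edi, esi and ecx):

    #include <emmintrin.h>  // SSE2 intrinsics
    #include <cstdint>
    #include <cstddef>

    static inline void AlignDestination(uint8_t*& dst, const uint8_t*& src,
                                        size_t& count) {
      // Copy 16 bytes unaligned first (the movdqu pair in the patch).
      _mm_storeu_si128(reinterpret_cast<__m128i*>(dst),
                       _mm_loadu_si128(reinterpret_cast<const __m128i*>(src)));
      // Advance so dst lands on a 16-byte boundary. adjust is 1..16, so
      // every byte skipped here was already written by the copy above.
      size_t adjust = 16 - (reinterpret_cast<uintptr_t>(dst) & 0xF);
      dst += adjust;
      src += adjust;
      count -= adjust;
    }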
+
+ // edi is now aligned. Check if esi is also aligned.
+ Label unaligned_source;
+ __ test(Operand(esi), Immediate(0x0F));
+ __ j(not_zero, &unaligned_source);
+ {
+ __ IncrementCounter(&Counters::memcopy_aligned, 1);
+ // Copy loop for aligned source and destination.
+ __ mov(edx, ecx);
+ __ shr(ecx, 5);
+ {
+ // Main copy loop.
+ Label loop;
+ __ bind(&loop);
+ __ prefetch(Operand(esi, 0x20), 1);
+ __ movdqa(xmm0, Operand(esi, 0x00));
Erik Corry 2010/06/04 07:18:10 Apart from the dqa/dqu this seems to be duplicated lower down.
Lasse Reichstein 2010/06/04 11:52:13 Correct. This is the fast case code where source is aligned.
+ __ movdqa(xmm1, Operand(esi, 0x10));
+ __ add(Operand(esi), Immediate(0x20));
+
+ __ movdqa(Operand(edi, 0x00), xmm0);
+ __ movdqa(Operand(edi, 0x10), xmm1);
+ __ add(Operand(edi), Immediate(0x20));
+
+ __ dec(ecx);
+ __ j(not_zero, &loop);
+ }
+
+ // At most 31 bytes to copy.
Erik Corry 2010/06/04 07:18:10 This code seems to be duplicated lower down.
Lasse Reichstein 2010/06/04 11:52:13 Not identically, the second copy uses movdqu for the loads.
+ Label move_less_16;
+ __ test(Operand(edx), Immediate(0x10));
+ __ j(zero, &move_less_16);
+ __ movdqa(xmm0, Operand(esi, 0));
+ __ add(Operand(esi), Immediate(0x10));
+ __ movdqa(Operand(edi, 0), xmm0);
+ __ add(Operand(edi), Immediate(0x10));
+ __ bind(&move_less_16);
+
+ // At most 15 bytes to copy. Copy 16 bytes at end of string.
+ __ and_(edx, 0x0F);
+ __ movdqu(xmm0, Operand(esi, edx, times_1, -16));
+ __ movdqu(Operand(edi, edx, times_1, -16), xmm0);
+
+ __ pop(esi);
+ __ pop(edi);
+ __ ret(0);
+ }
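In intrinsics form the aligned branch corresponds roughly to the sketch below (a hypothetical helper, not the patch itself; it assumes dst is 16-byte aligned and the total copy is at least 16 bytes, which the prologue and the kMinComplexMemCopy guard ensure):

    #include <emmintrin.h>
    #include <xmmintrin.h>  // _mm_prefetch
    #include <cstdint>
    #include <cstddef>

    static void CopyAlignedSSE2(uint8_t* dst, const uint8_t* src, size_t count) {
      // Main loop: 32 bytes per iteration with aligned loads and stores,
      // prefetching one iteration ahead (hint level approximates the patch).
      for (size_t n = count >> 5; n > 0; n--) {
        _mm_prefetch(reinterpret_cast<const char*>(src) + 0x20, _MM_HINT_T1);
        __m128i a = _mm_load_si128(reinterpret_cast<const __m128i*>(src));
        __m128i b = _mm_load_si128(reinterpret_cast<const __m128i*>(src + 0x10));
        src += 0x20;
        _mm_store_si128(reinterpret_cast<__m128i*>(dst), a);
        _mm_store_si128(reinterpret_cast<__m128i*>(dst + 0x10), b);
        dst += 0x20;
      }
      size_t rem = count & 0x1F;  // at most 31 bytes remain
      if (rem & 0x10) {           // one more aligned 16-byte block
        _mm_store_si128(reinterpret_cast<__m128i*>(dst),
                        _mm_load_si128(reinterpret_cast<const __m128i*>(src)));
        src += 0x10;
        dst += 0x10;
      }
      rem &= 0xF;
      // Final 0..15 bytes: unaligned copy of the last 16 bytes of the
      // range, deliberately overlapping bytes already written, so no
      // byte-by-byte loop is needed.
      _mm_storeu_si128(
          reinterpret_cast<__m128i*>(dst + rem - 16),
          _mm_loadu_si128(reinterpret_cast<const __m128i*>(src + rem - 16)));
    }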
+ __ Align(16);
+ {
+ // Copy loop for unaligned source and aligned destination.
+ // If source is not aligned, we can't read it as efficiently.
+ __ bind(&unaligned_source);
+ __ IncrementCounter(&Counters::memcopy_unaligned, 1);
+ __ mov(edx, ecx);
+ __ shr(ecx, 5);
+ {
+ // Main copy loop
+ Label loop;
+ __ bind(&loop);
+ __ prefetch(Operand(esi, 0x20), 1);
+ __ movdqu(xmm0, Operand(esi, 0x00));
+ __ movdqu(xmm1, Operand(esi, 0x10));
+ __ add(Operand(esi), Immediate(0x20));
+
+ __ movdqa(Operand(edi, 0x00), xmm0);
+ __ movdqa(Operand(edi, 0x10), xmm1);
+ __ add(Operand(edi), Immediate(0x20));
+
+ __ dec(ecx);
+ __ j(not_zero, &loop);
+ }
+
+ // At most 31 bytes to copy.
+ Label move_less_16;
+ __ test(Operand(edx), Immediate(0x10));
+ __ j(zero, &move_less_16);
+ __ movdqu(xmm0, Operand(esi, 0));
+ __ add(Operand(esi), Immediate(0x10));
+ __ movdqa(Operand(edi, 0), xmm0);
+ __ add(Operand(edi), Immediate(0x10));
+ __ bind(&move_less_16);
+
+ // At most 15 bytes to copy. Copy 16 bytes at end of string.
+ __ and_(edx, 0x0F);
+ __ movdqu(xmm0, Operand(esi, edx, times_1, -0x10));
+ __ movdqu(Operand(edi, edx, times_1, -0x10), xmm0);
+
+ __ pop(esi);
+ __ pop(edi);
+ __ ret(0);
+ }
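The unaligned branch is the same sketch with only the loads switched to their unaligned forms, e.g. in the main loop:

    __m128i a = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src));
    __m128i b = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src + 0x10));

The stores stay movdqa: the prologue already aligned the destination, and movdqa requires an aligned address in any case.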
+
+ } else {
+ __ IncrementCounter(&Counters::memcopy_noxmm, 1);
+ // SSE2 not supported. Unlikely to happen in practice.
+ __ push(edi);
+ __ push(esi);
+ stack_offset += 2 * kPointerSize;
+ __ cld();
+ __ mov(edi, Operand(esp, stack_offset + kDestinationOffset));
+ __ mov(esi, Operand(esp, stack_offset + kSourceOffset));
+ __ mov(ecx, Operand(esp, stack_offset + kSizeOffset));
+
+ // Copy the first word.
+ __ mov(eax, Operand(esi, 0));
+ __ mov(Operand(edi, 0), eax);
+
+ // Increment esi,edi so that edi is aligned.
+ __ mov(edx, edi);
+ __ and_(edx, 0x03);
+ __ neg(edx);
+ __ add(Operand(edx), Immediate(4)); // edx = 4 - (edi & 3)
+ __ add(edi, Operand(edx));
+ __ add(esi, Operand(edx));
+ __ sub(Operand(ecx), edx);
+ // edi is now aligned, ecx holds number of remaining bytes to copy.
+ __ mov(edx, ecx);
+ __ shr(ecx, 2); // Make word count instead of byte count.
+
+ __ rep_movs();
+
+ // At most 3 bytes left to copy. Copy 4 bytes at end of string.
+ __ and_(edx, 3);
+ __ mov(eax, Operand(esi, edx, times_1, -4));
+ __ mov(Operand(edi, edx, times_1, -4), eax);
+
+ __ pop(esi);
+ __ pop(edi);
+ __ ret(0);
+ }
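The fallback has the same overall shape in portable C++ (a sketch; std::memcpy stands in for the unaligned word accesses and for rep movsd):

    #include <cstring>
    #include <cstdint>
    #include <cstddef>

    static void CopyNoSSE2(uint8_t* dst, const uint8_t* src, size_t count) {
      // Copy the first word, then advance 1..4 bytes so dst is word-aligned;
      // the skipped bytes were covered by the first copy.
      std::memcpy(dst, src, 4);
      size_t adjust = 4 - (reinterpret_cast<uintptr_t>(dst) & 3);
      dst += adjust;
      src += adjust;
      count -= adjust;
      // Bulk copy whole words (rep movsd in the patch).
      size_t words = count >> 2;
      std::memcpy(dst, src, words * 4);
      dst += words * 4;
      src += words * 4;
      // 0..3 bytes left: copy the last word of the range, overlapping
      // already-copied bytes, just like the SSE2 tails above.
      size_t rem = count & 3;
      std::memcpy(dst + rem - 4, src + rem - 4, 4);
    }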
+
+ CodeDesc desc;
+ assembler.GetCode(&desc);
+ // Call the function from C++.
+ return FUNCTION_CAST<MemCopyFunction>(buffer);
+}
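For context, a sketch of how the returned pointer might be consumed. The lazy-initializing wrapper below is illustrative, not part of this patch, and a real caller would have to route sizes below kMinComplexMemCopy elsewhere, since the int3 guard rejects them in debug builds:

    // Create the code object once, then call it like any cdecl function.
    // MemCopyFunction and CreateMemCopyFunction are from the patch; the
    // static cache is hypothetical.
    static MemCopyFunction memcopy_function = NULL;

    void MemCopy(void* dest, const void* src, size_t size) {
      if (memcopy_function == NULL) {
        memcopy_function = CreateMemCopyFunction();
      }
      (*memcopy_function)(dest, src, size);
    }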
+
#undef __
} } // namespace v8::internal
