Index: src/ia32/macro-assembler-ia32.cc |
=================================================================== |
--- src/ia32/macro-assembler-ia32.cc (revision 6301) |
+++ src/ia32/macro-assembler-ia32.cc (working copy) |
@@ -877,55 +877,51 @@ |
Immediate(Factory::cons_ascii_string_map())); |
} |
-// All registers must be distinct. Only current_string needs valid contents |
-// on entry. All registers may be invalid on exit. result_operand is |
-// unchanged, padding_chars is updated correctly. |
-void MacroAssembler::AppendStringToTopOfNewSpace( |
- Register current_string, // Tagged pointer to string to copy. |
- Register current_string_length, |
- Register result_pos, |
- Register scratch, |
- Register new_padding_chars, |
- Operand operand_result, |
- Operand operand_padding_chars, |
- Label* bailout) { |
- mov(current_string_length, |
- FieldOperand(current_string, String::kLengthOffset)); |
- shr(current_string_length, 1); |
- sub(current_string_length, operand_padding_chars); |
- mov(new_padding_chars, current_string_length); |
- add(Operand(current_string_length), Immediate(kObjectAlignmentMask)); |
- and_(Operand(current_string_length), Immediate(~kObjectAlignmentMask)); |
- sub(new_padding_chars, Operand(current_string_length)); |
- neg(new_padding_chars); |
- // We need an allocation even if current_string_length is 0, to fetch |
- // result_pos. Consider using a faster fetch of result_pos in that case. |
- AllocateInNewSpace(current_string_length, result_pos, scratch, no_reg, |
- bailout, NO_ALLOCATION_FLAGS); |
- sub(result_pos, operand_padding_chars); |
- mov(operand_padding_chars, new_padding_chars); |
- Register scratch_2 = new_padding_chars; // Used to compute total length. |
- // Copy string to the end of result. |
- mov(current_string_length, |
- FieldOperand(current_string, String::kLengthOffset)); |
- mov(scratch, operand_result); |
- mov(scratch_2, current_string_length); |
- add(scratch_2, FieldOperand(scratch, String::kLengthOffset)); |
- mov(FieldOperand(scratch, String::kLengthOffset), scratch_2); |
- shr(current_string_length, 1); |
- lea(current_string, |
- FieldOperand(current_string, SeqAsciiString::kHeaderSize)); |
- // Loop condition: while (--current_string_length >= 0). |
- Label copy_loop; |
- Label copy_loop_entry; |
- jmp(&copy_loop_entry); |
- bind(&copy_loop); |
- mov_b(scratch, Operand(current_string, current_string_length, times_1, 0)); |
- mov_b(Operand(result_pos, current_string_length, times_1, 0), scratch); |
- bind(&copy_loop_entry); |
- sub(Operand(current_string_length), Immediate(1)); |
- j(greater_equal, &copy_loop); |
+// Copy memory, byte-by-byte, from source to destination. Not optimized for |
+// long or aligned copies. The contents of scratch and length are destroyed. |
+// Source and destination are incremented by length. |
+// Many variants of movsb, loop unrolling, word moves, and indexed operands |
+// have been tried here already, and this is fastest. |
+// A simpler loop is faster on small copies, but 30% slower on large ones. |
+// The cld() instruction must have been emitted, to set the direction flag, |
+// before calling this function. |
+void MacroAssembler::CopyBytes(Register source, |
+ Register destination, |
+ Register length, |
+ Register scratch) { |
+ Label loop, done, short_string, short_loop; |
+ // Experimentation shows that the short string loop is faster if length < 10. |
+ cmp(Operand(length), Immediate(10)); |
+ j(less_equal, &short_string); |
+ |
+ ASSERT(source.is(esi)); |
+ ASSERT(destination.is(edi)); |
+ ASSERT(length.is(ecx)); |
+ |
+ // Because destination is 4-byte aligned, we keep it aligned for movs. |
Lasse Reichstein
2011/01/14 10:47:37
How do we know that destination is 4-byte aligned?
William Hesse
2011/01/14 10:54:44
In our uses, source is 4-byte aligned.
Changed com
|
+ mov(scratch, Operand(source, length, times_1, -4)); |
+ mov(Operand(destination, length, times_1, -4), scratch); |
+ mov(scratch, ecx); |
+ shr(ecx, 2); |
Lasse Reichstein
2011/01/14 10:47:37
If length was divisible by four, you will copy the
William Hesse
2011/01/14 10:54:44
Long rep.movs averages much less than a cycle per
|
+ rep_movs(); |
+ and_(Operand(scratch), Immediate(0x3)); |
+ add(destination, Operand(scratch)); |
+ jmp(&done); |
+ |
+ bind(&short_string); |
+ test(length, Operand(length)); |
+ j(zero, &done); |
+ |
+ bind(&short_loop); |
+ mov_b(scratch, Operand(source, 0)); |
+ mov_b(Operand(destination, 0), scratch); |
+ inc(source); |
+ inc(destination); |
+ dec(length); |
Lasse Reichstein
2011/01/14 10:47:37
This won't be faster if you do:
add(source, leng
William Hesse
2011/01/14 10:54:44
I tried that - it was slower. On 2011/01/14 10:47:
|
+ j(not_zero, &short_loop); |
+ |
+ bind(&done); |
} |