Chromium Code Reviews
| Index: src/ia32/macro-assembler-ia32.cc |
| =================================================================== |
| --- src/ia32/macro-assembler-ia32.cc (revision 6301) |
| +++ src/ia32/macro-assembler-ia32.cc (working copy) |
| @@ -877,55 +877,51 @@ |
| Immediate(Factory::cons_ascii_string_map())); |
| } |
| -// All registers must be distinct. Only current_string needs valid contents |
| -// on entry. All registers may be invalid on exit. result_operand is |
| -// unchanged, padding_chars is updated correctly. |
| -void MacroAssembler::AppendStringToTopOfNewSpace( |
| - Register current_string, // Tagged pointer to string to copy. |
| - Register current_string_length, |
| - Register result_pos, |
| - Register scratch, |
| - Register new_padding_chars, |
| - Operand operand_result, |
| - Operand operand_padding_chars, |
| - Label* bailout) { |
| - mov(current_string_length, |
| - FieldOperand(current_string, String::kLengthOffset)); |
| - shr(current_string_length, 1); |
| - sub(current_string_length, operand_padding_chars); |
| - mov(new_padding_chars, current_string_length); |
| - add(Operand(current_string_length), Immediate(kObjectAlignmentMask)); |
| - and_(Operand(current_string_length), Immediate(~kObjectAlignmentMask)); |
| - sub(new_padding_chars, Operand(current_string_length)); |
| - neg(new_padding_chars); |
| - // We need an allocation even if current_string_length is 0, to fetch |
| - // result_pos. Consider using a faster fetch of result_pos in that case. |
| - AllocateInNewSpace(current_string_length, result_pos, scratch, no_reg, |
| - bailout, NO_ALLOCATION_FLAGS); |
| - sub(result_pos, operand_padding_chars); |
| - mov(operand_padding_chars, new_padding_chars); |
| - Register scratch_2 = new_padding_chars; // Used to compute total length. |
| - // Copy string to the end of result. |
| - mov(current_string_length, |
| - FieldOperand(current_string, String::kLengthOffset)); |
| - mov(scratch, operand_result); |
| - mov(scratch_2, current_string_length); |
| - add(scratch_2, FieldOperand(scratch, String::kLengthOffset)); |
| - mov(FieldOperand(scratch, String::kLengthOffset), scratch_2); |
| - shr(current_string_length, 1); |
| - lea(current_string, |
| - FieldOperand(current_string, SeqAsciiString::kHeaderSize)); |
| - // Loop condition: while (--current_string_length >= 0). |
| - Label copy_loop; |
| - Label copy_loop_entry; |
| - jmp(&copy_loop_entry); |
| - bind(&copy_loop); |
| - mov_b(scratch, Operand(current_string, current_string_length, times_1, 0)); |
| - mov_b(Operand(result_pos, current_string_length, times_1, 0), scratch); |
| - bind(&copy_loop_entry); |
| - sub(Operand(current_string_length), Immediate(1)); |
| - j(greater_equal, &copy_loop); |
| +// Copy memory, byte-by-byte, from source to destination. Not optimized for |
| +// long or aligned copies. The contents of scratch and length are destroyed. |
| +// Source and destination are incremented by length. |
| +// Many variants of movsb, loop unrolling, word moves, and indexed operands |
| +// have been tried here already, and this is fastest. |
| +// A simpler loop is faster on small copies, but 30% slower on large ones. |
| +// The cld() instruction must have been emitted, to clear the direction flag, |
| +// before calling this function. |
| +void MacroAssembler::CopyBytes(Register source, |
| + Register destination, |
| + Register length, |
| + Register scratch) { |
| + Label loop, done, short_string, short_loop; |
| + // Experimentation shows that the short string loop is faster if length < 10. |
| + cmp(Operand(length), Immediate(10)); |
| + j(less_equal, &short_string); |
| + |
| + ASSERT(source.is(esi)); |
| + ASSERT(destination.is(edi)); |
| + ASSERT(length.is(ecx)); |
| + |
| + // Because destination is 4-byte aligned, we keep it aligned for movs. |
|
Lasse Reichstein
2011/01/14 10:47:37
How do we know that destination is 4-byte aligned?
William Hesse
2011/01/14 10:54:44
In our uses, source is 4-byte aligned.
Changed comment.
|
| + mov(scratch, Operand(source, length, times_1, -4)); |
| + mov(Operand(destination, length, times_1, -4), scratch); |
| + mov(scratch, ecx); |
| + shr(ecx, 2); |
|
Lasse Reichstein
2011/01/14 10:47:37
If length was divisible by four, you will copy the last four bytes twice.
William Hesse
2011/01/14 10:54:44
Long rep.movs averages much less than a cycle per byte.
|
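For context on this exchange: the long path copies the trailing four bytes first and then rep-moves length/4 dwords from the front. A minimal C++ model of that scheme (hypothetical names, assuming non-overlapping buffers, and not part of the patch) shows why a length divisible by four only rewrites the last dword with identical data:

  #include <cstddef>
  #include <cstdint>
  #include <cstring>

  // Hypothetical model of the emitted long-copy path (length > 10).
  void LongCopyModel(const uint8_t* src, uint8_t* dst, size_t length) {
    std::memcpy(dst + length - 4, src + length - 4, 4);  // trailing dword first
    std::memcpy(dst, src, (length / 4) * 4);             // the rep_movs portion
    // If length % 4 == 0, the second copy rewrites the last dword with the
    // same bytes; otherwise the 1-3 tail bytes come only from the first copy.
    // Either way, every byte of dst[0 .. length) gets written correctly.
  }
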
| + rep_movs(); |
| + and_(Operand(scratch), Immediate(0x3)); |
| + add(destination, Operand(scratch)); |
| + jmp(&done); |
| + |
| + bind(&short_string); |
| + test(length, Operand(length)); |
| + j(zero, &done); |
| + |
| + bind(&short_loop); |
| + mov_b(scratch, Operand(source, 0)); |
| + mov_b(Operand(destination, 0), scratch); |
| + inc(source); |
| + inc(destination); |
| + dec(length); |
|
Lasse Reichstein
2011/01/14 10:47:37
This won't be faster if you do:
add(source, length), ...
William Hesse
2011/01/14 10:54:44
I tried that - it was slower.
|
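The alternative being proposed above is cut off, so the following is only a guess at its shape, sketched in C++ rather than assembler: advance both pointers once up front and drive the copy with a single negative index, instead of bumping two pointers and a counter on every iteration. Per the reply, a variant along these lines was measured and turned out slower here.

  #include <cstddef>
  #include <cstdint>

  // The patch's short loop, modelled in C++: two pointer bumps plus a
  // counter decrement per byte copied.
  void ShortLoopAsWritten(const uint8_t* src, uint8_t* dst, size_t n) {
    while (n != 0) {
      *dst++ = *src++;
      --n;
    }
  }

  // One plausible reading of the truncated suggestion: index backwards from
  // the advanced pointers so only one register changes per iteration.
  void ShortLoopIndexed(const uint8_t* src, uint8_t* dst, ptrdiff_t n) {
    src += n;
    dst += n;
    for (ptrdiff_t i = -n; i != 0; ++i) dst[i] = src[i];
  }
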
| + j(not_zero, &short_loop); |
| + |
| + bind(&done); |
| } |
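Putting the two paths together, here is a small, self-contained C++ sketch of the routine's observable effect on the destination buffer, with a quick check over a few lengths. It is a model of the generated code, not V8 source; the 10-byte threshold comes from the diff above, everything else is illustrative.

  #include <cassert>
  #include <cstddef>
  #include <cstdint>
  #include <cstring>

  // Illustrative model of what the emitted CopyBytes leaves in dst.
  void CopyBytesModel(const uint8_t* src, uint8_t* dst, size_t length) {
    if (length <= 10) {
      for (size_t i = 0; i < length; ++i) dst[i] = src[i];  // short byte loop
      return;
    }
    std::memcpy(dst + length - 4, src + length - 4, 4);  // trailing dword
    std::memcpy(dst, src, (length / 4) * 4);             // rep movsd portion
  }

  int main() {
    uint8_t src[64], dst[64];
    for (size_t i = 0; i < sizeof(src); ++i) src[i] = static_cast<uint8_t>(i);
    const size_t lengths[] = {0, 3, 10, 11, 16, 33, 64};
    for (size_t n : lengths) {
      std::memset(dst, 0xAA, sizeof(dst));
      CopyBytesModel(src, dst, n);
      assert(std::memcmp(src, dst, n) == 0);
    }
    return 0;
  }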