Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(618)

Side by Side Diff: src/ia32/macro-assembler-ia32.cc

Issue 6148007: Speed up FastAsciiArrayJoin on ia32 by improving hand-written assembly code. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: '' Created 9 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/ia32/macro-assembler-ia32.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2010 the V8 project authors. All rights reserved. 1 // Copyright 2010 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 859 matching lines...) Expand 10 before | Expand all | Expand 10 after
870 scratch1, 870 scratch1,
871 scratch2, 871 scratch2,
872 gc_required, 872 gc_required,
873 TAG_OBJECT); 873 TAG_OBJECT);
874 874
875 // Set the map. The other fields are left uninitialized. 875 // Set the map. The other fields are left uninitialized.
876 mov(FieldOperand(result, HeapObject::kMapOffset), 876 mov(FieldOperand(result, HeapObject::kMapOffset),
877 Immediate(Factory::cons_ascii_string_map())); 877 Immediate(Factory::cons_ascii_string_map()));
878 } 878 }
879 879
880 // All registers must be distinct. Only current_string needs valid contents
881 // on entry. All registers may be invalid on exit. result_operand is
882 // unchanged, padding_chars is updated correctly.
883 void MacroAssembler::AppendStringToTopOfNewSpace(
884 Register current_string, // Tagged pointer to string to copy.
885 Register current_string_length,
886 Register result_pos,
887 Register scratch,
888 Register new_padding_chars,
889 Operand operand_result,
890 Operand operand_padding_chars,
891 Label* bailout) {
892 mov(current_string_length,
893 FieldOperand(current_string, String::kLengthOffset));
894 shr(current_string_length, 1);
895 sub(current_string_length, operand_padding_chars);
896 mov(new_padding_chars, current_string_length);
897 add(Operand(current_string_length), Immediate(kObjectAlignmentMask));
898 and_(Operand(current_string_length), Immediate(~kObjectAlignmentMask));
899 sub(new_padding_chars, Operand(current_string_length));
900 neg(new_padding_chars);
901 // We need an allocation even if current_string_length is 0, to fetch
902 // result_pos. Consider using a faster fetch of result_pos in that case.
903 AllocateInNewSpace(current_string_length, result_pos, scratch, no_reg,
904 bailout, NO_ALLOCATION_FLAGS);
905 sub(result_pos, operand_padding_chars);
906 mov(operand_padding_chars, new_padding_chars);
907 880
908 Register scratch_2 = new_padding_chars; // Used to compute total length. 881 // Copy memory, byte-by-byte, from source to destination. Not optimized for
909 // Copy string to the end of result. 882 // long or aligned copies. The contents of scratch and length are destroyed.
910 mov(current_string_length, 883 // Source and destination are incremented by length.
911 FieldOperand(current_string, String::kLengthOffset)); 884 // Many variants of movsb, loop unrolling, word moves, and indexed operands
912 mov(scratch, operand_result); 885 // have been tried here already, and this is fastest.
913 mov(scratch_2, current_string_length); 886 // A simpler loop is faster on small copies, but 30% slower on large ones.
914 add(scratch_2, FieldOperand(scratch, String::kLengthOffset)); 887 // The cld() instruction must have been emitted, to clear the direction flag,
915 mov(FieldOperand(scratch, String::kLengthOffset), scratch_2); 888 // before calling this function.
916 shr(current_string_length, 1); 889 void MacroAssembler::CopyBytes(Register source,
917 lea(current_string, 890 Register destination,
918 FieldOperand(current_string, SeqAsciiString::kHeaderSize)); 891 Register length,
919 // Loop condition: while (--current_string_length >= 0). 892 Register scratch) {
920 Label copy_loop; 893 Label loop, done, short_string, short_loop;
921 Label copy_loop_entry; 894 // Experimentation shows that the short string loop is faster if length < 10.
922 jmp(&copy_loop_entry); 895 cmp(Operand(length), Immediate(10));
923 bind(&copy_loop); 896 j(less_equal, &short_string);
924 mov_b(scratch, Operand(current_string, current_string_length, times_1, 0)); 897
925 mov_b(Operand(result_pos, current_string_length, times_1, 0), scratch); 898 ASSERT(source.is(esi));
926 bind(&copy_loop_entry); 899 ASSERT(destination.is(edi));
927 sub(Operand(current_string_length), Immediate(1)); 900 ASSERT(length.is(ecx));
928 j(greater_equal, &copy_loop); 901
902 // Because destination is 4-byte aligned, we keep it aligned for movs.
Lasse Reichstein 2011/01/14 10:47:37 How do we know that destination is 4-byte aligned?
William Hesse 2011/01/14 10:54:44 In our uses, source is 4-byte aligned. Changed com
903 mov(scratch, Operand(source, length, times_1, -4));
904 mov(Operand(destination, length, times_1, -4), scratch);
905 mov(scratch, ecx);
906 shr(ecx, 2);
Lasse Reichstein 2011/01/14 10:47:37 If length was divisible by four, you will copy the
William Hesse 2011/01/14 10:54:44 Long rep.movs averages much less than a cycle per
907 rep_movs();
908 and_(Operand(scratch), Immediate(0x3));
909 add(destination, Operand(scratch));
910 jmp(&done);
911
912 bind(&short_string);
913 test(length, Operand(length));
914 j(zero, &done);
915
916 bind(&short_loop);
917 mov_b(scratch, Operand(source, 0));
918 mov_b(Operand(destination, 0), scratch);
919 inc(source);
920 inc(destination);
921 dec(length);
Lasse Reichstein 2011/01/14 10:47:37 This won't be faster if you do: add(source, leng
William Hesse 2011/01/14 10:54:44 I tried that - it was slower. On 2011/01/14 10:47:
922 j(not_zero, &short_loop);
923
924 bind(&done);
929 } 925 }
930 926
931 927
932 void MacroAssembler::NegativeZeroTest(CodeGenerator* cgen, 928 void MacroAssembler::NegativeZeroTest(CodeGenerator* cgen,
933 Register result, 929 Register result,
934 Register op, 930 Register op,
935 JumpTarget* then_target) { 931 JumpTarget* then_target) {
936 JumpTarget ok; 932 JumpTarget ok;
937 test(result, Operand(result)); 933 test(result, Operand(result));
938 ok.Branch(not_zero, taken); 934 ok.Branch(not_zero, taken);
(...skipping 952 matching lines...) Expand 10 before | Expand all | Expand 10 after
1891 1887
1892 // Check that the code was patched as expected. 1888 // Check that the code was patched as expected.
1893 ASSERT(masm_.pc_ == address_ + size_); 1889 ASSERT(masm_.pc_ == address_ + size_);
1894 ASSERT(masm_.reloc_info_writer.pos() == address_ + size_ + Assembler::kGap); 1890 ASSERT(masm_.reloc_info_writer.pos() == address_ + size_ + Assembler::kGap);
1895 } 1891 }
1896 1892
1897 1893
1898 } } // namespace v8::internal 1894 } } // namespace v8::internal
1899 1895
1900 #endif // V8_TARGET_ARCH_IA32 1896 #endif // V8_TARGET_ARCH_IA32
OLDNEW
« no previous file with comments | « src/ia32/macro-assembler-ia32.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698