Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(172)

Side by Side Diff: src/ia32/codegen-ia32.cc

Issue 11418149: Faster implementation of Math.exp() (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: fix Win build Created 8 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/ia32/codegen-ia32.h ('k') | src/ia32/disasm-ia32.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after
95 CodeDesc desc; 95 CodeDesc desc;
96 masm.GetCode(&desc); 96 masm.GetCode(&desc);
97 ASSERT(desc.reloc_size == 0); 97 ASSERT(desc.reloc_size == 0);
98 98
99 CPU::FlushICache(buffer, actual_size); 99 CPU::FlushICache(buffer, actual_size);
100 OS::ProtectCode(buffer, actual_size); 100 OS::ProtectCode(buffer, actual_size);
101 return FUNCTION_CAST<UnaryMathFunction>(buffer); 101 return FUNCTION_CAST<UnaryMathFunction>(buffer);
102 } 102 }
103 103
104 104
// Builds a standalone machine-code routine for exp() in a freshly allocated
// buffer and returns it as a UnaryMathFunction. Falls back to the C library's
// exp when SSE2 is unavailable, when FLAG_fast_math is off, or when the buffer
// allocation fails.
105 UnaryMathFunction CreateExpFunction() {
106 if (!CpuFeatures::IsSupported(SSE2)) return &exp;
107 if (!FLAG_fast_math) return &exp;
108 size_t actual_size;
// Same allocation call as CreateSqrtFunction, which describes it as
// allocating the buffer in executable space.
109 byte* buffer = static_cast<byte*>(OS::Allocate(1 * KB, &actual_size, true));
110 if (buffer == NULL) return &exp;
// Presumably fills the constant and log tables that EmitMathExp reads
// (EmitMathExp asserts that the constant table address is non-NULL) - confirm.
111 ExternalReference::InitializeMathExpData();
112
113 MacroAssembler masm(NULL, buffer, static_cast<int>(actual_size));
114 // esp[1 * kPointerSize]: raw double input
115 // esp[0 * kPointerSize]: return address
116 {
117 CpuFeatures::Scope use_sse2(SSE2);
118 XMMRegister input = xmm1;
119 XMMRegister result = xmm2;
// Load the argument before the pushes below, while the esp offsets documented
// above are still valid.
120 __ movdbl(input, Operand(esp, 1 * kPointerSize));
// Save eax and ebx; they are handed to EmitMathExp as integer temporaries.
121 __ push(eax);
122 __ push(ebx);
123
124 MathExpGenerator::EmitMathExp(&masm, input, result, xmm0, eax, ebx);
125
// Restore in reverse order; esp is back at the return address afterwards.
126 __ pop(ebx);
127 __ pop(eax);
// Write the result over the argument slot and reload it onto the x87 stack,
// presumably because the ia32 C calling convention returns doubles in st(0).
128 __ movdbl(Operand(esp, 1 * kPointerSize), result);
129 __ fld_d(Operand(esp, 1 * kPointerSize));
130 __ Ret();
131 }
132
// NOTE(review): unlike the generator just above, there is no
// ASSERT(desc.reloc_size == 0) here; presumably because the emitted code
// references static tables - confirm.
133 CodeDesc desc;
134 masm.GetCode(&desc);
135
136 CPU::FlushICache(buffer, actual_size);
137 OS::ProtectCode(buffer, actual_size);
138 return FUNCTION_CAST<UnaryMathFunction>(buffer);
139 }
140
141
105 UnaryMathFunction CreateSqrtFunction() { 142 UnaryMathFunction CreateSqrtFunction() {
106 size_t actual_size; 143 size_t actual_size;
107 // Allocate buffer in executable space. 144 // Allocate buffer in executable space.
108 byte* buffer = static_cast<byte*>(OS::Allocate(1 * KB, 145 byte* buffer = static_cast<byte*>(OS::Allocate(1 * KB,
109 &actual_size, 146 &actual_size,
110 true)); 147 true));
111 // If SSE2 is not available, we can use libc's implementation to ensure 148 // If SSE2 is not available, we can use libc's implementation to ensure
112 // consistency since code by fullcodegen's calls into runtime in that case. 149 // consistency since code by fullcodegen's calls into runtime in that case.
113 if (buffer == NULL || !CpuFeatures::IsSupported(SSE2)) return &sqrt; 150 if (buffer == NULL || !CpuFeatures::IsSupported(SSE2)) return &sqrt;
114 MacroAssembler masm(NULL, buffer, static_cast<int>(actual_size)); 151 MacroAssembler masm(NULL, buffer, static_cast<int>(actual_size));
(...skipping 633 matching lines...) Expand 10 before | Expand all | Expand 10 after
748 // Ascii string. 785 // Ascii string.
749 // Load the byte into the result register. 786 // Load the byte into the result register.
750 __ bind(&ascii); 787 __ bind(&ascii);
751 __ movzx_b(result, FieldOperand(string, 788 __ movzx_b(result, FieldOperand(string,
752 index, 789 index,
753 times_1, 790 times_1,
754 SeqOneByteString::kHeaderSize)); 791 SeqOneByteString::kHeaderSize));
755 __ bind(&done); 792 __ bind(&done);
756 } 793 }
757 794
795
// Returns a memory operand addressing entry |index| of the math-exp constant
// table exposed through ExternalReference::math_exp_constants().
796 static Operand ExpConstant(int index) {
797 return Operand::StaticVariable(ExternalReference::math_exp_constants(index));
798 }
799
800
// Emits an inline SSE2 sequence that computes exp(input) into |result|.
//
// Register contract, as visible from the code below:
//   input          - source value; overwritten during the computation.
//   result         - receives the final value.
//   double_scratch - XMM temporary; clobbered.
//   temp1, temp2   - general-purpose temporaries; clobbered.
// The three XMM registers must be pairwise distinct and temp1 must differ from
// temp2. The constant table must already have an address (asserted below).
//
// Writing c[i] for ExpConstant(i), the emitted code computes:
//   if (c[0] >= input)  result = 0
//   if (input >= c[1])  result = c[2]
//   otherwise:
//     t      = input * c[3] + c[4];   bits = low 32 bits of t
//     r      = (t - c[4]) * c[5] - input
//     poly   = (c[6] - r) * r * r * c[7] - r + c[8]
//     scale  = log_table[bits & 0x7ff] | (((bits + 0x1ff800) >> 11) << 52)
//     result = poly * scale
// The meaning of the individual constants is not visible here; see where the
// tables are initialized. Integer and floating-point instructions are
// interleaved, presumably for scheduling - do not reorder without checking
// the register dependencies.
801 void MathExpGenerator::EmitMathExp(MacroAssembler* masm,
802 XMMRegister input,
803 XMMRegister result,
804 XMMRegister double_scratch,
805 Register temp1,
806 Register temp2) {
807 ASSERT(!input.is(double_scratch));
808 ASSERT(!input.is(result));
809 ASSERT(!result.is(double_scratch));
810 ASSERT(!temp1.is(temp2));
811 ASSERT(ExternalReference::math_exp_constants(0).address() != NULL);
812
813 Label done;
814
// Lower cut-off: result is zeroed first, then we exit if c[0] >= input.
815 __ movdbl(double_scratch, ExpConstant(0));
816 __ xorpd(result, result);
817 __ ucomisd(double_scratch, input);
818 __ j(above_equal, &done);
// Upper cut-off: exit with result = c[2] if input >= c[1]. The load sits
// between the compare and the branch, which relies on movdbl leaving the
// flags untouched.
819 __ ucomisd(input, ExpConstant(1));
820 __ movdbl(result, ExpConstant(2));
821 __ j(above_equal, &done);
// t = input * c[3] + c[4]; keep the low 32 bits of t's bit pattern in temp2.
822 __ movdbl(double_scratch, ExpConstant(3));
823 __ movdbl(result, ExpConstant(4));
824 __ mulsd(double_scratch, input);
825 __ addsd(double_scratch, result);
826 __ movd(temp2, double_scratch);
// r = (t - c[4]) * c[5] - input, held in double_scratch.
827 __ subsd(double_scratch, result);
828 __ movdbl(result, ExpConstant(6));
829 __ mulsd(double_scratch, ExpConstant(5));
830 __ subsd(double_scratch, input);
// result = (c[6] - r) * r^2 * c[7] - r + c[8]; input is reused to hold r^2.
831 __ subsd(result, double_scratch);
832 __ movsd(input, double_scratch);
833 __ mulsd(input, double_scratch);
834 __ mulsd(result, input);
835 __ mov(temp1, temp2);
836 __ mulsd(result, ExpConstant(7));
837 __ subsd(result, double_scratch);
// 0x1ff800 == 0x3ff << 11, so after the shr by 11 below temp1 holds
// (bits >> 11) + 0x3ff (0x3ff is the IEEE-754 double exponent bias).
838 __ add(temp1, Immediate(0x1ff800));
839 __ addsd(result, ExpConstant(8));
// temp2 = low 11 bits of the saved pattern, used as the log-table index.
840 __ and_(temp2, Immediate(0x7ff));
// Move the value to bit 20 of the dword, i.e. the exponent position within
// the upper half of a double.
841 __ shr(temp1, 11);
842 __ shl(temp1, 20);
// movd places temp1 in the lowest dword; the shuffle swaps dwords 0 and 1 so
// it ends up in the upper half of the low 64-bit lane.
843 __ movd(input, temp1);
844 __ pshufd(input, input, static_cast<uint8_t>(0xe1)); // Order: 11 10 00 01
// Fetch the 8-byte log-table entry selected by temp2, OR it with the exponent
// bits built above, and scale the polynomial result by that double.
845 __ movdbl(double_scratch, Operand::StaticArray(
846 temp2, times_8, ExternalReference::math_exp_log_table()));
847 __ por(input, double_scratch);
848 __ mulsd(result, input);
849 __ bind(&done);
850 }
851
758 #undef __ 852 #undef __
759 853
760 static const int kNoCodeAgeSequenceLength = 5; 854 static const int kNoCodeAgeSequenceLength = 5;
761 855
762 static byte* GetNoCodeAgeSequence(uint32_t* length) { 856 static byte* GetNoCodeAgeSequence(uint32_t* length) {
763 static bool initialized = false; 857 static bool initialized = false;
764 static byte sequence[kNoCodeAgeSequenceLength]; 858 static byte sequence[kNoCodeAgeSequenceLength];
765 *length = kNoCodeAgeSequenceLength; 859 *length = kNoCodeAgeSequenceLength;
766 if (!initialized) { 860 if (!initialized) {
767 // The sequence of instructions that is patched out for aging code is the 861 // The sequence of instructions that is patched out for aging code is the
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
850 Code* stub = GetCodeAgeStub(age, parity); 944 Code* stub = GetCodeAgeStub(age, parity);
851 CodePatcher patcher(sequence, young_length); 945 CodePatcher patcher(sequence, young_length);
852 patcher.masm()->call(stub->instruction_start(), RelocInfo::NONE); 946 patcher.masm()->call(stub->instruction_start(), RelocInfo::NONE);
853 } 947 }
854 } 948 }
855 949
856 950
857 } } // namespace v8::internal 951 } } // namespace v8::internal
858 952
859 #endif // V8_TARGET_ARCH_IA32 953 #endif // V8_TARGET_ARCH_IA32
OLDNEW
« no previous file with comments | « src/ia32/codegen-ia32.h ('k') | src/ia32/disasm-ia32.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698