Chromium Code Reviews

Diff: src/compiler/x64/code-generator-x64.cc

Issue 1416663004: [x64] Replace movaps with appropriate vmov* instructions when AVX is enabled. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Created 5 years, 2 months ago
// Copyright 2013 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/code-generator.h"

#include "src/compiler/code-generator-impl.h"
#include "src/compiler/gap-resolver.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/osr.h"

(...skipping 831 matching lines...)

    case kSSEFloat64Sub:
      ASSEMBLE_SSE_BINOP(subsd);
      break;
    case kSSEFloat64Mul:
      ASSEMBLE_SSE_BINOP(mulsd);
      break;
    case kSSEFloat64Div:
      ASSEMBLE_SSE_BINOP(divsd);
      // Don't delete this mov. It may improve performance on some CPUs,
      // when there is a (v)mulsd depending on the result.
-     __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
+     __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
      break;
    case kSSEFloat64Mod: {
      __ subq(rsp, Immediate(kDoubleSize));
      // Move values to st(0) and st(1).
      __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
      __ fld_d(Operand(rsp, 0));
      __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
      __ fld_d(Operand(rsp, 0));
      // Loop while fprem isn't done.
      Label mod_loop;

(...skipping 134 matching lines...)

    case kAVXFloat32Sub:
      ASSEMBLE_AVX_BINOP(vsubss);
      break;
    case kAVXFloat32Mul:
      ASSEMBLE_AVX_BINOP(vmulss);
      break;
    case kAVXFloat32Div:
      ASSEMBLE_AVX_BINOP(vdivss);
      // Don't delete this mov. It may improve performance on some CPUs,
      // when there is a (v)mulss depending on the result.
-     __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
+     __ Movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
      break;
    case kAVXFloat32Max:
      ASSEMBLE_AVX_BINOP(vmaxss);
      break;
    case kAVXFloat32Min:
      ASSEMBLE_AVX_BINOP(vminss);
      break;
    case kAVXFloat64Cmp: {
      CpuFeatureScope avx_scope(masm(), AVX);
      if (instr->InputAt(1)->IsDoubleRegister()) {
        __ vucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        __ vucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
      }
      break;
    }
    case kAVXFloat64Add:
      ASSEMBLE_AVX_BINOP(vaddsd);
      break;
    case kAVXFloat64Sub:
      ASSEMBLE_AVX_BINOP(vsubsd);
      break;
    case kAVXFloat64Mul:
      ASSEMBLE_AVX_BINOP(vmulsd);
      break;
    case kAVXFloat64Div:
      ASSEMBLE_AVX_BINOP(vdivsd);
      // Don't delete this mov. It may improve performance on some CPUs,
      // when there is a (v)mulsd depending on the result.
-     __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
+     __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
      break;
    case kAVXFloat64Max:
      ASSEMBLE_AVX_BINOP(vmaxsd);
      break;
    case kAVXFloat64Min:
      ASSEMBLE_AVX_BINOP(vminsd);
      break;
    case kAVXFloat32Abs: {
      // TODO(bmeurer): Use RIP relative 128-bit constants.
      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);

(...skipping 684 matching lines...)

      } else {
        DCHECK(destination->IsDoubleStackSlot());
        __ movq(kScratchRegister, src_const);
        __ movq(g.ToOperand(destination), kScratchRegister);
      }
    }
  } else if (source->IsDoubleRegister()) {
    XMMRegister src = g.ToDoubleRegister(source);
    if (destination->IsDoubleRegister()) {
      XMMRegister dst = g.ToDoubleRegister(destination);
-     __ movaps(dst, src);
+     __ Movapd(dst, src);
    } else {
      DCHECK(destination->IsDoubleStackSlot());
      Operand dst = g.ToOperand(destination);
      __ Movsd(dst, src);
    }
  } else if (source->IsDoubleStackSlot()) {
    DCHECK(destination->IsDoubleRegister() || destination->IsDoubleStackSlot());
    Operand src = g.ToOperand(source);
    if (destination->IsDoubleRegister()) {
      XMMRegister dst = g.ToDoubleRegister(destination);

(...skipping 30 matching lines...)

    Operand src = g.ToOperand(source);
    Operand dst = g.ToOperand(destination);
    __ movq(tmp, dst);
    __ xchgq(tmp, src);
    __ movq(dst, tmp);
  } else if (source->IsDoubleRegister() && destination->IsDoubleRegister()) {
    // XMM register-register swap. We rely on having xmm0
    // available as a fixed scratch register.
    XMMRegister src = g.ToDoubleRegister(source);
    XMMRegister dst = g.ToDoubleRegister(destination);
-   __ movaps(xmm0, src);
-   __ movaps(src, dst);
-   __ movaps(dst, xmm0);
+   __ Movapd(xmm0, src);
+   __ Movapd(src, dst);
+   __ Movapd(dst, xmm0);
  } else if (source->IsDoubleRegister() && destination->IsDoubleStackSlot()) {
    // XMM register-memory swap. We rely on having xmm0
    // available as a fixed scratch register.
    XMMRegister src = g.ToDoubleRegister(source);
    Operand dst = g.ToOperand(destination);
    __ Movsd(xmm0, src);
    __ Movsd(src, dst);
    __ Movsd(dst, xmm0);
  } else {
    // No other combinations are possible.

(...skipping 25 matching lines...)

    int padding_size = last_lazy_deopt_pc_ + space_needed - current_pc;
    __ Nop(padding_size);
  }
}

#undef __

}  // namespace compiler
}  // namespace internal
}  // namespace v8
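
Note on the change: the capitalized Movapd/Movaps calls introduced above are macro-assembler wrappers that emit the VEX-encoded vmovapd/vmovaps form when the CPU supports AVX and fall back to the legacy SSE encoding otherwise, which is how the lowercase movaps gets replaced "with appropriate vmov* instructions when AVX is enabled". A minimal sketch of such a wrapper follows; the body, the CpuFeatures::IsSupported/CpuFeatureScope pattern, and the exact signature are modeled on V8's x64 MacroAssembler but are assumptions here, not the code added by this CL.

// Sketch only: a feature-dispatching move helper, assuming V8-style
// CpuFeatures/CpuFeatureScope APIs. Not the actual implementation.
void MacroAssembler::Movapd(XMMRegister dst, XMMRegister src) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vmovapd(dst, src);  // VEX-encoded move, used when AVX is enabled.
  } else {
    movapd(dst, src);   // Legacy SSE encoding otherwise.
  }
}

The presumable motivation is to keep the generated instruction stream consistently VEX-encoded when AVX is in use: on some microarchitectures, mixing legacy SSE and AVX instructions can incur state-transition penalties, so even plain register-to-register moves are routed through these wrappers.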