Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(223)

Side by Side Diff: src/compiler/x64/code-generator-x64.cc

Issue 1081033003: [x86] Allow (v)divsd->(v)mulsd to execute in parallel. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Created 5 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/compiler/ia32/code-generator-ia32.cc ('k') | src/ia32/lithium-codegen-ia32.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2013 the V8 project authors. All rights reserved. 1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/compiler/code-generator.h" 5 #include "src/compiler/code-generator.h"
6 6
7 #include "src/compiler/code-generator-impl.h" 7 #include "src/compiler/code-generator-impl.h"
8 #include "src/compiler/gap-resolver.h" 8 #include "src/compiler/gap-resolver.h"
9 #include "src/compiler/node-matchers.h" 9 #include "src/compiler/node-matchers.h"
10 #include "src/scopes.h" 10 #include "src/scopes.h"
(...skipping 708 matching lines...) Expand 10 before | Expand all | Expand 10 after
719 ASSEMBLE_SSE_BINOP(addss); 719 ASSEMBLE_SSE_BINOP(addss);
720 break; 720 break;
721 case kSSEFloat32Sub: 721 case kSSEFloat32Sub:
722 ASSEMBLE_SSE_BINOP(subss); 722 ASSEMBLE_SSE_BINOP(subss);
723 break; 723 break;
724 case kSSEFloat32Mul: 724 case kSSEFloat32Mul:
725 ASSEMBLE_SSE_BINOP(mulss); 725 ASSEMBLE_SSE_BINOP(mulss);
726 break; 726 break;
727 case kSSEFloat32Div: 727 case kSSEFloat32Div:
728 ASSEMBLE_SSE_BINOP(divss); 728 ASSEMBLE_SSE_BINOP(divss);
729 // Don't delete this mov. It may improve performance on some CPUs,
730 // when there is a (v)mulss depending on the result.
731 __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
729 break; 732 break;
730 case kSSEFloat32Abs: { 733 case kSSEFloat32Abs: {
731 // TODO(bmeurer): Use RIP relative 128-bit constants. 734 // TODO(bmeurer): Use RIP relative 128-bit constants.
732 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); 735 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
733 __ psrlq(kScratchDoubleReg, 33); 736 __ psrlq(kScratchDoubleReg, 33);
734 __ andps(i.OutputDoubleRegister(), kScratchDoubleReg); 737 __ andps(i.OutputDoubleRegister(), kScratchDoubleReg);
735 break; 738 break;
736 } 739 }
737 case kSSEFloat32Neg: { 740 case kSSEFloat32Neg: {
738 // TODO(bmeurer): Use RIP relative 128-bit constants. 741 // TODO(bmeurer): Use RIP relative 128-bit constants.
(...skipping 21 matching lines...) Expand all
760 ASSEMBLE_SSE_BINOP(addsd); 763 ASSEMBLE_SSE_BINOP(addsd);
761 break; 764 break;
762 case kSSEFloat64Sub: 765 case kSSEFloat64Sub:
763 ASSEMBLE_SSE_BINOP(subsd); 766 ASSEMBLE_SSE_BINOP(subsd);
764 break; 767 break;
765 case kSSEFloat64Mul: 768 case kSSEFloat64Mul:
766 ASSEMBLE_SSE_BINOP(mulsd); 769 ASSEMBLE_SSE_BINOP(mulsd);
767 break; 770 break;
768 case kSSEFloat64Div: 771 case kSSEFloat64Div:
769 ASSEMBLE_SSE_BINOP(divsd); 772 ASSEMBLE_SSE_BINOP(divsd);
773 // Don't delete this mov. It may improve performance on some CPUs,
774 // when there is a (v)mulsd depending on the result.
775 __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
770 break; 776 break;
771 case kSSEFloat64Mod: { 777 case kSSEFloat64Mod: {
772 __ subq(rsp, Immediate(kDoubleSize)); 778 __ subq(rsp, Immediate(kDoubleSize));
773 // Move values to st(0) and st(1). 779 // Move values to st(0) and st(1).
774 __ movsd(Operand(rsp, 0), i.InputDoubleRegister(1)); 780 __ movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
775 __ fld_d(Operand(rsp, 0)); 781 __ fld_d(Operand(rsp, 0));
776 __ movsd(Operand(rsp, 0), i.InputDoubleRegister(0)); 782 __ movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
777 __ fld_d(Operand(rsp, 0)); 783 __ fld_d(Operand(rsp, 0));
778 // Loop while fprem isn't done. 784 // Loop while fprem isn't done.
779 Label mod_loop; 785 Label mod_loop;
(...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after
912 ASSEMBLE_AVX_BINOP(vaddss); 918 ASSEMBLE_AVX_BINOP(vaddss);
913 break; 919 break;
914 case kAVXFloat32Sub: 920 case kAVXFloat32Sub:
915 ASSEMBLE_AVX_BINOP(vsubss); 921 ASSEMBLE_AVX_BINOP(vsubss);
916 break; 922 break;
917 case kAVXFloat32Mul: 923 case kAVXFloat32Mul:
918 ASSEMBLE_AVX_BINOP(vmulss); 924 ASSEMBLE_AVX_BINOP(vmulss);
919 break; 925 break;
920 case kAVXFloat32Div: 926 case kAVXFloat32Div:
921 ASSEMBLE_AVX_BINOP(vdivss); 927 ASSEMBLE_AVX_BINOP(vdivss);
928 // Don't delete this mov. It may improve performance on some CPUs,
929 // when there is a (v)mulss depending on the result.
930 __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
922 break; 931 break;
923 case kAVXFloat32Max: 932 case kAVXFloat32Max:
924 ASSEMBLE_AVX_BINOP(vmaxss); 933 ASSEMBLE_AVX_BINOP(vmaxss);
925 break; 934 break;
926 case kAVXFloat32Min: 935 case kAVXFloat32Min:
927 ASSEMBLE_AVX_BINOP(vminss); 936 ASSEMBLE_AVX_BINOP(vminss);
928 break; 937 break;
929 case kAVXFloat64Cmp: { 938 case kAVXFloat64Cmp: {
930 CpuFeatureScope avx_scope(masm(), AVX); 939 CpuFeatureScope avx_scope(masm(), AVX);
931 if (instr->InputAt(1)->IsDoubleRegister()) { 940 if (instr->InputAt(1)->IsDoubleRegister()) {
932 __ vucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); 941 __ vucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
933 } else { 942 } else {
934 __ vucomisd(i.InputDoubleRegister(0), i.InputOperand(1)); 943 __ vucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
935 } 944 }
936 break; 945 break;
937 } 946 }
938 case kAVXFloat64Add: 947 case kAVXFloat64Add:
939 ASSEMBLE_AVX_BINOP(vaddsd); 948 ASSEMBLE_AVX_BINOP(vaddsd);
940 break; 949 break;
941 case kAVXFloat64Sub: 950 case kAVXFloat64Sub:
942 ASSEMBLE_AVX_BINOP(vsubsd); 951 ASSEMBLE_AVX_BINOP(vsubsd);
943 break; 952 break;
944 case kAVXFloat64Mul: 953 case kAVXFloat64Mul:
945 ASSEMBLE_AVX_BINOP(vmulsd); 954 ASSEMBLE_AVX_BINOP(vmulsd);
946 break; 955 break;
947 case kAVXFloat64Div: 956 case kAVXFloat64Div:
948 ASSEMBLE_AVX_BINOP(vdivsd); 957 ASSEMBLE_AVX_BINOP(vdivsd);
958 // Don't delete this mov. It may improve performance on some CPUs,
959 // when there is a (v)mulsd depending on the result.
960 __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
949 break; 961 break;
950 case kAVXFloat64Max: 962 case kAVXFloat64Max:
951 ASSEMBLE_AVX_BINOP(vmaxsd); 963 ASSEMBLE_AVX_BINOP(vmaxsd);
952 break; 964 break;
953 case kAVXFloat64Min: 965 case kAVXFloat64Min:
954 ASSEMBLE_AVX_BINOP(vminsd); 966 ASSEMBLE_AVX_BINOP(vminsd);
955 break; 967 break;
956 case kAVXFloat32Abs: { 968 case kAVXFloat32Abs: {
957 // TODO(bmeurer): Use RIP relative 128-bit constants. 969 // TODO(bmeurer): Use RIP relative 128-bit constants.
958 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg); 970 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
(...skipping 699 matching lines...) Expand 10 before | Expand all | Expand 10 after
1658 } 1670 }
1659 } 1671 }
1660 MarkLazyDeoptSite(); 1672 MarkLazyDeoptSite();
1661 } 1673 }
1662 1674
1663 #undef __ 1675 #undef __
1664 1676
1665 } // namespace internal 1677 } // namespace internal
1666 } // namespace compiler 1678 } // namespace compiler
1667 } // namespace v8 1679 } // namespace v8
OLDNEW
« no previous file with comments | « src/compiler/ia32/code-generator-ia32.cc ('k') | src/ia32/lithium-codegen-ia32.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698