src/IceInstARM32.cpp - Issue 1655313002: Subzero: ARM32: lowering of vector insert and extract.

Side by Side Diff: src/IceInstARM32.cpp

Issue 1655313002: Subzero: ARM32: lowering of vector insert and extract. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

Patch Set: Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 //===- subzero/src/IceInstARM32.cpp - ARM32 instruction implementation ----===//	1 //===- subzero/src/IceInstARM32.cpp - ARM32 instruction implementation ----===//

2 //	2 //

3 // The Subzero Code Generator	3 // The Subzero Code Generator

4 //	4 //

5 // This file is distributed under the University of Illinois Open Source	5 // This file is distributed under the University of Illinois Open Source

6 // License. See LICENSE.TXT for details.	6 // License. See LICENSE.TXT for details.

7 //	7 //

8 //===----------------------------------------------------------------------===//	8 //===----------------------------------------------------------------------===//

9 ///	9 ///

10 /// \file	10 /// \file

11 /// \brief Implements the InstARM32 and OperandARM32 classes, primarily the	11 /// \brief Implements the InstARM32 and OperandARM32 classes, primarily the

12 /// constructors and the dump()/emit() methods.	12 /// constructors and the dump()/emit() methods.

13 ///	13 ///

14 //===----------------------------------------------------------------------===//	14 //===----------------------------------------------------------------------===//

15	15

16 #include "IceInstARM32.h"	16 #include "IceInstARM32.h"

17	17

18 #include "IceAssemblerARM32.h"	18 #include "IceAssemblerARM32.h"

19 #include "IceCfg.h"	19 #include "IceCfg.h"

20 #include "IceCfgNode.h"	20 #include "IceCfgNode.h"

21 #include "IceInst.h"	21 #include "IceInst.h"

22 #include "IceOperand.h"	22 #include "IceOperand.h"

23 #include "IceRegistersARM32.h"

24 #include "IceTargetLoweringARM32.h"	23 #include "IceTargetLoweringARM32.h"

25	24

26 namespace Ice {	25 namespace Ice {

27 namespace ARM32 {	26 namespace ARM32 {

28	27

29 namespace {	28 namespace {

30	29

31 // maximum number of registers allowed in vpush/vpop.	30 // maximum number of registers allowed in vpush/vpop.

32 static constexpr SizeT VpushVpopMaxConsecRegs = 16;	31 static constexpr SizeT VpushVpopMaxConsecRegs = 16;

33	32

(...skipping 940 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
974 }	973 }

975	974

976 if (Src64 == nullptr) {	975 if (Src64 == nullptr) {

977 addSource(Src);	976 addSource(Src);

978 } else {	977 } else {

979 addSource(Src64->getLo());	978 addSource(Src64->getLo());

980 addSource(Src64->getHi());	979 addSource(Src64->getHi());

981 }	980 }

982 }	981 }

983	982

	983 RegARM32::AllRegisters

	984 InstARM32ExtractInsert::getDRegister(Variable *Src, uint32_t VectorSize) const {

	985 assert(Src->hasReg());

	986 auto SrcReg = Src->getRegNum();
	Jim Stichnoth 2016/02/03 15:28:37 You will get some disagreement within the team, bu You will get some disagreement within the team, but I would like you to back off on use of auto. My approximate rules for when to use auto: 1. Something like a cast operation (static_cast, llvm::dyn_cast) where the explicit type is completely redundant. const auto Var = llvm::dyn_cast<Variable>(Opnd); 2. Other operations where the type is pretty clear and practically redundant from context. auto Instr = InstFoo::create(); 3. Situations where the actual type is obnoxiously verbose for no good reason. auto Iter = MyContainer.begin(); auto Func = /* some lambda expression */; Otherwise, code starts to become write-only and it gets really hard to make sense of the actual types without continual scrutinizing of header files. John* 2016/02/03 16:06:51 const auto const auto Eric Holk 2016/02/03 21:02:21 I meant to remove most of my uses of auto before s Show quoted text On 2016/02/03 15:28:37, stichnot wrote: > You will get some disagreement within the team, but I would like you to back off > on use of auto. > > My approximate rules for when to use auto: > > 1. Something like a cast operation (static_cast, llvm::dyn_cast) where the > explicit type is completely redundant. > > const auto Var = llvm::dyn_cast<Variable>(Opnd); > > 2. Other operations where the type is pretty clear and practically redundant > from context. > > auto Instr = InstFoo::create(); > > 3. Situations where the actual type is obnoxiously verbose for no good reason. > > auto Iter = MyContainer.begin(); > auto Func = /* some lambda expression */; > > Otherwise, code starts to become write-only and it gets really hard to make > sense of the actual types without continual scrutinizing of header files. I meant to remove most of my uses of auto before submitting this CL. 
I'm generally a fan of auto since I've also spent a lot of time writing in type-inferred languages, but the local style here is definitely to avoid using it. Eric Holk* 2016/02/03 21:02:21 Done. Show quoted text On 2016/02/03 16:06:51, John wrote: > const auto Done.
	987

	988 const auto &RegEntry = RegARM32::RegTable[SrcReg];

	989 assert(RegEntry.IsVec128);

	990

	991 // This code assumes the Aliases list goes Q_n, S_2n, S_2n+1. The asserts in
	Jim Stichnoth 2016/02/03 15:28:37 I would really like to stay away from any assumpti I would really like to stay away from any assumptions on ordering of the register enum values or the Aliases list. IIUC, the purpose of mapping from Q register to D registers is for syntactic correctness of the emit() output, but ultimately in the emitIAS() output you will just take the encoded register value of the Q register and apply a simple transformation that has nothing to do with our various orderings. So I'm thinking maybe this method should return the encoded D register value instead of the enum value, and the emit() code could then call another function that returns a register name given the encoded value and register class/type (presumably f64 in this case because we know that's how to get at D registers). John 2016/02/03 16:06:51 he will actually need to provide a D register to t Show quoted text On 2016/02/03 15:28:37, stichnot wrote: > I would really like to stay away from any assumptions on ordering of the > register enum values or the Aliases list. > > IIUC, the purpose of mapping from Q register to D registers is for syntactic > correctness of the emit() output, but ultimately in the emitIAS() output you > will just take the encoded register value of the Q register and apply a simple > transformation that has nothing to do with our various orderings. > > So I'm thinking maybe this method should return the encoded D register value > instead of the enum value, and the emit() code could then call another function > that returns a register name given the encoded value and register class/type > (presumably f64 in this case because we know that's how to get at D registers). he will actually need to provide a D register to the underlying assembler. I am concerned about this as well, so I asked him to leave a TODO behind (see below) until we come up with something better that does not require assumptions or clever tricks. 
Eric Holk 2016/02/03 21:02:21 I've added the TODO. Show quoted text On 2016/02/03 16:06:51, John wrote: > On 2016/02/03 15:28:37, stichnot wrote: > > I would really like to stay away from any assumptions on ordering of the > > register enum values or the Aliases list. > > > > IIUC, the purpose of mapping from Q register to D registers is for syntactic > > correctness of the emit() output, but ultimately in the emitIAS() output you > > will just take the encoded register value of the Q register and apply a simple > > transformation that has nothing to do with our various orderings. > > > > So I'm thinking maybe this method should return the encoded D register value > > instead of the enum value, and the emit() code could then call another > function > > that returns a register name given the encoded value and register class/type > > (presumably f64 in this case because we know that's how to get at D > registers). > > he will actually need to provide a D register to the underlying assembler. I am > concerned about this as well, so I asked him to leave a TODO behind (see below) > until we come up with something better that does not require assumptions or > clever tricks. I've added the TODO.
	992 // the next two branches help to check that this is still true.

	993 auto HalfWidth = VectorSize / 2;
	John 2016/02/03 16:06:51 optional: I am very auto-lenient, but this is prob optional: I am very auto-lenient, but this is probably too much Eric Holk 2016/02/03 21:02:21 I removed it. HalfWidth was only used in one place Show quoted text On 2016/02/03 16:06:51, John wrote: > optional: I am very auto-lenient, but this is probably too much I removed it. HalfWidth was only used in one place, so I just replaced it with VectorSize / 2.
	994 if (Index < HalfWidth) {

	995 SrcReg = RegEntry.Aliases[1];

	996 // We have a Q register that's made up of two D registers. This assert is

	997 // to help ensure that we picked the right D register.

	998 assert(RegARM32::RegTable[RegEntry.Aliases[1]].Encoding + 1 ==

	999 RegARM32::RegTable[RegEntry.Aliases[2]].Encoding);

	1000 } else {

	1001 SrcReg = RegEntry.Aliases[2];

	1002 // We have a Q register that's made up of two D registers. This assert is

	1003 // to help ensure that we picked the right D register.

	1004 assert(RegARM32::RegTable[RegEntry.Aliases[2]].Encoding - 1 ==

	1005 RegARM32::RegTable[RegEntry.Aliases[1]].Encoding);

	1006 }

	1007 return (RegARM32::AllRegisters)SrcReg;

	1008 }

	1009

	1010 uint32_t InstARM32ExtractInsert::getDIndex(uint32_t VectorSize) const {

	1011 if (Index < VectorSize / 2) {

	1012 return Index;

	1013 } else {

	1014 return Index - (VectorSize / 2);

	1015 }

	1016 }

	1017

	1018 RegARM32::AllRegisters

	1019 InstARM32ExtractInsert::getSRegister(Variable *Src) const {
	Jim Stichnoth 2016/02/03 15:28:37 Same comment as above. Same comment as above. Eric Holk 2016/02/03 21:02:21 Acknowledged. Show quoted text On 2016/02/03 15:28:37, stichnot wrote: > Same comment as above. Acknowledged.
	1020 assert(Src->hasReg());

	1021 auto SrcReg = Src->getRegNum();

	1022

	1023 // For floating point values, we hope we got allocated to Q0 - Q7, so we can
	John 2016/02/03 16:06:51 I would rephrase this. "hoping" conveys the idea t I would rephrase this. "hoping" conveys the idea that we cross our fingers that Src was allocated the right register. also, won't this assertion fail for non-fp types if, e.g., q15 is allocated? (I had to search for the uses of getSRegisters() below to figure out that this will not happen... maybe assert(Src->getType() == v4f32)?) Eric Holk 2016/02/03 21:02:21 I fixed the comment. I wrote that before the highe Show quoted text On 2016/02/03 16:06:51, John wrote: > I would rephrase this. "hoping" conveys the idea that we cross our fingers that > Src was allocated the right register. > > also, won't this assertion fail for non-fp types if, e.g., q15 is allocated? (I > had to search for the uses of getSRegisters() below to figure out that this will > not happen... maybe assert(Src->getType() == v4f32)?) I fixed the comment. I wrote that before the higher up code was actually guaranteeing that invariant. I strengthened the assert too. This function should only be called on the v4f32 version anyway.
	1024 // directly access the value we want as one of the S registers.

	1025 assert(SrcReg < RegARM32::Reg_q8);

	1026

	1027 // This part assumes the register alias list is goes q0, d0, d1, s0, s1, s2,

	1028 // s3.

	1029 assert(Index < 4);

	1030

	1031 return (RegARM32::AllRegisters)RegARM32::RegTable[SrcReg].Aliases[Index + 3];
	John 2016/02/03 16:06:51 I fear what will happen if the alias declaration e I fear what will happen if the alias declaration ever changes. Please leave a TODO(jpp), and describe that we need to make this more resilient to changes in the alias declaration. Eric Holk 2016/02/03 21:02:21 Done. Show quoted text On 2016/02/03 16:06:51, John wrote: > I fear what will happen if the alias declaration ever changes. Please leave a > TODO(jpp), and describe that we need to make this more resilient to changes in > the alias declaration. Done.
	1032 }

	1033

	1034 InstARM32Extract::InstARM32Extract(Cfg *Func, Variable *Dest, Variable *Src,

	1035 uint32_t Index, CondARM32::Cond Predicate)

	1036 : InstARM32ExtractInsert(Func, Dest, Index, Predicate) {

	1037 addSource(Src);

	1038 }

	1039

	1040 void InstARM32Extract::emit(const Cfg *Func) const {

	1041 auto &Str = Func->getContext()->getStrEmit();

	1042 auto DestTy = getDest()->getType();

	1043

	1044 assert(llvm::isa<Variable>(getSrc(0)));

	1045 auto Src = llvm::dyn_cast<Variable>(getSrc(0));
	Jim Stichnoth 2016/02/03 15:28:37 You can probably just use llvm::cast<> here and re You can probably just use llvm::cast<> here and remove the assert. Eric Holk 2016/02/03 21:02:21 Done. Show quoted text On 2016/02/03 15:28:37, stichnot wrote: > You can probably just use llvm::cast<> here and remove the assert. Done.
	1046

	1047 if (isIntegerType(DestTy)) {

	1048 Str << "\t"

	1049 << "vmov" << getPredicate();

	1050 auto BitSize = typeWidthInBits(DestTy);
	John 2016/02/03 16:06:51 instead of adding this new method, why don't you j instead of adding this new method, why don't you just do auto BitSize = typeWidthInBytes() * CHAR_BIT ? Eric Holk 2016/02/03 21:02:21 Done. Show quoted text On 2016/02/03 16:06:51, John wrote: > instead of adding this new method, why don't you just do > > auto BitSize = typeWidthInBytes() * CHAR_BIT > > ? Done.
	1051 if (BitSize < 32) {

	1052 Str << ".s" << BitSize;

	1053 } else {

	1054 Str << "." << BitSize;

	1055 }

	1056 Str << "\t";

	1057 getDest()->emit(Func);

	1058 Str << ", ";

	1059

	1060 auto VectorSize = typeNumElements(Src->getType());

	1061

	1062 auto SrcReg = getDRegister(Src, VectorSize);

	1063

	1064 Str << RegARM32::RegTable[SrcReg].Name;

	1065 Str << "[" << getDIndex(VectorSize) << "]";

	1066 } else if (isFloatingType(DestTy)) {

	1067 auto SrcReg = getSRegister(Src);

	1068

	1069 Str << "\t"

	1070 << "vmov" << getPredicate() << ".f32"

	1071 << "\t";

	1072 getDest()->emit(Func);

	1073 Str << ", " << RegARM32::RegTable[SrcReg].Name;

	1074 } else {

	1075 assert(false && "Invalid extract type");

	1076 }

	1077 }

	1078

	1079 InstARM32Insert::InstARM32Insert(Cfg *Func, Variable *Dest, Variable *Src,

	1080 uint32_t Index, CondARM32::Cond Predicate)

	1081 : InstARM32ExtractInsert(Func, Dest, Index, Predicate) {

	1082 addSource(Src);

	1083 }

	1084

	1085 void InstARM32Insert::emit(const Cfg *Func) const {

	1086 auto &Str = Func->getContext()->getStrEmit();

	1087 auto Dest = getDest();

	1088 auto DestTy = getDest()->getType();

	1089

	1090 assert(llvm::isa<Variable>(getSrc(0)));

	1091 auto Src = llvm::dyn_cast<Variable>(getSrc(0));

	1092

	1093 if (isIntegerType(DestTy)) {

	1094 Str << "\t"

	1095 << "vmov" << getPredicate();

	1096 auto BitSize = typeWidthInBits(typeElementType(DestTy));

	1097 Str << "." << BitSize << "\t";

	1098

	1099 auto VectorSize = typeNumElements(DestTy);

	1100 auto DestReg = getDRegister(Dest, VectorSize);

	1101 auto Index = getDIndex(VectorSize);

	1102 Str << RegARM32::RegTable[DestReg].Name;

	1103 Str << "[" << Index << "], ";

	1104 Src->emit(Func);

	1105 } else if (isFloatingType(DestTy)) {

	1106 Str << "\t"

	1107 << "vmov" << getPredicate() << ".f32"

	1108 << "\t";

	1109 auto DestReg = getSRegister(Dest);

	1110 Str << RegARM32::RegTable[DestReg].Name << ", ";

	1111 Src->emit(Func);

	1112 } else {

	1113 assert(false && "Invalid insert type");

	1114 }

	1115 }

	1116

984 template <InstARM32::InstKindARM32 K>	1117 template <InstARM32::InstKindARM32 K>

985 void InstARM32CmpLike<K>::emitIAS(const Cfg *Func) const {	1118 void InstARM32CmpLike<K>::emitIAS(const Cfg *Func) const {

986 emitUsingTextFixup(Func);	1119 emitUsingTextFixup(Func);

987 }	1120 }

988	1121

989 template <> void InstARM32Cmn::emitIAS(const Cfg *Func) const {	1122 template <> void InstARM32Cmn::emitIAS(const Cfg *Func) const {

990 assert(getSrcSize() == 2);	1123 assert(getSrcSize() == 2);

991 auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();	1124 auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();

992 Asm->cmn(getSrc(0), getSrc(1), getPredicate());	1125 Asm->cmn(getSrc(0), getSrc(1), getPredicate());

993 if (Asm->needsTextFixup())	1126 if (Asm->needsTextFixup())

(...skipping 1456 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2450	2583

2451 template class InstARM32FourAddrGPR<InstARM32::Mla>;	2584 template class InstARM32FourAddrGPR<InstARM32::Mla>;

2452 template class InstARM32FourAddrGPR<InstARM32::Mls>;	2585 template class InstARM32FourAddrGPR<InstARM32::Mls>;

2453	2586

2454 template class InstARM32CmpLike<InstARM32::Cmn>;	2587 template class InstARM32CmpLike<InstARM32::Cmn>;

2455 template class InstARM32CmpLike<InstARM32::Cmp>;	2588 template class InstARM32CmpLike<InstARM32::Cmp>;

2456 template class InstARM32CmpLike<InstARM32::Tst>;	2589 template class InstARM32CmpLike<InstARM32::Tst>;

2457	2590

2458 } // end of namespace ARM32	2591 } // end of namespace ARM32

2459 } // end of namespace Ice	2592 } // end of namespace Ice

OLD	NEW

« src/IceInstARM32.h ('K') | « src/IceInstARM32.h ('k') | src/IceRegistersARM32.h » ('j') | src/IceRegistersARM32.h » ('J')