src/IceInstARM32.cpp - Issue 1655313002: Subzero: ARM32: lowering of vector insert and extract.

Side by Side Diff: src/IceInstARM32.cpp

Issue 1655313002: Subzero: ARM32: lowering of vector insert and extract. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

Patch Set: Code review feedback. Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 //===- subzero/src/IceInstARM32.cpp - ARM32 instruction implementation ----===//	1 //===- subzero/src/IceInstARM32.cpp - ARM32 instruction implementation ----===//

2 //	2 //

3 // The Subzero Code Generator	3 // The Subzero Code Generator

4 //	4 //

5 // This file is distributed under the University of Illinois Open Source	5 // This file is distributed under the University of Illinois Open Source

6 // License. See LICENSE.TXT for details.	6 // License. See LICENSE.TXT for details.

7 //	7 //

8 //===----------------------------------------------------------------------===//	8 //===----------------------------------------------------------------------===//

9 ///	9 ///

10 /// \file	10 /// \file

11 /// \brief Implements the InstARM32 and OperandARM32 classes, primarily the	11 /// \brief Implements the InstARM32 and OperandARM32 classes, primarily the

12 /// constructors and the dump()/emit() methods.	12 /// constructors and the dump()/emit() methods.

13 ///	13 ///

14 //===----------------------------------------------------------------------===//	14 //===----------------------------------------------------------------------===//

15	15

16 #include "IceInstARM32.h"	16 #include "IceInstARM32.h"

17	17

18 #include "IceAssemblerARM32.h"	18 #include "IceAssemblerARM32.h"

19 #include "IceCfg.h"	19 #include "IceCfg.h"

20 #include "IceCfgNode.h"	20 #include "IceCfgNode.h"

21 #include "IceInst.h"	21 #include "IceInst.h"

22 #include "IceOperand.h"	22 #include "IceOperand.h"

23 #include "IceRegistersARM32.h"

24 #include "IceTargetLoweringARM32.h"	23 #include "IceTargetLoweringARM32.h"

25	24

26 namespace Ice {	25 namespace Ice {

27 namespace ARM32 {	26 namespace ARM32 {

28	27

29 namespace {	28 namespace {

30	29

	30 using Register = RegARM32::AllRegisters;

	31

31 // maximum number of registers allowed in vpush/vpop.	32 // maximum number of registers allowed in vpush/vpop.

32 static constexpr SizeT VpushVpopMaxConsecRegs = 16;	33 static constexpr SizeT VpushVpopMaxConsecRegs = 16;

33	34

34 const struct TypeARM32Attributes_ {	35 const struct TypeARM32Attributes_ {

35 const char *WidthString; // b, h, <blank>, or d	36 const char *WidthString; // b, h, <blank>, or d

36 const char *VecWidthString; // i8, i16, i32, f32, f64	37 const char *VecWidthString; // i8, i16, i32, f32, f64

37 int8_t SExtAddrOffsetBits;	38 int8_t SExtAddrOffsetBits;

38 int8_t ZExtAddrOffsetBits;	39 int8_t ZExtAddrOffsetBits;

39 } TypeARM32Attributes[] = {	40 } TypeARM32Attributes[] = {

40 #define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr, shaddr) \	41 #define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr, shaddr) \

(...skipping 995 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1036 }	1037 }

1037	1038

1038 if (Src64 == nullptr) {	1039 if (Src64 == nullptr) {

1039 addSource(Src);	1040 addSource(Src);

1040 } else {	1041 } else {

1041 addSource(Src64->getLo());	1042 addSource(Src64->getLo());

1042 addSource(Src64->getHi());	1043 addSource(Src64->getHi());

1043 }	1044 }

1044 }	1045 }

1045	1046

	1047 // These next two functions find the D register that maps to the half of the Q

	1048 // register that this instruction is accessing.

	1049 Register getDRegister(const Variable *Src, uint32_t Index) {

	1050 assert(Src->hasReg());

	1051 const auto SrcReg = (Register)Src->getRegNum();

	1052

	1053 const RegARM32::RegTableType &SrcEntry = RegARM32::RegTable[SrcReg];

	1054 assert(SrcEntry.IsVec128);

	1055

	1056 const uint32_t NumElements = typeNumElements(Src->getType());

	1057

	1058 // This code assumes the Aliases list goes Q_n, S_2n, S_2n+1. The asserts in

	1059 // the next two branches help to check that this is still true.

	1060 if (Index < NumElements / 2) {

	1061 // We have a Q register that's made up of two D registers. This assert is

	1062 // to help ensure that we picked the right D register.

	1063 //

	1064 // TODO(jpp): find a way to do this that doesn't rely on ordering of the

	1065 // alias list.

	1066 assert(RegARM32::RegTable[SrcEntry.Aliases[1]].Encoding + 1 ==

	1067 RegARM32::RegTable[SrcEntry.Aliases[2]].Encoding);

	1068 return (Register)SrcEntry.Aliases[1];

	1069 } else {

	1070 // We have a Q register that's made up of two D registers. This assert is

	1071 // to help ensure that we picked the right D register.

	1072 //

	1073 // TODO(jpp): find a way to do this that doesn't rely on ordering of the

	1074 // alias list.

	1075 assert(RegARM32::RegTable[SrcEntry.Aliases[2]].Encoding - 1 ==

	1076 RegARM32::RegTable[SrcEntry.Aliases[1]].Encoding);

	1077 return (Register)SrcEntry.Aliases[2];

	1078 }

	1079 }

	1080

	// Converts an element index within a vector of NumElements elements into the
	// index relative to the half of the vector that contains it: indices in the
	// lower half are returned unchanged, indices in the upper half are rebased by
	// subtracting NumElements / 2.
	constexpr uint32_t getDIndex(uint32_t NumElements, uint32_t Index) {
	  return Index - ((Index >= NumElements / 2) ? (NumElements / 2) : 0u);
	}

	1084

	1085 // For floating point values, we can insertelement or extractelement by moving

	1086 // directly from an S register. This function finds the right one.

	1087 Register getSRegister(const Variable *Src, uint32_t Index) {

	1088 assert(Src->hasReg());

	1089 auto SrcReg = Src->getRegNum();

	1090

	1091 // For floating point values, we need to be allocated to Q0 - Q7, so we can

	1092 // directly access the value we want as one of the S registers.

	1093 assert(Src->getType() == IceType_v4f32 && SrcReg < RegARM32::Reg_q8);

	1094

	1095 // This part assumes the register alias list goes q0, d0, d1, s0, s1, s2, s3.

	1096 assert(Index < 4);

	1097

	1098 // TODO(jpp): find a way to do this that doesn't rely on ordering of the alias

	1099 // list.

	1100 return (Register)RegARM32::RegTable[SrcReg].Aliases[Index + 3];

	1101 }

	1102

	1103 void InstARM32Extract::emit(const Cfg *Func) const {

	1104 auto &Str = Func->getContext()->getStrEmit();

	1105 auto DestTy = getDest()->getType();

	1106

	1107 auto *Src = llvm::cast<Variable>(getSrc(0));

	1108

	1109 if (isIntegerType(DestTy)) {

	1110 Str << "\t"

	1111 << "vmov" << getPredicate();

	1112 const uint32_t BitSize = typeWidthInBytes(DestTy) * CHAR_BIT;

	1113 if (BitSize < 32) {

	1114 Str << ".s" << BitSize;

	1115 } else {

	1116 Str << "." << BitSize;

	1117 }

	1118 Str << "\t";

	1119 getDest()->emit(Func);

	1120 Str << ", ";

	1121

	1122 auto VectorSize = typeNumElements(Src->getType());
	Jim Stichnoth 2016/02/08 20:13:59 no auto here, I think no auto here, I think Eric Holk 2016/02/08 21:24:17 Done. Show quoted text On 2016/02/08 20:13:59, stichnot wrote: > no auto here, I think Done.
	1123

	1124 auto SrcReg = getDRegister(Src, Index);

	1125

	1126 Str << RegARM32::RegTable[SrcReg].Name;

	1127 Str << "[" << getDIndex(VectorSize, Index) << "]";

	1128 } else if (isFloatingType(DestTy)) {

	1129 const auto SrcReg = getSRegister(Src, Index);

	1130

	1131 Str << "\t"

	1132 << "vmov" << getPredicate() << ".f32"

	1133 << "\t";

	1134 getDest()->emit(Func);

	1135 Str << ", " << RegARM32::RegTable[SrcReg].Name;

	1136 } else {

	1137 assert(false && "Invalid extract type");

	1138 }

	1139 }

	1140

	1141 void InstARM32Insert::emit(const Cfg *Func) const {

	1142 Ostream &Str = Func->getContext()->getStrEmit();

	1143 const Variable *Dest = getDest();

	1144 const Type DestTy = getDest()->getType();

	1145

	1146 const auto *Src = llvm::cast<Variable>(getSrc(0));

	1147

	1148 if (isIntegerType(DestTy)) {

	1149 Str << "\t"

	1150 << "vmov" << getPredicate();

	1151 const auto BitSize = typeWidthInBytes(typeElementType(DestTy)) * CHAR_BIT;
	Jim Stichnoth 2016/02/08 20:13:59 no auto here, I think no auto here, I think Eric Holk 2016/02/08 21:24:17 Done. Show quoted text On 2016/02/08 20:13:59, stichnot wrote: > no auto here, I think Done.
	1152 Str << "." << BitSize << "\t";

	1153

	1154 const auto VectorSize = typeNumElements(DestTy);
	Jim Stichnoth 2016/02/08 20:13:59 no auto here, I think no auto here, I think Eric Holk 2016/02/08 21:24:17 Done. Show quoted text On 2016/02/08 20:13:59, stichnot wrote: > no auto here, I think Done.
	1155 const auto DestReg = getDRegister(Dest, Index);

	1156 const auto Index = getDIndex(VectorSize, this->Index);
	Jim Stichnoth 2016/02/08 20:13:59 not sure about this auto not sure about this auto Eric Holk 2016/02/08 21:24:17 Done. Show quoted text On 2016/02/08 20:13:59, stichnot wrote: > not sure about this auto Done.
	1157 Str << RegARM32::RegTable[DestReg].Name;

	1158 Str << "[" << Index << "], ";

	1159 Src->emit(Func);

	1160 } else if (isFloatingType(DestTy)) {

	1161 Str << "\t"

	1162 << "vmov" << getPredicate() << ".f32"

	1163 << "\t";

	1164 const auto DestReg = getSRegister(Dest, Index);

	1165 Str << RegARM32::RegTable[DestReg].Name << ", ";

	1166 Src->emit(Func);

	1167 } else {

	1168 assert(false && "Invalid insert type");

	1169 }

	1170 }

	1171

1046 template <InstARM32::InstKindARM32 K>	1172 template <InstARM32::InstKindARM32 K>

1047 void InstARM32CmpLike<K>::emitIAS(const Cfg *Func) const {	1173 void InstARM32CmpLike<K>::emitIAS(const Cfg *Func) const {

1048 emitUsingTextFixup(Func);	1174 emitUsingTextFixup(Func);

1049 }	1175 }

1050	1176

1051 template <> void InstARM32Cmn::emitIAS(const Cfg *Func) const {	1177 template <> void InstARM32Cmn::emitIAS(const Cfg *Func) const {

1052 assert(getSrcSize() == 2);	1178 assert(getSrcSize() == 2);

1053 auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();	1179 auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();

1054 Asm->cmn(getSrc(0), getSrc(1), getPredicate());	1180 Asm->cmn(getSrc(0), getSrc(1), getPredicate());

1055 if (Asm->needsTextFixup())	1181 if (Asm->needsTextFixup())

(...skipping 1485 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2541	2667

2542 template class InstARM32FourAddrGPR<InstARM32::Mla>;	2668 template class InstARM32FourAddrGPR<InstARM32::Mla>;

2543 template class InstARM32FourAddrGPR<InstARM32::Mls>;	2669 template class InstARM32FourAddrGPR<InstARM32::Mls>;

2544	2670

2545 template class InstARM32CmpLike<InstARM32::Cmn>;	2671 template class InstARM32CmpLike<InstARM32::Cmn>;

2546 template class InstARM32CmpLike<InstARM32::Cmp>;	2672 template class InstARM32CmpLike<InstARM32::Cmp>;

2547 template class InstARM32CmpLike<InstARM32::Tst>;	2673 template class InstARM32CmpLike<InstARM32::Tst>;

2548	2674

2549 } // end of namespace ARM32	2675 } // end of namespace ARM32

2550 } // end of namespace Ice	2676 } // end of namespace Ice

OLD	NEW

« src/IceInstARM32.h ('K') | « src/IceInstARM32.h ('k') | src/IceRegistersARM32.h » ('j') | src/IceTargetLoweringARM32.cpp » ('J')