Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1313)

Side by Side Diff: src/IceInstARM32.cpp

Issue 1655313002: Subzero: ARM32: lowering of vector insert and extract. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Code review feedback. Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceInstARM32.cpp - ARM32 instruction implementation ----===// 1 //===- subzero/src/IceInstARM32.cpp - ARM32 instruction implementation ----===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
11 /// \brief Implements the InstARM32 and OperandARM32 classes, primarily the 11 /// \brief Implements the InstARM32 and OperandARM32 classes, primarily the
12 /// constructors and the dump()/emit() methods. 12 /// constructors and the dump()/emit() methods.
13 /// 13 ///
14 //===----------------------------------------------------------------------===// 14 //===----------------------------------------------------------------------===//
15 15
16 #include "IceInstARM32.h" 16 #include "IceInstARM32.h"
17 17
18 #include "IceAssemblerARM32.h" 18 #include "IceAssemblerARM32.h"
19 #include "IceCfg.h" 19 #include "IceCfg.h"
20 #include "IceCfgNode.h" 20 #include "IceCfgNode.h"
21 #include "IceInst.h" 21 #include "IceInst.h"
22 #include "IceOperand.h" 22 #include "IceOperand.h"
23 #include "IceRegistersARM32.h"
24 #include "IceTargetLoweringARM32.h" 23 #include "IceTargetLoweringARM32.h"
25 24
26 namespace Ice { 25 namespace Ice {
27 namespace ARM32 { 26 namespace ARM32 {
28 27
29 namespace { 28 namespace {
30 29
30 using Register = RegARM32::AllRegisters;
31
31 // maximum number of registers allowed in vpush/vpop. 32 // maximum number of registers allowed in vpush/vpop.
32 static constexpr SizeT VpushVpopMaxConsecRegs = 16; 33 static constexpr SizeT VpushVpopMaxConsecRegs = 16;
33 34
34 const struct TypeARM32Attributes_ { 35 const struct TypeARM32Attributes_ {
35 const char *WidthString; // b, h, <blank>, or d 36 const char *WidthString; // b, h, <blank>, or d
36 const char *VecWidthString; // i8, i16, i32, f32, f64 37 const char *VecWidthString; // i8, i16, i32, f32, f64
37 int8_t SExtAddrOffsetBits; 38 int8_t SExtAddrOffsetBits;
38 int8_t ZExtAddrOffsetBits; 39 int8_t ZExtAddrOffsetBits;
39 } TypeARM32Attributes[] = { 40 } TypeARM32Attributes[] = {
40 #define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr, shaddr) \ 41 #define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr, shaddr) \
(...skipping 995 matching lines...) Expand 10 before | Expand all | Expand 10 after
1036 } 1037 }
1037 1038
1038 if (Src64 == nullptr) { 1039 if (Src64 == nullptr) {
1039 addSource(Src); 1040 addSource(Src);
1040 } else { 1041 } else {
1041 addSource(Src64->getLo()); 1042 addSource(Src64->getLo());
1042 addSource(Src64->getHi()); 1043 addSource(Src64->getHi());
1043 } 1044 }
1044 } 1045 }
1045 1046
1047 // These next two functions find the D register that maps to the half of the Q
1048 // register that this instruction is accessing.
1049 Register getDRegister(const Variable *Src, uint32_t Index) {
1050 assert(Src->hasReg());
1051 const auto SrcReg = (Register)Src->getRegNum();
1052
1053 const RegARM32::RegTableType &SrcEntry = RegARM32::RegTable[SrcReg];
1054 assert(SrcEntry.IsVec128);
1055
1056 const uint32_t NumElements = typeNumElements(Src->getType());
1057
1058 // This code assumes the Aliases list goes Q_n, S_2n, S_2n+1. The asserts in
1059 // the next two branches help to check that this is still true.
1060 if (Index < NumElements / 2) {
1061 // We have a Q register that's made up of two D registers. This assert is
1062 // to help ensure that we picked the right D register.
1063 //
1064 // TODO(jpp): find a way to do this that doesn't rely on ordering of the
1065 // alias list.
1066 assert(RegARM32::RegTable[SrcEntry.Aliases[1]].Encoding + 1 ==
1067 RegARM32::RegTable[SrcEntry.Aliases[2]].Encoding);
1068 return (Register)SrcEntry.Aliases[1];
1069 } else {
1070 // We have a Q register that's made up of two D registers. This assert is
1071 // to help ensure that we picked the right D register.
1072 //
1073 // TODO(jpp): find a way to do this that doesn't rely on ordering of the
1074 // alias list.
1075 assert(RegARM32::RegTable[SrcEntry.Aliases[2]].Encoding - 1 ==
1076 RegARM32::RegTable[SrcEntry.Aliases[1]].Encoding);
1077 return (Register)SrcEntry.Aliases[2];
1078 }
1079 }
1080
// Translates an element index within a vector of NumElements elements into
// the index within the half of the vector that contains it: indices in the
// first half are returned unchanged, indices in the second half are rebased
// to start at zero. This pairs with getDRegister(), which selects the D
// register for that half.
//
// Kept as a single return expression so it stays a valid C++11 constexpr.
constexpr uint32_t getDIndex(uint32_t NumElements, uint32_t Index) {
  return (Index < NumElements / 2) ? Index : Index - (NumElements / 2);
}
1084
1085 // For floating point values, we can insertelement or extractelement by moving
1086 // directly from an S register. This function finds the right one.
1087 Register getSRegister(const Variable *Src, uint32_t Index) {
1088 assert(Src->hasReg());
1089 auto SrcReg = Src->getRegNum();
1090
1091 // For floating point values, we need to be allocated to Q0 - Q7, so we can
1092 // directly access the value we want as one of the S registers.
1093 assert(Src->getType() == IceType_v4f32 && SrcReg < RegARM32::Reg_q8);
1094
1095 // This part assumes the register alias list goes q0, d0, d1, s0, s1, s2, s3.
1096 assert(Index < 4);
1097
1098 // TODO(jpp): find a way to do this that doesn't rely on ordering of the alias
1099 // list.
1100 return (Register)RegARM32::RegTable[SrcReg].Aliases[Index + 3];
1101 }
1102
1103 void InstARM32Extract::emit(const Cfg *Func) const {
1104 auto &Str = Func->getContext()->getStrEmit();
1105 auto DestTy = getDest()->getType();
1106
1107 auto *Src = llvm::cast<Variable>(getSrc(0));
1108
1109 if (isIntegerType(DestTy)) {
1110 Str << "\t"
1111 << "vmov" << getPredicate();
1112 const uint32_t BitSize = typeWidthInBytes(DestTy) * CHAR_BIT;
1113 if (BitSize < 32) {
1114 Str << ".s" << BitSize;
1115 } else {
1116 Str << "." << BitSize;
1117 }
1118 Str << "\t";
1119 getDest()->emit(Func);
1120 Str << ", ";
1121
1122 auto VectorSize = typeNumElements(Src->getType());
Jim Stichnoth 2016/02/08 20:13:59 no auto here, I think
Eric Holk 2016/02/08 21:24:17 Done.
1123
1124 auto SrcReg = getDRegister(Src, Index);
1125
1126 Str << RegARM32::RegTable[SrcReg].Name;
1127 Str << "[" << getDIndex(VectorSize, Index) << "]";
1128 } else if (isFloatingType(DestTy)) {
1129 const auto SrcReg = getSRegister(Src, Index);
1130
1131 Str << "\t"
1132 << "vmov" << getPredicate() << ".f32"
1133 << "\t";
1134 getDest()->emit(Func);
1135 Str << ", " << RegARM32::RegTable[SrcReg].Name;
1136 } else {
1137 assert(false && "Invalid extract type");
1138 }
1139 }
1140
1141 void InstARM32Insert::emit(const Cfg *Func) const {
1142 Ostream &Str = Func->getContext()->getStrEmit();
1143 const Variable *Dest = getDest();
1144 const Type DestTy = getDest()->getType();
1145
1146 const auto *Src = llvm::cast<Variable>(getSrc(0));
1147
1148 if (isIntegerType(DestTy)) {
1149 Str << "\t"
1150 << "vmov" << getPredicate();
1151 const auto BitSize = typeWidthInBytes(typeElementType(DestTy)) * CHAR_BIT;
Jim Stichnoth 2016/02/08 20:13:59 no auto here, I think
Eric Holk 2016/02/08 21:24:17 Done.
1152 Str << "." << BitSize << "\t";
1153
1154 const auto VectorSize = typeNumElements(DestTy);
Jim Stichnoth 2016/02/08 20:13:59 no auto here, I think
Eric Holk 2016/02/08 21:24:17 Done.
1155 const auto DestReg = getDRegister(Dest, Index);
1156 const auto Index = getDIndex(VectorSize, this->Index);
Jim Stichnoth 2016/02/08 20:13:59 not sure about this auto
Eric Holk 2016/02/08 21:24:17 Done.
1157 Str << RegARM32::RegTable[DestReg].Name;
1158 Str << "[" << Index << "], ";
1159 Src->emit(Func);
1160 } else if (isFloatingType(DestTy)) {
1161 Str << "\t"
1162 << "vmov" << getPredicate() << ".f32"
1163 << "\t";
1164 const auto DestReg = getSRegister(Dest, Index);
1165 Str << RegARM32::RegTable[DestReg].Name << ", ";
1166 Src->emit(Func);
1167 } else {
1168 assert(false && "Invalid insert type");
1169 }
1170 }
1171
1046 template <InstARM32::InstKindARM32 K> 1172 template <InstARM32::InstKindARM32 K>
1047 void InstARM32CmpLike<K>::emitIAS(const Cfg *Func) const { 1173 void InstARM32CmpLike<K>::emitIAS(const Cfg *Func) const {
1048 emitUsingTextFixup(Func); 1174 emitUsingTextFixup(Func);
1049 } 1175 }
1050 1176
1051 template <> void InstARM32Cmn::emitIAS(const Cfg *Func) const { 1177 template <> void InstARM32Cmn::emitIAS(const Cfg *Func) const {
1052 assert(getSrcSize() == 2); 1178 assert(getSrcSize() == 2);
1053 auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>(); 1179 auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
1054 Asm->cmn(getSrc(0), getSrc(1), getPredicate()); 1180 Asm->cmn(getSrc(0), getSrc(1), getPredicate());
1055 if (Asm->needsTextFixup()) 1181 if (Asm->needsTextFixup())
(...skipping 1485 matching lines...) Expand 10 before | Expand all | Expand 10 after
2541 2667
2542 template class InstARM32FourAddrGPR<InstARM32::Mla>; 2668 template class InstARM32FourAddrGPR<InstARM32::Mla>;
2543 template class InstARM32FourAddrGPR<InstARM32::Mls>; 2669 template class InstARM32FourAddrGPR<InstARM32::Mls>;
2544 2670
2545 template class InstARM32CmpLike<InstARM32::Cmn>; 2671 template class InstARM32CmpLike<InstARM32::Cmn>;
2546 template class InstARM32CmpLike<InstARM32::Cmp>; 2672 template class InstARM32CmpLike<InstARM32::Cmp>;
2547 template class InstARM32CmpLike<InstARM32::Tst>; 2673 template class InstARM32CmpLike<InstARM32::Tst>;
2548 2674
2549 } // end of namespace ARM32 2675 } // end of namespace ARM32
2550 } // end of namespace Ice 2676 } // end of namespace Ice
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698