Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(482)

Side by Side Diff: src/IceInstARM32.cpp

Issue 1655313002: Subzero: ARM32: lowering of vector insert and extract. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Fixes Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceInstARM32.h ('k') | src/IceRegistersARM32.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceInstARM32.cpp - ARM32 instruction implementation ----===// 1 //===- subzero/src/IceInstARM32.cpp - ARM32 instruction implementation ----===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
11 /// \brief Implements the InstARM32 and OperandARM32 classes, primarily the 11 /// \brief Implements the InstARM32 and OperandARM32 classes, primarily the
12 /// constructors and the dump()/emit() methods. 12 /// constructors and the dump()/emit() methods.
13 /// 13 ///
14 //===----------------------------------------------------------------------===// 14 //===----------------------------------------------------------------------===//
15 15
16 #include "IceInstARM32.h" 16 #include "IceInstARM32.h"
17 17
18 #include "IceAssemblerARM32.h" 18 #include "IceAssemblerARM32.h"
19 #include "IceCfg.h" 19 #include "IceCfg.h"
20 #include "IceCfgNode.h" 20 #include "IceCfgNode.h"
21 #include "IceInst.h" 21 #include "IceInst.h"
22 #include "IceOperand.h" 22 #include "IceOperand.h"
23 #include "IceRegistersARM32.h"
24 #include "IceTargetLoweringARM32.h" 23 #include "IceTargetLoweringARM32.h"
25 24
26 namespace Ice { 25 namespace Ice {
27 namespace ARM32 { 26 namespace ARM32 {
28 27
29 namespace { 28 namespace {
30 29
30 using Register = RegARM32::AllRegisters;
31
31 // maximum number of registers allowed in vpush/vpop. 32 // maximum number of registers allowed in vpush/vpop.
32 static constexpr SizeT VpushVpopMaxConsecRegs = 16; 33 static constexpr SizeT VpushVpopMaxConsecRegs = 16;
33 34
34 const struct TypeARM32Attributes_ { 35 const struct TypeARM32Attributes_ {
35 const char *WidthString; // b, h, <blank>, or d 36 const char *WidthString; // b, h, <blank>, or d
36 const char *VecWidthString; // i8, i16, i32, f32, f64 37 const char *VecWidthString; // i8, i16, i32, f32, f64
37 int8_t SExtAddrOffsetBits; 38 int8_t SExtAddrOffsetBits;
38 int8_t ZExtAddrOffsetBits; 39 int8_t ZExtAddrOffsetBits;
39 } TypeARM32Attributes[] = { 40 } TypeARM32Attributes[] = {
40 #define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr, shaddr) \ 41 #define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr, shaddr) \
(...skipping 995 matching lines...) Expand 10 before | Expand all | Expand 10 after
1036 } 1037 }
1037 1038
1038 if (Src64 == nullptr) { 1039 if (Src64 == nullptr) {
1039 addSource(Src); 1040 addSource(Src);
1040 } else { 1041 } else {
1041 addSource(Src64->getLo()); 1042 addSource(Src64->getLo());
1042 addSource(Src64->getHi()); 1043 addSource(Src64->getHi());
1043 } 1044 }
1044 } 1045 }
1045 1046
1047 // The next two functions locate element Index of a Q register: getDRegister
1048 // returns the D register aliasing the half that holds it, and getDIndex rebases Index into that half.
1049 Register getDRegister(const Variable *Src, uint32_t Index) {
1050 assert(Src->hasReg());
1051 const auto SrcReg = static_cast<Register>(Src->getRegNum());
1052
1053 const RegARM32::RegTableType &SrcEntry = RegARM32::RegTable[SrcReg];
1054 assert(SrcEntry.IsVec128);
1055
1056 const uint32_t NumElements = typeNumElements(Src->getType());
1057
1058 // This code assumes the Aliases list goes Q_n, D_2n, D_2n+1. The asserts in
1059 // the next two branches help to check that this is still true.
1060 if (Index < NumElements / 2) {
1061 // Index is in the lower half of the Q register, so use the first D alias.
1062 // This assert helps ensure the second alias has the next encoding.
1063 //
1064 // TODO(jpp): find a way to do this that doesn't rely on ordering of the
1065 // alias list.
1066 assert(RegARM32::RegTable[SrcEntry.Aliases[1]].Encoding + 1 ==
1067 RegARM32::RegTable[SrcEntry.Aliases[2]].Encoding);
1068 return static_cast<Register>(SrcEntry.Aliases[1]);
1069 } else {
1070 // Index is in the upper half of the Q register, so use the second D alias.
1071 // This assert helps ensure its encoding directly follows the first alias's.
1072 //
1073 // TODO(jpp): find a way to do this that doesn't rely on ordering of the
1074 // alias list.
1075 assert(RegARM32::RegTable[SrcEntry.Aliases[2]].Encoding - 1 ==
1076 RegARM32::RegTable[SrcEntry.Aliases[1]].Encoding);
1077 return static_cast<Register>(SrcEntry.Aliases[2]);
1078 }
1079 }
1080
1081 constexpr uint32_t getDIndex(uint32_t NumElements, uint32_t Index) { // Rebases a whole-vector element index to an index within its half.
1082 return (Index < NumElements / 2) ? Index : Index - (NumElements / 2); // Lower-half indices are unchanged; upper-half ones drop by NumElements / 2.
1083 }
1084
1085 // For floating point values, we can insertelement or extractelement by moving
1086 // directly to or from an S register. This function finds the right one.
1087 Register getSRegister(const Variable *Src, uint32_t Index) {
1088 assert(Src->hasReg());
1089 const auto SrcReg = static_cast<Register>(Src->getRegNum());
1090
1091 // The v4f32 vector must be allocated to one of Q0 - Q7, so that the element
1092 // we want is directly accessible as one of the S registers.
1093 assert(Src->getType() == IceType_v4f32);
1094 assert(SrcReg < RegARM32::Reg_q8);
1095
1096 // This part assumes the register alias list goes q0, d0, d1, s0, s1, s2, s3, so element Index is Aliases[Index + 3].
1097 assert(Index < 4);
1098
1099 // TODO(jpp): find a way to do this that doesn't rely on ordering of the alias
1100 // list.
1101 return static_cast<Register>(RegARM32::RegTable[SrcReg].Aliases[Index + 3]);
1102 }
1103
1104 void InstARM32Extract::emit(const Cfg *Func) const { // Emits the textual vmov that copies element Index of the source vector into Dest.
1105 Ostream &Str = Func->getContext()->getStrEmit();
1106 const Type DestTy = getDest()->getType();
1107
1108 const auto *Src = llvm::cast<Variable>(getSrc(0));
1109
1110 if (isIntegerType(DestTy)) {
1111 Str << "\t"
1112 << "vmov" << getPredicate();
1113 const uint32_t BitSize = typeWidthInBytes(DestTy) * CHAR_BIT;
1114 if (BitSize < 32) { // Elements narrower than 32 bits get a ".s<bits>" suffix; otherwise the suffix is ".<bits>".
1115 Str << ".s" << BitSize;
1116 } else {
1117 Str << "." << BitSize;
1118 }
1119 Str << "\t";
1120 getDest()->emit(Func);
1121 Str << ", ";
1122
1123 const size_t VectorSize = typeNumElements(Src->getType());
1124
1125 const Register SrcReg = getDRegister(Src, Index);
1126
1127 Str << RegARM32::RegTable[SrcReg].Name; // Operand is written as <D register>[<index within that D register>].
1128 Str << "[" << getDIndex(VectorSize, Index) << "]";
1129 } else if (isFloatingType(DestTy)) { // Float elements are read straight from the S register returned by getSRegister.
1130 const Register SrcReg = getSRegister(Src, Index);
1131
1132 Str << "\t"
1133 << "vmov" << getPredicate() << ".f32"
1134 << "\t";
1135 getDest()->emit(Func);
1136 Str << ", " << RegARM32::RegTable[SrcReg].Name;
1137 } else {
1138 assert(false && "Invalid extract type"); // NOTE(review): with asserts disabled (NDEBUG) this branch emits nothing.
1139 }
1140 }
1141
1142 void InstARM32Insert::emit(const Cfg *Func) const { // Emits the textual vmov that writes Src into element Index of the Dest vector.
1143 Ostream &Str = Func->getContext()->getStrEmit();
1144 const Variable *Dest = getDest();
1145 const Type DestTy = getDest()->getType();
1146
1147 const auto *Src = llvm::cast<Variable>(getSrc(0));
1148
1149 if (isIntegerType(DestTy)) {
1150 Str << "\t"
1151 << "vmov" << getPredicate();
1152 const size_t BitSize = typeWidthInBytes(typeElementType(DestTy)) * CHAR_BIT; // Size suffix comes from the element type, not the whole vector.
1153 Str << "." << BitSize << "\t";
1154
1155 const size_t VectorSize = typeNumElements(DestTy);
1156 const Register DestReg = getDRegister(Dest, Index); // Here Index is still the member (whole-vector element index).
1157 const uint32_t Index = getDIndex(VectorSize, this->Index); // NOTE(review): this local shadows the member Index from here on; consider renaming it.
1158 Str << RegARM32::RegTable[DestReg].Name;
1159 Str << "[" << Index << "], ";
1160 Src->emit(Func);
1161 } else if (isFloatingType(DestTy)) { // Float elements are written straight into the S register returned by getSRegister.
1162 Str << "\t"
1163 << "vmov" << getPredicate() << ".f32"
1164 << "\t";
1165 const Register DestReg = getSRegister(Dest, Index);
1166 Str << RegARM32::RegTable[DestReg].Name << ", ";
1167 Src->emit(Func);
1168 } else {
1169 assert(false && "Invalid insert type"); // NOTE(review): with asserts disabled (NDEBUG) this branch emits nothing.
1170 }
1171 }
1172
1046 template <InstARM32::InstKindARM32 K> 1173 template <InstARM32::InstKindARM32 K>
1047 void InstARM32CmpLike<K>::emitIAS(const Cfg *Func) const { 1174 void InstARM32CmpLike<K>::emitIAS(const Cfg *Func) const {
1048 emitUsingTextFixup(Func); 1175 emitUsingTextFixup(Func);
1049 } 1176 }
1050 1177
1051 template <> void InstARM32Cmn::emitIAS(const Cfg *Func) const { 1178 template <> void InstARM32Cmn::emitIAS(const Cfg *Func) const {
1052 assert(getSrcSize() == 2); 1179 assert(getSrcSize() == 2);
1053 auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>(); 1180 auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
1054 Asm->cmn(getSrc(0), getSrc(1), getPredicate()); 1181 Asm->cmn(getSrc(0), getSrc(1), getPredicate());
1055 if (Asm->needsTextFixup()) 1182 if (Asm->needsTextFixup())
(...skipping 1485 matching lines...) Expand 10 before | Expand all | Expand 10 after
2541 2668
2542 template class InstARM32FourAddrGPR<InstARM32::Mla>; 2669 template class InstARM32FourAddrGPR<InstARM32::Mla>;
2543 template class InstARM32FourAddrGPR<InstARM32::Mls>; 2670 template class InstARM32FourAddrGPR<InstARM32::Mls>;
2544 2671
2545 template class InstARM32CmpLike<InstARM32::Cmn>; 2672 template class InstARM32CmpLike<InstARM32::Cmn>;
2546 template class InstARM32CmpLike<InstARM32::Cmp>; 2673 template class InstARM32CmpLike<InstARM32::Cmp>;
2547 template class InstARM32CmpLike<InstARM32::Tst>; 2674 template class InstARM32CmpLike<InstARM32::Tst>;
2548 2675
2549 } // end of namespace ARM32 2676 } // end of namespace ARM32
2550 } // end of namespace Ice 2677 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceInstARM32.h ('k') | src/IceRegistersARM32.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698