 Chromium Code Reviews
 Chromium Code Reviews Issue 1876083004:
  Subzero. ARM32. Fixes Insert/Extract v(8|16)i1 bug.  (Closed) 
  Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
    
  
    Issue 1876083004:
  Subzero. ARM32. Fixes Insert/Extract v(8|16)i1 bug.  (Closed) 
  Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

| OLD | NEW | 
|---|---|
| 1 //===- subzero/src/IceInstARM32.cpp - ARM32 instruction implementation ----===// | 1 //===- subzero/src/IceInstARM32.cpp - ARM32 instruction implementation ----===// | 
| 2 // | 2 // | 
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator | 
| 4 // | 4 // | 
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source | 
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. | 
| 7 // | 7 // | 
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// | 
| 9 /// | 9 /// | 
| 10 /// \file | 10 /// \file | 
| (...skipping 1090 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1101 // to help ensure that we picked the right D register. | 1101 // to help ensure that we picked the right D register. | 
| 1102 // | 1102 // | 
| 1103 // TODO(jpp): find a way to do this that doesn't rely on ordering of the | 1103 // TODO(jpp): find a way to do this that doesn't rely on ordering of the | 
| 1104 // alias list. | 1104 // alias list. | 
| 1105 assert(RegARM32::RegTable[SrcEntry.Aliases[2]].Encoding - 1 == | 1105 assert(RegARM32::RegTable[SrcEntry.Aliases[2]].Encoding - 1 == | 
| 1106 RegARM32::RegTable[SrcEntry.Aliases[1]].Encoding); | 1106 RegARM32::RegTable[SrcEntry.Aliases[1]].Encoding); | 
| 1107 return static_cast<Register>(SrcEntry.Aliases[2]); | 1107 return static_cast<Register>(SrcEntry.Aliases[2]); | 
| 1108 } | 1108 } | 
| 1109 } | 1109 } | 
| 1110 | 1110 | 
| 1111 constexpr uint32_t getDIndex(uint32_t NumElements, uint32_t Index) { | 1111 uint32_t adjustDIndex(Type Ty, uint32_t DIndex) { | 
| 1112 return (Index < NumElements / 2) ? Index : Index - (NumElements / 2); | 1112 // If Ty is a vector of i1, we may need to adjust DIndex. This is needed | 
| 1113 // because, e.g., the second i1 in a v4i1 is accessed with a | |
| 1114 // | |
| 1115 // vmov.s8 Qd[4], Rn | |
| 1116 switch (Ty) { | |
| 1117 case IceType_v4i1: | |
| 1118 return DIndex * 4; | |
| 1119 case IceType_v8i1: | |
| 1120 return DIndex * 2; | |
| 1121 case IceType_v16i1: | |
| 1122 return DIndex; | |
| 1123 default: | |
| 1124 return DIndex; | |
| 1125 } | |
| 1126 } | |
| 1127 | |
| 1128 uint32_t getDIndex(Type Ty, uint32_t NumElements, uint32_t Index) { | |
| 1129 const uint32_t DIndex = | |
| 1130 (Index < NumElements / 2) ? Index : Index - (NumElements / 2); | |
| 1131 return adjustDIndex(Ty, DIndex); | |
| 1113 } | 1132 } | 
| 1114 | 1133 | 
| 1115 // For floating point values, we can insertelement or extractelement by moving | 1134 // For floating point values, we can insertelement or extractelement by moving | 
| 1116 // directly from an S register. This function finds the right one. | 1135 // directly from an S register. This function finds the right one. | 
| 1117 Register getSRegister(const Variable *Src, uint32_t Index) { | 1136 Register getSRegister(const Variable *Src, uint32_t Index) { | 
| 1118 assert(Src->hasReg()); | 1137 assert(Src->hasReg()); | 
| 1119 const auto SrcReg = Src->getRegNum(); | 1138 const auto SrcReg = Src->getRegNum(); | 
| 1120 | 1139 | 
| 1121 // For floating point values, we need to be allocated to Q0 - Q7, so we can | 1140 // For floating point values, we need to be allocated to Q0 - Q7, so we can | 
| 1122 // directly access the value we want as one of the S registers. | 1141 // directly access the value we want as one of the S registers. | 
| (...skipping 22 matching lines...) Expand all Loading... | |
| 1145 const uint32_t BitSize = typeWidthInBytes(DestTy) * CHAR_BIT; | 1164 const uint32_t BitSize = typeWidthInBytes(DestTy) * CHAR_BIT; | 
| 1146 if (BitSize < 32) { | 1165 if (BitSize < 32) { | 
| 1147 Str << ".s" << BitSize; | 1166 Str << ".s" << BitSize; | 
| 1148 } else { | 1167 } else { | 
| 1149 Str << "." << BitSize; | 1168 Str << "." << BitSize; | 
| 1150 } | 1169 } | 
| 1151 Str << "\t"; | 1170 Str << "\t"; | 
| 1152 getDest()->emit(Func); | 1171 getDest()->emit(Func); | 
| 1153 Str << ", "; | 1172 Str << ", "; | 
| 1154 | 1173 | 
| 1155 const size_t VectorSize = typeNumElements(Src->getType()); | 1174 const Type SrcTy = Src->getType(); | 
| 1175 const size_t VectorSize = typeNumElements(SrcTy); | |
| 1156 | 1176 | 
| 1157 const Register SrcReg = getDRegister(Src, Index); | 1177 const Register SrcReg = getDRegister(Src, Index); | 
| 1158 | 1178 | 
| 1159 Str << RegARM32::RegTable[SrcReg].Name; | 1179 Str << RegARM32::RegTable[SrcReg].Name; | 
| 1160 Str << "[" << getDIndex(VectorSize, Index) << "]"; | 1180 Str << "[" << getDIndex(SrcTy, VectorSize, Index) << "]"; | 
| 1161 } else if (isFloatingType(DestTy)) { | 1181 } else if (isFloatingType(DestTy)) { | 
| 1162 const Register SrcReg = getSRegister(Src, Index); | 1182 const Register SrcReg = getSRegister(Src, Index); | 
| 1163 | 1183 | 
| 1164 Str << "\t" | 1184 Str << "\t" | 
| 1165 << "vmov" << getPredicate() << ".f32" | 1185 << "vmov" << getPredicate() << ".f32" | 
| 1166 << "\t"; | 1186 << "\t"; | 
| 1167 getDest()->emit(Func); | 1187 getDest()->emit(Func); | 
| 1168 Str << ", " << RegARM32::RegTable[SrcReg].Name; | 1188 Str << ", " << RegARM32::RegTable[SrcReg].Name; | 
| 1169 } else { | 1189 } else { | 
| 1170 assert(false && "Invalid extract type"); | 1190 assert(false && "Invalid extract type"); | 
| 1171 } | 1191 } | 
| 1172 } | 1192 } | 
| 1173 | 1193 | 
| 1174 void InstARM32Extract::emitIAS(const Cfg *Func) const { | 1194 void InstARM32Extract::emitIAS(const Cfg *Func) const { | 
| 1175 const Operand *Dest = getDest(); | 1195 const Operand *Dest = getDest(); | 
| 1176 const Type DestTy = Dest->getType(); | 1196 const Type DestTy = Dest->getType(); | 
| 1177 const Operand *Src = getSrc(0); | 1197 const Operand *Src = getSrc(0); | 
| 1198 const Type SrcTy = Src->getType(); | |
| 1178 assert(isVectorType(Src->getType())); | 1199 assert(isVectorType(Src->getType())); | 
| 1179 assert(DestTy == typeElementType(Src->getType())); | 1200 assert(DestTy == typeElementType(Src->getType())); | 
| 1180 auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>(); | 1201 auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>(); | 
| 1181 if (isIntegerType(DestTy)) { | 1202 if (isIntegerType(DestTy)) { | 
| 1182 Asm->vmovrqi(Dest, Src, Index, getPredicate()); | 1203 Asm->vmovrqi(Dest, Src, adjustDIndex(SrcTy, Index), getPredicate()); | 
| 1183 assert(!Asm->needsTextFixup()); | 1204 assert(!Asm->needsTextFixup()); | 
| 1184 return; | 1205 return; | 
| 1185 } | 1206 } | 
| 1186 assert(isFloatingType(DestTy)); | 1207 assert(isFloatingType(DestTy)); | 
| 1187 Asm->vmovsqi(Dest, Src, Index, getPredicate()); | 1208 Asm->vmovsqi(Dest, Src, Index, getPredicate()); | 
| 1188 assert(!Asm->needsTextFixup()); | 1209 assert(!Asm->needsTextFixup()); | 
| 1189 } | 1210 } | 
| 1190 | 1211 | 
| 1212 namespace { | |
| 1213 Type insertionType(Type Ty) { | |
| 1214 assert(isVectorType(Ty)); | |
| 1215 switch (Ty) { | |
| 1216 case IceType_v4i1: | |
| 1217 return IceType_v4i32; | |
| 1218 case IceType_v8i1: | |
| 1219 return IceType_v8i16; | |
| 1220 case IceType_v16i1: | |
| 1221 return IceType_v16i8; | |
| 1222 default: | |
| 1223 return Ty; | |
| 1224 } | |
| 1225 } | |
| 1226 } // end of anonymous namespace | |
| 1227 | |
| 1191 void InstARM32Insert::emit(const Cfg *Func) const { | 1228 void InstARM32Insert::emit(const Cfg *Func) const { | 
| 1192 Ostream &Str = Func->getContext()->getStrEmit(); | 1229 Ostream &Str = Func->getContext()->getStrEmit(); | 
| 1193 const Variable *Dest = getDest(); | 1230 const Variable *Dest = getDest(); | 
| 1194 const Type DestTy = getDest()->getType(); | |
| 1195 | |
| 1196 const auto *Src = llvm::cast<Variable>(getSrc(0)); | 1231 const auto *Src = llvm::cast<Variable>(getSrc(0)); | 
| 1232 const Type DestTy = insertionType(getDest()->getType()); | |
| 1233 assert(isVectorType(DestTy)); | |
| 1197 | 1234 | 
| 1198 if (isIntegerType(DestTy)) { | 1235 if (isIntegerType(DestTy)) { | 
| 1199 Str << "\t" | 1236 Str << "\t" | 
| 1200 << "vmov" << getPredicate(); | 1237 << "vmov" << getPredicate(); | 
| 1201 const size_t BitSize = typeWidthInBytes(typeElementType(DestTy)) * CHAR_BIT; | 1238 const size_t BitSize = typeWidthInBytes(typeElementType(DestTy)) * CHAR_BIT; | 
| 1202 Str << "." << BitSize << "\t"; | 1239 Str << "." << BitSize << "\t"; | 
| 1203 | 1240 | 
| 1204 const size_t VectorSize = typeNumElements(DestTy); | 1241 const size_t VectorSize = typeNumElements(DestTy); | 
| 1205 const Register DestReg = getDRegister(Dest, Index); | 1242 const Register DestReg = getDRegister(Dest, Index); | 
| 1206 const uint32_t Index = getDIndex(VectorSize, this->Index); | 1243 const uint32_t Index = | 
| 1244 getDIndex(insertionType(DestTy), VectorSize, this->Index); | |
| 1207 Str << RegARM32::RegTable[DestReg].Name; | 1245 Str << RegARM32::RegTable[DestReg].Name; | 
| 1208 Str << "[" << Index << "], "; | 1246 Str << "[" << Index << "], "; | 
| 1209 Src->emit(Func); | 1247 Src->emit(Func); | 
| 1210 } else if (isFloatingType(DestTy)) { | 1248 } else if (isFloatingType(DestTy)) { | 
| 1211 Str << "\t" | 1249 Str << "\t" | 
| 1212 << "vmov" << getPredicate() << ".f32" | 1250 << "vmov" << getPredicate() << ".f32" | 
| 1213 << "\t"; | 1251 << "\t"; | 
| 1214 const Register DestReg = getSRegister(Dest, Index); | 1252 const Register DestReg = getSRegister(Dest, Index); | 
| 1215 Str << RegARM32::RegTable[DestReg].Name << ", "; | 1253 Str << RegARM32::RegTable[DestReg].Name << ", "; | 
| 1216 Src->emit(Func); | 1254 Src->emit(Func); | 
| 1217 } else { | 1255 } else { | 
| 1218 assert(false && "Invalid insert type"); | 1256 assert(false && "Invalid insert type"); | 
| 1219 } | 1257 } | 
| 1220 } | 1258 } | 
| 1221 | 1259 | 
| 1222 void InstARM32Insert::emitIAS(const Cfg *Func) const { | 1260 void InstARM32Insert::emitIAS(const Cfg *Func) const { | 
| 1223 const Variable *Dest = getDest(); | 1261 const Variable *Dest = getDest(); | 
| 1224 const Operand *Src = getSrc(0); | 1262 const auto *Src = llvm::cast<Variable>(getSrc(0)); | 
| 1225 const Type SrcTy = Src->getType(); | 1263 const Type DestTy = insertionType(Dest->getType()); | 
| 1226 assert(isVectorType(Dest->getType())); | 1264 const Type SrcTy = typeElementType(DestTy); | 
| 1227 assert(typeElementType(Dest->getType()) == SrcTy); | 1265 assert(SrcTy == Src->getType() || Src->getType() == IceType_i1); | 
| 1266 assert(isVectorType(DestTy)); | |
| 1228 auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>(); | 1267 auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>(); | 
| 1229 if (isIntegerType(SrcTy)) { | 1268 if (isIntegerType(SrcTy)) { | 
| 1230 const Operand *Src = getSrc(0); | 1269 Asm->vmovqir(Dest->asType(Func, DestTy, Dest->getRegNum()), | 
| 1231 Asm->vmovqir(Dest, Index, Src, getPredicate()); | 1270 adjustDIndex(DestTy, Index), | 
| 1271 Src->asType(Func, SrcTy, Src->getRegNum()), getPredicate()); | |
| 
Eric Holk
2016/04/11 17:34:57
Are we guaranteed that Src has a register at this point?
 
John
2016/04/11 18:23:23
yes
 | |
| 1232 assert(!Asm->needsTextFixup()); | 1272 assert(!Asm->needsTextFixup()); | 
| 1233 return; | 1273 return; | 
| 1234 } | 1274 } | 
| 1235 assert(isFloatingType(SrcTy)); | 1275 assert(isFloatingType(SrcTy)); | 
| 1236 Asm->vmovqis(Dest, Index, Src, getPredicate()); | 1276 Asm->vmovqis(Dest, Index, Src, getPredicate()); | 
| 1237 assert(!Asm->needsTextFixup()); | 1277 assert(!Asm->needsTextFixup()); | 
| 1238 } | 1278 } | 
| 1239 | 1279 | 
| 1240 template <InstARM32::InstKindARM32 K> | 1280 template <InstARM32::InstKindARM32 K> | 
| 1241 void InstARM32CmpLike<K>::emitIAS(const Cfg *Func) const { | 1281 void InstARM32CmpLike<K>::emitIAS(const Cfg *Func) const { | 
| (...skipping 1545 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2787 | 2827 | 
| 2788 template class InstARM32FourAddrGPR<InstARM32::Mla>; | 2828 template class InstARM32FourAddrGPR<InstARM32::Mla>; | 
| 2789 template class InstARM32FourAddrGPR<InstARM32::Mls>; | 2829 template class InstARM32FourAddrGPR<InstARM32::Mls>; | 
| 2790 | 2830 | 
| 2791 template class InstARM32CmpLike<InstARM32::Cmn>; | 2831 template class InstARM32CmpLike<InstARM32::Cmn>; | 
| 2792 template class InstARM32CmpLike<InstARM32::Cmp>; | 2832 template class InstARM32CmpLike<InstARM32::Cmp>; | 
| 2793 template class InstARM32CmpLike<InstARM32::Tst>; | 2833 template class InstARM32CmpLike<InstARM32::Tst>; | 
| 2794 | 2834 | 
| 2795 } // end of namespace ARM32 | 2835 } // end of namespace ARM32 | 
| 2796 } // end of namespace Ice | 2836 } // end of namespace Ice | 
| OLD | NEW |