Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(621)

Side by Side Diff: src/IceInstARM32.cpp

Issue 1655313002: Subzero: ARM32: lowering of vector insert and extract. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Removing incorrect use of rematerializable check. Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceInstARM32.cpp - ARM32 instruction implementation ----===// 1 //===- subzero/src/IceInstARM32.cpp - ARM32 instruction implementation ----===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
11 /// \brief Implements the InstARM32 and OperandARM32 classes, primarily the 11 /// \brief Implements the InstARM32 and OperandARM32 classes, primarily the
12 /// constructors and the dump()/emit() methods. 12 /// constructors and the dump()/emit() methods.
13 /// 13 ///
14 //===----------------------------------------------------------------------===// 14 //===----------------------------------------------------------------------===//
15 15
16 #include "IceInstARM32.h" 16 #include "IceInstARM32.h"
17 17
18 #include "IceAssemblerARM32.h" 18 #include "IceAssemblerARM32.h"
19 #include "IceCfg.h" 19 #include "IceCfg.h"
20 #include "IceCfgNode.h" 20 #include "IceCfgNode.h"
21 #include "IceInst.h" 21 #include "IceInst.h"
22 #include "IceOperand.h" 22 #include "IceOperand.h"
23 #include "IceRegistersARM32.h"
24 #include "IceTargetLoweringARM32.h" 23 #include "IceTargetLoweringARM32.h"
25 24
26 namespace Ice { 25 namespace Ice {
27 namespace ARM32 { 26 namespace ARM32 {
28 27
29 namespace { 28 namespace {
30 29
30 using Register = RegARM32::AllRegisters;
31
31 // maximum number of registers allowed in vpush/vpop. 32 // maximum number of registers allowed in vpush/vpop.
32 static constexpr SizeT VpushVpopMaxConsecRegs = 16; 33 static constexpr SizeT VpushVpopMaxConsecRegs = 16;
33 34
34 const struct TypeARM32Attributes_ { 35 const struct TypeARM32Attributes_ {
35 const char *WidthString; // b, h, <blank>, or d 36 const char *WidthString; // b, h, <blank>, or d
36 const char *VecWidthString; // i8, i16, i32, f32, f64 37 const char *VecWidthString; // i8, i16, i32, f32, f64
37 int8_t SExtAddrOffsetBits; 38 int8_t SExtAddrOffsetBits;
38 int8_t ZExtAddrOffsetBits; 39 int8_t ZExtAddrOffsetBits;
39 } TypeARM32Attributes[] = { 40 } TypeARM32Attributes[] = {
40 #define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr, shaddr) \ 41 #define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr, shaddr) \
(...skipping 933 matching lines...) Expand 10 before | Expand all | Expand 10 after
974 } 975 }
975 976
976 if (Src64 == nullptr) { 977 if (Src64 == nullptr) {
977 addSource(Src); 978 addSource(Src);
978 } else { 979 } else {
979 addSource(Src64->getLo()); 980 addSource(Src64->getLo());
980 addSource(Src64->getHi()); 981 addSource(Src64->getHi());
981 } 982 }
982 } 983 }
983 984
985 // These next two functions locate a vector element inside a Q register:
986 // getDRegister finds the D register that maps to the half of the Q register
// that this instruction is accessing, and getDIndex gives the element's index
// within that D register.
//
// Src must already have a register assigned, and that register's RegTable
// entry must have IsVec128 set (both are asserted). Index is the element index
// within the whole vector: indices below NumElements / 2 select Aliases[1],
// all others select Aliases[2].
987 Register getDRegister(const Variable *Src, uint32_t Index) {
988 assert(Src->hasReg());
989 const auto SrcReg = (Register)Src->getRegNum();
990
991 const RegARM32::RegTableType &SrcEntry = RegARM32::RegTable[SrcReg];
992 assert(SrcEntry.IsVec128);
993
994 const uint32_t NumElements = typeNumElements(Src->getType());
995
996 // This code assumes the Aliases list goes Q_n, D_2n, D_2n+1. The asserts in
997 // the next two branches help to check that this is still true.
998 if (Index < NumElements / 2) {
999 // We have a Q register that's made up of two D registers. This assert is
1000 // to help ensure that we picked the right D register.
1001 //
1002 // TODO(jpp): find a way to do this that doesn't rely on ordering of the
1003 // alias list.
1004 assert(RegARM32::RegTable[SrcEntry.Aliases[1]].Encoding + 1 ==
1005 RegARM32::RegTable[SrcEntry.Aliases[2]].Encoding);
// Element is in the first half of the vector: use the first D alias.
1006 return (Register)SrcEntry.Aliases[1];
1007 } else {
1008 // We have a Q register that's made up of two D registers. This assert is
1009 // to help ensure that we picked the right D register.
1010 //
1011 // TODO(jpp): find a way to do this that doesn't rely on ordering of the
1012 // alias list.
1013 assert(RegARM32::RegTable[SrcEntry.Aliases[2]].Encoding - 1 ==
1014 RegARM32::RegTable[SrcEntry.Aliases[1]].Encoding);
// Element is in the second half of the vector: use the second D alias.
1015 return (Register)SrcEntry.Aliases[2];
1016 }
1017 }
1018
// Converts an element index within a whole vector of NumElements elements
// into an index relative to the half that contains it: indices in the first
// half are returned unchanged, indices in the second half have NumElements / 2
// subtracted. This uses the same first-half/second-half split as
// getDRegister.
1019 constexpr uint32_t getDIndex(uint32_t NumElements, uint32_t Index) {
1020 return (Index < NumElements / 2) ? Index : Index - (NumElements / 2);
1021 }
1022
1023 // For floating point values, we can insertelement or extractelement by moving
1024 // directly from an S register. This function finds the right one.
//
// Src must have a register assigned, be of type v4f32, and be allocated below
// Reg_q8 (all asserted). Index is the element index and must be less than 4.
1025 Register getSRegister(const Variable *Src, uint32_t Index) {
1026 assert(Src->hasReg());
1027 auto SrcReg = Src->getRegNum();
1028
1029 // For floating point values, we need to be allocated to Q0 - Q7, so we can
1030 // directly access the value we want as one of the S registers.
1031 assert(Src->getType() == IceType_v4f32 && SrcReg < RegARM32::Reg_q8);
1032
1033 // This part assumes the register alias list goes q0, d0, d1, s0, s1, s2,
1034 // s3.
1035 assert(Index < 4);
1036
1037 // TODO(jpp): find a way to do this that doesn't rely on ordering of the alias
1038 // list.
// Under the ordering assumed above, alias entries 0-2 are the Q register and
// its two D registers, so the S registers start at entry 3.
1039 return (Register)RegARM32::RegTable[SrcReg].Aliases[Index + 3];
1040 }
1041
// Writes the textual form of a vector element extraction to the context's
// emit stream as a single vmov. The destination's type selects the form:
//  - integer: "vmov<pred>.s<bits>" for elements narrower than 32 bits,
//    otherwise "vmov<pred>.<bits>", reading from "<Dreg>[<lane>]";
//  - floating point: "vmov<pred>.f32" reading directly from an S register.
// Any other destination type trips the assert at the end.
1042 void InstARM32Extract::emit(const Cfg *Func) const {
1043 auto &Str = Func->getContext()->getStrEmit();
1044 auto DestTy = getDest()->getType();
1045
// NOTE(review): the dyn_cast result is dereferenced below without a null
// check, so this relies on source 0 always being a Variable.
1046 auto Src = llvm::dyn_cast<Variable>(getSrc(0));
Jim Stichnoth 2016/02/05 13:27:20 auto *
Eric Holk 2016/02/05 17:52:01 Done.
1047
1048 if (isIntegerType(DestTy)) {
1049 Str << "\t"
1050 << "vmov" << getPredicate();
// The size suffix is derived from the destination type's width.
1051 auto BitSize = typeWidthInBytes(DestTy) * CHAR_BIT;
1052 if (BitSize < 32) {
1053 Str << ".s" << BitSize;
1054 } else {
1055 Str << "." << BitSize;
1056 }
1057 Str << "\t";
1058 getDest()->emit(Func);
1059 Str << ", ";
1060
1061 auto VectorSize = typeNumElements(Src->getType());
1062
// Pick the D register holding the element, then print the element's index
// relative to that D register.
1063 auto SrcReg = getDRegister(Src, Index);
1064
1065 Str << RegARM32::RegTable[SrcReg].Name;
1066 Str << "[" << getDIndex(VectorSize, Index) << "]";
1067 } else if (isFloatingType(DestTy)) {
// getSRegister asserts that Src is a v4f32 allocated below Reg_q8.
1068 const auto SrcReg = getSRegister(Src, Index);
1069
1070 Str << "\t"
1071 << "vmov" << getPredicate() << ".f32"
1072 << "\t";
1073 getDest()->emit(Func);
1074 Str << ", " << RegARM32::RegTable[SrcReg].Name;
1075 } else {
1076 assert(false && "Invalid extract type");
1077 }
1078 }
1079
// Writes the textual form of a vector element insertion to the context's emit
// stream as a single vmov. The destination (vector) type selects the form:
//  - integer: "vmov<pred>.<bits>" writing to "<Dreg>[<lane>]", where <bits> is
//    the width of the destination's element type;
//  - floating point: "vmov<pred>.f32" writing directly to an S register.
// The source operand is emitted last in both forms. Any other destination type
// trips the assert at the end.
1080 void InstARM32Insert::emit(const Cfg *Func) const {
1081 Ostream &Str = Func->getContext()->getStrEmit();
1082 const Variable *Dest = getDest();
1083 const Type DestTy = getDest()->getType();
1084
// Source 0 is asserted to be a Variable before the cast below.
1085 assert(llvm::isa<Variable>(getSrc(0)));
1086 const auto Src = llvm::dyn_cast<Variable>(getSrc(0));
Jim Stichnoth 2016/02/05 13:27:20 auto *
Eric Holk 2016/02/05 17:52:01 Done.
1087
1088 if (isIntegerType(DestTy)) {
1089 Str << "\t"
1090 << "vmov" << getPredicate();
1091 const auto BitSize = typeWidthInBytes(typeElementType(DestTy)) * CHAR_BIT;
1092 Str << "." << BitSize << "\t";
1093
1094 const auto VectorSize = typeNumElements(DestTy);
// NOTE(review): `Index` on the next line is the member (whole-vector element
// index); the local `Index` declared on the line after it shadows the member
// for the rest of this branch and holds the index within the D register.
1095 const auto DestReg = getDRegister(Dest, Index);
1096 const auto Index = getDIndex(VectorSize, this->Index);
1097 Str << RegARM32::RegTable[DestReg].Name;
1098 Str << "[" << Index << "], ";
1099 Src->emit(Func);
1100 } else if (isFloatingType(DestTy)) {
1101 Str << "\t"
1102 << "vmov" << getPredicate() << ".f32"
1103 << "\t";
// getSRegister asserts that Dest is a v4f32 allocated below Reg_q8.
1104 const auto DestReg = getSRegister(Dest, Index);
1105 Str << RegARM32::RegTable[DestReg].Name << ", ";
1106 Src->emit(Func);
1107 } else {
1108 assert(false && "Invalid insert type");
1109 }
1110 }
1111
984 template <InstARM32::InstKindARM32 K> 1112 template <InstARM32::InstKindARM32 K>
985 void InstARM32CmpLike<K>::emitIAS(const Cfg *Func) const { 1113 void InstARM32CmpLike<K>::emitIAS(const Cfg *Func) const {
986 emitUsingTextFixup(Func); 1114 emitUsingTextFixup(Func);
987 } 1115 }
988 1116
989 template <> void InstARM32Cmn::emitIAS(const Cfg *Func) const { 1117 template <> void InstARM32Cmn::emitIAS(const Cfg *Func) const {
990 assert(getSrcSize() == 2); 1118 assert(getSrcSize() == 2);
991 auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>(); 1119 auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
992 Asm->cmn(getSrc(0), getSrc(1), getPredicate()); 1120 Asm->cmn(getSrc(0), getSrc(1), getPredicate());
993 if (Asm->needsTextFixup()) 1121 if (Asm->needsTextFixup())
(...skipping 1456 matching lines...) Expand 10 before | Expand all | Expand 10 after
2450 2578
2451 template class InstARM32FourAddrGPR<InstARM32::Mla>; 2579 template class InstARM32FourAddrGPR<InstARM32::Mla>;
2452 template class InstARM32FourAddrGPR<InstARM32::Mls>; 2580 template class InstARM32FourAddrGPR<InstARM32::Mls>;
2453 2581
2454 template class InstARM32CmpLike<InstARM32::Cmn>; 2582 template class InstARM32CmpLike<InstARM32::Cmn>;
2455 template class InstARM32CmpLike<InstARM32::Cmp>; 2583 template class InstARM32CmpLike<InstARM32::Cmp>;
2456 template class InstARM32CmpLike<InstARM32::Tst>; 2584 template class InstARM32CmpLike<InstARM32::Tst>;
2457 2585
2458 } // end of namespace ARM32 2586 } // end of namespace ARM32
2459 } // end of namespace Ice 2587 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceInstARM32.h ('k') | src/IceRegistersARM32.h » ('j') | src/IceTargetLoweringARM32.cpp » ('J')

Powered by Google App Engine
This is Rietveld 408576698