Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(260)

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 569033002: Split ConstantInteger into ConstantInteger32 and ConstantInteger64. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: rebase Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | src/PNaClTranslator.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
(...skipping 582 matching lines...) Expand 10 before | Expand all | Expand 10 after
593 } 593 }
594 if (isVectorType(Ty)) { 594 if (isVectorType(Ty)) {
595 InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes); 595 InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes);
596 } 596 }
597 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); 597 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
598 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); 598 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
599 if (Arg->hasReg()) { 599 if (Arg->hasReg()) {
600 assert(Ty != IceType_i64); 600 assert(Ty != IceType_i64);
601 OperandX8632Mem *Mem = OperandX8632Mem::create( 601 OperandX8632Mem *Mem = OperandX8632Mem::create(
602 Func, Ty, FramePtr, 602 Func, Ty, FramePtr,
603 Ctx->getConstantInt(IceType_i32, Arg->getStackOffset())); 603 Ctx->getConstantInt32(IceType_i32, Arg->getStackOffset()));
604 if (isVectorType(Arg->getType())) { 604 if (isVectorType(Arg->getType())) {
605 _movp(Arg, Mem); 605 _movp(Arg, Mem);
606 } else { 606 } else {
607 _mov(Arg, Mem); 607 _mov(Arg, Mem);
608 } 608 }
609 } 609 }
610 } 610 }
611 611
612 Type TargetX8632::stackSlotType() { return IceType_i32; } 612 Type TargetX8632::stackSlotType() { return IceType_i32; }
613 613
(...skipping 178 matching lines...) Expand 10 before | Expand all | Expand 10 after
792 // Align esp if necessary. 792 // Align esp if necessary.
793 if (NeedsStackAlignment) { 793 if (NeedsStackAlignment) {
794 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; 794 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
795 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); 795 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
796 SpillAreaSizeBytes = StackSize - StackOffset; 796 SpillAreaSizeBytes = StackSize - StackOffset;
797 } 797 }
798 798
799 // Generate "sub esp, SpillAreaSizeBytes" 799 // Generate "sub esp, SpillAreaSizeBytes"
800 if (SpillAreaSizeBytes) 800 if (SpillAreaSizeBytes)
801 _sub(getPhysicalRegister(Reg_esp), 801 _sub(getPhysicalRegister(Reg_esp),
802 Ctx->getConstantInt(IceType_i32, SpillAreaSizeBytes)); 802 Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes));
803 803
804 resetStackAdjustment(); 804 resetStackAdjustment();
805 805
806 // Fill in stack offsets for stack args, and copy args into registers 806 // Fill in stack offsets for stack args, and copy args into registers
807 // for those that were register-allocated. Args are pushed right to 807 // for those that were register-allocated. Args are pushed right to
808 // left, so Arg[0] is closest to the stack/frame pointer. 808 // left, so Arg[0] is closest to the stack/frame pointer.
809 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); 809 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
810 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES; 810 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES;
811 if (!IsEbpBasedFrame) 811 if (!IsEbpBasedFrame)
812 BasicFrameOffset += SpillAreaSizeBytes; 812 BasicFrameOffset += SpillAreaSizeBytes;
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after
908 Context.setInsertPoint(InsertPoint); 908 Context.setInsertPoint(InsertPoint);
909 909
910 Variable *esp = getPhysicalRegister(Reg_esp); 910 Variable *esp = getPhysicalRegister(Reg_esp);
911 if (IsEbpBasedFrame) { 911 if (IsEbpBasedFrame) {
912 Variable *ebp = getPhysicalRegister(Reg_ebp); 912 Variable *ebp = getPhysicalRegister(Reg_ebp);
913 _mov(esp, ebp); 913 _mov(esp, ebp);
914 _pop(ebp); 914 _pop(ebp);
915 } else { 915 } else {
916 // add esp, SpillAreaSizeBytes 916 // add esp, SpillAreaSizeBytes
917 if (SpillAreaSizeBytes) 917 if (SpillAreaSizeBytes)
918 _add(esp, Ctx->getConstantInt(IceType_i32, SpillAreaSizeBytes)); 918 _add(esp, Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes));
919 } 919 }
920 920
921 // Add pop instructions for preserved registers. 921 // Add pop instructions for preserved registers.
922 llvm::SmallBitVector CalleeSaves = 922 llvm::SmallBitVector CalleeSaves =
923 getRegisterSet(RegSet_CalleeSave, RegSet_None); 923 getRegisterSet(RegSet_CalleeSave, RegSet_None);
924 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { 924 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
925 SizeT j = CalleeSaves.size() - i - 1; 925 SizeT j = CalleeSaves.size() - i - 1;
926 if (j == Reg_ebp && IsEbpBasedFrame) 926 if (j == Reg_ebp && IsEbpBasedFrame)
927 continue; 927 continue;
928 if (CalleeSaves[j] && RegsUsed[j]) { 928 if (CalleeSaves[j] && RegsUsed[j]) {
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after
1026 } 1026 }
1027 1027
1028 Operand *TargetX8632::loOperand(Operand *Operand) { 1028 Operand *TargetX8632::loOperand(Operand *Operand) {
1029 assert(Operand->getType() == IceType_i64); 1029 assert(Operand->getType() == IceType_i64);
1030 if (Operand->getType() != IceType_i64) 1030 if (Operand->getType() != IceType_i64)
1031 return Operand; 1031 return Operand;
1032 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { 1032 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
1033 split64(Var); 1033 split64(Var);
1034 return Var->getLo(); 1034 return Var->getLo();
1035 } 1035 }
1036 if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) { 1036 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1037 uint64_t Mask = (1ull << 32) - 1; 1037 return Ctx->getConstantInt32(IceType_i32,
1038 return Ctx->getConstantInt(IceType_i32, Const->getValue() & Mask); 1038 static_cast<uint32_t>(Const->getValue()));
1039 } 1039 }
1040 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) { 1040 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
1041 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), 1041 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(),
1042 Mem->getOffset(), Mem->getIndex(), 1042 Mem->getOffset(), Mem->getIndex(),
1043 Mem->getShift(), Mem->getSegmentRegister()); 1043 Mem->getShift(), Mem->getSegmentRegister());
1044 } 1044 }
1045 llvm_unreachable("Unsupported operand type"); 1045 llvm_unreachable("Unsupported operand type");
1046 return NULL; 1046 return NULL;
1047 } 1047 }
1048 1048
1049 Operand *TargetX8632::hiOperand(Operand *Operand) { 1049 Operand *TargetX8632::hiOperand(Operand *Operand) {
1050 assert(Operand->getType() == IceType_i64); 1050 assert(Operand->getType() == IceType_i64);
1051 if (Operand->getType() != IceType_i64) 1051 if (Operand->getType() != IceType_i64)
1052 return Operand; 1052 return Operand;
1053 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { 1053 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
1054 split64(Var); 1054 split64(Var);
1055 return Var->getHi(); 1055 return Var->getHi();
1056 } 1056 }
1057 if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) { 1057 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1058 return Ctx->getConstantInt(IceType_i32, Const->getValue() >> 32); 1058 return Ctx->getConstantInt32(
1059 IceType_i32, static_cast<uint32_t>(Const->getValue() >> 32));
1059 } 1060 }
1060 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) { 1061 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
1061 Constant *Offset = Mem->getOffset(); 1062 Constant *Offset = Mem->getOffset();
1062 if (Offset == NULL) 1063 if (Offset == NULL)
1063 Offset = Ctx->getConstantInt(IceType_i32, 4); 1064 Offset = Ctx->getConstantInt32(IceType_i32, 4);
1064 else if (ConstantInteger *IntOffset = 1065 else if (ConstantInteger32 *IntOffset =
1065 llvm::dyn_cast<ConstantInteger>(Offset)) { 1066 llvm::dyn_cast<ConstantInteger32>(Offset)) {
1066 Offset = Ctx->getConstantInt(IceType_i32, 4 + IntOffset->getValue()); 1067 Offset = Ctx->getConstantInt32(IceType_i32, 4 + IntOffset->getValue());
1067 } else if (ConstantRelocatable *SymOffset = 1068 } else if (ConstantRelocatable *SymOffset =
1068 llvm::dyn_cast<ConstantRelocatable>(Offset)) { 1069 llvm::dyn_cast<ConstantRelocatable>(Offset)) {
1069 Offset = Ctx->getConstantSym(IceType_i32, 4 + SymOffset->getOffset(), 1070 Offset = Ctx->getConstantSym(IceType_i32, 4 + SymOffset->getOffset(),
1070 SymOffset->getName()); 1071 SymOffset->getName());
1071 } 1072 }
1072 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset, 1073 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset,
1073 Mem->getIndex(), Mem->getShift(), 1074 Mem->getIndex(), Mem->getShift(),
1074 Mem->getSegmentRegister()); 1075 Mem->getSegmentRegister());
1075 } 1076 }
1076 llvm_unreachable("Unsupported operand type"); 1077 llvm_unreachable("Unsupported operand type");
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
1124 // For default align=0, set it to the real value 1, to avoid any 1125 // For default align=0, set it to the real value 1, to avoid any
1125 // bit-manipulation problems below. 1126 // bit-manipulation problems below.
1126 AlignmentParam = std::max(AlignmentParam, 1u); 1127 AlignmentParam = std::max(AlignmentParam, 1u);
1127 1128
1128 // LLVM enforces power of 2 alignment. 1129 // LLVM enforces power of 2 alignment.
1129 assert((AlignmentParam & (AlignmentParam - 1)) == 0); 1130 assert((AlignmentParam & (AlignmentParam - 1)) == 0);
1130 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0); 1131 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);
1131 1132
1132 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES); 1133 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES);
1133 if (Alignment > X86_STACK_ALIGNMENT_BYTES) { 1134 if (Alignment > X86_STACK_ALIGNMENT_BYTES) {
1134 _and(esp, Ctx->getConstantInt(IceType_i32, -Alignment)); 1135 _and(esp, Ctx->getConstantInt32(IceType_i32, -Alignment));
1135 } 1136 }
1136 if (ConstantInteger *ConstantTotalSize = 1137 if (ConstantInteger32 *ConstantTotalSize =
1137 llvm::dyn_cast<ConstantInteger>(TotalSize)) { 1138 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
1138 uint32_t Value = ConstantTotalSize->getValue(); 1139 uint32_t Value = ConstantTotalSize->getValue();
1139 Value = applyAlignment(Value, Alignment); 1140 Value = applyAlignment(Value, Alignment);
1140 _sub(esp, Ctx->getConstantInt(IceType_i32, Value)); 1141 _sub(esp, Ctx->getConstantInt32(IceType_i32, Value));
1141 } else { 1142 } else {
1142 // Non-constant sizes need to be adjusted to the next highest 1143 // Non-constant sizes need to be adjusted to the next highest
1143 // multiple of the required alignment at runtime. 1144 // multiple of the required alignment at runtime.
1144 Variable *T = makeReg(IceType_i32); 1145 Variable *T = makeReg(IceType_i32);
1145 _mov(T, TotalSize); 1146 _mov(T, TotalSize);
1146 _add(T, Ctx->getConstantInt(IceType_i32, Alignment - 1)); 1147 _add(T, Ctx->getConstantInt32(IceType_i32, Alignment - 1));
1147 _and(T, Ctx->getConstantInt(IceType_i32, -Alignment)); 1148 _and(T, Ctx->getConstantInt32(IceType_i32, -Alignment));
1148 _sub(esp, T); 1149 _sub(esp, T);
1149 } 1150 }
1150 _mov(Dest, esp); 1151 _mov(Dest, esp);
1151 } 1152 }
1152 1153
1153 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { 1154 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
1154 Variable *Dest = Inst->getDest(); 1155 Variable *Dest = Inst->getDest();
1155 Operand *Src0 = legalize(Inst->getSrc(0)); 1156 Operand *Src0 = legalize(Inst->getSrc(0));
1156 Operand *Src1 = legalize(Inst->getSrc(1)); 1157 Operand *Src1 = legalize(Inst->getSrc(1));
1157 if (Dest->getType() == IceType_i64) { 1158 if (Dest->getType() == IceType_i64) {
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after
1247 // t2 = shl t2, t1 1248 // t2 = shl t2, t1
1248 // test t1, 0x20 1249 // test t1, 0x20
1249 // je L1 1250 // je L1
1250 // use(t3) 1251 // use(t3)
1251 // t3 = t2 1252 // t3 = t2
1252 // t2 = 0 1253 // t2 = 0
1253 // L1: 1254 // L1:
1254 // a.lo = t2 1255 // a.lo = t2
1255 // a.hi = t3 1256 // a.hi = t3
1256 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; 1257 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
1257 Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20); 1258 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20);
1258 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1259 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1259 InstX8632Label *Label = InstX8632Label::create(Func, this); 1260 InstX8632Label *Label = InstX8632Label::create(Func, this);
1260 _mov(T_1, Src1Lo, Reg_ecx); 1261 _mov(T_1, Src1Lo, Reg_ecx);
1261 _mov(T_2, Src0Lo); 1262 _mov(T_2, Src0Lo);
1262 _mov(T_3, Src0Hi); 1263 _mov(T_3, Src0Hi);
1263 _shld(T_3, T_2, T_1); 1264 _shld(T_3, T_2, T_1);
1264 _shl(T_2, T_1); 1265 _shl(T_2, T_1);
1265 _test(T_1, BitTest); 1266 _test(T_1, BitTest);
1266 _br(InstX8632Br::Br_e, Label); 1267 _br(InstX8632Br::Br_e, Label);
1267 // Because of the intra-block control flow, we need to fake a use 1268 // Because of the intra-block control flow, we need to fake a use
(...skipping 15 matching lines...) Expand all
1283 // t3 = shr t3, t1 1284 // t3 = shr t3, t1
1284 // test t1, 0x20 1285 // test t1, 0x20
1285 // je L1 1286 // je L1
1286 // use(t2) 1287 // use(t2)
1287 // t2 = t3 1288 // t2 = t3
1288 // t3 = 0 1289 // t3 = 0
1289 // L1: 1290 // L1:
1290 // a.lo = t2 1291 // a.lo = t2
1291 // a.hi = t3 1292 // a.hi = t3
1292 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; 1293 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
1293 Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20); 1294 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20);
1294 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1295 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1295 InstX8632Label *Label = InstX8632Label::create(Func, this); 1296 InstX8632Label *Label = InstX8632Label::create(Func, this);
1296 _mov(T_1, Src1Lo, Reg_ecx); 1297 _mov(T_1, Src1Lo, Reg_ecx);
1297 _mov(T_2, Src0Lo); 1298 _mov(T_2, Src0Lo);
1298 _mov(T_3, Src0Hi); 1299 _mov(T_3, Src0Hi);
1299 _shrd(T_2, T_3, T_1); 1300 _shrd(T_2, T_3, T_1);
1300 _shr(T_3, T_1); 1301 _shr(T_3, T_1);
1301 _test(T_1, BitTest); 1302 _test(T_1, BitTest);
1302 _br(InstX8632Br::Br_e, Label); 1303 _br(InstX8632Br::Br_e, Label);
1303 // Because of the intra-block control flow, we need to fake a use 1304 // Because of the intra-block control flow, we need to fake a use
(...skipping 15 matching lines...) Expand all
1319 // t3 = sar t3, t1 1320 // t3 = sar t3, t1
1320 // test t1, 0x20 1321 // test t1, 0x20
1321 // je L1 1322 // je L1
1322 // use(t2) 1323 // use(t2)
1323 // t2 = t3 1324 // t2 = t3
1324 // t3 = sar t3, 0x1f 1325 // t3 = sar t3, 0x1f
1325 // L1: 1326 // L1:
1326 // a.lo = t2 1327 // a.lo = t2
1327 // a.hi = t3 1328 // a.hi = t3
1328 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; 1329 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
1329 Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20); 1330 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20);
1330 Constant *SignExtend = Ctx->getConstantInt(IceType_i32, 0x1f); 1331 Constant *SignExtend = Ctx->getConstantInt32(IceType_i32, 0x1f);
1331 InstX8632Label *Label = InstX8632Label::create(Func, this); 1332 InstX8632Label *Label = InstX8632Label::create(Func, this);
1332 _mov(T_1, Src1Lo, Reg_ecx); 1333 _mov(T_1, Src1Lo, Reg_ecx);
1333 _mov(T_2, Src0Lo); 1334 _mov(T_2, Src0Lo);
1334 _mov(T_3, Src0Hi); 1335 _mov(T_3, Src0Hi);
1335 _shrd(T_2, T_3, T_1); 1336 _shrd(T_2, T_3, T_1);
1336 _sar(T_3, T_1); 1337 _sar(T_3, T_1);
1337 _test(T_1, BitTest); 1338 _test(T_1, BitTest);
1338 _br(InstX8632Br::Br_e, Label); 1339 _br(InstX8632Br::Br_e, Label);
1339 // Because of the intra-block control flow, we need to fake a use 1340 // Because of the intra-block control flow, we need to fake a use
1340 // of T_3 to prevent its earlier definition from being dead-code 1341 // of T_3 to prevent its earlier definition from being dead-code
(...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after
1441 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]} 1442 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}
1442 // pmuludq T2, T3 1443 // pmuludq T2, T3
1443 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])} 1444 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}
1444 // shufps T1, T2, {0,2,0,2} 1445 // shufps T1, T2, {0,2,0,2}
1445 // pshufd T4, T1, {0,2,1,3} 1446 // pshufd T4, T1, {0,2,1,3}
1446 // movups Dest, T4 1447 // movups Dest, T4
1447 1448
1448 // Mask that directs pshufd to create a vector with entries 1449 // Mask that directs pshufd to create a vector with entries
1449 // Src[1, 0, 3, 0] 1450 // Src[1, 0, 3, 0]
1450 const unsigned Constant1030 = 0x31; 1451 const unsigned Constant1030 = 0x31;
1451 Constant *Mask1030 = Ctx->getConstantInt(IceType_i8, Constant1030); 1452 Constant *Mask1030 = Ctx->getConstantInt32(IceType_i8, Constant1030);
1452 // Mask that directs shufps to create a vector with entries 1453 // Mask that directs shufps to create a vector with entries
1453 // Dest[0, 2], Src[0, 2] 1454 // Dest[0, 2], Src[0, 2]
1454 const unsigned Mask0202 = 0x88; 1455 const unsigned Mask0202 = 0x88;
1455 // Mask that directs pshufd to create a vector with entries 1456 // Mask that directs pshufd to create a vector with entries
1456 // Src[0, 2, 1, 3] 1457 // Src[0, 2, 1, 3]
1457 const unsigned Mask0213 = 0xd8; 1458 const unsigned Mask0213 = 0xd8;
1458 Variable *T1 = makeReg(IceType_v4i32); 1459 Variable *T1 = makeReg(IceType_v4i32);
1459 Variable *T2 = makeReg(IceType_v4i32); 1460 Variable *T2 = makeReg(IceType_v4i32);
1460 Variable *T3 = makeReg(IceType_v4i32); 1461 Variable *T3 = makeReg(IceType_v4i32);
1461 Variable *T4 = makeReg(IceType_v4i32); 1462 Variable *T4 = makeReg(IceType_v4i32);
1462 _movp(T1, Src0); 1463 _movp(T1, Src0);
1463 _pshufd(T2, Src0, Mask1030); 1464 _pshufd(T2, Src0, Mask1030);
1464 _pshufd(T3, Src1, Mask1030); 1465 _pshufd(T3, Src1, Mask1030);
1465 _pmuludq(T1, Src1); 1466 _pmuludq(T1, Src1);
1466 _pmuludq(T2, T3); 1467 _pmuludq(T2, T3);
1467 _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202)); 1468 _shufps(T1, T2, Ctx->getConstantInt32(IceType_i8, Mask0202));
1468 _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213)); 1469 _pshufd(T4, T1, Ctx->getConstantInt32(IceType_i8, Mask0213));
1469 _movp(Dest, T4); 1470 _movp(Dest, T4);
1470 } else { 1471 } else {
1471 assert(Dest->getType() == IceType_v16i8); 1472 assert(Dest->getType() == IceType_v16i8);
1472 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); 1473 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1473 } 1474 }
1474 } break; 1475 } break;
1475 case InstArithmetic::Shl: 1476 case InstArithmetic::Shl:
1476 case InstArithmetic::Lshr: 1477 case InstArithmetic::Lshr:
1477 case InstArithmetic::Ashr: 1478 case InstArithmetic::Ashr:
1478 case InstArithmetic::Udiv: 1479 case InstArithmetic::Udiv:
(...skipping 272 matching lines...) Expand 10 before | Expand all | Expand 10 after
1751 assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_i64 || 1752 assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_i64 ||
1752 Ty == IceType_f64 || isVectorType(Ty)); 1753 Ty == IceType_f64 || isVectorType(Ty));
1753 if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) { 1754 if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) {
1754 XmmArgs.push_back(Arg); 1755 XmmArgs.push_back(Arg);
1755 } else { 1756 } else {
1756 StackArgs.push_back(Arg); 1757 StackArgs.push_back(Arg);
1757 if (isVectorType(Arg->getType())) { 1758 if (isVectorType(Arg->getType())) {
1758 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); 1759 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
1759 } 1760 }
1760 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp); 1761 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
1761 Constant *Loc = Ctx->getConstantInt(IceType_i32, ParameterAreaSizeBytes); 1762 Constant *Loc =
1763 Ctx->getConstantInt32(IceType_i32, ParameterAreaSizeBytes);
1762 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc)); 1764 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc));
1763 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); 1765 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
1764 } 1766 }
1765 } 1767 }
1766 1768
1767 // Adjust the parameter area so that the stack is aligned. It is 1769 // Adjust the parameter area so that the stack is aligned. It is
1768 // assumed that the stack is already aligned at the start of the 1770 // assumed that the stack is already aligned at the start of the
1769 // calling sequence. 1771 // calling sequence.
1770 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); 1772 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
1771 1773
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
1848 Operand *CallTarget = legalize(Instr->getCallTarget(), Legal_All); 1850 Operand *CallTarget = legalize(Instr->getCallTarget(), Legal_All);
1849 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget); 1851 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget);
1850 Context.insert(NewCall); 1852 Context.insert(NewCall);
1851 if (ReturnRegHi) 1853 if (ReturnRegHi)
1852 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); 1854 Context.insert(InstFakeDef::create(Func, ReturnRegHi));
1853 1855
1854 // Add the appropriate offset to esp. The call instruction takes care 1856 // Add the appropriate offset to esp. The call instruction takes care
1855 // of resetting the stack offset during emission. 1857 // of resetting the stack offset during emission.
1856 if (ParameterAreaSizeBytes) { 1858 if (ParameterAreaSizeBytes) {
1857 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp); 1859 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
1858 _add(esp, Ctx->getConstantInt(IceType_i32, ParameterAreaSizeBytes)); 1860 _add(esp, Ctx->getConstantInt32(IceType_i32, ParameterAreaSizeBytes));
1859 } 1861 }
1860 1862
1861 // Insert a register-kill pseudo instruction. 1863 // Insert a register-kill pseudo instruction.
1862 VarList KilledRegs; 1864 VarList KilledRegs;
1863 for (SizeT i = 0; i < ScratchRegs.size(); ++i) { 1865 for (SizeT i = 0; i < ScratchRegs.size(); ++i) {
1864 if (ScratchRegs[i]) 1866 if (ScratchRegs[i])
1865 KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i)); 1867 KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i));
1866 } 1868 }
1867 Context.insert(InstFakeKill::create(Func, KilledRegs, NewCall)); 1869 Context.insert(InstFakeKill::create(Func, KilledRegs, NewCall));
1868 1870
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
1933 Variable *T = makeReg(DestTy); 1935 Variable *T = makeReg(DestTy);
1934 _movp(T, Src0RM); 1936 _movp(T, Src0RM);
1935 _pand(T, OneMask); 1937 _pand(T, OneMask);
1936 Variable *Zeros = makeVectorOfZeros(Dest->getType()); 1938 Variable *Zeros = makeVectorOfZeros(Dest->getType());
1937 _pcmpgt(T, Zeros); 1939 _pcmpgt(T, Zeros);
1938 _movp(Dest, T); 1940 _movp(Dest, T);
1939 } else { 1941 } else {
1940 // width = width(elty) - 1; dest = (src << width) >> width 1942 // width = width(elty) - 1; dest = (src << width) >> width
1941 SizeT ShiftAmount = 1943 SizeT ShiftAmount =
1942 X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1; 1944 X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1;
1943 Constant *ShiftConstant = Ctx->getConstantInt(IceType_i8, ShiftAmount); 1945 Constant *ShiftConstant =
1946 Ctx->getConstantInt32(IceType_i8, ShiftAmount);
1944 Variable *T = makeReg(DestTy); 1947 Variable *T = makeReg(DestTy);
1945 _movp(T, Src0RM); 1948 _movp(T, Src0RM);
1946 _psll(T, ShiftConstant); 1949 _psll(T, ShiftConstant);
1947 _psra(T, ShiftConstant); 1950 _psra(T, ShiftConstant);
1948 _movp(Dest, T); 1951 _movp(Dest, T);
1949 } 1952 }
1950 } else if (Dest->getType() == IceType_i64) { 1953 } else if (Dest->getType() == IceType_i64) {
1951 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2 1954 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
1952 Constant *Shift = Ctx->getConstantInt(IceType_i32, 31); 1955 Constant *Shift = Ctx->getConstantInt32(IceType_i32, 31);
1953 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 1956 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1954 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 1957 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1955 Variable *T_Lo = makeReg(DestLo->getType()); 1958 Variable *T_Lo = makeReg(DestLo->getType());
1956 if (Src0RM->getType() == IceType_i32) { 1959 if (Src0RM->getType() == IceType_i32) {
1957 _mov(T_Lo, Src0RM); 1960 _mov(T_Lo, Src0RM);
1958 } else if (Src0RM->getType() == IceType_i1) { 1961 } else if (Src0RM->getType() == IceType_i1) {
1959 _movzx(T_Lo, Src0RM); 1962 _movzx(T_Lo, Src0RM);
1960 _shl(T_Lo, Shift); 1963 _shl(T_Lo, Shift);
1961 _sar(T_Lo, Shift); 1964 _sar(T_Lo, Shift);
1962 } else { 1965 } else {
1963 _movsx(T_Lo, Src0RM); 1966 _movsx(T_Lo, Src0RM);
1964 } 1967 }
1965 _mov(DestLo, T_Lo); 1968 _mov(DestLo, T_Lo);
1966 Variable *T_Hi = NULL; 1969 Variable *T_Hi = NULL;
1967 _mov(T_Hi, T_Lo); 1970 _mov(T_Hi, T_Lo);
1968 if (Src0RM->getType() != IceType_i1) 1971 if (Src0RM->getType() != IceType_i1)
1969 // For i1, the sar instruction is already done above. 1972 // For i1, the sar instruction is already done above.
1970 _sar(T_Hi, Shift); 1973 _sar(T_Hi, Shift);
1971 _mov(DestHi, T_Hi); 1974 _mov(DestHi, T_Hi);
1972 } else if (Src0RM->getType() == IceType_i1) { 1975 } else if (Src0RM->getType() == IceType_i1) {
1973 // t1 = src 1976 // t1 = src
1974 // shl t1, dst_bitwidth - 1 1977 // shl t1, dst_bitwidth - 1
1975 // sar t1, dst_bitwidth - 1 1978 // sar t1, dst_bitwidth - 1
1976 // dst = t1 1979 // dst = t1
1977 size_t DestBits = X86_CHAR_BIT * typeWidthInBytes(Dest->getType()); 1980 size_t DestBits = X86_CHAR_BIT * typeWidthInBytes(Dest->getType());
1978 Constant *ShiftAmount = Ctx->getConstantInt(IceType_i32, DestBits - 1); 1981 Constant *ShiftAmount = Ctx->getConstantInt32(IceType_i32, DestBits - 1);
1979 Variable *T = makeReg(Dest->getType()); 1982 Variable *T = makeReg(Dest->getType());
1980 if (typeWidthInBytes(Dest->getType()) <= 1983 if (typeWidthInBytes(Dest->getType()) <=
1981 typeWidthInBytes(Src0RM->getType())) { 1984 typeWidthInBytes(Src0RM->getType())) {
1982 _mov(T, Src0RM); 1985 _mov(T, Src0RM);
1983 } else { 1986 } else {
1984 // Widen the source using movsx or movzx. (It doesn't matter 1987 // Widen the source using movsx or movzx. (It doesn't matter
1985 // which one, since the following shl/sar overwrite the bits.) 1988 // which one, since the following shl/sar overwrite the bits.)
1986 _movzx(T, Src0RM); 1989 _movzx(T, Src0RM);
1987 } 1990 }
1988 _shl(T, ShiftAmount); 1991 _shl(T, ShiftAmount);
(...skipping 22 matching lines...) Expand all
2011 Constant *Zero = Ctx->getConstantZero(IceType_i32); 2014 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2012 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 2015 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2013 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 2016 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2014 Variable *Tmp = makeReg(DestLo->getType()); 2017 Variable *Tmp = makeReg(DestLo->getType());
2015 if (Src0RM->getType() == IceType_i32) { 2018 if (Src0RM->getType() == IceType_i32) {
2016 _mov(Tmp, Src0RM); 2019 _mov(Tmp, Src0RM);
2017 } else { 2020 } else {
2018 _movzx(Tmp, Src0RM); 2021 _movzx(Tmp, Src0RM);
2019 } 2022 }
2020 if (Src0RM->getType() == IceType_i1) { 2023 if (Src0RM->getType() == IceType_i1) {
2021 Constant *One = Ctx->getConstantInt(IceType_i32, 1); 2024 Constant *One = Ctx->getConstantInt32(IceType_i32, 1);
2022 _and(Tmp, One); 2025 _and(Tmp, One);
2023 } 2026 }
2024 _mov(DestLo, Tmp); 2027 _mov(DestLo, Tmp);
2025 _mov(DestHi, Zero); 2028 _mov(DestHi, Zero);
2026 } else if (Src0RM->getType() == IceType_i1) { 2029 } else if (Src0RM->getType() == IceType_i1) {
2027 // t = Src0RM; t &= 1; Dest = t 2030 // t = Src0RM; t &= 1; Dest = t
2028 Constant *One = Ctx->getConstantInt(IceType_i32, 1); 2031 Constant *One = Ctx->getConstantInt32(IceType_i32, 1);
2029 Variable *T = makeReg(IceType_i32); 2032 Variable *T = makeReg(IceType_i32);
2030 _movzx(T, Src0RM); 2033 _movzx(T, Src0RM);
2031 _and(T, One); 2034 _and(T, One);
2032 _mov(Dest, T); 2035 _mov(Dest, T);
2033 } else { 2036 } else {
2034 // t1 = movzx src; dst = t1 2037 // t1 = movzx src; dst = t1
2035 Variable *T = makeReg(Dest->getType()); 2038 Variable *T = makeReg(Dest->getType());
2036 _movzx(T, Src0RM); 2039 _movzx(T, Src0RM);
2037 _mov(Dest, T); 2040 _mov(Dest, T);
2038 } 2041 }
(...skipping 11 matching lines...) Expand all
2050 _movp(Dest, T); 2053 _movp(Dest, T);
2051 } else { 2054 } else {
2052 Operand *Src0 = Inst->getSrc(0); 2055 Operand *Src0 = Inst->getSrc(0);
2053 if (Src0->getType() == IceType_i64) 2056 if (Src0->getType() == IceType_i64)
2054 Src0 = loOperand(Src0); 2057 Src0 = loOperand(Src0);
2055 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2058 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2056 // t1 = trunc Src0RM; Dest = t1 2059 // t1 = trunc Src0RM; Dest = t1
2057 Variable *T = NULL; 2060 Variable *T = NULL;
2058 _mov(T, Src0RM); 2061 _mov(T, Src0RM);
2059 if (Dest->getType() == IceType_i1) 2062 if (Dest->getType() == IceType_i1)
2060 _and(T, Ctx->getConstantInt(IceType_i1, 1)); 2063 _and(T, Ctx->getConstantInt32(IceType_i1, 1));
2061 _mov(Dest, T); 2064 _mov(Dest, T);
2062 } 2065 }
2063 break; 2066 break;
2064 } 2067 }
2065 case InstCast::Fptrunc: 2068 case InstCast::Fptrunc:
2066 case InstCast::Fpext: { 2069 case InstCast::Fpext: {
2067 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2070 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2068 // t1 = cvt Src0RM; Dest = t1 2071 // t1 = cvt Src0RM; Dest = t1
2069 Variable *T = makeReg(Dest->getType()); 2072 Variable *T = makeReg(Dest->getType());
2070 _cvt(T, Src0RM); 2073 _cvt(T, Src0RM);
(...skipping 24 matching lines...) Expand all
2095 Call->addArg(Inst->getSrc(0)); 2098 Call->addArg(Inst->getSrc(0));
2096 lowerCall(Call); 2099 lowerCall(Call);
2097 } else { 2100 } else {
2098 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2101 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2099 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2102 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2100 Variable *T_1 = makeReg(IceType_i32); 2103 Variable *T_1 = makeReg(IceType_i32);
2101 Variable *T_2 = makeReg(Dest->getType()); 2104 Variable *T_2 = makeReg(Dest->getType());
2102 _cvtt(T_1, Src0RM); 2105 _cvtt(T_1, Src0RM);
2103 _mov(T_2, T_1); // T_1 and T_2 may have different integer types 2106 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
2104 if (Dest->getType() == IceType_i1) 2107 if (Dest->getType() == IceType_i1)
2105 _and(T_2, Ctx->getConstantInt(IceType_i1, 1)); 2108 _and(T_2, Ctx->getConstantInt32(IceType_i1, 1));
2106 _mov(Dest, T_2); 2109 _mov(Dest, T_2);
2107 T_2->setPreferredRegister(T_1, true); 2110 T_2->setPreferredRegister(T_1, true);
2108 } 2111 }
2109 break; 2112 break;
2110 case InstCast::Fptoui: 2113 case InstCast::Fptoui:
2111 if (isVectorType(Dest->getType())) { 2114 if (isVectorType(Dest->getType())) {
2112 assert(Dest->getType() == IceType_v4i32 && 2115 assert(Dest->getType() == IceType_v4i32 &&
2113 Inst->getSrc(0)->getType() == IceType_v4f32); 2116 Inst->getSrc(0)->getType() == IceType_v4f32);
2114 const SizeT MaxSrcs = 1; 2117 const SizeT MaxSrcs = 1;
2115 InstCall *Call = makeHelperCall("Sz_fptoui_v4f32", Dest, MaxSrcs); 2118 InstCall *Call = makeHelperCall("Sz_fptoui_v4f32", Dest, MaxSrcs);
(...skipping 16 matching lines...) Expand all
2132 lowerCall(Call); 2135 lowerCall(Call);
2133 return; 2136 return;
2134 } else { 2137 } else {
2135 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2138 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2136 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2139 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2137 Variable *T_1 = makeReg(IceType_i32); 2140 Variable *T_1 = makeReg(IceType_i32);
2138 Variable *T_2 = makeReg(Dest->getType()); 2141 Variable *T_2 = makeReg(Dest->getType());
2139 _cvtt(T_1, Src0RM); 2142 _cvtt(T_1, Src0RM);
2140 _mov(T_2, T_1); // T_1 and T_2 may have different integer types 2143 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
2141 if (Dest->getType() == IceType_i1) 2144 if (Dest->getType() == IceType_i1)
2142 _and(T_2, Ctx->getConstantInt(IceType_i1, 1)); 2145 _and(T_2, Ctx->getConstantInt32(IceType_i1, 1));
2143 _mov(Dest, T_2); 2146 _mov(Dest, T_2);
2144 T_2->setPreferredRegister(T_1, true); 2147 T_2->setPreferredRegister(T_1, true);
2145 } 2148 }
2146 break; 2149 break;
2147 case InstCast::Sitofp: 2150 case InstCast::Sitofp:
2148 if (isVectorType(Dest->getType())) { 2151 if (isVectorType(Dest->getType())) {
2149 assert(Dest->getType() == IceType_v4f32 && 2152 assert(Dest->getType() == IceType_v4f32 &&
2150 Inst->getSrc(0)->getType() == IceType_v4i32); 2153 Inst->getSrc(0)->getType() == IceType_v4i32);
2151 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2154 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2152 Variable *T = makeReg(Dest->getType()); 2155 Variable *T = makeReg(Dest->getType());
(...skipping 190 matching lines...) Expand 10 before | Expand all | Expand 10 after
2343 _movp(Dest, legalizeToVar(Src0)); 2346 _movp(Dest, legalizeToVar(Src0));
2344 } break; 2347 } break;
2345 } 2348 }
2346 break; 2349 break;
2347 } 2350 }
2348 } 2351 }
2349 } 2352 }
2350 2353
2351 void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) { 2354 void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {
2352 Operand *SourceVectNotLegalized = Inst->getSrc(0); 2355 Operand *SourceVectNotLegalized = Inst->getSrc(0);
2353 ConstantInteger *ElementIndex = 2356 ConstantInteger32 *ElementIndex =
2354 llvm::dyn_cast<ConstantInteger>(Inst->getSrc(1)); 2357 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1));
2355 // Only constant indices are allowed in PNaCl IR. 2358 // Only constant indices are allowed in PNaCl IR.
2356 assert(ElementIndex); 2359 assert(ElementIndex);
2357 2360
2358 unsigned Index = ElementIndex->getValue(); 2361 unsigned Index = ElementIndex->getValue();
2359 Type Ty = SourceVectNotLegalized->getType(); 2362 Type Ty = SourceVectNotLegalized->getType();
2360 Type ElementTy = typeElementType(Ty); 2363 Type ElementTy = typeElementType(Ty);
2361 Type InVectorElementTy = getInVectorElementType(Ty); 2364 Type InVectorElementTy = getInVectorElementType(Ty);
2362 Variable *ExtractedElementR = makeReg(InVectorElementTy); 2365 Variable *ExtractedElementR = makeReg(InVectorElementTy);
2363 2366
2364 // TODO(wala): Determine the best lowering sequences for each type. 2367 // TODO(wala): Determine the best lowering sequences for each type.
2365 bool CanUsePextr = 2368 bool CanUsePextr =
2366 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1; 2369 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1;
2367 if (CanUsePextr && Ty != IceType_v4f32) { 2370 if (CanUsePextr && Ty != IceType_v4f32) {
2368 // Use pextrb, pextrw, or pextrd. 2371 // Use pextrb, pextrw, or pextrd.
2369 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); 2372 Constant *Mask = Ctx->getConstantInt32(IceType_i8, Index);
2370 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized); 2373 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized);
2371 _pextr(ExtractedElementR, SourceVectR, Mask); 2374 _pextr(ExtractedElementR, SourceVectR, Mask);
2372 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { 2375 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2373 // Use pshufd and movd/movss. 2376 // Use pshufd and movd/movss.
2374 Variable *T = NULL; 2377 Variable *T = NULL;
2375 if (Index) { 2378 if (Index) {
2376 // The shuffle only needs to occur if the element to be extracted 2379 // The shuffle only needs to occur if the element to be extracted
2377 // is not at the lowest index. 2380 // is not at the lowest index.
2378 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); 2381 Constant *Mask = Ctx->getConstantInt32(IceType_i8, Index);
2379 T = makeReg(Ty); 2382 T = makeReg(Ty);
2380 _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask); 2383 _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask);
2381 } else { 2384 } else {
2382 T = legalizeToVar(SourceVectNotLegalized); 2385 T = legalizeToVar(SourceVectNotLegalized);
2383 } 2386 }
2384 2387
2385 if (InVectorElementTy == IceType_i32) { 2388 if (InVectorElementTy == IceType_i32) {
2386 _movd(ExtractedElementR, T); 2389 _movd(ExtractedElementR, T);
2387 } else { // Ty == IceType_f32 2390 } else { // Ty == IceType_f32
2388 // TODO(wala): _movss is only used here because _mov does not 2391 // TODO(wala): _movss is only used here because _mov does not
(...skipping 118 matching lines...) Expand 10 before | Expand all | Expand 10 after
2507 bool HasC1 = (TableFcmp[Index].C1 != InstX8632Br::Br_None); 2510 bool HasC1 = (TableFcmp[Index].C1 != InstX8632Br::Br_None);
2508 bool HasC2 = (TableFcmp[Index].C2 != InstX8632Br::Br_None); 2511 bool HasC2 = (TableFcmp[Index].C2 != InstX8632Br::Br_None);
2509 if (HasC1) { 2512 if (HasC1) {
2510 Src0 = legalize(Src0); 2513 Src0 = legalize(Src0);
2511 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); 2514 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2512 Variable *T = NULL; 2515 Variable *T = NULL;
2513 _mov(T, Src0); 2516 _mov(T, Src0);
2514 _ucomiss(T, Src1RM); 2517 _ucomiss(T, Src1RM);
2515 } 2518 }
2516 Constant *Default = 2519 Constant *Default =
2517 Ctx->getConstantInt(IceType_i32, TableFcmp[Index].Default); 2520 Ctx->getConstantInt32(IceType_i32, TableFcmp[Index].Default);
2518 _mov(Dest, Default); 2521 _mov(Dest, Default);
2519 if (HasC1) { 2522 if (HasC1) {
2520 InstX8632Label *Label = InstX8632Label::create(Func, this); 2523 InstX8632Label *Label = InstX8632Label::create(Func, this);
2521 _br(TableFcmp[Index].C1, Label); 2524 _br(TableFcmp[Index].C1, Label);
2522 if (HasC2) { 2525 if (HasC2) {
2523 _br(TableFcmp[Index].C2, Label); 2526 _br(TableFcmp[Index].C2, Label);
2524 } 2527 }
2525 Context.insert(InstFakeUse::create(Func, Dest)); 2528 Context.insert(InstFakeUse::create(Func, Dest));
2526 Constant *NonDefault = 2529 Constant *NonDefault =
2527 Ctx->getConstantInt(IceType_i32, !TableFcmp[Index].Default); 2530 Ctx->getConstantInt32(IceType_i32, !TableFcmp[Index].Default);
2528 _mov(Dest, NonDefault); 2531 _mov(Dest, NonDefault);
2529 Context.insert(Label); 2532 Context.insert(Label);
2530 } 2533 }
2531 } 2534 }
2532 2535
2533 void TargetX8632::lowerIcmp(const InstIcmp *Inst) { 2536 void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
2534 Operand *Src0 = legalize(Inst->getSrc(0)); 2537 Operand *Src0 = legalize(Inst->getSrc(0));
2535 Operand *Src1 = legalize(Inst->getSrc(1)); 2538 Operand *Src1 = legalize(Inst->getSrc(1));
2536 Variable *Dest = Inst->getDest(); 2539 Variable *Dest = Inst->getDest();
2537 2540
(...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after
2658 _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(), 2661 _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(),
2659 NextBr->getTargetFalse()); 2662 NextBr->getTargetFalse());
2660 // Skip over the following branch instruction. 2663 // Skip over the following branch instruction.
2661 Context.advanceNext(); 2664 Context.advanceNext();
2662 return; 2665 return;
2663 } 2666 }
2664 } 2667 }
2665 2668
2666 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: 2669 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
2667 Constant *Zero = Ctx->getConstantZero(IceType_i32); 2670 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2668 Constant *One = Ctx->getConstantInt(IceType_i32, 1); 2671 Constant *One = Ctx->getConstantInt32(IceType_i32, 1);
2669 if (Src0->getType() == IceType_i64) { 2672 if (Src0->getType() == IceType_i64) {
2670 InstIcmp::ICond Condition = Inst->getCondition(); 2673 InstIcmp::ICond Condition = Inst->getCondition();
2671 size_t Index = static_cast<size_t>(Condition); 2674 size_t Index = static_cast<size_t>(Condition);
2672 assert(Index < TableIcmp64Size); 2675 assert(Index < TableIcmp64Size);
2673 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem); 2676 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
2674 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem); 2677 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
2675 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); 2678 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
2676 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); 2679 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
2677 if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) { 2680 if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) {
2678 InstX8632Label *Label = InstX8632Label::create(Func, this); 2681 InstX8632Label *Label = InstX8632Label::create(Func, this);
(...skipping 30 matching lines...) Expand all
2709 _mov(Dest, One); 2712 _mov(Dest, One);
2710 _br(getIcmp32Mapping(Inst->getCondition()), Label); 2713 _br(getIcmp32Mapping(Inst->getCondition()), Label);
2711 Context.insert(InstFakeUse::create(Func, Dest)); 2714 Context.insert(InstFakeUse::create(Func, Dest));
2712 _mov(Dest, Zero); 2715 _mov(Dest, Zero);
2713 Context.insert(Label); 2716 Context.insert(Label);
2714 } 2717 }
2715 2718
2716 void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { 2719 void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
2717 Operand *SourceVectNotLegalized = Inst->getSrc(0); 2720 Operand *SourceVectNotLegalized = Inst->getSrc(0);
2718 Operand *ElementToInsertNotLegalized = Inst->getSrc(1); 2721 Operand *ElementToInsertNotLegalized = Inst->getSrc(1);
2719 ConstantInteger *ElementIndex = 2722 ConstantInteger32 *ElementIndex =
2720 llvm::dyn_cast<ConstantInteger>(Inst->getSrc(2)); 2723 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2));
2721 // Only constant indices are allowed in PNaCl IR. 2724 // Only constant indices are allowed in PNaCl IR.
2722 assert(ElementIndex); 2725 assert(ElementIndex);
2723 unsigned Index = ElementIndex->getValue(); 2726 unsigned Index = ElementIndex->getValue();
2724 assert(Index < typeNumElements(SourceVectNotLegalized->getType())); 2727 assert(Index < typeNumElements(SourceVectNotLegalized->getType()));
2725 2728
2726 Type Ty = SourceVectNotLegalized->getType(); 2729 Type Ty = SourceVectNotLegalized->getType();
2727 Type ElementTy = typeElementType(Ty); 2730 Type ElementTy = typeElementType(Ty);
2728 Type InVectorElementTy = getInVectorElementType(Ty); 2731 Type InVectorElementTy = getInVectorElementType(Ty);
2729 2732
2730 if (ElementTy == IceType_i1) { 2733 if (ElementTy == IceType_i1) {
2731 // Expand the element to the appropriate size for it to be inserted 2734 // Expand the element to the appropriate size for it to be inserted
2732 // in the vector. 2735 // in the vector.
2733 Variable *Expanded = 2736 Variable *Expanded =
2734 Func->makeVariable(InVectorElementTy, Context.getNode()); 2737 Func->makeVariable(InVectorElementTy, Context.getNode());
2735 InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded, 2738 InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded,
2736 ElementToInsertNotLegalized); 2739 ElementToInsertNotLegalized);
2737 lowerCast(Cast); 2740 lowerCast(Cast);
2738 ElementToInsertNotLegalized = Expanded; 2741 ElementToInsertNotLegalized = Expanded;
2739 } 2742 }
2740 2743
2741 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1) { 2744 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1) {
2742 // Use insertps, pinsrb, pinsrw, or pinsrd. 2745 // Use insertps, pinsrb, pinsrw, or pinsrd.
2743 Operand *ElementRM = 2746 Operand *ElementRM =
2744 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); 2747 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
2745 Operand *SourceVectRM = 2748 Operand *SourceVectRM =
2746 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); 2749 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
2747 Variable *T = makeReg(Ty); 2750 Variable *T = makeReg(Ty);
2748 _movp(T, SourceVectRM); 2751 _movp(T, SourceVectRM);
2749 if (Ty == IceType_v4f32) 2752 if (Ty == IceType_v4f32)
2750 _insertps(T, ElementRM, Ctx->getConstantInt(IceType_i8, Index << 4)); 2753 _insertps(T, ElementRM, Ctx->getConstantInt32(IceType_i8, Index << 4));
2751 else 2754 else
2752 _pinsr(T, ElementRM, Ctx->getConstantInt(IceType_i8, Index)); 2755 _pinsr(T, ElementRM, Ctx->getConstantInt32(IceType_i8, Index));
2753 _movp(Inst->getDest(), T); 2756 _movp(Inst->getDest(), T);
2754 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { 2757 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2755 // Use shufps or movss. 2758 // Use shufps or movss.
2756 Variable *ElementR = NULL; 2759 Variable *ElementR = NULL;
2757 Operand *SourceVectRM = 2760 Operand *SourceVectRM =
2758 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); 2761 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
2759 2762
2760 if (InVectorElementTy == IceType_f32) { 2763 if (InVectorElementTy == IceType_f32) {
2761 // ElementR will be in an XMM register since it is floating point. 2764 // ElementR will be in an XMM register since it is floating point.
2762 ElementR = legalizeToVar(ElementToInsertNotLegalized); 2765 ElementR = legalizeToVar(ElementToInsertNotLegalized);
(...skipping 30 matching lines...) Expand all
2793 // ElementR := ElementR[0, 0] T[0, 3] 2796 // ElementR := ElementR[0, 0] T[0, 3]
2794 // T := T[0, 1] ElementR[0, 3] 2797 // T := T[0, 1] ElementR[0, 3]
2795 // 2798 //
2796 // insertelement into index 3 (result is stored in T): 2799 // insertelement into index 3 (result is stored in T):
2797 // T := SourceVectRM 2800 // T := SourceVectRM
2798 // ElementR := ElementR[0, 0] T[0, 2] 2801 // ElementR := ElementR[0, 0] T[0, 2]
2799 // T := T[0, 1] ElementR[3, 0] 2802 // T := T[0, 1] ElementR[3, 0]
2800 const unsigned char Mask1[3] = {0, 192, 128}; 2803 const unsigned char Mask1[3] = {0, 192, 128};
2801 const unsigned char Mask2[3] = {227, 196, 52}; 2804 const unsigned char Mask2[3] = {227, 196, 52};
2802 2805
2803 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]); 2806 Constant *Mask1Constant =
2804 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]); 2807 Ctx->getConstantInt32(IceType_i8, Mask1[Index - 1]);
2808 Constant *Mask2Constant =
2809 Ctx->getConstantInt32(IceType_i8, Mask2[Index - 1]);
2805 2810
2806 if (Index == 1) { 2811 if (Index == 1) {
2807 _shufps(ElementR, SourceVectRM, Mask1Constant); 2812 _shufps(ElementR, SourceVectRM, Mask1Constant);
2808 _shufps(ElementR, SourceVectRM, Mask2Constant); 2813 _shufps(ElementR, SourceVectRM, Mask2Constant);
2809 _movp(Inst->getDest(), ElementR); 2814 _movp(Inst->getDest(), ElementR);
2810 } else { 2815 } else {
2811 Variable *T = makeReg(Ty); 2816 Variable *T = makeReg(Ty);
2812 _movp(T, SourceVectRM); 2817 _movp(T, SourceVectRM);
2813 _shufps(ElementR, T, Mask1Constant); 2818 _shufps(ElementR, T, Mask1Constant);
2814 _shufps(T, ElementR, Mask2Constant); 2819 _shufps(T, ElementR, Mask2Constant);
(...skipping 19 matching lines...) Expand all
2834 Variable *T = makeReg(Ty); 2839 Variable *T = makeReg(Ty);
2835 _movp(T, Slot); 2840 _movp(T, Slot);
2836 _movp(Inst->getDest(), T); 2841 _movp(Inst->getDest(), T);
2837 } 2842 }
2838 } 2843 }
2839 2844
2840 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { 2845 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
2841 switch (Instr->getIntrinsicInfo().ID) { 2846 switch (Instr->getIntrinsicInfo().ID) {
2842 case Intrinsics::AtomicCmpxchg: { 2847 case Intrinsics::AtomicCmpxchg: {
2843 if (!Intrinsics::VerifyMemoryOrder( 2848 if (!Intrinsics::VerifyMemoryOrder(
2844 llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) { 2849 llvm::cast<ConstantInteger32>(Instr->getArg(3))->getValue())) {
2845 Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg"); 2850 Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg");
2846 return; 2851 return;
2847 } 2852 }
2848 if (!Intrinsics::VerifyMemoryOrder( 2853 if (!Intrinsics::VerifyMemoryOrder(
2849 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) { 2854 llvm::cast<ConstantInteger32>(Instr->getArg(4))->getValue())) {
2850 Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg"); 2855 Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg");
2851 return; 2856 return;
2852 } 2857 }
2853 Variable *DestPrev = Instr->getDest(); 2858 Variable *DestPrev = Instr->getDest();
2854 Operand *PtrToMem = Instr->getArg(0); 2859 Operand *PtrToMem = Instr->getArg(0);
2855 Operand *Expected = Instr->getArg(1); 2860 Operand *Expected = Instr->getArg(1);
2856 Operand *Desired = Instr->getArg(2); 2861 Operand *Desired = Instr->getArg(2);
2857 if (tryOptimizedCmpxchgCmpBr(DestPrev, PtrToMem, Expected, Desired)) 2862 if (tryOptimizedCmpxchgCmpBr(DestPrev, PtrToMem, Expected, Desired))
2858 return; 2863 return;
2859 lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired); 2864 lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired);
2860 return; 2865 return;
2861 } 2866 }
2862 case Intrinsics::AtomicFence: 2867 case Intrinsics::AtomicFence:
2863 if (!Intrinsics::VerifyMemoryOrder( 2868 if (!Intrinsics::VerifyMemoryOrder(
2864 llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) { 2869 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue())) {
2865 Func->setError("Unexpected memory ordering for AtomicFence"); 2870 Func->setError("Unexpected memory ordering for AtomicFence");
2866 return; 2871 return;
2867 } 2872 }
2868 _mfence(); 2873 _mfence();
2869 return; 2874 return;
2870 case Intrinsics::AtomicFenceAll: 2875 case Intrinsics::AtomicFenceAll:
2871 // NOTE: FenceAll should prevent any load/store from being moved 2876 // NOTE: FenceAll should prevent any load/store from being moved
2872 // across the fence (both atomic and non-atomic). The InstX8632Mfence 2877 // across the fence (both atomic and non-atomic). The InstX8632Mfence
2873 // instruction is currently marked coarsely as "HasSideEffects". 2878 // instruction is currently marked coarsely as "HasSideEffects".
2874 _mfence(); 2879 _mfence();
2875 return; 2880 return;
2876 case Intrinsics::AtomicIsLockFree: { 2881 case Intrinsics::AtomicIsLockFree: {
2877 // X86 is always lock free for 8/16/32/64 bit accesses. 2882 // X86 is always lock free for 8/16/32/64 bit accesses.
2878 // TODO(jvoung): Since the result is constant when given a constant 2883 // TODO(jvoung): Since the result is constant when given a constant
2879 // byte size, this opens up DCE opportunities. 2884 // byte size, this opens up DCE opportunities.
2880 Operand *ByteSize = Instr->getArg(0); 2885 Operand *ByteSize = Instr->getArg(0);
2881 Variable *Dest = Instr->getDest(); 2886 Variable *Dest = Instr->getDest();
2882 if (ConstantInteger *CI = llvm::dyn_cast<ConstantInteger>(ByteSize)) { 2887 if (ConstantInteger32 *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) {
2883 Constant *Result; 2888 Constant *Result;
2884 switch (CI->getValue()) { 2889 switch (CI->getValue()) {
2885 default: 2890 default:
2886 // Some x86-64 processors support the cmpxchg16b instruction, which 2891 // Some x86-64 processors support the cmpxchg16b instruction, which
2887 // can make 16-byte operations lock free (when used with the LOCK 2892 // can make 16-byte operations lock free (when used with the LOCK
2888 // prefix). However, that's not supported in 32-bit mode, so just 2893 // prefix). However, that's not supported in 32-bit mode, so just
2889 // return 0 even for large sizes. 2894 // return 0 even for large sizes.
2890 Result = Ctx->getConstantZero(IceType_i32); 2895 Result = Ctx->getConstantZero(IceType_i32);
2891 break; 2896 break;
2892 case 1: 2897 case 1:
2893 case 2: 2898 case 2:
2894 case 4: 2899 case 4:
2895 case 8: 2900 case 8:
2896 Result = Ctx->getConstantInt(IceType_i32, 1); 2901 Result = Ctx->getConstantInt32(IceType_i32, 1);
2897 break; 2902 break;
2898 } 2903 }
2899 _mov(Dest, Result); 2904 _mov(Dest, Result);
2900 return; 2905 return;
2901 } 2906 }
2902 // The PNaCl ABI requires the byte size to be a compile-time constant. 2907 // The PNaCl ABI requires the byte size to be a compile-time constant.
2903 Func->setError("AtomicIsLockFree byte size should be compile-time const"); 2908 Func->setError("AtomicIsLockFree byte size should be compile-time const");
2904 return; 2909 return;
2905 } 2910 }
2906 case Intrinsics::AtomicLoad: { 2911 case Intrinsics::AtomicLoad: {
2907 // We require the memory address to be naturally aligned. 2912 // We require the memory address to be naturally aligned.
2908 // Given that is the case, then normal loads are atomic. 2913 // Given that is the case, then normal loads are atomic.
2909 if (!Intrinsics::VerifyMemoryOrder( 2914 if (!Intrinsics::VerifyMemoryOrder(
2910 llvm::cast<ConstantInteger>(Instr->getArg(1))->getValue())) { 2915 llvm::cast<ConstantInteger32>(Instr->getArg(1))->getValue())) {
2911 Func->setError("Unexpected memory ordering for AtomicLoad"); 2916 Func->setError("Unexpected memory ordering for AtomicLoad");
2912 return; 2917 return;
2913 } 2918 }
2914 Variable *Dest = Instr->getDest(); 2919 Variable *Dest = Instr->getDest();
2915 if (Dest->getType() == IceType_i64) { 2920 if (Dest->getType() == IceType_i64) {
2916 // Follow what GCC does and use a movq instead of what lowerLoad() 2921 // Follow what GCC does and use a movq instead of what lowerLoad()
2917 // normally does (split the load into two). 2922 // normally does (split the load into two).
2918 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding 2923 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding
2919 // can't happen anyway, since this is x86-32 and integer arithmetic only 2924 // can't happen anyway, since this is x86-32 and integer arithmetic only
2920 // happens on 32-bit quantities. 2925 // happens on 32-bit quantities.
(...skipping 12 matching lines...) Expand all
2933 lowerLoad(Load); 2938 lowerLoad(Load);
2934 // Make sure the atomic load isn't elided when unused, by adding a FakeUse. 2939 // Make sure the atomic load isn't elided when unused, by adding a FakeUse.
2935 // Since lowerLoad may fuse the load w/ an arithmetic instruction, 2940 // Since lowerLoad may fuse the load w/ an arithmetic instruction,
2936 // insert the FakeUse on the last-inserted instruction's dest. 2941 // insert the FakeUse on the last-inserted instruction's dest.
2937 Context.insert( 2942 Context.insert(
2938 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); 2943 InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
2939 return; 2944 return;
2940 } 2945 }
2941 case Intrinsics::AtomicRMW: 2946 case Intrinsics::AtomicRMW:
2942 if (!Intrinsics::VerifyMemoryOrder( 2947 if (!Intrinsics::VerifyMemoryOrder(
2943 llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) { 2948 llvm::cast<ConstantInteger32>(Instr->getArg(3))->getValue())) {
2944 Func->setError("Unexpected memory ordering for AtomicRMW"); 2949 Func->setError("Unexpected memory ordering for AtomicRMW");
2945 return; 2950 return;
2946 } 2951 }
2947 lowerAtomicRMW(Instr->getDest(), 2952 lowerAtomicRMW(Instr->getDest(),
2948 static_cast<uint32_t>(llvm::cast<ConstantInteger>( 2953 static_cast<uint32_t>(llvm::cast<ConstantInteger32>(
2949 Instr->getArg(0))->getValue()), 2954 Instr->getArg(0))->getValue()),
2950 Instr->getArg(1), Instr->getArg(2)); 2955 Instr->getArg(1), Instr->getArg(2));
2951 return; 2956 return;
2952 case Intrinsics::AtomicStore: { 2957 case Intrinsics::AtomicStore: {
2953 if (!Intrinsics::VerifyMemoryOrder( 2958 if (!Intrinsics::VerifyMemoryOrder(
2954 llvm::cast<ConstantInteger>(Instr->getArg(2))->getValue())) { 2959 llvm::cast<ConstantInteger32>(Instr->getArg(2))->getValue())) {
2955 Func->setError("Unexpected memory ordering for AtomicStore"); 2960 Func->setError("Unexpected memory ordering for AtomicStore");
2956 return; 2961 return;
2957 } 2962 }
2958 // We require the memory address to be naturally aligned. 2963 // We require the memory address to be naturally aligned.
2959 // Given that is the case, then normal stores are atomic. 2964 // Given that is the case, then normal stores are atomic.
2960 // Add a fence after the store to make it visible. 2965 // Add a fence after the store to make it visible.
2961 Operand *Value = Instr->getArg(0); 2966 Operand *Value = Instr->getArg(0);
2962 Operand *Ptr = Instr->getArg(1); 2967 Operand *Ptr = Instr->getArg(1);
2963 if (Value->getType() == IceType_i64) { 2968 if (Value->getType() == IceType_i64) {
2964 // Use a movq instead of what lowerStore() normally does 2969 // Use a movq instead of what lowerStore() normally does
(...skipping 27 matching lines...) Expand all
2992 _bswap(T_Hi); 2997 _bswap(T_Hi);
2993 _mov(DestLo, T_Hi); 2998 _mov(DestLo, T_Hi);
2994 _mov(DestHi, T_Lo); 2999 _mov(DestHi, T_Lo);
2995 } else if (Val->getType() == IceType_i32) { 3000 } else if (Val->getType() == IceType_i32) {
2996 Variable *T = legalizeToVar(Val); 3001 Variable *T = legalizeToVar(Val);
2997 _bswap(T); 3002 _bswap(T);
2998 _mov(Dest, T); 3003 _mov(Dest, T);
2999 } else { 3004 } else {
3000 assert(Val->getType() == IceType_i16); 3005 assert(Val->getType() == IceType_i16);
3001 Val = legalize(Val); 3006 Val = legalize(Val);
3002 Constant *Eight = Ctx->getConstantInt(IceType_i16, 8); 3007 Constant *Eight = Ctx->getConstantInt32(IceType_i16, 8);
3003 Variable *T = NULL; 3008 Variable *T = NULL;
3004 _mov(T, Val); 3009 _mov(T, Val);
3005 _rol(T, Eight); 3010 _rol(T, Eight);
3006 _mov(Dest, T); 3011 _mov(Dest, T);
3007 } 3012 }
3008 return; 3013 return;
3009 } 3014 }
3010 case Intrinsics::Ctpop: { 3015 case Intrinsics::Ctpop: {
3011 Variable *Dest = Instr->getDest(); 3016 Variable *Dest = Instr->getDest();
3012 Operand *Val = Instr->getArg(0); 3017 Operand *Val = Instr->getArg(0);
(...skipping 462 matching lines...) Expand 10 before | Expand all | Expand 10 after
3475 // bit position conversion, and the speculation is reversed. 3480 // bit position conversion, and the speculation is reversed.
3476 assert(Ty == IceType_i32 || Ty == IceType_i64); 3481 assert(Ty == IceType_i32 || Ty == IceType_i64);
3477 Variable *T = makeReg(IceType_i32); 3482 Variable *T = makeReg(IceType_i32);
3478 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg); 3483 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg);
3479 if (Cttz) { 3484 if (Cttz) {
3480 _bsf(T, FirstValRM); 3485 _bsf(T, FirstValRM);
3481 } else { 3486 } else {
3482 _bsr(T, FirstValRM); 3487 _bsr(T, FirstValRM);
3483 } 3488 }
3484 Variable *T_Dest = makeReg(IceType_i32); 3489 Variable *T_Dest = makeReg(IceType_i32);
3485 Constant *ThirtyTwo = Ctx->getConstantInt(IceType_i32, 32); 3490 Constant *ThirtyTwo = Ctx->getConstantInt32(IceType_i32, 32);
3486 Constant *ThirtyOne = Ctx->getConstantInt(IceType_i32, 31); 3491 Constant *ThirtyOne = Ctx->getConstantInt32(IceType_i32, 31);
3487 if (Cttz) { 3492 if (Cttz) {
3488 _mov(T_Dest, ThirtyTwo); 3493 _mov(T_Dest, ThirtyTwo);
3489 } else { 3494 } else {
3490 Constant *SixtyThree = Ctx->getConstantInt(IceType_i32, 63); 3495 Constant *SixtyThree = Ctx->getConstantInt32(IceType_i32, 63);
3491 _mov(T_Dest, SixtyThree); 3496 _mov(T_Dest, SixtyThree);
3492 } 3497 }
3493 _cmov(T_Dest, T, InstX8632::Br_ne); 3498 _cmov(T_Dest, T, InstX8632::Br_ne);
3494 if (!Cttz) { 3499 if (!Cttz) {
3495 _xor(T_Dest, ThirtyOne); 3500 _xor(T_Dest, ThirtyOne);
3496 } 3501 }
3497 if (Ty == IceType_i32) { 3502 if (Ty == IceType_i32) {
3498 _mov(Dest, T_Dest); 3503 _mov(Dest, T_Dest);
3499 return; 3504 return;
3500 } 3505 }
(...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after
3609 if (Index == NULL) 3614 if (Index == NULL)
3610 return false; 3615 return false;
3611 const Inst *IndexInst = Index->getDefinition(); 3616 const Inst *IndexInst = Index->getDefinition();
3612 if (IndexInst == NULL) 3617 if (IndexInst == NULL)
3613 return false; 3618 return false;
3614 if (IndexInst->getSrcSize() < 2) 3619 if (IndexInst->getSrcSize() < 2)
3615 return false; 3620 return false;
3616 if (const InstArithmetic *ArithInst = 3621 if (const InstArithmetic *ArithInst =
3617 llvm::dyn_cast<InstArithmetic>(IndexInst)) { 3622 llvm::dyn_cast<InstArithmetic>(IndexInst)) {
3618 if (Variable *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) { 3623 if (Variable *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {
3619 if (ConstantInteger *Const = 3624 if (ConstantInteger32 *Const =
3620 llvm::dyn_cast<ConstantInteger>(ArithInst->getSrc(1))) { 3625 llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1))) {
3621 if (ArithInst->getOp() == InstArithmetic::Mul && 3626 if (ArithInst->getOp() == InstArithmetic::Mul &&
3622 !Var->getIsMultidef() && Const->getType() == IceType_i32) { 3627 !Var->getIsMultidef() && Const->getType() == IceType_i32) {
3623 uint64_t Mult = Const->getValue(); 3628 uint64_t Mult = Const->getValue();
3624 uint32_t LogMult; 3629 uint32_t LogMult;
3625 switch (Mult) { 3630 switch (Mult) {
3626 case 1: 3631 case 1:
3627 LogMult = 0; 3632 LogMult = 0;
3628 break; 3633 break;
3629 case 2: 3634 case 2:
3630 LogMult = 1; 3635 LogMult = 1;
(...skipping 30 matching lines...) Expand all
3661 const Inst *BaseInst = Base->getDefinition(); 3666 const Inst *BaseInst = Base->getDefinition();
3662 if (BaseInst == NULL) 3667 if (BaseInst == NULL)
3663 return false; 3668 return false;
3664 if (const InstArithmetic *ArithInst = 3669 if (const InstArithmetic *ArithInst =
3665 llvm::dyn_cast<const InstArithmetic>(BaseInst)) { 3670 llvm::dyn_cast<const InstArithmetic>(BaseInst)) {
3666 if (ArithInst->getOp() != InstArithmetic::Add && 3671 if (ArithInst->getOp() != InstArithmetic::Add &&
3667 ArithInst->getOp() != InstArithmetic::Sub) 3672 ArithInst->getOp() != InstArithmetic::Sub)
3668 return false; 3673 return false;
3669 bool IsAdd = ArithInst->getOp() == InstArithmetic::Add; 3674 bool IsAdd = ArithInst->getOp() == InstArithmetic::Add;
3670 Variable *Var = NULL; 3675 Variable *Var = NULL;
3671 ConstantInteger *Const = NULL; 3676 ConstantInteger32 *Const = NULL;
3672 if (Variable *VariableOperand = 3677 if (Variable *VariableOperand =
3673 llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) { 3678 llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {
3674 Var = VariableOperand; 3679 Var = VariableOperand;
3675 Const = llvm::dyn_cast<ConstantInteger>(ArithInst->getSrc(1)); 3680 Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1));
3676 } else if (IsAdd) { 3681 } else if (IsAdd) {
3677 Const = llvm::dyn_cast<ConstantInteger>(ArithInst->getSrc(0)); 3682 Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(0));
3678 Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(1)); 3683 Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(1));
3679 } 3684 }
3680 if (Var == NULL || Const == NULL || Var->getIsMultidef()) 3685 if (Var == NULL || Const == NULL || Var->getIsMultidef())
3681 return false; 3686 return false;
3687 int32_t MoreOffset = IsAdd ? Const->getValue() : -Const->getValue();
3688 if (WouldOverflowAdd(Offset, MoreOffset))
3689 return false;
3682 Base = Var; 3690 Base = Var;
3683 Offset += IsAdd ? Const->getValue() : -Const->getValue(); 3691 Offset += MoreOffset;
3684 Reason = BaseInst; 3692 Reason = BaseInst;
3685 return true; 3693 return true;
3686 } 3694 }
3687 return false; 3695 return false;
3688 } 3696 }
3689 3697
3690 void computeAddressOpt(Cfg *Func, const Inst *Instr, Variable *&Base, 3698 void computeAddressOpt(Cfg *Func, const Inst *Instr, Variable *&Base,
3691 Variable *&Index, uint16_t &Shift, int32_t &Offset) { 3699 Variable *&Index, uint16_t &Shift, int32_t &Offset) {
3692 Func->setCurrentNode(NULL); 3700 Func->setCurrentNode(NULL);
3693 if (Func->getContext()->isVerbose(IceV_AddrOpt)) { 3701 if (Func->getContext()->isVerbose(IceV_AddrOpt)) {
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after
3805 // Vanilla ICE load instructions should not use the segment registers, 3813 // Vanilla ICE load instructions should not use the segment registers,
3806 // and computeAddressOpt only works at the level of Variables and Constants, 3814 // and computeAddressOpt only works at the level of Variables and Constants,
3807 // not other OperandX8632Mem, so there should be no mention of segment 3815 // not other OperandX8632Mem, so there should be no mention of segment
3808 // registers there either. 3816 // registers there either.
3809 const OperandX8632Mem::SegmentRegisters SegmentReg = 3817 const OperandX8632Mem::SegmentRegisters SegmentReg =
3810 OperandX8632Mem::DefaultSegment; 3818 OperandX8632Mem::DefaultSegment;
3811 Variable *Base = llvm::dyn_cast<Variable>(Addr); 3819 Variable *Base = llvm::dyn_cast<Variable>(Addr);
3812 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); 3820 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
3813 if (Base && Addr != Base) { 3821 if (Base && Addr != Base) {
3814 Inst->setDeleted(); 3822 Inst->setDeleted();
3815 Constant *OffsetOp = Ctx->getConstantInt(IceType_i32, Offset); 3823 Constant *OffsetOp = Ctx->getConstantInt32(IceType_i32, Offset);
3816 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index, 3824 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index,
3817 Shift, SegmentReg); 3825 Shift, SegmentReg);
3818 Context.insert(InstLoad::create(Func, Dest, Addr)); 3826 Context.insert(InstLoad::create(Func, Dest, Addr));
3819 } 3827 }
3820 } 3828 }
3821 3829
3822 void TargetX8632::randomlyInsertNop(float Probability) { 3830 void TargetX8632::randomlyInsertNop(float Probability) {
3823 RandomNumberGeneratorWrapper RNG(Ctx->getRNG()); 3831 RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
3824 if (RNG.getTrueWithProbability(Probability)) { 3832 if (RNG.getTrueWithProbability(Probability)) {
3825 _nop(RNG.next(X86_NUM_NOP_VARIANTS)); 3833 _nop(RNG.next(X86_NUM_NOP_VARIANTS));
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
3872 if (InstructionSet >= SSE4_1) { 3880 if (InstructionSet >= SSE4_1) {
3873 // TODO(wala): If the condition operand is a constant, use blendps 3881 // TODO(wala): If the condition operand is a constant, use blendps
3874 // or pblendw. 3882 // or pblendw.
3875 // 3883 //
3876 // Use blendvps or pblendvb to implement select. 3884 // Use blendvps or pblendvb to implement select.
3877 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || 3885 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
3878 SrcTy == IceType_v4f32) { 3886 SrcTy == IceType_v4f32) {
3879 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); 3887 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
3880 Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0); 3888 Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0);
3881 _movp(xmm0, ConditionRM); 3889 _movp(xmm0, ConditionRM);
3882 _psll(xmm0, Ctx->getConstantInt(IceType_i8, 31)); 3890 _psll(xmm0, Ctx->getConstantInt32(IceType_i8, 31));
3883 _movp(T, SrcFRM); 3891 _movp(T, SrcFRM);
3884 _blendvps(T, SrcTRM, xmm0); 3892 _blendvps(T, SrcTRM, xmm0);
3885 _movp(Dest, T); 3893 _movp(Dest, T);
3886 } else { 3894 } else {
3887 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); 3895 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
3888 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 3896 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16
3889 : IceType_v16i8; 3897 : IceType_v16i8;
3890 Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0); 3898 Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0);
3891 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); 3899 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
3892 _movp(T, SrcFRM); 3900 _movp(T, SrcFRM);
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after
3987 Variable *Base = llvm::dyn_cast<Variable>(Addr); 3995 Variable *Base = llvm::dyn_cast<Variable>(Addr);
3988 // Vanilla ICE store instructions should not use the segment registers, 3996 // Vanilla ICE store instructions should not use the segment registers,
3989 // and computeAddressOpt only works at the level of Variables and Constants, 3997 // and computeAddressOpt only works at the level of Variables and Constants,
3990 // not other OperandX8632Mem, so there should be no mention of segment 3998 // not other OperandX8632Mem, so there should be no mention of segment
3991 // registers there either. 3999 // registers there either.
3992 const OperandX8632Mem::SegmentRegisters SegmentReg = 4000 const OperandX8632Mem::SegmentRegisters SegmentReg =
3993 OperandX8632Mem::DefaultSegment; 4001 OperandX8632Mem::DefaultSegment;
3994 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); 4002 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
3995 if (Base && Addr != Base) { 4003 if (Base && Addr != Base) {
3996 Inst->setDeleted(); 4004 Inst->setDeleted();
3997 Constant *OffsetOp = Ctx->getConstantInt(IceType_i32, Offset); 4005 Constant *OffsetOp = Ctx->getConstantInt32(IceType_i32, Offset);
3998 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index, 4006 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,
3999 Shift, SegmentReg); 4007 Shift, SegmentReg);
4000 Context.insert(InstStore::create(Func, Data, Addr)); 4008 Context.insert(InstStore::create(Func, Data, Addr));
4001 } 4009 }
4002 } 4010 }
4003 4011
4004 void TargetX8632::lowerSwitch(const InstSwitch *Inst) { 4012 void TargetX8632::lowerSwitch(const InstSwitch *Inst) {
4005 // This implements the most naive possible lowering. 4013 // This implements the most naive possible lowering.
4006 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default 4014 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
4007 Operand *Src0 = Inst->getComparison(); 4015 Operand *Src0 = Inst->getComparison();
4008 SizeT NumCases = Inst->getNumCases(); 4016 SizeT NumCases = Inst->getNumCases();
4009 // OK, we'll be slightly less naive by forcing Src into a physical 4017 // OK, we'll be slightly less naive by forcing Src into a physical
4010 // register if there are 2 or more uses. 4018 // register if there are 2 or more uses.
4011 if (NumCases >= 2) 4019 if (NumCases >= 2)
4012 Src0 = legalizeToVar(Src0, true); 4020 Src0 = legalizeToVar(Src0, true);
4013 else 4021 else
4014 Src0 = legalize(Src0, Legal_Reg | Legal_Mem, true); 4022 Src0 = legalize(Src0, Legal_Reg | Legal_Mem, true);
4015 for (SizeT I = 0; I < NumCases; ++I) { 4023 for (SizeT I = 0; I < NumCases; ++I) {
4016 // TODO(stichnot): Correct lowering for IceType_i64. 4024 // TODO(stichnot): Correct lowering for IceType_i64.
4017 Constant *Value = Ctx->getConstantInt(IceType_i32, Inst->getValue(I)); 4025 Constant *Value = Ctx->getConstantInt32(IceType_i32, Inst->getValue(I));
4018 _cmp(Src0, Value); 4026 _cmp(Src0, Value);
4019 _br(InstX8632Br::Br_e, Inst->getLabel(I)); 4027 _br(InstX8632Br::Br_e, Inst->getLabel(I));
4020 } 4028 }
4021 4029
4022 _br(Inst->getLabelDefault()); 4030 _br(Inst->getLabelDefault());
4023 } 4031 }
4024 4032
4025 void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind, 4033 void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind,
4026 Variable *Dest, Operand *Src0, 4034 Variable *Dest, Operand *Src0,
4027 Operand *Src1) { 4035 Operand *Src1) {
4028 assert(isVectorType(Dest->getType())); 4036 assert(isVectorType(Dest->getType()));
4029 Type Ty = Dest->getType(); 4037 Type Ty = Dest->getType();
4030 Type ElementTy = typeElementType(Ty); 4038 Type ElementTy = typeElementType(Ty);
4031 SizeT NumElements = typeNumElements(Ty); 4039 SizeT NumElements = typeNumElements(Ty);
4032 4040
4033 Operand *T = Ctx->getConstantUndef(Ty); 4041 Operand *T = Ctx->getConstantUndef(Ty);
4034 for (SizeT I = 0; I < NumElements; ++I) { 4042 for (SizeT I = 0; I < NumElements; ++I) {
4035 Constant *Index = Ctx->getConstantInt(IceType_i32, I); 4043 Constant *Index = Ctx->getConstantInt32(IceType_i32, I);
4036 4044
4037 // Extract the next two inputs. 4045 // Extract the next two inputs.
4038 Variable *Op0 = Func->makeVariable(ElementTy, Context.getNode()); 4046 Variable *Op0 = Func->makeVariable(ElementTy, Context.getNode());
4039 lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index)); 4047 lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index));
4040 Variable *Op1 = Func->makeVariable(ElementTy, Context.getNode()); 4048 Variable *Op1 = Func->makeVariable(ElementTy, Context.getNode());
4041 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index)); 4049 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index));
4042 4050
4043 // Perform the arithmetic as a scalar operation. 4051 // Perform the arithmetic as a scalar operation.
4044 Variable *Res = Func->makeVariable(ElementTy, Context.getNode()); 4052 Variable *Res = Func->makeVariable(ElementTy, Context.getNode());
4045 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1)); 4053 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1));
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
4114 _psub(Dest, MinusOne); 4122 _psub(Dest, MinusOne);
4115 return Dest; 4123 return Dest;
4116 } 4124 }
4117 4125
4118 Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) { 4126 Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) {
4119 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || 4127 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
4120 Ty == IceType_v16i8); 4128 Ty == IceType_v16i8);
4121 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { 4129 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
4122 Variable *Reg = makeVectorOfOnes(Ty, RegNum); 4130 Variable *Reg = makeVectorOfOnes(Ty, RegNum);
4123 SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1; 4131 SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1;
4124 _psll(Reg, Ctx->getConstantInt(IceType_i8, Shift)); 4132 _psll(Reg, Ctx->getConstantInt32(IceType_i8, Shift));
4125 return Reg; 4133 return Reg;
4126 } else { 4134 } else {
4127 // SSE has no left shift operation for vectors of 8 bit integers. 4135 // SSE has no left shift operation for vectors of 8 bit integers.
4128 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; 4136 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
4129 Constant *ConstantMask = 4137 Constant *ConstantMask =
4130 Ctx->getConstantInt(IceType_i32, HIGH_ORDER_BITS_MASK); 4138 Ctx->getConstantInt32(IceType_i32, HIGH_ORDER_BITS_MASK);
4131 Variable *Reg = makeReg(Ty, RegNum); 4139 Variable *Reg = makeReg(Ty, RegNum);
4132 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); 4140 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
4133 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); 4141 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
4134 return Reg; 4142 return Reg;
4135 } 4143 }
4136 } 4144 }
4137 4145
4138 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty, 4146 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,
4139 Variable *Slot, 4147 Variable *Slot,
4140 uint32_t Offset) { 4148 uint32_t Offset) {
4141 // Ensure that Loc is a stack slot. 4149 // Ensure that Loc is a stack slot.
4142 assert(Slot->getWeight() == RegWeight::Zero); 4150 assert(Slot->getWeight() == RegWeight::Zero);
4143 assert(Slot->getRegNum() == Variable::NoRegister); 4151 assert(Slot->getRegNum() == Variable::NoRegister);
4144 // Compute the location of Loc in memory. 4152 // Compute the location of Loc in memory.
4145 // TODO(wala,stichnot): lea should not be required. The address of 4153 // TODO(wala,stichnot): lea should not be required. The address of
4146 // the stack slot is known at compile time (although not until after 4154 // the stack slot is known at compile time (although not until after
4147 // addProlog()). 4155 // addProlog()).
4148 const Type PointerType = IceType_i32; 4156 const Type PointerType = IceType_i32;
4149 Variable *Loc = makeReg(PointerType); 4157 Variable *Loc = makeReg(PointerType);
4150 _lea(Loc, Slot); 4158 _lea(Loc, Slot);
4151 Constant *ConstantOffset = Ctx->getConstantInt(IceType_i32, Offset); 4159 Constant *ConstantOffset = Ctx->getConstantInt32(IceType_i32, Offset);
4152 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset); 4160 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset);
4153 } 4161 }
4154 4162
4155 // Helper for legalize() to emit the right code to lower an operand to a 4163 // Helper for legalize() to emit the right code to lower an operand to a
4156 // register of the appropriate type. 4164 // register of the appropriate type.
4157 Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) { 4165 Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) {
4158 Type Ty = Src->getType(); 4166 Type Ty = Src->getType();
4159 Variable *Reg = makeReg(Ty, RegNum); 4167 Variable *Reg = makeReg(Ty, RegNum);
4160 if (isVectorType(Ty)) { 4168 if (isVectorType(Ty)) {
4161 _movp(Reg, Src); 4169 _movp(Reg, Src);
(...skipping 109 matching lines...) Expand 10 before | Expand all | Expand 10 after
4271 OperandX8632Mem *TargetX8632::FormMemoryOperand(Operand *Operand, Type Ty) { 4279 OperandX8632Mem *TargetX8632::FormMemoryOperand(Operand *Operand, Type Ty) {
4272 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand); 4280 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand);
4273 // It may be the case that address mode optimization already creates 4281 // It may be the case that address mode optimization already creates
4274 // an OperandX8632Mem, so in that case it wouldn't need another level 4282 // an OperandX8632Mem, so in that case it wouldn't need another level
4275 // of transformation. 4283 // of transformation.
4276 if (!Mem) { 4284 if (!Mem) {
4277 Variable *Base = llvm::dyn_cast<Variable>(Operand); 4285 Variable *Base = llvm::dyn_cast<Variable>(Operand);
4278 Constant *Offset = llvm::dyn_cast<Constant>(Operand); 4286 Constant *Offset = llvm::dyn_cast<Constant>(Operand);
4279 assert(Base || Offset); 4287 assert(Base || Offset);
4280 if (Offset) { 4288 if (Offset) {
4281 assert(llvm::isa<ConstantInteger>(Offset) || 4289 assert(llvm::isa<ConstantInteger32>(Offset) ||
4282 llvm::isa<ConstantRelocatable>(Offset)); 4290 llvm::isa<ConstantRelocatable>(Offset));
4283 } 4291 }
4284 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset); 4292 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset);
4285 } 4293 }
4286 return llvm::cast<OperandX8632Mem>(legalize(Mem)); 4294 return llvm::cast<OperandX8632Mem>(legalize(Mem));
4287 } 4295 }
4288 4296
4289 Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) { 4297 Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {
4290 // There aren't any 64-bit integer registers for x86-32. 4298 // There aren't any 64-bit integer registers for x86-32.
4291 assert(Type != IceType_i64); 4299 assert(Type != IceType_i64);
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after
4395 if (WhiteList[RegNum]) 4403 if (WhiteList[RegNum])
4396 FreedRegisters[RegNum] = true; 4404 FreedRegisters[RegNum] = true;
4397 } 4405 }
4398 } 4406 }
4399 } 4407 }
4400 } 4408 }
4401 AvailableRegisters |= FreedRegisters; 4409 AvailableRegisters |= FreedRegisters;
4402 } 4410 }
4403 } 4411 }
4404 4412
4405 template <> void ConstantInteger::emit(GlobalContext *Ctx) const { 4413 template <> void ConstantInteger32::emit(GlobalContext *Ctx) const {
4406 Ostream &Str = Ctx->getStrEmit(); 4414 Ostream &Str = Ctx->getStrEmit();
4407 Str << (int64_t) getValue(); 4415 Str << (int32_t)getValue();
4416 }
4417
4418 template <> void ConstantInteger64::emit(GlobalContext *) const {
4419 llvm_unreachable("Not expecting to emit 64-bit integers");
4408 } 4420 }
4409 4421
4410 template <> void ConstantFloat::emit(GlobalContext *Ctx) const { 4422 template <> void ConstantFloat::emit(GlobalContext *Ctx) const {
4411 Ostream &Str = Ctx->getStrEmit(); 4423 Ostream &Str = Ctx->getStrEmit();
4412 // It would be better to prefix with ".L$" instead of "L$", but 4424 // It would be better to prefix with ".L$" instead of "L$", but
4413 // llvm-mc doesn't parse "dword ptr [.L$foo]". 4425 // llvm-mc doesn't parse "dword ptr [.L$foo]".
4414 Str << "dword ptr [L$" << IceType_f32 << "$" << getPoolEntryID() << "]"; 4426 Str << "dword ptr [L$" << IceType_f32 << "$" << getPoolEntryID() << "]";
4415 } 4427 }
4416 4428
4417 template <> void ConstantDouble::emit(GlobalContext *Ctx) const { 4429 template <> void ConstantDouble::emit(GlobalContext *Ctx) const {
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after
4513 Str << "\t.align\t" << Align << "\n"; 4525 Str << "\t.align\t" << Align << "\n";
4514 Str << MangledName << ":\n"; 4526 Str << MangledName << ":\n";
4515 for (SizeT i = 0; i < Size; ++i) { 4527 for (SizeT i = 0; i < Size; ++i) {
4516 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; 4528 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
4517 } 4529 }
4518 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; 4530 Str << "\t.size\t" << MangledName << ", " << Size << "\n";
4519 } 4531 }
4520 } 4532 }
4521 4533
4522 } // end of namespace Ice 4534 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | src/PNaClTranslator.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698