Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(37)

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 476323004: Start adding an integrated assembler. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: make fixups part of address Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | src/IceUtils.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
(...skipping 494 matching lines...) Expand 10 before | Expand all | Expand 10 after
505 if (!hasFramePointer()) 505 if (!hasFramePointer())
506 Offset += getStackAdjustment(); 506 Offset += getStackAdjustment();
507 if (Offset) { 507 if (Offset) {
508 if (Offset > 0) 508 if (Offset > 0)
509 Str << "+"; 509 Str << "+";
510 Str << Offset; 510 Str << Offset;
511 } 511 }
512 Str << "]"; 512 Str << "]";
513 } 513 }
514 514
515 x86::Address TargetX8632::stackVarToAsmOperand(const Variable *Var) const {
516 assert(!Var->hasReg());
517 int32_t Offset = Var->getStackOffset();
518 if (!hasFramePointer())
519 Offset += getStackAdjustment();
520 return x86::Address(x86::Register(getFrameOrStackReg()), Offset);
521 }
522
515 void TargetX8632::lowerArguments() { 523 void TargetX8632::lowerArguments() {
516 VarList &Args = Func->getArgs(); 524 VarList &Args = Func->getArgs();
517 // The first four arguments of vector type, regardless of their 525 // The first four arguments of vector type, regardless of their
518 // position relative to the other arguments in the argument list, are 526 // position relative to the other arguments in the argument list, are
519 // passed in registers xmm0 - xmm3. 527 // passed in registers xmm0 - xmm3.
520 unsigned NumXmmArgs = 0; 528 unsigned NumXmmArgs = 0;
521 529
522 Context.init(Func->getEntryNode()); 530 Context.init(Func->getEntryNode());
523 Context.setInsertPoint(Context.getCur()); 531 Context.setInsertPoint(Context.getCur());
524 532
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
593 } 601 }
594 if (isVectorType(Ty)) { 602 if (isVectorType(Ty)) {
595 InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes); 603 InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes);
596 } 604 }
597 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); 605 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
598 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); 606 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
599 if (Arg->hasReg()) { 607 if (Arg->hasReg()) {
600 assert(Ty != IceType_i64); 608 assert(Ty != IceType_i64);
601 OperandX8632Mem *Mem = OperandX8632Mem::create( 609 OperandX8632Mem *Mem = OperandX8632Mem::create(
602 Func, Ty, FramePtr, 610 Func, Ty, FramePtr,
603 Ctx->getConstantInt(IceType_i32, Arg->getStackOffset())); 611 Ctx->getConstantInt32(IceType_i32, Arg->getStackOffset()));
604 if (isVectorType(Arg->getType())) { 612 if (isVectorType(Arg->getType())) {
605 _movp(Arg, Mem); 613 _movp(Arg, Mem);
606 } else { 614 } else {
607 _mov(Arg, Mem); 615 _mov(Arg, Mem);
608 } 616 }
609 } 617 }
610 } 618 }
611 619
612 Type TargetX8632::stackSlotType() { return IceType_i32; } 620 Type TargetX8632::stackSlotType() { return IceType_i32; }
613 621
(...skipping 178 matching lines...) Expand 10 before | Expand all | Expand 10 after
792 // Align esp if necessary. 800 // Align esp if necessary.
793 if (NeedsStackAlignment) { 801 if (NeedsStackAlignment) {
794 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; 802 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
795 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); 803 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
796 SpillAreaSizeBytes = StackSize - StackOffset; 804 SpillAreaSizeBytes = StackSize - StackOffset;
797 } 805 }
798 806
799 // Generate "sub esp, SpillAreaSizeBytes" 807 // Generate "sub esp, SpillAreaSizeBytes"
800 if (SpillAreaSizeBytes) 808 if (SpillAreaSizeBytes)
801 _sub(getPhysicalRegister(Reg_esp), 809 _sub(getPhysicalRegister(Reg_esp),
802 Ctx->getConstantInt(IceType_i32, SpillAreaSizeBytes)); 810 Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes));
803 811
804 resetStackAdjustment(); 812 resetStackAdjustment();
805 813
806 // Fill in stack offsets for stack args, and copy args into registers 814 // Fill in stack offsets for stack args, and copy args into registers
807 // for those that were register-allocated. Args are pushed right to 815 // for those that were register-allocated. Args are pushed right to
808 // left, so Arg[0] is closest to the stack/frame pointer. 816 // left, so Arg[0] is closest to the stack/frame pointer.
809 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); 817 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
810 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES; 818 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES;
811 if (!IsEbpBasedFrame) 819 if (!IsEbpBasedFrame)
812 BasicFrameOffset += SpillAreaSizeBytes; 820 BasicFrameOffset += SpillAreaSizeBytes;
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after
908 Context.setInsertPoint(InsertPoint); 916 Context.setInsertPoint(InsertPoint);
909 917
910 Variable *esp = getPhysicalRegister(Reg_esp); 918 Variable *esp = getPhysicalRegister(Reg_esp);
911 if (IsEbpBasedFrame) { 919 if (IsEbpBasedFrame) {
912 Variable *ebp = getPhysicalRegister(Reg_ebp); 920 Variable *ebp = getPhysicalRegister(Reg_ebp);
913 _mov(esp, ebp); 921 _mov(esp, ebp);
914 _pop(ebp); 922 _pop(ebp);
915 } else { 923 } else {
916 // add esp, SpillAreaSizeBytes 924 // add esp, SpillAreaSizeBytes
917 if (SpillAreaSizeBytes) 925 if (SpillAreaSizeBytes)
918 _add(esp, Ctx->getConstantInt(IceType_i32, SpillAreaSizeBytes)); 926 _add(esp, Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes));
919 } 927 }
920 928
921 // Add pop instructions for preserved registers. 929 // Add pop instructions for preserved registers.
922 llvm::SmallBitVector CalleeSaves = 930 llvm::SmallBitVector CalleeSaves =
923 getRegisterSet(RegSet_CalleeSave, RegSet_None); 931 getRegisterSet(RegSet_CalleeSave, RegSet_None);
924 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { 932 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
925 SizeT j = CalleeSaves.size() - i - 1; 933 SizeT j = CalleeSaves.size() - i - 1;
926 if (j == Reg_ebp && IsEbpBasedFrame) 934 if (j == Reg_ebp && IsEbpBasedFrame)
927 continue; 935 continue;
928 if (CalleeSaves[j] && RegsUsed[j]) { 936 if (CalleeSaves[j] && RegsUsed[j]) {
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after
1026 } 1034 }
1027 1035
1028 Operand *TargetX8632::loOperand(Operand *Operand) { 1036 Operand *TargetX8632::loOperand(Operand *Operand) {
1029 assert(Operand->getType() == IceType_i64); 1037 assert(Operand->getType() == IceType_i64);
1030 if (Operand->getType() != IceType_i64) 1038 if (Operand->getType() != IceType_i64)
1031 return Operand; 1039 return Operand;
1032 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { 1040 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
1033 split64(Var); 1041 split64(Var);
1034 return Var->getLo(); 1042 return Var->getLo();
1035 } 1043 }
1036 if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) { 1044 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1037 uint64_t Mask = (1ull << 32) - 1; 1045 return Ctx->getConstantInt32(IceType_i32,
1038 return Ctx->getConstantInt(IceType_i32, Const->getValue() & Mask); 1046 static_cast<uint32_t>(Const->getValue()));
1039 } 1047 }
1040 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) { 1048 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
1041 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), 1049 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(),
1042 Mem->getOffset(), Mem->getIndex(), 1050 Mem->getOffset(), Mem->getIndex(),
1043 Mem->getShift(), Mem->getSegmentRegister()); 1051 Mem->getShift(), Mem->getSegmentRegister());
1044 } 1052 }
1045 llvm_unreachable("Unsupported operand type"); 1053 llvm_unreachable("Unsupported operand type");
1046 return NULL; 1054 return NULL;
1047 } 1055 }
1048 1056
1049 Operand *TargetX8632::hiOperand(Operand *Operand) { 1057 Operand *TargetX8632::hiOperand(Operand *Operand) {
1050 assert(Operand->getType() == IceType_i64); 1058 assert(Operand->getType() == IceType_i64);
1051 if (Operand->getType() != IceType_i64) 1059 if (Operand->getType() != IceType_i64)
1052 return Operand; 1060 return Operand;
1053 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { 1061 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
1054 split64(Var); 1062 split64(Var);
1055 return Var->getHi(); 1063 return Var->getHi();
1056 } 1064 }
1057 if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) { 1065 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1058 return Ctx->getConstantInt(IceType_i32, Const->getValue() >> 32); 1066 return Ctx->getConstantInt32(
1067 IceType_i32, static_cast<uint32_t>(Const->getValue() >> 32));
1059 } 1068 }
1060 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) { 1069 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
1061 Constant *Offset = Mem->getOffset(); 1070 Constant *Offset = Mem->getOffset();
1062 if (Offset == NULL) 1071 if (Offset == NULL)
1063 Offset = Ctx->getConstantInt(IceType_i32, 4); 1072 Offset = Ctx->getConstantInt32(IceType_i32, 4);
1064 else if (ConstantInteger *IntOffset = 1073 else if (ConstantInteger32 *IntOffset =
1065 llvm::dyn_cast<ConstantInteger>(Offset)) { 1074 llvm::dyn_cast<ConstantInteger32>(Offset)) {
1066 Offset = Ctx->getConstantInt(IceType_i32, 4 + IntOffset->getValue()); 1075 Offset = Ctx->getConstantInt32(IceType_i32, 4 + IntOffset->getValue());
1067 } else if (ConstantRelocatable *SymOffset = 1076 } else if (ConstantRelocatable *SymOffset =
1068 llvm::dyn_cast<ConstantRelocatable>(Offset)) { 1077 llvm::dyn_cast<ConstantRelocatable>(Offset)) {
1069 Offset = Ctx->getConstantSym(IceType_i32, 4 + SymOffset->getOffset(), 1078 Offset = Ctx->getConstantSym(IceType_i32, 4 + SymOffset->getOffset(),
1070 SymOffset->getName()); 1079 SymOffset->getName());
1071 } 1080 }
1072 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset, 1081 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset,
1073 Mem->getIndex(), Mem->getShift(), 1082 Mem->getIndex(), Mem->getShift(),
1074 Mem->getSegmentRegister()); 1083 Mem->getSegmentRegister());
1075 } 1084 }
1076 llvm_unreachable("Unsupported operand type"); 1085 llvm_unreachable("Unsupported operand type");
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
1124 // For default align=0, set it to the real value 1, to avoid any 1133 // For default align=0, set it to the real value 1, to avoid any
1125 // bit-manipulation problems below. 1134 // bit-manipulation problems below.
1126 AlignmentParam = std::max(AlignmentParam, 1u); 1135 AlignmentParam = std::max(AlignmentParam, 1u);
1127 1136
1128 // LLVM enforces power of 2 alignment. 1137 // LLVM enforces power of 2 alignment.
1129 assert((AlignmentParam & (AlignmentParam - 1)) == 0); 1138 assert((AlignmentParam & (AlignmentParam - 1)) == 0);
1130 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0); 1139 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);
1131 1140
1132 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES); 1141 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES);
1133 if (Alignment > X86_STACK_ALIGNMENT_BYTES) { 1142 if (Alignment > X86_STACK_ALIGNMENT_BYTES) {
1134 _and(esp, Ctx->getConstantInt(IceType_i32, -Alignment)); 1143 _and(esp, Ctx->getConstantInt32(IceType_i32, -Alignment));
1135 } 1144 }
1136 if (ConstantInteger *ConstantTotalSize = 1145 if (ConstantInteger32 *ConstantTotalSize =
1137 llvm::dyn_cast<ConstantInteger>(TotalSize)) { 1146 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
1138 uint32_t Value = ConstantTotalSize->getValue(); 1147 uint32_t Value = ConstantTotalSize->getValue();
1139 Value = applyAlignment(Value, Alignment); 1148 Value = applyAlignment(Value, Alignment);
1140 _sub(esp, Ctx->getConstantInt(IceType_i32, Value)); 1149 _sub(esp, Ctx->getConstantInt32(IceType_i32, Value));
1141 } else { 1150 } else {
1142 // Non-constant sizes need to be adjusted to the next highest 1151 // Non-constant sizes need to be adjusted to the next highest
1143 // multiple of the required alignment at runtime. 1152 // multiple of the required alignment at runtime.
1144 Variable *T = makeReg(IceType_i32); 1153 Variable *T = makeReg(IceType_i32);
1145 _mov(T, TotalSize); 1154 _mov(T, TotalSize);
1146 _add(T, Ctx->getConstantInt(IceType_i32, Alignment - 1)); 1155 _add(T, Ctx->getConstantInt32(IceType_i32, Alignment - 1));
1147 _and(T, Ctx->getConstantInt(IceType_i32, -Alignment)); 1156 _and(T, Ctx->getConstantInt32(IceType_i32, -Alignment));
1148 _sub(esp, T); 1157 _sub(esp, T);
1149 } 1158 }
1150 _mov(Dest, esp); 1159 _mov(Dest, esp);
1151 } 1160 }
1152 1161
1153 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { 1162 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
1154 Variable *Dest = Inst->getDest(); 1163 Variable *Dest = Inst->getDest();
1155 Operand *Src0 = legalize(Inst->getSrc(0)); 1164 Operand *Src0 = legalize(Inst->getSrc(0));
1156 Operand *Src1 = legalize(Inst->getSrc(1)); 1165 Operand *Src1 = legalize(Inst->getSrc(1));
1157 if (Dest->getType() == IceType_i64) { 1166 if (Dest->getType() == IceType_i64) {
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after
1247 // t2 = shl t2, t1 1256 // t2 = shl t2, t1
1248 // test t1, 0x20 1257 // test t1, 0x20
1249 // je L1 1258 // je L1
1250 // use(t3) 1259 // use(t3)
1251 // t3 = t2 1260 // t3 = t2
1252 // t2 = 0 1261 // t2 = 0
1253 // L1: 1262 // L1:
1254 // a.lo = t2 1263 // a.lo = t2
1255 // a.hi = t3 1264 // a.hi = t3
1256 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; 1265 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
1257 Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20); 1266 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20);
1258 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1267 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1259 InstX8632Label *Label = InstX8632Label::create(Func, this); 1268 InstX8632Label *Label = InstX8632Label::create(Func, this);
1260 _mov(T_1, Src1Lo, Reg_ecx); 1269 _mov(T_1, Src1Lo, Reg_ecx);
1261 _mov(T_2, Src0Lo); 1270 _mov(T_2, Src0Lo);
1262 _mov(T_3, Src0Hi); 1271 _mov(T_3, Src0Hi);
1263 _shld(T_3, T_2, T_1); 1272 _shld(T_3, T_2, T_1);
1264 _shl(T_2, T_1); 1273 _shl(T_2, T_1);
1265 _test(T_1, BitTest); 1274 _test(T_1, BitTest);
1266 _br(InstX8632Br::Br_e, Label); 1275 _br(InstX8632Br::Br_e, Label);
1267 // Because of the intra-block control flow, we need to fake a use 1276 // Because of the intra-block control flow, we need to fake a use
(...skipping 15 matching lines...) Expand all
1283 // t3 = shr t3, t1 1292 // t3 = shr t3, t1
1284 // test t1, 0x20 1293 // test t1, 0x20
1285 // je L1 1294 // je L1
1286 // use(t2) 1295 // use(t2)
1287 // t2 = t3 1296 // t2 = t3
1288 // t3 = 0 1297 // t3 = 0
1289 // L1: 1298 // L1:
1290 // a.lo = t2 1299 // a.lo = t2
1291 // a.hi = t3 1300 // a.hi = t3
1292 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; 1301 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
1293 Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20); 1302 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20);
1294 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1303 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1295 InstX8632Label *Label = InstX8632Label::create(Func, this); 1304 InstX8632Label *Label = InstX8632Label::create(Func, this);
1296 _mov(T_1, Src1Lo, Reg_ecx); 1305 _mov(T_1, Src1Lo, Reg_ecx);
1297 _mov(T_2, Src0Lo); 1306 _mov(T_2, Src0Lo);
1298 _mov(T_3, Src0Hi); 1307 _mov(T_3, Src0Hi);
1299 _shrd(T_2, T_3, T_1); 1308 _shrd(T_2, T_3, T_1);
1300 _shr(T_3, T_1); 1309 _shr(T_3, T_1);
1301 _test(T_1, BitTest); 1310 _test(T_1, BitTest);
1302 _br(InstX8632Br::Br_e, Label); 1311 _br(InstX8632Br::Br_e, Label);
1303 // Because of the intra-block control flow, we need to fake a use 1312 // Because of the intra-block control flow, we need to fake a use
(...skipping 15 matching lines...) Expand all
1319 // t3 = sar t3, t1 1328 // t3 = sar t3, t1
1320 // test t1, 0x20 1329 // test t1, 0x20
1321 // je L1 1330 // je L1
1322 // use(t2) 1331 // use(t2)
1323 // t2 = t3 1332 // t2 = t3
1324 // t3 = sar t3, 0x1f 1333 // t3 = sar t3, 0x1f
1325 // L1: 1334 // L1:
1326 // a.lo = t2 1335 // a.lo = t2
1327 // a.hi = t3 1336 // a.hi = t3
1328 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; 1337 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
1329 Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20); 1338 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20);
1330 Constant *SignExtend = Ctx->getConstantInt(IceType_i32, 0x1f); 1339 Constant *SignExtend = Ctx->getConstantInt32(IceType_i32, 0x1f);
1331 InstX8632Label *Label = InstX8632Label::create(Func, this); 1340 InstX8632Label *Label = InstX8632Label::create(Func, this);
1332 _mov(T_1, Src1Lo, Reg_ecx); 1341 _mov(T_1, Src1Lo, Reg_ecx);
1333 _mov(T_2, Src0Lo); 1342 _mov(T_2, Src0Lo);
1334 _mov(T_3, Src0Hi); 1343 _mov(T_3, Src0Hi);
1335 _shrd(T_2, T_3, T_1); 1344 _shrd(T_2, T_3, T_1);
1336 _sar(T_3, T_1); 1345 _sar(T_3, T_1);
1337 _test(T_1, BitTest); 1346 _test(T_1, BitTest);
1338 _br(InstX8632Br::Br_e, Label); 1347 _br(InstX8632Br::Br_e, Label);
1339 // Because of the intra-block control flow, we need to fake a use 1348 // Because of the intra-block control flow, we need to fake a use
1340 // of T_3 to prevent its earlier definition from being dead-code 1349 // of T_3 to prevent its earlier definition from being dead-code
(...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after
1441 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]} 1450 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}
1442 // pmuludq T2, T3 1451 // pmuludq T2, T3
1443 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])} 1452 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}
1444 // shufps T1, T2, {0,2,0,2} 1453 // shufps T1, T2, {0,2,0,2}
1445 // pshufd T4, T1, {0,2,1,3} 1454 // pshufd T4, T1, {0,2,1,3}
1446 // movups Dest, T4 1455 // movups Dest, T4
1447 1456
1448 // Mask that directs pshufd to create a vector with entries 1457 // Mask that directs pshufd to create a vector with entries
1449 // Src[1, 0, 3, 0] 1458 // Src[1, 0, 3, 0]
1450 const unsigned Constant1030 = 0x31; 1459 const unsigned Constant1030 = 0x31;
1451 Constant *Mask1030 = Ctx->getConstantInt(IceType_i8, Constant1030); 1460 Constant *Mask1030 = Ctx->getConstantInt32(IceType_i8, Constant1030);
1452 // Mask that directs shufps to create a vector with entries 1461 // Mask that directs shufps to create a vector with entries
1453 // Dest[0, 2], Src[0, 2] 1462 // Dest[0, 2], Src[0, 2]
1454 const unsigned Mask0202 = 0x88; 1463 const unsigned Mask0202 = 0x88;
1455 // Mask that directs pshufd to create a vector with entries 1464 // Mask that directs pshufd to create a vector with entries
1456 // Src[0, 2, 1, 3] 1465 // Src[0, 2, 1, 3]
1457 const unsigned Mask0213 = 0xd8; 1466 const unsigned Mask0213 = 0xd8;
1458 Variable *T1 = makeReg(IceType_v4i32); 1467 Variable *T1 = makeReg(IceType_v4i32);
1459 Variable *T2 = makeReg(IceType_v4i32); 1468 Variable *T2 = makeReg(IceType_v4i32);
1460 Variable *T3 = makeReg(IceType_v4i32); 1469 Variable *T3 = makeReg(IceType_v4i32);
1461 Variable *T4 = makeReg(IceType_v4i32); 1470 Variable *T4 = makeReg(IceType_v4i32);
1462 _movp(T1, Src0); 1471 _movp(T1, Src0);
1463 _pshufd(T2, Src0, Mask1030); 1472 _pshufd(T2, Src0, Mask1030);
1464 _pshufd(T3, Src1, Mask1030); 1473 _pshufd(T3, Src1, Mask1030);
1465 _pmuludq(T1, Src1); 1474 _pmuludq(T1, Src1);
1466 _pmuludq(T2, T3); 1475 _pmuludq(T2, T3);
1467 _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202)); 1476 _shufps(T1, T2, Ctx->getConstantInt32(IceType_i8, Mask0202));
1468 _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213)); 1477 _pshufd(T4, T1, Ctx->getConstantInt32(IceType_i8, Mask0213));
1469 _movp(Dest, T4); 1478 _movp(Dest, T4);
1470 } else { 1479 } else {
1471 assert(Dest->getType() == IceType_v16i8); 1480 assert(Dest->getType() == IceType_v16i8);
1472 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); 1481 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1473 } 1482 }
1474 } break; 1483 } break;
1475 case InstArithmetic::Shl: 1484 case InstArithmetic::Shl:
1476 case InstArithmetic::Lshr: 1485 case InstArithmetic::Lshr:
1477 case InstArithmetic::Ashr: 1486 case InstArithmetic::Ashr:
1478 case InstArithmetic::Udiv: 1487 case InstArithmetic::Udiv:
(...skipping 272 matching lines...) Expand 10 before | Expand all | Expand 10 after
1751 assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_i64 || 1760 assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_i64 ||
1752 Ty == IceType_f64 || isVectorType(Ty)); 1761 Ty == IceType_f64 || isVectorType(Ty));
1753 if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) { 1762 if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) {
1754 XmmArgs.push_back(Arg); 1763 XmmArgs.push_back(Arg);
1755 } else { 1764 } else {
1756 StackArgs.push_back(Arg); 1765 StackArgs.push_back(Arg);
1757 if (isVectorType(Arg->getType())) { 1766 if (isVectorType(Arg->getType())) {
1758 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); 1767 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
1759 } 1768 }
1760 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp); 1769 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
1761 Constant *Loc = Ctx->getConstantInt(IceType_i32, ParameterAreaSizeBytes); 1770 Constant *Loc =
1771 Ctx->getConstantInt32(IceType_i32, ParameterAreaSizeBytes);
1762 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc)); 1772 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc));
1763 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); 1773 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
1764 } 1774 }
1765 } 1775 }
1766 1776
1767 // Adjust the parameter area so that the stack is aligned. It is 1777 // Adjust the parameter area so that the stack is aligned. It is
1768 // assumed that the stack is already aligned at the start of the 1778 // assumed that the stack is already aligned at the start of the
1769 // calling sequence. 1779 // calling sequence.
1770 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); 1780 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
1771 1781
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
1848 Operand *CallTarget = legalize(Instr->getCallTarget(), Legal_All); 1858 Operand *CallTarget = legalize(Instr->getCallTarget(), Legal_All);
1849 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget); 1859 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget);
1850 Context.insert(NewCall); 1860 Context.insert(NewCall);
1851 if (ReturnRegHi) 1861 if (ReturnRegHi)
1852 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); 1862 Context.insert(InstFakeDef::create(Func, ReturnRegHi));
1853 1863
1854 // Add the appropriate offset to esp. The call instruction takes care 1864 // Add the appropriate offset to esp. The call instruction takes care
1855 // of resetting the stack offset during emission. 1865 // of resetting the stack offset during emission.
1856 if (ParameterAreaSizeBytes) { 1866 if (ParameterAreaSizeBytes) {
1857 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp); 1867 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
1858 _add(esp, Ctx->getConstantInt(IceType_i32, ParameterAreaSizeBytes)); 1868 _add(esp, Ctx->getConstantInt32(IceType_i32, ParameterAreaSizeBytes));
1859 } 1869 }
1860 1870
1861 // Insert a register-kill pseudo instruction. 1871 // Insert a register-kill pseudo instruction.
1862 VarList KilledRegs; 1872 VarList KilledRegs;
1863 for (SizeT i = 0; i < ScratchRegs.size(); ++i) { 1873 for (SizeT i = 0; i < ScratchRegs.size(); ++i) {
1864 if (ScratchRegs[i]) 1874 if (ScratchRegs[i])
1865 KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i)); 1875 KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i));
1866 } 1876 }
1867 Context.insert(InstFakeKill::create(Func, KilledRegs, NewCall)); 1877 Context.insert(InstFakeKill::create(Func, KilledRegs, NewCall));
1868 1878
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
1933 Variable *T = makeReg(DestTy); 1943 Variable *T = makeReg(DestTy);
1934 _movp(T, Src0RM); 1944 _movp(T, Src0RM);
1935 _pand(T, OneMask); 1945 _pand(T, OneMask);
1936 Variable *Zeros = makeVectorOfZeros(Dest->getType()); 1946 Variable *Zeros = makeVectorOfZeros(Dest->getType());
1937 _pcmpgt(T, Zeros); 1947 _pcmpgt(T, Zeros);
1938 _movp(Dest, T); 1948 _movp(Dest, T);
1939 } else { 1949 } else {
1940 // width = width(elty) - 1; dest = (src << width) >> width 1950 // width = width(elty) - 1; dest = (src << width) >> width
1941 SizeT ShiftAmount = 1951 SizeT ShiftAmount =
1942 X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1; 1952 X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1;
1943 Constant *ShiftConstant = Ctx->getConstantInt(IceType_i8, ShiftAmount); 1953 Constant *ShiftConstant =
1954 Ctx->getConstantInt32(IceType_i8, ShiftAmount);
1944 Variable *T = makeReg(DestTy); 1955 Variable *T = makeReg(DestTy);
1945 _movp(T, Src0RM); 1956 _movp(T, Src0RM);
1946 _psll(T, ShiftConstant); 1957 _psll(T, ShiftConstant);
1947 _psra(T, ShiftConstant); 1958 _psra(T, ShiftConstant);
1948 _movp(Dest, T); 1959 _movp(Dest, T);
1949 } 1960 }
1950 } else if (Dest->getType() == IceType_i64) { 1961 } else if (Dest->getType() == IceType_i64) {
1951 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2 1962 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
1952 Constant *Shift = Ctx->getConstantInt(IceType_i32, 31); 1963 Constant *Shift = Ctx->getConstantInt32(IceType_i32, 31);
1953 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 1964 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1954 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 1965 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1955 Variable *T_Lo = makeReg(DestLo->getType()); 1966 Variable *T_Lo = makeReg(DestLo->getType());
1956 if (Src0RM->getType() == IceType_i32) { 1967 if (Src0RM->getType() == IceType_i32) {
1957 _mov(T_Lo, Src0RM); 1968 _mov(T_Lo, Src0RM);
1958 } else if (Src0RM->getType() == IceType_i1) { 1969 } else if (Src0RM->getType() == IceType_i1) {
1959 _movzx(T_Lo, Src0RM); 1970 _movzx(T_Lo, Src0RM);
1960 _shl(T_Lo, Shift); 1971 _shl(T_Lo, Shift);
1961 _sar(T_Lo, Shift); 1972 _sar(T_Lo, Shift);
1962 } else { 1973 } else {
1963 _movsx(T_Lo, Src0RM); 1974 _movsx(T_Lo, Src0RM);
1964 } 1975 }
1965 _mov(DestLo, T_Lo); 1976 _mov(DestLo, T_Lo);
1966 Variable *T_Hi = NULL; 1977 Variable *T_Hi = NULL;
1967 _mov(T_Hi, T_Lo); 1978 _mov(T_Hi, T_Lo);
1968 if (Src0RM->getType() != IceType_i1) 1979 if (Src0RM->getType() != IceType_i1)
1969 // For i1, the sar instruction is already done above. 1980 // For i1, the sar instruction is already done above.
1970 _sar(T_Hi, Shift); 1981 _sar(T_Hi, Shift);
1971 _mov(DestHi, T_Hi); 1982 _mov(DestHi, T_Hi);
1972 } else if (Src0RM->getType() == IceType_i1) { 1983 } else if (Src0RM->getType() == IceType_i1) {
1973 // t1 = src 1984 // t1 = src
1974 // shl t1, dst_bitwidth - 1 1985 // shl t1, dst_bitwidth - 1
1975 // sar t1, dst_bitwidth - 1 1986 // sar t1, dst_bitwidth - 1
1976 // dst = t1 1987 // dst = t1
1977 size_t DestBits = X86_CHAR_BIT * typeWidthInBytes(Dest->getType()); 1988 size_t DestBits = X86_CHAR_BIT * typeWidthInBytes(Dest->getType());
1978 Constant *ShiftAmount = Ctx->getConstantInt(IceType_i32, DestBits - 1); 1989 Constant *ShiftAmount = Ctx->getConstantInt32(IceType_i32, DestBits - 1);
1979 Variable *T = makeReg(Dest->getType()); 1990 Variable *T = makeReg(Dest->getType());
1980 if (typeWidthInBytes(Dest->getType()) <= 1991 if (typeWidthInBytes(Dest->getType()) <=
1981 typeWidthInBytes(Src0RM->getType())) { 1992 typeWidthInBytes(Src0RM->getType())) {
1982 _mov(T, Src0RM); 1993 _mov(T, Src0RM);
1983 } else { 1994 } else {
1984 // Widen the source using movsx or movzx. (It doesn't matter 1995 // Widen the source using movsx or movzx. (It doesn't matter
1985 // which one, since the following shl/sar overwrite the bits.) 1996 // which one, since the following shl/sar overwrite the bits.)
1986 _movzx(T, Src0RM); 1997 _movzx(T, Src0RM);
1987 } 1998 }
1988 _shl(T, ShiftAmount); 1999 _shl(T, ShiftAmount);
(...skipping 22 matching lines...) Expand all
2011 Constant *Zero = Ctx->getConstantZero(IceType_i32); 2022 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2012 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 2023 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2013 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 2024 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2014 Variable *Tmp = makeReg(DestLo->getType()); 2025 Variable *Tmp = makeReg(DestLo->getType());
2015 if (Src0RM->getType() == IceType_i32) { 2026 if (Src0RM->getType() == IceType_i32) {
2016 _mov(Tmp, Src0RM); 2027 _mov(Tmp, Src0RM);
2017 } else { 2028 } else {
2018 _movzx(Tmp, Src0RM); 2029 _movzx(Tmp, Src0RM);
2019 } 2030 }
2020 if (Src0RM->getType() == IceType_i1) { 2031 if (Src0RM->getType() == IceType_i1) {
2021 Constant *One = Ctx->getConstantInt(IceType_i32, 1); 2032 Constant *One = Ctx->getConstantInt32(IceType_i32, 1);
2022 _and(Tmp, One); 2033 _and(Tmp, One);
2023 } 2034 }
2024 _mov(DestLo, Tmp); 2035 _mov(DestLo, Tmp);
2025 _mov(DestHi, Zero); 2036 _mov(DestHi, Zero);
2026 } else if (Src0RM->getType() == IceType_i1) { 2037 } else if (Src0RM->getType() == IceType_i1) {
2027 // t = Src0RM; t &= 1; Dest = t 2038 // t = Src0RM; t &= 1; Dest = t
2028 Constant *One = Ctx->getConstantInt(IceType_i32, 1); 2039 Constant *One = Ctx->getConstantInt32(IceType_i32, 1);
2029 Variable *T = makeReg(IceType_i32); 2040 Variable *T = makeReg(IceType_i32);
2030 _movzx(T, Src0RM); 2041 _movzx(T, Src0RM);
2031 _and(T, One); 2042 _and(T, One);
2032 _mov(Dest, T); 2043 _mov(Dest, T);
2033 } else { 2044 } else {
2034 // t1 = movzx src; dst = t1 2045 // t1 = movzx src; dst = t1
2035 Variable *T = makeReg(Dest->getType()); 2046 Variable *T = makeReg(Dest->getType());
2036 _movzx(T, Src0RM); 2047 _movzx(T, Src0RM);
2037 _mov(Dest, T); 2048 _mov(Dest, T);
2038 } 2049 }
(...skipping 11 matching lines...) Expand all
2050 _movp(Dest, T); 2061 _movp(Dest, T);
2051 } else { 2062 } else {
2052 Operand *Src0 = Inst->getSrc(0); 2063 Operand *Src0 = Inst->getSrc(0);
2053 if (Src0->getType() == IceType_i64) 2064 if (Src0->getType() == IceType_i64)
2054 Src0 = loOperand(Src0); 2065 Src0 = loOperand(Src0);
2055 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2066 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2056 // t1 = trunc Src0RM; Dest = t1 2067 // t1 = trunc Src0RM; Dest = t1
2057 Variable *T = NULL; 2068 Variable *T = NULL;
2058 _mov(T, Src0RM); 2069 _mov(T, Src0RM);
2059 if (Dest->getType() == IceType_i1) 2070 if (Dest->getType() == IceType_i1)
2060 _and(T, Ctx->getConstantInt(IceType_i1, 1)); 2071 _and(T, Ctx->getConstantInt32(IceType_i1, 1));
2061 _mov(Dest, T); 2072 _mov(Dest, T);
2062 } 2073 }
2063 break; 2074 break;
2064 } 2075 }
2065 case InstCast::Fptrunc: 2076 case InstCast::Fptrunc:
2066 case InstCast::Fpext: { 2077 case InstCast::Fpext: {
2067 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2078 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2068 // t1 = cvt Src0RM; Dest = t1 2079 // t1 = cvt Src0RM; Dest = t1
2069 Variable *T = makeReg(Dest->getType()); 2080 Variable *T = makeReg(Dest->getType());
2070 _cvt(T, Src0RM); 2081 _cvt(T, Src0RM);
(...skipping 24 matching lines...) Expand all
2095 Call->addArg(Inst->getSrc(0)); 2106 Call->addArg(Inst->getSrc(0));
2096 lowerCall(Call); 2107 lowerCall(Call);
2097 } else { 2108 } else {
2098 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2109 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2099 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2110 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2100 Variable *T_1 = makeReg(IceType_i32); 2111 Variable *T_1 = makeReg(IceType_i32);
2101 Variable *T_2 = makeReg(Dest->getType()); 2112 Variable *T_2 = makeReg(Dest->getType());
2102 _cvtt(T_1, Src0RM); 2113 _cvtt(T_1, Src0RM);
2103 _mov(T_2, T_1); // T_1 and T_2 may have different integer types 2114 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
2104 if (Dest->getType() == IceType_i1) 2115 if (Dest->getType() == IceType_i1)
2105 _and(T_2, Ctx->getConstantInt(IceType_i1, 1)); 2116 _and(T_2, Ctx->getConstantInt32(IceType_i1, 1));
2106 _mov(Dest, T_2); 2117 _mov(Dest, T_2);
2107 T_2->setPreferredRegister(T_1, true); 2118 T_2->setPreferredRegister(T_1, true);
2108 } 2119 }
2109 break; 2120 break;
2110 case InstCast::Fptoui: 2121 case InstCast::Fptoui:
2111 if (isVectorType(Dest->getType())) { 2122 if (isVectorType(Dest->getType())) {
2112 assert(Dest->getType() == IceType_v4i32 && 2123 assert(Dest->getType() == IceType_v4i32 &&
2113 Inst->getSrc(0)->getType() == IceType_v4f32); 2124 Inst->getSrc(0)->getType() == IceType_v4f32);
2114 const SizeT MaxSrcs = 1; 2125 const SizeT MaxSrcs = 1;
2115 InstCall *Call = makeHelperCall("Sz_fptoui_v4f32", Dest, MaxSrcs); 2126 InstCall *Call = makeHelperCall("Sz_fptoui_v4f32", Dest, MaxSrcs);
(...skipping 16 matching lines...) Expand all
2132 lowerCall(Call); 2143 lowerCall(Call);
2133 return; 2144 return;
2134 } else { 2145 } else {
2135 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2146 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2136 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2147 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2137 Variable *T_1 = makeReg(IceType_i32); 2148 Variable *T_1 = makeReg(IceType_i32);
2138 Variable *T_2 = makeReg(Dest->getType()); 2149 Variable *T_2 = makeReg(Dest->getType());
2139 _cvtt(T_1, Src0RM); 2150 _cvtt(T_1, Src0RM);
2140 _mov(T_2, T_1); // T_1 and T_2 may have different integer types 2151 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
2141 if (Dest->getType() == IceType_i1) 2152 if (Dest->getType() == IceType_i1)
2142 _and(T_2, Ctx->getConstantInt(IceType_i1, 1)); 2153 _and(T_2, Ctx->getConstantInt32(IceType_i1, 1));
2143 _mov(Dest, T_2); 2154 _mov(Dest, T_2);
2144 T_2->setPreferredRegister(T_1, true); 2155 T_2->setPreferredRegister(T_1, true);
2145 } 2156 }
2146 break; 2157 break;
2147 case InstCast::Sitofp: 2158 case InstCast::Sitofp:
2148 if (isVectorType(Dest->getType())) { 2159 if (isVectorType(Dest->getType())) {
2149 assert(Dest->getType() == IceType_v4f32 && 2160 assert(Dest->getType() == IceType_v4f32 &&
2150 Inst->getSrc(0)->getType() == IceType_v4i32); 2161 Inst->getSrc(0)->getType() == IceType_v4i32);
2151 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2162 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2152 Variable *T = makeReg(Dest->getType()); 2163 Variable *T = makeReg(Dest->getType());
(...skipping 190 matching lines...) Expand 10 before | Expand all | Expand 10 after
2343 _movp(Dest, legalizeToVar(Src0)); 2354 _movp(Dest, legalizeToVar(Src0));
2344 } break; 2355 } break;
2345 } 2356 }
2346 break; 2357 break;
2347 } 2358 }
2348 } 2359 }
2349 } 2360 }
2350 2361
2351 void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) { 2362 void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {
2352 Operand *SourceVectNotLegalized = Inst->getSrc(0); 2363 Operand *SourceVectNotLegalized = Inst->getSrc(0);
2353 ConstantInteger *ElementIndex = 2364 ConstantInteger32 *ElementIndex =
2354 llvm::dyn_cast<ConstantInteger>(Inst->getSrc(1)); 2365 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1));
2355 // Only constant indices are allowed in PNaCl IR. 2366 // Only constant indices are allowed in PNaCl IR.
2356 assert(ElementIndex); 2367 assert(ElementIndex);
2357 2368
2358 unsigned Index = ElementIndex->getValue(); 2369 unsigned Index = ElementIndex->getValue();
2359 Type Ty = SourceVectNotLegalized->getType(); 2370 Type Ty = SourceVectNotLegalized->getType();
2360 Type ElementTy = typeElementType(Ty); 2371 Type ElementTy = typeElementType(Ty);
2361 Type InVectorElementTy = getInVectorElementType(Ty); 2372 Type InVectorElementTy = getInVectorElementType(Ty);
2362 Variable *ExtractedElementR = makeReg(InVectorElementTy); 2373 Variable *ExtractedElementR = makeReg(InVectorElementTy);
2363 2374
2364 // TODO(wala): Determine the best lowering sequences for each type. 2375 // TODO(wala): Determine the best lowering sequences for each type.
2365 bool CanUsePextr = 2376 bool CanUsePextr =
2366 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1; 2377 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1;
2367 if (CanUsePextr && Ty != IceType_v4f32) { 2378 if (CanUsePextr && Ty != IceType_v4f32) {
2368 // Use pextrb, pextrw, or pextrd. 2379 // Use pextrb, pextrw, or pextrd.
2369 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); 2380 Constant *Mask = Ctx->getConstantInt32(IceType_i8, Index);
2370 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized); 2381 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized);
2371 _pextr(ExtractedElementR, SourceVectR, Mask); 2382 _pextr(ExtractedElementR, SourceVectR, Mask);
2372 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { 2383 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2373 // Use pshufd and movd/movss. 2384 // Use pshufd and movd/movss.
2374 Variable *T = NULL; 2385 Variable *T = NULL;
2375 if (Index) { 2386 if (Index) {
2376 // The shuffle only needs to occur if the element to be extracted 2387 // The shuffle only needs to occur if the element to be extracted
2377 // is not at the lowest index. 2388 // is not at the lowest index.
2378 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); 2389 Constant *Mask = Ctx->getConstantInt32(IceType_i8, Index);
2379 T = makeReg(Ty); 2390 T = makeReg(Ty);
2380 _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask); 2391 _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask);
2381 } else { 2392 } else {
2382 T = legalizeToVar(SourceVectNotLegalized); 2393 T = legalizeToVar(SourceVectNotLegalized);
2383 } 2394 }
2384 2395
2385 if (InVectorElementTy == IceType_i32) { 2396 if (InVectorElementTy == IceType_i32) {
2386 _movd(ExtractedElementR, T); 2397 _movd(ExtractedElementR, T);
2387 } else { // InVectorElementTy == IceType_f32 2398 } else { // InVectorElementTy == IceType_f32
2388 // TODO(wala): _movss is only used here because _mov does not 2399 // TODO(wala): _movss is only used here because _mov does not
(...skipping 118 matching lines...) Expand 10 before | Expand all | Expand 10 after
2507 bool HasC1 = (TableFcmp[Index].C1 != InstX8632Br::Br_None); 2518 bool HasC1 = (TableFcmp[Index].C1 != InstX8632Br::Br_None);
2508 bool HasC2 = (TableFcmp[Index].C2 != InstX8632Br::Br_None); 2519 bool HasC2 = (TableFcmp[Index].C2 != InstX8632Br::Br_None);
2509 if (HasC1) { 2520 if (HasC1) {
2510 Src0 = legalize(Src0); 2521 Src0 = legalize(Src0);
2511 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); 2522 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2512 Variable *T = NULL; 2523 Variable *T = NULL;
2513 _mov(T, Src0); 2524 _mov(T, Src0);
2514 _ucomiss(T, Src1RM); 2525 _ucomiss(T, Src1RM);
2515 } 2526 }
2516 Constant *Default = 2527 Constant *Default =
2517 Ctx->getConstantInt(IceType_i32, TableFcmp[Index].Default); 2528 Ctx->getConstantInt32(IceType_i32, TableFcmp[Index].Default);
2518 _mov(Dest, Default); 2529 _mov(Dest, Default);
2519 if (HasC1) { 2530 if (HasC1) {
2520 InstX8632Label *Label = InstX8632Label::create(Func, this); 2531 InstX8632Label *Label = InstX8632Label::create(Func, this);
2521 _br(TableFcmp[Index].C1, Label); 2532 _br(TableFcmp[Index].C1, Label);
2522 if (HasC2) { 2533 if (HasC2) {
2523 _br(TableFcmp[Index].C2, Label); 2534 _br(TableFcmp[Index].C2, Label);
2524 } 2535 }
2525 Context.insert(InstFakeUse::create(Func, Dest)); 2536 Context.insert(InstFakeUse::create(Func, Dest));
2526 Constant *NonDefault = 2537 Constant *NonDefault =
2527 Ctx->getConstantInt(IceType_i32, !TableFcmp[Index].Default); 2538 Ctx->getConstantInt32(IceType_i32, !TableFcmp[Index].Default);
2528 _mov(Dest, NonDefault); 2539 _mov(Dest, NonDefault);
2529 Context.insert(Label); 2540 Context.insert(Label);
2530 } 2541 }
2531 } 2542 }
2532 2543
2533 void TargetX8632::lowerIcmp(const InstIcmp *Inst) { 2544 void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
2534 Operand *Src0 = legalize(Inst->getSrc(0)); 2545 Operand *Src0 = legalize(Inst->getSrc(0));
2535 Operand *Src1 = legalize(Inst->getSrc(1)); 2546 Operand *Src1 = legalize(Inst->getSrc(1));
2536 Variable *Dest = Inst->getDest(); 2547 Variable *Dest = Inst->getDest();
2537 2548
(...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after
2658 _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(), 2669 _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(),
2659 NextBr->getTargetFalse()); 2670 NextBr->getTargetFalse());
2660 // Skip over the following branch instruction. 2671 // Skip over the following branch instruction.
2661 Context.advanceNext(); 2672 Context.advanceNext();
2662 return; 2673 return;
2663 } 2674 }
2664 } 2675 }
2665 2676
2666 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: 2677 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
2667 Constant *Zero = Ctx->getConstantZero(IceType_i32); 2678 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2668 Constant *One = Ctx->getConstantInt(IceType_i32, 1); 2679 Constant *One = Ctx->getConstantInt32(IceType_i32, 1);
2669 if (Src0->getType() == IceType_i64) { 2680 if (Src0->getType() == IceType_i64) {
2670 InstIcmp::ICond Condition = Inst->getCondition(); 2681 InstIcmp::ICond Condition = Inst->getCondition();
2671 size_t Index = static_cast<size_t>(Condition); 2682 size_t Index = static_cast<size_t>(Condition);
2672 assert(Index < TableIcmp64Size); 2683 assert(Index < TableIcmp64Size);
2673 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem); 2684 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
2674 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem); 2685 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
2675 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); 2686 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
2676 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); 2687 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
2677 if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) { 2688 if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) {
2678 InstX8632Label *Label = InstX8632Label::create(Func, this); 2689 InstX8632Label *Label = InstX8632Label::create(Func, this);
(...skipping 30 matching lines...) Expand all
2709 _mov(Dest, One); 2720 _mov(Dest, One);
2710 _br(getIcmp32Mapping(Inst->getCondition()), Label); 2721 _br(getIcmp32Mapping(Inst->getCondition()), Label);
2711 Context.insert(InstFakeUse::create(Func, Dest)); 2722 Context.insert(InstFakeUse::create(Func, Dest));
2712 _mov(Dest, Zero); 2723 _mov(Dest, Zero);
2713 Context.insert(Label); 2724 Context.insert(Label);
2714 } 2725 }
2715 2726
2716 void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { 2727 void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
2717 Operand *SourceVectNotLegalized = Inst->getSrc(0); 2728 Operand *SourceVectNotLegalized = Inst->getSrc(0);
2718 Operand *ElementToInsertNotLegalized = Inst->getSrc(1); 2729 Operand *ElementToInsertNotLegalized = Inst->getSrc(1);
2719 ConstantInteger *ElementIndex = 2730 ConstantInteger32 *ElementIndex =
2720 llvm::dyn_cast<ConstantInteger>(Inst->getSrc(2)); 2731 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2));
2721 // Only constant indices are allowed in PNaCl IR. 2732 // Only constant indices are allowed in PNaCl IR.
2722 assert(ElementIndex); 2733 assert(ElementIndex);
2723 unsigned Index = ElementIndex->getValue(); 2734 unsigned Index = ElementIndex->getValue();
2724 assert(Index < typeNumElements(SourceVectNotLegalized->getType())); 2735 assert(Index < typeNumElements(SourceVectNotLegalized->getType()));
2725 2736
2726 Type Ty = SourceVectNotLegalized->getType(); 2737 Type Ty = SourceVectNotLegalized->getType();
2727 Type ElementTy = typeElementType(Ty); 2738 Type ElementTy = typeElementType(Ty);
2728 Type InVectorElementTy = getInVectorElementType(Ty); 2739 Type InVectorElementTy = getInVectorElementType(Ty);
2729 2740
2730 if (ElementTy == IceType_i1) { 2741 if (ElementTy == IceType_i1) {
2731 // Expand the element to the appropriate size for it to be inserted 2742 // Expand the element to the appropriate size for it to be inserted
2732 // in the vector. 2743 // in the vector.
2733 Variable *Expanded = 2744 Variable *Expanded =
2734 Func->makeVariable(InVectorElementTy, Context.getNode()); 2745 Func->makeVariable(InVectorElementTy, Context.getNode());
2735 InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded, 2746 InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded,
2736 ElementToInsertNotLegalized); 2747 ElementToInsertNotLegalized);
2737 lowerCast(Cast); 2748 lowerCast(Cast);
2738 ElementToInsertNotLegalized = Expanded; 2749 ElementToInsertNotLegalized = Expanded;
2739 } 2750 }
2740 2751
2741 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1) { 2752 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1) {
2742 // Use insertps, pinsrb, pinsrw, or pinsrd. 2753 // Use insertps, pinsrb, pinsrw, or pinsrd.
2743 Operand *ElementRM = 2754 Operand *ElementRM =
2744 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); 2755 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
2745 Operand *SourceVectRM = 2756 Operand *SourceVectRM =
2746 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); 2757 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
2747 Variable *T = makeReg(Ty); 2758 Variable *T = makeReg(Ty);
2748 _movp(T, SourceVectRM); 2759 _movp(T, SourceVectRM);
2749 if (Ty == IceType_v4f32) 2760 if (Ty == IceType_v4f32)
2750 _insertps(T, ElementRM, Ctx->getConstantInt(IceType_i8, Index << 4)); 2761 _insertps(T, ElementRM, Ctx->getConstantInt32(IceType_i8, Index << 4));
2751 else 2762 else
2752 _pinsr(T, ElementRM, Ctx->getConstantInt(IceType_i8, Index)); 2763 _pinsr(T, ElementRM, Ctx->getConstantInt32(IceType_i8, Index));
2753 _movp(Inst->getDest(), T); 2764 _movp(Inst->getDest(), T);
2754 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { 2765 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2755 // Use shufps or movss. 2766 // Use shufps or movss.
2756 Variable *ElementR = NULL; 2767 Variable *ElementR = NULL;
2757 Operand *SourceVectRM = 2768 Operand *SourceVectRM =
2758 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); 2769 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
2759 2770
2760 if (InVectorElementTy == IceType_f32) { 2771 if (InVectorElementTy == IceType_f32) {
2761 // ElementR will be in an XMM register since it is floating point. 2772 // ElementR will be in an XMM register since it is floating point.
2762 ElementR = legalizeToVar(ElementToInsertNotLegalized); 2773 ElementR = legalizeToVar(ElementToInsertNotLegalized);
(...skipping 30 matching lines...) Expand all
2793 // ElementR := ElementR[0, 0] T[0, 3] 2804 // ElementR := ElementR[0, 0] T[0, 3]
2794 // T := T[0, 1] ElementR[0, 3] 2805 // T := T[0, 1] ElementR[0, 3]
2795 // 2806 //
2796 // insertelement into index 3 (result is stored in T): 2807 // insertelement into index 3 (result is stored in T):
2797 // T := SourceVectRM 2808 // T := SourceVectRM
2798 // ElementR := ElementR[0, 0] T[0, 2] 2809 // ElementR := ElementR[0, 0] T[0, 2]
2799 // T := T[0, 1] ElementR[3, 0] 2810 // T := T[0, 1] ElementR[3, 0]
2800 const unsigned char Mask1[3] = {0, 192, 128}; 2811 const unsigned char Mask1[3] = {0, 192, 128};
2801 const unsigned char Mask2[3] = {227, 196, 52}; 2812 const unsigned char Mask2[3] = {227, 196, 52};
2802 2813
2803 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]); 2814 Constant *Mask1Constant =
2804 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]); 2815 Ctx->getConstantInt32(IceType_i8, Mask1[Index - 1]);
2816 Constant *Mask2Constant =
2817 Ctx->getConstantInt32(IceType_i8, Mask2[Index - 1]);
2805 2818
2806 if (Index == 1) { 2819 if (Index == 1) {
2807 _shufps(ElementR, SourceVectRM, Mask1Constant); 2820 _shufps(ElementR, SourceVectRM, Mask1Constant);
2808 _shufps(ElementR, SourceVectRM, Mask2Constant); 2821 _shufps(ElementR, SourceVectRM, Mask2Constant);
2809 _movp(Inst->getDest(), ElementR); 2822 _movp(Inst->getDest(), ElementR);
2810 } else { 2823 } else {
2811 Variable *T = makeReg(Ty); 2824 Variable *T = makeReg(Ty);
2812 _movp(T, SourceVectRM); 2825 _movp(T, SourceVectRM);
2813 _shufps(ElementR, T, Mask1Constant); 2826 _shufps(ElementR, T, Mask1Constant);
2814 _shufps(T, ElementR, Mask2Constant); 2827 _shufps(T, ElementR, Mask2Constant);
(...skipping 19 matching lines...) Expand all
2834 Variable *T = makeReg(Ty); 2847 Variable *T = makeReg(Ty);
2835 _movp(T, Slot); 2848 _movp(T, Slot);
2836 _movp(Inst->getDest(), T); 2849 _movp(Inst->getDest(), T);
2837 } 2850 }
2838 } 2851 }
2839 2852
2840 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { 2853 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
2841 switch (Instr->getIntrinsicInfo().ID) { 2854 switch (Instr->getIntrinsicInfo().ID) {
2842 case Intrinsics::AtomicCmpxchg: { 2855 case Intrinsics::AtomicCmpxchg: {
2843 if (!Intrinsics::VerifyMemoryOrder( 2856 if (!Intrinsics::VerifyMemoryOrder(
2844 llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) { 2857 llvm::cast<ConstantInteger32>(Instr->getArg(3))->getValue())) {
2845 Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg"); 2858 Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg");
2846 return; 2859 return;
2847 } 2860 }
2848 if (!Intrinsics::VerifyMemoryOrder( 2861 if (!Intrinsics::VerifyMemoryOrder(
2849 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) { 2862 llvm::cast<ConstantInteger32>(Instr->getArg(4))->getValue())) {
2850 Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg"); 2863 Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg");
2851 return; 2864 return;
2852 } 2865 }
2853 Variable *DestPrev = Instr->getDest(); 2866 Variable *DestPrev = Instr->getDest();
2854 Operand *PtrToMem = Instr->getArg(0); 2867 Operand *PtrToMem = Instr->getArg(0);
2855 Operand *Expected = Instr->getArg(1); 2868 Operand *Expected = Instr->getArg(1);
2856 Operand *Desired = Instr->getArg(2); 2869 Operand *Desired = Instr->getArg(2);
2857 if (tryOptimizedCmpxchgCmpBr(DestPrev, PtrToMem, Expected, Desired)) 2870 if (tryOptimizedCmpxchgCmpBr(DestPrev, PtrToMem, Expected, Desired))
2858 return; 2871 return;
2859 lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired); 2872 lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired);
2860 return; 2873 return;
2861 } 2874 }
2862 case Intrinsics::AtomicFence: 2875 case Intrinsics::AtomicFence:
2863 if (!Intrinsics::VerifyMemoryOrder( 2876 if (!Intrinsics::VerifyMemoryOrder(
2864 llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) { 2877 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue())) {
2865 Func->setError("Unexpected memory ordering for AtomicFence"); 2878 Func->setError("Unexpected memory ordering for AtomicFence");
2866 return; 2879 return;
2867 } 2880 }
2868 _mfence(); 2881 _mfence();
2869 return; 2882 return;
2870 case Intrinsics::AtomicFenceAll: 2883 case Intrinsics::AtomicFenceAll:
2871 // NOTE: FenceAll should prevent any load/store from being moved 2884 // NOTE: FenceAll should prevent any load/store from being moved
2872 // across the fence (both atomic and non-atomic). The InstX8632Mfence 2885 // across the fence (both atomic and non-atomic). The InstX8632Mfence
2873 // instruction is currently marked coarsely as "HasSideEffects". 2886 // instruction is currently marked coarsely as "HasSideEffects".
2874 _mfence(); 2887 _mfence();
2875 return; 2888 return;
2876 case Intrinsics::AtomicIsLockFree: { 2889 case Intrinsics::AtomicIsLockFree: {
2877 // X86 is always lock free for 8/16/32/64 bit accesses. 2890 // X86 is always lock free for 8/16/32/64 bit accesses.
2878 // TODO(jvoung): Since the result is constant when given a constant 2891 // TODO(jvoung): Since the result is constant when given a constant
2879 // byte size, this opens up DCE opportunities. 2892 // byte size, this opens up DCE opportunities.
2880 Operand *ByteSize = Instr->getArg(0); 2893 Operand *ByteSize = Instr->getArg(0);
2881 Variable *Dest = Instr->getDest(); 2894 Variable *Dest = Instr->getDest();
2882 if (ConstantInteger *CI = llvm::dyn_cast<ConstantInteger>(ByteSize)) { 2895 if (ConstantInteger32 *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) {
2883 Constant *Result; 2896 Constant *Result;
2884 switch (CI->getValue()) { 2897 switch (CI->getValue()) {
2885 default: 2898 default:
2886 // Some x86-64 processors support the cmpxchg16b instruction, which 2899 // Some x86-64 processors support the cmpxchg16b instruction, which
2887 // can make 16-byte operations lock free (when used with the LOCK 2900 // can make 16-byte operations lock free (when used with the LOCK
2888 // prefix). However, that's not supported in 32-bit mode, so just 2901 // prefix). However, that's not supported in 32-bit mode, so just
2889 // return 0 even for large sizes. 2902 // return 0 even for large sizes.
2890 Result = Ctx->getConstantZero(IceType_i32); 2903 Result = Ctx->getConstantZero(IceType_i32);
2891 break; 2904 break;
2892 case 1: 2905 case 1:
2893 case 2: 2906 case 2:
2894 case 4: 2907 case 4:
2895 case 8: 2908 case 8:
2896 Result = Ctx->getConstantInt(IceType_i32, 1); 2909 Result = Ctx->getConstantInt32(IceType_i32, 1);
2897 break; 2910 break;
2898 } 2911 }
2899 _mov(Dest, Result); 2912 _mov(Dest, Result);
2900 return; 2913 return;
2901 } 2914 }
2902 // The PNaCl ABI requires the byte size to be a compile-time constant. 2915 // The PNaCl ABI requires the byte size to be a compile-time constant.
2903 Func->setError("AtomicIsLockFree byte size should be compile-time const"); 2916 Func->setError("AtomicIsLockFree byte size should be compile-time const");
2904 return; 2917 return;
2905 } 2918 }
2906 case Intrinsics::AtomicLoad: { 2919 case Intrinsics::AtomicLoad: {
2907 // We require the memory address to be naturally aligned. 2920 // We require the memory address to be naturally aligned.
2908 // Given that is the case, then normal loads are atomic. 2921 // Given that is the case, then normal loads are atomic.
2909 if (!Intrinsics::VerifyMemoryOrder( 2922 if (!Intrinsics::VerifyMemoryOrder(
2910 llvm::cast<ConstantInteger>(Instr->getArg(1))->getValue())) { 2923 llvm::cast<ConstantInteger32>(Instr->getArg(1))->getValue())) {
2911 Func->setError("Unexpected memory ordering for AtomicLoad"); 2924 Func->setError("Unexpected memory ordering for AtomicLoad");
2912 return; 2925 return;
2913 } 2926 }
2914 Variable *Dest = Instr->getDest(); 2927 Variable *Dest = Instr->getDest();
2915 if (Dest->getType() == IceType_i64) { 2928 if (Dest->getType() == IceType_i64) {
2916 // Follow what GCC does and use a movq instead of what lowerLoad() 2929 // Follow what GCC does and use a movq instead of what lowerLoad()
2917 // normally does (split the load into two). 2930 // normally does (split the load into two).
2918 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding 2931 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding
2919 // can't happen anyway, since this is x86-32 and integer arithmetic only 2932 // can't happen anyway, since this is x86-32 and integer arithmetic only
2920 // happens on 32-bit quantities. 2933 // happens on 32-bit quantities.
(...skipping 12 matching lines...) Expand all
2933 lowerLoad(Load); 2946 lowerLoad(Load);
2934 // Make sure the atomic load isn't elided when unused, by adding a FakeUse. 2947 // Make sure the atomic load isn't elided when unused, by adding a FakeUse.
2935 // Since lowerLoad may fuse the load w/ an arithmetic instruction, 2948 // Since lowerLoad may fuse the load w/ an arithmetic instruction,
2936 // insert the FakeUse on the last-inserted instruction's dest. 2949 // insert the FakeUse on the last-inserted instruction's dest.
2937 Context.insert( 2950 Context.insert(
2938 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); 2951 InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
2939 return; 2952 return;
2940 } 2953 }
2941 case Intrinsics::AtomicRMW: 2954 case Intrinsics::AtomicRMW:
2942 if (!Intrinsics::VerifyMemoryOrder( 2955 if (!Intrinsics::VerifyMemoryOrder(
2943 llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) { 2956 llvm::cast<ConstantInteger32>(Instr->getArg(3))->getValue())) {
2944 Func->setError("Unexpected memory ordering for AtomicRMW"); 2957 Func->setError("Unexpected memory ordering for AtomicRMW");
2945 return; 2958 return;
2946 } 2959 }
2947 lowerAtomicRMW(Instr->getDest(), 2960 lowerAtomicRMW(Instr->getDest(),
2948 static_cast<uint32_t>(llvm::cast<ConstantInteger>( 2961 static_cast<uint32_t>(llvm::cast<ConstantInteger32>(
2949 Instr->getArg(0))->getValue()), 2962 Instr->getArg(0))->getValue()),
2950 Instr->getArg(1), Instr->getArg(2)); 2963 Instr->getArg(1), Instr->getArg(2));
2951 return; 2964 return;
2952 case Intrinsics::AtomicStore: { 2965 case Intrinsics::AtomicStore: {
2953 if (!Intrinsics::VerifyMemoryOrder( 2966 if (!Intrinsics::VerifyMemoryOrder(
2954 llvm::cast<ConstantInteger>(Instr->getArg(2))->getValue())) { 2967 llvm::cast<ConstantInteger32>(Instr->getArg(2))->getValue())) {
2955 Func->setError("Unexpected memory ordering for AtomicStore"); 2968 Func->setError("Unexpected memory ordering for AtomicStore");
2956 return; 2969 return;
2957 } 2970 }
2958 // We require the memory address to be naturally aligned. 2971 // We require the memory address to be naturally aligned.
2959 // Given that is the case, then normal stores are atomic. 2972 // Given that is the case, then normal stores are atomic.
2960 // Add a fence after the store to make it visible. 2973 // Add a fence after the store to make it visible.
2961 Operand *Value = Instr->getArg(0); 2974 Operand *Value = Instr->getArg(0);
2962 Operand *Ptr = Instr->getArg(1); 2975 Operand *Ptr = Instr->getArg(1);
2963 if (Value->getType() == IceType_i64) { 2976 if (Value->getType() == IceType_i64) {
2964 // Use a movq instead of what lowerStore() normally does 2977 // Use a movq instead of what lowerStore() normally does
(...skipping 27 matching lines...) Expand all
2992 _bswap(T_Hi); 3005 _bswap(T_Hi);
2993 _mov(DestLo, T_Hi); 3006 _mov(DestLo, T_Hi);
2994 _mov(DestHi, T_Lo); 3007 _mov(DestHi, T_Lo);
2995 } else if (Val->getType() == IceType_i32) { 3008 } else if (Val->getType() == IceType_i32) {
2996 Variable *T = legalizeToVar(Val); 3009 Variable *T = legalizeToVar(Val);
2997 _bswap(T); 3010 _bswap(T);
2998 _mov(Dest, T); 3011 _mov(Dest, T);
2999 } else { 3012 } else {
3000 assert(Val->getType() == IceType_i16); 3013 assert(Val->getType() == IceType_i16);
3001 Val = legalize(Val); 3014 Val = legalize(Val);
3002 Constant *Eight = Ctx->getConstantInt(IceType_i16, 8); 3015 Constant *Eight = Ctx->getConstantInt32(IceType_i16, 8);
3003 Variable *T = NULL; 3016 Variable *T = NULL;
3004 _mov(T, Val); 3017 _mov(T, Val);
3005 _rol(T, Eight); 3018 _rol(T, Eight);
3006 _mov(Dest, T); 3019 _mov(Dest, T);
3007 } 3020 }
3008 return; 3021 return;
3009 } 3022 }
3010 case Intrinsics::Ctpop: { 3023 case Intrinsics::Ctpop: {
3011 Variable *Dest = Instr->getDest(); 3024 Variable *Dest = Instr->getDest();
3012 Operand *Val = Instr->getArg(0); 3025 Operand *Val = Instr->getArg(0);
(...skipping 462 matching lines...) Expand 10 before | Expand all | Expand 10 after
3475 // bit position conversion, and the speculation is reversed. 3488 // bit position conversion, and the speculation is reversed.
3476 assert(Ty == IceType_i32 || Ty == IceType_i64); 3489 assert(Ty == IceType_i32 || Ty == IceType_i64);
3477 Variable *T = makeReg(IceType_i32); 3490 Variable *T = makeReg(IceType_i32);
3478 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg); 3491 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg);
3479 if (Cttz) { 3492 if (Cttz) {
3480 _bsf(T, FirstValRM); 3493 _bsf(T, FirstValRM);
3481 } else { 3494 } else {
3482 _bsr(T, FirstValRM); 3495 _bsr(T, FirstValRM);
3483 } 3496 }
3484 Variable *T_Dest = makeReg(IceType_i32); 3497 Variable *T_Dest = makeReg(IceType_i32);
3485 Constant *ThirtyTwo = Ctx->getConstantInt(IceType_i32, 32); 3498 Constant *ThirtyTwo = Ctx->getConstantInt32(IceType_i32, 32);
3486 Constant *ThirtyOne = Ctx->getConstantInt(IceType_i32, 31); 3499 Constant *ThirtyOne = Ctx->getConstantInt32(IceType_i32, 31);
3487 if (Cttz) { 3500 if (Cttz) {
3488 _mov(T_Dest, ThirtyTwo); 3501 _mov(T_Dest, ThirtyTwo);
3489 } else { 3502 } else {
3490 Constant *SixtyThree = Ctx->getConstantInt(IceType_i32, 63); 3503 Constant *SixtyThree = Ctx->getConstantInt32(IceType_i32, 63);
3491 _mov(T_Dest, SixtyThree); 3504 _mov(T_Dest, SixtyThree);
3492 } 3505 }
3493 _cmov(T_Dest, T, InstX8632::Br_ne); 3506 _cmov(T_Dest, T, InstX8632::Br_ne);
3494 if (!Cttz) { 3507 if (!Cttz) {
3495 _xor(T_Dest, ThirtyOne); 3508 _xor(T_Dest, ThirtyOne);
3496 } 3509 }
3497 if (Ty == IceType_i32) { 3510 if (Ty == IceType_i32) {
3498 _mov(Dest, T_Dest); 3511 _mov(Dest, T_Dest);
3499 return; 3512 return;
3500 } 3513 }
(...skipping 108 matching lines...) Expand 10 before | Expand all | Expand 10 after
3609 if (Index == NULL) 3622 if (Index == NULL)
3610 return false; 3623 return false;
3611 const Inst *IndexInst = Index->getDefinition(); 3624 const Inst *IndexInst = Index->getDefinition();
3612 if (IndexInst == NULL) 3625 if (IndexInst == NULL)
3613 return false; 3626 return false;
3614 if (IndexInst->getSrcSize() < 2) 3627 if (IndexInst->getSrcSize() < 2)
3615 return false; 3628 return false;
3616 if (const InstArithmetic *ArithInst = 3629 if (const InstArithmetic *ArithInst =
3617 llvm::dyn_cast<InstArithmetic>(IndexInst)) { 3630 llvm::dyn_cast<InstArithmetic>(IndexInst)) {
3618 if (Variable *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) { 3631 if (Variable *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {
3619 if (ConstantInteger *Const = 3632 if (ConstantInteger32 *Const =
3620 llvm::dyn_cast<ConstantInteger>(ArithInst->getSrc(1))) { 3633 llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1))) {
3621 if (ArithInst->getOp() == InstArithmetic::Mul && 3634 if (ArithInst->getOp() == InstArithmetic::Mul &&
3622 !Var->getIsMultidef() && Const->getType() == IceType_i32) { 3635 !Var->getIsMultidef() && Const->getType() == IceType_i32) {
3623 uint64_t Mult = Const->getValue(); 3636 uint64_t Mult = Const->getValue();
3624 uint32_t LogMult; 3637 uint32_t LogMult;
3625 switch (Mult) { 3638 switch (Mult) {
3626 case 1: 3639 case 1:
3627 LogMult = 0; 3640 LogMult = 0;
3628 break; 3641 break;
3629 case 2: 3642 case 2:
3630 LogMult = 1; 3643 LogMult = 1;
(...skipping 30 matching lines...) Expand all
3661 const Inst *BaseInst = Base->getDefinition(); 3674 const Inst *BaseInst = Base->getDefinition();
3662 if (BaseInst == NULL) 3675 if (BaseInst == NULL)
3663 return false; 3676 return false;
3664 if (const InstArithmetic *ArithInst = 3677 if (const InstArithmetic *ArithInst =
3665 llvm::dyn_cast<const InstArithmetic>(BaseInst)) { 3678 llvm::dyn_cast<const InstArithmetic>(BaseInst)) {
3666 if (ArithInst->getOp() != InstArithmetic::Add && 3679 if (ArithInst->getOp() != InstArithmetic::Add &&
3667 ArithInst->getOp() != InstArithmetic::Sub) 3680 ArithInst->getOp() != InstArithmetic::Sub)
3668 return false; 3681 return false;
3669 bool IsAdd = ArithInst->getOp() == InstArithmetic::Add; 3682 bool IsAdd = ArithInst->getOp() == InstArithmetic::Add;
3670 Variable *Var = NULL; 3683 Variable *Var = NULL;
3671 ConstantInteger *Const = NULL; 3684 ConstantInteger32 *Const = NULL;
3672 if (Variable *VariableOperand = 3685 if (Variable *VariableOperand =
3673 llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) { 3686 llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {
3674 Var = VariableOperand; 3687 Var = VariableOperand;
3675 Const = llvm::dyn_cast<ConstantInteger>(ArithInst->getSrc(1)); 3688 Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1));
3676 } else if (IsAdd) { 3689 } else if (IsAdd) {
3677 Const = llvm::dyn_cast<ConstantInteger>(ArithInst->getSrc(0)); 3690 Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(0));
3678 Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(1)); 3691 Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(1));
3679 } 3692 }
3680 if (Var == NULL || Const == NULL || Var->getIsMultidef()) 3693 if (Var == NULL || Const == NULL || Var->getIsMultidef())
3681 return false; 3694 return false;
3682 Base = Var; 3695 Base = Var;
3683 Offset += IsAdd ? Const->getValue() : -Const->getValue(); 3696 Offset += IsAdd ? Const->getValue() : -Const->getValue();
3684 Reason = BaseInst; 3697 Reason = BaseInst;
3685 return true; 3698 return true;
3686 } 3699 }
3687 return false; 3700 return false;
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after
3805 // Vanilla ICE load instructions should not use the segment registers, 3818 // Vanilla ICE load instructions should not use the segment registers,
3806 // and computeAddressOpt only works at the level of Variables and Constants, 3819 // and computeAddressOpt only works at the level of Variables and Constants,
3807 // not other OperandX8632Mem, so there should be no mention of segment 3820 // not other OperandX8632Mem, so there should be no mention of segment
3808 // registers there either. 3821 // registers there either.
3809 const OperandX8632Mem::SegmentRegisters SegmentReg = 3822 const OperandX8632Mem::SegmentRegisters SegmentReg =
3810 OperandX8632Mem::DefaultSegment; 3823 OperandX8632Mem::DefaultSegment;
3811 Variable *Base = llvm::dyn_cast<Variable>(Addr); 3824 Variable *Base = llvm::dyn_cast<Variable>(Addr);
3812 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); 3825 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
3813 if (Base && Addr != Base) { 3826 if (Base && Addr != Base) {
3814 Inst->setDeleted(); 3827 Inst->setDeleted();
3815 Constant *OffsetOp = Ctx->getConstantInt(IceType_i32, Offset); 3828 Constant *OffsetOp = Ctx->getConstantInt32(IceType_i32, Offset);
3816 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index, 3829 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index,
3817 Shift, SegmentReg); 3830 Shift, SegmentReg);
3818 Context.insert(InstLoad::create(Func, Dest, Addr)); 3831 Context.insert(InstLoad::create(Func, Dest, Addr));
3819 } 3832 }
3820 } 3833 }
3821 3834
3822 void TargetX8632::randomlyInsertNop(float Probability) { 3835 void TargetX8632::randomlyInsertNop(float Probability) {
3823 RandomNumberGeneratorWrapper RNG(Ctx->getRNG()); 3836 RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
3824 if (RNG.getTrueWithProbability(Probability)) { 3837 if (RNG.getTrueWithProbability(Probability)) {
3825 _nop(RNG.next(X86_NUM_NOP_VARIANTS)); 3838 _nop(RNG.next(X86_NUM_NOP_VARIANTS));
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
3872 if (InstructionSet >= SSE4_1) { 3885 if (InstructionSet >= SSE4_1) {
3873 // TODO(wala): If the condition operand is a constant, use blendps 3886 // TODO(wala): If the condition operand is a constant, use blendps
3874 // or pblendw. 3887 // or pblendw.
3875 // 3888 //
3876 // Use blendvps or pblendvb to implement select. 3889 // Use blendvps or pblendvb to implement select.
3877 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || 3890 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
3878 SrcTy == IceType_v4f32) { 3891 SrcTy == IceType_v4f32) {
3879 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); 3892 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
3880 Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0); 3893 Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0);
3881 _movp(xmm0, ConditionRM); 3894 _movp(xmm0, ConditionRM);
3882 _psll(xmm0, Ctx->getConstantInt(IceType_i8, 31)); 3895 _psll(xmm0, Ctx->getConstantInt32(IceType_i8, 31));
3883 _movp(T, SrcFRM); 3896 _movp(T, SrcFRM);
3884 _blendvps(T, SrcTRM, xmm0); 3897 _blendvps(T, SrcTRM, xmm0);
3885 _movp(Dest, T); 3898 _movp(Dest, T);
3886 } else { 3899 } else {
3887 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); 3900 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
3888 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 3901 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16
3889 : IceType_v16i8; 3902 : IceType_v16i8;
3890 Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0); 3903 Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0);
3891 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); 3904 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
3892 _movp(T, SrcFRM); 3905 _movp(T, SrcFRM);
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after
3987 Variable *Base = llvm::dyn_cast<Variable>(Addr); 4000 Variable *Base = llvm::dyn_cast<Variable>(Addr);
3988 // Vanilla ICE store instructions should not use the segment registers, 4001 // Vanilla ICE store instructions should not use the segment registers,
3989 // and computeAddressOpt only works at the level of Variables and Constants, 4002 // and computeAddressOpt only works at the level of Variables and Constants,
3990 // not other OperandX8632Mem, so there should be no mention of segment 4003 // not other OperandX8632Mem, so there should be no mention of segment
3991 // registers there either. 4004 // registers there either.
3992 const OperandX8632Mem::SegmentRegisters SegmentReg = 4005 const OperandX8632Mem::SegmentRegisters SegmentReg =
3993 OperandX8632Mem::DefaultSegment; 4006 OperandX8632Mem::DefaultSegment;
3994 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); 4007 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
3995 if (Base && Addr != Base) { 4008 if (Base && Addr != Base) {
3996 Inst->setDeleted(); 4009 Inst->setDeleted();
3997 Constant *OffsetOp = Ctx->getConstantInt(IceType_i32, Offset); 4010 Constant *OffsetOp = Ctx->getConstantInt32(IceType_i32, Offset);
3998 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index, 4011 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,
3999 Shift, SegmentReg); 4012 Shift, SegmentReg);
4000 Context.insert(InstStore::create(Func, Data, Addr)); 4013 Context.insert(InstStore::create(Func, Data, Addr));
4001 } 4014 }
4002 } 4015 }
4003 4016
4004 void TargetX8632::lowerSwitch(const InstSwitch *Inst) { 4017 void TargetX8632::lowerSwitch(const InstSwitch *Inst) {
4005 // This implements the most naive possible lowering. 4018 // This implements the most naive possible lowering.
4006 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default 4019 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
4007 Operand *Src0 = Inst->getComparison(); 4020 Operand *Src0 = Inst->getComparison();
4008 SizeT NumCases = Inst->getNumCases(); 4021 SizeT NumCases = Inst->getNumCases();
4009 // OK, we'll be slightly less naive by forcing Src into a physical 4022 // OK, we'll be slightly less naive by forcing Src into a physical
4010 // register if there are 2 or more uses. 4023 // register if there are 2 or more uses.
4011 if (NumCases >= 2) 4024 if (NumCases >= 2)
4012 Src0 = legalizeToVar(Src0, true); 4025 Src0 = legalizeToVar(Src0, true);
4013 else 4026 else
4014 Src0 = legalize(Src0, Legal_Reg | Legal_Mem, true); 4027 Src0 = legalize(Src0, Legal_Reg | Legal_Mem, true);
4015 for (SizeT I = 0; I < NumCases; ++I) { 4028 for (SizeT I = 0; I < NumCases; ++I) {
4016 // TODO(stichnot): Correct lowering for IceType_i64. 4029 // TODO(stichnot): Correct lowering for IceType_i64.
4017 Constant *Value = Ctx->getConstantInt(IceType_i32, Inst->getValue(I)); 4030 Constant *Value = Ctx->getConstantInt32(IceType_i32, Inst->getValue(I));
4018 _cmp(Src0, Value); 4031 _cmp(Src0, Value);
4019 _br(InstX8632Br::Br_e, Inst->getLabel(I)); 4032 _br(InstX8632Br::Br_e, Inst->getLabel(I));
4020 } 4033 }
4021 4034
4022 _br(Inst->getLabelDefault()); 4035 _br(Inst->getLabelDefault());
4023 } 4036 }
4024 4037
4025 void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind, 4038 void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind,
4026 Variable *Dest, Operand *Src0, 4039 Variable *Dest, Operand *Src0,
4027 Operand *Src1) { 4040 Operand *Src1) {
4028 assert(isVectorType(Dest->getType())); 4041 assert(isVectorType(Dest->getType()));
4029 Type Ty = Dest->getType(); 4042 Type Ty = Dest->getType();
4030 Type ElementTy = typeElementType(Ty); 4043 Type ElementTy = typeElementType(Ty);
4031 SizeT NumElements = typeNumElements(Ty); 4044 SizeT NumElements = typeNumElements(Ty);
4032 4045
4033 Operand *T = Ctx->getConstantUndef(Ty); 4046 Operand *T = Ctx->getConstantUndef(Ty);
4034 for (SizeT I = 0; I < NumElements; ++I) { 4047 for (SizeT I = 0; I < NumElements; ++I) {
4035 Constant *Index = Ctx->getConstantInt(IceType_i32, I); 4048 Constant *Index = Ctx->getConstantInt32(IceType_i32, I);
4036 4049
4037 // Extract the next two inputs. 4050 // Extract the next two inputs.
4038 Variable *Op0 = Func->makeVariable(ElementTy, Context.getNode()); 4051 Variable *Op0 = Func->makeVariable(ElementTy, Context.getNode());
4039 lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index)); 4052 lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index));
4040 Variable *Op1 = Func->makeVariable(ElementTy, Context.getNode()); 4053 Variable *Op1 = Func->makeVariable(ElementTy, Context.getNode());
4041 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index)); 4054 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index));
4042 4055
4043 // Perform the arithmetic as a scalar operation. 4056 // Perform the arithmetic as a scalar operation.
4044 Variable *Res = Func->makeVariable(ElementTy, Context.getNode()); 4057 Variable *Res = Func->makeVariable(ElementTy, Context.getNode());
4045 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1)); 4058 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1));
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
4114 _psub(Dest, MinusOne); 4127 _psub(Dest, MinusOne);
4115 return Dest; 4128 return Dest;
4116 } 4129 }
4117 4130
4118 Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) { 4131 Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) {
4119 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || 4132 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
4120 Ty == IceType_v16i8); 4133 Ty == IceType_v16i8);
4121 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { 4134 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
4122 Variable *Reg = makeVectorOfOnes(Ty, RegNum); 4135 Variable *Reg = makeVectorOfOnes(Ty, RegNum);
4123 SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1; 4136 SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1;
4124 _psll(Reg, Ctx->getConstantInt(IceType_i8, Shift)); 4137 _psll(Reg, Ctx->getConstantInt32(IceType_i8, Shift));
4125 return Reg; 4138 return Reg;
4126 } else { 4139 } else {
4127 // SSE has no left shift operation for vectors of 8 bit integers. 4140 // SSE has no left shift operation for vectors of 8 bit integers.
4128 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; 4141 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
4129 Constant *ConstantMask = 4142 Constant *ConstantMask =
4130 Ctx->getConstantInt(IceType_i32, HIGH_ORDER_BITS_MASK); 4143 Ctx->getConstantInt32(IceType_i32, HIGH_ORDER_BITS_MASK);
4131 Variable *Reg = makeReg(Ty, RegNum); 4144 Variable *Reg = makeReg(Ty, RegNum);
4132 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); 4145 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
4133 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); 4146 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
4134 return Reg; 4147 return Reg;
4135 } 4148 }
4136 } 4149 }
4137 4150
4138 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty, 4151 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,
4139 Variable *Slot, 4152 Variable *Slot,
4140 uint32_t Offset) { 4153 uint32_t Offset) {
4141 // Ensure that Loc is a stack slot. 4154 // Ensure that Loc is a stack slot.
4142 assert(Slot->getWeight() == RegWeight::Zero); 4155 assert(Slot->getWeight() == RegWeight::Zero);
4143 assert(Slot->getRegNum() == Variable::NoRegister); 4156 assert(Slot->getRegNum() == Variable::NoRegister);
4144 // Compute the location of Loc in memory. 4157 // Compute the location of Loc in memory.
4145 // TODO(wala,stichnot): lea should not be required. The address of 4158 // TODO(wala,stichnot): lea should not be required. The address of
4146 // the stack slot is known at compile time (although not until after 4159 // the stack slot is known at compile time (although not until after
4147 // addProlog()). 4160 // addProlog()).
4148 const Type PointerType = IceType_i32; 4161 const Type PointerType = IceType_i32;
4149 Variable *Loc = makeReg(PointerType); 4162 Variable *Loc = makeReg(PointerType);
4150 _lea(Loc, Slot); 4163 _lea(Loc, Slot);
4151 Constant *ConstantOffset = Ctx->getConstantInt(IceType_i32, Offset); 4164 Constant *ConstantOffset = Ctx->getConstantInt32(IceType_i32, Offset);
4152 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset); 4165 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset);
4153 } 4166 }
4154 4167
4155 // Helper for legalize() to emit the right code to lower an operand to a 4168 // Helper for legalize() to emit the right code to lower an operand to a
4156 // register of the appropriate type. 4169 // register of the appropriate type.
4157 Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) { 4170 Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) {
4158 Type Ty = Src->getType(); 4171 Type Ty = Src->getType();
4159 Variable *Reg = makeReg(Ty, RegNum); 4172 Variable *Reg = makeReg(Ty, RegNum);
4160 if (isVectorType(Ty)) { 4173 if (isVectorType(Ty)) {
4161 _movp(Reg, Src); 4174 _movp(Reg, Src);
(...skipping 109 matching lines...) Expand 10 before | Expand all | Expand 10 after
4271 OperandX8632Mem *TargetX8632::FormMemoryOperand(Operand *Operand, Type Ty) { 4284 OperandX8632Mem *TargetX8632::FormMemoryOperand(Operand *Operand, Type Ty) {
4272 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand); 4285 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand);
4273 // It may be the case that address mode optimization already creates 4286 // It may be the case that address mode optimization already creates
4274 // an OperandX8632Mem, so in that case it wouldn't need another level 4287 // an OperandX8632Mem, so in that case it wouldn't need another level
4275 // of transformation. 4288 // of transformation.
4276 if (!Mem) { 4289 if (!Mem) {
4277 Variable *Base = llvm::dyn_cast<Variable>(Operand); 4290 Variable *Base = llvm::dyn_cast<Variable>(Operand);
4278 Constant *Offset = llvm::dyn_cast<Constant>(Operand); 4291 Constant *Offset = llvm::dyn_cast<Constant>(Operand);
4279 assert(Base || Offset); 4292 assert(Base || Offset);
4280 if (Offset) { 4293 if (Offset) {
4281 assert(llvm::isa<ConstantInteger>(Offset) || 4294 assert(llvm::isa<ConstantInteger32>(Offset) ||
4282 llvm::isa<ConstantRelocatable>(Offset)); 4295 llvm::isa<ConstantRelocatable>(Offset));
4283 } 4296 }
4284 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset); 4297 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset);
4285 } 4298 }
4286 return llvm::cast<OperandX8632Mem>(legalize(Mem)); 4299 return llvm::cast<OperandX8632Mem>(legalize(Mem));
4287 } 4300 }
4288 4301
4289 Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) { 4302 Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {
4290 // There aren't any 64-bit integer registers for x86-32. 4303 // There aren't any 64-bit integer registers for x86-32.
4291 assert(Type != IceType_i64); 4304 assert(Type != IceType_i64);
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after
4395 if (WhiteList[RegNum]) 4408 if (WhiteList[RegNum])
4396 FreedRegisters[RegNum] = true; 4409 FreedRegisters[RegNum] = true;
4397 } 4410 }
4398 } 4411 }
4399 } 4412 }
4400 } 4413 }
4401 AvailableRegisters |= FreedRegisters; 4414 AvailableRegisters |= FreedRegisters;
4402 } 4415 }
4403 } 4416 }
4404 4417
4405 template <> void ConstantInteger::emit(GlobalContext *Ctx) const { 4418 template <> void ConstantInteger32::emit(GlobalContext *Ctx) const {
4406 Ostream &Str = Ctx->getStrEmit(); 4419 Ostream &Str = Ctx->getStrEmit();
4407 Str << (int64_t) getValue(); 4420 Str << (int32_t)getValue();
4421 }
4422
4423 template <> void ConstantInteger64::emit(GlobalContext *) const {
4424 llvm_unreachable("Not expecting to emit 64-bit integers");
4408 } 4425 }
4409 4426
4410 template <> void ConstantFloat::emit(GlobalContext *Ctx) const { 4427 template <> void ConstantFloat::emit(GlobalContext *Ctx) const {
4411 Ostream &Str = Ctx->getStrEmit(); 4428 Ostream &Str = Ctx->getStrEmit();
4412 // It would be better to prefix with ".L$" instead of "L$", but 4429 // It would be better to prefix with ".L$" instead of "L$", but
4413 // llvm-mc doesn't parse "dword ptr [.L$foo]". 4430 // llvm-mc doesn't parse "dword ptr [.L$foo]".
4414 Str << "dword ptr [L$" << IceType_f32 << "$" << getPoolEntryID() << "]"; 4431 Str << "dword ptr [L$" << IceType_f32 << "$" << getPoolEntryID() << "]";
4415 } 4432 }
4416 4433
4417 template <> void ConstantDouble::emit(GlobalContext *Ctx) const { 4434 template <> void ConstantDouble::emit(GlobalContext *Ctx) const {
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after
4513 Str << "\t.align\t" << Align << "\n"; 4530 Str << "\t.align\t" << Align << "\n";
4514 Str << MangledName << ":\n"; 4531 Str << MangledName << ":\n";
4515 for (SizeT i = 0; i < Size; ++i) { 4532 for (SizeT i = 0; i < Size; ++i) {
4516 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; 4533 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
4517 } 4534 }
4518 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; 4535 Str << "\t.size\t" << MangledName << ", " << Size << "\n";
4519 } 4536 }
4520 } 4537 }
4521 4538
4522 } // end of namespace Ice 4539 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | src/IceUtils.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698