OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
(...skipping 600 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
611 return; | 611 return; |
612 } | 612 } |
613 if (isVectorType(Ty)) { | 613 if (isVectorType(Ty)) { |
614 InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes); | 614 InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes); |
615 } | 615 } |
616 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); | 616 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); |
617 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); | 617 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); |
618 if (Arg->hasReg()) { | 618 if (Arg->hasReg()) { |
619 assert(Ty != IceType_i64); | 619 assert(Ty != IceType_i64); |
620 OperandX8632Mem *Mem = OperandX8632Mem::create( | 620 OperandX8632Mem *Mem = OperandX8632Mem::create( |
621 Func, Ty, FramePtr, | 621 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset())); |
622 Ctx->getConstantInt32(IceType_i32, Arg->getStackOffset())); | |
623 if (isVectorType(Arg->getType())) { | 622 if (isVectorType(Arg->getType())) { |
624 _movp(Arg, Mem); | 623 _movp(Arg, Mem); |
625 } else { | 624 } else { |
626 _mov(Arg, Mem); | 625 _mov(Arg, Mem); |
627 } | 626 } |
628 // This argument-copying instruction uses an explicit | 627 // This argument-copying instruction uses an explicit |
629 // OperandX8632Mem operand instead of a Variable, so its | 628 // OperandX8632Mem operand instead of a Variable, so its |
630 // fill-from-stack operation has to be tracked separately for | 629 // fill-from-stack operation has to be tracked separately for |
631 // statistics. | 630 // statistics. |
632 Ctx->statsUpdateFills(); | 631 Ctx->statsUpdateFills(); |
(...skipping 199 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
832 // Align esp if necessary. | 831 // Align esp if necessary. |
833 if (NeedsStackAlignment) { | 832 if (NeedsStackAlignment) { |
834 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; | 833 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; |
835 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); | 834 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); |
836 SpillAreaSizeBytes = StackSize - StackOffset; | 835 SpillAreaSizeBytes = StackSize - StackOffset; |
837 } | 836 } |
838 | 837 |
839 // Generate "sub esp, SpillAreaSizeBytes" | 838 // Generate "sub esp, SpillAreaSizeBytes" |
840 if (SpillAreaSizeBytes) | 839 if (SpillAreaSizeBytes) |
841 _sub(getPhysicalRegister(RegX8632::Reg_esp), | 840 _sub(getPhysicalRegister(RegX8632::Reg_esp), |
842 Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes)); | 841 Ctx->getConstantInt32(SpillAreaSizeBytes)); |
843 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); | 842 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); |
844 | 843 |
845 resetStackAdjustment(); | 844 resetStackAdjustment(); |
846 | 845 |
847 // Fill in stack offsets for stack args, and copy args into registers | 846 // Fill in stack offsets for stack args, and copy args into registers |
848 // for those that were register-allocated. Args are pushed right to | 847 // for those that were register-allocated. Args are pushed right to |
849 // left, so Arg[0] is closest to the stack/frame pointer. | 848 // left, so Arg[0] is closest to the stack/frame pointer. |
850 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); | 849 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); |
851 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES; | 850 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES; |
852 if (!IsEbpBasedFrame) | 851 if (!IsEbpBasedFrame) |
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
944 Context.setInsertPoint(InsertPoint); | 943 Context.setInsertPoint(InsertPoint); |
945 | 944 |
946 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp); | 945 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp); |
947 if (IsEbpBasedFrame) { | 946 if (IsEbpBasedFrame) { |
948 Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp); | 947 Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp); |
949 _mov(esp, ebp); | 948 _mov(esp, ebp); |
950 _pop(ebp); | 949 _pop(ebp); |
951 } else { | 950 } else { |
952 // add esp, SpillAreaSizeBytes | 951 // add esp, SpillAreaSizeBytes |
953 if (SpillAreaSizeBytes) | 952 if (SpillAreaSizeBytes) |
954 _add(esp, Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes)); | 953 _add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes)); |
955 } | 954 } |
956 | 955 |
957 // Add pop instructions for preserved registers. | 956 // Add pop instructions for preserved registers. |
958 llvm::SmallBitVector CalleeSaves = | 957 llvm::SmallBitVector CalleeSaves = |
959 getRegisterSet(RegSet_CalleeSave, RegSet_None); | 958 getRegisterSet(RegSet_CalleeSave, RegSet_None); |
960 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { | 959 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { |
961 SizeT j = CalleeSaves.size() - i - 1; | 960 SizeT j = CalleeSaves.size() - i - 1; |
962 if (j == RegX8632::Reg_ebp && IsEbpBasedFrame) | 961 if (j == RegX8632::Reg_ebp && IsEbpBasedFrame) |
963 continue; | 962 continue; |
964 if (CalleeSaves[j] && RegsUsed[j]) { | 963 if (CalleeSaves[j] && RegsUsed[j]) { |
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1062 | 1061 |
// Returns an operand denoting the low 32 bits of a 64-bit (IceType_i64)
// operand.  Three operand kinds are handled:
//   - Variable:          split64() is called on it and its getLo() is returned.
//   - ConstantInteger64: a 32-bit constant holding the value truncated with
//                        static_cast<uint32_t>.
//   - OperandX8632Mem:   a new IceType_i32 memory operand with the same base,
//                        offset, index, shift and segment register.
// Any other operand kind reaches llvm_unreachable().
// In this diff the NEW column only drops the explicit IceType_i32 argument
// from the getConstantInt32() call.
1063 Operand *TargetX8632::loOperand(Operand *Operand) { | 1062 Operand *TargetX8632::loOperand(Operand *Operand) { |
1064 assert(Operand->getType() == IceType_i64); | 1063 assert(Operand->getType() == IceType_i64); |
// Non-i64 operands are returned unchanged; presumably this fallback exists
// for builds in which the assert above is compiled out.
1065 if (Operand->getType() != IceType_i64) | 1064 if (Operand->getType() != IceType_i64) |
1066 return Operand; | 1065 return Operand; |
1067 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { | 1066 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { |
1068 split64(Var); | 1067 split64(Var); |
1069 return Var->getLo(); | 1068 return Var->getLo(); |
1070 } | 1069 } |
1071 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { | 1070 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { |
// The cast to uint32_t keeps only the low 32 bits of the 64-bit value.
1072 return Ctx->getConstantInt32(IceType_i32, | 1071 return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue())); |
1073 static_cast<uint32_t>(Const->getValue())); | |
1074 } | 1072 } |
1075 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) { | 1073 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) { |
// Same address as the original operand, retyped to i32.
1076 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), | 1074 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), |
1077 Mem->getOffset(), Mem->getIndex(), | 1075 Mem->getOffset(), Mem->getIndex(), |
1078 Mem->getShift(), Mem->getSegmentRegister()); | 1076 Mem->getShift(), Mem->getSegmentRegister()); |
1079 } | 1077 } |
1080 llvm_unreachable("Unsupported operand type"); | 1078 llvm_unreachable("Unsupported operand type"); |
1081 return NULL; | 1079 return NULL; |
1082 } | 1080 } |
1083 | 1081 |
// Returns an operand denoting the high 32 bits of a 64-bit (IceType_i64)
// operand.  Three operand kinds are handled:
//   - Variable:          split64() is called on it and its getHi() is returned.
//   - ConstantInteger64: a 32-bit constant holding (value >> 32) cast to
//                        uint32_t.
//   - OperandX8632Mem:   a new IceType_i32 memory operand with the same base,
//                        index, shift and segment register, whose offset is
//                        the original offset plus 4.
// Any other operand kind reaches llvm_unreachable().
// In this diff the NEW column drops the explicit IceType_i32 argument from
// getConstantInt32()/getConstantSym(), forwards getSuppressMangling() to
// getConstantSym(), and adds braces to the first branch of the offset chain.
1084 Operand *TargetX8632::hiOperand(Operand *Operand) { | 1082 Operand *TargetX8632::hiOperand(Operand *Operand) { |
1085 assert(Operand->getType() == IceType_i64); | 1083 assert(Operand->getType() == IceType_i64); |
// Non-i64 operands are returned unchanged; presumably this fallback exists
// for builds in which the assert above is compiled out.
1086 if (Operand->getType() != IceType_i64) | 1084 if (Operand->getType() != IceType_i64) |
1087 return Operand; | 1085 return Operand; |
1088 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { | 1086 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { |
1089 split64(Var); | 1087 split64(Var); |
1090 return Var->getHi(); | 1088 return Var->getHi(); |
1091 } | 1089 } |
1092 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { | 1090 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { |
// Shift the upper half down, then truncate to 32 bits.
1093 return Ctx->getConstantInt32( | 1091 return Ctx->getConstantInt32( |
1094 IceType_i32, static_cast<uint32_t>(Const->getValue() >> 32)); | 1092 static_cast<uint32_t>(Const->getValue() >> 32)); |
1095 } | 1093 } |
1096 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) { | 1094 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) { |
// Build the offset of the high half: the original offset advanced by 4.
// An Offset that is non-NULL but neither a ConstantInteger32 nor a
// ConstantRelocatable falls through every branch and is reused as is.
1097 Constant *Offset = Mem->getOffset(); | 1095 Constant *Offset = Mem->getOffset(); |
1098 if (Offset == NULL) | 1096 if (Offset == NULL) { |
1099 Offset = Ctx->getConstantInt32(IceType_i32, 4); | 1097 Offset = Ctx->getConstantInt32(4); |
1100 else if (ConstantInteger32 *IntOffset = | 1098 } else if (ConstantInteger32 *IntOffset = |
1101 llvm::dyn_cast<ConstantInteger32>(Offset)) { | 1099 llvm::dyn_cast<ConstantInteger32>(Offset)) { |
// NOTE(review): unlike the relocatable branch below, this addition is not
// guarded by a WouldOverflowAdd assert - confirm that is intentional.
1102 Offset = Ctx->getConstantInt32(IceType_i32, 4 + IntOffset->getValue()); | 1100 Offset = Ctx->getConstantInt32(4 + IntOffset->getValue()); |
1103 } else if (ConstantRelocatable *SymOffset = | 1101 } else if (ConstantRelocatable *SymOffset = |
1104 llvm::dyn_cast<ConstantRelocatable>(Offset)) { | 1102 llvm::dyn_cast<ConstantRelocatable>(Offset)) { |
// Symbolic offset: keep the symbol name and bump its addend by 4.
1105 assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4)); | 1103 assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4)); |
1106 Offset = Ctx->getConstantSym(IceType_i32, 4 + SymOffset->getOffset(), | 1104 Offset = |
1107 SymOffset->getName()); | 1105 Ctx->getConstantSym(4 + SymOffset->getOffset(), SymOffset->getName(), |
| 1106 SymOffset->getSuppressMangling()); |
1108 } | 1107 } |
1109 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset, | 1108 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset, |
1110 Mem->getIndex(), Mem->getShift(), | 1109 Mem->getIndex(), Mem->getShift(), |
1111 Mem->getSegmentRegister()); | 1110 Mem->getSegmentRegister()); |
1112 } | 1111 } |
1113 llvm_unreachable("Unsupported operand type"); | 1112 llvm_unreachable("Unsupported operand type"); |
1114 return NULL; | 1113 return NULL; |
1115 } | 1114 } |
1116 | 1115 |
1117 llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include, | 1116 llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include, |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1161 // For default align=0, set it to the real value 1, to avoid any | 1160 // For default align=0, set it to the real value 1, to avoid any |
1162 // bit-manipulation problems below. | 1161 // bit-manipulation problems below. |
1163 AlignmentParam = std::max(AlignmentParam, 1u); | 1162 AlignmentParam = std::max(AlignmentParam, 1u); |
1164 | 1163 |
1165 // LLVM enforces power of 2 alignment. | 1164 // LLVM enforces power of 2 alignment. |
1166 assert((AlignmentParam & (AlignmentParam - 1)) == 0); | 1165 assert((AlignmentParam & (AlignmentParam - 1)) == 0); |
1167 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0); | 1166 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0); |
1168 | 1167 |
1169 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES); | 1168 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES); |
1170 if (Alignment > X86_STACK_ALIGNMENT_BYTES) { | 1169 if (Alignment > X86_STACK_ALIGNMENT_BYTES) { |
1171 _and(esp, Ctx->getConstantInt32(IceType_i32, -Alignment)); | 1170 _and(esp, Ctx->getConstantInt32(-Alignment)); |
1172 } | 1171 } |
1173 if (ConstantInteger32 *ConstantTotalSize = | 1172 if (ConstantInteger32 *ConstantTotalSize = |
1174 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { | 1173 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { |
1175 uint32_t Value = ConstantTotalSize->getValue(); | 1174 uint32_t Value = ConstantTotalSize->getValue(); |
1176 Value = applyAlignment(Value, Alignment); | 1175 Value = applyAlignment(Value, Alignment); |
1177 _sub(esp, Ctx->getConstantInt32(IceType_i32, Value)); | 1176 _sub(esp, Ctx->getConstantInt32(Value)); |
1178 } else { | 1177 } else { |
1179 // Non-constant sizes need to be adjusted to the next highest | 1178 // Non-constant sizes need to be adjusted to the next highest |
1180 // multiple of the required alignment at runtime. | 1179 // multiple of the required alignment at runtime. |
1181 Variable *T = makeReg(IceType_i32); | 1180 Variable *T = makeReg(IceType_i32); |
1182 _mov(T, TotalSize); | 1181 _mov(T, TotalSize); |
1183 _add(T, Ctx->getConstantInt32(IceType_i32, Alignment - 1)); | 1182 _add(T, Ctx->getConstantInt32(Alignment - 1)); |
1184 _and(T, Ctx->getConstantInt32(IceType_i32, -Alignment)); | 1183 _and(T, Ctx->getConstantInt32(-Alignment)); |
1185 _sub(esp, T); | 1184 _sub(esp, T); |
1186 } | 1185 } |
1187 _mov(Dest, esp); | 1186 _mov(Dest, esp); |
1188 } | 1187 } |
1189 | 1188 |
1190 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { | 1189 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { |
1191 Variable *Dest = Inst->getDest(); | 1190 Variable *Dest = Inst->getDest(); |
1192 Operand *Src0 = legalize(Inst->getSrc(0)); | 1191 Operand *Src0 = legalize(Inst->getSrc(0)); |
1193 Operand *Src1 = legalize(Inst->getSrc(1)); | 1192 Operand *Src1 = legalize(Inst->getSrc(1)); |
1194 if (Dest->getType() == IceType_i64) { | 1193 if (Dest->getType() == IceType_i64) { |
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1284 // t2 = shl t2, t1 | 1283 // t2 = shl t2, t1 |
1285 // test t1, 0x20 | 1284 // test t1, 0x20 |
1286 // je L1 | 1285 // je L1 |
1287 // use(t3) | 1286 // use(t3) |
1288 // t3 = t2 | 1287 // t3 = t2 |
1289 // t2 = 0 | 1288 // t2 = 0 |
1290 // L1: | 1289 // L1: |
1291 // a.lo = t2 | 1290 // a.lo = t2 |
1292 // a.hi = t3 | 1291 // a.hi = t3 |
1293 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; | 1292 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; |
1294 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20); | 1293 Constant *BitTest = Ctx->getConstantInt32(0x20); |
1295 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1294 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
1296 InstX8632Label *Label = InstX8632Label::create(Func, this); | 1295 InstX8632Label *Label = InstX8632Label::create(Func, this); |
1297 _mov(T_1, Src1Lo, RegX8632::Reg_ecx); | 1296 _mov(T_1, Src1Lo, RegX8632::Reg_ecx); |
1298 _mov(T_2, Src0Lo); | 1297 _mov(T_2, Src0Lo); |
1299 _mov(T_3, Src0Hi); | 1298 _mov(T_3, Src0Hi); |
1300 _shld(T_3, T_2, T_1); | 1299 _shld(T_3, T_2, T_1); |
1301 _shl(T_2, T_1); | 1300 _shl(T_2, T_1); |
1302 _test(T_1, BitTest); | 1301 _test(T_1, BitTest); |
1303 _br(CondX86::Br_e, Label); | 1302 _br(CondX86::Br_e, Label); |
1304 // T_2 and T_3 are being assigned again because of the | 1303 // T_2 and T_3 are being assigned again because of the |
(...skipping 14 matching lines...) Expand all Loading... |
1319 // t3 = shr t3, t1 | 1318 // t3 = shr t3, t1 |
1320 // test t1, 0x20 | 1319 // test t1, 0x20 |
1321 // je L1 | 1320 // je L1 |
1322 // use(t2) | 1321 // use(t2) |
1323 // t2 = t3 | 1322 // t2 = t3 |
1324 // t3 = 0 | 1323 // t3 = 0 |
1325 // L1: | 1324 // L1: |
1326 // a.lo = t2 | 1325 // a.lo = t2 |
1327 // a.hi = t3 | 1326 // a.hi = t3 |
1328 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; | 1327 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; |
1329 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20); | 1328 Constant *BitTest = Ctx->getConstantInt32(0x20); |
1330 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1329 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
1331 InstX8632Label *Label = InstX8632Label::create(Func, this); | 1330 InstX8632Label *Label = InstX8632Label::create(Func, this); |
1332 _mov(T_1, Src1Lo, RegX8632::Reg_ecx); | 1331 _mov(T_1, Src1Lo, RegX8632::Reg_ecx); |
1333 _mov(T_2, Src0Lo); | 1332 _mov(T_2, Src0Lo); |
1334 _mov(T_3, Src0Hi); | 1333 _mov(T_3, Src0Hi); |
1335 _shrd(T_2, T_3, T_1); | 1334 _shrd(T_2, T_3, T_1); |
1336 _shr(T_3, T_1); | 1335 _shr(T_3, T_1); |
1337 _test(T_1, BitTest); | 1336 _test(T_1, BitTest); |
1338 _br(CondX86::Br_e, Label); | 1337 _br(CondX86::Br_e, Label); |
1339 // T_2 and T_3 are being assigned again because of the | 1338 // T_2 and T_3 are being assigned again because of the |
(...skipping 14 matching lines...) Expand all Loading... |
1354 // t3 = sar t3, t1 | 1353 // t3 = sar t3, t1 |
1355 // test t1, 0x20 | 1354 // test t1, 0x20 |
1356 // je L1 | 1355 // je L1 |
1357 // use(t2) | 1356 // use(t2) |
1358 // t2 = t3 | 1357 // t2 = t3 |
1359 // t3 = sar t3, 0x1f | 1358 // t3 = sar t3, 0x1f |
1360 // L1: | 1359 // L1: |
1361 // a.lo = t2 | 1360 // a.lo = t2 |
1362 // a.hi = t3 | 1361 // a.hi = t3 |
1363 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; | 1362 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; |
1364 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20); | 1363 Constant *BitTest = Ctx->getConstantInt32(0x20); |
1365 Constant *SignExtend = Ctx->getConstantInt32(IceType_i32, 0x1f); | 1364 Constant *SignExtend = Ctx->getConstantInt32(0x1f); |
1366 InstX8632Label *Label = InstX8632Label::create(Func, this); | 1365 InstX8632Label *Label = InstX8632Label::create(Func, this); |
1367 _mov(T_1, Src1Lo, RegX8632::Reg_ecx); | 1366 _mov(T_1, Src1Lo, RegX8632::Reg_ecx); |
1368 _mov(T_2, Src0Lo); | 1367 _mov(T_2, Src0Lo); |
1369 _mov(T_3, Src0Hi); | 1368 _mov(T_3, Src0Hi); |
1370 _shrd(T_2, T_3, T_1); | 1369 _shrd(T_2, T_3, T_1); |
1371 _sar(T_3, T_1); | 1370 _sar(T_3, T_1); |
1372 _test(T_1, BitTest); | 1371 _test(T_1, BitTest); |
1373 _br(CondX86::Br_e, Label); | 1372 _br(CondX86::Br_e, Label); |
1374 // T_2 and T_3 are being assigned again because of the | 1373 // T_2 and T_3 are being assigned again because of the |
1375 // intra-block control flow, so T_2 needs the _mov_nonkillable | 1374 // intra-block control flow, so T_2 needs the _mov_nonkillable |
(...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1476 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]} | 1475 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]} |
1477 // pmuludq T2, T3 | 1476 // pmuludq T2, T3 |
1478 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])} | 1477 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])} |
1479 // shufps T1, T2, {0,2,0,2} | 1478 // shufps T1, T2, {0,2,0,2} |
1480 // pshufd T4, T1, {0,2,1,3} | 1479 // pshufd T4, T1, {0,2,1,3} |
1481 // movups Dest, T4 | 1480 // movups Dest, T4 |
1482 | 1481 |
1483 // Mask that directs pshufd to create a vector with entries | 1482 // Mask that directs pshufd to create a vector with entries |
1484 // Src[1, 0, 3, 0] | 1483 // Src[1, 0, 3, 0] |
1485 const unsigned Constant1030 = 0x31; | 1484 const unsigned Constant1030 = 0x31; |
1486 Constant *Mask1030 = Ctx->getConstantInt32(IceType_i8, Constant1030); | 1485 Constant *Mask1030 = Ctx->getConstantInt8(Constant1030); |
1487 // Mask that directs shufps to create a vector with entries | 1486 // Mask that directs shufps to create a vector with entries |
1488 // Dest[0, 2], Src[0, 2] | 1487 // Dest[0, 2], Src[0, 2] |
1489 const unsigned Mask0202 = 0x88; | 1488 const unsigned Mask0202 = 0x88; |
1490 // Mask that directs pshufd to create a vector with entries | 1489 // Mask that directs pshufd to create a vector with entries |
1491 // Src[0, 2, 1, 3] | 1490 // Src[0, 2, 1, 3] |
1492 const unsigned Mask0213 = 0xd8; | 1491 const unsigned Mask0213 = 0xd8; |
1493 Variable *T1 = makeReg(IceType_v4i32); | 1492 Variable *T1 = makeReg(IceType_v4i32); |
1494 Variable *T2 = makeReg(IceType_v4i32); | 1493 Variable *T2 = makeReg(IceType_v4i32); |
1495 Variable *T3 = makeReg(IceType_v4i32); | 1494 Variable *T3 = makeReg(IceType_v4i32); |
1496 Variable *T4 = makeReg(IceType_v4i32); | 1495 Variable *T4 = makeReg(IceType_v4i32); |
1497 _movp(T1, Src0); | 1496 _movp(T1, Src0); |
1498 _pshufd(T2, Src0, Mask1030); | 1497 _pshufd(T2, Src0, Mask1030); |
1499 _pshufd(T3, Src1, Mask1030); | 1498 _pshufd(T3, Src1, Mask1030); |
1500 _pmuludq(T1, Src1); | 1499 _pmuludq(T1, Src1); |
1501 _pmuludq(T2, T3); | 1500 _pmuludq(T2, T3); |
1502 _shufps(T1, T2, Ctx->getConstantInt32(IceType_i8, Mask0202)); | 1501 _shufps(T1, T2, Ctx->getConstantInt8(Mask0202)); |
1503 _pshufd(T4, T1, Ctx->getConstantInt32(IceType_i8, Mask0213)); | 1502 _pshufd(T4, T1, Ctx->getConstantInt8(Mask0213)); |
1504 _movp(Dest, T4); | 1503 _movp(Dest, T4); |
1505 } else { | 1504 } else { |
1506 assert(Dest->getType() == IceType_v16i8); | 1505 assert(Dest->getType() == IceType_v16i8); |
1507 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); | 1506 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); |
1508 } | 1507 } |
1509 } break; | 1508 } break; |
1510 case InstArithmetic::Shl: | 1509 case InstArithmetic::Shl: |
1511 case InstArithmetic::Lshr: | 1510 case InstArithmetic::Lshr: |
1512 case InstArithmetic::Ashr: | 1511 case InstArithmetic::Ashr: |
1513 case InstArithmetic::Udiv: | 1512 case InstArithmetic::Udiv: |
(...skipping 274 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1788 // The PNaCl ABI requires the width of arguments to be at least 32 bits. | 1787 // The PNaCl ABI requires the width of arguments to be at least 32 bits. |
1789 assert(typeWidthInBytes(Ty) >= 4); | 1788 assert(typeWidthInBytes(Ty) >= 4); |
1790 if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) { | 1789 if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) { |
1791 XmmArgs.push_back(Arg); | 1790 XmmArgs.push_back(Arg); |
1792 } else { | 1791 } else { |
1793 StackArgs.push_back(Arg); | 1792 StackArgs.push_back(Arg); |
1794 if (isVectorType(Arg->getType())) { | 1793 if (isVectorType(Arg->getType())) { |
1795 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); | 1794 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); |
1796 } | 1795 } |
1797 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); | 1796 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); |
1798 Constant *Loc = | 1797 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); |
1799 Ctx->getConstantInt32(IceType_i32, ParameterAreaSizeBytes); | |
1800 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc)); | 1798 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc)); |
1801 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); | 1799 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); |
1802 } | 1800 } |
1803 } | 1801 } |
1804 | 1802 |
1805 // Adjust the parameter area so that the stack is aligned. It is | 1803 // Adjust the parameter area so that the stack is aligned. It is |
1806 // assumed that the stack is already aligned at the start of the | 1804 // assumed that the stack is already aligned at the start of the |
1807 // calling sequence. | 1805 // calling sequence. |
1808 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); | 1806 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); |
1809 | 1807 |
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1881 Operand *CallTarget = legalize(Instr->getCallTarget()); | 1879 Operand *CallTarget = legalize(Instr->getCallTarget()); |
1882 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget); | 1880 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget); |
1883 Context.insert(NewCall); | 1881 Context.insert(NewCall); |
1884 if (ReturnRegHi) | 1882 if (ReturnRegHi) |
1885 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); | 1883 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); |
1886 | 1884 |
1887 // Add the appropriate offset to esp. The call instruction takes care | 1885 // Add the appropriate offset to esp. The call instruction takes care |
1888 // of resetting the stack offset during emission. | 1886 // of resetting the stack offset during emission. |
1889 if (ParameterAreaSizeBytes) { | 1887 if (ParameterAreaSizeBytes) { |
1890 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); | 1888 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); |
1891 _add(esp, Ctx->getConstantInt32(IceType_i32, ParameterAreaSizeBytes)); | 1889 _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes)); |
1892 } | 1890 } |
1893 | 1891 |
1894 // Insert a register-kill pseudo instruction. | 1892 // Insert a register-kill pseudo instruction. |
1895 Context.insert(InstFakeKill::create(Func, NewCall)); | 1893 Context.insert(InstFakeKill::create(Func, NewCall)); |
1896 | 1894 |
1897 // Generate a FakeUse to keep the call live if necessary. | 1895 // Generate a FakeUse to keep the call live if necessary. |
1898 if (Instr->hasSideEffects() && ReturnReg) { | 1896 if (Instr->hasSideEffects() && ReturnReg) { |
1899 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); | 1897 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); |
1900 Context.insert(FakeUse); | 1898 Context.insert(FakeUse); |
1901 } | 1899 } |
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1958 Variable *T = makeReg(DestTy); | 1956 Variable *T = makeReg(DestTy); |
1959 _movp(T, Src0RM); | 1957 _movp(T, Src0RM); |
1960 _pand(T, OneMask); | 1958 _pand(T, OneMask); |
1961 Variable *Zeros = makeVectorOfZeros(Dest->getType()); | 1959 Variable *Zeros = makeVectorOfZeros(Dest->getType()); |
1962 _pcmpgt(T, Zeros); | 1960 _pcmpgt(T, Zeros); |
1963 _movp(Dest, T); | 1961 _movp(Dest, T); |
1964 } else { | 1962 } else { |
1965 // width = width(elty) - 1; dest = (src << width) >> width | 1963 // width = width(elty) - 1; dest = (src << width) >> width |
1966 SizeT ShiftAmount = | 1964 SizeT ShiftAmount = |
1967 X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1; | 1965 X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1; |
1968 Constant *ShiftConstant = | 1966 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount); |
1969 Ctx->getConstantInt32(IceType_i8, ShiftAmount); | |
1970 Variable *T = makeReg(DestTy); | 1967 Variable *T = makeReg(DestTy); |
1971 _movp(T, Src0RM); | 1968 _movp(T, Src0RM); |
1972 _psll(T, ShiftConstant); | 1969 _psll(T, ShiftConstant); |
1973 _psra(T, ShiftConstant); | 1970 _psra(T, ShiftConstant); |
1974 _movp(Dest, T); | 1971 _movp(Dest, T); |
1975 } | 1972 } |
1976 } else if (Dest->getType() == IceType_i64) { | 1973 } else if (Dest->getType() == IceType_i64) { |
1977 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2 | 1974 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2 |
1978 Constant *Shift = Ctx->getConstantInt32(IceType_i32, 31); | 1975 Constant *Shift = Ctx->getConstantInt32(31); |
1979 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 1976 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
1980 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 1977 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
1981 Variable *T_Lo = makeReg(DestLo->getType()); | 1978 Variable *T_Lo = makeReg(DestLo->getType()); |
1982 if (Src0RM->getType() == IceType_i32) { | 1979 if (Src0RM->getType() == IceType_i32) { |
1983 _mov(T_Lo, Src0RM); | 1980 _mov(T_Lo, Src0RM); |
1984 } else if (Src0RM->getType() == IceType_i1) { | 1981 } else if (Src0RM->getType() == IceType_i1) { |
1985 _movzx(T_Lo, Src0RM); | 1982 _movzx(T_Lo, Src0RM); |
1986 _shl(T_Lo, Shift); | 1983 _shl(T_Lo, Shift); |
1987 _sar(T_Lo, Shift); | 1984 _sar(T_Lo, Shift); |
1988 } else { | 1985 } else { |
1989 _movsx(T_Lo, Src0RM); | 1986 _movsx(T_Lo, Src0RM); |
1990 } | 1987 } |
1991 _mov(DestLo, T_Lo); | 1988 _mov(DestLo, T_Lo); |
1992 Variable *T_Hi = NULL; | 1989 Variable *T_Hi = NULL; |
1993 _mov(T_Hi, T_Lo); | 1990 _mov(T_Hi, T_Lo); |
1994 if (Src0RM->getType() != IceType_i1) | 1991 if (Src0RM->getType() != IceType_i1) |
1995 // For i1, the sar instruction is already done above. | 1992 // For i1, the sar instruction is already done above. |
1996 _sar(T_Hi, Shift); | 1993 _sar(T_Hi, Shift); |
1997 _mov(DestHi, T_Hi); | 1994 _mov(DestHi, T_Hi); |
1998 } else if (Src0RM->getType() == IceType_i1) { | 1995 } else if (Src0RM->getType() == IceType_i1) { |
1999 // t1 = src | 1996 // t1 = src |
2000 // shl t1, dst_bitwidth - 1 | 1997 // shl t1, dst_bitwidth - 1 |
2001 // sar t1, dst_bitwidth - 1 | 1998 // sar t1, dst_bitwidth - 1 |
2002 // dst = t1 | 1999 // dst = t1 |
2003 size_t DestBits = X86_CHAR_BIT * typeWidthInBytes(Dest->getType()); | 2000 size_t DestBits = X86_CHAR_BIT * typeWidthInBytes(Dest->getType()); |
2004 Constant *ShiftAmount = Ctx->getConstantInt32(IceType_i32, DestBits - 1); | 2001 Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1); |
2005 Variable *T = makeReg(Dest->getType()); | 2002 Variable *T = makeReg(Dest->getType()); |
2006 if (typeWidthInBytes(Dest->getType()) <= | 2003 if (typeWidthInBytes(Dest->getType()) <= |
2007 typeWidthInBytes(Src0RM->getType())) { | 2004 typeWidthInBytes(Src0RM->getType())) { |
2008 _mov(T, Src0RM); | 2005 _mov(T, Src0RM); |
2009 } else { | 2006 } else { |
2010 // Widen the source using movsx or movzx. (It doesn't matter | 2007 // Widen the source using movsx or movzx. (It doesn't matter |
2011 // which one, since the following shl/sar overwrite the bits.) | 2008 // which one, since the following shl/sar overwrite the bits.) |
2012 _movzx(T, Src0RM); | 2009 _movzx(T, Src0RM); |
2013 } | 2010 } |
2014 _shl(T, ShiftAmount); | 2011 _shl(T, ShiftAmount); |
(...skipping 22 matching lines...) Expand all Loading... |
2037 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 2034 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
2038 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 2035 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
2039 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 2036 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
2040 Variable *Tmp = makeReg(DestLo->getType()); | 2037 Variable *Tmp = makeReg(DestLo->getType()); |
2041 if (Src0RM->getType() == IceType_i32) { | 2038 if (Src0RM->getType() == IceType_i32) { |
2042 _mov(Tmp, Src0RM); | 2039 _mov(Tmp, Src0RM); |
2043 } else { | 2040 } else { |
2044 _movzx(Tmp, Src0RM); | 2041 _movzx(Tmp, Src0RM); |
2045 } | 2042 } |
2046 if (Src0RM->getType() == IceType_i1) { | 2043 if (Src0RM->getType() == IceType_i1) { |
2047 Constant *One = Ctx->getConstantInt32(IceType_i32, 1); | 2044 Constant *One = Ctx->getConstantInt32(1); |
2048 _and(Tmp, One); | 2045 _and(Tmp, One); |
2049 } | 2046 } |
2050 _mov(DestLo, Tmp); | 2047 _mov(DestLo, Tmp); |
2051 _mov(DestHi, Zero); | 2048 _mov(DestHi, Zero); |
2052 } else if (Src0RM->getType() == IceType_i1) { | 2049 } else if (Src0RM->getType() == IceType_i1) { |
2053 // t = Src0RM; t &= 1; Dest = t | 2050 // t = Src0RM; t &= 1; Dest = t |
2054 Constant *One = Ctx->getConstantInt32(IceType_i32, 1); | 2051 Constant *One = Ctx->getConstantInt32(1); |
2055 Type DestTy = Dest->getType(); | 2052 Type DestTy = Dest->getType(); |
2056 Variable *T; | 2053 Variable *T; |
2057 if (DestTy == IceType_i8) { | 2054 if (DestTy == IceType_i8) { |
2058 T = makeReg(DestTy); | 2055 T = makeReg(DestTy); |
2059 _mov(T, Src0RM); | 2056 _mov(T, Src0RM); |
2060 } else { | 2057 } else { |
2061 // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter. | 2058 // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter. |
2062 T = makeReg(IceType_i32); | 2059 T = makeReg(IceType_i32); |
2063 _movzx(T, Src0RM); | 2060 _movzx(T, Src0RM); |
2064 } | 2061 } |
(...skipping 19 matching lines...) Expand all Loading... |
2084 _movp(Dest, T); | 2081 _movp(Dest, T); |
2085 } else { | 2082 } else { |
2086 Operand *Src0 = Inst->getSrc(0); | 2083 Operand *Src0 = Inst->getSrc(0); |
2087 if (Src0->getType() == IceType_i64) | 2084 if (Src0->getType() == IceType_i64) |
2088 Src0 = loOperand(Src0); | 2085 Src0 = loOperand(Src0); |
2089 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | 2086 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
2090 // t1 = trunc Src0RM; Dest = t1 | 2087 // t1 = trunc Src0RM; Dest = t1 |
2091 Variable *T = NULL; | 2088 Variable *T = NULL; |
2092 _mov(T, Src0RM); | 2089 _mov(T, Src0RM); |
2093 if (Dest->getType() == IceType_i1) | 2090 if (Dest->getType() == IceType_i1) |
2094 _and(T, Ctx->getConstantInt32(IceType_i1, 1)); | 2091 _and(T, Ctx->getConstantInt1(1)); |
2095 _mov(Dest, T); | 2092 _mov(Dest, T); |
2096 } | 2093 } |
2097 break; | 2094 break; |
2098 } | 2095 } |
2099 case InstCast::Fptrunc: | 2096 case InstCast::Fptrunc: |
2100 case InstCast::Fpext: { | 2097 case InstCast::Fpext: { |
2101 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2098 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
2102 // t1 = cvt Src0RM; Dest = t1 | 2099 // t1 = cvt Src0RM; Dest = t1 |
2103 Variable *T = makeReg(Dest->getType()); | 2100 Variable *T = makeReg(Dest->getType()); |
2104 _cvt(T, Src0RM, InstX8632Cvt::Float2float); | 2101 _cvt(T, Src0RM, InstX8632Cvt::Float2float); |
(...skipping 25 matching lines...) Expand all Loading... |
2130 Call->addArg(Inst->getSrc(0)); | 2127 Call->addArg(Inst->getSrc(0)); |
2131 lowerCall(Call); | 2128 lowerCall(Call); |
2132 } else { | 2129 } else { |
2133 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2130 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
2134 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type | 2131 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type |
2135 Variable *T_1 = makeReg(IceType_i32); | 2132 Variable *T_1 = makeReg(IceType_i32); |
2136 Variable *T_2 = makeReg(Dest->getType()); | 2133 Variable *T_2 = makeReg(Dest->getType()); |
2137 _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si); | 2134 _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si); |
2138 _mov(T_2, T_1); // T_1 and T_2 may have different integer types | 2135 _mov(T_2, T_1); // T_1 and T_2 may have different integer types |
2139 if (Dest->getType() == IceType_i1) | 2136 if (Dest->getType() == IceType_i1) |
2140 _and(T_2, Ctx->getConstantInt32(IceType_i1, 1)); | 2137 _and(T_2, Ctx->getConstantInt1(1)); |
2141 _mov(Dest, T_2); | 2138 _mov(Dest, T_2); |
2142 } | 2139 } |
2143 break; | 2140 break; |
2144 case InstCast::Fptoui: | 2141 case InstCast::Fptoui: |
2145 if (isVectorType(Dest->getType())) { | 2142 if (isVectorType(Dest->getType())) { |
2146 assert(Dest->getType() == IceType_v4i32 && | 2143 assert(Dest->getType() == IceType_v4i32 && |
2147 Inst->getSrc(0)->getType() == IceType_v4f32); | 2144 Inst->getSrc(0)->getType() == IceType_v4f32); |
2148 const SizeT MaxSrcs = 1; | 2145 const SizeT MaxSrcs = 1; |
2149 InstCall *Call = makeHelperCall("Sz_fptoui_v4f32", Dest, MaxSrcs); | 2146 InstCall *Call = makeHelperCall("Sz_fptoui_v4f32", Dest, MaxSrcs); |
2150 Call->addArg(Inst->getSrc(0)); | 2147 Call->addArg(Inst->getSrc(0)); |
(...skipping 15 matching lines...) Expand all Loading... |
2166 lowerCall(Call); | 2163 lowerCall(Call); |
2167 return; | 2164 return; |
2168 } else { | 2165 } else { |
2169 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2166 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
2170 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type | 2167 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type |
2171 Variable *T_1 = makeReg(IceType_i32); | 2168 Variable *T_1 = makeReg(IceType_i32); |
2172 Variable *T_2 = makeReg(Dest->getType()); | 2169 Variable *T_2 = makeReg(Dest->getType()); |
2173 _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si); | 2170 _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si); |
2174 _mov(T_2, T_1); // T_1 and T_2 may have different integer types | 2171 _mov(T_2, T_1); // T_1 and T_2 may have different integer types |
2175 if (Dest->getType() == IceType_i1) | 2172 if (Dest->getType() == IceType_i1) |
2176 _and(T_2, Ctx->getConstantInt32(IceType_i1, 1)); | 2173 _and(T_2, Ctx->getConstantInt1(1)); |
2177 _mov(Dest, T_2); | 2174 _mov(Dest, T_2); |
2178 } | 2175 } |
2179 break; | 2176 break; |
2180 case InstCast::Sitofp: | 2177 case InstCast::Sitofp: |
2181 if (isVectorType(Dest->getType())) { | 2178 if (isVectorType(Dest->getType())) { |
2182 assert(Dest->getType() == IceType_v4f32 && | 2179 assert(Dest->getType() == IceType_v4f32 && |
2183 Inst->getSrc(0)->getType() == IceType_v4i32); | 2180 Inst->getSrc(0)->getType() == IceType_v4i32); |
2184 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2181 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
2185 Variable *T = makeReg(Dest->getType()); | 2182 Variable *T = makeReg(Dest->getType()); |
2186 _cvt(T, Src0RM, InstX8632Cvt::Dq2ps); | 2183 _cvt(T, Src0RM, InstX8632Cvt::Dq2ps); |
(...skipping 208 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2395 Type Ty = SourceVectNotLegalized->getType(); | 2392 Type Ty = SourceVectNotLegalized->getType(); |
2396 Type ElementTy = typeElementType(Ty); | 2393 Type ElementTy = typeElementType(Ty); |
2397 Type InVectorElementTy = getInVectorElementType(Ty); | 2394 Type InVectorElementTy = getInVectorElementType(Ty); |
2398 Variable *ExtractedElementR = makeReg(InVectorElementTy); | 2395 Variable *ExtractedElementR = makeReg(InVectorElementTy); |
2399 | 2396 |
2400 // TODO(wala): Determine the best lowering sequences for each type. | 2397 // TODO(wala): Determine the best lowering sequences for each type. |
2401 bool CanUsePextr = | 2398 bool CanUsePextr = |
2402 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1; | 2399 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1; |
2403 if (CanUsePextr && Ty != IceType_v4f32) { | 2400 if (CanUsePextr && Ty != IceType_v4f32) { |
2404 // Use pextrb, pextrw, or pextrd. | 2401 // Use pextrb, pextrw, or pextrd. |
2405 Constant *Mask = Ctx->getConstantInt32(IceType_i8, Index); | 2402 Constant *Mask = Ctx->getConstantInt8(Index); |
2406 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized); | 2403 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized); |
2407 _pextr(ExtractedElementR, SourceVectR, Mask); | 2404 _pextr(ExtractedElementR, SourceVectR, Mask); |
2408 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { | 2405 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { |
2409 // Use pshufd and movd/movss. | 2406 // Use pshufd and movd/movss. |
2410 Variable *T = NULL; | 2407 Variable *T = NULL; |
2411 if (Index) { | 2408 if (Index) { |
2412 // The shuffle only needs to occur if the element to be extracted | 2409 // The shuffle only needs to occur if the element to be extracted |
2413 // is not at the lowest index. | 2410 // is not at the lowest index. |
2414 Constant *Mask = Ctx->getConstantInt32(IceType_i8, Index); | 2411 Constant *Mask = Ctx->getConstantInt8(Index); |
2415 T = makeReg(Ty); | 2412 T = makeReg(Ty); |
2416 _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask); | 2413 _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask); |
2417 } else { | 2414 } else { |
2418 T = legalizeToVar(SourceVectNotLegalized); | 2415 T = legalizeToVar(SourceVectNotLegalized); |
2419 } | 2416 } |
2420 | 2417 |
2421 if (InVectorElementTy == IceType_i32) { | 2418 if (InVectorElementTy == IceType_i32) { |
2422 _movd(ExtractedElementR, T); | 2419 _movd(ExtractedElementR, T); |
2423 } else { // Ty == IceType_f32 | 2420 } else { // Ty == IceType_f32 |
2424 // TODO(wala): _movss is only used here because _mov does not | 2421 // TODO(wala): _movss is only used here because _mov does not |
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2542 } | 2539 } |
2543 bool HasC1 = (TableFcmp[Index].C1 != CondX86::Br_None); | 2540 bool HasC1 = (TableFcmp[Index].C1 != CondX86::Br_None); |
2544 bool HasC2 = (TableFcmp[Index].C2 != CondX86::Br_None); | 2541 bool HasC2 = (TableFcmp[Index].C2 != CondX86::Br_None); |
2545 if (HasC1) { | 2542 if (HasC1) { |
2546 Src0 = legalize(Src0); | 2543 Src0 = legalize(Src0); |
2547 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | 2544 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
2548 Variable *T = NULL; | 2545 Variable *T = NULL; |
2549 _mov(T, Src0); | 2546 _mov(T, Src0); |
2550 _ucomiss(T, Src1RM); | 2547 _ucomiss(T, Src1RM); |
2551 } | 2548 } |
2552 Constant *Default = | 2549 Constant *Default = Ctx->getConstantInt32(TableFcmp[Index].Default); |
2553 Ctx->getConstantInt32(IceType_i32, TableFcmp[Index].Default); | |
2554 _mov(Dest, Default); | 2550 _mov(Dest, Default); |
2555 if (HasC1) { | 2551 if (HasC1) { |
2556 InstX8632Label *Label = InstX8632Label::create(Func, this); | 2552 InstX8632Label *Label = InstX8632Label::create(Func, this); |
2557 _br(TableFcmp[Index].C1, Label); | 2553 _br(TableFcmp[Index].C1, Label); |
2558 if (HasC2) { | 2554 if (HasC2) { |
2559 _br(TableFcmp[Index].C2, Label); | 2555 _br(TableFcmp[Index].C2, Label); |
2560 } | 2556 } |
2561 Constant *NonDefault = | 2557 Constant *NonDefault = Ctx->getConstantInt32(!TableFcmp[Index].Default); |
2562 Ctx->getConstantInt32(IceType_i32, !TableFcmp[Index].Default); | |
2563 _mov_nonkillable(Dest, NonDefault); | 2558 _mov_nonkillable(Dest, NonDefault); |
2564 Context.insert(Label); | 2559 Context.insert(Label); |
2565 } | 2560 } |
2566 } | 2561 } |
2567 | 2562 |
2568 void TargetX8632::lowerIcmp(const InstIcmp *Inst) { | 2563 void TargetX8632::lowerIcmp(const InstIcmp *Inst) { |
2569 Operand *Src0 = legalize(Inst->getSrc(0)); | 2564 Operand *Src0 = legalize(Inst->getSrc(0)); |
2570 Operand *Src1 = legalize(Inst->getSrc(1)); | 2565 Operand *Src1 = legalize(Inst->getSrc(1)); |
2571 Variable *Dest = Inst->getDest(); | 2566 Variable *Dest = Inst->getDest(); |
2572 | 2567 |
(...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2693 _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(), | 2688 _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(), |
2694 NextBr->getTargetFalse()); | 2689 NextBr->getTargetFalse()); |
2695 // Skip over the following branch instruction. | 2690 // Skip over the following branch instruction. |
2696 Context.advanceNext(); | 2691 Context.advanceNext(); |
2697 return; | 2692 return; |
2698 } | 2693 } |
2699 } | 2694 } |
2700 | 2695 |
2701 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: | 2696 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: |
2702 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 2697 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
2703 Constant *One = Ctx->getConstantInt32(IceType_i32, 1); | 2698 Constant *One = Ctx->getConstantInt32(1); |
2704 if (Src0->getType() == IceType_i64) { | 2699 if (Src0->getType() == IceType_i64) { |
2705 InstIcmp::ICond Condition = Inst->getCondition(); | 2700 InstIcmp::ICond Condition = Inst->getCondition(); |
2706 size_t Index = static_cast<size_t>(Condition); | 2701 size_t Index = static_cast<size_t>(Condition); |
2707 assert(Index < TableIcmp64Size); | 2702 assert(Index < TableIcmp64Size); |
2708 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem); | 2703 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem); |
2709 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem); | 2704 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem); |
2710 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); | 2705 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); |
2711 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); | 2706 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); |
2712 if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) { | 2707 if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) { |
2713 InstX8632Label *Label = InstX8632Label::create(Func, this); | 2708 InstX8632Label *Label = InstX8632Label::create(Func, this); |
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2771 | 2766 |
2772 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1) { | 2767 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1) { |
2773 // Use insertps, pinsrb, pinsrw, or pinsrd. | 2768 // Use insertps, pinsrb, pinsrw, or pinsrd. |
2774 Operand *ElementRM = | 2769 Operand *ElementRM = |
2775 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); | 2770 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); |
2776 Operand *SourceVectRM = | 2771 Operand *SourceVectRM = |
2777 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); | 2772 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); |
2778 Variable *T = makeReg(Ty); | 2773 Variable *T = makeReg(Ty); |
2779 _movp(T, SourceVectRM); | 2774 _movp(T, SourceVectRM); |
2780 if (Ty == IceType_v4f32) | 2775 if (Ty == IceType_v4f32) |
2781 _insertps(T, ElementRM, Ctx->getConstantInt32(IceType_i8, Index << 4)); | 2776 _insertps(T, ElementRM, Ctx->getConstantInt8(Index << 4)); |
2782 else | 2777 else |
2783 _pinsr(T, ElementRM, Ctx->getConstantInt32(IceType_i8, Index)); | 2778 _pinsr(T, ElementRM, Ctx->getConstantInt8(Index)); |
2784 _movp(Inst->getDest(), T); | 2779 _movp(Inst->getDest(), T); |
2785 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { | 2780 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { |
2786 // Use shufps or movss. | 2781 // Use shufps or movss. |
2787 Variable *ElementR = NULL; | 2782 Variable *ElementR = NULL; |
2788 Operand *SourceVectRM = | 2783 Operand *SourceVectRM = |
2789 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); | 2784 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); |
2790 | 2785 |
2791 if (InVectorElementTy == IceType_f32) { | 2786 if (InVectorElementTy == IceType_f32) { |
2792 // ElementR will be in an XMM register since it is floating point. | 2787 // ElementR will be in an XMM register since it is floating point. |
2793 ElementR = legalizeToVar(ElementToInsertNotLegalized); | 2788 ElementR = legalizeToVar(ElementToInsertNotLegalized); |
(...skipping 30 matching lines...) Expand all Loading... |
2824 // ElementR := ElementR[0, 0] T[0, 3] | 2819 // ElementR := ElementR[0, 0] T[0, 3] |
2825 // T := T[0, 1] ElementR[0, 3] | 2820 // T := T[0, 1] ElementR[0, 3] |
2826 // | 2821 // |
2827 // insertelement into index 3 (result is stored in T): | 2822 // insertelement into index 3 (result is stored in T): |
2828 // T := SourceVectRM | 2823 // T := SourceVectRM |
2829 // ElementR := ElementR[0, 0] T[0, 2] | 2824 // ElementR := ElementR[0, 0] T[0, 2] |
2830 // T := T[0, 1] ElementR[3, 0] | 2825 // T := T[0, 1] ElementR[3, 0] |
2831 const unsigned char Mask1[3] = { 0, 192, 128 }; | 2826 const unsigned char Mask1[3] = { 0, 192, 128 }; |
2832 const unsigned char Mask2[3] = { 227, 196, 52 }; | 2827 const unsigned char Mask2[3] = { 227, 196, 52 }; |
2833 | 2828 |
2834 Constant *Mask1Constant = | 2829 Constant *Mask1Constant = Ctx->getConstantInt8(Mask1[Index - 1]); |
2835 Ctx->getConstantInt32(IceType_i8, Mask1[Index - 1]); | 2830 Constant *Mask2Constant = Ctx->getConstantInt8(Mask2[Index - 1]); |
2836 Constant *Mask2Constant = | |
2837 Ctx->getConstantInt32(IceType_i8, Mask2[Index - 1]); | |
2838 | 2831 |
2839 if (Index == 1) { | 2832 if (Index == 1) { |
2840 _shufps(ElementR, SourceVectRM, Mask1Constant); | 2833 _shufps(ElementR, SourceVectRM, Mask1Constant); |
2841 _shufps(ElementR, SourceVectRM, Mask2Constant); | 2834 _shufps(ElementR, SourceVectRM, Mask2Constant); |
2842 _movp(Inst->getDest(), ElementR); | 2835 _movp(Inst->getDest(), ElementR); |
2843 } else { | 2836 } else { |
2844 Variable *T = makeReg(Ty); | 2837 Variable *T = makeReg(Ty); |
2845 _movp(T, SourceVectRM); | 2838 _movp(T, SourceVectRM); |
2846 _shufps(ElementR, T, Mask1Constant); | 2839 _shufps(ElementR, T, Mask1Constant); |
2847 _shufps(T, ElementR, Mask2Constant); | 2840 _shufps(T, ElementR, Mask2Constant); |
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2919 // Some x86-64 processors support the cmpxchg16b instruction, which | 2912 // Some x86-64 processors support the cmpxchg16b instruction, which |
2920 // can make 16-byte operations lock free (when used with the LOCK | 2913 // can make 16-byte operations lock free (when used with the LOCK |
2921 // prefix). However, that's not supported in 32-bit mode, so just | 2914 // prefix). However, that's not supported in 32-bit mode, so just |
2922 // return 0 even for large sizes. | 2915 // return 0 even for large sizes. |
2923 Result = Ctx->getConstantZero(IceType_i32); | 2916 Result = Ctx->getConstantZero(IceType_i32); |
2924 break; | 2917 break; |
2925 case 1: | 2918 case 1: |
2926 case 2: | 2919 case 2: |
2927 case 4: | 2920 case 4: |
2928 case 8: | 2921 case 8: |
2929 Result = Ctx->getConstantInt32(IceType_i32, 1); | 2922 Result = Ctx->getConstantInt32(1); |
2930 break; | 2923 break; |
2931 } | 2924 } |
2932 _mov(Dest, Result); | 2925 _mov(Dest, Result); |
2933 return; | 2926 return; |
2934 } | 2927 } |
2935 // The PNaCl ABI requires the byte size to be a compile-time constant. | 2928 // The PNaCl ABI requires the byte size to be a compile-time constant. |
2936 Func->setError("AtomicIsLockFree byte size should be compile-time const"); | 2929 Func->setError("AtomicIsLockFree byte size should be compile-time const"); |
2937 return; | 2930 return; |
2938 } | 2931 } |
2939 case Intrinsics::AtomicLoad: { | 2932 case Intrinsics::AtomicLoad: { |
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3025 _bswap(T_Hi); | 3018 _bswap(T_Hi); |
3026 _mov(DestLo, T_Hi); | 3019 _mov(DestLo, T_Hi); |
3027 _mov(DestHi, T_Lo); | 3020 _mov(DestHi, T_Lo); |
3028 } else if (Val->getType() == IceType_i32) { | 3021 } else if (Val->getType() == IceType_i32) { |
3029 Variable *T = legalizeToVar(Val); | 3022 Variable *T = legalizeToVar(Val); |
3030 _bswap(T); | 3023 _bswap(T); |
3031 _mov(Dest, T); | 3024 _mov(Dest, T); |
3032 } else { | 3025 } else { |
3033 assert(Val->getType() == IceType_i16); | 3026 assert(Val->getType() == IceType_i16); |
3034 Val = legalize(Val); | 3027 Val = legalize(Val); |
3035 Constant *Eight = Ctx->getConstantInt32(IceType_i16, 8); | 3028 Constant *Eight = Ctx->getConstantInt16(8); |
3036 Variable *T = NULL; | 3029 Variable *T = NULL; |
3037 _mov(T, Val); | 3030 _mov(T, Val); |
3038 _rol(T, Eight); | 3031 _rol(T, Eight); |
3039 _mov(Dest, T); | 3032 _mov(Dest, T); |
3040 } | 3033 } |
3041 return; | 3034 return; |
3042 } | 3035 } |
3043 case Intrinsics::Ctpop: { | 3036 case Intrinsics::Ctpop: { |
3044 Variable *Dest = Instr->getDest(); | 3037 Variable *Dest = Instr->getDest(); |
3045 Operand *Val = Instr->getArg(0); | 3038 Operand *Val = Instr->getArg(0); |
(...skipping 463 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3509 // bit position conversion, and the speculation is reversed. | 3502 // bit position conversion, and the speculation is reversed. |
3510 assert(Ty == IceType_i32 || Ty == IceType_i64); | 3503 assert(Ty == IceType_i32 || Ty == IceType_i64); |
3511 Variable *T = makeReg(IceType_i32); | 3504 Variable *T = makeReg(IceType_i32); |
3512 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg); | 3505 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg); |
3513 if (Cttz) { | 3506 if (Cttz) { |
3514 _bsf(T, FirstValRM); | 3507 _bsf(T, FirstValRM); |
3515 } else { | 3508 } else { |
3516 _bsr(T, FirstValRM); | 3509 _bsr(T, FirstValRM); |
3517 } | 3510 } |
3518 Variable *T_Dest = makeReg(IceType_i32); | 3511 Variable *T_Dest = makeReg(IceType_i32); |
3519 Constant *ThirtyTwo = Ctx->getConstantInt32(IceType_i32, 32); | 3512 Constant *ThirtyTwo = Ctx->getConstantInt32(32); |
3520 Constant *ThirtyOne = Ctx->getConstantInt32(IceType_i32, 31); | 3513 Constant *ThirtyOne = Ctx->getConstantInt32(31); |
3521 if (Cttz) { | 3514 if (Cttz) { |
3522 _mov(T_Dest, ThirtyTwo); | 3515 _mov(T_Dest, ThirtyTwo); |
3523 } else { | 3516 } else { |
3524 Constant *SixtyThree = Ctx->getConstantInt32(IceType_i32, 63); | 3517 Constant *SixtyThree = Ctx->getConstantInt32(63); |
3525 _mov(T_Dest, SixtyThree); | 3518 _mov(T_Dest, SixtyThree); |
3526 } | 3519 } |
3527 _cmov(T_Dest, T, CondX86::Br_ne); | 3520 _cmov(T_Dest, T, CondX86::Br_ne); |
3528 if (!Cttz) { | 3521 if (!Cttz) { |
3529 _xor(T_Dest, ThirtyOne); | 3522 _xor(T_Dest, ThirtyOne); |
3530 } | 3523 } |
3531 if (Ty == IceType_i32) { | 3524 if (Ty == IceType_i32) { |
3532 _mov(Dest, T_Dest); | 3525 _mov(Dest, T_Dest); |
3533 return; | 3526 return; |
3534 } | 3527 } |
(...skipping 318 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3853 // Vanilla ICE load instructions should not use the segment registers, | 3846 // Vanilla ICE load instructions should not use the segment registers, |
3854 // and computeAddressOpt only works at the level of Variables and Constants, | 3847 // and computeAddressOpt only works at the level of Variables and Constants, |
3855 // not other OperandX8632Mem, so there should be no mention of segment | 3848 // not other OperandX8632Mem, so there should be no mention of segment |
3856 // registers there either. | 3849 // registers there either. |
3857 const OperandX8632Mem::SegmentRegisters SegmentReg = | 3850 const OperandX8632Mem::SegmentRegisters SegmentReg = |
3858 OperandX8632Mem::DefaultSegment; | 3851 OperandX8632Mem::DefaultSegment; |
3859 Variable *Base = llvm::dyn_cast<Variable>(Addr); | 3852 Variable *Base = llvm::dyn_cast<Variable>(Addr); |
3860 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); | 3853 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); |
3861 if (Base && Addr != Base) { | 3854 if (Base && Addr != Base) { |
3862 Inst->setDeleted(); | 3855 Inst->setDeleted(); |
3863 Constant *OffsetOp = Ctx->getConstantInt32(IceType_i32, Offset); | 3856 Constant *OffsetOp = Ctx->getConstantInt32(Offset); |
3864 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index, | 3857 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index, |
3865 Shift, SegmentReg); | 3858 Shift, SegmentReg); |
3866 Context.insert(InstLoad::create(Func, Dest, Addr)); | 3859 Context.insert(InstLoad::create(Func, Dest, Addr)); |
3867 } | 3860 } |
3868 } | 3861 } |
3869 | 3862 |
3870 void TargetX8632::randomlyInsertNop(float Probability) { | 3863 void TargetX8632::randomlyInsertNop(float Probability) { |
3871 RandomNumberGeneratorWrapper RNG(Ctx->getRNG()); | 3864 RandomNumberGeneratorWrapper RNG(Ctx->getRNG()); |
3872 if (RNG.getTrueWithProbability(Probability)) { | 3865 if (RNG.getTrueWithProbability(Probability)) { |
3873 _nop(RNG.next(X86_NUM_NOP_VARIANTS)); | 3866 _nop(RNG.next(X86_NUM_NOP_VARIANTS)); |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3919 if (InstructionSet >= SSE4_1) { | 3912 if (InstructionSet >= SSE4_1) { |
3920 // TODO(wala): If the condition operand is a constant, use blendps | 3913 // TODO(wala): If the condition operand is a constant, use blendps |
3921 // or pblendw. | 3914 // or pblendw. |
3922 // | 3915 // |
3923 // Use blendvps or pblendvb to implement select. | 3916 // Use blendvps or pblendvb to implement select. |
3924 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || | 3917 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || |
3925 SrcTy == IceType_v4f32) { | 3918 SrcTy == IceType_v4f32) { |
3926 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); | 3919 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); |
3927 Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0); | 3920 Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0); |
3928 _movp(xmm0, ConditionRM); | 3921 _movp(xmm0, ConditionRM); |
3929 _psll(xmm0, Ctx->getConstantInt32(IceType_i8, 31)); | 3922 _psll(xmm0, Ctx->getConstantInt8(31)); |
3930 _movp(T, SrcFRM); | 3923 _movp(T, SrcFRM); |
3931 _blendvps(T, SrcTRM, xmm0); | 3924 _blendvps(T, SrcTRM, xmm0); |
3932 _movp(Dest, T); | 3925 _movp(Dest, T); |
3933 } else { | 3926 } else { |
3934 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); | 3927 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); |
3935 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 | 3928 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 |
3936 : IceType_v16i8; | 3929 : IceType_v16i8; |
3937 Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0); | 3930 Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0); |
3938 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); | 3931 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); |
3939 _movp(T, SrcFRM); | 3932 _movp(T, SrcFRM); |
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4031 Variable *Base = llvm::dyn_cast<Variable>(Addr); | 4024 Variable *Base = llvm::dyn_cast<Variable>(Addr); |
4032 // Vanilla ICE store instructions should not use the segment registers, | 4025 // Vanilla ICE store instructions should not use the segment registers, |
4033 // and computeAddressOpt only works at the level of Variables and Constants, | 4026 // and computeAddressOpt only works at the level of Variables and Constants, |
4034 // not other OperandX8632Mem, so there should be no mention of segment | 4027 // not other OperandX8632Mem, so there should be no mention of segment |
4035 // registers there either. | 4028 // registers there either. |
4036 const OperandX8632Mem::SegmentRegisters SegmentReg = | 4029 const OperandX8632Mem::SegmentRegisters SegmentReg = |
4037 OperandX8632Mem::DefaultSegment; | 4030 OperandX8632Mem::DefaultSegment; |
4038 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); | 4031 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); |
4039 if (Base && Addr != Base) { | 4032 if (Base && Addr != Base) { |
4040 Inst->setDeleted(); | 4033 Inst->setDeleted(); |
4041 Constant *OffsetOp = Ctx->getConstantInt32(IceType_i32, Offset); | 4034 Constant *OffsetOp = Ctx->getConstantInt32(Offset); |
4042 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index, | 4035 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index, |
4043 Shift, SegmentReg); | 4036 Shift, SegmentReg); |
4044 Context.insert(InstStore::create(Func, Data, Addr)); | 4037 Context.insert(InstStore::create(Func, Data, Addr)); |
4045 } | 4038 } |
4046 } | 4039 } |
4047 | 4040 |
4048 void TargetX8632::lowerSwitch(const InstSwitch *Inst) { | 4041 void TargetX8632::lowerSwitch(const InstSwitch *Inst) { |
4049 // This implements the most naive possible lowering. | 4042 // This implements the most naive possible lowering. |
4050 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default | 4043 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default |
4051 Operand *Src0 = Inst->getComparison(); | 4044 Operand *Src0 = Inst->getComparison(); |
4052 SizeT NumCases = Inst->getNumCases(); | 4045 SizeT NumCases = Inst->getNumCases(); |
4053 if (Src0->getType() == IceType_i64) { | 4046 if (Src0->getType() == IceType_i64) { |
4054 Src0 = legalize(Src0); // get Base/Index into physical registers | 4047 Src0 = legalize(Src0); // get Base/Index into physical registers |
4055 Operand *Src0Lo = loOperand(Src0); | 4048 Operand *Src0Lo = loOperand(Src0); |
4056 Operand *Src0Hi = hiOperand(Src0); | 4049 Operand *Src0Hi = hiOperand(Src0); |
4057 if (NumCases >= 2) { | 4050 if (NumCases >= 2) { |
4058 Src0Lo = legalizeToVar(Src0Lo); | 4051 Src0Lo = legalizeToVar(Src0Lo); |
4059 Src0Hi = legalizeToVar(Src0Hi); | 4052 Src0Hi = legalizeToVar(Src0Hi); |
4060 } else { | 4053 } else { |
4061 Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem); | 4054 Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem); |
4062 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem); | 4055 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem); |
4063 } | 4056 } |
4064 for (SizeT I = 0; I < NumCases; ++I) { | 4057 for (SizeT I = 0; I < NumCases; ++I) { |
4065 Constant *ValueLo = Ctx->getConstantInt32(IceType_i32, Inst->getValue(I)); | 4058 Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I)); |
4066 Constant *ValueHi = | 4059 Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32); |
4067 Ctx->getConstantInt32(IceType_i32, Inst->getValue(I) >> 32); | |
4068 InstX8632Label *Label = InstX8632Label::create(Func, this); | 4060 InstX8632Label *Label = InstX8632Label::create(Func, this); |
4069 _cmp(Src0Lo, ValueLo); | 4061 _cmp(Src0Lo, ValueLo); |
4070 _br(CondX86::Br_ne, Label); | 4062 _br(CondX86::Br_ne, Label); |
4071 _cmp(Src0Hi, ValueHi); | 4063 _cmp(Src0Hi, ValueHi); |
4072 _br(CondX86::Br_e, Inst->getLabel(I)); | 4064 _br(CondX86::Br_e, Inst->getLabel(I)); |
4073 Context.insert(Label); | 4065 Context.insert(Label); |
4074 } | 4066 } |
4075 _br(Inst->getLabelDefault()); | 4067 _br(Inst->getLabelDefault()); |
4076 return; | 4068 return; |
4077 } | 4069 } |
4078 // OK, we'll be slightly less naive by forcing Src into a physical | 4070 // OK, we'll be slightly less naive by forcing Src into a physical |
4079 // register if there are 2 or more uses. | 4071 // register if there are 2 or more uses. |
4080 if (NumCases >= 2) | 4072 if (NumCases >= 2) |
4081 Src0 = legalizeToVar(Src0); | 4073 Src0 = legalizeToVar(Src0); |
4082 else | 4074 else |
4083 Src0 = legalize(Src0, Legal_Reg | Legal_Mem); | 4075 Src0 = legalize(Src0, Legal_Reg | Legal_Mem); |
4084 for (SizeT I = 0; I < NumCases; ++I) { | 4076 for (SizeT I = 0; I < NumCases; ++I) { |
4085 Constant *Value = Ctx->getConstantInt32(IceType_i32, Inst->getValue(I)); | 4077 Constant *Value = Ctx->getConstantInt32(Inst->getValue(I)); |
4086 _cmp(Src0, Value); | 4078 _cmp(Src0, Value); |
4087 _br(CondX86::Br_e, Inst->getLabel(I)); | 4079 _br(CondX86::Br_e, Inst->getLabel(I)); |
4088 } | 4080 } |
4089 | 4081 |
4090 _br(Inst->getLabelDefault()); | 4082 _br(Inst->getLabelDefault()); |
4091 } | 4083 } |
4092 | 4084 |
4093 void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind, | 4085 void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind, |
4094 Variable *Dest, Operand *Src0, | 4086 Variable *Dest, Operand *Src0, |
4095 Operand *Src1) { | 4087 Operand *Src1) { |
4096 assert(isVectorType(Dest->getType())); | 4088 assert(isVectorType(Dest->getType())); |
4097 Type Ty = Dest->getType(); | 4089 Type Ty = Dest->getType(); |
4098 Type ElementTy = typeElementType(Ty); | 4090 Type ElementTy = typeElementType(Ty); |
4099 SizeT NumElements = typeNumElements(Ty); | 4091 SizeT NumElements = typeNumElements(Ty); |
4100 | 4092 |
4101 Operand *T = Ctx->getConstantUndef(Ty); | 4093 Operand *T = Ctx->getConstantUndef(Ty); |
4102 for (SizeT I = 0; I < NumElements; ++I) { | 4094 for (SizeT I = 0; I < NumElements; ++I) { |
4103 Constant *Index = Ctx->getConstantInt32(IceType_i32, I); | 4095 Constant *Index = Ctx->getConstantInt32(I); |
4104 | 4096 |
4105 // Extract the next two inputs. | 4097 // Extract the next two inputs. |
4106 Variable *Op0 = Func->makeVariable(ElementTy); | 4098 Variable *Op0 = Func->makeVariable(ElementTy); |
4107 lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index)); | 4099 lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index)); |
4108 Variable *Op1 = Func->makeVariable(ElementTy); | 4100 Variable *Op1 = Func->makeVariable(ElementTy); |
4109 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index)); | 4101 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index)); |
4110 | 4102 |
4111 // Perform the arithmetic as a scalar operation. | 4103 // Perform the arithmetic as a scalar operation. |
4112 Variable *Res = Func->makeVariable(ElementTy); | 4104 Variable *Res = Func->makeVariable(ElementTy); |
4113 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1)); | 4105 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1)); |
(...skipping 227 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4341 _psub(Dest, MinusOne); | 4333 _psub(Dest, MinusOne); |
4342 return Dest; | 4334 return Dest; |
4343 } | 4335 } |
4344 | 4336 |
// Builds and returns a Variable of vector type Ty in which each element has
// only its high-order bit set.
//   Ty     - must be v4i32, v4f32, v8i16 or v16i8 (asserted below).
//   RegNum - forwarded unchanged to makeVectorOfOnes() / makeReg().
// Two strategies are used: a per-element left shift for the 16- and 32-bit
// element types, and a broadcast 32-bit mask for v16i8.
4345 Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) { | 4337 Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) { |
4346 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || | 4338 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || |
4347 Ty == IceType_v16i8); | 4339 Ty == IceType_v16i8); |
4348 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { | 4340 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { |
// Start from the result of makeVectorOfOnes() (presumably the value 1 in
// every element -- confirm against that helper) and shift it into the top
// bit position of each element.
4349 Variable *Reg = makeVectorOfOnes(Ty, RegNum); | 4341 Variable *Reg = makeVectorOfOnes(Ty, RegNum); |
// Shift amount = element width in bits minus one.
4350 SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1; | 4342 SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1; |
// The shift count is passed as an 8-bit constant operand.
4351 _psll(Reg, Ctx->getConstantInt32(IceType_i8, Shift)); | 4343 _psll(Reg, Ctx->getConstantInt8(Shift)); |
4352 return Reg; | 4344 return Reg; |
4353 } else { | 4345 } else { |
4354 // SSE has no left shift operation for vectors of 8 bit integers. | 4346 // SSE has no left shift operation for vectors of 8 bit integers. |
// Given the assert above, only v16i8 reaches this branch. The mask has the
// top bit set in each of its four bytes.
4355 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; | 4347 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; |
4356 Constant *ConstantMask = | 4348 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); |
4357 Ctx->getConstantInt32(IceType_i32, HIGH_ORDER_BITS_MASK); | |
4358 Variable *Reg = makeReg(Ty, RegNum); | 4349 Variable *Reg = makeReg(Ty, RegNum); |
// Move the 32-bit mask into Reg, then shuffle with an all-zero selector;
// on x86, pshufd with selector 0 copies the lowest 32-bit lane into all four
// lanes, so every byte of the vector ends up as 0x80.
4359 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); | 4350 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); |
4360 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); | 4351 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); |
4361 return Reg; | 4352 return Reg; |
4362 } | 4353 } |
4363 } | 4354 } |
4364 | 4355 |
// Creates a memory operand of type Ty that addresses Offset bytes past the
// address of the variable Slot.
//   Ty     - type given to the resulting memory operand.
//   Slot   - variable asserted to have zero register weight and no assigned
//            register.
//   Offset - displacement, emitted as a 32-bit integer constant.
// Emits an lea of Slot into a fresh i32 register and returns an
// OperandX8632Mem using that register as base and Offset as displacement.
// NOTE(review): the existing comments in the body say "Loc", but the asserts
// check Slot; Loc is the address register created by makeReg() in this
// function.
4365 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty, | 4356 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty, |
4366 Variable *Slot, | 4357 Variable *Slot, |
4367 uint32_t Offset) { | 4358 uint32_t Offset) { |
4368 // Ensure that Loc is a stack slot. | 4359 // Ensure that Loc is a stack slot. |
4369 assert(Slot->getWeight() == RegWeight::Zero); | 4360 assert(Slot->getWeight() == RegWeight::Zero); |
4370 assert(Slot->getRegNum() == Variable::NoRegister); | 4361 assert(Slot->getRegNum() == Variable::NoRegister); |
4371 // Compute the location of Loc in memory. | 4362 // Compute the location of Loc in memory. |
4372 // TODO(wala,stichnot): lea should not be required. The address of | 4363 // TODO(wala,stichnot): lea should not be required. The address of |
4373 // the stack slot is known at compile time (although not until after | 4364 // the stack slot is known at compile time (although not until after |
4374 // addProlog()). | 4365 // addProlog()). |
4375 const Type PointerType = IceType_i32; | 4366 const Type PointerType = IceType_i32; |
4376 Variable *Loc = makeReg(PointerType); | 4367 Variable *Loc = makeReg(PointerType); |
4377 _lea(Loc, Slot); | 4368 _lea(Loc, Slot); |
// The offset is not folded into the lea; it is carried as the displacement
// of the returned memory operand.
4378 Constant *ConstantOffset = Ctx->getConstantInt32(IceType_i32, Offset); | 4369 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); |
4379 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset); | 4370 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset); |
4380 } | 4371 } |
4381 | 4372 |
4382 // Helper for legalize() to emit the right code to lower an operand to a | 4373 // Helper for legalize() to emit the right code to lower an operand to a |
4383 // register of the appropriate type. | 4374 // register of the appropriate type. |
4384 Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) { | 4375 Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) { |
4385 Type Ty = Src->getType(); | 4376 Type Ty = Src->getType(); |
4386 Variable *Reg = makeReg(Ty, RegNum); | 4377 Variable *Reg = makeReg(Ty, RegNum); |
4387 if (isVectorType(Ty)) { | 4378 if (isVectorType(Ty)) { |
4388 _movp(Reg, Src); | 4379 _movp(Reg, Src); |
(...skipping 261 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4650 } else if (IsConstant || IsExternal) | 4641 } else if (IsConstant || IsExternal) |
4651 Str << "\t.zero\t" << Size << "\n"; | 4642 Str << "\t.zero\t" << Size << "\n"; |
4652 // Size is part of .comm. | 4643 // Size is part of .comm. |
4653 | 4644 |
4654 if (IsConstant || HasNonzeroInitializer || IsExternal) | 4645 if (IsConstant || HasNonzeroInitializer || IsExternal) |
4655 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; | 4646 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; |
4656 // Size is part of .comm. | 4647 // Size is part of .comm. |
4657 } | 4648 } |
4658 | 4649 |
4659 } // end of namespace Ice | 4650 } // end of namespace Ice |
OLD | NEW |