Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(368)

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 737513008: Subzero: Simplify the constant pools. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Minor cleanup Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
(...skipping 600 matching lines...) Expand 10 before | Expand all | Expand 10 after
611 return; 611 return;
612 } 612 }
613 if (isVectorType(Ty)) { 613 if (isVectorType(Ty)) {
614 InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes); 614 InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes);
615 } 615 }
616 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); 616 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
617 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); 617 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
618 if (Arg->hasReg()) { 618 if (Arg->hasReg()) {
619 assert(Ty != IceType_i64); 619 assert(Ty != IceType_i64);
620 OperandX8632Mem *Mem = OperandX8632Mem::create( 620 OperandX8632Mem *Mem = OperandX8632Mem::create(
621 Func, Ty, FramePtr, 621 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset()));
622 Ctx->getConstantInt32(IceType_i32, Arg->getStackOffset()));
623 if (isVectorType(Arg->getType())) { 622 if (isVectorType(Arg->getType())) {
624 _movp(Arg, Mem); 623 _movp(Arg, Mem);
625 } else { 624 } else {
626 _mov(Arg, Mem); 625 _mov(Arg, Mem);
627 } 626 }
628 // This argument-copying instruction uses an explicit 627 // This argument-copying instruction uses an explicit
629 // OperandX8632Mem operand instead of a Variable, so its 628 // OperandX8632Mem operand instead of a Variable, so its
630 // fill-from-stack operation has to be tracked separately for 629 // fill-from-stack operation has to be tracked separately for
631 // statistics. 630 // statistics.
632 Ctx->statsUpdateFills(); 631 Ctx->statsUpdateFills();
(...skipping 199 matching lines...) Expand 10 before | Expand all | Expand 10 after
832 // Align esp if necessary. 831 // Align esp if necessary.
833 if (NeedsStackAlignment) { 832 if (NeedsStackAlignment) {
834 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; 833 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
835 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); 834 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
836 SpillAreaSizeBytes = StackSize - StackOffset; 835 SpillAreaSizeBytes = StackSize - StackOffset;
837 } 836 }
838 837
839 // Generate "sub esp, SpillAreaSizeBytes" 838 // Generate "sub esp, SpillAreaSizeBytes"
840 if (SpillAreaSizeBytes) 839 if (SpillAreaSizeBytes)
841 _sub(getPhysicalRegister(RegX8632::Reg_esp), 840 _sub(getPhysicalRegister(RegX8632::Reg_esp),
842 Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes)); 841 Ctx->getConstantInt32(SpillAreaSizeBytes));
843 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); 842 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
844 843
845 resetStackAdjustment(); 844 resetStackAdjustment();
846 845
847 // Fill in stack offsets for stack args, and copy args into registers 846 // Fill in stack offsets for stack args, and copy args into registers
848 // for those that were register-allocated. Args are pushed right to 847 // for those that were register-allocated. Args are pushed right to
849 // left, so Arg[0] is closest to the stack/frame pointer. 848 // left, so Arg[0] is closest to the stack/frame pointer.
850 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); 849 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
851 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES; 850 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES;
852 if (!IsEbpBasedFrame) 851 if (!IsEbpBasedFrame)
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after
944 Context.setInsertPoint(InsertPoint); 943 Context.setInsertPoint(InsertPoint);
945 944
946 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp); 945 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
947 if (IsEbpBasedFrame) { 946 if (IsEbpBasedFrame) {
948 Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp); 947 Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp);
949 _mov(esp, ebp); 948 _mov(esp, ebp);
950 _pop(ebp); 949 _pop(ebp);
951 } else { 950 } else {
952 // add esp, SpillAreaSizeBytes 951 // add esp, SpillAreaSizeBytes
953 if (SpillAreaSizeBytes) 952 if (SpillAreaSizeBytes)
954 _add(esp, Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes)); 953 _add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes));
955 } 954 }
956 955
957 // Add pop instructions for preserved registers. 956 // Add pop instructions for preserved registers.
958 llvm::SmallBitVector CalleeSaves = 957 llvm::SmallBitVector CalleeSaves =
959 getRegisterSet(RegSet_CalleeSave, RegSet_None); 958 getRegisterSet(RegSet_CalleeSave, RegSet_None);
960 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { 959 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
961 SizeT j = CalleeSaves.size() - i - 1; 960 SizeT j = CalleeSaves.size() - i - 1;
962 if (j == RegX8632::Reg_ebp && IsEbpBasedFrame) 961 if (j == RegX8632::Reg_ebp && IsEbpBasedFrame)
963 continue; 962 continue;
964 if (CalleeSaves[j] && RegsUsed[j]) { 963 if (CalleeSaves[j] && RegsUsed[j]) {
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after
1062 1061
1063 Operand *TargetX8632::loOperand(Operand *Operand) { 1062 Operand *TargetX8632::loOperand(Operand *Operand) {
1064 assert(Operand->getType() == IceType_i64); 1063 assert(Operand->getType() == IceType_i64);
1065 if (Operand->getType() != IceType_i64) 1064 if (Operand->getType() != IceType_i64)
1066 return Operand; 1065 return Operand;
1067 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { 1066 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
1068 split64(Var); 1067 split64(Var);
1069 return Var->getLo(); 1068 return Var->getLo();
1070 } 1069 }
1071 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { 1070 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1072 return Ctx->getConstantInt32(IceType_i32, 1071 return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
1073 static_cast<uint32_t>(Const->getValue()));
1074 } 1072 }
1075 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) { 1073 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
1076 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), 1074 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(),
1077 Mem->getOffset(), Mem->getIndex(), 1075 Mem->getOffset(), Mem->getIndex(),
1078 Mem->getShift(), Mem->getSegmentRegister()); 1076 Mem->getShift(), Mem->getSegmentRegister());
1079 } 1077 }
1080 llvm_unreachable("Unsupported operand type"); 1078 llvm_unreachable("Unsupported operand type");
1081 return NULL; 1079 return NULL;
1082 } 1080 }
1083 1081
1084 Operand *TargetX8632::hiOperand(Operand *Operand) { 1082 Operand *TargetX8632::hiOperand(Operand *Operand) {
1085 assert(Operand->getType() == IceType_i64); 1083 assert(Operand->getType() == IceType_i64);
1086 if (Operand->getType() != IceType_i64) 1084 if (Operand->getType() != IceType_i64)
1087 return Operand; 1085 return Operand;
1088 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { 1086 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
1089 split64(Var); 1087 split64(Var);
1090 return Var->getHi(); 1088 return Var->getHi();
1091 } 1089 }
1092 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { 1090 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1093 return Ctx->getConstantInt32( 1091 return Ctx->getConstantInt32(
1094 IceType_i32, static_cast<uint32_t>(Const->getValue() >> 32)); 1092 static_cast<uint32_t>(Const->getValue() >> 32));
1095 } 1093 }
1096 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) { 1094 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
1097 Constant *Offset = Mem->getOffset(); 1095 Constant *Offset = Mem->getOffset();
1098 if (Offset == NULL) 1096 if (Offset == NULL)
1099 Offset = Ctx->getConstantInt32(IceType_i32, 4); 1097 Offset = Ctx->getConstantInt32(4);
1100 else if (ConstantInteger32 *IntOffset = 1098 else if (ConstantInteger32 *IntOffset =
1101 llvm::dyn_cast<ConstantInteger32>(Offset)) { 1099 llvm::dyn_cast<ConstantInteger32>(Offset)) {
1102 Offset = Ctx->getConstantInt32(IceType_i32, 4 + IntOffset->getValue()); 1100 Offset = Ctx->getConstantInt32(4 + IntOffset->getValue());
1103 } else if (ConstantRelocatable *SymOffset = 1101 } else if (ConstantRelocatable *SymOffset =
1104 llvm::dyn_cast<ConstantRelocatable>(Offset)) { 1102 llvm::dyn_cast<ConstantRelocatable>(Offset)) {
1105 assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4)); 1103 assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4));
1106 Offset = Ctx->getConstantSym(IceType_i32, 4 + SymOffset->getOffset(), 1104 Offset =
1107 SymOffset->getName()); 1105 Ctx->getConstantSym(4 + SymOffset->getOffset(), SymOffset->getName());
1108 } 1106 }
1109 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset, 1107 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset,
1110 Mem->getIndex(), Mem->getShift(), 1108 Mem->getIndex(), Mem->getShift(),
1111 Mem->getSegmentRegister()); 1109 Mem->getSegmentRegister());
1112 } 1110 }
1113 llvm_unreachable("Unsupported operand type"); 1111 llvm_unreachable("Unsupported operand type");
1114 return NULL; 1112 return NULL;
1115 } 1113 }
1116 1114
1117 llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include, 1115 llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include,
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
1161 // For default align=0, set it to the real value 1, to avoid any 1159 // For default align=0, set it to the real value 1, to avoid any
1162 // bit-manipulation problems below. 1160 // bit-manipulation problems below.
1163 AlignmentParam = std::max(AlignmentParam, 1u); 1161 AlignmentParam = std::max(AlignmentParam, 1u);
1164 1162
1165 // LLVM enforces power of 2 alignment. 1163 // LLVM enforces power of 2 alignment.
1166 assert((AlignmentParam & (AlignmentParam - 1)) == 0); 1164 assert((AlignmentParam & (AlignmentParam - 1)) == 0);
1167 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0); 1165 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);
1168 1166
1169 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES); 1167 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES);
1170 if (Alignment > X86_STACK_ALIGNMENT_BYTES) { 1168 if (Alignment > X86_STACK_ALIGNMENT_BYTES) {
1171 _and(esp, Ctx->getConstantInt32(IceType_i32, -Alignment)); 1169 _and(esp, Ctx->getConstantInt32(-Alignment));
1172 } 1170 }
1173 if (ConstantInteger32 *ConstantTotalSize = 1171 if (ConstantInteger32 *ConstantTotalSize =
1174 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { 1172 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
1175 uint32_t Value = ConstantTotalSize->getValue(); 1173 uint32_t Value = ConstantTotalSize->getValue();
1176 Value = applyAlignment(Value, Alignment); 1174 Value = applyAlignment(Value, Alignment);
1177 _sub(esp, Ctx->getConstantInt32(IceType_i32, Value)); 1175 _sub(esp, Ctx->getConstantInt32(Value));
1178 } else { 1176 } else {
1179 // Non-constant sizes need to be adjusted to the next highest 1177 // Non-constant sizes need to be adjusted to the next highest
1180 // multiple of the required alignment at runtime. 1178 // multiple of the required alignment at runtime.
1181 Variable *T = makeReg(IceType_i32); 1179 Variable *T = makeReg(IceType_i32);
1182 _mov(T, TotalSize); 1180 _mov(T, TotalSize);
1183 _add(T, Ctx->getConstantInt32(IceType_i32, Alignment - 1)); 1181 _add(T, Ctx->getConstantInt32(Alignment - 1));
1184 _and(T, Ctx->getConstantInt32(IceType_i32, -Alignment)); 1182 _and(T, Ctx->getConstantInt32(-Alignment));
1185 _sub(esp, T); 1183 _sub(esp, T);
1186 } 1184 }
1187 _mov(Dest, esp); 1185 _mov(Dest, esp);
1188 } 1186 }
1189 1187
1190 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { 1188 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
1191 Variable *Dest = Inst->getDest(); 1189 Variable *Dest = Inst->getDest();
1192 Operand *Src0 = legalize(Inst->getSrc(0)); 1190 Operand *Src0 = legalize(Inst->getSrc(0));
1193 Operand *Src1 = legalize(Inst->getSrc(1)); 1191 Operand *Src1 = legalize(Inst->getSrc(1));
1194 if (Dest->getType() == IceType_i64) { 1192 if (Dest->getType() == IceType_i64) {
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after
1284 // t2 = shl t2, t1 1282 // t2 = shl t2, t1
1285 // test t1, 0x20 1283 // test t1, 0x20
1286 // je L1 1284 // je L1
1287 // use(t3) 1285 // use(t3)
1288 // t3 = t2 1286 // t3 = t2
1289 // t2 = 0 1287 // t2 = 0
1290 // L1: 1288 // L1:
1291 // a.lo = t2 1289 // a.lo = t2
1292 // a.hi = t3 1290 // a.hi = t3
1293 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; 1291 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
1294 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20); 1292 Constant *BitTest = Ctx->getConstantInt32(0x20);
1295 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1293 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1296 InstX8632Label *Label = InstX8632Label::create(Func, this); 1294 InstX8632Label *Label = InstX8632Label::create(Func, this);
1297 _mov(T_1, Src1Lo, RegX8632::Reg_ecx); 1295 _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
1298 _mov(T_2, Src0Lo); 1296 _mov(T_2, Src0Lo);
1299 _mov(T_3, Src0Hi); 1297 _mov(T_3, Src0Hi);
1300 _shld(T_3, T_2, T_1); 1298 _shld(T_3, T_2, T_1);
1301 _shl(T_2, T_1); 1299 _shl(T_2, T_1);
1302 _test(T_1, BitTest); 1300 _test(T_1, BitTest);
1303 _br(CondX86::Br_e, Label); 1301 _br(CondX86::Br_e, Label);
1304 // T_2 and T_3 are being assigned again because of the 1302 // T_2 and T_3 are being assigned again because of the
(...skipping 14 matching lines...) Expand all
1319 // t3 = shr t3, t1 1317 // t3 = shr t3, t1
1320 // test t1, 0x20 1318 // test t1, 0x20
1321 // je L1 1319 // je L1
1322 // use(t2) 1320 // use(t2)
1323 // t2 = t3 1321 // t2 = t3
1324 // t3 = 0 1322 // t3 = 0
1325 // L1: 1323 // L1:
1326 // a.lo = t2 1324 // a.lo = t2
1327 // a.hi = t3 1325 // a.hi = t3
1328 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; 1326 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
1329 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20); 1327 Constant *BitTest = Ctx->getConstantInt32(0x20);
1330 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1328 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1331 InstX8632Label *Label = InstX8632Label::create(Func, this); 1329 InstX8632Label *Label = InstX8632Label::create(Func, this);
1332 _mov(T_1, Src1Lo, RegX8632::Reg_ecx); 1330 _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
1333 _mov(T_2, Src0Lo); 1331 _mov(T_2, Src0Lo);
1334 _mov(T_3, Src0Hi); 1332 _mov(T_3, Src0Hi);
1335 _shrd(T_2, T_3, T_1); 1333 _shrd(T_2, T_3, T_1);
1336 _shr(T_3, T_1); 1334 _shr(T_3, T_1);
1337 _test(T_1, BitTest); 1335 _test(T_1, BitTest);
1338 _br(CondX86::Br_e, Label); 1336 _br(CondX86::Br_e, Label);
1339 // T_2 and T_3 are being assigned again because of the 1337 // T_2 and T_3 are being assigned again because of the
(...skipping 14 matching lines...) Expand all
1354 // t3 = sar t3, t1 1352 // t3 = sar t3, t1
1355 // test t1, 0x20 1353 // test t1, 0x20
1356 // je L1 1354 // je L1
1357 // use(t2) 1355 // use(t2)
1358 // t2 = t3 1356 // t2 = t3
1359 // t3 = sar t3, 0x1f 1357 // t3 = sar t3, 0x1f
1360 // L1: 1358 // L1:
1361 // a.lo = t2 1359 // a.lo = t2
1362 // a.hi = t3 1360 // a.hi = t3
1363 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; 1361 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
1364 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20); 1362 Constant *BitTest = Ctx->getConstantInt32(0x20);
1365 Constant *SignExtend = Ctx->getConstantInt32(IceType_i32, 0x1f); 1363 Constant *SignExtend = Ctx->getConstantInt32(0x1f);
1366 InstX8632Label *Label = InstX8632Label::create(Func, this); 1364 InstX8632Label *Label = InstX8632Label::create(Func, this);
1367 _mov(T_1, Src1Lo, RegX8632::Reg_ecx); 1365 _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
1368 _mov(T_2, Src0Lo); 1366 _mov(T_2, Src0Lo);
1369 _mov(T_3, Src0Hi); 1367 _mov(T_3, Src0Hi);
1370 _shrd(T_2, T_3, T_1); 1368 _shrd(T_2, T_3, T_1);
1371 _sar(T_3, T_1); 1369 _sar(T_3, T_1);
1372 _test(T_1, BitTest); 1370 _test(T_1, BitTest);
1373 _br(CondX86::Br_e, Label); 1371 _br(CondX86::Br_e, Label);
1374 // T_2 and T_3 are being assigned again because of the 1372 // T_2 and T_3 are being assigned again because of the
1375 // intra-block control flow, so T_2 needs the _mov_nonkillable 1373 // intra-block control flow, so T_2 needs the _mov_nonkillable
(...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after
1476 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]} 1474 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}
1477 // pmuludq T2, T3 1475 // pmuludq T2, T3
1478 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])} 1476 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}
1479 // shufps T1, T2, {0,2,0,2} 1477 // shufps T1, T2, {0,2,0,2}
1480 // pshufd T4, T1, {0,2,1,3} 1478 // pshufd T4, T1, {0,2,1,3}
1481 // movups Dest, T4 1479 // movups Dest, T4
1482 1480
1483 // Mask that directs pshufd to create a vector with entries 1481 // Mask that directs pshufd to create a vector with entries
1484 // Src[1, 0, 3, 0] 1482 // Src[1, 0, 3, 0]
1485 const unsigned Constant1030 = 0x31; 1483 const unsigned Constant1030 = 0x31;
1486 Constant *Mask1030 = Ctx->getConstantInt32(IceType_i8, Constant1030); 1484 Constant *Mask1030 = Ctx->getConstantInt8(Constant1030);
1487 // Mask that directs shufps to create a vector with entries 1485 // Mask that directs shufps to create a vector with entries
1488 // Dest[0, 2], Src[0, 2] 1486 // Dest[0, 2], Src[0, 2]
1489 const unsigned Mask0202 = 0x88; 1487 const unsigned Mask0202 = 0x88;
1490 // Mask that directs pshufd to create a vector with entries 1488 // Mask that directs pshufd to create a vector with entries
1491 // Src[0, 2, 1, 3] 1489 // Src[0, 2, 1, 3]
1492 const unsigned Mask0213 = 0xd8; 1490 const unsigned Mask0213 = 0xd8;
1493 Variable *T1 = makeReg(IceType_v4i32); 1491 Variable *T1 = makeReg(IceType_v4i32);
1494 Variable *T2 = makeReg(IceType_v4i32); 1492 Variable *T2 = makeReg(IceType_v4i32);
1495 Variable *T3 = makeReg(IceType_v4i32); 1493 Variable *T3 = makeReg(IceType_v4i32);
1496 Variable *T4 = makeReg(IceType_v4i32); 1494 Variable *T4 = makeReg(IceType_v4i32);
1497 _movp(T1, Src0); 1495 _movp(T1, Src0);
1498 _pshufd(T2, Src0, Mask1030); 1496 _pshufd(T2, Src0, Mask1030);
1499 _pshufd(T3, Src1, Mask1030); 1497 _pshufd(T3, Src1, Mask1030);
1500 _pmuludq(T1, Src1); 1498 _pmuludq(T1, Src1);
1501 _pmuludq(T2, T3); 1499 _pmuludq(T2, T3);
1502 _shufps(T1, T2, Ctx->getConstantInt32(IceType_i8, Mask0202)); 1500 _shufps(T1, T2, Ctx->getConstantInt8(Mask0202));
1503 _pshufd(T4, T1, Ctx->getConstantInt32(IceType_i8, Mask0213)); 1501 _pshufd(T4, T1, Ctx->getConstantInt8(Mask0213));
1504 _movp(Dest, T4); 1502 _movp(Dest, T4);
1505 } else { 1503 } else {
1506 assert(Dest->getType() == IceType_v16i8); 1504 assert(Dest->getType() == IceType_v16i8);
1507 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); 1505 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1508 } 1506 }
1509 } break; 1507 } break;
1510 case InstArithmetic::Shl: 1508 case InstArithmetic::Shl:
1511 case InstArithmetic::Lshr: 1509 case InstArithmetic::Lshr:
1512 case InstArithmetic::Ashr: 1510 case InstArithmetic::Ashr:
1513 case InstArithmetic::Udiv: 1511 case InstArithmetic::Udiv:
(...skipping 274 matching lines...) Expand 10 before | Expand all | Expand 10 after
1788 // The PNaCl ABI requires the width of arguments to be at least 32 bits. 1786 // The PNaCl ABI requires the width of arguments to be at least 32 bits.
1789 assert(typeWidthInBytes(Ty) >= 4); 1787 assert(typeWidthInBytes(Ty) >= 4);
1790 if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) { 1788 if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) {
1791 XmmArgs.push_back(Arg); 1789 XmmArgs.push_back(Arg);
1792 } else { 1790 } else {
1793 StackArgs.push_back(Arg); 1791 StackArgs.push_back(Arg);
1794 if (isVectorType(Arg->getType())) { 1792 if (isVectorType(Arg->getType())) {
1795 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); 1793 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
1796 } 1794 }
1797 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); 1795 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
1798 Constant *Loc = 1796 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
1799 Ctx->getConstantInt32(IceType_i32, ParameterAreaSizeBytes);
1800 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc)); 1797 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc));
1801 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); 1798 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
1802 } 1799 }
1803 } 1800 }
1804 1801
1805 // Adjust the parameter area so that the stack is aligned. It is 1802 // Adjust the parameter area so that the stack is aligned. It is
1806 // assumed that the stack is already aligned at the start of the 1803 // assumed that the stack is already aligned at the start of the
1807 // calling sequence. 1804 // calling sequence.
1808 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); 1805 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
1809 1806
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
1881 Operand *CallTarget = legalize(Instr->getCallTarget()); 1878 Operand *CallTarget = legalize(Instr->getCallTarget());
1882 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget); 1879 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget);
1883 Context.insert(NewCall); 1880 Context.insert(NewCall);
1884 if (ReturnRegHi) 1881 if (ReturnRegHi)
1885 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); 1882 Context.insert(InstFakeDef::create(Func, ReturnRegHi));
1886 1883
1887 // Add the appropriate offset to esp. The call instruction takes care 1884 // Add the appropriate offset to esp. The call instruction takes care
1888 // of resetting the stack offset during emission. 1885 // of resetting the stack offset during emission.
1889 if (ParameterAreaSizeBytes) { 1886 if (ParameterAreaSizeBytes) {
1890 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); 1887 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
1891 _add(esp, Ctx->getConstantInt32(IceType_i32, ParameterAreaSizeBytes)); 1888 _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
1892 } 1889 }
1893 1890
1894 // Insert a register-kill pseudo instruction. 1891 // Insert a register-kill pseudo instruction.
1895 Context.insert(InstFakeKill::create(Func, NewCall)); 1892 Context.insert(InstFakeKill::create(Func, NewCall));
1896 1893
1897 // Generate a FakeUse to keep the call live if necessary. 1894 // Generate a FakeUse to keep the call live if necessary.
1898 if (Instr->hasSideEffects() && ReturnReg) { 1895 if (Instr->hasSideEffects() && ReturnReg) {
1899 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); 1896 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
1900 Context.insert(FakeUse); 1897 Context.insert(FakeUse);
1901 } 1898 }
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after
1958 Variable *T = makeReg(DestTy); 1955 Variable *T = makeReg(DestTy);
1959 _movp(T, Src0RM); 1956 _movp(T, Src0RM);
1960 _pand(T, OneMask); 1957 _pand(T, OneMask);
1961 Variable *Zeros = makeVectorOfZeros(Dest->getType()); 1958 Variable *Zeros = makeVectorOfZeros(Dest->getType());
1962 _pcmpgt(T, Zeros); 1959 _pcmpgt(T, Zeros);
1963 _movp(Dest, T); 1960 _movp(Dest, T);
1964 } else { 1961 } else {
1965 // width = width(elty) - 1; dest = (src << width) >> width 1962 // width = width(elty) - 1; dest = (src << width) >> width
1966 SizeT ShiftAmount = 1963 SizeT ShiftAmount =
1967 X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1; 1964 X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1;
1968 Constant *ShiftConstant = 1965 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount);
1969 Ctx->getConstantInt32(IceType_i8, ShiftAmount);
1970 Variable *T = makeReg(DestTy); 1966 Variable *T = makeReg(DestTy);
1971 _movp(T, Src0RM); 1967 _movp(T, Src0RM);
1972 _psll(T, ShiftConstant); 1968 _psll(T, ShiftConstant);
1973 _psra(T, ShiftConstant); 1969 _psra(T, ShiftConstant);
1974 _movp(Dest, T); 1970 _movp(Dest, T);
1975 } 1971 }
1976 } else if (Dest->getType() == IceType_i64) { 1972 } else if (Dest->getType() == IceType_i64) {
1977 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2 1973 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
1978 Constant *Shift = Ctx->getConstantInt32(IceType_i32, 31); 1974 Constant *Shift = Ctx->getConstantInt32(31);
1979 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 1975 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1980 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 1976 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1981 Variable *T_Lo = makeReg(DestLo->getType()); 1977 Variable *T_Lo = makeReg(DestLo->getType());
1982 if (Src0RM->getType() == IceType_i32) { 1978 if (Src0RM->getType() == IceType_i32) {
1983 _mov(T_Lo, Src0RM); 1979 _mov(T_Lo, Src0RM);
1984 } else if (Src0RM->getType() == IceType_i1) { 1980 } else if (Src0RM->getType() == IceType_i1) {
1985 _movzx(T_Lo, Src0RM); 1981 _movzx(T_Lo, Src0RM);
1986 _shl(T_Lo, Shift); 1982 _shl(T_Lo, Shift);
1987 _sar(T_Lo, Shift); 1983 _sar(T_Lo, Shift);
1988 } else { 1984 } else {
1989 _movsx(T_Lo, Src0RM); 1985 _movsx(T_Lo, Src0RM);
1990 } 1986 }
1991 _mov(DestLo, T_Lo); 1987 _mov(DestLo, T_Lo);
1992 Variable *T_Hi = NULL; 1988 Variable *T_Hi = NULL;
1993 _mov(T_Hi, T_Lo); 1989 _mov(T_Hi, T_Lo);
1994 if (Src0RM->getType() != IceType_i1) 1990 if (Src0RM->getType() != IceType_i1)
1995 // For i1, the sar instruction is already done above. 1991 // For i1, the sar instruction is already done above.
1996 _sar(T_Hi, Shift); 1992 _sar(T_Hi, Shift);
1997 _mov(DestHi, T_Hi); 1993 _mov(DestHi, T_Hi);
1998 } else if (Src0RM->getType() == IceType_i1) { 1994 } else if (Src0RM->getType() == IceType_i1) {
1999 // t1 = src 1995 // t1 = src
2000 // shl t1, dst_bitwidth - 1 1996 // shl t1, dst_bitwidth - 1
2001 // sar t1, dst_bitwidth - 1 1997 // sar t1, dst_bitwidth - 1
2002 // dst = t1 1998 // dst = t1
2003 size_t DestBits = X86_CHAR_BIT * typeWidthInBytes(Dest->getType()); 1999 size_t DestBits = X86_CHAR_BIT * typeWidthInBytes(Dest->getType());
2004 Constant *ShiftAmount = Ctx->getConstantInt32(IceType_i32, DestBits - 1); 2000 Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1);
2005 Variable *T = makeReg(Dest->getType()); 2001 Variable *T = makeReg(Dest->getType());
2006 if (typeWidthInBytes(Dest->getType()) <= 2002 if (typeWidthInBytes(Dest->getType()) <=
2007 typeWidthInBytes(Src0RM->getType())) { 2003 typeWidthInBytes(Src0RM->getType())) {
2008 _mov(T, Src0RM); 2004 _mov(T, Src0RM);
2009 } else { 2005 } else {
2010 // Widen the source using movsx or movzx. (It doesn't matter 2006 // Widen the source using movsx or movzx. (It doesn't matter
2011 // which one, since the following shl/sar overwrite the bits.) 2007 // which one, since the following shl/sar overwrite the bits.)
2012 _movzx(T, Src0RM); 2008 _movzx(T, Src0RM);
2013 } 2009 }
2014 _shl(T, ShiftAmount); 2010 _shl(T, ShiftAmount);
(...skipping 22 matching lines...) Expand all
2037 Constant *Zero = Ctx->getConstantZero(IceType_i32); 2033 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2038 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 2034 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2039 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 2035 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2040 Variable *Tmp = makeReg(DestLo->getType()); 2036 Variable *Tmp = makeReg(DestLo->getType());
2041 if (Src0RM->getType() == IceType_i32) { 2037 if (Src0RM->getType() == IceType_i32) {
2042 _mov(Tmp, Src0RM); 2038 _mov(Tmp, Src0RM);
2043 } else { 2039 } else {
2044 _movzx(Tmp, Src0RM); 2040 _movzx(Tmp, Src0RM);
2045 } 2041 }
2046 if (Src0RM->getType() == IceType_i1) { 2042 if (Src0RM->getType() == IceType_i1) {
2047 Constant *One = Ctx->getConstantInt32(IceType_i32, 1); 2043 Constant *One = Ctx->getConstantInt32(1);
2048 _and(Tmp, One); 2044 _and(Tmp, One);
2049 } 2045 }
2050 _mov(DestLo, Tmp); 2046 _mov(DestLo, Tmp);
2051 _mov(DestHi, Zero); 2047 _mov(DestHi, Zero);
2052 } else if (Src0RM->getType() == IceType_i1) { 2048 } else if (Src0RM->getType() == IceType_i1) {
2053 // t = Src0RM; t &= 1; Dest = t 2049 // t = Src0RM; t &= 1; Dest = t
2054 Constant *One = Ctx->getConstantInt32(IceType_i32, 1); 2050 Constant *One = Ctx->getConstantInt32(1);
2055 Type DestTy = Dest->getType(); 2051 Type DestTy = Dest->getType();
2056 Variable *T; 2052 Variable *T;
2057 if (DestTy == IceType_i8) { 2053 if (DestTy == IceType_i8) {
2058 T = makeReg(DestTy); 2054 T = makeReg(DestTy);
2059 _mov(T, Src0RM); 2055 _mov(T, Src0RM);
2060 } else { 2056 } else {
2061 // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter. 2057 // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter.
2062 T = makeReg(IceType_i32); 2058 T = makeReg(IceType_i32);
2063 _movzx(T, Src0RM); 2059 _movzx(T, Src0RM);
2064 } 2060 }
(...skipping 19 matching lines...) Expand all
2084 _movp(Dest, T); 2080 _movp(Dest, T);
2085 } else { 2081 } else {
2086 Operand *Src0 = Inst->getSrc(0); 2082 Operand *Src0 = Inst->getSrc(0);
2087 if (Src0->getType() == IceType_i64) 2083 if (Src0->getType() == IceType_i64)
2088 Src0 = loOperand(Src0); 2084 Src0 = loOperand(Src0);
2089 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2085 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2090 // t1 = trunc Src0RM; Dest = t1 2086 // t1 = trunc Src0RM; Dest = t1
2091 Variable *T = NULL; 2087 Variable *T = NULL;
2092 _mov(T, Src0RM); 2088 _mov(T, Src0RM);
2093 if (Dest->getType() == IceType_i1) 2089 if (Dest->getType() == IceType_i1)
2094 _and(T, Ctx->getConstantInt32(IceType_i1, 1)); 2090 _and(T, Ctx->getConstantInt1(1));
2095 _mov(Dest, T); 2091 _mov(Dest, T);
2096 } 2092 }
2097 break; 2093 break;
2098 } 2094 }
2099 case InstCast::Fptrunc: 2095 case InstCast::Fptrunc:
2100 case InstCast::Fpext: { 2096 case InstCast::Fpext: {
2101 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2097 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2102 // t1 = cvt Src0RM; Dest = t1 2098 // t1 = cvt Src0RM; Dest = t1
2103 Variable *T = makeReg(Dest->getType()); 2099 Variable *T = makeReg(Dest->getType());
2104 _cvt(T, Src0RM, InstX8632Cvt::Float2float); 2100 _cvt(T, Src0RM, InstX8632Cvt::Float2float);
(...skipping 25 matching lines...) Expand all
2130 Call->addArg(Inst->getSrc(0)); 2126 Call->addArg(Inst->getSrc(0));
2131 lowerCall(Call); 2127 lowerCall(Call);
2132 } else { 2128 } else {
2133 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2129 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2134 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2130 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2135 Variable *T_1 = makeReg(IceType_i32); 2131 Variable *T_1 = makeReg(IceType_i32);
2136 Variable *T_2 = makeReg(Dest->getType()); 2132 Variable *T_2 = makeReg(Dest->getType());
2137 _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si); 2133 _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si);
2138 _mov(T_2, T_1); // T_1 and T_2 may have different integer types 2134 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
2139 if (Dest->getType() == IceType_i1) 2135 if (Dest->getType() == IceType_i1)
2140 _and(T_2, Ctx->getConstantInt32(IceType_i1, 1)); 2136 _and(T_2, Ctx->getConstantInt1(1));
2141 _mov(Dest, T_2); 2137 _mov(Dest, T_2);
2142 } 2138 }
2143 break; 2139 break;
2144 case InstCast::Fptoui: 2140 case InstCast::Fptoui:
2145 if (isVectorType(Dest->getType())) { 2141 if (isVectorType(Dest->getType())) {
2146 assert(Dest->getType() == IceType_v4i32 && 2142 assert(Dest->getType() == IceType_v4i32 &&
2147 Inst->getSrc(0)->getType() == IceType_v4f32); 2143 Inst->getSrc(0)->getType() == IceType_v4f32);
2148 const SizeT MaxSrcs = 1; 2144 const SizeT MaxSrcs = 1;
2149 InstCall *Call = makeHelperCall("Sz_fptoui_v4f32", Dest, MaxSrcs); 2145 InstCall *Call = makeHelperCall("Sz_fptoui_v4f32", Dest, MaxSrcs);
2150 Call->addArg(Inst->getSrc(0)); 2146 Call->addArg(Inst->getSrc(0));
(...skipping 15 matching lines...) Expand all
2166 lowerCall(Call); 2162 lowerCall(Call);
2167 return; 2163 return;
2168 } else { 2164 } else {
2169 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2165 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2170 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2166 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2171 Variable *T_1 = makeReg(IceType_i32); 2167 Variable *T_1 = makeReg(IceType_i32);
2172 Variable *T_2 = makeReg(Dest->getType()); 2168 Variable *T_2 = makeReg(Dest->getType());
2173 _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si); 2169 _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si);
2174 _mov(T_2, T_1); // T_1 and T_2 may have different integer types 2170 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
2175 if (Dest->getType() == IceType_i1) 2171 if (Dest->getType() == IceType_i1)
2176 _and(T_2, Ctx->getConstantInt32(IceType_i1, 1)); 2172 _and(T_2, Ctx->getConstantInt1(1));
2177 _mov(Dest, T_2); 2173 _mov(Dest, T_2);
2178 } 2174 }
2179 break; 2175 break;
2180 case InstCast::Sitofp: 2176 case InstCast::Sitofp:
2181 if (isVectorType(Dest->getType())) { 2177 if (isVectorType(Dest->getType())) {
2182 assert(Dest->getType() == IceType_v4f32 && 2178 assert(Dest->getType() == IceType_v4f32 &&
2183 Inst->getSrc(0)->getType() == IceType_v4i32); 2179 Inst->getSrc(0)->getType() == IceType_v4i32);
2184 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2180 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2185 Variable *T = makeReg(Dest->getType()); 2181 Variable *T = makeReg(Dest->getType());
2186 _cvt(T, Src0RM, InstX8632Cvt::Dq2ps); 2182 _cvt(T, Src0RM, InstX8632Cvt::Dq2ps);
(...skipping 208 matching lines...) Expand 10 before | Expand all | Expand 10 after
2395 Type Ty = SourceVectNotLegalized->getType(); 2391 Type Ty = SourceVectNotLegalized->getType();
2396 Type ElementTy = typeElementType(Ty); 2392 Type ElementTy = typeElementType(Ty);
2397 Type InVectorElementTy = getInVectorElementType(Ty); 2393 Type InVectorElementTy = getInVectorElementType(Ty);
2398 Variable *ExtractedElementR = makeReg(InVectorElementTy); 2394 Variable *ExtractedElementR = makeReg(InVectorElementTy);
2399 2395
2400 // TODO(wala): Determine the best lowering sequences for each type. 2396 // TODO(wala): Determine the best lowering sequences for each type.
2401 bool CanUsePextr = 2397 bool CanUsePextr =
2402 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1; 2398 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1;
2403 if (CanUsePextr && Ty != IceType_v4f32) { 2399 if (CanUsePextr && Ty != IceType_v4f32) {
2404 // Use pextrb, pextrw, or pextrd. 2400 // Use pextrb, pextrw, or pextrd.
2405 Constant *Mask = Ctx->getConstantInt32(IceType_i8, Index); 2401 Constant *Mask = Ctx->getConstantInt8(Index);
2406 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized); 2402 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized);
2407 _pextr(ExtractedElementR, SourceVectR, Mask); 2403 _pextr(ExtractedElementR, SourceVectR, Mask);
2408 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { 2404 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2409 // Use pshufd and movd/movss. 2405 // Use pshufd and movd/movss.
2410 Variable *T = NULL; 2406 Variable *T = NULL;
2411 if (Index) { 2407 if (Index) {
2412 // The shuffle only needs to occur if the element to be extracted 2408 // The shuffle only needs to occur if the element to be extracted
2413 // is not at the lowest index. 2409 // is not at the lowest index.
2414 Constant *Mask = Ctx->getConstantInt32(IceType_i8, Index); 2410 Constant *Mask = Ctx->getConstantInt8(Index);
2415 T = makeReg(Ty); 2411 T = makeReg(Ty);
2416 _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask); 2412 _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask);
2417 } else { 2413 } else {
2418 T = legalizeToVar(SourceVectNotLegalized); 2414 T = legalizeToVar(SourceVectNotLegalized);
2419 } 2415 }
2420 2416
2421 if (InVectorElementTy == IceType_i32) { 2417 if (InVectorElementTy == IceType_i32) {
2422 _movd(ExtractedElementR, T); 2418 _movd(ExtractedElementR, T);
2423 } else { // Ty == IceType_f32 2419 } else { // Ty == IceType_f32
2424 // TODO(wala): _movss is only used here because _mov does not 2420 // TODO(wala): _movss is only used here because _mov does not
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after
2542 } 2538 }
2543 bool HasC1 = (TableFcmp[Index].C1 != CondX86::Br_None); 2539 bool HasC1 = (TableFcmp[Index].C1 != CondX86::Br_None);
2544 bool HasC2 = (TableFcmp[Index].C2 != CondX86::Br_None); 2540 bool HasC2 = (TableFcmp[Index].C2 != CondX86::Br_None);
2545 if (HasC1) { 2541 if (HasC1) {
2546 Src0 = legalize(Src0); 2542 Src0 = legalize(Src0);
2547 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); 2543 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2548 Variable *T = NULL; 2544 Variable *T = NULL;
2549 _mov(T, Src0); 2545 _mov(T, Src0);
2550 _ucomiss(T, Src1RM); 2546 _ucomiss(T, Src1RM);
2551 } 2547 }
2552 Constant *Default = 2548 Constant *Default = Ctx->getConstantInt32(TableFcmp[Index].Default);
2553 Ctx->getConstantInt32(IceType_i32, TableFcmp[Index].Default);
2554 _mov(Dest, Default); 2549 _mov(Dest, Default);
2555 if (HasC1) { 2550 if (HasC1) {
2556 InstX8632Label *Label = InstX8632Label::create(Func, this); 2551 InstX8632Label *Label = InstX8632Label::create(Func, this);
2557 _br(TableFcmp[Index].C1, Label); 2552 _br(TableFcmp[Index].C1, Label);
2558 if (HasC2) { 2553 if (HasC2) {
2559 _br(TableFcmp[Index].C2, Label); 2554 _br(TableFcmp[Index].C2, Label);
2560 } 2555 }
2561 Constant *NonDefault = 2556 Constant *NonDefault = Ctx->getConstantInt32(!TableFcmp[Index].Default);
2562 Ctx->getConstantInt32(IceType_i32, !TableFcmp[Index].Default);
2563 _mov_nonkillable(Dest, NonDefault); 2557 _mov_nonkillable(Dest, NonDefault);
2564 Context.insert(Label); 2558 Context.insert(Label);
2565 } 2559 }
2566 } 2560 }
2567 2561
2568 void TargetX8632::lowerIcmp(const InstIcmp *Inst) { 2562 void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
2569 Operand *Src0 = legalize(Inst->getSrc(0)); 2563 Operand *Src0 = legalize(Inst->getSrc(0));
2570 Operand *Src1 = legalize(Inst->getSrc(1)); 2564 Operand *Src1 = legalize(Inst->getSrc(1));
2571 Variable *Dest = Inst->getDest(); 2565 Variable *Dest = Inst->getDest();
2572 2566
(...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after
2693 _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(), 2687 _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(),
2694 NextBr->getTargetFalse()); 2688 NextBr->getTargetFalse());
2695 // Skip over the following branch instruction. 2689 // Skip over the following branch instruction.
2696 Context.advanceNext(); 2690 Context.advanceNext();
2697 return; 2691 return;
2698 } 2692 }
2699 } 2693 }
2700 2694
2701 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: 2695 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
2702 Constant *Zero = Ctx->getConstantZero(IceType_i32); 2696 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2703 Constant *One = Ctx->getConstantInt32(IceType_i32, 1); 2697 Constant *One = Ctx->getConstantInt32(1);
2704 if (Src0->getType() == IceType_i64) { 2698 if (Src0->getType() == IceType_i64) {
2705 InstIcmp::ICond Condition = Inst->getCondition(); 2699 InstIcmp::ICond Condition = Inst->getCondition();
2706 size_t Index = static_cast<size_t>(Condition); 2700 size_t Index = static_cast<size_t>(Condition);
2707 assert(Index < TableIcmp64Size); 2701 assert(Index < TableIcmp64Size);
2708 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem); 2702 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
2709 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem); 2703 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
2710 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); 2704 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
2711 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); 2705 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
2712 if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) { 2706 if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) {
2713 InstX8632Label *Label = InstX8632Label::create(Func, this); 2707 InstX8632Label *Label = InstX8632Label::create(Func, this);
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
2771 2765
2772 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1) { 2766 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1) {
2773 // Use insertps, pinsrb, pinsrw, or pinsrd. 2767 // Use insertps, pinsrb, pinsrw, or pinsrd.
2774 Operand *ElementRM = 2768 Operand *ElementRM =
2775 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); 2769 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
2776 Operand *SourceVectRM = 2770 Operand *SourceVectRM =
2777 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); 2771 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
2778 Variable *T = makeReg(Ty); 2772 Variable *T = makeReg(Ty);
2779 _movp(T, SourceVectRM); 2773 _movp(T, SourceVectRM);
2780 if (Ty == IceType_v4f32) 2774 if (Ty == IceType_v4f32)
2781 _insertps(T, ElementRM, Ctx->getConstantInt32(IceType_i8, Index << 4)); 2775 _insertps(T, ElementRM, Ctx->getConstantInt8(Index << 4));
2782 else 2776 else
2783 _pinsr(T, ElementRM, Ctx->getConstantInt32(IceType_i8, Index)); 2777 _pinsr(T, ElementRM, Ctx->getConstantInt8(Index));
2784 _movp(Inst->getDest(), T); 2778 _movp(Inst->getDest(), T);
2785 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { 2779 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2786 // Use shufps or movss. 2780 // Use shufps or movss.
2787 Variable *ElementR = NULL; 2781 Variable *ElementR = NULL;
2788 Operand *SourceVectRM = 2782 Operand *SourceVectRM =
2789 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); 2783 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
2790 2784
2791 if (InVectorElementTy == IceType_f32) { 2785 if (InVectorElementTy == IceType_f32) {
2792 // ElementR will be in an XMM register since it is floating point. 2786 // ElementR will be in an XMM register since it is floating point.
2793 ElementR = legalizeToVar(ElementToInsertNotLegalized); 2787 ElementR = legalizeToVar(ElementToInsertNotLegalized);
(...skipping 30 matching lines...) Expand all
2824 // ElementR := ElementR[0, 0] T[0, 3] 2818 // ElementR := ElementR[0, 0] T[0, 3]
2825 // T := T[0, 1] ElementR[0, 3] 2819 // T := T[0, 1] ElementR[0, 3]
2826 // 2820 //
2827 // insertelement into index 3 (result is stored in T): 2821 // insertelement into index 3 (result is stored in T):
2828 // T := SourceVectRM 2822 // T := SourceVectRM
2829 // ElementR := ElementR[0, 0] T[0, 2] 2823 // ElementR := ElementR[0, 0] T[0, 2]
2830 // T := T[0, 1] ElementR[3, 0] 2824 // T := T[0, 1] ElementR[3, 0]
2831 const unsigned char Mask1[3] = { 0, 192, 128 }; 2825 const unsigned char Mask1[3] = { 0, 192, 128 };
2832 const unsigned char Mask2[3] = { 227, 196, 52 }; 2826 const unsigned char Mask2[3] = { 227, 196, 52 };
2833 2827
2834 Constant *Mask1Constant = 2828 Constant *Mask1Constant = Ctx->getConstantInt8(Mask1[Index - 1]);
2835 Ctx->getConstantInt32(IceType_i8, Mask1[Index - 1]); 2829 Constant *Mask2Constant = Ctx->getConstantInt8(Mask2[Index - 1]);
2836 Constant *Mask2Constant =
2837 Ctx->getConstantInt32(IceType_i8, Mask2[Index - 1]);
2838 2830
2839 if (Index == 1) { 2831 if (Index == 1) {
2840 _shufps(ElementR, SourceVectRM, Mask1Constant); 2832 _shufps(ElementR, SourceVectRM, Mask1Constant);
2841 _shufps(ElementR, SourceVectRM, Mask2Constant); 2833 _shufps(ElementR, SourceVectRM, Mask2Constant);
2842 _movp(Inst->getDest(), ElementR); 2834 _movp(Inst->getDest(), ElementR);
2843 } else { 2835 } else {
2844 Variable *T = makeReg(Ty); 2836 Variable *T = makeReg(Ty);
2845 _movp(T, SourceVectRM); 2837 _movp(T, SourceVectRM);
2846 _shufps(ElementR, T, Mask1Constant); 2838 _shufps(ElementR, T, Mask1Constant);
2847 _shufps(T, ElementR, Mask2Constant); 2839 _shufps(T, ElementR, Mask2Constant);
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
2919 // Some x86-64 processors support the cmpxchg16b instruction, which 2911 // Some x86-64 processors support the cmpxchg16b instruction, which
2920 // can make 16-byte operations lock free (when used with the LOCK 2912 // can make 16-byte operations lock free (when used with the LOCK
2921 // prefix). However, that's not supported in 32-bit mode, so just 2913 // prefix). However, that's not supported in 32-bit mode, so just
2922 // return 0 even for large sizes. 2914 // return 0 even for large sizes.
2923 Result = Ctx->getConstantZero(IceType_i32); 2915 Result = Ctx->getConstantZero(IceType_i32);
2924 break; 2916 break;
2925 case 1: 2917 case 1:
2926 case 2: 2918 case 2:
2927 case 4: 2919 case 4:
2928 case 8: 2920 case 8:
2929 Result = Ctx->getConstantInt32(IceType_i32, 1); 2921 Result = Ctx->getConstantInt32(1);
2930 break; 2922 break;
2931 } 2923 }
2932 _mov(Dest, Result); 2924 _mov(Dest, Result);
2933 return; 2925 return;
2934 } 2926 }
2935 // The PNaCl ABI requires the byte size to be a compile-time constant. 2927 // The PNaCl ABI requires the byte size to be a compile-time constant.
2936 Func->setError("AtomicIsLockFree byte size should be compile-time const"); 2928 Func->setError("AtomicIsLockFree byte size should be compile-time const");
2937 return; 2929 return;
2938 } 2930 }
2939 case Intrinsics::AtomicLoad: { 2931 case Intrinsics::AtomicLoad: {
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after
3025 _bswap(T_Hi); 3017 _bswap(T_Hi);
3026 _mov(DestLo, T_Hi); 3018 _mov(DestLo, T_Hi);
3027 _mov(DestHi, T_Lo); 3019 _mov(DestHi, T_Lo);
3028 } else if (Val->getType() == IceType_i32) { 3020 } else if (Val->getType() == IceType_i32) {
3029 Variable *T = legalizeToVar(Val); 3021 Variable *T = legalizeToVar(Val);
3030 _bswap(T); 3022 _bswap(T);
3031 _mov(Dest, T); 3023 _mov(Dest, T);
3032 } else { 3024 } else {
3033 assert(Val->getType() == IceType_i16); 3025 assert(Val->getType() == IceType_i16);
3034 Val = legalize(Val); 3026 Val = legalize(Val);
3035 Constant *Eight = Ctx->getConstantInt32(IceType_i16, 8); 3027 Constant *Eight = Ctx->getConstantInt16(8);
3036 Variable *T = NULL; 3028 Variable *T = NULL;
3037 _mov(T, Val); 3029 _mov(T, Val);
3038 _rol(T, Eight); 3030 _rol(T, Eight);
3039 _mov(Dest, T); 3031 _mov(Dest, T);
3040 } 3032 }
3041 return; 3033 return;
3042 } 3034 }
3043 case Intrinsics::Ctpop: { 3035 case Intrinsics::Ctpop: {
3044 Variable *Dest = Instr->getDest(); 3036 Variable *Dest = Instr->getDest();
3045 Operand *Val = Instr->getArg(0); 3037 Operand *Val = Instr->getArg(0);
(...skipping 463 matching lines...) Expand 10 before | Expand all | Expand 10 after
3509 // bit position conversion, and the speculation is reversed. 3501 // bit position conversion, and the speculation is reversed.
3510 assert(Ty == IceType_i32 || Ty == IceType_i64); 3502 assert(Ty == IceType_i32 || Ty == IceType_i64);
3511 Variable *T = makeReg(IceType_i32); 3503 Variable *T = makeReg(IceType_i32);
3512 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg); 3504 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg);
3513 if (Cttz) { 3505 if (Cttz) {
3514 _bsf(T, FirstValRM); 3506 _bsf(T, FirstValRM);
3515 } else { 3507 } else {
3516 _bsr(T, FirstValRM); 3508 _bsr(T, FirstValRM);
3517 } 3509 }
3518 Variable *T_Dest = makeReg(IceType_i32); 3510 Variable *T_Dest = makeReg(IceType_i32);
3519 Constant *ThirtyTwo = Ctx->getConstantInt32(IceType_i32, 32); 3511 Constant *ThirtyTwo = Ctx->getConstantInt32(32);
3520 Constant *ThirtyOne = Ctx->getConstantInt32(IceType_i32, 31); 3512 Constant *ThirtyOne = Ctx->getConstantInt32(31);
3521 if (Cttz) { 3513 if (Cttz) {
3522 _mov(T_Dest, ThirtyTwo); 3514 _mov(T_Dest, ThirtyTwo);
3523 } else { 3515 } else {
3524 Constant *SixtyThree = Ctx->getConstantInt32(IceType_i32, 63); 3516 Constant *SixtyThree = Ctx->getConstantInt32(63);
3525 _mov(T_Dest, SixtyThree); 3517 _mov(T_Dest, SixtyThree);
3526 } 3518 }
3527 _cmov(T_Dest, T, CondX86::Br_ne); 3519 _cmov(T_Dest, T, CondX86::Br_ne);
3528 if (!Cttz) { 3520 if (!Cttz) {
3529 _xor(T_Dest, ThirtyOne); 3521 _xor(T_Dest, ThirtyOne);
3530 } 3522 }
3531 if (Ty == IceType_i32) { 3523 if (Ty == IceType_i32) {
3532 _mov(Dest, T_Dest); 3524 _mov(Dest, T_Dest);
3533 return; 3525 return;
3534 } 3526 }
(...skipping 318 matching lines...) Expand 10 before | Expand all | Expand 10 after
3853 // Vanilla ICE load instructions should not use the segment registers, 3845 // Vanilla ICE load instructions should not use the segment registers,
3854 // and computeAddressOpt only works at the level of Variables and Constants, 3846 // and computeAddressOpt only works at the level of Variables and Constants,
3855 // not other OperandX8632Mem, so there should be no mention of segment 3847 // not other OperandX8632Mem, so there should be no mention of segment
3856 // registers there either. 3848 // registers there either.
3857 const OperandX8632Mem::SegmentRegisters SegmentReg = 3849 const OperandX8632Mem::SegmentRegisters SegmentReg =
3858 OperandX8632Mem::DefaultSegment; 3850 OperandX8632Mem::DefaultSegment;
3859 Variable *Base = llvm::dyn_cast<Variable>(Addr); 3851 Variable *Base = llvm::dyn_cast<Variable>(Addr);
3860 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); 3852 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
3861 if (Base && Addr != Base) { 3853 if (Base && Addr != Base) {
3862 Inst->setDeleted(); 3854 Inst->setDeleted();
3863 Constant *OffsetOp = Ctx->getConstantInt32(IceType_i32, Offset); 3855 Constant *OffsetOp = Ctx->getConstantInt32(Offset);
3864 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index, 3856 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index,
3865 Shift, SegmentReg); 3857 Shift, SegmentReg);
3866 Context.insert(InstLoad::create(Func, Dest, Addr)); 3858 Context.insert(InstLoad::create(Func, Dest, Addr));
3867 } 3859 }
3868 } 3860 }
3869 3861
3870 void TargetX8632::randomlyInsertNop(float Probability) { 3862 void TargetX8632::randomlyInsertNop(float Probability) {
3871 RandomNumberGeneratorWrapper RNG(Ctx->getRNG()); 3863 RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
3872 if (RNG.getTrueWithProbability(Probability)) { 3864 if (RNG.getTrueWithProbability(Probability)) {
3873 _nop(RNG.next(X86_NUM_NOP_VARIANTS)); 3865 _nop(RNG.next(X86_NUM_NOP_VARIANTS));
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
3919 if (InstructionSet >= SSE4_1) { 3911 if (InstructionSet >= SSE4_1) {
3920 // TODO(wala): If the condition operand is a constant, use blendps 3912 // TODO(wala): If the condition operand is a constant, use blendps
3921 // or pblendw. 3913 // or pblendw.
3922 // 3914 //
3923 // Use blendvps or pblendvb to implement select. 3915 // Use blendvps or pblendvb to implement select.
3924 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || 3916 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
3925 SrcTy == IceType_v4f32) { 3917 SrcTy == IceType_v4f32) {
3926 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); 3918 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
3927 Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0); 3919 Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0);
3928 _movp(xmm0, ConditionRM); 3920 _movp(xmm0, ConditionRM);
3929 _psll(xmm0, Ctx->getConstantInt32(IceType_i8, 31)); 3921 _psll(xmm0, Ctx->getConstantInt8(31));
3930 _movp(T, SrcFRM); 3922 _movp(T, SrcFRM);
3931 _blendvps(T, SrcTRM, xmm0); 3923 _blendvps(T, SrcTRM, xmm0);
3932 _movp(Dest, T); 3924 _movp(Dest, T);
3933 } else { 3925 } else {
3934 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); 3926 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
3935 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 3927 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16
3936 : IceType_v16i8; 3928 : IceType_v16i8;
3937 Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0); 3929 Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0);
3938 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); 3930 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
3939 _movp(T, SrcFRM); 3931 _movp(T, SrcFRM);
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after
4031 Variable *Base = llvm::dyn_cast<Variable>(Addr); 4023 Variable *Base = llvm::dyn_cast<Variable>(Addr);
4032 // Vanilla ICE store instructions should not use the segment registers, 4024 // Vanilla ICE store instructions should not use the segment registers,
4033 // and computeAddressOpt only works at the level of Variables and Constants, 4025 // and computeAddressOpt only works at the level of Variables and Constants,
4034 // not other OperandX8632Mem, so there should be no mention of segment 4026 // not other OperandX8632Mem, so there should be no mention of segment
4035 // registers there either. 4027 // registers there either.
4036 const OperandX8632Mem::SegmentRegisters SegmentReg = 4028 const OperandX8632Mem::SegmentRegisters SegmentReg =
4037 OperandX8632Mem::DefaultSegment; 4029 OperandX8632Mem::DefaultSegment;
4038 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); 4030 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
4039 if (Base && Addr != Base) { 4031 if (Base && Addr != Base) {
4040 Inst->setDeleted(); 4032 Inst->setDeleted();
4041 Constant *OffsetOp = Ctx->getConstantInt32(IceType_i32, Offset); 4033 Constant *OffsetOp = Ctx->getConstantInt32(Offset);
4042 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index, 4034 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,
4043 Shift, SegmentReg); 4035 Shift, SegmentReg);
4044 Context.insert(InstStore::create(Func, Data, Addr)); 4036 Context.insert(InstStore::create(Func, Data, Addr));
4045 } 4037 }
4046 } 4038 }
4047 4039
4048 void TargetX8632::lowerSwitch(const InstSwitch *Inst) { 4040 void TargetX8632::lowerSwitch(const InstSwitch *Inst) {
4049 // This implements the most naive possible lowering. 4041 // This implements the most naive possible lowering.
4050 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default 4042 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
4051 Operand *Src0 = Inst->getComparison(); 4043 Operand *Src0 = Inst->getComparison();
4052 SizeT NumCases = Inst->getNumCases(); 4044 SizeT NumCases = Inst->getNumCases();
4053 if (Src0->getType() == IceType_i64) { 4045 if (Src0->getType() == IceType_i64) {
4054 Src0 = legalize(Src0); // get Base/Index into physical registers 4046 Src0 = legalize(Src0); // get Base/Index into physical registers
4055 Operand *Src0Lo = loOperand(Src0); 4047 Operand *Src0Lo = loOperand(Src0);
4056 Operand *Src0Hi = hiOperand(Src0); 4048 Operand *Src0Hi = hiOperand(Src0);
4057 if (NumCases >= 2) { 4049 if (NumCases >= 2) {
4058 Src0Lo = legalizeToVar(Src0Lo); 4050 Src0Lo = legalizeToVar(Src0Lo);
4059 Src0Hi = legalizeToVar(Src0Hi); 4051 Src0Hi = legalizeToVar(Src0Hi);
4060 } else { 4052 } else {
4061 Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem); 4053 Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem);
4062 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem); 4054 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
4063 } 4055 }
4064 for (SizeT I = 0; I < NumCases; ++I) { 4056 for (SizeT I = 0; I < NumCases; ++I) {
4065 Constant *ValueLo = Ctx->getConstantInt32(IceType_i32, Inst->getValue(I)); 4057 Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I));
4066 Constant *ValueHi = 4058 Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32);
4067 Ctx->getConstantInt32(IceType_i32, Inst->getValue(I) >> 32);
4068 InstX8632Label *Label = InstX8632Label::create(Func, this); 4059 InstX8632Label *Label = InstX8632Label::create(Func, this);
4069 _cmp(Src0Lo, ValueLo); 4060 _cmp(Src0Lo, ValueLo);
4070 _br(CondX86::Br_ne, Label); 4061 _br(CondX86::Br_ne, Label);
4071 _cmp(Src0Hi, ValueHi); 4062 _cmp(Src0Hi, ValueHi);
4072 _br(CondX86::Br_e, Inst->getLabel(I)); 4063 _br(CondX86::Br_e, Inst->getLabel(I));
4073 Context.insert(Label); 4064 Context.insert(Label);
4074 } 4065 }
4075 _br(Inst->getLabelDefault()); 4066 _br(Inst->getLabelDefault());
4076 return; 4067 return;
4077 } 4068 }
4078 // OK, we'll be slightly less naive by forcing Src into a physical 4069 // OK, we'll be slightly less naive by forcing Src into a physical
4079 // register if there are 2 or more uses. 4070 // register if there are 2 or more uses.
4080 if (NumCases >= 2) 4071 if (NumCases >= 2)
4081 Src0 = legalizeToVar(Src0); 4072 Src0 = legalizeToVar(Src0);
4082 else 4073 else
4083 Src0 = legalize(Src0, Legal_Reg | Legal_Mem); 4074 Src0 = legalize(Src0, Legal_Reg | Legal_Mem);
4084 for (SizeT I = 0; I < NumCases; ++I) { 4075 for (SizeT I = 0; I < NumCases; ++I) {
4085 Constant *Value = Ctx->getConstantInt32(IceType_i32, Inst->getValue(I)); 4076 Constant *Value = Ctx->getConstantInt32(Inst->getValue(I));
4086 _cmp(Src0, Value); 4077 _cmp(Src0, Value);
4087 _br(CondX86::Br_e, Inst->getLabel(I)); 4078 _br(CondX86::Br_e, Inst->getLabel(I));
4088 } 4079 }
4089 4080
4090 _br(Inst->getLabelDefault()); 4081 _br(Inst->getLabelDefault());
4091 } 4082 }
4092 4083
4093 void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind, 4084 void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind,
4094 Variable *Dest, Operand *Src0, 4085 Variable *Dest, Operand *Src0,
4095 Operand *Src1) { 4086 Operand *Src1) {
4096 assert(isVectorType(Dest->getType())); 4087 assert(isVectorType(Dest->getType()));
4097 Type Ty = Dest->getType(); 4088 Type Ty = Dest->getType();
4098 Type ElementTy = typeElementType(Ty); 4089 Type ElementTy = typeElementType(Ty);
4099 SizeT NumElements = typeNumElements(Ty); 4090 SizeT NumElements = typeNumElements(Ty);
4100 4091
4101 Operand *T = Ctx->getConstantUndef(Ty); 4092 Operand *T = Ctx->getConstantUndef(Ty);
4102 for (SizeT I = 0; I < NumElements; ++I) { 4093 for (SizeT I = 0; I < NumElements; ++I) {
4103 Constant *Index = Ctx->getConstantInt32(IceType_i32, I); 4094 Constant *Index = Ctx->getConstantInt32(I);
4104 4095
4105 // Extract the next two inputs. 4096 // Extract the next two inputs.
4106 Variable *Op0 = Func->makeVariable(ElementTy); 4097 Variable *Op0 = Func->makeVariable(ElementTy);
4107 lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index)); 4098 lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index));
4108 Variable *Op1 = Func->makeVariable(ElementTy); 4099 Variable *Op1 = Func->makeVariable(ElementTy);
4109 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index)); 4100 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index));
4110 4101
4111 // Perform the arithmetic as a scalar operation. 4102 // Perform the arithmetic as a scalar operation.
4112 Variable *Res = Func->makeVariable(ElementTy); 4103 Variable *Res = Func->makeVariable(ElementTy);
4113 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1)); 4104 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1));
(...skipping 227 matching lines...) Expand 10 before | Expand all | Expand 10 after
4341 _psub(Dest, MinusOne); 4332 _psub(Dest, MinusOne);
4342 return Dest; 4333 return Dest;
4343 } 4334 }
4344 4335
4345 Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) { 4336 Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) {
4346 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || 4337 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
4347 Ty == IceType_v16i8); 4338 Ty == IceType_v16i8);
4348 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { 4339 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
4349 Variable *Reg = makeVectorOfOnes(Ty, RegNum); 4340 Variable *Reg = makeVectorOfOnes(Ty, RegNum);
4350 SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1; 4341 SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1;
4351 _psll(Reg, Ctx->getConstantInt32(IceType_i8, Shift)); 4342 _psll(Reg, Ctx->getConstantInt8(Shift));
4352 return Reg; 4343 return Reg;
4353 } else { 4344 } else {
4354 // SSE has no left shift operation for vectors of 8 bit integers. 4345 // SSE has no left shift operation for vectors of 8 bit integers.
4355 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; 4346 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
4356 Constant *ConstantMask = 4347 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK);
4357 Ctx->getConstantInt32(IceType_i32, HIGH_ORDER_BITS_MASK);
4358 Variable *Reg = makeReg(Ty, RegNum); 4348 Variable *Reg = makeReg(Ty, RegNum);
4359 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); 4349 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
4360 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); 4350 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
4361 return Reg; 4351 return Reg;
4362 } 4352 }
4363 } 4353 }
4364 4354
// NOTE(review): every row below is one line of the side-by-side diff: the old
// line number and old text, followed by the new line number and new text.
//
// getMemoryOperandForStackSlot(Ty, Slot, Offset) builds an OperandX8632Mem of
// type Ty whose base is a fresh i32 register holding the address of Slot
// (computed with _lea) and whose offset is the 32-bit constant Offset.
//
// NOTE(review): the two in-function comments below say "Loc", but the
// assertions check Slot (zero register weight, no register assigned). Loc is
// the register that receives Slot's address; the comments presumably mean
// Slot.
//
// The only difference between the old and new columns in this function is
// getConstantInt32(IceType_i32, Offset) becoming getConstantInt32(Offset).
4365 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty, 4355 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,
4366 Variable *Slot, 4356 Variable *Slot,
4367 uint32_t Offset) { 4357 uint32_t Offset) {
4368 // Ensure that Loc is a stack slot. 4358 // Ensure that Loc is a stack slot.
4369 assert(Slot->getWeight() == RegWeight::Zero); 4359 assert(Slot->getWeight() == RegWeight::Zero);
4370 assert(Slot->getRegNum() == Variable::NoRegister); 4360 assert(Slot->getRegNum() == Variable::NoRegister);
4371 // Compute the location of Loc in memory. 4361 // Compute the location of Loc in memory.
4372 // TODO(wala,stichnot): lea should not be required. The address of 4362 // TODO(wala,stichnot): lea should not be required. The address of
4373 // the stack slot is known at compile time (although not until after 4363 // the stack slot is known at compile time (although not until after
4374 // addProlog()). 4364 // addProlog()).
4375 const Type PointerType = IceType_i32; 4365 const Type PointerType = IceType_i32;
4376 Variable *Loc = makeReg(PointerType); 4366 Variable *Loc = makeReg(PointerType);
4377 _lea(Loc, Slot); 4367 _lea(Loc, Slot);
4378 Constant *ConstantOffset = Ctx->getConstantInt32(IceType_i32, Offset); 4368 Constant *ConstantOffset = Ctx->getConstantInt32(Offset);
// The returned operand addresses Loc + ConstantOffset with type Ty.
4379 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset); 4369 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset);
4380 } 4370 }
4381 4371
4382 // Helper for legalize() to emit the right code to lower an operand to a 4372 // Helper for legalize() to emit the right code to lower an operand to a
4383 // register of the appropriate type. 4373 // register of the appropriate type.
4384 Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) { 4374 Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) {
4385 Type Ty = Src->getType(); 4375 Type Ty = Src->getType();
4386 Variable *Reg = makeReg(Ty, RegNum); 4376 Variable *Reg = makeReg(Ty, RegNum);
4387 if (isVectorType(Ty)) { 4377 if (isVectorType(Ty)) {
4388 _movp(Reg, Src); 4378 _movp(Reg, Src);
(...skipping 261 matching lines...) Expand 10 before | Expand all | Expand 10 after
4650 } else if (IsConstant || IsExternal) 4640 } else if (IsConstant || IsExternal)
4651 Str << "\t.zero\t" << Size << "\n"; 4641 Str << "\t.zero\t" << Size << "\n";
4652 // Size is part of .comm. 4642 // Size is part of .comm.
4653 4643
4654 if (IsConstant || HasNonzeroInitializer || IsExternal) 4644 if (IsConstant || HasNonzeroInitializer || IsExternal)
4655 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; 4645 Str << "\t.size\t" << MangledName << ", " << Size << "\n";
4656 // Size is part of .comm. 4646 // Size is part of .comm.
4657 } 4647 }
4658 4648
4659 } // end of namespace Ice 4649 } // end of namespace Ice
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698