Chromium Code Reviews

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 737513008: Subzero: Simplify the constant pools. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Better fix for the int8/uint8 tests Created 6 years ago
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
(...skipping 600 matching lines...)
611 return; 611 return;
612 } 612 }
613 if (isVectorType(Ty)) { 613 if (isVectorType(Ty)) {
614 InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes); 614 InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes);
615 } 615 }
616 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); 616 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
617 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); 617 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
618 if (Arg->hasReg()) { 618 if (Arg->hasReg()) {
619 assert(Ty != IceType_i64); 619 assert(Ty != IceType_i64);
620 OperandX8632Mem *Mem = OperandX8632Mem::create( 620 OperandX8632Mem *Mem = OperandX8632Mem::create(
621 Func, Ty, FramePtr, 621 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset()));
622 Ctx->getConstantInt32(IceType_i32, Arg->getStackOffset()));
623 if (isVectorType(Arg->getType())) { 622 if (isVectorType(Arg->getType())) {
624 _movp(Arg, Mem); 623 _movp(Arg, Mem);
625 } else { 624 } else {
626 _mov(Arg, Mem); 625 _mov(Arg, Mem);
627 } 626 }
628 // This argument-copying instruction uses an explicit 627 // This argument-copying instruction uses an explicit
629 // OperandX8632Mem operand instead of a Variable, so its 628 // OperandX8632Mem operand instead of a Variable, so its
630 // fill-from-stack operation has to be tracked separately for 629 // fill-from-stack operation has to be tracked separately for
631 // statistics. 630 // statistics.
632 Ctx->statsUpdateFills(); 631 Ctx->statsUpdateFills();
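Note: the change repeated throughout this file is that the GlobalContext integer-constant getters no longer take an IceType; 32-bit constants are requested by value alone, and narrower widths use dedicated getters (getConstantInt1/8/16, seen further down). A minimal sketch of value-keyed pooling, purely for illustration; the class and member names below are not Subzero's actual implementation:

    #include <cstdint>
    #include <map>

    // Illustrative stand-in for Subzero's pooled 32-bit integer constant.
    struct ConstantInt32 {
      explicit ConstantInt32(uint32_t V) : Value(V) {}
      uint32_t Value;
    };

    // Sketch of a value-keyed pool: callers write getConstantInt32(N)
    // instead of getConstantInt32(IceType_i32, N).  (Assumption: the real
    // GlobalContext pools per bit width in some equivalent way.)
    class SimpleInt32Pool {
    public:
      ConstantInt32 *getConstantInt32(uint32_t Value) {
        auto It = Pool.find(Value);
        if (It != Pool.end())
          return It->second;               // reuse the pooled constant
        ConstantInt32 *C = new ConstantInt32(Value);
        Pool.emplace(Value, C);
        return C;
      }
    private:
      std::map<uint32_t, ConstantInt32 *> Pool;
    };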
(...skipping 199 matching lines...)
832 // Align esp if necessary. 831 // Align esp if necessary.
833 if (NeedsStackAlignment) { 832 if (NeedsStackAlignment) {
834 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; 833 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
835 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); 834 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
836 SpillAreaSizeBytes = StackSize - StackOffset; 835 SpillAreaSizeBytes = StackSize - StackOffset;
837 } 836 }
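When the frame needs re-alignment, the spill area is padded so that the return address, preserved registers, and spills together end on an aligned boundary. A worked sketch, assuming applyStackAlignment rounds up to a 16-byte X86_STACK_ALIGNMENT_BYTES; the concrete numbers below are made up for illustration:

    #include <cstdint>

    // Round X up to the next multiple of Align (Align must be a power of two).
    static uint32_t applyStackAlignment16(uint32_t X, uint32_t Align = 16) {
      return (X + Align - 1) & ~(Align - 1);
    }

    // Example: 4-byte return address, 8 bytes of preserved registers,
    // 24 bytes of spills:
    //   StackOffset        = 4 + 8                      = 12
    //   StackSize          = applyStackAlignment16(36)  = 48
    //   SpillAreaSizeBytes = 48 - 12                    = 36  (padded from 24)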
838 837
839 // Generate "sub esp, SpillAreaSizeBytes" 838 // Generate "sub esp, SpillAreaSizeBytes"
840 if (SpillAreaSizeBytes) 839 if (SpillAreaSizeBytes)
841 _sub(getPhysicalRegister(RegX8632::Reg_esp), 840 _sub(getPhysicalRegister(RegX8632::Reg_esp),
842 Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes)); 841 Ctx->getConstantInt32(SpillAreaSizeBytes));
843 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); 842 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
844 843
845 resetStackAdjustment(); 844 resetStackAdjustment();
846 845
847 // Fill in stack offsets for stack args, and copy args into registers 846 // Fill in stack offsets for stack args, and copy args into registers
848 // for those that were register-allocated. Args are pushed right to 847 // for those that were register-allocated. Args are pushed right to
849 // left, so Arg[0] is closest to the stack/frame pointer. 848 // left, so Arg[0] is closest to the stack/frame pointer.
850 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); 849 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
851 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES; 850 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES;
852 if (!IsEbpBasedFrame) 851 if (!IsEbpBasedFrame)
(...skipping 91 matching lines...)
944 Context.setInsertPoint(InsertPoint); 943 Context.setInsertPoint(InsertPoint);
945 944
946 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp); 945 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
947 if (IsEbpBasedFrame) { 946 if (IsEbpBasedFrame) {
948 Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp); 947 Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp);
949 _mov(esp, ebp); 948 _mov(esp, ebp);
950 _pop(ebp); 949 _pop(ebp);
951 } else { 950 } else {
952 // add esp, SpillAreaSizeBytes 951 // add esp, SpillAreaSizeBytes
953 if (SpillAreaSizeBytes) 952 if (SpillAreaSizeBytes)
954 _add(esp, Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes)); 953 _add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes));
955 } 954 }
956 955
957 // Add pop instructions for preserved registers. 956 // Add pop instructions for preserved registers.
958 llvm::SmallBitVector CalleeSaves = 957 llvm::SmallBitVector CalleeSaves =
959 getRegisterSet(RegSet_CalleeSave, RegSet_None); 958 getRegisterSet(RegSet_CalleeSave, RegSet_None);
960 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { 959 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
961 SizeT j = CalleeSaves.size() - i - 1; 960 SizeT j = CalleeSaves.size() - i - 1;
962 if (j == RegX8632::Reg_ebp && IsEbpBasedFrame) 961 if (j == RegX8632::Reg_ebp && IsEbpBasedFrame)
963 continue; 962 continue;
964 if (CalleeSaves[j] && RegsUsed[j]) { 963 if (CalleeSaves[j] && RegsUsed[j]) {
(...skipping 97 matching lines...)
1062 1061
1063 Operand *TargetX8632::loOperand(Operand *Operand) { 1062 Operand *TargetX8632::loOperand(Operand *Operand) {
1064 assert(Operand->getType() == IceType_i64); 1063 assert(Operand->getType() == IceType_i64);
1065 if (Operand->getType() != IceType_i64) 1064 if (Operand->getType() != IceType_i64)
1066 return Operand; 1065 return Operand;
1067 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { 1066 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
1068 split64(Var); 1067 split64(Var);
1069 return Var->getLo(); 1068 return Var->getLo();
1070 } 1069 }
1071 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { 1070 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1072 return Ctx->getConstantInt32(IceType_i32, 1071 return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
1073 static_cast<uint32_t>(Const->getValue()));
1074 } 1072 }
1075 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) { 1073 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
1076 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), 1074 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(),
1077 Mem->getOffset(), Mem->getIndex(), 1075 Mem->getOffset(), Mem->getIndex(),
1078 Mem->getShift(), Mem->getSegmentRegister()); 1076 Mem->getShift(), Mem->getSegmentRegister());
1079 } 1077 }
1080 llvm_unreachable("Unsupported operand type"); 1078 llvm_unreachable("Unsupported operand type");
1081 return NULL; 1079 return NULL;
1082 } 1080 }
1083 1081
1084 Operand *TargetX8632::hiOperand(Operand *Operand) { 1082 Operand *TargetX8632::hiOperand(Operand *Operand) {
1085 assert(Operand->getType() == IceType_i64); 1083 assert(Operand->getType() == IceType_i64);
1086 if (Operand->getType() != IceType_i64) 1084 if (Operand->getType() != IceType_i64)
1087 return Operand; 1085 return Operand;
1088 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { 1086 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
1089 split64(Var); 1087 split64(Var);
1090 return Var->getHi(); 1088 return Var->getHi();
1091 } 1089 }
1092 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { 1090 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1093 return Ctx->getConstantInt32( 1091 return Ctx->getConstantInt32(
1094 IceType_i32, static_cast<uint32_t>(Const->getValue() >> 32)); 1092 static_cast<uint32_t>(Const->getValue() >> 32));
1095 } 1093 }
1096 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) { 1094 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
1097 Constant *Offset = Mem->getOffset(); 1095 Constant *Offset = Mem->getOffset();
1098 if (Offset == NULL) 1096 if (Offset == NULL) {
1099 Offset = Ctx->getConstantInt32(IceType_i32, 4); 1097 Offset = Ctx->getConstantInt32(4);
1100 else if (ConstantInteger32 *IntOffset = 1098 } else if (ConstantInteger32 *IntOffset =
1101 llvm::dyn_cast<ConstantInteger32>(Offset)) { 1099 llvm::dyn_cast<ConstantInteger32>(Offset)) {
1102 Offset = Ctx->getConstantInt32(IceType_i32, 4 + IntOffset->getValue()); 1100 Offset = Ctx->getConstantInt32(4 + IntOffset->getValue());
1103 } else if (ConstantRelocatable *SymOffset = 1101 } else if (ConstantRelocatable *SymOffset =
1104 llvm::dyn_cast<ConstantRelocatable>(Offset)) { 1102 llvm::dyn_cast<ConstantRelocatable>(Offset)) {
1105 assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4)); 1103 assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4));
1106 Offset = Ctx->getConstantSym(IceType_i32, 4 + SymOffset->getOffset(), 1104 Offset =
1107 SymOffset->getName()); 1105 Ctx->getConstantSym(4 + SymOffset->getOffset(), SymOffset->getName(),
1106 SymOffset->getSuppressMangling());
1108 } 1107 }
1109 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset, 1108 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset,
1110 Mem->getIndex(), Mem->getShift(), 1109 Mem->getIndex(), Mem->getShift(),
1111 Mem->getSegmentRegister()); 1110 Mem->getSegmentRegister());
1112 } 1111 }
1113 llvm_unreachable("Unsupported operand type"); 1112 llvm_unreachable("Unsupported operand type");
1114 return NULL; 1113 return NULL;
1115 } 1114 }
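loOperand and hiOperand split an i64 operand into two i32 halves: variables via split64, memory operands by adding 4 to the offset, and integer constants arithmetically. The constant case in isolation (illustrative helper, not Subzero API):

    #include <cstdint>
    #include <utility>

    // Split a 64-bit immediate into the lo/hi 32-bit immediates the lowered
    // i64 code operates on (little-endian: lo half first, hi half at +4).
    static std::pair<uint32_t, uint32_t> splitImm64(uint64_t V) {
      return {static_cast<uint32_t>(V), static_cast<uint32_t>(V >> 32)};
    }

    // Example: splitImm64(0x1122334455667788) == {0x55667788, 0x11223344}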
1116 1115
1117 llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include, 1116 llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include,
(...skipping 43 matching lines...)
1161 // For default align=0, set it to the real value 1, to avoid any 1160 // For default align=0, set it to the real value 1, to avoid any
1162 // bit-manipulation problems below. 1161 // bit-manipulation problems below.
1163 AlignmentParam = std::max(AlignmentParam, 1u); 1162 AlignmentParam = std::max(AlignmentParam, 1u);
1164 1163
1165 // LLVM enforces power of 2 alignment. 1164 // LLVM enforces power of 2 alignment.
1166 assert((AlignmentParam & (AlignmentParam - 1)) == 0); 1165 assert((AlignmentParam & (AlignmentParam - 1)) == 0);
1167 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0); 1166 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);
1168 1167
1169 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES); 1168 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES);
1170 if (Alignment > X86_STACK_ALIGNMENT_BYTES) { 1169 if (Alignment > X86_STACK_ALIGNMENT_BYTES) {
1171 _and(esp, Ctx->getConstantInt32(IceType_i32, -Alignment)); 1170 _and(esp, Ctx->getConstantInt32(-Alignment));
1172 } 1171 }
1173 if (ConstantInteger32 *ConstantTotalSize = 1172 if (ConstantInteger32 *ConstantTotalSize =
1174 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { 1173 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
1175 uint32_t Value = ConstantTotalSize->getValue(); 1174 uint32_t Value = ConstantTotalSize->getValue();
1176 Value = applyAlignment(Value, Alignment); 1175 Value = applyAlignment(Value, Alignment);
1177 _sub(esp, Ctx->getConstantInt32(IceType_i32, Value)); 1176 _sub(esp, Ctx->getConstantInt32(Value));
1178 } else { 1177 } else {
1179 // Non-constant sizes need to be adjusted to the next highest 1178 // Non-constant sizes need to be adjusted to the next highest
1180 // multiple of the required alignment at runtime. 1179 // multiple of the required alignment at runtime.
1181 Variable *T = makeReg(IceType_i32); 1180 Variable *T = makeReg(IceType_i32);
1182 _mov(T, TotalSize); 1181 _mov(T, TotalSize);
1183 _add(T, Ctx->getConstantInt32(IceType_i32, Alignment - 1)); 1182 _add(T, Ctx->getConstantInt32(Alignment - 1));
1184 _and(T, Ctx->getConstantInt32(IceType_i32, -Alignment)); 1183 _and(T, Ctx->getConstantInt32(-Alignment));
1185 _sub(esp, T); 1184 _sub(esp, T);
1186 } 1185 }
1187 _mov(Dest, esp); 1186 _mov(Dest, esp);
1188 } 1187 }
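For a non-constant alloca size, the add/and pair rounds the size up to the alignment at runtime; -Alignment is simply the mask with the low log2(Alignment) bits cleared. The same arithmetic in C++ (assuming Alignment is a power of two):

    #include <cstdint>

    // Runtime equivalent of:  add T, Alignment-1 ; and T, -Alignment
    static uint32_t roundUpToAlignment(uint32_t TotalSize, uint32_t Alignment) {
      return (TotalSize + Alignment - 1) & (0u - Alignment); // 0u-A == -A mask
    }

    // Example with Alignment = 32:
    //   roundUpToAlignment(40, 32) == 64
    //   roundUpToAlignment(64, 32) == 64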
1189 1188
1190 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { 1189 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
1191 Variable *Dest = Inst->getDest(); 1190 Variable *Dest = Inst->getDest();
1192 Operand *Src0 = legalize(Inst->getSrc(0)); 1191 Operand *Src0 = legalize(Inst->getSrc(0));
1193 Operand *Src1 = legalize(Inst->getSrc(1)); 1192 Operand *Src1 = legalize(Inst->getSrc(1));
1194 if (Dest->getType() == IceType_i64) { 1193 if (Dest->getType() == IceType_i64) {
(...skipping 89 matching lines...)
1284 // t2 = shl t2, t1 1283 // t2 = shl t2, t1
1285 // test t1, 0x20 1284 // test t1, 0x20
1286 // je L1 1285 // je L1
1287 // use(t3) 1286 // use(t3)
1288 // t3 = t2 1287 // t3 = t2
1289 // t2 = 0 1288 // t2 = 0
1290 // L1: 1289 // L1:
1291 // a.lo = t2 1290 // a.lo = t2
1292 // a.hi = t3 1291 // a.hi = t3
1293 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; 1292 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
1294 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20); 1293 Constant *BitTest = Ctx->getConstantInt32(0x20);
1295 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1294 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1296 InstX8632Label *Label = InstX8632Label::create(Func, this); 1295 InstX8632Label *Label = InstX8632Label::create(Func, this);
1297 _mov(T_1, Src1Lo, RegX8632::Reg_ecx); 1296 _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
1298 _mov(T_2, Src0Lo); 1297 _mov(T_2, Src0Lo);
1299 _mov(T_3, Src0Hi); 1298 _mov(T_3, Src0Hi);
1300 _shld(T_3, T_2, T_1); 1299 _shld(T_3, T_2, T_1);
1301 _shl(T_2, T_1); 1300 _shl(T_2, T_1);
1302 _test(T_1, BitTest); 1301 _test(T_1, BitTest);
1303 _br(CondX86::Br_e, Label); 1302 _br(CondX86::Br_e, Label);
1304 // T_2 and T_3 are being assigned again because of the 1303 // T_2 and T_3 are being assigned again because of the
(...skipping 14 matching lines...)
1319 // t3 = shr t3, t1 1318 // t3 = shr t3, t1
1320 // test t1, 0x20 1319 // test t1, 0x20
1321 // je L1 1320 // je L1
1322 // use(t2) 1321 // use(t2)
1323 // t2 = t3 1322 // t2 = t3
1324 // t3 = 0 1323 // t3 = 0
1325 // L1: 1324 // L1:
1326 // a.lo = t2 1325 // a.lo = t2
1327 // a.hi = t3 1326 // a.hi = t3
1328 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; 1327 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
1329 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20); 1328 Constant *BitTest = Ctx->getConstantInt32(0x20);
1330 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1329 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1331 InstX8632Label *Label = InstX8632Label::create(Func, this); 1330 InstX8632Label *Label = InstX8632Label::create(Func, this);
1332 _mov(T_1, Src1Lo, RegX8632::Reg_ecx); 1331 _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
1333 _mov(T_2, Src0Lo); 1332 _mov(T_2, Src0Lo);
1334 _mov(T_3, Src0Hi); 1333 _mov(T_3, Src0Hi);
1335 _shrd(T_2, T_3, T_1); 1334 _shrd(T_2, T_3, T_1);
1336 _shr(T_3, T_1); 1335 _shr(T_3, T_1);
1337 _test(T_1, BitTest); 1336 _test(T_1, BitTest);
1338 _br(CondX86::Br_e, Label); 1337 _br(CondX86::Br_e, Label);
1339 // T_2 and T_3 are being assigned again because of the 1338 // T_2 and T_3 are being assigned again because of the
(...skipping 14 matching lines...)
1354 // t3 = sar t3, t1 1353 // t3 = sar t3, t1
1355 // test t1, 0x20 1354 // test t1, 0x20
1356 // je L1 1355 // je L1
1357 // use(t2) 1356 // use(t2)
1358 // t2 = t3 1357 // t2 = t3
1359 // t3 = sar t3, 0x1f 1358 // t3 = sar t3, 0x1f
1360 // L1: 1359 // L1:
1361 // a.lo = t2 1360 // a.lo = t2
1362 // a.hi = t3 1361 // a.hi = t3
1363 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; 1362 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
1364 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20); 1363 Constant *BitTest = Ctx->getConstantInt32(0x20);
1365 Constant *SignExtend = Ctx->getConstantInt32(IceType_i32, 0x1f); 1364 Constant *SignExtend = Ctx->getConstantInt32(0x1f);
1366 InstX8632Label *Label = InstX8632Label::create(Func, this); 1365 InstX8632Label *Label = InstX8632Label::create(Func, this);
1367 _mov(T_1, Src1Lo, RegX8632::Reg_ecx); 1366 _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
1368 _mov(T_2, Src0Lo); 1367 _mov(T_2, Src0Lo);
1369 _mov(T_3, Src0Hi); 1368 _mov(T_3, Src0Hi);
1370 _shrd(T_2, T_3, T_1); 1369 _shrd(T_2, T_3, T_1);
1371 _sar(T_3, T_1); 1370 _sar(T_3, T_1);
1372 _test(T_1, BitTest); 1371 _test(T_1, BitTest);
1373 _br(CondX86::Br_e, Label); 1372 _br(CondX86::Br_e, Label);
1374 // T_2 and T_3 are being assigned again because of the 1373 // T_2 and T_3 are being assigned again because of the
1375 // intra-block control flow, so T_2 needs the _mov_nonkillable 1374 // intra-block control flow, so T_2 needs the _mov_nonkillable
(...skipping 100 matching lines...)
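The test against 0x20 in the shift sequences above is needed because the 32-bit shld/shrd/shl/shr/sar forms mask the count in ecx to 5 bits, so counts of 32-63 must be handled by the conditional fix-up after the je. A C++ model of the Shl case (illustrative only):

    #include <cstdint>

    // Mirrors the lowered 64-bit shl: shld/shl only see Count & 31, and the
    // "test t1, 0x20" branch patches the halves when Count >= 32.
    static uint64_t shl64(uint32_t Lo, uint32_t Hi, uint32_t Count) {
      uint32_t C = Count & 31;                               // hardware masking
      uint32_t T3 = (Hi << C) | (C ? (Lo >> (32 - C)) : 0);  // shld t3, t2, t1
      uint32_t T2 = Lo << C;                                 // shl t2, t1
      if (Count & 0x20) { // count >= 32: high half takes the shifted low half
        T3 = T2;
        T2 = 0;
      }
      return (static_cast<uint64_t>(T3) << 32) | T2;
    }

    // Example: shl64(0x00000001, 0x00000000, 33) == 0x0000000200000000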
1476 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]} 1475 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}
1477 // pmuludq T2, T3 1476 // pmuludq T2, T3
1478 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])} 1477 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}
1479 // shufps T1, T2, {0,2,0,2} 1478 // shufps T1, T2, {0,2,0,2}
1480 // pshufd T4, T1, {0,2,1,3} 1479 // pshufd T4, T1, {0,2,1,3}
1481 // movups Dest, T4 1480 // movups Dest, T4
1482 1481
1483 // Mask that directs pshufd to create a vector with entries 1482 // Mask that directs pshufd to create a vector with entries
1484 // Src[1, 0, 3, 0] 1483 // Src[1, 0, 3, 0]
1485 const unsigned Constant1030 = 0x31; 1484 const unsigned Constant1030 = 0x31;
1486 Constant *Mask1030 = Ctx->getConstantInt32(IceType_i8, Constant1030); 1485 Constant *Mask1030 = Ctx->getConstantInt32(Constant1030);
1487 // Mask that directs shufps to create a vector with entries 1486 // Mask that directs shufps to create a vector with entries
1488 // Dest[0, 2], Src[0, 2] 1487 // Dest[0, 2], Src[0, 2]
1489 const unsigned Mask0202 = 0x88; 1488 const unsigned Mask0202 = 0x88;
1490 // Mask that directs pshufd to create a vector with entries 1489 // Mask that directs pshufd to create a vector with entries
1491 // Src[0, 2, 1, 3] 1490 // Src[0, 2, 1, 3]
1492 const unsigned Mask0213 = 0xd8; 1491 const unsigned Mask0213 = 0xd8;
1493 Variable *T1 = makeReg(IceType_v4i32); 1492 Variable *T1 = makeReg(IceType_v4i32);
1494 Variable *T2 = makeReg(IceType_v4i32); 1493 Variable *T2 = makeReg(IceType_v4i32);
1495 Variable *T3 = makeReg(IceType_v4i32); 1494 Variable *T3 = makeReg(IceType_v4i32);
1496 Variable *T4 = makeReg(IceType_v4i32); 1495 Variable *T4 = makeReg(IceType_v4i32);
1497 _movp(T1, Src0); 1496 _movp(T1, Src0);
1498 _pshufd(T2, Src0, Mask1030); 1497 _pshufd(T2, Src0, Mask1030);
1499 _pshufd(T3, Src1, Mask1030); 1498 _pshufd(T3, Src1, Mask1030);
1500 _pmuludq(T1, Src1); 1499 _pmuludq(T1, Src1);
1501 _pmuludq(T2, T3); 1500 _pmuludq(T2, T3);
1502 _shufps(T1, T2, Ctx->getConstantInt32(IceType_i8, Mask0202)); 1501 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202));
1503 _pshufd(T4, T1, Ctx->getConstantInt32(IceType_i8, Mask0213)); 1502 _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213));
1504 _movp(Dest, T4); 1503 _movp(Dest, T4);
1505 } else { 1504 } else {
1506 assert(Dest->getType() == IceType_v16i8); 1505 assert(Dest->getType() == IceType_v16i8);
1507 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); 1506 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1508 } 1507 }
1509 } break; 1508 } break;
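The magic immediates used by the v4i32 multiply above are ordinary SSE shuffle selectors: each 2-bit field of the byte, low field first, names a source element. A small decoder (illustrative helper, not part of Subzero):

    #include <cstdint>

    // Decode a pshufd/shufps-style immediate into its four 2-bit selectors.
    static void decodeShuffleImm(uint8_t Imm, unsigned Sel[4]) {
      for (int I = 0; I < 4; ++I)
        Sel[I] = (Imm >> (2 * I)) & 3;
    }

    // 0x31 == 0b00110001 -> {1, 0, 3, 0}   (the "Src[1, 0, 3, 0]" pshufd mask)
    // 0x88 == 0b10001000 -> {0, 2, 0, 2}   (the shufps Dest[0,2], Src[0,2] mask)
    // 0xd8 == 0b11011000 -> {0, 2, 1, 3}   (the final pshufd reordering)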
1510 case InstArithmetic::Shl: 1509 case InstArithmetic::Shl:
1511 case InstArithmetic::Lshr: 1510 case InstArithmetic::Lshr:
1512 case InstArithmetic::Ashr: 1511 case InstArithmetic::Ashr:
1513 case InstArithmetic::Udiv: 1512 case InstArithmetic::Udiv:
(...skipping 274 matching lines...)
1788 // The PNaCl ABI requires the width of arguments to be at least 32 bits. 1787 // The PNaCl ABI requires the width of arguments to be at least 32 bits.
1789 assert(typeWidthInBytes(Ty) >= 4); 1788 assert(typeWidthInBytes(Ty) >= 4);
1790 if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) { 1789 if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) {
1791 XmmArgs.push_back(Arg); 1790 XmmArgs.push_back(Arg);
1792 } else { 1791 } else {
1793 StackArgs.push_back(Arg); 1792 StackArgs.push_back(Arg);
1794 if (isVectorType(Arg->getType())) { 1793 if (isVectorType(Arg->getType())) {
1795 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); 1794 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
1796 } 1795 }
1797 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); 1796 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
1798 Constant *Loc = 1797 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
1799 Ctx->getConstantInt32(IceType_i32, ParameterAreaSizeBytes);
1800 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc)); 1798 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc));
1801 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); 1799 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
1802 } 1800 }
1803 } 1801 }
1804 1802
1805 // Adjust the parameter area so that the stack is aligned. It is 1803 // Adjust the parameter area so that the stack is aligned. It is
1806 // assumed that the stack is already aligned at the start of the 1804 // assumed that the stack is already aligned at the start of the
1807 // calling sequence. 1805 // calling sequence.
1808 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); 1806 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
1809 1807
(...skipping 71 matching lines...)
1881 Operand *CallTarget = legalize(Instr->getCallTarget()); 1879 Operand *CallTarget = legalize(Instr->getCallTarget());
1882 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget); 1880 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget);
1883 Context.insert(NewCall); 1881 Context.insert(NewCall);
1884 if (ReturnRegHi) 1882 if (ReturnRegHi)
1885 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); 1883 Context.insert(InstFakeDef::create(Func, ReturnRegHi));
1886 1884
1887 // Add the appropriate offset to esp. The call instruction takes care 1885 // Add the appropriate offset to esp. The call instruction takes care
1888 // of resetting the stack offset during emission. 1886 // of resetting the stack offset during emission.
1889 if (ParameterAreaSizeBytes) { 1887 if (ParameterAreaSizeBytes) {
1890 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); 1888 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
1891 _add(esp, Ctx->getConstantInt32(IceType_i32, ParameterAreaSizeBytes)); 1889 _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
1892 } 1890 }
1893 1891
1894 // Insert a register-kill pseudo instruction. 1892 // Insert a register-kill pseudo instruction.
1895 Context.insert(InstFakeKill::create(Func, NewCall)); 1893 Context.insert(InstFakeKill::create(Func, NewCall));
1896 1894
1897 // Generate a FakeUse to keep the call live if necessary. 1895 // Generate a FakeUse to keep the call live if necessary.
1898 if (Instr->hasSideEffects() && ReturnReg) { 1896 if (Instr->hasSideEffects() && ReturnReg) {
1899 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); 1897 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
1900 Context.insert(FakeUse); 1898 Context.insert(FakeUse);
1901 } 1899 }
(...skipping 56 matching lines...)
1958 Variable *T = makeReg(DestTy); 1956 Variable *T = makeReg(DestTy);
1959 _movp(T, Src0RM); 1957 _movp(T, Src0RM);
1960 _pand(T, OneMask); 1958 _pand(T, OneMask);
1961 Variable *Zeros = makeVectorOfZeros(Dest->getType()); 1959 Variable *Zeros = makeVectorOfZeros(Dest->getType());
1962 _pcmpgt(T, Zeros); 1960 _pcmpgt(T, Zeros);
1963 _movp(Dest, T); 1961 _movp(Dest, T);
1964 } else { 1962 } else {
1965 // width = width(elty) - 1; dest = (src << width) >> width 1963 // width = width(elty) - 1; dest = (src << width) >> width
1966 SizeT ShiftAmount = 1964 SizeT ShiftAmount =
1967 X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1; 1965 X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1;
1968 Constant *ShiftConstant = 1966 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount);
1969 Ctx->getConstantInt32(IceType_i8, ShiftAmount);
1970 Variable *T = makeReg(DestTy); 1967 Variable *T = makeReg(DestTy);
1971 _movp(T, Src0RM); 1968 _movp(T, Src0RM);
1972 _psll(T, ShiftConstant); 1969 _psll(T, ShiftConstant);
1973 _psra(T, ShiftConstant); 1970 _psra(T, ShiftConstant);
1974 _movp(Dest, T); 1971 _movp(Dest, T);
1975 } 1972 }
1976 } else if (Dest->getType() == IceType_i64) { 1973 } else if (Dest->getType() == IceType_i64) {
1977 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2 1974 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
1978 Constant *Shift = Ctx->getConstantInt32(IceType_i32, 31); 1975 Constant *Shift = Ctx->getConstantInt32(31);
1979 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 1976 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1980 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 1977 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1981 Variable *T_Lo = makeReg(DestLo->getType()); 1978 Variable *T_Lo = makeReg(DestLo->getType());
1982 if (Src0RM->getType() == IceType_i32) { 1979 if (Src0RM->getType() == IceType_i32) {
1983 _mov(T_Lo, Src0RM); 1980 _mov(T_Lo, Src0RM);
1984 } else if (Src0RM->getType() == IceType_i1) { 1981 } else if (Src0RM->getType() == IceType_i1) {
1985 _movzx(T_Lo, Src0RM); 1982 _movzx(T_Lo, Src0RM);
1986 _shl(T_Lo, Shift); 1983 _shl(T_Lo, Shift);
1987 _sar(T_Lo, Shift); 1984 _sar(T_Lo, Shift);
1988 } else { 1985 } else {
1989 _movsx(T_Lo, Src0RM); 1986 _movsx(T_Lo, Src0RM);
1990 } 1987 }
1991 _mov(DestLo, T_Lo); 1988 _mov(DestLo, T_Lo);
1992 Variable *T_Hi = NULL; 1989 Variable *T_Hi = NULL;
1993 _mov(T_Hi, T_Lo); 1990 _mov(T_Hi, T_Lo);
1994 if (Src0RM->getType() != IceType_i1) 1991 if (Src0RM->getType() != IceType_i1)
1995 // For i1, the sar instruction is already done above. 1992 // For i1, the sar instruction is already done above.
1996 _sar(T_Hi, Shift); 1993 _sar(T_Hi, Shift);
1997 _mov(DestHi, T_Hi); 1994 _mov(DestHi, T_Hi);
1998 } else if (Src0RM->getType() == IceType_i1) { 1995 } else if (Src0RM->getType() == IceType_i1) {
1999 // t1 = src 1996 // t1 = src
2000 // shl t1, dst_bitwidth - 1 1997 // shl t1, dst_bitwidth - 1
2001 // sar t1, dst_bitwidth - 1 1998 // sar t1, dst_bitwidth - 1
2002 // dst = t1 1999 // dst = t1
2003 size_t DestBits = X86_CHAR_BIT * typeWidthInBytes(Dest->getType()); 2000 size_t DestBits = X86_CHAR_BIT * typeWidthInBytes(Dest->getType());
2004 Constant *ShiftAmount = Ctx->getConstantInt32(IceType_i32, DestBits - 1); 2001 Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1);
2005 Variable *T = makeReg(Dest->getType()); 2002 Variable *T = makeReg(Dest->getType());
2006 if (typeWidthInBytes(Dest->getType()) <= 2003 if (typeWidthInBytes(Dest->getType()) <=
2007 typeWidthInBytes(Src0RM->getType())) { 2004 typeWidthInBytes(Src0RM->getType())) {
2008 _mov(T, Src0RM); 2005 _mov(T, Src0RM);
2009 } else { 2006 } else {
2010 // Widen the source using movsx or movzx. (It doesn't matter 2007 // Widen the source using movsx or movzx. (It doesn't matter
2011 // which one, since the following shl/sar overwrite the bits.) 2008 // which one, since the following shl/sar overwrite the bits.)
2012 _movzx(T, Src0RM); 2009 _movzx(T, Src0RM);
2013 } 2010 }
2014 _shl(T, ShiftAmount); 2011 _shl(T, ShiftAmount);
(...skipping 22 matching lines...)
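Both the vector path above (psll/psra by width(elty) - 1) and the scalar i1 path (shl/sar by dst_bitwidth - 1) sign-extend by parking the value's lowest bit in the sign position and arithmetic-shifting it back, which replicates that bit across the destination. The same trick for a 32-bit register (sketch; an arithmetic right shift of negative values is assumed, as on x86):

    #include <cstdint>

    // Sign-extend the low Bits bits of V across 32 bits via shl/sar.
    static int32_t signExtendLowBits(uint32_t V, unsigned Bits) {
      unsigned Shift = 32 - Bits;                 // i1 -> 31, i8 -> 24, ...
      return static_cast<int32_t>(V << Shift) >> Shift;
    }

    // Examples: signExtendLowBits(1, 1) == -1, signExtendLowBits(0x80, 8) == -128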
2037 Constant *Zero = Ctx->getConstantZero(IceType_i32); 2034 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2038 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 2035 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2039 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 2036 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2040 Variable *Tmp = makeReg(DestLo->getType()); 2037 Variable *Tmp = makeReg(DestLo->getType());
2041 if (Src0RM->getType() == IceType_i32) { 2038 if (Src0RM->getType() == IceType_i32) {
2042 _mov(Tmp, Src0RM); 2039 _mov(Tmp, Src0RM);
2043 } else { 2040 } else {
2044 _movzx(Tmp, Src0RM); 2041 _movzx(Tmp, Src0RM);
2045 } 2042 }
2046 if (Src0RM->getType() == IceType_i1) { 2043 if (Src0RM->getType() == IceType_i1) {
2047 Constant *One = Ctx->getConstantInt32(IceType_i32, 1); 2044 Constant *One = Ctx->getConstantInt32(1);
2048 _and(Tmp, One); 2045 _and(Tmp, One);
2049 } 2046 }
2050 _mov(DestLo, Tmp); 2047 _mov(DestLo, Tmp);
2051 _mov(DestHi, Zero); 2048 _mov(DestHi, Zero);
2052 } else if (Src0RM->getType() == IceType_i1) { 2049 } else if (Src0RM->getType() == IceType_i1) {
2053 // t = Src0RM; t &= 1; Dest = t 2050 // t = Src0RM; t &= 1; Dest = t
2054 Constant *One = Ctx->getConstantInt32(IceType_i32, 1); 2051 Constant *One = Ctx->getConstantInt32(1);
2055 Type DestTy = Dest->getType(); 2052 Type DestTy = Dest->getType();
2056 Variable *T; 2053 Variable *T;
2057 if (DestTy == IceType_i8) { 2054 if (DestTy == IceType_i8) {
2058 T = makeReg(DestTy); 2055 T = makeReg(DestTy);
2059 _mov(T, Src0RM); 2056 _mov(T, Src0RM);
2060 } else { 2057 } else {
2061 // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter. 2058 // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter.
2062 T = makeReg(IceType_i32); 2059 T = makeReg(IceType_i32);
2063 _movzx(T, Src0RM); 2060 _movzx(T, Src0RM);
2064 } 2061 }
(...skipping 19 matching lines...)
2084 _movp(Dest, T); 2081 _movp(Dest, T);
2085 } else { 2082 } else {
2086 Operand *Src0 = Inst->getSrc(0); 2083 Operand *Src0 = Inst->getSrc(0);
2087 if (Src0->getType() == IceType_i64) 2084 if (Src0->getType() == IceType_i64)
2088 Src0 = loOperand(Src0); 2085 Src0 = loOperand(Src0);
2089 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2086 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2090 // t1 = trunc Src0RM; Dest = t1 2087 // t1 = trunc Src0RM; Dest = t1
2091 Variable *T = NULL; 2088 Variable *T = NULL;
2092 _mov(T, Src0RM); 2089 _mov(T, Src0RM);
2093 if (Dest->getType() == IceType_i1) 2090 if (Dest->getType() == IceType_i1)
2094 _and(T, Ctx->getConstantInt32(IceType_i1, 1)); 2091 _and(T, Ctx->getConstantInt1(1));
2095 _mov(Dest, T); 2092 _mov(Dest, T);
2096 } 2093 }
2097 break; 2094 break;
2098 } 2095 }
2099 case InstCast::Fptrunc: 2096 case InstCast::Fptrunc:
2100 case InstCast::Fpext: { 2097 case InstCast::Fpext: {
2101 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2098 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2102 // t1 = cvt Src0RM; Dest = t1 2099 // t1 = cvt Src0RM; Dest = t1
2103 Variable *T = makeReg(Dest->getType()); 2100 Variable *T = makeReg(Dest->getType());
2104 _cvt(T, Src0RM, InstX8632Cvt::Float2float); 2101 _cvt(T, Src0RM, InstX8632Cvt::Float2float);
(...skipping 25 matching lines...)
2130 Call->addArg(Inst->getSrc(0)); 2127 Call->addArg(Inst->getSrc(0));
2131 lowerCall(Call); 2128 lowerCall(Call);
2132 } else { 2129 } else {
2133 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2130 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2134 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2131 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2135 Variable *T_1 = makeReg(IceType_i32); 2132 Variable *T_1 = makeReg(IceType_i32);
2136 Variable *T_2 = makeReg(Dest->getType()); 2133 Variable *T_2 = makeReg(Dest->getType());
2137 _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si); 2134 _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si);
2138 _mov(T_2, T_1); // T_1 and T_2 may have different integer types 2135 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
2139 if (Dest->getType() == IceType_i1) 2136 if (Dest->getType() == IceType_i1)
2140 _and(T_2, Ctx->getConstantInt32(IceType_i1, 1)); 2137 _and(T_2, Ctx->getConstantInt1(1));
2141 _mov(Dest, T_2); 2138 _mov(Dest, T_2);
2142 } 2139 }
2143 break; 2140 break;
2144 case InstCast::Fptoui: 2141 case InstCast::Fptoui:
2145 if (isVectorType(Dest->getType())) { 2142 if (isVectorType(Dest->getType())) {
2146 assert(Dest->getType() == IceType_v4i32 && 2143 assert(Dest->getType() == IceType_v4i32 &&
2147 Inst->getSrc(0)->getType() == IceType_v4f32); 2144 Inst->getSrc(0)->getType() == IceType_v4f32);
2148 const SizeT MaxSrcs = 1; 2145 const SizeT MaxSrcs = 1;
2149 InstCall *Call = makeHelperCall("Sz_fptoui_v4f32", Dest, MaxSrcs); 2146 InstCall *Call = makeHelperCall("Sz_fptoui_v4f32", Dest, MaxSrcs);
2150 Call->addArg(Inst->getSrc(0)); 2147 Call->addArg(Inst->getSrc(0));
(...skipping 15 matching lines...)
2166 lowerCall(Call); 2163 lowerCall(Call);
2167 return; 2164 return;
2168 } else { 2165 } else {
2169 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2166 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2170 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2167 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2171 Variable *T_1 = makeReg(IceType_i32); 2168 Variable *T_1 = makeReg(IceType_i32);
2172 Variable *T_2 = makeReg(Dest->getType()); 2169 Variable *T_2 = makeReg(Dest->getType());
2173 _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si); 2170 _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si);
2174 _mov(T_2, T_1); // T_1 and T_2 may have different integer types 2171 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
2175 if (Dest->getType() == IceType_i1) 2172 if (Dest->getType() == IceType_i1)
2176 _and(T_2, Ctx->getConstantInt32(IceType_i1, 1)); 2173 _and(T_2, Ctx->getConstantInt1(1));
2177 _mov(Dest, T_2); 2174 _mov(Dest, T_2);
2178 } 2175 }
2179 break; 2176 break;
2180 case InstCast::Sitofp: 2177 case InstCast::Sitofp:
2181 if (isVectorType(Dest->getType())) { 2178 if (isVectorType(Dest->getType())) {
2182 assert(Dest->getType() == IceType_v4f32 && 2179 assert(Dest->getType() == IceType_v4f32 &&
2183 Inst->getSrc(0)->getType() == IceType_v4i32); 2180 Inst->getSrc(0)->getType() == IceType_v4i32);
2184 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2181 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2185 Variable *T = makeReg(Dest->getType()); 2182 Variable *T = makeReg(Dest->getType());
2186 _cvt(T, Src0RM, InstX8632Cvt::Dq2ps); 2183 _cvt(T, Src0RM, InstX8632Cvt::Dq2ps);
(...skipping 208 matching lines...)
2395 Type Ty = SourceVectNotLegalized->getType(); 2392 Type Ty = SourceVectNotLegalized->getType();
2396 Type ElementTy = typeElementType(Ty); 2393 Type ElementTy = typeElementType(Ty);
2397 Type InVectorElementTy = getInVectorElementType(Ty); 2394 Type InVectorElementTy = getInVectorElementType(Ty);
2398 Variable *ExtractedElementR = makeReg(InVectorElementTy); 2395 Variable *ExtractedElementR = makeReg(InVectorElementTy);
2399 2396
2400 // TODO(wala): Determine the best lowering sequences for each type. 2397 // TODO(wala): Determine the best lowering sequences for each type.
2401 bool CanUsePextr = 2398 bool CanUsePextr =
2402 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1; 2399 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1;
2403 if (CanUsePextr && Ty != IceType_v4f32) { 2400 if (CanUsePextr && Ty != IceType_v4f32) {
2404 // Use pextrb, pextrw, or pextrd. 2401 // Use pextrb, pextrw, or pextrd.
2405 Constant *Mask = Ctx->getConstantInt32(IceType_i8, Index); 2402 Constant *Mask = Ctx->getConstantInt32(Index);
2406 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized); 2403 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized);
2407 _pextr(ExtractedElementR, SourceVectR, Mask); 2404 _pextr(ExtractedElementR, SourceVectR, Mask);
2408 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { 2405 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2409 // Use pshufd and movd/movss. 2406 // Use pshufd and movd/movss.
2410 Variable *T = NULL; 2407 Variable *T = NULL;
2411 if (Index) { 2408 if (Index) {
2412 // The shuffle only needs to occur if the element to be extracted 2409 // The shuffle only needs to occur if the element to be extracted
2413 // is not at the lowest index. 2410 // is not at the lowest index.
2414 Constant *Mask = Ctx->getConstantInt32(IceType_i8, Index); 2411 Constant *Mask = Ctx->getConstantInt32(Index);
2415 T = makeReg(Ty); 2412 T = makeReg(Ty);
2416 _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask); 2413 _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask);
2417 } else { 2414 } else {
2418 T = legalizeToVar(SourceVectNotLegalized); 2415 T = legalizeToVar(SourceVectNotLegalized);
2419 } 2416 }
2420 2417
2421 if (InVectorElementTy == IceType_i32) { 2418 if (InVectorElementTy == IceType_i32) {
2422 _movd(ExtractedElementR, T); 2419 _movd(ExtractedElementR, T);
2423 } else { // Ty == IceType_f32 2420 } else { // Ty == IceType_f32
2424 // TODO(wala): _movss is only used here because _mov does not 2421 // TODO(wala): _movss is only used here because _mov does not
(...skipping 117 matching lines...)
2542 } 2539 }
2543 bool HasC1 = (TableFcmp[Index].C1 != CondX86::Br_None); 2540 bool HasC1 = (TableFcmp[Index].C1 != CondX86::Br_None);
2544 bool HasC2 = (TableFcmp[Index].C2 != CondX86::Br_None); 2541 bool HasC2 = (TableFcmp[Index].C2 != CondX86::Br_None);
2545 if (HasC1) { 2542 if (HasC1) {
2546 Src0 = legalize(Src0); 2543 Src0 = legalize(Src0);
2547 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); 2544 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2548 Variable *T = NULL; 2545 Variable *T = NULL;
2549 _mov(T, Src0); 2546 _mov(T, Src0);
2550 _ucomiss(T, Src1RM); 2547 _ucomiss(T, Src1RM);
2551 } 2548 }
2552 Constant *Default = 2549 Constant *Default = Ctx->getConstantInt32(TableFcmp[Index].Default);
2553 Ctx->getConstantInt32(IceType_i32, TableFcmp[Index].Default);
2554 _mov(Dest, Default); 2550 _mov(Dest, Default);
2555 if (HasC1) { 2551 if (HasC1) {
2556 InstX8632Label *Label = InstX8632Label::create(Func, this); 2552 InstX8632Label *Label = InstX8632Label::create(Func, this);
2557 _br(TableFcmp[Index].C1, Label); 2553 _br(TableFcmp[Index].C1, Label);
2558 if (HasC2) { 2554 if (HasC2) {
2559 _br(TableFcmp[Index].C2, Label); 2555 _br(TableFcmp[Index].C2, Label);
2560 } 2556 }
2561 Constant *NonDefault = 2557 Constant *NonDefault = Ctx->getConstantInt32(!TableFcmp[Index].Default);
2562 Ctx->getConstantInt32(IceType_i32, !TableFcmp[Index].Default);
2563 _mov_nonkillable(Dest, NonDefault); 2558 _mov_nonkillable(Dest, NonDefault);
2564 Context.insert(Label); 2559 Context.insert(Label);
2565 } 2560 }
2566 } 2561 }
2567 2562
2568 void TargetX8632::lowerIcmp(const InstIcmp *Inst) { 2563 void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
2569 Operand *Src0 = legalize(Inst->getSrc(0)); 2564 Operand *Src0 = legalize(Inst->getSrc(0));
2570 Operand *Src1 = legalize(Inst->getSrc(1)); 2565 Operand *Src1 = legalize(Inst->getSrc(1));
2571 Variable *Dest = Inst->getDest(); 2566 Variable *Dest = Inst->getDest();
2572 2567
(...skipping 120 matching lines...)
2693 _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(), 2688 _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(),
2694 NextBr->getTargetFalse()); 2689 NextBr->getTargetFalse());
2695 // Skip over the following branch instruction. 2690 // Skip over the following branch instruction.
2696 Context.advanceNext(); 2691 Context.advanceNext();
2697 return; 2692 return;
2698 } 2693 }
2699 } 2694 }
2700 2695
2701 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: 2696 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
2702 Constant *Zero = Ctx->getConstantZero(IceType_i32); 2697 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2703 Constant *One = Ctx->getConstantInt32(IceType_i32, 1); 2698 Constant *One = Ctx->getConstantInt32(1);
2704 if (Src0->getType() == IceType_i64) { 2699 if (Src0->getType() == IceType_i64) {
2705 InstIcmp::ICond Condition = Inst->getCondition(); 2700 InstIcmp::ICond Condition = Inst->getCondition();
2706 size_t Index = static_cast<size_t>(Condition); 2701 size_t Index = static_cast<size_t>(Condition);
2707 assert(Index < TableIcmp64Size); 2702 assert(Index < TableIcmp64Size);
2708 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem); 2703 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
2709 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem); 2704 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
2710 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); 2705 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
2711 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); 2706 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
2712 if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) { 2707 if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) {
2713 InstX8632Label *Label = InstX8632Label::create(Func, this); 2708 InstX8632Label *Label = InstX8632Label::create(Func, this);
(...skipping 57 matching lines...)
2771 2766
2772 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1) { 2767 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1) {
2773 // Use insertps, pinsrb, pinsrw, or pinsrd. 2768 // Use insertps, pinsrb, pinsrw, or pinsrd.
2774 Operand *ElementRM = 2769 Operand *ElementRM =
2775 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); 2770 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
2776 Operand *SourceVectRM = 2771 Operand *SourceVectRM =
2777 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); 2772 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
2778 Variable *T = makeReg(Ty); 2773 Variable *T = makeReg(Ty);
2779 _movp(T, SourceVectRM); 2774 _movp(T, SourceVectRM);
2780 if (Ty == IceType_v4f32) 2775 if (Ty == IceType_v4f32)
2781 _insertps(T, ElementRM, Ctx->getConstantInt32(IceType_i8, Index << 4)); 2776 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4));
2782 else 2777 else
2783 _pinsr(T, ElementRM, Ctx->getConstantInt32(IceType_i8, Index)); 2778 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index));
2784 _movp(Inst->getDest(), T); 2779 _movp(Inst->getDest(), T);
2785 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { 2780 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2786 // Use shufps or movss. 2781 // Use shufps or movss.
2787 Variable *ElementR = NULL; 2782 Variable *ElementR = NULL;
2788 Operand *SourceVectRM = 2783 Operand *SourceVectRM =
2789 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); 2784 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
2790 2785
2791 if (InVectorElementTy == IceType_f32) { 2786 if (InVectorElementTy == IceType_f32) {
2792 // ElementR will be in an XMM register since it is floating point. 2787 // ElementR will be in an XMM register since it is floating point.
2793 ElementR = legalizeToVar(ElementToInsertNotLegalized); 2788 ElementR = legalizeToVar(ElementToInsertNotLegalized);
(...skipping 30 matching lines...)
2824 // ElementR := ElementR[0, 0] T[0, 3] 2819 // ElementR := ElementR[0, 0] T[0, 3]
2825 // T := T[0, 1] ElementR[0, 3] 2820 // T := T[0, 1] ElementR[0, 3]
2826 // 2821 //
2827 // insertelement into index 3 (result is stored in T): 2822 // insertelement into index 3 (result is stored in T):
2828 // T := SourceVectRM 2823 // T := SourceVectRM
2829 // ElementR := ElementR[0, 0] T[0, 2] 2824 // ElementR := ElementR[0, 0] T[0, 2]
2830 // T := T[0, 1] ElementR[3, 0] 2825 // T := T[0, 1] ElementR[3, 0]
2831 const unsigned char Mask1[3] = { 0, 192, 128 }; 2826 const unsigned char Mask1[3] = { 0, 192, 128 };
2832 const unsigned char Mask2[3] = { 227, 196, 52 }; 2827 const unsigned char Mask2[3] = { 227, 196, 52 };
2833 2828
2834 Constant *Mask1Constant = 2829 Constant *Mask1Constant = Ctx->getConstantInt32(Mask1[Index - 1]);
2835 Ctx->getConstantInt32(IceType_i8, Mask1[Index - 1]); 2830 Constant *Mask2Constant = Ctx->getConstantInt32(Mask2[Index - 1]);
2836 Constant *Mask2Constant =
2837 Ctx->getConstantInt32(IceType_i8, Mask2[Index - 1]);
2838 2831
2839 if (Index == 1) { 2832 if (Index == 1) {
2840 _shufps(ElementR, SourceVectRM, Mask1Constant); 2833 _shufps(ElementR, SourceVectRM, Mask1Constant);
2841 _shufps(ElementR, SourceVectRM, Mask2Constant); 2834 _shufps(ElementR, SourceVectRM, Mask2Constant);
2842 _movp(Inst->getDest(), ElementR); 2835 _movp(Inst->getDest(), ElementR);
2843 } else { 2836 } else {
2844 Variable *T = makeReg(Ty); 2837 Variable *T = makeReg(Ty);
2845 _movp(T, SourceVectRM); 2838 _movp(T, SourceVectRM);
2846 _shufps(ElementR, T, Mask1Constant); 2839 _shufps(ElementR, T, Mask1Constant);
2847 _shufps(T, ElementR, Mask2Constant); 2840 _shufps(T, ElementR, Mask2Constant);
(...skipping 71 matching lines...)
 2919 // Some x86-64 processors support the cmpxchg16b instruction, which 2912 // Some x86-64 processors support the cmpxchg16b instruction, which
2920 // can make 16-byte operations lock free (when used with the LOCK 2913 // can make 16-byte operations lock free (when used with the LOCK
2921 // prefix). However, that's not supported in 32-bit mode, so just 2914 // prefix). However, that's not supported in 32-bit mode, so just
2922 // return 0 even for large sizes. 2915 // return 0 even for large sizes.
2923 Result = Ctx->getConstantZero(IceType_i32); 2916 Result = Ctx->getConstantZero(IceType_i32);
2924 break; 2917 break;
2925 case 1: 2918 case 1:
2926 case 2: 2919 case 2:
2927 case 4: 2920 case 4:
2928 case 8: 2921 case 8:
2929 Result = Ctx->getConstantInt32(IceType_i32, 1); 2922 Result = Ctx->getConstantInt32(1);
2930 break; 2923 break;
2931 } 2924 }
2932 _mov(Dest, Result); 2925 _mov(Dest, Result);
2933 return; 2926 return;
2934 } 2927 }
2935 // The PNaCl ABI requires the byte size to be a compile-time constant. 2928 // The PNaCl ABI requires the byte size to be a compile-time constant.
2936 Func->setError("AtomicIsLockFree byte size should be compile-time const"); 2929 Func->setError("AtomicIsLockFree byte size should be compile-time const");
2937 return; 2930 return;
2938 } 2931 }
2939 case Intrinsics::AtomicLoad: { 2932 case Intrinsics::AtomicLoad: {
(...skipping 85 matching lines...)
3025 _bswap(T_Hi); 3018 _bswap(T_Hi);
3026 _mov(DestLo, T_Hi); 3019 _mov(DestLo, T_Hi);
3027 _mov(DestHi, T_Lo); 3020 _mov(DestHi, T_Lo);
3028 } else if (Val->getType() == IceType_i32) { 3021 } else if (Val->getType() == IceType_i32) {
3029 Variable *T = legalizeToVar(Val); 3022 Variable *T = legalizeToVar(Val);
3030 _bswap(T); 3023 _bswap(T);
3031 _mov(Dest, T); 3024 _mov(Dest, T);
3032 } else { 3025 } else {
3033 assert(Val->getType() == IceType_i16); 3026 assert(Val->getType() == IceType_i16);
3034 Val = legalize(Val); 3027 Val = legalize(Val);
3035 Constant *Eight = Ctx->getConstantInt32(IceType_i16, 8); 3028 Constant *Eight = Ctx->getConstantInt16(8);
3036 Variable *T = NULL; 3029 Variable *T = NULL;
3037 _mov(T, Val); 3030 _mov(T, Val);
3038 _rol(T, Eight); 3031 _rol(T, Eight);
3039 _mov(Dest, T); 3032 _mov(Dest, T);
3040 } 3033 }
3041 return; 3034 return;
3042 } 3035 }
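bswap is only encodable for 32-bit (and 64-bit) registers, so the i16 case above swaps its two bytes with a rotate by 8 instead. A one-line check of the equivalence (sketch):

    #include <cstdint>

    // rol16(V, 8) swaps the two bytes, e.g. rol16(0x1234, 8) == 0x3412.
    static uint16_t rol16(uint16_t V, unsigned N) {
      return static_cast<uint16_t>((V << N) | (V >> (16 - N)));
    }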
3043 case Intrinsics::Ctpop: { 3036 case Intrinsics::Ctpop: {
3044 Variable *Dest = Instr->getDest(); 3037 Variable *Dest = Instr->getDest();
3045 Operand *Val = Instr->getArg(0); 3038 Operand *Val = Instr->getArg(0);
(...skipping 463 matching lines...)
3509 // bit position conversion, and the speculation is reversed. 3502 // bit position conversion, and the speculation is reversed.
3510 assert(Ty == IceType_i32 || Ty == IceType_i64); 3503 assert(Ty == IceType_i32 || Ty == IceType_i64);
3511 Variable *T = makeReg(IceType_i32); 3504 Variable *T = makeReg(IceType_i32);
3512 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg); 3505 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg);
3513 if (Cttz) { 3506 if (Cttz) {
3514 _bsf(T, FirstValRM); 3507 _bsf(T, FirstValRM);
3515 } else { 3508 } else {
3516 _bsr(T, FirstValRM); 3509 _bsr(T, FirstValRM);
3517 } 3510 }
3518 Variable *T_Dest = makeReg(IceType_i32); 3511 Variable *T_Dest = makeReg(IceType_i32);
3519 Constant *ThirtyTwo = Ctx->getConstantInt32(IceType_i32, 32); 3512 Constant *ThirtyTwo = Ctx->getConstantInt32(32);
3520 Constant *ThirtyOne = Ctx->getConstantInt32(IceType_i32, 31); 3513 Constant *ThirtyOne = Ctx->getConstantInt32(31);
3521 if (Cttz) { 3514 if (Cttz) {
3522 _mov(T_Dest, ThirtyTwo); 3515 _mov(T_Dest, ThirtyTwo);
3523 } else { 3516 } else {
3524 Constant *SixtyThree = Ctx->getConstantInt32(IceType_i32, 63); 3517 Constant *SixtyThree = Ctx->getConstantInt32(63);
3525 _mov(T_Dest, SixtyThree); 3518 _mov(T_Dest, SixtyThree);
3526 } 3519 }
3527 _cmov(T_Dest, T, CondX86::Br_ne); 3520 _cmov(T_Dest, T, CondX86::Br_ne);
3528 if (!Cttz) { 3521 if (!Cttz) {
3529 _xor(T_Dest, ThirtyOne); 3522 _xor(T_Dest, ThirtyOne);
3530 } 3523 }
3531 if (Ty == IceType_i32) { 3524 if (Ty == IceType_i32) {
3532 _mov(Dest, T_Dest); 3525 _mov(Dest, T_Dest);
3533 return; 3526 return;
3534 } 3527 }
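bsf/bsr produce a bit index and leave the destination undefined for a zero input, which is why the lowering preloads the zero-input answer and overwrites it with cmovne; for ctlz the bit index is then converted with xor 31 (equal to 31 - index for indices 0-31), and 63 ^ 31 == 32 supplies the zero-input result. A scalar model of the 32-bit ctlz path (illustrative):

    #include <cstdint>

    // Mirrors: bsr T, Val ; mov T_Dest, 63 ; cmovne T_Dest, T ; xor T_Dest, 31
    static uint32_t ctlz32(uint32_t Val) {
      uint32_t T_Dest = 63;            // speculative answer for Val == 0
      if (Val != 0) {                  // cmovne
        uint32_t Index = 31;           // bsr: index of the highest set bit
        while (!(Val & (1u << Index)))
          --Index;
        T_Dest = Index;
      }
      return T_Dest ^ 31;              // 63 ^ 31 == 32 for the zero input
    }

    // Examples: ctlz32(0x80000000) == 0, ctlz32(1) == 31, ctlz32(0) == 32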
(...skipping 318 matching lines...)
3853 // Vanilla ICE load instructions should not use the segment registers, 3846 // Vanilla ICE load instructions should not use the segment registers,
3854 // and computeAddressOpt only works at the level of Variables and Constants, 3847 // and computeAddressOpt only works at the level of Variables and Constants,
3855 // not other OperandX8632Mem, so there should be no mention of segment 3848 // not other OperandX8632Mem, so there should be no mention of segment
3856 // registers there either. 3849 // registers there either.
3857 const OperandX8632Mem::SegmentRegisters SegmentReg = 3850 const OperandX8632Mem::SegmentRegisters SegmentReg =
3858 OperandX8632Mem::DefaultSegment; 3851 OperandX8632Mem::DefaultSegment;
3859 Variable *Base = llvm::dyn_cast<Variable>(Addr); 3852 Variable *Base = llvm::dyn_cast<Variable>(Addr);
3860 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); 3853 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
3861 if (Base && Addr != Base) { 3854 if (Base && Addr != Base) {
3862 Inst->setDeleted(); 3855 Inst->setDeleted();
3863 Constant *OffsetOp = Ctx->getConstantInt32(IceType_i32, Offset); 3856 Constant *OffsetOp = Ctx->getConstantInt32(Offset);
3864 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index, 3857 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index,
3865 Shift, SegmentReg); 3858 Shift, SegmentReg);
3866 Context.insert(InstLoad::create(Func, Dest, Addr)); 3859 Context.insert(InstLoad::create(Func, Dest, Addr));
3867 } 3860 }
3868 } 3861 }
3869 3862
3870 void TargetX8632::randomlyInsertNop(float Probability) { 3863 void TargetX8632::randomlyInsertNop(float Probability) {
3871 RandomNumberGeneratorWrapper RNG(Ctx->getRNG()); 3864 RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
3872 if (RNG.getTrueWithProbability(Probability)) { 3865 if (RNG.getTrueWithProbability(Probability)) {
3873 _nop(RNG.next(X86_NUM_NOP_VARIANTS)); 3866 _nop(RNG.next(X86_NUM_NOP_VARIANTS));
(...skipping 45 matching lines...)
3919 if (InstructionSet >= SSE4_1) { 3912 if (InstructionSet >= SSE4_1) {
3920 // TODO(wala): If the condition operand is a constant, use blendps 3913 // TODO(wala): If the condition operand is a constant, use blendps
3921 // or pblendw. 3914 // or pblendw.
3922 // 3915 //
3923 // Use blendvps or pblendvb to implement select. 3916 // Use blendvps or pblendvb to implement select.
3924 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || 3917 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
3925 SrcTy == IceType_v4f32) { 3918 SrcTy == IceType_v4f32) {
3926 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); 3919 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
3927 Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0); 3920 Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0);
3928 _movp(xmm0, ConditionRM); 3921 _movp(xmm0, ConditionRM);
3929 _psll(xmm0, Ctx->getConstantInt32(IceType_i8, 31)); 3922 _psll(xmm0, Ctx->getConstantInt8(31));
3930 _movp(T, SrcFRM); 3923 _movp(T, SrcFRM);
3931 _blendvps(T, SrcTRM, xmm0); 3924 _blendvps(T, SrcTRM, xmm0);
3932 _movp(Dest, T); 3925 _movp(Dest, T);
3933 } else { 3926 } else {
3934 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); 3927 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
3935 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 3928 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16
3936 : IceType_v16i8; 3929 : IceType_v16i8;
3937 Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0); 3930 Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0);
3938 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); 3931 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
3939 _movp(T, SrcFRM); 3932 _movp(T, SrcFRM);
(...skipping 91 matching lines...)
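blendvps (and pblendvb) select between the two sources lane-by-lane based on the top bit of the corresponding lane of xmm0, so the boolean condition vector above is first shifted left by 31 to move its 0/1 into the sign bit. A per-lane model of the v4i32/v4f32 path (sketch):

    #include <cstdint>

    // Models: psll xmm0, 31 ; movp T, SrcF ; blendvps T, SrcT, xmm0
    static void blendByCondition(const uint32_t Cond[4], const uint32_t SrcT[4],
                                 const uint32_t SrcF[4], uint32_t Dest[4]) {
      for (int I = 0; I < 4; ++I) {
        uint32_t Mask = Cond[I] << 31;  // move the i1 into the lane's sign bit
        Dest[I] = (Mask & 0x80000000u) ? SrcT[I] : SrcF[I];
      }
    }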
4031 Variable *Base = llvm::dyn_cast<Variable>(Addr); 4024 Variable *Base = llvm::dyn_cast<Variable>(Addr);
4032 // Vanilla ICE store instructions should not use the segment registers, 4025 // Vanilla ICE store instructions should not use the segment registers,
4033 // and computeAddressOpt only works at the level of Variables and Constants, 4026 // and computeAddressOpt only works at the level of Variables and Constants,
4034 // not other OperandX8632Mem, so there should be no mention of segment 4027 // not other OperandX8632Mem, so there should be no mention of segment
4035 // registers there either. 4028 // registers there either.
4036 const OperandX8632Mem::SegmentRegisters SegmentReg = 4029 const OperandX8632Mem::SegmentRegisters SegmentReg =
4037 OperandX8632Mem::DefaultSegment; 4030 OperandX8632Mem::DefaultSegment;
4038 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); 4031 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
4039 if (Base && Addr != Base) { 4032 if (Base && Addr != Base) {
4040 Inst->setDeleted(); 4033 Inst->setDeleted();
4041 Constant *OffsetOp = Ctx->getConstantInt32(IceType_i32, Offset); 4034 Constant *OffsetOp = Ctx->getConstantInt32(Offset);
4042 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index, 4035 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,
4043 Shift, SegmentReg); 4036 Shift, SegmentReg);
4044 Context.insert(InstStore::create(Func, Data, Addr)); 4037 Context.insert(InstStore::create(Func, Data, Addr));
4045 } 4038 }
4046 } 4039 }
4047 4040
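The block above is the store half of address-mode optimization: when computeAddressOpt can fold neighboring adds and shifts into Base/Index/Shift/Offset, the original store is deleted and re-created against a single x86 memory operand of the form base + index*2^shift + offset. A rough before/after illustration in plain C++ (hypothetical, not Subzero code):

  #include <cstdint>

  void storeNaive(int32_t *Base, int32_t Index, int32_t Val) {
    int32_t *Tmp = Base + Index; // address built by separate instructions
    *(Tmp + 3) = Val;            // then used as a plain [reg] store
  }

  void storeFolded(int32_t *Base, int32_t Index, int32_t Val) {
    Base[Index + 3] = Val;       // one operand: mov [Base + 4*Index + 12], Val
  }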
4048 void TargetX8632::lowerSwitch(const InstSwitch *Inst) { 4041 void TargetX8632::lowerSwitch(const InstSwitch *Inst) {
4049 // This implements the most naive possible lowering. 4042 // This implements the most naive possible lowering.
4050 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default 4043 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
4051 Operand *Src0 = Inst->getComparison(); 4044 Operand *Src0 = Inst->getComparison();
4052 SizeT NumCases = Inst->getNumCases(); 4045 SizeT NumCases = Inst->getNumCases();
4053 if (Src0->getType() == IceType_i64) { 4046 if (Src0->getType() == IceType_i64) {
4054 Src0 = legalize(Src0); // get Base/Index into physical registers 4047 Src0 = legalize(Src0); // get Base/Index into physical registers
4055 Operand *Src0Lo = loOperand(Src0); 4048 Operand *Src0Lo = loOperand(Src0);
4056 Operand *Src0Hi = hiOperand(Src0); 4049 Operand *Src0Hi = hiOperand(Src0);
4057 if (NumCases >= 2) { 4050 if (NumCases >= 2) {
4058 Src0Lo = legalizeToVar(Src0Lo); 4051 Src0Lo = legalizeToVar(Src0Lo);
4059 Src0Hi = legalizeToVar(Src0Hi); 4052 Src0Hi = legalizeToVar(Src0Hi);
4060 } else { 4053 } else {
4061 Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem); 4054 Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem);
4062 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem); 4055 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
4063 } 4056 }
4064 for (SizeT I = 0; I < NumCases; ++I) { 4057 for (SizeT I = 0; I < NumCases; ++I) {
4065 Constant *ValueLo = Ctx->getConstantInt32(IceType_i32, Inst->getValue(I)); 4058 Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I));
4066 Constant *ValueHi = 4059 Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32);
4067 Ctx->getConstantInt32(IceType_i32, Inst->getValue(I) >> 32);
4068 InstX8632Label *Label = InstX8632Label::create(Func, this); 4060 InstX8632Label *Label = InstX8632Label::create(Func, this);
4069 _cmp(Src0Lo, ValueLo); 4061 _cmp(Src0Lo, ValueLo);
4070 _br(CondX86::Br_ne, Label); 4062 _br(CondX86::Br_ne, Label);
4071 _cmp(Src0Hi, ValueHi); 4063 _cmp(Src0Hi, ValueHi);
4072 _br(CondX86::Br_e, Inst->getLabel(I)); 4064 _br(CondX86::Br_e, Inst->getLabel(I));
4073 Context.insert(Label); 4065 Context.insert(Label);
4074 } 4066 }
4075 _br(Inst->getLabelDefault()); 4067 _br(Inst->getLabelDefault());
4076 return; 4068 return;
4077 } 4069 }
4078 // OK, we'll be slightly less naive by forcing Src into a physical 4070 // OK, we'll be slightly less naive by forcing Src into a physical
4079 // register if there are 2 or more uses. 4071 // register if there are 2 or more uses.
4080 if (NumCases >= 2) 4072 if (NumCases >= 2)
4081 Src0 = legalizeToVar(Src0); 4073 Src0 = legalizeToVar(Src0);
4082 else 4074 else
4083 Src0 = legalize(Src0, Legal_Reg | Legal_Mem); 4075 Src0 = legalize(Src0, Legal_Reg | Legal_Mem);
4084 for (SizeT I = 0; I < NumCases; ++I) { 4076 for (SizeT I = 0; I < NumCases; ++I) {
4085 Constant *Value = Ctx->getConstantInt32(IceType_i32, Inst->getValue(I)); 4077 Constant *Value = Ctx->getConstantInt32(Inst->getValue(I));
4086 _cmp(Src0, Value); 4078 _cmp(Src0, Value);
4087 _br(CondX86::Br_e, Inst->getLabel(I)); 4079 _br(CondX86::Br_e, Inst->getLabel(I));
4088 } 4080 }
4089 4081
4090 _br(Inst->getLabelDefault()); 4082 _br(Inst->getLabelDefault());
4091 } 4083 }
4092 4084
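To make the "most naive possible lowering" above concrete, here is a small self-contained sketch (hypothetical types, not the Subzero API) that prints the cmp/je chain this pass emits for the 32-bit case; the i64 path does the same thing but compares the low and high halves separately, as shown above.

  #include <cstdint>
  #include <iostream>
  #include <string>
  #include <vector>

  struct SwitchCase {
    int32_t Value;
    std::string Label;
  };

  // One cmp/je pair per case, then an unconditional jump to the default.
  void emitNaiveSwitch(const std::string &Reg,
                       const std::vector<SwitchCase> &Cases,
                       const std::string &DefaultLabel) {
    for (const SwitchCase &C : Cases) {
      std::cout << "  cmp " << Reg << ", " << C.Value << "\n";
      std::cout << "  je  " << C.Label << "\n";
    }
    std::cout << "  jmp " << DefaultLabel << "\n";
  }

  int main() {
    emitNaiveSwitch("eax", {{1, "L1"}, {7, "L7"}}, "Ldefault");
    return 0;
  }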
4093 void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind, 4085 void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind,
4094 Variable *Dest, Operand *Src0, 4086 Variable *Dest, Operand *Src0,
4095 Operand *Src1) { 4087 Operand *Src1) {
4096 assert(isVectorType(Dest->getType())); 4088 assert(isVectorType(Dest->getType()));
4097 Type Ty = Dest->getType(); 4089 Type Ty = Dest->getType();
4098 Type ElementTy = typeElementType(Ty); 4090 Type ElementTy = typeElementType(Ty);
4099 SizeT NumElements = typeNumElements(Ty); 4091 SizeT NumElements = typeNumElements(Ty);
4100 4092
4101 Operand *T = Ctx->getConstantUndef(Ty); 4093 Operand *T = Ctx->getConstantUndef(Ty);
4102 for (SizeT I = 0; I < NumElements; ++I) { 4094 for (SizeT I = 0; I < NumElements; ++I) {
4103 Constant *Index = Ctx->getConstantInt32(IceType_i32, I); 4095 Constant *Index = Ctx->getConstantInt32(I);
4104 4096
4105 // Extract the next two inputs. 4097 // Extract the next two inputs.
4106 Variable *Op0 = Func->makeVariable(ElementTy); 4098 Variable *Op0 = Func->makeVariable(ElementTy);
4107 lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index)); 4099 lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index));
4108 Variable *Op1 = Func->makeVariable(ElementTy); 4100 Variable *Op1 = Func->makeVariable(ElementTy);
4109 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index)); 4101 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index));
4110 4102
4111 // Perform the arithmetic as a scalar operation. 4103 // Perform the arithmetic as a scalar operation.
4112 Variable *Res = Func->makeVariable(ElementTy); 4104 Variable *Res = Func->makeVariable(ElementTy);
4113 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1)); 4105 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1));
(...skipping 227 matching lines...)
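scalarizeArithmetic above is the generic fallback for vector operations with no direct SSE instruction (SSE has no packed integer division, for instance): pull each lane out of both operands, run the scalar lowering on the pair, and, in the lines elided from this diff, rebuild the result vector lane by lane. A conceptual sketch in plain C++ (hypothetical helper, not the Subzero API):

  #include <array>
  #include <cstddef>

  // Apply a scalar binary operation lane-wise to two fixed-size "vectors".
  template <typename T, std::size_t N, typename Op>
  std::array<T, N> scalarize(const std::array<T, N> &Src0,
                             const std::array<T, N> &Src1, Op ScalarOp) {
    std::array<T, N> Result{};
    for (std::size_t I = 0; I < N; ++I) {
      const T Op0 = Src0[I];          // lowerExtractElement on Src0
      const T Op1 = Src1[I];          // lowerExtractElement on Src1
      Result[I] = ScalarOp(Op0, Op1); // lowerArithmetic on the scalars
    }
    return Result;                    // re-insertion is elided in the diff
  }

For instance, scalarize(A, B, [](int32_t X, int32_t Y) { return X / Y; }) mirrors scalarizing a vector signed division.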
4341 _psub(Dest, MinusOne); 4333 _psub(Dest, MinusOne);
4342 return Dest; 4334 return Dest;
4343 } 4335 }
4344 4336
4345 Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) { 4337 Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) {
4346 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || 4338 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
4347 Ty == IceType_v16i8); 4339 Ty == IceType_v16i8);
4348 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { 4340 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
4349 Variable *Reg = makeVectorOfOnes(Ty, RegNum); 4341 Variable *Reg = makeVectorOfOnes(Ty, RegNum);
4350 SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1; 4342 SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1;
4351 _psll(Reg, Ctx->getConstantInt32(IceType_i8, Shift)); 4343 _psll(Reg, Ctx->getConstantInt8(Shift));
4352 return Reg; 4344 return Reg;
4353 } else { 4345 } else {
4354 // SSE has no left shift operation for vectors of 8 bit integers. 4346 // SSE has no left shift operation for vectors of 8 bit integers.
4355 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; 4347 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
4356 Constant *ConstantMask = 4348 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK);
4357 Ctx->getConstantInt32(IceType_i32, HIGH_ORDER_BITS_MASK);
4358 Variable *Reg = makeReg(Ty, RegNum); 4349 Variable *Reg = makeReg(Ty, RegNum);
4359 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); 4350 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
4360 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); 4351 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
4361 return Reg; 4352 return Reg;
4362 } 4353 }
4363 } 4354 }
4364 4355
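makeVectorOfHighOrderBits above uses two strategies: for 16- and 32-bit lanes it takes an all-ones vector and shifts each lane left so only the sign bit survives; for 8-bit lanes, where SSE has no per-byte shift, it broadcasts the precomputed 0x80808080 mask with movd + pshufd. An equivalent standalone sketch with SSE2 intrinsics (not Subzero code):

  #include <emmintrin.h>  // SSE2 intrinsics
  #include <cstdint>

  // 32-bit lanes: all-ones shifted left by 31 leaves only the sign bit set.
  static __m128i highOrderBitsV4i32() {
    const __m128i Zero = _mm_setzero_si128();
    const __m128i Ones = _mm_cmpeq_epi32(Zero, Zero); // 0xFFFFFFFF per lane
    return _mm_slli_epi32(Ones, 31);                  // 0x80000000 per lane
  }

  // 8-bit lanes: there is no psllb, so broadcast the byte mask instead.
  static __m128i highOrderBitsV16i8() {
    return _mm_set1_epi32(static_cast<int32_t>(UINT32_C(0x80808080)));
  }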
4365 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty, 4356 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,
4366 Variable *Slot, 4357 Variable *Slot,
4367 uint32_t Offset) { 4358 uint32_t Offset) {
4368 // Ensure that Loc is a stack slot. 4359 // Ensure that Loc is a stack slot.
4369 assert(Slot->getWeight() == RegWeight::Zero); 4360 assert(Slot->getWeight() == RegWeight::Zero);
4370 assert(Slot->getRegNum() == Variable::NoRegister); 4361 assert(Slot->getRegNum() == Variable::NoRegister);
4371 // Compute the location of Loc in memory. 4362 // Compute the location of Loc in memory.
4372 // TODO(wala,stichnot): lea should not be required. The address of 4363 // TODO(wala,stichnot): lea should not be required. The address of
4373 // the stack slot is known at compile time (although not until after 4364 // the stack slot is known at compile time (although not until after
4374 // addProlog()). 4365 // addProlog()).
4375 const Type PointerType = IceType_i32; 4366 const Type PointerType = IceType_i32;
4376 Variable *Loc = makeReg(PointerType); 4367 Variable *Loc = makeReg(PointerType);
4377 _lea(Loc, Slot); 4368 _lea(Loc, Slot);
4378 Constant *ConstantOffset = Ctx->getConstantInt32(IceType_i32, Offset); 4369 Constant *ConstantOffset = Ctx->getConstantInt32(Offset);
4379 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset); 4370 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset);
4380 } 4371 }
4381 4372
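getMemoryOperandForStackSlot above materializes the slot's address into a register with lea and then forms a typed memory operand at a constant offset from that register; per the TODO, the lea is only a stopgap, since the frame-relative address is known once addProlog() has run. The rough C++ analogue (hypothetical, not Subzero code):

  #include <cstdint>
  #include <cstring>

  // Read a 32-bit field at a fixed offset within a stack slot, going through
  // an explicit pointer (the lea) rather than a frame-relative address.
  uint32_t loadSlotField(const unsigned char *Slot, uint32_t Offset) {
    const unsigned char *Loc = Slot; // lea Loc, [Slot]
    uint32_t Field;
    std::memcpy(&Field, Loc + Offset, sizeof(Field)); // mov Field, [Loc + Offset]
    return Field;
  }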
4382 // Helper for legalize() to emit the right code to lower an operand to a 4373 // Helper for legalize() to emit the right code to lower an operand to a
4383 // register of the appropriate type. 4374 // register of the appropriate type.
4384 Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) { 4375 Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) {
4385 Type Ty = Src->getType(); 4376 Type Ty = Src->getType();
4386 Variable *Reg = makeReg(Ty, RegNum); 4377 Variable *Reg = makeReg(Ty, RegNum);
4387 if (isVectorType(Ty)) { 4378 if (isVectorType(Ty)) {
4388 _movp(Reg, Src); 4379 _movp(Reg, Src);
(...skipping 261 matching lines...)
4650 } else if (IsConstant || IsExternal) 4641 } else if (IsConstant || IsExternal)
4651 Str << "\t.zero\t" << Size << "\n"; 4642 Str << "\t.zero\t" << Size << "\n";
4652 // Size is part of .comm. 4643 // Size is part of .comm.
4653 4644
4654 if (IsConstant || HasNonzeroInitializer || IsExternal) 4645 if (IsConstant || HasNonzeroInitializer || IsExternal)
4655 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; 4646 Str << "\t.size\t" << MangledName << ", " << Size << "\n";
4656 // Size is part of .comm. 4647 // Size is part of .comm.
4657 } 4648 }
4658 4649
4659 } // end of namespace Ice 4650 } // end of namespace Ice