src/IceTargetLoweringX8632.cpp - Issue 476323004: Start adding an integrated assembler.

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 476323004: Start adding an integrated assembler. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

Patch Set: make fixups part of address Created 6 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//	1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//

2 //	2 //

3 // The Subzero Code Generator	3 // The Subzero Code Generator

4 //	4 //

5 // This file is distributed under the University of Illinois Open Source	5 // This file is distributed under the University of Illinois Open Source

6 // License. See LICENSE.TXT for details.	6 // License. See LICENSE.TXT for details.

7 //	7 //

8 //===----------------------------------------------------------------------===//	8 //===----------------------------------------------------------------------===//

9 //	9 //

10 // This file implements the TargetLoweringX8632 class, which	10 // This file implements the TargetLoweringX8632 class, which

(...skipping 494 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
505 if (!hasFramePointer())	505 if (!hasFramePointer())

506 Offset += getStackAdjustment();	506 Offset += getStackAdjustment();

507 if (Offset) {	507 if (Offset) {

508 if (Offset > 0)	508 if (Offset > 0)

509 Str << "+";	509 Str << "+";

510 Str << Offset;	510 Str << Offset;

511 }	511 }

512 Str << "]";	512 Str << "]";

513 }	513 }

514	514

	515 x86::Address TargetX8632::stackVarToAsmOperand(const Variable *Var) const {

	516 assert(!Var->hasReg());

	517 int32_t Offset = Var->getStackOffset();

	518 if (!hasFramePointer())

	519 Offset += getStackAdjustment();

	520 return x86::Address(x86::Register(getFrameOrStackReg()), Offset);

	521 }

	522

515 void TargetX8632::lowerArguments() {	523 void TargetX8632::lowerArguments() {

516 VarList &Args = Func->getArgs();	524 VarList &Args = Func->getArgs();

517 // The first four arguments of vector type, regardless of their	525 // The first four arguments of vector type, regardless of their

518 // position relative to the other arguments in the argument list, are	526 // position relative to the other arguments in the argument list, are

519 // passed in registers xmm0 - xmm3.	527 // passed in registers xmm0 - xmm3.

520 unsigned NumXmmArgs = 0;	528 unsigned NumXmmArgs = 0;

521	529

522 Context.init(Func->getEntryNode());	530 Context.init(Func->getEntryNode());

523 Context.setInsertPoint(Context.getCur());	531 Context.setInsertPoint(Context.getCur());

524	532

(...skipping 68 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
593 }	601 }

594 if (isVectorType(Ty)) {	602 if (isVectorType(Ty)) {

595 InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes);	603 InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes);

596 }	604 }

597 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);	605 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);

598 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);	606 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);

599 if (Arg->hasReg()) {	607 if (Arg->hasReg()) {

600 assert(Ty != IceType_i64);	608 assert(Ty != IceType_i64);

601 OperandX8632Mem *Mem = OperandX8632Mem::create(	609 OperandX8632Mem *Mem = OperandX8632Mem::create(

602 Func, Ty, FramePtr,	610 Func, Ty, FramePtr,

603 Ctx->getConstantInt(IceType_i32, Arg->getStackOffset()));	611 Ctx->getConstantInt32(IceType_i32, Arg->getStackOffset()));

604 if (isVectorType(Arg->getType())) {	612 if (isVectorType(Arg->getType())) {

605 _movp(Arg, Mem);	613 _movp(Arg, Mem);

606 } else {	614 } else {

607 _mov(Arg, Mem);	615 _mov(Arg, Mem);

608 }	616 }

609 }	617 }

610 }	618 }

611	619

612 Type TargetX8632::stackSlotType() { return IceType_i32; }	620 Type TargetX8632::stackSlotType() { return IceType_i32; }

613	621

(...skipping 178 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
792 // Align esp if necessary.	800 // Align esp if necessary.

793 if (NeedsStackAlignment) {	801 if (NeedsStackAlignment) {

794 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;	802 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;

795 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);	803 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);

796 SpillAreaSizeBytes = StackSize - StackOffset;	804 SpillAreaSizeBytes = StackSize - StackOffset;

797 }	805 }

798	806

799 // Generate "sub esp, SpillAreaSizeBytes"	807 // Generate "sub esp, SpillAreaSizeBytes"

800 if (SpillAreaSizeBytes)	808 if (SpillAreaSizeBytes)

801 _sub(getPhysicalRegister(Reg_esp),	809 _sub(getPhysicalRegister(Reg_esp),

802 Ctx->getConstantInt(IceType_i32, SpillAreaSizeBytes));	810 Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes));

803	811

804 resetStackAdjustment();	812 resetStackAdjustment();

805	813

806 // Fill in stack offsets for stack args, and copy args into registers	814 // Fill in stack offsets for stack args, and copy args into registers

807 // for those that were register-allocated. Args are pushed right to	815 // for those that were register-allocated. Args are pushed right to

808 // left, so Arg[0] is closest to the stack/frame pointer.	816 // left, so Arg[0] is closest to the stack/frame pointer.

809 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());	817 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());

810 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES;	818 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES;

811 if (!IsEbpBasedFrame)	819 if (!IsEbpBasedFrame)

812 BasicFrameOffset += SpillAreaSizeBytes;	820 BasicFrameOffset += SpillAreaSizeBytes;

(...skipping 95 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
908 Context.setInsertPoint(InsertPoint);	916 Context.setInsertPoint(InsertPoint);

909	917

910 Variable *esp = getPhysicalRegister(Reg_esp);	918 Variable *esp = getPhysicalRegister(Reg_esp);

911 if (IsEbpBasedFrame) {	919 if (IsEbpBasedFrame) {

912 Variable *ebp = getPhysicalRegister(Reg_ebp);	920 Variable *ebp = getPhysicalRegister(Reg_ebp);

913 _mov(esp, ebp);	921 _mov(esp, ebp);

914 _pop(ebp);	922 _pop(ebp);

915 } else {	923 } else {

916 // add esp, SpillAreaSizeBytes	924 // add esp, SpillAreaSizeBytes

917 if (SpillAreaSizeBytes)	925 if (SpillAreaSizeBytes)

918 _add(esp, Ctx->getConstantInt(IceType_i32, SpillAreaSizeBytes));	926 _add(esp, Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes));

919 }	927 }

920	928

921 // Add pop instructions for preserved registers.	929 // Add pop instructions for preserved registers.

922 llvm::SmallBitVector CalleeSaves =	930 llvm::SmallBitVector CalleeSaves =

923 getRegisterSet(RegSet_CalleeSave, RegSet_None);	931 getRegisterSet(RegSet_CalleeSave, RegSet_None);

924 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {	932 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {

925 SizeT j = CalleeSaves.size() - i - 1;	933 SizeT j = CalleeSaves.size() - i - 1;

926 if (j == Reg_ebp && IsEbpBasedFrame)	934 if (j == Reg_ebp && IsEbpBasedFrame)

927 continue;	935 continue;

928 if (CalleeSaves[j] && RegsUsed[j]) {	936 if (CalleeSaves[j] && RegsUsed[j]) {

(...skipping 97 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1026 }	1034 }

1027	1035

1028 Operand *TargetX8632::loOperand(Operand *Operand) {	1036 Operand *TargetX8632::loOperand(Operand *Operand) {

1029 assert(Operand->getType() == IceType_i64);	1037 assert(Operand->getType() == IceType_i64);

1030 if (Operand->getType() != IceType_i64)	1038 if (Operand->getType() != IceType_i64)

1031 return Operand;	1039 return Operand;

1032 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {	1040 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {

1033 split64(Var);	1041 split64(Var);

1034 return Var->getLo();	1042 return Var->getLo();

1035 }	1043 }

1036 if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) {	1044 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {

1037 uint64_t Mask = (1ull << 32) - 1;	1045 return Ctx->getConstantInt32(IceType_i32,

1038 return Ctx->getConstantInt(IceType_i32, Const->getValue() & Mask);	1046 static_cast<uint32_t>(Const->getValue()));

1039 }	1047 }

1040 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {	1048 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {

1041 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(),	1049 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(),

1042 Mem->getOffset(), Mem->getIndex(),	1050 Mem->getOffset(), Mem->getIndex(),

1043 Mem->getShift(), Mem->getSegmentRegister());	1051 Mem->getShift(), Mem->getSegmentRegister());

1044 }	1052 }

1045 llvm_unreachable("Unsupported operand type");	1053 llvm_unreachable("Unsupported operand type");

1046 return NULL;	1054 return NULL;

1047 }	1055 }

1048	1056

1049 Operand *TargetX8632::hiOperand(Operand *Operand) {	1057 Operand *TargetX8632::hiOperand(Operand *Operand) {

1050 assert(Operand->getType() == IceType_i64);	1058 assert(Operand->getType() == IceType_i64);

1051 if (Operand->getType() != IceType_i64)	1059 if (Operand->getType() != IceType_i64)

1052 return Operand;	1060 return Operand;

1053 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {	1061 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {

1054 split64(Var);	1062 split64(Var);

1055 return Var->getHi();	1063 return Var->getHi();

1056 }	1064 }

1057 if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) {	1065 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {

1058 return Ctx->getConstantInt(IceType_i32, Const->getValue() >> 32);	1066 return Ctx->getConstantInt32(

	1067 IceType_i32, static_cast<uint32_t>(Const->getValue() >> 32));

1059 }	1068 }

1060 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {	1069 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {

1061 Constant *Offset = Mem->getOffset();	1070 Constant *Offset = Mem->getOffset();

1062 if (Offset == NULL)	1071 if (Offset == NULL)

1063 Offset = Ctx->getConstantInt(IceType_i32, 4);	1072 Offset = Ctx->getConstantInt32(IceType_i32, 4);

1064 else if (ConstantInteger *IntOffset =	1073 else if (ConstantInteger32 *IntOffset =

1065 llvm::dyn_cast<ConstantInteger>(Offset)) {	1074 llvm::dyn_cast<ConstantInteger32>(Offset)) {

1066 Offset = Ctx->getConstantInt(IceType_i32, 4 + IntOffset->getValue());	1075 Offset = Ctx->getConstantInt32(IceType_i32, 4 + IntOffset->getValue());

1067 } else if (ConstantRelocatable *SymOffset =	1076 } else if (ConstantRelocatable *SymOffset =

1068 llvm::dyn_cast<ConstantRelocatable>(Offset)) {	1077 llvm::dyn_cast<ConstantRelocatable>(Offset)) {

1069 Offset = Ctx->getConstantSym(IceType_i32, 4 + SymOffset->getOffset(),	1078 Offset = Ctx->getConstantSym(IceType_i32, 4 + SymOffset->getOffset(),

1070 SymOffset->getName());	1079 SymOffset->getName());

1071 }	1080 }

1072 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset,	1081 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset,

1073 Mem->getIndex(), Mem->getShift(),	1082 Mem->getIndex(), Mem->getShift(),

1074 Mem->getSegmentRegister());	1083 Mem->getSegmentRegister());

1075 }	1084 }

1076 llvm_unreachable("Unsupported operand type");	1085 llvm_unreachable("Unsupported operand type");

(...skipping 47 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1124 // For default align=0, set it to the real value 1, to avoid any	1133 // For default align=0, set it to the real value 1, to avoid any

1125 // bit-manipulation problems below.	1134 // bit-manipulation problems below.

1126 AlignmentParam = std::max(AlignmentParam, 1u);	1135 AlignmentParam = std::max(AlignmentParam, 1u);

1127	1136

1128 // LLVM enforces power of 2 alignment.	1137 // LLVM enforces power of 2 alignment.

1129 assert((AlignmentParam & (AlignmentParam - 1)) == 0);	1138 assert((AlignmentParam & (AlignmentParam - 1)) == 0);

1130 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);	1139 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);

1131	1140

1132 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES);	1141 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES);

1133 if (Alignment > X86_STACK_ALIGNMENT_BYTES) {	1142 if (Alignment > X86_STACK_ALIGNMENT_BYTES) {

1134 _and(esp, Ctx->getConstantInt(IceType_i32, -Alignment));	1143 _and(esp, Ctx->getConstantInt32(IceType_i32, -Alignment));

1135 }	1144 }

1136 if (ConstantInteger *ConstantTotalSize =	1145 if (ConstantInteger32 *ConstantTotalSize =

1137 llvm::dyn_cast<ConstantInteger>(TotalSize)) {	1146 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {

1138 uint32_t Value = ConstantTotalSize->getValue();	1147 uint32_t Value = ConstantTotalSize->getValue();

1139 Value = applyAlignment(Value, Alignment);	1148 Value = applyAlignment(Value, Alignment);

1140 _sub(esp, Ctx->getConstantInt(IceType_i32, Value));	1149 _sub(esp, Ctx->getConstantInt32(IceType_i32, Value));

1141 } else {	1150 } else {

1142 // Non-constant sizes need to be adjusted to the next highest	1151 // Non-constant sizes need to be adjusted to the next highest

1143 // multiple of the required alignment at runtime.	1152 // multiple of the required alignment at runtime.

1144 Variable *T = makeReg(IceType_i32);	1153 Variable *T = makeReg(IceType_i32);

1145 _mov(T, TotalSize);	1154 _mov(T, TotalSize);

1146 _add(T, Ctx->getConstantInt(IceType_i32, Alignment - 1));	1155 _add(T, Ctx->getConstantInt32(IceType_i32, Alignment - 1));

1147 _and(T, Ctx->getConstantInt(IceType_i32, -Alignment));	1156 _and(T, Ctx->getConstantInt32(IceType_i32, -Alignment));

1148 _sub(esp, T);	1157 _sub(esp, T);

1149 }	1158 }

1150 _mov(Dest, esp);	1159 _mov(Dest, esp);

1151 }	1160 }

1152	1161

1153 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {	1162 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {

1154 Variable *Dest = Inst->getDest();	1163 Variable *Dest = Inst->getDest();

1155 Operand *Src0 = legalize(Inst->getSrc(0));	1164 Operand *Src0 = legalize(Inst->getSrc(0));

1156 Operand *Src1 = legalize(Inst->getSrc(1));	1165 Operand *Src1 = legalize(Inst->getSrc(1));

1157 if (Dest->getType() == IceType_i64) {	1166 if (Dest->getType() == IceType_i64) {

(...skipping 89 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1247 // t2 = shl t2, t1	1256 // t2 = shl t2, t1

1248 // test t1, 0x20	1257 // test t1, 0x20

1249 // je L1	1258 // je L1

1250 // use(t3)	1259 // use(t3)

1251 // t3 = t2	1260 // t3 = t2

1252 // t2 = 0	1261 // t2 = 0

1253 // L1:	1262 // L1:

1254 // a.lo = t2	1263 // a.lo = t2

1255 // a.hi = t3	1264 // a.hi = t3

1256 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;	1265 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;

1257 Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);	1266 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20);

1258 Constant *Zero = Ctx->getConstantZero(IceType_i32);	1267 Constant *Zero = Ctx->getConstantZero(IceType_i32);

1259 InstX8632Label *Label = InstX8632Label::create(Func, this);	1268 InstX8632Label *Label = InstX8632Label::create(Func, this);

1260 _mov(T_1, Src1Lo, Reg_ecx);	1269 _mov(T_1, Src1Lo, Reg_ecx);

1261 _mov(T_2, Src0Lo);	1270 _mov(T_2, Src0Lo);

1262 _mov(T_3, Src0Hi);	1271 _mov(T_3, Src0Hi);

1263 _shld(T_3, T_2, T_1);	1272 _shld(T_3, T_2, T_1);

1264 _shl(T_2, T_1);	1273 _shl(T_2, T_1);

1265 _test(T_1, BitTest);	1274 _test(T_1, BitTest);

1266 _br(InstX8632Br::Br_e, Label);	1275 _br(InstX8632Br::Br_e, Label);

1267 // Because of the intra-block control flow, we need to fake a use	1276 // Because of the intra-block control flow, we need to fake a use

(...skipping 15 matching lines...) Expand all Loading...
1283 // t3 = shr t3, t1	1292 // t3 = shr t3, t1

1284 // test t1, 0x20	1293 // test t1, 0x20

1285 // je L1	1294 // je L1

1286 // use(t2)	1295 // use(t2)

1287 // t2 = t3	1296 // t2 = t3

1288 // t3 = 0	1297 // t3 = 0

1289 // L1:	1298 // L1:

1290 // a.lo = t2	1299 // a.lo = t2

1291 // a.hi = t3	1300 // a.hi = t3

1292 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;	1301 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;

1293 Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);	1302 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20);

1294 Constant *Zero = Ctx->getConstantZero(IceType_i32);	1303 Constant *Zero = Ctx->getConstantZero(IceType_i32);

1295 InstX8632Label *Label = InstX8632Label::create(Func, this);	1304 InstX8632Label *Label = InstX8632Label::create(Func, this);

1296 _mov(T_1, Src1Lo, Reg_ecx);	1305 _mov(T_1, Src1Lo, Reg_ecx);

1297 _mov(T_2, Src0Lo);	1306 _mov(T_2, Src0Lo);

1298 _mov(T_3, Src0Hi);	1307 _mov(T_3, Src0Hi);

1299 _shrd(T_2, T_3, T_1);	1308 _shrd(T_2, T_3, T_1);

1300 _shr(T_3, T_1);	1309 _shr(T_3, T_1);

1301 _test(T_1, BitTest);	1310 _test(T_1, BitTest);

1302 _br(InstX8632Br::Br_e, Label);	1311 _br(InstX8632Br::Br_e, Label);

1303 // Because of the intra-block control flow, we need to fake a use	1312 // Because of the intra-block control flow, we need to fake a use

(...skipping 15 matching lines...) Expand all Loading...
1319 // t3 = sar t3, t1	1328 // t3 = sar t3, t1

1320 // test t1, 0x20	1329 // test t1, 0x20

1321 // je L1	1330 // je L1

1322 // use(t2)	1331 // use(t2)

1323 // t2 = t3	1332 // t2 = t3

1324 // t3 = sar t3, 0x1f	1333 // t3 = sar t3, 0x1f

1325 // L1:	1334 // L1:

1326 // a.lo = t2	1335 // a.lo = t2

1327 // a.hi = t3	1336 // a.hi = t3

1328 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;	1337 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;

1329 Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);	1338 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20);

1330 Constant *SignExtend = Ctx->getConstantInt(IceType_i32, 0x1f);	1339 Constant *SignExtend = Ctx->getConstantInt32(IceType_i32, 0x1f);

1331 InstX8632Label *Label = InstX8632Label::create(Func, this);	1340 InstX8632Label *Label = InstX8632Label::create(Func, this);

1332 _mov(T_1, Src1Lo, Reg_ecx);	1341 _mov(T_1, Src1Lo, Reg_ecx);

1333 _mov(T_2, Src0Lo);	1342 _mov(T_2, Src0Lo);

1334 _mov(T_3, Src0Hi);	1343 _mov(T_3, Src0Hi);

1335 _shrd(T_2, T_3, T_1);	1344 _shrd(T_2, T_3, T_1);

1336 _sar(T_3, T_1);	1345 _sar(T_3, T_1);

1337 _test(T_1, BitTest);	1346 _test(T_1, BitTest);

1338 _br(InstX8632Br::Br_e, Label);	1347 _br(InstX8632Br::Br_e, Label);

1339 // Because of the intra-block control flow, we need to fake a use	1348 // Because of the intra-block control flow, we need to fake a use

1340 // of T_3 to prevent its earlier definition from being dead-code	1349 // of T_3 to prevent its earlier definition from being dead-code

(...skipping 100 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1441 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}	1450 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}

1442 // pmuludq T2, T3	1451 // pmuludq T2, T3

1443 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}	1452 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}

1444 // shufps T1, T2, {0,2,0,2}	1453 // shufps T1, T2, {0,2,0,2}

1445 // pshufd T4, T1, {0,2,1,3}	1454 // pshufd T4, T1, {0,2,1,3}

1446 // movups Dest, T4	1455 // movups Dest, T4

1447	1456

1448 // Mask that directs pshufd to create a vector with entries	1457 // Mask that directs pshufd to create a vector with entries

1449 // Src[1, 0, 3, 0]	1458 // Src[1, 0, 3, 0]

1450 const unsigned Constant1030 = 0x31;	1459 const unsigned Constant1030 = 0x31;

1451 Constant *Mask1030 = Ctx->getConstantInt(IceType_i8, Constant1030);	1460 Constant *Mask1030 = Ctx->getConstantInt32(IceType_i8, Constant1030);

1452 // Mask that directs shufps to create a vector with entries	1461 // Mask that directs shufps to create a vector with entries

1453 // Dest[0, 2], Src[0, 2]	1462 // Dest[0, 2], Src[0, 2]

1454 const unsigned Mask0202 = 0x88;	1463 const unsigned Mask0202 = 0x88;

1455 // Mask that directs pshufd to create a vector with entries	1464 // Mask that directs pshufd to create a vector with entries

1456 // Src[0, 2, 1, 3]	1465 // Src[0, 2, 1, 3]

1457 const unsigned Mask0213 = 0xd8;	1466 const unsigned Mask0213 = 0xd8;

1458 Variable *T1 = makeReg(IceType_v4i32);	1467 Variable *T1 = makeReg(IceType_v4i32);

1459 Variable *T2 = makeReg(IceType_v4i32);	1468 Variable *T2 = makeReg(IceType_v4i32);

1460 Variable *T3 = makeReg(IceType_v4i32);	1469 Variable *T3 = makeReg(IceType_v4i32);

1461 Variable *T4 = makeReg(IceType_v4i32);	1470 Variable *T4 = makeReg(IceType_v4i32);

1462 _movp(T1, Src0);	1471 _movp(T1, Src0);

1463 _pshufd(T2, Src0, Mask1030);	1472 _pshufd(T2, Src0, Mask1030);

1464 _pshufd(T3, Src1, Mask1030);	1473 _pshufd(T3, Src1, Mask1030);

1465 _pmuludq(T1, Src1);	1474 _pmuludq(T1, Src1);

1466 _pmuludq(T2, T3);	1475 _pmuludq(T2, T3);

1467 _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202));	1476 _shufps(T1, T2, Ctx->getConstantInt32(IceType_i8, Mask0202));

1468 _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213));	1477 _pshufd(T4, T1, Ctx->getConstantInt32(IceType_i8, Mask0213));

1469 _movp(Dest, T4);	1478 _movp(Dest, T4);

1470 } else {	1479 } else {

1471 assert(Dest->getType() == IceType_v16i8);	1480 assert(Dest->getType() == IceType_v16i8);

1472 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);	1481 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);

1473 }	1482 }

1474 } break;	1483 } break;

1475 case InstArithmetic::Shl:	1484 case InstArithmetic::Shl:

1476 case InstArithmetic::Lshr:	1485 case InstArithmetic::Lshr:

1477 case InstArithmetic::Ashr:	1486 case InstArithmetic::Ashr:

1478 case InstArithmetic::Udiv:	1487 case InstArithmetic::Udiv:

(...skipping 272 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1751 assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_i64 ||	1760 assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_i64 ||

1752 Ty == IceType_f64 || isVectorType(Ty));	1761 Ty == IceType_f64 || isVectorType(Ty));

1753 if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) {	1762 if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) {

1754 XmmArgs.push_back(Arg);	1763 XmmArgs.push_back(Arg);

1755 } else {	1764 } else {

1756 StackArgs.push_back(Arg);	1765 StackArgs.push_back(Arg);

1757 if (isVectorType(Arg->getType())) {	1766 if (isVectorType(Arg->getType())) {

1758 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);	1767 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);

1759 }	1768 }

1760 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);	1769 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);

1761 Constant *Loc = Ctx->getConstantInt(IceType_i32, ParameterAreaSizeBytes);	1770 Constant *Loc =

	1771 Ctx->getConstantInt32(IceType_i32, ParameterAreaSizeBytes);

1762 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc));	1772 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc));

1763 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());	1773 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());

1764 }	1774 }

1765 }	1775 }

1766	1776

1767 // Adjust the parameter area so that the stack is aligned. It is	1777 // Adjust the parameter area so that the stack is aligned. It is

1768 // assumed that the stack is already aligned at the start of the	1778 // assumed that the stack is already aligned at the start of the

1769 // calling sequence.	1779 // calling sequence.

1770 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);	1780 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);

1771	1781

(...skipping 76 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1848 Operand *CallTarget = legalize(Instr->getCallTarget(), Legal_All);	1858 Operand *CallTarget = legalize(Instr->getCallTarget(), Legal_All);

1849 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget);	1859 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget);

1850 Context.insert(NewCall);	1860 Context.insert(NewCall);

1851 if (ReturnRegHi)	1861 if (ReturnRegHi)

1852 Context.insert(InstFakeDef::create(Func, ReturnRegHi));	1862 Context.insert(InstFakeDef::create(Func, ReturnRegHi));

1853	1863

1854 // Add the appropriate offset to esp. The call instruction takes care	1864 // Add the appropriate offset to esp. The call instruction takes care

1855 // of resetting the stack offset during emission.	1865 // of resetting the stack offset during emission.

1856 if (ParameterAreaSizeBytes) {	1866 if (ParameterAreaSizeBytes) {

1857 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);	1867 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);

1858 _add(esp, Ctx->getConstantInt(IceType_i32, ParameterAreaSizeBytes));	1868 _add(esp, Ctx->getConstantInt32(IceType_i32, ParameterAreaSizeBytes));

1859 }	1869 }

1860	1870

1861 // Insert a register-kill pseudo instruction.	1871 // Insert a register-kill pseudo instruction.

1862 VarList KilledRegs;	1872 VarList KilledRegs;

1863 for (SizeT i = 0; i < ScratchRegs.size(); ++i) {	1873 for (SizeT i = 0; i < ScratchRegs.size(); ++i) {

1864 if (ScratchRegs[i])	1874 if (ScratchRegs[i])

1865 KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i));	1875 KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i));

1866 }	1876 }

1867 Context.insert(InstFakeKill::create(Func, KilledRegs, NewCall));	1877 Context.insert(InstFakeKill::create(Func, KilledRegs, NewCall));

1868	1878

(...skipping 64 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1933 Variable *T = makeReg(DestTy);	1943 Variable *T = makeReg(DestTy);

1934 _movp(T, Src0RM);	1944 _movp(T, Src0RM);

1935 _pand(T, OneMask);	1945 _pand(T, OneMask);

1936 Variable *Zeros = makeVectorOfZeros(Dest->getType());	1946 Variable *Zeros = makeVectorOfZeros(Dest->getType());

1937 _pcmpgt(T, Zeros);	1947 _pcmpgt(T, Zeros);

1938 _movp(Dest, T);	1948 _movp(Dest, T);

1939 } else {	1949 } else {

1940 // width = width(elty) - 1; dest = (src << width) >> width	1950 // width = width(elty) - 1; dest = (src << width) >> width

1941 SizeT ShiftAmount =	1951 SizeT ShiftAmount =

1942 X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1;	1952 X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1;

1943 Constant *ShiftConstant = Ctx->getConstantInt(IceType_i8, ShiftAmount);	1953 Constant *ShiftConstant =

	1954 Ctx->getConstantInt32(IceType_i8, ShiftAmount);

1944 Variable *T = makeReg(DestTy);	1955 Variable *T = makeReg(DestTy);

1945 _movp(T, Src0RM);	1956 _movp(T, Src0RM);

1946 _psll(T, ShiftConstant);	1957 _psll(T, ShiftConstant);

1947 _psra(T, ShiftConstant);	1958 _psra(T, ShiftConstant);

1948 _movp(Dest, T);	1959 _movp(Dest, T);

1949 }	1960 }

1950 } else if (Dest->getType() == IceType_i64) {	1961 } else if (Dest->getType() == IceType_i64) {

1951 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2	1962 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2

1952 Constant *Shift = Ctx->getConstantInt(IceType_i32, 31);	1963 Constant *Shift = Ctx->getConstantInt32(IceType_i32, 31);

1953 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));	1964 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));

1954 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));	1965 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));

1955 Variable *T_Lo = makeReg(DestLo->getType());	1966 Variable *T_Lo = makeReg(DestLo->getType());

1956 if (Src0RM->getType() == IceType_i32) {	1967 if (Src0RM->getType() == IceType_i32) {

1957 _mov(T_Lo, Src0RM);	1968 _mov(T_Lo, Src0RM);

1958 } else if (Src0RM->getType() == IceType_i1) {	1969 } else if (Src0RM->getType() == IceType_i1) {

1959 _movzx(T_Lo, Src0RM);	1970 _movzx(T_Lo, Src0RM);

1960 _shl(T_Lo, Shift);	1971 _shl(T_Lo, Shift);

1961 _sar(T_Lo, Shift);	1972 _sar(T_Lo, Shift);

1962 } else {	1973 } else {

1963 _movsx(T_Lo, Src0RM);	1974 _movsx(T_Lo, Src0RM);

1964 }	1975 }

1965 _mov(DestLo, T_Lo);	1976 _mov(DestLo, T_Lo);

1966 Variable *T_Hi = NULL;	1977 Variable *T_Hi = NULL;

1967 _mov(T_Hi, T_Lo);	1978 _mov(T_Hi, T_Lo);

1968 if (Src0RM->getType() != IceType_i1)	1979 if (Src0RM->getType() != IceType_i1)

1969 // For i1, the sar instruction is already done above.	1980 // For i1, the sar instruction is already done above.

1970 _sar(T_Hi, Shift);	1981 _sar(T_Hi, Shift);

1971 _mov(DestHi, T_Hi);	1982 _mov(DestHi, T_Hi);

1972 } else if (Src0RM->getType() == IceType_i1) {	1983 } else if (Src0RM->getType() == IceType_i1) {

1973 // t1 = src	1984 // t1 = src

1974 // shl t1, dst_bitwidth - 1	1985 // shl t1, dst_bitwidth - 1

1975 // sar t1, dst_bitwidth - 1	1986 // sar t1, dst_bitwidth - 1

1976 // dst = t1	1987 // dst = t1

1977 size_t DestBits = X86_CHAR_BIT * typeWidthInBytes(Dest->getType());	1988 size_t DestBits = X86_CHAR_BIT * typeWidthInBytes(Dest->getType());

1978 Constant *ShiftAmount = Ctx->getConstantInt(IceType_i32, DestBits - 1);	1989 Constant *ShiftAmount = Ctx->getConstantInt32(IceType_i32, DestBits - 1);

1979 Variable *T = makeReg(Dest->getType());	1990 Variable *T = makeReg(Dest->getType());

1980 if (typeWidthInBytes(Dest->getType()) <=	1991 if (typeWidthInBytes(Dest->getType()) <=

1981 typeWidthInBytes(Src0RM->getType())) {	1992 typeWidthInBytes(Src0RM->getType())) {

1982 _mov(T, Src0RM);	1993 _mov(T, Src0RM);

1983 } else {	1994 } else {

1984 // Widen the source using movsx or movzx. (It doesn't matter	1995 // Widen the source using movsx or movzx. (It doesn't matter

1985 // which one, since the following shl/sar overwrite the bits.)	1996 // which one, since the following shl/sar overwrite the bits.)

1986 _movzx(T, Src0RM);	1997 _movzx(T, Src0RM);

1987 }	1998 }

1988 _shl(T, ShiftAmount);	1999 _shl(T, ShiftAmount);

(...skipping 22 matching lines...) Expand all Loading...
2011 Constant *Zero = Ctx->getConstantZero(IceType_i32);	2022 Constant *Zero = Ctx->getConstantZero(IceType_i32);

2012 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));	2023 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));

2013 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));	2024 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));

2014 Variable *Tmp = makeReg(DestLo->getType());	2025 Variable *Tmp = makeReg(DestLo->getType());

2015 if (Src0RM->getType() == IceType_i32) {	2026 if (Src0RM->getType() == IceType_i32) {

2016 _mov(Tmp, Src0RM);	2027 _mov(Tmp, Src0RM);

2017 } else {	2028 } else {

2018 _movzx(Tmp, Src0RM);	2029 _movzx(Tmp, Src0RM);

2019 }	2030 }

2020 if (Src0RM->getType() == IceType_i1) {	2031 if (Src0RM->getType() == IceType_i1) {

2021 Constant *One = Ctx->getConstantInt(IceType_i32, 1);	2032 Constant *One = Ctx->getConstantInt32(IceType_i32, 1);

2022 _and(Tmp, One);	2033 _and(Tmp, One);

2023 }	2034 }

2024 _mov(DestLo, Tmp);	2035 _mov(DestLo, Tmp);

2025 _mov(DestHi, Zero);	2036 _mov(DestHi, Zero);

2026 } else if (Src0RM->getType() == IceType_i1) {	2037 } else if (Src0RM->getType() == IceType_i1) {

2027 // t = Src0RM; t &= 1; Dest = t	2038 // t = Src0RM; t &= 1; Dest = t

2028 Constant *One = Ctx->getConstantInt(IceType_i32, 1);	2039 Constant *One = Ctx->getConstantInt32(IceType_i32, 1);

2029 Variable *T = makeReg(IceType_i32);	2040 Variable *T = makeReg(IceType_i32);

2030 _movzx(T, Src0RM);	2041 _movzx(T, Src0RM);

2031 _and(T, One);	2042 _and(T, One);

2032 _mov(Dest, T);	2043 _mov(Dest, T);

2033 } else {	2044 } else {

2034 // t1 = movzx src; dst = t1	2045 // t1 = movzx src; dst = t1

2035 Variable *T = makeReg(Dest->getType());	2046 Variable *T = makeReg(Dest->getType());

2036 _movzx(T, Src0RM);	2047 _movzx(T, Src0RM);

2037 _mov(Dest, T);	2048 _mov(Dest, T);

2038 }	2049 }

(...skipping 11 matching lines...) Expand all Loading...
2050 _movp(Dest, T);	2061 _movp(Dest, T);

2051 } else {	2062 } else {

2052 Operand *Src0 = Inst->getSrc(0);	2063 Operand *Src0 = Inst->getSrc(0);

2053 if (Src0->getType() == IceType_i64)	2064 if (Src0->getType() == IceType_i64)

2054 Src0 = loOperand(Src0);	2065 Src0 = loOperand(Src0);

2055 Operand *Src0RM = legalize(Src0, Legal_Reg \| Legal_Mem);	2066 Operand *Src0RM = legalize(Src0, Legal_Reg \| Legal_Mem);

2056 // t1 = trunc Src0RM; Dest = t1	2067 // t1 = trunc Src0RM; Dest = t1

2057 Variable *T = NULL;	2068 Variable *T = NULL;

2058 _mov(T, Src0RM);	2069 _mov(T, Src0RM);

2059 if (Dest->getType() == IceType_i1)	2070 if (Dest->getType() == IceType_i1)

2060 _and(T, Ctx->getConstantInt(IceType_i1, 1));	2071 _and(T, Ctx->getConstantInt32(IceType_i1, 1));

2061 _mov(Dest, T);	2072 _mov(Dest, T);

2062 }	2073 }

2063 break;	2074 break;

2064 }	2075 }

2065 case InstCast::Fptrunc:	2076 case InstCast::Fptrunc:

2066 case InstCast::Fpext: {	2077 case InstCast::Fpext: {

2067 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg \| Legal_Mem);	2078 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg \| Legal_Mem);

2068 // t1 = cvt Src0RM; Dest = t1	2079 // t1 = cvt Src0RM; Dest = t1

2069 Variable *T = makeReg(Dest->getType());	2080 Variable *T = makeReg(Dest->getType());

2070 _cvt(T, Src0RM);	2081 _cvt(T, Src0RM);

(...skipping 24 matching lines...) Expand all Loading...
2095 Call->addArg(Inst->getSrc(0));	2106 Call->addArg(Inst->getSrc(0));

2096 lowerCall(Call);	2107 lowerCall(Call);

2097 } else {	2108 } else {

2098 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg \| Legal_Mem);	2109 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg \| Legal_Mem);

2099 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type	2110 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type

2100 Variable *T_1 = makeReg(IceType_i32);	2111 Variable *T_1 = makeReg(IceType_i32);

2101 Variable *T_2 = makeReg(Dest->getType());	2112 Variable *T_2 = makeReg(Dest->getType());

2102 _cvtt(T_1, Src0RM);	2113 _cvtt(T_1, Src0RM);

2103 _mov(T_2, T_1); // T_1 and T_2 may have different integer types	2114 _mov(T_2, T_1); // T_1 and T_2 may have different integer types

2104 if (Dest->getType() == IceType_i1)	2115 if (Dest->getType() == IceType_i1)

2105 _and(T_2, Ctx->getConstantInt(IceType_i1, 1));	2116 _and(T_2, Ctx->getConstantInt32(IceType_i1, 1));

2106 _mov(Dest, T_2);	2117 _mov(Dest, T_2);

2107 T_2->setPreferredRegister(T_1, true);	2118 T_2->setPreferredRegister(T_1, true);

2108 }	2119 }

2109 break;	2120 break;

2110 case InstCast::Fptoui:	2121 case InstCast::Fptoui:

2111 if (isVectorType(Dest->getType())) {	2122 if (isVectorType(Dest->getType())) {

2112 assert(Dest->getType() == IceType_v4i32 &&	2123 assert(Dest->getType() == IceType_v4i32 &&

2113 Inst->getSrc(0)->getType() == IceType_v4f32);	2124 Inst->getSrc(0)->getType() == IceType_v4f32);

2114 const SizeT MaxSrcs = 1;	2125 const SizeT MaxSrcs = 1;

2115 InstCall *Call = makeHelperCall("Sz_fptoui_v4f32", Dest, MaxSrcs);	2126 InstCall *Call = makeHelperCall("Sz_fptoui_v4f32", Dest, MaxSrcs);

(...skipping 16 matching lines...) Expand all Loading...
2132 lowerCall(Call);	2143 lowerCall(Call);

2133 return;	2144 return;

2134 } else {	2145 } else {

2135 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg \| Legal_Mem);	2146 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg \| Legal_Mem);

2136 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type	2147 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type

2137 Variable *T_1 = makeReg(IceType_i32);	2148 Variable *T_1 = makeReg(IceType_i32);

2138 Variable *T_2 = makeReg(Dest->getType());	2149 Variable *T_2 = makeReg(Dest->getType());

2139 _cvtt(T_1, Src0RM);	2150 _cvtt(T_1, Src0RM);

2140 _mov(T_2, T_1); // T_1 and T_2 may have different integer types	2151 _mov(T_2, T_1); // T_1 and T_2 may have different integer types

2141 if (Dest->getType() == IceType_i1)	2152 if (Dest->getType() == IceType_i1)

2142 _and(T_2, Ctx->getConstantInt(IceType_i1, 1));	2153 _and(T_2, Ctx->getConstantInt32(IceType_i1, 1));

2143 _mov(Dest, T_2);	2154 _mov(Dest, T_2);

2144 T_2->setPreferredRegister(T_1, true);	2155 T_2->setPreferredRegister(T_1, true);

2145 }	2156 }

2146 break;	2157 break;

2147 case InstCast::Sitofp:	2158 case InstCast::Sitofp:

2148 if (isVectorType(Dest->getType())) {	2159 if (isVectorType(Dest->getType())) {

2149 assert(Dest->getType() == IceType_v4f32 &&	2160 assert(Dest->getType() == IceType_v4f32 &&

2150 Inst->getSrc(0)->getType() == IceType_v4i32);	2161 Inst->getSrc(0)->getType() == IceType_v4i32);

2151 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg \| Legal_Mem);	2162 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg \| Legal_Mem);

2152 Variable *T = makeReg(Dest->getType());	2163 Variable *T = makeReg(Dest->getType());

(...skipping 190 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2343 _movp(Dest, legalizeToVar(Src0));	2354 _movp(Dest, legalizeToVar(Src0));

2344 } break;	2355 } break;

2345 }	2356 }

2346 break;	2357 break;

2347 }	2358 }

2348 }	2359 }

2349 }	2360 }

2350	2361

2351 void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {	2362 void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {

2352 Operand *SourceVectNotLegalized = Inst->getSrc(0);	2363 Operand *SourceVectNotLegalized = Inst->getSrc(0);

2353 ConstantInteger *ElementIndex =	2364 ConstantInteger32 *ElementIndex =

2354 llvm::dyn_cast<ConstantInteger>(Inst->getSrc(1));	2365 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1));

2355 // Only constant indices are allowed in PNaCl IR.	2366 // Only constant indices are allowed in PNaCl IR.

2356 assert(ElementIndex);	2367 assert(ElementIndex);

2357	2368

2358 unsigned Index = ElementIndex->getValue();	2369 unsigned Index = ElementIndex->getValue();

2359 Type Ty = SourceVectNotLegalized->getType();	2370 Type Ty = SourceVectNotLegalized->getType();

2360 Type ElementTy = typeElementType(Ty);	2371 Type ElementTy = typeElementType(Ty);

2361 Type InVectorElementTy = getInVectorElementType(Ty);	2372 Type InVectorElementTy = getInVectorElementType(Ty);

2362 Variable *ExtractedElementR = makeReg(InVectorElementTy);	2373 Variable *ExtractedElementR = makeReg(InVectorElementTy);

2363	2374

2364 // TODO(wala): Determine the best lowering sequences for each type.	2375 // TODO(wala): Determine the best lowering sequences for each type.

2365 bool CanUsePextr =	2376 bool CanUsePextr =

2366 Ty == IceType_v8i16 \|\| Ty == IceType_v8i1 \|\| InstructionSet >= SSE4_1;	2377 Ty == IceType_v8i16 \|\| Ty == IceType_v8i1 \|\| InstructionSet >= SSE4_1;

2367 if (CanUsePextr && Ty != IceType_v4f32) {	2378 if (CanUsePextr && Ty != IceType_v4f32) {

2368 // Use pextrb, pextrw, or pextrd.	2379 // Use pextrb, pextrw, or pextrd.

2369 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);	2380 Constant *Mask = Ctx->getConstantInt32(IceType_i8, Index);

2370 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized);	2381 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized);

2371 _pextr(ExtractedElementR, SourceVectR, Mask);	2382 _pextr(ExtractedElementR, SourceVectR, Mask);

2372 } else if (Ty == IceType_v4i32 \|\| Ty == IceType_v4f32 \|\| Ty == IceType_v4i1) {	2383 } else if (Ty == IceType_v4i32 \|\| Ty == IceType_v4f32 \|\| Ty == IceType_v4i1) {

2373 // Use pshufd and movd/movss.	2384 // Use pshufd and movd/movss.

2374 Variable *T = NULL;	2385 Variable *T = NULL;

2375 if (Index) {	2386 if (Index) {

2376 // The shuffle only needs to occur if the element to be extracted	2387 // The shuffle only needs to occur if the element to be extracted

2377 // is not at the lowest index.	2388 // is not at the lowest index.

2378 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);	2389 Constant *Mask = Ctx->getConstantInt32(IceType_i8, Index);

2379 T = makeReg(Ty);	2390 T = makeReg(Ty);

2380 _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg \| Legal_Mem), Mask);	2391 _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg \| Legal_Mem), Mask);

2381 } else {	2392 } else {

2382 T = legalizeToVar(SourceVectNotLegalized);	2393 T = legalizeToVar(SourceVectNotLegalized);

2383 }	2394 }

2384	2395

2385 if (InVectorElementTy == IceType_i32) {	2396 if (InVectorElementTy == IceType_i32) {

2386 _movd(ExtractedElementR, T);	2397 _movd(ExtractedElementR, T);

2387 } else { // InVectorElementTy == IceType_f32	2398 } else { // InVectorElementTy == IceType_f32

2388 // TODO(wala): _movss is only used here because _mov does not	2399 // TODO(wala): _movss is only used here because _mov does not

(...skipping 118 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2507 bool HasC1 = (TableFcmp[Index].C1 != InstX8632Br::Br_None);	2518 bool HasC1 = (TableFcmp[Index].C1 != InstX8632Br::Br_None);

2508 bool HasC2 = (TableFcmp[Index].C2 != InstX8632Br::Br_None);	2519 bool HasC2 = (TableFcmp[Index].C2 != InstX8632Br::Br_None);

2509 if (HasC1) {	2520 if (HasC1) {

2510 Src0 = legalize(Src0);	2521 Src0 = legalize(Src0);

2511 Operand *Src1RM = legalize(Src1, Legal_Reg \| Legal_Mem);	2522 Operand *Src1RM = legalize(Src1, Legal_Reg \| Legal_Mem);

2512 Variable *T = NULL;	2523 Variable *T = NULL;

2513 _mov(T, Src0);	2524 _mov(T, Src0);

2514 _ucomiss(T, Src1RM);	2525 _ucomiss(T, Src1RM);

2515 }	2526 }

2516 Constant *Default =	2527 Constant *Default =

2517 Ctx->getConstantInt(IceType_i32, TableFcmp[Index].Default);	2528 Ctx->getConstantInt32(IceType_i32, TableFcmp[Index].Default);

2518 _mov(Dest, Default);	2529 _mov(Dest, Default);

2519 if (HasC1) {	2530 if (HasC1) {

2520 InstX8632Label *Label = InstX8632Label::create(Func, this);	2531 InstX8632Label *Label = InstX8632Label::create(Func, this);

2521 _br(TableFcmp[Index].C1, Label);	2532 _br(TableFcmp[Index].C1, Label);

2522 if (HasC2) {	2533 if (HasC2) {

2523 _br(TableFcmp[Index].C2, Label);	2534 _br(TableFcmp[Index].C2, Label);

2524 }	2535 }

2525 Context.insert(InstFakeUse::create(Func, Dest));	2536 Context.insert(InstFakeUse::create(Func, Dest));

2526 Constant *NonDefault =	2537 Constant *NonDefault =

2527 Ctx->getConstantInt(IceType_i32, !TableFcmp[Index].Default);	2538 Ctx->getConstantInt32(IceType_i32, !TableFcmp[Index].Default);

2528 _mov(Dest, NonDefault);	2539 _mov(Dest, NonDefault);

2529 Context.insert(Label);	2540 Context.insert(Label);

2530 }	2541 }

2531 }	2542 }

2532	2543

2533 void TargetX8632::lowerIcmp(const InstIcmp *Inst) {	2544 void TargetX8632::lowerIcmp(const InstIcmp *Inst) {

2534 Operand *Src0 = legalize(Inst->getSrc(0));	2545 Operand *Src0 = legalize(Inst->getSrc(0));

2535 Operand *Src1 = legalize(Inst->getSrc(1));	2546 Operand *Src1 = legalize(Inst->getSrc(1));

2536 Variable *Dest = Inst->getDest();	2547 Variable *Dest = Inst->getDest();

2537	2548

(...skipping 120 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2658 _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(),	2669 _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(),

2659 NextBr->getTargetFalse());	2670 NextBr->getTargetFalse());

2660 // Skip over the following branch instruction.	2671 // Skip over the following branch instruction.

2661 Context.advanceNext();	2672 Context.advanceNext();

2662 return;	2673 return;

2663 }	2674 }

2664 }	2675 }

2665	2676

2666 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:	2677 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:

2667 Constant *Zero = Ctx->getConstantZero(IceType_i32);	2678 Constant *Zero = Ctx->getConstantZero(IceType_i32);

2668 Constant *One = Ctx->getConstantInt(IceType_i32, 1);	2679 Constant *One = Ctx->getConstantInt32(IceType_i32, 1);

2669 if (Src0->getType() == IceType_i64) {	2680 if (Src0->getType() == IceType_i64) {

2670 InstIcmp::ICond Condition = Inst->getCondition();	2681 InstIcmp::ICond Condition = Inst->getCondition();

2671 size_t Index = static_cast<size_t>(Condition);	2682 size_t Index = static_cast<size_t>(Condition);

2672 assert(Index < TableIcmp64Size);	2683 assert(Index < TableIcmp64Size);

2673 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg \| Legal_Mem);	2684 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg \| Legal_Mem);

2674 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg \| Legal_Mem);	2685 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg \| Legal_Mem);

2675 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg \| Legal_Imm);	2686 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg \| Legal_Imm);

2676 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg \| Legal_Imm);	2687 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg \| Legal_Imm);

2677 if (Condition == InstIcmp::Eq \|\| Condition == InstIcmp::Ne) {	2688 if (Condition == InstIcmp::Eq \|\| Condition == InstIcmp::Ne) {

2678 InstX8632Label *Label = InstX8632Label::create(Func, this);	2689 InstX8632Label *Label = InstX8632Label::create(Func, this);

(...skipping 30 matching lines...) Expand all Loading...
2709 _mov(Dest, One);	2720 _mov(Dest, One);

2710 _br(getIcmp32Mapping(Inst->getCondition()), Label);	2721 _br(getIcmp32Mapping(Inst->getCondition()), Label);

2711 Context.insert(InstFakeUse::create(Func, Dest));	2722 Context.insert(InstFakeUse::create(Func, Dest));

2712 _mov(Dest, Zero);	2723 _mov(Dest, Zero);

2713 Context.insert(Label);	2724 Context.insert(Label);

2714 }	2725 }

2715	2726

2716 void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {	2727 void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {

2717 Operand *SourceVectNotLegalized = Inst->getSrc(0);	2728 Operand *SourceVectNotLegalized = Inst->getSrc(0);

2718 Operand *ElementToInsertNotLegalized = Inst->getSrc(1);	2729 Operand *ElementToInsertNotLegalized = Inst->getSrc(1);

2719 ConstantInteger *ElementIndex =	2730 ConstantInteger32 *ElementIndex =

2720 llvm::dyn_cast<ConstantInteger>(Inst->getSrc(2));	2731 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2));

2721 // Only constant indices are allowed in PNaCl IR.	2732 // Only constant indices are allowed in PNaCl IR.

2722 assert(ElementIndex);	2733 assert(ElementIndex);

2723 unsigned Index = ElementIndex->getValue();	2734 unsigned Index = ElementIndex->getValue();

2724 assert(Index < typeNumElements(SourceVectNotLegalized->getType()));	2735 assert(Index < typeNumElements(SourceVectNotLegalized->getType()));

2725	2736

2726 Type Ty = SourceVectNotLegalized->getType();	2737 Type Ty = SourceVectNotLegalized->getType();

2727 Type ElementTy = typeElementType(Ty);	2738 Type ElementTy = typeElementType(Ty);

2728 Type InVectorElementTy = getInVectorElementType(Ty);	2739 Type InVectorElementTy = getInVectorElementType(Ty);

2729	2740

2730 if (ElementTy == IceType_i1) {	2741 if (ElementTy == IceType_i1) {

2731 // Expand the element to the appropriate size for it to be inserted	2742 // Expand the element to the appropriate size for it to be inserted

2732 // in the vector.	2743 // in the vector.

2733 Variable *Expanded =	2744 Variable *Expanded =

2734 Func->makeVariable(InVectorElementTy, Context.getNode());	2745 Func->makeVariable(InVectorElementTy, Context.getNode());

2735 InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded,	2746 InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded,

2736 ElementToInsertNotLegalized);	2747 ElementToInsertNotLegalized);

2737 lowerCast(Cast);	2748 lowerCast(Cast);

2738 ElementToInsertNotLegalized = Expanded;	2749 ElementToInsertNotLegalized = Expanded;

2739 }	2750 }

2740	2751

2741 if (Ty == IceType_v8i16 \|\| Ty == IceType_v8i1 \|\| InstructionSet >= SSE4_1) {	2752 if (Ty == IceType_v8i16 \|\| Ty == IceType_v8i1 \|\| InstructionSet >= SSE4_1) {

2742 // Use insertps, pinsrb, pinsrw, or pinsrd.	2753 // Use insertps, pinsrb, pinsrw, or pinsrd.

2743 Operand *ElementRM =	2754 Operand *ElementRM =

2744 legalize(ElementToInsertNotLegalized, Legal_Reg \| Legal_Mem);	2755 legalize(ElementToInsertNotLegalized, Legal_Reg \| Legal_Mem);

2745 Operand *SourceVectRM =	2756 Operand *SourceVectRM =

2746 legalize(SourceVectNotLegalized, Legal_Reg \| Legal_Mem);	2757 legalize(SourceVectNotLegalized, Legal_Reg \| Legal_Mem);

2747 Variable *T = makeReg(Ty);	2758 Variable *T = makeReg(Ty);

2748 _movp(T, SourceVectRM);	2759 _movp(T, SourceVectRM);

2749 if (Ty == IceType_v4f32)	2760 if (Ty == IceType_v4f32)

2750 _insertps(T, ElementRM, Ctx->getConstantInt(IceType_i8, Index << 4));	2761 _insertps(T, ElementRM, Ctx->getConstantInt32(IceType_i8, Index << 4));

2751 else	2762 else

2752 _pinsr(T, ElementRM, Ctx->getConstantInt(IceType_i8, Index));	2763 _pinsr(T, ElementRM, Ctx->getConstantInt32(IceType_i8, Index));

2753 _movp(Inst->getDest(), T);	2764 _movp(Inst->getDest(), T);

2754 } else if (Ty == IceType_v4i32 \|\| Ty == IceType_v4f32 \|\| Ty == IceType_v4i1) {	2765 } else if (Ty == IceType_v4i32 \|\| Ty == IceType_v4f32 \|\| Ty == IceType_v4i1) {

2755 // Use shufps or movss.	2766 // Use shufps or movss.

2756 Variable *ElementR = NULL;	2767 Variable *ElementR = NULL;

2757 Operand *SourceVectRM =	2768 Operand *SourceVectRM =

2758 legalize(SourceVectNotLegalized, Legal_Reg \| Legal_Mem);	2769 legalize(SourceVectNotLegalized, Legal_Reg \| Legal_Mem);

2759	2770

2760 if (InVectorElementTy == IceType_f32) {	2771 if (InVectorElementTy == IceType_f32) {

2761 // ElementR will be in an XMM register since it is floating point.	2772 // ElementR will be in an XMM register since it is floating point.

2762 ElementR = legalizeToVar(ElementToInsertNotLegalized);	2773 ElementR = legalizeToVar(ElementToInsertNotLegalized);

(...skipping 30 matching lines...) Expand all Loading...
2793 // ElementR := ElementR[0, 0] T[0, 3]	2804 // ElementR := ElementR[0, 0] T[0, 3]

2794 // T := T[0, 1] ElementR[0, 3]	2805 // T := T[0, 1] ElementR[0, 3]

2795 //	2806 //

2796 // insertelement into index 3 (result is stored in T):	2807 // insertelement into index 3 (result is stored in T):

2797 // T := SourceVectRM	2808 // T := SourceVectRM

2798 // ElementR := ElementR[0, 0] T[0, 2]	2809 // ElementR := ElementR[0, 0] T[0, 2]

2799 // T := T[0, 1] ElementR[3, 0]	2810 // T := T[0, 1] ElementR[3, 0]

2800 const unsigned char Mask1[3] = {0, 192, 128};	2811 const unsigned char Mask1[3] = {0, 192, 128};

2801 const unsigned char Mask2[3] = {227, 196, 52};	2812 const unsigned char Mask2[3] = {227, 196, 52};

2802	2813

2803 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]);	2814 Constant *Mask1Constant =

2804 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]);	2815 Ctx->getConstantInt32(IceType_i8, Mask1[Index - 1]);

	2816 Constant *Mask2Constant =

	2817 Ctx->getConstantInt32(IceType_i8, Mask2[Index - 1]);

2805	2818

2806 if (Index == 1) {	2819 if (Index == 1) {

2807 _shufps(ElementR, SourceVectRM, Mask1Constant);	2820 _shufps(ElementR, SourceVectRM, Mask1Constant);

2808 _shufps(ElementR, SourceVectRM, Mask2Constant);	2821 _shufps(ElementR, SourceVectRM, Mask2Constant);

2809 _movp(Inst->getDest(), ElementR);	2822 _movp(Inst->getDest(), ElementR);

2810 } else {	2823 } else {

2811 Variable *T = makeReg(Ty);	2824 Variable *T = makeReg(Ty);

2812 _movp(T, SourceVectRM);	2825 _movp(T, SourceVectRM);

2813 _shufps(ElementR, T, Mask1Constant);	2826 _shufps(ElementR, T, Mask1Constant);

2814 _shufps(T, ElementR, Mask2Constant);	2827 _shufps(T, ElementR, Mask2Constant);

(...skipping 19 matching lines...) Expand all Loading...
2834 Variable *T = makeReg(Ty);	2847 Variable *T = makeReg(Ty);

2835 _movp(T, Slot);	2848 _movp(T, Slot);

2836 _movp(Inst->getDest(), T);	2849 _movp(Inst->getDest(), T);

2837 }	2850 }

2838 }	2851 }

2839	2852

2840 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {	2853 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {

2841 switch (Instr->getIntrinsicInfo().ID) {	2854 switch (Instr->getIntrinsicInfo().ID) {

2842 case Intrinsics::AtomicCmpxchg: {	2855 case Intrinsics::AtomicCmpxchg: {

2843 if (!Intrinsics::VerifyMemoryOrder(	2856 if (!Intrinsics::VerifyMemoryOrder(

2844 llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) {	2857 llvm::cast<ConstantInteger32>(Instr->getArg(3))->getValue())) {

2845 Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg");	2858 Func->setError("Unexpected memory ordering (success) for AtomicCmpxchg");

2846 return;	2859 return;

2847 }	2860 }

2848 if (!Intrinsics::VerifyMemoryOrder(	2861 if (!Intrinsics::VerifyMemoryOrder(

2849 llvm::cast<ConstantInteger>(Instr->getArg(4))->getValue())) {	2862 llvm::cast<ConstantInteger32>(Instr->getArg(4))->getValue())) {

2850 Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg");	2863 Func->setError("Unexpected memory ordering (failure) for AtomicCmpxchg");

2851 return;	2864 return;

2852 }	2865 }

2853 Variable *DestPrev = Instr->getDest();	2866 Variable *DestPrev = Instr->getDest();

2854 Operand *PtrToMem = Instr->getArg(0);	2867 Operand *PtrToMem = Instr->getArg(0);

2855 Operand *Expected = Instr->getArg(1);	2868 Operand *Expected = Instr->getArg(1);

2856 Operand *Desired = Instr->getArg(2);	2869 Operand *Desired = Instr->getArg(2);

2857 if (tryOptimizedCmpxchgCmpBr(DestPrev, PtrToMem, Expected, Desired))	2870 if (tryOptimizedCmpxchgCmpBr(DestPrev, PtrToMem, Expected, Desired))

2858 return;	2871 return;

2859 lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired);	2872 lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired);

2860 return;	2873 return;

2861 }	2874 }

2862 case Intrinsics::AtomicFence:	2875 case Intrinsics::AtomicFence:

2863 if (!Intrinsics::VerifyMemoryOrder(	2876 if (!Intrinsics::VerifyMemoryOrder(

2864 llvm::cast<ConstantInteger>(Instr->getArg(0))->getValue())) {	2877 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue())) {

2865 Func->setError("Unexpected memory ordering for AtomicFence");	2878 Func->setError("Unexpected memory ordering for AtomicFence");

2866 return;	2879 return;

2867 }	2880 }

2868 _mfence();	2881 _mfence();

2869 return;	2882 return;

2870 case Intrinsics::AtomicFenceAll:	2883 case Intrinsics::AtomicFenceAll:

2871 // NOTE: FenceAll should prevent any load/store from being moved	2884 // NOTE: FenceAll should prevent any load/store from being moved

2872 // across the fence (both atomic and non-atomic). The InstX8632Mfence	2885 // across the fence (both atomic and non-atomic). The InstX8632Mfence

2873 // instruction is currently marked coarsely as "HasSideEffects".	2886 // instruction is currently marked coarsely as "HasSideEffects".

2874 _mfence();	2887 _mfence();

2875 return;	2888 return;

2876 case Intrinsics::AtomicIsLockFree: {	2889 case Intrinsics::AtomicIsLockFree: {

2877 // X86 is always lock free for 8/16/32/64 bit accesses.	2890 // X86 is always lock free for 8/16/32/64 bit accesses.

2878 // TODO(jvoung): Since the result is constant when given a constant	2891 // TODO(jvoung): Since the result is constant when given a constant

2879 // byte size, this opens up DCE opportunities.	2892 // byte size, this opens up DCE opportunities.

2880 Operand *ByteSize = Instr->getArg(0);	2893 Operand *ByteSize = Instr->getArg(0);

2881 Variable *Dest = Instr->getDest();	2894 Variable *Dest = Instr->getDest();

2882 if (ConstantInteger *CI = llvm::dyn_cast<ConstantInteger>(ByteSize)) {	2895 if (ConstantInteger32 *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) {

2883 Constant *Result;	2896 Constant *Result;

2884 switch (CI->getValue()) {	2897 switch (CI->getValue()) {

2885 default:	2898 default:

2886 // Some x86-64 processors support the cmpxchg16b instruction, which	2899 // Some x86-64 processors support the cmpxchg16b instruction, which

2887 // can make 16-byte operations lock free (when used with the LOCK	2900 // can make 16-byte operations lock free (when used with the LOCK

2888 // prefix). However, that's not supported in 32-bit mode, so just	2901 // prefix). However, that's not supported in 32-bit mode, so just

2889 // return 0 even for large sizes.	2902 // return 0 even for large sizes.

2890 Result = Ctx->getConstantZero(IceType_i32);	2903 Result = Ctx->getConstantZero(IceType_i32);

2891 break;	2904 break;

2892 case 1:	2905 case 1:

2893 case 2:	2906 case 2:

2894 case 4:	2907 case 4:

2895 case 8:	2908 case 8:

2896 Result = Ctx->getConstantInt(IceType_i32, 1);	2909 Result = Ctx->getConstantInt32(IceType_i32, 1);

2897 break;	2910 break;

2898 }	2911 }

2899 _mov(Dest, Result);	2912 _mov(Dest, Result);

2900 return;	2913 return;

2901 }	2914 }

2902 // The PNaCl ABI requires the byte size to be a compile-time constant.	2915 // The PNaCl ABI requires the byte size to be a compile-time constant.

2903 Func->setError("AtomicIsLockFree byte size should be compile-time const");	2916 Func->setError("AtomicIsLockFree byte size should be compile-time const");

2904 return;	2917 return;

2905 }	2918 }

2906 case Intrinsics::AtomicLoad: {	2919 case Intrinsics::AtomicLoad: {

2907 // We require the memory address to be naturally aligned.	2920 // We require the memory address to be naturally aligned.

2908 // Given that is the case, then normal loads are atomic.	2921 // Given that is the case, then normal loads are atomic.

2909 if (!Intrinsics::VerifyMemoryOrder(	2922 if (!Intrinsics::VerifyMemoryOrder(

2910 llvm::cast<ConstantInteger>(Instr->getArg(1))->getValue())) {	2923 llvm::cast<ConstantInteger32>(Instr->getArg(1))->getValue())) {

2911 Func->setError("Unexpected memory ordering for AtomicLoad");	2924 Func->setError("Unexpected memory ordering for AtomicLoad");

2912 return;	2925 return;

2913 }	2926 }

2914 Variable *Dest = Instr->getDest();	2927 Variable *Dest = Instr->getDest();

2915 if (Dest->getType() == IceType_i64) {	2928 if (Dest->getType() == IceType_i64) {

2916 // Follow what GCC does and use a movq instead of what lowerLoad()	2929 // Follow what GCC does and use a movq instead of what lowerLoad()

2917 // normally does (split the load into two).	2930 // normally does (split the load into two).

2918 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding	2931 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding

2919 // can't happen anyway, since this is x86-32 and integer arithmetic only	2932 // can't happen anyway, since this is x86-32 and integer arithmetic only

2920 // happens on 32-bit quantities.	2933 // happens on 32-bit quantities.

(...skipping 12 matching lines...) Expand all Loading...
2933 lowerLoad(Load);	2946 lowerLoad(Load);

2934 // Make sure the atomic load isn't elided when unused, by adding a FakeUse.	2947 // Make sure the atomic load isn't elided when unused, by adding a FakeUse.

2935 // Since lowerLoad may fuse the load w/ an arithmetic instruction,	2948 // Since lowerLoad may fuse the load w/ an arithmetic instruction,

2936 // insert the FakeUse on the last-inserted instruction's dest.	2949 // insert the FakeUse on the last-inserted instruction's dest.

2937 Context.insert(	2950 Context.insert(

2938 InstFakeUse::create(Func, Context.getLastInserted()->getDest()));	2951 InstFakeUse::create(Func, Context.getLastInserted()->getDest()));

2939 return;	2952 return;

2940 }	2953 }

2941 case Intrinsics::AtomicRMW:	2954 case Intrinsics::AtomicRMW:

2942 if (!Intrinsics::VerifyMemoryOrder(	2955 if (!Intrinsics::VerifyMemoryOrder(

2943 llvm::cast<ConstantInteger>(Instr->getArg(3))->getValue())) {	2956 llvm::cast<ConstantInteger32>(Instr->getArg(3))->getValue())) {

2944 Func->setError("Unexpected memory ordering for AtomicRMW");	2957 Func->setError("Unexpected memory ordering for AtomicRMW");

2945 return;	2958 return;

2946 }	2959 }

2947 lowerAtomicRMW(Instr->getDest(),	2960 lowerAtomicRMW(Instr->getDest(),

2948 static_cast<uint32_t>(llvm::cast<ConstantInteger>(	2961 static_cast<uint32_t>(llvm::cast<ConstantInteger32>(

2949 Instr->getArg(0))->getValue()),	2962 Instr->getArg(0))->getValue()),

2950 Instr->getArg(1), Instr->getArg(2));	2963 Instr->getArg(1), Instr->getArg(2));

2951 return;	2964 return;

2952 case Intrinsics::AtomicStore: {	2965 case Intrinsics::AtomicStore: {

2953 if (!Intrinsics::VerifyMemoryOrder(	2966 if (!Intrinsics::VerifyMemoryOrder(

2954 llvm::cast<ConstantInteger>(Instr->getArg(2))->getValue())) {	2967 llvm::cast<ConstantInteger32>(Instr->getArg(2))->getValue())) {

2955 Func->setError("Unexpected memory ordering for AtomicStore");	2968 Func->setError("Unexpected memory ordering for AtomicStore");

2956 return;	2969 return;

2957 }	2970 }

2958 // We require the memory address to be naturally aligned.	2971 // We require the memory address to be naturally aligned.

2959 // Given that is the case, then normal stores are atomic.	2972 // Given that is the case, then normal stores are atomic.

2960 // Add a fence after the store to make it visible.	2973 // Add a fence after the store to make it visible.

2961 Operand *Value = Instr->getArg(0);	2974 Operand *Value = Instr->getArg(0);

2962 Operand *Ptr = Instr->getArg(1);	2975 Operand *Ptr = Instr->getArg(1);

2963 if (Value->getType() == IceType_i64) {	2976 if (Value->getType() == IceType_i64) {

2964 // Use a movq instead of what lowerStore() normally does	2977 // Use a movq instead of what lowerStore() normally does

(...skipping 27 matching lines...) Expand all Loading...
2992 _bswap(T_Hi);	3005 _bswap(T_Hi);

2993 _mov(DestLo, T_Hi);	3006 _mov(DestLo, T_Hi);

2994 _mov(DestHi, T_Lo);	3007 _mov(DestHi, T_Lo);

2995 } else if (Val->getType() == IceType_i32) {	3008 } else if (Val->getType() == IceType_i32) {

2996 Variable *T = legalizeToVar(Val);	3009 Variable *T = legalizeToVar(Val);

2997 _bswap(T);	3010 _bswap(T);

2998 _mov(Dest, T);	3011 _mov(Dest, T);

2999 } else {	3012 } else {

3000 assert(Val->getType() == IceType_i16);	3013 assert(Val->getType() == IceType_i16);

3001 Val = legalize(Val);	3014 Val = legalize(Val);

3002 Constant *Eight = Ctx->getConstantInt(IceType_i16, 8);	3015 Constant *Eight = Ctx->getConstantInt32(IceType_i16, 8);

3003 Variable *T = NULL;	3016 Variable *T = NULL;

3004 _mov(T, Val);	3017 _mov(T, Val);

3005 _rol(T, Eight);	3018 _rol(T, Eight);

3006 _mov(Dest, T);	3019 _mov(Dest, T);

3007 }	3020 }

3008 return;	3021 return;

3009 }	3022 }

3010 case Intrinsics::Ctpop: {	3023 case Intrinsics::Ctpop: {

3011 Variable *Dest = Instr->getDest();	3024 Variable *Dest = Instr->getDest();

3012 Operand *Val = Instr->getArg(0);	3025 Operand *Val = Instr->getArg(0);

(...skipping 462 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3475 // bit position conversion, and the speculation is reversed.	3488 // bit position conversion, and the speculation is reversed.

3476 assert(Ty == IceType_i32 \|\| Ty == IceType_i64);	3489 assert(Ty == IceType_i32 \|\| Ty == IceType_i64);

3477 Variable *T = makeReg(IceType_i32);	3490 Variable *T = makeReg(IceType_i32);

3478 Operand *FirstValRM = legalize(FirstVal, Legal_Mem \| Legal_Reg);	3491 Operand *FirstValRM = legalize(FirstVal, Legal_Mem \| Legal_Reg);

3479 if (Cttz) {	3492 if (Cttz) {

3480 _bsf(T, FirstValRM);	3493 _bsf(T, FirstValRM);

3481 } else {	3494 } else {

3482 _bsr(T, FirstValRM);	3495 _bsr(T, FirstValRM);

3483 }	3496 }

3484 Variable *T_Dest = makeReg(IceType_i32);	3497 Variable *T_Dest = makeReg(IceType_i32);

3485 Constant *ThirtyTwo = Ctx->getConstantInt(IceType_i32, 32);	3498 Constant *ThirtyTwo = Ctx->getConstantInt32(IceType_i32, 32);

3486 Constant *ThirtyOne = Ctx->getConstantInt(IceType_i32, 31);	3499 Constant *ThirtyOne = Ctx->getConstantInt32(IceType_i32, 31);

3487 if (Cttz) {	3500 if (Cttz) {

3488 _mov(T_Dest, ThirtyTwo);	3501 _mov(T_Dest, ThirtyTwo);

3489 } else {	3502 } else {

3490 Constant *SixtyThree = Ctx->getConstantInt(IceType_i32, 63);	3503 Constant *SixtyThree = Ctx->getConstantInt32(IceType_i32, 63);

3491 _mov(T_Dest, SixtyThree);	3504 _mov(T_Dest, SixtyThree);

3492 }	3505 }

3493 _cmov(T_Dest, T, InstX8632::Br_ne);	3506 _cmov(T_Dest, T, InstX8632::Br_ne);

3494 if (!Cttz) {	3507 if (!Cttz) {

3495 _xor(T_Dest, ThirtyOne);	3508 _xor(T_Dest, ThirtyOne);

3496 }	3509 }

3497 if (Ty == IceType_i32) {	3510 if (Ty == IceType_i32) {

3498 _mov(Dest, T_Dest);	3511 _mov(Dest, T_Dest);

3499 return;	3512 return;

3500 }	3513 }

(...skipping 108 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3609 if (Index == NULL)	3622 if (Index == NULL)

3610 return false;	3623 return false;

3611 const Inst *IndexInst = Index->getDefinition();	3624 const Inst *IndexInst = Index->getDefinition();

3612 if (IndexInst == NULL)	3625 if (IndexInst == NULL)

3613 return false;	3626 return false;

3614 if (IndexInst->getSrcSize() < 2)	3627 if (IndexInst->getSrcSize() < 2)

3615 return false;	3628 return false;

3616 if (const InstArithmetic *ArithInst =	3629 if (const InstArithmetic *ArithInst =

3617 llvm::dyn_cast<InstArithmetic>(IndexInst)) {	3630 llvm::dyn_cast<InstArithmetic>(IndexInst)) {

3618 if (Variable *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {	3631 if (Variable *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {

3619 if (ConstantInteger *Const =	3632 if (ConstantInteger32 *Const =

3620 llvm::dyn_cast<ConstantInteger>(ArithInst->getSrc(1))) {	3633 llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1))) {

3621 if (ArithInst->getOp() == InstArithmetic::Mul &&	3634 if (ArithInst->getOp() == InstArithmetic::Mul &&

3622 !Var->getIsMultidef() && Const->getType() == IceType_i32) {	3635 !Var->getIsMultidef() && Const->getType() == IceType_i32) {

3623 uint64_t Mult = Const->getValue();	3636 uint64_t Mult = Const->getValue();

3624 uint32_t LogMult;	3637 uint32_t LogMult;

3625 switch (Mult) {	3638 switch (Mult) {

3626 case 1:	3639 case 1:

3627 LogMult = 0;	3640 LogMult = 0;

3628 break;	3641 break;

3629 case 2:	3642 case 2:

3630 LogMult = 1;	3643 LogMult = 1;

(...skipping 30 matching lines...) Expand all Loading...
3661 const Inst *BaseInst = Base->getDefinition();	3674 const Inst *BaseInst = Base->getDefinition();

3662 if (BaseInst == NULL)	3675 if (BaseInst == NULL)

3663 return false;	3676 return false;

3664 if (const InstArithmetic *ArithInst =	3677 if (const InstArithmetic *ArithInst =

3665 llvm::dyn_cast<const InstArithmetic>(BaseInst)) {	3678 llvm::dyn_cast<const InstArithmetic>(BaseInst)) {

3666 if (ArithInst->getOp() != InstArithmetic::Add &&	3679 if (ArithInst->getOp() != InstArithmetic::Add &&

3667 ArithInst->getOp() != InstArithmetic::Sub)	3680 ArithInst->getOp() != InstArithmetic::Sub)

3668 return false;	3681 return false;

3669 bool IsAdd = ArithInst->getOp() == InstArithmetic::Add;	3682 bool IsAdd = ArithInst->getOp() == InstArithmetic::Add;

3670 Variable *Var = NULL;	3683 Variable *Var = NULL;

3671 ConstantInteger *Const = NULL;	3684 ConstantInteger32 *Const = NULL;

3672 if (Variable *VariableOperand =	3685 if (Variable *VariableOperand =

3673 llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {	3686 llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {

3674 Var = VariableOperand;	3687 Var = VariableOperand;

3675 Const = llvm::dyn_cast<ConstantInteger>(ArithInst->getSrc(1));	3688 Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1));

3676 } else if (IsAdd) {	3689 } else if (IsAdd) {

3677 Const = llvm::dyn_cast<ConstantInteger>(ArithInst->getSrc(0));	3690 Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(0));

3678 Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(1));	3691 Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(1));

3679 }	3692 }

3680 if (Var == NULL || Const == NULL || Var->getIsMultidef())	3693 if (Var == NULL || Const == NULL || Var->getIsMultidef())

3681 return false;	3694 return false;

3682 Base = Var;	3695 Base = Var;

3683 Offset += IsAdd ? Const->getValue() : -Const->getValue();	3696 Offset += IsAdd ? Const->getValue() : -Const->getValue();

3684 Reason = BaseInst;	3697 Reason = BaseInst;

3685 return true;	3698 return true;

3686 }	3699 }

3687 return false;	3700 return false;

(...skipping 117 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3805 // Vanilla ICE load instructions should not use the segment registers,	3818 // Vanilla ICE load instructions should not use the segment registers,

3806 // and computeAddressOpt only works at the level of Variables and Constants,	3819 // and computeAddressOpt only works at the level of Variables and Constants,

3807 // not other OperandX8632Mem, so there should be no mention of segment	3820 // not other OperandX8632Mem, so there should be no mention of segment

3808 // registers there either.	3821 // registers there either.

3809 const OperandX8632Mem::SegmentRegisters SegmentReg =	3822 const OperandX8632Mem::SegmentRegisters SegmentReg =

3810 OperandX8632Mem::DefaultSegment;	3823 OperandX8632Mem::DefaultSegment;

3811 Variable *Base = llvm::dyn_cast<Variable>(Addr);	3824 Variable *Base = llvm::dyn_cast<Variable>(Addr);

3812 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);	3825 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);

3813 if (Base && Addr != Base) {	3826 if (Base && Addr != Base) {

3814 Inst->setDeleted();	3827 Inst->setDeleted();

3815 Constant *OffsetOp = Ctx->getConstantInt(IceType_i32, Offset);	3828 Constant *OffsetOp = Ctx->getConstantInt32(IceType_i32, Offset);

3816 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index,	3829 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index,

3817 Shift, SegmentReg);	3830 Shift, SegmentReg);

3818 Context.insert(InstLoad::create(Func, Dest, Addr));	3831 Context.insert(InstLoad::create(Func, Dest, Addr));

3819 }	3832 }

3820 }	3833 }

3821	3834

3822 void TargetX8632::randomlyInsertNop(float Probability) {	3835 void TargetX8632::randomlyInsertNop(float Probability) {

3823 RandomNumberGeneratorWrapper RNG(Ctx->getRNG());	3836 RandomNumberGeneratorWrapper RNG(Ctx->getRNG());

3824 if (RNG.getTrueWithProbability(Probability)) {	3837 if (RNG.getTrueWithProbability(Probability)) {

3825 _nop(RNG.next(X86_NUM_NOP_VARIANTS));	3838 _nop(RNG.next(X86_NUM_NOP_VARIANTS));

(...skipping 46 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3872 if (InstructionSet >= SSE4_1) {	3885 if (InstructionSet >= SSE4_1) {

3873 // TODO(wala): If the condition operand is a constant, use blendps	3886 // TODO(wala): If the condition operand is a constant, use blendps

3874 // or pblendw.	3887 // or pblendw.

3875 //	3888 //

3876 // Use blendvps or pblendvb to implement select.	3889 // Use blendvps or pblendvb to implement select.

3877 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||	3890 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||

3878 SrcTy == IceType_v4f32) {	3891 SrcTy == IceType_v4f32) {

3879 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);	3892 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);

3880 Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0);	3893 Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0);

3881 _movp(xmm0, ConditionRM);	3894 _movp(xmm0, ConditionRM);

3882 _psll(xmm0, Ctx->getConstantInt(IceType_i8, 31));	3895 _psll(xmm0, Ctx->getConstantInt32(IceType_i8, 31));

3883 _movp(T, SrcFRM);	3896 _movp(T, SrcFRM);

3884 _blendvps(T, SrcTRM, xmm0);	3897 _blendvps(T, SrcTRM, xmm0);

3885 _movp(Dest, T);	3898 _movp(Dest, T);

3886 } else {	3899 } else {

3887 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);	3900 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);

3888 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16	3901 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16

3889 : IceType_v16i8;	3902 : IceType_v16i8;

3890 Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0);	3903 Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0);

3891 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));	3904 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));

3892 _movp(T, SrcFRM);	3905 _movp(T, SrcFRM);

(...skipping 94 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3987 Variable *Base = llvm::dyn_cast<Variable>(Addr);	4000 Variable *Base = llvm::dyn_cast<Variable>(Addr);

3988 // Vanilla ICE store instructions should not use the segment registers,	4001 // Vanilla ICE store instructions should not use the segment registers,

3989 // and computeAddressOpt only works at the level of Variables and Constants,	4002 // and computeAddressOpt only works at the level of Variables and Constants,

3990 // not other OperandX8632Mem, so there should be no mention of segment	4003 // not other OperandX8632Mem, so there should be no mention of segment

3991 // registers there either.	4004 // registers there either.

3992 const OperandX8632Mem::SegmentRegisters SegmentReg =	4005 const OperandX8632Mem::SegmentRegisters SegmentReg =

3993 OperandX8632Mem::DefaultSegment;	4006 OperandX8632Mem::DefaultSegment;

3994 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);	4007 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);

3995 if (Base && Addr != Base) {	4008 if (Base && Addr != Base) {

3996 Inst->setDeleted();	4009 Inst->setDeleted();

3997 Constant *OffsetOp = Ctx->getConstantInt(IceType_i32, Offset);	4010 Constant *OffsetOp = Ctx->getConstantInt32(IceType_i32, Offset);

3998 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,	4011 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,

3999 Shift, SegmentReg);	4012 Shift, SegmentReg);

4000 Context.insert(InstStore::create(Func, Data, Addr));	4013 Context.insert(InstStore::create(Func, Data, Addr));

4001 }	4014 }

4002 }	4015 }

4003	4016

4004 void TargetX8632::lowerSwitch(const InstSwitch *Inst) {	4017 void TargetX8632::lowerSwitch(const InstSwitch *Inst) {

4005 // This implements the most naive possible lowering.	4018 // This implements the most naive possible lowering.

4006 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default	4019 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default

4007 Operand *Src0 = Inst->getComparison();	4020 Operand *Src0 = Inst->getComparison();

4008 SizeT NumCases = Inst->getNumCases();	4021 SizeT NumCases = Inst->getNumCases();

4009 // OK, we'll be slightly less naive by forcing Src into a physical	4022 // OK, we'll be slightly less naive by forcing Src into a physical

4010 // register if there are 2 or more uses.	4023 // register if there are 2 or more uses.

4011 if (NumCases >= 2)	4024 if (NumCases >= 2)

4012 Src0 = legalizeToVar(Src0, true);	4025 Src0 = legalizeToVar(Src0, true);

4013 else	4026 else

4014 Src0 = legalize(Src0, Legal_Reg | Legal_Mem, true);	4027 Src0 = legalize(Src0, Legal_Reg | Legal_Mem, true);

4015 for (SizeT I = 0; I < NumCases; ++I) {	4028 for (SizeT I = 0; I < NumCases; ++I) {

4016 // TODO(stichnot): Correct lowering for IceType_i64.	4029 // TODO(stichnot): Correct lowering for IceType_i64.

4017 Constant *Value = Ctx->getConstantInt(IceType_i32, Inst->getValue(I));	4030 Constant *Value = Ctx->getConstantInt32(IceType_i32, Inst->getValue(I));

4018 _cmp(Src0, Value);	4031 _cmp(Src0, Value);

4019 _br(InstX8632Br::Br_e, Inst->getLabel(I));	4032 _br(InstX8632Br::Br_e, Inst->getLabel(I));

4020 }	4033 }

4021	4034

4022 _br(Inst->getLabelDefault());	4035 _br(Inst->getLabelDefault());

4023 }	4036 }

4024	4037

4025 void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind,	4038 void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind,

4026 Variable *Dest, Operand *Src0,	4039 Variable *Dest, Operand *Src0,

4027 Operand *Src1) {	4040 Operand *Src1) {

4028 assert(isVectorType(Dest->getType()));	4041 assert(isVectorType(Dest->getType()));

4029 Type Ty = Dest->getType();	4042 Type Ty = Dest->getType();

4030 Type ElementTy = typeElementType(Ty);	4043 Type ElementTy = typeElementType(Ty);

4031 SizeT NumElements = typeNumElements(Ty);	4044 SizeT NumElements = typeNumElements(Ty);

4032	4045

4033 Operand *T = Ctx->getConstantUndef(Ty);	4046 Operand *T = Ctx->getConstantUndef(Ty);

4034 for (SizeT I = 0; I < NumElements; ++I) {	4047 for (SizeT I = 0; I < NumElements; ++I) {

4035 Constant *Index = Ctx->getConstantInt(IceType_i32, I);	4048 Constant *Index = Ctx->getConstantInt32(IceType_i32, I);

4036	4049

4037 // Extract the next two inputs.	4050 // Extract the next two inputs.

4038 Variable *Op0 = Func->makeVariable(ElementTy, Context.getNode());	4051 Variable *Op0 = Func->makeVariable(ElementTy, Context.getNode());

4039 lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index));	4052 lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index));

4040 Variable *Op1 = Func->makeVariable(ElementTy, Context.getNode());	4053 Variable *Op1 = Func->makeVariable(ElementTy, Context.getNode());

4041 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index));	4054 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index));

4042	4055

4043 // Perform the arithmetic as a scalar operation.	4056 // Perform the arithmetic as a scalar operation.

4044 Variable *Res = Func->makeVariable(ElementTy, Context.getNode());	4057 Variable *Res = Func->makeVariable(ElementTy, Context.getNode());

4045 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1));	4058 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1));

(...skipping 68 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4114 _psub(Dest, MinusOne);	4127 _psub(Dest, MinusOne);

4115 return Dest;	4128 return Dest;

4116 }	4129 }

4117	4130

4118 Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) {	4131 Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) {

4119 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||	4132 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||

4120 Ty == IceType_v16i8);	4133 Ty == IceType_v16i8);

4121 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {	4134 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {

4122 Variable *Reg = makeVectorOfOnes(Ty, RegNum);	4135 Variable *Reg = makeVectorOfOnes(Ty, RegNum);

4123 SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1;	4136 SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1;

4124 _psll(Reg, Ctx->getConstantInt(IceType_i8, Shift));	4137 _psll(Reg, Ctx->getConstantInt32(IceType_i8, Shift));

4125 return Reg;	4138 return Reg;

4126 } else {	4139 } else {

4127 // SSE has no left shift operation for vectors of 8 bit integers.	4140 // SSE has no left shift operation for vectors of 8 bit integers.

4128 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;	4141 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;

4129 Constant *ConstantMask =	4142 Constant *ConstantMask =

4130 Ctx->getConstantInt(IceType_i32, HIGH_ORDER_BITS_MASK);	4143 Ctx->getConstantInt32(IceType_i32, HIGH_ORDER_BITS_MASK);

4131 Variable *Reg = makeReg(Ty, RegNum);	4144 Variable *Reg = makeReg(Ty, RegNum);

4132 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));	4145 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));

4133 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));	4146 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));

4134 return Reg;	4147 return Reg;

4135 }	4148 }

4136 }	4149 }

4137	4150

4138 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,	4151 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,

4139 Variable *Slot,	4152 Variable *Slot,

4140 uint32_t Offset) {	4153 uint32_t Offset) {

4141 // Ensure that Loc is a stack slot.	4154 // Ensure that Loc is a stack slot.

4142 assert(Slot->getWeight() == RegWeight::Zero);	4155 assert(Slot->getWeight() == RegWeight::Zero);

4143 assert(Slot->getRegNum() == Variable::NoRegister);	4156 assert(Slot->getRegNum() == Variable::NoRegister);

4144 // Compute the location of Loc in memory.	4157 // Compute the location of Loc in memory.

4145 // TODO(wala,stichnot): lea should not be required. The address of	4158 // TODO(wala,stichnot): lea should not be required. The address of

4146 // the stack slot is known at compile time (although not until after	4159 // the stack slot is known at compile time (although not until after

4147 // addProlog()).	4160 // addProlog()).

4148 const Type PointerType = IceType_i32;	4161 const Type PointerType = IceType_i32;

4149 Variable *Loc = makeReg(PointerType);	4162 Variable *Loc = makeReg(PointerType);

4150 _lea(Loc, Slot);	4163 _lea(Loc, Slot);

4151 Constant *ConstantOffset = Ctx->getConstantInt(IceType_i32, Offset);	4164 Constant *ConstantOffset = Ctx->getConstantInt32(IceType_i32, Offset);

4152 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset);	4165 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset);

4153 }	4166 }

4154	4167

4155 // Helper for legalize() to emit the right code to lower an operand to a	4168 // Helper for legalize() to emit the right code to lower an operand to a

4156 // register of the appropriate type.	4169 // register of the appropriate type.

4157 Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) {	4170 Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) {

4158 Type Ty = Src->getType();	4171 Type Ty = Src->getType();

4159 Variable *Reg = makeReg(Ty, RegNum);	4172 Variable *Reg = makeReg(Ty, RegNum);

4160 if (isVectorType(Ty)) {	4173 if (isVectorType(Ty)) {

4161 _movp(Reg, Src);	4174 _movp(Reg, Src);

(...skipping 109 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4271 OperandX8632Mem *TargetX8632::FormMemoryOperand(Operand *Operand, Type Ty) {	4284 OperandX8632Mem *TargetX8632::FormMemoryOperand(Operand *Operand, Type Ty) {

4272 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand);	4285 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand);

4273 // It may be the case that address mode optimization already creates	4286 // It may be the case that address mode optimization already creates

4274 // an OperandX8632Mem, so in that case it wouldn't need another level	4287 // an OperandX8632Mem, so in that case it wouldn't need another level

4275 // of transformation.	4288 // of transformation.

4276 if (!Mem) {	4289 if (!Mem) {

4277 Variable *Base = llvm::dyn_cast<Variable>(Operand);	4290 Variable *Base = llvm::dyn_cast<Variable>(Operand);

4278 Constant *Offset = llvm::dyn_cast<Constant>(Operand);	4291 Constant *Offset = llvm::dyn_cast<Constant>(Operand);

4279 assert(Base || Offset);	4292 assert(Base || Offset);

4280 if (Offset) {	4293 if (Offset) {

4281 assert(llvm::isa<ConstantInteger>(Offset) ||	4294 assert(llvm::isa<ConstantInteger32>(Offset) ||

4282 llvm::isa<ConstantRelocatable>(Offset));	4295 llvm::isa<ConstantRelocatable>(Offset));

4283 }	4296 }

4284 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset);	4297 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset);

4285 }	4298 }

4286 return llvm::cast<OperandX8632Mem>(legalize(Mem));	4299 return llvm::cast<OperandX8632Mem>(legalize(Mem));

4287 }	4300 }

4288	4301

4289 Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {	4302 Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {

4290 // There aren't any 64-bit integer registers for x86-32.	4303 // There aren't any 64-bit integer registers for x86-32.

4291 assert(Type != IceType_i64);	4304 assert(Type != IceType_i64);

(...skipping 103 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4395 if (WhiteList[RegNum])	4408 if (WhiteList[RegNum])

4396 FreedRegisters[RegNum] = true;	4409 FreedRegisters[RegNum] = true;

4397 }	4410 }

4398 }	4411 }

4399 }	4412 }

4400 }	4413 }

4401 AvailableRegisters |= FreedRegisters;	4414 AvailableRegisters |= FreedRegisters;

4402 }	4415 }

4403 }	4416 }

4404	4417

4405 template <> void ConstantInteger::emit(GlobalContext *Ctx) const {	4418 template <> void ConstantInteger32::emit(GlobalContext *Ctx) const {

4406 Ostream &Str = Ctx->getStrEmit();	4419 Ostream &Str = Ctx->getStrEmit();

4407 Str << (int64_t) getValue();	4420 Str << (int32_t)getValue();

	4421 }

	4422

	4423 template <> void ConstantInteger64::emit(GlobalContext *) const {

	4424 llvm_unreachable("Not expecting to emit 64-bit integers");

4408 }	4425 }

4409	4426

4410 template <> void ConstantFloat::emit(GlobalContext *Ctx) const {	4427 template <> void ConstantFloat::emit(GlobalContext *Ctx) const {

4411 Ostream &Str = Ctx->getStrEmit();	4428 Ostream &Str = Ctx->getStrEmit();

4412 // It would be better to prefix with ".L$" instead of "L$", but	4429 // It would be better to prefix with ".L$" instead of "L$", but

4413 // llvm-mc doesn't parse "dword ptr [.L$foo]".	4430 // llvm-mc doesn't parse "dword ptr [.L$foo]".

4414 Str << "dword ptr [L$" << IceType_f32 << "$" << getPoolEntryID() << "]";	4431 Str << "dword ptr [L$" << IceType_f32 << "$" << getPoolEntryID() << "]";

4415 }	4432 }

4416	4433

4417 template <> void ConstantDouble::emit(GlobalContext *Ctx) const {	4434 template <> void ConstantDouble::emit(GlobalContext *Ctx) const {

(...skipping 95 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4513 Str << "\t.align\t" << Align << "\n";	4530 Str << "\t.align\t" << Align << "\n";

4514 Str << MangledName << ":\n";	4531 Str << MangledName << ":\n";

4515 for (SizeT i = 0; i < Size; ++i) {	4532 for (SizeT i = 0; i < Size; ++i) {

4516 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";	4533 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";

4517 }	4534 }

4518 Str << "\t.size\t" << MangledName << ", " << Size << "\n";	4535 Str << "\t.size\t" << MangledName << ", " << Size << "\n";

4519 }	4536 }

4520 }	4537 }

4521	4538

4522 } // end of namespace Ice	4539 } // end of namespace Ice

OLD	NEW

« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | src/IceUtils.h » ('j') | no next file with comments »