Chromium Code Reviews

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 737513008: Subzero: Simplify the constant pools. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Better fix for the int8/uint8 tests Created 6 years ago
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
(...skipping 600 matching lines...)
611 return; 611 return;
612 } 612 }
613 if (isVectorType(Ty)) { 613 if (isVectorType(Ty)) {
614 InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes); 614 InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes);
615 } 615 }
616 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); 616 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
617 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); 617 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
618 if (Arg->hasReg()) { 618 if (Arg->hasReg()) {
619 assert(Ty != IceType_i64); 619 assert(Ty != IceType_i64);
620 OperandX8632Mem *Mem = OperandX8632Mem::create( 620 OperandX8632Mem *Mem = OperandX8632Mem::create(
621 Func, Ty, FramePtr, 621 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset()));
622 Ctx->getConstantInt32(IceType_i32, Arg->getStackOffset()));
623 if (isVectorType(Arg->getType())) { 622 if (isVectorType(Arg->getType())) {
624 _movp(Arg, Mem); 623 _movp(Arg, Mem);
625 } else { 624 } else {
626 _mov(Arg, Mem); 625 _mov(Arg, Mem);
627 } 626 }
628 // This argument-copying instruction uses an explicit 627 // This argument-copying instruction uses an explicit
629 // OperandX8632Mem operand instead of a Variable, so its 628 // OperandX8632Mem operand instead of a Variable, so its
630 // fill-from-stack operation has to be tracked separately for 629 // fill-from-stack operation has to be tracked separately for
631 // statistics. 630 // statistics.
632 Ctx->statsUpdateFills(); 631 Ctx->statsUpdateFills();
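Note: the change repeated throughout this file is that the GlobalContext integer-constant getters no longer take an IceType; 32-bit constants are requested by value alone, and narrower widths use dedicated getters (getConstantInt1/8/16, seen further down). A minimal sketch of value-keyed pooling, purely for illustration; the class and member names below are not Subzero's actual implementation:

    #include <cstdint>
    #include <map>

    // Illustrative stand-in for Subzero's pooled 32-bit integer constant.
    struct ConstantInt32 {
      explicit ConstantInt32(uint32_t V) : Value(V) {}
      uint32_t Value;
    };

    // Sketch of a value-keyed pool: callers write getConstantInt32(N)
    // instead of getConstantInt32(IceType_i32, N).  (Assumption: the real
    // GlobalContext pools per bit width in some equivalent way.)
    class SimpleInt32Pool {
    public:
      ConstantInt32 *getConstantInt32(uint32_t Value) {
        auto It = Pool.find(Value);
        if (It != Pool.end())
          return It->second;               // reuse the pooled constant
        ConstantInt32 *C = new ConstantInt32(Value);
        Pool.emplace(Value, C);
        return C;
      }
    private:
      std::map<uint32_t, ConstantInt32 *> Pool;
    };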
(...skipping 199 matching lines...)
832 // Align esp if necessary. 831 // Align esp if necessary.
833 if (NeedsStackAlignment) { 832 if (NeedsStackAlignment) {
834 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; 833 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
835 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); 834 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
836 SpillAreaSizeBytes = StackSize - StackOffset; 835 SpillAreaSizeBytes = StackSize - StackOffset;
837 } 836 }
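When the frame needs re-alignment, the spill area is padded so that the return address, preserved registers, and spills together end on an aligned boundary. A worked sketch, assuming applyStackAlignment rounds up to a 16-byte X86_STACK_ALIGNMENT_BYTES; the concrete numbers below are made up for illustration:

    #include <cstdint>

    // Round X up to the next multiple of Align (Align must be a power of two).
    static uint32_t applyStackAlignment16(uint32_t X, uint32_t Align = 16) {
      return (X + Align - 1) & ~(Align - 1);
    }

    // Example: 4-byte return address, 8 bytes of preserved registers,
    // 24 bytes of spills:
    //   StackOffset        = 4 + 8                      = 12
    //   StackSize          = applyStackAlignment16(36)  = 48
    //   SpillAreaSizeBytes = 48 - 12                    = 36  (padded from 24)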
838 837
839 // Generate "sub esp, SpillAreaSizeBytes" 838 // Generate "sub esp, SpillAreaSizeBytes"
840 if (SpillAreaSizeBytes) 839 if (SpillAreaSizeBytes)
841 _sub(getPhysicalRegister(RegX8632::Reg_esp), 840 _sub(getPhysicalRegister(RegX8632::Reg_esp),
842 Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes)); 841 Ctx->getConstantInt32(SpillAreaSizeBytes));
843 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); 842 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
844 843
845 resetStackAdjustment(); 844 resetStackAdjustment();
846 845
847 // Fill in stack offsets for stack args, and copy args into registers 846 // Fill in stack offsets for stack args, and copy args into registers
848 // for those that were register-allocated. Args are pushed right to 847 // for those that were register-allocated. Args are pushed right to
849 // left, so Arg[0] is closest to the stack/frame pointer. 848 // left, so Arg[0] is closest to the stack/frame pointer.
850 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); 849 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
851 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES; 850 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES;
852 if (!IsEbpBasedFrame) 851 if (!IsEbpBasedFrame)
(...skipping 91 matching lines...)
944 Context.setInsertPoint(InsertPoint); 943 Context.setInsertPoint(InsertPoint);
945 944
946 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp); 945 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
947 if (IsEbpBasedFrame) { 946 if (IsEbpBasedFrame) {
948 Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp); 947 Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp);
949 _mov(esp, ebp); 948 _mov(esp, ebp);
950 _pop(ebp); 949 _pop(ebp);
951 } else { 950 } else {
952 // add esp, SpillAreaSizeBytes 951 // add esp, SpillAreaSizeBytes
953 if (SpillAreaSizeBytes) 952 if (SpillAreaSizeBytes)
954 _add(esp, Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes)); 953 _add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes));
955 } 954 }
956 955
957 // Add pop instructions for preserved registers. 956 // Add pop instructions for preserved registers.
958 llvm::SmallBitVector CalleeSaves = 957 llvm::SmallBitVector CalleeSaves =
959 getRegisterSet(RegSet_CalleeSave, RegSet_None); 958 getRegisterSet(RegSet_CalleeSave, RegSet_None);
960 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { 959 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
961 SizeT j = CalleeSaves.size() - i - 1; 960 SizeT j = CalleeSaves.size() - i - 1;
962 if (j == RegX8632::Reg_ebp && IsEbpBasedFrame) 961 if (j == RegX8632::Reg_ebp && IsEbpBasedFrame)
963 continue; 962 continue;
964 if (CalleeSaves[j] && RegsUsed[j]) { 963 if (CalleeSaves[j] && RegsUsed[j]) {
(...skipping 97 matching lines...)
1062 1061
1063 Operand *TargetX8632::loOperand(Operand *Operand) { 1062 Operand *TargetX8632::loOperand(Operand *Operand) {
1064 assert(Operand->getType() == IceType_i64); 1063 assert(Operand->getType() == IceType_i64);
1065 if (Operand->getType() != IceType_i64) 1064 if (Operand->getType() != IceType_i64)
1066 return Operand; 1065 return Operand;
1067 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { 1066 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
1068 split64(Var); 1067 split64(Var);
1069 return Var->getLo(); 1068 return Var->getLo();
1070 } 1069 }
1071 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { 1070 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1072 return Ctx->getConstantInt32(IceType_i32, 1071 return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
1073 static_cast<uint32_t>(Const->getValue()));
1074 } 1072 }
1075 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) { 1073 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
1076 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), 1074 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(),
1077 Mem->getOffset(), Mem->getIndex(), 1075 Mem->getOffset(), Mem->getIndex(),
1078 Mem->getShift(), Mem->getSegmentRegister()); 1076 Mem->getShift(), Mem->getSegmentRegister());
1079 } 1077 }
1080 llvm_unreachable("Unsupported operand type"); 1078 llvm_unreachable("Unsupported operand type");
1081 return NULL; 1079 return NULL;
1082 } 1080 }
1083 1081
1084 Operand *TargetX8632::hiOperand(Operand *Operand) { 1082 Operand *TargetX8632::hiOperand(Operand *Operand) {
1085 assert(Operand->getType() == IceType_i64); 1083 assert(Operand->getType() == IceType_i64);
1086 if (Operand->getType() != IceType_i64) 1084 if (Operand->getType() != IceType_i64)
1087 return Operand; 1085 return Operand;
1088 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { 1086 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
1089 split64(Var); 1087 split64(Var);
1090 return Var->getHi(); 1088 return Var->getHi();
1091 } 1089 }
1092 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { 1090 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1093 return Ctx->getConstantInt32( 1091 return Ctx->getConstantInt32(
1094 IceType_i32, static_cast<uint32_t>(Const->getValue() >> 32)); 1092 static_cast<uint32_t>(Const->getValue() >> 32));
1095 } 1093 }
1096 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) { 1094 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
1097 Constant *Offset = Mem->getOffset(); 1095 Constant *Offset = Mem->getOffset();
1098 if (Offset == NULL) 1096 if (Offset == NULL) {
1099 Offset = Ctx->getConstantInt32(IceType_i32, 4); 1097 Offset = Ctx->getConstantInt32(4);
1100 else if (ConstantInteger32 *IntOffset = 1098 } else if (ConstantInteger32 *IntOffset =
1101 llvm::dyn_cast<ConstantInteger32>(Offset)) { 1099 llvm::dyn_cast<ConstantInteger32>(Offset)) {
1102 Offset = Ctx->getConstantInt32(IceType_i32, 4 + IntOffset->getValue()); 1100 Offset = Ctx->getConstantInt32(4 + IntOffset->getValue());
1103 } else if (ConstantRelocatable *SymOffset = 1101 } else if (ConstantRelocatable *SymOffset =
1104 llvm::dyn_cast<ConstantRelocatable>(Offset)) { 1102 llvm::dyn_cast<ConstantRelocatable>(Offset)) {
1105 assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4)); 1103 assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4));
1106 Offset = Ctx->getConstantSym(IceType_i32, 4 + SymOffset->getOffset(), 1104 Offset =
1107 SymOffset->getName()); 1105 Ctx->getConstantSym(4 + SymOffset->getOffset(), SymOffset->getName(),
1106 SymOffset->getSuppressMangling());
1108 } 1107 }
1109 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset, 1108 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset,
1110 Mem->getIndex(), Mem->getShift(), 1109 Mem->getIndex(), Mem->getShift(),
1111 Mem->getSegmentRegister()); 1110 Mem->getSegmentRegister());
1112 } 1111 }
1113 llvm_unreachable("Unsupported operand type"); 1112 llvm_unreachable("Unsupported operand type");
1114 return NULL; 1113 return NULL;
1115 } 1114 }
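loOperand and hiOperand split an i64 operand into two i32 halves: variables via split64, memory operands by adding 4 to the offset, and integer constants arithmetically. The constant case in isolation (illustrative helper, not Subzero API):

    #include <cstdint>
    #include <utility>

    // Split a 64-bit immediate into the lo/hi 32-bit immediates the lowered
    // i64 code operates on (little-endian: lo half first, hi half at +4).
    static std::pair<uint32_t, uint32_t> splitImm64(uint64_t V) {
      return {static_cast<uint32_t>(V), static_cast<uint32_t>(V >> 32)};
    }

    // Example: splitImm64(0x1122334455667788) == {0x55667788, 0x11223344}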
1116 1115
1117 llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include, 1116 llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include,
(...skipping 43 matching lines...)
1161 // For default align=0, set it to the real value 1, to avoid any 1160 // For default align=0, set it to the real value 1, to avoid any
1162 // bit-manipulation problems below. 1161 // bit-manipulation problems below.
1163 AlignmentParam = std::max(AlignmentParam, 1u); 1162 AlignmentParam = std::max(AlignmentParam, 1u);
1164 1163
1165 // LLVM enforces power of 2 alignment. 1164 // LLVM enforces power of 2 alignment.
1166 assert((AlignmentParam & (AlignmentParam - 1)) == 0); 1165 assert((AlignmentParam & (AlignmentParam - 1)) == 0);
1167 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0); 1166 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);
1168 1167
1169 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES); 1168 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES);
1170 if (Alignment > X86_STACK_ALIGNMENT_BYTES) { 1169 if (Alignment > X86_STACK_ALIGNMENT_BYTES) {
1171 _and(esp, Ctx->getConstantInt32(IceType_i32, -Alignment)); 1170 _and(esp, Ctx->getConstantInt32(-Alignment));
1172 } 1171 }
1173 if (ConstantInteger32 *ConstantTotalSize = 1172 if (ConstantInteger32 *ConstantTotalSize =
1174 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { 1173 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
1175 uint32_t Value = ConstantTotalSize->getValue(); 1174 uint32_t Value = ConstantTotalSize->getValue();
1176 Value = applyAlignment(Value, Alignment); 1175 Value = applyAlignment(Value, Alignment);
1177 _sub(esp, Ctx->getConstantInt32(IceType_i32, Value)); 1176 _sub(esp, Ctx->getConstantInt32(Value));
1178 } else { 1177 } else {
1179 // Non-constant sizes need to be adjusted to the next highest 1178 // Non-constant sizes need to be adjusted to the next highest
1180 // multiple of the required alignment at runtime. 1179 // multiple of the required alignment at runtime.
1181 Variable *T = makeReg(IceType_i32); 1180 Variable *T = makeReg(IceType_i32);
1182 _mov(T, TotalSize); 1181 _mov(T, TotalSize);
1183 _add(T, Ctx->getConstantInt32(IceType_i32, Alignment - 1)); 1182 _add(T, Ctx->getConstantInt32(Alignment - 1));
1184 _and(T, Ctx->getConstantInt32(IceType_i32, -Alignment)); 1183 _and(T, Ctx->getConstantInt32(-Alignment));
1185 _sub(esp, T); 1184 _sub(esp, T);
1186 } 1185 }
1187 _mov(Dest, esp); 1186 _mov(Dest, esp);
1188 } 1187 }
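For a non-constant alloca size, the add/and pair rounds the size up to the alignment at runtime; -Alignment is simply the mask with the low log2(Alignment) bits cleared. The same arithmetic in C++ (assuming Alignment is a power of two):

    #include <cstdint>

    // Runtime equivalent of:  add T, Alignment-1 ; and T, -Alignment
    static uint32_t roundUpToAlignment(uint32_t TotalSize, uint32_t Alignment) {
      return (TotalSize + Alignment - 1) & (0u - Alignment); // 0u-A == -A mask
    }

    // Example with Alignment = 32:
    //   roundUpToAlignment(40, 32) == 64
    //   roundUpToAlignment(64, 32) == 64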
1189 1188
1190 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { 1189 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
1191 Variable *Dest = Inst->getDest(); 1190 Variable *Dest = Inst->getDest();
1192 Operand *Src0 = legalize(Inst->getSrc(0)); 1191 Operand *Src0 = legalize(Inst->getSrc(0));
1193 Operand *Src1 = legalize(Inst->getSrc(1)); 1192 Operand *Src1 = legalize(Inst->getSrc(1));
1194 if (Dest->getType() == IceType_i64) { 1193 if (Dest->getType() == IceType_i64) {
(...skipping 89 matching lines...)
1284 // t2 = shl t2, t1 1283 // t2 = shl t2, t1
1285 // test t1, 0x20 1284 // test t1, 0x20
1286 // je L1 1285 // je L1
1287 // use(t3) 1286 // use(t3)
1288 // t3 = t2 1287 // t3 = t2
1289 // t2 = 0 1288 // t2 = 0
1290 // L1: 1289 // L1:
1291 // a.lo = t2 1290 // a.lo = t2
1292 // a.hi = t3 1291 // a.hi = t3
1293 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; 1292 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
1294 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20); 1293 Constant *BitTest = Ctx->getConstantInt32(0x20);
1295 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1294 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1296 InstX8632Label *Label = InstX8632Label::create(Func, this); 1295 InstX8632Label *Label = InstX8632Label::create(Func, this);
1297 _mov(T_1, Src1Lo, RegX8632::Reg_ecx); 1296 _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
1298 _mov(T_2, Src0Lo); 1297 _mov(T_2, Src0Lo);
1299 _mov(T_3, Src0Hi); 1298 _mov(T_3, Src0Hi);
1300 _shld(T_3, T_2, T_1); 1299 _shld(T_3, T_2, T_1);
1301 _shl(T_2, T_1); 1300 _shl(T_2, T_1);
1302 _test(T_1, BitTest); 1301 _test(T_1, BitTest);
1303 _br(CondX86::Br_e, Label); 1302 _br(CondX86::Br_e, Label);
1304 // T_2 and T_3 are being assigned again because of the 1303 // T_2 and T_3 are being assigned again because of the
(...skipping 14 matching lines...)
1319 // t3 = shr t3, t1 1318 // t3 = shr t3, t1
1320 // test t1, 0x20 1319 // test t1, 0x20
1321 // je L1 1320 // je L1
1322 // use(t2) 1321 // use(t2)
1323 // t2 = t3 1322 // t2 = t3
1324 // t3 = 0 1323 // t3 = 0
1325 // L1: 1324 // L1:
1326 // a.lo = t2 1325 // a.lo = t2
1327 // a.hi = t3 1326 // a.hi = t3
1328 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; 1327 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
1329 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20); 1328 Constant *BitTest = Ctx->getConstantInt32(0x20);
1330 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1329 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1331 InstX8632Label *Label = InstX8632Label::create(Func, this); 1330 InstX8632Label *Label = InstX8632Label::create(Func, this);
1332 _mov(T_1, Src1Lo, RegX8632::Reg_ecx); 1331 _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
1333 _mov(T_2, Src0Lo); 1332 _mov(T_2, Src0Lo);
1334 _mov(T_3, Src0Hi); 1333 _mov(T_3, Src0Hi);
1335 _shrd(T_2, T_3, T_1); 1334 _shrd(T_2, T_3, T_1);
1336 _shr(T_3, T_1); 1335 _shr(T_3, T_1);
1337 _test(T_1, BitTest); 1336 _test(T_1, BitTest);
1338 _br(CondX86::Br_e, Label); 1337 _br(CondX86::Br_e, Label);
1339 // T_2 and T_3 are being assigned again because of the 1338 // T_2 and T_3 are being assigned again because of the
(...skipping 14 matching lines...)
1354 // t3 = sar t3, t1 1353 // t3 = sar t3, t1
1355 // test t1, 0x20 1354 // test t1, 0x20
1356 // je L1 1355 // je L1
1357 // use(t2) 1356 // use(t2)
1358 // t2 = t3 1357 // t2 = t3
1359 // t3 = sar t3, 0x1f 1358 // t3 = sar t3, 0x1f
1360 // L1: 1359 // L1:
1361 // a.lo = t2 1360 // a.lo = t2
1362 // a.hi = t3 1361 // a.hi = t3
1363 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; 1362 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
1364 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20); 1363 Constant *BitTest = Ctx->getConstantInt32(0x20);
1365 Constant *SignExtend = Ctx->getConstantInt32(IceType_i32, 0x1f); 1364 Constant *SignExtend = Ctx->getConstantInt32(0x1f);
1366 InstX8632Label *Label = InstX8632Label::create(Func, this); 1365 InstX8632Label *Label = InstX8632Label::create(Func, this);
1367 _mov(T_1, Src1Lo, RegX8632::Reg_ecx); 1366 _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
1368 _mov(T_2, Src0Lo); 1367 _mov(T_2, Src0Lo);
1369 _mov(T_3, Src0Hi); 1368 _mov(T_3, Src0Hi);
1370 _shrd(T_2, T_3, T_1); 1369 _shrd(T_2, T_3, T_1);
1371 _sar(T_3, T_1); 1370 _sar(T_3, T_1);
1372 _test(T_1, BitTest); 1371 _test(T_1, BitTest);
1373 _br(CondX86::Br_e, Label); 1372 _br(CondX86::Br_e, Label);
1374 // T_2 and T_3 are being assigned again because of the 1373 // T_2 and T_3 are being assigned again because of the
1375 // intra-block control flow, so T_2 needs the _mov_nonkillable 1374 // intra-block control flow, so T_2 needs the _mov_nonkillable
(...skipping 100 matching lines...)
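The test against 0x20 in the shift sequences above is needed because the 32-bit shld/shrd/shl/shr/sar forms mask the count in ecx to 5 bits, so counts of 32-63 must be handled by the conditional fix-up after the je. A C++ model of the Shl case (illustrative only):

    #include <cstdint>

    // Mirrors the lowered 64-bit shl: shld/shl only see Count & 31, and the
    // "test t1, 0x20" branch patches the halves when Count >= 32.
    static uint64_t shl64(uint32_t Lo, uint32_t Hi, uint32_t Count) {
      uint32_t C = Count & 31;                               // hardware masking
      uint32_t T3 = (Hi << C) | (C ? (Lo >> (32 - C)) : 0);  // shld t3, t2, t1
      uint32_t T2 = Lo << C;                                 // shl t2, t1
      if (Count & 0x20) { // count >= 32: high half takes the shifted low half
        T3 = T2;
        T2 = 0;
      }
      return (static_cast<uint64_t>(T3) << 32) | T2;
    }

    // Example: shl64(0x00000001, 0x00000000, 33) == 0x0000000200000000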
1476 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]} 1475 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}
1477 // pmuludq T2, T3 1476 // pmuludq T2, T3
1478 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])} 1477 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}
1479 // shufps T1, T2, {0,2,0,2} 1478 // shufps T1, T2, {0,2,0,2}
1480 // pshufd T4, T1, {0,2,1,3} 1479 // pshufd T4, T1, {0,2,1,3}
1481 // movups Dest, T4 1480 // movups Dest, T4
1482 1481
1483 // Mask that directs pshufd to create a vector with entries 1482 // Mask that directs pshufd to create a vector with entries
1484 // Src[1, 0, 3, 0] 1483 // Src[1, 0, 3, 0]
1485 const unsigned Constant1030 = 0x31; 1484 const unsigned Constant1030 = 0x31;
1486 Constant *Mask1030 = Ctx->getConstantInt32(IceType_i8, Constant1030); 1485 Constant *Mask1030 = Ctx->getConstantInt32(Constant1030);
1487 // Mask that directs shufps to create a vector with entries 1486 // Mask that directs shufps to create a vector with entries
1488 // Dest[0, 2], Src[0, 2] 1487 // Dest[0, 2], Src[0, 2]
1489 const unsigned Mask0202 = 0x88; 1488 const unsigned Mask0202 = 0x88;
1490 // Mask that directs pshufd to create a vector with entries 1489 // Mask that directs pshufd to create a vector with entries
1491 // Src[0, 2, 1, 3] 1490 // Src[0, 2, 1, 3]
1492 const unsigned Mask0213 = 0xd8; 1491 const unsigned Mask0213 = 0xd8;
1493 Variable *T1 = makeReg(IceType_v4i32); 1492 Variable *T1 = makeReg(IceType_v4i32);
1494 Variable *T2 = makeReg(IceType_v4i32); 1493 Variable *T2 = makeReg(IceType_v4i32);
1495 Variable *T3 = makeReg(IceType_v4i32); 1494 Variable *T3 = makeReg(IceType_v4i32);
1496 Variable *T4 = makeReg(IceType_v4i32); 1495 Variable *T4 = makeReg(IceType_v4i32);
1497 _movp(T1, Src0); 1496 _movp(T1, Src0);
1498 _pshufd(T2, Src0, Mask1030); 1497 _pshufd(T2, Src0, Mask1030);
1499 _pshufd(T3, Src1, Mask1030); 1498 _pshufd(T3, Src1, Mask1030);
1500 _pmuludq(T1, Src1); 1499 _pmuludq(T1, Src1);
1501 _pmuludq(T2, T3); 1500 _pmuludq(T2, T3);
1502 _shufps(T1, T2, Ctx->getConstantInt32(IceType_i8, Mask0202)); 1501 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202));
1503 _pshufd(T4, T1, Ctx->getConstantInt32(IceType_i8, Mask0213)); 1502 _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213));
1504 _movp(Dest, T4); 1503 _movp(Dest, T4);
1505 } else { 1504 } else {
1506 assert(Dest->getType() == IceType_v16i8); 1505 assert(Dest->getType() == IceType_v16i8);
1507 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); 1506 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1508 } 1507 }
1509 } break; 1508 } break;
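The magic immediates used by the v4i32 multiply above are ordinary SSE shuffle selectors: each 2-bit field of the byte, low field first, names a source element. A small decoder (illustrative helper, not part of Subzero):

    #include <cstdint>

    // Decode a pshufd/shufps-style immediate into its four 2-bit selectors.
    static void decodeShuffleImm(uint8_t Imm, unsigned Sel[4]) {
      for (int I = 0; I < 4; ++I)
        Sel[I] = (Imm >> (2 * I)) & 3;
    }

    // 0x31 == 0b00110001 -> {1, 0, 3, 0}   (the "Src[1, 0, 3, 0]" pshufd mask)
    // 0x88 == 0b10001000 -> {0, 2, 0, 2}   (the shufps Dest[0,2], Src[0,2] mask)
    // 0xd8 == 0b11011000 -> {0, 2, 1, 3}   (the final pshufd reordering)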
1510 case InstArithmetic::Shl: 1509 case InstArithmetic::Shl:
1511 case InstArithmetic::Lshr: 1510 case InstArithmetic::Lshr:
1512 case InstArithmetic::Ashr: 1511 case InstArithmetic::Ashr:
1513 case InstArithmetic::Udiv: 1512 case InstArithmetic::Udiv:
(...skipping 274 matching lines...)
1788 // The PNaCl ABI requires the width of arguments to be at least 32 bits. 1787 // The PNaCl ABI requires the width of arguments to be at least 32 bits.
1789 assert(typeWidthInBytes(Ty) >= 4); 1788 assert(typeWidthInBytes(Ty) >= 4);
1790 if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) { 1789 if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) {
1791 XmmArgs.push_back(Arg); 1790 XmmArgs.push_back(Arg);
1792 } else { 1791 } else {
1793 StackArgs.push_back(Arg); 1792 StackArgs.push_back(Arg);
1794 if (isVectorType(Arg->getType())) { 1793 if (isVectorType(Arg->getType())) {
1795 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); 1794 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
1796 } 1795 }
1797 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); 1796 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
1798 Constant *Loc = 1797 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
1799 Ctx->getConstantInt32(IceType_i32, ParameterAreaSizeBytes);
1800 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc)); 1798 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc));
1801 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); 1799 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
1802 } 1800 }
1803 } 1801 }
1804 1802
1805 // Adjust the parameter area so that the stack is aligned. It is 1803 // Adjust the parameter area so that the stack is aligned. It is
1806 // assumed that the stack is already aligned at the start of the 1804 // assumed that the stack is already aligned at the start of the
1807 // calling sequence. 1805 // calling sequence.
1808 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); 1806 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
1809 1807
(...skipping 71 matching lines...)
1881 Operand *CallTarget = legalize(Instr->getCallTarget()); 1879 Operand *CallTarget = legalize(Instr->getCallTarget());
1882 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget); 1880 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget);
1883 Context.insert(NewCall); 1881 Context.insert(NewCall);
1884 if (ReturnRegHi) 1882 if (ReturnRegHi)
1885 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); 1883 Context.insert(InstFakeDef::create(Func, ReturnRegHi));
1886 1884
1887 // Add the appropriate offset to esp. The call instruction takes care 1885 // Add the appropriate offset to esp. The call instruction takes care
1888 // of resetting the stack offset during emission. 1886 // of resetting the stack offset during emission.
1889 if (ParameterAreaSizeBytes) { 1887 if (ParameterAreaSizeBytes) {
1890 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); 1888 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
1891 _add(esp, Ctx->getConstantInt32(IceType_i32, ParameterAreaSizeBytes)); 1889 _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
1892 } 1890 }
1893 1891
1894 // Insert a register-kill pseudo instruction. 1892 // Insert a register-kill pseudo instruction.
1895 Context.insert(InstFakeKill::create(Func, NewCall)); 1893 Context.insert(InstFakeKill::create(Func, NewCall));
1896 1894
1897 // Generate a FakeUse to keep the call live if necessary. 1895 // Generate a FakeUse to keep the call live if necessary.
1898 if (Instr->hasSideEffects() && ReturnReg) { 1896 if (Instr->hasSideEffects() && ReturnReg) {
1899 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); 1897 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
1900 Context.insert(FakeUse); 1898 Context.insert(FakeUse);
1901 } 1899 }
(...skipping 56 matching lines...)
1958 Variable *T = makeReg(DestTy); 1956 Variable *T = makeReg(DestTy);
1959 _movp(T, Src0RM); 1957 _movp(T, Src0RM);
1960 _pand(T, OneMask); 1958 _pand(T, OneMask);
1961 Variable *Zeros = makeVectorOfZeros(Dest->getType()); 1959 Variable *Zeros = makeVectorOfZeros(Dest->getType());
1962 _pcmpgt(T, Zeros); 1960 _pcmpgt(T, Zeros);
1963 _movp(Dest, T); 1961 _movp(Dest, T);
1964 } else { 1962 } else {
1965 // width = width(elty) - 1; dest = (src << width) >> width 1963 // width = width(elty) - 1; dest = (src << width) >> width
1966 SizeT ShiftAmount = 1964 SizeT ShiftAmount =
1967 X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1; 1965 X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1;
1968 Constant *ShiftConstant = 1966 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount);
1969 Ctx->getConstantInt32(IceType_i8, ShiftAmount);
1970 Variable *T = makeReg(DestTy); 1967 Variable *T = makeReg(DestTy);
1971 _movp(T, Src0RM); 1968 _movp(T, Src0RM);
1972 _psll(T, ShiftConstant); 1969 _psll(T, ShiftConstant);
1973 _psra(T, ShiftConstant); 1970 _psra(T, ShiftConstant);
1974 _movp(Dest, T); 1971 _movp(Dest, T);
1975 } 1972 }
1976 } else if (Dest->getType() == IceType_i64) { 1973 } else if (Dest->getType() == IceType_i64) {
1977 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2 1974 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
1978 Constant *Shift = Ctx->getConstantInt32(IceType_i32, 31); 1975 Constant *Shift = Ctx->getConstantInt32(31);
1979 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 1976 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1980 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 1977 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1981 Variable *T_Lo = makeReg(DestLo->getType()); 1978 Variable *T_Lo = makeReg(DestLo->getType());
1982 if (Src0RM->getType() == IceType_i32) { 1979 if (Src0RM->getType() == IceType_i32) {
1983 _mov(T_Lo, Src0RM); 1980 _mov(T_Lo, Src0RM);
1984 } else if (Src0RM->getType() == IceType_i1) { 1981 } else if (Src0RM->getType() == IceType_i1) {
1985 _movzx(T_Lo, Src0RM); 1982 _movzx(T_Lo, Src0RM);
1986 _shl(T_Lo, Shift); 1983 _shl(T_Lo, Shift);
1987 _sar(T_Lo, Shift); 1984 _sar(T_Lo, Shift);
1988 } else { 1985 } else {
1989 _movsx(T_Lo, Src0RM); 1986 _movsx(T_Lo, Src0RM);
1990 } 1987 }
1991 _mov(DestLo, T_Lo); 1988 _mov(DestLo, T_Lo);
1992 Variable *T_Hi = NULL; 1989 Variable *T_Hi = NULL;
1993 _mov(T_Hi, T_Lo); 1990 _mov(T_Hi, T_Lo);
1994 if (Src0RM->getType() != IceType_i1) 1991 if (Src0RM->getType() != IceType_i1)
1995 // For i1, the sar instruction is already done above. 1992 // For i1, the sar instruction is already done above.
1996 _sar(T_Hi, Shift); 1993 _sar(T_Hi, Shift);
1997 _mov(DestHi, T_Hi); 1994 _mov(DestHi, T_Hi);
1998 } else if (Src0RM->getType() == IceType_i1) { 1995 } else if (Src0RM->getType() == IceType_i1) {
1999 // t1 = src 1996 // t1 = src
2000 // shl t1, dst_bitwidth - 1 1997 // shl t1, dst_bitwidth - 1
2001 // sar t1, dst_bitwidth - 1 1998 // sar t1, dst_bitwidth - 1
2002 // dst = t1 1999 // dst = t1
2003 size_t DestBits = X86_CHAR_BIT * typeWidthInBytes(Dest->getType()); 2000 size_t DestBits = X86_CHAR_BIT * typeWidthInBytes(Dest->getType());
2004 Constant *ShiftAmount = Ctx->getConstantInt32(IceType_i32, DestBits - 1); 2001 Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1);
2005 Variable *T = makeReg(Dest->getType()); 2002 Variable *T = makeReg(Dest->getType());
2006 if (typeWidthInBytes(Dest->getType()) <= 2003 if (typeWidthInBytes(Dest->getType()) <=
2007 typeWidthInBytes(Src0RM->getType())) { 2004 typeWidthInBytes(Src0RM->getType())) {
2008 _mov(T, Src0RM); 2005 _mov(T, Src0RM);
2009 } else { 2006 } else {
2010 // Widen the source using movsx or movzx. (It doesn't matter 2007 // Widen the source using movsx or movzx. (It doesn't matter
2011 // which one, since the following shl/sar overwrite the bits.) 2008 // which one, since the following shl/sar overwrite the bits.)
2012 _movzx(T, Src0RM); 2009 _movzx(T, Src0RM);
2013 } 2010 }
2014 _shl(T, ShiftAmount); 2011 _shl(T, ShiftAmount);
(...skipping 22 matching lines...)
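Both the vector path above (psll/psra by width(elty) - 1) and the scalar i1 path (shl/sar by dst_bitwidth - 1) sign-extend by parking the value's lowest bit in the sign position and arithmetic-shifting it back, which replicates that bit across the destination. The same trick for a 32-bit register (sketch; an arithmetic right shift of negative values is assumed, as on x86):

    #include <cstdint>

    // Sign-extend the low Bits bits of V across 32 bits via shl/sar.
    static int32_t signExtendLowBits(uint32_t V, unsigned Bits) {
      unsigned Shift = 32 - Bits;                 // i1 -> 31, i8 -> 24, ...
      return static_cast<int32_t>(V << Shift) >> Shift;
    }

    // Examples: signExtendLowBits(1, 1) == -1, signExtendLowBits(0x80, 8) == -128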
2037 Constant *Zero = Ctx->getConstantZero(IceType_i32); 2034 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2038 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 2035 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2039 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 2036 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2040 Variable *Tmp = makeReg(DestLo->getType()); 2037 Variable *Tmp = makeReg(DestLo->getType());
2041 if (Src0RM->getType() == IceType_i32) { 2038 if (Src0RM->getType() == IceType_i32) {
2042 _mov(Tmp, Src0RM); 2039 _mov(Tmp, Src0RM);
2043 } else { 2040 } else {
2044 _movzx(Tmp, Src0RM); 2041 _movzx(Tmp, Src0RM);
2045 } 2042 }
2046 if (Src0RM->getType() == IceType_i1) { 2043 if (Src0RM->getType() == IceType_i1) {
2047 Constant *One = Ctx->getConstantInt32(IceType_i32, 1); 2044 Constant *One = Ctx->getConstantInt32(1);
2048 _and(Tmp, One); 2045 _and(Tmp, One);
2049 } 2046 }
2050 _mov(DestLo, Tmp); 2047 _mov(DestLo, Tmp);
2051 _mov(DestHi, Zero); 2048 _mov(DestHi, Zero);
2052 } else if (Src0RM->getType() == IceType_i1) { 2049 } else if (Src0RM->getType() == IceType_i1) {
2053 // t = Src0RM; t &= 1; Dest = t 2050 // t = Src0RM; t &= 1; Dest = t
2054 Constant *One = Ctx->getConstantInt32(IceType_i32, 1); 2051 Constant *One = Ctx->getConstantInt32(1);
2055 Type DestTy = Dest->getType(); 2052 Type DestTy = Dest->getType();
2056 Variable *T; 2053 Variable *T;
2057 if (DestTy == IceType_i8) { 2054 if (DestTy == IceType_i8) {
2058 T = makeReg(DestTy); 2055 T = makeReg(DestTy);
2059 _mov(T, Src0RM); 2056 _mov(T, Src0RM);
2060 } else { 2057 } else {
2061 // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter. 2058 // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter.
2062 T = makeReg(IceType_i32); 2059 T = makeReg(IceType_i32);
2063 _movzx(T, Src0RM); 2060 _movzx(T, Src0RM);
2064 } 2061 }
(...skipping 19 matching lines...)
2084 _movp(Dest, T); 2081 _movp(Dest, T);
2085 } else { 2082 } else {
2086 Operand *Src0 = Inst->getSrc(0); 2083 Operand *Src0 = Inst->getSrc(0);
2087 if (Src0->getType() == IceType_i64) 2084 if (Src0->getType() == IceType_i64)
2088 Src0 = loOperand(Src0); 2085 Src0 = loOperand(Src0);
2089 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2086 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2090 // t1 = trunc Src0RM; Dest = t1 2087 // t1 = trunc Src0RM; Dest = t1
2091 Variable *T = NULL; 2088 Variable *T = NULL;
2092 _mov(T, Src0RM); 2089 _mov(T, Src0RM);
2093 if (Dest->getType() == IceType_i1) 2090 if (Dest->getType() == IceType_i1)
2094 _and(T, Ctx->getConstantInt32(IceType_i1, 1)); 2091 _and(T, Ctx->getConstantInt1(1));
2095 _mov(Dest, T); 2092 _mov(Dest, T);
2096 } 2093 }
2097 break; 2094 break;
2098 } 2095 }
2099 case InstCast::Fptrunc: 2096 case InstCast::Fptrunc:
2100 case InstCast::Fpext: { 2097 case InstCast::Fpext: {
2101 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2098 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2102 // t1 = cvt Src0RM; Dest = t1 2099 // t1 = cvt Src0RM; Dest = t1
2103 Variable *T = makeReg(Dest->getType()); 2100 Variable *T = makeReg(Dest->getType());
2104 _cvt(T, Src0RM, InstX8632Cvt::Float2float); 2101 _cvt(T, Src0RM, InstX8632Cvt::Float2float);
(...skipping 25 matching lines...)
2130 Call->addArg(Inst->getSrc(0)); 2127 Call->addArg(Inst->getSrc(0));
2131 lowerCall(Call); 2128 lowerCall(Call);
2132 } else { 2129 } else {
2133 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2130 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2134 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2131 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2135 Variable *T_1 = makeReg(IceType_i32); 2132 Variable *T_1 = makeReg(IceType_i32);
2136 Variable *T_2 = makeReg(Dest->getType()); 2133 Variable *T_2 = makeReg(Dest->getType());
2137 _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si); 2134 _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si);
2138 _mov(T_2, T_1); // T_1 and T_2 may have different integer types 2135 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
2139 if (Dest->getType() == IceType_i1) 2136 if (Dest->getType() == IceType_i1)
2140 _and(T_2, Ctx->getConstantInt32(IceType_i1, 1)); 2137 _and(T_2, Ctx->getConstantInt1(1));
2141 _mov(Dest, T_2); 2138 _mov(Dest, T_2);
2142 } 2139 }
2143 break; 2140 break;
2144 case InstCast::Fptoui: 2141 case InstCast::Fptoui:
2145 if (isVectorType(Dest->getType())) { 2142 if (isVectorType(Dest->getType())) {
2146 assert(Dest->getType() == IceType_v4i32 && 2143 assert(Dest->getType() == IceType_v4i32 &&
2147 Inst->getSrc(0)->getType() == IceType_v4f32); 2144 Inst->getSrc(0)->getType() == IceType_v4f32);
2148 const SizeT MaxSrcs = 1; 2145 const SizeT MaxSrcs = 1;
2149 InstCall *Call = makeHelperCall("Sz_fptoui_v4f32", Dest, MaxSrcs); 2146 InstCall *Call = makeHelperCall("Sz_fptoui_v4f32", Dest, MaxSrcs);
2150 Call->addArg(Inst->getSrc(0)); 2147 Call->addArg(Inst->getSrc(0));
(...skipping 15 matching lines...)
2166 lowerCall(Call); 2163 lowerCall(Call);
2167 return; 2164 return;
2168 } else { 2165 } else {
2169 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2166 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2170 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2167 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2171 Variable *T_1 = makeReg(IceType_i32); 2168 Variable *T_1 = makeReg(IceType_i32);
2172 Variable *T_2 = makeReg(Dest->getType()); 2169 Variable *T_2 = makeReg(Dest->getType());
2173 _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si); 2170 _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si);
2174 _mov(T_2, T_1); // T_1 and T_2 may have different integer types 2171 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
2175 if (Dest->getType() == IceType_i1) 2172 if (Dest->getType() == IceType_i1)
2176 _and(T_2, Ctx->getConstantInt32(IceType_i1, 1)); 2173 _and(T_2, Ctx->getConstantInt1(1));
2177 _mov(Dest, T_2); 2174 _mov(Dest, T_2);
2178 } 2175 }
2179 break; 2176 break;
2180 case InstCast::Sitofp: 2177 case InstCast::Sitofp:
2181 if (isVectorType(Dest->getType())) { 2178 if (isVectorType(Dest->getType())) {
2182 assert(Dest->getType() == IceType_v4f32 && 2179 assert(Dest->getType() == IceType_v4f32 &&
2183 Inst->getSrc(0)->getType() == IceType_v4i32); 2180 Inst->getSrc(0)->getType() == IceType_v4i32);
2184 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2181 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2185 Variable *T = makeReg(Dest->getType()); 2182 Variable *T = makeReg(Dest->getType());
2186 _cvt(T, Src0RM, InstX8632Cvt::Dq2ps); 2183 _cvt(T, Src0RM, InstX8632Cvt::Dq2ps);
(...skipping 208 matching lines...)
2395 Type Ty = SourceVectNotLegalized->getType(); 2392 Type Ty = SourceVectNotLegalized->getType();
2396 Type ElementTy = typeElementType(Ty); 2393 Type ElementTy = typeElementType(Ty);
2397 Type InVectorElementTy = getInVectorElementType(Ty); 2394 Type InVectorElementTy = getInVectorElementType(Ty);
2398 Variable *ExtractedElementR = makeReg(InVectorElementTy); 2395 Variable *ExtractedElementR = makeReg(InVectorElementTy);
2399 2396
2400 // TODO(wala): Determine the best lowering sequences for each type. 2397 // TODO(wala): Determine the best lowering sequences for each type.
2401 bool CanUsePextr = 2398 bool CanUsePextr =
2402 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1; 2399 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1;
2403 if (CanUsePextr && Ty != IceType_v4f32) { 2400 if (CanUsePextr && Ty != IceType_v4f32) {
2404 // Use pextrb, pextrw, or pextrd. 2401 // Use pextrb, pextrw, or pextrd.
2405 Constant *Mask = Ctx->getConstantInt32(IceType_i8, Index); 2402 Constant *Mask = Ctx->getConstantInt32(Index);
2406 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized); 2403 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized);
2407 _pextr(ExtractedElementR, SourceVectR, Mask); 2404 _pextr(ExtractedElementR, SourceVectR, Mask);
2408 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { 2405 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2409 // Use pshufd and movd/movss. 2406 // Use pshufd and movd/movss.
2410 Variable *T = NULL; 2407 Variable *T = NULL;
2411 if (Index) { 2408 if (Index) {
2412 // The shuffle only needs to occur if the element to be extracted 2409 // The shuffle only needs to occur if the element to be extracted
2413 // is not at the lowest index. 2410 // is not at the lowest index.
2414 Constant *Mask = Ctx->getConstantInt32(IceType_i8, Index); 2411 Constant *Mask = Ctx->getConstantInt32(Index);
2415 T = makeReg(Ty); 2412 T = makeReg(Ty);
2416 _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask); 2413 _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask);
2417 } else { 2414 } else {
2418 T = legalizeToVar(SourceVectNotLegalized); 2415 T = legalizeToVar(SourceVectNotLegalized);
2419 } 2416 }
2420 2417
2421 if (InVectorElementTy == IceType_i32) { 2418 if (InVectorElementTy == IceType_i32) {
2422 _movd(ExtractedElementR, T); 2419 _movd(ExtractedElementR, T);
2423 } else { // Ty == IceType_f32 2420 } else { // Ty == IceType_f32
2424 // TODO(wala): _movss is only used here because _mov does not 2421 // TODO(wala): _movss is only used here because _mov does not
(...skipping 117 matching lines...)
2542 } 2539 }
2543 bool HasC1 = (TableFcmp[Index].C1 != CondX86::Br_None); 2540 bool HasC1 = (TableFcmp[Index].C1 != CondX86::Br_None);
2544 bool HasC2 = (TableFcmp[Index].C2 != CondX86::Br_None); 2541 bool HasC2 = (TableFcmp[Index].C2 != CondX86::Br_None);
2545 if (HasC1) { 2542 if (HasC1) {
2546 Src0 = legalize(Src0); 2543 Src0 = legalize(Src0);
2547 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); 2544 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2548 Variable *T = NULL; 2545 Variable *T = NULL;
2549 _mov(T, Src0); 2546 _mov(T, Src0);
2550 _ucomiss(T, Src1RM); 2547 _ucomiss(T, Src1RM);
2551 } 2548 }
2552 Constant *Default = 2549 Constant *Default = Ctx->getConstantInt32(TableFcmp[Index].Default);
2553 Ctx->getConstantInt32(IceType_i32, TableFcmp[Index].Default);
2554 _mov(Dest, Default); 2550 _mov(Dest, Default);
2555 if (HasC1) { 2551 if (HasC1) {
2556 InstX8632Label *Label = InstX8632Label::create(Func, this); 2552 InstX8632Label *Label = InstX8632Label::create(Func, this);
2557 _br(TableFcmp[Index].C1, Label); 2553 _br(TableFcmp[Index].C1, Label);
2558 if (HasC2) { 2554 if (HasC2) {
2559 _br(TableFcmp[Index].C2, Label); 2555 _br(TableFcmp[Index].C2, Label);
2560 } 2556 }
2561 Constant *NonDefault = 2557 Constant *NonDefault = Ctx->getConstantInt32(!TableFcmp[Index].Default);
2562 Ctx->getConstantInt32(IceType_i32, !TableFcmp[Index].Default);
2563 _mov_nonkillable(Dest, NonDefault); 2558 _mov_nonkillable(Dest, NonDefault);
2564 Context.insert(Label); 2559 Context.insert(Label);
2565 } 2560 }
2566 } 2561 }
2567 2562
2568 void TargetX8632::lowerIcmp(const InstIcmp *Inst) { 2563 void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
2569 Operand *Src0 = legalize(Inst->getSrc(0)); 2564 Operand *Src0 = legalize(Inst->getSrc(0));
2570 Operand *Src1 = legalize(Inst->getSrc(1)); 2565 Operand *Src1 = legalize(Inst->getSrc(1));
2571 Variable *Dest = Inst->getDest(); 2566 Variable *Dest = Inst->getDest();
2572 2567
(...skipping 120 matching lines...)
2693 _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(), 2688 _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(),
2694 NextBr->getTargetFalse()); 2689 NextBr->getTargetFalse());
2695 // Skip over the following branch instruction. 2690 // Skip over the following branch instruction.
2696 Context.advanceNext(); 2691 Context.advanceNext();
2697 return; 2692 return;
2698 } 2693 }
2699 } 2694 }
2700 2695
2701 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: 2696 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
2702 Constant *Zero = Ctx->getConstantZero(IceType_i32); 2697 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2703 Constant *One = Ctx->getConstantInt32(IceType_i32, 1); 2698 Constant *One = Ctx->getConstantInt32(1);
2704 if (Src0->getType() == IceType_i64) { 2699 if (Src0->getType() == IceType_i64) {
2705 InstIcmp::ICond Condition = Inst->getCondition(); 2700 InstIcmp::ICond Condition = Inst->getCondition();
2706 size_t Index = static_cast<size_t>(Condition); 2701 size_t Index = static_cast<size_t>(Condition);
2707 assert(Index < TableIcmp64Size); 2702 assert(Index < TableIcmp64Size);
2708 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem); 2703 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
2709 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem); 2704 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
2710 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); 2705 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
2711 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); 2706 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
2712 if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) { 2707 if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) {
2713 InstX8632Label *Label = InstX8632Label::create(Func, this); 2708 InstX8632Label *Label = InstX8632Label::create(Func, this);
(...skipping 57 matching lines...)
2771 2766
2772 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1) { 2767 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1) {
2773 // Use insertps, pinsrb, pinsrw, or pinsrd. 2768 // Use insertps, pinsrb, pinsrw, or pinsrd.
2774 Operand *ElementRM = 2769 Operand *ElementRM =
2775 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); 2770 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
2776 Operand *SourceVectRM = 2771 Operand *SourceVectRM =
2777 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); 2772 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
2778 Variable *T = makeReg(Ty); 2773 Variable *T = makeReg(Ty);
2779 _movp(T, SourceVectRM); 2774 _movp(T, SourceVectRM);
2780 if (Ty == IceType_v4f32) 2775 if (Ty == IceType_v4f32)
2781 _insertps(T, ElementRM, Ctx->getConstantInt32(IceType_i8, Index << 4)); 2776 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4));
2782 else 2777 else
2783 _pinsr(T, ElementRM, Ctx->getConstantInt32(IceType_i8, Index)); 2778 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index));
2784 _movp(Inst->getDest(), T); 2779 _movp(Inst->getDest(), T);
2785 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { 2780 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2786 // Use shufps or movss. 2781 // Use shufps or movss.
2787 Variable *ElementR = NULL; 2782 Variable *ElementR = NULL;
2788 Operand *SourceVectRM = 2783 Operand *SourceVectRM =
2789 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); 2784 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
2790 2785
2791 if (InVectorElementTy == IceType_f32) { 2786 if (InVectorElementTy == IceType_f32) {
2792 // ElementR will be in an XMM register since it is floating point. 2787 // ElementR will be in an XMM register since it is floating point.
2793 ElementR = legalizeToVar(ElementToInsertNotLegalized); 2788 ElementR = legalizeToVar(ElementToInsertNotLegalized);
(...skipping 30 matching lines...)
2824 // ElementR := ElementR[0, 0] T[0, 3] 2819 // ElementR := ElementR[0, 0] T[0, 3]
2825 // T := T[0, 1] ElementR[0, 3] 2820 // T := T[0, 1] ElementR[0, 3]
2826 // 2821 //
2827 // insertelement into index 3 (result is stored in T): 2822 // insertelement into index 3 (result is stored in T):
2828 // T := SourceVectRM 2823 // T := SourceVectRM
2829 // ElementR := ElementR[0, 0] T[0, 2] 2824 // ElementR := ElementR[0, 0] T[0, 2]
2830 // T := T[0, 1] ElementR[3, 0] 2825 // T := T[0, 1] ElementR[3, 0]
2831 const unsigned char Mask1[3] = { 0, 192, 128 }; 2826 const unsigned char Mask1[3] = { 0, 192, 128 };
2832 const unsigned char Mask2[3] = { 227, 196, 52 }; 2827 const unsigned char Mask2[3] = { 227, 196, 52 };
2833 2828
2834 Constant *Mask1Constant = 2829 Constant *Mask1Constant = Ctx->getConstantInt32(Mask1[Index - 1]);
2835 Ctx->getConstantInt32(IceType_i8, Mask1[Index - 1]); 2830 Constant *Mask2Constant = Ctx->getConstantInt32(Mask2[Index - 1]);
2836 Constant *Mask2Constant =
2837 Ctx->getConstantInt32(IceType_i8, Mask2[Index - 1]);
2838 2831
2839 if (Index == 1) { 2832 if (Index == 1) {
2840 _shufps(ElementR, SourceVectRM, Mask1Constant); 2833 _shufps(ElementR, SourceVectRM, Mask1Constant);
2841 _shufps(ElementR, SourceVectRM, Mask2Constant); 2834 _shufps(ElementR, SourceVectRM, Mask2Constant);
2842 _movp(Inst->getDest(), ElementR); 2835 _movp(Inst->getDest(), ElementR);
2843 } else { 2836 } else {
2844 Variable *T = makeReg(Ty); 2837 Variable *T = makeReg(Ty);
2845 _movp(T, SourceVectRM); 2838 _movp(T, SourceVectRM);
2846 _shufps(ElementR, T, Mask1Constant); 2839 _shufps(ElementR, T, Mask1Constant);
2847 _shufps(T, ElementR, Mask2Constant); 2840 _shufps(T, ElementR, Mask2Constant);
(...skipping 71 matching lines...)
 2919 // Some x86-64 processors support the cmpxchg16b instruction, which 2912 // Some x86-64 processors support the cmpxchg16b instruction, which
2920 // can make 16-byte operations lock free (when used with the LOCK 2913 // can make 16-byte operations lock free (when used with the LOCK
2921 // prefix). However, that's not supported in 32-bit mode, so just 2914 // prefix). However, that's not supported in 32-bit mode, so just
2922 // return 0 even for large sizes. 2915 // return 0 even for large sizes.
2923 Result = Ctx->getConstantZero(IceType_i32); 2916 Result = Ctx->getConstantZero(IceType_i32);
2924 break; 2917 break;
2925 case 1: 2918 case 1:
2926 case 2: 2919 case 2:
2927 case 4: 2920 case 4:
2928 case 8: 2921 case 8:
2929 Result = Ctx->getConstantInt32(IceType_i32, 1); 2922 Result = Ctx->getConstantInt32(1);
2930 break; 2923 break;
2931 } 2924 }
2932 _mov(Dest, Result); 2925 _mov(Dest, Result);
2933 return; 2926 return;
2934 } 2927 }
2935 // The PNaCl ABI requires the byte size to be a compile-time constant. 2928 // The PNaCl ABI requires the byte size to be a compile-time constant.
2936 Func->setError("AtomicIsLockFree byte size should be compile-time const"); 2929 Func->setError("AtomicIsLockFree byte size should be compile-time const");
2937 return; 2930 return;
2938 } 2931 }
2939 case Intrinsics::AtomicLoad: { 2932 case Intrinsics::AtomicLoad: {
(...skipping 85 matching lines...)
3025 _bswap(T_Hi); 3018 _bswap(T_Hi);
3026 _mov(DestLo, T_Hi); 3019 _mov(DestLo, T_Hi);
3027 _mov(DestHi, T_Lo); 3020 _mov(DestHi, T_Lo);
3028 } else if (Val->getType() == IceType_i32) { 3021 } else if (Val->getType() == IceType_i32) {
3029 Variable *T = legalizeToVar(Val); 3022 Variable *T = legalizeToVar(Val);
3030 _bswap(T); 3023 _bswap(T);
3031 _mov(Dest, T); 3024 _mov(Dest, T);
3032 } else { 3025 } else {
3033 assert(Val->getType() == IceType_i16); 3026 assert(Val->getType() == IceType_i16);
3034 Val = legalize(Val); 3027 Val = legalize(Val);
3035 Constant *Eight = Ctx->getConstantInt32(IceType_i16, 8); 3028 Constant *Eight = Ctx->getConstantInt16(8);
3036 Variable *T = NULL; 3029 Variable *T = NULL;
3037 _mov(T, Val); 3030 _mov(T, Val);
3038 _rol(T, Eight); 3031 _rol(T, Eight);
3039 _mov(Dest, T); 3032 _mov(Dest, T);
3040 } 3033 }
3041 return; 3034 return;
3042 } 3035 }
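bswap is only encodable for 32-bit (and 64-bit) registers, so the i16 case above swaps its two bytes with a rotate by 8 instead. A one-line check of the equivalence (sketch):

    #include <cstdint>

    // rol16(V, 8) swaps the two bytes, e.g. rol16(0x1234, 8) == 0x3412.
    static uint16_t rol16(uint16_t V, unsigned N) {
      return static_cast<uint16_t>((V << N) | (V >> (16 - N)));
    }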
3043 case Intrinsics::Ctpop: { 3036 case Intrinsics::Ctpop: {
3044 Variable *Dest = Instr->getDest(); 3037 Variable *Dest = Instr->getDest();
3045 Operand *Val = Instr->getArg(0); 3038 Operand *Val = Instr->getArg(0);
(...skipping 463 matching lines...)
3509 // bit position conversion, and the speculation is reversed. 3502 // bit position conversion, and the speculation is reversed.
3510 assert(Ty == IceType_i32 || Ty == IceType_i64); 3503 assert(Ty == IceType_i32 || Ty == IceType_i64);
3511 Variable *T = makeReg(IceType_i32); 3504 Variable *T = makeReg(IceType_i32);
3512 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg); 3505 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg);
3513 if (Cttz) { 3506 if (Cttz) {
3514 _bsf(T, FirstValRM); 3507 _bsf(T, FirstValRM);
3515 } else { 3508 } else {
3516 _bsr(T, FirstValRM); 3509 _bsr(T, FirstValRM);
3517 } 3510 }
3518 Variable *T_Dest = makeReg(IceType_i32); 3511 Variable *T_Dest = makeReg(IceType_i32);
3519 Constant *ThirtyTwo = Ctx->getConstantInt32(IceType_i32, 32); 3512 Constant *ThirtyTwo = Ctx->getConstantInt32(32);
3520 Constant *ThirtyOne = Ctx->getConstantInt32(IceType_i32, 31); 3513 Constant *ThirtyOne = Ctx->getConstantInt32(31);
3521 if (Cttz) { 3514 if (Cttz) {
3522 _mov(T_Dest, ThirtyTwo); 3515 _mov(T_Dest, ThirtyTwo);
3523 } else { 3516 } else {
3524 Constant *SixtyThree = Ctx->getConstantInt32(IceType_i32, 63); 3517 Constant *SixtyThree = Ctx->getConstantInt32(63);
3525 _mov(T_Dest, SixtyThree); 3518 _mov(T_Dest, SixtyThree);
3526 } 3519 }
3527 _cmov(T_Dest, T, CondX86::Br_ne); 3520 _cmov(T_Dest, T, CondX86::Br_ne);
3528 if (!Cttz) { 3521 if (!Cttz) {
3529 _xor(T_Dest, ThirtyOne); 3522 _xor(T_Dest, ThirtyOne);
3530 } 3523 }
3531 if (Ty == IceType_i32) { 3524 if (Ty == IceType_i32) {
3532 _mov(Dest, T_Dest); 3525 _mov(Dest, T_Dest);
3533 return; 3526 return;
3534 } 3527 }
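bsf/bsr produce a bit index and leave the destination undefined for a zero input, which is why the lowering preloads the zero-input answer and overwrites it with cmovne; for ctlz the bit index is then converted with xor 31 (equal to 31 - index for indices 0-31), and 63 ^ 31 == 32 supplies the zero-input result. A scalar model of the 32-bit ctlz path (illustrative):

    #include <cstdint>

    // Mirrors: bsr T, Val ; mov T_Dest, 63 ; cmovne T_Dest, T ; xor T_Dest, 31
    static uint32_t ctlz32(uint32_t Val) {
      uint32_t T_Dest = 63;            // speculative answer for Val == 0
      if (Val != 0) {                  // cmovne
        uint32_t Index = 31;           // bsr: index of the highest set bit
        while (!(Val & (1u << Index)))
          --Index;
        T_Dest = Index;
      }
      return T_Dest ^ 31;              // 63 ^ 31 == 32 for the zero input
    }

    // Examples: ctlz32(0x80000000) == 0, ctlz32(1) == 31, ctlz32(0) == 32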
(...skipping 318 matching lines...)
3853 // Vanilla ICE load instructions should not use the segment registers, 3846 // Vanilla ICE load instructions should not use the segment registers,
3854 // and computeAddressOpt only works at the level of Variables and Constants, 3847 // and computeAddressOpt only works at the level of Variables and Constants,
3855 // not other OperandX8632Mem, so there should be no mention of segment 3848 // not other OperandX8632Mem, so there should be no mention of segment
3856 // registers there either. 3849 // registers there either.
3857 const OperandX8632Mem::SegmentRegisters SegmentReg = 3850 const OperandX8632Mem::SegmentRegisters SegmentReg =
3858 OperandX8632Mem::DefaultSegment; 3851 OperandX8632Mem::DefaultSegment;
3859 Variable *Base = llvm::dyn_cast<Variable>(Addr); 3852 Variable *Base = llvm::dyn_cast<Variable>(Addr);
3860 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); 3853 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
3861 if (Base && Addr != Base) { 3854 if (Base && Addr != Base) {
3862 Inst->setDeleted(); 3855 Inst->setDeleted();
3863 Constant *OffsetOp = Ctx->getConstantInt32(IceType_i32, Offset); 3856 Constant *OffsetOp = Ctx->getConstantInt32(Offset);
3864 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index, 3857 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index,
3865 Shift, SegmentReg); 3858 Shift, SegmentReg);
3866 Context.insert(InstLoad::create(Func, Dest, Addr)); 3859 Context.insert(InstLoad::create(Func, Dest, Addr));
3867 } 3860 }
3868 } 3861 }
3869 3862
3870 void TargetX8632::randomlyInsertNop(float Probability) { 3863 void TargetX8632::randomlyInsertNop(float Probability) {
3871 RandomNumberGeneratorWrapper RNG(Ctx->getRNG()); 3864 RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
3872 if (RNG.getTrueWithProbability(Probability)) { 3865 if (RNG.getTrueWithProbability(Probability)) {
3873 _nop(RNG.next(X86_NUM_NOP_VARIANTS)); 3866 _nop(RNG.next(X86_NUM_NOP_VARIANTS));
(...skipping 45 matching lines...)
3919 if (InstructionSet >= SSE4_1) { 3912 if (InstructionSet >= SSE4_1) {
3920 // TODO(wala): If the condition operand is a constant, use blendps 3913 // TODO(wala): If the condition operand is a constant, use blendps
3921 // or pblendw. 3914 // or pblendw.
3922 // 3915 //
3923 // Use blendvps or pblendvb to implement select. 3916 // Use blendvps or pblendvb to implement select.
3924 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || 3917 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
3925 SrcTy == IceType_v4f32) { 3918 SrcTy == IceType_v4f32) {
3926 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); 3919 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
3927 Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0); 3920 Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0);
3928 _movp(xmm0, ConditionRM); 3921 _movp(xmm0, ConditionRM);
3929 _psll(xmm0, Ctx->getConstantInt32(IceType_i8, 31)); 3922 _psll(xmm0, Ctx->getConstantInt8(31));
3930 _movp(T, SrcFRM); 3923 _movp(T, SrcFRM);
3931 _blendvps(T, SrcTRM, xmm0); 3924 _blendvps(T, SrcTRM, xmm0);
3932 _movp(Dest, T); 3925 _movp(Dest, T);
3933 } else { 3926 } else {
3934 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); 3927 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
3935 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 3928 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16
3936 : IceType_v16i8; 3929 : IceType_v16i8;
3937 Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0); 3930 Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0);
3938 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); 3931 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
3939 _movp(T, SrcFRM); 3932 _movp(T, SrcFRM);
(...skipping 91 matching lines...)
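blendvps (and pblendvb) select between the two sources lane-by-lane based on the top bit of the corresponding lane of xmm0, so the boolean condition vector above is first shifted left by 31 to move its 0/1 into the sign bit. A per-lane model of the v4i32/v4f32 path (sketch):

    #include <cstdint>

    // Models: psll xmm0, 31 ; movp T, SrcF ; blendvps T, SrcT, xmm0
    static void blendByCondition(const uint32_t Cond[4], const uint32_t SrcT[4],
                                 const uint32_t SrcF[4], uint32_t Dest[4]) {
      for (int I = 0; I < 4; ++I) {
        uint32_t Mask = Cond[I] << 31;  // move the i1 into the lane's sign bit
        Dest[I] = (Mask & 0x80000000u) ? SrcT[I] : SrcF[I];
      }
    }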
4031 Variable *Base = llvm::dyn_cast<Variable>(Addr); 4024 Variable *Base = llvm::dyn_cast<Variable>(Addr);
4032 // Vanilla ICE store instructions should not use the segment registers, 4025 // Vanilla ICE store instructions should not use the segment registers,
4033 // and computeAddressOpt only works at the level of Variables and Constants, 4026 // and computeAddressOpt only works at the level of Variables and Constants,
4034 // not other OperandX8632Mem, so there should be no mention of segment 4027 // not other OperandX8632Mem, so there should be no mention of segment
4035 // registers there either. 4028 // registers there either.
4036 const OperandX8632Mem::SegmentRegisters SegmentReg = 4029 const OperandX8632Mem::SegmentRegisters SegmentReg =
4037 OperandX8632Mem::DefaultSegment; 4030 OperandX8632Mem::DefaultSegment;
4038 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); 4031 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
4039 if (Base && Addr != Base) { 4032 if (Base && Addr != Base) {
4040 Inst->setDeleted(); 4033 Inst->setDeleted();
4041 Constant *OffsetOp = Ctx->getConstantInt32(IceType_i32, Offset); 4034 Constant *OffsetOp = Ctx->getConstantInt32(Offset);
4042 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index, 4035 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,
4043 Shift, SegmentReg); 4036 Shift, SegmentReg);
4044 Context.insert(InstStore::create(Func, Data, Addr)); 4037 Context.insert(InstStore::create(Func, Data, Addr));
4045 } 4038 }
4046 } 4039 }
4047 4040
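The block above is the store half of address-mode optimization: when computeAddressOpt can fold neighboring adds and shifts into Base/Index/Shift/Offset, the original store is deleted and re-created against a single x86 memory operand of the form base + index*2^shift + offset. A rough before/after illustration in plain C++ (hypothetical, not Subzero code):

  #include <cstdint>

  void storeNaive(int32_t *Base, int32_t Index, int32_t Val) {
    int32_t *Tmp = Base + Index; // address built by separate instructions
    *(Tmp + 3) = Val;            // then used as a plain [reg] store
  }

  void storeFolded(int32_t *Base, int32_t Index, int32_t Val) {
    Base[Index + 3] = Val;       // one operand: mov [Base + 4*Index + 12], Val
  }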
4048 void TargetX8632::lowerSwitch(const InstSwitch *Inst) { 4041 void TargetX8632::lowerSwitch(const InstSwitch *Inst) {
4049 // This implements the most naive possible lowering. 4042 // This implements the most naive possible lowering.
4050 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default 4043 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
4051 Operand *Src0 = Inst->getComparison(); 4044 Operand *Src0 = Inst->getComparison();
4052 SizeT NumCases = Inst->getNumCases(); 4045 SizeT NumCases = Inst->getNumCases();
4053 if (Src0->getType() == IceType_i64) { 4046 if (Src0->getType() == IceType_i64) {
4054 Src0 = legalize(Src0); // get Base/Index into physical registers 4047 Src0 = legalize(Src0); // get Base/Index into physical registers
4055 Operand *Src0Lo = loOperand(Src0); 4048 Operand *Src0Lo = loOperand(Src0);
4056 Operand *Src0Hi = hiOperand(Src0); 4049 Operand *Src0Hi = hiOperand(Src0);
4057 if (NumCases >= 2) { 4050 if (NumCases >= 2) {
4058 Src0Lo = legalizeToVar(Src0Lo); 4051 Src0Lo = legalizeToVar(Src0Lo);
4059 Src0Hi = legalizeToVar(Src0Hi); 4052 Src0Hi = legalizeToVar(Src0Hi);
4060 } else { 4053 } else {
4061 Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem); 4054 Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem);
4062 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem); 4055 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
4063 } 4056 }
4064 for (SizeT I = 0; I < NumCases; ++I) { 4057 for (SizeT I = 0; I < NumCases; ++I) {
4065 Constant *ValueLo = Ctx->getConstantInt32(IceType_i32, Inst->getValue(I)); 4058 Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I));
4066 Constant *ValueHi = 4059 Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32);
4067 Ctx->getConstantInt32(IceType_i32, Inst->getValue(I) >> 32);
4068 InstX8632Label *Label = InstX8632Label::create(Func, this); 4060 InstX8632Label *Label = InstX8632Label::create(Func, this);
4069 _cmp(Src0Lo, ValueLo); 4061 _cmp(Src0Lo, ValueLo);
4070 _br(CondX86::Br_ne, Label); 4062 _br(CondX86::Br_ne, Label);
4071 _cmp(Src0Hi, ValueHi); 4063 _cmp(Src0Hi, ValueHi);
4072 _br(CondX86::Br_e, Inst->getLabel(I)); 4064 _br(CondX86::Br_e, Inst->getLabel(I));
4073 Context.insert(Label); 4065 Context.insert(Label);
4074 } 4066 }
4075 _br(Inst->getLabelDefault()); 4067 _br(Inst->getLabelDefault());
4076 return; 4068 return;
4077 } 4069 }
4078 // OK, we'll be slightly less naive by forcing Src into a physical 4070 // OK, we'll be slightly less naive by forcing Src into a physical
4079 // register if there are 2 or more uses. 4071 // register if there are 2 or more uses.
4080 if (NumCases >= 2) 4072 if (NumCases >= 2)
4081 Src0 = legalizeToVar(Src0); 4073 Src0 = legalizeToVar(Src0);
4082 else 4074 else
4083 Src0 = legalize(Src0, Legal_Reg | Legal_Mem); 4075 Src0 = legalize(Src0, Legal_Reg | Legal_Mem);
4084 for (SizeT I = 0; I < NumCases; ++I) { 4076 for (SizeT I = 0; I < NumCases; ++I) {
4085 Constant *Value = Ctx->getConstantInt32(IceType_i32, Inst->getValue(I)); 4077 Constant *Value = Ctx->getConstantInt32(Inst->getValue(I));
4086 _cmp(Src0, Value); 4078 _cmp(Src0, Value);
4087 _br(CondX86::Br_e, Inst->getLabel(I)); 4079 _br(CondX86::Br_e, Inst->getLabel(I));
4088 } 4080 }
4089 4081
4090 _br(Inst->getLabelDefault()); 4082 _br(Inst->getLabelDefault());
4091 } 4083 }
4092 4084
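To make the "most naive possible lowering" above concrete, here is a small self-contained sketch (hypothetical types, not the Subzero API) that prints the cmp/je chain this pass emits for the 32-bit case; the i64 path does the same thing but compares the low and high halves separately, as shown above.

  #include <cstdint>
  #include <iostream>
  #include <string>
  #include <vector>

  struct SwitchCase {
    int32_t Value;
    std::string Label;
  };

  // One cmp/je pair per case, then an unconditional jump to the default.
  void emitNaiveSwitch(const std::string &Reg,
                       const std::vector<SwitchCase> &Cases,
                       const std::string &DefaultLabel) {
    for (const SwitchCase &C : Cases) {
      std::cout << "  cmp " << Reg << ", " << C.Value << "\n";
      std::cout << "  je  " << C.Label << "\n";
    }
    std::cout << "  jmp " << DefaultLabel << "\n";
  }

  int main() {
    emitNaiveSwitch("eax", {{1, "L1"}, {7, "L7"}}, "Ldefault");
    return 0;
  }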
4093 void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind, 4085 void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind,
4094 Variable *Dest, Operand *Src0, 4086 Variable *Dest, Operand *Src0,
4095 Operand *Src1) { 4087 Operand *Src1) {
4096 assert(isVectorType(Dest->getType())); 4088 assert(isVectorType(Dest->getType()));
4097 Type Ty = Dest->getType(); 4089 Type Ty = Dest->getType();
4098 Type ElementTy = typeElementType(Ty); 4090 Type ElementTy = typeElementType(Ty);
4099 SizeT NumElements = typeNumElements(Ty); 4091 SizeT NumElements = typeNumElements(Ty);
4100 4092
4101 Operand *T = Ctx->getConstantUndef(Ty); 4093 Operand *T = Ctx->getConstantUndef(Ty);
4102 for (SizeT I = 0; I < NumElements; ++I) { 4094 for (SizeT I = 0; I < NumElements; ++I) {
4103 Constant *Index = Ctx->getConstantInt32(IceType_i32, I); 4095 Constant *Index = Ctx->getConstantInt32(I);
4104 4096
4105 // Extract the next two inputs. 4097 // Extract the next two inputs.
4106 Variable *Op0 = Func->makeVariable(ElementTy); 4098 Variable *Op0 = Func->makeVariable(ElementTy);
4107 lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index)); 4099 lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index));
4108 Variable *Op1 = Func->makeVariable(ElementTy); 4100 Variable *Op1 = Func->makeVariable(ElementTy);
4109 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index)); 4101 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index));
4110 4102
4111 // Perform the arithmetic as a scalar operation. 4103 // Perform the arithmetic as a scalar operation.
4112 Variable *Res = Func->makeVariable(ElementTy); 4104 Variable *Res = Func->makeVariable(ElementTy);
4113 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1)); 4105 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1));
(...skipping 227 matching lines...)
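scalarizeArithmetic above is the generic fallback for vector operations with no direct SSE instruction (SSE has no packed integer division, for instance): pull each lane out of both operands, run the scalar lowering on the pair, and, in the lines elided from this diff, rebuild the result vector lane by lane. A conceptual sketch in plain C++ (hypothetical helper, not the Subzero API):

  #include <array>
  #include <cstddef>

  // Apply a scalar binary operation lane-wise to two fixed-size "vectors".
  template <typename T, std::size_t N, typename Op>
  std::array<T, N> scalarize(const std::array<T, N> &Src0,
                             const std::array<T, N> &Src1, Op ScalarOp) {
    std::array<T, N> Result{};
    for (std::size_t I = 0; I < N; ++I) {
      const T Op0 = Src0[I];          // lowerExtractElement on Src0
      const T Op1 = Src1[I];          // lowerExtractElement on Src1
      Result[I] = ScalarOp(Op0, Op1); // lowerArithmetic on the scalars
    }
    return Result;                    // re-insertion is elided in the diff
  }

For instance, scalarize(A, B, [](int32_t X, int32_t Y) { return X / Y; }) mirrors scalarizing a vector signed division.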
4341 _psub(Dest, MinusOne); 4333 _psub(Dest, MinusOne);
4342 return Dest; 4334 return Dest;
4343 } 4335 }
4344 4336
4345 Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) { 4337 Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) {
4346 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || 4338 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
4347 Ty == IceType_v16i8); 4339 Ty == IceType_v16i8);
4348 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { 4340 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
4349 Variable *Reg = makeVectorOfOnes(Ty, RegNum); 4341 Variable *Reg = makeVectorOfOnes(Ty, RegNum);
4350 SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1; 4342 SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1;
4351 _psll(Reg, Ctx->getConstantInt32(IceType_i8, Shift)); 4343 _psll(Reg, Ctx->getConstantInt8(Shift));
4352 return Reg; 4344 return Reg;
4353 } else { 4345 } else {
4354 // SSE has no left shift operation for vectors of 8 bit integers. 4346 // SSE has no left shift operation for vectors of 8 bit integers.
4355 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; 4347 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
4356 Constant *ConstantMask = 4348 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK);
4357 Ctx->getConstantInt32(IceType_i32, HIGH_ORDER_BITS_MASK);
4358 Variable *Reg = makeReg(Ty, RegNum); 4349 Variable *Reg = makeReg(Ty, RegNum);
4359 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); 4350 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
4360 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); 4351 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
4361 return Reg; 4352 return Reg;
4362 } 4353 }
4363 } 4354 }
4364 4355
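makeVectorOfHighOrderBits above uses two strategies: for 16- and 32-bit lanes it takes an all-ones vector and shifts each lane left so only the sign bit survives; for 8-bit lanes, where SSE has no per-byte shift, it broadcasts the precomputed 0x80808080 mask with movd + pshufd. An equivalent standalone sketch with SSE2 intrinsics (not Subzero code):

  #include <emmintrin.h>  // SSE2 intrinsics
  #include <cstdint>

  // 32-bit lanes: all-ones shifted left by 31 leaves only the sign bit set.
  static __m128i highOrderBitsV4i32() {
    const __m128i Zero = _mm_setzero_si128();
    const __m128i Ones = _mm_cmpeq_epi32(Zero, Zero); // 0xFFFFFFFF per lane
    return _mm_slli_epi32(Ones, 31);                  // 0x80000000 per lane
  }

  // 8-bit lanes: there is no psllb, so broadcast the byte mask instead.
  static __m128i highOrderBitsV16i8() {
    return _mm_set1_epi32(static_cast<int32_t>(UINT32_C(0x80808080)));
  }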
4365 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty, 4356 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,
4366 Variable *Slot, 4357 Variable *Slot,
4367 uint32_t Offset) { 4358 uint32_t Offset) {
4368 // Ensure that Loc is a stack slot. 4359 // Ensure that Loc is a stack slot.
4369 assert(Slot->getWeight() == RegWeight::Zero); 4360 assert(Slot->getWeight() == RegWeight::Zero);
4370 assert(Slot->getRegNum() == Variable::NoRegister); 4361 assert(Slot->getRegNum() == Variable::NoRegister);
4371 // Compute the location of Loc in memory. 4362 // Compute the location of Loc in memory.
4372 // TODO(wala,stichnot): lea should not be required. The address of 4363 // TODO(wala,stichnot): lea should not be required. The address of
4373 // the stack slot is known at compile time (although not until after 4364 // the stack slot is known at compile time (although not until after
4374 // addProlog()). 4365 // addProlog()).
4375 const Type PointerType = IceType_i32; 4366 const Type PointerType = IceType_i32;
4376 Variable *Loc = makeReg(PointerType); 4367 Variable *Loc = makeReg(PointerType);
4377 _lea(Loc, Slot); 4368 _lea(Loc, Slot);
4378 Constant *ConstantOffset = Ctx->getConstantInt32(IceType_i32, Offset); 4369 Constant *ConstantOffset = Ctx->getConstantInt32(Offset);
4379 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset); 4370 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset);
4380 } 4371 }
4381 4372
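getMemoryOperandForStackSlot above materializes the slot's address into a register with lea and then forms a typed memory operand at a constant offset from that register; per the TODO, the lea is only a stopgap, since the frame-relative address is known once addProlog() has run. The rough C++ analogue (hypothetical, not Subzero code):

  #include <cstdint>
  #include <cstring>

  // Read a 32-bit field at a fixed offset within a stack slot, going through
  // an explicit pointer (the lea) rather than a frame-relative address.
  uint32_t loadSlotField(const unsigned char *Slot, uint32_t Offset) {
    const unsigned char *Loc = Slot; // lea Loc, [Slot]
    uint32_t Field;
    std::memcpy(&Field, Loc + Offset, sizeof(Field)); // mov Field, [Loc + Offset]
    return Field;
  }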
4382 // Helper for legalize() to emit the right code to lower an operand to a 4373 // Helper for legalize() to emit the right code to lower an operand to a
4383 // register of the appropriate type. 4374 // register of the appropriate type.
4384 Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) { 4375 Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) {
4385 Type Ty = Src->getType(); 4376 Type Ty = Src->getType();
4386 Variable *Reg = makeReg(Ty, RegNum); 4377 Variable *Reg = makeReg(Ty, RegNum);
4387 if (isVectorType(Ty)) { 4378 if (isVectorType(Ty)) {
4388 _movp(Reg, Src); 4379 _movp(Reg, Src);
(...skipping 261 matching lines...)
4650 } else if (IsConstant || IsExternal) 4641 } else if (IsConstant || IsExternal)
4651 Str << "\t.zero\t" << Size << "\n"; 4642 Str << "\t.zero\t" << Size << "\n";
4652 // Size is part of .comm. 4643 // Size is part of .comm.
4653 4644
4654 if (IsConstant || HasNonzeroInitializer || IsExternal) 4645 if (IsConstant || HasNonzeroInitializer || IsExternal)
4655 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; 4646 Str << "\t.size\t" << MangledName << ", " << Size << "\n";
4656 // Size is part of .comm. 4647 // Size is part of .comm.
4657 } 4648 }
4658 4649
4659 } // end of namespace Ice 4650 } // end of namespace Ice