OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
11 /// This file implements the TargetLoweringX86Base class, which | 11 /// This file implements the TargetLoweringX86Base class, which consists almost |
12 /// consists almost entirely of the lowering sequence for each | 12 /// entirely of the lowering sequence for each high-level instruction. |
13 /// high-level instruction. | |
14 /// | 13 /// |
15 //===----------------------------------------------------------------------===// | 14 //===----------------------------------------------------------------------===// |
16 | 15 |
17 #ifndef SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 16 #ifndef SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
18 #define SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 17 #define SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
19 | 18 |
20 #include "IceCfg.h" | 19 #include "IceCfg.h" |
21 #include "IceCfgNode.h" | 20 #include "IceCfgNode.h" |
22 #include "IceClFlags.h" | 21 #include "IceClFlags.h" |
23 #include "IceDefs.h" | 22 #include "IceDefs.h" |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
56 | 55 |
57 public: | 56 public: |
58 BoolFoldingEntry() = default; | 57 BoolFoldingEntry() = default; |
59 explicit BoolFoldingEntry(Inst *I); | 58 explicit BoolFoldingEntry(Inst *I); |
60 BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default; | 59 BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default; |
61 /// Instr is the instruction producing the i1-type variable of interest. | 60 /// Instr is the instruction producing the i1-type variable of interest. |
62 Inst *Instr = nullptr; | 61 Inst *Instr = nullptr; |
63 /// IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr). | 62 /// IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr). |
64 bool IsComplex = false; | 63 bool IsComplex = false; |
65 /// IsLiveOut is initialized conservatively to true, and is set to false when | 64 /// IsLiveOut is initialized conservatively to true, and is set to false when |
66 /// we encounter an instruction that ends Var's live range. We disable the | 65 /// we encounter an instruction that ends Var's live range. We disable the |
67 /// folding optimization when Var is live beyond this basic block. Note that | 66 /// folding optimization when Var is live beyond this basic block. Note that |
68 /// if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will | 67 /// if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will |
69 /// always be true and the folding optimization will never be performed. | 68 /// always be true and the folding optimization will never be performed. |
70 bool IsLiveOut = true; | 69 bool IsLiveOut = true; |
71 // NumUses counts the number of times Var is used as a source operand in the | 70 // NumUses counts the number of times Var is used as a source operand in the |
72 // basic block. If IsComplex is true and there is more than one use of Var, | 71 // basic block. If IsComplex is true and there is more than one use of Var, |
73 // then the folding optimization is disabled for Var. | 72 // then the folding optimization is disabled for Var. |
74 uint32_t NumUses = 0; | 73 uint32_t NumUses = 0; |
75 }; | 74 }; |
76 | 75 |
77 template <class MachineTraits> class BoolFolding { | 76 template <class MachineTraits> class BoolFolding { |
78 public: | 77 public: |
79 enum BoolFoldingProducerKind { | 78 enum BoolFoldingProducerKind { |
80 PK_None, | 79 PK_None, |
81 // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative. | 80 // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative. |
82 PK_Icmp32, | 81 PK_Icmp32, |
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
159 case InstCast::Zext: | 158 case InstCast::Zext: |
160 return CK_Zext; | 159 return CK_Zext; |
161 } | 160 } |
162 } | 161 } |
163 return CK_None; | 162 return CK_None; |
164 } | 163 } |
165 | 164 |
166 /// Returns true if the producing instruction has a "complex" lowering sequence. | 165 /// Returns true if the producing instruction has a "complex" lowering sequence. |
167 /// This generally means that its lowering sequence requires more than one | 166 /// This generally means that its lowering sequence requires more than one |
168 /// conditional branch, namely 64-bit integer compares and some floating-point | 167 /// conditional branch, namely 64-bit integer compares and some floating-point |
169 /// compares. When this is true, and there is more than one consumer, we prefer | 168 /// compares. When this is true, and there is more than one consumer, we prefer |
170 /// to disable the folding optimization because it minimizes branches. | 169 /// to disable the folding optimization because it minimizes branches. |
171 template <class MachineTraits> | 170 template <class MachineTraits> |
172 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) { | 171 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) { |
173 switch (getProducerKind(Instr)) { | 172 switch (getProducerKind(Instr)) { |
174 default: | 173 default: |
175 return false; | 174 return false; |
176 case PK_Icmp64: | 175 case PK_Icmp64: |
177 return true; | 176 return true; |
178 case PK_Fcmp: | 177 case PK_Fcmp: |
179 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()] | 178 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()] |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
215 } | 214 } |
216 for (auto &I : Producers) { | 215 for (auto &I : Producers) { |
217 // Ignore entries previously marked invalid. | 216 // Ignore entries previously marked invalid. |
218 if (I.second.Instr == nullptr) | 217 if (I.second.Instr == nullptr) |
219 continue; | 218 continue; |
220 // Disable the producer if its dest may be live beyond this block. | 219 // Disable the producer if its dest may be live beyond this block. |
221 if (I.second.IsLiveOut) { | 220 if (I.second.IsLiveOut) { |
222 setInvalid(I.first); | 221 setInvalid(I.first); |
223 continue; | 222 continue; |
224 } | 223 } |
225 // Mark as "dead" rather than outright deleting. This is so that other | 224 // Mark as "dead" rather than outright deleting. This is so that other |
226 // peephole style optimizations during or before lowering have access to | 225 // peephole style optimizations during or before lowering have access to |
227 // this instruction in undeleted form. See for example | 226 // this instruction in undeleted form. See for example |
228 // tryOptimizedCmpxchgCmpBr(). | 227 // tryOptimizedCmpxchgCmpBr(). |
229 I.second.Instr->setDead(); | 228 I.second.Instr->setDead(); |
230 } | 229 } |
231 } | 230 } |
232 | 231 |
233 template <class MachineTraits> | 232 template <class MachineTraits> |
234 const Inst * | 233 const Inst * |
235 BoolFolding<MachineTraits>::getProducerFor(const Operand *Opnd) const { | 234 BoolFolding<MachineTraits>::getProducerFor(const Operand *Opnd) const { |
236 auto *Var = llvm::dyn_cast<const Variable>(Opnd); | 235 auto *Var = llvm::dyn_cast<const Variable>(Opnd); |
237 if (Var == nullptr) | 236 if (Var == nullptr) |
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
296 if (Func->hasError()) | 295 if (Func->hasError()) |
297 return; | 296 return; |
298 Func->deletePhis(); | 297 Func->deletePhis(); |
299 if (Func->hasError()) | 298 if (Func->hasError()) |
300 return; | 299 return; |
301 Func->dump("After Phi lowering"); | 300 Func->dump("After Phi lowering"); |
302 } | 301 } |
303 | 302 |
304 // Run this early so it can be used to focus optimizations on potentially hot | 303 // Run this early so it can be used to focus optimizations on potentially hot |
305 // code. | 304 // code. |
306 // TODO(stichnot,ascull): currently only used for regalloc not expensive high | 305 // TODO(stichnot,ascull): currently only used for regalloc not |
307 // level optimizations which could be focused on potentially hot code. | 306 // expensive high level optimizations which could be focused on potentially |
| 307 // hot code. |
308 Func->computeLoopNestDepth(); | 308 Func->computeLoopNestDepth(); |
309 Func->dump("After loop nest depth analysis"); | 309 Func->dump("After loop nest depth analysis"); |
310 | 310 |
311 // Address mode optimization. | 311 // Address mode optimization. |
312 Func->getVMetadata()->init(VMK_SingleDefs); | 312 Func->getVMetadata()->init(VMK_SingleDefs); |
313 Func->doAddressOpt(); | 313 Func->doAddressOpt(); |
314 | 314 |
315 // Find read-modify-write opportunities. Do this after address mode | 315 // Find read-modify-write opportunities. Do this after address mode |
316 // optimization so that doAddressOpt() doesn't need to be applied to RMW | 316 // optimization so that doAddressOpt() doesn't need to be applied to RMW |
317 // instructions as well. | 317 // instructions as well. |
318 findRMW(); | 318 findRMW(); |
319 Func->dump("After RMW transform"); | 319 Func->dump("After RMW transform"); |
320 | 320 |
321 // Argument lowering | 321 // Argument lowering |
322 Func->doArgLowering(); | 322 Func->doArgLowering(); |
323 | 323 |
324 // Target lowering. This requires liveness analysis for some parts of the | 324 // Target lowering. This requires liveness analysis for some parts of the |
325 // lowering decisions, such as compare/branch fusing. If non-lightweight | 325 // lowering decisions, such as compare/branch fusing. If non-lightweight |
326 // liveness analysis is used, the instructions need to be renumbered first | 326 // liveness analysis is used, the instructions need to be renumbered first |
327 // TODO: This renumbering should only be necessary if we're actually | 327 // TODO: This renumbering should only be necessary if we're actually |
328 // calculating live intervals, which we only do for register allocation. | 328 // calculating live intervals, which we only do for register allocation. |
329 Func->renumberInstructions(); | 329 Func->renumberInstructions(); |
330 if (Func->hasError()) | 330 if (Func->hasError()) |
331 return; | 331 return; |
332 | 332 |
333 // TODO: It should be sufficient to use the fastest liveness calculation, i.e. | 333 // TODO: It should be sufficient to use the fastest liveness calculation, |
334 // livenessLightweight(). However, for some reason that slows down the rest | 334 // i.e. livenessLightweight(). However, for some reason that slows down the |
335 // of the translation. Investigate. | 335 // rest of the translation. Investigate. |
336 Func->liveness(Liveness_Basic); | 336 Func->liveness(Liveness_Basic); |
337 if (Func->hasError()) | 337 if (Func->hasError()) |
338 return; | 338 return; |
339 Func->dump("After x86 address mode opt"); | 339 Func->dump("After x86 address mode opt"); |
340 | 340 |
341 // Disable constant blinding or pooling for load optimization. | 341 // Disable constant blinding or pooling for load optimization. |
342 { | 342 { |
343 BoolFlagSaver B(RandomizationPoolingPaused, true); | 343 BoolFlagSaver B(RandomizationPoolingPaused, true); |
344 doLoadOpt(); | 344 doLoadOpt(); |
345 } | 345 } |
346 Func->genCode(); | 346 Func->genCode(); |
347 if (Func->hasError()) | 347 if (Func->hasError()) |
348 return; | 348 return; |
349 Func->dump("After x86 codegen"); | 349 Func->dump("After x86 codegen"); |
350 | 350 |
351 // Register allocation. This requires instruction renumbering and full | 351 // Register allocation. This requires instruction renumbering and full |
352 // liveness analysis. Loops must be identified before liveness so variable | 352 // liveness analysis. Loops must be identified before liveness so variable |
353 // use weights are correct. | 353 // use weights are correct. |
354 Func->renumberInstructions(); | 354 Func->renumberInstructions(); |
355 if (Func->hasError()) | 355 if (Func->hasError()) |
356 return; | 356 return; |
357 Func->liveness(Liveness_Intervals); | 357 Func->liveness(Liveness_Intervals); |
358 if (Func->hasError()) | 358 if (Func->hasError()) |
359 return; | 359 return; |
360 // Validate the live range computations. The expensive validation call is | 360 // Validate the live range computations. The expensive validation call is |
361 // deliberately only made when assertions are enabled. | 361 // deliberately only made when assertions are enabled. |
362 assert(Func->validateLiveness()); | 362 assert(Func->validateLiveness()); |
363 // The post-codegen dump is done here, after liveness analysis and associated | 363 // The post-codegen dump is done here, after liveness analysis and associated |
364 // cleanup, to make the dump cleaner and more useful. | 364 // cleanup, to make the dump cleaner and more useful. |
365 Func->dump("After initial x8632 codegen"); | 365 Func->dump("After initial x8632 codegen"); |
366 Func->getVMetadata()->init(VMK_All); | 366 Func->getVMetadata()->init(VMK_All); |
367 regAlloc(RAK_Global); | 367 regAlloc(RAK_Global); |
368 if (Func->hasError()) | 368 if (Func->hasError()) |
369 return; | 369 return; |
370 Func->dump("After linear scan regalloc"); | 370 Func->dump("After linear scan regalloc"); |
371 | 371 |
372 if (Ctx->getFlags().getPhiEdgeSplit()) { | 372 if (Ctx->getFlags().getPhiEdgeSplit()) { |
373 Func->advancedPhiLowering(); | 373 Func->advancedPhiLowering(); |
374 Func->dump("After advanced Phi lowering"); | 374 Func->dump("After advanced Phi lowering"); |
375 } | 375 } |
376 | 376 |
377 // Stack frame mapping. | 377 // Stack frame mapping. |
378 Func->genFrame(); | 378 Func->genFrame(); |
379 if (Func->hasError()) | 379 if (Func->hasError()) |
380 return; | 380 return; |
381 Func->dump("After stack frame mapping"); | 381 Func->dump("After stack frame mapping"); |
382 | 382 |
383 Func->contractEmptyNodes(); | 383 Func->contractEmptyNodes(); |
384 Func->reorderNodes(); | 384 Func->reorderNodes(); |
385 | 385 |
386 // Shuffle basic block order if -reorder-basic-blocks is enabled. | 386 // Shuffle basic block order if -reorder-basic-blocks is enabled. |
387 Func->shuffleNodes(); | 387 Func->shuffleNodes(); |
388 | 388 |
389 // Branch optimization. This needs to be done just before code emission. In | 389 // Branch optimization. This needs to be done just before code emission. In |
390 // particular, no transformations that insert or reorder CfgNodes should be | 390 // particular, no transformations that insert or reorder CfgNodes should be |
391 // done after branch optimization. We go ahead and do it before nop insertion | 391 // done after branch optimization. We go ahead and do it before nop insertion |
392 // to reduce the amount of work needed for searching for opportunities. | 392 // to reduce the amount of work needed for searching for opportunities. |
393 Func->doBranchOpt(); | 393 Func->doBranchOpt(); |
394 Func->dump("After branch optimization"); | 394 Func->dump("After branch optimization"); |
395 | 395 |
396 // Nop insertion if -nop-insertion is enabled. | 396 // Nop insertion if -nop-insertion is enabled. |
397 Func->doNopInsertion(); | 397 Func->doNopInsertion(); |
398 | 398 |
399 // Mark nodes that require sandbox alignment | 399 // Mark nodes that require sandbox alignment |
400 if (Ctx->getFlags().getUseSandboxing()) | 400 if (Ctx->getFlags().getUseSandboxing()) |
401 Func->markNodesForSandboxing(); | 401 Func->markNodesForSandboxing(); |
(...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
488 } | 488 } |
489 return false; | 489 return false; |
490 } | 490 } |
491 | 491 |
492 template <class Machine> void TargetX86Base<Machine>::findRMW() { | 492 template <class Machine> void TargetX86Base<Machine>::findRMW() { |
493 Func->dump("Before RMW"); | 493 Func->dump("Before RMW"); |
494 OstreamLocker L(Func->getContext()); | 494 OstreamLocker L(Func->getContext()); |
495 Ostream &Str = Func->getContext()->getStrDump(); | 495 Ostream &Str = Func->getContext()->getStrDump(); |
496 for (CfgNode *Node : Func->getNodes()) { | 496 for (CfgNode *Node : Func->getNodes()) { |
497 // Walk through the instructions, considering each sequence of 3 | 497 // Walk through the instructions, considering each sequence of 3 |
498 // instructions, and look for the particular RMW pattern. Note that this | 498 // instructions, and look for the particular RMW pattern. Note that this |
499 // search can be "broken" (false negatives) if there are intervening deleted | 499 // search can be "broken" (false negatives) if there are intervening |
500 // instructions, or intervening instructions that could be safely moved out | 500 // deleted instructions, or intervening instructions that could be safely |
501 // of the way to reveal an RMW pattern. | 501 // moved out of the way to reveal an RMW pattern. |
502 auto E = Node->getInsts().end(); | 502 auto E = Node->getInsts().end(); |
503 auto I1 = E, I2 = E, I3 = Node->getInsts().begin(); | 503 auto I1 = E, I2 = E, I3 = Node->getInsts().begin(); |
504 for (; I3 != E; I1 = I2, I2 = I3, ++I3) { | 504 for (; I3 != E; I1 = I2, I2 = I3, ++I3) { |
505 // Make I3 skip over deleted instructions. | 505 // Make I3 skip over deleted instructions. |
506 while (I3 != E && I3->isDeleted()) | 506 while (I3 != E && I3->isDeleted()) |
507 ++I3; | 507 ++I3; |
508 if (I1 == E || I2 == E || I3 == E) | 508 if (I1 == E || I2 == E || I3 == E) |
509 continue; | 509 continue; |
510 assert(!I1->isDeleted()); | 510 assert(!I1->isDeleted()); |
511 assert(!I2->isDeleted()); | 511 assert(!I2->isDeleted()); |
512 assert(!I3->isDeleted()); | 512 assert(!I3->isDeleted()); |
513 if (auto *Load = llvm::dyn_cast<InstLoad>(I1)) { | 513 if (auto *Load = llvm::dyn_cast<InstLoad>(I1)) { |
514 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(I2)) { | 514 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(I2)) { |
515 if (auto *Store = llvm::dyn_cast<InstStore>(I3)) { | 515 if (auto *Store = llvm::dyn_cast<InstStore>(I3)) { |
516 // Look for: | 516 // Look for: |
517 // a = Load addr | 517 // a = Load addr |
518 // b = <op> a, other | 518 // b = <op> a, other |
519 // Store b, addr | 519 // Store b, addr |
520 // Change to: | 520 // Change to: |
521 // a = Load addr | 521 // a = Load addr |
522 // b = <op> a, other | 522 // b = <op> a, other |
523 // x = FakeDef | 523 // x = FakeDef |
524 // RMW <op>, addr, other, x | 524 // RMW <op>, addr, other, x |
525 // b = Store b, addr, x | 525 // b = Store b, addr, x |
526 // Note that inferTwoAddress() makes sure setDestNonKillable() gets | 526 // Note that inferTwoAddress() makes sure setDestNonKillable() gets |
527 // called on the updated Store instruction, to avoid liveness | 527 // called on the updated Store instruction, to avoid liveness |
528 // problems later. | 528 // problems later. |
529 // | 529 // |
530 // With this transformation, the Store instruction acquires a Dest | 530 // With this transformation, the Store instruction acquires a Dest |
531 // variable and is now subject to dead code elimination if there are | 531 // variable and is now subject to dead code elimination if there |
532 // no more uses of "b". Variable "x" is a beacon for determining | 532 // are no more uses of "b". Variable "x" is a beacon for |
533 // whether the Store instruction gets dead-code eliminated. If the | 533 // determining whether the Store instruction gets dead-code |
534 // Store instruction is eliminated, then it must be the case that | 534 // eliminated. If the Store instruction is eliminated, then it |
535 // the RMW instruction ends x's live range, and therefore the RMW | 535 // must be the case that the RMW instruction ends x's live range, |
536 // instruction will be retained and later lowered. On the other | 536 // and therefore the RMW instruction will be retained and later |
537 // hand, if the RMW instruction does not end x's live range, then | 537 // lowered. On the other hand, if the RMW instruction does not end |
538 // the Store instruction must still be present, and therefore the | 538 // x's live range, then the Store instruction must still be |
539 // RMW instruction is ignored during lowering because it is | 539 // present, and therefore the RMW instruction is ignored during |
540 // redundant with the Store instruction. | 540 // lowering because it is redundant with the Store instruction. |
541 // | 541 // |
542 // Note that if "a" has further uses, the RMW transformation may | 542 // Note that if "a" has further uses, the RMW transformation may |
543 // still trigger, resulting in two loads and one store, which is | 543 // still trigger, resulting in two loads and one store, which is |
544 // worse than the original one load and one store. However, this is | 544 // worse than the original one load and one store. However, this |
545 // probably rare, and caching probably keeps it just as fast. | 545 // is probably rare, and caching probably keeps it just as fast. |
546 if (!isSameMemAddressOperand<Machine>(Load->getSourceAddress(), | 546 if (!isSameMemAddressOperand<Machine>(Load->getSourceAddress(), |
547 Store->getAddr())) | 547 Store->getAddr())) |
548 continue; | 548 continue; |
549 Operand *ArithSrcFromLoad = Arith->getSrc(0); | 549 Operand *ArithSrcFromLoad = Arith->getSrc(0); |
550 Operand *ArithSrcOther = Arith->getSrc(1); | 550 Operand *ArithSrcOther = Arith->getSrc(1); |
551 if (ArithSrcFromLoad != Load->getDest()) { | 551 if (ArithSrcFromLoad != Load->getDest()) { |
552 if (!Arith->isCommutative() || ArithSrcOther != Load->getDest()) | 552 if (!Arith->isCommutative() || ArithSrcOther != Load->getDest()) |
553 continue; | 553 continue; |
554 std::swap(ArithSrcFromLoad, ArithSrcOther); | 554 std::swap(ArithSrcFromLoad, ArithSrcOther); |
555 } | 555 } |
(...skipping 26 matching lines...) Expand all Loading... |
582 } | 582 } |
583 | 583 |
584 // Converts a ConstantInteger32 operand into its constant value, or | 584 // Converts a ConstantInteger32 operand into its constant value, or |
585 // MemoryOrderInvalid if the operand is not a ConstantInteger32. | 585 // MemoryOrderInvalid if the operand is not a ConstantInteger32. |
586 inline uint64_t getConstantMemoryOrder(Operand *Opnd) { | 586 inline uint64_t getConstantMemoryOrder(Operand *Opnd) { |
587 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) | 587 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) |
588 return Integer->getValue(); | 588 return Integer->getValue(); |
589 return Intrinsics::MemoryOrderInvalid; | 589 return Intrinsics::MemoryOrderInvalid; |
590 } | 590 } |
591 | 591 |
592 /// Determines whether the dest of a Load instruction can be folded | 592 /// Determines whether the dest of a Load instruction can be folded into one of |
593 /// into one of the src operands of a 2-operand instruction. This is | 593 /// the src operands of a 2-operand instruction. This is true as long as the |
594 /// true as long as the load dest matches exactly one of the binary | 594 /// load dest matches exactly one of the binary instruction's src operands. |
595 /// instruction's src operands. Replaces Src0 or Src1 with LoadSrc if | 595 /// Replaces Src0 or Src1 with LoadSrc if the answer is true. |
596 /// the answer is true. | |
597 inline bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest, | 596 inline bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest, |
598 Operand *&Src0, Operand *&Src1) { | 597 Operand *&Src0, Operand *&Src1) { |
599 if (Src0 == LoadDest && Src1 != LoadDest) { | 598 if (Src0 == LoadDest && Src1 != LoadDest) { |
600 Src0 = LoadSrc; | 599 Src0 = LoadSrc; |
601 return true; | 600 return true; |
602 } | 601 } |
603 if (Src0 != LoadDest && Src1 == LoadDest) { | 602 if (Src0 != LoadDest && Src1 == LoadDest) { |
604 Src1 = LoadSrc; | 603 Src1 = LoadSrc; |
605 return true; | 604 return true; |
606 } | 605 } |
607 return false; | 606 return false; |
608 } | 607 } |
609 | 608 |
610 template <class Machine> void TargetX86Base<Machine>::doLoadOpt() { | 609 template <class Machine> void TargetX86Base<Machine>::doLoadOpt() { |
611 for (CfgNode *Node : Func->getNodes()) { | 610 for (CfgNode *Node : Func->getNodes()) { |
612 Context.init(Node); | 611 Context.init(Node); |
613 while (!Context.atEnd()) { | 612 while (!Context.atEnd()) { |
614 Variable *LoadDest = nullptr; | 613 Variable *LoadDest = nullptr; |
615 Operand *LoadSrc = nullptr; | 614 Operand *LoadSrc = nullptr; |
616 Inst *CurInst = Context.getCur(); | 615 Inst *CurInst = Context.getCur(); |
617 Inst *Next = Context.getNextInst(); | 616 Inst *Next = Context.getNextInst(); |
618 // Determine whether the current instruction is a Load | 617 // Determine whether the current instruction is a Load instruction or |
619 // instruction or equivalent. | 618 // equivalent. |
620 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { | 619 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { |
621 // An InstLoad always qualifies. | 620 // An InstLoad always qualifies. |
622 LoadDest = Load->getDest(); | 621 LoadDest = Load->getDest(); |
623 const bool DoLegalize = false; | 622 const bool DoLegalize = false; |
624 LoadSrc = formMemoryOperand(Load->getSourceAddress(), | 623 LoadSrc = formMemoryOperand(Load->getSourceAddress(), |
625 LoadDest->getType(), DoLegalize); | 624 LoadDest->getType(), DoLegalize); |
626 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) { | 625 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) { |
627 // An AtomicLoad intrinsic qualifies as long as it has a valid | 626 // An AtomicLoad intrinsic qualifies as long as it has a valid memory |
628 // memory ordering, and can be implemented in a single | 627 // ordering, and can be implemented in a single instruction (i.e., not |
629 // instruction (i.e., not i64 on x86-32). | 628 // i64 on x86-32). |
630 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID; | 629 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID; |
631 if (ID == Intrinsics::AtomicLoad && | 630 if (ID == Intrinsics::AtomicLoad && |
632 (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) && | 631 (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) && |
633 Intrinsics::isMemoryOrderValid( | 632 Intrinsics::isMemoryOrderValid( |
634 ID, getConstantMemoryOrder(Intrin->getArg(1)))) { | 633 ID, getConstantMemoryOrder(Intrin->getArg(1)))) { |
635 LoadDest = Intrin->getDest(); | 634 LoadDest = Intrin->getDest(); |
636 const bool DoLegalize = false; | 635 const bool DoLegalize = false; |
637 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(), | 636 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(), |
638 DoLegalize); | 637 DoLegalize); |
639 } | 638 } |
640 } | 639 } |
641 // A Load instruction can be folded into the following | 640 // A Load instruction can be folded into the following instruction only |
642 // instruction only if the following instruction ends the Load's | 641 // if the following instruction ends the Load's Dest variable's live |
643 // Dest variable's live range. | 642 // range. |
644 if (LoadDest && Next && Next->isLastUse(LoadDest)) { | 643 if (LoadDest && Next && Next->isLastUse(LoadDest)) { |
645 assert(LoadSrc); | 644 assert(LoadSrc); |
646 Inst *NewInst = nullptr; | 645 Inst *NewInst = nullptr; |
647 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Next)) { | 646 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Next)) { |
648 Operand *Src0 = Arith->getSrc(0); | 647 Operand *Src0 = Arith->getSrc(0); |
649 Operand *Src1 = Arith->getSrc(1); | 648 Operand *Src1 = Arith->getSrc(1); |
650 if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) { | 649 if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) { |
651 NewInst = InstArithmetic::create(Func, Arith->getOp(), | 650 NewInst = InstArithmetic::create(Func, Arith->getOp(), |
652 Arith->getDest(), Src0, Src1); | 651 Arith->getDest(), Src0, Src1); |
653 } | 652 } |
(...skipping 12 matching lines...) Expand all Loading... |
666 Fcmp->getDest(), Src0, Src1); | 665 Fcmp->getDest(), Src0, Src1); |
667 } | 666 } |
668 } else if (auto *Select = llvm::dyn_cast<InstSelect>(Next)) { | 667 } else if (auto *Select = llvm::dyn_cast<InstSelect>(Next)) { |
669 Operand *Src0 = Select->getTrueOperand(); | 668 Operand *Src0 = Select->getTrueOperand(); |
670 Operand *Src1 = Select->getFalseOperand(); | 669 Operand *Src1 = Select->getFalseOperand(); |
671 if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) { | 670 if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) { |
672 NewInst = InstSelect::create(Func, Select->getDest(), | 671 NewInst = InstSelect::create(Func, Select->getDest(), |
673 Select->getCondition(), Src0, Src1); | 672 Select->getCondition(), Src0, Src1); |
674 } | 673 } |
675 } else if (auto *Cast = llvm::dyn_cast<InstCast>(Next)) { | 674 } else if (auto *Cast = llvm::dyn_cast<InstCast>(Next)) { |
676 // The load dest can always be folded into a Cast | 675 // The load dest can always be folded into a Cast instruction. |
677 // instruction. | |
678 Variable *Src0 = llvm::dyn_cast<Variable>(Cast->getSrc(0)); | 676 Variable *Src0 = llvm::dyn_cast<Variable>(Cast->getSrc(0)); |
679 if (Src0 == LoadDest) { | 677 if (Src0 == LoadDest) { |
680 NewInst = InstCast::create(Func, Cast->getCastKind(), | 678 NewInst = InstCast::create(Func, Cast->getCastKind(), |
681 Cast->getDest(), LoadSrc); | 679 Cast->getDest(), LoadSrc); |
682 } | 680 } |
683 } | 681 } |
684 if (NewInst) { | 682 if (NewInst) { |
685 CurInst->setDeleted(); | 683 CurInst->setDeleted(); |
686 Next->setDeleted(); | 684 Next->setDeleted(); |
687 Context.insert(NewInst); | 685 Context.insert(NewInst); |
688 // Update NewInst->LiveRangesEnded so that target lowering | 686 // Update NewInst->LiveRangesEnded so that target lowering may |
689 // may benefit. Also update NewInst->HasSideEffects. | 687 // benefit. Also update NewInst->HasSideEffects. |
690 NewInst->spliceLivenessInfo(Next, CurInst); | 688 NewInst->spliceLivenessInfo(Next, CurInst); |
691 } | 689 } |
692 } | 690 } |
693 Context.advanceCur(); | 691 Context.advanceCur(); |
694 Context.advanceNext(); | 692 Context.advanceNext(); |
695 } | 693 } |
696 } | 694 } |
697 Func->dump("After load optimization"); | 695 Func->dump("After load optimization"); |
698 } | 696 } |
699 | 697 |
(...skipping 14 matching lines...) Expand all Loading... |
714 if (Ty == IceType_void) | 712 if (Ty == IceType_void) |
715 Ty = IceType_i32; | 713 Ty = IceType_i32; |
716 if (PhysicalRegisters[Ty].empty()) | 714 if (PhysicalRegisters[Ty].empty()) |
717 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM); | 715 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM); |
718 assert(RegNum < PhysicalRegisters[Ty].size()); | 716 assert(RegNum < PhysicalRegisters[Ty].size()); |
719 Variable *Reg = PhysicalRegisters[Ty][RegNum]; | 717 Variable *Reg = PhysicalRegisters[Ty][RegNum]; |
720 if (Reg == nullptr) { | 718 if (Reg == nullptr) { |
721 Reg = Func->makeVariable(Ty); | 719 Reg = Func->makeVariable(Ty); |
722 Reg->setRegNum(RegNum); | 720 Reg->setRegNum(RegNum); |
723 PhysicalRegisters[Ty][RegNum] = Reg; | 721 PhysicalRegisters[Ty][RegNum] = Reg; |
724 // Specially mark esp as an "argument" so that it is considered | 722 // Specially mark esp as an "argument" so that it is considered live upon |
725 // live upon function entry. | 723 // function entry. |
726 if (RegNum == Traits::RegisterSet::Reg_esp) { | 724 if (RegNum == Traits::RegisterSet::Reg_esp) { |
727 Func->addImplicitArg(Reg); | 725 Func->addImplicitArg(Reg); |
728 Reg->setIgnoreLiveness(); | 726 Reg->setIgnoreLiveness(); |
729 } | 727 } |
730 } | 728 } |
731 return Reg; | 729 return Reg; |
732 } | 730 } |
733 | 731 |
734 template <class Machine> | 732 template <class Machine> |
735 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const { | 733 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const { |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
775 BaseRegNum = getFrameOrStackReg(); | 773 BaseRegNum = getFrameOrStackReg(); |
776 if (!hasFramePointer()) | 774 if (!hasFramePointer()) |
777 Offset += getStackAdjustment(); | 775 Offset += getStackAdjustment(); |
778 } | 776 } |
779 return typename Traits::Address( | 777 return typename Traits::Address( |
780 Traits::RegisterSet::getEncodedGPR(BaseRegNum), Offset); | 778 Traits::RegisterSet::getEncodedGPR(BaseRegNum), Offset); |
781 } | 779 } |
782 | 780 |
783 /// Helper function for addProlog(). | 781 /// Helper function for addProlog(). |
784 /// | 782 /// |
785 /// This assumes Arg is an argument passed on the stack. This sets the | 783 /// This assumes Arg is an argument passed on the stack. This sets the frame |
786 /// frame offset for Arg and updates InArgsSizeBytes according to Arg's | 784 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an |
787 /// width. For an I64 arg that has been split into Lo and Hi components, | 785 /// I64 arg that has been split into Lo and Hi components, it calls itself |
788 /// it calls itself recursively on the components, taking care to handle | 786 /// recursively on the components, taking care to handle Lo first because of the |
789 /// Lo first because of the little-endian architecture. Lastly, this | 787 /// little-endian architecture. Lastly, this function generates an instruction |
790 /// function generates an instruction to copy Arg into its assigned | 788 /// to copy Arg into its assigned register if applicable. |
791 /// register if applicable. | |
792 template <class Machine> | 789 template <class Machine> |
793 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, | 790 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, |
794 Variable *FramePtr, | 791 Variable *FramePtr, |
795 size_t BasicFrameOffset, | 792 size_t BasicFrameOffset, |
796 size_t &InArgsSizeBytes) { | 793 size_t &InArgsSizeBytes) { |
797 Variable *Lo = Arg->getLo(); | 794 Variable *Lo = Arg->getLo(); |
798 Variable *Hi = Arg->getHi(); | 795 Variable *Hi = Arg->getHi(); |
799 Type Ty = Arg->getType(); | 796 Type Ty = Arg->getType(); |
800 if (!Traits::Is64Bit && Lo && Hi && Ty == IceType_i64) { | 797 if (!Traits::Is64Bit && Lo && Hi && Ty == IceType_i64) { |
801 assert(Lo->getType() != IceType_i64); // don't want infinite recursion | 798 assert(Lo->getType() != IceType_i64); // don't want infinite recursion |
(...skipping 10 matching lines...) Expand all Loading... |
812 if (Arg->hasReg()) { | 809 if (Arg->hasReg()) { |
813 assert(Ty != IceType_i64 || Traits::Is64Bit); | 810 assert(Ty != IceType_i64 || Traits::Is64Bit); |
814 typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create( | 811 typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create( |
815 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset())); | 812 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset())); |
816 if (isVectorType(Arg->getType())) { | 813 if (isVectorType(Arg->getType())) { |
817 _movp(Arg, Mem); | 814 _movp(Arg, Mem); |
818 } else { | 815 } else { |
819 _mov(Arg, Mem); | 816 _mov(Arg, Mem); |
820 } | 817 } |
821 // This argument-copying instruction uses an explicit Traits::X86OperandMem | 818 // This argument-copying instruction uses an explicit Traits::X86OperandMem |
822 // operand instead of a Variable, so its fill-from-stack operation has to be | 819 // operand instead of a Variable, so its fill-from-stack operation has to |
823 // tracked separately for statistics. | 820 // be tracked separately for statistics. |
824 Ctx->statsUpdateFills(); | 821 Ctx->statsUpdateFills(); |
825 } | 822 } |
826 } | 823 } |
827 | 824 |
828 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() { | 825 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() { |
829 return Traits::WordType; | 826 return Traits::WordType; |
830 } | 827 } |
831 | 828 |
832 template <class Machine> | 829 template <class Machine> |
833 template <typename T> | 830 template <typename T> |
834 typename std::enable_if<!T::Is64Bit, void>::type | 831 typename std::enable_if<!T::Is64Bit, void>::type |
835 TargetX86Base<Machine>::split64(Variable *Var) { | 832 TargetX86Base<Machine>::split64(Variable *Var) { |
836 switch (Var->getType()) { | 833 switch (Var->getType()) { |
837 default: | 834 default: |
838 return; | 835 return; |
839 case IceType_i64: | 836 case IceType_i64: |
840 // TODO: Only consider F64 if we need to push each half when | 837 // TODO: Only consider F64 if we need to push each half when passing as an |
841 // passing as an argument to a function call. Note that each half | 838 // argument to a function call. Note that each half is still typed as I32. |
842 // is still typed as I32. | |
843 case IceType_f64: | 839 case IceType_f64: |
844 break; | 840 break; |
845 } | 841 } |
846 Variable *Lo = Var->getLo(); | 842 Variable *Lo = Var->getLo(); |
847 Variable *Hi = Var->getHi(); | 843 Variable *Hi = Var->getHi(); |
848 if (Lo) { | 844 if (Lo) { |
849 assert(Hi); | 845 assert(Hi); |
850 return; | 846 return; |
851 } | 847 } |
852 assert(Hi == nullptr); | 848 assert(Hi == nullptr); |
(...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
939 template <class Machine> | 935 template <class Machine> |
940 llvm::SmallBitVector | 936 llvm::SmallBitVector |
941 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include, | 937 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include, |
942 RegSetMask Exclude) const { | 938 RegSetMask Exclude) const { |
943 return Traits::getRegisterSet(Include, Exclude); | 939 return Traits::getRegisterSet(Include, Exclude); |
944 } | 940 } |
945 | 941 |
/// Lowers an InstAlloca: over-aligns esp when the requested alignment exceeds
/// the base stack alignment, subtracts the (rounded-up) allocation size from
/// esp, and assigns the resulting esp value to the instruction's dest.
template <class Machine>
void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) {
  // esp is about to be adjusted dynamically, so mark the frame ebp-based
  // (presumably so locals are addressed off the frame pointer instead —
  // consistent with the flag name).
  IsEbpBasedFrame = true;
  // Conservatively require the stack to be aligned. Some stack adjustment
  // operations implemented below assume that the stack is aligned before the
  // alloca. All the alloca code ensures that the stack alignment is preserved
  // after the alloca. The stack alignment restriction can be relaxed in some
  // cases.
  NeedsStackAlignment = true;

  // TODO(stichnot): minimize the number of adjustments of esp, etc.
  Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
  Operand *TotalSize = legalize(Inst->getSizeInBytes());
  Variable *Dest = Inst->getDest();
  uint32_t AlignmentParam = Inst->getAlignInBytes();
  // For default align=0, set it to the real value 1, to avoid any
  // bit-manipulation problems below.
  AlignmentParam = std::max(AlignmentParam, 1u);

  // LLVM enforces power of 2 alignment.
  assert(llvm::isPowerOf2_32(AlignmentParam));
  assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES));

  // Effective alignment is the larger of the requested alignment and the
  // baseline stack alignment.
  uint32_t Alignment =
      std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES);
  if (Alignment > Traits::X86_STACK_ALIGNMENT_BYTES) {
    // Alignment is a power of 2 (asserted above), so -Alignment is a mask
    // with the low log2(Alignment) bits clear; and-ing rounds esp down.
    _and(esp, Ctx->getConstantInt32(-Alignment));
  }
  if (const auto *ConstantTotalSize =
          llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
    // Constant size: round up to the alignment at compile time and subtract
    // directly from esp.
    uint32_t Value = ConstantTotalSize->getValue();
    Value = Utils::applyAlignment(Value, Alignment);
    _sub(esp, Ctx->getConstantInt32(Value));
  } else {
    // Non-constant sizes need to be adjusted to the next highest multiple of
    // the required alignment at runtime: T = (size + Alignment-1) & -Alignment.
    Variable *T = makeReg(IceType_i32);
    _mov(T, TotalSize);
    _add(T, Ctx->getConstantInt32(Alignment - 1));
    _and(T, Ctx->getConstantInt32(-Alignment));
    _sub(esp, T);
  }
  // The allocated region starts at the new esp; that address is the result.
  _mov(Dest, esp);
}
990 | 986 |
991 /// Strength-reduce scalar integer multiplication by a constant (for | 987 /// Strength-reduce scalar integer multiplication by a constant (for i32 or |
992 /// i32 or narrower) for certain constants. The lea instruction can be | 988 /// narrower) for certain constants. The lea instruction can be used to multiply |
993 /// used to multiply by 3, 5, or 9, and the shl instruction can be used | 989 /// by 3, 5, or 9, and the shl instruction can be used to multiply by powers of
994 /// to multiply by powers of 2. These can be combined such that | 990 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2 |
995 /// e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5, | 991 /// lea-based multiplies by 5, combined with left-shifting by 2. |
996 /// combined with left-shifting by 2. | |
997 template <class Machine> | 992 template <class Machine> |
998 bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0, | 993 bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0, |
999 int32_t Src1) { | 994 int32_t Src1) { |
1000 // Disable this optimization for Om1 and O0, just to keep things | 995 // Disable this optimization for Om1 and O0, just to keep things simple |
1001 // simple there. | 996 // there. |
1002 if (Ctx->getFlags().getOptLevel() < Opt_1) | 997 if (Ctx->getFlags().getOptLevel() < Opt_1) |
1003 return false; | 998 return false; |
1004 Type Ty = Dest->getType(); | 999 Type Ty = Dest->getType(); |
1005 Variable *T = nullptr; | 1000 Variable *T = nullptr; |
1006 if (Src1 == -1) { | 1001 if (Src1 == -1) { |
1007 _mov(T, Src0); | 1002 _mov(T, Src0); |
1008 _neg(T); | 1003 _neg(T); |
1009 _mov(Dest, T); | 1004 _mov(Dest, T); |
1010 return true; | 1005 return true; |
1011 } | 1006 } |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1047 ++CountOps; | 1042 ++CountOps; |
1048 ++Count2; | 1043 ++Count2; |
1049 Src1 /= 2; | 1044 Src1 /= 2; |
1050 } else { | 1045 } else { |
1051 return false; | 1046 return false; |
1052 } | 1047 } |
1053 } | 1048 } |
1054 // Lea optimization only works for i16 and i32 types, not i8. | 1049 // Lea optimization only works for i16 and i32 types, not i8. |
1055 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9)) | 1050 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9)) |
1056 return false; | 1051 return false; |
1057 // Limit the number of lea/shl operations for a single multiply, to | 1052 // Limit the number of lea/shl operations for a single multiply, to a |
1058 // a somewhat arbitrary choice of 3. | 1053 // somewhat arbitrary choice of 3. |
1059 const uint32_t MaxOpsForOptimizedMul = 3; | 1054 const uint32_t MaxOpsForOptimizedMul = 3; |
1060 if (CountOps > MaxOpsForOptimizedMul) | 1055 if (CountOps > MaxOpsForOptimizedMul) |
1061 return false; | 1056 return false; |
1062 _mov(T, Src0); | 1057 _mov(T, Src0); |
1063 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1058 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
1064 for (uint32_t i = 0; i < Count9; ++i) { | 1059 for (uint32_t i = 0; i < Count9; ++i) { |
1065 const uint16_t Shift = 3; // log2(9-1) | 1060 const uint16_t Shift = 3; // log2(9-1) |
1066 _lea(T, | 1061 _lea(T, |
1067 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); | 1062 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); |
1068 _set_dest_nonkillable(); | 1063 _set_dest_nonkillable(); |
(...skipping 25 matching lines...) Expand all Loading... |
1094 Operand *Src0 = legalize(Inst->getSrc(0)); | 1089 Operand *Src0 = legalize(Inst->getSrc(0)); |
1095 Operand *Src1 = legalize(Inst->getSrc(1)); | 1090 Operand *Src1 = legalize(Inst->getSrc(1)); |
1096 if (Inst->isCommutative()) { | 1091 if (Inst->isCommutative()) { |
1097 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) | 1092 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) |
1098 std::swap(Src0, Src1); | 1093 std::swap(Src0, Src1); |
1099 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1)) | 1094 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1)) |
1100 std::swap(Src0, Src1); | 1095 std::swap(Src0, Src1); |
1101 } | 1096 } |
1102 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { | 1097 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
1103 // These x86-32 helper-call-involved instructions are lowered in this | 1098 // These x86-32 helper-call-involved instructions are lowered in this |
1104 // separate switch. This is because loOperand() and hiOperand() | 1099 // separate switch. This is because loOperand() and hiOperand() may insert |
1105 // may insert redundant instructions for constant blinding and | 1100 // redundant instructions for constant blinding and pooling. Such redundant |
1106 // pooling. Such redundant instructions will fail liveness analysis | 1101 // instructions will fail liveness analysis under -Om1 setting. And, |
1107 // under -Om1 setting. And, actually these arguments do not need | 1102 // actually these arguments do not need to be processed with loOperand() |
1108 // to be processed with loOperand() and hiOperand() to be used. | 1103 // and hiOperand() to be used. |
1109 switch (Inst->getOp()) { | 1104 switch (Inst->getOp()) { |
1110 case InstArithmetic::Udiv: { | 1105 case InstArithmetic::Udiv: { |
1111 const SizeT MaxSrcs = 2; | 1106 const SizeT MaxSrcs = 2; |
1112 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs); | 1107 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs); |
1113 Call->addArg(Inst->getSrc(0)); | 1108 Call->addArg(Inst->getSrc(0)); |
1114 Call->addArg(Inst->getSrc(1)); | 1109 Call->addArg(Inst->getSrc(1)); |
1115 lowerCall(Call); | 1110 lowerCall(Call); |
1116 return; | 1111 return; |
1117 } | 1112 } |
1118 case InstArithmetic::Sdiv: { | 1113 case InstArithmetic::Sdiv: { |
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1209 // t4.hi += t2 | 1204 // t4.hi += t2 |
1210 // a.hi = t4.hi | 1205 // a.hi = t4.hi |
1211 // The mul instruction cannot take an immediate operand. | 1206 // The mul instruction cannot take an immediate operand. |
1212 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem); | 1207 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem); |
1213 _mov(T_1, Src0Hi); | 1208 _mov(T_1, Src0Hi); |
1214 _imul(T_1, Src1Lo); | 1209 _imul(T_1, Src1Lo); |
1215 _mov(T_2, Src1Hi); | 1210 _mov(T_2, Src1Hi); |
1216 _imul(T_2, Src0Lo); | 1211 _imul(T_2, Src0Lo); |
1217 _mov(T_3, Src0Lo, Traits::RegisterSet::Reg_eax); | 1212 _mov(T_3, Src0Lo, Traits::RegisterSet::Reg_eax); |
1218 _mul(T_4Lo, T_3, Src1Lo); | 1213 _mul(T_4Lo, T_3, Src1Lo); |
1219 // The mul instruction produces two dest variables, edx:eax. We | 1214 // The mul instruction produces two dest variables, edx:eax. We create a |
1220 // create a fake definition of edx to account for this. | 1215 // fake definition of edx to account for this. |
1221 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo)); | 1216 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo)); |
1222 _mov(DestLo, T_4Lo); | 1217 _mov(DestLo, T_4Lo); |
1223 _add(T_4Hi, T_1); | 1218 _add(T_4Hi, T_1); |
1224 _add(T_4Hi, T_2); | 1219 _add(T_4Hi, T_2); |
1225 _mov(DestHi, T_4Hi); | 1220 _mov(DestHi, T_4Hi); |
1226 } break; | 1221 } break; |
1227 case InstArithmetic::Shl: { | 1222 case InstArithmetic::Shl: { |
1228 // TODO: Refactor the similarities between Shl, Lshr, and Ashr. | 1223 // TODO: Refactor the similarities between Shl, Lshr, and Ashr. |
1229 // gcc does the following: | 1224 // gcc does the following: |
1230 // a=b<<c ==> | 1225 // a=b<<c ==> |
(...skipping 15 matching lines...) Expand all Loading... |
1246 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1241 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
1247 typename Traits::Insts::Label *Label = | 1242 typename Traits::Insts::Label *Label = |
1248 Traits::Insts::Label::create(Func, this); | 1243 Traits::Insts::Label::create(Func, this); |
1249 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); | 1244 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); |
1250 _mov(T_2, Src0Lo); | 1245 _mov(T_2, Src0Lo); |
1251 _mov(T_3, Src0Hi); | 1246 _mov(T_3, Src0Hi); |
1252 _shld(T_3, T_2, T_1); | 1247 _shld(T_3, T_2, T_1); |
1253 _shl(T_2, T_1); | 1248 _shl(T_2, T_1); |
1254 _test(T_1, BitTest); | 1249 _test(T_1, BitTest); |
1255 _br(Traits::Cond::Br_e, Label); | 1250 _br(Traits::Cond::Br_e, Label); |
1256 // T_2 and T_3 are being assigned again because of the | 1251 // T_2 and T_3 are being assigned again because of the intra-block |
1257 // intra-block control flow, so we need the _mov_nonkillable | 1252 // control flow, so we need the _mov_nonkillable variant to avoid |
1258 // variant to avoid liveness problems. | 1253 // liveness problems. |
1259 _mov_nonkillable(T_3, T_2); | 1254 _mov_nonkillable(T_3, T_2); |
1260 _mov_nonkillable(T_2, Zero); | 1255 _mov_nonkillable(T_2, Zero); |
1261 Context.insert(Label); | 1256 Context.insert(Label); |
1262 _mov(DestLo, T_2); | 1257 _mov(DestLo, T_2); |
1263 _mov(DestHi, T_3); | 1258 _mov(DestHi, T_3); |
1264 } break; | 1259 } break; |
1265 case InstArithmetic::Lshr: { | 1260 case InstArithmetic::Lshr: { |
1266 // a=b>>c (unsigned) ==> | 1261 // a=b>>c (unsigned) ==> |
1267 // t1:ecx = c.lo & 0xff | 1262 // t1:ecx = c.lo & 0xff |
1268 // t2 = b.lo | 1263 // t2 = b.lo |
(...skipping 13 matching lines...) Expand all Loading... |
1282 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1277 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
1283 typename Traits::Insts::Label *Label = | 1278 typename Traits::Insts::Label *Label = |
1284 Traits::Insts::Label::create(Func, this); | 1279 Traits::Insts::Label::create(Func, this); |
1285 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); | 1280 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); |
1286 _mov(T_2, Src0Lo); | 1281 _mov(T_2, Src0Lo); |
1287 _mov(T_3, Src0Hi); | 1282 _mov(T_3, Src0Hi); |
1288 _shrd(T_2, T_3, T_1); | 1283 _shrd(T_2, T_3, T_1); |
1289 _shr(T_3, T_1); | 1284 _shr(T_3, T_1); |
1290 _test(T_1, BitTest); | 1285 _test(T_1, BitTest); |
1291 _br(Traits::Cond::Br_e, Label); | 1286 _br(Traits::Cond::Br_e, Label); |
1292 // T_2 and T_3 are being assigned again because of the | 1287 // T_2 and T_3 are being assigned again because of the intra-block |
1293 // intra-block control flow, so we need the _mov_nonkillable | 1288 // control flow, so we need the _mov_nonkillable variant to avoid |
1294 // variant to avoid liveness problems. | 1289 // liveness problems. |
1295 _mov_nonkillable(T_2, T_3); | 1290 _mov_nonkillable(T_2, T_3); |
1296 _mov_nonkillable(T_3, Zero); | 1291 _mov_nonkillable(T_3, Zero); |
1297 Context.insert(Label); | 1292 Context.insert(Label); |
1298 _mov(DestLo, T_2); | 1293 _mov(DestLo, T_2); |
1299 _mov(DestHi, T_3); | 1294 _mov(DestHi, T_3); |
1300 } break; | 1295 } break; |
1301 case InstArithmetic::Ashr: { | 1296 case InstArithmetic::Ashr: { |
1302 // a=b>>c (signed) ==> | 1297 // a=b>>c (signed) ==> |
1303 // t1:ecx = c.lo & 0xff | 1298 // t1:ecx = c.lo & 0xff |
1304 // t2 = b.lo | 1299 // t2 = b.lo |
(...skipping 13 matching lines...) Expand all Loading... |
1318 Constant *SignExtend = Ctx->getConstantInt32(0x1f); | 1313 Constant *SignExtend = Ctx->getConstantInt32(0x1f); |
1319 typename Traits::Insts::Label *Label = | 1314 typename Traits::Insts::Label *Label = |
1320 Traits::Insts::Label::create(Func, this); | 1315 Traits::Insts::Label::create(Func, this); |
1321 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); | 1316 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); |
1322 _mov(T_2, Src0Lo); | 1317 _mov(T_2, Src0Lo); |
1323 _mov(T_3, Src0Hi); | 1318 _mov(T_3, Src0Hi); |
1324 _shrd(T_2, T_3, T_1); | 1319 _shrd(T_2, T_3, T_1); |
1325 _sar(T_3, T_1); | 1320 _sar(T_3, T_1); |
1326 _test(T_1, BitTest); | 1321 _test(T_1, BitTest); |
1327 _br(Traits::Cond::Br_e, Label); | 1322 _br(Traits::Cond::Br_e, Label); |
1328 // T_2 and T_3 are being assigned again because of the | 1323 // T_2 and T_3 are being assigned again because of the intra-block |
1329 // intra-block control flow, so T_2 needs the _mov_nonkillable | 1324 // control flow, so T_2 needs the _mov_nonkillable variant to avoid |
1330 // variant to avoid liveness problems. T_3 doesn't need special | 1325 // liveness problems. T_3 doesn't need special treatment because it is |
1331 // treatment because it is reassigned via _sar instead of _mov. | 1326 // reassigned via _sar instead of _mov. |
1332 _mov_nonkillable(T_2, T_3); | 1327 _mov_nonkillable(T_2, T_3); |
1333 _sar(T_3, SignExtend); | 1328 _sar(T_3, SignExtend); |
1334 Context.insert(Label); | 1329 Context.insert(Label); |
1335 _mov(DestLo, T_2); | 1330 _mov(DestLo, T_2); |
1336 _mov(DestHi, T_3); | 1331 _mov(DestHi, T_3); |
1337 } break; | 1332 } break; |
1338 case InstArithmetic::Fadd: | 1333 case InstArithmetic::Fadd: |
1339 case InstArithmetic::Fsub: | 1334 case InstArithmetic::Fsub: |
1340 case InstArithmetic::Fmul: | 1335 case InstArithmetic::Fmul: |
1341 case InstArithmetic::Fdiv: | 1336 case InstArithmetic::Fdiv: |
1342 case InstArithmetic::Frem: | 1337 case InstArithmetic::Frem: |
1343 llvm_unreachable("FP instruction with i64 type"); | 1338 llvm_unreachable("FP instruction with i64 type"); |
1344 break; | 1339 break; |
1345 case InstArithmetic::Udiv: | 1340 case InstArithmetic::Udiv: |
1346 case InstArithmetic::Sdiv: | 1341 case InstArithmetic::Sdiv: |
1347 case InstArithmetic::Urem: | 1342 case InstArithmetic::Urem: |
1348 case InstArithmetic::Srem: | 1343 case InstArithmetic::Srem: |
1349 llvm_unreachable("Call-helper-involved instruction for i64 type \ | 1344 llvm_unreachable("Call-helper-involved instruction for i64 type \ |
1350 should have already been handled before"); | 1345 should have already been handled before"); |
1351 break; | 1346 break; |
1352 } | 1347 } |
1353 return; | 1348 return; |
1354 } | 1349 } |
1355 if (isVectorType(Dest->getType())) { | 1350 if (isVectorType(Dest->getType())) { |
1356 // TODO: Trap on integer divide and integer modulo by zero. | 1351 // TODO: Trap on integer divide and integer modulo by zero. See: |
1357 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899 | 1352 // https://code.google.com/p/nativeclient/issues/detail?id=3899 |
1358 if (llvm::isa<typename Traits::X86OperandMem>(Src1)) | 1353 if (llvm::isa<typename Traits::X86OperandMem>(Src1)) |
1359 Src1 = legalizeToReg(Src1); | 1354 Src1 = legalizeToReg(Src1); |
1360 switch (Inst->getOp()) { | 1355 switch (Inst->getOp()) { |
1361 case InstArithmetic::_num: | 1356 case InstArithmetic::_num: |
1362 llvm_unreachable("Unknown arithmetic operator"); | 1357 llvm_unreachable("Unknown arithmetic operator"); |
1363 break; | 1358 break; |
1364 case InstArithmetic::Add: { | 1359 case InstArithmetic::Add: { |
1365 Variable *T = makeReg(Dest->getType()); | 1360 Variable *T = makeReg(Dest->getType()); |
1366 _movp(T, Src0); | 1361 _movp(T, Src0); |
1367 _padd(T, Src1); | 1362 _padd(T, Src1); |
(...skipping 144 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1512 case InstArithmetic::Sub: | 1507 case InstArithmetic::Sub: |
1513 _mov(T, Src0); | 1508 _mov(T, Src0); |
1514 _sub(T, Src1); | 1509 _sub(T, Src1); |
1515 _mov(Dest, T); | 1510 _mov(Dest, T); |
1516 break; | 1511 break; |
1517 case InstArithmetic::Mul: | 1512 case InstArithmetic::Mul: |
1518 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { | 1513 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { |
1519 if (optimizeScalarMul(Dest, Src0, C->getValue())) | 1514 if (optimizeScalarMul(Dest, Src0, C->getValue())) |
1520 return; | 1515 return; |
1521 } | 1516 } |
1522 // The 8-bit version of imul only allows the form "imul r/m8" | 1517 // The 8-bit version of imul only allows the form "imul r/m8" where T must |
1523 // where T must be in eax. | 1518 // be in eax. |
1524 if (isByteSizedArithType(Dest->getType())) { | 1519 if (isByteSizedArithType(Dest->getType())) { |
1525 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1520 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
1526 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1521 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
1527 } else { | 1522 } else { |
1528 _mov(T, Src0); | 1523 _mov(T, Src0); |
1529 } | 1524 } |
1530 _imul(T, Src1); | 1525 _imul(T, Src1); |
1531 _mov(Dest, T); | 1526 _mov(Dest, T); |
1532 break; | 1527 break; |
1533 case InstArithmetic::Shl: | 1528 case InstArithmetic::Shl: |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1573 Context.insert(InstFakeUse::create(Func, T_eax)); | 1568 Context.insert(InstFakeUse::create(Func, T_eax)); |
1574 } else { | 1569 } else { |
1575 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1570 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
1576 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1571 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
1577 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx); | 1572 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx); |
1578 _div(T, Src1, T_edx); | 1573 _div(T, Src1, T_edx); |
1579 _mov(Dest, T); | 1574 _mov(Dest, T); |
1580 } | 1575 } |
1581 break; | 1576 break; |
1582 case InstArithmetic::Sdiv: | 1577 case InstArithmetic::Sdiv: |
1583 // TODO(stichnot): Enable this after doing better performance | 1578 // TODO(stichnot): Enable this after doing better performance and cross |
1584 // and cross testing. | 1579 // testing. |
1585 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { | 1580 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { |
1586 // Optimize division by constant power of 2, but not for Om1 | 1581 // Optimize division by constant power of 2, but not for Om1 or O0, just |
1587 // or O0, just to keep things simple there. | 1582 // to keep things simple there. |
1588 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { | 1583 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { |
1589 int32_t Divisor = C->getValue(); | 1584 int32_t Divisor = C->getValue(); |
1590 uint32_t UDivisor = static_cast<uint32_t>(Divisor); | 1585 uint32_t UDivisor = static_cast<uint32_t>(Divisor); |
1591 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { | 1586 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { |
1592 uint32_t LogDiv = llvm::Log2_32(UDivisor); | 1587 uint32_t LogDiv = llvm::Log2_32(UDivisor); |
1593 Type Ty = Dest->getType(); | 1588 Type Ty = Dest->getType(); |
1594 // LLVM does the following for dest=src/(1<<log): | 1589 // LLVM does the following for dest=src/(1<<log): |
1595 // t=src | 1590 // t=src |
1596 // sar t,typewidth-1 // -1 if src is negative, 0 if not | 1591 // sar t,typewidth-1 // -1 if src is negative, 0 if not |
1597 // shr t,typewidth-log | 1592 // shr t,typewidth-log |
1598 // add t,src | 1593 // add t,src |
1599 // sar t,log | 1594 // sar t,log |
1600 // dest=t | 1595 // dest=t |
1601 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty); | 1596 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty); |
1602 _mov(T, Src0); | 1597 _mov(T, Src0); |
1603 // If for some reason we are dividing by 1, just treat it | 1598 // If for some reason we are dividing by 1, just treat it like an |
1604 // like an assignment. | 1599 // assignment. |
1605 if (LogDiv > 0) { | 1600 if (LogDiv > 0) { |
1606 // The initial sar is unnecessary when dividing by 2. | 1601 // The initial sar is unnecessary when dividing by 2. |
1607 if (LogDiv > 1) | 1602 if (LogDiv > 1) |
1608 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1)); | 1603 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1)); |
1609 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); | 1604 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); |
1610 _add(T, Src0); | 1605 _add(T, Src0); |
1611 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); | 1606 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); |
1612 } | 1607 } |
1613 _mov(Dest, T); | 1608 _mov(Dest, T); |
1614 return; | 1609 return; |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1649 } else { | 1644 } else { |
1650 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1645 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
1651 T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx); | 1646 T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx); |
1652 _mov(T_edx, Zero); | 1647 _mov(T_edx, Zero); |
1653 _mov(T, Src0, Traits::RegisterSet::Reg_eax); | 1648 _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
1654 _div(T_edx, Src1, T); | 1649 _div(T_edx, Src1, T); |
1655 _mov(Dest, T_edx); | 1650 _mov(Dest, T_edx); |
1656 } | 1651 } |
1657 break; | 1652 break; |
1658 case InstArithmetic::Srem: | 1653 case InstArithmetic::Srem: |
1659 // TODO(stichnot): Enable this after doing better performance | 1654 // TODO(stichnot): Enable this after doing better performance and cross |
1660 // and cross testing. | 1655 // testing. |
1661 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { | 1656 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { |
1662 // Optimize mod by constant power of 2, but not for Om1 or O0, | 1657 // Optimize mod by constant power of 2, but not for Om1 or O0, just to |
1663 // just to keep things simple there. | 1658 // keep things simple there. |
1664 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { | 1659 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { |
1665 int32_t Divisor = C->getValue(); | 1660 int32_t Divisor = C->getValue(); |
1666 uint32_t UDivisor = static_cast<uint32_t>(Divisor); | 1661 uint32_t UDivisor = static_cast<uint32_t>(Divisor); |
1667 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { | 1662 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { |
1668 uint32_t LogDiv = llvm::Log2_32(UDivisor); | 1663 uint32_t LogDiv = llvm::Log2_32(UDivisor); |
1669 Type Ty = Dest->getType(); | 1664 Type Ty = Dest->getType(); |
1670 // LLVM does the following for dest=src%(1<<log): | 1665 // LLVM does the following for dest=src%(1<<log): |
1671 // t=src | 1666 // t=src |
1672 // sar t,typewidth-1 // -1 if src is negative, 0 if not | 1667 // sar t,typewidth-1 // -1 if src is negative, 0 if not |
1673 // shr t,typewidth-log | 1668 // shr t,typewidth-log |
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1770 _mov(T_Hi, Src0Hi); | 1765 _mov(T_Hi, Src0Hi); |
1771 _mov(DestHi, T_Hi); | 1766 _mov(DestHi, T_Hi); |
1772 } else { | 1767 } else { |
1773 Operand *Src0Legal; | 1768 Operand *Src0Legal; |
1774 if (Dest->hasReg()) { | 1769 if (Dest->hasReg()) { |
1775 // If Dest already has a physical register, then only basic legalization | 1770 // If Dest already has a physical register, then only basic legalization |
1776 // is needed, as the source operand can be a register, immediate, or | 1771 // is needed, as the source operand can be a register, immediate, or |
1777 // memory. | 1772 // memory. |
1778 Src0Legal = legalize(Src0); | 1773 Src0Legal = legalize(Src0); |
1779 } else { | 1774 } else { |
1780 // If Dest could be a stack operand, then RI must be a physical | 1775 // If Dest could be a stack operand, then RI must be a physical register |
1781 // register or a scalar integer immediate. | 1776 // or a scalar integer immediate. |
1782 Src0Legal = legalize(Src0, Legal_Reg | Legal_Imm); | 1777 Src0Legal = legalize(Src0, Legal_Reg | Legal_Imm); |
1783 } | 1778 } |
1784 if (isVectorType(Dest->getType())) | 1779 if (isVectorType(Dest->getType())) |
1785 _movp(Dest, Src0Legal); | 1780 _movp(Dest, Src0Legal); |
1786 else | 1781 else |
1787 _mov(Dest, Src0Legal); | 1782 _mov(Dest, Src0Legal); |
1788 } | 1783 } |
1789 } | 1784 } |
1790 | 1785 |
1791 template <class Machine> | 1786 template <class Machine> |
1792 void TargetX86Base<Machine>::lowerBr(const InstBr *Inst) { | 1787 void TargetX86Base<Machine>::lowerBr(const InstBr *Inst) { |
1793 if (Inst->isUnconditional()) { | 1788 if (Inst->isUnconditional()) { |
1794 _br(Inst->getTargetUnconditional()); | 1789 _br(Inst->getTargetUnconditional()); |
1795 return; | 1790 return; |
1796 } | 1791 } |
1797 Operand *Cond = Inst->getCondition(); | 1792 Operand *Cond = Inst->getCondition(); |
1798 | 1793 |
1799 // Handle folding opportunities. | 1794 // Handle folding opportunities. |
1800 if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) { | 1795 if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) { |
1801 assert(Producer->isDeleted()); | 1796 assert(Producer->isDeleted()); |
1802 switch (BoolFolding::getProducerKind(Producer)) { | 1797 switch (BoolFolding::getProducerKind(Producer)) { |
1803 default: | 1798 default: |
1804 break; | 1799 break; |
1805 case BoolFolding::PK_Icmp32: { | 1800 case BoolFolding::PK_Icmp32: { |
1806 // TODO(stichnot): Refactor similarities between this block and | 1801 // TODO(stichnot): Refactor similarities between this block and the |
1807 // the corresponding code in lowerIcmp(). | 1802 // corresponding code in lowerIcmp(). |
1808 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); | 1803 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); |
1809 Operand *Src0 = Producer->getSrc(0); | 1804 Operand *Src0 = Producer->getSrc(0); |
1810 Operand *Src1 = legalize(Producer->getSrc(1)); | 1805 Operand *Src1 = legalize(Producer->getSrc(1)); |
1811 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); | 1806 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); |
1812 _cmp(Src0RM, Src1); | 1807 _cmp(Src0RM, Src1); |
1813 _br(Traits::getIcmp32Mapping(Cmp->getCondition()), Inst->getTargetTrue(), | 1808 _br(Traits::getIcmp32Mapping(Cmp->getCondition()), Inst->getTargetTrue(), |
1814 Inst->getTargetFalse()); | 1809 Inst->getTargetFalse()); |
1815 return; | 1810 return; |
1816 } | 1811 } |
1817 } | 1812 } |
(...skipping 10 matching lines...) Expand all Loading... |
1828 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) | 1823 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) |
1829 InstCast::OpKind CastKind = Inst->getCastKind(); | 1824 InstCast::OpKind CastKind = Inst->getCastKind(); |
1830 Variable *Dest = Inst->getDest(); | 1825 Variable *Dest = Inst->getDest(); |
1831 switch (CastKind) { | 1826 switch (CastKind) { |
1832 default: | 1827 default: |
1833 Func->setError("Cast type not supported"); | 1828 Func->setError("Cast type not supported"); |
1834 return; | 1829 return; |
1835 case InstCast::Sext: { | 1830 case InstCast::Sext: { |
1836 // Src0RM is the source operand legalized to physical register or memory, | 1831 // Src0RM is the source operand legalized to physical register or memory, |
1837 // but not immediate, since the relevant x86 native instructions don't | 1832 // but not immediate, since the relevant x86 native instructions don't |
1838 // allow an immediate operand. If the operand is an immediate, we could | 1833 // allow an immediate operand. If the operand is an immediate, we could |
1839 // consider computing the strength-reduced result at translation time, | 1834 // consider computing the strength-reduced result at translation time, but |
1840 // but we're unlikely to see something like that in the bitcode that | 1835 // we're unlikely to see something like that in the bitcode that the |
1841 // the optimizer wouldn't have already taken care of. | 1836 // optimizer wouldn't have already taken care of. |
1842 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 1837 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
1843 if (isVectorType(Dest->getType())) { | 1838 if (isVectorType(Dest->getType())) { |
1844 Type DestTy = Dest->getType(); | 1839 Type DestTy = Dest->getType(); |
1845 if (DestTy == IceType_v16i8) { | 1840 if (DestTy == IceType_v16i8) { |
1846 // onemask = materialize(1,1,...); dst = (src & onemask) > 0 | 1841 // onemask = materialize(1,1,...); dst = (src & onemask) > 0 |
1847 Variable *OneMask = makeVectorOfOnes(Dest->getType()); | 1842 Variable *OneMask = makeVectorOfOnes(Dest->getType()); |
1848 Variable *T = makeReg(DestTy); | 1843 Variable *T = makeReg(DestTy); |
1849 _movp(T, Src0RM); | 1844 _movp(T, Src0RM); |
1850 _pand(T, OneMask); | 1845 _pand(T, OneMask); |
1851 Variable *Zeros = makeVectorOfZeros(Dest->getType()); | 1846 Variable *Zeros = makeVectorOfZeros(Dest->getType()); |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1891 // sar t1, dst_bitwidth - 1 | 1886 // sar t1, dst_bitwidth - 1 |
1892 // dst = t1 | 1887 // dst = t1 |
1893 size_t DestBits = | 1888 size_t DestBits = |
1894 Traits::X86_CHAR_BIT * typeWidthInBytes(Dest->getType()); | 1889 Traits::X86_CHAR_BIT * typeWidthInBytes(Dest->getType()); |
1895 Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1); | 1890 Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1); |
1896 Variable *T = makeReg(Dest->getType()); | 1891 Variable *T = makeReg(Dest->getType()); |
1897 if (typeWidthInBytes(Dest->getType()) <= | 1892 if (typeWidthInBytes(Dest->getType()) <= |
1898 typeWidthInBytes(Src0RM->getType())) { | 1893 typeWidthInBytes(Src0RM->getType())) { |
1899 _mov(T, Src0RM); | 1894 _mov(T, Src0RM); |
1900 } else { | 1895 } else { |
1901 // Widen the source using movsx or movzx. (It doesn't matter | 1896 // Widen the source using movsx or movzx. (It doesn't matter which one, |
1902 // which one, since the following shl/sar overwrite the bits.) | 1897 // since the following shl/sar overwrite the bits.) |
1903 _movzx(T, Src0RM); | 1898 _movzx(T, Src0RM); |
1904 } | 1899 } |
1905 _shl(T, ShiftAmount); | 1900 _shl(T, ShiftAmount); |
1906 _sar(T, ShiftAmount); | 1901 _sar(T, ShiftAmount); |
1907 _mov(Dest, T); | 1902 _mov(Dest, T); |
1908 } else { | 1903 } else { |
1909 // t1 = movsx src; dst = t1 | 1904 // t1 = movsx src; dst = t1 |
1910 Variable *T = makeReg(Dest->getType()); | 1905 Variable *T = makeReg(Dest->getType()); |
1911 _movsx(T, Src0RM); | 1906 _movsx(T, Src0RM); |
1912 _mov(Dest, T); | 1907 _mov(Dest, T); |
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2003 if (isVectorType(Dest->getType())) { | 1998 if (isVectorType(Dest->getType())) { |
2004 assert(Dest->getType() == IceType_v4i32 && | 1999 assert(Dest->getType() == IceType_v4i32 && |
2005 Inst->getSrc(0)->getType() == IceType_v4f32); | 2000 Inst->getSrc(0)->getType() == IceType_v4f32); |
2006 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | 2001 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
2007 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) | 2002 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) |
2008 Src0RM = legalizeToReg(Src0RM); | 2003 Src0RM = legalizeToReg(Src0RM); |
2009 Variable *T = makeReg(Dest->getType()); | 2004 Variable *T = makeReg(Dest->getType()); |
2010 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); | 2005 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); |
2011 _movp(Dest, T); | 2006 _movp(Dest, T); |
2012 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { | 2007 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
2013 // Use a helper for converting floating-point values to 64-bit | 2008 // Use a helper for converting floating-point values to 64-bit integers. |
2014 // integers. SSE2 appears to have no way to convert from xmm | 2009 // SSE2 appears to have no way to convert from xmm registers to something |
2015 // registers to something like the edx:eax register pair, and | 2010 // like the edx:eax register pair, and gcc and clang both want to use x87 |
2016 // gcc and clang both want to use x87 instructions complete with | 2011 // instructions complete with temporary manipulation of the status word. |
2017 // temporary manipulation of the status word. This helper is | 2012 // This helper is not needed for x86-64. |
2018 // not needed for x86-64. | |
2019 split64(Dest); | 2013 split64(Dest); |
2020 const SizeT MaxSrcs = 1; | 2014 const SizeT MaxSrcs = 1; |
2021 Type SrcType = Inst->getSrc(0)->getType(); | 2015 Type SrcType = Inst->getSrc(0)->getType(); |
2022 InstCall *Call = | 2016 InstCall *Call = |
2023 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 | 2017 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 |
2024 : H_fptosi_f64_i64, | 2018 : H_fptosi_f64_i64, |
2025 Dest, MaxSrcs); | 2019 Dest, MaxSrcs); |
2026 Call->addArg(Inst->getSrc(0)); | 2020 Call->addArg(Inst->getSrc(0)); |
2027 lowerCall(Call); | 2021 lowerCall(Call); |
2028 } else { | 2022 } else { |
(...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2143 Operand *Src0 = Inst->getSrc(0); | 2137 Operand *Src0 = Inst->getSrc(0); |
2144 if (isVectorType(Src0->getType())) { | 2138 if (isVectorType(Src0->getType())) { |
2145 assert(Dest->getType() == IceType_v4f32 && | 2139 assert(Dest->getType() == IceType_v4f32 && |
2146 Src0->getType() == IceType_v4i32); | 2140 Src0->getType() == IceType_v4i32); |
2147 const SizeT MaxSrcs = 1; | 2141 const SizeT MaxSrcs = 1; |
2148 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs); | 2142 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs); |
2149 Call->addArg(Src0); | 2143 Call->addArg(Src0); |
2150 lowerCall(Call); | 2144 lowerCall(Call); |
2151 } else if (Src0->getType() == IceType_i64 || | 2145 } else if (Src0->getType() == IceType_i64 || |
2152 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) { | 2146 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) { |
2153 // Use a helper for x86-32 and x86-64. Also use a helper for | 2147 // Use a helper for x86-32 and x86-64. Also use a helper for i32 on |
2154 // i32 on x86-32. | 2148 // x86-32. |
2155 const SizeT MaxSrcs = 1; | 2149 const SizeT MaxSrcs = 1; |
2156 Type DestType = Dest->getType(); | 2150 Type DestType = Dest->getType(); |
2157 IceString TargetString; | 2151 IceString TargetString; |
2158 if (isInt32Asserting32Or64(Src0->getType())) { | 2152 if (isInt32Asserting32Or64(Src0->getType())) { |
2159 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32 | 2153 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32 |
2160 : H_uitofp_i32_f64; | 2154 : H_uitofp_i32_f64; |
2161 } else { | 2155 } else { |
2162 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32 | 2156 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32 |
2163 : H_uitofp_i64_f64; | 2157 : H_uitofp_i64_f64; |
2164 } | 2158 } |
(...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2278 _mov(DestLo, T_Lo); | 2272 _mov(DestLo, T_Lo); |
2279 _mov(T_Hi, SpillHi); | 2273 _mov(T_Hi, SpillHi); |
2280 _mov(DestHi, T_Hi); | 2274 _mov(DestHi, T_Hi); |
2281 } | 2275 } |
2282 } break; | 2276 } break; |
2283 case IceType_f64: { | 2277 case IceType_f64: { |
2284 assert(Src0->getType() == IceType_i64); | 2278 assert(Src0->getType() == IceType_i64); |
2285 if (Traits::Is64Bit) { | 2279 if (Traits::Is64Bit) { |
2286 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | 2280 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
2287 Variable *T = makeReg(IceType_f64); | 2281 Variable *T = makeReg(IceType_f64); |
2288 // Movd requires its fp argument (in this case, the bitcast destination) | 2282 // Movd requires its fp argument (in this case, the bitcast |
2289 // to be an xmm register. | 2283 // destination) to be an xmm register. |
2290 T->setMustHaveReg(); | 2284 T->setMustHaveReg(); |
2291 _movd(T, Src0RM); | 2285 _movd(T, Src0RM); |
2292 _mov(Dest, T); | 2286 _mov(Dest, T); |
2293 } else { | 2287 } else { |
2294 Src0 = legalize(Src0); | 2288 Src0 = legalize(Src0); |
2295 if (llvm::isa<typename Traits::X86OperandMem>(Src0)) { | 2289 if (llvm::isa<typename Traits::X86OperandMem>(Src0)) { |
2296 Variable *T = Func->makeVariable(Dest->getType()); | 2290 Variable *T = Func->makeVariable(Dest->getType()); |
2297 _movq(T, Src0); | 2291 _movq(T, Src0); |
2298 _movq(Dest, T); | 2292 _movq(Dest, T); |
2299 break; | 2293 break; |
(...skipping 11 matching lines...) Expand all Loading... |
2311 Variable *Spill = SpillVar; | 2305 Variable *Spill = SpillVar; |
2312 Spill->setMustNotHaveReg(); | 2306 Spill->setMustNotHaveReg(); |
2313 | 2307 |
2314 Variable *T_Lo = nullptr, *T_Hi = nullptr; | 2308 Variable *T_Lo = nullptr, *T_Hi = nullptr; |
2315 typename Traits::VariableSplit *SpillLo = Traits::VariableSplit::create( | 2309 typename Traits::VariableSplit *SpillLo = Traits::VariableSplit::create( |
2316 Func, Spill, Traits::VariableSplit::Low); | 2310 Func, Spill, Traits::VariableSplit::Low); |
2317 typename Traits::VariableSplit *SpillHi = Traits::VariableSplit::create( | 2311 typename Traits::VariableSplit *SpillHi = Traits::VariableSplit::create( |
2318 Func, Spill, Traits::VariableSplit::High); | 2312 Func, Spill, Traits::VariableSplit::High); |
2319 _mov(T_Lo, loOperand(Src0)); | 2313 _mov(T_Lo, loOperand(Src0)); |
2320 // Technically, the Spill is defined after the _store happens, but | 2314 // Technically, the Spill is defined after the _store happens, but |
2321 // SpillLo is considered a "use" of Spill so define Spill before it | 2315 // SpillLo is considered a "use" of Spill so define Spill before it is |
2322 // is used. | 2316 // used. |
2323 Context.insert(InstFakeDef::create(Func, Spill)); | 2317 Context.insert(InstFakeDef::create(Func, Spill)); |
2324 _store(T_Lo, SpillLo); | 2318 _store(T_Lo, SpillLo); |
2325 _mov(T_Hi, hiOperand(Src0)); | 2319 _mov(T_Hi, hiOperand(Src0)); |
2326 _store(T_Hi, SpillHi); | 2320 _store(T_Hi, SpillHi); |
2327 _movq(Dest, Spill); | 2321 _movq(Dest, Spill); |
2328 } | 2322 } |
2329 } break; | 2323 } break; |
2330 case IceType_v8i1: { | 2324 case IceType_v8i1: { |
2331 assert(Src0->getType() == IceType_i8); | 2325 assert(Src0->getType() == IceType_i8); |
2332 InstCall *Call = makeHelperCall(H_bitcast_i8_8xi1, Dest, 1); | 2326 InstCall *Call = makeHelperCall(H_bitcast_i8_8xi1, Dest, 1); |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2377 InstructionSet >= Traits::SSE4_1; | 2371 InstructionSet >= Traits::SSE4_1; |
2378 if (CanUsePextr && Ty != IceType_v4f32) { | 2372 if (CanUsePextr && Ty != IceType_v4f32) { |
2379 // Use pextrb, pextrw, or pextrd. | 2373 // Use pextrb, pextrw, or pextrd. |
2380 Constant *Mask = Ctx->getConstantInt32(Index); | 2374 Constant *Mask = Ctx->getConstantInt32(Index); |
2381 Variable *SourceVectR = legalizeToReg(SourceVectNotLegalized); | 2375 Variable *SourceVectR = legalizeToReg(SourceVectNotLegalized); |
2382 _pextr(ExtractedElementR, SourceVectR, Mask); | 2376 _pextr(ExtractedElementR, SourceVectR, Mask); |
2383 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { | 2377 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { |
2384 // Use pshufd and movd/movss. | 2378 // Use pshufd and movd/movss. |
2385 Variable *T = nullptr; | 2379 Variable *T = nullptr; |
2386 if (Index) { | 2380 if (Index) { |
2387 // The shuffle only needs to occur if the element to be extracted | 2381 // The shuffle only needs to occur if the element to be extracted is not |
2388 // is not at the lowest index. | 2382 // at the lowest index. |
2389 Constant *Mask = Ctx->getConstantInt32(Index); | 2383 Constant *Mask = Ctx->getConstantInt32(Index); |
2390 T = makeReg(Ty); | 2384 T = makeReg(Ty); |
2391 _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask); | 2385 _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask); |
2392 } else { | 2386 } else { |
2393 T = legalizeToReg(SourceVectNotLegalized); | 2387 T = legalizeToReg(SourceVectNotLegalized); |
2394 } | 2388 } |
2395 | 2389 |
2396 if (InVectorElementTy == IceType_i32) { | 2390 if (InVectorElementTy == IceType_i32) { |
2397 _movd(ExtractedElementR, T); | 2391 _movd(ExtractedElementR, T); |
2398 } else { // Ty == IceType_f32 | 2392 } else { // Ty == IceType_f32 |
2399 // TODO(wala): _movss is only used here because _mov does not | 2393 // TODO(wala): _movss is only used here because _mov does not allow a |
2400 // allow a vector source and a scalar destination. _mov should be | 2394 // vector source and a scalar destination. _mov should be able to be |
2401 // able to be used here. | 2395 // used here. |
2402 // _movss is a binary instruction, so the FakeDef is needed to | 2396 // _movss is a binary instruction, so the FakeDef is needed to keep the |
2403 // keep the live range analysis consistent. | 2397 // live range analysis consistent. |
2404 Context.insert(InstFakeDef::create(Func, ExtractedElementR)); | 2398 Context.insert(InstFakeDef::create(Func, ExtractedElementR)); |
2405 _movss(ExtractedElementR, T); | 2399 _movss(ExtractedElementR, T); |
2406 } | 2400 } |
2407 } else { | 2401 } else { |
2408 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); | 2402 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); |
2409 // Spill the value to a stack slot and do the extraction in memory. | 2403 // Spill the value to a stack slot and do the extraction in memory. |
2410 // | 2404 // |
2411 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when | 2405 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support |
2412 // support for legalizing to mem is implemented. | 2406 // for legalizing to mem is implemented. |
2413 Variable *Slot = Func->makeVariable(Ty); | 2407 Variable *Slot = Func->makeVariable(Ty); |
2414 Slot->setMustNotHaveReg(); | 2408 Slot->setMustNotHaveReg(); |
2415 _movp(Slot, legalizeToReg(SourceVectNotLegalized)); | 2409 _movp(Slot, legalizeToReg(SourceVectNotLegalized)); |
2416 | 2410 |
2417 // Compute the location of the element in memory. | 2411 // Compute the location of the element in memory. |
2418 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy); | 2412 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy); |
2419 typename Traits::X86OperandMem *Loc = | 2413 typename Traits::X86OperandMem *Loc = |
2420 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); | 2414 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); |
2421 _mov(ExtractedElementR, Loc); | 2415 _mov(ExtractedElementR, Loc); |
2422 } | 2416 } |
(...skipping 159 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2582 Src0 = NewSrc0; | 2576 Src0 = NewSrc0; |
2583 Src1 = NewSrc1; | 2577 Src1 = NewSrc1; |
2584 Ty = NewTy; | 2578 Ty = NewTy; |
2585 } | 2579 } |
2586 | 2580 |
2587 InstIcmp::ICond Condition = Inst->getCondition(); | 2581 InstIcmp::ICond Condition = Inst->getCondition(); |
2588 | 2582 |
2589 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | 2583 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
2590 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | 2584 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
2591 | 2585 |
2592 // SSE2 only has signed comparison operations. Transform unsigned | 2586 // SSE2 only has signed comparison operations. Transform unsigned inputs in |
2593 // inputs in a manner that allows for the use of signed comparison | 2587 // a manner that allows for the use of signed comparison operations by |
2594 // operations by flipping the high order bits. | 2588 // flipping the high order bits. |
2595 if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge || | 2589 if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge || |
2596 Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) { | 2590 Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) { |
2597 Variable *T0 = makeReg(Ty); | 2591 Variable *T0 = makeReg(Ty); |
2598 Variable *T1 = makeReg(Ty); | 2592 Variable *T1 = makeReg(Ty); |
2599 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty); | 2593 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty); |
2600 _movp(T0, Src0RM); | 2594 _movp(T0, Src0RM); |
2601 _pxor(T0, HighOrderBits); | 2595 _pxor(T0, HighOrderBits); |
2602 _movp(T1, Src1RM); | 2596 _movp(T1, Src1RM); |
2603 _pxor(T1, HighOrderBits); | 2597 _pxor(T1, HighOrderBits); |
2604 Src0RM = T0; | 2598 Src0RM = T0; |
(...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2719 // Only constant indices are allowed in PNaCl IR. | 2713 // Only constant indices are allowed in PNaCl IR. |
2720 assert(ElementIndex); | 2714 assert(ElementIndex); |
2721 unsigned Index = ElementIndex->getValue(); | 2715 unsigned Index = ElementIndex->getValue(); |
2722 assert(Index < typeNumElements(SourceVectNotLegalized->getType())); | 2716 assert(Index < typeNumElements(SourceVectNotLegalized->getType())); |
2723 | 2717 |
2724 Type Ty = SourceVectNotLegalized->getType(); | 2718 Type Ty = SourceVectNotLegalized->getType(); |
2725 Type ElementTy = typeElementType(Ty); | 2719 Type ElementTy = typeElementType(Ty); |
2726 Type InVectorElementTy = Traits::getInVectorElementType(Ty); | 2720 Type InVectorElementTy = Traits::getInVectorElementType(Ty); |
2727 | 2721 |
2728 if (ElementTy == IceType_i1) { | 2722 if (ElementTy == IceType_i1) { |
2729 // Expand the element to the appropriate size for it to be inserted | 2723 // Expand the element to the appropriate size for it to be inserted in the |
2730 // in the vector. | 2724 // vector. |
2731 Variable *Expanded = Func->makeVariable(InVectorElementTy); | 2725 Variable *Expanded = Func->makeVariable(InVectorElementTy); |
2732 InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded, | 2726 InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded, |
2733 ElementToInsertNotLegalized); | 2727 ElementToInsertNotLegalized); |
2734 lowerCast(Cast); | 2728 lowerCast(Cast); |
2735 ElementToInsertNotLegalized = Expanded; | 2729 ElementToInsertNotLegalized = Expanded; |
2736 } | 2730 } |
2737 | 2731 |
2738 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || | 2732 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || |
2739 InstructionSet >= Traits::SSE4_1) { | 2733 InstructionSet >= Traits::SSE4_1) { |
2740 // Use insertps, pinsrb, pinsrw, or pinsrd. | 2734 // Use insertps, pinsrb, pinsrw, or pinsrd. |
(...skipping 25 matching lines...) Expand all Loading... |
2766 } | 2760 } |
2767 | 2761 |
2768 if (Index == 0) { | 2762 if (Index == 0) { |
2769 Variable *T = makeReg(Ty); | 2763 Variable *T = makeReg(Ty); |
2770 _movp(T, SourceVectRM); | 2764 _movp(T, SourceVectRM); |
2771 _movss(T, ElementR); | 2765 _movss(T, ElementR); |
2772 _movp(Inst->getDest(), T); | 2766 _movp(Inst->getDest(), T); |
2773 return; | 2767 return; |
2774 } | 2768 } |
2775 | 2769 |
2776 // shufps treats the source and desination operands as vectors of | 2770 // shufps treats the source and destination operands as vectors of four |
2777 // four doublewords. The destination's two high doublewords are | 2771 // doublewords. The destination's two high doublewords are selected from |
2778 // selected from the source operand and the two low doublewords are | 2772 // the source operand and the two low doublewords are selected from the |
2779 // selected from the (original value of) the destination operand. | 2773 // (original value of) the destination operand. An insertelement operation |
2780 // An insertelement operation can be effected with a sequence of two | 2774 // can be effected with a sequence of two shufps operations with |
2781 // shufps operations with appropriate masks. In all cases below, | 2775 // appropriate masks. In all cases below, Element[0] is being inserted into |
2782 // Element[0] is being inserted into SourceVectOperand. Indices are | 2776 // SourceVectOperand. Indices are ordered from left to right. |
2783 // ordered from left to right. | |
2784 // | 2777 // |
2785 // insertelement into index 1 (result is stored in ElementR): | 2778 // insertelement into index 1 (result is stored in ElementR): |
2786 // ElementR := ElementR[0, 0] SourceVectRM[0, 0] | 2779 // ElementR := ElementR[0, 0] SourceVectRM[0, 0] |
2787 // ElementR := ElementR[3, 0] SourceVectRM[2, 3] | 2780 // ElementR := ElementR[3, 0] SourceVectRM[2, 3] |
2788 // | 2781 // |
2789 // insertelement into index 2 (result is stored in T): | 2782 // insertelement into index 2 (result is stored in T): |
2790 // T := SourceVectRM | 2783 // T := SourceVectRM |
2791 // ElementR := ElementR[0, 0] T[0, 3] | 2784 // ElementR := ElementR[0, 0] T[0, 3] |
2792 // T := T[0, 1] ElementR[0, 3] | 2785 // T := T[0, 1] ElementR[0, 3] |
2793 // | 2786 // |
(...skipping 13 matching lines...) Expand all Loading... |
2807 _movp(Inst->getDest(), ElementR); | 2800 _movp(Inst->getDest(), ElementR); |
2808 } else { | 2801 } else { |
2809 Variable *T = makeReg(Ty); | 2802 Variable *T = makeReg(Ty); |
2810 _movp(T, SourceVectRM); | 2803 _movp(T, SourceVectRM); |
2811 _shufps(ElementR, T, Mask1Constant); | 2804 _shufps(ElementR, T, Mask1Constant); |
2812 _shufps(T, ElementR, Mask2Constant); | 2805 _shufps(T, ElementR, Mask2Constant); |
2813 _movp(Inst->getDest(), T); | 2806 _movp(Inst->getDest(), T); |
2814 } | 2807 } |
2815 } else { | 2808 } else { |
2816 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); | 2809 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); |
2817 // Spill the value to a stack slot and perform the insertion in | 2810 // Spill the value to a stack slot and perform the insertion in memory. |
2818 // memory. | |
2819 // | 2811 // |
2820 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when | 2812 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support |
2821 // support for legalizing to mem is implemented. | 2813 // for legalizing to mem is implemented. |
2822 Variable *Slot = Func->makeVariable(Ty); | 2814 Variable *Slot = Func->makeVariable(Ty); |
2823 Slot->setMustNotHaveReg(); | 2815 Slot->setMustNotHaveReg(); |
2824 _movp(Slot, legalizeToReg(SourceVectNotLegalized)); | 2816 _movp(Slot, legalizeToReg(SourceVectNotLegalized)); |
2825 | 2817 |
2826 // Compute the location of the position to insert in memory. | 2818 // Compute the location of the position to insert in memory. |
2827 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy); | 2819 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy); |
2828 typename Traits::X86OperandMem *Loc = | 2820 typename Traits::X86OperandMem *Loc = |
2829 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); | 2821 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); |
2830 _store(legalizeToReg(ElementToInsertNotLegalized), Loc); | 2822 _store(legalizeToReg(ElementToInsertNotLegalized), Loc); |
2831 | 2823 |
(...skipping 25 matching lines...) Expand all Loading... |
2857 } | 2849 } |
2858 case Intrinsics::AtomicFence: | 2850 case Intrinsics::AtomicFence: |
2859 if (!Intrinsics::isMemoryOrderValid( | 2851 if (!Intrinsics::isMemoryOrderValid( |
2860 ID, getConstantMemoryOrder(Instr->getArg(0)))) { | 2852 ID, getConstantMemoryOrder(Instr->getArg(0)))) { |
2861 Func->setError("Unexpected memory ordering for AtomicFence"); | 2853 Func->setError("Unexpected memory ordering for AtomicFence"); |
2862 return; | 2854 return; |
2863 } | 2855 } |
2864 _mfence(); | 2856 _mfence(); |
2865 return; | 2857 return; |
2866 case Intrinsics::AtomicFenceAll: | 2858 case Intrinsics::AtomicFenceAll: |
2867 // NOTE: FenceAll should prevent and load/store from being moved | 2859 // NOTE: FenceAll should prevent and load/store from being moved across the |
2868 // across the fence (both atomic and non-atomic). The InstX8632Mfence | 2860 // fence (both atomic and non-atomic). The InstX8632Mfence instruction is |
2869 // instruction is currently marked coarsely as "HasSideEffects". | 2861 // currently marked coarsely as "HasSideEffects". |
2870 _mfence(); | 2862 _mfence(); |
2871 return; | 2863 return; |
2872 case Intrinsics::AtomicIsLockFree: { | 2864 case Intrinsics::AtomicIsLockFree: { |
2873 // X86 is always lock free for 8/16/32/64 bit accesses. | 2865 // X86 is always lock free for 8/16/32/64 bit accesses. |
2874 // TODO(jvoung): Since the result is constant when given a constant | 2866 // TODO(jvoung): Since the result is constant when given a constant byte |
2875 // byte size, this opens up DCE opportunities. | 2867 // size, this opens up DCE opportunities. |
2876 Operand *ByteSize = Instr->getArg(0); | 2868 Operand *ByteSize = Instr->getArg(0); |
2877 Variable *Dest = Instr->getDest(); | 2869 Variable *Dest = Instr->getDest(); |
2878 if (ConstantInteger32 *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) { | 2870 if (ConstantInteger32 *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) { |
2879 Constant *Result; | 2871 Constant *Result; |
2880 switch (CI->getValue()) { | 2872 switch (CI->getValue()) { |
2881 default: | 2873 default: |
2882 // Some x86-64 processors support the cmpxchg16b intruction, which | 2874 // Some x86-64 processors support the cmpxchg16b instruction, which can |
2883 // can make 16-byte operations lock free (when used with the LOCK | 2875 // make 16-byte operations lock free (when used with the LOCK prefix). |
2884 // prefix). However, that's not supported in 32-bit mode, so just | 2876 // However, that's not supported in 32-bit mode, so just return 0 even |
2885 // return 0 even for large sizes. | 2877 // for large sizes. |
2886 Result = Ctx->getConstantZero(IceType_i32); | 2878 Result = Ctx->getConstantZero(IceType_i32); |
2887 break; | 2879 break; |
2888 case 1: | 2880 case 1: |
2889 case 2: | 2881 case 2: |
2890 case 4: | 2882 case 4: |
2891 case 8: | 2883 case 8: |
2892 Result = Ctx->getConstantInt32(1); | 2884 Result = Ctx->getConstantInt32(1); |
2893 break; | 2885 break; |
2894 } | 2886 } |
2895 _mov(Dest, Result); | 2887 _mov(Dest, Result); |
2896 return; | 2888 return; |
2897 } | 2889 } |
2898 // The PNaCl ABI requires the byte size to be a compile-time constant. | 2890 // The PNaCl ABI requires the byte size to be a compile-time constant. |
2899 Func->setError("AtomicIsLockFree byte size should be compile-time const"); | 2891 Func->setError("AtomicIsLockFree byte size should be compile-time const"); |
2900 return; | 2892 return; |
2901 } | 2893 } |
2902 case Intrinsics::AtomicLoad: { | 2894 case Intrinsics::AtomicLoad: { |
2903 // We require the memory address to be naturally aligned. | 2895 // We require the memory address to be naturally aligned. Given that is the |
2904 // Given that is the case, then normal loads are atomic. | 2896 // case, then normal loads are atomic. |
2905 if (!Intrinsics::isMemoryOrderValid( | 2897 if (!Intrinsics::isMemoryOrderValid( |
2906 ID, getConstantMemoryOrder(Instr->getArg(1)))) { | 2898 ID, getConstantMemoryOrder(Instr->getArg(1)))) { |
2907 Func->setError("Unexpected memory ordering for AtomicLoad"); | 2899 Func->setError("Unexpected memory ordering for AtomicLoad"); |
2908 return; | 2900 return; |
2909 } | 2901 } |
2910 Variable *Dest = Instr->getDest(); | 2902 Variable *Dest = Instr->getDest(); |
2911 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { | 2903 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
2912 // Follow what GCC does and use a movq instead of what lowerLoad() | 2904 // Follow what GCC does and use a movq instead of what lowerLoad() |
2913 // normally does (split the load into two). | 2905 // normally does (split the load into two). Thus, this skips |
2914 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding | 2906 // load/arithmetic op folding. Load/arithmetic folding can't happen |
2915 // can't happen anyway, since this is x86-32 and integer arithmetic only | 2907 // anyway, since this is x86-32 and integer arithmetic only happens on |
2916 // happens on 32-bit quantities. | 2908 // 32-bit quantities. |
2917 Variable *T = makeReg(IceType_f64); | 2909 Variable *T = makeReg(IceType_f64); |
2918 typename Traits::X86OperandMem *Addr = | 2910 typename Traits::X86OperandMem *Addr = |
2919 formMemoryOperand(Instr->getArg(0), IceType_f64); | 2911 formMemoryOperand(Instr->getArg(0), IceType_f64); |
2920 _movq(T, Addr); | 2912 _movq(T, Addr); |
2921 // Then cast the bits back out of the XMM register to the i64 Dest. | 2913 // Then cast the bits back out of the XMM register to the i64 Dest. |
2922 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T); | 2914 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T); |
2923 lowerCast(Cast); | 2915 lowerCast(Cast); |
2924 // Make sure that the atomic load isn't elided when unused. | 2916 // Make sure that the atomic load isn't elided when unused. |
2925 Context.insert(InstFakeUse::create(Func, Dest->getLo())); | 2917 Context.insert(InstFakeUse::create(Func, Dest->getLo())); |
2926 Context.insert(InstFakeUse::create(Func, Dest->getHi())); | 2918 Context.insert(InstFakeUse::create(Func, Dest->getHi())); |
2927 return; | 2919 return; |
2928 } | 2920 } |
2929 InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0)); | 2921 InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0)); |
2930 lowerLoad(Load); | 2922 lowerLoad(Load); |
2931 // Make sure the atomic load isn't elided when unused, by adding a FakeUse. | 2923 // Make sure the atomic load isn't elided when unused, by adding a FakeUse. |
2932 // Since lowerLoad may fuse the load w/ an arithmetic instruction, | 2924 // Since lowerLoad may fuse the load w/ an arithmetic instruction, insert |
2933 // insert the FakeUse on the last-inserted instruction's dest. | 2925 // the FakeUse on the last-inserted instruction's dest. |
2934 Context.insert( | 2926 Context.insert( |
2935 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); | 2927 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); |
2936 return; | 2928 return; |
2937 } | 2929 } |
2938 case Intrinsics::AtomicRMW: | 2930 case Intrinsics::AtomicRMW: |
2939 if (!Intrinsics::isMemoryOrderValid( | 2931 if (!Intrinsics::isMemoryOrderValid( |
2940 ID, getConstantMemoryOrder(Instr->getArg(3)))) { | 2932 ID, getConstantMemoryOrder(Instr->getArg(3)))) { |
2941 Func->setError("Unexpected memory ordering for AtomicRMW"); | 2933 Func->setError("Unexpected memory ordering for AtomicRMW"); |
2942 return; | 2934 return; |
2943 } | 2935 } |
2944 lowerAtomicRMW( | 2936 lowerAtomicRMW( |
2945 Instr->getDest(), | 2937 Instr->getDest(), |
2946 static_cast<uint32_t>( | 2938 static_cast<uint32_t>( |
2947 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()), | 2939 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()), |
2948 Instr->getArg(1), Instr->getArg(2)); | 2940 Instr->getArg(1), Instr->getArg(2)); |
2949 return; | 2941 return; |
2950 case Intrinsics::AtomicStore: { | 2942 case Intrinsics::AtomicStore: { |
2951 if (!Intrinsics::isMemoryOrderValid( | 2943 if (!Intrinsics::isMemoryOrderValid( |
2952 ID, getConstantMemoryOrder(Instr->getArg(2)))) { | 2944 ID, getConstantMemoryOrder(Instr->getArg(2)))) { |
2953 Func->setError("Unexpected memory ordering for AtomicStore"); | 2945 Func->setError("Unexpected memory ordering for AtomicStore"); |
2954 return; | 2946 return; |
2955 } | 2947 } |
2956 // We require the memory address to be naturally aligned. | 2948 // We require the memory address to be naturally aligned. Given that is the |
2957 // Given that is the case, then normal stores are atomic. | 2949 // case, then normal stores are atomic. Add a fence after the store to make |
2958 // Add a fence after the store to make it visible. | 2950 // it visible. |
2959 Operand *Value = Instr->getArg(0); | 2951 Operand *Value = Instr->getArg(0); |
2960 Operand *Ptr = Instr->getArg(1); | 2952 Operand *Ptr = Instr->getArg(1); |
2961 if (!Traits::Is64Bit && Value->getType() == IceType_i64) { | 2953 if (!Traits::Is64Bit && Value->getType() == IceType_i64) { |
2962 // Use a movq instead of what lowerStore() normally does | 2954 // Use a movq instead of what lowerStore() normally does (split the store |
2963 // (split the store into two), following what GCC does. | 2955 // into two), following what GCC does. Cast the bits from int -> to an |
2964 // Cast the bits from int -> to an xmm register first. | 2956 // xmm register first. |
2965 Variable *T = makeReg(IceType_f64); | 2957 Variable *T = makeReg(IceType_f64); |
2966 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value); | 2958 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value); |
2967 lowerCast(Cast); | 2959 lowerCast(Cast); |
2968 // Then store XMM w/ a movq. | 2960 // Then store XMM w/ a movq. |
2969 typename Traits::X86OperandMem *Addr = | 2961 typename Traits::X86OperandMem *Addr = |
2970 formMemoryOperand(Ptr, IceType_f64); | 2962 formMemoryOperand(Ptr, IceType_f64); |
2971 _storeq(T, Addr); | 2963 _storeq(T, Addr); |
2972 _mfence(); | 2964 _mfence(); |
2973 return; | 2965 return; |
2974 } | 2966 } |
2975 InstStore *Store = InstStore::create(Func, Value, Ptr); | 2967 InstStore *Store = InstStore::create(Func, Value, Ptr); |
2976 lowerStore(Store); | 2968 lowerStore(Store); |
2977 _mfence(); | 2969 _mfence(); |
2978 return; | 2970 return; |
2979 } | 2971 } |
2980 case Intrinsics::Bswap: { | 2972 case Intrinsics::Bswap: { |
2981 Variable *Dest = Instr->getDest(); | 2973 Variable *Dest = Instr->getDest(); |
2982 Operand *Val = Instr->getArg(0); | 2974 Operand *Val = Instr->getArg(0); |
2983 // In 32-bit mode, bswap only works on 32-bit arguments, and the | 2975 // In 32-bit mode, bswap only works on 32-bit arguments, and the argument |
2984 // argument must be a register. Use rotate left for 16-bit bswap. | 2976 // must be a register. Use rotate left for 16-bit bswap. |
2985 if (!Traits::Is64Bit && Val->getType() == IceType_i64) { | 2977 if (!Traits::Is64Bit && Val->getType() == IceType_i64) { |
2986 Val = legalizeUndef(Val); | 2978 Val = legalizeUndef(Val); |
2987 Variable *T_Lo = legalizeToReg(loOperand(Val)); | 2979 Variable *T_Lo = legalizeToReg(loOperand(Val)); |
2988 Variable *T_Hi = legalizeToReg(hiOperand(Val)); | 2980 Variable *T_Hi = legalizeToReg(hiOperand(Val)); |
2989 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 2981 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
2990 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 2982 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
2991 _bswap(T_Lo); | 2983 _bswap(T_Lo); |
2992 _bswap(T_Hi); | 2984 _bswap(T_Hi); |
2993 _mov(DestLo, T_Hi); | 2985 _mov(DestLo, T_Hi); |
2994 _mov(DestHi, T_Lo); | 2986 _mov(DestHi, T_Lo); |
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3063 // another 64-bit wide.) | 3055 // another 64-bit wide.) |
3064 Variable *T_1 = makeReg(IceType_i32); | 3056 Variable *T_1 = makeReg(IceType_i32); |
3065 _mov(T_1, T); | 3057 _mov(T_1, T); |
3066 Variable *T_2 = makeReg(IceType_i64); | 3058 Variable *T_2 = makeReg(IceType_i64); |
3067 _movzx(T_2, T_1); | 3059 _movzx(T_2, T_1); |
3068 _mov(Dest, T_2); | 3060 _mov(Dest, T_2); |
3069 } | 3061 } |
3070 return; | 3062 return; |
3071 } | 3063 } |
3072 case Intrinsics::Ctlz: { | 3064 case Intrinsics::Ctlz: { |
3073 // The "is zero undef" parameter is ignored and we always return | 3065 // The "is zero undef" parameter is ignored and we always return a |
3074 // a well-defined value. | 3066 // well-defined value. |
3075 Operand *Val = legalize(Instr->getArg(0)); | 3067 Operand *Val = legalize(Instr->getArg(0)); |
3076 Operand *FirstVal; | 3068 Operand *FirstVal; |
3077 Operand *SecondVal = nullptr; | 3069 Operand *SecondVal = nullptr; |
3078 if (!Traits::Is64Bit && Val->getType() == IceType_i64) { | 3070 if (!Traits::Is64Bit && Val->getType() == IceType_i64) { |
3079 FirstVal = loOperand(Val); | 3071 FirstVal = loOperand(Val); |
3080 SecondVal = hiOperand(Val); | 3072 SecondVal = hiOperand(Val); |
3081 } else { | 3073 } else { |
3082 FirstVal = Val; | 3074 FirstVal = Val; |
3083 } | 3075 } |
3084 const bool IsCttz = false; | 3076 const bool IsCttz = false; |
3085 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, | 3077 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, |
3086 SecondVal); | 3078 SecondVal); |
3087 return; | 3079 return; |
3088 } | 3080 } |
3089 case Intrinsics::Cttz: { | 3081 case Intrinsics::Cttz: { |
3090 // The "is zero undef" parameter is ignored and we always return | 3082 // The "is zero undef" parameter is ignored and we always return a |
3091 // a well-defined value. | 3083 // well-defined value. |
3092 Operand *Val = legalize(Instr->getArg(0)); | 3084 Operand *Val = legalize(Instr->getArg(0)); |
3093 Operand *FirstVal; | 3085 Operand *FirstVal; |
3094 Operand *SecondVal = nullptr; | 3086 Operand *SecondVal = nullptr; |
3095 if (!Traits::Is64Bit && Val->getType() == IceType_i64) { | 3087 if (!Traits::Is64Bit && Val->getType() == IceType_i64) { |
3096 FirstVal = hiOperand(Val); | 3088 FirstVal = hiOperand(Val); |
3097 SecondVal = loOperand(Val); | 3089 SecondVal = loOperand(Val); |
3098 } else { | 3090 } else { |
3099 FirstVal = Val; | 3091 FirstVal = Val; |
3100 } | 3092 } |
3101 const bool IsCttz = true; | 3093 const bool IsCttz = true; |
3102 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, | 3094 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, |
3103 SecondVal); | 3095 SecondVal); |
3104 return; | 3096 return; |
3105 } | 3097 } |
3106 case Intrinsics::Fabs: { | 3098 case Intrinsics::Fabs: { |
3107 Operand *Src = legalize(Instr->getArg(0)); | 3099 Operand *Src = legalize(Instr->getArg(0)); |
3108 Type Ty = Src->getType(); | 3100 Type Ty = Src->getType(); |
3109 Variable *Dest = Instr->getDest(); | 3101 Variable *Dest = Instr->getDest(); |
3110 Variable *T = makeVectorOfFabsMask(Ty); | 3102 Variable *T = makeVectorOfFabsMask(Ty); |
3111 // The pand instruction operates on an m128 memory operand, so if | 3103 // The pand instruction operates on an m128 memory operand, so if Src is an |
3112 // Src is an f32 or f64, we need to make sure it's in a register. | 3104 // f32 or f64, we need to make sure it's in a register. |
3113 if (isVectorType(Ty)) { | 3105 if (isVectorType(Ty)) { |
3114 if (llvm::isa<typename Traits::X86OperandMem>(Src)) | 3106 if (llvm::isa<typename Traits::X86OperandMem>(Src)) |
3115 Src = legalizeToReg(Src); | 3107 Src = legalizeToReg(Src); |
3116 } else { | 3108 } else { |
3117 Src = legalizeToReg(Src); | 3109 Src = legalizeToReg(Src); |
3118 } | 3110 } |
3119 _pand(T, Src); | 3111 _pand(T, Src); |
3120 if (isVectorType(Ty)) | 3112 if (isVectorType(Ty)) |
3121 _movp(Dest, T); | 3113 _movp(Dest, T); |
3122 else | 3114 else |
(...skipping 564 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3687 | 3679 |
3688 Variable *SrcBase = legalizeToReg(Src); | 3680 Variable *SrcBase = legalizeToReg(Src); |
3689 Variable *DestBase = legalizeToReg(Dest); | 3681 Variable *DestBase = legalizeToReg(Dest); |
3690 | 3682 |
3691 std::tuple<Type, Constant *, Variable *> | 3683 std::tuple<Type, Constant *, Variable *> |
3692 Moves[Traits::MEMMOVE_UNROLL_LIMIT]; | 3684 Moves[Traits::MEMMOVE_UNROLL_LIMIT]; |
3693 Constant *Offset; | 3685 Constant *Offset; |
3694 Variable *Reg; | 3686 Variable *Reg; |
3695 | 3687 |
3696 // Copy the data into registers as the source and destination could overlap | 3688 // Copy the data into registers as the source and destination could overlap |
3697 // so make sure not to clobber the memory. This also means overlapping moves | 3689 // so make sure not to clobber the memory. This also means overlapping |
3698 // can be used as we are taking a safe snapshot of the memory. | 3690 // moves can be used as we are taking a safe snapshot of the memory. |
3699 Type Ty = largestTypeInSize(CountValue); | 3691 Type Ty = largestTypeInSize(CountValue); |
3700 uint32_t TyWidth = typeWidthInBytes(Ty); | 3692 uint32_t TyWidth = typeWidthInBytes(Ty); |
3701 | 3693 |
3702 uint32_t RemainingBytes = CountValue; | 3694 uint32_t RemainingBytes = CountValue; |
3703 int32_t OffsetAmt = (CountValue & ~(TyWidth - 1)) - TyWidth; | 3695 int32_t OffsetAmt = (CountValue & ~(TyWidth - 1)) - TyWidth; |
3704 size_t N = 0; | 3696 size_t N = 0; |
3705 while (RemainingBytes >= TyWidth) { | 3697 while (RemainingBytes >= TyWidth) { |
3706 assert(N <= Traits::MEMMOVE_UNROLL_LIMIT); | 3698 assert(N <= Traits::MEMMOVE_UNROLL_LIMIT); |
3707 Offset = Ctx->getConstantInt32(OffsetAmt); | 3699 Offset = Ctx->getConstantInt32(OffsetAmt); |
3708 Reg = makeReg(Ty); | 3700 Reg = makeReg(Ty); |
(...skipping 180 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3889 Str << ", Index="; | 3881 Str << ", Index="; |
3890 if (Index) | 3882 if (Index) |
3891 Index->dump(Func); | 3883 Index->dump(Func); |
3892 else | 3884 else |
3893 Str << "<null>"; | 3885 Str << "<null>"; |
3894 Str << ", Shift=" << Shift << ", Offset=" << Offset << "\n"; | 3886 Str << ", Shift=" << Shift << ", Offset=" << Offset << "\n"; |
3895 } | 3887 } |
3896 | 3888 |
3897 inline bool matchTransitiveAssign(const VariablesMetadata *VMetadata, | 3889 inline bool matchTransitiveAssign(const VariablesMetadata *VMetadata, |
3898 Variable *&Var, const Inst *&Reason) { | 3890 Variable *&Var, const Inst *&Reason) { |
3899 // Var originates from Var=SrcVar ==> | 3891 // Var originates from Var=SrcVar ==> set Var:=SrcVar |
3900 // set Var:=SrcVar | |
3901 if (Var == nullptr) | 3892 if (Var == nullptr) |
3902 return false; | 3893 return false; |
3903 if (const Inst *VarAssign = VMetadata->getSingleDefinition(Var)) { | 3894 if (const Inst *VarAssign = VMetadata->getSingleDefinition(Var)) { |
3904 assert(!VMetadata->isMultiDef(Var)); | 3895 assert(!VMetadata->isMultiDef(Var)); |
3905 if (llvm::isa<InstAssign>(VarAssign)) { | 3896 if (llvm::isa<InstAssign>(VarAssign)) { |
3906 Operand *SrcOp = VarAssign->getSrc(0); | 3897 Operand *SrcOp = VarAssign->getSrc(0); |
3907 assert(SrcOp); | 3898 assert(SrcOp); |
3908 if (Variable *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) { | 3899 if (Variable *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) { |
3909 if (!VMetadata->isMultiDef(SrcVar) && | 3900 if (!VMetadata->isMultiDef(SrcVar) && |
3910 // TODO: ensure SrcVar stays single-BB | 3901 // TODO: ensure SrcVar stays single-BB |
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4052 Func->resetCurrentNode(); | 4043 Func->resetCurrentNode(); |
4053 if (Func->isVerbose(IceV_AddrOpt)) { | 4044 if (Func->isVerbose(IceV_AddrOpt)) { |
4054 OstreamLocker L(Func->getContext()); | 4045 OstreamLocker L(Func->getContext()); |
4055 Ostream &Str = Func->getContext()->getStrDump(); | 4046 Ostream &Str = Func->getContext()->getStrDump(); |
4056 Str << "\nStarting computeAddressOpt for instruction:\n "; | 4047 Str << "\nStarting computeAddressOpt for instruction:\n "; |
4057 Instr->dumpDecorated(Func); | 4048 Instr->dumpDecorated(Func); |
4058 } | 4049 } |
4059 (void)Offset; // TODO: pattern-match for non-zero offsets. | 4050 (void)Offset; // TODO: pattern-match for non-zero offsets. |
4060 if (Base == nullptr) | 4051 if (Base == nullptr) |
4061 return; | 4052 return; |
4062 // If the Base has more than one use or is live across multiple | 4053 // If the Base has more than one use or is live across multiple blocks, then |
4063 // blocks, then don't go further. Alternatively (?), never consider | 4054 // don't go further. Alternatively (?), never consider a transformation that |
4064 // a transformation that would change a variable that is currently | 4055 // would change a variable that is currently *not* live across basic block |
4065 // *not* live across basic block boundaries into one that *is*. | 4056 // boundaries into one that *is*. |
4066 if (Func->getVMetadata()->isMultiBlock(Base) /* || Base->getUseCount() > 1*/) | 4057 if (Func->getVMetadata()->isMultiBlock(Base) /* || Base->getUseCount() > 1*/) |
4067 return; | 4058 return; |
4068 | 4059 |
4069 const bool MockBounds = Func->getContext()->getFlags().getMockBoundsCheck(); | 4060 const bool MockBounds = Func->getContext()->getFlags().getMockBoundsCheck(); |
4070 const VariablesMetadata *VMetadata = Func->getVMetadata(); | 4061 const VariablesMetadata *VMetadata = Func->getVMetadata(); |
4071 bool Continue = true; | 4062 bool Continue = true; |
4072 while (Continue) { | 4063 while (Continue) { |
4073 const Inst *Reason = nullptr; | 4064 const Inst *Reason = nullptr; |
4074 if (matchTransitiveAssign(VMetadata, Base, Reason) || | 4065 if (matchTransitiveAssign(VMetadata, Base, Reason) || |
4075 matchTransitiveAssign(VMetadata, Index, Reason) || | 4066 matchTransitiveAssign(VMetadata, Index, Reason) || |
(...skipping 149 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4225 Operand *SrcT = Inst->getTrueOperand(); | 4216 Operand *SrcT = Inst->getTrueOperand(); |
4226 Operand *SrcF = Inst->getFalseOperand(); | 4217 Operand *SrcF = Inst->getFalseOperand(); |
4227 Operand *Condition = Inst->getCondition(); | 4218 Operand *Condition = Inst->getCondition(); |
4228 | 4219 |
4229 if (isVectorType(DestTy)) { | 4220 if (isVectorType(DestTy)) { |
4230 Type SrcTy = SrcT->getType(); | 4221 Type SrcTy = SrcT->getType(); |
4231 Variable *T = makeReg(SrcTy); | 4222 Variable *T = makeReg(SrcTy); |
4232 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); | 4223 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); |
4233 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); | 4224 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); |
4234 if (InstructionSet >= Traits::SSE4_1) { | 4225 if (InstructionSet >= Traits::SSE4_1) { |
4235 // TODO(wala): If the condition operand is a constant, use blendps | 4226 // TODO(wala): If the condition operand is a constant, use blendps or |
4236 // or pblendw. | 4227 // pblendw. |
4237 // | 4228 // |
4238 // Use blendvps or pblendvb to implement select. | 4229 // Use blendvps or pblendvb to implement select. |
4239 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || | 4230 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || |
4240 SrcTy == IceType_v4f32) { | 4231 SrcTy == IceType_v4f32) { |
4241 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); | 4232 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); |
4242 Variable *xmm0 = makeReg(IceType_v4i32, Traits::RegisterSet::Reg_xmm0); | 4233 Variable *xmm0 = makeReg(IceType_v4i32, Traits::RegisterSet::Reg_xmm0); |
4243 _movp(xmm0, ConditionRM); | 4234 _movp(xmm0, ConditionRM); |
4244 _psll(xmm0, Ctx->getConstantInt8(31)); | 4235 _psll(xmm0, Ctx->getConstantInt8(31)); |
4245 _movp(T, SrcFRM); | 4236 _movp(T, SrcFRM); |
4246 _blendvps(T, SrcTRM, xmm0); | 4237 _blendvps(T, SrcTRM, xmm0); |
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4303 } | 4294 } |
4304 if (CmpOpnd0 == nullptr) { | 4295 if (CmpOpnd0 == nullptr) { |
4305 CmpOpnd0 = legalize(Condition, Legal_Reg | Legal_Mem); | 4296 CmpOpnd0 = legalize(Condition, Legal_Reg | Legal_Mem); |
4306 CmpOpnd1 = Ctx->getConstantZero(IceType_i32); | 4297 CmpOpnd1 = Ctx->getConstantZero(IceType_i32); |
4307 } | 4298 } |
4308 assert(CmpOpnd0); | 4299 assert(CmpOpnd0); |
4309 assert(CmpOpnd1); | 4300 assert(CmpOpnd1); |
4310 | 4301 |
4311 _cmp(CmpOpnd0, CmpOpnd1); | 4302 _cmp(CmpOpnd0, CmpOpnd1); |
4312 if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) { | 4303 if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) { |
4313 // The cmov instruction doesn't allow 8-bit or FP operands, so | 4304 // The cmov instruction doesn't allow 8-bit or FP operands, so we need |
4314 // we need explicit control flow. | 4305 // explicit control flow. |
4315 // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1: | 4306 // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1: |
4316 typename Traits::Insts::Label *Label = | 4307 typename Traits::Insts::Label *Label = |
4317 Traits::Insts::Label::create(Func, this); | 4308 Traits::Insts::Label::create(Func, this); |
4318 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm); | 4309 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm); |
4319 _mov(Dest, SrcT); | 4310 _mov(Dest, SrcT); |
4320 _br(Cond, Label); | 4311 _br(Cond, Label); |
4321 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm); | 4312 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm); |
4322 _mov_nonkillable(Dest, SrcF); | 4313 _mov_nonkillable(Dest, SrcF); |
4323 Context.insert(Label); | 4314 Context.insert(Label); |
4324 return; | 4315 return; |
4325 } | 4316 } |
4326 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t | 4317 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t |
4327 // But if SrcT is immediate, we might be able to do better, as | 4318 // But if SrcT is immediate, we might be able to do better, as the cmov |
4328 // the cmov instruction doesn't allow an immediate operand: | 4319 // instruction doesn't allow an immediate operand: |
4329 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t | 4320 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t |
4330 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) { | 4321 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) { |
4331 std::swap(SrcT, SrcF); | 4322 std::swap(SrcT, SrcF); |
4332 Cond = InstX86Base<Machine>::getOppositeCondition(Cond); | 4323 Cond = InstX86Base<Machine>::getOppositeCondition(Cond); |
4333 } | 4324 } |
4334 if (!Traits::Is64Bit && DestTy == IceType_i64) { | 4325 if (!Traits::Is64Bit && DestTy == IceType_i64) { |
4335 SrcT = legalizeUndef(SrcT); | 4326 SrcT = legalizeUndef(SrcT); |
4336 SrcF = legalizeUndef(SrcF); | 4327 SrcF = legalizeUndef(SrcF); |
4337 // Set the low portion. | 4328 // Set the low portion. |
4338 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 4329 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
(...skipping 340 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4679 | 4670 |
4680 lowerAssign(InstAssign::create(Func, Dest, T)); | 4671 lowerAssign(InstAssign::create(Func, Dest, T)); |
4681 } | 4672 } |
4682 | 4673 |
4683 /// The following pattern occurs often in lowered C and C++ code: | 4674 /// The following pattern occurs often in lowered C and C++ code: |
4684 /// | 4675 /// |
4685 /// %cmp = fcmp/icmp pred <n x ty> %src0, %src1 | 4676 /// %cmp = fcmp/icmp pred <n x ty> %src0, %src1 |
4686 /// %cmp.ext = sext <n x i1> %cmp to <n x ty> | 4677 /// %cmp.ext = sext <n x i1> %cmp to <n x ty> |
4687 /// | 4678 /// |
4688 /// We can eliminate the sext operation by copying the result of pcmpeqd, | 4679 /// We can eliminate the sext operation by copying the result of pcmpeqd, |
4689 /// pcmpgtd, or cmpps (which produce sign extended results) to the result | 4680 /// pcmpgtd, or cmpps (which produce sign extended results) to the result of the |
4690 /// of the sext operation. | 4681 /// sext operation. |
4691 template <class Machine> | 4682 template <class Machine> |
4692 void TargetX86Base<Machine>::eliminateNextVectorSextInstruction( | 4683 void TargetX86Base<Machine>::eliminateNextVectorSextInstruction( |
4693 Variable *SignExtendedResult) { | 4684 Variable *SignExtendedResult) { |
4694 if (InstCast *NextCast = | 4685 if (InstCast *NextCast = |
4695 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { | 4686 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { |
4696 if (NextCast->getCastKind() == InstCast::Sext && | 4687 if (NextCast->getCastKind() == InstCast::Sext && |
4697 NextCast->getSrc(0) == SignExtendedResult) { | 4688 NextCast->getSrc(0) == SignExtendedResult) { |
4698 NextCast->setDeleted(); | 4689 NextCast->setDeleted(); |
4699 _movp(NextCast->getDest(), legalizeToReg(SignExtendedResult)); | 4690 _movp(NextCast->getDest(), legalizeToReg(SignExtendedResult)); |
4700 // Skip over the instruction. | 4691 // Skip over the instruction. |
4701 Context.advanceNext(); | 4692 Context.advanceNext(); |
4702 } | 4693 } |
4703 } | 4694 } |
4704 } | 4695 } |
4705 | 4696 |
4706 template <class Machine> | 4697 template <class Machine> |
4707 void TargetX86Base<Machine>::lowerUnreachable( | 4698 void TargetX86Base<Machine>::lowerUnreachable( |
4708 const InstUnreachable * /*Inst*/) { | 4699 const InstUnreachable * /*Inst*/) { |
4709 _ud2(); | 4700 _ud2(); |
4710 } | 4701 } |
4711 | 4702 |
4712 template <class Machine> | 4703 template <class Machine> |
4713 void TargetX86Base<Machine>::lowerRMW( | 4704 void TargetX86Base<Machine>::lowerRMW( |
4714 const typename Traits::Insts::FakeRMW *RMW) { | 4705 const typename Traits::Insts::FakeRMW *RMW) { |
4715 // If the beacon variable's live range does not end in this | 4706 // If the beacon variable's live range does not end in this instruction, then |
4716 // instruction, then it must end in the modified Store instruction | 4707 // it must end in the modified Store instruction that follows. This means |
4717 // that follows. This means that the original Store instruction is | 4708 // that the original Store instruction is still there, either because the |
4718 // still there, either because the value being stored is used beyond | 4709 // value being stored is used beyond the Store instruction, or because dead |
4719 // the Store instruction, or because dead code elimination did not | 4710 // code elimination did not happen. In either case, we cancel RMW lowering |
4720 // happen. In either case, we cancel RMW lowering (and the caller | 4711 // (and the caller deletes the RMW instruction). |
4721 // deletes the RMW instruction). | |
4722 if (!RMW->isLastUse(RMW->getBeacon())) | 4712 if (!RMW->isLastUse(RMW->getBeacon())) |
4723 return; | 4713 return; |
4724 Operand *Src = RMW->getData(); | 4714 Operand *Src = RMW->getData(); |
4725 Type Ty = Src->getType(); | 4715 Type Ty = Src->getType(); |
4726 typename Traits::X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty); | 4716 typename Traits::X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty); |
4727 doMockBoundsCheck(Addr); | 4717 doMockBoundsCheck(Addr); |
4728 if (!Traits::Is64Bit && Ty == IceType_i64) { | 4718 if (!Traits::Is64Bit && Ty == IceType_i64) { |
4729 Src = legalizeUndef(Src); | 4719 Src = legalizeUndef(Src); |
4730 Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm); | 4720 Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm); |
4731 Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm); | 4721 Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm); |
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4793 template <class Machine> | 4783 template <class Machine> |
4794 void TargetX86Base<Machine>::lowerOther(const Inst *Instr) { | 4784 void TargetX86Base<Machine>::lowerOther(const Inst *Instr) { |
4795 if (const auto *RMW = | 4785 if (const auto *RMW = |
4796 llvm::dyn_cast<typename Traits::Insts::FakeRMW>(Instr)) { | 4786 llvm::dyn_cast<typename Traits::Insts::FakeRMW>(Instr)) { |
4797 lowerRMW(RMW); | 4787 lowerRMW(RMW); |
4798 } else { | 4788 } else { |
4799 TargetLowering::lowerOther(Instr); | 4789 TargetLowering::lowerOther(Instr); |
4800 } | 4790 } |
4801 } | 4791 } |
4802 | 4792 |
4803 /// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to | 4793 /// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to preserve |
4804 /// preserve integrity of liveness analysis. Undef values are also | 4794 /// integrity of liveness analysis. Undef values are also turned into zeroes, |
4805 /// turned into zeroes, since loOperand() and hiOperand() don't expect | 4795 /// since loOperand() and hiOperand() don't expect Undef input. |
4806 /// Undef input. | |
4807 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() { | 4796 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() { |
4808 if (Traits::Is64Bit) { | 4797 if (Traits::Is64Bit) { |
4809 // On x86-64 we don't need to prelower phis -- the architecture can handle | 4798 // On x86-64 we don't need to prelower phis -- the architecture can handle |
4810 // 64-bit integer natively. | 4799 // 64-bit integer natively. |
4811 return; | 4800 return; |
4812 } | 4801 } |
4813 | 4802 |
4814 // Pause constant blinding or pooling, blinding or pooling will be done | 4803 // Pause constant blinding or pooling, blinding or pooling will be done later |
4815 // later during phi lowering assignments | 4804 // during phi lowering assignments |
4816 BoolFlagSaver B(RandomizationPoolingPaused, true); | 4805 BoolFlagSaver B(RandomizationPoolingPaused, true); |
4817 PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>( | 4806 PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>( |
4818 this, Context.getNode(), Func); | 4807 this, Context.getNode(), Func); |
4819 } | 4808 } |
4820 | 4809 |
4821 // There is no support for loading or emitting vector constants, so the | 4810 // There is no support for loading or emitting vector constants, so the vector |
4822 // vector values returned from makeVectorOfZeros, makeVectorOfOnes, | 4811 // values returned from makeVectorOfZeros, makeVectorOfOnes, etc. are |
4823 // etc. are initialized with register operations. | 4812 // initialized with register operations. |
4824 // | 4813 // |
4825 // TODO(wala): Add limited support for vector constants so that | 4814 // TODO(wala): Add limited support for vector constants so that complex |
4826 // complex initialization in registers is unnecessary. | 4815 // initialization in registers is unnecessary. |
4827 | 4816 |
4828 template <class Machine> | 4817 template <class Machine> |
4829 Variable *TargetX86Base<Machine>::makeVectorOfZeros(Type Ty, int32_t RegNum) { | 4818 Variable *TargetX86Base<Machine>::makeVectorOfZeros(Type Ty, int32_t RegNum) { |
4830 Variable *Reg = makeReg(Ty, RegNum); | 4819 Variable *Reg = makeReg(Ty, RegNum); |
4831 // Insert a FakeDef, since otherwise the live range of Reg might | 4820 // Insert a FakeDef, since otherwise the live range of Reg might be |
4832 // be overestimated. | 4821 // overestimated. |
4833 Context.insert(InstFakeDef::create(Func, Reg)); | 4822 Context.insert(InstFakeDef::create(Func, Reg)); |
4834 _pxor(Reg, Reg); | 4823 _pxor(Reg, Reg); |
4835 return Reg; | 4824 return Reg; |
4836 } | 4825 } |
4837 | 4826 |
4838 template <class Machine> | 4827 template <class Machine> |
4839 Variable *TargetX86Base<Machine>::makeVectorOfMinusOnes(Type Ty, | 4828 Variable *TargetX86Base<Machine>::makeVectorOfMinusOnes(Type Ty, |
4840 int32_t RegNum) { | 4829 int32_t RegNum) { |
4841 Variable *MinusOnes = makeReg(Ty, RegNum); | 4830 Variable *MinusOnes = makeReg(Ty, RegNum); |
4842 // Insert a FakeDef so the live range of MinusOnes is not overestimated. | 4831 // Insert a FakeDef so the live range of MinusOnes is not overestimated. |
(...skipping 25 matching lines...) Expand all Loading... |
4868 // SSE has no left shift operation for vectors of 8 bit integers. | 4857 // SSE has no left shift operation for vectors of 8 bit integers. |
4869 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; | 4858 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; |
4870 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); | 4859 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); |
4871 Variable *Reg = makeReg(Ty, RegNum); | 4860 Variable *Reg = makeReg(Ty, RegNum); |
4872 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); | 4861 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); |
4873 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); | 4862 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); |
4874 return Reg; | 4863 return Reg; |
4875 } | 4864 } |
4876 } | 4865 } |
4877 | 4866 |
4878 /// Construct a mask in a register that can be and'ed with a | 4867 /// Construct a mask in a register that can be and'ed with a floating-point |
4879 /// floating-point value to mask off its sign bit. The value will be | 4868 /// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32 |
4880 /// <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff> | 4869 /// and v4f32, and <2 x 0x7fffffffffffffff> for f64. Construct it as vector of |
4881 /// for f64. Construct it as vector of ones logically right shifted | 4870 /// ones logically right shifted one bit. |
4882 /// one bit. TODO(stichnot): Fix the wala TODO above, to represent | 4871 // TODO(stichnot): Fix the wala TODO above, to represent vector
4883 /// vector constants in memory. | 4872 // constants in memory.
4884 template <class Machine> | 4873 template <class Machine> |
4885 Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty, | 4874 Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty, |
4886 int32_t RegNum) { | 4875 int32_t RegNum) { |
4887 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum); | 4876 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum); |
4888 _psrl(Reg, Ctx->getConstantInt8(1)); | 4877 _psrl(Reg, Ctx->getConstantInt8(1)); |
4889 return Reg; | 4878 return Reg; |
4890 } | 4879 } |
4891 | 4880 |
4892 template <class Machine> | 4881 template <class Machine> |
4893 typename TargetX86Base<Machine>::Traits::X86OperandMem * | 4882 typename TargetX86Base<Machine>::Traits::X86OperandMem * |
4894 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot, | 4883 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot, |
4895 uint32_t Offset) { | 4884 uint32_t Offset) { |
4896 // Ensure that Loc is a stack slot. | 4885 // Ensure that Loc is a stack slot. |
4897 assert(Slot->mustNotHaveReg()); | 4886 assert(Slot->mustNotHaveReg()); |
4898 assert(Slot->getRegNum() == Variable::NoRegister); | 4887 assert(Slot->getRegNum() == Variable::NoRegister); |
4899 // Compute the location of Loc in memory. | 4888 // Compute the location of Loc in memory. |
4900 // TODO(wala,stichnot): lea should not be required. The address of | 4889 // TODO(wala,stichnot): lea should not |
4901 // the stack slot is known at compile time (although not until after | 4890 // be required. The address of the stack slot is known at compile time |
4902 // addProlog()). | 4891 // (although not until after addProlog()). |
4903 const Type PointerType = IceType_i32; | 4892 const Type PointerType = IceType_i32; |
4904 Variable *Loc = makeReg(PointerType); | 4893 Variable *Loc = makeReg(PointerType); |
4905 _lea(Loc, Slot); | 4894 _lea(Loc, Slot); |
4906 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); | 4895 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); |
4907 return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset); | 4896 return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset); |
4908 } | 4897 } |
4909 | 4898 |
4910 /// Helper for legalize() to emit the right code to lower an operand to a | 4899 /// Helper for legalize() to emit the right code to lower an operand to a |
4911 /// register of the appropriate type. | 4900 /// register of the appropriate type. |
4912 template <class Machine> | 4901 template <class Machine> |
4913 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { | 4902 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { |
4914 Type Ty = Src->getType(); | 4903 Type Ty = Src->getType(); |
4915 Variable *Reg = makeReg(Ty, RegNum); | 4904 Variable *Reg = makeReg(Ty, RegNum); |
4916 if (isVectorType(Ty)) { | 4905 if (isVectorType(Ty)) { |
4917 _movp(Reg, Src); | 4906 _movp(Reg, Src); |
4918 } else { | 4907 } else { |
4919 _mov(Reg, Src); | 4908 _mov(Reg, Src); |
4920 } | 4909 } |
4921 return Reg; | 4910 return Reg; |
4922 } | 4911 } |
4923 | 4912 |
4924 template <class Machine> | 4913 template <class Machine> |
4925 Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed, | 4914 Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed, |
4926 int32_t RegNum) { | 4915 int32_t RegNum) { |
4927 Type Ty = From->getType(); | 4916 Type Ty = From->getType(); |
4928 // Assert that a physical register is allowed. To date, all calls | 4917 // Assert that a physical register is allowed. To date, all calls to |
4929 // to legalize() allow a physical register. If a physical register | 4918 // legalize() allow a physical register. If a physical register needs to be |
4930 // needs to be explicitly disallowed, then new code will need to be | 4919 // explicitly disallowed, then new code will need to be written to force a |
4931 // written to force a spill. | 4920 // spill. |
4932 assert(Allowed & Legal_Reg); | 4921 assert(Allowed & Legal_Reg); |
4933 // If we're asking for a specific physical register, make sure we're | 4922 // If we're asking for a specific physical register, make sure we're not |
4934 // not allowing any other operand kinds. (This could be future | 4923 // allowing any other operand kinds. (This could be future work, e.g. allow |
4935 // work, e.g. allow the shl shift amount to be either an immediate | 4924 // the shl shift amount to be either an immediate or in ecx.) |
4936 // or in ecx.) | |
4937 assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg); | 4925 assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg); |
4938 | 4926 |
4939 if (auto Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) { | 4927 if (auto Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) { |
4940 // Before doing anything with a Mem operand, we need to ensure | 4928 // Before doing anything with a Mem operand, we need to ensure that the |
4941 // that the Base and Index components are in physical registers. | 4929 // Base and Index components are in physical registers. |
4942 Variable *Base = Mem->getBase(); | 4930 Variable *Base = Mem->getBase(); |
4943 Variable *Index = Mem->getIndex(); | 4931 Variable *Index = Mem->getIndex(); |
4944 Variable *RegBase = nullptr; | 4932 Variable *RegBase = nullptr; |
4945 Variable *RegIndex = nullptr; | 4933 Variable *RegIndex = nullptr; |
4946 if (Base) { | 4934 if (Base) { |
4947 RegBase = legalizeToReg(Base); | 4935 RegBase = legalizeToReg(Base); |
4948 } | 4936 } |
4949 if (Index) { | 4937 if (Index) { |
4950 RegIndex = legalizeToReg(Index); | 4938 RegIndex = legalizeToReg(Index); |
4951 } | 4939 } |
(...skipping 24 matching lines...) Expand all Loading... |
4976 // If the operand is a 64 bit constant integer we need to legalize it to a | 4964 // If the operand is a 64 bit constant integer we need to legalize it to a |
4977 // register in x86-64. | 4965 // register in x86-64. |
4978 if (Traits::Is64Bit) { | 4966 if (Traits::Is64Bit) { |
4979 if (llvm::isa<ConstantInteger64>(Const)) { | 4967 if (llvm::isa<ConstantInteger64>(Const)) { |
4980 Variable *V = copyToReg(Const, RegNum); | 4968 Variable *V = copyToReg(Const, RegNum); |
4981 V->setMustHaveReg(); | 4969 V->setMustHaveReg(); |
4982 return V; | 4970 return V; |
4983 } | 4971 } |
4984 } | 4972 } |
4985 | 4973 |
4986 // If the operand is an 32 bit constant integer, we should check | 4974 // If the operand is a 32 bit constant integer, we should check whether we
4987 // whether we need to randomize it or pool it. | 4975 // need to randomize it or pool it. |
4988 if (ConstantInteger32 *C = llvm::dyn_cast<ConstantInteger32>(Const)) { | 4976 if (ConstantInteger32 *C = llvm::dyn_cast<ConstantInteger32>(Const)) { |
4989 Operand *NewConst = randomizeOrPoolImmediate(C, RegNum); | 4977 Operand *NewConst = randomizeOrPoolImmediate(C, RegNum); |
4990 if (NewConst != Const) { | 4978 if (NewConst != Const) { |
4991 return NewConst; | 4979 return NewConst; |
4992 } | 4980 } |
4993 } | 4981 } |
4994 | 4982 |
4995 // Convert a scalar floating point constant into an explicit | 4983 // Convert a scalar floating point constant into an explicit memory |
4996 // memory operand. | 4984 // operand. |
4997 if (isScalarFloatingType(Ty)) { | 4985 if (isScalarFloatingType(Ty)) { |
4998 Variable *Base = nullptr; | 4986 Variable *Base = nullptr; |
4999 std::string Buffer; | 4987 std::string Buffer; |
5000 llvm::raw_string_ostream StrBuf(Buffer); | 4988 llvm::raw_string_ostream StrBuf(Buffer); |
5001 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf); | 4989 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf); |
5002 llvm::cast<Constant>(From)->setShouldBePooled(true); | 4990 llvm::cast<Constant>(From)->setShouldBePooled(true); |
5003 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); | 4991 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); |
5004 From = Traits::X86OperandMem::create(Func, Ty, Base, Offset); | 4992 From = Traits::X86OperandMem::create(Func, Ty, Base, Offset); |
5005 } | 4993 } |
5006 bool NeedsReg = false; | 4994 bool NeedsReg = false; |
5007 if (!(Allowed & Legal_Imm) && !isScalarFloatingType(Ty)) | 4995 if (!(Allowed & Legal_Imm) && !isScalarFloatingType(Ty)) |
5008 // Immediate specifically not allowed | 4996 // Immediate specifically not allowed |
5009 NeedsReg = true; | 4997 NeedsReg = true; |
5010 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty)) | 4998 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty)) |
5011 // On x86, FP constants are lowered to mem operands. | 4999 // On x86, FP constants are lowered to mem operands. |
5012 NeedsReg = true; | 5000 NeedsReg = true; |
5013 if (NeedsReg) { | 5001 if (NeedsReg) { |
5014 From = copyToReg(From, RegNum); | 5002 From = copyToReg(From, RegNum); |
5015 } | 5003 } |
5016 return From; | 5004 return From; |
5017 } | 5005 } |
5018 if (auto Var = llvm::dyn_cast<Variable>(From)) { | 5006 if (auto Var = llvm::dyn_cast<Variable>(From)) { |
5019 // Check if the variable is guaranteed a physical register. This | 5007 // Check if the variable is guaranteed a physical register. This can happen |
5020 // can happen either when the variable is pre-colored or when it is | 5008 // either when the variable is pre-colored or when it is assigned infinite |
5021 // assigned infinite weight. | 5009 // weight. |
5022 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); | 5010 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); |
5023 // We need a new physical register for the operand if: | 5011 // We need a new physical register for the operand if: |
5024 // Mem is not allowed and Var isn't guaranteed a physical | 5012 // Mem is not allowed and Var isn't guaranteed a physical |
5025 // register, or | 5013 // register, or |
5026 // RegNum is required and Var->getRegNum() doesn't match. | 5014 // RegNum is required and Var->getRegNum() doesn't match. |
5027 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || | 5015 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || |
5028 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { | 5016 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { |
5029 From = copyToReg(From, RegNum); | 5017 From = copyToReg(From, RegNum); |
5030 } | 5018 } |
5031 return From; | 5019 return From; |
5032 } | 5020 } |
5033 llvm_unreachable("Unhandled operand kind in legalize()"); | 5021 llvm_unreachable("Unhandled operand kind in legalize()"); |
5034 return From; | 5022 return From; |
5035 } | 5023 } |
5036 | 5024 |
5037 /// Provide a trivial wrapper to legalize() for this common usage. | 5025 /// Provide a trivial wrapper to legalize() for this common usage. |
5038 template <class Machine> | 5026 template <class Machine> |
5039 Variable *TargetX86Base<Machine>::legalizeToReg(Operand *From, int32_t RegNum) { | 5027 Variable *TargetX86Base<Machine>::legalizeToReg(Operand *From, int32_t RegNum) { |
5040 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum)); | 5028 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum)); |
5041 } | 5029 } |
5042 | 5030 |
5043 /// Legalize undef values to concrete values. | 5031 /// Legalize undef values to concrete values. |
5044 template <class Machine> | 5032 template <class Machine> |
5045 Operand *TargetX86Base<Machine>::legalizeUndef(Operand *From, int32_t RegNum) { | 5033 Operand *TargetX86Base<Machine>::legalizeUndef(Operand *From, int32_t RegNum) { |
5046 Type Ty = From->getType(); | 5034 Type Ty = From->getType(); |
5047 if (llvm::isa<ConstantUndef>(From)) { | 5035 if (llvm::isa<ConstantUndef>(From)) { |
5048 // Lower undefs to zero. Another option is to lower undefs to an | 5036 // Lower undefs to zero. Another option is to lower undefs to an |
5049 // uninitialized register; however, using an uninitialized register | 5037 // uninitialized register; however, using an uninitialized register results |
5050 // results in less predictable code. | 5038 // in less predictable code. |
5051 // | 5039 // |
5052 // If in the future the implementation is changed to lower undef | 5040 // If in the future the implementation is changed to lower undef values to |
5053 // values to uninitialized registers, a FakeDef will be needed: | 5041 // uninitialized registers, a FakeDef will be needed: |
5054 // Context.insert(InstFakeDef::create(Func, Reg)); | 5042 // Context.insert(InstFakeDef::create(Func, Reg)); |
5055 // This is in order to ensure that the live range of Reg is not | 5043 // This is in order to ensure that the live range of Reg is not |
5056 // overestimated. If the constant being lowered is a 64 bit value, | 5044 // overestimated. If the constant being lowered is a 64 bit value, then |
5057 // then the result should be split and the lo and hi components will | 5045 // the result should be split and the lo and hi components will need to go |
5058 // need to go in uninitialized registers. | 5046 // in uninitialized registers. |
5059 if (isVectorType(Ty)) | 5047 if (isVectorType(Ty)) |
5060 return makeVectorOfZeros(Ty, RegNum); | 5048 return makeVectorOfZeros(Ty, RegNum); |
5061 return Ctx->getConstantZero(Ty); | 5049 return Ctx->getConstantZero(Ty); |
5062 } | 5050 } |
5063 return From; | 5051 return From; |
5064 } | 5052 } |
5065 | 5053 |
5066 /// For the cmp instruction, if Src1 is an immediate, or known to be a | 5054 /// For the cmp instruction, if Src1 is an immediate, or known to be a physical |
5067 /// physical register, we can allow Src0 to be a memory operand. | 5055 /// register, we can allow Src0 to be a memory operand. Otherwise, Src0 must be |
5068 /// Otherwise, Src0 must be copied into a physical register. | 5056 /// copied into a physical register. (Actually, either Src0 or Src1 can be |
5069 /// (Actually, either Src0 or Src1 can be chosen for the physical | 5057 /// chosen for the physical register, but unfortunately we have to commit to one |
5070 /// register, but unfortunately we have to commit to one or the other | 5058 /// or the other before register allocation.) |
5071 /// before register allocation.) | |
5072 template <class Machine> | 5059 template <class Machine> |
5073 Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0, | 5060 Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0, |
5074 Operand *Src1) { | 5061 Operand *Src1) { |
5075 bool IsSrc1ImmOrReg = false; | 5062 bool IsSrc1ImmOrReg = false; |
5076 if (llvm::isa<Constant>(Src1)) { | 5063 if (llvm::isa<Constant>(Src1)) { |
5077 IsSrc1ImmOrReg = true; | 5064 IsSrc1ImmOrReg = true; |
5078 } else if (auto *Var = llvm::dyn_cast<Variable>(Src1)) { | 5065 } else if (auto *Var = llvm::dyn_cast<Variable>(Src1)) { |
5079 if (Var->hasReg()) | 5066 if (Var->hasReg()) |
5080 IsSrc1ImmOrReg = true; | 5067 IsSrc1ImmOrReg = true; |
5081 } | 5068 } |
5082 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg); | 5069 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg); |
5083 } | 5070 } |
5084 | 5071 |
5085 template <class Machine> | 5072 template <class Machine> |
5086 typename TargetX86Base<Machine>::Traits::X86OperandMem * | 5073 typename TargetX86Base<Machine>::Traits::X86OperandMem * |
5087 TargetX86Base<Machine>::formMemoryOperand(Operand *Opnd, Type Ty, | 5074 TargetX86Base<Machine>::formMemoryOperand(Operand *Opnd, Type Ty, |
5088 bool DoLegalize) { | 5075 bool DoLegalize) { |
5089 auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Opnd); | 5076 auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Opnd); |
5090 // It may be the case that address mode optimization already creates an | 5077 // It may be the case that address mode optimization already creates an |
5091 // Traits::X86OperandMem, so in that case it wouldn't need another level of | 5078 // Traits::X86OperandMem, so in that case it wouldn't need another level of |
5092 // transformation. | 5079 // transformation. |
5093 if (!Mem) { | 5080 if (!Mem) { |
5094 Variable *Base = llvm::dyn_cast<Variable>(Opnd); | 5081 Variable *Base = llvm::dyn_cast<Variable>(Opnd); |
5095 Constant *Offset = llvm::dyn_cast<Constant>(Opnd); | 5082 Constant *Offset = llvm::dyn_cast<Constant>(Opnd); |
5096 assert(Base || Offset); | 5083 assert(Base || Offset); |
5097 if (Offset) { | 5084 if (Offset) { |
5098 // During memory operand building, we do not blind or pool | 5085 // During memory operand building, we do not blind or pool the constant |
5099 // the constant offset, we will work on the whole memory | 5086 // offset, we will work on the whole memory operand later as one entity,
5100 // operand later as one entity later, this save one instruction. | 5087 // which saves one instruction. By turning blinding and pooling off,
5101 // By turning blinding and pooling off, we guarantee | 5088 // we guarantee legalize(Offset) will return a Constant*. |
5102 // legalize(Offset) will return a Constant*. | |
5103 { | 5089 { |
5104 BoolFlagSaver B(RandomizationPoolingPaused, true); | 5090 BoolFlagSaver B(RandomizationPoolingPaused, true); |
5105 | 5091 |
5106 Offset = llvm::cast<Constant>(legalize(Offset)); | 5092 Offset = llvm::cast<Constant>(legalize(Offset)); |
5107 } | 5093 } |
5108 | 5094 |
5109 assert(llvm::isa<ConstantInteger32>(Offset) || | 5095 assert(llvm::isa<ConstantInteger32>(Offset) || |
5110 llvm::isa<ConstantRelocatable>(Offset)); | 5096 llvm::isa<ConstantRelocatable>(Offset)); |
5111 } | 5097 } |
5112 Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset); | 5098 Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset); |
5113 } | 5099 } |
5114 // Do legalization, which contains randomization/pooling | 5100 // Either do full legalization (which includes randomization/pooling)
5115 // or do randomization/pooling. | 5101 // or only randomize/pool the immediate.
5116 return llvm::cast<typename Traits::X86OperandMem>( | 5102 return llvm::cast<typename Traits::X86OperandMem>( |
5117 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem)); | 5103 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem)); |
5118 } | 5104 } |
5119 | 5105 |
5120 template <class Machine> | 5106 template <class Machine> |
5121 Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) { | 5107 Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) { |
5122 // There aren't any 64-bit integer registers for x86-32. | 5108 // There aren't any 64-bit integer registers for x86-32. |
5123 assert(Traits::Is64Bit || Type != IceType_i64); | 5109 assert(Traits::Is64Bit || Type != IceType_i64); |
5124 Variable *Reg = Func->makeVariable(Type); | 5110 Variable *Reg = Func->makeVariable(Type); |
5125 if (RegNum == Variable::NoRegister) | 5111 if (RegNum == Variable::NoRegister) |
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5228 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == | 5214 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == |
5229 RPI_Randomize) { | 5215 RPI_Randomize) { |
5230 // blind the constant | 5216 // blind the constant |
5231 // FROM: | 5217 // FROM: |
5232 // imm | 5218 // imm |
5233 // TO: | 5219 // TO: |
5234 // insert: mov imm+cookie, Reg | 5220 // insert: mov imm+cookie, Reg |
5235 // insert: lea -cookie[Reg], Reg | 5221 // insert: lea -cookie[Reg], Reg |
5236 // => Reg | 5222 // => Reg |
5237 // If we have already assigned a phy register, we must come from | 5223 // If we have already assigned a phy register, we must come from |
5238 // andvancedPhiLowering()=>lowerAssign(). In this case we should reuse | 5224 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the |
5239 // the assigned register as this assignment is that start of its use-def | 5225 // assigned register as this assignment is the start of its use-def
5240 // chain. So we add RegNum argument here. | 5226 // chain. So we add RegNum argument here. Note we use 'lea' instruction |
5241 // Note we use 'lea' instruction instead of 'xor' to avoid affecting | 5227 // instead of 'xor' to avoid affecting the flags. |
5242 // the flags. | |
5243 Variable *Reg = makeReg(IceType_i32, RegNum); | 5228 Variable *Reg = makeReg(IceType_i32, RegNum); |
5244 ConstantInteger32 *Integer = llvm::cast<ConstantInteger32>(Immediate); | 5229 ConstantInteger32 *Integer = llvm::cast<ConstantInteger32>(Immediate); |
5245 uint32_t Value = Integer->getValue(); | 5230 uint32_t Value = Integer->getValue(); |
5246 uint32_t Cookie = Func->getConstantBlindingCookie(); | 5231 uint32_t Cookie = Func->getConstantBlindingCookie(); |
5247 _mov(Reg, Ctx->getConstantInt(IceType_i32, Cookie + Value)); | 5232 _mov(Reg, Ctx->getConstantInt(IceType_i32, Cookie + Value)); |
5248 Constant *Offset = Ctx->getConstantInt(IceType_i32, 0 - Cookie); | 5233 Constant *Offset = Ctx->getConstantInt(IceType_i32, 0 - Cookie); |
5249 _lea(Reg, Traits::X86OperandMem::create(Func, IceType_i32, Reg, Offset, | 5234 _lea(Reg, Traits::X86OperandMem::create(Func, IceType_i32, Reg, Offset, |
5250 nullptr, 0)); | 5235 nullptr, 0)); |
5251 // make sure liveness analysis won't kill this variable, otherwise a | 5236 // make sure liveness analysis won't kill this variable, otherwise a |
5252 // liveness assertion will be triggered. | 5237 // liveness assertion will be triggered. |
5253 _set_dest_nonkillable(); | 5238 _set_dest_nonkillable(); |
5254 if (Immediate->getType() != IceType_i32) { | 5239 if (Immediate->getType() != IceType_i32) { |
5255 Variable *TruncReg = makeReg(Immediate->getType(), RegNum); | 5240 Variable *TruncReg = makeReg(Immediate->getType(), RegNum); |
5256 _mov(TruncReg, Reg); | 5241 _mov(TruncReg, Reg); |
5257 return TruncReg; | 5242 return TruncReg; |
5258 } | 5243 } |
5259 return Reg; | 5244 return Reg; |
5260 } | 5245 } |
5261 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) { | 5246 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) { |
5262 // pool the constant | 5247 // pool the constant |
5263 // FROM: | 5248 // FROM: |
5264 // imm | 5249 // imm |
5265 // TO: | 5250 // TO: |
5266 // insert: mov $label, Reg | 5251 // insert: mov $label, Reg |
5267 // => Reg | 5252 // => Reg |
5268 assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool); | 5253 assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool); |
5269 Immediate->setShouldBePooled(true); | 5254 Immediate->setShouldBePooled(true); |
5270 // if we have already assigned a phy register, we must come from | 5255 // if we have already assigned a phy register, we must come from |
5271 // andvancedPhiLowering()=>lowerAssign(). In this case we should reuse | 5256 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the |
5272 // the assigned register as this assignment is that start of its use-def | 5257 // assigned register as this assignment is the start of its use-def
5273 // chain. So we add RegNum argument here. | 5258 // chain. So we add RegNum argument here. |
5274 Variable *Reg = makeReg(Immediate->getType(), RegNum); | 5259 Variable *Reg = makeReg(Immediate->getType(), RegNum); |
5275 IceString Label; | 5260 IceString Label; |
5276 llvm::raw_string_ostream Label_stream(Label); | 5261 llvm::raw_string_ostream Label_stream(Label); |
5277 Immediate->emitPoolLabel(Label_stream); | 5262 Immediate->emitPoolLabel(Label_stream); |
5278 const RelocOffsetT Offset = 0; | 5263 const RelocOffsetT Offset = 0; |
5279 const bool SuppressMangling = true; | 5264 const bool SuppressMangling = true; |
5280 Constant *Symbol = | 5265 Constant *Symbol = |
5281 Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling); | 5266 Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling); |
5282 typename Traits::X86OperandMem *MemOperand = | 5267 typename Traits::X86OperandMem *MemOperand = |
(...skipping 12 matching lines...) Expand all Loading... |
5295 typename TargetX86Base<Machine>::Traits::X86OperandMem * | 5280 typename TargetX86Base<Machine>::Traits::X86OperandMem * |
5296 TargetX86Base<Machine>::randomizeOrPoolImmediate( | 5281 TargetX86Base<Machine>::randomizeOrPoolImmediate( |
5297 typename Traits::X86OperandMem *MemOperand, int32_t RegNum) { | 5282 typename Traits::X86OperandMem *MemOperand, int32_t RegNum) { |
5298 assert(MemOperand); | 5283 assert(MemOperand); |
5299 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || | 5284 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || |
5300 RandomizationPoolingPaused == true) { | 5285 RandomizationPoolingPaused == true) { |
5301 // immediates randomization/pooling is turned off | 5286 // immediates randomization/pooling is turned off |
5302 return MemOperand; | 5287 return MemOperand; |
5303 } | 5288 } |
5304 | 5289 |
5305 // If this memory operand is already a randommized one, we do | 5290 // If this memory operand is already a randomized one, we do not randomize it |
5306 // not randomize it again. | 5291 // again. |
5307 if (MemOperand->getRandomized()) | 5292 if (MemOperand->getRandomized()) |
5308 return MemOperand; | 5293 return MemOperand; |
5309 | 5294 |
5310 if (Constant *C = llvm::dyn_cast_or_null<Constant>(MemOperand->getOffset())) { | 5295 if (Constant *C = llvm::dyn_cast_or_null<Constant>(MemOperand->getOffset())) { |
5311 if (C->shouldBeRandomizedOrPooled(Ctx)) { | 5296 if (C->shouldBeRandomizedOrPooled(Ctx)) { |
5312 // The offset of this mem operand should be blinded or pooled | 5297 // The offset of this mem operand should be blinded or pooled |
5313 Ctx->statsUpdateRPImms(); | 5298 Ctx->statsUpdateRPImms(); |
5314 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == | 5299 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == |
5315 RPI_Randomize) { | 5300 RPI_Randomize) { |
5316 // blind the constant offset | 5301 // blind the constant offset |
(...skipping 14 matching lines...) Expand all Loading... |
5331 typename Traits::X86OperandMem *TempMemOperand = | 5316 typename Traits::X86OperandMem *TempMemOperand = |
5332 Traits::X86OperandMem::create(Func, MemOperand->getType(), | 5317 Traits::X86OperandMem::create(Func, MemOperand->getType(), |
5333 MemOperand->getBase(), Mask1); | 5318 MemOperand->getBase(), Mask1); |
5334 // If we have already assigned a physical register, we must come from | 5319 // If we have already assigned a physical register, we must come from |
5335 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse | 5320 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse |
5336 // the assigned register as this assignment is that start of its | 5321 // the assigned register as this assignment is the start of its
5337 // use-def chain. So we add RegNum argument here. | 5322 // use-def chain. So we add RegNum argument here. |
5338 Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum); | 5323 Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum); |
5339 _lea(RegTemp, TempMemOperand); | 5324 _lea(RegTemp, TempMemOperand); |
5340 // As source operand doesn't use the dstreg, we don't need to add | 5325 // As source operand doesn't use the dstreg, we don't need to add |
5341 // _set_dest_nonkillable(). | 5326 // _set_dest_nonkillable(). But if we use the same Dest Reg, that is, |
5342 // But if we use the same Dest Reg, that is, with RegNum | 5327 // with RegNum assigned, we should add this _set_dest_nonkillable() |
5343 // assigned, we should add this _set_dest_nonkillable() | |
5344 if (RegNum != Variable::NoRegister) | 5328 if (RegNum != Variable::NoRegister) |
5345 _set_dest_nonkillable(); | 5329 _set_dest_nonkillable(); |
5346 | 5330 |
5347 typename Traits::X86OperandMem *NewMemOperand = | 5331 typename Traits::X86OperandMem *NewMemOperand = |
5348 Traits::X86OperandMem::create(Func, MemOperand->getType(), RegTemp, | 5332 Traits::X86OperandMem::create(Func, MemOperand->getType(), RegTemp, |
5349 Mask2, MemOperand->getIndex(), | 5333 Mask2, MemOperand->getIndex(), |
5350 MemOperand->getShift(), | 5334 MemOperand->getShift(), |
5351 MemOperand->getSegmentRegister()); | 5335 MemOperand->getSegmentRegister()); |
5352 | 5336 |
5353 // Label this memory operand as randomized, so we won't randomize it | 5337 // Label this memory operand as randomized, so we won't randomize it |
5354 // again in case we call legalize() multiple times on this memory | 5338 // again in case we call legalize() multiple times on this memory |
5355 // operand. | 5339 // operand. |
5356 NewMemOperand->setRandomized(true); | 5340 NewMemOperand->setRandomized(true); |
5357 return NewMemOperand; | 5341 return NewMemOperand; |
5358 } | 5342 } |
5359 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) { | 5343 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) { |
5360 // pool the constant offset | 5344 // pool the constant offset |
5361 // FROM: | 5345 // FROM: |
5362 // offset[base, index, shift] | 5346 // offset[base, index, shift] |
5363 // TO: | 5347 // TO: |
5364 // insert: mov $label, RegTemp | 5348 // insert: mov $label, RegTemp |
5365 // insert: lea [base, RegTemp], RegTemp | 5349 // insert: lea [base, RegTemp], RegTemp |
5366 // =>[RegTemp, index, shift] | 5350 // =>[RegTemp, index, shift] |
5367 assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == | 5351 assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == |
5368 RPI_Pool); | 5352 RPI_Pool); |
5369 // Memory operand should never exist as source operands in phi | 5353 // Memory operand should never exist as source operands in phi lowering |
5370 // lowering assignments, so there is no need to reuse any registers | 5354 // assignments, so there is no need to reuse any registers here. For |
5371 // here. For phi lowering, we should not ask for new physical | 5355 // phi lowering, we should not ask for new physical registers in |
5372 // registers in general. | 5356 // general. However, if we do meet Memory Operand during phi lowering, |
5373 // However, if we do meet Memory Operand during phi lowering, we | 5357 // we should not blind or pool the immediates for now. |
5374 // should not blind or pool the immediates for now. | |
5375 if (RegNum != Variable::NoRegister) | 5358 if (RegNum != Variable::NoRegister) |
5376 return MemOperand; | 5359 return MemOperand; |
5377 Variable *RegTemp = makeReg(IceType_i32); | 5360 Variable *RegTemp = makeReg(IceType_i32); |
5378 IceString Label; | 5361 IceString Label; |
5379 llvm::raw_string_ostream Label_stream(Label); | 5362 llvm::raw_string_ostream Label_stream(Label); |
5380 MemOperand->getOffset()->emitPoolLabel(Label_stream); | 5363 MemOperand->getOffset()->emitPoolLabel(Label_stream); |
5381 MemOperand->getOffset()->setShouldBePooled(true); | 5364 MemOperand->getOffset()->setShouldBePooled(true); |
5382 const RelocOffsetT SymOffset = 0; | 5365 const RelocOffsetT SymOffset = 0; |
5383 bool SuppressMangling = true; | 5366 bool SuppressMangling = true; |
5384 Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(), | 5367 Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(), |
(...skipping 25 matching lines...) Expand all Loading... |
5410 } | 5393 } |
5411 // the offset is not eligible for blinding or pooling, return the original | 5394 // the offset is not eligible for blinding or pooling, return the original |
5412 // mem operand | 5395 // mem operand |
5413 return MemOperand; | 5396 return MemOperand; |
5414 } | 5397 } |
5415 | 5398 |
5416 } // end of namespace X86Internal | 5399 } // end of namespace X86Internal |
5417 } // end of namespace Ice | 5400 } // end of namespace Ice |
5418 | 5401 |
5419 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 5402 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
OLD | NEW |