Chromium Code Reviews

Side by Side Diff: src/IceTargetLoweringARM32.cpp

Issue 1409863006: Subzero. ARM32. Refactors atomic intrinsics lowering. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Created 4 years, 10 months ago
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 367 matching lines...)
378 } 378 }
379 379
380 namespace { 380 namespace {
381 void copyRegAllocFromInfWeightVariable64On32(const VarList &Vars) { 381 void copyRegAllocFromInfWeightVariable64On32(const VarList &Vars) {
382 for (Variable *Var : Vars) { 382 for (Variable *Var : Vars) {
383 auto *Var64 = llvm::dyn_cast<Variable64On32>(Var); 383 auto *Var64 = llvm::dyn_cast<Variable64On32>(Var);
384 if (!Var64) { 384 if (!Var64) {
385 // This is not the variable we are looking for. 385 // This is not the variable we are looking for.
386 continue; 386 continue;
387 } 387 }
388 assert(Var64->hasReg() || !Var64->mustHaveReg()); 388 // Only allow infinite-weight i64 temporaries to be register allocated.
389 assert(!Var64->hasReg() || Var64->mustHaveReg());
389 if (!Var64->hasReg()) { 390 if (!Var64->hasReg()) {
390 continue; 391 continue;
391 } 392 }
392 const auto FirstReg = 393 const auto FirstReg =
393 RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(Var->getRegNum())); 394 RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(Var->getRegNum()));
394 // This assumes little endian. 395 // This assumes little endian.
395 Variable *Lo = Var64->getLo(); 396 Variable *Lo = Var64->getLo();
396 Variable *Hi = Var64->getHi(); 397 Variable *Hi = Var64->getHi();
397 assert(Lo->hasReg() == Hi->hasReg()); 398 assert(Lo->hasReg() == Hi->hasReg());
398 if (Lo->hasReg()) { 399 if (Lo->hasReg()) {
(...skipping 3002 matching lines...)
3401 if (Dest->isRematerializable()) { 3402 if (Dest->isRematerializable()) {
3402 Context.insert<InstFakeDef>(Dest); 3403 Context.insert<InstFakeDef>(Dest);
3403 return; 3404 return;
3404 } 3405 }
3405 3406
3406 Operand *Src0 = Instr->getSrc(0); 3407 Operand *Src0 = Instr->getSrc(0);
3407 assert(Dest->getType() == Src0->getType()); 3408 assert(Dest->getType() == Src0->getType());
3408 if (Dest->getType() == IceType_i64) { 3409 if (Dest->getType() == IceType_i64) {
3409 Src0 = legalizeUndef(Src0); 3410 Src0 = legalizeUndef(Src0);
3410 3411
3412 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
Jim Stichnoth 2016/02/17 03:50:17 Just curious - why these changes?
John 2016/02/17 13:01:38 I was fiddling around trying to avoid some unnecessary...
3411 Variable *T_Lo = makeReg(IceType_i32); 3413 Variable *T_Lo = makeReg(IceType_i32);
3412 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3413 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); 3414 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
3414 _mov(T_Lo, Src0Lo); 3415 _mov(T_Lo, Src0Lo);
3415 _mov(DestLo, T_Lo); 3416 _mov(DestLo, T_Lo);
3416 3417
3418 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3417 Variable *T_Hi = makeReg(IceType_i32); 3419 Variable *T_Hi = makeReg(IceType_i32);
3418 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3419 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); 3420 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
3420 _mov(T_Hi, Src0Hi); 3421 _mov(T_Hi, Src0Hi);
3421 _mov(DestHi, T_Hi); 3422 _mov(DestHi, T_Hi);
3422 3423
3423 return; 3424 return;
3424 } 3425 }
3425 3426
3426 Operand *NewSrc; 3427 Operand *NewSrc;
3427 if (Dest->hasReg()) { 3428 if (Dest->hasReg()) {
3428 // If Dest already has a physical register, then legalize the Src operand 3429 // If Dest already has a physical register, then legalize the Src operand
(...skipping 965 matching lines...)
4394 _cmp(ConstR, NonConstF); 4395 _cmp(ConstR, NonConstF);
4395 } else { 4396 } else {
4396 Variable *T = makeReg(IceType_i32); 4397 Variable *T = makeReg(IceType_i32);
4397 _rsbs(T, ConstR, NonConstF); 4398 _rsbs(T, ConstR, NonConstF);
4398 Context.insert<InstFakeUse>(T); 4399 Context.insert<InstFakeUse>(T);
4399 } 4400 }
4400 return CondWhenTrue(getIcmp32Mapping(Condition)); 4401 return CondWhenTrue(getIcmp32Mapping(Condition));
4401 } 4402 }
4402 4403
4403 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Instr) { 4404 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Instr) {
4404 Operand *Src0 = legalizeUndef(Instr->getSrc(0)); 4405 return lowerIcmpCond(Instr->getCondition(), Instr->getSrc(0),
4405 Operand *Src1 = legalizeUndef(Instr->getSrc(1)); 4406 Instr->getSrc(1));
4407 }
4406 4408
4407 const InstIcmp::ICond Condition = Instr->getCondition(); 4409 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(InstIcmp::ICond Condition,
4410 Operand *Src0,
4411 Operand *Src1) {
4412 Src0 = legalizeUndef(Src0);
4413 Src1 = legalizeUndef(Src1);
4414
4408 // a=icmp cond b, c ==> 4415 // a=icmp cond b, c ==>
4409 // GCC does: 4416 // GCC does:
4410 // <u/s>xtb tb, b 4417 // <u/s>xtb tb, b
4411 // <u/s>xtb tc, c 4418 // <u/s>xtb tc, c
4412 // cmp tb, tc 4419 // cmp tb, tc
4413 // mov.C1 t, #0 4420 // mov.C1 t, #0
4414 // mov.C2 t, #1 4421 // mov.C2 t, #1
4415 // mov a, t 4422 // mov a, t
4416 // where the unsigned/sign extension is not needed for 32-bit. They also have 4423 // where the unsigned/sign extension is not needed for 32-bit. They also have
4417 // special cases for EQ and NE. E.g., for NE: 4424 // special cases for EQ and NE. E.g., for NE:
(...skipping 79 matching lines...)
4497 } 4504 }
4498 4505
4499 namespace { 4506 namespace {
4500 inline uint64_t getConstantMemoryOrder(Operand *Opnd) { 4507 inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
4501 if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) 4508 if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
4502 return Integer->getValue(); 4509 return Integer->getValue();
4503 return Intrinsics::MemoryOrderInvalid; 4510 return Intrinsics::MemoryOrderInvalid;
4504 } 4511 }
4505 } // end of anonymous namespace 4512 } // end of anonymous namespace
4506 4513
4514 void TargetARM32::lowerLoadLinkedStoreExclusive(
4515 Type Ty, Operand *Addr, std::function<Variable *(Variable *)> Operation,
4516 CondARM32::Cond Cond) {
4517
4518 auto *Retry = Context.insert<InstARM32Label>(this);
4519 { // scoping for loop highlighting.
4520 Variable *Tmp = (Ty == IceType_i64) ? makeI64RegPair() : makeReg(Ty);
4521 auto *Success = makeReg(IceType_i32);
4522 auto *_0 = Ctx->getConstantZero(IceType_i32);
4523
4524 Context.insert<InstFakeDef>(Tmp);
4525 Context.insert<InstFakeUse>(Tmp);
4526 Variable *AddrR = legalizeToReg(Addr);
4527 _ldrex(Tmp, formMemoryOperand(AddrR, Ty))->setDestRedefined();
4528 auto *StoreValue = Operation(Tmp);
4529 assert(StoreValue->mustHaveReg());
4530 _strex(Success, StoreValue, formMemoryOperand(AddrR, Ty), Cond);
4531 _cmp(Success, _0, Cond);
4532 }
4533 _br(Retry, CondARM32::NE);
4534 }
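The helper above factors the ldrex/strex retry loop out of the individual atomic lowerings: the Operation callback computes the value to store from the freshly loaded contents, and the loop repeats until strex reports success. As a purely illustrative analogue (not Subzero code, and approximating the ldrex/strex pair with a compare-and-swap loop that has the same retry shape), the pattern looks like this in ordinary C++:

#include <atomic>
#include <cstdint>
#include <functional>

// Illustrative sketch only: models the loop emitted by
// lowerLoadLinkedStoreExclusive. 'Operation' computes the value to store from
// the loaded contents; the loop retries until the conditional store succeeds.
static uint32_t loadLinkedStoreExclusiveLike(
    std::atomic<uint32_t> &Addr,
    const std::function<uint32_t(uint32_t)> &Operation) {
  uint32_t Loaded = Addr.load(std::memory_order_relaxed); // ldrex Tmp, [Addr]
  // On failure, compare_exchange_weak reloads Loaded with the current
  // contents, so the next iteration recomputes the store value; this plays
  // the same role as the bne back to the retry label in the emitted code.
  while (!Addr.compare_exchange_weak(Loaded, Operation(Loaded))) {
  }
  return Loaded; // the contents observed before the successful store
}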
4535
4536 namespace {
4537 InstArithmetic *createArithInst(Cfg *Func, uint32_t Operation, Variable *Dest,
4538 Variable *Src0, Operand *Src1) {
4539 InstArithmetic::OpKind Oper;
4540 switch (Operation) {
4541 default:
4542 llvm::report_fatal_error("Unknown AtomicRMW operation");
4543 case Intrinsics::AtomicExchange:
4544 llvm::report_fatal_error("Can't handle Atomic xchg operation");
4545 case Intrinsics::AtomicAdd:
4546 Oper = InstArithmetic::Add;
4547 break;
4548 case Intrinsics::AtomicAnd:
4549 Oper = InstArithmetic::And;
4550 break;
4551 case Intrinsics::AtomicSub:
4552 Oper = InstArithmetic::Sub;
4553 break;
4554 case Intrinsics::AtomicOr:
4555 Oper = InstArithmetic::Or;
4556 break;
4557 case Intrinsics::AtomicXor:
4558 Oper = InstArithmetic::Xor;
4559 break;
4560 }
4561 return InstArithmetic::create(Func, Oper, Dest, Src0, Src1);
4562 }
4563 } // end of anonymous namespace
4564
4507 void TargetARM32::lowerAtomicRMW(Variable *Dest, uint32_t Operation, 4565 void TargetARM32::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
4508 Operand *Ptr, Operand *Val) { 4566 Operand *Addr, Operand *Val) {
4509 // retry: 4567 // retry:
4510 // ldrex contents, [addr] 4568 // ldrex tmp, [addr]
4511 // op tmp, contents, operand 4569 // mov contents, tmp
4512 // strex success, tmp, [addr] 4570 // op result, contents, Val
4571 // strex success, result, [addr]
4572 // cmp success, 0
4513 // jne retry 4573 // jne retry
4514 // fake-use(addr, operand) @ prevents undesirable clobbering. 4574 // fake-use(addr, operand) @ prevents undesirable clobbering.
4515 // mov dest, contents 4575 // mov dest, contents
4516 assert(Dest != nullptr); 4576 auto DestTy = Dest->getType();
4517 Type DestTy = Dest->getType();
4518 (void)Ptr;
4519 (void)Val;
4520
4521 OperandARM32Mem *Mem;
4522 Variable *PtrContentsReg;
4523 Variable *PtrContentsHiReg;
4524 Variable *PtrContentsLoReg;
4525 Variable *Value = Func->makeVariable(DestTy);
4526 Variable *ValueReg;
4527 Variable *ValueHiReg;
4528 Variable *ValueLoReg;
4529 Variable *Success = makeReg(IceType_i32);
4530 Variable *TmpReg;
4531 Variable *TmpHiReg;
4532 Variable *TmpLoReg;
4533 Operand *_0 = Ctx->getConstantZero(IceType_i32);
4534 auto *Retry = InstARM32Label::create(Func, this);
4535 4577
4536 if (DestTy == IceType_i64) { 4578 if (DestTy == IceType_i64) {
4537 Variable64On32 *PtrContentsReg64 = makeI64RegPair(); 4579 lowerInt64AtomicRMW(Dest, Operation, Addr, Val);
4538 PtrContentsHiReg = PtrContentsReg64->getHi(); 4580 return;
4539 PtrContentsLoReg = PtrContentsReg64->getLo();
4540 PtrContentsReg = PtrContentsReg64;
4541
4542 llvm::cast<Variable64On32>(Value)->initHiLo(Func);
4543 Variable64On32 *ValueReg64 = makeI64RegPair();
4544 ValueHiReg = ValueReg64->getHi();
4545 ValueLoReg = ValueReg64->getLo();
4546 ValueReg = ValueReg64;
4547
4548 Variable64On32 *TmpReg64 = makeI64RegPair();
4549 TmpHiReg = TmpReg64->getHi();
4550 TmpLoReg = TmpReg64->getLo();
4551 TmpReg = TmpReg64;
4552 } else {
4553 PtrContentsReg = makeReg(DestTy);
4554 PtrContentsHiReg = nullptr;
4555 PtrContentsLoReg = PtrContentsReg;
4556
4557 ValueReg = makeReg(DestTy);
4558 ValueHiReg = nullptr;
4559 ValueLoReg = ValueReg;
4560
4561 TmpReg = makeReg(DestTy);
4562 TmpHiReg = nullptr;
4563 TmpLoReg = TmpReg;
4564 } 4581 }
4565 4582
4566 if (DestTy == IceType_i64) { 4583 Operand *ValRF = nullptr;
4567 Context.insert<InstFakeDef>(Value); 4584 if (llvm::isa<ConstantInteger32>(Val)) {
4585 ValRF = Val;
4586 } else {
4587 ValRF = legalizeToReg(Val);
4568 } 4588 }
4569 lowerAssign(InstAssign::create(Func, Value, Val)); 4589 auto *ContentsR = makeReg(DestTy);
4570 4590 auto *ResultR = makeReg(DestTy);
4571 Variable *PtrVar = Func->makeVariable(IceType_i32);
4572 lowerAssign(InstAssign::create(Func, PtrVar, Ptr));
4573 4591
4574 _dmb(); 4592 _dmb();
4575 Context.insert(Retry); 4593 lowerLoadLinkedStoreExclusive(
4576 Mem = formMemoryOperand(PtrVar, DestTy); 4594 DestTy, Addr,
4577 if (DestTy == IceType_i64) { 4595 [this, Operation, ResultR, ContentsR, ValRF](Variable *Tmp) {
4578 Context.insert<InstFakeDef>(ValueReg, Value); 4596 lowerAssign(InstAssign::create(Func, ContentsR, Tmp));
4597 if (Operation == Intrinsics::AtomicExchange) {
4598 lowerAssign(InstAssign::create(Func, ResultR, ValRF));
4599 } else {
4600 lowerArithmetic(
4601 createArithInst(Func, Operation, ResultR, ContentsR, ValRF));
4602 }
4603 return ResultR;
4604 });
4605 _dmb();
4606 if (auto *ValR = llvm::dyn_cast<Variable>(ValRF)) {
4607 Context.insert<InstFakeUse>(ValR);
4579 } 4608 }
4580 lowerAssign(InstAssign::create(Func, ValueReg, Value)); 4609 // Can't dce ContentsR.
4581 if (DestTy == IceType_i8 || DestTy == IceType_i16) { 4610 Context.insert<InstFakeUse>(ContentsR);
4582 _uxt(ValueReg, ValueReg); 4611 lowerAssign(InstAssign::create(Func, Dest, ContentsR));
4583 } 4612 }
4584 _ldrex(PtrContentsReg, Mem);
4585 4613
4586 if (DestTy == IceType_i64) { 4614 void TargetARM32::lowerInt64AtomicRMW(Variable *Dest, uint32_t Operation,
4587 Context.insert<InstFakeDef>(TmpReg, ValueReg); 4615 Operand *Addr, Operand *Val) {
4588 } 4616 assert(Dest->getType() == IceType_i64);
4589 switch (Operation) {
4590 default:
4591 Func->setError("Unknown AtomicRMW operation");
4592 return;
4593 case Intrinsics::AtomicAdd:
4594 if (DestTy == IceType_i64) {
4595 _adds(TmpLoReg, PtrContentsLoReg, ValueLoReg);
4596 _adc(TmpHiReg, PtrContentsHiReg, ValueHiReg);
4597 } else {
4598 _add(TmpLoReg, PtrContentsLoReg, ValueLoReg);
4599 }
4600 break;
4601 case Intrinsics::AtomicSub:
4602 if (DestTy == IceType_i64) {
4603 _subs(TmpLoReg, PtrContentsLoReg, ValueLoReg);
4604 _sbc(TmpHiReg, PtrContentsHiReg, ValueHiReg);
4605 } else {
4606 _sub(TmpLoReg, PtrContentsLoReg, ValueLoReg);
4607 }
4608 break;
4609 case Intrinsics::AtomicOr:
4610 _orr(TmpLoReg, PtrContentsLoReg, ValueLoReg);
4611 if (DestTy == IceType_i64) {
4612 _orr(TmpHiReg, PtrContentsHiReg, ValueHiReg);
4613 }
4614 break;
4615 case Intrinsics::AtomicAnd:
4616 _and(TmpLoReg, PtrContentsLoReg, ValueLoReg);
4617 if (DestTy == IceType_i64) {
4618 _and(TmpHiReg, PtrContentsHiReg, ValueHiReg);
4619 }
4620 break;
4621 case Intrinsics::AtomicXor:
4622 _eor(TmpLoReg, PtrContentsLoReg, ValueLoReg);
4623 if (DestTy == IceType_i64) {
4624 _eor(TmpHiReg, PtrContentsHiReg, ValueHiReg);
4625 }
4626 break;
4627 case Intrinsics::AtomicExchange:
4628 _mov(TmpLoReg, ValueLoReg);
4629 if (DestTy == IceType_i64) {
4630 _mov(TmpHiReg, ValueHiReg);
4631 }
4632 break;
4633 }
4634 _strex(Success, TmpReg, Mem);
4635 _cmp(Success, _0);
4636 _br(Retry, CondARM32::NE);
4637 4617
4638 // The following fake-uses ensure that Subzero will not clobber them in the 4618 auto *ResultR = makeI64RegPair();
4639 // load-linked/store-conditional loop above. We might have to spill them, but 4619
4640 // spilling is preferable over incorrect behavior. 4620 Context.insert<InstFakeDef>(ResultR);
4641 Context.insert<InstFakeUse>(PtrVar); 4621
4642 if (auto *Value64 = llvm::dyn_cast<Variable64On32>(Value)) { 4622 Operand *ValRF = nullptr;
4643 Context.insert<InstFakeUse>(Value64->getHi()); 4623 if (llvm::dyn_cast<ConstantInteger64>(Val)) {
4644 Context.insert<InstFakeUse>(Value64->getLo()); 4624 ValRF = Val;
4645 } else { 4625 } else {
4646 Context.insert<InstFakeUse>(Value); 4626 auto *ValR64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
4647 } 4627 ValR64->initHiLo(Func);
4648 _dmb(); 4628 ValR64->setMustNotHaveReg();
4649 if (DestTy == IceType_i8 || DestTy == IceType_i16) { 4629 ValR64->getLo()->setMustHaveReg();
4650 _uxt(PtrContentsReg, PtrContentsReg); 4630 ValR64->getHi()->setMustHaveReg();
4631 lowerAssign(InstAssign::create(Func, ValR64, Val));
4632 ValRF = ValR64;
4651 } 4633 }
4652 4634
4653 if (DestTy == IceType_i64) { 4635 auto *ContentsR = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
4654 Context.insert<InstFakeUse>(PtrContentsReg); 4636 ContentsR->initHiLo(Func);
4637 ContentsR->setMustNotHaveReg();
4638 ContentsR->getLo()->setMustHaveReg();
4639 ContentsR->getHi()->setMustHaveReg();
4640
4641 _dmb();
4642 lowerLoadLinkedStoreExclusive(
4643 IceType_i64, Addr,
4644 [this, Operation, ResultR, ContentsR, ValRF](Variable *Tmp) {
4645 lowerAssign(InstAssign::create(Func, ContentsR, Tmp));
4646 Context.insert<InstFakeUse>(Tmp);
4647 if (Operation == Intrinsics::AtomicExchange) {
4648 lowerAssign(InstAssign::create(Func, ResultR, ValRF));
4649 } else {
4650 lowerArithmetic(
4651 createArithInst(Func, Operation, ResultR, ContentsR, ValRF));
4652 }
4653 Context.insert<InstFakeUse>(ResultR->getHi());
4654 Context.insert<InstFakeDef>(ResultR, ResultR->getLo())
4655 ->setDestRedefined();
4656 return ResultR;
4657 });
4658 _dmb();
4659 if (auto *ValR64 = llvm::dyn_cast<Variable64On32>(ValRF)) {
4660 Context.insert<InstFakeUse>(ValR64->getLo());
4661 Context.insert<InstFakeUse>(ValR64->getHi());
4655 } 4662 }
4656 lowerAssign(InstAssign::create(Func, Dest, PtrContentsReg)); 4663 lowerAssign(InstAssign::create(Func, Dest, ContentsR));
4657 if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) {
4658 Context.insert<InstFakeUse>(Dest64->getLo());
4659 Context.insert<InstFakeUse>(Dest64->getHi());
4660 } else {
4661 Context.insert<InstFakeUse>(Dest);
4662 }
4663 } 4664 }
4664 4665
4665 void TargetARM32::postambleCtpop64(const InstCall *Instr) { 4666 void TargetARM32::postambleCtpop64(const InstCall *Instr) {
4666 Operand *Arg0 = Instr->getArg(0); 4667 Operand *Arg0 = Instr->getArg(0);
4667 if (isInt32Asserting32Or64(Arg0->getType())) { 4668 if (isInt32Asserting32Or64(Arg0->getType())) {
4668 return; 4669 return;
4669 } 4670 }
4670 // The popcount helpers always return 32-bit values, while the intrinsic's 4671 // The popcount helpers always return 32-bit values, while the intrinsic's
4671 // signature matches some 64-bit platform's native instructions and expects to 4672 // signature matches some 64-bit platform's native instructions and expects to
4672 // fill a 64-bit reg. Thus, clear the upper bits of the dest just in case the 4673 // fill a 64-bit reg. Thus, clear the upper bits of the dest just in case the
(...skipping 53 matching lines...)
4726 // ldrex is the only ARM instruction that is guaranteed to load a 64-bit 4727 // ldrex is the only ARM instruction that is guaranteed to load a 64-bit
4727 // integer atomically. Everything else works with a regular ldr. 4728 // integer atomically. Everything else works with a regular ldr.
4728 T = makeI64RegPair(); 4729 T = makeI64RegPair();
4729 _ldrex(T, formMemoryOperand(Instr->getArg(0), IceType_i64)); 4730 _ldrex(T, formMemoryOperand(Instr->getArg(0), IceType_i64));
4730 } else { 4731 } else {
4731 T = makeReg(DestTy); 4732 T = makeReg(DestTy);
4732 _ldr(T, formMemoryOperand(Instr->getArg(0), DestTy)); 4733 _ldr(T, formMemoryOperand(Instr->getArg(0), DestTy));
4733 } 4734 }
4734 _dmb(); 4735 _dmb();
4735 lowerAssign(InstAssign::create(Func, Dest, T)); 4736 lowerAssign(InstAssign::create(Func, Dest, T));
4736 // Make sure the atomic load isn't elided when unused, by adding a FakeUse. 4737 // Add a fake-use of T to ensure the atomic load is not removed if Dest is
4737 // Since lowerLoad may fuse the load w/ an arithmetic instruction, insert 4738 // unused.
4738 // the FakeUse on the last-inserted instruction's dest. 4739 Context.insert<InstFakeUse>(T);
4739 Context.insert<InstFakeUse>(Context.getLastInserted()->getDest());
4740 return; 4740 return;
4741 } 4741 }
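For context, this matches what a C++ toolchain does for the same operation: on ARMv7 a sequentially consistent 64-bit atomic load is typically compiled to an ldrexd plus a dmb barrier, for the reason given in the comment above. A minimal, purely illustrative analogue (not Subzero code):

#include <atomic>
#include <cstdint>

// Illustrative sketch only: a seq_cst 64-bit atomic load, which on ARMv7
// typically lowers to an ldrexd plus a dmb, mirroring the AtomicLoad lowering.
static uint64_t atomicLoad64(const std::atomic<uint64_t> &Addr) {
  return Addr.load(std::memory_order_seq_cst);
}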
4742 case Intrinsics::AtomicStore: { 4742 case Intrinsics::AtomicStore: {
4743 // We require the memory address to be naturally aligned. Given that is the 4743 // We require the memory address to be naturally aligned. Given that is the
4744 // case, then normal stores are atomic. 4744 // case, then normal stores are atomic.
4745 if (!Intrinsics::isMemoryOrderValid( 4745 if (!Intrinsics::isMemoryOrderValid(
4746 ID, getConstantMemoryOrder(Instr->getArg(2)))) { 4746 ID, getConstantMemoryOrder(Instr->getArg(2)))) {
4747 Func->setError("Unexpected memory ordering for AtomicStore"); 4747 Func->setError("Unexpected memory ordering for AtomicStore");
4748 return; 4748 return;
4749 } 4749 }
4750 Operand *Value = Instr->getArg(0);
4751 Type ValueTy = Value->getType();
4752 assert(isScalarIntegerType(ValueTy));
4753 Operand *Addr = Instr->getArg(1);
4754 4750
4755 if (ValueTy == IceType_i64) { 4751 auto *Value = Instr->getArg(0);
4756 // Atomic 64-bit stores require a load-linked/store-conditional loop using 4752 if (Value->getType() == IceType_i64) {
4757 // ldrexd and strexd. The lowered code is: 4753 auto *ValueR = makeI64RegPair();
4758 // 4754 Context.insert<InstFakeDef>(ValueR);
4759 // retry: 4755 lowerAssign(InstAssign::create(Func, ValueR, Value));
4760 // ldrexd t.lo, t.hi, [addr]
4761 // strexd success, value.lo, value.hi, [addr]
4762 // cmp success, #0
4763 // bne retry
4764 // fake-use(addr, value.lo, value.hi)
4765 //
4766 // The fake-use is needed to prevent those variables from being clobbered
4767 // in the loop (which will happen under register pressure.)
4768 Variable64On32 *Tmp = makeI64RegPair();
4769 Variable64On32 *ValueVar =
4770 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
4771 Variable *AddrVar = makeReg(IceType_i32);
4772 Variable *Success = makeReg(IceType_i32);
4773 OperandARM32Mem *Mem;
4774 Operand *_0 = Ctx->getConstantZero(IceType_i32);
4775 auto *Retry = InstARM32Label::create(Func, this);
4776 Variable64On32 *NewReg = makeI64RegPair();
4777 ValueVar->initHiLo(Func);
4778 ValueVar->mustNotHaveReg();
4779
4780 _dmb(); 4756 _dmb();
4781 lowerAssign(InstAssign::create(Func, ValueVar, Value)); 4757 lowerLoadLinkedStoreExclusive(
4782 lowerAssign(InstAssign::create(Func, AddrVar, Addr)); 4758 IceType_i64, Instr->getArg(1), [this, ValueR](Variable *Tmp) {
4783 4759 // The following fake-use prevents the ldrex instruction from being
4784 Context.insert(Retry); 4760 // dead code eliminated.
4785 Context.insert<InstFakeDef>(NewReg); 4761 Context.insert<InstFakeUse>(llvm::cast<Variable>(loOperand(Tmp)));
4786 lowerAssign(InstAssign::create(Func, NewReg, ValueVar)); 4762 Context.insert<InstFakeUse>(llvm::cast<Variable>(hiOperand(Tmp)));
4787 Mem = formMemoryOperand(AddrVar, IceType_i64); 4763 Context.insert<InstFakeUse>(Tmp);
4788 _ldrex(Tmp, Mem); 4764 return ValueR;
4789 // This fake-use both prevents the ldrex from being dead-code eliminated, 4765 });
4790 // while also keeping liveness happy about all defs being used. 4766 Context.insert<InstFakeUse>(ValueR);
4791 Context.insert<InstFakeUse>(Context.getLastInserted()->getDest());
4792 _strex(Success, NewReg, Mem);
4793 _cmp(Success, _0);
4794 _br(Retry, CondARM32::NE);
4795
4796 Context.insert<InstFakeUse>(ValueVar->getLo());
4797 Context.insert<InstFakeUse>(ValueVar->getHi());
4798 Context.insert<InstFakeUse>(AddrVar);
4799 _dmb(); 4767 _dmb();
4800 return; 4768 return;
4801 } 4769 }
4770
4771 auto *ValueR = legalizeToReg(Instr->getArg(0));
4772 Type ValueTy = ValueR->getType();
Jim Stichnoth 2016/02/17 03:50:17 Surprised that "auto" wasn't used here.
John 2016/02/17 13:01:39 not to mention the lack of constness... :) Done.
4773 assert(isScalarIntegerType(ValueTy));
4774 auto *Addr = legalizeToReg(Instr->getArg(1));
4775
4802 // Non-64-bit stores are atomic as long as the address is aligned. This 4776 // Non-64-bit stores are atomic as long as the address is aligned. This
4803 // is PNaCl, so addresses are aligned. 4777 // is PNaCl, so addresses are aligned.
4804 Variable *T = makeReg(ValueTy);
4805
4806 _dmb(); 4778 _dmb();
4807 lowerAssign(InstAssign::create(Func, T, Value)); 4779 _str(ValueR, formMemoryOperand(Addr, ValueTy));
4808 _str(T, formMemoryOperand(Addr, ValueTy));
4809 _dmb(); 4780 _dmb();
4810 return; 4781 return;
4811 } 4782 }
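Again for context, the split in this case mirrors the standard ARMv7 mappings: a 64-bit atomic store needs the ldrexd/strexd retry loop described in the removed comment, while a naturally aligned narrower store only needs a plain str between dmb barriers. A purely illustrative analogue (not Subzero code):

#include <atomic>
#include <cstdint>

// Illustrative sketches only. On ARMv7, the 64-bit seq_cst store typically
// lowers to dmb + an ldrexd/strexd retry loop + dmb, while the 32-bit case
// typically lowers to dmb + str + dmb.
static void atomicStore64(std::atomic<uint64_t> &Addr, uint64_t Value) {
  Addr.store(Value, std::memory_order_seq_cst);
}

static void atomicStore32(std::atomic<uint32_t> &Addr, uint32_t Value) {
  Addr.store(Value, std::memory_order_seq_cst);
}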
4812 case Intrinsics::AtomicCmpxchg: { 4783 case Intrinsics::AtomicCmpxchg: {
4813 // The initial lowering for cmpxchg was:
4814 //
4815 // retry: 4784 // retry:
4816 // ldrex tmp, [addr] 4785 // ldrex tmp, [addr]
4817 // cmp tmp, expected 4786 // cmp tmp, expected
4818 // mov expected, tmp 4787 // mov expected, tmp
4819 // jne retry
4820 // strex success, new, [addr]
4821 // cmp success, #0
4822 // bne retry
4823 // mov dest, expected
4824 //
4825 // Besides requiring two branches, that lowering could also potentially
4826 // write to memory (in mov expected, tmp) unless we were OK with increasing
4827 // the register pressure and requiring expected to be an infinite-weight
4828 // variable (spoiler alert: that was a problem for i64 cmpxchg.) Through
4829 // careful rewritting, and thanks to predication, we now implement the
4830 // lowering as:
4831 //
4832 // retry:
4833 // ldrex tmp, [addr]
4834 // cmp tmp, expected
4835 // strexeq success, new, [addr] 4788 // strexeq success, new, [addr]
4836 // movne expected, tmp
4837 // cmpeq success, #0 4789 // cmpeq success, #0
4838 // bne retry 4790 // bne retry
4839 // mov dest, expected 4791 // mov dest, expected
4840 //
4841 // Predication lets us move the strex ahead of the mov expected, tmp, which
4842 // allows tmp to be a non-infinite weight temporary. We wanted to avoid
4843 // writing to memory between ldrex and strex because, even though most times
4844 // that would cause no issues, if any interleaving memory write aliased
4845 // [addr] then we would have undefined behavior. Undefined behavior isn't
4846 // cool, so we try to avoid it. See the "Synchronization and semaphores"
4847 // section of the "ARM Architecture Reference Manual."
4848
4849 assert(isScalarIntegerType(DestTy)); 4792 assert(isScalarIntegerType(DestTy));
4850 // We require the memory address to be naturally aligned. Given that is the 4793 // We require the memory address to be naturally aligned. Given that is the
4851 // case, then normal loads are atomic. 4794 // case, then normal loads are atomic.
4852 if (!Intrinsics::isMemoryOrderValid( 4795 if (!Intrinsics::isMemoryOrderValid(
4853 ID, getConstantMemoryOrder(Instr->getArg(3)), 4796 ID, getConstantMemoryOrder(Instr->getArg(3)),
4854 getConstantMemoryOrder(Instr->getArg(4)))) { 4797 getConstantMemoryOrder(Instr->getArg(4)))) {
4855 Func->setError("Unexpected memory ordering for AtomicCmpxchg"); 4798 Func->setError("Unexpected memory ordering for AtomicCmpxchg");
4856 return; 4799 return;
4857 } 4800 }
4858 4801
4859 OperandARM32Mem *Mem; 4802 if (DestTy == IceType_i64) {
4860 Variable *TmpReg; 4803 auto *New = makeI64RegPair();
4861 Variable *Expected, *ExpectedReg; 4804 Context.insert<InstFakeDef>(New);
4862 Variable *New, *NewReg; 4805 lowerAssign(InstAssign::create(Func, New, Instr->getArg(2)));
4863 Variable *Success = makeReg(IceType_i32);
4864 Operand *_0 = Ctx->getConstantZero(IceType_i32);
4865 auto *Retry = InstARM32Label::create(Func, this);
4866 4806
4867 if (DestTy == IceType_i64) { 4807 auto *Expected = makeI64RegPair();
4868 Variable64On32 *TmpReg64 = makeI64RegPair(); 4808 Context.insert<InstFakeDef>(Expected);
4869 Variable64On32 *New64 = 4809 lowerAssign(InstAssign::create(Func, Expected, Instr->getArg(1)));
4870 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
4871 Variable64On32 *NewReg64 = makeI64RegPair();
4872 Variable64On32 *Expected64 =
4873 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
4874 Variable64On32 *ExpectedReg64 = makeI64RegPair();
4875 4810
4876 New64->initHiLo(Func); 4811 _dmb();
4877 New64->mustNotHaveReg(); 4812 lowerLoadLinkedStoreExclusive(
4878 Expected64->initHiLo(Func); 4813 DestTy, Instr->getArg(0),
4879 Expected64->mustNotHaveReg(); 4814 [this, Expected, New, Instr, DestTy](Variable *Tmp) {
4815 auto *ExpectedLoR = llvm::cast<Variable>(loOperand(Expected));
4816 auto *ExpectedHiR = llvm::cast<Variable>(hiOperand(Expected));
4817 auto *TmpLoR = llvm::cast<Variable>(loOperand(Tmp));
4818 auto *TmpHiR = llvm::cast<Variable>(hiOperand(Tmp));
4819 _cmp(TmpLoR, ExpectedLoR);
4820 _cmp(TmpHiR, ExpectedHiR, CondARM32::EQ);
4821 // Adding an explicit use of Tmp here, or its live range will not
4822 // reach here (only those of Tmp.Lo and Tmp.Hi will.)
4823 Context.insert<InstFakeUse>(Tmp);
4824 _mov_redefined(ExpectedLoR, TmpLoR);
4825 _mov_redefined(ExpectedHiR, TmpHiR);
4826 // Same as above.
4827 Context.insert<InstFakeUse>(Tmp);
4828 return New;
4829 },
4830 CondARM32::EQ);
4831 _dmb();
4880 4832
4881 TmpReg = TmpReg64; 4833 lowerAssign(InstAssign::create(Func, Dest, Expected));
4882 New = New64; 4834 // The fake-use Expected prevents the assignments to Expected (above)
4883 NewReg = NewReg64; 4835 // from being removed if Dest is not used.
4884 Expected = Expected64; 4836 Context.insert<InstFakeUse>(Expected);
4885 ExpectedReg = ExpectedReg64; 4837 // New needs to be alive here, or its live range will end in the
4886 } else { 4838 // strex instruction.
4887 TmpReg = makeReg(DestTy); 4839 Context.insert<InstFakeUse>(New);
4888 New = Func->makeVariable(DestTy); 4840 return;
4889 NewReg = makeReg(DestTy);
4890 Expected = Func->makeVariable(DestTy);
4891 ExpectedReg = makeReg(DestTy);
4892 } 4841 }
4893 4842
4894 Mem = formMemoryOperand(Instr->getArg(0), DestTy); 4843 auto *New = legalizeToReg(Instr->getArg(2));
4895 if (DestTy == IceType_i64) { 4844 auto *Expected = legalizeToReg(Instr->getArg(1));
4896 Context.insert<InstFakeDef>(Expected); 4845
4897 } 4846 _dmb();
4898 lowerAssign(InstAssign::create(Func, Expected, Instr->getArg(1))); 4847 lowerLoadLinkedStoreExclusive(
4899 if (DestTy == IceType_i64) { 4848 DestTy,
4900 Context.insert<InstFakeDef>(New); 4849 Instr->getArg(0), [this, Expected, New, Instr, DestTy](Variable *Tmp) {
4901 } 4850 lowerIcmpCond(InstIcmp::Eq, Tmp, Expected);
4902 lowerAssign(InstAssign::create(Func, New, Instr->getArg(2))); 4851 _mov_redefined(Expected, Tmp);
4852 return New;
4853 }, CondARM32::EQ);
4903 _dmb(); 4854 _dmb();
4904 4855
4905 Context.insert(Retry);
4906 if (DestTy == IceType_i64) {
4907 Context.insert<InstFakeDef>(ExpectedReg, Expected);
4908 }
4909 lowerAssign(InstAssign::create(Func, ExpectedReg, Expected));
4910 if (DestTy == IceType_i64) {
4911 Context.insert<InstFakeDef>(NewReg, New);
4912 }
4913 lowerAssign(InstAssign::create(Func, NewReg, New));
4914
4915 _ldrex(TmpReg, Mem);
4916 Context.insert<InstFakeUse>(Context.getLastInserted()->getDest());
4917 if (DestTy == IceType_i64) {
4918 auto *TmpReg64 = llvm::cast<Variable64On32>(TmpReg);
4919 auto *ExpectedReg64 = llvm::cast<Variable64On32>(ExpectedReg);
4920 // lowerAssign above has added fake-defs for TmpReg and ExpectedReg. Let's
4921 // keep liveness happy, shall we?
4922 Context.insert<InstFakeUse>(TmpReg);
4923 Context.insert<InstFakeUse>(ExpectedReg);
4924 _cmp(TmpReg64->getHi(), ExpectedReg64->getHi());
4925 _cmp(TmpReg64->getLo(), ExpectedReg64->getLo(), CondARM32::EQ);
4926 } else {
4927 _cmp(TmpReg, ExpectedReg);
4928 }
4929 _strex(Success, NewReg, Mem, CondARM32::EQ);
4930 if (DestTy == IceType_i64) {
4931 auto *TmpReg64 = llvm::cast<Variable64On32>(TmpReg);
4932 auto *Expected64 = llvm::cast<Variable64On32>(Expected);
4933 _mov_redefined(Expected64->getHi(), TmpReg64->getHi(), CondARM32::NE);
4934 _mov_redefined(Expected64->getLo(), TmpReg64->getLo(), CondARM32::NE);
4935 Context.insert<InstFakeDef>(Expected, TmpReg);
4936 _set_dest_redefined();
4937 } else {
4938 _mov_redefined(Expected, TmpReg, CondARM32::NE);
4939 }
4940 _cmp(Success, _0, CondARM32::EQ);
4941 _br(Retry, CondARM32::NE);
4942 _dmb();
4943 lowerAssign(InstAssign::create(Func, Dest, Expected)); 4856 lowerAssign(InstAssign::create(Func, Dest, Expected));
4944 Context.insert<InstFakeUse>(Expected); 4857 Context.insert<InstFakeUse>(Expected);
4945 if (auto *New64 = llvm::dyn_cast<Variable64On32>(New)) { 4858 Context.insert<InstFakeUse>(New);
4946 Context.insert<InstFakeUse>(New64->getLo());
4947 Context.insert<InstFakeUse>(New64->getHi());
4948 } else {
4949 Context.insert<InstFakeUse>(New);
4950 }
4951 return; 4859 return;
4952 } 4860 }
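The cmpxchg lowering above always returns, in Dest, the contents observed by the ldrex, whether or not the store was performed; the strex is predicated on the comparison, so memory is only written when the expected value was seen. A purely illustrative analogue of that value-returning behavior (not Subzero code):

#include <atomic>
#include <cstdint>

// Illustrative sketch only: a value-returning compare-and-swap, matching the
// "mov dest, expected" step of the lowering. On failure,
// compare_exchange_strong writes the observed contents back into Expected, so
// the returned value is always what was read from memory.
static uint32_t cmpxchgLike(std::atomic<uint32_t> &Addr, uint32_t Expected,
                            uint32_t New) {
  Addr.compare_exchange_strong(Expected, New, std::memory_order_seq_cst);
  return Expected;
}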
4953 case Intrinsics::AtomicRMW: { 4861 case Intrinsics::AtomicRMW: {
4954 if (!Intrinsics::isMemoryOrderValid( 4862 if (!Intrinsics::isMemoryOrderValid(
4955 ID, getConstantMemoryOrder(Instr->getArg(3)))) { 4863 ID, getConstantMemoryOrder(Instr->getArg(3)))) {
4956 Func->setError("Unexpected memory ordering for AtomicRMW"); 4864 Func->setError("Unexpected memory ordering for AtomicRMW");
4957 return; 4865 return;
4958 } 4866 }
4959 lowerAtomicRMW( 4867 lowerAtomicRMW(
4960 Dest, static_cast<uint32_t>( 4868 Dest, static_cast<uint32_t>(
(...skipping 1909 matching lines...)
6870 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; 6778 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
6871 } 6779 }
6872 6780
6873 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM]; 6781 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM];
6874 llvm::SmallBitVector 6782 llvm::SmallBitVector
6875 TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM]; 6783 TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
6876 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; 6784 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];
6877 6785
6878 } // end of namespace ARM32 6786 } // end of namespace ARM32
6879 } // end of namespace Ice 6787 } // end of namespace Ice