OLD | NEW |
---|---|
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// | 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 367 matching lines...)
378 } | 378 } |
379 | 379 |
380 namespace { | 380 namespace { |
381 void copyRegAllocFromInfWeightVariable64On32(const VarList &Vars) { | 381 void copyRegAllocFromInfWeightVariable64On32(const VarList &Vars) { |
382 for (Variable *Var : Vars) { | 382 for (Variable *Var : Vars) { |
383 auto *Var64 = llvm::dyn_cast<Variable64On32>(Var); | 383 auto *Var64 = llvm::dyn_cast<Variable64On32>(Var); |
384 if (!Var64) { | 384 if (!Var64) { |
385 // This is not the variable we are looking for. | 385 // This is not the variable we are looking for. |
386 continue; | 386 continue; |
387 } | 387 } |
388 assert(Var64->hasReg() || !Var64->mustHaveReg()); | 388 // only allow infinite-weight i64 temporaries to be register allocated. |
389 assert(!Var64->hasReg() || Var64->mustHaveReg()); | |
389 if (!Var64->hasReg()) { | 390 if (!Var64->hasReg()) { |
390 continue; | 391 continue; |
391 } | 392 } |
392 const auto FirstReg = | 393 const auto FirstReg = |
393 RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(Var->getRegNum())); | 394 RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(Var->getRegNum())); |
394 // This assumes little endian. | 395 // This assumes little endian. |
395 Variable *Lo = Var64->getLo(); | 396 Variable *Lo = Var64->getLo(); |
396 Variable *Hi = Var64->getHi(); | 397 Variable *Hi = Var64->getHi(); |
397 assert(Lo->hasReg() == Hi->hasReg()); | 398 assert(Lo->hasReg() == Hi->hasReg()); |
398 if (Lo->hasReg()) { | 399 if (Lo->hasReg()) { |
(...skipping 3002 matching lines...)
3401 if (Dest->isRematerializable()) { | 3402 if (Dest->isRematerializable()) { |
3402 Context.insert<InstFakeDef>(Dest); | 3403 Context.insert<InstFakeDef>(Dest); |
3403 return; | 3404 return; |
3404 } | 3405 } |
3405 | 3406 |
3406 Operand *Src0 = Instr->getSrc(0); | 3407 Operand *Src0 = Instr->getSrc(0); |
3407 assert(Dest->getType() == Src0->getType()); | 3408 assert(Dest->getType() == Src0->getType()); |
3408 if (Dest->getType() == IceType_i64) { | 3409 if (Dest->getType() == IceType_i64) { |
3409 Src0 = legalizeUndef(Src0); | 3410 Src0 = legalizeUndef(Src0); |
3410 | 3411 |
3412 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
Jim Stichnoth
2016/02/17 03:50:17
Just curious - why these changes?
John
2016/02/17 13:01:38
I was fiddling around trying to avoid some unneces
3411 Variable *T_Lo = makeReg(IceType_i32); | 3413 Variable *T_Lo = makeReg(IceType_i32); |
3412 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
3413 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); | 3414 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); |
3414 _mov(T_Lo, Src0Lo); | 3415 _mov(T_Lo, Src0Lo); |
3415 _mov(DestLo, T_Lo); | 3416 _mov(DestLo, T_Lo); |
3416 | 3417 |
3418 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
3417 Variable *T_Hi = makeReg(IceType_i32); | 3419 Variable *T_Hi = makeReg(IceType_i32); |
3418 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
3419 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); | 3420 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); |
3420 _mov(T_Hi, Src0Hi); | 3421 _mov(T_Hi, Src0Hi); |
3421 _mov(DestHi, T_Hi); | 3422 _mov(DestHi, T_Hi); |
3422 | 3423 |
3423 return; | 3424 return; |
3424 } | 3425 } |
3425 | 3426 |
3426 Operand *NewSrc; | 3427 Operand *NewSrc; |
3427 if (Dest->hasReg()) { | 3428 if (Dest->hasReg()) { |
3428 // If Dest already has a physical register, then legalize the Src operand | 3429 // If Dest already has a physical register, then legalize the Src operand |
(...skipping 965 matching lines...)
4394 _cmp(ConstR, NonConstF); | 4395 _cmp(ConstR, NonConstF); |
4395 } else { | 4396 } else { |
4396 Variable *T = makeReg(IceType_i32); | 4397 Variable *T = makeReg(IceType_i32); |
4397 _rsbs(T, ConstR, NonConstF); | 4398 _rsbs(T, ConstR, NonConstF); |
4398 Context.insert<InstFakeUse>(T); | 4399 Context.insert<InstFakeUse>(T); |
4399 } | 4400 } |
4400 return CondWhenTrue(getIcmp32Mapping(Condition)); | 4401 return CondWhenTrue(getIcmp32Mapping(Condition)); |
4401 } | 4402 } |
4402 | 4403 |
4403 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Instr) { | 4404 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Instr) { |
4404 Operand *Src0 = legalizeUndef(Instr->getSrc(0)); | 4405 return lowerIcmpCond(Instr->getCondition(), Instr->getSrc(0), |
4405 Operand *Src1 = legalizeUndef(Instr->getSrc(1)); | 4406 Instr->getSrc(1)); |
4407 } | |
4406 | 4408 |
4407 const InstIcmp::ICond Condition = Instr->getCondition(); | 4409 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(InstIcmp::ICond Condition, |
4410 Operand *Src0, | |
4411 Operand *Src1) { | |
4412 Src0 = legalizeUndef(Src0); | |
4413 Src1 = legalizeUndef(Src1); | |
4414 | |
4408 // a=icmp cond b, c ==> | 4415 // a=icmp cond b, c ==> |
4409 // GCC does: | 4416 // GCC does: |
4410 // <u/s>xtb tb, b | 4417 // <u/s>xtb tb, b |
4411 // <u/s>xtb tc, c | 4418 // <u/s>xtb tc, c |
4412 // cmp tb, tc | 4419 // cmp tb, tc |
4413 // mov.C1 t, #0 | 4420 // mov.C1 t, #0 |
4414 // mov.C2 t, #1 | 4421 // mov.C2 t, #1 |
4415 // mov a, t | 4422 // mov a, t |
4416 // where the unsigned/sign extension is not needed for 32-bit. They also have | 4423 // where the unsigned/sign extension is not needed for 32-bit. They also have |
4417 // special cases for EQ and NE. E.g., for NE: | 4424 // special cases for EQ and NE. E.g., for NE: |
(...skipping 79 matching lines...)
4497 } | 4504 } |
4498 | 4505 |
4499 namespace { | 4506 namespace { |
4500 inline uint64_t getConstantMemoryOrder(Operand *Opnd) { | 4507 inline uint64_t getConstantMemoryOrder(Operand *Opnd) { |
4501 if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) | 4508 if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) |
4502 return Integer->getValue(); | 4509 return Integer->getValue(); |
4503 return Intrinsics::MemoryOrderInvalid; | 4510 return Intrinsics::MemoryOrderInvalid; |
4504 } | 4511 } |
4505 } // end of anonymous namespace | 4512 } // end of anonymous namespace |
4506 | 4513 |
4514 void TargetARM32::lowerLoadLinkedStoreExclusive( | |
4515 Type Ty, Operand *Addr, std::function<Variable *(Variable *)> Operation, | |
4516 CondARM32::Cond Cond) { | |
4517 | |
4518 auto *Retry = Context.insert<InstARM32Label>(this); | |
4519 { // scoping for loop highlighting. | |
4520 Variable *Tmp = (Ty == IceType_i64) ? makeI64RegPair() : makeReg(Ty); | |
4521 auto *Success = makeReg(IceType_i32); | |
4522 auto *_0 = Ctx->getConstantZero(IceType_i32); | |
4523 | |
4524 Context.insert<InstFakeDef>(Tmp); | |
4525 Context.insert<InstFakeUse>(Tmp); | |
4526 Variable *AddrR = legalizeToReg(Addr); | |
4527 _ldrex(Tmp, formMemoryOperand(AddrR, Ty))->setDestRedefined(); | |
4528 auto *StoreValue = Operation(Tmp); | |
4529 assert(StoreValue->mustHaveReg()); | |
4530 _strex(Success, StoreValue, formMemoryOperand(AddrR, Ty), Cond); | |
4531 _cmp(Success, _0, Cond); | |
4532 } | |
4533 _br(Retry, CondARM32::NE); | |
4534 } | |
4535 | |
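For context, a minimal usage sketch of the new helper: the callback receives the value loaded by ldrex and returns the register whose contents strex should try to store. The atomic-increment example below is illustrative only (it is not code from this CL); it assumes the surrounding TargetARM32 members makeReg, legalize, and Ctx->getConstantInt32, and some previously obtained address operand Addr.

    // Hypothetical lowering of "atomically add 1 to the i32 at Addr".
    lowerLoadLinkedStoreExclusive(
        IceType_i32, Addr, [this](Variable *Loaded) {
          // Loaded holds the ldrex result; Incremented is what strex stores.
          Variable *Incremented = makeReg(IceType_i32);
          _add(Incremented, Loaded,
               legalize(Ctx->getConstantInt32(1), Legal_Reg | Legal_Flex));
          return Incremented;
        });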
4536 namespace { | |
4537 InstArithmetic *createArithInst(Cfg *Func, uint32_t Operation, Variable *Dest, | |
4538 Variable *Src0, Operand *Src1) { | |
4539 InstArithmetic::OpKind Oper; | |
4540 switch (Operation) { | |
4541 default: | |
4542 llvm::report_fatal_error("Unknown AtomicRMW operation"); | |
4543 case Intrinsics::AtomicExchange: | |
4544 llvm::report_fatal_error("Can't handle Atomic xchg operation"); | |
4545 case Intrinsics::AtomicAdd: | |
4546 Oper = InstArithmetic::Add; | |
4547 break; | |
4548 case Intrinsics::AtomicAnd: | |
4549 Oper = InstArithmetic::And; | |
4550 break; | |
4551 case Intrinsics::AtomicSub: | |
4552 Oper = InstArithmetic::Sub; | |
4553 break; | |
4554 case Intrinsics::AtomicOr: | |
4555 Oper = InstArithmetic::Or; | |
4556 break; | |
4557 case Intrinsics::AtomicXor: | |
4558 Oper = InstArithmetic::Xor; | |
4559 break; | |
4560 } | |
4561 return InstArithmetic::create(Func, Oper, Dest, Src0, Src1); | |
4562 } | |
4563 } // end of anonymous namespace | |
4564 | |
4507 void TargetARM32::lowerAtomicRMW(Variable *Dest, uint32_t Operation, | 4565 void TargetARM32::lowerAtomicRMW(Variable *Dest, uint32_t Operation, |
4508 Operand *Ptr, Operand *Val) { | 4566 Operand *Addr, Operand *Val) { |
4509 // retry: | 4567 // retry: |
4510 // ldrex contents, [addr] | 4568 // ldrex tmp, [addr] |
4511 // op tmp, contents, operand | 4569 // mov contents, tmp |
4512 // strex success, tmp, [addr] | 4570 // op result, contents, Val |
4571 // strex success, result, [addr] | |
4572 // cmp success, 0 | |
4513 // jne retry | 4573 // jne retry |
4514 // fake-use(addr, operand) @ prevents undesirable clobbering. | 4574 // fake-use(addr, operand) @ prevents undesirable clobbering. |
4515 // mov dest, contents | 4575 // mov dest, contents |
4516 assert(Dest != nullptr); | 4576 auto DestTy = Dest->getType(); |
4517 Type DestTy = Dest->getType(); | |
4518 (void)Ptr; | |
4519 (void)Val; | |
4520 | |
4521 OperandARM32Mem *Mem; | |
4522 Variable *PtrContentsReg; | |
4523 Variable *PtrContentsHiReg; | |
4524 Variable *PtrContentsLoReg; | |
4525 Variable *Value = Func->makeVariable(DestTy); | |
4526 Variable *ValueReg; | |
4527 Variable *ValueHiReg; | |
4528 Variable *ValueLoReg; | |
4529 Variable *Success = makeReg(IceType_i32); | |
4530 Variable *TmpReg; | |
4531 Variable *TmpHiReg; | |
4532 Variable *TmpLoReg; | |
4533 Operand *_0 = Ctx->getConstantZero(IceType_i32); | |
4534 auto *Retry = InstARM32Label::create(Func, this); | |
4535 | 4577 |
4536 if (DestTy == IceType_i64) { | 4578 if (DestTy == IceType_i64) { |
4537 Variable64On32 *PtrContentsReg64 = makeI64RegPair(); | 4579 lowerInt64AtomicRMW(Dest, Operation, Addr, Val); |
4538 PtrContentsHiReg = PtrContentsReg64->getHi(); | 4580 return; |
4539 PtrContentsLoReg = PtrContentsReg64->getLo(); | |
4540 PtrContentsReg = PtrContentsReg64; | |
4541 | |
4542 llvm::cast<Variable64On32>(Value)->initHiLo(Func); | |
4543 Variable64On32 *ValueReg64 = makeI64RegPair(); | |
4544 ValueHiReg = ValueReg64->getHi(); | |
4545 ValueLoReg = ValueReg64->getLo(); | |
4546 ValueReg = ValueReg64; | |
4547 | |
4548 Variable64On32 *TmpReg64 = makeI64RegPair(); | |
4549 TmpHiReg = TmpReg64->getHi(); | |
4550 TmpLoReg = TmpReg64->getLo(); | |
4551 TmpReg = TmpReg64; | |
4552 } else { | |
4553 PtrContentsReg = makeReg(DestTy); | |
4554 PtrContentsHiReg = nullptr; | |
4555 PtrContentsLoReg = PtrContentsReg; | |
4556 | |
4557 ValueReg = makeReg(DestTy); | |
4558 ValueHiReg = nullptr; | |
4559 ValueLoReg = ValueReg; | |
4560 | |
4561 TmpReg = makeReg(DestTy); | |
4562 TmpHiReg = nullptr; | |
4563 TmpLoReg = TmpReg; | |
4564 } | 4581 } |
4565 | 4582 |
4566 if (DestTy == IceType_i64) { | 4583 Operand *ValRF = nullptr; |
4567 Context.insert<InstFakeDef>(Value); | 4584 if (llvm::isa<ConstantInteger32>(Val)) { |
4585 ValRF = Val; | |
4586 } else { | |
4587 ValRF = legalizeToReg(Val); | |
4568 } | 4588 } |
4569 lowerAssign(InstAssign::create(Func, Value, Val)); | 4589 auto *ContentsR = makeReg(DestTy); |
4570 | 4590 auto *ResultR = makeReg(DestTy); |
4571 Variable *PtrVar = Func->makeVariable(IceType_i32); | |
4572 lowerAssign(InstAssign::create(Func, PtrVar, Ptr)); | |
4573 | 4591 |
4574 _dmb(); | 4592 _dmb(); |
4575 Context.insert(Retry); | 4593 lowerLoadLinkedStoreExclusive( |
4576 Mem = formMemoryOperand(PtrVar, DestTy); | 4594 DestTy, Addr, |
4577 if (DestTy == IceType_i64) { | 4595 [this, Operation, ResultR, ContentsR, ValRF](Variable *Tmp) { |
4578 Context.insert<InstFakeDef>(ValueReg, Value); | 4596 lowerAssign(InstAssign::create(Func, ContentsR, Tmp)); |
4597 if (Operation == Intrinsics::AtomicExchange) { | |
4598 lowerAssign(InstAssign::create(Func, ResultR, ValRF)); | |
4599 } else { | |
4600 lowerArithmetic( | |
4601 createArithInst(Func, Operation, ResultR, ContentsR, ValRF)); | |
4602 } | |
4603 return ResultR; | |
4604 }); | |
4605 _dmb(); | |
4606 if (auto *ValR = llvm::dyn_cast<Variable>(ValRF)) { | |
4607 Context.insert<InstFakeUse>(ValR); | |
4579 } | 4608 } |
4580 lowerAssign(InstAssign::create(Func, ValueReg, Value)); | 4609 // Can't dce ContentsR. |
4581 if (DestTy == IceType_i8 || DestTy == IceType_i16) { | 4610 Context.insert<InstFakeUse>(ContentsR); |
4582 _uxt(ValueReg, ValueReg); | 4611 lowerAssign(InstAssign::create(Func, Dest, ContentsR)); |
4583 } | 4612 } |
4584 _ldrex(PtrContentsReg, Mem); | |
4585 | 4613 |
4586 if (DestTy == IceType_i64) { | 4614 void TargetARM32::lowerInt64AtomicRMW(Variable *Dest, uint32_t Operation, |
4587 Context.insert<InstFakeDef>(TmpReg, ValueReg); | 4615 Operand *Addr, Operand *Val) { |
4588 } | 4616 assert(Dest->getType() == IceType_i64); |
4589 switch (Operation) { | |
4590 default: | |
4591 Func->setError("Unknown AtomicRMW operation"); | |
4592 return; | |
4593 case Intrinsics::AtomicAdd: | |
4594 if (DestTy == IceType_i64) { | |
4595 _adds(TmpLoReg, PtrContentsLoReg, ValueLoReg); | |
4596 _adc(TmpHiReg, PtrContentsHiReg, ValueHiReg); | |
4597 } else { | |
4598 _add(TmpLoReg, PtrContentsLoReg, ValueLoReg); | |
4599 } | |
4600 break; | |
4601 case Intrinsics::AtomicSub: | |
4602 if (DestTy == IceType_i64) { | |
4603 _subs(TmpLoReg, PtrContentsLoReg, ValueLoReg); | |
4604 _sbc(TmpHiReg, PtrContentsHiReg, ValueHiReg); | |
4605 } else { | |
4606 _sub(TmpLoReg, PtrContentsLoReg, ValueLoReg); | |
4607 } | |
4608 break; | |
4609 case Intrinsics::AtomicOr: | |
4610 _orr(TmpLoReg, PtrContentsLoReg, ValueLoReg); | |
4611 if (DestTy == IceType_i64) { | |
4612 _orr(TmpHiReg, PtrContentsHiReg, ValueHiReg); | |
4613 } | |
4614 break; | |
4615 case Intrinsics::AtomicAnd: | |
4616 _and(TmpLoReg, PtrContentsLoReg, ValueLoReg); | |
4617 if (DestTy == IceType_i64) { | |
4618 _and(TmpHiReg, PtrContentsHiReg, ValueHiReg); | |
4619 } | |
4620 break; | |
4621 case Intrinsics::AtomicXor: | |
4622 _eor(TmpLoReg, PtrContentsLoReg, ValueLoReg); | |
4623 if (DestTy == IceType_i64) { | |
4624 _eor(TmpHiReg, PtrContentsHiReg, ValueHiReg); | |
4625 } | |
4626 break; | |
4627 case Intrinsics::AtomicExchange: | |
4628 _mov(TmpLoReg, ValueLoReg); | |
4629 if (DestTy == IceType_i64) { | |
4630 _mov(TmpHiReg, ValueHiReg); | |
4631 } | |
4632 break; | |
4633 } | |
4634 _strex(Success, TmpReg, Mem); | |
4635 _cmp(Success, _0); | |
4636 _br(Retry, CondARM32::NE); | |
4637 | 4617 |
4638 // The following fake-uses ensure that Subzero will not clobber them in the | 4618 auto *ResultR = makeI64RegPair(); |
4639 // load-linked/store-conditional loop above. We might have to spill them, but | 4619 |
4640 // spilling is preferable over incorrect behavior. | 4620 Context.insert<InstFakeDef>(ResultR); |
4641 Context.insert<InstFakeUse>(PtrVar); | 4621 |
4642 if (auto *Value64 = llvm::dyn_cast<Variable64On32>(Value)) { | 4622 Operand *ValRF = nullptr; |
4643 Context.insert<InstFakeUse>(Value64->getHi()); | 4623 if (llvm::dyn_cast<ConstantInteger64>(Val)) { |
4644 Context.insert<InstFakeUse>(Value64->getLo()); | 4624 ValRF = Val; |
4645 } else { | 4625 } else { |
4646 Context.insert<InstFakeUse>(Value); | 4626 auto *ValR64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); |
4647 } | 4627 ValR64->initHiLo(Func); |
4648 _dmb(); | 4628 ValR64->setMustNotHaveReg(); |
4649 if (DestTy == IceType_i8 || DestTy == IceType_i16) { | 4629 ValR64->getLo()->setMustHaveReg(); |
4650 _uxt(PtrContentsReg, PtrContentsReg); | 4630 ValR64->getHi()->setMustHaveReg(); |
4631 lowerAssign(InstAssign::create(Func, ValR64, Val)); | |
4632 ValRF = ValR64; | |
4651 } | 4633 } |
4652 | 4634 |
4653 if (DestTy == IceType_i64) { | 4635 auto *ContentsR = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); |
4654 Context.insert<InstFakeUse>(PtrContentsReg); | 4636 ContentsR->initHiLo(Func); |
4637 ContentsR->setMustNotHaveReg(); | |
4638 ContentsR->getLo()->setMustHaveReg(); | |
4639 ContentsR->getHi()->setMustHaveReg(); | |
4640 | |
4641 _dmb(); | |
4642 lowerLoadLinkedStoreExclusive( | |
4643 IceType_i64, Addr, | |
4644 [this, Operation, ResultR, ContentsR, ValRF](Variable *Tmp) { | |
4645 lowerAssign(InstAssign::create(Func, ContentsR, Tmp)); | |
4646 Context.insert<InstFakeUse>(Tmp); | |
4647 if (Operation == Intrinsics::AtomicExchange) { | |
4648 lowerAssign(InstAssign::create(Func, ResultR, ValRF)); | |
4649 } else { | |
4650 lowerArithmetic( | |
4651 createArithInst(Func, Operation, ResultR, ContentsR, ValRF)); | |
4652 } | |
4653 Context.insert<InstFakeUse>(ResultR->getHi()); | |
4654 Context.insert<InstFakeDef>(ResultR, ResultR->getLo()) | |
4655 ->setDestRedefined(); | |
4656 return ResultR; | |
4657 }); | |
4658 _dmb(); | |
4659 if (auto *ValR64 = llvm::dyn_cast<Variable64On32>(ValRF)) { | |
4660 Context.insert<InstFakeUse>(ValR64->getLo()); | |
4661 Context.insert<InstFakeUse>(ValR64->getHi()); | |
4655 } | 4662 } |
4656 lowerAssign(InstAssign::create(Func, Dest, PtrContentsReg)); | 4663 lowerAssign(InstAssign::create(Func, Dest, ContentsR)); |
4657 if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) { | |
4658 Context.insert<InstFakeUse>(Dest64->getLo()); | |
4659 Context.insert<InstFakeUse>(Dest64->getHi()); | |
4660 } else { | |
4661 Context.insert<InstFakeUse>(Dest); | |
4662 } | |
4663 } | 4664 } |
4664 | 4665 |
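Both RMW paths above reduce to the same load-exclusive/store-exclusive retry loop. For reference, a minimal C++ model of the 32-bit add case, written against Clang's ACLE exclusive builtins (an assumption); it illustrates the emitted pattern and is not Subzero code.

    #include <stdint.h>

    static uint32_t atomicAdd32(volatile uint32_t *Addr, uint32_t Val) {
      uint32_t Contents;
      do {
        Contents = __builtin_arm_ldrex(Addr);              // ldrex contents, [addr]
      } while (__builtin_arm_strex(Contents + Val, Addr)); // strex: nonzero means retry
      // The real lowering brackets this loop with dmb barriers and assigns the
      // old contents (not the new value) to Dest.
      return Contents;
    }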
4665 void TargetARM32::postambleCtpop64(const InstCall *Instr) { | 4666 void TargetARM32::postambleCtpop64(const InstCall *Instr) { |
4666 Operand *Arg0 = Instr->getArg(0); | 4667 Operand *Arg0 = Instr->getArg(0); |
4667 if (isInt32Asserting32Or64(Arg0->getType())) { | 4668 if (isInt32Asserting32Or64(Arg0->getType())) { |
4668 return; | 4669 return; |
4669 } | 4670 } |
4670 // The popcount helpers always return 32-bit values, while the intrinsic's | 4671 // The popcount helpers always return 32-bit values, while the intrinsic's |
4671 // signature matches some 64-bit platform's native instructions and expect to | 4672 // signature matches some 64-bit platform's native instructions and expect to |
4672 // fill a 64-bit reg. Thus, clear the upper bits of the dest just in case the | 4673 // fill a 64-bit reg. Thus, clear the upper bits of the dest just in case the |
(...skipping 53 matching lines...)
4726 // ldrex is the only arm instruction that is guaranteed to load a 64-bit | 4727 // ldrex is the only arm instruction that is guaranteed to load a 64-bit |
4727 // integer atomically. Everything else works with a regular ldr. | 4728 // integer atomically. Everything else works with a regular ldr. |
4728 T = makeI64RegPair(); | 4729 T = makeI64RegPair(); |
4729 _ldrex(T, formMemoryOperand(Instr->getArg(0), IceType_i64)); | 4730 _ldrex(T, formMemoryOperand(Instr->getArg(0), IceType_i64)); |
4730 } else { | 4731 } else { |
4731 T = makeReg(DestTy); | 4732 T = makeReg(DestTy); |
4732 _ldr(T, formMemoryOperand(Instr->getArg(0), DestTy)); | 4733 _ldr(T, formMemoryOperand(Instr->getArg(0), DestTy)); |
4733 } | 4734 } |
4734 _dmb(); | 4735 _dmb(); |
4735 lowerAssign(InstAssign::create(Func, Dest, T)); | 4736 lowerAssign(InstAssign::create(Func, Dest, T)); |
4736 // Make sure the atomic load isn't elided when unused, by adding a FakeUse. | 4737 // Add a fake-use of T to ensure the atomic load is not removed if Dest is |
4737 // Since lowerLoad may fuse the load w/ an arithmetic instruction, insert | 4738 // unused. |
4738 // the FakeUse on the last-inserted instruction's dest. | 4739 Context.insert<InstFakeUse>(T); |
4739 Context.insert<InstFakeUse>(Context.getLastInserted()->getDest()); | |
4740 return; | 4740 return; |
4741 } | 4741 } |
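As the comment above notes, ldrex(d) is the only ARM32 instruction guaranteed to load a 64-bit integer atomically, so the i64 path uses it even though no store follows. A sketch of the equivalent operation using Clang's ACLE builtin, assumed to accept an 8-byte type; illustrative only.

    static uint64_t atomicLoad64(const volatile uint64_t *Addr) {
      // Expands to ldrexd on ARM32; the lowering above additionally brackets
      // the access with dmb barriers.
      return __builtin_arm_ldrex(Addr);
    }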
4742 case Intrinsics::AtomicStore: { | 4742 case Intrinsics::AtomicStore: { |
4743 // We require the memory address to be naturally aligned. Given that is the | 4743 // We require the memory address to be naturally aligned. Given that is the |
4744 // case, normal stores are atomic. | 4744 // case, normal stores are atomic. |
4745 if (!Intrinsics::isMemoryOrderValid( | 4745 if (!Intrinsics::isMemoryOrderValid( |
4746 ID, getConstantMemoryOrder(Instr->getArg(2)))) { | 4746 ID, getConstantMemoryOrder(Instr->getArg(2)))) { |
4747 Func->setError("Unexpected memory ordering for AtomicStore"); | 4747 Func->setError("Unexpected memory ordering for AtomicStore"); |
4748 return; | 4748 return; |
4749 } | 4749 } |
4750 Operand *Value = Instr->getArg(0); | |
4751 Type ValueTy = Value->getType(); | |
4752 assert(isScalarIntegerType(ValueTy)); | |
4753 Operand *Addr = Instr->getArg(1); | |
4754 | 4750 |
4755 if (ValueTy == IceType_i64) { | 4751 auto *Value = Instr->getArg(0); |
4756 // Atomic 64-bit stores require a load-locked/store-conditional loop using | 4752 if (Value->getType() == IceType_i64) { |
4757 // ldrexd, and strexd. The lowered code is: | 4753 auto *ValueR = makeI64RegPair(); |
4758 // | 4754 Context.insert<InstFakeDef>(ValueR); |
4759 // retry: | 4755 lowerAssign(InstAssign::create(Func, ValueR, Value)); |
4760 // ldrexd t.lo, t.hi, [addr] | |
4761 // strexd success, value.lo, value.hi, [addr] | |
4762 // cmp success, #0 | |
4763 // bne retry | |
4764 // fake-use(addr, value.lo, value.hi) | |
4765 // | |
4766 // The fake-use is needed to prevent those variables from being clobbered | |
4767 // in the loop (which will happen under register pressure.) | |
4768 Variable64On32 *Tmp = makeI64RegPair(); | |
4769 Variable64On32 *ValueVar = | |
4770 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); | |
4771 Variable *AddrVar = makeReg(IceType_i32); | |
4772 Variable *Success = makeReg(IceType_i32); | |
4773 OperandARM32Mem *Mem; | |
4774 Operand *_0 = Ctx->getConstantZero(IceType_i32); | |
4775 auto *Retry = InstARM32Label::create(Func, this); | |
4776 Variable64On32 *NewReg = makeI64RegPair(); | |
4777 ValueVar->initHiLo(Func); | |
4778 ValueVar->mustNotHaveReg(); | |
4779 | |
4780 _dmb(); | 4756 _dmb(); |
4781 lowerAssign(InstAssign::create(Func, ValueVar, Value)); | 4757 lowerLoadLinkedStoreExclusive( |
4782 lowerAssign(InstAssign::create(Func, AddrVar, Addr)); | 4758 IceType_i64, Instr->getArg(1), [this, ValueR](Variable *Tmp) { |
4783 | 4759 // The following fake-use prevents the ldrex instruction from being |
4784 Context.insert(Retry); | 4760 // dead code eliminated. |
4785 Context.insert<InstFakeDef>(NewReg); | 4761 Context.insert<InstFakeUse>(llvm::cast<Variable>(loOperand(Tmp))); |
4786 lowerAssign(InstAssign::create(Func, NewReg, ValueVar)); | 4762 Context.insert<InstFakeUse>(llvm::cast<Variable>(hiOperand(Tmp))); |
4787 Mem = formMemoryOperand(AddrVar, IceType_i64); | 4763 Context.insert<InstFakeUse>(Tmp); |
4788 _ldrex(Tmp, Mem); | 4764 return ValueR; |
4789 // This fake-use both prevents the ldrex from being dead-code eliminated, | 4765 }); |
4790 // while also keeping liveness happy about all defs being used. | 4766 Context.insert<InstFakeUse>(ValueR); |
4791 Context.insert<InstFakeUse>(Context.getLastInserted()->getDest()); | |
4792 _strex(Success, NewReg, Mem); | |
4793 _cmp(Success, _0); | |
4794 _br(Retry, CondARM32::NE); | |
4795 | |
4796 Context.insert<InstFakeUse>(ValueVar->getLo()); | |
4797 Context.insert<InstFakeUse>(ValueVar->getHi()); | |
4798 Context.insert<InstFakeUse>(AddrVar); | |
4799 _dmb(); | 4767 _dmb(); |
4800 return; | 4768 return; |
4801 } | 4769 } |
4770 | |
4771 auto *ValueR = legalizeToReg(Instr->getArg(0)); | |
4772 Type ValueTy = ValueR->getType(); | |
Jim Stichnoth
2016/02/17 03:50:17
Surprised that "auto" wasn't used here.
John
2016/02/17 13:01:39
not to mention the lack of constness... :)
Done.
4773 assert(isScalarIntegerType(ValueTy)); | |
4774 auto *Addr = legalizeToReg(Instr->getArg(1)); | |
4775 | |
4802 // Non-64-bit stores are atomic as long as the address is aligned. This | 4776 // Non-64-bit stores are atomic as long as the address is aligned. This |
4803 // is PNaCl, so addresses are aligned. | 4777 // is PNaCl, so addresses are aligned. |
4804 Variable *T = makeReg(ValueTy); | |
4805 | |
4806 _dmb(); | 4778 _dmb(); |
4807 lowerAssign(InstAssign::create(Func, T, Value)); | 4779 _str(ValueR, formMemoryOperand(Addr, ValueTy)); |
4808 _str(T, formMemoryOperand(Addr, ValueTy)); | |
4809 _dmb(); | 4780 _dmb(); |
4810 return; | 4781 return; |
4811 } | 4782 } |
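The i64 store path keeps a load-exclusive whose result is only fake-used because strexd can only succeed while the exclusive monitor claimed by a matching ldrexd is held. A minimal C++ model of that loop, again assuming Clang's ACLE builtins for an 8-byte type; illustrative, not the emitted code.

    static void atomicStore64(volatile uint64_t *Addr, uint64_t Val) {
      do {
        (void)__builtin_arm_ldrex(Addr);        // ldrexd claims the monitor
      } while (__builtin_arm_strex(Val, Addr)); // strexd: nonzero means retry
      // dmb barriers precede and follow this loop in the lowering above.
    }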
4812 case Intrinsics::AtomicCmpxchg: { | 4783 case Intrinsics::AtomicCmpxchg: { |
4813 // The initial lowering for cmpxchg was: | |
4814 // | |
4815 // retry: | 4784 // retry: |
4816 // ldrex tmp, [addr] | 4785 // ldrex tmp, [addr] |
4817 // cmp tmp, expected | 4786 // cmp tmp, expected |
4818 // mov expected, tmp | 4787 // mov expected, tmp |
4819 // jne retry | |
4820 // strex success, new, [addr] | |
4821 // cmp success, #0 | |
4822 // bne retry | |
4823 // mov dest, expected | |
4824 // | |
4825 // Besides requiring two branches, that lowering could also potentially | |
4826 // write to memory (in mov expected, tmp) unless we were OK with increasing | |
4827 // the register pressure and requiring expected to be an infinite-weight | |
4828 // variable (spoiler alert: that was a problem for i64 cmpxchg.) Through | |
4829 // careful rewritting, and thanks to predication, we now implement the | |
4830 // lowering as: | |
4831 // | |
4832 // retry: | |
4833 // ldrex tmp, [addr] | |
4834 // cmp tmp, expected | |
4835 // strexeq success, new, [addr] | 4788 // strexeq success, new, [addr] |
4836 // movne expected, tmp | |
4837 // cmpeq success, #0 | 4789 // cmpeq success, #0 |
4838 // bne retry | 4790 // bne retry |
4839 // mov dest, expected | 4791 // mov dest, expected |
4840 // | |
4841 // Predication lets us move the strex ahead of the mov expected, tmp, which | |
4842 // allows tmp to be a non-infinite weight temporary. We wanted to avoid | |
4843 // writing to memory between ldrex and strex because, even though most times | |
4844 // that would cause no issues, if any interleaving memory write aliased | |
4845 // [addr] than we would have undefined behavior. Undefined behavior isn't | |
4846 // cool, so we try to avoid it. See the "Synchronization and semaphores" | |
4847 // section of the "ARM Architecture Reference Manual." | |
4848 | |
4849 assert(isScalarIntegerType(DestTy)); | 4792 assert(isScalarIntegerType(DestTy)); |
4850 // We require the memory address to be naturally aligned. Given that is the | 4793 // We require the memory address to be naturally aligned. Given that is the |
4851 // case, normal loads are atomic. | 4794 // case, normal loads are atomic. |
4852 if (!Intrinsics::isMemoryOrderValid( | 4795 if (!Intrinsics::isMemoryOrderValid( |
4853 ID, getConstantMemoryOrder(Instr->getArg(3)), | 4796 ID, getConstantMemoryOrder(Instr->getArg(3)), |
4854 getConstantMemoryOrder(Instr->getArg(4)))) { | 4797 getConstantMemoryOrder(Instr->getArg(4)))) { |
4855 Func->setError("Unexpected memory ordering for AtomicCmpxchg"); | 4798 Func->setError("Unexpected memory ordering for AtomicCmpxchg"); |
4856 return; | 4799 return; |
4857 } | 4800 } |
4858 | 4801 |
4859 OperandARM32Mem *Mem; | 4802 if (DestTy == IceType_i64) { |
4860 Variable *TmpReg; | 4803 auto *New = makeI64RegPair(); |
4861 Variable *Expected, *ExpectedReg; | 4804 Context.insert<InstFakeDef>(New); |
4862 Variable *New, *NewReg; | 4805 lowerAssign(InstAssign::create(Func, New, Instr->getArg(2))); |
4863 Variable *Success = makeReg(IceType_i32); | |
4864 Operand *_0 = Ctx->getConstantZero(IceType_i32); | |
4865 auto *Retry = InstARM32Label::create(Func, this); | |
4866 | 4806 |
4867 if (DestTy == IceType_i64) { | 4807 auto *Expected = makeI64RegPair(); |
4868 Variable64On32 *TmpReg64 = makeI64RegPair(); | 4808 Context.insert<InstFakeDef>(Expected); |
4869 Variable64On32 *New64 = | 4809 lowerAssign(InstAssign::create(Func, Expected, Instr->getArg(1))); |
4870 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); | |
4871 Variable64On32 *NewReg64 = makeI64RegPair(); | |
4872 Variable64On32 *Expected64 = | |
4873 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); | |
4874 Variable64On32 *ExpectedReg64 = makeI64RegPair(); | |
4875 | 4810 |
4876 New64->initHiLo(Func); | 4811 _dmb(); |
4877 New64->mustNotHaveReg(); | 4812 lowerLoadLinkedStoreExclusive( |
4878 Expected64->initHiLo(Func); | 4813 DestTy, Instr->getArg(0), |
4879 Expected64->mustNotHaveReg(); | 4814 [this, Expected, New, Instr, DestTy](Variable *Tmp) { |
4815 auto *ExpectedLoR = llvm::cast<Variable>(loOperand(Expected)); | |
4816 auto *ExpectedHiR = llvm::cast<Variable>(hiOperand(Expected)); | |
4817 auto *TmpLoR = llvm::cast<Variable>(loOperand(Tmp)); | |
4818 auto *TmpHiR = llvm::cast<Variable>(hiOperand(Tmp)); | |
4819 _cmp(TmpLoR, ExpectedLoR); | |
4820 _cmp(TmpHiR, ExpectedHiR, CondARM32::EQ); | |
4821 // Add an explicit use of Tmp here, or its live range will not | |
4822 // reach here (only those of Tmp.Lo and Tmp.Hi will.) | |
4823 Context.insert<InstFakeUse>(Tmp); | |
4824 _mov_redefined(ExpectedLoR, TmpLoR); | |
4825 _mov_redefined(ExpectedHiR, TmpHiR); | |
4826 // Same as above. | |
4827 Context.insert<InstFakeUse>(Tmp); | |
4828 return New; | |
4829 }, | |
4830 CondARM32::EQ); | |
4831 _dmb(); | |
4880 | 4832 |
4881 TmpReg = TmpReg64; | 4833 lowerAssign(InstAssign::create(Func, Dest, Expected)); |
4882 New = New64; | 4834 // The fake-use of Expected prevents the assignments to Expected (above) |
4883 NewReg = NewReg64; | 4835 // from being removed if Dest is not used. |
4884 Expected = Expected64; | 4836 Context.insert<InstFakeUse>(Expected); |
4885 ExpectedReg = ExpectedReg64; | 4837 // New needs to be alive here, or its live range will end in the |
4886 } else { | 4838 // strex instruction. |
4887 TmpReg = makeReg(DestTy); | 4839 Context.insert<InstFakeUse>(New); |
4888 New = Func->makeVariable(DestTy); | 4840 return; |
4889 NewReg = makeReg(DestTy); | |
4890 Expected = Func->makeVariable(DestTy); | |
4891 ExpectedReg = makeReg(DestTy); | |
4892 } | 4841 } |
4893 | 4842 |
4894 Mem = formMemoryOperand(Instr->getArg(0), DestTy); | 4843 auto *New = legalizeToReg(Instr->getArg(2)); |
4895 if (DestTy == IceType_i64) { | 4844 auto *Expected = legalizeToReg(Instr->getArg(1)); |
4896 Context.insert<InstFakeDef>(Expected); | 4845 |
4897 } | 4846 _dmb(); |
4898 lowerAssign(InstAssign::create(Func, Expected, Instr->getArg(1))); | 4847 lowerLoadLinkedStoreExclusive( |
4899 if (DestTy == IceType_i64) { | 4848 DestTy, |
4900 Context.insert<InstFakeDef>(New); | 4849 Instr->getArg(0), [this, Expected, New, Instr, DestTy](Variable *Tmp) { |
4901 } | 4850 lowerIcmpCond(InstIcmp::Eq, Tmp, Expected); |
4902 lowerAssign(InstAssign::create(Func, New, Instr->getArg(2))); | 4851 _mov_redefined(Expected, Tmp); |
4852 return New; | |
4853 }, CondARM32::EQ); | |
4903 _dmb(); | 4854 _dmb(); |
4904 | 4855 |
4905 Context.insert(Retry); | |
4906 if (DestTy == IceType_i64) { | |
4907 Context.insert<InstFakeDef>(ExpectedReg, Expected); | |
4908 } | |
4909 lowerAssign(InstAssign::create(Func, ExpectedReg, Expected)); | |
4910 if (DestTy == IceType_i64) { | |
4911 Context.insert<InstFakeDef>(NewReg, New); | |
4912 } | |
4913 lowerAssign(InstAssign::create(Func, NewReg, New)); | |
4914 | |
4915 _ldrex(TmpReg, Mem); | |
4916 Context.insert<InstFakeUse>(Context.getLastInserted()->getDest()); | |
4917 if (DestTy == IceType_i64) { | |
4918 auto *TmpReg64 = llvm::cast<Variable64On32>(TmpReg); | |
4919 auto *ExpectedReg64 = llvm::cast<Variable64On32>(ExpectedReg); | |
4920 // lowerAssign above has added fake-defs for TmpReg and ExpectedReg. Let's | |
4921 // keep liveness happy, shall we? | |
4922 Context.insert<InstFakeUse>(TmpReg); | |
4923 Context.insert<InstFakeUse>(ExpectedReg); | |
4924 _cmp(TmpReg64->getHi(), ExpectedReg64->getHi()); | |
4925 _cmp(TmpReg64->getLo(), ExpectedReg64->getLo(), CondARM32::EQ); | |
4926 } else { | |
4927 _cmp(TmpReg, ExpectedReg); | |
4928 } | |
4929 _strex(Success, NewReg, Mem, CondARM32::EQ); | |
4930 if (DestTy == IceType_i64) { | |
4931 auto *TmpReg64 = llvm::cast<Variable64On32>(TmpReg); | |
4932 auto *Expected64 = llvm::cast<Variable64On32>(Expected); | |
4933 _mov_redefined(Expected64->getHi(), TmpReg64->getHi(), CondARM32::NE); | |
4934 _mov_redefined(Expected64->getLo(), TmpReg64->getLo(), CondARM32::NE); | |
4935 Context.insert<InstFakeDef>(Expected, TmpReg); | |
4936 _set_dest_redefined(); | |
4937 } else { | |
4938 _mov_redefined(Expected, TmpReg, CondARM32::NE); | |
4939 } | |
4940 _cmp(Success, _0, CondARM32::EQ); | |
4941 _br(Retry, CondARM32::NE); | |
4942 _dmb(); | |
4943 lowerAssign(InstAssign::create(Func, Dest, Expected)); | 4856 lowerAssign(InstAssign::create(Func, Dest, Expected)); |
4944 Context.insert<InstFakeUse>(Expected); | 4857 Context.insert<InstFakeUse>(Expected); |
4945 if (auto *New64 = llvm::dyn_cast<Variable64On32>(New)) { | 4858 Context.insert<InstFakeUse>(New); |
4946 Context.insert<InstFakeUse>(New64->getLo()); | |
4947 Context.insert<InstFakeUse>(New64->getHi()); | |
4948 } else { | |
4949 Context.insert<InstFakeUse>(New); | |
4950 } | |
4951 return; | 4859 return; |
4952 } | 4860 } |
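Taken together, the cmpxchg lowering above has the semantics modeled below in C++ (assuming the same ACLE builtins). This is only an illustration: the emitted sequence folds the mismatch path into predicated strexeq/movne instructions instead of a branch, as the deleted comment block explains.

    static uint32_t cmpxchg32(volatile uint32_t *Addr, uint32_t Expected,
                              uint32_t Desired) {
      uint32_t Observed;
      do {
        Observed = __builtin_arm_ldrex(Addr);       // ldrex tmp, [addr]
        if (Observed != Expected)
          break;                                    // mismatch: return what we saw
      } while (__builtin_arm_strex(Desired, Addr)); // strexeq: nonzero means retry
      return Observed;                              // dest gets the observed value
    }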
4953 case Intrinsics::AtomicRMW: { | 4861 case Intrinsics::AtomicRMW: { |
4954 if (!Intrinsics::isMemoryOrderValid( | 4862 if (!Intrinsics::isMemoryOrderValid( |
4955 ID, getConstantMemoryOrder(Instr->getArg(3)))) { | 4863 ID, getConstantMemoryOrder(Instr->getArg(3)))) { |
4956 Func->setError("Unexpected memory ordering for AtomicRMW"); | 4864 Func->setError("Unexpected memory ordering for AtomicRMW"); |
4957 return; | 4865 return; |
4958 } | 4866 } |
4959 lowerAtomicRMW( | 4867 lowerAtomicRMW( |
4960 Dest, static_cast<uint32_t>( | 4868 Dest, static_cast<uint32_t>( |
(...skipping 1909 matching lines...)
6870 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; | 6778 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; |
6871 } | 6779 } |
6872 | 6780 |
6873 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM]; | 6781 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM]; |
6874 llvm::SmallBitVector | 6782 llvm::SmallBitVector |
6875 TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM]; | 6783 TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM]; |
6876 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; | 6784 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; |
6877 | 6785 |
6878 } // end of namespace ARM32 | 6786 } // end of namespace ARM32 |
6879 } // end of namespace Ice | 6787 } // end of namespace Ice |