Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// | 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 367 matching lines...) | |
| 378 } | 378 } |
| 379 | 379 |
| 380 namespace { | 380 namespace { |
| 381 void copyRegAllocFromInfWeightVariable64On32(const VarList &Vars) { | 381 void copyRegAllocFromInfWeightVariable64On32(const VarList &Vars) { |
| 382 for (Variable *Var : Vars) { | 382 for (Variable *Var : Vars) { |
| 383 auto *Var64 = llvm::dyn_cast<Variable64On32>(Var); | 383 auto *Var64 = llvm::dyn_cast<Variable64On32>(Var); |
| 384 if (!Var64) { | 384 if (!Var64) { |
| 385 // This is not the variable we are looking for. | 385 // This is not the variable we are looking for. |
| 386 continue; | 386 continue; |
| 387 } | 387 } |
| 388 assert(Var64->hasReg() || !Var64->mustHaveReg()); | 388 // only allow infinite-weight i64 temporaries to be register allocated. |
| 389 assert(!Var64->hasReg() || Var64->mustHaveReg()); | |
| 389 if (!Var64->hasReg()) { | 390 if (!Var64->hasReg()) { |
| 390 continue; | 391 continue; |
| 391 } | 392 } |
| 392 const auto FirstReg = | 393 const auto FirstReg = |
| 393 RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(Var->getRegNum())); | 394 RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(Var->getRegNum())); |
| 394 // This assumes little endian. | 395 // This assumes little endian. |
| 395 Variable *Lo = Var64->getLo(); | 396 Variable *Lo = Var64->getLo(); |
| 396 Variable *Hi = Var64->getHi(); | 397 Variable *Hi = Var64->getHi(); |
| 397 assert(Lo->hasReg() == Hi->hasReg()); | 398 assert(Lo->hasReg() == Hi->hasReg()); |
| 398 if (Lo->hasReg()) { | 399 if (Lo->hasReg()) { |
| (...skipping 3002 matching lines...) | |
| 3401 if (Dest->isRematerializable()) { | 3402 if (Dest->isRematerializable()) { |
| 3402 Context.insert<InstFakeDef>(Dest); | 3403 Context.insert<InstFakeDef>(Dest); |
| 3403 return; | 3404 return; |
| 3404 } | 3405 } |
| 3405 | 3406 |
| 3406 Operand *Src0 = Instr->getSrc(0); | 3407 Operand *Src0 = Instr->getSrc(0); |
| 3407 assert(Dest->getType() == Src0->getType()); | 3408 assert(Dest->getType() == Src0->getType()); |
| 3408 if (Dest->getType() == IceType_i64) { | 3409 if (Dest->getType() == IceType_i64) { |
| 3409 Src0 = legalizeUndef(Src0); | 3410 Src0 = legalizeUndef(Src0); |
| 3410 | 3411 |
| 3412 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
|
Jim Stichnoth (2016/02/17 03:50:17): Just curious - why these changes?
John (2016/02/17 13:01:38): I was fiddling around trying to avoid some unneces
| 3411 Variable *T_Lo = makeReg(IceType_i32); | 3413 Variable *T_Lo = makeReg(IceType_i32); |
| 3412 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
| 3413 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); | 3414 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); |
| 3414 _mov(T_Lo, Src0Lo); | 3415 _mov(T_Lo, Src0Lo); |
| 3415 _mov(DestLo, T_Lo); | 3416 _mov(DestLo, T_Lo); |
| 3416 | 3417 |
| 3418 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
| 3417 Variable *T_Hi = makeReg(IceType_i32); | 3419 Variable *T_Hi = makeReg(IceType_i32); |
| 3418 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
| 3419 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); | 3420 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); |
| 3420 _mov(T_Hi, Src0Hi); | 3421 _mov(T_Hi, Src0Hi); |
| 3421 _mov(DestHi, T_Hi); | 3422 _mov(DestHi, T_Hi); |
| 3422 | 3423 |
| 3423 return; | 3424 return; |
| 3424 } | 3425 } |
| 3425 | 3426 |
| 3426 Operand *NewSrc; | 3427 Operand *NewSrc; |
| 3427 if (Dest->hasReg()) { | 3428 if (Dest->hasReg()) { |
| 3428 // If Dest already has a physical register, then legalize the Src operand | 3429 // If Dest already has a physical register, then legalize the Src operand |
| (...skipping 965 matching lines...) | |
| 4394 _cmp(ConstR, NonConstF); | 4395 _cmp(ConstR, NonConstF); |
| 4395 } else { | 4396 } else { |
| 4396 Variable *T = makeReg(IceType_i32); | 4397 Variable *T = makeReg(IceType_i32); |
| 4397 _rsbs(T, ConstR, NonConstF); | 4398 _rsbs(T, ConstR, NonConstF); |
| 4398 Context.insert<InstFakeUse>(T); | 4399 Context.insert<InstFakeUse>(T); |
| 4399 } | 4400 } |
| 4400 return CondWhenTrue(getIcmp32Mapping(Condition)); | 4401 return CondWhenTrue(getIcmp32Mapping(Condition)); |
| 4401 } | 4402 } |
| 4402 | 4403 |
| 4403 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Instr) { | 4404 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Instr) { |
| 4404 Operand *Src0 = legalizeUndef(Instr->getSrc(0)); | 4405 return lowerIcmpCond(Instr->getCondition(), Instr->getSrc(0), |
| 4405 Operand *Src1 = legalizeUndef(Instr->getSrc(1)); | 4406 Instr->getSrc(1)); |
| 4407 } | |
| 4406 | 4408 |
| 4407 const InstIcmp::ICond Condition = Instr->getCondition(); | 4409 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(InstIcmp::ICond Condition, |
| 4410 Operand *Src0, | |
| 4411 Operand *Src1) { | |
| 4412 Src0 = legalizeUndef(Src0); | |
| 4413 Src1 = legalizeUndef(Src1); | |
| 4414 | |
| 4408 // a=icmp cond b, c ==> | 4415 // a=icmp cond b, c ==> |
| 4409 // GCC does: | 4416 // GCC does: |
| 4410 // <u/s>xtb tb, b | 4417 // <u/s>xtb tb, b |
| 4411 // <u/s>xtb tc, c | 4418 // <u/s>xtb tc, c |
| 4412 // cmp tb, tc | 4419 // cmp tb, tc |
| 4413 // mov.C1 t, #0 | 4420 // mov.C1 t, #0 |
| 4414 // mov.C2 t, #1 | 4421 // mov.C2 t, #1 |
| 4415 // mov a, t | 4422 // mov a, t |
| 4416 // where the unsigned/sign extension is not needed for 32-bit. They also have | 4423 // where the unsigned/sign extension is not needed for 32-bit. They also have |
| 4417 // special cases for EQ and NE. E.g., for NE: | 4424 // special cases for EQ and NE. E.g., for NE: |
| (...skipping 79 matching lines...) | |
| 4497 } | 4504 } |
| 4498 | 4505 |
| 4499 namespace { | 4506 namespace { |
| 4500 inline uint64_t getConstantMemoryOrder(Operand *Opnd) { | 4507 inline uint64_t getConstantMemoryOrder(Operand *Opnd) { |
| 4501 if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) | 4508 if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) |
| 4502 return Integer->getValue(); | 4509 return Integer->getValue(); |
| 4503 return Intrinsics::MemoryOrderInvalid; | 4510 return Intrinsics::MemoryOrderInvalid; |
| 4504 } | 4511 } |
| 4505 } // end of anonymous namespace | 4512 } // end of anonymous namespace |
| 4506 | 4513 |
| 4514 void TargetARM32::lowerLoadLinkedStoreExclusive( | |
| 4515 Type Ty, Operand *Addr, std::function<Variable *(Variable *)> Operation, | |
| 4516 CondARM32::Cond Cond) { | |
| 4517 | |
| 4518 auto *Retry = Context.insert<InstARM32Label>(this); | |
| 4519 { // scoping for loop highlighting. | |
| 4520 Variable *Tmp = (Ty == IceType_i64) ? makeI64RegPair() : makeReg(Ty); | |
| 4521 auto *Success = makeReg(IceType_i32); | |
| 4522 auto *_0 = Ctx->getConstantZero(IceType_i32); | |
| 4523 | |
| 4524 Context.insert<InstFakeDef>(Tmp); | |
| 4525 Context.insert<InstFakeUse>(Tmp); | |
| 4526 Variable *AddrR = legalizeToReg(Addr); | |
| 4527 _ldrex(Tmp, formMemoryOperand(AddrR, Ty))->setDestRedefined(); | |
| 4528 auto *StoreValue = Operation(Tmp); | |
| 4529 assert(StoreValue->mustHaveReg()); | |
| 4530 _strex(Success, StoreValue, formMemoryOperand(AddrR, Ty), Cond); | |
| 4531 _cmp(Success, _0, Cond); | |
| 4532 } | |
| 4533 _br(Retry, CondARM32::NE); | |
| 4534 } | |
| 4535 | |
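
The new `lowerLoadLinkedStoreExclusive` helper above factors the common ldrex/strex retry loop out of the individual atomic lowerings: it loads the current contents, hands them to a callback that computes the value to store, and branches back to the retry label while the exclusive store fails. Below is a minimal caller sketch built only from APIs that already appear in this file; the method name `lowerAtomicIncrement` is hypothetical and exists purely for illustration, and `Ctx->getConstantInt32` is assumed to be the usual constant helper.

```cpp
// Hypothetical sketch: lower an atomic 32-bit increment through the shared
// ldrex/strex retry loop. The callback receives the freshly loaded value
// (Tmp) and returns the register whose contents strex should try to store
// back to [Addr].
void TargetARM32::lowerAtomicIncrement(Variable *Dest, Operand *Addr) {
  auto *ContentsR = makeReg(IceType_i32); // copy of the loaded value
  auto *One = Ctx->getConstantInt32(1);   // assumed constant helper
  _dmb();                                 // barrier before the RMW
  lowerLoadLinkedStoreExclusive(
      IceType_i32, Addr, [this, ContentsR, One](Variable *Tmp) {
        lowerAssign(InstAssign::create(Func, ContentsR, Tmp));
        auto *ResultR = makeReg(IceType_i32); // value stored by strex
        lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add,
                                               ResultR, ContentsR, One));
        return ResultR;
      });
  _dmb();                                 // barrier after the RMW
  Context.insert<InstFakeUse>(ContentsR); // keep the loaded value alive
  lowerAssign(InstAssign::create(Func, Dest, ContentsR));
}
```

This mirrors the non-i64 path of `lowerAtomicRMW` below; the i64 path additionally manages register pairs through `Variable64On32`.
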
| 4536 namespace { | |
| 4537 InstArithmetic *createArithInst(Cfg *Func, uint32_t Operation, Variable *Dest, | |
| 4538 Variable *Src0, Operand *Src1) { | |
| 4539 InstArithmetic::OpKind Oper; | |
| 4540 switch (Operation) { | |
| 4541 default: | |
| 4542 llvm::report_fatal_error("Unknown AtomicRMW operation"); | |
| 4543 case Intrinsics::AtomicExchange: | |
| 4544 llvm::report_fatal_error("Can't handle Atomic xchg operation"); | |
| 4545 case Intrinsics::AtomicAdd: | |
| 4546 Oper = InstArithmetic::Add; | |
| 4547 break; | |
| 4548 case Intrinsics::AtomicAnd: | |
| 4549 Oper = InstArithmetic::And; | |
| 4550 break; | |
| 4551 case Intrinsics::AtomicSub: | |
| 4552 Oper = InstArithmetic::Sub; | |
| 4553 break; | |
| 4554 case Intrinsics::AtomicOr: | |
| 4555 Oper = InstArithmetic::Or; | |
| 4556 break; | |
| 4557 case Intrinsics::AtomicXor: | |
| 4558 Oper = InstArithmetic::Xor; | |
| 4559 break; | |
| 4560 } | |
| 4561 return InstArithmetic::create(Func, Oper, Dest, Src0, Src1); | |
| 4562 } | |
| 4563 } // end of anonymous namespace | |
| 4564 | |
| 4507 void TargetARM32::lowerAtomicRMW(Variable *Dest, uint32_t Operation, | 4565 void TargetARM32::lowerAtomicRMW(Variable *Dest, uint32_t Operation, |
| 4508 Operand *Ptr, Operand *Val) { | 4566 Operand *Addr, Operand *Val) { |
| 4509 // retry: | 4567 // retry: |
| 4510 // ldrex contents, [addr] | 4568 // ldrex tmp, [addr] |
| 4511 // op tmp, contents, operand | 4569 // mov contents, tmp |
| 4512 // strex success, tmp, [addr] | 4570 // op result, contents, Val |
| 4571 // strex success, result, [addr] | |
| 4572 // cmp success, 0 | |
| 4513 // jne retry | 4573 // jne retry |
| 4514 // fake-use(addr, operand) @ prevents undesirable clobbering. | 4574 // fake-use(addr, operand) @ prevents undesirable clobbering. |
| 4515 // mov dest, contents | 4575 // mov dest, contents |
| 4516 assert(Dest != nullptr); | 4576 auto DestTy = Dest->getType(); |
| 4517 Type DestTy = Dest->getType(); | |
| 4518 (void)Ptr; | |
| 4519 (void)Val; | |
| 4520 | |
| 4521 OperandARM32Mem *Mem; | |
| 4522 Variable *PtrContentsReg; | |
| 4523 Variable *PtrContentsHiReg; | |
| 4524 Variable *PtrContentsLoReg; | |
| 4525 Variable *Value = Func->makeVariable(DestTy); | |
| 4526 Variable *ValueReg; | |
| 4527 Variable *ValueHiReg; | |
| 4528 Variable *ValueLoReg; | |
| 4529 Variable *Success = makeReg(IceType_i32); | |
| 4530 Variable *TmpReg; | |
| 4531 Variable *TmpHiReg; | |
| 4532 Variable *TmpLoReg; | |
| 4533 Operand *_0 = Ctx->getConstantZero(IceType_i32); | |
| 4534 auto *Retry = InstARM32Label::create(Func, this); | |
| 4535 | 4577 |
| 4536 if (DestTy == IceType_i64) { | 4578 if (DestTy == IceType_i64) { |
| 4537 Variable64On32 *PtrContentsReg64 = makeI64RegPair(); | 4579 lowerInt64AtomicRMW(Dest, Operation, Addr, Val); |
| 4538 PtrContentsHiReg = PtrContentsReg64->getHi(); | 4580 return; |
| 4539 PtrContentsLoReg = PtrContentsReg64->getLo(); | |
| 4540 PtrContentsReg = PtrContentsReg64; | |
| 4541 | |
| 4542 llvm::cast<Variable64On32>(Value)->initHiLo(Func); | |
| 4543 Variable64On32 *ValueReg64 = makeI64RegPair(); | |
| 4544 ValueHiReg = ValueReg64->getHi(); | |
| 4545 ValueLoReg = ValueReg64->getLo(); | |
| 4546 ValueReg = ValueReg64; | |
| 4547 | |
| 4548 Variable64On32 *TmpReg64 = makeI64RegPair(); | |
| 4549 TmpHiReg = TmpReg64->getHi(); | |
| 4550 TmpLoReg = TmpReg64->getLo(); | |
| 4551 TmpReg = TmpReg64; | |
| 4552 } else { | |
| 4553 PtrContentsReg = makeReg(DestTy); | |
| 4554 PtrContentsHiReg = nullptr; | |
| 4555 PtrContentsLoReg = PtrContentsReg; | |
| 4556 | |
| 4557 ValueReg = makeReg(DestTy); | |
| 4558 ValueHiReg = nullptr; | |
| 4559 ValueLoReg = ValueReg; | |
| 4560 | |
| 4561 TmpReg = makeReg(DestTy); | |
| 4562 TmpHiReg = nullptr; | |
| 4563 TmpLoReg = TmpReg; | |
| 4564 } | 4581 } |
| 4565 | 4582 |
| 4566 if (DestTy == IceType_i64) { | 4583 Operand *ValRF = nullptr; |
| 4567 Context.insert<InstFakeDef>(Value); | 4584 if (llvm::isa<ConstantInteger32>(Val)) { |
| 4585 ValRF = Val; | |
| 4586 } else { | |
| 4587 ValRF = legalizeToReg(Val); | |
| 4568 } | 4588 } |
| 4569 lowerAssign(InstAssign::create(Func, Value, Val)); | 4589 auto *ContentsR = makeReg(DestTy); |
| 4570 | 4590 auto *ResultR = makeReg(DestTy); |
| 4571 Variable *PtrVar = Func->makeVariable(IceType_i32); | |
| 4572 lowerAssign(InstAssign::create(Func, PtrVar, Ptr)); | |
| 4573 | 4591 |
| 4574 _dmb(); | 4592 _dmb(); |
| 4575 Context.insert(Retry); | 4593 lowerLoadLinkedStoreExclusive( |
| 4576 Mem = formMemoryOperand(PtrVar, DestTy); | 4594 DestTy, Addr, |
| 4577 if (DestTy == IceType_i64) { | 4595 [this, Operation, ResultR, ContentsR, ValRF](Variable *Tmp) { |
| 4578 Context.insert<InstFakeDef>(ValueReg, Value); | 4596 lowerAssign(InstAssign::create(Func, ContentsR, Tmp)); |
| 4597 if (Operation == Intrinsics::AtomicExchange) { | |
| 4598 lowerAssign(InstAssign::create(Func, ResultR, ValRF)); | |
| 4599 } else { | |
| 4600 lowerArithmetic( | |
| 4601 createArithInst(Func, Operation, ResultR, ContentsR, ValRF)); | |
| 4602 } | |
| 4603 return ResultR; | |
| 4604 }); | |
| 4605 _dmb(); | |
| 4606 if (auto *ValR = llvm::dyn_cast<Variable>(ValRF)) { | |
| 4607 Context.insert<InstFakeUse>(ValR); | |
| 4579 } | 4608 } |
| 4580 lowerAssign(InstAssign::create(Func, ValueReg, Value)); | 4609 // Can't dce ContentsR. |
| 4581 if (DestTy == IceType_i8 || DestTy == IceType_i16) { | 4610 Context.insert<InstFakeUse>(ContentsR); |
| 4582 _uxt(ValueReg, ValueReg); | 4611 lowerAssign(InstAssign::create(Func, Dest, ContentsR)); |
| 4583 } | 4612 } |
| 4584 _ldrex(PtrContentsReg, Mem); | |
| 4585 | 4613 |
| 4586 if (DestTy == IceType_i64) { | 4614 void TargetARM32::lowerInt64AtomicRMW(Variable *Dest, uint32_t Operation, |
| 4587 Context.insert<InstFakeDef>(TmpReg, ValueReg); | 4615 Operand *Addr, Operand *Val) { |
| 4588 } | 4616 assert(Dest->getType() == IceType_i64); |
| 4589 switch (Operation) { | |
| 4590 default: | |
| 4591 Func->setError("Unknown AtomicRMW operation"); | |
| 4592 return; | |
| 4593 case Intrinsics::AtomicAdd: | |
| 4594 if (DestTy == IceType_i64) { | |
| 4595 _adds(TmpLoReg, PtrContentsLoReg, ValueLoReg); | |
| 4596 _adc(TmpHiReg, PtrContentsHiReg, ValueHiReg); | |
| 4597 } else { | |
| 4598 _add(TmpLoReg, PtrContentsLoReg, ValueLoReg); | |
| 4599 } | |
| 4600 break; | |
| 4601 case Intrinsics::AtomicSub: | |
| 4602 if (DestTy == IceType_i64) { | |
| 4603 _subs(TmpLoReg, PtrContentsLoReg, ValueLoReg); | |
| 4604 _sbc(TmpHiReg, PtrContentsHiReg, ValueHiReg); | |
| 4605 } else { | |
| 4606 _sub(TmpLoReg, PtrContentsLoReg, ValueLoReg); | |
| 4607 } | |
| 4608 break; | |
| 4609 case Intrinsics::AtomicOr: | |
| 4610 _orr(TmpLoReg, PtrContentsLoReg, ValueLoReg); | |
| 4611 if (DestTy == IceType_i64) { | |
| 4612 _orr(TmpHiReg, PtrContentsHiReg, ValueHiReg); | |
| 4613 } | |
| 4614 break; | |
| 4615 case Intrinsics::AtomicAnd: | |
| 4616 _and(TmpLoReg, PtrContentsLoReg, ValueLoReg); | |
| 4617 if (DestTy == IceType_i64) { | |
| 4618 _and(TmpHiReg, PtrContentsHiReg, ValueHiReg); | |
| 4619 } | |
| 4620 break; | |
| 4621 case Intrinsics::AtomicXor: | |
| 4622 _eor(TmpLoReg, PtrContentsLoReg, ValueLoReg); | |
| 4623 if (DestTy == IceType_i64) { | |
| 4624 _eor(TmpHiReg, PtrContentsHiReg, ValueHiReg); | |
| 4625 } | |
| 4626 break; | |
| 4627 case Intrinsics::AtomicExchange: | |
| 4628 _mov(TmpLoReg, ValueLoReg); | |
| 4629 if (DestTy == IceType_i64) { | |
| 4630 _mov(TmpHiReg, ValueHiReg); | |
| 4631 } | |
| 4632 break; | |
| 4633 } | |
| 4634 _strex(Success, TmpReg, Mem); | |
| 4635 _cmp(Success, _0); | |
| 4636 _br(Retry, CondARM32::NE); | |
| 4637 | 4617 |
| 4638 // The following fake-uses ensure that Subzero will not clobber them in the | 4618 auto *ResultR = makeI64RegPair(); |
| 4639 // load-linked/store-conditional loop above. We might have to spill them, but | 4619 |
| 4640 // spilling is preferable over incorrect behavior. | 4620 Context.insert<InstFakeDef>(ResultR); |
| 4641 Context.insert<InstFakeUse>(PtrVar); | 4621 |
| 4642 if (auto *Value64 = llvm::dyn_cast<Variable64On32>(Value)) { | 4622 Operand *ValRF = nullptr; |
| 4643 Context.insert<InstFakeUse>(Value64->getHi()); | 4623 if (llvm::dyn_cast<ConstantInteger64>(Val)) { |
| 4644 Context.insert<InstFakeUse>(Value64->getLo()); | 4624 ValRF = Val; |
| 4645 } else { | 4625 } else { |
| 4646 Context.insert<InstFakeUse>(Value); | 4626 auto *ValR64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); |
| 4647 } | 4627 ValR64->initHiLo(Func); |
| 4648 _dmb(); | 4628 ValR64->setMustNotHaveReg(); |
| 4649 if (DestTy == IceType_i8 || DestTy == IceType_i16) { | 4629 ValR64->getLo()->setMustHaveReg(); |
| 4650 _uxt(PtrContentsReg, PtrContentsReg); | 4630 ValR64->getHi()->setMustHaveReg(); |
| 4631 lowerAssign(InstAssign::create(Func, ValR64, Val)); | |
| 4632 ValRF = ValR64; | |
| 4651 } | 4633 } |
| 4652 | 4634 |
| 4653 if (DestTy == IceType_i64) { | 4635 auto *ContentsR = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); |
| 4654 Context.insert<InstFakeUse>(PtrContentsReg); | 4636 ContentsR->initHiLo(Func); |
| 4637 ContentsR->setMustNotHaveReg(); | |
| 4638 ContentsR->getLo()->setMustHaveReg(); | |
| 4639 ContentsR->getHi()->setMustHaveReg(); | |
| 4640 | |
| 4641 _dmb(); | |
| 4642 lowerLoadLinkedStoreExclusive( | |
| 4643 IceType_i64, Addr, | |
| 4644 [this, Operation, ResultR, ContentsR, ValRF](Variable *Tmp) { | |
| 4645 lowerAssign(InstAssign::create(Func, ContentsR, Tmp)); | |
| 4646 Context.insert<InstFakeUse>(Tmp); | |
| 4647 if (Operation == Intrinsics::AtomicExchange) { | |
| 4648 lowerAssign(InstAssign::create(Func, ResultR, ValRF)); | |
| 4649 } else { | |
| 4650 lowerArithmetic( | |
| 4651 createArithInst(Func, Operation, ResultR, ContentsR, ValRF)); | |
| 4652 } | |
| 4653 Context.insert<InstFakeUse>(ResultR->getHi()); | |
| 4654 Context.insert<InstFakeDef>(ResultR, ResultR->getLo()) | |
| 4655 ->setDestRedefined(); | |
| 4656 return ResultR; | |
| 4657 }); | |
| 4658 _dmb(); | |
| 4659 if (auto *ValR64 = llvm::dyn_cast<Variable64On32>(ValRF)) { | |
| 4660 Context.insert<InstFakeUse>(ValR64->getLo()); | |
| 4661 Context.insert<InstFakeUse>(ValR64->getHi()); | |
| 4655 } | 4662 } |
| 4656 lowerAssign(InstAssign::create(Func, Dest, PtrContentsReg)); | 4663 lowerAssign(InstAssign::create(Func, Dest, ContentsR)); |
| 4657 if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) { | |
| 4658 Context.insert<InstFakeUse>(Dest64->getLo()); | |
| 4659 Context.insert<InstFakeUse>(Dest64->getHi()); | |
| 4660 } else { | |
| 4661 Context.insert<InstFakeUse>(Dest); | |
| 4662 } | |
| 4663 } | 4664 } |
| 4664 | 4665 |
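
The i64 paths above repeatedly build `Variable64On32` temporaries whose 64-bit shell must stay out of the register allocator while each 32-bit half must be register-allocated (see `ContentsR` and `ValR64` in `lowerInt64AtomicRMW`). A condensed sketch of that idiom follows; the helper name `makeSplitI64Temporary` is an assumption for illustration only.

```cpp
// Hypothetical helper: create an i64 temporary whose lo/hi halves are
// infinite-weight registers while the 64-bit shell itself is kept out of
// the register allocator, matching the setup in lowerInt64AtomicRMW.
Variable64On32 *TargetARM32::makeSplitI64Temporary() {
  auto *V = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
  V->initHiLo(Func);            // materialize the lo and hi halves
  V->setMustNotHaveReg();       // the 64-bit shell stays in memory
  V->getLo()->setMustHaveReg(); // ...but each half must land in a GPR
  V->getHi()->setMustHaveReg();
  return V;
}
```
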
| 4665 void TargetARM32::postambleCtpop64(const InstCall *Instr) { | 4666 void TargetARM32::postambleCtpop64(const InstCall *Instr) { |
| 4666 Operand *Arg0 = Instr->getArg(0); | 4667 Operand *Arg0 = Instr->getArg(0); |
| 4667 if (isInt32Asserting32Or64(Arg0->getType())) { | 4668 if (isInt32Asserting32Or64(Arg0->getType())) { |
| 4668 return; | 4669 return; |
| 4669 } | 4670 } |
| 4670 // The popcount helpers always return 32-bit values, while the intrinsic's | 4671 // The popcount helpers always return 32-bit values, while the intrinsic's |
| 4671 // signature matches some 64-bit platform's native instructions and expects to | 4672 // signature matches some 64-bit platform's native instructions and expects to |
| 4672 // fill a 64-bit reg. Thus, clear the upper bits of the dest just in case the | 4673 // fill a 64-bit reg. Thus, clear the upper bits of the dest just in case the |
| (...skipping 53 matching lines...) | |
| 4726 // ldrex is the only arm instruction that is guaranteed to load a 64-bit | 4727 // ldrex is the only arm instruction that is guaranteed to load a 64-bit |
| 4727 // integer atomically. Everything else works with a regular ldr. | 4728 // integer atomically. Everything else works with a regular ldr. |
| 4728 T = makeI64RegPair(); | 4729 T = makeI64RegPair(); |
| 4729 _ldrex(T, formMemoryOperand(Instr->getArg(0), IceType_i64)); | 4730 _ldrex(T, formMemoryOperand(Instr->getArg(0), IceType_i64)); |
| 4730 } else { | 4731 } else { |
| 4731 T = makeReg(DestTy); | 4732 T = makeReg(DestTy); |
| 4732 _ldr(T, formMemoryOperand(Instr->getArg(0), DestTy)); | 4733 _ldr(T, formMemoryOperand(Instr->getArg(0), DestTy)); |
| 4733 } | 4734 } |
| 4734 _dmb(); | 4735 _dmb(); |
| 4735 lowerAssign(InstAssign::create(Func, Dest, T)); | 4736 lowerAssign(InstAssign::create(Func, Dest, T)); |
| 4736 // Make sure the atomic load isn't elided when unused, by adding a FakeUse. | 4737 // Add a fake-use of T to ensure the atomic load is not removed if Dest is |
| 4737 // Since lowerLoad may fuse the load w/ an arithmetic instruction, insert | 4738 // unused. |
| 4738 // the FakeUse on the last-inserted instruction's dest. | 4739 Context.insert<InstFakeUse>(T); |
| 4739 Context.insert<InstFakeUse>(Context.getLastInserted()->getDest()); | |
| 4740 return; | 4740 return; |
| 4741 } | 4741 } |
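
The AtomicLoad case relies on a recurring idiom in this change: a fake use pins an instruction with required side effects so that dead-code elimination cannot drop it even when `Dest` goes unused. A self-contained sketch of the pattern, with the illustrative method name `lowerPinnedLoad` (not part of this change):

```cpp
// Hypothetical sketch of the fake-use idiom: the load defines T, the fake
// use marks T as live, and so the load itself survives DCE even if the
// final assignment to Dest is later removed.
void TargetARM32::lowerPinnedLoad(Variable *Dest, Operand *Addr) {
  const Type DestTy = Dest->getType();
  Variable *T = makeReg(DestTy);
  Variable *AddrR = legalizeToReg(Addr);
  _ldr(T, formMemoryOperand(AddrR, DestTy)); // the load we must not lose
  Context.insert<InstFakeUse>(T);            // pin T, and thus the load
  lowerAssign(InstAssign::create(Func, Dest, T));
}
```
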
| 4742 case Intrinsics::AtomicStore: { | 4742 case Intrinsics::AtomicStore: { |
| 4743 // We require the memory address to be naturally aligned. Given that is the | 4743 // We require the memory address to be naturally aligned. Given that is the |
| 4744 // case, then normal loads are atomic. | 4744 // case, then normal loads are atomic. |
| 4745 if (!Intrinsics::isMemoryOrderValid( | 4745 if (!Intrinsics::isMemoryOrderValid( |
| 4746 ID, getConstantMemoryOrder(Instr->getArg(2)))) { | 4746 ID, getConstantMemoryOrder(Instr->getArg(2)))) { |
| 4747 Func->setError("Unexpected memory ordering for AtomicStore"); | 4747 Func->setError("Unexpected memory ordering for AtomicStore"); |
| 4748 return; | 4748 return; |
| 4749 } | 4749 } |
| 4750 Operand *Value = Instr->getArg(0); | |
| 4751 Type ValueTy = Value->getType(); | |
| 4752 assert(isScalarIntegerType(ValueTy)); | |
| 4753 Operand *Addr = Instr->getArg(1); | |
| 4754 | 4750 |
| 4755 if (ValueTy == IceType_i64) { | 4751 auto *Value = Instr->getArg(0); |
| 4756 // Atomic 64-bit stores require a load-locked/store-conditional loop using | 4752 if (Value->getType() == IceType_i64) { |
| 4757 // ldrexd, and strexd. The lowered code is: | 4753 auto *ValueR = makeI64RegPair(); |
| 4758 // | 4754 Context.insert<InstFakeDef>(ValueR); |
| 4759 // retry: | 4755 lowerAssign(InstAssign::create(Func, ValueR, Value)); |
| 4760 // ldrexd t.lo, t.hi, [addr] | |
| 4761 // strexd success, value.lo, value.hi, [addr] | |
| 4762 // cmp success, #0 | |
| 4763 // bne retry | |
| 4764 // fake-use(addr, value.lo, value.hi) | |
| 4765 // | |
| 4766 // The fake-use is needed to prevent those variables from being clobbered | |
| 4767 // in the loop (which will happen under register pressure.) | |
| 4768 Variable64On32 *Tmp = makeI64RegPair(); | |
| 4769 Variable64On32 *ValueVar = | |
| 4770 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); | |
| 4771 Variable *AddrVar = makeReg(IceType_i32); | |
| 4772 Variable *Success = makeReg(IceType_i32); | |
| 4773 OperandARM32Mem *Mem; | |
| 4774 Operand *_0 = Ctx->getConstantZero(IceType_i32); | |
| 4775 auto *Retry = InstARM32Label::create(Func, this); | |
| 4776 Variable64On32 *NewReg = makeI64RegPair(); | |
| 4777 ValueVar->initHiLo(Func); | |
| 4778 ValueVar->mustNotHaveReg(); | |
| 4779 | |
| 4780 _dmb(); | 4756 _dmb(); |
| 4781 lowerAssign(InstAssign::create(Func, ValueVar, Value)); | 4757 lowerLoadLinkedStoreExclusive( |
| 4782 lowerAssign(InstAssign::create(Func, AddrVar, Addr)); | 4758 IceType_i64, Instr->getArg(1), [this, ValueR](Variable *Tmp) { |
| 4783 | 4759 // The following fake-use prevents the ldrex instruction from being |
| 4784 Context.insert(Retry); | 4760 // dead code eliminated. |
| 4785 Context.insert<InstFakeDef>(NewReg); | 4761 Context.insert<InstFakeUse>(llvm::cast<Variable>(loOperand(Tmp))); |
| 4786 lowerAssign(InstAssign::create(Func, NewReg, ValueVar)); | 4762 Context.insert<InstFakeUse>(llvm::cast<Variable>(hiOperand(Tmp))); |
| 4787 Mem = formMemoryOperand(AddrVar, IceType_i64); | 4763 Context.insert<InstFakeUse>(Tmp); |
| 4788 _ldrex(Tmp, Mem); | 4764 return ValueR; |
| 4789 // This fake-use both prevents the ldrex from being dead-code eliminated, | 4765 }); |
| 4790 // while also keeping liveness happy about all defs being used. | 4766 Context.insert<InstFakeUse>(ValueR); |
| 4791 Context.insert<InstFakeUse>(Context.getLastInserted()->getDest()); | |
| 4792 _strex(Success, NewReg, Mem); | |
| 4793 _cmp(Success, _0); | |
| 4794 _br(Retry, CondARM32::NE); | |
| 4795 | |
| 4796 Context.insert<InstFakeUse>(ValueVar->getLo()); | |
| 4797 Context.insert<InstFakeUse>(ValueVar->getHi()); | |
| 4798 Context.insert<InstFakeUse>(AddrVar); | |
| 4799 _dmb(); | 4767 _dmb(); |
| 4800 return; | 4768 return; |
| 4801 } | 4769 } |
| 4770 | |
| 4771 auto *ValueR = legalizeToReg(Instr->getArg(0)); | |
| 4772 Type ValueTy = ValueR->getType(); | |
Jim Stichnoth (2016/02/17 03:50:17): Surprised that "auto" wasn't used here.
John (2016/02/17 13:01:39): not to mention the lack of constness... :) Done.
| 4773 assert(isScalarIntegerType(ValueTy)); | |
| 4774 auto *Addr = legalizeToReg(Instr->getArg(1)); | |
| 4775 | |
| 4802 // non-64-bit stores are atomic as long as the address is aligned. This | 4776 // non-64-bit stores are atomic as long as the address is aligned. This |
| 4803 // is PNaCl, so addresses are aligned. | 4777 // is PNaCl, so addresses are aligned. |
| 4804 Variable *T = makeReg(ValueTy); | |
| 4805 | |
| 4806 _dmb(); | 4778 _dmb(); |
| 4807 lowerAssign(InstAssign::create(Func, T, Value)); | 4779 _str(ValueR, formMemoryOperand(Addr, ValueTy)); |
| 4808 _str(T, formMemoryOperand(Addr, ValueTy)); | |
| 4809 _dmb(); | 4780 _dmb(); |
| 4810 return; | 4781 return; |
| 4811 } | 4782 } |
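
For reference, the observable behavior that both store paths implement can be modeled with standard C++ atomics. This is a semantic sketch only, not Subzero code, and the function names are illustrative.

```cpp
#include <atomic>
#include <cstdint>

// On ARM32, an aligned 32-bit str is single-copy atomic, so the 32-bit
// path lowers to dmb; str; dmb. A plain 64-bit store is not guaranteed
// atomic, which is why the 64-bit path runs the ldrexd/strexd retry loop.
void atomicStore32Model(std::atomic<uint32_t> *Addr, uint32_t Value) {
  Addr->store(Value, std::memory_order_seq_cst); // dmb; str; dmb
}

void atomicStore64Model(std::atomic<uint64_t> *Addr, uint64_t Value) {
  Addr->store(Value, std::memory_order_seq_cst); // dmb; ldrexd/strexd loop; dmb
}
```
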
| 4812 case Intrinsics::AtomicCmpxchg: { | 4783 case Intrinsics::AtomicCmpxchg: { |
| 4813 // The initial lowering for cmpxchg was: | |
| 4814 // | |
| 4815 // retry: | 4784 // retry: |
| 4816 // ldrex tmp, [addr] | 4785 // ldrex tmp, [addr] |
| 4817 // cmp tmp, expected | 4786 // cmp tmp, expected |
| 4818 // mov expected, tmp | 4787 // mov expected, tmp |
| 4819 // jne retry | |
| 4820 // strex success, new, [addr] | |
| 4821 // cmp success, #0 | |
| 4822 // bne retry | |
| 4823 // mov dest, expected | |
| 4824 // | |
| 4825 // Besides requiring two branches, that lowering could also potentially | |
| 4826 // write to memory (in mov expected, tmp) unless we were OK with increasing | |
| 4827 // the register pressure and requiring expected to be an infinite-weight | |
| 4828 // variable (spoiler alert: that was a problem for i64 cmpxchg.) Through | |
| 4829 // careful rewritting, and thanks to predication, we now implement the | |
| 4830 // lowering as: | |
| 4831 // | |
| 4832 // retry: | |
| 4833 // ldrex tmp, [addr] | |
| 4834 // cmp tmp, expected | |
| 4835 // strexeq success, new, [addr] | 4788 // strexeq success, new, [addr] |
| 4836 // movne expected, tmp | |
| 4837 // cmpeq success, #0 | 4789 // cmpeq success, #0 |
| 4838 // bne retry | 4790 // bne retry |
| 4839 // mov dest, expected | 4791 // mov dest, expected |
| 4840 // | |
| 4841 // Predication lets us move the strex ahead of the mov expected, tmp, which | |
| 4842 // allows tmp to be a non-infinite weight temporary. We wanted to avoid | |
| 4843 // writing to memory between ldrex and strex because, even though most times | |
| 4844 // that would cause no issues, if any interleaving memory write aliased | |
| 4845 // [addr] than we would have undefined behavior. Undefined behavior isn't | |
| 4846 // cool, so we try to avoid it. See the "Synchronization and semaphores" | |
| 4847 // section of the "ARM Architecture Reference Manual." | |
| 4848 | |
| 4849 assert(isScalarIntegerType(DestTy)); | 4792 assert(isScalarIntegerType(DestTy)); |
| 4850 // We require the memory address to be naturally aligned. Given that is the | 4793 // We require the memory address to be naturally aligned. Given that is the |
| 4851 // case, then normal loads are atomic. | 4794 // case, then normal loads are atomic. |
| 4852 if (!Intrinsics::isMemoryOrderValid( | 4795 if (!Intrinsics::isMemoryOrderValid( |
| 4853 ID, getConstantMemoryOrder(Instr->getArg(3)), | 4796 ID, getConstantMemoryOrder(Instr->getArg(3)), |
| 4854 getConstantMemoryOrder(Instr->getArg(4)))) { | 4797 getConstantMemoryOrder(Instr->getArg(4)))) { |
| 4855 Func->setError("Unexpected memory ordering for AtomicCmpxchg"); | 4798 Func->setError("Unexpected memory ordering for AtomicCmpxchg"); |
| 4856 return; | 4799 return; |
| 4857 } | 4800 } |
| 4858 | 4801 |
| 4859 OperandARM32Mem *Mem; | 4802 if (DestTy == IceType_i64) { |
| 4860 Variable *TmpReg; | 4803 auto *New = makeI64RegPair(); |
| 4861 Variable *Expected, *ExpectedReg; | 4804 Context.insert<InstFakeDef>(New); |
| 4862 Variable *New, *NewReg; | 4805 lowerAssign(InstAssign::create(Func, New, Instr->getArg(2))); |
| 4863 Variable *Success = makeReg(IceType_i32); | |
| 4864 Operand *_0 = Ctx->getConstantZero(IceType_i32); | |
| 4865 auto *Retry = InstARM32Label::create(Func, this); | |
| 4866 | 4806 |
| 4867 if (DestTy == IceType_i64) { | 4807 auto *Expected = makeI64RegPair(); |
| 4868 Variable64On32 *TmpReg64 = makeI64RegPair(); | 4808 Context.insert<InstFakeDef>(Expected); |
| 4869 Variable64On32 *New64 = | 4809 lowerAssign(InstAssign::create(Func, Expected, Instr->getArg(1))); |
| 4870 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); | |
| 4871 Variable64On32 *NewReg64 = makeI64RegPair(); | |
| 4872 Variable64On32 *Expected64 = | |
| 4873 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); | |
| 4874 Variable64On32 *ExpectedReg64 = makeI64RegPair(); | |
| 4875 | 4810 |
| 4876 New64->initHiLo(Func); | 4811 _dmb(); |
| 4877 New64->mustNotHaveReg(); | 4812 lowerLoadLinkedStoreExclusive( |
| 4878 Expected64->initHiLo(Func); | 4813 DestTy, Instr->getArg(0), |
| 4879 Expected64->mustNotHaveReg(); | 4814 [this, Expected, New, Instr, DestTy](Variable *Tmp) { |
| 4815 auto *ExpectedLoR = llvm::cast<Variable>(loOperand(Expected)); | |
| 4816 auto *ExpectedHiR = llvm::cast<Variable>(hiOperand(Expected)); | |
| 4817 auto *TmpLoR = llvm::cast<Variable>(loOperand(Tmp)); | |
| 4818 auto *TmpHiR = llvm::cast<Variable>(hiOperand(Tmp)); | |
| 4819 _cmp(TmpLoR, ExpectedLoR); | |
| 4820 _cmp(TmpHiR, ExpectedHiR, CondARM32::EQ); | |
| 4821 // Adding an explicit use of Tmp here, or its live range will not | |
| 4822 // reach here (only those of Tmp.Lo and Tmp.Hi will.) | |
| 4823 Context.insert<InstFakeUse>(Tmp); | |
| 4824 _mov_redefined(ExpectedLoR, TmpLoR); | |
| 4825 _mov_redefined(ExpectedHiR, TmpHiR); | |
| 4826 // Same as above. | |
| 4827 Context.insert<InstFakeUse>(Tmp); | |
| 4828 return New; | |
| 4829 }, | |
| 4830 CondARM32::EQ); | |
| 4831 _dmb(); | |
| 4880 | 4832 |
| 4881 TmpReg = TmpReg64; | 4833 lowerAssign(InstAssign::create(Func, Dest, Expected)); |
| 4882 New = New64; | 4834 // The fake-use Expected prevents the assignments to Expected (above) |
| 4883 NewReg = NewReg64; | 4835 // from being removed if Dest is not used. |
| 4884 Expected = Expected64; | 4836 Context.insert<InstFakeUse>(Expected); |
| 4885 ExpectedReg = ExpectedReg64; | 4837 // New needs to be alive here, or its live range will end in the |
| 4886 } else { | 4838 // strex instruction. |
| 4887 TmpReg = makeReg(DestTy); | 4839 Context.insert<InstFakeUse>(New); |
| 4888 New = Func->makeVariable(DestTy); | 4840 return; |
| 4889 NewReg = makeReg(DestTy); | |
| 4890 Expected = Func->makeVariable(DestTy); | |
| 4891 ExpectedReg = makeReg(DestTy); | |
| 4892 } | 4841 } |
| 4893 | 4842 |
| 4894 Mem = formMemoryOperand(Instr->getArg(0), DestTy); | 4843 auto *New = legalizeToReg(Instr->getArg(2)); |
| 4895 if (DestTy == IceType_i64) { | 4844 auto *Expected = legalizeToReg(Instr->getArg(1)); |
| 4896 Context.insert<InstFakeDef>(Expected); | 4845 |
| 4897 } | 4846 _dmb(); |
| 4898 lowerAssign(InstAssign::create(Func, Expected, Instr->getArg(1))); | 4847 lowerLoadLinkedStoreExclusive( |
| 4899 if (DestTy == IceType_i64) { | 4848 DestTy, |
| 4900 Context.insert<InstFakeDef>(New); | 4849 Instr->getArg(0), [this, Expected, New, Instr, DestTy](Variable *Tmp) { |
| 4901 } | 4850 lowerIcmpCond(InstIcmp::Eq, Tmp, Expected); |
| 4902 lowerAssign(InstAssign::create(Func, New, Instr->getArg(2))); | 4851 _mov_redefined(Expected, Tmp); |
| 4852 return New; | |
| 4853 }, CondARM32::EQ); | |
| 4903 _dmb(); | 4854 _dmb(); |
| 4904 | 4855 |
| 4905 Context.insert(Retry); | |
| 4906 if (DestTy == IceType_i64) { | |
| 4907 Context.insert<InstFakeDef>(ExpectedReg, Expected); | |
| 4908 } | |
| 4909 lowerAssign(InstAssign::create(Func, ExpectedReg, Expected)); | |
| 4910 if (DestTy == IceType_i64) { | |
| 4911 Context.insert<InstFakeDef>(NewReg, New); | |
| 4912 } | |
| 4913 lowerAssign(InstAssign::create(Func, NewReg, New)); | |
| 4914 | |
| 4915 _ldrex(TmpReg, Mem); | |
| 4916 Context.insert<InstFakeUse>(Context.getLastInserted()->getDest()); | |
| 4917 if (DestTy == IceType_i64) { | |
| 4918 auto *TmpReg64 = llvm::cast<Variable64On32>(TmpReg); | |
| 4919 auto *ExpectedReg64 = llvm::cast<Variable64On32>(ExpectedReg); | |
| 4920 // lowerAssign above has added fake-defs for TmpReg and ExpectedReg. Let's | |
| 4921 // keep liveness happy, shall we? | |
| 4922 Context.insert<InstFakeUse>(TmpReg); | |
| 4923 Context.insert<InstFakeUse>(ExpectedReg); | |
| 4924 _cmp(TmpReg64->getHi(), ExpectedReg64->getHi()); | |
| 4925 _cmp(TmpReg64->getLo(), ExpectedReg64->getLo(), CondARM32::EQ); | |
| 4926 } else { | |
| 4927 _cmp(TmpReg, ExpectedReg); | |
| 4928 } | |
| 4929 _strex(Success, NewReg, Mem, CondARM32::EQ); | |
| 4930 if (DestTy == IceType_i64) { | |
| 4931 auto *TmpReg64 = llvm::cast<Variable64On32>(TmpReg); | |
| 4932 auto *Expected64 = llvm::cast<Variable64On32>(Expected); | |
| 4933 _mov_redefined(Expected64->getHi(), TmpReg64->getHi(), CondARM32::NE); | |
| 4934 _mov_redefined(Expected64->getLo(), TmpReg64->getLo(), CondARM32::NE); | |
| 4935 Context.insert<InstFakeDef>(Expected, TmpReg); | |
| 4936 _set_dest_redefined(); | |
| 4937 } else { | |
| 4938 _mov_redefined(Expected, TmpReg, CondARM32::NE); | |
| 4939 } | |
| 4940 _cmp(Success, _0, CondARM32::EQ); | |
| 4941 _br(Retry, CondARM32::NE); | |
| 4942 _dmb(); | |
| 4943 lowerAssign(InstAssign::create(Func, Dest, Expected)); | 4856 lowerAssign(InstAssign::create(Func, Dest, Expected)); |
| 4944 Context.insert<InstFakeUse>(Expected); | 4857 Context.insert<InstFakeUse>(Expected); |
| 4945 if (auto *New64 = llvm::dyn_cast<Variable64On32>(New)) { | 4858 Context.insert<InstFakeUse>(New); |
| 4946 Context.insert<InstFakeUse>(New64->getLo()); | |
| 4947 Context.insert<InstFakeUse>(New64->getHi()); | |
| 4948 } else { | |
| 4949 Context.insert<InstFakeUse>(New); | |
| 4950 } | |
| 4951 return; | 4859 return; |
| 4952 } | 4860 } |
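
As a value-level reference for the cmpxchg lowering above, the sketch below models what ends up in memory and in `Dest`; it deliberately elides the atomicity that the ldrex/strexeq retry loop provides, and the function name is illustrative only.

```cpp
#include <cstdint>

// Models the 32-bit AtomicCmpxchg result: the new value is stored only when
// the observed contents equal Expected, and Dest always receives the
// observed contents (the predicated movne/mov pair in the lowering above).
uint32_t cmpxchg32Model(uint32_t *Addr, uint32_t Expected, uint32_t Desired) {
  const uint32_t Observed = *Addr; // ldrex tmp, [addr]
  if (Observed == Expected)        // cmp tmp, expected
    *Addr = Desired;               // strexeq success, new, [addr]
  return Observed;                 // mov dest, expected
}
```
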
| 4953 case Intrinsics::AtomicRMW: { | 4861 case Intrinsics::AtomicRMW: { |
| 4954 if (!Intrinsics::isMemoryOrderValid( | 4862 if (!Intrinsics::isMemoryOrderValid( |
| 4955 ID, getConstantMemoryOrder(Instr->getArg(3)))) { | 4863 ID, getConstantMemoryOrder(Instr->getArg(3)))) { |
| 4956 Func->setError("Unexpected memory ordering for AtomicRMW"); | 4864 Func->setError("Unexpected memory ordering for AtomicRMW"); |
| 4957 return; | 4865 return; |
| 4958 } | 4866 } |
| 4959 lowerAtomicRMW( | 4867 lowerAtomicRMW( |
| 4960 Dest, static_cast<uint32_t>( | 4868 Dest, static_cast<uint32_t>( |
| (...skipping 1909 matching lines...) | |
| 6870 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; | 6778 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; |
| 6871 } | 6779 } |
| 6872 | 6780 |
| 6873 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM]; | 6781 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM]; |
| 6874 llvm::SmallBitVector | 6782 llvm::SmallBitVector |
| 6875 TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM]; | 6783 TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM]; |
| 6876 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; | 6784 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; |
| 6877 | 6785 |
| 6878 } // end of namespace ARM32 | 6786 } // end of namespace ARM32 |
| 6879 } // end of namespace Ice | 6787 } // end of namespace Ice |