Index: src/IceTargetLoweringARM32.cpp
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index ec6a98b20fbec9cdeeedbb3f590fda8a7b3d2675..14fa072ee0cec5386201c29c44d376d7a7826229 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -174,16 +174,19 @@ TargetARM32::TargetARM32(Cfg *Func)
// TODO: Don't initialize IntegerRegisters and friends every time.
// Instead, initialize in some sort of static initializer for the
// class.
+ // TODO: Limit this size, or do all the bitsets need to be the same width?
llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);
- llvm::SmallBitVector FloatRegisters(RegARM32::Reg_NUM);
+ llvm::SmallBitVector Float32Registers(RegARM32::Reg_NUM);
+ llvm::SmallBitVector Float64Registers(RegARM32::Reg_NUM);
llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM);
llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);
ScratchRegs.resize(RegARM32::Reg_NUM);
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
- isFP) \
+ isFP32, isFP64, isVec128) \
IntegerRegisters[RegARM32::val] = isInt; \
- FloatRegisters[RegARM32::val] = isFP; \
- VectorRegisters[RegARM32::val] = isFP; \
+ Float32Registers[RegARM32::val] = isFP32; \
+ Float64Registers[RegARM32::val] = isFP64; \
+ VectorRegisters[RegARM32::val] = isVec128; \
ScratchRegs[RegARM32::val] = scratch;
REGARM32_TABLE;
#undef X
@@ -193,8 +196,8 @@ TargetARM32::TargetARM32(Cfg *Func)
TypeToRegisterSet[IceType_i16] = IntegerRegisters;
TypeToRegisterSet[IceType_i32] = IntegerRegisters;
TypeToRegisterSet[IceType_i64] = IntegerRegisters;
- TypeToRegisterSet[IceType_f32] = FloatRegisters;
- TypeToRegisterSet[IceType_f64] = FloatRegisters;
+ TypeToRegisterSet[IceType_f32] = Float32Registers;
+ TypeToRegisterSet[IceType_f64] = Float64Registers;
TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
@@ -363,7 +366,7 @@ IceString TargetARM32::getRegName(SizeT RegNum, Type Ty) const {
(void)Ty;
static const char *RegNames[] = {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
- isFP) \
+ isFP32, isFP64, isVec128) \
name,
REGARM32_TABLE
#undef X
@@ -435,9 +438,7 @@ bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) {
int32_t RegLo, RegHi;
// Always start i64 registers at an even register, so this may end
// up padding away a register.
- if (NumGPRRegsUsed % 2 != 0) {
- ++NumGPRRegsUsed;
- }
+ NumGPRRegsUsed = Utils::applyAlignment(NumGPRRegsUsed, 2);
RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed;
++NumGPRRegsUsed;
RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed;
@@ -459,6 +460,33 @@ bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) {
return true;
}
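+// Helper that decides whether a float, double, or vector argument is passed
+// in a VFP/NEON register. Register usage is tracked in units of S registers:
+// one unit per S register, two per D register, four per Q register.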
+bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) {
+ if (NumFPRegUnits >= ARM32_MAX_FP_REG_UNITS)
+ return false;
+ if (isVectorType(Ty)) {
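+ // A Q register overlaps four consecutive S registers (q0 covers s0..s3),
+ // so round the consumed unit count up to a multiple of 4.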
+ NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 4);
+ *Reg = RegARM32::Reg_q0 + (NumFPRegUnits / 4);
+ NumFPRegUnits += 4;
+ // If this bumps us past the boundary, don't allocate to a register, and
+ // leave the speculatively consumed register units marked as consumed.
+ if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS)
+ return false;
+ } else if (Ty == IceType_f64) {
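+ // A D register overlaps two consecutive S registers (d0 covers s0 and s1),
+ // so round the consumed unit count up to a multiple of 2.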
+ NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 2);
+ *Reg = RegARM32::Reg_d0 + (NumFPRegUnits / 2);
+ NumFPRegUnits += 2;
+ // If this bumps us past the boundary, don't allocate to a register, and
+ // leave the speculatively consumed register units marked as consumed.
+ if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS)
+ return false;
+ } else {
+ assert(Ty == IceType_f32);
+ *Reg = RegARM32::Reg_s0 + NumFPRegUnits;
+ ++NumFPRegUnits;
+ }
+ return true;
+}
+
void TargetARM32::lowerArguments() {
VarList &Args = Func->getArgs();
TargetARM32::CallingConv CC;
@@ -472,14 +500,7 @@ void TargetARM32::lowerArguments() {
for (SizeT I = 0, E = Args.size(); I < E; ++I) {
Variable *Arg = Args[I];
Type Ty = Arg->getType();
- // TODO(jvoung): handle float/vector types.
- if (isVectorType(Ty)) {
- UnimplementedError(Func->getContext()->getFlags());
- continue;
- } else if (isFloatingType(Ty)) {
- UnimplementedError(Func->getContext()->getFlags());
- continue;
- } else if (Ty == IceType_i64) {
+ if (Ty == IceType_i64) {
std::pair<int32_t, int32_t> RegPair;
if (!CC.I64InRegs(&RegPair))
continue;
@@ -503,10 +524,15 @@ void TargetARM32::lowerArguments() {
Context.insert(InstAssign::create(Func, Arg, RegisterArg));
continue;
} else {
- assert(Ty == IceType_i32);
int32_t RegNum;
- if (!CC.I32InReg(&RegNum))
- continue;
+ if (isVectorType(Ty) || isFloatingType(Ty)) {
+ if (!CC.FPInReg(Ty, &RegNum))
+ continue;
+ } else {
+ assert(Ty == IceType_i32);
+ if (!CC.I32InReg(&RegNum))
+ continue;
+ }
Variable *RegisterArg = Func->makeVariable(Ty);
if (BuildDefs::dump()) {
RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
@@ -517,6 +543,7 @@ void TargetARM32::lowerArguments() {
Args[I] = RegisterArg;
Context.insert(InstAssign::create(Func, Arg, RegisterArg));
+ continue;
}
}
}
@@ -554,7 +581,10 @@ void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
Func, Ty, FramePtr, llvm::cast<ConstantInteger32>(
Ctx->getConstantInt32(Arg->getStackOffset())));
if (isVectorType(Arg->getType())) {
+ // TODO: Use a vector load such as vld1 here once vector arguments are supported.
UnimplementedError(Func->getContext()->getFlags());
+ } else if (isFloatingType(Arg->getType())) {
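+ // vldr can load either an S or a D register, so it covers both f32 and f64.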
+ _vldr(Arg, Mem);
} else {
_ldr(Arg, Mem);
}
@@ -725,12 +755,9 @@ void TargetARM32::addProlog(CfgNode *Node) {
Type Ty = Arg->getType();
bool InRegs = false;
// Skip arguments passed in registers.
- if (isVectorType(Ty)) {
- UnimplementedError(Func->getContext()->getFlags());
- continue;
- } else if (isFloatingType(Ty)) {
- UnimplementedError(Func->getContext()->getFlags());
- continue;
+ if (isVectorType(Ty) || isFloatingType(Ty)) {
+ int32_t DummyReg;
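+ // Only the in-register decision matters here; the dummy register number
+ // is discarded.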
+ InRegs = CC.FPInReg(Ty, &DummyReg);
} else if (Ty == IceType_i64) {
std::pair<int32_t, int32_t> DummyRegs;
InRegs = CC.I64InRegs(&DummyRegs);
@@ -858,6 +885,8 @@ void TargetARM32::addEpilog(CfgNode *Node) {
bool TargetARM32::isLegalVariableStackOffset(int32_t Offset) const {
constexpr bool SignExt = false;
+ // TODO(jvoung): vldr of FP stack slots has a different limit from the
+ // plain stackSlotType().
return OperandARM32Mem::canHoldOffset(stackSlotType(), SignExt, Offset);
}
@@ -1121,7 +1150,7 @@ llvm::SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include,
llvm::SmallBitVector Registers(RegARM32::Reg_NUM);
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
- isFP) \
+ isFP32, isFP64, isVec128) \
if (scratch && (Include & RegSet_CallerSave)) \
Registers[RegARM32::val] = true; \
if (preserved && (Include & RegSet_CalleeSave)) \
@@ -1518,6 +1547,8 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
return;
} else if (isVectorType(Dest->getType())) {
UnimplementedError(Func->getContext()->getFlags());
+ // Add a fake def to keep liveness consistent in the meantime.
+ Context.insert(InstFakeDef::create(Func, Dest));
return;
}
// Dest->getType() is a non-i64 scalar.
@@ -1553,6 +1584,47 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
H_srem_i32, IsRemainder);
return;
}
+ case InstArithmetic::Frem: {
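+ // There is no ARM instruction for frem, so lower it to a runtime helper call.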
+ const SizeT MaxSrcs = 2;
+ Type Ty = Dest->getType();
+ InstCall *Call = makeHelperCall(
+ isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);
+ Call->addArg(Src0R);
+ Call->addArg(Src1);
+ lowerCall(Call);
+ return;
+ }
+ }
+
+ // Handle floating-point arithmetic separately: these instructions require
+ // Src1 to be legalized to a register.
+ switch (Inst->getOp()) {
+ default:
+ break;
+ case InstArithmetic::Fadd: {
+ Variable *Src1R = legalizeToReg(Src1);
+ _vadd(T, Src0R, Src1R);
+ _vmov(Dest, T);
+ return;
+ }
+ case InstArithmetic::Fsub: {
+ Variable *Src1R = legalizeToReg(Src1);
+ _vsub(T, Src0R, Src1R);
+ _vmov(Dest, T);
+ return;
+ }
+ case InstArithmetic::Fmul: {
+ Variable *Src1R = legalizeToReg(Src1);
+ _vmul(T, Src0R, Src1R);
+ _vmov(Dest, T);
+ return;
+ }
+ case InstArithmetic::Fdiv: {
+ Variable *Src1R = legalizeToReg(Src1);
+ _vdiv(T, Src0R, Src1R);
+ _vmov(Dest, T);
+ return;
+ }
}
Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
@@ -1605,19 +1677,11 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
llvm_unreachable("Integer div/rem should have been handled earlier.");
return;
case InstArithmetic::Fadd:
- UnimplementedError(Func->getContext()->getFlags());
- return;
case InstArithmetic::Fsub:
- UnimplementedError(Func->getContext()->getFlags());
- return;
case InstArithmetic::Fmul:
- UnimplementedError(Func->getContext()->getFlags());
- return;
case InstArithmetic::Fdiv:
- UnimplementedError(Func->getContext()->getFlags());
- return;
case InstArithmetic::Frem:
- UnimplementedError(Func->getContext()->getFlags());
+ llvm_unreachable("Floating point arith should have been handled earlier.");
return;
}
}
@@ -1652,6 +1716,9 @@ void TargetARM32::lowerAssign(const InstAssign *Inst) {
}
if (isVectorType(Dest->getType())) {
UnimplementedError(Func->getContext()->getFlags());
+ } else if (isFloatingType(Dest->getType())) {
+ Variable *SrcR = legalizeToReg(NewSrc);
+ _vmov(Dest, SrcR);
} else {
_mov(Dest, NewSrc);
}
@@ -1681,6 +1748,8 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
// Pair of Arg Operand -> GPR number assignments.
llvm::SmallVector<std::pair<Operand *, int32_t>,
TargetARM32::CallingConv::ARM32_MAX_GPR_ARG> GPRArgs;
+ llvm::SmallVector<std::pair<Operand *, int32_t>,
+ TargetARM32::CallingConv::ARM32_MAX_FP_REG_UNITS> FPArgs;
// Pair of Arg Operand -> stack offset.
llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
int32_t ParameterAreaSizeBytes = 0;
@@ -1691,11 +1760,7 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
Operand *Arg = legalizeUndef(Instr->getArg(i));
Type Ty = Arg->getType();
bool InRegs = false;
- if (isVectorType(Ty)) {
- UnimplementedError(Func->getContext()->getFlags());
- } else if (isFloatingType(Ty)) {
- UnimplementedError(Func->getContext()->getFlags());
- } else if (Ty == IceType_i64) {
+ if (Ty == IceType_i64) {
std::pair<int32_t, int32_t> Regs;
if (CC.I64InRegs(&Regs)) {
InRegs = true;
@@ -1704,6 +1769,12 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
GPRArgs.push_back(std::make_pair(Lo, Regs.first));
GPRArgs.push_back(std::make_pair(Hi, Regs.second));
}
+ } else if (isVectorType(Ty) || isFloatingType(Ty)) {
+ int32_t Reg;
+ if (CC.FPInReg(Ty, &Reg)) {
+ InRegs = true;
+ FPArgs.push_back(std::make_pair(Arg, Reg));
+ }
} else {
assert(Ty == IceType_i32);
int32_t Reg;
@@ -1766,6 +1837,10 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
// registers after the call.
Context.insert(InstFakeUse::create(Func, Reg));
}
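+ // Do the same for FP and vector arguments: move each into its assigned
+ // register and add a fake use so it stays live up to the call.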
+ for (auto &FPArg : FPArgs) {
+ Variable *Reg = legalizeToReg(FPArg.first, FPArg.second);
+ Context.insert(InstFakeUse::create(Func, Reg));
+ }
// Generate the call instruction. Assign its result to a temporary
// with high register allocation weight.
@@ -1791,9 +1866,10 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1);
break;
case IceType_f32:
+ ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_s0);
+ break;
case IceType_f64:
- // Use S and D regs.
- UnimplementedError(Func->getContext()->getFlags());
+ ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_d0);
break;
case IceType_v4i1:
case IceType_v8i1:
@@ -1802,8 +1878,7 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
case IceType_v8i16:
case IceType_v4i32:
case IceType_v4f32:
- // Use Q regs.
- UnimplementedError(Func->getContext()->getFlags());
+ ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_q0);
break;
}
}
@@ -1853,12 +1928,11 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
_mov(DestLo, ReturnReg);
_mov(DestHi, ReturnRegHi);
} else {
- assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
- Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
- isVectorType(Dest->getType()));
if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
- UnimplementedError(Func->getContext()->getFlags());
+ _vmov(Dest, ReturnReg);
} else {
+ assert(isIntegerType(Dest->getType()) &&
+ typeWidthInBytes(Dest->getType()) <= 4);
_mov(Dest, ReturnReg);
}
}
@@ -2291,6 +2365,8 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
return;
}
case Intrinsics::Fabs: {
+ // Add a fake def to keep liveness consistent in the meantime.
+ Context.insert(InstFakeDef::create(Func, Instr->getDest()));
UnimplementedError(Func->getContext()->getFlags());
return;
}
@@ -2352,7 +2428,11 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
return;
}
case Intrinsics::Sqrt: {
- UnimplementedError(Func->getContext()->getFlags());
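+ // vsqrt computes the square root directly for both f32 and f64 operands.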
+ Variable *Src = legalizeToReg(Instr->getArg(0));
+ Variable *Dest = Instr->getDest();
+ Variable *T = makeReg(Dest->getType());
+ _vsqrt(T, Src);
+ _vmov(Dest, T);
return;
}
case Intrinsics::Stacksave: {
@@ -2440,16 +2520,22 @@ void TargetARM32::lowerRet(const InstRet *Inst) {
Variable *Reg = nullptr;
if (Inst->hasRetValue()) {
Operand *Src0 = Inst->getRetValue();
- if (Src0->getType() == IceType_i64) {
+ Type Ty = Src0->getType();
+ if (Ty == IceType_i64) {
Src0 = legalizeUndef(Src0);
Variable *R0 = legalizeToReg(loOperand(Src0), RegARM32::Reg_r0);
Variable *R1 = legalizeToReg(hiOperand(Src0), RegARM32::Reg_r1);
Reg = R0;
Context.insert(InstFakeUse::create(Func, R1));
- } else if (isScalarFloatingType(Src0->getType())) {
- UnimplementedError(Func->getContext()->getFlags());
+ } else if (Ty == IceType_f32) {
+ Variable *S0 = legalizeToReg(Src0, RegARM32::Reg_s0);
+ Reg = S0;
+ } else if (Ty == IceType_f64) {
+ Variable *D0 = legalizeToReg(Src0, RegARM32::Reg_d0);
+ Reg = D0;
} else if (isVectorType(Src0->getType())) {
- UnimplementedError(Func->getContext()->getFlags());
+ Variable *Q0 = legalizeToReg(Src0, RegARM32::Reg_q0);
+ Reg = Q0;
} else {
Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex);
_mov(Reg, Src0F, CondARM32::AL, RegARM32::Reg_r0);
@@ -2596,8 +2682,8 @@ Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) {
Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) {
Type Ty = Src->getType();
Variable *Reg = makeReg(Ty, RegNum);
- if (isVectorType(Ty)) {
- UnimplementedError(Func->getContext()->getFlags());
+ if (isVectorType(Ty) || isFloatingType(Ty)) {
+ _vmov(Reg, Src);
} else {
// Mov's Src operand can really only be the flexible second operand type
// or a register. Users should guarantee that.
@@ -2646,7 +2732,13 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
}
if (!(Allowed & Legal_Mem)) {
Variable *Reg = makeReg(Ty, RegNum);
- _ldr(Reg, Mem);
+ if (isVectorType(Ty)) {
+ UnimplementedError(Func->getContext()->getFlags());
+ } else if (isFloatingType(Ty)) {
+ _vldr(Reg, Mem);
+ } else {
+ _ldr(Reg, Mem);
+ }
From = Reg;
} else {
From = Mem;
@@ -2716,11 +2808,25 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
_movt(Reg, C);
return Reg;
} else {
+ assert(isScalarFloatingType(Ty));
// Load floats/doubles from literal pool.
- UnimplementedError(Func->getContext()->getFlags());
- From = copyToReg(From, RegNum);
+ // TODO(jvoung): Allow certain immediates to be encoded directly in
+ // an operand. See Table A7-18 of the ARM manual:
+ // "Floating-point modified immediate constants".
+ // Or, for 32-bit floating point numbers, just encode the raw bits into a
+ // movw/movt pair to a GPR and vmov to an SREG, instead of using a movw/movt
+ // pair to get the const-pool address and then loading into an SREG.
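+ // Materialize the address of the constant's pool entry with movw/movt on
+ // the pool label, then load the constant through a memory operand based on
+ // that address.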
+ std::string Buffer;
+ llvm::raw_string_ostream StrBuf(Buffer);
+ llvm::cast<Constant>(From)->emitPoolLabel(StrBuf);
+ llvm::cast<Constant>(From)->setShouldBePooled(true);
+ Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true);
+ Variable *BaseReg = makeReg(getPointerType());
+ _movw(BaseReg, Offset);
+ _movt(BaseReg, Offset);
+ From = formMemoryOperand(BaseReg, Ty);
+ return copyToReg(From, RegNum);
}
- return From;
}
if (auto Var = llvm::dyn_cast<Variable>(From)) {