src/IceInstARM32.cpp - Issue 1438773004: Subzero. ARM32. Improve constant lowering.

Unified Diff: src/IceInstARM32.cpp

Issue 1438773004: Subzero. ARM32. Improve constant lowering. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

Patch Set: Created 5 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/IceInstARM32.cpp

diff --git a/src/IceInstARM32.cpp b/src/IceInstARM32.cpp

index f8f6b4f17f7dc5e63ff1411c302a50dff660c78a..3f0d2e609e55e2a1d21fcdef477f17e51573f408 100644

--- a/src/IceInstARM32.cpp

+++ b/src/IceInstARM32.cpp

@@ -284,6 +284,87 @@ bool OperandARM32FlexImm::canHoldImm(uint32_t Immediate, uint32_t *RotateAmt,

return false;

}

+OperandARM32FlexFpImm::OperandARM32FlexFpImm(Cfg * /*Func*/, Type Ty,

+ uint32_t ModifiedImm)

+ : OperandARM32Flex(kFlexFpImm, Ty), ModifiedImm(ModifiedImm) {}

+bool OperandARM32FlexFpImm::canHoldImm(Operand *C, uint32_t *ModifiedImm) {

+ switch (C->getType()) {

+ default:

+ llvm::report_fatal_error("Unhandled fp constant type.");

+ case IceType_f32: {

+ // We violate llvm naming conventions a bit here so that the constants are

+ // named after the bit fields they represent. See "A7.5.1 Operation of

+ // modified immediate constants, Floating-point" in the ARM ARM.

+ static constexpr uint32_t a = 0x80000000u;

+ static constexpr uint32_t B = 0x40000000;

+ static constexpr uint32_t bbbbb = 0x3E000000;

+ static constexpr uint32_t cdefgh = 0x01F80000;

+ static constexpr uint32_t AllowedBits = a | B | bbbbb | cdefgh;

+ static_assert(AllowedBits == 0xFFF80000u,

+ "Invalid mask for f32 modified immediates.");

+ const float F32 = llvm::cast<ConstantFloat>(C)->getValue();

+ const uint32_t I32 = *reinterpret_cast<const uint32_t *>(&F32);

+ if (I32 & ~AllowedBits) {

+ // constant has disallowed bits.

+ return false;

+ }

+ if ((I32 & bbbbb) != bbbbb && (I32 & bbbbb)) {

+ // not all bbbbb bits are 0 or 1.

+ return false;

+ }

+ if (((I32 & B) != 0) == ((I32 & bbbbb) != 0)) {

+ // B ^ b = 0;

+ return false;

+ }

+ *ModifiedImm = ((I32 & a) ? 0x80 : 0x00) | ((I32 & bbbbb) ? 0x40 : 0x00) |

+ ((I32 & cdefgh) >> 19);

+ return true;

+ }

+ case IceType_f64: {

+ static constexpr uint32_t a = 0x80000000u;

+ static constexpr uint32_t B = 0x40000000;

+ static constexpr uint32_t bbbbbbbb = 0x3FC00000;

+ static constexpr uint32_t cdefgh = 0x003F0000;

+ static constexpr uint32_t AllowedBits = a | B | bbbbbbbb | cdefgh;

+ static_assert(AllowedBits == 0xFFFF0000u,

+ "Invalid mask for f64 modified immediates.");

+ const double F64 = llvm::cast<ConstantDouble>(C)->getValue();

+ const uint64_t I64 = *reinterpret_cast<const uint64_t *>(&F64);

+ if (I64 & 0xFFFFFFFFu) {

+ // constant has disallowed bits.

+ return false;

+ }

+ const uint32_t I32 = I64 >> 32;

+ if (I32 & ~AllowedBits) {

+ // constant has disallowed bits.

+ return false;

+ }

+ if ((I32 & bbbbbbbb) != bbbbbbbb && (I32 & bbbbbbbb)) {

+ // not all bbbbb bits are 0 or 1.

+ return false;

+ }

+ if (((I32 & B) != 0) == ((I32 & bbbbbbbb) != 0)) {

+ // B ^ b = 0;

+ return false;

+ }

+ *ModifiedImm = ((I32 & a) ? 0x80 : 0x00) |

+ ((I32 & bbbbbbbb) ? 0x40 : 0x00) | ((I32 & cdefgh) >> 16);

+ return true;

+ }

+OperandARM32FlexFpZero::OperandARM32FlexFpZero(Cfg * /*Func*/, Type Ty)

+ : OperandARM32Flex(kFlexFpZero, Ty) {}

OperandARM32FlexReg::OperandARM32FlexReg(Cfg *Func, Type Ty, Variable *Reg,

ShiftKind ShiftOp, Operand *ShiftAmt)

: OperandARM32Flex(kFlexReg, Ty), Reg(Reg), ShiftOp(ShiftOp),

@@ -557,15 +638,18 @@ template <> void InstARM32Tst::emitIAS(const Cfg *Func) const {

emitUsingTextFixup(Func);

}

-InstARM32Vcmp::InstARM32Vcmp(Cfg *Func, Variable *Src0, Variable *Src1,

+InstARM32Vcmp::InstARM32Vcmp(Cfg *Func, Variable *Src0, Operand *Src1,

CondARM32::Cond Predicate)

: InstARM32Pred(Func, InstARM32::Vcmp, 2, nullptr, Predicate) {

+ HasSideEffects = true;

addSource(Src0);

addSource(Src1);

}

InstARM32Vmrs::InstARM32Vmrs(Cfg *Func, CondARM32::Cond Predicate)

- : InstARM32Pred(Func, InstARM32::Vmrs, 0, nullptr, Predicate) {}

+ : InstARM32Pred(Func, InstARM32::Vmrs, 0, nullptr, Predicate) {

+ HasSideEffects = true;

InstARM32Vabs::InstARM32Vabs(Cfg *Func, Variable *Dest, Variable *Src,

CondARM32::Cond Predicate)

@@ -605,6 +689,7 @@ template <> const char *InstARM32Lsr::Opcode = "lsr";

template <> const char *InstARM32Mul::Opcode = "mul";

template <> const char *InstARM32Orr::Opcode = "orr";

template <> const char *InstARM32Rsb::Opcode = "rsb";

+template <> const char *InstARM32Rsc::Opcode = "rsc";

template <> const char *InstARM32Sbc::Opcode = "sbc";

template <> const char *InstARM32Sdiv::Opcode = "sdiv";

template <> const char *InstARM32Sub::Opcode = "sub";

@@ -613,11 +698,13 @@ template <> const char *InstARM32Udiv::Opcode = "udiv";

template <> const char *InstARM32Vadd::Opcode = "vadd";

template <> const char *InstARM32Vdiv::Opcode = "vdiv";

template <> const char *InstARM32Vmul::Opcode = "vmul";

+template <> const char *InstARM32Veor::Opcode = "veor";

template <> const char *InstARM32Vsub::Opcode = "vsub";

// Four-addr ops

template <> const char *InstARM32Mla::Opcode = "mla";

template <> const char *InstARM32Mls::Opcode = "mls";

// Cmp-like ops

+template <> const char *InstARM32Cmn::Opcode = "cmn";

template <> const char *InstARM32Cmp::Opcode = "cmp";

template <> const char *InstARM32Tst::Opcode = "tst";

@@ -1701,6 +1788,67 @@ void OperandARM32FlexImm::dump(const Cfg * /* Func */, Ostream &Str) const {

Str << "#(" << Imm << " ror 2*" << RotateAmt << ")";

}

+namespace {

+static constexpr uint32_t a = 0x80;

+static constexpr uint32_t b = 0x40;

+static constexpr uint32_t cdefgh = 0x3F;

+static constexpr uint32_t AllowedBits = a | b | cdefgh;

+static_assert(AllowedBits == 0xFF,

+ "Invalid mask for f32/f64 constant rematerialization.");

+// There's no loss in always returning the modified immediate as float.

+// TODO(jpp): returning a double causes problems when outputting the constants

+// for filetype=asm. Why?

+float materializeFloatImmediate(uint32_t ModifiedImm) {

+ const uint32_t Ret = ((ModifiedImm & a) ? 0x80000000 : 0) |

+ ((ModifiedImm & b) ? 0x3E000000 : 0x40000000) |

+ ((ModifiedImm & cdefgh) << 19);

+ return *reinterpret_cast<const float *>(&Ret);

+} // end of anonymous namespace

+void OperandARM32FlexFpImm::emit(const Cfg *Func) const {

+ if (!BuildDefs::dump())

+ return;

+ Ostream &Str = Func->getContext()->getStrEmit();

+ switch (Ty) {

+ default:

+ llvm::report_fatal_error("Invalid flex fp imm type.");

+ case IceType_f64:

+ case IceType_f32:

+ Str << "#" << materializeFloatImmediate(ModifiedImm)

+ << " @ Modified: " << ModifiedImm;

+ break;

+ }

+void OperandARM32FlexFpImm::dump(const Cfg * /*Func*/, Ostream &Str) const {

+ if (!BuildDefs::dump())

+ return;

+ Str << "#" << materializeFloatImmediate(ModifiedImm)

+ << InstARM32::getVecWidthString(Ty);

+void OperandARM32FlexFpZero::emit(const Cfg *Func) const {

+ if (!BuildDefs::dump())

+ return;

+ Ostream &Str = Func->getContext()->getStrEmit();

+ switch (Ty) {

+ default:

+ llvm::report_fatal_error("Invalid flex fp imm type.");

+ case IceType_f64:

+ case IceType_f32:

+ Str << "#0.0";

+ }

+void OperandARM32FlexFpZero::dump(const Cfg * /*Func*/, Ostream &Str) const {

+ if (!BuildDefs::dump())

+ return;

+ Str << "#0.0" << InstARM32::getVecWidthString(Ty);

void OperandARM32FlexReg::emit(const Cfg *Func) const {

if (!BuildDefs::dump())

return;

@@ -1741,6 +1889,7 @@ template class InstARM32ThreeAddrGPR<InstARM32::Lsr>;

template class InstARM32ThreeAddrGPR<InstARM32::Mul>;

template class InstARM32ThreeAddrGPR<InstARM32::Orr>;

template class InstARM32ThreeAddrGPR<InstARM32::Rsb>;

+template class InstARM32ThreeAddrGPR<InstARM32::Rsc>;

template class InstARM32ThreeAddrGPR<InstARM32::Sbc>;

template class InstARM32ThreeAddrGPR<InstARM32::Sdiv>;

template class InstARM32ThreeAddrGPR<InstARM32::Sub>;

@@ -1749,6 +1898,7 @@ template class InstARM32ThreeAddrGPR<InstARM32::Udiv>;

template class InstARM32ThreeAddrFP<InstARM32::Vadd>;

template class InstARM32ThreeAddrFP<InstARM32::Vdiv>;

template class InstARM32ThreeAddrFP<InstARM32::Vmul>;

+template class InstARM32ThreeAddrFP<InstARM32::Veor>;

template class InstARM32ThreeAddrFP<InstARM32::Vsub>;

template class InstARM32LoadBase<InstARM32::Ldr>;

@@ -1768,6 +1918,7 @@ template class InstARM32UnaryopFP<InstARM32::Vsqrt>;

template class InstARM32FourAddrGPR<InstARM32::Mla>;

template class InstARM32FourAddrGPR<InstARM32::Mls>;

+template class InstARM32CmpLike<InstARM32::Cmn>;

template class InstARM32CmpLike<InstARM32::Cmp>;

template class InstARM32CmpLike<InstARM32::Tst>;

« no previous file with comments | « src/IceInstARM32.h ('k') | src/IceTargetLoweringARM32.h » ('j') | no next file with comments »