src/arm/simulator-arm.cc - Issue 2773303002: [ARM] Implement widening and narrowing integer moves, vmovl, vqmovn.

Unified Diff: src/arm/simulator-arm.cc

Issue 2773303002: [ARM] Implement widening and narrowing integer moves, vmovl, vqmovn. (Closed)

Patch Set: Missed file. Created 3 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: src/arm/simulator-arm.cc

diff --git a/src/arm/simulator-arm.cc b/src/arm/simulator-arm.cc

index 370ddd002248d279b5762bd6f71b4777b7aaba5d..2bf2a6a3a52b549a548bc384081e975344b63a69 100644

--- a/src/arm/simulator-arm.cc

+++ b/src/arm/simulator-arm.cc

@@ -3996,10 +3996,24 @@ void Simulator::DecodeType6CoprocessorIns(Instruction* instr) {

// Templated operations for NEON instructions.

// TODO(bbudge) Add more templates for use in DecodeSpecialCondition.

-template <typename T>

-int64_t Widen(T value) {

+template <typename T, typename U>

+U Widen(T value) {

static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller");

- return static_cast<int64_t>(value);

+ static_assert(sizeof(U) > sizeof(T), "T must smaller than U");

+ return static_cast<U>(value);

+// Converts |value| from the wider integer type T to the narrower integer
+// type U. T and U must have the same signedness. Callers are expected to
+// pass a value that already fits in U (e.g. after clamping); the DCHECKs
+// below verify that.
+template <typename T, typename U>
+U Narrow(T value) {
+  static_assert(sizeof(int8_t) < sizeof(T), "T must be int16_t or larger");
+  static_assert(sizeof(U) < sizeof(T), "T must larger than U");
+  static_assert(std::is_unsigned<T>() == std::is_unsigned<U>(),
+                "Signed-ness of T and U must match");
+  // Make sure value can be expressed in the smaller type; otherwise, the
+  // result of the cast is implementation defined. The bounds must come from
+  // U (the destination type): bounds taken from T hold for every T value and
+  // would check nothing. Converting U's limits to T is lossless because U is
+  // narrower than T and has the same signedness.
+  DCHECK_LE(static_cast<T>(std::numeric_limits<U>::min()), value);
+  DCHECK_GE(static_cast<T>(std::numeric_limits<U>::max()), value);
+  return static_cast<U>(value);
}

template <typename T>

@@ -4016,6 +4030,30 @@ T MinMax(T a, T b, bool is_min) {

return is_min ? std::min(a, b) : std::max(a, b);

}

+template <typename T, typename U>

+void Widen(Simulator* simulator, int Vd, int Vm) {

+ static const int kLanes = 8 / sizeof(T);

+ T src[kLanes];

+ U dst[kLanes];

+ simulator->get_d_register(Vm, src);

+ for (int i = 0; i < kLanes; i++) {

+ dst[i] = Widen<T, U>(src[i]);

+ }

+ simulator->set_q_register(Vd, dst);

+template <typename T, typename U>

+void SaturatingNarrow(Simulator* simulator, int Vd, int Vm) {

+ static const int kLanes = 16 / sizeof(T);

+ T src[kLanes];

+ U dst[kLanes];

+ simulator->get_q_register(Vm, src);

+ for (int i = 0; i < kLanes; i++) {

+ dst[i] = Narrow<T, U>(Clamp<U>(src[i]));

+ }

+ simulator->set_d_register(Vd, dst);

template <typename T>

void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn) {

static const int kLanes = 16 / sizeof(T);

@@ -4023,7 +4061,7 @@ void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn) {

simulator->get_q_register(Vn, src1);

simulator->get_q_register(Vm, src2);

for (int i = 0; i < kLanes; i++) {

- src1[i] = Clamp<T>(Widen(src1[i]) + Widen(src2[i]));

+ src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) + Widen<T, int64_t>(src2[i]));

}

simulator->set_q_register(Vd, src1);

}

@@ -4035,7 +4073,7 @@ void SubSaturate(Simulator* simulator, int Vd, int Vm, int Vn) {

simulator->get_q_register(Vn, src1);

simulator->get_q_register(Vm, src2);

for (int i = 0; i < kLanes; i++) {

- src1[i] = Clamp<T>(Widen(src1[i]) - Widen(src2[i]));

+ src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) - Widen<T, int64_t>(src2[i]));

}

simulator->set_q_register(Vd, src1);

}

@@ -4464,21 +4502,23 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {

(instr->Bit(4) == 1)) {

// vmovl signed

if ((instr->VdValue() & 1) != 0) UNIMPLEMENTED();

- int Vd = (instr->Bit(22) << 3) | (instr->VdValue() >> 1);

- int Vm = (instr->Bit(5) << 4) | instr->VmValue();

+ int Vd = instr->VFPDRegValue(kSimd128Precision);

+ int Vm = instr->VFPMRegValue(kDoublePrecision);

int imm3 = instr->Bits(21, 19);

- if ((imm3 != 1) && (imm3 != 2) && (imm3 != 4)) UNIMPLEMENTED();

- int esize = 8 * imm3;

- int elements = 64 / esize;

- int8_t from[8];

- get_d_register(Vm, reinterpret_cast<uint64_t*>(from));

- int16_t to[8];

- int e = 0;

- while (e < elements) {

- to[e] = from[e];

- e++;

+ switch (imm3) {

+ case 1:

+ Widen<int8_t, int16_t>(this, Vd, Vm);

+ break;

+ case 2:

+ Widen<int16_t, int32_t>(this, Vd, Vm);

+ break;

+ case 4:

+ Widen<int32_t, int64_t>(this, Vd, Vm);

+ break;

+ default:

+ UNIMPLEMENTED();

+ break;

}

- set_q_register(Vd, reinterpret_cast<uint64_t*>(to));

} else if (instr->Bits(21, 20) == 3 && instr->Bit(4) == 0) {

// vext.

int imm4 = instr->Bits(11, 8);

@@ -4930,21 +4970,23 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {

(instr->Bit(4) == 1)) {

// vmovl unsigned

if ((instr->VdValue() & 1) != 0) UNIMPLEMENTED();

- int Vd = (instr->Bit(22) << 3) | (instr->VdValue() >> 1);

- int Vm = (instr->Bit(5) << 4) | instr->VmValue();

+ int Vd = instr->VFPDRegValue(kSimd128Precision);

+ int Vm = instr->VFPMRegValue(kDoublePrecision);

int imm3 = instr->Bits(21, 19);

- if ((imm3 != 1) && (imm3 != 2) && (imm3 != 4)) UNIMPLEMENTED();

- int esize = 8 * imm3;

- int elements = 64 / esize;

- uint8_t from[8];

- get_d_register(Vm, reinterpret_cast<uint64_t*>(from));

- uint16_t to[8];

- int e = 0;

- while (e < elements) {

- to[e] = from[e];

- e++;

+ switch (imm3) {

+ case 1:

+ Widen<uint8_t, uint16_t>(this, Vd, Vm);

+ break;

+ case 2:

+ Widen<uint16_t, uint32_t>(this, Vd, Vm);

+ break;

+ case 4:

+ Widen<uint32_t, uint64_t>(this, Vd, Vm);

+ break;

+ default:

+ UNIMPLEMENTED();

+ break;

}

- set_q_register(Vd, reinterpret_cast<uint64_t*>(to));

} else if (instr->Opc1Value() == 7 && instr->Bit(4) == 0) {

if (instr->Bits(19, 16) == 0xB && instr->Bits(11, 9) == 0x3 &&

instr->Bit(6) == 1) {

@@ -5392,6 +5434,42 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {

}

set_q_register(Vd, src);

+ } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x2 &&

+ instr->Bits(7, 6) != 0) {

+ // vqmovn.<type><size> Dd, Qm.

+ int Vd = instr->VFPDRegValue(kDoublePrecision);

+ int Vm = instr->VFPMRegValue(kSimd128Precision);

+ NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));

+ bool is_unsigned = instr->Bit(6) != 0;

+ switch (size) {

+ case Neon8: {

+ if (is_unsigned) {

+ SaturatingNarrow<uint16_t, uint8_t>(this, Vd, Vm);

+ } else {

+ SaturatingNarrow<int16_t, int8_t>(this, Vd, Vm);

+ }

+ break;

+ }

+ case Neon16: {

+ if (is_unsigned) {

+ SaturatingNarrow<uint32_t, uint16_t>(this, Vd, Vm);

+ } else {

+ SaturatingNarrow<int32_t, int16_t>(this, Vd, Vm);

+ }

+ break;

+ }

+ case Neon32: {

+ if (is_unsigned) {

+ SaturatingNarrow<uint64_t, uint32_t>(this, Vd, Vm);

+ } else {

+ SaturatingNarrow<int64_t, int32_t>(this, Vd, Vm);

+ }

+ break;

+ }

+ default:

+ UNIMPLEMENTED();

+ break;

+ }

} else {

UNIMPLEMENTED();

}

« no previous file with comments | « src/arm/disasm-arm.cc ('k') | test/cctest/test-assembler-arm.cc » ('j') | no next file with comments »