Index: src/arm/simulator-arm.cc |
diff --git a/src/arm/simulator-arm.cc b/src/arm/simulator-arm.cc |
index 370ddd002248d279b5762bd6f71b4777b7aaba5d..2bf2a6a3a52b549a548bc384081e975344b63a69 100644 |
--- a/src/arm/simulator-arm.cc |
+++ b/src/arm/simulator-arm.cc |
@@ -3996,10 +3996,24 @@ void Simulator::DecodeType6CoprocessorIns(Instruction* instr) { |
// Templated operations for NEON instructions. |
// TODO(bbudge) Add more templates for use in DecodeSpecialCondition. |
-template <typename T> |
-int64_t Widen(T value) { |
+template <typename T, typename U> |
+U Widen(T value) { |
static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller"); |
- return static_cast<int64_t>(value); |
+ static_assert(sizeof(U) > sizeof(T), "T must smaller than U"); |
+ return static_cast<U>(value); |
+} |
+ |
+template <typename T, typename U> |
+U Narrow(T value) { |
+ static_assert(sizeof(int8_t) < sizeof(T), "T must be int16_t or larger"); |
+ static_assert(sizeof(U) < sizeof(T), "T must larger than U"); |
+ static_assert(std::is_unsigned<T>() == std::is_unsigned<U>(), |
+ "Signed-ness of T and U must match"); |
+ // Make sure value can be expressed in the smaller type; otherwise, the |
+ // casted result is implementation defined. |
+ DCHECK_LE(std::numeric_limits<T>::min(), value); |
+ DCHECK_GE(std::numeric_limits<T>::max(), value); |
+ return static_cast<U>(value); |
} |
template <typename T> |
@@ -4016,6 +4030,30 @@ T MinMax(T a, T b, bool is_min) { |
return is_min ? std::min(a, b) : std::max(a, b); |
} |
+template <typename T, typename U> |
+void Widen(Simulator* simulator, int Vd, int Vm) { |
+ static const int kLanes = 8 / sizeof(T); |
+ T src[kLanes]; |
+ U dst[kLanes]; |
+ simulator->get_d_register(Vm, src); |
+ for (int i = 0; i < kLanes; i++) { |
+ dst[i] = Widen<T, U>(src[i]); |
+ } |
+ simulator->set_q_register(Vd, dst); |
+} |
+ |
+template <typename T, typename U> |
+void SaturatingNarrow(Simulator* simulator, int Vd, int Vm) { |
+ static const int kLanes = 16 / sizeof(T); |
+ T src[kLanes]; |
+ U dst[kLanes]; |
+ simulator->get_q_register(Vm, src); |
+ for (int i = 0; i < kLanes; i++) { |
+ dst[i] = Narrow<T, U>(Clamp<U>(src[i])); |
+ } |
+ simulator->set_d_register(Vd, dst); |
+} |
+ |
template <typename T> |
void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn) { |
static const int kLanes = 16 / sizeof(T); |
@@ -4023,7 +4061,7 @@ void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn) { |
simulator->get_q_register(Vn, src1); |
simulator->get_q_register(Vm, src2); |
for (int i = 0; i < kLanes; i++) { |
- src1[i] = Clamp<T>(Widen(src1[i]) + Widen(src2[i])); |
+ src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) + Widen<T, int64_t>(src2[i])); |
} |
simulator->set_q_register(Vd, src1); |
} |
@@ -4035,7 +4073,7 @@ void SubSaturate(Simulator* simulator, int Vd, int Vm, int Vn) { |
simulator->get_q_register(Vn, src1); |
simulator->get_q_register(Vm, src2); |
for (int i = 0; i < kLanes; i++) { |
- src1[i] = Clamp<T>(Widen(src1[i]) - Widen(src2[i])); |
+ src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) - Widen<T, int64_t>(src2[i])); |
} |
simulator->set_q_register(Vd, src1); |
} |
@@ -4464,21 +4502,23 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
(instr->Bit(4) == 1)) { |
// vmovl signed |
if ((instr->VdValue() & 1) != 0) UNIMPLEMENTED(); |
- int Vd = (instr->Bit(22) << 3) | (instr->VdValue() >> 1); |
- int Vm = (instr->Bit(5) << 4) | instr->VmValue(); |
+ int Vd = instr->VFPDRegValue(kSimd128Precision); |
+ int Vm = instr->VFPMRegValue(kDoublePrecision); |
int imm3 = instr->Bits(21, 19); |
- if ((imm3 != 1) && (imm3 != 2) && (imm3 != 4)) UNIMPLEMENTED(); |
- int esize = 8 * imm3; |
- int elements = 64 / esize; |
- int8_t from[8]; |
- get_d_register(Vm, reinterpret_cast<uint64_t*>(from)); |
- int16_t to[8]; |
- int e = 0; |
- while (e < elements) { |
- to[e] = from[e]; |
- e++; |
+ switch (imm3) { |
+ case 1: |
+ Widen<int8_t, int16_t>(this, Vd, Vm); |
+ break; |
+ case 2: |
+ Widen<int16_t, int32_t>(this, Vd, Vm); |
+ break; |
+ case 4: |
+ Widen<int32_t, int64_t>(this, Vd, Vm); |
+ break; |
+ default: |
+ UNIMPLEMENTED(); |
+ break; |
} |
- set_q_register(Vd, reinterpret_cast<uint64_t*>(to)); |
} else if (instr->Bits(21, 20) == 3 && instr->Bit(4) == 0) { |
// vext. |
int imm4 = instr->Bits(11, 8); |
@@ -4930,21 +4970,23 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
(instr->Bit(4) == 1)) { |
// vmovl unsigned |
if ((instr->VdValue() & 1) != 0) UNIMPLEMENTED(); |
- int Vd = (instr->Bit(22) << 3) | (instr->VdValue() >> 1); |
- int Vm = (instr->Bit(5) << 4) | instr->VmValue(); |
+ int Vd = instr->VFPDRegValue(kSimd128Precision); |
+ int Vm = instr->VFPMRegValue(kDoublePrecision); |
int imm3 = instr->Bits(21, 19); |
- if ((imm3 != 1) && (imm3 != 2) && (imm3 != 4)) UNIMPLEMENTED(); |
- int esize = 8 * imm3; |
- int elements = 64 / esize; |
- uint8_t from[8]; |
- get_d_register(Vm, reinterpret_cast<uint64_t*>(from)); |
- uint16_t to[8]; |
- int e = 0; |
- while (e < elements) { |
- to[e] = from[e]; |
- e++; |
+ switch (imm3) { |
+ case 1: |
+ Widen<uint8_t, uint16_t>(this, Vd, Vm); |
+ break; |
+ case 2: |
+ Widen<uint16_t, uint32_t>(this, Vd, Vm); |
+ break; |
+ case 4: |
+ Widen<uint32_t, uint64_t>(this, Vd, Vm); |
+ break; |
+ default: |
+ UNIMPLEMENTED(); |
+ break; |
} |
- set_q_register(Vd, reinterpret_cast<uint64_t*>(to)); |
} else if (instr->Opc1Value() == 7 && instr->Bit(4) == 0) { |
if (instr->Bits(19, 16) == 0xB && instr->Bits(11, 9) == 0x3 && |
instr->Bit(6) == 1) { |
@@ -5392,6 +5434,42 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
} |
} |
set_q_register(Vd, src); |
+ } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x2 && |
+ instr->Bits(7, 6) != 0) { |
+ // vqmovn.<type><size> Dd, Qm. |
+ int Vd = instr->VFPDRegValue(kDoublePrecision); |
+ int Vm = instr->VFPMRegValue(kSimd128Precision); |
+ NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); |
+ bool is_unsigned = instr->Bit(6) != 0; |
+ switch (size) { |
+ case Neon8: { |
+ if (is_unsigned) { |
+ SaturatingNarrow<uint16_t, uint8_t>(this, Vd, Vm); |
+ } else { |
+ SaturatingNarrow<int16_t, int8_t>(this, Vd, Vm); |
+ } |
+ break; |
+ } |
+ case Neon16: { |
+ if (is_unsigned) { |
+ SaturatingNarrow<uint32_t, uint16_t>(this, Vd, Vm); |
+ } else { |
+ SaturatingNarrow<int32_t, int16_t>(this, Vd, Vm); |
+ } |
+ break; |
+ } |
+ case Neon32: { |
+ if (is_unsigned) { |
+ SaturatingNarrow<uint64_t, uint32_t>(this, Vd, Vm); |
+ } else { |
+ SaturatingNarrow<int64_t, int32_t>(this, Vd, Vm); |
+ } |
+ break; |
+ } |
+ default: |
+ UNIMPLEMENTED(); |
+ break; |
+ } |
} else { |
UNIMPLEMENTED(); |
} |