Index: src/arm/simulator-arm.cc |
diff --git a/src/arm/simulator-arm.cc b/src/arm/simulator-arm.cc |
index ad7c4daae19d9323935de5f2adeca98dcea13292..3a3a90225bf0af1fa7ba72f454fca9fd8487eb42 100644 |
--- a/src/arm/simulator-arm.cc |
+++ b/src/arm/simulator-arm.cc |
@@ -3988,61 +3988,177 @@ void Simulator::DecodeType6CoprocessorIns(Instruction* instr) { |
} |
} |
+// Templated operations for NEON instructions. |
+// TODO(bbudge) Add more templates for use in DecodeSpecialCondition. |
+template <typename T> |
+int64_t Widen(T value) { |
+ static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller"); |
+ return static_cast<int64_t>(value); |
+} |
+ |
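+// Saturate the widened result back into T's range. For example, |
+// Clamp<int8_t>(300) == 127 and Clamp<uint8_t>(-1) == 0. |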
+template <typename T> |
+T Clamp(int64_t value) { |
+ static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller"); |
+ int64_t min = static_cast<int64_t>(std::numeric_limits<T>::min()); |
+ int64_t max = static_cast<int64_t>(std::numeric_limits<T>::max()); |
+ int64_t clamped = std::max(min, std::min(max, value)); |
+ return static_cast<T>(clamped); |
+} |
+ |
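+// Per-lane saturating addition: Qd = saturate(Qn + Qm), with lane type T. |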
+template <typename T> |
+void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn) { |
+ static const int kLanes = 16 / sizeof(T); |
+ T src1[kLanes], src2[kLanes]; |
+ simulator->get_q_register(Vn, src1); |
+ simulator->get_q_register(Vm, src2); |
+ for (int i = 0; i < kLanes; i++) { |
+ src1[i] = Clamp<T>(Widen(src1[i]) + Widen(src2[i])); |
+ } |
+ simulator->set_q_register(Vd, src1); |
+} |
+ |
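+// Per-lane saturating subtraction: Qd = saturate(Qn - Qm), with lane type T. |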
+template <typename T> |
+void SubSaturate(Simulator* simulator, int Vd, int Vm, int Vn) { |
+ static const int kLanes = 16 / sizeof(T); |
+ T src1[kLanes], src2[kLanes]; |
+ simulator->get_q_register(Vn, src1); |
+ simulator->get_q_register(Vm, src2); |
+ for (int i = 0; i < kLanes; i++) { |
+ src1[i] = Clamp<T>(Widen(src1[i]) - Widen(src2[i])); |
+ } |
+ simulator->set_q_register(Vd, src1); |
+} |
+ |
void Simulator::DecodeSpecialCondition(Instruction* instr) { |
switch (instr->SpecialValue()) { |
- case 4: |
- if (instr->Bits(11, 8) == 1 && instr->Bits(21, 20) == 2 && |
- instr->Bit(4) == 1) { |
- // vmov Qd, Qm. |
- // vorr, Qd, Qm, Qn. |
- int Vd = instr->VFPDRegValue(kSimd128Precision); |
- int Vm = instr->VFPMRegValue(kSimd128Precision); |
- int Vn = instr->VFPNRegValue(kSimd128Precision); |
- uint32_t src1[4]; |
- get_q_register(Vm, src1); |
- if (Vm != Vn) { |
- uint32_t src2[4]; |
- get_q_register(Vn, src2); |
- for (int i = 0; i < 4; i++) { |
- src1[i] = src1[i] | src2[i]; |
+ case 4: { |
+ int Vd, Vm, Vn; |
+ if (instr->Bit(6) == 0) { |
+ Vd = instr->VFPDRegValue(kDoublePrecision); |
+ Vm = instr->VFPMRegValue(kDoublePrecision); |
+ Vn = instr->VFPNRegValue(kDoublePrecision); |
+ } else { |
+ Vd = instr->VFPDRegValue(kSimd128Precision); |
+ Vm = instr->VFPMRegValue(kSimd128Precision); |
+ Vn = instr->VFPNRegValue(kSimd128Precision); |
+ } |
+ switch (instr->Bits(11, 8)) { |
+ case 0x0: { |
+ if (instr->Bit(4) == 1) { |
+ // vqadd.s<size> Qd, Qm, Qn. |
+ NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
+ switch (size) { |
+ case Neon8: |
+ AddSaturate<int8_t>(this, Vd, Vm, Vn); |
+ break; |
+ case Neon16: |
+ AddSaturate<int16_t>(this, Vd, Vm, Vn); |
+ break; |
+ case Neon32: |
+ AddSaturate<int32_t>(this, Vd, Vm, Vn); |
+ break; |
+ default: |
+ UNREACHABLE(); |
+ break; |
+ } |
+ } else { |
+ UNIMPLEMENTED(); |
} |
+ break; |
} |
- set_q_register(Vd, src1); |
- } else if (instr->Bits(11, 8) == 8) { |
- // vadd/vtst |
- NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
- int Vd = instr->VFPDRegValue(kSimd128Precision); |
- int Vm = instr->VFPMRegValue(kSimd128Precision); |
- int Vn = instr->VFPNRegValue(kSimd128Precision); |
- if (instr->Bit(4) == 0) { |
- // vadd.i<size> Qd, Qm, Qn. |
+ case 0x1: { |
+ if (instr->Bits(21, 20) == 2 && instr->Bit(6) == 1 && |
+ instr->Bit(4) == 1) { |
+ // vmov Qd, Qm. |
+            // vorr Qd, Qm, Qn. |
+ uint32_t src1[4]; |
+ get_q_register(Vm, src1); |
+ if (Vm != Vn) { |
+ uint32_t src2[4]; |
+ get_q_register(Vn, src2); |
+ for (int i = 0; i < 4; i++) { |
+ src1[i] = src1[i] | src2[i]; |
+ } |
+ } |
+ set_q_register(Vd, src1); |
+ } else if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 1 && |
+ instr->Bit(4) == 1) { |
+ // vand Qd, Qm, Qn. |
+ uint32_t src1[4], src2[4]; |
+ get_q_register(Vn, src1); |
+ get_q_register(Vm, src2); |
+ for (int i = 0; i < 4; i++) { |
+ src1[i] = src1[i] & src2[i]; |
+ } |
+ set_q_register(Vd, src1); |
+ } else { |
+ UNIMPLEMENTED(); |
+ } |
+ break; |
+ } |
+ case 0x2: { |
+ if (instr->Bit(4) == 1) { |
+ // vqsub.s<size> Qd, Qm, Qn. |
+ NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
+ switch (size) { |
+ case Neon8: |
+ SubSaturate<int8_t>(this, Vd, Vm, Vn); |
+ break; |
+ case Neon16: |
+ SubSaturate<int16_t>(this, Vd, Vm, Vn); |
+ break; |
+ case Neon32: |
+ SubSaturate<int32_t>(this, Vd, Vm, Vn); |
+ break; |
+ default: |
+ UNREACHABLE(); |
+ break; |
+ } |
+ } else { |
+ UNIMPLEMENTED(); |
+ } |
+ break; |
+ } |
+ case 0x3: { |
+ // vcge/vcgt.s<size> Qd, Qm, Qn. |
+ bool ge = instr->Bit(4) == 1; |
+ NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
switch (size) { |
case Neon8: { |
- uint8_t src1[16], src2[16]; |
+ int8_t src1[16], src2[16]; |
get_q_register(Vn, src1); |
get_q_register(Vm, src2); |
for (int i = 0; i < 16; i++) { |
- src1[i] += src2[i]; |
+ if (ge) |
+ src1[i] = src1[i] >= src2[i] ? 0xFF : 0; |
+ else |
+ src1[i] = src1[i] > src2[i] ? 0xFF : 0; |
} |
set_q_register(Vd, src1); |
break; |
} |
case Neon16: { |
- uint16_t src1[8], src2[8]; |
+ int16_t src1[8], src2[8]; |
get_q_register(Vn, src1); |
get_q_register(Vm, src2); |
for (int i = 0; i < 8; i++) { |
- src1[i] += src2[i]; |
+ if (ge) |
+ src1[i] = src1[i] >= src2[i] ? 0xFFFF : 0; |
+ else |
+ src1[i] = src1[i] > src2[i] ? 0xFFFF : 0; |
} |
set_q_register(Vd, src1); |
break; |
} |
case Neon32: { |
- uint32_t src1[4], src2[4]; |
+ int32_t src1[4], src2[4]; |
get_q_register(Vn, src1); |
get_q_register(Vm, src2); |
for (int i = 0; i < 4; i++) { |
- src1[i] += src2[i]; |
+ if (ge) |
+ src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFF : 0; |
+ else |
+ src1[i] = src1[i] > src2[i] ? 0xFFFFFFFF : 0; |
} |
set_q_register(Vd, src1); |
break; |
@@ -4051,35 +4167,48 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
UNREACHABLE(); |
break; |
} |
- } else { |
- // vtst.i<size> Qd, Qm, Qn. |
+ break; |
+ } |
+ case 0x6: { |
+ // vmin/vmax.s<size> Qd, Qm, Qn. |
+ NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
+ bool min = instr->Bit(4) != 0; |
switch (size) { |
case Neon8: { |
- uint8_t src1[16], src2[16]; |
+ int8_t src1[16], src2[16]; |
get_q_register(Vn, src1); |
get_q_register(Vm, src2); |
for (int i = 0; i < 16; i++) { |
- src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFu : 0; |
+ if (min) |
+ src1[i] = std::min(src1[i], src2[i]); |
+ else |
+ src1[i] = std::max(src1[i], src2[i]); |
} |
set_q_register(Vd, src1); |
break; |
} |
case Neon16: { |
- uint16_t src1[8], src2[8]; |
+ int16_t src1[8], src2[8]; |
get_q_register(Vn, src1); |
get_q_register(Vm, src2); |
for (int i = 0; i < 8; i++) { |
- src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFu : 0; |
+ if (min) |
+ src1[i] = std::min(src1[i], src2[i]); |
+ else |
+ src1[i] = std::max(src1[i], src2[i]); |
} |
set_q_register(Vd, src1); |
break; |
} |
case Neon32: { |
- uint32_t src1[4], src2[4]; |
+ int32_t src1[4], src2[4]; |
get_q_register(Vn, src1); |
get_q_register(Vm, src2); |
for (int i = 0; i < 4; i++) { |
- src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFFFFFu : 0; |
+ if (min) |
+ src1[i] = std::min(src1[i], src2[i]); |
+ else |
+ src1[i] = std::max(src1[i], src2[i]); |
} |
set_q_register(Vd, src1); |
break; |
@@ -4088,234 +4217,210 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
UNREACHABLE(); |
break; |
} |
+ break; |
} |
- } else if (instr->Bits(11, 8) == 0xd && instr->Bit(20) == 0 && |
- instr->Bit(4) == 0) { |
- int Vd = instr->VFPDRegValue(kSimd128Precision); |
- int Vm = instr->VFPMRegValue(kSimd128Precision); |
- int Vn = instr->VFPNRegValue(kSimd128Precision); |
- float src1[4], src2[4]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 4; i++) { |
- if (instr->Bit(21) == 0) { |
- // vadd.f32 Qd, Qm, Qn. |
- src1[i] = src1[i] + src2[i]; |
+ case 0x8: { |
+ // vadd/vtst |
+ NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
+ if (instr->Bit(4) == 0) { |
+ // vadd.i<size> Qd, Qm, Qn. |
+ switch (size) { |
+ case Neon8: { |
+ uint8_t src1[16], src2[16]; |
+ get_q_register(Vn, src1); |
+ get_q_register(Vm, src2); |
+ for (int i = 0; i < 16; i++) { |
+ src1[i] += src2[i]; |
+ } |
+ set_q_register(Vd, src1); |
+ break; |
+ } |
+ case Neon16: { |
+ uint16_t src1[8], src2[8]; |
+ get_q_register(Vn, src1); |
+ get_q_register(Vm, src2); |
+ for (int i = 0; i < 8; i++) { |
+ src1[i] += src2[i]; |
+ } |
+ set_q_register(Vd, src1); |
+ break; |
+ } |
+ case Neon32: { |
+ uint32_t src1[4], src2[4]; |
+ get_q_register(Vn, src1); |
+ get_q_register(Vm, src2); |
+ for (int i = 0; i < 4; i++) { |
+ src1[i] += src2[i]; |
+ } |
+ set_q_register(Vd, src1); |
+ break; |
+ } |
+ default: |
+ UNREACHABLE(); |
+ break; |
+ } |
} else { |
- // vsub.f32 Qd, Qm, Qn. |
- src1[i] = src1[i] - src2[i]; |
- } |
- } |
- set_q_register(Vd, src1); |
- } else if (instr->Bits(11, 8) == 0x9 && instr->Bit(6) == 1 && |
- instr->Bit(4) == 1) { |
- // vmul.i<size> Qd, Qm, Qn. |
- NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
- int Vd = instr->VFPDRegValue(kSimd128Precision); |
- int Vm = instr->VFPMRegValue(kSimd128Precision); |
- int Vn = instr->VFPNRegValue(kSimd128Precision); |
- switch (size) { |
- case Neon8: { |
- uint8_t src1[16], src2[16]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 16; i++) { |
- src1[i] *= src2[i]; |
+ // vtst.i<size> Qd, Qm, Qn. |
+ switch (size) { |
+ case Neon8: { |
+ uint8_t src1[16], src2[16]; |
+ get_q_register(Vn, src1); |
+ get_q_register(Vm, src2); |
+ for (int i = 0; i < 16; i++) { |
+ src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFu : 0; |
+ } |
+ set_q_register(Vd, src1); |
+ break; |
+ } |
+ case Neon16: { |
+ uint16_t src1[8], src2[8]; |
+ get_q_register(Vn, src1); |
+ get_q_register(Vm, src2); |
+ for (int i = 0; i < 8; i++) { |
+ src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFu : 0; |
+ } |
+ set_q_register(Vd, src1); |
+ break; |
+ } |
+ case Neon32: { |
+ uint32_t src1[4], src2[4]; |
+ get_q_register(Vn, src1); |
+ get_q_register(Vm, src2); |
+ for (int i = 0; i < 4; i++) { |
+ src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFFFFFu : 0; |
+ } |
+ set_q_register(Vd, src1); |
+ break; |
+ } |
+ default: |
+ UNREACHABLE(); |
+ break; |
} |
- set_q_register(Vd, src1); |
- break; |
} |
- case Neon16: { |
- uint16_t src1[8], src2[8]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 8; i++) { |
- src1[i] *= src2[i]; |
+ break; |
+ } |
+ case 0x9: { |
+ if (instr->Bit(6) == 1 && instr->Bit(4) == 1) { |
+ // vmul.i<size> Qd, Qm, Qn. |
+ NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
+ switch (size) { |
+ case Neon8: { |
+ uint8_t src1[16], src2[16]; |
+ get_q_register(Vn, src1); |
+ get_q_register(Vm, src2); |
+ for (int i = 0; i < 16; i++) { |
+ src1[i] *= src2[i]; |
+ } |
+ set_q_register(Vd, src1); |
+ break; |
+ } |
+ case Neon16: { |
+ uint16_t src1[8], src2[8]; |
+ get_q_register(Vn, src1); |
+ get_q_register(Vm, src2); |
+ for (int i = 0; i < 8; i++) { |
+ src1[i] *= src2[i]; |
+ } |
+ set_q_register(Vd, src1); |
+ break; |
+ } |
+ case Neon32: { |
+ uint32_t src1[4], src2[4]; |
+ get_q_register(Vn, src1); |
+ get_q_register(Vm, src2); |
+ for (int i = 0; i < 4; i++) { |
+ src1[i] *= src2[i]; |
+ } |
+ set_q_register(Vd, src1); |
+ break; |
+ } |
+ default: |
+ UNREACHABLE(); |
+ break; |
} |
- set_q_register(Vd, src1); |
- break; |
+ } else { |
+ UNIMPLEMENTED(); |
} |
- case Neon32: { |
- uint32_t src1[4], src2[4]; |
+ break; |
+ } |
+ case 0xd: { |
+ if (instr->Bit(4) == 0) { |
+ float src1[4], src2[4]; |
get_q_register(Vn, src1); |
get_q_register(Vm, src2); |
for (int i = 0; i < 4; i++) { |
- src1[i] *= src2[i]; |
+ if (instr->Bit(21) == 0) { |
+ // vadd.f32 Qd, Qm, Qn. |
+ src1[i] = src1[i] + src2[i]; |
+ } else { |
+ // vsub.f32 Qd, Qm, Qn. |
+ src1[i] = src1[i] - src2[i]; |
+ } |
} |
set_q_register(Vd, src1); |
- break; |
- } |
- default: |
+ } else { |
UNIMPLEMENTED(); |
- break; |
- } |
- } else if (instr->Bits(11, 8) == 0xe && instr->Bits(21, 20) == 0 && |
- instr->Bit(4) == 0) { |
- // vceq.f32. |
- int Vd = instr->VFPDRegValue(kSimd128Precision); |
- int Vm = instr->VFPMRegValue(kSimd128Precision); |
- int Vn = instr->VFPNRegValue(kSimd128Precision); |
- float src1[4], src2[4]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- uint32_t dst[4]; |
- for (int i = 0; i < 4; i++) { |
- dst[i] = (src1[i] == src2[i]) ? 0xFFFFFFFF : 0; |
- } |
- set_q_register(Vd, dst); |
- } else if (instr->Bits(11, 8) == 1 && instr->Bits(21, 20) == 0 && |
- instr->Bit(6) == 1 && instr->Bit(4) == 1) { |
- int Vd = instr->VFPDRegValue(kSimd128Precision); |
- int Vm = instr->VFPMRegValue(kSimd128Precision); |
- int Vn = instr->VFPNRegValue(kSimd128Precision); |
- // vand Qd, Qm, Qn. |
- uint32_t src1[4], src2[4]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 4; i++) { |
- src1[i] = src1[i] & src2[i]; |
- } |
- set_q_register(Vd, src1); |
- } else if (instr->Bits(11, 8) == 0x3) { |
- // vcge/vcgt.s<size> Qd, Qm, Qn. |
- bool ge = instr->Bit(4) == 1; |
- NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
- int Vd = instr->VFPDRegValue(kSimd128Precision); |
- int Vm = instr->VFPMRegValue(kSimd128Precision); |
- int Vn = instr->VFPNRegValue(kSimd128Precision); |
- switch (size) { |
- case Neon8: { |
- int8_t src1[16], src2[16]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 16; i++) { |
- if (ge) |
- src1[i] = src1[i] >= src2[i] ? 0xFF : 0; |
- else |
- src1[i] = src1[i] > src2[i] ? 0xFF : 0; |
- } |
- set_q_register(Vd, src1); |
- break; |
- } |
- case Neon16: { |
- int16_t src1[8], src2[8]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 8; i++) { |
- if (ge) |
- src1[i] = src1[i] >= src2[i] ? 0xFFFF : 0; |
- else |
- src1[i] = src1[i] > src2[i] ? 0xFFFF : 0; |
- } |
- set_q_register(Vd, src1); |
- break; |
} |
- case Neon32: { |
- int32_t src1[4], src2[4]; |
+ break; |
+ } |
+ case 0xe: { |
+ if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 0) { |
+ // vceq.f32. |
+ float src1[4], src2[4]; |
get_q_register(Vn, src1); |
get_q_register(Vm, src2); |
+ uint32_t dst[4]; |
for (int i = 0; i < 4; i++) { |
- if (ge) |
- src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFF : 0; |
- else |
- src1[i] = src1[i] > src2[i] ? 0xFFFFFFFF : 0; |
- } |
- set_q_register(Vd, src1); |
- break; |
- } |
- default: |
- UNREACHABLE(); |
- break; |
- } |
- } else if (instr->Bits(11, 8) == 0xf && instr->Bit(20) == 0 && |
- instr->Bit(6) == 1) { |
- int Vd = instr->VFPDRegValue(kSimd128Precision); |
- int Vm = instr->VFPMRegValue(kSimd128Precision); |
- int Vn = instr->VFPNRegValue(kSimd128Precision); |
- float src1[4], src2[4]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- if (instr->Bit(4) == 1) { |
- if (instr->Bit(21) == 0) { |
- // vrecps.f32 Qd, Qm, Qn. |
- for (int i = 0; i < 4; i++) { |
- src1[i] = 2.0f - src1[i] * src2[i]; |
- } |
- } else { |
- // vrsqrts.f32 Qd, Qm, Qn. |
- for (int i = 0; i < 4; i++) { |
- src1[i] = (3.0f - src1[i] * src2[i]) * 0.5f; |
- } |
- } |
- } else { |
- if (instr->Bit(21) == 1) { |
- // vmin.f32 Qd, Qm, Qn. |
- for (int i = 0; i < 4; i++) { |
- src1[i] = std::min(src1[i], src2[i]); |
+ dst[i] = (src1[i] == src2[i]) ? 0xFFFFFFFF : 0; |
} |
+ set_q_register(Vd, dst); |
} else { |
- // vmax.f32 Qd, Qm, Qn. |
- for (int i = 0; i < 4; i++) { |
- src1[i] = std::max(src1[i], src2[i]); |
- } |
+ UNIMPLEMENTED(); |
} |
+ break; |
} |
- set_q_register(Vd, src1); |
- } else if (instr->Bits(11, 8) == 0x6) { |
- // vmin/vmax.s<size> Qd, Qm, Qn. |
- NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
- int Vd = instr->VFPDRegValue(kSimd128Precision); |
- int Vm = instr->VFPMRegValue(kSimd128Precision); |
- int Vn = instr->VFPNRegValue(kSimd128Precision); |
- bool min = instr->Bit(4) != 0; |
- switch (size) { |
- case Neon8: { |
- int8_t src1[16], src2[16]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 16; i++) { |
- if (min) |
- src1[i] = std::min(src1[i], src2[i]); |
- else |
- src1[i] = std::max(src1[i], src2[i]); |
- } |
- set_q_register(Vd, src1); |
- break; |
- } |
- case Neon16: { |
- int16_t src1[8], src2[8]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 8; i++) { |
- if (min) |
- src1[i] = std::min(src1[i], src2[i]); |
- else |
- src1[i] = std::max(src1[i], src2[i]); |
- } |
- set_q_register(Vd, src1); |
- break; |
- } |
- case Neon32: { |
- int32_t src1[4], src2[4]; |
+ case 0xf: { |
+ if (instr->Bit(20) == 0 && instr->Bit(6) == 1) { |
+ float src1[4], src2[4]; |
get_q_register(Vn, src1); |
get_q_register(Vm, src2); |
- for (int i = 0; i < 4; i++) { |
- if (min) |
- src1[i] = std::min(src1[i], src2[i]); |
- else |
- src1[i] = std::max(src1[i], src2[i]); |
+ if (instr->Bit(4) == 1) { |
+ if (instr->Bit(21) == 0) { |
+ // vrecps.f32 Qd, Qm, Qn. |
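+                // Newton-Raphson reciprocal step: 2 - Qn * Qm. |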
+ for (int i = 0; i < 4; i++) { |
+ src1[i] = 2.0f - src1[i] * src2[i]; |
+ } |
+ } else { |
+ // vrsqrts.f32 Qd, Qm, Qn. |
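+                // Newton-Raphson step for 1/sqrt: (3 - Qn * Qm) / 2. |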
+ for (int i = 0; i < 4; i++) { |
+ src1[i] = (3.0f - src1[i] * src2[i]) * 0.5f; |
+ } |
+ } |
+ } else { |
+ if (instr->Bit(21) == 1) { |
+ // vmin.f32 Qd, Qm, Qn. |
+ for (int i = 0; i < 4; i++) { |
+ src1[i] = std::min(src1[i], src2[i]); |
+ } |
+ } else { |
+ // vmax.f32 Qd, Qm, Qn. |
+ for (int i = 0; i < 4; i++) { |
+ src1[i] = std::max(src1[i], src2[i]); |
+ } |
+ } |
} |
set_q_register(Vd, src1); |
- break; |
+ } else { |
+ UNIMPLEMENTED(); |
} |
- default: |
- UNREACHABLE(); |
- break; |
+ break; |
} |
- } else { |
- UNIMPLEMENTED(); |
+ default: |
+ UNIMPLEMENTED(); |
+ break; |
} |
break; |
+ } |
case 5: |
if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) && |
(instr->Bit(4) == 1)) { |
@@ -4436,265 +4541,319 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
UNIMPLEMENTED(); |
} |
break; |
- case 6: |
- if (instr->Bits(11, 8) == 8 && instr->Bit(4) == 0) { |
- // vsub.size Qd, Qm, Qn. |
- NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
- int Vd = instr->VFPDRegValue(kSimd128Precision); |
- int Vm = instr->VFPMRegValue(kSimd128Precision); |
- int Vn = instr->VFPNRegValue(kSimd128Precision); |
- switch (size) { |
- case Neon8: { |
- uint8_t src1[16], src2[16]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 16; i++) { |
- src1[i] -= src2[i]; |
- } |
- set_q_register(Vd, src1); |
- break; |
- } |
- case Neon16: { |
- uint16_t src1[8], src2[8]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 8; i++) { |
- src1[i] -= src2[i]; |
+ case 6: { |
+ int Vd, Vm, Vn; |
+ if (instr->Bit(6) == 0) { |
+ Vd = instr->VFPDRegValue(kDoublePrecision); |
+ Vm = instr->VFPMRegValue(kDoublePrecision); |
+ Vn = instr->VFPNRegValue(kDoublePrecision); |
+ } else { |
+ Vd = instr->VFPDRegValue(kSimd128Precision); |
+ Vm = instr->VFPMRegValue(kSimd128Precision); |
+ Vn = instr->VFPNRegValue(kSimd128Precision); |
+ } |
+ switch (instr->Bits(11, 8)) { |
+ case 0x0: { |
+ if (instr->Bit(4) == 1) { |
+ // vqadd.u<size> Qd, Qm, Qn. |
+ NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
+ switch (size) { |
+ case Neon8: |
+ AddSaturate<uint8_t>(this, Vd, Vm, Vn); |
+ break; |
+ case Neon16: |
+ AddSaturate<uint16_t>(this, Vd, Vm, Vn); |
+ break; |
+ case Neon32: |
+ AddSaturate<uint32_t>(this, Vd, Vm, Vn); |
+ break; |
+ default: |
+ UNREACHABLE(); |
+ break; |
} |
- set_q_register(Vd, src1); |
- break; |
+ } else { |
+ UNIMPLEMENTED(); |
} |
- case Neon32: { |
- uint32_t src1[4], src2[4]; |
+ break; |
+ } |
+ case 0x1: { |
+ if (instr->Bits(21, 20) == 1 && instr->Bit(4) == 1) { |
+ // vbsl.size Qd, Qm, Qn. |
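+            // Each result bit comes from Qn where the Qd bit is 1, else from Qm. |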
+ uint32_t dst[4], src1[4], src2[4]; |
+ get_q_register(Vd, dst); |
get_q_register(Vn, src1); |
get_q_register(Vm, src2); |
for (int i = 0; i < 4; i++) { |
- src1[i] -= src2[i]; |
+ dst[i] = (dst[i] & src1[i]) | (~dst[i] & src2[i]); |
} |
- set_q_register(Vd, src1); |
- break; |
+ set_q_register(Vd, dst); |
+ } else if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 1) { |
+ if (instr->Bit(6) == 0) { |
+ // veor Dd, Dn, Dm |
+ uint64_t src1, src2; |
+ get_d_register(Vn, &src1); |
+ get_d_register(Vm, &src2); |
+ src1 ^= src2; |
+ set_d_register(Vd, &src1); |
+ } else { |
+ // veor Qd, Qn, Qm |
+ uint32_t src1[4], src2[4]; |
+ get_q_register(Vn, src1); |
+ get_q_register(Vm, src2); |
+ for (int i = 0; i < 4; i++) src1[i] ^= src2[i]; |
+ set_q_register(Vd, src1); |
+ } |
+ } else { |
+ UNIMPLEMENTED(); |
} |
- default: |
- UNREACHABLE(); |
- break; |
+ break; |
} |
- } else if (instr->Bits(11, 8) == 8 && instr->Bit(4) == 1) { |
- // vceq.size Qd, Qm, Qn. |
- NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
- int Vd = instr->VFPDRegValue(kSimd128Precision); |
- int Vm = instr->VFPMRegValue(kSimd128Precision); |
- int Vn = instr->VFPNRegValue(kSimd128Precision); |
- switch (size) { |
- case Neon8: { |
- uint8_t src1[16], src2[16]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 16; i++) { |
- src1[i] = (src1[i] == src2[i]) ? 0xFFu : 0; |
+ case 0x2: { |
+ if (instr->Bit(4) == 1) { |
+ // vqsub.u<size> Qd, Qm, Qn. |
+ NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
+ switch (size) { |
+ case Neon8: |
+ SubSaturate<uint8_t>(this, Vd, Vm, Vn); |
+ break; |
+ case Neon16: |
+ SubSaturate<uint16_t>(this, Vd, Vm, Vn); |
+ break; |
+ case Neon32: |
+ SubSaturate<uint32_t>(this, Vd, Vm, Vn); |
+ break; |
+ default: |
+ UNREACHABLE(); |
+ break; |
} |
- set_q_register(Vd, src1); |
- break; |
+ } else { |
+ UNIMPLEMENTED(); |
} |
- case Neon16: { |
- uint16_t src1[8], src2[8]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 8; i++) { |
- src1[i] = (src1[i] == src2[i]) ? 0xFFFFu : 0; |
+ break; |
+ } |
+ case 0x3: { |
+ // vcge/vcgt.u<size> Qd, Qm, Qn. |
+ bool ge = instr->Bit(4) == 1; |
+ NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
+ switch (size) { |
+ case Neon8: { |
+ uint8_t src1[16], src2[16]; |
+ get_q_register(Vn, src1); |
+ get_q_register(Vm, src2); |
+ for (int i = 0; i < 16; i++) { |
+ if (ge) |
+ src1[i] = src1[i] >= src2[i] ? 0xFFu : 0; |
+ else |
+ src1[i] = src1[i] > src2[i] ? 0xFFu : 0; |
+ } |
+ set_q_register(Vd, src1); |
+ break; |
} |
- set_q_register(Vd, src1); |
- break; |
- } |
- case Neon32: { |
- uint32_t src1[4], src2[4]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 4; i++) { |
- src1[i] = (src1[i] == src2[i]) ? 0xFFFFFFFFu : 0; |
+ case Neon16: { |
+ uint16_t src1[8], src2[8]; |
+ get_q_register(Vn, src1); |
+ get_q_register(Vm, src2); |
+ for (int i = 0; i < 8; i++) { |
+ if (ge) |
+ src1[i] = src1[i] >= src2[i] ? 0xFFFFu : 0; |
+ else |
+ src1[i] = src1[i] > src2[i] ? 0xFFFFu : 0; |
+ } |
+ set_q_register(Vd, src1); |
+ break; |
} |
- set_q_register(Vd, src1); |
- break; |
- } |
- default: |
- UNREACHABLE(); |
- break; |
- } |
- } else if (instr->Bits(11, 8) == 1 && instr->Bits(21, 20) == 1 && |
- instr->Bit(4) == 1) { |
- // vbsl.size Qd, Qm, Qn. |
- int Vd = instr->VFPDRegValue(kSimd128Precision); |
- int Vm = instr->VFPMRegValue(kSimd128Precision); |
- int Vn = instr->VFPNRegValue(kSimd128Precision); |
- uint32_t dst[4], src1[4], src2[4]; |
- get_q_register(Vd, dst); |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 4; i++) { |
- dst[i] = (dst[i] & src1[i]) | (~dst[i] & src2[i]); |
- } |
- set_q_register(Vd, dst); |
- } else if (instr->Bits(11, 8) == 1 && instr->Bits(21, 20) == 0 && |
- instr->Bit(4) == 1) { |
- if (instr->Bit(6) == 0) { |
- // veor Dd, Dn, Dm |
- int Vd = instr->VFPDRegValue(kDoublePrecision); |
- int Vn = instr->VFPNRegValue(kDoublePrecision); |
- int Vm = instr->VFPMRegValue(kDoublePrecision); |
- uint64_t src1, src2; |
- get_d_register(Vn, &src1); |
- get_d_register(Vm, &src2); |
- src1 ^= src2; |
- set_d_register(Vd, &src1); |
- |
- } else { |
- // veor Qd, Qn, Qm |
- int Vd = instr->VFPDRegValue(kSimd128Precision); |
- int Vn = instr->VFPNRegValue(kSimd128Precision); |
- int Vm = instr->VFPMRegValue(kSimd128Precision); |
- uint32_t src1[4], src2[4]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 4; i++) src1[i] ^= src2[i]; |
- set_q_register(Vd, src1); |
- } |
- } else if (instr->Bits(11, 8) == 0xd && instr->Bit(21) == 0 && |
- instr->Bit(6) == 1 && instr->Bit(4) == 1) { |
- // vmul.f32 Qd, Qn, Qm |
- int Vd = instr->VFPDRegValue(kSimd128Precision); |
- int Vn = instr->VFPNRegValue(kSimd128Precision); |
- int Vm = instr->VFPMRegValue(kSimd128Precision); |
- float src1[4], src2[4]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 4; i++) { |
- src1[i] = src1[i] * src2[i]; |
- } |
- set_q_register(Vd, src1); |
- } else if (instr->Bits(11, 8) == 0xe && instr->Bit(20) == 0 && |
- instr->Bit(4) == 0) { |
- // vcge/vcgt.f32 Qd, Qm, Qn |
- bool ge = instr->Bit(21) == 0; |
- int Vd = instr->VFPDRegValue(kSimd128Precision); |
- int Vm = instr->VFPMRegValue(kSimd128Precision); |
- int Vn = instr->VFPNRegValue(kSimd128Precision); |
- float src1[4], src2[4]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- uint32_t dst[4]; |
- for (int i = 0; i < 4; i++) { |
- if (ge) { |
- dst[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0; |
- } else { |
- dst[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0; |
+ case Neon32: { |
+ uint32_t src1[4], src2[4]; |
+ get_q_register(Vn, src1); |
+ get_q_register(Vm, src2); |
+ for (int i = 0; i < 4; i++) { |
+ if (ge) |
+ src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0; |
+ else |
+ src1[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0; |
+ } |
+ set_q_register(Vd, src1); |
+ break; |
+ } |
+ default: |
+ UNREACHABLE(); |
+ break; |
} |
+ break; |
} |
- set_q_register(Vd, dst); |
- } else if (instr->Bits(11, 8) == 0x3) { |
- // vcge/vcgt.u<size> Qd, Qm, Qn. |
- bool ge = instr->Bit(4) == 1; |
- NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
- int Vd = instr->VFPDRegValue(kSimd128Precision); |
- int Vm = instr->VFPMRegValue(kSimd128Precision); |
- int Vn = instr->VFPNRegValue(kSimd128Precision); |
- switch (size) { |
- case Neon8: { |
- uint8_t src1[16], src2[16]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 16; i++) { |
- if (ge) |
- src1[i] = src1[i] >= src2[i] ? 0xFFu : 0; |
- else |
- src1[i] = src1[i] > src2[i] ? 0xFFu : 0; |
+ case 0x6: { |
+ // vmin/vmax.u<size> Qd, Qm, Qn. |
+ NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
+ bool min = instr->Bit(4) != 0; |
+ switch (size) { |
+ case Neon8: { |
+ uint8_t src1[16], src2[16]; |
+ get_q_register(Vn, src1); |
+ get_q_register(Vm, src2); |
+ for (int i = 0; i < 16; i++) { |
+ if (min) |
+ src1[i] = std::min(src1[i], src2[i]); |
+ else |
+ src1[i] = std::max(src1[i], src2[i]); |
+ } |
+ set_q_register(Vd, src1); |
+ break; |
} |
- set_q_register(Vd, src1); |
- break; |
- } |
- case Neon16: { |
- uint16_t src1[8], src2[8]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 8; i++) { |
- if (ge) |
- src1[i] = src1[i] >= src2[i] ? 0xFFFFu : 0; |
- else |
- src1[i] = src1[i] > src2[i] ? 0xFFFFu : 0; |
+ case Neon16: { |
+ uint16_t src1[8], src2[8]; |
+ get_q_register(Vn, src1); |
+ get_q_register(Vm, src2); |
+ for (int i = 0; i < 8; i++) { |
+ if (min) |
+ src1[i] = std::min(src1[i], src2[i]); |
+ else |
+ src1[i] = std::max(src1[i], src2[i]); |
+ } |
+ set_q_register(Vd, src1); |
+ break; |
} |
- set_q_register(Vd, src1); |
- break; |
- } |
- case Neon32: { |
- uint32_t src1[4], src2[4]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 4; i++) { |
- if (ge) |
- src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0; |
- else |
- src1[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0; |
+ case Neon32: { |
+ uint32_t src1[4], src2[4]; |
+ get_q_register(Vn, src1); |
+ get_q_register(Vm, src2); |
+ for (int i = 0; i < 4; i++) { |
+ if (min) |
+ src1[i] = std::min(src1[i], src2[i]); |
+ else |
+ src1[i] = std::max(src1[i], src2[i]); |
+ } |
+ set_q_register(Vd, src1); |
+ break; |
} |
- set_q_register(Vd, src1); |
- break; |
+ default: |
+ UNREACHABLE(); |
+ break; |
} |
- default: |
- UNREACHABLE(); |
- break; |
+ break; |
} |
- } else if (instr->Bits(11, 8) == 0x6) { |
- // vmin/vmax.u<size> Qd, Qm, Qn. |
- NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
- int Vd = instr->VFPDRegValue(kSimd128Precision); |
- int Vm = instr->VFPMRegValue(kSimd128Precision); |
- int Vn = instr->VFPNRegValue(kSimd128Precision); |
- bool min = instr->Bit(4) != 0; |
- switch (size) { |
- case Neon8: { |
- uint8_t src1[16], src2[16]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 16; i++) { |
- if (min) |
- src1[i] = std::min(src1[i], src2[i]); |
- else |
- src1[i] = std::max(src1[i], src2[i]); |
+ case 0x8: { |
+ if (instr->Bit(4) == 0) { |
+ // vsub.size Qd, Qm, Qn. |
+ NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
+ switch (size) { |
+ case Neon8: { |
+ uint8_t src1[16], src2[16]; |
+ get_q_register(Vn, src1); |
+ get_q_register(Vm, src2); |
+ for (int i = 0; i < 16; i++) { |
+ src1[i] -= src2[i]; |
+ } |
+ set_q_register(Vd, src1); |
+ break; |
+ } |
+ case Neon16: { |
+ uint16_t src1[8], src2[8]; |
+ get_q_register(Vn, src1); |
+ get_q_register(Vm, src2); |
+ for (int i = 0; i < 8; i++) { |
+ src1[i] -= src2[i]; |
+ } |
+ set_q_register(Vd, src1); |
+ break; |
+ } |
+ case Neon32: { |
+ uint32_t src1[4], src2[4]; |
+ get_q_register(Vn, src1); |
+ get_q_register(Vm, src2); |
+ for (int i = 0; i < 4; i++) { |
+ src1[i] -= src2[i]; |
+ } |
+ set_q_register(Vd, src1); |
+ break; |
+ } |
+ default: |
+ UNREACHABLE(); |
+ break; |
+ } |
+ } else { |
+ // vceq.size Qd, Qm, Qn. |
+ NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
+ switch (size) { |
+ case Neon8: { |
+ uint8_t src1[16], src2[16]; |
+ get_q_register(Vn, src1); |
+ get_q_register(Vm, src2); |
+ for (int i = 0; i < 16; i++) { |
+ src1[i] = (src1[i] == src2[i]) ? 0xFFu : 0; |
+ } |
+ set_q_register(Vd, src1); |
+ break; |
+ } |
+ case Neon16: { |
+ uint16_t src1[8], src2[8]; |
+ get_q_register(Vn, src1); |
+ get_q_register(Vm, src2); |
+ for (int i = 0; i < 8; i++) { |
+ src1[i] = (src1[i] == src2[i]) ? 0xFFFFu : 0; |
+ } |
+ set_q_register(Vd, src1); |
+ break; |
+ } |
+ case Neon32: { |
+ uint32_t src1[4], src2[4]; |
+ get_q_register(Vn, src1); |
+ get_q_register(Vm, src2); |
+ for (int i = 0; i < 4; i++) { |
+ src1[i] = (src1[i] == src2[i]) ? 0xFFFFFFFFu : 0; |
+ } |
+ set_q_register(Vd, src1); |
+ break; |
+ } |
+ default: |
+ UNREACHABLE(); |
+ break; |
} |
- set_q_register(Vd, src1); |
- break; |
} |
- case Neon16: { |
- uint16_t src1[8], src2[8]; |
+ break; |
+ } |
+ case 0xd: { |
+ if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) { |
+ // vmul.f32 Qd, Qn, Qm |
+ float src1[4], src2[4]; |
get_q_register(Vn, src1); |
get_q_register(Vm, src2); |
- for (int i = 0; i < 8; i++) { |
- if (min) |
- src1[i] = std::min(src1[i], src2[i]); |
- else |
- src1[i] = std::max(src1[i], src2[i]); |
+ for (int i = 0; i < 4; i++) { |
+ src1[i] = src1[i] * src2[i]; |
} |
set_q_register(Vd, src1); |
- break; |
+ } else { |
+ UNIMPLEMENTED(); |
} |
- case Neon32: { |
- uint32_t src1[4], src2[4]; |
+ break; |
+ } |
+ case 0xe: { |
+ if (instr->Bit(20) == 0 && instr->Bit(4) == 0) { |
+ // vcge/vcgt.f32 Qd, Qm, Qn |
+ bool ge = instr->Bit(21) == 0; |
+ float src1[4], src2[4]; |
get_q_register(Vn, src1); |
get_q_register(Vm, src2); |
+ uint32_t dst[4]; |
for (int i = 0; i < 4; i++) { |
- if (min) |
- src1[i] = std::min(src1[i], src2[i]); |
- else |
- src1[i] = std::max(src1[i], src2[i]); |
+ if (ge) { |
+ dst[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0; |
+ } else { |
+ dst[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0; |
+ } |
} |
- set_q_register(Vd, src1); |
- break; |
+ set_q_register(Vd, dst); |
+ } else { |
+ UNIMPLEMENTED(); |
} |
- default: |
- UNREACHABLE(); |
- break; |
+ break; |
} |
- } else { |
- UNIMPLEMENTED(); |
+ default: |
+ UNREACHABLE(); |
+ break; |
} |
break; |
+ } |
case 7: |
if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) && |
(instr->Bit(4) == 1)) { |