Index: src/arm/simulator-arm.cc |
diff --git a/src/arm/simulator-arm.cc b/src/arm/simulator-arm.cc |
index 2bf2a6a3a52b549a548bc384081e975344b63a69..c1e6fc1f4db743a152e0952b8ae16ea55a98fa41 100644 |
--- a/src/arm/simulator-arm.cc |
+++ b/src/arm/simulator-arm.cc |
@@ -896,28 +896,20 @@ void Simulator::set_d_register(int dreg, const uint32_t* value) { |
memcpy(vfp_registers_ + dreg * 2, value, sizeof(*value) * 2); |
} |
-template <typename T> |
-void Simulator::get_d_register(int dreg, T* value) { |
- DCHECK((dreg >= 0) && (dreg < num_d_registers)); |
- memcpy(value, vfp_registers_ + dreg * 2, kDoubleSize); |
+template <typename T, int SIZE> |
+void Simulator::get_neon_register(int reg, T (&value)[SIZE / sizeof(T)]) { |
+ DCHECK(SIZE == kSimd128Size || SIZE == kDoubleSize); |
+ DCHECK_LE(0, reg); |
+ DCHECK_GT(SIZE == kSimd128Size ? num_q_registers : num_d_registers, reg); |
+ memcpy(value, vfp_registers_ + reg * (SIZE / 4), SIZE); |
} |
-template <typename T> |
-void Simulator::set_d_register(int dreg, const T* value) { |
- DCHECK((dreg >= 0) && (dreg < num_d_registers)); |
- memcpy(vfp_registers_ + dreg * 2, value, kDoubleSize); |
-} |
- |
-template <typename T> |
-void Simulator::get_q_register(int qreg, T* value) { |
- DCHECK((qreg >= 0) && (qreg < num_q_registers)); |
- memcpy(value, vfp_registers_ + qreg * 4, kSimd128Size); |
-} |
- |
-template <typename T> |
-void Simulator::set_q_register(int qreg, const T* value) { |
- DCHECK((qreg >= 0) && (qreg < num_q_registers)); |
- memcpy(vfp_registers_ + qreg * 4, value, kSimd128Size); |
+template <typename T, int SIZE> |
+void Simulator::set_neon_register(int reg, const T (&value)[SIZE / sizeof(T)]) { |
+ DCHECK(SIZE == kSimd128Size || SIZE == kDoubleSize); |
+ DCHECK_LE(0, reg); |
+ DCHECK_GT(SIZE == kSimd128Size ? num_q_registers : num_d_registers, reg); |
+ memcpy(vfp_registers_ + reg * (SIZE / 4), value, SIZE); |
} |
// Raw access to the PC register. |
@@ -3508,7 +3500,7 @@ void Simulator::DecodeTypeVFP(Instruction* instr) { |
UNREACHABLE(); |
break; |
} |
- set_q_register(vd, q_data); |
+ set_neon_register(vd, q_data); |
} |
} else if ((instr->VLValue() == 0x1) && (instr->VCValue() == 0x1)) { |
// vmov (scalar to ARM core register) |
@@ -3995,7 +3987,6 @@ void Simulator::DecodeType6CoprocessorIns(Instruction* instr) { |
} |
// Templated operations for NEON instructions. |
-// TODO(bbudge) Add more templates for use in DecodeSpecialCondition. |
template <typename T, typename U> |
U Widen(T value) { |
static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller"); |
@@ -4025,21 +4016,38 @@ T Clamp(int64_t value) { |
return static_cast<T>(clamped); |
} |
-template <typename T> |
-T MinMax(T a, T b, bool is_min) { |
- return is_min ? std::min(a, b) : std::max(a, b); |
-} |
- |
template <typename T, typename U> |
void Widen(Simulator* simulator, int Vd, int Vm) { |
static const int kLanes = 8 / sizeof(T); |
T src[kLanes]; |
U dst[kLanes]; |
- simulator->get_d_register(Vm, src); |
+ simulator->get_neon_register<T, kDoubleSize>(Vm, src); |
for (int i = 0; i < kLanes; i++) { |
dst[i] = Widen<T, U>(src[i]); |
} |
- simulator->set_q_register(Vd, dst); |
+ simulator->set_neon_register(Vd, dst); |
+} |
+ |
+template <typename T, int SIZE> |
+void Abs(Simulator* simulator, int Vd, int Vm) { |
+ static const int kElems = SIZE / sizeof(T); |
+ T src[kElems]; |
+ simulator->get_neon_register<T, SIZE>(Vm, src); |
+ for (int i = 0; i < kElems; i++) { |
+ src[i] = std::abs(src[i]); |
+ } |
+ simulator->set_neon_register<T, SIZE>(Vd, src); |
+} |
+ |
+template <typename T, int SIZE> |
+void Neg(Simulator* simulator, int Vd, int Vm) { |
+ static const int kElems = SIZE / sizeof(T); |
+ T src[kElems]; |
+ simulator->get_neon_register<T, SIZE>(Vm, src); |
+ for (int i = 0; i < kElems; i++) { |
+ src[i] = -src[i]; |
+ } |
+ simulator->set_neon_register<T, SIZE>(Vd, src); |
} |
template <typename T, typename U> |
@@ -4047,35 +4055,222 @@ void SaturatingNarrow(Simulator* simulator, int Vd, int Vm) { |
static const int kLanes = 16 / sizeof(T); |
T src[kLanes]; |
U dst[kLanes]; |
- simulator->get_q_register(Vm, src); |
+ simulator->get_neon_register(Vm, src); |
for (int i = 0; i < kLanes; i++) { |
dst[i] = Narrow<T, U>(Clamp<U>(src[i])); |
} |
- simulator->set_d_register(Vd, dst); |
+ simulator->set_neon_register<U, kDoubleSize>(Vd, dst); |
} |
template <typename T> |
void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn) { |
static const int kLanes = 16 / sizeof(T); |
T src1[kLanes], src2[kLanes]; |
- simulator->get_q_register(Vn, src1); |
- simulator->get_q_register(Vm, src2); |
+ simulator->get_neon_register(Vn, src1); |
+ simulator->get_neon_register(Vm, src2); |
for (int i = 0; i < kLanes; i++) { |
src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) + Widen<T, int64_t>(src2[i])); |
} |
- simulator->set_q_register(Vd, src1); |
+ simulator->set_neon_register(Vd, src1); |
} |
template <typename T> |
void SubSaturate(Simulator* simulator, int Vd, int Vm, int Vn) { |
static const int kLanes = 16 / sizeof(T); |
T src1[kLanes], src2[kLanes]; |
- simulator->get_q_register(Vn, src1); |
- simulator->get_q_register(Vm, src2); |
+ simulator->get_neon_register(Vn, src1); |
+ simulator->get_neon_register(Vm, src2); |
for (int i = 0; i < kLanes; i++) { |
src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) - Widen<T, int64_t>(src2[i])); |
} |
- simulator->set_q_register(Vd, src1); |
+ simulator->set_neon_register(Vd, src1); |
+} |
+ |
+template <typename T, int SIZE> |
+void Zip(Simulator* simulator, int Vd, int Vm) { |
+ static const int kElems = SIZE / sizeof(T); |
+ static const int kPairs = kElems / 2; |
+ T src1[kElems], src2[kElems], dst1[kElems], dst2[kElems]; |
+ simulator->get_neon_register<T, SIZE>(Vd, src1); |
+ simulator->get_neon_register<T, SIZE>(Vm, src2); |
+ for (int i = 0; i < kPairs; i++) { |
+ dst1[i * 2] = src1[i]; |
+ dst1[i * 2 + 1] = src2[i]; |
+ dst2[i * 2] = src1[i + kPairs]; |
+ dst2[i * 2 + 1] = src2[i + kPairs]; |
+ } |
+ simulator->set_neon_register<T, SIZE>(Vd, dst1); |
+ simulator->set_neon_register<T, SIZE>(Vm, dst2); |
+} |
+ |
+template <typename T, int SIZE> |
+void Unzip(Simulator* simulator, int Vd, int Vm) { |
+ static const int kElems = SIZE / sizeof(T); |
+ static const int kPairs = kElems / 2; |
+ T src1[kElems], src2[kElems], dst1[kElems], dst2[kElems]; |
+ simulator->get_neon_register<T, SIZE>(Vd, src1); |
+ simulator->get_neon_register<T, SIZE>(Vm, src2); |
+ for (int i = 0; i < kPairs; i++) { |
+ dst1[i] = src1[i * 2]; |
+ dst1[i + kPairs] = src2[i * 2]; |
+ dst2[i] = src1[i * 2 + 1]; |
+ dst2[i + kPairs] = src2[i * 2 + 1]; |
+ } |
+ simulator->set_neon_register<T, SIZE>(Vd, dst1); |
+ simulator->set_neon_register<T, SIZE>(Vm, dst2); |
+} |
+ |
+template <typename T, int SIZE> |
+void Transpose(Simulator* simulator, int Vd, int Vm) { |
+ static const int kElems = SIZE / sizeof(T); |
+ static const int kPairs = kElems / 2; |
+ T src1[kElems], src2[kElems]; |
+ simulator->get_neon_register<T, SIZE>(Vd, src1); |
+ simulator->get_neon_register<T, SIZE>(Vm, src2); |
+ for (int i = 0; i < kPairs; i++) { |
+ std::swap(src1[2 * i + 1], src2[2 * i]); |
+ } |
+ simulator->set_neon_register<T, SIZE>(Vd, src1); |
+ simulator->set_neon_register<T, SIZE>(Vm, src2); |
+} |
+ |
+template <typename T, int SIZE> |
+void Test(Simulator* simulator, int Vd, int Vm, int Vn) { |
+ static const int kElems = SIZE / sizeof(T); |
+ T src1[kElems], src2[kElems]; |
+ simulator->get_neon_register<T, SIZE>(Vn, src1); |
+ simulator->get_neon_register<T, SIZE>(Vm, src2); |
+ for (int i = 0; i < kElems; i++) { |
+ src1[i] = (src1[i] & src2[i]) != 0 ? -1 : 0; |
+ } |
+ simulator->set_neon_register<T, SIZE>(Vd, src1); |
+} |
+ |
+template <typename T, int SIZE> |
+void Add(Simulator* simulator, int Vd, int Vm, int Vn) { |
+ static const int kElems = SIZE / sizeof(T); |
+ T src1[kElems], src2[kElems]; |
+ simulator->get_neon_register<T, SIZE>(Vn, src1); |
+ simulator->get_neon_register<T, SIZE>(Vm, src2); |
+ for (int i = 0; i < kElems; i++) { |
+ src1[i] += src2[i]; |
+ } |
+ simulator->set_neon_register<T, SIZE>(Vd, src1); |
+} |
+ |
+template <typename T, int SIZE> |
+void Sub(Simulator* simulator, int Vd, int Vm, int Vn) { |
+ static const int kElems = SIZE / sizeof(T); |
+ T src1[kElems], src2[kElems]; |
+ simulator->get_neon_register<T, SIZE>(Vn, src1); |
+ simulator->get_neon_register<T, SIZE>(Vm, src2); |
+ for (int i = 0; i < kElems; i++) { |
+ src1[i] -= src2[i]; |
+ } |
+ simulator->set_neon_register<T, SIZE>(Vd, src1); |
+} |
+ |
+template <typename T, int SIZE> |
+void Mul(Simulator* simulator, int Vd, int Vm, int Vn) { |
+ static const int kElems = SIZE / sizeof(T); |
+ T src1[kElems], src2[kElems]; |
+ simulator->get_neon_register<T, SIZE>(Vn, src1); |
+ simulator->get_neon_register<T, SIZE>(Vm, src2); |
+ for (int i = 0; i < kElems; i++) { |
+ src1[i] *= src2[i]; |
+ } |
+ simulator->set_neon_register<T, SIZE>(Vd, src1); |
+} |
+ |
+template <typename T, int SIZE> |
+void ShiftLeft(Simulator* simulator, int Vd, int Vm, int shift) { |
+ static const int kElems = SIZE / sizeof(T); |
+ T src[kElems]; |
+ simulator->get_neon_register<T, SIZE>(Vm, src); |
+ for (int i = 0; i < kElems; i++) { |
+ src[i] <<= shift; |
+ } |
+ simulator->set_neon_register<T, SIZE>(Vd, src); |
+} |
+ |
+template <typename T, int SIZE> |
+void ShiftRight(Simulator* simulator, int Vd, int Vm, int shift) { |
+ static const int kElems = SIZE / sizeof(T); |
+ T src[kElems]; |
+ simulator->get_neon_register<T, SIZE>(Vm, src); |
+ for (int i = 0; i < kElems; i++) { |
+ src[i] >>= shift; |
+ } |
+ simulator->set_neon_register<T, SIZE>(Vd, src); |
+} |
+ |
+template <typename T, int SIZE> |
+void ArithmeticShiftRight(Simulator* simulator, int Vd, int Vm, int shift) { |
+ static const int kElems = SIZE / sizeof(T); |
+ T src[kElems]; |
+ simulator->get_neon_register<T, SIZE>(Vm, src); |
+ for (int i = 0; i < kElems; i++) { |
+ src[i] = ArithmeticShiftRight(src[i], shift); |
+ } |
+ simulator->set_neon_register<T, SIZE>(Vd, src); |
+} |
+ |
+template <typename T, int SIZE> |
+void CompareEqual(Simulator* simulator, int Vd, int Vm, int Vn) { |
+ static const int kElems = SIZE / sizeof(T); |
+ T src1[kElems], src2[kElems]; |
+ simulator->get_neon_register<T, SIZE>(Vn, src1); |
+ simulator->get_neon_register<T, SIZE>(Vm, src2); |
+ for (int i = 0; i < kElems; i++) { |
+ src1[i] = src1[i] == src2[i] ? -1 : 0; |
+ } |
+ simulator->set_neon_register<T, SIZE>(Vd, src1); |
+} |
+ |
+template <typename T, int SIZE> |
+void CompareGreater(Simulator* simulator, int Vd, int Vm, int Vn, bool ge) { |
+ static const int kElems = SIZE / sizeof(T); |
+ T src1[kElems], src2[kElems]; |
+ simulator->get_neon_register<T, SIZE>(Vn, src1); |
+ simulator->get_neon_register<T, SIZE>(Vm, src2); |
+ for (int i = 0; i < kElems; i++) { |
+ if (ge) |
+ src1[i] = src1[i] >= src2[i] ? -1 : 0; |
+ else |
+ src1[i] = src1[i] > src2[i] ? -1 : 0; |
+ } |
+ simulator->set_neon_register<T, SIZE>(Vd, src1); |
+} |
+ |
+template <typename T> |
+T MinMax(T a, T b, bool is_min) { |
+ return is_min ? std::min(a, b) : std::max(a, b); |
+} |
+ |
+template <typename T, int SIZE> |
+void MinMax(Simulator* simulator, int Vd, int Vm, int Vn, bool min) { |
+ static const int kElems = SIZE / sizeof(T); |
+ T src1[kElems], src2[kElems]; |
+ simulator->get_neon_register<T, SIZE>(Vn, src1); |
+ simulator->get_neon_register<T, SIZE>(Vm, src2); |
+ for (int i = 0; i < kElems; i++) { |
+ src1[i] = MinMax(src1[i], src2[i], min); |
+ } |
+ simulator->set_neon_register<T, SIZE>(Vd, src1); |
+} |
+ |
+template <typename T> |
+void PairwiseMinMax(Simulator* simulator, int Vd, int Vm, int Vn, bool min) { |
+ static const int kElems = kDoubleSize / sizeof(T); |
+ static const int kPairs = kElems / 2; |
+ T dst[kElems], src1[kElems], src2[kElems]; |
+ simulator->get_neon_register<T, kDoubleSize>(Vn, src1); |
+ simulator->get_neon_register<T, kDoubleSize>(Vm, src2); |
+ for (int i = 0; i < kPairs; i++) { |
+ dst[i] = MinMax(src1[i * 2], src1[i * 2 + 1], min); |
+ dst[i + kPairs] = MinMax(src2[i * 2], src2[i * 2 + 1], min); |
+ } |
+ simulator->set_neon_register<T, kDoubleSize>(Vd, dst); |
} |
void Simulator::DecodeSpecialCondition(Instruction* instr) { |
@@ -4121,25 +4316,25 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
// vmov Qd, Qm. |
// vorr, Qd, Qm, Qn. |
uint32_t src1[4]; |
- get_q_register(Vm, src1); |
+ get_neon_register(Vm, src1); |
if (Vm != Vn) { |
uint32_t src2[4]; |
- get_q_register(Vn, src2); |
+ get_neon_register(Vn, src2); |
for (int i = 0; i < 4; i++) { |
src1[i] = src1[i] | src2[i]; |
} |
} |
- set_q_register(Vd, src1); |
+ set_neon_register(Vd, src1); |
} else if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 1 && |
instr->Bit(4) == 1) { |
// vand Qd, Qm, Qn. |
uint32_t src1[4], src2[4]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
+ get_neon_register(Vn, src1); |
+ get_neon_register(Vm, src2); |
for (int i = 0; i < 4; i++) { |
src1[i] = src1[i] & src2[i]; |
} |
- set_q_register(Vd, src1); |
+ set_neon_register(Vd, src1); |
} else { |
UNIMPLEMENTED(); |
} |
@@ -4173,45 +4368,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
bool ge = instr->Bit(4) == 1; |
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
switch (size) { |
- case Neon8: { |
- int8_t src1[16], src2[16]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 16; i++) { |
- if (ge) |
- src1[i] = src1[i] >= src2[i] ? 0xFF : 0; |
- else |
- src1[i] = src1[i] > src2[i] ? 0xFF : 0; |
- } |
- set_q_register(Vd, src1); |
+ case Neon8: |
+ CompareGreater<int8_t, kSimd128Size>(this, Vd, Vm, Vn, ge); |
break; |
- } |
- case Neon16: { |
- int16_t src1[8], src2[8]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 8; i++) { |
- if (ge) |
- src1[i] = src1[i] >= src2[i] ? 0xFFFF : 0; |
- else |
- src1[i] = src1[i] > src2[i] ? 0xFFFF : 0; |
- } |
- set_q_register(Vd, src1); |
+ case Neon16: |
+ CompareGreater<int16_t, kSimd128Size>(this, Vd, Vm, Vn, ge); |
break; |
- } |
- case Neon32: { |
- int32_t src1[4], src2[4]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 4; i++) { |
- if (ge) |
- src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFF : 0; |
- else |
- src1[i] = src1[i] > src2[i] ? 0xFFFFFFFF : 0; |
- } |
- set_q_register(Vd, src1); |
+ case Neon32: |
+ CompareGreater<int32_t, kSimd128Size>(this, Vd, Vm, Vn, ge); |
break; |
- } |
default: |
UNREACHABLE(); |
break; |
@@ -4223,36 +4388,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
bool min = instr->Bit(4) != 0; |
switch (size) { |
- case Neon8: { |
- int8_t src1[16], src2[16]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 16; i++) { |
- src1[i] = MinMax(src1[i], src2[i], min); |
- } |
- set_q_register(Vd, src1); |
+ case Neon8: |
+ MinMax<int8_t, kSimd128Size>(this, Vd, Vm, Vn, min); |
break; |
- } |
- case Neon16: { |
- int16_t src1[8], src2[8]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 8; i++) { |
- src1[i] = MinMax(src1[i], src2[i], min); |
- } |
- set_q_register(Vd, src1); |
+ case Neon16: |
+ MinMax<int16_t, kSimd128Size>(this, Vd, Vm, Vn, min); |
break; |
- } |
- case Neon32: { |
- int32_t src1[4], src2[4]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 4; i++) { |
- src1[i] = MinMax(src1[i], src2[i], min); |
- } |
- set_q_register(Vd, src1); |
+ case Neon32: |
+ MinMax<int32_t, kSimd128Size>(this, Vd, Vm, Vn, min); |
break; |
- } |
default: |
UNREACHABLE(); |
break; |
@@ -4265,36 +4409,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
if (instr->Bit(4) == 0) { |
// vadd.i<size> Qd, Qm, Qn. |
switch (size) { |
- case Neon8: { |
- uint8_t src1[16], src2[16]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 16; i++) { |
- src1[i] += src2[i]; |
- } |
- set_q_register(Vd, src1); |
+ case Neon8: |
+ Add<uint8_t, kSimd128Size>(this, Vd, Vm, Vn); |
break; |
- } |
- case Neon16: { |
- uint16_t src1[8], src2[8]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 8; i++) { |
- src1[i] += src2[i]; |
- } |
- set_q_register(Vd, src1); |
+ case Neon16: |
+ Add<uint16_t, kSimd128Size>(this, Vd, Vm, Vn); |
break; |
- } |
- case Neon32: { |
- uint32_t src1[4], src2[4]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 4; i++) { |
- src1[i] += src2[i]; |
- } |
- set_q_register(Vd, src1); |
+ case Neon32: |
+ Add<uint32_t, kSimd128Size>(this, Vd, Vm, Vn); |
break; |
- } |
default: |
UNREACHABLE(); |
break; |
@@ -4302,36 +4425,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
} else { |
// vtst.i<size> Qd, Qm, Qn. |
switch (size) { |
- case Neon8: { |
- uint8_t src1[16], src2[16]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 16; i++) { |
- src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFu : 0; |
- } |
- set_q_register(Vd, src1); |
+ case Neon8: |
+ Test<uint8_t, kSimd128Size>(this, Vd, Vm, Vn); |
break; |
- } |
- case Neon16: { |
- uint16_t src1[8], src2[8]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 8; i++) { |
- src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFu : 0; |
- } |
- set_q_register(Vd, src1); |
+ case Neon16: |
+ Test<uint16_t, kSimd128Size>(this, Vd, Vm, Vn); |
break; |
- } |
- case Neon32: { |
- uint32_t src1[4], src2[4]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 4; i++) { |
- src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFFFFFu : 0; |
- } |
- set_q_register(Vd, src1); |
+ case Neon32: |
+ Test<uint32_t, kSimd128Size>(this, Vd, Vm, Vn); |
break; |
- } |
default: |
UNREACHABLE(); |
break; |
@@ -4344,36 +4446,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
// vmul.i<size> Qd, Qm, Qn. |
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
switch (size) { |
- case Neon8: { |
- uint8_t src1[16], src2[16]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 16; i++) { |
- src1[i] *= src2[i]; |
- } |
- set_q_register(Vd, src1); |
+ case Neon8: |
+ Mul<uint8_t, kSimd128Size>(this, Vd, Vm, Vn); |
break; |
- } |
- case Neon16: { |
- uint16_t src1[8], src2[8]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 8; i++) { |
- src1[i] *= src2[i]; |
- } |
- set_q_register(Vd, src1); |
+ case Neon16: |
+ Mul<uint16_t, kSimd128Size>(this, Vd, Vm, Vn); |
break; |
- } |
- case Neon32: { |
- uint32_t src1[4], src2[4]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 4; i++) { |
- src1[i] *= src2[i]; |
- } |
- set_q_register(Vd, src1); |
+ case Neon32: |
+ Mul<uint32_t, kSimd128Size>(this, Vd, Vm, Vn); |
break; |
- } |
default: |
UNREACHABLE(); |
break; |
@@ -4388,37 +4469,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
bool min = instr->Bit(4) != 0; |
switch (size) { |
- case Neon8: { |
- int8_t dst[8], src1[8], src2[8]; |
- get_d_register(Vn, src1); |
- get_d_register(Vm, src2); |
- for (int i = 0; i < 4; i++) { |
- dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min); |
- dst[i + 4] = MinMax(src2[i * 2], src2[i * 2 + 1], min); |
- } |
- set_d_register(Vd, dst); |
+ case Neon8: |
+ PairwiseMinMax<int8_t>(this, Vd, Vm, Vn, min); |
break; |
- } |
- case Neon16: { |
- int16_t dst[4], src1[4], src2[4]; |
- get_d_register(Vn, src1); |
- get_d_register(Vm, src2); |
- for (int i = 0; i < 2; i++) { |
- dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min); |
- dst[i + 2] = MinMax(src2[i * 2], src2[i * 2 + 1], min); |
- } |
- set_d_register(Vd, dst); |
+ case Neon16: |
+ PairwiseMinMax<int16_t>(this, Vd, Vm, Vn, min); |
break; |
- } |
- case Neon32: { |
- int32_t dst[2], src1[2], src2[2]; |
- get_d_register(Vn, src1); |
- get_d_register(Vm, src2); |
- dst[0] = MinMax(src1[0], src1[1], min); |
- dst[1] = MinMax(src2[0], src2[1], min); |
- set_d_register(Vd, dst); |
+ case Neon32: |
+ PairwiseMinMax<int32_t>(this, Vd, Vm, Vn, min); |
break; |
- } |
default: |
UNREACHABLE(); |
break; |
@@ -4428,8 +4487,8 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
case 0xd: { |
if (instr->Bit(4) == 0) { |
float src1[4], src2[4]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
+ get_neon_register(Vn, src1); |
+ get_neon_register(Vm, src2); |
for (int i = 0; i < 4; i++) { |
if (instr->Bit(21) == 0) { |
// vadd.f32 Qd, Qm, Qn. |
@@ -4439,7 +4498,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
src1[i] = src1[i] - src2[i]; |
} |
} |
- set_q_register(Vd, src1); |
+ set_neon_register(Vd, src1); |
} else { |
UNIMPLEMENTED(); |
} |
@@ -4449,13 +4508,13 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 0) { |
// vceq.f32. |
float src1[4], src2[4]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
+ get_neon_register(Vn, src1); |
+ get_neon_register(Vm, src2); |
uint32_t dst[4]; |
for (int i = 0; i < 4; i++) { |
dst[i] = (src1[i] == src2[i]) ? 0xFFFFFFFF : 0; |
} |
- set_q_register(Vd, dst); |
+ set_neon_register(Vd, dst); |
} else { |
UNIMPLEMENTED(); |
} |
@@ -4464,8 +4523,8 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
case 0xf: { |
if (instr->Bit(20) == 0 && instr->Bit(6) == 1) { |
float src1[4], src2[4]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
+ get_neon_register(Vn, src1); |
+ get_neon_register(Vm, src2); |
if (instr->Bit(4) == 1) { |
if (instr->Bit(21) == 0) { |
// vrecps.f32 Qd, Qm, Qn. |
@@ -4485,7 +4544,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
src1[i] = MinMax(src1[i], src2[i], min); |
} |
} |
- set_q_register(Vd, src1); |
+ set_neon_register(Vd, src1); |
} else { |
UNIMPLEMENTED(); |
} |
@@ -4526,8 +4585,8 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
int Vm = instr->VFPMRegValue(kSimd128Precision); |
int Vn = instr->VFPNRegValue(kSimd128Precision); |
uint8_t src1[16], src2[16], dst[16]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
+ get_neon_register(Vn, src1); |
+ get_neon_register(Vm, src2); |
int boundary = kSimd128Size - imm4; |
int i = 0; |
for (; i < boundary; i++) { |
@@ -4536,7 +4595,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
for (; i < 16; i++) { |
dst[i] = src2[i - boundary]; |
} |
- set_q_register(Vd, dst); |
+ set_neon_register(Vd, dst); |
} else if (instr->Bits(11, 7) == 0xA && instr->Bit(4) == 1) { |
// vshl.i<size> Qd, Qm, shift |
int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16)); |
@@ -4545,33 +4604,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
int Vm = instr->VFPMRegValue(kSimd128Precision); |
NeonSize ns = static_cast<NeonSize>(size / 16); |
switch (ns) { |
- case Neon8: { |
- uint8_t src[16]; |
- get_q_register(Vm, src); |
- for (int i = 0; i < 16; i++) { |
- src[i] <<= shift; |
- } |
- set_q_register(Vd, src); |
+ case Neon8: |
+ ShiftLeft<uint8_t, kSimd128Size>(this, Vd, Vm, shift); |
break; |
- } |
- case Neon16: { |
- uint16_t src[8]; |
- get_q_register(Vm, src); |
- for (int i = 0; i < 8; i++) { |
- src[i] <<= shift; |
- } |
- set_q_register(Vd, src); |
+ case Neon16: |
+ ShiftLeft<uint16_t, kSimd128Size>(this, Vd, Vm, shift); |
break; |
- } |
- case Neon32: { |
- uint32_t src[4]; |
- get_q_register(Vm, src); |
- for (int i = 0; i < 4; i++) { |
- src[i] <<= shift; |
- } |
- set_q_register(Vd, src); |
+ case Neon32: |
+ ShiftLeft<uint32_t, kSimd128Size>(this, Vd, Vm, shift); |
break; |
- } |
default: |
UNREACHABLE(); |
break; |
@@ -4584,33 +4625,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
int Vm = instr->VFPMRegValue(kSimd128Precision); |
NeonSize ns = static_cast<NeonSize>(size / 16); |
switch (ns) { |
- case Neon8: { |
- int8_t src[16]; |
- get_q_register(Vm, src); |
- for (int i = 0; i < 16; i++) { |
- src[i] = ArithmeticShiftRight(src[i], shift); |
- } |
- set_q_register(Vd, src); |
+ case Neon8: |
+ ArithmeticShiftRight<int8_t, kSimd128Size>(this, Vd, Vm, shift); |
break; |
- } |
- case Neon16: { |
- int16_t src[8]; |
- get_q_register(Vm, src); |
- for (int i = 0; i < 8; i++) { |
- src[i] = ArithmeticShiftRight(src[i], shift); |
- } |
- set_q_register(Vd, src); |
+ case Neon16: |
+ ArithmeticShiftRight<int16_t, kSimd128Size>(this, Vd, Vm, shift); |
break; |
- } |
- case Neon32: { |
- int32_t src[4]; |
- get_q_register(Vm, src); |
- for (int i = 0; i < 4; i++) { |
- src[i] = ArithmeticShiftRight(src[i], shift); |
- } |
- set_q_register(Vd, src); |
+ case Neon32: |
+ ArithmeticShiftRight<int32_t, kSimd128Size>(this, Vd, Vm, shift); |
break; |
- } |
default: |
UNREACHABLE(); |
break; |
@@ -4658,13 +4681,13 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
if (instr->Bits(21, 20) == 1 && instr->Bit(4) == 1) { |
// vbsl.size Qd, Qm, Qn. |
uint32_t dst[4], src1[4], src2[4]; |
- get_q_register(Vd, dst); |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
+ get_neon_register(Vd, dst); |
+ get_neon_register(Vn, src1); |
+ get_neon_register(Vm, src2); |
for (int i = 0; i < 4; i++) { |
dst[i] = (dst[i] & src1[i]) | (~dst[i] & src2[i]); |
} |
- set_q_register(Vd, dst); |
+ set_neon_register(Vd, dst); |
} else if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 1) { |
if (instr->Bit(6) == 0) { |
// veor Dd, Dn, Dm |
@@ -4677,10 +4700,10 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
} else { |
// veor Qd, Qn, Qm |
uint32_t src1[4], src2[4]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
+ get_neon_register(Vn, src1); |
+ get_neon_register(Vm, src2); |
for (int i = 0; i < 4; i++) src1[i] ^= src2[i]; |
- set_q_register(Vd, src1); |
+ set_neon_register(Vd, src1); |
} |
} else { |
UNIMPLEMENTED(); |
@@ -4715,45 +4738,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
bool ge = instr->Bit(4) == 1; |
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
switch (size) { |
- case Neon8: { |
- uint8_t src1[16], src2[16]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 16; i++) { |
- if (ge) |
- src1[i] = src1[i] >= src2[i] ? 0xFFu : 0; |
- else |
- src1[i] = src1[i] > src2[i] ? 0xFFu : 0; |
- } |
- set_q_register(Vd, src1); |
+ case Neon8: |
+ CompareGreater<uint8_t, kSimd128Size>(this, Vd, Vm, Vn, ge); |
break; |
- } |
- case Neon16: { |
- uint16_t src1[8], src2[8]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 8; i++) { |
- if (ge) |
- src1[i] = src1[i] >= src2[i] ? 0xFFFFu : 0; |
- else |
- src1[i] = src1[i] > src2[i] ? 0xFFFFu : 0; |
- } |
- set_q_register(Vd, src1); |
+ case Neon16: |
+ CompareGreater<uint16_t, kSimd128Size>(this, Vd, Vm, Vn, ge); |
break; |
- } |
- case Neon32: { |
- uint32_t src1[4], src2[4]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 4; i++) { |
- if (ge) |
- src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0; |
- else |
- src1[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0; |
- } |
- set_q_register(Vd, src1); |
+ case Neon32: |
+ CompareGreater<uint32_t, kSimd128Size>(this, Vd, Vm, Vn, ge); |
break; |
- } |
default: |
UNREACHABLE(); |
break; |
@@ -4765,36 +4758,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
bool min = instr->Bit(4) != 0; |
switch (size) { |
- case Neon8: { |
- uint8_t src1[16], src2[16]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 16; i++) { |
- src1[i] = MinMax(src1[i], src2[i], min); |
- } |
- set_q_register(Vd, src1); |
+ case Neon8: |
+ MinMax<uint8_t, kSimd128Size>(this, Vd, Vm, Vn, min); |
break; |
- } |
- case Neon16: { |
- uint16_t src1[8], src2[8]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 8; i++) { |
- src1[i] = MinMax(src1[i], src2[i], min); |
- } |
- set_q_register(Vd, src1); |
+ case Neon16: |
+ MinMax<uint16_t, kSimd128Size>(this, Vd, Vm, Vn, min); |
break; |
- } |
- case Neon32: { |
- uint32_t src1[4], src2[4]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 4; i++) { |
- src1[i] = MinMax(src1[i], src2[i], min); |
- } |
- set_q_register(Vd, src1); |
+ case Neon32: |
+ MinMax<uint32_t, kSimd128Size>(this, Vd, Vm, Vn, min); |
break; |
- } |
default: |
UNREACHABLE(); |
break; |
@@ -4806,36 +4778,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
// vsub.size Qd, Qm, Qn. |
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
switch (size) { |
- case Neon8: { |
- uint8_t src1[16], src2[16]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 16; i++) { |
- src1[i] -= src2[i]; |
- } |
- set_q_register(Vd, src1); |
+ case Neon8: |
+ Sub<uint8_t, kSimd128Size>(this, Vd, Vm, Vn); |
break; |
- } |
- case Neon16: { |
- uint16_t src1[8], src2[8]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 8; i++) { |
- src1[i] -= src2[i]; |
- } |
- set_q_register(Vd, src1); |
+ case Neon16: |
+ Sub<uint16_t, kSimd128Size>(this, Vd, Vm, Vn); |
break; |
- } |
- case Neon32: { |
- uint32_t src1[4], src2[4]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 4; i++) { |
- src1[i] -= src2[i]; |
- } |
- set_q_register(Vd, src1); |
+ case Neon32: |
+ Sub<uint32_t, kSimd128Size>(this, Vd, Vm, Vn); |
break; |
- } |
default: |
UNREACHABLE(); |
break; |
@@ -4844,36 +4795,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
// vceq.size Qd, Qm, Qn. |
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
switch (size) { |
- case Neon8: { |
- uint8_t src1[16], src2[16]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 16; i++) { |
- src1[i] = (src1[i] == src2[i]) ? 0xFFu : 0; |
- } |
- set_q_register(Vd, src1); |
+ case Neon8: |
+ CompareEqual<uint8_t, kSimd128Size>(this, Vd, Vm, Vn); |
break; |
- } |
- case Neon16: { |
- uint16_t src1[8], src2[8]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 8; i++) { |
- src1[i] = (src1[i] == src2[i]) ? 0xFFFFu : 0; |
- } |
- set_q_register(Vd, src1); |
+ case Neon16: |
+ CompareEqual<uint16_t, kSimd128Size>(this, Vd, Vm, Vn); |
break; |
- } |
- case Neon32: { |
- uint32_t src1[4], src2[4]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 4; i++) { |
- src1[i] = (src1[i] == src2[i]) ? 0xFFFFFFFFu : 0; |
- } |
- set_q_register(Vd, src1); |
+ case Neon32: |
+ CompareEqual<uint32_t, kSimd128Size>(this, Vd, Vm, Vn); |
break; |
- } |
default: |
UNREACHABLE(); |
break; |
@@ -4886,37 +4816,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
bool min = instr->Bit(4) != 0; |
switch (size) { |
- case Neon8: { |
- uint8_t dst[8], src1[8], src2[8]; |
- get_d_register(Vn, src1); |
- get_d_register(Vm, src2); |
- for (int i = 0; i < 4; i++) { |
- dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min); |
- dst[i + 4] = MinMax(src2[i * 2], src2[i * 2 + 1], min); |
- } |
- set_d_register(Vd, dst); |
+ case Neon8: |
+ PairwiseMinMax<uint8_t>(this, Vd, Vm, Vn, min); |
break; |
- } |
- case Neon16: { |
- uint16_t dst[4], src1[4], src2[4]; |
- get_d_register(Vn, src1); |
- get_d_register(Vm, src2); |
- for (int i = 0; i < 2; i++) { |
- dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min); |
- dst[i + 2] = MinMax(src2[i * 2], src2[i * 2 + 1], min); |
- } |
- set_d_register(Vd, dst); |
+ case Neon16: |
+ PairwiseMinMax<uint16_t>(this, Vd, Vm, Vn, min); |
break; |
- } |
- case Neon32: { |
- uint32_t dst[2], src1[2], src2[2]; |
- get_d_register(Vn, src1); |
- get_d_register(Vm, src2); |
- dst[0] = MinMax(src1[0], src1[1], min); |
- dst[1] = MinMax(src2[0], src2[1], min); |
- set_d_register(Vd, dst); |
+ case Neon32: |
+ PairwiseMinMax<uint32_t>(this, Vd, Vm, Vn, min); |
break; |
- } |
default: |
UNREACHABLE(); |
break; |
@@ -4927,12 +4835,12 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) { |
// vmul.f32 Qd, Qn, Qm |
float src1[4], src2[4]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
+ get_neon_register(Vn, src1); |
+ get_neon_register(Vm, src2); |
for (int i = 0; i < 4; i++) { |
src1[i] = src1[i] * src2[i]; |
} |
- set_q_register(Vd, src1); |
+ set_neon_register(Vd, src1); |
} else { |
UNIMPLEMENTED(); |
} |
@@ -4943,8 +4851,8 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
// vcge/vcgt.f32 Qd, Qm, Qn |
bool ge = instr->Bit(21) == 0; |
float src1[4], src2[4]; |
- get_q_register(Vn, src1); |
- get_q_register(Vm, src2); |
+ get_neon_register(Vn, src1); |
+ get_neon_register(Vm, src2); |
uint32_t dst[4]; |
for (int i = 0; i < 4; i++) { |
if (ge) { |
@@ -4953,7 +4861,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
dst[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0; |
} |
} |
- set_q_register(Vd, dst); |
+ set_neon_register(Vd, dst); |
} else { |
UNIMPLEMENTED(); |
} |
@@ -4994,7 +4902,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
int Vd = instr->VFPDRegValue(kSimd128Precision); |
int Vm = instr->VFPMRegValue(kSimd128Precision); |
uint32_t q_data[4]; |
- get_q_register(Vm, q_data); |
+ get_neon_register(Vm, q_data); |
int op = instr->Bits(8, 7); |
for (int i = 0; i < 4; i++) { |
switch (op) { |
@@ -5020,7 +4928,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
break; |
} |
} |
- set_q_register(Vd, q_data); |
+ set_neon_register(Vd, q_data); |
} else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0) { |
if (instr->Bit(6) == 0) { |
// vswp Dd, Dm. |
@@ -5036,10 +4944,10 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
uint32_t dval[4], mval[4]; |
int vd = instr->VFPDRegValue(kSimd128Precision); |
int vm = instr->VFPMRegValue(kSimd128Precision); |
- get_q_register(vd, dval); |
- get_q_register(vm, mval); |
- set_q_register(vm, dval); |
- set_q_register(vd, mval); |
+ get_neon_register(vd, dval); |
+ get_neon_register(vm, mval); |
+ set_neon_register(vm, dval); |
+ set_neon_register(vd, mval); |
} |
} else if (instr->Bits(11, 7) == 0x18) { |
// vdup.32 Qd, Sm. |
@@ -5049,15 +4957,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
uint32_t s_data = get_s_register(vm * 2 + index); |
uint32_t q_data[4]; |
for (int i = 0; i < 4; i++) q_data[i] = s_data; |
- set_q_register(vd, q_data); |
+ set_neon_register(vd, q_data); |
} else if (instr->Bits(19, 16) == 0 && instr->Bits(11, 6) == 0x17) { |
// vmvn Qd, Qm. |
int vd = instr->VFPDRegValue(kSimd128Precision); |
int vm = instr->VFPMRegValue(kSimd128Precision); |
uint32_t q_data[4]; |
- get_q_register(vm, q_data); |
+ get_neon_register(vm, q_data); |
for (int i = 0; i < 4; i++) q_data[i] = ~q_data[i]; |
- set_q_register(vd, q_data); |
+ set_neon_register(vd, q_data); |
} else if (instr->Bits(11, 10) == 0x2) { |
// vtb[l,x] Dd, <list>, Dm. |
int vd = instr->VFPDRegValue(kDoublePrecision); |
@@ -5082,108 +4990,79 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
} |
} |
set_d_register(vd, &result); |
- } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x1 && |
- instr->Bit(6) == 1) { |
+ } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x1) { |
NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); |
- int Vd = instr->VFPDRegValue(kSimd128Precision); |
- int Vm = instr->VFPMRegValue(kSimd128Precision); |
- if (instr->Bit(7) == 1) { |
- // vzip.<size> Qd, Qm. |
- switch (size) { |
- case Neon8: { |
- uint8_t src1[16], src2[16], dst1[16], dst2[16]; |
- get_q_register(Vd, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 8; i++) { |
- dst1[i * 2] = src1[i]; |
- dst1[i * 2 + 1] = src2[i]; |
- dst2[i * 2] = src1[i + 8]; |
- dst2[i * 2 + 1] = src2[i + 8]; |
- } |
- set_q_register(Vd, dst1); |
- set_q_register(Vm, dst2); |
- break; |
- } |
- case Neon16: { |
- uint16_t src1[8], src2[8], dst1[8], dst2[8]; |
- get_q_register(Vd, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 4; i++) { |
- dst1[i * 2] = src1[i]; |
- dst1[i * 2 + 1] = src2[i]; |
- dst2[i * 2] = src1[i + 4]; |
- dst2[i * 2 + 1] = src2[i + 4]; |
- } |
- set_q_register(Vd, dst1); |
- set_q_register(Vm, dst2); |
- break; |
+ if (instr->Bit(6) == 0) { |
+ int Vd = instr->VFPDRegValue(kDoublePrecision); |
+ int Vm = instr->VFPMRegValue(kDoublePrecision); |
+ if (instr->Bit(7) == 1) { |
+ // vzip.<size> Dd, Dm. |
+ switch (size) { |
+ case Neon8: |
+ Zip<uint8_t, kDoubleSize>(this, Vd, Vm); |
+ break; |
+ case Neon16: |
+ Zip<uint16_t, kDoubleSize>(this, Vd, Vm); |
+ break; |
+ case Neon32: |
+ UNIMPLEMENTED(); |
+ break; |
+ default: |
+ UNREACHABLE(); |
+ break; |
} |
- case Neon32: { |
- uint32_t src1[4], src2[4], dst1[4], dst2[4]; |
- get_q_register(Vd, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 2; i++) { |
- dst1[i * 2] = src1[i]; |
- dst1[i * 2 + 1] = src2[i]; |
- dst2[i * 2] = src1[i + 2]; |
- dst2[i * 2 + 1] = src2[i + 2]; |
- } |
- set_q_register(Vd, dst1); |
- set_q_register(Vm, dst2); |
- break; |
+ } else { |
+ // vuzp.<size> Dd, Dm. |
+ switch (size) { |
+ case Neon8: |
+ Unzip<uint8_t, kDoubleSize>(this, Vd, Vm); |
+ break; |
+ case Neon16: |
+ Unzip<uint16_t, kDoubleSize>(this, Vd, Vm); |
+ break; |
+ case Neon32: |
+ UNIMPLEMENTED(); |
+ break; |
+ default: |
+ UNREACHABLE(); |
+ break; |
} |
- default: |
- UNREACHABLE(); |
- break; |
} |
} else { |
- // vuzp.<size> Qd, Qm. |
- switch (size) { |
- case Neon8: { |
- uint8_t src1[16], src2[16], dst1[16], dst2[16]; |
- get_q_register(Vd, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 8; i++) { |
- dst1[i] = src1[i * 2]; |
- dst1[i + 8] = src2[i * 2]; |
- dst2[i] = src1[i * 2 + 1]; |
- dst2[i + 8] = src2[i * 2 + 1]; |
- } |
- set_q_register(Vd, dst1); |
- set_q_register(Vm, dst2); |
- break; |
- } |
- case Neon16: { |
- uint16_t src1[8], src2[8], dst1[8], dst2[8]; |
- get_q_register(Vd, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 4; i++) { |
- dst1[i] = src1[i * 2]; |
- dst1[i + 4] = src2[i * 2]; |
- dst2[i] = src1[i * 2 + 1]; |
- dst2[i + 4] = src2[i * 2 + 1]; |
- } |
- set_q_register(Vd, dst1); |
- set_q_register(Vm, dst2); |
- break; |
+ int Vd = instr->VFPDRegValue(kSimd128Precision); |
+ int Vm = instr->VFPMRegValue(kSimd128Precision); |
+ if (instr->Bit(7) == 1) { |
+ // vzip.<size> Qd, Qm. |
+ switch (size) { |
+ case Neon8: |
+ Zip<uint8_t, kSimd128Size>(this, Vd, Vm); |
+ break; |
+ case Neon16: |
+ Zip<uint16_t, kSimd128Size>(this, Vd, Vm); |
+ break; |
+ case Neon32: |
+ Zip<uint32_t, kSimd128Size>(this, Vd, Vm); |
+ break; |
+ default: |
+ UNREACHABLE(); |
+ break; |
} |
- case Neon32: { |
- uint32_t src1[4], src2[4], dst1[4], dst2[4]; |
- get_q_register(Vd, src1); |
- get_q_register(Vm, src2); |
- for (int i = 0; i < 2; i++) { |
- dst1[i] = src1[i * 2]; |
- dst1[i + 2] = src2[i * 2]; |
- dst2[i] = src1[i * 2 + 1]; |
- dst2[i + 2] = src2[i * 2 + 1]; |
- } |
- set_q_register(Vd, dst1); |
- set_q_register(Vm, dst2); |
- break; |
+ } else { |
+ // vuzp.<size> Qd, Qm. |
+ switch (size) { |
+ case Neon8: |
+ Unzip<uint8_t, kSimd128Size>(this, Vd, Vm); |
+ break; |
+ case Neon16: |
+ Unzip<uint16_t, kSimd128Size>(this, Vd, Vm); |
+ break; |
+ case Neon32: |
+ Unzip<uint32_t, kSimd128Size>(this, Vd, Vm); |
+ break; |
+ default: |
+ UNREACHABLE(); |
+ break; |
} |
- default: |
- UNREACHABLE(); |
- break; |
} |
} |
} else if (instr->Bits(17, 16) == 0 && instr->Bits(11, 9) == 0) { |
@@ -5197,32 +5076,32 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
case Neon16: { |
DCHECK_EQ(Neon8, size); |
uint8_t src[16]; |
- get_q_register(Vm, src); |
+ get_neon_register(Vm, src); |
for (int i = 0; i < 16; i += 2) { |
std::swap(src[i], src[i + 1]); |
} |
- set_q_register(Vd, src); |
+ set_neon_register(Vd, src); |
break; |
} |
case Neon32: { |
switch (size) { |
case Neon16: { |
uint16_t src[8]; |
- get_q_register(Vm, src); |
+ get_neon_register(Vm, src); |
for (int i = 0; i < 8; i += 2) { |
std::swap(src[i], src[i + 1]); |
} |
- set_q_register(Vd, src); |
+ set_neon_register(Vd, src); |
break; |
} |
case Neon8: { |
uint8_t src[16]; |
- get_q_register(Vm, src); |
+ get_neon_register(Vm, src); |
for (int i = 0; i < 4; i++) { |
std::swap(src[i * 4], src[i * 4 + 3]); |
std::swap(src[i * 4 + 1], src[i * 4 + 2]); |
} |
- set_q_register(Vd, src); |
+ set_neon_register(Vd, src); |
break; |
} |
default: |
@@ -5235,30 +5114,30 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
switch (size) { |
case Neon32: { |
uint32_t src[4]; |
- get_q_register(Vm, src); |
+ get_neon_register(Vm, src); |
std::swap(src[0], src[1]); |
std::swap(src[2], src[3]); |
- set_q_register(Vd, src); |
+ set_neon_register(Vd, src); |
break; |
} |
case Neon16: { |
uint16_t src[8]; |
- get_q_register(Vm, src); |
+ get_neon_register(Vm, src); |
for (int i = 0; i < 4; i++) { |
std::swap(src[i * 4], src[i * 4 + 3]); |
std::swap(src[i * 4 + 1], src[i * 4 + 2]); |
} |
- set_q_register(Vd, src); |
+ set_neon_register(Vd, src); |
break; |
} |
case Neon8: { |
uint8_t src[16]; |
- get_q_register(Vm, src); |
+ get_neon_register(Vm, src); |
for (int i = 0; i < 4; i++) { |
std::swap(src[i], src[7 - i]); |
std::swap(src[i + 8], src[15 - i]); |
} |
- set_q_register(Vd, src); |
+ set_neon_register(Vd, src); |
break; |
} |
default: |
@@ -5271,48 +5150,44 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
UNREACHABLE(); |
break; |
} |
- } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 6) == 0x3) { |
- int Vd = instr->VFPDRegValue(kSimd128Precision); |
- int Vm = instr->VFPMRegValue(kSimd128Precision); |
+ } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0x1) { |
NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); |
- // vtrn.<size> Qd, Qm. |
- switch (size) { |
- case Neon8: { |
- uint8_t src[16], dst[16]; |
- get_q_register(Vd, dst); |
- get_q_register(Vm, src); |
- for (int i = 0; i < 8; i++) { |
- std::swap(dst[2 * i + 1], src[2 * i]); |
- } |
- set_q_register(Vd, dst); |
- set_q_register(Vm, src); |
- break; |
- } |
- case Neon16: { |
- uint16_t src[8], dst[8]; |
- get_q_register(Vd, dst); |
- get_q_register(Vm, src); |
- for (int i = 0; i < 4; i++) { |
- std::swap(dst[2 * i + 1], src[2 * i]); |
- } |
- set_q_register(Vd, dst); |
- set_q_register(Vm, src); |
- break; |
+ if (instr->Bit(6) == 0) { |
+ int Vd = instr->VFPDRegValue(kDoublePrecision); |
+ int Vm = instr->VFPMRegValue(kDoublePrecision); |
+ // vtrn.<size> Dd, Dm. |
+ switch (size) { |
+ case Neon8: |
+ Transpose<uint8_t, kDoubleSize>(this, Vd, Vm); |
+ break; |
+ case Neon16: |
+ Transpose<uint16_t, kDoubleSize>(this, Vd, Vm); |
+ break; |
+ case Neon32: |
+ Transpose<uint32_t, kDoubleSize>(this, Vd, Vm); |
+ break; |
+ default: |
+ UNREACHABLE(); |
+ break; |
} |
- case Neon32: { |
- uint32_t src[4], dst[4]; |
- get_q_register(Vd, dst); |
- get_q_register(Vm, src); |
- for (int i = 0; i < 2; i++) { |
- std::swap(dst[2 * i + 1], src[2 * i]); |
- } |
- set_q_register(Vd, dst); |
- set_q_register(Vm, src); |
- break; |
+ } else { |
+ int Vd = instr->VFPDRegValue(kSimd128Precision); |
+ int Vm = instr->VFPMRegValue(kSimd128Precision); |
+ // vtrn.<size> Qd, Qm. |
+ switch (size) { |
+ case Neon8: |
+ Transpose<uint8_t, kSimd128Size>(this, Vd, Vm); |
+ break; |
+ case Neon16: |
+ Transpose<uint16_t, kSimd128Size>(this, Vd, Vm); |
+ break; |
+ case Neon32: |
+ Transpose<uint32_t, kSimd128Size>(this, Vd, Vm); |
+ break; |
+ default: |
+ UNREACHABLE(); |
+ break; |
} |
- default: |
- UNREACHABLE(); |
- break; |
} |
} else if (instr->Bits(17, 16) == 0x1 && instr->Bit(11) == 0) { |
int Vd = instr->VFPDRegValue(kSimd128Precision); |
@@ -5323,41 +5198,23 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
if (instr->Bit(10) != 0) { |
// floating point (clear sign bits) |
uint32_t src[4]; |
- get_q_register(Vm, src); |
+ get_neon_register(Vm, src); |
for (int i = 0; i < 4; i++) { |
src[i] &= ~0x80000000; |
} |
- set_q_register(Vd, src); |
+ set_neon_register(Vd, src); |
} else { |
// signed integer |
switch (size) { |
- case Neon8: { |
- int8_t src[16]; |
- get_q_register(Vm, src); |
- for (int i = 0; i < 16; i++) { |
- src[i] = std::abs(src[i]); |
- } |
- set_q_register(Vd, src); |
+ case Neon8: |
+ Abs<int8_t, kSimd128Size>(this, Vd, Vm); |
break; |
- } |
- case Neon16: { |
- int16_t src[8]; |
- get_q_register(Vm, src); |
- for (int i = 0; i < 8; i++) { |
- src[i] = std::abs(src[i]); |
- } |
- set_q_register(Vd, src); |
+ case Neon16: |
+ Abs<int16_t, kSimd128Size>(this, Vd, Vm); |
break; |
- } |
- case Neon32: { |
- int32_t src[4]; |
- get_q_register(Vm, src); |
- for (int i = 0; i < 4; i++) { |
- src[i] = std::abs(src[i]); |
- } |
- set_q_register(Vd, src); |
+ case Neon32: |
+ Abs<int32_t, kSimd128Size>(this, Vd, Vm); |
break; |
- } |
default: |
UNIMPLEMENTED(); |
break; |
@@ -5368,40 +5225,23 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
if (instr->Bit(10) != 0) { |
// floating point (toggle sign bits) |
uint32_t src[4]; |
- get_q_register(Vm, src); |
+ get_neon_register(Vm, src); |
for (int i = 0; i < 4; i++) { |
src[i] ^= 0x80000000; |
} |
- set_q_register(Vd, src); |
+ set_neon_register(Vd, src); |
} else { |
// signed integer |
switch (size) { |
- case Neon8: { |
- int8_t src[16]; |
- get_q_register(Vm, src); |
- for (int i = 0; i < 16; i++) { |
- src[i] = -src[i]; |
- } |
- set_q_register(Vd, src); |
+ case Neon8: |
+ Neg<int8_t, kSimd128Size>(this, Vd, Vm); |
break; |
- } |
case Neon16: |
- int16_t src[8]; |
- get_q_register(Vm, src); |
- for (int i = 0; i < 8; i++) { |
- src[i] = -src[i]; |
- } |
- set_q_register(Vd, src); |
+ Neg<int16_t, kSimd128Size>(this, Vd, Vm); |
break; |
- case Neon32: { |
- int32_t src[4]; |
- get_q_register(Vm, src); |
- for (int i = 0; i < 4; i++) { |
- src[i] = -src[i]; |
- } |
- set_q_register(Vd, src); |
+ case Neon32: |
+ Neg<int32_t, kSimd128Size>(this, Vd, Vm); |
break; |
- } |
default: |
UNIMPLEMENTED(); |
break; |
@@ -5415,7 +5255,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
int Vd = instr->VFPDRegValue(kSimd128Precision); |
int Vm = instr->VFPMRegValue(kSimd128Precision); |
uint32_t src[4]; |
- get_q_register(Vm, src); |
+ get_neon_register(Vm, src); |
if (instr->Bit(7) == 0) { |
for (int i = 0; i < 4; i++) { |
float denom = bit_cast<float>(src[i]); |
@@ -5433,7 +5273,7 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
src[i] = bit_cast<uint32_t>(result); |
} |
} |
- set_q_register(Vd, src); |
+ set_neon_register(Vd, src); |
} else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x2 && |
instr->Bits(7, 6) != 0) { |
// vqmovn.<type><size> Dd, Qm. |
@@ -5481,33 +5321,15 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { |
int Vm = instr->VFPMRegValue(kSimd128Precision); |
NeonSize ns = static_cast<NeonSize>(size / 16); |
switch (ns) { |
- case Neon8: { |
- uint8_t src[16]; |
- get_q_register(Vm, src); |
- for (int i = 0; i < 16; i++) { |
- src[i] >>= shift; |
- } |
- set_q_register(Vd, src); |
+ case Neon8: |
+ ShiftRight<uint8_t, kSimd128Size>(this, Vd, Vm, shift); |
break; |
- } |
- case Neon16: { |
- uint16_t src[8]; |
- get_q_register(Vm, src); |
- for (int i = 0; i < 8; i++) { |
- src[i] >>= shift; |
- } |
- set_q_register(Vd, src); |
+ case Neon16: |
+ ShiftRight<uint16_t, kSimd128Size>(this, Vd, Vm, shift); |
break; |
- } |
- case Neon32: { |
- uint32_t src[4]; |
- get_q_register(Vm, src); |
- for (int i = 0; i < 4; i++) { |
- src[i] >>= shift; |
- } |
- set_q_register(Vd, src); |
+ case Neon32: |
+ ShiftRight<uint32_t, kSimd128Size>(this, Vd, Vm, shift); |
break; |
- } |
default: |
UNREACHABLE(); |
break; |