Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(43)

Unified Diff: src/arm/simulator-arm.cc

Issue 2797923006: [ARM] Implement D-register versions of vzip, vuzp, and vtrn. (Closed)
Patch Set: Initial Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/arm/simulator-arm.cc
diff --git a/src/arm/simulator-arm.cc b/src/arm/simulator-arm.cc
index 2bf2a6a3a52b549a548bc384081e975344b63a69..5bac15cbace866b2f66e26e56aea25efd344d8a9 100644
--- a/src/arm/simulator-arm.cc
+++ b/src/arm/simulator-arm.cc
@@ -5082,108 +5082,207 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
}
}
set_d_register(vd, &result);
- } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x1 &&
- instr->Bit(6) == 1) {
+ } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x1) {
NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
- int Vd = instr->VFPDRegValue(kSimd128Precision);
- int Vm = instr->VFPMRegValue(kSimd128Precision);
- if (instr->Bit(7) == 1) {
- // vzip.<size> Qd, Qm.
- switch (size) {
- case Neon8: {
- uint8_t src1[16], src2[16], dst1[16], dst2[16];
- get_q_register(Vd, src1);
- get_q_register(Vm, src2);
- for (int i = 0; i < 8; i++) {
- dst1[i * 2] = src1[i];
- dst1[i * 2 + 1] = src2[i];
- dst2[i * 2] = src1[i + 8];
- dst2[i * 2 + 1] = src2[i + 8];
+ if (instr->Bit(6) == 0) {
+ int Vd = instr->VFPDRegValue(kDoublePrecision);
+ int Vm = instr->VFPMRegValue(kDoublePrecision);
+ if (instr->Bit(7) == 1) {
+ // vzip.<size> Dd, Dm.
+ switch (size) {
+ case Neon8: {
martyn.capewell 2017/04/06 13:09:49 It would be good to factorise the zip and uzp operations… [comment truncated in this view]
bbudge 2017/04/06 17:49:43 Done. I rewrote the get_ and set_q_register templates… [comment truncated in this view]
+ uint8_t src1[8], src2[8], dst1[8], dst2[8];
+ get_d_register(Vd, src1);
+ get_d_register(Vm, src2);
+ for (int i = 0; i < 4; i++) {
+ dst1[i * 2] = src1[i];
+ dst1[i * 2 + 1] = src2[i];
+ dst2[i * 2] = src1[i + 4];
+ dst2[i * 2 + 1] = src2[i + 4];
+ }
+ set_d_register(Vd, dst1);
+ set_d_register(Vm, dst2);
+ break;
}
- set_q_register(Vd, dst1);
- set_q_register(Vm, dst2);
- break;
- }
- case Neon16: {
- uint16_t src1[8], src2[8], dst1[8], dst2[8];
- get_q_register(Vd, src1);
- get_q_register(Vm, src2);
- for (int i = 0; i < 4; i++) {
- dst1[i * 2] = src1[i];
- dst1[i * 2 + 1] = src2[i];
- dst2[i * 2] = src1[i + 4];
- dst2[i * 2 + 1] = src2[i + 4];
+ case Neon16: {
+ uint16_t src1[4], src2[4], dst1[4], dst2[4];
+ get_d_register(Vd, src1);
+ get_d_register(Vm, src2);
+ for (int i = 0; i < 2; i++) {
+ dst1[i * 2] = src1[i];
+ dst1[i * 2 + 1] = src2[i];
+ dst2[i * 2] = src1[i + 2];
+ dst2[i * 2 + 1] = src2[i + 2];
+ }
+ set_d_register(Vd, dst1);
+ set_d_register(Vm, dst2);
+ break;
}
- set_q_register(Vd, dst1);
- set_q_register(Vm, dst2);
- break;
+ case Neon32: {
+ uint32_t src1[2], src2[2], dst1[2], dst2[2];
+ get_d_register(Vd, src1);
+ get_d_register(Vm, src2);
+ dst1[0] = src1[0];
+ dst1[1] = src2[0];
+ dst2[0] = src1[1];
+ dst2[1] = src2[1];
+ set_d_register(Vd, dst1);
+ set_d_register(Vm, dst2);
+ break;
+ }
+ default:
+ UNREACHABLE();
+ break;
}
- case Neon32: {
- uint32_t src1[4], src2[4], dst1[4], dst2[4];
- get_q_register(Vd, src1);
- get_q_register(Vm, src2);
- for (int i = 0; i < 2; i++) {
- dst1[i * 2] = src1[i];
- dst1[i * 2 + 1] = src2[i];
- dst2[i * 2] = src1[i + 2];
- dst2[i * 2 + 1] = src2[i + 2];
+ } else {
+ // vuzp.<size> Dd, Dm.
+ switch (size) {
+ case Neon8: {
+ uint8_t src1[8], src2[8], dst1[8], dst2[8];
+ get_d_register(Vd, src1);
+ get_d_register(Vm, src2);
+ for (int i = 0; i < 4; i++) {
+ dst1[i] = src1[i * 2];
+ dst1[i + 4] = src2[i * 2];
+ dst2[i] = src1[i * 2 + 1];
+ dst2[i + 4] = src2[i * 2 + 1];
+ }
+ set_d_register(Vd, dst1);
+ set_d_register(Vm, dst2);
+ break;
}
- set_q_register(Vd, dst1);
- set_q_register(Vm, dst2);
- break;
+ case Neon16: {
+ uint16_t src1[4], src2[4], dst1[4], dst2[4];
+ get_d_register(Vd, src1);
+ get_d_register(Vm, src2);
+ for (int i = 0; i < 2; i++) {
+ dst1[i] = src1[i * 2];
+ dst1[i + 2] = src2[i * 2];
+ dst2[i] = src1[i * 2 + 1];
+ dst2[i + 2] = src2[i * 2 + 1];
+ }
+ set_d_register(Vd, dst1);
+ set_d_register(Vm, dst2);
+ break;
+ }
+ case Neon32: {
+ uint32_t src1[2], src2[2], dst1[2], dst2[2];
+ get_d_register(Vd, src1);
+ get_d_register(Vm, src2);
+ dst1[0] = src1[0];
+ dst1[1] = src2[0];
+ dst2[0] = src1[1];
+ dst2[1] = src2[1];
+ set_d_register(Vd, dst1);
+ set_d_register(Vm, dst2);
+ break;
+ }
+ default:
+ UNREACHABLE();
+ break;
}
- default:
- UNREACHABLE();
- break;
}
} else {
- // vuzp.<size> Qd, Qm.
- switch (size) {
- case Neon8: {
- uint8_t src1[16], src2[16], dst1[16], dst2[16];
- get_q_register(Vd, src1);
- get_q_register(Vm, src2);
- for (int i = 0; i < 8; i++) {
- dst1[i] = src1[i * 2];
- dst1[i + 8] = src2[i * 2];
- dst2[i] = src1[i * 2 + 1];
- dst2[i + 8] = src2[i * 2 + 1];
+ int Vd = instr->VFPDRegValue(kSimd128Precision);
+ int Vm = instr->VFPMRegValue(kSimd128Precision);
+ if (instr->Bit(7) == 1) {
+ // vzip.<size> Qd, Qm.
+ switch (size) {
+ case Neon8: {
+ uint8_t src1[16], src2[16], dst1[16], dst2[16];
+ get_q_register(Vd, src1);
+ get_q_register(Vm, src2);
+ for (int i = 0; i < 8; i++) {
+ dst1[i * 2] = src1[i];
+ dst1[i * 2 + 1] = src2[i];
+ dst2[i * 2] = src1[i + 8];
+ dst2[i * 2 + 1] = src2[i + 8];
+ }
+ set_q_register(Vd, dst1);
+ set_q_register(Vm, dst2);
+ break;
}
- set_q_register(Vd, dst1);
- set_q_register(Vm, dst2);
- break;
- }
- case Neon16: {
- uint16_t src1[8], src2[8], dst1[8], dst2[8];
- get_q_register(Vd, src1);
- get_q_register(Vm, src2);
- for (int i = 0; i < 4; i++) {
- dst1[i] = src1[i * 2];
- dst1[i + 4] = src2[i * 2];
- dst2[i] = src1[i * 2 + 1];
- dst2[i + 4] = src2[i * 2 + 1];
+ case Neon16: {
+ uint16_t src1[8], src2[8], dst1[8], dst2[8];
+ get_q_register(Vd, src1);
+ get_q_register(Vm, src2);
+ for (int i = 0; i < 4; i++) {
+ dst1[i * 2] = src1[i];
+ dst1[i * 2 + 1] = src2[i];
+ dst2[i * 2] = src1[i + 4];
+ dst2[i * 2 + 1] = src2[i + 4];
+ }
+ set_q_register(Vd, dst1);
+ set_q_register(Vm, dst2);
+ break;
}
- set_q_register(Vd, dst1);
- set_q_register(Vm, dst2);
- break;
+ case Neon32: {
+ uint32_t src1[4], src2[4], dst1[4], dst2[4];
+ get_q_register(Vd, src1);
+ get_q_register(Vm, src2);
+ for (int i = 0; i < 2; i++) {
+ dst1[i * 2] = src1[i];
+ dst1[i * 2 + 1] = src2[i];
+ dst2[i * 2] = src1[i + 2];
+ dst2[i * 2 + 1] = src2[i + 2];
+ }
+ set_q_register(Vd, dst1);
+ set_q_register(Vm, dst2);
+ break;
+ }
+ default:
+ UNREACHABLE();
+ break;
}
- case Neon32: {
- uint32_t src1[4], src2[4], dst1[4], dst2[4];
- get_q_register(Vd, src1);
- get_q_register(Vm, src2);
- for (int i = 0; i < 2; i++) {
- dst1[i] = src1[i * 2];
- dst1[i + 2] = src2[i * 2];
- dst2[i] = src1[i * 2 + 1];
- dst2[i + 2] = src2[i * 2 + 1];
+ } else {
+ // vuzp.<size> Qd, Qm.
+ switch (size) {
+ case Neon8: {
+ uint8_t src1[16], src2[16], dst1[16], dst2[16];
+ get_q_register(Vd, src1);
+ get_q_register(Vm, src2);
+ for (int i = 0; i < 8; i++) {
+ dst1[i] = src1[i * 2];
+ dst1[i + 8] = src2[i * 2];
+ dst2[i] = src1[i * 2 + 1];
+ dst2[i + 8] = src2[i * 2 + 1];
+ }
+ set_q_register(Vd, dst1);
+ set_q_register(Vm, dst2);
+ break;
}
- set_q_register(Vd, dst1);
- set_q_register(Vm, dst2);
- break;
+ case Neon16: {
+ uint16_t src1[8], src2[8], dst1[8], dst2[8];
+ get_q_register(Vd, src1);
+ get_q_register(Vm, src2);
+ for (int i = 0; i < 4; i++) {
+ dst1[i] = src1[i * 2];
+ dst1[i + 4] = src2[i * 2];
+ dst2[i] = src1[i * 2 + 1];
+ dst2[i + 4] = src2[i * 2 + 1];
+ }
+ set_q_register(Vd, dst1);
+ set_q_register(Vm, dst2);
+ break;
+ }
+ case Neon32: {
+ uint32_t src1[4], src2[4], dst1[4], dst2[4];
+ get_q_register(Vd, src1);
+ get_q_register(Vm, src2);
+ for (int i = 0; i < 2; i++) {
+ dst1[i] = src1[i * 2];
+ dst1[i + 2] = src2[i * 2];
+ dst2[i] = src1[i * 2 + 1];
+ dst2[i + 2] = src2[i * 2 + 1];
+ }
+ set_q_register(Vd, dst1);
+ set_q_register(Vm, dst2);
+ break;
+ }
+ default:
+ UNREACHABLE();
+ break;
}
- default:
- UNREACHABLE();
- break;
}
}
} else if (instr->Bits(17, 16) == 0 && instr->Bits(11, 9) == 0) {
@@ -5271,48 +5370,90 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
UNREACHABLE();
break;
}
- } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 6) == 0x3) {
- int Vd = instr->VFPDRegValue(kSimd128Precision);
- int Vm = instr->VFPMRegValue(kSimd128Precision);
+ } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0x1) {
NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
- // vtrn.<size> Qd, Qm.
- switch (size) {
- case Neon8: {
- uint8_t src[16], dst[16];
- get_q_register(Vd, dst);
- get_q_register(Vm, src);
- for (int i = 0; i < 8; i++) {
- std::swap(dst[2 * i + 1], src[2 * i]);
+ if (instr->Bit(6) == 0) {
+ int Vd = instr->VFPDRegValue(kDoublePrecision);
+ int Vm = instr->VFPMRegValue(kDoublePrecision);
+ // vtrn.<size> Dd, Dm.
+ switch (size) {
+ case Neon8: {
+ uint8_t src[8], dst[8];
+ get_d_register(Vd, dst);
+ get_d_register(Vm, src);
+ for (int i = 0; i < 4; i++) {
+ std::swap(dst[2 * i + 1], src[2 * i]);
+ }
+ set_d_register(Vd, dst);
+ set_d_register(Vm, src);
+ break;
}
- set_q_register(Vd, dst);
- set_q_register(Vm, src);
- break;
- }
- case Neon16: {
- uint16_t src[8], dst[8];
- get_q_register(Vd, dst);
- get_q_register(Vm, src);
- for (int i = 0; i < 4; i++) {
- std::swap(dst[2 * i + 1], src[2 * i]);
+ case Neon16: {
+ uint16_t src[4], dst[4];
+ get_d_register(Vd, dst);
+ get_d_register(Vm, src);
+ for (int i = 0; i < 2; i++) {
+ std::swap(dst[2 * i + 1], src[2 * i]);
+ }
+ set_d_register(Vd, dst);
+ set_d_register(Vm, src);
+ break;
}
- set_q_register(Vd, dst);
- set_q_register(Vm, src);
- break;
+ case Neon32: {
+ uint32_t src[2], dst[2];
+ get_d_register(Vd, dst);
+ get_d_register(Vm, src);
+ std::swap(dst[1], src[0]);
+ set_d_register(Vd, dst);
+ set_d_register(Vm, src);
+ break;
+ }
+ default:
+ UNREACHABLE();
+ break;
}
- case Neon32: {
- uint32_t src[4], dst[4];
- get_q_register(Vd, dst);
- get_q_register(Vm, src);
- for (int i = 0; i < 2; i++) {
- std::swap(dst[2 * i + 1], src[2 * i]);
+ } else {
+ int Vd = instr->VFPDRegValue(kSimd128Precision);
+ int Vm = instr->VFPMRegValue(kSimd128Precision);
+ // vtrn.<size> Qd, Qm.
+ switch (size) {
+ case Neon8: {
+ uint8_t src[16], dst[16];
+ get_q_register(Vd, dst);
+ get_q_register(Vm, src);
+ for (int i = 0; i < 8; i++) {
+ std::swap(dst[2 * i + 1], src[2 * i]);
+ }
+ set_q_register(Vd, dst);
+ set_q_register(Vm, src);
+ break;
}
- set_q_register(Vd, dst);
- set_q_register(Vm, src);
- break;
+ case Neon16: {
+ uint16_t src[8], dst[8];
+ get_q_register(Vd, dst);
+ get_q_register(Vm, src);
+ for (int i = 0; i < 4; i++) {
+ std::swap(dst[2 * i + 1], src[2 * i]);
+ }
+ set_q_register(Vd, dst);
+ set_q_register(Vm, src);
+ break;
+ }
+ case Neon32: {
+ uint32_t src[4], dst[4];
+ get_q_register(Vd, dst);
+ get_q_register(Vm, src);
+ for (int i = 0; i < 2; i++) {
+ std::swap(dst[2 * i + 1], src[2 * i]);
+ }
+ set_q_register(Vd, dst);
+ set_q_register(Vm, src);
+ break;
+ }
+ default:
+ UNREACHABLE();
+ break;
}
- default:
- UNREACHABLE();
- break;
}
} else if (instr->Bits(17, 16) == 0x1 && instr->Bit(11) == 0) {
int Vd = instr->VFPDRegValue(kSimd128Precision);

Powered by Google App Engine
This is Rietveld 408576698