Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: src/arm/simulator-arm.cc

Issue 2579913002: [ARM] Add NEON instructions for implementing SIMD. (Closed)
Patch Set: Review comments. Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/arm/macro-assembler-arm.cc ('k') | test/cctest/test-assembler-arm.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <stdarg.h> 5 #include <stdarg.h>
6 #include <stdlib.h> 6 #include <stdlib.h>
7 #include <cmath> 7 #include <cmath>
8 8
9 #if V8_TARGET_ARCH_ARM 9 #if V8_TARGET_ARCH_ARM
10 10
(...skipping 3317 matching lines...) Expand 10 before | Expand all | Expand 10 after
3328 switch (size) { 3328 switch (size) {
3329 case Neon8: { 3329 case Neon8: {
3330 rt_value &= 0xFF; 3330 rt_value &= 0xFF;
3331 uint8_t* dst = reinterpret_cast<uint8_t*>(q_data); 3331 uint8_t* dst = reinterpret_cast<uint8_t*>(q_data);
3332 for (int i = 0; i < 16; i++) { 3332 for (int i = 0; i < 16; i++) {
3333 dst[i] = rt_value; 3333 dst[i] = rt_value;
3334 } 3334 }
3335 break; 3335 break;
3336 } 3336 }
3337 case Neon16: { 3337 case Neon16: {
3338 // Perform pairwise ops instead of casting to uint16_t. 3338 // Perform pairwise op.
3339 rt_value &= 0xFFFFu; 3339 rt_value &= 0xFFFFu;
3340 uint32_t rt_rt = (rt_value << 16) | (rt_value & 0xFFFFu); 3340 uint32_t rt_rt = (rt_value << 16) | (rt_value & 0xFFFFu);
3341 for (int i = 0; i < 4; i++) { 3341 for (int i = 0; i < 4; i++) {
3342 q_data[i] = rt_rt; 3342 q_data[i] = rt_rt;
3343 } 3343 }
3344 break; 3344 break;
3345 } 3345 }
3346 case Neon32: { 3346 case Neon32: {
3347 for (int i = 0; i < 4; i++) { 3347 for (int i = 0; i < 4; i++) {
3348 q_data[i] = rt_value; 3348 q_data[i] = rt_value;
(...skipping 482 matching lines...) Expand 10 before | Expand all | Expand 10 after
3831 HandleVList(instr); 3831 HandleVList(instr);
3832 break; 3832 break;
3833 default: 3833 default:
3834 UNIMPLEMENTED(); // Not used by V8. 3834 UNIMPLEMENTED(); // Not used by V8.
3835 } 3835 }
3836 } else { 3836 } else {
3837 UNIMPLEMENTED(); // Not used by V8. 3837 UNIMPLEMENTED(); // Not used by V8.
3838 } 3838 }
3839 } 3839 }
3840 3840
3841 #define HIGH_16(x) ((x) >> 16)
3842 #define LOW_16(x) ((x)&0xFFFFu)
3843 #define COMBINE_32(high, low) ((high) << 16 | (low)&0xFFFFu)
3844 #define PAIRWISE_OP(x, y, OP) \
3845 COMBINE_32(OP(HIGH_16((x)), HIGH_16((y))), OP(LOW_16((x)), LOW_16((y))))
3846
3847 #define ADD_16(x, y) ((x) + (y))
3848 #define SUB_16(x, y) ((x) - (y))
3849 #define CEQ_16(x, y) ((x) == (y) ? 0xFFFFu : 0)
3850 #define TST_16(x, y) (((x) & (y)) != 0 ? 0xFFFFu : 0)
3851
3852 void Simulator::DecodeSpecialCondition(Instruction* instr) { 3841 void Simulator::DecodeSpecialCondition(Instruction* instr) {
3853 switch (instr->SpecialValue()) { 3842 switch (instr->SpecialValue()) {
3854 case 4: 3843 case 4:
3855 if (instr->Bits(21, 20) == 2 && instr->Bits(11, 8) == 1 && 3844 if (instr->Bits(21, 20) == 2 && instr->Bits(11, 8) == 1 &&
3856 instr->Bit(4) == 1) { 3845 instr->Bit(4) == 1) {
3857 // vmov Qd, Qm 3846 // vmov Qd, Qm
3858 int Vd = instr->VFPDRegValue(kSimd128Precision); 3847 int Vd = instr->VFPDRegValue(kSimd128Precision);
3859 int Vm = instr->VFPMRegValue(kSimd128Precision); 3848 int Vm = instr->VFPMRegValue(kSimd128Precision);
3860 uint32_t data[4]; 3849 uint32_t data[4];
3861 get_q_register(Vm, data); 3850 get_q_register(Vm, data);
(...skipping 12 matching lines...) Expand all
3874 switch (size) { 3863 switch (size) {
3875 case Neon8: { 3864 case Neon8: {
3876 uint8_t* s1 = reinterpret_cast<uint8_t*>(src1); 3865 uint8_t* s1 = reinterpret_cast<uint8_t*>(src1);
3877 uint8_t* s2 = reinterpret_cast<uint8_t*>(src2); 3866 uint8_t* s2 = reinterpret_cast<uint8_t*>(src2);
3878 for (int i = 0; i < 16; i++) { 3867 for (int i = 0; i < 16; i++) {
3879 s1[i] += s2[i]; 3868 s1[i] += s2[i];
3880 } 3869 }
3881 break; 3870 break;
3882 } 3871 }
3883 case Neon16: { 3872 case Neon16: {
3884 for (int i = 0; i < 4; i++) { 3873 uint16_t s1[8], s2[8];
3885 src1[i] = PAIRWISE_OP(src1[i], src2[i], ADD_16); 3874 memcpy(s1, src1, sizeof(s1));
3875 memcpy(s2, src2, sizeof(s2));
3876 for (int i = 0; i < 8; i++) {
3877 s1[i] += s2[i];
3886 } 3878 }
3879 memcpy(src1, s1, sizeof(src1));
3887 break; 3880 break;
3888 } 3881 }
3889 case Neon32: { 3882 case Neon32: {
3890 for (int i = 0; i < 4; i++) { 3883 for (int i = 0; i < 4; i++) {
3891 src1[i] += src2[i]; 3884 src1[i] += src2[i];
3892 } 3885 }
3893 break; 3886 break;
3894 } 3887 }
3895 default: 3888 default:
3896 UNREACHABLE(); 3889 UNREACHABLE();
3897 break; 3890 break;
3898 } 3891 }
3899 } else { 3892 } else {
3900 // vtst.i<size> Qd, Qm, Qn. 3893 // vtst.i<size> Qd, Qm, Qn.
3901 switch (size) { 3894 switch (size) {
3902 case Neon8: { 3895 case Neon8: {
3903 uint8_t* s1 = reinterpret_cast<uint8_t*>(src1); 3896 uint8_t* s1 = reinterpret_cast<uint8_t*>(src1);
3904 uint8_t* s2 = reinterpret_cast<uint8_t*>(src2); 3897 uint8_t* s2 = reinterpret_cast<uint8_t*>(src2);
3905 for (int i = 0; i < 16; i++) { 3898 for (int i = 0; i < 16; i++) {
3906 s1[i] = (s1[i] & s2[i]) != 0 ? 0xFFu : 0; 3899 s1[i] = (s1[i] & s2[i]) != 0 ? 0xFFu : 0;
3907 } 3900 }
3908 break; 3901 break;
3909 } 3902 }
3910 case Neon16: { 3903 case Neon16: {
3911 for (int i = 0; i < 4; i++) { 3904 uint16_t s1[8], s2[8];
3912 src1[i] = PAIRWISE_OP(src1[i], src2[i], TST_16); 3905 memcpy(s1, src1, sizeof(s1));
3906 memcpy(s2, src2, sizeof(s2));
3907 for (int i = 0; i < 8; i++) {
3908 s1[i] = (s1[i] & s2[i]) != 0 ? 0xFFFFu : 0;
3913 } 3909 }
3910 memcpy(src1, s1, sizeof(src1));
3914 break; 3911 break;
3915 } 3912 }
3916 case Neon32: { 3913 case Neon32: {
3917 for (int i = 0; i < 4; i++) { 3914 for (int i = 0; i < 4; i++) {
3918 src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFFFFFu : 0; 3915 src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFFFFFu : 0;
3919 } 3916 }
3920 break; 3917 break;
3921 } 3918 }
3922 default: 3919 default:
3923 UNREACHABLE(); 3920 UNREACHABLE();
(...skipping 14 matching lines...) Expand all
3938 // vadd.f32 Qd, Qm, Qn. 3935 // vadd.f32 Qd, Qm, Qn.
3939 src1[i] = bit_cast<uint32_t>(bit_cast<float>(src1[i]) + 3936 src1[i] = bit_cast<uint32_t>(bit_cast<float>(src1[i]) +
3940 bit_cast<float>(src2[i])); 3937 bit_cast<float>(src2[i]));
3941 } else { 3938 } else {
3942 // vsub.f32 Qd, Qm, Qn. 3939 // vsub.f32 Qd, Qm, Qn.
3943 src1[i] = bit_cast<uint32_t>(bit_cast<float>(src1[i]) - 3940 src1[i] = bit_cast<uint32_t>(bit_cast<float>(src1[i]) -
3944 bit_cast<float>(src2[i])); 3941 bit_cast<float>(src2[i]));
3945 } 3942 }
3946 } 3943 }
3947 set_q_register(Vd, src1); 3944 set_q_register(Vd, src1);
3945 } else if (instr->Bits(11, 8) == 0x9 && instr->Bit(6) == 1 &&
3946 instr->Bit(4) == 1) {
3947 // vmul.i<size> Qd, Qm, Qn.
3948 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
3949 int Vd = instr->VFPDRegValue(kSimd128Precision);
3950 int Vm = instr->VFPMRegValue(kSimd128Precision);
3951 int Vn = instr->VFPNRegValue(kSimd128Precision);
3952 uint32_t src1[4], src2[4];
3953 get_q_register(Vn, src1);
3954 get_q_register(Vm, src2);
3955 switch (size) {
3956 case Neon8: {
3957 uint8_t* s1 = reinterpret_cast<uint8_t*>(src1);
3958 uint8_t* s2 = reinterpret_cast<uint8_t*>(src2);
3959 for (int i = 0; i < 16; i++) {
3960 s1[i] *= s2[i];
3961 }
3962 break;
3963 }
3964 case Neon16: {
3965 uint16_t s1[8], s2[8];
3966 memcpy(s1, src1, sizeof(s1));
3967 memcpy(s2, src2, sizeof(s2));
3968 for (int i = 0; i < 8; i++) {
3969 s1[i] *= s2[i];
3970 }
3971 memcpy(src1, s1, sizeof(src1));
3972 break;
3973 }
3974 case Neon32: {
3975 for (int i = 0; i < 4; i++) {
3976 src1[i] *= src2[i];
3977 }
3978 break;
3979 }
3980 default:
3981 UNIMPLEMENTED();
3982 break;
3983 }
3984 set_q_register(Vd, src1);
3948 } else { 3985 } else {
3949 UNIMPLEMENTED(); 3986 UNIMPLEMENTED();
3950 } 3987 }
3951 break; 3988 break;
3952 case 5: 3989 case 5:
3953 if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) && 3990 if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
3954 (instr->Bit(4) == 1)) { 3991 (instr->Bit(4) == 1)) {
3955 // vmovl signed 3992 // vmovl signed
3956 if ((instr->VdValue() & 1) != 0) UNIMPLEMENTED(); 3993 if ((instr->VdValue() & 1) != 0) UNIMPLEMENTED();
3957 int Vd = (instr->Bit(22) << 3) | (instr->VdValue() >> 1); 3994 int Vd = (instr->Bit(22) << 3) | (instr->VdValue() >> 1);
3958 int Vm = (instr->Bit(5) << 4) | instr->VmValue(); 3995 int Vm = (instr->Bit(5) << 4) | instr->VmValue();
3959 int imm3 = instr->Bits(21, 19); 3996 int imm3 = instr->Bits(21, 19);
3960 if ((imm3 != 1) && (imm3 != 2) && (imm3 != 4)) UNIMPLEMENTED(); 3997 if ((imm3 != 1) && (imm3 != 2) && (imm3 != 4)) UNIMPLEMENTED();
3961 int esize = 8 * imm3; 3998 int esize = 8 * imm3;
3962 int elements = 64 / esize; 3999 int elements = 64 / esize;
3963 int8_t from[8]; 4000 int8_t from[8];
3964 get_d_register(Vm, reinterpret_cast<uint64_t*>(from)); 4001 get_d_register(Vm, reinterpret_cast<uint64_t*>(from));
3965 int16_t to[8]; 4002 int16_t to[8];
3966 int e = 0; 4003 int e = 0;
3967 while (e < elements) { 4004 while (e < elements) {
3968 to[e] = from[e]; 4005 to[e] = from[e];
3969 e++; 4006 e++;
3970 } 4007 }
3971 set_q_register(Vd, reinterpret_cast<uint64_t*>(to)); 4008 set_q_register(Vd, reinterpret_cast<uint64_t*>(to));
4009 } else if (instr->Bits(21, 20) == 3 && instr->Bit(4) == 0) {
4010 // vext.
4011 int imm4 = instr->Bits(11, 8);
4012 int Vd = instr->VFPDRegValue(kSimd128Precision);
4013 int Vm = instr->VFPMRegValue(kSimd128Precision);
4014 int Vn = instr->VFPNRegValue(kSimd128Precision);
4015 uint32_t src1[4], src2[4], dst[4];
4016 get_q_register(Vn, src1);
4017 get_q_register(Vm, src2);
4018 uint8_t* s1 = reinterpret_cast<uint8_t*>(src1);
4019 uint8_t* s2 = reinterpret_cast<uint8_t*>(src2);
4020 uint8_t* d = reinterpret_cast<uint8_t*>(dst);
4021 int boundary = 16 - imm4;
4022 int i = 0;
4023 for (; i < boundary; i++) {
4024 d[i] = s1[i + imm4];
4025 }
4026 for (; i < 16; i++) {
4027 d[i] = s2[i - boundary];
4028 }
4029 set_q_register(Vd, dst);
3972 } else { 4030 } else {
3973 UNIMPLEMENTED(); 4031 UNIMPLEMENTED();
3974 } 4032 }
3975 break; 4033 break;
3976 case 6: 4034 case 6:
3977 if (instr->Bits(11, 8) == 8 && instr->Bit(4) == 0) { 4035 if (instr->Bits(11, 8) == 8 && instr->Bit(4) == 0) {
3978 // vsub.size Qd, Qm, Qn. 4036 // vsub.size Qd, Qm, Qn.
3979 int size = static_cast<NeonSize>(instr->Bits(21, 20)); 4037 int size = static_cast<NeonSize>(instr->Bits(21, 20));
3980 int Vd = instr->VFPDRegValue(kSimd128Precision); 4038 int Vd = instr->VFPDRegValue(kSimd128Precision);
3981 int Vm = instr->VFPMRegValue(kSimd128Precision); 4039 int Vm = instr->VFPMRegValue(kSimd128Precision);
3982 int Vn = instr->VFPNRegValue(kSimd128Precision); 4040 int Vn = instr->VFPNRegValue(kSimd128Precision);
3983 uint32_t src1[4], src2[4]; 4041 uint32_t src1[4], src2[4];
3984 get_q_register(Vn, src1); 4042 get_q_register(Vn, src1);
3985 get_q_register(Vm, src2); 4043 get_q_register(Vm, src2);
3986 switch (size) { 4044 switch (size) {
3987 case Neon8: { 4045 case Neon8: {
3988 uint8_t* s1 = reinterpret_cast<uint8_t*>(src1); 4046 uint8_t* s1 = reinterpret_cast<uint8_t*>(src1);
3989 uint8_t* s2 = reinterpret_cast<uint8_t*>(src2); 4047 uint8_t* s2 = reinterpret_cast<uint8_t*>(src2);
3990 for (int i = 0; i < 16; i++) { 4048 for (int i = 0; i < 16; i++) {
3991 s1[i] -= s2[i]; 4049 s1[i] -= s2[i];
3992 } 4050 }
3993 break; 4051 break;
3994 } 4052 }
3995 case Neon16: { 4053 case Neon16: {
3996 for (int i = 0; i < 4; i++) { 4054 uint16_t s1[8], s2[8];
3997 src1[i] = PAIRWISE_OP(src1[i], src2[i], SUB_16); 4055 memcpy(s1, src1, sizeof(s1));
4056 memcpy(s2, src2, sizeof(s2));
4057 for (int i = 0; i < 8; i++) {
4058 s1[i] -= s2[i];
3998 } 4059 }
4060 memcpy(src1, s1, sizeof(src1));
3999 break; 4061 break;
4000 } 4062 }
4001 case Neon32: { 4063 case Neon32: {
4002 for (int i = 0; i < 4; i++) { 4064 for (int i = 0; i < 4; i++) {
4003 src1[i] -= src2[i]; 4065 src1[i] -= src2[i];
4004 } 4066 }
4005 break; 4067 break;
4006 } 4068 }
4007 default: 4069 default:
4008 UNREACHABLE(); 4070 UNREACHABLE();
(...skipping 12 matching lines...) Expand all
4021 switch (size) { 4083 switch (size) {
4022 case Neon8: { 4084 case Neon8: {
4023 uint8_t* s1 = reinterpret_cast<uint8_t*>(src1); 4085 uint8_t* s1 = reinterpret_cast<uint8_t*>(src1);
4024 uint8_t* s2 = reinterpret_cast<uint8_t*>(src2); 4086 uint8_t* s2 = reinterpret_cast<uint8_t*>(src2);
4025 for (int i = 0; i < 16; i++) { 4087 for (int i = 0; i < 16; i++) {
4026 s1[i] = s1[i] == s2[i] ? 0xFF : 0; 4088 s1[i] = s1[i] == s2[i] ? 0xFF : 0;
4027 } 4089 }
4028 break; 4090 break;
4029 } 4091 }
4030 case Neon16: { 4092 case Neon16: {
4031 for (int i = 0; i < 4; i++) { 4093 uint16_t s1[8], s2[8];
4032 src1[i] = PAIRWISE_OP(src1[i], src2[i], CEQ_16); 4094 memcpy(s1, src1, sizeof(s1));
4095 memcpy(s2, src2, sizeof(s2));
4096 for (int i = 0; i < 8; i++) {
4097 s1[i] = s1[i] == s2[i] ? 0xffffu : 0;
4033 } 4098 }
4099 memcpy(src1, s1, sizeof(src1));
4034 break; 4100 break;
4035 } 4101 }
4036 case Neon32: { 4102 case Neon32: {
4037 for (int i = 0; i < 4; i++) { 4103 for (int i = 0; i < 4; i++) {
4038 src1[i] = src1[i] == src2[i] ? 0xFFFFFFFF : 0; 4104 src1[i] = src1[i] == src2[i] ? 0xFFFFFFFF : 0;
4039 } 4105 }
4040 break; 4106 break;
4041 } 4107 }
4042 default: 4108 default:
4043 UNREACHABLE(); 4109 UNREACHABLE();
(...skipping 14 matching lines...) Expand all
4058 dst[i] = (dst[i] & src1[i]) | (~dst[i] & src2[i]); 4124 dst[i] = (dst[i] & src1[i]) | (~dst[i] & src2[i]);
4059 } 4125 }
4060 set_q_register(Vd, dst); 4126 set_q_register(Vd, dst);
4061 } else if (instr->Bits(21, 20) == 0 && instr->Bits(11, 8) == 1 && 4127 } else if (instr->Bits(21, 20) == 0 && instr->Bits(11, 8) == 1 &&
4062 instr->Bit(4) == 1) { 4128 instr->Bit(4) == 1) {
4063 if (instr->Bit(6) == 0) { 4129 if (instr->Bit(6) == 0) {
4064 // veor Dd, Dn, Dm 4130 // veor Dd, Dn, Dm
4065 int Vd = instr->VFPDRegValue(kDoublePrecision); 4131 int Vd = instr->VFPDRegValue(kDoublePrecision);
4066 int Vn = instr->VFPNRegValue(kDoublePrecision); 4132 int Vn = instr->VFPNRegValue(kDoublePrecision);
4067 int Vm = instr->VFPMRegValue(kDoublePrecision); 4133 int Vm = instr->VFPMRegValue(kDoublePrecision);
4068 uint64_t n_data, m_data; 4134 uint64_t src1, src2;
4069 get_d_register(Vn, &n_data); 4135 get_d_register(Vn, &src1);
4070 get_d_register(Vm, &m_data); 4136 get_d_register(Vm, &src2);
4071 n_data ^= m_data; 4137 src1 ^= src2;
4072 set_d_register(Vd, &n_data); 4138 set_d_register(Vd, &src1);
4073 4139
4074 } else { 4140 } else {
4075 // veor Qd, Qn, Qm 4141 // veor Qd, Qn, Qm
4076 int Vd = instr->VFPDRegValue(kSimd128Precision); 4142 int Vd = instr->VFPDRegValue(kSimd128Precision);
4077 int Vn = instr->VFPNRegValue(kSimd128Precision); 4143 int Vn = instr->VFPNRegValue(kSimd128Precision);
4078 int Vm = instr->VFPMRegValue(kSimd128Precision); 4144 int Vm = instr->VFPMRegValue(kSimd128Precision);
4079 uint32_t n_data[4], m_data[4]; 4145 uint32_t src1[4], src2[4];
4080 get_q_register(Vn, n_data); 4146 get_q_register(Vn, src1);
4081 get_q_register(Vm, m_data); 4147 get_q_register(Vm, src2);
4082 for (int i = 0; i < 4; i++) n_data[i] ^= m_data[i]; 4148 for (int i = 0; i < 4; i++) src1[i] ^= src2[i];
4083 set_q_register(Vd, n_data); 4149 set_q_register(Vd, src1);
4084 } 4150 }
4151 } else if (instr->Bit(21) == 0 && instr->Bits(11, 8) == 0xd &&
4152 instr->Bit(6) == 1 && instr->Bit(4) == 1) {
4153 // vmul.f32 Qd, Qn, Qm
4154 int Vd = instr->VFPDRegValue(kSimd128Precision);
4155 int Vn = instr->VFPNRegValue(kSimd128Precision);
4156 int Vm = instr->VFPMRegValue(kSimd128Precision);
4157 uint32_t src1[4], src2[4];
4158 get_q_register(Vn, src1);
4159 get_q_register(Vm, src2);
4160 for (int i = 0; i < 4; i++) {
4161 src1[i] = bit_cast<uint32_t>(bit_cast<float>(src1[i]) *
4162 bit_cast<float>(src2[i]));
4163 }
4164 set_q_register(Vd, src1);
4085 } else { 4165 } else {
4086 UNIMPLEMENTED(); 4166 UNIMPLEMENTED();
4087 } 4167 }
4088 break; 4168 break;
4089 case 7: 4169 case 7:
4090 if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) && 4170 if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
4091 (instr->Bit(4) == 1)) { 4171 (instr->Bit(4) == 1)) {
4092 // vmovl unsigned 4172 // vmovl unsigned
4093 if ((instr->VdValue() & 1) != 0) UNIMPLEMENTED(); 4173 if ((instr->VdValue() & 1) != 0) UNIMPLEMENTED();
4094 int Vd = (instr->Bit(22) << 3) | (instr->VdValue() >> 1); 4174 int Vd = (instr->Bit(22) << 3) | (instr->VdValue() >> 1);
4095 int Vm = (instr->Bit(5) << 4) | instr->VmValue(); 4175 int Vm = (instr->Bit(5) << 4) | instr->VmValue();
4096 int imm3 = instr->Bits(21, 19); 4176 int imm3 = instr->Bits(21, 19);
4097 if ((imm3 != 1) && (imm3 != 2) && (imm3 != 4)) UNIMPLEMENTED(); 4177 if ((imm3 != 1) && (imm3 != 2) && (imm3 != 4)) UNIMPLEMENTED();
4098 int esize = 8 * imm3; 4178 int esize = 8 * imm3;
4099 int elements = 64 / esize; 4179 int elements = 64 / esize;
4100 uint8_t from[8]; 4180 uint8_t from[8];
4101 get_d_register(Vm, reinterpret_cast<uint64_t*>(from)); 4181 get_d_register(Vm, reinterpret_cast<uint64_t*>(from));
4102 uint16_t to[8]; 4182 uint16_t to[8];
4103 int e = 0; 4183 int e = 0;
4104 while (e < elements) { 4184 while (e < elements) {
4105 to[e] = from[e]; 4185 to[e] = from[e];
4106 e++; 4186 e++;
4107 } 4187 }
4108 set_q_register(Vd, reinterpret_cast<uint64_t*>(to)); 4188 set_q_register(Vd, reinterpret_cast<uint64_t*>(to));
4109 } else if (instr->Opc1Value() == 7 && instr->Bits(19, 16) == 0xB && 4189 } else if (instr->Opc1Value() == 7 && instr->Bit(4) == 0) {
4110 instr->Bits(11, 9) == 0x3 && instr->Bit(6) == 1 && 4190 if (instr->Bits(19, 16) == 0xB && instr->Bits(11, 9) == 0x3 &&
4111 instr->Bit(4) == 0) { 4191 instr->Bit(6) == 1) {
4112 // vcvt.<Td>.<Tm> Qd, Qm. 4192 // vcvt.<Td>.<Tm> Qd, Qm.
4113 int Vd = instr->VFPDRegValue(kSimd128Precision); 4193 int Vd = instr->VFPDRegValue(kSimd128Precision);
4114 int Vm = instr->VFPMRegValue(kSimd128Precision); 4194 int Vm = instr->VFPMRegValue(kSimd128Precision);
4115 uint32_t q_data[4]; 4195 uint32_t q_data[4];
4116 get_q_register(Vm, q_data); 4196 get_q_register(Vm, q_data);
4117 int op = instr->Bits(8, 7); 4197 int op = instr->Bits(8, 7);
4118 for (int i = 0; i < 4; i++) { 4198 for (int i = 0; i < 4; i++) {
4119 switch (op) { 4199 switch (op) {
4120 case 0: 4200 case 0:
4121 // f32 <- s32, round towards nearest. 4201 // f32 <- s32, round towards nearest.
4122 q_data[i] = bit_cast<uint32_t>( 4202 q_data[i] = bit_cast<uint32_t>(std::round(
4123 std::round(static_cast<float>(bit_cast<int32_t>(q_data[i])))); 4203 static_cast<float>(bit_cast<int32_t>(q_data[i]))));
4124 break; 4204 break;
4125 case 1: 4205 case 1:
4126 // f32 <- u32, round towards nearest. 4206 // f32 <- u32, round towards nearest.
4127 q_data[i] = 4207 q_data[i] = bit_cast<uint32_t>(
4128 bit_cast<uint32_t>(std::round(static_cast<float>(q_data[i]))); 4208 std::round(static_cast<float>(q_data[i])));
4129 break; 4209 break;
4130 case 2: 4210 case 2:
4131 // s32 <- f32, round to zero. 4211 // s32 <- f32, round to zero.
4132 q_data[i] = static_cast<uint32_t>( 4212 q_data[i] = static_cast<uint32_t>(
4133 ConvertDoubleToInt(bit_cast<float>(q_data[i]), false, RZ)); 4213 ConvertDoubleToInt(bit_cast<float>(q_data[i]), false, RZ));
4134 break; 4214 break;
4135 case 3: 4215 case 3:
4136 // u32 <- f32, round to zero. 4216 // u32 <- f32, round to zero.
4137 q_data[i] = static_cast<uint32_t>( 4217 q_data[i] = static_cast<uint32_t>(
4138 ConvertDoubleToInt(bit_cast<float>(q_data[i]), true, RZ)); 4218 ConvertDoubleToInt(bit_cast<float>(q_data[i]), true, RZ));
4139 break; 4219 break;
4140 } 4220 }
4141 } 4221 }
4142 set_q_register(Vd, q_data); 4222 set_q_register(Vd, q_data);
4143 } else if ((instr->Bits(21, 16) == 0x32) && (instr->Bits(11, 7) == 0) && 4223 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0) {
4144 (instr->Bit(4) == 0)) { 4224 if (instr->Bit(6) == 0) {
4145 if (instr->Bit(6) == 0) { 4225 // vswp Dd, Dm.
4146 // vswp Dd, Dm. 4226 uint64_t dval, mval;
4147 uint64_t dval, mval; 4227 int vd = instr->VFPDRegValue(kDoublePrecision);
4148 int vd = instr->VFPDRegValue(kDoublePrecision); 4228 int vm = instr->VFPMRegValue(kDoublePrecision);
4229 get_d_register(vd, &dval);
4230 get_d_register(vm, &mval);
4231 set_d_register(vm, &dval);
4232 set_d_register(vd, &mval);
4233 } else {
4234 // vswp Qd, Qm.
4235 uint32_t dval[4], mval[4];
4236 int vd = instr->VFPDRegValue(kSimd128Precision);
4237 int vm = instr->VFPMRegValue(kSimd128Precision);
4238 get_q_register(vd, dval);
4239 get_q_register(vm, mval);
4240 set_q_register(vm, dval);
4241 set_q_register(vd, mval);
4242 }
4243 } else if (instr->Bits(11, 7) == 0x18) {
4244 // vdup.32 Qd, Sm.
4245 int vd = instr->VFPDRegValue(kSimd128Precision);
4149 int vm = instr->VFPMRegValue(kDoublePrecision); 4246 int vm = instr->VFPMRegValue(kDoublePrecision);
4150 get_d_register(vd, &dval); 4247 int index = instr->Bit(19);
4151 get_d_register(vm, &mval); 4248 uint32_t s_data = get_s_register(vm * 2 + index);
4152 set_d_register(vm, &dval); 4249 uint32_t q_data[4];
4153 set_d_register(vd, &mval); 4250 for (int i = 0; i < 4; i++) q_data[i] = s_data;
4154 } else { 4251 set_q_register(vd, q_data);
4155 // vswp Qd, Qm. 4252 } else if (instr->Bits(19, 16) == 0 && instr->Bits(11, 6) == 0x17) {
4156 uint32_t dval[4], mval[4]; 4253 // vmvn Qd, Qm.
4157 int vd = instr->VFPDRegValue(kSimd128Precision); 4254 int vd = instr->VFPDRegValue(kSimd128Precision);
4158 int vm = instr->VFPMRegValue(kSimd128Precision); 4255 int vm = instr->VFPMRegValue(kSimd128Precision);
4159 get_q_register(vd, dval); 4256 uint32_t q_data[4];
4160 get_q_register(vm, mval); 4257 get_q_register(vm, q_data);
4161 set_q_register(vm, dval); 4258 for (int i = 0; i < 4; i++) q_data[i] = ~q_data[i];
4162 set_q_register(vd, mval); 4259 set_q_register(vd, q_data);
4260 } else if (instr->Bits(11, 10) == 0x2) {
4261 // vtb[l,x] Dd, <list>, Dm.
4262 int vd = instr->VFPDRegValue(kDoublePrecision);
4263 int vn = instr->VFPNRegValue(kDoublePrecision);
4264 int vm = instr->VFPMRegValue(kDoublePrecision);
4265 int table_len = (instr->Bits(9, 8) + 1) * kDoubleSize;
4266 bool vtbx = instr->Bit(6) != 0; // vtbl / vtbx
4267 uint64_t destination = 0, indices = 0, result = 0;
4268 get_d_register(vd, &destination);
4269 get_d_register(vm, &indices);
4270 for (int i = 0; i < kDoubleSize; i++) {
4271 int shift = i * kBitsPerByte;
4272 int index = (indices >> shift) & 0xFF;
4273 if (index < table_len) {
4274 uint64_t table;
4275 get_d_register(vn + index / kDoubleSize, &table);
4276 result |=
4277 ((table >> ((index % kDoubleSize) * kBitsPerByte)) & 0xFF)
4278 << shift;
4279 } else if (vtbx) {
4280 result |= destination & (0xFFull << shift);
4281 }
4282 }
4283 set_d_register(vd, &result);
4284 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 6) == 0x7) {
4285 // vzip.<size> Qd, Qm.
4286 int size = static_cast<NeonSize>(instr->Bits(19, 18));
4287 int Vd = instr->VFPDRegValue(kSimd128Precision);
4288 int Vm = instr->VFPMRegValue(kSimd128Precision);
4289 uint32_t src1[4], src2[4], dst1[4], dst2[4];
4290 get_q_register(Vd, src1);
4291 get_q_register(Vm, src2);
4292 switch (size) {
4293 case Neon8: {
4294 uint8_t* s1 = reinterpret_cast<uint8_t*>(src1);
4295 uint8_t* s2 = reinterpret_cast<uint8_t*>(src2);
4296 uint8_t* d1 = reinterpret_cast<uint8_t*>(dst1);
4297 uint8_t* d2 = reinterpret_cast<uint8_t*>(dst2);
4298 for (int i = 0; i < 8; i++) {
4299 d1[i * 2] = s1[i];
4300 d1[i * 2 + 1] = s2[i];
4301 d2[i * 2] = s1[i + 8];
4302 d2[i * 2 + 1] = s2[i + 8];
4303 }
4304 break;
4305 }
4306 case Neon16: {
4307 uint16_t s1[8], s2[8], d1[8], d2[8];
4308 memcpy(s1, src1, sizeof(s1));
4309 memcpy(s2, src2, sizeof(s2));
4310 for (int i = 0; i < 8; i += 2) {
4311 d1[i] = s1[i / 2];
4312 d1[i + 1] = s2[i / 2];
4313 d2[i] = s1[i / 2 + 4];
4314 d2[i + 1] = s2[i / 2 + 4];
4315 }
4316 memcpy(dst1, d1, sizeof(dst1));
4317 memcpy(dst2, d2, sizeof(dst2));
4318 break;
4319 }
4320 case Neon32: {
4321 for (int i = 0; i < 2; i++) {
4322 dst1[i * 2] = src1[i];
4323 dst1[i * 2 + 1] = src2[i];
4324 dst2[i * 2] = src1[i + 2];
4325 dst2[i * 2 + 1] = src2[i + 2];
4326 }
4327 break;
4328 }
4329 default:
4330 UNREACHABLE();
4331 break;
4332 }
4333 set_q_register(Vd, dst1);
4334 set_q_register(Vm, dst2);
4335 } else if (instr->Bits(17, 16) == 0 && instr->Bits(11, 9) == 0) {
4336 // vrev<op>.size Qd, Qm
4337 int Vd = instr->VFPDRegValue(kSimd128Precision);
4338 int Vm = instr->VFPMRegValue(kSimd128Precision);
4339 int size = static_cast<NeonSize>(instr->Bits(19, 18));
4340 NeonSize op = static_cast<NeonSize>(static_cast<int>(Neon64) -
4341 instr->Bits(8, 7));
4342 uint32_t src[4];
4343 get_q_register(Vm, src);
4344 switch (op) {
4345 case Neon16: {
4346 DCHECK_EQ(Neon8, size);
4347 uint8_t* s = reinterpret_cast<uint8_t*>(src);
4348 for (int i = 0; i < 16; i += 2) {
4349 std::swap(s[i], s[i + 1]);
4350 }
4351 break;
4352 }
4353 case Neon32: {
4354 switch (size) {
4355 case Neon16:
4356 for (int i = 0; i < 4; i++) {
4357 src[i] = (src[i] >> 16) | (src[i] << 16);
4358 }
4359 break;
4360 case Neon8: {
4361 uint8_t* s = reinterpret_cast<uint8_t*>(src);
4362 for (int i = 0; i < 4; i++) {
4363 std::swap(s[i * 4], s[i * 4 + 3]);
4364 std::swap(s[i * 4 + 1], s[i * 4 + 2]);
4365 }
4366 break;
4367 }
4368 default:
4369 UNREACHABLE();
4370 break;
4371 }
4372 break;
4373 }
4374 case Neon64: {
4375 switch (size) {
4376 case Neon32: {
4377 std::swap(src[0], src[1]);
4378 std::swap(src[2], src[3]);
4379 break;
4380 }
4381 case Neon16: {
4382 for (int i = 0; i <= 2; i += 2) {
4383 uint32_t w1 = src[i];
4384 uint32_t w2 = src[i + 1];
4385 src[i] = (w2 >> 16) | (w2 << 16);
4386 src[i + 1] = (w1 >> 16) | (w1 << 16);
4387 }
4388 break;
4389 }
4390 case Neon8: {
4391 uint8_t* s = reinterpret_cast<uint8_t*>(src);
4392 for (int i = 0; i < 4; i++) {
4393 std::swap(s[i], s[7 - i]);
4394 std::swap(s[i + 8], s[15 - i]);
4395 }
4396 break;
4397 }
4398 default:
4399 UNREACHABLE();
4400 break;
4401 }
4402 break;
4403 }
4404 default:
4405 UNREACHABLE();
4406 break;
4407 }
4408 set_q_register(Vd, src);
4409 } else if (instr->Bits(17, 16) == 0x1 && instr->Bit(11) == 0) {
4410 int Vd = instr->VFPDRegValue(kSimd128Precision);
4411 int Vm = instr->VFPMRegValue(kSimd128Precision);
4412 int size = static_cast<NeonSize>(instr->Bits(19, 18));
4413 uint32_t src[4];
4414 get_q_register(Vm, src);
4415 if (instr->Bits(9, 6) == 0xd) {
4416 // vabs<type>.<size> Qd, Qm
4417 if (instr->Bit(10) != 0) {
4418 // floating point (clear sign bits)
4419 for (int i = 0; i < 4; i++) {
4420 src[i] &= ~0x80000000;
4421 }
4422 } else {
4423 // signed integer
4424 switch (size) {
4425 case Neon8: {
4426 int8_t* s = reinterpret_cast<int8_t*>(src);
4427 for (int i = 0; i < 16; i++) {
4428 s[i] = std::abs(s[i]);
4429 }
4430 break;
4431 }
4432 case Neon16: {
4433 int16_t s[8];
4434 memcpy(s, src, sizeof(s));
4435 for (int i = 0; i < 8; i++) {
4436 s[i] = std::abs(s[i]);
4437 }
4438 memcpy(src, s, sizeof(src));
4439 break;
4440 }
4441 case Neon32: {
4442 int32_t* as_signed = reinterpret_cast<int32_t*>(src);
4443 for (int i = 0; i < 4; i++) {
4444 as_signed[i] = std::abs(as_signed[i]);
4445 }
4446 break;
4447 }
4448 default:
4449 UNIMPLEMENTED();
4450 break;
4451 }
4452 }
4453 } else if (instr->Bits(9, 6) == 0xf) {
4454 // vneg<type>.<size> Qd, Qm (signed integer)
4455 if (instr->Bit(10) != 0) {
4456 // floating point (toggle sign bits)
4457 for (int i = 0; i < 4; i++) {
4458 src[i] ^= 0x80000000;
4459 }
4460 } else {
4461 // signed integer
4462 switch (size) {
4463 case Neon8: {
4464 int8_t* s = reinterpret_cast<int8_t*>(src);
4465 for (int i = 0; i < 16; i++) {
4466 s[i] = -s[i];
4467 }
4468 break;
4469 }
4470 case Neon16:
4471 int16_t s[8];
4472 memcpy(s, src, sizeof(s));
4473 for (int i = 0; i < 8; i++) {
4474 s[i] = -s[i];
4475 }
4476 memcpy(src, s, sizeof(src));
4477 break;
4478 case Neon32: {
4479 int32_t* as_signed = reinterpret_cast<int32_t*>(src);
4480 for (int i = 0; i < 4; i++) {
4481 as_signed[i] = -as_signed[i];
4482 }
4483 break;
4484 }
4485 default:
4486 UNIMPLEMENTED();
4487 break;
4488 }
4489 }
4490 } else {
4491 UNIMPLEMENTED();
4492 }
4493 set_q_register(Vd, src);
4494 } else {
4495 UNIMPLEMENTED();
4163 } 4496 }
4164 } else if (instr->Opc1Value() == 0x7 && instr->Bits(11, 7) == 0x18 &&
4165 instr->Bit(4) == 0x0) {
4166 // vdup.32 Qd, Sm.
4167 int vd = instr->VFPDRegValue(kSimd128Precision);
4168 int vm = instr->VFPMRegValue(kDoublePrecision);
4169 int index = instr->Bit(19);
4170 uint32_t s_data = get_s_register(vm * 2 + index);
4171 uint32_t q_data[4];
4172 for (int i = 0; i < 4; i++) q_data[i] = s_data;
4173 set_q_register(vd, q_data);
4174 } else if (instr->Opc1Value() == 7 && instr->Bits(19, 16) == 0 &&
4175 instr->Bits(11, 6) == 0x17 && instr->Bit(4) == 0) {
4176 // vmvn Qd, Qm.
4177 int vd = instr->VFPDRegValue(kSimd128Precision);
4178 int vm = instr->VFPMRegValue(kSimd128Precision);
4179 uint32_t q_data[4];
4180 get_q_register(vm, q_data);
4181 for (int i = 0; i < 4; i++) q_data[i] = ~q_data[i];
4182 set_q_register(vd, q_data);
4183 } else if (instr->Opc1Value() == 0x7 && instr->Bits(11, 10) == 0x2 &&
4184 instr->Bit(4) == 0x0) {
4185 // vtb[l,x] Dd, <list>, Dm.
4186 int vd = instr->VFPDRegValue(kDoublePrecision);
4187 int vn = instr->VFPNRegValue(kDoublePrecision);
4188 int vm = instr->VFPMRegValue(kDoublePrecision);
4189 int table_len = (instr->Bits(9, 8) + 1) * kDoubleSize;
4190 bool vtbx = instr->Bit(6) != 0; // vtbl / vtbx
4191 uint64_t destination = 0, indices = 0, result = 0;
4192 get_d_register(vd, &destination);
4193 get_d_register(vm, &indices);
4194 for (int i = 0; i < kDoubleSize; i++) {
4195 int shift = i * kBitsPerByte;
4196 int index = (indices >> shift) & 0xFF;
4197 if (index < table_len) {
4198 uint64_t table;
4199 get_d_register(vn + index / kDoubleSize, &table);
4200 result |= ((table >> ((index % kDoubleSize) * kBitsPerByte)) & 0xFF)
4201 << shift;
4202 } else if (vtbx) {
4203 result |= destination & (0xFFull << shift);
4204 }
4205 }
4206 set_d_register(vd, &result);
4207 } else {
4208 UNIMPLEMENTED();
4209 } 4497 }
4210 break; 4498 break;
4211 case 8: 4499 case 8:
4212 if (instr->Bits(21, 20) == 0) { 4500 if (instr->Bits(21, 20) == 0) {
4213 // vst1 4501 // vst1
4214 int Vd = (instr->Bit(22) << 4) | instr->VdValue(); 4502 int Vd = (instr->Bit(22) << 4) | instr->VdValue();
4215 int Rn = instr->VnValue(); 4503 int Rn = instr->VnValue();
4216 int type = instr->Bits(11, 8); 4504 int type = instr->Bits(11, 8);
4217 int Rm = instr->VmValue(); 4505 int Rm = instr->VmValue();
4218 int32_t address = get_register(Rn); 4506 int32_t address = get_register(Rn);
(...skipping 498 matching lines...) Expand 10 before | Expand all | Expand 10 after
4717 set_register(sp, current_sp + sizeof(uintptr_t)); 5005 set_register(sp, current_sp + sizeof(uintptr_t));
4718 return address; 5006 return address;
4719 } 5007 }
4720 5008
4721 } // namespace internal 5009 } // namespace internal
4722 } // namespace v8 5010 } // namespace v8
4723 5011
4724 #endif // USE_SIMULATOR 5012 #endif // USE_SIMULATOR
4725 5013
4726 #endif // V8_TARGET_ARCH_ARM 5014 #endif // V8_TARGET_ARCH_ARM
OLDNEW
« no previous file with comments | « src/arm/macro-assembler-arm.cc ('k') | test/cctest/test-assembler-arm.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698