OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <stdarg.h> | 5 #include <stdarg.h> |
6 #include <stdlib.h> | 6 #include <stdlib.h> |
7 #include <cmath> | 7 #include <cmath> |
8 | 8 |
9 #if V8_TARGET_ARCH_ARM | 9 #if V8_TARGET_ARCH_ARM |
10 | 10 |
(...skipping 3317 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3328 switch (size) { | 3328 switch (size) { |
3329 case Neon8: { | 3329 case Neon8: { |
3330 rt_value &= 0xFF; | 3330 rt_value &= 0xFF; |
3331 uint8_t* dst = reinterpret_cast<uint8_t*>(q_data); | 3331 uint8_t* dst = reinterpret_cast<uint8_t*>(q_data); |
3332 for (int i = 0; i < 16; i++) { | 3332 for (int i = 0; i < 16; i++) { |
3333 dst[i] = rt_value; | 3333 dst[i] = rt_value; |
3334 } | 3334 } |
3335 break; | 3335 break; |
3336 } | 3336 } |
3337 case Neon16: { | 3337 case Neon16: { |
3338 // Perform pairwise ops instead of casting to uint16_t. | 3338 // Perform pairwise op. |
3339 rt_value &= 0xFFFFu; | 3339 rt_value &= 0xFFFFu; |
3340 uint32_t rt_rt = (rt_value << 16) | (rt_value & 0xFFFFu); | 3340 uint32_t rt_rt = (rt_value << 16) | (rt_value & 0xFFFFu); |
3341 for (int i = 0; i < 4; i++) { | 3341 for (int i = 0; i < 4; i++) { |
3342 q_data[i] = rt_rt; | 3342 q_data[i] = rt_rt; |
3343 } | 3343 } |
3344 break; | 3344 break; |
3345 } | 3345 } |
3346 case Neon32: { | 3346 case Neon32: { |
3347 for (int i = 0; i < 4; i++) { | 3347 for (int i = 0; i < 4; i++) { |
3348 q_data[i] = rt_value; | 3348 q_data[i] = rt_value; |
(...skipping 482 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3831 HandleVList(instr); | 3831 HandleVList(instr); |
3832 break; | 3832 break; |
3833 default: | 3833 default: |
3834 UNIMPLEMENTED(); // Not used by V8. | 3834 UNIMPLEMENTED(); // Not used by V8. |
3835 } | 3835 } |
3836 } else { | 3836 } else { |
3837 UNIMPLEMENTED(); // Not used by V8. | 3837 UNIMPLEMENTED(); // Not used by V8. |
3838 } | 3838 } |
3839 } | 3839 } |
3840 | 3840 |
3841 #define HIGH_16(x) ((x) >> 16) | |
3842 #define LOW_16(x) ((x)&0xFFFFu) | |
3843 #define COMBINE_32(high, low) ((high) << 16 | (low)&0xFFFFu) | |
3844 #define PAIRWISE_OP(x, y, OP) \ | |
3845 COMBINE_32(OP(HIGH_16((x)), HIGH_16((y))), OP(LOW_16((x)), LOW_16((y)))) | |
3846 | |
3847 #define ADD_16(x, y) ((x) + (y)) | |
3848 #define SUB_16(x, y) ((x) - (y)) | |
3849 #define CEQ_16(x, y) ((x) == (y) ? 0xFFFFu : 0) | |
3850 #define TST_16(x, y) (((x) & (y)) != 0 ? 0xFFFFu : 0) | |
3851 | |
3852 void Simulator::DecodeSpecialCondition(Instruction* instr) { | 3841 void Simulator::DecodeSpecialCondition(Instruction* instr) { |
3853 switch (instr->SpecialValue()) { | 3842 switch (instr->SpecialValue()) { |
3854 case 4: | 3843 case 4: |
3855 if (instr->Bits(21, 20) == 2 && instr->Bits(11, 8) == 1 && | 3844 if (instr->Bits(21, 20) == 2 && instr->Bits(11, 8) == 1 && |
3856 instr->Bit(4) == 1) { | 3845 instr->Bit(4) == 1) { |
3857 // vmov Qd, Qm | 3846 // vmov Qd, Qm |
3858 int Vd = instr->VFPDRegValue(kSimd128Precision); | 3847 int Vd = instr->VFPDRegValue(kSimd128Precision); |
3859 int Vm = instr->VFPMRegValue(kSimd128Precision); | 3848 int Vm = instr->VFPMRegValue(kSimd128Precision); |
3860 uint32_t data[4]; | 3849 uint32_t data[4]; |
3861 get_q_register(Vm, data); | 3850 get_q_register(Vm, data); |
(...skipping 12 matching lines...) Expand all Loading... |
3874 switch (size) { | 3863 switch (size) { |
3875 case Neon8: { | 3864 case Neon8: { |
3876 uint8_t* s1 = reinterpret_cast<uint8_t*>(src1); | 3865 uint8_t* s1 = reinterpret_cast<uint8_t*>(src1); |
3877 uint8_t* s2 = reinterpret_cast<uint8_t*>(src2); | 3866 uint8_t* s2 = reinterpret_cast<uint8_t*>(src2); |
3878 for (int i = 0; i < 16; i++) { | 3867 for (int i = 0; i < 16; i++) { |
3879 s1[i] += s2[i]; | 3868 s1[i] += s2[i]; |
3880 } | 3869 } |
3881 break; | 3870 break; |
3882 } | 3871 } |
3883 case Neon16: { | 3872 case Neon16: { |
3884 for (int i = 0; i < 4; i++) { | 3873 uint16_t s1[8], s2[8]; |
3885 src1[i] = PAIRWISE_OP(src1[i], src2[i], ADD_16); | 3874 memcpy(s1, src1, sizeof(s1)); |
| 3875 memcpy(s2, src2, sizeof(s2)); |
| 3876 for (int i = 0; i < 8; i++) { |
| 3877 s1[i] += s2[i]; |
3886 } | 3878 } |
| 3879 memcpy(src1, s1, sizeof(src1)); |
3887 break; | 3880 break; |
3888 } | 3881 } |
3889 case Neon32: { | 3882 case Neon32: { |
3890 for (int i = 0; i < 4; i++) { | 3883 for (int i = 0; i < 4; i++) { |
3891 src1[i] += src2[i]; | 3884 src1[i] += src2[i]; |
3892 } | 3885 } |
3893 break; | 3886 break; |
3894 } | 3887 } |
3895 default: | 3888 default: |
3896 UNREACHABLE(); | 3889 UNREACHABLE(); |
3897 break; | 3890 break; |
3898 } | 3891 } |
3899 } else { | 3892 } else { |
3900 // vtst.i<size> Qd, Qm, Qn. | 3893 // vtst.i<size> Qd, Qm, Qn. |
3901 switch (size) { | 3894 switch (size) { |
3902 case Neon8: { | 3895 case Neon8: { |
3903 uint8_t* s1 = reinterpret_cast<uint8_t*>(src1); | 3896 uint8_t* s1 = reinterpret_cast<uint8_t*>(src1); |
3904 uint8_t* s2 = reinterpret_cast<uint8_t*>(src2); | 3897 uint8_t* s2 = reinterpret_cast<uint8_t*>(src2); |
3905 for (int i = 0; i < 16; i++) { | 3898 for (int i = 0; i < 16; i++) { |
3906 s1[i] = (s1[i] & s2[i]) != 0 ? 0xFFu : 0; | 3899 s1[i] = (s1[i] & s2[i]) != 0 ? 0xFFu : 0; |
3907 } | 3900 } |
3908 break; | 3901 break; |
3909 } | 3902 } |
3910 case Neon16: { | 3903 case Neon16: { |
3911 for (int i = 0; i < 4; i++) { | 3904 uint16_t s1[8], s2[8]; |
3912 src1[i] = PAIRWISE_OP(src1[i], src2[i], TST_16); | 3905 memcpy(s1, src1, sizeof(s1)); |
| 3906 memcpy(s2, src2, sizeof(s2)); |
| 3907 for (int i = 0; i < 8; i++) { |
| 3908 s1[i] = (s1[i] & s2[i]) != 0 ? 0xFFFFu : 0; |
3913 } | 3909 } |
| 3910 memcpy(src1, s1, sizeof(src1)); |
3914 break; | 3911 break; |
3915 } | 3912 } |
3916 case Neon32: { | 3913 case Neon32: { |
3917 for (int i = 0; i < 4; i++) { | 3914 for (int i = 0; i < 4; i++) { |
3918 src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFFFFFu : 0; | 3915 src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFFFFFu : 0; |
3919 } | 3916 } |
3920 break; | 3917 break; |
3921 } | 3918 } |
3922 default: | 3919 default: |
3923 UNREACHABLE(); | 3920 UNREACHABLE(); |
(...skipping 14 matching lines...) Expand all Loading... |
3938 // vadd.f32 Qd, Qm, Qn. | 3935 // vadd.f32 Qd, Qm, Qn. |
3939 src1[i] = bit_cast<uint32_t>(bit_cast<float>(src1[i]) + | 3936 src1[i] = bit_cast<uint32_t>(bit_cast<float>(src1[i]) + |
3940 bit_cast<float>(src2[i])); | 3937 bit_cast<float>(src2[i])); |
3941 } else { | 3938 } else { |
3942 // vsub.f32 Qd, Qm, Qn. | 3939 // vsub.f32 Qd, Qm, Qn. |
3943 src1[i] = bit_cast<uint32_t>(bit_cast<float>(src1[i]) - | 3940 src1[i] = bit_cast<uint32_t>(bit_cast<float>(src1[i]) - |
3944 bit_cast<float>(src2[i])); | 3941 bit_cast<float>(src2[i])); |
3945 } | 3942 } |
3946 } | 3943 } |
3947 set_q_register(Vd, src1); | 3944 set_q_register(Vd, src1); |
| 3945 } else if (instr->Bits(11, 8) == 0x9 && instr->Bit(6) == 1 && |
| 3946 instr->Bit(4) == 1) { |
| 3947 // vmul.i<size> Qd, Qm, Qn. |
| 3948 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); |
| 3949 int Vd = instr->VFPDRegValue(kSimd128Precision); |
| 3950 int Vm = instr->VFPMRegValue(kSimd128Precision); |
| 3951 int Vn = instr->VFPNRegValue(kSimd128Precision); |
| 3952 uint32_t src1[4], src2[4]; |
| 3953 get_q_register(Vn, src1); |
| 3954 get_q_register(Vm, src2); |
| 3955 switch (size) { |
| 3956 case Neon8: { |
| 3957 uint8_t* s1 = reinterpret_cast<uint8_t*>(src1); |
| 3958 uint8_t* s2 = reinterpret_cast<uint8_t*>(src2); |
| 3959 for (int i = 0; i < 16; i++) { |
| 3960 s1[i] *= s2[i]; |
| 3961 } |
| 3962 break; |
| 3963 } |
| 3964 case Neon16: { |
| 3965 uint16_t s1[8], s2[8]; |
| 3966 memcpy(s1, src1, sizeof(s1)); |
| 3967 memcpy(s2, src2, sizeof(s2)); |
| 3968 for (int i = 0; i < 8; i++) { |
| 3969 s1[i] *= s2[i]; |
| 3970 } |
| 3971 memcpy(src1, s1, sizeof(src1)); |
| 3972 break; |
| 3973 } |
| 3974 case Neon32: { |
| 3975 for (int i = 0; i < 4; i++) { |
| 3976 src1[i] *= src2[i]; |
| 3977 } |
| 3978 break; |
| 3979 } |
| 3980 default: |
| 3981 UNIMPLEMENTED(); |
| 3982 break; |
| 3983 } |
| 3984 set_q_register(Vd, src1); |
3948 } else { | 3985 } else { |
3949 UNIMPLEMENTED(); | 3986 UNIMPLEMENTED(); |
3950 } | 3987 } |
3951 break; | 3988 break; |
3952 case 5: | 3989 case 5: |
3953 if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) && | 3990 if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) && |
3954 (instr->Bit(4) == 1)) { | 3991 (instr->Bit(4) == 1)) { |
3955 // vmovl signed | 3992 // vmovl signed |
3956 if ((instr->VdValue() & 1) != 0) UNIMPLEMENTED(); | 3993 if ((instr->VdValue() & 1) != 0) UNIMPLEMENTED(); |
3957 int Vd = (instr->Bit(22) << 3) | (instr->VdValue() >> 1); | 3994 int Vd = (instr->Bit(22) << 3) | (instr->VdValue() >> 1); |
3958 int Vm = (instr->Bit(5) << 4) | instr->VmValue(); | 3995 int Vm = (instr->Bit(5) << 4) | instr->VmValue(); |
3959 int imm3 = instr->Bits(21, 19); | 3996 int imm3 = instr->Bits(21, 19); |
3960 if ((imm3 != 1) && (imm3 != 2) && (imm3 != 4)) UNIMPLEMENTED(); | 3997 if ((imm3 != 1) && (imm3 != 2) && (imm3 != 4)) UNIMPLEMENTED(); |
3961 int esize = 8 * imm3; | 3998 int esize = 8 * imm3; |
3962 int elements = 64 / esize; | 3999 int elements = 64 / esize; |
3963 int8_t from[8]; | 4000 int8_t from[8]; |
3964 get_d_register(Vm, reinterpret_cast<uint64_t*>(from)); | 4001 get_d_register(Vm, reinterpret_cast<uint64_t*>(from)); |
3965 int16_t to[8]; | 4002 int16_t to[8]; |
3966 int e = 0; | 4003 int e = 0; |
3967 while (e < elements) { | 4004 while (e < elements) { |
3968 to[e] = from[e]; | 4005 to[e] = from[e]; |
3969 e++; | 4006 e++; |
3970 } | 4007 } |
3971 set_q_register(Vd, reinterpret_cast<uint64_t*>(to)); | 4008 set_q_register(Vd, reinterpret_cast<uint64_t*>(to)); |
| 4009 } else if (instr->Bits(21, 20) == 3 && instr->Bit(4) == 0) { |
| 4010 // vext. |
| 4011 int imm4 = instr->Bits(11, 8); |
| 4012 int Vd = instr->VFPDRegValue(kSimd128Precision); |
| 4013 int Vm = instr->VFPMRegValue(kSimd128Precision); |
| 4014 int Vn = instr->VFPNRegValue(kSimd128Precision); |
| 4015 uint32_t src1[4], src2[4], dst[4]; |
| 4016 get_q_register(Vn, src1); |
| 4017 get_q_register(Vm, src2); |
| 4018 uint8_t* s1 = reinterpret_cast<uint8_t*>(src1); |
| 4019 uint8_t* s2 = reinterpret_cast<uint8_t*>(src2); |
| 4020 uint8_t* d = reinterpret_cast<uint8_t*>(dst); |
| 4021 int boundary = 16 - imm4; |
| 4022 int i = 0; |
| 4023 for (; i < boundary; i++) { |
| 4024 d[i] = s1[i + imm4]; |
| 4025 } |
| 4026 for (; i < 16; i++) { |
| 4027 d[i] = s2[i - boundary]; |
| 4028 } |
| 4029 set_q_register(Vd, dst); |
3972 } else { | 4030 } else { |
3973 UNIMPLEMENTED(); | 4031 UNIMPLEMENTED(); |
3974 } | 4032 } |
3975 break; | 4033 break; |
3976 case 6: | 4034 case 6: |
3977 if (instr->Bits(11, 8) == 8 && instr->Bit(4) == 0) { | 4035 if (instr->Bits(11, 8) == 8 && instr->Bit(4) == 0) { |
3978 // vsub.size Qd, Qm, Qn. | 4036 // vsub.size Qd, Qm, Qn. |
3979 int size = static_cast<NeonSize>(instr->Bits(21, 20)); | 4037 int size = static_cast<NeonSize>(instr->Bits(21, 20)); |
3980 int Vd = instr->VFPDRegValue(kSimd128Precision); | 4038 int Vd = instr->VFPDRegValue(kSimd128Precision); |
3981 int Vm = instr->VFPMRegValue(kSimd128Precision); | 4039 int Vm = instr->VFPMRegValue(kSimd128Precision); |
3982 int Vn = instr->VFPNRegValue(kSimd128Precision); | 4040 int Vn = instr->VFPNRegValue(kSimd128Precision); |
3983 uint32_t src1[4], src2[4]; | 4041 uint32_t src1[4], src2[4]; |
3984 get_q_register(Vn, src1); | 4042 get_q_register(Vn, src1); |
3985 get_q_register(Vm, src2); | 4043 get_q_register(Vm, src2); |
3986 switch (size) { | 4044 switch (size) { |
3987 case Neon8: { | 4045 case Neon8: { |
3988 uint8_t* s1 = reinterpret_cast<uint8_t*>(src1); | 4046 uint8_t* s1 = reinterpret_cast<uint8_t*>(src1); |
3989 uint8_t* s2 = reinterpret_cast<uint8_t*>(src2); | 4047 uint8_t* s2 = reinterpret_cast<uint8_t*>(src2); |
3990 for (int i = 0; i < 16; i++) { | 4048 for (int i = 0; i < 16; i++) { |
3991 s1[i] -= s2[i]; | 4049 s1[i] -= s2[i]; |
3992 } | 4050 } |
3993 break; | 4051 break; |
3994 } | 4052 } |
3995 case Neon16: { | 4053 case Neon16: { |
3996 for (int i = 0; i < 4; i++) { | 4054 uint16_t s1[8], s2[8]; |
3997 src1[i] = PAIRWISE_OP(src1[i], src2[i], SUB_16); | 4055 memcpy(s1, src1, sizeof(s1)); |
| 4056 memcpy(s2, src2, sizeof(s2)); |
| 4057 for (int i = 0; i < 8; i++) { |
| 4058 s1[i] -= s2[i]; |
3998 } | 4059 } |
| 4060 memcpy(src1, s1, sizeof(src1)); |
3999 break; | 4061 break; |
4000 } | 4062 } |
4001 case Neon32: { | 4063 case Neon32: { |
4002 for (int i = 0; i < 4; i++) { | 4064 for (int i = 0; i < 4; i++) { |
4003 src1[i] -= src2[i]; | 4065 src1[i] -= src2[i]; |
4004 } | 4066 } |
4005 break; | 4067 break; |
4006 } | 4068 } |
4007 default: | 4069 default: |
4008 UNREACHABLE(); | 4070 UNREACHABLE(); |
(...skipping 12 matching lines...) Expand all Loading... |
4021 switch (size) { | 4083 switch (size) { |
4022 case Neon8: { | 4084 case Neon8: { |
4023 uint8_t* s1 = reinterpret_cast<uint8_t*>(src1); | 4085 uint8_t* s1 = reinterpret_cast<uint8_t*>(src1); |
4024 uint8_t* s2 = reinterpret_cast<uint8_t*>(src2); | 4086 uint8_t* s2 = reinterpret_cast<uint8_t*>(src2); |
4025 for (int i = 0; i < 16; i++) { | 4087 for (int i = 0; i < 16; i++) { |
4026 s1[i] = s1[i] == s2[i] ? 0xFF : 0; | 4088 s1[i] = s1[i] == s2[i] ? 0xFF : 0; |
4027 } | 4089 } |
4028 break; | 4090 break; |
4029 } | 4091 } |
4030 case Neon16: { | 4092 case Neon16: { |
4031 for (int i = 0; i < 4; i++) { | 4093 uint16_t s1[8], s2[8]; |
4032 src1[i] = PAIRWISE_OP(src1[i], src2[i], CEQ_16); | 4094 memcpy(s1, src1, sizeof(s1)); |
| 4095 memcpy(s2, src2, sizeof(s2)); |
| 4096 for (int i = 0; i < 8; i++) { |
| 4097 s1[i] = s1[i] == s2[i] ? 0xffffu : 0; |
4033 } | 4098 } |
| 4099 memcpy(src1, s1, sizeof(src1)); |
4034 break; | 4100 break; |
4035 } | 4101 } |
4036 case Neon32: { | 4102 case Neon32: { |
4037 for (int i = 0; i < 4; i++) { | 4103 for (int i = 0; i < 4; i++) { |
4038 src1[i] = src1[i] == src2[i] ? 0xFFFFFFFF : 0; | 4104 src1[i] = src1[i] == src2[i] ? 0xFFFFFFFF : 0; |
4039 } | 4105 } |
4040 break; | 4106 break; |
4041 } | 4107 } |
4042 default: | 4108 default: |
4043 UNREACHABLE(); | 4109 UNREACHABLE(); |
(...skipping 14 matching lines...) Expand all Loading... |
4058 dst[i] = (dst[i] & src1[i]) | (~dst[i] & src2[i]); | 4124 dst[i] = (dst[i] & src1[i]) | (~dst[i] & src2[i]); |
4059 } | 4125 } |
4060 set_q_register(Vd, dst); | 4126 set_q_register(Vd, dst); |
4061 } else if (instr->Bits(21, 20) == 0 && instr->Bits(11, 8) == 1 && | 4127 } else if (instr->Bits(21, 20) == 0 && instr->Bits(11, 8) == 1 && |
4062 instr->Bit(4) == 1) { | 4128 instr->Bit(4) == 1) { |
4063 if (instr->Bit(6) == 0) { | 4129 if (instr->Bit(6) == 0) { |
4064 // veor Dd, Dn, Dm | 4130 // veor Dd, Dn, Dm |
4065 int Vd = instr->VFPDRegValue(kDoublePrecision); | 4131 int Vd = instr->VFPDRegValue(kDoublePrecision); |
4066 int Vn = instr->VFPNRegValue(kDoublePrecision); | 4132 int Vn = instr->VFPNRegValue(kDoublePrecision); |
4067 int Vm = instr->VFPMRegValue(kDoublePrecision); | 4133 int Vm = instr->VFPMRegValue(kDoublePrecision); |
4068 uint64_t n_data, m_data; | 4134 uint64_t src1, src2; |
4069 get_d_register(Vn, &n_data); | 4135 get_d_register(Vn, &src1); |
4070 get_d_register(Vm, &m_data); | 4136 get_d_register(Vm, &src2); |
4071 n_data ^= m_data; | 4137 src1 ^= src2; |
4072 set_d_register(Vd, &n_data); | 4138 set_d_register(Vd, &src1); |
4073 | 4139 |
4074 } else { | 4140 } else { |
4075 // veor Qd, Qn, Qm | 4141 // veor Qd, Qn, Qm |
4076 int Vd = instr->VFPDRegValue(kSimd128Precision); | 4142 int Vd = instr->VFPDRegValue(kSimd128Precision); |
4077 int Vn = instr->VFPNRegValue(kSimd128Precision); | 4143 int Vn = instr->VFPNRegValue(kSimd128Precision); |
4078 int Vm = instr->VFPMRegValue(kSimd128Precision); | 4144 int Vm = instr->VFPMRegValue(kSimd128Precision); |
4079 uint32_t n_data[4], m_data[4]; | 4145 uint32_t src1[4], src2[4]; |
4080 get_q_register(Vn, n_data); | 4146 get_q_register(Vn, src1); |
4081 get_q_register(Vm, m_data); | 4147 get_q_register(Vm, src2); |
4082 for (int i = 0; i < 4; i++) n_data[i] ^= m_data[i]; | 4148 for (int i = 0; i < 4; i++) src1[i] ^= src2[i]; |
4083 set_q_register(Vd, n_data); | 4149 set_q_register(Vd, src1); |
4084 } | 4150 } |
| 4151 } else if (instr->Bit(21) == 0 && instr->Bits(11, 8) == 0xd && |
| 4152 instr->Bit(6) == 1 && instr->Bit(4) == 1) { |
| 4153 // vmul.f32 Qd, Qn, Qm |
| 4154 int Vd = instr->VFPDRegValue(kSimd128Precision); |
| 4155 int Vn = instr->VFPNRegValue(kSimd128Precision); |
| 4156 int Vm = instr->VFPMRegValue(kSimd128Precision); |
| 4157 uint32_t src1[4], src2[4]; |
| 4158 get_q_register(Vn, src1); |
| 4159 get_q_register(Vm, src2); |
| 4160 for (int i = 0; i < 4; i++) { |
| 4161 src1[i] = bit_cast<uint32_t>(bit_cast<float>(src1[i]) * |
| 4162 bit_cast<float>(src2[i])); |
| 4163 } |
| 4164 set_q_register(Vd, src1); |
4085 } else { | 4165 } else { |
4086 UNIMPLEMENTED(); | 4166 UNIMPLEMENTED(); |
4087 } | 4167 } |
4088 break; | 4168 break; |
4089 case 7: | 4169 case 7: |
4090 if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) && | 4170 if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) && |
4091 (instr->Bit(4) == 1)) { | 4171 (instr->Bit(4) == 1)) { |
4092 // vmovl unsigned | 4172 // vmovl unsigned |
4093 if ((instr->VdValue() & 1) != 0) UNIMPLEMENTED(); | 4173 if ((instr->VdValue() & 1) != 0) UNIMPLEMENTED(); |
4094 int Vd = (instr->Bit(22) << 3) | (instr->VdValue() >> 1); | 4174 int Vd = (instr->Bit(22) << 3) | (instr->VdValue() >> 1); |
4095 int Vm = (instr->Bit(5) << 4) | instr->VmValue(); | 4175 int Vm = (instr->Bit(5) << 4) | instr->VmValue(); |
4096 int imm3 = instr->Bits(21, 19); | 4176 int imm3 = instr->Bits(21, 19); |
4097 if ((imm3 != 1) && (imm3 != 2) && (imm3 != 4)) UNIMPLEMENTED(); | 4177 if ((imm3 != 1) && (imm3 != 2) && (imm3 != 4)) UNIMPLEMENTED(); |
4098 int esize = 8 * imm3; | 4178 int esize = 8 * imm3; |
4099 int elements = 64 / esize; | 4179 int elements = 64 / esize; |
4100 uint8_t from[8]; | 4180 uint8_t from[8]; |
4101 get_d_register(Vm, reinterpret_cast<uint64_t*>(from)); | 4181 get_d_register(Vm, reinterpret_cast<uint64_t*>(from)); |
4102 uint16_t to[8]; | 4182 uint16_t to[8]; |
4103 int e = 0; | 4183 int e = 0; |
4104 while (e < elements) { | 4184 while (e < elements) { |
4105 to[e] = from[e]; | 4185 to[e] = from[e]; |
4106 e++; | 4186 e++; |
4107 } | 4187 } |
4108 set_q_register(Vd, reinterpret_cast<uint64_t*>(to)); | 4188 set_q_register(Vd, reinterpret_cast<uint64_t*>(to)); |
4109 } else if (instr->Opc1Value() == 7 && instr->Bits(19, 16) == 0xB && | 4189 } else if (instr->Opc1Value() == 7 && instr->Bit(4) == 0) { |
4110 instr->Bits(11, 9) == 0x3 && instr->Bit(6) == 1 && | 4190 if (instr->Bits(19, 16) == 0xB && instr->Bits(11, 9) == 0x3 && |
4111 instr->Bit(4) == 0) { | 4191 instr->Bit(6) == 1) { |
4112 // vcvt.<Td>.<Tm> Qd, Qm. | 4192 // vcvt.<Td>.<Tm> Qd, Qm. |
4113 int Vd = instr->VFPDRegValue(kSimd128Precision); | 4193 int Vd = instr->VFPDRegValue(kSimd128Precision); |
4114 int Vm = instr->VFPMRegValue(kSimd128Precision); | 4194 int Vm = instr->VFPMRegValue(kSimd128Precision); |
4115 uint32_t q_data[4]; | 4195 uint32_t q_data[4]; |
4116 get_q_register(Vm, q_data); | 4196 get_q_register(Vm, q_data); |
4117 int op = instr->Bits(8, 7); | 4197 int op = instr->Bits(8, 7); |
4118 for (int i = 0; i < 4; i++) { | 4198 for (int i = 0; i < 4; i++) { |
4119 switch (op) { | 4199 switch (op) { |
4120 case 0: | 4200 case 0: |
4121 // f32 <- s32, round towards nearest. | 4201 // f32 <- s32, round towards nearest. |
4122 q_data[i] = bit_cast<uint32_t>( | 4202 q_data[i] = bit_cast<uint32_t>(std::round( |
4123 std::round(static_cast<float>(bit_cast<int32_t>(q_data[i])))); | 4203 static_cast<float>(bit_cast<int32_t>(q_data[i])))); |
4124 break; | 4204 break; |
4125 case 1: | 4205 case 1: |
4126 // f32 <- u32, round towards nearest. | 4206 // f32 <- u32, round towards nearest. |
4127 q_data[i] = | 4207 q_data[i] = bit_cast<uint32_t>( |
4128 bit_cast<uint32_t>(std::round(static_cast<float>(q_data[i]))); | 4208 std::round(static_cast<float>(q_data[i]))); |
4129 break; | 4209 break; |
4130 case 2: | 4210 case 2: |
4131 // s32 <- f32, round to zero. | 4211 // s32 <- f32, round to zero. |
4132 q_data[i] = static_cast<uint32_t>( | 4212 q_data[i] = static_cast<uint32_t>( |
4133 ConvertDoubleToInt(bit_cast<float>(q_data[i]), false, RZ)); | 4213 ConvertDoubleToInt(bit_cast<float>(q_data[i]), false, RZ)); |
4134 break; | 4214 break; |
4135 case 3: | 4215 case 3: |
4136 // u32 <- f32, round to zero. | 4216 // u32 <- f32, round to zero. |
4137 q_data[i] = static_cast<uint32_t>( | 4217 q_data[i] = static_cast<uint32_t>( |
4138 ConvertDoubleToInt(bit_cast<float>(q_data[i]), true, RZ)); | 4218 ConvertDoubleToInt(bit_cast<float>(q_data[i]), true, RZ)); |
4139 break; | 4219 break; |
4140 } | 4220 } |
4141 } | 4221 } |
4142 set_q_register(Vd, q_data); | 4222 set_q_register(Vd, q_data); |
4143 } else if ((instr->Bits(21, 16) == 0x32) && (instr->Bits(11, 7) == 0) && | 4223 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0) { |
4144 (instr->Bit(4) == 0)) { | 4224 if (instr->Bit(6) == 0) { |
4145 if (instr->Bit(6) == 0) { | 4225 // vswp Dd, Dm. |
4146 // vswp Dd, Dm. | 4226 uint64_t dval, mval; |
4147 uint64_t dval, mval; | 4227 int vd = instr->VFPDRegValue(kDoublePrecision); |
4148 int vd = instr->VFPDRegValue(kDoublePrecision); | 4228 int vm = instr->VFPMRegValue(kDoublePrecision); |
| 4229 get_d_register(vd, &dval); |
| 4230 get_d_register(vm, &mval); |
| 4231 set_d_register(vm, &dval); |
| 4232 set_d_register(vd, &mval); |
| 4233 } else { |
| 4234 // vswp Qd, Qm. |
| 4235 uint32_t dval[4], mval[4]; |
| 4236 int vd = instr->VFPDRegValue(kSimd128Precision); |
| 4237 int vm = instr->VFPMRegValue(kSimd128Precision); |
| 4238 get_q_register(vd, dval); |
| 4239 get_q_register(vm, mval); |
| 4240 set_q_register(vm, dval); |
| 4241 set_q_register(vd, mval); |
| 4242 } |
| 4243 } else if (instr->Bits(11, 7) == 0x18) { |
| 4244 // vdup.32 Qd, Sm. |
| 4245 int vd = instr->VFPDRegValue(kSimd128Precision); |
4149 int vm = instr->VFPMRegValue(kDoublePrecision); | 4246 int vm = instr->VFPMRegValue(kDoublePrecision); |
4150 get_d_register(vd, &dval); | 4247 int index = instr->Bit(19); |
4151 get_d_register(vm, &mval); | 4248 uint32_t s_data = get_s_register(vm * 2 + index); |
4152 set_d_register(vm, &dval); | 4249 uint32_t q_data[4]; |
4153 set_d_register(vd, &mval); | 4250 for (int i = 0; i < 4; i++) q_data[i] = s_data; |
4154 } else { | 4251 set_q_register(vd, q_data); |
4155 // vswp Qd, Qm. | 4252 } else if (instr->Bits(19, 16) == 0 && instr->Bits(11, 6) == 0x17) { |
4156 uint32_t dval[4], mval[4]; | 4253 // vmvn Qd, Qm. |
4157 int vd = instr->VFPDRegValue(kSimd128Precision); | 4254 int vd = instr->VFPDRegValue(kSimd128Precision); |
4158 int vm = instr->VFPMRegValue(kSimd128Precision); | 4255 int vm = instr->VFPMRegValue(kSimd128Precision); |
4159 get_q_register(vd, dval); | 4256 uint32_t q_data[4]; |
4160 get_q_register(vm, mval); | 4257 get_q_register(vm, q_data); |
4161 set_q_register(vm, dval); | 4258 for (int i = 0; i < 4; i++) q_data[i] = ~q_data[i]; |
4162 set_q_register(vd, mval); | 4259 set_q_register(vd, q_data); |
| 4260 } else if (instr->Bits(11, 10) == 0x2) { |
| 4261 // vtb[l,x] Dd, <list>, Dm. |
| 4262 int vd = instr->VFPDRegValue(kDoublePrecision); |
| 4263 int vn = instr->VFPNRegValue(kDoublePrecision); |
| 4264 int vm = instr->VFPMRegValue(kDoublePrecision); |
| 4265 int table_len = (instr->Bits(9, 8) + 1) * kDoubleSize; |
| 4266 bool vtbx = instr->Bit(6) != 0; // vtbl / vtbx |
| 4267 uint64_t destination = 0, indices = 0, result = 0; |
| 4268 get_d_register(vd, &destination); |
| 4269 get_d_register(vm, &indices); |
| 4270 for (int i = 0; i < kDoubleSize; i++) { |
| 4271 int shift = i * kBitsPerByte; |
| 4272 int index = (indices >> shift) & 0xFF; |
| 4273 if (index < table_len) { |
| 4274 uint64_t table; |
| 4275 get_d_register(vn + index / kDoubleSize, &table); |
| 4276 result |= |
| 4277 ((table >> ((index % kDoubleSize) * kBitsPerByte)) & 0xFF) |
| 4278 << shift; |
| 4279 } else if (vtbx) { |
| 4280 result |= destination & (0xFFull << shift); |
| 4281 } |
| 4282 } |
| 4283 set_d_register(vd, &result); |
| 4284 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 6) == 0x7) { |
| 4285 // vzip.<size> Qd, Qm. |
| 4286 int size = static_cast<NeonSize>(instr->Bits(19, 18)); |
| 4287 int Vd = instr->VFPDRegValue(kSimd128Precision); |
| 4288 int Vm = instr->VFPMRegValue(kSimd128Precision); |
| 4289 uint32_t src1[4], src2[4], dst1[4], dst2[4]; |
| 4290 get_q_register(Vd, src1); |
| 4291 get_q_register(Vm, src2); |
| 4292 switch (size) { |
| 4293 case Neon8: { |
| 4294 uint8_t* s1 = reinterpret_cast<uint8_t*>(src1); |
| 4295 uint8_t* s2 = reinterpret_cast<uint8_t*>(src2); |
| 4296 uint8_t* d1 = reinterpret_cast<uint8_t*>(dst1); |
| 4297 uint8_t* d2 = reinterpret_cast<uint8_t*>(dst2); |
| 4298 for (int i = 0; i < 8; i++) { |
| 4299 d1[i * 2] = s1[i]; |
| 4300 d1[i * 2 + 1] = s2[i]; |
| 4301 d2[i * 2] = s1[i + 8]; |
| 4302 d2[i * 2 + 1] = s2[i + 8]; |
| 4303 } |
| 4304 break; |
| 4305 } |
| 4306 case Neon16: { |
| 4307 uint16_t s1[8], s2[8], d1[8], d2[8]; |
| 4308 memcpy(s1, src1, sizeof(s1)); |
| 4309 memcpy(s2, src2, sizeof(s2)); |
| 4310 for (int i = 0; i < 8; i += 2) { |
| 4311 d1[i] = s1[i / 2]; |
| 4312 d1[i + 1] = s2[i / 2]; |
| 4313 d2[i] = s1[i / 2 + 4]; |
| 4314 d2[i + 1] = s2[i / 2 + 4]; |
| 4315 } |
| 4316 memcpy(dst1, d1, sizeof(dst1)); |
| 4317 memcpy(dst2, d2, sizeof(dst2)); |
| 4318 break; |
| 4319 } |
| 4320 case Neon32: { |
| 4321 for (int i = 0; i < 2; i++) { |
| 4322 dst1[i * 2] = src1[i]; |
| 4323 dst1[i * 2 + 1] = src2[i]; |
| 4324 dst2[i * 2] = src1[i + 2]; |
| 4325 dst2[i * 2 + 1] = src2[i + 2]; |
| 4326 } |
| 4327 break; |
| 4328 } |
| 4329 default: |
| 4330 UNREACHABLE(); |
| 4331 break; |
| 4332 } |
| 4333 set_q_register(Vd, dst1); |
| 4334 set_q_register(Vm, dst2); |
| 4335 } else if (instr->Bits(17, 16) == 0 && instr->Bits(11, 9) == 0) { |
| 4336 // vrev<op>.size Qd, Qm |
| 4337 int Vd = instr->VFPDRegValue(kSimd128Precision); |
| 4338 int Vm = instr->VFPMRegValue(kSimd128Precision); |
| 4339 int size = static_cast<NeonSize>(instr->Bits(19, 18)); |
| 4340 NeonSize op = static_cast<NeonSize>(static_cast<int>(Neon64) - |
| 4341 instr->Bits(8, 7)); |
| 4342 uint32_t src[4]; |
| 4343 get_q_register(Vm, src); |
| 4344 switch (op) { |
| 4345 case Neon16: { |
| 4346 DCHECK_EQ(Neon8, size); |
| 4347 uint8_t* s = reinterpret_cast<uint8_t*>(src); |
| 4348 for (int i = 0; i < 16; i += 2) { |
| 4349 std::swap(s[i], s[i + 1]); |
| 4350 } |
| 4351 break; |
| 4352 } |
| 4353 case Neon32: { |
| 4354 switch (size) { |
| 4355 case Neon16: |
| 4356 for (int i = 0; i < 4; i++) { |
| 4357 src[i] = (src[i] >> 16) | (src[i] << 16); |
| 4358 } |
| 4359 break; |
| 4360 case Neon8: { |
| 4361 uint8_t* s = reinterpret_cast<uint8_t*>(src); |
| 4362 for (int i = 0; i < 4; i++) { |
| 4363 std::swap(s[i * 4], s[i * 4 + 3]); |
| 4364 std::swap(s[i * 4 + 1], s[i * 4 + 2]); |
| 4365 } |
| 4366 break; |
| 4367 } |
| 4368 default: |
| 4369 UNREACHABLE(); |
| 4370 break; |
| 4371 } |
| 4372 break; |
| 4373 } |
| 4374 case Neon64: { |
| 4375 switch (size) { |
| 4376 case Neon32: { |
| 4377 std::swap(src[0], src[1]); |
| 4378 std::swap(src[2], src[3]); |
| 4379 break; |
| 4380 } |
| 4381 case Neon16: { |
| 4382 for (int i = 0; i <= 2; i += 2) { |
| 4383 uint32_t w1 = src[i]; |
| 4384 uint32_t w2 = src[i + 1]; |
| 4385 src[i] = (w2 >> 16) | (w2 << 16); |
| 4386 src[i + 1] = (w1 >> 16) | (w1 << 16); |
| 4387 } |
| 4388 break; |
| 4389 } |
| 4390 case Neon8: { |
| 4391 uint8_t* s = reinterpret_cast<uint8_t*>(src); |
| 4392 for (int i = 0; i < 4; i++) { |
| 4393 std::swap(s[i], s[7 - i]); |
| 4394 std::swap(s[i + 8], s[15 - i]); |
| 4395 } |
| 4396 break; |
| 4397 } |
| 4398 default: |
| 4399 UNREACHABLE(); |
| 4400 break; |
| 4401 } |
| 4402 break; |
| 4403 } |
| 4404 default: |
| 4405 UNREACHABLE(); |
| 4406 break; |
| 4407 } |
| 4408 set_q_register(Vd, src); |
| 4409 } else if (instr->Bits(17, 16) == 0x1 && instr->Bit(11) == 0) { |
| 4410 int Vd = instr->VFPDRegValue(kSimd128Precision); |
| 4411 int Vm = instr->VFPMRegValue(kSimd128Precision); |
| 4412 int size = static_cast<NeonSize>(instr->Bits(19, 18)); |
| 4413 uint32_t src[4]; |
| 4414 get_q_register(Vm, src); |
| 4415 if (instr->Bits(9, 6) == 0xd) { |
| 4416 // vabs<type>.<size> Qd, Qm |
| 4417 if (instr->Bit(10) != 0) { |
| 4418 // floating point (clear sign bits) |
| 4419 for (int i = 0; i < 4; i++) { |
| 4420 src[i] &= ~0x80000000; |
| 4421 } |
| 4422 } else { |
| 4423 // signed integer |
| 4424 switch (size) { |
| 4425 case Neon8: { |
| 4426 int8_t* s = reinterpret_cast<int8_t*>(src); |
| 4427 for (int i = 0; i < 16; i++) { |
| 4428 s[i] = std::abs(s[i]); |
| 4429 } |
| 4430 break; |
| 4431 } |
| 4432 case Neon16: { |
| 4433 int16_t s[8]; |
| 4434 memcpy(s, src, sizeof(s)); |
| 4435 for (int i = 0; i < 8; i++) { |
| 4436 s[i] = std::abs(s[i]); |
| 4437 } |
| 4438 memcpy(src, s, sizeof(src)); |
| 4439 break; |
| 4440 } |
| 4441 case Neon32: { |
| 4442 int32_t* as_signed = reinterpret_cast<int32_t*>(src); |
| 4443 for (int i = 0; i < 4; i++) { |
| 4444 as_signed[i] = std::abs(as_signed[i]); |
| 4445 } |
| 4446 break; |
| 4447 } |
| 4448 default: |
| 4449 UNIMPLEMENTED(); |
| 4450 break; |
| 4451 } |
| 4452 } |
| 4453 } else if (instr->Bits(9, 6) == 0xf) { |
| 4454 // vneg<type>.<size> Qd, Qm |
| 4455 if (instr->Bit(10) != 0) { |
| 4456 // floating point (toggle sign bits) |
| 4457 for (int i = 0; i < 4; i++) { |
| 4458 src[i] ^= 0x80000000; |
| 4459 } |
| 4460 } else { |
| 4461 // signed integer |
| 4462 switch (size) { |
| 4463 case Neon8: { |
| 4464 int8_t* s = reinterpret_cast<int8_t*>(src); |
| 4465 for (int i = 0; i < 16; i++) { |
| 4466 s[i] = -s[i]; |
| 4467 } |
| 4468 break; |
| 4469 } |
| 4470 case Neon16: |
| 4471 int16_t s[8]; |
| 4472 memcpy(s, src, sizeof(s)); |
| 4473 for (int i = 0; i < 8; i++) { |
| 4474 s[i] = -s[i]; |
| 4475 } |
| 4476 memcpy(src, s, sizeof(src)); |
| 4477 break; |
| 4478 case Neon32: { |
| 4479 int32_t* as_signed = reinterpret_cast<int32_t*>(src); |
| 4480 for (int i = 0; i < 4; i++) { |
| 4481 as_signed[i] = -as_signed[i]; |
| 4482 } |
| 4483 break; |
| 4484 } |
| 4485 default: |
| 4486 UNIMPLEMENTED(); |
| 4487 break; |
| 4488 } |
| 4489 } |
| 4490 } else { |
| 4491 UNIMPLEMENTED(); |
| 4492 } |
| 4493 set_q_register(Vd, src); |
| 4494 } else { |
| 4495 UNIMPLEMENTED(); |
4163 } | 4496 } |
4164 } else if (instr->Opc1Value() == 0x7 && instr->Bits(11, 7) == 0x18 && | |
4165 instr->Bit(4) == 0x0) { | |
4166 // vdup.32 Qd, Sm. | |
4167 int vd = instr->VFPDRegValue(kSimd128Precision); | |
4168 int vm = instr->VFPMRegValue(kDoublePrecision); | |
4169 int index = instr->Bit(19); | |
4170 uint32_t s_data = get_s_register(vm * 2 + index); | |
4171 uint32_t q_data[4]; | |
4172 for (int i = 0; i < 4; i++) q_data[i] = s_data; | |
4173 set_q_register(vd, q_data); | |
4174 } else if (instr->Opc1Value() == 7 && instr->Bits(19, 16) == 0 && | |
4175 instr->Bits(11, 6) == 0x17 && instr->Bit(4) == 0) { | |
4176 // vmvn Qd, Qm. | |
4177 int vd = instr->VFPDRegValue(kSimd128Precision); | |
4178 int vm = instr->VFPMRegValue(kSimd128Precision); | |
4179 uint32_t q_data[4]; | |
4180 get_q_register(vm, q_data); | |
4181 for (int i = 0; i < 4; i++) q_data[i] = ~q_data[i]; | |
4182 set_q_register(vd, q_data); | |
4183 } else if (instr->Opc1Value() == 0x7 && instr->Bits(11, 10) == 0x2 && | |
4184 instr->Bit(4) == 0x0) { | |
4185 // vtb[l,x] Dd, <list>, Dm. | |
4186 int vd = instr->VFPDRegValue(kDoublePrecision); | |
4187 int vn = instr->VFPNRegValue(kDoublePrecision); | |
4188 int vm = instr->VFPMRegValue(kDoublePrecision); | |
4189 int table_len = (instr->Bits(9, 8) + 1) * kDoubleSize; | |
4190 bool vtbx = instr->Bit(6) != 0; // vtbl / vtbx | |
4191 uint64_t destination = 0, indices = 0, result = 0; | |
4192 get_d_register(vd, &destination); | |
4193 get_d_register(vm, &indices); | |
4194 for (int i = 0; i < kDoubleSize; i++) { | |
4195 int shift = i * kBitsPerByte; | |
4196 int index = (indices >> shift) & 0xFF; | |
4197 if (index < table_len) { | |
4198 uint64_t table; | |
4199 get_d_register(vn + index / kDoubleSize, &table); | |
4200 result |= ((table >> ((index % kDoubleSize) * kBitsPerByte)) & 0xFF) | |
4201 << shift; | |
4202 } else if (vtbx) { | |
4203 result |= destination & (0xFFull << shift); | |
4204 } | |
4205 } | |
4206 set_d_register(vd, &result); | |
4207 } else { | |
4208 UNIMPLEMENTED(); | |
4209 } | 4497 } |
4210 break; | 4498 break; |
4211 case 8: | 4499 case 8: |
4212 if (instr->Bits(21, 20) == 0) { | 4500 if (instr->Bits(21, 20) == 0) { |
4213 // vst1 | 4501 // vst1 |
4214 int Vd = (instr->Bit(22) << 4) | instr->VdValue(); | 4502 int Vd = (instr->Bit(22) << 4) | instr->VdValue(); |
4215 int Rn = instr->VnValue(); | 4503 int Rn = instr->VnValue(); |
4216 int type = instr->Bits(11, 8); | 4504 int type = instr->Bits(11, 8); |
4217 int Rm = instr->VmValue(); | 4505 int Rm = instr->VmValue(); |
4218 int32_t address = get_register(Rn); | 4506 int32_t address = get_register(Rn); |
(...skipping 498 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4717 set_register(sp, current_sp + sizeof(uintptr_t)); | 5005 set_register(sp, current_sp + sizeof(uintptr_t)); |
4718 return address; | 5006 return address; |
4719 } | 5007 } |
4720 | 5008 |
4721 } // namespace internal | 5009 } // namespace internal |
4722 } // namespace v8 | 5010 } // namespace v8 |
4723 | 5011 |
4724 #endif // USE_SIMULATOR | 5012 #endif // USE_SIMULATOR |
4725 | 5013 |
4726 #endif // V8_TARGET_ARCH_ARM | 5014 #endif // V8_TARGET_ARCH_ARM |
OLD | NEW |