Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(134)

Side by Side Diff: src/arm/simulator-arm.cc

Issue 2797923006: [ARM] Implement D-register versions of vzip, vuzp, and vtrn. (Closed)
Patch Set: Remove tests for vzip.32 / vuzp.32 - these are pseudo ops for vtrn.32. Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/arm/simulator-arm.h ('k') | test/cctest/test-assembler-arm.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <stdarg.h> 5 #include <stdarg.h>
6 #include <stdlib.h> 6 #include <stdlib.h>
7 #include <cmath> 7 #include <cmath>
8 8
9 #if V8_TARGET_ARCH_ARM 9 #if V8_TARGET_ARCH_ARM
10 10
(...skipping 878 matching lines...) Expand 10 before | Expand all | Expand 10 after
889 DCHECK((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters())); 889 DCHECK((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters()));
890 memcpy(value, vfp_registers_ + dreg * 2, sizeof(*value) * 2); 890 memcpy(value, vfp_registers_ + dreg * 2, sizeof(*value) * 2);
891 } 891 }
892 892
893 893
894 void Simulator::set_d_register(int dreg, const uint32_t* value) { 894 void Simulator::set_d_register(int dreg, const uint32_t* value) {
895 DCHECK((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters())); 895 DCHECK((dreg >= 0) && (dreg < DwVfpRegister::NumRegisters()));
896 memcpy(vfp_registers_ + dreg * 2, value, sizeof(*value) * 2); 896 memcpy(vfp_registers_ + dreg * 2, value, sizeof(*value) * 2);
897 } 897 }
898 898
899 template <typename T> 899 template <typename T, int SIZE>
900 void Simulator::get_d_register(int dreg, T* value) { 900 void Simulator::get_neon_register(int reg, T (&value)[SIZE / sizeof(T)]) {
901 DCHECK((dreg >= 0) && (dreg < num_d_registers)); 901 DCHECK(SIZE == kSimd128Size || SIZE == kDoubleSize);
902 memcpy(value, vfp_registers_ + dreg * 2, kDoubleSize); 902 DCHECK_LE(0, reg);
903 DCHECK_GT(SIZE == kSimd128Size ? num_q_registers : num_d_registers, reg);
904 memcpy(value, vfp_registers_ + reg * (SIZE / 4), SIZE);
903 } 905 }
904 906
905 template <typename T> 907 template <typename T, int SIZE>
906 void Simulator::set_d_register(int dreg, const T* value) { 908 void Simulator::set_neon_register(int reg, const T (&value)[SIZE / sizeof(T)]) {
907 DCHECK((dreg >= 0) && (dreg < num_d_registers)); 909 DCHECK(SIZE == kSimd128Size || SIZE == kDoubleSize);
908 memcpy(vfp_registers_ + dreg * 2, value, kDoubleSize); 910 DCHECK_LE(0, reg);
909 } 911 DCHECK_GT(SIZE == kSimd128Size ? num_q_registers : num_d_registers, reg);
910 912 memcpy(vfp_registers_ + reg * (SIZE / 4), value, SIZE);
911 template <typename T>
912 void Simulator::get_q_register(int qreg, T* value) {
913 DCHECK((qreg >= 0) && (qreg < num_q_registers));
914 memcpy(value, vfp_registers_ + qreg * 4, kSimd128Size);
915 }
916
917 template <typename T>
918 void Simulator::set_q_register(int qreg, const T* value) {
919 DCHECK((qreg >= 0) && (qreg < num_q_registers));
920 memcpy(vfp_registers_ + qreg * 4, value, kSimd128Size);
921 } 913 }
922 914
923 // Raw access to the PC register. 915 // Raw access to the PC register.
924 void Simulator::set_pc(int32_t value) { 916 void Simulator::set_pc(int32_t value) {
925 pc_modified_ = true; 917 pc_modified_ = true;
926 registers_[pc] = value; 918 registers_[pc] = value;
927 } 919 }
928 920
929 921
930 bool Simulator::has_bad_pc() const { 922 bool Simulator::has_bad_pc() const {
(...skipping 2570 matching lines...) Expand 10 before | Expand all | Expand 10 after
3501 case Neon32: { 3493 case Neon32: {
3502 for (int i = 0; i < 4; i++) { 3494 for (int i = 0; i < 4; i++) {
3503 q_data[i] = rt_value; 3495 q_data[i] = rt_value;
3504 } 3496 }
3505 break; 3497 break;
3506 } 3498 }
3507 default: 3499 default:
3508 UNREACHABLE(); 3500 UNREACHABLE();
3509 break; 3501 break;
3510 } 3502 }
3511 set_q_register(vd, q_data); 3503 set_neon_register(vd, q_data);
3512 } 3504 }
3513 } else if ((instr->VLValue() == 0x1) && (instr->VCValue() == 0x1)) { 3505 } else if ((instr->VLValue() == 0x1) && (instr->VCValue() == 0x1)) {
3514 // vmov (scalar to ARM core register) 3506 // vmov (scalar to ARM core register)
3515 int vn = instr->VFPNRegValue(kDoublePrecision); 3507 int vn = instr->VFPNRegValue(kDoublePrecision);
3516 int rt = instr->RtValue(); 3508 int rt = instr->RtValue();
3517 int opc1_opc2 = (instr->Bits(22, 21) << 2) | instr->Bits(6, 5); 3509 int opc1_opc2 = (instr->Bits(22, 21) << 2) | instr->Bits(6, 5);
3518 uint64_t data; 3510 uint64_t data;
3519 get_d_register(vn, &data); 3511 get_d_register(vn, &data);
3520 if ((opc1_opc2 & 0xb) == 0) { 3512 if ((opc1_opc2 & 0xb) == 0) {
3521 // NeonS32 / NeonU32 3513 // NeonS32 / NeonU32
(...skipping 466 matching lines...) Expand 10 before | Expand all | Expand 10 after
3988 break; 3980 break;
3989 default: 3981 default:
3990 UNIMPLEMENTED(); // Not used by V8. 3982 UNIMPLEMENTED(); // Not used by V8.
3991 } 3983 }
3992 } else { 3984 } else {
3993 UNIMPLEMENTED(); // Not used by V8. 3985 UNIMPLEMENTED(); // Not used by V8.
3994 } 3986 }
3995 } 3987 }
3996 3988
3997 // Templated operations for NEON instructions. 3989 // Templated operations for NEON instructions.
3998 // TODO(bbudge) Add more templates for use in DecodeSpecialCondition.
3999 template <typename T, typename U> 3990 template <typename T, typename U>
4000 U Widen(T value) { 3991 U Widen(T value) {
4001 static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller"); 3992 static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller");
4002 static_assert(sizeof(U) > sizeof(T), "T must smaller than U"); 3993 static_assert(sizeof(U) > sizeof(T), "T must smaller than U");
4003 return static_cast<U>(value); 3994 return static_cast<U>(value);
4004 } 3995 }
4005 3996
4006 template <typename T, typename U> 3997 template <typename T, typename U>
4007 U Narrow(T value) { 3998 U Narrow(T value) {
4008 static_assert(sizeof(int8_t) < sizeof(T), "T must be int16_t or larger"); 3999 static_assert(sizeof(int8_t) < sizeof(T), "T must be int16_t or larger");
4009 static_assert(sizeof(U) < sizeof(T), "T must larger than U"); 4000 static_assert(sizeof(U) < sizeof(T), "T must larger than U");
4010 static_assert(std::is_unsigned<T>() == std::is_unsigned<U>(), 4001 static_assert(std::is_unsigned<T>() == std::is_unsigned<U>(),
4011 "Signed-ness of T and U must match"); 4002 "Signed-ness of T and U must match");
4012 // Make sure value can be expressed in the smaller type; otherwise, the 4003 // Make sure value can be expressed in the smaller type; otherwise, the
4013 // cast result is implementation-defined. 4004 // cast result is implementation-defined.
4014 DCHECK_LE(std::numeric_limits<T>::min(), value); 4005 DCHECK_LE(std::numeric_limits<T>::min(), value);
4015 DCHECK_GE(std::numeric_limits<T>::max(), value); 4006 DCHECK_GE(std::numeric_limits<T>::max(), value);
4016 return static_cast<U>(value); 4007 return static_cast<U>(value);
4017 } 4008 }
4018 4009
4019 template <typename T> 4010 template <typename T>
4020 T Clamp(int64_t value) { 4011 T Clamp(int64_t value) {
4021 static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller"); 4012 static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller");
4022 int64_t min = static_cast<int64_t>(std::numeric_limits<T>::min()); 4013 int64_t min = static_cast<int64_t>(std::numeric_limits<T>::min());
4023 int64_t max = static_cast<int64_t>(std::numeric_limits<T>::max()); 4014 int64_t max = static_cast<int64_t>(std::numeric_limits<T>::max());
4024 int64_t clamped = std::max(min, std::min(max, value)); 4015 int64_t clamped = std::max(min, std::min(max, value));
4025 return static_cast<T>(clamped); 4016 return static_cast<T>(clamped);
4026 } 4017 }
4027 4018
4028 template <typename T>
4029 T MinMax(T a, T b, bool is_min) {
4030 return is_min ? std::min(a, b) : std::max(a, b);
4031 }
4032
4033 template <typename T, typename U> 4019 template <typename T, typename U>
4034 void Widen(Simulator* simulator, int Vd, int Vm) { 4020 void Widen(Simulator* simulator, int Vd, int Vm) {
4035 static const int kLanes = 8 / sizeof(T); 4021 static const int kLanes = 8 / sizeof(T);
4036 T src[kLanes]; 4022 T src[kLanes];
4037 U dst[kLanes]; 4023 U dst[kLanes];
4038 simulator->get_d_register(Vm, src); 4024 simulator->get_neon_register<T, kDoubleSize>(Vm, src);
4039 for (int i = 0; i < kLanes; i++) { 4025 for (int i = 0; i < kLanes; i++) {
4040 dst[i] = Widen<T, U>(src[i]); 4026 dst[i] = Widen<T, U>(src[i]);
4041 } 4027 }
4042 simulator->set_q_register(Vd, dst); 4028 simulator->set_neon_register(Vd, dst);
4029 }
4030
4031 template <typename T, int SIZE>
4032 void Abs(Simulator* simulator, int Vd, int Vm) {
4033 static const int kElems = SIZE / sizeof(T);
4034 T src[kElems];
4035 simulator->get_neon_register<T, SIZE>(Vm, src);
4036 for (int i = 0; i < kElems; i++) {
4037 src[i] = std::abs(src[i]);
4038 }
4039 simulator->set_neon_register<T, SIZE>(Vd, src);
4040 }
4041
4042 template <typename T, int SIZE>
4043 void Neg(Simulator* simulator, int Vd, int Vm) {
4044 static const int kElems = SIZE / sizeof(T);
4045 T src[kElems];
4046 simulator->get_neon_register<T, SIZE>(Vm, src);
4047 for (int i = 0; i < kElems; i++) {
4048 src[i] = -src[i];
4049 }
4050 simulator->set_neon_register<T, SIZE>(Vd, src);
4043 } 4051 }
4044 4052
4045 template <typename T, typename U> 4053 template <typename T, typename U>
4046 void SaturatingNarrow(Simulator* simulator, int Vd, int Vm) { 4054 void SaturatingNarrow(Simulator* simulator, int Vd, int Vm) {
4047 static const int kLanes = 16 / sizeof(T); 4055 static const int kLanes = 16 / sizeof(T);
4048 T src[kLanes]; 4056 T src[kLanes];
4049 U dst[kLanes]; 4057 U dst[kLanes];
4050 simulator->get_q_register(Vm, src); 4058 simulator->get_neon_register(Vm, src);
4051 for (int i = 0; i < kLanes; i++) { 4059 for (int i = 0; i < kLanes; i++) {
4052 dst[i] = Narrow<T, U>(Clamp<U>(src[i])); 4060 dst[i] = Narrow<T, U>(Clamp<U>(src[i]));
4053 } 4061 }
4054 simulator->set_d_register(Vd, dst); 4062 simulator->set_neon_register<U, kDoubleSize>(Vd, dst);
4055 } 4063 }
4056 4064
4057 template <typename T> 4065 template <typename T>
4058 void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn) { 4066 void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn) {
4059 static const int kLanes = 16 / sizeof(T); 4067 static const int kLanes = 16 / sizeof(T);
4060 T src1[kLanes], src2[kLanes]; 4068 T src1[kLanes], src2[kLanes];
4061 simulator->get_q_register(Vn, src1); 4069 simulator->get_neon_register(Vn, src1);
4062 simulator->get_q_register(Vm, src2); 4070 simulator->get_neon_register(Vm, src2);
4063 for (int i = 0; i < kLanes; i++) { 4071 for (int i = 0; i < kLanes; i++) {
4064 src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) + Widen<T, int64_t>(src2[i])); 4072 src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) + Widen<T, int64_t>(src2[i]));
4065 } 4073 }
4066 simulator->set_q_register(Vd, src1); 4074 simulator->set_neon_register(Vd, src1);
4067 } 4075 }
4068 4076
4069 template <typename T> 4077 template <typename T>
4070 void SubSaturate(Simulator* simulator, int Vd, int Vm, int Vn) { 4078 void SubSaturate(Simulator* simulator, int Vd, int Vm, int Vn) {
4071 static const int kLanes = 16 / sizeof(T); 4079 static const int kLanes = 16 / sizeof(T);
4072 T src1[kLanes], src2[kLanes]; 4080 T src1[kLanes], src2[kLanes];
4073 simulator->get_q_register(Vn, src1); 4081 simulator->get_neon_register(Vn, src1);
4074 simulator->get_q_register(Vm, src2); 4082 simulator->get_neon_register(Vm, src2);
4075 for (int i = 0; i < kLanes; i++) { 4083 for (int i = 0; i < kLanes; i++) {
4076 src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) - Widen<T, int64_t>(src2[i])); 4084 src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) - Widen<T, int64_t>(src2[i]));
4077 } 4085 }
4078 simulator->set_q_register(Vd, src1); 4086 simulator->set_neon_register(Vd, src1);
4087 }
4088
4089 template <typename T, int SIZE>
4090 void Zip(Simulator* simulator, int Vd, int Vm) {
4091 static const int kElems = SIZE / sizeof(T);
4092 static const int kPairs = kElems / 2;
4093 T src1[kElems], src2[kElems], dst1[kElems], dst2[kElems];
4094 simulator->get_neon_register<T, SIZE>(Vd, src1);
4095 simulator->get_neon_register<T, SIZE>(Vm, src2);
4096 for (int i = 0; i < kPairs; i++) {
4097 dst1[i * 2] = src1[i];
4098 dst1[i * 2 + 1] = src2[i];
4099 dst2[i * 2] = src1[i + kPairs];
4100 dst2[i * 2 + 1] = src2[i + kPairs];
4101 }
4102 simulator->set_neon_register<T, SIZE>(Vd, dst1);
4103 simulator->set_neon_register<T, SIZE>(Vm, dst2);
4104 }
4105
4106 template <typename T, int SIZE>
4107 void Unzip(Simulator* simulator, int Vd, int Vm) {
4108 static const int kElems = SIZE / sizeof(T);
4109 static const int kPairs = kElems / 2;
4110 T src1[kElems], src2[kElems], dst1[kElems], dst2[kElems];
4111 simulator->get_neon_register<T, SIZE>(Vd, src1);
4112 simulator->get_neon_register<T, SIZE>(Vm, src2);
4113 for (int i = 0; i < kPairs; i++) {
4114 dst1[i] = src1[i * 2];
4115 dst1[i + kPairs] = src2[i * 2];
4116 dst2[i] = src1[i * 2 + 1];
4117 dst2[i + kPairs] = src2[i * 2 + 1];
4118 }
4119 simulator->set_neon_register<T, SIZE>(Vd, dst1);
4120 simulator->set_neon_register<T, SIZE>(Vm, dst2);
4121 }
4122
4123 template <typename T, int SIZE>
4124 void Transpose(Simulator* simulator, int Vd, int Vm) {
4125 static const int kElems = SIZE / sizeof(T);
4126 static const int kPairs = kElems / 2;
4127 T src1[kElems], src2[kElems];
4128 simulator->get_neon_register<T, SIZE>(Vd, src1);
4129 simulator->get_neon_register<T, SIZE>(Vm, src2);
4130 for (int i = 0; i < kPairs; i++) {
4131 std::swap(src1[2 * i + 1], src2[2 * i]);
4132 }
4133 simulator->set_neon_register<T, SIZE>(Vd, src1);
4134 simulator->set_neon_register<T, SIZE>(Vm, src2);
4135 }
4136
4137 template <typename T, int SIZE>
4138 void Test(Simulator* simulator, int Vd, int Vm, int Vn) {
4139 static const int kElems = SIZE / sizeof(T);
4140 T src1[kElems], src2[kElems];
4141 simulator->get_neon_register<T, SIZE>(Vn, src1);
4142 simulator->get_neon_register<T, SIZE>(Vm, src2);
4143 for (int i = 0; i < kElems; i++) {
4144 src1[i] = (src1[i] & src2[i]) != 0 ? -1 : 0;
4145 }
4146 simulator->set_neon_register<T, SIZE>(Vd, src1);
4147 }
4148
4149 template <typename T, int SIZE>
4150 void Add(Simulator* simulator, int Vd, int Vm, int Vn) {
4151 static const int kElems = SIZE / sizeof(T);
4152 T src1[kElems], src2[kElems];
4153 simulator->get_neon_register<T, SIZE>(Vn, src1);
4154 simulator->get_neon_register<T, SIZE>(Vm, src2);
4155 for (int i = 0; i < kElems; i++) {
4156 src1[i] += src2[i];
4157 }
4158 simulator->set_neon_register<T, SIZE>(Vd, src1);
4159 }
4160
4161 template <typename T, int SIZE>
4162 void Sub(Simulator* simulator, int Vd, int Vm, int Vn) {
4163 static const int kElems = SIZE / sizeof(T);
4164 T src1[kElems], src2[kElems];
4165 simulator->get_neon_register<T, SIZE>(Vn, src1);
4166 simulator->get_neon_register<T, SIZE>(Vm, src2);
4167 for (int i = 0; i < kElems; i++) {
4168 src1[i] -= src2[i];
4169 }
4170 simulator->set_neon_register<T, SIZE>(Vd, src1);
4171 }
4172
4173 template <typename T, int SIZE>
4174 void Mul(Simulator* simulator, int Vd, int Vm, int Vn) {
4175 static const int kElems = SIZE / sizeof(T);
4176 T src1[kElems], src2[kElems];
4177 simulator->get_neon_register<T, SIZE>(Vn, src1);
4178 simulator->get_neon_register<T, SIZE>(Vm, src2);
4179 for (int i = 0; i < kElems; i++) {
4180 src1[i] *= src2[i];
4181 }
4182 simulator->set_neon_register<T, SIZE>(Vd, src1);
4183 }
4184
4185 template <typename T, int SIZE>
4186 void ShiftLeft(Simulator* simulator, int Vd, int Vm, int shift) {
4187 static const int kElems = SIZE / sizeof(T);
4188 T src[kElems];
4189 simulator->get_neon_register<T, SIZE>(Vm, src);
4190 for (int i = 0; i < kElems; i++) {
4191 src[i] <<= shift;
4192 }
4193 simulator->set_neon_register<T, SIZE>(Vd, src);
4194 }
4195
4196 template <typename T, int SIZE>
4197 void ShiftRight(Simulator* simulator, int Vd, int Vm, int shift) {
4198 static const int kElems = SIZE / sizeof(T);
4199 T src[kElems];
4200 simulator->get_neon_register<T, SIZE>(Vm, src);
4201 for (int i = 0; i < kElems; i++) {
4202 src[i] >>= shift;
4203 }
4204 simulator->set_neon_register<T, SIZE>(Vd, src);
4205 }
4206
4207 template <typename T, int SIZE>
4208 void ArithmeticShiftRight(Simulator* simulator, int Vd, int Vm, int shift) {
4209 static const int kElems = SIZE / sizeof(T);
4210 T src[kElems];
4211 simulator->get_neon_register<T, SIZE>(Vm, src);
4212 for (int i = 0; i < kElems; i++) {
4213 src[i] = ArithmeticShiftRight(src[i], shift);
4214 }
4215 simulator->set_neon_register<T, SIZE>(Vd, src);
4216 }
4217
4218 template <typename T, int SIZE>
4219 void CompareEqual(Simulator* simulator, int Vd, int Vm, int Vn) {
4220 static const int kElems = SIZE / sizeof(T);
4221 T src1[kElems], src2[kElems];
4222 simulator->get_neon_register<T, SIZE>(Vn, src1);
4223 simulator->get_neon_register<T, SIZE>(Vm, src2);
4224 for (int i = 0; i < kElems; i++) {
4225 src1[i] = src1[i] == src2[i] ? -1 : 0;
4226 }
4227 simulator->set_neon_register<T, SIZE>(Vd, src1);
4228 }
4229
4230 template <typename T, int SIZE>
4231 void CompareGreater(Simulator* simulator, int Vd, int Vm, int Vn, bool ge) {
4232 static const int kElems = SIZE / sizeof(T);
4233 T src1[kElems], src2[kElems];
4234 simulator->get_neon_register<T, SIZE>(Vn, src1);
4235 simulator->get_neon_register<T, SIZE>(Vm, src2);
4236 for (int i = 0; i < kElems; i++) {
4237 if (ge)
4238 src1[i] = src1[i] >= src2[i] ? -1 : 0;
4239 else
4240 src1[i] = src1[i] > src2[i] ? -1 : 0;
4241 }
4242 simulator->set_neon_register<T, SIZE>(Vd, src1);
4243 }
4244
4245 template <typename T>
4246 T MinMax(T a, T b, bool is_min) {
4247 return is_min ? std::min(a, b) : std::max(a, b);
4248 }
4249
4250 template <typename T, int SIZE>
4251 void MinMax(Simulator* simulator, int Vd, int Vm, int Vn, bool min) {
4252 static const int kElems = SIZE / sizeof(T);
4253 T src1[kElems], src2[kElems];
4254 simulator->get_neon_register<T, SIZE>(Vn, src1);
4255 simulator->get_neon_register<T, SIZE>(Vm, src2);
4256 for (int i = 0; i < kElems; i++) {
4257 src1[i] = MinMax(src1[i], src2[i], min);
4258 }
4259 simulator->set_neon_register<T, SIZE>(Vd, src1);
4260 }
4261
4262 template <typename T>
4263 void PairwiseMinMax(Simulator* simulator, int Vd, int Vm, int Vn, bool min) {
4264 static const int kElems = kDoubleSize / sizeof(T);
4265 static const int kPairs = kElems / 2;
4266 T dst[kElems], src1[kElems], src2[kElems];
4267 simulator->get_neon_register<T, kDoubleSize>(Vn, src1);
4268 simulator->get_neon_register<T, kDoubleSize>(Vm, src2);
4269 for (int i = 0; i < kPairs; i++) {
4270 dst[i] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
4271 dst[i + kPairs] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
4272 }
4273 simulator->set_neon_register<T, kDoubleSize>(Vd, dst);
4079 } 4274 }
4080 4275
4081 void Simulator::DecodeSpecialCondition(Instruction* instr) { 4276 void Simulator::DecodeSpecialCondition(Instruction* instr) {
4082 switch (instr->SpecialValue()) { 4277 switch (instr->SpecialValue()) {
4083 case 4: { 4278 case 4: {
4084 int Vd, Vm, Vn; 4279 int Vd, Vm, Vn;
4085 if (instr->Bit(6) == 0) { 4280 if (instr->Bit(6) == 0) {
4086 Vd = instr->VFPDRegValue(kDoublePrecision); 4281 Vd = instr->VFPDRegValue(kDoublePrecision);
4087 Vm = instr->VFPMRegValue(kDoublePrecision); 4282 Vm = instr->VFPMRegValue(kDoublePrecision);
4088 Vn = instr->VFPNRegValue(kDoublePrecision); 4283 Vn = instr->VFPNRegValue(kDoublePrecision);
(...skipping 25 matching lines...) Expand all
4114 UNIMPLEMENTED(); 4309 UNIMPLEMENTED();
4115 } 4310 }
4116 break; 4311 break;
4117 } 4312 }
4118 case 0x1: { 4313 case 0x1: {
4119 if (instr->Bits(21, 20) == 2 && instr->Bit(6) == 1 && 4314 if (instr->Bits(21, 20) == 2 && instr->Bit(6) == 1 &&
4120 instr->Bit(4) == 1) { 4315 instr->Bit(4) == 1) {
4121 // vmov Qd, Qm. 4316 // vmov Qd, Qm.
4122 // vorr Qd, Qm, Qn. 4317 // vorr Qd, Qm, Qn.
4123 uint32_t src1[4]; 4318 uint32_t src1[4];
4124 get_q_register(Vm, src1); 4319 get_neon_register(Vm, src1);
4125 if (Vm != Vn) { 4320 if (Vm != Vn) {
4126 uint32_t src2[4]; 4321 uint32_t src2[4];
4127 get_q_register(Vn, src2); 4322 get_neon_register(Vn, src2);
4128 for (int i = 0; i < 4; i++) { 4323 for (int i = 0; i < 4; i++) {
4129 src1[i] = src1[i] | src2[i]; 4324 src1[i] = src1[i] | src2[i];
4130 } 4325 }
4131 } 4326 }
4132 set_q_register(Vd, src1); 4327 set_neon_register(Vd, src1);
4133 } else if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 1 && 4328 } else if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 1 &&
4134 instr->Bit(4) == 1) { 4329 instr->Bit(4) == 1) {
4135 // vand Qd, Qm, Qn. 4330 // vand Qd, Qm, Qn.
4136 uint32_t src1[4], src2[4]; 4331 uint32_t src1[4], src2[4];
4137 get_q_register(Vn, src1); 4332 get_neon_register(Vn, src1);
4138 get_q_register(Vm, src2); 4333 get_neon_register(Vm, src2);
4139 for (int i = 0; i < 4; i++) { 4334 for (int i = 0; i < 4; i++) {
4140 src1[i] = src1[i] & src2[i]; 4335 src1[i] = src1[i] & src2[i];
4141 } 4336 }
4142 set_q_register(Vd, src1); 4337 set_neon_register(Vd, src1);
4143 } else { 4338 } else {
4144 UNIMPLEMENTED(); 4339 UNIMPLEMENTED();
4145 } 4340 }
4146 break; 4341 break;
4147 } 4342 }
4148 case 0x2: { 4343 case 0x2: {
4149 if (instr->Bit(4) == 1) { 4344 if (instr->Bit(4) == 1) {
4150 // vqsub.s<size> Qd, Qm, Qn. 4345 // vqsub.s<size> Qd, Qm, Qn.
4151 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); 4346 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4152 switch (size) { 4347 switch (size) {
(...skipping 13 matching lines...) Expand all
4166 } else { 4361 } else {
4167 UNIMPLEMENTED(); 4362 UNIMPLEMENTED();
4168 } 4363 }
4169 break; 4364 break;
4170 } 4365 }
4171 case 0x3: { 4366 case 0x3: {
4172 // vcge/vcgt.s<size> Qd, Qm, Qn. 4367 // vcge/vcgt.s<size> Qd, Qm, Qn.
4173 bool ge = instr->Bit(4) == 1; 4368 bool ge = instr->Bit(4) == 1;
4174 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); 4369 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4175 switch (size) { 4370 switch (size) {
4176 case Neon8: { 4371 case Neon8:
4177 int8_t src1[16], src2[16]; 4372 CompareGreater<int8_t, kSimd128Size>(this, Vd, Vm, Vn, ge);
4178 get_q_register(Vn, src1);
4179 get_q_register(Vm, src2);
4180 for (int i = 0; i < 16; i++) {
4181 if (ge)
4182 src1[i] = src1[i] >= src2[i] ? 0xFF : 0;
4183 else
4184 src1[i] = src1[i] > src2[i] ? 0xFF : 0;
4185 }
4186 set_q_register(Vd, src1);
4187 break; 4373 break;
4188 } 4374 case Neon16:
4189 case Neon16: { 4375 CompareGreater<int16_t, kSimd128Size>(this, Vd, Vm, Vn, ge);
4190 int16_t src1[8], src2[8];
4191 get_q_register(Vn, src1);
4192 get_q_register(Vm, src2);
4193 for (int i = 0; i < 8; i++) {
4194 if (ge)
4195 src1[i] = src1[i] >= src2[i] ? 0xFFFF : 0;
4196 else
4197 src1[i] = src1[i] > src2[i] ? 0xFFFF : 0;
4198 }
4199 set_q_register(Vd, src1);
4200 break; 4376 break;
4201 } 4377 case Neon32:
4202 case Neon32: { 4378 CompareGreater<int32_t, kSimd128Size>(this, Vd, Vm, Vn, ge);
4203 int32_t src1[4], src2[4];
4204 get_q_register(Vn, src1);
4205 get_q_register(Vm, src2);
4206 for (int i = 0; i < 4; i++) {
4207 if (ge)
4208 src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFF : 0;
4209 else
4210 src1[i] = src1[i] > src2[i] ? 0xFFFFFFFF : 0;
4211 }
4212 set_q_register(Vd, src1);
4213 break; 4379 break;
4214 }
4215 default: 4380 default:
4216 UNREACHABLE(); 4381 UNREACHABLE();
4217 break; 4382 break;
4218 } 4383 }
4219 break; 4384 break;
4220 } 4385 }
4221 case 0x6: { 4386 case 0x6: {
4222 // vmin/vmax.s<size> Qd, Qm, Qn. 4387 // vmin/vmax.s<size> Qd, Qm, Qn.
4223 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); 4388 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4224 bool min = instr->Bit(4) != 0; 4389 bool min = instr->Bit(4) != 0;
4225 switch (size) { 4390 switch (size) {
4226 case Neon8: { 4391 case Neon8:
4227 int8_t src1[16], src2[16]; 4392 MinMax<int8_t, kSimd128Size>(this, Vd, Vm, Vn, min);
4228 get_q_register(Vn, src1);
4229 get_q_register(Vm, src2);
4230 for (int i = 0; i < 16; i++) {
4231 src1[i] = MinMax(src1[i], src2[i], min);
4232 }
4233 set_q_register(Vd, src1);
4234 break; 4393 break;
4235 } 4394 case Neon16:
4236 case Neon16: { 4395 MinMax<int16_t, kSimd128Size>(this, Vd, Vm, Vn, min);
4237 int16_t src1[8], src2[8];
4238 get_q_register(Vn, src1);
4239 get_q_register(Vm, src2);
4240 for (int i = 0; i < 8; i++) {
4241 src1[i] = MinMax(src1[i], src2[i], min);
4242 }
4243 set_q_register(Vd, src1);
4244 break; 4396 break;
4245 } 4397 case Neon32:
4246 case Neon32: { 4398 MinMax<int32_t, kSimd128Size>(this, Vd, Vm, Vn, min);
4247 int32_t src1[4], src2[4];
4248 get_q_register(Vn, src1);
4249 get_q_register(Vm, src2);
4250 for (int i = 0; i < 4; i++) {
4251 src1[i] = MinMax(src1[i], src2[i], min);
4252 }
4253 set_q_register(Vd, src1);
4254 break; 4399 break;
4255 }
4256 default: 4400 default:
4257 UNREACHABLE(); 4401 UNREACHABLE();
4258 break; 4402 break;
4259 } 4403 }
4260 break; 4404 break;
4261 } 4405 }
4262 case 0x8: { 4406 case 0x8: {
4263 // vadd/vtst 4407 // vadd/vtst
4264 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); 4408 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4265 if (instr->Bit(4) == 0) { 4409 if (instr->Bit(4) == 0) {
4266 // vadd.i<size> Qd, Qm, Qn. 4410 // vadd.i<size> Qd, Qm, Qn.
4267 switch (size) { 4411 switch (size) {
4268 case Neon8: { 4412 case Neon8:
4269 uint8_t src1[16], src2[16]; 4413 Add<uint8_t, kSimd128Size>(this, Vd, Vm, Vn);
4270 get_q_register(Vn, src1);
4271 get_q_register(Vm, src2);
4272 for (int i = 0; i < 16; i++) {
4273 src1[i] += src2[i];
4274 }
4275 set_q_register(Vd, src1);
4276 break; 4414 break;
4277 } 4415 case Neon16:
4278 case Neon16: { 4416 Add<uint16_t, kSimd128Size>(this, Vd, Vm, Vn);
4279 uint16_t src1[8], src2[8];
4280 get_q_register(Vn, src1);
4281 get_q_register(Vm, src2);
4282 for (int i = 0; i < 8; i++) {
4283 src1[i] += src2[i];
4284 }
4285 set_q_register(Vd, src1);
4286 break; 4417 break;
4287 } 4418 case Neon32:
4288 case Neon32: { 4419 Add<uint32_t, kSimd128Size>(this, Vd, Vm, Vn);
4289 uint32_t src1[4], src2[4];
4290 get_q_register(Vn, src1);
4291 get_q_register(Vm, src2);
4292 for (int i = 0; i < 4; i++) {
4293 src1[i] += src2[i];
4294 }
4295 set_q_register(Vd, src1);
4296 break; 4420 break;
4297 }
4298 default: 4421 default:
4299 UNREACHABLE(); 4422 UNREACHABLE();
4300 break; 4423 break;
4301 } 4424 }
4302 } else { 4425 } else {
4303 // vtst.i<size> Qd, Qm, Qn. 4426 // vtst.i<size> Qd, Qm, Qn.
4304 switch (size) { 4427 switch (size) {
4305 case Neon8: { 4428 case Neon8:
4306 uint8_t src1[16], src2[16]; 4429 Test<uint8_t, kSimd128Size>(this, Vd, Vm, Vn);
4307 get_q_register(Vn, src1);
4308 get_q_register(Vm, src2);
4309 for (int i = 0; i < 16; i++) {
4310 src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFu : 0;
4311 }
4312 set_q_register(Vd, src1);
4313 break; 4430 break;
4314 } 4431 case Neon16:
4315 case Neon16: { 4432 Test<uint16_t, kSimd128Size>(this, Vd, Vm, Vn);
4316 uint16_t src1[8], src2[8];
4317 get_q_register(Vn, src1);
4318 get_q_register(Vm, src2);
4319 for (int i = 0; i < 8; i++) {
4320 src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFu : 0;
4321 }
4322 set_q_register(Vd, src1);
4323 break; 4433 break;
4324 } 4434 case Neon32:
4325 case Neon32: { 4435 Test<uint32_t, kSimd128Size>(this, Vd, Vm, Vn);
4326 uint32_t src1[4], src2[4];
4327 get_q_register(Vn, src1);
4328 get_q_register(Vm, src2);
4329 for (int i = 0; i < 4; i++) {
4330 src1[i] = (src1[i] & src2[i]) != 0 ? 0xFFFFFFFFu : 0;
4331 }
4332 set_q_register(Vd, src1);
4333 break; 4436 break;
4334 }
4335 default: 4437 default:
4336 UNREACHABLE(); 4438 UNREACHABLE();
4337 break; 4439 break;
4338 } 4440 }
4339 } 4441 }
4340 break; 4442 break;
4341 } 4443 }
4342 case 0x9: { 4444 case 0x9: {
4343 if (instr->Bit(6) == 1 && instr->Bit(4) == 1) { 4445 if (instr->Bit(6) == 1 && instr->Bit(4) == 1) {
4344 // vmul.i<size> Qd, Qm, Qn. 4446 // vmul.i<size> Qd, Qm, Qn.
4345 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); 4447 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4346 switch (size) { 4448 switch (size) {
4347 case Neon8: { 4449 case Neon8:
4348 uint8_t src1[16], src2[16]; 4450 Mul<uint8_t, kSimd128Size>(this, Vd, Vm, Vn);
4349 get_q_register(Vn, src1);
4350 get_q_register(Vm, src2);
4351 for (int i = 0; i < 16; i++) {
4352 src1[i] *= src2[i];
4353 }
4354 set_q_register(Vd, src1);
4355 break; 4451 break;
4356 } 4452 case Neon16:
4357 case Neon16: { 4453 Mul<uint16_t, kSimd128Size>(this, Vd, Vm, Vn);
4358 uint16_t src1[8], src2[8];
4359 get_q_register(Vn, src1);
4360 get_q_register(Vm, src2);
4361 for (int i = 0; i < 8; i++) {
4362 src1[i] *= src2[i];
4363 }
4364 set_q_register(Vd, src1);
4365 break; 4454 break;
4366 } 4455 case Neon32:
4367 case Neon32: { 4456 Mul<uint32_t, kSimd128Size>(this, Vd, Vm, Vn);
4368 uint32_t src1[4], src2[4];
4369 get_q_register(Vn, src1);
4370 get_q_register(Vm, src2);
4371 for (int i = 0; i < 4; i++) {
4372 src1[i] *= src2[i];
4373 }
4374 set_q_register(Vd, src1);
4375 break; 4457 break;
4376 }
4377 default: 4458 default:
4378 UNREACHABLE(); 4459 UNREACHABLE();
4379 break; 4460 break;
4380 } 4461 }
4381 } else { 4462 } else {
4382 UNIMPLEMENTED(); 4463 UNIMPLEMENTED();
4383 } 4464 }
4384 break; 4465 break;
4385 } 4466 }
4386 case 0xa: { 4467 case 0xa: {
4387 // vpmin/vpmax.s<size> Dd, Dm, Dn. 4468 // vpmin/vpmax.s<size> Dd, Dm, Dn.
4388 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); 4469 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4389 bool min = instr->Bit(4) != 0; 4470 bool min = instr->Bit(4) != 0;
4390 switch (size) { 4471 switch (size) {
4391 case Neon8: { 4472 case Neon8:
4392 int8_t dst[8], src1[8], src2[8]; 4473 PairwiseMinMax<int8_t>(this, Vd, Vm, Vn, min);
4393 get_d_register(Vn, src1);
4394 get_d_register(Vm, src2);
4395 for (int i = 0; i < 4; i++) {
4396 dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
4397 dst[i + 4] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
4398 }
4399 set_d_register(Vd, dst);
4400 break; 4474 break;
4401 } 4475 case Neon16:
4402 case Neon16: { 4476 PairwiseMinMax<int16_t>(this, Vd, Vm, Vn, min);
4403 int16_t dst[4], src1[4], src2[4];
4404 get_d_register(Vn, src1);
4405 get_d_register(Vm, src2);
4406 for (int i = 0; i < 2; i++) {
4407 dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
4408 dst[i + 2] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
4409 }
4410 set_d_register(Vd, dst);
4411 break; 4477 break;
4412 } 4478 case Neon32:
4413 case Neon32: { 4479 PairwiseMinMax<int32_t>(this, Vd, Vm, Vn, min);
4414 int32_t dst[2], src1[2], src2[2];
4415 get_d_register(Vn, src1);
4416 get_d_register(Vm, src2);
4417 dst[0] = MinMax(src1[0], src1[1], min);
4418 dst[1] = MinMax(src2[0], src2[1], min);
4419 set_d_register(Vd, dst);
4420 break; 4480 break;
4421 }
4422 default: 4481 default:
4423 UNREACHABLE(); 4482 UNREACHABLE();
4424 break; 4483 break;
4425 } 4484 }
4426 break; 4485 break;
4427 } 4486 }
4428 case 0xd: { 4487 case 0xd: {
4429 if (instr->Bit(4) == 0) { 4488 if (instr->Bit(4) == 0) {
4430 float src1[4], src2[4]; 4489 float src1[4], src2[4];
4431 get_q_register(Vn, src1); 4490 get_neon_register(Vn, src1);
4432 get_q_register(Vm, src2); 4491 get_neon_register(Vm, src2);
4433 for (int i = 0; i < 4; i++) { 4492 for (int i = 0; i < 4; i++) {
4434 if (instr->Bit(21) == 0) { 4493 if (instr->Bit(21) == 0) {
4435 // vadd.f32 Qd, Qm, Qn. 4494 // vadd.f32 Qd, Qm, Qn.
4436 src1[i] = src1[i] + src2[i]; 4495 src1[i] = src1[i] + src2[i];
4437 } else { 4496 } else {
4438 // vsub.f32 Qd, Qm, Qn. 4497 // vsub.f32 Qd, Qm, Qn.
4439 src1[i] = src1[i] - src2[i]; 4498 src1[i] = src1[i] - src2[i];
4440 } 4499 }
4441 } 4500 }
4442 set_q_register(Vd, src1); 4501 set_neon_register(Vd, src1);
4443 } else { 4502 } else {
4444 UNIMPLEMENTED(); 4503 UNIMPLEMENTED();
4445 } 4504 }
4446 break; 4505 break;
4447 } 4506 }
4448 case 0xe: { 4507 case 0xe: {
4449 if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 0) { 4508 if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 0) {
4450 // vceq.f32. 4509 // vceq.f32.
4451 float src1[4], src2[4]; 4510 float src1[4], src2[4];
4452 get_q_register(Vn, src1); 4511 get_neon_register(Vn, src1);
4453 get_q_register(Vm, src2); 4512 get_neon_register(Vm, src2);
4454 uint32_t dst[4]; 4513 uint32_t dst[4];
4455 for (int i = 0; i < 4; i++) { 4514 for (int i = 0; i < 4; i++) {
4456 dst[i] = (src1[i] == src2[i]) ? 0xFFFFFFFF : 0; 4515 dst[i] = (src1[i] == src2[i]) ? 0xFFFFFFFF : 0;
4457 } 4516 }
4458 set_q_register(Vd, dst); 4517 set_neon_register(Vd, dst);
4459 } else { 4518 } else {
4460 UNIMPLEMENTED(); 4519 UNIMPLEMENTED();
4461 } 4520 }
4462 break; 4521 break;
4463 } 4522 }
4464 case 0xf: { 4523 case 0xf: {
4465 if (instr->Bit(20) == 0 && instr->Bit(6) == 1) { 4524 if (instr->Bit(20) == 0 && instr->Bit(6) == 1) {
4466 float src1[4], src2[4]; 4525 float src1[4], src2[4];
4467 get_q_register(Vn, src1); 4526 get_neon_register(Vn, src1);
4468 get_q_register(Vm, src2); 4527 get_neon_register(Vm, src2);
4469 if (instr->Bit(4) == 1) { 4528 if (instr->Bit(4) == 1) {
4470 if (instr->Bit(21) == 0) { 4529 if (instr->Bit(21) == 0) {
4471 // vrecps.f32 Qd, Qm, Qn. 4530 // vrecps.f32 Qd, Qm, Qn.
4472 for (int i = 0; i < 4; i++) { 4531 for (int i = 0; i < 4; i++) {
4473 src1[i] = 2.0f - src1[i] * src2[i]; 4532 src1[i] = 2.0f - src1[i] * src2[i];
4474 } 4533 }
4475 } else { 4534 } else {
4476 // vrsqrts.f32 Qd, Qm, Qn. 4535 // vrsqrts.f32 Qd, Qm, Qn.
4477 for (int i = 0; i < 4; i++) { 4536 for (int i = 0; i < 4; i++) {
4478 src1[i] = (3.0f - src1[i] * src2[i]) * 0.5f; 4537 src1[i] = (3.0f - src1[i] * src2[i]) * 0.5f;
4479 } 4538 }
4480 } 4539 }
4481 } else { 4540 } else {
4482 // vmin/vmax.f32 Qd, Qm, Qn. 4541 // vmin/vmax.f32 Qd, Qm, Qn.
4483 bool min = instr->Bit(21) == 1; 4542 bool min = instr->Bit(21) == 1;
4484 for (int i = 0; i < 4; i++) { 4543 for (int i = 0; i < 4; i++) {
4485 src1[i] = MinMax(src1[i], src2[i], min); 4544 src1[i] = MinMax(src1[i], src2[i], min);
4486 } 4545 }
4487 } 4546 }
4488 set_q_register(Vd, src1); 4547 set_neon_register(Vd, src1);
4489 } else { 4548 } else {
4490 UNIMPLEMENTED(); 4549 UNIMPLEMENTED();
4491 } 4550 }
4492 break; 4551 break;
4493 } 4552 }
4494 default: 4553 default:
4495 UNIMPLEMENTED(); 4554 UNIMPLEMENTED();
4496 break; 4555 break;
4497 } 4556 }
4498 break; 4557 break;
(...skipping 20 matching lines...) Expand all
4519 UNIMPLEMENTED(); 4578 UNIMPLEMENTED();
4520 break; 4579 break;
4521 } 4580 }
4522 } else if (instr->Bits(21, 20) == 3 && instr->Bit(4) == 0) { 4581 } else if (instr->Bits(21, 20) == 3 && instr->Bit(4) == 0) {
4523 // vext. 4582 // vext.
4524 int imm4 = instr->Bits(11, 8); 4583 int imm4 = instr->Bits(11, 8);
4525 int Vd = instr->VFPDRegValue(kSimd128Precision); 4584 int Vd = instr->VFPDRegValue(kSimd128Precision);
4526 int Vm = instr->VFPMRegValue(kSimd128Precision); 4585 int Vm = instr->VFPMRegValue(kSimd128Precision);
4527 int Vn = instr->VFPNRegValue(kSimd128Precision); 4586 int Vn = instr->VFPNRegValue(kSimd128Precision);
4528 uint8_t src1[16], src2[16], dst[16]; 4587 uint8_t src1[16], src2[16], dst[16];
4529 get_q_register(Vn, src1); 4588 get_neon_register(Vn, src1);
4530 get_q_register(Vm, src2); 4589 get_neon_register(Vm, src2);
4531 int boundary = kSimd128Size - imm4; 4590 int boundary = kSimd128Size - imm4;
4532 int i = 0; 4591 int i = 0;
4533 for (; i < boundary; i++) { 4592 for (; i < boundary; i++) {
4534 dst[i] = src1[i + imm4]; 4593 dst[i] = src1[i + imm4];
4535 } 4594 }
4536 for (; i < 16; i++) { 4595 for (; i < 16; i++) {
4537 dst[i] = src2[i - boundary]; 4596 dst[i] = src2[i - boundary];
4538 } 4597 }
4539 set_q_register(Vd, dst); 4598 set_neon_register(Vd, dst);
4540 } else if (instr->Bits(11, 7) == 0xA && instr->Bit(4) == 1) { 4599 } else if (instr->Bits(11, 7) == 0xA && instr->Bit(4) == 1) {
4541 // vshl.i<size> Qd, Qm, shift 4600 // vshl.i<size> Qd, Qm, shift
4542 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16)); 4601 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16));
4543 int shift = instr->Bits(21, 16) - size; 4602 int shift = instr->Bits(21, 16) - size;
4544 int Vd = instr->VFPDRegValue(kSimd128Precision); 4603 int Vd = instr->VFPDRegValue(kSimd128Precision);
4545 int Vm = instr->VFPMRegValue(kSimd128Precision); 4604 int Vm = instr->VFPMRegValue(kSimd128Precision);
4546 NeonSize ns = static_cast<NeonSize>(size / 16); 4605 NeonSize ns = static_cast<NeonSize>(size / 16);
4547 switch (ns) { 4606 switch (ns) {
4548 case Neon8: { 4607 case Neon8:
4549 uint8_t src[16]; 4608 ShiftLeft<uint8_t, kSimd128Size>(this, Vd, Vm, shift);
4550 get_q_register(Vm, src);
4551 for (int i = 0; i < 16; i++) {
4552 src[i] <<= shift;
4553 }
4554 set_q_register(Vd, src);
4555 break; 4609 break;
4556 } 4610 case Neon16:
4557 case Neon16: { 4611 ShiftLeft<uint16_t, kSimd128Size>(this, Vd, Vm, shift);
4558 uint16_t src[8];
4559 get_q_register(Vm, src);
4560 for (int i = 0; i < 8; i++) {
4561 src[i] <<= shift;
4562 }
4563 set_q_register(Vd, src);
4564 break; 4612 break;
4565 } 4613 case Neon32:
4566 case Neon32: { 4614 ShiftLeft<uint32_t, kSimd128Size>(this, Vd, Vm, shift);
4567 uint32_t src[4];
4568 get_q_register(Vm, src);
4569 for (int i = 0; i < 4; i++) {
4570 src[i] <<= shift;
4571 }
4572 set_q_register(Vd, src);
4573 break; 4615 break;
4574 }
4575 default: 4616 default:
4576 UNREACHABLE(); 4617 UNREACHABLE();
4577 break; 4618 break;
4578 } 4619 }
4579 } else if (instr->Bits(11, 7) == 0 && instr->Bit(4) == 1) { 4620 } else if (instr->Bits(11, 7) == 0 && instr->Bit(4) == 1) {
4580 // vshr.s<size> Qd, Qm, shift 4621 // vshr.s<size> Qd, Qm, shift
4581 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16)); 4622 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16));
4582 int shift = 2 * size - instr->Bits(21, 16); 4623 int shift = 2 * size - instr->Bits(21, 16);
4583 int Vd = instr->VFPDRegValue(kSimd128Precision); 4624 int Vd = instr->VFPDRegValue(kSimd128Precision);
4584 int Vm = instr->VFPMRegValue(kSimd128Precision); 4625 int Vm = instr->VFPMRegValue(kSimd128Precision);
4585 NeonSize ns = static_cast<NeonSize>(size / 16); 4626 NeonSize ns = static_cast<NeonSize>(size / 16);
4586 switch (ns) { 4627 switch (ns) {
4587 case Neon8: { 4628 case Neon8:
4588 int8_t src[16]; 4629 ArithmeticShiftRight<int8_t, kSimd128Size>(this, Vd, Vm, shift);
4589 get_q_register(Vm, src);
4590 for (int i = 0; i < 16; i++) {
4591 src[i] = ArithmeticShiftRight(src[i], shift);
4592 }
4593 set_q_register(Vd, src);
4594 break; 4630 break;
4595 } 4631 case Neon16:
4596 case Neon16: { 4632 ArithmeticShiftRight<int16_t, kSimd128Size>(this, Vd, Vm, shift);
4597 int16_t src[8];
4598 get_q_register(Vm, src);
4599 for (int i = 0; i < 8; i++) {
4600 src[i] = ArithmeticShiftRight(src[i], shift);
4601 }
4602 set_q_register(Vd, src);
4603 break; 4633 break;
4604 } 4634 case Neon32:
4605 case Neon32: { 4635 ArithmeticShiftRight<int32_t, kSimd128Size>(this, Vd, Vm, shift);
4606 int32_t src[4];
4607 get_q_register(Vm, src);
4608 for (int i = 0; i < 4; i++) {
4609 src[i] = ArithmeticShiftRight(src[i], shift);
4610 }
4611 set_q_register(Vd, src);
4612 break; 4636 break;
4613 }
4614 default: 4637 default:
4615 UNREACHABLE(); 4638 UNREACHABLE();
4616 break; 4639 break;
4617 } 4640 }
4618 } else { 4641 } else {
4619 UNIMPLEMENTED(); 4642 UNIMPLEMENTED();
4620 } 4643 }
4621 break; 4644 break;
4622 case 6: { 4645 case 6: {
4623 int Vd, Vm, Vn; 4646 int Vd, Vm, Vn;
(...skipping 27 matching lines...) Expand all
4651 } 4674 }
4652 } else { 4675 } else {
4653 UNIMPLEMENTED(); 4676 UNIMPLEMENTED();
4654 } 4677 }
4655 break; 4678 break;
4656 } 4679 }
4657 case 0x1: { 4680 case 0x1: {
4658 if (instr->Bits(21, 20) == 1 && instr->Bit(4) == 1) { 4681 if (instr->Bits(21, 20) == 1 && instr->Bit(4) == 1) {
4659 // vbsl.size Qd, Qm, Qn. 4682 // vbsl.size Qd, Qm, Qn.
4660 uint32_t dst[4], src1[4], src2[4]; 4683 uint32_t dst[4], src1[4], src2[4];
4661 get_q_register(Vd, dst); 4684 get_neon_register(Vd, dst);
4662 get_q_register(Vn, src1); 4685 get_neon_register(Vn, src1);
4663 get_q_register(Vm, src2); 4686 get_neon_register(Vm, src2);
4664 for (int i = 0; i < 4; i++) { 4687 for (int i = 0; i < 4; i++) {
4665 dst[i] = (dst[i] & src1[i]) | (~dst[i] & src2[i]); 4688 dst[i] = (dst[i] & src1[i]) | (~dst[i] & src2[i]);
4666 } 4689 }
4667 set_q_register(Vd, dst); 4690 set_neon_register(Vd, dst);
4668 } else if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 1) { 4691 } else if (instr->Bits(21, 20) == 0 && instr->Bit(4) == 1) {
4669 if (instr->Bit(6) == 0) { 4692 if (instr->Bit(6) == 0) {
4670 // veor Dd, Dn, Dm 4693 // veor Dd, Dn, Dm
4671 uint64_t src1, src2; 4694 uint64_t src1, src2;
4672 get_d_register(Vn, &src1); 4695 get_d_register(Vn, &src1);
4673 get_d_register(Vm, &src2); 4696 get_d_register(Vm, &src2);
4674 src1 ^= src2; 4697 src1 ^= src2;
4675 set_d_register(Vd, &src1); 4698 set_d_register(Vd, &src1);
4676 4699
4677 } else { 4700 } else {
4678 // veor Qd, Qn, Qm 4701 // veor Qd, Qn, Qm
4679 uint32_t src1[4], src2[4]; 4702 uint32_t src1[4], src2[4];
4680 get_q_register(Vn, src1); 4703 get_neon_register(Vn, src1);
4681 get_q_register(Vm, src2); 4704 get_neon_register(Vm, src2);
4682 for (int i = 0; i < 4; i++) src1[i] ^= src2[i]; 4705 for (int i = 0; i < 4; i++) src1[i] ^= src2[i];
4683 set_q_register(Vd, src1); 4706 set_neon_register(Vd, src1);
4684 } 4707 }
4685 } else { 4708 } else {
4686 UNIMPLEMENTED(); 4709 UNIMPLEMENTED();
4687 } 4710 }
4688 break; 4711 break;
4689 } 4712 }
4690 case 0x2: { 4713 case 0x2: {
4691 if (instr->Bit(4) == 1) { 4714 if (instr->Bit(4) == 1) {
4692 // vqsub.u<size> Qd, Qm, Qn. 4715 // vqsub.u<size> Qd, Qm, Qn.
4693 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); 4716 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
(...skipping 14 matching lines...) Expand all
4708 } else { 4731 } else {
4709 UNIMPLEMENTED(); 4732 UNIMPLEMENTED();
4710 } 4733 }
4711 break; 4734 break;
4712 } 4735 }
4713 case 0x3: { 4736 case 0x3: {
4714 // vcge/vcgt.u<size> Qd, Qm, Qn. 4737 // vcge/vcgt.u<size> Qd, Qm, Qn.
4715 bool ge = instr->Bit(4) == 1; 4738 bool ge = instr->Bit(4) == 1;
4716 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); 4739 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4717 switch (size) { 4740 switch (size) {
4718 case Neon8: { 4741 case Neon8:
4719 uint8_t src1[16], src2[16]; 4742 CompareGreater<uint8_t, kSimd128Size>(this, Vd, Vm, Vn, ge);
4720 get_q_register(Vn, src1);
4721 get_q_register(Vm, src2);
4722 for (int i = 0; i < 16; i++) {
4723 if (ge)
4724 src1[i] = src1[i] >= src2[i] ? 0xFFu : 0;
4725 else
4726 src1[i] = src1[i] > src2[i] ? 0xFFu : 0;
4727 }
4728 set_q_register(Vd, src1);
4729 break; 4743 break;
4730 } 4744 case Neon16:
4731 case Neon16: { 4745 CompareGreater<uint16_t, kSimd128Size>(this, Vd, Vm, Vn, ge);
4732 uint16_t src1[8], src2[8];
4733 get_q_register(Vn, src1);
4734 get_q_register(Vm, src2);
4735 for (int i = 0; i < 8; i++) {
4736 if (ge)
4737 src1[i] = src1[i] >= src2[i] ? 0xFFFFu : 0;
4738 else
4739 src1[i] = src1[i] > src2[i] ? 0xFFFFu : 0;
4740 }
4741 set_q_register(Vd, src1);
4742 break; 4746 break;
4743 } 4747 case Neon32:
4744 case Neon32: { 4748 CompareGreater<uint32_t, kSimd128Size>(this, Vd, Vm, Vn, ge);
4745 uint32_t src1[4], src2[4];
4746 get_q_register(Vn, src1);
4747 get_q_register(Vm, src2);
4748 for (int i = 0; i < 4; i++) {
4749 if (ge)
4750 src1[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0;
4751 else
4752 src1[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0;
4753 }
4754 set_q_register(Vd, src1);
4755 break; 4749 break;
4756 }
4757 default: 4750 default:
4758 UNREACHABLE(); 4751 UNREACHABLE();
4759 break; 4752 break;
4760 } 4753 }
4761 break; 4754 break;
4762 } 4755 }
4763 case 0x6: { 4756 case 0x6: {
4764 // vmin/vmax.u<size> Qd, Qm, Qn. 4757 // vmin/vmax.u<size> Qd, Qm, Qn.
4765 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); 4758 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4766 bool min = instr->Bit(4) != 0; 4759 bool min = instr->Bit(4) != 0;
4767 switch (size) { 4760 switch (size) {
4768 case Neon8: { 4761 case Neon8:
4769 uint8_t src1[16], src2[16]; 4762 MinMax<uint8_t, kSimd128Size>(this, Vd, Vm, Vn, min);
4770 get_q_register(Vn, src1);
4771 get_q_register(Vm, src2);
4772 for (int i = 0; i < 16; i++) {
4773 src1[i] = MinMax(src1[i], src2[i], min);
4774 }
4775 set_q_register(Vd, src1);
4776 break; 4763 break;
4777 } 4764 case Neon16:
4778 case Neon16: { 4765 MinMax<uint16_t, kSimd128Size>(this, Vd, Vm, Vn, min);
4779 uint16_t src1[8], src2[8];
4780 get_q_register(Vn, src1);
4781 get_q_register(Vm, src2);
4782 for (int i = 0; i < 8; i++) {
4783 src1[i] = MinMax(src1[i], src2[i], min);
4784 }
4785 set_q_register(Vd, src1);
4786 break; 4766 break;
4787 } 4767 case Neon32:
4788 case Neon32: { 4768 MinMax<uint32_t, kSimd128Size>(this, Vd, Vm, Vn, min);
4789 uint32_t src1[4], src2[4];
4790 get_q_register(Vn, src1);
4791 get_q_register(Vm, src2);
4792 for (int i = 0; i < 4; i++) {
4793 src1[i] = MinMax(src1[i], src2[i], min);
4794 }
4795 set_q_register(Vd, src1);
4796 break; 4769 break;
4797 }
4798 default: 4770 default:
4799 UNREACHABLE(); 4771 UNREACHABLE();
4800 break; 4772 break;
4801 } 4773 }
4802 break; 4774 break;
4803 } 4775 }
4804 case 0x8: { 4776 case 0x8: {
4805 if (instr->Bit(4) == 0) { 4777 if (instr->Bit(4) == 0) {
4806 // vsub.size Qd, Qm, Qn. 4778 // vsub.size Qd, Qm, Qn.
4807 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); 4779 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4808 switch (size) { 4780 switch (size) {
4809 case Neon8: { 4781 case Neon8:
4810 uint8_t src1[16], src2[16]; 4782 Sub<uint8_t, kSimd128Size>(this, Vd, Vm, Vn);
4811 get_q_register(Vn, src1);
4812 get_q_register(Vm, src2);
4813 for (int i = 0; i < 16; i++) {
4814 src1[i] -= src2[i];
4815 }
4816 set_q_register(Vd, src1);
4817 break; 4783 break;
4818 } 4784 case Neon16:
4819 case Neon16: { 4785 Sub<uint16_t, kSimd128Size>(this, Vd, Vm, Vn);
4820 uint16_t src1[8], src2[8];
4821 get_q_register(Vn, src1);
4822 get_q_register(Vm, src2);
4823 for (int i = 0; i < 8; i++) {
4824 src1[i] -= src2[i];
4825 }
4826 set_q_register(Vd, src1);
4827 break; 4786 break;
4828 } 4787 case Neon32:
4829 case Neon32: { 4788 Sub<uint32_t, kSimd128Size>(this, Vd, Vm, Vn);
4830 uint32_t src1[4], src2[4];
4831 get_q_register(Vn, src1);
4832 get_q_register(Vm, src2);
4833 for (int i = 0; i < 4; i++) {
4834 src1[i] -= src2[i];
4835 }
4836 set_q_register(Vd, src1);
4837 break; 4789 break;
4838 }
4839 default: 4790 default:
4840 UNREACHABLE(); 4791 UNREACHABLE();
4841 break; 4792 break;
4842 } 4793 }
4843 } else { 4794 } else {
4844 // vceq.size Qd, Qm, Qn. 4795 // vceq.size Qd, Qm, Qn.
4845 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); 4796 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4846 switch (size) { 4797 switch (size) {
4847 case Neon8: { 4798 case Neon8:
4848 uint8_t src1[16], src2[16]; 4799 CompareEqual<uint8_t, kSimd128Size>(this, Vd, Vm, Vn);
4849 get_q_register(Vn, src1);
4850 get_q_register(Vm, src2);
4851 for (int i = 0; i < 16; i++) {
4852 src1[i] = (src1[i] == src2[i]) ? 0xFFu : 0;
4853 }
4854 set_q_register(Vd, src1);
4855 break; 4800 break;
4856 } 4801 case Neon16:
4857 case Neon16: { 4802 CompareEqual<uint16_t, kSimd128Size>(this, Vd, Vm, Vn);
4858 uint16_t src1[8], src2[8];
4859 get_q_register(Vn, src1);
4860 get_q_register(Vm, src2);
4861 for (int i = 0; i < 8; i++) {
4862 src1[i] = (src1[i] == src2[i]) ? 0xFFFFu : 0;
4863 }
4864 set_q_register(Vd, src1);
4865 break; 4803 break;
4866 } 4804 case Neon32:
4867 case Neon32: { 4805 CompareEqual<uint32_t, kSimd128Size>(this, Vd, Vm, Vn);
4868 uint32_t src1[4], src2[4];
4869 get_q_register(Vn, src1);
4870 get_q_register(Vm, src2);
4871 for (int i = 0; i < 4; i++) {
4872 src1[i] = (src1[i] == src2[i]) ? 0xFFFFFFFFu : 0;
4873 }
4874 set_q_register(Vd, src1);
4875 break; 4806 break;
4876 }
4877 default: 4807 default:
4878 UNREACHABLE(); 4808 UNREACHABLE();
4879 break; 4809 break;
4880 } 4810 }
4881 } 4811 }
4882 break; 4812 break;
4883 } 4813 }
4884 case 0xa: { 4814 case 0xa: {
4885 // vpmin/vpmax.u<size> Dd, Dm, Dn. 4815 // vpmin/vpmax.u<size> Dd, Dm, Dn.
4886 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20)); 4816 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));
4887 bool min = instr->Bit(4) != 0; 4817 bool min = instr->Bit(4) != 0;
4888 switch (size) { 4818 switch (size) {
4889 case Neon8: { 4819 case Neon8:
4890 uint8_t dst[8], src1[8], src2[8]; 4820 PairwiseMinMax<uint8_t>(this, Vd, Vm, Vn, min);
4891 get_d_register(Vn, src1);
4892 get_d_register(Vm, src2);
4893 for (int i = 0; i < 4; i++) {
4894 dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
4895 dst[i + 4] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
4896 }
4897 set_d_register(Vd, dst);
4898 break; 4821 break;
4899 } 4822 case Neon16:
4900 case Neon16: { 4823 PairwiseMinMax<uint16_t>(this, Vd, Vm, Vn, min);
4901 uint16_t dst[4], src1[4], src2[4];
4902 get_d_register(Vn, src1);
4903 get_d_register(Vm, src2);
4904 for (int i = 0; i < 2; i++) {
4905 dst[i + 0] = MinMax(src1[i * 2], src1[i * 2 + 1], min);
4906 dst[i + 2] = MinMax(src2[i * 2], src2[i * 2 + 1], min);
4907 }
4908 set_d_register(Vd, dst);
4909 break; 4824 break;
4910 } 4825 case Neon32:
4911 case Neon32: { 4826 PairwiseMinMax<uint32_t>(this, Vd, Vm, Vn, min);
4912 uint32_t dst[2], src1[2], src2[2];
4913 get_d_register(Vn, src1);
4914 get_d_register(Vm, src2);
4915 dst[0] = MinMax(src1[0], src1[1], min);
4916 dst[1] = MinMax(src2[0], src2[1], min);
4917 set_d_register(Vd, dst);
4918 break; 4827 break;
4919 }
4920 default: 4828 default:
4921 UNREACHABLE(); 4829 UNREACHABLE();
4922 break; 4830 break;
4923 } 4831 }
4924 break; 4832 break;
4925 } 4833 }
4926 case 0xd: { 4834 case 0xd: {
4927 if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) { 4835 if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) {
4928 // vmul.f32 Qd, Qn, Qm 4836 // vmul.f32 Qd, Qn, Qm
4929 float src1[4], src2[4]; 4837 float src1[4], src2[4];
4930 get_q_register(Vn, src1); 4838 get_neon_register(Vn, src1);
4931 get_q_register(Vm, src2); 4839 get_neon_register(Vm, src2);
4932 for (int i = 0; i < 4; i++) { 4840 for (int i = 0; i < 4; i++) {
4933 src1[i] = src1[i] * src2[i]; 4841 src1[i] = src1[i] * src2[i];
4934 } 4842 }
4935 set_q_register(Vd, src1); 4843 set_neon_register(Vd, src1);
4936 } else { 4844 } else {
4937 UNIMPLEMENTED(); 4845 UNIMPLEMENTED();
4938 } 4846 }
4939 break; 4847 break;
4940 } 4848 }
4941 case 0xe: { 4849 case 0xe: {
4942 if (instr->Bit(20) == 0 && instr->Bit(4) == 0) { 4850 if (instr->Bit(20) == 0 && instr->Bit(4) == 0) {
4943 // vcge/vcgt.f32 Qd, Qm, Qn 4851 // vcge/vcgt.f32 Qd, Qm, Qn
4944 bool ge = instr->Bit(21) == 0; 4852 bool ge = instr->Bit(21) == 0;
4945 float src1[4], src2[4]; 4853 float src1[4], src2[4];
4946 get_q_register(Vn, src1); 4854 get_neon_register(Vn, src1);
4947 get_q_register(Vm, src2); 4855 get_neon_register(Vm, src2);
4948 uint32_t dst[4]; 4856 uint32_t dst[4];
4949 for (int i = 0; i < 4; i++) { 4857 for (int i = 0; i < 4; i++) {
4950 if (ge) { 4858 if (ge) {
4951 dst[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0; 4859 dst[i] = src1[i] >= src2[i] ? 0xFFFFFFFFu : 0;
4952 } else { 4860 } else {
4953 dst[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0; 4861 dst[i] = src1[i] > src2[i] ? 0xFFFFFFFFu : 0;
4954 } 4862 }
4955 } 4863 }
4956 set_q_register(Vd, dst); 4864 set_neon_register(Vd, dst);
4957 } else { 4865 } else {
4958 UNIMPLEMENTED(); 4866 UNIMPLEMENTED();
4959 } 4867 }
4960 break; 4868 break;
4961 } 4869 }
4962 default: 4870 default:
4963 UNREACHABLE(); 4871 UNREACHABLE();
4964 break; 4872 break;
4965 } 4873 }
4966 break; 4874 break;
(...skipping 20 matching lines...) Expand all
4987 UNIMPLEMENTED(); 4895 UNIMPLEMENTED();
4988 break; 4896 break;
4989 } 4897 }
4990 } else if (instr->Opc1Value() == 7 && instr->Bit(4) == 0) { 4898 } else if (instr->Opc1Value() == 7 && instr->Bit(4) == 0) {
4991 if (instr->Bits(19, 16) == 0xB && instr->Bits(11, 9) == 0x3 && 4899 if (instr->Bits(19, 16) == 0xB && instr->Bits(11, 9) == 0x3 &&
4992 instr->Bit(6) == 1) { 4900 instr->Bit(6) == 1) {
4993 // vcvt.<Td>.<Tm> Qd, Qm. 4901 // vcvt.<Td>.<Tm> Qd, Qm.
4994 int Vd = instr->VFPDRegValue(kSimd128Precision); 4902 int Vd = instr->VFPDRegValue(kSimd128Precision);
4995 int Vm = instr->VFPMRegValue(kSimd128Precision); 4903 int Vm = instr->VFPMRegValue(kSimd128Precision);
4996 uint32_t q_data[4]; 4904 uint32_t q_data[4];
4997 get_q_register(Vm, q_data); 4905 get_neon_register(Vm, q_data);
4998 int op = instr->Bits(8, 7); 4906 int op = instr->Bits(8, 7);
4999 for (int i = 0; i < 4; i++) { 4907 for (int i = 0; i < 4; i++) {
5000 switch (op) { 4908 switch (op) {
5001 case 0: 4909 case 0:
5002 // f32 <- s32, round towards nearest. 4910 // f32 <- s32, round towards nearest.
5003 q_data[i] = bit_cast<uint32_t>(std::round( 4911 q_data[i] = bit_cast<uint32_t>(std::round(
5004 static_cast<float>(bit_cast<int32_t>(q_data[i])))); 4912 static_cast<float>(bit_cast<int32_t>(q_data[i]))));
5005 break; 4913 break;
5006 case 1: 4914 case 1:
5007 // f32 <- u32, round towards nearest. 4915 // f32 <- u32, round towards nearest.
5008 q_data[i] = bit_cast<uint32_t>( 4916 q_data[i] = bit_cast<uint32_t>(
5009 std::round(static_cast<float>(q_data[i]))); 4917 std::round(static_cast<float>(q_data[i])));
5010 break; 4918 break;
5011 case 2: 4919 case 2:
5012 // s32 <- f32, round to zero. 4920 // s32 <- f32, round to zero.
5013 q_data[i] = static_cast<uint32_t>( 4921 q_data[i] = static_cast<uint32_t>(
5014 ConvertDoubleToInt(bit_cast<float>(q_data[i]), false, RZ)); 4922 ConvertDoubleToInt(bit_cast<float>(q_data[i]), false, RZ));
5015 break; 4923 break;
5016 case 3: 4924 case 3:
5017 // u32 <- f32, round to zero. 4925 // u32 <- f32, round to zero.
5018 q_data[i] = static_cast<uint32_t>( 4926 q_data[i] = static_cast<uint32_t>(
5019 ConvertDoubleToInt(bit_cast<float>(q_data[i]), true, RZ)); 4927 ConvertDoubleToInt(bit_cast<float>(q_data[i]), true, RZ));
5020 break; 4928 break;
5021 } 4929 }
5022 } 4930 }
5023 set_q_register(Vd, q_data); 4931 set_neon_register(Vd, q_data);
5024 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0) { 4932 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0) {
5025 if (instr->Bit(6) == 0) { 4933 if (instr->Bit(6) == 0) {
5026 // vswp Dd, Dm. 4934 // vswp Dd, Dm.
5027 uint64_t dval, mval; 4935 uint64_t dval, mval;
5028 int vd = instr->VFPDRegValue(kDoublePrecision); 4936 int vd = instr->VFPDRegValue(kDoublePrecision);
5029 int vm = instr->VFPMRegValue(kDoublePrecision); 4937 int vm = instr->VFPMRegValue(kDoublePrecision);
5030 get_d_register(vd, &dval); 4938 get_d_register(vd, &dval);
5031 get_d_register(vm, &mval); 4939 get_d_register(vm, &mval);
5032 set_d_register(vm, &dval); 4940 set_d_register(vm, &dval);
5033 set_d_register(vd, &mval); 4941 set_d_register(vd, &mval);
5034 } else { 4942 } else {
5035 // vswp Qd, Qm. 4943 // vswp Qd, Qm.
5036 uint32_t dval[4], mval[4]; 4944 uint32_t dval[4], mval[4];
5037 int vd = instr->VFPDRegValue(kSimd128Precision); 4945 int vd = instr->VFPDRegValue(kSimd128Precision);
5038 int vm = instr->VFPMRegValue(kSimd128Precision); 4946 int vm = instr->VFPMRegValue(kSimd128Precision);
5039 get_q_register(vd, dval); 4947 get_neon_register(vd, dval);
5040 get_q_register(vm, mval); 4948 get_neon_register(vm, mval);
5041 set_q_register(vm, dval); 4949 set_neon_register(vm, dval);
5042 set_q_register(vd, mval); 4950 set_neon_register(vd, mval);
5043 } 4951 }
5044 } else if (instr->Bits(11, 7) == 0x18) { 4952 } else if (instr->Bits(11, 7) == 0x18) {
5045 // vdup.32 Qd, Sm. 4953 // vdup.32 Qd, Sm.
5046 int vd = instr->VFPDRegValue(kSimd128Precision); 4954 int vd = instr->VFPDRegValue(kSimd128Precision);
5047 int vm = instr->VFPMRegValue(kDoublePrecision); 4955 int vm = instr->VFPMRegValue(kDoublePrecision);
5048 int index = instr->Bit(19); 4956 int index = instr->Bit(19);
5049 uint32_t s_data = get_s_register(vm * 2 + index); 4957 uint32_t s_data = get_s_register(vm * 2 + index);
5050 uint32_t q_data[4]; 4958 uint32_t q_data[4];
5051 for (int i = 0; i < 4; i++) q_data[i] = s_data; 4959 for (int i = 0; i < 4; i++) q_data[i] = s_data;
5052 set_q_register(vd, q_data); 4960 set_neon_register(vd, q_data);
5053 } else if (instr->Bits(19, 16) == 0 && instr->Bits(11, 6) == 0x17) { 4961 } else if (instr->Bits(19, 16) == 0 && instr->Bits(11, 6) == 0x17) {
5054 // vmvn Qd, Qm. 4962 // vmvn Qd, Qm.
5055 int vd = instr->VFPDRegValue(kSimd128Precision); 4963 int vd = instr->VFPDRegValue(kSimd128Precision);
5056 int vm = instr->VFPMRegValue(kSimd128Precision); 4964 int vm = instr->VFPMRegValue(kSimd128Precision);
5057 uint32_t q_data[4]; 4965 uint32_t q_data[4];
5058 get_q_register(vm, q_data); 4966 get_neon_register(vm, q_data);
5059 for (int i = 0; i < 4; i++) q_data[i] = ~q_data[i]; 4967 for (int i = 0; i < 4; i++) q_data[i] = ~q_data[i];
5060 set_q_register(vd, q_data); 4968 set_neon_register(vd, q_data);
5061 } else if (instr->Bits(11, 10) == 0x2) { 4969 } else if (instr->Bits(11, 10) == 0x2) {
5062 // vtb[l,x] Dd, <list>, Dm. 4970 // vtb[l,x] Dd, <list>, Dm.
5063 int vd = instr->VFPDRegValue(kDoublePrecision); 4971 int vd = instr->VFPDRegValue(kDoublePrecision);
5064 int vn = instr->VFPNRegValue(kDoublePrecision); 4972 int vn = instr->VFPNRegValue(kDoublePrecision);
5065 int vm = instr->VFPMRegValue(kDoublePrecision); 4973 int vm = instr->VFPMRegValue(kDoublePrecision);
5066 int table_len = (instr->Bits(9, 8) + 1) * kDoubleSize; 4974 int table_len = (instr->Bits(9, 8) + 1) * kDoubleSize;
5067 bool vtbx = instr->Bit(6) != 0; // vtbl / vtbx 4975 bool vtbx = instr->Bit(6) != 0; // vtbl / vtbx
5068 uint64_t destination = 0, indices = 0, result = 0; 4976 uint64_t destination = 0, indices = 0, result = 0;
5069 get_d_register(vd, &destination); 4977 get_d_register(vd, &destination);
5070 get_d_register(vm, &indices); 4978 get_d_register(vm, &indices);
5071 for (int i = 0; i < kDoubleSize; i++) { 4979 for (int i = 0; i < kDoubleSize; i++) {
5072 int shift = i * kBitsPerByte; 4980 int shift = i * kBitsPerByte;
5073 int index = (indices >> shift) & 0xFF; 4981 int index = (indices >> shift) & 0xFF;
5074 if (index < table_len) { 4982 if (index < table_len) {
5075 uint64_t table; 4983 uint64_t table;
5076 get_d_register(vn + index / kDoubleSize, &table); 4984 get_d_register(vn + index / kDoubleSize, &table);
5077 result |= 4985 result |=
5078 ((table >> ((index % kDoubleSize) * kBitsPerByte)) & 0xFF) 4986 ((table >> ((index % kDoubleSize) * kBitsPerByte)) & 0xFF)
5079 << shift; 4987 << shift;
5080 } else if (vtbx) { 4988 } else if (vtbx) {
5081 result |= destination & (0xFFull << shift); 4989 result |= destination & (0xFFull << shift);
5082 } 4990 }
5083 } 4991 }
5084 set_d_register(vd, &result); 4992 set_d_register(vd, &result);
5085 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x1 && 4993 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x1) {
5086 instr->Bit(6) == 1) {
5087 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); 4994 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
5088 int Vd = instr->VFPDRegValue(kSimd128Precision); 4995 if (instr->Bit(6) == 0) {
5089 int Vm = instr->VFPMRegValue(kSimd128Precision); 4996 int Vd = instr->VFPDRegValue(kDoublePrecision);
5090 if (instr->Bit(7) == 1) { 4997 int Vm = instr->VFPMRegValue(kDoublePrecision);
5091 // vzip.<size> Qd, Qm. 4998 if (instr->Bit(7) == 1) {
5092 switch (size) { 4999 // vzip.<size> Dd, Dm.
5093 case Neon8: { 5000 switch (size) {
5094 uint8_t src1[16], src2[16], dst1[16], dst2[16]; 5001 case Neon8:
5095 get_q_register(Vd, src1); 5002 Zip<uint8_t, kDoubleSize>(this, Vd, Vm);
5096 get_q_register(Vm, src2); 5003 break;
5097 for (int i = 0; i < 8; i++) { 5004 case Neon16:
5098 dst1[i * 2] = src1[i]; 5005 Zip<uint16_t, kDoubleSize>(this, Vd, Vm);
5099 dst1[i * 2 + 1] = src2[i]; 5006 break;
5100 dst2[i * 2] = src1[i + 8]; 5007 case Neon32:
5101 dst2[i * 2 + 1] = src2[i + 8]; 5008 UNIMPLEMENTED();
5102 } 5009 break;
5103 set_q_register(Vd, dst1); 5010 default:
5104 set_q_register(Vm, dst2); 5011 UNREACHABLE();
5105 break; 5012 break;
5106 } 5013 }
5107 case Neon16: { 5014 } else {
5108 uint16_t src1[8], src2[8], dst1[8], dst2[8]; 5015 // vuzp.<size> Dd, Dm.
5109 get_q_register(Vd, src1); 5016 switch (size) {
5110 get_q_register(Vm, src2); 5017 case Neon8:
5111 for (int i = 0; i < 4; i++) { 5018 Unzip<uint8_t, kDoubleSize>(this, Vd, Vm);
5112 dst1[i * 2] = src1[i]; 5019 break;
5113 dst1[i * 2 + 1] = src2[i]; 5020 case Neon16:
5114 dst2[i * 2] = src1[i + 4]; 5021 Unzip<uint16_t, kDoubleSize>(this, Vd, Vm);
5115 dst2[i * 2 + 1] = src2[i + 4]; 5022 break;
5116 } 5023 case Neon32:
5117 set_q_register(Vd, dst1); 5024 UNIMPLEMENTED();
5118 set_q_register(Vm, dst2); 5025 break;
5119 break; 5026 default:
5027 UNREACHABLE();
5028 break;
5120 } 5029 }
5121 case Neon32: {
5122 uint32_t src1[4], src2[4], dst1[4], dst2[4];
5123 get_q_register(Vd, src1);
5124 get_q_register(Vm, src2);
5125 for (int i = 0; i < 2; i++) {
5126 dst1[i * 2] = src1[i];
5127 dst1[i * 2 + 1] = src2[i];
5128 dst2[i * 2] = src1[i + 2];
5129 dst2[i * 2 + 1] = src2[i + 2];
5130 }
5131 set_q_register(Vd, dst1);
5132 set_q_register(Vm, dst2);
5133 break;
5134 }
5135 default:
5136 UNREACHABLE();
5137 break;
5138 } 5030 }
5139 } else { 5031 } else {
5140 // vuzp.<size> Qd, Qm. 5032 int Vd = instr->VFPDRegValue(kSimd128Precision);
5141 switch (size) { 5033 int Vm = instr->VFPMRegValue(kSimd128Precision);
5142 case Neon8: { 5034 if (instr->Bit(7) == 1) {
5143 uint8_t src1[16], src2[16], dst1[16], dst2[16]; 5035 // vzip.<size> Qd, Qm.
5144 get_q_register(Vd, src1); 5036 switch (size) {
5145 get_q_register(Vm, src2); 5037 case Neon8:
5146 for (int i = 0; i < 8; i++) { 5038 Zip<uint8_t, kSimd128Size>(this, Vd, Vm);
5147 dst1[i] = src1[i * 2]; 5039 break;
5148 dst1[i + 8] = src2[i * 2]; 5040 case Neon16:
5149 dst2[i] = src1[i * 2 + 1]; 5041 Zip<uint16_t, kSimd128Size>(this, Vd, Vm);
5150 dst2[i + 8] = src2[i * 2 + 1]; 5042 break;
5151 } 5043 case Neon32:
5152 set_q_register(Vd, dst1); 5044 Zip<uint32_t, kSimd128Size>(this, Vd, Vm);
5153 set_q_register(Vm, dst2); 5045 break;
5154 break; 5046 default:
5047 UNREACHABLE();
5048 break;
5155 } 5049 }
5156 case Neon16: { 5050 } else {
5157 uint16_t src1[8], src2[8], dst1[8], dst2[8]; 5051 // vuzp.<size> Qd, Qm.
5158 get_q_register(Vd, src1); 5052 switch (size) {
5159 get_q_register(Vm, src2); 5053 case Neon8:
5160 for (int i = 0; i < 4; i++) { 5054 Unzip<uint8_t, kSimd128Size>(this, Vd, Vm);
5161 dst1[i] = src1[i * 2]; 5055 break;
5162 dst1[i + 4] = src2[i * 2]; 5056 case Neon16:
5163 dst2[i] = src1[i * 2 + 1]; 5057 Unzip<uint16_t, kSimd128Size>(this, Vd, Vm);
5164 dst2[i + 4] = src2[i * 2 + 1]; 5058 break;
5165 } 5059 case Neon32:
5166 set_q_register(Vd, dst1); 5060 Unzip<uint32_t, kSimd128Size>(this, Vd, Vm);
5167 set_q_register(Vm, dst2); 5061 break;
5168 break; 5062 default:
5063 UNREACHABLE();
5064 break;
5169 } 5065 }
5170 case Neon32: {
5171 uint32_t src1[4], src2[4], dst1[4], dst2[4];
5172 get_q_register(Vd, src1);
5173 get_q_register(Vm, src2);
5174 for (int i = 0; i < 2; i++) {
5175 dst1[i] = src1[i * 2];
5176 dst1[i + 2] = src2[i * 2];
5177 dst2[i] = src1[i * 2 + 1];
5178 dst2[i + 2] = src2[i * 2 + 1];
5179 }
5180 set_q_register(Vd, dst1);
5181 set_q_register(Vm, dst2);
5182 break;
5183 }
5184 default:
5185 UNREACHABLE();
5186 break;
5187 } 5066 }
5188 } 5067 }
5189 } else if (instr->Bits(17, 16) == 0 && instr->Bits(11, 9) == 0) { 5068 } else if (instr->Bits(17, 16) == 0 && instr->Bits(11, 9) == 0) {
5190 // vrev<op>.size Qd, Qm 5069 // vrev<op>.size Qd, Qm
5191 int Vd = instr->VFPDRegValue(kSimd128Precision); 5070 int Vd = instr->VFPDRegValue(kSimd128Precision);
5192 int Vm = instr->VFPMRegValue(kSimd128Precision); 5071 int Vm = instr->VFPMRegValue(kSimd128Precision);
5193 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); 5072 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
5194 NeonSize op = static_cast<NeonSize>(static_cast<int>(Neon64) - 5073 NeonSize op = static_cast<NeonSize>(static_cast<int>(Neon64) -
5195 instr->Bits(8, 7)); 5074 instr->Bits(8, 7));
5196 switch (op) { 5075 switch (op) {
5197 case Neon16: { 5076 case Neon16: {
5198 DCHECK_EQ(Neon8, size); 5077 DCHECK_EQ(Neon8, size);
5199 uint8_t src[16]; 5078 uint8_t src[16];
5200 get_q_register(Vm, src); 5079 get_neon_register(Vm, src);
5201 for (int i = 0; i < 16; i += 2) { 5080 for (int i = 0; i < 16; i += 2) {
5202 std::swap(src[i], src[i + 1]); 5081 std::swap(src[i], src[i + 1]);
5203 } 5082 }
5204 set_q_register(Vd, src); 5083 set_neon_register(Vd, src);
5205 break; 5084 break;
5206 } 5085 }
5207 case Neon32: { 5086 case Neon32: {
5208 switch (size) { 5087 switch (size) {
5209 case Neon16: { 5088 case Neon16: {
5210 uint16_t src[8]; 5089 uint16_t src[8];
5211 get_q_register(Vm, src); 5090 get_neon_register(Vm, src);
5212 for (int i = 0; i < 8; i += 2) { 5091 for (int i = 0; i < 8; i += 2) {
5213 std::swap(src[i], src[i + 1]); 5092 std::swap(src[i], src[i + 1]);
5214 } 5093 }
5215 set_q_register(Vd, src); 5094 set_neon_register(Vd, src);
5216 break; 5095 break;
5217 } 5096 }
5218 case Neon8: { 5097 case Neon8: {
5219 uint8_t src[16]; 5098 uint8_t src[16];
5220 get_q_register(Vm, src); 5099 get_neon_register(Vm, src);
5221 for (int i = 0; i < 4; i++) { 5100 for (int i = 0; i < 4; i++) {
5222 std::swap(src[i * 4], src[i * 4 + 3]); 5101 std::swap(src[i * 4], src[i * 4 + 3]);
5223 std::swap(src[i * 4 + 1], src[i * 4 + 2]); 5102 std::swap(src[i * 4 + 1], src[i * 4 + 2]);
5224 } 5103 }
5225 set_q_register(Vd, src); 5104 set_neon_register(Vd, src);
5226 break; 5105 break;
5227 } 5106 }
5228 default: 5107 default:
5229 UNREACHABLE(); 5108 UNREACHABLE();
5230 break; 5109 break;
5231 } 5110 }
5232 break; 5111 break;
5233 } 5112 }
5234 case Neon64: { 5113 case Neon64: {
5235 switch (size) { 5114 switch (size) {
5236 case Neon32: { 5115 case Neon32: {
5237 uint32_t src[4]; 5116 uint32_t src[4];
5238 get_q_register(Vm, src); 5117 get_neon_register(Vm, src);
5239 std::swap(src[0], src[1]); 5118 std::swap(src[0], src[1]);
5240 std::swap(src[2], src[3]); 5119 std::swap(src[2], src[3]);
5241 set_q_register(Vd, src); 5120 set_neon_register(Vd, src);
5242 break; 5121 break;
5243 } 5122 }
5244 case Neon16: { 5123 case Neon16: {
5245 uint16_t src[8]; 5124 uint16_t src[8];
5246 get_q_register(Vm, src); 5125 get_neon_register(Vm, src);
5247 for (int i = 0; i < 4; i++) { 5126 for (int i = 0; i < 4; i++) {
5248 std::swap(src[i * 4], src[i * 4 + 3]); 5127 std::swap(src[i * 4], src[i * 4 + 3]);
5249 std::swap(src[i * 4 + 1], src[i * 4 + 2]); 5128 std::swap(src[i * 4 + 1], src[i * 4 + 2]);
5250 } 5129 }
5251 set_q_register(Vd, src); 5130 set_neon_register(Vd, src);
5252 break; 5131 break;
5253 } 5132 }
5254 case Neon8: { 5133 case Neon8: {
5255 uint8_t src[16]; 5134 uint8_t src[16];
5256 get_q_register(Vm, src); 5135 get_neon_register(Vm, src);
5257 for (int i = 0; i < 4; i++) { 5136 for (int i = 0; i < 4; i++) {
5258 std::swap(src[i], src[7 - i]); 5137 std::swap(src[i], src[7 - i]);
5259 std::swap(src[i + 8], src[15 - i]); 5138 std::swap(src[i + 8], src[15 - i]);
5260 } 5139 }
5261 set_q_register(Vd, src); 5140 set_neon_register(Vd, src);
5262 break; 5141 break;
5263 } 5142 }
5264 default: 5143 default:
5265 UNREACHABLE(); 5144 UNREACHABLE();
5266 break; 5145 break;
5267 } 5146 }
5268 break; 5147 break;
5269 } 5148 }
5270 default: 5149 default:
5271 UNREACHABLE(); 5150 UNREACHABLE();
5272 break; 5151 break;
5273 } 5152 }
5274 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 6) == 0x3) { 5153 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 7) == 0x1) {
5275 int Vd = instr->VFPDRegValue(kSimd128Precision);
5276 int Vm = instr->VFPMRegValue(kSimd128Precision);
5277 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); 5154 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
5278 // vtrn.<size> Qd, Qm. 5155 if (instr->Bit(6) == 0) {
5279 switch (size) { 5156 int Vd = instr->VFPDRegValue(kDoublePrecision);
5280 case Neon8: { 5157 int Vm = instr->VFPMRegValue(kDoublePrecision);
5281 uint8_t src[16], dst[16]; 5158 // vtrn.<size> Dd, Dm.
5282 get_q_register(Vd, dst); 5159 switch (size) {
5283 get_q_register(Vm, src); 5160 case Neon8:
5284 for (int i = 0; i < 8; i++) { 5161 Transpose<uint8_t, kDoubleSize>(this, Vd, Vm);
5285 std::swap(dst[2 * i + 1], src[2 * i]); 5162 break;
5286 } 5163 case Neon16:
5287 set_q_register(Vd, dst); 5164 Transpose<uint16_t, kDoubleSize>(this, Vd, Vm);
5288 set_q_register(Vm, src); 5165 break;
5289 break; 5166 case Neon32:
5167 Transpose<uint32_t, kDoubleSize>(this, Vd, Vm);
5168 break;
5169 default:
5170 UNREACHABLE();
5171 break;
5290 } 5172 }
5291 case Neon16: { 5173 } else {
5292 uint16_t src[8], dst[8]; 5174 int Vd = instr->VFPDRegValue(kSimd128Precision);
5293 get_q_register(Vd, dst); 5175 int Vm = instr->VFPMRegValue(kSimd128Precision);
5294 get_q_register(Vm, src); 5176 // vtrn.<size> Qd, Qm.
5295 for (int i = 0; i < 4; i++) { 5177 switch (size) {
5296 std::swap(dst[2 * i + 1], src[2 * i]); 5178 case Neon8:
5297 } 5179 Transpose<uint8_t, kSimd128Size>(this, Vd, Vm);
5298 set_q_register(Vd, dst); 5180 break;
5299 set_q_register(Vm, src); 5181 case Neon16:
5300 break; 5182 Transpose<uint16_t, kSimd128Size>(this, Vd, Vm);
5183 break;
5184 case Neon32:
5185 Transpose<uint32_t, kSimd128Size>(this, Vd, Vm);
5186 break;
5187 default:
5188 UNREACHABLE();
5189 break;
5301 } 5190 }
5302 case Neon32: {
5303 uint32_t src[4], dst[4];
5304 get_q_register(Vd, dst);
5305 get_q_register(Vm, src);
5306 for (int i = 0; i < 2; i++) {
5307 std::swap(dst[2 * i + 1], src[2 * i]);
5308 }
5309 set_q_register(Vd, dst);
5310 set_q_register(Vm, src);
5311 break;
5312 }
5313 default:
5314 UNREACHABLE();
5315 break;
5316 } 5191 }
5317 } else if (instr->Bits(17, 16) == 0x1 && instr->Bit(11) == 0) { 5192 } else if (instr->Bits(17, 16) == 0x1 && instr->Bit(11) == 0) {
5318 int Vd = instr->VFPDRegValue(kSimd128Precision); 5193 int Vd = instr->VFPDRegValue(kSimd128Precision);
5319 int Vm = instr->VFPMRegValue(kSimd128Precision); 5194 int Vm = instr->VFPMRegValue(kSimd128Precision);
5320 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); 5195 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
5321 if (instr->Bits(9, 6) == 0xd) { 5196 if (instr->Bits(9, 6) == 0xd) {
5322 // vabs<type>.<size> Qd, Qm 5197 // vabs<type>.<size> Qd, Qm
5323 if (instr->Bit(10) != 0) { 5198 if (instr->Bit(10) != 0) {
5324 // floating point (clear sign bits) 5199 // floating point (clear sign bits)
5325 uint32_t src[4]; 5200 uint32_t src[4];
5326 get_q_register(Vm, src); 5201 get_neon_register(Vm, src);
5327 for (int i = 0; i < 4; i++) { 5202 for (int i = 0; i < 4; i++) {
5328 src[i] &= ~0x80000000; 5203 src[i] &= ~0x80000000;
5329 } 5204 }
5330 set_q_register(Vd, src); 5205 set_neon_register(Vd, src);
5331 } else { 5206 } else {
5332 // signed integer 5207 // signed integer
5333 switch (size) { 5208 switch (size) {
5334 case Neon8: { 5209 case Neon8:
5335 int8_t src[16]; 5210 Abs<int8_t, kSimd128Size>(this, Vd, Vm);
5336 get_q_register(Vm, src);
5337 for (int i = 0; i < 16; i++) {
5338 src[i] = std::abs(src[i]);
5339 }
5340 set_q_register(Vd, src);
5341 break; 5211 break;
5342 } 5212 case Neon16:
5343 case Neon16: { 5213 Abs<int16_t, kSimd128Size>(this, Vd, Vm);
5344 int16_t src[8];
5345 get_q_register(Vm, src);
5346 for (int i = 0; i < 8; i++) {
5347 src[i] = std::abs(src[i]);
5348 }
5349 set_q_register(Vd, src);
5350 break; 5214 break;
5351 } 5215 case Neon32:
5352 case Neon32: { 5216 Abs<int32_t, kSimd128Size>(this, Vd, Vm);
5353 int32_t src[4];
5354 get_q_register(Vm, src);
5355 for (int i = 0; i < 4; i++) {
5356 src[i] = std::abs(src[i]);
5357 }
5358 set_q_register(Vd, src);
5359 break; 5217 break;
5360 }
5361 default: 5218 default:
5362 UNIMPLEMENTED(); 5219 UNIMPLEMENTED();
5363 break; 5220 break;
5364 } 5221 }
5365 } 5222 }
5366 } else if (instr->Bits(9, 6) == 0xf) { 5223 } else if (instr->Bits(9, 6) == 0xf) {
5367 // vneg<type>.<size> Qd, Qm (signed integer) 5224 // vneg<type>.<size> Qd, Qm (signed integer)
5368 if (instr->Bit(10) != 0) { 5225 if (instr->Bit(10) != 0) {
5369 // floating point (toggle sign bits) 5226 // floating point (toggle sign bits)
5370 uint32_t src[4]; 5227 uint32_t src[4];
5371 get_q_register(Vm, src); 5228 get_neon_register(Vm, src);
5372 for (int i = 0; i < 4; i++) { 5229 for (int i = 0; i < 4; i++) {
5373 src[i] ^= 0x80000000; 5230 src[i] ^= 0x80000000;
5374 } 5231 }
5375 set_q_register(Vd, src); 5232 set_neon_register(Vd, src);
5376 } else { 5233 } else {
5377 // signed integer 5234 // signed integer
5378 switch (size) { 5235 switch (size) {
5379 case Neon8: { 5236 case Neon8:
5380 int8_t src[16]; 5237 Neg<int8_t, kSimd128Size>(this, Vd, Vm);
5381 get_q_register(Vm, src);
5382 for (int i = 0; i < 16; i++) {
5383 src[i] = -src[i];
5384 }
5385 set_q_register(Vd, src);
5386 break; 5238 break;
5387 }
5388 case Neon16: 5239 case Neon16:
5389 int16_t src[8]; 5240 Neg<int16_t, kSimd128Size>(this, Vd, Vm);
5390 get_q_register(Vm, src);
5391 for (int i = 0; i < 8; i++) {
5392 src[i] = -src[i];
5393 }
5394 set_q_register(Vd, src);
5395 break; 5241 break;
5396 case Neon32: { 5242 case Neon32:
5397 int32_t src[4]; 5243 Neg<int32_t, kSimd128Size>(this, Vd, Vm);
5398 get_q_register(Vm, src);
5399 for (int i = 0; i < 4; i++) {
5400 src[i] = -src[i];
5401 }
5402 set_q_register(Vd, src);
5403 break; 5244 break;
5404 }
5405 default: 5245 default:
5406 UNIMPLEMENTED(); 5246 UNIMPLEMENTED();
5407 break; 5247 break;
5408 } 5248 }
5409 } 5249 }
5410 } else { 5250 } else {
5411 UNIMPLEMENTED(); 5251 UNIMPLEMENTED();
5412 } 5252 }
5413 } else if (instr->Bits(19, 18) == 0x2 && instr->Bits(11, 8) == 0x5) { 5253 } else if (instr->Bits(19, 18) == 0x2 && instr->Bits(11, 8) == 0x5) {
5414 // vrecpe/vrsqrte.f32 Qd, Qm. 5254 // vrecpe/vrsqrte.f32 Qd, Qm.
5415 int Vd = instr->VFPDRegValue(kSimd128Precision); 5255 int Vd = instr->VFPDRegValue(kSimd128Precision);
5416 int Vm = instr->VFPMRegValue(kSimd128Precision); 5256 int Vm = instr->VFPMRegValue(kSimd128Precision);
5417 uint32_t src[4]; 5257 uint32_t src[4];
5418 get_q_register(Vm, src); 5258 get_neon_register(Vm, src);
5419 if (instr->Bit(7) == 0) { 5259 if (instr->Bit(7) == 0) {
5420 for (int i = 0; i < 4; i++) { 5260 for (int i = 0; i < 4; i++) {
5421 float denom = bit_cast<float>(src[i]); 5261 float denom = bit_cast<float>(src[i]);
5422 div_zero_vfp_flag_ = (denom == 0); 5262 div_zero_vfp_flag_ = (denom == 0);
5423 float result = 1.0f / denom; 5263 float result = 1.0f / denom;
5424 result = canonicalizeNaN(result); 5264 result = canonicalizeNaN(result);
5425 src[i] = bit_cast<uint32_t>(result); 5265 src[i] = bit_cast<uint32_t>(result);
5426 } 5266 }
5427 } else { 5267 } else {
5428 lazily_initialize_fast_sqrt(isolate_); 5268 lazily_initialize_fast_sqrt(isolate_);
5429 for (int i = 0; i < 4; i++) { 5269 for (int i = 0; i < 4; i++) {
5430 float radicand = bit_cast<float>(src[i]); 5270 float radicand = bit_cast<float>(src[i]);
5431 float result = 1.0f / fast_sqrt(radicand, isolate_); 5271 float result = 1.0f / fast_sqrt(radicand, isolate_);
5432 result = canonicalizeNaN(result); 5272 result = canonicalizeNaN(result);
5433 src[i] = bit_cast<uint32_t>(result); 5273 src[i] = bit_cast<uint32_t>(result);
5434 } 5274 }
5435 } 5275 }
5436 set_q_register(Vd, src); 5276 set_neon_register(Vd, src);
5437 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x2 && 5277 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x2 &&
5438 instr->Bits(7, 6) != 0) { 5278 instr->Bits(7, 6) != 0) {
5439 // vqmovn.<type><size> Dd, Qm. 5279 // vqmovn.<type><size> Dd, Qm.
5440 int Vd = instr->VFPDRegValue(kDoublePrecision); 5280 int Vd = instr->VFPDRegValue(kDoublePrecision);
5441 int Vm = instr->VFPMRegValue(kSimd128Precision); 5281 int Vm = instr->VFPMRegValue(kSimd128Precision);
5442 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18)); 5282 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
5443 bool is_unsigned = instr->Bit(6) != 0; 5283 bool is_unsigned = instr->Bit(6) != 0;
5444 switch (size) { 5284 switch (size) {
5445 case Neon8: { 5285 case Neon8: {
5446 if (is_unsigned) { 5286 if (is_unsigned) {
(...skipping 27 matching lines...) Expand all
5474 UNIMPLEMENTED(); 5314 UNIMPLEMENTED();
5475 } 5315 }
5476 } else if (instr->Bits(11, 7) == 0 && instr->Bit(4) == 1) { 5316 } else if (instr->Bits(11, 7) == 0 && instr->Bit(4) == 1) {
5477 // vshr.u<size> Qd, Qm, shift 5317 // vshr.u<size> Qd, Qm, shift
5478 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16)); 5318 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16));
5479 int shift = 2 * size - instr->Bits(21, 16); 5319 int shift = 2 * size - instr->Bits(21, 16);
5480 int Vd = instr->VFPDRegValue(kSimd128Precision); 5320 int Vd = instr->VFPDRegValue(kSimd128Precision);
5481 int Vm = instr->VFPMRegValue(kSimd128Precision); 5321 int Vm = instr->VFPMRegValue(kSimd128Precision);
5482 NeonSize ns = static_cast<NeonSize>(size / 16); 5322 NeonSize ns = static_cast<NeonSize>(size / 16);
5483 switch (ns) { 5323 switch (ns) {
5484 case Neon8: { 5324 case Neon8:
5485 uint8_t src[16]; 5325 ShiftRight<uint8_t, kSimd128Size>(this, Vd, Vm, shift);
5486 get_q_register(Vm, src);
5487 for (int i = 0; i < 16; i++) {
5488 src[i] >>= shift;
5489 }
5490 set_q_register(Vd, src);
5491 break; 5326 break;
5492 } 5327 case Neon16:
5493 case Neon16: { 5328 ShiftRight<uint16_t, kSimd128Size>(this, Vd, Vm, shift);
5494 uint16_t src[8];
5495 get_q_register(Vm, src);
5496 for (int i = 0; i < 8; i++) {
5497 src[i] >>= shift;
5498 }
5499 set_q_register(Vd, src);
5500 break; 5329 break;
5501 } 5330 case Neon32:
5502 case Neon32: { 5331 ShiftRight<uint32_t, kSimd128Size>(this, Vd, Vm, shift);
5503 uint32_t src[4];
5504 get_q_register(Vm, src);
5505 for (int i = 0; i < 4; i++) {
5506 src[i] >>= shift;
5507 }
5508 set_q_register(Vd, src);
5509 break; 5332 break;
5510 }
5511 default: 5333 default:
5512 UNREACHABLE(); 5334 UNREACHABLE();
5513 break; 5335 break;
5514 } 5336 }
5515 } else { 5337 } else {
5516 UNIMPLEMENTED(); 5338 UNIMPLEMENTED();
5517 } 5339 }
5518 break; 5340 break;
5519 case 8: 5341 case 8:
5520 if (instr->Bits(21, 20) == 0) { 5342 if (instr->Bits(21, 20) == 0) {
(...skipping 701 matching lines...) Expand 10 before | Expand all | Expand 10 after
6222 processor->prev_ = nullptr; 6044 processor->prev_ = nullptr;
6223 processor->next_ = nullptr; 6045 processor->next_ = nullptr;
6224 } 6046 }
6225 6047
6226 } // namespace internal 6048 } // namespace internal
6227 } // namespace v8 6049 } // namespace v8
6228 6050
6229 #endif // USE_SIMULATOR 6051 #endif // USE_SIMULATOR
6230 6052
6231 #endif // V8_TARGET_ARCH_ARM 6053 #endif // V8_TARGET_ARCH_ARM
OLDNEW
« no previous file with comments | « src/arm/simulator-arm.h ('k') | test/cctest/test-assembler-arm.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698