Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(294)

Side by Side Diff: src/arm/simulator-arm.cc

Issue 2773303002: [ARM] Implement widening and narrowing integer moves, vmovl, vqmovn. (Closed)
Patch Set: Add some disassembler tests for vmovl. Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <stdarg.h> 5 #include <stdarg.h>
6 #include <stdlib.h> 6 #include <stdlib.h>
7 #include <cmath> 7 #include <cmath>
8 8
9 #if V8_TARGET_ARCH_ARM 9 #if V8_TARGET_ARCH_ARM
10 10
(...skipping 3978 matching lines...) Expand 10 before | Expand all | Expand 10 after
3989 default: 3989 default:
3990 UNIMPLEMENTED(); // Not used by V8. 3990 UNIMPLEMENTED(); // Not used by V8.
3991 } 3991 }
3992 } else { 3992 } else {
3993 UNIMPLEMENTED(); // Not used by V8. 3993 UNIMPLEMENTED(); // Not used by V8.
3994 } 3994 }
3995 } 3995 }
3996 3996
3997 // Templated operations for NEON instructions. 3997 // Templated operations for NEON instructions.
3998 // TODO(bbudge) Add more templates for use in DecodeSpecialCondition. 3998 // TODO(bbudge) Add more templates for use in DecodeSpecialCondition.
// Widens an integer of type T to the wider integer type U of the same
// signedness, e.g. int8_t -> int16_t (used for vmovl lanes).
template <typename T, typename U>
U Widen(T value) {
  static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller");
  // NOTE: message fixed — previously read "T must smaller than U".
  static_assert(sizeof(U) > sizeof(T), "T must be smaller than U");
  return static_cast<U>(value);
}
4005
// Narrows an integer of type T to the smaller integer type U of the same
// signedness, e.g. int16_t -> int8_t (used for vqmovn lanes). The caller is
// responsible for saturating |value| into U's representable range first
// (see Clamp); otherwise the cast silently truncates.
template <typename T, typename U>
U Narrow(T value) {
  static_assert(sizeof(int8_t) < sizeof(T), "T must be int16_t or larger");
  // NOTE: message fixed — previously read "T must larger than U".
  static_assert(sizeof(U) < sizeof(T), "U must be smaller than T");
  return static_cast<U>(value);
}
4004 4012
// Saturates a 64-bit value into the representable range of T, returning
// T's min/max when the value falls outside it.
template <typename T>
T Clamp(int64_t value) {
  static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller");
  const int64_t lo = static_cast<int64_t>(std::numeric_limits<T>::min());
  const int64_t hi = static_cast<int64_t>(std::numeric_limits<T>::max());
  if (value < lo) return static_cast<T>(lo);
  if (value > hi) return static_cast<T>(hi);
  return static_cast<T>(value);
}
4013 4021
// Returns the minimum of |a| and |b| when |is_min| is set, otherwise the
// maximum.
template <typename T>
T MinMax(T a, T b, bool is_min) {
  if (is_min) {
    return std::min(a, b);
  }
  return std::max(a, b);
}
4018 4026
4027 template <typename T, typename U>
4028 void Widen(Simulator* simulator, int Vd, int Vm) {
4029 static const int kLanes = 8 / sizeof(T);
4030 T src[kLanes];
4031 U dst[kLanes];
4032 simulator->get_d_register(Vm, src);
4033 for (int i = 0; i < kLanes; i++) {
4034 dst[i] = Widen<T, U>(src[i]);
4035 }
4036 simulator->set_q_register(Vd, dst);
4037 }
4038
4039 template <typename T, typename U>
4040 void Narrow(Simulator* simulator, int Vd, int Vm) {
martyn.capewell 2017/03/27 13:19:43 This may be better named SaturatingNarrow(). If yo
bbudge 2017/03/27 17:48:02 Done.
4041 static const int kLanes = 16 / sizeof(T);
4042 T src[kLanes];
4043 U dst[kLanes];
4044 simulator->get_q_register(Vm, src);
4045 for (int i = 0; i < kLanes; i++) {
4046 dst[i] = Narrow<T, U>(Clamp<U>(src[i]));
4047 }
4048 simulator->set_d_register(Vd, dst);
4049 }
4050
4019 template <typename T> 4051 template <typename T>
4020 void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn) { 4052 void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn) {
4021 static const int kLanes = 16 / sizeof(T); 4053 static const int kLanes = 16 / sizeof(T);
4022 T src1[kLanes], src2[kLanes]; 4054 T src1[kLanes], src2[kLanes];
4023 simulator->get_q_register(Vn, src1); 4055 simulator->get_q_register(Vn, src1);
4024 simulator->get_q_register(Vm, src2); 4056 simulator->get_q_register(Vm, src2);
4025 for (int i = 0; i < kLanes; i++) { 4057 for (int i = 0; i < kLanes; i++) {
4026 src1[i] = Clamp<T>(Widen(src1[i]) + Widen(src2[i])); 4058 src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) + Widen<T, int64_t>(src2[i]));
4027 } 4059 }
4028 simulator->set_q_register(Vd, src1); 4060 simulator->set_q_register(Vd, src1);
4029 } 4061 }
4030 4062
4031 template <typename T> 4063 template <typename T>
4032 void SubSaturate(Simulator* simulator, int Vd, int Vm, int Vn) { 4064 void SubSaturate(Simulator* simulator, int Vd, int Vm, int Vn) {
4033 static const int kLanes = 16 / sizeof(T); 4065 static const int kLanes = 16 / sizeof(T);
4034 T src1[kLanes], src2[kLanes]; 4066 T src1[kLanes], src2[kLanes];
4035 simulator->get_q_register(Vn, src1); 4067 simulator->get_q_register(Vn, src1);
4036 simulator->get_q_register(Vm, src2); 4068 simulator->get_q_register(Vm, src2);
4037 for (int i = 0; i < kLanes; i++) { 4069 for (int i = 0; i < kLanes; i++) {
4038 src1[i] = Clamp<T>(Widen(src1[i]) - Widen(src2[i])); 4070 src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) - Widen<T, int64_t>(src2[i]));
4039 } 4071 }
4040 simulator->set_q_register(Vd, src1); 4072 simulator->set_q_register(Vd, src1);
4041 } 4073 }
4042 4074
4043 void Simulator::DecodeSpecialCondition(Instruction* instr) { 4075 void Simulator::DecodeSpecialCondition(Instruction* instr) {
4044 switch (instr->SpecialValue()) { 4076 switch (instr->SpecialValue()) {
4045 case 4: { 4077 case 4: {
4046 int Vd, Vm, Vn; 4078 int Vd, Vm, Vn;
4047 if (instr->Bit(6) == 0) { 4079 if (instr->Bit(6) == 0) {
4048 Vd = instr->VFPDRegValue(kDoublePrecision); 4080 Vd = instr->VFPDRegValue(kDoublePrecision);
(...skipping 408 matching lines...) Expand 10 before | Expand all | Expand 10 after
4457 UNIMPLEMENTED(); 4489 UNIMPLEMENTED();
4458 break; 4490 break;
4459 } 4491 }
4460 break; 4492 break;
4461 } 4493 }
4462 case 5: 4494 case 5:
4463 if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) && 4495 if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
4464 (instr->Bit(4) == 1)) { 4496 (instr->Bit(4) == 1)) {
4465 // vmovl signed 4497 // vmovl signed
4466 if ((instr->VdValue() & 1) != 0) UNIMPLEMENTED(); 4498 if ((instr->VdValue() & 1) != 0) UNIMPLEMENTED();
4467 int Vd = (instr->Bit(22) << 3) | (instr->VdValue() >> 1); 4499 int Vd = instr->VFPDRegValue(kSimd128Precision);
4468 int Vm = (instr->Bit(5) << 4) | instr->VmValue(); 4500 int Vm = instr->VFPMRegValue(kDoublePrecision);
4469 int imm3 = instr->Bits(21, 19); 4501 int imm3 = instr->Bits(21, 19);
4470 if ((imm3 != 1) && (imm3 != 2) && (imm3 != 4)) UNIMPLEMENTED(); 4502 switch (imm3) {
4471 int esize = 8 * imm3; 4503 case 1:
4472 int elements = 64 / esize; 4504 Widen<int8_t, int16_t>(this, Vd, Vm);
4473 int8_t from[8]; 4505 break;
4474 get_d_register(Vm, reinterpret_cast<uint64_t*>(from)); 4506 case 2:
4475 int16_t to[8]; 4507 Widen<int16_t, int32_t>(this, Vd, Vm);
4476 int e = 0; 4508 break;
4477 while (e < elements) { 4509 case 4:
4478 to[e] = from[e]; 4510 Widen<int32_t, int64_t>(this, Vd, Vm);
4479 e++; 4511 break;
4512 default:
4513 UNIMPLEMENTED();
4514 break;
4480 } 4515 }
4481 set_q_register(Vd, reinterpret_cast<uint64_t*>(to));
4482 } else if (instr->Bits(21, 20) == 3 && instr->Bit(4) == 0) { 4516 } else if (instr->Bits(21, 20) == 3 && instr->Bit(4) == 0) {
4483 // vext. 4517 // vext.
4484 int imm4 = instr->Bits(11, 8); 4518 int imm4 = instr->Bits(11, 8);
4485 int Vd = instr->VFPDRegValue(kSimd128Precision); 4519 int Vd = instr->VFPDRegValue(kSimd128Precision);
4486 int Vm = instr->VFPMRegValue(kSimd128Precision); 4520 int Vm = instr->VFPMRegValue(kSimd128Precision);
4487 int Vn = instr->VFPNRegValue(kSimd128Precision); 4521 int Vn = instr->VFPNRegValue(kSimd128Precision);
4488 uint8_t src1[16], src2[16], dst[16]; 4522 uint8_t src1[16], src2[16], dst[16];
4489 get_q_register(Vn, src1); 4523 get_q_register(Vn, src1);
4490 get_q_register(Vm, src2); 4524 get_q_register(Vm, src2);
4491 int boundary = kSimd128Size - imm4; 4525 int boundary = kSimd128Size - imm4;
(...skipping 431 matching lines...) Expand 10 before | Expand all | Expand 10 after
4923 UNREACHABLE(); 4957 UNREACHABLE();
4924 break; 4958 break;
4925 } 4959 }
4926 break; 4960 break;
4927 } 4961 }
4928 case 7: 4962 case 7:
4929 if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) && 4963 if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
4930 (instr->Bit(4) == 1)) { 4964 (instr->Bit(4) == 1)) {
4931 // vmovl unsigned 4965 // vmovl unsigned
4932 if ((instr->VdValue() & 1) != 0) UNIMPLEMENTED(); 4966 if ((instr->VdValue() & 1) != 0) UNIMPLEMENTED();
4933 int Vd = (instr->Bit(22) << 3) | (instr->VdValue() >> 1); 4967 int Vd = instr->VFPDRegValue(kSimd128Precision);
4934 int Vm = (instr->Bit(5) << 4) | instr->VmValue(); 4968 int Vm = instr->VFPMRegValue(kDoublePrecision);
4935 int imm3 = instr->Bits(21, 19); 4969 int imm3 = instr->Bits(21, 19);
4936 if ((imm3 != 1) && (imm3 != 2) && (imm3 != 4)) UNIMPLEMENTED(); 4970 switch (imm3) {
4937 int esize = 8 * imm3; 4971 case 1:
4938 int elements = 64 / esize; 4972 Widen<uint8_t, uint16_t>(this, Vd, Vm);
4939 uint8_t from[8]; 4973 break;
4940 get_d_register(Vm, reinterpret_cast<uint64_t*>(from)); 4974 case 2:
4941 uint16_t to[8]; 4975 Widen<uint16_t, uint32_t>(this, Vd, Vm);
4942 int e = 0; 4976 break;
4943 while (e < elements) { 4977 case 4:
4944 to[e] = from[e]; 4978 Widen<uint32_t, uint64_t>(this, Vd, Vm);
4945 e++; 4979 break;
4980 default:
4981 UNIMPLEMENTED();
4982 break;
4946 } 4983 }
4947 set_q_register(Vd, reinterpret_cast<uint64_t*>(to));
4948 } else if (instr->Opc1Value() == 7 && instr->Bit(4) == 0) { 4984 } else if (instr->Opc1Value() == 7 && instr->Bit(4) == 0) {
4949 if (instr->Bits(19, 16) == 0xB && instr->Bits(11, 9) == 0x3 && 4985 if (instr->Bits(19, 16) == 0xB && instr->Bits(11, 9) == 0x3 &&
4950 instr->Bit(6) == 1) { 4986 instr->Bit(6) == 1) {
4951 // vcvt.<Td>.<Tm> Qd, Qm. 4987 // vcvt.<Td>.<Tm> Qd, Qm.
4952 int Vd = instr->VFPDRegValue(kSimd128Precision); 4988 int Vd = instr->VFPDRegValue(kSimd128Precision);
4953 int Vm = instr->VFPMRegValue(kSimd128Precision); 4989 int Vm = instr->VFPMRegValue(kSimd128Precision);
4954 uint32_t q_data[4]; 4990 uint32_t q_data[4];
4955 get_q_register(Vm, q_data); 4991 get_q_register(Vm, q_data);
4956 int op = instr->Bits(8, 7); 4992 int op = instr->Bits(8, 7);
4957 for (int i = 0; i < 4; i++) { 4993 for (int i = 0; i < 4; i++) {
(...skipping 427 matching lines...) Expand 10 before | Expand all | Expand 10 after
5385 } else { 5421 } else {
5386 lazily_initialize_fast_sqrt(isolate_); 5422 lazily_initialize_fast_sqrt(isolate_);
5387 for (int i = 0; i < 4; i++) { 5423 for (int i = 0; i < 4; i++) {
5388 float radicand = bit_cast<float>(src[i]); 5424 float radicand = bit_cast<float>(src[i]);
5389 float result = 1.0f / fast_sqrt(radicand, isolate_); 5425 float result = 1.0f / fast_sqrt(radicand, isolate_);
5390 result = canonicalizeNaN(result); 5426 result = canonicalizeNaN(result);
5391 src[i] = bit_cast<uint32_t>(result); 5427 src[i] = bit_cast<uint32_t>(result);
5392 } 5428 }
5393 } 5429 }
5394 set_q_register(Vd, src); 5430 set_q_register(Vd, src);
5431 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x2 &&
5432 instr->Bits(7, 6) != 0) {
5433 // vqmovn.<type><size> Dd, Qm.
5434 int Vd = instr->VFPDRegValue(kDoublePrecision);
5435 int Vm = instr->VFPMRegValue(kSimd128Precision);
5436 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
5437 bool is_unsigned = instr->Bit(6) != 0;
5438 switch (size) {
5439 case Neon8: {
5440 if (is_unsigned) {
5441 Narrow<uint16_t, uint8_t>(this, Vd, Vm);
5442 } else {
5443 Narrow<int16_t, int8_t>(this, Vd, Vm);
5444 }
5445 break;
5446 }
5447 case Neon16: {
5448 if (is_unsigned) {
5449 Narrow<uint32_t, uint16_t>(this, Vd, Vm);
5450 } else {
5451 Narrow<int32_t, int16_t>(this, Vd, Vm);
5452 }
5453 break;
5454 }
5455 case Neon32: {
5456 if (is_unsigned) {
5457 Narrow<uint64_t, uint32_t>(this, Vd, Vm);
5458 } else {
5459 Narrow<int64_t, int32_t>(this, Vd, Vm);
5460 }
5461 break;
5462 }
5463 default:
5464 UNIMPLEMENTED();
5465 break;
5466 }
5395 } else { 5467 } else {
5396 UNIMPLEMENTED(); 5468 UNIMPLEMENTED();
5397 } 5469 }
5398 } else if (instr->Bits(11, 7) == 0 && instr->Bit(4) == 1) { 5470 } else if (instr->Bits(11, 7) == 0 && instr->Bit(4) == 1) {
5399 // vshr.u<size> Qd, Qm, shift 5471 // vshr.u<size> Qd, Qm, shift
5400 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16)); 5472 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16));
5401 int shift = 2 * size - instr->Bits(21, 16); 5473 int shift = 2 * size - instr->Bits(21, 16);
5402 int Vd = instr->VFPDRegValue(kSimd128Precision); 5474 int Vd = instr->VFPDRegValue(kSimd128Precision);
5403 int Vm = instr->VFPMRegValue(kSimd128Precision); 5475 int Vm = instr->VFPMRegValue(kSimd128Precision);
5404 NeonSize ns = static_cast<NeonSize>(size / 16); 5476 NeonSize ns = static_cast<NeonSize>(size / 16);
(...skipping 739 matching lines...) Expand 10 before | Expand all | Expand 10 after
6144 processor->prev_ = nullptr; 6216 processor->prev_ = nullptr;
6145 processor->next_ = nullptr; 6217 processor->next_ = nullptr;
6146 } 6218 }
6147 6219
6148 } // namespace internal 6220 } // namespace internal
6149 } // namespace v8 6221 } // namespace v8
6150 6222
6151 #endif // USE_SIMULATOR 6223 #endif // USE_SIMULATOR
6152 6224
6153 #endif // V8_TARGET_ARCH_ARM 6225 #endif // V8_TARGET_ARCH_ARM
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698