Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(294)

Side by Side Diff: src/arm/simulator-arm.cc

Issue 2773303002: [ARM] Implement widening and narrowing integer moves, vmovl, vqmovn. (Closed)
Patch Set: Add some disassembler tests for vmovl. Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <stdarg.h> 5 #include <stdarg.h>
6 #include <stdlib.h> 6 #include <stdlib.h>
7 #include <cmath> 7 #include <cmath>
8 8
9 #if V8_TARGET_ARCH_ARM 9 #if V8_TARGET_ARCH_ARM
10 10
(...skipping 3978 matching lines...) Expand 10 before | Expand all | Expand 10 after
3989 default: 3989 default:
3990 UNIMPLEMENTED(); // Not used by V8. 3990 UNIMPLEMENTED(); // Not used by V8.
3991 } 3991 }
3992 } else { 3992 } else {
3993 UNIMPLEMENTED(); // Not used by V8. 3993 UNIMPLEMENTED(); // Not used by V8.
3994 } 3994 }
3995 } 3995 }
3996 3996
3997 // Templated operations for NEON instructions. 3997 // Templated operations for NEON instructions.
3998 // TODO(bbudge) Add more templates for use in DecodeSpecialCondition. 3998 // TODO(bbudge) Add more templates for use in DecodeSpecialCondition.
// Widens an integer of type T to the wider integer type U of the same
// signedness, e.g. int8_t -> int16_t (used for vmovl lanes).
template <typename T, typename U>
U Widen(T value) {
  static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller");
  // NOTE: message fixed — previously read "T must smaller than U".
  static_assert(sizeof(U) > sizeof(T), "T must be smaller than U");
  return static_cast<U>(value);
}
4005
// Narrows an integer of type T to the smaller integer type U of the same
// signedness, e.g. int16_t -> int8_t (used for vqmovn lanes). The caller is
// responsible for saturating |value| into U's representable range first
// (see Clamp); otherwise the cast silently truncates.
template <typename T, typename U>
U Narrow(T value) {
  static_assert(sizeof(int8_t) < sizeof(T), "T must be int16_t or larger");
  // NOTE: message fixed — previously read "T must larger than U".
  static_assert(sizeof(U) < sizeof(T), "U must be smaller than T");
  return static_cast<U>(value);
}
4004 4012
// Saturates a 64-bit value into the representable range of T, returning
// T's min/max when the value falls outside it.
template <typename T>
T Clamp(int64_t value) {
  static_assert(sizeof(int64_t) > sizeof(T), "T must be int32_t or smaller");
  const int64_t lo = static_cast<int64_t>(std::numeric_limits<T>::min());
  const int64_t hi = static_cast<int64_t>(std::numeric_limits<T>::max());
  if (value < lo) return static_cast<T>(lo);
  if (value > hi) return static_cast<T>(hi);
  return static_cast<T>(value);
}
4013 4021
// Returns the minimum of |a| and |b| when |is_min| is set, otherwise the
// maximum.
template <typename T>
T MinMax(T a, T b, bool is_min) {
  if (is_min) {
    return std::min(a, b);
  }
  return std::max(a, b);
}
4018 4026
4027 template <typename T, typename U>
4028 void Widen(Simulator* simulator, int Vd, int Vm) {
4029 static const int kLanes = 8 / sizeof(T);
4030 T src[kLanes];
4031 U dst[kLanes];
4032 simulator->get_d_register(Vm, src);
4033 for (int i = 0; i < kLanes; i++) {
4034 dst[i] = Widen<T, U>(src[i]);
4035 }
4036 simulator->set_q_register(Vd, dst);
4037 }
4038
4039 template <typename T, typename U>
4040 void Narrow(Simulator* simulator, int Vd, int Vm) {
martyn.capewell 2017/03/27 13:19:43 This may be better named SaturatingNarrow(). If yo
bbudge 2017/03/27 17:48:02 Done.
4041 static const int kLanes = 16 / sizeof(T);
4042 T src[kLanes];
4043 U dst[kLanes];
4044 simulator->get_q_register(Vm, src);
4045 for (int i = 0; i < kLanes; i++) {
4046 dst[i] = Narrow<T, U>(Clamp<U>(src[i]));
4047 }
4048 simulator->set_d_register(Vd, dst);
4049 }
4050
4019 template <typename T> 4051 template <typename T>
4020 void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn) { 4052 void AddSaturate(Simulator* simulator, int Vd, int Vm, int Vn) {
4021 static const int kLanes = 16 / sizeof(T); 4053 static const int kLanes = 16 / sizeof(T);
4022 T src1[kLanes], src2[kLanes]; 4054 T src1[kLanes], src2[kLanes];
4023 simulator->get_q_register(Vn, src1); 4055 simulator->get_q_register(Vn, src1);
4024 simulator->get_q_register(Vm, src2); 4056 simulator->get_q_register(Vm, src2);
4025 for (int i = 0; i < kLanes; i++) { 4057 for (int i = 0; i < kLanes; i++) {
4026 src1[i] = Clamp<T>(Widen(src1[i]) + Widen(src2[i])); 4058 src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) + Widen<T, int64_t>(src2[i]));
4027 } 4059 }
4028 simulator->set_q_register(Vd, src1); 4060 simulator->set_q_register(Vd, src1);
4029 } 4061 }
4030 4062
4031 template <typename T> 4063 template <typename T>
4032 void SubSaturate(Simulator* simulator, int Vd, int Vm, int Vn) { 4064 void SubSaturate(Simulator* simulator, int Vd, int Vm, int Vn) {
4033 static const int kLanes = 16 / sizeof(T); 4065 static const int kLanes = 16 / sizeof(T);
4034 T src1[kLanes], src2[kLanes]; 4066 T src1[kLanes], src2[kLanes];
4035 simulator->get_q_register(Vn, src1); 4067 simulator->get_q_register(Vn, src1);
4036 simulator->get_q_register(Vm, src2); 4068 simulator->get_q_register(Vm, src2);
4037 for (int i = 0; i < kLanes; i++) { 4069 for (int i = 0; i < kLanes; i++) {
4038 src1[i] = Clamp<T>(Widen(src1[i]) - Widen(src2[i])); 4070 src1[i] = Clamp<T>(Widen<T, int64_t>(src1[i]) - Widen<T, int64_t>(src2[i]));
4039 } 4071 }
4040 simulator->set_q_register(Vd, src1); 4072 simulator->set_q_register(Vd, src1);
4041 } 4073 }
4042 4074
4043 void Simulator::DecodeSpecialCondition(Instruction* instr) { 4075 void Simulator::DecodeSpecialCondition(Instruction* instr) {
4044 switch (instr->SpecialValue()) { 4076 switch (instr->SpecialValue()) {
4045 case 4: { 4077 case 4: {
4046 int Vd, Vm, Vn; 4078 int Vd, Vm, Vn;
4047 if (instr->Bit(6) == 0) { 4079 if (instr->Bit(6) == 0) {
4048 Vd = instr->VFPDRegValue(kDoublePrecision); 4080 Vd = instr->VFPDRegValue(kDoublePrecision);
(...skipping 408 matching lines...) Expand 10 before | Expand all | Expand 10 after
4457 UNIMPLEMENTED(); 4489 UNIMPLEMENTED();
4458 break; 4490 break;
4459 } 4491 }
4460 break; 4492 break;
4461 } 4493 }
4462 case 5: 4494 case 5:
4463 if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) && 4495 if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
4464 (instr->Bit(4) == 1)) { 4496 (instr->Bit(4) == 1)) {
4465 // vmovl signed 4497 // vmovl signed
4466 if ((instr->VdValue() & 1) != 0) UNIMPLEMENTED(); 4498 if ((instr->VdValue() & 1) != 0) UNIMPLEMENTED();
4467 int Vd = (instr->Bit(22) << 3) | (instr->VdValue() >> 1); 4499 int Vd = instr->VFPDRegValue(kSimd128Precision);
4468 int Vm = (instr->Bit(5) << 4) | instr->VmValue(); 4500 int Vm = instr->VFPMRegValue(kDoublePrecision);
4469 int imm3 = instr->Bits(21, 19); 4501 int imm3 = instr->Bits(21, 19);
4470 if ((imm3 != 1) && (imm3 != 2) && (imm3 != 4)) UNIMPLEMENTED(); 4502 switch (imm3) {
4471 int esize = 8 * imm3; 4503 case 1:
4472 int elements = 64 / esize; 4504 Widen<int8_t, int16_t>(this, Vd, Vm);
4473 int8_t from[8]; 4505 break;
4474 get_d_register(Vm, reinterpret_cast<uint64_t*>(from)); 4506 case 2:
4475 int16_t to[8]; 4507 Widen<int16_t, int32_t>(this, Vd, Vm);
4476 int e = 0; 4508 break;
4477 while (e < elements) { 4509 case 4:
4478 to[e] = from[e]; 4510 Widen<int32_t, int64_t>(this, Vd, Vm);
4479 e++; 4511 break;
4512 default:
4513 UNIMPLEMENTED();
4514 break;
4480 } 4515 }
4481 set_q_register(Vd, reinterpret_cast<uint64_t*>(to));
4482 } else if (instr->Bits(21, 20) == 3 && instr->Bit(4) == 0) { 4516 } else if (instr->Bits(21, 20) == 3 && instr->Bit(4) == 0) {
4483 // vext. 4517 // vext.
4484 int imm4 = instr->Bits(11, 8); 4518 int imm4 = instr->Bits(11, 8);
4485 int Vd = instr->VFPDRegValue(kSimd128Precision); 4519 int Vd = instr->VFPDRegValue(kSimd128Precision);
4486 int Vm = instr->VFPMRegValue(kSimd128Precision); 4520 int Vm = instr->VFPMRegValue(kSimd128Precision);
4487 int Vn = instr->VFPNRegValue(kSimd128Precision); 4521 int Vn = instr->VFPNRegValue(kSimd128Precision);
4488 uint8_t src1[16], src2[16], dst[16]; 4522 uint8_t src1[16], src2[16], dst[16];
4489 get_q_register(Vn, src1); 4523 get_q_register(Vn, src1);
4490 get_q_register(Vm, src2); 4524 get_q_register(Vm, src2);
4491 int boundary = kSimd128Size - imm4; 4525 int boundary = kSimd128Size - imm4;
(...skipping 431 matching lines...) Expand 10 before | Expand all | Expand 10 after
4923 UNREACHABLE(); 4957 UNREACHABLE();
4924 break; 4958 break;
4925 } 4959 }
4926 break; 4960 break;
4927 } 4961 }
4928 case 7: 4962 case 7:
4929 if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) && 4963 if ((instr->Bits(18, 16) == 0) && (instr->Bits(11, 6) == 0x28) &&
4930 (instr->Bit(4) == 1)) { 4964 (instr->Bit(4) == 1)) {
4931 // vmovl unsigned 4965 // vmovl unsigned
4932 if ((instr->VdValue() & 1) != 0) UNIMPLEMENTED(); 4966 if ((instr->VdValue() & 1) != 0) UNIMPLEMENTED();
4933 int Vd = (instr->Bit(22) << 3) | (instr->VdValue() >> 1); 4967 int Vd = instr->VFPDRegValue(kSimd128Precision);
4934 int Vm = (instr->Bit(5) << 4) | instr->VmValue(); 4968 int Vm = instr->VFPMRegValue(kDoublePrecision);
4935 int imm3 = instr->Bits(21, 19); 4969 int imm3 = instr->Bits(21, 19);
4936 if ((imm3 != 1) && (imm3 != 2) && (imm3 != 4)) UNIMPLEMENTED(); 4970 switch (imm3) {
4937 int esize = 8 * imm3; 4971 case 1:
4938 int elements = 64 / esize; 4972 Widen<uint8_t, uint16_t>(this, Vd, Vm);
4939 uint8_t from[8]; 4973 break;
4940 get_d_register(Vm, reinterpret_cast<uint64_t*>(from)); 4974 case 2:
4941 uint16_t to[8]; 4975 Widen<uint16_t, uint32_t>(this, Vd, Vm);
4942 int e = 0; 4976 break;
4943 while (e < elements) { 4977 case 4:
4944 to[e] = from[e]; 4978 Widen<uint32_t, uint64_t>(this, Vd, Vm);
4945 e++; 4979 break;
4980 default:
4981 UNIMPLEMENTED();
4982 break;
4946 } 4983 }
4947 set_q_register(Vd, reinterpret_cast<uint64_t*>(to));
4948 } else if (instr->Opc1Value() == 7 && instr->Bit(4) == 0) { 4984 } else if (instr->Opc1Value() == 7 && instr->Bit(4) == 0) {
4949 if (instr->Bits(19, 16) == 0xB && instr->Bits(11, 9) == 0x3 && 4985 if (instr->Bits(19, 16) == 0xB && instr->Bits(11, 9) == 0x3 &&
4950 instr->Bit(6) == 1) { 4986 instr->Bit(6) == 1) {
4951 // vcvt.<Td>.<Tm> Qd, Qm. 4987 // vcvt.<Td>.<Tm> Qd, Qm.
4952 int Vd = instr->VFPDRegValue(kSimd128Precision); 4988 int Vd = instr->VFPDRegValue(kSimd128Precision);
4953 int Vm = instr->VFPMRegValue(kSimd128Precision); 4989 int Vm = instr->VFPMRegValue(kSimd128Precision);
4954 uint32_t q_data[4]; 4990 uint32_t q_data[4];
4955 get_q_register(Vm, q_data); 4991 get_q_register(Vm, q_data);
4956 int op = instr->Bits(8, 7); 4992 int op = instr->Bits(8, 7);
4957 for (int i = 0; i < 4; i++) { 4993 for (int i = 0; i < 4; i++) {
(...skipping 427 matching lines...) Expand 10 before | Expand all | Expand 10 after
5385 } else { 5421 } else {
5386 lazily_initialize_fast_sqrt(isolate_); 5422 lazily_initialize_fast_sqrt(isolate_);
5387 for (int i = 0; i < 4; i++) { 5423 for (int i = 0; i < 4; i++) {
5388 float radicand = bit_cast<float>(src[i]); 5424 float radicand = bit_cast<float>(src[i]);
5389 float result = 1.0f / fast_sqrt(radicand, isolate_); 5425 float result = 1.0f / fast_sqrt(radicand, isolate_);
5390 result = canonicalizeNaN(result); 5426 result = canonicalizeNaN(result);
5391 src[i] = bit_cast<uint32_t>(result); 5427 src[i] = bit_cast<uint32_t>(result);
5392 } 5428 }
5393 } 5429 }
5394 set_q_register(Vd, src); 5430 set_q_register(Vd, src);
5431 } else if (instr->Bits(17, 16) == 0x2 && instr->Bits(11, 8) == 0x2 &&
5432 instr->Bits(7, 6) != 0) {
5433 // vqmovn.<type><size> Dd, Qm.
5434 int Vd = instr->VFPDRegValue(kDoublePrecision);
5435 int Vm = instr->VFPMRegValue(kSimd128Precision);
5436 NeonSize size = static_cast<NeonSize>(instr->Bits(19, 18));
5437 bool is_unsigned = instr->Bit(6) != 0;
5438 switch (size) {
5439 case Neon8: {
5440 if (is_unsigned) {
5441 Narrow<uint16_t, uint8_t>(this, Vd, Vm);
5442 } else {
5443 Narrow<int16_t, int8_t>(this, Vd, Vm);
5444 }
5445 break;
5446 }
5447 case Neon16: {
5448 if (is_unsigned) {
5449 Narrow<uint32_t, uint16_t>(this, Vd, Vm);
5450 } else {
5451 Narrow<int32_t, int16_t>(this, Vd, Vm);
5452 }
5453 break;
5454 }
5455 case Neon32: {
5456 if (is_unsigned) {
5457 Narrow<uint64_t, uint32_t>(this, Vd, Vm);
5458 } else {
5459 Narrow<int64_t, int32_t>(this, Vd, Vm);
5460 }
5461 break;
5462 }
5463 default:
5464 UNIMPLEMENTED();
5465 break;
5466 }
5395 } else { 5467 } else {
5396 UNIMPLEMENTED(); 5468 UNIMPLEMENTED();
5397 } 5469 }
5398 } else if (instr->Bits(11, 7) == 0 && instr->Bit(4) == 1) { 5470 } else if (instr->Bits(11, 7) == 0 && instr->Bit(4) == 1) {
5399 // vshr.u<size> Qd, Qm, shift 5471 // vshr.u<size> Qd, Qm, shift
5400 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16)); 5472 int size = base::bits::RoundDownToPowerOfTwo32(instr->Bits(21, 16));
5401 int shift = 2 * size - instr->Bits(21, 16); 5473 int shift = 2 * size - instr->Bits(21, 16);
5402 int Vd = instr->VFPDRegValue(kSimd128Precision); 5474 int Vd = instr->VFPDRegValue(kSimd128Precision);
5403 int Vm = instr->VFPMRegValue(kSimd128Precision); 5475 int Vm = instr->VFPMRegValue(kSimd128Precision);
5404 NeonSize ns = static_cast<NeonSize>(size / 16); 5476 NeonSize ns = static_cast<NeonSize>(size / 16);
(...skipping 739 matching lines...) Expand 10 before | Expand all | Expand 10 after
6144 processor->prev_ = nullptr; 6216 processor->prev_ = nullptr;
6145 processor->next_ = nullptr; 6217 processor->next_ = nullptr;
6146 } 6218 }
6147 6219
6148 } // namespace internal 6220 } // namespace internal
6149 } // namespace v8 6221 } // namespace v8
6150 6222
6151 #endif // USE_SIMULATOR 6223 #endif // USE_SIMULATOR
6152 6224
6153 #endif // V8_TARGET_ARCH_ARM 6225 #endif // V8_TARGET_ARCH_ARM
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698