src/arm/simulator-arm.cc - Issue 2804883008: [WASM SIMD] Implement horizontal add for float and integer types.

Side by Side Diff: src/arm/simulator-arm.cc

Issue 2804883008: [WASM SIMD] Implement horizontal add for float and integer types. (Closed)

Patch Set: Rebase, reformat. Created 3 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2012 the V8 project authors. All rights reserved.	1 // Copyright 2012 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include <stdarg.h>	5 #include <stdarg.h>

6 #include <stdlib.h>	6 #include <stdlib.h>

7 #include <cmath>	7 #include <cmath>

8	8

9 #if V8_TARGET_ARCH_ARM	9 #if V8_TARGET_ARCH_ARM

10	10

(...skipping 4260 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4271 T dst[kElems], src1[kElems], src2[kElems];	4271 T dst[kElems], src1[kElems], src2[kElems];

4272 simulator->get_neon_register<T, kDoubleSize>(Vn, src1);	4272 simulator->get_neon_register<T, kDoubleSize>(Vn, src1);

4273 simulator->get_neon_register<T, kDoubleSize>(Vm, src2);	4273 simulator->get_neon_register<T, kDoubleSize>(Vm, src2);

4274 for (int i = 0; i < kPairs; i++) {	4274 for (int i = 0; i < kPairs; i++) {

4275 dst[i] = MinMax(src1[i * 2], src1[i * 2 + 1], min);	4275 dst[i] = MinMax(src1[i * 2], src1[i * 2 + 1], min);

4276 dst[i + kPairs] = MinMax(src2[i * 2], src2[i * 2 + 1], min);	4276 dst[i + kPairs] = MinMax(src2[i * 2], src2[i * 2 + 1], min);

4277 }	4277 }

4278 simulator->set_neon_register<T, kDoubleSize>(Vd, dst);	4278 simulator->set_neon_register<T, kDoubleSize>(Vd, dst);

4279 }	4279 }

4280	4280

	// Simulates a pairwise (horizontal) add on double-word NEON registers:
	// adjacent lanes of each source register are summed and the sums are packed
	// into the destination register.
	//
	// T is the lane type; callers in this file instantiate it with int8_t,
	// int16_t, int32_t and float.
	// simulator - simulator whose NEON registers are read and written.
	// Vd        - destination register number.
	// Vm        - second source register number (its sums fill the upper half).
	// Vn        - first source register number (its sums fill the lower half).
	//
	// NOTE(review): for T = int32_t the '+' below is signed arithmetic, so a
	// wrapping sum is undefined behavior in C++; consider adding in the
	// corresponding unsigned type and casting back. For int8_t/int16_t the
	// operands are promoted to int and the result is narrowed on assignment.
	4281 template <typename T>

	4282 void PairwiseAdd(Simulator* simulator, int Vd, int Vm, int Vn) {

	// Number of T-sized lanes in one double-word register, and the number of
	// adjacent lane pairs per source register.
	4283 static const int kElems = kDoubleSize / sizeof(T);

	4284 static const int kPairs = kElems / 2;

	4285 T dst[kElems], src1[kElems], src2[kElems];

	// Note the argument order: Vn is loaded as the first source, Vm as the second.
	4286 simulator->get_neon_register<T, kDoubleSize>(Vn, src1);

	4287 simulator->get_neon_register<T, kDoubleSize>(Vm, src2);

	4288 for (int i = 0; i < kPairs; i++) {

	// Pair sums from src1 (Vn) go to lanes [0, kPairs).
	4289 dst[i] = src1[i * 2] + src1[i * 2 + 1];

	// Pair sums from src2 (Vm) go to lanes [kPairs, kElems).
	4290 dst[i + kPairs] = src2[i * 2] + src2[i * 2 + 1];

	4291 }

	4292 simulator->set_neon_register<T, kDoubleSize>(Vd, dst);

	4293 }

	4294

4281 void Simulator::DecodeSpecialCondition(Instruction* instr) {	4295 void Simulator::DecodeSpecialCondition(Instruction* instr) {

4282 switch (instr->SpecialValue()) {	4296 switch (instr->SpecialValue()) {

4283 case 4: {	4297 case 4: {

4284 int Vd, Vm, Vn;	4298 int Vd, Vm, Vn;

4285 if (instr->Bit(6) == 0) {	4299 if (instr->Bit(6) == 0) {

4286 Vd = instr->VFPDRegValue(kDoublePrecision);	4300 Vd = instr->VFPDRegValue(kDoublePrecision);

4287 Vm = instr->VFPMRegValue(kDoublePrecision);	4301 Vm = instr->VFPMRegValue(kDoublePrecision);

4288 Vn = instr->VFPNRegValue(kDoublePrecision);	4302 Vn = instr->VFPNRegValue(kDoublePrecision);

4289 } else {	4303 } else {

4290 Vd = instr->VFPDRegValue(kSimd128Precision);	4304 Vd = instr->VFPDRegValue(kSimd128Precision);

(...skipping 191 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4482 break;	4496 break;

4483 case Neon32:	4497 case Neon32:

4484 PairwiseMinMax<int32_t>(this, Vd, Vm, Vn, min);	4498 PairwiseMinMax<int32_t>(this, Vd, Vm, Vn, min);

4485 break;	4499 break;

4486 default:	4500 default:

4487 UNREACHABLE();	4501 UNREACHABLE();

4488 break;	4502 break;

4489 }	4503 }

4490 break;	4504 break;

4491 }	4505 }

	4506 case 0xb: {

	4507 // vpadd.i<size> Dd, Dn, Dm.

	4508 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));

	4509 switch (size) {

	4510 case Neon8:

	4511 PairwiseAdd<int8_t>(this, Vd, Vm, Vn);

	4512 break;

	4513 case Neon16:

	4514 PairwiseAdd<int16_t>(this, Vd, Vm, Vn);

	4515 break;

	4516 case Neon32:

	4517 PairwiseAdd<int32_t>(this, Vd, Vm, Vn);

	4518 break;

	4519 default:

	4520 UNREACHABLE();

	4521 break;

	4522 }

	4523 break;

	4524 }

4492 case 0xd: {	4525 case 0xd: {

4493 if (instr->Bit(4) == 0) {	4526 if (instr->Bit(4) == 0) {

4494 float src1[4], src2[4];	4527 float src1[4], src2[4];

4495 get_neon_register(Vn, src1);	4528 get_neon_register(Vn, src1);

4496 get_neon_register(Vm, src2);	4529 get_neon_register(Vm, src2);

4497 for (int i = 0; i < 4; i++) {	4530 for (int i = 0; i < 4; i++) {

4498 if (instr->Bit(21) == 0) {	4531 if (instr->Bit(21) == 0) {

4499 // vadd.f32 Qd, Qm, Qn.	4532 // vadd.f32 Qd, Qm, Qn.

4500 src1[i] = src1[i] + src2[i];	4533 src1[i] = src1[i] + src2[i];

4501 } else {	4534 } else {

(...skipping 328 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4830 case Neon32:	4863 case Neon32:

4831 PairwiseMinMax<uint32_t>(this, Vd, Vm, Vn, min);	4864 PairwiseMinMax<uint32_t>(this, Vd, Vm, Vn, min);

4832 break;	4865 break;

4833 default:	4866 default:

4834 UNREACHABLE();	4867 UNREACHABLE();

4835 break;	4868 break;

4836 }	4869 }

4837 break;	4870 break;

4838 }	4871 }

4839 case 0xd: {	4872 case 0xd: {

4840 if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) {	4873 if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 1 &&

	4874 instr->Bit(4) == 1) {

4841 // vmul.f32 Qd, Qn, Qm	4875 // vmul.f32 Qd, Qn, Qm

4842 float src1[4], src2[4];	4876 float src1[4], src2[4];

4843 get_neon_register(Vn, src1);	4877 get_neon_register(Vn, src1);

4844 get_neon_register(Vm, src2);	4878 get_neon_register(Vm, src2);

4845 for (int i = 0; i < 4; i++) {	4879 for (int i = 0; i < 4; i++) {

4846 src1[i] = src1[i] * src2[i];	4880 src1[i] = src1[i] * src2[i];

4847 }	4881 }

4848 set_neon_register(Vd, src1);	4882 set_neon_register(Vd, src1);

	4883 } else if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 0 &&

	4884 instr->Bit(4) == 0) {

	4885 // vpadd.f32 Dd, Dn, Dm

	4886 PairwiseAdd<float>(this, Vd, Vm, Vn);

4849 } else {	4887 } else {

4850 UNIMPLEMENTED();	4888 UNIMPLEMENTED();

4851 }	4889 }

4852 break;	4890 break;

4853 }	4891 }

4854 case 0xe: {	4892 case 0xe: {

4855 if (instr->Bit(20) == 0 && instr->Bit(4) == 0) {	4893 if (instr->Bit(20) == 0 && instr->Bit(4) == 0) {

4856 // vcge/vcgt.f32 Qd, Qm, Qn	4894 // vcge/vcgt.f32 Qd, Qm, Qn

4857 bool ge = instr->Bit(21) == 0;	4895 bool ge = instr->Bit(21) == 0;

4858 float src1[4], src2[4];	4896 float src1[4], src2[4];

(...skipping 1190 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
6049 processor->prev_ = nullptr;	6087 processor->prev_ = nullptr;

6050 processor->next_ = nullptr;	6088 processor->next_ = nullptr;

6051 }	6089 }

6052	6090

6053 } // namespace internal	6091 } // namespace internal

6054 } // namespace v8	6092 } // namespace v8

6055	6093

6056 #endif // USE_SIMULATOR	6094 #endif // USE_SIMULATOR

6057	6095

6058 #endif // V8_TARGET_ARCH_ARM	6096 #endif // V8_TARGET_ARCH_ARM

OLD	NEW

« no previous file with comments | « src/arm/disasm-arm.cc ('k') | src/compiler/arm/code-generator-arm.cc » ('j') | no next file with comments »