src/arm/simulator-arm.cc - Issue 2804883008: [WASM SIMD] Implement horizontal add for float and integer types.

Side by Side Diff: src/arm/simulator-arm.cc

Issue 2804883008: [WASM SIMD] Implement horizontal add for float and integer types. (Closed)

Patch Set: Fix MIPS. Created 3 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2012 the V8 project authors. All rights reserved.	1 // Copyright 2012 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include <stdarg.h>	5 #include <stdarg.h>

6 #include <stdlib.h>	6 #include <stdlib.h>

7 #include <cmath>	7 #include <cmath>

8	8

9 #if V8_TARGET_ARCH_ARM	9 #if V8_TARGET_ARCH_ARM

10	10

(...skipping 4255 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
4266 T dst[kElems], src1[kElems], src2[kElems];	4266 T dst[kElems], src1[kElems], src2[kElems];

4267 simulator->get_neon_register<T, kDoubleSize>(Vn, src1);	4267 simulator->get_neon_register<T, kDoubleSize>(Vn, src1);

4268 simulator->get_neon_register<T, kDoubleSize>(Vm, src2);	4268 simulator->get_neon_register<T, kDoubleSize>(Vm, src2);

4269 for (int i = 0; i < kPairs; i++) {	4269 for (int i = 0; i < kPairs; i++) {

4270 dst[i] = MinMax(src1[i * 2], src1[i * 2 + 1], min);	4270 dst[i] = MinMax(src1[i * 2], src1[i * 2 + 1], min);

4271 dst[i + kPairs] = MinMax(src2[i * 2], src2[i * 2 + 1], min);	4271 dst[i + kPairs] = MinMax(src2[i * 2], src2[i * 2 + 1], min);

4272 }	4272 }

4273 simulator->set_neon_register<T, kDoubleSize>(Vd, dst);	4273 simulator->set_neon_register<T, kDoubleSize>(Vd, dst);

4274 }	4274 }

4275	4275

	4276 template <typename T>

	4277 void PairwiseAdd(Simulator* simulator, int Vd, int Vm, int Vn) {

	4278 static const int kElems = kDoubleSize / sizeof(T);

	4279 static const int kPairs = kElems / 2;

	4280 T dst[kElems], src1[kElems], src2[kElems];

	4281 simulator->get_neon_register<T, kDoubleSize>(Vn, src1);

	4282 simulator->get_neon_register<T, kDoubleSize>(Vm, src2);

	4283 for (int i = 0; i < kPairs; i++) {

	4284 dst[i] = src1[i * 2] + src1[i * 2 + 1];

	4285 dst[i + kPairs] = src2[i * 2] + src2[i * 2 + 1];

	4286 }

	4287 simulator->set_neon_register<T, kDoubleSize>(Vd, dst);

	4288 }

	4289

4276 void Simulator::DecodeSpecialCondition(Instruction* instr) {	4290 void Simulator::DecodeSpecialCondition(Instruction* instr) {

4277 switch (instr->SpecialValue()) {	4291 switch (instr->SpecialValue()) {

4278 case 4: {	4292 case 4: {

4279 int Vd, Vm, Vn;	4293 int Vd, Vm, Vn;

4280 if (instr->Bit(6) == 0) {	4294 if (instr->Bit(6) == 0) {

4281 Vd = instr->VFPDRegValue(kDoublePrecision);	4295 Vd = instr->VFPDRegValue(kDoublePrecision);

4282 Vm = instr->VFPMRegValue(kDoublePrecision);	4296 Vm = instr->VFPMRegValue(kDoublePrecision);

4283 Vn = instr->VFPNRegValue(kDoublePrecision);	4297 Vn = instr->VFPNRegValue(kDoublePrecision);

4284 } else {	4298 } else {

4285 Vd = instr->VFPDRegValue(kSimd128Precision);	4299 Vd = instr->VFPDRegValue(kSimd128Precision);

(...skipping 191 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
4477 break;	4491 break;

4478 case Neon32:	4492 case Neon32:

4479 PairwiseMinMax<int32_t>(this, Vd, Vm, Vn, min);	4493 PairwiseMinMax<int32_t>(this, Vd, Vm, Vn, min);

4480 break;	4494 break;

4481 default:	4495 default:

4482 UNREACHABLE();	4496 UNREACHABLE();

4483 break;	4497 break;

4484 }	4498 }

4485 break;	4499 break;

4486 }	4500 }

	4501 case 0xb: {

	4502 // vpadd.i<size> Dd, Dn, Dm.

	4503 NeonSize size = static_cast<NeonSize>(instr->Bits(21, 20));

	4504 switch (size) {

	4505 case Neon8:

	4506 PairwiseAdd<int8_t>(this, Vd, Vm, Vn);

	4507 break;

	4508 case Neon16:

	4509 PairwiseAdd<int16_t>(this, Vd, Vm, Vn);

	4510 break;

	4511 case Neon32:

	4512 PairwiseAdd<int32_t>(this, Vd, Vm, Vn);

	4513 break;

	4514 default:

	4515 UNREACHABLE();

	4516 break;

	4517 }

	4518 break;

	4519 }

4487 case 0xd: {	4520 case 0xd: {

4488 if (instr->Bit(4) == 0) {	4521 if (instr->Bit(4) == 0) {

4489 float src1[4], src2[4];	4522 float src1[4], src2[4];

4490 get_neon_register(Vn, src1);	4523 get_neon_register(Vn, src1);

4491 get_neon_register(Vm, src2);	4524 get_neon_register(Vm, src2);

4492 for (int i = 0; i < 4; i++) {	4525 for (int i = 0; i < 4; i++) {

4493 if (instr->Bit(21) == 0) {	4526 if (instr->Bit(21) == 0) {

4494 // vadd.f32 Qd, Qm, Qn.	4527 // vadd.f32 Qd, Qm, Qn.

4495 src1[i] = src1[i] + src2[i];	4528 src1[i] = src1[i] + src2[i];

4496 } else {	4529 } else {

(...skipping 328 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
4825 case Neon32:	4858 case Neon32:

4826 PairwiseMinMax<uint32_t>(this, Vd, Vm, Vn, min);	4859 PairwiseMinMax<uint32_t>(this, Vd, Vm, Vn, min);

4827 break;	4860 break;

4828 default:	4861 default:

4829 UNREACHABLE();	4862 UNREACHABLE();

4830 break;	4863 break;

4831 }	4864 }

4832 break;	4865 break;

4833 }	4866 }

4834 case 0xd: {	4867 case 0xd: {

4835 if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) {	4868 if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 1 &&

	4869 instr->Bit(4) == 1) {

4836 // vmul.f32 Qd, Qn, Qm	4870 // vmul.f32 Qd, Qn, Qm

4837 float src1[4], src2[4];	4871 float src1[4], src2[4];

4838 get_neon_register(Vn, src1);	4872 get_neon_register(Vn, src1);

4839 get_neon_register(Vm, src2);	4873 get_neon_register(Vm, src2);

4840 for (int i = 0; i < 4; i++) {	4874 for (int i = 0; i < 4; i++) {

4841 src1[i] = src1[i] * src2[i];	4875 src1[i] = src1[i] * src2[i];

4842 }	4876 }

4843 set_neon_register(Vd, src1);	4877 set_neon_register(Vd, src1);

	4878 } else if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 0 &&

	4879 instr->Bit(4) == 0) {

	4880 // vpadd.f32 Dd, Dn, Dm

	4881 PairwiseAdd<float>(this, Vd, Vm, Vn);

4844 } else {	4882 } else {

4845 UNIMPLEMENTED();	4883 UNIMPLEMENTED();

4846 }	4884 }

4847 break;	4885 break;

4848 }	4886 }

4849 case 0xe: {	4887 case 0xe: {

4850 if (instr->Bit(20) == 0 && instr->Bit(4) == 0) {	4888 if (instr->Bit(20) == 0 && instr->Bit(4) == 0) {

4851 // vcge/vcgt.f32 Qd, Qm, Qn	4889 // vcge/vcgt.f32 Qd, Qm, Qn

4852 bool ge = instr->Bit(21) == 0;	4890 bool ge = instr->Bit(21) == 0;

4853 float src1[4], src2[4];	4891 float src1[4], src2[4];

(...skipping 1190 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
6044 processor->prev_ = nullptr;	6082 processor->prev_ = nullptr;

6045 processor->next_ = nullptr;	6083 processor->next_ = nullptr;

6046 }	6084 }

6047	6085

6048 } // namespace internal	6086 } // namespace internal

6049 } // namespace v8	6087 } // namespace v8

6050	6088

6051 #endif // USE_SIMULATOR	6089 #endif // USE_SIMULATOR

6052	6090

6053 #endif // V8_TARGET_ARCH_ARM	6091 #endif // V8_TARGET_ARCH_ARM

OLD	NEW

« src/arm/assembler-arm.h ('K') | « src/arm/disasm-arm.cc ('k') | src/compiler/arm/code-generator-arm.cc » ('j') | src/compiler/arm/code-generator-arm.cc » ('J')