src/arm/assembler-arm.cc - Issue 2711863002: Implement remaining Boolean SIMD operations on ARM.

Side by Side Diff: src/arm/assembler-arm.cc

Issue 2711863002: Implement remaining Boolean SIMD operations on ARM. (Closed)

Patch Set: All Boolean vector tests. Created 3 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 1994-2006 Sun Microsystems Inc.	1 // Copyright (c) 1994-2006 Sun Microsystems Inc.

2 // All Rights Reserved.	2 // All Rights Reserved.

3 //	3 //

4 // Redistribution and use in source and binary forms, with or without	4 // Redistribution and use in source and binary forms, with or without

5 // modification, are permitted provided that the following conditions	5 // modification, are permitted provided that the following conditions

6 // are met:	6 // are met:

7 //	7 //

8 // - Redistributions of source code must retain the above copyright notice,	8 // - Redistributions of source code must retain the above copyright notice,

9 // this list of conditions and the following disclaimer.	9 // this list of conditions and the following disclaimer.

10 //	10 //

(...skipping 4056 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4067 }	4067 }

4068	4068

4069 void Assembler::vcvt_u32_f32(const QwNeonRegister dst,	4069 void Assembler::vcvt_u32_f32(const QwNeonRegister dst,

4070 const QwNeonRegister src) {	4070 const QwNeonRegister src) {

4071 DCHECK(IsEnabled(NEON));	4071 DCHECK(IsEnabled(NEON));

4072 DCHECK(VfpRegisterIsAvailable(dst));	4072 DCHECK(VfpRegisterIsAvailable(dst));

4073 DCHECK(VfpRegisterIsAvailable(src));	4073 DCHECK(VfpRegisterIsAvailable(src));

4074 emit(EncodeNeonVCVT(U32, dst, F32, src));	4074 emit(EncodeNeonVCVT(U32, dst, F32, src));

4075 }	4075 }

4076	4076

4077 // op is instr->Bits(11, 7).	4077 enum UnaryOp { VABS, VABSF, VNEG, VNEGF };

4078 static Instr EncodeNeonUnaryOp(int op, bool is_float, NeonSize size,	4078

4079 const QwNeonRegister dst,	4079 static Instr EncodeNeonUnaryOp(UnaryOp op, NeonSize size, QwNeonRegister dst,

4080 const QwNeonRegister src) {	4080 QwNeonRegister src) {

4081 DCHECK_IMPLIES(is_float, size == Neon32);	4081 int op_encoding = 0;

	4082 switch (op) {

	4083 case VABS:
	martyn.capewell 2017/02/28 15:45:24: Depending on whether you like fallthrough cases, these could be written as: case VABSF: DCHECK_EQ(Neon32, size); op_encoding = B10; // Fall through. case VABS: op_encoding \|= 0x6 * B7; break; and similarly for VNEG.
	bbudge 2017/02/28 17:39:34 (in reply): I think v8 style is to avoid fall through unless it's very compelling.
	4084 op_encoding = 0x6 * B7;

	4085 break;

	4086 case VABSF:

	4087 DCHECK_EQ(Neon32, size);

	4088 op_encoding = 0x6 * B7 \| B10;

	4089 break;

	4090 case VNEG:

	4091 op_encoding = 0x7 * B7;

	4092 break;

	4093 case VNEGF:

	4094 DCHECK_EQ(Neon32, size);

	4095 op_encoding = 0x7 * B7 \| B10;

	4096 break;

	4097 default:

	4098 UNREACHABLE();

	4099 break;

	4100 }

4082 int vd, d;	4101 int vd, d;

4083 dst.split_code(&vd, &d);	4102 dst.split_code(&vd, &d);

4084 int vm, m;	4103 int vm, m;

4085 src.split_code(&vm, &m);	4104 src.split_code(&vm, &m);

4086 int F = is_float ? 1 : 0;	4105 return 0x1E7U * B23 \| d * B22 \| 0x3 * B20 \| size * B18 \| B16 \| vd * B12 \| B8 \|
	martyn.capewell 2017/02/28 15:45:24: The switch above always sets B8, so it can be removed from here.
	bbudge 2017/02/28 17:39:34 (in reply): Wow, good eye. Done.
4087 return 0x1E7U * B23 \| d * B22 \| 0x3 * B20 \| size * B18 \| B16 \| vd * B12 \|	4106 B6 \| m * B5 \| vm \| op_encoding;

4088 F * B10 \| B8 \| op * B7 \| B6 \| m * B5 \| vm;

4089 }	4107 }

4090	4108

4091 void Assembler::vabs(const QwNeonRegister dst, const QwNeonRegister src) {	4109 void Assembler::vabs(const QwNeonRegister dst, const QwNeonRegister src) {

4092 // Qd = vabs.f<size>(Qn, Qm) SIMD floating point absolute value.	4110 // Qd = vabs.f<size>(Qn, Qm) SIMD floating point absolute value.

4093 // Instruction details available in ARM DDI 0406C.b, A8.8.824.	4111 // Instruction details available in ARM DDI 0406C.b, A8.8.824.

4094 DCHECK(IsEnabled(NEON));	4112 DCHECK(IsEnabled(NEON));

4095 emit(EncodeNeonUnaryOp(0x6, true, Neon32, dst, src));	4113 emit(EncodeNeonUnaryOp(VABSF, Neon32, dst, src));

4096 }	4114 }

4097	4115

4098 void Assembler::vabs(NeonSize size, const QwNeonRegister dst,	4116 void Assembler::vabs(NeonSize size, const QwNeonRegister dst,

4099 const QwNeonRegister src) {	4117 const QwNeonRegister src) {

4100 // Qd = vabs.s<size>(Qn, Qm) SIMD integer absolute value.	4118 // Qd = vabs.s<size>(Qn, Qm) SIMD integer absolute value.

4101 // Instruction details available in ARM DDI 0406C.b, A8.8.824.	4119 // Instruction details available in ARM DDI 0406C.b, A8.8.824.

4102 DCHECK(IsEnabled(NEON));	4120 DCHECK(IsEnabled(NEON));

4103 emit(EncodeNeonUnaryOp(0x6, false, size, dst, src));	4121 emit(EncodeNeonUnaryOp(VABS, size, dst, src));

4104 }	4122 }

4105	4123

4106 void Assembler::vneg(const QwNeonRegister dst, const QwNeonRegister src) {	4124 void Assembler::vneg(const QwNeonRegister dst, const QwNeonRegister src) {

4107 // Qd = vabs.f<size>(Qn, Qm) SIMD floating point negate.	4125 // Qd = vabs.f<size>(Qn, Qm) SIMD floating point negate.

4108 // Instruction details available in ARM DDI 0406C.b, A8.8.968.	4126 // Instruction details available in ARM DDI 0406C.b, A8.8.968.

4109 DCHECK(IsEnabled(NEON));	4127 DCHECK(IsEnabled(NEON));

4110 emit(EncodeNeonUnaryOp(0x7, true, Neon32, dst, src));	4128 emit(EncodeNeonUnaryOp(VNEGF, Neon32, dst, src));

4111 }	4129 }

4112	4130

4113 void Assembler::vneg(NeonSize size, const QwNeonRegister dst,	4131 void Assembler::vneg(NeonSize size, const QwNeonRegister dst,

4114 const QwNeonRegister src) {	4132 const QwNeonRegister src) {

4115 // Qd = vabs.s<size>(Qn, Qm) SIMD integer negate.	4133 // Qd = vabs.s<size>(Qn, Qm) SIMD integer negate.

4116 // Instruction details available in ARM DDI 0406C.b, A8.8.968.	4134 // Instruction details available in ARM DDI 0406C.b, A8.8.968.

4117 DCHECK(IsEnabled(NEON));	4135 DCHECK(IsEnabled(NEON));

4118 emit(EncodeNeonUnaryOp(0x7, false, size, dst, src));	4136 emit(EncodeNeonUnaryOp(VNEG, size, dst, src));

4119 }	4137 }

4120	4138

4121 void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1,	4139 void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1,

4122 DwVfpRegister src2) {	4140 DwVfpRegister src2) {

4123 // Dd = veor(Dn, Dm) 64 bit integer exclusive OR.	4141 // Dd = veor(Dn, Dm) 64 bit integer exclusive OR.

4124 // Instruction details available in ARM DDI 0406C.b, A8.8.888.	4142 // Instruction details available in ARM DDI 0406C.b, A8.8.888.

4125 DCHECK(IsEnabled(NEON));	4143 DCHECK(IsEnabled(NEON));

4126 int vd, d;	4144 int vd, d;

4127 dst.split_code(&vd, &d);	4145 dst.split_code(&vd, &d);

4128 int vn, n;	4146 int vn, n;

4129 src1.split_code(&vn, &n);	4147 src1.split_code(&vn, &n);

4130 int vm, m;	4148 int vm, m;

4131 src2.split_code(&vm, &m);	4149 src2.split_code(&vm, &m);

4132 emit(0x1E6U * B23 \| d * B22 \| vn * B16 \| vd * B12 \| B8 \| n * B7 \| m * B5 \|	4150 emit(0x1E6U * B23 \| d * B22 \| vn * B16 \| vd * B12 \| B8 \| n * B7 \| m * B5 \|

4133 B4 \| vm);	4151 B4 \| vm);

4134 }	4152 }

4135	4153

4136 enum BinaryBitwiseOp { VAND, VBIC, VBIF, VBIT, VBSL, VEOR, VORR, VORN };	4154 enum BinaryBitwiseOp { VAND, VBIC, VBIF, VBIT, VBSL, VEOR, VORR, VORN };

4137	4155

4138 static Instr EncodeNeonBinaryBitwiseOp(BinaryBitwiseOp op,	4156 static Instr EncodeNeonBinaryBitwiseOp(BinaryBitwiseOp op, QwNeonRegister dst,

4139 const QwNeonRegister dst,	4157 QwNeonRegister src1,

4140 const QwNeonRegister src1,	4158 QwNeonRegister src2) {

4141 const QwNeonRegister src2) {

4142 int op_encoding = 0;	4159 int op_encoding = 0;

4143 switch (op) {	4160 switch (op) {

4144 case VBIC:	4161 case VBIC:

4145 op_encoding = 0x1 * B20;	4162 op_encoding = 0x1 * B20;

4146 break;	4163 break;

4147 case VBIF:	4164 case VBIF:

4148 op_encoding = B24 \| 0x3 * B20;	4165 op_encoding = B24 \| 0x3 * B20;

4149 break;	4166 break;

4150 case VBIT:	4167 case VBIT:

4151 op_encoding = B24 \| 0x2 * B20;	4168 op_encoding = B24 \| 0x2 * B20;

(...skipping 128 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4280 VMUL,	4297 VMUL,

4281 VMIN,	4298 VMIN,

4282 VMAX,	4299 VMAX,

4283 VTST,	4300 VTST,

4284 VCEQ,	4301 VCEQ,

4285 VCGE,	4302 VCGE,

4286 VCGT	4303 VCGT

4287 };	4304 };

4288	4305

4289 static Instr EncodeNeonBinOp(IntegerBinOp op, NeonDataType dt,	4306 static Instr EncodeNeonBinOp(IntegerBinOp op, NeonDataType dt,

4290 const QwNeonRegister dst,	4307 QwNeonRegister dst, QwNeonRegister src1,

4291 const QwNeonRegister src1,	4308 QwNeonRegister src2) {

4292 const QwNeonRegister src2) {

4293 int op_encoding = 0;	4309 int op_encoding = 0;

4294 switch (op) {	4310 switch (op) {

4295 case VADD:	4311 case VADD:

4296 op_encoding = 0x8 * B8;	4312 op_encoding = 0x8 * B8;

4297 break;	4313 break;

4298 case VQADD:	4314 case VQADD:

4299 op_encoding = B4;	4315 op_encoding = B4;

4300 break;	4316 break;

4301 case VSUB:	4317 case VSUB:

4302 op_encoding = B24 \| 0x8 * B8;	4318 op_encoding = B24 \| 0x8 * B8;

(...skipping 31 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4334 int vn, n;	4350 int vn, n;

4335 src1.split_code(&vn, &n);	4351 src1.split_code(&vn, &n);

4336 int vm, m;	4352 int vm, m;

4337 src2.split_code(&vm, &m);	4353 src2.split_code(&vm, &m);

4338 int size = NeonSz(dt);	4354 int size = NeonSz(dt);

4339 int u = NeonU(dt);	4355 int u = NeonU(dt);

4340 return 0x1E4U * B23 \| u * B24 \| d * B22 \| size * B20 \| vn * B16 \| vd * B12 \|	4356 return 0x1E4U * B23 \| u * B24 \| d * B22 \| size * B20 \| vn * B16 \| vd * B12 \|

4341 n * B7 \| B6 \| m * B5 \| vm \| op_encoding;	4357 n * B7 \| B6 \| m * B5 \| vm \| op_encoding;

4342 }	4358 }

4343	4359

4344 static Instr EncodeNeonBinOp(IntegerBinOp op, NeonSize size,	4360 static Instr EncodeNeonBinOp(IntegerBinOp op, NeonSize size, QwNeonRegister dst,

4345 const QwNeonRegister dst,	4361 QwNeonRegister src1, QwNeonRegister src2) {

4346 const QwNeonRegister src1,

4347 const QwNeonRegister src2) {

4348 // Map NeonSize values to the signed values in NeonDataType, so the U bit	4362 // Map NeonSize values to the signed values in NeonDataType, so the U bit

4349 // will be 0.	4363 // will be 0.

4350 return EncodeNeonBinOp(op, static_cast<NeonDataType>(size), dst, src1, src2);	4364 return EncodeNeonBinOp(op, static_cast<NeonDataType>(size), dst, src1, src2);

4351 }	4365 }

4352	4366

4353 void Assembler::vadd(QwNeonRegister dst, QwNeonRegister src1,	4367 void Assembler::vadd(QwNeonRegister dst, QwNeonRegister src1,

4354 QwNeonRegister src2) {	4368 QwNeonRegister src2) {

4355 DCHECK(IsEnabled(NEON));	4369 DCHECK(IsEnabled(NEON));

4356 // Qd = vadd(Qn, Qm) SIMD floating point addition.	4370 // Qd = vadd(Qn, Qm) SIMD floating point addition.

4357 // Instruction details available in ARM DDI 0406C.b, A8-830.	4371 // Instruction details available in ARM DDI 0406C.b, A8-830.

(...skipping 164 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4522 }	4536 }

4523	4537

4524 void Assembler::vrsqrts(QwNeonRegister dst, QwNeonRegister src1,	4538 void Assembler::vrsqrts(QwNeonRegister dst, QwNeonRegister src1,

4525 QwNeonRegister src2) {	4539 QwNeonRegister src2) {

4526 DCHECK(IsEnabled(NEON));	4540 DCHECK(IsEnabled(NEON));

4527 // Qd = vrsqrts(Qn, Qm) SIMD reciprocal square root refinement step.	4541 // Qd = vrsqrts(Qn, Qm) SIMD reciprocal square root refinement step.

4528 // Instruction details available in ARM DDI 0406C.b, A8-1040.	4542 // Instruction details available in ARM DDI 0406C.b, A8-1040.

4529 emit(EncodeNeonBinOp(VRSQRTS, dst, src1, src2));	4543 emit(EncodeNeonBinOp(VRSQRTS, dst, src1, src2));

4530 }	4544 }

4531	4545

	4546 enum PairwiseOp { VPMIN, VPMAX };

	4547

	4548 static Instr EncodeNeonPairwiseOp(PairwiseOp op, NeonDataType dt,

	4549 DwVfpRegister dst, DwVfpRegister src1,

	4550 DwVfpRegister src2) {

	4551 int op_encoding = 0;

	4552 switch (op) {

	4553 case VPMIN:

	4554 op_encoding = 0xA * B8 \| B4;

	4555 break;

	4556 case VPMAX:

	4557 op_encoding = 0xA * B8;

	4558 break;

	4559 default:

	4560 UNREACHABLE();

	4561 break;

	4562 }

	4563 int vd, d;

	4564 dst.split_code(&vd, &d);

	4565 int vn, n;

	4566 src1.split_code(&vn, &n);

	4567 int vm, m;

	4568 src2.split_code(&vm, &m);

	4569 int size = NeonSz(dt);

	4570 int u = NeonU(dt);

	4571 return 0x1E4U * B23 \| u * B24 \| d * B22 \| size * B20 \| vn * B16 \| vd * B12 \|

	4572 n * B7 \| m * B5 \| vm \| op_encoding;

	4573 }

	4574

	4575 void Assembler::vpmin(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src1,

	4576 DwVfpRegister src2) {

	4577 DCHECK(IsEnabled(NEON));

	4578 // Qd = vpmin(Qn, Qm) SIMD integer pairwise MIN.
	martyn.capewell 2017/02/28 15:45:24: These are D registers for min and max.
	bbudge 2017/02/28 17:39:35 (in reply): Done here and below.
	4579 // Instruction details available in ARM DDI 0406C.b, A8-986.

	4580 emit(EncodeNeonPairwiseOp(VPMIN, dt, dst, src1, src2));

	4581 }

	4582

	4583 void Assembler::vpmax(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src1,

	4584 DwVfpRegister src2) {

	4585 DCHECK(IsEnabled(NEON));

	4586 // Qd = vpmax(Qn, Qm) SIMD integer pairwise MAX.

	4587 // Instruction details available in ARM DDI 0406C.b, A8-986.

	4588 emit(EncodeNeonPairwiseOp(VPMAX, dt, dst, src1, src2));

	4589 }

	4590

4532 void Assembler::vtst(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,	4591 void Assembler::vtst(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,

4533 QwNeonRegister src2) {	4592 QwNeonRegister src2) {

4534 DCHECK(IsEnabled(NEON));	4593 DCHECK(IsEnabled(NEON));

4535 // Qd = vtst(Qn, Qm) SIMD test integer operands.	4594 // Qd = vtst(Qn, Qm) SIMD test integer operands.

4536 // Instruction details available in ARM DDI 0406C.b, A8-1098.	4595 // Instruction details available in ARM DDI 0406C.b, A8-1098.

4537 emit(EncodeNeonBinOp(VTST, size, dst, src1, src2));	4596 emit(EncodeNeonBinOp(VTST, size, dst, src1, src2));

4538 }	4597 }

4539	4598

4540 void Assembler::vceq(QwNeonRegister dst, QwNeonRegister src1,	4599 void Assembler::vceq(QwNeonRegister dst, QwNeonRegister src1,

4541 QwNeonRegister src2) {	4600 QwNeonRegister src2) {

(...skipping 683 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5225 DCHECK(is_uint12(offset));	5284 DCHECK(is_uint12(offset));

5226 instr_at_put(pc, SetLdrRegisterImmediateOffset(instr, offset));	5285 instr_at_put(pc, SetLdrRegisterImmediateOffset(instr, offset));

5227 }	5286 }

5228 }	5287 }

5229	5288

5230	5289

5231 } // namespace internal	5290 } // namespace internal

5232 } // namespace v8	5291 } // namespace v8

5233	5292

5234 #endif // V8_TARGET_ARCH_ARM	5293 #endif // V8_TARGET_ARCH_ARM

OLD	NEW

« no previous file with comments | « src/arm/assembler-arm.h ('k') | src/arm/disasm-arm.cc » ('j') | src/arm/disasm-arm.cc » ('J')