src/arm/assembler-arm.cc - Issue 2579913002: [ARM] Add NEON instructions for implementing SIMD.

Side by Side Diff: src/arm/assembler-arm.cc

Issue 2579913002: [ARM] Add NEON instructions for implementing SIMD. (Closed)

Patch Set: Clean up. Created 4 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 1994-2006 Sun Microsystems Inc.	1 // Copyright (c) 1994-2006 Sun Microsystems Inc.

2 // All Rights Reserved.	2 // All Rights Reserved.

3 //	3 //

4 // Redistribution and use in source and binary forms, with or without	4 // Redistribution and use in source and binary forms, with or without

5 // modification, are permitted provided that the following conditions	5 // modification, are permitted provided that the following conditions

6 // are met:	6 // are met:

7 //	7 //

8 // - Redistributions of source code must retain the above copyright notice,	8 // - Redistributions of source code must retain the above copyright notice,

9 // this list of conditions and the following disclaimer.	9 // this list of conditions and the following disclaimer.

10 //	10 //

(...skipping 4056 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4067 }	4067 }

4068	4068

4069 void Assembler::vcvt_u32_f32(const QwNeonRegister dst,	4069 void Assembler::vcvt_u32_f32(const QwNeonRegister dst,

4070 const QwNeonRegister src) {	4070 const QwNeonRegister src) {

4071 DCHECK(IsEnabled(NEON));	4071 DCHECK(IsEnabled(NEON));

4072 DCHECK(VfpRegisterIsAvailable(dst));	4072 DCHECK(VfpRegisterIsAvailable(dst));

4073 DCHECK(VfpRegisterIsAvailable(src));	4073 DCHECK(VfpRegisterIsAvailable(src));

4074 emit(EncodeNeonVCVT(U32, dst, F32, src));	4074 emit(EncodeNeonVCVT(U32, dst, F32, src));

4075 }	4075 }

4076	4076

	4077 // op is instr->Bits(11, 7).

	4078 static Instr EncodeNeonUnaryOp(int op, bool is_float, NeonSize size,

	4079 const QwNeonRegister dst,

	4080 const QwNeonRegister src) {

	4081 int vd, d;
	Rodolph Perfetta (ARM) 2016/12/16 20:50:11: DCHECK is_float => size is Neon32. bbudge 2016/12/17 01:08:32: Done.
	4082 dst.split_code(&vd, &d);

	4083 int vm, m;

	4084 src.split_code(&vm, &m);

	4085 int F = is_float ? 1 : 0;

	4086 return 0x1E7U * B23 \| d * B22 \| 0x3 * B20 \| size * B18 \| B16 \| vd * B12 \|

	4087 F * B10 \| B8 \| op * B7 \| B6 \| m * B5 \| vm;

	4088 }

	4089

	4090 void Assembler::vabs(const QwNeonRegister dst, const QwNeonRegister src) {

	4091 // Qd = vabs.f<size>(Qn, Qm) SIMD floating point absolute value.

	4092 // Instruction details available in ARM DDI 0406C.b, A8.8.824.

	4093 DCHECK(IsEnabled(NEON));

	4094 emit(EncodeNeonUnaryOp(0x6, true, Neon32, dst, src));

	4095 }

	4096

	4097 void Assembler::vabs(NeonSize size, const QwNeonRegister dst,

	4098 const QwNeonRegister src) {

	4099 // Qd = vabs.s<size>(Qn, Qm) SIMD integer absolute value.

	4100 // Instruction details available in ARM DDI 0406C.b, A8.8.824.

	4101 DCHECK(IsEnabled(NEON));

	4102 emit(EncodeNeonUnaryOp(0x6, false, size, dst, src));

	4103 }

	4104

	4105 void Assembler::vneg(const QwNeonRegister dst, const QwNeonRegister src) {

	4106 // Qd = vneg.f<size>(Qn, Qm) SIMD floating point negate.

	4107 // Instruction details available in ARM DDI 0406C.b, A8.8.968.

	4108 DCHECK(IsEnabled(NEON));

	4109 emit(EncodeNeonUnaryOp(0x7, true, Neon32, dst, src));

	4110 }

	4111

	4112 void Assembler::vneg(NeonSize size, const QwNeonRegister dst,

	4113 const QwNeonRegister src) {

	4114 // Qd = vneg.s<size>(Qn, Qm) SIMD integer negate.

	4115 // Instruction details available in ARM DDI 0406C.b, A8.8.968.

	4116 DCHECK(IsEnabled(NEON));

	4117 emit(EncodeNeonUnaryOp(0x7, false, size, dst, src));

	4118 }

	4119

4077 void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1,	4120 void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1,

4078 DwVfpRegister src2) {	4121 DwVfpRegister src2) {

4079 // Dd = veor(Dn, Dm) 64 bit integer exclusive OR.	4122 // Dd = veor(Dn, Dm) 64 bit integer exclusive OR.

4080 // Instruction details available in ARM DDI 0406C.b, A8.8.888.	4123 // Instruction details available in ARM DDI 0406C.b, A8.8.888.

4081 DCHECK(IsEnabled(NEON));	4124 DCHECK(IsEnabled(NEON));

4082 int vd, d;	4125 int vd, d;

4083 dst.split_code(&vd, &d);	4126 dst.split_code(&vd, &d);

4084 int vn, n;	4127 int vn, n;

4085 src1.split_code(&vn, &n);	4128 src1.split_code(&vn, &n);

4086 int vm, m;	4129 int vm, m;

(...skipping 72 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4159 dst.split_code(&vd, &d);	4202 dst.split_code(&vd, &d);

4160 int vn, n;	4203 int vn, n;

4161 src1.split_code(&vn, &n);	4204 src1.split_code(&vn, &n);

4162 int vm, m;	4205 int vm, m;

4163 src2.split_code(&vm, &m);	4206 src2.split_code(&vm, &m);

4164 int sz = static_cast<int>(size);	4207 int sz = static_cast<int>(size);

4165 emit(0x1E6U * B23 \| d * B22 \| sz * B20 \| vn * B16 \| vd * B12 \| 0x8 * B8 \|	4208 emit(0x1E6U * B23 \| d * B22 \| sz * B20 \| vn * B16 \| vd * B12 \| 0x8 * B8 \|

4166 n * B7 \| B6 \| m * B5 \| vm);	4209 n * B7 \| B6 \| m * B5 \| vm);

4167 }	4210 }

4168	4211

	4212 void Assembler::vmul(QwNeonRegister dst, const QwNeonRegister src1,

	4213 const QwNeonRegister src2) {

	4214 DCHECK(IsEnabled(NEON));

	4215 // Qd = vmul(Qn, Qm) SIMD floating point multiply.

	4216 // Instruction details available in ARM DDI 0406C.b, A8-958.

	4217 int vd, d;

	4218 dst.split_code(&vd, &d);

	4219 int vn, n;

	4220 src1.split_code(&vn, &n);

	4221 int vm, m;

	4222 src2.split_code(&vm, &m);

	4223 emit(0x1E6U * B23 \| d * B22 \| vn * B16 \| vd * B12 \| 0xD * B8 \| n * B7 \| B6 \|

	4224 m * B5 \| B4 \| vm);

	4225 }

	4226

	4227 void Assembler::vmul(NeonSize size, QwNeonRegister dst,

	4228 const QwNeonRegister src1, const QwNeonRegister src2) {

	4229 DCHECK(IsEnabled(NEON));

	4230 // Qd = vmul(Qn, Qm) SIMD integer multiply.

	4231 // Instruction details available in ARM DDI 0406C.b, A8-960.

	4232 int vd, d;

	4233 dst.split_code(&vd, &d);

	4234 int vn, n;

	4235 src1.split_code(&vn, &n);

	4236 int vm, m;

	4237 src2.split_code(&vm, &m);

	4238 int sz = static_cast<int>(size);

	4239 emit(0x1E4U * B23 \| d * B22 \| sz * B20 \| vn * B16 \| vd * B12 \| 0x9 * B8 \|

	4240 n * B7 \| B6 \| m * B5 \| B4 \| vm);

	4241 }

	4242

4169 void Assembler::vtst(NeonSize size, QwNeonRegister dst,	4243 void Assembler::vtst(NeonSize size, QwNeonRegister dst,

4170 const QwNeonRegister src1, const QwNeonRegister src2) {	4244 const QwNeonRegister src1, const QwNeonRegister src2) {

4171 DCHECK(IsEnabled(NEON));	4245 DCHECK(IsEnabled(NEON));

4172 // Qd = vtst(Qn, Qm) SIMD test integer operands.	4246 // Qd = vtst(Qn, Qm) SIMD test integer operands.

4173 // Instruction details available in ARM DDI 0406C.b, A8-1098.	4247 // Instruction details available in ARM DDI 0406C.b, A8-1098.

4174 int vd, d;	4248 int vd, d;

4175 dst.split_code(&vd, &d);	4249 dst.split_code(&vd, &d);

4176 int vn, n;	4250 int vn, n;

4177 src1.split_code(&vn, &n);	4251 src1.split_code(&vn, &n);

4178 int vm, m;	4252 int vm, m;

4179 src2.split_code(&vm, &m);	4253 src2.split_code(&vm, &m);

4180 int sz = static_cast<int>(size);	4254 int sz = static_cast<int>(size);

4181 emit(0x1E4U * B23 \| d * B22 \| sz * B20 \| vn * B16 \| vd * B12 \| 0x8 * B8 \|	4255 emit(0x1E4U * B23 \| d * B22 \| sz * B20 \| vn * B16 \| vd * B12 \| 0x8 * B8 \|

4182 n * B7 \| B6 \| m * B5 \| B4 \| vm);	4256 n * B7 \| B6 \| m * B5 \| B4 \| vm);

4183 }	4257 }

4184	4258

4185 void Assembler::vceq(NeonSize size, QwNeonRegister dst,	4259 void Assembler::vceq(NeonSize size, QwNeonRegister dst,

4186 const QwNeonRegister src1, const QwNeonRegister src2) {	4260 const QwNeonRegister src1, const QwNeonRegister src2) {

4187 DCHECK(IsEnabled(NEON));	4261 DCHECK(IsEnabled(NEON));

4188 // Qd = vceq(Qn, Qm) SIMD integer compare equal.	4262 // Qd = vceq(Qn, Qm) SIMD bitwise compare equal.

4189 // Instruction details available in ARM DDI 0406C.b, A8-844.	4263 // Instruction details available in ARM DDI 0406C.b, A8-844.

4190 int vd, d;	4264 int vd, d;

4191 dst.split_code(&vd, &d);	4265 dst.split_code(&vd, &d);

4192 int vn, n;	4266 int vn, n;

4193 src1.split_code(&vn, &n);	4267 src1.split_code(&vn, &n);

4194 int vm, m;	4268 int vm, m;

4195 src2.split_code(&vm, &m);	4269 src2.split_code(&vm, &m);

4196 int sz = static_cast<int>(size);	4270 int sz = static_cast<int>(size);

4197 emit(0x1E6U * B23 \| d * B22 \| sz * B20 \| vn * B16 \| vd * B12 \| 0x8 * B8 \|	4271 emit(0x1E6U * B23 \| d * B22 \| sz * B20 \| vn * B16 \| vd * B12 \| 0x8 * B8 \|

4198 n * B7 \| B6 \| m * B5 \| B4 \| vm);	4272 n * B7 \| B6 \| m * B5 \| B4 \| vm);

4199 }	4273 }

4200	4274

4201 void Assembler::vbsl(QwNeonRegister dst, const QwNeonRegister src1,	4275 void Assembler::vbsl(QwNeonRegister dst, const QwNeonRegister src1,

4202 const QwNeonRegister src2) {	4276 const QwNeonRegister src2) {

4203 DCHECK(IsEnabled(NEON));	4277 DCHECK(IsEnabled(NEON));

4204 // Qd = vbsl(Qn, Qm) SIMD bitwise select.	4278 // Qd = vbsl(Qn, Qm) SIMD bitwise select.

4205 // Instruction details available in ARM DDI 0406C.b, A8-844.	4279 // Instruction details available in ARM DDI 0406C.b, A8-844.

4206 int vd, d;	4280 int vd, d;

4207 dst.split_code(&vd, &d);	4281 dst.split_code(&vd, &d);

4208 int vn, n;	4282 int vn, n;

4209 src1.split_code(&vn, &n);	4283 src1.split_code(&vn, &n);

4210 int vm, m;	4284 int vm, m;

4211 src2.split_code(&vm, &m);	4285 src2.split_code(&vm, &m);

4212 int op = 1; // vbsl	4286 int op = 1; // vbsl

4213 emit(0x1E6U * B23 \| d * B22 \| op * B20 \| vn * B16 \| vd * B12 \| 0x1 * B8 \|	4287 emit(0x1E6U * B23 \| d * B22 \| op * B20 \| vn * B16 \| vd * B12 \| 0x1 * B8 \|

4214 n * B7 \| B6 \| m * B5 \| B4 \| vm);	4288 n * B7 \| B6 \| m * B5 \| B4 \| vm);

4215 }	4289 }

4216	4290

	4291 void Assembler::vext(QwNeonRegister dst, const QwNeonRegister src1,

	4292 const QwNeonRegister src2, int bytes) {

	4293 DCHECK(IsEnabled(NEON));

	4294 // Qd = vext(Qn, Qm) SIMD byte extract.

	4295 // Instruction details available in ARM DDI 0406C.b, A8-890.

	4296 int vd, d;

	4297 dst.split_code(&vd, &d);

	4298 int vn, n;

	4299 src1.split_code(&vn, &n);

	4300 int vm, m;

	4301 src2.split_code(&vm, &m);

	4302 DCHECK_GT(16, bytes);

	4303 emit(0x1E5U * B23 \| d * B22 \| 0x3 * B20 \| vn * B16 \| vd * B12 \| bytes * B8 \|

	4304 n * B7 \| B6 \| m * B5 \| vm);

	4305 }

	4306

	4307 void Assembler::vzip(NeonSize size, QwNeonRegister dst,

	4308 const QwNeonRegister src) {

	4309 DCHECK(IsEnabled(NEON));

	4310 // Qd = vzip.<size>(Qn, Qm) SIMD zip (interleave).

	4311 // Instruction details available in ARM DDI 0406C.b, A8-1102.

	4312 int vd, d;

	4313 dst.split_code(&vd, &d);

	4314 int vm, m;

	4315 src.split_code(&vm, &m);

	4316 int sz = static_cast<int>(size);

	4317 emit(0x1E7U * B23 \| d * B22 \| 0x3 * B20 \| sz * B18 \| 2 * B16 \| vd * B12 \| B8 \|
	Rodolph Perfetta (ARM) 2016/12/16 20:50:11: no need for B8, you have 0x3 * B7 to cover that bit. bbudge 2016/12/17 01:08:32: Good catch, done!
	4318 0x3 * B7 \| B6 \| m * B5 \| vm);

	4319 }

	4320

	4321 void Assembler::vrev(NeonSize op_size, NeonSize element_size,

	4322 const QwNeonRegister dst, const QwNeonRegister src) {

	4323 DCHECK(IsEnabled(NEON));

	4324 DCHECK_GT(static_cast<int>(op_size), static_cast<int>(element_size));

	4325 // Qd = vrev<op>.<size>(Qn, Qm) SIMD scalar reverse.

	4326 // Instruction details available in ARM DDI 0406C.b, A8-1028.

	4327 int vd, d;

	4328 dst.split_code(&vd, &d);

	4329 int vm, m;

	4330 src.split_code(&vm, &m);

	4331 int sz = static_cast<int>(element_size);

	4332 int op = static_cast<int>(Neon64) - static_cast<int>(op_size);

	4333 emit(0x1E7U * B23 \| d * B22 \| 0x3 * B20 \| sz * B18 \| vd * B12 \| op * B7 \| B6 \|

	4334 m * B5 \| vm);

	4335 }

	4336

4217 // Encode NEON vtbl / vtbx instruction.	4337 // Encode NEON vtbl / vtbx instruction.

4218 static Instr EncodeNeonVTB(const DwVfpRegister dst, const NeonListOperand& list,	4338 static Instr EncodeNeonVTB(const DwVfpRegister dst, const NeonListOperand& list,

4219 const DwVfpRegister index, bool vtbx) {	4339 const DwVfpRegister index, bool vtbx) {

4220 // Dd = vtbl(table, Dm) SIMD vector permute, zero at out of range indices.	4340 // Dd = vtbl(table, Dm) SIMD vector permute, zero at out of range indices.

4221 // Instruction details available in ARM DDI 0406C.b, A8-1094.	4341 // Instruction details available in ARM DDI 0406C.b, A8-1094.

4222 // Dd = vtbx(table, Dm) SIMD vector permute, skip out of range indices.	4342 // Dd = vtbx(table, Dm) SIMD vector permute, skip out of range indices.

4223 // Instruction details available in ARM DDI 0406C.b, A8-1094.	4343 // Instruction details available in ARM DDI 0406C.b, A8-1094.

4224 int vd, d;	4344 int vd, d;

4225 dst.split_code(&vd, &d);	4345 dst.split_code(&vd, &d);

4226 int vn, n;	4346 int vn, n;

(...skipping 563 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4790 DCHECK(is_uint12(offset));	4910 DCHECK(is_uint12(offset));

4791 instr_at_put(pc, SetLdrRegisterImmediateOffset(instr, offset));	4911 instr_at_put(pc, SetLdrRegisterImmediateOffset(instr, offset));

4792 }	4912 }

4793 }	4913 }

4794	4914

4795	4915

4796 } // namespace internal	4916 } // namespace internal

4797 } // namespace v8	4917 } // namespace v8

4798	4918

4799 #endif // V8_TARGET_ARCH_ARM	4919 #endif // V8_TARGET_ARCH_ARM

OLD	NEW

« src/arm/assembler-arm.h ('K') | « src/arm/assembler-arm.h ('k') | src/arm/disasm-arm.cc » ('j') | src/arm/disasm-arm.cc » ('J')