Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(98)

Side by Side Diff: src/arm/assembler-arm.cc

Issue 2579913002: [ARM] Add NEON instructions for implementing SIMD. (Closed)
Patch Set: Clean up. Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 1994-2006 Sun Microsystems Inc. 1 // Copyright (c) 1994-2006 Sun Microsystems Inc.
2 // All Rights Reserved. 2 // All Rights Reserved.
3 // 3 //
4 // Redistribution and use in source and binary forms, with or without 4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions 5 // modification, are permitted provided that the following conditions
6 // are met: 6 // are met:
7 // 7 //
8 // - Redistributions of source code must retain the above copyright notice, 8 // - Redistributions of source code must retain the above copyright notice,
9 // this list of conditions and the following disclaimer. 9 // this list of conditions and the following disclaimer.
10 // 10 //
(...skipping 4056 matching lines...) Expand 10 before | Expand all | Expand 10 after
4067 } 4067 }
4068 4068
4069 void Assembler::vcvt_u32_f32(const QwNeonRegister dst, 4069 void Assembler::vcvt_u32_f32(const QwNeonRegister dst,
4070 const QwNeonRegister src) { 4070 const QwNeonRegister src) {
4071 DCHECK(IsEnabled(NEON)); 4071 DCHECK(IsEnabled(NEON));
4072 DCHECK(VfpRegisterIsAvailable(dst)); 4072 DCHECK(VfpRegisterIsAvailable(dst));
4073 DCHECK(VfpRegisterIsAvailable(src)); 4073 DCHECK(VfpRegisterIsAvailable(src));
4074 emit(EncodeNeonVCVT(U32, dst, F32, src)); 4074 emit(EncodeNeonVCVT(U32, dst, F32, src));
4075 } 4075 }
4076 4076
4077 // op is instr->Bits(11, 7).
4078 static Instr EncodeNeonUnaryOp(int op, bool is_float, NeonSize size,
4079 const QwNeonRegister dst,
4080 const QwNeonRegister src) {
4081 int vd, d;
Rodolph Perfetta (ARM) 2016/12/16 20:50:11 DCHECK is_float => size is Neon32
bbudge 2016/12/17 01:08:32 Done.
4082 dst.split_code(&vd, &d);
4083 int vm, m;
4084 src.split_code(&vm, &m);
4085 int F = is_float ? 1 : 0;
4086 return 0x1E7U * B23 | d * B22 | 0x3 * B20 | size * B18 | B16 | vd * B12 |
4087 F * B10 | B8 | op * B7 | B6 | m * B5 | vm;
4088 }
4089
4090 void Assembler::vabs(const QwNeonRegister dst, const QwNeonRegister src) {
4091 // Qd = vabs.f<size>(Qm) SIMD floating point absolute value.
4092 // Instruction details available in ARM DDI 0406C.b, A8.8.824.
4093 DCHECK(IsEnabled(NEON));
4094 emit(EncodeNeonUnaryOp(0x6, true, Neon32, dst, src));
4095 }
4096
4097 void Assembler::vabs(NeonSize size, const QwNeonRegister dst,
4098 const QwNeonRegister src) {
4099 // Qd = vabs.s<size>(Qm) SIMD integer absolute value.
4100 // Instruction details available in ARM DDI 0406C.b, A8.8.824.
4101 DCHECK(IsEnabled(NEON));
4102 emit(EncodeNeonUnaryOp(0x6, false, size, dst, src));
4103 }
4104
4105 void Assembler::vneg(const QwNeonRegister dst, const QwNeonRegister src) {
4106 // Qd = vneg.f<size>(Qm) SIMD floating point negate.
4107 // Instruction details available in ARM DDI 0406C.b, A8.8.968.
4108 DCHECK(IsEnabled(NEON));
4109 emit(EncodeNeonUnaryOp(0x7, true, Neon32, dst, src));
4110 }
4111
4112 void Assembler::vneg(NeonSize size, const QwNeonRegister dst,
4113 const QwNeonRegister src) {
4114 // Qd = vneg.s<size>(Qm) SIMD integer negate.
4115 // Instruction details available in ARM DDI 0406C.b, A8.8.968.
4116 DCHECK(IsEnabled(NEON));
4117 emit(EncodeNeonUnaryOp(0x7, false, size, dst, src));
4118 }
4119
4077 void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1, 4120 void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1,
4078 DwVfpRegister src2) { 4121 DwVfpRegister src2) {
4079 // Dd = veor(Dn, Dm) 64 bit integer exclusive OR. 4122 // Dd = veor(Dn, Dm) 64 bit integer exclusive OR.
4080 // Instruction details available in ARM DDI 0406C.b, A8.8.888. 4123 // Instruction details available in ARM DDI 0406C.b, A8.8.888.
4081 DCHECK(IsEnabled(NEON)); 4124 DCHECK(IsEnabled(NEON));
4082 int vd, d; 4125 int vd, d;
4083 dst.split_code(&vd, &d); 4126 dst.split_code(&vd, &d);
4084 int vn, n; 4127 int vn, n;
4085 src1.split_code(&vn, &n); 4128 src1.split_code(&vn, &n);
4086 int vm, m; 4129 int vm, m;
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
4159 dst.split_code(&vd, &d); 4202 dst.split_code(&vd, &d);
4160 int vn, n; 4203 int vn, n;
4161 src1.split_code(&vn, &n); 4204 src1.split_code(&vn, &n);
4162 int vm, m; 4205 int vm, m;
4163 src2.split_code(&vm, &m); 4206 src2.split_code(&vm, &m);
4164 int sz = static_cast<int>(size); 4207 int sz = static_cast<int>(size);
4165 emit(0x1E6U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 | 4208 emit(0x1E6U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 |
4166 n * B7 | B6 | m * B5 | vm); 4209 n * B7 | B6 | m * B5 | vm);
4167 } 4210 }
4168 4211
4212 void Assembler::vmul(QwNeonRegister dst, const QwNeonRegister src1,
4213 const QwNeonRegister src2) {
4214 DCHECK(IsEnabled(NEON));
4215 // Qd = vmul(Qn, Qm) SIMD floating point multiply.
4216 // Instruction details available in ARM DDI 0406C.b, A8-958.
4217 int vd, d;
4218 dst.split_code(&vd, &d);
4219 int vn, n;
4220 src1.split_code(&vn, &n);
4221 int vm, m;
4222 src2.split_code(&vm, &m);
4223 emit(0x1E6U * B23 | d * B22 | vn * B16 | vd * B12 | 0xD * B8 | n * B7 | B6 |
4224 m * B5 | B4 | vm);
4225 }
4226
4227 void Assembler::vmul(NeonSize size, QwNeonRegister dst,
4228 const QwNeonRegister src1, const QwNeonRegister src2) {
4229 DCHECK(IsEnabled(NEON));
4230 // Qd = vmul(Qn, Qm) SIMD integer multiply.
4231 // Instruction details available in ARM DDI 0406C.b, A8-960.
4232 int vd, d;
4233 dst.split_code(&vd, &d);
4234 int vn, n;
4235 src1.split_code(&vn, &n);
4236 int vm, m;
4237 src2.split_code(&vm, &m);
4238 int sz = static_cast<int>(size);
4239 emit(0x1E4U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x9 * B8 |
4240 n * B7 | B6 | m * B5 | B4 | vm);
4241 }
4242
4169 void Assembler::vtst(NeonSize size, QwNeonRegister dst, 4243 void Assembler::vtst(NeonSize size, QwNeonRegister dst,
4170 const QwNeonRegister src1, const QwNeonRegister src2) { 4244 const QwNeonRegister src1, const QwNeonRegister src2) {
4171 DCHECK(IsEnabled(NEON)); 4245 DCHECK(IsEnabled(NEON));
4172 // Qd = vtst(Qn, Qm) SIMD test integer operands. 4246 // Qd = vtst(Qn, Qm) SIMD test integer operands.
4173 // Instruction details available in ARM DDI 0406C.b, A8-1098. 4247 // Instruction details available in ARM DDI 0406C.b, A8-1098.
4174 int vd, d; 4248 int vd, d;
4175 dst.split_code(&vd, &d); 4249 dst.split_code(&vd, &d);
4176 int vn, n; 4250 int vn, n;
4177 src1.split_code(&vn, &n); 4251 src1.split_code(&vn, &n);
4178 int vm, m; 4252 int vm, m;
4179 src2.split_code(&vm, &m); 4253 src2.split_code(&vm, &m);
4180 int sz = static_cast<int>(size); 4254 int sz = static_cast<int>(size);
4181 emit(0x1E4U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 | 4255 emit(0x1E4U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 |
4182 n * B7 | B6 | m * B5 | B4 | vm); 4256 n * B7 | B6 | m * B5 | B4 | vm);
4183 } 4257 }
4184 4258
4185 void Assembler::vceq(NeonSize size, QwNeonRegister dst, 4259 void Assembler::vceq(NeonSize size, QwNeonRegister dst,
4186 const QwNeonRegister src1, const QwNeonRegister src2) { 4260 const QwNeonRegister src1, const QwNeonRegister src2) {
4187 DCHECK(IsEnabled(NEON)); 4261 DCHECK(IsEnabled(NEON));
4188 // Qd = vceq(Qn, Qm) SIMD integer compare equal. 4262 // Qd = vceq(Qn, Qm) SIMD bitwise compare equal.
4189 // Instruction details available in ARM DDI 0406C.b, A8-844. 4263 // Instruction details available in ARM DDI 0406C.b, A8-844.
4190 int vd, d; 4264 int vd, d;
4191 dst.split_code(&vd, &d); 4265 dst.split_code(&vd, &d);
4192 int vn, n; 4266 int vn, n;
4193 src1.split_code(&vn, &n); 4267 src1.split_code(&vn, &n);
4194 int vm, m; 4268 int vm, m;
4195 src2.split_code(&vm, &m); 4269 src2.split_code(&vm, &m);
4196 int sz = static_cast<int>(size); 4270 int sz = static_cast<int>(size);
4197 emit(0x1E6U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 | 4271 emit(0x1E6U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 |
4198 n * B7 | B6 | m * B5 | B4 | vm); 4272 n * B7 | B6 | m * B5 | B4 | vm);
4199 } 4273 }
4200 4274
4201 void Assembler::vbsl(QwNeonRegister dst, const QwNeonRegister src1, 4275 void Assembler::vbsl(QwNeonRegister dst, const QwNeonRegister src1,
4202 const QwNeonRegister src2) { 4276 const QwNeonRegister src2) {
4203 DCHECK(IsEnabled(NEON)); 4277 DCHECK(IsEnabled(NEON));
4204 // Qd = vbsl(Qn, Qm) SIMD bitwise select. 4278 // Qd = vbsl(Qn, Qm) SIMD bitwise select.
4205 // Instruction details available in ARM DDI 0406C.b, A8-844. 4279 // Instruction details available in ARM DDI 0406C.b, A8-844.
4206 int vd, d; 4280 int vd, d;
4207 dst.split_code(&vd, &d); 4281 dst.split_code(&vd, &d);
4208 int vn, n; 4282 int vn, n;
4209 src1.split_code(&vn, &n); 4283 src1.split_code(&vn, &n);
4210 int vm, m; 4284 int vm, m;
4211 src2.split_code(&vm, &m); 4285 src2.split_code(&vm, &m);
4212 int op = 1; // vbsl 4286 int op = 1; // vbsl
4213 emit(0x1E6U * B23 | d * B22 | op * B20 | vn * B16 | vd * B12 | 0x1 * B8 | 4287 emit(0x1E6U * B23 | d * B22 | op * B20 | vn * B16 | vd * B12 | 0x1 * B8 |
4214 n * B7 | B6 | m * B5 | B4 | vm); 4288 n * B7 | B6 | m * B5 | B4 | vm);
4215 } 4289 }
4216 4290
4291 void Assembler::vext(QwNeonRegister dst, const QwNeonRegister src1,
4292 const QwNeonRegister src2, int bytes) {
4293 DCHECK(IsEnabled(NEON));
4294 // Qd = vext(Qn, Qm) SIMD byte extract.
4295 // Instruction details available in ARM DDI 0406C.b, A8-890.
4296 int vd, d;
4297 dst.split_code(&vd, &d);
4298 int vn, n;
4299 src1.split_code(&vn, &n);
4300 int vm, m;
4301 src2.split_code(&vm, &m);
4302 DCHECK_GT(16, bytes);
4303 emit(0x1E5U * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | bytes * B8 |
4304 n * B7 | B6 | m * B5 | vm);
4305 }
4306
4307 void Assembler::vzip(NeonSize size, QwNeonRegister dst,
4308 const QwNeonRegister src) {
4309 DCHECK(IsEnabled(NEON));
4310 // vzip.<size>(Qd, Qm) SIMD zip (interleave).
4311 // Instruction details available in ARM DDI 0406C.b, A8-1102.
4312 int vd, d;
4313 dst.split_code(&vd, &d);
4314 int vm, m;
4315 src.split_code(&vm, &m);
4316 int sz = static_cast<int>(size);
4317 emit(0x1E7U * B23 | d * B22 | 0x3 * B20 | sz * B18 | 2 * B16 | vd * B12 | B8 |
Rodolph Perfetta (ARM) 2016/12/16 20:50:11 no need for B8, you have 0x3 * B7 to cover that bi
bbudge 2016/12/17 01:08:32 Good catch, done!
4318 0x3 * B7 | B6 | m * B5 | vm);
4319 }
4320
4321 void Assembler::vrev(NeonSize op_size, NeonSize element_size,
4322 const QwNeonRegister dst, const QwNeonRegister src) {
4323 DCHECK(IsEnabled(NEON));
4324 DCHECK_GT(static_cast<int>(op_size), static_cast<int>(element_size));
4325 // Qd = vrev<op>.<size>(Qm) SIMD scalar reverse.
4326 // Instruction details available in ARM DDI 0406C.b, A8-1028.
4327 int vd, d;
4328 dst.split_code(&vd, &d);
4329 int vm, m;
4330 src.split_code(&vm, &m);
4331 int sz = static_cast<int>(element_size);
4332 int op = static_cast<int>(Neon64) - static_cast<int>(op_size);
4333 emit(0x1E7U * B23 | d * B22 | 0x3 * B20 | sz * B18 | vd * B12 | op * B7 | B6 |
4334 m * B5 | vm);
4335 }
4336
4217 // Encode NEON vtbl / vtbx instruction. 4337 // Encode NEON vtbl / vtbx instruction.
4218 static Instr EncodeNeonVTB(const DwVfpRegister dst, const NeonListOperand& list, 4338 static Instr EncodeNeonVTB(const DwVfpRegister dst, const NeonListOperand& list,
4219 const DwVfpRegister index, bool vtbx) { 4339 const DwVfpRegister index, bool vtbx) {
4220 // Dd = vtbl(table, Dm) SIMD vector permute, zero at out of range indices. 4340 // Dd = vtbl(table, Dm) SIMD vector permute, zero at out of range indices.
4221 // Instruction details available in ARM DDI 0406C.b, A8-1094. 4341 // Instruction details available in ARM DDI 0406C.b, A8-1094.
4222 // Dd = vtbx(table, Dm) SIMD vector permute, skip out of range indices. 4342 // Dd = vtbx(table, Dm) SIMD vector permute, skip out of range indices.
4223 // Instruction details available in ARM DDI 0406C.b, A8-1094. 4343 // Instruction details available in ARM DDI 0406C.b, A8-1094.
4224 int vd, d; 4344 int vd, d;
4225 dst.split_code(&vd, &d); 4345 dst.split_code(&vd, &d);
4226 int vn, n; 4346 int vn, n;
(...skipping 563 matching lines...) Expand 10 before | Expand all | Expand 10 after
4790 DCHECK(is_uint12(offset)); 4910 DCHECK(is_uint12(offset));
4791 instr_at_put(pc, SetLdrRegisterImmediateOffset(instr, offset)); 4911 instr_at_put(pc, SetLdrRegisterImmediateOffset(instr, offset));
4792 } 4912 }
4793 } 4913 }
4794 4914
4795 4915
4796 } // namespace internal 4916 } // namespace internal
4797 } // namespace v8 4917 } // namespace v8
4798 4918
4799 #endif // V8_TARGET_ARCH_ARM 4919 #endif // V8_TARGET_ARCH_ARM
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698