Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(91)

Side by Side Diff: src/arm/assembler-arm.cc

Issue 2579913002: [ARM] Add NEON instructions for implementing SIMD. (Closed)
Patch Set: Review comments. Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/arm/assembler-arm.h ('k') | src/arm/disasm-arm.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 1994-2006 Sun Microsystems Inc. 1 // Copyright (c) 1994-2006 Sun Microsystems Inc.
2 // All Rights Reserved. 2 // All Rights Reserved.
3 // 3 //
4 // Redistribution and use in source and binary forms, with or without 4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions 5 // modification, are permitted provided that the following conditions
6 // are met: 6 // are met:
7 // 7 //
8 // - Redistributions of source code must retain the above copyright notice, 8 // - Redistributions of source code must retain the above copyright notice,
9 // this list of conditions and the following disclaimer. 9 // this list of conditions and the following disclaimer.
10 // 10 //
(...skipping 4056 matching lines...) Expand 10 before | Expand all | Expand 10 after
4067 } 4067 }
4068 4068
4069 void Assembler::vcvt_u32_f32(const QwNeonRegister dst, 4069 void Assembler::vcvt_u32_f32(const QwNeonRegister dst,
4070 const QwNeonRegister src) { 4070 const QwNeonRegister src) {
4071 DCHECK(IsEnabled(NEON)); 4071 DCHECK(IsEnabled(NEON));
4072 DCHECK(VfpRegisterIsAvailable(dst)); 4072 DCHECK(VfpRegisterIsAvailable(dst));
4073 DCHECK(VfpRegisterIsAvailable(src)); 4073 DCHECK(VfpRegisterIsAvailable(src));
4074 emit(EncodeNeonVCVT(U32, dst, F32, src)); 4074 emit(EncodeNeonVCVT(U32, dst, F32, src));
4075 } 4075 }
4076 4076
4077 // op is instr->Bits(11, 7).
4078 static Instr EncodeNeonUnaryOp(int op, bool is_float, NeonSize size,
4079 const QwNeonRegister dst,
4080 const QwNeonRegister src) {
4081 DCHECK_IMPLIES(is_float, size == Neon32);
4082 int vd, d;
4083 dst.split_code(&vd, &d);
4084 int vm, m;
4085 src.split_code(&vm, &m);
4086 int F = is_float ? 1 : 0;
4087 return 0x1E7U * B23 | d * B22 | 0x3 * B20 | size * B18 | B16 | vd * B12 |
4088 F * B10 | B8 | op * B7 | B6 | m * B5 | vm;
4089 }
4090
4091 void Assembler::vabs(const QwNeonRegister dst, const QwNeonRegister src) {
4092 // Qd = vabs.f<size>(Qm) SIMD floating point absolute value.
4093 // Instruction details available in ARM DDI 0406C.b, A8.8.824.
4094 DCHECK(IsEnabled(NEON));
4095 emit(EncodeNeonUnaryOp(0x6, true, Neon32, dst, src));
4096 }
4097
4098 void Assembler::vabs(NeonSize size, const QwNeonRegister dst,
4099 const QwNeonRegister src) {
4100 // Qd = vabs.s<size>(Qm) SIMD integer absolute value.
4101 // Instruction details available in ARM DDI 0406C.b, A8.8.824.
4102 DCHECK(IsEnabled(NEON));
4103 emit(EncodeNeonUnaryOp(0x6, false, size, dst, src));
4104 }
4105
4106 void Assembler::vneg(const QwNeonRegister dst, const QwNeonRegister src) {
4107 // Qd = vneg.f<size>(Qm) SIMD floating point negate.
4108 // Instruction details available in ARM DDI 0406C.b, A8.8.968.
4109 DCHECK(IsEnabled(NEON));
4110 emit(EncodeNeonUnaryOp(0x7, true, Neon32, dst, src));
4111 }
4112
4113 void Assembler::vneg(NeonSize size, const QwNeonRegister dst,
4114 const QwNeonRegister src) {
4115 // Qd = vneg.s<size>(Qm) SIMD integer negate.
4116 // Instruction details available in ARM DDI 0406C.b, A8.8.968.
4117 DCHECK(IsEnabled(NEON));
4118 emit(EncodeNeonUnaryOp(0x7, false, size, dst, src));
4119 }
4120
4077 void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1, 4121 void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1,
4078 DwVfpRegister src2) { 4122 DwVfpRegister src2) {
4079 // Dd = veor(Dn, Dm) 64 bit integer exclusive OR. 4123 // Dd = veor(Dn, Dm) 64 bit integer exclusive OR.
4080 // Instruction details available in ARM DDI 0406C.b, A8.8.888. 4124 // Instruction details available in ARM DDI 0406C.b, A8.8.888.
4081 DCHECK(IsEnabled(NEON)); 4125 DCHECK(IsEnabled(NEON));
4082 int vd, d; 4126 int vd, d;
4083 dst.split_code(&vd, &d); 4127 dst.split_code(&vd, &d);
4084 int vn, n; 4128 int vn, n;
4085 src1.split_code(&vn, &n); 4129 src1.split_code(&vn, &n);
4086 int vm, m; 4130 int vm, m;
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
4159 dst.split_code(&vd, &d); 4203 dst.split_code(&vd, &d);
4160 int vn, n; 4204 int vn, n;
4161 src1.split_code(&vn, &n); 4205 src1.split_code(&vn, &n);
4162 int vm, m; 4206 int vm, m;
4163 src2.split_code(&vm, &m); 4207 src2.split_code(&vm, &m);
4164 int sz = static_cast<int>(size); 4208 int sz = static_cast<int>(size);
4165 emit(0x1E6U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 | 4209 emit(0x1E6U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 |
4166 n * B7 | B6 | m * B5 | vm); 4210 n * B7 | B6 | m * B5 | vm);
4167 } 4211 }
4168 4212
4213 void Assembler::vmul(QwNeonRegister dst, const QwNeonRegister src1,
4214 const QwNeonRegister src2) {
4215 DCHECK(IsEnabled(NEON));
4216 // Qd = vmul(Qn, Qm) SIMD floating point multiply.
4217 // Instruction details available in ARM DDI 0406C.b, A8-958.
4218 int vd, d;
4219 dst.split_code(&vd, &d);
4220 int vn, n;
4221 src1.split_code(&vn, &n);
4222 int vm, m;
4223 src2.split_code(&vm, &m);
4224 emit(0x1E6U * B23 | d * B22 | vn * B16 | vd * B12 | 0xD * B8 | n * B7 | B6 |
4225 m * B5 | B4 | vm);
4226 }
4227
4228 void Assembler::vmul(NeonSize size, QwNeonRegister dst,
4229 const QwNeonRegister src1, const QwNeonRegister src2) {
4230 DCHECK(IsEnabled(NEON));
4231 // Qd = vmul(Qn, Qm) SIMD integer multiply.
4232 // Instruction details available in ARM DDI 0406C.b, A8-960.
4233 int vd, d;
4234 dst.split_code(&vd, &d);
4235 int vn, n;
4236 src1.split_code(&vn, &n);
4237 int vm, m;
4238 src2.split_code(&vm, &m);
4239 int sz = static_cast<int>(size);
4240 emit(0x1E4U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x9 * B8 |
4241 n * B7 | B6 | m * B5 | B4 | vm);
4242 }
4243
4169 void Assembler::vtst(NeonSize size, QwNeonRegister dst, 4244 void Assembler::vtst(NeonSize size, QwNeonRegister dst,
4170 const QwNeonRegister src1, const QwNeonRegister src2) { 4245 const QwNeonRegister src1, const QwNeonRegister src2) {
4171 DCHECK(IsEnabled(NEON)); 4246 DCHECK(IsEnabled(NEON));
4172 // Qd = vtst(Qn, Qm) SIMD test integer operands. 4247 // Qd = vtst(Qn, Qm) SIMD test integer operands.
4173 // Instruction details available in ARM DDI 0406C.b, A8-1098. 4248 // Instruction details available in ARM DDI 0406C.b, A8-1098.
4174 int vd, d; 4249 int vd, d;
4175 dst.split_code(&vd, &d); 4250 dst.split_code(&vd, &d);
4176 int vn, n; 4251 int vn, n;
4177 src1.split_code(&vn, &n); 4252 src1.split_code(&vn, &n);
4178 int vm, m; 4253 int vm, m;
4179 src2.split_code(&vm, &m); 4254 src2.split_code(&vm, &m);
4180 int sz = static_cast<int>(size); 4255 int sz = static_cast<int>(size);
4181 emit(0x1E4U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 | 4256 emit(0x1E4U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 |
4182 n * B7 | B6 | m * B5 | B4 | vm); 4257 n * B7 | B6 | m * B5 | B4 | vm);
4183 } 4258 }
4184 4259
4185 void Assembler::vceq(NeonSize size, QwNeonRegister dst, 4260 void Assembler::vceq(NeonSize size, QwNeonRegister dst,
4186 const QwNeonRegister src1, const QwNeonRegister src2) { 4261 const QwNeonRegister src1, const QwNeonRegister src2) {
4187 DCHECK(IsEnabled(NEON)); 4262 DCHECK(IsEnabled(NEON));
4188 // Qd = vceq(Qn, Qm) SIMD integer compare equal. 4263 // Qd = vceq(Qn, Qm) SIMD bitwise compare equal.
4189 // Instruction details available in ARM DDI 0406C.b, A8-844. 4264 // Instruction details available in ARM DDI 0406C.b, A8-844.
4190 int vd, d; 4265 int vd, d;
4191 dst.split_code(&vd, &d); 4266 dst.split_code(&vd, &d);
4192 int vn, n; 4267 int vn, n;
4193 src1.split_code(&vn, &n); 4268 src1.split_code(&vn, &n);
4194 int vm, m; 4269 int vm, m;
4195 src2.split_code(&vm, &m); 4270 src2.split_code(&vm, &m);
4196 int sz = static_cast<int>(size); 4271 int sz = static_cast<int>(size);
4197 emit(0x1E6U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 | 4272 emit(0x1E6U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 |
4198 n * B7 | B6 | m * B5 | B4 | vm); 4273 n * B7 | B6 | m * B5 | B4 | vm);
4199 } 4274 }
4200 4275
4201 void Assembler::vbsl(QwNeonRegister dst, const QwNeonRegister src1, 4276 void Assembler::vbsl(QwNeonRegister dst, const QwNeonRegister src1,
4202 const QwNeonRegister src2) { 4277 const QwNeonRegister src2) {
4203 DCHECK(IsEnabled(NEON)); 4278 DCHECK(IsEnabled(NEON));
4204 // Qd = vbsl(Qn, Qm) SIMD bitwise select. 4279 // Qd = vbsl(Qn, Qm) SIMD bitwise select.
4205 // Instruction details available in ARM DDI 0406C.b, A8-844. 4280 // Instruction details available in ARM DDI 0406C.b, A8-844.
4206 int vd, d; 4281 int vd, d;
4207 dst.split_code(&vd, &d); 4282 dst.split_code(&vd, &d);
4208 int vn, n; 4283 int vn, n;
4209 src1.split_code(&vn, &n); 4284 src1.split_code(&vn, &n);
4210 int vm, m; 4285 int vm, m;
4211 src2.split_code(&vm, &m); 4286 src2.split_code(&vm, &m);
4212 int op = 1; // vbsl 4287 int op = 1; // vbsl
4213 emit(0x1E6U * B23 | d * B22 | op * B20 | vn * B16 | vd * B12 | 0x1 * B8 | 4288 emit(0x1E6U * B23 | d * B22 | op * B20 | vn * B16 | vd * B12 | 0x1 * B8 |
4214 n * B7 | B6 | m * B5 | B4 | vm); 4289 n * B7 | B6 | m * B5 | B4 | vm);
4215 } 4290 }
4216 4291
4292 void Assembler::vext(QwNeonRegister dst, const QwNeonRegister src1,
4293 const QwNeonRegister src2, int bytes) {
4294 DCHECK(IsEnabled(NEON));
4295 // Qd = vext(Qn, Qm) SIMD byte extract.
4296 // Instruction details available in ARM DDI 0406C.b, A8-890.
4297 int vd, d;
4298 dst.split_code(&vd, &d);
4299 int vn, n;
4300 src1.split_code(&vn, &n);
4301 int vm, m;
4302 src2.split_code(&vm, &m);
4303 DCHECK_GT(16, bytes);
4304 emit(0x1E5U * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | bytes * B8 |
4305 n * B7 | B6 | m * B5 | vm);
4306 }
4307
4308 void Assembler::vzip(NeonSize size, QwNeonRegister dst,
4309 const QwNeonRegister src) {
4310 DCHECK(IsEnabled(NEON));
4311 // vzip.<size>(Qd, Qm) SIMD zip (interleave).
4312 // Instruction details available in ARM DDI 0406C.b, A8-1102.
4313 int vd, d;
4314 dst.split_code(&vd, &d);
4315 int vm, m;
4316 src.split_code(&vm, &m);
4317 int sz = static_cast<int>(size);
4318 emit(0x1E7U * B23 | d * B22 | 0x3 * B20 | sz * B18 | 2 * B16 | vd * B12 |
4319 0x3 * B7 | B6 | m * B5 | vm);
4320 }
4321
4322 static Instr EncodeNeonVREV(NeonSize op_size, NeonSize size,
4323 const QwNeonRegister dst,
4324 const QwNeonRegister src) {
4325 // Qd = vrev<op_size>.<size>(Qm) SIMD scalar reverse.
4326 // Instruction details available in ARM DDI 0406C.b, A8-1028.
4327 DCHECK_GT(op_size, static_cast<int>(size));
4328 int vd, d;
4329 dst.split_code(&vd, &d);
4330 int vm, m;
4331 src.split_code(&vm, &m);
4332 int sz = static_cast<int>(size);
4333 int op = static_cast<int>(Neon64) - static_cast<int>(op_size);
4334 return 0x1E7U * B23 | d * B22 | 0x3 * B20 | sz * B18 | vd * B12 | op * B7 |
4335 B6 | m * B5 | vm;
4336 }
4337
4338 void Assembler::vrev16(NeonSize size, const QwNeonRegister dst,
4339 const QwNeonRegister src) {
4340 DCHECK(IsEnabled(NEON));
4341 emit(EncodeNeonVREV(Neon16, size, dst, src));
4342 }
4343
4344 void Assembler::vrev32(NeonSize size, const QwNeonRegister dst,
4345 const QwNeonRegister src) {
4346 DCHECK(IsEnabled(NEON));
4347 emit(EncodeNeonVREV(Neon32, size, dst, src));
4348 }
4349
4350 void Assembler::vrev64(NeonSize size, const QwNeonRegister dst,
4351 const QwNeonRegister src) {
4352 DCHECK(IsEnabled(NEON));
4353 emit(EncodeNeonVREV(Neon64, size, dst, src));
4354 }
4355
4217 // Encode NEON vtbl / vtbx instruction. 4356 // Encode NEON vtbl / vtbx instruction.
4218 static Instr EncodeNeonVTB(const DwVfpRegister dst, const NeonListOperand& list, 4357 static Instr EncodeNeonVTB(const DwVfpRegister dst, const NeonListOperand& list,
4219 const DwVfpRegister index, bool vtbx) { 4358 const DwVfpRegister index, bool vtbx) {
4220 // Dd = vtbl(table, Dm) SIMD vector permute, zero at out of range indices. 4359 // Dd = vtbl(table, Dm) SIMD vector permute, zero at out of range indices.
4221 // Instruction details available in ARM DDI 0406C.b, A8-1094. 4360 // Instruction details available in ARM DDI 0406C.b, A8-1094.
4222 // Dd = vtbx(table, Dm) SIMD vector permute, skip out of range indices. 4361 // Dd = vtbx(table, Dm) SIMD vector permute, skip out of range indices.
4223 // Instruction details available in ARM DDI 0406C.b, A8-1094. 4362 // Instruction details available in ARM DDI 0406C.b, A8-1094.
4224 int vd, d; 4363 int vd, d;
4225 dst.split_code(&vd, &d); 4364 dst.split_code(&vd, &d);
4226 int vn, n; 4365 int vn, n;
(...skipping 563 matching lines...) Expand 10 before | Expand all | Expand 10 after
4790 DCHECK(is_uint12(offset)); 4929 DCHECK(is_uint12(offset));
4791 instr_at_put(pc, SetLdrRegisterImmediateOffset(instr, offset)); 4930 instr_at_put(pc, SetLdrRegisterImmediateOffset(instr, offset));
4792 } 4931 }
4793 } 4932 }
4794 4933
4795 4934
4796 } // namespace internal 4935 } // namespace internal
4797 } // namespace v8 4936 } // namespace v8
4798 4937
4799 #endif // V8_TARGET_ARCH_ARM 4938 #endif // V8_TARGET_ARCH_ARM
OLDNEW
« no previous file with comments | « src/arm/assembler-arm.h ('k') | src/arm/disasm-arm.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698