OLD | NEW |
---|---|
1 // Copyright (c) 1994-2006 Sun Microsystems Inc. | 1 // Copyright (c) 1994-2006 Sun Microsystems Inc. |
2 // All Rights Reserved. | 2 // All Rights Reserved. |
3 // | 3 // |
4 // Redistribution and use in source and binary forms, with or without | 4 // Redistribution and use in source and binary forms, with or without |
5 // modification, are permitted provided that the following conditions | 5 // modification, are permitted provided that the following conditions |
6 // are met: | 6 // are met: |
7 // | 7 // |
8 // - Redistributions of source code must retain the above copyright notice, | 8 // - Redistributions of source code must retain the above copyright notice, |
9 // this list of conditions and the following disclaimer. | 9 // this list of conditions and the following disclaimer. |
10 // | 10 // |
(...skipping 4056 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4067 } | 4067 } |
4068 | 4068 |
4069 void Assembler::vcvt_u32_f32(const QwNeonRegister dst, | 4069 void Assembler::vcvt_u32_f32(const QwNeonRegister dst, |
4070 const QwNeonRegister src) { | 4070 const QwNeonRegister src) { |
4071 DCHECK(IsEnabled(NEON)); | 4071 DCHECK(IsEnabled(NEON)); |
4072 DCHECK(VfpRegisterIsAvailable(dst)); | 4072 DCHECK(VfpRegisterIsAvailable(dst)); |
4073 DCHECK(VfpRegisterIsAvailable(src)); | 4073 DCHECK(VfpRegisterIsAvailable(src)); |
4074 emit(EncodeNeonVCVT(U32, dst, F32, src)); | 4074 emit(EncodeNeonVCVT(U32, dst, F32, src)); |
4075 } | 4075 } |
4076 | 4076 |
4077 // op is instr->Bits(11, 7). | |
4078 static Instr EncodeNeonUnaryOp(int op, bool is_float, NeonSize size, | |
4079 const QwNeonRegister dst, | |
4080 const QwNeonRegister src) { | |
4081 int vd, d; | |
Rodolph Perfetta (ARM)
2016/12/16 20:50:11
DCHECK is_float => size is Neon32
bbudge
2016/12/17 01:08:32
Done.
| |
4082 dst.split_code(&vd, &d); | |
4083 int vm, m; | |
4084 src.split_code(&vm, &m); | |
4085 int F = is_float ? 1 : 0; | |
4086 return 0x1E7U * B23 | d * B22 | 0x3 * B20 | size * B18 | B16 | vd * B12 | | |
4087 F * B10 | B8 | op * B7 | B6 | m * B5 | vm; | |
4088 } | |
4089 | |
4090 void Assembler::vabs(const QwNeonRegister dst, const QwNeonRegister src) { | |
4091 // Qd = vabs.f<size>(Qn, Qm) SIMD floating point absolute value. | |
4092 // Instruction details available in ARM DDI 0406C.b, A8.8.824. | |
4093 DCHECK(IsEnabled(NEON)); | |
4094 emit(EncodeNeonUnaryOp(0x6, true, Neon32, dst, src)); | |
4095 } | |
4096 | |
4097 void Assembler::vabs(NeonSize size, const QwNeonRegister dst, | |
4098 const QwNeonRegister src) { | |
4099 // Qd = vabs.s<size>(Qn, Qm) SIMD integer absolute value. | |
4100 // Instruction details available in ARM DDI 0406C.b, A8.8.824. | |
4101 DCHECK(IsEnabled(NEON)); | |
4102 emit(EncodeNeonUnaryOp(0x6, false, size, dst, src)); | |
4103 } | |
4104 | |
4105 void Assembler::vneg(const QwNeonRegister dst, const QwNeonRegister src) { | |
4106 // Qd = vneg.f<size>(Qn, Qm) SIMD floating point negate. | |
4107 // Instruction details available in ARM DDI 0406C.b, A8.8.968. | |
4108 DCHECK(IsEnabled(NEON)); | |
4109 emit(EncodeNeonUnaryOp(0x7, true, Neon32, dst, src)); | |
4110 } | |
4111 | |
4112 void Assembler::vneg(NeonSize size, const QwNeonRegister dst, | |
4113 const QwNeonRegister src) { | |
4114 // Qd = vneg.s<size>(Qn, Qm) SIMD integer negate. | |
4115 // Instruction details available in ARM DDI 0406C.b, A8.8.968. | |
4116 DCHECK(IsEnabled(NEON)); | |
4117 emit(EncodeNeonUnaryOp(0x7, false, size, dst, src)); | |
4118 } | |
4119 | |
4077 void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1, | 4120 void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1, |
4078 DwVfpRegister src2) { | 4121 DwVfpRegister src2) { |
4079 // Dd = veor(Dn, Dm) 64 bit integer exclusive OR. | 4122 // Dd = veor(Dn, Dm) 64 bit integer exclusive OR. |
4080 // Instruction details available in ARM DDI 0406C.b, A8.8.888. | 4123 // Instruction details available in ARM DDI 0406C.b, A8.8.888. |
4081 DCHECK(IsEnabled(NEON)); | 4124 DCHECK(IsEnabled(NEON)); |
4082 int vd, d; | 4125 int vd, d; |
4083 dst.split_code(&vd, &d); | 4126 dst.split_code(&vd, &d); |
4084 int vn, n; | 4127 int vn, n; |
4085 src1.split_code(&vn, &n); | 4128 src1.split_code(&vn, &n); |
4086 int vm, m; | 4129 int vm, m; |
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4159 dst.split_code(&vd, &d); | 4202 dst.split_code(&vd, &d); |
4160 int vn, n; | 4203 int vn, n; |
4161 src1.split_code(&vn, &n); | 4204 src1.split_code(&vn, &n); |
4162 int vm, m; | 4205 int vm, m; |
4163 src2.split_code(&vm, &m); | 4206 src2.split_code(&vm, &m); |
4164 int sz = static_cast<int>(size); | 4207 int sz = static_cast<int>(size); |
4165 emit(0x1E6U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 | | 4208 emit(0x1E6U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 | |
4166 n * B7 | B6 | m * B5 | vm); | 4209 n * B7 | B6 | m * B5 | vm); |
4167 } | 4210 } |
4168 | 4211 |
4212 void Assembler::vmul(QwNeonRegister dst, const QwNeonRegister src1, | |
4213 const QwNeonRegister src2) { | |
4214 DCHECK(IsEnabled(NEON)); | |
4215 // Qd = vmul(Qn, Qm) SIMD floating point multiply. | |
4216 // Instruction details available in ARM DDI 0406C.b, A8-958. | |
4217 int vd, d; | |
4218 dst.split_code(&vd, &d); | |
4219 int vn, n; | |
4220 src1.split_code(&vn, &n); | |
4221 int vm, m; | |
4222 src2.split_code(&vm, &m); | |
4223 emit(0x1E6U * B23 | d * B22 | vn * B16 | vd * B12 | 0xD * B8 | n * B7 | B6 | | |
4224 m * B5 | B4 | vm); | |
4225 } | |
4226 | |
4227 void Assembler::vmul(NeonSize size, QwNeonRegister dst, | |
4228 const QwNeonRegister src1, const QwNeonRegister src2) { | |
4229 DCHECK(IsEnabled(NEON)); | |
4230 // Qd = vmul(Qn, Qm) SIMD integer multiply. | |
4231 // Instruction details available in ARM DDI 0406C.b, A8-960. | |
4232 int vd, d; | |
4233 dst.split_code(&vd, &d); | |
4234 int vn, n; | |
4235 src1.split_code(&vn, &n); | |
4236 int vm, m; | |
4237 src2.split_code(&vm, &m); | |
4238 int sz = static_cast<int>(size); | |
4239 emit(0x1E4U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x9 * B8 | | |
4240 n * B7 | B6 | m * B5 | B4 | vm); | |
4241 } | |
4242 | |
4169 void Assembler::vtst(NeonSize size, QwNeonRegister dst, | 4243 void Assembler::vtst(NeonSize size, QwNeonRegister dst, |
4170 const QwNeonRegister src1, const QwNeonRegister src2) { | 4244 const QwNeonRegister src1, const QwNeonRegister src2) { |
4171 DCHECK(IsEnabled(NEON)); | 4245 DCHECK(IsEnabled(NEON)); |
4172 // Qd = vtst(Qn, Qm) SIMD test integer operands. | 4246 // Qd = vtst(Qn, Qm) SIMD test integer operands. |
4173 // Instruction details available in ARM DDI 0406C.b, A8-1098. | 4247 // Instruction details available in ARM DDI 0406C.b, A8-1098. |
4174 int vd, d; | 4248 int vd, d; |
4175 dst.split_code(&vd, &d); | 4249 dst.split_code(&vd, &d); |
4176 int vn, n; | 4250 int vn, n; |
4177 src1.split_code(&vn, &n); | 4251 src1.split_code(&vn, &n); |
4178 int vm, m; | 4252 int vm, m; |
4179 src2.split_code(&vm, &m); | 4253 src2.split_code(&vm, &m); |
4180 int sz = static_cast<int>(size); | 4254 int sz = static_cast<int>(size); |
4181 emit(0x1E4U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 | | 4255 emit(0x1E4U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 | |
4182 n * B7 | B6 | m * B5 | B4 | vm); | 4256 n * B7 | B6 | m * B5 | B4 | vm); |
4183 } | 4257 } |
4184 | 4258 |
4185 void Assembler::vceq(NeonSize size, QwNeonRegister dst, | 4259 void Assembler::vceq(NeonSize size, QwNeonRegister dst, |
4186 const QwNeonRegister src1, const QwNeonRegister src2) { | 4260 const QwNeonRegister src1, const QwNeonRegister src2) { |
4187 DCHECK(IsEnabled(NEON)); | 4261 DCHECK(IsEnabled(NEON)); |
4188 // Qd = vceq(Qn, Qm) SIMD integer compare equal. | 4262 // Qd = vceq(Qn, Qm) SIMD bitwise compare equal. |
4189 // Instruction details available in ARM DDI 0406C.b, A8-844. | 4263 // Instruction details available in ARM DDI 0406C.b, A8-844. |
4190 int vd, d; | 4264 int vd, d; |
4191 dst.split_code(&vd, &d); | 4265 dst.split_code(&vd, &d); |
4192 int vn, n; | 4266 int vn, n; |
4193 src1.split_code(&vn, &n); | 4267 src1.split_code(&vn, &n); |
4194 int vm, m; | 4268 int vm, m; |
4195 src2.split_code(&vm, &m); | 4269 src2.split_code(&vm, &m); |
4196 int sz = static_cast<int>(size); | 4270 int sz = static_cast<int>(size); |
4197 emit(0x1E6U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 | | 4271 emit(0x1E6U * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 | |
4198 n * B7 | B6 | m * B5 | B4 | vm); | 4272 n * B7 | B6 | m * B5 | B4 | vm); |
4199 } | 4273 } |
4200 | 4274 |
4201 void Assembler::vbsl(QwNeonRegister dst, const QwNeonRegister src1, | 4275 void Assembler::vbsl(QwNeonRegister dst, const QwNeonRegister src1, |
4202 const QwNeonRegister src2) { | 4276 const QwNeonRegister src2) { |
4203 DCHECK(IsEnabled(NEON)); | 4277 DCHECK(IsEnabled(NEON)); |
4204 // Qd = vbsl(Qn, Qm) SIMD bitwise select. | 4278 // Qd = vbsl(Qn, Qm) SIMD bitwise select. |
4205 // Instruction details available in ARM DDI 0406C.b, A8-844. | 4279 // Instruction details available in ARM DDI 0406C.b, A8-844. |
4206 int vd, d; | 4280 int vd, d; |
4207 dst.split_code(&vd, &d); | 4281 dst.split_code(&vd, &d); |
4208 int vn, n; | 4282 int vn, n; |
4209 src1.split_code(&vn, &n); | 4283 src1.split_code(&vn, &n); |
4210 int vm, m; | 4284 int vm, m; |
4211 src2.split_code(&vm, &m); | 4285 src2.split_code(&vm, &m); |
4212 int op = 1; // vbsl | 4286 int op = 1; // vbsl |
4213 emit(0x1E6U * B23 | d * B22 | op * B20 | vn * B16 | vd * B12 | 0x1 * B8 | | 4287 emit(0x1E6U * B23 | d * B22 | op * B20 | vn * B16 | vd * B12 | 0x1 * B8 | |
4214 n * B7 | B6 | m * B5 | B4 | vm); | 4288 n * B7 | B6 | m * B5 | B4 | vm); |
4215 } | 4289 } |
4216 | 4290 |
4291 void Assembler::vext(QwNeonRegister dst, const QwNeonRegister src1, | |
4292 const QwNeonRegister src2, int bytes) { | |
4293 DCHECK(IsEnabled(NEON)); | |
4294 // Qd = vext(Qn, Qm) SIMD byte extract. | |
4295 // Instruction details available in ARM DDI 0406C.b, A8-890. | |
4296 int vd, d; | |
4297 dst.split_code(&vd, &d); | |
4298 int vn, n; | |
4299 src1.split_code(&vn, &n); | |
4300 int vm, m; | |
4301 src2.split_code(&vm, &m); | |
4302 DCHECK_GT(16, bytes); | |
4303 emit(0x1E5U * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | bytes * B8 | | |
4304 n * B7 | B6 | m * B5 | vm); | |
4305 } | |
4306 | |
4307 void Assembler::vzip(NeonSize size, QwNeonRegister dst, | |
4308 const QwNeonRegister src) { | |
4309 DCHECK(IsEnabled(NEON)); | |
4310 // Qd = vzip.<size>(Qn, Qm) SIMD zip (interleave). | |
4311 // Instruction details available in ARM DDI 0406C.b, A8-1102. | |
4312 int vd, d; | |
4313 dst.split_code(&vd, &d); | |
4314 int vm, m; | |
4315 src.split_code(&vm, &m); | |
4316 int sz = static_cast<int>(size); | |
4317 emit(0x1E7U * B23 | d * B22 | 0x3 * B20 | sz * B18 | 2 * B16 | vd * B12 | B8 | | |
Rodolph Perfetta (ARM)
2016/12/16 20:50:11
no need for B8, you have 0x3 * B7 to cover that bit.
bbudge
2016/12/17 01:08:32
Good catch, done!
| |
4318 0x3 * B7 | B6 | m * B5 | vm); | |
4319 } | |
4320 | |
4321 void Assembler::vrev(NeonSize op_size, NeonSize element_size, | |
4322 const QwNeonRegister dst, const QwNeonRegister src) { | |
4323 DCHECK(IsEnabled(NEON)); | |
4324 DCHECK_GT(static_cast<int>(op_size), static_cast<int>(element_size)); | |
4325 // Qd = vrev<op>.<size>(Qn, Qm) SIMD scalar reverse. | |
4326 // Instruction details available in ARM DDI 0406C.b, A8-1028. | |
4327 int vd, d; | |
4328 dst.split_code(&vd, &d); | |
4329 int vm, m; | |
4330 src.split_code(&vm, &m); | |
4331 int sz = static_cast<int>(element_size); | |
4332 int op = static_cast<int>(Neon64) - static_cast<int>(op_size); | |
4333 emit(0x1E7U * B23 | d * B22 | 0x3 * B20 | sz * B18 | vd * B12 | op * B7 | B6 | | |
4334 m * B5 | vm); | |
4335 } | |
4336 | |
4217 // Encode NEON vtbl / vtbx instruction. | 4337 // Encode NEON vtbl / vtbx instruction. |
4218 static Instr EncodeNeonVTB(const DwVfpRegister dst, const NeonListOperand& list, | 4338 static Instr EncodeNeonVTB(const DwVfpRegister dst, const NeonListOperand& list, |
4219 const DwVfpRegister index, bool vtbx) { | 4339 const DwVfpRegister index, bool vtbx) { |
4220 // Dd = vtbl(table, Dm) SIMD vector permute, zero at out of range indices. | 4340 // Dd = vtbl(table, Dm) SIMD vector permute, zero at out of range indices. |
4221 // Instruction details available in ARM DDI 0406C.b, A8-1094. | 4341 // Instruction details available in ARM DDI 0406C.b, A8-1094. |
4222 // Dd = vtbx(table, Dm) SIMD vector permute, skip out of range indices. | 4342 // Dd = vtbx(table, Dm) SIMD vector permute, skip out of range indices. |
4223 // Instruction details available in ARM DDI 0406C.b, A8-1094. | 4343 // Instruction details available in ARM DDI 0406C.b, A8-1094. |
4224 int vd, d; | 4344 int vd, d; |
4225 dst.split_code(&vd, &d); | 4345 dst.split_code(&vd, &d); |
4226 int vn, n; | 4346 int vn, n; |
(...skipping 563 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4790 DCHECK(is_uint12(offset)); | 4910 DCHECK(is_uint12(offset)); |
4791 instr_at_put(pc, SetLdrRegisterImmediateOffset(instr, offset)); | 4911 instr_at_put(pc, SetLdrRegisterImmediateOffset(instr, offset)); |
4792 } | 4912 } |
4793 } | 4913 } |
4794 | 4914 |
4795 | 4915 |
4796 } // namespace internal | 4916 } // namespace internal |
4797 } // namespace v8 | 4917 } // namespace v8 |
4798 | 4918 |
4799 #endif // V8_TARGET_ARCH_ARM | 4919 #endif // V8_TARGET_ARCH_ARM |
OLD | NEW |