Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: src/arm/assembler-arm.cc

Issue 2868603002: [ARM] Improve VFP register moves. (Closed)
Patch Set: Clean up, renaming. Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 1994-2006 Sun Microsystems Inc. 1 // Copyright (c) 1994-2006 Sun Microsystems Inc.
2 // All Rights Reserved. 2 // All Rights Reserved.
3 // 3 //
4 // Redistribution and use in source and binary forms, with or without 4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions 5 // modification, are permitted provided that the following conditions
6 // are met: 6 // are met:
7 // 7 //
8 // - Redistributions of source code must retain the above copyright notice, 8 // - Redistributions of source code must retain the above copyright notice,
9 // this list of conditions and the following disclaimer. 9 // this list of conditions and the following disclaimer.
10 // 10 //
(...skipping 3892 matching lines...) Expand 10 before | Expand all | Expand 10 after
3903 UNREACHABLE(); 3903 UNREACHABLE();
3904 break; 3904 break;
3905 } 3905 }
3906 int vd, d; 3906 int vd, d;
3907 dst.split_code(&vd, &d); 3907 dst.split_code(&vd, &d);
3908 3908
3909 emit(al | 0x1D * B23 | B * B22 | B21 | vd * B16 | src.code() * B12 | 3909 emit(al | 0x1D * B23 | B * B22 | B21 | vd * B16 | src.code() * B12 |
3910 0xB * B8 | d * B7 | E * B5 | B4); 3910 0xB * B8 | d * B7 | E * B5 | B4);
3911 } 3911 }
3912 3912
3913 void Assembler::vdup(QwNeonRegister dst, SwVfpRegister src) { 3913 enum NeonRegType { NEON_D, NEON_Q };
3914
3915 void NeonSplitCode(NeonRegType type, int code, int* vm, int* m, int* encoding) {
3916 if (type == NEON_D) {
3917 DwVfpRegister::split_code(code, vm, m);
3918 } else {
3919 DCHECK_EQ(type, NEON_Q);
3920 QwNeonRegister::split_code(code, vm, m);
3921 *encoding |= B6;
3922 }
3923 }
3924
3925 static Instr EncodeNeonDupOp(NeonSize size, NeonRegType reg_type, int dst_code,
3926 DwVfpRegister src, int index) {
3927 DCHECK_NE(Neon64, size);
martyn.capewell 2017/05/10 12:38:06 Assert index is in range.
bbudge 2017/05/10 17:54:52 Done.
3928 int sz = static_cast<int>(size);
3929 int imm4 = (1 << sz) | ((index << (sz + 1)) & 0xF);
3930 int qbit = 0;
3931 int vd, d;
3932 NeonSplitCode(reg_type, dst_code, &vd, &d, &qbit);
3933 int vm, m;
3934 src.split_code(&vm, &m);
3935
3936 return 0x1E7U * B23 | d * B22 | 0x3 * B20 | imm4 * B16 | vd * B12 |
3937 0x18 * B7 | qbit | m * B5 | vm;
3938 }
3939
3940 void Assembler::vdup(NeonSize size, DwVfpRegister dst, DwVfpRegister src,
3941 int index) {
3914 DCHECK(IsEnabled(NEON)); 3942 DCHECK(IsEnabled(NEON));
3915 // Instruction details available in ARM DDI 0406C.b, A8-884. 3943 // Instruction details available in ARM DDI 0406C.b, A8-884.
3916 int index = src.code() & 1; 3944 emit(EncodeNeonDupOp(size, NEON_D, dst.code(), src, index));
3917 int d_reg = src.code() / 2; 3945 }
3918 int imm4 = 4 | index << 3; // esize = 32, index in bit 3.
3919 int vd, d;
3920 dst.split_code(&vd, &d);
3921 int vm, m;
3922 DwVfpRegister::from_code(d_reg).split_code(&vm, &m);
3923 3946
3924 emit(0x1E7U * B23 | d * B22 | 0x3 * B20 | imm4 * B16 | vd * B12 | 0x18 * B7 | 3947 void Assembler::vdup(NeonSize size, QwNeonRegister dst, DwVfpRegister src,
3925 B6 | m * B5 | vm); 3948 int index) {
3949 // Instruction details available in ARM DDI 0406C.b, A8-884.
3950 DCHECK(IsEnabled(NEON));
3951 emit(EncodeNeonDupOp(size, NEON_Q, dst.code(), src, index));
3926 } 3952 }
3927 3953
3928 // Encode NEON vcvt.src_type.dst_type instruction. 3954 // Encode NEON vcvt.src_type.dst_type instruction.
3929 static Instr EncodeNeonVCVT(VFPType dst_type, QwNeonRegister dst, 3955 static Instr EncodeNeonVCVT(VFPType dst_type, QwNeonRegister dst,
3930 VFPType src_type, QwNeonRegister src) { 3956 VFPType src_type, QwNeonRegister src) {
3931 DCHECK(src_type != dst_type); 3957 DCHECK(src_type != dst_type);
3932 DCHECK(src_type == F32 || dst_type == F32); 3958 DCHECK(src_type == F32 || dst_type == F32);
3933 // Instruction details available in ARM DDI 0406C.b, A8.8.868. 3959 // Instruction details available in ARM DDI 0406C.b, A8.8.868.
3934 int vd, d; 3960 int vd, d;
3935 dst.split_code(&vd, &d); 3961 dst.split_code(&vd, &d);
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
3970 emit(EncodeNeonVCVT(S32, dst, F32, src)); 3996 emit(EncodeNeonVCVT(S32, dst, F32, src));
3971 } 3997 }
3972 3998
3973 void Assembler::vcvt_u32_f32(QwNeonRegister dst, QwNeonRegister src) { 3999 void Assembler::vcvt_u32_f32(QwNeonRegister dst, QwNeonRegister src) {
3974 DCHECK(IsEnabled(NEON)); 4000 DCHECK(IsEnabled(NEON));
3975 DCHECK(VfpRegisterIsAvailable(dst)); 4001 DCHECK(VfpRegisterIsAvailable(dst));
3976 DCHECK(VfpRegisterIsAvailable(src)); 4002 DCHECK(VfpRegisterIsAvailable(src));
3977 emit(EncodeNeonVCVT(U32, dst, F32, src)); 4003 emit(EncodeNeonVCVT(U32, dst, F32, src));
3978 } 4004 }
3979 4005
3980 enum NeonRegType { NEON_D, NEON_Q };
3981
3982 void NeonSplitCode(NeonRegType type, int code, int* vm, int* m, int* encoding) {
3983 if (type == NEON_D) {
3984 DwVfpRegister::split_code(code, vm, m);
3985 } else {
3986 DCHECK_EQ(type, NEON_Q);
3987 QwNeonRegister::split_code(code, vm, m);
3988 *encoding |= B6;
3989 }
3990 }
3991
3992 enum UnaryOp { VMVN, VSWP, VABS, VABSF, VNEG, VNEGF }; 4006 enum UnaryOp { VMVN, VSWP, VABS, VABSF, VNEG, VNEGF };
3993 4007
3994 static Instr EncodeNeonUnaryOp(UnaryOp op, NeonRegType reg_type, NeonSize size, 4008 static Instr EncodeNeonUnaryOp(UnaryOp op, NeonRegType reg_type, NeonSize size,
3995 int dst_code, int src_code) { 4009 int dst_code, int src_code) {
3996 int op_encoding = 0; 4010 int op_encoding = 0;
3997 switch (op) { 4011 switch (op) {
3998 case VMVN: 4012 case VMVN:
3999 DCHECK_EQ(Neon8, size); // size == 0 for vmvn 4013 DCHECK_EQ(Neon8, size); // size == 0 for vmvn
4000 op_encoding = B10 | 0x3 * B7; 4014 op_encoding = B10 | 0x3 * B7;
4001 break; 4015 break;
(...skipping 394 matching lines...) Expand 10 before | Expand all | Expand 10 after
4396 } 4410 }
4397 4411
4398 void Assembler::vmax(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1, 4412 void Assembler::vmax(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
4399 QwNeonRegister src2) { 4413 QwNeonRegister src2) {
4400 DCHECK(IsEnabled(NEON)); 4414 DCHECK(IsEnabled(NEON));
4401 // Qd = vmax(Qn, Qm) SIMD integer MAX. 4415 // Qd = vmax(Qn, Qm) SIMD integer MAX.
4402 // Instruction details available in ARM DDI 0406C.b, A8-926. 4416 // Instruction details available in ARM DDI 0406C.b, A8-926.
4403 emit(EncodeNeonBinOp(VMAX, dt, dst, src1, src2)); 4417 emit(EncodeNeonBinOp(VMAX, dt, dst, src1, src2));
4404 } 4418 }
4405 4419
4406 enum NeonShiftOp { VSHL, VSHR }; 4420 enum NeonShiftOp { VSHL, VSHR, VSLI, VSRI };
4407 4421
4408 static Instr EncodeNeonShiftOp(NeonShiftOp op, NeonDataType dt, 4422 static Instr EncodeNeonShiftOp(NeonShiftOp op, NeonSize size, bool is_unsigned,
4409 QwNeonRegister dst, QwNeonRegister src, 4423 NeonRegType reg_type, int dst_code, int src_code,
4410 int shift) { 4424 int shift) {
4425 int imm6 = 0;
4426 int size_in_bits = kBitsPerByte << static_cast<int>(size);
4427 int op_encoding = 0;
4428 switch (op) {
4429 case VSHL: {
4430 DCHECK(shift >= 0 && size_in_bits > shift);
4431 imm6 = size_in_bits + shift;
4432 op_encoding = 0x5 * B8;
4433 break;
4434 }
4435 case VSHR: {
4436 DCHECK(shift > 0 && size_in_bits >= shift);
4437 imm6 = 2 * size_in_bits - shift;
4438 if (is_unsigned) op_encoding |= B24;
4439 break;
4440 }
4441 case VSLI: {
4442 DCHECK(shift >= 0 && size_in_bits > shift);
4443 imm6 = size_in_bits + shift;
4444 int L = imm6 >> 6;
4445 imm6 &= 0x3F;
4446 op_encoding = B24 | 0x5 * B8 | L * B7;
4447 break;
4448 }
4449 case VSRI: {
4450 DCHECK(shift > 0 && size_in_bits >= shift);
4451 imm6 = 2 * size_in_bits - shift;
4452 int L = imm6 >> 6;
4453 imm6 &= 0x3F;
4454 op_encoding = B24 | 0x4 * B8 | L * B7;
4455 break;
4456 }
4457 default:
4458 UNREACHABLE();
4459 break;
4460 }
4461
4411 int vd, d; 4462 int vd, d;
4412 dst.split_code(&vd, &d); 4463 NeonSplitCode(reg_type, dst_code, &vd, &d, &op_encoding);
4413 int vm, m; 4464 int vm, m;
4414 src.split_code(&vm, &m); 4465 NeonSplitCode(reg_type, src_code, &vm, &m, &op_encoding);
4415 int size_in_bits = kBitsPerByte << NeonSz(dt); 4466
4416 int op_encoding = 0; 4467 return 0x1E5U * B23 | d * B22 | imm6 * B16 | vd * B12 | m * B5 | B4 | vm |
4417 int imm6 = 0; 4468 op_encoding;
4418 if (op == VSHL) {
4419 DCHECK(shift >= 0 && size_in_bits > shift);
4420 imm6 = size_in_bits + shift;
4421 op_encoding = 0x5 * B8;
4422 } else {
4423 DCHECK_EQ(VSHR, op);
4424 DCHECK(shift > 0 && size_in_bits >= shift);
4425 imm6 = 2 * size_in_bits - shift;
4426 op_encoding = NeonU(dt) * B24;
4427 }
4428 return 0x1E5U * B23 | d * B22 | imm6 * B16 | vd * B12 | B6 | m * B5 | B4 |
4429 vm | op_encoding;
4430 } 4469 }
4431 4470
4432 void Assembler::vshl(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src, 4471 void Assembler::vshl(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src,
4433 int shift) { 4472 int shift) {
4434 DCHECK(IsEnabled(NEON)); 4473 DCHECK(IsEnabled(NEON));
4435 // Qd = vshl(Qm, bits) SIMD shift left immediate. 4474 // Qd = vshl(Qm, bits) SIMD shift left immediate.
4436 // Instruction details available in ARM DDI 0406C.b, A8-1046. 4475 // Instruction details available in ARM DDI 0406C.b, A8-1046.
4437 emit(EncodeNeonShiftOp(VSHL, dt, dst, src, shift)); 4476 emit(EncodeNeonShiftOp(VSHL, NeonDataTypeToSize(dt), false, NEON_Q,
4477 dst.code(), src.code(), shift));
4438 } 4478 }
4439 4479
4440 void Assembler::vshr(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src, 4480 void Assembler::vshr(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src,
4441 int shift) { 4481 int shift) {
4442 DCHECK(IsEnabled(NEON)); 4482 DCHECK(IsEnabled(NEON));
4443 // Qd = vshr(Qm, bits) SIMD shift right immediate. 4483 // Qd = vshr(Qm, bits) SIMD shift right immediate.
4444 // Instruction details available in ARM DDI 0406C.b, A8-1052. 4484 // Instruction details available in ARM DDI 0406C.b, A8-1052.
4445 emit(EncodeNeonShiftOp(VSHR, dt, dst, src, shift)); 4485 emit(EncodeNeonShiftOp(VSHR, NeonDataTypeToSize(dt), NeonU(dt), NEON_Q,
4486 dst.code(), src.code(), shift));
4487 }
4488
4489 void Assembler::vsli(NeonSize size, DwVfpRegister dst, DwVfpRegister src,
4490 int shift) {
4491 DCHECK(IsEnabled(NEON));
4492 // Dd = vsli(Dm, bits) SIMD shift left and insert.
4493 // Instruction details available in ARM DDI 0406C.b, A8-1056.
4494 emit(EncodeNeonShiftOp(VSLI, size, false, NEON_D, dst.code(), src.code(),
4495 shift));
4496 }
4497
4498 void Assembler::vsri(NeonSize size, DwVfpRegister dst, DwVfpRegister src,
4499 int shift) {
4500 DCHECK(IsEnabled(NEON));
4501 // Dd = vsri(Dm, bits) SIMD shift right and insert.
4502 // Instruction details available in ARM DDI 0406C.b, A8-1062.
4503 emit(EncodeNeonShiftOp(VSRI, size, false, NEON_D, dst.code(), src.code(),
4504 shift));
4446 } 4505 }
4447 4506
4448 static Instr EncodeNeonEstimateOp(bool is_rsqrt, QwNeonRegister dst, 4507 static Instr EncodeNeonEstimateOp(bool is_rsqrt, QwNeonRegister dst,
4449 QwNeonRegister src) { 4508 QwNeonRegister src) {
4450 int vd, d; 4509 int vd, d;
4451 dst.split_code(&vd, &d); 4510 dst.split_code(&vd, &d);
4452 int vm, m; 4511 int vm, m;
4453 src.split_code(&vm, &m); 4512 src.split_code(&vm, &m);
4454 int rsqrt = is_rsqrt ? 1 : 0; 4513 int rsqrt = is_rsqrt ? 1 : 0;
4455 return 0x1E7U * B23 | d * B22 | 0x3B * B16 | vd * B12 | 0x5 * B8 | 4514 return 0x1E7U * B23 | d * B22 | 0x3B * B16 | vd * B12 | 0x5 * B8 |
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
4532 4591
4533 emit(0x1E6U * B23 | d * B22 | vn * B16 | vd * B12 | 0xD * B8 | n * B7 | 4592 emit(0x1E6U * B23 | d * B22 | vn * B16 | vd * B12 | 0xD * B8 | n * B7 |
4534 m * B5 | vm); 4593 m * B5 | vm);
4535 } 4594 }
4536 4595
4537 void Assembler::vpadd(NeonSize size, DwVfpRegister dst, DwVfpRegister src1, 4596 void Assembler::vpadd(NeonSize size, DwVfpRegister dst, DwVfpRegister src1,
4538 DwVfpRegister src2) { 4597 DwVfpRegister src2) {
4539 DCHECK(IsEnabled(NEON)); 4598 DCHECK(IsEnabled(NEON));
4540 // Dd = vpadd(Dn, Dm) SIMD integer pairwise ADD. 4599 // Dd = vpadd(Dn, Dm) SIMD integer pairwise ADD.
4541 // Instruction details available in ARM DDI 0406C.b, A8-980. 4600 // Instruction details available in ARM DDI 0406C.b, A8-980.
4542 emit(EncodeNeonPairwiseOp(VPADD, NeonSizeToDatatype(size), dst, src1, src2)); 4601 emit(EncodeNeonPairwiseOp(VPADD, NeonSizeToDataType(size), dst, src1, src2));
4543 } 4602 }
4544 4603
4545 void Assembler::vpmin(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src1, 4604 void Assembler::vpmin(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src1,
4546 DwVfpRegister src2) { 4605 DwVfpRegister src2) {
4547 DCHECK(IsEnabled(NEON)); 4606 DCHECK(IsEnabled(NEON));
4548 // Dd = vpmin(Dn, Dm) SIMD integer pairwise MIN. 4607 // Dd = vpmin(Dn, Dm) SIMD integer pairwise MIN.
4549 // Instruction details available in ARM DDI 0406C.b, A8-986. 4608 // Instruction details available in ARM DDI 0406C.b, A8-986.
4550 emit(EncodeNeonPairwiseOp(VPMIN, dt, dst, src1, src2)); 4609 emit(EncodeNeonPairwiseOp(VPMIN, dt, dst, src1, src2));
4551 } 4610 }
4552 4611
(...skipping 716 matching lines...) Expand 10 before | Expand all | Expand 10 after
5269 } 5328 }
5270 5329
5271 void PatchingAssembler::FlushICache(Isolate* isolate) { 5330 void PatchingAssembler::FlushICache(Isolate* isolate) {
5272 Assembler::FlushICache(isolate, buffer_, buffer_size_ - kGap); 5331 Assembler::FlushICache(isolate, buffer_, buffer_size_ - kGap);
5273 } 5332 }
5274 5333
5275 } // namespace internal 5334 } // namespace internal
5276 } // namespace v8 5335 } // namespace v8
5277 5336
5278 #endif // V8_TARGET_ARCH_ARM 5337 #endif // V8_TARGET_ARCH_ARM
OLDNEW
« no previous file with comments | « src/arm/assembler-arm.h ('k') | src/arm/constants-arm.h » ('j') | src/arm/disasm-arm.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698