OLD | NEW |
---|---|
1 // Copyright (c) 1994-2006 Sun Microsystems Inc. | 1 // Copyright (c) 1994-2006 Sun Microsystems Inc. |
2 // All Rights Reserved. | 2 // All Rights Reserved. |
3 // | 3 // |
4 // Redistribution and use in source and binary forms, with or without | 4 // Redistribution and use in source and binary forms, with or without |
5 // modification, are permitted provided that the following conditions | 5 // modification, are permitted provided that the following conditions |
6 // are met: | 6 // are met: |
7 // | 7 // |
8 // - Redistributions of source code must retain the above copyright notice, | 8 // - Redistributions of source code must retain the above copyright notice, |
9 // this list of conditions and the following disclaimer. | 9 // this list of conditions and the following disclaimer. |
10 // | 10 // |
(...skipping 3885 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3896 // Instruction details available in ARM DDI 0406C.b, A8.8.320. | 3896 // Instruction details available in ARM DDI 0406C.b, A8.8.320. |
3897 // 1111(31-28) | 01000(27-23) | D(22) | 10(21-20) | Rn(19-16) | | 3897 // 1111(31-28) | 01000(27-23) | D(22) | 10(21-20) | Rn(19-16) | |
3898 // Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0) | 3898 // Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0) |
3899 DCHECK(IsEnabled(NEON)); | 3899 DCHECK(IsEnabled(NEON)); |
3900 int vd, d; | 3900 int vd, d; |
3901 dst.base().split_code(&vd, &d); | 3901 dst.base().split_code(&vd, &d); |
3902 emit(0xFU*B28 | 4*B24 | d*B22 | 2*B20 | src.rn().code()*B16 | vd*B12 | | 3902 emit(0xFU*B28 | 4*B24 | d*B22 | 2*B20 | src.rn().code()*B16 | vd*B12 | |
3903 dst.type()*B8 | size*B6 | src.align()*B4 | src.rm().code()); | 3903 dst.type()*B8 | size*B6 | src.align()*B4 | src.rm().code()); |
3904 } | 3904 } |
3905 | 3905 |
3906 | 3906 void Assembler::vst1(NeonSize size, const NeonListOperand& src, |
3907 void Assembler::vst1(NeonSize size, | |
3908 const NeonListOperand& src, | |
3909 const NeonMemOperand& dst) { | 3907 const NeonMemOperand& dst) { |
3910 // Instruction details available in ARM DDI 0406C.b, A8.8.404. | 3908 // Instruction details available in ARM DDI 0406C.b, A8.8.404. |
3911 // 1111(31-28) | 01000(27-23) | D(22) | 00(21-20) | Rn(19-16) | | 3909 // 1111(31-28) | 01000(27-23) | D(22) | 00(21-20) | Rn(19-16) | |
3912 // Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0) | 3910 // Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0) |
3913 DCHECK(IsEnabled(NEON)); | 3911 DCHECK(IsEnabled(NEON)); |
3914 int vd, d; | 3912 int vd, d; |
3915 src.base().split_code(&vd, &d); | 3913 src.base().split_code(&vd, &d); |
3916 emit(0xFU*B28 | 4*B24 | d*B22 | dst.rn().code()*B16 | vd*B12 | src.type()*B8 | | 3914 emit(0xFU*B28 | 4*B24 | d*B22 | dst.rn().code()*B16 | vd*B12 | src.type()*B8 | |
3917 size*B6 | dst.align()*B4 | dst.rm().code()); | 3915 size*B6 | dst.align()*B4 | dst.rm().code()); |
3918 } | 3916 } |
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3977 // vmov Arm scalar to core register. | 3975 // vmov Arm scalar to core register. |
3978 DCHECK(dt == NeonS32 || dt == NeonU32 || IsEnabled(NEON)); | 3976 DCHECK(dt == NeonS32 || dt == NeonU32 || IsEnabled(NEON)); |
3979 int vn, n; | 3977 int vn, n; |
3980 src.split_code(&vn, &n); | 3978 src.split_code(&vn, &n); |
3981 int opc1_opc2 = EncodeScalar(dt, index); | 3979 int opc1_opc2 = EncodeScalar(dt, index); |
3982 int u = NeonU(dt); | 3980 int u = NeonU(dt); |
3983 emit(0xEEu * B24 | u * B23 | B20 | vn * B16 | dst.code() * B12 | 0xB * B8 | | 3981 emit(0xEEu * B24 | u * B23 | B20 | vn * B16 | dst.code() * B12 | 0xB * B8 | |
3984 n * B7 | B4 | opc1_opc2); | 3982 n * B7 | B4 | opc1_opc2); |
3985 } | 3983 } |
3986 | 3984 |
3987 void Assembler::vmov(const QwNeonRegister dst, const QwNeonRegister src) { | 3985 void Assembler::vmov(QwNeonRegister dst, QwNeonRegister src) { |
3988 // Instruction details available in ARM DDI 0406C.b, A8-938. | 3986 // Instruction details available in ARM DDI 0406C.b, A8-938. |
3989 // vmov is encoded as vorr. | 3987 // vmov is encoded as vorr. |
3990 vorr(dst, src, src); | 3988 vorr(dst, src, src); |
3991 } | 3989 } |
3992 | 3990 |
3993 void Assembler::vmvn(const QwNeonRegister dst, const QwNeonRegister src) { | 3991 void Assembler::vdup(NeonSize size, QwNeonRegister dst, Register src) { |
3994 DCHECK(IsEnabled(NEON)); | |
3995 // Instruction details available in ARM DDI 0406C.b, A8-966. | |
3996 DCHECK(VfpRegisterIsAvailable(dst)); | |
3997 DCHECK(VfpRegisterIsAvailable(src)); | |
3998 int vd, d; | |
3999 dst.split_code(&vd, &d); | |
4000 int vm, m; | |
4001 src.split_code(&vm, &m); | |
4002 emit(0x1E7U * B23 | d * B22 | 3 * B20 | vd * B12 | 0x17 * B6 | m * B5 | vm); | |
4003 } | |
4004 | |
4005 void Assembler::vswp(DwVfpRegister dst, DwVfpRegister src) { | |
4006 // Instruction details available in ARM DDI 0406C.b, A8.8.418. | |
4007 // 1111(31-28) | 00111(27-23) | D(22) | 110010(21-16) | | |
4008 // Vd(15-12) | 000000(11-6) | M(5) | 0(4) | Vm(3-0) | |
4009 DCHECK(IsEnabled(NEON)); | |
4010 int vd, d; | |
4011 dst.split_code(&vd, &d); | |
4012 int vm, m; | |
4013 src.split_code(&vm, &m); | |
4014 emit(0xFU * B28 | 7 * B23 | d * B22 | 0x32 * B16 | vd * B12 | m * B5 | vm); | |
4015 } | |
4016 | |
4017 void Assembler::vswp(QwNeonRegister dst, QwNeonRegister src) { | |
4018 // Instruction details available in ARM DDI 0406C.b, A8.8.418. | |
4019 // 1111(31-28) | 00111(27-23) | D(22) | 110010(21-16) | | |
4020 // Vd(15-12) | 000000(11-6) | M(5) | 0(4) | Vm(3-0) | |
4021 DCHECK(IsEnabled(NEON)); | |
4022 int vd, d; | |
4023 dst.split_code(&vd, &d); | |
4024 int vm, m; | |
4025 src.split_code(&vm, &m); | |
4026 emit(0xFU * B28 | 7 * B23 | d * B22 | 0x32 * B16 | vd * B12 | B6 | m * B5 | | |
4027 vm); | |
4028 } | |
4029 | |
4030 void Assembler::vdup(NeonSize size, const QwNeonRegister dst, | |
4031 const Register src) { | |
4032 DCHECK(IsEnabled(NEON)); | 3992 DCHECK(IsEnabled(NEON)); |
4033 // Instruction details available in ARM DDI 0406C.b, A8-886. | 3993 // Instruction details available in ARM DDI 0406C.b, A8-886. |
4034 int B = 0, E = 0; | 3994 int B = 0, E = 0; |
4035 switch (size) { | 3995 switch (size) { |
4036 case Neon8: | 3996 case Neon8: |
4037 B = 1; | 3997 B = 1; |
4038 break; | 3998 break; |
4039 case Neon16: | 3999 case Neon16: |
4040 E = 1; | 4000 E = 1; |
4041 break; | 4001 break; |
4042 case Neon32: | 4002 case Neon32: |
4043 break; | 4003 break; |
4044 default: | 4004 default: |
4045 UNREACHABLE(); | 4005 UNREACHABLE(); |
4046 break; | 4006 break; |
4047 } | 4007 } |
4048 int vd, d; | 4008 int vd, d; |
4049 dst.split_code(&vd, &d); | 4009 dst.split_code(&vd, &d); |
4050 | 4010 |
4051 emit(al | 0x1D * B23 | B * B22 | B21 | vd * B16 | src.code() * B12 | | 4011 emit(al | 0x1D * B23 | B * B22 | B21 | vd * B16 | src.code() * B12 | |
4052 0xB * B8 | d * B7 | E * B5 | B4); | 4012 0xB * B8 | d * B7 | E * B5 | B4); |
4053 } | 4013 } |
4054 | 4014 |
4055 void Assembler::vdup(const QwNeonRegister dst, const SwVfpRegister src) { | 4015 void Assembler::vdup(QwNeonRegister dst, SwVfpRegister src) { |
4056 DCHECK(IsEnabled(NEON)); | 4016 DCHECK(IsEnabled(NEON)); |
4057 // Instruction details available in ARM DDI 0406C.b, A8-884. | 4017 // Instruction details available in ARM DDI 0406C.b, A8-884. |
4058 int index = src.code() & 1; | 4018 int index = src.code() & 1; |
4059 int d_reg = src.code() / 2; | 4019 int d_reg = src.code() / 2; |
4060 int imm4 = 4 | index << 3; // esize = 32, index in bit 3. | 4020 int imm4 = 4 | index << 3; // esize = 32, index in bit 3. |
4061 int vd, d; | 4021 int vd, d; |
4062 dst.split_code(&vd, &d); | 4022 dst.split_code(&vd, &d); |
4063 int vm, m; | 4023 int vm, m; |
4064 DwVfpRegister::from_code(d_reg).split_code(&vm, &m); | 4024 DwVfpRegister::from_code(d_reg).split_code(&vm, &m); |
4065 | 4025 |
4066 emit(0x1E7U * B23 | d * B22 | 0x3 * B20 | imm4 * B16 | vd * B12 | 0x18 * B7 | | 4026 emit(0x1E7U * B23 | d * B22 | 0x3 * B20 | imm4 * B16 | vd * B12 | 0x18 * B7 | |
4067 B6 | m * B5 | vm); | 4027 B6 | m * B5 | vm); |
4068 } | 4028 } |
4069 | 4029 |
4070 // Encode NEON vcvt.src_type.dst_type instruction. | 4030 // Encode NEON vcvt.src_type.dst_type instruction. |
4071 static Instr EncodeNeonVCVT(const VFPType dst_type, const QwNeonRegister dst, | 4031 static Instr EncodeNeonVCVT(VFPType dst_type, QwNeonRegister dst, |
4072 const VFPType src_type, const QwNeonRegister src) { | 4032 VFPType src_type, QwNeonRegister src) { |
4073 DCHECK(src_type != dst_type); | 4033 DCHECK(src_type != dst_type); |
4074 DCHECK(src_type == F32 || dst_type == F32); | 4034 DCHECK(src_type == F32 || dst_type == F32); |
4075 // Instruction details available in ARM DDI 0406C.b, A8.8.868. | 4035 // Instruction details available in ARM DDI 0406C.b, A8.8.868. |
4076 int vd, d; | 4036 int vd, d; |
4077 dst.split_code(&vd, &d); | 4037 dst.split_code(&vd, &d); |
4078 int vm, m; | 4038 int vm, m; |
4079 src.split_code(&vm, &m); | 4039 src.split_code(&vm, &m); |
4080 | 4040 |
4081 int op = 0; | 4041 int op = 0; |
4082 if (src_type == F32) { | 4042 if (src_type == F32) { |
4083 DCHECK(dst_type == S32 || dst_type == U32); | 4043 DCHECK(dst_type == S32 || dst_type == U32); |
4084 op = dst_type == U32 ? 3 : 2; | 4044 op = dst_type == U32 ? 3 : 2; |
4085 } else { | 4045 } else { |
4086 DCHECK(src_type == S32 || src_type == U32); | 4046 DCHECK(src_type == S32 || src_type == U32); |
4087 op = src_type == U32 ? 1 : 0; | 4047 op = src_type == U32 ? 1 : 0; |
4088 } | 4048 } |
4089 | 4049 |
4090 return 0x1E7U * B23 | d * B22 | 0x3B * B16 | vd * B12 | 0x3 * B9 | op * B7 | | 4050 return 0x1E7U * B23 | d * B22 | 0x3B * B16 | vd * B12 | 0x3 * B9 | op * B7 | |
4091 B6 | m * B5 | vm; | 4051 B6 | m * B5 | vm; |
4092 } | 4052 } |
4093 | 4053 |
4094 void Assembler::vcvt_f32_s32(const QwNeonRegister dst, | 4054 void Assembler::vcvt_f32_s32(QwNeonRegister dst, QwNeonRegister src) { |
4095 const QwNeonRegister src) { | |
4096 DCHECK(IsEnabled(NEON)); | 4055 DCHECK(IsEnabled(NEON)); |
4097 DCHECK(VfpRegisterIsAvailable(dst)); | 4056 DCHECK(VfpRegisterIsAvailable(dst)); |
4098 DCHECK(VfpRegisterIsAvailable(src)); | 4057 DCHECK(VfpRegisterIsAvailable(src)); |
4099 emit(EncodeNeonVCVT(F32, dst, S32, src)); | 4058 emit(EncodeNeonVCVT(F32, dst, S32, src)); |
4100 } | 4059 } |
4101 | 4060 |
4102 void Assembler::vcvt_f32_u32(const QwNeonRegister dst, | 4061 void Assembler::vcvt_f32_u32(QwNeonRegister dst, QwNeonRegister src) { |
4103 const QwNeonRegister src) { | |
4104 DCHECK(IsEnabled(NEON)); | 4062 DCHECK(IsEnabled(NEON)); |
4105 DCHECK(VfpRegisterIsAvailable(dst)); | 4063 DCHECK(VfpRegisterIsAvailable(dst)); |
4106 DCHECK(VfpRegisterIsAvailable(src)); | 4064 DCHECK(VfpRegisterIsAvailable(src)); |
4107 emit(EncodeNeonVCVT(F32, dst, U32, src)); | 4065 emit(EncodeNeonVCVT(F32, dst, U32, src)); |
4108 } | 4066 } |
4109 | 4067 |
4110 void Assembler::vcvt_s32_f32(const QwNeonRegister dst, | 4068 void Assembler::vcvt_s32_f32(QwNeonRegister dst, QwNeonRegister src) { |
4111 const QwNeonRegister src) { | |
4112 DCHECK(IsEnabled(NEON)); | 4069 DCHECK(IsEnabled(NEON)); |
4113 DCHECK(VfpRegisterIsAvailable(dst)); | 4070 DCHECK(VfpRegisterIsAvailable(dst)); |
4114 DCHECK(VfpRegisterIsAvailable(src)); | 4071 DCHECK(VfpRegisterIsAvailable(src)); |
4115 emit(EncodeNeonVCVT(S32, dst, F32, src)); | 4072 emit(EncodeNeonVCVT(S32, dst, F32, src)); |
4116 } | 4073 } |
4117 | 4074 |
4118 void Assembler::vcvt_u32_f32(const QwNeonRegister dst, | 4075 void Assembler::vcvt_u32_f32(QwNeonRegister dst, QwNeonRegister src) { |
4119 const QwNeonRegister src) { | |
4120 DCHECK(IsEnabled(NEON)); | 4076 DCHECK(IsEnabled(NEON)); |
4121 DCHECK(VfpRegisterIsAvailable(dst)); | 4077 DCHECK(VfpRegisterIsAvailable(dst)); |
4122 DCHECK(VfpRegisterIsAvailable(src)); | 4078 DCHECK(VfpRegisterIsAvailable(src)); |
4123 emit(EncodeNeonVCVT(U32, dst, F32, src)); | 4079 emit(EncodeNeonVCVT(U32, dst, F32, src)); |
4124 } | 4080 } |
4125 | 4081 |
4126 enum UnaryOp { VABS, VABSF, VNEG, VNEGF }; | 4082 enum NeonRegType { NEON_D, NEON_Q }; |
4127 | 4083 |
4128 static Instr EncodeNeonUnaryOp(UnaryOp op, NeonSize size, QwNeonRegister dst, | 4084 enum UnaryOp { VMVN, VSWP, VABS, VABSF, VNEG, VNEGF }; |
4129 QwNeonRegister src) { | 4085 |
4086 static Instr EncodeNeonUnaryOp(UnaryOp op, NeonRegType reg_type, NeonSize size, | |
4087 int dst_code, int src_code) { | |
4130 int op_encoding = 0; | 4088 int op_encoding = 0; |
4131 switch (op) { | 4089 switch (op) { |
4090 case VMVN: | |
4091 DCHECK_EQ(Neon8, size); // size == 0 for vmvn | |
4092 op_encoding = B10 | 0x3 * B7; | |
4093 break; | |
4094 case VSWP: | |
4095 DCHECK_EQ(Neon8, size); // size == 0 for vswp | |
4096 op_encoding = B17; | |
4097 break; | |
4132 case VABS: | 4098 case VABS: |
4133 op_encoding = 0x6 * B7; | 4099 op_encoding = B16 | 0x6 * B7; |
4134 break; | 4100 break; |
4135 case VABSF: | 4101 case VABSF: |
4136 DCHECK_EQ(Neon32, size); | 4102 DCHECK_EQ(Neon32, size); |
4137 op_encoding = 0x6 * B7 | B10; | 4103 op_encoding = B16 | B10 | 0x6 * B7; |
4138 break; | 4104 break; |
4139 case VNEG: | 4105 case VNEG: |
4140 op_encoding = 0x7 * B7; | 4106 op_encoding = B16 | 0x7 * B7; |
4141 break; | 4107 break; |
4142 case VNEGF: | 4108 case VNEGF: |
4143 DCHECK_EQ(Neon32, size); | 4109 DCHECK_EQ(Neon32, size); |
4144 op_encoding = 0x7 * B7 | B10; | 4110 op_encoding = B16 | B10 | 0x7 * B7; |
4145 break; | 4111 break; |
4146 default: | 4112 default: |
4147 UNREACHABLE(); | 4113 UNREACHABLE(); |
4148 break; | 4114 break; |
4149 } | 4115 } |
4150 int vd, d; | 4116 if (reg_type == NEON_Q) { |
4151 dst.split_code(&vd, &d); | 4117 op_encoding |= B6; |
4152 int vm, m; | 4118 dst_code <<= 1; |
4153 src.split_code(&vm, &m); | 4119 src_code <<= 1; |
4154 return 0x1E7U * B23 | d * B22 | 0x3 * B20 | size * B18 | B16 | vd * B12 | B6 | | 4120 } |
4155 m * B5 | vm | op_encoding; | 4121 |
4122 int vd = dst_code & 0x0F; | |
martyn.capewell
2017/03/09 14:03:36
Perhaps there should be some static methods to spl
bbudge
2017/03/09 20:51:05
Good idea. Done.
| |
4123 int d = (dst_code & 0x10) >> 4; | |
4124 int vm = src_code & 0x0F; | |
4125 int m = (src_code & 0x10) >> 4; | |
4126 return 0x1E7U * B23 | d * B22 | 0x3 * B20 | size * B18 | vd * B12 | m * B5 | | |
4127 vm | op_encoding; | |
4156 } | 4128 } |
4157 | 4129 |
4158 void Assembler::vabs(const QwNeonRegister dst, const QwNeonRegister src) { | 4130 void Assembler::vmvn(QwNeonRegister dst, QwNeonRegister src) { |
4131 // Qd = vmvn(Qn, Qm) SIMD bitwise negate. | |
4132 // Instruction details available in ARM DDI 0406C.b, A8-966. | |
4133 DCHECK(IsEnabled(NEON)); | |
4134 emit(EncodeNeonUnaryOp(VMVN, NEON_Q, Neon8, dst.code(), src.code())); | |
4135 } | |
4136 | |
4137 void Assembler::vswp(DwVfpRegister dst, DwVfpRegister src) { | |
4138 DCHECK(IsEnabled(NEON)); | |
4139 // Dd = vswp(Dn, Dm) SIMD d-register swap. | |
4140 // Instruction details available in ARM DDI 0406C.b, A8.8.418. | |
4141 DCHECK(IsEnabled(NEON)); | |
4142 emit(EncodeNeonUnaryOp(VSWP, NEON_D, Neon8, dst.code(), src.code())); | |
4143 } | |
4144 | |
4145 void Assembler::vswp(QwNeonRegister dst, QwNeonRegister src) { | |
4146 // Qd = vswp(Qn, Qm) SIMD q-register swap. | |
4147 // Instruction details available in ARM DDI 0406C.b, A8.8.418. | |
4148 DCHECK(IsEnabled(NEON)); | |
4149 emit(EncodeNeonUnaryOp(VSWP, NEON_Q, Neon8, dst.code(), src.code())); | |
4150 } | |
4151 | |
4152 void Assembler::vabs(QwNeonRegister dst, QwNeonRegister src) { | |
4159 // Qd = vabs.f<size>(Qn, Qm) SIMD floating point absolute value. | 4153 // Qd = vabs.f<size>(Qn, Qm) SIMD floating point absolute value. |
4160 // Instruction details available in ARM DDI 0406C.b, A8.8.824. | 4154 // Instruction details available in ARM DDI 0406C.b, A8.8.824. |
4161 DCHECK(IsEnabled(NEON)); | 4155 DCHECK(IsEnabled(NEON)); |
4162 emit(EncodeNeonUnaryOp(VABSF, Neon32, dst, src)); | 4156 emit(EncodeNeonUnaryOp(VABSF, NEON_Q, Neon32, dst.code(), src.code())); |
4163 } | 4157 } |
4164 | 4158 |
4165 void Assembler::vabs(NeonSize size, const QwNeonRegister dst, | 4159 void Assembler::vabs(NeonSize size, QwNeonRegister dst, QwNeonRegister src) { |
4166 const QwNeonRegister src) { | |
4167 // Qd = vabs.s<size>(Qn, Qm) SIMD integer absolute value. | 4160 // Qd = vabs.s<size>(Qn, Qm) SIMD integer absolute value. |
4168 // Instruction details available in ARM DDI 0406C.b, A8.8.824. | 4161 // Instruction details available in ARM DDI 0406C.b, A8.8.824. |
4169 DCHECK(IsEnabled(NEON)); | 4162 DCHECK(IsEnabled(NEON)); |
4170 emit(EncodeNeonUnaryOp(VABS, size, dst, src)); | 4163 emit(EncodeNeonUnaryOp(VABS, NEON_Q, size, dst.code(), src.code())); |
4171 } | 4164 } |
4172 | 4165 |
4173 void Assembler::vneg(const QwNeonRegister dst, const QwNeonRegister src) { | 4166 void Assembler::vneg(QwNeonRegister dst, QwNeonRegister src) { |
4174 // Qd = vneg.f<size>(Qn, Qm) SIMD floating point negate. | 4167 // Qd = vneg.f<size>(Qn, Qm) SIMD floating point negate. |
4175 // Instruction details available in ARM DDI 0406C.b, A8.8.968. | 4168 // Instruction details available in ARM DDI 0406C.b, A8.8.968. |
4176 DCHECK(IsEnabled(NEON)); | 4169 DCHECK(IsEnabled(NEON)); |
4177 emit(EncodeNeonUnaryOp(VNEGF, Neon32, dst, src)); | 4170 emit(EncodeNeonUnaryOp(VNEGF, NEON_Q, Neon32, dst.code(), src.code())); |
4178 } | 4171 } |
4179 | 4172 |
4180 void Assembler::vneg(NeonSize size, const QwNeonRegister dst, | 4173 void Assembler::vneg(NeonSize size, QwNeonRegister dst, QwNeonRegister src) { |
4181 const QwNeonRegister src) { | |
4182 // Qd = vneg.s<size>(Qn, Qm) SIMD integer negate. | 4174 // Qd = vneg.s<size>(Qn, Qm) SIMD integer negate. |
4183 // Instruction details available in ARM DDI 0406C.b, A8.8.968. | 4175 // Instruction details available in ARM DDI 0406C.b, A8.8.968. |
4184 DCHECK(IsEnabled(NEON)); | 4176 DCHECK(IsEnabled(NEON)); |
4185 emit(EncodeNeonUnaryOp(VNEG, size, dst, src)); | 4177 emit(EncodeNeonUnaryOp(VNEG, NEON_Q, size, dst.code(), src.code())); |
4186 } | |
4187 | |
4188 void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1, | |
4189 DwVfpRegister src2) { | |
4190 // Dd = veor(Dn, Dm) 64 bit integer exclusive OR. | |
4191 // Instruction details available in ARM DDI 0406C.b, A8.8.888. | |
4192 DCHECK(IsEnabled(NEON)); | |
4193 int vd, d; | |
4194 dst.split_code(&vd, &d); | |
4195 int vn, n; | |
4196 src1.split_code(&vn, &n); | |
4197 int vm, m; | |
4198 src2.split_code(&vm, &m); | |
4199 emit(0x1E6U * B23 | d * B22 | vn * B16 | vd * B12 | B8 | n * B7 | m * B5 | | |
4200 B4 | vm); | |
4201 } | 4178 } |
4202 | 4179 |
4203 enum BinaryBitwiseOp { VAND, VBIC, VBIF, VBIT, VBSL, VEOR, VORR, VORN }; | 4180 enum BinaryBitwiseOp { VAND, VBIC, VBIF, VBIT, VBSL, VEOR, VORR, VORN }; |
4204 | 4181 |
4205 static Instr EncodeNeonBinaryBitwiseOp(BinaryBitwiseOp op, QwNeonRegister dst, | 4182 static Instr EncodeNeonBinaryBitwiseOp(BinaryBitwiseOp op, NeonRegType reg_type, |
4206 QwNeonRegister src1, | 4183 int dst_code, int src_code1, |
4207 QwNeonRegister src2) { | 4184 int src_code2) { |
4208 int op_encoding = 0; | 4185 int op_encoding = 0; |
4209 switch (op) { | 4186 switch (op) { |
4210 case VBIC: | 4187 case VBIC: |
4211 op_encoding = 0x1 * B20; | 4188 op_encoding = 0x1 * B20; |
4212 break; | 4189 break; |
4213 case VBIF: | 4190 case VBIF: |
4214 op_encoding = B24 | 0x3 * B20; | 4191 op_encoding = B24 | 0x3 * B20; |
4215 break; | 4192 break; |
4216 case VBIT: | 4193 case VBIT: |
4217 op_encoding = B24 | 0x2 * B20; | 4194 op_encoding = B24 | 0x2 * B20; |
(...skipping 10 matching lines...) Expand all Loading... | |
4228 case VORN: | 4205 case VORN: |
4229 op_encoding = 0x3 * B20; | 4206 op_encoding = 0x3 * B20; |
4230 break; | 4207 break; |
4231 case VAND: | 4208 case VAND: |
4232 // op_encoding is 0. | 4209 // op_encoding is 0. |
4233 break; | 4210 break; |
4234 default: | 4211 default: |
4235 UNREACHABLE(); | 4212 UNREACHABLE(); |
4236 break; | 4213 break; |
4237 } | 4214 } |
4238 int vd, d; | 4215 if (reg_type == NEON_Q) { |
4239 dst.split_code(&vd, &d); | 4216 op_encoding |= B6; |
4240 int vn, n; | 4217 dst_code <<= 1; |
4241 src1.split_code(&vn, &n); | 4218 src_code1 <<= 1; |
4242 int vm, m; | 4219 src_code2 <<= 1; |
4243 src2.split_code(&vm, &m); | 4220 } |
4221 | |
4222 int vd = dst_code & 0x0F; | |
4223 int d = (dst_code & 0x10) >> 4; | |
4224 int vn = src_code1 & 0x0F; | |
4225 int n = (src_code1 & 0x10) >> 4; | |
4226 int vm = src_code2 & 0x0F; | |
4227 int m = (src_code2 & 0x10) >> 4; | |
4244 return 0x1E4U * B23 | op_encoding | d * B22 | vn * B16 | vd * B12 | B8 | | 4228 return 0x1E4U * B23 | op_encoding | d * B22 | vn * B16 | vd * B12 | B8 | |
4245 n * B7 | B6 | m * B5 | B4 | vm; | 4229 n * B7 | m * B5 | B4 | vm; |
4246 } | 4230 } |
4247 | 4231 |
4248 void Assembler::vand(QwNeonRegister dst, QwNeonRegister src1, | 4232 void Assembler::vand(QwNeonRegister dst, QwNeonRegister src1, |
4249 QwNeonRegister src2) { | 4233 QwNeonRegister src2) { |
4250 // Qd = vand(Qn, Qm) SIMD AND. | 4234 // Qd = vand(Qn, Qm) SIMD AND. |
4251 // Instruction details available in ARM DDI 0406C.b, A8.8.836. | 4235 // Instruction details available in ARM DDI 0406C.b, A8.8.836. |
4252 DCHECK(IsEnabled(NEON)); | 4236 DCHECK(IsEnabled(NEON)); |
4253 emit(EncodeNeonBinaryBitwiseOp(VAND, dst, src1, src2)); | 4237 emit(EncodeNeonBinaryBitwiseOp(VAND, NEON_Q, dst.code(), src1.code(), |
4238 src2.code())); | |
4254 } | 4239 } |
4255 | 4240 |
4256 void Assembler::vbsl(QwNeonRegister dst, const QwNeonRegister src1, | 4241 void Assembler::vbsl(QwNeonRegister dst, QwNeonRegister src1, |
4257 const QwNeonRegister src2) { | 4242 QwNeonRegister src2) { |
4258 DCHECK(IsEnabled(NEON)); | |
4259 // Qd = vbsl(Qn, Qm) SIMD bitwise select. | 4243 // Qd = vbsl(Qn, Qm) SIMD bitwise select. |
4260 // Instruction details available in ARM DDI 0406C.b, A8-844. | 4244 // Instruction details available in ARM DDI 0406C.b, A8-844. |
4261 emit(EncodeNeonBinaryBitwiseOp(VBSL, dst, src1, src2)); | 4245 DCHECK(IsEnabled(NEON)); |
4246 emit(EncodeNeonBinaryBitwiseOp(VBSL, NEON_Q, dst.code(), src1.code(), | |
4247 src2.code())); | |
4248 } | |
4249 | |
4250 void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1, | |
4251 DwVfpRegister src2) { | |
4252 // Dd = veor(Dn, Dm) SIMD exclusive OR. | |
4253 // Instruction details available in ARM DDI 0406C.b, A8.8.888. | |
4254 DCHECK(IsEnabled(NEON)); | |
4255 emit(EncodeNeonBinaryBitwiseOp(VEOR, NEON_D, dst.code(), src1.code(), | |
4256 src2.code())); | |
4262 } | 4257 } |
4263 | 4258 |
4264 void Assembler::veor(QwNeonRegister dst, QwNeonRegister src1, | 4259 void Assembler::veor(QwNeonRegister dst, QwNeonRegister src1, |
4265 QwNeonRegister src2) { | 4260 QwNeonRegister src2) { |
4266 // Qd = veor(Qn, Qm) SIMD exclusive OR. | 4261 // Qd = veor(Qn, Qm) SIMD exclusive OR. |
4267 // Instruction details available in ARM DDI 0406C.b, A8.8.888. | 4262 // Instruction details available in ARM DDI 0406C.b, A8.8.888. |
4268 DCHECK(IsEnabled(NEON)); | 4263 DCHECK(IsEnabled(NEON)); |
4269 emit(EncodeNeonBinaryBitwiseOp(VEOR, dst, src1, src2)); | 4264 emit(EncodeNeonBinaryBitwiseOp(VEOR, NEON_Q, dst.code(), src1.code(), |
4265 src2.code())); | |
4270 } | 4266 } |
4271 | 4267 |
4272 void Assembler::vorr(QwNeonRegister dst, QwNeonRegister src1, | 4268 void Assembler::vorr(QwNeonRegister dst, QwNeonRegister src1, |
4273 QwNeonRegister src2) { | 4269 QwNeonRegister src2) { |
4274 // Qd = vorr(Qn, Qm) SIMD OR. | 4270 // Qd = vorr(Qn, Qm) SIMD OR. |
4275 // Instruction details available in ARM DDI 0406C.b, A8.8.976. | 4271 // Instruction details available in ARM DDI 0406C.b, A8.8.976. |
4276 DCHECK(IsEnabled(NEON)); | 4272 DCHECK(IsEnabled(NEON)); |
4277 emit(EncodeNeonBinaryBitwiseOp(VORR, dst, src1, src2)); | 4273 emit(EncodeNeonBinaryBitwiseOp(VORR, NEON_Q, dst.code(), src1.code(), |
4274 src2.code())); | |
4278 } | 4275 } |
4279 | 4276 |
4280 enum FPBinOp { | 4277 enum FPBinOp { |
4281 VADDF, | 4278 VADDF, |
4282 VSUBF, | 4279 VSUBF, |
4283 VMULF, | 4280 VMULF, |
4284 VMINF, | 4281 VMINF, |
4285 VMAXF, | 4282 VMAXF, |
4286 VRECPS, | 4283 VRECPS, |
4287 VRSQRTS, | 4284 VRSQRTS, |
(...skipping 174 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4462 } | 4459 } |
4463 | 4460 |
4464 void Assembler::vmul(QwNeonRegister dst, QwNeonRegister src1, | 4461 void Assembler::vmul(QwNeonRegister dst, QwNeonRegister src1, |
4465 QwNeonRegister src2) { | 4462 QwNeonRegister src2) { |
4466 DCHECK(IsEnabled(NEON)); | 4463 DCHECK(IsEnabled(NEON)); |
4467 // Qd = vmul(Qn, Qm) SIMD floating point multiply. | 4464 // Qd = vmul(Qn, Qm) SIMD floating point multiply. |
4468 // Instruction details available in ARM DDI 0406C.b, A8-958. | 4465 // Instruction details available in ARM DDI 0406C.b, A8-958. |
4469 emit(EncodeNeonBinOp(VMULF, dst, src1, src2)); | 4466 emit(EncodeNeonBinOp(VMULF, dst, src1, src2)); |
4470 } | 4467 } |
4471 | 4468 |
4472 void Assembler::vmul(NeonSize size, QwNeonRegister dst, | 4469 void Assembler::vmul(NeonSize size, QwNeonRegister dst, QwNeonRegister src1, |
4473 const QwNeonRegister src1, const QwNeonRegister src2) { | 4470 QwNeonRegister src2) { |
4474 DCHECK(IsEnabled(NEON)); | 4471 DCHECK(IsEnabled(NEON)); |
4475 // Qd = vmul(Qn, Qm) SIMD integer multiply. | 4472 // Qd = vmul(Qn, Qm) SIMD integer multiply. |
4476 // Instruction details available in ARM DDI 0406C.b, A8-960. | 4473 // Instruction details available in ARM DDI 0406C.b, A8-960. |
4477 emit(EncodeNeonBinOp(VMUL, size, dst, src1, src2)); | 4474 emit(EncodeNeonBinOp(VMUL, size, dst, src1, src2)); |
4478 } | 4475 } |
4479 | 4476 |
4480 void Assembler::vmin(const QwNeonRegister dst, const QwNeonRegister src1, | 4477 void Assembler::vmin(QwNeonRegister dst, QwNeonRegister src1, |
4481 const QwNeonRegister src2) { | 4478 QwNeonRegister src2) { |
4482 DCHECK(IsEnabled(NEON)); | 4479 DCHECK(IsEnabled(NEON)); |
4483 // Qd = vmin(Qn, Qm) SIMD floating point MIN. | 4480 // Qd = vmin(Qn, Qm) SIMD floating point MIN. |
4484 // Instruction details available in ARM DDI 0406C.b, A8-928. | 4481 // Instruction details available in ARM DDI 0406C.b, A8-928. |
4485 emit(EncodeNeonBinOp(VMINF, dst, src1, src2)); | 4482 emit(EncodeNeonBinOp(VMINF, dst, src1, src2)); |
4486 } | 4483 } |
4487 | 4484 |
4488 void Assembler::vmin(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1, | 4485 void Assembler::vmin(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1, |
4489 QwNeonRegister src2) { | 4486 QwNeonRegister src2) { |
4490 DCHECK(IsEnabled(NEON)); | 4487 DCHECK(IsEnabled(NEON)); |
4491 // Qd = vmin(Qn, Qm) SIMD integer MIN. | 4488 // Qd = vmin(Qn, Qm) SIMD integer MIN. |
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4585 } | 4582 } |
4586 | 4583 |
4587 void Assembler::vrsqrts(QwNeonRegister dst, QwNeonRegister src1, | 4584 void Assembler::vrsqrts(QwNeonRegister dst, QwNeonRegister src1, |
4588 QwNeonRegister src2) { | 4585 QwNeonRegister src2) { |
4589 DCHECK(IsEnabled(NEON)); | 4586 DCHECK(IsEnabled(NEON)); |
4590 // Qd = vrsqrts(Qn, Qm) SIMD reciprocal square root refinement step. | 4587 // Qd = vrsqrts(Qn, Qm) SIMD reciprocal square root refinement step. |
4591 // Instruction details available in ARM DDI 0406C.b, A8-1040. | 4588 // Instruction details available in ARM DDI 0406C.b, A8-1040. |
4592 emit(EncodeNeonBinOp(VRSQRTS, dst, src1, src2)); | 4589 emit(EncodeNeonBinOp(VRSQRTS, dst, src1, src2)); |
4593 } | 4590 } |
4594 | 4591 |
4595 enum PairwiseOp { VPMIN, VPMAX }; | 4592 enum NeonPairwiseOp { VPMIN, VPMAX }; |
4596 | 4593 |
4597 static Instr EncodeNeonPairwiseOp(PairwiseOp op, NeonDataType dt, | 4594 static Instr EncodeNeonPairwiseOp(NeonPairwiseOp op, NeonDataType dt, |
4598 DwVfpRegister dst, DwVfpRegister src1, | 4595 DwVfpRegister dst, DwVfpRegister src1, |
4599 DwVfpRegister src2) { | 4596 DwVfpRegister src2) { |
4600 int op_encoding = 0; | 4597 int op_encoding = 0; |
4601 switch (op) { | 4598 switch (op) { |
4602 case VPMIN: | 4599 case VPMIN: |
4603 op_encoding = 0xA * B8 | B4; | 4600 op_encoding = 0xA * B8 | B4; |
4604 break; | 4601 break; |
4605 case VPMAX: | 4602 case VPMAX: |
4606 op_encoding = 0xA * B8; | 4603 op_encoding = 0xA * B8; |
4607 break; | 4604 break; |
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4686 } | 4683 } |
4687 | 4684 |
4688 void Assembler::vcgt(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1, | 4685 void Assembler::vcgt(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1, |
4689 QwNeonRegister src2) { | 4686 QwNeonRegister src2) { |
4690 DCHECK(IsEnabled(NEON)); | 4687 DCHECK(IsEnabled(NEON)); |
4691 // Qd = vcgt(Qn, Qm) SIMD integer compare greater than. | 4688 // Qd = vcgt(Qn, Qm) SIMD integer compare greater than. |
4692 // Instruction details available in ARM DDI 0406C.b, A8-852. | 4689 // Instruction details available in ARM DDI 0406C.b, A8-852. |
4693 emit(EncodeNeonBinOp(VCGT, dt, dst, src1, src2)); | 4690 emit(EncodeNeonBinOp(VCGT, dt, dst, src1, src2)); |
4694 } | 4691 } |
4695 | 4692 |
4696 void Assembler::vext(QwNeonRegister dst, const QwNeonRegister src1, | 4693 void Assembler::vext(QwNeonRegister dst, QwNeonRegister src1, |
4697 const QwNeonRegister src2, int bytes) { | 4694 QwNeonRegister src2, int bytes) { |
4698 DCHECK(IsEnabled(NEON)); | 4695 DCHECK(IsEnabled(NEON)); |
4699 // Qd = vext(Qn, Qm) SIMD byte extract. | 4696 // Qd = vext(Qn, Qm) SIMD byte extract. |
4700 // Instruction details available in ARM DDI 0406C.b, A8-890. | 4697 // Instruction details available in ARM DDI 0406C.b, A8-890. |
4701 int vd, d; | 4698 int vd, d; |
4702 dst.split_code(&vd, &d); | 4699 dst.split_code(&vd, &d); |
4703 int vn, n; | 4700 int vn, n; |
4704 src1.split_code(&vn, &n); | 4701 src1.split_code(&vn, &n); |
4705 int vm, m; | 4702 int vm, m; |
4706 src2.split_code(&vm, &m); | 4703 src2.split_code(&vm, &m); |
4707 DCHECK_GT(16, bytes); | 4704 DCHECK_GT(16, bytes); |
4708 emit(0x1E5U * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | bytes * B8 | | 4705 emit(0x1E5U * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | bytes * B8 | |
4709 n * B7 | B6 | m * B5 | vm); | 4706 n * B7 | B6 | m * B5 | vm); |
4710 } | 4707 } |
4711 | 4708 |
4712 void Assembler::vzip(NeonSize size, QwNeonRegister dst, | 4709 enum NeonSizedOp { VZIP, VUZP, VREV16, VREV32, VREV64, VTRN }; |
4713 const QwNeonRegister src) { | 4710 |
4714 DCHECK(IsEnabled(NEON)); | 4711 static Instr EncodeNeonSizedOp(NeonSizedOp op, NeonSize size, |
4715 // Qd = vzip.<size>(Qn, Qm) SIMD zip (interleave). | 4712 QwNeonRegister dst, QwNeonRegister src) { |
4716 // Instruction details available in ARM DDI 0406C.b, A8-1102. | 4713 int op_encoding = 0; |
4714 switch (op) { | |
4715 case VZIP: | |
4716 op_encoding = 0x2 * B16 | B8 | B7; | |
martyn.capewell
2017/03/09 14:03:36
Some of these seem inconsistently expressed. For e
bbudge
2017/03/09 20:51:05
Done.
| |
4717 break; | |
4718 case VUZP: | |
4719 op_encoding = 0x2 * B16 | B8; | |
4720 break; | |
4721 case VREV16: | |
4722 op_encoding = 0x2 * B7; | |
4723 break; | |
4724 case VREV32: | |
4725 op_encoding = 0x1 * B7; | |
4726 break; | |
4727 case VREV64: | |
4728 // op_encoding is 0; | |
4729 break; | |
4730 case VTRN: | |
4731 op_encoding = 0x2 * B16 | B7; | |
4732 break; | |
4733 default: | |
4734 UNREACHABLE(); | |
4735 break; | |
4736 } | |
4717 int vd, d; | 4737 int vd, d; |
4718 dst.split_code(&vd, &d); | 4738 dst.split_code(&vd, &d); |
4719 int vm, m; | 4739 int vm, m; |
4720 src.split_code(&vm, &m); | 4740 src.split_code(&vm, &m); |
4721 int sz = static_cast<int>(size); | 4741 int sz = static_cast<int>(size); |
4722 emit(0x1E7U * B23 | d * B22 | 0x3 * B20 | sz * B18 | 2 * B16 | vd * B12 | | 4742 return 0x1E7U * B23 | d * B22 | 0x3 * B20 | sz * B18 | vd * B12 | B6 | |
4723 0x3 * B7 | B6 | m * B5 | vm); | 4743 m * B5 | vm | op_encoding; |
4724 } | 4744 } |
4725 | 4745 |
4726 static Instr EncodeNeonVREV(NeonSize op_size, NeonSize size, | 4746 void Assembler::vzip(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) { |
4727 const QwNeonRegister dst, | 4747 DCHECK(IsEnabled(NEON)); |
4728 const QwNeonRegister src) { | 4748 // vzip.<size>(Qn, Qm) SIMD zip (interleave); both operands are updated. |
4749 // Instruction details available in ARM DDI 0406C.b, A8-1102. | |
4750 emit(EncodeNeonSizedOp(VZIP, size, src1, src2)); | |
4751 } | |
4752 | |
4753 void Assembler::vuzp(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) { | |
4754 DCHECK(IsEnabled(NEON)); | |
4755 // vuzp.<size>(Qn, Qm) SIMD un-zip (de-interleave); both operands are updated. | |
4756 // Instruction details available in ARM DDI 0406C.b, A8-1100. | |
4757 emit(EncodeNeonSizedOp(VUZP, size, src1, src2)); | |
4758 } | |
4759 | |
4760 void Assembler::vrev16(NeonSize size, QwNeonRegister dst, QwNeonRegister src) { | |
4761 DCHECK(IsEnabled(NEON)); | |
4729 // Qd = vrev<op_size>.<size>(Qn, Qm) SIMD scalar reverse. | 4762 // Qd = vrev16.<size>(Qm) SIMD element reverse. |
4730 // Instruction details available in ARM DDI 0406C.b, A8-1028. | 4763 // Instruction details available in ARM DDI 0406C.b, A8-1028. |
4731 DCHECK_GT(op_size, static_cast<int>(size)); | 4764 emit(EncodeNeonSizedOp(VREV16, size, dst, src)); |
4732 int vd, d; | |
4733 dst.split_code(&vd, &d); | |
4734 int vm, m; | |
4735 src.split_code(&vm, &m); | |
4736 int sz = static_cast<int>(size); | |
4737 int op = static_cast<int>(Neon64) - static_cast<int>(op_size); | |
4738 return 0x1E7U * B23 | d * B22 | 0x3 * B20 | sz * B18 | vd * B12 | op * B7 | | |
4739 B6 | m * B5 | vm; | |
4740 } | 4765 } |
4741 | 4766 |
4742 void Assembler::vrev16(NeonSize size, const QwNeonRegister dst, | 4767 void Assembler::vrev32(NeonSize size, QwNeonRegister dst, QwNeonRegister src) { |
4743 const QwNeonRegister src) { | |
4744 DCHECK(IsEnabled(NEON)); | 4768 DCHECK(IsEnabled(NEON)); |
4745 emit(EncodeNeonVREV(Neon16, size, dst, src)); | 4769 // Qd = vrev32.<size>(Qm) SIMD element reverse. |
4770 // Instruction details available in ARM DDI 0406C.b, A8-1028. | |
4771 emit(EncodeNeonSizedOp(VREV32, size, dst, src)); | |
4746 } | 4772 } |
4747 | 4773 |
4748 void Assembler::vrev32(NeonSize size, const QwNeonRegister dst, | 4774 void Assembler::vrev64(NeonSize size, QwNeonRegister dst, QwNeonRegister src) { |
4749 const QwNeonRegister src) { | |
4750 DCHECK(IsEnabled(NEON)); | 4775 DCHECK(IsEnabled(NEON)); |
4751 emit(EncodeNeonVREV(Neon32, size, dst, src)); | 4776 // Qd = vrev64.<size>(Qm) SIMD element reverse. |
4777 // Instruction details available in ARM DDI 0406C.b, A8-1028. | |
4778 emit(EncodeNeonSizedOp(VREV64, size, dst, src)); | |
4752 } | 4779 } |
4753 | 4780 |
4754 void Assembler::vrev64(NeonSize size, const QwNeonRegister dst, | 4781 void Assembler::vtrn(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) { |
4755 const QwNeonRegister src) { | |
4756 DCHECK(IsEnabled(NEON)); | 4782 DCHECK(IsEnabled(NEON)); |
4757 emit(EncodeNeonVREV(Neon64, size, dst, src)); | 4783 // vtrn.<size>(Qn, Qm) SIMD element transpose. |
4784 // Instruction details available in ARM DDI 0406C.b, A8-1096. | |
4785 emit(EncodeNeonSizedOp(VTRN, size, src1, src2)); | |
4758 } | 4786 } |
4759 | 4787 |
4760 // Encode NEON vtbl / vtbx instruction. | 4788 // Encode NEON vtbl / vtbx instruction. |
4761 static Instr EncodeNeonVTB(const DwVfpRegister dst, const NeonListOperand& list, | 4789 static Instr EncodeNeonVTB(DwVfpRegister dst, const NeonListOperand& list, |
4762 const DwVfpRegister index, bool vtbx) { | 4790 DwVfpRegister index, bool vtbx) { |
4763 // Dd = vtbl(table, Dm) SIMD vector permute, zero at out of range indices. | 4791 // Dd = vtbl(table, Dm) SIMD vector permute, zero at out of range indices. |
4764 // Instruction details available in ARM DDI 0406C.b, A8-1094. | 4792 // Instruction details available in ARM DDI 0406C.b, A8-1094. |
4765 // Dd = vtbx(table, Dm) SIMD vector permute, skip out of range indices. | 4793 // Dd = vtbx(table, Dm) SIMD vector permute, skip out of range indices. |
4766 // Instruction details available in ARM DDI 0406C.b, A8-1094. | 4794 // Instruction details available in ARM DDI 0406C.b, A8-1094. |
4767 int vd, d; | 4795 int vd, d; |
4768 dst.split_code(&vd, &d); | 4796 dst.split_code(&vd, &d); |
4769 int vn, n; | 4797 int vn, n; |
4770 list.base().split_code(&vn, &n); | 4798 list.base().split_code(&vn, &n); |
4771 int vm, m; | 4799 int vm, m; |
4772 index.split_code(&vm, &m); | 4800 index.split_code(&vm, &m); |
4773 int op = vtbx ? 1 : 0; // vtbl = 0, vtbx = 1. | 4801 int op = vtbx ? 1 : 0; // vtbl = 0, vtbx = 1. |
4774 return 0x1E7U * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | 0x2 * B10 | | 4802 return 0x1E7U * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | 0x2 * B10 | |
4775 list.length() * B8 | n * B7 | op * B6 | m * B5 | vm; | 4803 list.length() * B8 | n * B7 | op * B6 | m * B5 | vm; |
4776 } | 4804 } |
4777 | 4805 |
4778 void Assembler::vtbl(const DwVfpRegister dst, const NeonListOperand& list, | 4806 void Assembler::vtbl(DwVfpRegister dst, const NeonListOperand& list, |
4779 const DwVfpRegister index) { | 4807 DwVfpRegister index) { |
4780 DCHECK(IsEnabled(NEON)); | 4808 DCHECK(IsEnabled(NEON)); |
4781 emit(EncodeNeonVTB(dst, list, index, false)); | 4809 emit(EncodeNeonVTB(dst, list, index, false)); |
4782 } | 4810 } |
4783 | 4811 |
4784 void Assembler::vtbx(const DwVfpRegister dst, const NeonListOperand& list, | 4812 void Assembler::vtbx(DwVfpRegister dst, const NeonListOperand& list, |
4785 const DwVfpRegister index) { | 4813 DwVfpRegister index) { |
4786 DCHECK(IsEnabled(NEON)); | 4814 DCHECK(IsEnabled(NEON)); |
4787 emit(EncodeNeonVTB(dst, list, index, true)); | 4815 emit(EncodeNeonVTB(dst, list, index, true)); |
4788 } | 4816 } |
4789 | 4817 |
4790 // Pseudo instructions. | 4818 // Pseudo instructions. |
4791 void Assembler::nop(int type) { | 4819 void Assembler::nop(int type) { |
4792 // ARMv6{K/T2} and v7 have an actual NOP instruction but it serializes | 4820 // ARMv6{K/T2} and v7 have an actual NOP instruction but it serializes |
4793 // some of the CPU's pipeline and has to issue. Older ARM chips simply used | 4821 // some of the CPU's pipeline and has to issue. Older ARM chips simply used |
4794 // MOV Rx, Rx as NOP and it performs better even in newer CPUs. | 4822 // MOV Rx, Rx as NOP and it performs better even in newer CPUs. |
4795 // We therefore use MOV Rx, Rx, even on newer CPUs, and use Rx to encode | 4823 // We therefore use MOV Rx, Rx, even on newer CPUs, and use Rx to encode |
(...skipping 538 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
5334 DCHECK(is_uint12(offset)); | 5362 DCHECK(is_uint12(offset)); |
5335 instr_at_put(pc, SetLdrRegisterImmediateOffset(instr, offset)); | 5363 instr_at_put(pc, SetLdrRegisterImmediateOffset(instr, offset)); |
5336 } | 5364 } |
5337 } | 5365 } |
5338 | 5366 |
5339 | 5367 |
5340 } // namespace internal | 5368 } // namespace internal |
5341 } // namespace v8 | 5369 } // namespace v8 |
5342 | 5370 |
5343 #endif // V8_TARGET_ARCH_ARM | 5371 #endif // V8_TARGET_ARCH_ARM |
OLD | NEW |