Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: src/arm/assembler-arm.cc

Issue 2546933002: [Turbofan] Add ARM NEON instructions for implementing SIMD. (Closed)
Patch Set: Don't use temporary FP regs in tests. Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 1994-2006 Sun Microsystems Inc. 1 // Copyright (c) 1994-2006 Sun Microsystems Inc.
2 // All Rights Reserved. 2 // All Rights Reserved.
3 // 3 //
4 // Redistribution and use in source and binary forms, with or without 4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions 5 // modification, are permitted provided that the following conditions
6 // are met: 6 // are met:
7 // 7 //
8 // - Redistributions of source code must retain the above copyright notice, 8 // - Redistributions of source code must retain the above copyright notice,
9 // this list of conditions and the following disclaimer. 9 // this list of conditions and the following disclaimer.
10 // 10 //
(...skipping 465 matching lines...) Expand 10 before | Expand all | Expand 10 after
476 case 256: 476 case 256:
477 align_ = 3; 477 align_ = 3;
478 break; 478 break;
479 default: 479 default:
480 UNREACHABLE(); 480 UNREACHABLE();
481 align_ = 0; 481 align_ = 0;
482 break; 482 break;
483 } 483 }
484 } 484 }
485 485
486
487 NeonListOperand::NeonListOperand(DoubleRegister base, int registers_count) {
488 base_ = base;
489 switch (registers_count) {
490 case 1:
491 type_ = nlt_1;
492 break;
493 case 2:
494 type_ = nlt_2;
495 break;
496 case 3:
497 type_ = nlt_3;
498 break;
499 case 4:
500 type_ = nlt_4;
501 break;
502 default:
503 UNREACHABLE();
504 type_ = nlt_1;
505 break;
506 }
507 }
508
509
510 // ----------------------------------------------------------------------------- 486 // -----------------------------------------------------------------------------
511 // Specific instructions, constants, and masks. 487 // Specific instructions, constants, and masks.
512 488
513 // str(r, MemOperand(sp, 4, NegPreIndex), al) instruction (aka push(r)) 489 // str(r, MemOperand(sp, 4, NegPreIndex), al) instruction (aka push(r))
514 // register r is not encoded. 490 // register r is not encoded.
515 const Instr kPushRegPattern = 491 const Instr kPushRegPattern =
516 al | B26 | 4 | NegPreIndex | Register::kCode_sp * B16; 492 al | B26 | 4 | NegPreIndex | Register::kCode_sp * B16;
517 // ldr(r, MemOperand(sp, 4, PostIndex), al) instruction (aka pop(r)) 493 // ldr(r, MemOperand(sp, 4, PostIndex), al) instruction (aka pop(r))
518 // register r is not encoded. 494 // register r is not encoded.
519 const Instr kPopRegPattern = 495 const Instr kPopRegPattern =
(...skipping 2441 matching lines...) Expand 10 before | Expand all | Expand 10 after
2961 // Rt = Sn. 2937 // Rt = Sn.
2962 // Instruction details available in ARM DDI 0406A, A8-642. 2938 // Instruction details available in ARM DDI 0406A, A8-642.
2963 // cond(31-28) | 1110(27-24)| 000(23-21) | op=1(20) | Vn(19-16) | 2939 // cond(31-28) | 1110(27-24)| 000(23-21) | op=1(20) | Vn(19-16) |
2964 // Rt(15-12) | 1010(11-8) | N(7)=0 | 00(6-5) | 1(4) | 0000(3-0) 2940 // Rt(15-12) | 1010(11-8) | N(7)=0 | 00(6-5) | 1(4) | 0000(3-0)
2965 DCHECK(!dst.is(pc)); 2941 DCHECK(!dst.is(pc));
2966 int sn, n; 2942 int sn, n;
2967 src.split_code(&sn, &n); 2943 src.split_code(&sn, &n);
2968 emit(cond | 0xE*B24 | B20 | sn*B16 | dst.code()*B12 | 0xA*B8 | n*B7 | B4); 2944 emit(cond | 0xE*B24 | B20 | sn*B16 | dst.code()*B12 | 0xA*B8 | n*B7 | B4);
2969 } 2945 }
2970 2946
2971
// Kind of data that is read from or written to a VFP register. It serves as
// the type specifier of the generic vcvt instruction.
enum VFPType {
  S32,
  U32,
  F32,
  F64
};
2975 2950
2976 2951
2977 static bool IsSignedVFPType(VFPType type) { 2952 static bool IsSignedVFPType(VFPType type) {
2978 switch (type) { 2953 switch (type) {
2979 case S32: 2954 case S32:
2980 return true; 2955 return true;
2981 case U32: 2956 case U32:
(...skipping 926 matching lines...) Expand 10 before | Expand all | Expand 10 after
3908 DCHECK(VfpRegisterIsAvailable(dst)); 3883 DCHECK(VfpRegisterIsAvailable(dst));
3909 DCHECK(VfpRegisterIsAvailable(src)); 3884 DCHECK(VfpRegisterIsAvailable(src));
3910 int vd, d; 3885 int vd, d;
3911 dst.split_code(&vd, &d); 3886 dst.split_code(&vd, &d);
3912 int vm, m; 3887 int vm, m;
3913 src.split_code(&vm, &m); 3888 src.split_code(&vm, &m);
3914 emit(0x1E4 * B23 | d * B22 | 2 * B20 | vm * B16 | vd * B12 | B8 | m * B7 | 3889 emit(0x1E4 * B23 | d * B22 | 2 * B20 | vm * B16 | vd * B12 | B8 | m * B7 |
3915 B6 | m * B5 | B4 | vm); 3890 B6 | m * B5 | B4 | vm);
3916 } 3891 }
3917 3892
3893 void Assembler::vmvn(const QwNeonRegister dst, const QwNeonRegister src) {
3894 DCHECK(IsEnabled(NEON));
3895 // Instruction details available in ARM DDI 0406C.b, A8-966.
3896 DCHECK(VfpRegisterIsAvailable(dst));
3897 DCHECK(VfpRegisterIsAvailable(src));
3898 int vd, d;
3899 dst.split_code(&vd, &d);
3900 int vm, m;
3901 src.split_code(&vm, &m);
3902 emit(0x1E7 * B23 | d * B22 | 3 * B20 | vd * B12 | 0x17 * B6 | m * B5 | vm);
3903 }
3904
3918 void Assembler::vswp(DwVfpRegister dst, DwVfpRegister src) { 3905 void Assembler::vswp(DwVfpRegister dst, DwVfpRegister src) {
3919 // Instruction details available in ARM DDI 0406C.b, A8.8.418. 3906 // Instruction details available in ARM DDI 0406C.b, A8.8.418.
3920 // 1111(31-28) | 00111(27-23) | D(22) | 110010(21-16) | 3907 // 1111(31-28) | 00111(27-23) | D(22) | 110010(21-16) |
3921 // Vd(15-12) | 000000(11-6) | M(5) | 0(4) | Vm(3-0) 3908 // Vd(15-12) | 000000(11-6) | M(5) | 0(4) | Vm(3-0)
3922 DCHECK(IsEnabled(NEON)); 3909 DCHECK(IsEnabled(NEON));
3923 int vd, d; 3910 int vd, d;
3924 dst.split_code(&vd, &d); 3911 dst.split_code(&vd, &d);
3925 int vm, m; 3912 int vm, m;
3926 src.split_code(&vm, &m); 3913 src.split_code(&vm, &m);
3927 emit(0xFU * B28 | 7 * B23 | d * B22 | 0x32 * B16 | vd * B12 | m * B5 | vm); 3914 emit(0xFU * B28 | 7 * B23 | d * B22 | 0x32 * B16 | vd * B12 | m * B5 | vm);
3928 } 3915 }
3929 3916
3930 void Assembler::vswp(QwNeonRegister dst, QwNeonRegister src) { 3917 void Assembler::vswp(QwNeonRegister dst, QwNeonRegister src) {
3931 // Instruction details available in ARM DDI 0406C.b, A8.8.418. 3918 // Instruction details available in ARM DDI 0406C.b, A8.8.418.
3932 // 1111(31-28) | 00111(27-23) | D(22) | 110010(21-16) | 3919 // 1111(31-28) | 00111(27-23) | D(22) | 110010(21-16) |
3933 // Vd(15-12) | 000000(11-6) | M(5) | 0(4) | Vm(3-0) 3920 // Vd(15-12) | 000000(11-6) | M(5) | 0(4) | Vm(3-0)
3934 DCHECK(IsEnabled(NEON)); 3921 DCHECK(IsEnabled(NEON));
3935 int vd, d; 3922 int vd, d;
3936 dst.split_code(&vd, &d); 3923 dst.split_code(&vd, &d);
3937 int vm, m; 3924 int vm, m;
3938 src.split_code(&vm, &m); 3925 src.split_code(&vm, &m);
3939 emit(0xFU * B28 | 7 * B23 | d * B22 | 0x32 * B16 | vd * B12 | B6 | m * B5 | 3926 emit(0xFU * B28 | 7 * B23 | d * B22 | 0x32 * B16 | vd * B12 | B6 | m * B5 |
3940 vm); 3927 vm);
3941 } 3928 }
3942 3929
3930 void Assembler::vdup(const QwNeonRegister dst, const Register src,
3931 NeonSize size) {
3932 DCHECK(IsEnabled(NEON));
3933 // Instruction details available in ARM DDI 0406C.b, A8-886.
3934 int B = 0, E = 0;
3935 switch (size) {
3936 case Neon8:
3937 B = 1;
3938 break;
3939 case Neon16:
3940 E = 1;
3941 break;
3942 case Neon32:
3943 break;
3944 default:
3945 UNREACHABLE();
3946 break;
3947 }
3948 int vd, d;
3949 dst.split_code(&vd, &d);
3950
3951 emit(al | 0x1D * B23 | B * B22 | B21 | vd * B16 | src.code() * B12 |
3952 0xB * B8 | d * B7 | E * B5 | B4);
3953 }
3954
3955 void Assembler::vdup(const QwNeonRegister dst, const SwVfpRegister src) {
3956 DCHECK(IsEnabled(NEON));
3957 // Instruction details available in ARM DDI 0406C.b, A8-884.
3958 int index = src.code() & 1;
3959 int d_reg = src.code() / 2;
3960 int imm4 = 4 | index << 3; // esize = 32, index in bit 3.
3961 int vd, d;
3962 dst.split_code(&vd, &d);
3963 int vm, m;
3964 DwVfpRegister::from_code(d_reg).split_code(&vm, &m);
3965
3966 emit(0x1E7 * B23 | d * B22 | 0x3 * B20 | imm4 * B16 | vd * B12 | 0x18 * B7 |
3967 B6 | m * B5 | vm);
3968 }
3969
3970 void Assembler::vcvt(const QwNeonRegister dst, const QwNeonRegister src,
Rodolph Perfetta (ARM) 2016/12/07 15:59:00 other vcvt instruction indicate the conversion dir
bbudge 2016/12/08 03:07:30 Yes, that's a lot more consistent with the existin
3971 NeonDataType from, NeonDataType to) {
3972 // Instruction details available in ARM DDI 0406C.b, A8.8.868.
3973 DCHECK(IsEnabled(NEON));
3974 int vd, d;
3975 dst.split_code(&vd, &d);
3976 int vm, m;
3977 src.split_code(&vm, &m);
3978
3979 int op = 0;
3980 if (from == NeonOtherDataType) {
3981 DCHECK_EQ(NeonS32, to & NeonDataTypeSizeMask);
3982 op = ((to & NeonDataTypeUMask) != 0) ? 1 : 0;
Rodolph Perfetta (ARM) 2016/12/07 15:59:00 this is the other way round: when converting to in
bbudge 2016/12/08 03:07:30 Done.
3983 } else {
3984 DCHECK_EQ(NeonOtherDataType, to);
3985 DCHECK_EQ(NeonS32, from & NeonDataTypeSizeMask);
3986 op = ((from & NeonDataTypeUMask) != 0) ? 3 : 2;
Rodolph Perfetta (ARM) 2016/12/07 15:59:00 and 1 and 0 here.
bbudge 2016/12/08 03:07:30 Done.
3987 }
3988
3989 emit(0x1E7 * B23 | d * B22 | 0x3B * B16 | vd * B12 | 0x3 * B9 | op * B7 |
3990 m * B5 | vm);
3991 }
3992
3943 void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1, 3993 void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1,
3944 DwVfpRegister src2) { 3994 DwVfpRegister src2) {
3995 // Dd = veor(Dn, Dm) 64 bit integer exclusive OR.
3945 // Instruction details available in ARM DDI 0406C.b, A8.8.888. 3996 // Instruction details available in ARM DDI 0406C.b, A8.8.888.
3946 DCHECK(IsEnabled(NEON)); 3997 DCHECK(IsEnabled(NEON));
3947 int vd, d; 3998 int vd, d;
3948 dst.split_code(&vd, &d); 3999 dst.split_code(&vd, &d);
3949 int vn, n; 4000 int vn, n;
3950 src1.split_code(&vn, &n); 4001 src1.split_code(&vn, &n);
3951 int vm, m; 4002 int vm, m;
3952 src2.split_code(&vm, &m); 4003 src2.split_code(&vm, &m);
3953 emit(0x1E6 * B23 | d * B22 | vn * B16 | vd * B12 | B8 | n * B7 | m * B5 | B4 | 4004 emit(0x1E6 * B23 | d * B22 | vn * B16 | vd * B12 | B8 | n * B7 | m * B5 | B4 |
3954 vm); 4005 vm);
3955 } 4006 }
3956 4007
3957 void Assembler::veor(QwNeonRegister dst, QwNeonRegister src1, 4008 void Assembler::veor(QwNeonRegister dst, QwNeonRegister src1,
3958 QwNeonRegister src2) { 4009 QwNeonRegister src2) {
4010 // Qd = veor(Qn, Qm) SIMD integer exclusive OR.
3959 // Instruction details available in ARM DDI 0406C.b, A8.8.888. 4011 // Instruction details available in ARM DDI 0406C.b, A8.8.888.
3960 DCHECK(IsEnabled(NEON)); 4012 DCHECK(IsEnabled(NEON));
3961 int vd, d; 4013 int vd, d;
3962 dst.split_code(&vd, &d); 4014 dst.split_code(&vd, &d);
3963 int vn, n; 4015 int vn, n;
3964 src1.split_code(&vn, &n); 4016 src1.split_code(&vn, &n);
3965 int vm, m; 4017 int vm, m;
3966 src2.split_code(&vm, &m); 4018 src2.split_code(&vm, &m);
3967 emit(0x1E6 * B23 | d * B22 | vn * B16 | vd * B12 | B8 | n * B7 | B6 | m * B5 | 4019 emit(0x1E6 * B23 | d * B22 | vn * B16 | vd * B12 | B8 | n * B7 | B6 | m * B5 |
3968 B4 | vm); 4020 B4 | vm);
3969 } 4021 }
3970 4022
4023 void Assembler::vadd(QwNeonRegister dst, const QwNeonRegister src1,
4024 const QwNeonRegister src2) {
4025 DCHECK(IsEnabled(NEON));
4026 // Qd = vadd(Qn, Qm) SIMD floating point addition.
4027 // Instruction details available in ARM DDI 0406C.b, A8-830.
4028 int vd, d;
4029 dst.split_code(&vd, &d);
4030 int vn, n;
4031 src1.split_code(&vn, &n);
4032 int vm, m;
4033 src2.split_code(&vm, &m);
4034 emit(0x1E4 * B23 | d * B22 | vn * B16 | vd * B12 | 0xD * B8 | n * B7 | B6 |
4035 m * B5 | vm);
4036 }
4037
4038 void Assembler::vadd(QwNeonRegister dst, const QwNeonRegister src1,
4039 const QwNeonRegister src2, NeonSize size) {
4040 DCHECK(IsEnabled(NEON));
4041 // Qd = vadd(Qn, Qm) SIMD integer addition.
4042 // Instruction details available in ARM DDI 0406C.b, A8-828.
4043 int vd, d;
4044 dst.split_code(&vd, &d);
4045 int vn, n;
4046 src1.split_code(&vn, &n);
4047 int vm, m;
4048 src2.split_code(&vm, &m);
4049 int sz = static_cast<int>(size);
4050 emit(0x1E4 * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 |
4051 n * B7 | B6 | m * B5 | vm);
4052 }
4053
4054 void Assembler::vsub(QwNeonRegister dst, const QwNeonRegister src1,
4055 const QwNeonRegister src2) {
4056 DCHECK(IsEnabled(NEON));
4057 // Qd = vsub(Qn, Qm) SIMD floating point subtraction.
4058 // Instruction details available in ARM DDI 0406C.b, A8-1086.
4059 int vd, d;
4060 dst.split_code(&vd, &d);
4061 int vn, n;
4062 src1.split_code(&vn, &n);
4063 int vm, m;
4064 src2.split_code(&vm, &m);
4065 emit(0x1E4 * B23 | d * B22 | B21 | vn * B16 | vd * B12 | 0xD * B8 | n * B7 |
4066 B6 | m * B5 | vm);
4067 }
4068
4069 void Assembler::vsub(QwNeonRegister dst, const QwNeonRegister src1,
4070 const QwNeonRegister src2, NeonSize size) {
4071 DCHECK(IsEnabled(NEON));
4072 // Qd = vsub(Qn, Qm) SIMD integer subtraction.
4073 // Instruction details available in ARM DDI 0406C.b, A8-1084.
4074 int vd, d;
4075 dst.split_code(&vd, &d);
4076 int vn, n;
4077 src1.split_code(&vn, &n);
4078 int vm, m;
4079 src2.split_code(&vm, &m);
4080 int sz = static_cast<int>(size);
4081 emit(0x1E6 * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 |
4082 n * B7 | B6 | m * B5 | vm);
4083 }
4084
4085 void Assembler::vtst(QwNeonRegister dst, const QwNeonRegister src1,
4086 const QwNeonRegister src2, NeonSize size) {
4087 DCHECK(IsEnabled(NEON));
4088 // Qd = vtst(Qn, Qm) SIMD test integer operands.
4089 // Instruction details available in ARM DDI 0406C.b, A8-1098.
4090 int vd, d;
4091 dst.split_code(&vd, &d);
4092 int vn, n;
4093 src1.split_code(&vn, &n);
4094 int vm, m;
4095 src2.split_code(&vm, &m);
4096 int sz = static_cast<int>(size);
4097 emit(0x1E4 * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 |
4098 n * B7 | B6 | m * B5 | B4 | vm);
4099 }
4100
4101 void Assembler::vceq(QwNeonRegister dst, const QwNeonRegister src1,
4102 const QwNeonRegister src2, NeonSize size) {
4103 DCHECK(IsEnabled(NEON));
4104 // Qd = vceq(Qn, Qm) SIMD integer compare equal.
4105 // Instruction details available in ARM DDI 0406C.b, A8-844.
4106 int vd, d;
4107 dst.split_code(&vd, &d);
4108 int vn, n;
4109 src1.split_code(&vn, &n);
4110 int vm, m;
4111 src2.split_code(&vm, &m);
4112 int sz = static_cast<int>(size);
4113 emit(0x1E6 * B23 | d * B22 | sz * B20 | vn * B16 | vd * B12 | 0x8 * B8 |
4114 n * B7 | B6 | m * B5 | B4 | vm);
4115 }
4116
4117 void Assembler::vbsl(QwNeonRegister dst, const QwNeonRegister src1,
4118 const QwNeonRegister src2) {
4119 DCHECK(IsEnabled(NEON));
4120 // Qd = vbsl(Qn, Qm) SIMD bitwise select.
4121 // Instruction details available in ARM DDI 0406C.b, A8-844.
4122 int vd, d;
4123 dst.split_code(&vd, &d);
4124 int vn, n;
4125 src1.split_code(&vn, &n);
4126 int vm, m;
4127 src2.split_code(&vm, &m);
4128 int op = 1; // vbsl
4129 emit(0x1E6 * B23 | d * B22 | op * B20 | vn * B16 | vd * B12 | 0x1 * B8 |
4130 n * B7 | B6 | m * B5 | B4 | vm);
4131 }
4132
4133 void Assembler::vtbl(const DwVfpRegister dst, const NeonListOperand& list,
4134 const DwVfpRegister index) {
4135 DCHECK(IsEnabled(NEON));
4136 // Dd = vtbl(table, Dm) SIMD vector permute, zero at out of range indices.
4137 // Instruction details available in ARM DDI 0406C.b, A8-1094.
4138 int vd, d;
4139 dst.split_code(&vd, &d);
4140 int vn, n;
4141 list.base().split_code(&vn, &n);
4142 int vm, m;
4143 index.split_code(&vm, &m);
4144 int op = 1; // vbsl
4145 emit(0x1E7 * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | 0x2 * B10 |
4146 list.len() * B8 | n * B7 | m * B5 | vm);
4147 }
4148
4149 void Assembler::vtbx(const DwVfpRegister dst, const NeonListOperand& list,
4150 const DwVfpRegister index) {
4151 DCHECK(IsEnabled(NEON));
4152 // Dd = vtbx(table, Dm) SIMD vector permute, skip out of range indices.
4153 // Instruction details available in ARM DDI 0406C.b, A8-1094.
4154 int vd, d;
4155 dst.split_code(&vd, &d);
4156 int vn, n;
4157 list.base().split_code(&vn, &n);
4158 int vm, m;
4159 index.split_code(&vm, &m);
4160 int op = 1; // vbsl
4161 emit(0x1E7 * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | 0x2 * B10 |
4162 list.len() * B8 | n * B7 | B6 | m * B5 | vm);
4163 }
4164
3971 // Pseudo instructions. 4165 // Pseudo instructions.
3972 void Assembler::nop(int type) { 4166 void Assembler::nop(int type) {
3973 // ARMv6{K/T2} and v7 have an actual NOP instruction but it serializes 4167 // ARMv6{K/T2} and v7 have an actual NOP instruction but it serializes
3974 // some of the CPU's pipeline and has to issue. Older ARM chips simply used 4168 // some of the CPU's pipeline and has to issue. Older ARM chips simply used
3975 // MOV Rx, Rx as NOP and it performs better even in newer CPUs. 4169 // MOV Rx, Rx as NOP and it performs better even in newer CPUs.
3976 // We therefore use MOV Rx, Rx, even on newer CPUs, and use Rx to encode 4170 // We therefore use MOV Rx, Rx, even on newer CPUs, and use Rx to encode
3977 // a type. 4171 // a type.
3978 DCHECK(0 <= type && type <= 14); // mov pc, pc isn't a nop. 4172 DCHECK(0 <= type && type <= 14); // mov pc, pc isn't a nop.
3979 emit(al | 13*B21 | type*B12 | type); 4173 emit(al | 13*B21 | type*B12 | type);
3980 } 4174 }
(...skipping 533 matching lines...) Expand 10 before | Expand all | Expand 10 after
4514 DCHECK(is_uint12(offset)); 4708 DCHECK(is_uint12(offset));
4515 instr_at_put(pc, SetLdrRegisterImmediateOffset(instr, offset)); 4709 instr_at_put(pc, SetLdrRegisterImmediateOffset(instr, offset));
4516 } 4710 }
4517 } 4711 }
4518 4712
4519 4713
4520 } // namespace internal 4714 } // namespace internal
4521 } // namespace v8 4715 } // namespace v8
4522 4716
4523 #endif // V8_TARGET_ARCH_ARM 4717 #endif // V8_TARGET_ARCH_ARM
OLDNEW
« no previous file with comments | « src/arm/assembler-arm.h ('k') | src/arm/constants-arm.h » ('j') | src/arm/disasm-arm.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698