OLD | NEW |
---|---|
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 #include "vm/globals.h" // Needed here to get TARGET_ARCH_ARM. | 5 #include "vm/globals.h" // Needed here to get TARGET_ARCH_ARM. |
6 #if defined(TARGET_ARCH_ARM) | 6 #if defined(TARGET_ARCH_ARM) |
7 | 7 |
8 #include "vm/intermediate_language.h" | 8 #include "vm/intermediate_language.h" |
9 | 9 |
10 #include "vm/dart_entry.h" | 10 #include "vm/dart_entry.h" |
(...skipping 3013 matching lines...) | |
3024 switch (op_kind()) { | 3024 switch (op_kind()) { |
3025 case Token::kADD: __ vaddqs(result, left, right); break; | 3025 case Token::kADD: __ vaddqs(result, left, right); break; |
3026 case Token::kSUB: __ vsubqs(result, left, right); break; | 3026 case Token::kSUB: __ vsubqs(result, left, right); break; |
3027 case Token::kMUL: __ vmulqs(result, left, right); break; | 3027 case Token::kMUL: __ vmulqs(result, left, right); break; |
3028 case Token::kDIV: __ Vdivqs(result, left, right); break; | 3028 case Token::kDIV: __ Vdivqs(result, left, right); break; |
3029 default: UNREACHABLE(); | 3029 default: UNREACHABLE(); |
3030 } | 3030 } |
3031 } | 3031 } |
3032 | 3032 |
3033 | 3033 |
3034 LocationSummary* Float32x4ShuffleInstr::MakeLocationSummary() const { | 3034 LocationSummary* Simd32x4ShuffleInstr::MakeLocationSummary() const { |
3035 const intptr_t kNumInputs = 1; | 3035 const intptr_t kNumInputs = 1; |
3036 const intptr_t kNumTemps = 0; | 3036 const intptr_t kNumTemps = 0; |
3037 LocationSummary* summary = | 3037 LocationSummary* summary = |
3038 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); | 3038 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); |
3039 // Low (< Q7) Q registers are needed for the vcvtds and vmovs instructions. | 3039 // Low (< Q7) Q registers are needed for the vcvtds and vmovs instructions. |
3040 summary->set_in(0, Location::FpuRegisterLocation(Q5)); | 3040 summary->set_in(0, Location::FpuRegisterLocation(Q5)); |
3041 summary->set_out(Location::FpuRegisterLocation(Q6)); | 3041 summary->set_out(Location::FpuRegisterLocation(Q6)); |
3042 return summary; | 3042 return summary; |
3043 } | 3043 } |
3044 | 3044 |
3045 | 3045 |
3046 void Float32x4ShuffleInstr::EmitNativeCode(FlowGraphCompiler* compiler) { | 3046 void Simd32x4ShuffleInstr::EmitNativeCode(FlowGraphCompiler* compiler) { |
3047 QRegister value = locs()->in(0).fpu_reg(); | 3047 QRegister value = locs()->in(0).fpu_reg(); |
3048 QRegister result = locs()->out().fpu_reg(); | 3048 QRegister result = locs()->out().fpu_reg(); |
3049 DRegister dresult0 = EvenDRegisterOf(result); | 3049 DRegister dresult0 = EvenDRegisterOf(result); |
3050 DRegister dresult1 = OddDRegisterOf(result); | 3050 DRegister dresult1 = OddDRegisterOf(result); |
3051 SRegister sresult0 = EvenSRegisterOf(dresult0); | 3051 SRegister sresult0 = EvenSRegisterOf(dresult0); |
3052 SRegister sresult1 = OddSRegisterOf(dresult0); | 3052 SRegister sresult1 = OddSRegisterOf(dresult0); |
3053 SRegister sresult2 = EvenSRegisterOf(dresult1); | 3053 SRegister sresult2 = EvenSRegisterOf(dresult1); |
3054 SRegister sresult3 = OddSRegisterOf(dresult1); | 3054 SRegister sresult3 = OddSRegisterOf(dresult1); |
3055 | 3055 |
3056 DRegister dvalue0 = EvenDRegisterOf(value); | 3056 DRegister dvalue0 = EvenDRegisterOf(value); |
(...skipping 14 matching lines...) | |
3071 __ vcvtds(dresult0, sresult0); | 3071 __ vcvtds(dresult0, sresult0); |
3072 break; | 3072 break; |
3073 case MethodRecognizer::kFloat32x4ShuffleZ: | 3073 case MethodRecognizer::kFloat32x4ShuffleZ: |
3074 __ vdup(kWord, result, dvalue1, 0); | 3074 __ vdup(kWord, result, dvalue1, 0); |
3075 __ vcvtds(dresult0, sresult0); | 3075 __ vcvtds(dresult0, sresult0); |
3076 break; | 3076 break; |
3077 case MethodRecognizer::kFloat32x4ShuffleW: | 3077 case MethodRecognizer::kFloat32x4ShuffleW: |
3078 __ vdup(kWord, result, dvalue1, 1); | 3078 __ vdup(kWord, result, dvalue1, 1); |
3079 __ vcvtds(dresult0, sresult0); | 3079 __ vcvtds(dresult0, sresult0); |
3080 break; | 3080 break; |
3081 case MethodRecognizer::kUint32x4Shuffle: | |
3081 case MethodRecognizer::kFloat32x4Shuffle: | 3082 case MethodRecognizer::kFloat32x4Shuffle: |
3082 if (mask_ == 0x00) { | 3083 if (mask_ == 0x00) { |
3083 __ vdup(kWord, result, dvalue0, 0); | 3084 __ vdup(kWord, result, dvalue0, 0); |
3084 } else if (mask_ == 0x55) { | 3085 } else if (mask_ == 0x55) { |
3085 __ vdup(kWord, result, dvalue0, 1); | 3086 __ vdup(kWord, result, dvalue0, 1); |
3086 } else if (mask_ == 0xAA) { | 3087 } else if (mask_ == 0xAA) { |
3087 __ vdup(kWord, result, dvalue1, 0); | 3088 __ vdup(kWord, result, dvalue1, 0); |
3088 } else if (mask_ == 0xFF) { | 3089 } else if (mask_ == 0xFF) { |
3089 __ vdup(kWord, result, dvalue1, 1); | 3090 __ vdup(kWord, result, dvalue1, 1); |
3090 } else { | 3091 } else { |
3091 SRegister svalues[4]; | 3092 SRegister svalues[4]; |
3092 | 3093 |
3093 svalues[0] = EvenSRegisterOf(dtemp0); | 3094 svalues[0] = EvenSRegisterOf(dtemp0); |
3094 svalues[1] = OddSRegisterOf(dtemp0); | 3095 svalues[1] = OddSRegisterOf(dtemp0); |
3095 svalues[2] = EvenSRegisterOf(dtemp1); | 3096 svalues[2] = EvenSRegisterOf(dtemp1); |
3096 svalues[3] = OddSRegisterOf(dtemp1); | 3097 svalues[3] = OddSRegisterOf(dtemp1); |
3097 | 3098 |
3098 __ vmovq(QTMP, value); | 3099 __ vmovq(QTMP, value); |
3099 __ vmovs(sresult0, svalues[mask_ & 0x3]); | 3100 __ vmovs(sresult0, svalues[mask_ & 0x3]); |
3100 __ vmovs(sresult1, svalues[(mask_ >> 2) & 0x3]); | 3101 __ vmovs(sresult1, svalues[(mask_ >> 2) & 0x3]); |
3101 __ vmovs(sresult2, svalues[(mask_ >> 4) & 0x3]); | 3102 __ vmovs(sresult2, svalues[(mask_ >> 4) & 0x3]); |
3102 __ vmovs(sresult3, svalues[(mask_ >> 6) & 0x3]); | 3103 __ vmovs(sresult3, svalues[(mask_ >> 6) & 0x3]); |
3103 } | 3104 } |
3104 break; | 3105 break; |
3105 default: UNREACHABLE(); | 3106 default: UNREACHABLE(); |
3106 } | 3107 } |
3107 } | 3108 } |
3108 | 3109 |
3109 | 3110 |
3111 LocationSummary* Simd32x4ShuffleMixInstr::MakeLocationSummary() const { | |
3112 const intptr_t kNumInputs = 2; | |
3113 const intptr_t kNumTemps = 0; | |
3114 LocationSummary* summary = | |
3115 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); | |
3116 // Low (< Q7) Q registers are needed for the vcvtds and vmovs instructions. | |
3117 summary->set_in(0, Location::FpuRegisterLocation(Q4)); | |
3118 summary->set_in(1, Location::FpuRegisterLocation(Q5)); | |
3119 summary->set_out(Location::FpuRegisterLocation(Q6)); | |
3120 return summary; | |
3121 } | |
3122 | |
3123 | |
3124 void Simd32x4ShuffleMixInstr::EmitNativeCode(FlowGraphCompiler* compiler) { | |
3125 QRegister left = locs()->in(0).fpu_reg(); | |
3126 QRegister right = locs()->in(1).fpu_reg(); | |
3127 QRegister result = locs()->out().fpu_reg(); | |
3128 | |
3129 DRegister dresult0 = EvenDRegisterOf(result); | |
3130 DRegister dresult1 = OddDRegisterOf(result); | |
3131 SRegister sresult0 = EvenSRegisterOf(dresult0); | |
3132 SRegister sresult1 = OddSRegisterOf(dresult0); | |
3133 SRegister sresult2 = EvenSRegisterOf(dresult1); | |
3134 SRegister sresult3 = OddSRegisterOf(dresult1); | |
3135 | |
3136 DRegister dleft0 = EvenDRegisterOf(left); | |
3137 DRegister dleft1 = OddDRegisterOf(left); | |
3138 DRegister dright0 = EvenDRegisterOf(right); | |
3139 DRegister dright1 = OddDRegisterOf(right); | |
3140 | |
3141 switch (op_kind()) { | |
3142 case MethodRecognizer::kFloat32x4ShuffleMix: | |
3143 case MethodRecognizer::kUint32x4ShuffleMix: | |
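[review comment] zra (2013/10/30 17:39:27): Is it possible to have special cases where vdup ca… [truncated in source]
[review comment] Cutch (2013/10/31 01:43:55): I'm not familiar enough with the NEON shuffling in… [truncated in source]
[review comment] zra (2013/10/31 15:10:20): Please leave a TODO here for me, and I'll look int… [truncated in source]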
| |
3144 SRegister left_svalues[4]; | |
3145 SRegister right_svalues[4]; | |
3146 | |
3147 left_svalues[0] = EvenSRegisterOf(dleft0); | |
3148 left_svalues[1] = OddSRegisterOf(dleft0); | |
3149 left_svalues[2] = EvenSRegisterOf(dleft1); | |
3150 left_svalues[3] = OddSRegisterOf(dleft1); | |
3151 right_svalues[0] = EvenSRegisterOf(dright0); | |
3152 right_svalues[1] = OddSRegisterOf(dright0); | |
3153 right_svalues[2] = EvenSRegisterOf(dright1); | |
3154 right_svalues[3] = OddSRegisterOf(dright1); | |
3155 | |
3156 __ vmovs(sresult0, left_svalues[mask_ & 0x3]); | |
3157 __ vmovs(sresult1, left_svalues[(mask_ >> 2) & 0x3]); | |
3158 __ vmovs(sresult2, right_svalues[(mask_ >> 4) & 0x3]); | |
3159 __ vmovs(sresult3, right_svalues[(mask_ >> 6) & 0x3]); | |
3160 break; | |
3161 default: UNREACHABLE(); | |
3162 } | |
3163 } | |
3164 | |
3165 | |
3110 LocationSummary* Simd32x4GetSignMaskInstr::MakeLocationSummary() const { | 3166 LocationSummary* Simd32x4GetSignMaskInstr::MakeLocationSummary() const { |
3111 const intptr_t kNumInputs = 1; | 3167 const intptr_t kNumInputs = 1; |
3112 const intptr_t kNumTemps = 1; | 3168 const intptr_t kNumTemps = 1; |
3113 LocationSummary* summary = | 3169 LocationSummary* summary = |
3114 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); | 3170 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); |
3115 summary->set_in(0, Location::FpuRegisterLocation(Q5)); | 3171 summary->set_in(0, Location::FpuRegisterLocation(Q5)); |
3116 summary->set_temp(0, Location::RequiresRegister()); | 3172 summary->set_temp(0, Location::RequiresRegister()); |
3117 summary->set_out(Location::RequiresRegister()); | 3173 summary->set_out(Location::RequiresRegister()); |
3118 return summary; | 3174 return summary; |
3119 } | 3175 } |
(...skipping 346 matching lines...) | |
3466 void Float32x4ToUint32x4Instr::EmitNativeCode(FlowGraphCompiler* compiler) { | 3522 void Float32x4ToUint32x4Instr::EmitNativeCode(FlowGraphCompiler* compiler) { |
3467 QRegister value = locs()->in(0).fpu_reg(); | 3523 QRegister value = locs()->in(0).fpu_reg(); |
3468 QRegister result = locs()->out().fpu_reg(); | 3524 QRegister result = locs()->out().fpu_reg(); |
3469 | 3525 |
3470 if (value != result) { | 3526 if (value != result) { |
3471 __ vmovq(result, value); | 3527 __ vmovq(result, value); |
3472 } | 3528 } |
3473 } | 3529 } |
3474 | 3530 |
3475 | 3531 |
3476 LocationSummary* Float32x4TwoArgShuffleInstr::MakeLocationSummary() const { | |
3477 const intptr_t kNumInputs = 2; | |
3478 const intptr_t kNumTemps = 0; | |
3479 LocationSummary* summary = | |
3480 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); | |
3481 summary->set_in(0, Location::RequiresFpuRegister()); | |
3482 summary->set_in(1, Location::RequiresFpuRegister()); | |
3483 summary->set_out(Location::SameAsFirstInput()); | |
3484 return summary; | |
3485 } | |
3486 | |
3487 | |
3488 void Float32x4TwoArgShuffleInstr::EmitNativeCode(FlowGraphCompiler* compiler) { | |
3489 QRegister left = locs()->in(0).fpu_reg(); | |
3490 QRegister right = locs()->in(1).fpu_reg(); | |
3491 QRegister result = locs()->out().fpu_reg(); | |
3492 | |
3493 ASSERT(result == left); | |
3494 | |
3495 DRegister dleft0 = EvenDRegisterOf(left); | |
3496 DRegister dleft1 = OddDRegisterOf(left); | |
3497 DRegister dright0 = EvenDRegisterOf(right); | |
3498 DRegister dright1 = OddDRegisterOf(right); | |
3499 | |
3500 switch (op_kind()) { | |
3501 case MethodRecognizer::kFloat32x4WithZWInXY: | |
3502 __ vmovd(dleft0, dright1); | |
3503 break; | |
3504 case MethodRecognizer::kFloat32x4InterleaveXY: | |
3505 __ vmovq(QTMP, right); | |
3506 __ vzipqw(left, QTMP); | |
3507 break; | |
3508 case MethodRecognizer::kFloat32x4InterleaveZW: | |
3509 __ vmovq(QTMP, right); | |
3510 __ vzipqw(left, QTMP); | |
3511 __ vmovq(left, QTMP); | |
3512 break; | |
3513 case MethodRecognizer::kFloat32x4InterleaveXYPairs: | |
3514 __ vmovd(dleft1, dright0); | |
3515 break; | |
3516 case MethodRecognizer::kFloat32x4InterleaveZWPairs: | |
3517 __ vmovq(QTMP, right); | |
3518 __ vmovd(EvenDRegisterOf(QTMP), dleft1); | |
3519 __ vmovq(result, QTMP); | |
3520 break; | |
3521 default: UNREACHABLE(); | |
3522 } | |
3523 } | |
3524 | |
3525 | |
3526 LocationSummary* Uint32x4BoolConstructorInstr::MakeLocationSummary() const { | 3532 LocationSummary* Uint32x4BoolConstructorInstr::MakeLocationSummary() const { |
3527 const intptr_t kNumInputs = 4; | 3533 const intptr_t kNumInputs = 4; |
3528 const intptr_t kNumTemps = 1; | 3534 const intptr_t kNumTemps = 1; |
3529 LocationSummary* summary = | 3535 LocationSummary* summary = |
3530 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); | 3536 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); |
3531 summary->set_in(0, Location::RequiresRegister()); | 3537 summary->set_in(0, Location::RequiresRegister()); |
3532 summary->set_in(1, Location::RequiresRegister()); | 3538 summary->set_in(1, Location::RequiresRegister()); |
3533 summary->set_in(2, Location::RequiresRegister()); | 3539 summary->set_in(2, Location::RequiresRegister()); |
3534 summary->set_in(3, Location::RequiresRegister()); | 3540 summary->set_in(3, Location::RequiresRegister()); |
3535 summary->set_temp(0, Location::RequiresRegister()); | 3541 summary->set_temp(0, Location::RequiresRegister()); |
(...skipping 1115 matching lines...) | |
4651 compiler->GenerateCall(token_pos(), | 4657 compiler->GenerateCall(token_pos(), |
4652 &label, | 4658 &label, |
4653 PcDescriptors::kOther, | 4659 PcDescriptors::kOther, |
4654 locs()); | 4660 locs()); |
4655 __ Drop(2); // Discard type arguments and receiver. | 4661 __ Drop(2); // Discard type arguments and receiver. |
4656 } | 4662 } |
4657 | 4663 |
4658 } // namespace dart | 4664 } // namespace dart |
4659 | 4665 |
4660 #endif // defined TARGET_ARCH_ARM | 4666 #endif // defined TARGET_ARCH_ARM |
OLD | NEW |