OLD | NEW |
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 #include "vm/globals.h" // Needed here to get TARGET_ARCH_ARM. | 5 #include "vm/globals.h" // Needed here to get TARGET_ARCH_ARM. |
6 #if defined(TARGET_ARCH_ARM) | 6 #if defined(TARGET_ARCH_ARM) |
7 | 7 |
8 #include "vm/intermediate_language.h" | 8 #include "vm/intermediate_language.h" |
9 | 9 |
10 #include "vm/dart_entry.h" | 10 #include "vm/dart_entry.h" |
(...skipping 3025 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3036 switch (op_kind()) { | 3036 switch (op_kind()) { |
3037 case Token::kADD: __ vaddqs(result, left, right); break; | 3037 case Token::kADD: __ vaddqs(result, left, right); break; |
3038 case Token::kSUB: __ vsubqs(result, left, right); break; | 3038 case Token::kSUB: __ vsubqs(result, left, right); break; |
3039 case Token::kMUL: __ vmulqs(result, left, right); break; | 3039 case Token::kMUL: __ vmulqs(result, left, right); break; |
3040 case Token::kDIV: __ Vdivqs(result, left, right); break; | 3040 case Token::kDIV: __ Vdivqs(result, left, right); break; |
3041 default: UNREACHABLE(); | 3041 default: UNREACHABLE(); |
3042 } | 3042 } |
3043 } | 3043 } |
3044 | 3044 |
3045 | 3045 |
3046 LocationSummary* Float32x4ShuffleInstr::MakeLocationSummary() const { | 3046 LocationSummary* Simd32x4ShuffleInstr::MakeLocationSummary() const { |
3047 const intptr_t kNumInputs = 1; | 3047 const intptr_t kNumInputs = 1; |
3048 const intptr_t kNumTemps = 0; | 3048 const intptr_t kNumTemps = 0; |
3049 LocationSummary* summary = | 3049 LocationSummary* summary = |
3050 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); | 3050 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); |
3051 // Low (< Q7) Q registers are needed for the vcvtds and vmovs instructions. | 3051 // Low (< Q7) Q registers are needed for the vcvtds and vmovs instructions. |
3052 summary->set_in(0, Location::FpuRegisterLocation(Q5)); | 3052 summary->set_in(0, Location::FpuRegisterLocation(Q5)); |
3053 summary->set_out(Location::FpuRegisterLocation(Q6)); | 3053 summary->set_out(Location::FpuRegisterLocation(Q6)); |
3054 return summary; | 3054 return summary; |
3055 } | 3055 } |
3056 | 3056 |
3057 | 3057 |
3058 void Float32x4ShuffleInstr::EmitNativeCode(FlowGraphCompiler* compiler) { | 3058 void Simd32x4ShuffleInstr::EmitNativeCode(FlowGraphCompiler* compiler) { |
3059 QRegister value = locs()->in(0).fpu_reg(); | 3059 QRegister value = locs()->in(0).fpu_reg(); |
3060 QRegister result = locs()->out().fpu_reg(); | 3060 QRegister result = locs()->out().fpu_reg(); |
3061 DRegister dresult0 = EvenDRegisterOf(result); | 3061 DRegister dresult0 = EvenDRegisterOf(result); |
3062 DRegister dresult1 = OddDRegisterOf(result); | 3062 DRegister dresult1 = OddDRegisterOf(result); |
3063 SRegister sresult0 = EvenSRegisterOf(dresult0); | 3063 SRegister sresult0 = EvenSRegisterOf(dresult0); |
3064 SRegister sresult1 = OddSRegisterOf(dresult0); | 3064 SRegister sresult1 = OddSRegisterOf(dresult0); |
3065 SRegister sresult2 = EvenSRegisterOf(dresult1); | 3065 SRegister sresult2 = EvenSRegisterOf(dresult1); |
3066 SRegister sresult3 = OddSRegisterOf(dresult1); | 3066 SRegister sresult3 = OddSRegisterOf(dresult1); |
3067 | 3067 |
3068 DRegister dvalue0 = EvenDRegisterOf(value); | 3068 DRegister dvalue0 = EvenDRegisterOf(value); |
3069 DRegister dvalue1 = OddDRegisterOf(value); | 3069 DRegister dvalue1 = OddDRegisterOf(value); |
3070 | 3070 |
3071 DRegister dtemp0 = DTMP; | 3071 DRegister dtemp0 = DTMP; |
3072 DRegister dtemp1 = OddDRegisterOf(QTMP); | 3072 DRegister dtemp1 = OddDRegisterOf(QTMP); |
3073 | 3073 |
3074 // For some cases the vdup instruction requires fewer | 3074 // For some cases the vdup instruction requires fewer |
3075 // instructions. For arbitrary shuffles, use vtbl. | 3075 // instructions. For arbitrary shuffles, use vtbl. |
| 3076 |
3076 switch (op_kind()) { | 3077 switch (op_kind()) { |
3077 case MethodRecognizer::kFloat32x4ShuffleX: | 3078 case MethodRecognizer::kFloat32x4ShuffleX: |
3078 __ vdup(kWord, result, dvalue0, 0); | 3079 __ vdup(kWord, result, dvalue0, 0); |
3079 __ vcvtds(dresult0, sresult0); | 3080 __ vcvtds(dresult0, sresult0); |
3080 break; | 3081 break; |
3081 case MethodRecognizer::kFloat32x4ShuffleY: | 3082 case MethodRecognizer::kFloat32x4ShuffleY: |
3082 __ vdup(kWord, result, dvalue0, 1); | 3083 __ vdup(kWord, result, dvalue0, 1); |
3083 __ vcvtds(dresult0, sresult0); | 3084 __ vcvtds(dresult0, sresult0); |
3084 break; | 3085 break; |
3085 case MethodRecognizer::kFloat32x4ShuffleZ: | 3086 case MethodRecognizer::kFloat32x4ShuffleZ: |
3086 __ vdup(kWord, result, dvalue1, 0); | 3087 __ vdup(kWord, result, dvalue1, 0); |
3087 __ vcvtds(dresult0, sresult0); | 3088 __ vcvtds(dresult0, sresult0); |
3088 break; | 3089 break; |
3089 case MethodRecognizer::kFloat32x4ShuffleW: | 3090 case MethodRecognizer::kFloat32x4ShuffleW: |
3090 __ vdup(kWord, result, dvalue1, 1); | 3091 __ vdup(kWord, result, dvalue1, 1); |
3091 __ vcvtds(dresult0, sresult0); | 3092 __ vcvtds(dresult0, sresult0); |
3092 break; | 3093 break; |
| 3094 case MethodRecognizer::kUint32x4Shuffle: |
3093 case MethodRecognizer::kFloat32x4Shuffle: | 3095 case MethodRecognizer::kFloat32x4Shuffle: |
3094 if (mask_ == 0x00) { | 3096 if (mask_ == 0x00) { |
3095 __ vdup(kWord, result, dvalue0, 0); | 3097 __ vdup(kWord, result, dvalue0, 0); |
3096 } else if (mask_ == 0x55) { | 3098 } else if (mask_ == 0x55) { |
3097 __ vdup(kWord, result, dvalue0, 1); | 3099 __ vdup(kWord, result, dvalue0, 1); |
3098 } else if (mask_ == 0xAA) { | 3100 } else if (mask_ == 0xAA) { |
3099 __ vdup(kWord, result, dvalue1, 0); | 3101 __ vdup(kWord, result, dvalue1, 0); |
3100 } else if (mask_ == 0xFF) { | 3102 } else if (mask_ == 0xFF) { |
3101 __ vdup(kWord, result, dvalue1, 1); | 3103 __ vdup(kWord, result, dvalue1, 1); |
3102 } else { | 3104 } else { |
| 3105 // TODO(zra): Investigate better instruction sequences for other |
| 3106 // shuffle masks. |
3103 SRegister svalues[4]; | 3107 SRegister svalues[4]; |
3104 | 3108 |
3105 svalues[0] = EvenSRegisterOf(dtemp0); | 3109 svalues[0] = EvenSRegisterOf(dtemp0); |
3106 svalues[1] = OddSRegisterOf(dtemp0); | 3110 svalues[1] = OddSRegisterOf(dtemp0); |
3107 svalues[2] = EvenSRegisterOf(dtemp1); | 3111 svalues[2] = EvenSRegisterOf(dtemp1); |
3108 svalues[3] = OddSRegisterOf(dtemp1); | 3112 svalues[3] = OddSRegisterOf(dtemp1); |
3109 | 3113 |
3110 __ vmovq(QTMP, value); | 3114 __ vmovq(QTMP, value); |
3111 __ vmovs(sresult0, svalues[mask_ & 0x3]); | 3115 __ vmovs(sresult0, svalues[mask_ & 0x3]); |
3112 __ vmovs(sresult1, svalues[(mask_ >> 2) & 0x3]); | 3116 __ vmovs(sresult1, svalues[(mask_ >> 2) & 0x3]); |
3113 __ vmovs(sresult2, svalues[(mask_ >> 4) & 0x3]); | 3117 __ vmovs(sresult2, svalues[(mask_ >> 4) & 0x3]); |
3114 __ vmovs(sresult3, svalues[(mask_ >> 6) & 0x3]); | 3118 __ vmovs(sresult3, svalues[(mask_ >> 6) & 0x3]); |
3115 } | 3119 } |
3116 break; | 3120 break; |
3117 default: UNREACHABLE(); | 3121 default: UNREACHABLE(); |
3118 } | 3122 } |
3119 } | 3123 } |
3120 | 3124 |
3121 | 3125 |
| 3126 LocationSummary* Simd32x4ShuffleMixInstr::MakeLocationSummary() const { |
| 3127 const intptr_t kNumInputs = 2; |
| 3128 const intptr_t kNumTemps = 0; |
| 3129 LocationSummary* summary = |
| 3130 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); |
| 3131 // Low (< Q7) Q registers are needed for the vcvtds and vmovs instructions. |
| 3132 summary->set_in(0, Location::FpuRegisterLocation(Q4)); |
| 3133 summary->set_in(1, Location::FpuRegisterLocation(Q5)); |
| 3134 summary->set_out(Location::FpuRegisterLocation(Q6)); |
| 3135 return summary; |
| 3136 } |
| 3137 |
| 3138 |
| 3139 void Simd32x4ShuffleMixInstr::EmitNativeCode(FlowGraphCompiler* compiler) { |
| 3140 QRegister left = locs()->in(0).fpu_reg(); |
| 3141 QRegister right = locs()->in(1).fpu_reg(); |
| 3142 QRegister result = locs()->out().fpu_reg(); |
| 3143 |
| 3144 DRegister dresult0 = EvenDRegisterOf(result); |
| 3145 DRegister dresult1 = OddDRegisterOf(result); |
| 3146 SRegister sresult0 = EvenSRegisterOf(dresult0); |
| 3147 SRegister sresult1 = OddSRegisterOf(dresult0); |
| 3148 SRegister sresult2 = EvenSRegisterOf(dresult1); |
| 3149 SRegister sresult3 = OddSRegisterOf(dresult1); |
| 3150 |
| 3151 DRegister dleft0 = EvenDRegisterOf(left); |
| 3152 DRegister dleft1 = OddDRegisterOf(left); |
| 3153 DRegister dright0 = EvenDRegisterOf(right); |
| 3154 DRegister dright1 = OddDRegisterOf(right); |
| 3155 |
| 3156 switch (op_kind()) { |
| 3157 case MethodRecognizer::kFloat32x4ShuffleMix: |
| 3158 case MethodRecognizer::kUint32x4ShuffleMix: |
| 3159 // TODO(zra): Investigate better instruction sequences for shuffle masks. |
| 3160 SRegister left_svalues[4]; |
| 3161 SRegister right_svalues[4]; |
| 3162 |
| 3163 left_svalues[0] = EvenSRegisterOf(dleft0); |
| 3164 left_svalues[1] = OddSRegisterOf(dleft0); |
| 3165 left_svalues[2] = EvenSRegisterOf(dleft1); |
| 3166 left_svalues[3] = OddSRegisterOf(dleft1); |
| 3167 right_svalues[0] = EvenSRegisterOf(dright0); |
| 3168 right_svalues[1] = OddSRegisterOf(dright0); |
| 3169 right_svalues[2] = EvenSRegisterOf(dright1); |
| 3170 right_svalues[3] = OddSRegisterOf(dright1); |
| 3171 |
| 3172 __ vmovs(sresult0, left_svalues[mask_ & 0x3]); |
| 3173 __ vmovs(sresult1, left_svalues[(mask_ >> 2) & 0x3]); |
| 3174 __ vmovs(sresult2, right_svalues[(mask_ >> 4) & 0x3]); |
| 3175 __ vmovs(sresult3, right_svalues[(mask_ >> 6) & 0x3]); |
| 3176 break; |
| 3177 default: UNREACHABLE(); |
| 3178 } |
| 3179 } |
| 3180 |
| 3181 |
3122 LocationSummary* Simd32x4GetSignMaskInstr::MakeLocationSummary() const { | 3182 LocationSummary* Simd32x4GetSignMaskInstr::MakeLocationSummary() const { |
3123 const intptr_t kNumInputs = 1; | 3183 const intptr_t kNumInputs = 1; |
3124 const intptr_t kNumTemps = 1; | 3184 const intptr_t kNumTemps = 1; |
3125 LocationSummary* summary = | 3185 LocationSummary* summary = |
3126 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); | 3186 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); |
3127 summary->set_in(0, Location::FpuRegisterLocation(Q5)); | 3187 summary->set_in(0, Location::FpuRegisterLocation(Q5)); |
3128 summary->set_temp(0, Location::RequiresRegister()); | 3188 summary->set_temp(0, Location::RequiresRegister()); |
3129 summary->set_out(Location::RequiresRegister()); | 3189 summary->set_out(Location::RequiresRegister()); |
3130 return summary; | 3190 return summary; |
3131 } | 3191 } |
(...skipping 346 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3478 void Float32x4ToUint32x4Instr::EmitNativeCode(FlowGraphCompiler* compiler) { | 3538 void Float32x4ToUint32x4Instr::EmitNativeCode(FlowGraphCompiler* compiler) { |
3479 QRegister value = locs()->in(0).fpu_reg(); | 3539 QRegister value = locs()->in(0).fpu_reg(); |
3480 QRegister result = locs()->out().fpu_reg(); | 3540 QRegister result = locs()->out().fpu_reg(); |
3481 | 3541 |
3482 if (value != result) { | 3542 if (value != result) { |
3483 __ vmovq(result, value); | 3543 __ vmovq(result, value); |
3484 } | 3544 } |
3485 } | 3545 } |
3486 | 3546 |
3487 | 3547 |
3488 LocationSummary* Float32x4TwoArgShuffleInstr::MakeLocationSummary() const { | |
3489 const intptr_t kNumInputs = 2; | |
3490 const intptr_t kNumTemps = 0; | |
3491 LocationSummary* summary = | |
3492 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); | |
3493 summary->set_in(0, Location::RequiresFpuRegister()); | |
3494 summary->set_in(1, Location::RequiresFpuRegister()); | |
3495 summary->set_out(Location::SameAsFirstInput()); | |
3496 return summary; | |
3497 } | |
3498 | |
3499 | |
3500 void Float32x4TwoArgShuffleInstr::EmitNativeCode(FlowGraphCompiler* compiler) { | |
3501 QRegister left = locs()->in(0).fpu_reg(); | |
3502 QRegister right = locs()->in(1).fpu_reg(); | |
3503 QRegister result = locs()->out().fpu_reg(); | |
3504 | |
3505 ASSERT(result == left); | |
3506 | |
3507 DRegister dleft0 = EvenDRegisterOf(left); | |
3508 DRegister dleft1 = OddDRegisterOf(left); | |
3509 DRegister dright0 = EvenDRegisterOf(right); | |
3510 DRegister dright1 = OddDRegisterOf(right); | |
3511 | |
3512 switch (op_kind()) { | |
3513 case MethodRecognizer::kFloat32x4WithZWInXY: | |
3514 __ vmovd(dleft0, dright1); | |
3515 break; | |
3516 case MethodRecognizer::kFloat32x4InterleaveXY: | |
3517 __ vmovq(QTMP, right); | |
3518 __ vzipqw(left, QTMP); | |
3519 break; | |
3520 case MethodRecognizer::kFloat32x4InterleaveZW: | |
3521 __ vmovq(QTMP, right); | |
3522 __ vzipqw(left, QTMP); | |
3523 __ vmovq(left, QTMP); | |
3524 break; | |
3525 case MethodRecognizer::kFloat32x4InterleaveXYPairs: | |
3526 __ vmovd(dleft1, dright0); | |
3527 break; | |
3528 case MethodRecognizer::kFloat32x4InterleaveZWPairs: | |
3529 __ vmovq(QTMP, right); | |
3530 __ vmovd(EvenDRegisterOf(QTMP), dleft1); | |
3531 __ vmovq(result, QTMP); | |
3532 break; | |
3533 default: UNREACHABLE(); | |
3534 } | |
3535 } | |
3536 | |
3537 | |
3538 LocationSummary* Uint32x4BoolConstructorInstr::MakeLocationSummary() const { | 3548 LocationSummary* Uint32x4BoolConstructorInstr::MakeLocationSummary() const { |
3539 const intptr_t kNumInputs = 4; | 3549 const intptr_t kNumInputs = 4; |
3540 const intptr_t kNumTemps = 1; | 3550 const intptr_t kNumTemps = 1; |
3541 LocationSummary* summary = | 3551 LocationSummary* summary = |
3542 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); | 3552 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); |
3543 summary->set_in(0, Location::RequiresRegister()); | 3553 summary->set_in(0, Location::RequiresRegister()); |
3544 summary->set_in(1, Location::RequiresRegister()); | 3554 summary->set_in(1, Location::RequiresRegister()); |
3545 summary->set_in(2, Location::RequiresRegister()); | 3555 summary->set_in(2, Location::RequiresRegister()); |
3546 summary->set_in(3, Location::RequiresRegister()); | 3556 summary->set_in(3, Location::RequiresRegister()); |
3547 summary->set_temp(0, Location::RequiresRegister()); | 3557 summary->set_temp(0, Location::RequiresRegister()); |
(...skipping 1149 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4697 compiler->GenerateCall(token_pos(), | 4707 compiler->GenerateCall(token_pos(), |
4698 &label, | 4708 &label, |
4699 PcDescriptors::kOther, | 4709 PcDescriptors::kOther, |
4700 locs()); | 4710 locs()); |
4701 __ Drop(2); // Discard type arguments and receiver. | 4711 __ Drop(2); // Discard type arguments and receiver. |
4702 } | 4712 } |
4703 | 4713 |
4704 } // namespace dart | 4714 } // namespace dart |
4705 | 4715 |
4706 #endif // defined TARGET_ARCH_ARM | 4716 #endif // defined TARGET_ARCH_ARM |
OLD | NEW |