Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 #include "vm/globals.h" // Needed here to get TARGET_ARCH_ARM. | 5 #include "vm/globals.h" // Needed here to get TARGET_ARCH_ARM. |
| 6 #if defined(TARGET_ARCH_ARM) | 6 #if defined(TARGET_ARCH_ARM) |
| 7 | 7 |
| 8 #include "vm/intermediate_language.h" | 8 #include "vm/intermediate_language.h" |
| 9 | 9 |
| 10 #include "vm/dart_entry.h" | 10 #include "vm/dart_entry.h" |
| (...skipping 3013 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3024 switch (op_kind()) { | 3024 switch (op_kind()) { |
| 3025 case Token::kADD: __ vaddqs(result, left, right); break; | 3025 case Token::kADD: __ vaddqs(result, left, right); break; |
| 3026 case Token::kSUB: __ vsubqs(result, left, right); break; | 3026 case Token::kSUB: __ vsubqs(result, left, right); break; |
| 3027 case Token::kMUL: __ vmulqs(result, left, right); break; | 3027 case Token::kMUL: __ vmulqs(result, left, right); break; |
| 3028 case Token::kDIV: __ Vdivqs(result, left, right); break; | 3028 case Token::kDIV: __ Vdivqs(result, left, right); break; |
| 3029 default: UNREACHABLE(); | 3029 default: UNREACHABLE(); |
| 3030 } | 3030 } |
| 3031 } | 3031 } |
| 3032 | 3032 |
| 3033 | 3033 |
| 3034 LocationSummary* Float32x4ShuffleInstr::MakeLocationSummary() const { | 3034 LocationSummary* Simd32x4ShuffleInstr::MakeLocationSummary() const { |
| 3035 const intptr_t kNumInputs = 1; | 3035 const intptr_t kNumInputs = 1; |
| 3036 const intptr_t kNumTemps = 0; | 3036 const intptr_t kNumTemps = 0; |
| 3037 LocationSummary* summary = | 3037 LocationSummary* summary = |
| 3038 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); | 3038 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); |
| 3039 // Low (< Q7) Q registers are needed for the vcvtds and vmovs instructions. | 3039 // Low (< Q7) Q registers are needed for the vcvtds and vmovs instructions. |
| 3040 summary->set_in(0, Location::FpuRegisterLocation(Q5)); | 3040 summary->set_in(0, Location::FpuRegisterLocation(Q5)); |
| 3041 summary->set_out(Location::FpuRegisterLocation(Q6)); | 3041 summary->set_out(Location::FpuRegisterLocation(Q6)); |
| 3042 return summary; | 3042 return summary; |
| 3043 } | 3043 } |
| 3044 | 3044 |
| 3045 | 3045 |
| 3046 void Float32x4ShuffleInstr::EmitNativeCode(FlowGraphCompiler* compiler) { | 3046 void Simd32x4ShuffleInstr::EmitNativeCode(FlowGraphCompiler* compiler) { |
| 3047 QRegister value = locs()->in(0).fpu_reg(); | 3047 QRegister value = locs()->in(0).fpu_reg(); |
| 3048 QRegister result = locs()->out().fpu_reg(); | 3048 QRegister result = locs()->out().fpu_reg(); |
| 3049 DRegister dresult0 = EvenDRegisterOf(result); | 3049 DRegister dresult0 = EvenDRegisterOf(result); |
| 3050 DRegister dresult1 = OddDRegisterOf(result); | 3050 DRegister dresult1 = OddDRegisterOf(result); |
| 3051 SRegister sresult0 = EvenSRegisterOf(dresult0); | 3051 SRegister sresult0 = EvenSRegisterOf(dresult0); |
| 3052 SRegister sresult1 = OddSRegisterOf(dresult0); | 3052 SRegister sresult1 = OddSRegisterOf(dresult0); |
| 3053 SRegister sresult2 = EvenSRegisterOf(dresult1); | 3053 SRegister sresult2 = EvenSRegisterOf(dresult1); |
| 3054 SRegister sresult3 = OddSRegisterOf(dresult1); | 3054 SRegister sresult3 = OddSRegisterOf(dresult1); |
| 3055 | 3055 |
| 3056 DRegister dvalue0 = EvenDRegisterOf(value); | 3056 DRegister dvalue0 = EvenDRegisterOf(value); |
| (...skipping 14 matching lines...) Expand all Loading... | |
| 3071 __ vcvtds(dresult0, sresult0); | 3071 __ vcvtds(dresult0, sresult0); |
| 3072 break; | 3072 break; |
| 3073 case MethodRecognizer::kFloat32x4ShuffleZ: | 3073 case MethodRecognizer::kFloat32x4ShuffleZ: |
| 3074 __ vdup(kWord, result, dvalue1, 0); | 3074 __ vdup(kWord, result, dvalue1, 0); |
| 3075 __ vcvtds(dresult0, sresult0); | 3075 __ vcvtds(dresult0, sresult0); |
| 3076 break; | 3076 break; |
| 3077 case MethodRecognizer::kFloat32x4ShuffleW: | 3077 case MethodRecognizer::kFloat32x4ShuffleW: |
| 3078 __ vdup(kWord, result, dvalue1, 1); | 3078 __ vdup(kWord, result, dvalue1, 1); |
| 3079 __ vcvtds(dresult0, sresult0); | 3079 __ vcvtds(dresult0, sresult0); |
| 3080 break; | 3080 break; |
| 3081 case MethodRecognizer::kUint32x4Shuffle: | |
| 3081 case MethodRecognizer::kFloat32x4Shuffle: | 3082 case MethodRecognizer::kFloat32x4Shuffle: |
| 3082 if (mask_ == 0x00) { | 3083 if (mask_ == 0x00) { |
| 3083 __ vdup(kWord, result, dvalue0, 0); | 3084 __ vdup(kWord, result, dvalue0, 0); |
| 3084 } else if (mask_ == 0x55) { | 3085 } else if (mask_ == 0x55) { |
| 3085 __ vdup(kWord, result, dvalue0, 1); | 3086 __ vdup(kWord, result, dvalue0, 1); |
| 3086 } else if (mask_ == 0xAA) { | 3087 } else if (mask_ == 0xAA) { |
| 3087 __ vdup(kWord, result, dvalue1, 0); | 3088 __ vdup(kWord, result, dvalue1, 0); |
| 3088 } else if (mask_ == 0xFF) { | 3089 } else if (mask_ == 0xFF) { |
| 3089 __ vdup(kWord, result, dvalue1, 1); | 3090 __ vdup(kWord, result, dvalue1, 1); |
| 3090 } else { | 3091 } else { |
| 3091 SRegister svalues[4]; | 3092 SRegister svalues[4]; |
| 3092 | 3093 |
| 3093 svalues[0] = EvenSRegisterOf(dtemp0); | 3094 svalues[0] = EvenSRegisterOf(dtemp0); |
| 3094 svalues[1] = OddSRegisterOf(dtemp0); | 3095 svalues[1] = OddSRegisterOf(dtemp0); |
| 3095 svalues[2] = EvenSRegisterOf(dtemp1); | 3096 svalues[2] = EvenSRegisterOf(dtemp1); |
| 3096 svalues[3] = OddSRegisterOf(dtemp1); | 3097 svalues[3] = OddSRegisterOf(dtemp1); |
| 3097 | 3098 |
| 3098 __ vmovq(QTMP, value); | 3099 __ vmovq(QTMP, value); |
| 3099 __ vmovs(sresult0, svalues[mask_ & 0x3]); | 3100 __ vmovs(sresult0, svalues[mask_ & 0x3]); |
| 3100 __ vmovs(sresult1, svalues[(mask_ >> 2) & 0x3]); | 3101 __ vmovs(sresult1, svalues[(mask_ >> 2) & 0x3]); |
| 3101 __ vmovs(sresult2, svalues[(mask_ >> 4) & 0x3]); | 3102 __ vmovs(sresult2, svalues[(mask_ >> 4) & 0x3]); |
| 3102 __ vmovs(sresult3, svalues[(mask_ >> 6) & 0x3]); | 3103 __ vmovs(sresult3, svalues[(mask_ >> 6) & 0x3]); |
| 3103 } | 3104 } |
| 3104 break; | 3105 break; |
| 3105 default: UNREACHABLE(); | 3106 default: UNREACHABLE(); |
| 3106 } | 3107 } |
| 3107 } | 3108 } |
| 3108 | 3109 |
| 3109 | 3110 |
| 3111 LocationSummary* Simd32x4ShuffleMixInstr::MakeLocationSummary() const { | |
| 3112 const intptr_t kNumInputs = 2; | |
| 3113 const intptr_t kNumTemps = 0; | |
| 3114 LocationSummary* summary = | |
| 3115 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); | |
| 3116 // Low (< Q7) Q registers are needed for the vcvtds and vmovs instructions. | |
| 3117 summary->set_in(0, Location::FpuRegisterLocation(Q4)); | |
| 3118 summary->set_in(1, Location::FpuRegisterLocation(Q5)); | |
| 3119 summary->set_out(Location::FpuRegisterLocation(Q6)); | |
| 3120 return summary; | |
| 3121 } | |
| 3122 | |
| 3123 | |
| 3124 void Simd32x4ShuffleMixInstr::EmitNativeCode(FlowGraphCompiler* compiler) { | |
| 3125 QRegister left = locs()->in(0).fpu_reg(); | |
| 3126 QRegister right = locs()->in(1).fpu_reg(); | |
| 3127 QRegister result = locs()->out().fpu_reg(); | |
| 3128 | |
| 3129 DRegister dresult0 = EvenDRegisterOf(result); | |
| 3130 DRegister dresult1 = OddDRegisterOf(result); | |
| 3131 SRegister sresult0 = EvenSRegisterOf(dresult0); | |
| 3132 SRegister sresult1 = OddSRegisterOf(dresult0); | |
| 3133 SRegister sresult2 = EvenSRegisterOf(dresult1); | |
| 3134 SRegister sresult3 = OddSRegisterOf(dresult1); | |
| 3135 | |
| 3136 DRegister dleft0 = EvenDRegisterOf(left); | |
| 3137 DRegister dleft1 = OddDRegisterOf(left); | |
| 3138 DRegister dright0 = EvenDRegisterOf(right); | |
| 3139 DRegister dright1 = OddDRegisterOf(right); | |
| 3140 | |
| 3141 switch (op_kind()) { | |
| 3142 case MethodRecognizer::kFloat32x4ShuffleMix: | |
| 3143 case MethodRecognizer::kUint32x4ShuffleMix: | |
|
zra
2013/10/30 17:39:27
Is it possible to have special cases where vdup ca…
Cutch
2013/10/31 01:43:55
I'm not familiar enough with the NEON shuffling in…
zra
2013/10/31 15:10:20
Please leave a TODO here for me, and I'll look int…
| |
| 3144 SRegister left_svalues[4]; | |
| 3145 SRegister right_svalues[4]; | |
| 3146 | |
| 3147 left_svalues[0] = EvenSRegisterOf(dleft0); | |
| 3148 left_svalues[1] = OddSRegisterOf(dleft0); | |
| 3149 left_svalues[2] = EvenSRegisterOf(dleft1); | |
| 3150 left_svalues[3] = OddSRegisterOf(dleft1); | |
| 3151 right_svalues[0] = EvenSRegisterOf(dright0); | |
| 3152 right_svalues[1] = OddSRegisterOf(dright0); | |
| 3153 right_svalues[2] = EvenSRegisterOf(dright1); | |
| 3154 right_svalues[3] = OddSRegisterOf(dright1); | |
| 3155 | |
| 3156 __ vmovs(sresult0, left_svalues[mask_ & 0x3]); | |
| 3157 __ vmovs(sresult1, left_svalues[(mask_ >> 2) & 0x3]); | |
| 3158 __ vmovs(sresult2, right_svalues[(mask_ >> 4) & 0x3]); | |
| 3159 __ vmovs(sresult3, right_svalues[(mask_ >> 6) & 0x3]); | |
| 3160 break; | |
| 3161 default: UNREACHABLE(); | |
| 3162 } | |
| 3163 } | |
| 3164 | |
| 3165 | |
| 3110 LocationSummary* Simd32x4GetSignMaskInstr::MakeLocationSummary() const { | 3166 LocationSummary* Simd32x4GetSignMaskInstr::MakeLocationSummary() const { |
| 3111 const intptr_t kNumInputs = 1; | 3167 const intptr_t kNumInputs = 1; |
| 3112 const intptr_t kNumTemps = 1; | 3168 const intptr_t kNumTemps = 1; |
| 3113 LocationSummary* summary = | 3169 LocationSummary* summary = |
| 3114 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); | 3170 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); |
| 3115 summary->set_in(0, Location::FpuRegisterLocation(Q5)); | 3171 summary->set_in(0, Location::FpuRegisterLocation(Q5)); |
| 3116 summary->set_temp(0, Location::RequiresRegister()); | 3172 summary->set_temp(0, Location::RequiresRegister()); |
| 3117 summary->set_out(Location::RequiresRegister()); | 3173 summary->set_out(Location::RequiresRegister()); |
| 3118 return summary; | 3174 return summary; |
| 3119 } | 3175 } |
| (...skipping 346 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3466 void Float32x4ToUint32x4Instr::EmitNativeCode(FlowGraphCompiler* compiler) { | 3522 void Float32x4ToUint32x4Instr::EmitNativeCode(FlowGraphCompiler* compiler) { |
| 3467 QRegister value = locs()->in(0).fpu_reg(); | 3523 QRegister value = locs()->in(0).fpu_reg(); |
| 3468 QRegister result = locs()->out().fpu_reg(); | 3524 QRegister result = locs()->out().fpu_reg(); |
| 3469 | 3525 |
| 3470 if (value != result) { | 3526 if (value != result) { |
| 3471 __ vmovq(result, value); | 3527 __ vmovq(result, value); |
| 3472 } | 3528 } |
| 3473 } | 3529 } |
| 3474 | 3530 |
| 3475 | 3531 |
| 3476 LocationSummary* Float32x4TwoArgShuffleInstr::MakeLocationSummary() const { | |
| 3477 const intptr_t kNumInputs = 2; | |
| 3478 const intptr_t kNumTemps = 0; | |
| 3479 LocationSummary* summary = | |
| 3480 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); | |
| 3481 summary->set_in(0, Location::RequiresFpuRegister()); | |
| 3482 summary->set_in(1, Location::RequiresFpuRegister()); | |
| 3483 summary->set_out(Location::SameAsFirstInput()); | |
| 3484 return summary; | |
| 3485 } | |
| 3486 | |
| 3487 | |
| 3488 void Float32x4TwoArgShuffleInstr::EmitNativeCode(FlowGraphCompiler* compiler) { | |
| 3489 QRegister left = locs()->in(0).fpu_reg(); | |
| 3490 QRegister right = locs()->in(1).fpu_reg(); | |
| 3491 QRegister result = locs()->out().fpu_reg(); | |
| 3492 | |
| 3493 ASSERT(result == left); | |
| 3494 | |
| 3495 DRegister dleft0 = EvenDRegisterOf(left); | |
| 3496 DRegister dleft1 = OddDRegisterOf(left); | |
| 3497 DRegister dright0 = EvenDRegisterOf(right); | |
| 3498 DRegister dright1 = OddDRegisterOf(right); | |
| 3499 | |
| 3500 switch (op_kind()) { | |
| 3501 case MethodRecognizer::kFloat32x4WithZWInXY: | |
| 3502 __ vmovd(dleft0, dright1); | |
| 3503 break; | |
| 3504 case MethodRecognizer::kFloat32x4InterleaveXY: | |
| 3505 __ vmovq(QTMP, right); | |
| 3506 __ vzipqw(left, QTMP); | |
| 3507 break; | |
| 3508 case MethodRecognizer::kFloat32x4InterleaveZW: | |
| 3509 __ vmovq(QTMP, right); | |
| 3510 __ vzipqw(left, QTMP); | |
| 3511 __ vmovq(left, QTMP); | |
| 3512 break; | |
| 3513 case MethodRecognizer::kFloat32x4InterleaveXYPairs: | |
| 3514 __ vmovd(dleft1, dright0); | |
| 3515 break; | |
| 3516 case MethodRecognizer::kFloat32x4InterleaveZWPairs: | |
| 3517 __ vmovq(QTMP, right); | |
| 3518 __ vmovd(EvenDRegisterOf(QTMP), dleft1); | |
| 3519 __ vmovq(result, QTMP); | |
| 3520 break; | |
| 3521 default: UNREACHABLE(); | |
| 3522 } | |
| 3523 } | |
| 3524 | |
| 3525 | |
| 3526 LocationSummary* Uint32x4BoolConstructorInstr::MakeLocationSummary() const { | 3532 LocationSummary* Uint32x4BoolConstructorInstr::MakeLocationSummary() const { |
| 3527 const intptr_t kNumInputs = 4; | 3533 const intptr_t kNumInputs = 4; |
| 3528 const intptr_t kNumTemps = 1; | 3534 const intptr_t kNumTemps = 1; |
| 3529 LocationSummary* summary = | 3535 LocationSummary* summary = |
| 3530 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); | 3536 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); |
| 3531 summary->set_in(0, Location::RequiresRegister()); | 3537 summary->set_in(0, Location::RequiresRegister()); |
| 3532 summary->set_in(1, Location::RequiresRegister()); | 3538 summary->set_in(1, Location::RequiresRegister()); |
| 3533 summary->set_in(2, Location::RequiresRegister()); | 3539 summary->set_in(2, Location::RequiresRegister()); |
| 3534 summary->set_in(3, Location::RequiresRegister()); | 3540 summary->set_in(3, Location::RequiresRegister()); |
| 3535 summary->set_temp(0, Location::RequiresRegister()); | 3541 summary->set_temp(0, Location::RequiresRegister()); |
| (...skipping 1115 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4651 compiler->GenerateCall(token_pos(), | 4657 compiler->GenerateCall(token_pos(), |
| 4652 &label, | 4658 &label, |
| 4653 PcDescriptors::kOther, | 4659 PcDescriptors::kOther, |
| 4654 locs()); | 4660 locs()); |
| 4655 __ Drop(2); // Discard type arguments and receiver. | 4661 __ Drop(2); // Discard type arguments and receiver. |
| 4656 } | 4662 } |
| 4657 | 4663 |
| 4658 } // namespace dart | 4664 } // namespace dart |
| 4659 | 4665 |
| 4660 #endif // defined TARGET_ARCH_ARM | 4666 #endif // defined TARGET_ARCH_ARM |
| OLD | NEW |