Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(600)

Side by Side Diff: runtime/vm/intermediate_language_arm.cc

Issue 51373004: SIMD shuffle API changes (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « runtime/vm/intermediate_language.h ('k') | runtime/vm/intermediate_language_ia32.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #include "vm/globals.h" // Needed here to get TARGET_ARCH_ARM. 5 #include "vm/globals.h" // Needed here to get TARGET_ARCH_ARM.
6 #if defined(TARGET_ARCH_ARM) 6 #if defined(TARGET_ARCH_ARM)
7 7
8 #include "vm/intermediate_language.h" 8 #include "vm/intermediate_language.h"
9 9
10 #include "vm/dart_entry.h" 10 #include "vm/dart_entry.h"
(...skipping 3025 matching lines...) Expand 10 before | Expand all | Expand 10 after
3036 switch (op_kind()) { 3036 switch (op_kind()) {
3037 case Token::kADD: __ vaddqs(result, left, right); break; 3037 case Token::kADD: __ vaddqs(result, left, right); break;
3038 case Token::kSUB: __ vsubqs(result, left, right); break; 3038 case Token::kSUB: __ vsubqs(result, left, right); break;
3039 case Token::kMUL: __ vmulqs(result, left, right); break; 3039 case Token::kMUL: __ vmulqs(result, left, right); break;
3040 case Token::kDIV: __ Vdivqs(result, left, right); break; 3040 case Token::kDIV: __ Vdivqs(result, left, right); break;
3041 default: UNREACHABLE(); 3041 default: UNREACHABLE();
3042 } 3042 }
3043 } 3043 }
3044 3044
3045 3045
3046 LocationSummary* Float32x4ShuffleInstr::MakeLocationSummary() const { 3046 LocationSummary* Simd32x4ShuffleInstr::MakeLocationSummary() const {
3047 const intptr_t kNumInputs = 1; 3047 const intptr_t kNumInputs = 1;
3048 const intptr_t kNumTemps = 0; 3048 const intptr_t kNumTemps = 0;
3049 LocationSummary* summary = 3049 LocationSummary* summary =
3050 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); 3050 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
3051 // Low (< Q7) Q registers are needed for the vcvtds and vmovs instructions. 3051 // Low (< Q7) Q registers are needed for the vcvtds and vmovs instructions.
3052 summary->set_in(0, Location::FpuRegisterLocation(Q5)); 3052 summary->set_in(0, Location::FpuRegisterLocation(Q5));
3053 summary->set_out(Location::FpuRegisterLocation(Q6)); 3053 summary->set_out(Location::FpuRegisterLocation(Q6));
3054 return summary; 3054 return summary;
3055 } 3055 }
3056 3056
3057 3057
3058 void Float32x4ShuffleInstr::EmitNativeCode(FlowGraphCompiler* compiler) { 3058 void Simd32x4ShuffleInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
3059 QRegister value = locs()->in(0).fpu_reg(); 3059 QRegister value = locs()->in(0).fpu_reg();
3060 QRegister result = locs()->out().fpu_reg(); 3060 QRegister result = locs()->out().fpu_reg();
3061 DRegister dresult0 = EvenDRegisterOf(result); 3061 DRegister dresult0 = EvenDRegisterOf(result);
3062 DRegister dresult1 = OddDRegisterOf(result); 3062 DRegister dresult1 = OddDRegisterOf(result);
3063 SRegister sresult0 = EvenSRegisterOf(dresult0); 3063 SRegister sresult0 = EvenSRegisterOf(dresult0);
3064 SRegister sresult1 = OddSRegisterOf(dresult0); 3064 SRegister sresult1 = OddSRegisterOf(dresult0);
3065 SRegister sresult2 = EvenSRegisterOf(dresult1); 3065 SRegister sresult2 = EvenSRegisterOf(dresult1);
3066 SRegister sresult3 = OddSRegisterOf(dresult1); 3066 SRegister sresult3 = OddSRegisterOf(dresult1);
3067 3067
3068 DRegister dvalue0 = EvenDRegisterOf(value); 3068 DRegister dvalue0 = EvenDRegisterOf(value);
3069 DRegister dvalue1 = OddDRegisterOf(value); 3069 DRegister dvalue1 = OddDRegisterOf(value);
3070 3070
3071 DRegister dtemp0 = DTMP; 3071 DRegister dtemp0 = DTMP;
3072 DRegister dtemp1 = OddDRegisterOf(QTMP); 3072 DRegister dtemp1 = OddDRegisterOf(QTMP);
3073 3073
3074 // For some cases the vdup instruction requires fewer 3074 // For some cases the vdup instruction requires fewer
3075 // instructions. For arbitrary shuffles, use vtbl. 3075 // instructions. For arbitrary shuffles, use vtbl.
3076
3076 switch (op_kind()) { 3077 switch (op_kind()) {
3077 case MethodRecognizer::kFloat32x4ShuffleX: 3078 case MethodRecognizer::kFloat32x4ShuffleX:
3078 __ vdup(kWord, result, dvalue0, 0); 3079 __ vdup(kWord, result, dvalue0, 0);
3079 __ vcvtds(dresult0, sresult0); 3080 __ vcvtds(dresult0, sresult0);
3080 break; 3081 break;
3081 case MethodRecognizer::kFloat32x4ShuffleY: 3082 case MethodRecognizer::kFloat32x4ShuffleY:
3082 __ vdup(kWord, result, dvalue0, 1); 3083 __ vdup(kWord, result, dvalue0, 1);
3083 __ vcvtds(dresult0, sresult0); 3084 __ vcvtds(dresult0, sresult0);
3084 break; 3085 break;
3085 case MethodRecognizer::kFloat32x4ShuffleZ: 3086 case MethodRecognizer::kFloat32x4ShuffleZ:
3086 __ vdup(kWord, result, dvalue1, 0); 3087 __ vdup(kWord, result, dvalue1, 0);
3087 __ vcvtds(dresult0, sresult0); 3088 __ vcvtds(dresult0, sresult0);
3088 break; 3089 break;
3089 case MethodRecognizer::kFloat32x4ShuffleW: 3090 case MethodRecognizer::kFloat32x4ShuffleW:
3090 __ vdup(kWord, result, dvalue1, 1); 3091 __ vdup(kWord, result, dvalue1, 1);
3091 __ vcvtds(dresult0, sresult0); 3092 __ vcvtds(dresult0, sresult0);
3092 break; 3093 break;
3094 case MethodRecognizer::kUint32x4Shuffle:
3093 case MethodRecognizer::kFloat32x4Shuffle: 3095 case MethodRecognizer::kFloat32x4Shuffle:
3094 if (mask_ == 0x00) { 3096 if (mask_ == 0x00) {
3095 __ vdup(kWord, result, dvalue0, 0); 3097 __ vdup(kWord, result, dvalue0, 0);
3096 } else if (mask_ == 0x55) { 3098 } else if (mask_ == 0x55) {
3097 __ vdup(kWord, result, dvalue0, 1); 3099 __ vdup(kWord, result, dvalue0, 1);
3098 } else if (mask_ == 0xAA) { 3100 } else if (mask_ == 0xAA) {
3099 __ vdup(kWord, result, dvalue1, 0); 3101 __ vdup(kWord, result, dvalue1, 0);
3100 } else if (mask_ == 0xFF) { 3102 } else if (mask_ == 0xFF) {
3101 __ vdup(kWord, result, dvalue1, 1); 3103 __ vdup(kWord, result, dvalue1, 1);
3102 } else { 3104 } else {
3105 // TODO(zra): Investigate better instruction sequences for other
3106 // shuffle masks.
3103 SRegister svalues[4]; 3107 SRegister svalues[4];
3104 3108
3105 svalues[0] = EvenSRegisterOf(dtemp0); 3109 svalues[0] = EvenSRegisterOf(dtemp0);
3106 svalues[1] = OddSRegisterOf(dtemp0); 3110 svalues[1] = OddSRegisterOf(dtemp0);
3107 svalues[2] = EvenSRegisterOf(dtemp1); 3111 svalues[2] = EvenSRegisterOf(dtemp1);
3108 svalues[3] = OddSRegisterOf(dtemp1); 3112 svalues[3] = OddSRegisterOf(dtemp1);
3109 3113
3110 __ vmovq(QTMP, value); 3114 __ vmovq(QTMP, value);
3111 __ vmovs(sresult0, svalues[mask_ & 0x3]); 3115 __ vmovs(sresult0, svalues[mask_ & 0x3]);
3112 __ vmovs(sresult1, svalues[(mask_ >> 2) & 0x3]); 3116 __ vmovs(sresult1, svalues[(mask_ >> 2) & 0x3]);
3113 __ vmovs(sresult2, svalues[(mask_ >> 4) & 0x3]); 3117 __ vmovs(sresult2, svalues[(mask_ >> 4) & 0x3]);
3114 __ vmovs(sresult3, svalues[(mask_ >> 6) & 0x3]); 3118 __ vmovs(sresult3, svalues[(mask_ >> 6) & 0x3]);
3115 } 3119 }
3116 break; 3120 break;
3117 default: UNREACHABLE(); 3121 default: UNREACHABLE();
3118 } 3122 }
3119 } 3123 }
3120 3124
3121 3125
3126 LocationSummary* Simd32x4ShuffleMixInstr::MakeLocationSummary() const {
3127 const intptr_t kNumInputs = 2;
3128 const intptr_t kNumTemps = 0;
3129 LocationSummary* summary =
3130 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
3131 // Low (< Q7) Q registers are needed for the vcvtds and vmovs instructions.
3132 summary->set_in(0, Location::FpuRegisterLocation(Q4));
3133 summary->set_in(1, Location::FpuRegisterLocation(Q5));
3134 summary->set_out(Location::FpuRegisterLocation(Q6));
3135 return summary;
3136 }
3137
3138
3139 void Simd32x4ShuffleMixInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
3140 QRegister left = locs()->in(0).fpu_reg();
3141 QRegister right = locs()->in(1).fpu_reg();
3142 QRegister result = locs()->out().fpu_reg();
3143
3144 DRegister dresult0 = EvenDRegisterOf(result);
3145 DRegister dresult1 = OddDRegisterOf(result);
3146 SRegister sresult0 = EvenSRegisterOf(dresult0);
3147 SRegister sresult1 = OddSRegisterOf(dresult0);
3148 SRegister sresult2 = EvenSRegisterOf(dresult1);
3149 SRegister sresult3 = OddSRegisterOf(dresult1);
3150
3151 DRegister dleft0 = EvenDRegisterOf(left);
3152 DRegister dleft1 = OddDRegisterOf(left);
3153 DRegister dright0 = EvenDRegisterOf(right);
3154 DRegister dright1 = OddDRegisterOf(right);
3155
3156 switch (op_kind()) {
3157 case MethodRecognizer::kFloat32x4ShuffleMix:
3158 case MethodRecognizer::kUint32x4ShuffleMix:
3159 // TODO(zra): Investigate better instruction sequences for shuffle masks.
3160 SRegister left_svalues[4];
3161 SRegister right_svalues[4];
3162
3163 left_svalues[0] = EvenSRegisterOf(dleft0);
3164 left_svalues[1] = OddSRegisterOf(dleft0);
3165 left_svalues[2] = EvenSRegisterOf(dleft1);
3166 left_svalues[3] = OddSRegisterOf(dleft1);
3167 right_svalues[0] = EvenSRegisterOf(dright0);
3168 right_svalues[1] = OddSRegisterOf(dright0);
3169 right_svalues[2] = EvenSRegisterOf(dright1);
3170 right_svalues[3] = OddSRegisterOf(dright1);
3171
3172 __ vmovs(sresult0, left_svalues[mask_ & 0x3]);
3173 __ vmovs(sresult1, left_svalues[(mask_ >> 2) & 0x3]);
3174 __ vmovs(sresult2, right_svalues[(mask_ >> 4) & 0x3]);
3175 __ vmovs(sresult3, right_svalues[(mask_ >> 6) & 0x3]);
3176 break;
3177 default: UNREACHABLE();
3178 }
3179 }
3180
3181
3122 LocationSummary* Simd32x4GetSignMaskInstr::MakeLocationSummary() const { 3182 LocationSummary* Simd32x4GetSignMaskInstr::MakeLocationSummary() const {
3123 const intptr_t kNumInputs = 1; 3183 const intptr_t kNumInputs = 1;
3124 const intptr_t kNumTemps = 1; 3184 const intptr_t kNumTemps = 1;
3125 LocationSummary* summary = 3185 LocationSummary* summary =
3126 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); 3186 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
3127 summary->set_in(0, Location::FpuRegisterLocation(Q5)); 3187 summary->set_in(0, Location::FpuRegisterLocation(Q5));
3128 summary->set_temp(0, Location::RequiresRegister()); 3188 summary->set_temp(0, Location::RequiresRegister());
3129 summary->set_out(Location::RequiresRegister()); 3189 summary->set_out(Location::RequiresRegister());
3130 return summary; 3190 return summary;
3131 } 3191 }
(...skipping 346 matching lines...) Expand 10 before | Expand all | Expand 10 after
3478 void Float32x4ToUint32x4Instr::EmitNativeCode(FlowGraphCompiler* compiler) { 3538 void Float32x4ToUint32x4Instr::EmitNativeCode(FlowGraphCompiler* compiler) {
3479 QRegister value = locs()->in(0).fpu_reg(); 3539 QRegister value = locs()->in(0).fpu_reg();
3480 QRegister result = locs()->out().fpu_reg(); 3540 QRegister result = locs()->out().fpu_reg();
3481 3541
3482 if (value != result) { 3542 if (value != result) {
3483 __ vmovq(result, value); 3543 __ vmovq(result, value);
3484 } 3544 }
3485 } 3545 }
3486 3546
3487 3547
3488 LocationSummary* Float32x4TwoArgShuffleInstr::MakeLocationSummary() const {
3489 const intptr_t kNumInputs = 2;
3490 const intptr_t kNumTemps = 0;
3491 LocationSummary* summary =
3492 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
3493 summary->set_in(0, Location::RequiresFpuRegister());
3494 summary->set_in(1, Location::RequiresFpuRegister());
3495 summary->set_out(Location::SameAsFirstInput());
3496 return summary;
3497 }
3498
3499
3500 void Float32x4TwoArgShuffleInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
3501 QRegister left = locs()->in(0).fpu_reg();
3502 QRegister right = locs()->in(1).fpu_reg();
3503 QRegister result = locs()->out().fpu_reg();
3504
3505 ASSERT(result == left);
3506
3507 DRegister dleft0 = EvenDRegisterOf(left);
3508 DRegister dleft1 = OddDRegisterOf(left);
3509 DRegister dright0 = EvenDRegisterOf(right);
3510 DRegister dright1 = OddDRegisterOf(right);
3511
3512 switch (op_kind()) {
3513 case MethodRecognizer::kFloat32x4WithZWInXY:
3514 __ vmovd(dleft0, dright1);
3515 break;
3516 case MethodRecognizer::kFloat32x4InterleaveXY:
3517 __ vmovq(QTMP, right);
3518 __ vzipqw(left, QTMP);
3519 break;
3520 case MethodRecognizer::kFloat32x4InterleaveZW:
3521 __ vmovq(QTMP, right);
3522 __ vzipqw(left, QTMP);
3523 __ vmovq(left, QTMP);
3524 break;
3525 case MethodRecognizer::kFloat32x4InterleaveXYPairs:
3526 __ vmovd(dleft1, dright0);
3527 break;
3528 case MethodRecognizer::kFloat32x4InterleaveZWPairs:
3529 __ vmovq(QTMP, right);
3530 __ vmovd(EvenDRegisterOf(QTMP), dleft1);
3531 __ vmovq(result, QTMP);
3532 break;
3533 default: UNREACHABLE();
3534 }
3535 }
3536
3537
3538 LocationSummary* Uint32x4BoolConstructorInstr::MakeLocationSummary() const { 3548 LocationSummary* Uint32x4BoolConstructorInstr::MakeLocationSummary() const {
3539 const intptr_t kNumInputs = 4; 3549 const intptr_t kNumInputs = 4;
3540 const intptr_t kNumTemps = 1; 3550 const intptr_t kNumTemps = 1;
3541 LocationSummary* summary = 3551 LocationSummary* summary =
3542 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); 3552 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
3543 summary->set_in(0, Location::RequiresRegister()); 3553 summary->set_in(0, Location::RequiresRegister());
3544 summary->set_in(1, Location::RequiresRegister()); 3554 summary->set_in(1, Location::RequiresRegister());
3545 summary->set_in(2, Location::RequiresRegister()); 3555 summary->set_in(2, Location::RequiresRegister());
3546 summary->set_in(3, Location::RequiresRegister()); 3556 summary->set_in(3, Location::RequiresRegister());
3547 summary->set_temp(0, Location::RequiresRegister()); 3557 summary->set_temp(0, Location::RequiresRegister());
(...skipping 1149 matching lines...) Expand 10 before | Expand all | Expand 10 after
4697 compiler->GenerateCall(token_pos(), 4707 compiler->GenerateCall(token_pos(),
4698 &label, 4708 &label,
4699 PcDescriptors::kOther, 4709 PcDescriptors::kOther,
4700 locs()); 4710 locs());
4701 __ Drop(2); // Discard type arguments and receiver. 4711 __ Drop(2); // Discard type arguments and receiver.
4702 } 4712 }
4703 4713
4704 } // namespace dart 4714 } // namespace dart
4705 4715
4706 #endif // defined TARGET_ARCH_ARM 4716 #endif // defined TARGET_ARCH_ARM
OLDNEW
« no previous file with comments | « runtime/vm/intermediate_language.h ('k') | runtime/vm/intermediate_language_ia32.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698