Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(147)

Side by Side Diff: runtime/vm/intermediate_language_arm.cc

Issue 51373004: SIMD shuffle API changes (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #include "vm/globals.h" // Needed here to get TARGET_ARCH_ARM. 5 #include "vm/globals.h" // Needed here to get TARGET_ARCH_ARM.
6 #if defined(TARGET_ARCH_ARM) 6 #if defined(TARGET_ARCH_ARM)
7 7
8 #include "vm/intermediate_language.h" 8 #include "vm/intermediate_language.h"
9 9
10 #include "vm/dart_entry.h" 10 #include "vm/dart_entry.h"
(...skipping 3013 matching lines...) Expand 10 before | Expand all | Expand 10 after
3024 switch (op_kind()) { 3024 switch (op_kind()) {
3025 case Token::kADD: __ vaddqs(result, left, right); break; 3025 case Token::kADD: __ vaddqs(result, left, right); break;
3026 case Token::kSUB: __ vsubqs(result, left, right); break; 3026 case Token::kSUB: __ vsubqs(result, left, right); break;
3027 case Token::kMUL: __ vmulqs(result, left, right); break; 3027 case Token::kMUL: __ vmulqs(result, left, right); break;
3028 case Token::kDIV: __ Vdivqs(result, left, right); break; 3028 case Token::kDIV: __ Vdivqs(result, left, right); break;
3029 default: UNREACHABLE(); 3029 default: UNREACHABLE();
3030 } 3030 }
3031 } 3031 }
3032 3032
3033 3033
3034 LocationSummary* Float32x4ShuffleInstr::MakeLocationSummary() const { 3034 LocationSummary* Simd32x4ShuffleInstr::MakeLocationSummary() const {
3035 const intptr_t kNumInputs = 1; 3035 const intptr_t kNumInputs = 1;
3036 const intptr_t kNumTemps = 0; 3036 const intptr_t kNumTemps = 0;
3037 LocationSummary* summary = 3037 LocationSummary* summary =
3038 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); 3038 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
3039 // Low (< Q7) Q registers are needed for the vcvtds and vmovs instructions. 3039 // Low (< Q7) Q registers are needed for the vcvtds and vmovs instructions.
3040 summary->set_in(0, Location::FpuRegisterLocation(Q5)); 3040 summary->set_in(0, Location::FpuRegisterLocation(Q5));
3041 summary->set_out(Location::FpuRegisterLocation(Q6)); 3041 summary->set_out(Location::FpuRegisterLocation(Q6));
3042 return summary; 3042 return summary;
3043 } 3043 }
3044 3044
3045 3045
3046 void Float32x4ShuffleInstr::EmitNativeCode(FlowGraphCompiler* compiler) { 3046 void Simd32x4ShuffleInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
3047 QRegister value = locs()->in(0).fpu_reg(); 3047 QRegister value = locs()->in(0).fpu_reg();
3048 QRegister result = locs()->out().fpu_reg(); 3048 QRegister result = locs()->out().fpu_reg();
3049 DRegister dresult0 = EvenDRegisterOf(result); 3049 DRegister dresult0 = EvenDRegisterOf(result);
3050 DRegister dresult1 = OddDRegisterOf(result); 3050 DRegister dresult1 = OddDRegisterOf(result);
3051 SRegister sresult0 = EvenSRegisterOf(dresult0); 3051 SRegister sresult0 = EvenSRegisterOf(dresult0);
3052 SRegister sresult1 = OddSRegisterOf(dresult0); 3052 SRegister sresult1 = OddSRegisterOf(dresult0);
3053 SRegister sresult2 = EvenSRegisterOf(dresult1); 3053 SRegister sresult2 = EvenSRegisterOf(dresult1);
3054 SRegister sresult3 = OddSRegisterOf(dresult1); 3054 SRegister sresult3 = OddSRegisterOf(dresult1);
3055 3055
3056 DRegister dvalue0 = EvenDRegisterOf(value); 3056 DRegister dvalue0 = EvenDRegisterOf(value);
(...skipping 14 matching lines...) Expand all
3071 __ vcvtds(dresult0, sresult0); 3071 __ vcvtds(dresult0, sresult0);
3072 break; 3072 break;
3073 case MethodRecognizer::kFloat32x4ShuffleZ: 3073 case MethodRecognizer::kFloat32x4ShuffleZ:
3074 __ vdup(kWord, result, dvalue1, 0); 3074 __ vdup(kWord, result, dvalue1, 0);
3075 __ vcvtds(dresult0, sresult0); 3075 __ vcvtds(dresult0, sresult0);
3076 break; 3076 break;
3077 case MethodRecognizer::kFloat32x4ShuffleW: 3077 case MethodRecognizer::kFloat32x4ShuffleW:
3078 __ vdup(kWord, result, dvalue1, 1); 3078 __ vdup(kWord, result, dvalue1, 1);
3079 __ vcvtds(dresult0, sresult0); 3079 __ vcvtds(dresult0, sresult0);
3080 break; 3080 break;
3081 case MethodRecognizer::kUint32x4Shuffle:
3081 case MethodRecognizer::kFloat32x4Shuffle: 3082 case MethodRecognizer::kFloat32x4Shuffle:
3082 if (mask_ == 0x00) { 3083 if (mask_ == 0x00) {
3083 __ vdup(kWord, result, dvalue0, 0); 3084 __ vdup(kWord, result, dvalue0, 0);
3084 } else if (mask_ == 0x55) { 3085 } else if (mask_ == 0x55) {
3085 __ vdup(kWord, result, dvalue0, 1); 3086 __ vdup(kWord, result, dvalue0, 1);
3086 } else if (mask_ == 0xAA) { 3087 } else if (mask_ == 0xAA) {
3087 __ vdup(kWord, result, dvalue1, 0); 3088 __ vdup(kWord, result, dvalue1, 0);
3088 } else if (mask_ == 0xFF) { 3089 } else if (mask_ == 0xFF) {
3089 __ vdup(kWord, result, dvalue1, 1); 3090 __ vdup(kWord, result, dvalue1, 1);
3090 } else { 3091 } else {
3091 SRegister svalues[4]; 3092 SRegister svalues[4];
3092 3093
3093 svalues[0] = EvenSRegisterOf(dtemp0); 3094 svalues[0] = EvenSRegisterOf(dtemp0);
3094 svalues[1] = OddSRegisterOf(dtemp0); 3095 svalues[1] = OddSRegisterOf(dtemp0);
3095 svalues[2] = EvenSRegisterOf(dtemp1); 3096 svalues[2] = EvenSRegisterOf(dtemp1);
3096 svalues[3] = OddSRegisterOf(dtemp1); 3097 svalues[3] = OddSRegisterOf(dtemp1);
3097 3098
3098 __ vmovq(QTMP, value); 3099 __ vmovq(QTMP, value);
3099 __ vmovs(sresult0, svalues[mask_ & 0x3]); 3100 __ vmovs(sresult0, svalues[mask_ & 0x3]);
3100 __ vmovs(sresult1, svalues[(mask_ >> 2) & 0x3]); 3101 __ vmovs(sresult1, svalues[(mask_ >> 2) & 0x3]);
3101 __ vmovs(sresult2, svalues[(mask_ >> 4) & 0x3]); 3102 __ vmovs(sresult2, svalues[(mask_ >> 4) & 0x3]);
3102 __ vmovs(sresult3, svalues[(mask_ >> 6) & 0x3]); 3103 __ vmovs(sresult3, svalues[(mask_ >> 6) & 0x3]);
3103 } 3104 }
3104 break; 3105 break;
3105 default: UNREACHABLE(); 3106 default: UNREACHABLE();
3106 } 3107 }
3107 } 3108 }
3108 3109
3109 3110
3111 LocationSummary* Simd32x4ShuffleMixInstr::MakeLocationSummary() const {
3112 const intptr_t kNumInputs = 2;
3113 const intptr_t kNumTemps = 0;
3114 LocationSummary* summary =
3115 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
3116 // Low (< Q7) Q registers are needed for the vcvtds and vmovs instructions.
3117 summary->set_in(0, Location::FpuRegisterLocation(Q4));
3118 summary->set_in(1, Location::FpuRegisterLocation(Q5));
3119 summary->set_out(Location::FpuRegisterLocation(Q6));
3120 return summary;
3121 }
3122
3123
3124 void Simd32x4ShuffleMixInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
3125 QRegister left = locs()->in(0).fpu_reg();
3126 QRegister right = locs()->in(1).fpu_reg();
3127 QRegister result = locs()->out().fpu_reg();
3128
3129 DRegister dresult0 = EvenDRegisterOf(result);
3130 DRegister dresult1 = OddDRegisterOf(result);
3131 SRegister sresult0 = EvenSRegisterOf(dresult0);
3132 SRegister sresult1 = OddSRegisterOf(dresult0);
3133 SRegister sresult2 = EvenSRegisterOf(dresult1);
3134 SRegister sresult3 = OddSRegisterOf(dresult1);
3135
3136 DRegister dleft0 = EvenDRegisterOf(left);
3137 DRegister dleft1 = OddDRegisterOf(left);
3138 DRegister dright0 = EvenDRegisterOf(right);
3139 DRegister dright1 = OddDRegisterOf(right);
3140
3141 switch (op_kind()) {
3142 case MethodRecognizer::kFloat32x4ShuffleMix:
3143 case MethodRecognizer::kUint32x4ShuffleMix:
zra 2013/10/30 17:39:27 Is it possible to have special cases where vdup ca…
Cutch 2013/10/31 01:43:55 I'm not familiar enough with the NEON shuffling in…
zra 2013/10/31 15:10:20 Please leave a TODO here for me, and I'll look int…
3144 SRegister left_svalues[4];
3145 SRegister right_svalues[4];
3146
3147 left_svalues[0] = EvenSRegisterOf(dleft0);
3148 left_svalues[1] = OddSRegisterOf(dleft0);
3149 left_svalues[2] = EvenSRegisterOf(dleft1);
3150 left_svalues[3] = OddSRegisterOf(dleft1);
3151 right_svalues[0] = EvenSRegisterOf(dright0);
3152 right_svalues[1] = OddSRegisterOf(dright0);
3153 right_svalues[2] = EvenSRegisterOf(dright1);
3154 right_svalues[3] = OddSRegisterOf(dright1);
3155
3156 __ vmovs(sresult0, left_svalues[mask_ & 0x3]);
3157 __ vmovs(sresult1, left_svalues[(mask_ >> 2) & 0x3]);
3158 __ vmovs(sresult2, right_svalues[(mask_ >> 4) & 0x3]);
3159 __ vmovs(sresult3, right_svalues[(mask_ >> 6) & 0x3]);
3160 break;
3161 default: UNREACHABLE();
3162 }
3163 }
3164
3165
3110 LocationSummary* Simd32x4GetSignMaskInstr::MakeLocationSummary() const { 3166 LocationSummary* Simd32x4GetSignMaskInstr::MakeLocationSummary() const {
3111 const intptr_t kNumInputs = 1; 3167 const intptr_t kNumInputs = 1;
3112 const intptr_t kNumTemps = 1; 3168 const intptr_t kNumTemps = 1;
3113 LocationSummary* summary = 3169 LocationSummary* summary =
3114 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); 3170 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
3115 summary->set_in(0, Location::FpuRegisterLocation(Q5)); 3171 summary->set_in(0, Location::FpuRegisterLocation(Q5));
3116 summary->set_temp(0, Location::RequiresRegister()); 3172 summary->set_temp(0, Location::RequiresRegister());
3117 summary->set_out(Location::RequiresRegister()); 3173 summary->set_out(Location::RequiresRegister());
3118 return summary; 3174 return summary;
3119 } 3175 }
(...skipping 346 matching lines...) Expand 10 before | Expand all | Expand 10 after
3466 void Float32x4ToUint32x4Instr::EmitNativeCode(FlowGraphCompiler* compiler) { 3522 void Float32x4ToUint32x4Instr::EmitNativeCode(FlowGraphCompiler* compiler) {
3467 QRegister value = locs()->in(0).fpu_reg(); 3523 QRegister value = locs()->in(0).fpu_reg();
3468 QRegister result = locs()->out().fpu_reg(); 3524 QRegister result = locs()->out().fpu_reg();
3469 3525
3470 if (value != result) { 3526 if (value != result) {
3471 __ vmovq(result, value); 3527 __ vmovq(result, value);
3472 } 3528 }
3473 } 3529 }
3474 3530
3475 3531
3476 LocationSummary* Float32x4TwoArgShuffleInstr::MakeLocationSummary() const {
3477 const intptr_t kNumInputs = 2;
3478 const intptr_t kNumTemps = 0;
3479 LocationSummary* summary =
3480 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
3481 summary->set_in(0, Location::RequiresFpuRegister());
3482 summary->set_in(1, Location::RequiresFpuRegister());
3483 summary->set_out(Location::SameAsFirstInput());
3484 return summary;
3485 }
3486
3487
3488 void Float32x4TwoArgShuffleInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
3489 QRegister left = locs()->in(0).fpu_reg();
3490 QRegister right = locs()->in(1).fpu_reg();
3491 QRegister result = locs()->out().fpu_reg();
3492
3493 ASSERT(result == left);
3494
3495 DRegister dleft0 = EvenDRegisterOf(left);
3496 DRegister dleft1 = OddDRegisterOf(left);
3497 DRegister dright0 = EvenDRegisterOf(right);
3498 DRegister dright1 = OddDRegisterOf(right);
3499
3500 switch (op_kind()) {
3501 case MethodRecognizer::kFloat32x4WithZWInXY:
3502 __ vmovd(dleft0, dright1);
3503 break;
3504 case MethodRecognizer::kFloat32x4InterleaveXY:
3505 __ vmovq(QTMP, right);
3506 __ vzipqw(left, QTMP);
3507 break;
3508 case MethodRecognizer::kFloat32x4InterleaveZW:
3509 __ vmovq(QTMP, right);
3510 __ vzipqw(left, QTMP);
3511 __ vmovq(left, QTMP);
3512 break;
3513 case MethodRecognizer::kFloat32x4InterleaveXYPairs:
3514 __ vmovd(dleft1, dright0);
3515 break;
3516 case MethodRecognizer::kFloat32x4InterleaveZWPairs:
3517 __ vmovq(QTMP, right);
3518 __ vmovd(EvenDRegisterOf(QTMP), dleft1);
3519 __ vmovq(result, QTMP);
3520 break;
3521 default: UNREACHABLE();
3522 }
3523 }
3524
3525
3526 LocationSummary* Uint32x4BoolConstructorInstr::MakeLocationSummary() const { 3532 LocationSummary* Uint32x4BoolConstructorInstr::MakeLocationSummary() const {
3527 const intptr_t kNumInputs = 4; 3533 const intptr_t kNumInputs = 4;
3528 const intptr_t kNumTemps = 1; 3534 const intptr_t kNumTemps = 1;
3529 LocationSummary* summary = 3535 LocationSummary* summary =
3530 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); 3536 new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
3531 summary->set_in(0, Location::RequiresRegister()); 3537 summary->set_in(0, Location::RequiresRegister());
3532 summary->set_in(1, Location::RequiresRegister()); 3538 summary->set_in(1, Location::RequiresRegister());
3533 summary->set_in(2, Location::RequiresRegister()); 3539 summary->set_in(2, Location::RequiresRegister());
3534 summary->set_in(3, Location::RequiresRegister()); 3540 summary->set_in(3, Location::RequiresRegister());
3535 summary->set_temp(0, Location::RequiresRegister()); 3541 summary->set_temp(0, Location::RequiresRegister());
(...skipping 1115 matching lines...) Expand 10 before | Expand all | Expand 10 after
4651 compiler->GenerateCall(token_pos(), 4657 compiler->GenerateCall(token_pos(),
4652 &label, 4658 &label,
4653 PcDescriptors::kOther, 4659 PcDescriptors::kOther,
4654 locs()); 4660 locs());
4655 __ Drop(2); // Discard type arguments and receiver. 4661 __ Drop(2); // Discard type arguments and receiver.
4656 } 4662 }
4657 4663
4658 } // namespace dart 4664 } // namespace dart
4659 4665
4660 #endif // defined TARGET_ARCH_ARM 4666 #endif // defined TARGET_ARCH_ARM
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698