Index: runtime/vm/intermediate_language_arm.cc |
diff --git a/runtime/vm/intermediate_language_arm.cc b/runtime/vm/intermediate_language_arm.cc |
index 775a8e914c34f4a281541252d625c1fe3ecfbc07..6c68bd291e517c97bf5a172711e20865bec05991 100644 |
--- a/runtime/vm/intermediate_language_arm.cc |
+++ b/runtime/vm/intermediate_language_arm.cc |
@@ -3031,7 +3031,7 @@ void BinaryFloat32x4OpInstr::EmitNativeCode(FlowGraphCompiler* compiler) { |
} |
-LocationSummary* Float32x4ShuffleInstr::MakeLocationSummary() const { |
+LocationSummary* Simd32x4ShuffleInstr::MakeLocationSummary() const { |
const intptr_t kNumInputs = 1; |
const intptr_t kNumTemps = 0; |
LocationSummary* summary = |
@@ -3043,7 +3043,7 @@ LocationSummary* Float32x4ShuffleInstr::MakeLocationSummary() const { |
} |
-void Float32x4ShuffleInstr::EmitNativeCode(FlowGraphCompiler* compiler) { |
+void Simd32x4ShuffleInstr::EmitNativeCode(FlowGraphCompiler* compiler) { |
QRegister value = locs()->in(0).fpu_reg(); |
QRegister result = locs()->out().fpu_reg(); |
DRegister dresult0 = EvenDRegisterOf(result); |
@@ -3078,6 +3078,7 @@ void Float32x4ShuffleInstr::EmitNativeCode(FlowGraphCompiler* compiler) { |
__ vdup(kWord, result, dvalue1, 1); |
__ vcvtds(dresult0, sresult0); |
break; |
+ case MethodRecognizer::kUint32x4Shuffle: |
case MethodRecognizer::kFloat32x4Shuffle: |
if (mask_ == 0x00) { |
__ vdup(kWord, result, dvalue0, 0); |
@@ -3107,6 +3108,61 @@ void Float32x4ShuffleInstr::EmitNativeCode(FlowGraphCompiler* compiler) { |
} |
+LocationSummary* Simd32x4ShuffleMixInstr::MakeLocationSummary() const { |
+ const intptr_t kNumInputs = 2; |
+ const intptr_t kNumTemps = 0; |
+ LocationSummary* summary = |
+ new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); |
+ // Low (< Q7) Q registers are needed for the vmovs instructions. |
+ summary->set_in(0, Location::FpuRegisterLocation(Q4)); |
+ summary->set_in(1, Location::FpuRegisterLocation(Q5)); |
+ summary->set_out(Location::FpuRegisterLocation(Q6)); |
+ return summary; |
+} |
+ |
+ |
+// Emits the two-input lane shuffle: result lanes 0 and 1 are copied from lanes |
+// of the left input and result lanes 2 and 3 from lanes of the right input, |
+// using one vmovs per result lane. |
+void Simd32x4ShuffleMixInstr::EmitNativeCode(FlowGraphCompiler* compiler) { |
+ QRegister left = locs()->in(0).fpu_reg(); |
+ QRegister right = locs()->in(1).fpu_reg(); |
+ QRegister result = locs()->out().fpu_reg(); |
+ |
+ DRegister dresult0 = EvenDRegisterOf(result); |
+ DRegister dresult1 = OddDRegisterOf(result); |
+ SRegister sresult0 = EvenSRegisterOf(dresult0); |
+ SRegister sresult1 = OddSRegisterOf(dresult0); |
+ SRegister sresult2 = EvenSRegisterOf(dresult1); |
+ SRegister sresult3 = OddSRegisterOf(dresult1); |
+ |
+ DRegister dleft0 = EvenDRegisterOf(left); |
+ DRegister dleft1 = OddDRegisterOf(left); |
+ DRegister dright0 = EvenDRegisterOf(right); |
+ DRegister dright1 = OddDRegisterOf(right); |
+ |
+ switch (op_kind()) { |
+ case MethodRecognizer::kFloat32x4ShuffleMix: |
+ case MethodRecognizer::kUint32x4ShuffleMix: |
zra
2013/10/30 17:39:27
Is it possible to have special cases where vdup ca
Cutch
2013/10/31 01:43:55
I'm not familiar enough with the NEON shuffling in
zra
2013/10/31 15:10:20
Please leave a TODO here for me, and I'll look int
|
+ // TODO(zra): Look into special-casing masks where vdup can be used instead |
+ // of four separate vmovs. |
+ SRegister left_svalues[4]; |
+ SRegister right_svalues[4]; |
+ |
+ left_svalues[0] = EvenSRegisterOf(dleft0); |
+ left_svalues[1] = OddSRegisterOf(dleft0); |
+ left_svalues[2] = EvenSRegisterOf(dleft1); |
+ left_svalues[3] = OddSRegisterOf(dleft1); |
+ right_svalues[0] = EvenSRegisterOf(dright0); |
+ right_svalues[1] = OddSRegisterOf(dright0); |
+ right_svalues[2] = EvenSRegisterOf(dright1); |
+ right_svalues[3] = OddSRegisterOf(dright1); |
+ |
+ // mask_ is read as four 2-bit lane selectors: bits 0-1 and 2-3 index into |
+ // the left input, bits 4-5 and 6-7 index into the right input. |
+ __ vmovs(sresult0, left_svalues[mask_ & 0x3]); |
+ __ vmovs(sresult1, left_svalues[(mask_ >> 2) & 0x3]); |
+ __ vmovs(sresult2, right_svalues[(mask_ >> 4) & 0x3]); |
+ __ vmovs(sresult3, right_svalues[(mask_ >> 6) & 0x3]); |
+ break; |
+ default: UNREACHABLE(); |
+ } |
+} |
+ |
+ |
LocationSummary* Simd32x4GetSignMaskInstr::MakeLocationSummary() const { |
const intptr_t kNumInputs = 1; |
const intptr_t kNumTemps = 1; |
@@ -3473,56 +3529,6 @@ void Float32x4ToUint32x4Instr::EmitNativeCode(FlowGraphCompiler* compiler) { |
} |
-LocationSummary* Float32x4TwoArgShuffleInstr::MakeLocationSummary() const { |
- const intptr_t kNumInputs = 2; |
- const intptr_t kNumTemps = 0; |
- LocationSummary* summary = |
- new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall); |
- summary->set_in(0, Location::RequiresFpuRegister()); |
- summary->set_in(1, Location::RequiresFpuRegister()); |
- summary->set_out(Location::SameAsFirstInput()); |
- return summary; |
-} |
- |
- |
-void Float32x4TwoArgShuffleInstr::EmitNativeCode(FlowGraphCompiler* compiler) { |
- QRegister left = locs()->in(0).fpu_reg(); |
- QRegister right = locs()->in(1).fpu_reg(); |
- QRegister result = locs()->out().fpu_reg(); |
- |
- ASSERT(result == left); |
- |
- DRegister dleft0 = EvenDRegisterOf(left); |
- DRegister dleft1 = OddDRegisterOf(left); |
- DRegister dright0 = EvenDRegisterOf(right); |
- DRegister dright1 = OddDRegisterOf(right); |
- |
- switch (op_kind()) { |
- case MethodRecognizer::kFloat32x4WithZWInXY: |
- __ vmovd(dleft0, dright1); |
- break; |
- case MethodRecognizer::kFloat32x4InterleaveXY: |
- __ vmovq(QTMP, right); |
- __ vzipqw(left, QTMP); |
- break; |
- case MethodRecognizer::kFloat32x4InterleaveZW: |
- __ vmovq(QTMP, right); |
- __ vzipqw(left, QTMP); |
- __ vmovq(left, QTMP); |
- break; |
- case MethodRecognizer::kFloat32x4InterleaveXYPairs: |
- __ vmovd(dleft1, dright0); |
- break; |
- case MethodRecognizer::kFloat32x4InterleaveZWPairs: |
- __ vmovq(QTMP, right); |
- __ vmovd(EvenDRegisterOf(QTMP), dleft1); |
- __ vmovq(result, QTMP); |
- break; |
- default: UNREACHABLE(); |
- } |
-} |
- |
- |
LocationSummary* Uint32x4BoolConstructorInstr::MakeLocationSummary() const { |
const intptr_t kNumInputs = 4; |
const intptr_t kNumTemps = 1; |