| Index: runtime/vm/intermediate_language_arm.cc
|
| diff --git a/runtime/vm/intermediate_language_arm.cc b/runtime/vm/intermediate_language_arm.cc
|
| index e9d032e77bbc51774a2be055dcf9cb90b054f2e5..3c613e8736b4471f01a164c594826ab3e9677b10 100644
|
| --- a/runtime/vm/intermediate_language_arm.cc
|
| +++ b/runtime/vm/intermediate_language_arm.cc
|
| @@ -3043,7 +3043,7 @@ void BinaryFloat32x4OpInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
|
| }
|
|
|
|
|
| -LocationSummary* Float32x4ShuffleInstr::MakeLocationSummary() const {
|
| +LocationSummary* Simd32x4ShuffleInstr::MakeLocationSummary() const {
|
| const intptr_t kNumInputs = 1;
|
| const intptr_t kNumTemps = 0;
|
| LocationSummary* summary =
|
| @@ -3055,7 +3055,7 @@ LocationSummary* Float32x4ShuffleInstr::MakeLocationSummary() const {
|
| }
|
|
|
|
|
| -void Float32x4ShuffleInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
|
| +void Simd32x4ShuffleInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
|
| QRegister value = locs()->in(0).fpu_reg();
|
| QRegister result = locs()->out().fpu_reg();
|
| DRegister dresult0 = EvenDRegisterOf(result);
|
| @@ -3073,6 +3073,7 @@ void Float32x4ShuffleInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
|
|
|
| // For some cases the vdup instruction requires fewer
|
| // instructions. For arbitrary shuffles, use vtbl.
|
| +
|
| switch (op_kind()) {
|
| case MethodRecognizer::kFloat32x4ShuffleX:
|
| __ vdup(kWord, result, dvalue0, 0);
|
| @@ -3090,6 +3091,7 @@ void Float32x4ShuffleInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
|
| __ vdup(kWord, result, dvalue1, 1);
|
| __ vcvtds(dresult0, sresult0);
|
| break;
|
| + case MethodRecognizer::kUint32x4Shuffle:
|
| case MethodRecognizer::kFloat32x4Shuffle:
|
| if (mask_ == 0x00) {
|
| __ vdup(kWord, result, dvalue0, 0);
|
| @@ -3100,6 +3102,8 @@ void Float32x4ShuffleInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
|
| } else if (mask_ == 0xFF) {
|
| __ vdup(kWord, result, dvalue1, 1);
|
| } else {
|
| + // TODO(zra): Investigate better instruction sequences for other
|
| + // shuffle masks.
|
| SRegister svalues[4];
|
|
|
| svalues[0] = EvenSRegisterOf(dtemp0);
|
| @@ -3119,6 +3123,62 @@ void Float32x4ShuffleInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
|
| }
|
|
|
|
|
| +LocationSummary* Simd32x4ShuffleMixInstr::MakeLocationSummary() const {
|
| + const intptr_t kNumInputs = 2;
|
| + const intptr_t kNumTemps = 0;
|
| + LocationSummary* summary =
|
| + new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
|
| + // Low (< Q7) Q registers are needed for the vmovs instructions.
|
| + summary->set_in(0, Location::FpuRegisterLocation(Q4));
|
| + summary->set_in(1, Location::FpuRegisterLocation(Q5));
|
| + summary->set_out(Location::FpuRegisterLocation(Q6));
|
| + return summary;
|
| +}
|
| +
|
| +
|
| +void Simd32x4ShuffleMixInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
|
| + QRegister left = locs()->in(0).fpu_reg();
|
| + QRegister right = locs()->in(1).fpu_reg();
|
| + QRegister result = locs()->out().fpu_reg();
|
| + // Builds result from two lanes of left and two lanes of right, per mask_.
|
| + DRegister dresult0 = EvenDRegisterOf(result);
|
| + DRegister dresult1 = OddDRegisterOf(result);
|
| + SRegister sresult0 = EvenSRegisterOf(dresult0);
|
| + SRegister sresult1 = OddSRegisterOf(dresult0);
|
| + SRegister sresult2 = EvenSRegisterOf(dresult1);
|
| + SRegister sresult3 = OddSRegisterOf(dresult1);
|
| + // D-register halves of both inputs; their S halves are taken below.
|
| + DRegister dleft0 = EvenDRegisterOf(left);
|
| + DRegister dleft1 = OddDRegisterOf(left);
|
| + DRegister dright0 = EvenDRegisterOf(right);
|
| + DRegister dright1 = OddDRegisterOf(right);
|
| +
|
| + switch (op_kind()) {
|
| + case MethodRecognizer::kFloat32x4ShuffleMix:
|
| + case MethodRecognizer::kUint32x4ShuffleMix:
|
| + // TODO(zra): Investigate better instruction sequences for shuffle masks.
|
| + SRegister left_svalues[4];
|
| + SRegister right_svalues[4];
|
| +
|
| + left_svalues[0] = EvenSRegisterOf(dleft0);
|
| + left_svalues[1] = OddSRegisterOf(dleft0);
|
| + left_svalues[2] = EvenSRegisterOf(dleft1);
|
| + left_svalues[3] = OddSRegisterOf(dleft1);
|
| + right_svalues[0] = EvenSRegisterOf(dright0);
|
| + right_svalues[1] = OddSRegisterOf(dright0);
|
| + right_svalues[2] = EvenSRegisterOf(dright1);
|
| + right_svalues[3] = OddSRegisterOf(dright1);
|
| + // 2-bit mask_ fields: lanes 0-1 select from left, lanes 2-3 from right.
|
| + __ vmovs(sresult0, left_svalues[mask_ & 0x3]);
|
| + __ vmovs(sresult1, left_svalues[(mask_ >> 2) & 0x3]);
|
| + __ vmovs(sresult2, right_svalues[(mask_ >> 4) & 0x3]);
|
| + __ vmovs(sresult3, right_svalues[(mask_ >> 6) & 0x3]);
|
| + break;
|
| + default: UNREACHABLE();
|
| + }
|
| +}
|
| +
|
| +
|
| LocationSummary* Simd32x4GetSignMaskInstr::MakeLocationSummary() const {
|
| const intptr_t kNumInputs = 1;
|
| const intptr_t kNumTemps = 1;
|
| @@ -3485,56 +3545,6 @@ void Float32x4ToUint32x4Instr::EmitNativeCode(FlowGraphCompiler* compiler) {
|
| }
|
|
|
|
|
| -LocationSummary* Float32x4TwoArgShuffleInstr::MakeLocationSummary() const {
|
| - const intptr_t kNumInputs = 2;
|
| - const intptr_t kNumTemps = 0;
|
| - LocationSummary* summary =
|
| - new LocationSummary(kNumInputs, kNumTemps, LocationSummary::kNoCall);
|
| - summary->set_in(0, Location::RequiresFpuRegister());
|
| - summary->set_in(1, Location::RequiresFpuRegister());
|
| - summary->set_out(Location::SameAsFirstInput());
|
| - return summary;
|
| -}
|
| -
|
| -
|
| -void Float32x4TwoArgShuffleInstr::EmitNativeCode(FlowGraphCompiler* compiler) {
|
| - QRegister left = locs()->in(0).fpu_reg();
|
| - QRegister right = locs()->in(1).fpu_reg();
|
| - QRegister result = locs()->out().fpu_reg();
|
| -
|
| - ASSERT(result == left);
|
| -
|
| - DRegister dleft0 = EvenDRegisterOf(left);
|
| - DRegister dleft1 = OddDRegisterOf(left);
|
| - DRegister dright0 = EvenDRegisterOf(right);
|
| - DRegister dright1 = OddDRegisterOf(right);
|
| -
|
| - switch (op_kind()) {
|
| - case MethodRecognizer::kFloat32x4WithZWInXY:
|
| - __ vmovd(dleft0, dright1);
|
| - break;
|
| - case MethodRecognizer::kFloat32x4InterleaveXY:
|
| - __ vmovq(QTMP, right);
|
| - __ vzipqw(left, QTMP);
|
| - break;
|
| - case MethodRecognizer::kFloat32x4InterleaveZW:
|
| - __ vmovq(QTMP, right);
|
| - __ vzipqw(left, QTMP);
|
| - __ vmovq(left, QTMP);
|
| - break;
|
| - case MethodRecognizer::kFloat32x4InterleaveXYPairs:
|
| - __ vmovd(dleft1, dright0);
|
| - break;
|
| - case MethodRecognizer::kFloat32x4InterleaveZWPairs:
|
| - __ vmovq(QTMP, right);
|
| - __ vmovd(EvenDRegisterOf(QTMP), dleft1);
|
| - __ vmovq(result, QTMP);
|
| - break;
|
| - default: UNREACHABLE();
|
| - }
|
| -}
|
| -
|
| -
|
| LocationSummary* Uint32x4BoolConstructorInstr::MakeLocationSummary() const {
|
| const intptr_t kNumInputs = 4;
|
| const intptr_t kNumTemps = 1;
|
|
|