Index: src/compiler/arm/instruction-selector-arm.cc
diff --git a/src/compiler/arm/instruction-selector-arm.cc b/src/compiler/arm/instruction-selector-arm.cc
index 6f3957183f33d3b8b8e4700deaa726c55cb68076..c29afb87cab2baeb175349beb556dccd3f7ad264 100644
--- a/src/compiler/arm/instruction-selector-arm.cc
+++ b/src/compiler/arm/instruction-selector-arm.cc
@@ -427,7 +427,7 @@ void InstructionSelector::VisitLoad(Node* node) {
       opcode = kArmLdr;
       break;
     case MachineRepresentation::kSimd128:
-      opcode = kArmSimd128Load;
+      opcode = kArmVld1S128;
       break;
     case MachineRepresentation::kWord64:   // Fall through.
     case MachineRepresentation::kSimd1x4:  // Fall through.
@@ -517,7 +517,7 @@ void InstructionSelector::VisitStore(Node* node) {
         opcode = kArmStr;
         break;
       case MachineRepresentation::kSimd128:
-        opcode = kArmSimd128Store;
+        opcode = kArmVst1S128;
        break;
      case MachineRepresentation::kWord64:   // Fall through.
      case MachineRepresentation::kSimd1x4:  // Fall through.
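
Note (not part of the patch): kArmVld1S128 and kArmVst1S128 are backed by NEON vld1/vst1. The sketch below is written against the public NEON intrinsics rather than V8's macro assembler, and the function names are made up for illustration; it only shows roughly what a 128-bit access through these opcodes boils down to.

#include <arm_neon.h>
#include <cstdint>

// vld1.8 / vst1.8 operate on byte-sized elements, so the architecture places
// no alignment requirement on the address.
uint8x16_t LoadS128(const uint8_t* p) { return vld1q_u8(p); }  // vld1.8 {d, d}, [r]
void StoreS128(uint8_t* p, uint8x16_t v) { vst1q_u8(p, v); }   // vst1.8 {d, d}, [r]
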
@@ -542,8 +542,8 @@ void InstructionSelector::VisitProtectedStore(Node* node) {
 }
 
 void InstructionSelector::VisitUnalignedLoad(Node* node) {
-  UnalignedLoadRepresentation load_rep =
-      UnalignedLoadRepresentationOf(node->op());
+  MachineRepresentation load_rep =
+      UnalignedLoadRepresentationOf(node->op()).representation();
   ArmOperandGenerator g(this);
   Node* base = node->InputAt(0);
   Node* index = node->InputAt(1);
@@ -551,16 +551,18 @@ void InstructionSelector::VisitUnalignedLoad(Node* node) {
   InstructionCode opcode = kArmLdr;
   // Only floating point loads need to be specially handled; integer loads
   // support unaligned access. We support unaligned FP loads by loading to
-  // integer registers first, then moving to the destination FP register.
-  switch (load_rep.representation()) {
+  // integer registers first, then moving to the destination FP register. If
+  // NEON is supported, we use the vld1.8 instruction.
+  switch (load_rep) {
     case MachineRepresentation::kFloat32: {
       InstructionOperand temp = g.TempRegister();
       EmitLoad(this, opcode, &temp, base, index);
       Emit(kArmVmovF32U32, g.DefineAsRegister(node), temp);
       return;
     }
-    case MachineRepresentation::kFloat64: {
-      // Compute the address of the least-significant half of the FP value.
+    case MachineRepresentation::kFloat64:
+    case MachineRepresentation::kSimd128: {
+      // Compute the address of the least-significant byte of the FP value.
       // We assume that the base node is unlikely to be an encodable immediate
       // or the result of a shift operation, so only consider the addressing
       // mode that should be used for the index node.
@@ -585,8 +587,12 @@ void InstructionSelector::VisitUnalignedLoad(Node* node) {
 
       if (CpuFeatures::IsSupported(NEON)) {
         // With NEON we can load directly from the calculated address.
-        Emit(kArmVld1F64, g.DefineAsRegister(node), addr);
+        ArchOpcode op = load_rep == MachineRepresentation::kFloat64
+                            ? kArmVld1F64
+                            : kArmVld1S128;
+        Emit(op, g.DefineAsRegister(node), addr);
       } else {
+        DCHECK_NE(MachineRepresentation::kSimd128, load_rep);
         // Load both halves and move to an FP register.
         InstructionOperand fp_lo = g.TempRegister();
         InstructionOperand fp_hi = g.TempRegister();
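
To make the branch structure above easier to follow, here is a compact restatement of the opcode choice VisitUnalignedLoad now makes. The enums are stand-ins for V8's and the helper itself is hypothetical; the real code also emits the address computation and register moves shown in the hunk.

enum class Rep { kFloat32, kFloat64, kSimd128 };
enum Opcode { kArmLdr, kArmVmovF32U32, kArmVld1F64, kArmVld1S128 };

Opcode UnalignedLoadOpcode(Rep rep, bool has_neon) {
  switch (rep) {
    case Rep::kFloat32:
      return kArmVmovF32U32;  // ldr into a core register, then vmov to an S register
    case Rep::kFloat64:
      // With NEON: one vld1 from the computed address; otherwise two integer
      // loads followed by a vmov into the D register.
      return has_neon ? kArmVld1F64 : kArmLdr;
    case Rep::kSimd128:
      // SIMD-128 values imply NEON, so the non-NEON branch is unreachable
      // (hence the DCHECK_NE added by the patch).
      return kArmVld1S128;
  }
  return kArmLdr;  // unreachable; silences missing-return warnings
}
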
@@ -619,6 +625,7 @@ void InstructionSelector::VisitUnalignedStore(Node* node) {
   // Only floating point stores need to be specially handled; integer stores
   // support unaligned access. We support unaligned FP stores by moving the
   // value to integer registers first, then storing to the destination address.
+  // If NEON is supported, we use the vst1.8 instruction.
   switch (store_rep) {
     case MachineRepresentation::kFloat32: {
       inputs[input_count++] = g.TempRegister();
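
As with loads, the kFloat32 store path goes through a core register. Semantically it amounts to the following standard C++ (illustrative only, hypothetical function name, not V8 code):

#include <cstdint>
#include <cstring>

void StoreUnalignedF32(void* p, float value) {
  uint32_t bits;
  std::memcpy(&bits, &value, sizeof(bits));  // vmov from the S register to a core register
  std::memcpy(p, &bits, sizeof(bits));       // plain str; ARMv7 integer stores tolerate unaligned addresses
}
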
@@ -627,7 +634,8 @@ void InstructionSelector::VisitUnalignedStore(Node* node) {
       EmitStore(this, kArmStr, input_count, inputs, index);
       return;
     }
-    case MachineRepresentation::kFloat64: {
+    case MachineRepresentation::kFloat64:
+    case MachineRepresentation::kSimd128: {
       if (CpuFeatures::IsSupported(NEON)) {
         InstructionOperand address = g.TempRegister();
         {
@@ -653,8 +661,12 @@ void InstructionSelector::VisitUnalignedStore(Node* node) {
 
         inputs[input_count++] = g.UseRegister(value);
         inputs[input_count++] = address;
-        Emit(kArmVst1F64, 0, nullptr, input_count, inputs);
+        ArchOpcode op = store_rep == MachineRepresentation::kFloat64
+                            ? kArmVst1F64
+                            : kArmVst1S128;
+        Emit(op, 0, nullptr, input_count, inputs);
       } else {
+        DCHECK_NE(MachineRepresentation::kSimd128, store_rep);
         // Store a 64-bit floating point value using two 32-bit integer stores.
         // Computing the store address here would require three live temporary
         // registers (fp<63:32>, fp<31:0>, address), so compute base + 4 after
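
The non-NEON fallback described by that last comment stores the double as two 32-bit words through the integer pipeline, writing the low half first and only then forming base + 4 so that fewer temporaries stay live. An illustrative standard-C++ equivalent (little-endian assumed, hypothetical function name, not the V8 code itself):

#include <cstdint>
#include <cstring>

void StoreUnalignedF64(void* p, double value) {
  uint32_t halves[2];                            // halves[0] = fp<31:0>, halves[1] = fp<63:32>
  std::memcpy(halves, &value, sizeof(halves));   // vmov r_lo, r_hi, d
  std::memcpy(p, &halves[0], sizeof(uint32_t));  // str r_lo, [base]
  std::memcpy(static_cast<char*>(p) + 4, &halves[1],
              sizeof(uint32_t));                 // str r_hi, [base + 4]
}
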