Chromium Code Reviews
| Index: src/arm/macro-assembler-arm.cc |
| diff --git a/src/arm/macro-assembler-arm.cc b/src/arm/macro-assembler-arm.cc |
| index 3211dea083557337358dd4c1b94464f118646aba..70a332b8187ae3c3a461a2b6eb6b4a7295200fac 100644 |
| --- a/src/arm/macro-assembler-arm.cc |
| +++ b/src/arm/macro-assembler-arm.cc |
| @@ -1081,8 +1081,8 @@ void MacroAssembler::VmovLow(DwVfpRegister dst, Register src) { |
| } |
| void MacroAssembler::VmovExtended(Register dst, int src_code) { |
| - DCHECK_LE(32, src_code); |
| - DCHECK_GT(64, src_code); |
| + DCHECK_LE(SwVfpRegister::kMaxNumRegisters, src_code); |
| + DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, src_code); |
| if (src_code & 0x1) { |
| VmovHigh(dst, DwVfpRegister::from_code(src_code / 2)); |
| } else { |
| @@ -1091,8 +1091,8 @@ void MacroAssembler::VmovExtended(Register dst, int src_code) { |
| } |
| void MacroAssembler::VmovExtended(int dst_code, Register src) { |
| - DCHECK_LE(32, dst_code); |
| - DCHECK_GT(64, dst_code); |
| + DCHECK_LE(SwVfpRegister::kMaxNumRegisters, dst_code); |
| + DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, dst_code); |
| if (dst_code & 0x1) { |
| VmovHigh(DwVfpRegister::from_code(dst_code / 2), src); |
| } else { |
| @@ -1102,22 +1102,23 @@ void MacroAssembler::VmovExtended(int dst_code, Register src) { |
| void MacroAssembler::VmovExtended(int dst_code, int src_code, |
| Register scratch) { |
| - if (src_code < 32 && dst_code < 32) { |
| + if (src_code < SwVfpRegister::kMaxNumRegisters && |
| + dst_code < SwVfpRegister::kMaxNumRegisters) { |
| // src and dst are both s-registers. |
| vmov(SwVfpRegister::from_code(dst_code), |
| SwVfpRegister::from_code(src_code)); |
| - } else if (src_code < 32) { |
| + } else if (src_code < SwVfpRegister::kMaxNumRegisters) { |
| // src is an s-register. |
| vmov(scratch, SwVfpRegister::from_code(src_code)); |
| VmovExtended(dst_code, scratch); |
| - } else if (dst_code < 32) { |
| + } else if (dst_code < SwVfpRegister::kMaxNumRegisters) { |
| // dst is an s-register. |
| VmovExtended(scratch, src_code); |
| vmov(SwVfpRegister::from_code(dst_code), scratch); |
| } else { |
| // Neither src or dst are s-registers. |
| - DCHECK_GT(64, src_code); |
| - DCHECK_GT(64, dst_code); |
| + DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, src_code); |
| + DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, dst_code); |
| VmovExtended(scratch, src_code); |
| VmovExtended(dst_code, scratch); |
| } |
| @@ -1125,7 +1126,7 @@ void MacroAssembler::VmovExtended(int dst_code, int src_code, |
| void MacroAssembler::VmovExtended(int dst_code, const MemOperand& src, |
| Register scratch) { |
| - if (dst_code >= 32) { |
| + if (dst_code >= SwVfpRegister::kMaxNumRegisters) { |
| ldr(scratch, src); |
| VmovExtended(dst_code, scratch); |
| } else { |
| @@ -1135,7 +1136,7 @@ void MacroAssembler::VmovExtended(int dst_code, const MemOperand& src, |
| void MacroAssembler::VmovExtended(const MemOperand& dst, int src_code, |
| Register scratch) { |
| - if (src_code >= 32) { |
| + if (src_code >= SwVfpRegister::kMaxNumRegisters) { |
| VmovExtended(scratch, src_code); |
| str(scratch, dst); |
| } else { |
| @@ -1143,6 +1144,78 @@ void MacroAssembler::VmovExtended(const MemOperand& dst, int src_code, |
| } |
| } |
| +void MacroAssembler::ExtractLane(Register dst, QwNeonRegister src, |
|
Rodolph Perfetta (ARM), 2016/12/08 18:08:28:
There is a Neon instruction for this: vmov.dt rt, … [comment truncated in this view]
bbudge, 2016/12/10 21:33:04:
Awesome, I've implemented this for the other data … [comment truncated in this view]
|
| + NeonDataType dt, int lane) { |
| + // Read the word containing the lane into dst. |
| + int bytes_per_lane = dt & NeonDataTypeSizeMask; |
| + int byte = (lane * bytes_per_lane); |
| + int word = byte / kPointerSize; |
| + int s_code = src.code() * 4 + word; |
| + if (s_code < SwVfpRegister::kMaxNumRegisters) { |
| + vmov(dst, SwVfpRegister::from_code(s_code)); |
| + } else { |
| + VmovExtended(dst, s_code); |
| + } |
| + if (bytes_per_lane != kPointerSize) { |
| + // Extract lane, and sign extend for signed types. |
| + int width = bytes_per_lane * kBitsPerByte; |
| + int lsb = (byte & 0x3) * kBitsPerByte; |
| + if ((dt & NeonDataTypeUMask) != 0) { |
| + Ubfx(dst, dst, lsb, width); |
| + } else { |
| + Sbfx(dst, dst, lsb, width); |
| + } |
| + } |
| +} |
| + |
| +void MacroAssembler::ExtractLane(SwVfpRegister dst, QwNeonRegister src, |
| + Register scratch, int lane) { |
| + int s_code = src.code() * 4 + lane; |
| + VmovExtended(dst.code(), s_code, scratch); |
| +} |
| + |
| +void MacroAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src, |
| + Register src_lane, Register scratch, |
| + NeonDataType dt, int lane) { |
| + Move(dst, src); |
| + int bytes_per_lane = dt & NeonDataTypeSizeMask; |
|
Rodolph Perfetta (ARM), 2016/12/08 18:08:28:
if you implement vmov.dt Dn[x], Rt then you can dr… [comment truncated in this view]
bbudge, 2016/12/10 21:33:04:
I implemented the Neon form of this instruction to … [comment truncated in this view]
|
| + int byte = (lane * bytes_per_lane); |
| + int word = byte / kPointerSize; |
| + int s_code = dst.code() * 4 + word; |
| + // If lane is word sized, just move src_lane into the containing s-register. |
| + if (bytes_per_lane == kPointerSize) { |
| + if (s_code < SwVfpRegister::kMaxNumRegisters) { |
| + vmov(SwVfpRegister::from_code(s_code), src_lane); |
| + } else { |
| + VmovExtended(s_code, src_lane); |
| + } |
| + return; |
| + } |
| + // Move the s-register containing the lane to replace into scratch register. |
| + if (s_code < SwVfpRegister::kMaxNumRegisters) { |
| + vmov(scratch, SwVfpRegister::from_code(s_code)); |
| + } else { |
| + VmovExtended(scratch, s_code); |
| + } |
| + // Combine scratch with src_lane, shifted into position. |
| + int width = bytes_per_lane * kBitsPerByte; |
| + int lsb = (byte % kPointerSize) * kBitsPerByte; |
| + bfi(scratch, src_lane, lsb, width); |
| + if (s_code < SwVfpRegister::kMaxNumRegisters) { |
| + vmov(SwVfpRegister::from_code(s_code), scratch); |
| + } else { |
| + VmovExtended(s_code, scratch); |
| + } |
| +} |
| + |
| +void MacroAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src, |
| + SwVfpRegister src_lane, Register scratch, |
| + int lane) { |
| + Move(dst, src); |
| + int s_code = dst.code() * 4 + lane; |
| + VmovExtended(s_code, src_lane.code(), scratch); |
| +} |
| + |
| void MacroAssembler::LslPair(Register dst_low, Register dst_high, |
| Register src_low, Register src_high, |
| Register scratch, Register shift) { |