Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1146)

Unified Diff: src/arm/macro-assembler-arm.cc

Issue 2868603002: [ARM] Improve VFP register moves. (Closed)
Patch Set: Rebase. Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/arm/macro-assembler-arm.h ('k') | src/arm/simulator-arm.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/arm/macro-assembler-arm.cc
diff --git a/src/arm/macro-assembler-arm.cc b/src/arm/macro-assembler-arm.cc
index 30fb52729d0df377afc7b0404d0e81d2f94890e4..6f123934140fb2830f73501ce9d566c6418cceea 100644
--- a/src/arm/macro-assembler-arm.cc
+++ b/src/arm/macro-assembler-arm.cc
@@ -1079,47 +1079,90 @@ void MacroAssembler::VmovExtended(int dst_code, Register src) {
}
}
-void MacroAssembler::VmovExtended(int dst_code, int src_code,
- Register scratch) {
+void MacroAssembler::VmovExtended(int dst_code, int src_code) {
+ if (src_code == dst_code) return;
+
if (src_code < SwVfpRegister::kMaxNumRegisters &&
dst_code < SwVfpRegister::kMaxNumRegisters) {
// src and dst are both s-registers.
vmov(SwVfpRegister::from_code(dst_code),
SwVfpRegister::from_code(src_code));
- } else if (src_code < SwVfpRegister::kMaxNumRegisters) {
- // src is an s-register.
- vmov(scratch, SwVfpRegister::from_code(src_code));
- VmovExtended(dst_code, scratch);
+ return;
+ }
+ DwVfpRegister dst_d_reg = DwVfpRegister::from_code(dst_code / 2);
+ DwVfpRegister src_d_reg = DwVfpRegister::from_code(src_code / 2);
+ int dst_offset = dst_code & 1;
+ int src_offset = src_code & 1;
+ if (CpuFeatures::IsSupported(NEON)) {
+ // On Neon we can shift and insert from d-registers.
+ if (src_offset == dst_offset) {
+ // Offsets are the same, use vdup to copy the source to the opposite lane.
+ vdup(Neon32, kScratchDoubleReg, src_d_reg, src_offset);
+ src_d_reg = kScratchDoubleReg;
+ src_offset = dst_offset ^ 1;
+ }
+ if (dst_offset) {
+ if (dst_d_reg.is(src_d_reg)) {
+ vdup(Neon32, dst_d_reg, src_d_reg, 0);
+ } else {
+ vsli(Neon64, dst_d_reg, src_d_reg, 32);
+ }
+ } else {
+ if (dst_d_reg.is(src_d_reg)) {
+ vdup(Neon32, dst_d_reg, src_d_reg, 1);
+ } else {
+ vsri(Neon64, dst_d_reg, src_d_reg, 32);
+ }
+ }
+ return;
+ }
+
+ // Without Neon, use the scratch registers to move src and/or dst into
+ // s-registers.
+ int scratchSCode = kScratchDoubleReg.low().code();
+ int scratchSCode2 = kScratchDoubleReg2.low().code();
+ if (src_code < SwVfpRegister::kMaxNumRegisters) {
+ // src is an s-register, dst is not.
+ vmov(kScratchDoubleReg, dst_d_reg);
+ vmov(SwVfpRegister::from_code(scratchSCode + dst_offset),
+ SwVfpRegister::from_code(src_code));
+ vmov(dst_d_reg, kScratchDoubleReg);
} else if (dst_code < SwVfpRegister::kMaxNumRegisters) {
- // dst is an s-register.
- VmovExtended(scratch, src_code);
- vmov(SwVfpRegister::from_code(dst_code), scratch);
+ // dst is an s-register, src is not.
+ vmov(kScratchDoubleReg, src_d_reg);
+ vmov(SwVfpRegister::from_code(dst_code),
+ SwVfpRegister::from_code(scratchSCode + src_offset));
} else {
- // Neither src or dst are s-registers.
- DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, src_code);
- DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, dst_code);
- VmovExtended(scratch, src_code);
- VmovExtended(dst_code, scratch);
+ // Neither src or dst are s-registers. Both scratch double registers are
+ // available when there are 32 VFP registers.
+ vmov(kScratchDoubleReg, src_d_reg);
+ vmov(kScratchDoubleReg2, dst_d_reg);
+ vmov(SwVfpRegister::from_code(scratchSCode + dst_offset),
+ SwVfpRegister::from_code(scratchSCode2 + src_offset));
+ vmov(dst_d_reg, kScratchQuadReg.high());
}
}
-void MacroAssembler::VmovExtended(int dst_code, const MemOperand& src,
- Register scratch) {
- if (dst_code >= SwVfpRegister::kMaxNumRegisters) {
- ldr(scratch, src);
- VmovExtended(dst_code, scratch);
- } else {
+void MacroAssembler::VmovExtended(int dst_code, const MemOperand& src) {
+ if (dst_code < SwVfpRegister::kMaxNumRegisters) {
vldr(SwVfpRegister::from_code(dst_code), src);
+ } else {
+ // TODO(bbudge) If Neon supported, use load single lane form of vld1.
+ int dst_s_code = kScratchDoubleReg.low().code() + (dst_code & 1);
+ vmov(kScratchDoubleReg, DwVfpRegister::from_code(dst_code / 2));
+ vldr(SwVfpRegister::from_code(dst_s_code), src);
+ vmov(DwVfpRegister::from_code(dst_code / 2), kScratchDoubleReg);
}
}
-void MacroAssembler::VmovExtended(const MemOperand& dst, int src_code,
- Register scratch) {
- if (src_code >= SwVfpRegister::kMaxNumRegisters) {
- VmovExtended(scratch, src_code);
- str(scratch, dst);
- } else {
+void MacroAssembler::VmovExtended(const MemOperand& dst, int src_code) {
+ if (src_code < SwVfpRegister::kMaxNumRegisters) {
vstr(SwVfpRegister::from_code(src_code), dst);
+ } else {
+ // TODO(bbudge) If Neon supported, use store single lane form of vst1.
+ int src_s_code = kScratchDoubleReg.low().code() + (src_code & 1);
+ vmov(kScratchDoubleReg, DwVfpRegister::from_code(src_code / 2));
+ vstr(SwVfpRegister::from_code(src_s_code), dst);
}
}
@@ -1145,9 +1188,9 @@ void MacroAssembler::ExtractLane(Register dst, DwVfpRegister src,
}
void MacroAssembler::ExtractLane(SwVfpRegister dst, QwNeonRegister src,
- Register scratch, int lane) {
+ int lane) {
int s_code = src.code() * 4 + lane;
- VmovExtended(dst.code(), s_code, scratch);
+ VmovExtended(dst.code(), s_code);
}
void MacroAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src,
@@ -1164,11 +1207,10 @@ void MacroAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src,
}
void MacroAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src,
- SwVfpRegister src_lane, Register scratch,
- int lane) {
+ SwVfpRegister src_lane, int lane) {
Move(dst, src);
int s_code = dst.code() * 4 + lane;
- VmovExtended(s_code, src_lane.code(), scratch);
+ VmovExtended(s_code, src_lane.code());
}
void MacroAssembler::LslPair(Register dst_low, Register dst_high,
« no previous file with comments | « src/arm/macro-assembler-arm.h ('k') | src/arm/simulator-arm.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698