Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(44)

Side by Side Diff: src/arm/macro-assembler-arm.cc

Issue 2868603002: [ARM] Improve VFP register moves. (Closed)
Patch Set: Clean up, renaming. Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <limits.h> // For LONG_MIN, LONG_MAX. 5 #include <limits.h> // For LONG_MIN, LONG_MAX.
6 6
7 #if V8_TARGET_ARCH_ARM 7 #if V8_TARGET_ARCH_ARM
8 8
9 #include "src/assembler-inl.h" 9 #include "src/assembler-inl.h"
10 #include "src/base/bits.h" 10 #include "src/base/bits.h"
(...skipping 1061 matching lines...) Expand 10 before | Expand all | Expand 10 after
1072 void MacroAssembler::VmovExtended(int dst_code, Register src) { 1072 void MacroAssembler::VmovExtended(int dst_code, Register src) {
1073 DCHECK_LE(SwVfpRegister::kMaxNumRegisters, dst_code); 1073 DCHECK_LE(SwVfpRegister::kMaxNumRegisters, dst_code);
1074 DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, dst_code); 1074 DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, dst_code);
1075 if (dst_code & 0x1) { 1075 if (dst_code & 0x1) {
1076 VmovHigh(DwVfpRegister::from_code(dst_code / 2), src); 1076 VmovHigh(DwVfpRegister::from_code(dst_code / 2), src);
1077 } else { 1077 } else {
1078 VmovLow(DwVfpRegister::from_code(dst_code / 2), src); 1078 VmovLow(DwVfpRegister::from_code(dst_code / 2), src);
1079 } 1079 }
1080 } 1080 }
1081 1081
1082 void MacroAssembler::VmovExtended(int dst_code, int src_code, 1082 void MacroAssembler::VmovExtended(int dst_code, int src_code) {
1083 Register scratch) {
1084 if (src_code < SwVfpRegister::kMaxNumRegisters && 1083 if (src_code < SwVfpRegister::kMaxNumRegisters &&
1085 dst_code < SwVfpRegister::kMaxNumRegisters) { 1084 dst_code < SwVfpRegister::kMaxNumRegisters) {
1086 // src and dst are both s-registers. 1085 // src and dst are both s-registers.
1087 vmov(SwVfpRegister::from_code(dst_code), 1086 vmov(SwVfpRegister::from_code(dst_code),
1088 SwVfpRegister::from_code(src_code)); 1087 SwVfpRegister::from_code(src_code));
1089 } else if (src_code < SwVfpRegister::kMaxNumRegisters) { 1088 return;
1090 // src is an s-register. 1089 }
1091 vmov(scratch, SwVfpRegister::from_code(src_code)); 1090 DwVfpRegister dst_d_reg = DwVfpRegister::from_code(dst_code / 2);
1092 VmovExtended(dst_code, scratch); 1091 DwVfpRegister src_d_reg = DwVfpRegister::from_code(src_code / 2);
1092 int dst_offset = dst_code & 1;
1093 int src_offset = src_code & 1;
1094 if (CpuFeatures::IsSupported(NEON)) {
1095 // On Neon we can shift and insert from d-registers.
1096 if (src_offset == dst_offset) {
1097 // Offsets are the same, use vdup to copy the source to the opposite lane.
1098 vdup(Neon32, kScratchDoubleReg, src_d_reg, src_offset);
1099 src_d_reg = kScratchDoubleReg;
1100 src_offset = dst_offset ^ 1;
1101 }
martyn.capewell 2017/05/10 12:38:06 There's a tiny optimisation here: as vdup is sligh[…]
bbudge 2017/05/10 17:54:52 Nice. Done.
1102 if (dst_offset) {
1103 vsli(Neon64, dst_d_reg, src_d_reg, 32);
1104 } else {
1105 vsri(Neon64, dst_d_reg, src_d_reg, 32);
1106 }
1107 return;
1108 }
1109
1110 // Without Neon, use the scratch registers to move src and/or dst into
1111 // s-registers.
1112 int scratchSCode = kScratchDoubleReg.low().code();
1113 int scratchSCode2 = kScratchDoubleReg2.low().code();
1114 if (src_code < SwVfpRegister::kMaxNumRegisters) {
1115 // src is an s-register, dst is not.
1116 vmov(kScratchDoubleReg, dst_d_reg);
1117 vmov(SwVfpRegister::from_code(scratchSCode + dst_offset),
1118 SwVfpRegister::from_code(src_code));
1119 vmov(dst_d_reg, kScratchDoubleReg);
1093 } else if (dst_code < SwVfpRegister::kMaxNumRegisters) { 1120 } else if (dst_code < SwVfpRegister::kMaxNumRegisters) {
1094 // dst is an s-register. 1121 // dst is an s-register, src is not.
1095 VmovExtended(scratch, src_code); 1122 vmov(kScratchDoubleReg, src_d_reg);
1096 vmov(SwVfpRegister::from_code(dst_code), scratch); 1123 vmov(SwVfpRegister::from_code(dst_code),
1124 SwVfpRegister::from_code(scratchSCode + src_offset));
1097 } else { 1125 } else {
1098 // Neither src or dst are s-registers. 1126 // Neither src or dst are s-registers. Both scratch double registers are
1099 DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, src_code); 1127 // available when there are 32 VFP registers.
1100 DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, dst_code); 1128 vmov(kScratchDoubleReg, src_d_reg);
1101 VmovExtended(scratch, src_code); 1129 vmov(kScratchDoubleReg2, dst_d_reg);
1102 VmovExtended(dst_code, scratch); 1130 vmov(SwVfpRegister::from_code(scratchSCode + dst_offset),
1131 SwVfpRegister::from_code(scratchSCode2 + src_offset));
1132 vmov(dst_d_reg, kScratchQuadReg.high());
1103 } 1133 }
1104 } 1134 }
1105 1135
1106 void MacroAssembler::VmovExtended(int dst_code, const MemOperand& src, 1136 void MacroAssembler::VmovExtended(int dst_code, const MemOperand& src) {
1107 Register scratch) { 1137 if (dst_code < SwVfpRegister::kMaxNumRegisters) {
1108 if (dst_code >= SwVfpRegister::kMaxNumRegisters) { 1138 vldr(SwVfpRegister::from_code(dst_code), src);
1109 ldr(scratch, src);
1110 VmovExtended(dst_code, scratch);
1111 } else { 1139 } else {
1112 vldr(SwVfpRegister::from_code(dst_code), src); 1140 int dst_s_code = kScratchDoubleReg.low().code() + (dst_code & 1);
martyn.capewell 2017/05/10 12:38:06 When supported, this can use the "single element t[…]
bbudge 2017/05/10 17:54:52 Added TODO. Thanks.
1141 vmov(kScratchDoubleReg, DwVfpRegister::from_code(dst_code / 2));
1142 vldr(SwVfpRegister::from_code(dst_s_code), src);
1143 vmov(DwVfpRegister::from_code(dst_code / 2), kScratchDoubleReg);
1113 } 1144 }
1114 } 1145 }
1115 1146
1116 void MacroAssembler::VmovExtended(const MemOperand& dst, int src_code, 1147 void MacroAssembler::VmovExtended(const MemOperand& dst, int src_code) {
1117 Register scratch) { 1148 if (src_code < SwVfpRegister::kMaxNumRegisters) {
1118 if (src_code >= SwVfpRegister::kMaxNumRegisters) { 1149 vstr(SwVfpRegister::from_code(src_code), dst);
1119 VmovExtended(scratch, src_code);
1120 str(scratch, dst);
1121 } else { 1150 } else {
1122 vstr(SwVfpRegister::from_code(src_code), dst); 1151 int src_s_code = kScratchDoubleReg.low().code() + (src_code & 1);
1152 vmov(kScratchDoubleReg, DwVfpRegister::from_code(src_code / 2));
1153 vstr(SwVfpRegister::from_code(src_s_code), dst);
1123 } 1154 }
1124 } 1155 }
1125 1156
1126 void MacroAssembler::ExtractLane(Register dst, QwNeonRegister src, 1157 void MacroAssembler::ExtractLane(Register dst, QwNeonRegister src,
1127 NeonDataType dt, int lane) { 1158 NeonDataType dt, int lane) {
1128 int size = NeonSz(dt); // 0, 1, 2 1159 int size = NeonSz(dt); // 0, 1, 2
1129 int byte = lane << size; 1160 int byte = lane << size;
1130 int double_word = byte >> kDoubleSizeLog2; 1161 int double_word = byte >> kDoubleSizeLog2;
1131 int double_byte = byte & (kDoubleSize - 1); 1162 int double_byte = byte & (kDoubleSize - 1);
1132 int double_lane = double_byte >> size; 1163 int double_lane = double_byte >> size;
1133 DwVfpRegister double_source = 1164 DwVfpRegister double_source =
1134 DwVfpRegister::from_code(src.code() * 2 + double_word); 1165 DwVfpRegister::from_code(src.code() * 2 + double_word);
1135 vmov(dt, dst, double_source, double_lane); 1166 vmov(dt, dst, double_source, double_lane);
1136 } 1167 }
1137 1168
1138 void MacroAssembler::ExtractLane(Register dst, DwVfpRegister src, 1169 void MacroAssembler::ExtractLane(Register dst, DwVfpRegister src,
1139 NeonDataType dt, int lane) { 1170 NeonDataType dt, int lane) {
1140 int size = NeonSz(dt); // 0, 1, 2 1171 int size = NeonSz(dt); // 0, 1, 2
1141 int byte = lane << size; 1172 int byte = lane << size;
1142 int double_byte = byte & (kDoubleSize - 1); 1173 int double_byte = byte & (kDoubleSize - 1);
1143 int double_lane = double_byte >> size; 1174 int double_lane = double_byte >> size;
1144 vmov(dt, dst, src, double_lane); 1175 vmov(dt, dst, src, double_lane);
1145 } 1176 }
1146 1177
1147 void MacroAssembler::ExtractLane(SwVfpRegister dst, QwNeonRegister src, 1178 void MacroAssembler::ExtractLane(SwVfpRegister dst, QwNeonRegister src,
1148 Register scratch, int lane) { 1179 int lane) {
1149 int s_code = src.code() * 4 + lane; 1180 int s_code = src.code() * 4 + lane;
1150 VmovExtended(dst.code(), s_code, scratch); 1181 VmovExtended(dst.code(), s_code);
1151 } 1182 }
1152 1183
1153 void MacroAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src, 1184 void MacroAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src,
1154 Register src_lane, NeonDataType dt, int lane) { 1185 Register src_lane, NeonDataType dt, int lane) {
1155 Move(dst, src); 1186 Move(dst, src);
1156 int size = NeonSz(dt); // 0, 1, 2 1187 int size = NeonSz(dt); // 0, 1, 2
1157 int byte = lane << size; 1188 int byte = lane << size;
1158 int double_word = byte >> kDoubleSizeLog2; 1189 int double_word = byte >> kDoubleSizeLog2;
1159 int double_byte = byte & (kDoubleSize - 1); 1190 int double_byte = byte & (kDoubleSize - 1);
1160 int double_lane = double_byte >> size; 1191 int double_lane = double_byte >> size;
1161 DwVfpRegister double_dst = 1192 DwVfpRegister double_dst =
1162 DwVfpRegister::from_code(dst.code() * 2 + double_word); 1193 DwVfpRegister::from_code(dst.code() * 2 + double_word);
1163 vmov(dt, double_dst, double_lane, src_lane); 1194 vmov(dt, double_dst, double_lane, src_lane);
1164 } 1195 }
1165 1196
1166 void MacroAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src, 1197 void MacroAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src,
1167 SwVfpRegister src_lane, Register scratch, 1198 SwVfpRegister src_lane, int lane) {
1168 int lane) {
1169 Move(dst, src); 1199 Move(dst, src);
1170 int s_code = dst.code() * 4 + lane; 1200 int s_code = dst.code() * 4 + lane;
1171 VmovExtended(s_code, src_lane.code(), scratch); 1201 VmovExtended(s_code, src_lane.code());
1172 } 1202 }
1173 1203
1174 void MacroAssembler::LslPair(Register dst_low, Register dst_high, 1204 void MacroAssembler::LslPair(Register dst_low, Register dst_high,
1175 Register src_low, Register src_high, 1205 Register src_low, Register src_high,
1176 Register scratch, Register shift) { 1206 Register scratch, Register shift) {
1177 DCHECK(!AreAliased(dst_high, src_low)); 1207 DCHECK(!AreAliased(dst_high, src_low));
1178 DCHECK(!AreAliased(dst_high, shift)); 1208 DCHECK(!AreAliased(dst_high, shift));
1179 1209
1180 Label less_than_32; 1210 Label less_than_32;
1181 Label done; 1211 Label done;
(...skipping 2495 matching lines...) Expand 10 before | Expand all | Expand 10 after
3677 } 3707 }
3678 } 3708 }
3679 if (mag.shift > 0) mov(result, Operand(result, ASR, mag.shift)); 3709 if (mag.shift > 0) mov(result, Operand(result, ASR, mag.shift));
3680 add(result, result, Operand(dividend, LSR, 31)); 3710 add(result, result, Operand(dividend, LSR, 31));
3681 } 3711 }
3682 3712
3683 } // namespace internal 3713 } // namespace internal
3684 } // namespace v8 3714 } // namespace v8
3685 3715
3686 #endif // V8_TARGET_ARCH_ARM 3716 #endif // V8_TARGET_ARCH_ARM
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698