Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(49)

Side by Side Diff: src/arm/macro-assembler-arm.cc

Issue 2868603002: [ARM] Improve VFP register moves. (Closed)
Patch Set: Rebase. Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/arm/macro-assembler-arm.h ('k') | src/arm/simulator-arm.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <limits.h> // For LONG_MIN, LONG_MAX. 5 #include <limits.h> // For LONG_MIN, LONG_MAX.
6 6
7 #if V8_TARGET_ARCH_ARM 7 #if V8_TARGET_ARCH_ARM
8 8
9 #include "src/assembler-inl.h" 9 #include "src/assembler-inl.h"
10 #include "src/base/bits.h" 10 #include "src/base/bits.h"
(...skipping 1061 matching lines...) Expand 10 before | Expand all | Expand 10 after
1072 void MacroAssembler::VmovExtended(int dst_code, Register src) { 1072 void MacroAssembler::VmovExtended(int dst_code, Register src) {
1073 DCHECK_LE(SwVfpRegister::kMaxNumRegisters, dst_code); 1073 DCHECK_LE(SwVfpRegister::kMaxNumRegisters, dst_code);
1074 DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, dst_code); 1074 DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, dst_code);
1075 if (dst_code & 0x1) { 1075 if (dst_code & 0x1) {
1076 VmovHigh(DwVfpRegister::from_code(dst_code / 2), src); 1076 VmovHigh(DwVfpRegister::from_code(dst_code / 2), src);
1077 } else { 1077 } else {
1078 VmovLow(DwVfpRegister::from_code(dst_code / 2), src); 1078 VmovLow(DwVfpRegister::from_code(dst_code / 2), src);
1079 } 1079 }
1080 } 1080 }
1081 1081
1082 void MacroAssembler::VmovExtended(int dst_code, int src_code, 1082 void MacroAssembler::VmovExtended(int dst_code, int src_code) {
1083 Register scratch) { 1083 if (src_code == dst_code) return;
1084
1084 if (src_code < SwVfpRegister::kMaxNumRegisters && 1085 if (src_code < SwVfpRegister::kMaxNumRegisters &&
1085 dst_code < SwVfpRegister::kMaxNumRegisters) { 1086 dst_code < SwVfpRegister::kMaxNumRegisters) {
1086 // src and dst are both s-registers. 1087 // src and dst are both s-registers.
1087 vmov(SwVfpRegister::from_code(dst_code), 1088 vmov(SwVfpRegister::from_code(dst_code),
1088 SwVfpRegister::from_code(src_code)); 1089 SwVfpRegister::from_code(src_code));
1089 } else if (src_code < SwVfpRegister::kMaxNumRegisters) { 1090 return;
1090 // src is an s-register. 1091 }
1091 vmov(scratch, SwVfpRegister::from_code(src_code)); 1092 DwVfpRegister dst_d_reg = DwVfpRegister::from_code(dst_code / 2);
1092 VmovExtended(dst_code, scratch); 1093 DwVfpRegister src_d_reg = DwVfpRegister::from_code(src_code / 2);
1094 int dst_offset = dst_code & 1;
1095 int src_offset = src_code & 1;
1096 if (CpuFeatures::IsSupported(NEON)) {
1097 // On Neon we can shift and insert from d-registers.
1098 if (src_offset == dst_offset) {
1099 // Offsets are the same, use vdup to copy the source to the opposite lane.
1100 vdup(Neon32, kScratchDoubleReg, src_d_reg, src_offset);
1101 src_d_reg = kScratchDoubleReg;
1102 src_offset = dst_offset ^ 1;
1103 }
1104 if (dst_offset) {
1105 if (dst_d_reg.is(src_d_reg)) {
1106 vdup(Neon32, dst_d_reg, src_d_reg, 0);
1107 } else {
1108 vsli(Neon64, dst_d_reg, src_d_reg, 32);
1109 }
1110 } else {
1111 if (dst_d_reg.is(src_d_reg)) {
1112 vdup(Neon32, dst_d_reg, src_d_reg, 1);
1113 } else {
1114 vsri(Neon64, dst_d_reg, src_d_reg, 32);
1115 }
1116 }
1117 return;
1118 }
1119
1120 // Without Neon, use the scratch registers to move src and/or dst into
1121 // s-registers.
1122 int scratchSCode = kScratchDoubleReg.low().code();
1123 int scratchSCode2 = kScratchDoubleReg2.low().code();
1124 if (src_code < SwVfpRegister::kMaxNumRegisters) {
1125 // src is an s-register, dst is not.
1126 vmov(kScratchDoubleReg, dst_d_reg);
1127 vmov(SwVfpRegister::from_code(scratchSCode + dst_offset),
1128 SwVfpRegister::from_code(src_code));
1129 vmov(dst_d_reg, kScratchDoubleReg);
1093 } else if (dst_code < SwVfpRegister::kMaxNumRegisters) { 1130 } else if (dst_code < SwVfpRegister::kMaxNumRegisters) {
1094 // dst is an s-register. 1131 // dst is an s-register, src is not.
1095 VmovExtended(scratch, src_code); 1132 vmov(kScratchDoubleReg, src_d_reg);
1096 vmov(SwVfpRegister::from_code(dst_code), scratch); 1133 vmov(SwVfpRegister::from_code(dst_code),
1134 SwVfpRegister::from_code(scratchSCode + src_offset));
1097 } else { 1135 } else {
1098 // Neither src or dst are s-registers. 1136 // Neither src or dst are s-registers. Both scratch double registers are
1099 DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, src_code); 1137 // available when there are 32 VFP registers.
1100 DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, dst_code); 1138 vmov(kScratchDoubleReg, src_d_reg);
1101 VmovExtended(scratch, src_code); 1139 vmov(kScratchDoubleReg2, dst_d_reg);
1102 VmovExtended(dst_code, scratch); 1140 vmov(SwVfpRegister::from_code(scratchSCode + dst_offset),
1141 SwVfpRegister::from_code(scratchSCode2 + src_offset));
1142 vmov(dst_d_reg, kScratchQuadReg.high());
1103 } 1143 }
1104 } 1144 }
1105 1145
1106 void MacroAssembler::VmovExtended(int dst_code, const MemOperand& src, 1146 void MacroAssembler::VmovExtended(int dst_code, const MemOperand& src) {
1107 Register scratch) { 1147 if (dst_code < SwVfpRegister::kMaxNumRegisters) {
1108 if (dst_code >= SwVfpRegister::kMaxNumRegisters) { 1148 vldr(SwVfpRegister::from_code(dst_code), src);
1109 ldr(scratch, src);
1110 VmovExtended(dst_code, scratch);
1111 } else { 1149 } else {
1112 vldr(SwVfpRegister::from_code(dst_code), src); 1150 // TODO(bbudge) If Neon supported, use load single lane form of vld1.
1151 int dst_s_code = kScratchDoubleReg.low().code() + (dst_code & 1);
1152 vmov(kScratchDoubleReg, DwVfpRegister::from_code(dst_code / 2));
1153 vldr(SwVfpRegister::from_code(dst_s_code), src);
1154 vmov(DwVfpRegister::from_code(dst_code / 2), kScratchDoubleReg);
1113 } 1155 }
1114 } 1156 }
1115 1157
1116 void MacroAssembler::VmovExtended(const MemOperand& dst, int src_code, 1158 void MacroAssembler::VmovExtended(const MemOperand& dst, int src_code) {
1117 Register scratch) { 1159 if (src_code < SwVfpRegister::kMaxNumRegisters) {
1118 if (src_code >= SwVfpRegister::kMaxNumRegisters) { 1160 vstr(SwVfpRegister::from_code(src_code), dst);
1119 VmovExtended(scratch, src_code);
1120 str(scratch, dst);
1121 } else { 1161 } else {
1122 vstr(SwVfpRegister::from_code(src_code), dst); 1162 // TODO(bbudge) If Neon supported, use store single lane form of vst1.
1163 int src_s_code = kScratchDoubleReg.low().code() + (src_code & 1);
1164 vmov(kScratchDoubleReg, DwVfpRegister::from_code(src_code / 2));
1165 vstr(SwVfpRegister::from_code(src_s_code), dst);
1123 } 1166 }
1124 } 1167 }
1125 1168
1126 void MacroAssembler::ExtractLane(Register dst, QwNeonRegister src, 1169 void MacroAssembler::ExtractLane(Register dst, QwNeonRegister src,
1127 NeonDataType dt, int lane) { 1170 NeonDataType dt, int lane) {
1128 int size = NeonSz(dt); // 0, 1, 2 1171 int size = NeonSz(dt); // 0, 1, 2
1129 int byte = lane << size; 1172 int byte = lane << size;
1130 int double_word = byte >> kDoubleSizeLog2; 1173 int double_word = byte >> kDoubleSizeLog2;
1131 int double_byte = byte & (kDoubleSize - 1); 1174 int double_byte = byte & (kDoubleSize - 1);
1132 int double_lane = double_byte >> size; 1175 int double_lane = double_byte >> size;
1133 DwVfpRegister double_source = 1176 DwVfpRegister double_source =
1134 DwVfpRegister::from_code(src.code() * 2 + double_word); 1177 DwVfpRegister::from_code(src.code() * 2 + double_word);
1135 vmov(dt, dst, double_source, double_lane); 1178 vmov(dt, dst, double_source, double_lane);
1136 } 1179 }
1137 1180
1138 void MacroAssembler::ExtractLane(Register dst, DwVfpRegister src, 1181 void MacroAssembler::ExtractLane(Register dst, DwVfpRegister src,
1139 NeonDataType dt, int lane) { 1182 NeonDataType dt, int lane) {
1140 int size = NeonSz(dt); // 0, 1, 2 1183 int size = NeonSz(dt); // 0, 1, 2
1141 int byte = lane << size; 1184 int byte = lane << size;
1142 int double_byte = byte & (kDoubleSize - 1); 1185 int double_byte = byte & (kDoubleSize - 1);
1143 int double_lane = double_byte >> size; 1186 int double_lane = double_byte >> size;
1144 vmov(dt, dst, src, double_lane); 1187 vmov(dt, dst, src, double_lane);
1145 } 1188 }
1146 1189
1147 void MacroAssembler::ExtractLane(SwVfpRegister dst, QwNeonRegister src, 1190 void MacroAssembler::ExtractLane(SwVfpRegister dst, QwNeonRegister src,
1148 Register scratch, int lane) { 1191 int lane) {
1149 int s_code = src.code() * 4 + lane; 1192 int s_code = src.code() * 4 + lane;
1150 VmovExtended(dst.code(), s_code, scratch); 1193 VmovExtended(dst.code(), s_code);
1151 } 1194 }
1152 1195
1153 void MacroAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src, 1196 void MacroAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src,
1154 Register src_lane, NeonDataType dt, int lane) { 1197 Register src_lane, NeonDataType dt, int lane) {
1155 Move(dst, src); 1198 Move(dst, src);
1156 int size = NeonSz(dt); // 0, 1, 2 1199 int size = NeonSz(dt); // 0, 1, 2
1157 int byte = lane << size; 1200 int byte = lane << size;
1158 int double_word = byte >> kDoubleSizeLog2; 1201 int double_word = byte >> kDoubleSizeLog2;
1159 int double_byte = byte & (kDoubleSize - 1); 1202 int double_byte = byte & (kDoubleSize - 1);
1160 int double_lane = double_byte >> size; 1203 int double_lane = double_byte >> size;
1161 DwVfpRegister double_dst = 1204 DwVfpRegister double_dst =
1162 DwVfpRegister::from_code(dst.code() * 2 + double_word); 1205 DwVfpRegister::from_code(dst.code() * 2 + double_word);
1163 vmov(dt, double_dst, double_lane, src_lane); 1206 vmov(dt, double_dst, double_lane, src_lane);
1164 } 1207 }
1165 1208
1166 void MacroAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src, 1209 void MacroAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src,
1167 SwVfpRegister src_lane, Register scratch, 1210 SwVfpRegister src_lane, int lane) {
1168 int lane) {
1169 Move(dst, src); 1211 Move(dst, src);
1170 int s_code = dst.code() * 4 + lane; 1212 int s_code = dst.code() * 4 + lane;
1171 VmovExtended(s_code, src_lane.code(), scratch); 1213 VmovExtended(s_code, src_lane.code());
1172 } 1214 }
1173 1215
1174 void MacroAssembler::LslPair(Register dst_low, Register dst_high, 1216 void MacroAssembler::LslPair(Register dst_low, Register dst_high,
1175 Register src_low, Register src_high, 1217 Register src_low, Register src_high,
1176 Register scratch, Register shift) { 1218 Register scratch, Register shift) {
1177 DCHECK(!AreAliased(dst_high, src_low)); 1219 DCHECK(!AreAliased(dst_high, src_low));
1178 DCHECK(!AreAliased(dst_high, shift)); 1220 DCHECK(!AreAliased(dst_high, shift));
1179 1221
1180 Label less_than_32; 1222 Label less_than_32;
1181 Label done; 1223 Label done;
(...skipping 2495 matching lines...) Expand 10 before | Expand all | Expand 10 after
3677 } 3719 }
3678 } 3720 }
3679 if (mag.shift > 0) mov(result, Operand(result, ASR, mag.shift)); 3721 if (mag.shift > 0) mov(result, Operand(result, ASR, mag.shift));
3680 add(result, result, Operand(dividend, LSR, 31)); 3722 add(result, result, Operand(dividend, LSR, 31));
3681 } 3723 }
3682 3724
3683 } // namespace internal 3725 } // namespace internal
3684 } // namespace v8 3726 } // namespace v8
3685 3727
3686 #endif // V8_TARGET_ARCH_ARM 3728 #endif // V8_TARGET_ARCH_ARM
OLD | NEW
« no previous file with comments | « src/arm/macro-assembler-arm.h ('k') | src/arm/simulator-arm.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698