OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <limits.h> // For LONG_MIN, LONG_MAX. | 5 #include <limits.h> // For LONG_MIN, LONG_MAX. |
6 | 6 |
7 #if V8_TARGET_ARCH_ARM | 7 #if V8_TARGET_ARCH_ARM |
8 | 8 |
9 #include "src/assembler-inl.h" | 9 #include "src/assembler-inl.h" |
10 #include "src/base/bits.h" | 10 #include "src/base/bits.h" |
(...skipping 1061 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
// VmovExtended(dst_code, Register src): writes core register src into one
// half of a D register that is selected by an integer register code.
// The two DCHECKs require
//   SwVfpRegister::kMaxNumRegisters <= dst_code < 2 * kMaxNumRegisters.
// The target D register is dst_code / 2; an odd dst_code goes through
// VmovHigh and an even dst_code through VmovLow.
// The OLD and NEW columns of this diff are identical for this function.
1072 void MacroAssembler::VmovExtended(int dst_code, Register src) { | 1072 void MacroAssembler::VmovExtended(int dst_code, Register src) { |
1073 DCHECK_LE(SwVfpRegister::kMaxNumRegisters, dst_code); | 1073 DCHECK_LE(SwVfpRegister::kMaxNumRegisters, dst_code); |
1074 DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, dst_code); | 1074 DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, dst_code); |
1075 if (dst_code & 0x1) { | 1075 if (dst_code & 0x1) { |
1076 VmovHigh(DwVfpRegister::from_code(dst_code / 2), src); | 1076 VmovHigh(DwVfpRegister::from_code(dst_code / 2), src); |
1077 } else { | 1077 } else { |
1078 VmovLow(DwVfpRegister::from_code(dst_code / 2), src); | 1078 VmovLow(DwVfpRegister::from_code(dst_code / 2), src); |
1079 } | 1079 } |
1080 } | 1080 } |
1081 | 1081 |
// VmovExtended(dst_code, src_code): copies one S-register-sized lane between
// VFP registers identified by integer codes. Codes below
// SwVfpRegister::kMaxNumRegisters are used directly as SwVfpRegister codes;
// any other code is addressed as half (code & 1) of D register (code / 2).
//
// OLD column: takes an extra core register named scratch and routes the value
// through it using the Register-based VmovExtended overloads.
//
// NEW column: the scratch parameter is removed. The function
//   1. returns immediately when src_code == dst_code,
//   2. emits a single S-to-S vmov when both codes are S-register codes,
//   3. with NEON, works on the D registers using vdup / vsli / vsri, with
//      kScratchDoubleReg as a temporary when both lanes have the same offset,
//   4. without NEON, stages D registers in kScratchDoubleReg and
//      kScratchDoubleReg2 so the lane can be moved with an S-register vmov.
1082 void MacroAssembler::VmovExtended(int dst_code, int src_code, | 1082 void MacroAssembler::VmovExtended(int dst_code, int src_code) { |
1083 Register scratch) { | 1083 if (src_code == dst_code) return; |
| 1084 |
1084 if (src_code < SwVfpRegister::kMaxNumRegisters && | 1085 if (src_code < SwVfpRegister::kMaxNumRegisters && |
1085 dst_code < SwVfpRegister::kMaxNumRegisters) { | 1086 dst_code < SwVfpRegister::kMaxNumRegisters) { |
1086 // src and dst are both s-registers. | 1087 // src and dst are both s-registers. |
1087 vmov(SwVfpRegister::from_code(dst_code), | 1088 vmov(SwVfpRegister::from_code(dst_code), |
1088 SwVfpRegister::from_code(src_code)); | 1089 SwVfpRegister::from_code(src_code)); |
1089 } else if (src_code < SwVfpRegister::kMaxNumRegisters) { | 1090 return; |
1090 // src is an s-register. | 1091 } |
1091 vmov(scratch, SwVfpRegister::from_code(src_code)); | 1092 DwVfpRegister dst_d_reg = DwVfpRegister::from_code(dst_code / 2); |
1092 VmovExtended(dst_code, scratch); | 1093 DwVfpRegister src_d_reg = DwVfpRegister::from_code(src_code / 2); |
| 1094 int dst_offset = dst_code & 1; |
| 1095 int src_offset = src_code & 1; |
// NEW column, NEON path: after the block below src_offset always differs
// from dst_offset, because the equal-offset case first duplicates the source
// lane into kScratchDoubleReg and then flips src_offset.
| 1096 if (CpuFeatures::IsSupported(NEON)) { |
| 1097 // On Neon we can shift and insert from d-registers. |
| 1098 if (src_offset == dst_offset) { |
| 1099 // Offsets are the same, use vdup to copy the source to the opposite lane. |
| 1100 vdup(Neon32, kScratchDoubleReg, src_d_reg, src_offset); |
| 1101 src_d_reg = kScratchDoubleReg; |
| 1102 src_offset = dst_offset ^ 1; |
| 1103 } |
// When source and destination are the same D register the lane is broadcast
// with vdup; otherwise a 32-bit shift-and-insert (vsli for the odd
// destination lane, vsri for the even one) is emitted on the 64-bit register.
| 1104 if (dst_offset) { |
| 1105 if (dst_d_reg.is(src_d_reg)) { |
| 1106 vdup(Neon32, dst_d_reg, src_d_reg, 0); |
| 1107 } else { |
| 1108 vsli(Neon64, dst_d_reg, src_d_reg, 32); |
| 1109 } |
| 1110 } else { |
| 1111 if (dst_d_reg.is(src_d_reg)) { |
| 1112 vdup(Neon32, dst_d_reg, src_d_reg, 1); |
| 1113 } else { |
| 1114 vsri(Neon64, dst_d_reg, src_d_reg, 32); |
| 1115 } |
| 1116 } |
| 1117 return; |
| 1118 } |
| 1119 |
| 1120 // Without Neon, use the scratch registers to move src and/or dst into |
| 1121 // s-registers. |
| 1122 int scratchSCode = kScratchDoubleReg.low().code(); |
| 1123 int scratchSCode2 = kScratchDoubleReg2.low().code(); |
| 1124 if (src_code < SwVfpRegister::kMaxNumRegisters) { |
| 1125 // src is an s-register, dst is not. |
| 1126 vmov(kScratchDoubleReg, dst_d_reg); |
| 1127 vmov(SwVfpRegister::from_code(scratchSCode + dst_offset), |
| 1128 SwVfpRegister::from_code(src_code)); |
| 1129 vmov(dst_d_reg, kScratchDoubleReg); |
1093 } else if (dst_code < SwVfpRegister::kMaxNumRegisters) { | 1130 } else if (dst_code < SwVfpRegister::kMaxNumRegisters) { |
1094 // dst is an s-register. | 1131 // dst is an s-register, src is not. |
1095 VmovExtended(scratch, src_code); | 1132 vmov(kScratchDoubleReg, src_d_reg); |
1096 vmov(SwVfpRegister::from_code(dst_code), scratch); | 1133 vmov(SwVfpRegister::from_code(dst_code), |
| 1134 SwVfpRegister::from_code(scratchSCode + src_offset)); |
// NOTE(review): in the NEW column of the final branch below, src_d_reg is
// copied to kScratchDoubleReg and dst_d_reg to kScratchDoubleReg2, yet the
// lane move writes the S register derived from kScratchDoubleReg (indexed by
// dst_offset) and reads the one derived from kScratchDoubleReg2 (indexed by
// src_offset). That looks reversed relative to the two copies. Please confirm
// the intended data flow, and confirm that kScratchQuadReg.high() names the
// register that holds the updated destination value.
1097 } else { | 1135 } else { |
1098 // Neither src or dst are s-registers. | 1136 // Neither src or dst are s-registers. Both scratch double registers are |
1099 DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, src_code); | 1137 // available when there are 32 VFP registers. |
1100 DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, dst_code); | 1138 vmov(kScratchDoubleReg, src_d_reg); |
1101 VmovExtended(scratch, src_code); | 1139 vmov(kScratchDoubleReg2, dst_d_reg); |
1102 VmovExtended(dst_code, scratch); | 1140 vmov(SwVfpRegister::from_code(scratchSCode + dst_offset), |
| 1141 SwVfpRegister::from_code(scratchSCode2 + src_offset)); |
| 1142 vmov(dst_d_reg, kScratchQuadReg.high()); |
1103 } | 1143 } |
1104 } | 1144 } |
1105 | 1145 |
// VmovExtended(dst_code, MemOperand src): loads from memory operand src into
// the VFP lane identified by dst_code.
//
// OLD column: when dst_code is not an S-register code, the value is loaded
// into the core register scratch with ldr and then forwarded to
// VmovExtended(dst_code, scratch); otherwise a direct vldr is emitted.
//
// NEW column: the scratch parameter is removed. S-register codes still use a
// direct vldr. For other codes the containing D register (dst_code / 2) is
// copied into kScratchDoubleReg, the selected half of that scratch register
// (its low S code plus dst_code & 1) is loaded with vldr, and the scratch
// register is copied back into the destination D register.
1106 void MacroAssembler::VmovExtended(int dst_code, const MemOperand& src, | 1146 void MacroAssembler::VmovExtended(int dst_code, const MemOperand& src) { |
1107 Register scratch) { | 1147 if (dst_code < SwVfpRegister::kMaxNumRegisters) { |
1108 if (dst_code >= SwVfpRegister::kMaxNumRegisters) { | 1148 vldr(SwVfpRegister::from_code(dst_code), src); |
1109 ldr(scratch, src); | |
1110 VmovExtended(dst_code, scratch); | |
1111 } else { | 1149 } else { |
1112 vldr(SwVfpRegister::from_code(dst_code), src); | 1150 // TODO(bbudge) If Neon supported, use load single lane form of vld1. |
| 1151 int dst_s_code = kScratchDoubleReg.low().code() + (dst_code & 1); |
| 1152 vmov(kScratchDoubleReg, DwVfpRegister::from_code(dst_code / 2)); |
| 1153 vldr(SwVfpRegister::from_code(dst_s_code), src); |
| 1154 vmov(DwVfpRegister::from_code(dst_code / 2), kScratchDoubleReg); |
1113 } | 1155 } |
1114 } | 1156 } |
1115 | 1157 |
// VmovExtended(MemOperand dst, src_code): stores the VFP lane identified by
// src_code to memory operand dst.
//
// OLD column: when src_code is not an S-register code, the lane is first
// moved into the core register scratch via VmovExtended(scratch, src_code)
// and stored with str; otherwise a direct vstr is emitted.
//
// NEW column: the scratch parameter is removed. S-register codes still use a
// direct vstr. For other codes the containing D register (src_code / 2) is
// copied into kScratchDoubleReg and the selected half of the scratch register
// (its low S code plus src_code & 1) is stored with vstr. Nothing is copied
// back in this path.
1116 void MacroAssembler::VmovExtended(const MemOperand& dst, int src_code, | 1158 void MacroAssembler::VmovExtended(const MemOperand& dst, int src_code) { |
1117 Register scratch) { | 1159 if (src_code < SwVfpRegister::kMaxNumRegisters) { |
1118 if (src_code >= SwVfpRegister::kMaxNumRegisters) { | 1160 vstr(SwVfpRegister::from_code(src_code), dst); |
1119 VmovExtended(scratch, src_code); | |
1120 str(scratch, dst); | |
1121 } else { | 1161 } else { |
1122 vstr(SwVfpRegister::from_code(src_code), dst); | 1162 // TODO(bbudge) If Neon supported, use store single lane form of vst1. |
| 1163 int src_s_code = kScratchDoubleReg.low().code() + (src_code & 1); |
| 1164 vmov(kScratchDoubleReg, DwVfpRegister::from_code(src_code / 2)); |
| 1165 vstr(SwVfpRegister::from_code(src_s_code), dst); |
1123 } | 1166 } |
1124 } | 1167 } |
1125 | 1168 |
// ExtractLane(Register dst, QwNeonRegister src, dt, lane): moves one lane of
// a Q register into core register dst.
// The lane index is scaled by NeonSz(dt) to get an offset (presumably in
// bytes, with NeonSz returning log2 of the lane width - TODO confirm). That
// offset is split into a D-register index within the Q register
// (offset >> kDoubleSizeLog2) and a lane index within that D register. The D
// register code is src.code() * 2 + double_word, and the final vmov reads
// lane double_lane of it.
// The OLD and NEW columns of this diff are identical for this function.
1126 void MacroAssembler::ExtractLane(Register dst, QwNeonRegister src, | 1169 void MacroAssembler::ExtractLane(Register dst, QwNeonRegister src, |
1127 NeonDataType dt, int lane) { | 1170 NeonDataType dt, int lane) { |
1128 int size = NeonSz(dt); // 0, 1, 2 | 1171 int size = NeonSz(dt); // 0, 1, 2 |
1129 int byte = lane << size; | 1172 int byte = lane << size; |
1130 int double_word = byte >> kDoubleSizeLog2; | 1173 int double_word = byte >> kDoubleSizeLog2; |
1131 int double_byte = byte & (kDoubleSize - 1); | 1174 int double_byte = byte & (kDoubleSize - 1); |
1132 int double_lane = double_byte >> size; | 1175 int double_lane = double_byte >> size; |
1133 DwVfpRegister double_source = | 1176 DwVfpRegister double_source = |
1134 DwVfpRegister::from_code(src.code() * 2 + double_word); | 1177 DwVfpRegister::from_code(src.code() * 2 + double_word); |
1135 vmov(dt, dst, double_source, double_lane); | 1178 vmov(dt, dst, double_source, double_lane); |
1136 } | 1179 } |
1137 | 1180 |
// ExtractLane(Register dst, DwVfpRegister src, dt, lane): moves one lane of a
// D register into core register dst.
// The lane index is scaled by NeonSz(dt), masked with kDoubleSize - 1 so the
// offset stays inside a single D register, and scaled back to a lane index
// that is passed to vmov.
// The OLD and NEW columns of this diff are identical for this function.
1138 void MacroAssembler::ExtractLane(Register dst, DwVfpRegister src, | 1181 void MacroAssembler::ExtractLane(Register dst, DwVfpRegister src, |
1139 NeonDataType dt, int lane) { | 1182 NeonDataType dt, int lane) { |
1140 int size = NeonSz(dt); // 0, 1, 2 | 1183 int size = NeonSz(dt); // 0, 1, 2 |
1141 int byte = lane << size; | 1184 int byte = lane << size; |
1142 int double_byte = byte & (kDoubleSize - 1); | 1185 int double_byte = byte & (kDoubleSize - 1); |
1143 int double_lane = double_byte >> size; | 1186 int double_lane = double_byte >> size; |
1144 vmov(dt, dst, src, double_lane); | 1187 vmov(dt, dst, src, double_lane); |
1145 } | 1188 } |
1146 | 1189 |
// ExtractLane(SwVfpRegister dst, QwNeonRegister src, lane): copies one lane
// of a Q register into S register dst.
// The source lane is turned into an S-register-style code,
// src.code() * 4 + lane, and the copy is delegated to VmovExtended.
// OLD column: takes a core register scratch and forwards it to VmovExtended.
// NEW column: the scratch parameter is removed and the two-argument
// VmovExtended is called.
1147 void MacroAssembler::ExtractLane(SwVfpRegister dst, QwNeonRegister src, | 1190 void MacroAssembler::ExtractLane(SwVfpRegister dst, QwNeonRegister src, |
1148 Register scratch, int lane) { | 1191 int lane) { |
1149 int s_code = src.code() * 4 + lane; | 1192 int s_code = src.code() * 4 + lane; |
1150 VmovExtended(dst.code(), s_code, scratch); | 1193 VmovExtended(dst.code(), s_code); |
1151 } | 1194 } |
1152 | 1195 |
// ReplaceLane(dst, src, Register src_lane, dt, lane): copies Q register src
// into dst with Move, then overwrites one lane of dst with core register
// src_lane.
// The lane index is scaled by NeonSz(dt) and split into a D-register index
// within the Q register (double_word) and a lane index within that D register
// (double_lane). The vmov writes lane double_lane of D register
// dst.code() * 2 + double_word.
// The OLD and NEW columns of this diff are identical for this function.
1153 void MacroAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src, | 1196 void MacroAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src, |
1154 Register src_lane, NeonDataType dt, int lane) { | 1197 Register src_lane, NeonDataType dt, int lane) { |
1155 Move(dst, src); | 1198 Move(dst, src); |
1156 int size = NeonSz(dt); // 0, 1, 2 | 1199 int size = NeonSz(dt); // 0, 1, 2 |
1157 int byte = lane << size; | 1200 int byte = lane << size; |
1158 int double_word = byte >> kDoubleSizeLog2; | 1201 int double_word = byte >> kDoubleSizeLog2; |
1159 int double_byte = byte & (kDoubleSize - 1); | 1202 int double_byte = byte & (kDoubleSize - 1); |
1160 int double_lane = double_byte >> size; | 1203 int double_lane = double_byte >> size; |
1161 DwVfpRegister double_dst = | 1204 DwVfpRegister double_dst = |
1162 DwVfpRegister::from_code(dst.code() * 2 + double_word); | 1205 DwVfpRegister::from_code(dst.code() * 2 + double_word); |
1163 vmov(dt, double_dst, double_lane, src_lane); | 1206 vmov(dt, double_dst, double_lane, src_lane); |
1164 } | 1207 } |
1165 | 1208 |
// ReplaceLane(dst, src, SwVfpRegister src_lane, lane): copies Q register src
// into dst with Move, then overwrites one lane of dst with S register
// src_lane.
// The destination lane is turned into an S-register-style code,
// dst.code() * 4 + lane, and the copy is delegated to VmovExtended.
// OLD column: takes a core register scratch and forwards it to VmovExtended.
// NEW column: the scratch parameter is removed and the two-argument
// VmovExtended is called.
1166 void MacroAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src, | 1209 void MacroAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src, |
1167 SwVfpRegister src_lane, Register scratch, | 1210 SwVfpRegister src_lane, int lane) { |
1168 int lane) { | |
1169 Move(dst, src); | 1211 Move(dst, src); |
1170 int s_code = dst.code() * 4 + lane; | 1212 int s_code = dst.code() * 4 + lane; |
1171 VmovExtended(s_code, src_lane.code(), scratch); | 1213 VmovExtended(s_code, src_lane.code()); |
1172 } | 1214 } |
1173 | 1215 |
1174 void MacroAssembler::LslPair(Register dst_low, Register dst_high, | 1216 void MacroAssembler::LslPair(Register dst_low, Register dst_high, |
1175 Register src_low, Register src_high, | 1217 Register src_low, Register src_high, |
1176 Register scratch, Register shift) { | 1218 Register scratch, Register shift) { |
1177 DCHECK(!AreAliased(dst_high, src_low)); | 1219 DCHECK(!AreAliased(dst_high, src_low)); |
1178 DCHECK(!AreAliased(dst_high, shift)); | 1220 DCHECK(!AreAliased(dst_high, shift)); |
1179 | 1221 |
1180 Label less_than_32; | 1222 Label less_than_32; |
1181 Label done; | 1223 Label done; |
(...skipping 2495 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3677 } | 3719 } |
3678 } | 3720 } |
3679 if (mag.shift > 0) mov(result, Operand(result, ASR, mag.shift)); | 3721 if (mag.shift > 0) mov(result, Operand(result, ASR, mag.shift)); |
3680 add(result, result, Operand(dividend, LSR, 31)); | 3722 add(result, result, Operand(dividend, LSR, 31)); |
3681 } | 3723 } |
3682 | 3724 |
3683 } // namespace internal | 3725 } // namespace internal |
3684 } // namespace v8 | 3726 } // namespace v8 |
3685 | 3727 |
3686 #endif // V8_TARGET_ARCH_ARM | 3728 #endif // V8_TARGET_ARCH_ARM |
OLD | NEW |