OLD | NEW |
---|---|
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <limits.h> // For LONG_MIN, LONG_MAX. | 5 #include <limits.h> // For LONG_MIN, LONG_MAX. |
6 | 6 |
7 #if V8_TARGET_ARCH_ARM | 7 #if V8_TARGET_ARCH_ARM |
8 | 8 |
9 #include "src/assembler-inl.h" | 9 #include "src/assembler-inl.h" |
10 #include "src/base/bits.h" | 10 #include "src/base/bits.h" |
(...skipping 1061 matching lines...) | |
// Moves core register |src| into the d-register half selected by |dst_code|.
// The DCHECKs require
//   SwVfpRegister::kMaxNumRegisters <= dst_code < 2 * kMaxNumRegisters.
// The target d-register is dst_code / 2; an odd code goes through VmovHigh,
// an even code through VmovLow. This overload is identical on the OLD and NEW
// sides of the diff.
1072 void MacroAssembler::VmovExtended(int dst_code, Register src) { | 1072 void MacroAssembler::VmovExtended(int dst_code, Register src) { |
1073 DCHECK_LE(SwVfpRegister::kMaxNumRegisters, dst_code); | 1073 DCHECK_LE(SwVfpRegister::kMaxNumRegisters, dst_code); |
1074 DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, dst_code); | 1074 DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, dst_code); |
1075 if (dst_code & 0x1) { | 1075 if (dst_code & 0x1) { |
1076 VmovHigh(DwVfpRegister::from_code(dst_code / 2), src); | 1076 VmovHigh(DwVfpRegister::from_code(dst_code / 2), src); |
1077 } else { | 1077 } else { |
1078 VmovLow(DwVfpRegister::from_code(dst_code / 2), src); | 1078 VmovLow(DwVfpRegister::from_code(dst_code / 2), src); |
1079 } | 1079 } |
1080 } | 1080 } |
1081 | 1081 |
// Copies the s-register-sized lane identified by |src_code| to the lane
// identified by |dst_code|. The NEW side drops the core-register |scratch|
// parameter of the OLD side and uses kScratchDoubleReg / kScratchDoubleReg2.
//
// NEW-side flow:
//  - Both codes below SwVfpRegister::kMaxNumRegisters: a plain s-register
//    vmov, then return.
//  - Otherwise each code is treated as lane (code & 1) of d-register code / 2.
//  - With NEON: vsli (dst_offset == 1) or vsri (dst_offset == 0) by 32 on the
//    d-registers. When both lanes have the same offset, the source lane is
//    first vdup'ed into kScratchDoubleReg and that register is used as the
//    source with the opposite offset.
//  - Without NEON: d-registers are staged in the scratch double registers and
//    the lane is moved with an s-register vmov.
//
// NOTE(review): in the NEW side's final "neither" branch, kScratchDoubleReg
// receives src_d_reg and kScratchDoubleReg2 receives dst_d_reg, yet the lane
// move reads scratchSCode2 + src_offset (a lane of the dst copy) and writes
// scratchSCode + dst_offset (a lane of the src copy). The two sibling branches
// stage dst in kScratchDoubleReg when writing scratchSCode + dst_offset, and
// src in kScratchDoubleReg when reading scratchSCode + src_offset. The two
// staging vmovs therefore look swapped here -- TODO confirm, including which
// d-register kScratchQuadReg.high() aliases (not visible in this excerpt).
1082 void MacroAssembler::VmovExtended(int dst_code, int src_code, | 1082 void MacroAssembler::VmovExtended(int dst_code, int src_code) {
1083 Register scratch) { | |
1084 if (src_code < SwVfpRegister::kMaxNumRegisters && | 1083 if (src_code < SwVfpRegister::kMaxNumRegisters &&
1085 dst_code < SwVfpRegister::kMaxNumRegisters) { | 1084 dst_code < SwVfpRegister::kMaxNumRegisters) {
1086 // src and dst are both s-registers. | 1085 // src and dst are both s-registers.
1087 vmov(SwVfpRegister::from_code(dst_code), | 1086 vmov(SwVfpRegister::from_code(dst_code),
1088 SwVfpRegister::from_code(src_code)); | 1087 SwVfpRegister::from_code(src_code));
1089 } else if (src_code < SwVfpRegister::kMaxNumRegisters) { | 1088 return;
1090 // src is an s-register. | 1089 }
1091 vmov(scratch, SwVfpRegister::from_code(src_code)); | 1090 DwVfpRegister dst_d_reg = DwVfpRegister::from_code(dst_code / 2);
1092 VmovExtended(dst_code, scratch); | 1091 DwVfpRegister src_d_reg = DwVfpRegister::from_code(src_code / 2);
1092 int dst_offset = dst_code & 1; | |
1093 int src_offset = src_code & 1; | |
1094 if (CpuFeatures::IsSupported(NEON)) { | |
1095 // On Neon we can shift and insert from d-registers. | |
1096 if (src_offset == dst_offset) { | |
1097 // Offsets are the same, use vdup to copy the source to the opposite lane. | |
1098 vdup(Neon32, kScratchDoubleReg, src_d_reg, src_offset); | |
1099 src_d_reg = kScratchDoubleReg; | |
1100 src_offset = dst_offset ^ 1; | |
1101 } | |
martyn.capewell
2017/05/10 12:38:06
There's a tiny optimisation here: as vdup is sligh
bbudge
2017/05/10 17:54:52
Nice. Done.
| |
1102 if (dst_offset) { | |
1103 vsli(Neon64, dst_d_reg, src_d_reg, 32); | |
1104 } else { | |
1105 vsri(Neon64, dst_d_reg, src_d_reg, 32); | |
1106 } | |
1107 return; | |
1108 } | |
1109 | |
1110 // Without Neon, use the scratch registers to move src and/or dst into | |
1111 // s-registers. | |
1112 int scratchSCode = kScratchDoubleReg.low().code(); | |
1113 int scratchSCode2 = kScratchDoubleReg2.low().code(); | |
1114 if (src_code < SwVfpRegister::kMaxNumRegisters) { | |
1115 // src is an s-register, dst is not. | |
1116 vmov(kScratchDoubleReg, dst_d_reg); | |
1117 vmov(SwVfpRegister::from_code(scratchSCode + dst_offset), | |
1118 SwVfpRegister::from_code(src_code)); | |
1119 vmov(dst_d_reg, kScratchDoubleReg); | |
1093 } else if (dst_code < SwVfpRegister::kMaxNumRegisters) { | 1120 } else if (dst_code < SwVfpRegister::kMaxNumRegisters) {
1094 // dst is an s-register. | 1121 // dst is an s-register, src is not.
1095 VmovExtended(scratch, src_code); | 1122 vmov(kScratchDoubleReg, src_d_reg);
1096 vmov(SwVfpRegister::from_code(dst_code), scratch); | 1123 vmov(SwVfpRegister::from_code(dst_code),
1124 SwVfpRegister::from_code(scratchSCode + src_offset)); | |
1097 } else { | 1125 } else {
1098 // Neither src or dst are s-registers. | 1126 // Neither src or dst are s-registers. Both scratch double registers are
1099 DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, src_code); | 1127 // available when there are 32 VFP registers.
1100 DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, dst_code); | 1128 vmov(kScratchDoubleReg, src_d_reg);
1101 VmovExtended(scratch, src_code); | 1129 vmov(kScratchDoubleReg2, dst_d_reg);
1102 VmovExtended(dst_code, scratch); | 1130 vmov(SwVfpRegister::from_code(scratchSCode + dst_offset),
1131 SwVfpRegister::from_code(scratchSCode2 + src_offset)); | |
1132 vmov(dst_d_reg, kScratchQuadReg.high()); | |
1103 } | 1133 }
1104 } | 1134 }
1105 | 1135 |
// Loads from memory operand |src| into the lane identified by |dst_code|.
// NEW side: a code below SwVfpRegister::kMaxNumRegisters is loaded directly
// with vldr. Otherwise d-register dst_code / 2 is copied into
// kScratchDoubleReg, the vldr targets the scratch s-register at offset
// (dst_code & 1), and the scratch is copied back to the d-register.
// OLD side: took an extra core-register |scratch| and used ldr followed by
// VmovExtended(dst_code, scratch) for the extended case.
1106 void MacroAssembler::VmovExtended(int dst_code, const MemOperand& src, | 1136 void MacroAssembler::VmovExtended(int dst_code, const MemOperand& src) {
1107 Register scratch) { | 1137 if (dst_code < SwVfpRegister::kMaxNumRegisters) {
1108 if (dst_code >= SwVfpRegister::kMaxNumRegisters) { | 1138 vldr(SwVfpRegister::from_code(dst_code), src);
1109 ldr(scratch, src); | |
1110 VmovExtended(dst_code, scratch); | |
1111 } else { | 1139 } else {
1112 vldr(SwVfpRegister::from_code(dst_code), src); | 1140 int dst_s_code = kScratchDoubleReg.low().code() + (dst_code & 1);
martyn.capewell
2017/05/10 12:38:06
When supported, this can use the "single element t
bbudge
2017/05/10 17:54:52
Added TODO. Thanks.
| |
1141 vmov(kScratchDoubleReg, DwVfpRegister::from_code(dst_code / 2)); | |
1142 vldr(SwVfpRegister::from_code(dst_s_code), src); | |
1143 vmov(DwVfpRegister::from_code(dst_code / 2), kScratchDoubleReg); | |
1113 } | 1144 }
1114 } | 1145 }
1115 | 1146 |
// Stores the lane identified by |src_code| to memory operand |dst|.
// NEW side: a code below SwVfpRegister::kMaxNumRegisters is stored directly
// with vstr. Otherwise d-register src_code / 2 is copied into
// kScratchDoubleReg and the scratch s-register at offset (src_code & 1) is
// stored.
// OLD side: took an extra core-register |scratch|, filled it via
// VmovExtended(scratch, src_code) and stored it with str.
1116 void MacroAssembler::VmovExtended(const MemOperand& dst, int src_code, | 1147 void MacroAssembler::VmovExtended(const MemOperand& dst, int src_code) {
1117 Register scratch) { | 1148 if (src_code < SwVfpRegister::kMaxNumRegisters) {
1118 if (src_code >= SwVfpRegister::kMaxNumRegisters) { | 1149 vstr(SwVfpRegister::from_code(src_code), dst);
1119 VmovExtended(scratch, src_code); | |
1120 str(scratch, dst); | |
1121 } else { | 1150 } else {
1122 vstr(SwVfpRegister::from_code(src_code), dst); | 1151 int src_s_code = kScratchDoubleReg.low().code() + (src_code & 1);
1152 vmov(kScratchDoubleReg, DwVfpRegister::from_code(src_code / 2)); | |
1153 vstr(SwVfpRegister::from_code(src_s_code), dst); | |
1123 } | 1154 }
1124 } | 1155 }
1125 | 1156 |
// Extracts lane |lane| (element type |dt|) of q-register |src| into core
// register |dst|. The lane index is converted to a byte offset
// (lane << NeonSz(dt)). That offset selects d-register
// src.code() * 2 + (byte >> kDoubleSizeLog2) and a lane within it
// ((byte & (kDoubleSize - 1)) >> size), which is read with vmov.
// Identical on the OLD and NEW sides of the diff.
1126 void MacroAssembler::ExtractLane(Register dst, QwNeonRegister src, | 1157 void MacroAssembler::ExtractLane(Register dst, QwNeonRegister src,
1127 NeonDataType dt, int lane) { | 1158 NeonDataType dt, int lane) {
1128 int size = NeonSz(dt); // 0, 1, 2 | 1159 int size = NeonSz(dt); // 0, 1, 2
1129 int byte = lane << size; | 1160 int byte = lane << size;
1130 int double_word = byte >> kDoubleSizeLog2; | 1161 int double_word = byte >> kDoubleSizeLog2;
1131 int double_byte = byte & (kDoubleSize - 1); | 1162 int double_byte = byte & (kDoubleSize - 1);
1132 int double_lane = double_byte >> size; | 1163 int double_lane = double_byte >> size;
1133 DwVfpRegister double_source = | 1164 DwVfpRegister double_source =
1134 DwVfpRegister::from_code(src.code() * 2 + double_word); | 1165 DwVfpRegister::from_code(src.code() * 2 + double_word);
1135 vmov(dt, dst, double_source, double_lane); | 1166 vmov(dt, dst, double_source, double_lane);
1136 } | 1167 }
1137 | 1168 |
// Extracts lane |lane| (element type |dt|) of d-register |src| into core
// register |dst|. The lane's byte offset (lane << NeonSz(dt)) is masked with
// kDoubleSize - 1 and shifted back down to a lane index, so the index passed
// to vmov always falls within the single d-register.
// Identical on the OLD and NEW sides of the diff.
1138 void MacroAssembler::ExtractLane(Register dst, DwVfpRegister src, | 1169 void MacroAssembler::ExtractLane(Register dst, DwVfpRegister src,
1139 NeonDataType dt, int lane) { | 1170 NeonDataType dt, int lane) {
1140 int size = NeonSz(dt); // 0, 1, 2 | 1171 int size = NeonSz(dt); // 0, 1, 2
1141 int byte = lane << size; | 1172 int byte = lane << size;
1142 int double_byte = byte & (kDoubleSize - 1); | 1173 int double_byte = byte & (kDoubleSize - 1);
1143 int double_lane = double_byte >> size; | 1174 int double_lane = double_byte >> size;
1144 vmov(dt, dst, src, double_lane); | 1175 vmov(dt, dst, src, double_lane);
1145 } | 1176 }
1146 | 1177 |
// Extracts lane |lane| of q-register |src| into s-register |dst|. The source
// lane is addressed by the s-register-style code src.code() * 4 + lane and the
// copy is delegated to VmovExtended. The NEW side drops the core-register
// |scratch| parameter that the OLD side forwarded to VmovExtended.
1147 void MacroAssembler::ExtractLane(SwVfpRegister dst, QwNeonRegister src, | 1178 void MacroAssembler::ExtractLane(SwVfpRegister dst, QwNeonRegister src,
1148 Register scratch, int lane) { | 1179 int lane) {
1149 int s_code = src.code() * 4 + lane; | 1180 int s_code = src.code() * 4 + lane;
1150 VmovExtended(dst.code(), s_code, scratch); | 1181 VmovExtended(dst.code(), s_code);
1151 } | 1182 }
1152 | 1183 |
// Sets |dst| to |src| with lane |lane| (element type |dt|) replaced by core
// register |src_lane|. |src| is first copied to |dst| with Move. The lane's
// byte offset (lane << NeonSz(dt)) then selects d-register
// dst.code() * 2 + (byte >> kDoubleSizeLog2) and a lane within it, which is
// written with vmov. Identical on the OLD and NEW sides of the diff.
1153 void MacroAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src, | 1184 void MacroAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src,
1154 Register src_lane, NeonDataType dt, int lane) { | 1185 Register src_lane, NeonDataType dt, int lane) {
1155 Move(dst, src); | 1186 Move(dst, src);
1156 int size = NeonSz(dt); // 0, 1, 2 | 1187 int size = NeonSz(dt); // 0, 1, 2
1157 int byte = lane << size; | 1188 int byte = lane << size;
1158 int double_word = byte >> kDoubleSizeLog2; | 1189 int double_word = byte >> kDoubleSizeLog2;
1159 int double_byte = byte & (kDoubleSize - 1); | 1190 int double_byte = byte & (kDoubleSize - 1);
1160 int double_lane = double_byte >> size; | 1191 int double_lane = double_byte >> size;
1161 DwVfpRegister double_dst = | 1192 DwVfpRegister double_dst =
1162 DwVfpRegister::from_code(dst.code() * 2 + double_word); | 1193 DwVfpRegister::from_code(dst.code() * 2 + double_word);
1163 vmov(dt, double_dst, double_lane, src_lane); | 1194 vmov(dt, double_dst, double_lane, src_lane);
1164 } | 1195 }
1165 | 1196 |
// Sets |dst| to |src| with lane |lane| replaced by s-register |src_lane|.
// |src| is first copied to |dst| with Move. The destination lane is then
// addressed by the s-register-style code dst.code() * 4 + lane and the copy is
// delegated to VmovExtended. The NEW side drops the core-register |scratch|
// parameter that the OLD side forwarded to VmovExtended.
1166 void MacroAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src, | 1197 void MacroAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src,
1167 SwVfpRegister src_lane, Register scratch, | 1198 SwVfpRegister src_lane, int lane) {
1168 int lane) { | |
1169 Move(dst, src); | 1199 Move(dst, src);
1170 int s_code = dst.code() * 4 + lane; | 1200 int s_code = dst.code() * 4 + lane;
1171 VmovExtended(s_code, src_lane.code(), scratch); | 1201 VmovExtended(s_code, src_lane.code());
1172 } | 1202 }
1173 | 1203 |
1174 void MacroAssembler::LslPair(Register dst_low, Register dst_high, | 1204 void MacroAssembler::LslPair(Register dst_low, Register dst_high, |
1175 Register src_low, Register src_high, | 1205 Register src_low, Register src_high, |
1176 Register scratch, Register shift) { | 1206 Register scratch, Register shift) { |
1177 DCHECK(!AreAliased(dst_high, src_low)); | 1207 DCHECK(!AreAliased(dst_high, src_low)); |
1178 DCHECK(!AreAliased(dst_high, shift)); | 1208 DCHECK(!AreAliased(dst_high, shift)); |
1179 | 1209 |
1180 Label less_than_32; | 1210 Label less_than_32; |
1181 Label done; | 1211 Label done; |
(...skipping 2495 matching lines...) | |
3677 } | 3707 } |
3678 } | 3708 } |
3679 if (mag.shift > 0) mov(result, Operand(result, ASR, mag.shift)); | 3709 if (mag.shift > 0) mov(result, Operand(result, ASR, mag.shift)); |
3680 add(result, result, Operand(dividend, LSR, 31)); | 3710 add(result, result, Operand(dividend, LSR, 31)); |
3681 } | 3711 } |
3682 | 3712 |
3683 } // namespace internal | 3713 } // namespace internal |
3684 } // namespace v8 | 3714 } // namespace v8 |
3685 | 3715 |
3686 #endif // V8_TARGET_ARCH_ARM | 3716 #endif // V8_TARGET_ARCH_ARM |
OLD | NEW |