OLD | NEW |
---|---|
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <limits.h> // For LONG_MIN, LONG_MAX. | 5 #include <limits.h> // For LONG_MIN, LONG_MAX. |
6 | 6 |
7 #if V8_TARGET_ARCH_ARM | 7 #if V8_TARGET_ARCH_ARM |
8 | 8 |
9 #include "src/base/bits.h" | 9 #include "src/base/bits.h" |
10 #include "src/base/division-by-constant.h" | 10 #include "src/base/division-by-constant.h" |
(...skipping 1063 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
// MacroAssembler::VmovLow, core-register-to-d-register form.
// Emits a move of the core register src into the low half of the
// d-register dst.
//   dst: destination d-register; only its low half is the vmov target.
//   src: core register supplying the value.
1074 void MacroAssembler::VmovLow(DwVfpRegister dst, Register src) { | 1074 void MacroAssembler::VmovLow(DwVfpRegister dst, Register src) { |
// Codes below 16 are re-viewed as a LowDwVfpRegister, which exposes low().
// NOTE(review): presumably only the first 16 d-registers have s-register
// aliases - confirm against the register definitions.
1075 if (dst.code() < 16) { | 1075 if (dst.code() < 16) { |
1076 const LowDwVfpRegister loc = LowDwVfpRegister::from_code(dst.code()); | 1076 const LowDwVfpRegister loc = LowDwVfpRegister::from_code(dst.code()); |
1077 vmov(loc.low(), src); | 1077 vmov(loc.low(), src); |
1078 } else { | 1078 } else { |
// Otherwise the indexed vmov form with VmovIndexLo is applied to dst itself.
1079 vmov(dst, VmovIndexLo, src); | 1079 vmov(dst, VmovIndexLo, src); |
1080 } | 1080 } |
1081 } | 1081 } |
1082 | 1082 |
// MacroAssembler::VmovExtended, extended-code-to-core-register form.
// Emits a move into core register dst from the single-precision code
// src_code, where src_code lies above the range covered by SwVfpRegister.
//   dst:      core register that receives the value.
//   src_code: must satisfy kMaxNumRegisters <= src_code < 2 * kMaxNumRegisters
//             (checked with DCHECKs only). The OLD column spells these bounds
//             as the literals 32 and 64; the NEW column derives them from
//             SwVfpRegister::kMaxNumRegisters.
1083 void MacroAssembler::VmovExtended(Register dst, int src_code) { | 1083 void MacroAssembler::VmovExtended(Register dst, int src_code) { |
1084 DCHECK_LE(32, src_code); | 1084 DCHECK_LE(SwVfpRegister::kMaxNumRegisters, src_code); |
1085 DCHECK_GT(64, src_code); | 1085 DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, src_code); |
// The code maps onto d-register src_code / 2: an odd code selects the high
// half (VmovHigh), an even code selects the low half (VmovLow).
1086 if (src_code & 0x1) { | 1086 if (src_code & 0x1) { |
1087 VmovHigh(dst, DwVfpRegister::from_code(src_code / 2)); | 1087 VmovHigh(dst, DwVfpRegister::from_code(src_code / 2)); |
1088 } else { | 1088 } else { |
1089 VmovLow(dst, DwVfpRegister::from_code(src_code / 2)); | 1089 VmovLow(dst, DwVfpRegister::from_code(src_code / 2)); |
1090 } | 1090 } |
1091 } | 1091 } |
1092 | 1092 |
// MacroAssembler::VmovExtended, core-register-to-extended-code form.
// Emits a move of core register src into the single-precision code dst_code,
// where dst_code lies above the range covered by SwVfpRegister.
//   dst_code: must satisfy kMaxNumRegisters <= dst_code < 2 * kMaxNumRegisters
//             (checked with DCHECKs only). The OLD column spells these bounds
//             as the literals 32 and 64.
//   src:      core register supplying the value.
1093 void MacroAssembler::VmovExtended(int dst_code, Register src) { | 1093 void MacroAssembler::VmovExtended(int dst_code, Register src) { |
1094 DCHECK_LE(32, dst_code); | 1094 DCHECK_LE(SwVfpRegister::kMaxNumRegisters, dst_code); |
1095 DCHECK_GT(64, dst_code); | 1095 DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, dst_code); |
// The code maps onto d-register dst_code / 2: an odd code writes the high
// half (VmovHigh), an even code writes the low half (VmovLow).
1096 if (dst_code & 0x1) { | 1096 if (dst_code & 0x1) { |
1097 VmovHigh(DwVfpRegister::from_code(dst_code / 2), src); | 1097 VmovHigh(DwVfpRegister::from_code(dst_code / 2), src); |
1098 } else { | 1098 } else { |
1099 VmovLow(DwVfpRegister::from_code(dst_code / 2), src); | 1099 VmovLow(DwVfpRegister::from_code(dst_code / 2), src); |
1100 } | 1100 } |
1101 } | 1101 } |
1102 | 1102 |
// MacroAssembler::VmovExtended, code-to-code form.
// Emits a single-precision move between two register codes, each of which
// may or may not be representable as an SwVfpRegister.
//   dst_code: destination code.
//   src_code: source code.
//   scratch:  core register used as an intermediate whenever at least one of
//             the two codes is at or above SwVfpRegister::kMaxNumRegisters;
//             it is not touched when both codes are plain s-registers.
// Four cases are handled in order: both s-registers, only src an s-register,
// only dst an s-register, neither an s-register.
1103 void MacroAssembler::VmovExtended(int dst_code, int src_code, | 1103 void MacroAssembler::VmovExtended(int dst_code, int src_code, |
1104 Register scratch) { | 1104 Register scratch) { |
1105 if (src_code < 32 && dst_code < 32) { | 1105 if (src_code < SwVfpRegister::kMaxNumRegisters && |
1106 dst_code < SwVfpRegister::kMaxNumRegisters) { | |
1106 // src and dst are both s-registers. | 1107 // src and dst are both s-registers. |
1107 vmov(SwVfpRegister::from_code(dst_code), | 1108 vmov(SwVfpRegister::from_code(dst_code), |
1108 SwVfpRegister::from_code(src_code)); | 1109 SwVfpRegister::from_code(src_code)); |
1109 } else if (src_code < 32) { | 1110 } else if (src_code < SwVfpRegister::kMaxNumRegisters) { |
1110 // src is an s-register. | 1111 // src is an s-register. |
// Route through scratch: s-register to scratch, then scratch to dst_code.
1111 vmov(scratch, SwVfpRegister::from_code(src_code)); | 1112 vmov(scratch, SwVfpRegister::from_code(src_code)); |
1112 VmovExtended(dst_code, scratch); | 1113 VmovExtended(dst_code, scratch); |
1113 } else if (dst_code < 32) { | 1114 } else if (dst_code < SwVfpRegister::kMaxNumRegisters) { |
1114 // dst is an s-register. | 1115 // dst is an s-register. |
// Route through scratch: src_code to scratch, then scratch to s-register.
1115 VmovExtended(scratch, src_code); | 1116 VmovExtended(scratch, src_code); |
1116 vmov(SwVfpRegister::from_code(dst_code), scratch); | 1117 vmov(SwVfpRegister::from_code(dst_code), scratch); |
1117 } else { | 1118 } else { |
1118 // Neither src or dst are s-registers. | 1119 // Neither src or dst are s-registers. |
// Upper bounds are asserted here; the two calls below also DCHECK the
// full range of each code.
1119 DCHECK_GT(64, src_code); | 1120 DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, src_code); |
1120 DCHECK_GT(64, dst_code); | 1121 DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, dst_code); |
1121 VmovExtended(scratch, src_code); | 1122 VmovExtended(scratch, src_code); |
1122 VmovExtended(dst_code, scratch); | 1123 VmovExtended(dst_code, scratch); |
1123 } | 1124 } |
1124 } | 1125 } |
1125 | 1126 |
// MacroAssembler::VmovExtended, memory-to-code form.
// Emits a load from the memory operand src into the single-precision code
// dst_code.
//   dst_code: destination code.
//   src:      memory operand to load from.
//   scratch:  core register used only when dst_code is at or above
//             SwVfpRegister::kMaxNumRegisters (32 in the OLD column).
1126 void MacroAssembler::VmovExtended(int dst_code, const MemOperand& src, | 1127 void MacroAssembler::VmovExtended(int dst_code, const MemOperand& src, |
1127 Register scratch) { | 1128 Register scratch) { |
1128 if (dst_code >= 32) { | 1129 if (dst_code >= SwVfpRegister::kMaxNumRegisters) { |
// No direct vldr target here: load into scratch with ldr, then move the
// core register into dst_code.
1129 ldr(scratch, src); | 1130 ldr(scratch, src); |
1130 VmovExtended(dst_code, scratch); | 1131 VmovExtended(dst_code, scratch); |
1131 } else { | 1132 } else { |
// dst_code names an s-register, so vldr can load it directly.
1132 vldr(SwVfpRegister::from_code(dst_code), src); | 1133 vldr(SwVfpRegister::from_code(dst_code), src); |
1133 } | 1134 } |
1134 } | 1135 } |
1135 | 1136 |
// MacroAssembler::VmovExtended, code-to-memory form.
// Emits a store of the single-precision code src_code to the memory operand
// dst.
//   dst:      memory operand to store to.
//   src_code: source code.
//   scratch:  core register used only when src_code is at or above
//             SwVfpRegister::kMaxNumRegisters (32 in the OLD column).
1136 void MacroAssembler::VmovExtended(const MemOperand& dst, int src_code, | 1137 void MacroAssembler::VmovExtended(const MemOperand& dst, int src_code, |
1137 Register scratch) { | 1138 Register scratch) { |
1138 if (src_code >= 32) { | 1139 if (src_code >= SwVfpRegister::kMaxNumRegisters) { |
// No direct vstr source here: move src_code into scratch, then store the
// core register with str.
1139 VmovExtended(scratch, src_code); | 1140 VmovExtended(scratch, src_code); |
1140 str(scratch, dst); | 1141 str(scratch, dst); |
1141 } else { | 1142 } else { |
// src_code names an s-register, so vstr can store it directly.
1142 vstr(SwVfpRegister::from_code(src_code), dst); | 1143 vstr(SwVfpRegister::from_code(src_code), dst); |
1143 } | 1144 } |
1144 } | 1145 } |
1145 | 1146 |
// MacroAssembler::ExtractLane, core-register destination (NEW column only).
// Emits code that reads one lane of the q-register src into the core
// register dst.
//   dst:  core register that receives the lane value.
//   src:  q-register holding the lanes.
//   dt:   Neon data type; its NeonDataTypeSizeMask bits are used as the lane
//         size in bytes and its NeonDataTypeUMask bit selects unsigned
//         extraction.
//   lane: index of the lane to read.
// The word containing the lane is first moved into dst; for lanes narrower
// than kPointerSize a bitfield extract then isolates the lane in place.
1147 void MacroAssembler::ExtractLane(Register dst, QwNeonRegister src, | |
Rodolph Perfetta (ARM)
2016/12/08 18:08:28
There is a Neon instruction for this: vmov.dt rt,
bbudge
2016/12/10 21:33:04
Awesome, I've implemented this for the other data
| |
1148 NeonDataType dt, int lane) { | |
1149 // Read the word containing the lane into dst. | |
1150 int bytes_per_lane = dt & NeonDataTypeSizeMask; | |
1151 int byte = (lane * bytes_per_lane); | |
1152 int word = byte / kPointerSize; | |
// Each q-register code spans four consecutive word-sized codes.
1153 int s_code = src.code() * 4 + word; | |
1154 if (s_code < SwVfpRegister::kMaxNumRegisters) { | |
1155 vmov(dst, SwVfpRegister::from_code(s_code)); | |
1156 } else { | |
1157 VmovExtended(dst, s_code); | |
1158 } | |
1159 if (bytes_per_lane != kPointerSize) { | |
1160 // Extract lane, and sign extend for signed types. | |
1161 int width = bytes_per_lane * kBitsPerByte; | |
// NOTE(review): the 0x3 mask equals byte % kPointerSize only when
// kPointerSize is 4 - confirm for this target.
1162 int lsb = (byte & 0x3) * kBitsPerByte; | |
1163 if ((dt & NeonDataTypeUMask) != 0) { | |
1164 Ubfx(dst, dst, lsb, width); | |
1165 } else { | |
1166 Sbfx(dst, dst, lsb, width); | |
1167 } | |
1168 } | |
1169 } | |
1170 | |
// MacroAssembler::ExtractLane, s-register destination (NEW column only).
// Emits a move of one word-sized lane of the q-register src into the
// s-register dst.
//   dst:     s-register that receives the lane.
//   src:     q-register holding the lanes.
//   scratch: core register forwarded to VmovExtended, which uses it when the
//            lane code is not representable as an SwVfpRegister.
//   lane:    lane index; added directly to src.code() * 4, so each lane is
//            treated as one word-sized code.
1171 void MacroAssembler::ExtractLane(SwVfpRegister dst, QwNeonRegister src, | |
1172 Register scratch, int lane) { | |
1173 int s_code = src.code() * 4 + lane; | |
1174 VmovExtended(dst.code(), s_code, scratch); | |
1175 } | |
1176 | |
// MacroAssembler::ReplaceLane, core-register lane value (NEW column only).
// Emits code that copies q-register src into dst and then overwrites one
// lane of dst with the value held in core register src_lane.
//   dst:      q-register that receives the result.
//   src:      q-register supplying all other lanes.
//   src_lane: core register holding the new lane value.
//   scratch:  core register used for the read-modify-write of the containing
//             word; not used when the lane is exactly kPointerSize bytes.
//   dt:       Neon data type; its NeonDataTypeSizeMask bits are used as the
//             lane size in bytes.
//   lane:     index of the lane to replace.
1177 void MacroAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src, | |
1178 Register src_lane, Register scratch, | |
1179 NeonDataType dt, int lane) { | |
// Copy first, so every later access targets dst rather than src.
1180 Move(dst, src); | |
1181 int bytes_per_lane = dt & NeonDataTypeSizeMask; | |
Rodolph Perfetta (ARM)
2016/12/08 18:08:28
if you implement vmov.dt Dn[x], Rt then you can dr
bbudge
2016/12/10 21:33:04
I implemented the Neon form of this instruction to
| |
1182 int byte = (lane * bytes_per_lane); | |
1183 int word = byte / kPointerSize; | |
// Each q-register code spans four consecutive word-sized codes.
1184 int s_code = dst.code() * 4 + word; | |
1185 // If lane is word sized, just move src_lane into the containing s-register. | |
1186 if (bytes_per_lane == kPointerSize) { | |
1187 if (s_code < SwVfpRegister::kMaxNumRegisters) { | |
1188 vmov(SwVfpRegister::from_code(s_code), src_lane); | |
1189 } else { | |
1190 VmovExtended(s_code, src_lane); | |
1191 } | |
1192 return; | |
1193 } | |
1194 // Move the s-register containing the lane to replace into scratch register. | |
1195 if (s_code < SwVfpRegister::kMaxNumRegisters) { | |
1196 vmov(scratch, SwVfpRegister::from_code(s_code)); | |
1197 } else { | |
1198 VmovExtended(scratch, s_code); | |
1199 } | |
1200 // Combine scratch with src_lane, shifted into position. | |
1201 int width = bytes_per_lane * kBitsPerByte; | |
1202 int lsb = (byte % kPointerSize) * kBitsPerByte; | |
1203 bfi(scratch, src_lane, lsb, width); | |
// Write the merged word back to the code it was read from.
1204 if (s_code < SwVfpRegister::kMaxNumRegisters) { | |
1205 vmov(SwVfpRegister::from_code(s_code), scratch); | |
1206 } else { | |
1207 VmovExtended(s_code, scratch); | |
1208 } | |
1209 } | |
1210 | |
// MacroAssembler::ReplaceLane, s-register lane value (NEW column only).
// Emits code that copies q-register src into dst and then overwrites one
// word-sized lane of dst with the s-register src_lane.
//   dst:      q-register that receives the result.
//   src:      q-register supplying all other lanes.
//   src_lane: s-register holding the new lane value.
//   scratch:  core register forwarded to VmovExtended, which uses it when the
//             lane code is not representable as an SwVfpRegister.
//   lane:     lane index; added directly to dst.code() * 4, so each lane is
//             treated as one word-sized code.
1211 void MacroAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src, | |
1212 SwVfpRegister src_lane, Register scratch, | |
1213 int lane) { | |
// Copy first, so the lane write below lands in dst.
1214 Move(dst, src); | |
1215 int s_code = dst.code() * 4 + lane; | |
1216 VmovExtended(s_code, src_lane.code(), scratch); | |
1217 } | |
1218 | |
1146 void MacroAssembler::LslPair(Register dst_low, Register dst_high, | 1219 void MacroAssembler::LslPair(Register dst_low, Register dst_high, |
1147 Register src_low, Register src_high, | 1220 Register src_low, Register src_high, |
1148 Register scratch, Register shift) { | 1221 Register scratch, Register shift) { |
1149 DCHECK(!AreAliased(dst_high, src_low)); | 1222 DCHECK(!AreAliased(dst_high, src_low)); |
1150 DCHECK(!AreAliased(dst_high, shift)); | 1223 DCHECK(!AreAliased(dst_high, shift)); |
1151 | 1224 |
1152 Label less_than_32; | 1225 Label less_than_32; |
1153 Label done; | 1226 Label done; |
1154 rsb(scratch, shift, Operand(32), SetCC); | 1227 rsb(scratch, shift, Operand(32), SetCC); |
1155 b(gt, &less_than_32); | 1228 b(gt, &less_than_32); |
(...skipping 2735 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3891 } | 3964 } |
3892 } | 3965 } |
3893 if (mag.shift > 0) mov(result, Operand(result, ASR, mag.shift)); | 3966 if (mag.shift > 0) mov(result, Operand(result, ASR, mag.shift)); |
3894 add(result, result, Operand(dividend, LSR, 31)); | 3967 add(result, result, Operand(dividend, LSR, 31)); |
3895 } | 3968 } |
3896 | 3969 |
3897 } // namespace internal | 3970 } // namespace internal |
3898 } // namespace v8 | 3971 } // namespace v8 |
3899 | 3972 |
3900 #endif // V8_TARGET_ARCH_ARM | 3973 #endif // V8_TARGET_ARCH_ARM |
OLD | NEW |