Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(554)

Side by Side Diff: src/arm/macro-assembler-arm.cc

Issue 2546933002: [Turbofan] Add ARM NEON instructions for implementing SIMD. (Closed)
Patch Set: Review comments. Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <limits.h> // For LONG_MIN, LONG_MAX. 5 #include <limits.h> // For LONG_MIN, LONG_MAX.
6 6
7 #if V8_TARGET_ARCH_ARM 7 #if V8_TARGET_ARCH_ARM
8 8
9 #include "src/base/bits.h" 9 #include "src/base/bits.h"
10 #include "src/base/division-by-constant.h" 10 #include "src/base/division-by-constant.h"
(...skipping 1063 matching lines...) Expand 10 before | Expand all | Expand 10 after
1074 void MacroAssembler::VmovLow(DwVfpRegister dst, Register src) { 1074 void MacroAssembler::VmovLow(DwVfpRegister dst, Register src) {
1075 if (dst.code() < 16) { 1075 if (dst.code() < 16) {
1076 const LowDwVfpRegister loc = LowDwVfpRegister::from_code(dst.code()); 1076 const LowDwVfpRegister loc = LowDwVfpRegister::from_code(dst.code());
1077 vmov(loc.low(), src); 1077 vmov(loc.low(), src);
1078 } else { 1078 } else {
1079 vmov(dst, VmovIndexLo, src); 1079 vmov(dst, VmovIndexLo, src);
1080 } 1080 }
1081 } 1081 }
1082 1082
1083 void MacroAssembler::VmovExtended(Register dst, int src_code) { 1083 void MacroAssembler::VmovExtended(Register dst, int src_code) {
1084 DCHECK_LE(32, src_code); 1084 DCHECK_LE(SwVfpRegister::kMaxNumRegisters, src_code);
1085 DCHECK_GT(64, src_code); 1085 DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, src_code);
1086 if (src_code & 0x1) { 1086 if (src_code & 0x1) {
1087 VmovHigh(dst, DwVfpRegister::from_code(src_code / 2)); 1087 VmovHigh(dst, DwVfpRegister::from_code(src_code / 2));
1088 } else { 1088 } else {
1089 VmovLow(dst, DwVfpRegister::from_code(src_code / 2)); 1089 VmovLow(dst, DwVfpRegister::from_code(src_code / 2));
1090 } 1090 }
1091 } 1091 }
1092 1092
1093 void MacroAssembler::VmovExtended(int dst_code, Register src) { 1093 void MacroAssembler::VmovExtended(int dst_code, Register src) {
1094 DCHECK_LE(32, dst_code); 1094 DCHECK_LE(SwVfpRegister::kMaxNumRegisters, dst_code);
1095 DCHECK_GT(64, dst_code); 1095 DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, dst_code);
1096 if (dst_code & 0x1) { 1096 if (dst_code & 0x1) {
1097 VmovHigh(DwVfpRegister::from_code(dst_code / 2), src); 1097 VmovHigh(DwVfpRegister::from_code(dst_code / 2), src);
1098 } else { 1098 } else {
1099 VmovLow(DwVfpRegister::from_code(dst_code / 2), src); 1099 VmovLow(DwVfpRegister::from_code(dst_code / 2), src);
1100 } 1100 }
1101 } 1101 }
1102 1102
1103 void MacroAssembler::VmovExtended(int dst_code, int src_code, 1103 void MacroAssembler::VmovExtended(int dst_code, int src_code,
1104 Register scratch) { 1104 Register scratch) {
1105 if (src_code < 32 && dst_code < 32) { 1105 if (src_code < SwVfpRegister::kMaxNumRegisters &&
1106 dst_code < SwVfpRegister::kMaxNumRegisters) {
1106 // src and dst are both s-registers. 1107 // src and dst are both s-registers.
1107 vmov(SwVfpRegister::from_code(dst_code), 1108 vmov(SwVfpRegister::from_code(dst_code),
1108 SwVfpRegister::from_code(src_code)); 1109 SwVfpRegister::from_code(src_code));
1109 } else if (src_code < 32) { 1110 } else if (src_code < SwVfpRegister::kMaxNumRegisters) {
1110 // src is an s-register. 1111 // src is an s-register.
1111 vmov(scratch, SwVfpRegister::from_code(src_code)); 1112 vmov(scratch, SwVfpRegister::from_code(src_code));
1112 VmovExtended(dst_code, scratch); 1113 VmovExtended(dst_code, scratch);
1113 } else if (dst_code < 32) { 1114 } else if (dst_code < SwVfpRegister::kMaxNumRegisters) {
1114 // dst is an s-register. 1115 // dst is an s-register.
1115 VmovExtended(scratch, src_code); 1116 VmovExtended(scratch, src_code);
1116 vmov(SwVfpRegister::from_code(dst_code), scratch); 1117 vmov(SwVfpRegister::from_code(dst_code), scratch);
1117 } else { 1118 } else {
1118 // Neither src nor dst are s-registers. 1119 // Neither src nor dst are s-registers.
1119 DCHECK_GT(64, src_code); 1120 DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, src_code);
1120 DCHECK_GT(64, dst_code); 1121 DCHECK_GT(SwVfpRegister::kMaxNumRegisters * 2, dst_code);
1121 VmovExtended(scratch, src_code); 1122 VmovExtended(scratch, src_code);
1122 VmovExtended(dst_code, scratch); 1123 VmovExtended(dst_code, scratch);
1123 } 1124 }
1124 } 1125 }
1125 1126
1126 void MacroAssembler::VmovExtended(int dst_code, const MemOperand& src, 1127 void MacroAssembler::VmovExtended(int dst_code, const MemOperand& src,
1127 Register scratch) { 1128 Register scratch) {
1128 if (dst_code >= 32) { 1129 if (dst_code >= SwVfpRegister::kMaxNumRegisters) {
1129 ldr(scratch, src); 1130 ldr(scratch, src);
1130 VmovExtended(dst_code, scratch); 1131 VmovExtended(dst_code, scratch);
1131 } else { 1132 } else {
1132 vldr(SwVfpRegister::from_code(dst_code), src); 1133 vldr(SwVfpRegister::from_code(dst_code), src);
1133 } 1134 }
1134 } 1135 }
1135 1136
1136 void MacroAssembler::VmovExtended(const MemOperand& dst, int src_code, 1137 void MacroAssembler::VmovExtended(const MemOperand& dst, int src_code,
1137 Register scratch) { 1138 Register scratch) {
1138 if (src_code >= 32) { 1139 if (src_code >= SwVfpRegister::kMaxNumRegisters) {
1139 VmovExtended(scratch, src_code); 1140 VmovExtended(scratch, src_code);
1140 str(scratch, dst); 1141 str(scratch, dst);
1141 } else { 1142 } else {
1142 vstr(SwVfpRegister::from_code(src_code), dst); 1143 vstr(SwVfpRegister::from_code(src_code), dst);
1143 } 1144 }
1144 } 1145 }
1145 1146
1147 void MacroAssembler::ExtractLane(Register dst, QwNeonRegister src,
Rodolph Perfetta (ARM) 2016/12/08 18:08:28 There is a Neon instruction for this: vmov.dt rt,
bbudge 2016/12/10 21:33:04 Awesome, I've implemented this for the other data
1148 NeonDataType dt, int lane) {
1149 // Read the word containing the lane into dst.
1150 int bytes_per_lane = dt & NeonDataTypeSizeMask;
1151 int byte = (lane * bytes_per_lane);
1152 int word = byte / kPointerSize;
1153 int s_code = src.code() * 4 + word;
1154 if (s_code < SwVfpRegister::kMaxNumRegisters) {
1155 vmov(dst, SwVfpRegister::from_code(s_code));
1156 } else {
1157 VmovExtended(dst, s_code);
1158 }
1159 if (bytes_per_lane != kPointerSize) {
1160 // Extract lane, and sign extend for signed types.
1161 int width = bytes_per_lane * kBitsPerByte;
1162 int lsb = (byte & 0x3) * kBitsPerByte;
1163 if ((dt & NeonDataTypeUMask) != 0) {
1164 Ubfx(dst, dst, lsb, width);
1165 } else {
1166 Sbfx(dst, dst, lsb, width);
1167 }
1168 }
1169 }
1170
1171 void MacroAssembler::ExtractLane(SwVfpRegister dst, QwNeonRegister src,
1172 Register scratch, int lane) {
1173 int s_code = src.code() * 4 + lane;
1174 VmovExtended(dst.code(), s_code, scratch);
1175 }
1176
1177 void MacroAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src,
1178 Register src_lane, Register scratch,
1179 NeonDataType dt, int lane) {
1180 Move(dst, src);
1181 int bytes_per_lane = dt & NeonDataTypeSizeMask;
Rodolph Perfetta (ARM) 2016/12/08 18:08:28 if you implement vmov.dt Dn[x], Rt then you can dr
bbudge 2016/12/10 21:33:04 I implemented the Neon form of this instruction to
1182 int byte = (lane * bytes_per_lane);
1183 int word = byte / kPointerSize;
1184 int s_code = dst.code() * 4 + word;
1185 // If lane is word sized, just move src_lane into the containing s-register.
1186 if (bytes_per_lane == kPointerSize) {
1187 if (s_code < SwVfpRegister::kMaxNumRegisters) {
1188 vmov(SwVfpRegister::from_code(s_code), src_lane);
1189 } else {
1190 VmovExtended(s_code, src_lane);
1191 }
1192 return;
1193 }
1194 // Move the s-register containing the lane to replace into scratch register.
1195 if (s_code < SwVfpRegister::kMaxNumRegisters) {
1196 vmov(scratch, SwVfpRegister::from_code(s_code));
1197 } else {
1198 VmovExtended(scratch, s_code);
1199 }
1200 // Combine scratch with src_lane, shifted into position.
1201 int width = bytes_per_lane * kBitsPerByte;
1202 int lsb = (byte % kPointerSize) * kBitsPerByte;
1203 bfi(scratch, src_lane, lsb, width);
1204 if (s_code < SwVfpRegister::kMaxNumRegisters) {
1205 vmov(SwVfpRegister::from_code(s_code), scratch);
1206 } else {
1207 VmovExtended(s_code, scratch);
1208 }
1209 }
1210
1211 void MacroAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src,
1212 SwVfpRegister src_lane, Register scratch,
1213 int lane) {
1214 Move(dst, src);
1215 int s_code = dst.code() * 4 + lane;
1216 VmovExtended(s_code, src_lane.code(), scratch);
1217 }
1218
1146 void MacroAssembler::LslPair(Register dst_low, Register dst_high, 1219 void MacroAssembler::LslPair(Register dst_low, Register dst_high,
1147 Register src_low, Register src_high, 1220 Register src_low, Register src_high,
1148 Register scratch, Register shift) { 1221 Register scratch, Register shift) {
1149 DCHECK(!AreAliased(dst_high, src_low)); 1222 DCHECK(!AreAliased(dst_high, src_low));
1150 DCHECK(!AreAliased(dst_high, shift)); 1223 DCHECK(!AreAliased(dst_high, shift));
1151 1224
1152 Label less_than_32; 1225 Label less_than_32;
1153 Label done; 1226 Label done;
1154 rsb(scratch, shift, Operand(32), SetCC); 1227 rsb(scratch, shift, Operand(32), SetCC);
1155 b(gt, &less_than_32); 1228 b(gt, &less_than_32);
(...skipping 2735 matching lines...) Expand 10 before | Expand all | Expand 10 after
3891 } 3964 }
3892 } 3965 }
3893 if (mag.shift > 0) mov(result, Operand(result, ASR, mag.shift)); 3966 if (mag.shift > 0) mov(result, Operand(result, ASR, mag.shift));
3894 add(result, result, Operand(dividend, LSR, 31)); 3967 add(result, result, Operand(dividend, LSR, 31));
3895 } 3968 }
3896 3969
3897 } // namespace internal 3970 } // namespace internal
3898 } // namespace v8 3971 } // namespace v8
3899 3972
3900 #endif // V8_TARGET_ARCH_ARM 3973 #endif // V8_TARGET_ARCH_ARM
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698