src/arm/macro-assembler-arm.cc - Issue 2579913002: [ARM] Add NEON instructions for implementing SIMD.

Side by Side Diff: src/arm/macro-assembler-arm.cc

Issue 2579913002: [ARM] Add NEON instructions for implementing SIMD. (Closed)

Patch Set: Review comments. Created 4 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2012 the V8 project authors. All rights reserved.	1 // Copyright 2012 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include <limits.h> // For LONG_MIN, LONG_MAX.	5 #include <limits.h> // For LONG_MIN, LONG_MAX.

6	6

7 #if V8_TARGET_ARCH_ARM	7 #if V8_TARGET_ARCH_ARM

8	8

9 #include "src/base/bits.h"	9 #include "src/base/bits.h"

10 #include "src/base/division-by-constant.h"	10 #include "src/base/division-by-constant.h"

(...skipping 1167 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1178 }	1178 }

1179	1179

1180 void MacroAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src,	1180 void MacroAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src,

1181 SwVfpRegister src_lane, Register scratch,	1181 SwVfpRegister src_lane, Register scratch,

1182 int lane) {	1182 int lane) {

1183 Move(dst, src);	1183 Move(dst, src);

1184 int s_code = dst.code() * 4 + lane;	1184 int s_code = dst.code() * 4 + lane;

1185 VmovExtended(s_code, src_lane.code(), scratch);	1185 VmovExtended(s_code, src_lane.code(), scratch);

1186 }	1186 }

1187	1187

	1188 void MacroAssembler::Swizzle(QwNeonRegister dst, QwNeonRegister src,

	1189 Register scratch, NeonSize size, uint32_t lanes) {

	1190 // TODO(bbudge) Handle Int16x8, Int8x16 vectors.

	1191 DCHECK_EQ(Neon32, size);

	1192 DCHECK_IMPLIES(size == Neon32, lanes < 0xFFFFu);

	1193 if (size == Neon32) {

	1194 switch (lanes) {

	1195 // TODO(bbudge) Handle more special cases.

	1196 case 0x3210: // Identity.

	1197 Move(dst, src);

	1198 return;

	1199 case 0x1032: // Swap top and bottom.

	1200 vext(dst, src, src, 8);

	1201 return;

	1202 case 0x2103: // Rotation.

	1203 vext(dst, src, src, 12);

	1204 return;

	1205 case 0x0321: // Rotation.

	1206 vext(dst, src, src, 4);

	1207 return;

	1208 case 0x0000: // Equivalent to vdup.

	1209 case 0x1111:

	1210 case 0x2222:

	1211 case 0x3333: {

	1212 int lane_code = src.code() * 4 + (lanes & 0xF);

	1213 if (lane_code >= SwVfpRegister::kMaxNumRegisters) {

	1214 // TODO(bbudge) use vdup (vdup.32 dst, D<src>[lane]) once implemented.

	1215 int temp_code = kScratchDoubleReg.code() * 2;

	1216 VmovExtended(temp_code, lane_code, scratch);

	1217 lane_code = temp_code;

	1218 }

	1219 vdup(dst, SwVfpRegister::from_code(lane_code));

	1220 return;

	1221 }

	1222 case 0x2301: // Swap lanes 0, 1 and lanes 2, 3.

	1223 vrev64(Neon32, dst, src);

	1224 return;

	1225 default: // Handle all other cases with vmovs.

	1226 int src_code = src.code() * 4;

	1227 int dst_code = dst.code() * 4;

	1228 bool in_place = src.is(dst);

	1229 if (in_place) {

	1230 vmov(kScratchQuadReg, src);

	1231 src_code = kScratchQuadReg.code() * 4;

	1232 }

	1233 for (int i = 0; i < 4; i++) {

	1234 int lane = (lanes >> (i * 4) & 0xF);

	1235 VmovExtended(dst_code + i, src_code + lane, scratch);

	1236 }

	1237 if (in_place) {

	1238 // Restore zero reg.

	1239 veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);

	1240 }

	1241 return;

	1242 }

	1243 }

	1244 }

	1245

1188 void MacroAssembler::LslPair(Register dst_low, Register dst_high,	1246 void MacroAssembler::LslPair(Register dst_low, Register dst_high,

1189 Register src_low, Register src_high,	1247 Register src_low, Register src_high,

1190 Register scratch, Register shift) {	1248 Register scratch, Register shift) {

1191 DCHECK(!AreAliased(dst_high, src_low));	1249 DCHECK(!AreAliased(dst_high, src_low));

1192 DCHECK(!AreAliased(dst_high, shift));	1250 DCHECK(!AreAliased(dst_high, shift));

1193	1251

1194 Label less_than_32;	1252 Label less_than_32;

1195 Label done;	1253 Label done;

1196 rsb(scratch, shift, Operand(32), SetCC);	1254 rsb(scratch, shift, Operand(32), SetCC);

1197 b(gt, &less_than_32);	1255 b(gt, &less_than_32);

(...skipping 2735 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3933 }	3991 }

3934 }	3992 }

3935 if (mag.shift > 0) mov(result, Operand(result, ASR, mag.shift));	3993 if (mag.shift > 0) mov(result, Operand(result, ASR, mag.shift));

3936 add(result, result, Operand(dividend, LSR, 31));	3994 add(result, result, Operand(dividend, LSR, 31));

3937 }	3995 }

3938	3996

3939 } // namespace internal	3997 } // namespace internal

3940 } // namespace v8	3998 } // namespace v8

3941	3999

3942 #endif // V8_TARGET_ARCH_ARM	4000 #endif // V8_TARGET_ARCH_ARM

OLD	NEW

« no previous file with comments | « src/arm/macro-assembler-arm.h ('k') | src/arm/simulator-arm.cc » ('j') | no next file with comments »