Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(529)

Unified Diff: test/cctest/test-macro-assembler-arm.cc

Issue 2546933002: [Turbofan] Add ARM NEON instructions for implementing SIMD. (Closed)
Patch Set: Review comments. Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« test/cctest/test-disasm-arm.cc ('K') | « test/cctest/test-disasm-arm.cc ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: test/cctest/test-macro-assembler-arm.cc
diff --git a/test/cctest/test-macro-assembler-arm.cc b/test/cctest/test-macro-assembler-arm.cc
index 06efc58cfa26a2cd3fde8a6df56ba525c113100a..7bb49724d9c5ac90a72e0ae8cf708c411e666543 100644
--- a/test/cctest/test-macro-assembler-arm.cc
+++ b/test/cctest/test-macro-assembler-arm.cc
@@ -42,6 +42,7 @@ typedef void* (*F)(int x, int y, int p2, int p3, int p4);
#define __ masm->
+typedef Object* (*F3)(void* p0, int p1, int p2, int p3, int p4);  // Entry-point type for generated code whose first argument is a pointer.
typedef int (*F5)(void*, void*, void*, void*, void*);
@@ -134,4 +135,250 @@ TEST(LoadAndStoreWithRepresentation) {
CHECK(!CALL_GENERATED_CODE(isolate, f, 0, 0, 0, 0, 0));
}
+TEST(ExtractLane) {  // Emits code that fills a Q register, extracts each lane into T, then checks T on the host.
+ if (!CpuFeatures::IsSupported(NEON)) return;  // Nothing to test without NEON.
+
+ // Allocate an executable page of memory.
+ size_t actual_size;
+ byte* buffer = static_cast<byte*>(v8::base::OS::Allocate(
+ Assembler::kMinimalBufferSize, &actual_size, true));
+ CHECK(buffer);
+ Isolate* isolate = CcTest::i_isolate();
+ HandleScope handles(isolate);
+ MacroAssembler assembler(isolate, buffer, static_cast<int>(actual_size),
+ v8::internal::CodeObjectRequired::kYes);
+ MacroAssembler* masm = &assembler; // Create a pointer for the __ macro.
+
+ typedef struct {  // Result slots; every element is int32_t, matching the 4 * i store offsets below.
+ int32_t i32x4_low[4];
+ int32_t i32x4_high[4];
+ int32_t i16x8_low[8];
+ int32_t i16x8_high[8];
+ int32_t i8x16_low[16];
+ int32_t i8x16_high[16];
+ int32_t f32x4_low[4];
+ int32_t f32x4_high[4];
+ } T;
+ T t;
+
+ __ stm(db_w, sp, r4.bit() | r5.bit() | lr.bit());  // Save r4, r5 and lr on the stack.
+
+ for (int i = 0; i < 4; i++) {  // 32-bit lanes of q1, integer and float variants.
+ __ mov(r4, Operand(i));
+ __ vdup(q1, r4, Neon32);  // Fill q1 from r4 using 32-bit lanes.
+ __ ExtractLane(r5, q1, NeonS32, i);
+ __ str(r5, MemOperand(r0, offsetof(T, i32x4_low) + 4 * i));  // r0 presumably holds &t (first call argument) - confirm.
+ SwVfpRegister si = SwVfpRegister::from_code(i);
+ __ ExtractLane(si, q1, r4, i);  // Float overload; r4 is the extra register operand (presumably a scratch - confirm).
+ __ vstr(si, r0, offsetof(T, f32x4_low) + 4 * i);
+ }
+
+ for (int i = 0; i < 8; i++) {  // 16-bit lanes of q1.
+ __ mov(r4, Operand(i));
+ __ vdup(q1, r4, Neon16);
+ __ ExtractLane(r5, q1, NeonS16, i);
+ __ str(r5, MemOperand(r0, offsetof(T, i16x8_low) + 4 * i));
+ }
+
+ for (int i = 0; i < 16; i++) {  // 8-bit lanes of q1.
+ __ mov(r4, Operand(i));
+ __ vdup(q1, r4, Neon8);
+ __ ExtractLane(r5, q1, NeonS8, i);
+ __ str(r5, MemOperand(r0, offsetof(T, i8x16_low) + 4 * i));
+ }
+
+ if (CpuFeatures::IsSupported(VFP32DREGS)) {  // q15 is only exercised when VFP32DREGS is supported.
+ for (int i = 0; i < 4; i++) {
+ __ mov(r4, Operand(-i));  // Negative values; the checks below expect -i back in an int32_t slot.
+ __ vdup(q15, r4, Neon32);
+ __ ExtractLane(r5, q15, NeonS32, i);
+ __ str(r5, MemOperand(r0, offsetof(T, i32x4_high) + 4 * i));
+ SwVfpRegister si = SwVfpRegister::from_code(i);
+ __ ExtractLane(si, q15, r4, i);
+ __ vstr(si, r0, offsetof(T, f32x4_high) + 4 * i);
+ }
+
+ for (int i = 0; i < 8; i++) {
+ __ mov(r4, Operand(-i));
+ __ vdup(q15, r4, Neon16);
+ __ ExtractLane(r5, q15, NeonS16, i);  // Result must be sign-extended for the -i check to pass.
+ __ str(r5, MemOperand(r0, offsetof(T, i16x8_high) + 4 * i));
+ }
+
+ for (int i = 0; i < 16; i++) {
+ __ mov(r4, Operand(-i));
+ __ vdup(q15, r4, Neon8);
+ __ ExtractLane(r5, q15, NeonS8, i);  // Result must be sign-extended for the -i check to pass.
+ __ str(r5, MemOperand(r0, offsetof(T, i8x16_high) + 4 * i));
+ }
+ }
+
+ __ ldm(ia_w, sp, r4.bit() | r5.bit() | pc.bit());  // Restore r4, r5 and load the saved lr value into pc.
+ __ bx(lr);  // Comes after a load into pc; flagged as unnecessary in the review comment below.
Rodolph Perfetta (ARM) 2016/12/08 18:08:28 nit: this is not necessary, the line above loaded
bbudge 2016/12/10 21:33:05 Done.
+
+ CodeDesc desc;
+ masm->GetCode(&desc);
+ Handle<Code> code = isolate->factory()->NewCode(
+ desc, Code::ComputeFlags(Code::STUB), Handle<Code>());
+#ifdef DEBUG
+ OFStream os(stdout);
+ code->Print(os);
+#endif
+ F3 f = FUNCTION_CAST<F3>(code->entry());
+ Object* dummy = CALL_GENERATED_CODE(isolate, f, &t, 0, 0, 0, 0);  // Run the generated code with &t as its first argument.
+ USE(dummy);
+ for (int i = 0; i < 4; i++) {
+ CHECK_EQ(i, t.i32x4_low[i]);
+ CHECK_EQ(i, t.f32x4_low[i]);  // Float slots are compared as int32_t values; the lanes were filled from an integer.
+ }
+ for (int i = 0; i < 8; i++) {
+ CHECK_EQ(i, t.i16x8_low[i]);
+ }
+ for (int i = 0; i < 16; i++) {
+ CHECK_EQ(i, t.i8x16_low[i]);
+ }
+ if (CpuFeatures::IsSupported(VFP32DREGS)) {  // Mirrors the guard used when emitting the q15 code.
+ for (int i = 0; i < 4; i++) {
+ CHECK_EQ(-i, t.i32x4_high[i]);
+ CHECK_EQ(-i, t.f32x4_high[i]);
+ }
+ for (int i = 0; i < 8; i++) {
+ CHECK_EQ(-i, t.i16x8_high[i]);
+ }
+ for (int i = 0; i < 16; i++) {
+ CHECK_EQ(-i, t.i8x16_high[i]);
+ }
+ }
+}
+
+TEST(ReplaceLane) {  // Emits code that writes each lane of a Q register via ReplaceLane, stores it to T, then checks T on the host.
+ if (!CpuFeatures::IsSupported(NEON)) return;  // Nothing to test without NEON.
+
+ // Allocate an executable page of memory.
+ size_t actual_size;
+ byte* buffer = static_cast<byte*>(v8::base::OS::Allocate(
+ Assembler::kMinimalBufferSize, &actual_size, true));
+ CHECK(buffer);
+ Isolate* isolate = CcTest::i_isolate();
+ HandleScope handles(isolate);
+ MacroAssembler assembler(isolate, buffer, static_cast<int>(actual_size),
+ v8::internal::CodeObjectRequired::kYes);
+ MacroAssembler* masm = &assembler; // Create a pointer for the __ macro.
+
+ typedef struct {  // Element types follow the lane width, so each array occupies 16 bytes.
+ int32_t i32x4_low[4];
+ int32_t i32x4_high[4];
+ int16_t i16x8_low[8];
+ int16_t i16x8_high[8];
+ int8_t i8x16_low[16];
+ int8_t i8x16_high[16];
+ int32_t f32x4_low[4];
+ int32_t f32x4_high[4];
+ } T;
+ T t;
+
+ __ stm(db_w, sp, r4.bit() | r5.bit() | r6.bit() | r7.bit() | lr.bit());  // Save r4-r7 and lr on the stack.
+
+ const Register kScratch = r5;  // Passed to every ReplaceLane call below.
+
+ __ veor(q0, q0, q0); // Zero
+ __ veor(q1, q1, q1); // Zero
+ for (int i = 0; i < 4; i++) {  // 32-bit lanes: integer source into q0, float source into q1.
+ __ mov(r4, Operand(i));
+ __ ReplaceLane(q0, q0, r4, kScratch, NeonS32, i);
+ SwVfpRegister si = SwVfpRegister::from_code(i);
+ __ vmov(si, r4);  // Move r4 into the S register used as the float-overload source.
+ __ ReplaceLane(q1, q1, si, kScratch, i);
+ }
+ __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, i32x4_low))));  // r4 = address of the field; r0 presumably holds &t - confirm.
+ __ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));  // Store all of q0 into the field.
+ __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, f32x4_low))));
+ __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4));
+
+ __ veor(q0, q0, q0); // Zero
+ for (int i = 0; i < 8; i++) {  // 16-bit lanes of q0.
+ __ mov(r4, Operand(i));
+ __ ReplaceLane(q0, q0, r4, kScratch, NeonS16, i);
+ }
+ __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, i16x8_low))));
+ __ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
+
+ __ veor(q0, q0, q0); // Zero
+ for (int i = 0; i < 16; i++) {  // 8-bit lanes of q0.
+ __ mov(r4, Operand(i));
+ __ ReplaceLane(q0, q0, r4, kScratch, NeonS8, i);
+ }
+ __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, i8x16_low))));
+ __ vst1(Neon8, NeonListOperand(q0), NeonMemOperand(r4));
+
+ if (CpuFeatures::IsSupported(VFP32DREGS)) {  // q14/q15 are only exercised when VFP32DREGS is supported.
+ __ veor(q14, q14, q14); // Zero
+ __ veor(q15, q15, q15); // Zero
+ for (int i = 0; i < 4; i++) {
+ __ mov(r4, Operand(-i));  // Negative values for the high-register variants.
+ __ ReplaceLane(q14, q14, r4, kScratch, NeonS32, i);
+ SwVfpRegister si = SwVfpRegister::from_code(i);
+ __ vmov(si, r4);
+ __ ReplaceLane(q15, q15, si, kScratch, i);
+ }
+ __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, i32x4_high))));
+ __ vst1(Neon8, NeonListOperand(q14), NeonMemOperand(r4));
+ __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, f32x4_high))));
+ __ vst1(Neon8, NeonListOperand(q15), NeonMemOperand(r4));
+
+ __ veor(q14, q14, q14); // Zero
+ for (int i = 0; i < 8; i++) {
+ __ mov(r4, Operand(-i));
+ __ ReplaceLane(q14, q14, r4, kScratch, NeonS16, i);
+ }
+ __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, i16x8_high))));
+ __ vst1(Neon8, NeonListOperand(q14), NeonMemOperand(r4));
+
+ __ veor(q14, q14, q14); // Zero
+ for (int i = 0; i < 16; i++) {
+ __ mov(r4, Operand(-i));
+ __ ReplaceLane(q14, q14, r4, kScratch, NeonS8, i);
+ }
+ __ add(r4, r0, Operand(static_cast<int32_t>(offsetof(T, i8x16_high))));
+ __ vst1(Neon8, NeonListOperand(q14), NeonMemOperand(r4));
+ }
+
+ __ ldm(ia_w, sp, r4.bit() | r5.bit() | r6.bit() | r7.bit() | pc.bit());  // Restore r4-r7 and load the saved lr value into pc.
+ __ bx(lr);  // Comes after a load into pc; see the review comment below.
bbudge 2016/12/10 21:33:04 And here.
+
+ CodeDesc desc;
+ masm->GetCode(&desc);
+ Handle<Code> code = isolate->factory()->NewCode(
+ desc, Code::ComputeFlags(Code::STUB), Handle<Code>());
+#ifdef DEBUG
+ OFStream os(stdout);
+ code->Print(os);
+#endif
+ F3 f = FUNCTION_CAST<F3>(code->entry());
+ Object* dummy = CALL_GENERATED_CODE(isolate, f, &t, 0, 0, 0, 0);  // Run the generated code with &t as its first argument.
+ USE(dummy);
+ for (int i = 0; i < 4; i++) {
+ CHECK_EQ(i, t.i32x4_low[i]);
+ CHECK_EQ(i, t.f32x4_low[i]);  // Float slots are compared as int32_t values; the S registers were loaded from r4.
+ }
+ for (int i = 0; i < 8; i++) {
+ CHECK_EQ(i, t.i16x8_low[i]);
+ }
+ for (int i = 0; i < 16; i++) {
+ CHECK_EQ(i, t.i8x16_low[i]);
+ }
+ if (CpuFeatures::IsSupported(VFP32DREGS)) {  // Mirrors the guard used when emitting the q14/q15 code.
+ for (int i = 0; i < 4; i++) {
+ CHECK_EQ(-i, t.i32x4_high[i]);
+ CHECK_EQ(-i, t.f32x4_high[i]);
+ }
+ for (int i = 0; i < 8; i++) {
+ CHECK_EQ(-i, t.i16x8_high[i]);
+ }
+ for (int i = 0; i < 16; i++) {
+ CHECK_EQ(-i, t.i8x16_high[i]);
+ }
+ }
+}
+
#undef __
« test/cctest/test-disasm-arm.cc ('K') | « test/cctest/test-disasm-arm.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698