Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(5)

Unified Diff: src/arm/assembler-arm.cc

Issue 12920009: Use generated Neon version of MemCopy() on ARM, if platform supports it. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 7 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/arm/assembler-arm.cc
===================================================================
--- src/arm/assembler-arm.cc (revision 14076)
+++ src/arm/assembler-arm.cc (working copy)
@@ -154,6 +154,11 @@
}
#else // __arm__
+ if (!IsSupported(NEON) && OS::ArmCpuHasFeature(NEON)) {
+ found_by_runtime_probing_only_ |=
+ static_cast<uint64_t>(1) << NEON;
+ }
+
// Probe for additional features not already known to be available.
if (!IsSupported(VFP3) && OS::ArmCpuHasFeature(VFP3)) {
// This implementation also sets the VFP flags if runtime
@@ -1597,7 +1602,31 @@
addrmod4(cond | B27 | am, base, src);
}
+// Builds the instruction word shared by pld() and pldw(): a preload hint
+// addressed as [base, #+/-offset] with a 12-bit offset magnitude.
+// Field layout produced below:
+//   1111(31-28) | 0101(27-24) | U(23) | R(22) | 01(21-20) | Rn(19-16) |
+//   1111(15-12) | imm12(11-0)
+// U is set when the offset is non-negative; R is set for the read hint and
+// left clear when |write| is true.
+// NOTE(review): the previous comment cited "PLD(literal)/PLDW(literal)",
+// ARM DDI 0406B, A8-239.  Because Rn is taken from |base| this looks like the
+// immediate-offset form -- confirm the section reference.
+static uint32_t pld_op(Register base, int offset, bool write) {
+  const int add_bit = (offset < 0) ? 0 : B23;
+  const int read_bit = write ? 0 : B22;
+  const int magnitude = (offset < 0) ? -offset : offset;
+  ASSERT(is_uint12(magnitude));
+
+  return 0xf510f000 | base.code()*B16 | add_bit | read_bit | magnitude;
+}
+
+// Emits the preload hint built by pld_op() with write == false, i.e. the
+// variant that has the R bit (bit 22) set.
+void Assembler::pld(Register base, int offset) {
+  const bool kForWrite = false;
+  emit(pld_op(base, offset, kForWrite));
+}
+
+
+// Emits the preload hint built by pld_op() with write == true, i.e. the
+// variant that leaves the R bit (bit 22) clear.
+void Assembler::pldw(Register base, int offset) {
+  const bool kForWrite = true;
+  emit(pld_op(base, offset, kForWrite));
+}
+
+
// Exception-generating instructions and debugging support.
// Stops with a non-negative code less than kNumOfWatchedStops support
// enabling/disabling and a counter feature. See simulator-arm.h .
@@ -2651,6 +2680,266 @@
}
+// Returns the base-2 logarithm of |x| for the powers of two 1, 2, 4, ..., 64.
+// Any other argument reaches UNREACHABLE() and then yields 0.
+static inline int bin_log(int x) {
+  for (int exponent = 0; exponent <= 6; ++exponent) {
+    if (x == (1 << exponent)) return exponent;
+  }
+  UNREACHABLE();
+  return 0;
+}
+
+
+// Maps a register count of 1, 2, 3 or 4 to the value 7, 10, 6 or 2 that the
+// multiple-element VLD1/VST1 emitters pass on as the "type" field.  Any other
+// count reaches UNREACHABLE() and then yields 0.
+static inline int count_to_type(int count) {
+  static const int kTypeForCount[] = { 7, 10, 6, 2 };
+  if (count >= 1 && count <= 4) {
+    return kTypeForCount[count - 1];
+  }
+  UNREACHABLE();
+  return 0;
+}
+
+
+enum NeonElementType {  // Selects the encoding path in neon_vector_op().
+ MultipleElements = 0,  // Multiple-element form: type/size/align fields.
+ SingleElement = 1  // Single-lane form: bit 23 set, size and index_align.
+};
+
+enum NeonLoadStoreType {  // Direction of a NEON memory access.
+ Store = 0,  // Leaves bit 21 clear (neon_vector_op() computes load*B21).
+ Load = 1  // Sets bit 21 through the same load*B21 term.
+};
+
+// Assembles the instruction word shared by the NEON element/structure load
+// and store emitters (vld1/vst1/vld4/vst4).
+//
+//   size          - element size in bits: 8, 16 or 32.
+//   base          - address register (Rn); must not be pc.
+//   first         - first D register of the transfer, encoded as D:Vd.
+//   writeback     - Writeback selects Rm = 13, anything else Rm = 15.
+//   align_bytes   - requested alignment; 1 means no alignment bits are set.
+//   load          - Load sets bit 21, Store leaves it clear.
+//   single        - SingleElement selects the one-lane form (bit 23 set,
+//                   size in bits 11-10, index_align in bits 7-4);
+//                   MultipleElements uses type(11-8), size(7-6), align(5-4).
+//   element_index - lane number; only read by the single-element form.
+//   type          - value for bits 11-8; only used by the multiple-element
+//                   form.
+//
+// The lane number is positioned according to the element size: it is shifted
+// left by 1, 2 or 3 inside index_align for 8-, 16- and 32-bit elements.
+// Earlier the unaligned case (align_bytes == 1) dropped the lane altogether,
+// and the 8-byte case used the 16-bit shift even for 32-bit elements.
+//
+// NOTE(review): the single-element form always sets bits 9-8 to 0b11.  The
+// emitters' comments list 00 there for VLD1/VST1 and 11 for VLD4/VST4, so a
+// VLD1/VST1 lane access needs a different value -- confirm.
+// NOTE(review): the alignment bits chosen for align_bytes 16, 8 and 4 are
+// carried over unchanged; check them against the per-instruction
+// index_align tables in the ARM ARM.
+static uint32_t neon_vector_op(int size,
+                               const Register base,
+                               const DwVfpRegister first,
+                               NeonWritebackType writeback,
+                               int align_bytes,
+                               NeonLoadStoreType load,
+                               NeonElementType single,
+                               NeonElementIndex element_index,
+                               int type) {
+  ASSERT(type >= 0);
+  ASSERT(!base.is(pc));
+  ASSERT(size == 8 || size == 16 || size == 32);
+
+  int d, Vd;
+  first.split_code(&Vd, &d);
+  // We don't need arbitrary Rm so far.
+  int rm = (writeback == Writeback) ? 13 : 15;
+  // Unsigned literal: shifting 0xf into bit 31 of a signed int overflows.
+  uint32_t result =
+      (0xfU << 28) | B26 | d*B22 | load*B21 | base.code()*B16 |
+      Vd*B12 | rm;
+
+  // 0, 1 or 2 for 8-, 16- or 32-bit elements.
+  int size_enc = bin_log(size) - 3;
+
+  if (single == SingleElement) {
+    // A D register holds 8, 4 or 2 lanes of 8, 16 or 32 bits, so the lane
+    // number needs 3, 2 or 1 bits, placed at the top of index_align.
+    int lane_mask = (8 >> size_enc) - 1;
+    int lane_bits = (element_index & lane_mask) << (size_enc + 1);
+    int align_bits = 0;
+    switch (align_bytes) {
+      case 16:
+        ASSERT(size == 32);
+        ASSERT(element_index < element_2);
+        align_bits = 0x3;
+        break;
+      case 8:
+        ASSERT(size == 16 || size == 32);
+        ASSERT(element_index < element_4);
+        align_bits = 0x1;
+        break;
+      case 4:
+        ASSERT(size == 8);
+        break;
+      case 1:
+        break;
+      default:
+        UNREACHABLE();
+    }
+    int index_align = lane_bits | align_bits;
+    result |= B23 | (1<<10)*size_enc | B9 | B8 | index_align*B4;
+  } else {
+    int align_enc = align_bytes == 1 ? 0 : bin_log(align_bytes / 4);
+    result |= type*B8 | size_enc*B6 | align_enc*B4;
+  }
+  return result;
+}
+
+void Assembler::vld1(int size,
+                     const Register base,
+                     const DwVfpRegister first,
+                     const DwVfpRegister last,
+                     NeonWritebackType writeback,
+                     int align_bytes) {
+  // VLD1 (multiple single elements).
+  // Load elements from memory into one, two, three, or four registers,
+  // without de-interleaving.
+  // Instruction details available in ARM DDI 0406B, A8-602.
+  // 1111(31-28) | 01000 (27-23) | D(22)| 10 (21-20) |
+  // Rn(19-16) | Vd (15-12) | type(11-8) | size (7-6) | align(5-4) |
+  // Rm(3-0)
+  ASSERT(IsEnabled(NEON));
+  ASSERT_LE(first.code(), last.code());
+  // The register list is the inclusive range first..last, at most four long.
+  const int register_count = last.code() - first.code() + 1;
+  ASSERT(register_count <= 4);
+  const int type = count_to_type(register_count);
+  emit(neon_vector_op(size, base, first, writeback, align_bytes,
+                      Load, MultipleElements, element_0, type));
+}
+
+void Assembler::vld1(int size,
+                     const Register base,
+                     const DwVfpRegister first,
+                     NeonElementIndex element_index,
+                     NeonWritebackType writeback,
+                     int align_bytes) {
+  // VLD1 (single element to one lane).
+  // Load one element from memory into one element of a register.
+  // Instruction details available in ARM DDI 0406B, A8-604.
+  // 1111(31-28) | 01001 (27-23) | D(22)| 10 (21-20) |
+  // Rn(19-16) | Vd (15-12) | size(11-10) | 00 (9-8) | index_align(7-4) |
+  // Rm(3-0)
+  ASSERT(IsEnabled(NEON));
+  const uint32_t instr =
+      neon_vector_op(size, base, first, writeback, align_bytes,
+                     Load, SingleElement, element_index, 0);
+  // neon_vector_op() sets bits 9-8 to 0b11 for every single-element access,
+  // which is the 4-element-structure (VLD4) pattern.  VLD1 to one lane needs
+  // 0b00 there, so clear them; without this a VLD4 lane load is emitted.
+  emit(instr & ~static_cast<uint32_t>(B9 | B8));
+}
+
+void Assembler::vst1(int size,
+                     const Register base,
+                     const DwVfpRegister first,
+                     const DwVfpRegister last,
+                     NeonWritebackType writeback,
+                     int align_bytes) {
+  // VST1 (multiple single elements).
+  // Store elements to memory from one, two, three, or four registers,
+  // without interleaving.
+  // Instruction details available in ARM DDI 0406B, A8-768.
+  // 1111(31-28) | 01000 (27-23) | D(22)| 00 (21-20) |
+  // Rn(19-16) | Vd (15-12) | type(11-8) | size (7-6) | align(5-4) |
+  // Rm(3-0)
+  ASSERT(IsEnabled(NEON));
+  ASSERT_LE(first.code(), last.code());
+  // The register list is the inclusive range first..last, at most four long.
+  const int register_count = last.code() - first.code() + 1;
+  ASSERT(register_count <= 4);
+  const int type = count_to_type(register_count);
+  emit(neon_vector_op(size, base, first, writeback, align_bytes,
+                      Store, MultipleElements, element_0, type));
+}
+
+void Assembler::vst1(int size,
+                     const Register base,
+                     const DwVfpRegister first,
+                     NeonElementIndex element_index,
+                     NeonWritebackType writeback,
+                     int align_bytes) {
+  // VST1 (single element from one lane).
+  // Store one element to memory from one element of a register.
+  // Instruction details available in ARM DDI 0406B, A8-770.
+  // 1111(31-28) | 01001 (27-23) | D(22)| 00 (21-20) |
+  // Rn(19-16) | Vd (15-12) | size(11-10) | 00 (9-8) | index_align(7-4) |
+  // Rm(3-0)
+  ASSERT(IsEnabled(NEON));
+  const uint32_t instr =
+      neon_vector_op(size, base, first, writeback, align_bytes,
+                     Store, SingleElement, element_index, 0);
+  // neon_vector_op() sets bits 9-8 to 0b11 for every single-element access,
+  // which is the 4-element-structure (VST4) pattern.  VST1 from one lane
+  // needs 0b00 there, so clear them; without this a VST4 lane store is
+  // emitted.
+  emit(instr & ~static_cast<uint32_t>(B9 | B8));
+}
+
+void Assembler::vld4(int size,
+                     const Register base,
+                     const DwVfpRegister first,
+                     const DwVfpRegister last,
+                     NeonWritebackType writeback,
+                     int align_bytes) {
+  // VLD4 (multiple 4-element structures).
+  // Load multiple 4-element structures from memory into four registers,
+  // with de-interleaving.
+  // Instruction details available in ARM DDI 0406B, A8-620.
+  // 1111(31-28) | 01000 (27-23) | D(22)| 10 (21-20) |
+  // Rn(19-16) | Vd (15-12) | type(11-8) | size (7-6) | align(5-4) |
+  // Rm(3-0)
+  // Only |first| is encoded; |last| is not consulted by this emitter.
+  ASSERT(IsEnabled(NEON));
+  // The type field is always 0 here: increment == 2 is not supported yet.
+  const int kType = 0;
+  const uint32_t instr =
+      neon_vector_op(size, base, first, writeback, align_bytes,
+                     Load, MultipleElements, element_0, kType);
+  emit(instr);
+}
+
+void Assembler::vld4(int size,
+                     const Register base,
+                     const DwVfpRegister first,
+                     NeonElementIndex element_index,
+                     NeonWritebackType writeback,
+                     int align_bytes) {
+  // VLD4 (single 4-element structure to one lane).
+  // Load one 4-element structure from memory into corresponding elements of
+  // four registers.
+  // Instruction details available in ARM DDI 0406B, A8-622.
+  // 1111(31-28) | 01001 (27-23) | D(22)| 10 (21-20) |
+  // Rn(19-16) | Vd (15-12) | size(11-10) | 11 (9-8) | index_align(7-4) |
+  // Rm(3-0)
+  ASSERT(IsEnabled(NEON));
+  // The trailing type argument is not used by the single-element encoding.
+  const int kUnusedType = 0;
+  const uint32_t instr =
+      neon_vector_op(size, base, first, writeback, align_bytes,
+                     Load, SingleElement, element_index, kUnusedType);
+  emit(instr);
+}
+
+void Assembler::vst4(int size,
+                     const Register base,
+                     const DwVfpRegister first,
+                     const DwVfpRegister last,
+                     NeonWritebackType writeback,
+                     int align_bytes) {
+  // VST4 (multiple 4-element structures).
+  // Store multiple 4-element structures to memory from four registers,
+  // with interleaving.
+  // Instruction details available in ARM DDI 0406B, A8-780.
+  // 1111(31-28) | 01000 (27-23) | D(22)| 00 (21-20) |
+  // Rn(19-16) | Vd (15-12) | type(11-8) | size (7-6) | align(5-4) |
+  // Rm(3-0)
+  // Only |first| is encoded; |last| is not consulted by this emitter.
+  ASSERT(IsEnabled(NEON));
+  // The type field is always 0 here: increment == 2 is not supported yet.
+  const int kType = 0;
+  const uint32_t instr =
+      neon_vector_op(size, base, first, writeback, align_bytes,
+                     Store, MultipleElements, element_0, kType);
+  emit(instr);
+}
+
+void Assembler::vst4(int size,
+                     const Register base,
+                     const DwVfpRegister first,
+                     NeonElementIndex element_index,
+                     NeonWritebackType writeback,
+                     int align_bytes) {
+  // VST4 (single 4-element structure from one lane).
+  // Store one 4-element structure to memory from corresponding elements
+  // of four registers.
+  // Instruction details available in ARM DDI 0406B, A8-782.
+  // 1111(31-28) | 01001 (27-23) | D(22)| 00 (21-20) |
+  // Rn(19-16) | Vd (15-12) | size(11-10) | 11 (9-8) | index_align(7-4) |
+  // Rm(3-0)
+  ASSERT(IsEnabled(NEON));
+  // The trailing type argument is not used by the single-element encoding.
+  const int kUnusedType = 0;
+  const uint32_t instr =
+      neon_vector_op(size, base, first, writeback, align_bytes,
+                     Store, SingleElement, element_index, kUnusedType);
+  emit(instr);
+}
+
// Pseudo instructions.
void Assembler::nop(int type) {
// ARMv6{K/T2} and v7 have an actual NOP instruction but it serializes
« no previous file with comments | « src/arm/assembler-arm.h ('k') | src/arm/codegen-arm.cc » ('j') | src/arm/codegen-arm.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698