| Index: src/arm/assembler-arm.cc
|
| ===================================================================
|
| --- src/arm/assembler-arm.cc (revision 14076)
|
| +++ src/arm/assembler-arm.cc (working copy)
|
| @@ -154,6 +154,11 @@
|
| }
|
|
|
| #else // __arm__
|
| + if (!IsSupported(NEON) && OS::ArmCpuHasFeature(NEON)) {
|
| + found_by_runtime_probing_only_ |=
|
| + static_cast<uint64_t>(1) << NEON;
|
| + }
|
| +
|
| // Probe for additional features not already known to be available.
|
| if (!IsSupported(VFP3) && OS::ArmCpuHasFeature(VFP3)) {
|
| // This implementation also sets the VFP flags if runtime
|
| @@ -1597,7 +1602,31 @@
|
| addrmod4(cond | B27 | am, base, src);
|
| }
|
|
|
| +static uint32_t pld_op(Register base, int offset, bool write) {
|
| + // PLD/PLDW (immediate) preload data for read or write.
|
| + // Instruction details available in ARM DDI 0406B, A8-239.
|
| + // 1111(31-28) | 0101(27-24)| U(23) | R(22) | 01(21-20) | Rn(19-16) |
|
| + // 1111(15-12) | imm12(11-0)
|
| + int U = (offset >= 0) ? B23 : 0;  // Set for non-negative offsets.
|
| + int R = !write ? B22 : 0;  // Set when write is false (pld).
|
| + if (offset < 0)
|
| + offset = -offset;  // imm12 carries the magnitude only.
|
| + ASSERT(is_uint12(offset));
|
| +
|
| + return 0xf510f000 | base.code()*B16 | U | R | offset;
|
| +}
|
|
|
| +
|
| +void Assembler::pld(Register base, int offset) {
|
| + emit(pld_op(base, offset, false));  // write == false.
|
| +}
|
| +
|
| +
|
| +void Assembler::pldw(Register base, int offset) {
|
| + emit(pld_op(base, offset, true));  // write == true.
|
| +}
|
| +
|
| +
|
| // Exception-generating instructions and debugging support.
|
| // Stops with a non-negative code less than kNumOfWatchedStops support
|
| // enabling/disabling and a counter feature. See simulator-arm.h .
|
| @@ -2651,6 +2680,266 @@
|
| }
|
|
|
|
|
| +static inline int bin_log(int x) {  // log2(x) for powers of two in [1, 64].
|
| + switch (x) {
|
| + case 1:
|
| + return 0;
|
| + case 2:
|
| + return 1;
|
| + case 4:
|
| + return 2;
|
| + case 8:
|
| + return 3;
|
| + case 16:
|
| + return 4;
|
| + case 32:
|
| + return 5;
|
| + case 64:
|
| + return 6;
|
| + default:  // Any other value, including 0, is a caller error.
|
| + UNREACHABLE();
|
| + return 0;
|
| + }
|
| +}
|
| +
|
| +
|
| +static inline int count_to_type(int count) {  // Register count -> type field.
|
| + switch (count) {
|
| + case 1:
|
| + return 7;  // 0b0111
|
| + case 2:
|
| + return 10;  // 0b1010
|
| + case 3:
|
| + return 6;  // 0b0110
|
| + case 4:
|
| + return 2;  // 0b0010
|
| + default:  // Only counts 1..4 are handled.
|
| + UNREACHABLE();
|
| + return 0;
|
| + }
|
| +}
|
| +
|
| +
|
| +enum NeonElementType {  // Selects the encoding branch in neon_vector_op().
|
| + MultipleElements = 0,  // type/size/align layout.
|
| + SingleElement = 1  // size/index_align layout.
|
| +};
|
| +
|
| +enum NeonLoadStoreType {  // Value is multiplied by B21 in neon_vector_op().
|
| + Store = 0,  // Bit 21 clear.
|
| + Load = 1  // Bit 21 set.
|
| +};
|
| +
|
| +static uint32_t neon_vector_op(int size,  // Element size in bits: 8, 16, 32.
|
| + const Register base,
|
| + const DwVfpRegister first,
|
| + NeonWritebackType writeback,
|
| + int align_bytes,
|
| + NeonLoadStoreType load,
|
| + NeonElementType single,
|
| + NeonElementIndex element_index,
|
| + int type) {  // Bits 11-8; only read for MultipleElements.
|
| +
|
| + ASSERT(type >= 0);
|
| + ASSERT(!base.is(pc));
|
| + ASSERT(size == 8 || size == 16 || size == 32);
|
| +
|
| + int d, Vd;
|
| + first.split_code(&Vd, &d);  // Vd fills bits 15-12, d fills bit 22.
|
| + // We don't need arbitrary Rm so far.
|
| + int rm = (writeback == Writeback) ? 13 : 15;  // Rm field, bits 3-0.
|
| + uint32_t result =
|
| + (0xf << 28) | B26 | d*B22 | load*B21 | base.code()*B16 |
|
| + Vd*B12 | rm;
|
| +
|
| + int index_align = 0;  // Bits 7-4 of the single-element layout.
|
| + if (single == SingleElement) {
|
| + switch (align_bytes) {
|
| + case 16:
|
| + ASSERT(size == 32);
|
| + ASSERT(element_index < element_2);
|
| + index_align = 0x3 | ((element_index & 0x1) << 3);
|
| + break;
|
| + case 8:
|
| + ASSERT(size == 16 || size == 32);
|
| + ASSERT(element_index < element_4);
|
| + index_align = 0x1 | ((element_index & 0x3) << 2);
|
| + break;
|
| + case 4:
|
| + ASSERT(size == 8);
|
| + index_align = (element_index & 0x7) << 1;
|
| + break;
|
| + case 1:
|
| + index_align = 0;  // NOTE(review): element_index is dropped here.
|
| + break;
|
| + default:
|
| + UNREACHABLE();
|
| + }
|
| + }
|
| +
|
| + int size_enc = bin_log(size) - 3;  // 8 -> 0, 16 -> 1, 32 -> 2.
|
| + if (single == SingleElement) {  // NOTE(review): always emits 11 in bits 9-8.
|
| + result |= B23 | (1<<10)*size_enc | B9 | B8 | index_align*B4;
|
| + } else {
|
| + int align_enc = align_bytes == 1 ? 0 : bin_log(align_bytes / 4);
|
| + result |= type*B8 | size_enc*B6 | align_enc*B4;
|
| + }
|
| + return result;
|
| +}
|
| +
|
| +void Assembler::vld1(int size,  // Element size in bits: 8, 16 or 32.
|
| + const Register base,
|
| + const DwVfpRegister first,
|
| + const DwVfpRegister last,
|
| + NeonWritebackType writeback,
|
| + int align_bytes) {
|
| + // VLD1 (multiple single elements).
|
| + // Load elements from memory into one, two, three, or four registers,
|
| + // without de-interleaving.
|
| + // Instruction details available in ARM DDI 0406B, A8-602.
|
| + // 1111(31-28) | 01000 (27-23) | D(22)| 10 (21-20) |
|
| + // Rn(19-16) | Vd (15-12) | type(11-8) | size (7-6) | align(5-4) |
|
| + // Rm(3-0)
|
| + ASSERT(IsEnabled(NEON));
|
| + ASSERT_LE(first.code(), last.code());
|
| + int count = last.code() - first.code() + 1;  // Registers in [first, last].
|
| + ASSERT(count <= 4);
|
| + emit(neon_vector_op(size, base, first, writeback, align_bytes,
|
| + Load, MultipleElements, element_0,
|
| + count_to_type(count)));
|
| +}
|
| +
|
| +void Assembler::vld1(int size,  // Element size in bits: 8, 16 or 32.
|
| + const Register base,
|
| + const DwVfpRegister first,
|
| + NeonElementIndex element_index,
|
| + NeonWritebackType writeback,
|
| + int align_bytes) {
|
| + // VLD1 (single element to one lane).
|
| + // Load one element from memory into one element of a register.
|
| + // Instruction details available in ARM DDI 0406B, A8-604.
|
| + // 1111(31-28) | 01001 (27-23) | D(22)| 10 (21-20) |
|
| + // Rn(19-16) | Vd(15-12) | size(11-10) | 00(9-8) | index_align(7-4) | Rm(3-0)
|
| + ASSERT(IsEnabled(NEON));
|
| + emit(neon_vector_op(size, base, first, writeback, align_bytes,
|
| + Load, SingleElement, element_index, 0));
|
| +}
|
| +
|
| +void Assembler::vst1(int size,  // Element size in bits: 8, 16 or 32.
|
| + const Register base,
|
| + const DwVfpRegister first,
|
| + const DwVfpRegister last,
|
| + NeonWritebackType writeback,
|
| + int align_bytes) {
|
| + // VST1 (multiple single elements).
|
| + // Store elements to memory from one, two, three, or four registers,
|
| + // without interleaving.
|
| + // Instruction details available in ARM DDI 0406B, A8-768.
|
| + // 1111(31-28) | 01000 (27-23) | D(22)| 00 (21-20) |
|
| + // Rn(19-16) | Vd (15-12) | type(11-8) | size (7-6) | align(5-4) |
|
| + // Rm(3-0)
|
| + ASSERT(IsEnabled(NEON));
|
| + ASSERT_LE(first.code(), last.code());
|
| + int count = last.code() - first.code() + 1;  // Registers in [first, last].
|
| + ASSERT(count <= 4);
|
| + emit(neon_vector_op(size, base, first, writeback, align_bytes,
|
| + Store, MultipleElements, element_0,
|
| + count_to_type(count)));
|
| +}
|
| +
|
| +void Assembler::vst1(int size,  // Element size in bits: 8, 16 or 32.
|
| + const Register base,
|
| + const DwVfpRegister first,
|
| + NeonElementIndex element_index,
|
| + NeonWritebackType writeback,
|
| + int align_bytes) {
|
| + // VST1 (single element from one lane).
|
| + // Store one element to memory from one element of a register.
|
| + // Instruction details available in ARM DDI 0406B, A8-770.
|
| + // 1111(31-28) | 01001 (27-23) | D(22)| 00 (21-20) |
|
| + // Rn(19-16) | Vd (15-12) | size(11-10) | 00 (9-8) | index_align(7-4) |
|
| + // Rm(3-0)
|
| + ASSERT(IsEnabled(NEON));
|
| + emit(neon_vector_op(size, base, first, writeback, align_bytes,
|
| + Store, SingleElement, element_index, 0));
|
| +}
|
| +
|
| +void Assembler::vld4(int size,  // Element size in bits: 8, 16 or 32.
|
| + const Register base,
|
| + const DwVfpRegister first,
|
| + const DwVfpRegister last,  // Not read by this overload.
|
| + NeonWritebackType writeback,
|
| + int align_bytes) {
|
| + // VLD4 (multiple 4-element structures).
|
| + // Load multiple 4-element structures from memory into four registers,
|
| + // with de-interleaving.
|
| + // Instruction details available in ARM DDI 0406B, A8-620.
|
| + // 1111(31-28) | 01000 (27-23) | D(22)| 10 (21-20) |
|
| + // Rn(19-16) | Vd (15-12) | type(11-8) | size (7-6) | align(5-4) |
|
| + // Rm(3-0)
|
| + ASSERT(IsEnabled(NEON));
|
| + // We don't support increment == 2, yet.
|
| + int type = 0;  // Fixed type field (bits 11-8).
|
| + emit(neon_vector_op(size, base, first, writeback, align_bytes,
|
| + Load, MultipleElements, element_0, type));
|
| +}
|
| +
|
| +void Assembler::vld4(int size,  // Element size in bits: 8, 16 or 32.
|
| + const Register base,
|
| + const DwVfpRegister first,
|
| + NeonElementIndex element_index,
|
| + NeonWritebackType writeback,
|
| + int align_bytes) {
|
| + // VLD4 (single 4-element structure to one lane).
|
| + // Load one 4-element structure from memory into corresponding elements of
|
| + // four registers.
|
| + // Instruction details available in ARM DDI 0406B, A8-622.
|
| + // 1111(31-28) | 01001 (27-23) | D(22)| 10 (21-20) |
|
| + // Rn(19-16) | Vd (15-12) | size(11-10) | 11 (9-8) | index_align(7-4) |
|
| + // Rm(3-0)
|
| + ASSERT(IsEnabled(NEON));
|
| + emit(neon_vector_op(size, base, first, writeback, align_bytes,
|
| + Load, SingleElement, element_index, 0));
|
| +}
|
| +
|
| +void Assembler::vst4(int size,  // Element size in bits: 8, 16 or 32.
|
| + const Register base,
|
| + const DwVfpRegister first,
|
| + const DwVfpRegister last,  // Not read by this overload.
|
| + NeonWritebackType writeback,
|
| + int align_bytes) {
|
| + // VST4 (multiple 4-element structures).
|
| + // Store multiple 4-element structures to memory from four registers,
|
| + // with interleaving.
|
| + // Instruction details available in ARM DDI 0406B, A8-780.
|
| + // 1111(31-28) | 01000 (27-23) | D(22)| 00 (21-20) |
|
| + // Rn(19-16) | Vd (15-12) | type(11-8) | size (7-6) | align(5-4) |
|
| + // Rm(3-0)
|
| + ASSERT(IsEnabled(NEON));
|
| + // We don't support increment == 2, yet.
|
| + int type = 0;  // Fixed type field (bits 11-8).
|
| + emit(neon_vector_op(size, base, first, writeback, align_bytes,
|
| + Store, MultipleElements, element_0, type));
|
| +}
|
| +
|
| +void Assembler::vst4(int size,  // Element size in bits: 8, 16 or 32.
|
| + const Register base,
|
| + const DwVfpRegister first,
|
| + NeonElementIndex element_index,
|
| + NeonWritebackType writeback,
|
| + int align_bytes) {
|
| + // VST4 (single 4-element structure from one lane).
|
| + // Store one 4-element structure to memory from corresponding elements
|
| + // of four registers.
|
| + // Instruction details available in ARM DDI 0406B, A8-782.
|
| + // 1111(31-28) | 01001 (27-23) | D(22)| 00 (21-20) |
|
| + // Rn(19-16) | Vd (15-12) | size(11-10) | 11 (9-8) | index_align(7-4) |
|
| + // Rm(3-0)
|
| + ASSERT(IsEnabled(NEON));
|
| + emit(neon_vector_op(size, base, first, writeback, align_bytes,
|
| + Store, SingleElement, element_index, 0));
|
| +}
|
| +
|
| // Pseudo instructions.
|
| void Assembler::nop(int type) {
|
| // ARMv6{K/T2} and v7 have an actual NOP instruction but it serializes
|
|
|