Chromium Code Reviews| Index: src/arm/assembler-arm.cc |
| diff --git a/src/arm/assembler-arm.cc b/src/arm/assembler-arm.cc |
| index 89c0a3b3cd936d111c4e354156d743c368684f7d..3aaf82ba3341d64bc71296d6bea59c34b9a87049 100644 |
| --- a/src/arm/assembler-arm.cc |
| +++ b/src/arm/assembler-arm.cc |
| @@ -49,6 +49,7 @@ bool CpuFeatures::initialized_ = false; |
| #endif |
| unsigned CpuFeatures::supported_ = 0; |
| unsigned CpuFeatures::found_by_runtime_probing_only_ = 0; |
// Default value of the cache line size. Later in this diff, CpuFeatures::Probe()
// overwrites it with 32 when OS::GetCpuPart() reports CORTEX_A9 or CORTEX_A5.
// NOTE(review): the unit is presumably bytes -- confirm against the declaration.
| +unsigned CpuFeatures::cache_line_size_ = 64; |
| ExternalReference ExternalReference::cpu_features() { |
| @@ -124,6 +125,9 @@ void CpuFeatures::Probe() { |
| static_cast<uint64_t>(1) << VFP3 | |
| static_cast<uint64_t>(1) << ARMv7; |
| } |
| + if (FLAG_enable_neon) { |
| + supported_ |= 1u << NEON; |
| + } |
| // For the simulator=arm build, use ARMv7 when FLAG_enable_armv7 is enabled |
| if (FLAG_enable_armv7) { |
| supported_ |= static_cast<uint64_t>(1) << ARMv7; |
| @@ -156,6 +160,10 @@ void CpuFeatures::Probe() { |
| static_cast<uint64_t>(1) << ARMv7; |
| } |
| + if (!IsSupported(NEON) && FLAG_enable_neon && OS::ArmCpuHasFeature(NEON)) { |
| + found_by_runtime_probing_only_ |= 1u << NEON; |
| + } |
| + |
| if (!IsSupported(ARMv7) && FLAG_enable_armv7 && OS::ArmCpuHasFeature(ARMv7)) { |
| found_by_runtime_probing_only_ |= static_cast<uint64_t>(1) << ARMv7; |
| } |
| @@ -170,12 +178,18 @@ void CpuFeatures::Probe() { |
| static_cast<uint64_t>(1) << UNALIGNED_ACCESSES; |
| } |
| - if (OS::GetCpuImplementer() == QUALCOMM_IMPLEMENTER && |
| + CpuImplementer implementer = OS::GetCpuImplementer(); |
| + if (implementer == QUALCOMM_IMPLEMENTER && |
| FLAG_enable_movw_movt && OS::ArmCpuHasFeature(ARMv7)) { |
| found_by_runtime_probing_only_ |= |
| static_cast<uint64_t>(1) << MOVW_MOVT_IMMEDIATE_LOADS; |
| } |
| + CpuPart part = OS::GetCpuPart(implementer); |
| + if ((part == CORTEX_A9) || (part == CORTEX_A5)) { |
| + cache_line_size_ = 32; |
| + } |
| + |
| if (!IsSupported(VFP32DREGS) && FLAG_enable_32dregs |
| && OS::ArmCpuHasFeature(VFP32DREGS)) { |
| found_by_runtime_probing_only_ |= static_cast<uint64_t>(1) << VFP32DREGS; |
| @@ -246,11 +260,12 @@ void CpuFeatures::PrintTarget() { |
| void CpuFeatures::PrintFeatures() { |
| printf( |
| - "ARMv7=%d VFP3=%d VFP32DREGS=%d SUDIV=%d UNALIGNED_ACCESSES=%d " |
| + "ARMv7=%d VFP3=%d VFP32DREGS=%d NEON=%d SUDIV=%d UNALIGNED_ACCESSES=%d " |
| "MOVW_MOVT_IMMEDIATE_LOADS=%d", |
| CpuFeatures::IsSupported(ARMv7), |
| CpuFeatures::IsSupported(VFP3), |
| CpuFeatures::IsSupported(VFP32DREGS), |
| + CpuFeatures::IsSupported(NEON), |
| CpuFeatures::IsSupported(SUDIV), |
| CpuFeatures::IsSupported(UNALIGNED_ACCESSES), |
| CpuFeatures::IsSupported(MOVW_MOVT_IMMEDIATE_LOADS)); |
| @@ -376,6 +391,78 @@ MemOperand::MemOperand(Register rn, Register rm, |
| } |
// Builds a NEON memory operand from a base register, an addressing mode and an
// alignment value.
//   rn    - base register, stored in rn_.
//   am    - addressing mode; only Offset and PostIndex are accepted (asserted).
//   align - alignment selector; must be 0, 64, 128 or 256.
| +NeonMemOperand::NeonMemOperand(Register rn, AddrMode am, int align) { |
| + ASSERT((am == Offset) || (am == PostIndex)); |
| + rn_ = rn; |
// No index register is supplied here, so rm_ is filled with a marker register:
// pc for Offset, sp for PostIndex.
// NOTE(review): presumably these map to the Rm values the ARM manual reserves
// for "no writeback" / "writeback" in NEON load/store encodings -- confirm.
| + rm_ = (am == Offset) ? pc : sp; |
// Translate the alignment value into the small code kept in align_:
// 0 -> 0, 64 -> 1, 128 -> 2, 256 -> 3.
| + switch (align) { |

ulan
2013/07/09 15:16:32
Extracting this switch into a function would avoid
vincent.belliard.fr
2013/07/10 15:30:38
Done.
|
| + case 0: |
| + align_ = 0; |
| + break; |
| + case 64: |
| + align_ = 1; |
| + break; |
| + case 128: |
| + align_ = 2; |
| + break; |
| + case 256: |
| + align_ = 3; |
| + break; |
// Any other alignment is a caller error; align_ still gets a defined value (0)
// after UNREACHABLE().
| + default: |
| + UNREACHABLE(); |
| + align_ = 0; |
| + break; |
| + } |
| +} |
| + |
| + |
// Builds a NEON memory operand from a base register, an explicit second
// register and an alignment value.
//   rn    - base register, stored in rn_.
//   rm    - second register, stored unchanged in rm_.
//   align - alignment selector; must be 0, 64, 128 or 256.
| +NeonMemOperand::NeonMemOperand(Register rn, Register rm, int align) { |
| + rn_ = rn; |
| + rm_ = rm; |
// Same alignment translation as the (Register, AddrMode, int) constructor:
// 0 -> 0, 64 -> 1, 128 -> 2, 256 -> 3.
| + switch (align) { |
| + case 0: |
| + align_ = 0; |
| + break; |
| + case 64: |
| + align_ = 1; |
| + break; |
| + case 128: |
| + align_ = 2; |
| + break; |
| + case 256: |
| + align_ = 3; |
| + break; |
// Any other alignment is a caller error; align_ still gets a defined value (0)
// after UNREACHABLE().
| + default: |
| + UNREACHABLE(); |
| + align_ = 0; |
| + break; |
| + } |
| +} |
| + |
| + |
// Builds a NEON register-list operand.
//   base            - first register of the list, stored in base_.
//   registers_count - number of registers in the list; must be 1, 2, 3 or 4.
| +NeonListOperand::NeonListOperand(DoubleRegister base, int registers_count) { |
| + base_ = base; |
// Map the register count onto the matching list-type constant
// (nlt_1 .. nlt_4).
| + switch (registers_count) { |
| + case 1: |
| + type_ = nlt_1; |
| + break; |
| + case 2: |
| + type_ = nlt_2; |
| + break; |
| + case 3: |
| + type_ = nlt_3; |
| + break; |
| + case 4: |
| + type_ = nlt_4; |
| + break; |
// Any other count is a caller error; type_ still gets a defined value (nlt_1)
// after UNREACHABLE().
| + default: |
| + UNREACHABLE(); |
| + type_ = nlt_1; |
| + break; |
| + } |
| +} |
| + |
| + |
| // ----------------------------------------------------------------------------- |
| // Specific instructions, constants, and masks. |
| @@ -1543,6 +1630,107 @@ void Assembler::bfi(Register dst, |
| } |
// Emits a PKHBT instruction word.
//   dst  - destination register (Rd); must not be pc.
//   src1 - first source register (Rn); must not be pc.
//   src2 - second source; must be a register operand (rm set, no rs) other
//          than pc, shifted by an immediate LSL amount in [0, 31].
//   cond - condition field OR-ed into the instruction.
| +void Assembler::pkhbt(Register dst, |
| + Register src1, |
| + const Operand& src2, |
| + Condition cond ) { |
| + // Instruction details available in ARM DDI 0406C.b, A8.8.125. |
| + // cond(31-28) | 01101000(27-20) | Rn(19-16) | |
| + // Rd(15-12) | imm5(11-7) | 0(6) | 01(5-4) | Rm(3-0) |
| + ASSERT(!dst.is(pc)); |
| + ASSERT(!src1.is(pc)); |
| + ASSERT(!src2.rm().is(pc)); |
| + ASSERT(!src2.rm().is(no_reg)); |
| + ASSERT(src2.rs().is(no_reg)); |
| + ASSERT((src2.shift_imm_ >= 0) && (src2.shift_imm_ <= 31)); |
| + ASSERT(src2.shift_op() == LSL); |
// The shift amount goes straight into imm5; bit 6 is left clear.
| + emit(cond | 0x68*B20 | src1.code()*B16 | dst.code()*B12 | |
| + src2.shift_imm_*B7 | B4 | src2.rm().code()); |
| +} |
| + |
| + |
// Emits a PKHTB instruction word.
//   dst  - destination register (Rd); must not be pc.
//   src1 - first source register (Rn); must not be pc.
//   src2 - second source; must be a register operand (rm set, no rs) other
//          than pc, shifted by an immediate ASR amount in [1, 32].
//   cond - condition field OR-ed into the instruction.
| +void Assembler::pkhtb(Register dst, |
| + Register src1, |
| + const Operand& src2, |
| + Condition cond) { |
| + // Instruction details available in ARM DDI 0406C.b, A8.8.125. |
| + // cond(31-28) | 01101000(27-20) | Rn(19-16) | |
| + // Rd(15-12) | imm5(11-7) | 1(6) | 01(5-4) | Rm(3-0) |
| + ASSERT(!dst.is(pc)); |
| + ASSERT(!src1.is(pc)); |
| + ASSERT(!src2.rm().is(pc)); |
| + ASSERT(!src2.rm().is(no_reg)); |
| + ASSERT(src2.rs().is(no_reg)); |
| + ASSERT((src2.shift_imm_ >= 1) && (src2.shift_imm_ <= 32)); |
| + ASSERT(src2.shift_op() == ASR); |
// imm5 is only five bits wide, so a shift of 32 is written as 0.
| + int asr = (src2.shift_imm_ == 32) ? 0 : src2.shift_imm_; |
// B6 is the only opcode bit that differs from the pkhbt encoding.
| + emit(cond | 0x68*B20 | src1.code()*B16 | dst.code()*B12 | |
| + asr*B7 | B6 | B4 | src2.rm().code()); |
| +} |
| + |
| + |
// Emits a UXTB instruction word.
//   dst  - destination register (Rd); must not be pc.
//   src  - source; must be a register operand (rm set, no rs) other than pc,
//          with a ROR rotation of 0, 8, 16 or 24.
//   cond - condition field OR-ed into the instruction.
| +void Assembler::uxtb(Register dst, |
| + const Operand& src, |
| + Condition cond) { |
| + // Instruction details available in ARM DDI 0406C.b, A8.8.274. |
| + // cond(31-28) | 01101110(27-20) | 1111(19-16) | |
| + // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0) |
| + ASSERT(!dst.is(pc)); |
| + ASSERT(!src.rm().is(pc)); |
| + ASSERT(!src.rm().is(no_reg)); |
| + ASSERT(src.rs().is(no_reg)); |
| + ASSERT((src.shift_imm_ == 0) || |
| + (src.shift_imm_ == 8) || |
| + (src.shift_imm_ == 16) || |
| + (src.shift_imm_ == 24)); |
| + ASSERT(src.shift_op() == ROR); |
// (shift_imm_ >> 1) & 0xC turns 0/8/16/24 into 0/4/8/12; multiplied by B8 this
// places rotation/8 in the two-bit rotate field at bits 11-10.
| + emit(cond | 0x6E*B20 | 0xF*B16 | dst.code()*B12 | |
| + ((src.shift_imm_ >> 1)&0xC)*B8 | 7*B4 | src.rm().code()); |
| +} |
| + |
| + |
// Emits a UXTAB instruction word.
//   dst  - destination register (Rd); must not be pc.
//   src1 - first source register (Rn); must not be pc.
//   src2 - second source; must be a register operand (rm set, no rs) other
//          than pc, with a ROR rotation of 0, 8, 16 or 24.
//   cond - condition field OR-ed into the instruction.
| +void Assembler::uxtab(Register dst, |
| + Register src1, |
| + const Operand& src2, |
| + Condition cond) { |
| + // Instruction details available in ARM DDI 0406C.b, A8.8.271. |
| + // cond(31-28) | 01101110(27-20) | Rn(19-16) | |
| + // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0) |
| + ASSERT(!dst.is(pc)); |
| + ASSERT(!src1.is(pc)); |
| + ASSERT(!src2.rm().is(pc)); |
| + ASSERT(!src2.rm().is(no_reg)); |
| + ASSERT(src2.rs().is(no_reg)); |
| + ASSERT((src2.shift_imm_ == 0) || |
| + (src2.shift_imm_ == 8) || |
| + (src2.shift_imm_ == 16) || |
| + (src2.shift_imm_ == 24)); |
| + ASSERT(src2.shift_op() == ROR); |
// (shift_imm_ >> 1) & 0xC turns 0/8/16/24 into 0/4/8/12; multiplied by B8 this
// places rotation/8 in the two-bit rotate field at bits 11-10.
| + emit(cond | 0x6E*B20 | src1.code()*B16 | dst.code()*B12 | |
| + ((src2.shift_imm_ >> 1) &0xC)*B8 | 7*B4 | src2.rm().code()); |
| +} |
| + |
| + |
// Emits a UXTB16 instruction word.
//   dst  - destination register (Rd); must not be pc.
//   src  - source; must be a register operand (rm set, no rs) other than pc,
//          with a ROR rotation of 0, 8, 16 or 24.
//   cond - condition field OR-ed into the instruction.
| +void Assembler::uxtb16(Register dst, |
| + const Operand& src, |
| + Condition cond) { |
| + // Instruction details available in ARM DDI 0406C.b, A8.8.275. |
| + // cond(31-28) | 01101100(27-20) | 1111(19-16) | |
| + // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0) |
| + ASSERT(!dst.is(pc)); |
| + ASSERT(!src.rm().is(pc)); |
| + ASSERT(!src.rm().is(no_reg)); |
| + ASSERT(src.rs().is(no_reg)); |
| + ASSERT((src.shift_imm_ == 0) || |
| + (src.shift_imm_ == 8) || |
| + (src.shift_imm_ == 16) || |
| + (src.shift_imm_ == 24)); |
| + ASSERT(src.shift_op() == ROR); |
// (shift_imm_ >> 1) & 0xC turns 0/8/16/24 into 0/4/8/12; multiplied by B8 this
// places rotation/8 in the two-bit rotate field at bits 11-10.
| + emit(cond | 0x6C*B20 | 0xF*B16 | dst.code()*B12 | |
| + ((src.shift_imm_ >> 1)&0xC)*B8 | 7*B4 | src.rm().code()); |
| +} |
| + |
| + |
| // Status register access instructions. |
| void Assembler::mrs(Register dst, SRegister s, Condition cond) { |
| ASSERT(!dst.is(pc)); |
| @@ -1640,6 +1828,22 @@ void Assembler::strd(Register src1, Register src2, |
| addrmod3(cond | B7 | B6 | B5 | B4, src1, dst); |
| } |
| +// Preload instructions. |
// Emits a PLD (preload) instruction word for a base register plus an immediate
// offset.
//   address - must have no index register (rm is no_reg), must use the Offset
//             addressing mode, and its offset magnitude must be below 4096.
// Bit layout produced by the emit() expression below:
//   kSpecialCondition(31-28) | 0101(27-24) | U(23) | 1(22) | 01(21-20) |
//   Rn(19-16) | 1111(15-12) | imm12(11-0)
| +void Assembler::pld(const MemOperand& address) { |

ulan
2013/07/09 15:16:32
Missing the description comment.
vincent.belliard.fr
2013/07/10 15:30:38
Done.
|
| + ASSERT(address.rm().is(no_reg)); |
| + ASSERT(address.am() == Offset); |
// U (bit 23) stays set for a non-negative offset. A negative offset is encoded
// as its magnitude with U cleared.
| + int U = B23; |
| + int offset = address.offset(); |
| + if (offset < 0) { |
| + offset = -offset; |
| + U = 0; |
| + } |
// The magnitude is OR-ed into the low 12 bits, so it must be below 4096.
| + ASSERT(offset < 4096); |
| + emit(kSpecialCondition | B26 | B24 | U | B22 | B20 | address.rn().code()*B16 | |
| + 0xf*B12 | offset); |
| +} |
| + |
| + |
| // Load/Store multiple instructions. |
| void Assembler::ldm(BlockAddrMode am, |
| Register base, |
| @@ -2701,6 +2905,50 @@ void Assembler::vsqrt(const DwVfpRegister dst, |
| } |
| +// Support for NEON. |
| + |
// Emits a NEON VLD1 instruction word. Asserts that NEON is supported.
//   size - value placed in the size field (bits 7-6).
//   dst  - register list; its base register supplies Vd/D and its type()
//          supplies the type field (bits 11-8).
//   src  - memory operand; supplies Rn, the align field (bits 5-4) and Rm.
| +void Assembler::vld1(NeonSize size, |
| + const NeonListOperand& dst, |
| + const NeonMemOperand& src) { |
| + // Instruction details available in ARM DDI 0406C.b, A8.8.320. |
| + // 1111(31-28) | 01000(27-23) | D(22) | 10(21-20) | Rn(19-16) | |
| + // Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0) |
| + ASSERT(CpuFeatures::IsSupported(NEON)); |
// split_code() fills vd and d, which go into the Vd field and the D bit.
| + int vd, d; |
| + dst.base().split_code(&vd, &d); |
// 2*B20 sets bits 21-20 to 10; vst1 emits the same layout without it.
| + emit(0xFU*B28 | 4*B24 | d*B22 | 2*B20 | src.rn().code()*B16 | vd*B12 | |
| + dst.type()*B8 | size*B6 | src.align()*B4 | src.rm().code()); |
| +} |
| + |
| + |
// Emits a NEON VST1 instruction word. Asserts that NEON is supported.
//   size - value placed in the size field (bits 7-6).
//   src  - register list; its base register supplies Vd/D and its type()
//          supplies the type field (bits 11-8).
//   dst  - memory operand; supplies Rn, the align field (bits 5-4) and Rm.
| +void Assembler::vst1(NeonSize size, |
| + const NeonListOperand& src, |
| + const NeonMemOperand& dst) { |
| + // Instruction details available in ARM DDI 0406C.b, A8.8.404. |
| + // 1111(31-28) | 01000(27-23) | D(22) | 00(21-20) | Rn(19-16) | |
| + // Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0) |
| + ASSERT(CpuFeatures::IsSupported(NEON)); |
// split_code() fills vd and d, which go into the Vd field and the D bit.
| + int vd, d; |
| + src.base().split_code(&vd, &d); |
// Bits 21-20 stay 00 here; vld1 emits the same layout with 2*B20 added.
| + emit(0xFU*B28 | 4*B24 | d*B22 | dst.rn().code()*B16 | vd*B12 | src.type()*B8 | |
| + size*B6 | dst.align()*B4 | dst.rm().code()); |
| +} |
| + |
| + |
// Emits a NEON VMOVL instruction word. Asserts that NEON is supported.
//   dt  - data type; masked with NeonDataTypeUMask and NeonDataTypeSizeMask to
//         fill the U bit and the imm3 field of the layout below.
//   dst - destination Q register; supplies Vd/D.
//   src - source D register; supplies Vm/M.
| +void Assembler::vmovl(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src) { |
| + // Instruction details available in ARM DDI 0406C.b, A8.8.346. |
| + // 1111(31-28) | 001(27-25) | U(24) | 1(23) | D(22) | imm3(21-19) | |
| + // 000(18-16) | Vd(15-12) | 101000(11-6) | M(5) | 1(4) | Vm(3-0) |
| + ASSERT(CpuFeatures::IsSupported(NEON)); |
// split_code() fills the register-field value and its companion bit for each
// operand (vd/d for dst, vm/m for src).
| + int vd, d; |
| + dst.split_code(&vd, &d); |
| + int vm, m; |
| + src.split_code(&vm, &m); |
// NOTE(review): the U-mask term is OR-ed in unshifted, so NeonDataTypeUMask is
// presumably already positioned at bit 24 -- confirm in the enum definition.
| + emit(0xFU*B28 | B25 | (dt & NeonDataTypeUMask) | B23 | d*B22 | |
| + (dt & NeonDataTypeSizeMask)*B19 | vd*B12 | 0xA*B8 | m*B5 | B4 | vm); |
| +} |
| + |
| + |
| // Pseudo instructions. |
| void Assembler::nop(int type) { |
| // ARMv6{K/T2} and v7 have an actual NOP instruction but it serializes |