Chromium Code Reviews| Index: src/arm64/assembler-arm64.h |
| diff --git a/src/arm64/assembler-arm64.h b/src/arm64/assembler-arm64.h |
| index a55f8138f2a7b0eb0f20dd408e1cc3e7f7537954..e0ce47f58c6a8918067f0c8b15545dd53dd6742b 100644 |
| --- a/src/arm64/assembler-arm64.h |
| +++ b/src/arm64/assembler-arm64.h |
| @@ -65,12 +65,10 @@ namespace internal { |
| static const int kRegListSizeInBits = sizeof(RegList) * kBitsPerByte; |
| - |
| -// Some CPURegister methods can return Register and FPRegister types, so we |
| +// Some CPURegister methods can return Register and VRegister types, so we |
| // need to declare them in advance. |
| struct Register; |
| -struct FPRegister; |
| - |
| +struct VRegister; |
| struct CPURegister { |
| enum Code { |
| @@ -86,12 +84,13 @@ struct CPURegister { |
| // which are always zero-initialized before any constructors are called. |
| kInvalid = 0, |
| kRegister, |
| - kFPRegister, |
| + kVRegister, |
| kNoRegister |
| }; |
| - static CPURegister Create(int code, int size, RegisterType type) { |
| - CPURegister r = {code, size, type}; |
| + static CPURegister Create(int reg_code, int reg_size, RegisterType reg_type, |
| + int lane_count = 1) { |
| + CPURegister r = {reg_code, reg_size, reg_type, lane_count}; |
| return r; |
| } |
| @@ -100,12 +99,15 @@ struct CPURegister { |
| RegList Bit() const; |
| int SizeInBits() const; |
| int SizeInBytes() const; |
| + bool Is8Bits() const; |
| + bool Is16Bits() const; |
| bool Is32Bits() const; |
| bool Is64Bits() const; |
| + bool Is128Bits() const; |
| bool IsValid() const; |
| bool IsValidOrNone() const; |
| bool IsValidRegister() const; |
| - bool IsValidFPRegister() const; |
| + bool IsValidVRegister() const; |
| bool IsNone() const; |
| bool Is(const CPURegister& other) const; |
| bool Aliases(const CPURegister& other) const; |
| @@ -114,12 +116,34 @@ struct CPURegister { |
| bool IsSP() const; |
| bool IsRegister() const; |
| - bool IsFPRegister() const; |
| + bool IsVRegister() const; |
| + |
| + bool IsFPRegister() const { return IsS() || IsD(); } |
| + |
| + bool IsW() const { return IsValidRegister() && Is32Bits(); } |
| + bool IsX() const { return IsValidRegister() && Is64Bits(); } |
| + |
| + // These assertions ensure that the size and type of the register are as |
| + // described. They do not consider the number of lanes that make up a vector. |
| + // So, for example, Is8B() implies IsD(), and Is1D() implies IsD(), but IsD() |
| + // does not imply Is1D() or Is8B(). |
| + // Check the number of lanes, i.e. the format of the vector, using methods |
| + // such as Is8B(), Is1D(), etc. in the VRegister class. |
| + bool IsV() const { return IsVRegister(); } |
| + bool IsB() const { return IsV() && Is8Bits(); } |
| + bool IsH() const { return IsV() && Is16Bits(); } |
| + bool IsS() const { return IsV() && Is32Bits(); } |
| + bool IsD() const { return IsV() && Is64Bits(); } |
| + bool IsQ() const { return IsV() && Is128Bits(); } |
| Register X() const; |
| Register W() const; |
| - FPRegister D() const; |
| - FPRegister S() const; |
| + VRegister V() const; |
| + VRegister B() const; |
| + VRegister H() const; |
| + VRegister D() const; |
| + VRegister S() const; |
| + VRegister Q() const; |
| bool IsSameSizeAndType(const CPURegister& other) const; |
| @@ -130,6 +154,7 @@ struct CPURegister { |
| int reg_code; |
| int reg_size; |
| RegisterType reg_type; |
| + int lane_count; |
| }; |
| @@ -148,13 +173,16 @@ struct Register : public CPURegister { |
| reg_code = r.reg_code; |
| reg_size = r.reg_size; |
| reg_type = r.reg_type; |
| + lane_count = r.lane_count; |
| DCHECK(IsValidOrNone()); |
| + DCHECK_EQ(r.lane_count, 1); |
| } |
| Register(const Register& r) { // NOLINT(runtime/explicit) |
| reg_code = r.reg_code; |
| reg_size = r.reg_size; |
| reg_type = r.reg_type; |
| + lane_count = r.lane_count; |
| DCHECK(IsValidOrNone()); |
| } |
| @@ -199,7 +227,7 @@ struct Register : public CPURegister { |
| static const bool kSimpleFPAliasing = true; |
| -struct FPRegister : public CPURegister { |
| +struct VRegister : public CPURegister { |
| enum Code { |
| #define REGISTER_CODE(R) kCode_##R, |
| DOUBLE_REGISTERS(REGISTER_CODE) |
| @@ -208,64 +236,148 @@ struct FPRegister : public CPURegister { |
| kCode_no_reg = -1 |
| }; |
| - static FPRegister Create(int code, int size) { |
| - return FPRegister( |
| - CPURegister::Create(code, size, CPURegister::kFPRegister)); |
| + static VRegister Create(int reg_code, int reg_size, int lane_count = 1) { |
| + DCHECK(base::bits::IsPowerOfTwo32(lane_count) && (lane_count <= 16)); |
| + VRegister v(CPURegister::Create(reg_code, reg_size, CPURegister::kVRegister, |
| + lane_count)); |
| + DCHECK(v.IsValidVRegister()); |
| + return v; |
| + } |
| + |
| + static VRegister Create(int reg_code, VectorFormat format) { |
| + int reg_size = RegisterSizeInBitsFromFormat(format); |
| + int reg_count = IsVectorFormat(format) ? LaneCountFromFormat(format) : 1; |
| + return VRegister::Create(reg_code, reg_size, reg_count); |
| } |
| - FPRegister() { |
| + VRegister() { |
| reg_code = 0; |
| reg_size = 0; |
| reg_type = CPURegister::kNoRegister; |
| + lane_count = 1; |
| } |
| - explicit FPRegister(const CPURegister& r) { |
| + explicit VRegister(const CPURegister& r) { |
| reg_code = r.reg_code; |
| reg_size = r.reg_size; |
| reg_type = r.reg_type; |
| + lane_count = r.lane_count; |
| DCHECK(IsValidOrNone()); |
| } |
| - FPRegister(const FPRegister& r) { // NOLINT(runtime/explicit) |
| + VRegister(const VRegister& r) { // NOLINT(runtime/explicit) |
| reg_code = r.reg_code; |
| reg_size = r.reg_size; |
| reg_type = r.reg_type; |
| + lane_count = r.lane_count; |
| DCHECK(IsValidOrNone()); |
| } |
| bool IsValid() const { |
| - DCHECK(IsFPRegister() || IsNone()); |
| - return IsValidFPRegister(); |
| + DCHECK(IsVRegister() || IsNone()); |
| + return IsValidVRegister(); |
| + } |
| + |
| + static VRegister BRegFromCode(unsigned code); |
| + static VRegister HRegFromCode(unsigned code); |
| + static VRegister SRegFromCode(unsigned code); |
| + static VRegister DRegFromCode(unsigned code); |
| + static VRegister QRegFromCode(unsigned code); |
| + static VRegister VRegFromCode(unsigned code); |
| + |
| + VRegister V8B() const { |
| + return VRegister::Create(code(), kDRegSizeInBits, 8); |
| + } |
| + VRegister V16B() const { |
| + return VRegister::Create(code(), kQRegSizeInBits, 16); |
| + } |
| + VRegister V4H() const { |
| + return VRegister::Create(code(), kDRegSizeInBits, 4); |
| + } |
| + VRegister V8H() const { |
| + return VRegister::Create(code(), kQRegSizeInBits, 8); |
| + } |
| + VRegister V2S() const { |
| + return VRegister::Create(code(), kDRegSizeInBits, 2); |
| + } |
| + VRegister V4S() const { |
| + return VRegister::Create(code(), kQRegSizeInBits, 4); |
| + } |
| + VRegister V2D() const { |
| + return VRegister::Create(code(), kQRegSizeInBits, 2); |
| } |
| + VRegister V1D() const { |
| + return VRegister::Create(code(), kDRegSizeInBits, 1); |
| + } |
| + |
| + bool Is8B() const { return (Is64Bits() && (lane_count == 8)); } |
| + bool Is16B() const { return (Is128Bits() && (lane_count == 16)); } |
| + bool Is4H() const { return (Is64Bits() && (lane_count == 4)); } |
| + bool Is8H() const { return (Is128Bits() && (lane_count == 8)); } |
| + bool Is2S() const { return (Is64Bits() && (lane_count == 2)); } |
| + bool Is4S() const { return (Is128Bits() && (lane_count == 4)); } |
| + bool Is1D() const { return (Is64Bits() && (lane_count == 1)); } |
| + bool Is2D() const { return (Is128Bits() && (lane_count == 2)); } |
| + |
| + // For consistency, we assert the number of lanes of these scalar registers, |
| + // even though there are no vectors of equivalent total size with which they |
| + // could alias. |
| + bool Is1B() const { |
| + DCHECK(!(Is8Bits() && IsVector())); |
| + return Is8Bits(); |
| + } |
| + bool Is1H() const { |
| + DCHECK(!(Is16Bits() && IsVector())); |
| + return Is16Bits(); |
| + } |
| + bool Is1S() const { |
| + DCHECK(!(Is32Bits() && IsVector())); |
| + return Is32Bits(); |
| + } |
| + |
| + bool IsLaneSizeB() const { return LaneSizeInBits() == kBRegSizeInBits; } |
| + bool IsLaneSizeH() const { return LaneSizeInBits() == kHRegSizeInBits; } |
| + bool IsLaneSizeS() const { return LaneSizeInBits() == kSRegSizeInBits; } |
| + bool IsLaneSizeD() const { return LaneSizeInBits() == kDRegSizeInBits; } |
| - static FPRegister SRegFromCode(unsigned code); |
| - static FPRegister DRegFromCode(unsigned code); |
| + bool IsScalar() const { return lane_count == 1; } |
| + bool IsVector() const { return lane_count > 1; } |
| + |
| + bool IsSameFormat(const VRegister& other) const { |
| + return (reg_size == other.reg_size) && (lane_count == other.lane_count); |
| + } |
| + |
| + int LaneCount() const { return lane_count; } |
| + |
| + unsigned LaneSizeInBytes() const { return SizeInBytes() / lane_count; } |
| + |
| + unsigned LaneSizeInBits() const { return LaneSizeInBytes() * 8; } |
| // Start of V8 compatibility section --------------------- |
| - static const int kMaxNumRegisters = kNumberOfFPRegisters; |
| + static const int kMaxNumRegisters = kNumberOfVRegisters; |
| STATIC_ASSERT(kMaxNumRegisters == Code::kAfterLast); |
| - // Crankshaft can use all the FP registers except: |
| + // Crankshaft can use all the V registers except: |
| // - d15 which is used to keep the 0 double value |
| // - d30 which is used in crankshaft as a double scratch register |
| // - d31 which is used in the MacroAssembler as a double scratch register |
| - static FPRegister from_code(int code) { |
| + static VRegister from_code(int code) { |
| // Always return a D register. |
| - return FPRegister::Create(code, kDRegSizeInBits); |
| + return VRegister::Create(code, kDRegSizeInBits); |
| } |
| // End of V8 compatibility section ----------------------- |
| }; |
| - |
| -STATIC_ASSERT(sizeof(CPURegister) == sizeof(Register)); |
| -STATIC_ASSERT(sizeof(CPURegister) == sizeof(FPRegister)); |
| - |
| +static_assert(sizeof(CPURegister) == sizeof(Register), |
| + "CPURegister must be same size as Register"); |
| +static_assert(sizeof(CPURegister) == sizeof(VRegister), |
| + "CPURegister must be same size as VRegister"); |
| #if defined(ARM64_DEFINE_REG_STATICS) |
| -#define INITIALIZE_REGISTER(register_class, name, code, size, type) \ |
| - const CPURegister init_##register_class##_##name = {code, size, type}; \ |
| - const register_class& name = *reinterpret_cast<const register_class*>( \ |
| - &init_##register_class##_##name) |
| +#define INITIALIZE_REGISTER(register_class, name, code, size, type) \ |
| + const CPURegister init_##register_class##_##name = {code, size, type, 1}; \ |
| + const register_class& name = *reinterpret_cast<const register_class*>( \ |
| + &init_##register_class##_##name) |
| #define ALIAS_REGISTER(register_class, alias, name) \ |
| const register_class& alias = *reinterpret_cast<const register_class*>( \ |
| &init_##register_class##_##name) |
| @@ -277,10 +389,10 @@ STATIC_ASSERT(sizeof(CPURegister) == sizeof(FPRegister)); |
| #endif // defined(ARM64_DEFINE_REG_STATICS) |
| // No*Reg is used to indicate an unused argument, or an error case. Note that |
| -// these all compare equal (using the Is() method). The Register and FPRegister |
| +// these all compare equal (using the Is() method). The Register and VRegister |
| // variants are provided for convenience. |
| INITIALIZE_REGISTER(Register, NoReg, 0, 0, CPURegister::kNoRegister); |
| -INITIALIZE_REGISTER(FPRegister, NoFPReg, 0, 0, CPURegister::kNoRegister); |
| +INITIALIZE_REGISTER(VRegister, NoVReg, 0, 0, CPURegister::kNoRegister); |
| INITIALIZE_REGISTER(CPURegister, NoCPUReg, 0, 0, CPURegister::kNoRegister); |
| // v8 compatibility. |
| @@ -299,17 +411,26 @@ INITIALIZE_REGISTER(Register, wcsp, kSPRegInternalCode, kWRegSizeInBits, |
| INITIALIZE_REGISTER(Register, csp, kSPRegInternalCode, kXRegSizeInBits, |
| CPURegister::kRegister); |
| -#define DEFINE_FPREGISTERS(N) \ |
| - INITIALIZE_REGISTER(FPRegister, s##N, N, \ |
| - kSRegSizeInBits, CPURegister::kFPRegister); \ |
| - INITIALIZE_REGISTER(FPRegister, d##N, N, \ |
| - kDRegSizeInBits, CPURegister::kFPRegister); |
| -GENERAL_REGISTER_CODE_LIST(DEFINE_FPREGISTERS) |
| -#undef DEFINE_FPREGISTERS |
| +#define DEFINE_VREGISTERS(N) \ |
| + INITIALIZE_REGISTER(VRegister, b##N, N, kBRegSizeInBits, \ |
| + CPURegister::kVRegister); \ |
| + INITIALIZE_REGISTER(VRegister, h##N, N, kHRegSizeInBits, \ |
| + CPURegister::kVRegister); \ |
| + INITIALIZE_REGISTER(VRegister, s##N, N, kSRegSizeInBits, \ |
| + CPURegister::kVRegister); \ |
| + INITIALIZE_REGISTER(VRegister, d##N, N, kDRegSizeInBits, \ |
| + CPURegister::kVRegister); \ |
| + INITIALIZE_REGISTER(VRegister, q##N, N, kQRegSizeInBits, \ |
| + CPURegister::kVRegister); \ |
| + INITIALIZE_REGISTER(VRegister, v##N, N, kQRegSizeInBits, \ |
| + CPURegister::kVRegister); |
| +GENERAL_REGISTER_CODE_LIST(DEFINE_VREGISTERS) |
| +#undef DEFINE_VREGISTERS |
| #undef INITIALIZE_REGISTER |
| // Registers aliases. |
| +ALIAS_REGISTER(VRegister, v8_, v8); // Avoid conflicts with namespace v8. |
| ALIAS_REGISTER(Register, ip0, x16); |
| ALIAS_REGISTER(Register, ip1, x17); |
| ALIAS_REGISTER(Register, wip0, w16); |
| @@ -332,13 +453,13 @@ ALIAS_REGISTER(Register, xzr, x31); |
| ALIAS_REGISTER(Register, wzr, w31); |
| // Keeps the 0 double value. |
| -ALIAS_REGISTER(FPRegister, fp_zero, d15); |
| +ALIAS_REGISTER(VRegister, fp_zero, d15); |
| // Crankshaft double scratch register. |
| -ALIAS_REGISTER(FPRegister, crankshaft_fp_scratch, d29); |
| +ALIAS_REGISTER(VRegister, crankshaft_fp_scratch, d29); |
| // MacroAssembler double scratch registers. |
| -ALIAS_REGISTER(FPRegister, fp_scratch, d30); |
| -ALIAS_REGISTER(FPRegister, fp_scratch1, d30); |
| -ALIAS_REGISTER(FPRegister, fp_scratch2, d31); |
| +ALIAS_REGISTER(VRegister, fp_scratch, d30); |
| +ALIAS_REGISTER(VRegister, fp_scratch1, d30); |
| +ALIAS_REGISTER(VRegister, fp_scratch2, d31); |
| #undef ALIAS_REGISTER |
| @@ -373,11 +494,24 @@ bool AreSameSizeAndType(const CPURegister& reg1, |
| const CPURegister& reg7 = NoCPUReg, |
| const CPURegister& reg8 = NoCPUReg); |
| -typedef FPRegister FloatRegister; |
| -typedef FPRegister DoubleRegister; |
| - |
| -// TODO(arm64) Define SIMD registers. |
| -typedef FPRegister Simd128Register; |
| +// AreSameFormat returns true if all of the specified VRegisters have the same |
| +// vector format. Arguments set to NoVReg are ignored, as are any subsequent |
| +// arguments. At least one argument (reg1) must be valid (not NoVReg). |
| +bool AreSameFormat(const VRegister& reg1, const VRegister& reg2, |
| + const VRegister& reg3 = NoVReg, |
| + const VRegister& reg4 = NoVReg); |
| + |
| +// AreConsecutive returns true if all of the specified VRegisters are |
| +// consecutive in the register file. Arguments set to NoVReg are ignored, as are |
|
bbudge
2017/01/31 01:41:31
Current behavior is to return 'true' after the fir
martyn.capewell
2017/02/03 11:01:31
Not sure what you're asking - are you saying the c
bbudge
2017/02/08 01:39:11
Comment is OK.
|
| +// any subsequent arguments. At least one argument (reg1) must be valid |
| +// (not NoVReg). |
| +bool AreConsecutive(const VRegister& reg1, const VRegister& reg2, |
| + const VRegister& reg3 = NoVReg, |
| + const VRegister& reg4 = NoVReg); |
| + |
| +typedef VRegister FloatRegister; |
| +typedef VRegister DoubleRegister; |
| +typedef VRegister Simd128Register; |
| // ----------------------------------------------------------------------------- |
| // Lists of registers. |
| @@ -401,10 +535,10 @@ class CPURegList { |
| CPURegList(CPURegister::RegisterType type, int size, int first_reg, |
| int last_reg) |
| : size_(size), type_(type) { |
| - DCHECK(((type == CPURegister::kRegister) && |
| - (last_reg < kNumberOfRegisters)) || |
| - ((type == CPURegister::kFPRegister) && |
| - (last_reg < kNumberOfFPRegisters))); |
| + DCHECK( |
| + ((type == CPURegister::kRegister) && (last_reg < kNumberOfRegisters)) || |
| + ((type == CPURegister::kVRegister) && |
| + (last_reg < kNumberOfVRegisters))); |
| DCHECK(last_reg >= first_reg); |
| list_ = (1UL << (last_reg + 1)) - 1; |
| list_ &= ~((1UL << first_reg) - 1); |
| @@ -457,11 +591,13 @@ class CPURegList { |
| // AAPCS64 callee-saved registers. |
| static CPURegList GetCalleeSaved(int size = kXRegSizeInBits); |
| - static CPURegList GetCalleeSavedFP(int size = kDRegSizeInBits); |
| + static CPURegList GetCalleeSavedV(int size = kDRegSizeInBits); |
| // AAPCS64 caller-saved registers. Note that this includes lr. |
| + // TODO(all): Determine how we handle d8-d15 being callee-saved, but the top |
| + // 64 bits of q8-q15 being caller-saved. |
| static CPURegList GetCallerSaved(int size = kXRegSizeInBits); |
| - static CPURegList GetCallerSavedFP(int size = kDRegSizeInBits); |
| + static CPURegList GetCallerSavedV(int size = kDRegSizeInBits); |
| // Registers saved as safepoints. |
| static CPURegList GetSafepointSavedRegisters(); |
| @@ -512,12 +648,12 @@ class CPURegList { |
| bool IsValid() const { |
| const RegList kValidRegisters = 0x8000000ffffffff; |
| - const RegList kValidFPRegisters = 0x0000000ffffffff; |
| + const RegList kValidVRegisters = 0x0000000ffffffff; |
| switch (type_) { |
| case CPURegister::kRegister: |
| return (list_ & kValidRegisters) == list_; |
| - case CPURegister::kFPRegister: |
| - return (list_ & kValidFPRegisters) == list_; |
| + case CPURegister::kVRegister: |
| + return (list_ & kValidVRegisters) == list_; |
| case CPURegister::kNoRegister: |
| return list_ == 0; |
| default: |
| @@ -530,12 +666,11 @@ class CPURegList { |
| // AAPCS64 callee-saved registers. |
| #define kCalleeSaved CPURegList::GetCalleeSaved() |
| -#define kCalleeSavedFP CPURegList::GetCalleeSavedFP() |
| - |
| +#define kCalleeSavedV CPURegList::GetCalleeSavedV() |
| // AAPCS64 caller-saved registers. Note that this includes lr. |
| #define kCallerSaved CPURegList::GetCallerSaved() |
| -#define kCallerSavedFP CPURegList::GetCallerSavedFP() |
| +#define kCallerSavedV CPURegList::GetCallerSavedV() |
| // ----------------------------------------------------------------------------- |
| // Immediates. |
| @@ -1099,9 +1234,101 @@ class Assembler : public AssemblerBase { |
| const Register& rn, |
| const Operand& operand); |
| + // Bitwise and. |
| + void and_(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Bit clear immediate. |
| + void bic(const VRegister& vd, const int imm8, const int left_shift = 0); |
| + |
| + // Bit clear. |
| + void bic(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Bitwise insert if false. |
| + void bif(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Bitwise insert if true. |
| + void bit(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Bitwise select. |
| + void bsl(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Polynomial multiply. |
| + void pmul(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Vector move immediate. |
| + void movi(const VRegister& vd, const uint64_t imm, Shift shift = LSL, |
| + const int shift_amount = 0); |
| + |
| + // Bitwise not. |
| + void mvn(const VRegister& vd, const VRegister& vn); |
| + |
| + // Vector move inverted immediate. |
| + void mvni(const VRegister& vd, const int imm8, Shift shift = LSL, |
| + const int shift_amount = 0); |
| + |
| + // Signed saturating accumulate of unsigned value. |
| + void suqadd(const VRegister& vd, const VRegister& vn); |
| + |
| + // Unsigned saturating accumulate of signed value. |
| + void usqadd(const VRegister& vd, const VRegister& vn); |
| + |
| + // Absolute value. |
| + void abs(const VRegister& vd, const VRegister& vn); |
| + |
| + // Signed saturating absolute value. |
| + void sqabs(const VRegister& vd, const VRegister& vn); |
| + |
| + // Negate. |
| + void neg(const VRegister& vd, const VRegister& vn); |
| + |
| + // Signed saturating negate. |
| + void sqneg(const VRegister& vd, const VRegister& vn); |
| + |
| + // Bitwise not. |
| + void not_(const VRegister& vd, const VRegister& vn); |
| + |
| + // Extract narrow. |
| + void xtn(const VRegister& vd, const VRegister& vn); |
| + |
| + // Extract narrow (second part). |
| + void xtn2(const VRegister& vd, const VRegister& vn); |
| + |
| + // Signed saturating extract narrow. |
| + void sqxtn(const VRegister& vd, const VRegister& vn); |
| + |
| + // Signed saturating extract narrow (second part). |
| + void sqxtn2(const VRegister& vd, const VRegister& vn); |
| + |
| + // Unsigned saturating extract narrow. |
| + void uqxtn(const VRegister& vd, const VRegister& vn); |
| + |
| + // Unsigned saturating extract narrow (second part). |
| + void uqxtn2(const VRegister& vd, const VRegister& vn); |
| + |
| + // Signed saturating extract unsigned narrow. |
| + void sqxtun(const VRegister& vd, const VRegister& vn); |
| + |
| + // Signed saturating extract unsigned narrow (second part). |
| + void sqxtun2(const VRegister& vd, const VRegister& vn); |
| + |
| + // Move register to register. |
| + void mov(const VRegister& vd, const VRegister& vn); |
| + |
| + // Bitwise orn. |
| + void orn(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Bitwise eor. |
| + void eor(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| // Bitwise or (A | B). |
| void orr(const Register& rd, const Register& rn, const Operand& operand); |
| + // Bitwise or. |
| + void orr(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Bitwise or immediate. |
| + void orr(const VRegister& vd, const int imm8, const int left_shift = 0); |
| + |
| // Bitwise nor (A | ~B). |
| void orn(const Register& rd, const Register& rn, const Operand& operand); |
| @@ -1508,147 +1735,1080 @@ class Assembler : public AssemblerBase { |
| mov(Register::XRegFromCode(n), Register::XRegFromCode(n)); |
| } |
| + // Add. |
| + void add(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Unsigned halving add. |
| + void uhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Subtract. |
| + void sub(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed halving add. |
| + void shadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Multiply by scalar element. |
| + void mul(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + int vm_index); |
| + |
| + // Multiply-add by scalar element. |
| + void mla(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + int vm_index); |
| + |
| + // Multiply-subtract by scalar element. |
| + void mls(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + int vm_index); |
| + |
| + // Signed long multiply-add by scalar element. |
| + void smlal(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + int vm_index); |
| + |
| + // Signed long multiply-add by scalar element (second part). |
| + void smlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + int vm_index); |
| + |
| + // Unsigned long multiply-add by scalar element. |
| + void umlal(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + int vm_index); |
| + |
| + // Unsigned long multiply-add by scalar element (second part). |
| + void umlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + int vm_index); |
| + |
| + // Signed long multiply-sub by scalar element. |
| + void smlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + int vm_index); |
| + |
| + // Signed long multiply-sub by scalar element (second part). |
| + void smlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + int vm_index); |
| + |
| + // Unsigned long multiply-sub by scalar element. |
| + void umlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + int vm_index); |
| + |
| + // Unsigned long multiply-sub by scalar element (second part). |
| + void umlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + int vm_index); |
| + |
| + // Signed long multiply by scalar element. |
| + void smull(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + int vm_index); |
| + |
| + // Signed long multiply by scalar element (second part). |
| + void smull2(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + int vm_index); |
| + |
| + // Unsigned long multiply by scalar element. |
| + void umull(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + int vm_index); |
| + |
| + // Unsigned long multiply by scalar element (second part). |
| + void umull2(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + int vm_index); |
| + |
| + // Add narrow returning high half. |
| + void addhn(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Add narrow returning high half (second part). |
| + void addhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed saturating doubling long multiply by element. |
| + void sqdmull(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + int vm_index); |
| + |
| + // Signed saturating doubling long multiply by element (second part). |
| + void sqdmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + int vm_index); |
| + |
| + // Signed saturating doubling long multiply-add by element. |
| + void sqdmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + int vm_index); |
| + |
| + // Signed saturating doubling long multiply-add by element (second part). |
| + void sqdmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + int vm_index); |
| + |
| + // Signed saturating doubling long multiply-sub by element. |
| + void sqdmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + int vm_index); |
| + |
| + // Signed saturating doubling long multiply-sub by element (second part). |
| + void sqdmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + int vm_index); |
| + |
| + // Compare bitwise equal to zero. |
| + void cmeq(const VRegister& vd, const VRegister& vn, int value); |
| + |
| + // Compare signed greater than or equal to zero. |
| + void cmge(const VRegister& vd, const VRegister& vn, int value); |
| + |
| + // Compare signed greater than zero. |
| + void cmgt(const VRegister& vd, const VRegister& vn, int value); |
| + |
| + // Compare signed less than or equal to zero. |
| + void cmle(const VRegister& vd, const VRegister& vn, int value); |
| + |
| + // Compare signed less than zero. |
| + void cmlt(const VRegister& vd, const VRegister& vn, int value); |
| + |
| + // Unsigned rounding halving add. |
| + void urhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Compare equal. |
| + void cmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Compare signed greater than or equal. |
| + void cmge(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Compare signed greater than. |
| + void cmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Compare unsigned higher. |
| + void cmhi(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Compare unsigned higher or same. |
| + void cmhs(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Compare bitwise test bits nonzero. |
| + void cmtst(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed shift left by register. |
| + void sshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Unsigned shift left by register. |
| + void ushl(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed saturating doubling long multiply-subtract. |
| + void sqdmlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed saturating doubling long multiply-subtract (second part). |
| + void sqdmlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed saturating doubling long multiply. |
| + void sqdmull(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed saturating doubling long multiply (second part). |
| + void sqdmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed saturating doubling multiply returning high half. |
| + void sqdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed saturating rounding doubling multiply returning high half. |
| + void sqrdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed saturating doubling multiply element returning high half. |
| + void sqdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + int vm_index); |
| + |
| + // Signed saturating rounding doubling multiply element returning high half. |
| + void sqrdmulh(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + int vm_index); |
| + |
| + // Unsigned long multiply. |
| + void umull(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Unsigned long multiply (second part). |
| + void umull2(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Rounding add narrow returning high half. |
| + void raddhn(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Subtract narrow returning high half. |
| + void subhn(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Subtract narrow returning high half (second part). |
| + void subhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Rounding add narrow returning high half (second part). |
| + void raddhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Rounding subtract narrow returning high half. |
| + void rsubhn(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Rounding subtract narrow returning high half (second part). |
| + void rsubhn2(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed saturating shift left by register. |
| + void sqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Unsigned saturating shift left by register. |
| + void uqshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed rounding shift left by register. |
| + void srshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Unsigned rounding shift left by register. |
| + void urshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed saturating rounding shift left by register. |
| + void sqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Unsigned saturating rounding shift left by register. |
| + void uqrshl(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed absolute difference. |
| + void sabd(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Unsigned absolute difference and accumulate. |
| + void uaba(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Shift left by immediate and insert. |
| + void sli(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Shift right by immediate and insert. |
| + void sri(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Signed maximum. |
| + void smax(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed pairwise maximum. |
| + void smaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Add across vector. |
| + void addv(const VRegister& vd, const VRegister& vn); |
| + |
| + // Signed add long across vector. |
| + void saddlv(const VRegister& vd, const VRegister& vn); |
| + |
| + // Unsigned add long across vector. |
| + void uaddlv(const VRegister& vd, const VRegister& vn); |
| + |
| + // FP maximum number across vector. |
| + void fmaxnmv(const VRegister& vd, const VRegister& vn); |
| + |
| + // FP maximum across vector. |
| + void fmaxv(const VRegister& vd, const VRegister& vn); |
| + |
| + // FP minimum number across vector. |
| + void fminnmv(const VRegister& vd, const VRegister& vn); |
| + |
| + // FP minimum across vector. |
| + void fminv(const VRegister& vd, const VRegister& vn); |
| + |
| + // Signed maximum across vector. |
| + void smaxv(const VRegister& vd, const VRegister& vn); |
| + |
| + // Signed minimum. |
| + void smin(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed pairwise minimum. |
| + void sminp(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed minimum across vector. |
| + void sminv(const VRegister& vd, const VRegister& vn); |
| + |
| + // One-element structure store from one register. |
| + void st1(const VRegister& vt, const MemOperand& src); |
| + |
| + // One-element structure store from two registers. |
| + void st1(const VRegister& vt, const VRegister& vt2, const MemOperand& src); |
| + |
| + // One-element structure store from three registers. |
| + void st1(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, |
| + const MemOperand& src); |
| + |
| + // One-element structure store from four registers. |
| + void st1(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, |
| + const VRegister& vt4, const MemOperand& src); |
| + |
| + // One-element single structure store from one lane. |
| + void st1(const VRegister& vt, int lane, const MemOperand& src); |
| + |
| + // Two-element structure store from two registers. |
| + void st2(const VRegister& vt, const VRegister& vt2, const MemOperand& src); |
| + |
| + // Two-element single structure store from two lanes. |
| + void st2(const VRegister& vt, const VRegister& vt2, int lane, |
| + const MemOperand& src); |
| + |
| + // Three-element structure store from three registers. |
| + void st3(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, |
| + const MemOperand& src); |
| + |
| + // Three-element single structure store from three lanes. |
| + void st3(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, |
| + int lane, const MemOperand& src); |
| + |
| + // Four-element structure store from four registers. |
| + void st4(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, |
| + const VRegister& vt4, const MemOperand& src); |
| + |
| + // Four-element single structure store from four lanes. |
| + void st4(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, |
| + const VRegister& vt4, int lane, const MemOperand& src); |
| + |
| + // Unsigned add long. |
| + void uaddl(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Unsigned add long (second part). |
| + void uaddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Unsigned add wide. |
| + void uaddw(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Unsigned add wide (second part). |
| + void uaddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed add long. |
| + void saddl(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed add long (second part). |
| + void saddl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed add wide. |
| + void saddw(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed add wide (second part). |
| + void saddw2(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Unsigned subtract long. |
| + void usubl(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Unsigned subtract long (second part). |
| + void usubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Unsigned subtract wide. |
| + void usubw(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed subtract long. |
| + void ssubl(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed subtract long (second part). |
| + void ssubl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed integer subtract wide. |
| + void ssubw(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed integer subtract wide (second part). |
| + void ssubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Unsigned subtract wide (second part). |
| + void usubw2(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Unsigned maximum. |
| + void umax(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Unsigned pairwise maximum. |
| + void umaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Unsigned maximum across vector. |
| + void umaxv(const VRegister& vd, const VRegister& vn); |
| + |
| + // Unsigned minimum. |
| + void umin(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Unsigned pairwise minimum. |
| + void uminp(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Unsigned minimum across vector. |
| + void uminv(const VRegister& vd, const VRegister& vn); |
| + |
| + // Transpose vectors (primary). |
| + void trn1(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Transpose vectors (secondary). |
| + void trn2(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Unzip vectors (primary). |
| + void uzp1(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Unzip vectors (secondary). |
| + void uzp2(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Zip vectors (primary). |
| + void zip1(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Zip vectors (secondary). |
| + void zip2(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed shift right by immediate. |
| + void sshr(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Unsigned shift right by immediate. |
| + void ushr(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Signed rounding shift right by immediate. |
| + void srshr(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Unsigned rounding shift right by immediate. |
| + void urshr(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Signed shift right by immediate and accumulate. |
| + void ssra(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Unsigned shift right by immediate and accumulate. |
| + void usra(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Signed rounding shift right by immediate and accumulate. |
| + void srsra(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Unsigned rounding shift right by immediate and accumulate. |
| + void ursra(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Shift right narrow by immediate. |
| + void shrn(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Shift right narrow by immediate (second part). |
| + void shrn2(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Rounding shift right narrow by immediate. |
| + void rshrn(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Rounding shift right narrow by immediate (second part). |
| + void rshrn2(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Unsigned saturating shift right narrow by immediate. |
| + void uqshrn(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Unsigned saturating shift right narrow by immediate (second part). |
| + void uqshrn2(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Unsigned saturating rounding shift right narrow by immediate. |
| + void uqrshrn(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Unsigned saturating rounding shift right narrow by immediate (second part). |
| + void uqrshrn2(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Signed saturating shift right narrow by immediate. |
| + void sqshrn(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Signed saturating shift right narrow by immediate (second part). |
| + void sqshrn2(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Signed saturating rounded shift right narrow by immediate. |
| + void sqrshrn(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Signed saturating rounded shift right narrow by immediate (second part). |
| + void sqrshrn2(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Signed saturating shift right unsigned narrow by immediate. |
| + void sqshrun(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Signed saturating shift right unsigned narrow by immediate (second part). |
| + void sqshrun2(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Signed sat rounded shift right unsigned narrow by immediate. |
| + void sqrshrun(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Signed sat rounded shift right unsigned narrow by immediate (second part). |
| + void sqrshrun2(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // FP reciprocal step. |
| + void frecps(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // FP reciprocal estimate. |
| + void frecpe(const VRegister& vd, const VRegister& vn); |
| + |
| + // FP reciprocal square root estimate. |
| + void frsqrte(const VRegister& vd, const VRegister& vn); |
| + |
| + // FP reciprocal square root step. |
| + void frsqrts(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed absolute difference and accumulate long. |
| + void sabal(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed absolute difference and accumulate long (second part). |
| + void sabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Unsigned absolute difference and accumulate long. |
| + void uabal(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Unsigned absolute difference and accumulate long (second part). |
| + void uabal2(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed absolute difference long. |
| + void sabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed absolute difference long (second part). |
| + void sabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Unsigned absolute difference long. |
| + void uabdl(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Unsigned absolute difference long (second part). |
| + void uabdl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Polynomial multiply long. |
| + void pmull(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Polynomial multiply long (second part). |
| + void pmull2(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed long multiply-add. |
| + void smlal(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed long multiply-add (second part). |
| + void smlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Unsigned long multiply-add. |
| + void umlal(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Unsigned long multiply-add (second part). |
| + void umlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed long multiply-sub. |
| + void smlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed long multiply-sub (second part). |
| + void smlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Unsigned long multiply-sub. |
| + void umlsl(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Unsigned long multiply-sub (second part). |
| + void umlsl2(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed long multiply. |
| + void smull(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed long multiply (second part). |
| + void smull2(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed saturating doubling long multiply-add. |
| + void sqdmlal(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed saturating doubling long multiply-add (second part). |
| + void sqdmlal2(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Unsigned absolute difference. |
| + void uabd(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed absolute difference and accumulate. |
| + void saba(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| // FP instructions. |
| // Move immediate to FP register. |
| - void fmov(FPRegister fd, double imm); |
| - void fmov(FPRegister fd, float imm); |
| + void fmov(const VRegister& fd, double imm); |
| + void fmov(const VRegister& fd, float imm); |
| // Move FP register to register. |
| - void fmov(Register rd, FPRegister fn); |
| + void fmov(const Register& rd, const VRegister& fn); |
| // Move register to FP register. |
| - void fmov(FPRegister fd, Register rn); |
| + void fmov(const VRegister& fd, const Register& rn); |
| // Move FP register to FP register. |
| - void fmov(FPRegister fd, FPRegister fn); |
| + void fmov(const VRegister& fd, const VRegister& fn); |
| + |
| + // Move 64-bit register to top half of 128-bit FP register. |
| + void fmov(const VRegister& vd, int index, const Register& rn); |
| + |
| + // Move top half of 128-bit FP register to 64-bit register. |
| + void fmov(const Register& rd, const VRegister& vn, int index); |
| // FP add. |
| - void fadd(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm); |
| + void fadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| // FP subtract. |
| - void fsub(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm); |
| + void fsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| // FP multiply. |
| - void fmul(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm); |
| - |
| - // FP fused multiply and add. |
| - void fmadd(const FPRegister& fd, |
| - const FPRegister& fn, |
| - const FPRegister& fm, |
| - const FPRegister& fa); |
| - |
| - // FP fused multiply and subtract. |
| - void fmsub(const FPRegister& fd, |
| - const FPRegister& fn, |
| - const FPRegister& fm, |
| - const FPRegister& fa); |
| - |
| - // FP fused multiply, add and negate. |
| - void fnmadd(const FPRegister& fd, |
| - const FPRegister& fn, |
| - const FPRegister& fm, |
| - const FPRegister& fa); |
| - |
| - // FP fused multiply, subtract and negate. |
| - void fnmsub(const FPRegister& fd, |
| - const FPRegister& fn, |
| - const FPRegister& fm, |
| - const FPRegister& fa); |
| + void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // FP compare equal to zero. |
| + void fcmeq(const VRegister& vd, const VRegister& vn, double imm); |
| + |
| + // FP greater than zero. |
| + void fcmgt(const VRegister& vd, const VRegister& vn, double imm); |
| + |
| + // FP greater than or equal to zero. |
| + void fcmge(const VRegister& vd, const VRegister& vn, double imm); |
| + |
| + // FP less than or equal to zero. |
| + void fcmle(const VRegister& vd, const VRegister& vn, double imm); |
| + |
| + // FP less than zero. |
| + void fcmlt(const VRegister& vd, const VRegister& vn, double imm); |
| + |
| + // FP absolute difference. |
| + void fabd(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // FP pairwise add vector. |
| + void faddp(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // FP pairwise add scalar. |
| + void faddp(const VRegister& vd, const VRegister& vn); |
| + |
| + // FP pairwise maximum scalar. |
| + void fmaxp(const VRegister& vd, const VRegister& vn); |
| + |
| + // FP pairwise maximum number scalar. |
| + void fmaxnmp(const VRegister& vd, const VRegister& vn); |
| + |
| + // FP pairwise minimum number scalar. |
| + void fminnmp(const VRegister& vd, const VRegister& vn); |
| + |
| + // FP vector multiply accumulate. |
| + void fmla(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // FP vector multiply subtract. |
| + void fmls(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // FP vector multiply extended. |
| + void fmulx(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // FP absolute greater than or equal. |
| + void facge(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // FP absolute greater than. |
| + void facgt(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // FP multiply by element. |
| + void fmul(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + int vm_index); |
| + |
| + // FP fused multiply-add to accumulator by element. |
| + void fmla(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + int vm_index); |
| + |
| + // FP fused multiply-sub from accumulator by element. |
| + void fmls(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + int vm_index); |
| + |
| + // FP multiply extended by element. |
| + void fmulx(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + int vm_index); |
| + |
| + // FP compare equal. |
| + void fcmeq(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // FP greater than. |
| + void fcmgt(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // FP greater than or equal. |
| + void fcmge(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // FP pairwise maximum vector. |
| + void fmaxp(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // FP pairwise minimum vector. |
| + void fminp(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // FP pairwise minimum scalar. |
| + void fminp(const VRegister& vd, const VRegister& vn); |
| + |
| + // FP pairwise maximum number vector. |
| + void fmaxnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // FP pairwise minimum number vector. |
| + void fminnmp(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // FP fused multiply-add. |
| + void fmadd(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + const VRegister& va); |
| + |
| + // FP fused multiply-subtract. |
| + void fmsub(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + const VRegister& va); |
| + |
| + // FP fused multiply-add and negate. |
| + void fnmadd(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + const VRegister& va); |
| + |
| + // FP fused multiply-subtract and negate. |
| + void fnmsub(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + const VRegister& va); |
| + |
| + // FP multiply-negate scalar. |
| + void fnmul(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // FP reciprocal exponent scalar. |
| + void frecpx(const VRegister& vd, const VRegister& vn); |
| // FP divide. |
| - void fdiv(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm); |
| + void fdiv(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| // FP maximum. |
| - void fmax(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm); |
| + void fmax(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| // FP minimum. |
| - void fmin(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm); |
| + void fmin(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| // FP maximum number. |
| - void fmaxnm(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm); |
| + void fmaxnm(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| // FP minimum number. |
| - void fminnm(const FPRegister& fd, const FPRegister& fn, const FPRegister& fm); |
| + void fminnm(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| // FP absolute. |
| - void fabs(const FPRegister& fd, const FPRegister& fn); |
| + void fabs(const VRegister& vd, const VRegister& vn); |
| // FP negate. |
| - void fneg(const FPRegister& fd, const FPRegister& fn); |
| + void fneg(const VRegister& vd, const VRegister& vn); |
| // FP square root. |
| - void fsqrt(const FPRegister& fd, const FPRegister& fn); |
| + void fsqrt(const VRegister& vd, const VRegister& vn); |
| - // FP round to integer (nearest with ties to away). |
| - void frinta(const FPRegister& fd, const FPRegister& fn); |
| + // FP round to integer nearest with ties to away. |
| + void frinta(const VRegister& vd, const VRegister& vn); |
| - // FP round to integer (toward minus infinity). |
| - void frintm(const FPRegister& fd, const FPRegister& fn); |
| + // FP round to integer, implicit rounding. |
| + void frinti(const VRegister& vd, const VRegister& vn); |
| - // FP round to integer (nearest with ties to even). |
| - void frintn(const FPRegister& fd, const FPRegister& fn); |
| + // FP round to integer toward minus infinity. |
| + void frintm(const VRegister& vd, const VRegister& vn); |
| - // FP round to integer (towards plus infinity). |
| - void frintp(const FPRegister& fd, const FPRegister& fn); |
| + // FP round to integer nearest with ties to even. |
| + void frintn(const VRegister& vd, const VRegister& vn); |
| - // FP round to integer (towards zero.) |
| - void frintz(const FPRegister& fd, const FPRegister& fn); |
| + // FP round to integer towards plus infinity. |
| + void frintp(const VRegister& vd, const VRegister& vn); |
| + |
| + // FP round to integer, exact, implicit rounding. |
| + void frintx(const VRegister& vd, const VRegister& vn); |
| + |
| + // FP round to integer towards zero. |
| + void frintz(const VRegister& vd, const VRegister& vn); |
| // FP compare registers. |
| - void fcmp(const FPRegister& fn, const FPRegister& fm); |
| + void fcmp(const VRegister& vn, const VRegister& vm); |
| // FP compare immediate. |
| - void fcmp(const FPRegister& fn, double value); |
| + void fcmp(const VRegister& vn, double value); |
| // FP conditional compare. |
| - void fccmp(const FPRegister& fn, |
| - const FPRegister& fm, |
| - StatusFlags nzcv, |
| + void fccmp(const VRegister& vn, const VRegister& vm, StatusFlags nzcv, |
| Condition cond); |
| // FP conditional select. |
| - void fcsel(const FPRegister& fd, |
| - const FPRegister& fn, |
| - const FPRegister& fm, |
| + void fcsel(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| Condition cond); |
| - // Common FP Convert function |
| - void FPConvertToInt(const Register& rd, |
| - const FPRegister& fn, |
| - FPIntegerConvertOp op); |
| + // Common FP Convert functions. |
| + void NEONFPConvertToInt(const Register& rd, const VRegister& vn, Instr op); |
| + void NEONFPConvertToInt(const VRegister& vd, const VRegister& vn, Instr op); |
| + |
| + // FP convert between precisions. |
| + void fcvt(const VRegister& vd, const VRegister& vn); |
| + |
| + // FP convert to higher precision. |
| + void fcvtl(const VRegister& vd, const VRegister& vn); |
| + |
| + // FP convert to higher precision (second part). |
| + void fcvtl2(const VRegister& vd, const VRegister& vn); |
| + |
| + // FP convert to lower precision. |
| + void fcvtn(const VRegister& vd, const VRegister& vn); |
| - // FP convert between single and double precision. |
| - void fcvt(const FPRegister& fd, const FPRegister& fn); |
| + // FP convert to lower precision (second part). |
| + void fcvtn2(const VRegister& vd, const VRegister& vn); |
| - // Convert FP to unsigned integer (nearest with ties to away). |
| - void fcvtau(const Register& rd, const FPRegister& fn); |
| + // FP convert to lower precision, rounding to odd. |
| + void fcvtxn(const VRegister& vd, const VRegister& vn); |
| - // Convert FP to signed integer (nearest with ties to away). |
| - void fcvtas(const Register& rd, const FPRegister& fn); |
| + // FP convert to lower precision, rounding to odd (second part). |
| + void fcvtxn2(const VRegister& vd, const VRegister& vn); |
| - // Convert FP to unsigned integer (round towards -infinity). |
| - void fcvtmu(const Register& rd, const FPRegister& fn); |
| + // FP convert to signed integer, nearest with ties to away. |
| + void fcvtas(const Register& rd, const VRegister& vn); |
| - // Convert FP to signed integer (round towards -infinity). |
| - void fcvtms(const Register& rd, const FPRegister& fn); |
| + // FP convert to unsigned integer, nearest with ties to away. |
| + void fcvtau(const Register& rd, const VRegister& vn); |
| - // Convert FP to unsigned integer (nearest with ties to even). |
| - void fcvtnu(const Register& rd, const FPRegister& fn); |
| + // FP convert to signed integer, nearest with ties to away. |
| + void fcvtas(const VRegister& vd, const VRegister& vn); |
| - // Convert FP to signed integer (nearest with ties to even). |
| - void fcvtns(const Register& rd, const FPRegister& fn); |
| + // FP convert to unsigned integer, nearest with ties to away. |
| + void fcvtau(const VRegister& vd, const VRegister& vn); |
| - // Convert FP to unsigned integer (round towards zero). |
| - void fcvtzu(const Register& rd, const FPRegister& fn); |
| + // FP convert to signed integer, round towards -infinity. |
| + void fcvtms(const Register& rd, const VRegister& vn); |
| - // Convert FP to signed integer (rounf towards zero). |
| - void fcvtzs(const Register& rd, const FPRegister& fn); |
| + // FP convert to unsigned integer, round towards -infinity. |
| + void fcvtmu(const Register& rd, const VRegister& vn); |
| + |
| + // FP convert to signed integer, round towards -infinity. |
| + void fcvtms(const VRegister& vd, const VRegister& vn); |
| + |
| + // FP convert to unsigned integer, round towards -infinity. |
| + void fcvtmu(const VRegister& vd, const VRegister& vn); |
| + |
| + // FP convert to signed integer, nearest with ties to even. |
| + void fcvtns(const Register& rd, const VRegister& vn); |
| + |
| + // FP convert to unsigned integer, nearest with ties to even. |
| + void fcvtnu(const Register& rd, const VRegister& vn); |
| + |
| + // FP convert to signed integer, nearest with ties to even. |
| + void fcvtns(const VRegister& rd, const VRegister& vn); |
| + |
| + // FP convert to unsigned integer, nearest with ties to even. |
| + void fcvtnu(const VRegister& rd, const VRegister& vn); |
| + |
| + // FP convert to signed integer or fixed-point, round towards zero. |
| + void fcvtzs(const Register& rd, const VRegister& vn, int fbits = 0); |
| + |
| + // FP convert to unsigned integer or fixed-point, round towards zero. |
| + void fcvtzu(const Register& rd, const VRegister& vn, int fbits = 0); |
| + |
| + // FP convert to signed integer or fixed-point, round towards zero. |
| + void fcvtzs(const VRegister& vd, const VRegister& vn, int fbits = 0); |
| + |
| + // FP convert to unsigned integer or fixed-point, round towards zero. |
| + void fcvtzu(const VRegister& vd, const VRegister& vn, int fbits = 0); |
| + |
| + // FP convert to signed integer, round towards +infinity. |
| + void fcvtps(const Register& rd, const VRegister& vn); |
| + |
| + // FP convert to unsigned integer, round towards +infinity. |
| + void fcvtpu(const Register& rd, const VRegister& vn); |
| + |
| + // FP convert to signed integer, round towards +infinity. |
| + void fcvtps(const VRegister& vd, const VRegister& vn); |
| + |
| + // FP convert to unsigned integer, round towards +infinity. |
| + void fcvtpu(const VRegister& vd, const VRegister& vn); |
| // Convert signed integer or fixed point to FP. |
| - void scvtf(const FPRegister& fd, const Register& rn, unsigned fbits = 0); |
| + void scvtf(const VRegister& fd, const Register& rn, int fbits = 0); |
| // Convert unsigned integer or fixed point to FP. |
| - void ucvtf(const FPRegister& fd, const Register& rn, unsigned fbits = 0); |
| + void ucvtf(const VRegister& fd, const Register& rn, int fbits = 0); |
| + |
| + // Convert signed integer or fixed-point to FP. |
| + void scvtf(const VRegister& fd, const VRegister& vn, int fbits = 0); |
| + |
| + // Convert unsigned integer or fixed-point to FP. |
| + void ucvtf(const VRegister& fd, const VRegister& vn, int fbits = 0); |
| + |
| + // Extract vector from pair of vectors. |
| + void ext(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + int index); |
| + |
| + // Duplicate vector element to vector or scalar. |
| + void dup(const VRegister& vd, const VRegister& vn, int vn_index); |
| + |
| + // Duplicate general-purpose register to vector. |
| + void dup(const VRegister& vd, const Register& rn); |
| + |
| + // Insert vector element from general-purpose register. |
| + void ins(const VRegister& vd, int vd_index, const Register& rn); |
| + |
| + // Move general-purpose register to a vector element. |
| + void mov(const VRegister& vd, int vd_index, const Register& rn); |
| + |
| + // Unsigned move vector element to general-purpose register. |
| + void umov(const Register& rd, const VRegister& vn, int vn_index); |
| + |
| + // Move vector element to general-purpose register. |
| + void mov(const Register& rd, const VRegister& vn, int vn_index); |
| + |
| + // Move vector element to scalar. |
| + void mov(const VRegister& vd, const VRegister& vn, int vn_index); |
| + |
| + // Insert vector element from another vector element. |
| + void ins(const VRegister& vd, int vd_index, const VRegister& vn, |
| + int vn_index); |
| + |
| + // Move vector element to another vector element. |
| + void mov(const VRegister& vd, int vd_index, const VRegister& vn, |
| + int vn_index); |
| + |
| + // Signed move vector element to general-purpose register. |
| + void smov(const Register& rd, const VRegister& vn, int vn_index); |
| + |
| + // One-element structure load to one register. |
| + void ld1(const VRegister& vt, const MemOperand& src); |
| + |
| + // One-element structure load to two registers. |
| + void ld1(const VRegister& vt, const VRegister& vt2, const MemOperand& src); |
| + |
| + // One-element structure load to three registers. |
| + void ld1(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, |
| + const MemOperand& src); |
| + |
| + // One-element structure load to four registers. |
| + void ld1(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, |
| + const VRegister& vt4, const MemOperand& src); |
| + |
| + // One-element single structure load to one lane. |
| + void ld1(const VRegister& vt, int lane, const MemOperand& src); |
| + |
| + // One-element single structure load to all lanes. |
| + void ld1r(const VRegister& vt, const MemOperand& src); |
| + |
| + // Two-element structure load. |
| + void ld2(const VRegister& vt, const VRegister& vt2, const MemOperand& src); |
| + |
| + // Two-element single structure load to one lane. |
| + void ld2(const VRegister& vt, const VRegister& vt2, int lane, |
| + const MemOperand& src); |
| + |
| + // Two-element single structure load to all lanes. |
| + void ld2r(const VRegister& vt, const VRegister& vt2, const MemOperand& src); |
| + |
| + // Three-element structure load. |
| + void ld3(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, |
| + const MemOperand& src); |
| + |
| + // Three-element single structure load to one lane. |
| + void ld3(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, |
| + int lane, const MemOperand& src); |
| + |
| + // Three-element single structure load to all lanes. |
| + void ld3r(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, |
| + const MemOperand& src); |
| + |
| + // Four-element structure load. |
| + void ld4(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, |
| + const VRegister& vt4, const MemOperand& src); |
| + |
| + // Four-element single structure load to one lane. |
| + void ld4(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, |
| + const VRegister& vt4, int lane, const MemOperand& src); |
| + |
| + // Four-element single structure load to all lanes. |
| + void ld4r(const VRegister& vt, const VRegister& vt2, const VRegister& vt3, |
| + const VRegister& vt4, const MemOperand& src); |
| + |
| + // Count leading sign bits. |
| + void cls(const VRegister& vd, const VRegister& vn); |
| + |
| + // Count leading zero bits (vector). |
| + void clz(const VRegister& vd, const VRegister& vn); |
| + |
| + // Population count per byte. |
| + void cnt(const VRegister& vd, const VRegister& vn); |
| + |
| + // Reverse bit order. |
| + void rbit(const VRegister& vd, const VRegister& vn); |
| + |
| + // Reverse elements in 16-bit halfwords. |
| + void rev16(const VRegister& vd, const VRegister& vn); |
| + |
| + // Reverse elements in 32-bit words. |
| + void rev32(const VRegister& vd, const VRegister& vn); |
| + |
| + // Reverse elements in 64-bit doublewords. |
| + void rev64(const VRegister& vd, const VRegister& vn); |
| + |
| + // Unsigned reciprocal square root estimate. |
| + void ursqrte(const VRegister& vd, const VRegister& vn); |
| + |
| + // Unsigned reciprocal estimate. |
| + void urecpe(const VRegister& vd, const VRegister& vn); |
| + |
| + // Signed pairwise long add and accumulate. |
| + void sadalp(const VRegister& vd, const VRegister& vn); |
| + |
| + // Signed pairwise long add. |
| + void saddlp(const VRegister& vd, const VRegister& vn); |
| + |
| + // Unsigned pairwise long add. |
| + void uaddlp(const VRegister& vd, const VRegister& vn); |
| + |
| + // Unsigned pairwise long add and accumulate. |
| + void uadalp(const VRegister& vd, const VRegister& vn); |
| + |
| + // Shift left by immediate. |
| + void shl(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Signed saturating shift left by immediate. |
| + void sqshl(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Signed saturating shift left unsigned by immediate. |
| + void sqshlu(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Unsigned saturating shift left by immediate. |
| + void uqshl(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Signed shift left long by immediate. |
| + void sshll(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Signed shift left long by immediate (second part). |
| + void sshll2(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Signed extend long. |
| + void sxtl(const VRegister& vd, const VRegister& vn); |
| + |
| + // Signed extend long (second part). |
| + void sxtl2(const VRegister& vd, const VRegister& vn); |
| + |
| + // Unsigned shift left long by immediate. |
| + void ushll(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Unsigned shift left long by immediate (second part). |
| + void ushll2(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Shift left long by element size. |
| + void shll(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Shift left long by element size (second part). |
| + void shll2(const VRegister& vd, const VRegister& vn, int shift); |
| + |
| + // Unsigned extend long. |
| + void uxtl(const VRegister& vd, const VRegister& vn); |
| + |
| + // Unsigned extend long (second part). |
| + void uxtl2(const VRegister& vd, const VRegister& vn); |
| + |
| + // Signed rounding halving add. |
| + void srhadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Unsigned halving sub. |
| + void uhsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed halving sub. |
| + void shsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Unsigned saturating add. |
| + void uqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed saturating add. |
| + void sqadd(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Unsigned saturating subtract. |
| + void uqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Signed saturating subtract. |
| + void sqsub(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Add pairwise. |
| + void addp(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Add pair of elements scalar. |
| + void addp(const VRegister& vd, const VRegister& vn); |
| + |
| + // Multiply-add to accumulator. |
| + void mla(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Multiply-subtract to accumulator. |
| + void mls(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Multiply. |
| + void mul(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Table lookup from one register. |
| + void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Table lookup from two registers. |
| + void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vn2, |
| + const VRegister& vm); |
| + |
| + // Table lookup from three registers. |
| + void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vn2, |
| + const VRegister& vn3, const VRegister& vm); |
| + |
| + // Table lookup from four registers. |
| + void tbl(const VRegister& vd, const VRegister& vn, const VRegister& vn2, |
| + const VRegister& vn3, const VRegister& vn4, const VRegister& vm); |
| + |
| + // Table lookup extension from one register. |
| + void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vm); |
| + |
| + // Table lookup extension from two registers. |
| + void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vn2, |
| + const VRegister& vm); |
| + |
| + // Table lookup extension from three registers. |
| + void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vn2, |
| + const VRegister& vn3, const VRegister& vm); |
| + |
| + // Table lookup extension from four registers. |
| + void tbx(const VRegister& vd, const VRegister& vn, const VRegister& vn2, |
| + const VRegister& vn3, const VRegister& vn4, const VRegister& vm); |
| // Instruction functions used only for test, debug, and patching. |
| // Emit raw instructions in the instruction stream. |
| @@ -1698,37 +2858,43 @@ class Assembler : public AssemblerBase { |
| // Register encoding. |
| static Instr Rd(CPURegister rd) { |
| - DCHECK(rd.code() != kSPRegInternalCode); |
| + DCHECK_NE(rd.code(), kSPRegInternalCode); |
| return rd.code() << Rd_offset; |
| } |
| static Instr Rn(CPURegister rn) { |
| - DCHECK(rn.code() != kSPRegInternalCode); |
| + DCHECK_NE(rn.code(), kSPRegInternalCode); |
| return rn.code() << Rn_offset; |
| } |
| static Instr Rm(CPURegister rm) { |
| - DCHECK(rm.code() != kSPRegInternalCode); |
| + DCHECK_NE(rm.code(), kSPRegInternalCode); |
| return rm.code() << Rm_offset; |
| } |
| + static Instr RmNot31(CPURegister rm) { |
| + DCHECK_NE(rm.code(), kSPRegInternalCode); |
| + DCHECK(!rm.IsZero()); |
| + return Rm(rm); |
| + } |
| + |
| static Instr Ra(CPURegister ra) { |
| - DCHECK(ra.code() != kSPRegInternalCode); |
| + DCHECK_NE(ra.code(), kSPRegInternalCode); |
| return ra.code() << Ra_offset; |
| } |
| static Instr Rt(CPURegister rt) { |
| - DCHECK(rt.code() != kSPRegInternalCode); |
| + DCHECK_NE(rt.code(), kSPRegInternalCode); |
| return rt.code() << Rt_offset; |
| } |
| static Instr Rt2(CPURegister rt2) { |
| - DCHECK(rt2.code() != kSPRegInternalCode); |
| + DCHECK_NE(rt2.code(), kSPRegInternalCode); |
| return rt2.code() << Rt2_offset; |
| } |
| static Instr Rs(CPURegister rs) { |
| - DCHECK(rs.code() != kSPRegInternalCode); |
| + DCHECK_NE(rs.code(), kSPRegInternalCode); |
| return rs.code() << Rs_offset; |
| } |
| @@ -1784,17 +2950,179 @@ class Assembler : public AssemblerBase { |
| // MemOperand offset encoding. |
| inline static Instr ImmLSUnsigned(int imm12); |
| inline static Instr ImmLS(int imm9); |
| - inline static Instr ImmLSPair(int imm7, LSDataSize size); |
| + inline static Instr ImmLSPair(int imm7, unsigned size); |
| inline static Instr ImmShiftLS(unsigned shift_amount); |
| inline static Instr ImmException(int imm16); |
| inline static Instr ImmSystemRegister(int imm15); |
| inline static Instr ImmHint(int imm7); |
| inline static Instr ImmBarrierDomain(int imm2); |
| inline static Instr ImmBarrierType(int imm2); |
| - inline static LSDataSize CalcLSDataSize(LoadStoreOp op); |
| + inline static unsigned CalcLSDataSize(LoadStoreOp op); |
| + |
| + // Instruction bits for vector format in data processing operations. |
| + static Instr VFormat(VRegister vd) { |
| + if (vd.Is64Bits()) { |
| + switch (vd.LaneCount()) { |
| + case 2: |
| + return NEON_2S; |
| + case 4: |
| + return NEON_4H; |
| + case 8: |
| + return NEON_8B; |
| + default: |
| + UNREACHABLE(); |
| + return 0xffffffff; // Undefined instruction. |
| + } |
| + } else { |
| + DCHECK(vd.Is128Bits()); |
| + switch (vd.LaneCount()) { |
| + case 2: |
| + return NEON_2D; |
| + case 4: |
| + return NEON_4S; |
| + case 8: |
| + return NEON_8H; |
| + case 16: |
| + return NEON_16B; |
| + default: |
| + UNREACHABLE(); |
| + return 0xffffffff; // Undefined instruction. |
| + } |
| + } |
| + } |
| + |
| + // Instruction bits for vector format in floating point data processing |
| + // operations. |
| + static Instr FPFormat(VRegister vd) { |
| + if (vd.LaneCount() == 1) { |
| + // Floating point scalar formats. |
| + DCHECK(vd.Is32Bits() || vd.Is64Bits()); |
| + return vd.Is64Bits() ? FP64 : FP32; |
| + } |
| + |
| + // Two lane floating point vector formats. |
| + if (vd.LaneCount() == 2) { |
| + DCHECK(vd.Is64Bits() || vd.Is128Bits()); |
| + return vd.Is128Bits() ? NEON_FP_2D : NEON_FP_2S; |
| + } |
| + |
| + // Four lane floating point vector format. |
| + DCHECK((vd.LaneCount() == 4) && vd.Is128Bits()); |
| + return NEON_FP_4S; |
| + } |
| + |
| + // Instruction bits for vector format in load and store operations. |
| + static Instr LSVFormat(VRegister vd) { |
| + if (vd.Is64Bits()) { |
| + switch (vd.LaneCount()) { |
| + case 1: |
| + return LS_NEON_1D; |
| + case 2: |
| + return LS_NEON_2S; |
| + case 4: |
| + return LS_NEON_4H; |
| + case 8: |
| + return LS_NEON_8B; |
| + default: |
| + UNREACHABLE(); |
| + return 0xffffffff; // Undefined instruction. |
|
bbudge
2017/01/31 01:41:31
Could we define a kUndefinedInstruction constant for this 0xffffffff value?
martyn.capewell
2017/02/03 11:01:31
Done.
|
| + } |
| + } else { |
| + DCHECK(vd.Is128Bits()); |
| + switch (vd.LaneCount()) { |
| + case 2: |
| + return LS_NEON_2D; |
| + case 4: |
| + return LS_NEON_4S; |
| + case 8: |
| + return LS_NEON_8H; |
| + case 16: |
| + return LS_NEON_16B; |
| + default: |
| + UNREACHABLE(); |
| + return 0xffffffff; // Undefined instruction. |
| + } |
| + } |
| + } |
| + |
| + // Instruction bits for scalar format in data processing operations. |
| + static Instr SFormat(VRegister vd) { |
| + DCHECK(vd.IsScalar()); |
| + switch (vd.SizeInBytes()) { |
| + case 1: |
| + return NEON_B; |
| + case 2: |
| + return NEON_H; |
| + case 4: |
| + return NEON_S; |
| + case 8: |
| + return NEON_D; |
| + default: |
| + UNREACHABLE(); |
| + return 0xffffffff; // Undefined instruction. |
| + } |
| + } |
| + |
| + static Instr ImmNEONHLM(int index, int num_bits) { |
| + int h, l, m; |
| + if (num_bits == 3) { |
| + DCHECK(is_uint3(index)); |
| + h = (index >> 2) & 1; |
| + l = (index >> 1) & 1; |
| + m = (index >> 0) & 1; |
| + } else if (num_bits == 2) { |
| + DCHECK(is_uint2(index)); |
| + h = (index >> 1) & 1; |
| + l = (index >> 0) & 1; |
| + m = 0; |
| + } else { |
| + DCHECK(is_uint1(index) && (num_bits == 1)); |
| + h = (index >> 0) & 1; |
| + l = 0; |
| + m = 0; |
| + } |
| + return (h << NEONH_offset) | (l << NEONL_offset) | (m << NEONM_offset); |
| + } |
| + |
| + static Instr ImmNEONExt(int imm4) { |
| + DCHECK(is_uint4(imm4)); |
| + return imm4 << ImmNEONExt_offset; |
| + } |
| + |
| + static Instr ImmNEON5(Instr format, int index) { |
| + DCHECK(is_uint4(index)); |
| + int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format)); |
| + int imm5 = (index << (s + 1)) | (1 << s); |
| + return imm5 << ImmNEON5_offset; |
| + } |
| + |
| + static Instr ImmNEON4(Instr format, int index) { |
| + DCHECK(is_uint4(index)); |
| + int s = LaneSizeInBytesLog2FromFormat(static_cast<VectorFormat>(format)); |
| + int imm4 = index << s; |
| + return imm4 << ImmNEON4_offset; |
| + } |
| + |
| + static Instr ImmNEONabcdefgh(int imm8) { |
| + DCHECK(is_uint8(imm8)); |
| + Instr instr; |
| + instr = ((imm8 >> 5) & 7) << ImmNEONabc_offset; |
| + instr |= (imm8 & 0x1f) << ImmNEONdefgh_offset; |
| + return instr; |
| + } |
| + |
| + static Instr NEONCmode(int cmode) { |
| + DCHECK(is_uint4(cmode)); |
| + return cmode << NEONCmode_offset; |
| + } |
| + |
| + static Instr NEONModImmOp(int op) { |
| + DCHECK(is_uint1(op)); |
| + return op << NEONModImmOp_offset; |
| + } |
| static bool IsImmLSUnscaled(int64_t offset); |
| - static bool IsImmLSScaled(int64_t offset, LSDataSize size); |
| + static bool IsImmLSScaled(int64_t offset, unsigned size); |
| static bool IsImmLLiteral(int64_t offset); |
| // Move immediates encoding. |
| @@ -1802,12 +3130,12 @@ class Assembler : public AssemblerBase { |
| inline static Instr ShiftMoveWide(int shift); |
| // FP Immediates. |
| - static Instr ImmFP32(float imm); |
| - static Instr ImmFP64(double imm); |
| + static Instr ImmFP(double imm); |
| + static Instr ImmNEONFP(double imm); |
| inline static Instr FPScale(unsigned scale); |
| // FP register type. |
| - inline static Instr FPType(FPRegister fd); |
| + inline static Instr FPType(VRegister fd); |
| // Class for scoping postponing the constant pool generation. |
| class BlockConstPoolScope { |
| @@ -1881,10 +3209,22 @@ class Assembler : public AssemblerBase { |
| void LoadStore(const CPURegister& rt, |
| const MemOperand& addr, |
| LoadStoreOp op); |
| - |
| void LoadStorePair(const CPURegister& rt, const CPURegister& rt2, |
| const MemOperand& addr, LoadStorePairOp op); |
| - static bool IsImmLSPair(int64_t offset, LSDataSize size); |
| + void LoadStoreStruct(const VRegister& vt, const MemOperand& addr, |
| + NEONLoadStoreMultiStructOp op); |
| + void LoadStoreStruct1(const VRegister& vt, int reg_count, |
| + const MemOperand& addr); |
| + void LoadStoreStructSingle(const VRegister& vt, uint32_t lane, |
| + const MemOperand& addr, |
| + NEONLoadStoreSingleStructOp op); |
| + void LoadStoreStructSingleAllLanes(const VRegister& vt, |
| + const MemOperand& addr, |
| + NEONLoadStoreSingleStructOp op); |
| + void LoadStoreStructVerify(const VRegister& vt, const MemOperand& addr, |
| + Instr op); |
| + |
| + static bool IsImmLSPair(int64_t offset, unsigned size); |
| void Logical(const Register& rd, |
| const Register& rn, |
| @@ -1927,6 +3267,9 @@ class Assembler : public AssemblerBase { |
| FlagsUpdate S, |
| AddSubOp op); |
| + void NEONTable(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + NEONTableOp op); |
|
bbudge
2017/01/31 01:41:31
Could NEONTable be private instead of protected?
martyn.capewell
2017/02/03 11:01:31
Done.
|
| + |
| static bool IsImmFP32(float imm); |
| static bool IsImmFP64(double imm); |
| @@ -1949,6 +3292,8 @@ class Assembler : public AssemblerBase { |
| Instruction* label_veneer = NULL); |
| private: |
| + static uint32_t FPToImm8(double imm); |
| + |
| // Instruction helpers. |
| void MoveWide(const Register& rd, |
| uint64_t imm, |
| @@ -1977,18 +3322,64 @@ class Assembler : public AssemblerBase { |
| const Register& rm, |
| const Register& ra, |
| DataProcessing3SourceOp op); |
| - void FPDataProcessing1Source(const FPRegister& fd, |
| - const FPRegister& fn, |
| + void FPDataProcessing1Source(const VRegister& fd, const VRegister& fn, |
| FPDataProcessing1SourceOp op); |
| - void FPDataProcessing2Source(const FPRegister& fd, |
| - const FPRegister& fn, |
| - const FPRegister& fm, |
| + void FPDataProcessing2Source(const VRegister& fd, const VRegister& fn, |
| + const VRegister& fm, |
| FPDataProcessing2SourceOp op); |
| - void FPDataProcessing3Source(const FPRegister& fd, |
| - const FPRegister& fn, |
| - const FPRegister& fm, |
| - const FPRegister& fa, |
| + void FPDataProcessing3Source(const VRegister& fd, const VRegister& fn, |
| + const VRegister& fm, const VRegister& fa, |
| FPDataProcessing3SourceOp op); |
| + void NEONAcrossLanesL(const VRegister& vd, const VRegister& vn, |
| + NEONAcrossLanesOp op); |
| + void NEONAcrossLanes(const VRegister& vd, const VRegister& vn, |
| + NEONAcrossLanesOp op); |
| + void NEONModifiedImmShiftLsl(const VRegister& vd, const int imm8, |
| + const int left_shift, |
| + NEONModifiedImmediateOp op); |
| + void NEONModifiedImmShiftMsl(const VRegister& vd, const int imm8, |
| + const int shift_amount, |
| + NEONModifiedImmediateOp op); |
| + void NEON3Same(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + NEON3SameOp vop); |
| + void NEONFP3Same(const VRegister& vd, const VRegister& vn, |
| + const VRegister& vm, Instr op); |
| + void NEON3DifferentL(const VRegister& vd, const VRegister& vn, |
| + const VRegister& vm, NEON3DifferentOp vop); |
| + void NEON3DifferentW(const VRegister& vd, const VRegister& vn, |
| + const VRegister& vm, NEON3DifferentOp vop); |
| + void NEON3DifferentHN(const VRegister& vd, const VRegister& vn, |
| + const VRegister& vm, NEON3DifferentOp vop); |
| + void NEONFP2RegMisc(const VRegister& vd, const VRegister& vn, |
| + NEON2RegMiscOp vop, double value = 0.0); |
| + void NEON2RegMisc(const VRegister& vd, const VRegister& vn, |
| + NEON2RegMiscOp vop, int value = 0); |
| + void NEONFP2RegMisc(const VRegister& vd, const VRegister& vn, Instr op); |
| + void NEONAddlp(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp op); |
| + void NEONPerm(const VRegister& vd, const VRegister& vn, const VRegister& vm, |
| + NEONPermOp op); |
| + void NEONFPByElement(const VRegister& vd, const VRegister& vn, |
| + const VRegister& vm, int vm_index, |
| + NEONByIndexedElementOp op); |
| + void NEONByElement(const VRegister& vd, const VRegister& vn, |
| + const VRegister& vm, int vm_index, |
| + NEONByIndexedElementOp op); |
| + void NEONByElementL(const VRegister& vd, const VRegister& vn, |
| + const VRegister& vm, int vm_index, |
| + NEONByIndexedElementOp op); |
| + void NEONShiftImmediate(const VRegister& vd, const VRegister& vn, |
| + NEONShiftImmediateOp op, int immh_immb); |
| + void NEONShiftLeftImmediate(const VRegister& vd, const VRegister& vn, |
| + int shift, NEONShiftImmediateOp op); |
| + void NEONShiftRightImmediate(const VRegister& vd, const VRegister& vn, |
| + int shift, NEONShiftImmediateOp op); |
| + void NEONShiftImmediateL(const VRegister& vd, const VRegister& vn, int shift, |
| + NEONShiftImmediateOp op); |
| + void NEONShiftImmediateN(const VRegister& vd, const VRegister& vn, int shift, |
| + NEONShiftImmediateOp op); |
| + void NEONXtn(const VRegister& vd, const VRegister& vn, NEON2RegMiscOp vop); |
| + |
| + Instr LoadStoreStructAddrModeField(const MemOperand& addr); |
| // Label helpers. |