| Index: gcc/gcc/config/i386/i386.c
|
| diff --git a/gcc/gcc/config/i386/i386.c b/gcc/gcc/config/i386/i386.c
|
| index 947ed5cadd860e67704741bfe532d38c1ad2bf32..070e13aa7a9d1b63c203f651f40a575739644886 100644
|
| --- a/gcc/gcc/config/i386/i386.c
|
| +++ b/gcc/gcc/config/i386/i386.c
|
| @@ -1,6 +1,6 @@
|
| /* Subroutines used for code generation on IA-32.
|
| - Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
|
| - 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
|
| + Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
|
| + 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
|
| Free Software Foundation, Inc.
|
|
|
| This file is part of GCC.
|
| @@ -35,7 +35,6 @@ along with GCC; see the file COPYING3. If not see
|
| #include "insn-codes.h"
|
| #include "insn-attr.h"
|
| #include "flags.h"
|
| -#include "c-common.h"
|
| #include "except.h"
|
| #include "function.h"
|
| #include "recog.h"
|
| @@ -54,8 +53,9 @@ along with GCC; see the file COPYING3. If not see
|
| #include "tm-constrs.h"
|
| #include "params.h"
|
| #include "cselib.h"
|
| +#include "debug.h"
|
| +#include "dwarf2out.h"
|
|
|
| -static int x86_builtin_vectorization_cost (bool);
|
| static rtx legitimize_dllimport_symbol (rtx, bool);
|
|
|
| #ifndef CHECK_STACK_LIMIT
|
| @@ -1036,6 +1036,79 @@ struct processor_costs core2_cost = {
|
| 1, /* cond_not_taken_branch_cost. */
|
| };
|
|
|
| +static const
|
| +struct processor_costs atom_cost = {
|
| + COSTS_N_INSNS (1), /* cost of an add instruction */
|
| + COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
|
| + COSTS_N_INSNS (1), /* variable shift costs */
|
| + COSTS_N_INSNS (1), /* constant shift costs */
|
| + {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
|
| + COSTS_N_INSNS (4), /* HI */
|
| + COSTS_N_INSNS (3), /* SI */
|
| + COSTS_N_INSNS (4), /* DI */
|
| + COSTS_N_INSNS (2)}, /* other */
|
| + 0, /* cost of multiply per each bit set */
|
| + {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
|
| + COSTS_N_INSNS (26), /* HI */
|
| + COSTS_N_INSNS (42), /* SI */
|
| + COSTS_N_INSNS (74), /* DI */
|
| + COSTS_N_INSNS (74)}, /* other */
|
| + COSTS_N_INSNS (1), /* cost of movsx */
|
| + COSTS_N_INSNS (1), /* cost of movzx */
|
| + 8, /* "large" insn */
|
| + 17, /* MOVE_RATIO */
|
| + 2, /* cost for loading QImode using movzbl */
|
| + {4, 4, 4}, /* cost of loading integer registers
|
| + in QImode, HImode and SImode.
|
| + Relative to reg-reg move (2). */
|
| + {4, 4, 4}, /* cost of storing integer registers */
|
| + 4, /* cost of reg,reg fld/fst */
|
| + {12, 12, 12}, /* cost of loading fp registers
|
| + in SFmode, DFmode and XFmode */
|
| + {6, 6, 8}, /* cost of storing fp registers
|
| + in SFmode, DFmode and XFmode */
|
| + 2, /* cost of moving MMX register */
|
| + {8, 8}, /* cost of loading MMX registers
|
| + in SImode and DImode */
|
| + {8, 8}, /* cost of storing MMX registers
|
| + in SImode and DImode */
|
| + 2, /* cost of moving SSE register */
|
| + {8, 8, 8}, /* cost of loading SSE registers
|
| + in SImode, DImode and TImode */
|
| + {8, 8, 8}, /* cost of storing SSE registers
|
| + in SImode, DImode and TImode */
|
| + 5, /* MMX or SSE register to integer */
|
| + 32, /* size of l1 cache. */
|
| + 256, /* size of l2 cache. */
|
| + 64, /* size of prefetch block */
|
| + 6, /* number of parallel prefetches */
|
| + 3, /* Branch cost */
|
| + COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
|
| + COSTS_N_INSNS (8), /* cost of FMUL instruction. */
|
| + COSTS_N_INSNS (20), /* cost of FDIV instruction. */
|
| + COSTS_N_INSNS (8), /* cost of FABS instruction. */
|
| + COSTS_N_INSNS (8), /* cost of FCHS instruction. */
|
| + COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
|
| + {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
|
| + {libcall, {{32, loop}, {64, rep_prefix_4_byte},
|
| + {8192, rep_prefix_8_byte}, {-1, libcall}}}},
|
| + {{libcall, {{8, loop}, {15, unrolled_loop},
|
| + {2048, rep_prefix_4_byte}, {-1, libcall}}},
|
| + {libcall, {{24, loop}, {32, unrolled_loop},
|
| + {8192, rep_prefix_8_byte}, {-1, libcall}}}},
|
| + 1, /* scalar_stmt_cost. */
|
| + 1, /* scalar load_cost. */
|
| + 1, /* scalar_store_cost. */
|
| + 1, /* vec_stmt_cost. */
|
| + 1, /* vec_to_scalar_cost. */
|
| + 1, /* scalar_to_vec_cost. */
|
| + 1, /* vec_align_load_cost. */
|
| + 2, /* vec_unalign_load_cost. */
|
| + 1, /* vec_store_cost. */
|
| + 3, /* cond_taken_branch_cost. */
|
| + 1, /* cond_not_taken_branch_cost. */
|
| +};
|
| +
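A note on the units in the atom_cost table above (an annotation, not part of the patch): COSTS_N_INSNS comes from GCC's rtl.h and scales an instruction count into the fixed-point unit used throughout these cost tables, which is why entries like the lea cost can sit a fraction above the add cost. Reading the two trailing brace-initializers, the memcpy/memset strategy tables pick an expansion by size class: for Atom's 32-bit memcpy, blocks up to 11 bytes use an inline loop, larger known sizes use rep with 4-byte chunks, and unknown sizes ({-1, ...} and the leading entry) fall back to a libcall. A minimal sketch of the cost arithmetic, assuming the stock rtl.h definition:

    /* COSTS_N_INSNS as defined in GCC's rtl.h: one insn costs 4 units,
       so "COSTS_N_INSNS (1) + 1" prices lea a shade above add.  */
    #define COSTS_N_INSNS(n) ((n) * 4)

    int atom_add_cost = COSTS_N_INSNS (1);      /*   4 */
    int atom_lea_cost = COSTS_N_INSNS (1) + 1;  /*   5 */
    int atom_div_di   = COSTS_N_INSNS (74);     /* 296: DImode divide */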
|
| /* Generic64 should produce code tuned for Nocona and K8. */
|
| static const
|
| struct processor_costs generic64_cost = {
|
| @@ -1194,6 +1267,7 @@ const struct processor_costs *ix86_cost = &pentium_cost;
|
| #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
|
| #define m_NOCONA (1<<PROCESSOR_NOCONA)
|
| #define m_CORE2 (1<<PROCESSOR_CORE2)
|
| +#define m_ATOM (1<<PROCESSOR_ATOM)
|
|
|
| #define m_GEODE (1<<PROCESSOR_GEODE)
|
| #define m_K6 (1<<PROCESSOR_K6)
|
| @@ -1231,10 +1305,11 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
|
| m_486 | m_PENT,
|
|
|
| /* X86_TUNE_UNROLL_STRLEN */
|
| - m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
|
| + m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
|
| + | m_CORE2 | m_GENERIC,
|
|
|
| /* X86_TUNE_DEEP_BRANCH_PREDICTION */
|
| - m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
|
| + m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
|
|
|
| /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
|
| on simulation result. But after P4 was made, no performance benefit
|
| @@ -1246,12 +1321,12 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
|
| ~m_386,
|
|
|
| /* X86_TUNE_USE_SAHF */
|
| - m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
|
| + m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
|
| | m_NOCONA | m_CORE2 | m_GENERIC,
|
|
|
| /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
|
| partial dependencies. */
|
| - m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
|
| + m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
|
| | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
|
|
|
| /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
|
| @@ -1271,13 +1346,13 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
|
| m_386 | m_486 | m_K6_GEODE,
|
|
|
| /* X86_TUNE_USE_SIMODE_FIOP */
|
| - ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
|
| + ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
|
|
|
| /* X86_TUNE_USE_MOV0 */
|
| m_K6,
|
|
|
| /* X86_TUNE_USE_CLTD */
|
| - ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
|
| + ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
|
|
|
| /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
|
| m_PENT4,
|
| @@ -1292,8 +1367,8 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
|
| ~(m_PENT | m_PPRO),
|
|
|
| /* X86_TUNE_PROMOTE_QIMODE */
|
| - m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
|
| - | m_GENERIC /* | m_PENT4 ? */,
|
| + m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
|
| + | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
|
|
|
| /* X86_TUNE_FAST_PREFIX */
|
| ~(m_PENT | m_486 | m_386),
|
| @@ -1317,26 +1392,28 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
|
| m_PPRO,
|
|
|
| /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
|
| - m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
|
| + m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
|
| + | m_CORE2 | m_GENERIC,
|
|
|
| /* X86_TUNE_ADD_ESP_8 */
|
| - m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
|
| + m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
|
| | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
|
|
|
| /* X86_TUNE_SUB_ESP_4 */
|
| - m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
|
| + m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
|
| + | m_GENERIC,
|
|
|
| /* X86_TUNE_SUB_ESP_8 */
|
| - m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
|
| + m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
|
| | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
|
|
|
| /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
|
| for DFmode copies */
|
| - ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
|
| + ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
|
| | m_GENERIC | m_GEODE),
|
|
|
| /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
|
| - m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
|
| + m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
|
|
|
| /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
|
| conflict here in between PPro/Pentium4 based chips that thread 128bit
|
| @@ -1347,7 +1424,8 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
|
| shows that disabling this option on P4 brings over 20% SPECfp regression,
|
| while enabling it on K8 brings roughly 2.4% regression that can be partly
|
| masked by careful scheduling of moves. */
|
| - m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
|
| + m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
|
| + | m_AMDFAM10,
|
|
|
| /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
|
| m_AMDFAM10,
|
| @@ -1365,13 +1443,13 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
|
| m_PPRO | m_PENT4 | m_NOCONA,
|
|
|
| /* X86_TUNE_MEMORY_MISMATCH_STALL */
|
| - m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
|
| + m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
|
|
|
| /* X86_TUNE_PROLOGUE_USING_MOVE */
|
| - m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
|
| + m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
|
|
|
| /* X86_TUNE_EPILOGUE_USING_MOVE */
|
| - m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
|
| + m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
|
|
|
| /* X86_TUNE_SHIFT1 */
|
| ~m_486,
|
| @@ -1387,22 +1465,25 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
|
|
|
| /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
|
| than 4 branch instructions in the 16 byte window. */
|
| - m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
|
| + m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
|
| + | m_GENERIC,
|
|
|
| /* X86_TUNE_SCHEDULE */
|
| - m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
|
| + m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
|
| + | m_GENERIC,
|
|
|
| /* X86_TUNE_USE_BT */
|
| - m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
|
| + m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
|
|
|
| /* X86_TUNE_USE_INCDEC */
|
| - ~(m_PENT4 | m_NOCONA | m_GENERIC),
|
| + ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
|
|
|
| /* X86_TUNE_PAD_RETURNS */
|
| m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
|
|
|
| /* X86_TUNE_EXT_80387_CONSTANTS */
|
| - m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
|
| + m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
|
| + | m_CORE2 | m_GENERIC,
|
|
|
| /* X86_TUNE_SHORTEN_X87_SSE */
|
| ~m_K8,
|
| @@ -1447,6 +1528,10 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
|
| with a subsequent conditional jump instruction into a single
|
| compare-and-branch uop. */
|
| m_CORE2,
|
| +
|
| + /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
|
| + will impact LEA instruction selection. */
|
| + m_ATOM,
|
| };
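How the table above is consumed (a sketch, not patch code): each X86_TUNE_* entry is a bitmask over processor ids, and m_ATOM is just 1 << PROCESSOR_ATOM, so asking whether the active -mtune enables a given tuning is a single AND against a mask built once in override_options:

    /* Sketch: derived from the selected -mtune processor.  */
    unsigned int ix86_tune_mask_sketch = 1u << PROCESSOR_ATOM;

    int atom_tunes_use_bt_p (void)
    {
      /* Non-zero iff m_ATOM was OR'ed into the X86_TUNE_USE_BT row.  */
      return (initial_ix86_tune_features[X86_TUNE_USE_BT]
              & ix86_tune_mask_sketch) != 0;
    }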
|
|
|
| /* Feature tests against the various architecture variations. */
|
| @@ -1472,10 +1557,11 @@ static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
|
| };
|
|
|
| static const unsigned int x86_accumulate_outgoing_args
|
| - = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
|
| + = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
|
| + | m_GENERIC;
|
|
|
| static const unsigned int x86_arch_always_fancy_math_387
|
| - = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
|
| + = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
|
| | m_NOCONA | m_CORE2 | m_GENERIC;
|
|
|
| static enum stringop_alg stringop_alg = no_stringop;
|
| @@ -1616,7 +1702,6 @@ int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
|
|
|
| rtx ix86_compare_op0 = NULL_RTX;
|
| rtx ix86_compare_op1 = NULL_RTX;
|
| -rtx ix86_compare_emitted = NULL_RTX;
|
|
|
| /* Define parameter passing and return registers. */
|
|
|
| @@ -1637,8 +1722,7 @@ static int const x86_64_int_return_registers[4] =
|
|
|
| /* Define the structure for the machine field in struct function. */
|
|
|
| -struct stack_local_entry GTY(())
|
| -{
|
| +struct GTY(()) stack_local_entry {
|
| unsigned short mode;
|
| unsigned short n;
|
| rtx rtl;
|
| @@ -1725,7 +1809,7 @@ static rtx (*ix86_gen_leave) (void);
|
| static rtx (*ix86_gen_pop1) (rtx);
|
| static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
|
| static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
|
| -static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
|
| +static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
|
| static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
|
| static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
|
| static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
|
| @@ -1743,6 +1827,9 @@ static unsigned int ix86_default_incoming_stack_boundary;
|
| /* Alignment for incoming stack boundary in bits. */
|
| unsigned int ix86_incoming_stack_boundary;
|
|
|
| +/* The ABI used by the target. */
|
| +enum calling_abi ix86_abi;
|
| +
|
| /* Values 1-5: see jump.c */
|
| int ix86_branch_cost;
|
|
|
| @@ -1793,11 +1880,13 @@ static bool ext_80387_constants_init = 0;
|
|
|
| static struct machine_function * ix86_init_machine_status (void);
|
| static rtx ix86_function_value (const_tree, const_tree, bool);
|
| +static rtx ix86_static_chain (const_tree, bool);
|
| static int ix86_function_regparm (const_tree, const_tree);
|
| static void ix86_compute_frame_layout (struct ix86_frame *);
|
| static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
|
| rtx, rtx, int);
|
| static void ix86_add_new_builtins (int);
|
| +static rtx ix86_expand_vec_perm_builtin (tree);
|
|
|
| enum ix86_function_specific_strings
|
| {
|
| @@ -1818,8 +1907,15 @@ static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
|
| static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
|
| static bool ix86_can_inline_p (tree, tree);
|
| static void ix86_set_current_function (tree);
|
| +static unsigned int ix86_minimum_incoming_stack_boundary (bool);
|
| +
|
| +static enum calling_abi ix86_function_abi (const_tree);
|
|
|
|
|
| +#ifndef SUBTARGET32_DEFAULT_CPU
|
| +#define SUBTARGET32_DEFAULT_CPU "i386"
|
| +#endif
|
| +
|
| /* The svr4 ABI for the i386 says that records and unions are returned
|
| in memory. */
|
| #ifndef DEFAULT_PCC_STRUCT_RETURN
|
| @@ -1866,8 +1962,13 @@ static int ix86_isa_flags_explicit;
|
|
|
| #define OPTION_MASK_ISA_SSE4A_SET \
|
| (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
|
| -#define OPTION_MASK_ISA_SSE5_SET \
|
| - (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
|
| +#define OPTION_MASK_ISA_FMA4_SET \
|
| + (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
|
| + | OPTION_MASK_ISA_AVX_SET)
|
| +#define OPTION_MASK_ISA_XOP_SET \
|
| + (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
|
| +#define OPTION_MASK_ISA_LWP_SET \
|
| + OPTION_MASK_ISA_LWP
|
|
|
| /* AES and PCLMUL need SSE2 because they use xmm registers */
|
| #define OPTION_MASK_ISA_AES_SET \
|
| @@ -1877,9 +1978,12 @@ static int ix86_isa_flags_explicit;
|
|
|
| #define OPTION_MASK_ISA_ABM_SET \
|
| (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
|
| +
|
| #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
|
| #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
|
| #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
|
| +#define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
|
| +#define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
|
|
|
| /* Define a set of ISAs which aren't available when a given ISA is
|
| disabled. MMX and SSE ISAs are handled separately. */
|
| @@ -1905,7 +2009,8 @@ static int ix86_isa_flags_explicit;
|
| #define OPTION_MASK_ISA_SSE4_2_UNSET \
|
| (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
|
| #define OPTION_MASK_ISA_AVX_UNSET \
|
| - (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
|
| + (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
|
| + | OPTION_MASK_ISA_FMA4_UNSET)
|
| #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
|
|
|
| /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
|
| @@ -1913,14 +2018,21 @@ static int ix86_isa_flags_explicit;
|
| #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
|
|
|
| #define OPTION_MASK_ISA_SSE4A_UNSET \
|
| - (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
|
| -#define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
|
| + (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
|
| +
|
| +#define OPTION_MASK_ISA_FMA4_UNSET \
|
| + (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
|
| +#define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
|
| +#define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
|
| +
|
| #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
|
| #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
|
| #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
|
| #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
|
| #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
|
| #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
|
| +#define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
|
| +#define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
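The SET/UNSET pairs above encode the ISA dependency graph in both directions (annotation; a sketch using the macros just defined): a SET mask pulls in everything the feature requires, while an UNSET mask cascades to everything that requires the feature. Two worked expansions:

    void enable_xop_then_disable_avx (void)
    {
      /* -mxop: XOP pulls in FMA4, which pulls in SSE4A and AVX
         (and, through the SSE chain, everything below them).  */
      ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;

      /* -mno-avx: FMA, FMA4 and (via FMA4_UNSET) XOP are cleared
         along with AVX itself by one AND.  */
      ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
    }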
|
|
|
| /* Vectorization library interface and handlers. */
|
| tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
|
| @@ -1953,7 +2065,8 @@ static const struct ptt processor_target_table[PROCESSOR_max] =
|
| {&core2_cost, 16, 10, 16, 10, 16},
|
| {&generic32_cost, 16, 7, 16, 7, 16},
|
| {&generic64_cost, 16, 10, 16, 10, 16},
|
| - {&amdfam10_cost, 32, 24, 32, 7, 32}
|
| + {&amdfam10_cost, 32, 24, 32, 7, 32},
|
| + {&atom_cost, 16, 7, 16, 7, 16}
|
| };
|
|
|
| static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
|
| @@ -1971,6 +2084,7 @@ static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
|
| "prescott",
|
| "nocona",
|
| "core2",
|
| + "atom",
|
| "geode",
|
| "k6",
|
| "k6-2",
|
| @@ -2144,16 +2258,42 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
|
| }
|
| return true;
|
|
|
| - case OPT_msse5:
|
| + case OPT_mfma4:
|
| + if (value)
|
| + {
|
| + ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
|
| + ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
|
| + }
|
| + else
|
| + {
|
| + ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
|
| + ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
|
| + }
|
| + return true;
|
| +
|
| + case OPT_mxop:
|
| if (value)
|
| {
|
| - ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
|
| - ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
|
| + ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
|
| + ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
|
| }
|
| else
|
| {
|
| - ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
|
| - ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
|
| + ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
|
| + ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
|
| + }
|
| + return true;
|
| +
|
| + case OPT_mlwp:
|
| + if (value)
|
| + {
|
| + ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
|
| + ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
|
| + }
|
| + else
|
| + {
|
| + ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
|
| + ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
|
| }
|
| return true;
|
|
|
| @@ -2209,6 +2349,32 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
|
| }
|
| return true;
|
|
|
| + case OPT_mmovbe:
|
| + if (value)
|
| + {
|
| + ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
|
| + ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
|
| + }
|
| + else
|
| + {
|
| + ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
|
| + ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
|
| + }
|
| + return true;
|
| +
|
| + case OPT_mcrc32:
|
| + if (value)
|
| + {
|
| + ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
|
| + ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
|
| + }
|
| + else
|
| + {
|
| + ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
|
| + ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
|
| + }
|
| + return true;
|
| +
|
| case OPT_maes:
|
| if (value)
|
| {
|
| @@ -2240,7 +2406,7 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
|
| }
|
| }
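All of the new handlers above (-mfma4, -mxop, -mlwp, -mmovbe, -mcrc32) follow one idiom, sketched below with a hypothetical OPT_mfoo: apply the SET mask on enable or clear the UNSET mask on disable, and record the same mask in ix86_isa_flags_explicit so that later -march= processing knows the user chose explicitly and leaves the bit alone.

    case OPT_mfoo:      /* hypothetical option, fragment of the switch above */
      if (value)        /* -mfoo */
        {
          ix86_isa_flags |= OPTION_MASK_ISA_FOO_SET;
          ix86_isa_flags_explicit |= OPTION_MASK_ISA_FOO_SET;
        }
      else              /* -mno-foo */
        {
          ix86_isa_flags &= ~OPTION_MASK_ISA_FOO_UNSET;
          ix86_isa_flags_explicit |= OPTION_MASK_ISA_FOO_UNSET;
        }
      return true;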
|
|
|
| -/* Return a string the documents the current -m options. The caller is
|
| +/* Return a string that documents the current -m options. The caller is
|
| responsible for freeing the string. */
|
|
|
| static char *
|
| @@ -2253,12 +2419,15 @@ ix86_target_string (int isa, int flags, const char *arch, const char *tune,
|
| int mask; /* isa mask options */
|
| };
|
|
|
| - /* This table is ordered so that options like -msse5 or -msse4.2 that imply
|
| + /* This table is ordered so that options like -msse4.2 that imply
|
|    preceding options will match those first. */
|
| static struct ix86_target_opts isa_opts[] =
|
| {
|
| { "-m64", OPTION_MASK_ISA_64BIT },
|
| - { "-msse5", OPTION_MASK_ISA_SSE5 },
|
| + { "-mfma4", OPTION_MASK_ISA_FMA4 },
|
| + { "-mfma", OPTION_MASK_ISA_FMA },
|
| + { "-mxop", OPTION_MASK_ISA_XOP },
|
| + { "-mlwp", OPTION_MASK_ISA_LWP },
|
| { "-msse4a", OPTION_MASK_ISA_SSE4A },
|
| { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
|
| { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
|
| @@ -2271,6 +2440,8 @@ ix86_target_string (int isa, int flags, const char *arch, const char *tune,
|
| { "-mmmx", OPTION_MASK_ISA_MMX },
|
| { "-mabm", OPTION_MASK_ISA_ABM },
|
| { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
|
| + { "-mmovbe", OPTION_MASK_ISA_MOVBE },
|
| + { "-mcrc32", OPTION_MASK_ISA_CRC32 },
|
| { "-maes", OPTION_MASK_ISA_AES },
|
| { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
|
| };
|
| @@ -2290,7 +2461,6 @@ ix86_target_string (int isa, int flags, const char *arch, const char *tune,
|
| { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
|
| { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
|
| { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
|
| - { "-mno-fused-madd", MASK_NO_FUSED_MADD },
|
| { "-mno-push-args", MASK_NO_PUSH_ARGS },
|
| { "-mno-red-zone", MASK_NO_RED_ZONE },
|
| { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
|
| @@ -2358,7 +2528,7 @@ ix86_target_string (int isa, int flags, const char *arch, const char *tune,
|
| if (flags && add_nl_p)
|
| {
|
| opts[num++][0] = target_other;
|
| - sprintf (target_other, "(other flags: 0x%x)", isa);
|
| + sprintf (target_other, "(other flags: 0x%x)", flags);
|
| }
|
|
|
| /* Add -fpmath= option. */
|
| @@ -2439,7 +2609,7 @@ ix86_debug_options (void)
|
| free (opts);
|
| }
|
| else
|
| - fprintf (stderr, "<no options>\n\n");
|
| + fputs ("<no options>\n\n", stderr);
|
|
|
| return;
|
| }
|
| @@ -2458,6 +2628,7 @@ override_options (bool main_args_p)
|
| {
|
| int i;
|
| unsigned int ix86_arch_mask, ix86_tune_mask;
|
| + const bool ix86_tune_specified = (ix86_tune_string != NULL);
|
| const char *prefix;
|
| const char *suffix;
|
| const char *sw;
|
| @@ -2483,11 +2654,14 @@ override_options (bool main_args_p)
|
| PTA_NO_SAHF = 1 << 13,
|
| PTA_SSE4_1 = 1 << 14,
|
| PTA_SSE4_2 = 1 << 15,
|
| - PTA_SSE5 = 1 << 16,
|
| - PTA_AES = 1 << 17,
|
| - PTA_PCLMUL = 1 << 18,
|
| - PTA_AVX = 1 << 19,
|
| - PTA_FMA = 1 << 20
|
| + PTA_AES = 1 << 16,
|
| + PTA_PCLMUL = 1 << 17,
|
| + PTA_AVX = 1 << 18,
|
| + PTA_FMA = 1 << 19,
|
| + PTA_MOVBE = 1 << 20,
|
| + PTA_FMA4 = 1 << 21,
|
| + PTA_XOP = 1 << 22,
|
| + PTA_LWP = 1 << 23
|
| };
|
|
|
| static struct pta
|
| @@ -2529,6 +2703,9 @@ override_options (bool main_args_p)
|
| {"core2", PROCESSOR_CORE2, CPU_CORE2,
|
| PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
|
| | PTA_SSSE3 | PTA_CX16},
|
| + {"atom", PROCESSOR_ATOM, CPU_ATOM,
|
| + PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
|
| + | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
|
| {"geode", PROCESSOR_GEODE, CPU_GEODE,
|
| PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
|
| {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
|
| @@ -2652,8 +2829,12 @@ override_options (bool main_args_p)
|
| || !strcmp (ix86_tune_string, "generic64")))
|
| ;
|
| else if (!strncmp (ix86_tune_string, "generic", 7))
|
| - error ("bad value (%s) for %stune=%s %s",
|
| + error ("bad value (%s) for %stune=%s %s",
|
| ix86_tune_string, prefix, suffix, sw);
|
| + else if (!strcmp (ix86_tune_string, "x86-64"))
|
| + warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
|
| + "%stune=k8%s or %stune=generic%s instead as appropriate.",
|
| + prefix, suffix, prefix, suffix, prefix, suffix);
|
| }
|
| else
|
| {
|
| @@ -2677,6 +2858,7 @@ override_options (bool main_args_p)
|
| ix86_tune_string = "generic32";
|
| }
|
| }
|
| +
|
| if (ix86_stringop_string)
|
| {
|
| if (!strcmp (ix86_stringop_string, "rep_byte"))
|
| @@ -2699,22 +2881,25 @@ override_options (bool main_args_p)
|
| error ("bad value (%s) for %sstringop-strategy=%s %s",
|
| ix86_stringop_string, prefix, suffix, sw);
|
| }
|
| - if (!strcmp (ix86_tune_string, "x86-64"))
|
| - warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
|
| - "%stune=k8%s or %stune=generic%s instead as appropriate.",
|
| - prefix, suffix, prefix, suffix, prefix, suffix);
|
|
|
| if (!ix86_arch_string)
|
| - ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
|
| + ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
|
| else
|
| ix86_arch_specified = 1;
|
|
|
| - if (!strcmp (ix86_arch_string, "generic"))
|
| - error ("generic CPU can be used only for %stune=%s %s",
|
| - prefix, suffix, sw);
|
| - if (!strncmp (ix86_arch_string, "generic", 7))
|
| - error ("bad value (%s) for %sarch=%s %s",
|
| - ix86_arch_string, prefix, suffix, sw);
|
| + /* Validate -mabi= value. */
|
| + if (ix86_abi_string)
|
| + {
|
| + if (strcmp (ix86_abi_string, "sysv") == 0)
|
| + ix86_abi = SYSV_ABI;
|
| + else if (strcmp (ix86_abi_string, "ms") == 0)
|
| + ix86_abi = MS_ABI;
|
| + else
|
| + error ("unknown ABI (%s) for %sabi=%s %s",
|
| + ix86_abi_string, prefix, suffix, sw);
|
| + }
|
| + else
|
| + ix86_abi = DEFAULT_ABI;
|
|
|
| if (ix86_cmodel_string != 0)
|
| {
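The new -mabi= validation above gives the 64-bit compiler a switchable default calling convention; per-function attributes still override it through ix86_function_type_abi. A usage sketch (function name hypothetical):

    /* Built with -mabi=sysv (or the SysV default), this one function
       still uses the Microsoft convention; sysv_abi works the other
       way around under -mabi=ms.  */
    int __attribute__ ((ms_abi)) hook_entry (void *ctx);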
|
| @@ -2813,9 +2998,15 @@ override_options (bool main_args_p)
|
| if (processor_alias_table[i].flags & PTA_SSE4A
|
| && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
|
| ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
|
| - if (processor_alias_table[i].flags & PTA_SSE5
|
| - && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
|
| - ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
|
| + if (processor_alias_table[i].flags & PTA_FMA4
|
| + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
|
| + ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
|
| + if (processor_alias_table[i].flags & PTA_XOP
|
| + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
|
| + ix86_isa_flags |= OPTION_MASK_ISA_XOP;
|
| + if (processor_alias_table[i].flags & PTA_LWP
|
| + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
|
| + ix86_isa_flags |= OPTION_MASK_ISA_LWP;
|
| if (processor_alias_table[i].flags & PTA_ABM
|
| && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
|
| ix86_isa_flags |= OPTION_MASK_ISA_ABM;
|
| @@ -2828,6 +3019,9 @@ override_options (bool main_args_p)
|
| if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
|
| && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
|
| ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
|
| + if (processor_alias_table[i].flags & PTA_MOVBE
|
| + && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
|
| + ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
|
| if (processor_alias_table[i].flags & PTA_AES
|
| && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
|
| ix86_isa_flags |= OPTION_MASK_ISA_AES;
|
| @@ -2840,7 +3034,10 @@ override_options (bool main_args_p)
|
| break;
|
| }
|
|
|
| - if (i == pta_size)
|
| + if (!strcmp (ix86_arch_string, "generic"))
|
| + error ("generic CPU can be used only for %stune=%s %s",
|
| + prefix, suffix, sw);
|
| + else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
|
| error ("bad value (%s) for %sarch=%s %s",
|
| ix86_arch_string, prefix, suffix, sw);
|
|
|
| @@ -2879,7 +3076,8 @@ override_options (bool main_args_p)
|
| x86_prefetch_sse = true;
|
| break;
|
| }
|
| - if (i == pta_size)
|
| +
|
| + if (ix86_tune_specified && i == pta_size)
|
| error ("bad value (%s) for %stune=%s %s",
|
| ix86_tune_string, prefix, suffix, sw);
|
|
|
| @@ -2999,8 +3197,6 @@ override_options (bool main_args_p)
|
| ix86_tls_dialect = TLS_DIALECT_GNU;
|
| else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
|
| ix86_tls_dialect = TLS_DIALECT_GNU2;
|
| - else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
|
| - ix86_tls_dialect = TLS_DIALECT_SUN;
|
| else
|
| error ("bad value (%s) for %stls-dialect=%s %s",
|
| ix86_tls_dialect_string, prefix, suffix, sw);
|
| @@ -3091,12 +3287,10 @@ override_options (bool main_args_p)
|
| if (ix86_force_align_arg_pointer == -1)
|
| ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
|
|
|
| + ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
|
| +
|
| /* Validate -mincoming-stack-boundary= value or default it to
|
| MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
|
| - if (ix86_force_align_arg_pointer)
|
| - ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
|
| - else
|
| - ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
|
| ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
|
| if (ix86_incoming_stack_boundary_string)
|
| {
|
| @@ -3246,7 +3440,7 @@ override_options (bool main_args_p)
|
| ix86_gen_pop1 = gen_popdi1;
|
| ix86_gen_add3 = gen_adddi3;
|
| ix86_gen_sub3 = gen_subdi3;
|
| - ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
|
| + ix86_gen_sub3_carry = gen_subdi3_carry;
|
| ix86_gen_one_cmpl2 = gen_one_cmpldi2;
|
| ix86_gen_monitor = gen_sse3_monitor64;
|
| ix86_gen_andsp = gen_anddi3;
|
| @@ -3353,12 +3547,6 @@ ix86_conditional_register_usage (void)
|
| static void
|
| ix86_function_specific_save (struct cl_target_option *ptr)
|
| {
|
| - gcc_assert (IN_RANGE (ix86_arch, 0, 255));
|
| - gcc_assert (IN_RANGE (ix86_schedule, 0, 255));
|
| - gcc_assert (IN_RANGE (ix86_tune, 0, 255));
|
| - gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
|
| - gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
|
| -
|
| ptr->arch = ix86_arch;
|
| ptr->schedule = ix86_schedule;
|
| ptr->tune = ix86_tune;
|
| @@ -3368,6 +3556,14 @@ ix86_function_specific_save (struct cl_target_option *ptr)
|
| ptr->arch_specified = ix86_arch_specified;
|
| ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
|
| ptr->target_flags_explicit = target_flags_explicit;
|
| +
|
| + /* The fields are char but the variables are not; make sure the
|
| + values fit in the fields. */
|
| + gcc_assert (ptr->arch == ix86_arch);
|
| + gcc_assert (ptr->schedule == ix86_schedule);
|
| + gcc_assert (ptr->tune == ix86_tune);
|
| + gcc_assert (ptr->fpmath == ix86_fpmath);
|
| + gcc_assert (ptr->branch_cost == ix86_branch_cost);
|
| }
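The relocated asserts above are a narrowing check (annotation; the struct below is a simplified stand-in for cl_target_option): the option-struct fields are chars while the ix86_* variables are ints or enums, so assigning first and then comparing the field back against its source catches silent truncation the moment it happens, instead of asserting fixed 0..255 ranges up front.

    struct opts_sketch { unsigned char tune; };   /* stand-in char field */

    static void
    save_tune_sketch (struct opts_sketch *ptr)
    {
      ptr->tune = ix86_tune;                /* may silently truncate...   */
      gcc_assert (ptr->tune == ix86_tune);  /* ...so compare back at once */
    }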
|
|
|
| /* Restore the current options */
|
| @@ -3380,10 +3576,10 @@ ix86_function_specific_restore (struct cl_target_option *ptr)
|
| unsigned int ix86_arch_mask, ix86_tune_mask;
|
| int i;
|
|
|
| - ix86_arch = ptr->arch;
|
| - ix86_schedule = ptr->schedule;
|
| - ix86_tune = ptr->tune;
|
| - ix86_fpmath = ptr->fpmath;
|
| + ix86_arch = (enum processor_type) ptr->arch;
|
| + ix86_schedule = (enum attr_cpu) ptr->schedule;
|
| + ix86_tune = (enum processor_type) ptr->tune;
|
| + ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
|
| ix86_branch_cost = ptr->branch_cost;
|
| ix86_tune_defaulted = ptr->tune_defaulted;
|
| ix86_arch_specified = ptr->arch_specified;
|
| @@ -3493,8 +3689,10 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
|
| IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
|
| IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
|
| IX86_ATTR_ISA ("sse4a", OPT_msse4a),
|
| - IX86_ATTR_ISA ("sse5", OPT_msse5),
|
| IX86_ATTR_ISA ("ssse3", OPT_mssse3),
|
| + IX86_ATTR_ISA ("fma4", OPT_mfma4),
|
| + IX86_ATTR_ISA ("xop", OPT_mxop),
|
| + IX86_ATTR_ISA ("lwp", OPT_mlwp),
|
|
|
| /* string options */
|
| IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
|
| @@ -3510,10 +3708,6 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
|
| OPT_mfancy_math_387,
|
| MASK_NO_FANCY_MATH_387),
|
|
|
| - IX86_ATTR_NO ("fused-madd",
|
| - OPT_mfused_madd,
|
| - MASK_NO_FUSED_MADD),
|
| -
|
| IX86_ATTR_YES ("ieee-fp",
|
| OPT_mieee_fp,
|
| MASK_IEEE_FP),
|
| @@ -3786,8 +3980,8 @@ ix86_can_inline_p (tree caller, tree callee)
|
| struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
|
| struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
|
|
|
| - /* Callee's isa options should a subset of the caller's, i.e. a SSE5 function
|
| - can inline a SSE2 function but a SSE2 function can't inline a SSE5
|
| +  /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
|
| +     can inline an SSE2 function but an SSE2 function can't inline an SSE4
|
| function. */
|
| if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
|
| != callee_opts->ix86_isa_flags)
|
| @@ -4051,11 +4245,11 @@ x86_elf_aligned_common (FILE *file,
|
| {
|
| if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
|
| && size > (unsigned int)ix86_section_threshold)
|
| - fprintf (file, ".largecomm\t");
|
| + fputs (".largecomm\t", file);
|
| else
|
| - fprintf (file, "%s", COMMON_ASM_OP);
|
| + fputs (COMMON_ASM_OP, file);
|
| assemble_name (file, name);
|
| - fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
|
| + fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
|
| size, align / BITS_PER_UNIT);
|
| }
|
| #endif
|
| @@ -4131,7 +4325,8 @@ ix86_function_ok_for_sibcall (tree decl, tree exp)
|
|
|
| /* If we need to align the outgoing stack, then sibcalling would
|
| unalign the stack, which may break the called function. */
|
| - if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY)
|
| + if (ix86_minimum_incoming_stack_boundary (true)
|
| + < PREFERRED_STACK_BOUNDARY)
|
| return false;
|
|
|
| if (decl)
|
| @@ -4215,8 +4410,8 @@ ix86_handle_cconv_attribute (tree *node, tree name,
|
| && TREE_CODE (*node) != FIELD_DECL
|
| && TREE_CODE (*node) != TYPE_DECL)
|
| {
|
| - warning (OPT_Wattributes, "%qs attribute only applies to functions",
|
| - IDENTIFIER_POINTER (name));
|
| + warning (OPT_Wattributes, "%qE attribute only applies to functions",
|
| + name);
|
| *no_add_attrs = true;
|
| return NULL_TREE;
|
| }
|
| @@ -4235,14 +4430,14 @@ ix86_handle_cconv_attribute (tree *node, tree name,
|
| if (TREE_CODE (cst) != INTEGER_CST)
|
| {
|
| warning (OPT_Wattributes,
|
| - "%qs attribute requires an integer constant argument",
|
| - IDENTIFIER_POINTER (name));
|
| + "%qE attribute requires an integer constant argument",
|
| + name);
|
| *no_add_attrs = true;
|
| }
|
| else if (compare_tree_int (cst, REGPARM_MAX) > 0)
|
| {
|
| - warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
|
| - IDENTIFIER_POINTER (name), REGPARM_MAX);
|
| + warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
|
| + name, REGPARM_MAX);
|
| *no_add_attrs = true;
|
| }
|
|
|
| @@ -4252,9 +4447,10 @@ ix86_handle_cconv_attribute (tree *node, tree name,
|
| if (TARGET_64BIT)
|
| {
|
| /* Do not warn when emulating the MS ABI. */
|
| - if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
|
| - warning (OPT_Wattributes, "%qs attribute ignored",
|
| - IDENTIFIER_POINTER (name));
|
| + if (TREE_CODE (*node) != FUNCTION_TYPE
|
| + || ix86_function_type_abi (*node) != MS_ABI)
|
| + warning (OPT_Wattributes, "%qE attribute ignored",
|
| + name);
|
| *no_add_attrs = true;
|
| return NULL_TREE;
|
| }
|
| @@ -4352,33 +4548,15 @@ ix86_function_regparm (const_tree type, const_tree decl)
|
| tree attr;
|
| int regparm;
|
|
|
| - static bool error_issued;
|
| -
|
| if (TARGET_64BIT)
|
| return (ix86_function_type_abi (type) == SYSV_ABI
|
| - ? X86_64_REGPARM_MAX : X64_REGPARM_MAX);
|
| + ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
|
|
|
| regparm = ix86_regparm;
|
| attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
|
| if (attr)
|
| {
|
| - regparm
|
| - = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
|
| -
|
| - if (decl && TREE_CODE (decl) == FUNCTION_DECL)
|
| - {
|
| - /* We can't use regparm(3) for nested functions because
|
| - these pass static chain pointer in %ecx register. */
|
| - if (!error_issued && regparm == 3
|
| - && decl_function_context (decl)
|
| - && !DECL_NO_STATIC_CHAIN (decl))
|
| - {
|
| - error ("nested functions are limited to 2 register parameters");
|
| - error_issued = true;
|
| - return 0;
|
| - }
|
| - }
|
| -
|
| + regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
|
| return regparm;
|
| }
|
|
|
| @@ -4392,11 +4570,10 @@ ix86_function_regparm (const_tree type, const_tree decl)
|
| && !profile_flag)
|
| {
|
| /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
|
| - struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
|
| + struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
|
| if (i && i->local)
|
| {
|
| int local_regparm, globals = 0, regno;
|
| - struct function *f;
|
|
|
| /* Make sure no regparm register is taken by a
|
| fixed register variable. */
|
| @@ -4404,23 +4581,12 @@ ix86_function_regparm (const_tree type, const_tree decl)
|
| if (fixed_regs[local_regparm])
|
| break;
|
|
|
| - /* We can't use regparm(3) for nested functions as these use
|
| - static chain pointer in third argument. */
|
| - if (local_regparm == 3
|
| - && decl_function_context (decl)
|
| - && !DECL_NO_STATIC_CHAIN (decl))
|
| + /* We don't want to use regparm(3) for nested functions as
|
| + these use a static chain pointer in the third argument. */
|
| + if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
|
| local_regparm = 2;
|
|
|
| - /* If the function realigns its stackpointer, the prologue will
|
| - clobber %ecx. If we've already generated code for the callee,
|
| - the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
|
| - scanning the attributes for the self-realigning property. */
|
| - f = DECL_STRUCT_FUNCTION (decl);
|
| - /* Since current internal arg pointer won't conflict with
|
| - parameter passing regs, so no need to change stack
|
| - realignment and adjust regparm number.
|
| -
|
| - Each fixed register usage increases register pressure,
|
| +      /* Each fixed register usage increases register pressure,
|
|          so fewer registers should be used for argument passing.
|
|          This functionality can be overridden by an explicit
|
| regparm value. */
|
| @@ -4592,15 +4758,15 @@ ix86_function_arg_regno_p (int regno)
|
| default ABI. */
|
|
|
| /* RAX is used as hidden argument to va_arg functions. */
|
| - if (DEFAULT_ABI == SYSV_ABI && regno == AX_REG)
|
| + if (ix86_abi == SYSV_ABI && regno == AX_REG)
|
| return true;
|
|
|
| - if (DEFAULT_ABI == MS_ABI)
|
| + if (ix86_abi == MS_ABI)
|
| parm_regs = x86_64_ms_abi_int_parameter_registers;
|
| else
|
| parm_regs = x86_64_int_parameter_registers;
|
| - for (i = 0; i < (DEFAULT_ABI == MS_ABI ? X64_REGPARM_MAX
|
| - : X86_64_REGPARM_MAX); i++)
|
| + for (i = 0; i < (ix86_abi == MS_ABI
|
| + ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
|
| if (regno == parm_regs[i])
|
| return true;
|
| return false;
|
| @@ -4627,7 +4793,7 @@ ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
|
| int
|
| ix86_reg_parm_stack_space (const_tree fndecl)
|
| {
|
| - int call_abi = SYSV_ABI;
|
| + enum calling_abi call_abi = SYSV_ABI;
|
| if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
|
| call_abi = ix86_function_abi (fndecl);
|
| else
|
| @@ -4639,37 +4805,58 @@ ix86_reg_parm_stack_space (const_tree fndecl)
|
|
|
| /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
|
| call abi used. */
|
| -int
|
| +enum calling_abi
|
| ix86_function_type_abi (const_tree fntype)
|
| {
|
| if (TARGET_64BIT && fntype != NULL)
|
| {
|
| - int abi;
|
| - if (DEFAULT_ABI == SYSV_ABI)
|
| - abi = lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)) ? MS_ABI : SYSV_ABI;
|
| - else
|
| - abi = lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)) ? SYSV_ABI : MS_ABI;
|
| -
|
| + enum calling_abi abi = ix86_abi;
|
| + if (abi == SYSV_ABI)
|
| + {
|
| + if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
|
| + abi = MS_ABI;
|
| + }
|
| + else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
|
| + abi = SYSV_ABI;
|
| return abi;
|
| }
|
| - return DEFAULT_ABI;
|
| + return ix86_abi;
|
| }
|
|
|
| -int
|
| +static bool
|
| +ix86_function_ms_hook_prologue (const_tree fntype)
|
| +{
|
| + if (!TARGET_64BIT)
|
| + {
|
| + if (lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fntype)))
|
| + {
|
| + if (decl_function_context (fntype) != NULL_TREE)
|
| + {
|
| + error_at (DECL_SOURCE_LOCATION (fntype),
|
| + "ms_hook_prologue is not compatible with nested function");
|
| + }
|
| +
|
| + return true;
|
| + }
|
| + }
|
| + return false;
|
| +}
|
| +
|
| +static enum calling_abi
|
| ix86_function_abi (const_tree fndecl)
|
| {
|
| if (! fndecl)
|
| - return DEFAULT_ABI;
|
| + return ix86_abi;
|
| return ix86_function_type_abi (TREE_TYPE (fndecl));
|
| }
|
|
|
| /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
|
| call abi used. */
|
| -int
|
| +enum calling_abi
|
| ix86_cfun_abi (void)
|
| {
|
| if (! cfun || ! TARGET_64BIT)
|
| - return DEFAULT_ABI;
|
| + return ix86_abi;
|
| return cfun->machine->call_abi;
|
| }
|
|
|
| @@ -4683,7 +4870,7 @@ void
|
| ix86_call_abi_override (const_tree fndecl)
|
| {
|
| if (fndecl == NULL_TREE)
|
| - cfun->machine->call_abi = DEFAULT_ABI;
|
| + cfun->machine->call_abi = ix86_abi;
|
| else
|
| cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
|
| }
|
| @@ -4724,18 +4911,18 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
|
| cum->nregs = ix86_regparm;
|
| if (TARGET_64BIT)
|
| {
|
| - if (cum->call_abi != DEFAULT_ABI)
|
| - cum->nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX
|
| - : X64_REGPARM_MAX;
|
| + cum->nregs = (cum->call_abi == SYSV_ABI
|
| + ? X86_64_REGPARM_MAX
|
| + : X86_64_MS_REGPARM_MAX);
|
| }
|
| if (TARGET_SSE)
|
| {
|
| cum->sse_nregs = SSE_REGPARM_MAX;
|
| if (TARGET_64BIT)
|
| {
|
| - if (cum->call_abi != DEFAULT_ABI)
|
| - cum->sse_nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
|
| - : X64_SSE_REGPARM_MAX;
|
| + cum->sse_nregs = (cum->call_abi == SYSV_ABI
|
| + ? X86_64_SSE_REGPARM_MAX
|
| + : X86_64_MS_SSE_REGPARM_MAX);
|
| }
|
| }
|
| if (TARGET_MMX)
|
| @@ -5001,6 +5188,8 @@ classify_argument (enum machine_mode mode, const_tree type,
|
| }
|
| else
|
| {
|
| + int pos;
|
| +
|
| type = TREE_TYPE (field);
|
|
|
| /* Flexible array member is ignored. */
|
| @@ -5029,13 +5218,10 @@ classify_argument (enum machine_mode mode, const_tree type,
|
| + bit_offset) % 256);
|
| if (!num)
|
| return 0;
|
| - for (i = 0; i < num; i++)
|
| - {
|
| - int pos =
|
| - (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
|
| - classes[i + pos] =
|
| - merge_classes (subclasses[i], classes[i + pos]);
|
| - }
|
| + pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
|
| + for (i = 0; i < num && (i + pos) < words; i++)
|
| + classes[i + pos] =
|
| + merge_classes (subclasses[i], classes[i + pos]);
|
| }
|
| }
|
| }
|
| @@ -5163,7 +5349,7 @@ classify_argument (enum machine_mode mode, const_tree type,
|
| }
|
|
|
| /* for V1xx modes, just use the base mode */
|
| - if (VECTOR_MODE_P (mode) && mode != V1DImode
|
| + if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
|
| && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
|
| mode = GET_MODE_INNER (mode);
|
|
|
| @@ -5287,6 +5473,7 @@ classify_argument (enum machine_mode mode, const_tree type,
|
| classes[0] = X86_64_SSE_CLASS;
|
| classes[1] = X86_64_SSEUP_CLASS;
|
| return 2;
|
| + case V1TImode:
|
| case V1DImode:
|
| case V2SFmode:
|
| case V2SImode:
|
| @@ -5631,6 +5818,7 @@ function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
|
| case V4HImode:
|
| case V2SImode:
|
| case V2SFmode:
|
| + case V1TImode:
|
| case V1DImode:
|
| if (!type || !AGGREGATE_TYPE_P (type))
|
| {
|
| @@ -5657,9 +5845,8 @@ function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
|
| if (!named && VALID_AVX256_REG_MODE (mode))
|
| return;
|
|
|
| - if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
|
| - cum->words += words;
|
| - else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
|
| + if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
|
| + && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
|
| {
|
| cum->nregs -= int_nregs;
|
| cum->sse_nregs -= sse_nregs;
|
| @@ -5667,7 +5854,11 @@ function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
|
| cum->sse_regno += sse_nregs;
|
| }
|
| else
|
| - cum->words += words;
|
| + {
|
| + int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
|
| + cum->words = (cum->words + align - 1) & ~(align - 1);
|
| + cum->words += words;
|
| + }
|
| }
|
|
|
| static void
|
| @@ -5700,7 +5891,7 @@ function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
|
| if (type)
|
| mode = type_natural_mode (type, NULL);
|
|
|
| - if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
|
| + if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
|
| function_arg_advance_ms_64 (cum, bytes, words);
|
| else if (TARGET_64BIT)
|
| function_arg_advance_64 (cum, mode, type, words, named);
|
| @@ -5818,6 +6009,7 @@ function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
|
| case V4HImode:
|
| case V2SImode:
|
| case V2SFmode:
|
| + case V1TImode:
|
| case V1DImode:
|
| if (!type || !AGGREGATE_TYPE_P (type))
|
| {
|
| @@ -5846,11 +6038,8 @@ function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
|
| if (mode == VOIDmode)
|
| return GEN_INT (cum->maybe_vaarg
|
| ? (cum->sse_nregs < 0
|
| - ? (cum->call_abi == DEFAULT_ABI
|
| - ? SSE_REGPARM_MAX
|
| - : (DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
|
| - : X64_SSE_REGPARM_MAX))
|
| - : cum->sse_regno)
|
| + ? X86_64_SSE_REGPARM_MAX
|
| + : cum->sse_regno)
|
| : -1);
|
|
|
| switch (mode)
|
| @@ -5942,7 +6131,7 @@ function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
|
| if (type && TREE_CODE (type) == VECTOR_TYPE)
|
| mode = type_natural_mode (type, cum);
|
|
|
| - if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
|
| + if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
|
| return function_arg_ms_64 (cum, mode, omode, named, bytes);
|
| else if (TARGET_64BIT)
|
| return function_arg_64 (cum, mode, omode, type, named);
|
| @@ -5962,7 +6151,7 @@ ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
|
| const_tree type, bool named ATTRIBUTE_UNUSED)
|
| {
|
| /* See Windows x64 Software Convention. */
|
| - if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
|
| + if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
|
| {
|
| int msize = (int) GET_MODE_SIZE (mode);
|
| if (type)
|
| @@ -5996,7 +6185,7 @@ ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
|
| /* Return true when TYPE should be 128bit aligned for 32bit argument passing
|
| ABI. */
|
| static bool
|
| -contains_aligned_value_p (tree type)
|
| +contains_aligned_value_p (const_tree type)
|
| {
|
| enum machine_mode mode = TYPE_MODE (type);
|
| if (((TARGET_SSE && SSE_REG_MODE_P (mode))
|
| @@ -6046,7 +6235,7 @@ contains_aligned_value_p (tree type)
|
| specified mode and type. */
|
|
|
| int
|
| -ix86_function_arg_boundary (enum machine_mode mode, tree type)
|
| +ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
|
| {
|
| int align;
|
| if (type)
|
| @@ -6102,7 +6291,7 @@ ix86_function_value_regno_p (int regno)
|
| /* TODO: The function should depend on current function ABI but
|
| builtins.c would need updating then. Therefore we use the
|
| default ABI. */
|
| - if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
|
| + if (TARGET_64BIT && ix86_abi == MS_ABI)
|
| return false;
|
| return TARGET_FLOAT_RETURNS_IN_80387;
|
|
|
| @@ -6362,12 +6551,12 @@ ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
|
| }
|
|
|
| /* Return false iff TYPE is returned in memory. This version is used
|
| - on Solaris 10. It is similar to the generic ix86_return_in_memory,
|
| + on Solaris 2. It is similar to the generic ix86_return_in_memory,
|
| but differs notably in that when MMX is available, 8-byte vectors
|
| are returned in memory, rather than in MMX registers. */
|
|
|
| bool
|
| -ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
|
| +ix86_solaris_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
|
| {
|
| int size;
|
| enum machine_mode mode = type_natural_mode (type, NULL);
|
| @@ -6460,15 +6649,20 @@ ix86_build_builtin_va_list_abi (enum calling_abi abi)
|
| return build_pointer_type (char_type_node);
|
|
|
| record = (*lang_hooks.types.make_type) (RECORD_TYPE);
|
| - type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
|
| + type_decl = build_decl (BUILTINS_LOCATION,
|
| + TYPE_DECL, get_identifier ("__va_list_tag"), record);
|
|
|
| - f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
|
| + f_gpr = build_decl (BUILTINS_LOCATION,
|
| + FIELD_DECL, get_identifier ("gp_offset"),
|
| unsigned_type_node);
|
| - f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
|
| + f_fpr = build_decl (BUILTINS_LOCATION,
|
| + FIELD_DECL, get_identifier ("fp_offset"),
|
| unsigned_type_node);
|
| - f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
|
| + f_ovf = build_decl (BUILTINS_LOCATION,
|
| + FIELD_DECL, get_identifier ("overflow_arg_area"),
|
| ptr_type_node);
|
| - f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
|
| + f_sav = build_decl (BUILTINS_LOCATION,
|
| + FIELD_DECL, get_identifier ("reg_save_area"),
|
| ptr_type_node);
|
|
|
| va_list_gpr_counter_field = f_gpr;
|
| @@ -6498,13 +6692,13 @@ ix86_build_builtin_va_list_abi (enum calling_abi abi)
|
| static tree
|
| ix86_build_builtin_va_list (void)
|
| {
|
| - tree ret = ix86_build_builtin_va_list_abi (DEFAULT_ABI);
|
| + tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
|
|
|
| /* Initialize abi specific va_list builtin types. */
|
| if (TARGET_64BIT)
|
| {
|
| tree t;
|
| - if (DEFAULT_ABI == MS_ABI)
|
| + if (ix86_abi == MS_ABI)
|
| {
|
| t = ix86_build_builtin_va_list_abi (SYSV_ABI);
|
| if (TREE_CODE (t) != RECORD_TYPE)
|
| @@ -6518,7 +6712,7 @@ ix86_build_builtin_va_list (void)
|
| t = build_variant_type_copy (t);
|
| sysv_va_list_type_node = t;
|
| }
|
| - if (DEFAULT_ABI != MS_ABI)
|
| + if (ix86_abi != MS_ABI)
|
| {
|
| t = ix86_build_builtin_va_list_abi (MS_ABI);
|
| if (TREE_CODE (t) != RECORD_TYPE)
|
| @@ -6549,10 +6743,6 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
|
| rtx nsse_reg;
|
| alias_set_type set;
|
| int i;
|
| - int regparm = ix86_regparm;
|
| -
|
| - if (cum->call_abi != DEFAULT_ABI)
|
| - regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
|
|
|
| /* GPR size of varargs save area. */
|
| if (cfun->va_list_gpr_size)
|
| @@ -6574,7 +6764,7 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
|
| set = get_varargs_alias_set ();
|
|
|
| for (i = cum->regno;
|
| - i < regparm
|
| + i < X86_64_REGPARM_MAX
|
| && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
|
| i++)
|
| {
|
| @@ -6588,6 +6778,10 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
|
|
|
| if (ix86_varargs_fpr_size)
|
| {
|
| + /* Stack must be aligned to 16 bytes for the FP register save area. */
|
| + if (crtl->stack_alignment_needed < 128)
|
| + crtl->stack_alignment_needed = 128;
|
| +
|
| /* Now emit code to save SSE registers. The AX parameter contains number
|
| of SSE parameter registers used to call this function. We use
|
| sse_prologue_save insn template that produces computed jump across
|
| @@ -6649,7 +6843,7 @@ setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
|
| alias_set_type set = get_varargs_alias_set ();
|
| int i;
|
|
|
| - for (i = cum->regno; i < X64_REGPARM_MAX; i++)
|
| + for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
|
| {
|
| rtx reg, mem;
|
|
|
| @@ -6705,7 +6899,7 @@ is_va_list_char_pointer (tree type)
|
| return true;
|
| canonic = ix86_canonical_va_list_type (type);
|
| return (canonic == ms_va_list_type_node
|
| - || (DEFAULT_ABI == MS_ABI && canonic == va_list_type_node));
|
| + || (ix86_abi == MS_ABI && canonic == va_list_type_node));
|
| }
|
|
|
| /* Implement va_start. */
|
| @@ -6851,7 +7045,6 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
|
| /* Pull the value out of the saved registers. */
|
|
|
| addr = create_tmp_var (ptr_type_node, "addr");
|
| - DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
|
|
|
| if (container)
|
| {
|
| @@ -6859,8 +7052,8 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
|
| bool need_temp;
|
| tree int_addr, sse_addr;
|
|
|
| - lab_false = create_artificial_label ();
|
| - lab_over = create_artificial_label ();
|
| + lab_false = create_artificial_label (UNKNOWN_LOCATION);
|
| + lab_over = create_artificial_label (UNKNOWN_LOCATION);
|
|
|
| examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
|
|
|
| @@ -6906,9 +7099,7 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
|
| else
|
| {
|
| int_addr = create_tmp_var (ptr_type_node, "int_addr");
|
| - DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
|
| sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
|
| - DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
|
| }
|
|
|
| /* First ensure that we fit completely in registers. */
|
| @@ -6949,7 +7140,7 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
|
| }
|
| if (need_temp)
|
| {
|
| - int i;
|
| + int i, prev_size = 0;
|
| tree temp = create_tmp_var (type, "va_arg_tmp");
|
|
|
| /* addr = &temp; */
|
| @@ -6961,13 +7152,29 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
|
| rtx slot = XVECEXP (container, 0, i);
|
| rtx reg = XEXP (slot, 0);
|
| enum machine_mode mode = GET_MODE (reg);
|
| - tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
|
| - tree addr_type = build_pointer_type (piece_type);
|
| - tree daddr_type = build_pointer_type_for_mode (piece_type,
|
| - ptr_mode, true);
|
| + tree piece_type;
|
| + tree addr_type;
|
| + tree daddr_type;
|
| tree src_addr, src;
|
| int src_offset;
|
| tree dest_addr, dest;
|
| + int cur_size = GET_MODE_SIZE (mode);
|
| +
|
| + if (prev_size + cur_size > size)
|
| + {
|
| + cur_size = size - prev_size;
|
| + mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
|
| + if (mode == BLKmode)
|
| + mode = QImode;
|
| + }
|
| + piece_type = lang_hooks.types.type_for_mode (mode, 1);
|
| + if (mode == GET_MODE (reg))
|
| + addr_type = build_pointer_type (piece_type);
|
| + else
|
| + addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
|
| + true);
|
| + daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
|
| + true);
|
|
|
| if (SSE_REGNO_P (REGNO (reg)))
|
| {
|
| @@ -6982,14 +7189,26 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
|
| src_addr = fold_convert (addr_type, src_addr);
|
| src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
|
| size_int (src_offset));
|
| - src = build_va_arg_indirect_ref (src_addr);
|
|
|
| dest_addr = fold_convert (daddr_type, addr);
|
| dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
|
| size_int (INTVAL (XEXP (slot, 1))));
|
| - dest = build_va_arg_indirect_ref (dest_addr);
|
| + if (cur_size == GET_MODE_SIZE (mode))
|
| + {
|
| + src = build_va_arg_indirect_ref (src_addr);
|
| + dest = build_va_arg_indirect_ref (dest_addr);
|
|
|
| - gimplify_assign (dest, src, pre_p);
|
| + gimplify_assign (dest, src, pre_p);
|
| + }
|
| + else
|
| + {
|
| + tree copy
|
| + = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
|
| + 3, dest_addr, src_addr,
|
| + size_int (cur_size));
|
| + gimplify_and_add (copy, pre_p);
|
| + }
|
| + prev_size += cur_size;
|
| }
|
| }
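The loop above now clamps the final register piece to the bytes that remain in the temporary (annotation with illustrative numbers): copying a full 8-byte mode into the tail of a 12-byte va_arg temporary would overrun it, so the tail piece is re-typed to a narrower integer mode, or copied with memcpy when no exact mode fits.

    /* A 12-byte struct arriving in two 8-byte GPR pieces.  */
    int tail_piece_size_sketch (void)
    {
      int size = 12, prev_size = 8, cur_size = 8;
      if (prev_size + cur_size > size)
        cur_size = size - prev_size;   /* only 4 live bytes remain */
      return cur_size;
    }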
|
|
|
| @@ -7046,7 +7265,7 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
|
| if (container)
|
| gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
|
|
|
| - ptrtype = build_pointer_type (type);
|
| + ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
|
| addr = fold_convert (ptrtype, addr);
|
|
|
| if (indirect_p)
|
| @@ -7204,28 +7423,8 @@ standard_80387_constant_rtx (int idx)
|
| XFmode);
|
| }
|
|
|
| -/* Return 1 if mode is a valid mode for sse. */
|
| -static int
|
| -standard_sse_mode_p (enum machine_mode mode)
|
| -{
|
| - switch (mode)
|
| - {
|
| - case V16QImode:
|
| - case V8HImode:
|
| - case V4SImode:
|
| - case V2DImode:
|
| - case V4SFmode:
|
| - case V2DFmode:
|
| - return 1;
|
| -
|
| - default:
|
| - return 0;
|
| - }
|
| -}
|
| -
|
| -/* Return 1 if X is all 0s. For all 1s, return 2 if X is in 128bit
|
| - SSE modes and SSE2 is enabled, return 3 if X is in 256bit AVX
|
| - modes and AVX is enabled. */
|
| +/* Return 1 if X is all 0s and 2 if X is all 1s
|
| + in a supported SSE vector mode. */
|
|
|
| int
|
| standard_sse_constant_p (rtx x)
|
| @@ -7235,12 +7434,17 @@ standard_sse_constant_p (rtx x)
|
| if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
|
| return 1;
|
| if (vector_all_ones_operand (x, mode))
|
| - {
|
| - if (standard_sse_mode_p (mode))
|
| - return TARGET_SSE2 ? 2 : -2;
|
| - else if (VALID_AVX256_REG_MODE (mode))
|
| - return TARGET_AVX ? 3 : -3;
|
| - }
|
| + switch (mode)
|
| + {
|
| + case V16QImode:
|
| + case V8HImode:
|
| + case V4SImode:
|
| + case V2DImode:
|
| + if (TARGET_SSE2)
|
| + return 2;
|
| + default:
|
| + break;
|
| + }
|
|
|
| return 0;
|
| }
|
| @@ -7269,22 +7473,12 @@ standard_sse_constant_opcode (rtx insn, rtx x)
|
| case MODE_OI:
|
| return "vpxor\t%x0, %x0, %x0";
|
| default:
|
| - gcc_unreachable ();
|
| + break;
|
| }
|
| case 2:
|
| - if (TARGET_AVX)
|
| - switch (get_attr_mode (insn))
|
| - {
|
| - case MODE_V4SF:
|
| - case MODE_V2DF:
|
| - case MODE_TI:
|
| - return "vpcmpeqd\t%0, %0, %0";
|
| - break;
|
| - default:
|
| - gcc_unreachable ();
|
| - }
|
| - else
|
| - return "pcmpeqd\t%0, %0";
|
| + return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
|
| + default:
|
| + break;
|
| }
|
| gcc_unreachable ();
|
| }
|
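| Both functions hinge on the fact that the two "free" SSE constants need
| no constant-pool load: all-zeros comes from xoring a register with
| itself, and all-ones from comparing a register for equality with itself;
| pcmpeqd needs SSE2, which is why the integer vector cases above check
| TARGET_SSE2. A hedged intrinsics illustration (not part of the patch;
| requires -msse2):
|
|   #include <emmintrin.h>
|
|   __m128i
|   all_zeros (void)
|   {
|     return _mm_setzero_si128 ();     /* typically emits: pxor %xmm0, %xmm0 */
|   }
|
|   __m128i
|   all_ones (__m128i x)
|   {
|     return _mm_cmpeq_epi32 (x, x);   /* typically emits: pcmpeqd %xmm0, %xmm0 */
|   }
|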
| @@ -7348,18 +7542,18 @@ ix86_can_use_return_insn_p (void)
|
| Zero means the frame pointer need not be set up (and parms may
|
| be accessed via the stack pointer) in functions that seem suitable. */
|
|
|
| -int
|
| +static bool
|
| ix86_frame_pointer_required (void)
|
| {
|
| /* If we accessed previous frames, then the generated code expects
|
| to be able to access the saved ebp value in our frame. */
|
| if (cfun->machine->accesses_prev_frame)
|
| - return 1;
|
| + return true;
|
|
|
| /* Several x86 OSes need a frame pointer for other reasons,
|
| usually pertaining to setjmp. */
|
| if (SUBTARGET_FRAME_POINTER_REQUIRED)
|
| - return 1;
|
| + return true;
|
|
|
| /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
|
| the frame pointer by default. Turn it back on now if we've not
|
| @@ -7367,12 +7561,12 @@ ix86_frame_pointer_required (void)
|
| if (TARGET_OMIT_LEAF_FRAME_POINTER
|
| && (!current_function_is_leaf
|
| || ix86_current_function_calls_tls_descriptor))
|
| - return 1;
|
| + return true;
|
|
|
| if (crtl->profile)
|
| - return 1;
|
| + return true;
|
|
|
| - return 0;
|
| + return false;
|
| }
|
|
|
| /* Record that the current function accesses previous call frames. */
|
| @@ -7411,8 +7605,8 @@ get_pc_thunk_name (char name[32], unsigned int regno)
|
| /* This function generates code for -fpic that loads %ebx with
|
| the return address of the caller and then returns. */
|
|
|
| -void
|
| -ix86_file_end (void)
|
| +static void
|
| +ix86_code_end (void)
|
| {
|
| rtx xops[2];
|
| int regno;
|
| @@ -7420,12 +7614,21 @@ ix86_file_end (void)
|
| for (regno = 0; regno < 8; ++regno)
|
| {
|
| char name[32];
|
| + tree decl;
|
|
|
| if (! ((pic_labels_used >> regno) & 1))
|
| continue;
|
|
|
| get_pc_thunk_name (name, regno);
|
|
|
| + decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
|
| + get_identifier (name),
|
| + build_function_type (void_type_node, void_list_node));
|
| + DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
|
| + NULL_TREE, void_type_node);
|
| + TREE_PUBLIC (decl) = 1;
|
| + TREE_STATIC (decl) = 1;
|
| +
|
| #if TARGET_MACHO
|
| if (TARGET_MACHO)
|
| {
|
| @@ -7436,18 +7639,13 @@ ix86_file_end (void)
|
| assemble_name (asm_out_file, name);
|
| fputs ("\n", asm_out_file);
|
| ASM_OUTPUT_LABEL (asm_out_file, name);
|
| + DECL_WEAK (decl) = 1;
|
| }
|
| else
|
| #endif
|
| if (USE_HIDDEN_LINKONCE)
|
| {
|
| - tree decl;
|
| -
|
| - decl = build_decl (FUNCTION_DECL, get_identifier (name),
|
| - error_mark_node);
|
| - TREE_PUBLIC (decl) = 1;
|
| - TREE_STATIC (decl) = 1;
|
| - DECL_ONE_ONLY (decl) = 1;
|
| + DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
|
|
|
| (*targetm.asm_out.unique_section) (decl, 0);
|
| switch_to_section (get_named_section (decl, NULL, 0));
|
| @@ -7455,7 +7653,7 @@ ix86_file_end (void)
|
| (*targetm.asm_out.globalize_label) (asm_out_file, name);
|
| fputs ("\t.hidden\t", asm_out_file);
|
| assemble_name (asm_out_file, name);
|
| - fputc ('\n', asm_out_file);
|
| + putc ('\n', asm_out_file);
|
| ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
|
| }
|
| else
|
| @@ -7464,14 +7662,23 @@ ix86_file_end (void)
|
| ASM_OUTPUT_LABEL (asm_out_file, name);
|
| }
|
|
|
| + DECL_INITIAL (decl) = make_node (BLOCK);
|
| + current_function_decl = decl;
|
| + init_function_start (decl);
|
| + first_function_block_is_cold = false;
|
| + /* Make sure unwind info is emitted for the thunk if needed. */
|
| + final_start_function (emit_barrier (), asm_out_file, 1);
|
| +
|
| xops[0] = gen_rtx_REG (Pmode, regno);
|
| xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
|
| output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
|
| output_asm_insn ("ret", xops);
|
| + final_end_function ();
|
| + init_insn_lengths ();
|
| + free_after_compilation (cfun);
|
| + set_cfun (NULL);
|
| + current_function_decl = NULL;
|
| }
|
| -
|
| - if (NEED_INDICATE_EXEC_STACK)
|
| - file_end_indicate_exec_stack ();
|
| }
|
|
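| Each emitted thunk body is just "mov (%esp), %reg; ret": it copies the
| return address that the caller's CALL pushed into the PIC register. A
| minimal GNU C sketch of the same trick (hypothetical helper, 32-bit):
|
|   /* Return the address of the local label, i.e. the program counter,
|      by popping the return address a CALL just pushed.  */
|   static void *
|   get_pc (void)
|   {
|     void *pc;
|     __asm__ ("call 1f\n"
|              "1:\tpop %0" : "=r" (pc));
|     return pc;
|   }
|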
|
| /* Emit code for the SET_GOT patterns. */
|
| @@ -7508,7 +7715,24 @@ output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
|
| if (!flag_pic)
|
| output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
|
| else
|
| - output_asm_insn ("call\t%a2", xops);
|
| + {
|
| + output_asm_insn ("call\t%a2", xops);
|
| +#ifdef DWARF2_UNWIND_INFO
|
| + /* The call to the next label acts as a push. */
|
| + if (dwarf2out_do_frame ())
|
| + {
|
| + rtx insn;
|
| + start_sequence ();
|
| + insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
|
| + gen_rtx_PLUS (Pmode,
|
| + stack_pointer_rtx,
|
| + GEN_INT (-4))));
|
| + RTX_FRAME_RELATED_P (insn) = 1;
|
| + dwarf2out_frame_debug (insn, true);
|
| + end_sequence ();
|
| + }
|
| +#endif
|
| + }
|
|
|
| #if TARGET_MACHO
|
| /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
|
| @@ -7521,7 +7745,27 @@ output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
|
| CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
|
|
|
| if (flag_pic)
|
| - output_asm_insn ("pop%z0\t%0", xops);
|
| + {
|
| + output_asm_insn ("pop%z0\t%0", xops);
|
| +#ifdef DWARF2_UNWIND_INFO
|
| + /* The pop adjusts the stack and clobbers dest, but does not
|
| + restore dest for unwind info purposes. */
|
| + if (dwarf2out_do_frame ())
|
| + {
|
| + rtx insn;
|
| + start_sequence ();
|
| + insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
|
| + dwarf2out_frame_debug (insn, true);
|
| + insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
|
| + gen_rtx_PLUS (Pmode,
|
| + stack_pointer_rtx,
|
| + GEN_INT (4))));
|
| + RTX_FRAME_RELATED_P (insn) = 1;
|
| + dwarf2out_frame_debug (insn, true);
|
| + end_sequence ();
|
| + }
|
| +#endif
|
| + }
|
| }
|
| else
|
| {
|
| @@ -7529,6 +7773,18 @@ output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
|
| get_pc_thunk_name (name, REGNO (dest));
|
| pic_labels_used |= 1 << REGNO (dest);
|
|
|
| +#ifdef DWARF2_UNWIND_INFO
|
| + /* Ensure all queued register saves are flushed before the
|
| + call. */
|
| + if (dwarf2out_do_frame ())
|
| + {
|
| + rtx insn;
|
| + start_sequence ();
|
| + insn = emit_barrier ();
|
| + end_sequence ();
|
| + dwarf2out_frame_debug (insn, false);
|
| + }
|
| +#endif
|
| xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
|
| xops[2] = gen_rtx_MEM (QImode, xops[2]);
|
| output_asm_insn ("call\t%X2", xops);
|
| @@ -7559,6 +7815,9 @@ output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
|
| static rtx
|
| gen_push (rtx arg)
|
| {
|
| + if (ix86_cfa_state->reg == stack_pointer_rtx)
|
| + ix86_cfa_state->offset += UNITS_PER_WORD;
|
| +
|
| return gen_rtx_SET (VOIDmode,
|
| gen_rtx_MEM (Pmode,
|
| gen_rtx_PRE_DEC (Pmode,
|
| @@ -7618,8 +7877,7 @@ ix86_save_reg (unsigned int regno, int maybe_eh_return)
|
| }
|
| }
|
|
|
| - if (crtl->drap_reg
|
| - && regno == REGNO (crtl->drap_reg))
|
| + if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
|
| return 1;
|
|
|
| return (df_regs_ever_live_p (regno)
|
| @@ -7664,8 +7922,8 @@ ix86_nsaved_sseregs (void)
|
| pointer. Otherwise, frame pointer elimination is automatically
|
| handled and all other eliminations are valid. */
|
|
|
| -int
|
| -ix86_can_eliminate (int from, int to)
|
| +static bool
|
| +ix86_can_eliminate (const int from, const int to)
|
| {
|
| if (stack_realign_fp)
|
| return ((from == ARG_POINTER_REGNUM
|
| @@ -7673,7 +7931,7 @@ ix86_can_eliminate (int from, int to)
|
| || (from == FRAME_POINTER_REGNUM
|
| && to == STACK_POINTER_REGNUM));
|
| else
|
| - return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
|
| + return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
|
| }
|
|
|
| /* Return the offset between two registers, one to be eliminated, and the other
|
| @@ -7718,7 +7976,6 @@ ix86_builtin_setjmp_frame_value (void)
|
| static void
|
| ix86_compute_frame_layout (struct ix86_frame *frame)
|
| {
|
| - HOST_WIDE_INT total_size;
|
| unsigned int stack_alignment_needed;
|
| HOST_WIDE_INT offset;
|
| unsigned int preferred_alignment;
|
| @@ -7726,7 +7983,6 @@ ix86_compute_frame_layout (struct ix86_frame *frame)
|
|
|
| frame->nregs = ix86_nsaved_regs ();
|
| frame->nsseregs = ix86_nsaved_sseregs ();
|
| - total_size = size;
|
|
|
| stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
|
| preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
|
| @@ -7781,9 +8037,16 @@ ix86_compute_frame_layout (struct ix86_frame *frame)
|
| else
|
| frame->save_regs_using_mov = false;
|
|
|
| + /* Skip return address. */
|
| + offset = UNITS_PER_WORD;
|
| +
|
| + /* Skip pushed static chain. */
|
| + if (ix86_static_chain_on_stack)
|
| + offset += UNITS_PER_WORD;
|
|
|
| - /* Skip return address and saved base pointer. */
|
| - offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
|
| + /* Skip saved base pointer. */
|
| + if (frame_pointer_needed)
|
| + offset += UNITS_PER_WORD;
|
|
|
| frame->hard_frame_pointer_offset = offset;
|
|
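| A worked instance of the new offset bookkeeping (32-bit, invented
| scenario), as a standalone helper rather than the code above:
|
|   /* Distance from the entry stack pointer to the saved base pointer:
|      4 for the return address, +4 if the static chain was pushed,
|      +4 for the saved %ebp itself.  */
|   static int
|   hard_fp_offset (int static_chain_on_stack, int frame_pointer_needed)
|   {
|     int offset = 4;                   /* skip return address      */
|     if (static_chain_on_stack)
|       offset += 4;                    /* skip pushed static chain */
|     if (frame_pointer_needed)
|       offset += 4;                    /* skip saved base pointer  */
|     return offset;                    /* 12 with both, 8 without the chain */
|   }
|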
|
| @@ -7872,26 +8135,6 @@ ix86_compute_frame_layout (struct ix86_frame *frame)
|
| frame->red_zone_size = 0;
|
| frame->to_allocate -= frame->red_zone_size;
|
| frame->stack_pointer_offset -= frame->red_zone_size;
|
| -#if 0
|
| - fprintf (stderr, "\n");
|
| - fprintf (stderr, "size: %ld\n", (long)size);
|
| - fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
|
| - fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs);
|
| - fprintf (stderr, "padding0: %ld\n", (long)frame->padding0);
|
| - fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
|
| - fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
|
| - fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
|
| - fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
|
| - fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
|
| - fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
|
| - fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
|
| - fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
|
| - (long)frame->hard_frame_pointer_offset);
|
| - fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
|
| - fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
|
| - fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
|
| - fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
|
| -#endif
|
| }
|
|
|
| /* Emit code to save registers in the prologue. */
|
| @@ -7949,6 +8192,49 @@ ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
|
| }
|
| }
|
|
|
| +static GTY(()) rtx queued_cfa_restores;
|
| +
|
| +/* Add a REG_CFA_RESTORE note for REG to INSN, or queue it until the
|
| + next stack manipulation insn. Don't add the note if the previously
|
| + saved value will be left untouched within the stack red zone until
|
| + return, as unwinders can find the same value in the register and
|
| + on the stack. */
|
| +
|
| +static void
|
| +ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT red_offset)
|
| +{
|
| + if (TARGET_RED_ZONE
|
| + && !TARGET_64BIT_MS_ABI
|
| + && red_offset + RED_ZONE_SIZE >= 0
|
| + && crtl->args.pops_args < 65536)
|
| + return;
|
| +
|
| + if (insn)
|
| + {
|
| + add_reg_note (insn, REG_CFA_RESTORE, reg);
|
| + RTX_FRAME_RELATED_P (insn) = 1;
|
| + }
|
| + else
|
| + queued_cfa_restores
|
| + = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
|
| +}
|
| +
|
| +/* Add queued REG_CFA_RESTORE notes, if any, to INSN. */
|
| +
|
| +static void
|
| +ix86_add_queued_cfa_restore_notes (rtx insn)
|
| +{
|
| + rtx last;
|
| + if (!queued_cfa_restores)
|
| + return;
|
| + for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
|
| + ;
|
| + XEXP (last, 1) = REG_NOTES (insn);
|
| + REG_NOTES (insn) = queued_cfa_restores;
|
| + queued_cfa_restores = NULL_RTX;
|
| + RTX_FRAME_RELATED_P (insn) = 1;
|
| +}
|
| +
|
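| The queue is an ordinary singly linked note list; flushing walks to its
| tail and splices the insn's existing notes behind it. A standalone
| model with invented types:
|
|   struct note { struct note *next; /* payload elided */ };
|
|   static struct note *queued;        /* plays queued_cfa_restores */
|
|   static void
|   flush_queued_notes (struct note **insn_notes)
|   {
|     struct note *last;
|     if (!queued)
|       return;
|     for (last = queued; last->next; last = last->next)
|       ;                               /* find the tail, as the loop above does */
|     last->next = *insn_notes;         /* splice in front of existing notes */
|     *insn_notes = queued;
|     queued = NULL;
|   }
|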
| /* Expand prologue or epilogue stack adjustment.
|
| The pattern exists to put a dependency on all ebp-based memory accesses.
|
| STYLE should be negative if instructions should be marked as frame related,
|
| @@ -7956,7 +8242,8 @@ ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
|
| otherwise. */
|
|
|
| static void
|
| -pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
|
| +pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
|
| + int style, bool set_cfa)
|
| {
|
| rtx insn;
|
|
|
| @@ -7979,7 +8266,24 @@ pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
|
| insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
|
| offset));
|
| }
|
| - if (style < 0)
|
| +
|
| + if (style >= 0)
|
| + ix86_add_queued_cfa_restore_notes (insn);
|
| +
|
| + if (set_cfa)
|
| + {
|
| + rtx r;
|
| +
|
| + gcc_assert (ix86_cfa_state->reg == src);
|
| + ix86_cfa_state->offset -= INTVAL (offset);
|
| + ix86_cfa_state->reg = dest;
|
| +
|
| + r = gen_rtx_PLUS (Pmode, src, offset);
|
| + r = gen_rtx_SET (VOIDmode, dest, r);
|
| + add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
|
| + RTX_FRAME_RELATED_P (insn) = 1;
|
| + }
|
| + else if (style < 0)
|
| RTX_FRAME_RELATED_P (insn) = 1;
|
| }
|
|
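| The set_cfa path maintains the patch's running CFA model: the CFA is
| always REG + OFFSET, so moving the stack pointer by a (signed) amount
| shifts the offset the opposite way, and a push deepens it by a word,
| consistent with gen_push above and the stack-probe branch later on.
| A toy model of that bookkeeping (invented names):
|
|   struct cfa { int cfa_is_sp; long offset; };  /* CFA = reg + offset */
|
|   static void
|   cfa_note_push (struct cfa *c)
|   {
|     if (c->cfa_is_sp)
|       c->offset += 4;        /* sp moved down a word; cf. gen_push */
|   }
|
|   static void
|   cfa_note_sp_adjust (struct cfa *c, long adjust)   /* sp += adjust */
|   {
|     if (c->cfa_is_sp)
|       c->offset -= adjust;   /* allocation (adjust < 0) grows the offset */
|   }
|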
|
| @@ -8005,9 +8309,7 @@ find_drap_reg (void)
|
| Since function with tail call may use any caller-saved
|
| registers in epilogue, DRAP must not use caller-saved
|
| register in such case. */
|
| - if ((decl_function_context (decl)
|
| - && !DECL_NO_STATIC_CHAIN (decl))
|
| - || crtl->tail_call_emit)
|
| + if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
|
| return R13_REG;
|
|
|
| return R10_REG;
|
| @@ -8018,9 +8320,7 @@ find_drap_reg (void)
|
| Since function with tail call may use any caller-saved
|
| registers in epilogue, DRAP must not use caller-saved
|
| register in such case. */
|
| - if ((decl_function_context (decl)
|
| - && !DECL_NO_STATIC_CHAIN (decl))
|
| - || crtl->tail_call_emit)
|
| + if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
|
| return DI_REG;
|
|
|
| /* Reuse static chain register if it isn't used for parameter
|
| @@ -8034,37 +8334,58 @@ find_drap_reg (void)
|
| }
|
| }
|
|
|
| -/* Update incoming stack boundary and estimated stack alignment. */
|
| +/* Return minimum incoming stack alignment. */
|
|
|
| -static void
|
| -ix86_update_stack_boundary (void)
|
| +static unsigned int
|
| +ix86_minimum_incoming_stack_boundary (bool sibcall)
|
| {
|
| + unsigned int incoming_stack_boundary;
|
| +
|
| /* Prefer the one specified at command line. */
|
| - ix86_incoming_stack_boundary
|
| - = (ix86_user_incoming_stack_boundary
|
| - ? ix86_user_incoming_stack_boundary
|
| - : ix86_default_incoming_stack_boundary);
|
| + if (ix86_user_incoming_stack_boundary)
|
| + incoming_stack_boundary = ix86_user_incoming_stack_boundary;
|
| + /* In 32bit, use MIN_STACK_BOUNDARY for the incoming stack boundary
|
| + if -mstackrealign is used, this isn't a sibcall check, and the
|
| + estimated stack alignment is 128bit. */
|
| + else if (!sibcall
|
| + && !TARGET_64BIT
|
| + && ix86_force_align_arg_pointer
|
| + && crtl->stack_alignment_estimated == 128)
|
| + incoming_stack_boundary = MIN_STACK_BOUNDARY;
|
| + else
|
| + incoming_stack_boundary = ix86_default_incoming_stack_boundary;
|
|
|
| /* Incoming stack alignment can be changed on individual functions
|
| via force_align_arg_pointer attribute. We use the smallest
|
| incoming stack boundary. */
|
| - if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
|
| + if (incoming_stack_boundary > MIN_STACK_BOUNDARY
|
| && lookup_attribute (ix86_force_align_arg_pointer_string,
|
| TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
|
| - ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
|
| + incoming_stack_boundary = MIN_STACK_BOUNDARY;
|
|
|
| /* The incoming stack frame has to be aligned at least at
|
| parm_stack_boundary. */
|
| - if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary)
|
| - ix86_incoming_stack_boundary = crtl->parm_stack_boundary;
|
| + if (incoming_stack_boundary < crtl->parm_stack_boundary)
|
| + incoming_stack_boundary = crtl->parm_stack_boundary;
|
|
|
| /* Stack at entrance of main is aligned by runtime. We use the
|
| smallest incoming stack boundary. */
|
| - if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
|
| + if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
|
| && DECL_NAME (current_function_decl)
|
| && MAIN_NAME_P (DECL_NAME (current_function_decl))
|
| && DECL_FILE_SCOPE_P (current_function_decl))
|
| - ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
|
| + incoming_stack_boundary = MAIN_STACK_BOUNDARY;
|
| +
|
| + return incoming_stack_boundary;
|
| +}
|
| +
|
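| The attribute lookup above is what services the per-function escape
| hatch; a hedged user-level example of the attribute involved:
|
|   /* A callback that may be entered from code that keeps only 4-byte
|      stack alignment; the attribute asks GCC to realign on entry, so
|      SSE locals inside stay safe.  */
|   void __attribute__ ((force_align_arg_pointer))
|   on_event (void *ctx)
|   {
|     (void) ctx;
|   }
|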
| +/* Update incoming stack boundary and estimated stack alignment. */
|
| +
|
| +static void
|
| +ix86_update_stack_boundary (void)
|
| +{
|
| + ix86_incoming_stack_boundary
|
| + = ix86_minimum_incoming_stack_boundary (false);
|
|
|
| /* x86_64 vararg needs 16byte stack alignment for register save
|
| area. */
|
| @@ -8100,7 +8421,11 @@ ix86_get_drap_rtx (void)
|
| end_sequence ();
|
|
|
| insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
|
| - RTX_FRAME_RELATED_P (insn) = 1;
|
| + if (!optimize)
|
| + {
|
| + add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
|
| + RTX_FRAME_RELATED_P (insn) = 1;
|
| + }
|
| return drap_vreg;
|
| }
|
| else
|
| @@ -8115,30 +8440,6 @@ ix86_internal_arg_pointer (void)
|
| return virtual_incoming_args_rtx;
|
| }
|
|
|
| -/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
|
| - This is called from dwarf2out.c to emit call frame instructions
|
| - for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
|
| -static void
|
| -ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
|
| -{
|
| - rtx unspec = SET_SRC (pattern);
|
| - gcc_assert (GET_CODE (unspec) == UNSPEC);
|
| -
|
| - switch (index)
|
| - {
|
| - case UNSPEC_REG_SAVE:
|
| - dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
|
| - SET_DEST (pattern));
|
| - break;
|
| - case UNSPEC_DEF_CFA:
|
| - dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
|
| - INTVAL (XVECEXP (unspec, 0, 0)));
|
| - break;
|
| - default:
|
| - gcc_unreachable ();
|
| - }
|
| -}
|
| -
|
| /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
|
| to be generated in correct form. */
|
| static void
|
| @@ -8176,28 +8477,95 @@ ix86_expand_prologue (void)
|
| bool pic_reg_used;
|
| struct ix86_frame frame;
|
| HOST_WIDE_INT allocate;
|
| + int gen_frame_pointer = frame_pointer_needed;
|
|
|
| ix86_finalize_stack_realign_flags ();
|
|
|
| /* DRAP should not coexist with stack_realign_fp */
|
| gcc_assert (!(crtl->drap_reg && stack_realign_fp));
|
|
|
| + /* Initialize CFA state for before the prologue. */
|
| + ix86_cfa_state->reg = stack_pointer_rtx;
|
| + ix86_cfa_state->offset = INCOMING_FRAME_SP_OFFSET;
|
| +
|
| ix86_compute_frame_layout (&frame);
|
|
|
| + if (ix86_function_ms_hook_prologue (current_function_decl))
|
| + {
|
| + rtx push, mov;
|
| +
|
| + /* Make sure the function starts with
|
| + 8b ff movl.s %edi,%edi
|
| + 55 push %ebp
|
| + 8b ec movl.s %esp,%ebp
|
| +
|
| + This matches the hookable function prologue in Win32 API
|
| + functions in Microsoft Windows XP Service Pack 2 and newer.
|
| + Wine uses this to enable Windows apps to hook the Win32 API
|
| + functions provided by Wine. */
|
| + insn = emit_insn (gen_vswapmov (gen_rtx_REG (SImode, DI_REG),
|
| + gen_rtx_REG (SImode, DI_REG)));
|
| + push = emit_insn (gen_push (hard_frame_pointer_rtx));
|
| + mov = emit_insn (gen_vswapmov (hard_frame_pointer_rtx,
|
| + stack_pointer_rtx));
|
| +
|
| + if (frame_pointer_needed && !(crtl->drap_reg
|
| + && crtl->stack_realign_needed))
|
| + {
|
| + /* The push %ebp and movl.s %esp, %ebp already set up
|
| + the frame pointer. No need to do this again. */
|
| + gen_frame_pointer = 0;
|
| + RTX_FRAME_RELATED_P (push) = 1;
|
| + RTX_FRAME_RELATED_P (mov) = 1;
|
| + if (ix86_cfa_state->reg == stack_pointer_rtx)
|
| + ix86_cfa_state->reg = hard_frame_pointer_rtx;
|
| + }
|
| + else
|
| + /* If the frame pointer is not needed, pop %ebp again. This
|
| + could be optimized for cases where ebp needs to be backed up
|
| + for some other reason. If stack realignment is needed, pop
|
| + the base pointer again, align the stack, and later regenerate
|
| + the frame pointer setup. The frame pointer generated by the
|
| + hook prologue is not aligned, so it can't be used. */
|
| + insn = emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
|
| + }
|
| +
|
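| The hunk above implements the function attribute behind this prologue;
| a hedged example of how user code requests it (invented function name):
|
|   /* Begins with the hot-patchable sequence the comment shows:
|      movl.s %edi, %edi; push %ebp; movl.s %esp, %ebp.  */
|   int __attribute__ ((ms_hook_prologue))
|   GetWidgetCount (void)
|   {
|     return 0;
|   }
|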
| + /* The first insn of a function that accepts its static chain on the
|
| + stack is to push the register that would be filled in by a direct
|
| + call. This insn will be skipped by the trampoline. */
|
| + if (ix86_static_chain_on_stack)
|
| + {
|
| + rtx t;
|
| +
|
| + insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
|
| + emit_insn (gen_blockage ());
|
| +
|
| + /* We don't want to interpret this push insn as a register save,
|
| + only as a stack adjustment. The real copy of the register as
|
| + a save will be done later, if needed. */
|
| + t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
|
| + t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
|
| + add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
|
| + RTX_FRAME_RELATED_P (insn) = 1;
|
| + }
|
| +
|
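| For context, the static chain exists so a GNU C nested function can
| reach its parent's locals; with regparm conventions that claim all of
| %eax/%edx/%ecx for arguments, the chain may have to travel on the
| stack instead, which is the case handled above. A small example of a
| function that needs a chain at all:
|
|   static int
|   outer (int x)
|   {
|     int inner (int y) { return x + y; }   /* reads X via the static chain */
|     return inner (1);
|   }
|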
| /* Emit prologue code to adjust stack alignment and setup DRAP, in case
|
| of DRAP is needed and stack realignment is really needed after reload */
|
| if (crtl->drap_reg && crtl->stack_realign_needed)
|
| {
|
| rtx x, y;
|
| int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
|
| - int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
|
| - ? 0 : UNITS_PER_WORD);
|
| + int param_ptr_offset = UNITS_PER_WORD;
|
| +
|
| + if (ix86_static_chain_on_stack)
|
| + param_ptr_offset += UNITS_PER_WORD;
|
| + if (!call_used_regs[REGNO (crtl->drap_reg)])
|
| + param_ptr_offset += UNITS_PER_WORD;
|
|
|
| gcc_assert (stack_realign_drap);
|
|
|
| /* Grab the argument pointer. */
|
| - x = plus_constant (stack_pointer_rtx,
|
| - (UNITS_PER_WORD + param_ptr_offset));
|
| + x = plus_constant (stack_pointer_rtx, param_ptr_offset);
|
| y = crtl->drap_reg;
|
|
|
| /* Only need to push parameter pointer reg if it is caller
|
| @@ -8211,6 +8579,7 @@ ix86_expand_prologue (void)
|
|
|
| insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
|
| RTX_FRAME_RELATED_P (insn) = 1;
|
| + ix86_cfa_state->reg = crtl->drap_reg;
|
|
|
| /* Align the stack. */
|
| insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
|
| @@ -8232,13 +8601,16 @@ ix86_expand_prologue (void)
|
| /* Note: AT&T enter does NOT have reversed args. Enter is probably
|
| slower on all targets. Also sdb doesn't like it. */
|
|
|
| - if (frame_pointer_needed)
|
| + if (gen_frame_pointer)
|
| {
|
| insn = emit_insn (gen_push (hard_frame_pointer_rtx));
|
| RTX_FRAME_RELATED_P (insn) = 1;
|
|
|
| insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
|
| RTX_FRAME_RELATED_P (insn) = 1;
|
| +
|
| + if (ix86_cfa_state->reg == stack_pointer_rtx)
|
| + ix86_cfa_state->reg = hard_frame_pointer_rtx;
|
| }
|
|
|
| if (stack_realign_fp)
|
| @@ -8277,16 +8649,14 @@ ix86_expand_prologue (void)
|
| ;
|
| else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
|
| pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
|
| - GEN_INT (-allocate), -1);
|
| + GEN_INT (-allocate), -1,
|
| + ix86_cfa_state->reg == stack_pointer_rtx);
|
| else
|
| {
|
| - /* Only valid for Win32. */
|
| rtx eax = gen_rtx_REG (Pmode, AX_REG);
|
| bool eax_live;
|
| rtx t;
|
|
|
| - gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
|
| -
|
| if (cfun->machine->call_abi == MS_ABI)
|
| eax_live = false;
|
| else
|
| @@ -8305,11 +8675,15 @@ ix86_expand_prologue (void)
|
| else
|
| insn = gen_allocate_stack_worker_32 (eax, eax);
|
| insn = emit_insn (insn);
|
| - RTX_FRAME_RELATED_P (insn) = 1;
|
| - t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
|
| - t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
|
| - REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
|
| - t, REG_NOTES (insn));
|
| +
|
| + if (ix86_cfa_state->reg == stack_pointer_rtx)
|
| + {
|
| + ix86_cfa_state->offset += allocate;
|
| + t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
|
| + t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
|
| + add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
|
| + RTX_FRAME_RELATED_P (insn) = 1;
|
| + }
|
|
|
| if (eax_live)
|
| {
|
| @@ -8397,14 +8771,18 @@ ix86_expand_prologue (void)
|
| /* vDRAP is setup but after reload it turns out stack realign
|
| isn't necessary, here we will emit prologue to setup DRAP
|
| without stack realign adjustment */
|
| + rtx x;
|
| int drap_bp_offset = UNITS_PER_WORD * 2;
|
| - rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
|
| +
|
| + if (ix86_static_chain_on_stack)
|
| + drap_bp_offset += UNITS_PER_WORD;
|
| + x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
|
| insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
|
| }
|
|
|
| /* Prevent instructions from being scheduled into register save push
|
| sequence when access to the redzone area is done through frame pointer.
|
| - The offset betweeh the frame pointer and the stack pointer is calculated
|
| + The offset between the frame pointer and the stack pointer is calculated
|
| relative to the value of the stack pointer at the end of the function
|
| prologue, and moving instructions that access redzone area via frame
|
| pointer inside push sequence violates this assumption. */
|
| @@ -8416,18 +8794,107 @@ ix86_expand_prologue (void)
|
| emit_insn (gen_cld ());
|
| }
|
|
|
| +/* Emit code to restore REG using a POP insn. */
|
| +
|
| +static void
|
| +ix86_emit_restore_reg_using_pop (rtx reg, HOST_WIDE_INT red_offset)
|
| +{
|
| + rtx insn = emit_insn (ix86_gen_pop1 (reg));
|
| +
|
| + if (ix86_cfa_state->reg == crtl->drap_reg
|
| + && REGNO (reg) == REGNO (crtl->drap_reg))
|
| + {
|
| + /* Previously we'd represented the CFA as an expression
|
| + like *(%ebp - 8). We've just popped that value from
|
| + the stack, which means we need to reset the CFA to
|
| + the drap register. This will remain until we restore
|
| + the stack pointer. */
|
| + add_reg_note (insn, REG_CFA_DEF_CFA, reg);
|
| + RTX_FRAME_RELATED_P (insn) = 1;
|
| + return;
|
| + }
|
| +
|
| + if (ix86_cfa_state->reg == stack_pointer_rtx)
|
| + {
|
| + ix86_cfa_state->offset -= UNITS_PER_WORD;
|
| + add_reg_note (insn, REG_CFA_ADJUST_CFA,
|
| + copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
|
| + RTX_FRAME_RELATED_P (insn) = 1;
|
| + }
|
| +
|
| + /* When the frame pointer is the CFA, and we pop it, we are
|
| + swapping back to the stack pointer as the CFA. This happens
|
| + for stack frames that don't allocate other data, so we assume
|
| + the stack pointer is now pointing at the return address, i.e.
|
| + the function entry state, which makes the offset be 1 word. */
|
| + else if (ix86_cfa_state->reg == hard_frame_pointer_rtx
|
| + && reg == hard_frame_pointer_rtx)
|
| + {
|
| + ix86_cfa_state->reg = stack_pointer_rtx;
|
| + ix86_cfa_state->offset -= UNITS_PER_WORD;
|
| +
|
| + add_reg_note (insn, REG_CFA_DEF_CFA,
|
| + gen_rtx_PLUS (Pmode, stack_pointer_rtx,
|
| + GEN_INT (ix86_cfa_state->offset)));
|
| + RTX_FRAME_RELATED_P (insn) = 1;
|
| + }
|
| +
|
| + ix86_add_cfa_restore_note (insn, reg, red_offset);
|
| +}
|
| +
|
| +/* Emit code to restore saved registers using POP insns. */
|
| +
|
| +static void
|
| +ix86_emit_restore_regs_using_pop (HOST_WIDE_INT red_offset)
|
| +{
|
| + int regno;
|
| +
|
| + for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
|
| + if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
|
| + {
|
| + ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno),
|
| + red_offset);
|
| + red_offset += UNITS_PER_WORD;
|
| + }
|
| +}
|
| +
|
| +/* Emit code and notes for the LEAVE instruction. */
|
| +
|
| +static void
|
| +ix86_emit_leave (HOST_WIDE_INT red_offset)
|
| +{
|
| + rtx insn = emit_insn (ix86_gen_leave ());
|
| +
|
| + ix86_add_queued_cfa_restore_notes (insn);
|
| +
|
| + if (ix86_cfa_state->reg == hard_frame_pointer_rtx)
|
| + {
|
| + ix86_cfa_state->reg = stack_pointer_rtx;
|
| + ix86_cfa_state->offset -= UNITS_PER_WORD;
|
| +
|
| + add_reg_note (insn, REG_CFA_ADJUST_CFA,
|
| + copy_rtx (XVECEXP (PATTERN (insn), 0, 0)));
|
| + RTX_FRAME_RELATED_P (insn) = 1;
|
| + ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx, red_offset);
|
| + }
|
| +}
|
| +
|
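| A worked 32-bit trace (invented frame) of the notes these helpers
| attach, as an unwinder would see a typical frame-pointer epilogue:
|
|   /*  insn        CFA before    CFA after    note emitted
|       pop %ebx    %ebp + 8      %ebp + 8     REG_CFA_RESTORE %ebx
|       leave       %ebp + 8      %esp + 4     REG_CFA_ADJUST_CFA
|                                              (+ REG_CFA_RESTORE %ebp)
|       ret         %esp + 4      --           --
|
|      The REG_CFA_RESTORE notes tell the unwinder that a register's
|      entry value is back in the register rather than in its stack
|      slot.  */
|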
| /* Emit code to restore saved registers using MOV insns. First register
|
| is restored from POINTER + OFFSET. */
|
| static void
|
| ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
|
| + HOST_WIDE_INT red_offset,
|
| int maybe_eh_return)
|
| {
|
| - int regno;
|
| + unsigned int regno;
|
| rtx base_address = gen_rtx_MEM (Pmode, pointer);
|
| + rtx insn;
|
|
|
| for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
|
| if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
|
| {
|
| + rtx reg = gen_rtx_REG (Pmode, regno);
|
| +
|
| /* Ensure that adjust_address won't be forced to produce pointer
|
| out of range allowed by x86-64 instruction set. */
|
| if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
|
| @@ -8440,9 +8907,25 @@ ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
|
| base_address = gen_rtx_MEM (Pmode, r11);
|
| offset = 0;
|
| }
|
| - emit_move_insn (gen_rtx_REG (Pmode, regno),
|
| - adjust_address (base_address, Pmode, offset));
|
| + insn = emit_move_insn (reg,
|
| + adjust_address (base_address, Pmode, offset));
|
| offset += UNITS_PER_WORD;
|
| +
|
| + if (ix86_cfa_state->reg == crtl->drap_reg
|
| + && regno == REGNO (crtl->drap_reg))
|
| + {
|
| + /* Previously we'd represented the CFA as an expression
|
| + like *(%ebp - 8). We've just reloaded that value from
|
| + the stack, which means we need to reset the CFA to
|
| + the drap register. This will remain until we restore
|
| + the stack pointer. */
|
| + add_reg_note (insn, REG_CFA_DEF_CFA, reg);
|
| + RTX_FRAME_RELATED_P (insn) = 1;
|
| + }
|
| + else
|
| + ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
|
| +
|
| + red_offset += UNITS_PER_WORD;
|
| }
|
| }
|
|
|
| @@ -8450,6 +8933,7 @@ ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
|
| is restored from POINTER + OFFSET. */
|
| static void
|
| ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
|
| + HOST_WIDE_INT red_offset,
|
| int maybe_eh_return)
|
| {
|
| int regno;
|
| @@ -8459,6 +8943,8 @@ ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
|
| for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
|
| if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
|
| {
|
| + rtx reg = gen_rtx_REG (TImode, regno);
|
| +
|
| /* Ensure that adjust_address won't be forced to produce pointer
|
| out of range allowed by x86-64 instruction set. */
|
| if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
|
| @@ -8473,8 +8959,12 @@ ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
|
| }
|
| mem = adjust_address (base_address, TImode, offset);
|
| set_mem_align (mem, 128);
|
| - emit_move_insn (gen_rtx_REG (TImode, regno), mem);
|
| + emit_move_insn (reg, mem);
|
| offset += 16;
|
| +
|
| + ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
|
| +
|
| + red_offset += 16;
|
| }
|
| }
|
|
|
| @@ -8483,10 +8973,11 @@ ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
|
| void
|
| ix86_expand_epilogue (int style)
|
| {
|
| - int regno;
|
| int sp_valid;
|
| struct ix86_frame frame;
|
| - HOST_WIDE_INT offset;
|
| + HOST_WIDE_INT offset, red_offset;
|
| + struct machine_cfa_state cfa_state_save = *ix86_cfa_state;
|
| + bool using_drap;
|
|
|
| ix86_finalize_stack_realign_flags ();
|
|
|
| @@ -8502,6 +8993,9 @@ ix86_expand_epilogue (int style)
|
| if (frame_pointer_needed && frame.red_zone_size)
|
| emit_insn (gen_memory_blockage ());
|
|
|
| + using_drap = crtl->drap_reg && crtl->stack_realign_needed;
|
| + gcc_assert (!using_drap || ix86_cfa_state->reg == crtl->drap_reg);
|
| +
|
| /* Calculate start of saved registers relative to ebp. Special care
|
| must be taken for the normal return case of a function using
|
| eh_return: the eax and edx registers are marked as saved, but not
|
| @@ -8512,6 +9006,21 @@ ix86_expand_epilogue (int style)
|
| offset *= -UNITS_PER_WORD;
|
| offset -= frame.nsseregs * 16 + frame.padding0;
|
|
|
| + /* Calculate start of saved registers relative to esp on entry of the
|
| + function. When realigning stack, this needs to be the most negative
|
| + value possible at runtime. */
|
| + red_offset = offset;
|
| + if (using_drap)
|
| + red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
|
| + + UNITS_PER_WORD;
|
| + else if (stack_realign_fp)
|
| + red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
|
| + - UNITS_PER_WORD;
|
| + if (ix86_static_chain_on_stack)
|
| + red_offset -= UNITS_PER_WORD;
|
| + if (frame_pointer_needed)
|
| + red_offset -= UNITS_PER_WORD;
|
| +
|
| /* If we're only restoring one register and sp is not valid then
|
| use a move instruction to restore the register, since it's
|
| less work than reloading sp and popping the register.
|
| @@ -8528,7 +9037,7 @@ ix86_expand_epilogue (int style)
|
| && ((frame.nregs + frame.nsseregs) > 1
|
| || (frame.to_allocate + frame.padding0) != 0))
|
| || (frame_pointer_needed && !(frame.nregs + frame.nsseregs)
|
| - && (frame.to_allocate + frame.padding0) != 0)
|
| + && (frame.to_allocate + frame.padding0) != 0)
|
| || (frame_pointer_needed && TARGET_USE_LEAVE
|
| && cfun->machine->use_fast_prologue_epilogue
|
| && (frame.nregs + frame.nsseregs) == 1)
|
| @@ -8538,32 +9047,42 @@ ix86_expand_epilogue (int style)
|
| locations. If both are available, default to ebp, since offsets
|
| are known to be small. Only exception is esp pointing directly
|
| to the end of block of saved registers, where we may simplify
|
| - addressing mode.
|
| + addressing mode.
|
|
|
| If we are realigning stack with bp and sp, regs restore can't
|
| be addressed by bp. sp must be used instead. */
|
|
|
| if (!frame_pointer_needed
|
| - || (sp_valid && !(frame.to_allocate + frame.padding0))
|
| + || (sp_valid && !(frame.to_allocate + frame.padding0))
|
| || stack_realign_fp)
|
| {
|
| ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
|
| - frame.to_allocate, style == 2);
|
| + frame.to_allocate, red_offset,
|
| + style == 2);
|
| ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
|
| frame.to_allocate
|
| + frame.nsseregs * 16
|
| + + frame.padding0,
|
| + red_offset
|
| + + frame.nsseregs * 16
|
| + frame.padding0, style == 2);
|
| }
|
| else
|
| {
|
| ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
|
| - offset, style == 2);
|
| + offset, red_offset,
|
| + style == 2);
|
| ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
|
| offset
|
| + frame.nsseregs * 16
|
| + + frame.padding0,
|
| + red_offset
|
| + + frame.nsseregs * 16
|
| + frame.padding0, style == 2);
|
| }
|
|
|
| + red_offset -= offset;
|
| +
|
| /* eh_return epilogues need %ecx added to the stack pointer. */
|
| if (style == 2)
|
| {
|
| @@ -8571,18 +9090,36 @@ ix86_expand_epilogue (int style)
|
|
|
| /* Stack align doesn't work with eh_return. */
|
| gcc_assert (!crtl->stack_realign_needed);
|
| + /* Neither do regparm nested functions. */
|
| + gcc_assert (!ix86_static_chain_on_stack);
|
|
|
| if (frame_pointer_needed)
|
| {
|
| tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
|
| tmp = plus_constant (tmp, UNITS_PER_WORD);
|
| - emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
|
| + tmp = emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
|
|
|
| tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
|
| - emit_move_insn (hard_frame_pointer_rtx, tmp);
|
| + tmp = emit_move_insn (hard_frame_pointer_rtx, tmp);
|
| +
|
| + /* Note that we use SA as a temporary CFA, as the return
|
| + address is at the proper place relative to it. We
|
| + pretend this happens at the FP restore insn because
|
| + prior to this insn the FP would be stored at the wrong
|
| + offset relative to SA, and after this insn we have no
|
| + other reasonable register to use for the CFA. We don't
|
| + bother resetting the CFA to the SP for the duration of
|
| + the return insn. */
|
| + add_reg_note (tmp, REG_CFA_DEF_CFA,
|
| + plus_constant (sa, UNITS_PER_WORD));
|
| + ix86_add_queued_cfa_restore_notes (tmp);
|
| + add_reg_note (tmp, REG_CFA_RESTORE, hard_frame_pointer_rtx);
|
| + RTX_FRAME_RELATED_P (tmp) = 1;
|
| + ix86_cfa_state->reg = sa;
|
| + ix86_cfa_state->offset = UNITS_PER_WORD;
|
|
|
| pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
|
| - const0_rtx, style);
|
| + const0_rtx, style, false);
|
| }
|
| else
|
| {
|
| @@ -8591,7 +9128,18 @@ ix86_expand_epilogue (int style)
|
| + frame.nregs * UNITS_PER_WORD
|
| + frame.nsseregs * 16
|
| + frame.padding0));
|
| - emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
|
| + tmp = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
|
| + ix86_add_queued_cfa_restore_notes (tmp);
|
| +
|
| + gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx);
|
| + if (ix86_cfa_state->offset != UNITS_PER_WORD)
|
| + {
|
| + ix86_cfa_state->offset = UNITS_PER_WORD;
|
| + add_reg_note (tmp, REG_CFA_DEF_CFA,
|
| + plus_constant (stack_pointer_rtx,
|
| + UNITS_PER_WORD));
|
| + RTX_FRAME_RELATED_P (tmp) = 1;
|
| + }
|
| }
|
| }
|
| else if (!frame_pointer_needed)
|
| @@ -8600,18 +9148,18 @@ ix86_expand_epilogue (int style)
|
| + frame.nregs * UNITS_PER_WORD
|
| + frame.nsseregs * 16
|
| + frame.padding0),
|
| - style);
|
| + style, !using_drap);
|
| /* If not an i386, mov & pop is faster than "leave". */
|
| else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
|
| || !cfun->machine->use_fast_prologue_epilogue)
|
| - emit_insn ((*ix86_gen_leave) ());
|
| + ix86_emit_leave (red_offset);
|
| else
|
| {
|
| pro_epilogue_adjust_stack (stack_pointer_rtx,
|
| hard_frame_pointer_rtx,
|
| - const0_rtx, style);
|
| + const0_rtx, style, !using_drap);
|
|
|
| - emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
|
| + ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx, red_offset);
|
| }
|
| }
|
| else
|
| @@ -8629,33 +9177,37 @@ ix86_expand_epilogue (int style)
|
| gcc_assert (!stack_realign_fp);
|
| pro_epilogue_adjust_stack (stack_pointer_rtx,
|
| hard_frame_pointer_rtx,
|
| - GEN_INT (offset), style);
|
| + GEN_INT (offset), style, false);
|
| ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
|
| - 0, style == 2);
|
| + 0, red_offset,
|
| + style == 2);
|
| pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
|
| - GEN_INT (frame.nsseregs * 16 +
|
| - frame.padding0), style);
|
| + GEN_INT (frame.nsseregs * 16
|
| + + frame.padding0),
|
| + style, false);
|
| }
|
| else if (frame.to_allocate || frame.padding0 || frame.nsseregs)
|
| {
|
| ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
|
| - frame.to_allocate,
|
| + frame.to_allocate, red_offset,
|
| style == 2);
|
| pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
|
| GEN_INT (frame.to_allocate
|
| + frame.nsseregs * 16
|
| - + frame.padding0), style);
|
| + + frame.padding0), style,
|
| + !using_drap && !frame_pointer_needed);
|
| }
|
|
|
| - for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
|
| - if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
|
| - emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
|
| + ix86_emit_restore_regs_using_pop (red_offset + frame.nsseregs * 16
|
| + + frame.padding0);
|
| + red_offset -= offset;
|
| +
|
| if (frame_pointer_needed)
|
| {
|
| /* Leave results in shorter dependency chains on CPUs that are
|
| able to grok it fast. */
|
| if (TARGET_USE_LEAVE)
|
| - emit_insn ((*ix86_gen_leave) ());
|
| + ix86_emit_leave (red_offset);
|
| else
|
| {
|
| /* When stack realignment really happens, recover the stack
|
| @@ -8664,47 +9216,94 @@ ix86_expand_epilogue (int style)
|
| if (stack_realign_fp)
|
| pro_epilogue_adjust_stack (stack_pointer_rtx,
|
| hard_frame_pointer_rtx,
|
| - const0_rtx, style);
|
| - emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
|
| + const0_rtx, style, !using_drap);
|
| + ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx,
|
| + red_offset);
|
| }
|
| }
|
| }
|
|
|
| - if (crtl->drap_reg && crtl->stack_realign_needed)
|
| + if (using_drap)
|
| {
|
| - int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
|
| - ? 0 : UNITS_PER_WORD);
|
| + int param_ptr_offset = UNITS_PER_WORD;
|
| + rtx insn;
|
| +
|
| gcc_assert (stack_realign_drap);
|
| - emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
|
| - crtl->drap_reg,
|
| - GEN_INT (-(UNITS_PER_WORD
|
| - + param_ptr_offset))));
|
| +
|
| + if (ix86_static_chain_on_stack)
|
| + param_ptr_offset += UNITS_PER_WORD;
|
| if (!call_used_regs[REGNO (crtl->drap_reg)])
|
| - emit_insn ((*ix86_gen_pop1) (crtl->drap_reg));
|
| -
|
| + param_ptr_offset += UNITS_PER_WORD;
|
| +
|
| + insn = emit_insn (gen_rtx_SET
|
| + (VOIDmode, stack_pointer_rtx,
|
| + gen_rtx_PLUS (Pmode,
|
| + crtl->drap_reg,
|
| + GEN_INT (-param_ptr_offset))));
|
| +
|
| + ix86_cfa_state->reg = stack_pointer_rtx;
|
| + ix86_cfa_state->offset = param_ptr_offset;
|
| +
|
| + add_reg_note (insn, REG_CFA_DEF_CFA,
|
| + gen_rtx_PLUS (Pmode, ix86_cfa_state->reg,
|
| + GEN_INT (ix86_cfa_state->offset)));
|
| + RTX_FRAME_RELATED_P (insn) = 1;
|
| +
|
| + if (!call_used_regs[REGNO (crtl->drap_reg)])
|
| + ix86_emit_restore_reg_using_pop (crtl->drap_reg, -UNITS_PER_WORD);
|
| + }
|
| +
|
| + /* Remove the saved static chain from the stack. The use of ECX is
|
| + merely as a scratch register, not as the actual static chain. */
|
| + if (ix86_static_chain_on_stack)
|
| + {
|
| + rtx r, insn;
|
| +
|
| + gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx);
|
| + ix86_cfa_state->offset += UNITS_PER_WORD;
|
| +
|
| + r = gen_rtx_REG (Pmode, CX_REG);
|
| + insn = emit_insn (ix86_gen_pop1 (r));
|
| +
|
| + r = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
|
| + r = gen_rtx_SET (VOIDmode, stack_pointer_rtx, r);
|
| + add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
|
| + RTX_FRAME_RELATED_P (insn) = 1;
|
| }
|
|
|
| /* Sibcall epilogues don't want a return instruction. */
|
| if (style == 0)
|
| - return;
|
| + {
|
| + *ix86_cfa_state = cfa_state_save;
|
| + return;
|
| + }
|
|
|
| if (crtl->args.pops_args && crtl->args.size)
|
| {
|
| rtx popc = GEN_INT (crtl->args.pops_args);
|
|
|
| - /* i386 can only pop 64K bytes. If asked to pop more, pop
|
| - return address, do explicit add, and jump indirectly to the
|
| - caller. */
|
| + /* i386 can only pop 64K bytes. If asked to pop more, pop return
|
| + address, do explicit add, and jump indirectly to the caller. */
|
|
|
| if (crtl->args.pops_args >= 65536)
|
| {
|
| rtx ecx = gen_rtx_REG (SImode, CX_REG);
|
| + rtx insn;
|
|
|
| /* There is no "pascal" calling convention in any 64bit ABI. */
|
| gcc_assert (!TARGET_64BIT);
|
|
|
| - emit_insn (gen_popsi1 (ecx));
|
| - emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
|
| + insn = emit_insn (gen_popsi1 (ecx));
|
| + ix86_cfa_state->offset -= UNITS_PER_WORD;
|
| +
|
| + add_reg_note (insn, REG_CFA_ADJUST_CFA,
|
| + copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
|
| + add_reg_note (insn, REG_CFA_REGISTER,
|
| + gen_rtx_SET (VOIDmode, ecx, pc_rtx));
|
| + RTX_FRAME_RELATED_P (insn) = 1;
|
| +
|
| + pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
|
| + popc, -1, true);
|
| emit_jump_insn (gen_return_indirect_internal (ecx));
|
| }
|
| else
|
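| The 64K ceiling is RET's 16-bit pop-count immediate. A contrived
| example of a callee-pops function that would trip it (32-bit):
|
|   struct huge { char bytes[70000]; };
|
|   /* stdcall callees normally return with "ret $N"; here N would be
|      about 70000, too large for the imm16, so the compiler must pop
|      the return address, adjust %esp explicitly, and jump back.  */
|   void __attribute__ ((stdcall))
|   consume (struct huge h)
|   {
|     (void) h;
|   }
|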
| @@ -8712,6 +9311,10 @@ ix86_expand_epilogue (int style)
|
| }
|
| else
|
| emit_jump_insn (gen_return_internal ());
|
| +
|
| + /* Restore the state back to what it was at the end of the prologue,
|
| + so that it's correct for the next epilogue. */
|
| + *ix86_cfa_state = cfa_state_save;
|
| }
|
|
|
| /* Reset from the function's potential modifications. */
|
| @@ -8855,6 +9458,10 @@ ix86_decompose_address (rtx addr, struct ix86_address *out)
|
| base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
|
| index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
|
|
|
| + /* Avoid useless 0 displacement. */
|
| + if (disp == const0_rtx && (base || index))
|
| + disp = NULL_RTX;
|
| +
|
| /* Allow arg pointer and stack pointer as index if there is no scaling. */
|
| if (base_reg && index_reg && scale == 1
|
| && (index_reg == arg_pointer_rtx
|
| @@ -8866,10 +9473,16 @@ ix86_decompose_address (rtx addr, struct ix86_address *out)
|
| tmp = base_reg, base_reg = index_reg, index_reg = tmp;
|
| }
|
|
|
| - /* Special case: %ebp cannot be encoded as a base without a displacement. */
|
| - if ((base_reg == hard_frame_pointer_rtx
|
| - || base_reg == frame_pointer_rtx
|
| - || base_reg == arg_pointer_rtx) && !disp)
|
| + /* Special case: %ebp cannot be encoded as a base without a displacement.
|
| + Similarly %r13. */
|
| + if (!disp
|
| + && base_reg
|
| + && (base_reg == hard_frame_pointer_rtx
|
| + || base_reg == frame_pointer_rtx
|
| + || base_reg == arg_pointer_rtx
|
| + || (REG_P (base_reg)
|
| + && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
|
| + || REGNO (base_reg) == R13_REG))))
|
| disp = const0_rtx;
|
|
|
| /* Special case: on K6, [%esi] makes the instruction vector decoded.
|
| @@ -8883,7 +9496,7 @@ ix86_decompose_address (rtx addr, struct ix86_address *out)
|
| disp = const0_rtx;
|
|
|
| /* Special case: encode reg+reg instead of reg*2. */
|
| - if (!base && index && scale && scale == 2)
|
| + if (!base && index && scale == 2)
|
| base = index, base_reg = index_reg, scale = 1;
|
|
|
| /* Special case: scaling cannot be encoded without base or displacement. */
|
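| The %ebp/%r13 rule is a ModRM encoding quirk: mod=00 with base bits
| 101 does not mean "(reg)" but "disp32" (or RIP-relative in 64-bit
| mode), so a bare (%ebp) or (%r13) must become 0(%ebp)/0(%r13). A
| sketch of the check (invented helper; EBP is register 5, R13 is 13,
| and both have 101 in their low three bits):
|
|   static int
|   base_needs_zero_disp (unsigned regno, int have_disp)
|   {
|     return !have_disp && (regno & 7) == 5;
|   }
|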
| @@ -9036,9 +9649,8 @@ legitimate_constant_p (rtx x)
|
| break;
|
|
|
| case CONST_VECTOR:
|
| - if (x == CONST0_RTX (GET_MODE (x)))
|
| - return true;
|
| - return false;
|
| + if (!standard_sse_constant_p (x))
|
| + return false;
|
|
|
| default:
|
| break;
|
| @@ -9069,13 +9681,6 @@ ix86_cannot_force_const_mem (rtx x)
|
| return !legitimate_constant_p (x);
|
| }
|
|
|
| -/* Determine if a given RTX is a valid constant address. */
|
| -
|
| -bool
|
| -constant_address_p (rtx x)
|
| -{
|
| - return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
|
| -}
|
|
|
| /* Nonzero if the constant value X is a legitimate general operand
|
| when generating PIC code. It is given that flag_pic is on and
|
| @@ -9244,29 +9849,25 @@ legitimate_pic_address_disp_p (rtx disp)
|
| return 0;
|
| }
|
|
|
| -/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
|
| - memory address for an instruction. The MODE argument is the machine mode
|
| - for the MEM expression that wants to use this address.
|
| +/* Recognizes RTL expressions that are valid memory addresses for an
|
| + instruction. The MODE argument is the machine mode for the MEM
|
| + expression that wants to use this address.
|
|
|
| It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
|
| convert common non-canonical forms to canonical form so that they will
|
| be recognized. */
|
|
|
| -int
|
| -legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
|
| - rtx addr, int strict)
|
| +static bool
|
| +ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
|
| + rtx addr, bool strict)
|
| {
|
| struct ix86_address parts;
|
| rtx base, index, disp;
|
| HOST_WIDE_INT scale;
|
| - const char *reason = NULL;
|
| - rtx reason_rtx = NULL_RTX;
|
|
|
| if (ix86_decompose_address (addr, &parts) <= 0)
|
| - {
|
| - reason = "decomposition failed";
|
| - goto report_error;
|
| - }
|
| + /* Decomposition failed. */
|
| + return false;
|
|
|
| base = parts.base;
|
| index = parts.index;
|
| @@ -9282,7 +9883,6 @@ legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
|
| if (base)
|
| {
|
| rtx reg;
|
| - reason_rtx = base;
|
|
|
| if (REG_P (base))
|
| reg = base;
|
| @@ -9292,23 +9892,17 @@ legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
|
| <= UNITS_PER_WORD)
|
| reg = SUBREG_REG (base);
|
| else
|
| - {
|
| - reason = "base is not a register";
|
| - goto report_error;
|
| - }
|
| + /* Base is not a register. */
|
| + return false;
|
|
|
| if (GET_MODE (base) != Pmode)
|
| - {
|
| - reason = "base is not in Pmode";
|
| - goto report_error;
|
| - }
|
| + /* Base is not in Pmode. */
|
| + return false;
|
|
|
| if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
|
| || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
|
| - {
|
| - reason = "base is not valid";
|
| - goto report_error;
|
| - }
|
| + /* Base is not valid. */
|
| + return false;
|
| }
|
|
|
| /* Validate index register.
|
| @@ -9318,7 +9912,6 @@ legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
|
| if (index)
|
| {
|
| rtx reg;
|
| - reason_rtx = index;
|
|
|
| if (REG_P (index))
|
| reg = index;
|
| @@ -9328,47 +9921,34 @@ legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
|
| <= UNITS_PER_WORD)
|
| reg = SUBREG_REG (index);
|
| else
|
| - {
|
| - reason = "index is not a register";
|
| - goto report_error;
|
| - }
|
| + /* Index is not a register. */
|
| + return false;
|
|
|
| if (GET_MODE (index) != Pmode)
|
| - {
|
| - reason = "index is not in Pmode";
|
| - goto report_error;
|
| - }
|
| + /* Index is not in Pmode. */
|
| + return false;
|
|
|
| if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
|
| || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
|
| - {
|
| - reason = "index is not valid";
|
| - goto report_error;
|
| - }
|
| + /* Index is not valid. */
|
| + return false;
|
| }
|
|
|
| /* Validate scale factor. */
|
| if (scale != 1)
|
| {
|
| - reason_rtx = GEN_INT (scale);
|
| if (!index)
|
| - {
|
| - reason = "scale without index";
|
| - goto report_error;
|
| - }
|
| + /* Scale without index. */
|
| + return false;
|
|
|
| if (scale != 2 && scale != 4 && scale != 8)
|
| - {
|
| - reason = "scale is not a valid multiplier";
|
| - goto report_error;
|
| - }
|
| + /* Scale is not a valid multiplier. */
|
| + return false;
|
| }
|
|
|
| /* Validate displacement. */
|
| if (disp)
|
| {
|
| - reason_rtx = disp;
|
| -
|
| if (GET_CODE (disp) == CONST
|
| && GET_CODE (XEXP (disp, 0)) == UNSPEC
|
| && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
|
| @@ -9382,8 +9962,9 @@ legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
|
| gcc_assert (flag_pic);
|
| if (!TARGET_64BIT)
|
| goto is_legitimate_pic;
|
| - reason = "64bit address unspec";
|
| - goto report_error;
|
| +
|
| + /* 64bit address unspec. */
|
| + return false;
|
|
|
| case UNSPEC_GOTPCREL:
|
| gcc_assert (flag_pic);
|
| @@ -9397,8 +9978,8 @@ legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
|
| break;
|
|
|
| default:
|
| - reason = "invalid address unspec";
|
| - goto report_error;
|
| + /* Invalid address unspec. */
|
| + return false;
|
| }
|
|
|
| else if (SYMBOLIC_CONST (disp)
|
| @@ -9421,16 +10002,12 @@ legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
|
| || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
|
| || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
|
| && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
|
| - {
|
| - reason = "non-constant pic memory reference";
|
| - goto report_error;
|
| - }
|
| + /* Non-constant pic memory reference. */
|
| + return false;
|
| }
|
| else if (! legitimate_pic_address_disp_p (disp))
|
| - {
|
| - reason = "displacement is an invalid pic construct";
|
| - goto report_error;
|
| - }
|
| + /* Displacement is an invalid pic construct. */
|
| + return false;
|
|
|
| /* This code used to verify that a symbolic pic displacement
|
| includes the pic_offset_table_rtx register.
|
| @@ -9460,23 +10037,24 @@ legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
|
| || !legitimate_constant_p (disp))
|
| && (GET_CODE (disp) != SYMBOL_REF
|
| || !legitimate_constant_p (disp)))
|
| - {
|
| - reason = "displacement is not constant";
|
| - goto report_error;
|
| - }
|
| + /* Displacement is not constant. */
|
| + return false;
|
| else if (TARGET_64BIT
|
| && !x86_64_immediate_operand (disp, VOIDmode))
|
| - {
|
| - reason = "displacement is out of range";
|
| - goto report_error;
|
| - }
|
| + /* Displacement is out of range. */
|
| + return false;
|
| }
|
|
|
| /* Everything looks valid. */
|
| - return TRUE;
|
| + return true;
|
| +}
|
| +
|
| +/* Determine if a given RTX is a valid constant address. */
|
|
|
| - report_error:
|
| - return FALSE;
|
| +bool
|
| +constant_address_p (rtx x)
|
| +{
|
| + return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
|
| }
|
|
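| With legitimate_address_p now a static hook implementation, the wiring
| presumably happens elsewhere in the file (not shown in these hunks),
| along the usual target-hook lines:
|
|   #undef TARGET_LEGITIMATE_ADDRESS_P
|   #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
|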
|
| /* Return a unique alias set for the GOT. */
|
| @@ -9505,7 +10083,7 @@ ix86_GOT_alias_set (void)
|
| differentiate them from global data objects. The returned
|
| address is the PIC reg + an unspec constant.
|
|
|
| - GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
|
| + TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
|
| reg also appears in the address. */
|
|
|
| static rtx
|
| @@ -9745,7 +10323,7 @@ get_thread_pointer (int to_reg)
|
| return reg;
|
| }
|
|
|
| -/* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
|
| +/* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
|
| false if we expect this to be used for a memory address and true if
|
| we expect to load the address into a register. */
|
|
|
| @@ -9934,7 +10512,8 @@ get_dllimport_decl (tree decl)
|
| *loc = h = GGC_NEW (struct tree_map);
|
| h->hash = in.hash;
|
| h->base.from = decl;
|
| - h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
|
| + h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
|
| + VAR_DECL, NULL, ptr_type_node);
|
| DECL_ARTIFICIAL (to) = 1;
|
| DECL_IGNORED_P (to) = 1;
|
| DECL_EXTERNAL (to) = 1;
|
| @@ -9989,9 +10568,6 @@ legitimize_dllimport_symbol (rtx symbol, bool want_reg)
|
| OLDX is the address as it was before break_out_memory_refs was called.
|
| In some cases it is useful to look at this to decide what needs to be done.
|
|
|
| - MODE and WIN are passed so that this macro can use
|
| - GO_IF_LEGITIMATE_ADDRESS.
|
| -
|
| It is always safe for this macro to do nothing. It exists to recognize
|
| opportunities to optimize the output.
|
|
|
| @@ -10003,8 +10579,9 @@ legitimize_dllimport_symbol (rtx symbol, bool want_reg)
|
| When -fpic is used, special handling is needed for symbolic references.
|
| See comments by legitimize_pic_address in i386.c for details. */
|
|
|
| -rtx
|
| -legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
|
| +static rtx
|
| +ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
|
| + enum machine_mode mode)
|
| {
|
| int changed = 0;
|
| unsigned log;
|
| @@ -10132,7 +10709,7 @@ legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
|
| }
|
| }
|
|
|
| - if (changed && legitimate_address_p (mode, x, FALSE))
|
| + if (changed && ix86_legitimate_address_p (mode, x, FALSE))
|
| return x;
|
|
|
| if (GET_CODE (XEXP (x, 0)) == MULT)
|
| @@ -10158,7 +10735,7 @@ legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
|
| x = legitimize_pic_address (x, 0);
|
| }
|
|
|
| - if (changed && legitimate_address_p (mode, x, FALSE))
|
| + if (changed && ix86_legitimate_address_p (mode, x, FALSE))
|
| return x;
|
|
|
| if (REG_P (XEXP (x, 0)))
|
| @@ -10310,29 +10887,29 @@ output_pic_addr_const (FILE *file, rtx x, int code)
|
| break;
|
| case UNSPEC_GOTTPOFF:
|
| /* FIXME: This might be @TPOFF in Sun ld too. */
|
| - fputs ("@GOTTPOFF", file);
|
| + fputs ("@gottpoff", file);
|
| break;
|
| case UNSPEC_TPOFF:
|
| - fputs ("@TPOFF", file);
|
| + fputs ("@tpoff", file);
|
| break;
|
| case UNSPEC_NTPOFF:
|
| if (TARGET_64BIT)
|
| - fputs ("@TPOFF", file);
|
| + fputs ("@tpoff", file);
|
| else
|
| - fputs ("@NTPOFF", file);
|
| + fputs ("@ntpoff", file);
|
| break;
|
| case UNSPEC_DTPOFF:
|
| - fputs ("@DTPOFF", file);
|
| + fputs ("@dtpoff", file);
|
| break;
|
| case UNSPEC_GOTNTPOFF:
|
| if (TARGET_64BIT)
|
| fputs (ASSEMBLER_DIALECT == ASM_ATT ?
|
| - "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
|
| + "@gottpoff(%rip)": "@gottpoff[rip]", file);
|
| else
|
| - fputs ("@GOTNTPOFF", file);
|
| + fputs ("@gotntpoff", file);
|
| break;
|
| case UNSPEC_INDNTPOFF:
|
| - fputs ("@INDNTPOFF", file);
|
| + fputs ("@indntpoff", file);
|
| break;
|
| #if TARGET_MACHO
|
| case UNSPEC_MACHOPIC_OFFSET:
|
| @@ -10359,7 +10936,7 @@ i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
|
| {
|
| fputs (ASM_LONG, file);
|
| output_addr_const (file, x);
|
| - fputs ("@DTPOFF", file);
|
| + fputs ("@dtpoff", file);
|
| switch (size)
|
| {
|
| case 4:
|
| @@ -10397,9 +10974,12 @@ ix86_pic_register_p (rtx x)
|
| the DWARF output code. */
|
|
|
| static rtx
|
| -ix86_delegitimize_address (rtx orig_x)
|
| +ix86_delegitimize_address (rtx x)
|
| {
|
| - rtx x = orig_x;
|
| + rtx orig_x = delegitimize_mem_from_attrs (x);
|
| + /* addend is NULL or some rtx if x is something+GOTOFF where
|
| + something doesn't include the PIC register. */
|
| + rtx addend = NULL_RTX;
|
| /* reg_addend is NULL or a multiple of some register. */
|
| rtx reg_addend = NULL_RTX;
|
| /* const_addend is NULL or a const_int. */
|
| @@ -10407,6 +10987,8 @@ ix86_delegitimize_address (rtx orig_x)
|
| /* This is the result, or NULL. */
|
| rtx result = NULL_RTX;
|
|
|
| + x = orig_x;
|
| +
|
| if (MEM_P (x))
|
| x = XEXP (x, 0);
|
|
|
| @@ -10417,7 +10999,10 @@ ix86_delegitimize_address (rtx orig_x)
|
| || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
|
| || !MEM_P (orig_x))
|
| return orig_x;
|
| - return XVECEXP (XEXP (x, 0), 0, 0);
|
| + x = XVECEXP (XEXP (x, 0), 0, 0);
|
| + if (GET_MODE (orig_x) != Pmode)
|
| + return simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
|
| + return x;
|
| }
|
|
|
| if (GET_CODE (x) != PLUS
|
| @@ -10436,14 +11021,13 @@ ix86_delegitimize_address (rtx orig_x)
|
| else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
|
| reg_addend = XEXP (reg_addend, 0);
|
| else
|
| - return orig_x;
|
| - if (!REG_P (reg_addend)
|
| - && GET_CODE (reg_addend) != MULT
|
| - && GET_CODE (reg_addend) != ASHIFT)
|
| - return orig_x;
|
| + {
|
| + reg_addend = NULL_RTX;
|
| + addend = XEXP (x, 0);
|
| + }
|
| }
|
| else
|
| - return orig_x;
|
| + addend = XEXP (x, 0);
|
|
|
| x = XEXP (XEXP (x, 1), 0);
|
| if (GET_CODE (x) == PLUS
|
| @@ -10454,7 +11038,7 @@ ix86_delegitimize_address (rtx orig_x)
|
| }
|
|
|
| if (GET_CODE (x) == UNSPEC
|
| - && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
|
| + && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
|
| || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
|
| result = XVECEXP (x, 0, 0);
|
|
|
| @@ -10469,6 +11053,24 @@ ix86_delegitimize_address (rtx orig_x)
|
| result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
|
| if (reg_addend)
|
| result = gen_rtx_PLUS (Pmode, reg_addend, result);
|
| + if (addend)
|
| + {
|
| + /* If the rest of original X doesn't involve the PIC register, add
|
| + addend and subtract pic_offset_table_rtx. This can happen e.g.
|
| + for code like:
|
| + leal (%ebx, %ecx, 4), %ecx
|
| + ...
|
| + movl foo@GOTOFF(%ecx), %edx
|
| + in which case we return (%ecx - %ebx) + foo. */
|
| + if (pic_offset_table_rtx)
|
| + result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
|
| + pic_offset_table_rtx),
|
| + result);
|
| + else
|
| + return orig_x;
|
| + }
|
| + if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
|
| + return simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
|
| return result;
|
| }
|
|
|
| @@ -10508,9 +11110,6 @@ put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
|
|
|
| if (mode == CCFPmode || mode == CCFPUmode)
|
| {
|
| - enum rtx_code second_code, bypass_code;
|
| - ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
|
| - gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
|
| code = ix86_fp_compare_code_to_integer (code);
|
| mode = CCmode;
|
| }
|
| @@ -10812,23 +11411,23 @@ get_some_local_dynamic_name (void)
|
| return cfun->machine->some_ld_name;
|
|
|
| for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
|
| - if (INSN_P (insn)
|
| + if (NONDEBUG_INSN_P (insn)
|
| && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
|
| return cfun->machine->some_ld_name;
|
|
|
| - gcc_unreachable ();
|
| + return NULL;
|
| }
|
|
|
| /* Meaning of CODE:
|
| L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
|
| C -- print opcode suffix for set/cmov insn.
|
| c -- like C, but print reversed condition
|
| - E,e -- likewise, but for compare-and-branch fused insn.
|
| F,f -- likewise, but for floating-point.
|
| O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
|
| otherwise nothing
|
| R -- print the prefix for register names.
|
| z -- print the opcode suffix for the size of the current operand.
|
| + Z -- likewise, with special suffixes for x87 instructions.
|
| * -- print a star (in certain assembler syntax)
|
| A -- print an absolute memory reference.
|
| w -- print the operand as if it's a "word" (HImode) even if it isn't.
|
| @@ -10849,7 +11448,7 @@ get_some_local_dynamic_name (void)
|
| X -- don't print any sort of PIC '@' suffix for a symbol.
|
| & -- print some in-use local-dynamic symbol name.
|
| H -- print a memory address offset by 8; used for sse high-parts
|
| - Y -- print condition for SSE5 com* instruction.
|
| + Y -- print condition for XOP pcom* instruction.
|
| + -- print a branch hint as 'cs' or 'ds' prefix
|
| ; -- print a semicolon (after prefixes due to bug in older gas).
|
| */
|
| @@ -10867,8 +11466,15 @@ print_operand (FILE *file, rtx x, int code)
|
| return;
|
|
|
| case '&':
|
| - assemble_name (file, get_some_local_dynamic_name ());
|
| - return;
|
| + {
|
| + const char *name = get_some_local_dynamic_name ();
|
| + if (name == NULL)
|
| + output_operand_lossage ("'%%&' used without any "
|
| + "local dynamic TLS references");
|
| + else
|
| + assemble_name (file, name);
|
| + return;
|
| + }
|
|
|
| case 'A':
|
| switch (ASSEMBLER_DIALECT)
|
| @@ -10928,72 +11534,110 @@ print_operand (FILE *file, rtx x, int code)
|
| return;
|
|
|
| case 'z':
|
| - /* 387 opcodes don't get size suffixes if the operands are
|
| - registers. */
|
| - if (STACK_REG_P (x))
|
| - return;
|
| + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
|
| + {
|
| + /* Opcodes don't get size suffixes if using Intel opcodes. */
|
| + if (ASSEMBLER_DIALECT == ASM_INTEL)
|
| + return;
|
|
|
| - /* Likewise if using Intel opcodes. */
|
| + switch (GET_MODE_SIZE (GET_MODE (x)))
|
| + {
|
| + case 1:
|
| + putc ('b', file);
|
| + return;
|
| +
|
| + case 2:
|
| + putc ('w', file);
|
| + return;
|
| +
|
| + case 4:
|
| + putc ('l', file);
|
| + return;
|
| +
|
| + case 8:
|
| + putc ('q', file);
|
| + return;
|
| +
|
| + default:
|
| + output_operand_lossage
|
| + ("invalid operand size for operand code '%c'", code);
|
| + return;
|
| + }
|
| + }
|
| +
|
| + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
|
| + warning
|
| + (0, "non-integer operand used with operand code '%c'", code);
|
| + /* FALLTHRU */
|
| +
|
| + case 'Z':
|
| + /* 387 opcodes don't get size suffixes if using Intel opcodes. */
|
| if (ASSEMBLER_DIALECT == ASM_INTEL)
|
| return;
|
|
|
| - /* This is the size of op from size of operand. */
|
| - switch (GET_MODE_SIZE (GET_MODE (x)))
|
| + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
|
| {
|
| - case 1:
|
| - putc ('b', file);
|
| - return;
|
| -
|
| - case 2:
|
| - if (MEM_P (x))
|
| + switch (GET_MODE_SIZE (GET_MODE (x)))
|
| {
|
| -#ifdef HAVE_GAS_FILDS_FISTS
|
| + case 2:
|
| +#ifdef HAVE_AS_IX86_FILDS
|
| putc ('s', file);
|
| #endif
|
| return;
|
| +
|
| + case 4:
|
| + putc ('l', file);
|
| + return;
|
| +
|
| + case 8:
|
| +#ifdef HAVE_AS_IX86_FILDQ
|
| + putc ('q', file);
|
| +#else
|
| + fputs ("ll", file);
|
| +#endif
|
| + return;
|
| +
|
| + default:
|
| + break;
|
| }
|
| - else
|
| - putc ('w', file);
|
| - return;
|
| + }
|
| + else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
|
| + {
|
| + /* 387 opcodes don't get size suffixes
|
| + if the operands are registers. */
|
| + if (STACK_REG_P (x))
|
| + return;
|
|
|
| - case 4:
|
| - if (GET_MODE (x) == SFmode)
|
| + switch (GET_MODE_SIZE (GET_MODE (x)))
|
| {
|
| + case 4:
|
| putc ('s', file);
|
| return;
|
| - }
|
| - else
|
| - putc ('l', file);
|
| - return;
|
|
|
| - case 12:
|
| - case 16:
|
| - putc ('t', file);
|
| - return;
|
| + case 8:
|
| + putc ('l', file);
|
| + return;
|
|
|
| - case 8:
|
| - if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
|
| - {
|
| - if (MEM_P (x))
|
| - {
|
| -#ifdef GAS_MNEMONICS
|
| - putc ('q', file);
|
| -#else
|
| - putc ('l', file);
|
| - putc ('l', file);
|
| -#endif
|
| - }
|
| - else
|
| - putc ('q', file);
|
| + case 12:
|
| + case 16:
|
| + putc ('t', file);
|
| + return;
|
| +
|
| + default:
|
| + break;
|
| }
|
| - else
|
| - putc ('l', file);
|
| + }
|
| + else
|
| + {
|
| + output_operand_lossage
|
| + ("invalid operand type used with operand code '%c'", code);
|
| return;
|
| -
|
| - default:
|
| - gcc_unreachable ();
|
| }
|
|
|
| + output_operand_lossage
|
| + ("invalid operand size for operand code '%c'", code);
|
| + return;
|
| +
|
| case 'd':
|
| case 'b':
|
| case 'w':
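
Read together, the restructured cases split the old %z into two operand
codes. A compact summary of the suffixes emitted by the code above
(nothing is printed under Intel syntax in either case):

    %z, MODE_INT:    size 1 -> 'b', 2 -> 'w', 4 -> 'l', 8 -> 'q'
    %Z, MODE_INT:    size 2 -> 's' (under HAVE_AS_IX86_FILDS), 4 -> 'l',
                     8 -> 'q' (or "ll" without HAVE_AS_IX86_FILDQ)
    %Z, MODE_FLOAT:  size 4 -> 's', 8 -> 'l', 12/16 -> 't'
                     (nothing for x87 stack registers)

Other sizes or operand types end in output_operand_lossage.
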
|
| @@ -11066,7 +11710,8 @@ print_operand (FILE *file, rtx x, int code)
|
| fputs ("ord", file);
|
| break;
|
| default:
|
| - output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
|
| + output_operand_lossage ("operand is not a condition code, "
|
| + "invalid operand code 'D'");
|
| return;
|
| }
|
| }
|
| @@ -11105,7 +11750,8 @@ print_operand (FILE *file, rtx x, int code)
|
| fputs ("ord", file);
|
| break;
|
| default:
|
| - output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
|
| + output_operand_lossage ("operand is not a condition code, "
|
| + "invalid operand code 'D'");
|
| return;
|
| }
|
| }
|
| @@ -11180,14 +11826,6 @@ print_operand (FILE *file, rtx x, int code)
|
| put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
|
| return;
|
|
|
| - case 'E':
|
| - put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
|
| - return;
|
| -
|
| - case 'e':
|
| - put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
|
| - return;
|
| -
|
| case 'H':
|
| /* It doesn't actually matter what mode we use here, as we're
|
| only going to use this for printing. */
|
| @@ -11279,16 +11917,15 @@ print_operand (FILE *file, rtx x, int code)
|
| fputs ("une", file);
|
| break;
|
| default:
|
| - output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
|
| + output_operand_lossage ("operand is not a condition code, "
|
| + "invalid operand code 'Y'");
|
| return;
|
| }
|
| return;
|
|
|
| case ';':
|
| -#if TARGET_MACHO
|
| - fputs (" ; ", file);
|
| -#else
|
| - fputc (' ', file);
|
| +#if TARGET_MACHO || !HAVE_AS_IX86_REP_LOCK_PREFIX
|
| + fputs (";", file);
|
| #endif
|
| return;
|
|
|
| @@ -11367,7 +12004,7 @@ print_operand (FILE *file, rtx x, int code)
|
| char dstr[30];
|
|
|
| real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
|
| - fprintf (file, "%s", dstr);
|
| + fputs (dstr, file);
|
| }
|
|
|
| else if (GET_CODE (x) == CONST_DOUBLE
|
| @@ -11376,7 +12013,7 @@ print_operand (FILE *file, rtx x, int code)
|
| char dstr[30];
|
|
|
| real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
|
| - fprintf (file, "%s", dstr);
|
| + fputs (dstr, file);
|
| }
|
|
|
| else
|
| @@ -11570,34 +12207,34 @@ output_addr_const_extra (FILE *file, rtx x)
|
| case UNSPEC_GOTTPOFF:
|
| output_addr_const (file, op);
|
| /* FIXME: This might be @TPOFF in Sun ld. */
|
| - fputs ("@GOTTPOFF", file);
|
| + fputs ("@gottpoff", file);
|
| break;
|
| case UNSPEC_TPOFF:
|
| output_addr_const (file, op);
|
| - fputs ("@TPOFF", file);
|
| + fputs ("@tpoff", file);
|
| break;
|
| case UNSPEC_NTPOFF:
|
| output_addr_const (file, op);
|
| if (TARGET_64BIT)
|
| - fputs ("@TPOFF", file);
|
| + fputs ("@tpoff", file);
|
| else
|
| - fputs ("@NTPOFF", file);
|
| + fputs ("@ntpoff", file);
|
| break;
|
| case UNSPEC_DTPOFF:
|
| output_addr_const (file, op);
|
| - fputs ("@DTPOFF", file);
|
| + fputs ("@dtpoff", file);
|
| break;
|
| case UNSPEC_GOTNTPOFF:
|
| output_addr_const (file, op);
|
| if (TARGET_64BIT)
|
| fputs (ASSEMBLER_DIALECT == ASM_ATT ?
|
| - "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
|
| + "@gottpoff(%rip)" : "@gottpoff[rip]", file);
|
| else
|
| - fputs ("@GOTNTPOFF", file);
|
| + fputs ("@gotntpoff", file);
|
| break;
|
| case UNSPEC_INDNTPOFF:
|
| output_addr_const (file, op);
|
| - fputs ("@INDNTPOFF", file);
|
| + fputs ("@indntpoff", file);
|
| break;
|
| #if TARGET_MACHO
|
| case UNSPEC_MACHOPIC_OFFSET:
|
| @@ -11793,7 +12430,7 @@ output_387_binary_op (rtx insn, rtx *operands)
|
|
|
| if (MEM_P (operands[2]))
|
| {
|
| - p = "%z2\t%2";
|
| + p = "%Z2\t%2";
|
| break;
|
| }
|
|
|
| @@ -11823,13 +12460,13 @@ output_387_binary_op (rtx insn, rtx *operands)
|
| case DIV:
|
| if (MEM_P (operands[1]))
|
| {
|
| - p = "r%z1\t%1";
|
| + p = "r%Z1\t%1";
|
| break;
|
| }
|
|
|
| if (MEM_P (operands[2]))
|
| {
|
| - p = "%z2\t%2";
|
| + p = "%Z2\t%2";
|
| break;
|
| }
|
|
|
| @@ -12071,15 +12708,15 @@ output_fix_trunc (rtx insn, rtx *operands, int fisttp)
|
| gcc_assert (GET_MODE (operands[1]) != TFmode);
|
|
|
| if (fisttp)
|
| - output_asm_insn ("fisttp%z0\t%0", operands);
|
| + output_asm_insn ("fisttp%Z0\t%0", operands);
|
| else
|
| {
|
| if (round_mode != I387_CW_ANY)
|
| output_asm_insn ("fldcw\t%3", operands);
|
| if (stack_top_dies || dimode_p)
|
| - output_asm_insn ("fistp%z0\t%0", operands);
|
| + output_asm_insn ("fistp%Z0\t%0", operands);
|
| else
|
| - output_asm_insn ("fist%z0\t%0", operands);
|
| + output_asm_insn ("fist%Z0\t%0", operands);
|
| if (round_mode != I387_CW_ANY)
|
| output_asm_insn ("fldcw\t%2", operands);
|
| }
|
| @@ -12203,13 +12840,13 @@ output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
|
|
|
| static const char * const alt[16] =
|
| {
|
| - "fcom%z2\t%y2\n\tfnstsw\t%0",
|
| - "fcomp%z2\t%y2\n\tfnstsw\t%0",
|
| - "fucom%z2\t%y2\n\tfnstsw\t%0",
|
| - "fucomp%z2\t%y2\n\tfnstsw\t%0",
|
| + "fcom%Z2\t%y2\n\tfnstsw\t%0",
|
| + "fcomp%Z2\t%y2\n\tfnstsw\t%0",
|
| + "fucom%Z2\t%y2\n\tfnstsw\t%0",
|
| + "fucomp%Z2\t%y2\n\tfnstsw\t%0",
|
|
|
| - "ficom%z2\t%y2\n\tfnstsw\t%0",
|
| - "ficomp%z2\t%y2\n\tfnstsw\t%0",
|
| + "ficom%Z2\t%y2\n\tfnstsw\t%0",
|
| + "ficomp%Z2\t%y2\n\tfnstsw\t%0",
|
| NULL,
|
| NULL,
|
|
|
| @@ -12271,18 +12908,18 @@ ix86_output_addr_diff_elt (FILE *file, int value, int rel)
|
| fprintf (file, "%s%s%d-%s%d\n",
|
| directive, LPREFIX, value, LPREFIX, rel);
|
| else if (HAVE_AS_GOTOFF_IN_DATA)
|
| - fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
|
| + fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
|
| #if TARGET_MACHO
|
| else if (TARGET_MACHO)
|
| {
|
| - fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
|
| + fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
|
| machopic_output_function_base_name (file);
|
| - fprintf(file, "\n");
|
| + putc ('\n', file);
|
| }
|
| #endif
|
| else
|
| - asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
|
| - ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
|
| + asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
|
| + GOT_SYMBOL_NAME, LPREFIX, value);
|
| }
|
|
|
| /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
|
| @@ -12302,7 +12939,7 @@ ix86_expand_clear (rtx dest)
|
| tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
|
|
|
| /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
|
| - if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
|
| + if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
|
| {
|
| rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
|
| tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
|
| @@ -12400,7 +13037,7 @@ ix86_expand_move (enum machine_mode mode, rtx operands[])
|
| op1 = force_reg (Pmode, op1);
|
| else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
|
| {
|
| - rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
|
| + rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
|
| op1 = legitimize_pic_address (op1, reg);
|
| if (op0 == op1)
|
| return;
|
| @@ -12466,7 +13103,7 @@ ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
|
| && (CONSTANT_P (op1)
|
| || (GET_CODE (op1) == SUBREG
|
| && CONSTANT_P (SUBREG_REG (op1))))
|
| - && standard_sse_constant_p (op1) <= 0)
|
| + && !standard_sse_constant_p (op1))
|
| op1 = validize_mem (force_const_mem (mode, op1));
|
|
|
| /* We need to check memory alignment for SSE mode since attribute
|
| @@ -12994,6 +13631,316 @@ ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
|
| emit_move_insn (operands[0], dst);
|
| }
|
|
|
| +#define LEA_SEARCH_THRESHOLD 12
|
| +
|
| +/* Search backward for a non-agu definition of register number REGNO1
|
| + or register number REGNO2 in INSN's basic block until we
|
| + 1. pass LEA_SEARCH_THRESHOLD instructions, or
|
| + 2. reach the BB boundary, or
|
| + 3. reach an agu definition.
|
| + Returns the distance between the non-agu definition point and INSN.
|
| + If no definition point is found, returns -1. */
|
| +
|
| +static int
|
| +distance_non_agu_define (unsigned int regno1, unsigned int regno2,
|
| + rtx insn)
|
| +{
|
| + basic_block bb = BLOCK_FOR_INSN (insn);
|
| + int distance = 0;
|
| + df_ref *def_rec;
|
| + enum attr_type insn_type;
|
| +
|
| + if (insn != BB_HEAD (bb))
|
| + {
|
| + rtx prev = PREV_INSN (insn);
|
| + while (prev && distance < LEA_SEARCH_THRESHOLD)
|
| + {
|
| + if (NONDEBUG_INSN_P (prev))
|
| + {
|
| + distance++;
|
| + for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
|
| + if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
|
| + && !DF_REF_IS_ARTIFICIAL (*def_rec)
|
| + && (regno1 == DF_REF_REGNO (*def_rec)
|
| + || regno2 == DF_REF_REGNO (*def_rec)))
|
| + {
|
| + insn_type = get_attr_type (prev);
|
| + if (insn_type != TYPE_LEA)
|
| + goto done;
|
| + }
|
| + }
|
| + if (prev == BB_HEAD (bb))
|
| + break;
|
| + prev = PREV_INSN (prev);
|
| + }
|
| + }
|
| +
|
| + if (distance < LEA_SEARCH_THRESHOLD)
|
| + {
|
| + edge e;
|
| + edge_iterator ei;
|
| + bool simple_loop = false;
|
| +
|
| + FOR_EACH_EDGE (e, ei, bb->preds)
|
| + if (e->src == bb)
|
| + {
|
| + simple_loop = true;
|
| + break;
|
| + }
|
| +
|
| + if (simple_loop)
|
| + {
|
| + rtx prev = BB_END (bb);
|
| + while (prev
|
| + && prev != insn
|
| + && distance < LEA_SEARCH_THRESHOLD)
|
| + {
|
| + if (NONDEBUG_INSN_P (prev))
|
| + {
|
| + distance++;
|
| + for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
|
| + if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
|
| + && !DF_REF_IS_ARTIFICIAL (*def_rec)
|
| + && (regno1 == DF_REF_REGNO (*def_rec)
|
| + || regno2 == DF_REF_REGNO (*def_rec)))
|
| + {
|
| + insn_type = get_attr_type (prev);
|
| + if (insn_type != TYPE_LEA)
|
| + goto done;
|
| + }
|
| + }
|
| + prev = PREV_INSN (prev);
|
| + }
|
| + }
|
| + }
|
| +
|
| + distance = -1;
|
| +
|
| +done:
|
| + /* get_attr_type may modify recog data. We want to make sure
|
| + that recog data is valid for instruction INSN, on which
|
| + distance_non_agu_define is called. INSN is unchanged here. */
|
| + extract_insn_cached (insn);
|
| + return distance;
|
| +}
|
| +
|
| +/* Return the distance between INSN and the next insn that uses
|
| + register number REGNO0 in a memory address. Return -1 if no such
|
| + use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
|
| +
|
| +static int
|
| +distance_agu_use (unsigned int regno0, rtx insn)
|
| +{
|
| + basic_block bb = BLOCK_FOR_INSN (insn);
|
| + int distance = 0;
|
| + df_ref *def_rec;
|
| + df_ref *use_rec;
|
| +
|
| + if (insn != BB_END (bb))
|
| + {
|
| + rtx next = NEXT_INSN (insn);
|
| + while (next && distance < LEA_SEARCH_THRESHOLD)
|
| + {
|
| + if (NONDEBUG_INSN_P (next))
|
| + {
|
| + distance++;
|
| +
|
| + for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
|
| + if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
|
| + || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
|
| + && regno0 == DF_REF_REGNO (*use_rec))
|
| + {
|
| + /* Return DISTANCE if OP0 is used in a memory
|
| + address in NEXT. */
|
| + return distance;
|
| + }
|
| +
|
| + for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
|
| + if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
|
| + && !DF_REF_IS_ARTIFICIAL (*def_rec)
|
| + && regno0 == DF_REF_REGNO (*def_rec))
|
| + {
|
| + /* Return -1 if OP0 is set in NEXT. */
|
| + return -1;
|
| + }
|
| + }
|
| + if (next == BB_END (bb))
|
| + break;
|
| + next = NEXT_INSN (next);
|
| + }
|
| + }
|
| +
|
| + if (distance < LEA_SEARCH_THRESHOLD)
|
| + {
|
| + edge e;
|
| + edge_iterator ei;
|
| + bool simple_loop = false;
|
| +
|
| + FOR_EACH_EDGE (e, ei, bb->succs)
|
| + if (e->dest == bb)
|
| + {
|
| + simple_loop = true;
|
| + break;
|
| + }
|
| +
|
| + if (simple_loop)
|
| + {
|
| + rtx next = BB_HEAD (bb);
|
| + while (next
|
| + && next != insn
|
| + && distance < LEA_SEARCH_THRESHOLD)
|
| + {
|
| + if (NONDEBUG_INSN_P (next))
|
| + {
|
| + distance++;
|
| +
|
| + for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
|
| + if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
|
| + || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
|
| + && regno0 == DF_REF_REGNO (*use_rec))
|
| + {
|
| + /* Return DISTANCE if OP0 is used in a memory
|
| + address in NEXT. */
|
| + return distance;
|
| + }
|
| +
|
| + for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
|
| + if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
|
| + && !DF_REF_IS_ARTIFICIAL (*def_rec)
|
| + && regno0 == DF_REF_REGNO (*def_rec))
|
| + {
|
| + /* Return -1 if OP0 is set in NEXT. */
|
| + return -1;
|
| + }
|
| +
|
| + }
|
| + next = NEXT_INSN (next);
|
| + }
|
| + }
|
| + }
|
| +
|
| + return -1;
|
| +}
|
| +
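
Note the asymmetry with distance_non_agu_define: only uses inside a memory
address count here (DF_REF_REG_MEM_LOAD / DF_REF_REG_MEM_STORE), i.e. uses
that actually go through the address-generation unit. A hypothetical pair
of insns to illustrate, in AT&T syntax:

    lea 4(%ebx,%ecx), %eax    # insn defining regno0 (%eax)
    mov (%eax), %edx          # %eax used in an address: distance 1

A plain ALU use such as "add %eax, %edx" neither returns a distance nor
stops the walk; only an address use or a redefinition of regno0 does.
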
|
| +/* Define this macro to tune LEA priority vs ADD; it takes effect when
|
| + there is a choice between LEA and ADD.
|
| + Negative value: ADD is preferred over LEA
|
| + Zero: Neutral
|
| + Positive value: LEA is preferred over ADD. */
|
| +#define IX86_LEA_PRIORITY 2
|
| +
|
| +/* Return true if it is ok to optimize an ADD operation to LEA
|
| + operation to avoid flag register consumption. For processors
|
| + like ATOM, if the destination register of LEA holds an actual
|
| + address which will be used soon, LEA is better; otherwise ADD
|
| + is better. */
|
| +
|
| +bool
|
| +ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
|
| + rtx insn, rtx operands[])
|
| +{
|
| + unsigned int regno0 = true_regnum (operands[0]);
|
| + unsigned int regno1 = true_regnum (operands[1]);
|
| + unsigned int regno2;
|
| +
|
| + if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
|
| + return regno0 != regno1;
|
| +
|
| + regno2 = true_regnum (operands[2]);
|
| +
|
| + /* If a = b + c (a != b and a != c), we must use the lea form. */
|
| + if (regno0 != regno1 && regno0 != regno2)
|
| + return true;
|
| + else
|
| + {
|
| + int dist_define, dist_use;
|
| + dist_define = distance_non_agu_define (regno1, regno2, insn);
|
| + if (dist_define <= 0)
|
| + return true;
|
| +
|
| + /* If this insn has both backward non-agu dependence and forward
|
| + agu dependence, the one with the shorter distance takes effect. */
|
| + dist_use = distance_agu_use (regno0, insn);
|
| + if (dist_use <= 0
|
| + || (dist_define + IX86_LEA_PRIORITY) < dist_use)
|
| + return false;
|
| +
|
| + return true;
|
| + }
|
| +}
|
| +
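
A worked example of the heuristic, with assumed distances: suppose the
destination overlaps a source (otherwise the lea form is mandatory and the
function returns true immediately), the sources were last set by a non-LEA
insn 3 insns back (dist_define = 3), and the result feeds a memory address
6 insns ahead (dist_use = 6). Then dist_define + IX86_LEA_PRIORITY
= 3 + 2 = 5 < 6, so the function returns false and the plain add is kept.
Were the address use only 4 insns ahead, 5 < 4 fails and lea is chosen.
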
|
| +/* Return true if destination reg of SET_BODY is shift count of
|
| + USE_BODY. */
|
| +
|
| +static bool
|
| +ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
|
| +{
|
| + rtx set_dest;
|
| + rtx shift_rtx;
|
| + int i;
|
| +
|
| + /* Retrieve destination of SET_BODY. */
|
| + switch (GET_CODE (set_body))
|
| + {
|
| + case SET:
|
| + set_dest = SET_DEST (set_body);
|
| + if (!set_dest || !REG_P (set_dest))
|
| + return false;
|
| + break;
|
| + case PARALLEL:
|
| + for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
|
| + if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
|
| + use_body))
|
| + return true;
|
| + default:
|
| + return false;
|
| + break;
|
| + }
|
| +
|
| + /* Retrieve shift count of USE_BODY. */
|
| + switch (GET_CODE (use_body))
|
| + {
|
| + case SET:
|
| + shift_rtx = XEXP (use_body, 1);
|
| + break;
|
| + case PARALLEL:
|
| + for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
|
| + if (ix86_dep_by_shift_count_body (set_body,
|
| + XVECEXP (use_body, 0, i)))
|
| + return true;
|
| + default:
|
| + return false;
|
| + break;
|
| + }
|
| +
|
| + if (shift_rtx
|
| + && (GET_CODE (shift_rtx) == ASHIFT
|
| + || GET_CODE (shift_rtx) == LSHIFTRT
|
| + || GET_CODE (shift_rtx) == ASHIFTRT
|
| + || GET_CODE (shift_rtx) == ROTATE
|
| + || GET_CODE (shift_rtx) == ROTATERT))
|
| + {
|
| + rtx shift_count = XEXP (shift_rtx, 1);
|
| +
|
| + /* Return true if shift count is dest of SET_BODY. */
|
| + if (REG_P (shift_count)
|
| + && true_regnum (set_dest) == true_regnum (shift_count))
|
| + return true;
|
| + }
|
| +
|
| + return false;
|
| +}
|
| +
|
| +/* Return true if destination reg of SET_INSN is shift count of
|
| + USE_INSN. */
|
| +
|
| +bool
|
| +ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
|
| +{
|
| + return ix86_dep_by_shift_count_body (PATTERN (set_insn),
|
| + PATTERN (use_insn));
|
| +}
|
| +
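
For illustration, a hypothetical pair of patterns the new predicate
accepts; the destination of the first is the shift count of the second,
compared via true_regnum so the QImode/SImode difference on the hard
register does not matter:

    (set (reg:SI 2 cx) (plus:SI (reg:SI 0 ax) (const_int 1)))
    (set (reg:SI 0 ax)
         (ashift:SI (reg:SI 0 ax) (reg:QI 2 cx)))

ix86_dep_by_shift_count (set_insn, use_insn) returns true for this pair,
letting callers model the extra dependence between the two insns.
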
|
| /* Return TRUE or FALSE depending on whether the unary operator meets the
|
| appropriate constraints. */
|
|
|
| @@ -13010,6 +13957,19 @@ ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
|
| return TRUE;
|
| }
|
|
|
| +/* Return TRUE if the operands to a vec_interleave_{high,low}v2df
|
| + are ok, keeping in mind the possible movddup alternative. */
|
| +
|
| +bool
|
| +ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
|
| +{
|
| + if (MEM_P (operands[0]))
|
| + return rtx_equal_p (operands[0], operands[1 + high]);
|
| + if (MEM_P (operands[1]) && MEM_P (operands[2]))
|
| + return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
|
| + return true;
|
| +}
|
| +
|
| /* Post-reload splitter for converting an SF or DFmode value in an
|
| SSE register into an unsigned SImode. */
|
|
|
| @@ -13105,7 +14065,7 @@ ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
|
| exponents = validize_mem (force_const_mem (V4SImode, x));
|
|
|
| /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
|
| - emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
|
| + emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
|
|
|
| /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
|
| yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
|
| @@ -13131,7 +14091,7 @@ ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
|
| else
|
| {
|
| x = copy_to_mode_reg (V2DFmode, fp_xmm);
|
| - emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
|
| + emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
|
| emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
|
| }
|
|
|
| @@ -13221,7 +14181,7 @@ ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
|
| emit_move_insn (target, fp_hi);
|
| }
|
|
|
| -/* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
|
| +/* A subroutine of ix86_build_signbit_mask. If VECT is true,
|
| then replicate the value for all elements of the vector
|
| register. */
|
|
|
| @@ -13431,15 +14391,9 @@ ix86_expand_copysign (rtx operands[])
|
| op0 = CONST0_RTX (vmode);
|
| else
|
| {
|
| - rtvec v;
|
| + rtx v = ix86_build_const_vector (mode, false, op0);
|
|
|
| - if (mode == SFmode)
|
| - v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
|
| - CONST0_RTX (SFmode), CONST0_RTX (SFmode));
|
| - else
|
| - v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
|
| -
|
| - op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
|
| + op0 = force_reg (vmode, v);
|
| }
|
| }
|
| else if (op0 != CONST0_RTX (mode))
|
| @@ -13481,11 +14435,10 @@ void
|
| ix86_split_copysign_const (rtx operands[])
|
| {
|
| enum machine_mode mode, vmode;
|
| - rtx dest, op0, op1, mask, x;
|
| + rtx dest, op0, mask, x;
|
|
|
| dest = operands[0];
|
| op0 = operands[1];
|
| - op1 = operands[2];
|
| mask = operands[3];
|
|
|
| mode = GET_MODE (dest);
|
| @@ -13789,84 +14742,41 @@ ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
|
| }
|
| }
|
|
|
| -/* Split comparison code CODE into comparisons we can do using branch
|
| - instructions. BYPASS_CODE is comparison code for branch that will
|
| - branch around FIRST_CODE and SECOND_CODE. If some of branches
|
| - is not required, set value to UNKNOWN.
|
| - We never require more than two branches. */
|
|
|
| -void
|
| -ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
|
| - enum rtx_code *first_code,
|
| - enum rtx_code *second_code)
|
| -{
|
| - *first_code = code;
|
| - *bypass_code = UNKNOWN;
|
| - *second_code = UNKNOWN;
|
| -
|
| - /* The fcomi comparison sets flags as follows:
|
| -
|
| - cmp ZF PF CF
|
| - > 0 0 0
|
| - < 0 0 1
|
| - = 1 0 0
|
| - un 1 1 1 */
|
| +/* Return a comparison we can do that is equivalent to
|
| + swap_condition (code), apart possibly from orderedness.
|
| + Never change orderedness if TARGET_IEEE_FP, returning
|
| + UNKNOWN in that case if necessary. */
|
|
|
| +static enum rtx_code
|
| +ix86_fp_swap_condition (enum rtx_code code)
|
| +{
|
| switch (code)
|
| {
|
| - case GT: /* GTU - CF=0 & ZF=0 */
|
| - case GE: /* GEU - CF=0 */
|
| - case ORDERED: /* PF=0 */
|
| - case UNORDERED: /* PF=1 */
|
| - case UNEQ: /* EQ - ZF=1 */
|
| - case UNLT: /* LTU - CF=1 */
|
| - case UNLE: /* LEU - CF=1 | ZF=1 */
|
| - case LTGT: /* EQ - ZF=0 */
|
| - break;
|
| - case LT: /* LTU - CF=1 - fails on unordered */
|
| - *first_code = UNLT;
|
| - *bypass_code = UNORDERED;
|
| - break;
|
| - case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
|
| - *first_code = UNLE;
|
| - *bypass_code = UNORDERED;
|
| - break;
|
| - case EQ: /* EQ - ZF=1 - fails on unordered */
|
| - *first_code = UNEQ;
|
| - *bypass_code = UNORDERED;
|
| - break;
|
| - case NE: /* NE - ZF=0 - fails on unordered */
|
| - *first_code = LTGT;
|
| - *second_code = UNORDERED;
|
| - break;
|
| - case UNGE: /* GEU - CF=0 - fails on unordered */
|
| - *first_code = GE;
|
| - *second_code = UNORDERED;
|
| - break;
|
| - case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
|
| - *first_code = GT;
|
| - *second_code = UNORDERED;
|
| - break;
|
| + case GT: /* GTU - CF=0 & ZF=0 */
|
| + return TARGET_IEEE_FP ? UNKNOWN : UNLT;
|
| + case GE: /* GEU - CF=0 */
|
| + return TARGET_IEEE_FP ? UNKNOWN : UNLE;
|
| + case UNLT: /* LTU - CF=1 */
|
| + return TARGET_IEEE_FP ? UNKNOWN : GT;
|
| + case UNLE: /* LEU - CF=1 | ZF=1 */
|
| + return TARGET_IEEE_FP ? UNKNOWN : GE;
|
| default:
|
| - gcc_unreachable ();
|
| - }
|
| - if (!TARGET_IEEE_FP)
|
| - {
|
| - *second_code = UNKNOWN;
|
| - *bypass_code = UNKNOWN;
|
| + return swap_condition (code);
|
| }
|
| }
|
|
|
| -/* Return cost of comparison done fcom + arithmetics operations on AX.
|
| +/* Return cost of comparison CODE using the best strategy for performance.
|
| All following functions do use number of instructions as a cost metrics.
|
| In future this should be tweaked to compute bytes for optimize_size and
|
| take into account performance of various instructions on various CPUs. */
|
| +
|
| static int
|
| -ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
|
| +ix86_fp_comparison_cost (enum rtx_code code)
|
| {
|
| - if (!TARGET_IEEE_FP)
|
| - return 4;
|
| - /* The cost of code output by ix86_expand_fp_compare. */
|
| + int arith_cost;
|
| +
|
| + /* The cost of code using bit-twiddling on %ah. */
|
| switch (code)
|
| {
|
| case UNLE:
|
| @@ -13877,82 +14787,49 @@ ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
|
| case UNORDERED:
|
| case ORDERED:
|
| case UNEQ:
|
| - return 4;
|
| + arith_cost = 4;
|
| break;
|
| case LT:
|
| case NE:
|
| case EQ:
|
| case UNGE:
|
| - return 5;
|
| + arith_cost = TARGET_IEEE_FP ? 5 : 4;
|
| break;
|
| case LE:
|
| case UNGT:
|
| - return 6;
|
| + arith_cost = TARGET_IEEE_FP ? 6 : 4;
|
| break;
|
| default:
|
| gcc_unreachable ();
|
| }
|
| -}
|
|
|
| -/* Return cost of comparison done using fcomi operation.
|
| - See ix86_fp_comparison_arithmetics_cost for the metrics. */
|
| -static int
|
| -ix86_fp_comparison_fcomi_cost (enum rtx_code code)
|
| -{
|
| - enum rtx_code bypass_code, first_code, second_code;
|
| - /* Return arbitrarily high cost when instruction is not supported - this
|
| - prevents gcc from using it. */
|
| - if (!TARGET_CMOVE)
|
| - return 1024;
|
| - ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
|
| - return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
|
| + switch (ix86_fp_comparison_strategy (code))
|
| + {
|
| + case IX86_FPCMP_COMI:
|
| + return arith_cost > 4 ? 3 : 2;
|
| + case IX86_FPCMP_SAHF:
|
| + return arith_cost > 4 ? 4 : 3;
|
| + default:
|
| + return arith_cost;
|
| + }
|
| }
|
|
|
| -/* Return cost of comparison done using sahf operation.
|
| - See ix86_fp_comparison_arithmetics_cost for the metrics. */
|
| -static int
|
| -ix86_fp_comparison_sahf_cost (enum rtx_code code)
|
| -{
|
| - enum rtx_code bypass_code, first_code, second_code;
|
| - /* Return arbitrarily high cost when instruction is not preferred - this
|
| - avoids gcc from using it. */
|
| - if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())))
|
| - return 1024;
|
| - ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
|
| - return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
|
| -}
|
| +/* Return the strategy to use for a floating-point comparison. We assume
|
| + that fcomi is always preferable where available, since that is also
|
| + true when looking at size (2 bytes, vs. 3 for fnstsw+sahf and at
|
| + least 5 for fnstsw+test). */
|
|
|
| -/* Compute cost of the comparison done using any method.
|
| - See ix86_fp_comparison_arithmetics_cost for the metrics. */
|
| -static int
|
| -ix86_fp_comparison_cost (enum rtx_code code)
|
| +enum ix86_fpcmp_strategy
|
| +ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
|
| {
|
| - int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
|
| - int min;
|
| -
|
| - fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
|
| - sahf_cost = ix86_fp_comparison_sahf_cost (code);
|
| -
|
| - min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
|
| - if (min > sahf_cost)
|
| - min = sahf_cost;
|
| - if (min > fcomi_cost)
|
| - min = fcomi_cost;
|
| - return min;
|
| -}
|
| + /* Do fcomi/sahf based test when profitable. */
|
|
|
| -/* Return true if we should use an FCOMI instruction for this
|
| - fp comparison. */
|
| + if (TARGET_CMOVE)
|
| + return IX86_FPCMP_COMI;
|
|
|
| -int
|
| -ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
|
| -{
|
| - enum rtx_code swapped_code = swap_condition (code);
|
| + if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
|
| + return IX86_FPCMP_SAHF;
|
|
|
| - return ((ix86_fp_comparison_cost (code)
|
| - == ix86_fp_comparison_fcomi_cost (code))
|
| - || (ix86_fp_comparison_cost (swapped_code)
|
| - == ix86_fp_comparison_fcomi_cost (swapped_code)));
|
| + return IX86_FPCMP_ARITH;
|
| }
|
|
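
Pulling the two functions together, the strategies and the instruction
counts modeled by ix86_fp_comparison_cost are:

    IX86_FPCMP_COMI   needs TARGET_CMOVE; fcomi              cost 2 or 3
    IX86_FPCMP_SAHF   needs TARGET_SAHF (and TARGET_USE_SAHF
                      or a size optimization); fnstsw+sahf   cost 3 or 4
    IX86_FPCMP_ARITH  always available; fnstsw plus
                      bit tests on %ah                       cost 4 to 6

The higher figure applies to conditions whose arith_cost exceeds 4:
LT/NE/EQ/UNGE (5) and LE/UNGT (6) under TARGET_IEEE_FP.
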
|
| /* Swap, force into registers, or otherwise massage the two operands
|
| @@ -13979,7 +14856,7 @@ ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
|
| && ! (standard_80387_constant_p (op0) == 1
|
| || standard_80387_constant_p (op1) == 1)
|
| && GET_CODE (op1) != FLOAT)
|
| - || ix86_use_fcomi_compare (code)))
|
| + || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
|
| {
|
| op0 = force_reg (op_mode, op0);
|
| op1 = force_reg (op_mode, op1);
|
| @@ -13995,9 +14872,13 @@ ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
|
| && ! (standard_80387_constant_p (op1) == 0
|
| || MEM_P (op1))))
|
| {
|
| - rtx tmp;
|
| - tmp = op0, op0 = op1, op1 = tmp;
|
| - code = swap_condition (code);
|
| + enum rtx_code new_code = ix86_fp_swap_condition (code);
|
| + if (new_code != UNKNOWN)
|
| + {
|
| + rtx tmp;
|
| + tmp = op0, op0 = op1, op1 = tmp;
|
| + code = new_code;
|
| + }
|
| }
|
|
|
| if (!REG_P (op0))
|
| @@ -14072,59 +14953,38 @@ ix86_fp_compare_code_to_integer (enum rtx_code code)
|
| /* Generate insn patterns to do a floating point compare of OPERANDS. */
|
|
|
| static rtx
|
| -ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
|
| - rtx *second_test, rtx *bypass_test)
|
| +ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
|
| {
|
| enum machine_mode fpcmp_mode, intcmp_mode;
|
| rtx tmp, tmp2;
|
| - int cost = ix86_fp_comparison_cost (code);
|
| - enum rtx_code bypass_code, first_code, second_code;
|
|
|
| fpcmp_mode = ix86_fp_compare_mode (code);
|
| code = ix86_prepare_fp_compare_args (code, &op0, &op1);
|
|
|
| - if (second_test)
|
| - *second_test = NULL_RTX;
|
| - if (bypass_test)
|
| - *bypass_test = NULL_RTX;
|
| -
|
| - ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
|
| -
|
| /* Do fcomi/sahf based test when profitable. */
|
| - if (ix86_fp_comparison_arithmetics_cost (code) > cost
|
| - && (bypass_code == UNKNOWN || bypass_test)
|
| - && (second_code == UNKNOWN || second_test))
|
| + switch (ix86_fp_comparison_strategy (code))
|
| {
|
| + case IX86_FPCMP_COMI:
|
| + intcmp_mode = fpcmp_mode;
|
| tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
|
| tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
|
| tmp);
|
| - if (TARGET_CMOVE)
|
| - emit_insn (tmp);
|
| - else
|
| - {
|
| - gcc_assert (TARGET_SAHF);
|
| + emit_insn (tmp);
|
| + break;
|
|
|
| - if (!scratch)
|
| - scratch = gen_reg_rtx (HImode);
|
| - tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
|
| + case IX86_FPCMP_SAHF:
|
| + intcmp_mode = fpcmp_mode;
|
| + tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
|
| + tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
|
| + tmp);
|
|
|
| - emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
|
| - }
|
| + if (!scratch)
|
| + scratch = gen_reg_rtx (HImode);
|
| + tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
|
| + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
|
| + break;
|
|
|
| - /* The FP codes work out to act like unsigned. */
|
| - intcmp_mode = fpcmp_mode;
|
| - code = first_code;
|
| - if (bypass_code != UNKNOWN)
|
| - *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
|
| - gen_rtx_REG (intcmp_mode, FLAGS_REG),
|
| - const0_rtx);
|
| - if (second_code != UNKNOWN)
|
| - *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
|
| - gen_rtx_REG (intcmp_mode, FLAGS_REG),
|
| - const0_rtx);
|
| - }
|
| - else
|
| - {
|
| + case IX86_FPCMP_ARITH:
|
| /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
|
| tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
|
| tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
|
| @@ -14161,13 +15021,13 @@ ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
|
| if (code == LT && TARGET_IEEE_FP)
|
| {
|
| emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
|
| - emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
|
| + emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
|
| intcmp_mode = CCmode;
|
| code = EQ;
|
| }
|
| else
|
| {
|
| - emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
|
| + emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
|
| code = NE;
|
| }
|
| break;
|
| @@ -14181,8 +15041,7 @@ ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
|
| else
|
| {
|
| emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
|
| - emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
|
| - GEN_INT (0x01)));
|
| + emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
|
| code = NE;
|
| }
|
| break;
|
| @@ -14215,7 +15074,6 @@ ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
|
| {
|
| emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
|
| code = NE;
|
| - break;
|
| }
|
| break;
|
| case NE:
|
| @@ -14246,6 +15104,10 @@ ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
|
| default:
|
| gcc_unreachable ();
|
| }
|
| + break;
|
| +
|
| + default:
|
| + gcc_unreachable ();
|
| }
|
|
|
| /* Return the test that should be put into the flags user, i.e.
|
| @@ -14256,27 +15118,19 @@ ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
|
| }
|
|
|
| rtx
|
| -ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
|
| +ix86_expand_compare (enum rtx_code code)
|
| {
|
| rtx op0, op1, ret;
|
| op0 = ix86_compare_op0;
|
| op1 = ix86_compare_op1;
|
|
|
| - if (second_test)
|
| - *second_test = NULL_RTX;
|
| - if (bypass_test)
|
| - *bypass_test = NULL_RTX;
|
| + if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC)
|
| + ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_op0, ix86_compare_op1);
|
|
|
| - if (ix86_compare_emitted)
|
| - {
|
| - ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
|
| - ix86_compare_emitted = NULL_RTX;
|
| - }
|
| else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
|
| {
|
| gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
|
| - ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
|
| - second_test, bypass_test);
|
| + ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
|
| }
|
| else
|
| ret = ix86_expand_int_compare (code, op0, op1);
|
| @@ -14284,88 +15138,27 @@ ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
|
| return ret;
|
| }
|
|
|
| -/* Return true if the CODE will result in nontrivial jump sequence. */
|
| -bool
|
| -ix86_fp_jump_nontrivial_p (enum rtx_code code)
|
| -{
|
| - enum rtx_code bypass_code, first_code, second_code;
|
| - if (!TARGET_CMOVE)
|
| - return true;
|
| - ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
|
| - return bypass_code != UNKNOWN || second_code != UNKNOWN;
|
| -}
|
| -
|
| void
|
| ix86_expand_branch (enum rtx_code code, rtx label)
|
| {
|
| rtx tmp;
|
|
|
| - /* If we have emitted a compare insn, go straight to simple.
|
| - ix86_expand_compare won't emit anything if ix86_compare_emitted
|
| - is non NULL. */
|
| - if (ix86_compare_emitted)
|
| - goto simple;
|
| -
|
| switch (GET_MODE (ix86_compare_op0))
|
| {
|
| + case SFmode:
|
| + case DFmode:
|
| + case XFmode:
|
| case QImode:
|
| case HImode:
|
| case SImode:
|
| simple:
|
| - tmp = ix86_expand_compare (code, NULL, NULL);
|
| + tmp = ix86_expand_compare (code);
|
| tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
|
| gen_rtx_LABEL_REF (VOIDmode, label),
|
| pc_rtx);
|
| emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
|
| return;
|
|
|
| - case SFmode:
|
| - case DFmode:
|
| - case XFmode:
|
| - {
|
| - rtvec vec;
|
| - int use_fcomi;
|
| - enum rtx_code bypass_code, first_code, second_code;
|
| -
|
| - code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
|
| - &ix86_compare_op1);
|
| -
|
| - ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
|
| -
|
| - /* Check whether we will use the natural sequence with one jump. If
|
| - so, we can expand jump early. Otherwise delay expansion by
|
| - creating compound insn to not confuse optimizers. */
|
| - if (bypass_code == UNKNOWN && second_code == UNKNOWN)
|
| - {
|
| - ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
|
| - gen_rtx_LABEL_REF (VOIDmode, label),
|
| - pc_rtx, NULL_RTX, NULL_RTX);
|
| - }
|
| - else
|
| - {
|
| - tmp = gen_rtx_fmt_ee (code, VOIDmode,
|
| - ix86_compare_op0, ix86_compare_op1);
|
| - tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
|
| - gen_rtx_LABEL_REF (VOIDmode, label),
|
| - pc_rtx);
|
| - tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
|
| -
|
| - use_fcomi = ix86_use_fcomi_compare (code);
|
| - vec = rtvec_alloc (3 + !use_fcomi);
|
| - RTVEC_ELT (vec, 0) = tmp;
|
| - RTVEC_ELT (vec, 1)
|
| - = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
|
| - RTVEC_ELT (vec, 2)
|
| - = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
|
| - if (! use_fcomi)
|
| - RTVEC_ELT (vec, 3)
|
| - = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
|
| -
|
| - emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
|
| - }
|
| - return;
|
| - }
|
| -
|
| case DImode:
|
| if (TARGET_64BIT)
|
| goto simple;
|
| @@ -14506,7 +15299,11 @@ ix86_expand_branch (enum rtx_code code, rtx label)
|
| }
|
|
|
| default:
|
| - gcc_unreachable ();
|
| + /* If we have already emitted a compare insn, go straight to simple.
|
| + ix86_expand_compare handles this case via the MODE_CC check on
|
| + ix86_compare_op0. */
|
| + gcc_assert (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC);
|
| + goto simple;
|
| }
|
| }
|
|
|
| @@ -14515,10 +15312,7 @@ void
|
| ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
|
| rtx target1, rtx target2, rtx tmp, rtx pushed)
|
| {
|
| - rtx second, bypass;
|
| - rtx label = NULL_RTX;
|
| rtx condition;
|
| - int bypass_probability = -1, second_probability = -1, probability = -1;
|
| rtx i;
|
|
|
| if (target2 != pc_rtx)
|
| @@ -14530,117 +15324,30 @@ ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
|
| }
|
|
|
| condition = ix86_expand_fp_compare (code, op1, op2,
|
| - tmp, &second, &bypass);
|
| + tmp);
|
|
|
| /* Remove pushed operand from stack. */
|
| if (pushed)
|
| ix86_free_from_memory (GET_MODE (pushed));
|
|
|
| - if (split_branch_probability >= 0)
|
| - {
|
| - /* Distribute the probabilities across the jumps.
|
| - Assume the BYPASS and SECOND to be always test
|
| - for UNORDERED. */
|
| - probability = split_branch_probability;
|
| -
|
| - /* Value of 1 is low enough to make no need for probability
|
| - to be updated. Later we may run some experiments and see
|
| - if unordered values are more frequent in practice. */
|
| - if (bypass)
|
| - bypass_probability = 1;
|
| - if (second)
|
| - second_probability = 1;
|
| - }
|
| - if (bypass != NULL_RTX)
|
| - {
|
| - label = gen_label_rtx ();
|
| - i = emit_jump_insn (gen_rtx_SET
|
| - (VOIDmode, pc_rtx,
|
| - gen_rtx_IF_THEN_ELSE (VOIDmode,
|
| - bypass,
|
| - gen_rtx_LABEL_REF (VOIDmode,
|
| - label),
|
| - pc_rtx)));
|
| - if (bypass_probability >= 0)
|
| - REG_NOTES (i)
|
| - = gen_rtx_EXPR_LIST (REG_BR_PROB,
|
| - GEN_INT (bypass_probability),
|
| - REG_NOTES (i));
|
| - }
|
| i = emit_jump_insn (gen_rtx_SET
|
| (VOIDmode, pc_rtx,
|
| gen_rtx_IF_THEN_ELSE (VOIDmode,
|
| condition, target1, target2)));
|
| - if (probability >= 0)
|
| - REG_NOTES (i)
|
| - = gen_rtx_EXPR_LIST (REG_BR_PROB,
|
| - GEN_INT (probability),
|
| - REG_NOTES (i));
|
| - if (second != NULL_RTX)
|
| - {
|
| - i = emit_jump_insn (gen_rtx_SET
|
| - (VOIDmode, pc_rtx,
|
| - gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
|
| - target2)));
|
| - if (second_probability >= 0)
|
| - REG_NOTES (i)
|
| - = gen_rtx_EXPR_LIST (REG_BR_PROB,
|
| - GEN_INT (second_probability),
|
| - REG_NOTES (i));
|
| - }
|
| - if (label != NULL_RTX)
|
| - emit_label (label);
|
| + if (split_branch_probability >= 0)
|
| + add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
|
| }
|
|
|
| -int
|
| +void
|
| ix86_expand_setcc (enum rtx_code code, rtx dest)
|
| {
|
| - rtx ret, tmp, tmpreg, equiv;
|
| - rtx second_test, bypass_test;
|
| -
|
| - if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
|
| - return 0; /* FAIL */
|
| + rtx ret;
|
|
|
| gcc_assert (GET_MODE (dest) == QImode);
|
|
|
| - ret = ix86_expand_compare (code, &second_test, &bypass_test);
|
| + ret = ix86_expand_compare (code);
|
| PUT_MODE (ret, QImode);
|
| -
|
| - tmp = dest;
|
| - tmpreg = dest;
|
| -
|
| - emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
|
| - if (bypass_test || second_test)
|
| - {
|
| - rtx test = second_test;
|
| - int bypass = 0;
|
| - rtx tmp2 = gen_reg_rtx (QImode);
|
| - if (bypass_test)
|
| - {
|
| - gcc_assert (!second_test);
|
| - test = bypass_test;
|
| - bypass = 1;
|
| - PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
|
| - }
|
| - PUT_MODE (test, QImode);
|
| - emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
|
| -
|
| - if (bypass)
|
| - emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
|
| - else
|
| - emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
|
| - }
|
| -
|
| - /* Attach a REG_EQUAL note describing the comparison result. */
|
| - if (ix86_compare_op0 && ix86_compare_op1)
|
| - {
|
| - equiv = simplify_gen_relational (code, QImode,
|
| - GET_MODE (ix86_compare_op0),
|
| - ix86_compare_op0, ix86_compare_op1);
|
| - set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
|
| - }
|
| -
|
| - return 1; /* DONE */
|
| + emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
|
| }
|
|
|
| /* Expand comparison setting or clearing carry flag. Return true when
|
| @@ -14657,7 +15364,6 @@ ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
|
|
|
| if (SCALAR_FLOAT_MODE_P (mode))
|
| {
|
| - rtx second_test = NULL, bypass_test = NULL;
|
| rtx compare_op, compare_seq;
|
|
|
| gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
|
| @@ -14683,14 +15389,10 @@ ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
|
| we decide to expand comparison using arithmetic that is not
|
| too common scenario. */
|
| start_sequence ();
|
| - compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
|
| - &second_test, &bypass_test);
|
| + compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
|
| compare_seq = get_insns ();
|
| end_sequence ();
|
|
|
| - if (second_test || bypass_test)
|
| - return false;
|
| -
|
| if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
|
| || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
|
| code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
|
| @@ -14773,7 +15475,7 @@ ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
|
| }
|
| ix86_compare_op0 = op0;
|
| ix86_compare_op1 = op1;
|
| - *pop = ix86_expand_compare (code, NULL, NULL);
|
| + *pop = ix86_expand_compare (code);
|
| gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
|
| return true;
|
| }
|
| @@ -14783,12 +15485,13 @@ ix86_expand_int_movcc (rtx operands[])
|
| {
|
| enum rtx_code code = GET_CODE (operands[1]), compare_code;
|
| rtx compare_seq, compare_op;
|
| - rtx second_test, bypass_test;
|
| enum machine_mode mode = GET_MODE (operands[0]);
|
| - bool sign_bit_compare_p = false;;
|
| + bool sign_bit_compare_p = false;
|
|
|
| start_sequence ();
|
| - compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
|
| + ix86_compare_op0 = XEXP (operands[1], 0);
|
| + ix86_compare_op1 = XEXP (operands[1], 1);
|
| + compare_op = ix86_expand_compare (code);
|
| compare_seq = get_insns ();
|
| end_sequence ();
|
|
|
| @@ -14823,15 +15526,19 @@ ix86_expand_int_movcc (rtx operands[])
|
|
|
| if (!sign_bit_compare_p)
|
| {
|
| + rtx flags;
|
| bool fpcmp = false;
|
|
|
| compare_code = GET_CODE (compare_op);
|
|
|
| - if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
|
| - || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
|
| + flags = XEXP (compare_op, 0);
|
| +
|
| + if (GET_MODE (flags) == CCFPmode
|
| + || GET_MODE (flags) == CCFPUmode)
|
| {
|
| fpcmp = true;
|
| - compare_code = ix86_fp_compare_code_to_integer (compare_code);
|
| + compare_code
|
| + = ix86_fp_compare_code_to_integer (compare_code);
|
| }
|
|
|
| /* To simplify rest of code, restrict to the GEU case. */
|
| @@ -14850,7 +15557,8 @@ ix86_expand_int_movcc (rtx operands[])
|
| reverse_condition_maybe_unordered
|
| (GET_CODE (compare_op)));
|
| else
|
| - PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
|
| + PUT_CODE (compare_op,
|
| + reverse_condition (GET_CODE (compare_op)));
|
| }
|
| diff = ct - cf;
|
|
|
| @@ -14859,9 +15567,10 @@ ix86_expand_int_movcc (rtx operands[])
|
| tmp = gen_reg_rtx (mode);
|
|
|
| if (mode == DImode)
|
| - emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
|
| + emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
|
| else
|
| - emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
|
| + emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
|
| + flags, compare_op));
|
| }
|
| else
|
| {
|
| @@ -15260,19 +15969,6 @@ ix86_expand_int_movcc (rtx operands[])
|
| if (! nonimmediate_operand (operands[3], mode))
|
| operands[3] = force_reg (mode, operands[3]);
|
|
|
| - if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
|
| - {
|
| - rtx tmp = gen_reg_rtx (mode);
|
| - emit_move_insn (tmp, operands[3]);
|
| - operands[3] = tmp;
|
| - }
|
| - if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
|
| - {
|
| - rtx tmp = gen_reg_rtx (mode);
|
| - emit_move_insn (tmp, operands[2]);
|
| - operands[2] = tmp;
|
| - }
|
| -
|
| if (! register_operand (operands[2], VOIDmode)
|
| && (mode == QImode
|
| || ! register_operand (operands[3], VOIDmode)))
|
| @@ -15287,18 +15983,6 @@ ix86_expand_int_movcc (rtx operands[])
|
| gen_rtx_IF_THEN_ELSE (mode,
|
| compare_op, operands[2],
|
| operands[3])));
|
| - if (bypass_test)
|
| - emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
|
| - gen_rtx_IF_THEN_ELSE (mode,
|
| - bypass_test,
|
| - copy_rtx (operands[3]),
|
| - copy_rtx (operands[0]))));
|
| - if (second_test)
|
| - emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
|
| - gen_rtx_IF_THEN_ELSE (mode,
|
| - second_test,
|
| - copy_rtx (operands[2]),
|
| - copy_rtx (operands[0]))));
|
|
|
| return 1; /* DONE */
|
| }
|
| @@ -15465,7 +16149,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
|
| x = gen_rtx_AND (mode, x, op_false);
|
| emit_insn (gen_rtx_SET (VOIDmode, dest, x));
|
| }
|
| - else if (TARGET_SSE5)
|
| + else if (TARGET_XOP)
|
| {
|
| rtx pcmov = gen_rtx_SET (mode, dest,
|
| gen_rtx_IF_THEN_ELSE (mode, cmp,
|
| @@ -15503,8 +16187,10 @@ ix86_expand_fp_movcc (rtx operands[])
|
| {
|
| enum machine_mode mode = GET_MODE (operands[0]);
|
| enum rtx_code code = GET_CODE (operands[1]);
|
| - rtx tmp, compare_op, second_test, bypass_test;
|
| + rtx tmp, compare_op;
|
|
|
| + ix86_compare_op0 = XEXP (operands[1], 0);
|
| + ix86_compare_op1 = XEXP (operands[1], 1);
|
| if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
|
| {
|
| enum machine_mode cmode;
|
| @@ -15538,45 +16224,20 @@ ix86_expand_fp_movcc (rtx operands[])
|
| /* The floating point conditional move instructions don't directly
|
| support conditions resulting from a signed integer comparison. */
|
|
|
| - compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
|
| -
|
| - /* The floating point conditional move instructions don't directly
|
| - support signed integer comparisons. */
|
| -
|
| + compare_op = ix86_expand_compare (code);
|
| if (!fcmov_comparison_operator (compare_op, VOIDmode))
|
| {
|
| - gcc_assert (!second_test && !bypass_test);
|
| tmp = gen_reg_rtx (QImode);
|
| ix86_expand_setcc (code, tmp);
|
| code = NE;
|
| ix86_compare_op0 = tmp;
|
| ix86_compare_op1 = const0_rtx;
|
| - compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
|
| - }
|
| - if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
|
| - {
|
| - tmp = gen_reg_rtx (mode);
|
| - emit_move_insn (tmp, operands[3]);
|
| - operands[3] = tmp;
|
| - }
|
| - if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
|
| - {
|
| - tmp = gen_reg_rtx (mode);
|
| - emit_move_insn (tmp, operands[2]);
|
| - operands[2] = tmp;
|
| + compare_op = ix86_expand_compare (code);
|
| }
|
|
|
| emit_insn (gen_rtx_SET (VOIDmode, operands[0],
|
| gen_rtx_IF_THEN_ELSE (mode, compare_op,
|
| operands[2], operands[3])));
|
| - if (bypass_test)
|
| - emit_insn (gen_rtx_SET (VOIDmode, operands[0],
|
| - gen_rtx_IF_THEN_ELSE (mode, bypass_test,
|
| - operands[3], operands[0])));
|
| - if (second_test)
|
| - emit_insn (gen_rtx_SET (VOIDmode, operands[0],
|
| - gen_rtx_IF_THEN_ELSE (mode, second_test,
|
| - operands[2], operands[0])));
|
|
|
| return 1;
|
| }
|
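| For illustration (a sketch, not part of the patch), a signed integer
|
| condition selecting between doubles takes the fallback path above, since
|
| fcmov only decodes the unsigned/unordered flag conditions:
|
|     double fsel (int a, int b, double x, double y)
|
|     {
|
|       /* 'a < b' is signed: the expander emits setl into a byte
|
|          register, retests it against zero, and uses fcmovne.  */
|
|       return a < b ? x : y;
|
|     }
|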
| @@ -15618,8 +16279,8 @@ ix86_expand_int_vcond (rtx operands[])
|
| cop0 = operands[4];
|
| cop1 = operands[5];
|
|
|
| - /* SSE5 supports all of the comparisons on all vector int types. */
|
| - if (!TARGET_SSE5)
|
| + /* XOP supports all of the comparisons on all vector int types. */
|
| + if (!TARGET_XOP)
|
| {
|
| /* Canonicalize the comparison to EQ, GT, GTU. */
|
| switch (code)
|
| @@ -15820,9 +16481,9 @@ ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
|
| {
|
| /* Shift higher 8 bytes to lower 8 bytes. */
|
| src = gen_reg_rtx (imode);
|
| - emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
|
| - gen_lowpart (TImode, operands[1]),
|
| - GEN_INT (64)));
|
| + emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
|
| + gen_lowpart (V1TImode, operands[1]),
|
| + GEN_INT (64)));
|
| }
|
| else
|
| src = operands[1];
|
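| In intrinsic terms (an illustrative sketch using the standard SSE2 and
|
| SSE4.1 intrinsics, not code from this patch), the high-half case is:
|
|     #include <smmintrin.h>
|
|     __m128i unpack_high_s8_to_s16 (__m128i v)
|
|     {
|
|       v = _mm_srli_si128 (v, 8);	/* psrldq $8: bytes 8..15 -> 0..7 */
|
|       return _mm_cvtepi8_epi16 (v);	/* pmovsxbw: sign-extend to words */
|
|     }
|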
| @@ -15830,190 +16491,6 @@ ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
|
| emit_insn (unpack (dest, src));
|
| }
|
|
|
| -/* This function performs the same task as ix86_expand_sse_unpack,
|
| - but with sse5 instructions. */
|
| -
|
| -void
|
| -ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
|
| -{
|
| - enum machine_mode imode = GET_MODE (operands[1]);
|
| - int pperm_bytes[16];
|
| - int i;
|
| - int h = (high_p) ? 8 : 0;
|
| - int h2;
|
| - int sign_extend;
|
| - rtvec v = rtvec_alloc (16);
|
| - rtvec vs;
|
| - rtx x, p;
|
| - rtx op0 = operands[0], op1 = operands[1];
|
| -
|
| - switch (imode)
|
| - {
|
| - case V16QImode:
|
| - vs = rtvec_alloc (8);
|
| - h2 = (high_p) ? 8 : 0;
|
| - for (i = 0; i < 8; i++)
|
| - {
|
| - pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
|
| - pperm_bytes[2*i+1] = ((unsigned_p)
|
| - ? PPERM_ZERO
|
| - : PPERM_SIGN | PPERM_SRC2 | i | h);
|
| - }
|
| -
|
| - for (i = 0; i < 16; i++)
|
| - RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
|
| -
|
| - for (i = 0; i < 8; i++)
|
| - RTVEC_ELT (vs, i) = GEN_INT (i + h2);
|
| -
|
| - p = gen_rtx_PARALLEL (VOIDmode, vs);
|
| - x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
|
| - if (unsigned_p)
|
| - emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
|
| - else
|
| - emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
|
| - break;
|
| -
|
| - case V8HImode:
|
| - vs = rtvec_alloc (4);
|
| - h2 = (high_p) ? 4 : 0;
|
| - for (i = 0; i < 4; i++)
|
| - {
|
| - sign_extend = ((unsigned_p)
|
| - ? PPERM_ZERO
|
| - : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
|
| - pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
|
| - pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
|
| - pperm_bytes[4*i+2] = sign_extend;
|
| - pperm_bytes[4*i+3] = sign_extend;
|
| - }
|
| -
|
| - for (i = 0; i < 16; i++)
|
| - RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
|
| -
|
| - for (i = 0; i < 4; i++)
|
| - RTVEC_ELT (vs, i) = GEN_INT (i + h2);
|
| -
|
| - p = gen_rtx_PARALLEL (VOIDmode, vs);
|
| - x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
|
| - if (unsigned_p)
|
| - emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
|
| - else
|
| - emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
|
| - break;
|
| -
|
| - case V4SImode:
|
| - vs = rtvec_alloc (2);
|
| - h2 = (high_p) ? 2 : 0;
|
| - for (i = 0; i < 2; i++)
|
| - {
|
| - sign_extend = ((unsigned_p)
|
| - ? PPERM_ZERO
|
| - : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
|
| - pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
|
| - pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
|
| - pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
|
| - pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
|
| - pperm_bytes[8*i+4] = sign_extend;
|
| - pperm_bytes[8*i+5] = sign_extend;
|
| - pperm_bytes[8*i+6] = sign_extend;
|
| - pperm_bytes[8*i+7] = sign_extend;
|
| - }
|
| -
|
| - for (i = 0; i < 16; i++)
|
| - RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
|
| -
|
| - for (i = 0; i < 2; i++)
|
| - RTVEC_ELT (vs, i) = GEN_INT (i + h2);
|
| -
|
| - p = gen_rtx_PARALLEL (VOIDmode, vs);
|
| - x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
|
| - if (unsigned_p)
|
| - emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
|
| - else
|
| - emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
|
| - break;
|
| -
|
| - default:
|
| - gcc_unreachable ();
|
| - }
|
| -
|
| - return;
|
| -}
|
| -
|
| -/* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
|
| - next narrower integer vector type */
|
| -void
|
| -ix86_expand_sse5_pack (rtx operands[3])
|
| -{
|
| - enum machine_mode imode = GET_MODE (operands[0]);
|
| - int pperm_bytes[16];
|
| - int i;
|
| - rtvec v = rtvec_alloc (16);
|
| - rtx x;
|
| - rtx op0 = operands[0];
|
| - rtx op1 = operands[1];
|
| - rtx op2 = operands[2];
|
| -
|
| - switch (imode)
|
| - {
|
| - case V16QImode:
|
| - for (i = 0; i < 8; i++)
|
| - {
|
| - pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
|
| - pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
|
| - }
|
| -
|
| - for (i = 0; i < 16; i++)
|
| - RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
|
| -
|
| - x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
|
| - emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
|
| - break;
|
| -
|
| - case V8HImode:
|
| - for (i = 0; i < 4; i++)
|
| - {
|
| - pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
|
| - pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
|
| - pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
|
| - pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
|
| - }
|
| -
|
| - for (i = 0; i < 16; i++)
|
| - RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
|
| -
|
| - x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
|
| - emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
|
| - break;
|
| -
|
| - case V4SImode:
|
| - for (i = 0; i < 2; i++)
|
| - {
|
| - pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
|
| - pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
|
| - pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
|
| - pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
|
| - pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
|
| - pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
|
| - pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
|
| - pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
|
| - }
|
| -
|
| - for (i = 0; i < 16; i++)
|
| - RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
|
| -
|
| - x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
|
| - emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
|
| - break;
|
| -
|
| - default:
|
| - gcc_unreachable ();
|
| - }
|
| -
|
| - return;
|
| -}
|
| -
|
| /* Expand conditional increment or decrement using adc/sbb instructions.
|
| The default case using setcc followed by the conditional move can be
|
| done by generic code. */
|
| @@ -16021,11 +16498,15 @@ int
|
| ix86_expand_int_addcc (rtx operands[])
|
| {
|
| enum rtx_code code = GET_CODE (operands[1]);
|
| + rtx flags;
|
| + rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
|
| rtx compare_op;
|
| rtx val = const0_rtx;
|
| bool fpcmp = false;
|
| - enum machine_mode mode = GET_MODE (operands[0]);
|
| + enum machine_mode mode;
|
|
|
| + ix86_compare_op0 = XEXP (operands[1], 0);
|
| + ix86_compare_op1 = XEXP (operands[1], 1);
|
| if (operands[3] != const1_rtx
|
| && operands[3] != constm1_rtx)
|
| return 0;
|
| @@ -16034,8 +16515,10 @@ ix86_expand_int_addcc (rtx operands[])
|
| return 0;
|
| code = GET_CODE (compare_op);
|
|
|
| - if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
|
| - || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
|
| + flags = XEXP (compare_op, 0);
|
| +
|
| + if (GET_MODE (flags) == CCFPmode
|
| + || GET_MODE (flags) == CCFPUmode)
|
| {
|
| fpcmp = true;
|
| code = ix86_fp_compare_code_to_integer (code);
|
| @@ -16051,24 +16534,25 @@ ix86_expand_int_addcc (rtx operands[])
|
| else
|
| PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
|
| }
|
| - PUT_MODE (compare_op, mode);
|
| +
|
| + mode = GET_MODE (operands[0]);
|
|
|
| /* Construct either adc or sbb insn. */
|
| if ((code == LTU) == (operands[3] == constm1_rtx))
|
| {
|
| - switch (GET_MODE (operands[0]))
|
| + switch (mode)
|
| {
|
| case QImode:
|
| - emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
|
| + insn = gen_subqi3_carry;
|
| break;
|
| case HImode:
|
| - emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
|
| + insn = gen_subhi3_carry;
|
| break;
|
| case SImode:
|
| - emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
|
| + insn = gen_subsi3_carry;
|
| break;
|
| case DImode:
|
| - emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
|
| + insn = gen_subdi3_carry;
|
| break;
|
| default:
|
| gcc_unreachable ();
|
| @@ -16076,24 +16560,26 @@ ix86_expand_int_addcc (rtx operands[])
|
| }
|
| else
|
| {
|
| - switch (GET_MODE (operands[0]))
|
| + switch (mode)
|
| {
|
| case QImode:
|
| - emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
|
| + insn = gen_addqi3_carry;
|
| break;
|
| case HImode:
|
| - emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
|
| + insn = gen_addhi3_carry;
|
| break;
|
| case SImode:
|
| - emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
|
| + insn = gen_addsi3_carry;
|
| break;
|
| case DImode:
|
| - emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
|
| + insn = gen_adddi3_carry;
|
| break;
|
| default:
|
| gcc_unreachable ();
|
| }
|
| }
|
| + emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
|
| +
|
| return 1; /* DONE */
|
| }
|
|
|
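| A C-level sketch of what this expander catches (illustrative only): with
|
| the condition carried in the carry flag, no setcc or cmov is needed:
|
|     unsigned count_below (unsigned a, unsigned b, unsigned n)
|
|     {
|
|       return n + (a < b);	/* cmpl b, a ; adcl $0, n */
|
|     }
|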
| @@ -16581,14 +17067,15 @@ ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
|
|
|
| emit_insn ((mode == DImode
|
| ? gen_lshrsi3
|
| - : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
|
| + : gen_lshrdi3) (high[0], high[0],
|
| + GEN_INT (mode == DImode ? 5 : 6)));
|
| emit_insn ((mode == DImode
|
| ? gen_andsi3
|
| - : gen_anddi3) (high[0], high[0], GEN_INT (1)));
|
| + : gen_anddi3) (high[0], high[0], const1_rtx));
|
| emit_move_insn (low[0], high[0]);
|
| emit_insn ((mode == DImode
|
| ? gen_xorsi3
|
| - : gen_xordi3) (low[0], low[0], GEN_INT (1)));
|
| + : gen_xordi3) (low[0], low[0], const1_rtx));
|
| }
|
|
|
| emit_insn ((mode == DImode
|
| @@ -16789,10 +17276,7 @@ predict_jump (int prob)
|
| {
|
| rtx insn = get_last_insn ();
|
| gcc_assert (JUMP_P (insn));
|
| - REG_NOTES (insn)
|
| - = gen_rtx_EXPR_LIST (REG_BR_PROB,
|
| - GEN_INT (prob),
|
| - REG_NOTES (insn));
|
| + add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
|
| }
|
|
|
| /* Helper function for the string operations below. Dest VARIABLE whether
|
| @@ -16844,7 +17328,6 @@ static rtx
|
| scale_counter (rtx countreg, int scale)
|
| {
|
| rtx sc;
|
| - rtx piece_size_mask;
|
|
|
| if (scale == 1)
|
| return countreg;
|
| @@ -16852,7 +17335,6 @@ scale_counter (rtx countreg, int scale)
|
| return GEN_INT (INTVAL (countreg) / scale);
|
| gcc_assert (REG_P (countreg));
|
|
|
| - piece_size_mask = GEN_INT (scale - 1);
|
| sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
|
| GEN_INT (exact_log2 (scale)),
|
| NULL, 1, OPTAB_DIRECT);
|
| @@ -16867,7 +17349,7 @@ counter_mode (rtx count_exp)
|
| {
|
| if (GET_MODE (count_exp) != VOIDmode)
|
| return GET_MODE (count_exp);
|
| - if (GET_CODE (count_exp) != CONST_INT)
|
| + if (!CONST_INT_P (count_exp))
|
| return Pmode;
|
| if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
|
| return DImode;
|
| @@ -18629,7 +19111,6 @@ ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
|
| gen_rtx_IF_THEN_ELSE (Pmode, tmp,
|
| reg2,
|
| out)));
|
| -
|
| }
|
| else
|
| {
|
| @@ -18656,8 +19137,9 @@ ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
|
| /* Avoid branch in fixing the byte. */
|
| tmpreg = gen_lowpart (QImode, tmpreg);
|
| emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
|
| - cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
|
| - emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
|
| + tmp = gen_rtx_REG (CCmode, FLAGS_REG);
|
| + cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
|
| + emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), tmp, cmp));
|
|
|
| emit_label (end_0_label);
|
| }
|
| @@ -18779,7 +19261,7 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
|
| }
|
|
|
| if (ix86_cmodel == CM_LARGE_PIC
|
| - && GET_CODE (fnaddr) == MEM
|
| + && MEM_P (fnaddr)
|
| && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
|
| && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
|
| fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
|
| @@ -18850,7 +19332,7 @@ ix86_init_machine_status (void)
|
| f = GGC_CNEW (struct machine_function);
|
| f->use_fast_prologue_epilogue_nregs = -1;
|
| f->tls_descriptor_call_expanded_p = 0;
|
| - f->call_abi = DEFAULT_ABI;
|
| + f->call_abi = ix86_abi;
|
|
|
| return f;
|
| }
|
| @@ -18955,23 +19437,53 @@ memory_address_length (rtx addr)
|
|
|
| /* Rule of thumb:
|
| - esp as the base always wants an index,
|
| - - ebp as the base always wants a displacement. */
|
| + - ebp as the base always wants a displacement,
|
| + - r12 as the base always wants an index,
|
| + - r13 as the base always wants a displacement. */
|
|
|
| /* Register Indirect. */
|
| if (base && !index && !disp)
|
| {
|
| /* esp (for its index) and ebp (for its displacement) need
|
| - the two-byte modrm form. */
|
| - if (addr == stack_pointer_rtx
|
| - || addr == arg_pointer_rtx
|
| - || addr == frame_pointer_rtx
|
| - || addr == hard_frame_pointer_rtx)
|
| + the two-byte modrm form. Similarly for r12 and r13 in 64-bit
|
| + code. */
|
| + if (REG_P (addr)
|
| + && (addr == arg_pointer_rtx
|
| + || addr == frame_pointer_rtx
|
| + || REGNO (addr) == SP_REG
|
| + || REGNO (addr) == BP_REG
|
| + || REGNO (addr) == R12_REG
|
| + || REGNO (addr) == R13_REG))
|
| len = 1;
|
| }
|
|
|
| - /* Direct Addressing. */
|
| + /* Direct Addressing. In 64-bit mode mod 00 r/m 5
|
| + is not disp32, but disp32(%rip), so for disp32
|
| + a SIB byte is needed, unless print_operand_address
|
| + optimizes it into disp32(%rip) or (%rip) is implied
|
| + by UNSPEC. */
|
| else if (disp && !base && !index)
|
| - len = 4;
|
| + {
|
| + len = 4;
|
| + if (TARGET_64BIT)
|
| + {
|
| + rtx symbol = disp;
|
| +
|
| + if (GET_CODE (disp) == CONST)
|
| + symbol = XEXP (disp, 0);
|
| + if (GET_CODE (symbol) == PLUS
|
| + && CONST_INT_P (XEXP (symbol, 1)))
|
| + symbol = XEXP (symbol, 0);
|
| +
|
| + if (GET_CODE (symbol) != LABEL_REF
|
| + && (GET_CODE (symbol) != SYMBOL_REF
|
| + || SYMBOL_REF_TLS_MODEL (symbol) != 0)
|
| + && (GET_CODE (symbol) != UNSPEC
|
| + || (XINT (symbol, 1) != UNSPEC_GOTPCREL
|
| + && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
|
| + len += 1;
|
| + }
|
| + }
|
|
|
| else
|
| {
|
| @@ -18983,19 +19495,31 @@ memory_address_length (rtx addr)
|
| else
|
| len = 4;
|
| }
|
| - /* ebp always wants a displacement. */
|
| - else if (base == hard_frame_pointer_rtx)
|
| - len = 1;
|
| + /* ebp always wants a displacement. Similarly r13. */
|
| + else if (base && REG_P (base)
|
| + && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
|
| + len = 1;
|
|
|
| /* An index requires the two-byte modrm form.... */
|
| if (index
|
| - /* ...like esp, which always wants an index. */
|
| - || base == stack_pointer_rtx
|
| + /* ...like esp (or r12), which always wants an index. */
|
| || base == arg_pointer_rtx
|
| - || base == frame_pointer_rtx)
|
| + || base == frame_pointer_rtx
|
| + || (base && REG_P (base)
|
| + && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
|
| len += 1;
|
| }
|
|
|
| + switch (parts.seg)
|
| + {
|
| + case SEG_FS:
|
| + case SEG_GS:
|
| + len += 1;
|
| + break;
|
| + default:
|
| + break;
|
| + }
|
| +
|
| return len;
|
| }
|
|
|
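| For reference, the encodings being counted (extra address bytes beyond
|
| the modrm byte itself; the 64-bit rules follow the comments above):
|
|     movl (%ebx), %eax	/* 0: plain register indirect */
|
|     movl (%esp), %eax	/* 1: esp (or r12) base forces a SIB byte */
|
|     movl (%ebp), %eax	/* 1: ebp (or r13) base forces a disp8 */
|
|     movl foo, %eax	/* 4: disp32; non-RIP 64-bit form adds a SIB */
|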
| @@ -19010,30 +19534,50 @@ ix86_attr_length_immediate_default (rtx insn, int shortform)
|
| for (i = recog_data.n_operands - 1; i >= 0; --i)
|
| if (CONSTANT_P (recog_data.operand[i]))
|
| {
|
| + enum attr_mode mode = get_attr_mode (insn);
|
| +
|
| gcc_assert (!len);
|
| - if (shortform && satisfies_constraint_K (recog_data.operand[i]))
|
| - len = 1;
|
| - else
|
| + if (shortform && CONST_INT_P (recog_data.operand[i]))
|
| {
|
| - switch (get_attr_mode (insn))
|
| + HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
|
| + switch (mode)
|
| {
|
| - case MODE_QI:
|
| - len+=1;
|
| - break;
|
| - case MODE_HI:
|
| - len+=2;
|
| - break;
|
| - case MODE_SI:
|
| - len+=4;
|
| - break;
|
| - /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
|
| - case MODE_DI:
|
| - len+=4;
|
| - break;
|
| - default:
|
| - fatal_insn ("unknown insn mode", insn);
|
| + case MODE_QI:
|
| + len = 1;
|
| + continue;
|
| + case MODE_HI:
|
| + ival = trunc_int_for_mode (ival, HImode);
|
| + break;
|
| + case MODE_SI:
|
| + ival = trunc_int_for_mode (ival, SImode);
|
| + break;
|
| + default:
|
| + break;
|
| + }
|
| + if (IN_RANGE (ival, -128, 127))
|
| + {
|
| + len = 1;
|
| + continue;
|
| }
|
| }
|
| + switch (mode)
|
| + {
|
| + case MODE_QI:
|
| + len = 1;
|
| + break;
|
| + case MODE_HI:
|
| + len = 2;
|
| + break;
|
| + case MODE_SI:
|
| + len = 4;
|
| + break;
|
| + /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
|
| + case MODE_DI:
|
| + len = 4;
|
| + break;
|
| + default:
|
| + fatal_insn ("unknown insn mode", insn);
|
| + }
|
| }
|
| return len;
|
| }
|
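| Concretely (an illustrative pair of encodings), values in -128..127 use
|
| the sign-extended imm8 form that the shortform test recognizes:
|
|     addl $100, %eax	/* 83 c0 64        : imm8, sign-extended */
|
|     addl $200, %eax	/* 05 c8 00 00 00  : imm32, since 200 > 127 */
|
| The trunc_int_for_mode calls make the range check see the constant as it
|
| will actually be encoded for HImode and SImode operations.
|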
| @@ -19045,22 +19589,45 @@ ix86_attr_length_address_default (rtx insn)
|
|
|
| if (get_attr_type (insn) == TYPE_LEA)
|
| {
|
| - rtx set = PATTERN (insn);
|
| + rtx set = PATTERN (insn), addr;
|
|
|
| if (GET_CODE (set) == PARALLEL)
|
| set = XVECEXP (set, 0, 0);
|
|
|
| gcc_assert (GET_CODE (set) == SET);
|
|
|
| - return memory_address_length (SET_SRC (set));
|
| + addr = SET_SRC (set);
|
| + if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
|
| + {
|
| + if (GET_CODE (addr) == ZERO_EXTEND)
|
| + addr = XEXP (addr, 0);
|
| + if (GET_CODE (addr) == SUBREG)
|
| + addr = SUBREG_REG (addr);
|
| + }
|
| +
|
| + return memory_address_length (addr);
|
| }
|
|
|
| extract_insn_cached (insn);
|
| for (i = recog_data.n_operands - 1; i >= 0; --i)
|
| if (MEM_P (recog_data.operand[i]))
|
| {
|
| + constrain_operands_cached (reload_completed);
|
| + if (which_alternative != -1)
|
| + {
|
| + const char *constraints = recog_data.constraints[i];
|
| + int alt = which_alternative;
|
| +
|
| + while (*constraints == '=' || *constraints == '+')
|
| + constraints++;
|
| + while (alt-- > 0)
|
| + while (*constraints++ != ',')
|
| + ;
|
| + /* Skip ignored operands. */
|
| + if (*constraints == 'X')
|
| + continue;
|
| + }
|
| return memory_address_length (XEXP (recog_data.operand[i], 0));
|
| - break;
|
| }
|
| return 0;
|
| }
|
| @@ -19089,7 +19656,8 @@ ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
|
| if (REG_P (recog_data.operand[i]))
|
| {
|
| /* REX.W bit uses 3 byte VEX prefix. */
|
| - if (GET_MODE (recog_data.operand[i]) == DImode)
|
| + if (GET_MODE (recog_data.operand[i]) == DImode
|
| + && GENERAL_REG_P (recog_data.operand[i]))
|
| return 3 + 1;
|
| }
|
| else
|
| @@ -19111,6 +19679,7 @@ ix86_issue_rate (void)
|
| switch (ix86_tune)
|
| {
|
| case PROCESSOR_PENTIUM:
|
| + case PROCESSOR_ATOM:
|
| case PROCESSOR_K6:
|
| return 2;
|
|
|
| @@ -19177,41 +19746,21 @@ ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
|
| return 1;
|
| }
|
|
|
| -/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
|
| - address with operands set by DEP_INSN. */
|
| +/* Return true iff USE_INSN has a memory address with operands set by
|
| + SET_INSN. */
|
|
|
| -static int
|
| -ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
|
| +bool
|
| +ix86_agi_dependent (rtx set_insn, rtx use_insn)
|
| {
|
| - rtx addr;
|
| -
|
| - if (insn_type == TYPE_LEA
|
| - && TARGET_PENTIUM)
|
| - {
|
| - addr = PATTERN (insn);
|
| -
|
| - if (GET_CODE (addr) == PARALLEL)
|
| - addr = XVECEXP (addr, 0, 0);
|
| -
|
| - gcc_assert (GET_CODE (addr) == SET);
|
| -
|
| - addr = SET_SRC (addr);
|
| - }
|
| - else
|
| - {
|
| - int i;
|
| - extract_insn_cached (insn);
|
| - for (i = recog_data.n_operands - 1; i >= 0; --i)
|
| - if (MEM_P (recog_data.operand[i]))
|
| - {
|
| - addr = XEXP (recog_data.operand[i], 0);
|
| - goto found;
|
| - }
|
| - return 0;
|
| - found:;
|
| - }
|
| -
|
| - return modified_in_p (addr, dep_insn);
|
| + int i;
|
| + extract_insn_cached (use_insn);
|
| + for (i = recog_data.n_operands - 1; i >= 0; --i)
|
| + if (MEM_P (recog_data.operand[i]))
|
| + {
|
| + rtx addr = XEXP (recog_data.operand[i], 0);
|
| + return modified_in_p (addr, set_insn) != 0;
|
| + }
|
| + return false;
|
| }
|
|
|
| static int
|
| @@ -19239,7 +19788,20 @@ ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
|
| {
|
| case PROCESSOR_PENTIUM:
|
| /* Address Generation Interlock adds a cycle of latency. */
|
| - if (ix86_agi_dependent (insn, dep_insn, insn_type))
|
| + if (insn_type == TYPE_LEA)
|
| + {
|
| + rtx addr = PATTERN (insn);
|
| +
|
| + if (GET_CODE (addr) == PARALLEL)
|
| + addr = XVECEXP (addr, 0, 0);
|
| +
|
| + gcc_assert (GET_CODE (addr) == SET);
|
| +
|
| + addr = SET_SRC (addr);
|
| + if (modified_in_p (addr, dep_insn))
|
| + cost += 1;
|
| + }
|
| + else if (ix86_agi_dependent (dep_insn, insn))
|
| cost += 1;
|
|
|
| /* ??? Compares pair with jump/setcc. */
|
| @@ -19249,7 +19811,7 @@ ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
|
| /* Floating point stores require value to be ready one cycle earlier. */
|
| if (insn_type == TYPE_FMOV
|
| && get_attr_memory (insn) == MEMORY_STORE
|
| - && !ix86_agi_dependent (insn, dep_insn, insn_type))
|
| + && !ix86_agi_dependent (dep_insn, insn))
|
| cost += 1;
|
| break;
|
|
|
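| An illustrative AGI pair, the situation ix86_agi_dependent now detects
|
| for the non-LEA case:
|
|     addl $4, %edx	/* set_insn writes %edx */
|
|     movl (%edx), %eax	/* use_insn's address needs %edx: +1 cycle */
|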
| @@ -19272,7 +19834,7 @@ ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
|
| in parallel with previous instruction in case
|
| previous instruction is not needed to compute the address. */
|
| if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
|
| - && !ix86_agi_dependent (insn, dep_insn, insn_type))
|
| + && !ix86_agi_dependent (dep_insn, insn))
|
| {
|
| /* Claim moves to take one cycle, as the core can issue one load
|
| at a time and the next load can start a cycle later. */
|
| @@ -19301,7 +19863,7 @@ ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
|
| in parallel with previous instruction in case
|
| previous instruction is not needed to compute the address. */
|
| if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
|
| - && !ix86_agi_dependent (insn, dep_insn, insn_type))
|
| + && !ix86_agi_dependent (dep_insn, insn))
|
| {
|
| /* Claim moves to take one cycle, as the core can issue one load
|
| at a time and the next load can start a cycle later. */
|
| @@ -19318,6 +19880,7 @@ ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
|
| case PROCESSOR_ATHLON:
|
| case PROCESSOR_K8:
|
| case PROCESSOR_AMDFAM10:
|
| + case PROCESSOR_ATOM:
|
| case PROCESSOR_GENERIC32:
|
| case PROCESSOR_GENERIC64:
|
| memory = get_attr_memory (insn);
|
| @@ -19326,7 +19889,7 @@ ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
|
| in parallel with previous instruction in case
|
| previous instruction is not needed to compute the address. */
|
| if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
|
| - && !ix86_agi_dependent (insn, dep_insn, insn_type))
|
| + && !ix86_agi_dependent (dep_insn, insn))
|
| {
|
| enum attr_unit unit = get_attr_unit (insn);
|
| int loadcost = 3;
|
| @@ -19594,69 +20157,270 @@ ix86_minimum_alignment (tree exp, enum machine_mode mode,
|
| return align;
|
| }
|
|
|
| +/* Find a location for the static chain incoming to a nested function.
|
| + This is a register, unless all free registers are used by arguments. */
|
| +
|
| +static rtx
|
| +ix86_static_chain (const_tree fndecl, bool incoming_p)
|
| +{
|
| + unsigned regno;
|
| +
|
| + if (!DECL_STATIC_CHAIN (fndecl))
|
| + return NULL;
|
| +
|
| + if (TARGET_64BIT)
|
| + {
|
| + /* We always use R10 in 64-bit mode. */
|
| + regno = R10_REG;
|
| + }
|
| + else
|
| + {
|
| + tree fntype;
|
| + /* By default in 32-bit mode we use ECX to pass the static chain. */
|
| + regno = CX_REG;
|
| +
|
| + fntype = TREE_TYPE (fndecl);
|
| + if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
|
| + {
|
| + /* Fastcall functions use ecx/edx for arguments, which leaves
|
| + us with EAX for the static chain. */
|
| + regno = AX_REG;
|
| + }
|
| + else if (ix86_function_regparm (fntype, fndecl) == 3)
|
| + {
|
| + /* For regparm 3, we have no free call-clobbered registers in
|
| + which to store the static chain. In order to implement this,
|
| + we have the trampoline push the static chain to the stack.
|
| + However, we can't push a value below the return address when
|
| + we call the nested function directly, so we have to use an
|
| + alternate entry point. For this we use ESI, and have the
|
| + alternate entry point push ESI, so that things appear the
|
| + same once we're executing the nested function. */
|
| + if (incoming_p)
|
| + {
|
| + if (fndecl == current_function_decl)
|
| + ix86_static_chain_on_stack = true;
|
| + return gen_frame_mem (SImode,
|
| + plus_constant (arg_pointer_rtx, -8));
|
| + }
|
| + regno = SI_REG;
|
| + }
|
| + }
|
| +
|
| + return gen_rtx_REG (Pmode, regno);
|
| +}
|
| +
|
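| For context, the static chain is what lets a GNU C nested function reach
|
| its parent's frame; an illustrative example (not from the patch):
|
|     int outer (int x)
|
|     {
|
|       int inner (int y) { return x + y; }	/* x reached via the chain */
|
|       int (*fp) (int) = inner;	/* taking the address needs a trampoline */
|
|       return fp (1);
|
|     }
|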
| /* Emit RTL insns to initialize the variable parts of a trampoline.
|
| - FNADDR is an RTX for the address of the function's pure code.
|
| - CXT is an RTX for the static chain value for the function. */
|
| -void
|
| -x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
|
| + FNDECL is the decl of the target address; M_TRAMP is a MEM for
|
| + the trampoline, and CHAIN_VALUE is an RTX for the static chain
|
| + to be passed to the target function. */
|
| +
|
| +static void
|
| +ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
|
| {
|
| + rtx mem, fnaddr;
|
| +
|
| + fnaddr = XEXP (DECL_RTL (fndecl), 0);
|
| +
|
| if (!TARGET_64BIT)
|
| {
|
| - /* Compute offset from the end of the jmp to the target function. */
|
| - rtx disp = expand_binop (SImode, sub_optab, fnaddr,
|
| - plus_constant (tramp, 10),
|
| - NULL_RTX, 1, OPTAB_DIRECT);
|
| - emit_move_insn (gen_rtx_MEM (QImode, tramp),
|
| - gen_int_mode (0xb9, QImode));
|
| - emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
|
| - emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
|
| - gen_int_mode (0xe9, QImode));
|
| - emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
|
| + rtx disp, chain;
|
| + int opcode;
|
| +
|
| + /* Depending on the static chain location, either load a register
|
| + with a constant, or push the constant to the stack. All of the
|
| + instructions are the same size. */
|
| + chain = ix86_static_chain (fndecl, true);
|
| + if (REG_P (chain))
|
| + {
|
| + if (REGNO (chain) == CX_REG)
|
| + opcode = 0xb9;
|
| + else if (REGNO (chain) == AX_REG)
|
| + opcode = 0xb8;
|
| + else
|
| + gcc_unreachable ();
|
| + }
|
| + else
|
| + opcode = 0x68;
|
| +
|
| + mem = adjust_address (m_tramp, QImode, 0);
|
| + emit_move_insn (mem, gen_int_mode (opcode, QImode));
|
| +
|
| + mem = adjust_address (m_tramp, SImode, 1);
|
| + emit_move_insn (mem, chain_value);
|
| +
|
| + /* Compute offset from the end of the jmp to the target function.
|
| + In the case in which the trampoline stores the static chain on
|
| + the stack, we need to skip the first insn which pushes the
|
| + (call-saved) register static chain; this push is 1 byte. */
|
| + disp = expand_binop (SImode, sub_optab, fnaddr,
|
| + plus_constant (XEXP (m_tramp, 0),
|
| + MEM_P (chain) ? 9 : 10),
|
| + NULL_RTX, 1, OPTAB_DIRECT);
|
| +
|
| + mem = adjust_address (m_tramp, QImode, 5);
|
| + emit_move_insn (mem, gen_int_mode (0xe9, QImode));
|
| +
|
| + mem = adjust_address (m_tramp, SImode, 6);
|
| + emit_move_insn (mem, disp);
|
| }
|
| else
|
| {
|
| int offset = 0;
|
| - /* Try to load address using shorter movl instead of movabs.
|
| - We may want to support movq for kernel mode, but kernel does not use
|
| - trampolines at the moment. */
|
| +
|
| + /* Load the function address to r11. Try to load address using
|
| + the shorter movl instead of movabs. We may want to support
|
| + movq for kernel mode, but kernel does not use trampolines at
|
| + the moment. */
|
| if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
|
| {
|
| fnaddr = copy_to_mode_reg (DImode, fnaddr);
|
| - emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
|
| - gen_int_mode (0xbb41, HImode));
|
| - emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
|
| - gen_lowpart (SImode, fnaddr));
|
| +
|
| + mem = adjust_address (m_tramp, HImode, offset);
|
| + emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
|
| +
|
| + mem = adjust_address (m_tramp, SImode, offset + 2);
|
| + emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
|
| offset += 6;
|
| }
|
| else
|
| {
|
| - emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
|
| - gen_int_mode (0xbb49, HImode));
|
| - emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
|
| - fnaddr);
|
| + mem = adjust_address (m_tramp, HImode, offset);
|
| + emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
|
| +
|
| + mem = adjust_address (m_tramp, DImode, offset + 2);
|
| + emit_move_insn (mem, fnaddr);
|
| offset += 10;
|
| }
|
| +
|
| /* Load static chain using movabs to r10. */
|
| - emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
|
| - gen_int_mode (0xba49, HImode));
|
| - emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
|
| - cxt);
|
| + mem = adjust_address (m_tramp, HImode, offset);
|
| + emit_move_insn (mem, gen_int_mode (0xba49, HImode));
|
| +
|
| + mem = adjust_address (m_tramp, DImode, offset + 2);
|
| + emit_move_insn (mem, chain_value);
|
| offset += 10;
|
| - /* Jump to the r11 */
|
| - emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
|
| - gen_int_mode (0xff49, HImode));
|
| - emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
|
| - gen_int_mode (0xe3, QImode));
|
| - offset += 3;
|
| +
|
| + /* Jump to r11; the last (unused) byte is a nop, only there to
|
| + pad the write out to a single 32-bit store. */
|
| + mem = adjust_address (m_tramp, SImode, offset);
|
| + emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
|
| + offset += 4;
|
| +
|
| gcc_assert (offset <= TRAMPOLINE_SIZE);
|
| }
|
|
|
| #ifdef ENABLE_EXECUTE_STACK
|
| +#ifdef CHECK_EXECUTE_STACK_ENABLED
|
| + if (CHECK_EXECUTE_STACK_ENABLED)
|
| +#endif
|
| emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
|
| - LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
|
| + LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
|
| #endif
|
| }
|
|
|
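| The 32-bit trampoline written above, byte by byte (a reference sketch):
|
|     0:  b9 <imm32>	movl $chain, %ecx	(b8 for %eax; 68 = pushl
|
|     			when the chain is passed on the stack)
|
|     5:  e9 <rel32>	jmp  fnaddr
|
| The jump displacement is relative to tramp + 10; the push case uses
|
| tramp + 9 instead, so the jump lands one byte past the nominal entry,
|
| skipping the push %esi that direct callers execute.
|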
| +/* The following file contains several enumerations and data structures
|
| + built from the definitions in i386-builtin-types.def. */
|
| +
|
| +#include "i386-builtin-types.inc"
|
| +
|
| +/* Table for the ix86 builtin non-function types. */
|
| +static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
|
| +
|
| +/* Retrieve an element from the above table, building some of
|
| + the types lazily. */
|
| +
|
| +static tree
|
| +ix86_get_builtin_type (enum ix86_builtin_type tcode)
|
| +{
|
| + unsigned int index;
|
| + tree type, itype;
|
| +
|
| + gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
|
| +
|
| + type = ix86_builtin_type_tab[(int) tcode];
|
| + if (type != NULL)
|
| + return type;
|
| +
|
| + gcc_assert (tcode > IX86_BT_LAST_PRIM);
|
| + if (tcode <= IX86_BT_LAST_VECT)
|
| + {
|
| + enum machine_mode mode;
|
| +
|
| + index = tcode - IX86_BT_LAST_PRIM - 1;
|
| + itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
|
| + mode = ix86_builtin_type_vect_mode[index];
|
| +
|
| + type = build_vector_type_for_mode (itype, mode);
|
| + }
|
| + else
|
| + {
|
| + int quals;
|
| +
|
| + index = tcode - IX86_BT_LAST_VECT - 1;
|
| + if (tcode <= IX86_BT_LAST_PTR)
|
| + quals = TYPE_UNQUALIFIED;
|
| + else
|
| + quals = TYPE_QUAL_CONST;
|
| +
|
| + itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
|
| + if (quals != TYPE_UNQUALIFIED)
|
| + itype = build_qualified_type (itype, quals);
|
| +
|
| + type = build_pointer_type (itype);
|
| + }
|
| +
|
| + ix86_builtin_type_tab[(int) tcode] = type;
|
| + return type;
|
| +}
|
| +
|
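| The scheme reduces to a simple memoization pattern (a sketch; the names
|
| here are hypothetical):
|
|     static tree cache[N_CODES];
|
|     static tree
|
|     get_cached (int code)
|
|     {
|
|       if (cache[code] == NULL)
|
|         cache[code] = build_for (code);	/* built on first use only */
|
|       return cache[code];
|
|     }
|
| The GTY(()) markers on the real tables keep the cached trees from being
|
| garbage collected.
|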
| +/* Table for the ix86 builtin function types. */
|
| +static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
|
| +
|
| +/* Retrieve an element from the above table, building some of
|
| + the types lazily. */
|
| +
|
| +static tree
|
| +ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
|
| +{
|
| + tree type;
|
| +
|
| + gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
|
| +
|
| + type = ix86_builtin_func_type_tab[(int) tcode];
|
| + if (type != NULL)
|
| + return type;
|
| +
|
| + if (tcode <= IX86_BT_LAST_FUNC)
|
| + {
|
| + unsigned start = ix86_builtin_func_start[(int) tcode];
|
| + unsigned after = ix86_builtin_func_start[(int) tcode + 1];
|
| + tree rtype, atype, args = void_list_node;
|
| + unsigned i;
|
| +
|
| + rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
|
| + for (i = after - 1; i > start; --i)
|
| + {
|
| + atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
|
| + args = tree_cons (NULL, atype, args);
|
| + }
|
| +
|
| + type = build_function_type (rtype, args);
|
| + }
|
| + else
|
| + {
|
| + unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
|
| + enum ix86_builtin_func_type icode;
|
| +
|
| + icode = ix86_builtin_func_alias_base[index];
|
| + type = ix86_get_builtin_func_type (icode);
|
| + }
|
| +
|
| + ix86_builtin_func_type_tab[(int) tcode] = type;
|
| + return type;
|
| +}
|
| +
|
| +
|
| /* Codes for all the SSE/MMX builtins. */
|
| enum ix86_builtins
|
| {
|
| @@ -20062,6 +20826,16 @@ enum ix86_builtins
|
| IX86_BUILTIN_MFENCE,
|
| IX86_BUILTIN_LFENCE,
|
|
|
| + IX86_BUILTIN_BSRSI,
|
| + IX86_BUILTIN_BSRDI,
|
| + IX86_BUILTIN_RDPMC,
|
| + IX86_BUILTIN_RDTSC,
|
| + IX86_BUILTIN_RDTSCP,
|
| + IX86_BUILTIN_ROLQI,
|
| + IX86_BUILTIN_ROLHI,
|
| + IX86_BUILTIN_RORQI,
|
| + IX86_BUILTIN_RORHI,
|
| +
|
| /* SSE3. */
|
| IX86_BUILTIN_ADDSUBPS,
|
| IX86_BUILTIN_HADDPS,
|
| @@ -20281,7 +21055,6 @@ enum ix86_builtins
|
| IX86_BUILTIN_EXTRACTF128SI256,
|
| IX86_BUILTIN_VZEROALL,
|
| IX86_BUILTIN_VZEROUPPER,
|
| - IX86_BUILTIN_VZEROUPPER_REX64,
|
| IX86_BUILTIN_VPERMILVARPD,
|
| IX86_BUILTIN_VPERMILVARPS,
|
| IX86_BUILTIN_VPERMILVARPD256,
|
| @@ -20290,6 +21063,10 @@ enum ix86_builtins
|
| IX86_BUILTIN_VPERMILPS,
|
| IX86_BUILTIN_VPERMILPD256,
|
| IX86_BUILTIN_VPERMILPS256,
|
| + IX86_BUILTIN_VPERMIL2PD,
|
| + IX86_BUILTIN_VPERMIL2PS,
|
| + IX86_BUILTIN_VPERMIL2PD256,
|
| + IX86_BUILTIN_VPERMIL2PS256,
|
| IX86_BUILTIN_VPERM2F128PD256,
|
| IX86_BUILTIN_VPERM2F128PS256,
|
| IX86_BUILTIN_VPERM2F128SI256,
|
| @@ -20367,219 +21144,215 @@ enum ix86_builtins
|
|
|
| /* TFmode support builtins. */
|
| IX86_BUILTIN_INFQ,
|
| + IX86_BUILTIN_HUGE_VALQ,
|
| IX86_BUILTIN_FABSQ,
|
| IX86_BUILTIN_COPYSIGNQ,
|
|
|
| - /* SSE5 instructions */
|
| - IX86_BUILTIN_FMADDSS,
|
| - IX86_BUILTIN_FMADDSD,
|
| - IX86_BUILTIN_FMADDPS,
|
| - IX86_BUILTIN_FMADDPD,
|
| - IX86_BUILTIN_FMSUBSS,
|
| - IX86_BUILTIN_FMSUBSD,
|
| - IX86_BUILTIN_FMSUBPS,
|
| - IX86_BUILTIN_FMSUBPD,
|
| - IX86_BUILTIN_FNMADDSS,
|
| - IX86_BUILTIN_FNMADDSD,
|
| - IX86_BUILTIN_FNMADDPS,
|
| - IX86_BUILTIN_FNMADDPD,
|
| - IX86_BUILTIN_FNMSUBSS,
|
| - IX86_BUILTIN_FNMSUBSD,
|
| - IX86_BUILTIN_FNMSUBPS,
|
| - IX86_BUILTIN_FNMSUBPD,
|
| - IX86_BUILTIN_PCMOV,
|
| - IX86_BUILTIN_PCMOV_V2DI,
|
| - IX86_BUILTIN_PCMOV_V4SI,
|
| - IX86_BUILTIN_PCMOV_V8HI,
|
| - IX86_BUILTIN_PCMOV_V16QI,
|
| - IX86_BUILTIN_PCMOV_V4SF,
|
| - IX86_BUILTIN_PCMOV_V2DF,
|
| - IX86_BUILTIN_PPERM,
|
| - IX86_BUILTIN_PERMPS,
|
| - IX86_BUILTIN_PERMPD,
|
| - IX86_BUILTIN_PMACSSWW,
|
| - IX86_BUILTIN_PMACSWW,
|
| - IX86_BUILTIN_PMACSSWD,
|
| - IX86_BUILTIN_PMACSWD,
|
| - IX86_BUILTIN_PMACSSDD,
|
| - IX86_BUILTIN_PMACSDD,
|
| - IX86_BUILTIN_PMACSSDQL,
|
| - IX86_BUILTIN_PMACSSDQH,
|
| - IX86_BUILTIN_PMACSDQL,
|
| - IX86_BUILTIN_PMACSDQH,
|
| - IX86_BUILTIN_PMADCSSWD,
|
| - IX86_BUILTIN_PMADCSWD,
|
| - IX86_BUILTIN_PHADDBW,
|
| - IX86_BUILTIN_PHADDBD,
|
| - IX86_BUILTIN_PHADDBQ,
|
| - IX86_BUILTIN_PHADDWD,
|
| - IX86_BUILTIN_PHADDWQ,
|
| - IX86_BUILTIN_PHADDDQ,
|
| - IX86_BUILTIN_PHADDUBW,
|
| - IX86_BUILTIN_PHADDUBD,
|
| - IX86_BUILTIN_PHADDUBQ,
|
| - IX86_BUILTIN_PHADDUWD,
|
| - IX86_BUILTIN_PHADDUWQ,
|
| - IX86_BUILTIN_PHADDUDQ,
|
| - IX86_BUILTIN_PHSUBBW,
|
| - IX86_BUILTIN_PHSUBWD,
|
| - IX86_BUILTIN_PHSUBDQ,
|
| - IX86_BUILTIN_PROTB,
|
| - IX86_BUILTIN_PROTW,
|
| - IX86_BUILTIN_PROTD,
|
| - IX86_BUILTIN_PROTQ,
|
| - IX86_BUILTIN_PROTB_IMM,
|
| - IX86_BUILTIN_PROTW_IMM,
|
| - IX86_BUILTIN_PROTD_IMM,
|
| - IX86_BUILTIN_PROTQ_IMM,
|
| - IX86_BUILTIN_PSHLB,
|
| - IX86_BUILTIN_PSHLW,
|
| - IX86_BUILTIN_PSHLD,
|
| - IX86_BUILTIN_PSHLQ,
|
| - IX86_BUILTIN_PSHAB,
|
| - IX86_BUILTIN_PSHAW,
|
| - IX86_BUILTIN_PSHAD,
|
| - IX86_BUILTIN_PSHAQ,
|
| - IX86_BUILTIN_FRCZSS,
|
| - IX86_BUILTIN_FRCZSD,
|
| - IX86_BUILTIN_FRCZPS,
|
| - IX86_BUILTIN_FRCZPD,
|
| - IX86_BUILTIN_CVTPH2PS,
|
| - IX86_BUILTIN_CVTPS2PH,
|
| -
|
| - IX86_BUILTIN_COMEQSS,
|
| - IX86_BUILTIN_COMNESS,
|
| - IX86_BUILTIN_COMLTSS,
|
| - IX86_BUILTIN_COMLESS,
|
| - IX86_BUILTIN_COMGTSS,
|
| - IX86_BUILTIN_COMGESS,
|
| - IX86_BUILTIN_COMUEQSS,
|
| - IX86_BUILTIN_COMUNESS,
|
| - IX86_BUILTIN_COMULTSS,
|
| - IX86_BUILTIN_COMULESS,
|
| - IX86_BUILTIN_COMUGTSS,
|
| - IX86_BUILTIN_COMUGESS,
|
| - IX86_BUILTIN_COMORDSS,
|
| - IX86_BUILTIN_COMUNORDSS,
|
| - IX86_BUILTIN_COMFALSESS,
|
| - IX86_BUILTIN_COMTRUESS,
|
| -
|
| - IX86_BUILTIN_COMEQSD,
|
| - IX86_BUILTIN_COMNESD,
|
| - IX86_BUILTIN_COMLTSD,
|
| - IX86_BUILTIN_COMLESD,
|
| - IX86_BUILTIN_COMGTSD,
|
| - IX86_BUILTIN_COMGESD,
|
| - IX86_BUILTIN_COMUEQSD,
|
| - IX86_BUILTIN_COMUNESD,
|
| - IX86_BUILTIN_COMULTSD,
|
| - IX86_BUILTIN_COMULESD,
|
| - IX86_BUILTIN_COMUGTSD,
|
| - IX86_BUILTIN_COMUGESD,
|
| - IX86_BUILTIN_COMORDSD,
|
| - IX86_BUILTIN_COMUNORDSD,
|
| - IX86_BUILTIN_COMFALSESD,
|
| - IX86_BUILTIN_COMTRUESD,
|
| -
|
| - IX86_BUILTIN_COMEQPS,
|
| - IX86_BUILTIN_COMNEPS,
|
| - IX86_BUILTIN_COMLTPS,
|
| - IX86_BUILTIN_COMLEPS,
|
| - IX86_BUILTIN_COMGTPS,
|
| - IX86_BUILTIN_COMGEPS,
|
| - IX86_BUILTIN_COMUEQPS,
|
| - IX86_BUILTIN_COMUNEPS,
|
| - IX86_BUILTIN_COMULTPS,
|
| - IX86_BUILTIN_COMULEPS,
|
| - IX86_BUILTIN_COMUGTPS,
|
| - IX86_BUILTIN_COMUGEPS,
|
| - IX86_BUILTIN_COMORDPS,
|
| - IX86_BUILTIN_COMUNORDPS,
|
| - IX86_BUILTIN_COMFALSEPS,
|
| - IX86_BUILTIN_COMTRUEPS,
|
| -
|
| - IX86_BUILTIN_COMEQPD,
|
| - IX86_BUILTIN_COMNEPD,
|
| - IX86_BUILTIN_COMLTPD,
|
| - IX86_BUILTIN_COMLEPD,
|
| - IX86_BUILTIN_COMGTPD,
|
| - IX86_BUILTIN_COMGEPD,
|
| - IX86_BUILTIN_COMUEQPD,
|
| - IX86_BUILTIN_COMUNEPD,
|
| - IX86_BUILTIN_COMULTPD,
|
| - IX86_BUILTIN_COMULEPD,
|
| - IX86_BUILTIN_COMUGTPD,
|
| - IX86_BUILTIN_COMUGEPD,
|
| - IX86_BUILTIN_COMORDPD,
|
| - IX86_BUILTIN_COMUNORDPD,
|
| - IX86_BUILTIN_COMFALSEPD,
|
| - IX86_BUILTIN_COMTRUEPD,
|
| -
|
| - IX86_BUILTIN_PCOMEQUB,
|
| - IX86_BUILTIN_PCOMNEUB,
|
| - IX86_BUILTIN_PCOMLTUB,
|
| - IX86_BUILTIN_PCOMLEUB,
|
| - IX86_BUILTIN_PCOMGTUB,
|
| - IX86_BUILTIN_PCOMGEUB,
|
| - IX86_BUILTIN_PCOMFALSEUB,
|
| - IX86_BUILTIN_PCOMTRUEUB,
|
| - IX86_BUILTIN_PCOMEQUW,
|
| - IX86_BUILTIN_PCOMNEUW,
|
| - IX86_BUILTIN_PCOMLTUW,
|
| - IX86_BUILTIN_PCOMLEUW,
|
| - IX86_BUILTIN_PCOMGTUW,
|
| - IX86_BUILTIN_PCOMGEUW,
|
| - IX86_BUILTIN_PCOMFALSEUW,
|
| - IX86_BUILTIN_PCOMTRUEUW,
|
| - IX86_BUILTIN_PCOMEQUD,
|
| - IX86_BUILTIN_PCOMNEUD,
|
| - IX86_BUILTIN_PCOMLTUD,
|
| - IX86_BUILTIN_PCOMLEUD,
|
| - IX86_BUILTIN_PCOMGTUD,
|
| - IX86_BUILTIN_PCOMGEUD,
|
| - IX86_BUILTIN_PCOMFALSEUD,
|
| - IX86_BUILTIN_PCOMTRUEUD,
|
| - IX86_BUILTIN_PCOMEQUQ,
|
| - IX86_BUILTIN_PCOMNEUQ,
|
| - IX86_BUILTIN_PCOMLTUQ,
|
| - IX86_BUILTIN_PCOMLEUQ,
|
| - IX86_BUILTIN_PCOMGTUQ,
|
| - IX86_BUILTIN_PCOMGEUQ,
|
| - IX86_BUILTIN_PCOMFALSEUQ,
|
| - IX86_BUILTIN_PCOMTRUEUQ,
|
| -
|
| - IX86_BUILTIN_PCOMEQB,
|
| - IX86_BUILTIN_PCOMNEB,
|
| - IX86_BUILTIN_PCOMLTB,
|
| - IX86_BUILTIN_PCOMLEB,
|
| - IX86_BUILTIN_PCOMGTB,
|
| - IX86_BUILTIN_PCOMGEB,
|
| - IX86_BUILTIN_PCOMFALSEB,
|
| - IX86_BUILTIN_PCOMTRUEB,
|
| - IX86_BUILTIN_PCOMEQW,
|
| - IX86_BUILTIN_PCOMNEW,
|
| - IX86_BUILTIN_PCOMLTW,
|
| - IX86_BUILTIN_PCOMLEW,
|
| - IX86_BUILTIN_PCOMGTW,
|
| - IX86_BUILTIN_PCOMGEW,
|
| - IX86_BUILTIN_PCOMFALSEW,
|
| - IX86_BUILTIN_PCOMTRUEW,
|
| - IX86_BUILTIN_PCOMEQD,
|
| - IX86_BUILTIN_PCOMNED,
|
| - IX86_BUILTIN_PCOMLTD,
|
| - IX86_BUILTIN_PCOMLED,
|
| - IX86_BUILTIN_PCOMGTD,
|
| - IX86_BUILTIN_PCOMGED,
|
| - IX86_BUILTIN_PCOMFALSED,
|
| - IX86_BUILTIN_PCOMTRUED,
|
| - IX86_BUILTIN_PCOMEQQ,
|
| - IX86_BUILTIN_PCOMNEQ,
|
| - IX86_BUILTIN_PCOMLTQ,
|
| - IX86_BUILTIN_PCOMLEQ,
|
| - IX86_BUILTIN_PCOMGTQ,
|
| - IX86_BUILTIN_PCOMGEQ,
|
| - IX86_BUILTIN_PCOMFALSEQ,
|
| - IX86_BUILTIN_PCOMTRUEQ,
|
| + /* Vectorizer support builtins. */
|
| + IX86_BUILTIN_CPYSGNPS,
|
| + IX86_BUILTIN_CPYSGNPD,
|
| +
|
| + IX86_BUILTIN_CVTUDQ2PS,
|
| +
|
| + IX86_BUILTIN_VEC_PERM_V2DF,
|
| + IX86_BUILTIN_VEC_PERM_V4SF,
|
| + IX86_BUILTIN_VEC_PERM_V2DI,
|
| + IX86_BUILTIN_VEC_PERM_V4SI,
|
| + IX86_BUILTIN_VEC_PERM_V8HI,
|
| + IX86_BUILTIN_VEC_PERM_V16QI,
|
| + IX86_BUILTIN_VEC_PERM_V2DI_U,
|
| + IX86_BUILTIN_VEC_PERM_V4SI_U,
|
| + IX86_BUILTIN_VEC_PERM_V8HI_U,
|
| + IX86_BUILTIN_VEC_PERM_V16QI_U,
|
| + IX86_BUILTIN_VEC_PERM_V4DF,
|
| + IX86_BUILTIN_VEC_PERM_V8SF,
|
| +
|
| + /* FMA4 and XOP instructions. */
|
| + IX86_BUILTIN_VFMADDSS,
|
| + IX86_BUILTIN_VFMADDSD,
|
| + IX86_BUILTIN_VFMADDPS,
|
| + IX86_BUILTIN_VFMADDPD,
|
| + IX86_BUILTIN_VFMSUBSS,
|
| + IX86_BUILTIN_VFMSUBSD,
|
| + IX86_BUILTIN_VFMSUBPS,
|
| + IX86_BUILTIN_VFMSUBPD,
|
| + IX86_BUILTIN_VFMADDSUBPS,
|
| + IX86_BUILTIN_VFMADDSUBPD,
|
| + IX86_BUILTIN_VFMSUBADDPS,
|
| + IX86_BUILTIN_VFMSUBADDPD,
|
| + IX86_BUILTIN_VFNMADDSS,
|
| + IX86_BUILTIN_VFNMADDSD,
|
| + IX86_BUILTIN_VFNMADDPS,
|
| + IX86_BUILTIN_VFNMADDPD,
|
| + IX86_BUILTIN_VFNMSUBSS,
|
| + IX86_BUILTIN_VFNMSUBSD,
|
| + IX86_BUILTIN_VFNMSUBPS,
|
| + IX86_BUILTIN_VFNMSUBPD,
|
| + IX86_BUILTIN_VFMADDPS256,
|
| + IX86_BUILTIN_VFMADDPD256,
|
| + IX86_BUILTIN_VFMSUBPS256,
|
| + IX86_BUILTIN_VFMSUBPD256,
|
| + IX86_BUILTIN_VFMADDSUBPS256,
|
| + IX86_BUILTIN_VFMADDSUBPD256,
|
| + IX86_BUILTIN_VFMSUBADDPS256,
|
| + IX86_BUILTIN_VFMSUBADDPD256,
|
| + IX86_BUILTIN_VFNMADDPS256,
|
| + IX86_BUILTIN_VFNMADDPD256,
|
| + IX86_BUILTIN_VFNMSUBPS256,
|
| + IX86_BUILTIN_VFNMSUBPD256,
|
| +
|
| + IX86_BUILTIN_VPCMOV,
|
| + IX86_BUILTIN_VPCMOV_V2DI,
|
| + IX86_BUILTIN_VPCMOV_V4SI,
|
| + IX86_BUILTIN_VPCMOV_V8HI,
|
| + IX86_BUILTIN_VPCMOV_V16QI,
|
| + IX86_BUILTIN_VPCMOV_V4SF,
|
| + IX86_BUILTIN_VPCMOV_V2DF,
|
| + IX86_BUILTIN_VPCMOV256,
|
| + IX86_BUILTIN_VPCMOV_V4DI256,
|
| + IX86_BUILTIN_VPCMOV_V8SI256,
|
| + IX86_BUILTIN_VPCMOV_V16HI256,
|
| + IX86_BUILTIN_VPCMOV_V32QI256,
|
| + IX86_BUILTIN_VPCMOV_V8SF256,
|
| + IX86_BUILTIN_VPCMOV_V4DF256,
|
| +
|
| + IX86_BUILTIN_VPPERM,
|
| +
|
| + IX86_BUILTIN_VPMACSSWW,
|
| + IX86_BUILTIN_VPMACSWW,
|
| + IX86_BUILTIN_VPMACSSWD,
|
| + IX86_BUILTIN_VPMACSWD,
|
| + IX86_BUILTIN_VPMACSSDD,
|
| + IX86_BUILTIN_VPMACSDD,
|
| + IX86_BUILTIN_VPMACSSDQL,
|
| + IX86_BUILTIN_VPMACSSDQH,
|
| + IX86_BUILTIN_VPMACSDQL,
|
| + IX86_BUILTIN_VPMACSDQH,
|
| + IX86_BUILTIN_VPMADCSSWD,
|
| + IX86_BUILTIN_VPMADCSWD,
|
| +
|
| + IX86_BUILTIN_VPHADDBW,
|
| + IX86_BUILTIN_VPHADDBD,
|
| + IX86_BUILTIN_VPHADDBQ,
|
| + IX86_BUILTIN_VPHADDWD,
|
| + IX86_BUILTIN_VPHADDWQ,
|
| + IX86_BUILTIN_VPHADDDQ,
|
| + IX86_BUILTIN_VPHADDUBW,
|
| + IX86_BUILTIN_VPHADDUBD,
|
| + IX86_BUILTIN_VPHADDUBQ,
|
| + IX86_BUILTIN_VPHADDUWD,
|
| + IX86_BUILTIN_VPHADDUWQ,
|
| + IX86_BUILTIN_VPHADDUDQ,
|
| + IX86_BUILTIN_VPHSUBBW,
|
| + IX86_BUILTIN_VPHSUBWD,
|
| + IX86_BUILTIN_VPHSUBDQ,
|
| +
|
| + IX86_BUILTIN_VPROTB,
|
| + IX86_BUILTIN_VPROTW,
|
| + IX86_BUILTIN_VPROTD,
|
| + IX86_BUILTIN_VPROTQ,
|
| + IX86_BUILTIN_VPROTB_IMM,
|
| + IX86_BUILTIN_VPROTW_IMM,
|
| + IX86_BUILTIN_VPROTD_IMM,
|
| + IX86_BUILTIN_VPROTQ_IMM,
|
| +
|
| + IX86_BUILTIN_VPSHLB,
|
| + IX86_BUILTIN_VPSHLW,
|
| + IX86_BUILTIN_VPSHLD,
|
| + IX86_BUILTIN_VPSHLQ,
|
| + IX86_BUILTIN_VPSHAB,
|
| + IX86_BUILTIN_VPSHAW,
|
| + IX86_BUILTIN_VPSHAD,
|
| + IX86_BUILTIN_VPSHAQ,
|
| +
|
| + IX86_BUILTIN_VFRCZSS,
|
| + IX86_BUILTIN_VFRCZSD,
|
| + IX86_BUILTIN_VFRCZPS,
|
| + IX86_BUILTIN_VFRCZPD,
|
| + IX86_BUILTIN_VFRCZPS256,
|
| + IX86_BUILTIN_VFRCZPD256,
|
| +
|
| + IX86_BUILTIN_VPCOMEQUB,
|
| + IX86_BUILTIN_VPCOMNEUB,
|
| + IX86_BUILTIN_VPCOMLTUB,
|
| + IX86_BUILTIN_VPCOMLEUB,
|
| + IX86_BUILTIN_VPCOMGTUB,
|
| + IX86_BUILTIN_VPCOMGEUB,
|
| + IX86_BUILTIN_VPCOMFALSEUB,
|
| + IX86_BUILTIN_VPCOMTRUEUB,
|
| +
|
| + IX86_BUILTIN_VPCOMEQUW,
|
| + IX86_BUILTIN_VPCOMNEUW,
|
| + IX86_BUILTIN_VPCOMLTUW,
|
| + IX86_BUILTIN_VPCOMLEUW,
|
| + IX86_BUILTIN_VPCOMGTUW,
|
| + IX86_BUILTIN_VPCOMGEUW,
|
| + IX86_BUILTIN_VPCOMFALSEUW,
|
| + IX86_BUILTIN_VPCOMTRUEUW,
|
| +
|
| + IX86_BUILTIN_VPCOMEQUD,
|
| + IX86_BUILTIN_VPCOMNEUD,
|
| + IX86_BUILTIN_VPCOMLTUD,
|
| + IX86_BUILTIN_VPCOMLEUD,
|
| + IX86_BUILTIN_VPCOMGTUD,
|
| + IX86_BUILTIN_VPCOMGEUD,
|
| + IX86_BUILTIN_VPCOMFALSEUD,
|
| + IX86_BUILTIN_VPCOMTRUEUD,
|
| +
|
| + IX86_BUILTIN_VPCOMEQUQ,
|
| + IX86_BUILTIN_VPCOMNEUQ,
|
| + IX86_BUILTIN_VPCOMLTUQ,
|
| + IX86_BUILTIN_VPCOMLEUQ,
|
| + IX86_BUILTIN_VPCOMGTUQ,
|
| + IX86_BUILTIN_VPCOMGEUQ,
|
| + IX86_BUILTIN_VPCOMFALSEUQ,
|
| + IX86_BUILTIN_VPCOMTRUEUQ,
|
| +
|
| + IX86_BUILTIN_VPCOMEQB,
|
| + IX86_BUILTIN_VPCOMNEB,
|
| + IX86_BUILTIN_VPCOMLTB,
|
| + IX86_BUILTIN_VPCOMLEB,
|
| + IX86_BUILTIN_VPCOMGTB,
|
| + IX86_BUILTIN_VPCOMGEB,
|
| + IX86_BUILTIN_VPCOMFALSEB,
|
| + IX86_BUILTIN_VPCOMTRUEB,
|
| +
|
| + IX86_BUILTIN_VPCOMEQW,
|
| + IX86_BUILTIN_VPCOMNEW,
|
| + IX86_BUILTIN_VPCOMLTW,
|
| + IX86_BUILTIN_VPCOMLEW,
|
| + IX86_BUILTIN_VPCOMGTW,
|
| + IX86_BUILTIN_VPCOMGEW,
|
| + IX86_BUILTIN_VPCOMFALSEW,
|
| + IX86_BUILTIN_VPCOMTRUEW,
|
| +
|
| + IX86_BUILTIN_VPCOMEQD,
|
| + IX86_BUILTIN_VPCOMNED,
|
| + IX86_BUILTIN_VPCOMLTD,
|
| + IX86_BUILTIN_VPCOMLED,
|
| + IX86_BUILTIN_VPCOMGTD,
|
| + IX86_BUILTIN_VPCOMGED,
|
| + IX86_BUILTIN_VPCOMFALSED,
|
| + IX86_BUILTIN_VPCOMTRUED,
|
| +
|
| + IX86_BUILTIN_VPCOMEQQ,
|
| + IX86_BUILTIN_VPCOMNEQ,
|
| + IX86_BUILTIN_VPCOMLTQ,
|
| + IX86_BUILTIN_VPCOMLEQ,
|
| + IX86_BUILTIN_VPCOMGTQ,
|
| + IX86_BUILTIN_VPCOMGEQ,
|
| + IX86_BUILTIN_VPCOMFALSEQ,
|
| + IX86_BUILTIN_VPCOMTRUEQ,
|
| +
|
| + /* LWP instructions. */
|
| + IX86_BUILTIN_LLWPCB,
|
| + IX86_BUILTIN_SLWPCB,
|
| + IX86_BUILTIN_LWPVAL32,
|
| + IX86_BUILTIN_LWPVAL64,
|
| + IX86_BUILTIN_LWPINS32,
|
| + IX86_BUILTIN_LWPINS64,
|
| +
|
| + IX86_BUILTIN_CLZS,
|
|
|
| IX86_BUILTIN_MAX
|
| };
|
| @@ -20590,35 +21363,36 @@ static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
|
| /* Table of all of the builtin functions that are possible with different ISA's
|
| but are waiting to be built until a function is declared to use that
|
| ISA. */
|
| -struct builtin_isa GTY(())
|
| -{
|
| - tree type; /* builtin type to use in the declaration */
|
| +struct builtin_isa {
|
| const char *name; /* function name */
|
| + enum ix86_builtin_func_type tcode; /* type to use in the declaration */
|
| int isa; /* isa_flags this builtin is defined for */
|
| bool const_p; /* true if the declaration is constant */
|
| + bool set_and_not_built_p; /* true until the decl is built */
|
| };
|
|
|
| -static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
|
| +static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
|
|
|
|
|
| /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
|
| - * of which isa_flags to use in the ix86_builtins_isa array. Stores the
|
| - * function decl in the ix86_builtins array. Returns the function decl or
|
| - * NULL_TREE, if the builtin was not added.
|
| - *
|
| - * If the front end has a special hook for builtin functions, delay adding
|
| - * builtin functions that aren't in the current ISA until the ISA is changed
|
| - * with function specific optimization. Doing so, can save about 300K for the
|
| - * default compiler. When the builtin is expanded, check at that time whether
|
| - * it is valid.
|
| - *
|
| - * If the front end doesn't have a special hook, record all builtins, even if
|
| - * it isn't an instruction set in the current ISA in case the user uses
|
| - * function specific options for a different ISA, so that we don't get scope
|
| - * errors if a builtin is added in the middle of a function scope. */
|
| + of which isa_flags to use in the ix86_builtins_isa array. Stores the
|
| + function decl in the ix86_builtins array. Returns the function decl or
|
| + NULL_TREE, if the builtin was not added.
|
| +
|
| + If the front end has a special hook for builtin functions, delay adding
|
| + builtin functions that aren't in the current ISA until the ISA is changed
|
| + with function specific optimization. Doing so can save about 300K for the
|
| + default compiler. When the builtin is expanded, check at that time whether
|
| + it is valid.
|
| +
|
| + If the front end doesn't have a special hook, record all builtins, even if
|
| + they aren't part of the current ISA, in case the user uses
|
| + function specific options for a different ISA, so that we don't get scope
|
| + errors if a builtin is added in the middle of a function scope. */
|
|
|
| static inline tree
|
| -def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
|
| +def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
|
| + enum ix86_builtins code)
|
| {
|
| tree decl = NULL_TREE;
|
|
|
| @@ -20626,22 +21400,26 @@ def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
|
| {
|
| ix86_builtins_isa[(int) code].isa = mask;
|
|
|
| - if ((mask & ix86_isa_flags) != 0
|
| + mask &= ~OPTION_MASK_ISA_64BIT;
|
| + if (mask == 0
|
| + || (mask & ix86_isa_flags) != 0
|
| || (lang_hooks.builtin_function
|
| == lang_hooks.builtin_function_ext_scope))
|
|
|
| {
|
| - decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
|
| - NULL_TREE);
|
| + tree type = ix86_get_builtin_func_type (tcode);
|
| + decl = add_builtin_function (name, type, code, BUILT_IN_MD,
|
| + NULL, NULL_TREE);
|
| ix86_builtins[(int) code] = decl;
|
| - ix86_builtins_isa[(int) code].type = NULL_TREE;
|
| + ix86_builtins_isa[(int) code].set_and_not_built_p = false;
|
| }
|
| else
|
| {
|
| ix86_builtins[(int) code] = NULL_TREE;
|
| - ix86_builtins_isa[(int) code].const_p = false;
|
| - ix86_builtins_isa[(int) code].type = type;
|
| + ix86_builtins_isa[(int) code].tcode = tcode;
|
| ix86_builtins_isa[(int) code].name = name;
|
| + ix86_builtins_isa[(int) code].const_p = false;
|
| + ix86_builtins_isa[(int) code].set_and_not_built_p = true;
|
| }
|
| }
|
|
|
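| A representative call, for illustration only (the bulk of the real
|
| registrations are driven by the bdesc tables below):
|
|     def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence",
|
|     		 VOID_FTYPE_VOID, IX86_BUILTIN_LFENCE);
|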
| @@ -20651,10 +21429,10 @@ def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
|
| /* Like def_builtin, but also marks the function decl "const". */
|
|
|
| static inline tree
|
| -def_builtin_const (int mask, const char *name, tree type,
|
| - enum ix86_builtins code)
|
| +def_builtin_const (int mask, const char *name,
|
| + enum ix86_builtin_func_type tcode, enum ix86_builtins code)
|
| {
|
| - tree decl = def_builtin (mask, name, type, code);
|
| + tree decl = def_builtin (mask, name, tcode, code);
|
| if (decl)
|
| TREE_READONLY (decl) = 1;
|
| else
|
| @@ -20671,20 +21449,23 @@ static void
|
| ix86_add_new_builtins (int isa)
|
| {
|
| int i;
|
| - tree decl;
|
|
|
| for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
|
| {
|
| if ((ix86_builtins_isa[i].isa & isa) != 0
|
| - && ix86_builtins_isa[i].type != NULL_TREE)
|
| + && ix86_builtins_isa[i].set_and_not_built_p)
|
| {
|
| + tree decl, type;
|
| +
|
| + /* Don't define the builtin again. */
|
| + ix86_builtins_isa[i].set_and_not_built_p = false;
|
| +
|
| + type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
|
| decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
|
| - ix86_builtins_isa[i].type,
|
| - i, BUILT_IN_MD, NULL,
|
| + type, i, BUILT_IN_MD, NULL,
|
| NULL_TREE);
|
|
|
| ix86_builtins[i] = decl;
|
| - ix86_builtins_isa[i].type = NULL_TREE;
|
| if (ix86_builtins_isa[i].const_p)
|
| TREE_READONLY (decl) = 1;
|
| }
|
| @@ -20759,204 +21540,12 @@ static const struct builtin_description bdesc_pcmpistr[] =
|
| { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
|
| };
|
|
|
| -/* Special builtin types */
|
| -enum ix86_special_builtin_type
|
| -{
|
| - SPECIAL_FTYPE_UNKNOWN,
|
| - VOID_FTYPE_VOID,
|
| - V32QI_FTYPE_PCCHAR,
|
| - V16QI_FTYPE_PCCHAR,
|
| - V8SF_FTYPE_PCV4SF,
|
| - V8SF_FTYPE_PCFLOAT,
|
| - V4DF_FTYPE_PCV2DF,
|
| - V4DF_FTYPE_PCDOUBLE,
|
| - V4SF_FTYPE_PCFLOAT,
|
| - V2DF_FTYPE_PCDOUBLE,
|
| - V8SF_FTYPE_PCV8SF_V8SF,
|
| - V4DF_FTYPE_PCV4DF_V4DF,
|
| - V4SF_FTYPE_V4SF_PCV2SF,
|
| - V4SF_FTYPE_PCV4SF_V4SF,
|
| - V2DF_FTYPE_V2DF_PCDOUBLE,
|
| - V2DF_FTYPE_PCV2DF_V2DF,
|
| - V2DI_FTYPE_PV2DI,
|
| - VOID_FTYPE_PV2SF_V4SF,
|
| - VOID_FTYPE_PV4DI_V4DI,
|
| - VOID_FTYPE_PV2DI_V2DI,
|
| - VOID_FTYPE_PCHAR_V32QI,
|
| - VOID_FTYPE_PCHAR_V16QI,
|
| - VOID_FTYPE_PFLOAT_V8SF,
|
| - VOID_FTYPE_PFLOAT_V4SF,
|
| - VOID_FTYPE_PDOUBLE_V4DF,
|
| - VOID_FTYPE_PDOUBLE_V2DF,
|
| - VOID_FTYPE_PDI_DI,
|
| - VOID_FTYPE_PINT_INT,
|
| - VOID_FTYPE_PV8SF_V8SF_V8SF,
|
| - VOID_FTYPE_PV4DF_V4DF_V4DF,
|
| - VOID_FTYPE_PV4SF_V4SF_V4SF,
|
| - VOID_FTYPE_PV2DF_V2DF_V2DF
|
| -};
|
| -
|
| -/* Builtin types */
|
| -enum ix86_builtin_type
|
| -{
|
| - FTYPE_UNKNOWN,
|
| - FLOAT128_FTYPE_FLOAT128,
|
| - FLOAT_FTYPE_FLOAT,
|
| - FLOAT128_FTYPE_FLOAT128_FLOAT128,
|
| - INT_FTYPE_V8SF_V8SF_PTEST,
|
| - INT_FTYPE_V4DI_V4DI_PTEST,
|
| - INT_FTYPE_V4DF_V4DF_PTEST,
|
| - INT_FTYPE_V4SF_V4SF_PTEST,
|
| - INT_FTYPE_V2DI_V2DI_PTEST,
|
| - INT_FTYPE_V2DF_V2DF_PTEST,
|
| - INT64_FTYPE_V4SF,
|
| - INT64_FTYPE_V2DF,
|
| - INT_FTYPE_V16QI,
|
| - INT_FTYPE_V8QI,
|
| - INT_FTYPE_V8SF,
|
| - INT_FTYPE_V4DF,
|
| - INT_FTYPE_V4SF,
|
| - INT_FTYPE_V2DF,
|
| - V16QI_FTYPE_V16QI,
|
| - V8SI_FTYPE_V8SF,
|
| - V8SI_FTYPE_V4SI,
|
| - V8HI_FTYPE_V8HI,
|
| - V8HI_FTYPE_V16QI,
|
| - V8QI_FTYPE_V8QI,
|
| - V8SF_FTYPE_V8SF,
|
| - V8SF_FTYPE_V8SI,
|
| - V8SF_FTYPE_V4SF,
|
| - V4SI_FTYPE_V4SI,
|
| - V4SI_FTYPE_V16QI,
|
| - V4SI_FTYPE_V8SI,
|
| - V4SI_FTYPE_V8HI,
|
| - V4SI_FTYPE_V4DF,
|
| - V4SI_FTYPE_V4SF,
|
| - V4SI_FTYPE_V2DF,
|
| - V4HI_FTYPE_V4HI,
|
| - V4DF_FTYPE_V4DF,
|
| - V4DF_FTYPE_V4SI,
|
| - V4DF_FTYPE_V4SF,
|
| - V4DF_FTYPE_V2DF,
|
| - V4SF_FTYPE_V4DF,
|
| - V4SF_FTYPE_V4SF,
|
| - V4SF_FTYPE_V4SF_VEC_MERGE,
|
| - V4SF_FTYPE_V8SF,
|
| - V4SF_FTYPE_V4SI,
|
| - V4SF_FTYPE_V2DF,
|
| - V2DI_FTYPE_V2DI,
|
| - V2DI_FTYPE_V16QI,
|
| - V2DI_FTYPE_V8HI,
|
| - V2DI_FTYPE_V4SI,
|
| - V2DF_FTYPE_V2DF,
|
| - V2DF_FTYPE_V2DF_VEC_MERGE,
|
| - V2DF_FTYPE_V4SI,
|
| - V2DF_FTYPE_V4DF,
|
| - V2DF_FTYPE_V4SF,
|
| - V2DF_FTYPE_V2SI,
|
| - V2SI_FTYPE_V2SI,
|
| - V2SI_FTYPE_V4SF,
|
| - V2SI_FTYPE_V2SF,
|
| - V2SI_FTYPE_V2DF,
|
| - V2SF_FTYPE_V2SF,
|
| - V2SF_FTYPE_V2SI,
|
| - V16QI_FTYPE_V16QI_V16QI,
|
| - V16QI_FTYPE_V8HI_V8HI,
|
| - V8QI_FTYPE_V8QI_V8QI,
|
| - V8QI_FTYPE_V4HI_V4HI,
|
| - V8HI_FTYPE_V8HI_V8HI,
|
| - V8HI_FTYPE_V8HI_V8HI_COUNT,
|
| - V8HI_FTYPE_V16QI_V16QI,
|
| - V8HI_FTYPE_V4SI_V4SI,
|
| - V8HI_FTYPE_V8HI_SI_COUNT,
|
| - V8SF_FTYPE_V8SF_V8SF,
|
| - V8SF_FTYPE_V8SF_V8SI,
|
| - V4SI_FTYPE_V4SI_V4SI,
|
| - V4SI_FTYPE_V4SI_V4SI_COUNT,
|
| - V4SI_FTYPE_V8HI_V8HI,
|
| - V4SI_FTYPE_V4SF_V4SF,
|
| - V4SI_FTYPE_V2DF_V2DF,
|
| - V4SI_FTYPE_V4SI_SI_COUNT,
|
| - V4HI_FTYPE_V4HI_V4HI,
|
| - V4HI_FTYPE_V4HI_V4HI_COUNT,
|
| - V4HI_FTYPE_V8QI_V8QI,
|
| - V4HI_FTYPE_V2SI_V2SI,
|
| - V4HI_FTYPE_V4HI_SI_COUNT,
|
| - V4DF_FTYPE_V4DF_V4DF,
|
| - V4DF_FTYPE_V4DF_V4DI,
|
| - V4SF_FTYPE_V4SF_V4SF,
|
| - V4SF_FTYPE_V4SF_V4SF_SWAP,
|
| - V4SF_FTYPE_V4SF_V4SI,
|
| - V4SF_FTYPE_V4SF_V2SI,
|
| - V4SF_FTYPE_V4SF_V2DF,
|
| - V4SF_FTYPE_V4SF_DI,
|
| - V4SF_FTYPE_V4SF_SI,
|
| - V2DI_FTYPE_V2DI_V2DI,
|
| - V2DI_FTYPE_V2DI_V2DI_COUNT,
|
| - V2DI_FTYPE_V16QI_V16QI,
|
| - V2DI_FTYPE_V4SI_V4SI,
|
| - V2DI_FTYPE_V2DI_V16QI,
|
| - V2DI_FTYPE_V2DF_V2DF,
|
| - V2DI_FTYPE_V2DI_SI_COUNT,
|
| - V2SI_FTYPE_V2SI_V2SI,
|
| - V2SI_FTYPE_V2SI_V2SI_COUNT,
|
| - V2SI_FTYPE_V4HI_V4HI,
|
| - V2SI_FTYPE_V2SF_V2SF,
|
| - V2SI_FTYPE_V2SI_SI_COUNT,
|
| - V2DF_FTYPE_V2DF_V2DF,
|
| - V2DF_FTYPE_V2DF_V2DF_SWAP,
|
| - V2DF_FTYPE_V2DF_V4SF,
|
| - V2DF_FTYPE_V2DF_V2DI,
|
| - V2DF_FTYPE_V2DF_DI,
|
| - V2DF_FTYPE_V2DF_SI,
|
| - V2SF_FTYPE_V2SF_V2SF,
|
| - V1DI_FTYPE_V1DI_V1DI,
|
| - V1DI_FTYPE_V1DI_V1DI_COUNT,
|
| - V1DI_FTYPE_V8QI_V8QI,
|
| - V1DI_FTYPE_V2SI_V2SI,
|
| - V1DI_FTYPE_V1DI_SI_COUNT,
|
| - UINT64_FTYPE_UINT64_UINT64,
|
| - UINT_FTYPE_UINT_UINT,
|
| - UINT_FTYPE_UINT_USHORT,
|
| - UINT_FTYPE_UINT_UCHAR,
|
| - V8HI_FTYPE_V8HI_INT,
|
| - V4SI_FTYPE_V4SI_INT,
|
| - V4HI_FTYPE_V4HI_INT,
|
| - V8SF_FTYPE_V8SF_INT,
|
| - V4SI_FTYPE_V8SI_INT,
|
| - V4SF_FTYPE_V8SF_INT,
|
| - V2DF_FTYPE_V4DF_INT,
|
| - V4DF_FTYPE_V4DF_INT,
|
| - V4SF_FTYPE_V4SF_INT,
|
| - V2DI_FTYPE_V2DI_INT,
|
| - V2DI2TI_FTYPE_V2DI_INT,
|
| - V2DF_FTYPE_V2DF_INT,
|
| - V16QI_FTYPE_V16QI_V16QI_V16QI,
|
| - V8SF_FTYPE_V8SF_V8SF_V8SF,
|
| - V4DF_FTYPE_V4DF_V4DF_V4DF,
|
| - V4SF_FTYPE_V4SF_V4SF_V4SF,
|
| - V2DF_FTYPE_V2DF_V2DF_V2DF,
|
| - V16QI_FTYPE_V16QI_V16QI_INT,
|
| - V8SI_FTYPE_V8SI_V8SI_INT,
|
| - V8SI_FTYPE_V8SI_V4SI_INT,
|
| - V8HI_FTYPE_V8HI_V8HI_INT,
|
| - V8SF_FTYPE_V8SF_V8SF_INT,
|
| - V8SF_FTYPE_V8SF_V4SF_INT,
|
| - V4SI_FTYPE_V4SI_V4SI_INT,
|
| - V4DF_FTYPE_V4DF_V4DF_INT,
|
| - V4DF_FTYPE_V4DF_V2DF_INT,
|
| - V4SF_FTYPE_V4SF_V4SF_INT,
|
| - V2DI_FTYPE_V2DI_V2DI_INT,
|
| - V2DI2TI_FTYPE_V2DI_V2DI_INT,
|
| - V1DI2DI_FTYPE_V1DI_V1DI_INT,
|
| - V2DF_FTYPE_V2DF_V2DF_INT,
|
| - V2DI_FTYPE_V2DI_UINT_UINT,
|
| - V2DI_FTYPE_V2DI_V2DI_UINT_UINT
|
| -};
|
| -
|
| /* Special builtins with variable number of arguments. */
|
| static const struct builtin_description bdesc_special_args[] =
|
| {
|
| + { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
|
| + { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
|
| +
|
| /* MMX */
|
| { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
|
|
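The first two rows make RDTSC and RDTSCP available as builtins in both 32- and 64-bit mode (the ~OPTION_MASK_ISA_64BIT mask). A minimal usage sketch, with the prototypes read off the UINT64_FTYPE_VOID and UINT64_FTYPE_PUNSIGNED rows above:

    /* Cycle-count a block; aux receives the IA32_TSC_AUX value on RDTSCP.  */
    static unsigned long long
    tsc_delta (unsigned int *aux)
    {
      unsigned long long t0 = __builtin_ia32_rdtsc ();      /* UINT64_FTYPE_VOID */
      unsigned long long t1 = __builtin_ia32_rdtscp (aux);  /* UINT64_FTYPE_PUNSIGNED */
      return t1 - t0;
    }
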
|
| @@ -20975,7 +21564,7 @@ static const struct builtin_description bdesc_special_args[] =
|
|
|
| /* SSE or 3DNow!A */
|
| { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
|
| - { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
|
| + { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
|
|
|
| /* SSE2 */
|
| { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
|
| @@ -21003,14 +21592,13 @@ static const struct builtin_description bdesc_special_args[] =
|
|
|
| /* AVX */
|
| { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
|
| - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
|
| - { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },
|
| + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
|
|
|
| - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
|
| - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
|
| - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
|
| - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
|
| - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
|
| + { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
|
| + { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
|
| + { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
|
| + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
|
| + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
|
|
|
| { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
|
| { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
|
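The vbroadcast builtins are rewired from AVX-specific insn names onto the generic vec_dup and mode-named vbroadcastf128 patterns, and the separate vzeroupper_rex64 entry collapses into a single vzeroupper builtin that now carries its public name. The builtin interface itself is unchanged; a sketch against the V8SF_FTYPE_PCFLOAT row (assumes -mavx; the usual entry point is _mm256_broadcast_ss from immintrin.h):

    typedef float v8sf __attribute__ ((vector_size (32)));

    /* Splat *p into all eight float lanes.  */
    static v8sf
    splat8 (const float *p)
    {
      return __builtin_ia32_vbroadcastss256 (p);
    }
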
| @@ -21032,11 +21620,27 @@ static const struct builtin_description bdesc_special_args[] =
|
| { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
|
| { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
|
| { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
|
| +
|
| + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
|
| + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
|
| + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
|
| + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
|
| + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
|
| + { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
|
| +
|
| };
|
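For reference, every row in these tables is a builtin_description record, defined earlier in this file as an ISA mask, an insn code, the builtin's source-level name, its ix86_builtins enum value, an optional comparison code, and an FTYPE flag. Roughly (field names from the existing definition, not from this hunk):

    struct builtin_description
    {
      const unsigned int mask;          /* OPTION_MASK_ISA_* gate */
      const enum insn_code icode;       /* CODE_FOR_* expander */
      const char *const name;           /* "__builtin_ia32_*" */
      const enum ix86_builtins code;    /* IX86_BUILTIN_* */
      const enum rtx_code comparison;   /* UNKNOWN unless a compare */
      const int flag;                   /* the *_FTYPE_* prototype */
    };

A mask of ~OPTION_MASK_ISA_64BIT marks a builtin as available regardless of which ISA extensions are enabled, in both 32- and 64-bit mode, while the new LWP rows above gate on OPTION_MASK_ISA_LWP (-mlwp).
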
|
|
| /* Builtins with variable number of arguments. */
|
| static const struct builtin_description bdesc_args[] =
|
| {
|
| + { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
|
| + { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
|
| + { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
|
| + { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
|
| + { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
|
| + { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
|
| + { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
|
| +
|
| /* MMX */
|
| { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
|
| { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
|
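bdesc_args likewise gains always-available entries: bit-scan (bsr), rdpmc, and 8/16-bit rotates. A sketch of the rotate rows, with prototypes read off UINT8_FTYPE_UINT8_INT and UINT16_FTYPE_UINT16_INT:

    /* 8-bit rotate left and 16-bit rotate right via the new builtins.  */
    static unsigned char
    rol8 (unsigned char x, int n)
    {
      return __builtin_ia32_rolqi (x, n);
    }

    static unsigned short
    ror16 (unsigned short x, int n)
    {
      return __builtin_ia32_rorhi (x, n);
    }
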
| @@ -21189,11 +21793,13 @@ static const struct builtin_description bdesc_args[] =
|
| { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
|
| { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
|
|
|
| + { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
|
| +
|
| { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
|
| { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
|
| { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
|
| - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
|
| - { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
|
| + { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
|
| + { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
|
|
|
| { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
|
| { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
|
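The unpckhps/unpcklps rows move from SSE-specific insn names to the generic vec_interleave_{high,low}v4sf patterns; the builtins' semantics are untouched. For concreteness, what the low-interleave computes:

    typedef float v4sf __attribute__ ((vector_size (16)));

    /* unpcklps: result = { a[0], b[0], a[1], b[1] }.  */
    static v4sf
    unpacklo (v4sf a, v4sf b)
    {
      return __builtin_ia32_unpcklps (a, b);
    }
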
| @@ -21223,11 +21829,25 @@ static const struct builtin_description bdesc_args[] =
|
| /* SSE2 */
|
| { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
|
|
|
| + { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
|
| + { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
|
| + { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
|
| + { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
|
| + { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
|
| + { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
|
| + { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
|
| + { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
|
| + { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
|
| + { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
|
| + { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
|
| + { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
|
| +
|
| { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
|
| { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
|
| { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
|
| { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
|
| { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
|
| + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
|
|
|
| { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
|
| { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
|
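The new __builtin_ia32_vec_perm_* rows use CODE_FOR_nothing: they do not map 1:1 onto an instruction, so they are presumably folded or expanded specially, backing the vectorizer's two-input permute support rather than a named insn pattern. A hypothetical call, assuming the selector indexes into the concatenation of the two inputs, following the V4SF_FTYPE_V4SF_V4SF_V4SI row:

    typedef float v4sf __attribute__ ((vector_size (16)));
    typedef int   v4si __attribute__ ((vector_size (16)));

    /* Lane indices 0-3 select from a, 4-7 from b (assumed; the selector
       likely must be constant so the permute can be resolved at compile
       time).  */
    static v4sf
    pick (v4sf a, v4sf b)
    {
      v4si sel = { 3, 2, 5, 4 };
      return __builtin_ia32_vec_perm_v4sf (a, b, sel);
    }
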
| @@ -21286,9 +21906,11 @@ static const struct builtin_description bdesc_args[] =
|
| { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
|
| { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
|
|
|
| + { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
|
| +
|
| { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
|
| - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
|
| - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
|
| + { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
|
| + { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
|
|
|
| { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
|
|
|
| @@ -21333,14 +21955,14 @@ static const struct builtin_description bdesc_args[] =
|
| { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
|
| { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
|
|
|
| - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
|
| - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
|
| - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
|
| - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
|
| - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
|
| - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
|
| - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
|
| - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
|
| + { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
|
| + { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
|
| + { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
|
| + { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
|
| + { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
|
| + { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
|
| + { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
|
| + { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
|
|
|
| { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
|
| { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
|
| @@ -21359,7 +21981,7 @@ static const struct builtin_description bdesc_args[] =
|
| { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
|
| { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
|
|
|
| - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
|
| + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
|
| { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
|
| { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
|
| { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
|
| @@ -21367,7 +21989,7 @@ static const struct builtin_description bdesc_args[] =
|
| { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
|
| { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
|
|
|
| - { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
|
| + { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
|
| { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
|
| { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
|
| { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
|
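The whole-register byte shifts switch from the old TImode encoding (V2DI2TI_FTYPE_V2DI_INT plus sse2_ashlti3/sse2_lshrti3) to explicit V1TImode patterns with a V2DI_FTYPE_V2DI_INT_CONVERT prototype. User code is unaffected, since it goes through the emmintrin.h wrappers, which pass a bit count (byte count times eight) to the builtin:

    #include <emmintrin.h>

    /* Shift the 128-bit value left by 4 bytes; expands to pslldq.  */
    static __m128i
    shift4 (__m128i x)
    {
      return _mm_slli_si128 (x, 4);
    }
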
| @@ -21440,8 +22062,8 @@ static const struct builtin_description bdesc_args[] =
|
| { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
|
|
|
| /* SSSE3. */
|
| - { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
|
| - { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
|
| + { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
|
| + { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
|
|
|
| /* SSE4.1 */
|
| { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
|
| @@ -21482,7 +22104,7 @@ static const struct builtin_description bdesc_args[] =
|
| { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
|
| { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
|
|
|
| - /* SSE4.1 and SSE5 */
|
| + /* SSE4.1 */
|
| { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
|
| { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
|
| { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
|
| @@ -21494,10 +22116,10 @@ static const struct builtin_description bdesc_args[] =
|
|
|
| /* SSE4.2 */
|
| { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
|
| - { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
|
| - { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
|
| - { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
|
| - { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
|
| + { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
|
| + { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
|
| + { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
|
| + { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
|
|
|
| /* SSE4A */
|
| { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
|
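The crc32 rows now also require OPTION_MASK_ISA_CRC32, so the instruction can be gated separately from the rest of SSE4.2. A sketch of the 32-bit step, from the UINT_FTYPE_UINT_UINT row (the intrinsic wrapper is _mm_crc32_u32 in smmintrin.h):

    /* One CRC-32C (Castagnoli) accumulation step.  */
    static unsigned int
    crc_step (unsigned int crc, unsigned int data)
    {
      return __builtin_ia32_crc32si (crc, data);
    }
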
| @@ -21630,295 +22252,262 @@ static const struct builtin_description bdesc_args[] =
|
|
|
| { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
|
| { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
|
| -};
|
|
|
| -/* SSE5 */
|
| -enum multi_arg_type {
|
| - MULTI_ARG_UNKNOWN,
|
| - MULTI_ARG_3_SF,
|
| - MULTI_ARG_3_DF,
|
| - MULTI_ARG_3_DI,
|
| - MULTI_ARG_3_SI,
|
| - MULTI_ARG_3_SI_DI,
|
| - MULTI_ARG_3_HI,
|
| - MULTI_ARG_3_HI_SI,
|
| - MULTI_ARG_3_QI,
|
| - MULTI_ARG_3_PERMPS,
|
| - MULTI_ARG_3_PERMPD,
|
| - MULTI_ARG_2_SF,
|
| - MULTI_ARG_2_DF,
|
| - MULTI_ARG_2_DI,
|
| - MULTI_ARG_2_SI,
|
| - MULTI_ARG_2_HI,
|
| - MULTI_ARG_2_QI,
|
| - MULTI_ARG_2_DI_IMM,
|
| - MULTI_ARG_2_SI_IMM,
|
| - MULTI_ARG_2_HI_IMM,
|
| - MULTI_ARG_2_QI_IMM,
|
| - MULTI_ARG_2_SF_CMP,
|
| - MULTI_ARG_2_DF_CMP,
|
| - MULTI_ARG_2_DI_CMP,
|
| - MULTI_ARG_2_SI_CMP,
|
| - MULTI_ARG_2_HI_CMP,
|
| - MULTI_ARG_2_QI_CMP,
|
| - MULTI_ARG_2_DI_TF,
|
| - MULTI_ARG_2_SI_TF,
|
| - MULTI_ARG_2_HI_TF,
|
| - MULTI_ARG_2_QI_TF,
|
| - MULTI_ARG_2_SF_TF,
|
| - MULTI_ARG_2_DF_TF,
|
| - MULTI_ARG_1_SF,
|
| - MULTI_ARG_1_DF,
|
| - MULTI_ARG_1_DI,
|
| - MULTI_ARG_1_SI,
|
| - MULTI_ARG_1_HI,
|
| - MULTI_ARG_1_QI,
|
| - MULTI_ARG_1_SI_DI,
|
| - MULTI_ARG_1_HI_DI,
|
| - MULTI_ARG_1_HI_SI,
|
| - MULTI_ARG_1_QI_DI,
|
| - MULTI_ARG_1_QI_SI,
|
| - MULTI_ARG_1_QI_HI,
|
| - MULTI_ARG_1_PH2PS,
|
| - MULTI_ARG_1_PS2PH
|
| + { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
|
| };
|
|
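The lone addition at the end of bdesc_args is an ABM-gated 16-bit leading-zero count, per the UINT16_FTYPE_UINT16 row:

    /* Count leading zeros of a 16-bit value (lzcnt); needs -mabm.  */
    static unsigned short
    clz16 (unsigned short x)
    {
      return __builtin_clzs (x);
    }
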
|
| +/* FMA4 and XOP. */
|
| +#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
|
| +#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
|
| +#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
|
| +#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
|
| +#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
|
| +#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
|
| +#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
|
| +#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
|
| +#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
|
| +#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
|
| +#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
|
| +#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
|
| +#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
|
| +#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
|
| +#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
|
| +#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
|
| +#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
|
| +#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
|
| +#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
|
| +#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
|
| +#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
|
| +#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
|
| +#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
|
| +#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
|
| +#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
|
| +#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
|
| +#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
|
| +#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
|
| +#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
|
| +#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
|
| +#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
|
| +#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
|
| +#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
|
| +#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
|
| +#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
|
| +#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
|
| +#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
|
| +#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
|
| +#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
|
| +#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
|
| +#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
|
| +#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
|
| +#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
|
| +#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
|
| +#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
|
| +#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
|
| +#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
|
| +#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
|
| +#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
|
| +#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
|
| +#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
|
| +#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
|
| +
|
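With SSE5 gone, the old enum multi_arg_type disappears; the MULTI_ARG_* names survive only as #defines aliased onto the shared *_FTYPE_* type names, so bdesc_multi_arg flows through the same type machinery as every other table. The renamed FMA4 builtins below keep three-operand fused multiply-add semantics; a sketch against the V4SF_FTYPE_V4SF_V4SF_V4SF prototype that MULTI_ARG_3_SF aliases (assumes -mfma4):

    typedef float v4sf __attribute__ ((vector_size (16)));

    /* Per-lane fused a * b + c.  */
    static v4sf
    fmadd4 (v4sf a, v4sf b, v4sf c)
    {
      return __builtin_ia32_vfmaddps (a, b, c);
    }
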
| static const struct builtin_description bdesc_multi_arg[] =
|
| {
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, 0, (int)MULTI_ARG_3_DI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
|
| -
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
|
| -
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
|
| -
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
|
| -
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
|
| -
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
|
| -
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
|
| -
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
|
| -
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
|
| -
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
|
| -
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
|
| -
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
|
| -
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
|
| -
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
|
| -
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
|
| -
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
|
| - { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
|
| + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv4sf4, "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
|
| + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv2df4, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
|
| + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4sf4, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
|
| + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv2df4, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
|
| + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv4sf4, "__builtin_ia32_vfmsubss", IX86_BUILTIN_VFMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
|
| + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv2df4, "__builtin_ia32_vfmsubsd", IX86_BUILTIN_VFMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
|
| + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4sf4, "__builtin_ia32_vfmsubps", IX86_BUILTIN_VFMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
|
| + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv2df4, "__builtin_ia32_vfmsubpd", IX86_BUILTIN_VFMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
|
| +
|
| + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv4sf4, "__builtin_ia32_vfnmaddss", IX86_BUILTIN_VFNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
|
| + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv2df4, "__builtin_ia32_vfnmaddsd", IX86_BUILTIN_VFNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
|
| + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4sf4, "__builtin_ia32_vfnmaddps", IX86_BUILTIN_VFNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
|
| + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv2df4, "__builtin_ia32_vfnmaddpd", IX86_BUILTIN_VFNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
|
| + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv4sf4, "__builtin_ia32_vfnmsubss", IX86_BUILTIN_VFNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
|
| + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv2df4, "__builtin_ia32_vfnmsubsd", IX86_BUILTIN_VFNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
|
| + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4sf4, "__builtin_ia32_vfnmsubps", IX86_BUILTIN_VFNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
|
| + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv2df4, "__builtin_ia32_vfnmsubpd", IX86_BUILTIN_VFNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
|
| +
|
| + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4sf4, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
|
| + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv2df4, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
|
| + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4sf4, "__builtin_ia32_vfmsubaddps", IX86_BUILTIN_VFMSUBADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
|
| + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv2df4, "__builtin_ia32_vfmsubaddpd", IX86_BUILTIN_VFMSUBADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
|
| +
|
| + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv8sf4256, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
|
| + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4df4256, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
|
| + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv8sf4256, "__builtin_ia32_vfmsubps256", IX86_BUILTIN_VFMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
|
| + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4df4256, "__builtin_ia32_vfmsubpd256", IX86_BUILTIN_VFMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
|
| +
|
| + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv8sf4256, "__builtin_ia32_vfnmaddps256", IX86_BUILTIN_VFNMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
|
| + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4df4256, "__builtin_ia32_vfnmaddpd256", IX86_BUILTIN_VFNMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
|
| + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv8sf4256, "__builtin_ia32_vfnmsubps256", IX86_BUILTIN_VFNMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
|
| + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4df4256, "__builtin_ia32_vfnmsubpd256", IX86_BUILTIN_VFNMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
|
| +
|
| + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv8sf4, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
|
| + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4df4, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
|
| + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv8sf4, "__builtin_ia32_vfmsubaddps256", IX86_BUILTIN_VFMSUBADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
|
| + { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4, "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
|
| +
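The FMA4 rows above replace the old SSE5 fmadd/fmsub entries one-for-one: each binds a `__builtin_ia32_vfmadd*`/`__builtin_ia32_vfmsub*` name to the matching three-operand `fma4i_*` insn pattern. A minimal usage sketch, assuming the `_mm_macc_ps` wrapper that GCC's fma4intrin.h layers over `__builtin_ia32_vfmaddps`:

    #include <x86intrin.h>

    /* Built with -mfma4, this should compile to a single vfmaddps:
       a * b + c with one rounding step.  */
    __m128 fused_madd (__m128 a, __m128 b, __m128 c)
    {
      return _mm_macc_ps (a, b, c);
    }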
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
|
| +
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
|
| +
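`vpcmov` is a bit-granular select -- each result bit comes from the first source where the selector bit is set and from the second where it is clear -- which is why the same `xop_pcmov` pattern can serve every element width listed here. A sketch using the `_mm_cmov_si128` wrapper that xopintrin.h provides for `__builtin_ia32_vpcmov`:

    #include <x86intrin.h>

    /* Bitwise select: (a & mask) | (b & ~mask), one vpcmov insn
       under -mxop.  */
    __m128i select_bits (__m128i a, __m128i b, __m128i mask)
    {
      return _mm_cmov_si128 (a, b, mask);
    }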
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
|
| +
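`vpperm` picks each result byte out of the 32-byte concatenation of the two sources under a third control vector (higher control bits encode per-byte operations; plain copy is used below). Sketch, assuming xopintrin.h's `_mm_perm_epi8`, where selector values 0-15 index the first source:

    #include <x86intrin.h>

    /* Reverse the bytes of a: control byte i selects byte 15 - i.  */
    __m128i reverse_bytes (__m128i a)
    {
      const __m128i ctl = _mm_set_epi8 (0, 1, 2, 3, 4, 5, 6, 7,
                                        8, 9, 10, 11, 12, 13, 14, 15);
      return _mm_perm_epi8 (a, a, ctl);
    }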
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
|
| +
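The `vpmacs*`/`vpmadcs*` rows are the integer multiply-accumulate family: multiply lanes of the first two operands and add the third, with the `ss` spellings saturating and the widening forms (`wd`, `dql`, `dqh`) reflected in the mixed `MULTI_ARG_3_HI_SI`/`MULTI_ARG_3_SI_DI` flags. Sketch, assuming xopintrin.h's `_mm_macc_epi16` over `__builtin_ia32_vpmacsww`:

    #include <x86intrin.h>

    /* Per-lane 16-bit a * b + c, no saturation (vpmacsww).  */
    __m128i mul_add_16 (__m128i a, __m128i b, __m128i c)
    {
      return _mm_macc_epi16 (a, b, c);
    }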
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
|
| +
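Two builtins per rotate width: the `xop_vrotl*` patterns take a per-element count vector (`MULTI_ARG_2_*`), while the `xop_rotl*` patterns take an immediate (`MULTI_ARG_2_*_IMM`); `vpsha*`/`vpshl*` are the arithmetic and logical variable shifts, where a negative count shifts the other way. Sketch of the immediate form, assuming xopintrin.h's `_mm_roti_epi32`:

    #include <x86intrin.h>

    /* Rotate each 32-bit lane left by 8: vprotd with an immediate,
       i.e. __builtin_ia32_vprotdi.  */
    __m128i rol8 (__m128i a)
    {
      return _mm_roti_epi32 (a, 8);
    }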
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2256, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2256, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
|
| +
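`vfrcz*` extracts the fractional part of each element; the `vm` patterns are the scalar forms that merge into the destination, and the `256` rows cover the 256-bit vectors. Sketch with xopintrin.h's `_mm_frcz_ps`:

    #include <x86intrin.h>

    /* frac(x) per lane: 2.75 -> 0.75 (vfrczps).  */
    __m128 fractional (__m128 x)
    {
      return _mm_frcz_ps (x);
    }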
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
|
| +
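The `vphadd*`/`vphsub*` rows widen as they reduce: adjacent elements are summed (or subtracted) into lanes of two, four, or eight times the width, with the `u` variants zero-extending first; the `MULTI_ARG_1_QI_HI`-style flags encode the narrow-in/wide-out signature. Sketch, assuming xopintrin.h's `_mm_haddw_epi8`:

    #include <x86intrin.h>

    /* Sum each pair of signed bytes into a 16-bit lane (vphaddbw).  */
    __m128i pairwise_bytes (__m128i a)
    {
      return _mm_haddw_epi8 (a);
    }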
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
|
| +
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
|
| +
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
|
| +
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
|
| +
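Note the pattern in these comparison rows: a single `xop_maskcmp*` template per element width, with the rtx comparison code column (EQ, NE, LT, ...) selecting the predicate, and the `neq` spellings being pure aliases -- `__builtin_ia32_vpcomneb` and `__builtin_ia32_vpcomneqb` both resolve to IX86_BUILTIN_VPCOMNEB. Sketch, assuming xopintrin.h's `_mm_comlt_epi32` over `__builtin_ia32_vpcomltd`:

    #include <x86intrin.h>

    /* Per-lane signed a < b, all-ones where true (vpcomd, LT).  */
    __m128i less_mask (__m128i a, __m128i b)
    {
      return _mm_comlt_epi32 (a, b);
    }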
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
|
| +
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
|
| +
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
|
| +
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
|
| +
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
|
| +
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
|
| +
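Unlike the rows above, `PCOM_FALSE`/`PCOM_TRUE` are not real rtx comparison codes, hence the explicit `(enum rtx_code)` casts needed to keep these initializers well-typed; the builtins simply materialize all-zeros or all-ones masks. Sketch, assuming xopintrin.h exposes them as `_mm_comfalse_epi32`/`_mm_comtrue_epi32`:

    #include <x86intrin.h>

    /* Degenerate predicates: always-false / always-true masks.  */
    __m128i zeros (__m128i a, __m128i b) { return _mm_comfalse_epi32 (a, b); }
    __m128i ones  (__m128i a, __m128i b) { return _mm_comtrue_epi32 (a, b); }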
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
|
| + { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
|
| +
|
| };
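The closing `vpermil2` rows are the table's only four-operand entries: two data sources, a selector vector, and an immediate, encoded by the `MULTI_ARG_4_*` flags (the `_I1` variants are the 256-bit forms). Sketch, assuming xopintrin.h's `_mm_permute2_pd` with signature `(__m128d, __m128d, __m128i, const int)`:

    #include <x86intrin.h>

    /* Select 64-bit lanes out of {x, y} under ctl; the immediate
       picks the zeroing mode (0 here: plain selection).  */
    __m128d permute2 (__m128d x, __m128d y, __m128i ctl)
    {
      return _mm_permute2_pd (x, y, ctl, 0);
    }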
|
|
|
| /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
|
| @@ -21929,870 +22518,19 @@ static void
|
| ix86_init_mmx_sse_builtins (void)
|
| {
|
| const struct builtin_description * d;
|
| + enum ix86_builtin_func_type ftype;
|
| size_t i;
|
|
|
| - tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
|
| - tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
|
| - tree V1DI_type_node
|
| - = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
|
| - tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
|
| - tree V2DI_type_node
|
| - = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
|
| - tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
|
| - tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
|
| - tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
|
| - tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
|
| - tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
|
| - tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
|
| -
|
| - tree pchar_type_node = build_pointer_type (char_type_node);
|
| - tree pcchar_type_node
|
| - = build_pointer_type (build_type_variant (char_type_node, 1, 0));
|
| - tree pfloat_type_node = build_pointer_type (float_type_node);
|
| - tree pcfloat_type_node
|
| - = build_pointer_type (build_type_variant (float_type_node, 1, 0));
|
| - tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
|
| - tree pcv2sf_type_node
|
| - = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
|
| - tree pv2di_type_node = build_pointer_type (V2DI_type_node);
|
| - tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
|
| -
|
| - /* Comparisons. */
|
| - tree int_ftype_v4sf_v4sf
|
| - = build_function_type_list (integer_type_node,
|
| - V4SF_type_node, V4SF_type_node, NULL_TREE);
|
| - tree v4si_ftype_v4sf_v4sf
|
| - = build_function_type_list (V4SI_type_node,
|
| - V4SF_type_node, V4SF_type_node, NULL_TREE);
|
| - /* MMX/SSE/integer conversions. */
|
| - tree int_ftype_v4sf
|
| - = build_function_type_list (integer_type_node,
|
| - V4SF_type_node, NULL_TREE);
|
| - tree int64_ftype_v4sf
|
| - = build_function_type_list (long_long_integer_type_node,
|
| - V4SF_type_node, NULL_TREE);
|
| - tree int_ftype_v8qi
|
| - = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
|
| - tree v4sf_ftype_v4sf_int
|
| - = build_function_type_list (V4SF_type_node,
|
| - V4SF_type_node, integer_type_node, NULL_TREE);
|
| - tree v4sf_ftype_v4sf_int64
|
| - = build_function_type_list (V4SF_type_node,
|
| - V4SF_type_node, long_long_integer_type_node,
|
| - NULL_TREE);
|
| - tree v4sf_ftype_v4sf_v2si
|
| - = build_function_type_list (V4SF_type_node,
|
| - V4SF_type_node, V2SI_type_node, NULL_TREE);
|
| -
|
| - /* Miscellaneous. */
|
| - tree v8qi_ftype_v4hi_v4hi
|
| - = build_function_type_list (V8QI_type_node,
|
| - V4HI_type_node, V4HI_type_node, NULL_TREE);
|
| - tree v4hi_ftype_v2si_v2si
|
| - = build_function_type_list (V4HI_type_node,
|
| - V2SI_type_node, V2SI_type_node, NULL_TREE);
|
| - tree v4sf_ftype_v4sf_v4sf_int
|
| - = build_function_type_list (V4SF_type_node,
|
| - V4SF_type_node, V4SF_type_node,
|
| - integer_type_node, NULL_TREE);
|
| - tree v2si_ftype_v4hi_v4hi
|
| - = build_function_type_list (V2SI_type_node,
|
| - V4HI_type_node, V4HI_type_node, NULL_TREE);
|
| - tree v4hi_ftype_v4hi_int
|
| - = build_function_type_list (V4HI_type_node,
|
| - V4HI_type_node, integer_type_node, NULL_TREE);
|
| - tree v2si_ftype_v2si_int
|
| - = build_function_type_list (V2SI_type_node,
|
| - V2SI_type_node, integer_type_node, NULL_TREE);
|
| - tree v1di_ftype_v1di_int
|
| - = build_function_type_list (V1DI_type_node,
|
| - V1DI_type_node, integer_type_node, NULL_TREE);
|
| -
|
| - tree void_ftype_void
|
| - = build_function_type (void_type_node, void_list_node);
|
| - tree void_ftype_unsigned
|
| - = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
|
| - tree void_ftype_unsigned_unsigned
|
| - = build_function_type_list (void_type_node, unsigned_type_node,
|
| - unsigned_type_node, NULL_TREE);
|
| - tree void_ftype_pcvoid_unsigned_unsigned
|
| - = build_function_type_list (void_type_node, const_ptr_type_node,
|
| - unsigned_type_node, unsigned_type_node,
|
| - NULL_TREE);
|
| - tree unsigned_ftype_void
|
| - = build_function_type (unsigned_type_node, void_list_node);
|
| - tree v2si_ftype_v4sf
|
| - = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
|
| - /* Loads/stores. */
|
| - tree void_ftype_v8qi_v8qi_pchar
|
| - = build_function_type_list (void_type_node,
|
| - V8QI_type_node, V8QI_type_node,
|
| - pchar_type_node, NULL_TREE);
|
| - tree v4sf_ftype_pcfloat
|
| - = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
|
| - tree v4sf_ftype_v4sf_pcv2sf
|
| - = build_function_type_list (V4SF_type_node,
|
| - V4SF_type_node, pcv2sf_type_node, NULL_TREE);
|
| - tree void_ftype_pv2sf_v4sf
|
| - = build_function_type_list (void_type_node,
|
| - pv2sf_type_node, V4SF_type_node, NULL_TREE);
|
| - tree void_ftype_pfloat_v4sf
|
| - = build_function_type_list (void_type_node,
|
| - pfloat_type_node, V4SF_type_node, NULL_TREE);
|
| - tree void_ftype_pdi_di
|
| - = build_function_type_list (void_type_node,
|
| - pdi_type_node, long_long_unsigned_type_node,
|
| - NULL_TREE);
|
| - tree void_ftype_pv2di_v2di
|
| - = build_function_type_list (void_type_node,
|
| - pv2di_type_node, V2DI_type_node, NULL_TREE);
|
| - /* Normal vector unops. */
|
| - tree v4sf_ftype_v4sf
|
| - = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
|
| - tree v16qi_ftype_v16qi
|
| - = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
|
| - tree v8hi_ftype_v8hi
|
| - = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
|
| - tree v4si_ftype_v4si
|
| - = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
|
| - tree v8qi_ftype_v8qi
|
| - = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
|
| - tree v4hi_ftype_v4hi
|
| - = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
|
| -
|
| - /* Normal vector binops. */
|
| - tree v4sf_ftype_v4sf_v4sf
|
| - = build_function_type_list (V4SF_type_node,
|
| - V4SF_type_node, V4SF_type_node, NULL_TREE);
|
| - tree v8qi_ftype_v8qi_v8qi
|
| - = build_function_type_list (V8QI_type_node,
|
| - V8QI_type_node, V8QI_type_node, NULL_TREE);
|
| - tree v4hi_ftype_v4hi_v4hi
|
| - = build_function_type_list (V4HI_type_node,
|
| - V4HI_type_node, V4HI_type_node, NULL_TREE);
|
| - tree v2si_ftype_v2si_v2si
|
| - = build_function_type_list (V2SI_type_node,
|
| - V2SI_type_node, V2SI_type_node, NULL_TREE);
|
| - tree v1di_ftype_v1di_v1di
|
| - = build_function_type_list (V1DI_type_node,
|
| - V1DI_type_node, V1DI_type_node, NULL_TREE);
|
| - tree v1di_ftype_v1di_v1di_int
|
| - = build_function_type_list (V1DI_type_node,
|
| - V1DI_type_node, V1DI_type_node,
|
| - integer_type_node, NULL_TREE);
|
| - tree v2si_ftype_v2sf
|
| - = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
|
| - tree v2sf_ftype_v2si
|
| - = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
|
| - tree v2si_ftype_v2si
|
| - = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
|
| - tree v2sf_ftype_v2sf
|
| - = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
|
| - tree v2sf_ftype_v2sf_v2sf
|
| - = build_function_type_list (V2SF_type_node,
|
| - V2SF_type_node, V2SF_type_node, NULL_TREE);
|
| - tree v2si_ftype_v2sf_v2sf
|
| - = build_function_type_list (V2SI_type_node,
|
| - V2SF_type_node, V2SF_type_node, NULL_TREE);
|
| - tree pint_type_node = build_pointer_type (integer_type_node);
|
| - tree pdouble_type_node = build_pointer_type (double_type_node);
|
| - tree pcdouble_type_node = build_pointer_type (
|
| - build_type_variant (double_type_node, 1, 0));
|
| - tree int_ftype_v2df_v2df
|
| - = build_function_type_list (integer_type_node,
|
| - V2DF_type_node, V2DF_type_node, NULL_TREE);
|
| -
|
| - tree void_ftype_pcvoid
|
| - = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
|
| - tree v4sf_ftype_v4si
|
| - = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
|
| - tree v4si_ftype_v4sf
|
| - = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
|
| - tree v2df_ftype_v4si
|
| - = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
|
| - tree v4si_ftype_v2df
|
| - = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
|
| - tree v4si_ftype_v2df_v2df
|
| - = build_function_type_list (V4SI_type_node,
|
| - V2DF_type_node, V2DF_type_node, NULL_TREE);
|
| - tree v2si_ftype_v2df
|
| - = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
|
| - tree v4sf_ftype_v2df
|
| - = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
|
| - tree v2df_ftype_v2si
|
| - = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
|
| - tree v2df_ftype_v4sf
|
| - = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
|
| - tree int_ftype_v2df
|
| - = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
|
| - tree int64_ftype_v2df
|
| - = build_function_type_list (long_long_integer_type_node,
|
| - V2DF_type_node, NULL_TREE);
|
| - tree v2df_ftype_v2df_int
|
| - = build_function_type_list (V2DF_type_node,
|
| - V2DF_type_node, integer_type_node, NULL_TREE);
|
| - tree v2df_ftype_v2df_int64
|
| - = build_function_type_list (V2DF_type_node,
|
| - V2DF_type_node, long_long_integer_type_node,
|
| - NULL_TREE);
|
| - tree v4sf_ftype_v4sf_v2df
|
| - = build_function_type_list (V4SF_type_node,
|
| - V4SF_type_node, V2DF_type_node, NULL_TREE);
|
| - tree v2df_ftype_v2df_v4sf
|
| - = build_function_type_list (V2DF_type_node,
|
| - V2DF_type_node, V4SF_type_node, NULL_TREE);
|
| - tree v2df_ftype_v2df_v2df_int
|
| - = build_function_type_list (V2DF_type_node,
|
| - V2DF_type_node, V2DF_type_node,
|
| - integer_type_node,
|
| - NULL_TREE);
|
| - tree v2df_ftype_v2df_pcdouble
|
| - = build_function_type_list (V2DF_type_node,
|
| - V2DF_type_node, pcdouble_type_node, NULL_TREE);
|
| - tree void_ftype_pdouble_v2df
|
| - = build_function_type_list (void_type_node,
|
| - pdouble_type_node, V2DF_type_node, NULL_TREE);
|
| - tree void_ftype_pint_int
|
| - = build_function_type_list (void_type_node,
|
| - pint_type_node, integer_type_node, NULL_TREE);
|
| - tree void_ftype_v16qi_v16qi_pchar
|
| - = build_function_type_list (void_type_node,
|
| - V16QI_type_node, V16QI_type_node,
|
| - pchar_type_node, NULL_TREE);
|
| - tree v2df_ftype_pcdouble
|
| - = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
|
| - tree v2df_ftype_v2df_v2df
|
| - = build_function_type_list (V2DF_type_node,
|
| - V2DF_type_node, V2DF_type_node, NULL_TREE);
|
| - tree v16qi_ftype_v16qi_v16qi
|
| - = build_function_type_list (V16QI_type_node,
|
| - V16QI_type_node, V16QI_type_node, NULL_TREE);
|
| - tree v8hi_ftype_v8hi_v8hi
|
| - = build_function_type_list (V8HI_type_node,
|
| - V8HI_type_node, V8HI_type_node, NULL_TREE);
|
| - tree v4si_ftype_v4si_v4si
|
| - = build_function_type_list (V4SI_type_node,
|
| - V4SI_type_node, V4SI_type_node, NULL_TREE);
|
| - tree v2di_ftype_v2di_v2di
|
| - = build_function_type_list (V2DI_type_node,
|
| - V2DI_type_node, V2DI_type_node, NULL_TREE);
|
| - tree v2di_ftype_v2df_v2df
|
| - = build_function_type_list (V2DI_type_node,
|
| - V2DF_type_node, V2DF_type_node, NULL_TREE);
|
| - tree v2df_ftype_v2df
|
| - = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
|
| - tree v2di_ftype_v2di_int
|
| - = build_function_type_list (V2DI_type_node,
|
| - V2DI_type_node, integer_type_node, NULL_TREE);
|
| - tree v2di_ftype_v2di_v2di_int
|
| - = build_function_type_list (V2DI_type_node, V2DI_type_node,
|
| - V2DI_type_node, integer_type_node, NULL_TREE);
|
| - tree v4si_ftype_v4si_int
|
| - = build_function_type_list (V4SI_type_node,
|
| - V4SI_type_node, integer_type_node, NULL_TREE);
|
| - tree v8hi_ftype_v8hi_int
|
| - = build_function_type_list (V8HI_type_node,
|
| - V8HI_type_node, integer_type_node, NULL_TREE);
|
| - tree v4si_ftype_v8hi_v8hi
|
| - = build_function_type_list (V4SI_type_node,
|
| - V8HI_type_node, V8HI_type_node, NULL_TREE);
|
| - tree v1di_ftype_v8qi_v8qi
|
| - = build_function_type_list (V1DI_type_node,
|
| - V8QI_type_node, V8QI_type_node, NULL_TREE);
|
| - tree v1di_ftype_v2si_v2si
|
| - = build_function_type_list (V1DI_type_node,
|
| - V2SI_type_node, V2SI_type_node, NULL_TREE);
|
| - tree v2di_ftype_v16qi_v16qi
|
| - = build_function_type_list (V2DI_type_node,
|
| - V16QI_type_node, V16QI_type_node, NULL_TREE);
|
| - tree v2di_ftype_v4si_v4si
|
| - = build_function_type_list (V2DI_type_node,
|
| - V4SI_type_node, V4SI_type_node, NULL_TREE);
|
| - tree int_ftype_v16qi
|
| - = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
|
| - tree v16qi_ftype_pcchar
|
| - = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
|
| - tree void_ftype_pchar_v16qi
|
| - = build_function_type_list (void_type_node,
|
| - pchar_type_node, V16QI_type_node, NULL_TREE);
|
| -
|
| - tree v2di_ftype_v2di_unsigned_unsigned
|
| - = build_function_type_list (V2DI_type_node, V2DI_type_node,
|
| - unsigned_type_node, unsigned_type_node,
|
| - NULL_TREE);
|
| - tree v2di_ftype_v2di_v2di_unsigned_unsigned
|
| - = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
|
| - unsigned_type_node, unsigned_type_node,
|
| - NULL_TREE);
|
| - tree v2di_ftype_v2di_v16qi
|
| - = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
|
| - NULL_TREE);
|
| - tree v2df_ftype_v2df_v2df_v2df
|
| - = build_function_type_list (V2DF_type_node,
|
| - V2DF_type_node, V2DF_type_node,
|
| - V2DF_type_node, NULL_TREE);
|
| - tree v4sf_ftype_v4sf_v4sf_v4sf
|
| - = build_function_type_list (V4SF_type_node,
|
| - V4SF_type_node, V4SF_type_node,
|
| - V4SF_type_node, NULL_TREE);
|
| - tree v8hi_ftype_v16qi
|
| - = build_function_type_list (V8HI_type_node, V16QI_type_node,
|
| - NULL_TREE);
|
| - tree v4si_ftype_v16qi
|
| - = build_function_type_list (V4SI_type_node, V16QI_type_node,
|
| - NULL_TREE);
|
| - tree v2di_ftype_v16qi
|
| - = build_function_type_list (V2DI_type_node, V16QI_type_node,
|
| - NULL_TREE);
|
| - tree v4si_ftype_v8hi
|
| - = build_function_type_list (V4SI_type_node, V8HI_type_node,
|
| - NULL_TREE);
|
| - tree v2di_ftype_v8hi
|
| - = build_function_type_list (V2DI_type_node, V8HI_type_node,
|
| - NULL_TREE);
|
| - tree v2di_ftype_v4si
|
| - = build_function_type_list (V2DI_type_node, V4SI_type_node,
|
| - NULL_TREE);
|
| - tree v2di_ftype_pv2di
|
| - = build_function_type_list (V2DI_type_node, pv2di_type_node,
|
| - NULL_TREE);
|
| - tree v16qi_ftype_v16qi_v16qi_int
|
| - = build_function_type_list (V16QI_type_node, V16QI_type_node,
|
| - V16QI_type_node, integer_type_node,
|
| - NULL_TREE);
|
| - tree v16qi_ftype_v16qi_v16qi_v16qi
|
| - = build_function_type_list (V16QI_type_node, V16QI_type_node,
|
| - V16QI_type_node, V16QI_type_node,
|
| - NULL_TREE);
|
| - tree v8hi_ftype_v8hi_v8hi_int
|
| - = build_function_type_list (V8HI_type_node, V8HI_type_node,
|
| - V8HI_type_node, integer_type_node,
|
| - NULL_TREE);
|
| - tree v4si_ftype_v4si_v4si_int
|
| - = build_function_type_list (V4SI_type_node, V4SI_type_node,
|
| - V4SI_type_node, integer_type_node,
|
| - NULL_TREE);
|
| - tree int_ftype_v2di_v2di
|
| - = build_function_type_list (integer_type_node,
|
| - V2DI_type_node, V2DI_type_node,
|
| - NULL_TREE);
|
| - tree int_ftype_v16qi_int_v16qi_int_int
|
| - = build_function_type_list (integer_type_node,
|
| - V16QI_type_node,
|
| - integer_type_node,
|
| - V16QI_type_node,
|
| - integer_type_node,
|
| - integer_type_node,
|
| - NULL_TREE);
|
| - tree v16qi_ftype_v16qi_int_v16qi_int_int
|
| - = build_function_type_list (V16QI_type_node,
|
| - V16QI_type_node,
|
| - integer_type_node,
|
| - V16QI_type_node,
|
| - integer_type_node,
|
| - integer_type_node,
|
| - NULL_TREE);
|
| - tree int_ftype_v16qi_v16qi_int
|
| - = build_function_type_list (integer_type_node,
|
| - V16QI_type_node,
|
| - V16QI_type_node,
|
| - integer_type_node,
|
| - NULL_TREE);
|
| -
|
| - /* SSE5 instructions */
|
| - tree v2di_ftype_v2di_v2di_v2di
|
| - = build_function_type_list (V2DI_type_node,
|
| - V2DI_type_node,
|
| - V2DI_type_node,
|
| - V2DI_type_node,
|
| - NULL_TREE);
|
| -
|
| - tree v4si_ftype_v4si_v4si_v4si
|
| - = build_function_type_list (V4SI_type_node,
|
| - V4SI_type_node,
|
| - V4SI_type_node,
|
| - V4SI_type_node,
|
| - NULL_TREE);
|
| -
|
| - tree v4si_ftype_v4si_v4si_v2di
|
| - = build_function_type_list (V4SI_type_node,
|
| - V4SI_type_node,
|
| - V4SI_type_node,
|
| - V2DI_type_node,
|
| - NULL_TREE);
|
| -
|
| - tree v8hi_ftype_v8hi_v8hi_v8hi
|
| - = build_function_type_list (V8HI_type_node,
|
| - V8HI_type_node,
|
| - V8HI_type_node,
|
| - V8HI_type_node,
|
| - NULL_TREE);
|
| -
|
| - tree v8hi_ftype_v8hi_v8hi_v4si
|
| - = build_function_type_list (V8HI_type_node,
|
| - V8HI_type_node,
|
| - V8HI_type_node,
|
| - V4SI_type_node,
|
| - NULL_TREE);
|
| -
|
| - tree v2df_ftype_v2df_v2df_v16qi
|
| - = build_function_type_list (V2DF_type_node,
|
| - V2DF_type_node,
|
| - V2DF_type_node,
|
| - V16QI_type_node,
|
| - NULL_TREE);
|
| -
|
| - tree v4sf_ftype_v4sf_v4sf_v16qi
|
| - = build_function_type_list (V4SF_type_node,
|
| - V4SF_type_node,
|
| - V4SF_type_node,
|
| - V16QI_type_node,
|
| - NULL_TREE);
|
| -
|
| - tree v2di_ftype_v2di_si
|
| - = build_function_type_list (V2DI_type_node,
|
| - V2DI_type_node,
|
| - integer_type_node,
|
| - NULL_TREE);
|
| -
|
| - tree v4si_ftype_v4si_si
|
| - = build_function_type_list (V4SI_type_node,
|
| - V4SI_type_node,
|
| - integer_type_node,
|
| - NULL_TREE);
|
| -
|
| - tree v8hi_ftype_v8hi_si
|
| - = build_function_type_list (V8HI_type_node,
|
| - V8HI_type_node,
|
| - integer_type_node,
|
| - NULL_TREE);
|
| -
|
| - tree v16qi_ftype_v16qi_si
|
| - = build_function_type_list (V16QI_type_node,
|
| - V16QI_type_node,
|
| - integer_type_node,
|
| - NULL_TREE);
|
| - tree v4sf_ftype_v4hi
|
| - = build_function_type_list (V4SF_type_node,
|
| - V4HI_type_node,
|
| - NULL_TREE);
|
| -
|
| - tree v4hi_ftype_v4sf
|
| - = build_function_type_list (V4HI_type_node,
|
| - V4SF_type_node,
|
| - NULL_TREE);
|
| -
|
| - tree v2di_ftype_v2di
|
| - = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
|
| -
|
| - tree v16qi_ftype_v8hi_v8hi
|
| - = build_function_type_list (V16QI_type_node,
|
| - V8HI_type_node, V8HI_type_node,
|
| - NULL_TREE);
|
| - tree v8hi_ftype_v4si_v4si
|
| - = build_function_type_list (V8HI_type_node,
|
| - V4SI_type_node, V4SI_type_node,
|
| - NULL_TREE);
|
| - tree v8hi_ftype_v16qi_v16qi
|
| - = build_function_type_list (V8HI_type_node,
|
| - V16QI_type_node, V16QI_type_node,
|
| - NULL_TREE);
|
| - tree v4hi_ftype_v8qi_v8qi
|
| - = build_function_type_list (V4HI_type_node,
|
| - V8QI_type_node, V8QI_type_node,
|
| - NULL_TREE);
|
| - tree unsigned_ftype_unsigned_uchar
|
| - = build_function_type_list (unsigned_type_node,
|
| - unsigned_type_node,
|
| - unsigned_char_type_node,
|
| - NULL_TREE);
|
| - tree unsigned_ftype_unsigned_ushort
|
| - = build_function_type_list (unsigned_type_node,
|
| - unsigned_type_node,
|
| - short_unsigned_type_node,
|
| - NULL_TREE);
|
| - tree unsigned_ftype_unsigned_unsigned
|
| - = build_function_type_list (unsigned_type_node,
|
| - unsigned_type_node,
|
| - unsigned_type_node,
|
| - NULL_TREE);
|
| - tree uint64_ftype_uint64_uint64
|
| - = build_function_type_list (long_long_unsigned_type_node,
|
| - long_long_unsigned_type_node,
|
| - long_long_unsigned_type_node,
|
| - NULL_TREE);
|
| - tree float_ftype_float
|
| - = build_function_type_list (float_type_node,
|
| - float_type_node,
|
| - NULL_TREE);
|
| -
|
| - /* AVX builtins */
|
| - tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
|
| - V32QImode);
|
| - tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
|
| - V8SImode);
|
| - tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
|
| - V8SFmode);
|
| - tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
|
| - V4DImode);
|
| - tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
|
| - V4DFmode);
|
| - tree v8sf_ftype_v8sf
|
| - = build_function_type_list (V8SF_type_node,
|
| - V8SF_type_node,
|
| - NULL_TREE);
|
| - tree v8si_ftype_v8sf
|
| - = build_function_type_list (V8SI_type_node,
|
| - V8SF_type_node,
|
| - NULL_TREE);
|
| - tree v8sf_ftype_v8si
|
| - = build_function_type_list (V8SF_type_node,
|
| - V8SI_type_node,
|
| - NULL_TREE);
|
| - tree v4si_ftype_v4df
|
| - = build_function_type_list (V4SI_type_node,
|
| - V4DF_type_node,
|
| - NULL_TREE);
|
| - tree v4df_ftype_v4df
|
| - = build_function_type_list (V4DF_type_node,
|
| - V4DF_type_node,
|
| - NULL_TREE);
|
| - tree v4df_ftype_v4si
|
| - = build_function_type_list (V4DF_type_node,
|
| - V4SI_type_node,
|
| - NULL_TREE);
|
| - tree v4df_ftype_v4sf
|
| - = build_function_type_list (V4DF_type_node,
|
| - V4SF_type_node,
|
| - NULL_TREE);
|
| - tree v4sf_ftype_v4df
|
| - = build_function_type_list (V4SF_type_node,
|
| - V4DF_type_node,
|
| - NULL_TREE);
|
| - tree v8sf_ftype_v8sf_v8sf
|
| - = build_function_type_list (V8SF_type_node,
|
| - V8SF_type_node, V8SF_type_node,
|
| - NULL_TREE);
|
| - tree v4df_ftype_v4df_v4df
|
| - = build_function_type_list (V4DF_type_node,
|
| - V4DF_type_node, V4DF_type_node,
|
| - NULL_TREE);
|
| - tree v8sf_ftype_v8sf_int
|
| - = build_function_type_list (V8SF_type_node,
|
| - V8SF_type_node, integer_type_node,
|
| - NULL_TREE);
|
| - tree v4si_ftype_v8si_int
|
| - = build_function_type_list (V4SI_type_node,
|
| - V8SI_type_node, integer_type_node,
|
| - NULL_TREE);
|
| - tree v4df_ftype_v4df_int
|
| - = build_function_type_list (V4DF_type_node,
|
| - V4DF_type_node, integer_type_node,
|
| - NULL_TREE);
|
| - tree v4sf_ftype_v8sf_int
|
| - = build_function_type_list (V4SF_type_node,
|
| - V8SF_type_node, integer_type_node,
|
| - NULL_TREE);
|
| - tree v2df_ftype_v4df_int
|
| - = build_function_type_list (V2DF_type_node,
|
| - V4DF_type_node, integer_type_node,
|
| - NULL_TREE);
|
| - tree v8sf_ftype_v8sf_v8sf_int
|
| - = build_function_type_list (V8SF_type_node,
|
| - V8SF_type_node, V8SF_type_node,
|
| - integer_type_node,
|
| - NULL_TREE);
|
| - tree v8sf_ftype_v8sf_v8sf_v8sf
|
| - = build_function_type_list (V8SF_type_node,
|
| - V8SF_type_node, V8SF_type_node,
|
| - V8SF_type_node,
|
| - NULL_TREE);
|
| - tree v4df_ftype_v4df_v4df_v4df
|
| - = build_function_type_list (V4DF_type_node,
|
| - V4DF_type_node, V4DF_type_node,
|
| - V4DF_type_node,
|
| - NULL_TREE);
|
| - tree v8si_ftype_v8si_v8si_int
|
| - = build_function_type_list (V8SI_type_node,
|
| - V8SI_type_node, V8SI_type_node,
|
| - integer_type_node,
|
| - NULL_TREE);
|
| - tree v4df_ftype_v4df_v4df_int
|
| - = build_function_type_list (V4DF_type_node,
|
| - V4DF_type_node, V4DF_type_node,
|
| - integer_type_node,
|
| - NULL_TREE);
|
| - tree v8sf_ftype_pcfloat
|
| - = build_function_type_list (V8SF_type_node,
|
| - pcfloat_type_node,
|
| - NULL_TREE);
|
| - tree v4df_ftype_pcdouble
|
| - = build_function_type_list (V4DF_type_node,
|
| - pcdouble_type_node,
|
| - NULL_TREE);
|
| - tree pcv4sf_type_node
|
| - = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
|
| - tree pcv2df_type_node
|
| - = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
|
| - tree v8sf_ftype_pcv4sf
|
| - = build_function_type_list (V8SF_type_node,
|
| - pcv4sf_type_node,
|
| - NULL_TREE);
|
| - tree v4df_ftype_pcv2df
|
| - = build_function_type_list (V4DF_type_node,
|
| - pcv2df_type_node,
|
| - NULL_TREE);
|
| - tree v32qi_ftype_pcchar
|
| - = build_function_type_list (V32QI_type_node,
|
| - pcchar_type_node,
|
| - NULL_TREE);
|
| - tree void_ftype_pchar_v32qi
|
| - = build_function_type_list (void_type_node,
|
| - pchar_type_node, V32QI_type_node,
|
| - NULL_TREE);
|
| - tree v8si_ftype_v8si_v4si_int
|
| - = build_function_type_list (V8SI_type_node,
|
| - V8SI_type_node, V4SI_type_node,
|
| - integer_type_node,
|
| - NULL_TREE);
|
| - tree pv4di_type_node = build_pointer_type (V4DI_type_node);
|
| - tree void_ftype_pv4di_v4di
|
| - = build_function_type_list (void_type_node,
|
| - pv4di_type_node, V4DI_type_node,
|
| - NULL_TREE);
|
| - tree v8sf_ftype_v8sf_v4sf_int
|
| - = build_function_type_list (V8SF_type_node,
|
| - V8SF_type_node, V4SF_type_node,
|
| - integer_type_node,
|
| - NULL_TREE);
|
| - tree v4df_ftype_v4df_v2df_int
|
| - = build_function_type_list (V4DF_type_node,
|
| - V4DF_type_node, V2DF_type_node,
|
| - integer_type_node,
|
| - NULL_TREE);
|
| - tree void_ftype_pfloat_v8sf
|
| - = build_function_type_list (void_type_node,
|
| - pfloat_type_node, V8SF_type_node,
|
| - NULL_TREE);
|
| - tree void_ftype_pdouble_v4df
|
| - = build_function_type_list (void_type_node,
|
| - pdouble_type_node, V4DF_type_node,
|
| - NULL_TREE);
|
| - tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
|
| - tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
|
| - tree pv4df_type_node = build_pointer_type (V4DF_type_node);
|
| - tree pv2df_type_node = build_pointer_type (V2DF_type_node);
|
| - tree pcv8sf_type_node
|
| - = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
|
| - tree pcv4df_type_node
|
| - = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
|
| - tree v8sf_ftype_pcv8sf_v8sf
|
| - = build_function_type_list (V8SF_type_node,
|
| - pcv8sf_type_node, V8SF_type_node,
|
| - NULL_TREE);
|
| - tree v4df_ftype_pcv4df_v4df
|
| - = build_function_type_list (V4DF_type_node,
|
| - pcv4df_type_node, V4DF_type_node,
|
| - NULL_TREE);
|
| - tree v4sf_ftype_pcv4sf_v4sf
|
| - = build_function_type_list (V4SF_type_node,
|
| - pcv4sf_type_node, V4SF_type_node,
|
| - NULL_TREE);
|
| - tree v2df_ftype_pcv2df_v2df
|
| - = build_function_type_list (V2DF_type_node,
|
| - pcv2df_type_node, V2DF_type_node,
|
| - NULL_TREE);
|
| - tree void_ftype_pv8sf_v8sf_v8sf
|
| - = build_function_type_list (void_type_node,
|
| - pv8sf_type_node, V8SF_type_node,
|
| - V8SF_type_node,
|
| - NULL_TREE);
|
| - tree void_ftype_pv4df_v4df_v4df
|
| - = build_function_type_list (void_type_node,
|
| - pv4df_type_node, V4DF_type_node,
|
| - V4DF_type_node,
|
| - NULL_TREE);
|
| - tree void_ftype_pv4sf_v4sf_v4sf
|
| - = build_function_type_list (void_type_node,
|
| - pv4sf_type_node, V4SF_type_node,
|
| - V4SF_type_node,
|
| - NULL_TREE);
|
| - tree void_ftype_pv2df_v2df_v2df
|
| - = build_function_type_list (void_type_node,
|
| - pv2df_type_node, V2DF_type_node,
|
| - V2DF_type_node,
|
| - NULL_TREE);
|
| - tree v4df_ftype_v2df
|
| - = build_function_type_list (V4DF_type_node,
|
| - V2DF_type_node,
|
| - NULL_TREE);
|
| - tree v8sf_ftype_v4sf
|
| - = build_function_type_list (V8SF_type_node,
|
| - V4SF_type_node,
|
| - NULL_TREE);
|
| - tree v8si_ftype_v4si
|
| - = build_function_type_list (V8SI_type_node,
|
| - V4SI_type_node,
|
| - NULL_TREE);
|
| - tree v2df_ftype_v4df
|
| - = build_function_type_list (V2DF_type_node,
|
| - V4DF_type_node,
|
| - NULL_TREE);
|
| - tree v4sf_ftype_v8sf
|
| - = build_function_type_list (V4SF_type_node,
|
| - V8SF_type_node,
|
| - NULL_TREE);
|
| - tree v4si_ftype_v8si
|
| - = build_function_type_list (V4SI_type_node,
|
| - V8SI_type_node,
|
| - NULL_TREE);
|
| - tree int_ftype_v4df
|
| - = build_function_type_list (integer_type_node,
|
| - V4DF_type_node,
|
| - NULL_TREE);
|
| - tree int_ftype_v8sf
|
| - = build_function_type_list (integer_type_node,
|
| - V8SF_type_node,
|
| - NULL_TREE);
|
| - tree int_ftype_v8sf_v8sf
|
| - = build_function_type_list (integer_type_node,
|
| - V8SF_type_node, V8SF_type_node,
|
| - NULL_TREE);
|
| - tree int_ftype_v4di_v4di
|
| - = build_function_type_list (integer_type_node,
|
| - V4DI_type_node, V4DI_type_node,
|
| - NULL_TREE);
|
| - tree int_ftype_v4df_v4df
|
| - = build_function_type_list (integer_type_node,
|
| - V4DF_type_node, V4DF_type_node,
|
| - NULL_TREE);
|
| - tree v8sf_ftype_v8sf_v8si
|
| - = build_function_type_list (V8SF_type_node,
|
| - V8SF_type_node, V8SI_type_node,
|
| - NULL_TREE);
|
| - tree v4df_ftype_v4df_v4di
|
| - = build_function_type_list (V4DF_type_node,
|
| - V4DF_type_node, V4DI_type_node,
|
| - NULL_TREE);
|
| - tree v4sf_ftype_v4sf_v4si
|
| - = build_function_type_list (V4SF_type_node,
|
| - V4SF_type_node, V4SI_type_node, NULL_TREE);
|
| - tree v2df_ftype_v2df_v2di
|
| - = build_function_type_list (V2DF_type_node,
|
| - V2DF_type_node, V2DI_type_node, NULL_TREE);
|
| -
|
| - tree ftype;
|
| -
|
| /* Add all special builtins with variable number of operands. */
|
| for (i = 0, d = bdesc_special_args;
|
| i < ARRAY_SIZE (bdesc_special_args);
|
| i++, d++)
|
| {
|
| - tree type;
|
| -
|
| if (d->name == 0)
|
| continue;
|
|
|
| - switch ((enum ix86_special_builtin_type) d->flag)
|
| - {
|
| - case VOID_FTYPE_VOID:
|
| - type = void_ftype_void;
|
| - break;
|
| - case V32QI_FTYPE_PCCHAR:
|
| - type = v32qi_ftype_pcchar;
|
| - break;
|
| - case V16QI_FTYPE_PCCHAR:
|
| - type = v16qi_ftype_pcchar;
|
| - break;
|
| - case V8SF_FTYPE_PCV4SF:
|
| - type = v8sf_ftype_pcv4sf;
|
| - break;
|
| - case V8SF_FTYPE_PCFLOAT:
|
| - type = v8sf_ftype_pcfloat;
|
| - break;
|
| - case V4DF_FTYPE_PCV2DF:
|
| - type = v4df_ftype_pcv2df;
|
| - break;
|
| - case V4DF_FTYPE_PCDOUBLE:
|
| - type = v4df_ftype_pcdouble;
|
| - break;
|
| - case V4SF_FTYPE_PCFLOAT:
|
| - type = v4sf_ftype_pcfloat;
|
| - break;
|
| - case V2DI_FTYPE_PV2DI:
|
| - type = v2di_ftype_pv2di;
|
| - break;
|
| - case V2DF_FTYPE_PCDOUBLE:
|
| - type = v2df_ftype_pcdouble;
|
| - break;
|
| - case V8SF_FTYPE_PCV8SF_V8SF:
|
| - type = v8sf_ftype_pcv8sf_v8sf;
|
| - break;
|
| - case V4DF_FTYPE_PCV4DF_V4DF:
|
| - type = v4df_ftype_pcv4df_v4df;
|
| - break;
|
| - case V4SF_FTYPE_V4SF_PCV2SF:
|
| - type = v4sf_ftype_v4sf_pcv2sf;
|
| - break;
|
| - case V4SF_FTYPE_PCV4SF_V4SF:
|
| - type = v4sf_ftype_pcv4sf_v4sf;
|
| - break;
|
| - case V2DF_FTYPE_V2DF_PCDOUBLE:
|
| - type = v2df_ftype_v2df_pcdouble;
|
| - break;
|
| - case V2DF_FTYPE_PCV2DF_V2DF:
|
| - type = v2df_ftype_pcv2df_v2df;
|
| - break;
|
| - case VOID_FTYPE_PV2SF_V4SF:
|
| - type = void_ftype_pv2sf_v4sf;
|
| - break;
|
| - case VOID_FTYPE_PV4DI_V4DI:
|
| - type = void_ftype_pv4di_v4di;
|
| - break;
|
| - case VOID_FTYPE_PV2DI_V2DI:
|
| - type = void_ftype_pv2di_v2di;
|
| - break;
|
| - case VOID_FTYPE_PCHAR_V32QI:
|
| - type = void_ftype_pchar_v32qi;
|
| - break;
|
| - case VOID_FTYPE_PCHAR_V16QI:
|
| - type = void_ftype_pchar_v16qi;
|
| - break;
|
| - case VOID_FTYPE_PFLOAT_V8SF:
|
| - type = void_ftype_pfloat_v8sf;
|
| - break;
|
| - case VOID_FTYPE_PFLOAT_V4SF:
|
| - type = void_ftype_pfloat_v4sf;
|
| - break;
|
| - case VOID_FTYPE_PDOUBLE_V4DF:
|
| - type = void_ftype_pdouble_v4df;
|
| - break;
|
| - case VOID_FTYPE_PDOUBLE_V2DF:
|
| - type = void_ftype_pdouble_v2df;
|
| - break;
|
| - case VOID_FTYPE_PDI_DI:
|
| - type = void_ftype_pdi_di;
|
| - break;
|
| - case VOID_FTYPE_PINT_INT:
|
| - type = void_ftype_pint_int;
|
| - break;
|
| - case VOID_FTYPE_PV8SF_V8SF_V8SF:
|
| - type = void_ftype_pv8sf_v8sf_v8sf;
|
| - break;
|
| - case VOID_FTYPE_PV4DF_V4DF_V4DF:
|
| - type = void_ftype_pv4df_v4df_v4df;
|
| - break;
|
| - case VOID_FTYPE_PV4SF_V4SF_V4SF:
|
| - type = void_ftype_pv4sf_v4sf_v4sf;
|
| - break;
|
| - case VOID_FTYPE_PV2DF_V2DF_V2DF:
|
| - type = void_ftype_pv2df_v2df_v2df;
|
| - break;
|
| - default:
|
| - gcc_unreachable ();
|
| - }
|
| -
|
| - def_builtin (d->mask, d->name, type, d->code);
|
| + ftype = (enum ix86_builtin_func_type) d->flag;
|
| + def_builtin (d->mask, d->name, ftype, d->code);
|
| }
|
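
Everything above this point in the hunk deletes hand-built type trees and a
per-builtin switch; the two added lines replace them by reading the signature
enum straight out of the description table. A minimal standalone sketch of
that table-driven pattern (demo names only, not GCC's real tables or enum):

    #include <stdio.h>

    /* Stand-ins for the generated ix86_builtin_func_type enum and the
       bdesc_* description tables.  */
    enum demo_func_type { VOID_FTYPE_VOID_DEMO, INT_FTYPE_INT_DEMO };

    struct demo_desc {
      const char *name;
      enum demo_func_type flag;   /* each row carries its own signature */
    };

    static const struct demo_desc bdesc_demo[] = {
      { "__demo_mfence", VOID_FTYPE_VOID_DEMO },
      { "__demo_abs",    INT_FTYPE_INT_DEMO },
    };

    static const char *demo_signature (enum demo_func_type t)
    {
      switch (t)
        {
        case VOID_FTYPE_VOID_DEMO: return "void (void)";
        case INT_FTYPE_INT_DEMO:   return "int (int)";
        }
      return "?";
    }

    int main (void)
    {
      size_t i;
      /* One generic loop replaces a switch with one case per signature.  */
      for (i = 0; i < sizeof bdesc_demo / sizeof *bdesc_demo; i++)
        printf ("%s: %s\n", bdesc_demo[i].name,
                demo_signature (bdesc_demo[i].flag));
      return 0;
    }
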
|
|
| /* Add all builtins with variable number of operands. */
|
| @@ -22800,444 +22538,11 @@ ix86_init_mmx_sse_builtins (void)
|
| i < ARRAY_SIZE (bdesc_args);
|
| i++, d++)
|
| {
|
| - tree type;
|
| -
|
| if (d->name == 0)
|
| continue;
|
|
|
| - switch ((enum ix86_builtin_type) d->flag)
|
| - {
|
| - case FLOAT_FTYPE_FLOAT:
|
| - type = float_ftype_float;
|
| - break;
|
| - case INT_FTYPE_V8SF_V8SF_PTEST:
|
| - type = int_ftype_v8sf_v8sf;
|
| - break;
|
| - case INT_FTYPE_V4DI_V4DI_PTEST:
|
| - type = int_ftype_v4di_v4di;
|
| - break;
|
| - case INT_FTYPE_V4DF_V4DF_PTEST:
|
| - type = int_ftype_v4df_v4df;
|
| - break;
|
| - case INT_FTYPE_V4SF_V4SF_PTEST:
|
| - type = int_ftype_v4sf_v4sf;
|
| - break;
|
| - case INT_FTYPE_V2DI_V2DI_PTEST:
|
| - type = int_ftype_v2di_v2di;
|
| - break;
|
| - case INT_FTYPE_V2DF_V2DF_PTEST:
|
| - type = int_ftype_v2df_v2df;
|
| - break;
|
| - case INT64_FTYPE_V4SF:
|
| - type = int64_ftype_v4sf;
|
| - break;
|
| - case INT64_FTYPE_V2DF:
|
| - type = int64_ftype_v2df;
|
| - break;
|
| - case INT_FTYPE_V16QI:
|
| - type = int_ftype_v16qi;
|
| - break;
|
| - case INT_FTYPE_V8QI:
|
| - type = int_ftype_v8qi;
|
| - break;
|
| - case INT_FTYPE_V8SF:
|
| - type = int_ftype_v8sf;
|
| - break;
|
| - case INT_FTYPE_V4DF:
|
| - type = int_ftype_v4df;
|
| - break;
|
| - case INT_FTYPE_V4SF:
|
| - type = int_ftype_v4sf;
|
| - break;
|
| - case INT_FTYPE_V2DF:
|
| - type = int_ftype_v2df;
|
| - break;
|
| - case V16QI_FTYPE_V16QI:
|
| - type = v16qi_ftype_v16qi;
|
| - break;
|
| - case V8SI_FTYPE_V8SF:
|
| - type = v8si_ftype_v8sf;
|
| - break;
|
| - case V8SI_FTYPE_V4SI:
|
| - type = v8si_ftype_v4si;
|
| - break;
|
| - case V8HI_FTYPE_V8HI:
|
| - type = v8hi_ftype_v8hi;
|
| - break;
|
| - case V8HI_FTYPE_V16QI:
|
| - type = v8hi_ftype_v16qi;
|
| - break;
|
| - case V8QI_FTYPE_V8QI:
|
| - type = v8qi_ftype_v8qi;
|
| - break;
|
| - case V8SF_FTYPE_V8SF:
|
| - type = v8sf_ftype_v8sf;
|
| - break;
|
| - case V8SF_FTYPE_V8SI:
|
| - type = v8sf_ftype_v8si;
|
| - break;
|
| - case V8SF_FTYPE_V4SF:
|
| - type = v8sf_ftype_v4sf;
|
| - break;
|
| - case V4SI_FTYPE_V4DF:
|
| - type = v4si_ftype_v4df;
|
| - break;
|
| - case V4SI_FTYPE_V4SI:
|
| - type = v4si_ftype_v4si;
|
| - break;
|
| - case V4SI_FTYPE_V16QI:
|
| - type = v4si_ftype_v16qi;
|
| - break;
|
| - case V4SI_FTYPE_V8SI:
|
| - type = v4si_ftype_v8si;
|
| - break;
|
| - case V4SI_FTYPE_V8HI:
|
| - type = v4si_ftype_v8hi;
|
| - break;
|
| - case V4SI_FTYPE_V4SF:
|
| - type = v4si_ftype_v4sf;
|
| - break;
|
| - case V4SI_FTYPE_V2DF:
|
| - type = v4si_ftype_v2df;
|
| - break;
|
| - case V4HI_FTYPE_V4HI:
|
| - type = v4hi_ftype_v4hi;
|
| - break;
|
| - case V4DF_FTYPE_V4DF:
|
| - type = v4df_ftype_v4df;
|
| - break;
|
| - case V4DF_FTYPE_V4SI:
|
| - type = v4df_ftype_v4si;
|
| - break;
|
| - case V4DF_FTYPE_V4SF:
|
| - type = v4df_ftype_v4sf;
|
| - break;
|
| - case V4DF_FTYPE_V2DF:
|
| - type = v4df_ftype_v2df;
|
| - break;
|
| - case V4SF_FTYPE_V4SF:
|
| - case V4SF_FTYPE_V4SF_VEC_MERGE:
|
| - type = v4sf_ftype_v4sf;
|
| - break;
|
| - case V4SF_FTYPE_V8SF:
|
| - type = v4sf_ftype_v8sf;
|
| - break;
|
| - case V4SF_FTYPE_V4SI:
|
| - type = v4sf_ftype_v4si;
|
| - break;
|
| - case V4SF_FTYPE_V4DF:
|
| - type = v4sf_ftype_v4df;
|
| - break;
|
| - case V4SF_FTYPE_V2DF:
|
| - type = v4sf_ftype_v2df;
|
| - break;
|
| - case V2DI_FTYPE_V2DI:
|
| - type = v2di_ftype_v2di;
|
| - break;
|
| - case V2DI_FTYPE_V16QI:
|
| - type = v2di_ftype_v16qi;
|
| - break;
|
| - case V2DI_FTYPE_V8HI:
|
| - type = v2di_ftype_v8hi;
|
| - break;
|
| - case V2DI_FTYPE_V4SI:
|
| - type = v2di_ftype_v4si;
|
| - break;
|
| - case V2SI_FTYPE_V2SI:
|
| - type = v2si_ftype_v2si;
|
| - break;
|
| - case V2SI_FTYPE_V4SF:
|
| - type = v2si_ftype_v4sf;
|
| - break;
|
| - case V2SI_FTYPE_V2DF:
|
| - type = v2si_ftype_v2df;
|
| - break;
|
| - case V2SI_FTYPE_V2SF:
|
| - type = v2si_ftype_v2sf;
|
| - break;
|
| - case V2DF_FTYPE_V4DF:
|
| - type = v2df_ftype_v4df;
|
| - break;
|
| - case V2DF_FTYPE_V4SF:
|
| - type = v2df_ftype_v4sf;
|
| - break;
|
| - case V2DF_FTYPE_V2DF:
|
| - case V2DF_FTYPE_V2DF_VEC_MERGE:
|
| - type = v2df_ftype_v2df;
|
| - break;
|
| - case V2DF_FTYPE_V2SI:
|
| - type = v2df_ftype_v2si;
|
| - break;
|
| - case V2DF_FTYPE_V4SI:
|
| - type = v2df_ftype_v4si;
|
| - break;
|
| - case V2SF_FTYPE_V2SF:
|
| - type = v2sf_ftype_v2sf;
|
| - break;
|
| - case V2SF_FTYPE_V2SI:
|
| - type = v2sf_ftype_v2si;
|
| - break;
|
| - case V16QI_FTYPE_V16QI_V16QI:
|
| - type = v16qi_ftype_v16qi_v16qi;
|
| - break;
|
| - case V16QI_FTYPE_V8HI_V8HI:
|
| - type = v16qi_ftype_v8hi_v8hi;
|
| - break;
|
| - case V8QI_FTYPE_V8QI_V8QI:
|
| - type = v8qi_ftype_v8qi_v8qi;
|
| - break;
|
| - case V8QI_FTYPE_V4HI_V4HI:
|
| - type = v8qi_ftype_v4hi_v4hi;
|
| - break;
|
| - case V8HI_FTYPE_V8HI_V8HI:
|
| - case V8HI_FTYPE_V8HI_V8HI_COUNT:
|
| - type = v8hi_ftype_v8hi_v8hi;
|
| - break;
|
| - case V8HI_FTYPE_V16QI_V16QI:
|
| - type = v8hi_ftype_v16qi_v16qi;
|
| - break;
|
| - case V8HI_FTYPE_V4SI_V4SI:
|
| - type = v8hi_ftype_v4si_v4si;
|
| - break;
|
| - case V8HI_FTYPE_V8HI_SI_COUNT:
|
| - type = v8hi_ftype_v8hi_int;
|
| - break;
|
| - case V8SF_FTYPE_V8SF_V8SF:
|
| - type = v8sf_ftype_v8sf_v8sf;
|
| - break;
|
| - case V8SF_FTYPE_V8SF_V8SI:
|
| - type = v8sf_ftype_v8sf_v8si;
|
| - break;
|
| - case V4SI_FTYPE_V4SI_V4SI:
|
| - case V4SI_FTYPE_V4SI_V4SI_COUNT:
|
| - type = v4si_ftype_v4si_v4si;
|
| - break;
|
| - case V4SI_FTYPE_V8HI_V8HI:
|
| - type = v4si_ftype_v8hi_v8hi;
|
| - break;
|
| - case V4SI_FTYPE_V4SF_V4SF:
|
| - type = v4si_ftype_v4sf_v4sf;
|
| - break;
|
| - case V4SI_FTYPE_V2DF_V2DF:
|
| - type = v4si_ftype_v2df_v2df;
|
| - break;
|
| - case V4SI_FTYPE_V4SI_SI_COUNT:
|
| - type = v4si_ftype_v4si_int;
|
| - break;
|
| - case V4HI_FTYPE_V4HI_V4HI:
|
| - case V4HI_FTYPE_V4HI_V4HI_COUNT:
|
| - type = v4hi_ftype_v4hi_v4hi;
|
| - break;
|
| - case V4HI_FTYPE_V8QI_V8QI:
|
| - type = v4hi_ftype_v8qi_v8qi;
|
| - break;
|
| - case V4HI_FTYPE_V2SI_V2SI:
|
| - type = v4hi_ftype_v2si_v2si;
|
| - break;
|
| - case V4HI_FTYPE_V4HI_SI_COUNT:
|
| - type = v4hi_ftype_v4hi_int;
|
| - break;
|
| - case V4DF_FTYPE_V4DF_V4DF:
|
| - type = v4df_ftype_v4df_v4df;
|
| - break;
|
| - case V4DF_FTYPE_V4DF_V4DI:
|
| - type = v4df_ftype_v4df_v4di;
|
| - break;
|
| - case V4SF_FTYPE_V4SF_V4SF:
|
| - case V4SF_FTYPE_V4SF_V4SF_SWAP:
|
| - type = v4sf_ftype_v4sf_v4sf;
|
| - break;
|
| - case V4SF_FTYPE_V4SF_V4SI:
|
| - type = v4sf_ftype_v4sf_v4si;
|
| - break;
|
| - case V4SF_FTYPE_V4SF_V2SI:
|
| - type = v4sf_ftype_v4sf_v2si;
|
| - break;
|
| - case V4SF_FTYPE_V4SF_V2DF:
|
| - type = v4sf_ftype_v4sf_v2df;
|
| - break;
|
| - case V4SF_FTYPE_V4SF_DI:
|
| - type = v4sf_ftype_v4sf_int64;
|
| - break;
|
| - case V4SF_FTYPE_V4SF_SI:
|
| - type = v4sf_ftype_v4sf_int;
|
| - break;
|
| - case V2DI_FTYPE_V2DI_V2DI:
|
| - case V2DI_FTYPE_V2DI_V2DI_COUNT:
|
| - type = v2di_ftype_v2di_v2di;
|
| - break;
|
| - case V2DI_FTYPE_V16QI_V16QI:
|
| - type = v2di_ftype_v16qi_v16qi;
|
| - break;
|
| - case V2DI_FTYPE_V4SI_V4SI:
|
| - type = v2di_ftype_v4si_v4si;
|
| - break;
|
| - case V2DI_FTYPE_V2DI_V16QI:
|
| - type = v2di_ftype_v2di_v16qi;
|
| - break;
|
| - case V2DI_FTYPE_V2DF_V2DF:
|
| - type = v2di_ftype_v2df_v2df;
|
| - break;
|
| - case V2DI_FTYPE_V2DI_SI_COUNT:
|
| - type = v2di_ftype_v2di_int;
|
| - break;
|
| - case V2SI_FTYPE_V2SI_V2SI:
|
| - case V2SI_FTYPE_V2SI_V2SI_COUNT:
|
| - type = v2si_ftype_v2si_v2si;
|
| - break;
|
| - case V2SI_FTYPE_V4HI_V4HI:
|
| - type = v2si_ftype_v4hi_v4hi;
|
| - break;
|
| - case V2SI_FTYPE_V2SF_V2SF:
|
| - type = v2si_ftype_v2sf_v2sf;
|
| - break;
|
| - case V2SI_FTYPE_V2SI_SI_COUNT:
|
| - type = v2si_ftype_v2si_int;
|
| - break;
|
| - case V2DF_FTYPE_V2DF_V2DF:
|
| - case V2DF_FTYPE_V2DF_V2DF_SWAP:
|
| - type = v2df_ftype_v2df_v2df;
|
| - break;
|
| - case V2DF_FTYPE_V2DF_V4SF:
|
| - type = v2df_ftype_v2df_v4sf;
|
| - break;
|
| - case V2DF_FTYPE_V2DF_V2DI:
|
| - type = v2df_ftype_v2df_v2di;
|
| - break;
|
| - case V2DF_FTYPE_V2DF_DI:
|
| - type = v2df_ftype_v2df_int64;
|
| - break;
|
| - case V2DF_FTYPE_V2DF_SI:
|
| - type = v2df_ftype_v2df_int;
|
| - break;
|
| - case V2SF_FTYPE_V2SF_V2SF:
|
| - type = v2sf_ftype_v2sf_v2sf;
|
| - break;
|
| - case V1DI_FTYPE_V1DI_V1DI:
|
| - case V1DI_FTYPE_V1DI_V1DI_COUNT:
|
| - type = v1di_ftype_v1di_v1di;
|
| - break;
|
| - case V1DI_FTYPE_V8QI_V8QI:
|
| - type = v1di_ftype_v8qi_v8qi;
|
| - break;
|
| - case V1DI_FTYPE_V2SI_V2SI:
|
| - type = v1di_ftype_v2si_v2si;
|
| - break;
|
| - case V1DI_FTYPE_V1DI_SI_COUNT:
|
| - type = v1di_ftype_v1di_int;
|
| - break;
|
| - case UINT64_FTYPE_UINT64_UINT64:
|
| - type = uint64_ftype_uint64_uint64;
|
| - break;
|
| - case UINT_FTYPE_UINT_UINT:
|
| - type = unsigned_ftype_unsigned_unsigned;
|
| - break;
|
| - case UINT_FTYPE_UINT_USHORT:
|
| - type = unsigned_ftype_unsigned_ushort;
|
| - break;
|
| - case UINT_FTYPE_UINT_UCHAR:
|
| - type = unsigned_ftype_unsigned_uchar;
|
| - break;
|
| - case V8HI_FTYPE_V8HI_INT:
|
| - type = v8hi_ftype_v8hi_int;
|
| - break;
|
| - case V8SF_FTYPE_V8SF_INT:
|
| - type = v8sf_ftype_v8sf_int;
|
| - break;
|
| - case V4SI_FTYPE_V4SI_INT:
|
| - type = v4si_ftype_v4si_int;
|
| - break;
|
| - case V4SI_FTYPE_V8SI_INT:
|
| - type = v4si_ftype_v8si_int;
|
| - break;
|
| - case V4HI_FTYPE_V4HI_INT:
|
| - type = v4hi_ftype_v4hi_int;
|
| - break;
|
| - case V4DF_FTYPE_V4DF_INT:
|
| - type = v4df_ftype_v4df_int;
|
| - break;
|
| - case V4SF_FTYPE_V4SF_INT:
|
| - type = v4sf_ftype_v4sf_int;
|
| - break;
|
| - case V4SF_FTYPE_V8SF_INT:
|
| - type = v4sf_ftype_v8sf_int;
|
| - break;
|
| - case V2DI_FTYPE_V2DI_INT:
|
| - case V2DI2TI_FTYPE_V2DI_INT:
|
| - type = v2di_ftype_v2di_int;
|
| - break;
|
| - case V2DF_FTYPE_V2DF_INT:
|
| - type = v2df_ftype_v2df_int;
|
| - break;
|
| - case V2DF_FTYPE_V4DF_INT:
|
| - type = v2df_ftype_v4df_int;
|
| - break;
|
| - case V16QI_FTYPE_V16QI_V16QI_V16QI:
|
| - type = v16qi_ftype_v16qi_v16qi_v16qi;
|
| - break;
|
| - case V8SF_FTYPE_V8SF_V8SF_V8SF:
|
| - type = v8sf_ftype_v8sf_v8sf_v8sf;
|
| - break;
|
| - case V4DF_FTYPE_V4DF_V4DF_V4DF:
|
| - type = v4df_ftype_v4df_v4df_v4df;
|
| - break;
|
| - case V4SF_FTYPE_V4SF_V4SF_V4SF:
|
| - type = v4sf_ftype_v4sf_v4sf_v4sf;
|
| - break;
|
| - case V2DF_FTYPE_V2DF_V2DF_V2DF:
|
| - type = v2df_ftype_v2df_v2df_v2df;
|
| - break;
|
| - case V16QI_FTYPE_V16QI_V16QI_INT:
|
| - type = v16qi_ftype_v16qi_v16qi_int;
|
| - break;
|
| - case V8SI_FTYPE_V8SI_V8SI_INT:
|
| - type = v8si_ftype_v8si_v8si_int;
|
| - break;
|
| - case V8SI_FTYPE_V8SI_V4SI_INT:
|
| - type = v8si_ftype_v8si_v4si_int;
|
| - break;
|
| - case V8HI_FTYPE_V8HI_V8HI_INT:
|
| - type = v8hi_ftype_v8hi_v8hi_int;
|
| - break;
|
| - case V8SF_FTYPE_V8SF_V8SF_INT:
|
| - type = v8sf_ftype_v8sf_v8sf_int;
|
| - break;
|
| - case V8SF_FTYPE_V8SF_V4SF_INT:
|
| - type = v8sf_ftype_v8sf_v4sf_int;
|
| - break;
|
| - case V4SI_FTYPE_V4SI_V4SI_INT:
|
| - type = v4si_ftype_v4si_v4si_int;
|
| - break;
|
| - case V4DF_FTYPE_V4DF_V4DF_INT:
|
| - type = v4df_ftype_v4df_v4df_int;
|
| - break;
|
| - case V4DF_FTYPE_V4DF_V2DF_INT:
|
| - type = v4df_ftype_v4df_v2df_int;
|
| - break;
|
| - case V4SF_FTYPE_V4SF_V4SF_INT:
|
| - type = v4sf_ftype_v4sf_v4sf_int;
|
| - break;
|
| - case V2DI_FTYPE_V2DI_V2DI_INT:
|
| - case V2DI2TI_FTYPE_V2DI_V2DI_INT:
|
| - type = v2di_ftype_v2di_v2di_int;
|
| - break;
|
| - case V2DF_FTYPE_V2DF_V2DF_INT:
|
| - type = v2df_ftype_v2df_v2df_int;
|
| - break;
|
| - case V2DI_FTYPE_V2DI_UINT_UINT:
|
| - type = v2di_ftype_v2di_unsigned_unsigned;
|
| - break;
|
| - case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
|
| - type = v2di_ftype_v2di_v2di_unsigned_unsigned;
|
| - break;
|
| - case V1DI2DI_FTYPE_V1DI_V1DI_INT:
|
| - type = v1di_ftype_v1di_v1di_int;
|
| - break;
|
| - default:
|
| - gcc_unreachable ();
|
| - }
|
| -
|
| - def_builtin_const (d->mask, d->name, type, d->code);
|
| + ftype = (enum ix86_builtin_func_type) d->flag;
|
| + def_builtin_const (d->mask, d->name, ftype, d->code);
|
| }
|
|
|
| /* pcmpestr[im] insns. */
|
| @@ -23246,9 +22551,9 @@ ix86_init_mmx_sse_builtins (void)
|
| i++, d++)
|
| {
|
| if (d->code == IX86_BUILTIN_PCMPESTRM128)
|
| - ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
|
| + ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
|
| else
|
| - ftype = int_ftype_v16qi_int_v16qi_int_int;
|
| + ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
|
| def_builtin_const (d->mask, d->name, ftype, d->code);
|
| }
|
|
|
| @@ -23258,197 +22563,129 @@ ix86_init_mmx_sse_builtins (void)
|
| i++, d++)
|
| {
|
| if (d->code == IX86_BUILTIN_PCMPISTRM128)
|
| - ftype = v16qi_ftype_v16qi_v16qi_int;
|
| + ftype = V16QI_FTYPE_V16QI_V16QI_INT;
|
| else
|
| - ftype = int_ftype_v16qi_v16qi_int;
|
| + ftype = INT_FTYPE_V16QI_V16QI_INT;
|
| def_builtin_const (d->mask, d->name, ftype, d->code);
|
| }
|
|
|
| /* comi/ucomi insns. */
|
| for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
|
| - if (d->mask == OPTION_MASK_ISA_SSE2)
|
| - def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
|
| - else
|
| - def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
|
| + {
|
| + if (d->mask == OPTION_MASK_ISA_SSE2)
|
| + ftype = INT_FTYPE_V2DF_V2DF;
|
| + else
|
| + ftype = INT_FTYPE_V4SF_V4SF;
|
| + def_builtin_const (d->mask, d->name, ftype, d->code);
|
| + }
|
|
|
| /* SSE */
|
| - def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
|
| - def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
|
| + def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
|
| + VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
|
| + def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
|
| + UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
|
|
|
| /* SSE or 3DNow!A */
|
| - def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
|
| + def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
|
| + "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
|
| + IX86_BUILTIN_MASKMOVQ);
|
|
|
| /* SSE2 */
|
| - def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
|
| + def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
|
| + VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
|
|
|
| - def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
|
| - x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
|
| + def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
|
| + VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
|
| + x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
|
| + VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
|
|
|
| /* SSE3. */
|
| - def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
|
| - def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
|
| + def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
|
| + VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
|
| + def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
|
| + VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
|
|
|
| /* AES */
|
| - def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
|
| - def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
|
| - def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
|
| - def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
|
| - def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
|
| - def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
|
| + def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
|
| + V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
|
| + def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
|
| + V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
|
| + def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
|
| + V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
|
| + def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
|
| + V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
|
| + def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
|
| + V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
|
| + def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
|
| + V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
|
|
|
| /* PCLMUL */
|
| - def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
|
| + def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
|
| + V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
|
|
|
| - /* AVX */
|
| - def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
|
| - TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
|
| -
|
| - /* Access to the vec_init patterns. */
|
| - ftype = build_function_type_list (V2SI_type_node, integer_type_node,
|
| - integer_type_node, NULL_TREE);
|
| - def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
|
| -
|
| - ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
|
| - short_integer_type_node,
|
| - short_integer_type_node,
|
| - short_integer_type_node, NULL_TREE);
|
| - def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
|
| -
|
| - ftype = build_function_type_list (V8QI_type_node, char_type_node,
|
| - char_type_node, char_type_node,
|
| - char_type_node, char_type_node,
|
| - char_type_node, char_type_node,
|
| - char_type_node, NULL_TREE);
|
| - def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
|
| + /* MMX access to the vec_init patterns. */
|
| + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
|
| + V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
|
|
|
| - /* Access to the vec_extract patterns. */
|
| - ftype = build_function_type_list (double_type_node, V2DF_type_node,
|
| - integer_type_node, NULL_TREE);
|
| - def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
|
| + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
|
| + V4HI_FTYPE_HI_HI_HI_HI,
|
| + IX86_BUILTIN_VEC_INIT_V4HI);
|
|
|
| - ftype = build_function_type_list (long_long_integer_type_node,
|
| - V2DI_type_node, integer_type_node,
|
| - NULL_TREE);
|
| - def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
|
| + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
|
| + V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
|
| + IX86_BUILTIN_VEC_INIT_V8QI);
|
|
|
| - ftype = build_function_type_list (float_type_node, V4SF_type_node,
|
| - integer_type_node, NULL_TREE);
|
| - def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
|
| + /* Access to the vec_extract patterns. */
|
| + def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
|
| + DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
|
| + def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
|
| + DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
|
| + def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
|
| + FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
|
| + def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
|
| + SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
|
| + def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
|
| + HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
|
| +
|
| + def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
|
| + "__builtin_ia32_vec_ext_v4hi",
|
| + HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
|
| +
|
| + def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
|
| + SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
|
| +
|
| + def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
|
| + QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
|
|
|
| - ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
|
| - integer_type_node, NULL_TREE);
|
| - def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
|
| + /* Access to the vec_set patterns. */
|
| + def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
|
| + "__builtin_ia32_vec_set_v2di",
|
| + V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
|
|
|
| - ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
|
| - integer_type_node, NULL_TREE);
|
| - def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
|
| + def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
|
| + V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
|
|
|
| - ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
|
| - integer_type_node, NULL_TREE);
|
| - def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
|
| + def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
|
| + V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
|
|
|
| - ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
|
| - integer_type_node, NULL_TREE);
|
| - def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
|
| + def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
|
| + V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
|
|
|
| - ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
|
| - integer_type_node, NULL_TREE);
|
| - def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
|
| + def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
|
| + "__builtin_ia32_vec_set_v4hi",
|
| + V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
|
|
|
| - /* Access to the vec_set patterns. */
|
| - ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
|
| - intDI_type_node,
|
| - integer_type_node, NULL_TREE);
|
| - def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
|
| -
|
| - ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
|
| - float_type_node,
|
| - integer_type_node, NULL_TREE);
|
| - def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
|
| -
|
| - ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
|
| - intSI_type_node,
|
| - integer_type_node, NULL_TREE);
|
| - def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
|
| -
|
| - ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
|
| - intHI_type_node,
|
| - integer_type_node, NULL_TREE);
|
| - def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
|
| -
|
| - ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
|
| - intHI_type_node,
|
| - integer_type_node, NULL_TREE);
|
| - def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
|
| -
|
| - ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
|
| - intQI_type_node,
|
| - integer_type_node, NULL_TREE);
|
| - def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
|
| -
|
| - /* Add SSE5 multi-arg argument instructions */
|
| + def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
|
| + V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
|
| +
|
| + /* Add FMA4 multi-arg instructions.  */
|
| for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
|
| {
|
| - tree mtype = NULL_TREE;
|
| -
|
| if (d->name == 0)
|
| continue;
|
|
|
| - switch ((enum multi_arg_type)d->flag)
|
| - {
|
| - case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
|
| - case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
|
| - case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
|
| - case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
|
| - case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
|
| - case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
|
| - case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
|
| - case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
|
| - case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
|
| - case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
|
| - case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
|
| - case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
|
| - case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
|
| - case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
|
| - case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
|
| - case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
|
| - case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
|
| - case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
|
| - case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
|
| - case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
|
| - case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
|
| - case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
|
| - case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
|
| - case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
|
| - case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
|
| - case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
|
| - case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
|
| - case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
|
| - case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
|
| - case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
|
| - case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
|
| - case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
|
| - case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
|
| - case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
|
| - case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
|
| - case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
|
| - case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
|
| - case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
|
| - case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
|
| - case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
|
| - case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
|
| - case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
|
| - case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
|
| - case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
|
| - case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
|
| - case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
|
| - case MULTI_ARG_UNKNOWN:
|
| - default:
|
| - gcc_unreachable ();
|
| - }
|
| -
|
| - if (mtype)
|
| - def_builtin_const (d->mask, d->name, mtype, d->code);
|
| + ftype = (enum ix86_builtin_func_type) d->flag;
|
| + def_builtin_const (d->mask, d->name, ftype, d->code);
|
| }
|
| }
|
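
The same collapse is applied to the SSE5-era multi-arg table: the forty-plus
MULTI_ARG_* cases that used to pick a hand-built tree now go through the
shared ix86_builtin_func_type enum, and, per the renamed comment, the table
now describes the FMA4/XOP forms, including the new four-operand
MULTI_ARG_4_* shapes handled in the expander hunks below.
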
|
|
| @@ -23502,65 +22739,76 @@ ix86_init_builtins_va_builtins_abi (void)
|
| }
|
|
|
| static void
|
| -ix86_init_builtins (void)
|
| +ix86_init_builtin_types (void)
|
| {
|
| - tree float128_type_node = make_node (REAL_TYPE);
|
| - tree ftype, decl;
|
| + tree float128_type_node, float80_type_node;
|
|
|
| /* The __float80 type. */
|
| - if (TYPE_MODE (long_double_type_node) == XFmode)
|
| - (*lang_hooks.types.register_builtin_type) (long_double_type_node,
|
| - "__float80");
|
| - else
|
| + float80_type_node = long_double_type_node;
|
| + if (TYPE_MODE (float80_type_node) != XFmode)
|
| {
|
| /* The __float80 type. */
|
| - tree float80_type_node = make_node (REAL_TYPE);
|
| + float80_type_node = make_node (REAL_TYPE);
|
|
|
| TYPE_PRECISION (float80_type_node) = 80;
|
| layout_type (float80_type_node);
|
| - (*lang_hooks.types.register_builtin_type) (float80_type_node,
|
| - "__float80");
|
| }
|
| + (*lang_hooks.types.register_builtin_type) (float80_type_node, "__float80");
|
|
|
| /* The __float128 type. */
|
| + float128_type_node = make_node (REAL_TYPE);
|
| TYPE_PRECISION (float128_type_node) = 128;
|
| layout_type (float128_type_node);
|
| - (*lang_hooks.types.register_builtin_type) (float128_type_node,
|
| - "__float128");
|
| + (*lang_hooks.types.register_builtin_type) (float128_type_node, "__float128");
|
| +
|
| + /* This macro is built by i386-builtin-types.awk. */
|
| + DEFINE_BUILTIN_PRIMITIVE_TYPES;
|
| +}
|
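
Splitting type registration into ix86_init_builtin_types has one visible
behavior change: when long double is already XFmode, __float80 now simply
reuses long_double_type_node instead of registering it on a second code
path, so a single register_builtin_type call covers both cases. The bulk of
the primitive types formerly built by hand is emitted by
DEFINE_BUILTIN_PRIMITIVE_TYPES, generated by i386-builtin-types.awk.
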
| +
|
| +static void
|
| +ix86_init_builtins (void)
|
| +{
|
| + tree t;
|
| +
|
| + ix86_init_builtin_types ();
|
|
|
| /* TFmode support builtins. */
|
| - ftype = build_function_type (float128_type_node, void_list_node);
|
| - decl = add_builtin_function ("__builtin_infq", ftype,
|
| - IX86_BUILTIN_INFQ, BUILT_IN_MD,
|
| - NULL, NULL_TREE);
|
| - ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
|
| + def_builtin_const (0, "__builtin_infq",
|
| + FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
|
| + def_builtin_const (0, "__builtin_huge_valq",
|
| + FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
|
|
|
| /* We will expand them to a normal call if SSE2 isn't available since
|
| they are used by libgcc. */
|
| - ftype = build_function_type_list (float128_type_node,
|
| - float128_type_node,
|
| - NULL_TREE);
|
| - decl = add_builtin_function ("__builtin_fabsq", ftype,
|
| - IX86_BUILTIN_FABSQ, BUILT_IN_MD,
|
| - "__fabstf2", NULL_TREE);
|
| - ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
|
| - TREE_READONLY (decl) = 1;
|
| -
|
| - ftype = build_function_type_list (float128_type_node,
|
| - float128_type_node,
|
| - float128_type_node,
|
| - NULL_TREE);
|
| - decl = add_builtin_function ("__builtin_copysignq", ftype,
|
| - IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
|
| - "__copysigntf3", NULL_TREE);
|
| - ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
|
| - TREE_READONLY (decl) = 1;
|
| + t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
|
| + t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
|
| + BUILT_IN_MD, "__fabstf2", NULL_TREE);
|
| + TREE_READONLY (t) = 1;
|
| + ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
|
| +
|
| + t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
|
| + t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
|
| + BUILT_IN_MD, "__copysigntf3", NULL_TREE);
|
| + TREE_READONLY (t) = 1;
|
| + ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
|
|
|
| ix86_init_mmx_sse_builtins ();
|
| +
|
| if (TARGET_64BIT)
|
| ix86_init_builtins_va_builtins_abi ();
|
| }
|
|
|
| +/* Return the ix86 builtin for CODE. */
|
| +
|
| +static tree
|
| +ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
|
| +{
|
| + if (code >= IX86_BUILTIN_MAX)
|
| + return error_mark_node;
|
| +
|
| + return ix86_builtins[code];
|
| +}
|
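
This is presumably the new builtin-decl target hook: given a builtin code it
hands back the decl recorded in ix86_builtins[], and returning
error_mark_node for out-of-range codes gives callers a well-defined failure
value instead of an out-of-bounds read.
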
| +
|
| /* Errors in the source file can cause expand_expr to return const0_rtx
|
| where we expect a vector. To avoid crashing, use one of the vector
|
| clear instructions. */
|
| @@ -23621,8 +22869,8 @@ ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
|
|
|
| static rtx
|
| ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
|
| - enum multi_arg_type m_type,
|
| - enum insn_code sub_code)
|
| + enum ix86_builtin_func_type m_type,
|
| + enum rtx_code sub_code)
|
| {
|
| rtx pat;
|
| int i;
|
| @@ -23640,16 +22888,28 @@ ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
|
|
|
| switch (m_type)
|
| {
|
| + case MULTI_ARG_4_DF2_DI_I:
|
| + case MULTI_ARG_4_DF2_DI_I1:
|
| + case MULTI_ARG_4_SF2_SI_I:
|
| + case MULTI_ARG_4_SF2_SI_I1:
|
| + nargs = 4;
|
| + last_arg_constant = true;
|
| + break;
|
| +
|
| case MULTI_ARG_3_SF:
|
| case MULTI_ARG_3_DF:
|
| + case MULTI_ARG_3_SF2:
|
| + case MULTI_ARG_3_DF2:
|
| case MULTI_ARG_3_DI:
|
| case MULTI_ARG_3_SI:
|
| case MULTI_ARG_3_SI_DI:
|
| case MULTI_ARG_3_HI:
|
| case MULTI_ARG_3_HI_SI:
|
| case MULTI_ARG_3_QI:
|
| - case MULTI_ARG_3_PERMPS:
|
| - case MULTI_ARG_3_PERMPD:
|
| + case MULTI_ARG_3_DI2:
|
| + case MULTI_ARG_3_SI2:
|
| + case MULTI_ARG_3_HI2:
|
| + case MULTI_ARG_3_QI2:
|
| nargs = 3;
|
| break;
|
|
|
| @@ -23672,6 +22932,8 @@ ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
|
|
|
| case MULTI_ARG_1_SF:
|
| case MULTI_ARG_1_DF:
|
| + case MULTI_ARG_1_SF2:
|
| + case MULTI_ARG_1_DF2:
|
| case MULTI_ARG_1_DI:
|
| case MULTI_ARG_1_SI:
|
| case MULTI_ARG_1_HI:
|
| @@ -23682,13 +22944,9 @@ ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
|
| case MULTI_ARG_1_QI_DI:
|
| case MULTI_ARG_1_QI_SI:
|
| case MULTI_ARG_1_QI_HI:
|
| - case MULTI_ARG_1_PH2PS:
|
| - case MULTI_ARG_1_PS2PH:
|
| nargs = 1;
|
| break;
|
|
|
| - case MULTI_ARG_2_SF_CMP:
|
| - case MULTI_ARG_2_DF_CMP:
|
| case MULTI_ARG_2_DI_CMP:
|
| case MULTI_ARG_2_SI_CMP:
|
| case MULTI_ARG_2_HI_CMP:
|
| @@ -23707,7 +22965,6 @@ ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
|
| tf_p = true;
|
| break;
|
|
|
| - case MULTI_ARG_UNKNOWN:
|
| default:
|
| gcc_unreachable ();
|
| }
|
| @@ -23728,7 +22985,7 @@ ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
|
|
|
| if (last_arg_constant && i == nargs-1)
|
| {
|
| - if (GET_CODE (op) != CONST_INT)
|
| + if (!CONST_INT_P (op))
|
| {
|
| error ("last argument must be an immediate");
|
| return gen_reg_rtx (tmode);
|
| @@ -23782,6 +23039,10 @@ ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
|
| pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
|
| break;
|
|
|
| + case 4:
|
| + pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
|
| + args[2].op, args[3].op);
|
| + break;
|
| +
|
| default:
|
| gcc_unreachable ();
|
| }
|
| @@ -24201,7 +23462,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
|
| bool swap = false;
|
| enum rtx_code comparison = d->comparison;
|
|
|
| - switch ((enum ix86_builtin_type) d->flag)
|
| + switch ((enum ix86_builtin_func_type) d->flag)
|
| {
|
| case INT_FTYPE_V8SF_V8SF_PTEST:
|
| case INT_FTYPE_V4DI_V4DI_PTEST:
|
| @@ -24212,6 +23473,10 @@ ix86_expand_args_builtin (const struct builtin_description *d,
|
| return ix86_expand_sse_ptest (d, exp, target);
|
| case FLOAT128_FTYPE_FLOAT128:
|
| case FLOAT_FTYPE_FLOAT:
|
| + case INT_FTYPE_INT:
|
| + case UINT64_FTYPE_INT:
|
| + case UINT16_FTYPE_UINT16:
|
| + case INT64_FTYPE_INT64:
|
| case INT64_FTYPE_V4SF:
|
| case INT64_FTYPE_V2DF:
|
| case INT_FTYPE_V16QI:
|
| @@ -24337,11 +23602,13 @@ ix86_expand_args_builtin (const struct builtin_description *d,
|
| case UINT_FTYPE_UINT_UINT:
|
| case UINT_FTYPE_UINT_USHORT:
|
| case UINT_FTYPE_UINT_UCHAR:
|
| + case UINT16_FTYPE_UINT16_INT:
|
| + case UINT8_FTYPE_UINT8_INT:
|
| nargs = 2;
|
| break;
|
| - case V2DI2TI_FTYPE_V2DI_INT:
|
| + case V2DI_FTYPE_V2DI_INT_CONVERT:
|
| nargs = 2;
|
| - rmode = V2DImode;
|
| + rmode = V1TImode;
|
| nargs_constant = 1;
|
| break;
|
| case V8HI_FTYPE_V8HI_INT:
|
| @@ -24380,12 +23647,12 @@ ix86_expand_args_builtin (const struct builtin_description *d,
|
| nargs = 3;
|
| nargs_constant = 1;
|
| break;
|
| - case V2DI2TI_FTYPE_V2DI_V2DI_INT:
|
| + case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
|
| nargs = 3;
|
| rmode = V2DImode;
|
| nargs_constant = 1;
|
| break;
|
| - case V1DI2DI_FTYPE_V1DI_V1DI_INT:
|
| + case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
|
| nargs = 3;
|
| rmode = DImode;
|
| nargs_constant = 1;
|
| @@ -24394,6 +23661,13 @@ ix86_expand_args_builtin (const struct builtin_description *d,
|
| nargs = 3;
|
| nargs_constant = 2;
|
| break;
|
| + case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
|
| + case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
|
| + case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
|
| + case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
|
| + nargs = 4;
|
| + nargs_constant = 1;
|
| + break;
|
| case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
|
| nargs = 4;
|
| nargs_constant = 2;
|
| @@ -24463,6 +23737,10 @@ ix86_expand_args_builtin (const struct builtin_description *d,
|
|
|
| case CODE_FOR_sse4_1_blendpd:
|
| case CODE_FOR_avx_vpermilv2df:
|
| + case CODE_FOR_xop_vpermil2v2df3:
|
| + case CODE_FOR_xop_vpermil2v4sf3:
|
| + case CODE_FOR_xop_vpermil2v4df3:
|
| + case CODE_FOR_xop_vpermil2v8sf3:
|
| error ("the last argument must be a 2-bit immediate");
|
| return const0_rtx;
|
|
|
| @@ -24569,18 +23847,24 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
|
| {
|
| rtx op;
|
| enum machine_mode mode;
|
| - } args[2];
|
| + } args[3];
|
| enum insn_code icode = d->icode;
|
| bool last_arg_constant = false;
|
| const struct insn_data *insn_p = &insn_data[icode];
|
| enum machine_mode tmode = insn_p->operand[0].mode;
|
| enum { load, store } klass;
|
|
|
| - switch ((enum ix86_special_builtin_type) d->flag)
|
| + switch ((enum ix86_builtin_func_type) d->flag)
|
| {
|
| case VOID_FTYPE_VOID:
|
| emit_insn (GEN_FCN (icode) (target));
|
| return 0;
|
| + case UINT64_FTYPE_VOID:
|
| + nargs = 0;
|
| + klass = load;
|
| + memory = 0;
|
| + break;
|
| + case UINT64_FTYPE_PUNSIGNED:
|
| case V2DI_FTYPE_PV2DI:
|
| case V32QI_FTYPE_PCCHAR:
|
| case V16QI_FTYPE_PCCHAR:
|
| @@ -24590,6 +23874,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
|
| case V4DF_FTYPE_PCV2DF:
|
| case V4DF_FTYPE_PCDOUBLE:
|
| case V2DF_FTYPE_PCDOUBLE:
|
| + case VOID_FTYPE_PVOID:
|
| nargs = 1;
|
| klass = load;
|
| memory = 0;
|
| @@ -24603,7 +23888,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
|
| case VOID_FTYPE_PFLOAT_V4SF:
|
| case VOID_FTYPE_PDOUBLE_V4DF:
|
| case VOID_FTYPE_PDOUBLE_V2DF:
|
| - case VOID_FTYPE_PDI_DI:
|
| + case VOID_FTYPE_PULONGLONG_ULONGLONG:
|
| case VOID_FTYPE_PINT_INT:
|
| nargs = 1;
|
| klass = store;
|
| @@ -24633,6 +23918,15 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
|
| /* Reserve memory operand for target. */
|
| memory = ARRAY_SIZE (args);
|
| break;
|
| + case VOID_FTYPE_UINT_UINT_UINT:
|
| + case VOID_FTYPE_UINT64_UINT_UINT:
|
| + case UCHAR_FTYPE_UINT_UINT_UINT:
|
| + case UCHAR_FTYPE_UINT64_UINT_UINT:
|
| + nargs = 3;
|
| + klass = load;
|
| + memory = ARRAY_SIZE (args);
|
| + last_arg_constant = true;
|
| + break;
|
| default:
|
| gcc_unreachable ();
|
| }
|
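
The new special-arg shapes extend the expander in two directions:
UINT64_FTYPE_VOID is a zero-operand "load" (a builtin that only produces a
value, rdtsc-style), UINT64_FTYPE_PUNSIGNED adds the one-pointer-operand
variant, and the three-operand UINT/UINT64 cases with last_arg_constant set
back the LWP lwpval/lwpins builtins whose immediate checking appears a few
hunks below.
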
| @@ -24669,12 +23963,16 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
|
| if (last_arg_constant && (i + 1) == nargs)
|
| {
|
| if (!match)
|
| - switch (icode)
|
| - {
|
| - default:
|
| + {
|
| + if (icode == CODE_FOR_lwp_lwpvalsi3
|
| + || icode == CODE_FOR_lwp_lwpinssi3
|
| + || icode == CODE_FOR_lwp_lwpvaldi3
|
| + || icode == CODE_FOR_lwp_lwpinsdi3)
|
| + error ("the last argument must be a 32-bit immediate");
|
| + else
|
| error ("the last argument must be an 8-bit immediate");
|
| - return const0_rtx;
|
| - }
|
| + return const0_rtx;
|
| + }
|
| }
|
| else
|
| {
|
| @@ -24703,12 +24001,18 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
|
|
|
| switch (nargs)
|
| {
|
| + case 0:
|
| + pat = GEN_FCN (icode) (target);
|
| + break;
|
| case 1:
|
| pat = GEN_FCN (icode) (target, args[0].op);
|
| break;
|
| case 2:
|
| pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
|
| break;
|
| + case 3:
|
| + pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
|
| + break;
|
| default:
|
| gcc_unreachable ();
|
| }
|
| @@ -24989,7 +24293,22 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
|
| case IX86_BUILTIN_VEC_SET_V16QI:
|
| return ix86_expand_vec_set_builtin (exp);
|
|
|
| + case IX86_BUILTIN_VEC_PERM_V2DF:
|
| + case IX86_BUILTIN_VEC_PERM_V4SF:
|
| + case IX86_BUILTIN_VEC_PERM_V2DI:
|
| + case IX86_BUILTIN_VEC_PERM_V4SI:
|
| + case IX86_BUILTIN_VEC_PERM_V8HI:
|
| + case IX86_BUILTIN_VEC_PERM_V16QI:
|
| + case IX86_BUILTIN_VEC_PERM_V2DI_U:
|
| + case IX86_BUILTIN_VEC_PERM_V4SI_U:
|
| + case IX86_BUILTIN_VEC_PERM_V8HI_U:
|
| + case IX86_BUILTIN_VEC_PERM_V16QI_U:
|
| + case IX86_BUILTIN_VEC_PERM_V4DF:
|
| + case IX86_BUILTIN_VEC_PERM_V8SF:
|
| + return ix86_expand_vec_perm_builtin (exp);
|
| +
|
| case IX86_BUILTIN_INFQ:
|
| + case IX86_BUILTIN_HUGE_VALQ:
|
| {
|
| REAL_VALUE_TYPE inf;
|
| rtx tmp;
|
| @@ -25006,6 +24325,23 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
|
| return target;
|
| }
|
|
|
| + case IX86_BUILTIN_LLWPCB:
|
| + arg0 = CALL_EXPR_ARG (exp, 0);
|
| + op0 = expand_normal (arg0);
|
| + icode = CODE_FOR_lwp_llwpcb;
|
| + if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
|
| + op0 = copy_to_mode_reg (Pmode, op0);
|
| + emit_insn (gen_lwp_llwpcb (op0));
|
| + return 0;
|
| +
|
| + case IX86_BUILTIN_SLWPCB:
|
| + icode = CODE_FOR_lwp_slwpcb;
|
| + if (!target
|
| + || ! (*insn_data[icode].operand[0].predicate) (target, Pmode))
|
| + target = gen_reg_rtx (Pmode);
|
| + emit_insn (gen_lwp_slwpcb (target));
|
| + return target;
|
| +
|
| default:
|
| break;
|
| }
|
| @@ -25050,8 +24386,8 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
|
| for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
|
| if (d->code == fcode)
|
| return ix86_expand_multi_arg_builtin (d->icode, exp, target,
|
| - (enum multi_arg_type)d->flag,
|
| - d->comparison);
|
| + (enum ix86_builtin_func_type)
|
| + d->flag, d->comparison);
|
|
|
| gcc_unreachable ();
|
| }
|
| @@ -25061,14 +24397,16 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
|
| if it is not available. */
|
|
|
| static tree
|
| -ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
|
| +ix86_builtin_vectorized_function (tree fndecl, tree type_out,
|
| tree type_in)
|
| {
|
| enum machine_mode in_mode, out_mode;
|
| int in_n, out_n;
|
| + enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
|
|
|
| if (TREE_CODE (type_out) != VECTOR_TYPE
|
| - || TREE_CODE (type_in) != VECTOR_TYPE)
|
| + || TREE_CODE (type_in) != VECTOR_TYPE
|
| + || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
|
| return NULL_TREE;
|
|
|
| out_mode = TYPE_MODE (TREE_TYPE (type_out));
|
| @@ -25102,13 +24440,26 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
|
| return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
|
| break;
|
|
|
| + case BUILT_IN_COPYSIGN:
|
| + if (out_mode == DFmode && out_n == 2
|
| + && in_mode == DFmode && in_n == 2)
|
| + return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
|
| + break;
|
| +
|
| + case BUILT_IN_COPYSIGNF:
|
| + if (out_mode == SFmode && out_n == 4
|
| + && in_mode == SFmode && in_n == 4)
|
| + return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
|
| + break;
|
| +
|
| default:
|
| ;
|
| }
|
|
|
| /* Dispatch to a handler for a vectorization library. */
|
| if (ix86_veclib_handler)
|
| - return (*ix86_veclib_handler)(fn, type_out, type_in);
|
| + return (*ix86_veclib_handler) ((enum built_in_function) fn, type_out,
|
| + type_in);
|
|
|
| return NULL_TREE;
|
| }
|
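
Besides taking the fndecl (and checking BUILT_IN_NORMAL) instead of a bare
function code, the hook learns to vectorize copysign: with the two new
cases, a loop over doubles such as the one below can be turned into the
IX86_BUILTIN_CPYSGNPD builtin (sketch; the actual instruction selection is
whatever the CPYSGNPD pattern expands to):

    void scale_signs (double *x, const double *y, int n)
    {
      int i;
      for (i = 0; i < n; i++)
        x[i] = __builtin_copysign (x[i], y[i]);
    }

The SFmode/V4SF case works the same way through IX86_BUILTIN_CPYSGNPS.
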
| @@ -25216,7 +24567,8 @@ ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
|
| fntype = build_function_type_list (type_out, type_in, type_in, NULL);
|
|
|
| /* Build a function declaration for the vectorized function. */
|
| - new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
|
| + new_fndecl = build_decl (BUILTINS_LOCATION,
|
| + FUNCTION_DECL, get_identifier (name), fntype);
|
| TREE_PUBLIC (new_fndecl) = 1;
|
| DECL_EXTERNAL (new_fndecl) = 1;
|
| DECL_IS_NOVOPS (new_fndecl) = 1;
|
| @@ -25300,7 +24652,8 @@ ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
|
| fntype = build_function_type_list (type_out, type_in, type_in, NULL);
|
|
|
| /* Build a function declaration for the vectorized function. */
|
| - new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
|
| + new_fndecl = build_decl (BUILTINS_LOCATION,
|
| + FUNCTION_DECL, get_identifier (name), fntype);
|
| TREE_PUBLIC (new_fndecl) = 1;
|
| DECL_EXTERNAL (new_fndecl) = 1;
|
| DECL_IS_NOVOPS (new_fndecl) = 1;
|
| @@ -25318,9 +24671,7 @@ ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
|
| static tree
|
| ix86_vectorize_builtin_conversion (unsigned int code, tree type)
|
| {
|
| - if (!TARGET_SSE2 || TREE_CODE (type) != VECTOR_TYPE
|
| - /* There are only conversions from/to signed integers. */
|
| - || TYPE_UNSIGNED (TREE_TYPE (type)))
|
| + if (! (TARGET_SSE2 && TREE_CODE (type) == VECTOR_TYPE))
|
| return NULL_TREE;
|
|
|
| switch (code)
|
| @@ -25329,7 +24680,9 @@ ix86_vectorize_builtin_conversion (unsigned int code, tree type)
|
| switch (TYPE_MODE (type))
|
| {
|
| case V4SImode:
|
| - return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
|
| + return TYPE_UNSIGNED (type)
|
| + ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
|
| + : ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
|
| default:
|
| return NULL_TREE;
|
| }
|
| @@ -25338,7 +24691,9 @@ ix86_vectorize_builtin_conversion (unsigned int code, tree type)
|
| switch (TYPE_MODE (type))
|
| {
|
| case V4SImode:
|
| - return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
|
| + return TYPE_UNSIGNED (type)
|
| + ? NULL_TREE
|
| + : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
|
| default:
|
| return NULL_TREE;
|
| }
|
| @@ -25355,7 +24710,7 @@ static tree
|
| ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
|
| bool sqrt ATTRIBUTE_UNUSED)
|
| {
|
| - if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
|
| + if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
|
| && flag_finite_math_only && !flag_trapping_math
|
| && flag_unsafe_math_optimizations))
|
| return NULL_TREE;
|
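
Note that TARGET_RECIP disappears from this gate, so the RSQRT/RCP
replacements are offered under the fast-math conditions alone; presumably
the -mrecip test is applied elsewhere (e.g. by the callers or patterns),
since nothing in this hunk re-checks it.
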
| @@ -25383,6 +24738,134 @@ ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
|
| return NULL_TREE;
|
| }
|
| }
|
| +
|
| +/* Helper for avx_vpermilps256_operand et al. This is also used by
|
| + the expansion functions to turn the parallel back into a mask.
|
| + The return value is 0 for no match and the imm8+1 for a match. */
|
| +
|
| +int
|
| +avx_vpermilp_parallel (rtx par, enum machine_mode mode)
|
| +{
|
| + unsigned i, nelt = GET_MODE_NUNITS (mode);
|
| + unsigned mask = 0;
|
| + unsigned char ipar[8];
|
| +
|
| + if (XVECLEN (par, 0) != (int) nelt)
|
| + return 0;
|
| +
|
| + /* Validate that all of the elements are constants, and not totally
|
| + out of range. Copy the data into an integral array to make the
|
| + subsequent checks easier. */
|
| + for (i = 0; i < nelt; ++i)
|
| + {
|
| + rtx er = XVECEXP (par, 0, i);
|
| + unsigned HOST_WIDE_INT ei;
|
| +
|
| + if (!CONST_INT_P (er))
|
| + return 0;
|
| + ei = INTVAL (er);
|
| + if (ei >= nelt)
|
| + return 0;
|
| + ipar[i] = ei;
|
| + }
|
| +
|
| + switch (mode)
|
| + {
|
| + case V4DFmode:
|
| + /* In the 256-bit DFmode case, we can only move elements within
|
| + a 128-bit lane. */
|
| + for (i = 0; i < 2; ++i)
|
| + {
|
| + if (ipar[i] >= 2)
|
| + return 0;
|
| + mask |= ipar[i] << i;
|
| + }
|
| + for (i = 2; i < 4; ++i)
|
| + {
|
| + if (ipar[i] < 2)
|
| + return 0;
|
| + mask |= (ipar[i] - 2) << i;
|
| + }
|
| + break;
|
| +
|
| + case V8SFmode:
|
| + /* In the 256-bit SFmode case, we have full freedom of movement
|
| + within the low 128-bit lane, but the high 128-bit lane must
|
| + mirror the exact same pattern. */
|
| + for (i = 0; i < 4; ++i)
|
| + if (ipar[i] + 4 != ipar[i + 4])
|
| + return 0;
|
| + nelt = 4;
|
| + /* FALLTHRU */
|
| +
|
| + case V2DFmode:
|
| + case V4SFmode:
|
| + /* In the 128-bit case, we've full freedom in the placement of
|
| + the elements from the source operand. */
|
| + for (i = 0; i < nelt; ++i)
|
| + mask |= ipar[i] << (i * (nelt / 2));
|
| + break;
|
| +
|
| + default:
|
| + gcc_unreachable ();
|
| + }
|
| +
|
| + /* Make sure success has a non-zero value by adding one. */
|
| + return mask + 1;
|
| +}
|
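
The packing logic is easy to sanity-check outside the compiler. A minimal
standalone sketch of the V4SFmode branch (nelt = 4, so each lane index
occupies nelt/2 = 2 bits of the imm8):

    #include <stdio.h>

    /* Mirrors the 128-bit SFmode case of avx_vpermilp_parallel: pack four
       2-bit lane indices into an imm8, returning imm8 + 1 on success and
       0 on failure.  */
    static int demo_vpermilp_v4sf (const unsigned char ipar[4])
    {
      unsigned mask = 0;
      unsigned i;

      for (i = 0; i < 4; i++)
        {
          if (ipar[i] >= 4)
            return 0;                   /* index out of range */
          mask |= ipar[i] << (i * 2);
        }
      return mask + 1;                  /* non-zero marks success */
    }

    int main (void)
    {
      static const unsigned char swap_pairs[4] = { 1, 0, 3, 2 };

      /* Prints imm8 = 0xb1: 1 | (0 << 2) | (3 << 4) | (2 << 6).  */
      printf ("imm8 = %#x\n", demo_vpermilp_v4sf (swap_pairs) - 1);
      return 0;
    }
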
| +
|
| +/* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
|
| + the expansion functions to turn the parallel back into a mask.
|
| + The return value is 0 for no match and the imm8+1 for a match. */
|
| +
|
| +int
|
| +avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
|
| +{
|
| + unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
|
| + unsigned mask = 0;
|
| + unsigned char ipar[8];
|
| +
|
| + if (XVECLEN (par, 0) != (int) nelt)
|
| + return 0;
|
| +
|
| + /* Validate that all of the elements are constants, and not totally
|
| + out of range. Copy the data into an integral array to make the
|
| + subsequent checks easier. */
|
| + for (i = 0; i < nelt; ++i)
|
| + {
|
| + rtx er = XVECEXP (par, 0, i);
|
| + unsigned HOST_WIDE_INT ei;
|
| +
|
| + if (!CONST_INT_P (er))
|
| + return 0;
|
| + ei = INTVAL (er);
|
| + if (ei >= 2 * nelt)
|
| + return 0;
|
| + ipar[i] = ei;
|
| + }
|
| +
|
| + /* Validate that each half of the permute selects consecutive elements.  */
|
| + for (i = 0; i < nelt2 - 1; ++i)
|
| + if (ipar[i] + 1 != ipar[i + 1])
|
| + return 0;
|
| + for (i = nelt2; i < nelt - 1; ++i)
|
| + if (ipar[i] + 1 != ipar[i + 1])
|
| + return 0;
|
| +
|
| + /* Reconstruct the mask. */
|
| + for (i = 0; i < 2; ++i)
|
| + {
|
| + unsigned e = ipar[i * nelt2];
|
| + if (e % nelt2)
|
| + return 0;
|
| + e /= nelt2;
|
| + mask |= e << (i * 4);
|
| + }
|
| +
|
| + /* Make sure success has a non-zero value by adding one. */
|
| + return mask + 1;
|
| +}
|
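
A worked V4DFmode example (nelt = 4, nelt2 = 2): the parallel [4 5 2 3]
selects the low half of the second operand followed by the high half of the
first. Both consecutive-index checks pass; the first half gives e = 4/2 = 2
and the second e = 2/2 = 1, so mask = 2 | (1 << 4) = 0x12 and the function
returns 0x13, i.e. imm8 0x12 plus the success bias of one.
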
| +
|
|
|
| /* Store OPERAND to the memory after reload is completed. This means
|
| that we can't easily use assign_stack_local. */
|
| @@ -25483,6 +24966,22 @@ ix86_free_from_memory (enum machine_mode mode)
|
| }
|
| }
|
|
|
| +/* Implement TARGET_IRA_COVER_CLASSES. If -mfpmath=sse, we prefer
|
| + SSE_REGS to FLOAT_REGS if their costs for a pseudo are the
|
| + same. */
|
| +static const enum reg_class *
|
| +i386_ira_cover_classes (void)
|
| +{
|
| + static const enum reg_class sse_fpmath_classes[] = {
|
| + GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
|
| + };
|
| + static const enum reg_class no_sse_fpmath_classes[] = {
|
| + GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
|
| + };
|
| +
|
| + return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
|
| +}
|
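
The only difference between the two arrays is the order of SSE_REGS and
FLOAT_REGS; since IRA walks the cover classes in array order, a pseudo whose
costs tie between the two lands in SSE registers under -mfpmath=sse and in
x87 registers otherwise, exactly as the comment above states.
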
| +
|
| /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
|
| QImode must go into class Q_REGS.
|
| Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
|
| @@ -26381,6 +25880,16 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
|
| *total = 0;
|
| return false;
|
|
|
| + case VEC_SELECT:
|
| + case VEC_CONCAT:
|
| + case VEC_MERGE:
|
| + case VEC_DUPLICATE:
|
| + /* ??? Assume all of these vector manipulation patterns are
|
| + recognizable, in which case they all have pretty much the
|
| + same cost. */
|
| + *total = COSTS_N_INSNS (1);
|
| + return true;
|
| +
|
| default:
|
| return false;
|
| }
|
| @@ -26438,24 +25947,17 @@ machopic_output_stub (FILE *file, const char *symb, const char *stub)
|
| if (MACHOPIC_PURE)
|
| {
|
| fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
|
| - fprintf (file, "\tpushl\t%%eax\n");
|
| + fputs ("\tpushl\t%eax\n", file);
|
| }
|
| else
|
| fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
|
|
|
| - fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
|
| + fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
|
|
|
| switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
|
| fprintf (file, "%s:\n", lazy_ptr_name);
|
| fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
|
| - fprintf (file, "\t.long %s\n", binder_name);
|
| -}
|
| -
|
| -void
|
| -darwin_x86_file_end (void)
|
| -{
|
| - darwin_file_end ();
|
| - ix86_file_end ();
|
| + fprintf (file, ASM_LONG "%s\n", binder_name);
|
| }
|
| #endif /* TARGET_MACHO */
|
|
|
| @@ -26515,15 +26017,15 @@ ix86_handle_abi_attribute (tree *node, tree name,
|
| && TREE_CODE (*node) != FIELD_DECL
|
| && TREE_CODE (*node) != TYPE_DECL)
|
| {
|
| - warning (OPT_Wattributes, "%qs attribute only applies to functions",
|
| - IDENTIFIER_POINTER (name));
|
| + warning (OPT_Wattributes, "%qE attribute only applies to functions",
|
| + name);
|
| *no_add_attrs = true;
|
| return NULL_TREE;
|
| }
|
| if (!TARGET_64BIT)
|
| {
|
| - warning (OPT_Wattributes, "%qs attribute only available for 64-bit",
|
| - IDENTIFIER_POINTER (name));
|
| + warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
|
| + name);
|
| *no_add_attrs = true;
|
| return NULL_TREE;
|
| }
|
| @@ -26570,8 +26072,8 @@ ix86_handle_struct_attribute (tree *node, tree name,
|
| if (!(type && (TREE_CODE (*type) == RECORD_TYPE
|
| || TREE_CODE (*type) == UNION_TYPE)))
|
| {
|
| - warning (OPT_Wattributes, "%qs attribute ignored",
|
| - IDENTIFIER_POINTER (name));
|
| + warning (OPT_Wattributes, "%qE attribute ignored",
|
| + name);
|
| *no_add_attrs = true;
|
| }
|
|
|
| @@ -26580,14 +26082,41 @@ ix86_handle_struct_attribute (tree *node, tree name,
|
| || ((is_attribute_p ("gcc_struct", name)
|
| && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
|
| {
|
| - warning (OPT_Wattributes, "%qs incompatible attribute ignored",
|
| - IDENTIFIER_POINTER (name));
|
| + warning (OPT_Wattributes, "%qE incompatible attribute ignored",
|
| + name);
|
| *no_add_attrs = true;
|
| }
|
|
|
| return NULL_TREE;
|
| }
|
|
|
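
The %qs/IDENTIFIER_POINTER pairs throughout these handlers become %qE with
the IDENTIFIER_NODE passed directly, letting the diagnostic pretty-printer
format the attribute name itself rather than a pre-extracted C string. The
new ix86_handle_fndecl_attribute below follows the same convention.
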
| +static tree
|
| +ix86_handle_fndecl_attribute (tree *node, tree name,
|
| + tree args ATTRIBUTE_UNUSED,
|
| + int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
|
| +{
|
| + if (TREE_CODE (*node) != FUNCTION_DECL)
|
| + {
|
| + warning (OPT_Wattributes, "%qE attribute only applies to functions",
|
| + name);
|
| + *no_add_attrs = true;
|
| + return NULL_TREE;
|
| + }
|
| +
|
| + if (TARGET_64BIT)
|
| + {
|
| + warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
|
| + name);
|
| + return NULL_TREE;
|
| + }
|
| +
|
| +#ifndef HAVE_AS_IX86_SWAP
|
| + sorry ("ms_hook_prologue attribute needs assembler swap suffix support");
|
| +#endif
|
| +
|
| + return NULL_TREE;
|
| +}
|
| +
|
| static bool
|
| ix86_ms_bitfield_layout_p (const_tree record_type)
|
| {
|
| @@ -26676,7 +26205,7 @@ x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
|
| *(*this + vcall_offset) should be added to THIS. */
|
|
|
| static void
|
| -x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
|
| +x86_output_mi_thunk (FILE *file,
|
| tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
|
| HOST_WIDE_INT vcall_offset, tree function)
|
| {
|
| @@ -26684,6 +26213,9 @@ x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
|
| rtx this_param = x86_this_parameter (function);
|
| rtx this_reg, tmp;
|
|
|
| + /* Make sure unwind info is emitted for the thunk if needed. */
|
| + final_start_function (emit_barrier (), file, 1);
|
| +
|
| /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
|
| pull it in now and let DELTA benefit. */
|
| if (REG_P (this_param))
|
| @@ -26701,7 +26233,10 @@ x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
|
| /* Adjust the this parameter by a fixed constant. */
|
| if (delta)
|
| {
|
| - xops[0] = GEN_INT (delta);
|
| + /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
|
| + Exceptions: -128 encodes smaller than 128, so swap sign and op. */
|
| + bool sub = delta < 0 || delta == 128;
|
| + xops[0] = GEN_INT (sub ? -delta : delta);
|
| xops[1] = this_reg ? this_reg : this_param;
|
| if (TARGET_64BIT)
|
| {
|
| @@ -26713,8 +26248,13 @@ x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
|
| xops[0] = tmp;
|
| xops[1] = this_param;
|
| }
|
| - output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
|
| + if (sub)
|
| + output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
|
| + else
|
| + output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
|
| }
|
| + else if (sub)
|
| + output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
|
| else
|
| output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
|
| }
|
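| /* Aside: a minimal standalone sketch (hypothetical helper, not part of
|    the patch) of the sign/op swap above.  Negative deltas read better
|    as `sub', and +128 needs a 4-byte immediate while -128 fits in a
|    sign-extended byte, so `sub $-128' encodes shorter than `add $128'.  */
| static void
| choose_add_or_sub (long delta, const char **op, long *imm)
| {
|   int sub = delta < 0 || delta == 128;
|
|   *op = sub ? "sub" : "add";
|   *imm = sub ? -delta : delta;
| }
|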
| @@ -26804,6 +26344,7 @@ x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
|
| output_asm_insn ("jmp\t{*}%1", xops);
|
| }
|
| }
|
| + final_end_function ();
|
| }
|
|
|
| static void
|
| @@ -26845,32 +26386,33 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
|
| if (TARGET_64BIT)
|
| {
|
| #ifndef NO_PROFILE_COUNTERS
|
| - fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
|
| + fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
|
| #endif
|
|
|
| if (DEFAULT_ABI == SYSV_ABI && flag_pic)
|
| - fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
|
| + fputs ("\tcall\t*" MCOUNT_NAME "@GOTPCREL(%rip)\n", file);
|
| else
|
| - fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
|
| + fputs ("\tcall\t" MCOUNT_NAME "\n", file);
|
| }
|
| else if (flag_pic)
|
| {
|
| #ifndef NO_PROFILE_COUNTERS
|
| - fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
|
| - LPREFIX, labelno, PROFILE_COUNT_REGISTER);
|
| + fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
|
| + LPREFIX, labelno);
|
| #endif
|
| - fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
|
| + fputs ("\tcall\t*" MCOUNT_NAME "@GOT(%ebx)\n", file);
|
| }
|
| else
|
| {
|
| #ifndef NO_PROFILE_COUNTERS
|
| - fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
|
| - PROFILE_COUNT_REGISTER);
|
| + fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
|
| + LPREFIX, labelno);
|
| #endif
|
| - fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
|
| + fputs ("\tcall\t" MCOUNT_NAME "\n", file);
|
| }
|
| }
|
|
|
| +#ifdef ASM_OUTPUT_MAX_SKIP_PAD
|
| /* We don't have exact information about the insn sizes, but we may assume
|
| quite safely that we are informed about all 1 byte insns and memory
|
| address sizes. This is enough to eliminate unnecessary padding in
|
| @@ -26879,7 +26421,7 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
|
| static int
|
| min_insn_size (rtx insn)
|
| {
|
| - int l = 0;
|
| + int l = 0, len;
|
|
|
| if (!INSN_P (insn) || !active_insn_p (insn))
|
| return 0;
|
| @@ -26888,9 +26430,7 @@ min_insn_size (rtx insn)
|
| if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
|
| && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
|
| return 0;
|
| - if (JUMP_P (insn)
|
| - && (GET_CODE (PATTERN (insn)) == ADDR_VEC
|
| - || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
|
| + if (JUMP_TABLE_DATA_P (insn))
|
| return 0;
|
|
|
| /* Important case - calls are always 5 bytes.
|
| @@ -26899,14 +26439,31 @@ min_insn_size (rtx insn)
|
| && symbolic_reference_mentioned_p (PATTERN (insn))
|
| && !SIBLING_CALL_P (insn))
|
| return 5;
|
| - if (get_attr_length (insn) <= 1)
|
| + len = get_attr_length (insn);
|
| + if (len <= 1)
|
| return 1;
|
|
|
| - /* For normal instructions we may rely on the sizes of addresses
|
| - and the presence of symbol to require 4 bytes of encoding.
|
| - This is not the case for jumps where references are PC relative. */
|
| + /* For normal instructions we rely on get_attr_length being exact,
|
| + with a few exceptions. */
|
| if (!JUMP_P (insn))
|
| {
|
| + enum attr_type type = get_attr_type (insn);
|
| +
|
| + switch (type)
|
| + {
|
| + case TYPE_MULTI:
|
| + if (GET_CODE (PATTERN (insn)) == ASM_INPUT
|
| + || asm_noperands (PATTERN (insn)) >= 0)
|
| + return 0;
|
| + break;
|
| + case TYPE_OTHER:
|
| + case TYPE_FCMP:
|
| + break;
|
| + default:
|
| + /* Otherwise trust get_attr_length. */
|
| + return len;
|
| + }
|
| +
|
| l = get_attr_length_address (insn);
|
| if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
|
| l = 4;
|
| @@ -26921,7 +26478,7 @@ min_insn_size (rtx insn)
|
| window. */
|
|
|
| static void
|
| -ix86_avoid_jump_misspredicts (void)
|
| +ix86_avoid_jump_mispredicts (void)
|
| {
|
| rtx insn, start = get_insns ();
|
| int nbytes = 0, njumps = 0;
|
| @@ -26935,15 +26492,52 @@ ix86_avoid_jump_misspredicts (void)
|
|
|
| The smallest offset in the page INSN can start is the case where START
|
| ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
|
| - We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
|
| + We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
|
| */
|
| - for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
|
| + for (insn = start; insn; insn = NEXT_INSN (insn))
|
| {
|
| + int min_size;
|
|
|
| - nbytes += min_insn_size (insn);
|
| + if (LABEL_P (insn))
|
| + {
|
| + int align = label_to_alignment (insn);
|
| + int max_skip = label_to_max_skip (insn);
|
| +
|
| + if (max_skip > 15)
|
| + max_skip = 15;
|
| + /* If align > 3, only up to 16 - max_skip - 1 bytes can be
|
| + already in the current 16 byte page, because otherwise
|
| + ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
|
| + bytes to reach 16 byte boundary. */
|
| + if (align <= 0
|
| + || (align <= 3 && max_skip != (1 << align) - 1))
|
| + max_skip = 0;
|
| + if (dump_file)
|
| + fprintf (dump_file, "Label %i with max_skip %i\n",
|
| + INSN_UID (insn), max_skip);
|
| + if (max_skip)
|
| + {
|
| + while (nbytes + max_skip >= 16)
|
| + {
|
| + start = NEXT_INSN (start);
|
| + if ((JUMP_P (start)
|
| + && GET_CODE (PATTERN (start)) != ADDR_VEC
|
| + && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
|
| + || CALL_P (start))
|
| + njumps--, isjump = 1;
|
| + else
|
| + isjump = 0;
|
| + nbytes -= min_insn_size (start);
|
| + }
|
| + }
|
| + continue;
|
| + }
|
| +
|
| + min_size = min_insn_size (insn);
|
| + nbytes += min_size;
|
| if (dump_file)
|
| - fprintf(dump_file, "Insn %i estimated to %i bytes\n",
|
| - INSN_UID (insn), min_insn_size (insn));
|
| + fprintf (dump_file, "Insn %i estimated to %i bytes\n",
|
| + INSN_UID (insn), min_size);
|
| if ((JUMP_P (insn)
|
| && GET_CODE (PATTERN (insn)) != ADDR_VEC
|
| && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
|
| @@ -26967,7 +26561,7 @@ ix86_avoid_jump_misspredicts (void)
|
| gcc_assert (njumps >= 0);
|
| if (dump_file)
|
| fprintf (dump_file, "Interval %i to %i has %i bytes\n",
|
| - INSN_UID (start), INSN_UID (insn), nbytes);
|
| + INSN_UID (start), INSN_UID (insn), nbytes);
|
|
|
| if (njumps == 3 && isjump && nbytes < 16)
|
| {
|
| @@ -26976,10 +26570,11 @@ ix86_avoid_jump_misspredicts (void)
|
| if (dump_file)
|
| fprintf (dump_file, "Padding insn %i by %i bytes!\n",
|
| INSN_UID (insn), padsize);
|
| - emit_insn_before (gen_align (GEN_INT (padsize)), insn);
|
| + emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
|
| }
|
| }
|
| }
|
| +#endif
|
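| /* Aside: a simplified standalone model (not the GCC code) of the
|    invariant enforced above: no 16-byte window may contain four
|    branches, since the AMD branch predictor tracks at most three per
|    16-byte block.  size[] and is_branch[] stand in for min_insn_size
|    and the JUMP_P/CALL_P test.  */
| #include <stddef.h>
|
| static size_t
| first_insn_needing_pad (const int *size, const int *is_branch, size_t n)
| {
|   size_t start = 0, i;
|   int nbytes = 0, njumps = 0;
|
|   for (i = 0; i < n; i++)
|     {
|       nbytes += size[i];
|       njumps += is_branch[i];
|
|       /* Keep the window no wider than 16 bytes.  */
|       while (nbytes > 16)
|         {
|           nbytes -= size[start];
|           njumps -= is_branch[start];
|           start++;
|         }
|
|       /* A fourth branch inside one window; pad before this insn.  */
|       if (njumps > 3)
|         return i;
|     }
|   return n;
| }
|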
|
|
| /* AMD Athlon works faster
|
| when RET is not destination of conditional jump or directly preceded
|
| @@ -27028,7 +26623,7 @@ ix86_pad_returns (void)
|
| }
|
| if (replace)
|
| {
|
| - emit_insn_before (gen_return_internal_long (), ret);
|
| + emit_jump_insn_before (gen_return_internal_long (), ret);
|
| delete_insn (ret);
|
| }
|
| }
|
| @@ -27039,12 +26634,15 @@ ix86_pad_returns (void)
|
| static void
|
| ix86_reorg (void)
|
| {
|
| - if (TARGET_PAD_RETURNS && optimize
|
| - && optimize_function_for_speed_p (cfun))
|
| - ix86_pad_returns ();
|
| - if (TARGET_FOUR_JUMP_LIMIT && optimize
|
| - && optimize_function_for_speed_p (cfun))
|
| - ix86_avoid_jump_misspredicts ();
|
| + if (optimize && optimize_function_for_speed_p (cfun))
|
| + {
|
| + if (TARGET_PAD_RETURNS)
|
| + ix86_pad_returns ();
|
| +#ifdef ASM_OUTPUT_MAX_SKIP_PAD
|
| + if (TARGET_FOUR_JUMP_LIMIT)
|
| + ix86_avoid_jump_mispredicts ();
|
| +#endif
|
| + }
|
| }
|
|
|
| /* Return nonzero when QImode register that must be represented via REX prefix
|
| @@ -27123,6 +26721,35 @@ x86_emit_floatuns (rtx operands[2])
|
| emit_label (donelab);
|
| }
|
|
|
| +/* AVX does not support 32-byte integer vector operations,
|
| + thus the longest vector we are faced with is V16QImode. */
|
| +#define MAX_VECT_LEN 16
|
| +
|
| +struct expand_vec_perm_d
|
| +{
|
| + rtx target, op0, op1;
|
| + unsigned char perm[MAX_VECT_LEN];
|
| + enum machine_mode vmode;
|
| + unsigned char nelt;
|
| + bool testing_p;
|
| +};
|
| +
|
| +static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
|
| +static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
|
| +
|
| +/* Get a vector mode of the same size as the original but with elements
|
| + twice as wide. This is only guaranteed to apply to integral vectors. */
|
| +
|
| +static inline enum machine_mode
|
| +get_mode_wider_vector (enum machine_mode o)
|
| +{
|
| + /* ??? Rely on the ordering that genmodes.c gives to vectors. */
|
| + enum machine_mode n = GET_MODE_WIDER_MODE (o);
|
| + gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
|
| + gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
|
| + return n;
|
| +}
|
| +
|
| /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
|
| with all elements equal to VAR. Return true if successful. */
|
|
|
| @@ -27130,8 +26757,7 @@ static bool
|
| ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
|
| rtx target, rtx val)
|
| {
|
| - enum machine_mode hmode, smode, wsmode, wvmode;
|
| - rtx x;
|
| + bool ok;
|
|
|
| switch (mode)
|
| {
|
| @@ -27141,13 +26767,36 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
|
| return false;
|
| /* FALLTHRU */
|
|
|
| + case V4DFmode:
|
| + case V4DImode:
|
| + case V8SFmode:
|
| + case V8SImode:
|
| case V2DFmode:
|
| case V2DImode:
|
| case V4SFmode:
|
| case V4SImode:
|
| - val = force_reg (GET_MODE_INNER (mode), val);
|
| - x = gen_rtx_VEC_DUPLICATE (mode, val);
|
| - emit_insn (gen_rtx_SET (VOIDmode, target, x));
|
| + {
|
| + rtx insn, dup;
|
| +
|
| + /* First attempt to recognize VAL as-is. */
|
| + dup = gen_rtx_VEC_DUPLICATE (mode, val);
|
| + insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
|
| + if (recog_memoized (insn) < 0)
|
| + {
|
| + rtx seq;
|
| + /* If that fails, force VAL into a register. */
|
| +
|
| + start_sequence ();
|
| + XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
|
| + seq = get_insns ();
|
| + end_sequence ();
|
| + if (seq)
|
| + emit_insn_before (seq, insn);
|
| +
|
| + ok = recog_memoized (insn) >= 0;
|
| + gcc_assert (ok);
|
| + }
|
| + }
|
| return true;
|
|
|
| case V4HImode:
|
| @@ -27155,130 +26804,87 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
|
| return false;
|
| if (TARGET_SSE || TARGET_3DNOW_A)
|
| {
|
| + rtx x;
|
| +
|
| val = gen_lowpart (SImode, val);
|
| x = gen_rtx_TRUNCATE (HImode, val);
|
| x = gen_rtx_VEC_DUPLICATE (mode, x);
|
| emit_insn (gen_rtx_SET (VOIDmode, target, x));
|
| return true;
|
| }
|
| - else
|
| - {
|
| - smode = HImode;
|
| - wsmode = SImode;
|
| - wvmode = V2SImode;
|
| - goto widen;
|
| - }
|
| + goto widen;
|
|
|
| case V8QImode:
|
| if (!mmx_ok)
|
| return false;
|
| - smode = QImode;
|
| - wsmode = HImode;
|
| - wvmode = V4HImode;
|
| goto widen;
|
| +
|
| case V8HImode:
|
| if (TARGET_SSE2)
|
| {
|
| + struct expand_vec_perm_d dperm;
|
| rtx tmp1, tmp2;
|
| - /* Extend HImode to SImode using a paradoxical SUBREG. */
|
| +
|
| + permute:
|
| + memset (&dperm, 0, sizeof (dperm));
|
| + dperm.target = target;
|
| + dperm.vmode = mode;
|
| + dperm.nelt = GET_MODE_NUNITS (mode);
|
| + dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
|
| +
|
| + /* Extend to SImode using a paradoxical SUBREG. */
|
| tmp1 = gen_reg_rtx (SImode);
|
| emit_move_insn (tmp1, gen_lowpart (SImode, val));
|
| - /* Insert the SImode value as low element of V4SImode vector. */
|
| - tmp2 = gen_reg_rtx (V4SImode);
|
| - tmp1 = gen_rtx_VEC_MERGE (V4SImode,
|
| - gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
|
| - CONST0_RTX (V4SImode),
|
| - const1_rtx);
|
| - emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
|
| - /* Cast the V4SImode vector back to a V8HImode vector. */
|
| - tmp1 = gen_reg_rtx (V8HImode);
|
| - emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
|
| - /* Duplicate the low short through the whole low SImode word. */
|
| - emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
|
| - /* Cast the V8HImode vector back to a V4SImode vector. */
|
| - tmp2 = gen_reg_rtx (V4SImode);
|
| - emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
|
| - /* Replicate the low element of the V4SImode vector. */
|
| - emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
|
| - /* Cast the V2SImode back to V8HImode, and store in target. */
|
| - emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
|
| - return true;
|
| +
|
| + /* Insert the SImode value as low element of a V4SImode vector. */
|
| + tmp2 = gen_lowpart (V4SImode, dperm.op0);
|
| + emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
|
| +
|
| + ok = (expand_vec_perm_1 (&dperm)
|
| + || expand_vec_perm_broadcast_1 (&dperm));
|
| + gcc_assert (ok);
|
| + return ok;
|
| }
|
| - smode = HImode;
|
| - wsmode = SImode;
|
| - wvmode = V4SImode;
|
| goto widen;
|
| +
|
| case V16QImode:
|
| if (TARGET_SSE2)
|
| - {
|
| - rtx tmp1, tmp2;
|
| - /* Extend QImode to SImode using a paradoxical SUBREG. */
|
| - tmp1 = gen_reg_rtx (SImode);
|
| - emit_move_insn (tmp1, gen_lowpart (SImode, val));
|
| - /* Insert the SImode value as low element of V4SImode vector. */
|
| - tmp2 = gen_reg_rtx (V4SImode);
|
| - tmp1 = gen_rtx_VEC_MERGE (V4SImode,
|
| - gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
|
| - CONST0_RTX (V4SImode),
|
| - const1_rtx);
|
| - emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
|
| - /* Cast the V4SImode vector back to a V16QImode vector. */
|
| - tmp1 = gen_reg_rtx (V16QImode);
|
| - emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
|
| - /* Duplicate the low byte through the whole low SImode word. */
|
| - emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
|
| - emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
|
| - /* Cast the V16QImode vector back to a V4SImode vector. */
|
| - tmp2 = gen_reg_rtx (V4SImode);
|
| - emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
|
| - /* Replicate the low element of the V4SImode vector. */
|
| - emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
|
| - /* Cast the V2SImode back to V16QImode, and store in target. */
|
| - emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
|
| - return true;
|
| - }
|
| - smode = QImode;
|
| - wsmode = HImode;
|
| - wvmode = V8HImode;
|
| + goto permute;
|
| goto widen;
|
| +
|
| widen:
|
| /* Replicate the value once into the next wider mode and recurse. */
|
| - val = convert_modes (wsmode, smode, val, true);
|
| - x = expand_simple_binop (wsmode, ASHIFT, val,
|
| - GEN_INT (GET_MODE_BITSIZE (smode)),
|
| - NULL_RTX, 1, OPTAB_LIB_WIDEN);
|
| - val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
|
| -
|
| - x = gen_reg_rtx (wvmode);
|
| - if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
|
| - gcc_unreachable ();
|
| - emit_move_insn (target, gen_lowpart (mode, x));
|
| - return true;
|
| + {
|
| + enum machine_mode smode, wsmode, wvmode;
|
| + rtx x;
|
| +
|
| + smode = GET_MODE_INNER (mode);
|
| + wvmode = get_mode_wider_vector (mode);
|
| + wsmode = GET_MODE_INNER (wvmode);
|
| +
|
| + val = convert_modes (wsmode, smode, val, true);
|
| + x = expand_simple_binop (wsmode, ASHIFT, val,
|
| + GEN_INT (GET_MODE_BITSIZE (smode)),
|
| + NULL_RTX, 1, OPTAB_LIB_WIDEN);
|
| + val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
|
| +
|
| + x = gen_lowpart (wvmode, target);
|
| + ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
|
| + gcc_assert (ok);
|
| + return ok;
|
| + }
|
|
|
| - case V4DFmode:
|
| - hmode = V2DFmode;
|
| - goto half;
|
| - case V4DImode:
|
| - hmode = V2DImode;
|
| - goto half;
|
| - case V8SFmode:
|
| - hmode = V4SFmode;
|
| - goto half;
|
| - case V8SImode:
|
| - hmode = V4SImode;
|
| - goto half;
|
| case V16HImode:
|
| - hmode = V8HImode;
|
| - goto half;
|
| case V32QImode:
|
| - hmode = V16QImode;
|
| - goto half;
|
| -half:
|
| {
|
| - rtx tmp = gen_reg_rtx (hmode);
|
| - ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
|
| - emit_insn (gen_rtx_SET (VOIDmode, target,
|
| - gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
|
| + enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
|
| + rtx x = gen_reg_rtx (hvmode);
|
| +
|
| + ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
|
| + gcc_assert (ok);
|
| +
|
| + x = gen_rtx_VEC_CONCAT (mode, x, x);
|
| + emit_insn (gen_rtx_SET (VOIDmode, target, x));
|
| }
|
| return true;
|
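| /* Aside: a tiny standalone illustration (not part of the patch) of the
|    widening step in the `widen' case above: an N-bit scalar is
|    duplicated into a 2N-bit scalar so the broadcast can recurse at
|    twice the element width.  */
| static unsigned short
| widen_qi_to_hi (unsigned char v)
| {
|   /* 0xAB -> 0xABAB, matching the ASHIFT + IOR sequence emitted.  */
|   return (unsigned short) ((v << 8) | v);
| }
|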
|
|
| @@ -27381,7 +26987,7 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
|
| if (mode != V4SFmode && TARGET_SSE2)
|
| {
|
| emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
|
| - GEN_INT (1),
|
| + const1_rtx,
|
| GEN_INT (one_var == 1 ? 0 : 1),
|
| GEN_INT (one_var == 2 ? 0 : 1),
|
| GEN_INT (one_var == 3 ? 0 : 1)));
|
| @@ -27401,7 +27007,7 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
|
| tmp = new_target;
|
|
|
| emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
|
| - GEN_INT (1),
|
| + const1_rtx,
|
| GEN_INT (one_var == 1 ? 0 : 1),
|
| GEN_INT (one_var == 2 ? 0+4 : 1+4),
|
| GEN_INT (one_var == 3 ? 0+4 : 1+4)));
|
| @@ -28059,13 +27665,13 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
|
| /* tmp = target = A B C D */
|
| tmp = copy_to_reg (target);
|
| /* target = A A B B */
|
| - emit_insn (gen_sse_unpcklps (target, target, target));
|
| + emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
|
| /* target = X A B B */
|
| ix86_expand_vector_set (false, target, val, 0);
|
| /* target = A X C D */
|
| emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
|
| - GEN_INT (1), GEN_INT (0),
|
| - GEN_INT (2+4), GEN_INT (3+4)));
|
| + const1_rtx, const0_rtx,
|
| + GEN_INT (2+4), GEN_INT (3+4)));
|
| return;
|
|
|
| case 2:
|
| @@ -28075,8 +27681,8 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
|
| ix86_expand_vector_set (false, tmp, val, 0);
|
| /* target = A B X D */
|
| emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
|
| - GEN_INT (0), GEN_INT (1),
|
| - GEN_INT (0+4), GEN_INT (3+4)));
|
| + const0_rtx, const1_rtx,
|
| + GEN_INT (0+4), GEN_INT (3+4)));
|
| return;
|
|
|
| case 3:
|
| @@ -28086,8 +27692,8 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
|
| ix86_expand_vector_set (false, tmp, val, 0);
|
| /* target = A B C X */
|
| emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
|
| - GEN_INT (0), GEN_INT (1),
|
| - GEN_INT (2+4), GEN_INT (0+4)));
|
| + const0_rtx, const1_rtx,
|
| + GEN_INT (2+4), GEN_INT (0+4)));
|
| return;
|
|
|
| default:
|
| @@ -28269,7 +27875,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
|
|
|
| case 2:
|
| tmp = gen_reg_rtx (mode);
|
| - emit_insn (gen_sse_unpckhps (tmp, vec, vec));
|
| + emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
|
| break;
|
|
|
| default:
|
| @@ -28303,7 +27909,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
|
|
|
| case 2:
|
| tmp = gen_reg_rtx (mode);
|
| - emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
|
| + emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
|
| break;
|
|
|
| default:
|
| @@ -28380,8 +27986,8 @@ ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
|
| emit_insn (fn (tmp2, tmp1, in));
|
|
|
| emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
|
| - GEN_INT (1), GEN_INT (1),
|
| - GEN_INT (1+4), GEN_INT (1+4)));
|
| + const1_rtx, const1_rtx,
|
| + GEN_INT (1+4), GEN_INT (1+4)));
|
| emit_insn (fn (dest, tmp2, tmp3));
|
| }
|
|
|
| @@ -28390,7 +27996,7 @@ static bool
|
| ix86_scalar_mode_supported_p (enum machine_mode mode)
|
| {
|
| if (DECIMAL_FLOAT_MODE_P (mode))
|
| - return true;
|
| + return default_decimal_float_supported_p ();
|
| else if (mode == TFmode)
|
| return true;
|
| else
|
| @@ -28483,22 +28089,22 @@ output_387_reg_move (rtx insn, rtx *operands)
|
| return "fstp\t%y0";
|
| }
|
| if (STACK_TOP_P (operands[0]))
|
| - return "fld%z1\t%y1";
|
| + return "fld%Z1\t%y1";
|
| return "fst\t%y0";
|
| }
|
| else if (MEM_P (operands[0]))
|
| {
|
| gcc_assert (REG_P (operands[1]));
|
| if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
|
| - return "fstp%z0\t%y0";
|
| + return "fstp%Z0\t%y0";
|
| else
|
| {
|
| /* There is no non-popping store to memory for XFmode.
|
| So if we need one, follow the store with a load. */
|
| if (GET_MODE (operands[0]) == XFmode)
|
| - return "fstp%z0\t%y0\n\tfld%z0\t%y0";
|
| + return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
|
| else
|
| - return "fst%z0\t%y0";
|
| + return "fst%Z0\t%y0";
|
| }
|
| }
|
| else
|
| @@ -28549,13 +28155,14 @@ void ix86_emit_i387_log1p (rtx op0, rtx op1)
|
|
|
| rtx tmp = gen_reg_rtx (XFmode);
|
| rtx tmp2 = gen_reg_rtx (XFmode);
|
| + rtx test;
|
|
|
| emit_insn (gen_absxf2 (tmp, op1));
|
| - emit_insn (gen_cmpxf (tmp,
|
| + test = gen_rtx_GE (VOIDmode, tmp,
|
| CONST_DOUBLE_FROM_REAL_VALUE (
|
| REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
|
| - XFmode)));
|
| - emit_jump_insn (gen_bge (label1));
|
| + XFmode));
|
| + emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
|
|
|
| emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
|
| emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
|
| @@ -28595,18 +28202,18 @@ void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
|
| emit_insn (gen_rtx_SET (VOIDmode, x0,
|
| gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
|
| UNSPEC_RCP)));
|
| - /* e0 = x0 * b */
|
| + /* e0 = x0 * a */
|
| emit_insn (gen_rtx_SET (VOIDmode, e0,
|
| - gen_rtx_MULT (mode, x0, b)));
|
| - /* e1 = 2. - e0 */
|
| + gen_rtx_MULT (mode, x0, a)));
|
| + /* e1 = x0 * b */
|
| emit_insn (gen_rtx_SET (VOIDmode, e1,
|
| - gen_rtx_MINUS (mode, two, e0)));
|
| - /* x1 = x0 * e1 */
|
| + gen_rtx_MULT (mode, x0, b)));
|
| + /* x1 = 2. - e1 */
|
| emit_insn (gen_rtx_SET (VOIDmode, x1,
|
| - gen_rtx_MULT (mode, x0, e1)));
|
| - /* res = a * x1 */
|
| + gen_rtx_MINUS (mode, two, e1)));
|
| + /* res = e0 * x1 */
|
| emit_insn (gen_rtx_SET (VOIDmode, res,
|
| - gen_rtx_MULT (mode, a, x1)));
|
| + gen_rtx_MULT (mode, e0, x1)));
|
| }
|
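| /* Aside: a scalar sketch (illustrative only; assumes an ~12-bit
|    accurate hardware reciprocal estimate) of the reordered
|    Newton-Raphson sequence above.  The fix computes
|    res = (a*x0) * (2 - b*x0) instead of a * (x0 * (2 - b*x0)),
|    which lets the two multiplies by x0 issue in parallel.  */
| static float
| swdiv_model (float a, float b, float rcp_b /* rcpss (b) */)
| {
|   float x0 = rcp_b;      /* x0 ~= 1/b                       */
|   float e0 = x0 * a;     /* e0 = a/b at reduced precision   */
|   float e1 = x0 * b;     /* e1 ~= 1                         */
|   float x1 = 2.0f - e1;  /* Newton-Raphson correction       */
|   return e0 * x1;        /* ~= a/b to full single precision */
| }
|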
|
|
| /* Output code to perform a Newton-Raphson approximation of a
|
| @@ -29343,277 +28950,1260 @@ ix86_expand_round (rtx operand0, rtx operand1)
|
|
|
| emit_move_insn (operand0, res);
|
| }
|
| -
|
|
|
| -/* Validate whether a SSE5 instruction is valid or not.
|
| - OPERANDS is the array of operands.
|
| - NUM is the number of operands.
|
| - USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
|
| - NUM_MEMORY is the maximum number of memory operands to accept.
|
| - when COMMUTATIVE is set, operand 1 and 2 can be swapped. */
|
|
|
| -bool
|
| -ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
|
| - bool uses_oc0, int num_memory, bool commutative)
|
| +/* Table of valid machine attributes. */
|
| +static const struct attribute_spec ix86_attribute_table[] =
|
| {
|
| - int mem_mask;
|
| - int mem_count;
|
| - int i;
|
| + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
|
| + /* Stdcall attribute says callee is responsible for popping arguments
|
| + if they are not variable. */
|
| + { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
|
| + /* Fastcall attribute says callee is responsible for popping arguments
|
| + if they are not variable. */
|
| + { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
|
| + /* Cdecl attribute says the callee is a normal C declaration */
|
| + { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
|
| + /* Regparm attribute specifies how many integer arguments are to be
|
| + passed in registers. */
|
| + { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
|
| + /* Sseregparm attribute says we are using x86_64 calling conventions
|
| + for FP arguments. */
|
| + { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
|
| + /* force_align_arg_pointer says this function realigns the stack at entry. */
|
| + { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
|
| + false, true, true, ix86_handle_cconv_attribute },
|
| +#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
|
| + { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
|
| + { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
|
| + { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
|
| +#endif
|
| + { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
|
| + { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
|
| +#ifdef SUBTARGET_ATTRIBUTE_TABLE
|
| + SUBTARGET_ATTRIBUTE_TABLE,
|
| +#endif
|
| + /* ms_abi and sysv_abi calling convention function attributes. */
|
| + { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
|
| + { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
|
| + { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute },
|
| + /* End element. */
|
| + { NULL, 0, 0, false, false, false, NULL }
|
| +};
|
|
|
| - /* Count the number of memory arguments */
|
| - mem_mask = 0;
|
| - mem_count = 0;
|
| - for (i = 0; i < num; i++)
|
| +/* Implement targetm.vectorize.builtin_vectorization_cost. */
|
| +static int
|
| +ix86_builtin_vectorization_cost (bool runtime_test)
|
| +{
|
| + /* If the branch of the runtime test is taken (i.e. the vectorized
|
| + version is skipped), this incurs a misprediction cost, because the
|
| + vectorized version is expected to be the fall-through. So we subtract
|
| + the latency of a mispredicted branch from the costs that are incurred
|
| + when the vectorized version is executed.
|
| +
|
| + TODO: The values in individual target tables have to be tuned or new
|
| + fields may be needed. E.g. on K8, the default branch path is the
|
| + not-taken path. If the taken path is predicted correctly, the minimum
|
| + penalty of going down the taken path is 1 cycle. If the taken path is
|
| + not predicted correctly, then the minimum penalty is 10 cycles. */
|
| +
|
| + if (runtime_test)
|
| {
|
| - enum machine_mode mode = GET_MODE (operands[i]);
|
| - if (register_operand (operands[i], mode))
|
| - ;
|
| + return (-(ix86_cost->cond_taken_branch_cost));
|
| + }
|
| + else
|
| + return 0;
|
| +}
|
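| /* Aside: a worked example with illustrative numbers.  If
|    cond_taken_branch_cost is 3, a vectorized loop guarded by a runtime
|    test is credited -3, because the guard branch is only taken when
|    the scalar fallback runs, so its misprediction penalty should not
|    be charged against the vectorized path.  */
|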
| +
|
| +/* Implement targetm.vectorize.builtin_vec_perm. */
|
| +
|
| +static tree
|
| +ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
|
| +{
|
| + tree itype = TREE_TYPE (vec_type);
|
| + bool u = TYPE_UNSIGNED (itype);
|
| + enum machine_mode vmode = TYPE_MODE (vec_type);
|
| + enum ix86_builtins fcode = fcode; /* Silence bogus warning. */
|
| + bool ok = TARGET_SSE2;
|
| +
|
| + switch (vmode)
|
| + {
|
| + case V4DFmode:
|
| + ok = TARGET_AVX;
|
| + fcode = IX86_BUILTIN_VEC_PERM_V4DF;
|
| + goto get_di;
|
| + case V2DFmode:
|
| + fcode = IX86_BUILTIN_VEC_PERM_V2DF;
|
| + get_di:
|
| + itype = ix86_get_builtin_type (IX86_BT_DI);
|
| + break;
|
| +
|
| + case V8SFmode:
|
| + ok = TARGET_AVX;
|
| + fcode = IX86_BUILTIN_VEC_PERM_V8SF;
|
| + goto get_si;
|
| + case V4SFmode:
|
| + ok = TARGET_SSE;
|
| + fcode = IX86_BUILTIN_VEC_PERM_V4SF;
|
| + get_si:
|
| + itype = ix86_get_builtin_type (IX86_BT_SI);
|
| + break;
|
| +
|
| + case V2DImode:
|
| + fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
|
| + break;
|
| + case V4SImode:
|
| + fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
|
| + break;
|
| + case V8HImode:
|
| + fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
|
| + break;
|
| + case V16QImode:
|
| + fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
|
| + break;
|
| + default:
|
| + ok = false;
|
| + break;
|
| + }
|
| +
|
| + if (!ok)
|
| + return NULL_TREE;
|
| +
|
| + *mask_type = itype;
|
| + return ix86_builtins[(int) fcode];
|
| +}
|
| +
|
| +/* Return a vector mode with twice as many elements as VMODE. */
|
| +/* ??? Consider moving this to a table generated by genmodes.c. */
|
| +
|
| +static enum machine_mode
|
| +doublesize_vector_mode (enum machine_mode vmode)
|
| +{
|
| + switch (vmode)
|
| + {
|
| + case V2SFmode: return V4SFmode;
|
| + case V1DImode: return V2DImode;
|
| + case V2SImode: return V4SImode;
|
| + case V4HImode: return V8HImode;
|
| + case V8QImode: return V16QImode;
|
| +
|
| + case V2DFmode: return V4DFmode;
|
| + case V4SFmode: return V8SFmode;
|
| + case V2DImode: return V4DImode;
|
| + case V4SImode: return V8SImode;
|
| + case V8HImode: return V16HImode;
|
| + case V16QImode: return V32QImode;
|
| +
|
| + case V4DFmode: return V8DFmode;
|
| + case V8SFmode: return V16SFmode;
|
| + case V4DImode: return V8DImode;
|
| + case V8SImode: return V16SImode;
|
| + case V16HImode: return V32HImode;
|
| + case V32QImode: return V64QImode;
|
| +
|
| + default:
|
| + gcc_unreachable ();
|
| + }
|
| +}
|
| +
|
| +/* Construct (set target (vec_select op0 (parallel perm))) and
|
| + return true if that's a valid instruction in the active ISA. */
|
| +
|
| +static bool
|
| +expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
|
| +{
|
| + rtx rperm[MAX_VECT_LEN], x;
|
| + unsigned i;
|
| +
|
| + for (i = 0; i < nelt; ++i)
|
| + rperm[i] = GEN_INT (perm[i]);
|
| +
|
| + x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
|
| + x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
|
| + x = gen_rtx_SET (VOIDmode, target, x);
|
| +
|
| + x = emit_insn (x);
|
| + if (recog_memoized (x) < 0)
|
| + {
|
| + remove_insn (x);
|
| + return false;
|
| + }
|
| + return true;
|
| +}
|
| +
|
| +/* Similar, but generate a vec_concat from op0 and op1 as well. */
|
| +
|
| +static bool
|
| +expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
|
| + const unsigned char *perm, unsigned nelt)
|
| +{
|
| + enum machine_mode v2mode;
|
| + rtx x;
|
| +
|
| + v2mode = doublesize_vector_mode (GET_MODE (op0));
|
| + x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
|
| + return expand_vselect (target, x, perm, nelt);
|
| +}
|
| +
|
| +/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
|
| + in terms of blendp[sd] / pblendw / pblendvb. */
|
| +
|
| +static bool
|
| +expand_vec_perm_blend (struct expand_vec_perm_d *d)
|
| +{
|
| + enum machine_mode vmode = d->vmode;
|
| + unsigned i, mask, nelt = d->nelt;
|
| + rtx target, op0, op1, x;
|
|
|
| - else if (memory_operand (operands[i], mode))
|
| + if (!TARGET_SSE4_1 || d->op0 == d->op1)
|
| + return false;
|
| + if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
|
| + return false;
|
| +
|
| + /* This is a blend, not a permute. Elements must stay in their
|
| + respective lanes. */
|
| + for (i = 0; i < nelt; ++i)
|
| + {
|
| + unsigned e = d->perm[i];
|
| + if (!(e == i || e == i + nelt))
|
| + return false;
|
| + }
|
| +
|
| + if (d->testing_p)
|
| + return true;
|
| +
|
| + /* ??? Without SSE4.1, we could implement this with and/andn/or. This
|
| + decision should be extracted elsewhere, so that we only try that
|
| + sequence once all budget==3 options have been tried. */
|
| +
|
| + /* For bytes, see if bytes move in pairs so we can use pblendw with
|
| + an immediate argument, rather than pblendvb with a vector argument. */
|
| + if (vmode == V16QImode)
|
| + {
|
| + bool pblendw_ok = true;
|
| + for (i = 0; i < 16 && pblendw_ok; i += 2)
|
| + pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
|
| +
|
| + if (!pblendw_ok)
|
| {
|
| - mem_mask |= (1 << i);
|
| - mem_count++;
|
| + rtx rperm[16], vperm;
|
| +
|
| + for (i = 0; i < nelt; ++i)
|
| + rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
|
| +
|
| + vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
|
| + vperm = force_reg (V16QImode, vperm);
|
| +
|
| + emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
|
| + return true;
|
| }
|
| + }
|
|
|
| - else
|
| + target = d->target;
|
| + op0 = d->op0;
|
| + op1 = d->op1;
|
| + mask = 0;
|
| +
|
| + switch (vmode)
|
| + {
|
| + case V4DFmode:
|
| + case V8SFmode:
|
| + case V2DFmode:
|
| + case V4SFmode:
|
| + case V8HImode:
|
| + for (i = 0; i < nelt; ++i)
|
| + mask |= (d->perm[i] >= nelt) << i;
|
| + break;
|
| +
|
| + case V2DImode:
|
| + for (i = 0; i < 2; ++i)
|
| + mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
|
| + goto do_subreg;
|
| +
|
| + case V4SImode:
|
| + for (i = 0; i < 4; ++i)
|
| + mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
|
| + goto do_subreg;
|
| +
|
| + case V16QImode:
|
| + for (i = 0; i < 8; ++i)
|
| + mask |= (d->perm[i * 2] >= 16) << i;
|
| +
|
| + do_subreg:
|
| + vmode = V8HImode;
|
| + target = gen_lowpart (vmode, target);
|
| + op0 = gen_lowpart (vmode, op0);
|
| + op1 = gen_lowpart (vmode, op1);
|
| + break;
|
| +
|
| + default:
|
| + gcc_unreachable ();
|
| + }
|
| +
|
| + /* This matches five different patterns with the different modes. */
|
| + x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
|
| + x = gen_rtx_SET (VOIDmode, target, x);
|
| + emit_insn (x);
|
| +
|
| + return true;
|
| +}
|
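| /* Aside: a standalone sketch (hypothetical helper) of the immediate
|    built above for the V4SI do_subreg path: each 32-bit element taken
|    from op1 sets two adjacent mask bits, yielding the pblendw
|    immediate for the V8HI view of the operands.  */
| static unsigned
| blend_mask_v4si (const unsigned char perm[4])
| {
|   unsigned i, mask = 0;
|
|   for (i = 0; i < 4; ++i)
|     mask |= (perm[i] >= 4 ? 3u : 0u) << (i * 2);
|   return mask;
| }
|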
| +
|
| +/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
|
| + in terms of the variable form of vpermilps.
|
| +
|
| + Note that we will have already failed the immediate input vpermilps,
|
| + which requires that the high and low part shuffle be identical; the
|
| + variable form doesn't require that. */
|
| +
|
| +static bool
|
| +expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
|
| +{
|
| + rtx rperm[8], vperm;
|
| + unsigned i;
|
| +
|
| + if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
|
| + return false;
|
| +
|
| + /* We can only permute within the 128-bit lane. */
|
| + for (i = 0; i < 8; ++i)
|
| + {
|
| + unsigned e = d->perm[i];
|
| + if (i < 4 ? e >= 4 : e < 4)
|
| + return false;
|
| + }
|
| +
|
| + if (d->testing_p)
|
| + return true;
|
| +
|
| + for (i = 0; i < 8; ++i)
|
| + {
|
| + unsigned e = d->perm[i];
|
| +
|
| + /* Within each 128-bit lane, the elements of op0 are numbered
|
| + from 0 and the elements of op1 are numbered from 4. */
|
| + if (e >= 8 + 4)
|
| + e -= 8;
|
| + else if (e >= 4)
|
| + e -= 4;
|
| +
|
| + rperm[i] = GEN_INT (e);
|
| + }
|
| +
|
| + vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
|
| + vperm = force_reg (V8SImode, vperm);
|
| + emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
|
| +
|
| + return true;
|
| +}
|
| +
|
| +/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
|
| + in terms of pshufb or vpperm. */
|
| +
|
| +static bool
|
| +expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
|
| +{
|
| + unsigned i, nelt, eltsz;
|
| + rtx rperm[16], vperm, target, op0, op1;
|
| +
|
| + if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
|
| + return false;
|
| + if (GET_MODE_SIZE (d->vmode) != 16)
|
| + return false;
|
| +
|
| + if (d->testing_p)
|
| + return true;
|
| +
|
| + nelt = d->nelt;
|
| + eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
|
| +
|
| + for (i = 0; i < nelt; ++i)
|
| + {
|
| + unsigned j, e = d->perm[i];
|
| + for (j = 0; j < eltsz; ++j)
|
| + rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
|
| + }
|
| +
|
| + vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
|
| + vperm = force_reg (V16QImode, vperm);
|
| +
|
| + target = gen_lowpart (V16QImode, d->target);
|
| + op0 = gen_lowpart (V16QImode, d->op0);
|
| + if (d->op0 == d->op1)
|
| + emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
|
| + else
|
| + {
|
| + op1 = gen_lowpart (V16QImode, d->op1);
|
| + emit_insn (gen_xop_pperm (target, op0, op1, vperm));
|
| + }
|
| +
|
| + return true;
|
| +}
|
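| /* Aside: a standalone sketch of the control vector built above.
|    pshufb selects source byte ctrl[k] into result byte k, so an
|    element permute expands to eltsz consecutive byte indices; e.g. a
|    V4SI permute {2,0,3,1} becomes the byte control
|    {8,9,10,11, 0,1,2,3, 12,13,14,15, 4,5,6,7}.  */
| static void
| pshufb_control (const unsigned char *perm, unsigned nelt,
|                 unsigned eltsz, unsigned char ctrl[16])
| {
|   unsigned i, j;
|
|   for (i = 0; i < nelt; ++i)
|     for (j = 0; j < eltsz; ++j)
|       ctrl[i * eltsz + j] = (unsigned char) (perm[i] * eltsz + j);
| }
|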
| +
|
| +/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
|
| + in a single instruction. */
|
| +
|
| +static bool
|
| +expand_vec_perm_1 (struct expand_vec_perm_d *d)
|
| +{
|
| + unsigned i, nelt = d->nelt;
|
| + unsigned char perm2[MAX_VECT_LEN];
|
| +
|
| + /* Check plain VEC_SELECT first, because AVX has instructions that could
|
| + match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
|
| + input where SEL+CONCAT may not. */
|
| + if (d->op0 == d->op1)
|
| + {
|
| + int mask = nelt - 1;
|
| +
|
| + for (i = 0; i < nelt; i++)
|
| + perm2[i] = d->perm[i] & mask;
|
| +
|
| + if (expand_vselect (d->target, d->op0, perm2, nelt))
|
| + return true;
|
| +
|
| + /* There are plenty of patterns in sse.md that are written for
|
| + SEL+CONCAT and are not replicated for a single op. Perhaps
|
| + that should be changed, to avoid the nastiness here. */
|
| +
|
| + /* Recognize interleave style patterns, which means incrementing
|
| + every other permutation operand. */
|
| + for (i = 0; i < nelt; i += 2)
|
| {
|
| - rtx pattern = PATTERN (insn);
|
| + perm2[i] = d->perm[i] & mask;
|
| + perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
|
| + }
|
| + if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
|
| + return true;
|
|
|
| - /* allow 0 for pcmov */
|
| - if (GET_CODE (pattern) != SET
|
| - || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
|
| - || i < 2
|
| - || operands[i] != CONST0_RTX (mode))
|
| - return false;
|
| + /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
|
| + if (nelt >= 4)
|
| + {
|
| + for (i = 0; i < nelt; i += 4)
|
| + {
|
| + perm2[i + 0] = d->perm[i + 0] & mask;
|
| + perm2[i + 1] = d->perm[i + 1] & mask;
|
| + perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
|
| + perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
|
| + }
|
| +
|
| + if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
|
| + return true;
|
| }
|
| }
|
|
|
| - /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
|
| - a memory operation. */
|
| - if (num_memory < 0)
|
| + /* Finally, try the fully general two operand permute. */
|
| + if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
|
| + return true;
|
| +
|
| + /* Recognize interleave style patterns with reversed operands. */
|
| + if (d->op0 != d->op1)
|
| {
|
| - num_memory = -num_memory;
|
| - if ((mem_mask & (1 << (num-1))) != 0)
|
| + for (i = 0; i < nelt; ++i)
|
| {
|
| - mem_mask &= ~(1 << (num-1));
|
| - mem_count--;
|
| + unsigned e = d->perm[i];
|
| + if (e >= nelt)
|
| + e -= nelt;
|
| + else
|
| + e += nelt;
|
| + perm2[i] = e;
|
| }
|
| +
|
| + if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
|
| + return true;
|
| }
|
|
|
| - /* If there were no memory operations, allow the insn */
|
| - if (mem_mask == 0)
|
| + /* Try the SSE4.1 blend variable merge instructions. */
|
| + if (expand_vec_perm_blend (d))
|
| + return true;
|
| +
|
| + /* Try one of the AVX vpermil variable permutations. */
|
| + if (expand_vec_perm_vpermil (d))
|
| return true;
|
|
|
| - /* Do not allow the destination register to be a memory operand. */
|
| - else if (mem_mask & (1 << 0))
|
| + /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
|
| + if (expand_vec_perm_pshufb (d))
|
| + return true;
|
| +
|
| + return false;
|
| +}
|
| +
|
| +/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
|
| + in terms of a pair of pshuflw + pshufhw instructions. */
|
| +
|
| +static bool
|
| +expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
|
| +{
|
| + unsigned char perm2[MAX_VECT_LEN];
|
| + unsigned i;
|
| + bool ok;
|
| +
|
| + if (d->vmode != V8HImode || d->op0 != d->op1)
|
| return false;
|
|
|
| - /* If there are too many memory operations, disallow the instruction. While
|
| - the hardware only allows 1 memory reference, before register allocation
|
| - for some insns, we allow two memory operations sometimes in order to allow
|
| - code like the following to be optimized:
|
| + /* The two permutations only operate in 64-bit lanes. */
|
| + for (i = 0; i < 4; ++i)
|
| + if (d->perm[i] >= 4)
|
| + return false;
|
| + for (i = 4; i < 8; ++i)
|
| + if (d->perm[i] < 4)
|
| + return false;
|
|
|
| - float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
|
| + if (d->testing_p)
|
| + return true;
|
|
|
| - or similar cases that are vectorized into using the fmaddss
|
| - instruction. */
|
| - else if (mem_count > num_memory)
|
| + /* Emit the pshuflw. */
|
| + memcpy (perm2, d->perm, 4);
|
| + for (i = 4; i < 8; ++i)
|
| + perm2[i] = i;
|
| + ok = expand_vselect (d->target, d->op0, perm2, 8);
|
| + gcc_assert (ok);
|
| +
|
| + /* Emit the pshufhw. */
|
| + memcpy (perm2 + 4, d->perm + 4, 4);
|
| + for (i = 0; i < 4; ++i)
|
| + perm2[i] = i;
|
| + ok = expand_vselect (d->target, d->target, perm2, 8);
|
| + gcc_assert (ok);
|
| +
|
| + return true;
|
| +}
|
| +
|
| +/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
|
| + the permutation using the SSSE3 palignr instruction. This succeeds
|
| + when all of the elements in PERM fit within one vector and we merely
|
| + need to shift them down so that a single vector permutation has a
|
| + chance to succeed. */
|
| +
|
| +static bool
|
| +expand_vec_perm_palignr (struct expand_vec_perm_d *d)
|
| +{
|
| + unsigned i, nelt = d->nelt;
|
| + unsigned min, max;
|
| + bool in_order, ok;
|
| + rtx shift;
|
| +
|
| + /* Even with AVX, palignr only operates on 128-bit vectors. */
|
| + if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
|
| return false;
|
|
|
| - /* Don't allow more than one memory operation if not optimizing. */
|
| - else if (mem_count > 1 && !optimize)
|
| + min = nelt, max = 0;
|
| + for (i = 0; i < nelt; ++i)
|
| + {
|
| + unsigned e = d->perm[i];
|
| + if (e < min)
|
| + min = e;
|
| + if (e > max)
|
| + max = e;
|
| + }
|
| + if (min == 0 || max - min >= nelt)
|
| return false;
|
|
|
| - else if (num == 4 && mem_count == 1)
|
| - {
|
| - /* formats (destination is the first argument), example fmaddss:
|
| - xmm1, xmm1, xmm2, xmm3/mem
|
| - xmm1, xmm1, xmm2/mem, xmm3
|
| - xmm1, xmm2, xmm3/mem, xmm1
|
| - xmm1, xmm2/mem, xmm3, xmm1 */
|
| - if (uses_oc0)
|
| - return ((mem_mask == (1 << 1))
|
| - || (mem_mask == (1 << 2))
|
| - || (mem_mask == (1 << 3)));
|
| -
|
| - /* format, example pmacsdd:
|
| - xmm1, xmm2, xmm3/mem, xmm1 */
|
| - if (commutative)
|
| - return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
|
| - else
|
| - return (mem_mask == (1 << 2));
|
| + /* Given that we have SSSE3, we know we'll be able to implement the
|
| + single operand permutation after the palignr with pshufb. */
|
| + if (d->testing_p)
|
| + return true;
|
| +
|
| + shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
|
| + emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
|
| + gen_lowpart (TImode, d->op1),
|
| + gen_lowpart (TImode, d->op0), shift));
|
| +
|
| + d->op0 = d->op1 = d->target;
|
| +
|
| + in_order = true;
|
| + for (i = 0; i < nelt; ++i)
|
| + {
|
| + unsigned e = d->perm[i] - min;
|
| + if (e != i)
|
| + in_order = false;
|
| + d->perm[i] = e;
|
| }
|
|
|
| - else if (num == 4 && num_memory == 2)
|
| + /* Test for the degenerate case where the alignment by itself
|
| + produces the desired permutation. */
|
| + if (in_order)
|
| + return true;
|
| +
|
| + ok = expand_vec_perm_1 (d);
|
| + gcc_assert (ok);
|
| +
|
| + return ok;
|
| +}
|
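| /* Aside: a standalone sketch of the precondition tested above.
|    palignr helps only when every selected element lies in one
|    nelt-wide window [min, min + nelt) with min > 0, so rotating the
|    concatenated operands down by min elements leaves a one-operand
|    permute for expand_vec_perm_1.  */
| static int
| palignr_window_ok (const unsigned char *perm, unsigned nelt)
| {
|   unsigned i, min = 2 * nelt, max = 0;
|
|   for (i = 0; i < nelt; ++i)
|     {
|       if (perm[i] < min)
|         min = perm[i];
|       if (perm[i] > max)
|         max = perm[i];
|     }
|   return min != 0 && max - min < nelt;
| }
|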
| +
|
| +/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
|
| + a two vector permutation into a single vector permutation by using
|
| + an interleave operation to merge the vectors. */
|
| +
|
| +static bool
|
| +expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
|
| +{
|
| + struct expand_vec_perm_d dremap, dfinal;
|
| + unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
|
| + unsigned contents, h1, h2, h3, h4;
|
| + unsigned char remap[2 * MAX_VECT_LEN];
|
| + rtx seq;
|
| + bool ok;
|
| +
|
| + if (d->op0 == d->op1)
|
| + return false;
|
| +
|
| + /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
|
| + lanes. We can use similar techniques with the vperm2f128 instruction,
|
| + but it requires slightly different logic. */
|
| + if (GET_MODE_SIZE (d->vmode) != 16)
|
| + return false;
|
| +
|
| + /* Examine from whence the elements come. */
|
| + contents = 0;
|
| + for (i = 0; i < nelt; ++i)
|
| + contents |= 1u << d->perm[i];
|
| +
|
| + /* Split the two input vectors into 4 halves. */
|
| + h1 = (1u << nelt2) - 1;
|
| + h2 = h1 << nelt2;
|
| + h3 = h2 << nelt2;
|
| + h4 = h3 << nelt2;
|
| +
|
| + memset (remap, 0xff, sizeof (remap));
|
| + dremap = *d;
|
| +
|
| + /* If the elements come from the low halves, use interleave low; similarly
|
| + for interleave high. If the elements are from mis-matched halves, we
|
| + can use shufps for V4SF/V4SI or do a DImode shuffle. */
|
| + if ((contents & (h1 | h3)) == contents)
|
| {
|
| - /* If there are two memory operations, we can load one of the memory ops
|
| - into the destination register. This is for optimizing the
|
| - multiply/add ops, which the combiner has optimized both the multiply
|
| - and the add insns to have a memory operation. We have to be careful
|
| - that the destination doesn't overlap with the inputs. */
|
| - rtx op0 = operands[0];
|
| + for (i = 0; i < nelt2; ++i)
|
| + {
|
| + remap[i] = i * 2;
|
| + remap[i + nelt] = i * 2 + 1;
|
| + dremap.perm[i * 2] = i;
|
| + dremap.perm[i * 2 + 1] = i + nelt;
|
| + }
|
| + }
|
| + else if ((contents & (h2 | h4)) == contents)
|
| + {
|
| + for (i = 0; i < nelt2; ++i)
|
| + {
|
| + remap[i + nelt2] = i * 2;
|
| + remap[i + nelt + nelt2] = i * 2 + 1;
|
| + dremap.perm[i * 2] = i + nelt2;
|
| + dremap.perm[i * 2 + 1] = i + nelt + nelt2;
|
| + }
|
| + }
|
| + else if ((contents & (h1 | h4)) == contents)
|
| + {
|
| + for (i = 0; i < nelt2; ++i)
|
| + {
|
| + remap[i] = i;
|
| + remap[i + nelt + nelt2] = i + nelt2;
|
| + dremap.perm[i] = i;
|
| + dremap.perm[i + nelt2] = i + nelt + nelt2;
|
| + }
|
| + if (nelt != 4)
|
| + {
|
| + dremap.vmode = V2DImode;
|
| + dremap.nelt = 2;
|
| + dremap.perm[0] = 0;
|
| + dremap.perm[1] = 3;
|
| + }
|
| + }
|
| + else if ((contents & (h2 | h3)) == contents)
|
| + {
|
| + for (i = 0; i < nelt2; ++i)
|
| + {
|
| + remap[i + nelt2] = i;
|
| + remap[i + nelt] = i + nelt2;
|
| + dremap.perm[i] = i + nelt2;
|
| + dremap.perm[i + nelt2] = i + nelt;
|
| + }
|
| + if (nelt != 4)
|
| + {
|
| + dremap.vmode = V2DImode;
|
| + dremap.nelt = 2;
|
| + dremap.perm[0] = 1;
|
| + dremap.perm[1] = 2;
|
| + }
|
| + }
|
| + else
|
| + return false;
|
|
|
| - if (reg_mentioned_p (op0, operands[1])
|
| - || reg_mentioned_p (op0, operands[2])
|
| - || reg_mentioned_p (op0, operands[3]))
|
| - return false;
|
| + /* Use the remapping array set up above to move the elements from their
|
| + swizzled locations into their final destinations. */
|
| + dfinal = *d;
|
| + for (i = 0; i < nelt; ++i)
|
| + {
|
| + unsigned e = remap[d->perm[i]];
|
| + gcc_assert (e < nelt);
|
| + dfinal.perm[i] = e;
|
| + }
|
| + dfinal.op0 = gen_reg_rtx (dfinal.vmode);
|
| + dfinal.op1 = dfinal.op0;
|
| + dremap.target = dfinal.op0;
|
|
|
| - /* formats (destination is the first argument), example fmaddss:
|
| - xmm1, xmm1, xmm2, xmm3/mem
|
| - xmm1, xmm1, xmm2/mem, xmm3
|
| - xmm1, xmm2, xmm3/mem, xmm1
|
| - xmm1, xmm2/mem, xmm3, xmm1
|
| + /* Test if the final remap can be done with a single insn. For V4SFmode or
|
| + V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
|
| + start_sequence ();
|
| + ok = expand_vec_perm_1 (&dfinal);
|
| + seq = get_insns ();
|
| + end_sequence ();
|
|
|
| - For the oc0 case, we will load either operands[1] or operands[3] into
|
| - operands[0], so any combination of 2 memory operands is ok. */
|
| - if (uses_oc0)
|
| - return true;
|
| + if (!ok)
|
| + return false;
|
|
|
| - /* format, example pmacsdd:
|
| - xmm1, xmm2, xmm3/mem, xmm1
|
| + if (dremap.vmode != dfinal.vmode)
|
| + {
|
| + dremap.target = gen_lowpart (dremap.vmode, dremap.target);
|
| + dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
|
| + dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
|
| + }
|
|
|
| - For the integer multiply/add instructions be more restrictive and
|
| - require operands[2] and operands[3] to be the memory operands. */
|
| - if (commutative)
|
| - return (mem_mask == ((1 << 1) | (1 << 3)) || ((1 << 2) | (1 << 3)));
|
| - else
|
| - return (mem_mask == ((1 << 2) | (1 << 3)));
|
| + ok = expand_vec_perm_1 (&dremap);
|
| + gcc_assert (ok);
|
| +
|
| + emit_insn (seq);
|
| + return true;
|
| +}
|
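| /* Aside: a standalone sketch of the usage analysis above: set bit e
|    for every source element e the permutation touches, then compare
|    the result against the four half-masks h1..h4 to decide whether a
|    single interleave can gather everything into one vector.  */
| static unsigned
| perm_contents (const unsigned char *perm, unsigned nelt)
| {
|   unsigned i, contents = 0;
|
|   for (i = 0; i < nelt; ++i)
|     contents |= 1u << perm[i];
|   return contents;
| }
|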
| +
|
| +/* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
|
| + permutation with two pshufb insns and an ior. We should have already
|
| + failed all two instruction sequences. */
|
| +
|
| +static bool
|
| +expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
|
| +{
|
| + rtx rperm[2][16], vperm, l, h, op, m128;
|
| + unsigned int i, nelt, eltsz;
|
| +
|
| + if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
|
| + return false;
|
| + gcc_assert (d->op0 != d->op1);
|
| +
|
| + nelt = d->nelt;
|
| + eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
|
| +
|
| + /* Generate two permutation masks. If the required element is within
|
| + the given vector it is shuffled into the proper lane. If the required
|
| + element is in the other vector, force a zero into the lane by setting
|
| + bit 7 in the permutation mask. */
|
| + m128 = GEN_INT (-128);
|
| + for (i = 0; i < nelt; ++i)
|
| + {
|
| + unsigned j, e = d->perm[i];
|
| + unsigned which = (e >= nelt);
|
| + if (e >= nelt)
|
| + e -= nelt;
|
| +
|
| + for (j = 0; j < eltsz; ++j)
|
| + {
|
| + rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
|
| + rperm[1-which][i*eltsz + j] = m128;
|
| + }
|
| }
|
|
|
| - else if (num == 3 && num_memory == 1)
|
| + vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
|
| + vperm = force_reg (V16QImode, vperm);
|
| +
|
| + l = gen_reg_rtx (V16QImode);
|
| + op = gen_lowpart (V16QImode, d->op0);
|
| + emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
|
| +
|
| + vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
|
| + vperm = force_reg (V16QImode, vperm);
|
| +
|
| + h = gen_reg_rtx (V16QImode);
|
| + op = gen_lowpart (V16QImode, d->op1);
|
| + emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
|
| +
|
| + op = gen_lowpart (V16QImode, d->target);
|
| + emit_insn (gen_iorv16qi3 (op, l, h));
|
| +
|
| + return true;
|
| +}
|
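| /* Aside: a standalone sketch (hypothetical helper) of the two-mask
|    trick above.  pshufb zeroes a result byte when bit 7 of its
|    control byte is set, so each operand keeps its own elements and
|    zeroes the lanes owned by the other; por merges the two halves.  */
| static void
| pshufb2_controls (const unsigned char *perm, unsigned nelt, unsigned eltsz,
|                   signed char ctrl0[16], signed char ctrl1[16])
| {
|   unsigned i, j;
|
|   for (i = 0; i < nelt; ++i)
|     {
|       unsigned e = perm[i], which = (e >= nelt);
|
|       if (which)
|         e -= nelt;
|       for (j = 0; j < eltsz; ++j)
|         {
|           signed char k = (signed char) (e * eltsz + j);
|           ctrl0[i * eltsz + j] = which ? -128 : k;
|           ctrl1[i * eltsz + j] = which ? k : -128;
|         }
|     }
| }
|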
| +
|
| +/* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
|
| + and extract-odd permutations. */
|
| +
|
| +static bool
|
| +expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
|
| +{
|
| + rtx t1, t2, t3, t4;
|
| +
|
| + switch (d->vmode)
|
| {
|
| - /* formats, example protb:
|
| - xmm1, xmm2, xmm3/mem
|
| - xmm1, xmm2/mem, xmm3 */
|
| - if (uses_oc0)
|
| - return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
|
| + case V4DFmode:
|
| + t1 = gen_reg_rtx (V4DFmode);
|
| + t2 = gen_reg_rtx (V4DFmode);
|
| +
|
| + /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
|
| + emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
|
| + emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
|
| +
|
| + /* Now an unpck[lh]pd will produce the result required. */
|
| + if (odd)
|
| + t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
|
| + else
|
| + t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
|
| + emit_insn (t3);
|
| + break;
|
| +
|
| + case V8SFmode:
|
| + {
|
| + static const unsigned char perm1[8] = { 0, 2, 1, 3, 4, 6, 5, 7 };
|
| + static const unsigned char perme[8] = { 0, 1, 8, 9, 4, 5, 12, 13 };
|
| + static const unsigned char permo[8] = { 2, 3, 10, 11, 6, 7, 14, 15 };
|
| +
|
| + t1 = gen_reg_rtx (V8SFmode);
|
| + t2 = gen_reg_rtx (V8SFmode);
|
| + t3 = gen_reg_rtx (V8SFmode);
|
| + t4 = gen_reg_rtx (V8SFmode);
|
| +
|
| + /* Shuffle within the 128-bit lanes to produce:
|
| + { 0 2 1 3 4 6 5 7 } and { 8 a 9 b c e d f }. */
|
| + expand_vselect (t1, d->op0, perm1, 8);
|
| + expand_vselect (t2, d->op1, perm1, 8);
|
| +
|
| + /* Shuffle the lanes around to produce:
|
| + { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
|
| + emit_insn (gen_avx_vperm2f128v8sf3 (t3, t1, t2, GEN_INT (0x20)));
|
| + emit_insn (gen_avx_vperm2f128v8sf3 (t4, t1, t2, GEN_INT (0x31)));
|
| +
|
| + /* Now a vpermil2p will produce the result required. */
|
| + /* ??? The vpermil2p requires a vector constant. Another option
|
| + is a unpck[lh]ps to merge the two vectors to produce
|
| + { 0 4 2 6 8 c a e } or { 1 5 3 7 9 d b f }. Then use another
|
| + vpermilps to get the elements into the final order. */
|
| + d->op0 = t3;
|
| + d->op1 = t4;
|
| + memcpy (d->perm, odd ? permo : perme, 8);
|
| + expand_vec_perm_vpermil (d);
|
| + }
|
| + break;
|
| +
|
| + case V2DFmode:
|
| + case V4SFmode:
|
| + case V2DImode:
|
| + case V4SImode:
|
| + /* These are always directly implementable by expand_vec_perm_1. */
|
| + gcc_unreachable ();
|
| +
|
| + case V8HImode:
|
| + if (TARGET_SSSE3)
|
| + return expand_vec_perm_pshufb2 (d);
|
| + else
|
| + {
|
| + /* We need 2*log2(N)-1 operations to achieve odd/even
|
| + with interleave. */
|
| + t1 = gen_reg_rtx (V8HImode);
|
| + t2 = gen_reg_rtx (V8HImode);
|
| + emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
|
| + emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
|
| + emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
|
| + emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
|
| + if (odd)
|
| + t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
|
| + else
|
| + t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
|
| + emit_insn (t3);
|
| + }
|
| + break;
|
|
|
| - /* format, example comeq:
|
| - xmm1, xmm2, xmm3/mem */
|
| + case V16QImode:
|
| + if (TARGET_SSSE3)
|
| + return expand_vec_perm_pshufb2 (d);
|
| else
|
| - return (mem_mask == (1 << 2));
|
| + {
|
| + t1 = gen_reg_rtx (V16QImode);
|
| + t2 = gen_reg_rtx (V16QImode);
|
| + t3 = gen_reg_rtx (V16QImode);
|
| + emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
|
| + emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
|
| + emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
|
| + emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
|
| + emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
|
| + emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
|
| + if (odd)
|
| + t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
|
| + else
|
| + t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
|
| + emit_insn (t3);
|
| + }
|
| + break;
|
| +
|
| + default:
|
| + gcc_unreachable ();
|
| }
|
|
|
| - else
|
| - gcc_unreachable ();
|
| + return true;
|
| +}
|
|
|
| - return false;
|
| +/* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
|
| + extract-even and extract-odd permutations. */
|
| +
|
| +static bool
|
| +expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
|
| +{
|
| + unsigned i, odd, nelt = d->nelt;
|
| +
|
| + odd = d->perm[0];
|
| + if (odd != 0 && odd != 1)
|
| + return false;
|
| +
|
| + for (i = 1; i < nelt; ++i)
|
| + if (d->perm[i] != 2 * i + odd)
|
| + return false;
|
| +
|
| + return expand_vec_perm_even_odd_1 (d, odd);
|
| }
|
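| /* Aside: a standalone sketch of the pattern match above: an
|    extract-even permutation selects {0, 2, 4, ...} and extract-odd
|    selects {1, 3, 5, ...} from the concatenated operands.  */
| static int
| extract_even_odd_p (const unsigned char *perm, unsigned nelt,
|                     unsigned *odd)
| {
|   unsigned i;
|
|   *odd = perm[0];
|   if (*odd != 0 && *odd != 1)
|     return 0;
|   for (i = 1; i < nelt; ++i)
|     if (perm[i] != 2 * i + *odd)
|       return 0;
|   return 1;
| }
|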
|
|
| -
|
| -/* Fixup an SSE5 instruction that has 2 memory input references into a form the
|
| - hardware will allow by using the destination register to load one of the
|
| - memory operations. Presently this is used by the multiply/add routines to
|
| - allow 2 memory references. */
|
| +/* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
|
| + permutations. We assume that expand_vec_perm_1 has already failed. */
|
|
|
| -void
|
| -ix86_expand_sse5_multiple_memory (rtx operands[],
|
| - int num,
|
| - enum machine_mode mode)
|
| -{
|
| - rtx op0 = operands[0];
|
| - if (num != 4
|
| - || memory_operand (op0, mode)
|
| - || reg_mentioned_p (op0, operands[1])
|
| - || reg_mentioned_p (op0, operands[2])
|
| - || reg_mentioned_p (op0, operands[3]))
|
| - gcc_unreachable ();
|
| +static bool
|
| +expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
|
| +{
|
| + unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
|
| + enum machine_mode vmode = d->vmode;
|
| + unsigned char perm2[4];
|
| + rtx op0 = d->op0;
|
| + bool ok;
|
|
|
| - /* For 2 memory operands, pick either operands[1] or operands[3] to move into
|
| - the destination register. */
|
| - if (memory_operand (operands[1], mode))
|
| + switch (vmode)
|
| {
|
| - emit_move_insn (op0, operands[1]);
|
| - operands[1] = op0;
|
| + case V4DFmode:
|
| + case V8SFmode:
|
| + /* These are special-cased in sse.md so that we can optionally
|
| + use the vbroadcast instruction. They expand to two insns
|
| + if the input happens to be in a register. */
|
| + gcc_unreachable ();
|
| +
|
| + case V2DFmode:
|
| + case V2DImode:
|
| + case V4SFmode:
|
| + case V4SImode:
|
| + /* These are always implementable using standard shuffle patterns. */
|
| + gcc_unreachable ();
|
| +
|
| + case V8HImode:
|
| + case V16QImode:
|
| + /* These can be implemented via interleave. We save one insn by
|
| + stopping once we have promoted to V4SImode and then using pshufd. */
|
| + do
|
| + {
|
| + optab otab = vec_interleave_low_optab;
|
| +
|
| + if (elt >= nelt2)
|
| + {
|
| + otab = vec_interleave_high_optab;
|
| + elt -= nelt2;
|
| + }
|
| + nelt2 /= 2;
|
| +
|
| + op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
|
| + vmode = get_mode_wider_vector (vmode);
|
| + op0 = gen_lowpart (vmode, op0);
|
| + }
|
| + while (vmode != V4SImode);
|
| +
|
| + memset (perm2, elt, 4);
|
| + ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
|
| + gcc_assert (ok);
|
| + return true;
|
| +
|
| + default:
|
| + gcc_unreachable ();
|
| }
|
| - else if (memory_operand (operands[3], mode))
|
| +}
|
| +
|
| +/* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
|
| + broadcast permutations. */
|
| +
|
| +static bool
|
| +expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
|
| +{
|
| + unsigned i, elt, nelt = d->nelt;
|
| +
|
| + if (d->op0 != d->op1)
|
| + return false;
|
| +
|
| + elt = d->perm[0];
|
| + for (i = 1; i < nelt; ++i)
|
| + if (d->perm[i] != elt)
|
| + return false;
|
| +
|
| + return expand_vec_perm_broadcast_1 (d);
|
| +}
|
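
As an example, broadcasting element 11 of a V16QImode vector takes the high interleave once (11 >= 8), then a low interleave, after which every 32-bit lane holds four copies of a single source byte and one pshufd-style lane copy finishes. A scalar sketch of that loop over a plain byte array (illustrative only, not GCC code):

    #include <stdio.h>
    #include <string.h>

    /* Each pass interleaves the vector with itself (punpck[lh]*),
       doubling the element width, until 32-bit lanes each hold one
       original byte; a final 4-way lane copy replicates ELT's lane.  */
    int
    main (void)
    {
      unsigned char v[16], t[16];
      unsigned elt = 11, nelt2 = 8;   /* index to broadcast; nelt / 2 */
      unsigned size = 1;              /* current element width, bytes */

      for (unsigned i = 0; i < 16; ++i)
        v[i] = i;

      while (size < 4)                /* stop once lanes are SImode */
        {
          unsigned half = 0;
          if (elt >= nelt2)           /* element is in the high half */
            { half = 8; elt -= nelt2; }
          nelt2 /= 2;
          /* Interleave V with itself at width SIZE.  */
          for (unsigned i = 0; i < 8 / size; ++i)
            {
              memcpy (t + 2 * i * size,       v + half + i * size, size);
              memcpy (t + (2 * i + 1) * size, v + half + i * size, size);
            }
          memcpy (v, t, 16);
          size *= 2;
        }

      /* pshufd-style copy of lane ELT into all four lanes.  */
      for (unsigned lane = 0; lane < 4; ++lane)
        memcpy (t + 4 * lane, v + 4 * elt, 4);
      memcpy (v, t, 16);

      for (unsigned i = 0; i < 16; ++i)
        printf ("%d ", v[i]);         /* prints 11 sixteen times */
      printf ("\n");
      return 0;
    }
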
| +
|
| +/* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
|
| + With all of the interface bits taken care of, perform the expansion
|
| + in D and return true on success. */
|
| +
|
| +static bool
|
| +ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
|
| +{
|
| + /* Try a single instruction expansion. */
|
| + if (expand_vec_perm_1 (d))
|
| + return true;
|
| +
|
| + /* Try sequences of two instructions. */
|
| +
|
| + if (expand_vec_perm_pshuflw_pshufhw (d))
|
| + return true;
|
| +
|
| + if (expand_vec_perm_palignr (d))
|
| + return true;
|
| +
|
| + if (expand_vec_perm_interleave2 (d))
|
| + return true;
|
| +
|
| + if (expand_vec_perm_broadcast (d))
|
| + return true;
|
| +
|
| + /* Try sequences of three instructions. */
|
| +
|
| + if (expand_vec_perm_pshufb2 (d))
|
| + return true;
|
| +
|
| + /* ??? Look for narrow permutations whose element orderings would
|
| + allow the promotion to a wider mode. */
|
| +
|
| + /* ??? Look for sequences of interleave or a wider permute that place
|
| + the data into the correct lanes for a half-vector shuffle like
|
| + pshuf[lh]w or vpermilps. */
|
| +
|
| + /* ??? Look for sequences of interleave that produce the desired results.
|
| + The combinatorics of punpck[lh] get pretty ugly... */
|
| +
|
| + if (expand_vec_perm_even_odd (d))
|
| + return true;
|
| +
|
| + return false;
|
| +}
|
| +
|
| +/* Extract the values from the vector CST into the permutation array in D.
|
| + Return 0 on error, 1 if all values from the permutation come from the
|
| + first vector, 2 if all values from the second vector, and 3 otherwise. */
|
| +
|
| +static int
|
| +extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
|
| +{
|
| + tree list = TREE_VECTOR_CST_ELTS (cst);
|
| + unsigned i, nelt = d->nelt;
|
| + int ret = 0;
|
| +
|
| + for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
|
| {
|
| - emit_move_insn (op0, operands[3]);
|
| - operands[3] = op0;
|
| + unsigned HOST_WIDE_INT e;
|
| +
|
| + if (!host_integerp (TREE_VALUE (list), 1))
|
| + return 0;
|
| + e = tree_low_cst (TREE_VALUE (list), 1);
|
| + if (e >= 2 * nelt)
|
| + return 0;
|
| +
|
| + ret |= (e < nelt ? 1 : 2);
|
| + d->perm[i] = e;
|
| }
|
| - else
|
| - gcc_unreachable ();
|
| + gcc_assert (list == NULL);
|
|
|
| - return;
|
| + /* If all elements come from the second vector, fold them to the first. */
|
| + if (ret == 2)
|
| + for (i = 0; i < nelt; ++i)
|
| + d->perm[i] -= nelt;
|
| +
|
| + return ret;
|
| }
|
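
The return value is thus a two-bit mask of which inputs the constant references: with nelt == 4, { 0 1 2 3 } gives 1, { 4 5 6 7 } gives 2 and is folded down to { 0 1 2 3 }, and { 0 5 2 7 } gives 3. A stand-alone model of the convention, with a plain array in place of GCC's tree list:

    #include <stdio.h>

    /* Bit 0: some index selects from the first vector (e < nelt).
       Bit 1: some index selects from the second (nelt <= e < 2*nelt).
       0 on an out-of-range index; a pure-second-vector mask is
       folded down by nelt, as in extract_vec_perm_cst.  */
    static int
    classify_perm (unsigned char *perm, unsigned nelt)
    {
      int ret = 0;
      for (unsigned i = 0; i < nelt; ++i)
        {
          if (perm[i] >= 2 * nelt)
            return 0;
          ret |= (perm[i] < nelt ? 1 : 2);
        }
      if (ret == 2)
        for (unsigned i = 0; i < nelt; ++i)
          perm[i] -= nelt;
      return ret;
    }

    int
    main (void)
    {
      unsigned char a[4] = { 0, 1, 2, 3 };   /* -> 1                  */
      unsigned char b[4] = { 4, 5, 6, 7 };   /* -> 2, folds to 0 1 2 3 */
      unsigned char c[4] = { 0, 5, 2, 7 };   /* -> 3                  */
      printf ("%d %d %d\n", classify_perm (a, 4),
              classify_perm (b, 4), classify_perm (c, 4));
      return 0;
    }
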
|
|
| -
|
| -/* Table of valid machine attributes. */
|
| -static const struct attribute_spec ix86_attribute_table[] =
|
| +static rtx
|
| +ix86_expand_vec_perm_builtin (tree exp)
|
| {
|
| - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
|
| - /* Stdcall attribute says callee is responsible for popping arguments
|
| - if they are not variable. */
|
| - { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
|
| - /* Fastcall attribute says callee is responsible for popping arguments
|
| - if they are not variable. */
|
| - { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
|
| - /* Cdecl attribute says the callee is a normal C declaration */
|
| - { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
|
| - /* Regparm attribute specifies how many integer arguments are to be
|
| - passed in registers. */
|
| - { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
|
| - /* Sseregparm attribute says we are using x86_64 calling conventions
|
| - for FP arguments. */
|
| - { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
|
| - /* force_align_arg_pointer says this function realigns the stack at entry. */
|
| - { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
|
| - false, true, true, ix86_handle_cconv_attribute },
|
| -#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
|
| - { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
|
| - { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
|
| - { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
|
| -#endif
|
| - { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
|
| - { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
|
| -#ifdef SUBTARGET_ATTRIBUTE_TABLE
|
| - SUBTARGET_ATTRIBUTE_TABLE,
|
| -#endif
|
| - /* ms_abi and sysv_abi calling convention function attributes. */
|
| - { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
|
| - { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
|
| - /* End element. */
|
| - { NULL, 0, 0, false, false, false, NULL }
|
| -};
|
| + struct expand_vec_perm_d d;
|
| + tree arg0, arg1, arg2;
|
|
|
| -/* Implement targetm.vectorize.builtin_vectorization_cost. */
|
| -static int
|
| -x86_builtin_vectorization_cost (bool runtime_test)
|
| + arg0 = CALL_EXPR_ARG (exp, 0);
|
| + arg1 = CALL_EXPR_ARG (exp, 1);
|
| + arg2 = CALL_EXPR_ARG (exp, 2);
|
| +
|
| + d.vmode = TYPE_MODE (TREE_TYPE (arg0));
|
| + d.nelt = GET_MODE_NUNITS (d.vmode);
|
| + d.testing_p = false;
|
| + gcc_assert (VECTOR_MODE_P (d.vmode));
|
| +
|
| + if (TREE_CODE (arg2) != VECTOR_CST)
|
| + {
|
| + error_at (EXPR_LOCATION (exp),
|
| + "vector permutation requires vector constant");
|
| + goto exit_error;
|
| + }
|
| +
|
| + switch (extract_vec_perm_cst (&d, arg2))
|
| + {
|
| + default:
|
| + gcc_unreachable ();
|
| +
|
| + case 0:
|
| + error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
|
| + goto exit_error;
|
| +
|
| + case 3:
|
| + if (!operand_equal_p (arg0, arg1, 0))
|
| + {
|
| + d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
|
| + d.op0 = force_reg (d.vmode, d.op0);
|
| + d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
|
| + d.op1 = force_reg (d.vmode, d.op1);
|
| + break;
|
| + }
|
| +
|
| + /* The elements of PERM reference both operands, but the operands
|
| + are in fact identical. Allow easier matching of the permutation
|
| + by folding it into the single input vector. */
|
| + {
|
| + unsigned i, nelt = d.nelt;
|
| + for (i = 0; i < nelt; ++i)
|
| + if (d.perm[i] >= nelt)
|
| + d.perm[i] -= nelt;
|
| + }
|
| + /* FALLTHRU */
|
| +
|
| + case 1:
|
| + d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
|
| + d.op0 = force_reg (d.vmode, d.op0);
|
| + d.op1 = d.op0;
|
| + break;
|
| +
|
| + case 2:
|
| + d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
|
| + d.op0 = force_reg (d.vmode, d.op0);
|
| + d.op1 = d.op0;
|
| + break;
|
| + }
|
| +
|
| + d.target = gen_reg_rtx (d.vmode);
|
| + if (ix86_expand_vec_perm_builtin_1 (&d))
|
| + return d.target;
|
| +
|
| + /* For compiler generated permutations, we should never get here,
|
| + because the compiler should also be checking the ok hook. But since
|
| + this is a builtin the user has access to, don't abort. */
|
| + switch (d.nelt)
|
| + {
|
| + case 2:
|
| + sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
|
| + break;
|
| + case 4:
|
| + sorry ("vector permutation (%d %d %d %d)",
|
| + d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
|
| + break;
|
| + case 8:
|
| + sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
|
| + d.perm[0], d.perm[1], d.perm[2], d.perm[3],
|
| + d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
|
| + break;
|
| + case 16:
|
| + sorry ("vector permutation "
|
| + "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
|
| + d.perm[0], d.perm[1], d.perm[2], d.perm[3],
|
| + d.perm[4], d.perm[5], d.perm[6], d.perm[7],
|
| + d.perm[8], d.perm[9], d.perm[10], d.perm[11],
|
| + d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
|
| + break;
|
| + default:
|
| + gcc_unreachable ();
|
| + }
|
| + exit_error:
|
| + return CONST0_RTX (d.vmode);
|
| +}
|
| +
|
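
From the user's side, this expander sits behind the per-mode vector-permute builtins registered elsewhere in this patch series. Assuming the __builtin_ia32_vec_perm_v4sf spelling with a (v4sf, v4sf, v4si) signature (neither appears in this hunk, so both are assumptions), a call might look like:

    /* Hypothetical user-level use of the expander above; the selector
       must reach expansion as a vector constant, or the expander
       issues the "requires vector constant" error seen above.  */
    typedef float v4sf __attribute__ ((vector_size (16)));
    typedef int v4si __attribute__ ((vector_size (16)));

    v4sf
    blend_even_odd (v4sf a, v4sf b)
    {
      /* Indices 0-3 select from A, 4-7 from B.  */
      return __builtin_ia32_vec_perm_v4sf (a, b, (v4si) { 0, 5, 2, 7 });
    }
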
| +/* Implement targetm.vectorize.builtin_vec_perm_ok. */
|
| +
|
| +static bool
|
| +ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
|
| {
|
| - /* If the branch of the runtime test is taken - i.e. - the vectorized
|
| - version is skipped - this incurs a misprediction cost (because the
|
| - vectorized version is expected to be the fall-through). So we subtract
|
| - the latency of a mispredicted branch from the costs that are incured
|
| - when the vectorized version is executed.
|
| + struct expand_vec_perm_d d;
|
| + int vec_mask;
|
| + bool ret, one_vec;
|
|
|
| - TODO: The values in individual target tables have to be tuned or new
|
| - fields may be needed. For eg. on K8, the default branch path is the
|
| - not-taken path. If the taken path is predicted correctly, the minimum
|
| - penalty of going down the taken-path is 1 cycle. If the taken-path is
|
| - not predicted correctly, then the minimum penalty is 10 cycles. */
|
| + d.vmode = TYPE_MODE (vec_type);
|
| + d.nelt = GET_MODE_NUNITS (d.vmode);
|
| + d.testing_p = true;
|
|
|
| - if (runtime_test)
|
| + /* Given sufficient ISA support we can just return true here
|
| + for selected vector modes. */
|
| + if (GET_MODE_SIZE (d.vmode) == 16)
|
| {
|
| - return (-(ix86_cost->cond_taken_branch_cost));
|
| + /* All implementable with a single vpperm insn. */
|
| + if (TARGET_XOP)
|
| + return true;
|
| + /* All implementable with 2 pshufb + 1 ior. */
|
| + if (TARGET_SSSE3)
|
| + return true;
|
| + /* All implementable with shufpd or unpck[lh]pd. */
|
| + if (d.nelt == 2)
|
| + return true;
|
| }
|
| - else
|
| - return 0;
|
| +
|
| + vec_mask = extract_vec_perm_cst (&d, mask);
|
| +
|
| + /* This hook cannot be called in response to something that the
|
| + user does (unlike the builtin expander), so we should never see
|
| + an error generated from the extract. */
|
| + gcc_assert (vec_mask > 0 && vec_mask <= 3);
|
| + one_vec = (vec_mask != 3);
|
| +
|
| + /* Implementable with shufps or pshufd. */
|
| + if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
|
| + return true;
|
| +
|
| + /* Otherwise we have to go through the motions and see if we can
|
| + figure out how to generate the requested permutation. */
|
| + d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
|
| + d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
|
| + if (!one_vec)
|
| + d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
|
| +
|
| + start_sequence ();
|
| + ret = ix86_expand_vec_perm_builtin_1 (&d);
|
| + end_sequence ();
|
| +
|
| + return ret;
|
| }
|
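
Two things are going on above: cheap mode/ISA checks answer immediately, and everything else is decided by actually running the expander into a scratch insn sequence (the start_sequence/end_sequence pair) whose insns are simply discarded. A stand-alone model of just the early-out logic, with booleans standing in for the TARGET_XOP and TARGET_SSSE3 tests (assumptions, not GCC code):

    #include <stdbool.h>
    #include <stdio.h>

    /* MODE_SIZE is the vector size in bytes; ONE_VEC says the mask
       references only one input.  A false return means "fall through
       to the trial expansion".  */
    static bool
    vec_perm_fast_path (unsigned mode_size, unsigned nelt, bool one_vec,
                        bool have_xop, bool have_ssse3, bool v4sf_or_v4si)
    {
      if (mode_size == 16)
        {
          if (have_xop)             /* one vpperm insn */
            return true;
          if (have_ssse3)           /* two pshufb plus one por */
            return true;
          if (nelt == 2)            /* shufpd or unpck[lh]pd */
            return true;
        }
      if (one_vec && v4sf_or_v4si)  /* shufps or pshufd */
        return true;
      return false;
    }

    int
    main (void)
    {
      /* Plain SSE2, two-input V8HImode mask: no fast path applies,
         so the hook would run the trial expansion.  */
      printf ("%d\n",
              vec_perm_fast_path (16, 8, false, false, false, false));
      return 0;
    }
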
|
|
| +void
|
| +ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
|
| +{
|
| + struct expand_vec_perm_d d;
|
| + unsigned i, nelt;
|
| +
|
| + d.target = targ;
|
| + d.op0 = op0;
|
| + d.op1 = op1;
|
| + d.vmode = GET_MODE (targ);
|
| + d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
|
| + d.testing_p = false;
|
| +
|
| + for (i = 0; i < nelt; ++i)
|
| + d.perm[i] = i * 2 + odd;
|
| +
|
| + /* We'll either be able to implement the permutation directly... */
|
| + if (expand_vec_perm_1 (&d))
|
| + return;
|
| +
|
| + /* ... or we use the special-case patterns. */
|
| + expand_vec_perm_even_odd_1 (&d, odd);
|
| +}
|
| +
|
| /* This function returns the calling abi specific va_list type node.
|
| It returns the FNDECL specific va_list type. */
|
|
|
| tree
|
| ix86_fn_abi_va_list (tree fndecl)
|
| {
|
| - int abi;
|
| -
|
| if (!TARGET_64BIT)
|
| return va_list_type_node;
|
| gcc_assert (fndecl != NULL_TREE);
|
| - abi = ix86_function_abi ((const_tree) fndecl);
|
|
|
| - if (abi == MS_ABI)
|
| + if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
|
| return ms_va_list_type_node;
|
| else
|
| return sysv_va_list_type_node;
|
| @@ -29726,6 +30316,9 @@ ix86_enum_va_list (int idx, const char **pname, tree *ptree)
|
| #undef TARGET_RETURN_IN_MEMORY
|
| #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
|
|
|
| +#undef TARGET_LEGITIMIZE_ADDRESS
|
| +#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
|
| +
|
| #undef TARGET_ATTRIBUTE_TABLE
|
| #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
|
| #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
|
| @@ -29738,6 +30331,8 @@ ix86_enum_va_list (int idx, const char **pname, tree *ptree)
|
|
|
| #undef TARGET_INIT_BUILTINS
|
| #define TARGET_INIT_BUILTINS ix86_init_builtins
|
| +#undef TARGET_BUILTIN_DECL
|
| +#define TARGET_BUILTIN_DECL ix86_builtin_decl
|
| #undef TARGET_EXPAND_BUILTIN
|
| #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
|
|
|
| @@ -29766,6 +30361,9 @@ ix86_enum_va_list (int idx, const char **pname, tree *ptree)
|
| #undef TARGET_ASM_CLOSE_PAREN
|
| #define TARGET_ASM_CLOSE_PAREN ""
|
|
|
| +#undef TARGET_ASM_BYTE_OP
|
| +#define TARGET_ASM_BYTE_OP ASM_BYTE
|
| +
|
| #undef TARGET_ASM_ALIGNED_HI_OP
|
| #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
|
| #undef TARGET_ASM_ALIGNED_SI_OP
|
| @@ -29829,7 +30427,8 @@ ix86_enum_va_list (int idx, const char **pname, tree *ptree)
|
| #define TARGET_DEFAULT_TARGET_FLAGS \
|
| (TARGET_DEFAULT \
|
| | TARGET_SUBTARGET_DEFAULT \
|
| - | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
|
| + | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT \
|
| + | MASK_FUSED_MADD)
|
|
|
| #undef TARGET_HANDLE_OPTION
|
| #define TARGET_HANDLE_OPTION ix86_handle_option
|
| @@ -29881,10 +30480,12 @@ ix86_enum_va_list (int idx, const char **pname, tree *ptree)
|
| #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
|
| #undef TARGET_GET_DRAP_RTX
|
| #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
|
| -#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
|
| -#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
|
| #undef TARGET_STRICT_ARGUMENT_NAMING
|
| #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
|
| +#undef TARGET_STATIC_CHAIN
|
| +#define TARGET_STATIC_CHAIN ix86_static_chain
|
| +#undef TARGET_TRAMPOLINE_INIT
|
| +#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
|
|
|
| #undef TARGET_GIMPLIFY_VA_ARG_EXPR
|
| #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
|
| @@ -29921,7 +30522,14 @@ ix86_enum_va_list (int idx, const char **pname, tree *ptree)
|
| #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
|
|
|
| #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
|
| -#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
|
| +#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
|
| + ix86_builtin_vectorization_cost
|
| +#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
|
| +#define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
|
| + ix86_vectorize_builtin_vec_perm
|
| +#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
|
| +#define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
|
| + ix86_vectorize_builtin_vec_perm_ok
|
|
|
| #undef TARGET_SET_CURRENT_FUNCTION
|
| #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
|
| @@ -29938,12 +30546,27 @@ ix86_enum_va_list (int idx, const char **pname, tree *ptree)
|
| #undef TARGET_OPTION_PRINT
|
| #define TARGET_OPTION_PRINT ix86_function_specific_print
|
|
|
| -#undef TARGET_OPTION_CAN_INLINE_P
|
| -#define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
|
| +#undef TARGET_CAN_INLINE_P
|
| +#define TARGET_CAN_INLINE_P ix86_can_inline_p
|
|
|
| #undef TARGET_EXPAND_TO_RTL_HOOK
|
| #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
|
|
|
| +#undef TARGET_LEGITIMATE_ADDRESS_P
|
| +#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
|
| +
|
| +#undef TARGET_IRA_COVER_CLASSES
|
| +#define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes
|
| +
|
| +#undef TARGET_FRAME_POINTER_REQUIRED
|
| +#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
|
| +
|
| +#undef TARGET_CAN_ELIMINATE
|
| +#define TARGET_CAN_ELIMINATE ix86_can_eliminate
|
| +
|
| +#undef TARGET_ASM_CODE_END
|
| +#define TARGET_ASM_CODE_END ix86_code_end
|
| +
|
| struct gcc_target targetm = TARGET_INITIALIZER;
|
|
|
| #include "gt-i386.h"
|
|
|