Index: gcc/gcc/config/i386/i386.c |
diff --git a/gcc/gcc/config/i386/i386.c b/gcc/gcc/config/i386/i386.c |
index 947ed5cadd860e67704741bfe532d38c1ad2bf32..070e13aa7a9d1b63c203f651f40a575739644886 100644 |
--- a/gcc/gcc/config/i386/i386.c |
+++ b/gcc/gcc/config/i386/i386.c |
@@ -1,6 +1,6 @@ |
/* Subroutines used for code generation on IA-32. |
- Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, |
- 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 |
+ Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, |
+ 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 |
Free Software Foundation, Inc. |
This file is part of GCC. |
@@ -35,7 +35,6 @@ along with GCC; see the file COPYING3. If not see |
#include "insn-codes.h" |
#include "insn-attr.h" |
#include "flags.h" |
-#include "c-common.h" |
#include "except.h" |
#include "function.h" |
#include "recog.h" |
@@ -54,8 +53,9 @@ along with GCC; see the file COPYING3. If not see |
#include "tm-constrs.h" |
#include "params.h" |
#include "cselib.h" |
+#include "debug.h" |
+#include "dwarf2out.h" |
-static int x86_builtin_vectorization_cost (bool); |
static rtx legitimize_dllimport_symbol (rtx, bool); |
#ifndef CHECK_STACK_LIMIT |
@@ -1036,6 +1036,79 @@ struct processor_costs core2_cost = { |
1, /* cond_not_taken_branch_cost. */ |
}; |
+static const |
+struct processor_costs atom_cost = { |
+ COSTS_N_INSNS (1), /* cost of an add instruction */ |
+ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */ |
+ COSTS_N_INSNS (1), /* variable shift costs */ |
+ COSTS_N_INSNS (1), /* constant shift costs */ |
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ |
+ COSTS_N_INSNS (4), /* HI */ |
+ COSTS_N_INSNS (3), /* SI */ |
+ COSTS_N_INSNS (4), /* DI */ |
+ COSTS_N_INSNS (2)}, /* other */ |
+ 0, /* cost of multiply per each bit set */ |
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */ |
+ COSTS_N_INSNS (26), /* HI */ |
+ COSTS_N_INSNS (42), /* SI */ |
+ COSTS_N_INSNS (74), /* DI */ |
+ COSTS_N_INSNS (74)}, /* other */ |
+ COSTS_N_INSNS (1), /* cost of movsx */ |
+ COSTS_N_INSNS (1), /* cost of movzx */ |
+ 8, /* "large" insn */ |
+ 17, /* MOVE_RATIO */ |
+ 2, /* cost for loading QImode using movzbl */ |
+ {4, 4, 4}, /* cost of loading integer registers |
+ in QImode, HImode and SImode. |
+ Relative to reg-reg move (2). */ |
+ {4, 4, 4}, /* cost of storing integer registers */ |
+ 4, /* cost of reg,reg fld/fst */ |
+ {12, 12, 12}, /* cost of loading fp registers |
+ in SFmode, DFmode and XFmode */ |
+ {6, 6, 8}, /* cost of storing fp registers |
+ in SFmode, DFmode and XFmode */ |
+ 2, /* cost of moving MMX register */ |
+ {8, 8}, /* cost of loading MMX registers |
+ in SImode and DImode */ |
+ {8, 8}, /* cost of storing MMX registers |
+ in SImode and DImode */ |
+ 2, /* cost of moving SSE register */ |
+ {8, 8, 8}, /* cost of loading SSE registers |
+ in SImode, DImode and TImode */ |
+ {8, 8, 8}, /* cost of storing SSE registers |
+ in SImode, DImode and TImode */ |
+ 5, /* MMX or SSE register to integer */ |
+ 32, /* size of l1 cache. */ |
+ 256, /* size of l2 cache. */ |
+ 64, /* size of prefetch block */ |
+ 6, /* number of parallel prefetches */ |
+ 3, /* Branch cost */ |
+ COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */ |
+ COSTS_N_INSNS (8), /* cost of FMUL instruction. */ |
+ COSTS_N_INSNS (20), /* cost of FDIV instruction. */ |
+ COSTS_N_INSNS (8), /* cost of FABS instruction. */ |
+ COSTS_N_INSNS (8), /* cost of FCHS instruction. */ |
+ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ |
+ {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}}, |
+ {libcall, {{32, loop}, {64, rep_prefix_4_byte}, |
+ {8192, rep_prefix_8_byte}, {-1, libcall}}}}, |
+ {{libcall, {{8, loop}, {15, unrolled_loop}, |
+ {2048, rep_prefix_4_byte}, {-1, libcall}}}, |
+ {libcall, {{24, loop}, {32, unrolled_loop}, |
+ {8192, rep_prefix_8_byte}, {-1, libcall}}}}, |
+ 1, /* scalar_stmt_cost. */ |
+ 1, /* scalar load_cost. */ |
+ 1, /* scalar_store_cost. */ |
+ 1, /* vec_stmt_cost. */ |
+ 1, /* vec_to_scalar_cost. */ |
+ 1, /* scalar_to_vec_cost. */ |
+ 1, /* vec_align_load_cost. */ |
+ 2, /* vec_unalign_load_cost. */ |
+ 1, /* vec_store_cost. */ |
+ 3, /* cond_taken_branch_cost. */ |
+ 1, /* cond_not_taken_branch_cost. */ |
+}; |
+ |
/* Generic64 should produce code tuned for Nocona and K8. */ |
static const |
struct processor_costs generic64_cost = { |
@@ -1194,6 +1267,7 @@ const struct processor_costs *ix86_cost = &pentium_cost; |
#define m_PENT4 (1<<PROCESSOR_PENTIUM4) |
#define m_NOCONA (1<<PROCESSOR_NOCONA) |
#define m_CORE2 (1<<PROCESSOR_CORE2) |
+#define m_ATOM (1<<PROCESSOR_ATOM) |
#define m_GEODE (1<<PROCESSOR_GEODE) |
#define m_K6 (1<<PROCESSOR_K6) |
@@ -1231,10 +1305,11 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { |
m_486 | m_PENT, |
/* X86_TUNE_UNROLL_STRLEN */ |
- m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC, |
+ m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6 |
+ | m_CORE2 | m_GENERIC, |
/* X86_TUNE_DEEP_BRANCH_PREDICTION */ |
- m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC, |
+ m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC, |
/* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based |
on simulation result. But after P4 was made, no performance benefit |
@@ -1246,12 +1321,12 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { |
~m_386, |
/* X86_TUNE_USE_SAHF */ |
- m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4 |
+ m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4 |
| m_NOCONA | m_CORE2 | m_GENERIC, |
/* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid |
partial dependencies. */ |
- m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA |
+ m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA |
| m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */, |
/* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial |
@@ -1271,13 +1346,13 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { |
m_386 | m_486 | m_K6_GEODE, |
/* X86_TUNE_USE_SIMODE_FIOP */ |
- ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC), |
+ ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC), |
/* X86_TUNE_USE_MOV0 */ |
m_K6, |
/* X86_TUNE_USE_CLTD */ |
- ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC), |
+ ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC), |
/* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */ |
m_PENT4, |
@@ -1292,8 +1367,8 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { |
~(m_PENT | m_PPRO), |
/* X86_TUNE_PROMOTE_QIMODE */ |
- m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2 |
- | m_GENERIC /* | m_PENT4 ? */, |
+ m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE |
+ | m_CORE2 | m_GENERIC /* | m_PENT4 ? */, |
/* X86_TUNE_FAST_PREFIX */ |
~(m_PENT | m_486 | m_386), |
@@ -1317,26 +1392,28 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { |
m_PPRO, |
/* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */ |
- m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC, |
+ m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA |
+ | m_CORE2 | m_GENERIC, |
/* X86_TUNE_ADD_ESP_8 */ |
- m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386 |
+ m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386 |
| m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC, |
/* X86_TUNE_SUB_ESP_4 */ |
- m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC, |
+ m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 |
+ | m_GENERIC, |
/* X86_TUNE_SUB_ESP_8 */ |
- m_AMD_MULTIPLE | m_PPRO | m_386 | m_486 |
+ m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486 |
| m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC, |
/* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred |
for DFmode copies */ |
- ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 |
+ ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 |
| m_GENERIC | m_GEODE), |
/* X86_TUNE_PARTIAL_REG_DEPENDENCY */ |
- m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC, |
+ m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC, |
/* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a |
conflict here in between PPro/Pentium4 based chips that thread 128bit |
@@ -1347,7 +1424,8 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { |
shows that disabling this option on P4 brings over 20% SPECfp regression, |
while enabling it on K8 brings roughly 2.4% regression that can be partly |
masked by careful scheduling of moves. */ |
- m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10, |
+ m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC |
+ | m_AMDFAM10, |
/* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */ |
m_AMDFAM10, |
@@ -1365,13 +1443,13 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { |
m_PPRO | m_PENT4 | m_NOCONA, |
/* X86_TUNE_MEMORY_MISMATCH_STALL */ |
- m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC, |
+ m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC, |
/* X86_TUNE_PROLOGUE_USING_MOVE */ |
- m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC, |
+ m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC, |
/* X86_TUNE_EPILOGUE_USING_MOVE */ |
- m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC, |
+ m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC, |
/* X86_TUNE_SHIFT1 */ |
~m_486, |
@@ -1387,22 +1465,25 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { |
/* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more |
than 4 branch instructions in the 16 byte window. */ |
- m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC, |
+ m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 |
+ | m_GENERIC, |
/* X86_TUNE_SCHEDULE */ |
- m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC, |
+ m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2 |
+ | m_GENERIC, |
/* X86_TUNE_USE_BT */ |
- m_AMD_MULTIPLE | m_CORE2 | m_GENERIC, |
+ m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC, |
/* X86_TUNE_USE_INCDEC */ |
- ~(m_PENT4 | m_NOCONA | m_GENERIC), |
+ ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM), |
/* X86_TUNE_PAD_RETURNS */ |
m_AMD_MULTIPLE | m_CORE2 | m_GENERIC, |
/* X86_TUNE_EXT_80387_CONSTANTS */ |
- m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC, |
+ m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO |
+ | m_CORE2 | m_GENERIC, |
/* X86_TUNE_SHORTEN_X87_SSE */ |
~m_K8, |
@@ -1447,6 +1528,10 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { |
with a subsequent conditional jump instruction into a single |
compare-and-branch uop. */ |
m_CORE2, |
+ |
+ /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag |
+ will impact LEA instruction selection. */ |
+ m_ATOM, |
}; |
/* Feature tests against the various architecture variations. */ |
@@ -1472,10 +1557,11 @@ static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = { |
}; |
static const unsigned int x86_accumulate_outgoing_args |
- = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC; |
+ = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 |
+ | m_GENERIC; |
static const unsigned int x86_arch_always_fancy_math_387 |
- = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4 |
+ = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 |
| m_NOCONA | m_CORE2 | m_GENERIC; |
static enum stringop_alg stringop_alg = no_stringop; |
@@ -1616,7 +1702,6 @@ int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] = |
rtx ix86_compare_op0 = NULL_RTX; |
rtx ix86_compare_op1 = NULL_RTX; |
-rtx ix86_compare_emitted = NULL_RTX; |
/* Define parameter passing and return registers. */ |
@@ -1637,8 +1722,7 @@ static int const x86_64_int_return_registers[4] = |
/* Define the structure for the machine field in struct function. */ |
-struct stack_local_entry GTY(()) |
-{ |
+struct GTY(()) stack_local_entry { |
unsigned short mode; |
unsigned short n; |
rtx rtl; |
@@ -1725,7 +1809,7 @@ static rtx (*ix86_gen_leave) (void); |
static rtx (*ix86_gen_pop1) (rtx); |
static rtx (*ix86_gen_add3) (rtx, rtx, rtx); |
static rtx (*ix86_gen_sub3) (rtx, rtx, rtx); |
-static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx); |
+static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx); |
static rtx (*ix86_gen_one_cmpl2) (rtx, rtx); |
static rtx (*ix86_gen_monitor) (rtx, rtx, rtx); |
static rtx (*ix86_gen_andsp) (rtx, rtx, rtx); |
@@ -1743,6 +1827,9 @@ static unsigned int ix86_default_incoming_stack_boundary; |
/* Alignment for incoming stack boundary in bits. */ |
unsigned int ix86_incoming_stack_boundary; |
+/* The calling ABI used by the target.  */ |
+enum calling_abi ix86_abi; |
+ |
/* Values 1-5: see jump.c */ |
int ix86_branch_cost; |
@@ -1793,11 +1880,13 @@ static bool ext_80387_constants_init = 0; |
static struct machine_function * ix86_init_machine_status (void); |
static rtx ix86_function_value (const_tree, const_tree, bool); |
+static rtx ix86_static_chain (const_tree, bool); |
static int ix86_function_regparm (const_tree, const_tree); |
static void ix86_compute_frame_layout (struct ix86_frame *); |
static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode, |
rtx, rtx, int); |
static void ix86_add_new_builtins (int); |
+static rtx ix86_expand_vec_perm_builtin (tree); |
enum ix86_function_specific_strings |
{ |
@@ -1818,8 +1907,15 @@ static bool ix86_valid_target_attribute_p (tree, tree, tree, int); |
static bool ix86_valid_target_attribute_inner_p (tree, char *[]); |
static bool ix86_can_inline_p (tree, tree); |
static void ix86_set_current_function (tree); |
+static unsigned int ix86_minimum_incoming_stack_boundary (bool); |
+ |
+static enum calling_abi ix86_function_abi (const_tree); |
+#ifndef SUBTARGET32_DEFAULT_CPU |
+#define SUBTARGET32_DEFAULT_CPU "i386" |
+#endif |
+ |
/* The svr4 ABI for the i386 says that records and unions are returned |
in memory. */ |
#ifndef DEFAULT_PCC_STRUCT_RETURN |
@@ -1866,8 +1962,13 @@ static int ix86_isa_flags_explicit; |
#define OPTION_MASK_ISA_SSE4A_SET \ |
(OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET) |
-#define OPTION_MASK_ISA_SSE5_SET \ |
- (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET) |
+#define OPTION_MASK_ISA_FMA4_SET \ |
+ (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \ |
+ | OPTION_MASK_ISA_AVX_SET) |
+#define OPTION_MASK_ISA_XOP_SET \ |
+ (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET) |
+#define OPTION_MASK_ISA_LWP_SET \ |
+ OPTION_MASK_ISA_LWP |
/* AES and PCLMUL need SSE2 because they use xmm registers */ |
#define OPTION_MASK_ISA_AES_SET \ |
@@ -1877,9 +1978,12 @@ static int ix86_isa_flags_explicit; |
#define OPTION_MASK_ISA_ABM_SET \ |
(OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT) |
+ |
#define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT |
#define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16 |
#define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF |
+#define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE |
+#define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32 |
/* Define a set of ISAs which aren't available when a given ISA is |
disabled. MMX and SSE ISAs are handled separately. */ |
@@ -1905,7 +2009,8 @@ static int ix86_isa_flags_explicit; |
#define OPTION_MASK_ISA_SSE4_2_UNSET \ |
(OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET ) |
#define OPTION_MASK_ISA_AVX_UNSET \ |
- (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET) |
+ (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \ |
+ | OPTION_MASK_ISA_FMA4_UNSET) |
#define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA |
/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same |
@@ -1913,14 +2018,21 @@ static int ix86_isa_flags_explicit; |
#define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET |
#define OPTION_MASK_ISA_SSE4A_UNSET \ |
- (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET) |
-#define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5 |
+ (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET) |
+ |
+#define OPTION_MASK_ISA_FMA4_UNSET \ |
+ (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET) |
+#define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP |
+#define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP |
+ |
#define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES |
#define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL |
#define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM |
#define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT |
#define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16 |
#define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF |
+#define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE |
+#define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32 |
/* Vectorization library interface and handlers. */ |
tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL; |
@@ -1953,7 +2065,8 @@ static const struct ptt processor_target_table[PROCESSOR_max] = |
{&core2_cost, 16, 10, 16, 10, 16}, |
{&generic32_cost, 16, 7, 16, 7, 16}, |
{&generic64_cost, 16, 10, 16, 10, 16}, |
- {&amdfam10_cost, 32, 24, 32, 7, 32} |
+ {&amdfam10_cost, 32, 24, 32, 7, 32}, |
+ {&atom_cost, 16, 7, 16, 7, 16} |
}; |
static const char *const cpu_names[TARGET_CPU_DEFAULT_max] = |
@@ -1971,6 +2084,7 @@ static const char *const cpu_names[TARGET_CPU_DEFAULT_max] = |
"prescott", |
"nocona", |
"core2", |
+ "atom", |
"geode", |
"k6", |
"k6-2", |
@@ -2144,16 +2258,42 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value) |
} |
return true; |
- case OPT_msse5: |
+ case OPT_mfma4: |
+ if (value) |
+ { |
+ ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET; |
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET; |
+ } |
+ else |
+ { |
+ ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET; |
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET; |
+ } |
+ return true; |
+ |
+ case OPT_mxop: |
if (value) |
{ |
- ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET; |
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET; |
+ ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET; |
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET; |
} |
else |
{ |
- ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET; |
- ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET; |
+ ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET; |
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET; |
+ } |
+ return true; |
+ |
+ case OPT_mlwp: |
+ if (value) |
+ { |
+ ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET; |
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET; |
+ } |
+ else |
+ { |
+ ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET; |
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET; |
} |
return true; |
@@ -2209,6 +2349,32 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value) |
} |
return true; |
+ case OPT_mmovbe: |
+ if (value) |
+ { |
+ ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET; |
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET; |
+ } |
+ else |
+ { |
+ ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET; |
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET; |
+ } |
+ return true; |
+ |
+ case OPT_mcrc32: |
+ if (value) |
+ { |
+ ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET; |
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET; |
+ } |
+ else |
+ { |
+ ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET; |
+ ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET; |
+ } |
+ return true; |
+ |
case OPT_maes: |
if (value) |
{ |
@@ -2240,7 +2406,7 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value) |
} |
} |
-/* Return a string the documents the current -m options. The caller is |
+/* Return a string that documents the current -m options. The caller is |
responsible for freeing the string. */ |
static char * |
@@ -2253,12 +2419,15 @@ ix86_target_string (int isa, int flags, const char *arch, const char *tune, |
int mask; /* isa mask options */ |
}; |
- /* This table is ordered so that options like -msse5 or -msse4.2 that imply |
+ /* This table is ordered so that options like -msse4.2 that imply |
preceding options while match those first. */ |
static struct ix86_target_opts isa_opts[] = |
{ |
{ "-m64", OPTION_MASK_ISA_64BIT }, |
- { "-msse5", OPTION_MASK_ISA_SSE5 }, |
+ { "-mfma4", OPTION_MASK_ISA_FMA4 }, |
+ { "-mfma", OPTION_MASK_ISA_FMA }, |
+ { "-mxop", OPTION_MASK_ISA_XOP }, |
+ { "-mlwp", OPTION_MASK_ISA_LWP }, |
{ "-msse4a", OPTION_MASK_ISA_SSE4A }, |
{ "-msse4.2", OPTION_MASK_ISA_SSE4_2 }, |
{ "-msse4.1", OPTION_MASK_ISA_SSE4_1 }, |
@@ -2271,6 +2440,8 @@ ix86_target_string (int isa, int flags, const char *arch, const char *tune, |
{ "-mmmx", OPTION_MASK_ISA_MMX }, |
{ "-mabm", OPTION_MASK_ISA_ABM }, |
{ "-mpopcnt", OPTION_MASK_ISA_POPCNT }, |
+ { "-mmovbe", OPTION_MASK_ISA_MOVBE }, |
+ { "-mcrc32", OPTION_MASK_ISA_CRC32 }, |
{ "-maes", OPTION_MASK_ISA_AES }, |
{ "-mpclmul", OPTION_MASK_ISA_PCLMUL }, |
}; |
@@ -2290,7 +2461,6 @@ ix86_target_string (int isa, int flags, const char *arch, const char *tune, |
{ "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT }, |
{ "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS }, |
{ "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 }, |
- { "-mno-fused-madd", MASK_NO_FUSED_MADD }, |
{ "-mno-push-args", MASK_NO_PUSH_ARGS }, |
{ "-mno-red-zone", MASK_NO_RED_ZONE }, |
{ "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER }, |
@@ -2358,7 +2528,7 @@ ix86_target_string (int isa, int flags, const char *arch, const char *tune, |
if (flags && add_nl_p) |
{ |
opts[num++][0] = target_other; |
- sprintf (target_other, "(other flags: 0x%x)", isa); |
+ sprintf (target_other, "(other flags: 0x%x)", flags); |
} |
/* Add -fpmath= option. */ |
@@ -2439,7 +2609,7 @@ ix86_debug_options (void) |
free (opts); |
} |
else |
- fprintf (stderr, "<no options>\n\n"); |
+ fputs ("<no options>\n\n", stderr); |
return; |
} |
@@ -2458,6 +2628,7 @@ override_options (bool main_args_p) |
{ |
int i; |
unsigned int ix86_arch_mask, ix86_tune_mask; |
+ const bool ix86_tune_specified = (ix86_tune_string != NULL); |
const char *prefix; |
const char *suffix; |
const char *sw; |
@@ -2483,11 +2654,14 @@ override_options (bool main_args_p) |
PTA_NO_SAHF = 1 << 13, |
PTA_SSE4_1 = 1 << 14, |
PTA_SSE4_2 = 1 << 15, |
- PTA_SSE5 = 1 << 16, |
- PTA_AES = 1 << 17, |
- PTA_PCLMUL = 1 << 18, |
- PTA_AVX = 1 << 19, |
- PTA_FMA = 1 << 20 |
+ PTA_AES = 1 << 16, |
+ PTA_PCLMUL = 1 << 17, |
+ PTA_AVX = 1 << 18, |
+ PTA_FMA = 1 << 19, |
+ PTA_MOVBE = 1 << 20, |
+ PTA_FMA4 = 1 << 21, |
+ PTA_XOP = 1 << 22, |
+ PTA_LWP = 1 << 23 |
}; |
static struct pta |
@@ -2529,6 +2703,9 @@ override_options (bool main_args_p) |
{"core2", PROCESSOR_CORE2, CPU_CORE2, |
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 |
| PTA_SSSE3 | PTA_CX16}, |
+ {"atom", PROCESSOR_ATOM, CPU_ATOM, |
+ PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 |
+ | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE}, |
{"geode", PROCESSOR_GEODE, CPU_GEODE, |
PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE}, |
{"k6", PROCESSOR_K6, CPU_K6, PTA_MMX}, |
@@ -2652,8 +2829,12 @@ override_options (bool main_args_p) |
|| !strcmp (ix86_tune_string, "generic64"))) |
; |
else if (!strncmp (ix86_tune_string, "generic", 7)) |
- error ("bad value (%s) for %stune=%s %s", |
+ error ("bad value (%s) for %stune=%s %s", |
ix86_tune_string, prefix, suffix, sw); |
+ else if (!strcmp (ix86_tune_string, "x86-64")) |
+ warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use " |
+ "%stune=k8%s or %stune=generic%s instead as appropriate.", |
+ prefix, suffix, prefix, suffix, prefix, suffix); |
} |
else |
{ |
@@ -2677,6 +2858,7 @@ override_options (bool main_args_p) |
ix86_tune_string = "generic32"; |
} |
} |
+ |
if (ix86_stringop_string) |
{ |
if (!strcmp (ix86_stringop_string, "rep_byte")) |
@@ -2699,22 +2881,25 @@ override_options (bool main_args_p) |
error ("bad value (%s) for %sstringop-strategy=%s %s", |
ix86_stringop_string, prefix, suffix, sw); |
} |
- if (!strcmp (ix86_tune_string, "x86-64")) |
- warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use " |
- "%stune=k8%s or %stune=generic%s instead as appropriate.", |
- prefix, suffix, prefix, suffix, prefix, suffix); |
if (!ix86_arch_string) |
- ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386"; |
+ ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU; |
else |
ix86_arch_specified = 1; |
- if (!strcmp (ix86_arch_string, "generic")) |
- error ("generic CPU can be used only for %stune=%s %s", |
- prefix, suffix, sw); |
- if (!strncmp (ix86_arch_string, "generic", 7)) |
- error ("bad value (%s) for %sarch=%s %s", |
- ix86_arch_string, prefix, suffix, sw); |
+ /* Validate -mabi= value. */ |
+ if (ix86_abi_string) |
+ { |
+ if (strcmp (ix86_abi_string, "sysv") == 0) |
+ ix86_abi = SYSV_ABI; |
+ else if (strcmp (ix86_abi_string, "ms") == 0) |
+ ix86_abi = MS_ABI; |
+ else |
+ error ("unknown ABI (%s) for %sabi=%s %s", |
+ ix86_abi_string, prefix, suffix, sw); |
+ } |
+ else |
+ ix86_abi = DEFAULT_ABI; |
if (ix86_cmodel_string != 0) |
{ |
@@ -2813,9 +2998,15 @@ override_options (bool main_args_p) |
if (processor_alias_table[i].flags & PTA_SSE4A |
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A)) |
ix86_isa_flags |= OPTION_MASK_ISA_SSE4A; |
- if (processor_alias_table[i].flags & PTA_SSE5 |
- && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5)) |
- ix86_isa_flags |= OPTION_MASK_ISA_SSE5; |
+ if (processor_alias_table[i].flags & PTA_FMA4 |
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4)) |
+ ix86_isa_flags |= OPTION_MASK_ISA_FMA4; |
+ if (processor_alias_table[i].flags & PTA_XOP |
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP)) |
+ ix86_isa_flags |= OPTION_MASK_ISA_XOP; |
+ if (processor_alias_table[i].flags & PTA_LWP |
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP)) |
+ ix86_isa_flags |= OPTION_MASK_ISA_LWP; |
if (processor_alias_table[i].flags & PTA_ABM |
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM)) |
ix86_isa_flags |= OPTION_MASK_ISA_ABM; |
@@ -2828,6 +3019,9 @@ override_options (bool main_args_p) |
if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)) |
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF)) |
ix86_isa_flags |= OPTION_MASK_ISA_SAHF; |
+ if (processor_alias_table[i].flags & PTA_MOVBE |
+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE)) |
+ ix86_isa_flags |= OPTION_MASK_ISA_MOVBE; |
if (processor_alias_table[i].flags & PTA_AES |
&& !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES)) |
ix86_isa_flags |= OPTION_MASK_ISA_AES; |
@@ -2840,7 +3034,10 @@ override_options (bool main_args_p) |
break; |
} |
- if (i == pta_size) |
+ if (!strcmp (ix86_arch_string, "generic")) |
+ error ("generic CPU can be used only for %stune=%s %s", |
+ prefix, suffix, sw); |
+ else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size) |
error ("bad value (%s) for %sarch=%s %s", |
ix86_arch_string, prefix, suffix, sw); |
@@ -2879,7 +3076,8 @@ override_options (bool main_args_p) |
x86_prefetch_sse = true; |
break; |
} |
- if (i == pta_size) |
+ |
+ if (ix86_tune_specified && i == pta_size) |
error ("bad value (%s) for %stune=%s %s", |
ix86_tune_string, prefix, suffix, sw); |
@@ -2999,8 +3197,6 @@ override_options (bool main_args_p) |
ix86_tls_dialect = TLS_DIALECT_GNU; |
else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0) |
ix86_tls_dialect = TLS_DIALECT_GNU2; |
- else if (strcmp (ix86_tls_dialect_string, "sun") == 0) |
- ix86_tls_dialect = TLS_DIALECT_SUN; |
else |
error ("bad value (%s) for %stls-dialect=%s %s", |
ix86_tls_dialect_string, prefix, suffix, sw); |
@@ -3091,12 +3287,10 @@ override_options (bool main_args_p) |
if (ix86_force_align_arg_pointer == -1) |
ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT; |
+ ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY; |
+ |
/* Validate -mincoming-stack-boundary= value or default it to |
MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */ |
- if (ix86_force_align_arg_pointer) |
- ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY; |
- else |
- ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY; |
ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary; |
if (ix86_incoming_stack_boundary_string) |
{ |
@@ -3246,7 +3440,7 @@ override_options (bool main_args_p) |
ix86_gen_pop1 = gen_popdi1; |
ix86_gen_add3 = gen_adddi3; |
ix86_gen_sub3 = gen_subdi3; |
- ix86_gen_sub3_carry = gen_subdi3_carry_rex64; |
+ ix86_gen_sub3_carry = gen_subdi3_carry; |
ix86_gen_one_cmpl2 = gen_one_cmpldi2; |
ix86_gen_monitor = gen_sse3_monitor64; |
ix86_gen_andsp = gen_anddi3; |
@@ -3353,12 +3547,6 @@ ix86_conditional_register_usage (void) |
static void |
ix86_function_specific_save (struct cl_target_option *ptr) |
{ |
- gcc_assert (IN_RANGE (ix86_arch, 0, 255)); |
- gcc_assert (IN_RANGE (ix86_schedule, 0, 255)); |
- gcc_assert (IN_RANGE (ix86_tune, 0, 255)); |
- gcc_assert (IN_RANGE (ix86_fpmath, 0, 255)); |
- gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255)); |
- |
ptr->arch = ix86_arch; |
ptr->schedule = ix86_schedule; |
ptr->tune = ix86_tune; |
@@ -3368,6 +3556,14 @@ ix86_function_specific_save (struct cl_target_option *ptr) |
ptr->arch_specified = ix86_arch_specified; |
ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit; |
ptr->target_flags_explicit = target_flags_explicit; |
+ |
+ /* The fields are char but the variables are not; make sure the |
+ values fit in the fields. */ |
+ gcc_assert (ptr->arch == ix86_arch); |
+ gcc_assert (ptr->schedule == ix86_schedule); |
+ gcc_assert (ptr->tune == ix86_tune); |
+ gcc_assert (ptr->fpmath == ix86_fpmath); |
+ gcc_assert (ptr->branch_cost == ix86_branch_cost); |
} |
/* Restore the current options */ |
@@ -3380,10 +3576,10 @@ ix86_function_specific_restore (struct cl_target_option *ptr) |
unsigned int ix86_arch_mask, ix86_tune_mask; |
int i; |
- ix86_arch = ptr->arch; |
- ix86_schedule = ptr->schedule; |
- ix86_tune = ptr->tune; |
- ix86_fpmath = ptr->fpmath; |
+ ix86_arch = (enum processor_type) ptr->arch; |
+ ix86_schedule = (enum attr_cpu) ptr->schedule; |
+ ix86_tune = (enum processor_type) ptr->tune; |
+ ix86_fpmath = (enum fpmath_unit) ptr->fpmath; |
ix86_branch_cost = ptr->branch_cost; |
ix86_tune_defaulted = ptr->tune_defaulted; |
ix86_arch_specified = ptr->arch_specified; |
@@ -3493,8 +3689,10 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[]) |
IX86_ATTR_ISA ("sse4.1", OPT_msse4_1), |
IX86_ATTR_ISA ("sse4.2", OPT_msse4_2), |
IX86_ATTR_ISA ("sse4a", OPT_msse4a), |
- IX86_ATTR_ISA ("sse5", OPT_msse5), |
IX86_ATTR_ISA ("ssse3", OPT_mssse3), |
+ IX86_ATTR_ISA ("fma4", OPT_mfma4), |
+ IX86_ATTR_ISA ("xop", OPT_mxop), |
+ IX86_ATTR_ISA ("lwp", OPT_mlwp), |
/* string options */ |
IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH), |
@@ -3510,10 +3708,6 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[]) |
OPT_mfancy_math_387, |
MASK_NO_FANCY_MATH_387), |
- IX86_ATTR_NO ("fused-madd", |
- OPT_mfused_madd, |
- MASK_NO_FUSED_MADD), |
- |
IX86_ATTR_YES ("ieee-fp", |
OPT_mieee_fp, |
MASK_IEEE_FP), |
@@ -3786,8 +3980,8 @@ ix86_can_inline_p (tree caller, tree callee) |
struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree); |
struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree); |
- /* Callee's isa options should a subset of the caller's, i.e. a SSE5 function |
- can inline a SSE2 function but a SSE2 function can't inline a SSE5 |
+  /* Callee's isa options should be a subset of the caller's, i.e. a SSE4 function |
+ can inline a SSE2 function but a SSE2 function can't inline a SSE4 |
function. */ |
if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags) |
!= callee_opts->ix86_isa_flags) |
@@ -4051,11 +4245,11 @@ x86_elf_aligned_common (FILE *file, |
{ |
if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) |
&& size > (unsigned int)ix86_section_threshold) |
- fprintf (file, ".largecomm\t"); |
+ fputs (".largecomm\t", file); |
else |
- fprintf (file, "%s", COMMON_ASM_OP); |
+ fputs (COMMON_ASM_OP, file); |
assemble_name (file, name); |
- fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", |
+ fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n", |
size, align / BITS_PER_UNIT); |
} |
#endif |
@@ -4131,7 +4325,8 @@ ix86_function_ok_for_sibcall (tree decl, tree exp) |
/* If we need to align the outgoing stack, then sibcalling would |
unalign the stack, which may break the called function. */ |
- if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY) |
+ if (ix86_minimum_incoming_stack_boundary (true) |
+ < PREFERRED_STACK_BOUNDARY) |
return false; |
if (decl) |
@@ -4215,8 +4410,8 @@ ix86_handle_cconv_attribute (tree *node, tree name, |
&& TREE_CODE (*node) != FIELD_DECL |
&& TREE_CODE (*node) != TYPE_DECL) |
{ |
- warning (OPT_Wattributes, "%qs attribute only applies to functions", |
- IDENTIFIER_POINTER (name)); |
+ warning (OPT_Wattributes, "%qE attribute only applies to functions", |
+ name); |
*no_add_attrs = true; |
return NULL_TREE; |
} |
@@ -4235,14 +4430,14 @@ ix86_handle_cconv_attribute (tree *node, tree name, |
if (TREE_CODE (cst) != INTEGER_CST) |
{ |
warning (OPT_Wattributes, |
- "%qs attribute requires an integer constant argument", |
- IDENTIFIER_POINTER (name)); |
+ "%qE attribute requires an integer constant argument", |
+ name); |
*no_add_attrs = true; |
} |
else if (compare_tree_int (cst, REGPARM_MAX) > 0) |
{ |
- warning (OPT_Wattributes, "argument to %qs attribute larger than %d", |
- IDENTIFIER_POINTER (name), REGPARM_MAX); |
+ warning (OPT_Wattributes, "argument to %qE attribute larger than %d", |
+ name, REGPARM_MAX); |
*no_add_attrs = true; |
} |
@@ -4252,9 +4447,10 @@ ix86_handle_cconv_attribute (tree *node, tree name, |
if (TARGET_64BIT) |
{ |
/* Do not warn when emulating the MS ABI. */ |
- if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI) |
- warning (OPT_Wattributes, "%qs attribute ignored", |
- IDENTIFIER_POINTER (name)); |
+ if (TREE_CODE (*node) != FUNCTION_TYPE |
+ || ix86_function_type_abi (*node) != MS_ABI) |
+ warning (OPT_Wattributes, "%qE attribute ignored", |
+ name); |
*no_add_attrs = true; |
return NULL_TREE; |
} |
@@ -4352,33 +4548,15 @@ ix86_function_regparm (const_tree type, const_tree decl) |
tree attr; |
int regparm; |
- static bool error_issued; |
- |
if (TARGET_64BIT) |
return (ix86_function_type_abi (type) == SYSV_ABI |
- ? X86_64_REGPARM_MAX : X64_REGPARM_MAX); |
+ ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX); |
regparm = ix86_regparm; |
attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type)); |
if (attr) |
{ |
- regparm |
- = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))); |
- |
- if (decl && TREE_CODE (decl) == FUNCTION_DECL) |
- { |
- /* We can't use regparm(3) for nested functions because |
- these pass static chain pointer in %ecx register. */ |
- if (!error_issued && regparm == 3 |
- && decl_function_context (decl) |
- && !DECL_NO_STATIC_CHAIN (decl)) |
- { |
- error ("nested functions are limited to 2 register parameters"); |
- error_issued = true; |
- return 0; |
- } |
- } |
- |
+ regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))); |
return regparm; |
} |
@@ -4392,11 +4570,10 @@ ix86_function_regparm (const_tree type, const_tree decl) |
&& !profile_flag) |
{ |
/* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */ |
- struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl)); |
+ struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl)); |
if (i && i->local) |
{ |
int local_regparm, globals = 0, regno; |
- struct function *f; |
/* Make sure no regparm register is taken by a |
fixed register variable. */ |
@@ -4404,23 +4581,12 @@ ix86_function_regparm (const_tree type, const_tree decl) |
if (fixed_regs[local_regparm]) |
break; |
- /* We can't use regparm(3) for nested functions as these use |
- static chain pointer in third argument. */ |
- if (local_regparm == 3 |
- && decl_function_context (decl) |
- && !DECL_NO_STATIC_CHAIN (decl)) |
+ /* We don't want to use regparm(3) for nested functions as |
+ these use a static chain pointer in the third argument. */ |
+ if (local_regparm == 3 && DECL_STATIC_CHAIN (decl)) |
local_regparm = 2; |
- /* If the function realigns its stackpointer, the prologue will |
- clobber %ecx. If we've already generated code for the callee, |
- the callee DECL_STRUCT_FUNCTION is gone, so we fall back to |
- scanning the attributes for the self-realigning property. */ |
- f = DECL_STRUCT_FUNCTION (decl); |
- /* Since current internal arg pointer won't conflict with |
- parameter passing regs, so no need to change stack |
- realignment and adjust regparm number. |
- |
- Each fixed register usage increases register pressure, |
+ /* Each fixed register usage increases register pressure, |
so less registers should be used for argument passing. |
This functionality can be overriden by an explicit |
regparm value. */ |
@@ -4592,15 +4758,15 @@ ix86_function_arg_regno_p (int regno) |
default ABI. */ |
/* RAX is used as hidden argument to va_arg functions. */ |
- if (DEFAULT_ABI == SYSV_ABI && regno == AX_REG) |
+ if (ix86_abi == SYSV_ABI && regno == AX_REG) |
return true; |
- if (DEFAULT_ABI == MS_ABI) |
+ if (ix86_abi == MS_ABI) |
parm_regs = x86_64_ms_abi_int_parameter_registers; |
else |
parm_regs = x86_64_int_parameter_registers; |
- for (i = 0; i < (DEFAULT_ABI == MS_ABI ? X64_REGPARM_MAX |
- : X86_64_REGPARM_MAX); i++) |
+ for (i = 0; i < (ix86_abi == MS_ABI |
+ ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++) |
if (regno == parm_regs[i]) |
return true; |
return false; |
@@ -4627,7 +4793,7 @@ ix86_must_pass_in_stack (enum machine_mode mode, const_tree type) |
int |
ix86_reg_parm_stack_space (const_tree fndecl) |
{ |
- int call_abi = SYSV_ABI; |
+ enum calling_abi call_abi = SYSV_ABI; |
if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL) |
call_abi = ix86_function_abi (fndecl); |
else |
@@ -4639,37 +4805,58 @@ ix86_reg_parm_stack_space (const_tree fndecl) |
/* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the |
call abi used. */ |
-int |
+enum calling_abi |
ix86_function_type_abi (const_tree fntype) |
{ |
if (TARGET_64BIT && fntype != NULL) |
{ |
- int abi; |
- if (DEFAULT_ABI == SYSV_ABI) |
- abi = lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)) ? MS_ABI : SYSV_ABI; |
- else |
- abi = lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)) ? SYSV_ABI : MS_ABI; |
- |
+ enum calling_abi abi = ix86_abi; |
+ if (abi == SYSV_ABI) |
+ { |
+ if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype))) |
+ abi = MS_ABI; |
+ } |
+ else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype))) |
+ abi = SYSV_ABI; |
return abi; |
} |
- return DEFAULT_ABI; |
+ return ix86_abi; |
} |
-int |
+static bool |
+ix86_function_ms_hook_prologue (const_tree fntype) |
+{ |
+ if (!TARGET_64BIT) |
+ { |
+ if (lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fntype))) |
+ { |
+ if (decl_function_context (fntype) != NULL_TREE) |
+ { |
+ error_at (DECL_SOURCE_LOCATION (fntype), |
+ "ms_hook_prologue is not compatible with nested function"); |
+ } |
+ |
+ return true; |
+ } |
+ } |
+ return false; |
+} |
+ |
+static enum calling_abi |
ix86_function_abi (const_tree fndecl) |
{ |
if (! fndecl) |
- return DEFAULT_ABI; |
+ return ix86_abi; |
return ix86_function_type_abi (TREE_TYPE (fndecl)); |
} |
/* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the |
call abi used. */ |
-int |
+enum calling_abi |
ix86_cfun_abi (void) |
{ |
if (! cfun || ! TARGET_64BIT) |
- return DEFAULT_ABI; |
+ return ix86_abi; |
return cfun->machine->call_abi; |
} |
@@ -4683,7 +4870,7 @@ void |
ix86_call_abi_override (const_tree fndecl) |
{ |
if (fndecl == NULL_TREE) |
- cfun->machine->call_abi = DEFAULT_ABI; |
+ cfun->machine->call_abi = ix86_abi; |
else |
cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl)); |
} |
@@ -4724,18 +4911,18 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ |
cum->nregs = ix86_regparm; |
if (TARGET_64BIT) |
{ |
- if (cum->call_abi != DEFAULT_ABI) |
- cum->nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX |
- : X64_REGPARM_MAX; |
+ cum->nregs = (cum->call_abi == SYSV_ABI |
+ ? X86_64_REGPARM_MAX |
+ : X86_64_MS_REGPARM_MAX); |
} |
if (TARGET_SSE) |
{ |
cum->sse_nregs = SSE_REGPARM_MAX; |
if (TARGET_64BIT) |
{ |
- if (cum->call_abi != DEFAULT_ABI) |
- cum->sse_nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX |
- : X64_SSE_REGPARM_MAX; |
+ cum->sse_nregs = (cum->call_abi == SYSV_ABI |
+ ? X86_64_SSE_REGPARM_MAX |
+ : X86_64_MS_SSE_REGPARM_MAX); |
} |
} |
if (TARGET_MMX) |
@@ -5001,6 +5188,8 @@ classify_argument (enum machine_mode mode, const_tree type, |
} |
else |
{ |
+ int pos; |
+ |
type = TREE_TYPE (field); |
/* Flexible array member is ignored. */ |
@@ -5029,13 +5218,10 @@ classify_argument (enum machine_mode mode, const_tree type, |
+ bit_offset) % 256); |
if (!num) |
return 0; |
- for (i = 0; i < num; i++) |
- { |
- int pos = |
- (int_bit_position (field) + (bit_offset % 64)) / 8 / 8; |
- classes[i + pos] = |
- merge_classes (subclasses[i], classes[i + pos]); |
- } |
+ pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8; |
+ for (i = 0; i < num && (i + pos) < words; i++) |
+ classes[i + pos] = |
+ merge_classes (subclasses[i], classes[i + pos]); |
} |
} |
} |
@@ -5163,7 +5349,7 @@ classify_argument (enum machine_mode mode, const_tree type, |
} |
/* for V1xx modes, just use the base mode */ |
- if (VECTOR_MODE_P (mode) && mode != V1DImode |
+ if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode |
&& GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes) |
mode = GET_MODE_INNER (mode); |
@@ -5287,6 +5473,7 @@ classify_argument (enum machine_mode mode, const_tree type, |
classes[0] = X86_64_SSE_CLASS; |
classes[1] = X86_64_SSEUP_CLASS; |
return 2; |
+ case V1TImode: |
case V1DImode: |
case V2SFmode: |
case V2SImode: |
@@ -5631,6 +5818,7 @@ function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode, |
case V4HImode: |
case V2SImode: |
case V2SFmode: |
+ case V1TImode: |
case V1DImode: |
if (!type || !AGGREGATE_TYPE_P (type)) |
{ |
@@ -5657,9 +5845,8 @@ function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode, |
if (!named && VALID_AVX256_REG_MODE (mode)) |
return; |
- if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)) |
- cum->words += words; |
- else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs) |
+ if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs) |
+ && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs) |
{ |
cum->nregs -= int_nregs; |
cum->sse_nregs -= sse_nregs; |
@@ -5667,7 +5854,11 @@ function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode, |
cum->sse_regno += sse_nregs; |
} |
else |
- cum->words += words; |
+ { |
+ int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD; |
+ cum->words = (cum->words + align - 1) & ~(align - 1); |
+ cum->words += words; |
+ } |
} |
static void |
@@ -5700,7 +5891,7 @@ function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode, |
if (type) |
mode = type_natural_mode (type, NULL); |
- if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI) |
+ if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI) |
function_arg_advance_ms_64 (cum, bytes, words); |
else if (TARGET_64BIT) |
function_arg_advance_64 (cum, mode, type, words, named); |
@@ -5818,6 +6009,7 @@ function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode, |
case V4HImode: |
case V2SImode: |
case V2SFmode: |
+ case V1TImode: |
case V1DImode: |
if (!type || !AGGREGATE_TYPE_P (type)) |
{ |
@@ -5846,11 +6038,8 @@ function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode, |
if (mode == VOIDmode) |
return GEN_INT (cum->maybe_vaarg |
? (cum->sse_nregs < 0 |
- ? (cum->call_abi == DEFAULT_ABI |
- ? SSE_REGPARM_MAX |
- : (DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX |
- : X64_SSE_REGPARM_MAX)) |
- : cum->sse_regno) |
+ ? X86_64_SSE_REGPARM_MAX |
+ : cum->sse_regno) |
: -1); |
switch (mode) |
@@ -5942,7 +6131,7 @@ function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode, |
if (type && TREE_CODE (type) == VECTOR_TYPE) |
mode = type_natural_mode (type, cum); |
- if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI) |
+ if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI) |
return function_arg_ms_64 (cum, mode, omode, named, bytes); |
else if (TARGET_64BIT) |
return function_arg_64 (cum, mode, omode, type, named); |
@@ -5962,7 +6151,7 @@ ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED, |
const_tree type, bool named ATTRIBUTE_UNUSED) |
{ |
/* See Windows x64 Software Convention. */ |
- if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI) |
+ if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI) |
{ |
int msize = (int) GET_MODE_SIZE (mode); |
if (type) |
@@ -5996,7 +6185,7 @@ ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED, |
/* Return true when TYPE should be 128bit aligned for 32bit argument passing |
ABI. */ |
static bool |
-contains_aligned_value_p (tree type) |
+contains_aligned_value_p (const_tree type) |
{ |
enum machine_mode mode = TYPE_MODE (type); |
if (((TARGET_SSE && SSE_REG_MODE_P (mode)) |
@@ -6046,7 +6235,7 @@ contains_aligned_value_p (tree type) |
specified mode and type. */ |
int |
-ix86_function_arg_boundary (enum machine_mode mode, tree type) |
+ix86_function_arg_boundary (enum machine_mode mode, const_tree type) |
{ |
int align; |
if (type) |
@@ -6102,7 +6291,7 @@ ix86_function_value_regno_p (int regno) |
/* TODO: The function should depend on current function ABI but |
builtins.c would need updating then. Therefore we use the |
default ABI. */ |
- if (TARGET_64BIT && DEFAULT_ABI == MS_ABI) |
+ if (TARGET_64BIT && ix86_abi == MS_ABI) |
return false; |
return TARGET_FLOAT_RETURNS_IN_80387; |
@@ -6362,12 +6551,12 @@ ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) |
} |
/* Return false iff TYPE is returned in memory. This version is used |
- on Solaris 10. It is similar to the generic ix86_return_in_memory, |
+ on Solaris 2. It is similar to the generic ix86_return_in_memory, |
but differs notably in that when MMX is available, 8-byte vectors |
are returned in memory, rather than in MMX registers. */ |
bool |
-ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) |
+ix86_solaris_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) |
{ |
int size; |
enum machine_mode mode = type_natural_mode (type, NULL); |
@@ -6460,15 +6649,20 @@ ix86_build_builtin_va_list_abi (enum calling_abi abi) |
return build_pointer_type (char_type_node); |
record = (*lang_hooks.types.make_type) (RECORD_TYPE); |
- type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record); |
+ type_decl = build_decl (BUILTINS_LOCATION, |
+ TYPE_DECL, get_identifier ("__va_list_tag"), record); |
- f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"), |
+ f_gpr = build_decl (BUILTINS_LOCATION, |
+ FIELD_DECL, get_identifier ("gp_offset"), |
unsigned_type_node); |
- f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"), |
+ f_fpr = build_decl (BUILTINS_LOCATION, |
+ FIELD_DECL, get_identifier ("fp_offset"), |
unsigned_type_node); |
- f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"), |
+ f_ovf = build_decl (BUILTINS_LOCATION, |
+ FIELD_DECL, get_identifier ("overflow_arg_area"), |
ptr_type_node); |
- f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"), |
+ f_sav = build_decl (BUILTINS_LOCATION, |
+ FIELD_DECL, get_identifier ("reg_save_area"), |
ptr_type_node); |
va_list_gpr_counter_field = f_gpr; |
@@ -6498,13 +6692,13 @@ ix86_build_builtin_va_list_abi (enum calling_abi abi) |
static tree |
ix86_build_builtin_va_list (void) |
{ |
- tree ret = ix86_build_builtin_va_list_abi (DEFAULT_ABI); |
+ tree ret = ix86_build_builtin_va_list_abi (ix86_abi); |
/* Initialize abi specific va_list builtin types. */ |
if (TARGET_64BIT) |
{ |
tree t; |
- if (DEFAULT_ABI == MS_ABI) |
+ if (ix86_abi == MS_ABI) |
{ |
t = ix86_build_builtin_va_list_abi (SYSV_ABI); |
if (TREE_CODE (t) != RECORD_TYPE) |
@@ -6518,7 +6712,7 @@ ix86_build_builtin_va_list (void) |
t = build_variant_type_copy (t); |
sysv_va_list_type_node = t; |
} |
- if (DEFAULT_ABI != MS_ABI) |
+ if (ix86_abi != MS_ABI) |
{ |
t = ix86_build_builtin_va_list_abi (MS_ABI); |
if (TREE_CODE (t) != RECORD_TYPE) |
@@ -6549,10 +6743,6 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum) |
rtx nsse_reg; |
alias_set_type set; |
int i; |
- int regparm = ix86_regparm; |
- |
- if (cum->call_abi != DEFAULT_ABI) |
- regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX; |
/* GPR size of varargs save area. */ |
if (cfun->va_list_gpr_size) |
@@ -6574,7 +6764,7 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum) |
set = get_varargs_alias_set (); |
for (i = cum->regno; |
- i < regparm |
+ i < X86_64_REGPARM_MAX |
&& i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD; |
i++) |
{ |
@@ -6588,6 +6778,10 @@ setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum) |
if (ix86_varargs_fpr_size) |
{ |
+ /* Stack must be aligned to 16 bytes for the FP register save area. */ |
+ if (crtl->stack_alignment_needed < 128) |
+ crtl->stack_alignment_needed = 128; |
+ |
/* Now emit code to save SSE registers. The AX parameter contains number |
of SSE parameter registers used to call this function. We use |
sse_prologue_save insn template that produces computed jump across |
@@ -6649,7 +6843,7 @@ setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum) |
alias_set_type set = get_varargs_alias_set (); |
int i; |
- for (i = cum->regno; i < X64_REGPARM_MAX; i++) |
+ for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++) |
{ |
rtx reg, mem; |
@@ -6705,7 +6899,7 @@ is_va_list_char_pointer (tree type) |
return true; |
canonic = ix86_canonical_va_list_type (type); |
return (canonic == ms_va_list_type_node |
- || (DEFAULT_ABI == MS_ABI && canonic == va_list_type_node)); |
+ || (ix86_abi == MS_ABI && canonic == va_list_type_node)); |
} |
/* Implement va_start. */ |
@@ -6851,7 +7045,6 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, |
/* Pull the value out of the saved registers. */ |
addr = create_tmp_var (ptr_type_node, "addr"); |
- DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set (); |
if (container) |
{ |
@@ -6859,8 +7052,8 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, |
bool need_temp; |
tree int_addr, sse_addr; |
- lab_false = create_artificial_label (); |
- lab_over = create_artificial_label (); |
+ lab_false = create_artificial_label (UNKNOWN_LOCATION); |
+ lab_over = create_artificial_label (UNKNOWN_LOCATION); |
examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs); |
@@ -6906,9 +7099,7 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, |
else |
{ |
int_addr = create_tmp_var (ptr_type_node, "int_addr"); |
- DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set (); |
sse_addr = create_tmp_var (ptr_type_node, "sse_addr"); |
- DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set (); |
} |
/* First ensure that we fit completely in registers. */ |
@@ -6949,7 +7140,7 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, |
} |
if (need_temp) |
{ |
- int i; |
+ int i, prev_size = 0; |
tree temp = create_tmp_var (type, "va_arg_tmp"); |
/* addr = &temp; */ |
@@ -6961,13 +7152,29 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, |
rtx slot = XVECEXP (container, 0, i); |
rtx reg = XEXP (slot, 0); |
enum machine_mode mode = GET_MODE (reg); |
- tree piece_type = lang_hooks.types.type_for_mode (mode, 1); |
- tree addr_type = build_pointer_type (piece_type); |
- tree daddr_type = build_pointer_type_for_mode (piece_type, |
- ptr_mode, true); |
+ tree piece_type; |
+ tree addr_type; |
+ tree daddr_type; |
tree src_addr, src; |
int src_offset; |
tree dest_addr, dest; |
+ int cur_size = GET_MODE_SIZE (mode); |
+ |
+ if (prev_size + cur_size > size) |
+ { |
+ cur_size = size - prev_size; |
+ mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1); |
+ if (mode == BLKmode) |
+ mode = QImode; |
+ } |
+ piece_type = lang_hooks.types.type_for_mode (mode, 1); |
+ if (mode == GET_MODE (reg)) |
+ addr_type = build_pointer_type (piece_type); |
+ else |
+ addr_type = build_pointer_type_for_mode (piece_type, ptr_mode, |
+ true); |
+ daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode, |
+ true); |
if (SSE_REGNO_P (REGNO (reg))) |
{ |
@@ -6982,14 +7189,26 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, |
src_addr = fold_convert (addr_type, src_addr); |
src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr, |
size_int (src_offset)); |
- src = build_va_arg_indirect_ref (src_addr); |
dest_addr = fold_convert (daddr_type, addr); |
dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr, |
size_int (INTVAL (XEXP (slot, 1)))); |
- dest = build_va_arg_indirect_ref (dest_addr); |
+ if (cur_size == GET_MODE_SIZE (mode)) |
+ { |
+ src = build_va_arg_indirect_ref (src_addr); |
+ dest = build_va_arg_indirect_ref (dest_addr); |
- gimplify_assign (dest, src, pre_p); |
+ gimplify_assign (dest, src, pre_p); |
+ } |
+ else |
+ { |
+ tree copy |
+ = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY], |
+ 3, dest_addr, src_addr, |
+ size_int (cur_size)); |
+ gimplify_and_add (copy, pre_p); |
+ } |
+ prev_size += cur_size; |
} |
} |
@@ -7046,7 +7265,7 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, |
if (container) |
gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over)); |
- ptrtype = build_pointer_type (type); |
+ ptrtype = build_pointer_type_for_mode (type, ptr_mode, true); |
addr = fold_convert (ptrtype, addr); |
if (indirect_p) |
@@ -7204,28 +7423,8 @@ standard_80387_constant_rtx (int idx) |
XFmode); |
} |
-/* Return 1 if mode is a valid mode for sse. */ |
-static int |
-standard_sse_mode_p (enum machine_mode mode) |
-{ |
- switch (mode) |
- { |
- case V16QImode: |
- case V8HImode: |
- case V4SImode: |
- case V2DImode: |
- case V4SFmode: |
- case V2DFmode: |
- return 1; |
- |
- default: |
- return 0; |
- } |
-} |
- |
-/* Return 1 if X is all 0s. For all 1s, return 2 if X is in 128bit |
- SSE modes and SSE2 is enabled, return 3 if X is in 256bit AVX |
- modes and AVX is enabled. */ |
+/* Return 1 if X is all 0s and 2 if X is all 1s |
+ in supported SSE vector mode. */ |
int |
standard_sse_constant_p (rtx x) |
@@ -7235,12 +7434,17 @@ standard_sse_constant_p (rtx x) |
if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x))) |
return 1; |
if (vector_all_ones_operand (x, mode)) |
- { |
- if (standard_sse_mode_p (mode)) |
- return TARGET_SSE2 ? 2 : -2; |
- else if (VALID_AVX256_REG_MODE (mode)) |
- return TARGET_AVX ? 3 : -3; |
- } |
+ switch (mode) |
+ { |
+ case V16QImode: |
+ case V8HImode: |
+ case V4SImode: |
+ case V2DImode: |
+ if (TARGET_SSE2) |
+ return 2; |
+ default: |
+ break; |
+ } |
return 0; |
} |
@@ -7269,22 +7473,12 @@ standard_sse_constant_opcode (rtx insn, rtx x) |
case MODE_OI: |
return "vpxor\t%x0, %x0, %x0"; |
default: |
- gcc_unreachable (); |
+ break; |
} |
case 2: |
- if (TARGET_AVX) |
- switch (get_attr_mode (insn)) |
- { |
- case MODE_V4SF: |
- case MODE_V2DF: |
- case MODE_TI: |
- return "vpcmpeqd\t%0, %0, %0"; |
- break; |
- default: |
- gcc_unreachable (); |
- } |
- else |
- return "pcmpeqd\t%0, %0"; |
+ return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0"; |
+ default: |
+ break; |
} |
gcc_unreachable (); |
} |
@@ -7348,18 +7542,18 @@ ix86_can_use_return_insn_p (void) |
Zero means the frame pointer need not be set up (and parms may |
be accessed via the stack pointer) in functions that seem suitable. */ |
-int |
+static bool |
ix86_frame_pointer_required (void) |
{ |
/* If we accessed previous frames, then the generated code expects |
to be able to access the saved ebp value in our frame. */ |
if (cfun->machine->accesses_prev_frame) |
- return 1; |
+ return true; |
/* Several x86 os'es need a frame pointer for other reasons, |
usually pertaining to setjmp. */ |
if (SUBTARGET_FRAME_POINTER_REQUIRED) |
- return 1; |
+ return true; |
/* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off |
the frame pointer by default. Turn it back on now if we've not |
@@ -7367,12 +7561,12 @@ ix86_frame_pointer_required (void) |
if (TARGET_OMIT_LEAF_FRAME_POINTER |
&& (!current_function_is_leaf |
|| ix86_current_function_calls_tls_descriptor)) |
- return 1; |
+ return true; |
if (crtl->profile) |
- return 1; |
+ return true; |
- return 0; |
+ return false; |
} |
/* Record that the current function accesses previous call frames. */ |
@@ -7411,8 +7605,8 @@ get_pc_thunk_name (char name[32], unsigned int regno) |
/* This function generates code for -fpic that loads %ebx with |
the return address of the caller and then returns. */ |
-void |
-ix86_file_end (void) |
+static void |
+ix86_code_end (void) |
{ |
rtx xops[2]; |
int regno; |
@@ -7420,12 +7614,21 @@ ix86_file_end (void) |
for (regno = 0; regno < 8; ++regno) |
{ |
char name[32]; |
+ tree decl; |
if (! ((pic_labels_used >> regno) & 1)) |
continue; |
get_pc_thunk_name (name, regno); |
+ decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, |
+ get_identifier (name), |
+ build_function_type (void_type_node, void_list_node)); |
+ DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL, |
+ NULL_TREE, void_type_node); |
+ TREE_PUBLIC (decl) = 1; |
+ TREE_STATIC (decl) = 1; |
+ |
#if TARGET_MACHO |
if (TARGET_MACHO) |
{ |
@@ -7436,18 +7639,13 @@ ix86_file_end (void) |
assemble_name (asm_out_file, name); |
fputs ("\n", asm_out_file); |
ASM_OUTPUT_LABEL (asm_out_file, name); |
+ DECL_WEAK (decl) = 1; |
} |
else |
#endif |
if (USE_HIDDEN_LINKONCE) |
{ |
- tree decl; |
- |
- decl = build_decl (FUNCTION_DECL, get_identifier (name), |
- error_mark_node); |
- TREE_PUBLIC (decl) = 1; |
- TREE_STATIC (decl) = 1; |
- DECL_ONE_ONLY (decl) = 1; |
+ DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl); |
(*targetm.asm_out.unique_section) (decl, 0); |
switch_to_section (get_named_section (decl, NULL, 0)); |
@@ -7455,7 +7653,7 @@ ix86_file_end (void) |
(*targetm.asm_out.globalize_label) (asm_out_file, name); |
fputs ("\t.hidden\t", asm_out_file); |
assemble_name (asm_out_file, name); |
- fputc ('\n', asm_out_file); |
+ putc ('\n', asm_out_file); |
ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl); |
} |
else |
@@ -7464,14 +7662,23 @@ ix86_file_end (void) |
ASM_OUTPUT_LABEL (asm_out_file, name); |
} |
+ DECL_INITIAL (decl) = make_node (BLOCK); |
+ current_function_decl = decl; |
+ init_function_start (decl); |
+ first_function_block_is_cold = false; |
+ /* Make sure unwind info is emitted for the thunk if needed. */ |
+ final_start_function (emit_barrier (), asm_out_file, 1); |
+ |
xops[0] = gen_rtx_REG (Pmode, regno); |
xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx); |
output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops); |
output_asm_insn ("ret", xops); |
+ final_end_function (); |
+ init_insn_lengths (); |
+ free_after_compilation (cfun); |
+ set_cfun (NULL); |
+ current_function_decl = NULL; |
} |
- |
- if (NEED_INDICATE_EXEC_STACK) |
- file_end_indicate_exec_stack (); |
} |
/* Emit code for the SET_GOT patterns. */ |
@@ -7508,7 +7715,24 @@ output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED) |
if (!flag_pic) |
output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops); |
else |
- output_asm_insn ("call\t%a2", xops); |
+ { |
+ output_asm_insn ("call\t%a2", xops); |
+#ifdef DWARF2_UNWIND_INFO |
+ /* The call to the next label acts as a push. */ |
+ if (dwarf2out_do_frame ()) |
+ { |
+ rtx insn; |
+ start_sequence (); |
+ insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, |
+ gen_rtx_PLUS (Pmode, |
+ stack_pointer_rtx, |
+ GEN_INT (-4)))); |
+ RTX_FRAME_RELATED_P (insn) = 1; |
+ dwarf2out_frame_debug (insn, true); |
+ end_sequence (); |
+ } |
+#endif |
+ } |
#if TARGET_MACHO |
/* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This |
@@ -7521,7 +7745,27 @@ output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED) |
CODE_LABEL_NUMBER (XEXP (xops[2], 0))); |
if (flag_pic) |
- output_asm_insn ("pop%z0\t%0", xops); |
+ { |
+ output_asm_insn ("pop%z0\t%0", xops); |
+#ifdef DWARF2_UNWIND_INFO |
+ /* The pop is a pop and clobbers dest, but doesn't restore it |
+ for unwind info purposes. */ |
+ if (dwarf2out_do_frame ()) |
+ { |
+ rtx insn; |
+ start_sequence (); |
+ insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx)); |
+ dwarf2out_frame_debug (insn, true); |
+ insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, |
+ gen_rtx_PLUS (Pmode, |
+ stack_pointer_rtx, |
+ GEN_INT (4)))); |
+ RTX_FRAME_RELATED_P (insn) = 1; |
+ dwarf2out_frame_debug (insn, true); |
+ end_sequence (); |
+ } |
+#endif |
+ } |
} |
else |
{ |
@@ -7529,6 +7773,18 @@ output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED) |
get_pc_thunk_name (name, REGNO (dest)); |
pic_labels_used |= 1 << REGNO (dest); |
+#ifdef DWARF2_UNWIND_INFO |
+ /* Ensure all queued register saves are flushed before the |
+ call. */ |
+ if (dwarf2out_do_frame ()) |
+ { |
+ rtx insn; |
+ start_sequence (); |
+ insn = emit_barrier (); |
+ end_sequence (); |
+ dwarf2out_frame_debug (insn, false); |
+ } |
+#endif |
xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); |
xops[2] = gen_rtx_MEM (QImode, xops[2]); |
output_asm_insn ("call\t%X2", xops); |
@@ -7559,6 +7815,9 @@ output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED) |
static rtx |
gen_push (rtx arg) |
{ |
+ if (ix86_cfa_state->reg == stack_pointer_rtx) |
+ ix86_cfa_state->offset += UNITS_PER_WORD; |
+ |
return gen_rtx_SET (VOIDmode, |
gen_rtx_MEM (Pmode, |
gen_rtx_PRE_DEC (Pmode, |
@@ -7618,8 +7877,7 @@ ix86_save_reg (unsigned int regno, int maybe_eh_return) |
} |
} |
- if (crtl->drap_reg |
- && regno == REGNO (crtl->drap_reg)) |
+ if (crtl->drap_reg && regno == REGNO (crtl->drap_reg)) |
return 1; |
return (df_regs_ever_live_p (regno) |
@@ -7664,8 +7922,8 @@ ix86_nsaved_sseregs (void) |
pointer. Otherwise, frame pointer elimination is automatically |
handled and all other eliminations are valid. */ |
-int |
-ix86_can_eliminate (int from, int to) |
+static bool |
+ix86_can_eliminate (const int from, const int to) |
{ |
if (stack_realign_fp) |
return ((from == ARG_POINTER_REGNUM |
@@ -7673,7 +7931,7 @@ ix86_can_eliminate (int from, int to) |
|| (from == FRAME_POINTER_REGNUM |
&& to == STACK_POINTER_REGNUM)); |
else |
- return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1; |
+ return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true; |
} |
/* Return the offset between two registers, one to be eliminated, and the other |
@@ -7718,7 +7976,6 @@ ix86_builtin_setjmp_frame_value (void) |
static void |
ix86_compute_frame_layout (struct ix86_frame *frame) |
{ |
- HOST_WIDE_INT total_size; |
unsigned int stack_alignment_needed; |
HOST_WIDE_INT offset; |
unsigned int preferred_alignment; |
@@ -7726,7 +7983,6 @@ ix86_compute_frame_layout (struct ix86_frame *frame) |
frame->nregs = ix86_nsaved_regs (); |
frame->nsseregs = ix86_nsaved_sseregs (); |
- total_size = size; |
stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT; |
preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT; |
@@ -7781,9 +8037,16 @@ ix86_compute_frame_layout (struct ix86_frame *frame) |
else |
frame->save_regs_using_mov = false; |
+ /* Skip return address. */ |
+ offset = UNITS_PER_WORD; |
+ |
+ /* Skip pushed static chain. */ |
+ if (ix86_static_chain_on_stack) |
+ offset += UNITS_PER_WORD; |
- /* Skip return address and saved base pointer. */ |
- offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD; |
+ /* Skip saved base pointer. */ |
+ if (frame_pointer_needed) |
+ offset += UNITS_PER_WORD; |
frame->hard_frame_pointer_offset = offset; |
@@ -7872,26 +8135,6 @@ ix86_compute_frame_layout (struct ix86_frame *frame) |
frame->red_zone_size = 0; |
frame->to_allocate -= frame->red_zone_size; |
frame->stack_pointer_offset -= frame->red_zone_size; |
-#if 0 |
- fprintf (stderr, "\n"); |
- fprintf (stderr, "size: %ld\n", (long)size); |
- fprintf (stderr, "nregs: %ld\n", (long)frame->nregs); |
- fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs); |
- fprintf (stderr, "padding0: %ld\n", (long)frame->padding0); |
- fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed); |
- fprintf (stderr, "padding1: %ld\n", (long)frame->padding1); |
- fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size); |
- fprintf (stderr, "padding2: %ld\n", (long)frame->padding2); |
- fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate); |
- fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size); |
- fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset); |
- fprintf (stderr, "hard_frame_pointer_offset: %ld\n", |
- (long)frame->hard_frame_pointer_offset); |
- fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset); |
- fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf); |
- fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca); |
- fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor); |
-#endif |
} |
/* Emit code to save registers in the prologue. */ |
@@ -7949,6 +8192,49 @@ ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset) |
} |
} |
+static GTY(()) rtx queued_cfa_restores; |
+ |
+/* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack |
+ manipulation insn. Don't add it if the previously |
+ saved value will be left untouched within stack red-zone till return, |
+ as unwinders can find the same value in the register and |
+ on the stack. */ |
+ |
+static void |
+ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT red_offset) |
+{ |
+ if (TARGET_RED_ZONE |
+ && !TARGET_64BIT_MS_ABI |
+ && red_offset + RED_ZONE_SIZE >= 0 |
+ && crtl->args.pops_args < 65536) |
+ return; |
+ |
+ if (insn) |
+ { |
+ add_reg_note (insn, REG_CFA_RESTORE, reg); |
+ RTX_FRAME_RELATED_P (insn) = 1; |
+ } |
+ else |
+ queued_cfa_restores |
+ = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores); |
+} |
+ |
+/* Add queued REG_CFA_RESTORE notes if any to INSN. */ |
+ |
+static void |
+ix86_add_queued_cfa_restore_notes (rtx insn) |
+{ |
+ rtx last; |
+ if (!queued_cfa_restores) |
+ return; |
+ for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1)) |
+ ; |
+ XEXP (last, 1) = REG_NOTES (insn); |
+ REG_NOTES (insn) = queued_cfa_restores; |
+ queued_cfa_restores = NULL_RTX; |
+ RTX_FRAME_RELATED_P (insn) = 1; |
+} |
+ |
/* Expand prologue or epilogue stack adjustment. |
The pattern exist to put a dependency on all ebp-based memory accesses. |
STYLE should be negative if instructions should be marked as frame related, |
@@ -7956,7 +8242,8 @@ ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset) |
otherwise. */ |
static void |
-pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style) |
+pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, |
+ int style, bool set_cfa) |
{ |
rtx insn; |
@@ -7979,7 +8266,24 @@ pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style) |
insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11, |
offset)); |
} |
- if (style < 0) |
+ |
+ if (style >= 0) |
+ ix86_add_queued_cfa_restore_notes (insn); |
+ |
+ if (set_cfa) |
+ { |
+ rtx r; |
+ |
+ gcc_assert (ix86_cfa_state->reg == src); |
+ ix86_cfa_state->offset += INTVAL (offset); |
+ ix86_cfa_state->reg = dest; |
+ |
+ r = gen_rtx_PLUS (Pmode, src, offset); |
+ r = gen_rtx_SET (VOIDmode, dest, r); |
+ add_reg_note (insn, REG_CFA_ADJUST_CFA, r); |
+ RTX_FRAME_RELATED_P (insn) = 1; |
+ } |
+ else if (style < 0) |
RTX_FRAME_RELATED_P (insn) = 1; |
} |
@@ -8005,9 +8309,7 @@ find_drap_reg (void) |
Since function with tail call may use any caller-saved |
registers in epilogue, DRAP must not use caller-saved |
register in such case. */ |
- if ((decl_function_context (decl) |
- && !DECL_NO_STATIC_CHAIN (decl)) |
- || crtl->tail_call_emit) |
+ if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit) |
return R13_REG; |
return R10_REG; |
@@ -8018,9 +8320,7 @@ find_drap_reg (void) |
Since function with tail call may use any caller-saved |
registers in epilogue, DRAP must not use caller-saved |
register in such case. */ |
- if ((decl_function_context (decl) |
- && !DECL_NO_STATIC_CHAIN (decl)) |
- || crtl->tail_call_emit) |
+ if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit) |
return DI_REG; |
/* Reuse static chain register if it isn't used for parameter |
@@ -8034,37 +8334,58 @@ find_drap_reg (void) |
} |
} |
-/* Update incoming stack boundary and estimated stack alignment. */ |
+/* Return minimum incoming stack alignment. */ |
-static void |
-ix86_update_stack_boundary (void) |
+static unsigned int |
+ix86_minimum_incoming_stack_boundary (bool sibcall) |
{ |
+ unsigned int incoming_stack_boundary; |
+ |
/* Prefer the one specified at command line. */ |
- ix86_incoming_stack_boundary |
- = (ix86_user_incoming_stack_boundary |
- ? ix86_user_incoming_stack_boundary |
- : ix86_default_incoming_stack_boundary); |
+ if (ix86_user_incoming_stack_boundary) |
+ incoming_stack_boundary = ix86_user_incoming_stack_boundary; |
+ /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary |
+ if -mstackrealign is used, it isn't used for sibcall check and |
+ estimated stack alignment is 128bit. */ |
+ else if (!sibcall |
+ && !TARGET_64BIT |
+ && ix86_force_align_arg_pointer |
+ && crtl->stack_alignment_estimated == 128) |
+ incoming_stack_boundary = MIN_STACK_BOUNDARY; |
+ else |
+ incoming_stack_boundary = ix86_default_incoming_stack_boundary; |
/* Incoming stack alignment can be changed on individual functions |
via force_align_arg_pointer attribute. We use the smallest |
incoming stack boundary. */ |
- if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY |
+ if (incoming_stack_boundary > MIN_STACK_BOUNDARY |
&& lookup_attribute (ix86_force_align_arg_pointer_string, |
TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))) |
- ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY; |
+ incoming_stack_boundary = MIN_STACK_BOUNDARY; |
/* The incoming stack frame has to be aligned at least at |
parm_stack_boundary. */ |
- if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary) |
- ix86_incoming_stack_boundary = crtl->parm_stack_boundary; |
+ if (incoming_stack_boundary < crtl->parm_stack_boundary) |
+ incoming_stack_boundary = crtl->parm_stack_boundary; |
/* Stack at entrance of main is aligned by runtime. We use the |
smallest incoming stack boundary. */ |
- if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY |
+ if (incoming_stack_boundary > MAIN_STACK_BOUNDARY |
&& DECL_NAME (current_function_decl) |
&& MAIN_NAME_P (DECL_NAME (current_function_decl)) |
&& DECL_FILE_SCOPE_P (current_function_decl)) |
- ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY; |
+ incoming_stack_boundary = MAIN_STACK_BOUNDARY; |
+ |
+ return incoming_stack_boundary; |
+} |
+ |
+/* Update incoming stack boundary and estimated stack alignment. */ |
+ |
+static void |
+ix86_update_stack_boundary (void) |
+{ |
+ ix86_incoming_stack_boundary |
+ = ix86_minimum_incoming_stack_boundary (false); |
/* x86_64 vararg needs 16byte stack alignment for register save |
area. */ |
@@ -8100,7 +8421,11 @@ ix86_get_drap_rtx (void) |
end_sequence (); |
insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ())); |
- RTX_FRAME_RELATED_P (insn) = 1; |
+ if (!optimize) |
+ { |
+ add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg); |
+ RTX_FRAME_RELATED_P (insn) = 1; |
+ } |
return drap_vreg; |
} |
else |
@@ -8115,30 +8440,6 @@ ix86_internal_arg_pointer (void) |
return virtual_incoming_args_rtx; |
} |
-/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook. |
- This is called from dwarf2out.c to emit call frame instructions |
- for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */ |
-static void |
-ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index) |
-{ |
- rtx unspec = SET_SRC (pattern); |
- gcc_assert (GET_CODE (unspec) == UNSPEC); |
- |
- switch (index) |
- { |
- case UNSPEC_REG_SAVE: |
- dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0), |
- SET_DEST (pattern)); |
- break; |
- case UNSPEC_DEF_CFA: |
- dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)), |
- INTVAL (XVECEXP (unspec, 0, 0))); |
- break; |
- default: |
- gcc_unreachable (); |
- } |
-} |
- |
/* Finalize stack_realign_needed flag, which will guide prologue/epilogue |
to be generated in correct form. */ |
static void |
@@ -8176,28 +8477,95 @@ ix86_expand_prologue (void) |
bool pic_reg_used; |
struct ix86_frame frame; |
HOST_WIDE_INT allocate; |
+ int gen_frame_pointer = frame_pointer_needed; |
ix86_finalize_stack_realign_flags (); |
/* DRAP should not coexist with stack_realign_fp */ |
gcc_assert (!(crtl->drap_reg && stack_realign_fp)); |
+ /* Initialize CFA state for before the prologue. */ |
+ ix86_cfa_state->reg = stack_pointer_rtx; |
+ ix86_cfa_state->offset = INCOMING_FRAME_SP_OFFSET; |
+ |
ix86_compute_frame_layout (&frame); |
+ if (ix86_function_ms_hook_prologue (current_function_decl)) |
+ { |
+ rtx push, mov; |
+ |
+ /* Make sure the function starts with |
+ 8b ff movl.s %edi,%edi |
+ 55 push %ebp |
+ 8b ec movl.s %esp,%ebp |
+ |
+ This matches the hookable function prologue in Win32 API |
+ functions in Microsoft Windows XP Service Pack 2 and newer. |
+ Wine uses this to enable Windows apps to hook the Win32 API |
+ functions provided by Wine. */ |
+ insn = emit_insn (gen_vswapmov (gen_rtx_REG (SImode, DI_REG), |
+ gen_rtx_REG (SImode, DI_REG))); |
+ push = emit_insn (gen_push (hard_frame_pointer_rtx)); |
+ mov = emit_insn (gen_vswapmov (hard_frame_pointer_rtx, |
+ stack_pointer_rtx)); |
+ |
+ if (frame_pointer_needed && !(crtl->drap_reg |
+ && crtl->stack_realign_needed)) |
+ { |
+ /* The push %ebp and movl.s %esp, %ebp already set up |
+ the frame pointer. No need to do this again. */ |
+ gen_frame_pointer = 0; |
+ RTX_FRAME_RELATED_P (push) = 1; |
+ RTX_FRAME_RELATED_P (mov) = 1; |
+ if (ix86_cfa_state->reg == stack_pointer_rtx) |
+ ix86_cfa_state->reg = hard_frame_pointer_rtx; |
+ } |
+ else |
+ /* If the frame pointer is not needed, pop %ebp again. This |
+ could be optimized for cases where ebp needs to be backed up |
+ for some other reason. If stack realignment is needed, pop |
+ the base pointer again, align the stack, and later regenerate |
+ the frame pointer setup. The frame pointer generated by the |
+ hook prologue is not aligned, so it can't be used. */ |
+ insn = emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx)); |
+ } |
+ |
+ /* The first insn of a function that accepts its static chain on the |
+ stack is to push the register that would be filled in by a direct |
+ call. This insn will be skipped by the trampoline. */ |
+ if (ix86_static_chain_on_stack) |
+ { |
+ rtx t; |
+ |
+ insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false))); |
+ emit_insn (gen_blockage ()); |
+ |
+ /* We don't want to interpret this push insn as a register save, |
+ only as a stack adjustment. The real copy of the register as |
+ a save will be done later, if needed. */ |
+ t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD); |
+ t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t); |
+ add_reg_note (insn, REG_CFA_ADJUST_CFA, t); |
+ RTX_FRAME_RELATED_P (insn) = 1; |
+ } |
+ |
/* Emit prologue code to adjust stack alignment and setup DRAP, in case |
of DRAP is needed and stack realignment is really needed after reload */ |
if (crtl->drap_reg && crtl->stack_realign_needed) |
{ |
rtx x, y; |
int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT; |
- int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)] |
- ? 0 : UNITS_PER_WORD); |
+ int param_ptr_offset = UNITS_PER_WORD; |
+ |
+ if (ix86_static_chain_on_stack) |
+ param_ptr_offset += UNITS_PER_WORD; |
+ if (!call_used_regs[REGNO (crtl->drap_reg)]) |
+ param_ptr_offset += UNITS_PER_WORD; |
gcc_assert (stack_realign_drap); |
/* Grab the argument pointer. */ |
- x = plus_constant (stack_pointer_rtx, |
- (UNITS_PER_WORD + param_ptr_offset)); |
+ x = plus_constant (stack_pointer_rtx, param_ptr_offset); |
y = crtl->drap_reg; |
/* Only need to push parameter pointer reg if it is caller |
@@ -8211,6 +8579,7 @@ ix86_expand_prologue (void) |
insn = emit_insn (gen_rtx_SET (VOIDmode, y, x)); |
RTX_FRAME_RELATED_P (insn) = 1; |
+ ix86_cfa_state->reg = crtl->drap_reg; |
/* Align the stack. */ |
insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx, |
@@ -8232,13 +8601,16 @@ ix86_expand_prologue (void) |
/* Note: AT&T enter does NOT have reversed args. Enter is probably |
slower on all targets. Also sdb doesn't like it. */ |
- if (frame_pointer_needed) |
+ if (gen_frame_pointer) |
{ |
insn = emit_insn (gen_push (hard_frame_pointer_rtx)); |
RTX_FRAME_RELATED_P (insn) = 1; |
insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); |
RTX_FRAME_RELATED_P (insn) = 1; |
+ |
+ if (ix86_cfa_state->reg == stack_pointer_rtx) |
+ ix86_cfa_state->reg = hard_frame_pointer_rtx; |
} |
if (stack_realign_fp) |
@@ -8277,16 +8649,14 @@ ix86_expand_prologue (void) |
; |
else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT) |
pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
- GEN_INT (-allocate), -1); |
+ GEN_INT (-allocate), -1, |
+ ix86_cfa_state->reg == stack_pointer_rtx); |
else |
{ |
- /* Only valid for Win32. */ |
rtx eax = gen_rtx_REG (Pmode, AX_REG); |
bool eax_live; |
rtx t; |
- gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI); |
- |
if (cfun->machine->call_abi == MS_ABI) |
eax_live = false; |
else |
@@ -8305,11 +8675,15 @@ ix86_expand_prologue (void) |
else |
insn = gen_allocate_stack_worker_32 (eax, eax); |
insn = emit_insn (insn); |
- RTX_FRAME_RELATED_P (insn) = 1; |
- t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate)); |
- t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t); |
- REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, |
- t, REG_NOTES (insn)); |
+ |
+ if (ix86_cfa_state->reg == stack_pointer_rtx) |
+ { |
+ ix86_cfa_state->offset += allocate; |
+ t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate)); |
+ t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t); |
+ add_reg_note (insn, REG_CFA_ADJUST_CFA, t); |
+ RTX_FRAME_RELATED_P (insn) = 1; |
+ } |
if (eax_live) |
{ |
@@ -8397,14 +8771,18 @@ ix86_expand_prologue (void) |
/* vDRAP is setup but after reload it turns out stack realign |
isn't necessary, here we will emit prologue to setup DRAP |
without stack realign adjustment */ |
+ rtx x; |
int drap_bp_offset = UNITS_PER_WORD * 2; |
- rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset); |
+ |
+ if (ix86_static_chain_on_stack) |
+ drap_bp_offset += UNITS_PER_WORD; |
+ x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset); |
insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x)); |
} |
/* Prevent instructions from being scheduled into register save push |
sequence when access to the redzone area is done through frame pointer. |
- The offset betweeh the frame pointer and the stack pointer is calculated |
+ The offset between the frame pointer and the stack pointer is calculated |
relative to the value of the stack pointer at the end of the function |
prologue, and moving instructions that access redzone area via frame |
pointer inside push sequence violates this assumption. */ |
@@ -8416,18 +8794,107 @@ ix86_expand_prologue (void) |
emit_insn (gen_cld ()); |
} |
+/* Emit code to restore REG using a POP insn. */ |
+ |
+static void |
+ix86_emit_restore_reg_using_pop (rtx reg, HOST_WIDE_INT red_offset) |
+{ |
+ rtx insn = emit_insn (ix86_gen_pop1 (reg)); |
+ |
+ if (ix86_cfa_state->reg == crtl->drap_reg |
+ && REGNO (reg) == REGNO (crtl->drap_reg)) |
+ { |
+ /* Previously we'd represented the CFA as an expression |
+ like *(%ebp - 8). We've just popped that value from |
+ the stack, which means we need to reset the CFA to |
+ the drap register. This will remain until we restore |
+ the stack pointer. */ |
+ add_reg_note (insn, REG_CFA_DEF_CFA, reg); |
+ RTX_FRAME_RELATED_P (insn) = 1; |
+ return; |
+ } |
+ |
+ if (ix86_cfa_state->reg == stack_pointer_rtx) |
+ { |
+ ix86_cfa_state->offset -= UNITS_PER_WORD; |
+ add_reg_note (insn, REG_CFA_ADJUST_CFA, |
+ copy_rtx (XVECEXP (PATTERN (insn), 0, 1))); |
+ RTX_FRAME_RELATED_P (insn) = 1; |
+ } |
+ |
+ /* When the frame pointer is the CFA, and we pop it, we are |
+ swapping back to the stack pointer as the CFA. This happens |
+ for stack frames that don't allocate other data, so we assume |
+ the stack pointer is now pointing at the return address, i.e. |
+ the function entry state, which makes the offset be 1 word. */ |
+ else if (ix86_cfa_state->reg == hard_frame_pointer_rtx |
+ && reg == hard_frame_pointer_rtx) |
+ { |
+ ix86_cfa_state->reg = stack_pointer_rtx; |
+ ix86_cfa_state->offset -= UNITS_PER_WORD; |
+ |
+ add_reg_note (insn, REG_CFA_DEF_CFA, |
+ gen_rtx_PLUS (Pmode, stack_pointer_rtx, |
+ GEN_INT (ix86_cfa_state->offset))); |
+ RTX_FRAME_RELATED_P (insn) = 1; |
+ } |
+ |
+ ix86_add_cfa_restore_note (insn, reg, red_offset); |
+} |
+ |
+/* Emit code to restore saved registers using POP insns. */ |
+ |
+static void |
+ix86_emit_restore_regs_using_pop (HOST_WIDE_INT red_offset) |
+{ |
+ int regno; |
+ |
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
+ if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false)) |
+ { |
+ ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno), |
+ red_offset); |
+ red_offset += UNITS_PER_WORD; |
+ } |
+} |
+ |
+/* Emit code and notes for the LEAVE instruction. */ |
+ |
+static void |
+ix86_emit_leave (HOST_WIDE_INT red_offset) |
+{ |
+ rtx insn = emit_insn (ix86_gen_leave ()); |
+ |
+ ix86_add_queued_cfa_restore_notes (insn); |
+ |
+ if (ix86_cfa_state->reg == hard_frame_pointer_rtx) |
+ { |
+ ix86_cfa_state->reg = stack_pointer_rtx; |
+ ix86_cfa_state->offset -= UNITS_PER_WORD; |
+ |
+ add_reg_note (insn, REG_CFA_ADJUST_CFA, |
+ copy_rtx (XVECEXP (PATTERN (insn), 0, 0))); |
+ RTX_FRAME_RELATED_P (insn) = 1; |
+ ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx, red_offset); |
+ } |
+} |
+ |
/* Emit code to restore saved registers using MOV insns. First register |
is restored from POINTER + OFFSET. */ |
static void |
ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset, |
+ HOST_WIDE_INT red_offset, |
int maybe_eh_return) |
{ |
- int regno; |
+ unsigned int regno; |
rtx base_address = gen_rtx_MEM (Pmode, pointer); |
+ rtx insn; |
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return)) |
{ |
+ rtx reg = gen_rtx_REG (Pmode, regno); |
+ |
/* Ensure that adjust_address won't be forced to produce pointer |
out of range allowed by x86-64 instruction set. */ |
if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode)) |
@@ -8440,9 +8907,25 @@ ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset, |
base_address = gen_rtx_MEM (Pmode, r11); |
offset = 0; |
} |
- emit_move_insn (gen_rtx_REG (Pmode, regno), |
- adjust_address (base_address, Pmode, offset)); |
+ insn = emit_move_insn (reg, |
+ adjust_address (base_address, Pmode, offset)); |
offset += UNITS_PER_WORD; |
+ |
+ if (ix86_cfa_state->reg == crtl->drap_reg |
+ && regno == REGNO (crtl->drap_reg)) |
+ { |
+ /* Previously we'd represented the CFA as an expression |
+ like *(%ebp - 8). We've just popped that value from |
+ the stack, which means we need to reset the CFA to |
+ the drap register. This will remain until we restore |
+ the stack pointer. */ |
+ add_reg_note (insn, REG_CFA_DEF_CFA, reg); |
+ RTX_FRAME_RELATED_P (insn) = 1; |
+ } |
+ else |
+ ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset); |
+ |
+ red_offset += UNITS_PER_WORD; |
} |
} |
@@ -8450,6 +8933,7 @@ ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset, |
is restored from POINTER + OFFSET. */ |
static void |
ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset, |
+ HOST_WIDE_INT red_offset, |
int maybe_eh_return) |
{ |
int regno; |
@@ -8459,6 +8943,8 @@ ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset, |
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return)) |
{ |
+ rtx reg = gen_rtx_REG (TImode, regno); |
+ |
/* Ensure that adjust_address won't be forced to produce pointer |
out of range allowed by x86-64 instruction set. */ |
if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode)) |
@@ -8473,8 +8959,12 @@ ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset, |
} |
mem = adjust_address (base_address, TImode, offset); |
set_mem_align (mem, 128); |
- emit_move_insn (gen_rtx_REG (TImode, regno), mem); |
+ emit_move_insn (reg, mem); |
offset += 16; |
+ |
+ ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset); |
+ |
+ red_offset += 16; |
} |
} |
@@ -8483,10 +8973,11 @@ ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset, |
void |
ix86_expand_epilogue (int style) |
{ |
- int regno; |
int sp_valid; |
struct ix86_frame frame; |
- HOST_WIDE_INT offset; |
+ HOST_WIDE_INT offset, red_offset; |
+ struct machine_cfa_state cfa_state_save = *ix86_cfa_state; |
+ bool using_drap; |
ix86_finalize_stack_realign_flags (); |
@@ -8502,6 +8993,9 @@ ix86_expand_epilogue (int style) |
if (frame_pointer_needed && frame.red_zone_size) |
emit_insn (gen_memory_blockage ()); |
+ using_drap = crtl->drap_reg && crtl->stack_realign_needed; |
+ gcc_assert (!using_drap || ix86_cfa_state->reg == crtl->drap_reg); |
+ |
/* Calculate start of saved registers relative to ebp. Special care |
must be taken for the normal return case of a function using |
eh_return: the eax and edx registers are marked as saved, but not |
@@ -8512,6 +9006,21 @@ ix86_expand_epilogue (int style) |
offset *= -UNITS_PER_WORD; |
offset -= frame.nsseregs * 16 + frame.padding0; |
+ /* Calculate start of saved registers relative to esp on entry of the |
+ function. When realigning stack, this needs to be the most negative |
+ value possible at runtime. */ |
+ red_offset = offset; |
+ if (using_drap) |
+ red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT |
+ + UNITS_PER_WORD; |
+ else if (stack_realign_fp) |
+ red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT |
+ - UNITS_PER_WORD; |
+ if (ix86_static_chain_on_stack) |
+ red_offset -= UNITS_PER_WORD; |
+ if (frame_pointer_needed) |
+ red_offset -= UNITS_PER_WORD; |
+ |
/* If we're only restoring one register and sp is not valid then |
using a move instruction to restore the register since it's |
less work than reloading sp and popping the register. |
@@ -8528,7 +9037,7 @@ ix86_expand_epilogue (int style) |
&& ((frame.nregs + frame.nsseregs) > 1 |
|| (frame.to_allocate + frame.padding0) != 0)) |
|| (frame_pointer_needed && !(frame.nregs + frame.nsseregs) |
- && (frame.to_allocate + frame.padding0) != 0) |
+ && (frame.to_allocate + frame.padding0) != 0) |
|| (frame_pointer_needed && TARGET_USE_LEAVE |
&& cfun->machine->use_fast_prologue_epilogue |
&& (frame.nregs + frame.nsseregs) == 1) |
@@ -8538,32 +9047,42 @@ ix86_expand_epilogue (int style) |
locations. If both are available, default to ebp, since offsets |
are known to be small. Only exception is esp pointing directly |
to the end of block of saved registers, where we may simplify |
- addressing mode. |
+ addressing mode. |
If we are realigning stack with bp and sp, regs restore can't |
be addressed by bp. sp must be used instead. */ |
if (!frame_pointer_needed |
- || (sp_valid && !(frame.to_allocate + frame.padding0)) |
+ || (sp_valid && !(frame.to_allocate + frame.padding0)) |
|| stack_realign_fp) |
{ |
ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx, |
- frame.to_allocate, style == 2); |
+ frame.to_allocate, red_offset, |
+ style == 2); |
ix86_emit_restore_regs_using_mov (stack_pointer_rtx, |
frame.to_allocate |
+ frame.nsseregs * 16 |
+ + frame.padding0, |
+ red_offset |
+ + frame.nsseregs * 16 |
+ frame.padding0, style == 2); |
} |
else |
{ |
ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx, |
- offset, style == 2); |
+ offset, red_offset, |
+ style == 2); |
ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, |
offset |
+ frame.nsseregs * 16 |
+ + frame.padding0, |
+ red_offset |
+ + frame.nsseregs * 16 |
+ frame.padding0, style == 2); |
} |
+ red_offset -= offset; |
+ |
/* eh_return epilogues need %ecx added to the stack pointer. */ |
if (style == 2) |
{ |
@@ -8571,18 +9090,36 @@ ix86_expand_epilogue (int style) |
/* Stack align doesn't work with eh_return. */ |
gcc_assert (!crtl->stack_realign_needed); |
+ /* Neither does regparm nested functions. */ |
+ gcc_assert (!ix86_static_chain_on_stack); |
if (frame_pointer_needed) |
{ |
tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa); |
tmp = plus_constant (tmp, UNITS_PER_WORD); |
- emit_insn (gen_rtx_SET (VOIDmode, sa, tmp)); |
+ tmp = emit_insn (gen_rtx_SET (VOIDmode, sa, tmp)); |
tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx); |
- emit_move_insn (hard_frame_pointer_rtx, tmp); |
+ tmp = emit_move_insn (hard_frame_pointer_rtx, tmp); |
+ |
+ /* Note that we use SA as a temporary CFA, as the return |
+ address is at the proper place relative to it. We |
+ pretend this happens at the FP restore insn because |
+ prior to this insn the FP would be stored at the wrong |
+ offset relative to SA, and after this insn we have no |
+ other reasonable register to use for the CFA. We don't |
+ bother resetting the CFA to the SP for the duration of |
+ the return insn. */ |
+ add_reg_note (tmp, REG_CFA_DEF_CFA, |
+ plus_constant (sa, UNITS_PER_WORD)); |
+ ix86_add_queued_cfa_restore_notes (tmp); |
+ add_reg_note (tmp, REG_CFA_RESTORE, hard_frame_pointer_rtx); |
+ RTX_FRAME_RELATED_P (tmp) = 1; |
+ ix86_cfa_state->reg = sa; |
+ ix86_cfa_state->offset = UNITS_PER_WORD; |
pro_epilogue_adjust_stack (stack_pointer_rtx, sa, |
- const0_rtx, style); |
+ const0_rtx, style, false); |
} |
else |
{ |
@@ -8591,7 +9128,18 @@ ix86_expand_epilogue (int style) |
+ frame.nregs * UNITS_PER_WORD |
+ frame.nsseregs * 16 |
+ frame.padding0)); |
- emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp)); |
+ tmp = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp)); |
+ ix86_add_queued_cfa_restore_notes (tmp); |
+ |
+ gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx); |
+ if (ix86_cfa_state->offset != UNITS_PER_WORD) |
+ { |
+ ix86_cfa_state->offset = UNITS_PER_WORD; |
+ add_reg_note (tmp, REG_CFA_DEF_CFA, |
+ plus_constant (stack_pointer_rtx, |
+ UNITS_PER_WORD)); |
+ RTX_FRAME_RELATED_P (tmp) = 1; |
+ } |
} |
} |
else if (!frame_pointer_needed) |
@@ -8600,18 +9148,18 @@ ix86_expand_epilogue (int style) |
+ frame.nregs * UNITS_PER_WORD |
+ frame.nsseregs * 16 |
+ frame.padding0), |
- style); |
+ style, !using_drap); |
/* If not an i386, mov & pop is faster than "leave". */ |
else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun) |
|| !cfun->machine->use_fast_prologue_epilogue) |
- emit_insn ((*ix86_gen_leave) ()); |
+ ix86_emit_leave (red_offset); |
else |
{ |
pro_epilogue_adjust_stack (stack_pointer_rtx, |
hard_frame_pointer_rtx, |
- const0_rtx, style); |
+ const0_rtx, style, !using_drap); |
- emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx)); |
+ ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx, red_offset); |
} |
} |
else |
@@ -8629,33 +9177,37 @@ ix86_expand_epilogue (int style) |
gcc_assert (!stack_realign_fp); |
pro_epilogue_adjust_stack (stack_pointer_rtx, |
hard_frame_pointer_rtx, |
- GEN_INT (offset), style); |
+ GEN_INT (offset), style, false); |
ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx, |
- 0, style == 2); |
+ 0, red_offset, |
+ style == 2); |
pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
- GEN_INT (frame.nsseregs * 16 + |
- frame.padding0), style); |
+ GEN_INT (frame.nsseregs * 16 |
+ + frame.padding0), |
+ style, false); |
} |
else if (frame.to_allocate || frame.padding0 || frame.nsseregs) |
{ |
ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx, |
- frame.to_allocate, |
+ frame.to_allocate, red_offset, |
style == 2); |
pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
GEN_INT (frame.to_allocate |
+ frame.nsseregs * 16 |
- + frame.padding0), style); |
+ + frame.padding0), style, |
+ !using_drap && !frame_pointer_needed); |
} |
- for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
- if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false)) |
- emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno))); |
+ ix86_emit_restore_regs_using_pop (red_offset + frame.nsseregs * 16 |
+ + frame.padding0); |
+ red_offset -= offset; |
+ |
if (frame_pointer_needed) |
{ |
/* Leave results in shorter dependency chains on CPUs that are |
able to grok it fast. */ |
if (TARGET_USE_LEAVE) |
- emit_insn ((*ix86_gen_leave) ()); |
+ ix86_emit_leave (red_offset); |
else |
{ |
/* For stack realigned really happens, recover stack |
@@ -8664,47 +9216,94 @@ ix86_expand_epilogue (int style) |
if (stack_realign_fp) |
pro_epilogue_adjust_stack (stack_pointer_rtx, |
hard_frame_pointer_rtx, |
- const0_rtx, style); |
- emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx)); |
+ const0_rtx, style, !using_drap); |
+ ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx, |
+ red_offset); |
} |
} |
} |
- if (crtl->drap_reg && crtl->stack_realign_needed) |
+ if (using_drap) |
{ |
- int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)] |
- ? 0 : UNITS_PER_WORD); |
+ int param_ptr_offset = UNITS_PER_WORD; |
+ rtx insn; |
+ |
gcc_assert (stack_realign_drap); |
- emit_insn ((*ix86_gen_add3) (stack_pointer_rtx, |
- crtl->drap_reg, |
- GEN_INT (-(UNITS_PER_WORD |
- + param_ptr_offset)))); |
+ |
+ if (ix86_static_chain_on_stack) |
+ param_ptr_offset += UNITS_PER_WORD; |
if (!call_used_regs[REGNO (crtl->drap_reg)]) |
- emit_insn ((*ix86_gen_pop1) (crtl->drap_reg)); |
- |
+ param_ptr_offset += UNITS_PER_WORD; |
+ |
+ insn = emit_insn (gen_rtx_SET |
+ (VOIDmode, stack_pointer_rtx, |
+ gen_rtx_PLUS (Pmode, |
+ crtl->drap_reg, |
+ GEN_INT (-param_ptr_offset)))); |
+ |
+ ix86_cfa_state->reg = stack_pointer_rtx; |
+ ix86_cfa_state->offset = param_ptr_offset; |
+ |
+ add_reg_note (insn, REG_CFA_DEF_CFA, |
+ gen_rtx_PLUS (Pmode, ix86_cfa_state->reg, |
+ GEN_INT (ix86_cfa_state->offset))); |
+ RTX_FRAME_RELATED_P (insn) = 1; |
+ |
+ if (!call_used_regs[REGNO (crtl->drap_reg)]) |
+ ix86_emit_restore_reg_using_pop (crtl->drap_reg, -UNITS_PER_WORD); |
+ } |
+ |
+ /* Remove the saved static chain from the stack. The use of ECX is |
+ merely as a scratch register, not as the actual static chain. */ |
+ if (ix86_static_chain_on_stack) |
+ { |
+ rtx r, insn; |
+ |
+ gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx); |
+ ix86_cfa_state->offset += UNITS_PER_WORD; |
+ |
+ r = gen_rtx_REG (Pmode, CX_REG); |
+ insn = emit_insn (ix86_gen_pop1 (r)); |
+ |
+ r = plus_constant (stack_pointer_rtx, UNITS_PER_WORD); |
+ r = gen_rtx_SET (VOIDmode, stack_pointer_rtx, r); |
+ add_reg_note (insn, REG_CFA_ADJUST_CFA, r); |
+ RTX_FRAME_RELATED_P (insn) = 1; |
} |
/* Sibcall epilogues don't want a return instruction. */ |
if (style == 0) |
- return; |
+ { |
+ *ix86_cfa_state = cfa_state_save; |
+ return; |
+ } |
if (crtl->args.pops_args && crtl->args.size) |
{ |
rtx popc = GEN_INT (crtl->args.pops_args); |
- /* i386 can only pop 64K bytes. If asked to pop more, pop |
- return address, do explicit add, and jump indirectly to the |
- caller. */ |
+ /* i386 can only pop 64K bytes. If asked to pop more, pop return |
+ address, do explicit add, and jump indirectly to the caller. */ |
if (crtl->args.pops_args >= 65536) |
{ |
rtx ecx = gen_rtx_REG (SImode, CX_REG); |
+ rtx insn; |
/* There is no "pascal" calling convention in any 64bit ABI. */ |
gcc_assert (!TARGET_64BIT); |
- emit_insn (gen_popsi1 (ecx)); |
- emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc)); |
+ insn = emit_insn (gen_popsi1 (ecx)); |
+ ix86_cfa_state->offset -= UNITS_PER_WORD; |
+ |
+ add_reg_note (insn, REG_CFA_ADJUST_CFA, |
+ copy_rtx (XVECEXP (PATTERN (insn), 0, 1))); |
+ add_reg_note (insn, REG_CFA_REGISTER, |
+ gen_rtx_SET (VOIDmode, ecx, pc_rtx)); |
+ RTX_FRAME_RELATED_P (insn) = 1; |
+ |
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
+ popc, -1, true); |
emit_jump_insn (gen_return_indirect_internal (ecx)); |
} |
else |
@@ -8712,6 +9311,10 @@ ix86_expand_epilogue (int style) |
} |
else |
emit_jump_insn (gen_return_internal ()); |
+ |
+ /* Restore the state back to the state from the prologue, |
+ so that it's correct for the next epilogue. */ |
+ *ix86_cfa_state = cfa_state_save; |
} |
/* Reset from the function's potential modifications. */ |
@@ -8855,6 +9458,10 @@ ix86_decompose_address (rtx addr, struct ix86_address *out) |
base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base; |
index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index; |
+ /* Avoid useless 0 displacement. */ |
+ if (disp == const0_rtx && (base || index)) |
+ disp = NULL_RTX; |
+ |
/* Allow arg pointer and stack pointer as index if there is not scaling. */ |
if (base_reg && index_reg && scale == 1 |
&& (index_reg == arg_pointer_rtx |
@@ -8866,10 +9473,16 @@ ix86_decompose_address (rtx addr, struct ix86_address *out) |
tmp = base_reg, base_reg = index_reg, index_reg = tmp; |
} |
- /* Special case: %ebp cannot be encoded as a base without a displacement. */ |
- if ((base_reg == hard_frame_pointer_rtx |
- || base_reg == frame_pointer_rtx |
- || base_reg == arg_pointer_rtx) && !disp) |
+ /* Special case: %ebp cannot be encoded as a base without a displacement. |
+ Similarly %r13. */ |
+ if (!disp |
+ && base_reg |
+ && (base_reg == hard_frame_pointer_rtx |
+ || base_reg == frame_pointer_rtx |
+ || base_reg == arg_pointer_rtx |
+ || (REG_P (base_reg) |
+ && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM |
+ || REGNO (base_reg) == R13_REG)))) |
disp = const0_rtx; |
/* Special case: on K6, [%esi] makes the instruction vector decoded. |
@@ -8883,7 +9496,7 @@ ix86_decompose_address (rtx addr, struct ix86_address *out) |
disp = const0_rtx; |
/* Special case: encode reg+reg instead of reg*2. */ |
- if (!base && index && scale && scale == 2) |
+ if (!base && index && scale == 2) |
base = index, base_reg = index_reg, scale = 1; |
/* Special case: scaling cannot be encoded without base or displacement. */ |
@@ -9036,9 +9649,8 @@ legitimate_constant_p (rtx x) |
break; |
case CONST_VECTOR: |
- if (x == CONST0_RTX (GET_MODE (x))) |
- return true; |
- return false; |
+ if (!standard_sse_constant_p (x)) |
+ return false; |
default: |
break; |
@@ -9069,13 +9681,6 @@ ix86_cannot_force_const_mem (rtx x) |
return !legitimate_constant_p (x); |
} |
-/* Determine if a given RTX is a valid constant address. */ |
- |
-bool |
-constant_address_p (rtx x) |
-{ |
- return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1); |
-} |
/* Nonzero if the constant value X is a legitimate general operand |
when generating PIC code. It is given that flag_pic is on and |
@@ -9244,29 +9849,25 @@ legitimate_pic_address_disp_p (rtx disp) |
return 0; |
} |
-/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid |
- memory address for an instruction. The MODE argument is the machine mode |
- for the MEM expression that wants to use this address. |
+/* Recognizes RTL expressions that are valid memory addresses for an |
+ instruction. The MODE argument is the machine mode for the MEM |
+ expression that wants to use this address. |
It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should |
convert common non-canonical forms to canonical form so that they will |
be recognized. */ |
-int |
-legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED, |
- rtx addr, int strict) |
+static bool |
+ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED, |
+ rtx addr, bool strict) |
{ |
struct ix86_address parts; |
rtx base, index, disp; |
HOST_WIDE_INT scale; |
- const char *reason = NULL; |
- rtx reason_rtx = NULL_RTX; |
if (ix86_decompose_address (addr, &parts) <= 0) |
- { |
- reason = "decomposition failed"; |
- goto report_error; |
- } |
+ /* Decomposition failed. */ |
+ return false; |
base = parts.base; |
index = parts.index; |
@@ -9282,7 +9883,6 @@ legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED, |
if (base) |
{ |
rtx reg; |
- reason_rtx = base; |
if (REG_P (base)) |
reg = base; |
@@ -9292,23 +9892,17 @@ legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED, |
<= UNITS_PER_WORD) |
reg = SUBREG_REG (base); |
else |
- { |
- reason = "base is not a register"; |
- goto report_error; |
- } |
+ /* Base is not a register. */ |
+ return false; |
if (GET_MODE (base) != Pmode) |
- { |
- reason = "base is not in Pmode"; |
- goto report_error; |
- } |
+ /* Base is not in Pmode. */ |
+ return false; |
if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg)) |
|| (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg))) |
- { |
- reason = "base is not valid"; |
- goto report_error; |
- } |
+ /* Base is not valid. */ |
+ return false; |
} |
/* Validate index register. |
@@ -9318,7 +9912,6 @@ legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED, |
if (index) |
{ |
rtx reg; |
- reason_rtx = index; |
if (REG_P (index)) |
reg = index; |
@@ -9328,47 +9921,34 @@ legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED, |
<= UNITS_PER_WORD) |
reg = SUBREG_REG (index); |
else |
- { |
- reason = "index is not a register"; |
- goto report_error; |
- } |
+ /* Index is not a register. */ |
+ return false; |
if (GET_MODE (index) != Pmode) |
- { |
- reason = "index is not in Pmode"; |
- goto report_error; |
- } |
+ /* Index is not in Pmode. */ |
+ return false; |
if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg)) |
|| (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg))) |
- { |
- reason = "index is not valid"; |
- goto report_error; |
- } |
+ /* Index is not valid. */ |
+ return false; |
} |
/* Validate scale factor. */ |
if (scale != 1) |
{ |
- reason_rtx = GEN_INT (scale); |
if (!index) |
- { |
- reason = "scale without index"; |
- goto report_error; |
- } |
+ /* Scale without index. */ |
+ return false; |
if (scale != 2 && scale != 4 && scale != 8) |
- { |
- reason = "scale is not a valid multiplier"; |
- goto report_error; |
- } |
+ /* Scale is not a valid multiplier. */ |
+ return false; |
} |
/* Validate displacement. */ |
if (disp) |
{ |
- reason_rtx = disp; |
- |
if (GET_CODE (disp) == CONST |
&& GET_CODE (XEXP (disp, 0)) == UNSPEC |
&& XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET) |
@@ -9382,8 +9962,9 @@ legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED, |
gcc_assert (flag_pic); |
if (!TARGET_64BIT) |
goto is_legitimate_pic; |
- reason = "64bit address unspec"; |
- goto report_error; |
+ |
+ /* 64bit address unspec. */ |
+ return false; |
case UNSPEC_GOTPCREL: |
gcc_assert (flag_pic); |
@@ -9397,8 +9978,8 @@ legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED, |
break; |
default: |
- reason = "invalid address unspec"; |
- goto report_error; |
+ /* Invalid address unspec. */ |
+ return false; |
} |
else if (SYMBOLIC_CONST (disp) |
@@ -9421,16 +10002,12 @@ legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED, |
|| !CONST_INT_P (XEXP (XEXP (disp, 0), 1)) |
|| (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF |
&& XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF)) |
- { |
- reason = "non-constant pic memory reference"; |
- goto report_error; |
- } |
+ /* Non-constant pic memory reference. */ |
+ return false; |
} |
else if (! legitimate_pic_address_disp_p (disp)) |
- { |
- reason = "displacement is an invalid pic construct"; |
- goto report_error; |
- } |
+ /* Displacement is an invalid pic construct. */ |
+ return false; |
/* This code used to verify that a symbolic pic displacement |
includes the pic_offset_table_rtx register. |
@@ -9460,23 +10037,24 @@ legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED, |
|| !legitimate_constant_p (disp)) |
&& (GET_CODE (disp) != SYMBOL_REF |
|| !legitimate_constant_p (disp))) |
- { |
- reason = "displacement is not constant"; |
- goto report_error; |
- } |
+ /* Displacement is not constant. */ |
+ return false; |
else if (TARGET_64BIT |
&& !x86_64_immediate_operand (disp, VOIDmode)) |
- { |
- reason = "displacement is out of range"; |
- goto report_error; |
- } |
+ /* Displacement is out of range. */ |
+ return false; |
} |
/* Everything looks valid. */ |
- return TRUE; |
+ return true; |
+} |
+ |
+/* Determine if a given RTX is a valid constant address. */ |
- report_error: |
- return FALSE; |
+bool |
+constant_address_p (rtx x) |
+{ |
+ return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1); |
} |
/* Return a unique alias set for the GOT. */ |
@@ -9505,7 +10083,7 @@ ix86_GOT_alias_set (void) |
differentiate them from global data objects. The returned |
address is the PIC reg + an unspec constant. |
- GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC |
+ TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC |
reg also appears in the address. */ |
static rtx |
@@ -9745,7 +10323,7 @@ get_thread_pointer (int to_reg) |
return reg; |
} |
-/* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is |
+/* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is |
false if we expect this to be used for a memory address and true if |
we expect to load the address into a register. */ |
@@ -9934,7 +10512,8 @@ get_dllimport_decl (tree decl) |
*loc = h = GGC_NEW (struct tree_map); |
h->hash = in.hash; |
h->base.from = decl; |
- h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node); |
+ h->to = to = build_decl (DECL_SOURCE_LOCATION (decl), |
+ VAR_DECL, NULL, ptr_type_node); |
DECL_ARTIFICIAL (to) = 1; |
DECL_IGNORED_P (to) = 1; |
DECL_EXTERNAL (to) = 1; |
@@ -9989,9 +10568,6 @@ legitimize_dllimport_symbol (rtx symbol, bool want_reg) |
OLDX is the address as it was before break_out_memory_refs was called. |
In some cases it is useful to look at this to decide what needs to be done. |
- MODE and WIN are passed so that this macro can use |
- GO_IF_LEGITIMATE_ADDRESS. |
- |
It is always safe for this macro to do nothing. It exists to recognize |
opportunities to optimize the output. |
@@ -10003,8 +10579,9 @@ legitimize_dllimport_symbol (rtx symbol, bool want_reg) |
When -fpic is used, special handling is needed for symbolic references. |
See comments by legitimize_pic_address in i386.c for details. */ |
-rtx |
-legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode) |
+static rtx |
+ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, |
+ enum machine_mode mode) |
{ |
int changed = 0; |
unsigned log; |
@@ -10132,7 +10709,7 @@ legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode) |
} |
} |
- if (changed && legitimate_address_p (mode, x, FALSE)) |
+ if (changed && ix86_legitimate_address_p (mode, x, FALSE)) |
return x; |
if (GET_CODE (XEXP (x, 0)) == MULT) |
@@ -10158,7 +10735,7 @@ legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode) |
x = legitimize_pic_address (x, 0); |
} |
- if (changed && legitimate_address_p (mode, x, FALSE)) |
+ if (changed && ix86_legitimate_address_p (mode, x, FALSE)) |
return x; |
if (REG_P (XEXP (x, 0))) |
@@ -10310,29 +10887,29 @@ output_pic_addr_const (FILE *file, rtx x, int code) |
break; |
case UNSPEC_GOTTPOFF: |
/* FIXME: This might be @TPOFF in Sun ld too. */ |
- fputs ("@GOTTPOFF", file); |
+ fputs ("@gottpoff", file); |
break; |
case UNSPEC_TPOFF: |
- fputs ("@TPOFF", file); |
+ fputs ("@tpoff", file); |
break; |
case UNSPEC_NTPOFF: |
if (TARGET_64BIT) |
- fputs ("@TPOFF", file); |
+ fputs ("@tpoff", file); |
else |
- fputs ("@NTPOFF", file); |
+ fputs ("@ntpoff", file); |
break; |
case UNSPEC_DTPOFF: |
- fputs ("@DTPOFF", file); |
+ fputs ("@dtpoff", file); |
break; |
case UNSPEC_GOTNTPOFF: |
if (TARGET_64BIT) |
fputs (ASSEMBLER_DIALECT == ASM_ATT ? |
- "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file); |
+ "@gottpoff(%rip)": "@gottpoff[rip]", file); |
else |
- fputs ("@GOTNTPOFF", file); |
+ fputs ("@gotntpoff", file); |
break; |
case UNSPEC_INDNTPOFF: |
- fputs ("@INDNTPOFF", file); |
+ fputs ("@indntpoff", file); |
break; |
#if TARGET_MACHO |
case UNSPEC_MACHOPIC_OFFSET: |
@@ -10359,7 +10936,7 @@ i386_output_dwarf_dtprel (FILE *file, int size, rtx x) |
{ |
fputs (ASM_LONG, file); |
output_addr_const (file, x); |
- fputs ("@DTPOFF", file); |
+ fputs ("@dtpoff", file); |
switch (size) |
{ |
case 4: |
@@ -10397,9 +10974,12 @@ ix86_pic_register_p (rtx x) |
the DWARF output code. */ |
static rtx |
-ix86_delegitimize_address (rtx orig_x) |
+ix86_delegitimize_address (rtx x) |
{ |
- rtx x = orig_x; |
+ rtx orig_x = delegitimize_mem_from_attrs (x); |
+ /* addend is NULL or some rtx if x is something+GOTOFF where |
+ something doesn't include the PIC register. */ |
+ rtx addend = NULL_RTX; |
/* reg_addend is NULL or a multiple of some register. */ |
rtx reg_addend = NULL_RTX; |
/* const_addend is NULL or a const_int. */ |
@@ -10407,6 +10987,8 @@ ix86_delegitimize_address (rtx orig_x) |
/* This is the result, or NULL. */ |
rtx result = NULL_RTX; |
+ x = orig_x; |
+ |
if (MEM_P (x)) |
x = XEXP (x, 0); |
@@ -10417,7 +10999,10 @@ ix86_delegitimize_address (rtx orig_x) |
|| XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL |
|| !MEM_P (orig_x)) |
return orig_x; |
- return XVECEXP (XEXP (x, 0), 0, 0); |
+ x = XVECEXP (XEXP (x, 0), 0, 0); |
+ if (GET_MODE (orig_x) != Pmode) |
+ return simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0); |
+ return x; |
} |
if (GET_CODE (x) != PLUS |
@@ -10436,14 +11021,13 @@ ix86_delegitimize_address (rtx orig_x) |
else if (ix86_pic_register_p (XEXP (reg_addend, 1))) |
reg_addend = XEXP (reg_addend, 0); |
else |
- return orig_x; |
- if (!REG_P (reg_addend) |
- && GET_CODE (reg_addend) != MULT |
- && GET_CODE (reg_addend) != ASHIFT) |
- return orig_x; |
+ { |
+ reg_addend = NULL_RTX; |
+ addend = XEXP (x, 0); |
+ } |
} |
else |
- return orig_x; |
+ addend = XEXP (x, 0); |
x = XEXP (XEXP (x, 1), 0); |
if (GET_CODE (x) == PLUS |
@@ -10454,7 +11038,7 @@ ix86_delegitimize_address (rtx orig_x) |
} |
if (GET_CODE (x) == UNSPEC |
- && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x)) |
+ && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend) |
|| (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x)))) |
result = XVECEXP (x, 0, 0); |
@@ -10469,6 +11053,24 @@ ix86_delegitimize_address (rtx orig_x) |
result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend)); |
if (reg_addend) |
result = gen_rtx_PLUS (Pmode, reg_addend, result); |
+ if (addend) |
+ { |
+ /* If the rest of original X doesn't involve the PIC register, add |
+ addend and subtract pic_offset_table_rtx. This can happen e.g. |
+ for code like: |
+ leal (%ebx, %ecx, 4), %ecx |
+ ... |
+ movl foo@GOTOFF(%ecx), %edx |
+ in which case we return (%ecx - %ebx) + foo. */ |
+ if (pic_offset_table_rtx) |
+ result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend), |
+ pic_offset_table_rtx), |
+ result); |
+ else |
+ return orig_x; |
+ } |
+ if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x)) |
+ return simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0); |
return result; |
} |
@@ -10508,9 +11110,6 @@ put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse, |
if (mode == CCFPmode || mode == CCFPUmode) |
{ |
- enum rtx_code second_code, bypass_code; |
- ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code); |
- gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN); |
code = ix86_fp_compare_code_to_integer (code); |
mode = CCmode; |
} |
@@ -10812,23 +11411,23 @@ get_some_local_dynamic_name (void) |
return cfun->machine->some_ld_name; |
for (insn = get_insns (); insn ; insn = NEXT_INSN (insn)) |
- if (INSN_P (insn) |
+ if (NONDEBUG_INSN_P (insn) |
&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0)) |
return cfun->machine->some_ld_name; |
- gcc_unreachable (); |
+ return NULL; |
} |
/* Meaning of CODE: |
L,W,B,Q,S,T -- print the opcode suffix for specified size of operand. |
C -- print opcode suffix for set/cmov insn. |
c -- like C, but print reversed condition |
- E,e -- likewise, but for compare-and-branch fused insn. |
F,f -- likewise, but for floating-point. |
O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", |
otherwise nothing |
R -- print the prefix for register names. |
z -- print the opcode suffix for the size of the current operand. |
+ Z -- likewise, with special suffixes for x87 instructions. |
* -- print a star (in certain assembler syntax) |
A -- print an absolute memory reference. |
w -- print the operand as if it's a "word" (HImode) even if it isn't. |
@@ -10849,7 +11448,7 @@ get_some_local_dynamic_name (void) |
X -- don't print any sort of PIC '@' suffix for a symbol. |
& -- print some in-use local-dynamic symbol name. |
H -- print a memory address offset by 8; used for sse high-parts |
- Y -- print condition for SSE5 com* instruction. |
+ Y -- print condition for XOP pcom* instruction. |
+ -- print a branch hint as 'cs' or 'ds' prefix |
; -- print a semicolon (after prefixes due to bug in older gas). |
*/ |
@@ -10867,8 +11466,15 @@ print_operand (FILE *file, rtx x, int code) |
return; |
case '&': |
- assemble_name (file, get_some_local_dynamic_name ()); |
- return; |
+ { |
+ const char *name = get_some_local_dynamic_name (); |
+ if (name == NULL) |
+ output_operand_lossage ("'%%&' used without any " |
+ "local dynamic TLS references"); |
+ else |
+ assemble_name (file, name); |
+ return; |
+ } |
case 'A': |
switch (ASSEMBLER_DIALECT) |
@@ -10928,72 +11534,110 @@ print_operand (FILE *file, rtx x, int code) |
return; |
case 'z': |
- /* 387 opcodes don't get size suffixes if the operands are |
- registers. */ |
- if (STACK_REG_P (x)) |
- return; |
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) |
+ { |
+ /* Opcodes don't get size suffixes if using Intel opcodes. */ |
+ if (ASSEMBLER_DIALECT == ASM_INTEL) |
+ return; |
- /* Likewise if using Intel opcodes. */ |
+ switch (GET_MODE_SIZE (GET_MODE (x))) |
+ { |
+ case 1: |
+ putc ('b', file); |
+ return; |
+ |
+ case 2: |
+ putc ('w', file); |
+ return; |
+ |
+ case 4: |
+ putc ('l', file); |
+ return; |
+ |
+ case 8: |
+ putc ('q', file); |
+ return; |
+ |
+ default: |
+ output_operand_lossage |
+ ("invalid operand size for operand code '%c'", code); |
+ return; |
+ } |
+ } |
+ |
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) |
+ warning |
+ (0, "non-integer operand used with operand code '%c'", code); |
+ /* FALLTHRU */ |
+ |
+ case 'Z': |
+ /* 387 opcodes don't get size suffixes if using Intel opcodes. */ |
if (ASSEMBLER_DIALECT == ASM_INTEL) |
return; |
- /* This is the size of op from size of operand. */ |
- switch (GET_MODE_SIZE (GET_MODE (x))) |
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) |
{ |
- case 1: |
- putc ('b', file); |
- return; |
- |
- case 2: |
- if (MEM_P (x)) |
+ switch (GET_MODE_SIZE (GET_MODE (x))) |
{ |
-#ifdef HAVE_GAS_FILDS_FISTS |
+ case 2: |
+#ifdef HAVE_AS_IX86_FILDS |
putc ('s', file); |
#endif |
return; |
+ |
+ case 4: |
+ putc ('l', file); |
+ return; |
+ |
+ case 8: |
+#ifdef HAVE_AS_IX86_FILDQ |
+ putc ('q', file); |
+#else |
+ fputs ("ll", file); |
+#endif |
+ return; |
+ |
+ default: |
+ break; |
} |
- else |
- putc ('w', file); |
- return; |
+ } |
+ else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) |
+ { |
+ /* 387 opcodes don't get size suffixes |
+ if the operands are registers. */ |
+ if (STACK_REG_P (x)) |
+ return; |
- case 4: |
- if (GET_MODE (x) == SFmode) |
+ switch (GET_MODE_SIZE (GET_MODE (x))) |
{ |
+ case 4: |
putc ('s', file); |
return; |
- } |
- else |
- putc ('l', file); |
- return; |
- case 12: |
- case 16: |
- putc ('t', file); |
- return; |
+ case 8: |
+ putc ('l', file); |
+ return; |
- case 8: |
- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) |
- { |
- if (MEM_P (x)) |
- { |
-#ifdef GAS_MNEMONICS |
- putc ('q', file); |
-#else |
- putc ('l', file); |
- putc ('l', file); |
-#endif |
- } |
- else |
- putc ('q', file); |
+ case 12: |
+ case 16: |
+ putc ('t', file); |
+ return; |
+ |
+ default: |
+ break; |
} |
- else |
- putc ('l', file); |
+ } |
+ else |
+ { |
+ output_operand_lossage |
+ ("invalid operand type used with operand code '%c'", code); |
return; |
- |
- default: |
- gcc_unreachable (); |
} |
+ output_operand_lossage |
+ ("invalid operand size for operand code '%c'", code); |
+ return; |
+ |
case 'd': |
case 'b': |
case 'w': |
@@ -11066,7 +11710,8 @@ print_operand (FILE *file, rtx x, int code) |
fputs ("ord", file); |
break; |
default: |
- output_operand_lossage ("operand is not a condition code, invalid operand code 'D'"); |
+ output_operand_lossage ("operand is not a condition code, " |
+ "invalid operand code 'D'"); |
return; |
} |
} |
@@ -11105,7 +11750,8 @@ print_operand (FILE *file, rtx x, int code) |
fputs ("ord", file); |
break; |
default: |
- output_operand_lossage ("operand is not a condition code, invalid operand code 'D'"); |
+ output_operand_lossage ("operand is not a condition code, " |
+ "invalid operand code 'D'"); |
return; |
} |
} |
@@ -11180,14 +11826,6 @@ print_operand (FILE *file, rtx x, int code) |
put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file); |
return; |
- case 'E': |
- put_condition_code (GET_CODE (x), CCmode, 0, 0, file); |
- return; |
- |
- case 'e': |
- put_condition_code (GET_CODE (x), CCmode, 1, 0, file); |
- return; |
- |
case 'H': |
/* It doesn't actually matter what mode we use here, as we're |
only going to use this for printing. */ |
@@ -11279,16 +11917,15 @@ print_operand (FILE *file, rtx x, int code) |
fputs ("une", file); |
break; |
default: |
- output_operand_lossage ("operand is not a condition code, invalid operand code 'D'"); |
+ output_operand_lossage ("operand is not a condition code, " |
+ "invalid operand code 'Y'"); |
return; |
} |
return; |
case ';': |
-#if TARGET_MACHO |
- fputs (" ; ", file); |
-#else |
- fputc (' ', file); |
+#if TARGET_MACHO || !HAVE_AS_IX86_REP_LOCK_PREFIX |
+ fputs (";", file); |
#endif |
return; |
@@ -11367,7 +12004,7 @@ print_operand (FILE *file, rtx x, int code) |
char dstr[30]; |
real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1); |
- fprintf (file, "%s", dstr); |
+ fputs (dstr, file); |
} |
else if (GET_CODE (x) == CONST_DOUBLE |
@@ -11376,7 +12013,7 @@ print_operand (FILE *file, rtx x, int code) |
char dstr[30]; |
real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1); |
- fprintf (file, "%s", dstr); |
+ fputs (dstr, file); |
} |
else |
@@ -11570,34 +12207,34 @@ output_addr_const_extra (FILE *file, rtx x) |
case UNSPEC_GOTTPOFF: |
output_addr_const (file, op); |
/* FIXME: This might be @TPOFF in Sun ld. */ |
- fputs ("@GOTTPOFF", file); |
+ fputs ("@gottpoff", file); |
break; |
case UNSPEC_TPOFF: |
output_addr_const (file, op); |
- fputs ("@TPOFF", file); |
+ fputs ("@tpoff", file); |
break; |
case UNSPEC_NTPOFF: |
output_addr_const (file, op); |
if (TARGET_64BIT) |
- fputs ("@TPOFF", file); |
+ fputs ("@tpoff", file); |
else |
- fputs ("@NTPOFF", file); |
+ fputs ("@ntpoff", file); |
break; |
case UNSPEC_DTPOFF: |
output_addr_const (file, op); |
- fputs ("@DTPOFF", file); |
+ fputs ("@dtpoff", file); |
break; |
case UNSPEC_GOTNTPOFF: |
output_addr_const (file, op); |
if (TARGET_64BIT) |
fputs (ASSEMBLER_DIALECT == ASM_ATT ? |
- "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file); |
+ "@gottpoff(%rip)" : "@gottpoff[rip]", file); |
else |
- fputs ("@GOTNTPOFF", file); |
+ fputs ("@gotntpoff", file); |
break; |
case UNSPEC_INDNTPOFF: |
output_addr_const (file, op); |
- fputs ("@INDNTPOFF", file); |
+ fputs ("@indntpoff", file); |
break; |
#if TARGET_MACHO |
case UNSPEC_MACHOPIC_OFFSET: |
@@ -11793,7 +12430,7 @@ output_387_binary_op (rtx insn, rtx *operands) |
if (MEM_P (operands[2])) |
{ |
- p = "%z2\t%2"; |
+ p = "%Z2\t%2"; |
break; |
} |
@@ -11823,13 +12460,13 @@ output_387_binary_op (rtx insn, rtx *operands) |
case DIV: |
if (MEM_P (operands[1])) |
{ |
- p = "r%z1\t%1"; |
+ p = "r%Z1\t%1"; |
break; |
} |
if (MEM_P (operands[2])) |
{ |
- p = "%z2\t%2"; |
+ p = "%Z2\t%2"; |
break; |
} |
@@ -12071,15 +12708,15 @@ output_fix_trunc (rtx insn, rtx *operands, int fisttp) |
gcc_assert (GET_MODE (operands[1]) != TFmode); |
if (fisttp) |
- output_asm_insn ("fisttp%z0\t%0", operands); |
+ output_asm_insn ("fisttp%Z0\t%0", operands); |
else |
{ |
if (round_mode != I387_CW_ANY) |
output_asm_insn ("fldcw\t%3", operands); |
if (stack_top_dies || dimode_p) |
- output_asm_insn ("fistp%z0\t%0", operands); |
+ output_asm_insn ("fistp%Z0\t%0", operands); |
else |
- output_asm_insn ("fist%z0\t%0", operands); |
+ output_asm_insn ("fist%Z0\t%0", operands); |
if (round_mode != I387_CW_ANY) |
output_asm_insn ("fldcw\t%2", operands); |
} |
@@ -12203,13 +12840,13 @@ output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p) |
static const char * const alt[16] = |
{ |
- "fcom%z2\t%y2\n\tfnstsw\t%0", |
- "fcomp%z2\t%y2\n\tfnstsw\t%0", |
- "fucom%z2\t%y2\n\tfnstsw\t%0", |
- "fucomp%z2\t%y2\n\tfnstsw\t%0", |
+ "fcom%Z2\t%y2\n\tfnstsw\t%0", |
+ "fcomp%Z2\t%y2\n\tfnstsw\t%0", |
+ "fucom%Z2\t%y2\n\tfnstsw\t%0", |
+ "fucomp%Z2\t%y2\n\tfnstsw\t%0", |
- "ficom%z2\t%y2\n\tfnstsw\t%0", |
- "ficomp%z2\t%y2\n\tfnstsw\t%0", |
+ "ficom%Z2\t%y2\n\tfnstsw\t%0", |
+ "ficomp%Z2\t%y2\n\tfnstsw\t%0", |
NULL, |
NULL, |
@@ -12271,18 +12908,18 @@ ix86_output_addr_diff_elt (FILE *file, int value, int rel) |
fprintf (file, "%s%s%d-%s%d\n", |
directive, LPREFIX, value, LPREFIX, rel); |
else if (HAVE_AS_GOTOFF_IN_DATA) |
- fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value); |
+ fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value); |
#if TARGET_MACHO |
else if (TARGET_MACHO) |
{ |
- fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value); |
+ fprintf (file, ASM_LONG "%s%d-", LPREFIX, value); |
machopic_output_function_base_name (file); |
- fprintf(file, "\n"); |
+ putc ('\n', file); |
} |
#endif |
else |
- asm_fprintf (file, "%s%U%s+[.-%s%d]\n", |
- ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value); |
+ asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n", |
+ GOT_SYMBOL_NAME, LPREFIX, value); |
} |
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate |
@@ -12302,7 +12939,7 @@ ix86_expand_clear (rtx dest) |
tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx); |
/* This predicate should match that for movsi_xor and movdi_xor_rex64. */ |
- if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())) |
+ if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()) |
{ |
rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); |
tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob)); |
@@ -12400,7 +13037,7 @@ ix86_expand_move (enum machine_mode mode, rtx operands[]) |
op1 = force_reg (Pmode, op1); |
else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode)) |
{ |
- rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX; |
+ rtx reg = can_create_pseudo_p () ? NULL_RTX : op0; |
op1 = legitimize_pic_address (op1, reg); |
if (op0 == op1) |
return; |
@@ -12466,7 +13103,7 @@ ix86_expand_vector_move (enum machine_mode mode, rtx operands[]) |
&& (CONSTANT_P (op1) |
|| (GET_CODE (op1) == SUBREG |
&& CONSTANT_P (SUBREG_REG (op1)))) |
- && standard_sse_constant_p (op1) <= 0) |
+ && !standard_sse_constant_p (op1)) |
op1 = validize_mem (force_const_mem (mode, op1)); |
/* We need to check memory alignment for SSE mode since attribute |
@@ -12994,6 +13631,316 @@ ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode, |
emit_move_insn (operands[0], dst); |
} |
+#define LEA_SEARCH_THRESHOLD 12 |
+ |
+/* Search backward for non-agu definition of register number REGNO1 |
+ or register number REGNO2 in INSN's basic block until |
+ 1. Pass LEA_SEARCH_THRESHOLD instructions, or |
+ 2. Reach BB boundary, or |
+ 3. Reach agu definition. |
+ Returns the distance between the non-agu definition point and INSN. |
+ If no definition point, returns -1. */ |
+ |
+static int |
+distance_non_agu_define (unsigned int regno1, unsigned int regno2, |
+ rtx insn) |
+{ |
+ basic_block bb = BLOCK_FOR_INSN (insn); |
+ int distance = 0; |
+ df_ref *def_rec; |
+ enum attr_type insn_type; |
+ |
+ if (insn != BB_HEAD (bb)) |
+ { |
+ rtx prev = PREV_INSN (insn); |
+ while (prev && distance < LEA_SEARCH_THRESHOLD) |
+ { |
+ if (NONDEBUG_INSN_P (prev)) |
+ { |
+ distance++; |
+ for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++) |
+ if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF |
+ && !DF_REF_IS_ARTIFICIAL (*def_rec) |
+ && (regno1 == DF_REF_REGNO (*def_rec) |
+ || regno2 == DF_REF_REGNO (*def_rec))) |
+ { |
+ insn_type = get_attr_type (prev); |
+ if (insn_type != TYPE_LEA) |
+ goto done; |
+ } |
+ } |
+ if (prev == BB_HEAD (bb)) |
+ break; |
+ prev = PREV_INSN (prev); |
+ } |
+ } |
+ |
+ if (distance < LEA_SEARCH_THRESHOLD) |
+ { |
+ edge e; |
+ edge_iterator ei; |
+ bool simple_loop = false; |
+ |
+ FOR_EACH_EDGE (e, ei, bb->preds) |
+ if (e->src == bb) |
+ { |
+ simple_loop = true; |
+ break; |
+ } |
+ |
+ if (simple_loop) |
+ { |
+ rtx prev = BB_END (bb); |
+ while (prev |
+ && prev != insn |
+ && distance < LEA_SEARCH_THRESHOLD) |
+ { |
+ if (NONDEBUG_INSN_P (prev)) |
+ { |
+ distance++; |
+ for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++) |
+ if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF |
+ && !DF_REF_IS_ARTIFICIAL (*def_rec) |
+ && (regno1 == DF_REF_REGNO (*def_rec) |
+ || regno2 == DF_REF_REGNO (*def_rec))) |
+ { |
+ insn_type = get_attr_type (prev); |
+ if (insn_type != TYPE_LEA) |
+ goto done; |
+ } |
+ } |
+ prev = PREV_INSN (prev); |
+ } |
+ } |
+ } |
+ |
+ distance = -1; |
+ |
+done: |
+ /* get_attr_type may modify recog data. We want to make sure |
+ that recog data is valid for instruction INSN, on which |
+ distance_non_agu_define is called. INSN is unchanged here. */ |
+ extract_insn_cached (insn); |
+ return distance; |
+} |
+ |
+/* Return the distance between INSN and the next insn that uses |
+ register number REGNO0 in memory address. Return -1 if no such |
+ use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */ |
+ |
+static int |
+distance_agu_use (unsigned int regno0, rtx insn) |
+{ |
+ basic_block bb = BLOCK_FOR_INSN (insn); |
+ int distance = 0; |
+ df_ref *def_rec; |
+ df_ref *use_rec; |
+ |
+ if (insn != BB_END (bb)) |
+ { |
+ rtx next = NEXT_INSN (insn); |
+ while (next && distance < LEA_SEARCH_THRESHOLD) |
+ { |
+ if (NONDEBUG_INSN_P (next)) |
+ { |
+ distance++; |
+ |
+ for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++) |
+ if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD |
+ || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE) |
+ && regno0 == DF_REF_REGNO (*use_rec)) |
+ { |
+ /* Return DISTANCE if OP0 is used in memory |
+ address in NEXT. */ |
+ return distance; |
+ } |
+ |
+ for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++) |
+ if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF |
+ && !DF_REF_IS_ARTIFICIAL (*def_rec) |
+ && regno0 == DF_REF_REGNO (*def_rec)) |
+ { |
+ /* Return -1 if OP0 is set in NEXT. */ |
+ return -1; |
+ } |
+ } |
+ if (next == BB_END (bb)) |
+ break; |
+ next = NEXT_INSN (next); |
+ } |
+ } |
+ |
+ if (distance < LEA_SEARCH_THRESHOLD) |
+ { |
+ edge e; |
+ edge_iterator ei; |
+ bool simple_loop = false; |
+ |
+ FOR_EACH_EDGE (e, ei, bb->succs) |
+ if (e->dest == bb) |
+ { |
+ simple_loop = true; |
+ break; |
+ } |
+ |
+ if (simple_loop) |
+ { |
+ rtx next = BB_HEAD (bb); |
+ while (next |
+ && next != insn |
+ && distance < LEA_SEARCH_THRESHOLD) |
+ { |
+ if (NONDEBUG_INSN_P (next)) |
+ { |
+ distance++; |
+ |
+ for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++) |
+ if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD |
+ || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE) |
+ && regno0 == DF_REF_REGNO (*use_rec)) |
+ { |
+ /* Return DISTANCE if OP0 is used in memory |
+ address in NEXT. */ |
+ return distance; |
+ } |
+ |
+ for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++) |
+ if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF |
+ && !DF_REF_IS_ARTIFICIAL (*def_rec) |
+ && regno0 == DF_REF_REGNO (*def_rec)) |
+ { |
+ /* Return -1 if OP0 is set in NEXT. */ |
+ return -1; |
+ } |
+ |
+ } |
+ next = NEXT_INSN (next); |
+ } |
+ } |
+ } |
+ |
+ return -1; |
+} |
+ |
+/* Define this macro to tune LEA priority vs ADD, it takes effect when |
+ there is a dilemma of choosing LEA or ADD |
+ Negative value: ADD is more preferred than LEA |
+ Zero: Neutral |
+ Positive value: LEA is more preferred than ADD. */ |
+#define IX86_LEA_PRIORITY 2 |
+ |
+/* Return true if it is ok to optimize an ADD operation to LEA |
+ operation to avoid flag register consumption. For the processors |
+ like ATOM, if the destination register of LEA holds an actual |
+ address which will be used soon, LEA is better and otherwise ADD |
+ is better. */ |
+ |
+bool |
+ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED, |
+ rtx insn, rtx operands[]) |
+{ |
+ unsigned int regno0 = true_regnum (operands[0]); |
+ unsigned int regno1 = true_regnum (operands[1]); |
+ unsigned int regno2; |
+ |
+ if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) |
+ return regno0 != regno1; |
+ |
+ regno2 = true_regnum (operands[2]); |
+ |
+ /* If a = b + c, (a!=b && a!=c), must use lea form. */ |
+ if (regno0 != regno1 && regno0 != regno2) |
+ return true; |
+ else |
+ { |
+ int dist_define, dist_use; |
+ dist_define = distance_non_agu_define (regno1, regno2, insn); |
+ if (dist_define <= 0) |
+ return true; |
+ |
+ /* If this insn has both backward non-agu dependence and forward |
+ agu dependence, the one with short distance takes effect. */ |
+ dist_use = distance_agu_use (regno0, insn); |
+ if (dist_use <= 0 |
+ || (dist_define + IX86_LEA_PRIORITY) < dist_use) |
+ return false; |
+ |
+ return true; |
+ } |
+} |
+ |
+/* Return true if destination reg of SET_BODY is shift count of |
+ USE_BODY. */ |
+ |
+static bool |
+ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body) |
+{ |
+ rtx set_dest; |
+ rtx shift_rtx; |
+ int i; |
+ |
+ /* Retrieve destination of SET_BODY. */ |
+ switch (GET_CODE (set_body)) |
+ { |
+ case SET: |
+ set_dest = SET_DEST (set_body); |
+ if (!set_dest || !REG_P (set_dest)) |
+ return false; |
+ break; |
+ case PARALLEL: |
+ for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--) |
+ if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i), |
+ use_body)) |
+ return true; |
+ default: |
+ return false; |
+ break; |
+ } |
+ |
+ /* Retrieve shift count of USE_BODY. */ |
+ switch (GET_CODE (use_body)) |
+ { |
+ case SET: |
+ shift_rtx = XEXP (use_body, 1); |
+ break; |
+ case PARALLEL: |
+ for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--) |
+ if (ix86_dep_by_shift_count_body (set_body, |
+ XVECEXP (use_body, 0, i))) |
+ return true; |
+ default: |
+ return false; |
+ break; |
+ } |
+ |
+ if (shift_rtx |
+ && (GET_CODE (shift_rtx) == ASHIFT |
+ || GET_CODE (shift_rtx) == LSHIFTRT |
+ || GET_CODE (shift_rtx) == ASHIFTRT |
+ || GET_CODE (shift_rtx) == ROTATE |
+ || GET_CODE (shift_rtx) == ROTATERT)) |
+ { |
+ rtx shift_count = XEXP (shift_rtx, 1); |
+ |
+ /* Return true if shift count is dest of SET_BODY. */ |
+ if (REG_P (shift_count) |
+ && true_regnum (set_dest) == true_regnum (shift_count)) |
+ return true; |
+ } |
+ |
+ return false; |
+} |
+ |
+/* Return true if destination reg of SET_INSN is shift count of |
+ USE_INSN. */ |
+ |
+bool |
+ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn) |
+{ |
+ return ix86_dep_by_shift_count_body (PATTERN (set_insn), |
+ PATTERN (use_insn)); |
+} |
+ |
/* Return TRUE or FALSE depending on whether the unary operator meets the |
appropriate constraints. */ |
@@ -13010,6 +13957,19 @@ ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED, |
return TRUE; |
} |
+/* Return TRUE if the operands to a vec_interleave_{high,low}v2df |
+ are ok, keeping in mind the possible movddup alternative. */ |
+ |
+bool |
+ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high) |
+{ |
+ if (MEM_P (operands[0])) |
+ return rtx_equal_p (operands[0], operands[1 + high]); |
+ if (MEM_P (operands[1]) && MEM_P (operands[2])) |
+ return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]); |
+ return true; |
+} |
+ |
/* Post-reload splitter for converting an SF or DFmode value in an |
SSE register into an unsigned SImode. */ |
@@ -13105,7 +14065,7 @@ ix86_expand_convert_uns_didf_sse (rtx target, rtx input) |
exponents = validize_mem (force_const_mem (V4SImode, x)); |
/* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */ |
- emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents)); |
+ emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents)); |
/* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm) |
yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)). |
@@ -13131,7 +14091,7 @@ ix86_expand_convert_uns_didf_sse (rtx target, rtx input) |
else |
{ |
x = copy_to_mode_reg (V2DFmode, fp_xmm); |
- emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm)); |
+ emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm)); |
emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x)); |
} |
@@ -13221,7 +14181,7 @@ ix86_expand_convert_uns_sisf_sse (rtx target, rtx input) |
emit_move_insn (target, fp_hi); |
} |
-/* A subroutine of ix86_build_signbit_mask_vector. If VECT is true, |
+/* A subroutine of ix86_build_signbit_mask. If VECT is true, |
then replicate the value for all elements of the vector |
register. */ |
@@ -13431,15 +14391,9 @@ ix86_expand_copysign (rtx operands[]) |
op0 = CONST0_RTX (vmode); |
else |
{ |
- rtvec v; |
+ rtx v = ix86_build_const_vector (mode, false, op0); |
- if (mode == SFmode) |
- v = gen_rtvec (4, op0, CONST0_RTX (SFmode), |
- CONST0_RTX (SFmode), CONST0_RTX (SFmode)); |
- else |
- v = gen_rtvec (2, op0, CONST0_RTX (DFmode)); |
- |
- op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v)); |
+ op0 = force_reg (vmode, v); |
} |
} |
else if (op0 != CONST0_RTX (mode)) |
@@ -13481,11 +14435,10 @@ void |
ix86_split_copysign_const (rtx operands[]) |
{ |
enum machine_mode mode, vmode; |
- rtx dest, op0, op1, mask, x; |
+ rtx dest, op0, mask, x; |
dest = operands[0]; |
op0 = operands[1]; |
- op1 = operands[2]; |
mask = operands[3]; |
mode = GET_MODE (dest); |
@@ -13789,84 +14742,41 @@ ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2) |
} |
} |
-/* Split comparison code CODE into comparisons we can do using branch |
- instructions. BYPASS_CODE is comparison code for branch that will |
- branch around FIRST_CODE and SECOND_CODE. If some of branches |
- is not required, set value to UNKNOWN. |
- We never require more than two branches. */ |
-void |
-ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code, |
- enum rtx_code *first_code, |
- enum rtx_code *second_code) |
-{ |
- *first_code = code; |
- *bypass_code = UNKNOWN; |
- *second_code = UNKNOWN; |
- |
- /* The fcomi comparison sets flags as follows: |
- |
- cmp ZF PF CF |
- > 0 0 0 |
- < 0 0 1 |
- = 1 0 0 |
- un 1 1 1 */ |
+/* Return a comparison we can do that is equivalent to |
+ swap_condition (code), except possibly for orderedness. |
+ But, never change orderedness if TARGET_IEEE_FP, returning |
+ UNKNOWN in that case if necessary. */ |
+static enum rtx_code |
+ix86_fp_swap_condition (enum rtx_code code) |
+{ |
switch (code) |
{ |
- case GT: /* GTU - CF=0 & ZF=0 */ |
- case GE: /* GEU - CF=0 */ |
- case ORDERED: /* PF=0 */ |
- case UNORDERED: /* PF=1 */ |
- case UNEQ: /* EQ - ZF=1 */ |
- case UNLT: /* LTU - CF=1 */ |
- case UNLE: /* LEU - CF=1 | ZF=1 */ |
- case LTGT: /* EQ - ZF=0 */ |
- break; |
- case LT: /* LTU - CF=1 - fails on unordered */ |
- *first_code = UNLT; |
- *bypass_code = UNORDERED; |
- break; |
- case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */ |
- *first_code = UNLE; |
- *bypass_code = UNORDERED; |
- break; |
- case EQ: /* EQ - ZF=1 - fails on unordered */ |
- *first_code = UNEQ; |
- *bypass_code = UNORDERED; |
- break; |
- case NE: /* NE - ZF=0 - fails on unordered */ |
- *first_code = LTGT; |
- *second_code = UNORDERED; |
- break; |
- case UNGE: /* GEU - CF=0 - fails on unordered */ |
- *first_code = GE; |
- *second_code = UNORDERED; |
- break; |
- case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */ |
- *first_code = GT; |
- *second_code = UNORDERED; |
- break; |
+ case GT: /* GTU - CF=0 & ZF=0 */ |
+ return TARGET_IEEE_FP ? UNKNOWN : UNLT; |
+ case GE: /* GEU - CF=0 */ |
+ return TARGET_IEEE_FP ? UNKNOWN : UNLE; |
+ case UNLT: /* LTU - CF=1 */ |
+ return TARGET_IEEE_FP ? UNKNOWN : GT; |
+ case UNLE: /* LEU - CF=1 | ZF=1 */ |
+ return TARGET_IEEE_FP ? UNKNOWN : GE; |
default: |
- gcc_unreachable (); |
- } |
- if (!TARGET_IEEE_FP) |
- { |
- *second_code = UNKNOWN; |
- *bypass_code = UNKNOWN; |
+ return swap_condition (code); |
} |
} |
-/* Return cost of comparison done fcom + arithmetics operations on AX. |
+/* Return cost of comparison CODE using the best strategy for performance. |
All following functions do use number of instructions as a cost metrics. |
In future this should be tweaked to compute bytes for optimize_size and |
take into account performance of various instructions on various CPUs. */ |
+ |
static int |
-ix86_fp_comparison_arithmetics_cost (enum rtx_code code) |
+ix86_fp_comparison_cost (enum rtx_code code) |
{ |
- if (!TARGET_IEEE_FP) |
- return 4; |
- /* The cost of code output by ix86_expand_fp_compare. */ |
+ int arith_cost; |
+ |
+ /* The cost of code using bit-twiddling on %ah. */ |
switch (code) |
{ |
case UNLE: |
@@ -13877,82 +14787,49 @@ ix86_fp_comparison_arithmetics_cost (enum rtx_code code) |
case UNORDERED: |
case ORDERED: |
case UNEQ: |
- return 4; |
+ arith_cost = 4; |
break; |
case LT: |
case NE: |
case EQ: |
case UNGE: |
- return 5; |
+ arith_cost = TARGET_IEEE_FP ? 5 : 4; |
break; |
case LE: |
case UNGT: |
- return 6; |
+ arith_cost = TARGET_IEEE_FP ? 6 : 4; |
break; |
default: |
gcc_unreachable (); |
} |
-} |
-/* Return cost of comparison done using fcomi operation. |
- See ix86_fp_comparison_arithmetics_cost for the metrics. */ |
-static int |
-ix86_fp_comparison_fcomi_cost (enum rtx_code code) |
-{ |
- enum rtx_code bypass_code, first_code, second_code; |
- /* Return arbitrarily high cost when instruction is not supported - this |
- prevents gcc from using it. */ |
- if (!TARGET_CMOVE) |
- return 1024; |
- ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); |
- return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2; |
+ switch (ix86_fp_comparison_strategy (code)) |
+ { |
+ case IX86_FPCMP_COMI: |
+ return arith_cost > 4 ? 3 : 2; |
+ case IX86_FPCMP_SAHF: |
+ return arith_cost > 4 ? 4 : 3; |
+ default: |
+ return arith_cost; |
+ } |
} |
-/* Return cost of comparison done using sahf operation. |
- See ix86_fp_comparison_arithmetics_cost for the metrics. */ |
-static int |
-ix86_fp_comparison_sahf_cost (enum rtx_code code) |
-{ |
- enum rtx_code bypass_code, first_code, second_code; |
- /* Return arbitrarily high cost when instruction is not preferred - this |
- avoids gcc from using it. */ |
- if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))) |
- return 1024; |
- ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); |
- return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3; |
-} |
+/* Return strategy to use for floating-point. We assume that fcomi is always |
+ preferable where available, since that is also true when looking at size |
+ (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */ |
-/* Compute cost of the comparison done using any method. |
- See ix86_fp_comparison_arithmetics_cost for the metrics. */ |
-static int |
-ix86_fp_comparison_cost (enum rtx_code code) |
+enum ix86_fpcmp_strategy |
+ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED) |
{ |
- int fcomi_cost, sahf_cost, arithmetics_cost = 1024; |
- int min; |
- |
- fcomi_cost = ix86_fp_comparison_fcomi_cost (code); |
- sahf_cost = ix86_fp_comparison_sahf_cost (code); |
- |
- min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code); |
- if (min > sahf_cost) |
- min = sahf_cost; |
- if (min > fcomi_cost) |
- min = fcomi_cost; |
- return min; |
-} |
+ /* Do fcomi/sahf based test when profitable. */ |
-/* Return true if we should use an FCOMI instruction for this |
- fp comparison. */ |
+ if (TARGET_CMOVE) |
+ return IX86_FPCMP_COMI; |
-int |
-ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED) |
-{ |
- enum rtx_code swapped_code = swap_condition (code); |
+ if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun))) |
+ return IX86_FPCMP_SAHF; |
- return ((ix86_fp_comparison_cost (code) |
- == ix86_fp_comparison_fcomi_cost (code)) |
- || (ix86_fp_comparison_cost (swapped_code) |
- == ix86_fp_comparison_fcomi_cost (swapped_code))); |
+ return IX86_FPCMP_ARITH; |
} |
/* Swap, force into registers, or otherwise massage the two operands |
@@ -13979,7 +14856,7 @@ ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1) |
&& ! (standard_80387_constant_p (op0) == 1 |
|| standard_80387_constant_p (op1) == 1) |
&& GET_CODE (op1) != FLOAT) |
- || ix86_use_fcomi_compare (code))) |
+ || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI)) |
{ |
op0 = force_reg (op_mode, op0); |
op1 = force_reg (op_mode, op1); |
@@ -13995,9 +14872,13 @@ ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1) |
&& ! (standard_80387_constant_p (op1) == 0 |
|| MEM_P (op1)))) |
{ |
- rtx tmp; |
- tmp = op0, op0 = op1, op1 = tmp; |
- code = swap_condition (code); |
+ enum rtx_code new_code = ix86_fp_swap_condition (code); |
+ if (new_code != UNKNOWN) |
+ { |
+ rtx tmp; |
+ tmp = op0, op0 = op1, op1 = tmp; |
+ code = new_code; |
+ } |
} |
if (!REG_P (op0)) |
@@ -14072,59 +14953,38 @@ ix86_fp_compare_code_to_integer (enum rtx_code code) |
/* Generate insn patterns to do a floating point compare of OPERANDS. */ |
static rtx |
-ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch, |
- rtx *second_test, rtx *bypass_test) |
+ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch) |
{ |
enum machine_mode fpcmp_mode, intcmp_mode; |
rtx tmp, tmp2; |
- int cost = ix86_fp_comparison_cost (code); |
- enum rtx_code bypass_code, first_code, second_code; |
fpcmp_mode = ix86_fp_compare_mode (code); |
code = ix86_prepare_fp_compare_args (code, &op0, &op1); |
- if (second_test) |
- *second_test = NULL_RTX; |
- if (bypass_test) |
- *bypass_test = NULL_RTX; |
- |
- ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); |
- |
/* Do fcomi/sahf based test when profitable. */ |
- if (ix86_fp_comparison_arithmetics_cost (code) > cost |
- && (bypass_code == UNKNOWN || bypass_test) |
- && (second_code == UNKNOWN || second_test)) |
+ switch (ix86_fp_comparison_strategy (code)) |
{ |
+ case IX86_FPCMP_COMI: |
+ intcmp_mode = fpcmp_mode; |
tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); |
tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG), |
tmp); |
- if (TARGET_CMOVE) |
- emit_insn (tmp); |
- else |
- { |
- gcc_assert (TARGET_SAHF); |
+ emit_insn (tmp); |
+ break; |
- if (!scratch) |
- scratch = gen_reg_rtx (HImode); |
- tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch); |
+ case IX86_FPCMP_SAHF: |
+ intcmp_mode = fpcmp_mode; |
+ tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); |
+ tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG), |
+ tmp); |
- emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2))); |
- } |
+ if (!scratch) |
+ scratch = gen_reg_rtx (HImode); |
+ tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch); |
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2))); |
+ break; |
- /* The FP codes work out to act like unsigned. */ |
- intcmp_mode = fpcmp_mode; |
- code = first_code; |
- if (bypass_code != UNKNOWN) |
- *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode, |
- gen_rtx_REG (intcmp_mode, FLAGS_REG), |
- const0_rtx); |
- if (second_code != UNKNOWN) |
- *second_test = gen_rtx_fmt_ee (second_code, VOIDmode, |
- gen_rtx_REG (intcmp_mode, FLAGS_REG), |
- const0_rtx); |
- } |
- else |
- { |
+ case IX86_FPCMP_ARITH: |
/* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */ |
tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); |
tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW); |
@@ -14161,13 +15021,13 @@ ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch, |
if (code == LT && TARGET_IEEE_FP) |
{ |
emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); |
- emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01))); |
+ emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx)); |
intcmp_mode = CCmode; |
code = EQ; |
} |
else |
{ |
- emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01))); |
+ emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx)); |
code = NE; |
} |
break; |
@@ -14181,8 +15041,7 @@ ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch, |
else |
{ |
emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); |
- emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, |
- GEN_INT (0x01))); |
+ emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx)); |
code = NE; |
} |
break; |
@@ -14215,7 +15074,6 @@ ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch, |
{ |
emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40))); |
code = NE; |
- break; |
} |
break; |
case NE: |
@@ -14246,6 +15104,10 @@ ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch, |
default: |
gcc_unreachable (); |
} |
+ break; |
+ |
+ default: |
+ gcc_unreachable(); |
} |
/* Return the test that should be put into the flags user, i.e. |
@@ -14256,27 +15118,19 @@ ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch, |
} |
rtx |
-ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test) |
+ix86_expand_compare (enum rtx_code code) |
{ |
rtx op0, op1, ret; |
op0 = ix86_compare_op0; |
op1 = ix86_compare_op1; |
- if (second_test) |
- *second_test = NULL_RTX; |
- if (bypass_test) |
- *bypass_test = NULL_RTX; |
+ if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC) |
+ ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_op0, ix86_compare_op1); |
- if (ix86_compare_emitted) |
- { |
- ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx); |
- ix86_compare_emitted = NULL_RTX; |
- } |
else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0))) |
{ |
gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0))); |
- ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX, |
- second_test, bypass_test); |
+ ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX); |
} |
else |
ret = ix86_expand_int_compare (code, op0, op1); |
@@ -14284,88 +15138,27 @@ ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test) |
return ret; |
} |
-/* Return true if the CODE will result in nontrivial jump sequence. */ |
-bool |
-ix86_fp_jump_nontrivial_p (enum rtx_code code) |
-{ |
- enum rtx_code bypass_code, first_code, second_code; |
- if (!TARGET_CMOVE) |
- return true; |
- ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); |
- return bypass_code != UNKNOWN || second_code != UNKNOWN; |
-} |
- |
void |
ix86_expand_branch (enum rtx_code code, rtx label) |
{ |
rtx tmp; |
- /* If we have emitted a compare insn, go straight to simple. |
- ix86_expand_compare won't emit anything if ix86_compare_emitted |
- is non NULL. */ |
- if (ix86_compare_emitted) |
- goto simple; |
- |
switch (GET_MODE (ix86_compare_op0)) |
{ |
+ case SFmode: |
+ case DFmode: |
+ case XFmode: |
case QImode: |
case HImode: |
case SImode: |
simple: |
- tmp = ix86_expand_compare (code, NULL, NULL); |
+ tmp = ix86_expand_compare (code); |
tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, |
gen_rtx_LABEL_REF (VOIDmode, label), |
pc_rtx); |
emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); |
return; |
- case SFmode: |
- case DFmode: |
- case XFmode: |
- { |
- rtvec vec; |
- int use_fcomi; |
- enum rtx_code bypass_code, first_code, second_code; |
- |
- code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0, |
- &ix86_compare_op1); |
- |
- ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); |
- |
- /* Check whether we will use the natural sequence with one jump. If |
- so, we can expand jump early. Otherwise delay expansion by |
- creating compound insn to not confuse optimizers. */ |
- if (bypass_code == UNKNOWN && second_code == UNKNOWN) |
- { |
- ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1, |
- gen_rtx_LABEL_REF (VOIDmode, label), |
- pc_rtx, NULL_RTX, NULL_RTX); |
- } |
- else |
- { |
- tmp = gen_rtx_fmt_ee (code, VOIDmode, |
- ix86_compare_op0, ix86_compare_op1); |
- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, |
- gen_rtx_LABEL_REF (VOIDmode, label), |
- pc_rtx); |
- tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp); |
- |
- use_fcomi = ix86_use_fcomi_compare (code); |
- vec = rtvec_alloc (3 + !use_fcomi); |
- RTVEC_ELT (vec, 0) = tmp; |
- RTVEC_ELT (vec, 1) |
- = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG)); |
- RTVEC_ELT (vec, 2) |
- = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG)); |
- if (! use_fcomi) |
- RTVEC_ELT (vec, 3) |
- = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode)); |
- |
- emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec)); |
- } |
- return; |
- } |
- |
case DImode: |
if (TARGET_64BIT) |
goto simple; |
@@ -14506,7 +15299,11 @@ ix86_expand_branch (enum rtx_code code, rtx label) |
} |
default: |
- gcc_unreachable (); |
+ /* A compare insn has already been emitted: ix86_compare_op0 holds |
+ the MODE_CC flags result, and ix86_expand_compare will build the |
+ jump condition from it directly, so go straight to simple. */ |
+ gcc_assert (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC); |
+ goto simple; |
} |
} |
@@ -14515,10 +15312,7 @@ void |
ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2, |
rtx target1, rtx target2, rtx tmp, rtx pushed) |
{ |
- rtx second, bypass; |
- rtx label = NULL_RTX; |
rtx condition; |
- int bypass_probability = -1, second_probability = -1, probability = -1; |
rtx i; |
if (target2 != pc_rtx) |
@@ -14530,117 +15324,30 @@ ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2, |
} |
condition = ix86_expand_fp_compare (code, op1, op2, |
- tmp, &second, &bypass); |
+ tmp); |
/* Remove pushed operand from stack. */ |
if (pushed) |
ix86_free_from_memory (GET_MODE (pushed)); |
- if (split_branch_probability >= 0) |
- { |
- /* Distribute the probabilities across the jumps. |
- Assume the BYPASS and SECOND to be always test |
- for UNORDERED. */ |
- probability = split_branch_probability; |
- |
- /* Value of 1 is low enough to make no need for probability |
- to be updated. Later we may run some experiments and see |
- if unordered values are more frequent in practice. */ |
- if (bypass) |
- bypass_probability = 1; |
- if (second) |
- second_probability = 1; |
- } |
- if (bypass != NULL_RTX) |
- { |
- label = gen_label_rtx (); |
- i = emit_jump_insn (gen_rtx_SET |
- (VOIDmode, pc_rtx, |
- gen_rtx_IF_THEN_ELSE (VOIDmode, |
- bypass, |
- gen_rtx_LABEL_REF (VOIDmode, |
- label), |
- pc_rtx))); |
- if (bypass_probability >= 0) |
- REG_NOTES (i) |
- = gen_rtx_EXPR_LIST (REG_BR_PROB, |
- GEN_INT (bypass_probability), |
- REG_NOTES (i)); |
- } |
i = emit_jump_insn (gen_rtx_SET |
(VOIDmode, pc_rtx, |
gen_rtx_IF_THEN_ELSE (VOIDmode, |
condition, target1, target2))); |
- if (probability >= 0) |
- REG_NOTES (i) |
- = gen_rtx_EXPR_LIST (REG_BR_PROB, |
- GEN_INT (probability), |
- REG_NOTES (i)); |
- if (second != NULL_RTX) |
- { |
- i = emit_jump_insn (gen_rtx_SET |
- (VOIDmode, pc_rtx, |
- gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1, |
- target2))); |
- if (second_probability >= 0) |
- REG_NOTES (i) |
- = gen_rtx_EXPR_LIST (REG_BR_PROB, |
- GEN_INT (second_probability), |
- REG_NOTES (i)); |
- } |
- if (label != NULL_RTX) |
- emit_label (label); |
+ if (split_branch_probability >= 0) |
+ add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability)); |
} |
-int |
+void |
ix86_expand_setcc (enum rtx_code code, rtx dest) |
{ |
- rtx ret, tmp, tmpreg, equiv; |
- rtx second_test, bypass_test; |
- |
- if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode)) |
- return 0; /* FAIL */ |
+ rtx ret; |
gcc_assert (GET_MODE (dest) == QImode); |
- ret = ix86_expand_compare (code, &second_test, &bypass_test); |
+ ret = ix86_expand_compare (code); |
PUT_MODE (ret, QImode); |
- |
- tmp = dest; |
- tmpreg = dest; |
- |
- emit_insn (gen_rtx_SET (VOIDmode, tmp, ret)); |
- if (bypass_test || second_test) |
- { |
- rtx test = second_test; |
- int bypass = 0; |
- rtx tmp2 = gen_reg_rtx (QImode); |
- if (bypass_test) |
- { |
- gcc_assert (!second_test); |
- test = bypass_test; |
- bypass = 1; |
- PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test))); |
- } |
- PUT_MODE (test, QImode); |
- emit_insn (gen_rtx_SET (VOIDmode, tmp2, test)); |
- |
- if (bypass) |
- emit_insn (gen_andqi3 (tmp, tmpreg, tmp2)); |
- else |
- emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2)); |
- } |
- |
- /* Attach a REG_EQUAL note describing the comparison result. */ |
- if (ix86_compare_op0 && ix86_compare_op1) |
- { |
- equiv = simplify_gen_relational (code, QImode, |
- GET_MODE (ix86_compare_op0), |
- ix86_compare_op0, ix86_compare_op1); |
- set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv); |
- } |
- |
- return 1; /* DONE */ |
+ emit_insn (gen_rtx_SET (VOIDmode, dest, ret)); |
} |
/* Expand comparison setting or clearing carry flag. Return true when |
@@ -14657,7 +15364,6 @@ ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop) |
if (SCALAR_FLOAT_MODE_P (mode)) |
{ |
- rtx second_test = NULL, bypass_test = NULL; |
rtx compare_op, compare_seq; |
gcc_assert (!DECIMAL_FLOAT_MODE_P (mode)); |
@@ -14683,14 +15389,10 @@ ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop) |
we decide to expand comparison using arithmetic that is not |
too common scenario. */ |
start_sequence (); |
- compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX, |
- &second_test, &bypass_test); |
+ compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX); |
compare_seq = get_insns (); |
end_sequence (); |
- if (second_test || bypass_test) |
- return false; |
- |
if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode |
|| GET_MODE (XEXP (compare_op, 0)) == CCFPUmode) |
code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op)); |
@@ -14773,7 +15475,7 @@ ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop) |
} |
ix86_compare_op0 = op0; |
ix86_compare_op1 = op1; |
- *pop = ix86_expand_compare (code, NULL, NULL); |
+ *pop = ix86_expand_compare (code); |
gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU); |
return true; |
} |
@@ -14783,12 +15485,13 @@ ix86_expand_int_movcc (rtx operands[]) |
{ |
enum rtx_code code = GET_CODE (operands[1]), compare_code; |
rtx compare_seq, compare_op; |
- rtx second_test, bypass_test; |
enum machine_mode mode = GET_MODE (operands[0]); |
- bool sign_bit_compare_p = false;; |
+ bool sign_bit_compare_p = false; |
start_sequence (); |
- compare_op = ix86_expand_compare (code, &second_test, &bypass_test); |
+ ix86_compare_op0 = XEXP (operands[1], 0); |
+ ix86_compare_op1 = XEXP (operands[1], 1); |
+ compare_op = ix86_expand_compare (code); |
compare_seq = get_insns (); |
end_sequence (); |
@@ -14823,15 +15526,19 @@ ix86_expand_int_movcc (rtx operands[]) |
if (!sign_bit_compare_p) |
{ |
+ rtx flags; |
bool fpcmp = false; |
compare_code = GET_CODE (compare_op); |
- if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode |
- || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode) |
+ flags = XEXP (compare_op, 0); |
+ |
+ if (GET_MODE (flags) == CCFPmode |
+ || GET_MODE (flags) == CCFPUmode) |
{ |
fpcmp = true; |
- compare_code = ix86_fp_compare_code_to_integer (compare_code); |
+ compare_code |
+ = ix86_fp_compare_code_to_integer (compare_code); |
} |
/* To simplify rest of code, restrict to the GEU case. */ |
@@ -14850,7 +15557,8 @@ ix86_expand_int_movcc (rtx operands[]) |
reverse_condition_maybe_unordered |
(GET_CODE (compare_op))); |
else |
- PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op))); |
+ PUT_CODE (compare_op, |
+ reverse_condition (GET_CODE (compare_op))); |
} |
diff = ct - cf; |
@@ -14859,9 +15567,10 @@ ix86_expand_int_movcc (rtx operands[]) |
tmp = gen_reg_rtx (mode); |
if (mode == DImode) |
- emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op)); |
+ emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op)); |
else |
- emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op)); |
+ emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), |
+ flags, compare_op)); |
} |
else |
{ |
@@ -15260,19 +15969,6 @@ ix86_expand_int_movcc (rtx operands[]) |
if (! nonimmediate_operand (operands[3], mode)) |
operands[3] = force_reg (mode, operands[3]); |
- if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3])) |
- { |
- rtx tmp = gen_reg_rtx (mode); |
- emit_move_insn (tmp, operands[3]); |
- operands[3] = tmp; |
- } |
- if (second_test && reg_overlap_mentioned_p (operands[0], operands[2])) |
- { |
- rtx tmp = gen_reg_rtx (mode); |
- emit_move_insn (tmp, operands[2]); |
- operands[2] = tmp; |
- } |
- |
if (! register_operand (operands[2], VOIDmode) |
&& (mode == QImode |
|| ! register_operand (operands[3], VOIDmode))) |
@@ -15287,18 +15983,6 @@ ix86_expand_int_movcc (rtx operands[]) |
gen_rtx_IF_THEN_ELSE (mode, |
compare_op, operands[2], |
operands[3]))); |
- if (bypass_test) |
- emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]), |
- gen_rtx_IF_THEN_ELSE (mode, |
- bypass_test, |
- copy_rtx (operands[3]), |
- copy_rtx (operands[0])))); |
- if (second_test) |
- emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]), |
- gen_rtx_IF_THEN_ELSE (mode, |
- second_test, |
- copy_rtx (operands[2]), |
- copy_rtx (operands[0])))); |
return 1; /* DONE */ |
} |
@@ -15465,7 +16149,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) |
x = gen_rtx_AND (mode, x, op_false); |
emit_insn (gen_rtx_SET (VOIDmode, dest, x)); |
} |
- else if (TARGET_SSE5) |
+ else if (TARGET_XOP) |
{ |
rtx pcmov = gen_rtx_SET (mode, dest, |
gen_rtx_IF_THEN_ELSE (mode, cmp, |
@@ -15503,8 +16187,10 @@ ix86_expand_fp_movcc (rtx operands[]) |
{ |
enum machine_mode mode = GET_MODE (operands[0]); |
enum rtx_code code = GET_CODE (operands[1]); |
- rtx tmp, compare_op, second_test, bypass_test; |
+ rtx tmp, compare_op; |
+ ix86_compare_op0 = XEXP (operands[1], 0); |
+ ix86_compare_op1 = XEXP (operands[1], 1); |
if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode)) |
{ |
enum machine_mode cmode; |
@@ -15538,45 +16224,20 @@ ix86_expand_fp_movcc (rtx operands[]) |
/* The floating point conditional move instructions don't directly |
support conditions resulting from a signed integer comparison. */ |
- compare_op = ix86_expand_compare (code, &second_test, &bypass_test); |
- |
- /* The floating point conditional move instructions don't directly |
- support signed integer comparisons. */ |
- |
+ compare_op = ix86_expand_compare (code); |
if (!fcmov_comparison_operator (compare_op, VOIDmode)) |
{ |
- gcc_assert (!second_test && !bypass_test); |
tmp = gen_reg_rtx (QImode); |
ix86_expand_setcc (code, tmp); |
code = NE; |
ix86_compare_op0 = tmp; |
ix86_compare_op1 = const0_rtx; |
- compare_op = ix86_expand_compare (code, &second_test, &bypass_test); |
- } |
- if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3])) |
- { |
- tmp = gen_reg_rtx (mode); |
- emit_move_insn (tmp, operands[3]); |
- operands[3] = tmp; |
- } |
- if (second_test && reg_overlap_mentioned_p (operands[0], operands[2])) |
- { |
- tmp = gen_reg_rtx (mode); |
- emit_move_insn (tmp, operands[2]); |
- operands[2] = tmp; |
+ compare_op = ix86_expand_compare (code); |
} |
emit_insn (gen_rtx_SET (VOIDmode, operands[0], |
gen_rtx_IF_THEN_ELSE (mode, compare_op, |
operands[2], operands[3]))); |
- if (bypass_test) |
- emit_insn (gen_rtx_SET (VOIDmode, operands[0], |
- gen_rtx_IF_THEN_ELSE (mode, bypass_test, |
- operands[3], operands[0]))); |
- if (second_test) |
- emit_insn (gen_rtx_SET (VOIDmode, operands[0], |
- gen_rtx_IF_THEN_ELSE (mode, second_test, |
- operands[2], operands[0]))); |
return 1; |
} |
@@ -15618,8 +16279,8 @@ ix86_expand_int_vcond (rtx operands[]) |
cop0 = operands[4]; |
cop1 = operands[5]; |
- /* SSE5 supports all of the comparisons on all vector int types. */ |
- if (!TARGET_SSE5) |
+ /* XOP supports all of the comparisons on all vector int types. */ |
+ if (!TARGET_XOP) |
{ |
/* Canonicalize the comparison to EQ, GT, GTU. */ |
switch (code) |
@@ -15820,9 +16481,9 @@ ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p) |
{ |
/* Shift higher 8 bytes to lower 8 bytes. */ |
src = gen_reg_rtx (imode); |
- emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src), |
- gen_lowpart (TImode, operands[1]), |
- GEN_INT (64))); |
+ emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src), |
+ gen_lowpart (V1TImode, operands[1]), |
+ GEN_INT (64))); |
} |
else |
src = operands[1]; |
@@ -15830,190 +16491,6 @@ ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p) |
emit_insn (unpack (dest, src)); |
} |
-/* This function performs the same task as ix86_expand_sse_unpack, |
- but with sse5 instructions. */ |
- |
-void |
-ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p) |
-{ |
- enum machine_mode imode = GET_MODE (operands[1]); |
- int pperm_bytes[16]; |
- int i; |
- int h = (high_p) ? 8 : 0; |
- int h2; |
- int sign_extend; |
- rtvec v = rtvec_alloc (16); |
- rtvec vs; |
- rtx x, p; |
- rtx op0 = operands[0], op1 = operands[1]; |
- |
- switch (imode) |
- { |
- case V16QImode: |
- vs = rtvec_alloc (8); |
- h2 = (high_p) ? 8 : 0; |
- for (i = 0; i < 8; i++) |
- { |
- pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h; |
- pperm_bytes[2*i+1] = ((unsigned_p) |
- ? PPERM_ZERO |
- : PPERM_SIGN | PPERM_SRC2 | i | h); |
- } |
- |
- for (i = 0; i < 16; i++) |
- RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]); |
- |
- for (i = 0; i < 8; i++) |
- RTVEC_ELT (vs, i) = GEN_INT (i + h2); |
- |
- p = gen_rtx_PARALLEL (VOIDmode, vs); |
- x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v)); |
- if (unsigned_p) |
- emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x)); |
- else |
- emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x)); |
- break; |
- |
- case V8HImode: |
- vs = rtvec_alloc (4); |
- h2 = (high_p) ? 4 : 0; |
- for (i = 0; i < 4; i++) |
- { |
- sign_extend = ((unsigned_p) |
- ? PPERM_ZERO |
- : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h)); |
- pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h); |
- pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h); |
- pperm_bytes[4*i+2] = sign_extend; |
- pperm_bytes[4*i+3] = sign_extend; |
- } |
- |
- for (i = 0; i < 16; i++) |
- RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]); |
- |
- for (i = 0; i < 4; i++) |
- RTVEC_ELT (vs, i) = GEN_INT (i + h2); |
- |
- p = gen_rtx_PARALLEL (VOIDmode, vs); |
- x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v)); |
- if (unsigned_p) |
- emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x)); |
- else |
- emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x)); |
- break; |
- |
- case V4SImode: |
- vs = rtvec_alloc (2); |
- h2 = (high_p) ? 2 : 0; |
- for (i = 0; i < 2; i++) |
- { |
- sign_extend = ((unsigned_p) |
- ? PPERM_ZERO |
- : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h)); |
- pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h); |
- pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h); |
- pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h); |
- pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h); |
- pperm_bytes[8*i+4] = sign_extend; |
- pperm_bytes[8*i+5] = sign_extend; |
- pperm_bytes[8*i+6] = sign_extend; |
- pperm_bytes[8*i+7] = sign_extend; |
- } |
- |
- for (i = 0; i < 16; i++) |
- RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]); |
- |
- for (i = 0; i < 2; i++) |
- RTVEC_ELT (vs, i) = GEN_INT (i + h2); |
- |
- p = gen_rtx_PARALLEL (VOIDmode, vs); |
- x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v)); |
- if (unsigned_p) |
- emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x)); |
- else |
- emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x)); |
- break; |
- |
- default: |
- gcc_unreachable (); |
- } |
- |
- return; |
-} |
- |
-/* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the |
- next narrower integer vector type */ |
-void |
-ix86_expand_sse5_pack (rtx operands[3]) |
-{ |
- enum machine_mode imode = GET_MODE (operands[0]); |
- int pperm_bytes[16]; |
- int i; |
- rtvec v = rtvec_alloc (16); |
- rtx x; |
- rtx op0 = operands[0]; |
- rtx op1 = operands[1]; |
- rtx op2 = operands[2]; |
- |
- switch (imode) |
- { |
- case V16QImode: |
- for (i = 0; i < 8; i++) |
- { |
- pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2); |
- pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2); |
- } |
- |
- for (i = 0; i < 16; i++) |
- RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]); |
- |
- x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v)); |
- emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x)); |
- break; |
- |
- case V8HImode: |
- for (i = 0; i < 4; i++) |
- { |
- pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0); |
- pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1); |
- pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0); |
- pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1); |
- } |
- |
- for (i = 0; i < 16; i++) |
- RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]); |
- |
- x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v)); |
- emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x)); |
- break; |
- |
- case V4SImode: |
- for (i = 0; i < 2; i++) |
- { |
- pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0); |
- pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1); |
- pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2); |
- pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3); |
- pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0); |
- pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1); |
- pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2); |
- pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3); |
- } |
- |
- for (i = 0; i < 16; i++) |
- RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]); |
- |
- x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v)); |
- emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x)); |
- break; |
- |
- default: |
- gcc_unreachable (); |
- } |
- |
- return; |
-} |
- |
/* Expand conditional increment or decrement using adb/sbb instructions. |
The default case using setcc followed by the conditional move can be |
done by generic code. */ |
@@ -16021,11 +16498,15 @@ int |
ix86_expand_int_addcc (rtx operands[]) |
{ |
enum rtx_code code = GET_CODE (operands[1]); |
+ rtx flags; |
+ rtx (*insn)(rtx, rtx, rtx, rtx, rtx); |
rtx compare_op; |
rtx val = const0_rtx; |
bool fpcmp = false; |
- enum machine_mode mode = GET_MODE (operands[0]); |
+ enum machine_mode mode; |
+ ix86_compare_op0 = XEXP (operands[1], 0); |
+ ix86_compare_op1 = XEXP (operands[1], 1); |
if (operands[3] != const1_rtx |
&& operands[3] != constm1_rtx) |
return 0; |
@@ -16034,8 +16515,10 @@ ix86_expand_int_addcc (rtx operands[]) |
return 0; |
code = GET_CODE (compare_op); |
- if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode |
- || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode) |
+ flags = XEXP (compare_op, 0); |
+ |
+ if (GET_MODE (flags) == CCFPmode |
+ || GET_MODE (flags) == CCFPUmode) |
{ |
fpcmp = true; |
code = ix86_fp_compare_code_to_integer (code); |
@@ -16051,24 +16534,25 @@ ix86_expand_int_addcc (rtx operands[]) |
else |
PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op))); |
} |
- PUT_MODE (compare_op, mode); |
+ |
+ mode = GET_MODE (operands[0]); |
/* Construct either adc or sbb insn. */ |
if ((code == LTU) == (operands[3] == constm1_rtx)) |
{ |
- switch (GET_MODE (operands[0])) |
+ switch (mode) |
{ |
case QImode: |
- emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op)); |
+ insn = gen_subqi3_carry; |
break; |
case HImode: |
- emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op)); |
+ insn = gen_subhi3_carry; |
break; |
case SImode: |
- emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op)); |
+ insn = gen_subsi3_carry; |
break; |
case DImode: |
- emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op)); |
+ insn = gen_subdi3_carry; |
break; |
default: |
gcc_unreachable (); |
@@ -16076,24 +16560,26 @@ ix86_expand_int_addcc (rtx operands[]) |
} |
else |
{ |
- switch (GET_MODE (operands[0])) |
+ switch (mode) |
{ |
case QImode: |
- emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op)); |
+ insn = gen_addqi3_carry; |
break; |
case HImode: |
- emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op)); |
+ insn = gen_addhi3_carry; |
break; |
case SImode: |
- emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op)); |
+ insn = gen_addsi3_carry; |
break; |
case DImode: |
- emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op)); |
+ insn = gen_adddi3_carry; |
break; |
default: |
gcc_unreachable (); |
} |
} |
+ emit_insn (insn (operands[0], operands[2], val, flags, compare_op)); |
+ |
return 1; /* DONE */ |
} |
@@ -16581,14 +17067,15 @@ ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode) |
emit_insn ((mode == DImode |
? gen_lshrsi3 |
- : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6))); |
+ : gen_lshrdi3) (high[0], high[0], |
+ GEN_INT (mode == DImode ? 5 : 6))); |
emit_insn ((mode == DImode |
? gen_andsi3 |
- : gen_anddi3) (high[0], high[0], GEN_INT (1))); |
+ : gen_anddi3) (high[0], high[0], const1_rtx)); |
emit_move_insn (low[0], high[0]); |
emit_insn ((mode == DImode |
? gen_xorsi3 |
- : gen_xordi3) (low[0], low[0], GEN_INT (1))); |
+ : gen_xordi3) (low[0], low[0], const1_rtx)); |
} |
emit_insn ((mode == DImode |
@@ -16789,10 +17276,7 @@ predict_jump (int prob) |
{ |
rtx insn = get_last_insn (); |
gcc_assert (JUMP_P (insn)); |
- REG_NOTES (insn) |
- = gen_rtx_EXPR_LIST (REG_BR_PROB, |
- GEN_INT (prob), |
- REG_NOTES (insn)); |
+ add_reg_note (insn, REG_BR_PROB, GEN_INT (prob)); |
} |
/* Helper function for the string operations below. Dest VARIABLE whether |
@@ -16844,7 +17328,6 @@ static rtx |
scale_counter (rtx countreg, int scale) |
{ |
rtx sc; |
- rtx piece_size_mask; |
if (scale == 1) |
return countreg; |
@@ -16852,7 +17335,6 @@ scale_counter (rtx countreg, int scale) |
return GEN_INT (INTVAL (countreg) / scale); |
gcc_assert (REG_P (countreg)); |
- piece_size_mask = GEN_INT (scale - 1); |
sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg, |
GEN_INT (exact_log2 (scale)), |
NULL, 1, OPTAB_DIRECT); |
@@ -16867,7 +17349,7 @@ counter_mode (rtx count_exp) |
{ |
if (GET_MODE (count_exp) != VOIDmode) |
return GET_MODE (count_exp); |
- if (GET_CODE (count_exp) != CONST_INT) |
+ if (!CONST_INT_P (count_exp)) |
return Pmode; |
if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff)) |
return DImode; |
@@ -18629,7 +19111,6 @@ ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx) |
gen_rtx_IF_THEN_ELSE (Pmode, tmp, |
reg2, |
out))); |
- |
} |
else |
{ |
@@ -18656,8 +19137,9 @@ ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx) |
/* Avoid branch in fixing the byte. */ |
tmpreg = gen_lowpart (QImode, tmpreg); |
emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg)); |
- cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx); |
- emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp)); |
+ tmp = gen_rtx_REG (CCmode, FLAGS_REG); |
+ cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx); |
+ emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), tmp, cmp)); |
emit_label (end_0_label); |
} |
@@ -18779,7 +19261,7 @@ ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, |
} |
if (ix86_cmodel == CM_LARGE_PIC |
- && GET_CODE (fnaddr) == MEM |
+ && MEM_P (fnaddr) |
&& GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF |
&& !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode)) |
fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0))); |
@@ -18850,7 +19332,7 @@ ix86_init_machine_status (void) |
f = GGC_CNEW (struct machine_function); |
f->use_fast_prologue_epilogue_nregs = -1; |
f->tls_descriptor_call_expanded_p = 0; |
- f->call_abi = DEFAULT_ABI; |
+ f->call_abi = ix86_abi; |
return f; |
} |
@@ -18955,23 +19437,53 @@ memory_address_length (rtx addr) |
/* Rule of thumb: |
- esp as the base always wants an index, |
- - ebp as the base always wants a displacement. */ |
+ - ebp as the base always wants a displacement, |
+ - r12 as the base always wants an index, |
+ - r13 as the base always wants a displacement. */ |
/* Register Indirect. */ |
if (base && !index && !disp) |
{ |
/* esp (for its index) and ebp (for its displacement) need |
- the two-byte modrm form. */ |
- if (addr == stack_pointer_rtx |
- || addr == arg_pointer_rtx |
- || addr == frame_pointer_rtx |
- || addr == hard_frame_pointer_rtx) |
+ the two-byte modrm form. Similarly for r12 and r13 in 64-bit |
+ code. */ |
+ if (REG_P (addr) |
+ && (addr == arg_pointer_rtx |
+ || addr == frame_pointer_rtx |
+ || REGNO (addr) == SP_REG |
+ || REGNO (addr) == BP_REG |
+ || REGNO (addr) == R12_REG |
+ || REGNO (addr) == R13_REG)) |
len = 1; |
} |
- /* Direct Addressing. */ |
+ /* Direct Addressing. In 64-bit mode mod 00 r/m 5 |
+ is not disp32, but disp32(%rip), so for disp32 |
+ SIB byte is needed, unless print_operand_address |
+ optimizes it into disp32(%rip) or (%rip) is implied |
+ by UNSPEC. */ |
else if (disp && !base && !index) |
- len = 4; |
+ { |
+ len = 4; |
+ if (TARGET_64BIT) |
+ { |
+ rtx symbol = disp; |
+ |
+ if (GET_CODE (disp) == CONST) |
+ symbol = XEXP (disp, 0); |
+ if (GET_CODE (symbol) == PLUS |
+ && CONST_INT_P (XEXP (symbol, 1))) |
+ symbol = XEXP (symbol, 0); |
+ |
+ if (GET_CODE (symbol) != LABEL_REF |
+ && (GET_CODE (symbol) != SYMBOL_REF |
+ || SYMBOL_REF_TLS_MODEL (symbol) != 0) |
+ && (GET_CODE (symbol) != UNSPEC |
+ || (XINT (symbol, 1) != UNSPEC_GOTPCREL |
+ && XINT (symbol, 1) != UNSPEC_GOTNTPOFF))) |
+ len += 1; |
+ } |
+ } |
else |
{ |
@@ -18983,19 +19495,31 @@ memory_address_length (rtx addr) |
else |
len = 4; |
} |
- /* ebp always wants a displacement. */ |
- else if (base == hard_frame_pointer_rtx) |
- len = 1; |
+ /* ebp always wants a displacement. Similarly r13. */ |
+ else if (base && REG_P (base) |
+ && (REGNO (base) == BP_REG || REGNO (base) == R13_REG)) |
+ len = 1; |
/* An index requires the two-byte modrm form.... */ |
if (index |
- /* ...like esp, which always wants an index. */ |
- || base == stack_pointer_rtx |
+ /* ...like esp (or r12), which always wants an index. */ |
|| base == arg_pointer_rtx |
- || base == frame_pointer_rtx) |
+ || base == frame_pointer_rtx |
+ || (base && REG_P (base) |
+ && (REGNO (base) == SP_REG || REGNO (base) == R12_REG))) |
len += 1; |
} |
+ switch (parts.seg) |
+ { |
+ case SEG_FS: |
+ case SEG_GS: |
+ len += 1; |
+ break; |
+ default: |
+ break; |
+ } |
+ |
return len; |
} |
@@ -19010,30 +19534,50 @@ ix86_attr_length_immediate_default (rtx insn, int shortform) |
for (i = recog_data.n_operands - 1; i >= 0; --i) |
if (CONSTANT_P (recog_data.operand[i])) |
{ |
+ enum attr_mode mode = get_attr_mode (insn); |
+ |
gcc_assert (!len); |
- if (shortform && satisfies_constraint_K (recog_data.operand[i])) |
- len = 1; |
- else |
+ if (shortform && CONST_INT_P (recog_data.operand[i])) |
{ |
- switch (get_attr_mode (insn)) |
+ HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]); |
+ switch (mode) |
{ |
- case MODE_QI: |
- len+=1; |
- break; |
- case MODE_HI: |
- len+=2; |
- break; |
- case MODE_SI: |
- len+=4; |
- break; |
- /* Immediates for DImode instructions are encoded as 32bit sign extended values. */ |
- case MODE_DI: |
- len+=4; |
- break; |
- default: |
- fatal_insn ("unknown insn mode", insn); |
+ case MODE_QI: |
+ len = 1; |
+ continue; |
+ case MODE_HI: |
+ ival = trunc_int_for_mode (ival, HImode); |
+ break; |
+ case MODE_SI: |
+ ival = trunc_int_for_mode (ival, SImode); |
+ break; |
+ default: |
+ break; |
+ } |
+ if (IN_RANGE (ival, -128, 127)) |
+ { |
+ len = 1; |
+ continue; |
} |
} |
+ switch (mode) |
+ { |
+ case MODE_QI: |
+ len = 1; |
+ break; |
+ case MODE_HI: |
+ len = 2; |
+ break; |
+ case MODE_SI: |
+ len = 4; |
+ break; |
+ /* Immediates for DImode instructions are encoded as 32bit sign extended values. */ |
+ case MODE_DI: |
+ len = 4; |
+ break; |
+ default: |
+ fatal_insn ("unknown insn mode", insn); |
+ } |
} |
return len; |
} |
@@ -19045,22 +19589,45 @@ ix86_attr_length_address_default (rtx insn) |
if (get_attr_type (insn) == TYPE_LEA) |
{ |
- rtx set = PATTERN (insn); |
+ rtx set = PATTERN (insn), addr; |
if (GET_CODE (set) == PARALLEL) |
set = XVECEXP (set, 0, 0); |
gcc_assert (GET_CODE (set) == SET); |
- return memory_address_length (SET_SRC (set)); |
+ addr = SET_SRC (set); |
+ if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI) |
+ { |
+ if (GET_CODE (addr) == ZERO_EXTEND) |
+ addr = XEXP (addr, 0); |
+ if (GET_CODE (addr) == SUBREG) |
+ addr = SUBREG_REG (addr); |
+ } |
+ |
+ return memory_address_length (addr); |
} |
extract_insn_cached (insn); |
for (i = recog_data.n_operands - 1; i >= 0; --i) |
if (MEM_P (recog_data.operand[i])) |
{ |
+ constrain_operands_cached (reload_completed); |
+ if (which_alternative != -1) |
+ { |
+ const char *constraints = recog_data.constraints[i]; |
+ int alt = which_alternative; |
+ |
+ while (*constraints == '=' || *constraints == '+') |
+ constraints++; |
+ while (alt-- > 0) |
+ while (*constraints++ != ',') |
+ ; |
+ /* Skip ignored operands. */ |
+ if (*constraints == 'X') |
+ continue; |
+ } |
return memory_address_length (XEXP (recog_data.operand[i], 0)); |
- break; |
} |
return 0; |
} |
@@ -19089,7 +19656,8 @@ ix86_attr_length_vex_default (rtx insn, int has_0f_opcode, |
if (REG_P (recog_data.operand[i])) |
{ |
/* REX.W bit uses 3 byte VEX prefix. */ |
- if (GET_MODE (recog_data.operand[i]) == DImode) |
+ if (GET_MODE (recog_data.operand[i]) == DImode |
+ && GENERAL_REG_P (recog_data.operand[i])) |
return 3 + 1; |
} |
else |
@@ -19111,6 +19679,7 @@ ix86_issue_rate (void) |
switch (ix86_tune) |
{ |
case PROCESSOR_PENTIUM: |
+ case PROCESSOR_ATOM: |
case PROCESSOR_K6: |
return 2; |
@@ -19177,41 +19746,21 @@ ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type) |
return 1; |
} |
-/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory |
- address with operands set by DEP_INSN. */ |
+/* Return true iff USE_INSN has a memory address with operands set by |
+ SET_INSN. */ |
-static int |
-ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type) |
+bool |
+ix86_agi_dependent (rtx set_insn, rtx use_insn) |
{ |
- rtx addr; |
- |
- if (insn_type == TYPE_LEA |
- && TARGET_PENTIUM) |
- { |
- addr = PATTERN (insn); |
- |
- if (GET_CODE (addr) == PARALLEL) |
- addr = XVECEXP (addr, 0, 0); |
- |
- gcc_assert (GET_CODE (addr) == SET); |
- |
- addr = SET_SRC (addr); |
- } |
- else |
- { |
- int i; |
- extract_insn_cached (insn); |
- for (i = recog_data.n_operands - 1; i >= 0; --i) |
- if (MEM_P (recog_data.operand[i])) |
- { |
- addr = XEXP (recog_data.operand[i], 0); |
- goto found; |
- } |
- return 0; |
- found:; |
- } |
- |
- return modified_in_p (addr, dep_insn); |
+ int i; |
+ extract_insn_cached (use_insn); |
+ for (i = recog_data.n_operands - 1; i >= 0; --i) |
+ if (MEM_P (recog_data.operand[i])) |
+ { |
+ rtx addr = XEXP (recog_data.operand[i], 0); |
+ return modified_in_p (addr, set_insn) != 0; |
+ } |
+ return false; |
} |
static int |
@@ -19239,7 +19788,20 @@ ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) |
{ |
case PROCESSOR_PENTIUM: |
/* Address Generation Interlock adds a cycle of latency. */ |
- if (ix86_agi_dependent (insn, dep_insn, insn_type)) |
+ if (insn_type == TYPE_LEA) |
+ { |
+ rtx addr = PATTERN (insn); |
+ |
+ if (GET_CODE (addr) == PARALLEL) |
+ addr = XVECEXP (addr, 0, 0); |
+ |
+ gcc_assert (GET_CODE (addr) == SET); |
+ |
+ addr = SET_SRC (addr); |
+ if (modified_in_p (addr, dep_insn)) |
+ cost += 1; |
+ } |
+ else if (ix86_agi_dependent (dep_insn, insn)) |
cost += 1; |
/* ??? Compares pair with jump/setcc. */ |
@@ -19249,7 +19811,7 @@ ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) |
/* Floating point stores require value to be ready one cycle earlier. */ |
if (insn_type == TYPE_FMOV |
&& get_attr_memory (insn) == MEMORY_STORE |
- && !ix86_agi_dependent (insn, dep_insn, insn_type)) |
+ && !ix86_agi_dependent (dep_insn, insn)) |
cost += 1; |
break; |
@@ -19272,7 +19834,7 @@ ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) |
in parallel with previous instruction in case |
previous instruction is not needed to compute the address. */ |
if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) |
- && !ix86_agi_dependent (insn, dep_insn, insn_type)) |
+ && !ix86_agi_dependent (dep_insn, insn)) |
{ |
/* Claim moves to take one cycle, as core can issue one load |
at time and the next load can start cycle later. */ |
@@ -19301,7 +19863,7 @@ ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) |
in parallel with previous instruction in case |
previous instruction is not needed to compute the address. */ |
if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) |
- && !ix86_agi_dependent (insn, dep_insn, insn_type)) |
+ && !ix86_agi_dependent (dep_insn, insn)) |
{ |
/* Claim moves to take one cycle, as core can issue one load |
at time and the next load can start cycle later. */ |
@@ -19318,6 +19880,7 @@ ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) |
case PROCESSOR_ATHLON: |
case PROCESSOR_K8: |
case PROCESSOR_AMDFAM10: |
+ case PROCESSOR_ATOM: |
case PROCESSOR_GENERIC32: |
case PROCESSOR_GENERIC64: |
memory = get_attr_memory (insn); |
@@ -19326,7 +19889,7 @@ ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) |
in parallel with previous instruction in case |
previous instruction is not needed to compute the address. */ |
if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) |
- && !ix86_agi_dependent (insn, dep_insn, insn_type)) |
+ && !ix86_agi_dependent (dep_insn, insn)) |
{ |
enum attr_unit unit = get_attr_unit (insn); |
int loadcost = 3; |
@@ -19594,69 +20157,270 @@ ix86_minimum_alignment (tree exp, enum machine_mode mode, |
return align; |
} |
+/* Find a location for the static chain incoming to a nested function. |
+ This is a register, unless all free registers are used by arguments. */ |
+ |
+static rtx |
+ix86_static_chain (const_tree fndecl, bool incoming_p) |
+{ |
+ unsigned regno; |
+ |
+ if (!DECL_STATIC_CHAIN (fndecl)) |
+ return NULL; |
+ |
+ if (TARGET_64BIT) |
+ { |
+ /* We always use R10 in 64-bit mode. */ |
+ regno = R10_REG; |
+ } |
+ else |
+ { |
+ tree fntype; |
+ /* By default in 32-bit mode we use ECX to pass the static chain. */ |
+ regno = CX_REG; |
+ |
+ fntype = TREE_TYPE (fndecl); |
+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype))) |
+ { |
+ /* Fastcall functions use ecx/edx for arguments, which leaves |
+ us with EAX for the static chain. */ |
+ regno = AX_REG; |
+ } |
+ else if (ix86_function_regparm (fntype, fndecl) == 3) |
+ { |
+ /* For regparm 3, we have no free call-clobbered registers in |
+ which to store the static chain. In order to implement this, |
+ we have the trampoline push the static chain to the stack. |
+ However, we can't push a value below the return address when |
+ we call the nested function directly, so we have to use an |
+ alternate entry point. For this we use ESI, and have the |
+ alternate entry point push ESI, so that things appear the |
+ same once we're executing the nested function. */ |
+ if (incoming_p) |
+ { |
+ if (fndecl == current_function_decl) |
+ ix86_static_chain_on_stack = true; |
+ return gen_frame_mem (SImode, |
+ plus_constant (arg_pointer_rtx, -8)); |
+ } |
+ regno = SI_REG; |
+ } |
+ } |
+ |
+ return gen_rtx_REG (Pmode, regno); |
+} |
+ |
/* Emit RTL insns to initialize the variable parts of a trampoline. |
- FNADDR is an RTX for the address of the function's pure code. |
- CXT is an RTX for the static chain value for the function. */ |
-void |
-x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt) |
+ FNDECL is the decl of the target address; M_TRAMP is a MEM for |
+ the trampoline, and CHAIN_VALUE is an RTX for the static chain |
+ to be passed to the target function. */ |
+ |
+static void |
+ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) |
{ |
+ rtx mem, fnaddr; |
+ |
+ fnaddr = XEXP (DECL_RTL (fndecl), 0); |
+ |
if (!TARGET_64BIT) |
{ |
- /* Compute offset from the end of the jmp to the target function. */ |
- rtx disp = expand_binop (SImode, sub_optab, fnaddr, |
- plus_constant (tramp, 10), |
- NULL_RTX, 1, OPTAB_DIRECT); |
- emit_move_insn (gen_rtx_MEM (QImode, tramp), |
- gen_int_mode (0xb9, QImode)); |
- emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt); |
- emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)), |
- gen_int_mode (0xe9, QImode)); |
- emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp); |
+ rtx disp, chain; |
+ int opcode; |
+ |
+ /* Depending on the static chain location, either load a register |
+ with a constant, or push the constant to the stack. All of the |
+ instructions are the same size. */ |
+ chain = ix86_static_chain (fndecl, true); |
+ if (REG_P (chain)) |
+ { |
+ if (REGNO (chain) == CX_REG) |
+ opcode = 0xb9; |
+ else if (REGNO (chain) == AX_REG) |
+ opcode = 0xb8; |
+ else |
+ gcc_unreachable (); |
+ } |
+ else |
+ opcode = 0x68; |
+ |
+ mem = adjust_address (m_tramp, QImode, 0); |
+ emit_move_insn (mem, gen_int_mode (opcode, QImode)); |
+ |
+ mem = adjust_address (m_tramp, SImode, 1); |
+ emit_move_insn (mem, chain_value); |
+ |
+ /* Compute offset from the end of the jmp to the target function. |
+ In the case in which the trampoline stores the static chain on |
+ the stack, we need to skip the first insn which pushes the |
+ (call-saved) register static chain; this push is 1 byte. */ |
+ disp = expand_binop (SImode, sub_optab, fnaddr, |
+ plus_constant (XEXP (m_tramp, 0), |
+ MEM_P (chain) ? 9 : 10), |
+ NULL_RTX, 1, OPTAB_DIRECT); |
+ |
+ mem = adjust_address (m_tramp, QImode, 5); |
+ emit_move_insn (mem, gen_int_mode (0xe9, QImode)); |
+ |
+ mem = adjust_address (m_tramp, SImode, 6); |
+ emit_move_insn (mem, disp); |
} |
else |
{ |
int offset = 0; |
- /* Try to load address using shorter movl instead of movabs. |
- We may want to support movq for kernel mode, but kernel does not use |
- trampolines at the moment. */ |
+ |
+ /* Load the function address to r11. Try to load address using |
+ the shorter movl instead of movabs. We may want to support |
+ movq for kernel mode, but kernel does not use trampolines at |
+ the moment. */ |
if (x86_64_zext_immediate_operand (fnaddr, VOIDmode)) |
{ |
fnaddr = copy_to_mode_reg (DImode, fnaddr); |
- emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), |
- gen_int_mode (0xbb41, HImode)); |
- emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)), |
- gen_lowpart (SImode, fnaddr)); |
+ |
+ mem = adjust_address (m_tramp, HImode, offset); |
+ emit_move_insn (mem, gen_int_mode (0xbb41, HImode)); |
+ |
+ mem = adjust_address (m_tramp, SImode, offset + 2); |
+ emit_move_insn (mem, gen_lowpart (SImode, fnaddr)); |
offset += 6; |
} |
else |
{ |
- emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), |
- gen_int_mode (0xbb49, HImode)); |
- emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)), |
- fnaddr); |
+ mem = adjust_address (m_tramp, HImode, offset); |
+ emit_move_insn (mem, gen_int_mode (0xbb49, HImode)); |
+ |
+ mem = adjust_address (m_tramp, DImode, offset + 2); |
+ emit_move_insn (mem, fnaddr); |
offset += 10; |
} |
+ |
/* Load static chain using movabs to r10. */ |
- emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), |
- gen_int_mode (0xba49, HImode)); |
- emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)), |
- cxt); |
+ mem = adjust_address (m_tramp, HImode, offset); |
+ emit_move_insn (mem, gen_int_mode (0xba49, HImode)); |
+ |
+ mem = adjust_address (m_tramp, DImode, offset + 2); |
+ emit_move_insn (mem, chain_value); |
offset += 10; |
- /* Jump to the r11 */ |
- emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), |
- gen_int_mode (0xff49, HImode)); |
- emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)), |
- gen_int_mode (0xe3, QImode)); |
- offset += 3; |
+ |
+ /* Jump to r11; the last (unused) byte is a nop, only there to |
+ pad the write out to a single 32-bit store. */ |
+ mem = adjust_address (m_tramp, SImode, offset); |
+ emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode)); |
+ offset += 4; |
+ |
gcc_assert (offset <= TRAMPOLINE_SIZE); |
} |
#ifdef ENABLE_EXECUTE_STACK |
+#ifdef CHECK_EXECUTE_STACK_ENABLED |
+ if (CHECK_EXECUTE_STACK_ENABLED) |
+#endif |
emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"), |
- LCT_NORMAL, VOIDmode, 1, tramp, Pmode); |
+ LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode); |
#endif |
} |
+/* The following file contains several enumerations and data structures |
+ built from the definitions in i386-builtin-types.def. */ |
+ |
+#include "i386-builtin-types.inc" |
+ |
+/* Table for the ix86 builtin non-function types. */ |
+static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1]; |
+ |
+/* Retrieve an element from the above table, building some of |
+ the types lazily. */ |
+ |
+static tree |
+ix86_get_builtin_type (enum ix86_builtin_type tcode) |
+{ |
+ unsigned int index; |
+ tree type, itype; |
+ |
+ gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab)); |
+ |
+ type = ix86_builtin_type_tab[(int) tcode]; |
+ if (type != NULL) |
+ return type; |
+ |
+ gcc_assert (tcode > IX86_BT_LAST_PRIM); |
+ if (tcode <= IX86_BT_LAST_VECT) |
+ { |
+ enum machine_mode mode; |
+ |
+ index = tcode - IX86_BT_LAST_PRIM - 1; |
+ itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]); |
+ mode = ix86_builtin_type_vect_mode[index]; |
+ |
+ type = build_vector_type_for_mode (itype, mode); |
+ } |
+ else |
+ { |
+ int quals; |
+ |
+ index = tcode - IX86_BT_LAST_VECT - 1; |
+ if (tcode <= IX86_BT_LAST_PTR) |
+ quals = TYPE_UNQUALIFIED; |
+ else |
+ quals = TYPE_QUAL_CONST; |
+ |
+ itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]); |
+ if (quals != TYPE_UNQUALIFIED) |
+ itype = build_qualified_type (itype, quals); |
+ |
+ type = build_pointer_type (itype); |
+ } |
+ |
+ ix86_builtin_type_tab[(int) tcode] = type; |
+ return type; |
+} |
+ |
+/* Table for the ix86 builtin function types. */ |
+static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1]; |
+ |
+/* Retrieve an element from the above table, building some of |
+ the types lazily. */ |
+ |
+static tree |
+ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode) |
+{ |
+ tree type; |
+ |
+ gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab)); |
+ |
+ type = ix86_builtin_func_type_tab[(int) tcode]; |
+ if (type != NULL) |
+ return type; |
+ |
+ if (tcode <= IX86_BT_LAST_FUNC) |
+ { |
+ unsigned start = ix86_builtin_func_start[(int) tcode]; |
+ unsigned after = ix86_builtin_func_start[(int) tcode + 1]; |
+ tree rtype, atype, args = void_list_node; |
+ unsigned i; |
+ |
+ rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]); |
+ for (i = after - 1; i > start; --i) |
+ { |
+ atype = ix86_get_builtin_type (ix86_builtin_func_args[i]); |
+ args = tree_cons (NULL, atype, args); |
+ } |
+ |
+ type = build_function_type (rtype, args); |
+ } |
+ else |
+ { |
+ unsigned index = tcode - IX86_BT_LAST_FUNC - 1; |
+ enum ix86_builtin_func_type icode; |
+ |
+ icode = ix86_builtin_func_alias_base[index]; |
+ type = ix86_get_builtin_func_type (icode); |
+ } |
+ |
+ ix86_builtin_func_type_tab[(int) tcode] = type; |
+ return type; |
+} |
+ |
+ |
/* Codes for all the SSE/MMX builtins. */ |
enum ix86_builtins |
{ |
@@ -20062,6 +20826,16 @@ enum ix86_builtins |
IX86_BUILTIN_MFENCE, |
IX86_BUILTIN_LFENCE, |
+ IX86_BUILTIN_BSRSI, |
+ IX86_BUILTIN_BSRDI, |
+ IX86_BUILTIN_RDPMC, |
+ IX86_BUILTIN_RDTSC, |
+ IX86_BUILTIN_RDTSCP, |
+ IX86_BUILTIN_ROLQI, |
+ IX86_BUILTIN_ROLHI, |
+ IX86_BUILTIN_RORQI, |
+ IX86_BUILTIN_RORHI, |
+ |
/* SSE3. */ |
IX86_BUILTIN_ADDSUBPS, |
IX86_BUILTIN_HADDPS, |
@@ -20281,7 +21055,6 @@ enum ix86_builtins |
IX86_BUILTIN_EXTRACTF128SI256, |
IX86_BUILTIN_VZEROALL, |
IX86_BUILTIN_VZEROUPPER, |
- IX86_BUILTIN_VZEROUPPER_REX64, |
IX86_BUILTIN_VPERMILVARPD, |
IX86_BUILTIN_VPERMILVARPS, |
IX86_BUILTIN_VPERMILVARPD256, |
@@ -20290,6 +21063,10 @@ enum ix86_builtins |
IX86_BUILTIN_VPERMILPS, |
IX86_BUILTIN_VPERMILPD256, |
IX86_BUILTIN_VPERMILPS256, |
+ IX86_BUILTIN_VPERMIL2PD, |
+ IX86_BUILTIN_VPERMIL2PS, |
+ IX86_BUILTIN_VPERMIL2PD256, |
+ IX86_BUILTIN_VPERMIL2PS256, |
IX86_BUILTIN_VPERM2F128PD256, |
IX86_BUILTIN_VPERM2F128PS256, |
IX86_BUILTIN_VPERM2F128SI256, |
@@ -20367,219 +21144,215 @@ enum ix86_builtins |
/* TFmode support builtins. */ |
IX86_BUILTIN_INFQ, |
+ IX86_BUILTIN_HUGE_VALQ, |
IX86_BUILTIN_FABSQ, |
IX86_BUILTIN_COPYSIGNQ, |
- /* SSE5 instructions */ |
- IX86_BUILTIN_FMADDSS, |
- IX86_BUILTIN_FMADDSD, |
- IX86_BUILTIN_FMADDPS, |
- IX86_BUILTIN_FMADDPD, |
- IX86_BUILTIN_FMSUBSS, |
- IX86_BUILTIN_FMSUBSD, |
- IX86_BUILTIN_FMSUBPS, |
- IX86_BUILTIN_FMSUBPD, |
- IX86_BUILTIN_FNMADDSS, |
- IX86_BUILTIN_FNMADDSD, |
- IX86_BUILTIN_FNMADDPS, |
- IX86_BUILTIN_FNMADDPD, |
- IX86_BUILTIN_FNMSUBSS, |
- IX86_BUILTIN_FNMSUBSD, |
- IX86_BUILTIN_FNMSUBPS, |
- IX86_BUILTIN_FNMSUBPD, |
- IX86_BUILTIN_PCMOV, |
- IX86_BUILTIN_PCMOV_V2DI, |
- IX86_BUILTIN_PCMOV_V4SI, |
- IX86_BUILTIN_PCMOV_V8HI, |
- IX86_BUILTIN_PCMOV_V16QI, |
- IX86_BUILTIN_PCMOV_V4SF, |
- IX86_BUILTIN_PCMOV_V2DF, |
- IX86_BUILTIN_PPERM, |
- IX86_BUILTIN_PERMPS, |
- IX86_BUILTIN_PERMPD, |
- IX86_BUILTIN_PMACSSWW, |
- IX86_BUILTIN_PMACSWW, |
- IX86_BUILTIN_PMACSSWD, |
- IX86_BUILTIN_PMACSWD, |
- IX86_BUILTIN_PMACSSDD, |
- IX86_BUILTIN_PMACSDD, |
- IX86_BUILTIN_PMACSSDQL, |
- IX86_BUILTIN_PMACSSDQH, |
- IX86_BUILTIN_PMACSDQL, |
- IX86_BUILTIN_PMACSDQH, |
- IX86_BUILTIN_PMADCSSWD, |
- IX86_BUILTIN_PMADCSWD, |
- IX86_BUILTIN_PHADDBW, |
- IX86_BUILTIN_PHADDBD, |
- IX86_BUILTIN_PHADDBQ, |
- IX86_BUILTIN_PHADDWD, |
- IX86_BUILTIN_PHADDWQ, |
- IX86_BUILTIN_PHADDDQ, |
- IX86_BUILTIN_PHADDUBW, |
- IX86_BUILTIN_PHADDUBD, |
- IX86_BUILTIN_PHADDUBQ, |
- IX86_BUILTIN_PHADDUWD, |
- IX86_BUILTIN_PHADDUWQ, |
- IX86_BUILTIN_PHADDUDQ, |
- IX86_BUILTIN_PHSUBBW, |
- IX86_BUILTIN_PHSUBWD, |
- IX86_BUILTIN_PHSUBDQ, |
- IX86_BUILTIN_PROTB, |
- IX86_BUILTIN_PROTW, |
- IX86_BUILTIN_PROTD, |
- IX86_BUILTIN_PROTQ, |
- IX86_BUILTIN_PROTB_IMM, |
- IX86_BUILTIN_PROTW_IMM, |
- IX86_BUILTIN_PROTD_IMM, |
- IX86_BUILTIN_PROTQ_IMM, |
- IX86_BUILTIN_PSHLB, |
- IX86_BUILTIN_PSHLW, |
- IX86_BUILTIN_PSHLD, |
- IX86_BUILTIN_PSHLQ, |
- IX86_BUILTIN_PSHAB, |
- IX86_BUILTIN_PSHAW, |
- IX86_BUILTIN_PSHAD, |
- IX86_BUILTIN_PSHAQ, |
- IX86_BUILTIN_FRCZSS, |
- IX86_BUILTIN_FRCZSD, |
- IX86_BUILTIN_FRCZPS, |
- IX86_BUILTIN_FRCZPD, |
- IX86_BUILTIN_CVTPH2PS, |
- IX86_BUILTIN_CVTPS2PH, |
- |
- IX86_BUILTIN_COMEQSS, |
- IX86_BUILTIN_COMNESS, |
- IX86_BUILTIN_COMLTSS, |
- IX86_BUILTIN_COMLESS, |
- IX86_BUILTIN_COMGTSS, |
- IX86_BUILTIN_COMGESS, |
- IX86_BUILTIN_COMUEQSS, |
- IX86_BUILTIN_COMUNESS, |
- IX86_BUILTIN_COMULTSS, |
- IX86_BUILTIN_COMULESS, |
- IX86_BUILTIN_COMUGTSS, |
- IX86_BUILTIN_COMUGESS, |
- IX86_BUILTIN_COMORDSS, |
- IX86_BUILTIN_COMUNORDSS, |
- IX86_BUILTIN_COMFALSESS, |
- IX86_BUILTIN_COMTRUESS, |
- |
- IX86_BUILTIN_COMEQSD, |
- IX86_BUILTIN_COMNESD, |
- IX86_BUILTIN_COMLTSD, |
- IX86_BUILTIN_COMLESD, |
- IX86_BUILTIN_COMGTSD, |
- IX86_BUILTIN_COMGESD, |
- IX86_BUILTIN_COMUEQSD, |
- IX86_BUILTIN_COMUNESD, |
- IX86_BUILTIN_COMULTSD, |
- IX86_BUILTIN_COMULESD, |
- IX86_BUILTIN_COMUGTSD, |
- IX86_BUILTIN_COMUGESD, |
- IX86_BUILTIN_COMORDSD, |
- IX86_BUILTIN_COMUNORDSD, |
- IX86_BUILTIN_COMFALSESD, |
- IX86_BUILTIN_COMTRUESD, |
- |
- IX86_BUILTIN_COMEQPS, |
- IX86_BUILTIN_COMNEPS, |
- IX86_BUILTIN_COMLTPS, |
- IX86_BUILTIN_COMLEPS, |
- IX86_BUILTIN_COMGTPS, |
- IX86_BUILTIN_COMGEPS, |
- IX86_BUILTIN_COMUEQPS, |
- IX86_BUILTIN_COMUNEPS, |
- IX86_BUILTIN_COMULTPS, |
- IX86_BUILTIN_COMULEPS, |
- IX86_BUILTIN_COMUGTPS, |
- IX86_BUILTIN_COMUGEPS, |
- IX86_BUILTIN_COMORDPS, |
- IX86_BUILTIN_COMUNORDPS, |
- IX86_BUILTIN_COMFALSEPS, |
- IX86_BUILTIN_COMTRUEPS, |
- |
- IX86_BUILTIN_COMEQPD, |
- IX86_BUILTIN_COMNEPD, |
- IX86_BUILTIN_COMLTPD, |
- IX86_BUILTIN_COMLEPD, |
- IX86_BUILTIN_COMGTPD, |
- IX86_BUILTIN_COMGEPD, |
- IX86_BUILTIN_COMUEQPD, |
- IX86_BUILTIN_COMUNEPD, |
- IX86_BUILTIN_COMULTPD, |
- IX86_BUILTIN_COMULEPD, |
- IX86_BUILTIN_COMUGTPD, |
- IX86_BUILTIN_COMUGEPD, |
- IX86_BUILTIN_COMORDPD, |
- IX86_BUILTIN_COMUNORDPD, |
- IX86_BUILTIN_COMFALSEPD, |
- IX86_BUILTIN_COMTRUEPD, |
- |
- IX86_BUILTIN_PCOMEQUB, |
- IX86_BUILTIN_PCOMNEUB, |
- IX86_BUILTIN_PCOMLTUB, |
- IX86_BUILTIN_PCOMLEUB, |
- IX86_BUILTIN_PCOMGTUB, |
- IX86_BUILTIN_PCOMGEUB, |
- IX86_BUILTIN_PCOMFALSEUB, |
- IX86_BUILTIN_PCOMTRUEUB, |
- IX86_BUILTIN_PCOMEQUW, |
- IX86_BUILTIN_PCOMNEUW, |
- IX86_BUILTIN_PCOMLTUW, |
- IX86_BUILTIN_PCOMLEUW, |
- IX86_BUILTIN_PCOMGTUW, |
- IX86_BUILTIN_PCOMGEUW, |
- IX86_BUILTIN_PCOMFALSEUW, |
- IX86_BUILTIN_PCOMTRUEUW, |
- IX86_BUILTIN_PCOMEQUD, |
- IX86_BUILTIN_PCOMNEUD, |
- IX86_BUILTIN_PCOMLTUD, |
- IX86_BUILTIN_PCOMLEUD, |
- IX86_BUILTIN_PCOMGTUD, |
- IX86_BUILTIN_PCOMGEUD, |
- IX86_BUILTIN_PCOMFALSEUD, |
- IX86_BUILTIN_PCOMTRUEUD, |
- IX86_BUILTIN_PCOMEQUQ, |
- IX86_BUILTIN_PCOMNEUQ, |
- IX86_BUILTIN_PCOMLTUQ, |
- IX86_BUILTIN_PCOMLEUQ, |
- IX86_BUILTIN_PCOMGTUQ, |
- IX86_BUILTIN_PCOMGEUQ, |
- IX86_BUILTIN_PCOMFALSEUQ, |
- IX86_BUILTIN_PCOMTRUEUQ, |
- |
- IX86_BUILTIN_PCOMEQB, |
- IX86_BUILTIN_PCOMNEB, |
- IX86_BUILTIN_PCOMLTB, |
- IX86_BUILTIN_PCOMLEB, |
- IX86_BUILTIN_PCOMGTB, |
- IX86_BUILTIN_PCOMGEB, |
- IX86_BUILTIN_PCOMFALSEB, |
- IX86_BUILTIN_PCOMTRUEB, |
- IX86_BUILTIN_PCOMEQW, |
- IX86_BUILTIN_PCOMNEW, |
- IX86_BUILTIN_PCOMLTW, |
- IX86_BUILTIN_PCOMLEW, |
- IX86_BUILTIN_PCOMGTW, |
- IX86_BUILTIN_PCOMGEW, |
- IX86_BUILTIN_PCOMFALSEW, |
- IX86_BUILTIN_PCOMTRUEW, |
- IX86_BUILTIN_PCOMEQD, |
- IX86_BUILTIN_PCOMNED, |
- IX86_BUILTIN_PCOMLTD, |
- IX86_BUILTIN_PCOMLED, |
- IX86_BUILTIN_PCOMGTD, |
- IX86_BUILTIN_PCOMGED, |
- IX86_BUILTIN_PCOMFALSED, |
- IX86_BUILTIN_PCOMTRUED, |
- IX86_BUILTIN_PCOMEQQ, |
- IX86_BUILTIN_PCOMNEQ, |
- IX86_BUILTIN_PCOMLTQ, |
- IX86_BUILTIN_PCOMLEQ, |
- IX86_BUILTIN_PCOMGTQ, |
- IX86_BUILTIN_PCOMGEQ, |
- IX86_BUILTIN_PCOMFALSEQ, |
- IX86_BUILTIN_PCOMTRUEQ, |
+ /* Vectorizer support builtins. */ |
+ IX86_BUILTIN_CPYSGNPS, |
+ IX86_BUILTIN_CPYSGNPD, |
+ |
+ IX86_BUILTIN_CVTUDQ2PS, |
+ |
+ IX86_BUILTIN_VEC_PERM_V2DF, |
+ IX86_BUILTIN_VEC_PERM_V4SF, |
+ IX86_BUILTIN_VEC_PERM_V2DI, |
+ IX86_BUILTIN_VEC_PERM_V4SI, |
+ IX86_BUILTIN_VEC_PERM_V8HI, |
+ IX86_BUILTIN_VEC_PERM_V16QI, |
+ IX86_BUILTIN_VEC_PERM_V2DI_U, |
+ IX86_BUILTIN_VEC_PERM_V4SI_U, |
+ IX86_BUILTIN_VEC_PERM_V8HI_U, |
+ IX86_BUILTIN_VEC_PERM_V16QI_U, |
+ IX86_BUILTIN_VEC_PERM_V4DF, |
+ IX86_BUILTIN_VEC_PERM_V8SF, |
+ |
+ /* FMA4 and XOP instructions. */ |
+ IX86_BUILTIN_VFMADDSS, |
+ IX86_BUILTIN_VFMADDSD, |
+ IX86_BUILTIN_VFMADDPS, |
+ IX86_BUILTIN_VFMADDPD, |
+ IX86_BUILTIN_VFMSUBSS, |
+ IX86_BUILTIN_VFMSUBSD, |
+ IX86_BUILTIN_VFMSUBPS, |
+ IX86_BUILTIN_VFMSUBPD, |
+ IX86_BUILTIN_VFMADDSUBPS, |
+ IX86_BUILTIN_VFMADDSUBPD, |
+ IX86_BUILTIN_VFMSUBADDPS, |
+ IX86_BUILTIN_VFMSUBADDPD, |
+ IX86_BUILTIN_VFNMADDSS, |
+ IX86_BUILTIN_VFNMADDSD, |
+ IX86_BUILTIN_VFNMADDPS, |
+ IX86_BUILTIN_VFNMADDPD, |
+ IX86_BUILTIN_VFNMSUBSS, |
+ IX86_BUILTIN_VFNMSUBSD, |
+ IX86_BUILTIN_VFNMSUBPS, |
+ IX86_BUILTIN_VFNMSUBPD, |
+ IX86_BUILTIN_VFMADDPS256, |
+ IX86_BUILTIN_VFMADDPD256, |
+ IX86_BUILTIN_VFMSUBPS256, |
+ IX86_BUILTIN_VFMSUBPD256, |
+ IX86_BUILTIN_VFMADDSUBPS256, |
+ IX86_BUILTIN_VFMADDSUBPD256, |
+ IX86_BUILTIN_VFMSUBADDPS256, |
+ IX86_BUILTIN_VFMSUBADDPD256, |
+ IX86_BUILTIN_VFNMADDPS256, |
+ IX86_BUILTIN_VFNMADDPD256, |
+ IX86_BUILTIN_VFNMSUBPS256, |
+ IX86_BUILTIN_VFNMSUBPD256, |
+ |
+ IX86_BUILTIN_VPCMOV, |
+ IX86_BUILTIN_VPCMOV_V2DI, |
+ IX86_BUILTIN_VPCMOV_V4SI, |
+ IX86_BUILTIN_VPCMOV_V8HI, |
+ IX86_BUILTIN_VPCMOV_V16QI, |
+ IX86_BUILTIN_VPCMOV_V4SF, |
+ IX86_BUILTIN_VPCMOV_V2DF, |
+ IX86_BUILTIN_VPCMOV256, |
+ IX86_BUILTIN_VPCMOV_V4DI256, |
+ IX86_BUILTIN_VPCMOV_V8SI256, |
+ IX86_BUILTIN_VPCMOV_V16HI256, |
+ IX86_BUILTIN_VPCMOV_V32QI256, |
+ IX86_BUILTIN_VPCMOV_V8SF256, |
+ IX86_BUILTIN_VPCMOV_V4DF256, |
+ |
+ IX86_BUILTIN_VPPERM, |
+ |
+ IX86_BUILTIN_VPMACSSWW, |
+ IX86_BUILTIN_VPMACSWW, |
+ IX86_BUILTIN_VPMACSSWD, |
+ IX86_BUILTIN_VPMACSWD, |
+ IX86_BUILTIN_VPMACSSDD, |
+ IX86_BUILTIN_VPMACSDD, |
+ IX86_BUILTIN_VPMACSSDQL, |
+ IX86_BUILTIN_VPMACSSDQH, |
+ IX86_BUILTIN_VPMACSDQL, |
+ IX86_BUILTIN_VPMACSDQH, |
+ IX86_BUILTIN_VPMADCSSWD, |
+ IX86_BUILTIN_VPMADCSWD, |
+ |
+ IX86_BUILTIN_VPHADDBW, |
+ IX86_BUILTIN_VPHADDBD, |
+ IX86_BUILTIN_VPHADDBQ, |
+ IX86_BUILTIN_VPHADDWD, |
+ IX86_BUILTIN_VPHADDWQ, |
+ IX86_BUILTIN_VPHADDDQ, |
+ IX86_BUILTIN_VPHADDUBW, |
+ IX86_BUILTIN_VPHADDUBD, |
+ IX86_BUILTIN_VPHADDUBQ, |
+ IX86_BUILTIN_VPHADDUWD, |
+ IX86_BUILTIN_VPHADDUWQ, |
+ IX86_BUILTIN_VPHADDUDQ, |
+ IX86_BUILTIN_VPHSUBBW, |
+ IX86_BUILTIN_VPHSUBWD, |
+ IX86_BUILTIN_VPHSUBDQ, |
+ |
+ IX86_BUILTIN_VPROTB, |
+ IX86_BUILTIN_VPROTW, |
+ IX86_BUILTIN_VPROTD, |
+ IX86_BUILTIN_VPROTQ, |
+ IX86_BUILTIN_VPROTB_IMM, |
+ IX86_BUILTIN_VPROTW_IMM, |
+ IX86_BUILTIN_VPROTD_IMM, |
+ IX86_BUILTIN_VPROTQ_IMM, |
+ |
+ IX86_BUILTIN_VPSHLB, |
+ IX86_BUILTIN_VPSHLW, |
+ IX86_BUILTIN_VPSHLD, |
+ IX86_BUILTIN_VPSHLQ, |
+ IX86_BUILTIN_VPSHAB, |
+ IX86_BUILTIN_VPSHAW, |
+ IX86_BUILTIN_VPSHAD, |
+ IX86_BUILTIN_VPSHAQ, |
+ |
+ IX86_BUILTIN_VFRCZSS, |
+ IX86_BUILTIN_VFRCZSD, |
+ IX86_BUILTIN_VFRCZPS, |
+ IX86_BUILTIN_VFRCZPD, |
+ IX86_BUILTIN_VFRCZPS256, |
+ IX86_BUILTIN_VFRCZPD256, |
+ |
+ IX86_BUILTIN_VPCOMEQUB, |
+ IX86_BUILTIN_VPCOMNEUB, |
+ IX86_BUILTIN_VPCOMLTUB, |
+ IX86_BUILTIN_VPCOMLEUB, |
+ IX86_BUILTIN_VPCOMGTUB, |
+ IX86_BUILTIN_VPCOMGEUB, |
+ IX86_BUILTIN_VPCOMFALSEUB, |
+ IX86_BUILTIN_VPCOMTRUEUB, |
+ |
+ IX86_BUILTIN_VPCOMEQUW, |
+ IX86_BUILTIN_VPCOMNEUW, |
+ IX86_BUILTIN_VPCOMLTUW, |
+ IX86_BUILTIN_VPCOMLEUW, |
+ IX86_BUILTIN_VPCOMGTUW, |
+ IX86_BUILTIN_VPCOMGEUW, |
+ IX86_BUILTIN_VPCOMFALSEUW, |
+ IX86_BUILTIN_VPCOMTRUEUW, |
+ |
+ IX86_BUILTIN_VPCOMEQUD, |
+ IX86_BUILTIN_VPCOMNEUD, |
+ IX86_BUILTIN_VPCOMLTUD, |
+ IX86_BUILTIN_VPCOMLEUD, |
+ IX86_BUILTIN_VPCOMGTUD, |
+ IX86_BUILTIN_VPCOMGEUD, |
+ IX86_BUILTIN_VPCOMFALSEUD, |
+ IX86_BUILTIN_VPCOMTRUEUD, |
+ |
+ IX86_BUILTIN_VPCOMEQUQ, |
+ IX86_BUILTIN_VPCOMNEUQ, |
+ IX86_BUILTIN_VPCOMLTUQ, |
+ IX86_BUILTIN_VPCOMLEUQ, |
+ IX86_BUILTIN_VPCOMGTUQ, |
+ IX86_BUILTIN_VPCOMGEUQ, |
+ IX86_BUILTIN_VPCOMFALSEUQ, |
+ IX86_BUILTIN_VPCOMTRUEUQ, |
+ |
+ IX86_BUILTIN_VPCOMEQB, |
+ IX86_BUILTIN_VPCOMNEB, |
+ IX86_BUILTIN_VPCOMLTB, |
+ IX86_BUILTIN_VPCOMLEB, |
+ IX86_BUILTIN_VPCOMGTB, |
+ IX86_BUILTIN_VPCOMGEB, |
+ IX86_BUILTIN_VPCOMFALSEB, |
+ IX86_BUILTIN_VPCOMTRUEB, |
+ |
+ IX86_BUILTIN_VPCOMEQW, |
+ IX86_BUILTIN_VPCOMNEW, |
+ IX86_BUILTIN_VPCOMLTW, |
+ IX86_BUILTIN_VPCOMLEW, |
+ IX86_BUILTIN_VPCOMGTW, |
+ IX86_BUILTIN_VPCOMGEW, |
+ IX86_BUILTIN_VPCOMFALSEW, |
+ IX86_BUILTIN_VPCOMTRUEW, |
+ |
+ IX86_BUILTIN_VPCOMEQD, |
+ IX86_BUILTIN_VPCOMNED, |
+ IX86_BUILTIN_VPCOMLTD, |
+ IX86_BUILTIN_VPCOMLED, |
+ IX86_BUILTIN_VPCOMGTD, |
+ IX86_BUILTIN_VPCOMGED, |
+ IX86_BUILTIN_VPCOMFALSED, |
+ IX86_BUILTIN_VPCOMTRUED, |
+ |
+ IX86_BUILTIN_VPCOMEQQ, |
+ IX86_BUILTIN_VPCOMNEQ, |
+ IX86_BUILTIN_VPCOMLTQ, |
+ IX86_BUILTIN_VPCOMLEQ, |
+ IX86_BUILTIN_VPCOMGTQ, |
+ IX86_BUILTIN_VPCOMGEQ, |
+ IX86_BUILTIN_VPCOMFALSEQ, |
+ IX86_BUILTIN_VPCOMTRUEQ, |
+ |
+ /* LWP instructions. */ |
+ IX86_BUILTIN_LLWPCB, |
+ IX86_BUILTIN_SLWPCB, |
+ IX86_BUILTIN_LWPVAL32, |
+ IX86_BUILTIN_LWPVAL64, |
+ IX86_BUILTIN_LWPINS32, |
+ IX86_BUILTIN_LWPINS64, |
+ |
+ IX86_BUILTIN_CLZS, |
IX86_BUILTIN_MAX |
}; |
@@ -20590,35 +21363,36 @@ static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX]; |
/* Table of all of the builtin functions that are possible with different ISA's |
but are waiting to be built until a function is declared to use that |
ISA. */ |
-struct builtin_isa GTY(()) |
-{ |
- tree type; /* builtin type to use in the declaration */ |
+struct builtin_isa { |
const char *name; /* function name */ |
+ enum ix86_builtin_func_type tcode; /* type to use in the declaration */ |
int isa; /* isa_flags this builtin is defined for */ |
bool const_p; /* true if the declaration is constant */ |
+ bool set_and_not_built_p; |
}; |
-static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX]; |
+static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX]; |
/* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK |
- * of which isa_flags to use in the ix86_builtins_isa array. Stores the |
- * function decl in the ix86_builtins array. Returns the function decl or |
- * NULL_TREE, if the builtin was not added. |
- * |
- * If the front end has a special hook for builtin functions, delay adding |
- * builtin functions that aren't in the current ISA until the ISA is changed |
- * with function specific optimization. Doing so, can save about 300K for the |
- * default compiler. When the builtin is expanded, check at that time whether |
- * it is valid. |
- * |
- * If the front end doesn't have a special hook, record all builtins, even if |
- * it isn't an instruction set in the current ISA in case the user uses |
- * function specific options for a different ISA, so that we don't get scope |
- * errors if a builtin is added in the middle of a function scope. */ |
+ of which isa_flags to use in the ix86_builtins_isa array. Stores the |
+ function decl in the ix86_builtins array. Returns the function decl or |
+ NULL_TREE, if the builtin was not added. |
+ |
+ If the front end has a special hook for builtin functions, delay adding |
+ builtin functions that aren't in the current ISA until the ISA is changed |
+ with function specific optimization. Doing so can save about 300K for the |
+ default compiler. When the builtin is expanded, check at that time whether |
+ it is valid. |
+ |
+ If the front end doesn't have a special hook, record all builtins, even if |
+ it isn't an instruction set in the current ISA in case the user uses |
+ function specific options for a different ISA, so that we don't get scope |
+ errors if a builtin is added in the middle of a function scope. */ |
static inline tree |
-def_builtin (int mask, const char *name, tree type, enum ix86_builtins code) |
+def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode, |
+ enum ix86_builtins code) |
{ |
tree decl = NULL_TREE; |
@@ -20626,22 +21400,26 @@ def_builtin (int mask, const char *name, tree type, enum ix86_builtins code) |
{ |
ix86_builtins_isa[(int) code].isa = mask; |
- if ((mask & ix86_isa_flags) != 0 |
+ mask &= ~OPTION_MASK_ISA_64BIT; |
+ if (mask == 0 |
+ || (mask & ix86_isa_flags) != 0 |
|| (lang_hooks.builtin_function |
== lang_hooks.builtin_function_ext_scope)) |
{ |
- decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, |
- NULL_TREE); |
+ tree type = ix86_get_builtin_func_type (tcode); |
+ decl = add_builtin_function (name, type, code, BUILT_IN_MD, |
+ NULL, NULL_TREE); |
ix86_builtins[(int) code] = decl; |
- ix86_builtins_isa[(int) code].type = NULL_TREE; |
+ ix86_builtins_isa[(int) code].set_and_not_built_p = false; |
} |
else |
{ |
ix86_builtins[(int) code] = NULL_TREE; |
- ix86_builtins_isa[(int) code].const_p = false; |
- ix86_builtins_isa[(int) code].type = type; |
+ ix86_builtins_isa[(int) code].tcode = tcode; |
ix86_builtins_isa[(int) code].name = name; |
+ ix86_builtins_isa[(int) code].const_p = false; |
+ ix86_builtins_isa[(int) code].set_and_not_built_p = true; |
} |
} |
@@ -20651,10 +21429,10 @@ def_builtin (int mask, const char *name, tree type, enum ix86_builtins code) |
/* Like def_builtin, but also marks the function decl "const". */ |
static inline tree |
-def_builtin_const (int mask, const char *name, tree type, |
- enum ix86_builtins code) |
+def_builtin_const (int mask, const char *name, |
+ enum ix86_builtin_func_type tcode, enum ix86_builtins code) |
{ |
- tree decl = def_builtin (mask, name, type, code); |
+ tree decl = def_builtin (mask, name, tcode, code); |
if (decl) |
TREE_READONLY (decl) = 1; |
else |
@@ -20671,20 +21449,23 @@ static void |
ix86_add_new_builtins (int isa) |
{ |
int i; |
- tree decl; |
for (i = 0; i < (int)IX86_BUILTIN_MAX; i++) |
{ |
if ((ix86_builtins_isa[i].isa & isa) != 0 |
- && ix86_builtins_isa[i].type != NULL_TREE) |
+ && ix86_builtins_isa[i].set_and_not_built_p) |
{ |
+ tree decl, type; |
+ |
+ /* Don't define the builtin again. */ |
+ ix86_builtins_isa[i].set_and_not_built_p = false; |
+ |
+ type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode); |
decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name, |
- ix86_builtins_isa[i].type, |
- i, BUILT_IN_MD, NULL, |
+ type, i, BUILT_IN_MD, NULL, |
NULL_TREE); |
ix86_builtins[i] = decl; |
- ix86_builtins_isa[i].type = NULL_TREE; |
if (ix86_builtins_isa[i].const_p) |
TREE_READONLY (decl) = 1; |
} |
@@ -20759,204 +21540,12 @@ static const struct builtin_description bdesc_pcmpistr[] = |
{ OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode }, |
}; |
-/* Special builtin types */ |
-enum ix86_special_builtin_type |
-{ |
- SPECIAL_FTYPE_UNKNOWN, |
- VOID_FTYPE_VOID, |
- V32QI_FTYPE_PCCHAR, |
- V16QI_FTYPE_PCCHAR, |
- V8SF_FTYPE_PCV4SF, |
- V8SF_FTYPE_PCFLOAT, |
- V4DF_FTYPE_PCV2DF, |
- V4DF_FTYPE_PCDOUBLE, |
- V4SF_FTYPE_PCFLOAT, |
- V2DF_FTYPE_PCDOUBLE, |
- V8SF_FTYPE_PCV8SF_V8SF, |
- V4DF_FTYPE_PCV4DF_V4DF, |
- V4SF_FTYPE_V4SF_PCV2SF, |
- V4SF_FTYPE_PCV4SF_V4SF, |
- V2DF_FTYPE_V2DF_PCDOUBLE, |
- V2DF_FTYPE_PCV2DF_V2DF, |
- V2DI_FTYPE_PV2DI, |
- VOID_FTYPE_PV2SF_V4SF, |
- VOID_FTYPE_PV4DI_V4DI, |
- VOID_FTYPE_PV2DI_V2DI, |
- VOID_FTYPE_PCHAR_V32QI, |
- VOID_FTYPE_PCHAR_V16QI, |
- VOID_FTYPE_PFLOAT_V8SF, |
- VOID_FTYPE_PFLOAT_V4SF, |
- VOID_FTYPE_PDOUBLE_V4DF, |
- VOID_FTYPE_PDOUBLE_V2DF, |
- VOID_FTYPE_PDI_DI, |
- VOID_FTYPE_PINT_INT, |
- VOID_FTYPE_PV8SF_V8SF_V8SF, |
- VOID_FTYPE_PV4DF_V4DF_V4DF, |
- VOID_FTYPE_PV4SF_V4SF_V4SF, |
- VOID_FTYPE_PV2DF_V2DF_V2DF |
-}; |
- |
-/* Builtin types */ |
-enum ix86_builtin_type |
-{ |
- FTYPE_UNKNOWN, |
- FLOAT128_FTYPE_FLOAT128, |
- FLOAT_FTYPE_FLOAT, |
- FLOAT128_FTYPE_FLOAT128_FLOAT128, |
- INT_FTYPE_V8SF_V8SF_PTEST, |
- INT_FTYPE_V4DI_V4DI_PTEST, |
- INT_FTYPE_V4DF_V4DF_PTEST, |
- INT_FTYPE_V4SF_V4SF_PTEST, |
- INT_FTYPE_V2DI_V2DI_PTEST, |
- INT_FTYPE_V2DF_V2DF_PTEST, |
- INT64_FTYPE_V4SF, |
- INT64_FTYPE_V2DF, |
- INT_FTYPE_V16QI, |
- INT_FTYPE_V8QI, |
- INT_FTYPE_V8SF, |
- INT_FTYPE_V4DF, |
- INT_FTYPE_V4SF, |
- INT_FTYPE_V2DF, |
- V16QI_FTYPE_V16QI, |
- V8SI_FTYPE_V8SF, |
- V8SI_FTYPE_V4SI, |
- V8HI_FTYPE_V8HI, |
- V8HI_FTYPE_V16QI, |
- V8QI_FTYPE_V8QI, |
- V8SF_FTYPE_V8SF, |
- V8SF_FTYPE_V8SI, |
- V8SF_FTYPE_V4SF, |
- V4SI_FTYPE_V4SI, |
- V4SI_FTYPE_V16QI, |
- V4SI_FTYPE_V8SI, |
- V4SI_FTYPE_V8HI, |
- V4SI_FTYPE_V4DF, |
- V4SI_FTYPE_V4SF, |
- V4SI_FTYPE_V2DF, |
- V4HI_FTYPE_V4HI, |
- V4DF_FTYPE_V4DF, |
- V4DF_FTYPE_V4SI, |
- V4DF_FTYPE_V4SF, |
- V4DF_FTYPE_V2DF, |
- V4SF_FTYPE_V4DF, |
- V4SF_FTYPE_V4SF, |
- V4SF_FTYPE_V4SF_VEC_MERGE, |
- V4SF_FTYPE_V8SF, |
- V4SF_FTYPE_V4SI, |
- V4SF_FTYPE_V2DF, |
- V2DI_FTYPE_V2DI, |
- V2DI_FTYPE_V16QI, |
- V2DI_FTYPE_V8HI, |
- V2DI_FTYPE_V4SI, |
- V2DF_FTYPE_V2DF, |
- V2DF_FTYPE_V2DF_VEC_MERGE, |
- V2DF_FTYPE_V4SI, |
- V2DF_FTYPE_V4DF, |
- V2DF_FTYPE_V4SF, |
- V2DF_FTYPE_V2SI, |
- V2SI_FTYPE_V2SI, |
- V2SI_FTYPE_V4SF, |
- V2SI_FTYPE_V2SF, |
- V2SI_FTYPE_V2DF, |
- V2SF_FTYPE_V2SF, |
- V2SF_FTYPE_V2SI, |
- V16QI_FTYPE_V16QI_V16QI, |
- V16QI_FTYPE_V8HI_V8HI, |
- V8QI_FTYPE_V8QI_V8QI, |
- V8QI_FTYPE_V4HI_V4HI, |
- V8HI_FTYPE_V8HI_V8HI, |
- V8HI_FTYPE_V8HI_V8HI_COUNT, |
- V8HI_FTYPE_V16QI_V16QI, |
- V8HI_FTYPE_V4SI_V4SI, |
- V8HI_FTYPE_V8HI_SI_COUNT, |
- V8SF_FTYPE_V8SF_V8SF, |
- V8SF_FTYPE_V8SF_V8SI, |
- V4SI_FTYPE_V4SI_V4SI, |
- V4SI_FTYPE_V4SI_V4SI_COUNT, |
- V4SI_FTYPE_V8HI_V8HI, |
- V4SI_FTYPE_V4SF_V4SF, |
- V4SI_FTYPE_V2DF_V2DF, |
- V4SI_FTYPE_V4SI_SI_COUNT, |
- V4HI_FTYPE_V4HI_V4HI, |
- V4HI_FTYPE_V4HI_V4HI_COUNT, |
- V4HI_FTYPE_V8QI_V8QI, |
- V4HI_FTYPE_V2SI_V2SI, |
- V4HI_FTYPE_V4HI_SI_COUNT, |
- V4DF_FTYPE_V4DF_V4DF, |
- V4DF_FTYPE_V4DF_V4DI, |
- V4SF_FTYPE_V4SF_V4SF, |
- V4SF_FTYPE_V4SF_V4SF_SWAP, |
- V4SF_FTYPE_V4SF_V4SI, |
- V4SF_FTYPE_V4SF_V2SI, |
- V4SF_FTYPE_V4SF_V2DF, |
- V4SF_FTYPE_V4SF_DI, |
- V4SF_FTYPE_V4SF_SI, |
- V2DI_FTYPE_V2DI_V2DI, |
- V2DI_FTYPE_V2DI_V2DI_COUNT, |
- V2DI_FTYPE_V16QI_V16QI, |
- V2DI_FTYPE_V4SI_V4SI, |
- V2DI_FTYPE_V2DI_V16QI, |
- V2DI_FTYPE_V2DF_V2DF, |
- V2DI_FTYPE_V2DI_SI_COUNT, |
- V2SI_FTYPE_V2SI_V2SI, |
- V2SI_FTYPE_V2SI_V2SI_COUNT, |
- V2SI_FTYPE_V4HI_V4HI, |
- V2SI_FTYPE_V2SF_V2SF, |
- V2SI_FTYPE_V2SI_SI_COUNT, |
- V2DF_FTYPE_V2DF_V2DF, |
- V2DF_FTYPE_V2DF_V2DF_SWAP, |
- V2DF_FTYPE_V2DF_V4SF, |
- V2DF_FTYPE_V2DF_V2DI, |
- V2DF_FTYPE_V2DF_DI, |
- V2DF_FTYPE_V2DF_SI, |
- V2SF_FTYPE_V2SF_V2SF, |
- V1DI_FTYPE_V1DI_V1DI, |
- V1DI_FTYPE_V1DI_V1DI_COUNT, |
- V1DI_FTYPE_V8QI_V8QI, |
- V1DI_FTYPE_V2SI_V2SI, |
- V1DI_FTYPE_V1DI_SI_COUNT, |
- UINT64_FTYPE_UINT64_UINT64, |
- UINT_FTYPE_UINT_UINT, |
- UINT_FTYPE_UINT_USHORT, |
- UINT_FTYPE_UINT_UCHAR, |
- V8HI_FTYPE_V8HI_INT, |
- V4SI_FTYPE_V4SI_INT, |
- V4HI_FTYPE_V4HI_INT, |
- V8SF_FTYPE_V8SF_INT, |
- V4SI_FTYPE_V8SI_INT, |
- V4SF_FTYPE_V8SF_INT, |
- V2DF_FTYPE_V4DF_INT, |
- V4DF_FTYPE_V4DF_INT, |
- V4SF_FTYPE_V4SF_INT, |
- V2DI_FTYPE_V2DI_INT, |
- V2DI2TI_FTYPE_V2DI_INT, |
- V2DF_FTYPE_V2DF_INT, |
- V16QI_FTYPE_V16QI_V16QI_V16QI, |
- V8SF_FTYPE_V8SF_V8SF_V8SF, |
- V4DF_FTYPE_V4DF_V4DF_V4DF, |
- V4SF_FTYPE_V4SF_V4SF_V4SF, |
- V2DF_FTYPE_V2DF_V2DF_V2DF, |
- V16QI_FTYPE_V16QI_V16QI_INT, |
- V8SI_FTYPE_V8SI_V8SI_INT, |
- V8SI_FTYPE_V8SI_V4SI_INT, |
- V8HI_FTYPE_V8HI_V8HI_INT, |
- V8SF_FTYPE_V8SF_V8SF_INT, |
- V8SF_FTYPE_V8SF_V4SF_INT, |
- V4SI_FTYPE_V4SI_V4SI_INT, |
- V4DF_FTYPE_V4DF_V4DF_INT, |
- V4DF_FTYPE_V4DF_V2DF_INT, |
- V4SF_FTYPE_V4SF_V4SF_INT, |
- V2DI_FTYPE_V2DI_V2DI_INT, |
- V2DI2TI_FTYPE_V2DI_V2DI_INT, |
- V1DI2DI_FTYPE_V1DI_V1DI_INT, |
- V2DF_FTYPE_V2DF_V2DF_INT, |
- V2DI_FTYPE_V2DI_UINT_UINT, |
- V2DI_FTYPE_V2DI_V2DI_UINT_UINT |
-}; |
- |
/* Special builtins with variable number of arguments. */ |
static const struct builtin_description bdesc_special_args[] = |
{ |
+ { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID }, |
+ { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED }, |
+ |
/* MMX */ |
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID }, |
@@ -20975,7 +21564,7 @@ static const struct builtin_description bdesc_special_args[] = |
/* SSE or 3DNow!A */ |
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID }, |
- { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI }, |
+ { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG }, |
/* SSE2 */ |
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID }, |
@@ -21003,14 +21592,13 @@ static const struct builtin_description bdesc_special_args[] = |
/* AVX */ |
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID }, |
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID }, |
- { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID }, |
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID }, |
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT }, |
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE }, |
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT }, |
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF }, |
- { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF }, |
+ { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT }, |
+ { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE }, |
+ { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT }, |
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF }, |
+ { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF }, |
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE }, |
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT }, |
@@ -21032,11 +21620,27 @@ static const struct builtin_description bdesc_special_args[] = |
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF }, |
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF }, |
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF }, |
+ |
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID }, |
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID }, |
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT }, |
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT }, |
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT }, |
+ { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT }, |
+ |
}; |
/* Builtins with variable number of arguments. */ |
static const struct builtin_description bdesc_args[] = |
{ |
+ { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT }, |
+ { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 }, |
+ { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT }, |
+ { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT }, |
+ { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT }, |
+ { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT }, |
+ { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT }, |
+ |
/* MMX */ |
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, |
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, |
@@ -21189,11 +21793,13 @@ static const struct builtin_description bdesc_args[] = |
{ OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, |
{ OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, |
+ { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, |
+ |
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, |
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, |
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, |
- { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, |
- { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, |
+ { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, |
+ { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, |
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI }, |
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI }, |
@@ -21223,11 +21829,25 @@ static const struct builtin_description bdesc_args[] = |
/* SSE2 */ |
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, |
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI }, |
+ { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI }, |
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI }, |
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI }, |
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI }, |
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI }, |
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI }, |
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI }, |
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI }, |
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI }, |
+ { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI }, |
+ { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI }, |
+ |
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF }, |
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI }, |
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF }, |
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI }, |
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI }, |
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI }, |
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF }, |
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF }, |
@@ -21286,9 +21906,11 @@ static const struct builtin_description bdesc_args[] = |
{ OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, |
{ OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, |
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, |
+ |
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, |
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, |
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, |
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, |
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, |
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF }, |
@@ -21333,14 +21955,14 @@ static const struct builtin_description bdesc_args[] = |
{ OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, |
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, |
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, |
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, |
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, |
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, |
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, |
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, |
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, |
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, |
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, |
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, |
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, |
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, |
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, |
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, |
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, |
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, |
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI }, |
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI }, |
@@ -21359,7 +21981,7 @@ static const struct builtin_description bdesc_args[] = |
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF }, |
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF }, |
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT }, |
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT }, |
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT }, |
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT }, |
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT }, |
@@ -21367,7 +21989,7 @@ static const struct builtin_description bdesc_args[] = |
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT }, |
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT }, |
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT }, |
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT }, |
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT }, |
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT }, |
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT }, |
@@ -21440,8 +22062,8 @@ static const struct builtin_description bdesc_args[] = |
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, |
/* SSSE3. */ |
- { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT }, |
- { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT }, |
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT }, |
+ { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT }, |
/* SSE4.1 */ |
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, |
@@ -21482,7 +22104,7 @@ static const struct builtin_description bdesc_args[] = |
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI }, |
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, |
- /* SSE4.1 and SSE5 */ |
+ /* SSE4.1 */ |
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT }, |
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT }, |
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, |
@@ -21494,10 +22116,10 @@ static const struct builtin_description bdesc_args[] = |
/* SSE4.2 */ |
{ OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, |
- { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR }, |
- { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT }, |
- { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT }, |
- { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 }, |
+ { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR }, |
+ { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT }, |
+ { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT }, |
+ { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 }, |
/* SSE4A */ |
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT }, |
@@ -21630,295 +22252,262 @@ static const struct builtin_description bdesc_args[] = |
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF }, |
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF }, |
-}; |
-/* SSE5 */ |
-enum multi_arg_type { |
- MULTI_ARG_UNKNOWN, |
- MULTI_ARG_3_SF, |
- MULTI_ARG_3_DF, |
- MULTI_ARG_3_DI, |
- MULTI_ARG_3_SI, |
- MULTI_ARG_3_SI_DI, |
- MULTI_ARG_3_HI, |
- MULTI_ARG_3_HI_SI, |
- MULTI_ARG_3_QI, |
- MULTI_ARG_3_PERMPS, |
- MULTI_ARG_3_PERMPD, |
- MULTI_ARG_2_SF, |
- MULTI_ARG_2_DF, |
- MULTI_ARG_2_DI, |
- MULTI_ARG_2_SI, |
- MULTI_ARG_2_HI, |
- MULTI_ARG_2_QI, |
- MULTI_ARG_2_DI_IMM, |
- MULTI_ARG_2_SI_IMM, |
- MULTI_ARG_2_HI_IMM, |
- MULTI_ARG_2_QI_IMM, |
- MULTI_ARG_2_SF_CMP, |
- MULTI_ARG_2_DF_CMP, |
- MULTI_ARG_2_DI_CMP, |
- MULTI_ARG_2_SI_CMP, |
- MULTI_ARG_2_HI_CMP, |
- MULTI_ARG_2_QI_CMP, |
- MULTI_ARG_2_DI_TF, |
- MULTI_ARG_2_SI_TF, |
- MULTI_ARG_2_HI_TF, |
- MULTI_ARG_2_QI_TF, |
- MULTI_ARG_2_SF_TF, |
- MULTI_ARG_2_DF_TF, |
- MULTI_ARG_1_SF, |
- MULTI_ARG_1_DF, |
- MULTI_ARG_1_DI, |
- MULTI_ARG_1_SI, |
- MULTI_ARG_1_HI, |
- MULTI_ARG_1_QI, |
- MULTI_ARG_1_SI_DI, |
- MULTI_ARG_1_HI_DI, |
- MULTI_ARG_1_HI_SI, |
- MULTI_ARG_1_QI_DI, |
- MULTI_ARG_1_QI_SI, |
- MULTI_ARG_1_QI_HI, |
- MULTI_ARG_1_PH2PS, |
- MULTI_ARG_1_PS2PH |
+ { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 }, |
}; |
+/* FMA4 and XOP. */ |
+#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT |
+#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT |
+#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT |
+#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT |
+#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF |
+#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF |
+#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF |
+#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF |
+#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI |
+#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI |
+#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI |
+#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI |
+#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI |
+#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI |
+#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI |
+#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI |
+#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI |
+#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI |
+#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF |
+#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF |
+#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI |
+#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI |
+#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI |
+#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI |
+#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI |
+#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI |
+#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI |
+#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI |
+#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP |
+#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP |
+#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP |
+#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP |
+#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF |
+#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF |
+#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF |
+#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF |
+#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF |
+#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF |
+#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF |
+#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF |
+#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF |
+#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF |
+#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI |
+#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI |
+#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI |
+#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI |
+#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI |
+#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI |
+#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI |
+#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI |
+#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI |
+#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI |
+ |
static const struct builtin_description bdesc_multi_arg[] = |
{ |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, 0, (int)MULTI_ARG_3_DI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI }, |
- |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP }, |
- |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP }, |
- |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP }, |
- |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP }, |
- |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP }, |
- |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP }, |
- |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP }, |
- |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP }, |
- |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP }, |
- |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP }, |
- |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP }, |
- |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP }, |
- |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF }, |
- |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF }, |
- |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF }, |
- { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF }, |
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv4sf4, "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF }, |
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv2df4, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF }, |
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4sf4, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF }, |
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv2df4, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF }, |
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv4sf4, "__builtin_ia32_vfmsubss", IX86_BUILTIN_VFMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF }, |
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv2df4, "__builtin_ia32_vfmsubsd", IX86_BUILTIN_VFMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF }, |
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4sf4, "__builtin_ia32_vfmsubps", IX86_BUILTIN_VFMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF }, |
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv2df4, "__builtin_ia32_vfmsubpd", IX86_BUILTIN_VFMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF }, |
+ |
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv4sf4, "__builtin_ia32_vfnmaddss", IX86_BUILTIN_VFNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF }, |
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv2df4, "__builtin_ia32_vfnmaddsd", IX86_BUILTIN_VFNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF }, |
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4sf4, "__builtin_ia32_vfnmaddps", IX86_BUILTIN_VFNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF }, |
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv2df4, "__builtin_ia32_vfnmaddpd", IX86_BUILTIN_VFNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF }, |
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv4sf4, "__builtin_ia32_vfnmsubss", IX86_BUILTIN_VFNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF }, |
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv2df4, "__builtin_ia32_vfnmsubsd", IX86_BUILTIN_VFNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF }, |
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4sf4, "__builtin_ia32_vfnmsubps", IX86_BUILTIN_VFNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF }, |
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv2df4, "__builtin_ia32_vfnmsubpd", IX86_BUILTIN_VFNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF }, |
+ |
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4sf4, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF }, |
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv2df4, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF }, |
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4sf4, "__builtin_ia32_vfmsubaddps", IX86_BUILTIN_VFMSUBADDPS, UNKNOWN, (int)MULTI_ARG_3_SF }, |
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv2df4, "__builtin_ia32_vfmsubaddpd", IX86_BUILTIN_VFMSUBADDPD, UNKNOWN, (int)MULTI_ARG_3_DF }, |
+ |
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv8sf4256, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 }, |
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4df4256, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 }, |
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv8sf4256, "__builtin_ia32_vfmsubps256", IX86_BUILTIN_VFMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 }, |
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4df4256, "__builtin_ia32_vfmsubpd256", IX86_BUILTIN_VFMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 }, |
+ |
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv8sf4256, "__builtin_ia32_vfnmaddps256", IX86_BUILTIN_VFNMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 }, |
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4df4256, "__builtin_ia32_vfnmaddpd256", IX86_BUILTIN_VFNMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 }, |
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv8sf4256, "__builtin_ia32_vfnmsubps256", IX86_BUILTIN_VFNMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 }, |
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4df4256, "__builtin_ia32_vfnmsubpd256", IX86_BUILTIN_VFNMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 }, |
+ |
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv8sf4, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 }, |
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4df4, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 }, |
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv8sf4, "__builtin_ia32_vfmsubaddps256", IX86_BUILTIN_VFMSUBADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 }, |
+ { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4, "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 }, |
+ |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF }, |
+ |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 }, |
+ |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI }, |
+ |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI }, |
+ |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI }, |
+ |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2256, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2256, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 }, |
+ |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI }, |
+ |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP }, |
+ |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP }, |
+ |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP }, |
+ |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP }, |
+ |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP }, |
+ |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP }, |
+ |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP }, |
+ |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP }, |
+ |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF }, |
+ |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF }, |
+ |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 }, |
+ { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 }, |
+ |
}; |
/* Set up all the MMX/SSE builtins, even builtins for instructions that are not |
@@ -21929,870 +22518,19 @@ static void |
ix86_init_mmx_sse_builtins (void) |
{ |
const struct builtin_description * d; |
+ enum ix86_builtin_func_type ftype; |
size_t i; |
- tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode); |
- tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode); |
- tree V1DI_type_node |
- = build_vector_type_for_mode (long_long_integer_type_node, V1DImode); |
- tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode); |
- tree V2DI_type_node |
- = build_vector_type_for_mode (long_long_integer_type_node, V2DImode); |
- tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode); |
- tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode); |
- tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode); |
- tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode); |
- tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode); |
- tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode); |
- |
- tree pchar_type_node = build_pointer_type (char_type_node); |
- tree pcchar_type_node |
- = build_pointer_type (build_type_variant (char_type_node, 1, 0)); |
- tree pfloat_type_node = build_pointer_type (float_type_node); |
- tree pcfloat_type_node |
- = build_pointer_type (build_type_variant (float_type_node, 1, 0)); |
- tree pv2sf_type_node = build_pointer_type (V2SF_type_node); |
- tree pcv2sf_type_node |
- = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0)); |
- tree pv2di_type_node = build_pointer_type (V2DI_type_node); |
- tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node); |
- |
- /* Comparisons. */ |
- tree int_ftype_v4sf_v4sf |
- = build_function_type_list (integer_type_node, |
- V4SF_type_node, V4SF_type_node, NULL_TREE); |
- tree v4si_ftype_v4sf_v4sf |
- = build_function_type_list (V4SI_type_node, |
- V4SF_type_node, V4SF_type_node, NULL_TREE); |
- /* MMX/SSE/integer conversions. */ |
- tree int_ftype_v4sf |
- = build_function_type_list (integer_type_node, |
- V4SF_type_node, NULL_TREE); |
- tree int64_ftype_v4sf |
- = build_function_type_list (long_long_integer_type_node, |
- V4SF_type_node, NULL_TREE); |
- tree int_ftype_v8qi |
- = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE); |
- tree v4sf_ftype_v4sf_int |
- = build_function_type_list (V4SF_type_node, |
- V4SF_type_node, integer_type_node, NULL_TREE); |
- tree v4sf_ftype_v4sf_int64 |
- = build_function_type_list (V4SF_type_node, |
- V4SF_type_node, long_long_integer_type_node, |
- NULL_TREE); |
- tree v4sf_ftype_v4sf_v2si |
- = build_function_type_list (V4SF_type_node, |
- V4SF_type_node, V2SI_type_node, NULL_TREE); |
- |
- /* Miscellaneous. */ |
- tree v8qi_ftype_v4hi_v4hi |
- = build_function_type_list (V8QI_type_node, |
- V4HI_type_node, V4HI_type_node, NULL_TREE); |
- tree v4hi_ftype_v2si_v2si |
- = build_function_type_list (V4HI_type_node, |
- V2SI_type_node, V2SI_type_node, NULL_TREE); |
- tree v4sf_ftype_v4sf_v4sf_int |
- = build_function_type_list (V4SF_type_node, |
- V4SF_type_node, V4SF_type_node, |
- integer_type_node, NULL_TREE); |
- tree v2si_ftype_v4hi_v4hi |
- = build_function_type_list (V2SI_type_node, |
- V4HI_type_node, V4HI_type_node, NULL_TREE); |
- tree v4hi_ftype_v4hi_int |
- = build_function_type_list (V4HI_type_node, |
- V4HI_type_node, integer_type_node, NULL_TREE); |
- tree v2si_ftype_v2si_int |
- = build_function_type_list (V2SI_type_node, |
- V2SI_type_node, integer_type_node, NULL_TREE); |
- tree v1di_ftype_v1di_int |
- = build_function_type_list (V1DI_type_node, |
- V1DI_type_node, integer_type_node, NULL_TREE); |
- |
- tree void_ftype_void |
- = build_function_type (void_type_node, void_list_node); |
- tree void_ftype_unsigned |
- = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE); |
- tree void_ftype_unsigned_unsigned |
- = build_function_type_list (void_type_node, unsigned_type_node, |
- unsigned_type_node, NULL_TREE); |
- tree void_ftype_pcvoid_unsigned_unsigned |
- = build_function_type_list (void_type_node, const_ptr_type_node, |
- unsigned_type_node, unsigned_type_node, |
- NULL_TREE); |
- tree unsigned_ftype_void |
- = build_function_type (unsigned_type_node, void_list_node); |
- tree v2si_ftype_v4sf |
- = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE); |
- /* Loads/stores. */ |
- tree void_ftype_v8qi_v8qi_pchar |
- = build_function_type_list (void_type_node, |
- V8QI_type_node, V8QI_type_node, |
- pchar_type_node, NULL_TREE); |
- tree v4sf_ftype_pcfloat |
- = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE); |
- tree v4sf_ftype_v4sf_pcv2sf |
- = build_function_type_list (V4SF_type_node, |
- V4SF_type_node, pcv2sf_type_node, NULL_TREE); |
- tree void_ftype_pv2sf_v4sf |
- = build_function_type_list (void_type_node, |
- pv2sf_type_node, V4SF_type_node, NULL_TREE); |
- tree void_ftype_pfloat_v4sf |
- = build_function_type_list (void_type_node, |
- pfloat_type_node, V4SF_type_node, NULL_TREE); |
- tree void_ftype_pdi_di |
- = build_function_type_list (void_type_node, |
- pdi_type_node, long_long_unsigned_type_node, |
- NULL_TREE); |
- tree void_ftype_pv2di_v2di |
- = build_function_type_list (void_type_node, |
- pv2di_type_node, V2DI_type_node, NULL_TREE); |
- /* Normal vector unops. */ |
- tree v4sf_ftype_v4sf |
- = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE); |
- tree v16qi_ftype_v16qi |
- = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE); |
- tree v8hi_ftype_v8hi |
- = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE); |
- tree v4si_ftype_v4si |
- = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE); |
- tree v8qi_ftype_v8qi |
- = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE); |
- tree v4hi_ftype_v4hi |
- = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE); |
- |
- /* Normal vector binops. */ |
- tree v4sf_ftype_v4sf_v4sf |
- = build_function_type_list (V4SF_type_node, |
- V4SF_type_node, V4SF_type_node, NULL_TREE); |
- tree v8qi_ftype_v8qi_v8qi |
- = build_function_type_list (V8QI_type_node, |
- V8QI_type_node, V8QI_type_node, NULL_TREE); |
- tree v4hi_ftype_v4hi_v4hi |
- = build_function_type_list (V4HI_type_node, |
- V4HI_type_node, V4HI_type_node, NULL_TREE); |
- tree v2si_ftype_v2si_v2si |
- = build_function_type_list (V2SI_type_node, |
- V2SI_type_node, V2SI_type_node, NULL_TREE); |
- tree v1di_ftype_v1di_v1di |
- = build_function_type_list (V1DI_type_node, |
- V1DI_type_node, V1DI_type_node, NULL_TREE); |
- tree v1di_ftype_v1di_v1di_int |
- = build_function_type_list (V1DI_type_node, |
- V1DI_type_node, V1DI_type_node, |
- integer_type_node, NULL_TREE); |
- tree v2si_ftype_v2sf |
- = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE); |
- tree v2sf_ftype_v2si |
- = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE); |
- tree v2si_ftype_v2si |
- = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE); |
- tree v2sf_ftype_v2sf |
- = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE); |
- tree v2sf_ftype_v2sf_v2sf |
- = build_function_type_list (V2SF_type_node, |
- V2SF_type_node, V2SF_type_node, NULL_TREE); |
- tree v2si_ftype_v2sf_v2sf |
- = build_function_type_list (V2SI_type_node, |
- V2SF_type_node, V2SF_type_node, NULL_TREE); |
- tree pint_type_node = build_pointer_type (integer_type_node); |
- tree pdouble_type_node = build_pointer_type (double_type_node); |
- tree pcdouble_type_node = build_pointer_type ( |
- build_type_variant (double_type_node, 1, 0)); |
- tree int_ftype_v2df_v2df |
- = build_function_type_list (integer_type_node, |
- V2DF_type_node, V2DF_type_node, NULL_TREE); |
- |
- tree void_ftype_pcvoid |
- = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE); |
- tree v4sf_ftype_v4si |
- = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE); |
- tree v4si_ftype_v4sf |
- = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE); |
- tree v2df_ftype_v4si |
- = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE); |
- tree v4si_ftype_v2df |
- = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE); |
- tree v4si_ftype_v2df_v2df |
- = build_function_type_list (V4SI_type_node, |
- V2DF_type_node, V2DF_type_node, NULL_TREE); |
- tree v2si_ftype_v2df |
- = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE); |
- tree v4sf_ftype_v2df |
- = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE); |
- tree v2df_ftype_v2si |
- = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE); |
- tree v2df_ftype_v4sf |
- = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE); |
- tree int_ftype_v2df |
- = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE); |
- tree int64_ftype_v2df |
- = build_function_type_list (long_long_integer_type_node, |
- V2DF_type_node, NULL_TREE); |
- tree v2df_ftype_v2df_int |
- = build_function_type_list (V2DF_type_node, |
- V2DF_type_node, integer_type_node, NULL_TREE); |
- tree v2df_ftype_v2df_int64 |
- = build_function_type_list (V2DF_type_node, |
- V2DF_type_node, long_long_integer_type_node, |
- NULL_TREE); |
- tree v4sf_ftype_v4sf_v2df |
- = build_function_type_list (V4SF_type_node, |
- V4SF_type_node, V2DF_type_node, NULL_TREE); |
- tree v2df_ftype_v2df_v4sf |
- = build_function_type_list (V2DF_type_node, |
- V2DF_type_node, V4SF_type_node, NULL_TREE); |
- tree v2df_ftype_v2df_v2df_int |
- = build_function_type_list (V2DF_type_node, |
- V2DF_type_node, V2DF_type_node, |
- integer_type_node, |
- NULL_TREE); |
- tree v2df_ftype_v2df_pcdouble |
- = build_function_type_list (V2DF_type_node, |
- V2DF_type_node, pcdouble_type_node, NULL_TREE); |
- tree void_ftype_pdouble_v2df |
- = build_function_type_list (void_type_node, |
- pdouble_type_node, V2DF_type_node, NULL_TREE); |
- tree void_ftype_pint_int |
- = build_function_type_list (void_type_node, |
- pint_type_node, integer_type_node, NULL_TREE); |
- tree void_ftype_v16qi_v16qi_pchar |
- = build_function_type_list (void_type_node, |
- V16QI_type_node, V16QI_type_node, |
- pchar_type_node, NULL_TREE); |
- tree v2df_ftype_pcdouble |
- = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE); |
- tree v2df_ftype_v2df_v2df |
- = build_function_type_list (V2DF_type_node, |
- V2DF_type_node, V2DF_type_node, NULL_TREE); |
- tree v16qi_ftype_v16qi_v16qi |
- = build_function_type_list (V16QI_type_node, |
- V16QI_type_node, V16QI_type_node, NULL_TREE); |
- tree v8hi_ftype_v8hi_v8hi |
- = build_function_type_list (V8HI_type_node, |
- V8HI_type_node, V8HI_type_node, NULL_TREE); |
- tree v4si_ftype_v4si_v4si |
- = build_function_type_list (V4SI_type_node, |
- V4SI_type_node, V4SI_type_node, NULL_TREE); |
- tree v2di_ftype_v2di_v2di |
- = build_function_type_list (V2DI_type_node, |
- V2DI_type_node, V2DI_type_node, NULL_TREE); |
- tree v2di_ftype_v2df_v2df |
- = build_function_type_list (V2DI_type_node, |
- V2DF_type_node, V2DF_type_node, NULL_TREE); |
- tree v2df_ftype_v2df |
- = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE); |
- tree v2di_ftype_v2di_int |
- = build_function_type_list (V2DI_type_node, |
- V2DI_type_node, integer_type_node, NULL_TREE); |
- tree v2di_ftype_v2di_v2di_int |
- = build_function_type_list (V2DI_type_node, V2DI_type_node, |
- V2DI_type_node, integer_type_node, NULL_TREE); |
- tree v4si_ftype_v4si_int |
- = build_function_type_list (V4SI_type_node, |
- V4SI_type_node, integer_type_node, NULL_TREE); |
- tree v8hi_ftype_v8hi_int |
- = build_function_type_list (V8HI_type_node, |
- V8HI_type_node, integer_type_node, NULL_TREE); |
- tree v4si_ftype_v8hi_v8hi |
- = build_function_type_list (V4SI_type_node, |
- V8HI_type_node, V8HI_type_node, NULL_TREE); |
- tree v1di_ftype_v8qi_v8qi |
- = build_function_type_list (V1DI_type_node, |
- V8QI_type_node, V8QI_type_node, NULL_TREE); |
- tree v1di_ftype_v2si_v2si |
- = build_function_type_list (V1DI_type_node, |
- V2SI_type_node, V2SI_type_node, NULL_TREE); |
- tree v2di_ftype_v16qi_v16qi |
- = build_function_type_list (V2DI_type_node, |
- V16QI_type_node, V16QI_type_node, NULL_TREE); |
- tree v2di_ftype_v4si_v4si |
- = build_function_type_list (V2DI_type_node, |
- V4SI_type_node, V4SI_type_node, NULL_TREE); |
- tree int_ftype_v16qi |
- = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE); |
- tree v16qi_ftype_pcchar |
- = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE); |
- tree void_ftype_pchar_v16qi |
- = build_function_type_list (void_type_node, |
- pchar_type_node, V16QI_type_node, NULL_TREE); |
- |
- tree v2di_ftype_v2di_unsigned_unsigned |
- = build_function_type_list (V2DI_type_node, V2DI_type_node, |
- unsigned_type_node, unsigned_type_node, |
- NULL_TREE); |
- tree v2di_ftype_v2di_v2di_unsigned_unsigned |
- = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node, |
- unsigned_type_node, unsigned_type_node, |
- NULL_TREE); |
- tree v2di_ftype_v2di_v16qi |
- = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node, |
- NULL_TREE); |
- tree v2df_ftype_v2df_v2df_v2df |
- = build_function_type_list (V2DF_type_node, |
- V2DF_type_node, V2DF_type_node, |
- V2DF_type_node, NULL_TREE); |
- tree v4sf_ftype_v4sf_v4sf_v4sf |
- = build_function_type_list (V4SF_type_node, |
- V4SF_type_node, V4SF_type_node, |
- V4SF_type_node, NULL_TREE); |
- tree v8hi_ftype_v16qi |
- = build_function_type_list (V8HI_type_node, V16QI_type_node, |
- NULL_TREE); |
- tree v4si_ftype_v16qi |
- = build_function_type_list (V4SI_type_node, V16QI_type_node, |
- NULL_TREE); |
- tree v2di_ftype_v16qi |
- = build_function_type_list (V2DI_type_node, V16QI_type_node, |
- NULL_TREE); |
- tree v4si_ftype_v8hi |
- = build_function_type_list (V4SI_type_node, V8HI_type_node, |
- NULL_TREE); |
- tree v2di_ftype_v8hi |
- = build_function_type_list (V2DI_type_node, V8HI_type_node, |
- NULL_TREE); |
- tree v2di_ftype_v4si |
- = build_function_type_list (V2DI_type_node, V4SI_type_node, |
- NULL_TREE); |
- tree v2di_ftype_pv2di |
- = build_function_type_list (V2DI_type_node, pv2di_type_node, |
- NULL_TREE); |
- tree v16qi_ftype_v16qi_v16qi_int |
- = build_function_type_list (V16QI_type_node, V16QI_type_node, |
- V16QI_type_node, integer_type_node, |
- NULL_TREE); |
- tree v16qi_ftype_v16qi_v16qi_v16qi |
- = build_function_type_list (V16QI_type_node, V16QI_type_node, |
- V16QI_type_node, V16QI_type_node, |
- NULL_TREE); |
- tree v8hi_ftype_v8hi_v8hi_int |
- = build_function_type_list (V8HI_type_node, V8HI_type_node, |
- V8HI_type_node, integer_type_node, |
- NULL_TREE); |
- tree v4si_ftype_v4si_v4si_int |
- = build_function_type_list (V4SI_type_node, V4SI_type_node, |
- V4SI_type_node, integer_type_node, |
- NULL_TREE); |
- tree int_ftype_v2di_v2di |
- = build_function_type_list (integer_type_node, |
- V2DI_type_node, V2DI_type_node, |
- NULL_TREE); |
- tree int_ftype_v16qi_int_v16qi_int_int |
- = build_function_type_list (integer_type_node, |
- V16QI_type_node, |
- integer_type_node, |
- V16QI_type_node, |
- integer_type_node, |
- integer_type_node, |
- NULL_TREE); |
- tree v16qi_ftype_v16qi_int_v16qi_int_int |
- = build_function_type_list (V16QI_type_node, |
- V16QI_type_node, |
- integer_type_node, |
- V16QI_type_node, |
- integer_type_node, |
- integer_type_node, |
- NULL_TREE); |
- tree int_ftype_v16qi_v16qi_int |
- = build_function_type_list (integer_type_node, |
- V16QI_type_node, |
- V16QI_type_node, |
- integer_type_node, |
- NULL_TREE); |
- |
- /* SSE5 instructions */ |
- tree v2di_ftype_v2di_v2di_v2di |
- = build_function_type_list (V2DI_type_node, |
- V2DI_type_node, |
- V2DI_type_node, |
- V2DI_type_node, |
- NULL_TREE); |
- |
- tree v4si_ftype_v4si_v4si_v4si |
- = build_function_type_list (V4SI_type_node, |
- V4SI_type_node, |
- V4SI_type_node, |
- V4SI_type_node, |
- NULL_TREE); |
- |
- tree v4si_ftype_v4si_v4si_v2di |
- = build_function_type_list (V4SI_type_node, |
- V4SI_type_node, |
- V4SI_type_node, |
- V2DI_type_node, |
- NULL_TREE); |
- |
- tree v8hi_ftype_v8hi_v8hi_v8hi |
- = build_function_type_list (V8HI_type_node, |
- V8HI_type_node, |
- V8HI_type_node, |
- V8HI_type_node, |
- NULL_TREE); |
- |
- tree v8hi_ftype_v8hi_v8hi_v4si |
- = build_function_type_list (V8HI_type_node, |
- V8HI_type_node, |
- V8HI_type_node, |
- V4SI_type_node, |
- NULL_TREE); |
- |
- tree v2df_ftype_v2df_v2df_v16qi |
- = build_function_type_list (V2DF_type_node, |
- V2DF_type_node, |
- V2DF_type_node, |
- V16QI_type_node, |
- NULL_TREE); |
- |
- tree v4sf_ftype_v4sf_v4sf_v16qi |
- = build_function_type_list (V4SF_type_node, |
- V4SF_type_node, |
- V4SF_type_node, |
- V16QI_type_node, |
- NULL_TREE); |
- |
- tree v2di_ftype_v2di_si |
- = build_function_type_list (V2DI_type_node, |
- V2DI_type_node, |
- integer_type_node, |
- NULL_TREE); |
- |
- tree v4si_ftype_v4si_si |
- = build_function_type_list (V4SI_type_node, |
- V4SI_type_node, |
- integer_type_node, |
- NULL_TREE); |
- |
- tree v8hi_ftype_v8hi_si |
- = build_function_type_list (V8HI_type_node, |
- V8HI_type_node, |
- integer_type_node, |
- NULL_TREE); |
- |
- tree v16qi_ftype_v16qi_si |
- = build_function_type_list (V16QI_type_node, |
- V16QI_type_node, |
- integer_type_node, |
- NULL_TREE); |
- tree v4sf_ftype_v4hi |
- = build_function_type_list (V4SF_type_node, |
- V4HI_type_node, |
- NULL_TREE); |
- |
- tree v4hi_ftype_v4sf |
- = build_function_type_list (V4HI_type_node, |
- V4SF_type_node, |
- NULL_TREE); |
- |
- tree v2di_ftype_v2di |
- = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE); |
- |
- tree v16qi_ftype_v8hi_v8hi |
- = build_function_type_list (V16QI_type_node, |
- V8HI_type_node, V8HI_type_node, |
- NULL_TREE); |
- tree v8hi_ftype_v4si_v4si |
- = build_function_type_list (V8HI_type_node, |
- V4SI_type_node, V4SI_type_node, |
- NULL_TREE); |
- tree v8hi_ftype_v16qi_v16qi |
- = build_function_type_list (V8HI_type_node, |
- V16QI_type_node, V16QI_type_node, |
- NULL_TREE); |
- tree v4hi_ftype_v8qi_v8qi |
- = build_function_type_list (V4HI_type_node, |
- V8QI_type_node, V8QI_type_node, |
- NULL_TREE); |
- tree unsigned_ftype_unsigned_uchar |
- = build_function_type_list (unsigned_type_node, |
- unsigned_type_node, |
- unsigned_char_type_node, |
- NULL_TREE); |
- tree unsigned_ftype_unsigned_ushort |
- = build_function_type_list (unsigned_type_node, |
- unsigned_type_node, |
- short_unsigned_type_node, |
- NULL_TREE); |
- tree unsigned_ftype_unsigned_unsigned |
- = build_function_type_list (unsigned_type_node, |
- unsigned_type_node, |
- unsigned_type_node, |
- NULL_TREE); |
- tree uint64_ftype_uint64_uint64 |
- = build_function_type_list (long_long_unsigned_type_node, |
- long_long_unsigned_type_node, |
- long_long_unsigned_type_node, |
- NULL_TREE); |
- tree float_ftype_float |
- = build_function_type_list (float_type_node, |
- float_type_node, |
- NULL_TREE); |
- |
- /* AVX builtins */ |
- tree V32QI_type_node = build_vector_type_for_mode (char_type_node, |
- V32QImode); |
- tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node, |
- V8SImode); |
- tree V8SF_type_node = build_vector_type_for_mode (float_type_node, |
- V8SFmode); |
- tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node, |
- V4DImode); |
- tree V4DF_type_node = build_vector_type_for_mode (double_type_node, |
- V4DFmode); |
- tree v8sf_ftype_v8sf |
- = build_function_type_list (V8SF_type_node, |
- V8SF_type_node, |
- NULL_TREE); |
- tree v8si_ftype_v8sf |
- = build_function_type_list (V8SI_type_node, |
- V8SF_type_node, |
- NULL_TREE); |
- tree v8sf_ftype_v8si |
- = build_function_type_list (V8SF_type_node, |
- V8SI_type_node, |
- NULL_TREE); |
- tree v4si_ftype_v4df |
- = build_function_type_list (V4SI_type_node, |
- V4DF_type_node, |
- NULL_TREE); |
- tree v4df_ftype_v4df |
- = build_function_type_list (V4DF_type_node, |
- V4DF_type_node, |
- NULL_TREE); |
- tree v4df_ftype_v4si |
- = build_function_type_list (V4DF_type_node, |
- V4SI_type_node, |
- NULL_TREE); |
- tree v4df_ftype_v4sf |
- = build_function_type_list (V4DF_type_node, |
- V4SF_type_node, |
- NULL_TREE); |
- tree v4sf_ftype_v4df |
- = build_function_type_list (V4SF_type_node, |
- V4DF_type_node, |
- NULL_TREE); |
- tree v8sf_ftype_v8sf_v8sf |
- = build_function_type_list (V8SF_type_node, |
- V8SF_type_node, V8SF_type_node, |
- NULL_TREE); |
- tree v4df_ftype_v4df_v4df |
- = build_function_type_list (V4DF_type_node, |
- V4DF_type_node, V4DF_type_node, |
- NULL_TREE); |
- tree v8sf_ftype_v8sf_int |
- = build_function_type_list (V8SF_type_node, |
- V8SF_type_node, integer_type_node, |
- NULL_TREE); |
- tree v4si_ftype_v8si_int |
- = build_function_type_list (V4SI_type_node, |
- V8SI_type_node, integer_type_node, |
- NULL_TREE); |
- tree v4df_ftype_v4df_int |
- = build_function_type_list (V4DF_type_node, |
- V4DF_type_node, integer_type_node, |
- NULL_TREE); |
- tree v4sf_ftype_v8sf_int |
- = build_function_type_list (V4SF_type_node, |
- V8SF_type_node, integer_type_node, |
- NULL_TREE); |
- tree v2df_ftype_v4df_int |
- = build_function_type_list (V2DF_type_node, |
- V4DF_type_node, integer_type_node, |
- NULL_TREE); |
- tree v8sf_ftype_v8sf_v8sf_int |
- = build_function_type_list (V8SF_type_node, |
- V8SF_type_node, V8SF_type_node, |
- integer_type_node, |
- NULL_TREE); |
- tree v8sf_ftype_v8sf_v8sf_v8sf |
- = build_function_type_list (V8SF_type_node, |
- V8SF_type_node, V8SF_type_node, |
- V8SF_type_node, |
- NULL_TREE); |
- tree v4df_ftype_v4df_v4df_v4df |
- = build_function_type_list (V4DF_type_node, |
- V4DF_type_node, V4DF_type_node, |
- V4DF_type_node, |
- NULL_TREE); |
- tree v8si_ftype_v8si_v8si_int |
- = build_function_type_list (V8SI_type_node, |
- V8SI_type_node, V8SI_type_node, |
- integer_type_node, |
- NULL_TREE); |
- tree v4df_ftype_v4df_v4df_int |
- = build_function_type_list (V4DF_type_node, |
- V4DF_type_node, V4DF_type_node, |
- integer_type_node, |
- NULL_TREE); |
- tree v8sf_ftype_pcfloat |
- = build_function_type_list (V8SF_type_node, |
- pcfloat_type_node, |
- NULL_TREE); |
- tree v4df_ftype_pcdouble |
- = build_function_type_list (V4DF_type_node, |
- pcdouble_type_node, |
- NULL_TREE); |
- tree pcv4sf_type_node |
- = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0)); |
- tree pcv2df_type_node |
- = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0)); |
- tree v8sf_ftype_pcv4sf |
- = build_function_type_list (V8SF_type_node, |
- pcv4sf_type_node, |
- NULL_TREE); |
- tree v4df_ftype_pcv2df |
- = build_function_type_list (V4DF_type_node, |
- pcv2df_type_node, |
- NULL_TREE); |
- tree v32qi_ftype_pcchar |
- = build_function_type_list (V32QI_type_node, |
- pcchar_type_node, |
- NULL_TREE); |
- tree void_ftype_pchar_v32qi |
- = build_function_type_list (void_type_node, |
- pchar_type_node, V32QI_type_node, |
- NULL_TREE); |
- tree v8si_ftype_v8si_v4si_int |
- = build_function_type_list (V8SI_type_node, |
- V8SI_type_node, V4SI_type_node, |
- integer_type_node, |
- NULL_TREE); |
- tree pv4di_type_node = build_pointer_type (V4DI_type_node); |
- tree void_ftype_pv4di_v4di |
- = build_function_type_list (void_type_node, |
- pv4di_type_node, V4DI_type_node, |
- NULL_TREE); |
- tree v8sf_ftype_v8sf_v4sf_int |
- = build_function_type_list (V8SF_type_node, |
- V8SF_type_node, V4SF_type_node, |
- integer_type_node, |
- NULL_TREE); |
- tree v4df_ftype_v4df_v2df_int |
- = build_function_type_list (V4DF_type_node, |
- V4DF_type_node, V2DF_type_node, |
- integer_type_node, |
- NULL_TREE); |
- tree void_ftype_pfloat_v8sf |
- = build_function_type_list (void_type_node, |
- pfloat_type_node, V8SF_type_node, |
- NULL_TREE); |
- tree void_ftype_pdouble_v4df |
- = build_function_type_list (void_type_node, |
- pdouble_type_node, V4DF_type_node, |
- NULL_TREE); |
- tree pv8sf_type_node = build_pointer_type (V8SF_type_node); |
- tree pv4sf_type_node = build_pointer_type (V4SF_type_node); |
- tree pv4df_type_node = build_pointer_type (V4DF_type_node); |
- tree pv2df_type_node = build_pointer_type (V2DF_type_node); |
- tree pcv8sf_type_node |
- = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0)); |
- tree pcv4df_type_node |
- = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0)); |
- tree v8sf_ftype_pcv8sf_v8sf |
- = build_function_type_list (V8SF_type_node, |
- pcv8sf_type_node, V8SF_type_node, |
- NULL_TREE); |
- tree v4df_ftype_pcv4df_v4df |
- = build_function_type_list (V4DF_type_node, |
- pcv4df_type_node, V4DF_type_node, |
- NULL_TREE); |
- tree v4sf_ftype_pcv4sf_v4sf |
- = build_function_type_list (V4SF_type_node, |
- pcv4sf_type_node, V4SF_type_node, |
- NULL_TREE); |
- tree v2df_ftype_pcv2df_v2df |
- = build_function_type_list (V2DF_type_node, |
- pcv2df_type_node, V2DF_type_node, |
- NULL_TREE); |
- tree void_ftype_pv8sf_v8sf_v8sf |
- = build_function_type_list (void_type_node, |
- pv8sf_type_node, V8SF_type_node, |
- V8SF_type_node, |
- NULL_TREE); |
- tree void_ftype_pv4df_v4df_v4df |
- = build_function_type_list (void_type_node, |
- pv4df_type_node, V4DF_type_node, |
- V4DF_type_node, |
- NULL_TREE); |
- tree void_ftype_pv4sf_v4sf_v4sf |
- = build_function_type_list (void_type_node, |
- pv4sf_type_node, V4SF_type_node, |
- V4SF_type_node, |
- NULL_TREE); |
- tree void_ftype_pv2df_v2df_v2df |
- = build_function_type_list (void_type_node, |
- pv2df_type_node, V2DF_type_node, |
- V2DF_type_node, |
- NULL_TREE); |
- tree v4df_ftype_v2df |
- = build_function_type_list (V4DF_type_node, |
- V2DF_type_node, |
- NULL_TREE); |
- tree v8sf_ftype_v4sf |
- = build_function_type_list (V8SF_type_node, |
- V4SF_type_node, |
- NULL_TREE); |
- tree v8si_ftype_v4si |
- = build_function_type_list (V8SI_type_node, |
- V4SI_type_node, |
- NULL_TREE); |
- tree v2df_ftype_v4df |
- = build_function_type_list (V2DF_type_node, |
- V4DF_type_node, |
- NULL_TREE); |
- tree v4sf_ftype_v8sf |
- = build_function_type_list (V4SF_type_node, |
- V8SF_type_node, |
- NULL_TREE); |
- tree v4si_ftype_v8si |
- = build_function_type_list (V4SI_type_node, |
- V8SI_type_node, |
- NULL_TREE); |
- tree int_ftype_v4df |
- = build_function_type_list (integer_type_node, |
- V4DF_type_node, |
- NULL_TREE); |
- tree int_ftype_v8sf |
- = build_function_type_list (integer_type_node, |
- V8SF_type_node, |
- NULL_TREE); |
- tree int_ftype_v8sf_v8sf |
- = build_function_type_list (integer_type_node, |
- V8SF_type_node, V8SF_type_node, |
- NULL_TREE); |
- tree int_ftype_v4di_v4di |
- = build_function_type_list (integer_type_node, |
- V4DI_type_node, V4DI_type_node, |
- NULL_TREE); |
- tree int_ftype_v4df_v4df |
- = build_function_type_list (integer_type_node, |
- V4DF_type_node, V4DF_type_node, |
- NULL_TREE); |
- tree v8sf_ftype_v8sf_v8si |
- = build_function_type_list (V8SF_type_node, |
- V8SF_type_node, V8SI_type_node, |
- NULL_TREE); |
- tree v4df_ftype_v4df_v4di |
- = build_function_type_list (V4DF_type_node, |
- V4DF_type_node, V4DI_type_node, |
- NULL_TREE); |
- tree v4sf_ftype_v4sf_v4si |
- = build_function_type_list (V4SF_type_node, |
- V4SF_type_node, V4SI_type_node, NULL_TREE); |
- tree v2df_ftype_v2df_v2di |
- = build_function_type_list (V2DF_type_node, |
- V2DF_type_node, V2DI_type_node, NULL_TREE); |
- |
- tree ftype; |
- |
/* Add all special builtins with variable number of operands. */ |
for (i = 0, d = bdesc_special_args; |
i < ARRAY_SIZE (bdesc_special_args); |
i++, d++) |
{ |
- tree type; |
- |
if (d->name == 0) |
continue; |
- switch ((enum ix86_special_builtin_type) d->flag) |
- { |
- case VOID_FTYPE_VOID: |
- type = void_ftype_void; |
- break; |
- case V32QI_FTYPE_PCCHAR: |
- type = v32qi_ftype_pcchar; |
- break; |
- case V16QI_FTYPE_PCCHAR: |
- type = v16qi_ftype_pcchar; |
- break; |
- case V8SF_FTYPE_PCV4SF: |
- type = v8sf_ftype_pcv4sf; |
- break; |
- case V8SF_FTYPE_PCFLOAT: |
- type = v8sf_ftype_pcfloat; |
- break; |
- case V4DF_FTYPE_PCV2DF: |
- type = v4df_ftype_pcv2df; |
- break; |
- case V4DF_FTYPE_PCDOUBLE: |
- type = v4df_ftype_pcdouble; |
- break; |
- case V4SF_FTYPE_PCFLOAT: |
- type = v4sf_ftype_pcfloat; |
- break; |
- case V2DI_FTYPE_PV2DI: |
- type = v2di_ftype_pv2di; |
- break; |
- case V2DF_FTYPE_PCDOUBLE: |
- type = v2df_ftype_pcdouble; |
- break; |
- case V8SF_FTYPE_PCV8SF_V8SF: |
- type = v8sf_ftype_pcv8sf_v8sf; |
- break; |
- case V4DF_FTYPE_PCV4DF_V4DF: |
- type = v4df_ftype_pcv4df_v4df; |
- break; |
- case V4SF_FTYPE_V4SF_PCV2SF: |
- type = v4sf_ftype_v4sf_pcv2sf; |
- break; |
- case V4SF_FTYPE_PCV4SF_V4SF: |
- type = v4sf_ftype_pcv4sf_v4sf; |
- break; |
- case V2DF_FTYPE_V2DF_PCDOUBLE: |
- type = v2df_ftype_v2df_pcdouble; |
- break; |
- case V2DF_FTYPE_PCV2DF_V2DF: |
- type = v2df_ftype_pcv2df_v2df; |
- break; |
- case VOID_FTYPE_PV2SF_V4SF: |
- type = void_ftype_pv2sf_v4sf; |
- break; |
- case VOID_FTYPE_PV4DI_V4DI: |
- type = void_ftype_pv4di_v4di; |
- break; |
- case VOID_FTYPE_PV2DI_V2DI: |
- type = void_ftype_pv2di_v2di; |
- break; |
- case VOID_FTYPE_PCHAR_V32QI: |
- type = void_ftype_pchar_v32qi; |
- break; |
- case VOID_FTYPE_PCHAR_V16QI: |
- type = void_ftype_pchar_v16qi; |
- break; |
- case VOID_FTYPE_PFLOAT_V8SF: |
- type = void_ftype_pfloat_v8sf; |
- break; |
- case VOID_FTYPE_PFLOAT_V4SF: |
- type = void_ftype_pfloat_v4sf; |
- break; |
- case VOID_FTYPE_PDOUBLE_V4DF: |
- type = void_ftype_pdouble_v4df; |
- break; |
- case VOID_FTYPE_PDOUBLE_V2DF: |
- type = void_ftype_pdouble_v2df; |
- break; |
- case VOID_FTYPE_PDI_DI: |
- type = void_ftype_pdi_di; |
- break; |
- case VOID_FTYPE_PINT_INT: |
- type = void_ftype_pint_int; |
- break; |
- case VOID_FTYPE_PV8SF_V8SF_V8SF: |
- type = void_ftype_pv8sf_v8sf_v8sf; |
- break; |
- case VOID_FTYPE_PV4DF_V4DF_V4DF: |
- type = void_ftype_pv4df_v4df_v4df; |
- break; |
- case VOID_FTYPE_PV4SF_V4SF_V4SF: |
- type = void_ftype_pv4sf_v4sf_v4sf; |
- break; |
- case VOID_FTYPE_PV2DF_V2DF_V2DF: |
- type = void_ftype_pv2df_v2df_v2df; |
- break; |
- default: |
- gcc_unreachable (); |
- } |
- |
- def_builtin (d->mask, d->name, type, d->code); |
+ ftype = (enum ix86_builtin_func_type) d->flag; |
+ def_builtin (d->mask, d->name, ftype, d->code); |
} |
/* Add all builtins with variable number of operands. */ |
@@ -22800,444 +22538,11 @@ ix86_init_mmx_sse_builtins (void) |
i < ARRAY_SIZE (bdesc_args); |
i++, d++) |
{ |
- tree type; |
- |
if (d->name == 0) |
continue; |
- switch ((enum ix86_builtin_type) d->flag) |
- { |
- case FLOAT_FTYPE_FLOAT: |
- type = float_ftype_float; |
- break; |
- case INT_FTYPE_V8SF_V8SF_PTEST: |
- type = int_ftype_v8sf_v8sf; |
- break; |
- case INT_FTYPE_V4DI_V4DI_PTEST: |
- type = int_ftype_v4di_v4di; |
- break; |
- case INT_FTYPE_V4DF_V4DF_PTEST: |
- type = int_ftype_v4df_v4df; |
- break; |
- case INT_FTYPE_V4SF_V4SF_PTEST: |
- type = int_ftype_v4sf_v4sf; |
- break; |
- case INT_FTYPE_V2DI_V2DI_PTEST: |
- type = int_ftype_v2di_v2di; |
- break; |
- case INT_FTYPE_V2DF_V2DF_PTEST: |
- type = int_ftype_v2df_v2df; |
- break; |
- case INT64_FTYPE_V4SF: |
- type = int64_ftype_v4sf; |
- break; |
- case INT64_FTYPE_V2DF: |
- type = int64_ftype_v2df; |
- break; |
- case INT_FTYPE_V16QI: |
- type = int_ftype_v16qi; |
- break; |
- case INT_FTYPE_V8QI: |
- type = int_ftype_v8qi; |
- break; |
- case INT_FTYPE_V8SF: |
- type = int_ftype_v8sf; |
- break; |
- case INT_FTYPE_V4DF: |
- type = int_ftype_v4df; |
- break; |
- case INT_FTYPE_V4SF: |
- type = int_ftype_v4sf; |
- break; |
- case INT_FTYPE_V2DF: |
- type = int_ftype_v2df; |
- break; |
- case V16QI_FTYPE_V16QI: |
- type = v16qi_ftype_v16qi; |
- break; |
- case V8SI_FTYPE_V8SF: |
- type = v8si_ftype_v8sf; |
- break; |
- case V8SI_FTYPE_V4SI: |
- type = v8si_ftype_v4si; |
- break; |
- case V8HI_FTYPE_V8HI: |
- type = v8hi_ftype_v8hi; |
- break; |
- case V8HI_FTYPE_V16QI: |
- type = v8hi_ftype_v16qi; |
- break; |
- case V8QI_FTYPE_V8QI: |
- type = v8qi_ftype_v8qi; |
- break; |
- case V8SF_FTYPE_V8SF: |
- type = v8sf_ftype_v8sf; |
- break; |
- case V8SF_FTYPE_V8SI: |
- type = v8sf_ftype_v8si; |
- break; |
- case V8SF_FTYPE_V4SF: |
- type = v8sf_ftype_v4sf; |
- break; |
- case V4SI_FTYPE_V4DF: |
- type = v4si_ftype_v4df; |
- break; |
- case V4SI_FTYPE_V4SI: |
- type = v4si_ftype_v4si; |
- break; |
- case V4SI_FTYPE_V16QI: |
- type = v4si_ftype_v16qi; |
- break; |
- case V4SI_FTYPE_V8SI: |
- type = v4si_ftype_v8si; |
- break; |
- case V4SI_FTYPE_V8HI: |
- type = v4si_ftype_v8hi; |
- break; |
- case V4SI_FTYPE_V4SF: |
- type = v4si_ftype_v4sf; |
- break; |
- case V4SI_FTYPE_V2DF: |
- type = v4si_ftype_v2df; |
- break; |
- case V4HI_FTYPE_V4HI: |
- type = v4hi_ftype_v4hi; |
- break; |
- case V4DF_FTYPE_V4DF: |
- type = v4df_ftype_v4df; |
- break; |
- case V4DF_FTYPE_V4SI: |
- type = v4df_ftype_v4si; |
- break; |
- case V4DF_FTYPE_V4SF: |
- type = v4df_ftype_v4sf; |
- break; |
- case V4DF_FTYPE_V2DF: |
- type = v4df_ftype_v2df; |
- break; |
- case V4SF_FTYPE_V4SF: |
- case V4SF_FTYPE_V4SF_VEC_MERGE: |
- type = v4sf_ftype_v4sf; |
- break; |
- case V4SF_FTYPE_V8SF: |
- type = v4sf_ftype_v8sf; |
- break; |
- case V4SF_FTYPE_V4SI: |
- type = v4sf_ftype_v4si; |
- break; |
- case V4SF_FTYPE_V4DF: |
- type = v4sf_ftype_v4df; |
- break; |
- case V4SF_FTYPE_V2DF: |
- type = v4sf_ftype_v2df; |
- break; |
- case V2DI_FTYPE_V2DI: |
- type = v2di_ftype_v2di; |
- break; |
- case V2DI_FTYPE_V16QI: |
- type = v2di_ftype_v16qi; |
- break; |
- case V2DI_FTYPE_V8HI: |
- type = v2di_ftype_v8hi; |
- break; |
- case V2DI_FTYPE_V4SI: |
- type = v2di_ftype_v4si; |
- break; |
- case V2SI_FTYPE_V2SI: |
- type = v2si_ftype_v2si; |
- break; |
- case V2SI_FTYPE_V4SF: |
- type = v2si_ftype_v4sf; |
- break; |
- case V2SI_FTYPE_V2DF: |
- type = v2si_ftype_v2df; |
- break; |
- case V2SI_FTYPE_V2SF: |
- type = v2si_ftype_v2sf; |
- break; |
- case V2DF_FTYPE_V4DF: |
- type = v2df_ftype_v4df; |
- break; |
- case V2DF_FTYPE_V4SF: |
- type = v2df_ftype_v4sf; |
- break; |
- case V2DF_FTYPE_V2DF: |
- case V2DF_FTYPE_V2DF_VEC_MERGE: |
- type = v2df_ftype_v2df; |
- break; |
- case V2DF_FTYPE_V2SI: |
- type = v2df_ftype_v2si; |
- break; |
- case V2DF_FTYPE_V4SI: |
- type = v2df_ftype_v4si; |
- break; |
- case V2SF_FTYPE_V2SF: |
- type = v2sf_ftype_v2sf; |
- break; |
- case V2SF_FTYPE_V2SI: |
- type = v2sf_ftype_v2si; |
- break; |
- case V16QI_FTYPE_V16QI_V16QI: |
- type = v16qi_ftype_v16qi_v16qi; |
- break; |
- case V16QI_FTYPE_V8HI_V8HI: |
- type = v16qi_ftype_v8hi_v8hi; |
- break; |
- case V8QI_FTYPE_V8QI_V8QI: |
- type = v8qi_ftype_v8qi_v8qi; |
- break; |
- case V8QI_FTYPE_V4HI_V4HI: |
- type = v8qi_ftype_v4hi_v4hi; |
- break; |
- case V8HI_FTYPE_V8HI_V8HI: |
- case V8HI_FTYPE_V8HI_V8HI_COUNT: |
- type = v8hi_ftype_v8hi_v8hi; |
- break; |
- case V8HI_FTYPE_V16QI_V16QI: |
- type = v8hi_ftype_v16qi_v16qi; |
- break; |
- case V8HI_FTYPE_V4SI_V4SI: |
- type = v8hi_ftype_v4si_v4si; |
- break; |
- case V8HI_FTYPE_V8HI_SI_COUNT: |
- type = v8hi_ftype_v8hi_int; |
- break; |
- case V8SF_FTYPE_V8SF_V8SF: |
- type = v8sf_ftype_v8sf_v8sf; |
- break; |
- case V8SF_FTYPE_V8SF_V8SI: |
- type = v8sf_ftype_v8sf_v8si; |
- break; |
- case V4SI_FTYPE_V4SI_V4SI: |
- case V4SI_FTYPE_V4SI_V4SI_COUNT: |
- type = v4si_ftype_v4si_v4si; |
- break; |
- case V4SI_FTYPE_V8HI_V8HI: |
- type = v4si_ftype_v8hi_v8hi; |
- break; |
- case V4SI_FTYPE_V4SF_V4SF: |
- type = v4si_ftype_v4sf_v4sf; |
- break; |
- case V4SI_FTYPE_V2DF_V2DF: |
- type = v4si_ftype_v2df_v2df; |
- break; |
- case V4SI_FTYPE_V4SI_SI_COUNT: |
- type = v4si_ftype_v4si_int; |
- break; |
- case V4HI_FTYPE_V4HI_V4HI: |
- case V4HI_FTYPE_V4HI_V4HI_COUNT: |
- type = v4hi_ftype_v4hi_v4hi; |
- break; |
- case V4HI_FTYPE_V8QI_V8QI: |
- type = v4hi_ftype_v8qi_v8qi; |
- break; |
- case V4HI_FTYPE_V2SI_V2SI: |
- type = v4hi_ftype_v2si_v2si; |
- break; |
- case V4HI_FTYPE_V4HI_SI_COUNT: |
- type = v4hi_ftype_v4hi_int; |
- break; |
- case V4DF_FTYPE_V4DF_V4DF: |
- type = v4df_ftype_v4df_v4df; |
- break; |
- case V4DF_FTYPE_V4DF_V4DI: |
- type = v4df_ftype_v4df_v4di; |
- break; |
- case V4SF_FTYPE_V4SF_V4SF: |
- case V4SF_FTYPE_V4SF_V4SF_SWAP: |
- type = v4sf_ftype_v4sf_v4sf; |
- break; |
- case V4SF_FTYPE_V4SF_V4SI: |
- type = v4sf_ftype_v4sf_v4si; |
- break; |
- case V4SF_FTYPE_V4SF_V2SI: |
- type = v4sf_ftype_v4sf_v2si; |
- break; |
- case V4SF_FTYPE_V4SF_V2DF: |
- type = v4sf_ftype_v4sf_v2df; |
- break; |
- case V4SF_FTYPE_V4SF_DI: |
- type = v4sf_ftype_v4sf_int64; |
- break; |
- case V4SF_FTYPE_V4SF_SI: |
- type = v4sf_ftype_v4sf_int; |
- break; |
- case V2DI_FTYPE_V2DI_V2DI: |
- case V2DI_FTYPE_V2DI_V2DI_COUNT: |
- type = v2di_ftype_v2di_v2di; |
- break; |
- case V2DI_FTYPE_V16QI_V16QI: |
- type = v2di_ftype_v16qi_v16qi; |
- break; |
- case V2DI_FTYPE_V4SI_V4SI: |
- type = v2di_ftype_v4si_v4si; |
- break; |
- case V2DI_FTYPE_V2DI_V16QI: |
- type = v2di_ftype_v2di_v16qi; |
- break; |
- case V2DI_FTYPE_V2DF_V2DF: |
- type = v2di_ftype_v2df_v2df; |
- break; |
- case V2DI_FTYPE_V2DI_SI_COUNT: |
- type = v2di_ftype_v2di_int; |
- break; |
- case V2SI_FTYPE_V2SI_V2SI: |
- case V2SI_FTYPE_V2SI_V2SI_COUNT: |
- type = v2si_ftype_v2si_v2si; |
- break; |
- case V2SI_FTYPE_V4HI_V4HI: |
- type = v2si_ftype_v4hi_v4hi; |
- break; |
- case V2SI_FTYPE_V2SF_V2SF: |
- type = v2si_ftype_v2sf_v2sf; |
- break; |
- case V2SI_FTYPE_V2SI_SI_COUNT: |
- type = v2si_ftype_v2si_int; |
- break; |
- case V2DF_FTYPE_V2DF_V2DF: |
- case V2DF_FTYPE_V2DF_V2DF_SWAP: |
- type = v2df_ftype_v2df_v2df; |
- break; |
- case V2DF_FTYPE_V2DF_V4SF: |
- type = v2df_ftype_v2df_v4sf; |
- break; |
- case V2DF_FTYPE_V2DF_V2DI: |
- type = v2df_ftype_v2df_v2di; |
- break; |
- case V2DF_FTYPE_V2DF_DI: |
- type = v2df_ftype_v2df_int64; |
- break; |
- case V2DF_FTYPE_V2DF_SI: |
- type = v2df_ftype_v2df_int; |
- break; |
- case V2SF_FTYPE_V2SF_V2SF: |
- type = v2sf_ftype_v2sf_v2sf; |
- break; |
- case V1DI_FTYPE_V1DI_V1DI: |
- case V1DI_FTYPE_V1DI_V1DI_COUNT: |
- type = v1di_ftype_v1di_v1di; |
- break; |
- case V1DI_FTYPE_V8QI_V8QI: |
- type = v1di_ftype_v8qi_v8qi; |
- break; |
- case V1DI_FTYPE_V2SI_V2SI: |
- type = v1di_ftype_v2si_v2si; |
- break; |
- case V1DI_FTYPE_V1DI_SI_COUNT: |
- type = v1di_ftype_v1di_int; |
- break; |
- case UINT64_FTYPE_UINT64_UINT64: |
- type = uint64_ftype_uint64_uint64; |
- break; |
- case UINT_FTYPE_UINT_UINT: |
- type = unsigned_ftype_unsigned_unsigned; |
- break; |
- case UINT_FTYPE_UINT_USHORT: |
- type = unsigned_ftype_unsigned_ushort; |
- break; |
- case UINT_FTYPE_UINT_UCHAR: |
- type = unsigned_ftype_unsigned_uchar; |
- break; |
- case V8HI_FTYPE_V8HI_INT: |
- type = v8hi_ftype_v8hi_int; |
- break; |
- case V8SF_FTYPE_V8SF_INT: |
- type = v8sf_ftype_v8sf_int; |
- break; |
- case V4SI_FTYPE_V4SI_INT: |
- type = v4si_ftype_v4si_int; |
- break; |
- case V4SI_FTYPE_V8SI_INT: |
- type = v4si_ftype_v8si_int; |
- break; |
- case V4HI_FTYPE_V4HI_INT: |
- type = v4hi_ftype_v4hi_int; |
- break; |
- case V4DF_FTYPE_V4DF_INT: |
- type = v4df_ftype_v4df_int; |
- break; |
- case V4SF_FTYPE_V4SF_INT: |
- type = v4sf_ftype_v4sf_int; |
- break; |
- case V4SF_FTYPE_V8SF_INT: |
- type = v4sf_ftype_v8sf_int; |
- break; |
- case V2DI_FTYPE_V2DI_INT: |
- case V2DI2TI_FTYPE_V2DI_INT: |
- type = v2di_ftype_v2di_int; |
- break; |
- case V2DF_FTYPE_V2DF_INT: |
- type = v2df_ftype_v2df_int; |
- break; |
- case V2DF_FTYPE_V4DF_INT: |
- type = v2df_ftype_v4df_int; |
- break; |
- case V16QI_FTYPE_V16QI_V16QI_V16QI: |
- type = v16qi_ftype_v16qi_v16qi_v16qi; |
- break; |
- case V8SF_FTYPE_V8SF_V8SF_V8SF: |
- type = v8sf_ftype_v8sf_v8sf_v8sf; |
- break; |
- case V4DF_FTYPE_V4DF_V4DF_V4DF: |
- type = v4df_ftype_v4df_v4df_v4df; |
- break; |
- case V4SF_FTYPE_V4SF_V4SF_V4SF: |
- type = v4sf_ftype_v4sf_v4sf_v4sf; |
- break; |
- case V2DF_FTYPE_V2DF_V2DF_V2DF: |
- type = v2df_ftype_v2df_v2df_v2df; |
- break; |
- case V16QI_FTYPE_V16QI_V16QI_INT: |
- type = v16qi_ftype_v16qi_v16qi_int; |
- break; |
- case V8SI_FTYPE_V8SI_V8SI_INT: |
- type = v8si_ftype_v8si_v8si_int; |
- break; |
- case V8SI_FTYPE_V8SI_V4SI_INT: |
- type = v8si_ftype_v8si_v4si_int; |
- break; |
- case V8HI_FTYPE_V8HI_V8HI_INT: |
- type = v8hi_ftype_v8hi_v8hi_int; |
- break; |
- case V8SF_FTYPE_V8SF_V8SF_INT: |
- type = v8sf_ftype_v8sf_v8sf_int; |
- break; |
- case V8SF_FTYPE_V8SF_V4SF_INT: |
- type = v8sf_ftype_v8sf_v4sf_int; |
- break; |
- case V4SI_FTYPE_V4SI_V4SI_INT: |
- type = v4si_ftype_v4si_v4si_int; |
- break; |
- case V4DF_FTYPE_V4DF_V4DF_INT: |
- type = v4df_ftype_v4df_v4df_int; |
- break; |
- case V4DF_FTYPE_V4DF_V2DF_INT: |
- type = v4df_ftype_v4df_v2df_int; |
- break; |
- case V4SF_FTYPE_V4SF_V4SF_INT: |
- type = v4sf_ftype_v4sf_v4sf_int; |
- break; |
- case V2DI_FTYPE_V2DI_V2DI_INT: |
- case V2DI2TI_FTYPE_V2DI_V2DI_INT: |
- type = v2di_ftype_v2di_v2di_int; |
- break; |
- case V2DF_FTYPE_V2DF_V2DF_INT: |
- type = v2df_ftype_v2df_v2df_int; |
- break; |
- case V2DI_FTYPE_V2DI_UINT_UINT: |
- type = v2di_ftype_v2di_unsigned_unsigned; |
- break; |
- case V2DI_FTYPE_V2DI_V2DI_UINT_UINT: |
- type = v2di_ftype_v2di_v2di_unsigned_unsigned; |
- break; |
- case V1DI2DI_FTYPE_V1DI_V1DI_INT: |
- type = v1di_ftype_v1di_v1di_int; |
- break; |
- default: |
- gcc_unreachable (); |
- } |
- |
- def_builtin_const (d->mask, d->name, type, d->code); |
+ ftype = (enum ix86_builtin_func_type) d->flag; |
+ def_builtin_const (d->mask, d->name, ftype, d->code); |
} |
/* pcmpestr[im] insns. */ |
@@ -23246,9 +22551,9 @@ ix86_init_mmx_sse_builtins (void) |
i++, d++) |
{ |
if (d->code == IX86_BUILTIN_PCMPESTRM128) |
- ftype = v16qi_ftype_v16qi_int_v16qi_int_int; |
+ ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT; |
else |
- ftype = int_ftype_v16qi_int_v16qi_int_int; |
+ ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT; |
def_builtin_const (d->mask, d->name, ftype, d->code); |
} |
@@ -23258,197 +22563,129 @@ ix86_init_mmx_sse_builtins (void) |
i++, d++) |
{ |
if (d->code == IX86_BUILTIN_PCMPISTRM128) |
- ftype = v16qi_ftype_v16qi_v16qi_int; |
+ ftype = V16QI_FTYPE_V16QI_V16QI_INT; |
else |
- ftype = int_ftype_v16qi_v16qi_int; |
+ ftype = INT_FTYPE_V16QI_V16QI_INT; |
def_builtin_const (d->mask, d->name, ftype, d->code); |
} |
/* comi/ucomi insns. */ |
for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++) |
- if (d->mask == OPTION_MASK_ISA_SSE2) |
- def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code); |
- else |
- def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code); |
+ { |
+ if (d->mask == OPTION_MASK_ISA_SSE2) |
+ ftype = INT_FTYPE_V2DF_V2DF; |
+ else |
+ ftype = INT_FTYPE_V4SF_V4SF; |
+ def_builtin_const (d->mask, d->name, ftype, d->code); |
+ } |
/* SSE */ |
- def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR); |
- def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR); |
+ def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", |
+ VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR); |
+ def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", |
+ UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR); |
/* SSE or 3DNow!A */ |
- def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ); |
+ def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, |
+ "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR, |
+ IX86_BUILTIN_MASKMOVQ); |
/* SSE2 */ |
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU); |
+ def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", |
+ VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU); |
- def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH); |
- x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE); |
+ def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", |
+ VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH); |
+ x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", |
+ VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE); |
/* SSE3. */ |
- def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR); |
- def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT); |
+ def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", |
+ VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR); |
+ def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", |
+ VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT); |
/* AES */ |
- def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128); |
- def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128); |
- def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128); |
- def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128); |
- def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128); |
- def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128); |
+ def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", |
+ V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128); |
+ def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", |
+ V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128); |
+ def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", |
+ V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128); |
+ def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", |
+ V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128); |
+ def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", |
+ V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128); |
+ def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", |
+ V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128); |
/* PCLMUL */ |
- def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128); |
+ def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", |
+ V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128); |
- /* AVX */ |
- def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void, |
- TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER); |
- |
- /* Access to the vec_init patterns. */ |
- ftype = build_function_type_list (V2SI_type_node, integer_type_node, |
- integer_type_node, NULL_TREE); |
- def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI); |
- |
- ftype = build_function_type_list (V4HI_type_node, short_integer_type_node, |
- short_integer_type_node, |
- short_integer_type_node, |
- short_integer_type_node, NULL_TREE); |
- def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI); |
- |
- ftype = build_function_type_list (V8QI_type_node, char_type_node, |
- char_type_node, char_type_node, |
- char_type_node, char_type_node, |
- char_type_node, char_type_node, |
- char_type_node, NULL_TREE); |
- def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI); |
+ /* MMX access to the vec_init patterns. */ |
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", |
+ V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI); |
- /* Access to the vec_extract patterns. */ |
- ftype = build_function_type_list (double_type_node, V2DF_type_node, |
- integer_type_node, NULL_TREE); |
- def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF); |
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", |
+ V4HI_FTYPE_HI_HI_HI_HI, |
+ IX86_BUILTIN_VEC_INIT_V4HI); |
- ftype = build_function_type_list (long_long_integer_type_node, |
- V2DI_type_node, integer_type_node, |
- NULL_TREE); |
- def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI); |
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", |
+ V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI, |
+ IX86_BUILTIN_VEC_INIT_V8QI); |
- ftype = build_function_type_list (float_type_node, V4SF_type_node, |
- integer_type_node, NULL_TREE); |
- def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF); |
+ /* Access to the vec_extract patterns. */ |
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", |
+ DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF); |
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", |
+ DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI); |
+ def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", |
+ FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF); |
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", |
+ SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI); |
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", |
+ HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI); |
+ |
+ def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, |
+ "__builtin_ia32_vec_ext_v4hi", |
+ HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI); |
+ |
+ def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", |
+ SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI); |
+ |
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", |
+ QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI); |
- ftype = build_function_type_list (intSI_type_node, V4SI_type_node, |
- integer_type_node, NULL_TREE); |
- def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI); |
+ /* Access to the vec_set patterns. */ |
+ def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, |
+ "__builtin_ia32_vec_set_v2di", |
+ V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI); |
- ftype = build_function_type_list (intHI_type_node, V8HI_type_node, |
- integer_type_node, NULL_TREE); |
- def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI); |
+ def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", |
+ V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF); |
- ftype = build_function_type_list (intHI_type_node, V4HI_type_node, |
- integer_type_node, NULL_TREE); |
- def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI); |
+ def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", |
+ V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI); |
- ftype = build_function_type_list (intSI_type_node, V2SI_type_node, |
- integer_type_node, NULL_TREE); |
- def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI); |
+ def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", |
+ V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI); |
- ftype = build_function_type_list (intQI_type_node, V16QI_type_node, |
- integer_type_node, NULL_TREE); |
- def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI); |
+ def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, |
+ "__builtin_ia32_vec_set_v4hi", |
+ V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI); |
- /* Access to the vec_set patterns. */ |
- ftype = build_function_type_list (V2DI_type_node, V2DI_type_node, |
- intDI_type_node, |
- integer_type_node, NULL_TREE); |
- def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI); |
- |
- ftype = build_function_type_list (V4SF_type_node, V4SF_type_node, |
- float_type_node, |
- integer_type_node, NULL_TREE); |
- def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF); |
- |
- ftype = build_function_type_list (V4SI_type_node, V4SI_type_node, |
- intSI_type_node, |
- integer_type_node, NULL_TREE); |
- def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI); |
- |
- ftype = build_function_type_list (V8HI_type_node, V8HI_type_node, |
- intHI_type_node, |
- integer_type_node, NULL_TREE); |
- def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI); |
- |
- ftype = build_function_type_list (V4HI_type_node, V4HI_type_node, |
- intHI_type_node, |
- integer_type_node, NULL_TREE); |
- def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI); |
- |
- ftype = build_function_type_list (V16QI_type_node, V16QI_type_node, |
- intQI_type_node, |
- integer_type_node, NULL_TREE); |
- def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI); |
- |
- /* Add SSE5 multi-arg argument instructions */ |
+ def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", |
+ V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI); |
+ |
+ /* Add FMA4 multi-arg argument instructions */ |
for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++) |
{ |
- tree mtype = NULL_TREE; |
- |
if (d->name == 0) |
continue; |
- switch ((enum multi_arg_type)d->flag) |
- { |
- case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break; |
- case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break; |
- case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break; |
- case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break; |
- case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break; |
- case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break; |
- case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break; |
- case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break; |
- case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break; |
- case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break; |
- case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break; |
- case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break; |
- case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break; |
- case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break; |
- case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break; |
- case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break; |
- case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break; |
- case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break; |
- case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break; |
- case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break; |
- case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break; |
- case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break; |
- case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break; |
- case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break; |
- case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break; |
- case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break; |
- case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break; |
- case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break; |
- case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break; |
- case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break; |
- case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break; |
- case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break; |
- case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break; |
- case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break; |
- case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break; |
- case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break; |
- case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break; |
- case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break; |
- case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break; |
- case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break; |
- case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break; |
- case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break; |
- case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break; |
- case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break; |
- case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break; |
- case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break; |
- case MULTI_ARG_UNKNOWN: |
- default: |
- gcc_unreachable (); |
- } |
- |
- if (mtype) |
- def_builtin_const (d->mask, d->name, mtype, d->code); |
+ ftype = (enum ix86_builtin_func_type) d->flag; |
+ def_builtin_const (d->mask, d->name, ftype, d->code); |
} |
} |
@@ -23502,65 +22739,76 @@ ix86_init_builtins_va_builtins_abi (void) |
} |
static void |
-ix86_init_builtins (void) |
+ix86_init_builtin_types (void) |
{ |
- tree float128_type_node = make_node (REAL_TYPE); |
- tree ftype, decl; |
+ tree float128_type_node, float80_type_node; |
/* The __float80 type. */ |
- if (TYPE_MODE (long_double_type_node) == XFmode) |
- (*lang_hooks.types.register_builtin_type) (long_double_type_node, |
- "__float80"); |
- else |
+ float80_type_node = long_double_type_node; |
+ if (TYPE_MODE (float80_type_node) != XFmode) |
{ |
/* The __float80 type. */ |
- tree float80_type_node = make_node (REAL_TYPE); |
+ float80_type_node = make_node (REAL_TYPE); |
TYPE_PRECISION (float80_type_node) = 80; |
layout_type (float80_type_node); |
- (*lang_hooks.types.register_builtin_type) (float80_type_node, |
- "__float80"); |
} |
+ (*lang_hooks.types.register_builtin_type) (float80_type_node, "__float80"); |
/* The __float128 type. */ |
+ float128_type_node = make_node (REAL_TYPE); |
TYPE_PRECISION (float128_type_node) = 128; |
layout_type (float128_type_node); |
- (*lang_hooks.types.register_builtin_type) (float128_type_node, |
- "__float128"); |
+ (*lang_hooks.types.register_builtin_type) (float128_type_node, "__float128"); |
+ |
+ /* This macro is built by i386-builtin-types.awk. */ |
+ DEFINE_BUILTIN_PRIMITIVE_TYPES; |
+} |
+ |
+static void |
+ix86_init_builtins (void) |
+{ |
+ tree t; |
+ |
+ ix86_init_builtin_types (); |
/* TFmode support builtins. */ |
- ftype = build_function_type (float128_type_node, void_list_node); |
- decl = add_builtin_function ("__builtin_infq", ftype, |
- IX86_BUILTIN_INFQ, BUILT_IN_MD, |
- NULL, NULL_TREE); |
- ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl; |
+ def_builtin_const (0, "__builtin_infq", |
+ FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ); |
+ def_builtin_const (0, "__builtin_huge_valq", |
+ FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ); |
/* We will expand them to normal call if SSE2 isn't available since |
they are used by libgcc. */ |
- ftype = build_function_type_list (float128_type_node, |
- float128_type_node, |
- NULL_TREE); |
- decl = add_builtin_function ("__builtin_fabsq", ftype, |
- IX86_BUILTIN_FABSQ, BUILT_IN_MD, |
- "__fabstf2", NULL_TREE); |
- ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl; |
- TREE_READONLY (decl) = 1; |
- |
- ftype = build_function_type_list (float128_type_node, |
- float128_type_node, |
- float128_type_node, |
- NULL_TREE); |
- decl = add_builtin_function ("__builtin_copysignq", ftype, |
- IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD, |
- "__copysigntf3", NULL_TREE); |
- ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl; |
- TREE_READONLY (decl) = 1; |
+ t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128); |
+ t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ, |
+ BUILT_IN_MD, "__fabstf2", NULL_TREE); |
+ TREE_READONLY (t) = 1; |
+ ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t; |
+ |
+ t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128); |
+ t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ, |
+ BUILT_IN_MD, "__copysigntf3", NULL_TREE); |
+ TREE_READONLY (t) = 1; |
+ ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t; |
ix86_init_mmx_sse_builtins (); |
+ |
if (TARGET_64BIT) |
ix86_init_builtins_va_builtins_abi (); |
} |
+/* Return the ix86 builtin for CODE. */ |
+ |
+static tree |
+ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) |
+{ |
+ if (code >= IX86_BUILTIN_MAX) |
+ return error_mark_node; |
+ |
+ return ix86_builtins[code]; |
+} |
+ |
/* Errors in the source file can cause expand_expr to return const0_rtx |
where we expect a vector. To avoid crashing, use one of the vector |
clear instructions. */ |
@@ -23621,8 +22869,8 @@ ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target) |
static rtx |
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target, |
- enum multi_arg_type m_type, |
- enum insn_code sub_code) |
+ enum ix86_builtin_func_type m_type, |
+ enum rtx_code sub_code) |
{ |
rtx pat; |
int i; |
@@ -23640,16 +22888,28 @@ ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target, |
switch (m_type) |
{ |
+ case MULTI_ARG_4_DF2_DI_I: |
+ case MULTI_ARG_4_DF2_DI_I1: |
+ case MULTI_ARG_4_SF2_SI_I: |
+ case MULTI_ARG_4_SF2_SI_I1: |
+ nargs = 4; |
+ last_arg_constant = true; |
+ break; |
+ |
case MULTI_ARG_3_SF: |
case MULTI_ARG_3_DF: |
+ case MULTI_ARG_3_SF2: |
+ case MULTI_ARG_3_DF2: |
case MULTI_ARG_3_DI: |
case MULTI_ARG_3_SI: |
case MULTI_ARG_3_SI_DI: |
case MULTI_ARG_3_HI: |
case MULTI_ARG_3_HI_SI: |
case MULTI_ARG_3_QI: |
- case MULTI_ARG_3_PERMPS: |
- case MULTI_ARG_3_PERMPD: |
+ case MULTI_ARG_3_DI2: |
+ case MULTI_ARG_3_SI2: |
+ case MULTI_ARG_3_HI2: |
+ case MULTI_ARG_3_QI2: |
nargs = 3; |
break; |
@@ -23672,6 +22932,8 @@ ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target, |
case MULTI_ARG_1_SF: |
case MULTI_ARG_1_DF: |
+ case MULTI_ARG_1_SF2: |
+ case MULTI_ARG_1_DF2: |
case MULTI_ARG_1_DI: |
case MULTI_ARG_1_SI: |
case MULTI_ARG_1_HI: |
@@ -23682,13 +22944,9 @@ ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target, |
case MULTI_ARG_1_QI_DI: |
case MULTI_ARG_1_QI_SI: |
case MULTI_ARG_1_QI_HI: |
- case MULTI_ARG_1_PH2PS: |
- case MULTI_ARG_1_PS2PH: |
nargs = 1; |
break; |
- case MULTI_ARG_2_SF_CMP: |
- case MULTI_ARG_2_DF_CMP: |
case MULTI_ARG_2_DI_CMP: |
case MULTI_ARG_2_SI_CMP: |
case MULTI_ARG_2_HI_CMP: |
@@ -23707,7 +22965,6 @@ ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target, |
tf_p = true; |
break; |
- case MULTI_ARG_UNKNOWN: |
default: |
gcc_unreachable (); |
} |
@@ -23728,7 +22985,7 @@ ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target, |
if (last_arg_constant && i == nargs-1) |
{ |
- if (GET_CODE (op) != CONST_INT) |
+ if (!CONST_INT_P (op)) |
{ |
error ("last argument must be an immediate"); |
return gen_reg_rtx (tmode); |
@@ -23782,6 +23039,10 @@ ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target, |
pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op); |
break; |
+ case 4: |
+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op); |
+ break; |
+ |
default: |
gcc_unreachable (); |
} |
@@ -24201,7 +23462,7 @@ ix86_expand_args_builtin (const struct builtin_description *d, |
bool swap = false; |
enum rtx_code comparison = d->comparison; |
- switch ((enum ix86_builtin_type) d->flag) |
+ switch ((enum ix86_builtin_func_type) d->flag) |
{ |
case INT_FTYPE_V8SF_V8SF_PTEST: |
case INT_FTYPE_V4DI_V4DI_PTEST: |
@@ -24212,6 +23473,10 @@ ix86_expand_args_builtin (const struct builtin_description *d, |
return ix86_expand_sse_ptest (d, exp, target); |
case FLOAT128_FTYPE_FLOAT128: |
case FLOAT_FTYPE_FLOAT: |
+ case INT_FTYPE_INT: |
+ case UINT64_FTYPE_INT: |
+ case UINT16_FTYPE_UINT16: |
+ case INT64_FTYPE_INT64: |
case INT64_FTYPE_V4SF: |
case INT64_FTYPE_V2DF: |
case INT_FTYPE_V16QI: |
@@ -24337,11 +23602,13 @@ ix86_expand_args_builtin (const struct builtin_description *d, |
case UINT_FTYPE_UINT_UINT: |
case UINT_FTYPE_UINT_USHORT: |
case UINT_FTYPE_UINT_UCHAR: |
+ case UINT16_FTYPE_UINT16_INT: |
+ case UINT8_FTYPE_UINT8_INT: |
nargs = 2; |
break; |
- case V2DI2TI_FTYPE_V2DI_INT: |
+ case V2DI_FTYPE_V2DI_INT_CONVERT: |
nargs = 2; |
- rmode = V2DImode; |
+ rmode = V1TImode; |
nargs_constant = 1; |
break; |
case V8HI_FTYPE_V8HI_INT: |
@@ -24380,12 +23647,12 @@ ix86_expand_args_builtin (const struct builtin_description *d, |
nargs = 3; |
nargs_constant = 1; |
break; |
- case V2DI2TI_FTYPE_V2DI_V2DI_INT: |
+ case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT: |
nargs = 3; |
rmode = V2DImode; |
nargs_constant = 1; |
break; |
- case V1DI2DI_FTYPE_V1DI_V1DI_INT: |
+ case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT: |
nargs = 3; |
rmode = DImode; |
nargs_constant = 1; |
@@ -24394,6 +23661,13 @@ ix86_expand_args_builtin (const struct builtin_description *d, |
nargs = 3; |
nargs_constant = 2; |
break; |
+ case V2DF_FTYPE_V2DF_V2DF_V2DI_INT: |
+ case V4DF_FTYPE_V4DF_V4DF_V4DI_INT: |
+ case V4SF_FTYPE_V4SF_V4SF_V4SI_INT: |
+ case V8SF_FTYPE_V8SF_V8SF_V8SI_INT: |
+ nargs = 4; |
+ nargs_constant = 1; |
+ break; |
case V2DI_FTYPE_V2DI_V2DI_UINT_UINT: |
nargs = 4; |
nargs_constant = 2; |
@@ -24463,6 +23737,10 @@ ix86_expand_args_builtin (const struct builtin_description *d, |
case CODE_FOR_sse4_1_blendpd: |
case CODE_FOR_avx_vpermilv2df: |
+ case CODE_FOR_xop_vpermil2v2df3: |
+ case CODE_FOR_xop_vpermil2v4sf3: |
+ case CODE_FOR_xop_vpermil2v4df3: |
+ case CODE_FOR_xop_vpermil2v8sf3: |
error ("the last argument must be a 2-bit immediate"); |
return const0_rtx; |
@@ -24569,18 +23847,24 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, |
{ |
rtx op; |
enum machine_mode mode; |
- } args[2]; |
+ } args[3]; |
enum insn_code icode = d->icode; |
bool last_arg_constant = false; |
const struct insn_data *insn_p = &insn_data[icode]; |
enum machine_mode tmode = insn_p->operand[0].mode; |
enum { load, store } klass; |
- switch ((enum ix86_special_builtin_type) d->flag) |
+ switch ((enum ix86_builtin_func_type) d->flag) |
{ |
case VOID_FTYPE_VOID: |
emit_insn (GEN_FCN (icode) (target)); |
return 0; |
+ case UINT64_FTYPE_VOID: |
+ nargs = 0; |
+ klass = load; |
+ memory = 0; |
+ break; |
+ case UINT64_FTYPE_PUNSIGNED: |
case V2DI_FTYPE_PV2DI: |
case V32QI_FTYPE_PCCHAR: |
case V16QI_FTYPE_PCCHAR: |
@@ -24590,6 +23874,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, |
case V4DF_FTYPE_PCV2DF: |
case V4DF_FTYPE_PCDOUBLE: |
case V2DF_FTYPE_PCDOUBLE: |
+ case VOID_FTYPE_PVOID: |
nargs = 1; |
klass = load; |
memory = 0; |
@@ -24603,7 +23888,7 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, |
case VOID_FTYPE_PFLOAT_V4SF: |
case VOID_FTYPE_PDOUBLE_V4DF: |
case VOID_FTYPE_PDOUBLE_V2DF: |
- case VOID_FTYPE_PDI_DI: |
+ case VOID_FTYPE_PULONGLONG_ULONGLONG: |
case VOID_FTYPE_PINT_INT: |
nargs = 1; |
klass = store; |
@@ -24633,6 +23918,15 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, |
/* Reserve memory operand for target. */ |
memory = ARRAY_SIZE (args); |
break; |
+ case VOID_FTYPE_UINT_UINT_UINT: |
+ case VOID_FTYPE_UINT64_UINT_UINT: |
+ case UCHAR_FTYPE_UINT_UINT_UINT: |
+ case UCHAR_FTYPE_UINT64_UINT_UINT: |
+ nargs = 3; |
+ klass = load; |
+ memory = ARRAY_SIZE (args); |
+ last_arg_constant = true; |
+ break; |
default: |
gcc_unreachable (); |
} |
@@ -24669,12 +23963,16 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, |
if (last_arg_constant && (i + 1) == nargs) |
{ |
if (!match) |
- switch (icode) |
- { |
- default: |
+ { |
+ if (icode == CODE_FOR_lwp_lwpvalsi3 |
+ || icode == CODE_FOR_lwp_lwpinssi3 |
+ || icode == CODE_FOR_lwp_lwpvaldi3 |
+ || icode == CODE_FOR_lwp_lwpinsdi3) |
+ error ("the last argument must be a 32-bit immediate"); |
+ else |
error ("the last argument must be an 8-bit immediate"); |
- return const0_rtx; |
- } |
+ return const0_rtx; |
+ } |
} |
else |
{ |
@@ -24703,12 +24001,18 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, |
switch (nargs) |
{ |
+ case 0: |
+ pat = GEN_FCN (icode) (target); |
+ break; |
case 1: |
pat = GEN_FCN (icode) (target, args[0].op); |
break; |
case 2: |
pat = GEN_FCN (icode) (target, args[0].op, args[1].op); |
break; |
+ case 3: |
+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op); |
+ break; |
default: |
gcc_unreachable (); |
} |
@@ -24989,7 +24293,22 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, |
case IX86_BUILTIN_VEC_SET_V16QI: |
return ix86_expand_vec_set_builtin (exp); |
+ case IX86_BUILTIN_VEC_PERM_V2DF: |
+ case IX86_BUILTIN_VEC_PERM_V4SF: |
+ case IX86_BUILTIN_VEC_PERM_V2DI: |
+ case IX86_BUILTIN_VEC_PERM_V4SI: |
+ case IX86_BUILTIN_VEC_PERM_V8HI: |
+ case IX86_BUILTIN_VEC_PERM_V16QI: |
+ case IX86_BUILTIN_VEC_PERM_V2DI_U: |
+ case IX86_BUILTIN_VEC_PERM_V4SI_U: |
+ case IX86_BUILTIN_VEC_PERM_V8HI_U: |
+ case IX86_BUILTIN_VEC_PERM_V16QI_U: |
+ case IX86_BUILTIN_VEC_PERM_V4DF: |
+ case IX86_BUILTIN_VEC_PERM_V8SF: |
+ return ix86_expand_vec_perm_builtin (exp); |
+ |
case IX86_BUILTIN_INFQ: |
+ case IX86_BUILTIN_HUGE_VALQ: |
{ |
REAL_VALUE_TYPE inf; |
rtx tmp; |
@@ -25006,6 +24325,23 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, |
return target; |
} |
+ case IX86_BUILTIN_LLWPCB: |
+ arg0 = CALL_EXPR_ARG (exp, 0); |
+ op0 = expand_normal (arg0); |
+ icode = CODE_FOR_lwp_llwpcb; |
+ if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode)) |
+ op0 = copy_to_mode_reg (Pmode, op0); |
+ emit_insn (gen_lwp_llwpcb (op0)); |
+ return 0; |
+ |
+ case IX86_BUILTIN_SLWPCB: |
+ icode = CODE_FOR_lwp_slwpcb; |
+ if (!target |
+ || ! (*insn_data[icode].operand[0].predicate) (target, Pmode)) |
+ target = gen_reg_rtx (Pmode); |
+ emit_insn (gen_lwp_slwpcb (target)); |
+ return target; |
+ |
default: |
break; |
} |
@@ -25050,8 +24386,8 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, |
for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++) |
if (d->code == fcode) |
return ix86_expand_multi_arg_builtin (d->icode, exp, target, |
- (enum multi_arg_type)d->flag, |
- d->comparison); |
+ (enum ix86_builtin_func_type) |
+ d->flag, d->comparison); |
gcc_unreachable (); |
} |
@@ -25061,14 +24397,16 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, |
if it is not available. */ |
static tree |
-ix86_builtin_vectorized_function (unsigned int fn, tree type_out, |
+ix86_builtin_vectorized_function (tree fndecl, tree type_out, |
tree type_in) |
{ |
enum machine_mode in_mode, out_mode; |
int in_n, out_n; |
+ enum built_in_function fn = DECL_FUNCTION_CODE (fndecl); |
if (TREE_CODE (type_out) != VECTOR_TYPE |
- || TREE_CODE (type_in) != VECTOR_TYPE) |
+ || TREE_CODE (type_in) != VECTOR_TYPE |
+ || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL) |
return NULL_TREE; |
out_mode = TYPE_MODE (TREE_TYPE (type_out)); |
@@ -25102,13 +24440,26 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, |
return ix86_builtins[IX86_BUILTIN_CVTPS2DQ]; |
break; |
+ case BUILT_IN_COPYSIGN: |
+ if (out_mode == DFmode && out_n == 2 |
+ && in_mode == DFmode && in_n == 2) |
+ return ix86_builtins[IX86_BUILTIN_CPYSGNPD]; |
+ break; |
+ |
+ case BUILT_IN_COPYSIGNF: |
+ if (out_mode == SFmode && out_n == 4 |
+ && in_mode == SFmode && in_n == 4) |
+ return ix86_builtins[IX86_BUILTIN_CPYSGNPS]; |
+ break; |
+ |
default: |
; |
} |
/* Dispatch to a handler for a vectorization library. */ |
if (ix86_veclib_handler) |
- return (*ix86_veclib_handler)(fn, type_out, type_in); |
+ return (*ix86_veclib_handler) ((enum built_in_function) fn, type_out, |
+ type_in); |
return NULL_TREE; |
} |
@@ -25216,7 +24567,8 @@ ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in) |
fntype = build_function_type_list (type_out, type_in, type_in, NULL); |
/* Build a function declaration for the vectorized function. */ |
- new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype); |
+ new_fndecl = build_decl (BUILTINS_LOCATION, |
+ FUNCTION_DECL, get_identifier (name), fntype); |
TREE_PUBLIC (new_fndecl) = 1; |
DECL_EXTERNAL (new_fndecl) = 1; |
DECL_IS_NOVOPS (new_fndecl) = 1; |
@@ -25300,7 +24652,8 @@ ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in) |
fntype = build_function_type_list (type_out, type_in, type_in, NULL); |
/* Build a function declaration for the vectorized function. */ |
- new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype); |
+ new_fndecl = build_decl (BUILTINS_LOCATION, |
+ FUNCTION_DECL, get_identifier (name), fntype); |
TREE_PUBLIC (new_fndecl) = 1; |
DECL_EXTERNAL (new_fndecl) = 1; |
DECL_IS_NOVOPS (new_fndecl) = 1; |
@@ -25318,9 +24671,7 @@ ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in) |
static tree |
ix86_vectorize_builtin_conversion (unsigned int code, tree type) |
{ |
- if (!TARGET_SSE2 || TREE_CODE (type) != VECTOR_TYPE |
- /* There are only conversions from/to signed integers. */ |
- || TYPE_UNSIGNED (TREE_TYPE (type))) |
+ if (! (TARGET_SSE2 && TREE_CODE (type) == VECTOR_TYPE)) |
return NULL_TREE; |
switch (code) |
@@ -25329,7 +24680,9 @@ ix86_vectorize_builtin_conversion (unsigned int code, tree type) |
switch (TYPE_MODE (type)) |
{ |
case V4SImode: |
- return ix86_builtins[IX86_BUILTIN_CVTDQ2PS]; |
+ return TYPE_UNSIGNED (type) |
+ ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS] |
+ : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]; |
default: |
return NULL_TREE; |
} |
@@ -25338,7 +24691,9 @@ ix86_vectorize_builtin_conversion (unsigned int code, tree type) |
switch (TYPE_MODE (type)) |
{ |
case V4SImode: |
- return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]; |
+ return TYPE_UNSIGNED (type) |
+ ? NULL_TREE |
+ : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]; |
default: |
return NULL_TREE; |
} |
@@ -25355,7 +24710,7 @@ static tree |
ix86_builtin_reciprocal (unsigned int fn, bool md_fn, |
bool sqrt ATTRIBUTE_UNUSED) |
{ |
- if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p () |
+ if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p () |
&& flag_finite_math_only && !flag_trapping_math |
&& flag_unsafe_math_optimizations)) |
return NULL_TREE; |
@@ -25383,6 +24738,134 @@ ix86_builtin_reciprocal (unsigned int fn, bool md_fn, |
return NULL_TREE; |
} |
} |
+ |
/* Helper for avx_vpermilps256_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vpermilp_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode);
  unsigned mask = 0;
  unsigned char ipar[8];

  /* The parallel must select exactly one source element per slot.  */
  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
	return 0;
      ei = INTVAL (er);
      if (ei >= nelt)
	return 0;
      ipar[i] = ei;
    }

  switch (mode)
    {
    case V4DFmode:
      /* In the 256-bit DFmode case, we can only move elements within
         a 128-bit lane.  */
      for (i = 0; i < 2; ++i)
	{
	  if (ipar[i] >= 2)
	    return 0;
	  /* One selector bit per DF element in the low lane.  */
	  mask |= ipar[i] << i;
	}
      for (i = 2; i < 4; ++i)
	{
	  if (ipar[i] < 2)
	    return 0;
	  /* High-lane selectors are encoded relative to that lane,
	     hence the bias of 2.  */
	  mask |= (ipar[i] - 2) << i;
	}
      break;

    case V8SFmode:
      /* In the 256-bit SFmode case, we have full freedom of movement
	 within the low 128-bit lane, but the high 128-bit lane must
	 mirror the exact same pattern.  */
      for (i = 0; i < 4; ++i)
	if (ipar[i] + 4 != ipar[i + 4])
	  return 0;
      /* Only the low-lane pattern needs encoding; fall through and
         build the 128-bit style mask from the first four entries.  */
      nelt = 4;
      /* FALLTHRU */

    case V2DFmode:
    case V4SFmode:
      /* In the 128-bit case, we've full freedom in the placement of
	 the elements from the source operand.  SF selectors take two
	 bits each, DF selectors one (nelt/2 is 2 resp. 1).  */
      for (i = 0; i < nelt; ++i)
	mask |= ipar[i] << (i * (nelt / 2));
      break;

    default:
      gcc_unreachable ();
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
+ |
/* Helper for avx_vperm2f128_v4df_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
  unsigned mask = 0;
  unsigned char ipar[8];

  /* The parallel must select exactly one source element per slot.  */
  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  Indices may select from either source
     operand, hence the 2*nelt bound.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
	return 0;
      ei = INTVAL (er);
      if (ei >= 2 * nelt)
	return 0;
      ipar[i] = ei;
    }

  /* Validate that the halves of the permute are halves: each result
     lane must be a run of consecutive source elements.  */
  for (i = 0; i < nelt2 - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;
  for (i = nelt2; i < nelt - 1; ++i)
    if (ipar[i] + 1 != ipar[i + 1])
      return 0;

  /* Reconstruct the mask: each run must also start on a lane boundary,
     and the lane number goes into bits 0-1 (low half) or 4-5 (high).  */
  for (i = 0; i < 2; ++i)
    {
      unsigned e = ipar[i * nelt2];
      if (e % nelt2)
	return 0;
      e /= nelt2;
      mask |= e << (i * 4);
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
+ |
/* Store OPERAND to the memory after reload is completed. This means |
that we can't easily use assign_stack_local. */ |
@@ -25483,6 +24966,22 @@ ix86_free_from_memory (enum machine_mode mode) |
} |
} |
+/* Implement TARGET_IRA_COVER_CLASSES. If -mfpmath=sse, we prefer |
+ SSE_REGS to FLOAT_REGS if their costs for a pseudo are the |
+ same. */ |
+static const enum reg_class * |
+i386_ira_cover_classes (void) |
+{ |
+ static const enum reg_class sse_fpmath_classes[] = { |
+ GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES |
+ }; |
+ static const enum reg_class no_sse_fpmath_classes[] = { |
+ GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES |
+ }; |
+ |
+ return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes; |
+} |
+ |
/* Put float CONST_DOUBLE in the constant pool instead of fp regs. |
QImode must go into class Q_REGS. |
Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and |
@@ -26381,6 +25880,16 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed) |
*total = 0; |
return false; |
+ case VEC_SELECT: |
+ case VEC_CONCAT: |
+ case VEC_MERGE: |
+ case VEC_DUPLICATE: |
+ /* ??? Assume all of these vector manipulation patterns are |
+ recognizable. In which case they all pretty much have the |
+ same cost. */ |
+ *total = COSTS_N_INSNS (1); |
+ return true; |
+ |
default: |
return false; |
} |
@@ -26438,24 +25947,17 @@ machopic_output_stub (FILE *file, const char *symb, const char *stub) |
if (MACHOPIC_PURE) |
{ |
fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label); |
- fprintf (file, "\tpushl\t%%eax\n"); |
+ fputs ("\tpushl\t%eax\n", file); |
} |
else |
fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name); |
- fprintf (file, "\tjmp\tdyld_stub_binding_helper\n"); |
+ fputs ("\tjmp\tdyld_stub_binding_helper\n", file); |
switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]); |
fprintf (file, "%s:\n", lazy_ptr_name); |
fprintf (file, "\t.indirect_symbol %s\n", symbol_name); |
- fprintf (file, "\t.long %s\n", binder_name); |
-} |
- |
-void |
-darwin_x86_file_end (void) |
-{ |
- darwin_file_end (); |
- ix86_file_end (); |
+ fprintf (file, ASM_LONG "%s\n", binder_name); |
} |
#endif /* TARGET_MACHO */ |
@@ -26515,15 +26017,15 @@ ix86_handle_abi_attribute (tree *node, tree name, |
&& TREE_CODE (*node) != FIELD_DECL |
&& TREE_CODE (*node) != TYPE_DECL) |
{ |
- warning (OPT_Wattributes, "%qs attribute only applies to functions", |
- IDENTIFIER_POINTER (name)); |
+ warning (OPT_Wattributes, "%qE attribute only applies to functions", |
+ name); |
*no_add_attrs = true; |
return NULL_TREE; |
} |
if (!TARGET_64BIT) |
{ |
- warning (OPT_Wattributes, "%qs attribute only available for 64-bit", |
- IDENTIFIER_POINTER (name)); |
+ warning (OPT_Wattributes, "%qE attribute only available for 64-bit", |
+ name); |
*no_add_attrs = true; |
return NULL_TREE; |
} |
@@ -26570,8 +26072,8 @@ ix86_handle_struct_attribute (tree *node, tree name, |
if (!(type && (TREE_CODE (*type) == RECORD_TYPE |
|| TREE_CODE (*type) == UNION_TYPE))) |
{ |
- warning (OPT_Wattributes, "%qs attribute ignored", |
- IDENTIFIER_POINTER (name)); |
+ warning (OPT_Wattributes, "%qE attribute ignored", |
+ name); |
*no_add_attrs = true; |
} |
@@ -26580,14 +26082,41 @@ ix86_handle_struct_attribute (tree *node, tree name, |
|| ((is_attribute_p ("gcc_struct", name) |
&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type))))) |
{ |
- warning (OPT_Wattributes, "%qs incompatible attribute ignored", |
- IDENTIFIER_POINTER (name)); |
+ warning (OPT_Wattributes, "%qE incompatible attribute ignored", |
+ name); |
*no_add_attrs = true; |
} |
return NULL_TREE; |
} |
+static tree |
+ix86_handle_fndecl_attribute (tree *node, tree name, |
+ tree args ATTRIBUTE_UNUSED, |
+ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) |
+{ |
+ if (TREE_CODE (*node) != FUNCTION_DECL) |
+ { |
+ warning (OPT_Wattributes, "%qE attribute only applies to functions", |
+ name); |
+ *no_add_attrs = true; |
+ return NULL_TREE; |
+ } |
+ |
+ if (TARGET_64BIT) |
+ { |
+ warning (OPT_Wattributes, "%qE attribute only available for 32-bit", |
+ name); |
+ return NULL_TREE; |
+ } |
+ |
+#ifndef HAVE_AS_IX86_SWAP |
+ sorry ("ms_hook_prologue attribute needs assembler swap suffix support"); |
+#endif |
+ |
+ return NULL_TREE; |
+} |
+ |
static bool |
ix86_ms_bitfield_layout_p (const_tree record_type) |
{ |
@@ -26676,7 +26205,7 @@ x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED, |
*(*this + vcall_offset) should be added to THIS. */ |
static void |
-x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED, |
+x86_output_mi_thunk (FILE *file, |
tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta, |
HOST_WIDE_INT vcall_offset, tree function) |
{ |
@@ -26684,6 +26213,9 @@ x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED, |
rtx this_param = x86_this_parameter (function); |
rtx this_reg, tmp; |
+ /* Make sure unwind info is emitted for the thunk if needed. */ |
+ final_start_function (emit_barrier (), file, 1); |
+ |
/* If VCALL_OFFSET, we'll need THIS in a register. Might as well |
pull it in now and let DELTA benefit. */ |
if (REG_P (this_param)) |
@@ -26701,7 +26233,10 @@ x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED, |
/* Adjust the this parameter by a fixed constant. */ |
if (delta) |
{ |
- xops[0] = GEN_INT (delta); |
+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'. |
+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */ |
+ bool sub = delta < 0 || delta == 128; |
+ xops[0] = GEN_INT (sub ? -delta : delta); |
xops[1] = this_reg ? this_reg : this_param; |
if (TARGET_64BIT) |
{ |
@@ -26713,8 +26248,13 @@ x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED, |
xops[0] = tmp; |
xops[1] = this_param; |
} |
- output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops); |
+ if (sub) |
+ output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops); |
+ else |
+ output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops); |
} |
+ else if (sub) |
+ output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops); |
else |
output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops); |
} |
@@ -26804,6 +26344,7 @@ x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED, |
output_asm_insn ("jmp\t{*}%1", xops); |
} |
} |
+ final_end_function (); |
} |
static void |
@@ -26845,32 +26386,33 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) |
if (TARGET_64BIT) |
{ |
#ifndef NO_PROFILE_COUNTERS |
- fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno); |
+ fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno); |
#endif |
if (DEFAULT_ABI == SYSV_ABI && flag_pic) |
- fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME); |
+ fputs ("\tcall\t*" MCOUNT_NAME "@GOTPCREL(%rip)\n", file); |
else |
- fprintf (file, "\tcall\t%s\n", MCOUNT_NAME); |
+ fputs ("\tcall\t" MCOUNT_NAME "\n", file); |
} |
else if (flag_pic) |
{ |
#ifndef NO_PROFILE_COUNTERS |
- fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n", |
- LPREFIX, labelno, PROFILE_COUNT_REGISTER); |
+ fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n", |
+ LPREFIX, labelno); |
#endif |
- fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME); |
+ fputs ("\tcall\t*" MCOUNT_NAME "@GOT(%ebx)\n", file); |
} |
else |
{ |
#ifndef NO_PROFILE_COUNTERS |
- fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno, |
- PROFILE_COUNT_REGISTER); |
+ fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n", |
+ LPREFIX, labelno); |
#endif |
- fprintf (file, "\tcall\t%s\n", MCOUNT_NAME); |
+ fputs ("\tcall\t" MCOUNT_NAME "\n", file); |
} |
} |
+#ifdef ASM_OUTPUT_MAX_SKIP_PAD |
/* We don't have exact information about the insn sizes, but we may assume |
quite safely that we are informed about all 1 byte insns and memory |
address sizes. This is enough to eliminate unnecessary padding in |
@@ -26879,7 +26421,7 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) |
static int |
min_insn_size (rtx insn) |
{ |
- int l = 0; |
+ int l = 0, len; |
if (!INSN_P (insn) || !active_insn_p (insn)) |
return 0; |
@@ -26888,9 +26430,7 @@ min_insn_size (rtx insn) |
if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE |
&& XINT (PATTERN (insn), 1) == UNSPECV_ALIGN) |
return 0; |
- if (JUMP_P (insn) |
- && (GET_CODE (PATTERN (insn)) == ADDR_VEC |
- || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)) |
+ if (JUMP_TABLE_DATA_P (insn)) |
return 0; |
/* Important case - calls are always 5 bytes. |
@@ -26899,14 +26439,31 @@ min_insn_size (rtx insn) |
&& symbolic_reference_mentioned_p (PATTERN (insn)) |
&& !SIBLING_CALL_P (insn)) |
return 5; |
- if (get_attr_length (insn) <= 1) |
+ len = get_attr_length (insn); |
+ if (len <= 1) |
return 1; |
- /* For normal instructions we may rely on the sizes of addresses |
- and the presence of symbol to require 4 bytes of encoding. |
- This is not the case for jumps where references are PC relative. */ |
+ /* For normal instructions we rely on get_attr_length being exact, |
+ with a few exceptions. */ |
if (!JUMP_P (insn)) |
{ |
+ enum attr_type type = get_attr_type (insn); |
+ |
+ switch (type) |
+ { |
+ case TYPE_MULTI: |
+ if (GET_CODE (PATTERN (insn)) == ASM_INPUT |
+ || asm_noperands (PATTERN (insn)) >= 0) |
+ return 0; |
+ break; |
+ case TYPE_OTHER: |
+ case TYPE_FCMP: |
+ break; |
+ default: |
+ /* Otherwise trust get_attr_length. */ |
+ return len; |
+ } |
+ |
l = get_attr_length_address (insn); |
if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn))) |
l = 4; |
@@ -26921,7 +26478,7 @@ min_insn_size (rtx insn) |
window. */ |
static void |
-ix86_avoid_jump_misspredicts (void) |
+ix86_avoid_jump_mispredicts (void) |
{ |
rtx insn, start = get_insns (); |
int nbytes = 0, njumps = 0; |
@@ -26935,15 +26492,52 @@ ix86_avoid_jump_misspredicts (void) |
The smallest offset in the page INSN can start is the case where START |
ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN). |
- We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN). |
+ We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN). |
*/ |
- for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) |
+ for (insn = start; insn; insn = NEXT_INSN (insn)) |
{ |
+ int min_size; |
- nbytes += min_insn_size (insn); |
+ if (LABEL_P (insn)) |
+ { |
+ int align = label_to_alignment (insn); |
+ int max_skip = label_to_max_skip (insn); |
+ |
+ if (max_skip > 15) |
+ max_skip = 15; |
+ /* If align > 3, only up to 16 - max_skip - 1 bytes can be |
+ already in the current 16 byte page, because otherwise |
+ ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer |
+ bytes to reach 16 byte boundary. */ |
+ if (align <= 0 |
+ || (align <= 3 && max_skip != (1 << align) - 1)) |
+ max_skip = 0; |
+ if (dump_file) |
+ fprintf (dump_file, "Label %i with max_skip %i\n", |
+ INSN_UID (insn), max_skip); |
+ if (max_skip) |
+ { |
+ while (nbytes + max_skip >= 16) |
+ { |
+ start = NEXT_INSN (start); |
+ if ((JUMP_P (start) |
+ && GET_CODE (PATTERN (start)) != ADDR_VEC |
+ && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC) |
+ || CALL_P (start)) |
+ njumps--, isjump = 1; |
+ else |
+ isjump = 0; |
+ nbytes -= min_insn_size (start); |
+ } |
+ } |
+ continue; |
+ } |
+ |
+ min_size = min_insn_size (insn); |
+ nbytes += min_size; |
if (dump_file) |
- fprintf(dump_file, "Insn %i estimated to %i bytes\n", |
- INSN_UID (insn), min_insn_size (insn)); |
+ fprintf (dump_file, "Insn %i estimated to %i bytes\n", |
+ INSN_UID (insn), min_size); |
if ((JUMP_P (insn) |
&& GET_CODE (PATTERN (insn)) != ADDR_VEC |
&& GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC) |
@@ -26967,7 +26561,7 @@ ix86_avoid_jump_misspredicts (void) |
gcc_assert (njumps >= 0); |
if (dump_file) |
fprintf (dump_file, "Interval %i to %i has %i bytes\n", |
- INSN_UID (start), INSN_UID (insn), nbytes); |
+ INSN_UID (start), INSN_UID (insn), nbytes); |
if (njumps == 3 && isjump && nbytes < 16) |
{ |
@@ -26976,10 +26570,11 @@ ix86_avoid_jump_misspredicts (void) |
if (dump_file) |
fprintf (dump_file, "Padding insn %i by %i bytes!\n", |
INSN_UID (insn), padsize); |
- emit_insn_before (gen_align (GEN_INT (padsize)), insn); |
+ emit_insn_before (gen_pad (GEN_INT (padsize)), insn); |
} |
} |
} |
+#endif |
/* AMD Athlon works faster |
when RET is not destination of conditional jump or directly preceded |
@@ -27028,7 +26623,7 @@ ix86_pad_returns (void) |
} |
if (replace) |
{ |
- emit_insn_before (gen_return_internal_long (), ret); |
+ emit_jump_insn_before (gen_return_internal_long (), ret); |
delete_insn (ret); |
} |
} |
@@ -27039,12 +26634,15 @@ ix86_pad_returns (void) |
static void |
ix86_reorg (void) |
{ |
- if (TARGET_PAD_RETURNS && optimize |
- && optimize_function_for_speed_p (cfun)) |
- ix86_pad_returns (); |
- if (TARGET_FOUR_JUMP_LIMIT && optimize |
- && optimize_function_for_speed_p (cfun)) |
- ix86_avoid_jump_misspredicts (); |
+ if (optimize && optimize_function_for_speed_p (cfun)) |
+ { |
+ if (TARGET_PAD_RETURNS) |
+ ix86_pad_returns (); |
+#ifdef ASM_OUTPUT_MAX_SKIP_PAD |
+ if (TARGET_FOUR_JUMP_LIMIT) |
+ ix86_avoid_jump_mispredicts (); |
+#endif |
+ } |
} |
/* Return nonzero when QImode register that must be represented via REX prefix |
@@ -27123,6 +26721,35 @@ x86_emit_floatuns (rtx operands[2]) |
emit_label (donelab); |
} |
/* AVX does not support 32-byte integer vector operations,
   thus the longest vector we are faced with is V16QImode.  */
#define MAX_VECT_LEN	16

/* Description of a constant vector permutation being expanded.  */
struct expand_vec_perm_d
{
  rtx target, op0, op1;		/* Destination and the two source vectors
				   (op0 == op1 for a one-operand permute).  */
  unsigned char perm[MAX_VECT_LEN]; /* Selected element index per slot.  */
  enum machine_mode vmode;	/* Mode of target/op0/op1.  */
  unsigned char nelt;		/* Number of elements in VMODE.  */
  bool testing_p;		/* NOTE(review): presumably "validate only,
				   emit no insns" -- confirm against the
				   expand_vec_perm_* implementations.  */
};

static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);

/* Get a vector mode of the same size as the original but with elements
   twice as wide.  This is only guaranteed to apply to integral vectors.  */

static inline enum machine_mode
get_mode_wider_vector (enum machine_mode o)
{
  /* ??? Rely on the ordering that genmodes.c gives to vectors.  */
  enum machine_mode n = GET_MODE_WIDER_MODE (o);
  /* Sanity check: doubling element width halves the unit count while
     keeping the total vector size unchanged.  */
  gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
  gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
  return n;
}
+ |
/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector |
with all elements equal to VAR. Return true if successful. */ |
@@ -27130,8 +26757,7 @@ static bool |
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode, |
rtx target, rtx val) |
{ |
- enum machine_mode hmode, smode, wsmode, wvmode; |
- rtx x; |
+ bool ok; |
switch (mode) |
{ |
@@ -27141,13 +26767,36 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode, |
return false; |
/* FALLTHRU */ |
+ case V4DFmode: |
+ case V4DImode: |
+ case V8SFmode: |
+ case V8SImode: |
case V2DFmode: |
case V2DImode: |
case V4SFmode: |
case V4SImode: |
- val = force_reg (GET_MODE_INNER (mode), val); |
- x = gen_rtx_VEC_DUPLICATE (mode, val); |
- emit_insn (gen_rtx_SET (VOIDmode, target, x)); |
+ { |
+ rtx insn, dup; |
+ |
+ /* First attempt to recognize VAL as-is. */ |
+ dup = gen_rtx_VEC_DUPLICATE (mode, val); |
+ insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup)); |
+ if (recog_memoized (insn) < 0) |
+ { |
+ rtx seq; |
+ /* If that fails, force VAL into a register. */ |
+ |
+ start_sequence (); |
+ XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val); |
+ seq = get_insns (); |
+ end_sequence (); |
+ if (seq) |
+ emit_insn_before (seq, insn); |
+ |
+ ok = recog_memoized (insn) >= 0; |
+ gcc_assert (ok); |
+ } |
+ } |
return true; |
case V4HImode: |
@@ -27155,130 +26804,87 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode, |
return false; |
if (TARGET_SSE || TARGET_3DNOW_A) |
{ |
+ rtx x; |
+ |
val = gen_lowpart (SImode, val); |
x = gen_rtx_TRUNCATE (HImode, val); |
x = gen_rtx_VEC_DUPLICATE (mode, x); |
emit_insn (gen_rtx_SET (VOIDmode, target, x)); |
return true; |
} |
- else |
- { |
- smode = HImode; |
- wsmode = SImode; |
- wvmode = V2SImode; |
- goto widen; |
- } |
+ goto widen; |
case V8QImode: |
if (!mmx_ok) |
return false; |
- smode = QImode; |
- wsmode = HImode; |
- wvmode = V4HImode; |
goto widen; |
+ |
case V8HImode: |
if (TARGET_SSE2) |
{ |
+ struct expand_vec_perm_d dperm; |
rtx tmp1, tmp2; |
- /* Extend HImode to SImode using a paradoxical SUBREG. */ |
+ |
+ permute: |
+ memset (&dperm, 0, sizeof (dperm)); |
+ dperm.target = target; |
+ dperm.vmode = mode; |
+ dperm.nelt = GET_MODE_NUNITS (mode); |
+ dperm.op0 = dperm.op1 = gen_reg_rtx (mode); |
+ |
+ /* Extend to SImode using a paradoxical SUBREG. */ |
tmp1 = gen_reg_rtx (SImode); |
emit_move_insn (tmp1, gen_lowpart (SImode, val)); |
- /* Insert the SImode value as low element of V4SImode vector. */ |
- tmp2 = gen_reg_rtx (V4SImode); |
- tmp1 = gen_rtx_VEC_MERGE (V4SImode, |
- gen_rtx_VEC_DUPLICATE (V4SImode, tmp1), |
- CONST0_RTX (V4SImode), |
- const1_rtx); |
- emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1)); |
- /* Cast the V4SImode vector back to a V8HImode vector. */ |
- tmp1 = gen_reg_rtx (V8HImode); |
- emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2)); |
- /* Duplicate the low short through the whole low SImode word. */ |
- emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1)); |
- /* Cast the V8HImode vector back to a V4SImode vector. */ |
- tmp2 = gen_reg_rtx (V4SImode); |
- emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1)); |
- /* Replicate the low element of the V4SImode vector. */ |
- emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx)); |
- /* Cast the V2SImode back to V8HImode, and store in target. */ |
- emit_move_insn (target, gen_lowpart (V8HImode, tmp2)); |
- return true; |
+ |
+ /* Insert the SImode value as low element of a V4SImode vector. */ |
+ tmp2 = gen_lowpart (V4SImode, dperm.op0); |
+ emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1)); |
+ |
+ ok = (expand_vec_perm_1 (&dperm) |
+ || expand_vec_perm_broadcast_1 (&dperm)); |
+ gcc_assert (ok); |
+ return ok; |
} |
- smode = HImode; |
- wsmode = SImode; |
- wvmode = V4SImode; |
goto widen; |
+ |
case V16QImode: |
if (TARGET_SSE2) |
- { |
- rtx tmp1, tmp2; |
- /* Extend QImode to SImode using a paradoxical SUBREG. */ |
- tmp1 = gen_reg_rtx (SImode); |
- emit_move_insn (tmp1, gen_lowpart (SImode, val)); |
- /* Insert the SImode value as low element of V4SImode vector. */ |
- tmp2 = gen_reg_rtx (V4SImode); |
- tmp1 = gen_rtx_VEC_MERGE (V4SImode, |
- gen_rtx_VEC_DUPLICATE (V4SImode, tmp1), |
- CONST0_RTX (V4SImode), |
- const1_rtx); |
- emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1)); |
- /* Cast the V4SImode vector back to a V16QImode vector. */ |
- tmp1 = gen_reg_rtx (V16QImode); |
- emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2)); |
- /* Duplicate the low byte through the whole low SImode word. */ |
- emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1)); |
- emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1)); |
- /* Cast the V16QImode vector back to a V4SImode vector. */ |
- tmp2 = gen_reg_rtx (V4SImode); |
- emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1)); |
- /* Replicate the low element of the V4SImode vector. */ |
- emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx)); |
- /* Cast the V2SImode back to V16QImode, and store in target. */ |
- emit_move_insn (target, gen_lowpart (V16QImode, tmp2)); |
- return true; |
- } |
- smode = QImode; |
- wsmode = HImode; |
- wvmode = V8HImode; |
+ goto permute; |
goto widen; |
+ |
widen: |
/* Replicate the value once into the next wider mode and recurse. */ |
- val = convert_modes (wsmode, smode, val, true); |
- x = expand_simple_binop (wsmode, ASHIFT, val, |
- GEN_INT (GET_MODE_BITSIZE (smode)), |
- NULL_RTX, 1, OPTAB_LIB_WIDEN); |
- val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN); |
- |
- x = gen_reg_rtx (wvmode); |
- if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val)) |
- gcc_unreachable (); |
- emit_move_insn (target, gen_lowpart (mode, x)); |
- return true; |
+ { |
+ enum machine_mode smode, wsmode, wvmode; |
+ rtx x; |
+ |
+ smode = GET_MODE_INNER (mode); |
+ wvmode = get_mode_wider_vector (mode); |
+ wsmode = GET_MODE_INNER (wvmode); |
+ |
+ val = convert_modes (wsmode, smode, val, true); |
+ x = expand_simple_binop (wsmode, ASHIFT, val, |
+ GEN_INT (GET_MODE_BITSIZE (smode)), |
+ NULL_RTX, 1, OPTAB_LIB_WIDEN); |
+ val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN); |
+ |
+ x = gen_lowpart (wvmode, target); |
+ ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val); |
+ gcc_assert (ok); |
+ return ok; |
+ } |
- case V4DFmode: |
- hmode = V2DFmode; |
- goto half; |
- case V4DImode: |
- hmode = V2DImode; |
- goto half; |
- case V8SFmode: |
- hmode = V4SFmode; |
- goto half; |
- case V8SImode: |
- hmode = V4SImode; |
- goto half; |
case V16HImode: |
- hmode = V8HImode; |
- goto half; |
case V32QImode: |
- hmode = V16QImode; |
- goto half; |
-half: |
{ |
- rtx tmp = gen_reg_rtx (hmode); |
- ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val); |
- emit_insn (gen_rtx_SET (VOIDmode, target, |
- gen_rtx_VEC_CONCAT (mode, tmp, tmp))); |
+ enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode); |
+ rtx x = gen_reg_rtx (hvmode); |
+ |
+ ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val); |
+ gcc_assert (ok); |
+ |
+ x = gen_rtx_VEC_CONCAT (mode, x, x); |
+ emit_insn (gen_rtx_SET (VOIDmode, target, x)); |
} |
return true; |
@@ -27381,7 +26987,7 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode, |
if (mode != V4SFmode && TARGET_SSE2) |
{ |
emit_insn (gen_sse2_pshufd_1 (new_target, new_target, |
- GEN_INT (1), |
+ const1_rtx, |
GEN_INT (one_var == 1 ? 0 : 1), |
GEN_INT (one_var == 2 ? 0 : 1), |
GEN_INT (one_var == 3 ? 0 : 1))); |
@@ -27401,7 +27007,7 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode, |
tmp = new_target; |
emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp, |
- GEN_INT (1), |
+ const1_rtx, |
GEN_INT (one_var == 1 ? 0 : 1), |
GEN_INT (one_var == 2 ? 0+4 : 1+4), |
GEN_INT (one_var == 3 ? 0+4 : 1+4))); |
@@ -28059,13 +27665,13 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt) |
/* tmp = target = A B C D */ |
tmp = copy_to_reg (target); |
/* target = A A B B */ |
- emit_insn (gen_sse_unpcklps (target, target, target)); |
+ emit_insn (gen_vec_interleave_lowv4sf (target, target, target)); |
/* target = X A B B */ |
ix86_expand_vector_set (false, target, val, 0); |
/* target = A X C D */ |
emit_insn (gen_sse_shufps_v4sf (target, target, tmp, |
- GEN_INT (1), GEN_INT (0), |
- GEN_INT (2+4), GEN_INT (3+4))); |
+ const1_rtx, const0_rtx, |
+ GEN_INT (2+4), GEN_INT (3+4))); |
return; |
case 2: |
@@ -28075,8 +27681,8 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt) |
ix86_expand_vector_set (false, tmp, val, 0); |
/* target = A B X D */ |
emit_insn (gen_sse_shufps_v4sf (target, target, tmp, |
- GEN_INT (0), GEN_INT (1), |
- GEN_INT (0+4), GEN_INT (3+4))); |
+ const0_rtx, const1_rtx, |
+ GEN_INT (0+4), GEN_INT (3+4))); |
return; |
case 3: |
@@ -28086,8 +27692,8 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt) |
ix86_expand_vector_set (false, tmp, val, 0); |
/* target = A B X D */ |
emit_insn (gen_sse_shufps_v4sf (target, target, tmp, |
- GEN_INT (0), GEN_INT (1), |
- GEN_INT (2+4), GEN_INT (0+4))); |
+ const0_rtx, const1_rtx, |
+ GEN_INT (2+4), GEN_INT (0+4))); |
return; |
default: |
@@ -28269,7 +27875,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt) |
case 2: |
tmp = gen_reg_rtx (mode); |
- emit_insn (gen_sse_unpckhps (tmp, vec, vec)); |
+ emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec)); |
break; |
default: |
@@ -28303,7 +27909,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt) |
case 2: |
tmp = gen_reg_rtx (mode); |
- emit_insn (gen_sse2_punpckhdq (tmp, vec, vec)); |
+ emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec)); |
break; |
default: |
@@ -28380,8 +27986,8 @@ ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in) |
emit_insn (fn (tmp2, tmp1, in)); |
emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2, |
- GEN_INT (1), GEN_INT (1), |
- GEN_INT (1+4), GEN_INT (1+4))); |
+ const1_rtx, const1_rtx, |
+ GEN_INT (1+4), GEN_INT (1+4))); |
emit_insn (fn (dest, tmp2, tmp3)); |
} |
@@ -28390,7 +27996,7 @@ static bool |
ix86_scalar_mode_supported_p (enum machine_mode mode) |
{ |
if (DECIMAL_FLOAT_MODE_P (mode)) |
- return true; |
+ return default_decimal_float_supported_p (); |
else if (mode == TFmode) |
return true; |
else |
@@ -28483,22 +28089,22 @@ output_387_reg_move (rtx insn, rtx *operands) |
return "fstp\t%y0"; |
} |
if (STACK_TOP_P (operands[0])) |
- return "fld%z1\t%y1"; |
+ return "fld%Z1\t%y1"; |
return "fst\t%y0"; |
} |
else if (MEM_P (operands[0])) |
{ |
gcc_assert (REG_P (operands[1])); |
if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) |
- return "fstp%z0\t%y0"; |
+ return "fstp%Z0\t%y0"; |
else |
{ |
/* There is no non-popping store to memory for XFmode. |
So if we need one, follow the store with a load. */ |
if (GET_MODE (operands[0]) == XFmode) |
- return "fstp%z0\t%y0\n\tfld%z0\t%y0"; |
+ return "fstp%Z0\t%y0\n\tfld%Z0\t%y0"; |
else |
- return "fst%z0\t%y0"; |
+ return "fst%Z0\t%y0"; |
} |
} |
else |
@@ -28549,13 +28155,14 @@ void ix86_emit_i387_log1p (rtx op0, rtx op1) |
rtx tmp = gen_reg_rtx (XFmode); |
rtx tmp2 = gen_reg_rtx (XFmode); |
+ rtx test; |
emit_insn (gen_absxf2 (tmp, op1)); |
- emit_insn (gen_cmpxf (tmp, |
+ test = gen_rtx_GE (VOIDmode, tmp, |
CONST_DOUBLE_FROM_REAL_VALUE ( |
REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode), |
- XFmode))); |
- emit_jump_insn (gen_bge (label1)); |
+ XFmode)); |
+ emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1)); |
emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */ |
emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2)); |
@@ -28595,18 +28202,18 @@ void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode) |
emit_insn (gen_rtx_SET (VOIDmode, x0, |
gen_rtx_UNSPEC (mode, gen_rtvec (1, b), |
UNSPEC_RCP))); |
- /* e0 = x0 * b */ |
+ /* e0 = x0 * a */ |
emit_insn (gen_rtx_SET (VOIDmode, e0, |
- gen_rtx_MULT (mode, x0, b))); |
- /* e1 = 2. - e0 */ |
+ gen_rtx_MULT (mode, x0, a))); |
+ /* e1 = x0 * b */ |
emit_insn (gen_rtx_SET (VOIDmode, e1, |
- gen_rtx_MINUS (mode, two, e0))); |
- /* x1 = x0 * e1 */ |
+ gen_rtx_MULT (mode, x0, b))); |
+ /* x1 = 2. - e1 */ |
emit_insn (gen_rtx_SET (VOIDmode, x1, |
- gen_rtx_MULT (mode, x0, e1))); |
- /* res = a * x1 */ |
+ gen_rtx_MINUS (mode, two, e1))); |
+ /* res = e0 * x1 */ |
emit_insn (gen_rtx_SET (VOIDmode, res, |
- gen_rtx_MULT (mode, a, x1))); |
+ gen_rtx_MULT (mode, e0, x1))); |
} |
 /* Output code to perform a Newton-Raphson approximation of a |
@@ -29343,277 +28950,1260 @@ ix86_expand_round (rtx operand0, rtx operand1) |
emit_move_insn (operand0, res); |
} |
- |
-/* Validate whether a SSE5 instruction is valid or not. |
- OPERANDS is the array of operands. |
- NUM is the number of operands. |
- USES_OC0 is true if the instruction uses OC0 and provides 4 variants. |
- NUM_MEMORY is the maximum number of memory operands to accept. |
- when COMMUTATIVE is set, operand 1 and 2 can be swapped. */ |
-bool |
-ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num, |
- bool uses_oc0, int num_memory, bool commutative) |
+/* Table of valid machine attributes. */ |
+static const struct attribute_spec ix86_attribute_table[] = |
{ |
- int mem_mask; |
- int mem_count; |
- int i; |
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */ |
+ /* Stdcall attribute says callee is responsible for popping arguments |
+ if they are not variable. */ |
+ { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute }, |
+ /* Fastcall attribute says callee is responsible for popping arguments |
+ if they are not variable. */ |
+ { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute }, |
+ /* Cdecl attribute says the callee is a normal C declaration */ |
+ { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute }, |
+ /* Regparm attribute specifies how many integer arguments are to be |
+ passed in registers. */ |
+ { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute }, |
+ /* Sseregparm attribute says we are using x86_64 calling conventions |
+ for FP arguments. */ |
+ { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute }, |
+ /* force_align_arg_pointer says this function realigns the stack at entry. */ |
+ { (const char *)&ix86_force_align_arg_pointer_string, 0, 0, |
+ false, true, true, ix86_handle_cconv_attribute }, |
+#if TARGET_DLLIMPORT_DECL_ATTRIBUTES |
+ { "dllimport", 0, 0, false, false, false, handle_dll_attribute }, |
+ { "dllexport", 0, 0, false, false, false, handle_dll_attribute }, |
+ { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute }, |
+#endif |
+ { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute }, |
+ { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute }, |
+#ifdef SUBTARGET_ATTRIBUTE_TABLE |
+ SUBTARGET_ATTRIBUTE_TABLE, |
+#endif |
+ /* ms_abi and sysv_abi calling convention function attributes. */ |
+ { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute }, |
+ { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute }, |
+ { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute }, |
+ /* End element. */ |
+ { NULL, 0, 0, false, false, false, NULL } |
+}; |
- /* Count the number of memory arguments */ |
- mem_mask = 0; |
- mem_count = 0; |
- for (i = 0; i < num; i++) |
+/* Implement targetm.vectorize.builtin_vectorization_cost. */ |
+static int |
+ix86_builtin_vectorization_cost (bool runtime_test) |
+{ |
+ /* If the branch of the runtime test is taken - i.e. - the vectorized |
+ version is skipped - this incurs a misprediction cost (because the |
+ vectorized version is expected to be the fall-through). So we subtract |
+ the latency of a mispredicted branch from the costs that are incurred |
+ when the vectorized version is executed. |
+ |
+ TODO: The values in individual target tables have to be tuned or new |
+ fields may be needed. For eg. on K8, the default branch path is the |
+ not-taken path. If the taken path is predicted correctly, the minimum |
+ penalty of going down the taken-path is 1 cycle. If the taken-path is |
+ not predicted correctly, then the minimum penalty is 10 cycles. */ |
+ |
+ if (runtime_test) |
{ |
- enum machine_mode mode = GET_MODE (operands[i]); |
- if (register_operand (operands[i], mode)) |
- ; |
+ return (-(ix86_cost->cond_taken_branch_cost)); |
+ } |
+ else |
+ return 0; |
+} |
+ |
+/* Implement targetm.vectorize.builtin_vec_perm. */ |
+ |
+static tree |
+ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type) |
+{ |
+ tree itype = TREE_TYPE (vec_type); |
+ bool u = TYPE_UNSIGNED (itype); |
+ enum machine_mode vmode = TYPE_MODE (vec_type); |
+ enum ix86_builtins fcode = fcode; /* Silence bogus warning. */ |
+ bool ok = TARGET_SSE2; |
+ |
+ switch (vmode) |
+ { |
+ case V4DFmode: |
+ ok = TARGET_AVX; |
+ fcode = IX86_BUILTIN_VEC_PERM_V4DF; |
+ goto get_di; |
+ case V2DFmode: |
+ fcode = IX86_BUILTIN_VEC_PERM_V2DF; |
+ get_di: |
+ itype = ix86_get_builtin_type (IX86_BT_DI); |
+ break; |
+ |
+ case V8SFmode: |
+ ok = TARGET_AVX; |
+ fcode = IX86_BUILTIN_VEC_PERM_V8SF; |
+ goto get_si; |
+ case V4SFmode: |
+ ok = TARGET_SSE; |
+ fcode = IX86_BUILTIN_VEC_PERM_V4SF; |
+ get_si: |
+ itype = ix86_get_builtin_type (IX86_BT_SI); |
+ break; |
+ |
+ case V2DImode: |
+ fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI; |
+ break; |
+ case V4SImode: |
+ fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI; |
+ break; |
+ case V8HImode: |
+ fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI; |
+ break; |
+ case V16QImode: |
+ fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI; |
+ break; |
+ default: |
+ ok = false; |
+ break; |
+ } |
+ |
+ if (!ok) |
+ return NULL_TREE; |
+ |
+ *mask_type = itype; |
+ return ix86_builtins[(int) fcode]; |
+} |
+ |
+/* Return a vector mode with twice as many elements as VMODE. */ |
+/* ??? Consider moving this to a table generated by genmodes.c. */ |
+ |
+static enum machine_mode |
+doublesize_vector_mode (enum machine_mode vmode) |
+{ |
+ switch (vmode) |
+ { |
+ case V2SFmode: return V4SFmode; |
+ case V1DImode: return V2DImode; |
+ case V2SImode: return V4SImode; |
+ case V4HImode: return V8HImode; |
+ case V8QImode: return V16QImode; |
+ |
+ case V2DFmode: return V4DFmode; |
+ case V4SFmode: return V8SFmode; |
+ case V2DImode: return V4DImode; |
+ case V4SImode: return V8SImode; |
+ case V8HImode: return V16HImode; |
+ case V16QImode: return V32QImode; |
+ |
+ case V4DFmode: return V8DFmode; |
+ case V8SFmode: return V16SFmode; |
+ case V4DImode: return V8DImode; |
+ case V8SImode: return V16SImode; |
+ case V16HImode: return V32HImode; |
+ case V32QImode: return V64QImode; |
+ |
+ default: |
+ gcc_unreachable (); |
+ } |
+} |
+ |
+/* Construct (set target (vec_select op0 (parallel perm))) and |
+ return true if that's a valid instruction in the active ISA. */ |
+ |
+static bool |
+expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt) |
+{ |
+ rtx rperm[MAX_VECT_LEN], x; |
+ unsigned i; |
+ |
+ for (i = 0; i < nelt; ++i) |
+ rperm[i] = GEN_INT (perm[i]); |
+ |
+ x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm)); |
+ x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x); |
+ x = gen_rtx_SET (VOIDmode, target, x); |
+ |
+ x = emit_insn (x); |
+ if (recog_memoized (x) < 0) |
+ { |
+ remove_insn (x); |
+ return false; |
+ } |
+ return true; |
+} |
+ |
+/* Similar, but generate a vec_concat from op0 and op1 as well. */ |
+ |
+static bool |
+expand_vselect_vconcat (rtx target, rtx op0, rtx op1, |
+ const unsigned char *perm, unsigned nelt) |
+{ |
+ enum machine_mode v2mode; |
+ rtx x; |
+ |
+ v2mode = doublesize_vector_mode (GET_MODE (op0)); |
+ x = gen_rtx_VEC_CONCAT (v2mode, op0, op1); |
+ return expand_vselect (target, x, perm, nelt); |
+} |
+ |
+/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D |
+ in terms of blendp[sd] / pblendw / pblendvb. */ |
+ |
+static bool |
+expand_vec_perm_blend (struct expand_vec_perm_d *d) |
+{ |
+ enum machine_mode vmode = d->vmode; |
+ unsigned i, mask, nelt = d->nelt; |
+ rtx target, op0, op1, x; |
- else if (memory_operand (operands[i], mode)) |
+ if (!TARGET_SSE4_1 || d->op0 == d->op1) |
+ return false; |
+ if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode)) |
+ return false; |
+ |
+ /* This is a blend, not a permute. Elements must stay in their |
+ respective lanes. */ |
+ for (i = 0; i < nelt; ++i) |
+ { |
+ unsigned e = d->perm[i]; |
+ if (!(e == i || e == i + nelt)) |
+ return false; |
+ } |
+ |
+ if (d->testing_p) |
+ return true; |
+ |
+ /* ??? Without SSE4.1, we could implement this with and/andn/or. This |
+ decision should be extracted elsewhere, so that we only try that |
+ sequence once all budget==3 options have been tried. */ |
+ |
+ /* For bytes, see if bytes move in pairs so we can use pblendw with |
+ an immediate argument, rather than pblendvb with a vector argument. */ |
+ if (vmode == V16QImode) |
+ { |
+ bool pblendw_ok = true; |
+ for (i = 0; i < 16 && pblendw_ok; i += 2) |
+ pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]); |
+ |
+ if (!pblendw_ok) |
{ |
- mem_mask |= (1 << i); |
- mem_count++; |
+ rtx rperm[16], vperm; |
+ |
+ for (i = 0; i < nelt; ++i) |
+ rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx); |
+ |
+ vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm)); |
+ vperm = force_reg (V16QImode, vperm); |
+ |
+ emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm)); |
+ return true; |
} |
+ } |
- else |
+ target = d->target; |
+ op0 = d->op0; |
+ op1 = d->op1; |
+ mask = 0; |
+ |
+ switch (vmode) |
+ { |
+ case V4DFmode: |
+ case V8SFmode: |
+ case V2DFmode: |
+ case V4SFmode: |
+ case V8HImode: |
+ for (i = 0; i < nelt; ++i) |
+ mask |= (d->perm[i] >= nelt) << i; |
+ break; |
+ |
+ case V2DImode: |
+ for (i = 0; i < 2; ++i) |
+ mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4); |
+ goto do_subreg; |
+ |
+ case V4SImode: |
+ for (i = 0; i < 4; ++i) |
+ mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2); |
+ goto do_subreg; |
+ |
+ case V16QImode: |
+ for (i = 0; i < 8; ++i) |
+ mask |= (d->perm[i * 2] >= 16) << i; |
+ |
+ do_subreg: |
+ vmode = V8HImode; |
+ target = gen_lowpart (vmode, target); |
+ op0 = gen_lowpart (vmode, op0); |
+ op1 = gen_lowpart (vmode, op1); |
+ break; |
+ |
+ default: |
+ gcc_unreachable (); |
+ } |
+ |
+ /* This matches five different patterns with the different modes. */ |
+ x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask)); |
+ x = gen_rtx_SET (VOIDmode, target, x); |
+ emit_insn (x); |
+ |
+ return true; |
+} |
+ |
+/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D |
+ in terms of the variable form of vpermilps. |
+ |
+ Note that we will have already failed the immediate input vpermilps, |
+ which requires that the high and low part shuffle be identical; the |
+ variable form doesn't require that. */ |
+ |
+static bool |
+expand_vec_perm_vpermil (struct expand_vec_perm_d *d) |
+{ |
+ rtx rperm[8], vperm; |
+ unsigned i; |
+ |
+ if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1) |
+ return false; |
+ |
+ /* We can only permute within the 128-bit lane. */ |
+ for (i = 0; i < 8; ++i) |
+ { |
+ unsigned e = d->perm[i]; |
+ if (i < 4 ? e >= 4 : e < 4) |
+ return false; |
+ } |
+ |
+ if (d->testing_p) |
+ return true; |
+ |
+ for (i = 0; i < 8; ++i) |
+ { |
+ unsigned e = d->perm[i]; |
+ |
+ /* Within each 128-bit lane, the elements of op0 are numbered |
+ from 0 and the elements of op1 are numbered from 4. */ |
+ if (e >= 8 + 4) |
+ e -= 8; |
+ else if (e >= 4) |
+ e -= 4; |
+ |
+ rperm[i] = GEN_INT (e); |
+ } |
+ |
+ vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm)); |
+ vperm = force_reg (V8SImode, vperm); |
+ emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm)); |
+ |
+ return true; |
+} |
+ |
+/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D |
+ in terms of pshufb or vpperm. */ |
+ |
+static bool |
+expand_vec_perm_pshufb (struct expand_vec_perm_d *d) |
+{ |
+ unsigned i, nelt, eltsz; |
+ rtx rperm[16], vperm, target, op0, op1; |
+ |
+ if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP)) |
+ return false; |
+ if (GET_MODE_SIZE (d->vmode) != 16) |
+ return false; |
+ |
+ if (d->testing_p) |
+ return true; |
+ |
+ nelt = d->nelt; |
+ eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode)); |
+ |
+ for (i = 0; i < nelt; ++i) |
+ { |
+ unsigned j, e = d->perm[i]; |
+ for (j = 0; j < eltsz; ++j) |
+ rperm[i * eltsz + j] = GEN_INT (e * eltsz + j); |
+ } |
+ |
+ vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm)); |
+ vperm = force_reg (V16QImode, vperm); |
+ |
+ target = gen_lowpart (V16QImode, d->target); |
+ op0 = gen_lowpart (V16QImode, d->op0); |
+ if (d->op0 == d->op1) |
+ emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm)); |
+ else |
+ { |
+ op1 = gen_lowpart (V16QImode, d->op1); |
+ emit_insn (gen_xop_pperm (target, op0, op1, vperm)); |
+ } |
+ |
+ return true; |
+} |
+ |
+/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D |
+ in a single instruction. */ |
+ |
+static bool |
+expand_vec_perm_1 (struct expand_vec_perm_d *d) |
+{ |
+ unsigned i, nelt = d->nelt; |
+ unsigned char perm2[MAX_VECT_LEN]; |
+ |
+ /* Check plain VEC_SELECT first, because AVX has instructions that could |
+ match both SEL and SEL+CONCAT, but the plain SEL will allow a memory |
+ input where SEL+CONCAT may not. */ |
+ if (d->op0 == d->op1) |
+ { |
+ int mask = nelt - 1; |
+ |
+ for (i = 0; i < nelt; i++) |
+ perm2[i] = d->perm[i] & mask; |
+ |
+ if (expand_vselect (d->target, d->op0, perm2, nelt)) |
+ return true; |
+ |
+ /* There are plenty of patterns in sse.md that are written for |
+ SEL+CONCAT and are not replicated for a single op. Perhaps |
+ that should be changed, to avoid the nastiness here. */ |
+ |
+ /* Recognize interleave style patterns, which means incrementing |
+ every other permutation operand. */ |
+ for (i = 0; i < nelt; i += 2) |
{ |
- rtx pattern = PATTERN (insn); |
+ perm2[i] = d->perm[i] & mask; |
+ perm2[i + 1] = (d->perm[i + 1] & mask) + nelt; |
+ } |
+ if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt)) |
+ return true; |
- /* allow 0 for pcmov */ |
- if (GET_CODE (pattern) != SET |
- || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE |
- || i < 2 |
- || operands[i] != CONST0_RTX (mode)) |
- return false; |
+ /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */ |
+ if (nelt >= 4) |
+ { |
+ for (i = 0; i < nelt; i += 4) |
+ { |
+ perm2[i + 0] = d->perm[i + 0] & mask; |
+ perm2[i + 1] = d->perm[i + 1] & mask; |
+ perm2[i + 2] = (d->perm[i + 2] & mask) + nelt; |
+ perm2[i + 3] = (d->perm[i + 3] & mask) + nelt; |
+ } |
+ |
+ if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt)) |
+ return true; |
} |
} |
- /* Special case pmacsdq{l,h} where we allow the 3rd argument to be |
- a memory operation. */ |
- if (num_memory < 0) |
+ /* Finally, try the fully general two operand permute. */ |
+ if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt)) |
+ return true; |
+ |
+ /* Recognize interleave style patterns with reversed operands. */ |
+ if (d->op0 != d->op1) |
{ |
- num_memory = -num_memory; |
- if ((mem_mask & (1 << (num-1))) != 0) |
+ for (i = 0; i < nelt; ++i) |
{ |
- mem_mask &= ~(1 << (num-1)); |
- mem_count--; |
+ unsigned e = d->perm[i]; |
+ if (e >= nelt) |
+ e -= nelt; |
+ else |
+ e += nelt; |
+ perm2[i] = e; |
} |
+ |
+ if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt)) |
+ return true; |
} |
- /* If there were no memory operations, allow the insn */ |
- if (mem_mask == 0) |
+ /* Try the SSE4.1 blend variable merge instructions. */ |
+ if (expand_vec_perm_blend (d)) |
+ return true; |
+ |
+ /* Try one of the AVX vpermil variable permutations. */ |
+ if (expand_vec_perm_vpermil (d)) |
return true; |
- /* Do not allow the destination register to be a memory operand. */ |
- else if (mem_mask & (1 << 0)) |
+ /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */ |
+ if (expand_vec_perm_pshufb (d)) |
+ return true; |
+ |
+ return false; |
+} |
+ |
+/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D |
+ in terms of a pair of pshuflw + pshufhw instructions. */ |
+ |
+static bool |
+expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d) |
+{ |
+ unsigned char perm2[MAX_VECT_LEN]; |
+ unsigned i; |
+ bool ok; |
+ |
+ if (d->vmode != V8HImode || d->op0 != d->op1) |
return false; |
- /* If there are too many memory operations, disallow the instruction. While |
- the hardware only allows 1 memory reference, before register allocation |
- for some insns, we allow two memory operations sometimes in order to allow |
- code like the following to be optimized: |
+ /* The two permutations only operate in 64-bit lanes. */ |
+ for (i = 0; i < 4; ++i) |
+ if (d->perm[i] >= 4) |
+ return false; |
+ for (i = 4; i < 8; ++i) |
+ if (d->perm[i] < 4) |
+ return false; |
- float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; } |
+ if (d->testing_p) |
+ return true; |
- or similar cases that are vectorized into using the fmaddss |
- instruction. */ |
- else if (mem_count > num_memory) |
+ /* Emit the pshuflw. */ |
+ memcpy (perm2, d->perm, 4); |
+ for (i = 4; i < 8; ++i) |
+ perm2[i] = i; |
+ ok = expand_vselect (d->target, d->op0, perm2, 8); |
+ gcc_assert (ok); |
+ |
+ /* Emit the pshufhw. */ |
+ memcpy (perm2 + 4, d->perm + 4, 4); |
+ for (i = 0; i < 4; ++i) |
+ perm2[i] = i; |
+ ok = expand_vselect (d->target, d->target, perm2, 8); |
+ gcc_assert (ok); |
+ |
+ return true; |
+} |
+ |
+/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify |
+ the permutation using the SSSE3 palignr instruction. This succeeds |
+ when all of the elements in PERM fit within one vector and we merely |
+ need to shift them down so that a single vector permutation has a |
+ chance to succeed. */ |
+ |
+static bool |
+expand_vec_perm_palignr (struct expand_vec_perm_d *d) |
+{ |
+ unsigned i, nelt = d->nelt; |
+ unsigned min, max; |
+ bool in_order, ok; |
+ rtx shift; |
+ |
+ /* Even with AVX, palignr only operates on 128-bit vectors. */ |
+ if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16) |
return false; |
- /* Don't allow more than one memory operation if not optimizing. */ |
- else if (mem_count > 1 && !optimize) |
+ min = nelt, max = 0; |
+ for (i = 0; i < nelt; ++i) |
+ { |
+ unsigned e = d->perm[i]; |
+ if (e < min) |
+ min = e; |
+ if (e > max) |
+ max = e; |
+ } |
+ if (min == 0 || max - min >= nelt) |
return false; |
- else if (num == 4 && mem_count == 1) |
- { |
- /* formats (destination is the first argument), example fmaddss: |
- xmm1, xmm1, xmm2, xmm3/mem |
- xmm1, xmm1, xmm2/mem, xmm3 |
- xmm1, xmm2, xmm3/mem, xmm1 |
- xmm1, xmm2/mem, xmm3, xmm1 */ |
- if (uses_oc0) |
- return ((mem_mask == (1 << 1)) |
- || (mem_mask == (1 << 2)) |
- || (mem_mask == (1 << 3))); |
- |
- /* format, example pmacsdd: |
- xmm1, xmm2, xmm3/mem, xmm1 */ |
- if (commutative) |
- return (mem_mask == (1 << 2) || mem_mask == (1 << 1)); |
- else |
- return (mem_mask == (1 << 2)); |
+ /* Given that we have SSSE3, we know we'll be able to implement the |
+ single operand permutation after the palignr with pshufb. */ |
+ if (d->testing_p) |
+ return true; |
+ |
+ shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode))); |
+ emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target), |
+ gen_lowpart (TImode, d->op1), |
+ gen_lowpart (TImode, d->op0), shift)); |
+ |
+ d->op0 = d->op1 = d->target; |
+ |
+ in_order = true; |
+ for (i = 0; i < nelt; ++i) |
+ { |
+ unsigned e = d->perm[i] - min; |
+ if (e != i) |
+ in_order = false; |
+ d->perm[i] = e; |
} |
- else if (num == 4 && num_memory == 2) |
+ /* Test for the degenerate case where the alignment by itself |
+ produces the desired permutation. */ |
+ if (in_order) |
+ return true; |
+ |
+ ok = expand_vec_perm_1 (d); |
+ gcc_assert (ok); |
+ |
+ return ok; |
+} |
+ |
+/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify |
+ a two vector permutation into a single vector permutation by using |
+ an interleave operation to merge the vectors. */ |
+ |
+static bool |
+expand_vec_perm_interleave2 (struct expand_vec_perm_d *d) |
+{ |
+ struct expand_vec_perm_d dremap, dfinal; |
+ unsigned i, nelt = d->nelt, nelt2 = nelt / 2; |
+ unsigned contents, h1, h2, h3, h4; |
+ unsigned char remap[2 * MAX_VECT_LEN]; |
+ rtx seq; |
+ bool ok; |
+ |
+ if (d->op0 == d->op1) |
+ return false; |
+ |
+ /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit |
+ lanes. We can use similar techniques with the vperm2f128 instruction, |
+ but it requires slightly different logic. */ |
+ if (GET_MODE_SIZE (d->vmode) != 16) |
+ return false; |
+ |
+ /* Examine from whence the elements come. */ |
+ contents = 0; |
+ for (i = 0; i < nelt; ++i) |
+ contents |= 1u << d->perm[i]; |
+ |
+ /* Split the two input vectors into 4 halves. */ |
+ h1 = (1u << nelt2) - 1; |
+ h2 = h1 << nelt2; |
+ h3 = h2 << nelt2; |
+ h4 = h3 << nelt2; |
+ |
+ memset (remap, 0xff, sizeof (remap)); |
+ dremap = *d; |
+ |
+ /* If the elements come only from the low halves, use interleave low; |
+ similarly for interleave high. If the elements are from mis-matched |
+ halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */ |
+ if ((contents & (h1 | h3)) == contents) |
{ |
- /* If there are two memory operations, we can load one of the memory ops |
- into the destination register. This is for optimizing the |
- multiply/add ops, which the combiner has optimized both the multiply |
- and the add insns to have a memory operation. We have to be careful |
- that the destination doesn't overlap with the inputs. */ |
- rtx op0 = operands[0]; |
+ for (i = 0; i < nelt2; ++i) |
+ { |
+ remap[i] = i * 2; |
+ remap[i + nelt] = i * 2 + 1; |
+ dremap.perm[i * 2] = i; |
+ dremap.perm[i * 2 + 1] = i + nelt; |
+ } |
+ } |
+ else if ((contents & (h2 | h4)) == contents) |
+ { |
+ for (i = 0; i < nelt2; ++i) |
+ { |
+ remap[i + nelt2] = i * 2; |
+ remap[i + nelt + nelt2] = i * 2 + 1; |
+ dremap.perm[i * 2] = i + nelt2; |
+ dremap.perm[i * 2 + 1] = i + nelt + nelt2; |
+ } |
+ } |
+ else if ((contents & (h1 | h4)) == contents) |
+ { |
+ for (i = 0; i < nelt2; ++i) |
+ { |
+ remap[i] = i; |
+ remap[i + nelt + nelt2] = i + nelt2; |
+ dremap.perm[i] = i; |
+ dremap.perm[i + nelt2] = i + nelt + nelt2; |
+ } |
+ if (nelt != 4) |
+ { |
+ dremap.vmode = V2DImode; |
+ dremap.nelt = 2; |
+ dremap.perm[0] = 0; |
+ dremap.perm[1] = 3; |
+ } |
+ } |
+ else if ((contents & (h2 | h3)) == contents) |
+ { |
+ for (i = 0; i < nelt2; ++i) |
+ { |
+ remap[i + nelt2] = i; |
+ remap[i + nelt] = i + nelt2; |
+ dremap.perm[i] = i + nelt2; |
+ dremap.perm[i + nelt2] = i + nelt; |
+ } |
+ if (nelt != 4) |
+ { |
+ dremap.vmode = V2DImode; |
+ dremap.nelt = 2; |
+ dremap.perm[0] = 1; |
+ dremap.perm[1] = 2; |
+ } |
+ } |
+ else |
+ return false; |
- if (reg_mentioned_p (op0, operands[1]) |
- || reg_mentioned_p (op0, operands[2]) |
- || reg_mentioned_p (op0, operands[3])) |
- return false; |
+ /* Use the remapping array set up above to move the elements from their |
+ swizzled locations into their final destinations. */ |
+ dfinal = *d; |
+ for (i = 0; i < nelt; ++i) |
+ { |
+ unsigned e = remap[d->perm[i]]; |
+ gcc_assert (e < nelt); |
+ dfinal.perm[i] = e; |
+ } |
+ dfinal.op0 = gen_reg_rtx (dfinal.vmode); |
+ dfinal.op1 = dfinal.op0; |
+ dremap.target = dfinal.op0; |
- /* formats (destination is the first argument), example fmaddss: |
- xmm1, xmm1, xmm2, xmm3/mem |
- xmm1, xmm1, xmm2/mem, xmm3 |
- xmm1, xmm2, xmm3/mem, xmm1 |
- xmm1, xmm2/mem, xmm3, xmm1 |
+ /* Test if the final remap can be done with a single insn. For V4SFmode or |
+ V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */ |
+ start_sequence (); |
+ ok = expand_vec_perm_1 (&dfinal); |
+ seq = get_insns (); |
+ end_sequence (); |
- For the oc0 case, we will load either operands[1] or operands[3] into |
- operands[0], so any combination of 2 memory operands is ok. */ |
- if (uses_oc0) |
- return true; |
+ if (!ok) |
+ return false; |
- /* format, example pmacsdd: |
- xmm1, xmm2, xmm3/mem, xmm1 |
+ if (dremap.vmode != dfinal.vmode) |
+ { |
+ dremap.target = gen_lowpart (dremap.vmode, dremap.target); |
+ dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0); |
+ dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1); |
+ } |
- For the integer multiply/add instructions be more restrictive and |
- require operands[2] and operands[3] to be the memory operands. */ |
- if (commutative) |
- return (mem_mask == ((1 << 1) | (1 << 3)) || ((1 << 2) | (1 << 3))); |
- else |
- return (mem_mask == ((1 << 2) | (1 << 3))); |
+ ok = expand_vec_perm_1 (&dremap); |
+ gcc_assert (ok); |
+ |
+ emit_insn (seq); |
+ return true; |
+} |
+ |
+/* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word |
+ permutation with two pshufb insns and an ior. We should have already |
+ failed all two instruction sequences. */ |
+ |
+static bool |
+expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d) |
+{ |
+ rtx rperm[2][16], vperm, l, h, op, m128; |
+ unsigned int i, nelt, eltsz; |
+ |
+ if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16) |
+ return false; |
+ gcc_assert (d->op0 != d->op1); |
+ |
+ nelt = d->nelt; |
+ eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode)); |
+ |
+ /* Generate two permutation masks. If the required element is within |
+ the given vector it is shuffled into the proper lane. If the required |
+ element is in the other vector, force a zero into the lane by setting |
+ bit 7 in the permutation mask. */ |
+ m128 = GEN_INT (-128); |
+ for (i = 0; i < nelt; ++i) |
+ { |
+ unsigned j, e = d->perm[i]; |
+ unsigned which = (e >= nelt); |
+ if (e >= nelt) |
+ e -= nelt; |
+ |
+ for (j = 0; j < eltsz; ++j) |
+ { |
+ rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j); |
+ rperm[1-which][i*eltsz + j] = m128; |
+ } |
} |
- else if (num == 3 && num_memory == 1) |
+ vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0])); |
+ vperm = force_reg (V16QImode, vperm); |
+ |
+ l = gen_reg_rtx (V16QImode); |
+ op = gen_lowpart (V16QImode, d->op0); |
+ emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm)); |
+ |
+ vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1])); |
+ vperm = force_reg (V16QImode, vperm); |
+ |
+ h = gen_reg_rtx (V16QImode); |
+ op = gen_lowpart (V16QImode, d->op1); |
+ emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm)); |
+ |
+ op = gen_lowpart (V16QImode, d->target); |
+ emit_insn (gen_iorv16qi3 (op, l, h)); |
+ |
+ return true; |
+} |
+ |
+/* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even |
+ and extract-odd permutations. */ |
+ |
+static bool |
+expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd) |
+{ |
+ rtx t1, t2, t3, t4; |
+ |
+ switch (d->vmode) |
{ |
- /* formats, example protb: |
- xmm1, xmm2, xmm3/mem |
- xmm1, xmm2/mem, xmm3 */ |
- if (uses_oc0) |
- return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2))); |
+ case V4DFmode: |
+ t1 = gen_reg_rtx (V4DFmode); |
+ t2 = gen_reg_rtx (V4DFmode); |
+ |
+ /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */ |
+ emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20))); |
+ emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31))); |
+ |
+ /* Now an unpck[lh]pd will produce the result required. */ |
+ if (odd) |
+ t3 = gen_avx_unpckhpd256 (d->target, t1, t2); |
+ else |
+ t3 = gen_avx_unpcklpd256 (d->target, t1, t2); |
+ emit_insn (t3); |
+ break; |
+ |
+ case V8SFmode: |
+ { |
+ static const unsigned char perm1[8] = { 0, 2, 1, 3, 4, 6, 5, 7 }; |
+ static const unsigned char perme[8] = { 0, 1, 8, 9, 4, 5, 12, 13 }; |
+ static const unsigned char permo[8] = { 2, 3, 10, 11, 6, 7, 14, 15 }; |
+ |
+ t1 = gen_reg_rtx (V8SFmode); |
+ t2 = gen_reg_rtx (V8SFmode); |
+ t3 = gen_reg_rtx (V8SFmode); |
+ t4 = gen_reg_rtx (V8SFmode); |
+ |
+ /* Shuffle within the 128-bit lanes to produce: |
+ { 0 2 1 3 4 6 5 7 } and { 8 a 9 b c e d f }. */ |
+ expand_vselect (t1, d->op0, perm1, 8); |
+ expand_vselect (t2, d->op1, perm1, 8); |
+ |
+ /* Shuffle the lanes around to produce: |
+ { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */ |
+ emit_insn (gen_avx_vperm2f128v8sf3 (t3, t1, t2, GEN_INT (0x20))); |
+ emit_insn (gen_avx_vperm2f128v8sf3 (t4, t1, t2, GEN_INT (0x31))); |
+ |
+ /* Now a vpermil2p will produce the result required. */ |
+ /* ??? The vpermil2p requires a vector constant. Another option |
+ is a unpck[lh]ps to merge the two vectors to produce |
+ { 0 4 2 6 8 c a e } or { 1 5 3 7 9 d b f }. Then use another |
+ vpermilps to get the elements into the final order. */ |
+ d->op0 = t3; |
+ d->op1 = t4; |
+ memcpy (d->perm, odd ? permo: perme, 8); |
+ expand_vec_perm_vpermil (d); |
+ } |
+ break; |
+ |
+ case V2DFmode: |
+ case V4SFmode: |
+ case V2DImode: |
+ case V4SImode: |
+ /* These are always directly implementable by expand_vec_perm_1. */ |
+ gcc_unreachable (); |
+ |
+ case V8HImode: |
+ if (TARGET_SSSE3) |
+ return expand_vec_perm_pshufb2 (d); |
+ else |
+ { |
+ /* We need 2*log2(N)-1 operations to achieve odd/even |
+ with interleave. */ |
+ t1 = gen_reg_rtx (V8HImode); |
+ t2 = gen_reg_rtx (V8HImode); |
+ emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1)); |
+ emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1)); |
+ emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1)); |
+ emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1)); |
+ if (odd) |
+ t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2); |
+ else |
+ t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2); |
+ emit_insn (t3); |
+ } |
+ break; |
- /* format, example comeq: |
- xmm1, xmm2, xmm3/mem */ |
+ case V16QImode: |
+ if (TARGET_SSSE3) |
+ return expand_vec_perm_pshufb2 (d); |
else |
- return (mem_mask == (1 << 2)); |
+ { |
+ t1 = gen_reg_rtx (V16QImode); |
+ t2 = gen_reg_rtx (V16QImode); |
+ t3 = gen_reg_rtx (V16QImode); |
+ emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1)); |
+ emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1)); |
+ emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1)); |
+ emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1)); |
+ emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2)); |
+ emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2)); |
+ if (odd) |
+ t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3); |
+ else |
+ t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3); |
+ emit_insn (t3); |
+ } |
+ break; |
+ |
+ default: |
+ gcc_unreachable (); |
} |
- else |
- gcc_unreachable (); |
+ return true; |
+} |
- return false; |
+/* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match |
+ extract-even and extract-odd permutations. */ |
+ |
+static bool |
+expand_vec_perm_even_odd (struct expand_vec_perm_d *d) |
+{ |
+ unsigned i, odd, nelt = d->nelt; |
+ |
+ odd = d->perm[0]; |
+ if (odd != 0 && odd != 1) |
+ return false; |
+ |
+ for (i = 1; i < nelt; ++i) |
+ if (d->perm[i] != 2 * i + odd) |
+ return false; |
+ |
+ return expand_vec_perm_even_odd_1 (d, odd); |
} |
- |
-/* Fixup an SSE5 instruction that has 2 memory input references into a form the |
- hardware will allow by using the destination register to load one of the |
- memory operations. Presently this is used by the multiply/add routines to |
- allow 2 memory references. */ |
+/* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast |
+ permutations. We assume that expand_vec_perm_1 has already failed. */ |
-void |
-ix86_expand_sse5_multiple_memory (rtx operands[], |
- int num, |
- enum machine_mode mode) |
-{ |
- rtx op0 = operands[0]; |
- if (num != 4 |
- || memory_operand (op0, mode) |
- || reg_mentioned_p (op0, operands[1]) |
- || reg_mentioned_p (op0, operands[2]) |
- || reg_mentioned_p (op0, operands[3])) |
- gcc_unreachable (); |
+static bool |
+expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d) |
+{ |
+ unsigned elt = d->perm[0], nelt2 = d->nelt / 2; |
+ enum machine_mode vmode = d->vmode; |
+ unsigned char perm2[4]; |
+ rtx op0 = d->op0; |
+ bool ok; |
- /* For 2 memory operands, pick either operands[1] or operands[3] to move into |
- the destination register. */ |
- if (memory_operand (operands[1], mode)) |
+ switch (vmode) |
{ |
- emit_move_insn (op0, operands[1]); |
- operands[1] = op0; |
+ case V4DFmode: |
+ case V8SFmode: |
+ /* These are special-cased in sse.md so that we can optionally |
+ use the vbroadcast instruction. They expand to two insns |
+ if the input happens to be in a register. */ |
+ gcc_unreachable (); |
+ |
+ case V2DFmode: |
+ case V2DImode: |
+ case V4SFmode: |
+ case V4SImode: |
+ /* These are always implementable using standard shuffle patterns. */ |
+ gcc_unreachable (); |
+ |
+ case V8HImode: |
+ case V16QImode: |
+ /* These can be implemented via interleave. We save one insn by |
+ stopping once we have promoted to V4SImode and then use pshufd. */ |
+ do |
+ { |
+ optab otab = vec_interleave_low_optab; |
+ |
+ if (elt >= nelt2) |
+ { |
+ otab = vec_interleave_high_optab; |
+ elt -= nelt2; |
+ } |
+ nelt2 /= 2; |
+ |
+ op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT); |
+ vmode = get_mode_wider_vector (vmode); |
+ op0 = gen_lowpart (vmode, op0); |
+ } |
+ while (vmode != V4SImode); |
+ |
+ memset (perm2, elt, 4); |
+ ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4); |
+ gcc_assert (ok); |
+ return true; |
+ |
+ default: |
+ gcc_unreachable (); |
} |
- else if (memory_operand (operands[3], mode)) |
+} |
+ |
+/* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match |
+ broadcast permutations. */ |
+ |
+static bool |
+expand_vec_perm_broadcast (struct expand_vec_perm_d *d) |
+{ |
+ unsigned i, elt, nelt = d->nelt; |
+ |
+ if (d->op0 != d->op1) |
+ return false; |
+ |
+ elt = d->perm[0]; |
+ for (i = 1; i < nelt; ++i) |
+ if (d->perm[i] != elt) |
+ return false; |
+ |
+ return expand_vec_perm_broadcast_1 (d); |
+} |
+ |
+/* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook. |
+ With all of the interface bits taken care of, perform the expansion |
+ in D and return true on success. */ |
+ |
+static bool |
+ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d) |
+{ |
+ /* Try a single instruction expansion. */ |
+ if (expand_vec_perm_1 (d)) |
+ return true; |
+ |
+ /* Try sequences of two instructions. */ |
+ |
+ if (expand_vec_perm_pshuflw_pshufhw (d)) |
+ return true; |
+ |
+ if (expand_vec_perm_palignr (d)) |
+ return true; |
+ |
+ if (expand_vec_perm_interleave2 (d)) |
+ return true; |
+ |
+ if (expand_vec_perm_broadcast (d)) |
+ return true; |
+ |
+ /* Try sequences of three instructions. */ |
+ |
+ if (expand_vec_perm_pshufb2 (d)) |
+ return true; |
+ |
+ /* ??? Look for narrow permutations whose element orderings would |
+ allow the promotion to a wider mode. */ |
+ |
+ /* ??? Look for sequences of interleave or a wider permute that place |
+ the data into the correct lanes for a half-vector shuffle like |
+ pshuf[lh]w or vpermilps. */ |
+ |
+ /* ??? Look for sequences of interleave that produce the desired results. |
+ The combinatorics of punpck[lh] get pretty ugly... */ |
+ |
+ if (expand_vec_perm_even_odd (d)) |
+ return true; |
+ |
+ return false; |
+} |
+ |
+/* Extract the values from the vector CST into the permutation array in D. |
+ Return 0 on error, 1 if all values from the permutation come from the |
+ first vector, 2 if all values from the second vector, and 3 otherwise. */ |
+ |
+static int |
+extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst) |
+{ |
+ tree list = TREE_VECTOR_CST_ELTS (cst); |
+ unsigned i, nelt = d->nelt; |
+ int ret = 0; |
+ |
+ for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list)) |
{ |
- emit_move_insn (op0, operands[3]); |
- operands[3] = op0; |
+ unsigned HOST_WIDE_INT e; |
+ |
+ if (!host_integerp (TREE_VALUE (list), 1)) |
+ return 0; |
+ e = tree_low_cst (TREE_VALUE (list), 1); |
+ if (e >= 2 * nelt) |
+ return 0; |
+ |
+ ret |= (e < nelt ? 1 : 2); |
+ d->perm[i] = e; |
} |
- else |
- gcc_unreachable (); |
+ gcc_assert (list == NULL); |
- return; |
+ /* For all elements from second vector, fold the elements to first. */ |
+ if (ret == 2) |
+ for (i = 0; i < nelt; ++i) |
+ d->perm[i] -= nelt; |
+ |
+ return ret; |
} |
- |
-/* Table of valid machine attributes. */ |
-static const struct attribute_spec ix86_attribute_table[] = |
+static rtx |
+ix86_expand_vec_perm_builtin (tree exp) |
{ |
- /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */ |
- /* Stdcall attribute says callee is responsible for popping arguments |
- if they are not variable. */ |
- { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute }, |
- /* Fastcall attribute says callee is responsible for popping arguments |
- if they are not variable. */ |
- { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute }, |
- /* Cdecl attribute says the callee is a normal C declaration */ |
- { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute }, |
- /* Regparm attribute specifies how many integer arguments are to be |
- passed in registers. */ |
- { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute }, |
- /* Sseregparm attribute says we are using x86_64 calling conventions |
- for FP arguments. */ |
- { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute }, |
- /* force_align_arg_pointer says this function realigns the stack at entry. */ |
- { (const char *)&ix86_force_align_arg_pointer_string, 0, 0, |
- false, true, true, ix86_handle_cconv_attribute }, |
-#if TARGET_DLLIMPORT_DECL_ATTRIBUTES |
- { "dllimport", 0, 0, false, false, false, handle_dll_attribute }, |
- { "dllexport", 0, 0, false, false, false, handle_dll_attribute }, |
- { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute }, |
-#endif |
- { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute }, |
- { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute }, |
-#ifdef SUBTARGET_ATTRIBUTE_TABLE |
- SUBTARGET_ATTRIBUTE_TABLE, |
-#endif |
- /* ms_abi and sysv_abi calling convention function attributes. */ |
- { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute }, |
- { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute }, |
- /* End element. */ |
- { NULL, 0, 0, false, false, false, NULL } |
-}; |
+ struct expand_vec_perm_d d; |
+ tree arg0, arg1, arg2; |
-/* Implement targetm.vectorize.builtin_vectorization_cost. */ |
-static int |
-x86_builtin_vectorization_cost (bool runtime_test) |
+ arg0 = CALL_EXPR_ARG (exp, 0); |
+ arg1 = CALL_EXPR_ARG (exp, 1); |
+ arg2 = CALL_EXPR_ARG (exp, 2); |
+ |
+ d.vmode = TYPE_MODE (TREE_TYPE (arg0)); |
+ d.nelt = GET_MODE_NUNITS (d.vmode); |
+ d.testing_p = false; |
+ gcc_assert (VECTOR_MODE_P (d.vmode)); |
+ |
+ if (TREE_CODE (arg2) != VECTOR_CST) |
+ { |
+ error_at (EXPR_LOCATION (exp), |
+ "vector permutation requires vector constant"); |
+ goto exit_error; |
+ } |
+ |
+ switch (extract_vec_perm_cst (&d, arg2)) |
+ { |
+ default: |
+ gcc_unreachable(); |
+ |
+ case 0: |
+ error_at (EXPR_LOCATION (exp), "invalid vector permutation constant"); |
+ goto exit_error; |
+ |
+ case 3: |
+ if (!operand_equal_p (arg0, arg1, 0)) |
+ { |
+ d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL); |
+ d.op0 = force_reg (d.vmode, d.op0); |
+ d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL); |
+ d.op1 = force_reg (d.vmode, d.op1); |
+ break; |
+ } |
+ |
+ /* The elements of PERM do not suggest that only the first operand |
+ is used, but both operands are identical. Allow easier matching |
+ of the permutation by folding the permutation into the single |
+ input vector. */ |
+ { |
+ unsigned i, nelt = d.nelt; |
+ for (i = 0; i < nelt; ++i) |
+ if (d.perm[i] >= nelt) |
+ d.perm[i] -= nelt; |
+ } |
+ /* FALLTHRU */ |
+ |
+ case 1: |
+ d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL); |
+ d.op0 = force_reg (d.vmode, d.op0); |
+ d.op1 = d.op0; |
+ break; |
+ |
+ case 2: |
+ d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL); |
+ d.op0 = force_reg (d.vmode, d.op0); |
+ d.op1 = d.op0; |
+ break; |
+ } |
+ |
+ d.target = gen_reg_rtx (d.vmode); |
+ if (ix86_expand_vec_perm_builtin_1 (&d)) |
+ return d.target; |
+ |
+  /* For compiler generated permutations, we should never get here, because |
+     the compiler should also be checking the ok hook.  But since this is a |
+     builtin the user has access to, don't abort.  */ |
+ switch (d.nelt) |
+ { |
+ case 2: |
+ sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]); |
+ break; |
+ case 4: |
+ sorry ("vector permutation (%d %d %d %d)", |
+ d.perm[0], d.perm[1], d.perm[2], d.perm[3]); |
+ break; |
+ case 8: |
+ sorry ("vector permutation (%d %d %d %d %d %d %d %d)", |
+ d.perm[0], d.perm[1], d.perm[2], d.perm[3], |
+ d.perm[4], d.perm[5], d.perm[6], d.perm[7]); |
+ break; |
+ case 16: |
+ sorry ("vector permutation " |
+ "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)", |
+ d.perm[0], d.perm[1], d.perm[2], d.perm[3], |
+ d.perm[4], d.perm[5], d.perm[6], d.perm[7], |
+ d.perm[8], d.perm[9], d.perm[10], d.perm[11], |
+ d.perm[12], d.perm[13], d.perm[14], d.perm[15]); |
+ break; |
+ default: |
+ gcc_unreachable (); |
+ } |
+ exit_error: |
+ return CONST0_RTX (d.vmode); |
+} |
+ |
+/* Implement targetm.vectorize.builtin_vec_perm_ok. */ |
+ |
+static bool |
+ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask) |
{ |
- /* If the branch of the runtime test is taken - i.e. - the vectorized |
- version is skipped - this incurs a misprediction cost (because the |
- vectorized version is expected to be the fall-through). So we subtract |
- the latency of a mispredicted branch from the costs that are incured |
- when the vectorized version is executed. |
+ struct expand_vec_perm_d d; |
+ int vec_mask; |
+ bool ret, one_vec; |
- TODO: The values in individual target tables have to be tuned or new |
- fields may be needed. For eg. on K8, the default branch path is the |
- not-taken path. If the taken path is predicted correctly, the minimum |
- penalty of going down the taken-path is 1 cycle. If the taken-path is |
- not predicted correctly, then the minimum penalty is 10 cycles. */ |
+ d.vmode = TYPE_MODE (vec_type); |
+ d.nelt = GET_MODE_NUNITS (d.vmode); |
+ d.testing_p = true; |
- if (runtime_test) |
+ /* Given sufficient ISA support we can just return true here |
+ for selected vector modes. */ |
+ if (GET_MODE_SIZE (d.vmode) == 16) |
{ |
- return (-(ix86_cost->cond_taken_branch_cost)); |
+ /* All implementable with a single vpperm insn. */ |
+ if (TARGET_XOP) |
+ return true; |
+ /* All implementable with 2 pshufb + 1 ior. */ |
+ if (TARGET_SSSE3) |
+ return true; |
+ /* All implementable with shufpd or unpck[lh]pd. */ |
+ if (d.nelt == 2) |
+ return true; |
} |
- else |
- return 0; |
+ |
+ vec_mask = extract_vec_perm_cst (&d, mask); |
+ |
+  /* This hook cannot be called in response to something that the |
+     user does (unlike the builtin expander) so we shouldn't ever see |
+     an error generated from the extract.  */ |
+ gcc_assert (vec_mask > 0 && vec_mask <= 3); |
+ one_vec = (vec_mask != 3); |
+ |
+ /* Implementable with shufps or pshufd. */ |
+ if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode)) |
+ return true; |
+ |
+ /* Otherwise we have to go through the motions and see if we can |
+ figure out how to generate the requested permutation. */ |
+ d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); |
+ d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); |
+ if (!one_vec) |
+ d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); |
+ |
+ start_sequence (); |
+ ret = ix86_expand_vec_perm_builtin_1 (&d); |
+ end_sequence (); |
+ |
+ return ret; |
} |
+void |
+ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd) |
+{ |
+ struct expand_vec_perm_d d; |
+ unsigned i, nelt; |
+ |
+ d.target = targ; |
+ d.op0 = op0; |
+ d.op1 = op1; |
+ d.vmode = GET_MODE (targ); |
+ d.nelt = nelt = GET_MODE_NUNITS (d.vmode); |
+ d.testing_p = false; |
+ |
+ for (i = 0; i < nelt; ++i) |
+ d.perm[i] = i * 2 + odd; |
+ |
+ /* We'll either be able to implement the permutation directly... */ |
+ if (expand_vec_perm_1 (&d)) |
+ return; |
+ |
+ /* ... or we use the special-case patterns. */ |
+ expand_vec_perm_even_odd_1 (&d, odd); |
+} |
+ |
/* This function returns the calling abi specific va_list type node. |
It returns the FNDECL specific va_list type. */ |
tree |
ix86_fn_abi_va_list (tree fndecl) |
{ |
- int abi; |
- |
if (!TARGET_64BIT) |
return va_list_type_node; |
gcc_assert (fndecl != NULL_TREE); |
- abi = ix86_function_abi ((const_tree) fndecl); |
- if (abi == MS_ABI) |
+ if (ix86_function_abi ((const_tree) fndecl) == MS_ABI) |
return ms_va_list_type_node; |
else |
return sysv_va_list_type_node; |
@@ -29726,6 +30316,9 @@ ix86_enum_va_list (int idx, const char **pname, tree *ptree) |
#undef TARGET_RETURN_IN_MEMORY |
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory |
+#undef TARGET_LEGITIMIZE_ADDRESS |
+#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address |
+ |
#undef TARGET_ATTRIBUTE_TABLE |
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table |
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES |
@@ -29738,6 +30331,8 @@ ix86_enum_va_list (int idx, const char **pname, tree *ptree) |
#undef TARGET_INIT_BUILTINS |
#define TARGET_INIT_BUILTINS ix86_init_builtins |
+#undef TARGET_BUILTIN_DECL |
+#define TARGET_BUILTIN_DECL ix86_builtin_decl |
#undef TARGET_EXPAND_BUILTIN |
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin |
@@ -29766,6 +30361,9 @@ ix86_enum_va_list (int idx, const char **pname, tree *ptree) |
#undef TARGET_ASM_CLOSE_PAREN |
#define TARGET_ASM_CLOSE_PAREN "" |
+#undef TARGET_ASM_BYTE_OP |
+#define TARGET_ASM_BYTE_OP ASM_BYTE |
+ |
#undef TARGET_ASM_ALIGNED_HI_OP |
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT |
#undef TARGET_ASM_ALIGNED_SI_OP |
@@ -29829,7 +30427,8 @@ ix86_enum_va_list (int idx, const char **pname, tree *ptree) |
#define TARGET_DEFAULT_TARGET_FLAGS \ |
(TARGET_DEFAULT \ |
| TARGET_SUBTARGET_DEFAULT \ |
- | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT) |
+ | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT \ |
+ | MASK_FUSED_MADD) |
#undef TARGET_HANDLE_OPTION |
#define TARGET_HANDLE_OPTION ix86_handle_option |
@@ -29881,10 +30480,12 @@ ix86_enum_va_list (int idx, const char **pname, tree *ptree) |
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary |
#undef TARGET_GET_DRAP_RTX |
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx |
-#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC |
-#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec |
#undef TARGET_STRICT_ARGUMENT_NAMING |
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true |
+#undef TARGET_STATIC_CHAIN |
+#define TARGET_STATIC_CHAIN ix86_static_chain |
+#undef TARGET_TRAMPOLINE_INIT |
+#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init |
#undef TARGET_GIMPLIFY_VA_ARG_EXPR |
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg |
@@ -29921,7 +30522,14 @@ ix86_enum_va_list (int idx, const char **pname, tree *ptree) |
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload |
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST |
-#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost |
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ |
+ ix86_builtin_vectorization_cost |
+#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM |
+#define TARGET_VECTORIZE_BUILTIN_VEC_PERM \ |
+ ix86_vectorize_builtin_vec_perm |
+#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK |
+#define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \ |
+ ix86_vectorize_builtin_vec_perm_ok |
#undef TARGET_SET_CURRENT_FUNCTION |
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function |
@@ -29938,12 +30546,27 @@ ix86_enum_va_list (int idx, const char **pname, tree *ptree) |
#undef TARGET_OPTION_PRINT |
#define TARGET_OPTION_PRINT ix86_function_specific_print |
-#undef TARGET_OPTION_CAN_INLINE_P |
-#define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p |
+#undef TARGET_CAN_INLINE_P |
+#define TARGET_CAN_INLINE_P ix86_can_inline_p |
#undef TARGET_EXPAND_TO_RTL_HOOK |
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi |
+#undef TARGET_LEGITIMATE_ADDRESS_P |
+#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p |
+ |
+#undef TARGET_IRA_COVER_CLASSES |
+#define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes |
+ |
+#undef TARGET_FRAME_POINTER_REQUIRED |
+#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required |
+ |
+#undef TARGET_CAN_ELIMINATE |
+#define TARGET_CAN_ELIMINATE ix86_can_eliminate |
+ |
+#undef TARGET_ASM_CODE_END |
+#define TARGET_ASM_CODE_END ix86_code_end |
+ |
struct gcc_target targetm = TARGET_INITIALIZER; |
#include "gt-i386.h" |