| Index: gcc/gcc/config/arm/arm.c
|
| diff --git a/gcc/gcc/config/arm/arm.c b/gcc/gcc/config/arm/arm.c
|
| index 7f346da319b1b3c301d84ef9cd71d873e8898cf4..a06a38bc8a9695732972d5867155b71756ff68b8 100644
|
| --- a/gcc/gcc/config/arm/arm.c
|
| +++ b/gcc/gcc/config/arm/arm.c
|
| @@ -1,6 +1,6 @@
|
| /* Output routines for GCC for ARM.
|
| Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
|
| - 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
|
| + 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
|
| Free Software Foundation, Inc.
|
| Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
|
| and Martin Simmons (@harleqn.co.uk).
|
| @@ -43,6 +43,7 @@
|
| #include "optabs.h"
|
| #include "toplev.h"
|
| #include "recog.h"
|
| +#include "cgraph.h"
|
| #include "ggc.h"
|
| #include "except.h"
|
| #include "c-pragma.h"
|
| @@ -53,14 +54,13 @@
|
| #include "debug.h"
|
| #include "langhooks.h"
|
| #include "df.h"
|
| +#include "intl.h"
|
| #include "libfuncs.h"
|
|
|
| /* Forward definitions of types. */
|
| typedef struct minipool_node Mnode;
|
| typedef struct minipool_fixup Mfix;
|
|
|
| -const struct attribute_spec arm_attribute_table[];
|
| -
|
| void (*arm_lang_output_object_attributes_hook)(void);
|
|
|
| /* Forward function declarations. */
|
| @@ -74,7 +74,10 @@ static int arm_address_register_rtx_p (rtx, int);
|
| static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
|
| static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
|
| static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
|
| +static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
|
| +static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
|
| inline static int thumb1_index_register_rtx_p (rtx, int);
|
| +static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
|
| static int thumb_far_jump_used_p (void);
|
| static bool thumb_force_lr_save (void);
|
| static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
|
| @@ -111,6 +114,7 @@ static unsigned long arm_compute_save_reg_mask (void);
|
| static unsigned long arm_isr_value (tree);
|
| static unsigned long arm_compute_func_type (void);
|
| static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
|
| +static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
|
| static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
|
| #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
|
| static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
|
| @@ -124,9 +128,17 @@ static int arm_adjust_cost (rtx, rtx, rtx, int);
|
| static int count_insns_for_constant (HOST_WIDE_INT, int);
|
| static int arm_get_strip_length (int);
|
| static bool arm_function_ok_for_sibcall (tree, tree);
|
| +static enum machine_mode arm_promote_function_mode (const_tree,
|
| + enum machine_mode, int *,
|
| + const_tree, int);
|
| +static bool arm_return_in_memory (const_tree, const_tree);
|
| +static rtx arm_function_value (const_tree, const_tree, bool);
|
| +static rtx arm_libcall_value (enum machine_mode, const_rtx);
|
| +
|
| static void arm_internal_label (FILE *, const char *, unsigned long);
|
| static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
|
| tree);
|
| +static bool arm_have_conditional_execution (void);
|
| static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
|
| static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
|
| static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
|
| @@ -149,6 +161,9 @@ static void emit_constant_insn (rtx cond, rtx pattern);
|
| static rtx emit_set_insn (rtx, rtx);
|
| static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
|
| tree, bool);
|
| +static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
|
| + const_tree);
|
| +static int aapcs_select_return_coproc (const_tree, const_tree);
|
|
|
| #ifdef OBJECT_FORMAT_ELF
|
| static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
|
| @@ -176,6 +191,7 @@ static void arm_unwind_emit (FILE *, rtx);
|
| static bool arm_output_ttype (rtx);
|
| #endif
|
| static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
|
| +static rtx arm_dwarf_register_span (rtx);
|
|
|
| static tree arm_cxx_guard_type (void);
|
| static bool arm_cxx_guard_mask_bit (void);
|
| @@ -198,14 +214,65 @@ static bool arm_tls_symbol_p (rtx x);
|
| static int arm_issue_rate (void);
|
| static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
|
| static bool arm_allocate_stack_slots_for_args (void);
|
| +static const char *arm_invalid_parameter_type (const_tree t);
|
| +static const char *arm_invalid_return_type (const_tree t);
|
| +static tree arm_promoted_type (const_tree t);
|
| +static tree arm_convert_to_type (tree type, tree expr);
|
| +static bool arm_scalar_mode_supported_p (enum machine_mode);
|
| +static bool arm_frame_pointer_required (void);
|
| +static bool arm_can_eliminate (const int, const int);
|
| +static void arm_asm_trampoline_template (FILE *);
|
| +static void arm_trampoline_init (rtx, tree, rtx);
|
| +static rtx arm_trampoline_adjust_address (rtx);
|
|
|
|
|
| +/* Table of machine attributes. */
|
| +static const struct attribute_spec arm_attribute_table[] =
|
| +{
|
| + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
|
| + /* Function calls made to this symbol must be done indirectly, because
|
| + it may lie outside of the 26 bit addressing range of a normal function
|
| + call. */
|
| + { "long_call", 0, 0, false, true, true, NULL },
|
| + /* Whereas these functions are always known to reside within the 26 bit
|
| + addressing range. */
|
| + { "short_call", 0, 0, false, true, true, NULL },
|
| + /* Specify the procedure call conventions for a function. */
|
| + { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute },
|
| + /* Interrupt Service Routines have special prologue and epilogue requirements. */
|
| + { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
|
| + { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
|
| + { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
|
| +#ifdef ARM_PE
|
| + /* ARM/PE has three new attributes:
|
| + interfacearm - ?
|
| + dllexport - for exporting a function/variable that will live in a dll
|
| + dllimport - for importing a function/variable from a dll
|
| +
|
| + Microsoft allows multiple declspecs in one __declspec, separating
|
| + them with spaces. We do NOT support this. Instead, use __declspec
|
| + multiple times.
|
| + */
|
| + { "dllimport", 0, 0, true, false, false, NULL },
|
| + { "dllexport", 0, 0, true, false, false, NULL },
|
| + { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
|
| +#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
|
| + { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
|
| + { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
|
| + { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
|
| +#endif
|
| + { NULL, 0, 0, false, false, false, NULL }
|
| +};
|
| +
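[Note: the new "pcs" attribute registered above can be exercised directly
from C.  A minimal sketch, assuming an AAPCS-based target with VFP
available (the function name is hypothetical):

    /* Request the VFP variant of the AAPCS for this function type, so
       floating-point arguments travel in s0-s15 rather than r0-r3.  */
    double fast_dot (double, double) __attribute__ ((pcs ("aapcs-vfp")));

The string argument is validated by arm_handle_pcs_attribute against the
pcs_attribute_args table added later in this patch.]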
|
| /* Initialize the GCC target structure. */
|
| #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
|
| #undef TARGET_MERGE_DECL_ATTRIBUTES
|
| #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
|
| #endif
|
|
|
| +#undef TARGET_LEGITIMIZE_ADDRESS
|
| +#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
|
| +
|
| #undef TARGET_ATTRIBUTE_TABLE
|
| #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
|
|
|
| @@ -257,6 +324,12 @@ static bool arm_allocate_stack_slots_for_args (void);
|
| #undef TARGET_FUNCTION_OK_FOR_SIBCALL
|
| #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
|
|
|
| +#undef TARGET_FUNCTION_VALUE
|
| +#define TARGET_FUNCTION_VALUE arm_function_value
|
| +
|
| +#undef TARGET_LIBCALL_VALUE
|
| +#define TARGET_LIBCALL_VALUE arm_libcall_value
|
| +
|
| #undef TARGET_ASM_OUTPUT_MI_THUNK
|
| #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
|
| #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
|
| @@ -283,10 +356,8 @@ static bool arm_allocate_stack_slots_for_args (void);
|
| #undef TARGET_INIT_LIBFUNCS
|
| #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
|
|
|
| -#undef TARGET_PROMOTE_FUNCTION_ARGS
|
| -#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_const_tree_true
|
| -#undef TARGET_PROMOTE_FUNCTION_RETURN
|
| -#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_const_tree_true
|
| +#undef TARGET_PROMOTE_FUNCTION_MODE
|
| +#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
|
| #undef TARGET_PROMOTE_PROTOTYPES
|
| #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
|
| #undef TARGET_PASS_BY_REFERENCE
|
| @@ -300,6 +371,13 @@ static bool arm_allocate_stack_slots_for_args (void);
|
| #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
|
| #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
|
|
|
| +#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
|
| +#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
|
| +#undef TARGET_TRAMPOLINE_INIT
|
| +#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
|
| +#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
|
| +#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
|
| +
|
| #undef TARGET_DEFAULT_SHORT_ENUMS
|
| #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
|
|
|
| @@ -361,6 +439,9 @@ static bool arm_allocate_stack_slots_for_args (void);
|
| #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
|
| #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
|
|
|
| +#undef TARGET_DWARF_REGISTER_SPAN
|
| +#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
|
| +
|
| #undef TARGET_CANNOT_COPY_INSN_P
|
| #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
|
|
|
| @@ -369,6 +450,9 @@ static bool arm_allocate_stack_slots_for_args (void);
|
| #define TARGET_HAVE_TLS true
|
| #endif
|
|
|
| +#undef TARGET_HAVE_CONDITIONAL_EXECUTION
|
| +#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
|
| +
|
| #undef TARGET_CANNOT_FORCE_CONST_MEM
|
| #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
|
|
|
| @@ -399,6 +483,30 @@ static bool arm_allocate_stack_slots_for_args (void);
|
| #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
|
| #endif
|
|
|
| +#undef TARGET_LEGITIMATE_ADDRESS_P
|
| +#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
|
| +
|
| +#undef TARGET_INVALID_PARAMETER_TYPE
|
| +#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
|
| +
|
| +#undef TARGET_INVALID_RETURN_TYPE
|
| +#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
|
| +
|
| +#undef TARGET_PROMOTED_TYPE
|
| +#define TARGET_PROMOTED_TYPE arm_promoted_type
|
| +
|
| +#undef TARGET_CONVERT_TO_TYPE
|
| +#define TARGET_CONVERT_TO_TYPE arm_convert_to_type
|
| +
|
| +#undef TARGET_SCALAR_MODE_SUPPORTED_P
|
| +#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
|
| +
|
| +#undef TARGET_FRAME_POINTER_REQUIRED
|
| +#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
|
| +
|
| +#undef TARGET_CAN_ELIMINATE
|
| +#define TARGET_CAN_ELIMINATE arm_can_eliminate
|
| +
|
| struct gcc_target targetm = TARGET_INITIALIZER;
|
|
|
| /* Obstack for minipool constant handling. */
|
| @@ -414,28 +522,24 @@ extern FILE * asm_out_file;
|
| /* True if we are currently building a constant table. */
|
| int making_const_table;
|
|
|
| -/* Define the information needed to generate branch insns. This is
|
| - stored from the compare operation. */
|
| -rtx arm_compare_op0, arm_compare_op1;
|
| -
|
| /* The processor for which instructions should be scheduled. */
|
| enum processor_type arm_tune = arm_none;
|
|
|
| /* The default processor used if not overridden by commandline. */
|
| static enum processor_type arm_default_cpu = arm_none;
|
|
|
| -/* Which floating point model to use. */
|
| -enum arm_fp_model arm_fp_model;
|
| -
|
| -/* Which floating point hardware is available. */
|
| -enum fputype arm_fpu_arch;
|
| -
|
| /* Which floating point hardware to schedule for. */
|
| -enum fputype arm_fpu_tune;
|
| +int arm_fpu_attr;
|
| +
|
| +/* Which floating point hardware to use. */
|
| +const struct arm_fpu_desc *arm_fpu_desc;
|
|
|
| /* Whether to use floating point hardware. */
|
| enum float_abi_type arm_float_abi;
|
|
|
| +/* Which __fp16 format to use. */
|
| +enum arm_fp16_format_type arm_fp16_format;
|
| +
|
| /* Which ABI to use. */
|
| enum arm_abi_type arm_abi;
|
|
|
| @@ -474,6 +578,8 @@ static int thumb_call_reg_needed;
|
| #define FL_DIV (1 << 18) /* Hardware divide. */
|
| #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
|
| #define FL_NEON (1 << 20) /* Neon instructions. */
|
| +#define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
|
| + architecture. */
|
|
|
| #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
|
|
|
| @@ -495,9 +601,10 @@ static int thumb_call_reg_needed;
|
| #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
|
| #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
|
| #define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM)
|
| -#define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM)
|
| +#define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
|
| #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
|
| #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
|
| +#define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
|
|
|
| /* The bits in this mask specify which
|
| instructions we are allowed to generate. */
|
| @@ -534,6 +641,9 @@ int arm_arch6k = 0;
|
| /* Nonzero if instructions not present in the 'M' profile can be used. */
|
| int arm_arch_notm = 0;
|
|
|
| +/* Nonzero if instructions present in ARMv7E-M can be used. */
|
| +int arm_arch7em = 0;
|
| +
|
| /* Nonzero if this chip can benefit from load scheduling. */
|
| int arm_ld_sched = 0;
|
|
|
| @@ -583,10 +693,6 @@ enum machine_mode output_memory_reference_mode;
|
| /* The register number to be used for the PIC offset register. */
|
| unsigned arm_pic_register = INVALID_REGNUM;
|
|
|
| -/* Set to 1 when a return insn is output, this means that the epilogue
|
| - is not needed. */
|
| -int return_used_this_function;
|
| -
|
| /* Set to 1 after arm_reorg has started. Reset to start at the start of
|
| the next function. */
|
| static int after_arm_reorg = 0;
|
| @@ -594,6 +700,8 @@ static int after_arm_reorg = 0;
|
| /* The maximum number of insns to be used when loading a constant. */
|
| static int arm_constant_limit = 3;
|
|
|
| +static enum arm_pcs arm_pcs_default;
|
| +
|
| /* For an explanation of these variables, see final_prescan_insn below. */
|
| int arm_ccfsm_state;
|
| /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
|
| @@ -674,6 +782,7 @@ static const struct processors all_architectures[] =
|
| {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
|
| {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
|
| {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
|
| + {"armv7e-m", cortexm3, "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
|
| {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
|
| {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
|
| {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
|
| @@ -708,44 +817,29 @@ static struct arm_cpu_select arm_select[] =
|
|
|
| char arm_arch_name[] = "__ARM_ARCH_0UNK__";
|
|
|
| -struct fpu_desc
|
| -{
|
| - const char * name;
|
| - enum fputype fpu;
|
| -};
|
| -
|
| -
|
| /* Available values for -mfpu=. */
|
|
|
| -static const struct fpu_desc all_fpus[] =
|
| -{
|
| - {"fpa", FPUTYPE_FPA},
|
| - {"fpe2", FPUTYPE_FPA_EMU2},
|
| - {"fpe3", FPUTYPE_FPA_EMU2},
|
| - {"maverick", FPUTYPE_MAVERICK},
|
| - {"vfp", FPUTYPE_VFP},
|
| - {"vfp3", FPUTYPE_VFP3},
|
| - {"vfpv3", FPUTYPE_VFP3},
|
| - {"vfpv3-d16", FPUTYPE_VFP3D16},
|
| - {"neon", FPUTYPE_NEON}
|
| -};
|
| -
|
| -
|
| -/* Floating point models used by the different hardware.
|
| - See fputype in arm.h. */
|
| -
|
| -static const enum fputype fp_model_for_fpu[] =
|
| -{
|
| - /* No FP hardware. */
|
| - ARM_FP_MODEL_UNKNOWN, /* FPUTYPE_NONE */
|
| - ARM_FP_MODEL_FPA, /* FPUTYPE_FPA */
|
| - ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU2 */
|
| - ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU3 */
|
| - ARM_FP_MODEL_MAVERICK, /* FPUTYPE_MAVERICK */
|
| - ARM_FP_MODEL_VFP, /* FPUTYPE_VFP */
|
| - ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3D16 */
|
| - ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3 */
|
| - ARM_FP_MODEL_VFP /* FPUTYPE_NEON */
|
| +static const struct arm_fpu_desc all_fpus[] =
|
| +{
|
| + {"fpa", ARM_FP_MODEL_FPA, 0, VFP_NONE, false, false},
|
| + {"fpe2", ARM_FP_MODEL_FPA, 2, VFP_NONE, false, false},
|
| + {"fpe3", ARM_FP_MODEL_FPA, 3, VFP_NONE, false, false},
|
| + {"maverick", ARM_FP_MODEL_MAVERICK, 0, VFP_NONE, false, false},
|
| + {"vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
|
| + {"vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
|
| + {"vfpv3-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true},
|
| + {"vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
|
| + {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true},
|
| + {"vfpv3xd", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false},
|
| + {"vfpv3xd-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true},
|
| + {"neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false},
|
| + {"neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , true },
|
| + {"vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true},
|
| + {"vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true},
|
| + {"fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true},
|
| + {"neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true},
|
| + /* Compatibility aliases. */
|
| + {"vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
|
| };
|
|
|
|
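[Note: each initializer above pairs an -mfpu= name with its FP model,
revision, register-file layout, and Neon/fp16 capability.  A sketch of
how the selected descriptor is consumed, with field names inferred from
the initializer order (name, model, rev, regs, neon, fp16):

    /* True when the chosen -mfpu provides Neon together with the
       half-precision extension, e.g. "neon-fp16" or "neon-vfpv4".  */
    int have_neon_fp16 = arm_fpu_desc->neon && arm_fpu_desc->fp16;
]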
|
| @@ -766,6 +860,23 @@ static const struct float_abi all_float_abis[] =
|
| };
|
|
|
|
|
| +struct fp16_format
|
| +{
|
| + const char *name;
|
| + enum arm_fp16_format_type fp16_format_type;
|
| +};
|
| +
|
| +
|
| +/* Available values for -mfp16-format=. */
|
| +
|
| +static const struct fp16_format all_fp16_formats[] =
|
| +{
|
| + {"none", ARM_FP16_FORMAT_NONE},
|
| + {"ieee", ARM_FP16_FORMAT_IEEE},
|
| + {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
|
| +};
|
| +
|
| +
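[Note: the two non-trivial -mfp16-format values differ only in how the
maximum exponent is interpreted: "ieee" reserves it for infinities and
NaNs, while ARM's "alternative" format uses it for normalized values,
extending the range to +/-131008 at the cost of Inf/NaN.  The
user-visible effect is on the storage format of __fp16:

    __fp16 h = 1.0;       /* Stored in the 16-bit format chosen above.  */
    float  f = h + 2.0f;  /* Arithmetic happens after widening to float. */
]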
|
| struct abi_name
|
| {
|
| const char *name;
|
| @@ -924,6 +1035,44 @@ arm_init_libfuncs (void)
|
| set_optab_libfunc (smod_optab, SImode, NULL);
|
| set_optab_libfunc (umod_optab, SImode, NULL);
|
|
|
| + /* Half-precision float operations. The compiler handles all operations
|
| + with NULL libfuncs by converting to SFmode. */
|
| + switch (arm_fp16_format)
|
| + {
|
| + case ARM_FP16_FORMAT_IEEE:
|
| + case ARM_FP16_FORMAT_ALTERNATIVE:
|
| +
|
| + /* Conversions. */
|
| + set_conv_libfunc (trunc_optab, HFmode, SFmode,
|
| + (arm_fp16_format == ARM_FP16_FORMAT_IEEE
|
| + ? "__gnu_f2h_ieee"
|
| + : "__gnu_f2h_alternative"));
|
| + set_conv_libfunc (sext_optab, SFmode, HFmode,
|
| + (arm_fp16_format == ARM_FP16_FORMAT_IEEE
|
| + ? "__gnu_h2f_ieee"
|
| + : "__gnu_h2f_alternative"));
|
| +
|
| + /* Arithmetic. */
|
| + set_optab_libfunc (add_optab, HFmode, NULL);
|
| + set_optab_libfunc (sdiv_optab, HFmode, NULL);
|
| + set_optab_libfunc (smul_optab, HFmode, NULL);
|
| + set_optab_libfunc (neg_optab, HFmode, NULL);
|
| + set_optab_libfunc (sub_optab, HFmode, NULL);
|
| +
|
| + /* Comparisons. */
|
| + set_optab_libfunc (eq_optab, HFmode, NULL);
|
| + set_optab_libfunc (ne_optab, HFmode, NULL);
|
| + set_optab_libfunc (lt_optab, HFmode, NULL);
|
| + set_optab_libfunc (le_optab, HFmode, NULL);
|
| + set_optab_libfunc (ge_optab, HFmode, NULL);
|
| + set_optab_libfunc (gt_optab, HFmode, NULL);
|
| + set_optab_libfunc (unord_optab, HFmode, NULL);
|
| + break;
|
| +
|
| + default:
|
| + break;
|
| + }
|
| +
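[Note: leaving the HFmode arithmetic and comparison optabs NULL is what
makes the midend widen half-precision operands; only the two conversion
routines are registered.  So a fragment such as

    __fp16 a, b, c;
    c = a + b;

lowers (in the IEEE case) to __gnu_h2f_ieee on each operand, an SFmode
addition, and __gnu_f2h_ieee on the result.]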
|
| if (TARGET_AAPCS_BASED)
|
| synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
|
| }
|
| @@ -959,13 +1108,15 @@ arm_build_builtin_va_list (void)
|
| /* Create the type. */
|
| va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
|
| /* Give it the required name. */
|
| - va_list_name = build_decl (TYPE_DECL,
|
| + va_list_name = build_decl (BUILTINS_LOCATION,
|
| + TYPE_DECL,
|
| get_identifier ("__va_list"),
|
| va_list_type);
|
| DECL_ARTIFICIAL (va_list_name) = 1;
|
| TYPE_NAME (va_list_type) = va_list_name;
|
| /* Create the __ap field. */
|
| - ap_field = build_decl (FIELD_DECL,
|
| + ap_field = build_decl (BUILTINS_LOCATION,
|
| + FIELD_DECL,
|
| get_identifier ("__ap"),
|
| ptr_type_node);
|
| DECL_ARTIFICIAL (ap_field) = 1;
|
| @@ -1207,13 +1358,13 @@ arm_override_options (void)
|
| const struct processors * sel;
|
| unsigned int sought;
|
|
|
| - selected_cpu = TARGET_CPU_DEFAULT;
|
| + selected_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
|
| if (selected_cpu == arm_none)
|
| {
|
| #ifdef SUBTARGET_CPU_DEFAULT
|
| /* Use the subtarget default CPU if none was specified by
|
| configure. */
|
| - selected_cpu = SUBTARGET_CPU_DEFAULT;
|
| + selected_cpu = (enum processor_type) SUBTARGET_CPU_DEFAULT;
|
| #endif
|
| /* Default to ARM6. */
|
| if (selected_cpu == arm_none)
|
| @@ -1295,6 +1446,23 @@ arm_override_options (void)
|
|
|
| tune_flags = all_cores[(int)arm_tune].flags;
|
|
|
| + if (target_fp16_format_name)
|
| + {
|
| + for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
|
| + {
|
| + if (streq (all_fp16_formats[i].name, target_fp16_format_name))
|
| + {
|
| + arm_fp16_format = all_fp16_formats[i].fp16_format_type;
|
| + break;
|
| + }
|
| + }
|
| + if (i == ARRAY_SIZE (all_fp16_formats))
|
| + error ("invalid __fp16 format option: -mfp16-format=%s",
|
| + target_fp16_format_name);
|
| + }
|
| + else
|
| + arm_fp16_format = ARM_FP16_FORMAT_NONE;
|
| +
|
| if (target_abi_name)
|
| {
|
| for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
|
| @@ -1387,6 +1555,7 @@ arm_override_options (void)
|
| arm_arch6 = (insn_flags & FL_ARCH6) != 0;
|
| arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
|
| arm_arch_notm = (insn_flags & FL_NOTM) != 0;
|
| + arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
|
| arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
|
| arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
|
| arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
|
| @@ -1438,7 +1607,6 @@ arm_override_options (void)
|
| if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
|
| error ("iwmmxt abi requires an iwmmxt capable cpu");
|
|
|
| - arm_fp_model = ARM_FP_MODEL_UNKNOWN;
|
| if (target_fpu_name == NULL && target_fpe_name != NULL)
|
| {
|
| if (streq (target_fpe_name, "2"))
|
| @@ -1449,46 +1617,56 @@ arm_override_options (void)
|
| error ("invalid floating point emulation option: -mfpe=%s",
|
| target_fpe_name);
|
| }
|
| - if (target_fpu_name != NULL)
|
| - {
|
| - /* The user specified a FPU. */
|
| - for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
|
| - {
|
| - if (streq (all_fpus[i].name, target_fpu_name))
|
| - {
|
| - arm_fpu_arch = all_fpus[i].fpu;
|
| - arm_fpu_tune = arm_fpu_arch;
|
| - arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
|
| - break;
|
| - }
|
| - }
|
| - if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
|
| - error ("invalid floating point option: -mfpu=%s", target_fpu_name);
|
| - }
|
| - else
|
| +
|
| + if (target_fpu_name == NULL)
|
| {
|
| #ifdef FPUTYPE_DEFAULT
|
| - /* Use the default if it is specified for this platform. */
|
| - arm_fpu_arch = FPUTYPE_DEFAULT;
|
| - arm_fpu_tune = FPUTYPE_DEFAULT;
|
| + target_fpu_name = FPUTYPE_DEFAULT;
|
| #else
|
| - /* Pick one based on CPU type. */
|
| - /* ??? Some targets assume FPA is the default.
|
| - if ((insn_flags & FL_VFP) != 0)
|
| - arm_fpu_arch = FPUTYPE_VFP;
|
| - else
|
| - */
|
| if (arm_arch_cirrus)
|
| - arm_fpu_arch = FPUTYPE_MAVERICK;
|
| + target_fpu_name = "maverick";
|
| else
|
| - arm_fpu_arch = FPUTYPE_FPA_EMU2;
|
| + target_fpu_name = "fpe2";
|
| #endif
|
| - if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
|
| - arm_fpu_tune = FPUTYPE_FPA;
|
| + }
|
| +
|
| + arm_fpu_desc = NULL;
|
| + for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
|
| + {
|
| + if (streq (all_fpus[i].name, target_fpu_name))
|
| + {
|
| + arm_fpu_desc = &all_fpus[i];
|
| + break;
|
| + }
|
| + }
|
| +
|
| + if (!arm_fpu_desc)
|
| + {
|
| + error ("invalid floating point option: -mfpu=%s", target_fpu_name);
|
| + return;
|
| + }
|
| +
|
| + switch (arm_fpu_desc->model)
|
| + {
|
| + case ARM_FP_MODEL_FPA:
|
| + if (arm_fpu_desc->rev == 2)
|
| + arm_fpu_attr = FPU_FPE2;
|
| + else if (arm_fpu_desc->rev == 3)
|
| + arm_fpu_attr = FPU_FPE3;
|
| else
|
| - arm_fpu_tune = arm_fpu_arch;
|
| - arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
|
| - gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
|
| + arm_fpu_attr = FPU_FPA;
|
| + break;
|
| +
|
| + case ARM_FP_MODEL_MAVERICK:
|
| + arm_fpu_attr = FPU_MAVERICK;
|
| + break;
|
| +
|
| + case ARM_FP_MODEL_VFP:
|
| + arm_fpu_attr = FPU_VFP;
|
| + break;
|
| +
|
| + default:
|
| + gcc_unreachable();
|
| }
|
|
|
| if (target_float_abi_name != NULL)
|
| @@ -1509,8 +1687,18 @@ arm_override_options (void)
|
| else
|
| arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
|
|
|
| - if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
|
| - sorry ("-mfloat-abi=hard and VFP");
|
| + if (TARGET_AAPCS_BASED
|
| + && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
|
| + error ("FPA is unsupported in the AAPCS");
|
| +
|
| + if (TARGET_AAPCS_BASED)
|
| + {
|
| + if (TARGET_CALLER_INTERWORKING)
|
| + error ("AAPCS does not support -mcaller-super-interworking");
|
| + else
|
| + if (TARGET_CALLEE_INTERWORKING)
|
| + error ("AAPCS does not support -mcallee-super-interworking");
|
| + }
|
|
|
| /* FPA and iWMMXt are incompatible because the insn encodings overlap.
|
| VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
|
| @@ -1522,15 +1710,40 @@ arm_override_options (void)
|
| if (TARGET_THUMB2 && TARGET_IWMMXT)
|
| sorry ("Thumb-2 iWMMXt");
|
|
|
| + /* __fp16 support currently assumes the core has ldrh. */
|
| + if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
|
| + sorry ("__fp16 and no ldrh");
|
| +
|
| /* If soft-float is specified then don't use FPU. */
|
| if (TARGET_SOFT_FLOAT)
|
| - arm_fpu_arch = FPUTYPE_NONE;
|
| + arm_fpu_attr = FPU_NONE;
|
| +
|
| + if (TARGET_AAPCS_BASED)
|
| + {
|
| + if (arm_abi == ARM_ABI_IWMMXT)
|
| + arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
|
| + else if (arm_float_abi == ARM_FLOAT_ABI_HARD
|
| + && TARGET_HARD_FLOAT
|
| + && TARGET_VFP)
|
| + arm_pcs_default = ARM_PCS_AAPCS_VFP;
|
| + else
|
| + arm_pcs_default = ARM_PCS_AAPCS;
|
| + }
|
| + else
|
| + {
|
| + if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
|
| + sorry ("-mfloat-abi=hard and VFP");
|
| +
|
| + if (arm_abi == ARM_ABI_APCS)
|
| + arm_pcs_default = ARM_PCS_APCS;
|
| + else
|
| + arm_pcs_default = ARM_PCS_ATPCS;
|
| + }
|
|
|
| /* For arm2/3 there is no need to do any scheduling if there is only
|
| a floating point emulator, or we are doing software floating-point. */
|
| if ((TARGET_SOFT_FLOAT
|
| - || arm_fpu_tune == FPUTYPE_FPA_EMU2
|
| - || arm_fpu_tune == FPUTYPE_FPA_EMU3)
|
| + || (TARGET_FPA && arm_fpu_desc->rev))
|
| && (tune_flags & FL_MODE32) == 0)
|
| flag_schedule_insns = flag_schedule_insns_after_reload = 0;
|
|
|
| @@ -1549,7 +1762,7 @@ arm_override_options (void)
|
| /* Use the cp15 method if it is available. */
|
| if (target_thread_pointer == TP_AUTO)
|
| {
|
| - if (arm_arch6k && !TARGET_THUMB)
|
| + if (arm_arch6k && !TARGET_THUMB1)
|
| target_thread_pointer = TP_CP15;
|
| else
|
| target_thread_pointer = TP_SOFT;
|
| @@ -1620,8 +1833,7 @@ arm_override_options (void)
|
| fix_cm3_ldrd = 0;
|
| }
|
|
|
| - /* ??? We might want scheduling for thumb2. */
|
| - if (TARGET_THUMB && flag_schedule_insns)
|
| + if (TARGET_THUMB1 && flag_schedule_insns)
|
| {
|
| /* Don't warn since it's on by default in -O2. */
|
| flag_schedule_insns = 0;
|
| @@ -1655,12 +1867,15 @@ arm_override_options (void)
|
| max_insns_skipped = 3;
|
| }
|
|
|
| - /* Ideally we would want to use CFI directives to generate
|
| - debug info. However this also creates the .eh_frame
|
| - section, so disable them until GAS can handle
|
| - this properly. See PR40521. */
|
| - if (TARGET_AAPCS_BASED)
|
| - flag_dwarf2_cfi_asm = 0;
|
| + /* Hot/Cold partitioning is not currently supported, since we can't
|
| + handle literal pool placement in that case. */
|
| + if (flag_reorder_blocks_and_partition)
|
| + {
|
| + inform (input_location,
|
| + "-freorder-blocks-and-partition not supported on this architecture");
|
| + flag_reorder_blocks_and_partition = 0;
|
| + flag_reorder_blocks = 1;
|
| + }
|
|
|
| /* Register global variables with the garbage collector. */
|
| arm_add_gc_roots ();
|
| @@ -1794,6 +2009,84 @@ arm_allocate_stack_slots_for_args (void)
|
| }
|
|
|
|
|
| +/* Output assembler code for a block containing the constant parts
|
| + of a trampoline, leaving space for the variable parts.
|
| +
|
| + On the ARM, (if r8 is the static chain regnum, and remembering that
|
| + referencing pc adds an offset of 8) the trampoline looks like:
|
| + ldr r8, [pc, #0]
|
| + ldr pc, [pc]
|
| + .word static chain value
|
| + .word function's address
|
| + XXX FIXME: When the trampoline returns, r8 will be clobbered. */
|
| +
|
| +static void
|
| +arm_asm_trampoline_template (FILE *f)
|
| +{
|
| + if (TARGET_ARM)
|
| + {
|
| + asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
|
| + asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
|
| + }
|
| + else if (TARGET_THUMB2)
|
| + {
|
| + /* The Thumb-2 trampoline is similar to the ARM implementation.
|
| + Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
|
| + asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
|
| + STATIC_CHAIN_REGNUM, PC_REGNUM);
|
| + asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
|
| + }
|
| + else
|
| + {
|
| + ASM_OUTPUT_ALIGN (f, 2);
|
| + fprintf (f, "\t.code\t16\n");
|
| + fprintf (f, ".Ltrampoline_start:\n");
|
| + asm_fprintf (f, "\tpush\t{r0, r1}\n");
|
| + asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
|
| + asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
|
| + asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
|
| + asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
|
| + asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
|
| + }
|
| + assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
|
| + assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
|
| +}
|
| +
|
| +/* Emit RTL insns to initialize the variable parts of a trampoline. */
|
| +
|
| +static void
|
| +arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
|
| +{
|
| + rtx fnaddr, mem, a_tramp;
|
| +
|
| + emit_block_move (m_tramp, assemble_trampoline_template (),
|
| + GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
|
| +
|
| + mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
|
| + emit_move_insn (mem, chain_value);
|
| +
|
| + mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
|
| + fnaddr = XEXP (DECL_RTL (fndecl), 0);
|
| + emit_move_insn (mem, fnaddr);
|
| +
|
| + a_tramp = XEXP (m_tramp, 0);
|
| + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
|
| + LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
|
| + plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
|
| +}
|
| +
|
| +/* Thumb trampolines should be entered in thumb mode, so set
|
| + the bottom bit of the address. */
|
| +
|
| +static rtx
|
| +arm_trampoline_adjust_address (rtx addr)
|
| +{
|
| + if (TARGET_THUMB)
|
| + addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
|
| + NULL, 0, OPTAB_LIB_WIDEN);
|
| + return addr;
|
| +}
|
| +
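[Note: these three hooks only come into play for GNU C nested functions
whose address is taken, which is what materializes a trampoline on the
stack.  A minimal sketch:

    int outer (int x)
    {
      int inner (int y) { return x + y; }  /* Needs the static chain.  */
      int (*fp) (int) = inner;  /* Forces a stack trampoline; on Thumb
                                   targets arm_trampoline_adjust_address
                                   sets bit 0 so the indirect call stays
                                   in Thumb state.  */
      return fp (1);
    }
]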
|
| /* Return 1 if it is possible to return using a single instruction.
|
| If SIBLING is non-null, this is a test for a return before a sibling
|
| call. SIBLING is the call insn, so we can examine its register usage. */
|
| @@ -2014,7 +2307,11 @@ const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
|
|
|
| case MINUS: /* Should only occur with (MINUS I reg) => rsb */
|
| case XOR:
|
| + return 0;
|
| +
|
| case IOR:
|
| + if (TARGET_THUMB2)
|
| + return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
|
| return 0;
|
|
|
| case AND:
|
| @@ -2102,20 +2399,24 @@ arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
|
| 1);
|
| }
|
|
|
| -/* Return the number of ARM instructions required to synthesize the given
|
| - constant. */
|
| +/* Return the number of instructions required to synthesize the given
|
| + constant, if we start emitting them from bit-position I. */
|
| static int
|
| count_insns_for_constant (HOST_WIDE_INT remainder, int i)
|
| {
|
| HOST_WIDE_INT temp1;
|
| + int step_size = TARGET_ARM ? 2 : 1;
|
| int num_insns = 0;
|
| +
|
| + gcc_assert (TARGET_ARM || i == 0);
|
| +
|
| do
|
| {
|
| int end;
|
|
|
| if (i <= 0)
|
| i += 32;
|
| - if (remainder & (3 << (i - 2)))
|
| + if (remainder & (((1 << step_size) - 1) << (i - step_size)))
|
| {
|
| end = i - 8;
|
| if (end < 0)
|
| @@ -2124,13 +2425,77 @@ count_insns_for_constant (HOST_WIDE_INT remainder, int i)
|
| | ((i < end) ? (0xff >> (32 - end)) : 0));
|
| remainder &= ~temp1;
|
| num_insns++;
|
| - i -= 6;
|
| + i -= 8 - step_size;
|
| }
|
| - i -= 2;
|
| + i -= step_size;
|
| } while (remainder);
|
| return num_insns;
|
| }
|
|
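[Note: step_size reflects the immediate-encoding granularity: an ARM-mode
data-processing immediate is an 8-bit value rotated right by an even
amount, while a Thumb-2 modified immediate allows odd rotations too, so
the scan may advance one bit at a time.  For example:

    0x000003fc   /* 0xff << 2, even rotation: one ARM mov          */
    0x000001fe   /* 0xff << 1, odd rotation: two ARM instructions,
                    but a single Thumb-2 mov                       */
]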
|
| +static int
|
| +find_best_start (unsigned HOST_WIDE_INT remainder)
|
| +{
|
| + int best_consecutive_zeros = 0;
|
| + int i;
|
| + int best_start = 0;
|
| +
|
| + /* If we aren't targeting ARM, the best place to start is always at
|
| + the bottom. */
|
| + if (! TARGET_ARM)
|
| + return 0;
|
| +
|
| + for (i = 0; i < 32; i += 2)
|
| + {
|
| + int consecutive_zeros = 0;
|
| +
|
| + if (!(remainder & (3 << i)))
|
| + {
|
| + while ((i < 32) && !(remainder & (3 << i)))
|
| + {
|
| + consecutive_zeros += 2;
|
| + i += 2;
|
| + }
|
| + if (consecutive_zeros > best_consecutive_zeros)
|
| + {
|
| + best_consecutive_zeros = consecutive_zeros;
|
| + best_start = i - consecutive_zeros;
|
| + }
|
| + i -= 2;
|
| + }
|
| + }
|
| +
|
| + /* So long as it won't require any more insns to do so, it's
|
| + desirable to emit a small constant (in bits 0...9) in the last
|
| + insn. This way there is more chance that it can be combined with
|
| + a later addressing insn to form a pre-indexed load or store
|
| + operation. Consider:
|
| +
|
| + *((volatile int *)0xe0000100) = 1;
|
| + *((volatile int *)0xe0000110) = 2;
|
| +
|
| + We want this to wind up as:
|
| +
|
| + mov rA, #0xe0000000
|
| + mov rB, #1
|
| + str rB, [rA, #0x100]
|
| + mov rB, #2
|
| + str rB, [rA, #0x110]
|
| +
|
| + rather than having to synthesize both large constants from scratch.
|
| +
|
| + Therefore, we calculate how many insns would be required to emit
|
| + the constant starting from `best_start', and also starting from
|
| + zero (i.e. with bit 31 first to be output). If `best_start' doesn't
|
| + yield a shorter sequence, we may as well use zero. */
|
| + if (best_start != 0
|
| + && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
|
| + && (count_insns_for_constant (remainder, 0) <=
|
| + count_insns_for_constant (remainder, best_start)))
|
| + best_start = 0;
|
| +
|
| + return best_start;
|
| +}
|
| +
|
| /* Emit an instruction with the indicated PATTERN. If COND is
|
| non-NULL, conditionalize the execution of the instruction on COND
|
| being true. */
|
| @@ -2154,6 +2519,7 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
|
| {
|
| int can_invert = 0;
|
| int can_negate = 0;
|
| + int final_invert = 0;
|
| int can_negate_initial = 0;
|
| int can_shift = 0;
|
| int i;
|
| @@ -2165,6 +2531,7 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
|
| int insns = 0;
|
| unsigned HOST_WIDE_INT temp1, temp2;
|
| unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
|
| + int step_size = TARGET_ARM ? 2 : 1;
|
|
|
| /* Find out which operations are safe for a given CODE. Also do a quick
|
| check for degenerate cases; these can occur when DImode operations
|
| @@ -2191,15 +2558,20 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
|
| GEN_INT (ARM_SIGN_EXTEND (val))));
|
| return 1;
|
| }
|
| +
|
| if (remainder == 0)
|
| {
|
| if (reload_completed && rtx_equal_p (target, source))
|
| return 0;
|
| +
|
| if (generate)
|
| emit_constant_insn (cond,
|
| gen_rtx_SET (VOIDmode, target, source));
|
| return 1;
|
| }
|
| +
|
| + if (TARGET_THUMB2)
|
| + can_invert = 1;
|
| break;
|
|
|
| case AND:
|
| @@ -2233,14 +2605,15 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
|
| return 1;
|
| }
|
|
|
| - /* We don't know how to handle other cases yet. */
|
| - gcc_assert (remainder == 0xffffffff);
|
| -
|
| - if (generate)
|
| - emit_constant_insn (cond,
|
| - gen_rtx_SET (VOIDmode, target,
|
| - gen_rtx_NOT (mode, source)));
|
| - return 1;
|
| + if (remainder == 0xffffffff)
|
| + {
|
| + if (generate)
|
| + emit_constant_insn (cond,
|
| + gen_rtx_SET (VOIDmode, target,
|
| + gen_rtx_NOT (mode, source)));
|
| + return 1;
|
| + }
|
| + break;
|
|
|
| case MINUS:
|
| /* We treat MINUS as (val - source), since (source - val) is always
|
| @@ -2287,6 +2660,7 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
|
|
|
| /* Calculate a few attributes that may be useful for specific
|
| optimizations. */
|
| + /* Count number of leading zeros. */
|
| for (i = 31; i >= 0; i--)
|
| {
|
| if ((remainder & (1 << i)) == 0)
|
| @@ -2295,6 +2669,7 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
|
| break;
|
| }
|
|
|
| + /* Count number of leading ones. */
|
| for (i = 31; i >= 0; i--)
|
| {
|
| if ((remainder & (1 << i)) != 0)
|
| @@ -2303,6 +2678,7 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
|
| break;
|
| }
|
|
|
| + /* Count number of trailing zeros. */
|
| for (i = 0; i <= 31; i++)
|
| {
|
| if ((remainder & (1 << i)) == 0)
|
| @@ -2311,6 +2687,7 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
|
| break;
|
| }
|
|
|
| + /* Count number of trailing ones. */
|
| for (i = 0; i <= 31; i++)
|
| {
|
| if ((remainder & (1 << i)) != 0)
|
| @@ -2498,6 +2875,17 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
|
| if (code == XOR)
|
| break;
|
|
|
| + /* Convert
|
| + x = y | constant (which is composed of set_sign_bit_copies of leading 1s
|
| + and the remainder 0s, e.g. 0xfff00000) into
|
| + x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
|
| +
|
| + This can be done in 2 instructions by using shifts with mov or mvn.
|
| + E.g. for
|
| + x = x | 0xfff00000;
|
| + we generate:
|
| + mvn r0, r0, asl #12
|
| + mvn r0, r0, lsr #12 */
|
| if (set_sign_bit_copies > 8
|
| && (val & (-1 << (32 - set_sign_bit_copies))) == val)
|
| {
|
| @@ -2523,6 +2911,16 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
|
| return 2;
|
| }
|
|
|
| + /* Convert
|
| + x = y | constant (which has set_zero_bit_copies trailing ones)
|
| + to
|
| + x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
|
| + E.g. for r0 = r0 | 0xfff we generate:
|
| + mvn r0, r0, lsr #12
|
| + mvn r0, r0, asl #12 */
|
| if (set_zero_bit_copies > 8
|
| && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
|
| {
|
| @@ -2548,6 +2946,13 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
|
| return 2;
|
| }
|
|
|
| + /* This will never be reached for Thumb-2 because orn is a valid
|
| + instruction. This is for Thumb-1 and the 32-bit ARM cases.
|
| + x = y | constant (such that ~constant is a valid constant).
|
| + Transform this to
|
| + x = ~(~y & ~constant). */
|
| if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
|
| {
|
| if (generate)
|
| @@ -2657,10 +3062,27 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
|
| if (remainder & (1 << i))
|
| num_bits_set++;
|
|
|
| - if (code == AND || (can_invert && num_bits_set > 16))
|
| - remainder = (~remainder) & 0xffffffff;
|
| + if ((code == AND)
|
| + || (code != IOR && can_invert && num_bits_set > 16))
|
| + remainder ^= 0xffffffff;
|
| else if (code == PLUS && num_bits_set > 16)
|
| remainder = (-remainder) & 0xffffffff;
|
| +
|
| + /* For XOR, if more than half the bits are set and there's a sequence
|
| + of more than 8 consecutive ones in the pattern then we can XOR by the
|
| + inverted constant and then invert the final result; this may save an
|
| + instruction and might also lead to the final mvn being merged with
|
| + some other operation. */
|
| + else if (code == XOR && num_bits_set > 16
|
| + && (count_insns_for_constant (remainder ^ 0xffffffff,
|
| + find_best_start
|
| + (remainder ^ 0xffffffff))
|
| + < count_insns_for_constant (remainder,
|
| + find_best_start (remainder))))
|
| + {
|
| + remainder ^= 0xffffffff;
|
| + final_invert = 1;
|
| + }
|
| else
|
| {
|
| can_invert = 0;
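[Note: a worked instance of the new XOR path.  For

    x ^= 0xfffffffe;   /* 31 bits set; not a valid immediate.  */

num_bits_set > 16 and the inverted constant 0x00000001 is cheaper, so by
the identity x ^ ~c == ~(x ^ c) the sequence becomes

    eor rD, rS, #1
    mvn rD, rD

with the trailing mvn emitted by the new final_invert handling at the
end of arm_gen_constant (see below).]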
|
| @@ -2679,63 +3101,8 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
|
| /* ??? Use thumb2 replicated constants when the high and low halfwords are
|
| the same. */
|
| {
|
| - int best_start = 0;
|
| - if (!TARGET_THUMB2)
|
| - {
|
| - int best_consecutive_zeros = 0;
|
| -
|
| - for (i = 0; i < 32; i += 2)
|
| - {
|
| - int consecutive_zeros = 0;
|
| -
|
| - if (!(remainder & (3 << i)))
|
| - {
|
| - while ((i < 32) && !(remainder & (3 << i)))
|
| - {
|
| - consecutive_zeros += 2;
|
| - i += 2;
|
| - }
|
| - if (consecutive_zeros > best_consecutive_zeros)
|
| - {
|
| - best_consecutive_zeros = consecutive_zeros;
|
| - best_start = i - consecutive_zeros;
|
| - }
|
| - i -= 2;
|
| - }
|
| - }
|
| -
|
| - /* So long as it won't require any more insns to do so, it's
|
| - desirable to emit a small constant (in bits 0...9) in the last
|
| - insn. This way there is more chance that it can be combined with
|
| - a later addressing insn to form a pre-indexed load or store
|
| - operation. Consider:
|
| -
|
| - *((volatile int *)0xe0000100) = 1;
|
| - *((volatile int *)0xe0000110) = 2;
|
| -
|
| - We want this to wind up as:
|
| -
|
| - mov rA, #0xe0000000
|
| - mov rB, #1
|
| - str rB, [rA, #0x100]
|
| - mov rB, #2
|
| - str rB, [rA, #0x110]
|
| -
|
| - rather than having to synthesize both large constants from scratch.
|
| -
|
| - Therefore, we calculate how many insns would be required to emit
|
| - the constant starting from `best_start', and also starting from
|
| - zero (i.e. with bit 31 first to be output). If `best_start' doesn't
|
| - yield a shorter sequence, we may as well use zero. */
|
| - if (best_start != 0
|
| - && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
|
| - && (count_insns_for_constant (remainder, 0) <=
|
| - count_insns_for_constant (remainder, best_start)))
|
| - best_start = 0;
|
| - }
|
| -
|
| /* Now start emitting the insns. */
|
| - i = best_start;
|
| + i = find_best_start (remainder);
|
| do
|
| {
|
| int end;
|
| @@ -2763,7 +3130,7 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
|
| }
|
| else
|
| {
|
| - if (remainder && subtargets)
|
| + if ((final_invert || remainder) && subtargets)
|
| new_src = gen_reg_rtx (mode);
|
| else
|
| new_src = target;
|
| @@ -2798,21 +3165,23 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
|
| code = PLUS;
|
|
|
| insns++;
|
| - if (TARGET_ARM)
|
| - i -= 6;
|
| - else
|
| - i -= 7;
|
| + i -= 8 - step_size;
|
| }
|
| /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
|
| shifts. */
|
| - if (TARGET_ARM)
|
| - i -= 2;
|
| - else
|
| - i--;
|
| + i -= step_size;
|
| }
|
| while (remainder);
|
| }
|
|
|
| + if (final_invert)
|
| + {
|
| + if (generate)
|
| + emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
|
| + gen_rtx_NOT (mode, source)));
|
| + insns++;
|
| + }
|
| +
|
| return insns;
|
| }
|
|
|
| @@ -2884,17 +3253,22 @@ arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
|
|
|
| /* Define how to find the value returned by a function. */
|
|
|
| -rtx
|
| -arm_function_value(const_tree type, const_tree func ATTRIBUTE_UNUSED)
|
| +static rtx
|
| +arm_function_value(const_tree type, const_tree func,
|
| + bool outgoing ATTRIBUTE_UNUSED)
|
| {
|
| enum machine_mode mode;
|
| int unsignedp ATTRIBUTE_UNUSED;
|
| rtx r ATTRIBUTE_UNUSED;
|
|
|
| mode = TYPE_MODE (type);
|
| +
|
| + if (TARGET_AAPCS_BASED)
|
| + return aapcs_allocate_return_reg (mode, type, func);
|
| +
|
| /* Promote integer types. */
|
| if (INTEGRAL_TYPE_P (type))
|
| - PROMOTE_FUNCTION_MODE (mode, unsignedp, type);
|
| + mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
|
|
|
| /* Promotes small structs returned in a register to full-word size
|
| for big-endian AAPCS. */
|
| @@ -2908,7 +3282,88 @@ arm_function_value(const_tree type, const_tree func ATTRIBUTE_UNUSED)
|
| }
|
| }
|
|
|
| - return LIBCALL_VALUE(mode);
|
| + return LIBCALL_VALUE (mode);
|
| +}
|
| +
|
| +static int
|
| +libcall_eq (const void *p1, const void *p2)
|
| +{
|
| + return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
|
| +}
|
| +
|
| +static hashval_t
|
| +libcall_hash (const void *p1)
|
| +{
|
| + return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
|
| +}
|
| +
|
| +static void
|
| +add_libcall (htab_t htab, rtx libcall)
|
| +{
|
| + *htab_find_slot (htab, libcall, INSERT) = libcall;
|
| +}
|
| +
|
| +static bool
|
| +arm_libcall_uses_aapcs_base (const_rtx libcall)
|
| +{
|
| + static bool init_done = false;
|
| + static htab_t libcall_htab;
|
| +
|
| + if (!init_done)
|
| + {
|
| + init_done = true;
|
| +
|
| + libcall_htab = htab_create (31, libcall_hash, libcall_eq,
|
| + NULL);
|
| + add_libcall (libcall_htab,
|
| + convert_optab_libfunc (sfloat_optab, SFmode, SImode));
|
| + add_libcall (libcall_htab,
|
| + convert_optab_libfunc (sfloat_optab, DFmode, SImode));
|
| + add_libcall (libcall_htab,
|
| + convert_optab_libfunc (sfloat_optab, SFmode, DImode));
|
| + add_libcall (libcall_htab,
|
| + convert_optab_libfunc (sfloat_optab, DFmode, DImode));
|
| +
|
| + add_libcall (libcall_htab,
|
| + convert_optab_libfunc (ufloat_optab, SFmode, SImode));
|
| + add_libcall (libcall_htab,
|
| + convert_optab_libfunc (ufloat_optab, DFmode, SImode));
|
| + add_libcall (libcall_htab,
|
| + convert_optab_libfunc (ufloat_optab, SFmode, DImode));
|
| + add_libcall (libcall_htab,
|
| + convert_optab_libfunc (ufloat_optab, DFmode, DImode));
|
| +
|
| + add_libcall (libcall_htab,
|
| + convert_optab_libfunc (sext_optab, SFmode, HFmode));
|
| + add_libcall (libcall_htab,
|
| + convert_optab_libfunc (trunc_optab, HFmode, SFmode));
|
| + add_libcall (libcall_htab,
|
| + convert_optab_libfunc (sfix_optab, DImode, DFmode));
|
| + add_libcall (libcall_htab,
|
| + convert_optab_libfunc (ufix_optab, DImode, DFmode));
|
| + add_libcall (libcall_htab,
|
| + convert_optab_libfunc (sfix_optab, DImode, SFmode));
|
| + add_libcall (libcall_htab,
|
| + convert_optab_libfunc (ufix_optab, DImode, SFmode));
|
| + }
|
| +
|
| + return libcall && htab_find (libcall_htab, libcall) != NULL;
|
| +}
|
| +
|
| +rtx
|
| +arm_libcall_value (enum machine_mode mode, const_rtx libcall)
|
| +{
|
| + if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
|
| + && GET_MODE_CLASS (mode) == MODE_FLOAT)
|
| + {
|
| + /* The following libcalls return their result in integer registers,
|
| + even though they return a floating point value. */
|
| + if (arm_libcall_uses_aapcs_base (libcall))
|
| + return gen_rtx_REG (mode, ARG_REGISTER(1));
|
| +
|
| + }
|
| +
|
| + return LIBCALL_VALUE (mode);
|
| }
|
|
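[Note: the libcalls collected in arm_libcall_uses_aapcs_base are the
AEABI conversion helpers, which the runtime provides with base-AAPCS
linkage even on hard-float systems.  A sketch, compiled with
-mfloat-abi=hard so that arm_pcs_default is ARM_PCS_AAPCS_VFP
(get_value is hypothetical):

    long long ll = get_value ();
    double d = ll;   /* __aeabi_l2d: the DFmode result arrives in
                        r0/r1, not d0.  */

Hence arm_libcall_value must report a core register for these calls.]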
|
| /* Determine the amount of memory needed to store the possible return
|
| @@ -2918,10 +3373,12 @@ arm_apply_result_size (void)
|
| {
|
| int size = 16;
|
|
|
| - if (TARGET_ARM)
|
| + if (TARGET_32BIT)
|
| {
|
| if (TARGET_HARD_FLOAT_ABI)
|
| {
|
| + if (TARGET_VFP)
|
| + size += 32;
|
| if (TARGET_FPA)
|
| size += 12;
|
| if (TARGET_MAVERICK)
|
| @@ -2934,27 +3391,56 @@ arm_apply_result_size (void)
|
| return size;
|
| }
|
|
|
| -/* Decide whether a type should be returned in memory (true)
|
| - or in a register (false). This is called as the target hook
|
| - TARGET_RETURN_IN_MEMORY. */
|
| +/* Decide whether TYPE should be returned in memory (true)
|
| + or in a register (false). FNTYPE is the type of the function making
|
| + the call. */
|
| static bool
|
| -arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
|
| +arm_return_in_memory (const_tree type, const_tree fntype)
|
| {
|
| HOST_WIDE_INT size;
|
|
|
| - size = int_size_in_bytes (type);
|
| + size = int_size_in_bytes (type); /* Negative if not fixed size. */
|
| +
|
| + if (TARGET_AAPCS_BASED)
|
| + {
|
| + /* Simple, non-aggregate types (i.e. not including vectors and
|
| + complex) are always returned in a register (or registers).
|
| + We don't care about which register here, so we can short-cut
|
| + some of the detail. */
|
| + if (!AGGREGATE_TYPE_P (type)
|
| + && TREE_CODE (type) != VECTOR_TYPE
|
| + && TREE_CODE (type) != COMPLEX_TYPE)
|
| + return false;
|
| +
|
| + /* Any return value that is no larger than one word can be
|
| + returned in r0. */
|
| + if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
|
| + return false;
|
| +
|
| + /* Check any available co-processors to see if they accept the
|
| + type as a register candidate (VFP, for example, can return
|
| + some aggregates in consecutive registers). These aren't
|
| + available if the call is variadic. */
|
| + if (aapcs_select_return_coproc (type, fntype) >= 0)
|
| + return false;
|
| +
|
| + /* Vector values should be returned using ARM registers, not
|
| + memory (unless they're over 16 bytes, which will break since
|
| + we only have four call-clobbered registers to play with). */
|
| + if (TREE_CODE (type) == VECTOR_TYPE)
|
| + return (size < 0 || size > (4 * UNITS_PER_WORD));
|
| +
|
| + /* The rest go in memory. */
|
| + return true;
|
| + }
|
|
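[Note: concrete cases for the AAPCS branch above, assuming no
co-processor (VFP) return candidate applies:

    struct s1 { char c; };    /* 1 byte  -> returned in r0      */
    struct s4 { int a; };     /* 4 bytes -> returned in r0      */
    struct s8 { int a, b; };  /* 8 bytes -> returned in memory  */
]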
|
| - /* Vector values should be returned using ARM registers, not memory (unless
|
| - they're over 16 bytes, which will break since we only have four
|
| - call-clobbered registers to play with). */
|
| if (TREE_CODE (type) == VECTOR_TYPE)
|
| return (size < 0 || size > (4 * UNITS_PER_WORD));
|
|
|
| if (!AGGREGATE_TYPE_P (type) &&
|
| - !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE))
|
| - /* All simple types are returned in registers.
|
| - For AAPCS, complex types are treated the same as aggregates. */
|
| - return 0;
|
| + (TREE_CODE (type) != VECTOR_TYPE))
|
| + /* All simple types are returned in registers. */
|
| + return false;
|
|
|
| if (arm_abi != ARM_ABI_APCS)
|
| {
|
| @@ -2971,7 +3457,7 @@ arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
|
| the aggregate is either huge or of variable size, and in either case
|
| we will want to return it via memory and not in a register. */
|
| if (size < 0 || size > UNITS_PER_WORD)
|
| - return 1;
|
| + return true;
|
|
|
| if (TREE_CODE (type) == RECORD_TYPE)
|
| {
|
| @@ -2991,18 +3477,18 @@ arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
|
| continue;
|
|
|
| if (field == NULL)
|
| - return 0; /* An empty structure. Allowed by an extension to ANSI C. */
|
| + return false; /* An empty structure. Allowed by an extension to ANSI C. */
|
|
|
| /* Check that the first field is valid for returning in a register. */
|
|
|
| /* ... Floats are not allowed */
|
| if (FLOAT_TYPE_P (TREE_TYPE (field)))
|
| - return 1;
|
| + return true;
|
|
|
| /* ... Aggregates that are not themselves valid for returning in
|
| a register are not allowed. */
|
| if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
|
| - return 1;
|
| + return true;
|
|
|
| /* Now check the remaining fields, if any. Only bitfields are allowed,
|
| since they are not addressable. */
|
| @@ -3014,10 +3500,10 @@ arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
|
| continue;
|
|
|
| if (!DECL_BIT_FIELD_TYPE (field))
|
| - return 1;
|
| + return true;
|
| }
|
|
|
| - return 0;
|
| + return false;
|
| }
|
|
|
| if (TREE_CODE (type) == UNION_TYPE)
|
| @@ -3034,18 +3520,18 @@ arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
|
| continue;
|
|
|
| if (FLOAT_TYPE_P (TREE_TYPE (field)))
|
| - return 1;
|
| + return true;
|
|
|
| if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
|
| - return 1;
|
| + return true;
|
| }
|
|
|
| - return 0;
|
| + return false;
|
| }
|
| #endif /* not ARM_WINCE */
|
|
|
| /* Return all other types in memory. */
|
| - return 1;
|
| + return true;
|
| }
|
|
|
| /* Indicate whether or not words of a double are in big-endian order. */
|
| @@ -3070,14 +3556,770 @@ arm_float_words_big_endian (void)
|
| return 1;
|
| }
|
|
|
| +const struct pcs_attribute_arg
|
| +{
|
| + const char *arg;
|
| + enum arm_pcs value;
|
| +} pcs_attribute_args[] =
|
| + {
|
| + {"aapcs", ARM_PCS_AAPCS},
|
| + {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
|
| +#if 0
|
| + /* We could recognize these, but changes would be needed elsewhere
|
| + * to implement them. */
|
| + {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
|
| + {"atpcs", ARM_PCS_ATPCS},
|
| + {"apcs", ARM_PCS_APCS},
|
| +#endif
|
| + {NULL, ARM_PCS_UNKNOWN}
|
| + };
|
| +
|
| +static enum arm_pcs
|
| +arm_pcs_from_attribute (tree attr)
|
| +{
|
| + const struct pcs_attribute_arg *ptr;
|
| + const char *arg;
|
| +
|
| + /* Get the value of the argument. */
|
| + if (TREE_VALUE (attr) == NULL_TREE
|
| + || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
|
| + return ARM_PCS_UNKNOWN;
|
| +
|
| + arg = TREE_STRING_POINTER (TREE_VALUE (attr));
|
| +
|
| + /* Check it against the list of known arguments. */
|
| + for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
|
| + if (streq (arg, ptr->arg))
|
| + return ptr->value;
|
| +
|
| + /* An unrecognized PCS name. */
|
| + return ARM_PCS_UNKNOWN;
|
| +}
|
| +
|
| +/* Get the PCS variant to use for this call. TYPE is the function's type
|
| + specification, DECL is the specific declaration. DECL may be null if
|
| + the call could be indirect or if this is a library call. */
|
| +static enum arm_pcs
|
| +arm_get_pcs_model (const_tree type, const_tree decl)
|
| +{
|
| + bool user_convention = false;
|
| + enum arm_pcs user_pcs = arm_pcs_default;
|
| + tree attr;
|
| +
|
| + gcc_assert (type);
|
| +
|
| + attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
|
| + if (attr)
|
| + {
|
| + user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
|
| + user_convention = true;
|
| + }
|
| +
|
| + if (TARGET_AAPCS_BASED)
|
| + {
|
| + /* Detect varargs functions. These always use the base rules
|
| + (no argument is ever a candidate for a co-processor
|
| + register). */
|
| + bool base_rules = (TYPE_ARG_TYPES (type) != 0
|
| + && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (type)))
|
| + != void_type_node));
|
| +
|
| + if (user_convention)
|
| + {
|
| + if (user_pcs > ARM_PCS_AAPCS_LOCAL)
|
| + sorry ("Non-AAPCS derived PCS variant");
|
| + else if (base_rules && user_pcs != ARM_PCS_AAPCS)
|
| + error ("Variadic functions must use the base AAPCS variant");
|
| + }
|
| +
|
| + if (base_rules)
|
| + return ARM_PCS_AAPCS;
|
| + else if (user_convention)
|
| + return user_pcs;
|
| + else if (decl && flag_unit_at_a_time)
|
| + {
|
| + /* Local functions never leak outside this compilation unit,
|
| + so we are free to use whatever conventions are
|
| + appropriate. */
|
| + /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
|
| + struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
|
| + if (i && i->local)
|
| + return ARM_PCS_AAPCS_LOCAL;
|
| + }
|
| + }
|
| + else if (user_convention && user_pcs != arm_pcs_default)
|
| + sorry ("PCS variant");
|
| +
|
| + /* For everything else we use the target's default. */
|
| + return arm_pcs_default;
|
| +}
|
| +
|
| +
|
| +static void
|
| +aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
|
| + const_tree fntype ATTRIBUTE_UNUSED,
|
| + rtx libcall ATTRIBUTE_UNUSED,
|
| + const_tree fndecl ATTRIBUTE_UNUSED)
|
| +{
|
| + /* Record the unallocated VFP registers. */
|
| + pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
|
| + pcum->aapcs_vfp_reg_alloc = 0;
|
| +}
|
| +
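[Note: the walker below implements the AAPCS notion of a homogeneous
aggregate: a composite built entirely from a single base machine mode
(the caller enforces the 1-4 element limit).  For example:

    struct c2 { double re, im; };       /* -> 2 x DFmode, candidate   */
    struct v4 { float v[4]; };          /* -> 4 x SFmode, candidate   */
    struct mx { float f; double d; };   /* mixed modes -> returns -1  */
]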
|
| +/* Walk down the type tree of TYPE counting consecutive base elements.
|
| + If *MODEP is VOIDmode, then set it to the first valid floating point
|
| + type. If a non-floating point type is found, or if a floating point
|
| + type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
|
| + otherwise return the count in the sub-tree. */
|
| +static int
|
| +aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
|
| +{
|
| + enum machine_mode mode;
|
| + HOST_WIDE_INT size;
|
| +
|
| + switch (TREE_CODE (type))
|
| + {
|
| + case REAL_TYPE:
|
| + mode = TYPE_MODE (type);
|
| + if (mode != DFmode && mode != SFmode)
|
| + return -1;
|
| +
|
| + if (*modep == VOIDmode)
|
| + *modep = mode;
|
| +
|
| + if (*modep == mode)
|
| + return 1;
|
| +
|
| + break;
|
| +
|
| + case COMPLEX_TYPE:
|
| + mode = TYPE_MODE (TREE_TYPE (type));
|
| + if (mode != DFmode && mode != SFmode)
|
| + return -1;
|
| +
|
| + if (*modep == VOIDmode)
|
| + *modep = mode;
|
| +
|
| + if (*modep == mode)
|
| + return 2;
|
| +
|
| + break;
|
| +
|
| + case VECTOR_TYPE:
|
| + /* Use V2SImode and V4SImode as representatives of all 64-bit
|
| + and 128-bit vector types, whether or not those modes are
|
| + supported with the present options. */
|
| + size = int_size_in_bytes (type);
|
| + switch (size)
|
| + {
|
| + case 8:
|
| + mode = V2SImode;
|
| + break;
|
| + case 16:
|
| + mode = V4SImode;
|
| + break;
|
| + default:
|
| + return -1;
|
| + }
|
| +
|
| + if (*modep == VOIDmode)
|
| + *modep = mode;
|
| +
|
| + /* Vector modes are considered to be opaque: two vectors are
|
| + equivalent for the purposes of being homogeneous aggregates
|
| + if they are the same size. */
|
| + if (*modep == mode)
|
| + return 1;
|
| +
|
| + break;
|
| +
|
| + case ARRAY_TYPE:
|
| + {
|
| + int count;
|
| + tree index = TYPE_DOMAIN (type);
|
| +
|
| + /* Can't handle incomplete types. */
|
| + if (!COMPLETE_TYPE_P (type))
|
| + return -1;
|
| +
|
| + count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
|
| + if (count == -1
|
| + || !index
|
| + || !TYPE_MAX_VALUE (index)
|
| + || !host_integerp (TYPE_MAX_VALUE (index), 1)
|
| + || !TYPE_MIN_VALUE (index)
|
| + || !host_integerp (TYPE_MIN_VALUE (index), 1)
|
| + || count < 0)
|
| + return -1;
|
| +
|
| + count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
|
| + - tree_low_cst (TYPE_MIN_VALUE (index), 1));
|
| +
|
| + /* There must be no padding. */
|
| + if (!host_integerp (TYPE_SIZE (type), 1)
|
| + || (tree_low_cst (TYPE_SIZE (type), 1)
|
| + != count * GET_MODE_BITSIZE (*modep)))
|
| + return -1;
|
| +
|
| + return count;
|
| + }
|
| +
|
| + case RECORD_TYPE:
|
| + {
|
| + int count = 0;
|
| + int sub_count;
|
| + tree field;
|
| +
|
| + /* Can't handle incomplete types. */
|
| + if (!COMPLETE_TYPE_P (type))
|
| + return -1;
|
| +
|
| + for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
|
| + {
|
| + if (TREE_CODE (field) != FIELD_DECL)
|
| + continue;
|
| +
|
| + sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
|
| + if (sub_count < 0)
|
| + return -1;
|
| + count += sub_count;
|
| + }
|
| +
|
| + /* There must be no padding. */
|
| + if (!host_integerp (TYPE_SIZE (type), 1)
|
| + || (tree_low_cst (TYPE_SIZE (type), 1)
|
| + != count * GET_MODE_BITSIZE (*modep)))
|
| + return -1;
|
| +
|
| + return count;
|
| + }
|
| +
|
| + case UNION_TYPE:
|
| + case QUAL_UNION_TYPE:
|
| + {
|
| + /* These aren't very interesting except in a degenerate case. */
|
| + int count = 0;
|
| + int sub_count;
|
| + tree field;
|
| +
|
| + /* Can't handle incomplete types. */
|
| + if (!COMPLETE_TYPE_P (type))
|
| + return -1;
|
| +
|
| + for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
|
| + {
|
| + if (TREE_CODE (field) != FIELD_DECL)
|
| + continue;
|
| +
|
| + sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
|
| + if (sub_count < 0)
|
| + return -1;
|
| + count = count > sub_count ? count : sub_count;
|
| + }
|
| +
|
| + /* There must be no padding. */
|
| + if (!host_integerp (TYPE_SIZE (type), 1)
|
| + || (tree_low_cst (TYPE_SIZE (type), 1)
|
| + != count * GET_MODE_BITSIZE (*modep)))
|
| + return -1;
|
| +
|
| + return count;
|
| + }
|
| +
|
| + default:
|
| + break;
|
| + }
|
| +
|
| + return -1;
|
| +}
|
| +
|
| +/* Return true if PCS_VARIANT should use VFP registers. IS_DOUBLE is
|
| + true when double-precision registers are needed; the local variant
|
| + only allows these when the target provides them. */
|
| +static bool
|
| +use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
|
| +{
|
| + if (pcs_variant == ARM_PCS_AAPCS_VFP)
|
| + return true;
|
| +
|
| + if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
|
| + return false;
|
| +
|
| + return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT
|
| + && (TARGET_VFP_DOUBLE || !is_double));
|
| +}
|
| +
|
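| +/* Return true if an argument of mode MODE (or type TYPE, if MODE is
|
| + BLKmode) is a candidate for the VFP registers under PCS_VARIANT;
|
| + if so, set *BASE_MODE to the mode of a single element and *COUNT
|
| + to the number of elements to allocate. */
|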
| +static bool
|
| +aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
|
| + enum machine_mode mode, const_tree type,
|
| + enum machine_mode *base_mode, int *count)
|
| +{
|
| + enum machine_mode new_mode = VOIDmode;
|
| +
|
| + if (GET_MODE_CLASS (mode) == MODE_FLOAT
|
| + || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
|
| + || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
|
| + {
|
| + *count = 1;
|
| + new_mode = mode;
|
| + }
|
| + else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
|
| + {
|
| + *count = 2;
|
| + new_mode = (mode == DCmode ? DFmode : SFmode);
|
| + }
|
| + else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
|
| + {
|
| + int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
|
| +
|
| + if (ag_count > 0 && ag_count <= 4)
|
| + *count = ag_count;
|
| + else
|
| + return false;
|
| + }
|
| + else
|
| + return false;
|
| +
|
| + if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
|
| + return false;
|
| +
|
| + *base_mode = new_mode;
|
| + return true;
|
| +}
|
| +
|
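| +/* Return true if a result of mode MODE (or type TYPE, if MODE is
|
| + BLKmode) can be returned in VFP registers under PCS_VARIANT. */
|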
| +static bool
|
| +aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
|
| + enum machine_mode mode, const_tree type)
|
| +{
|
| + int count ATTRIBUTE_UNUSED;
|
| + enum machine_mode ag_mode ATTRIBUTE_UNUSED;
|
| +
|
| + if (!use_vfp_abi (pcs_variant, false))
|
| + return false;
|
| + return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
|
| + &ag_mode, &count);
|
| +}
|
| +
|
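| +/* Return true if the argument described by MODE and TYPE is a
|
| + candidate for the VFP registers; if so, record the base mode and
|
| + register count in PCUM for the allocation step. */
|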
| +static bool
|
| +aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
|
| + const_tree type)
|
| +{
|
| + if (!use_vfp_abi (pcum->pcs_variant, false))
|
| + return false;
|
| +
|
| + return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
|
| + &pcum->aapcs_vfp_rmode,
|
| + &pcum->aapcs_vfp_rcount);
|
| +}
|
| +
|
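| +/* Allocate VFP registers for the argument most recently accepted by
|
| + aapcs_vfp_is_call_candidate: scan for the lowest suitably aligned
|
| + block of free registers that is large enough, and return false if
|
| + none remains. */
|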
| +static bool
|
| +aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
|
| + const_tree type ATTRIBUTE_UNUSED)
|
| +{
|
| + int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
|
| + unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
|
| + int regno;
|
| +
|
| + for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
|
| + if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
|
| + {
|
| + pcum->aapcs_vfp_reg_alloc = mask << regno;
|
| + if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
|
| + {
|
| + int i;
|
| + int rcount = pcum->aapcs_vfp_rcount;
|
| + int rshift = shift;
|
| + enum machine_mode rmode = pcum->aapcs_vfp_rmode;
|
| + rtx par;
|
| + if (!TARGET_NEON)
|
| + {
|
| + /* Avoid using unsupported vector modes. */
|
| + if (rmode == V2SImode)
|
| + rmode = DImode;
|
| + else if (rmode == V4SImode)
|
| + {
|
| + rmode = DImode;
|
| + rcount *= 2;
|
| + rshift /= 2;
|
| + }
|
| + }
|
| + par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
|
| + for (i = 0; i < rcount; i++)
|
| + {
|
| + rtx tmp = gen_rtx_REG (rmode,
|
| + FIRST_VFP_REGNUM + regno + i * rshift);
|
| + tmp = gen_rtx_EXPR_LIST
|
| + (VOIDmode, tmp,
|
| + GEN_INT (i * GET_MODE_SIZE (rmode)));
|
| + XVECEXP (par, 0, i) = tmp;
|
| + }
|
| +
|
| + pcum->aapcs_reg = par;
|
| + }
|
| + else
|
| + pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
|
| + return true;
|
| + }
|
| + return false;
|
| +}
|
| +
|
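| +/* Return an RTX for the VFP register(s) holding a result of mode
|
| + MODE (and type TYPE, when MODE is BLKmode). */
|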
| +static rtx
|
| +aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
|
| + enum machine_mode mode,
|
| + const_tree type ATTRIBUTE_UNUSED)
|
| +{
|
| + if (!use_vfp_abi (pcs_variant, false))
|
| + return NULL;
|
| +
|
| + if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
|
| + {
|
| + int count;
|
| + enum machine_mode ag_mode;
|
| + int i;
|
| + rtx par;
|
| + int shift;
|
| +
|
| + aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
|
| + &ag_mode, &count);
|
| +
|
| + if (!TARGET_NEON)
|
| + {
|
| + if (ag_mode == V2SImode)
|
| + ag_mode = DImode;
|
| + else if (ag_mode == V4SImode)
|
| + {
|
| + ag_mode = DImode;
|
| + count *= 2;
|
| + }
|
| + }
|
| + shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
|
| + par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
|
| + for (i = 0; i < count; i++)
|
| + {
|
| + rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
|
| + tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
|
| + GEN_INT (i * GET_MODE_SIZE (ag_mode)));
|
| + XVECEXP (par, 0, i) = tmp;
|
| + }
|
| +
|
| + return par;
|
| + }
|
| +
|
| + return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
|
| +}
|
| +
|
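| +/* Finish with the current argument: mark its VFP registers as
|
| + permanently allocated and discard the provisional allocation. */
|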
| +static void
|
| +aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
|
| + enum machine_mode mode ATTRIBUTE_UNUSED,
|
| + const_tree type ATTRIBUTE_UNUSED)
|
| +{
|
| + pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
|
| + pcum->aapcs_vfp_reg_alloc = 0;
|
| + return;
|
| +}
|
| +
|
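| +/* Expand to one row of the co-processor layout table below, naming
|
| + the six handler routines for co-processor X. */
|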
| +#define AAPCS_CP(X) \
|
| + { \
|
| + aapcs_ ## X ## _cum_init, \
|
| + aapcs_ ## X ## _is_call_candidate, \
|
| + aapcs_ ## X ## _allocate, \
|
| + aapcs_ ## X ## _is_return_candidate, \
|
| + aapcs_ ## X ## _allocate_return_reg, \
|
| + aapcs_ ## X ## _advance \
|
| + }
|
| +
|
| +/* Table of co-processors that can be used to pass arguments in
|
| + registers. Ideally no argument should be a candidate for more than
|
| + one co-processor table entry, but the table is processed in order
|
| + and stops after the first match. If that entry then fails to put
|
| + the argument into a co-processor register, the argument will go on
|
| + the stack. */
|
| +static struct
|
| +{
|
| + /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
|
| + void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
|
| +
|
| + /* Return true if an argument of mode MODE (or type TYPE if MODE is
|
| + BLKmode) is a candidate for this co-processor's registers; this
|
| + function should ignore any position-dependent state in
|
| + CUMULATIVE_ARGS and only use call-type dependent information. */
|
| + bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
|
| +
|
| + /* Return true if the argument does get a co-processor register; it
|
| + should set aapcs_reg to an RTX of the allocated register, in the
|
| + form that FUNCTION_ARG must return. */
|
| + bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
|
| +
|
| + /* Return true if a result of mode MODE (or type TYPE if MODE is
|
| + BLKmode) can be returned in this co-processor's registers. */
|
| + bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
|
| +
|
| + /* Allocate and return an RTX to hold the return value of a call;
|
| + this routine must not fail and will only be called if
|
| + is_return_candidate returned true with the same parameters. */
|
| + rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
|
| +
|
| + /* Finish processing this argument and prepare to start processing
|
| + the next one. */
|
| + void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
|
| +} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
|
| + {
|
| + AAPCS_CP(vfp)
|
| + };
|
| +
|
| +#undef AAPCS_CP
|
| +
|
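| +/* Return the index of the co-processor table entry that should
|
| + handle the argument described by MODE and TYPE, or -1 if the
|
| + argument is not a candidate for any co-processor. */
|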
| +static int
|
| +aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
|
| + tree type)
|
| +{
|
| + int i;
|
| +
|
| + for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
|
| + if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
|
| + return i;
|
| +
|
| + return -1;
|
| +}
|
| +
|
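| +/* Likewise, but for a result of type TYPE returned from a function
|
| + of type (or decl) FNTYPE, which may be NULL. */
|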
| +static int
|
| +aapcs_select_return_coproc (const_tree type, const_tree fntype)
|
| +{
|
| + /* We aren't passed a decl, so we can't check that a call is local.
|
| + However, it isn't clear that that would be a win anyway, since it
|
| + might limit some tail-calling opportunities. */
|
| + enum arm_pcs pcs_variant;
|
| +
|
| + if (fntype)
|
| + {
|
| + const_tree fndecl = NULL_TREE;
|
| +
|
| + if (TREE_CODE (fntype) == FUNCTION_DECL)
|
| + {
|
| + fndecl = fntype;
|
| + fntype = TREE_TYPE (fntype);
|
| + }
|
| +
|
| + pcs_variant = arm_get_pcs_model (fntype, fndecl);
|
| + }
|
| + else
|
| + pcs_variant = arm_pcs_default;
|
| +
|
| + if (pcs_variant != ARM_PCS_AAPCS)
|
| + {
|
| + int i;
|
| +
|
| + for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
|
| + if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
|
| + TYPE_MODE (type),
|
| + type))
|
| + return i;
|
| + }
|
| + return -1;
|
| +}
|
| +
|
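| +/* Return an RTX describing where a result of mode MODE (and type
|
| + TYPE, if known) is returned from a function of type (or decl)
|
| + FNTYPE, which may be NULL for an indirect or library call. */
|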
| +static rtx
|
| +aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
|
| + const_tree fntype)
|
| +{
|
| + /* We aren't passed a decl, so we can't check that a call is local.
|
| + However, it isn't clear that that would be a win anyway, since it
|
| + might limit some tail-calling opportunities. */
|
| + enum arm_pcs pcs_variant;
|
| + int unsignedp ATTRIBUTE_UNUSED;
|
| +
|
| + if (fntype)
|
| + {
|
| + const_tree fndecl = NULL_TREE;
|
| +
|
| + if (TREE_CODE (fntype) == FUNCTION_DECL)
|
| + {
|
| + fndecl = fntype;
|
| + fntype = TREE_TYPE (fntype);
|
| + }
|
| +
|
| + pcs_variant = arm_get_pcs_model (fntype, fndecl);
|
| + }
|
| + else
|
| + pcs_variant = arm_pcs_default;
|
| +
|
| + /* Promote integer types. */
|
| + if (type && INTEGRAL_TYPE_P (type))
|
| + mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
|
| +
|
| + if (pcs_variant != ARM_PCS_AAPCS)
|
| + {
|
| + int i;
|
| +
|
| + for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
|
| + if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
|
| + type))
|
| + return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
|
| + mode, type);
|
| + }
|
| +
|
| + /* Promote small structs returned in a register to full-word size
|
| + for big-endian AAPCS. */
|
| + if (type && arm_return_in_msb (type))
|
| + {
|
| + HOST_WIDE_INT size = int_size_in_bytes (type);
|
| + if (size % UNITS_PER_WORD != 0)
|
| + {
|
| + size += UNITS_PER_WORD - size % UNITS_PER_WORD;
|
| + mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
|
| + }
|
| + }
|
| +
|
| + return gen_rtx_REG (mode, R0_REGNUM);
|
| +}
|
| +
|
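| +/* Return the location of a libcall result of mode MODE. */
|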
| +rtx
|
| +aapcs_libcall_value (enum machine_mode mode)
|
| +{
|
| + return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
|
| +}
|
| +
|
| +/* Lay out a function argument using the AAPCS rules. The rule
|
| + numbers referred to here are those in the AAPCS. */
|
| +static void
|
| +aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
|
| + tree type, int named)
|
| +{
|
| + int nregs, nregs2;
|
| + int ncrn;
|
| +
|
| + /* We only need to do this once per argument. */
|
| + if (pcum->aapcs_arg_processed)
|
| + return;
|
| +
|
| + pcum->aapcs_arg_processed = true;
|
| +
|
| + /* Special case: if named is false then we are handling an incoming
|
| + anonymous argument which is on the stack. */
|
| + if (!named)
|
| + return;
|
| +
|
| + /* Is this a potential co-processor register candidate? */
|
| + if (pcum->pcs_variant != ARM_PCS_AAPCS)
|
| + {
|
| + int slot = aapcs_select_call_coproc (pcum, mode, type);
|
| + pcum->aapcs_cprc_slot = slot;
|
| +
|
| + /* We don't have to apply any of the rules from part B of the
|
| + preparation phase; these are handled elsewhere in the
|
| + compiler. */
|
| +
|
| + if (slot >= 0)
|
| + {
|
| + /* A co-processor register candidate goes either in its own
|
| + class of registers or on the stack. */
|
| + if (!pcum->aapcs_cprc_failed[slot])
|
| + {
|
| + /* C1.cp - Try to allocate the argument to co-processor
|
| + registers. */
|
| + if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
|
| + return;
|
| +
|
| + /* C2.cp - Put the argument on the stack and note that we
|
| + can't assign any more candidates in this slot. We also
|
| + need to note that we have allocated stack space, so that
|
| + we won't later try to split a non-cprc candidate between
|
| + core registers and the stack. */
|
| + pcum->aapcs_cprc_failed[slot] = true;
|
| + pcum->can_split = false;
|
| + }
|
| +
|
| + /* We didn't get a register, so this argument goes on the
|
| + stack. */
|
| + gcc_assert (pcum->can_split == false);
|
| + return;
|
| + }
|
| + }
|
| +
|
| + /* C3 - For double-word aligned arguments, round the NCRN up to the
|
| + next even number. */
|
| + ncrn = pcum->aapcs_ncrn;
|
| + if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
|
| + ncrn++;
|
| +
|
| + nregs = ARM_NUM_REGS2 (mode, type);
|
| +
|
| + /* Sigh, this test should really assert that nregs > 0, but a GCC
|
| + extension allows empty structs and then gives them empty size; it
|
| + then allows such a structure to be passed by value. For some of
|
| + the code below we have to pretend that such an argument has
|
| + non-zero size so that we 'locate' it correctly either in
|
| + registers or on the stack. */
|
| + gcc_assert (nregs >= 0);
|
| +
|
| + nregs2 = nregs ? nregs : 1;
|
| +
|
| + /* C4 - Argument fits entirely in core registers. */
|
| + if (ncrn + nregs2 <= NUM_ARG_REGS)
|
| + {
|
| + pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
|
| + pcum->aapcs_next_ncrn = ncrn + nregs;
|
| + return;
|
| + }
|
| +
|
| + /* C5 - Some core registers left and there are no arguments already
|
| + on the stack: split this argument between the remaining core
|
| + registers and the stack. */
|
| + if (ncrn < NUM_ARG_REGS && pcum->can_split)
|
| + {
|
| + pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
|
| + pcum->aapcs_next_ncrn = NUM_ARG_REGS;
|
| + pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
|
| + return;
|
| + }
|
| +
|
| + /* C6 - NCRN is set to 4. */
|
| + pcum->aapcs_next_ncrn = NUM_ARG_REGS;
|
| +
|
| + /* C7,C8 - argument goes on the stack. We have nothing to do here. */
|
| + return;
|
| +}
|
| +
|
| /* Initialize a variable CUM of type CUMULATIVE_ARGS
|
| for a call to a function whose data type is FNTYPE.
|
| For a library call, FNTYPE is NULL. */
|
| void
|
| arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
|
| - rtx libname ATTRIBUTE_UNUSED,
|
| + rtx libname,
|
| tree fndecl ATTRIBUTE_UNUSED)
|
| {
|
| + /* Determine the PCS variant to use for this call. */
|
| + if (fntype)
|
| + pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
|
| + else
|
| + pcum->pcs_variant = arm_pcs_default;
|
| +
|
| + if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
|
| + {
|
| + if (arm_libcall_uses_aapcs_base (libname))
|
| + pcum->pcs_variant = ARM_PCS_AAPCS;
|
| +
|
| + pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
|
| + pcum->aapcs_reg = NULL_RTX;
|
| + pcum->aapcs_partial = 0;
|
| + pcum->aapcs_arg_processed = false;
|
| + pcum->aapcs_cprc_slot = -1;
|
| + pcum->can_split = true;
|
| +
|
| + if (pcum->pcs_variant != ARM_PCS_AAPCS)
|
| + {
|
| + int i;
|
| +
|
| + for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
|
| + {
|
| + pcum->aapcs_cprc_failed[i] = false;
|
| + aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
|
| + }
|
| + }
|
| + return;
|
| + }
|
| +
|
| + /* Legacy ABIs. */
|
| +
|
| /* On the ARM, the offset starts at 0. */
|
| pcum->nregs = 0;
|
| pcum->iwmmxt_nregs = 0;
|
| @@ -3131,6 +4373,17 @@ arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
|
| {
|
| int nregs;
|
|
|
| + /* Handle the special case quickly. Pick an arbitrary value for op2 of
|
| + a call insn (op3 of a call_value insn). */
|
| + if (mode == VOIDmode)
|
| + return const0_rtx;
|
| +
|
| + if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
|
| + {
|
| + aapcs_layout_arg (pcum, mode, type, named);
|
| + return pcum->aapcs_reg;
|
| + }
|
| +
|
| /* Varargs vectors are treated the same as long long.
|
| named_count avoids having to change the way arm handles 'named' */
|
| if (TARGET_IWMMXT_ABI
|
| @@ -3172,10 +4425,16 @@ arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
|
|
|
| static int
|
| arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
|
| - tree type, bool named ATTRIBUTE_UNUSED)
|
| + tree type, bool named)
|
| {
|
| int nregs = pcum->nregs;
|
|
|
| + if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
|
| + {
|
| + aapcs_layout_arg (pcum, mode, type, named);
|
| + return pcum->aapcs_partial;
|
| + }
|
| +
|
| if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
|
| return 0;
|
|
|
| @@ -3187,6 +4446,39 @@ arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
|
| return 0;
|
| }
|
|
|
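| +/* Update PCUM to advance past the argument described by MODE and
|
| + TYPE. NAMED is false for an anonymous argument of a variadic
|
| + call. */
|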
| +void
|
| +arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
|
| + tree type, bool named)
|
| +{
|
| + if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
|
| + {
|
| + aapcs_layout_arg (pcum, mode, type, named);
|
| +
|
| + if (pcum->aapcs_cprc_slot >= 0)
|
| + {
|
| + aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
|
| + type);
|
| + pcum->aapcs_cprc_slot = -1;
|
| + }
|
| +
|
| + /* Generic stuff. */
|
| + pcum->aapcs_arg_processed = false;
|
| + pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
|
| + pcum->aapcs_reg = NULL_RTX;
|
| + pcum->aapcs_partial = 0;
|
| + }
|
| + else
|
| + {
|
| + pcum->nargs += 1;
|
| + if (arm_vector_mode_supported_p (mode)
|
| + && pcum->named_count > pcum->nargs
|
| + && TARGET_IWMMXT_ABI)
|
| + pcum->iwmmxt_nregs += 1;
|
| + else
|
| + pcum->nregs += ARM_NUM_REGS2 (mode, type);
|
| + }
|
| +}
|
| +
|
| /* Variable sized types are passed by reference. This is a GCC
|
| extension to the ARM ABI. */
|
|
|
| @@ -3226,42 +4518,6 @@ arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
|
| arm_pragma_long_calls = OFF;
|
| }
|
|
|
| -/* Table of machine attributes. */
|
| -const struct attribute_spec arm_attribute_table[] =
|
| -{
|
| - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
|
| - /* Function calls made to this symbol must be done indirectly, because
|
| - it may lie outside of the 26 bit addressing range of a normal function
|
| - call. */
|
| - { "long_call", 0, 0, false, true, true, NULL },
|
| - /* Whereas these functions are always known to reside within the 26 bit
|
| - addressing range. */
|
| - { "short_call", 0, 0, false, true, true, NULL },
|
| - /* Interrupt Service Routines have special prologue and epilogue requirements. */
|
| - { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
|
| - { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
|
| - { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
|
| -#ifdef ARM_PE
|
| - /* ARM/PE has three new attributes:
|
| - interfacearm - ?
|
| - dllexport - for exporting a function/variable that will live in a dll
|
| - dllimport - for importing a function/variable from a dll
|
| -
|
| - Microsoft allows multiple declspecs in one __declspec, separating
|
| - them with spaces. We do NOT support this. Instead, use __declspec
|
| - multiple times.
|
| - */
|
| - { "dllimport", 0, 0, true, false, false, NULL },
|
| - { "dllexport", 0, 0, true, false, false, NULL },
|
| - { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
|
| -#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
|
| - { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
|
| - { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
|
| - { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
|
| -#endif
|
| - { NULL, 0, 0, false, false, false, NULL }
|
| -};
|
| -
|
| /* Handle an attribute requiring a FUNCTION_DECL;
|
| arguments as in struct attribute_spec.handler. */
|
| static tree
|
| @@ -3270,8 +4526,8 @@ arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
|
| {
|
| if (TREE_CODE (*node) != FUNCTION_DECL)
|
| {
|
| - warning (OPT_Wattributes, "%qs attribute only applies to functions",
|
| - IDENTIFIER_POINTER (name));
|
| + warning (OPT_Wattributes, "%qE attribute only applies to functions",
|
| + name);
|
| *no_add_attrs = true;
|
| }
|
|
|
| @@ -3288,8 +4544,8 @@ arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
|
| {
|
| if (TREE_CODE (*node) != FUNCTION_DECL)
|
| {
|
| - warning (OPT_Wattributes, "%qs attribute only applies to functions",
|
| - IDENTIFIER_POINTER (name));
|
| + warning (OPT_Wattributes, "%qE attribute only applies to functions",
|
| + name);
|
| *no_add_attrs = true;
|
| }
|
| /* FIXME: the argument if any is checked for type attributes;
|
| @@ -3302,8 +4558,8 @@ arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
|
| {
|
| if (arm_isr_value (args) == ARM_FT_UNKNOWN)
|
| {
|
| - warning (OPT_Wattributes, "%qs attribute ignored",
|
| - IDENTIFIER_POINTER (name));
|
| + warning (OPT_Wattributes, "%qE attribute ignored",
|
| + name);
|
| *no_add_attrs = true;
|
| }
|
| }
|
| @@ -3330,8 +4586,8 @@ arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
|
| }
|
| else
|
| {
|
| - warning (OPT_Wattributes, "%qs attribute ignored",
|
| - IDENTIFIER_POINTER (name));
|
| + warning (OPT_Wattributes, "%qE attribute ignored",
|
| + name);
|
| }
|
| }
|
| }
|
| @@ -3339,6 +4595,20 @@ arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
|
| return NULL_TREE;
|
| }
|
|
|
| +/* Handle a "pcs" attribute; arguments as in struct
|
| + attribute_spec.handler. */
|
| +static tree
|
| +arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
|
| + int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
|
| +{
|
| + if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
|
| + {
|
| + warning (OPT_Wattributes, "%qE attribute ignored", name);
|
| + *no_add_attrs = true;
|
| + }
|
| + return NULL_TREE;
|
| +}
|
| +
|
| #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
|
| /* Handle the "notshared" attribute. This attribute is another way of
|
| requesting hidden visibility. ARM's compiler supports
|
| @@ -3500,7 +4770,7 @@ arm_is_long_call_p (tree decl)
|
|
|
| /* Return nonzero if it is ok to make a tail-call to DECL. */
|
| static bool
|
| -arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
|
| +arm_function_ok_for_sibcall (tree decl, tree exp)
|
| {
|
| unsigned long func_type;
|
|
|
| @@ -3533,6 +4803,21 @@ arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
|
| if (IS_INTERRUPT (func_type))
|
| return false;
|
|
|
| + if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
|
| + {
|
| + /* Check that the return value locations are the same. For
|
| + example that we aren't returning a value from the sibling in
|
| + a VFP register but then need to transfer it to a core
|
| + register. */
|
| + rtx a, b;
|
| +
|
| + a = arm_function_value (TREE_TYPE (exp), decl, false);
|
| + b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
|
| + cfun->decl, false);
|
| + if (!rtx_equal_p (a, b))
|
| + return false;
|
| + }
|
| +
|
| /* Never tailcall if function may be called with a misaligned SP. */
|
| if (IS_STACKALIGN (func_type))
|
| return false;
|
| @@ -3580,7 +4865,7 @@ require_pic_register (void)
|
| /* Play games to avoid marking the function as needing pic
|
| if we are being called as part of the cost-estimation
|
| process. */
|
| - if (current_ir_type () != IR_GIMPLE)
|
| + if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
|
| crtl->uses_pic_offset_table = 1;
|
| }
|
| else
|
| @@ -3593,7 +4878,7 @@ require_pic_register (void)
|
| /* Play games to avoid marking the function as needing pic
|
| if we are being called as part of the cost-estimation
|
| process. */
|
| - if (current_ir_type () != IR_GIMPLE)
|
| + if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
|
| {
|
| crtl->uses_pic_offset_table = 1;
|
| start_sequence ();
|
| @@ -3602,7 +4887,11 @@ require_pic_register (void)
|
|
|
| seq = get_insns ();
|
| end_sequence ();
|
| - emit_insn_after (seq, entry_of_function ());
|
| + /* We can be called during expansion of PHI nodes, where
|
| + we can't yet emit instructions directly in the final
|
| + insn stream. Queue the insns on the entry edge; they will
|
| + be committed after everything else is expanded. */
|
| + insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
|
| }
|
| }
|
| }
|
| @@ -3634,10 +4923,8 @@ legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
|
| else
|
| address = reg;
|
|
|
| - if (TARGET_ARM)
|
| - emit_insn (gen_pic_load_addr_arm (address, orig));
|
| - else if (TARGET_THUMB2)
|
| - emit_insn (gen_pic_load_addr_thumb2 (address, orig));
|
| + if (TARGET_32BIT)
|
| + emit_insn (gen_pic_load_addr_32bit (address, orig));
|
| else /* TARGET_THUMB1 */
|
| emit_insn (gen_pic_load_addr_thumb1 (address, orig));
|
|
|
| @@ -3814,7 +5101,7 @@ arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
|
| {
|
| pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
|
| pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
|
| - emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
|
| + emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
|
|
|
| emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
|
|
|
| @@ -3837,29 +5124,13 @@ arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
|
| UNSPEC_GOTSYM_OFF);
|
| pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
|
|
|
| - if (TARGET_ARM)
|
| - {
|
| - emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
|
| - emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
|
| - }
|
| - else if (TARGET_THUMB2)
|
| + if (TARGET_32BIT)
|
| {
|
| - /* Thumb-2 only allows very limited access to the PC. Calculate the
|
| - address in a temporary register. */
|
| - if (arm_pic_register != INVALID_REGNUM)
|
| - {
|
| - pic_tmp = gen_rtx_REG (SImode,
|
| - thumb_find_work_register (saved_regs));
|
| - }
|
| + emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
|
| + if (TARGET_ARM)
|
| + emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
|
| else
|
| - {
|
| - gcc_assert (can_create_pseudo_p ());
|
| - pic_tmp = gen_reg_rtx (Pmode);
|
| - }
|
| -
|
| - emit_insn (gen_pic_load_addr_thumb2 (pic_reg, pic_rtx));
|
| - emit_insn (gen_pic_load_dot_plus_four (pic_tmp, labelno));
|
| - emit_insn (gen_addsi3 (pic_reg, pic_reg, pic_tmp));
|
| + emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
|
| }
|
| else /* TARGET_THUMB1 */
|
| {
|
| @@ -3920,8 +5191,8 @@ pcrel_constant_p (rtx x)
|
|
|
| /* Return nonzero if X is a valid ARM state address operand. */
|
| int
|
| -arm_legitimate_address_p (enum machine_mode mode, rtx x, RTX_CODE outer,
|
| - int strict_p)
|
| +arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
|
| + int strict_p)
|
| {
|
| bool use_ldrd;
|
| enum rtx_code code = GET_CODE (x);
|
| @@ -4005,7 +5276,7 @@ arm_legitimate_address_p (enum machine_mode mode, rtx x, RTX_CODE outer,
|
| }
|
|
|
| /* Return nonzero if X is a valid Thumb-2 address operand. */
|
| -int
|
| +static int
|
| thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
|
| {
|
| bool use_ldrd;
|
| @@ -4131,6 +5402,7 @@ arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
|
| if (GET_MODE_SIZE (mode) <= 4
|
| && ! (arm_arch4
|
| && (mode == HImode
|
| + || mode == HFmode
|
| || (mode == QImode && outer == SIGN_EXTEND))))
|
| {
|
| if (code == MULT)
|
| @@ -4159,13 +5431,15 @@ arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
|
| load. */
|
| if (arm_arch4)
|
| {
|
| - if (mode == HImode || (outer == SIGN_EXTEND && mode == QImode))
|
| + if (mode == HImode
|
| + || mode == HFmode
|
| + || (outer == SIGN_EXTEND && mode == QImode))
|
| range = 256;
|
| else
|
| range = 4096;
|
| }
|
| else
|
| - range = (mode == HImode) ? 4095 : 4096;
|
| + range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
|
|
|
| return (code == CONST_INT
|
| && INTVAL (index) < range
|
| @@ -4226,15 +5500,17 @@ thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
|
|
|
| if (mode == DImode || mode == DFmode)
|
| {
|
| - HOST_WIDE_INT val = INTVAL (index);
|
| - /* ??? Can we assume ldrd for thumb2? */
|
| - /* Thumb-2 ldrd only has reg+const addressing modes. */
|
| - if (code != CONST_INT)
|
| + if (code == CONST_INT)
|
| + {
|
| + HOST_WIDE_INT val = INTVAL (index);
|
| + /* ??? Can we assume ldrd for thumb2? */
|
| + /* Thumb-2 ldrd only has reg+const addressing modes. */
|
| + /* ldrd supports offsets of +-1020.
|
| + However the ldr fallback does not. */
|
| + return val > -256 && val < 256 && (val & 3) == 0;
|
| + }
|
| + else
|
| return 0;
|
| -
|
| - /* ldrd supports offsets of +-1020.
|
| - However the ldr fallback does not. */
|
| - return val > -256 && val < 256 && (val & 3) == 0;
|
| }
|
|
|
| if (code == MULT)
|
| @@ -4311,7 +5587,7 @@ thumb1_index_register_rtx_p (rtx x, int strict_p)
|
| addresses based on the frame pointer or arg pointer until the
|
| reload pass starts. This is so that eliminating such addresses
|
| into stack based ones won't produce impossible code. */
|
| -int
|
| +static int
|
| thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
|
| {
|
| /* ??? Not clear if this is right. Experiment. */
|
| @@ -4336,7 +5612,8 @@ thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
|
| return 1;
|
|
|
| /* This is PC relative data after arm_reorg runs. */
|
| - else if (GET_MODE_SIZE (mode) >= 4 && reload_completed
|
| + else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
|
| + && reload_completed
|
| && (GET_CODE (x) == LABEL_REF
|
| || (GET_CODE (x) == CONST
|
| && GET_CODE (XEXP (x, 0)) == PLUS
|
| @@ -4425,6 +5702,17 @@ thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
|
| }
|
| }
|
|
|
| +bool
|
| +arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
|
| +{
|
| + if (TARGET_ARM)
|
| + return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
|
| + else if (TARGET_THUMB2)
|
| + return thumb2_legitimate_address_p (mode, x, strict_p);
|
| + else /* if (TARGET_THUMB1) */
|
| + return thumb1_legitimate_address_p (mode, x, strict_p);
|
| +}
|
| +
|
| /* Build the SYMBOL_REF for __tls_get_addr. */
|
|
|
| static GTY(()) rtx tls_get_addr_libfunc;
|
| @@ -4499,14 +5787,7 @@ arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
|
| if (TARGET_ARM)
|
| emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
|
| else if (TARGET_THUMB2)
|
| - {
|
| - rtx tmp;
|
| - /* Thumb-2 only allows very limited access to the PC. Calculate
|
| - the address in a temporary register. */
|
| - tmp = gen_reg_rtx (SImode);
|
| - emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
|
| - emit_insn (gen_addsi3(reg, reg, tmp));
|
| - }
|
| + emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
|
| else /* TARGET_THUMB1 */
|
| emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
|
|
|
| @@ -4562,15 +5843,7 @@ legitimize_tls_address (rtx x, rtx reg)
|
| if (TARGET_ARM)
|
| emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
|
| else if (TARGET_THUMB2)
|
| - {
|
| - rtx tmp;
|
| - /* Thumb-2 only allows very limited access to the PC. Calculate
|
| - the address in a temporary register. */
|
| - tmp = gen_reg_rtx (SImode);
|
| - emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
|
| - emit_insn (gen_addsi3(reg, reg, tmp));
|
| - emit_move_insn (reg, gen_const_mem (SImode, reg));
|
| - }
|
| + emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
|
| else
|
| {
|
| emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
|
| @@ -4601,6 +5874,14 @@ legitimize_tls_address (rtx x, rtx reg)
|
| rtx
|
| arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
|
| {
|
| + if (!TARGET_ARM)
|
| + {
|
| + /* TODO: legitimize_address for Thumb2. */
|
| + if (TARGET_THUMB2)
|
| + return x;
|
| + return thumb_legitimize_address (x, orig_x, mode);
|
| + }
|
| +
|
| if (arm_tls_symbol_p (x))
|
| return legitimize_tls_address (x, NULL_RTX);
|
|
|
| @@ -4652,7 +5933,7 @@ arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
|
| }
|
|
|
| /* XXX We don't allow MINUS any more -- see comment in
|
| - arm_legitimate_address_p (). */
|
| + arm_legitimate_address_outer_p (). */
|
| else if (GET_CODE (x) == MINUS)
|
| {
|
| rtx xop0 = XEXP (x, 0);
|
| @@ -4799,7 +6080,7 @@ thumb_legitimize_reload_address (rtx *x_p,
|
|
|
| x = copy_rtx (x);
|
| push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
|
| - Pmode, VOIDmode, 0, 0, opnum, type);
|
| + Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
|
| return x;
|
| }
|
|
|
| @@ -4816,7 +6097,7 @@ thumb_legitimize_reload_address (rtx *x_p,
|
|
|
| x = copy_rtx (x);
|
| push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
|
| - Pmode, VOIDmode, 0, 0, opnum, type);
|
| + Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
|
| return x;
|
| }
|
|
|
| @@ -4944,9 +6225,18 @@ thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
|
| else if ((outer == PLUS || outer == COMPARE)
|
| && INTVAL (x) < 256 && INTVAL (x) > -256)
|
| return 0;
|
| - else if (outer == AND
|
| + else if ((outer == IOR || outer == XOR || outer == AND)
|
| && INTVAL (x) < 256 && INTVAL (x) >= -256)
|
| return COSTS_N_INSNS (1);
|
| + else if (outer == AND)
|
| + {
|
| + int i;
|
| + /* This duplicates the tests in the andsi3 expander. */
|
| + for (i = 9; i <= 31; i++)
|
| + if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
|
| + || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
|
| + return COSTS_N_INSNS (2);
|
| + }
|
| else if (outer == ASHIFT || outer == ASHIFTRT
|
| || outer == LSHIFTRT)
|
| return 0;
|
| @@ -5035,7 +6325,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
|
| case UMOD:
|
| if (TARGET_HARD_FLOAT && mode == SFmode)
|
| *total = COSTS_N_INSNS (2);
|
| - else if (TARGET_HARD_FLOAT && mode == DFmode)
|
| + else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
|
| *total = COSTS_N_INSNS (4);
|
| else
|
| *total = COSTS_N_INSNS (20);
|
| @@ -5113,7 +6403,9 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
|
|
|
| if (GET_MODE_CLASS (mode) == MODE_FLOAT)
|
| {
|
| - if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
|
| + if (TARGET_HARD_FLOAT
|
| + && (mode == SFmode
|
| + || (mode == DFmode && !TARGET_VFP_SINGLE)))
|
| {
|
| *total = COSTS_N_INSNS (1);
|
| if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
|
| @@ -5154,10 +6446,17 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
|
| return true;
|
| }
|
|
|
| + /* A shift as part of RSB costs no more than RSB itself. */
|
| + if (GET_CODE (XEXP (x, 0)) == MULT
|
| + && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
|
| + {
|
| + *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
|
| + *total += rtx_cost (XEXP (x, 1), code, speed);
|
| + return true;
|
| + }
|
| +
|
| if (subcode == MULT
|
| - && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
|
| - && ((INTVAL (XEXP (XEXP (x, 1), 1)) &
|
| - (INTVAL (XEXP (XEXP (x, 1), 1)) - 1)) == 0))
|
| + && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
|
| {
|
| *total += rtx_cost (XEXP (x, 0), code, speed);
|
| *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
|
| @@ -5193,9 +6492,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
|
| multiplication by a power of two, so that we fall down into
|
| the code below. */
|
| if (GET_CODE (XEXP (x, 0)) == MULT
|
| - && ! (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
|
| - && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
|
| - (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)))
|
| + && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
|
| {
|
| /* The cost comes from the cost of the multiply. */
|
| return false;
|
| @@ -5203,7 +6500,9 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
|
|
|
| if (GET_MODE_CLASS (mode) == MODE_FLOAT)
|
| {
|
| - if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
|
| + if (TARGET_HARD_FLOAT
|
| + && (mode == SFmode
|
| + || (mode == DFmode && !TARGET_VFP_SINGLE)))
|
| {
|
| *total = COSTS_N_INSNS (1);
|
| if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
|
| @@ -5278,9 +6577,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
|
| }
|
|
|
| if (subcode == MULT
|
| - && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
|
| - && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
|
| - (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0))
|
| + && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
|
| {
|
| *total += rtx_cost (XEXP (x, 1), code, speed);
|
| *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
|
| @@ -5318,7 +6615,9 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
|
| case NEG:
|
| if (GET_MODE_CLASS (mode) == MODE_FLOAT)
|
| {
|
| - if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
|
| + if (TARGET_HARD_FLOAT
|
| + && (mode == SFmode
|
| + || (mode == DFmode && !TARGET_VFP_SINGLE)))
|
| {
|
| *total = COSTS_N_INSNS (1);
|
| return false;
|
| @@ -5337,9 +6636,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
|
| || subcode == LSHIFTRT
|
| || subcode == ROTATE || subcode == ROTATERT
|
| || (subcode == MULT
|
| - && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
|
| - && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
|
| - (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)))
|
| + && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
|
| {
|
| *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
|
| /* Register shifts cost an extra cycle. */
|
| @@ -5447,9 +6744,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
|
| }
|
|
|
| if (subcode == MULT
|
| - && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
|
| - && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
|
| - (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0))
|
| + && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
|
| {
|
| *total += rtx_cost (XEXP (x, 1), code, speed);
|
| *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
|
| @@ -5469,9 +6764,11 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
|
| return true;
|
|
|
| case ABS:
|
| - if (GET_MODE_CLASS (mode == MODE_FLOAT))
|
| + if (GET_MODE_CLASS (mode) == MODE_FLOAT)
|
| {
|
| - if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
|
| + if (TARGET_HARD_FLOAT
|
| + && (mode == SFmode
|
| + || (mode == DFmode && !TARGET_VFP_SINGLE)))
|
| {
|
| *total = COSTS_N_INSNS (1);
|
| return false;
|
| @@ -5574,7 +6871,8 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
|
| return true;
|
|
|
| case CONST_DOUBLE:
|
| - if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x))
|
| + if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
|
| + && (mode == SFmode || !TARGET_VFP_SINGLE))
|
| *total = COSTS_N_INSNS (1);
|
| else
|
| *total = COSTS_N_INSNS (4);
|
| @@ -5649,7 +6947,8 @@ arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
|
| return false;
|
|
|
| case MINUS:
|
| - if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
|
| + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
|
| + && (mode == SFmode || !TARGET_VFP_SINGLE))
|
| {
|
| *total = COSTS_N_INSNS (1);
|
| return false;
|
| @@ -5679,12 +6978,23 @@ arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
|
| return false;
|
|
|
| case PLUS:
|
| - if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
|
| + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
|
| + && (mode == SFmode || !TARGET_VFP_SINGLE))
|
| {
|
| *total = COSTS_N_INSNS (1);
|
| return false;
|
| }
|
|
|
| + /* A shift as part of ADD costs nothing. */
|
| + if (GET_CODE (XEXP (x, 0)) == MULT
|
| + && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
|
| + {
|
| + *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
|
| + *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
|
| + *total += rtx_cost (XEXP (x, 1), code, false);
|
| + return true;
|
| + }
|
| +
|
| /* Fall through */
|
| case AND: case XOR: case IOR:
|
| if (mode == SImode)
|
| @@ -5709,7 +7019,8 @@ arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
|
| return false;
|
|
|
| case NEG:
|
| - if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
|
| + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
|
| + && (mode == SFmode || !TARGET_VFP_SINGLE))
|
| {
|
| *total = COSTS_N_INSNS (1);
|
| return false;
|
| @@ -5733,7 +7044,8 @@ arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
|
| return false;
|
|
|
| case ABS:
|
| - if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
|
| + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
|
| + && (mode == SFmode || !TARGET_VFP_SINGLE))
|
| *total = COSTS_N_INSNS (1);
|
| else
|
| *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
|
| @@ -5778,7 +7090,10 @@ arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
|
|
|
| case CONST_INT:
|
| if (const_ok_for_arm (INTVAL (x)))
|
| - *total = COSTS_N_INSNS (outer_code == SET ? 1 : 0);
|
| + /* A multiplication by a constant requires another instruction
|
| + to load the constant to a register. */
|
| + *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
|
| + ? 1 : 0);
|
| else if (const_ok_for_arm (~INTVAL (x)))
|
| *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
|
| else if (const_ok_for_arm (-INTVAL (x)))
|
| @@ -5825,10 +7140,12 @@ arm_rtx_costs (rtx x, int code, int outer_code, int *total,
|
| bool speed)
|
| {
|
| if (!speed)
|
| - return arm_size_rtx_costs (x, code, outer_code, total);
|
| + return arm_size_rtx_costs (x, (enum rtx_code) code,
|
| + (enum rtx_code) outer_code, total);
|
| else
|
| - return all_cores[(int)arm_tune].rtx_costs (x, code, outer_code, total,
|
| - speed);
|
| + return all_cores[(int)arm_tune].rtx_costs (x, (enum rtx_code) code,
|
| + (enum rtx_code) outer_code,
|
| + total, speed);
|
| }
|
|
|
| /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
|
| @@ -5950,7 +7267,9 @@ arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
|
|
|
| if (GET_MODE_CLASS (mode) == MODE_FLOAT)
|
| {
|
| - if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
|
| + if (TARGET_HARD_FLOAT
|
| + && (mode == SFmode
|
| + || (mode == DFmode && !TARGET_VFP_SINGLE)))
|
| {
|
| *total = COSTS_N_INSNS (1);
|
| return false;
|
| @@ -6107,7 +7426,9 @@ arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
|
|
|
| if (GET_MODE_CLASS (mode) == MODE_FLOAT)
|
| {
|
| - if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
|
| + if (TARGET_HARD_FLOAT
|
| + && (mode == SFmode
|
| + || (mode == DFmode && !TARGET_VFP_SINGLE)))
|
| {
|
| *total = COSTS_N_INSNS (1);
|
| return false;
|
| @@ -6135,9 +7456,9 @@ arm_arm_address_cost (rtx x)
|
| if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
|
| return 10;
|
|
|
| - if (c == PLUS || c == MINUS)
|
| + if (c == PLUS)
|
| {
|
| - if (GET_CODE (XEXP (x, 0)) == CONST_INT)
|
| + if (GET_CODE (XEXP (x, 1)) == CONST_INT)
|
| return 2;
|
|
|
| if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
|
| @@ -6753,25 +8074,171 @@ neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
|
| }
|
| }
|
|
|
| -/* Initialize a vector with non-constant elements. FIXME: We can do better
|
| - than the current implementation (building a vector on the stack and then
|
| - loading it) in many cases. See rs6000.c. */
|
| +/* If VALS is a vector constant that can be loaded into a register
|
| + using VDUP, generate instructions to do so and return an RTX to
|
| + assign to the register. Otherwise return NULL_RTX. */
|
| +
|
| +static rtx
|
| +neon_vdup_constant (rtx vals)
|
| +{
|
| + enum machine_mode mode = GET_MODE (vals);
|
| + enum machine_mode inner_mode = GET_MODE_INNER (mode);
|
| + int n_elts = GET_MODE_NUNITS (mode);
|
| + bool all_same = true;
|
| + rtx x;
|
| + int i;
|
| +
|
| + if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
|
| + return NULL_RTX;
|
| +
|
| + for (i = 0; i < n_elts; ++i)
|
| + {
|
| + x = XVECEXP (vals, 0, i);
|
| + if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
|
| + all_same = false;
|
| + }
|
| +
|
| + if (!all_same)
|
| + /* The elements are not all the same. We could handle repeating
|
| + patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
|
| + {0, C, 0, C, 0, C, 0, C} which can be loaded using
|
| + vdup.i16). */
|
| + return NULL_RTX;
|
| +
|
| + /* We can load this constant by using VDUP and a constant in a
|
| + single ARM register. This will be cheaper than a vector
|
| + load. */
|
| +
|
| + x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
|
| + return gen_rtx_UNSPEC (mode, gen_rtvec (1, x),
|
| + UNSPEC_VDUP_N);
|
| +}
|
| +
|
| +/* Generate code to load VALS, which is a PARALLEL containing only
|
| + constants (for vec_init) or CONST_VECTOR, efficiently into a
|
| + register. Returns an RTX to copy into the register, or NULL_RTX
|
| + for a PARALLEL that cannot be converted into a CONST_VECTOR. */
|
| +
|
| +rtx
|
| +neon_make_constant (rtx vals)
|
| +{
|
| + enum machine_mode mode = GET_MODE (vals);
|
| + rtx target;
|
| + rtx const_vec = NULL_RTX;
|
| + int n_elts = GET_MODE_NUNITS (mode);
|
| + int n_const = 0;
|
| + int i;
|
| +
|
| + if (GET_CODE (vals) == CONST_VECTOR)
|
| + const_vec = vals;
|
| + else if (GET_CODE (vals) == PARALLEL)
|
| + {
|
| + /* A CONST_VECTOR must contain only CONST_INTs and
|
| + CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
|
| + Only store valid constants in a CONST_VECTOR. */
|
| + for (i = 0; i < n_elts; ++i)
|
| + {
|
| + rtx x = XVECEXP (vals, 0, i);
|
| + if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
|
| + n_const++;
|
| + }
|
| + if (n_const == n_elts)
|
| + const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
|
| + }
|
| + else
|
| + gcc_unreachable ();
|
| +
|
| + if (const_vec != NULL
|
| + && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
|
| + /* Load using VMOV. On Cortex-A8 this takes one cycle. */
|
| + return const_vec;
|
| + else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
|
| + /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
|
| + pipeline cycle; creating the constant takes one or two ARM
|
| + pipeline cycles. */
|
| + return target;
|
| + else if (const_vec != NULL_RTX)
|
| + /* Load from constant pool. On Cortex-A8 this takes two cycles
|
| + (for either double or quad vectors). We cannot take advantage
|
| + of single-cycle VLD1 because we need a PC-relative addressing
|
| + mode. */
|
| + return const_vec;
|
| + else
|
| + /* A PARALLEL containing something not valid inside CONST_VECTOR.
|
| + We cannot construct an initializer. */
|
| + return NULL_RTX;
|
| +}
|
| +
|
| +/* Initialize vector TARGET to VALS. */
|
|
|
| void
|
| neon_expand_vector_init (rtx target, rtx vals)
|
| {
|
| enum machine_mode mode = GET_MODE (target);
|
| - enum machine_mode inner = GET_MODE_INNER (mode);
|
| - unsigned int i, n_elts = GET_MODE_NUNITS (mode);
|
| - rtx mem;
|
| + enum machine_mode inner_mode = GET_MODE_INNER (mode);
|
| + int n_elts = GET_MODE_NUNITS (mode);
|
| + int n_var = 0, one_var = -1;
|
| + bool all_same = true;
|
| + rtx x, mem;
|
| + int i;
|
| +
|
| + for (i = 0; i < n_elts; ++i)
|
| + {
|
| + x = XVECEXP (vals, 0, i);
|
| + if (!CONSTANT_P (x))
|
| + ++n_var, one_var = i;
|
| +
|
| + if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
|
| + all_same = false;
|
| + }
|
| +
|
| + if (n_var == 0)
|
| + {
|
| + rtx constant = neon_make_constant (vals);
|
| + if (constant != NULL_RTX)
|
| + {
|
| + emit_move_insn (target, constant);
|
| + return;
|
| + }
|
| + }
|
|
|
| - gcc_assert (VECTOR_MODE_P (mode));
|
| + /* Splat a single non-constant element if we can. */
|
| + if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
|
| + {
|
| + x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
|
| + emit_insn (gen_rtx_SET (VOIDmode, target,
|
| + gen_rtx_UNSPEC (mode, gen_rtvec (1, x),
|
| + UNSPEC_VDUP_N)));
|
| + return;
|
| + }
|
| +
|
| + /* One field is non-constant. Load constant then overwrite varying
|
| + field. This is more efficient than using the stack. */
|
| + if (n_var == 1)
|
| + {
|
| + rtx copy = copy_rtx (vals);
|
| + rtvec ops;
|
|
|
| + /* Load constant part of vector, substitute neighboring value for
|
| + varying element. */
|
| + XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
|
| + neon_expand_vector_init (target, copy);
|
| +
|
| + /* Insert variable. */
|
| + x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
|
| + ops = gen_rtvec (3, x, target, GEN_INT (one_var));
|
| + emit_insn (gen_rtx_SET (VOIDmode, target,
|
| + gen_rtx_UNSPEC (mode, ops, UNSPEC_VSET_LANE)));
|
| + return;
|
| + }
|
| +
|
| + /* Construct the vector in memory one field at a time
|
| + and load the whole vector. */
|
| mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
|
| for (i = 0; i < n_elts; i++)
|
| - emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
|
| - XVECEXP (vals, 0, i));
|
| -
|
| + emit_move_insn (adjust_address_nv (mem, inner_mode,
|
| + i * GET_MODE_SIZE (inner_mode)),
|
| + XVECEXP (vals, 0, i));
|
| emit_move_insn (target, mem);
|
| }
|
|
|
| @@ -6930,10 +8397,13 @@ arm_coproc_mem_operand (rtx op, bool wb)
|
| }
|
|
|
| /* Return TRUE if OP is a memory operand which we can load or store a vector
|
| - to/from. If CORE is true, we're moving from ARM registers not Neon
|
| - registers. */
|
| + to/from. TYPE is one of the following values:
|
| + 0 - Vector load/store (vldr)
|
| + 1 - Core registers (ldm)
|
| + 2 - Element/structure loads (vld1)
|
| + */
|
| int
|
| -neon_vector_mem_operand (rtx op, bool core)
|
| +neon_vector_mem_operand (rtx op, int type)
|
| {
|
| rtx ind;
|
|
|
| @@ -6966,23 +8436,15 @@ neon_vector_mem_operand (rtx op, bool core)
|
| return arm_address_register_rtx_p (ind, 0);
|
|
|
| /* Allow post-increment with Neon registers. */
|
| - if (!core && GET_CODE (ind) == POST_INC)
|
| + if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC))
|
| return arm_address_register_rtx_p (XEXP (ind, 0), 0);
|
|
|
| -#if 0
|
| - /* FIXME: We can support this too if we use VLD1/VST1. */
|
| - if (!core
|
| - && GET_CODE (ind) == POST_MODIFY
|
| - && arm_address_register_rtx_p (XEXP (ind, 0), 0)
|
| - && GET_CODE (XEXP (ind, 1)) == PLUS
|
| - && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
|
| - ind = XEXP (ind, 1);
|
| -#endif
|
| + /* FIXME: vld1 allows register post-modify. */
|
|
|
| /* Match:
|
| (plus (reg)
|
| (const)). */
|
| - if (!core
|
| + if (type == 0
|
| && GET_CODE (ind) == PLUS
|
| && GET_CODE (XEXP (ind, 0)) == REG
|
| && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
|
| @@ -7049,10 +8511,19 @@ arm_eliminable_register (rtx x)
|
| enum reg_class
|
| coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
|
| {
|
| + if (mode == HFmode)
|
| + {
|
| + if (!TARGET_NEON_FP16)
|
| + return GENERAL_REGS;
|
| + if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
|
| + return NO_REGS;
|
| + return GENERAL_REGS;
|
| + }
|
| +
|
| if (TARGET_NEON
|
| && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
|
| || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
|
| - && neon_vector_mem_operand (x, FALSE))
|
| + && neon_vector_mem_operand (x, 0))
|
| return NO_REGS;
|
|
|
| if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
|
| @@ -7330,28 +8801,21 @@ tls_mentioned_p (rtx x)
|
| }
|
| }
|
|
|
| -/* Must not copy a SET whose source operand is PC-relative. */
|
| +/* Must not copy any rtx that uses a pc-relative address. */
|
| +
|
| +static int
|
| +arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
|
| +{
|
| + if (GET_CODE (*x) == UNSPEC
|
| + && XINT (*x, 1) == UNSPEC_PIC_BASE)
|
| + return 1;
|
| + return 0;
|
| +}
|
|
|
| static bool
|
| arm_cannot_copy_insn_p (rtx insn)
|
| {
|
| - rtx pat = PATTERN (insn);
|
| -
|
| - if (GET_CODE (pat) == SET)
|
| - {
|
| - rtx rhs = SET_SRC (pat);
|
| -
|
| - if (GET_CODE (rhs) == UNSPEC
|
| - && XINT (rhs, 1) == UNSPEC_PIC_BASE)
|
| - return TRUE;
|
| -
|
| - if (GET_CODE (rhs) == MEM
|
| - && GET_CODE (XEXP (rhs, 0)) == UNSPEC
|
| - && XINT (XEXP (rhs, 0), 1) == UNSPEC_PIC_BASE)
|
| - return TRUE;
|
| - }
|
| -
|
| - return FALSE;
|
| + return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
|
| }
|
|
|
| enum rtx_code
|
| @@ -7412,7 +8876,7 @@ adjacent_mem_locations (rtx a, rtx b)
|
| /* Don't accept any offset that will require multiple
|
| instructions to handle, since this would cause the
|
| arith_adjacentmem pattern to output an overlong sequence. */
|
| - if (!const_ok_for_op (PLUS, val0) || !const_ok_for_op (PLUS, val1))
|
| + if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
|
| return 0;
|
|
|
| /* Don't allow an eliminable register: register elimination can make
|
| @@ -8330,7 +9794,8 @@ arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
|
|
|
| /* A compare with a shifted operand. Because of canonicalization, the
|
| comparison will have to be swapped when we emit the assembler. */
|
| - if (GET_MODE (y) == SImode && GET_CODE (y) == REG
|
| + if (GET_MODE (y) == SImode
|
| + && (REG_P (y) || (GET_CODE (y) == SUBREG))
|
| && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
|
| || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
|
| || GET_CODE (x) == ROTATERT))
|
| @@ -8338,7 +9803,8 @@ arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
|
|
|
| /* This operation is performed swapped, but since we only rely on the Z
|
| flag we don't need an additional mode. */
|
| - if (GET_MODE (y) == SImode && REG_P (y)
|
| + if (GET_MODE (y) == SImode
|
| + && (REG_P (y) || (GET_CODE (y) == SUBREG))
|
| && GET_CODE (x) == NEG
|
| && (op == EQ || op == NE))
|
| return CC_Zmode;
|
| @@ -10184,9 +11650,14 @@ vfp_emit_fstmd (int base_reg, int count)
|
|
|
| XVECEXP (par, 0, 0)
|
| = gen_rtx_SET (VOIDmode,
|
| - gen_frame_mem (BLKmode,
|
| - gen_rtx_PRE_DEC (BLKmode,
|
| - stack_pointer_rtx)),
|
| + gen_frame_mem
|
| + (BLKmode,
|
| + gen_rtx_PRE_MODIFY (Pmode,
|
| + stack_pointer_rtx,
|
| + plus_constant
|
| + (stack_pointer_rtx,
|
| + - (count * 8)))
|
| + ),
|
| gen_rtx_UNSPEC (BLKmode,
|
| gen_rtvec (1, reg),
|
| UNSPEC_PUSH_MULT));
|
| @@ -10218,8 +11689,7 @@ vfp_emit_fstmd (int base_reg, int count)
|
| }
|
|
|
| par = emit_insn (par);
|
| - REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
|
| - REG_NOTES (par));
|
| + add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
|
| RTX_FRAME_RELATED_P (par) = 1;
|
|
|
| return count * 8;
|
| @@ -10273,11 +11743,14 @@ output_call (rtx *operands)
|
| return "";
|
| }
|
|
|
| -/* Output a 'call' insn that is a reference in memory. */
|
| +/* Output a 'call' insn that is a reference in memory. This is
|
| + disabled for ARMv5, where we prefer to load the target into a
|
| + register and use blx, because calling through memory otherwise
|
| + carries a significant performance overhead. */
|
| const char *
|
| output_call_mem (rtx *operands)
|
| {
|
| - if (TARGET_INTERWORK && !arm_arch5)
|
| + gcc_assert (!arm_arch5);
|
| + if (TARGET_INTERWORK)
|
| {
|
| output_asm_insn ("ldr%?\t%|ip, %0", operands);
|
| output_asm_insn ("mov%?\t%|lr, %|pc", operands);
|
| @@ -10289,16 +11762,11 @@ output_call_mem (rtx *operands)
|
| first instruction. It's safe to use IP as the target of the
|
| load since the call will kill it anyway. */
|
| output_asm_insn ("ldr%?\t%|ip, %0", operands);
|
| - if (arm_arch5)
|
| - output_asm_insn ("blx%?\t%|ip", operands);
|
| + output_asm_insn ("mov%?\t%|lr, %|pc", operands);
|
| + if (arm_arch4t)
|
| + output_asm_insn ("bx%?\t%|ip", operands);
|
| else
|
| - {
|
| - output_asm_insn ("mov%?\t%|lr, %|pc", operands);
|
| - if (arm_arch4t)
|
| - output_asm_insn ("bx%?\t%|ip", operands);
|
| - else
|
| - output_asm_insn ("mov%?\t%|pc, %|ip", operands);
|
| - }
|
| + output_asm_insn ("mov%?\t%|pc, %|ip", operands);
|
| }
|
| else
|
| {
|
| @@ -10385,14 +11853,23 @@ output_mov_long_double_arm_from_arm (rtx *operands)
|
| return "";
|
| }
|
|
|
| -
|
| -/* Emit a MOVW/MOVT pair. */
|
| -void arm_emit_movpair (rtx dest, rtx src)
|
| -{
|
| - emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
|
| - emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
|
| -}
|
| -
|
| +void
|
| +arm_emit_movpair (rtx dest, rtx src)
|
| + {
|
| + /* If the src is an immediate, load it directly: MOVW for the
|
| + low half, plus MOVT when the high half is non-zero. */
|
| + if (CONST_INT_P (src))
|
| + {
|
| + HOST_WIDE_INT val = INTVAL (src);
|
| + emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
|
| + if ((val >> 16) & 0x0000ffff)
|
| + emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
|
| + GEN_INT (16)),
|
| + GEN_INT ((val >> 16) & 0x0000ffff));
|
| + return;
|
| + }
|
| + emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
|
| + emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
|
| + }
|
|
|
| /* Output a move from arm registers to an fpa registers.
|
| OPERANDS[0] is an fpa register.
|
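| In the rewritten arm_emit_movpair above, the plain SET of the masked
|
| value becomes a MOVW (which also clears the upper half), and the
|
| ZERO_EXTRACT assignment becomes a MOVT into bits 16-31 that is
|
| skipped when the upper half is zero. A standalone sketch of the
|
| split, simplified to a 32-bit value (the GCC code masks a
|
| HOST_WIDE_INT, and the register name here is illustrative):
|
|   #include <stdint.h>
|
|   #include <stdio.h>
|
|   static void
|
|   emit_movpair_sketch (uint32_t val)
|
|   {
|
|     /* MOVW writes the low 16 bits and zeroes the high 16.  */
|
|     printf ("movw\tr0, #0x%x\n", val & 0xffffu);
|
|     /* MOVT overwrites only bits 16-31; emit it only when needed.  */
|
|     if (val >> 16)
|
|       printf ("movt\tr0, #0x%x\n", val >> 16);
|
|   }
|
|   int
|
|   main (void)
|
|   {
|
|     emit_movpair_sketch (0x12345678);   /* movw #0x5678; movt #0x1234 */
|
|     emit_movpair_sketch (0x00001234);   /* movw #0x1234 alone */
|
|     return 0;
|
|   }
|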
| @@ -10769,7 +12246,7 @@ output_move_double (rtx *operands)
|
| }
|
|
|
| /* Output a move, load or store for quad-word vectors in ARM registers. Only
|
| - handles MEMs accepted by neon_vector_mem_operand with CORE=true. */
|
| + handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
|
|
|
| const char *
|
| output_move_quad (rtx *operands)
|
| @@ -10965,6 +12442,13 @@ output_move_neon (rtx *operands)
|
| ops[1] = reg;
|
| break;
|
|
|
| + case PRE_DEC:
|
| + /* FIXME: Should we be using vld1/vst1 here in big-endian mode? */
|
| + templ = "v%smdb%%?\t%%0!, %%h1";
|
| + ops[0] = XEXP (addr, 0);
|
| + ops[1] = reg;
|
| + break;
|
| +
|
| case POST_MODIFY:
|
| /* FIXME: Not currently enabled in neon_vector_mem_operand. */
|
| gcc_unreachable ();
|
| @@ -11014,6 +12498,56 @@ output_move_neon (rtx *operands)
|
| return "";
|
| }
|
|
|
| +/* Compute and return the length of neon_mov<mode>, where <mode> is
|
| + one of the VSTRUCT modes: EI, OI, CI or XI. */
|
| +int
|
| +arm_attr_length_move_neon (rtx insn)
|
| +{
|
| + rtx reg, mem, addr;
|
| + int load;
|
| + enum machine_mode mode;
|
| +
|
| + extract_insn_cached (insn);
|
| +
|
| + if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
|
| + {
|
| + mode = GET_MODE (recog_data.operand[0]);
|
| + switch (mode)
|
| + {
|
| + case EImode:
|
| + case OImode:
|
| + return 8;
|
| + case CImode:
|
| + return 12;
|
| + case XImode:
|
| + return 16;
|
| + default:
|
| + gcc_unreachable ();
|
| + }
|
| + }
|
| +
|
| + load = REG_P (recog_data.operand[0]);
|
| + reg = recog_data.operand[!load];
|
| + mem = recog_data.operand[load];
|
| +
|
| + gcc_assert (MEM_P (mem));
|
| +
|
| + mode = GET_MODE (reg);
|
| + addr = XEXP (mem, 0);
|
| +
|
| + /* Strip off const from addresses like (const (plus (...))). */
|
| + if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
|
| + addr = XEXP (addr, 0);
|
| +
|
| + if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
|
| + {
|
| + int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
|
| + return insns * 4;
|
| + }
|
| + else
|
| + return 4;
|
| +}
|
| +
|
| /* Output an ADD r, s, #n where n may be too big for one instruction.
|
| If adding zero to one register, output nothing. */
|
| const char *
|
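| arm_attr_length_move_neon above encodes two cost models: a
|
| register-to-register move takes one 4-byte instruction per 16-byte
|
| quad register (8 bytes for EImode/OImode, 12 for CImode, 16 for
|
| XImode), a LABEL_REF/PLUS address is handled with one double-register
|
| access per instruction, and any other accepted address is a single
|
| 4-byte instruction. The arithmetic, sketched under the assumption
|
| that HARD_REGNO_NREGS counts 4-byte words for these modes:
|
|   /* MODE_BYTES is 24 (EI), 32 (OI), 48 (CI) or 64 (XI).  */
|
|   static int
|
|   neon_move_length_sketch (int mode_bytes, int reg_to_reg,
|
|                            int label_or_plus)
|
|   {
|
|     if (reg_to_reg)
|
|       return 4 * ((mode_bytes + 15) / 16); /* one insn per quad reg */
|
|     if (label_or_plus)
|
|       return 4 * (mode_bytes / 8);         /* one insn per double reg */
|
|     return 4;                              /* single access otherwise */
|
|   }
|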
| @@ -11320,6 +12854,20 @@ arm_compute_save_reg0_reg12_mask (void)
|
| && crtl->uses_pic_offset_table)
|
| save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
|
| }
|
| + else if (IS_VOLATILE (func_type))
|
| + {
|
| + /* For noreturn functions we historically omitted register saves
|
| + altogether. However, this really messes up debugging. As a
|
| + compromise, save just the frame pointers. Combined with the link
|
| + register saved elsewhere, this should be sufficient to get
|
| + a backtrace. */
|
| + if (frame_pointer_needed)
|
| + save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
|
| + if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
|
| + save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
|
| + if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
|
| + save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
|
| + }
|
| else
|
| {
|
| /* In the normal case we only need to save those registers
|
| @@ -11406,11 +12954,6 @@ arm_compute_save_reg_mask (void)
|
| | (1 << LR_REGNUM)
|
| | (1 << PC_REGNUM);
|
|
|
| - /* Volatile functions do not return, so there
|
| - is no need to save any other registers. */
|
| - if (IS_VOLATILE (func_type))
|
| - return save_reg_mask;
|
| -
|
| save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
|
|
|
| /* Decide if we need to save the link register.
|
| @@ -11629,7 +13172,7 @@ output_return_instruction (rtx operand, int really_return, int reverse)
|
|
|
| sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
|
|
|
| - return_used_this_function = 1;
|
| + cfun->machine->return_used_this_function = 1;
|
|
|
| offsets = arm_get_frame_offsets ();
|
| live_regs_mask = offsets->saved_regs_mask;
|
| @@ -11698,18 +13241,28 @@ output_return_instruction (rtx operand, int really_return, int reverse)
|
| gcc_assert (stack_adjust == 0 || stack_adjust == 4);
|
|
|
| if (stack_adjust && arm_arch5 && TARGET_ARM)
|
| - sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
|
| + if (TARGET_UNIFIED_ASM)
|
| + sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
|
| + else
|
| + sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
|
| else
|
| {
|
| /* If we can't use ldmib (SA110 bug),
|
| then try to pop r3 instead. */
|
| if (stack_adjust)
|
| live_regs_mask |= 1 << 3;
|
| - sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
|
| +
|
| + if (TARGET_UNIFIED_ASM)
|
| + sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
|
| + else
|
| + sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
|
| }
|
| }
|
| else
|
| - sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
|
| + if (TARGET_UNIFIED_ASM)
|
| + sprintf (instr, "pop%s\t{", conditional);
|
| + else
|
| + sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
|
|
|
| p = instr + strlen (instr);
|
|
|
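| The TARGET_UNIFIED_ASM branches added above differ only in where the
|
| condition lands: divided ARM syntax infixes it between the base
|
| mnemonic and the addressing-mode suffix, while unified syntax appends
|
| it to the whole mnemonic (and spells the popping form "pop"). A
|
| sketch with a literal condition string in place of the %?/%d0
|
| assembler escapes used by the real code:
|
|   #include <stdio.h>
|
|   int
|
|   main (void)
|
|   {
|
|     const char *conditional = "eq";  /* stands in for the escapes */
|
|     char instr[64];
|
|     sprintf (instr, "ldm%sfd\tsp!, {", conditional); /* divided: ldmeqfd */
|
|     puts (instr);
|
|     sprintf (instr, "pop%s\t{", conditional);        /* unified: popeq */
|
|     puts (instr);
|
|     return 0;
|
|   }
|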
| @@ -11894,7 +13447,6 @@ arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
|
| if (crtl->calls_eh_return)
|
| asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
|
|
|
| - return_used_this_function = 0;
|
| }
|
|
|
| const char *
|
| @@ -11915,7 +13467,8 @@ arm_output_epilogue (rtx sibling)
|
|
|
| /* If we have already generated the return instruction
|
| then it is futile to generate anything else. */
|
| - if (use_return_insn (FALSE, sibling) && return_used_this_function)
|
| + if (use_return_insn (FALSE, sibling)
|
| + && (cfun->machine->return_used_this_function != 0))
|
| return "";
|
|
|
| func_type = arm_current_func_type ();
|
| @@ -11957,7 +13510,7 @@ arm_output_epilogue (rtx sibling)
|
| /* This variable is for the Virtual Frame Pointer, not VFP regs. */
|
| int vfp_offset = offsets->frame;
|
|
|
| - if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
|
| + if (TARGET_FPA_EMU2)
|
| {
|
| for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
|
| if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
|
| @@ -12180,7 +13733,7 @@ arm_output_epilogue (rtx sibling)
|
| SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
|
| }
|
|
|
| - if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
|
| + if (TARGET_FPA_EMU2)
|
| {
|
| for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
|
| if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
|
| @@ -12221,24 +13774,29 @@ arm_output_epilogue (rtx sibling)
|
|
|
| if (TARGET_HARD_FLOAT && TARGET_VFP)
|
| {
|
| - start_reg = FIRST_VFP_REGNUM;
|
| - for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
|
| + int end_reg = LAST_VFP_REGNUM + 1;
|
| +
|
| + /* Scan the registers in reverse order. We need to match
|
| + any groupings made in the prologue and generate matching
|
| + pop operations. */
|
| + for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
|
| {
|
| if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
|
| - && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
|
| + && (!df_regs_ever_live_p (reg + 1)
|
| + || call_used_regs[reg + 1]))
|
| {
|
| - if (start_reg != reg)
|
| + if (end_reg > reg + 2)
|
| vfp_output_fldmd (f, SP_REGNUM,
|
| - (start_reg - FIRST_VFP_REGNUM) / 2,
|
| - (reg - start_reg) / 2);
|
| - start_reg = reg + 2;
|
| + (reg + 2 - FIRST_VFP_REGNUM) / 2,
|
| + (end_reg - (reg + 2)) / 2);
|
| + end_reg = reg;
|
| }
|
| }
|
| - if (start_reg != reg)
|
| - vfp_output_fldmd (f, SP_REGNUM,
|
| - (start_reg - FIRST_VFP_REGNUM) / 2,
|
| - (reg - start_reg) / 2);
|
| + if (end_reg > reg + 2)
|
| + vfp_output_fldmd (f, SP_REGNUM, 0,
|
| + (end_reg - (reg + 2)) / 2);
|
| }
|
| +
|
| if (TARGET_IWMMXT)
|
| for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
|
| if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
|
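| The loop above now scans the D registers from the top downwards, so
|
| each contiguous run of saved register pairs is popped as one block
|
| that mirrors the grouping the prologue pushed. The grouping logic,
|
| sketched over an array of liveness flags, one per register pair
|
| (names and callback are this sketch's own):
|
|   /* POP is invoked once per contiguous run of live pairs.  */
|
|   static void
|
|   pop_vfp_runs_sketch (const int live[], int n_pairs,
|
|                        void (*pop) (int first_pair, int count))
|
|   {
|
|     int end = n_pairs;              /* one past the current run */
|
|     for (int i = n_pairs - 1; i >= 0; i--)
|
|       if (!live[i])
|
|         {
|
|           if (end > i + 1)
|
|             pop (i + 1, end - (i + 1));
|
|           end = i;                  /* restart below the gap */
|
|         }
|
|     if (end > 0)
|
|       pop (0, end);                 /* run reaching down to pair 0 */
|
|   }
|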
| @@ -12362,7 +13920,7 @@ arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
|
| /* ??? Probably not safe to set this here, since it assumes that a
|
| function will be emitted as assembly immediately after we generate
|
| RTL for it. This does not happen for inline functions. */
|
| - return_used_this_function = 0;
|
| + cfun->machine->return_used_this_function = 0;
|
| }
|
| else /* TARGET_32BIT */
|
| {
|
| @@ -12370,7 +13928,7 @@ arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
|
| offsets = arm_get_frame_offsets ();
|
|
|
| gcc_assert (!use_return_insn (FALSE, NULL)
|
| - || !return_used_this_function
|
| + || (cfun->machine->return_used_this_function != 0)
|
| || offsets->saved_regs == offsets->outgoing_args
|
| || frame_pointer_needed);
|
|
|
| @@ -12407,16 +13965,17 @@ emit_multi_reg_push (unsigned long mask)
|
|
|
| /* For the body of the insn we are going to generate an UNSPEC in
|
| parallel with several USEs. This allows the insn to be recognized
|
| - by the push_multi pattern in the arm.md file. The insn looks
|
| - something like this:
|
| + by the push_multi pattern in the arm.md file.
|
| +
|
| + The body of the insn looks something like this:
|
|
|
| (parallel [
|
| - (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
|
| + (set (mem:BLK (pre_modify:SI (reg:SI sp)
|
| + (const_int:SI <num>)))
|
| (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
|
| - (use (reg:SI 11 fp))
|
| - (use (reg:SI 12 ip))
|
| - (use (reg:SI 14 lr))
|
| - (use (reg:SI 15 pc))
|
| + (use (reg:SI XX))
|
| + (use (reg:SI YY))
|
| + ...
|
| ])
|
|
|
| For the frame note however, we try to be more explicit and actually
|
| @@ -12429,13 +13988,20 @@ emit_multi_reg_push (unsigned long mask)
|
| (sequence [
|
| (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
|
| (set (mem:SI (reg:SI sp)) (reg:SI r4))
|
| - (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
|
| - (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
|
| - (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
|
| + (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
|
| + (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
|
| + ...
|
| ])
|
|
|
| - This sequence is used both by the code to support stack unwinding for
|
| - exceptions handlers and the code to generate dwarf2 frame debugging. */
|
| + FIXME: In an ideal world the PRE_MODIFY would not exist and
|
| + instead we'd have a parallel expression detailing all
|
| + the stores to the various memory addresses so that debug
|
| + information is more up-to-date. Remember, however, when making
|
| + that change to take care of the constraints with the push
|
| + instruction.
|
| +
|
| + Note also that this has to be taken care of for the VFP registers.
|
| +
|
| + For more details see PR43399. */
|
|
|
| par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
|
| dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
|
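| The PRE_MODIFY that replaces PRE_DEC throughout these push patterns
|
| makes the stack adjustment explicit: the base register is decremented
|
| by the full block size up front, and the stores land at the updated
|
| address, which is what STMDB/PUSH does. As a C analogy (pointer
|
| arithmetic stands in for the -4*num_regs adjustment):
|
|   #include <stdint.h>
|
|   /* Analogy for (mem (pre_modify sp (plus sp (const_int -4*n)))).  */
|
|   static uint32_t *
|
|   push_block_sketch (uint32_t *sp, const uint32_t *regs, int n)
|
|   {
|
|     sp -= n;                 /* sp := sp - 4*n, done first */
|
|     for (int i = 0; i < n; i++)
|
|       sp[i] = regs[i];       /* lowest register at lowest address */
|
|     return sp;               /* callers keep the modified base */
|
|   }
|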
| @@ -12449,9 +14015,14 @@ emit_multi_reg_push (unsigned long mask)
|
|
|
| XVECEXP (par, 0, 0)
|
| = gen_rtx_SET (VOIDmode,
|
| - gen_frame_mem (BLKmode,
|
| - gen_rtx_PRE_DEC (BLKmode,
|
| - stack_pointer_rtx)),
|
| + gen_frame_mem
|
| + (BLKmode,
|
| + gen_rtx_PRE_MODIFY (Pmode,
|
| + stack_pointer_rtx,
|
| + plus_constant
|
| + (stack_pointer_rtx,
|
| + -4 * num_regs))
|
| + ),
|
| gen_rtx_UNSPEC (BLKmode,
|
| gen_rtvec (1, reg),
|
| UNSPEC_PUSH_MULT));
|
| @@ -12482,9 +14053,10 @@ emit_multi_reg_push (unsigned long mask)
|
| {
|
| tmp
|
| = gen_rtx_SET (VOIDmode,
|
| - gen_frame_mem (SImode,
|
| - plus_constant (stack_pointer_rtx,
|
| - 4 * j)),
|
| + gen_frame_mem
|
| + (SImode,
|
| + plus_constant (stack_pointer_rtx,
|
| + 4 * j)),
|
| reg);
|
| RTX_FRAME_RELATED_P (tmp) = 1;
|
| XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
|
| @@ -12502,8 +14074,8 @@ emit_multi_reg_push (unsigned long mask)
|
| RTX_FRAME_RELATED_P (tmp) = 1;
|
| XVECEXP (dwarf, 0, 0) = tmp;
|
|
|
| - REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
|
| - REG_NOTES (par));
|
| + add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
|
| +
|
| return par;
|
| }
|
|
|
| @@ -12536,9 +14108,14 @@ emit_sfm (int base_reg, int count)
|
|
|
| XVECEXP (par, 0, 0)
|
| = gen_rtx_SET (VOIDmode,
|
| - gen_frame_mem (BLKmode,
|
| - gen_rtx_PRE_DEC (BLKmode,
|
| - stack_pointer_rtx)),
|
| + gen_frame_mem
|
| + (BLKmode,
|
| + gen_rtx_PRE_MODIFY (Pmode,
|
| + stack_pointer_rtx,
|
| + plus_constant
|
| + (stack_pointer_rtx,
|
| + -12 * count))
|
| + ),
|
| gen_rtx_UNSPEC (BLKmode,
|
| gen_rtvec (1, reg),
|
| UNSPEC_PUSH_MULT));
|
| @@ -12569,8 +14146,8 @@ emit_sfm (int base_reg, int count)
|
| XVECEXP (dwarf, 0, 0) = tmp;
|
|
|
| par = emit_insn (par);
|
| - REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
|
| - REG_NOTES (par));
|
| + add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
|
| +
|
| return par;
|
| }
|
|
|
| @@ -12748,22 +14325,24 @@ arm_get_frame_offsets (void)
|
| {
|
| int reg = -1;
|
|
|
| - for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
|
| - {
|
| - if ((offsets->saved_regs_mask & (1 << i)) == 0)
|
| - {
|
| - reg = i;
|
| - break;
|
| - }
|
| - }
|
| -
|
| - if (reg == -1 && arm_size_return_regs () <= 12
|
| - && !crtl->tail_call_emit)
|
| + /* If it is safe to use r3, then do so. This sometimes
|
| + generates better code on Thumb-2 by avoiding the need to
|
| + use 32-bit push/pop instructions. */
|
| + if (!crtl->tail_call_emit
|
| + && arm_size_return_regs () <= 12
|
| + && (offsets->saved_regs_mask & (1 << 3)) == 0)
|
| {
|
| - /* Push/pop an argument register (r3) if all callee saved
|
| - registers are already being pushed. */
|
| reg = 3;
|
| }
|
| + else
|
| + for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
|
| + {
|
| + if ((offsets->saved_regs_mask & (1 << i)) == 0)
|
| + {
|
| + reg = i;
|
| + break;
|
| + }
|
| + }
|
|
|
| if (reg != -1)
|
| {
|
| @@ -12863,6 +14442,24 @@ arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
|
| }
|
| }
|
|
|
| +/* Given FROM and TO register numbers, say whether this elimination is
|
| + allowed. Frame pointer elimination is automatically handled.
|
| +
|
| + All eliminations are permissible. Note that ARG_POINTER_REGNUM and
|
| + HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
|
| + pointer, we must eliminate FRAME_POINTER_REGNUM into
|
| + HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
|
| + ARG_POINTER_REGNUM. */
|
| +
|
| +bool
|
| +arm_can_eliminate (const int from, const int to)
|
| +{
|
| + return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
|
| + (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
|
| + (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
|
| + (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
|
| + true);
|
| +}
|
|
|
| /* Emit RTL to save coprocessor registers on function entry. Returns the
|
| number of bytes pushed. */
|
| @@ -12878,7 +14475,7 @@ arm_save_coproc_regs(void)
|
| for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
|
| if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
|
| {
|
| - insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
|
| + insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
|
| insn = gen_rtx_MEM (V2SImode, insn);
|
| insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
|
| RTX_FRAME_RELATED_P (insn) = 1;
|
| @@ -12887,12 +14484,12 @@ arm_save_coproc_regs(void)
|
|
|
| /* Save any floating point call-saved registers used by this
|
| function. */
|
| - if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
|
| + if (TARGET_FPA_EMU2)
|
| {
|
| for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
|
| if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
|
| {
|
| - insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
|
| + insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
|
| insn = gen_rtx_MEM (XFmode, insn);
|
| insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
|
| RTX_FRAME_RELATED_P (insn) = 1;
|
| @@ -12989,8 +14586,7 @@ thumb_set_frame_pointer (arm_stack_offsets *offsets)
|
| dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
|
| plus_constant (stack_pointer_rtx, amount));
|
| RTX_FRAME_RELATED_P (dwarf) = 1;
|
| - REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
|
| - REG_NOTES (insn));
|
| + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
|
| }
|
|
|
| RTX_FRAME_RELATED_P (insn) = 1;
|
| @@ -13053,8 +14649,7 @@ arm_expand_prologue (void)
|
| dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
|
| insn = gen_movsi (r0, stack_pointer_rtx);
|
| RTX_FRAME_RELATED_P (insn) = 1;
|
| - REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
|
| - dwarf, REG_NOTES (insn));
|
| + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
|
| emit_insn (insn);
|
| emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
|
| emit_insn (gen_movsi (stack_pointer_rtx, r1));
|
| @@ -13121,8 +14716,7 @@ arm_expand_prologue (void)
|
| plus_constant (stack_pointer_rtx,
|
| -fp_offset));
|
| RTX_FRAME_RELATED_P (insn) = 1;
|
| - REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
|
| - dwarf, REG_NOTES (insn));
|
| + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
|
| }
|
| else
|
| {
|
| @@ -13715,6 +15309,30 @@ arm_print_operand (FILE *stream, rtx x, int code)
|
| }
|
| return;
|
|
|
| + /* Print the high single-precision register of a VFP double-precision
|
| + register. */
|
| + case 'p':
|
| + {
|
| + int mode = GET_MODE (x);
|
| + int regno;
|
| +
|
| + if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
|
| + {
|
| + output_operand_lossage ("invalid operand for code '%c'", code);
|
| + return;
|
| + }
|
| +
|
| + regno = REGNO (x);
|
| + if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
|
| + {
|
| + output_operand_lossage ("invalid operand for code '%c'", code);
|
| + return;
|
| + }
|
| +
|
| + fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
|
| + }
|
| + return;
|
| +
|
| /* Print a VFP/Neon double precision or quad precision register name. */
|
| case 'P':
|
| case 'q':
|
| @@ -13832,6 +15450,73 @@ arm_print_operand (FILE *stream, rtx x, int code)
|
| }
|
| return;
|
|
|
| + /* Memory operand for vld1/vst1 instruction. */
|
| + case 'A':
|
| + {
|
| + rtx addr;
|
| + bool postinc = FALSE;
|
| + gcc_assert (GET_CODE (x) == MEM);
|
| + addr = XEXP (x, 0);
|
| + if (GET_CODE (addr) == POST_INC)
|
| + {
|
| + postinc = 1;
|
| + addr = XEXP (addr, 0);
|
| + }
|
| + asm_fprintf (stream, "[%r]", REGNO (addr));
|
| + if (postinc)
|
| + fputs("!", stream);
|
| + }
|
| + return;
|
| +
|
| + /* Translate an S register number into a D register number and element index. */
|
| + case 'y':
|
| + {
|
| + int mode = GET_MODE (x);
|
| + int regno;
|
| +
|
| + if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
|
| + {
|
| + output_operand_lossage ("invalid operand for code '%c'", code);
|
| + return;
|
| + }
|
| +
|
| + regno = REGNO (x);
|
| + if (!VFP_REGNO_OK_FOR_SINGLE (regno))
|
| + {
|
| + output_operand_lossage ("invalid operand for code '%c'", code);
|
| + return;
|
| + }
|
| +
|
| + regno = regno - FIRST_VFP_REGNUM;
|
| + fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
|
| + }
|
| + return;
|
| +
|
| + /* Register specifier for vld1.16/vst1.16. Translate the S register
|
| + number into a D register number and element index. */
|
| + case 'z':
|
| + {
|
| + int mode = GET_MODE (x);
|
| + int regno;
|
| +
|
| + if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
|
| + {
|
| + output_operand_lossage ("invalid operand for code '%c'", code);
|
| + return;
|
| + }
|
| +
|
| + regno = REGNO (x);
|
| + if (!VFP_REGNO_OK_FOR_SINGLE (regno))
|
| + {
|
| + output_operand_lossage ("invalid operand for code '%c'", code);
|
| + return;
|
| + }
|
| +
|
| + regno = regno - FIRST_VFP_REGNUM;
|
| + fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
|
| + }
|
| + return;
|
| +
|
| default:
|
| if (x == 0)
|
| {
|
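| Both the 'y' and 'z' cases above rely on the VFP overlay of S onto D
|
| registers: s<2k> and s<2k+1> are the two halves of d<k>. 'y' prints
|
| the 32-bit lane index directly, while 'z' (for 16-bit vld1/vst1
|
| accesses) maps the odd half to 16-bit lane 2. A sketch of both
|
| mappings:
|
|   #include <stdio.h>
|
|   /* N is the S-register number; print the D-register lane forms.  */
|
|   static void
|
|   print_s_overlay_sketch (int n)
|
|   {
|
|     printf ("s%d -> d%d[%d]  ('y', 32-bit lanes)\n", n, n / 2, n % 2);
|
|     printf ("s%d -> d%d[%d]  ('z', 16-bit lanes)\n", n, n / 2,
|
|             (n % 2) ? 2 : 0);
|
|   }
|
|   int
|
|   main (void)
|
|   {
|
|     print_s_overlay_sketch (5);   /* s5 -> d2[1] and d2[2] */
|
|     return 0;
|
|   }
|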
| @@ -13865,6 +15550,12 @@ arm_print_operand (FILE *stream, rtx x, int code)
|
| default:
|
| gcc_assert (GET_CODE (x) != NEG);
|
| fputc ('#', stream);
|
| + if (GET_CODE (x) == HIGH)
|
| + {
|
| + fputs (":lower16:", stream);
|
| + x = XEXP (x, 0);
|
| + }
|
| +
|
| output_addr_const (stream, x);
|
| break;
|
| }
|
| @@ -14032,7 +15723,7 @@ static enum arm_cond_code
|
| get_arm_condition_code (rtx comparison)
|
| {
|
| enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
|
| - int code;
|
| + enum arm_cond_code code;
|
| enum rtx_code comp_code = GET_CODE (comparison);
|
|
|
| if (GET_MODE_CLASS (mode) != MODE_CC)
|
| @@ -14243,12 +15934,6 @@ arm_final_prescan_insn (rtx insn)
|
| reversed if it appears to fail. */
|
| int reverse = 0;
|
|
|
| - /* JUMP_CLOBBERS will be one implies that the conditions if a branch is
|
| - taken are clobbered, even if the rtl suggests otherwise. It also
|
| - means that we have to grub around within the jump expression to find
|
| - out what the conditions are when the jump isn't taken. */
|
| - int jump_clobbers = 0;
|
| -
|
| /* If we start with a return insn, we only succeed if we find another one. */
|
| int seeking_return = 0;
|
|
|
| @@ -14327,14 +16012,6 @@ arm_final_prescan_insn (rtx insn)
|
| int then_not_else = TRUE;
|
| rtx this_insn = start_insn, label = 0;
|
|
|
| - /* If the jump cannot be done with one instruction, we cannot
|
| - conditionally execute the instruction in the inverse case. */
|
| - if (get_attr_conds (insn) == CONDS_JUMP_CLOB)
|
| - {
|
| - jump_clobbers = 1;
|
| - return;
|
| - }
|
| -
|
| /* Register the insn jumped to. */
|
| if (reverse)
|
| {
|
| @@ -14377,13 +16054,7 @@ arm_final_prescan_insn (rtx insn)
|
| control falls in from somewhere else. */
|
| if (this_insn == label)
|
| {
|
| - if (jump_clobbers)
|
| - {
|
| - arm_ccfsm_state = 2;
|
| - this_insn = next_nonnote_insn (this_insn);
|
| - }
|
| - else
|
| - arm_ccfsm_state = 1;
|
| + arm_ccfsm_state = 1;
|
| succeed = TRUE;
|
| }
|
| else
|
| @@ -14398,13 +16069,7 @@ arm_final_prescan_insn (rtx insn)
|
| this_insn = next_nonnote_insn (this_insn);
|
| if (this_insn && this_insn == label)
|
| {
|
| - if (jump_clobbers)
|
| - {
|
| - arm_ccfsm_state = 2;
|
| - this_insn = next_nonnote_insn (this_insn);
|
| - }
|
| - else
|
| - arm_ccfsm_state = 1;
|
| + arm_ccfsm_state = 1;
|
| succeed = TRUE;
|
| }
|
| else
|
| @@ -14432,13 +16097,7 @@ arm_final_prescan_insn (rtx insn)
|
| if (this_insn && this_insn == label
|
| && insns_skipped < max_insns_skipped)
|
| {
|
| - if (jump_clobbers)
|
| - {
|
| - arm_ccfsm_state = 2;
|
| - this_insn = next_nonnote_insn (this_insn);
|
| - }
|
| - else
|
| - arm_ccfsm_state = 1;
|
| + arm_ccfsm_state = 1;
|
| succeed = TRUE;
|
| }
|
| else
|
| @@ -14544,25 +16203,11 @@ arm_final_prescan_insn (rtx insn)
|
| }
|
| arm_target_insn = this_insn;
|
| }
|
| - if (jump_clobbers)
|
| - {
|
| - gcc_assert (!reverse);
|
| - arm_current_cc =
|
| - get_arm_condition_code (XEXP (XEXP (XEXP (SET_SRC (body),
|
| - 0), 0), 1));
|
| - if (GET_CODE (XEXP (XEXP (SET_SRC (body), 0), 0)) == AND)
|
| - arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
|
| - if (GET_CODE (XEXP (SET_SRC (body), 0)) == NE)
|
| - arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
|
| - }
|
| - else
|
| - {
|
| - /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
|
| - what it was. */
|
| - if (!reverse)
|
| - arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body),
|
| - 0));
|
| - }
|
| +
|
| + /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
|
| + what it was. */
|
| + if (!reverse)
|
| + arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
|
|
|
| if (reverse || then_not_else)
|
| arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
|
| @@ -14629,6 +16274,11 @@ arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
|
| if (mode == DFmode)
|
| return VFP_REGNO_OK_FOR_DOUBLE (regno);
|
|
|
| + /* VFP registers can hold HFmode values, but there is no point in
|
| + putting them there unless we have hardware conversion insns. */
|
| + if (mode == HFmode)
|
| + return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
|
| +
|
| if (TARGET_NEON)
|
| return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
|
| || (VALID_NEON_QREG_MODE (mode)
|
| @@ -14651,13 +16301,13 @@ arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
|
| return VALID_IWMMXT_REG_MODE (mode);
|
| }
|
|
|
| - /* We allow any value to be stored in the general registers.
|
| + /* We allow almost any value to be stored in the general registers.
|
| Restrict doubleword quantities to even register pairs so that we can
|
| - use ldrd. Do not allow Neon structure opaque modes in general registers;
|
| - they would use too many. */
|
| + use ldrd. Do not allow very large Neon structure opaque modes in
|
| + general registers; they would use too many. */
|
| if (regno <= LAST_ARM_REGNUM)
|
| return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
|
| - && !VALID_NEON_STRUCT_MODE (mode);
|
| + && ARM_NUM_REGS (mode) <= 4;
|
|
|
| if (regno == FRAME_POINTER_REGNUM
|
| || regno == ARG_POINTER_REGNUM)
|
| @@ -14674,7 +16324,8 @@ arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
|
|
|
| /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
|
| not used in arm mode. */
|
| -int
|
| +
|
| +enum reg_class
|
| arm_regno_class (int regno)
|
| {
|
| if (TARGET_THUMB1)
|
| @@ -14828,7 +16479,7 @@ static const struct builtin_description bdesc_2arg[] =
|
| {
|
| #define IWMMXT_BUILTIN(code, string, builtin) \
|
| { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
|
| - ARM_BUILTIN_##builtin, 0, 0 },
|
| + ARM_BUILTIN_##builtin, UNKNOWN, 0 },
|
|
|
| IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
|
| IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
|
| @@ -14890,7 +16541,7 @@ static const struct builtin_description bdesc_2arg[] =
|
| IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
|
|
|
| #define IWMMXT_BUILTIN2(code, builtin) \
|
| - { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, 0, 0 },
|
| + { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
|
|
|
| IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
|
| IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
|
| @@ -15287,7 +16938,7 @@ arm_init_tls_builtins (void)
|
| TREE_READONLY (decl) = 1;
|
| }
|
|
|
| -typedef enum {
|
| +enum neon_builtin_type_bits {
|
| T_V8QI = 0x0001,
|
| T_V4HI = 0x0002,
|
| T_V2SI = 0x0004,
|
| @@ -15301,7 +16952,7 @@ typedef enum {
|
| T_TI = 0x0400,
|
| T_EI = 0x0800,
|
| T_OI = 0x1000
|
| -} neon_builtin_type_bits;
|
| +};
|
|
|
| #define v8qi_UP T_V8QI
|
| #define v4hi_UP T_V4HI
|
| @@ -15364,7 +17015,7 @@ typedef enum {
|
| typedef struct {
|
| const char *name;
|
| const neon_itype itype;
|
| - const neon_builtin_type_bits bits;
|
| + const int bits;
|
| const enum insn_code codes[T_MAX];
|
| const unsigned int num_vars;
|
| unsigned int base_fcode;
|
| @@ -16114,6 +17765,15 @@ arm_init_neon_builtins (void)
|
| }
|
|
|
| static void
|
| +arm_init_fp16_builtins (void)
|
| +{
|
| + tree fp16_type = make_node (REAL_TYPE);
|
| + TYPE_PRECISION (fp16_type) = 16;
|
| + layout_type (fp16_type);
|
| + (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
|
| +}
|
| +
|
| +static void
|
| arm_init_builtins (void)
|
| {
|
| arm_init_tls_builtins ();
|
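| arm_init_fp16_builtins above registers a 16-bit REAL_TYPE under the
|
| name __fp16, which is what lets source like the following sketch
|
| compile (ARM target with an fp16 format selected; note that the hooks
|
| added in the next hunk reject __fp16 as a parameter or return type):
|
|   /* Usage sketch; requires arm_fp16_format to be set.  */
|
|   __fp16 half_values[4] = { 1.0, 0.5, 0.25, 0.125 };
|
|   float
|
|   sum_halves (void)
|
|   {
|
|     float sum = 0.0f;
|
|     int i;
|
|     for (i = 0; i < 4; i++)
|
|       sum += half_values[i];   /* promoted to float for arithmetic */
|
|     return sum;
|
|   }
|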
| @@ -16123,6 +17783,71 @@ arm_init_builtins (void)
|
|
|
| if (TARGET_NEON)
|
| arm_init_neon_builtins ();
|
| +
|
| + if (arm_fp16_format)
|
| + arm_init_fp16_builtins ();
|
| +}
|
| +
|
| +/* Implement TARGET_INVALID_PARAMETER_TYPE. */
|
| +
|
| +static const char *
|
| +arm_invalid_parameter_type (const_tree t)
|
| +{
|
| + if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
|
| + return N_("function parameters cannot have __fp16 type");
|
| + return NULL;
|
| +}
|
| +
|
| +/* Implement TARGET_INVALID_RETURN_TYPE. */
|
| +
|
| +static const char *
|
| +arm_invalid_return_type (const_tree t)
|
| +{
|
| + if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
|
| + return N_("functions cannot return __fp16 type");
|
| + return NULL;
|
| +}
|
| +
|
| +/* Implement TARGET_PROMOTED_TYPE. */
|
| +
|
| +static tree
|
| +arm_promoted_type (const_tree t)
|
| +{
|
| + if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
|
| + return float_type_node;
|
| + return NULL_TREE;
|
| +}
|
| +
|
| +/* Implement TARGET_CONVERT_TO_TYPE.
|
| + Specifically, this hook implements the peculiarity of the ARM
|
| + half-precision floating-point C semantics that requires conversions
|
| + between __fp16 and double to go through an intermediate conversion
|
| + to float. */
|
| +
|
| +static tree
|
| +arm_convert_to_type (tree type, tree expr)
|
| +{
|
| + tree fromtype = TREE_TYPE (expr);
|
| + if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
|
| + return NULL_TREE;
|
| + if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
|
| + || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
|
| + return convert (type, convert (float_type_node, expr));
|
| + return NULL_TREE;
|
| +}
|
| +
|
| +/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
|
| + This simply adds HFmode as a supported mode; even though we don't
|
| + implement arithmetic on this type directly, it's supported by
|
| + optabs conversions, much the way the double-word arithmetic is
|
| + special-cased in the default hook. */
|
| +
|
| +static bool
|
| +arm_scalar_mode_supported_p (enum machine_mode mode)
|
| +{
|
| + if (mode == HFmode)
|
| + return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
|
| + else
|
| + return default_scalar_mode_supported_p (mode);
|
| }
|
|
|
| /* Errors in the source file can cause expand_expr to return const0_rtx
|
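| The conversion rule implemented by arm_convert_to_type above reads
|
| naturally at the source level: a conversion between __fp16 and double
|
| is rewritten as two steps with float in the middle. A sketch, with
|
| float standing in for the target-specific half type on the final
|
| step:
|
|   #include <stdio.h>
|
|   int
|
|   main (void)
|
|   {
|
|     double d = 0.1;
|
|     /* What the hook arranges for (__fp16) d: narrow to float first,
|
|        then let the float-to-half conversion finish the job.  */
|
|     float intermediate = (float) d;
|
|     /* __fp16 h = intermediate;  -- ARM-only, left as a comment.  */
|
|     printf ("%.17g narrows to %.17g\n", d, (double) intermediate);
|
|     return 0;
|
|   }
|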
| @@ -16281,7 +18006,7 @@ arm_expand_neon_args (rtx target, int icode, int have_retval,
|
|
|
| for (;;)
|
| {
|
| - builtin_arg thisarg = va_arg (ap, int);
|
| + builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
|
|
|
| if (thisarg == NEON_ARG_STOP)
|
| break;
|
| @@ -17202,6 +18927,7 @@ thumb_shiftable_const (unsigned HOST_WIDE_INT val)
|
| unsigned HOST_WIDE_INT mask = 0xff;
|
| int i;
|
|
|
| + val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
|
| if (val == 0) /* XXX */
|
| return 0;
|
|
|
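| The new masking line above matters on 64-bit hosts: a negative SImode
|
| constant arrives sign-extended in a HOST_WIDE_INT, and without the
|
| mask the search for a shiftable 8-bit pattern within 32 bits could
|
| never succeed. The effect, sketched:
|
|   #include <stdio.h>
|
|   int
|
|   main (void)
|
|   {
|
|     /* A negative SImode constant, sign-extended to 64 host bits.  */
|
|     long long val = -0x7fffffffLL - 1;     /* 0xffffffff80000000 */
|
|     unsigned long long masked = val & 0xffffffffULL;
|
|     printf ("%llx -> %llx\n", (unsigned long long) val, masked);
|
|     return 0;                              /* ... -> 80000000 */
|
|   }
|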
| @@ -17311,7 +19037,7 @@ thumb_unexpanded_epilogue (void)
|
| int had_to_push_lr;
|
| int size;
|
|
|
| - if (return_used_this_function)
|
| + if (cfun->machine->return_used_this_function != 0)
|
| return "";
|
|
|
| if (IS_NAKED (arm_current_func_type ()))
|
| @@ -17635,9 +19361,7 @@ thumb1_expand_prologue (void)
|
| plus_constant (stack_pointer_rtx,
|
| -amount));
|
| RTX_FRAME_RELATED_P (dwarf) = 1;
|
| - REG_NOTES (insn)
|
| - = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
|
| - REG_NOTES (insn));
|
| + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
|
| }
|
| }
|
|
|
| @@ -18289,41 +20013,8 @@ arm_file_start (void)
|
| }
|
| else
|
| {
|
| - int set_float_abi_attributes = 0;
|
| - switch (arm_fpu_arch)
|
| - {
|
| - case FPUTYPE_FPA:
|
| - fpu_name = "fpa";
|
| - break;
|
| - case FPUTYPE_FPA_EMU2:
|
| - fpu_name = "fpe2";
|
| - break;
|
| - case FPUTYPE_FPA_EMU3:
|
| - fpu_name = "fpe3";
|
| - break;
|
| - case FPUTYPE_MAVERICK:
|
| - fpu_name = "maverick";
|
| - break;
|
| - case FPUTYPE_VFP:
|
| - fpu_name = "vfp";
|
| - set_float_abi_attributes = 1;
|
| - break;
|
| - case FPUTYPE_VFP3D16:
|
| - fpu_name = "vfpv3-d16";
|
| - set_float_abi_attributes = 1;
|
| - break;
|
| - case FPUTYPE_VFP3:
|
| - fpu_name = "vfpv3";
|
| - set_float_abi_attributes = 1;
|
| - break;
|
| - case FPUTYPE_NEON:
|
| - fpu_name = "neon";
|
| - set_float_abi_attributes = 1;
|
| - break;
|
| - default:
|
| - abort();
|
| - }
|
| - if (set_float_abi_attributes)
|
| + fpu_name = arm_fpu_desc->name;
|
| + if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
|
| {
|
| if (TARGET_HARD_FLOAT)
|
| asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
|
| @@ -18373,6 +20064,11 @@ arm_file_start (void)
|
| val = 6;
|
| asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
|
|
|
| + /* Tag_ABI_FP_16bit_format. */
|
| + if (arm_fp16_format)
|
| + asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
|
| + (int)arm_fp16_format);
|
| +
|
| if (arm_lang_output_object_attributes_hook)
|
| arm_lang_output_object_attributes_hook();
|
| }
|
| @@ -18602,6 +20298,23 @@ arm_emit_vector_const (FILE *file, rtx x)
|
| return 1;
|
| }
|
|
|
| +/* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
|
| + HFmode constant pool entries are actually loaded with ldr. */
|
| +void
|
| +arm_emit_fp16_const (rtx c)
|
| +{
|
| + REAL_VALUE_TYPE r;
|
| + long bits;
|
| +
|
| + REAL_VALUE_FROM_CONST_DOUBLE (r, c);
|
| + bits = real_to_target (NULL, &r, HFmode);
|
| + if (WORDS_BIG_ENDIAN)
|
| + assemble_zeros (2);
|
| + assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
|
| + if (!WORDS_BIG_ENDIAN)
|
| + assemble_zeros (2);
|
| +}
|
| +
|
| const char *
|
| arm_output_load_gr (rtx *operands)
|
| {
|
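| arm_emit_fp16_const above pads the 2-byte HFmode image out to a full
|
| word so the constant-pool entry can be fetched with a plain ldr; on a
|
| big-endian target the zero bytes come first, so either way the value
|
| ends up in the low 16 bits of the loaded word. A byte-level sketch
|
| (assemble_integer writes the value in target byte order):
|
|   #include <stdint.h>
|
|   #include <string.h>
|
|   /* BITS is the 16-bit image from real_to_target; OUT receives the
|
|      4 bytes in target memory order.  */
|
|   static void
|
|   emit_fp16_word_sketch (uint8_t out[4], uint16_t bits, int big_endian)
|
|   {
|
|     memset (out, 0, 4);              /* the two padding bytes */
|
|     if (big_endian)
|
|       {
|
|         out[2] = bits >> 8;          /* value after the padding */
|
|         out[3] = bits & 0xff;
|
|       }
|
|     else
|
|       {
|
|         out[0] = bits & 0xff;        /* value before the padding */
|
|         out[1] = bits >> 8;
|
|       }
|
|   }
|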
| @@ -18639,19 +20352,24 @@ arm_output_load_gr (rtx *operands)
|
| that way. */
|
|
|
| static void
|
| -arm_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
|
| +arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
|
| enum machine_mode mode,
|
| tree type,
|
| int *pretend_size,
|
| int second_time ATTRIBUTE_UNUSED)
|
| {
|
| - int nregs = cum->nregs;
|
| - if (nregs & 1
|
| - && ARM_DOUBLEWORD_ALIGN
|
| - && arm_needs_doubleword_align (mode, type))
|
| - nregs++;
|
| -
|
| + int nregs;
|
| +
|
| cfun->machine->uses_anonymous_args = 1;
|
| + if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
|
| + {
|
| + nregs = pcum->aapcs_ncrn;
|
| + if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
|
| + nregs++;
|
| + }
|
| + else
|
| + nregs = pcum->nregs;
|
| +
|
| if (nregs < NUM_ARG_REGS)
|
| *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
|
| }
|
| @@ -18785,6 +20503,19 @@ arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
|
| return !TARGET_AAPCS_BASED;
|
| }
|
|
|
| +static enum machine_mode
|
| +arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
|
| + enum machine_mode mode,
|
| + int *punsignedp ATTRIBUTE_UNUSED,
|
| + const_tree fntype ATTRIBUTE_UNUSED,
|
| + int for_return ATTRIBUTE_UNUSED)
|
| +{
|
| + if (GET_MODE_CLASS (mode) == MODE_INT
|
| + && GET_MODE_SIZE (mode) < 4)
|
| + return SImode;
|
| +
|
| + return mode;
|
| +}
|
|
|
| /* AAPCS based ABIs use short enums by default. */
|
|
|
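| arm_promote_function_mode above widens any integer narrower than a
|
| word to SImode for argument and return passing, so a char or short
|
| occupies a full 32-bit register at call boundaries. The size rule,
|
| sketched (signedness travels separately in *punsignedp):
|
|   /* Promoted size in bytes of an integer argument or return.  */
|
|   static int
|
|   promoted_arg_bytes_sketch (int mode_bytes)
|
|   {
|
|     return mode_bytes < 4 ? 4 : mode_bytes;  /* QI/HI -> SI */
|
|   }
|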
| @@ -19035,9 +20766,10 @@ arm_vector_mode_supported_p (enum machine_mode mode)
|
| || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
|
| return true;
|
|
|
| - if ((mode == V2SImode)
|
| - || (mode == V4HImode)
|
| - || (mode == V8QImode))
|
| + if ((TARGET_NEON || TARGET_IWMMXT)
|
| + && ((mode == V2SImode)
|
| + || (mode == V4HImode)
|
| + || (mode == V8QImode)))
|
| return true;
|
|
|
| return false;
|
| @@ -19068,9 +20800,14 @@ arm_dbx_register_number (unsigned int regno)
|
| if (IS_FPA_REGNUM (regno))
|
| return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
|
|
|
| - /* FIXME: VFPv3 register numbering. */
|
| if (IS_VFP_REGNUM (regno))
|
| - return 64 + regno - FIRST_VFP_REGNUM;
|
| + {
|
| + /* See comment in arm_dwarf_register_span. */
|
| + if (VFP_REGNO_OK_FOR_SINGLE (regno))
|
| + return 64 + regno - FIRST_VFP_REGNUM;
|
| + else
|
| + return 256 + (regno - FIRST_VFP_REGNUM) / 2;
|
| + }
|
|
|
| if (IS_IWMMXT_GR_REGNUM (regno))
|
| return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
|
| @@ -19081,6 +20818,39 @@ arm_dbx_register_number (unsigned int regno)
|
| gcc_unreachable ();
|
| }
|
|
|
| +/* DWARF models VFPv3 registers as 32 64-bit registers.
|
| + GCC models them as 64 32-bit registers, so we need to describe this to
|
| + the DWARF generation code. Other registers can use the default. */
|
| +static rtx
|
| +arm_dwarf_register_span (rtx rtl)
|
| +{
|
| + unsigned regno;
|
| + int nregs;
|
| + int i;
|
| + rtx p;
|
| +
|
| + regno = REGNO (rtl);
|
| + if (!IS_VFP_REGNUM (regno))
|
| + return NULL_RTX;
|
| +
|
| + /* XXX FIXME: The EABI defines two VFP register ranges:
|
| + 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
|
| + 256-287: D0-D31
|
| + The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
|
| + corresponding D register. Until GDB supports this, we shall use the
|
| + legacy encodings. We also use these encodings for D0-D15 for
|
| + compatibility with older debuggers. */
|
| + if (VFP_REGNO_OK_FOR_SINGLE (regno))
|
| + return NULL_RTX;
|
| +
|
| + nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
|
| + p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
|
| + regno = (regno - FIRST_VFP_REGNUM) / 2;
|
| + for (i = 0; i < nregs; i++)
|
| + XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
|
| +
|
| + return p;
|
| +}
|
|
|
| #ifdef TARGET_UNWIND_INFO
|
| /* Emit unwind directives for a store-multiple instruction or stack pointer
|
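| Together, the arm_dbx_register_number change and the new
|
| arm_dwarf_register_span above implement the EABI's two VFP numbering
|
| ranges quoted in the comment: legacy 64-95 numbers for registers
|
| expressible as S0-S31, and 256+ numbers, one per D register, for
|
| D16-D31, whose wide values are described as a parallel of DImode
|
| pieces. The number selection, sketched:
|
|   /* OFFSET is regno - FIRST_VFP_REGNUM; S regs come two per D.  */
|
|   static int
|
|   vfp_dwarf_number_sketch (int offset)
|
|   {
|
|     if (offset < 32)           /* overlaps S0-S31: legacy range */
|
|       return 64 + offset;
|
|     return 256 + offset / 2;   /* D16-D31: one number per D reg */
|
|   }
|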
| @@ -19404,7 +21174,7 @@ arm_emit_tls_decoration (FILE *fp, rtx x)
|
| rtx val;
|
|
|
| val = XVECEXP (x, 0, 0);
|
| - reloc = INTVAL (XVECEXP (x, 0, 1));
|
| + reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
|
|
|
| output_addr_const (fp, val);
|
|
|
| @@ -19522,6 +21292,32 @@ arm_output_shift(rtx * operands, int set_flags)
|
| return "";
|
| }
|
|
|
| +/* Output a Thumb-1 casesi dispatch sequence. */
|
| +const char *
|
| +thumb1_output_casesi (rtx *operands)
|
| +{
|
| + rtx diff_vec = PATTERN (next_real_insn (operands[0]));
|
| + addr_diff_vec_flags flags;
|
| +
|
| + gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
|
| +
|
| + flags = ADDR_DIFF_VEC_FLAGS (diff_vec);
|
| +
|
| + switch (GET_MODE (diff_vec))
|
| + {
|
| + case QImode:
|
| + return (flags.offset_unsigned ?
|
| + "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
|
| + case HImode:
|
| + return (flags.offset_unsigned ?
|
| + "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
|
| + case SImode:
|
| + return "bl\t%___gnu_thumb1_case_si";
|
| + default:
|
| + gcc_unreachable ();
|
| + }
|
| +}
|
| +
|
| /* Output a Thumb-2 casesi instruction. */
|
| const char *
|
| thumb2_output_casesi (rtx *operands)
|
| @@ -19622,7 +21418,7 @@ arm_mangle_type (const_tree type)
|
| && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
|
| {
|
| static bool warned;
|
| - if (!warned && warn_psabi)
|
| + if (!warned && warn_psabi && !in_system_header)
|
| {
|
| warned = true;
|
| inform (input_location,
|
| @@ -19631,6 +21427,10 @@ arm_mangle_type (const_tree type)
|
| return "St9__va_list";
|
| }
|
|
|
| + /* Half-precision float. */
|
| + if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
|
| + return "Dh";
|
| +
|
| if (TREE_CODE (type) != VECTOR_TYPE)
|
| return NULL;
|
|
|
| @@ -19689,4 +21489,22 @@ arm_optimization_options (int level, int size ATTRIBUTE_UNUSED)
|
| flag_section_anchors = 2;
|
| }
|
|
|
| +/* Implement TARGET_FRAME_POINTER_REQUIRED. */
|
| +
|
| +bool
|
| +arm_frame_pointer_required (void)
|
| +{
|
| + return (cfun->has_nonlocal_label
|
| + || SUBTARGET_FRAME_POINTER_REQUIRED
|
| + || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
|
| +}
|
| +
|
| +/* Only Thumb-1 lacks conditional execution, so return true
|
| + unless the target is Thumb-1. */
|
| +static bool
|
| +arm_have_conditional_execution (void)
|
| +{
|
| + return !TARGET_THUMB1;
|
| +}
|
| +
|
| #include "gt-arm.h"
|
|
|