Index: gcc/gcc/config/arm/arm.c |
diff --git a/gcc/gcc/config/arm/arm.c b/gcc/gcc/config/arm/arm.c |
index 7f346da319b1b3c301d84ef9cd71d873e8898cf4..a06a38bc8a9695732972d5867155b71756ff68b8 100644 |
--- a/gcc/gcc/config/arm/arm.c |
+++ b/gcc/gcc/config/arm/arm.c |
@@ -1,6 +1,6 @@ |
/* Output routines for GCC for ARM. |
Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, |
- 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 |
+ 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 |
Free Software Foundation, Inc. |
Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl) |
and Martin Simmons (@harleqn.co.uk). |
@@ -43,6 +43,7 @@ |
#include "optabs.h" |
#include "toplev.h" |
#include "recog.h" |
+#include "cgraph.h" |
#include "ggc.h" |
#include "except.h" |
#include "c-pragma.h" |
@@ -53,14 +54,13 @@ |
#include "debug.h" |
#include "langhooks.h" |
#include "df.h" |
+#include "intl.h" |
#include "libfuncs.h" |
/* Forward definitions of types. */ |
typedef struct minipool_node Mnode; |
typedef struct minipool_fixup Mfix; |
-const struct attribute_spec arm_attribute_table[]; |
- |
void (*arm_lang_output_object_attributes_hook)(void); |
/* Forward function declarations. */ |
@@ -74,7 +74,10 @@ static int arm_address_register_rtx_p (rtx, int); |
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int); |
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int); |
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int); |
+static rtx arm_legitimize_address (rtx, rtx, enum machine_mode); |
+static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode); |
inline static int thumb1_index_register_rtx_p (rtx, int); |
+static bool arm_legitimate_address_p (enum machine_mode, rtx, bool); |
static int thumb_far_jump_used_p (void); |
static bool thumb_force_lr_save (void); |
static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code); |
@@ -111,6 +114,7 @@ static unsigned long arm_compute_save_reg_mask (void); |
static unsigned long arm_isr_value (tree); |
static unsigned long arm_compute_func_type (void); |
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *); |
+static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *); |
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *); |
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES |
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *); |
@@ -124,9 +128,17 @@ static int arm_adjust_cost (rtx, rtx, rtx, int); |
static int count_insns_for_constant (HOST_WIDE_INT, int); |
static int arm_get_strip_length (int); |
static bool arm_function_ok_for_sibcall (tree, tree); |
+static enum machine_mode arm_promote_function_mode (const_tree, |
+ enum machine_mode, int *, |
+ const_tree, int); |
+static bool arm_return_in_memory (const_tree, const_tree); |
+static rtx arm_function_value (const_tree, const_tree, bool); |
+static rtx arm_libcall_value (enum machine_mode, const_rtx); |
+ |
static void arm_internal_label (FILE *, const char *, unsigned long); |
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, |
tree); |
+static bool arm_have_conditional_execution (void); |
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool); |
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *); |
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool); |
@@ -149,6 +161,9 @@ static void emit_constant_insn (rtx cond, rtx pattern); |
static rtx emit_set_insn (rtx, rtx); |
static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode, |
tree, bool); |
+static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree, |
+ const_tree); |
+static int aapcs_select_return_coproc (const_tree, const_tree); |
#ifdef OBJECT_FORMAT_ELF |
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED; |
@@ -176,6 +191,7 @@ static void arm_unwind_emit (FILE *, rtx); |
static bool arm_output_ttype (rtx); |
#endif |
static void arm_dwarf_handle_frame_unspec (const char *, rtx, int); |
+static rtx arm_dwarf_register_span (rtx); |
static tree arm_cxx_guard_type (void); |
static bool arm_cxx_guard_mask_bit (void); |
@@ -198,14 +214,65 @@ static bool arm_tls_symbol_p (rtx x); |
static int arm_issue_rate (void); |
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; |
static bool arm_allocate_stack_slots_for_args (void); |
+static const char *arm_invalid_parameter_type (const_tree t); |
+static const char *arm_invalid_return_type (const_tree t); |
+static tree arm_promoted_type (const_tree t); |
+static tree arm_convert_to_type (tree type, tree expr); |
+static bool arm_scalar_mode_supported_p (enum machine_mode); |
+static bool arm_frame_pointer_required (void); |
+static bool arm_can_eliminate (const int, const int); |
+static void arm_asm_trampoline_template (FILE *); |
+static void arm_trampoline_init (rtx, tree, rtx); |
+static rtx arm_trampoline_adjust_address (rtx); |
+/* Table of machine attributes. */ |
+static const struct attribute_spec arm_attribute_table[] = |
+{ |
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */ |
+ /* Function calls made to this symbol must be done indirectly, because |
+ it may lie outside of the 26-bit addressing range of a normal function |
+ call. */ |
+ { "long_call", 0, 0, false, true, true, NULL }, |
+ /* Whereas these functions are always known to reside within the 26-bit |
+ addressing range. */ |
+ { "short_call", 0, 0, false, true, true, NULL }, |
+ /* Specify the procedure call conventions for a function. */ |
+ { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute }, |
+ /* Interrupt Service Routines have special prologue and epilogue requirements. */ |
+ { "isr", 0, 1, false, false, false, arm_handle_isr_attribute }, |
+ { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute }, |
+ { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute }, |
+#ifdef ARM_PE |
+ /* ARM/PE has three new attributes: |
+ interfacearm - ? |
+ dllexport - for exporting a function/variable that will live in a dll |
+ dllimport - for importing a function/variable from a dll |
+ |
+ Microsoft allows multiple declspecs in one __declspec, separating |
+ them with spaces. We do NOT support this. Instead, use __declspec |
+ multiple times. |
+ */ |
+ { "dllimport", 0, 0, true, false, false, NULL }, |
+ { "dllexport", 0, 0, true, false, false, NULL }, |
+ { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute }, |
+#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES |
+ { "dllimport", 0, 0, false, false, false, handle_dll_attribute }, |
+ { "dllexport", 0, 0, false, false, false, handle_dll_attribute }, |
+ { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute }, |
+#endif |
+ { NULL, 0, 0, false, false, false, NULL } |
+}; |
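 |
A usage sketch, not part of the patch: the new "pcs" attribute takes a |
single string argument, and only "aapcs" and "aapcs-vfp" are accepted |
(see pcs_attribute_args further down).  Assuming an AAPCS target with |
VFP available, a per-function calling convention looks like: |
 |
    /* Hypothetical user code: dot2 passes and returns its doubles in |
       VFP registers (d0/d1) instead of core registers.  */ |
    double dot2 (double a, double b) __attribute__ ((pcs ("aapcs-vfp"))); |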
+ |
/* Initialize the GCC target structure. */ |
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES |
#undef TARGET_MERGE_DECL_ATTRIBUTES |
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes |
#endif |
+#undef TARGET_LEGITIMIZE_ADDRESS |
+#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address |
+ |
#undef TARGET_ATTRIBUTE_TABLE |
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table |
@@ -257,6 +324,12 @@ static bool arm_allocate_stack_slots_for_args (void); |
#undef TARGET_FUNCTION_OK_FOR_SIBCALL |
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall |
+#undef TARGET_FUNCTION_VALUE |
+#define TARGET_FUNCTION_VALUE arm_function_value |
+ |
+#undef TARGET_LIBCALL_VALUE |
+#define TARGET_LIBCALL_VALUE arm_libcall_value |
+ |
#undef TARGET_ASM_OUTPUT_MI_THUNK |
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk |
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK |
@@ -283,10 +356,8 @@ static bool arm_allocate_stack_slots_for_args (void); |
#undef TARGET_INIT_LIBFUNCS |
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs |
-#undef TARGET_PROMOTE_FUNCTION_ARGS |
-#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_const_tree_true |
-#undef TARGET_PROMOTE_FUNCTION_RETURN |
-#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_const_tree_true |
+#undef TARGET_PROMOTE_FUNCTION_MODE |
+#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode |
#undef TARGET_PROMOTE_PROTOTYPES |
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes |
#undef TARGET_PASS_BY_REFERENCE |
@@ -300,6 +371,13 @@ static bool arm_allocate_stack_slots_for_args (void); |
#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS |
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args |
+#undef TARGET_ASM_TRAMPOLINE_TEMPLATE |
+#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template |
+#undef TARGET_TRAMPOLINE_INIT |
+#define TARGET_TRAMPOLINE_INIT arm_trampoline_init |
+#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS |
+#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address |
+ |
#undef TARGET_DEFAULT_SHORT_ENUMS |
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums |
@@ -361,6 +439,9 @@ static bool arm_allocate_stack_slots_for_args (void); |
#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC |
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec |
+#undef TARGET_DWARF_REGISTER_SPAN |
+#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span |
+ |
#undef TARGET_CANNOT_COPY_INSN_P |
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p |
@@ -369,6 +450,9 @@ static bool arm_allocate_stack_slots_for_args (void); |
#define TARGET_HAVE_TLS true |
#endif |
+#undef TARGET_HAVE_CONDITIONAL_EXECUTION |
+#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution |
+ |
#undef TARGET_CANNOT_FORCE_CONST_MEM |
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem |
@@ -399,6 +483,30 @@ static bool arm_allocate_stack_slots_for_args (void); |
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel |
#endif |
+#undef TARGET_LEGITIMATE_ADDRESS_P |
+#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p |
+ |
+#undef TARGET_INVALID_PARAMETER_TYPE |
+#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type |
+ |
+#undef TARGET_INVALID_RETURN_TYPE |
+#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type |
+ |
+#undef TARGET_PROMOTED_TYPE |
+#define TARGET_PROMOTED_TYPE arm_promoted_type |
+ |
+#undef TARGET_CONVERT_TO_TYPE |
+#define TARGET_CONVERT_TO_TYPE arm_convert_to_type |
+ |
+#undef TARGET_SCALAR_MODE_SUPPORTED_P |
+#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p |
+ |
+#undef TARGET_FRAME_POINTER_REQUIRED |
+#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required |
+ |
+#undef TARGET_CAN_ELIMINATE |
+#define TARGET_CAN_ELIMINATE arm_can_eliminate |
+ |
struct gcc_target targetm = TARGET_INITIALIZER; |
/* Obstack for minipool constant handling. */ |
@@ -414,28 +522,24 @@ extern FILE * asm_out_file; |
/* True if we are currently building a constant table. */ |
int making_const_table; |
-/* Define the information needed to generate branch insns. This is |
- stored from the compare operation. */ |
-rtx arm_compare_op0, arm_compare_op1; |
- |
/* The processor for which instructions should be scheduled. */ |
enum processor_type arm_tune = arm_none; |
/* The default processor used if not overridden by commandline. */ |
static enum processor_type arm_default_cpu = arm_none; |
-/* Which floating point model to use. */ |
-enum arm_fp_model arm_fp_model; |
- |
-/* Which floating point hardware is available. */ |
-enum fputype arm_fpu_arch; |
- |
/* Which floating point hardware to schedule for. */ |
-enum fputype arm_fpu_tune; |
+int arm_fpu_attr; |
+ |
+/* Which floating point hardware to use. */ |
+const struct arm_fpu_desc *arm_fpu_desc; |
/* Whether to use floating point hardware. */ |
enum float_abi_type arm_float_abi; |
+/* Which __fp16 format to use. */ |
+enum arm_fp16_format_type arm_fp16_format; |
+ |
/* Which ABI to use. */ |
enum arm_abi_type arm_abi; |
@@ -474,6 +578,8 @@ static int thumb_call_reg_needed; |
#define FL_DIV (1 << 18) /* Hardware divide. */ |
#define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */ |
#define FL_NEON (1 << 20) /* Neon instructions. */ |
+#define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M |
+ architecture. */ |
#define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */ |
@@ -495,9 +601,10 @@ static int thumb_call_reg_needed; |
#define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2) |
#define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM) |
#define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM) |
-#define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM) |
+#define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K) |
#define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV) |
#define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV) |
+#define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM) |
/* The bits in this mask specify which |
instructions we are allowed to generate. */ |
@@ -534,6 +641,9 @@ int arm_arch6k = 0; |
/* Nonzero if instructions not present in the 'M' profile can be used. */ |
int arm_arch_notm = 0; |
+/* Nonzero if instructions present in ARMv7E-M can be used. */ |
+int arm_arch7em = 0; |
+ |
/* Nonzero if this chip can benefit from load scheduling. */ |
int arm_ld_sched = 0; |
@@ -583,10 +693,6 @@ enum machine_mode output_memory_reference_mode; |
/* The register number to be used for the PIC offset register. */ |
unsigned arm_pic_register = INVALID_REGNUM; |
-/* Set to 1 when a return insn is output, this means that the epilogue |
- is not needed. */ |
-int return_used_this_function; |
- |
/* Set to 1 after arm_reorg has started. Reset to start at the start of |
the next function. */ |
static int after_arm_reorg = 0; |
@@ -594,6 +700,8 @@ static int after_arm_reorg = 0; |
/* The maximum number of insns to be used when loading a constant. */ |
static int arm_constant_limit = 3; |
+static enum arm_pcs arm_pcs_default; |
+ |
/* For an explanation of these variables, see final_prescan_insn below. */ |
int arm_ccfsm_state; |
/* arm_current_cc is also used for Thumb-2 cond_exec blocks. */ |
@@ -674,6 +782,7 @@ static const struct processors all_architectures[] = |
{"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL}, |
{"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL}, |
{"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL}, |
+ {"armv7e-m", cortexm3, "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL}, |
{"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL}, |
{"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL}, |
{"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL}, |
@@ -708,44 +817,29 @@ static struct arm_cpu_select arm_select[] = |
char arm_arch_name[] = "__ARM_ARCH_0UNK__"; |
-struct fpu_desc |
-{ |
- const char * name; |
- enum fputype fpu; |
-}; |
- |
- |
/* Available values for -mfpu=. */ |
-static const struct fpu_desc all_fpus[] = |
-{ |
- {"fpa", FPUTYPE_FPA}, |
- {"fpe2", FPUTYPE_FPA_EMU2}, |
- {"fpe3", FPUTYPE_FPA_EMU2}, |
- {"maverick", FPUTYPE_MAVERICK}, |
- {"vfp", FPUTYPE_VFP}, |
- {"vfp3", FPUTYPE_VFP3}, |
- {"vfpv3", FPUTYPE_VFP3}, |
- {"vfpv3-d16", FPUTYPE_VFP3D16}, |
- {"neon", FPUTYPE_NEON} |
-}; |
- |
- |
-/* Floating point models used by the different hardware. |
- See fputype in arm.h. */ |
- |
-static const enum fputype fp_model_for_fpu[] = |
-{ |
- /* No FP hardware. */ |
- ARM_FP_MODEL_UNKNOWN, /* FPUTYPE_NONE */ |
- ARM_FP_MODEL_FPA, /* FPUTYPE_FPA */ |
- ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU2 */ |
- ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU3 */ |
- ARM_FP_MODEL_MAVERICK, /* FPUTYPE_MAVERICK */ |
- ARM_FP_MODEL_VFP, /* FPUTYPE_VFP */ |
- ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3D16 */ |
- ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3 */ |
- ARM_FP_MODEL_VFP /* FPUTYPE_NEON */ |
+static const struct arm_fpu_desc all_fpus[] = |
+{ |
+ {"fpa", ARM_FP_MODEL_FPA, 0, VFP_NONE, false, false}, |
+ {"fpe2", ARM_FP_MODEL_FPA, 2, VFP_NONE, false, false}, |
+ {"fpe3", ARM_FP_MODEL_FPA, 3, VFP_NONE, false, false}, |
+ {"maverick", ARM_FP_MODEL_MAVERICK, 0, VFP_NONE, false, false}, |
+ {"vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false}, |
+ {"vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false}, |
+ {"vfpv3-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true}, |
+ {"vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false}, |
+ {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true}, |
+ {"vfpv3xd", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false}, |
+ {"vfpv3xd-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true}, |
+ {"neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false}, |
+ {"neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , true }, |
+ {"vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true}, |
+ {"vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true}, |
+ {"fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true}, |
+ {"neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true}, |
+ /* Compatibility aliases. */ |
+ {"vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false}, |
}; |
@@ -766,6 +860,23 @@ static const struct float_abi all_float_abis[] = |
}; |
+struct fp16_format |
+{ |
+ const char *name; |
+ enum arm_fp16_format_type fp16_format_type; |
+}; |
+ |
+ |
+/* Available values for -mfp16-format=. */ |
+ |
+static const struct fp16_format all_fp16_formats[] = |
+{ |
+ {"none", ARM_FP16_FORMAT_NONE}, |
+ {"ieee", ARM_FP16_FORMAT_IEEE}, |
+ {"alternative", ARM_FP16_FORMAT_ALTERNATIVE} |
+}; |
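 |
A summary sketch, not from the patch: both real formats are 16-bit |
storage-only types.  With -mfp16-format=ieee, __fp16 is IEEE 754 |
binary16 (largest finite value 65504, with infinities and NaNs); the |
ARM alternative format reuses the top exponent for normal numbers |
(largest finite value 131008, no Inf/NaN).  Use is always via float: |
 |
    float |
    widen_sample (void) |
    { |
      __fp16 h = 1.5f;   /* 16-bit storage in either format.  */ |
      return h;          /* Arithmetic and use widen to float.  */ |
    } |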
+ |
+ |
struct abi_name |
{ |
const char *name; |
@@ -924,6 +1035,44 @@ arm_init_libfuncs (void) |
set_optab_libfunc (smod_optab, SImode, NULL); |
set_optab_libfunc (umod_optab, SImode, NULL); |
+ /* Half-precision float operations. The compiler handles all operations |
+ with NULL libfuncs by converting to SFmode. */ |
+ switch (arm_fp16_format) |
+ { |
+ case ARM_FP16_FORMAT_IEEE: |
+ case ARM_FP16_FORMAT_ALTERNATIVE: |
+ |
+ /* Conversions. */ |
+ set_conv_libfunc (trunc_optab, HFmode, SFmode, |
+ (arm_fp16_format == ARM_FP16_FORMAT_IEEE |
+ ? "__gnu_f2h_ieee" |
+ : "__gnu_f2h_alternative")); |
+ set_conv_libfunc (sext_optab, SFmode, HFmode, |
+ (arm_fp16_format == ARM_FP16_FORMAT_IEEE |
+ ? "__gnu_h2f_ieee" |
+ : "__gnu_h2f_alternative")); |
+ |
+ /* Arithmetic. */ |
+ set_optab_libfunc (add_optab, HFmode, NULL); |
+ set_optab_libfunc (sdiv_optab, HFmode, NULL); |
+ set_optab_libfunc (smul_optab, HFmode, NULL); |
+ set_optab_libfunc (neg_optab, HFmode, NULL); |
+ set_optab_libfunc (sub_optab, HFmode, NULL); |
+ |
+ /* Comparisons. */ |
+ set_optab_libfunc (eq_optab, HFmode, NULL); |
+ set_optab_libfunc (ne_optab, HFmode, NULL); |
+ set_optab_libfunc (lt_optab, HFmode, NULL); |
+ set_optab_libfunc (le_optab, HFmode, NULL); |
+ set_optab_libfunc (ge_optab, HFmode, NULL); |
+ set_optab_libfunc (gt_optab, HFmode, NULL); |
+ set_optab_libfunc (unord_optab, HFmode, NULL); |
+ break; |
+ |
+ default: |
+ break; |
+ } |
+ |
if (TARGET_AAPCS_BASED) |
synchronize_libfunc = init_one_libfunc ("__sync_synchronize"); |
} |
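 |
A sketch of the effect, mine rather than the patch's: only the two |
conversions get named helpers; every HFmode arithmetic and comparison |
optab is registered as NULL, so the middle end expands __fp16 operations |
by widening to SFmode.  (__fp16 itself is rejected as a parameter or |
return type by the arm_invalid_parameter_type / arm_invalid_return_type |
hooks registered above.) |
 |
    static __fp16 a, b, c; |
 |
    void |
    add_halves (void) |
    { |
      /* Under -mfp16-format=ieee this becomes, roughly, |
         c = __gnu_f2h_ieee (__gnu_h2f_ieee (a) + __gnu_h2f_ieee (b));  */ |
      c = a + b; |
    } |
 |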
@@ -959,13 +1108,15 @@ arm_build_builtin_va_list (void) |
/* Create the type. */ |
va_list_type = lang_hooks.types.make_type (RECORD_TYPE); |
/* Give it the required name. */ |
- va_list_name = build_decl (TYPE_DECL, |
+ va_list_name = build_decl (BUILTINS_LOCATION, |
+ TYPE_DECL, |
get_identifier ("__va_list"), |
va_list_type); |
DECL_ARTIFICIAL (va_list_name) = 1; |
TYPE_NAME (va_list_type) = va_list_name; |
/* Create the __ap field. */ |
- ap_field = build_decl (FIELD_DECL, |
+ ap_field = build_decl (BUILTINS_LOCATION, |
+ FIELD_DECL, |
get_identifier ("__ap"), |
ptr_type_node); |
DECL_ARTIFICIAL (ap_field) = 1; |
@@ -1207,13 +1358,13 @@ arm_override_options (void) |
const struct processors * sel; |
unsigned int sought; |
- selected_cpu = TARGET_CPU_DEFAULT; |
+ selected_cpu = (enum processor_type) TARGET_CPU_DEFAULT; |
if (selected_cpu == arm_none) |
{ |
#ifdef SUBTARGET_CPU_DEFAULT |
/* Use the subtarget default CPU if none was specified by |
configure. */ |
- selected_cpu = SUBTARGET_CPU_DEFAULT; |
+ selected_cpu = (enum processor_type) SUBTARGET_CPU_DEFAULT; |
#endif |
/* Default to ARM6. */ |
if (selected_cpu == arm_none) |
@@ -1295,6 +1446,23 @@ arm_override_options (void) |
tune_flags = all_cores[(int)arm_tune].flags; |
+ if (target_fp16_format_name) |
+ { |
+ for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++) |
+ { |
+ if (streq (all_fp16_formats[i].name, target_fp16_format_name)) |
+ { |
+ arm_fp16_format = all_fp16_formats[i].fp16_format_type; |
+ break; |
+ } |
+ } |
+ if (i == ARRAY_SIZE (all_fp16_formats)) |
+ error ("invalid __fp16 format option: -mfp16-format=%s", |
+ target_fp16_format_name); |
+ } |
+ else |
+ arm_fp16_format = ARM_FP16_FORMAT_NONE; |
+ |
if (target_abi_name) |
{ |
for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++) |
@@ -1387,6 +1555,7 @@ arm_override_options (void) |
arm_arch6 = (insn_flags & FL_ARCH6) != 0; |
arm_arch6k = (insn_flags & FL_ARCH6K) != 0; |
arm_arch_notm = (insn_flags & FL_NOTM) != 0; |
+ arm_arch7em = (insn_flags & FL_ARCH7EM) != 0; |
arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0; |
arm_arch_xscale = (insn_flags & FL_XSCALE) != 0; |
arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0; |
@@ -1438,7 +1607,6 @@ arm_override_options (void) |
if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT) |
error ("iwmmxt abi requires an iwmmxt capable cpu"); |
- arm_fp_model = ARM_FP_MODEL_UNKNOWN; |
if (target_fpu_name == NULL && target_fpe_name != NULL) |
{ |
if (streq (target_fpe_name, "2")) |
@@ -1449,46 +1617,56 @@ arm_override_options (void) |
error ("invalid floating point emulation option: -mfpe=%s", |
target_fpe_name); |
} |
- if (target_fpu_name != NULL) |
- { |
- /* The user specified a FPU. */ |
- for (i = 0; i < ARRAY_SIZE (all_fpus); i++) |
- { |
- if (streq (all_fpus[i].name, target_fpu_name)) |
- { |
- arm_fpu_arch = all_fpus[i].fpu; |
- arm_fpu_tune = arm_fpu_arch; |
- arm_fp_model = fp_model_for_fpu[arm_fpu_arch]; |
- break; |
- } |
- } |
- if (arm_fp_model == ARM_FP_MODEL_UNKNOWN) |
- error ("invalid floating point option: -mfpu=%s", target_fpu_name); |
- } |
- else |
+ |
+ if (target_fpu_name == NULL) |
{ |
#ifdef FPUTYPE_DEFAULT |
- /* Use the default if it is specified for this platform. */ |
- arm_fpu_arch = FPUTYPE_DEFAULT; |
- arm_fpu_tune = FPUTYPE_DEFAULT; |
+ target_fpu_name = FPUTYPE_DEFAULT; |
#else |
- /* Pick one based on CPU type. */ |
- /* ??? Some targets assume FPA is the default. |
- if ((insn_flags & FL_VFP) != 0) |
- arm_fpu_arch = FPUTYPE_VFP; |
- else |
- */ |
if (arm_arch_cirrus) |
- arm_fpu_arch = FPUTYPE_MAVERICK; |
+ target_fpu_name = "maverick"; |
else |
- arm_fpu_arch = FPUTYPE_FPA_EMU2; |
+ target_fpu_name = "fpe2"; |
#endif |
- if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2) |
- arm_fpu_tune = FPUTYPE_FPA; |
+ } |
+ |
+ arm_fpu_desc = NULL; |
+ for (i = 0; i < ARRAY_SIZE (all_fpus); i++) |
+ { |
+ if (streq (all_fpus[i].name, target_fpu_name)) |
+ { |
+ arm_fpu_desc = &all_fpus[i]; |
+ break; |
+ } |
+ } |
+ |
+ if (!arm_fpu_desc) |
+ { |
+ error ("invalid floating point option: -mfpu=%s", target_fpu_name); |
+ return; |
+ } |
+ |
+ switch (arm_fpu_desc->model) |
+ { |
+ case ARM_FP_MODEL_FPA: |
+ if (arm_fpu_desc->rev == 2) |
+ arm_fpu_attr = FPU_FPE2; |
+ else if (arm_fpu_desc->rev == 3) |
+ arm_fpu_attr = FPU_FPE3; |
else |
- arm_fpu_tune = arm_fpu_arch; |
- arm_fp_model = fp_model_for_fpu[arm_fpu_arch]; |
- gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN); |
+ arm_fpu_attr = FPU_FPA; |
+ break; |
+ |
+ case ARM_FP_MODEL_MAVERICK: |
+ arm_fpu_attr = FPU_MAVERICK; |
+ break; |
+ |
+ case ARM_FP_MODEL_VFP: |
+ arm_fpu_attr = FPU_VFP; |
+ break; |
+ |
+ default: |
+ gcc_unreachable (); |
} |
if (target_float_abi_name != NULL) |
@@ -1509,8 +1687,18 @@ arm_override_options (void) |
else |
arm_float_abi = TARGET_DEFAULT_FLOAT_ABI; |
- if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP) |
- sorry ("-mfloat-abi=hard and VFP"); |
+ if (TARGET_AAPCS_BASED |
+ && (arm_fpu_desc->model == ARM_FP_MODEL_FPA)) |
+ error ("FPA is unsupported in the AAPCS"); |
+ |
+ if (TARGET_AAPCS_BASED) |
+ { |
+ if (TARGET_CALLER_INTERWORKING) |
+ error ("AAPCS does not support -mcaller-super-interworking"); |
+ else if (TARGET_CALLEE_INTERWORKING) |
+ error ("AAPCS does not support -mcallee-super-interworking"); |
+ } |
/* FPA and iWMMXt are incompatible because the insn encodings overlap. |
VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon |
@@ -1522,15 +1710,40 @@ arm_override_options (void) |
if (TARGET_THUMB2 && TARGET_IWMMXT) |
sorry ("Thumb-2 iWMMXt"); |
+ /* __fp16 support currently assumes the core has ldrh. */ |
+ if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE) |
+ sorry ("__fp16 and no ldrh"); |
+ |
/* If soft-float is specified then don't use FPU. */ |
if (TARGET_SOFT_FLOAT) |
- arm_fpu_arch = FPUTYPE_NONE; |
+ arm_fpu_attr = FPU_NONE; |
+ |
+ if (TARGET_AAPCS_BASED) |
+ { |
+ if (arm_abi == ARM_ABI_IWMMXT) |
+ arm_pcs_default = ARM_PCS_AAPCS_IWMMXT; |
+ else if (arm_float_abi == ARM_FLOAT_ABI_HARD |
+ && TARGET_HARD_FLOAT |
+ && TARGET_VFP) |
+ arm_pcs_default = ARM_PCS_AAPCS_VFP; |
+ else |
+ arm_pcs_default = ARM_PCS_AAPCS; |
+ } |
+ else |
+ { |
+ if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP) |
+ sorry ("-mfloat-abi=hard and VFP"); |
+ |
+ if (arm_abi == ARM_ABI_APCS) |
+ arm_pcs_default = ARM_PCS_APCS; |
+ else |
+ arm_pcs_default = ARM_PCS_ATPCS; |
+ } |
/* For arm2/3 there is no need to do any scheduling if there is only |
a floating point emulator, or we are doing software floating-point. */ |
if ((TARGET_SOFT_FLOAT |
- || arm_fpu_tune == FPUTYPE_FPA_EMU2 |
- || arm_fpu_tune == FPUTYPE_FPA_EMU3) |
+ || (TARGET_FPA && arm_fpu_desc->rev)) |
&& (tune_flags & FL_MODE32) == 0) |
flag_schedule_insns = flag_schedule_insns_after_reload = 0; |
@@ -1549,7 +1762,7 @@ arm_override_options (void) |
/* Use the cp15 method if it is available. */ |
if (target_thread_pointer == TP_AUTO) |
{ |
- if (arm_arch6k && !TARGET_THUMB) |
+ if (arm_arch6k && !TARGET_THUMB1) |
target_thread_pointer = TP_CP15; |
else |
target_thread_pointer = TP_SOFT; |
@@ -1620,8 +1833,7 @@ arm_override_options (void) |
fix_cm3_ldrd = 0; |
} |
- /* ??? We might want scheduling for thumb2. */ |
- if (TARGET_THUMB && flag_schedule_insns) |
+ if (TARGET_THUMB1 && flag_schedule_insns) |
{ |
/* Don't warn since it's on by default in -O2. */ |
flag_schedule_insns = 0; |
@@ -1655,12 +1867,15 @@ arm_override_options (void) |
max_insns_skipped = 3; |
} |
- /* Ideally we would want to use CFI directives to generate |
- debug info. However this also creates the .eh_frame |
- section, so disable them until GAS can handle |
- this properly. See PR40521. */ |
- if (TARGET_AAPCS_BASED) |
- flag_dwarf2_cfi_asm = 0; |
+ /* Hot/Cold partitioning is not currently supported, since we can't |
+ handle literal pool placement in that case. */ |
+ if (flag_reorder_blocks_and_partition) |
+ { |
+ inform (input_location, |
+ "-freorder-blocks-and-partition not supported on this architecture"); |
+ flag_reorder_blocks_and_partition = 0; |
+ flag_reorder_blocks = 1; |
+ } |
/* Register global variables with the garbage collector. */ |
arm_add_gc_roots (); |
@@ -1794,6 +2009,84 @@ arm_allocate_stack_slots_for_args (void) |
} |
+/* Output assembler code for a block containing the constant parts |
+ of a trampoline, leaving space for the variable parts. |
+ |
+ On the ARM, (if r8 is the static chain regnum, and remembering that |
+ referencing pc adds an offset of 8) the trampoline looks like: |
+ ldr r8, [pc, #0] |
+ ldr pc, [pc] |
+ .word static chain value |
+ .word function's address |
+ XXX FIXME: When the trampoline returns, r8 will be clobbered. */ |
+ |
+static void |
+arm_asm_trampoline_template (FILE *f) |
+{ |
+ if (TARGET_ARM) |
+ { |
+ asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM); |
+ asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM); |
+ } |
+ else if (TARGET_THUMB2) |
+ { |
+ /* The Thumb-2 trampoline is similar to the ARM implementation. |
+ Unlike 16-bit Thumb, we enter the stub in Thumb mode. */ |
+ asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", |
+ STATIC_CHAIN_REGNUM, PC_REGNUM); |
+ asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM); |
+ } |
+ else |
+ { |
+ ASM_OUTPUT_ALIGN (f, 2); |
+ fprintf (f, "\t.code\t16\n"); |
+ fprintf (f, ".Ltrampoline_start:\n"); |
+ asm_fprintf (f, "\tpush\t{r0, r1}\n"); |
+ asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM); |
+ asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM); |
+ asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM); |
+ asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM); |
+ asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM); |
+ } |
+ assemble_aligned_integer (UNITS_PER_WORD, const0_rtx); |
+ assemble_aligned_integer (UNITS_PER_WORD, const0_rtx); |
+} |
+ |
+/* Emit RTL insns to initialize the variable parts of a trampoline. */ |
+ |
+static void |
+arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) |
+{ |
+ rtx fnaddr, mem, a_tramp; |
+ |
+ emit_block_move (m_tramp, assemble_trampoline_template (), |
+ GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL); |
+ |
+ mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12); |
+ emit_move_insn (mem, chain_value); |
+ |
+ mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16); |
+ fnaddr = XEXP (DECL_RTL (fndecl), 0); |
+ emit_move_insn (mem, fnaddr); |
+ |
+ a_tramp = XEXP (m_tramp, 0); |
+ emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"), |
+ LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode, |
+ plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode); |
+} |
+ |
+/* Thumb trampolines should be entered in thumb mode, so set |
+ the bottom bit of the address. */ |
+ |
+static rtx |
+arm_trampoline_adjust_address (rtx addr) |
+{ |
+ if (TARGET_THUMB) |
+ addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx, |
+ NULL, 0, OPTAB_LIB_WIDEN); |
+ return addr; |
+} |
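 |
A user-level sketch (mine, not from the patch) of when this machinery |
fires: taking the address of a GNU C nested function makes GCC build one |
of the trampolines above on the stack. |
 |
    extern void apply (int (*fn) (int)); |
 |
    void |
    outer (int bias) |
    { |
      int inner (int x) { return x + bias; }  /* Nested function.  */ |
      apply (inner);  /* Address taken: the template is copied out, |
                         arm_trampoline_init fills in the static chain |
                         and target address, and for Thumb entry |
                         arm_trampoline_adjust_address sets bit 0.  */ |
    } |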
+ |
/* Return 1 if it is possible to return using a single instruction. |
If SIBLING is non-null, this is a test for a return before a sibling |
call. SIBLING is the call insn, so we can examine its register usage. */ |
@@ -2014,7 +2307,11 @@ const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code) |
case MINUS: /* Should only occur with (MINUS I reg) => rsb */ |
case XOR: |
+ return 0; |
+ |
case IOR: |
+ if (TARGET_THUMB2) |
+ return const_ok_for_arm (ARM_SIGN_EXTEND (~i)); |
return 0; |
case AND: |
@@ -2102,20 +2399,24 @@ arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn, |
1); |
} |
-/* Return the number of ARM instructions required to synthesize the given |
- constant. */ |
+/* Return the number of instructions required to synthesize the given |
+ constant, if we start emitting them from bit-position I. */ |
static int |
count_insns_for_constant (HOST_WIDE_INT remainder, int i) |
{ |
HOST_WIDE_INT temp1; |
+ int step_size = TARGET_ARM ? 2 : 1; |
int num_insns = 0; |
+ |
+ gcc_assert (TARGET_ARM || i == 0); |
+ |
do |
{ |
int end; |
if (i <= 0) |
i += 32; |
- if (remainder & (3 << (i - 2))) |
+ if (remainder & (((1 << step_size) - 1) << (i - step_size))) |
{ |
end = i - 8; |
if (end < 0) |
@@ -2124,13 +2425,77 @@ count_insns_for_constant (HOST_WIDE_INT remainder, int i) |
| ((i < end) ? (0xff >> (32 - end)) : 0)); |
remainder &= ~temp1; |
num_insns++; |
- i -= 6; |
+ i -= 8 - step_size; |
} |
- i -= 2; |
+ i -= step_size; |
} while (remainder); |
return num_insns; |
} |
+static int |
+find_best_start (unsigned HOST_WIDE_INT remainder) |
+{ |
+ int best_consecutive_zeros = 0; |
+ int i; |
+ int best_start = 0; |
+ |
+ /* If we aren't targeting ARM, the best place to start is always at |
+ the bottom. */ |
+ if (! TARGET_ARM) |
+ return 0; |
+ |
+ for (i = 0; i < 32; i += 2) |
+ { |
+ int consecutive_zeros = 0; |
+ |
+ if (!(remainder & (3 << i))) |
+ { |
+ while ((i < 32) && !(remainder & (3 << i))) |
+ { |
+ consecutive_zeros += 2; |
+ i += 2; |
+ } |
+ if (consecutive_zeros > best_consecutive_zeros) |
+ { |
+ best_consecutive_zeros = consecutive_zeros; |
+ best_start = i - consecutive_zeros; |
+ } |
+ i -= 2; |
+ } |
+ } |
+ |
+ /* So long as it won't require any more insns to do so, it's |
+ desirable to emit a small constant (in bits 0...9) in the last |
+ insn. This way there is more chance that it can be combined with |
+ a later addressing insn to form a pre-indexed load or store |
+ operation. Consider: |
+ |
+ *((volatile int *)0xe0000100) = 1; |
+ *((volatile int *)0xe0000110) = 2; |
+ |
+ We want this to wind up as: |
+ |
+ mov rA, #0xe0000000 |
+ mov rB, #1 |
+ str rB, [rA, #0x100] |
+ mov rB, #2 |
+ str rB, [rA, #0x110] |
+ |
+ rather than having to synthesize both large constants from scratch. |
+ |
+ Therefore, we calculate how many insns would be required to emit |
+ the constant starting from `best_start', and also starting from |
+ zero (i.e. with bit 31 first to be output). If `best_start' doesn't |
+ yield a shorter sequence, we may as well use zero. */ |
+ if (best_start != 0 |
+ && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder) |
+ && (count_insns_for_constant (remainder, 0) <= |
+ count_insns_for_constant (remainder, best_start))) |
+ best_start = 0; |
+ |
+ return best_start; |
+} |
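 |
A worked trace, my own rather than the patch's: for remainder 0xe0000100 |
the scan finds the 18-bit zero run at bits 10-27 and sets best_start to |
10; but starting from bit 31 also needs only two instructions |
(mov #0xe0000000 then orr #0x100), so the final test resets best_start |
to 0, leaving the small constant in the last insn where combine can fold |
it into an addressing offset, as in the str example above. |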
+ |
/* Emit an instruction with the indicated PATTERN. If COND is |
non-NULL, conditionalize the execution of the instruction on COND |
being true. */ |
@@ -2154,6 +2519,7 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond, |
{ |
int can_invert = 0; |
int can_negate = 0; |
+ int final_invert = 0; |
int can_negate_initial = 0; |
int can_shift = 0; |
int i; |
@@ -2165,6 +2531,7 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond, |
int insns = 0; |
unsigned HOST_WIDE_INT temp1, temp2; |
unsigned HOST_WIDE_INT remainder = val & 0xffffffff; |
+ int step_size = TARGET_ARM ? 2 : 1; |
/* Find out which operations are safe for a given CODE. Also do a quick |
check for degenerate cases; these can occur when DImode operations |
@@ -2191,15 +2558,20 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond, |
GEN_INT (ARM_SIGN_EXTEND (val)))); |
return 1; |
} |
+ |
if (remainder == 0) |
{ |
if (reload_completed && rtx_equal_p (target, source)) |
return 0; |
+ |
if (generate) |
emit_constant_insn (cond, |
gen_rtx_SET (VOIDmode, target, source)); |
return 1; |
} |
+ |
+ if (TARGET_THUMB2) |
+ can_invert = 1; |
break; |
case AND: |
@@ -2233,14 +2605,15 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond, |
return 1; |
} |
- /* We don't know how to handle other cases yet. */ |
- gcc_assert (remainder == 0xffffffff); |
- |
- if (generate) |
- emit_constant_insn (cond, |
- gen_rtx_SET (VOIDmode, target, |
- gen_rtx_NOT (mode, source))); |
- return 1; |
+ if (remainder == 0xffffffff) |
+ { |
+ if (generate) |
+ emit_constant_insn (cond, |
+ gen_rtx_SET (VOIDmode, target, |
+ gen_rtx_NOT (mode, source))); |
+ return 1; |
+ } |
+ break; |
case MINUS: |
/* We treat MINUS as (val - source), since (source - val) is always |
@@ -2287,6 +2660,7 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond, |
/* Calculate a few attributes that may be useful for specific |
optimizations. */ |
+ /* Count the number of leading zeros. */ |
for (i = 31; i >= 0; i--) |
{ |
if ((remainder & (1 << i)) == 0) |
@@ -2295,6 +2669,7 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond, |
break; |
} |
+ /* Count the number of leading ones. */ |
for (i = 31; i >= 0; i--) |
{ |
if ((remainder & (1 << i)) != 0) |
@@ -2303,6 +2678,7 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond, |
break; |
} |
+ /* Count the number of trailing zeros. */ |
for (i = 0; i <= 31; i++) |
{ |
if ((remainder & (1 << i)) == 0) |
@@ -2311,6 +2687,7 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond, |
break; |
} |
+ /* Count the number of trailing ones. */ |
for (i = 0; i <= 31; i++) |
{ |
if ((remainder & (1 << i)) != 0) |
@@ -2498,6 +2875,17 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond, |
if (code == XOR) |
break; |
+ /* Convert |
+ x = y | constant (which is composed of set_sign_bit_copies leading 1s |
+ and the remaining bits zero, e.g. 0xfff00000) |
+ to |
+ x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies). |
+ |
+ This can be done in 2 instructions by using shifts with mov or mvn. |
+ E.g. for |
+ x = x | 0xfff00000; |
+ we generate |
+ mvn r0, r0, asl #12 |
+ mvn r0, r0, lsr #12 */ |
if (set_sign_bit_copies > 8 |
&& (val & (-1 << (32 - set_sign_bit_copies))) == val) |
{ |
@@ -2523,6 +2911,16 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond, |
return 2; |
} |
+ /* Convert |
+ x = y | constant (which has set_zero_bit_copies trailing ones) |
+ to |
+ x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies). |
+ |
+ E.g. for |
+ r0 = r0 | 0xfff |
+ we generate |
+ mvn r0, r0, lsr #12 |
+ mvn r0, r0, asl #12 */ |
if (set_zero_bit_copies > 8 |
&& (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder) |
{ |
@@ -2548,6 +2946,13 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond, |
return 2; |
} |
+ /* This will never be reached for Thumb2 because orn is a valid |
+ instruction. This is for Thumb1 and the 32-bit ARM case. |
+ |
+ x = y | constant (such that ~constant is a valid constant). |
+ Transform this to |
+ x = ~(~y & ~constant). */ |
if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val))) |
{ |
if (generate) |
@@ -2657,10 +3062,27 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond, |
if (remainder & (1 << i)) |
num_bits_set++; |
- if (code == AND || (can_invert && num_bits_set > 16)) |
- remainder = (~remainder) & 0xffffffff; |
+ if ((code == AND) |
+ || (code != IOR && can_invert && num_bits_set > 16)) |
+ remainder ^= 0xffffffff; |
else if (code == PLUS && num_bits_set > 16) |
remainder = (-remainder) & 0xffffffff; |
+ |
+ /* For XOR, if more than half the bits are set and there's a sequence |
+ of more than 8 consecutive ones in the pattern then we can XOR by the |
+ inverted constant and then invert the final result; this may save an |
+ instruction and might also lead to the final mvn being merged with |
+ some other operation. */ |
+ else if (code == XOR && num_bits_set > 16 |
+ && (count_insns_for_constant (remainder ^ 0xffffffff, |
+ find_best_start |
+ (remainder ^ 0xffffffff)) |
+ < count_insns_for_constant (remainder, |
+ find_best_start (remainder)))) |
+ { |
+ remainder ^= 0xffffffff; |
+ final_invert = 1; |
+ } |
else |
{ |
can_invert = 0; |
@@ -2679,63 +3101,8 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond, |
/* ??? Use thumb2 replicated constants when the high and low halfwords are |
the same. */ |
{ |
- int best_start = 0; |
- if (!TARGET_THUMB2) |
- { |
- int best_consecutive_zeros = 0; |
- |
- for (i = 0; i < 32; i += 2) |
- { |
- int consecutive_zeros = 0; |
- |
- if (!(remainder & (3 << i))) |
- { |
- while ((i < 32) && !(remainder & (3 << i))) |
- { |
- consecutive_zeros += 2; |
- i += 2; |
- } |
- if (consecutive_zeros > best_consecutive_zeros) |
- { |
- best_consecutive_zeros = consecutive_zeros; |
- best_start = i - consecutive_zeros; |
- } |
- i -= 2; |
- } |
- } |
- |
- /* So long as it won't require any more insns to do so, it's |
- desirable to emit a small constant (in bits 0...9) in the last |
- insn. This way there is more chance that it can be combined with |
- a later addressing insn to form a pre-indexed load or store |
- operation. Consider: |
- |
- *((volatile int *)0xe0000100) = 1; |
- *((volatile int *)0xe0000110) = 2; |
- |
- We want this to wind up as: |
- |
- mov rA, #0xe0000000 |
- mov rB, #1 |
- str rB, [rA, #0x100] |
- mov rB, #2 |
- str rB, [rA, #0x110] |
- |
- rather than having to synthesize both large constants from scratch. |
- |
- Therefore, we calculate how many insns would be required to emit |
- the constant starting from `best_start', and also starting from |
- zero (i.e. with bit 31 first to be output). If `best_start' doesn't |
- yield a shorter sequence, we may as well use zero. */ |
- if (best_start != 0 |
- && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder) |
- && (count_insns_for_constant (remainder, 0) <= |
- count_insns_for_constant (remainder, best_start))) |
- best_start = 0; |
- } |
- |
/* Now start emitting the insns. */ |
- i = best_start; |
+ i = find_best_start (remainder); |
do |
{ |
int end; |
@@ -2763,7 +3130,7 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond, |
} |
else |
{ |
- if (remainder && subtargets) |
+ if ((final_invert || remainder) && subtargets) |
new_src = gen_reg_rtx (mode); |
else |
new_src = target; |
@@ -2798,21 +3165,23 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond, |
code = PLUS; |
insns++; |
- if (TARGET_ARM) |
- i -= 6; |
- else |
- i -= 7; |
+ i -= 8 - step_size; |
} |
/* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary |
shifts. */ |
- if (TARGET_ARM) |
- i -= 2; |
- else |
- i--; |
+ i -= step_size; |
} |
while (remainder); |
} |
+ if (final_invert) |
+ { |
+ if (generate) |
+ emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target, |
+ gen_rtx_NOT (mode, source))); |
+ insns++; |
+ } |
+ |
return insns; |
} |
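 |
A worked example of the new final_invert path (mine, not the patch's): |
for x ^= 0xfffffff0, 28 bits are set but only 4 are set in the |
complement, so the constant is inverted and a trailing mvn is emitted, |
giving roughly |
 |
    eor rD, rS, #15 |
    mvn rD, rD |
 |
instead of synthesizing 0xfffffff0 piece by piece. |
 |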
@@ -2884,17 +3253,22 @@ arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode, |
/* Define how to find the value returned by a function. */ |
-rtx |
-arm_function_value(const_tree type, const_tree func ATTRIBUTE_UNUSED) |
+static rtx |
+arm_function_value (const_tree type, const_tree func, |
+ bool outgoing ATTRIBUTE_UNUSED) |
{ |
enum machine_mode mode; |
int unsignedp ATTRIBUTE_UNUSED; |
rtx r ATTRIBUTE_UNUSED; |
mode = TYPE_MODE (type); |
+ |
+ if (TARGET_AAPCS_BASED) |
+ return aapcs_allocate_return_reg (mode, type, func); |
+ |
/* Promote integer types. */ |
if (INTEGRAL_TYPE_P (type)) |
- PROMOTE_FUNCTION_MODE (mode, unsignedp, type); |
+ mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1); |
/* Promotes small structs returned in a register to full-word size |
for big-endian AAPCS. */ |
@@ -2908,7 +3282,88 @@ arm_function_value(const_tree type, const_tree func ATTRIBUTE_UNUSED) |
} |
} |
- return LIBCALL_VALUE(mode); |
+ return LIBCALL_VALUE (mode); |
+} |
+ |
+static int |
+libcall_eq (const void *p1, const void *p2) |
+{ |
+ return rtx_equal_p ((const_rtx) p1, (const_rtx) p2); |
+} |
+ |
+static hashval_t |
+libcall_hash (const void *p1) |
+{ |
+ return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE); |
+} |
+ |
+static void |
+add_libcall (htab_t htab, rtx libcall) |
+{ |
+ *htab_find_slot (htab, libcall, INSERT) = libcall; |
+} |
+ |
+static bool |
+arm_libcall_uses_aapcs_base (const_rtx libcall) |
+{ |
+ static bool init_done = false; |
+ static htab_t libcall_htab; |
+ |
+ if (!init_done) |
+ { |
+ init_done = true; |
+ |
+ libcall_htab = htab_create (31, libcall_hash, libcall_eq, |
+ NULL); |
+ add_libcall (libcall_htab, |
+ convert_optab_libfunc (sfloat_optab, SFmode, SImode)); |
+ add_libcall (libcall_htab, |
+ convert_optab_libfunc (sfloat_optab, DFmode, SImode)); |
+ add_libcall (libcall_htab, |
+ convert_optab_libfunc (sfloat_optab, SFmode, DImode)); |
+ add_libcall (libcall_htab, |
+ convert_optab_libfunc (sfloat_optab, DFmode, DImode)); |
+ |
+ add_libcall (libcall_htab, |
+ convert_optab_libfunc (ufloat_optab, SFmode, SImode)); |
+ add_libcall (libcall_htab, |
+ convert_optab_libfunc (ufloat_optab, DFmode, SImode)); |
+ add_libcall (libcall_htab, |
+ convert_optab_libfunc (ufloat_optab, SFmode, DImode)); |
+ add_libcall (libcall_htab, |
+ convert_optab_libfunc (ufloat_optab, DFmode, DImode)); |
+ |
+ add_libcall (libcall_htab, |
+ convert_optab_libfunc (sext_optab, SFmode, HFmode)); |
+ add_libcall (libcall_htab, |
+ convert_optab_libfunc (trunc_optab, HFmode, SFmode)); |
+ add_libcall (libcall_htab, |
+ convert_optab_libfunc (sfix_optab, DImode, DFmode)); |
+ add_libcall (libcall_htab, |
+ convert_optab_libfunc (ufix_optab, DImode, DFmode)); |
+ add_libcall (libcall_htab, |
+ convert_optab_libfunc (sfix_optab, DImode, SFmode)); |
+ add_libcall (libcall_htab, |
+ convert_optab_libfunc (ufix_optab, DImode, SFmode)); |
+ } |
+ |
+ return libcall && htab_find (libcall_htab, libcall) != NULL; |
+} |
+ |
+static rtx |
+arm_libcall_value (enum machine_mode mode, const_rtx libcall) |
+{ |
+ if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS |
+ && GET_MODE_CLASS (mode) == MODE_FLOAT) |
+ { |
+ /* The following libcalls return their result in integer registers, |
+ even though they return a floating point value. */ |
+ if (arm_libcall_uses_aapcs_base (libcall)) |
+ return gen_rtx_REG (mode, ARG_REGISTER (1)); |
+ } |
+ |
+ return LIBCALL_VALUE (mode); |
} |
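 |
A sketch of why this matters, mine rather than the patch's: the helpers |
collected above follow the base AAPCS even when the rest of the unit |
uses the VFP variant, so their floating-point results come back in core |
registers: |
 |
    double |
    from_ll (long long v) |
    { |
      return v;  /* May call the DImode-to-DFmode helper (__aeabi_l2d |
                    on AAPCS targets); its result arrives in r0/r1, not |
                    d0, which is what the ARG_REGISTER (1) case above |
                    encodes.  */ |
    } |
 |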
/* Determine the amount of memory needed to store the possible return |
@@ -2918,10 +3373,12 @@ arm_apply_result_size (void) |
{ |
int size = 16; |
- if (TARGET_ARM) |
+ if (TARGET_32BIT) |
{ |
if (TARGET_HARD_FLOAT_ABI) |
{ |
+ if (TARGET_VFP) |
+ size += 32; |
if (TARGET_FPA) |
size += 12; |
if (TARGET_MAVERICK) |
@@ -2934,27 +3391,56 @@ arm_apply_result_size (void) |
return size; |
} |
-/* Decide whether a type should be returned in memory (true) |
- or in a register (false). This is called as the target hook |
- TARGET_RETURN_IN_MEMORY. */ |
+/* Decide whether TYPE should be returned in memory (true) |
+ or in a register (false). FNTYPE is the type of the function making |
+ the call. */ |
static bool |
-arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) |
+arm_return_in_memory (const_tree type, const_tree fntype) |
{ |
HOST_WIDE_INT size; |
- size = int_size_in_bytes (type); |
+ size = int_size_in_bytes (type); /* Negative if not fixed size. */ |
+ |
+ if (TARGET_AAPCS_BASED) |
+ { |
+ /* Simple, non-aggregate types (ie not including vectors and |
+ complex) are always returned in a register (or registers). |
+ We don't care about which register here, so we can short-cut |
+ some of the detail. */ |
+ if (!AGGREGATE_TYPE_P (type) |
+ && TREE_CODE (type) != VECTOR_TYPE |
+ && TREE_CODE (type) != COMPLEX_TYPE) |
+ return false; |
+ |
+ /* Any return value that is no larger than one word can be |
+ returned in r0. */ |
+ if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD) |
+ return false; |
+ |
+ /* Check any available co-processors to see if they accept the |
+ type as a register candidate (VFP, for example, can return |
+ some aggregates in consecutive registers). These aren't |
+ available if the call is variadic. */ |
+ if (aapcs_select_return_coproc (type, fntype) >= 0) |
+ return false; |
+ |
+ /* Vector values should be returned using ARM registers, not |
+ memory (unless they're over 16 bytes, which will break since |
+ we only have four call-clobbered registers to play with). */ |
+ if (TREE_CODE (type) == VECTOR_TYPE) |
+ return (size < 0 || size > (4 * UNITS_PER_WORD)); |
+ |
+ /* The rest go in memory. */ |
+ return true; |
+ } |
- /* Vector values should be returned using ARM registers, not memory (unless |
- they're over 16 bytes, which will break since we only have four |
- call-clobbered registers to play with). */ |
if (TREE_CODE (type) == VECTOR_TYPE) |
return (size < 0 || size > (4 * UNITS_PER_WORD)); |
if (!AGGREGATE_TYPE_P (type) && |
- !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE)) |
- /* All simple types are returned in registers. |
- For AAPCS, complex types are treated the same as aggregates. */ |
- return 0; |
+ (TREE_CODE (type) != VECTOR_TYPE)) |
+ /* All simple types are returned in registers. */ |
+ return false; |
if (arm_abi != ARM_ABI_APCS) |
{ |
@@ -2971,7 +3457,7 @@ arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) |
the aggregate is either huge or of variable size, and in either case |
we will want to return it via memory and not in a register. */ |
if (size < 0 || size > UNITS_PER_WORD) |
- return 1; |
+ return true; |
if (TREE_CODE (type) == RECORD_TYPE) |
{ |
@@ -2991,18 +3477,18 @@ arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) |
continue; |
if (field == NULL) |
- return 0; /* An empty structure. Allowed by an extension to ANSI C. */ |
+ return false; /* An empty structure. Allowed by an extension to ANSI C. */ |
/* Check that the first field is valid for returning in a register. */ |
/* ... Floats are not allowed */ |
if (FLOAT_TYPE_P (TREE_TYPE (field))) |
- return 1; |
+ return true; |
/* ... Aggregates that are not themselves valid for returning in |
a register are not allowed. */ |
if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE)) |
- return 1; |
+ return true; |
/* Now check the remaining fields, if any. Only bitfields are allowed, |
since they are not addressable. */ |
@@ -3014,10 +3500,10 @@ arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) |
continue; |
if (!DECL_BIT_FIELD_TYPE (field)) |
- return 1; |
+ return true; |
} |
- return 0; |
+ return false; |
} |
if (TREE_CODE (type) == UNION_TYPE) |
@@ -3034,18 +3520,18 @@ arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) |
continue; |
if (FLOAT_TYPE_P (TREE_TYPE (field))) |
- return 1; |
+ return true; |
if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE)) |
- return 1; |
+ return true; |
} |
- return 0; |
+ return false; |
} |
#endif /* not ARM_WINCE */ |
/* Return all other types in memory. */ |
- return 1; |
+ return true; |
} |
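 |
Illustrative AAPCS cases for the logic above (my examples, assuming |
32-bit int): |
 |
    struct small { short a, b; };    /* 4 bytes: returned in r0.  */ |
    struct pair  { int a, b; };      /* 8 bytes: returned in memory.  */ |
    struct hfa   { float x, y, z; }; /* Memory under the base AAPCS, but |
                                        s0-s2 when a return coprocessor |
                                        accepts it; see |
                                        aapcs_select_return_coproc.  */ |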
/* Indicate whether or not words of a double are in big-endian order. */ |
@@ -3070,14 +3556,770 @@ arm_float_words_big_endian (void) |
return 1; |
} |
+const struct pcs_attribute_arg |
+{ |
+ const char *arg; |
+ enum arm_pcs value; |
+} pcs_attribute_args[] = |
+ { |
+ {"aapcs", ARM_PCS_AAPCS}, |
+ {"aapcs-vfp", ARM_PCS_AAPCS_VFP}, |
+#if 0 |
+ /* We could recognize these, but changes would be needed elsewhere |
+ to implement them. */ |
+ {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT}, |
+ {"atpcs", ARM_PCS_ATPCS}, |
+ {"apcs", ARM_PCS_APCS}, |
+#endif |
+ {NULL, ARM_PCS_UNKNOWN} |
+ }; |
+ |
+static enum arm_pcs |
+arm_pcs_from_attribute (tree attr) |
+{ |
+ const struct pcs_attribute_arg *ptr; |
+ const char *arg; |
+ |
+ /* Get the value of the argument. */ |
+ if (TREE_VALUE (attr) == NULL_TREE |
+ || TREE_CODE (TREE_VALUE (attr)) != STRING_CST) |
+ return ARM_PCS_UNKNOWN; |
+ |
+ arg = TREE_STRING_POINTER (TREE_VALUE (attr)); |
+ |
+ /* Check it against the list of known arguments. */ |
+ for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++) |
+ if (streq (arg, ptr->arg)) |
+ return ptr->value; |
+ |
+ /* An unrecognized PCS variant name. */ |
+ return ARM_PCS_UNKNOWN; |
+} |
+ |
+/* Get the PCS variant to use for this call. TYPE is the function's type |
+ specification, DECL is the specific declartion. DECL may be null if |
+ the call could be indirect or if this is a library call. */ |
+static enum arm_pcs |
+arm_get_pcs_model (const_tree type, const_tree decl) |
+{ |
+ bool user_convention = false; |
+ enum arm_pcs user_pcs = arm_pcs_default; |
+ tree attr; |
+ |
+ gcc_assert (type); |
+ |
+ attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type)); |
+ if (attr) |
+ { |
+ user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr)); |
+ user_convention = true; |
+ } |
+ |
+ if (TARGET_AAPCS_BASED) |
+ { |
+ /* Detect varargs functions. These always use the base rules |
+ (no argument is ever a candidate for a co-processor |
+ register). */ |
+ bool base_rules = (TYPE_ARG_TYPES (type) != 0 |
+ && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (type))) |
+ != void_type_node)); |
+ |
+ if (user_convention) |
+ { |
+ if (user_pcs > ARM_PCS_AAPCS_LOCAL) |
+ sorry ("Non-AAPCS derived PCS variant"); |
+ else if (base_rules && user_pcs != ARM_PCS_AAPCS) |
+ error ("Variadic functions must use the base AAPCS variant"); |
+ } |
+ |
+ if (base_rules) |
+ return ARM_PCS_AAPCS; |
+ else if (user_convention) |
+ return user_pcs; |
+ else if (decl && flag_unit_at_a_time) |
+ { |
+ /* Local functions never leak outside this compilation unit, |
+ so we are free to use whatever conventions are |
+ appropriate. */ |
+ /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */ |
+ struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl)); |
+ if (i && i->local) |
+ return ARM_PCS_AAPCS_LOCAL; |
+ } |
+ } |
+ else if (user_convention && user_pcs != arm_pcs_default) |
+ sorry ("PCS variant"); |
+ |
+ /* For everything else we use the target's default. */ |
+ return arm_pcs_default; |
+} |
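 |
A sketch of the variadic rule above, using hypothetical declarations: |
 |
    extern int f (int n, ...) |
      __attribute__ ((pcs ("aapcs")));      /* OK: the base variant.  */ |
    extern int g (int n, ...) |
      __attribute__ ((pcs ("aapcs-vfp")));  /* Rejected with the error |
                                               above: variadic functions |
                                               always use base AAPCS.  */ |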
+ |
+static void |
+aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum, |
+ const_tree fntype ATTRIBUTE_UNUSED, |
+ rtx libcall ATTRIBUTE_UNUSED, |
+ const_tree fndecl ATTRIBUTE_UNUSED) |
+{ |
+ /* Record the unallocated VFP registers. */ |
+ pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1; |
+ pcum->aapcs_vfp_reg_alloc = 0; |
+} |
+ |
+/* Walk down the type tree of TYPE counting consecutive base elements. |
+ If *MODEP is VOIDmode, then set it to the first valid floating point |
+ type. If a non-floating point type is found, or if a floating point |
+ type that doesn't match a non-VOIDmode *MODEP is found, then return -1, |
+ otherwise return the count in the sub-tree. */ |
+static int |
+aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep) |
+{ |
+ enum machine_mode mode; |
+ HOST_WIDE_INT size; |
+ |
+ switch (TREE_CODE (type)) |
+ { |
+ case REAL_TYPE: |
+ mode = TYPE_MODE (type); |
+ if (mode != DFmode && mode != SFmode) |
+ return -1; |
+ |
+ if (*modep == VOIDmode) |
+ *modep = mode; |
+ |
+ if (*modep == mode) |
+ return 1; |
+ |
+ break; |
+ |
+ case COMPLEX_TYPE: |
+ mode = TYPE_MODE (TREE_TYPE (type)); |
+ if (mode != DFmode && mode != SFmode) |
+ return -1; |
+ |
+ if (*modep == VOIDmode) |
+ *modep = mode; |
+ |
+ if (*modep == mode) |
+ return 2; |
+ |
+ break; |
+ |
+ case VECTOR_TYPE: |
+ /* Use V2SImode and V4SImode as representatives of all 64-bit |
+ and 128-bit vector types, whether or not those modes are |
+ supported with the present options. */ |
+ size = int_size_in_bytes (type); |
+ switch (size) |
+ { |
+ case 8: |
+ mode = V2SImode; |
+ break; |
+ case 16: |
+ mode = V4SImode; |
+ break; |
+ default: |
+ return -1; |
+ } |
+ |
+ if (*modep == VOIDmode) |
+ *modep = mode; |
+ |
+ /* Vector modes are considered to be opaque: two vectors are |
+ equivalent for the purposes of being homogeneous aggregates |
+ if they are the same size. */ |
+ if (*modep == mode) |
+ return 1; |
+ |
+ break; |
+ |
+ case ARRAY_TYPE: |
+ { |
+ int count; |
+ tree index = TYPE_DOMAIN (type); |
+ |
+ /* Can't handle incomplete types. */ |
+ if (!COMPLETE_TYPE_P (type)) |
+ return -1; |
+ |
+ count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep); |
+ if (count == -1 |
+ || !index |
+ || !TYPE_MAX_VALUE (index) |
+ || !host_integerp (TYPE_MAX_VALUE (index), 1) |
+ || !TYPE_MIN_VALUE (index) |
+ || !host_integerp (TYPE_MIN_VALUE (index), 1) |
+ || count < 0) |
+ return -1; |
+ |
+ count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1) |
+ - tree_low_cst (TYPE_MIN_VALUE (index), 1)); |
+ |
+ /* There must be no padding. */ |
+ if (!host_integerp (TYPE_SIZE (type), 1) |
+ || (tree_low_cst (TYPE_SIZE (type), 1) |
+ != count * GET_MODE_BITSIZE (*modep))) |
+ return -1; |
+ |
+ return count; |
+ } |
+ |
+ case RECORD_TYPE: |
+ { |
+ int count = 0; |
+ int sub_count; |
+ tree field; |
+ |
+ /* Can't handle incomplete types. */ |
+ if (!COMPLETE_TYPE_P (type)) |
+ return -1; |
+ |
+ for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) |
+ { |
+ if (TREE_CODE (field) != FIELD_DECL) |
+ continue; |
+ |
+ sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep); |
+ if (sub_count < 0) |
+ return -1; |
+ count += sub_count; |
+ } |
+ |
+ /* There must be no padding. */ |
+ if (!host_integerp (TYPE_SIZE (type), 1) |
+ || (tree_low_cst (TYPE_SIZE (type), 1) |
+ != count * GET_MODE_BITSIZE (*modep))) |
+ return -1; |
+ |
+ return count; |
+ } |
+ |
+ case UNION_TYPE: |
+ case QUAL_UNION_TYPE: |
+ { |
+ /* These aren't very interesting except in a degenerate case. */ |
+ int count = 0; |
+ int sub_count; |
+ tree field; |
+ |
+ /* Can't handle incomplete types. */ |
+ if (!COMPLETE_TYPE_P (type)) |
+ return -1; |
+ |
+ for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) |
+ { |
+ if (TREE_CODE (field) != FIELD_DECL) |
+ continue; |
+ |
+ sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep); |
+ if (sub_count < 0) |
+ return -1; |
+ count = count > sub_count ? count : sub_count; |
+ } |
+ |
+ /* There must be no padding. */ |
+ if (!host_integerp (TYPE_SIZE (type), 1) |
+ || (tree_low_cst (TYPE_SIZE (type), 1) |
+ != count * GET_MODE_BITSIZE (*modep))) |
+ return -1; |
+ |
+ return count; |
+ } |
+ |
+ default: |
+ break; |
+ } |
+ |
+ return -1; |
+} |
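 |
Examples of what this walk accepts and rejects (my own, not from the |
patch); a count of 1-4 over a single base mode is what later qualifies a |
homogeneous aggregate as a VFP candidate: |
 |
    struct v2d  { double x, y; };        /* count 2, *modep = DFmode.  */ |
    struct quat { float q[4]; };         /* count 4, *modep = SFmode.  */ |
    struct mix  { float f; double d; };  /* mixed element modes: -1.  */ |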
+ |
+/* Return true if PCS_VARIANT should use VFP registers. */ |
+static bool |
+use_vfp_abi (enum arm_pcs pcs_variant, bool is_double) |
+{ |
+ if (pcs_variant == ARM_PCS_AAPCS_VFP) |
+ return true; |
+ |
+ if (pcs_variant != ARM_PCS_AAPCS_LOCAL) |
+ return false; |
+ |
+  return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT
+	  && (TARGET_VFP_DOUBLE || !is_double));
+} |
+ |
+static bool |
+aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant, |
+ enum machine_mode mode, const_tree type, |
+ enum machine_mode *base_mode, int *count) |
+{ |
+ enum machine_mode new_mode = VOIDmode; |
+ |
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT |
+ || GET_MODE_CLASS (mode) == MODE_VECTOR_INT |
+ || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) |
+ { |
+ *count = 1; |
+ new_mode = mode; |
+ } |
+ else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) |
+ { |
+ *count = 2; |
+ new_mode = (mode == DCmode ? DFmode : SFmode); |
+ } |
+ else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE)) |
+ { |
+ int ag_count = aapcs_vfp_sub_candidate (type, &new_mode); |
+ |
+ if (ag_count > 0 && ag_count <= 4) |
+ *count = ag_count; |
+ else |
+ return false; |
+ } |
+ else |
+ return false; |
+
+ if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1)) |
+ return false; |
+ |
+ *base_mode = new_mode; |
+ return true; |
+} |
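+
+/* For example, a _Complex double argument has mode DCmode, so the
+   MODE_COMPLEX_FLOAT branch above reports a count of 2 with base
+   mode DFmode, while a four-float struct arrives in BLKmode and is
+   classified recursively via aapcs_vfp_sub_candidate.  */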
+ |
+static bool |
+aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant, |
+ enum machine_mode mode, const_tree type) |
+{ |
+ int count ATTRIBUTE_UNUSED; |
+ enum machine_mode ag_mode ATTRIBUTE_UNUSED; |
+ |
+ if (!use_vfp_abi (pcs_variant, false)) |
+ return false; |
+ return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type, |
+ &ag_mode, &count); |
+} |
+ |
+static bool |
+aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode, |
+ const_tree type) |
+{ |
+ if (!use_vfp_abi (pcum->pcs_variant, false)) |
+ return false; |
+ |
+ return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type, |
+ &pcum->aapcs_vfp_rmode, |
+ &pcum->aapcs_vfp_rcount); |
+} |
+ |
+static bool |
+aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode, |
+ const_tree type ATTRIBUTE_UNUSED) |
+{ |
+ int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode); |
+ unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1; |
+ int regno; |
+ |
+ for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift) |
+ if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask) |
+ { |
+ pcum->aapcs_vfp_reg_alloc = mask << regno; |
+ if (mode == BLKmode || (mode == TImode && !TARGET_NEON)) |
+ { |
+ int i; |
+ int rcount = pcum->aapcs_vfp_rcount; |
+ int rshift = shift; |
+ enum machine_mode rmode = pcum->aapcs_vfp_rmode; |
+ rtx par; |
+ if (!TARGET_NEON) |
+ { |
+ /* Avoid using unsupported vector modes. */ |
+ if (rmode == V2SImode) |
+ rmode = DImode; |
+ else if (rmode == V4SImode) |
+ { |
+ rmode = DImode; |
+ rcount *= 2; |
+ rshift /= 2; |
+ } |
+ } |
+ par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount)); |
+ for (i = 0; i < rcount; i++) |
+ { |
+ rtx tmp = gen_rtx_REG (rmode, |
+ FIRST_VFP_REGNUM + regno + i * rshift); |
+ tmp = gen_rtx_EXPR_LIST |
+ (VOIDmode, tmp, |
+ GEN_INT (i * GET_MODE_SIZE (rmode))); |
+ XVECEXP (par, 0, i) = tmp; |
+ } |
+ |
+ pcum->aapcs_reg = par; |
+ } |
+ else |
+ pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno); |
+ return true; |
+ } |
+ return false; |
+} |
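+
+/* The search above runs over a bitmap of free S registers.  For
+   instance, with aapcs_vfp_rmode == DFmode and aapcs_vfp_rcount == 2
+   (a homogeneous aggregate of two doubles), shift == 2 and
+   mask == 0xf, so the loop looks for four consecutive free S
+   registers starting at an even regno, e.g. s0-s3 (the d0/d1
+   pair).  */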
+ |
+static rtx |
+aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
+			       enum machine_mode mode,
+			       const_tree type)
+{
+  if (!use_vfp_abi (pcs_variant, false))
+    return NULL_RTX;
+ |
+ if (mode == BLKmode || (mode == TImode && !TARGET_NEON)) |
+ { |
+ int count; |
+ enum machine_mode ag_mode; |
+ int i; |
+ rtx par; |
+ int shift; |
+ |
+ aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type, |
+ &ag_mode, &count); |
+ |
+ if (!TARGET_NEON) |
+ { |
+ if (ag_mode == V2SImode) |
+ ag_mode = DImode; |
+ else if (ag_mode == V4SImode) |
+ { |
+ ag_mode = DImode; |
+ count *= 2; |
+ } |
+ } |
+      shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
+ par = gen_rtx_PARALLEL (mode, rtvec_alloc (count)); |
+ for (i = 0; i < count; i++) |
+ { |
+ rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift); |
+ tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, |
+ GEN_INT (i * GET_MODE_SIZE (ag_mode))); |
+ XVECEXP (par, 0, i) = tmp; |
+ } |
+ |
+ return par; |
+ } |
+ |
+ return gen_rtx_REG (mode, FIRST_VFP_REGNUM); |
+} |
+ |
+static void |
+aapcs_vfp_advance (CUMULATIVE_ARGS *pcum,
+		   enum machine_mode mode ATTRIBUTE_UNUSED,
+		   const_tree type ATTRIBUTE_UNUSED)
+{
+  pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
+  pcum->aapcs_vfp_reg_alloc = 0;
+} |
+ |
+#define AAPCS_CP(X) \ |
+ { \ |
+ aapcs_ ## X ## _cum_init, \ |
+ aapcs_ ## X ## _is_call_candidate, \ |
+ aapcs_ ## X ## _allocate, \ |
+ aapcs_ ## X ## _is_return_candidate, \ |
+ aapcs_ ## X ## _allocate_return_reg, \ |
+ aapcs_ ## X ## _advance \ |
+ } |
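+
+/* Thus AAPCS_CP(vfp) expands to the initializer
+
+     { aapcs_vfp_cum_init, aapcs_vfp_is_call_candidate,
+       aapcs_vfp_allocate, aapcs_vfp_is_return_candidate,
+       aapcs_vfp_allocate_return_reg, aapcs_vfp_advance },
+
+   wiring the VFP hooks defined above into the table below.  */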
+ |
+/* Table of co-processors that can be used to pass arguments in |
+   registers.  Ideally no argument should be a candidate for more than
+ one co-processor table entry, but the table is processed in order |
+ and stops after the first match. If that entry then fails to put |
+ the argument into a co-processor register, the argument will go on |
+ the stack. */ |
+static struct |
+{ |
+ /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */ |
+ void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree); |
+ |
+ /* Return true if an argument of mode MODE (or type TYPE if MODE is |
+ BLKmode) is a candidate for this co-processor's registers; this |
+ function should ignore any position-dependent state in |
+ CUMULATIVE_ARGS and only use call-type dependent information. */ |
+ bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree); |
+ |
+  /* Return true if the argument does get a co-processor register; if
+     so, set aapcs_reg to an RTX for the allocated register, suitable
+     for returning from FUNCTION_ARG.  */
+ bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree); |
+ |
+ /* Return true if a result of mode MODE (or type TYPE if MODE is |
+     BLKmode) can be returned in this co-processor's registers.  */
+ bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree); |
+ |
+  /* Allocate and return an RTX to hold the return value of a
+     call; this routine must not fail and will only be called if
+ is_return_candidate returned true with the same parameters. */ |
+ rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree); |
+ |
+ /* Finish processing this argument and prepare to start processing |
+ the next one. */ |
+ void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree); |
+} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] = |
+ { |
+ AAPCS_CP(vfp) |
+ }; |
+ |
+#undef AAPCS_CP |
+ |
+static int |
+aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode, |
+ tree type) |
+{ |
+ int i; |
+ |
+ for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) |
+ if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type)) |
+ return i; |
+ |
+ return -1; |
+} |
+ |
+static int |
+aapcs_select_return_coproc (const_tree type, const_tree fntype) |
+{ |
+ /* We aren't passed a decl, so we can't check that a call is local. |
+ However, it isn't clear that that would be a win anyway, since it |
+ might limit some tail-calling opportunities. */ |
+ enum arm_pcs pcs_variant; |
+ |
+ if (fntype) |
+ { |
+ const_tree fndecl = NULL_TREE; |
+ |
+ if (TREE_CODE (fntype) == FUNCTION_DECL) |
+ { |
+ fndecl = fntype; |
+ fntype = TREE_TYPE (fntype); |
+ } |
+ |
+ pcs_variant = arm_get_pcs_model (fntype, fndecl); |
+ } |
+ else |
+ pcs_variant = arm_pcs_default; |
+ |
+ if (pcs_variant != ARM_PCS_AAPCS) |
+ { |
+ int i; |
+ |
+ for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) |
+ if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, |
+ TYPE_MODE (type), |
+ type)) |
+ return i; |
+ } |
+ return -1; |
+} |
+ |
+static rtx |
+aapcs_allocate_return_reg (enum machine_mode mode, const_tree type, |
+ const_tree fntype) |
+{ |
+ /* We aren't passed a decl, so we can't check that a call is local. |
+ However, it isn't clear that that would be a win anyway, since it |
+ might limit some tail-calling opportunities. */ |
+ enum arm_pcs pcs_variant; |
+ int unsignedp ATTRIBUTE_UNUSED; |
+ |
+ if (fntype) |
+ { |
+ const_tree fndecl = NULL_TREE; |
+ |
+ if (TREE_CODE (fntype) == FUNCTION_DECL) |
+ { |
+ fndecl = fntype; |
+ fntype = TREE_TYPE (fntype); |
+ } |
+ |
+ pcs_variant = arm_get_pcs_model (fntype, fndecl); |
+ } |
+ else |
+ pcs_variant = arm_pcs_default; |
+ |
+ /* Promote integer types. */ |
+ if (type && INTEGRAL_TYPE_P (type)) |
+ mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1); |
+ |
+ if (pcs_variant != ARM_PCS_AAPCS) |
+ { |
+ int i; |
+ |
+ for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) |
+ if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode, |
+ type)) |
+ return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant, |
+ mode, type); |
+ } |
+ |
+  /* Promote small structs returned in a register to full-word size
+ for big-endian AAPCS. */ |
+ if (type && arm_return_in_msb (type)) |
+ { |
+ HOST_WIDE_INT size = int_size_in_bytes (type); |
+ if (size % UNITS_PER_WORD != 0) |
+ { |
+ size += UNITS_PER_WORD - size % UNITS_PER_WORD; |
+ mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0); |
+ } |
+ } |
+ |
+ return gen_rtx_REG (mode, R0_REGNUM); |
+} |
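+
+/* For example, on a big-endian target a 6-byte structure returned in
+   registers is padded up to 8 bytes by the code above and returned
+   in DImode across r0/r1, so the value sits at the most significant
+   end, as the big-endian AAPCS expects.  */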
+ |
+rtx |
+aapcs_libcall_value (enum machine_mode mode) |
+{ |
+ return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE); |
+} |
+ |
+/* Lay out a function argument using the AAPCS rules. The rule |
+ numbers referred to here are those in the AAPCS. */ |
+static void |
+aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode, |
+ tree type, int named) |
+{ |
+ int nregs, nregs2; |
+ int ncrn; |
+ |
+ /* We only need to do this once per argument. */ |
+ if (pcum->aapcs_arg_processed) |
+ return; |
+ |
+ pcum->aapcs_arg_processed = true; |
+ |
+ /* Special case: if named is false then we are handling an incoming |
+ anonymous argument which is on the stack. */ |
+ if (!named) |
+ return; |
+ |
+ /* Is this a potential co-processor register candidate? */ |
+ if (pcum->pcs_variant != ARM_PCS_AAPCS) |
+ { |
+ int slot = aapcs_select_call_coproc (pcum, mode, type); |
+ pcum->aapcs_cprc_slot = slot; |
+ |
+ /* We don't have to apply any of the rules from part B of the |
+ preparation phase, these are handled elsewhere in the |
+ compiler. */ |
+ |
+ if (slot >= 0) |
+ { |
+	  /* A co-processor register candidate goes either in its own
+ class of registers or on the stack. */ |
+ if (!pcum->aapcs_cprc_failed[slot]) |
+ { |
+ /* C1.cp - Try to allocate the argument to co-processor |
+ registers. */ |
+ if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type)) |
+ return; |
+ |
+ /* C2.cp - Put the argument on the stack and note that we |
+ can't assign any more candidates in this slot. We also |
+ need to note that we have allocated stack space, so that |
+ we won't later try to split a non-cprc candidate between |
+ core registers and the stack. */ |
+ pcum->aapcs_cprc_failed[slot] = true; |
+ pcum->can_split = false; |
+ } |
+ |
+ /* We didn't get a register, so this argument goes on the |
+ stack. */ |
+ gcc_assert (pcum->can_split == false); |
+ return; |
+ } |
+ } |
+ |
+ /* C3 - For double-word aligned arguments, round the NCRN up to the |
+ next even number. */ |
+ ncrn = pcum->aapcs_ncrn; |
+ if ((ncrn & 1) && arm_needs_doubleword_align (mode, type)) |
+ ncrn++; |
+ |
+  nregs = ARM_NUM_REGS2 (mode, type);
+ |
+ /* Sigh, this test should really assert that nregs > 0, but a GCC |
+     extension allows empty structs and then gives them zero size; it
+ then allows such a structure to be passed by value. For some of |
+ the code below we have to pretend that such an argument has |
+ non-zero size so that we 'locate' it correctly either in |
+ registers or on the stack. */ |
+ gcc_assert (nregs >= 0); |
+ |
+ nregs2 = nregs ? nregs : 1; |
+ |
+ /* C4 - Argument fits entirely in core registers. */ |
+ if (ncrn + nregs2 <= NUM_ARG_REGS) |
+ { |
+ pcum->aapcs_reg = gen_rtx_REG (mode, ncrn); |
+ pcum->aapcs_next_ncrn = ncrn + nregs; |
+ return; |
+ } |
+ |
+ /* C5 - Some core registers left and there are no arguments already |
+ on the stack: split this argument between the remaining core |
+ registers and the stack. */ |
+ if (ncrn < NUM_ARG_REGS && pcum->can_split) |
+ { |
+ pcum->aapcs_reg = gen_rtx_REG (mode, ncrn); |
+ pcum->aapcs_next_ncrn = NUM_ARG_REGS; |
+ pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD; |
+ return; |
+ } |
+ |
+ /* C6 - NCRN is set to 4. */ |
+ pcum->aapcs_next_ncrn = NUM_ARG_REGS; |
+ |
+  /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
+ return; |
+} |
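+
+/* A worked example of rules C3-C5 when no co-processor slot applies:
+   for f (int, double) the int takes r0 (NCRN becomes 1); the double
+   needs doubleword alignment, so C3 rounds NCRN up to 2 and C4
+   assigns r2-r3.  A further double would fail both C4 and C5 (no
+   core registers remain) and go on the stack under C7/C8.  */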
+ |
/* Initialize a variable CUM of type CUMULATIVE_ARGS |
for a call to a function whose data type is FNTYPE. |
For a library call, FNTYPE is NULL. */ |
void |
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype, |
- rtx libname ATTRIBUTE_UNUSED, |
+ rtx libname, |
tree fndecl ATTRIBUTE_UNUSED) |
{ |
+  /* Determine the calling convention to use.  */
+ if (fntype) |
+ pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl); |
+ else |
+ pcum->pcs_variant = arm_pcs_default; |
+ |
+ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) |
+ { |
+ if (arm_libcall_uses_aapcs_base (libname)) |
+ pcum->pcs_variant = ARM_PCS_AAPCS; |
+ |
+ pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0; |
+ pcum->aapcs_reg = NULL_RTX; |
+ pcum->aapcs_partial = 0; |
+ pcum->aapcs_arg_processed = false; |
+ pcum->aapcs_cprc_slot = -1; |
+ pcum->can_split = true; |
+ |
+ if (pcum->pcs_variant != ARM_PCS_AAPCS) |
+ { |
+ int i; |
+ |
+ for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) |
+ { |
+ pcum->aapcs_cprc_failed[i] = false; |
+ aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl); |
+ } |
+ } |
+ return; |
+ } |
+ |
+ /* Legacy ABIs */ |
+ |
/* On the ARM, the offset starts at 0. */ |
pcum->nregs = 0; |
pcum->iwmmxt_nregs = 0; |
@@ -3131,6 +4373,17 @@ arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode, |
{ |
int nregs; |
+ /* Handle the special case quickly. Pick an arbitrary value for op2 of |
+ a call insn (op3 of a call_value insn). */ |
+ if (mode == VOIDmode) |
+ return const0_rtx; |
+ |
+ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) |
+ { |
+ aapcs_layout_arg (pcum, mode, type, named); |
+ return pcum->aapcs_reg; |
+ } |
+ |
/* Varargs vectors are treated the same as long long. |
named_count avoids having to change the way arm handles 'named' */ |
if (TARGET_IWMMXT_ABI |
@@ -3172,10 +4425,16 @@ arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode, |
static int |
arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode, |
- tree type, bool named ATTRIBUTE_UNUSED) |
+ tree type, bool named) |
{ |
int nregs = pcum->nregs; |
+ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) |
+ { |
+ aapcs_layout_arg (pcum, mode, type, named); |
+ return pcum->aapcs_partial; |
+ } |
+ |
if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode)) |
return 0; |
@@ -3187,6 +4446,39 @@ arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode, |
return 0; |
} |
+void |
+arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode, |
+ tree type, bool named) |
+{ |
+ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) |
+ { |
+ aapcs_layout_arg (pcum, mode, type, named); |
+ |
+ if (pcum->aapcs_cprc_slot >= 0) |
+ { |
+ aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode, |
+ type); |
+ pcum->aapcs_cprc_slot = -1; |
+ } |
+ |
+      /* Common bookkeeping before the next argument.  */
+ pcum->aapcs_arg_processed = false; |
+ pcum->aapcs_ncrn = pcum->aapcs_next_ncrn; |
+ pcum->aapcs_reg = NULL_RTX; |
+ pcum->aapcs_partial = 0; |
+ } |
+ else |
+ { |
+ pcum->nargs += 1; |
+ if (arm_vector_mode_supported_p (mode) |
+ && pcum->named_count > pcum->nargs |
+ && TARGET_IWMMXT_ABI) |
+ pcum->iwmmxt_nregs += 1; |
+ else |
+ pcum->nregs += ARM_NUM_REGS2 (mode, type); |
+ } |
+} |
+ |
/* Variable sized types are passed by reference. This is a GCC |
extension to the ARM ABI. */ |
@@ -3226,42 +4518,6 @@ arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED) |
arm_pragma_long_calls = OFF; |
} |
-/* Table of machine attributes. */ |
-const struct attribute_spec arm_attribute_table[] = |
-{ |
- /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */ |
- /* Function calls made to this symbol must be done indirectly, because |
- it may lie outside of the 26 bit addressing range of a normal function |
- call. */ |
- { "long_call", 0, 0, false, true, true, NULL }, |
- /* Whereas these functions are always known to reside within the 26 bit |
- addressing range. */ |
- { "short_call", 0, 0, false, true, true, NULL }, |
- /* Interrupt Service Routines have special prologue and epilogue requirements. */ |
- { "isr", 0, 1, false, false, false, arm_handle_isr_attribute }, |
- { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute }, |
- { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute }, |
-#ifdef ARM_PE |
- /* ARM/PE has three new attributes: |
- interfacearm - ? |
- dllexport - for exporting a function/variable that will live in a dll |
- dllimport - for importing a function/variable from a dll |
- |
- Microsoft allows multiple declspecs in one __declspec, separating |
- them with spaces. We do NOT support this. Instead, use __declspec |
- multiple times. |
- */ |
- { "dllimport", 0, 0, true, false, false, NULL }, |
- { "dllexport", 0, 0, true, false, false, NULL }, |
- { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute }, |
-#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES |
- { "dllimport", 0, 0, false, false, false, handle_dll_attribute }, |
- { "dllexport", 0, 0, false, false, false, handle_dll_attribute }, |
- { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute }, |
-#endif |
- { NULL, 0, 0, false, false, false, NULL } |
-}; |
- |
/* Handle an attribute requiring a FUNCTION_DECL; |
arguments as in struct attribute_spec.handler. */ |
static tree |
@@ -3270,8 +4526,8 @@ arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED, |
{ |
if (TREE_CODE (*node) != FUNCTION_DECL) |
{ |
- warning (OPT_Wattributes, "%qs attribute only applies to functions", |
- IDENTIFIER_POINTER (name)); |
+ warning (OPT_Wattributes, "%qE attribute only applies to functions", |
+ name); |
*no_add_attrs = true; |
} |
@@ -3288,8 +4544,8 @@ arm_handle_isr_attribute (tree *node, tree name, tree args, int flags, |
{ |
if (TREE_CODE (*node) != FUNCTION_DECL) |
{ |
- warning (OPT_Wattributes, "%qs attribute only applies to functions", |
- IDENTIFIER_POINTER (name)); |
+ warning (OPT_Wattributes, "%qE attribute only applies to functions", |
+ name); |
*no_add_attrs = true; |
} |
/* FIXME: the argument if any is checked for type attributes; |
@@ -3302,8 +4558,8 @@ arm_handle_isr_attribute (tree *node, tree name, tree args, int flags, |
{ |
if (arm_isr_value (args) == ARM_FT_UNKNOWN) |
{ |
- warning (OPT_Wattributes, "%qs attribute ignored", |
- IDENTIFIER_POINTER (name)); |
+ warning (OPT_Wattributes, "%qE attribute ignored", |
+ name); |
*no_add_attrs = true; |
} |
} |
@@ -3330,8 +4586,8 @@ arm_handle_isr_attribute (tree *node, tree name, tree args, int flags, |
} |
else |
{ |
- warning (OPT_Wattributes, "%qs attribute ignored", |
- IDENTIFIER_POINTER (name)); |
+ warning (OPT_Wattributes, "%qE attribute ignored", |
+ name); |
} |
} |
} |
@@ -3339,6 +4595,20 @@ arm_handle_isr_attribute (tree *node, tree name, tree args, int flags, |
return NULL_TREE; |
} |
+/* Handle a "pcs" attribute; arguments as in struct |
+ attribute_spec.handler. */ |
+static tree |
+arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args, |
+ int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) |
+{ |
+ if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN) |
+ { |
+ warning (OPT_Wattributes, "%qE attribute ignored", name); |
+ *no_add_attrs = true; |
+ } |
+ return NULL_TREE; |
+} |
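+
+/* The attribute validated here is used as, e.g.,
+
+     double f (double) __attribute__ ((pcs ("aapcs-vfp")));
+
+   which requests the VFP variant of the AAPCS for calls to f; any
+   argument arm_pcs_from_attribute does not recognize draws the
+   warning above.  */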
+ |
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES |
/* Handle the "notshared" attribute. This attribute is another way of |
requesting hidden visibility. ARM's compiler supports |
@@ -3500,7 +4770,7 @@ arm_is_long_call_p (tree decl) |
/* Return nonzero if it is ok to make a tail-call to DECL. */ |
static bool |
-arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) |
+arm_function_ok_for_sibcall (tree decl, tree exp) |
{ |
unsigned long func_type; |
@@ -3533,6 +4803,21 @@ arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) |
if (IS_INTERRUPT (func_type)) |
return false; |
+ if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl)))) |
+ { |
+ /* Check that the return value locations are the same. For |
+ example that we aren't returning a value from the sibling in |
+ a VFP register but then need to transfer it to a core |
+ register. */ |
+ rtx a, b; |
+ |
+ a = arm_function_value (TREE_TYPE (exp), decl, false); |
+ b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)), |
+ cfun->decl, false); |
+ if (!rtx_equal_p (a, b)) |
+ return false; |
+ } |
+ |
/* Never tailcall if function may be called with a misaligned SP. */ |
if (IS_STACKALIGN (func_type)) |
return false; |
@@ -3580,7 +4865,7 @@ require_pic_register (void) |
/* Play games to avoid marking the function as needing pic |
if we are being called as part of the cost-estimation |
process. */ |
- if (current_ir_type () != IR_GIMPLE) |
+ if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl) |
crtl->uses_pic_offset_table = 1; |
} |
else |
@@ -3593,7 +4878,7 @@ require_pic_register (void) |
/* Play games to avoid marking the function as needing pic |
if we are being called as part of the cost-estimation |
process. */ |
- if (current_ir_type () != IR_GIMPLE) |
+ if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl) |
{ |
crtl->uses_pic_offset_table = 1; |
start_sequence (); |
@@ -3602,7 +4887,11 @@ require_pic_register (void) |
seq = get_insns (); |
end_sequence (); |
- emit_insn_after (seq, entry_of_function ()); |
+ /* We can be called during expansion of PHI nodes, where |
+ we can't yet emit instructions directly in the final |
+ insn stream. Queue the insns on the entry edge, they will |
+ be committed after everything else is expanded. */ |
+ insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR)); |
} |
} |
} |
@@ -3634,10 +4923,8 @@ legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg) |
else |
address = reg; |
- if (TARGET_ARM) |
- emit_insn (gen_pic_load_addr_arm (address, orig)); |
- else if (TARGET_THUMB2) |
- emit_insn (gen_pic_load_addr_thumb2 (address, orig)); |
+ if (TARGET_32BIT) |
+ emit_insn (gen_pic_load_addr_32bit (address, orig)); |
else /* TARGET_THUMB1 */ |
emit_insn (gen_pic_load_addr_thumb1 (address, orig)); |
@@ -3814,7 +5101,7 @@ arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED) |
{ |
pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE); |
pic_rtx = gen_rtx_CONST (Pmode, pic_rtx); |
- emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx)); |
+ emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx)); |
emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg))); |
@@ -3837,29 +5124,13 @@ arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED) |
UNSPEC_GOTSYM_OFF); |
pic_rtx = gen_rtx_CONST (Pmode, pic_rtx); |
- if (TARGET_ARM) |
- { |
- emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx)); |
- emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno)); |
- } |
- else if (TARGET_THUMB2) |
+ if (TARGET_32BIT) |
{ |
- /* Thumb-2 only allows very limited access to the PC. Calculate the |
- address in a temporary register. */ |
- if (arm_pic_register != INVALID_REGNUM) |
- { |
- pic_tmp = gen_rtx_REG (SImode, |
- thumb_find_work_register (saved_regs)); |
- } |
+ emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx)); |
+ if (TARGET_ARM) |
+ emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno)); |
else |
- { |
- gcc_assert (can_create_pseudo_p ()); |
- pic_tmp = gen_reg_rtx (Pmode); |
- } |
- |
- emit_insn (gen_pic_load_addr_thumb2 (pic_reg, pic_rtx)); |
- emit_insn (gen_pic_load_dot_plus_four (pic_tmp, labelno)); |
- emit_insn (gen_addsi3 (pic_reg, pic_reg, pic_tmp)); |
+ emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno)); |
} |
else /* TARGET_THUMB1 */ |
{ |
@@ -3920,8 +5191,8 @@ pcrel_constant_p (rtx x) |
/* Return nonzero if X is a valid ARM state address operand. */ |
int |
-arm_legitimate_address_p (enum machine_mode mode, rtx x, RTX_CODE outer, |
- int strict_p) |
+arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer, |
+ int strict_p) |
{ |
bool use_ldrd; |
enum rtx_code code = GET_CODE (x); |
@@ -4005,7 +5276,7 @@ arm_legitimate_address_p (enum machine_mode mode, rtx x, RTX_CODE outer, |
} |
/* Return nonzero if X is a valid Thumb-2 address operand. */ |
-int |
+static int |
thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p) |
{ |
bool use_ldrd; |
@@ -4131,6 +5402,7 @@ arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer, |
if (GET_MODE_SIZE (mode) <= 4 |
&& ! (arm_arch4 |
&& (mode == HImode |
+ || mode == HFmode |
|| (mode == QImode && outer == SIGN_EXTEND)))) |
{ |
if (code == MULT) |
@@ -4159,13 +5431,15 @@ arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer, |
load. */ |
if (arm_arch4) |
{ |
- if (mode == HImode || (outer == SIGN_EXTEND && mode == QImode)) |
+ if (mode == HImode |
+ || mode == HFmode |
+ || (outer == SIGN_EXTEND && mode == QImode)) |
range = 256; |
else |
range = 4096; |
} |
else |
- range = (mode == HImode) ? 4095 : 4096; |
+ range = (mode == HImode || mode == HFmode) ? 4095 : 4096; |
return (code == CONST_INT |
&& INTVAL (index) < range |
@@ -4226,15 +5500,17 @@ thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p) |
if (mode == DImode || mode == DFmode) |
{ |
- HOST_WIDE_INT val = INTVAL (index); |
- /* ??? Can we assume ldrd for thumb2? */ |
- /* Thumb-2 ldrd only has reg+const addressing modes. */ |
- if (code != CONST_INT) |
+ if (code == CONST_INT) |
+ { |
+ HOST_WIDE_INT val = INTVAL (index); |
+ /* ??? Can we assume ldrd for thumb2? */ |
+ /* Thumb-2 ldrd only has reg+const addressing modes. */ |
+ /* ldrd supports offsets of +-1020. |
+ However the ldr fallback does not. */ |
+ return val > -256 && val < 256 && (val & 3) == 0; |
+ } |
+ else |
return 0; |
- |
- /* ldrd supports offsets of +-1020. |
- However the ldr fallback does not. */ |
- return val > -256 && val < 256 && (val & 3) == 0; |
} |
if (code == MULT) |
@@ -4311,7 +5587,7 @@ thumb1_index_register_rtx_p (rtx x, int strict_p) |
addresses based on the frame pointer or arg pointer until the |
reload pass starts. This is so that eliminating such addresses |
into stack based ones won't produce impossible code. */ |
-int |
+static int |
thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p) |
{ |
/* ??? Not clear if this is right. Experiment. */ |
@@ -4336,7 +5612,8 @@ thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p) |
return 1; |
/* This is PC relative data after arm_reorg runs. */ |
- else if (GET_MODE_SIZE (mode) >= 4 && reload_completed |
+ else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode) |
+ && reload_completed |
&& (GET_CODE (x) == LABEL_REF |
|| (GET_CODE (x) == CONST |
&& GET_CODE (XEXP (x, 0)) == PLUS |
@@ -4425,6 +5702,17 @@ thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val) |
} |
} |
+bool |
+arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p) |
+{ |
+ if (TARGET_ARM) |
+ return arm_legitimate_address_outer_p (mode, x, SET, strict_p); |
+ else if (TARGET_THUMB2) |
+ return thumb2_legitimate_address_p (mode, x, strict_p); |
+ else /* if (TARGET_THUMB1) */ |
+ return thumb1_legitimate_address_p (mode, x, strict_p); |
+} |
+ |
/* Build the SYMBOL_REF for __tls_get_addr. */ |
static GTY(()) rtx tls_get_addr_libfunc; |
@@ -4499,14 +5787,7 @@ arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc) |
if (TARGET_ARM) |
emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno)); |
else if (TARGET_THUMB2) |
- { |
- rtx tmp; |
- /* Thumb-2 only allows very limited access to the PC. Calculate |
- the address in a temporary register. */ |
- tmp = gen_reg_rtx (SImode); |
- emit_insn (gen_pic_load_dot_plus_four (tmp, labelno)); |
- emit_insn (gen_addsi3(reg, reg, tmp)); |
- } |
+ emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno)); |
else /* TARGET_THUMB1 */ |
emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno)); |
@@ -4562,15 +5843,7 @@ legitimize_tls_address (rtx x, rtx reg) |
if (TARGET_ARM) |
emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno)); |
else if (TARGET_THUMB2) |
- { |
- rtx tmp; |
- /* Thumb-2 only allows very limited access to the PC. Calculate |
- the address in a temporary register. */ |
- tmp = gen_reg_rtx (SImode); |
- emit_insn (gen_pic_load_dot_plus_four (tmp, labelno)); |
- emit_insn (gen_addsi3(reg, reg, tmp)); |
- emit_move_insn (reg, gen_const_mem (SImode, reg)); |
- } |
+ emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno)); |
else |
{ |
emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno)); |
@@ -4601,6 +5874,14 @@ legitimize_tls_address (rtx x, rtx reg) |
rtx |
arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode) |
{ |
+ if (!TARGET_ARM) |
+ { |
+ /* TODO: legitimize_address for Thumb2. */ |
+ if (TARGET_THUMB2) |
+ return x; |
+ return thumb_legitimize_address (x, orig_x, mode); |
+ } |
+ |
if (arm_tls_symbol_p (x)) |
return legitimize_tls_address (x, NULL_RTX); |
@@ -4652,7 +5933,7 @@ arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode) |
} |
/* XXX We don't allow MINUS any more -- see comment in |
- arm_legitimate_address_p (). */ |
+ arm_legitimate_address_outer_p (). */ |
else if (GET_CODE (x) == MINUS) |
{ |
rtx xop0 = XEXP (x, 0); |
@@ -4799,7 +6080,7 @@ thumb_legitimize_reload_address (rtx *x_p, |
x = copy_rtx (x); |
push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode), |
- Pmode, VOIDmode, 0, 0, opnum, type); |
+ Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type); |
return x; |
} |
@@ -4816,7 +6097,7 @@ thumb_legitimize_reload_address (rtx *x_p, |
x = copy_rtx (x); |
push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode), |
- Pmode, VOIDmode, 0, 0, opnum, type); |
+ Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type); |
return x; |
} |
@@ -4944,9 +6225,18 @@ thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer) |
else if ((outer == PLUS || outer == COMPARE) |
&& INTVAL (x) < 256 && INTVAL (x) > -256) |
return 0; |
- else if (outer == AND |
+ else if ((outer == IOR || outer == XOR || outer == AND) |
&& INTVAL (x) < 256 && INTVAL (x) >= -256) |
return COSTS_N_INSNS (1); |
+ else if (outer == AND) |
+ { |
+ int i; |
+ /* This duplicates the tests in the andsi3 expander. */ |
+ for (i = 9; i <= 31; i++) |
+ if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x) |
+ || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x)) |
+ return COSTS_N_INSNS (2); |
+ } |
else if (outer == ASHIFT || outer == ASHIFTRT |
|| outer == LSHIFTRT) |
return 0; |
@@ -5035,7 +6325,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed) |
case UMOD: |
if (TARGET_HARD_FLOAT && mode == SFmode) |
*total = COSTS_N_INSNS (2); |
- else if (TARGET_HARD_FLOAT && mode == DFmode) |
+ else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE) |
*total = COSTS_N_INSNS (4); |
else |
*total = COSTS_N_INSNS (20); |
@@ -5113,7 +6403,9 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed) |
if (GET_MODE_CLASS (mode) == MODE_FLOAT) |
{ |
- if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) |
+ if (TARGET_HARD_FLOAT |
+ && (mode == SFmode |
+ || (mode == DFmode && !TARGET_VFP_SINGLE))) |
{ |
*total = COSTS_N_INSNS (1); |
if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE |
@@ -5154,10 +6446,17 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed) |
return true; |
} |
+ /* A shift as a part of RSB costs no more than RSB itself. */ |
+ if (GET_CODE (XEXP (x, 0)) == MULT |
+ && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) |
+ { |
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed); |
+ *total += rtx_cost (XEXP (x, 1), code, speed); |
+ return true; |
+ } |
+ |
if (subcode == MULT |
- && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT |
- && ((INTVAL (XEXP (XEXP (x, 1), 1)) & |
- (INTVAL (XEXP (XEXP (x, 1), 1)) - 1)) == 0)) |
+ && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)) |
{ |
*total += rtx_cost (XEXP (x, 0), code, speed); |
*total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed); |
@@ -5193,9 +6492,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed) |
multiplication by a power of two, so that we fall down into |
the code below. */ |
if (GET_CODE (XEXP (x, 0)) == MULT |
- && ! (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT |
- && ((INTVAL (XEXP (XEXP (x, 0), 1)) & |
- (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0))) |
+ && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) |
{ |
/* The cost comes from the cost of the multiply. */ |
return false; |
@@ -5203,7 +6500,9 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed) |
if (GET_MODE_CLASS (mode) == MODE_FLOAT) |
{ |
- if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) |
+ if (TARGET_HARD_FLOAT |
+ && (mode == SFmode |
+ || (mode == DFmode && !TARGET_VFP_SINGLE))) |
{ |
*total = COSTS_N_INSNS (1); |
if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE |
@@ -5278,9 +6577,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed) |
} |
if (subcode == MULT |
- && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT |
- && ((INTVAL (XEXP (XEXP (x, 0), 1)) & |
- (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)) |
+ && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) |
{ |
*total += rtx_cost (XEXP (x, 1), code, speed); |
*total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed); |
@@ -5318,7 +6615,9 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed) |
case NEG: |
if (GET_MODE_CLASS (mode) == MODE_FLOAT) |
{ |
- if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) |
+ if (TARGET_HARD_FLOAT |
+ && (mode == SFmode |
+ || (mode == DFmode && !TARGET_VFP_SINGLE))) |
{ |
*total = COSTS_N_INSNS (1); |
return false; |
@@ -5337,9 +6636,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed) |
|| subcode == LSHIFTRT |
|| subcode == ROTATE || subcode == ROTATERT |
|| (subcode == MULT |
- && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT |
- && ((INTVAL (XEXP (XEXP (x, 0), 1)) & |
- (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0))) |
+ && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))) |
{ |
*total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed); |
/* Register shifts cost an extra cycle. */ |
@@ -5447,9 +6744,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed) |
} |
if (subcode == MULT |
- && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT |
- && ((INTVAL (XEXP (XEXP (x, 0), 1)) & |
- (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)) |
+ && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) |
{ |
*total += rtx_cost (XEXP (x, 1), code, speed); |
*total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed); |
@@ -5469,9 +6764,11 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed) |
return true; |
case ABS: |
- if (GET_MODE_CLASS (mode == MODE_FLOAT)) |
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT) |
{ |
- if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) |
+ if (TARGET_HARD_FLOAT |
+ && (mode == SFmode |
+ || (mode == DFmode && !TARGET_VFP_SINGLE))) |
{ |
*total = COSTS_N_INSNS (1); |
return false; |
@@ -5574,7 +6871,8 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed) |
return true; |
case CONST_DOUBLE: |
- if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)) |
+ if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x) |
+ && (mode == SFmode || !TARGET_VFP_SINGLE)) |
*total = COSTS_N_INSNS (1); |
else |
*total = COSTS_N_INSNS (4); |
@@ -5649,7 +6947,8 @@ arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, |
return false; |
case MINUS: |
- if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT) |
+ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT |
+ && (mode == SFmode || !TARGET_VFP_SINGLE)) |
{ |
*total = COSTS_N_INSNS (1); |
return false; |
@@ -5679,12 +6978,23 @@ arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, |
return false; |
case PLUS: |
- if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT) |
+ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT |
+ && (mode == SFmode || !TARGET_VFP_SINGLE)) |
{ |
*total = COSTS_N_INSNS (1); |
return false; |
} |
+ /* A shift as a part of ADD costs nothing. */ |
+ if (GET_CODE (XEXP (x, 0)) == MULT |
+ && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) |
+ { |
+ *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1); |
+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false); |
+ *total += rtx_cost (XEXP (x, 1), code, false); |
+ return true; |
+ } |
+ |
/* Fall through */ |
case AND: case XOR: case IOR: |
if (mode == SImode) |
@@ -5709,7 +7019,8 @@ arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, |
return false; |
case NEG: |
- if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT) |
+ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT |
+ && (mode == SFmode || !TARGET_VFP_SINGLE)) |
{ |
*total = COSTS_N_INSNS (1); |
return false; |
@@ -5733,7 +7044,8 @@ arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, |
return false; |
case ABS: |
- if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT) |
+ if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT |
+ && (mode == SFmode || !TARGET_VFP_SINGLE)) |
*total = COSTS_N_INSNS (1); |
else |
*total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode)); |
@@ -5778,7 +7090,10 @@ arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, |
case CONST_INT: |
if (const_ok_for_arm (INTVAL (x))) |
- *total = COSTS_N_INSNS (outer_code == SET ? 1 : 0); |
+ /* A multiplication by a constant requires another instruction |
+ to load the constant to a register. */ |
+ *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT) |
+ ? 1 : 0); |
else if (const_ok_for_arm (~INTVAL (x))) |
*total = COSTS_N_INSNS (outer_code == AND ? 0 : 1); |
else if (const_ok_for_arm (-INTVAL (x))) |
@@ -5825,10 +7140,12 @@ arm_rtx_costs (rtx x, int code, int outer_code, int *total, |
bool speed) |
{ |
if (!speed) |
- return arm_size_rtx_costs (x, code, outer_code, total); |
+ return arm_size_rtx_costs (x, (enum rtx_code) code, |
+ (enum rtx_code) outer_code, total); |
else |
- return all_cores[(int)arm_tune].rtx_costs (x, code, outer_code, total, |
- speed); |
+ return all_cores[(int)arm_tune].rtx_costs (x, (enum rtx_code) code, |
+ (enum rtx_code) outer_code, |
+ total, speed); |
} |
/* RTX costs for cores with a slow MUL implementation. Thumb-2 is not |
@@ -5950,7 +7267,9 @@ arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, |
if (GET_MODE_CLASS (mode) == MODE_FLOAT) |
{ |
- if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) |
+ if (TARGET_HARD_FLOAT |
+ && (mode == SFmode |
+ || (mode == DFmode && !TARGET_VFP_SINGLE))) |
{ |
*total = COSTS_N_INSNS (1); |
return false; |
@@ -6107,7 +7426,9 @@ arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, |
if (GET_MODE_CLASS (mode) == MODE_FLOAT) |
{ |
- if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) |
+ if (TARGET_HARD_FLOAT |
+ && (mode == SFmode |
+ || (mode == DFmode && !TARGET_VFP_SINGLE))) |
{ |
*total = COSTS_N_INSNS (1); |
return false; |
@@ -6135,9 +7456,9 @@ arm_arm_address_cost (rtx x) |
if (c == MEM || c == LABEL_REF || c == SYMBOL_REF) |
return 10; |
- if (c == PLUS || c == MINUS) |
+ if (c == PLUS) |
{ |
- if (GET_CODE (XEXP (x, 0)) == CONST_INT) |
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT) |
return 2; |
if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1))) |
@@ -6753,25 +8074,171 @@ neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode, |
} |
} |
-/* Initialize a vector with non-constant elements. FIXME: We can do better |
- than the current implementation (building a vector on the stack and then |
- loading it) in many cases. See rs6000.c. */ |
+/* If VALS is a vector constant that can be loaded into a register |
+ using VDUP, generate instructions to do so and return an RTX to |
+ assign to the register. Otherwise return NULL_RTX. */ |
+ |
+static rtx |
+neon_vdup_constant (rtx vals) |
+{ |
+ enum machine_mode mode = GET_MODE (vals); |
+ enum machine_mode inner_mode = GET_MODE_INNER (mode); |
+ int n_elts = GET_MODE_NUNITS (mode); |
+ bool all_same = true; |
+ rtx x; |
+ int i; |
+ |
+ if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4) |
+ return NULL_RTX; |
+ |
+ for (i = 0; i < n_elts; ++i) |
+ { |
+ x = XVECEXP (vals, 0, i); |
+ if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) |
+ all_same = false; |
+ } |
+ |
+ if (!all_same) |
+ /* The elements are not all the same. We could handle repeating |
+ patterns of a mode larger than INNER_MODE here (e.g. int8x8_t |
+ {0, C, 0, C, 0, C, 0, C} which can be loaded using |
+ vdup.i16). */ |
+ return NULL_RTX; |
+ |
+ /* We can load this constant by using VDUP and a constant in a |
+ single ARM register. This will be cheaper than a vector |
+ load. */ |
+ |
+ x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0)); |
+ return gen_rtx_UNSPEC (mode, gen_rtvec (1, x), |
+ UNSPEC_VDUP_N); |
+} |
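+
+/* For example, a V4SImode CONST_VECTOR of four identical 7s passes
+   the all_same test: the 7 is copied into a core register and the
+   UNSPEC_VDUP_N pattern expands to a single vdup.32, which is
+   cheaper than a load from the constant pool.  */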
+ |
+/* Generate code to load VALS, which is a PARALLEL containing only |
+ constants (for vec_init) or CONST_VECTOR, efficiently into a |
+ register. Returns an RTX to copy into the register, or NULL_RTX |
+   for a PARALLEL that cannot be converted into a CONST_VECTOR.  */
+ |
+rtx |
+neon_make_constant (rtx vals) |
+{ |
+ enum machine_mode mode = GET_MODE (vals); |
+ rtx target; |
+ rtx const_vec = NULL_RTX; |
+ int n_elts = GET_MODE_NUNITS (mode); |
+ int n_const = 0; |
+ int i; |
+ |
+ if (GET_CODE (vals) == CONST_VECTOR) |
+ const_vec = vals; |
+ else if (GET_CODE (vals) == PARALLEL) |
+ { |
+ /* A CONST_VECTOR must contain only CONST_INTs and |
+ CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF). |
+ Only store valid constants in a CONST_VECTOR. */ |
+ for (i = 0; i < n_elts; ++i) |
+ { |
+ rtx x = XVECEXP (vals, 0, i); |
+ if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE) |
+ n_const++; |
+ } |
+ if (n_const == n_elts) |
+ const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)); |
+ } |
+ else |
+ gcc_unreachable (); |
+ |
+ if (const_vec != NULL |
+ && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL)) |
+ /* Load using VMOV. On Cortex-A8 this takes one cycle. */ |
+ return const_vec; |
+ else if ((target = neon_vdup_constant (vals)) != NULL_RTX) |
+ /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON |
+ pipeline cycle; creating the constant takes one or two ARM |
+ pipeline cycles. */ |
+ return target; |
+ else if (const_vec != NULL_RTX) |
+ /* Load from constant pool. On Cortex-A8 this takes two cycles |
+     (for either double or quad vectors).  We cannot take advantage
+ of single-cycle VLD1 because we need a PC-relative addressing |
+ mode. */ |
+ return const_vec; |
+ else |
+ /* A PARALLEL containing something not valid inside CONST_VECTOR. |
+     We cannot construct an initializer.  */
+ return NULL_RTX; |
+} |
+ |
+/* Initialize vector TARGET to VALS. */ |
void |
neon_expand_vector_init (rtx target, rtx vals) |
{ |
enum machine_mode mode = GET_MODE (target); |
- enum machine_mode inner = GET_MODE_INNER (mode); |
- unsigned int i, n_elts = GET_MODE_NUNITS (mode); |
- rtx mem; |
+ enum machine_mode inner_mode = GET_MODE_INNER (mode); |
+ int n_elts = GET_MODE_NUNITS (mode); |
+ int n_var = 0, one_var = -1; |
+ bool all_same = true; |
+ rtx x, mem; |
+ int i; |
+ |
+ for (i = 0; i < n_elts; ++i) |
+ { |
+ x = XVECEXP (vals, 0, i); |
+ if (!CONSTANT_P (x)) |
+ ++n_var, one_var = i; |
+ |
+ if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) |
+ all_same = false; |
+ } |
+ |
+ if (n_var == 0) |
+ { |
+ rtx constant = neon_make_constant (vals); |
+ if (constant != NULL_RTX) |
+ { |
+ emit_move_insn (target, constant); |
+ return; |
+ } |
+ } |
- gcc_assert (VECTOR_MODE_P (mode)); |
+ /* Splat a single non-constant element if we can. */ |
+ if (all_same && GET_MODE_SIZE (inner_mode) <= 4) |
+ { |
+ x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0)); |
+ emit_insn (gen_rtx_SET (VOIDmode, target, |
+ gen_rtx_UNSPEC (mode, gen_rtvec (1, x), |
+ UNSPEC_VDUP_N))); |
+ return; |
+ } |
+ |
+ /* One field is non-constant. Load constant then overwrite varying |
+ field. This is more efficient than using the stack. */ |
+ if (n_var == 1) |
+ { |
+ rtx copy = copy_rtx (vals); |
+ rtvec ops; |
+ /* Load constant part of vector, substitute neighboring value for |
+ varying element. */ |
+ XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts); |
+ neon_expand_vector_init (target, copy); |
+ |
+ /* Insert variable. */ |
+ x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var)); |
+ ops = gen_rtvec (3, x, target, GEN_INT (one_var)); |
+ emit_insn (gen_rtx_SET (VOIDmode, target, |
+ gen_rtx_UNSPEC (mode, ops, UNSPEC_VSET_LANE))); |
+ return; |
+ } |
+ |
+ /* Construct the vector in memory one field at a time |
+ and load the whole vector. */ |
mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0); |
for (i = 0; i < n_elts; i++) |
- emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)), |
- XVECEXP (vals, 0, i)); |
- |
+ emit_move_insn (adjust_address_nv (mem, inner_mode, |
+ i * GET_MODE_SIZE (inner_mode)), |
+ XVECEXP (vals, 0, i)); |
emit_move_insn (target, mem); |
} |
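+
+/* For example, initializing a V4SImode vector with {x, 1, 2, 3},
+   where x is the only non-constant element, takes the n_var == 1
+   path above: the constant vector {1, 1, 2, 3} is loaded first (the
+   neighbouring 1 standing in for x), then x is inserted into lane 0
+   with UNSPEC_VSET_LANE, avoiding the stack entirely.  */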
@@ -6930,10 +8397,13 @@ arm_coproc_mem_operand (rtx op, bool wb) |
} |
/* Return TRUE if OP is a memory operand which we can load or store a vector |
- to/from. If CORE is true, we're moving from ARM registers not Neon |
- registers. */ |
+ to/from. TYPE is one of the following values: |
+    0 - Vector load/store (vldr)
+ 1 - Core registers (ldm) |
+ 2 - Element/structure loads (vld1) |
+ */ |
int |
-neon_vector_mem_operand (rtx op, bool core) |
+neon_vector_mem_operand (rtx op, int type) |
{ |
rtx ind; |
@@ -6966,23 +8436,15 @@ neon_vector_mem_operand (rtx op, bool core) |
return arm_address_register_rtx_p (ind, 0); |
/* Allow post-increment with Neon registers. */ |
- if (!core && GET_CODE (ind) == POST_INC) |
+ if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC)) |
return arm_address_register_rtx_p (XEXP (ind, 0), 0); |
-#if 0 |
- /* FIXME: We can support this too if we use VLD1/VST1. */ |
- if (!core |
- && GET_CODE (ind) == POST_MODIFY |
- && arm_address_register_rtx_p (XEXP (ind, 0), 0) |
- && GET_CODE (XEXP (ind, 1)) == PLUS |
- && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0))) |
- ind = XEXP (ind, 1); |
-#endif |
+ /* FIXME: vld1 allows register post-modify. */ |
/* Match: |
(plus (reg) |
(const)). */ |
- if (!core |
+ if (type == 0 |
&& GET_CODE (ind) == PLUS |
&& GET_CODE (XEXP (ind, 0)) == REG |
&& REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode) |
@@ -7049,10 +8511,19 @@ arm_eliminable_register (rtx x) |
enum reg_class |
coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb) |
{ |
+ if (mode == HFmode) |
+ { |
+ if (!TARGET_NEON_FP16) |
+ return GENERAL_REGS; |
+ if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2)) |
+ return NO_REGS; |
+ return GENERAL_REGS; |
+ } |
+ |
if (TARGET_NEON |
&& (GET_MODE_CLASS (mode) == MODE_VECTOR_INT |
|| GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) |
- && neon_vector_mem_operand (x, FALSE)) |
+ && neon_vector_mem_operand (x, 0)) |
return NO_REGS; |
if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode)) |
@@ -7330,28 +8801,21 @@ tls_mentioned_p (rtx x) |
} |
} |
-/* Must not copy a SET whose source operand is PC-relative. */ |
+/* Must not copy any rtx that uses a pc-relative address. */ |
+ |
+static int |
+arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
+{ |
+ if (GET_CODE (*x) == UNSPEC |
+ && XINT (*x, 1) == UNSPEC_PIC_BASE) |
+ return 1; |
+ return 0; |
+} |
static bool |
arm_cannot_copy_insn_p (rtx insn) |
{ |
- rtx pat = PATTERN (insn); |
- |
- if (GET_CODE (pat) == SET) |
- { |
- rtx rhs = SET_SRC (pat); |
- |
- if (GET_CODE (rhs) == UNSPEC |
- && XINT (rhs, 1) == UNSPEC_PIC_BASE) |
- return TRUE; |
- |
- if (GET_CODE (rhs) == MEM |
- && GET_CODE (XEXP (rhs, 0)) == UNSPEC |
- && XINT (XEXP (rhs, 0), 1) == UNSPEC_PIC_BASE) |
- return TRUE; |
- } |
- |
- return FALSE; |
+ return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL); |
} |
enum rtx_code |
@@ -7412,7 +8876,7 @@ adjacent_mem_locations (rtx a, rtx b) |
/* Don't accept any offset that will require multiple |
instructions to handle, since this would cause the |
arith_adjacentmem pattern to output an overlong sequence. */ |
- if (!const_ok_for_op (PLUS, val0) || !const_ok_for_op (PLUS, val1)) |
+ if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS)) |
return 0; |
/* Don't allow an eliminable register: register elimination can make |
@@ -8330,7 +9794,8 @@ arm_select_cc_mode (enum rtx_code op, rtx x, rtx y) |
/* A compare with a shifted operand. Because of canonicalization, the |
comparison will have to be swapped when we emit the assembler. */ |
- if (GET_MODE (y) == SImode && GET_CODE (y) == REG |
+ if (GET_MODE (y) == SImode |
+ && (REG_P (y) || (GET_CODE (y) == SUBREG)) |
&& (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT |
|| GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE |
|| GET_CODE (x) == ROTATERT)) |
@@ -8338,7 +9803,8 @@ arm_select_cc_mode (enum rtx_code op, rtx x, rtx y) |
/* This operation is performed swapped, but since we only rely on the Z |
flag we don't need an additional mode. */ |
- if (GET_MODE (y) == SImode && REG_P (y) |
+ if (GET_MODE (y) == SImode |
+ && (REG_P (y) || (GET_CODE (y) == SUBREG)) |
&& GET_CODE (x) == NEG |
&& (op == EQ || op == NE)) |
return CC_Zmode; |
@@ -10184,9 +11650,14 @@ vfp_emit_fstmd (int base_reg, int count) |
XVECEXP (par, 0, 0) |
= gen_rtx_SET (VOIDmode, |
- gen_frame_mem (BLKmode, |
- gen_rtx_PRE_DEC (BLKmode, |
- stack_pointer_rtx)), |
+ gen_frame_mem |
+ (BLKmode, |
+ gen_rtx_PRE_MODIFY (Pmode, |
+ stack_pointer_rtx, |
+ plus_constant |
+ (stack_pointer_rtx, |
+ - (count * 8))) |
+ ), |
gen_rtx_UNSPEC (BLKmode, |
gen_rtvec (1, reg), |
UNSPEC_PUSH_MULT)); |
@@ -10218,8 +11689,7 @@ vfp_emit_fstmd (int base_reg, int count) |
} |
par = emit_insn (par); |
- REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf, |
- REG_NOTES (par)); |
+ add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf); |
RTX_FRAME_RELATED_P (par) = 1; |
return count * 8; |
@@ -10273,11 +11743,14 @@ output_call (rtx *operands) |
return ""; |
} |
-/* Output a 'call' insn that is a reference in memory. */ |
+/* Output a 'call' insn that is a reference in memory.  This is
+   disabled for ARMv5 and later, where we prefer blx, because the
+   indirect sequence below has a significant performance overhead.  */
const char * |
output_call_mem (rtx *operands) |
{ |
- if (TARGET_INTERWORK && !arm_arch5) |
+ gcc_assert (!arm_arch5); |
+ if (TARGET_INTERWORK) |
{ |
output_asm_insn ("ldr%?\t%|ip, %0", operands); |
output_asm_insn ("mov%?\t%|lr, %|pc", operands); |
@@ -10289,16 +11762,11 @@ output_call_mem (rtx *operands) |
first instruction. It's safe to use IP as the target of the |
load since the call will kill it anyway. */ |
output_asm_insn ("ldr%?\t%|ip, %0", operands); |
- if (arm_arch5) |
- output_asm_insn ("blx%?\t%|ip", operands); |
+ output_asm_insn ("mov%?\t%|lr, %|pc", operands); |
+ if (arm_arch4t) |
+ output_asm_insn ("bx%?\t%|ip", operands); |
else |
- { |
- output_asm_insn ("mov%?\t%|lr, %|pc", operands); |
- if (arm_arch4t) |
- output_asm_insn ("bx%?\t%|ip", operands); |
- else |
- output_asm_insn ("mov%?\t%|pc, %|ip", operands); |
- } |
+ output_asm_insn ("mov%?\t%|pc, %|ip", operands); |
} |
else |
{ |
@@ -10385,14 +11853,23 @@ output_mov_long_double_arm_from_arm (rtx *operands) |
return ""; |
} |
- |
-/* Emit a MOVW/MOVT pair. */ |
-void arm_emit_movpair (rtx dest, rtx src) |
-{ |
- emit_set_insn (dest, gen_rtx_HIGH (SImode, src)); |
- emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src)); |
-} |
- |
+void |
+arm_emit_movpair (rtx dest, rtx src) |
+{
+ /* If the src is an immediate, simplify it. */ |
+ if (CONST_INT_P (src)) |
+ { |
+ HOST_WIDE_INT val = INTVAL (src); |
+ emit_set_insn (dest, GEN_INT (val & 0x0000ffff)); |
+ if ((val >> 16) & 0x0000ffff) |
+ emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16), |
+ GEN_INT (16)), |
+ GEN_INT ((val >> 16) & 0x0000ffff)); |
+ return; |
+ } |
+ emit_set_insn (dest, gen_rtx_HIGH (SImode, src)); |
+ emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src)); |
+}
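+
+/* For instance, arm_emit_movpair (reg, GEN_INT (0x12345678)) first
+   sets the register to 0x5678 (a movw) and then emits a ZERO_EXTRACT
+   store of 0x1234 into the high halfword (a movt); a constant whose
+   high halfword is zero needs only the single low-half move.  */
+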
/* Output a move from arm registers to an fpa registers. |
OPERANDS[0] is an fpa register. |
@@ -10769,7 +12246,7 @@ output_move_double (rtx *operands) |
} |
/* Output a move, load or store for quad-word vectors in ARM registers. Only |
- handles MEMs accepted by neon_vector_mem_operand with CORE=true. */ |
+ handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */ |
const char * |
output_move_quad (rtx *operands) |
@@ -10965,6 +12442,13 @@ output_move_neon (rtx *operands) |
ops[1] = reg; |
break; |
+ case PRE_DEC: |
+ /* FIXME: We should be using vld1/vst1 here in BE mode? */ |
+ templ = "v%smdb%%?\t%%0!, %%h1"; |
+ ops[0] = XEXP (addr, 0); |
+ ops[1] = reg; |
+ break; |
+ |
case POST_MODIFY: |
/* FIXME: Not currently enabled in neon_vector_mem_operand. */ |
gcc_unreachable (); |
@@ -11014,6 +12498,56 @@ output_move_neon (rtx *operands) |
return ""; |
} |
+/* Compute and return the length of neon_mov<mode>, where <mode> is |
+ one of VSTRUCT modes: EI, OI, CI or XI. */ |
+int |
+arm_attr_length_move_neon (rtx insn) |
+{ |
+ rtx reg, mem, addr; |
+ int load; |
+ enum machine_mode mode; |
+ |
+ extract_insn_cached (insn); |
+ |
+ if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1])) |
+ { |
+ mode = GET_MODE (recog_data.operand[0]); |
+ switch (mode) |
+ { |
+ case EImode: |
+ case OImode: |
+ return 8; |
+ case CImode: |
+ return 12; |
+ case XImode: |
+ return 16; |
+ default: |
+ gcc_unreachable (); |
+ } |
+ } |
+ |
+ load = REG_P (recog_data.operand[0]); |
+ reg = recog_data.operand[!load]; |
+ mem = recog_data.operand[load]; |
+ |
+ gcc_assert (MEM_P (mem)); |
+ |
+ mode = GET_MODE (reg); |
+ addr = XEXP (mem, 0); |
+ |
+ /* Strip off const from addresses like (const (plus (...))). */ |
+ if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS) |
+ addr = XEXP (addr, 0); |
+ |
+ if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS) |
+ { |
+ int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2; |
+ return insns * 4; |
+ } |
+ else |
+ return 4; |
+} |
+ |
/* Output an ADD r, s, #n where n may be too big for one instruction. |
If adding zero to one register, output nothing. */ |
const char * |
@@ -11320,6 +12854,20 @@ arm_compute_save_reg0_reg12_mask (void) |
&& crtl->uses_pic_offset_table) |
save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM; |
} |
+  else if (IS_VOLATILE (func_type))
+ { |
+ /* For noreturn functions we historically omitted register saves |
+	 altogether.  However, this really messes up debugging.  As a
+	 compromise, save just the frame pointers.  Combined with the link
+	 register saved elsewhere, this should be sufficient to get
+ a backtrace. */ |
+ if (frame_pointer_needed) |
+ save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM; |
+ if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM)) |
+ save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM; |
+ if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM)) |
+ save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM; |
+ } |
else |
{ |
/* In the normal case we only need to save those registers |
@@ -11406,11 +12954,6 @@ arm_compute_save_reg_mask (void) |
| (1 << LR_REGNUM) |
| (1 << PC_REGNUM); |
- /* Volatile functions do not return, so there |
- is no need to save any other registers. */ |
- if (IS_VOLATILE (func_type)) |
- return save_reg_mask; |
- |
save_reg_mask |= arm_compute_save_reg0_reg12_mask (); |
/* Decide if we need to save the link register. |
@@ -11629,7 +13172,7 @@ output_return_instruction (rtx operand, int really_return, int reverse) |
sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd'); |
- return_used_this_function = 1; |
+ cfun->machine->return_used_this_function = 1; |
offsets = arm_get_frame_offsets (); |
live_regs_mask = offsets->saved_regs_mask; |
@@ -11698,18 +13241,28 @@ output_return_instruction (rtx operand, int really_return, int reverse) |
gcc_assert (stack_adjust == 0 || stack_adjust == 4); |
if (stack_adjust && arm_arch5 && TARGET_ARM) |
- sprintf (instr, "ldm%sib\t%%|sp, {", conditional); |
+ if (TARGET_UNIFIED_ASM) |
+ sprintf (instr, "ldmib%s\t%%|sp, {", conditional); |
+ else |
+ sprintf (instr, "ldm%sib\t%%|sp, {", conditional); |
else |
{ |
/* If we can't use ldmib (SA110 bug), |
then try to pop r3 instead. */ |
if (stack_adjust) |
live_regs_mask |= 1 << 3; |
- sprintf (instr, "ldm%sfd\t%%|sp, {", conditional); |
+ |
+ if (TARGET_UNIFIED_ASM) |
+ sprintf (instr, "ldmfd%s\t%%|sp, {", conditional); |
+ else |
+ sprintf (instr, "ldm%sfd\t%%|sp, {", conditional); |
} |
} |
else |
- sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional); |
+ if (TARGET_UNIFIED_ASM) |
+ sprintf (instr, "pop%s\t{", conditional); |
+ else |
+ sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional); |
p = instr + strlen (instr); |
@@ -11894,7 +13447,6 @@ arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size) |
if (crtl->calls_eh_return) |
asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n"); |
- return_used_this_function = 0; |
} |
const char * |
@@ -11915,7 +13467,8 @@ arm_output_epilogue (rtx sibling) |
/* If we have already generated the return instruction |
then it is futile to generate anything else. */ |
- if (use_return_insn (FALSE, sibling) && return_used_this_function) |
+  if (use_return_insn (FALSE, sibling)
+      && (cfun->machine->return_used_this_function != 0))
return ""; |
func_type = arm_current_func_type (); |
@@ -11957,7 +13510,7 @@ arm_output_epilogue (rtx sibling) |
/* This variable is for the Virtual Frame Pointer, not VFP regs. */ |
int vfp_offset = offsets->frame; |
- if (arm_fpu_arch == FPUTYPE_FPA_EMU2) |
+ if (TARGET_FPA_EMU2) |
{ |
for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--) |
if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) |
@@ -12180,7 +13733,7 @@ arm_output_epilogue (rtx sibling) |
SP_REGNUM, HARD_FRAME_POINTER_REGNUM); |
} |
- if (arm_fpu_arch == FPUTYPE_FPA_EMU2) |
+ if (TARGET_FPA_EMU2) |
{ |
for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++) |
if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) |
@@ -12221,24 +13774,29 @@ arm_output_epilogue (rtx sibling) |
if (TARGET_HARD_FLOAT && TARGET_VFP) |
{ |
- start_reg = FIRST_VFP_REGNUM; |
- for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2) |
+ int end_reg = LAST_VFP_REGNUM + 1; |
+ |
+ /* Scan the registers in reverse order. We need to match |
+ any groupings made in the prologue and generate matching |
+ pop operations. */ |
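+      /* A sketch of the matching (assuming d8, d9 and d11 are the
+         live call-saved VFP registers): scanning downwards, the dead
+         d10 pair closes the first group and pops d11, then the dead
+         d7 pair closes the next group and pops d8-d9, mirroring the
+         two store-multiple groups the prologue pushed.  */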
+ for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2) |
{ |
if ((!df_regs_ever_live_p (reg) || call_used_regs[reg]) |
- && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1])) |
+ && (!df_regs_ever_live_p (reg + 1) |
+ || call_used_regs[reg + 1])) |
{ |
- if (start_reg != reg) |
+ if (end_reg > reg + 2) |
vfp_output_fldmd (f, SP_REGNUM, |
- (start_reg - FIRST_VFP_REGNUM) / 2, |
- (reg - start_reg) / 2); |
- start_reg = reg + 2; |
+ (reg + 2 - FIRST_VFP_REGNUM) / 2, |
+ (end_reg - (reg + 2)) / 2); |
+ end_reg = reg; |
} |
} |
- if (start_reg != reg) |
- vfp_output_fldmd (f, SP_REGNUM, |
- (start_reg - FIRST_VFP_REGNUM) / 2, |
- (reg - start_reg) / 2); |
+ if (end_reg > reg + 2) |
+ vfp_output_fldmd (f, SP_REGNUM, 0, |
+ (end_reg - (reg + 2)) / 2); |
} |
+ |
if (TARGET_IWMMXT) |
for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++) |
if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) |
@@ -12362,7 +13920,7 @@ arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, |
/* ??? Probably not safe to set this here, since it assumes that a |
function will be emitted as assembly immediately after we generate |
RTL for it. This does not happen for inline functions. */ |
- return_used_this_function = 0; |
+ cfun->machine->return_used_this_function = 0; |
} |
else /* TARGET_32BIT */ |
{ |
@@ -12370,7 +13928,7 @@ arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, |
offsets = arm_get_frame_offsets (); |
gcc_assert (!use_return_insn (FALSE, NULL) |
- || !return_used_this_function |
+ || (cfun->machine->return_used_this_function != 0) |
|| offsets->saved_regs == offsets->outgoing_args |
|| frame_pointer_needed); |
@@ -12407,16 +13965,17 @@ emit_multi_reg_push (unsigned long mask) |
/* For the body of the insn we are going to generate an UNSPEC in |
parallel with several USEs. This allows the insn to be recognized |
- by the push_multi pattern in the arm.md file. The insn looks |
- something like this: |
+ by the push_multi pattern in the arm.md file. |
+ |
+ The body of the insn looks something like this: |
(parallel [ |
- (set (mem:BLK (pre_dec:BLK (reg:SI sp))) |
+         (set (mem:BLK (pre_modify:SI (reg:SI sp)
+                                      (plus:SI (reg:SI sp)
+                                               (const_int:SI <num>))))
(unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT)) |
- (use (reg:SI 11 fp)) |
- (use (reg:SI 12 ip)) |
- (use (reg:SI 14 lr)) |
- (use (reg:SI 15 pc)) |
+ (use (reg:SI XX)) |
+ (use (reg:SI YY)) |
+ ... |
]) |
For the frame note however, we try to be more explicit and actually |
@@ -12429,13 +13988,20 @@ emit_multi_reg_push (unsigned long mask) |
(sequence [ |
(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20))) |
(set (mem:SI (reg:SI sp)) (reg:SI r4)) |
- (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp)) |
- (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip)) |
- (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr)) |
+ (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX)) |
+ (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY)) |
+ ... |
]) |
- This sequence is used both by the code to support stack unwinding for |
- exceptions handlers and the code to generate dwarf2 frame debugging. */ |
+     FIXME: In an ideal world the PRE_MODIFY would not exist and
+     instead we'd have a parallel expression detailing all
+     the stores to the various memory addresses so that debug
+     information is more up-to-date.  Bear in mind, though, that any
+     rewrite must respect the operand constraints of the push
+     instruction.
+ |
+ Note also that this has to be taken care of for the VFP registers. |
+ |
+ For more see PR43399. */ |
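+
+  /* For instance (a sketch, with MASK selecting r4, r5 and lr, so
+     num_regs == 3): the PRE_MODIFY drops sp by 12, the body carries
+     (use (reg:SI r5)) and (use (reg:SI lr)) alongside the UNSPEC, and
+     push_multi emits the whole thing as "stmfd sp!, {r4, r5, lr}"
+     ("push {r4, r5, lr}" in unified syntax).  */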
par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs)); |
dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1)); |
@@ -12449,9 +14015,14 @@ emit_multi_reg_push (unsigned long mask) |
XVECEXP (par, 0, 0) |
= gen_rtx_SET (VOIDmode, |
- gen_frame_mem (BLKmode, |
- gen_rtx_PRE_DEC (BLKmode, |
- stack_pointer_rtx)), |
+ gen_frame_mem |
+ (BLKmode, |
+ gen_rtx_PRE_MODIFY (Pmode, |
+ stack_pointer_rtx, |
+ plus_constant |
+ (stack_pointer_rtx, |
+ -4 * num_regs)) |
+ ), |
gen_rtx_UNSPEC (BLKmode, |
gen_rtvec (1, reg), |
UNSPEC_PUSH_MULT)); |
@@ -12482,9 +14053,10 @@ emit_multi_reg_push (unsigned long mask) |
{ |
tmp |
= gen_rtx_SET (VOIDmode, |
- gen_frame_mem (SImode, |
- plus_constant (stack_pointer_rtx, |
- 4 * j)), |
+ gen_frame_mem |
+ (SImode, |
+ plus_constant (stack_pointer_rtx, |
+ 4 * j)), |
reg); |
RTX_FRAME_RELATED_P (tmp) = 1; |
XVECEXP (dwarf, 0, dwarf_par_index++) = tmp; |
@@ -12502,8 +14074,8 @@ emit_multi_reg_push (unsigned long mask) |
RTX_FRAME_RELATED_P (tmp) = 1; |
XVECEXP (dwarf, 0, 0) = tmp; |
- REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf, |
- REG_NOTES (par)); |
+ add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf); |
+ |
return par; |
} |
@@ -12536,9 +14108,14 @@ emit_sfm (int base_reg, int count) |
XVECEXP (par, 0, 0) |
= gen_rtx_SET (VOIDmode, |
- gen_frame_mem (BLKmode, |
- gen_rtx_PRE_DEC (BLKmode, |
- stack_pointer_rtx)), |
+ gen_frame_mem |
+ (BLKmode, |
+ gen_rtx_PRE_MODIFY (Pmode, |
+ stack_pointer_rtx, |
+ plus_constant |
+ (stack_pointer_rtx, |
+ -12 * count)) |
+ ), |
gen_rtx_UNSPEC (BLKmode, |
gen_rtvec (1, reg), |
UNSPEC_PUSH_MULT)); |
@@ -12569,8 +14146,8 @@ emit_sfm (int base_reg, int count) |
XVECEXP (dwarf, 0, 0) = tmp; |
par = emit_insn (par); |
- REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf, |
- REG_NOTES (par)); |
+ add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf); |
+ |
return par; |
} |
@@ -12748,22 +14325,24 @@ arm_get_frame_offsets (void) |
{ |
int reg = -1; |
- for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++) |
- { |
- if ((offsets->saved_regs_mask & (1 << i)) == 0) |
- { |
- reg = i; |
- break; |
- } |
- } |
- |
- if (reg == -1 && arm_size_return_regs () <= 12 |
- && !crtl->tail_call_emit) |
+ /* If it is safe to use r3, then do so. This sometimes |
+ generates better code on Thumb-2 by avoiding the need to |
+ use 32-bit push/pop instructions. */ |
+ if (!crtl->tail_call_emit |
+ && arm_size_return_regs () <= 12 |
+ && (offsets->saved_regs_mask & (1 << 3)) == 0) |
{ |
- /* Push/pop an argument register (r3) if all callee saved |
- registers are already being pushed. */ |
reg = 3; |
} |
+ else |
+ for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++) |
+ { |
+ if ((offsets->saved_regs_mask & (1 << i)) == 0) |
+ { |
+ reg = i; |
+ break; |
+ } |
+ } |
if (reg != -1) |
{ |
@@ -12863,6 +14442,24 @@ arm_compute_initial_elimination_offset (unsigned int from, unsigned int to) |
} |
} |
+/* Given FROM and TO register numbers, say whether this elimination is
+   allowed.  Frame pointer elimination is automatically handled.
+
+   Most eliminations are permissible.  We disallow only eliminating
+   the arg pointer into the soft frame pointer, eliminating anything
+   into the stack pointer once a frame pointer is needed, and
+   eliminating into a hard frame pointer that belongs to the other
+   instruction set (ARM vs. Thumb).  */
+ |
+bool |
+arm_can_eliminate (const int from, const int to) |
+{ |
+ return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false : |
+ (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false : |
+ (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false : |
+ (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false : |
+ true); |
+} |
/* Emit RTL to save coprocessor registers on function entry. Returns the |
number of bytes pushed. */ |
@@ -12878,7 +14475,7 @@ arm_save_coproc_regs(void) |
for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--) |
if (df_regs_ever_live_p (reg) && ! call_used_regs[reg]) |
{ |
- insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx); |
+ insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx); |
insn = gen_rtx_MEM (V2SImode, insn); |
insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg)); |
RTX_FRAME_RELATED_P (insn) = 1; |
@@ -12887,12 +14484,12 @@ arm_save_coproc_regs(void) |
/* Save any floating point call-saved registers used by this |
function. */ |
- if (arm_fpu_arch == FPUTYPE_FPA_EMU2) |
+ if (TARGET_FPA_EMU2) |
{ |
for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--) |
if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) |
{ |
- insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx); |
+ insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx); |
insn = gen_rtx_MEM (XFmode, insn); |
insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg)); |
RTX_FRAME_RELATED_P (insn) = 1; |
@@ -12989,8 +14586,7 @@ thumb_set_frame_pointer (arm_stack_offsets *offsets) |
dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx, |
plus_constant (stack_pointer_rtx, amount)); |
RTX_FRAME_RELATED_P (dwarf) = 1; |
- REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf, |
- REG_NOTES (insn)); |
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); |
} |
RTX_FRAME_RELATED_P (insn) = 1; |
@@ -13053,8 +14649,7 @@ arm_expand_prologue (void) |
dwarf = gen_rtx_SET (VOIDmode, r0, dwarf); |
insn = gen_movsi (r0, stack_pointer_rtx); |
RTX_FRAME_RELATED_P (insn) = 1; |
- REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, |
- dwarf, REG_NOTES (insn)); |
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); |
emit_insn (insn); |
emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7))); |
emit_insn (gen_movsi (stack_pointer_rtx, r1)); |
@@ -13121,8 +14716,7 @@ arm_expand_prologue (void) |
plus_constant (stack_pointer_rtx, |
-fp_offset)); |
RTX_FRAME_RELATED_P (insn) = 1; |
- REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, |
- dwarf, REG_NOTES (insn)); |
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); |
} |
else |
{ |
@@ -13715,6 +15309,30 @@ arm_print_operand (FILE *stream, rtx x, int code) |
} |
return; |
+    /* Print the high single-precision register of a VFP double-precision
+       register, e.g. "s11" when %p is applied to d5.  */
+ case 'p': |
+ { |
+        enum machine_mode mode = GET_MODE (x);
+ int regno; |
+ |
+ if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG) |
+ { |
+ output_operand_lossage ("invalid operand for code '%c'", code); |
+ return; |
+ } |
+ |
+ regno = REGNO (x); |
+ if (!VFP_REGNO_OK_FOR_DOUBLE (regno)) |
+ { |
+ output_operand_lossage ("invalid operand for code '%c'", code); |
+ return; |
+ } |
+ |
+ fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1); |
+ } |
+ return; |
+ |
/* Print a VFP/Neon double precision or quad precision register name. */ |
case 'P': |
case 'q': |
@@ -13832,6 +15450,73 @@ arm_print_operand (FILE *stream, rtx x, int code) |
} |
return; |
+ /* Memory operand for vld1/vst1 instruction. */ |
+ case 'A': |
+ { |
+ rtx addr; |
+        bool postinc = false;
+ gcc_assert (GET_CODE (x) == MEM); |
+ addr = XEXP (x, 0); |
+ if (GET_CODE (addr) == POST_INC) |
+ { |
+            postinc = true;
+ addr = XEXP (addr, 0); |
+ } |
+ asm_fprintf (stream, "[%r]", REGNO (addr)); |
+ if (postinc) |
+ fputs("!", stream); |
+ } |
+ return; |
+ |
+    /* Translate an S register number into a D register number and
+       element index, e.g. s5 prints as "d2[1]".  */
+ case 'y': |
+ { |
+        enum machine_mode mode = GET_MODE (x);
+ int regno; |
+ |
+ if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG) |
+ { |
+ output_operand_lossage ("invalid operand for code '%c'", code); |
+ return; |
+ } |
+ |
+ regno = REGNO (x); |
+ if (!VFP_REGNO_OK_FOR_SINGLE (regno)) |
+ { |
+ output_operand_lossage ("invalid operand for code '%c'", code); |
+ return; |
+ } |
+ |
+ regno = regno - FIRST_VFP_REGNUM; |
+ fprintf (stream, "d%d[%d]", regno / 2, regno % 2); |
+ } |
+ return; |
+ |
+    /* Register specifier for vld1.16/vst1.16.  Translate the S register
+       number into a D register number and element index, e.g. s5 prints
+       as "d2[2]".  */
+ case 'z': |
+ { |
+        enum machine_mode mode = GET_MODE (x);
+ int regno; |
+ |
+ if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG) |
+ { |
+ output_operand_lossage ("invalid operand for code '%c'", code); |
+ return; |
+ } |
+ |
+ regno = REGNO (x); |
+ if (!VFP_REGNO_OK_FOR_SINGLE (regno)) |
+ { |
+ output_operand_lossage ("invalid operand for code '%c'", code); |
+ return; |
+ } |
+ |
+ regno = regno - FIRST_VFP_REGNUM; |
+ fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0)); |
+ } |
+ return; |
+ |
default: |
if (x == 0) |
{ |
@@ -13865,6 +15550,12 @@ arm_print_operand (FILE *stream, rtx x, int code) |
default: |
gcc_assert (GET_CODE (x) != NEG); |
fputc ('#', stream); |
+ if (GET_CODE (x) == HIGH) |
+ { |
+ fputs (":lower16:", stream); |
+ x = XEXP (x, 0); |
+ } |
+ |
output_addr_const (stream, x); |
break; |
} |
@@ -14032,7 +15723,7 @@ static enum arm_cond_code |
get_arm_condition_code (rtx comparison) |
{ |
enum machine_mode mode = GET_MODE (XEXP (comparison, 0)); |
- int code; |
+ enum arm_cond_code code; |
enum rtx_code comp_code = GET_CODE (comparison); |
if (GET_MODE_CLASS (mode) != MODE_CC) |
@@ -14243,12 +15934,6 @@ arm_final_prescan_insn (rtx insn) |
reversed if it appears to fail. */ |
int reverse = 0; |
- /* JUMP_CLOBBERS will be one implies that the conditions if a branch is |
- taken are clobbered, even if the rtl suggests otherwise. It also |
- means that we have to grub around within the jump expression to find |
- out what the conditions are when the jump isn't taken. */ |
- int jump_clobbers = 0; |
- |
/* If we start with a return insn, we only succeed if we find another one. */ |
int seeking_return = 0; |
@@ -14327,14 +16012,6 @@ arm_final_prescan_insn (rtx insn) |
int then_not_else = TRUE; |
rtx this_insn = start_insn, label = 0; |
- /* If the jump cannot be done with one instruction, we cannot |
- conditionally execute the instruction in the inverse case. */ |
- if (get_attr_conds (insn) == CONDS_JUMP_CLOB) |
- { |
- jump_clobbers = 1; |
- return; |
- } |
- |
/* Register the insn jumped to. */ |
if (reverse) |
{ |
@@ -14377,13 +16054,7 @@ arm_final_prescan_insn (rtx insn) |
control falls in from somewhere else. */ |
if (this_insn == label) |
{ |
- if (jump_clobbers) |
- { |
- arm_ccfsm_state = 2; |
- this_insn = next_nonnote_insn (this_insn); |
- } |
- else |
- arm_ccfsm_state = 1; |
+ arm_ccfsm_state = 1; |
succeed = TRUE; |
} |
else |
@@ -14398,13 +16069,7 @@ arm_final_prescan_insn (rtx insn) |
this_insn = next_nonnote_insn (this_insn); |
if (this_insn && this_insn == label) |
{ |
- if (jump_clobbers) |
- { |
- arm_ccfsm_state = 2; |
- this_insn = next_nonnote_insn (this_insn); |
- } |
- else |
- arm_ccfsm_state = 1; |
+ arm_ccfsm_state = 1; |
succeed = TRUE; |
} |
else |
@@ -14432,13 +16097,7 @@ arm_final_prescan_insn (rtx insn) |
if (this_insn && this_insn == label |
&& insns_skipped < max_insns_skipped) |
{ |
- if (jump_clobbers) |
- { |
- arm_ccfsm_state = 2; |
- this_insn = next_nonnote_insn (this_insn); |
- } |
- else |
- arm_ccfsm_state = 1; |
+ arm_ccfsm_state = 1; |
succeed = TRUE; |
} |
else |
@@ -14544,25 +16203,11 @@ arm_final_prescan_insn (rtx insn) |
} |
arm_target_insn = this_insn; |
} |
- if (jump_clobbers) |
- { |
- gcc_assert (!reverse); |
- arm_current_cc = |
- get_arm_condition_code (XEXP (XEXP (XEXP (SET_SRC (body), |
- 0), 0), 1)); |
- if (GET_CODE (XEXP (XEXP (SET_SRC (body), 0), 0)) == AND) |
- arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc); |
- if (GET_CODE (XEXP (SET_SRC (body), 0)) == NE) |
- arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc); |
- } |
- else |
- { |
- /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from |
- what it was. */ |
- if (!reverse) |
- arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), |
- 0)); |
- } |
+ |
+ /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from |
+ what it was. */ |
+ if (!reverse) |
+ arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0)); |
if (reverse || then_not_else) |
arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc); |
@@ -14629,6 +16274,11 @@ arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode) |
if (mode == DFmode) |
return VFP_REGNO_OK_FOR_DOUBLE (regno); |
+ /* VFP registers can hold HFmode values, but there is no point in |
+ putting them there unless we have hardware conversion insns. */ |
+ if (mode == HFmode) |
+ return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno); |
+ |
if (TARGET_NEON) |
return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno)) |
|| (VALID_NEON_QREG_MODE (mode) |
@@ -14651,13 +16301,13 @@ arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode) |
return VALID_IWMMXT_REG_MODE (mode); |
} |
- /* We allow any value to be stored in the general registers. |
+ /* We allow almost any value to be stored in the general registers. |
Restrict doubleword quantities to even register pairs so that we can |
- use ldrd. Do not allow Neon structure opaque modes in general registers; |
- they would use too many. */ |
+ use ldrd. Do not allow very large Neon structure opaque modes in |
+ general registers; they would use too many. */ |
if (regno <= LAST_ARM_REGNUM) |
return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0) |
- && !VALID_NEON_STRUCT_MODE (mode); |
+ && ARM_NUM_REGS (mode) <= 4; |
if (regno == FRAME_POINTER_REGNUM |
|| regno == ARG_POINTER_REGNUM) |
@@ -14674,7 +16324,8 @@ arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode) |
/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are |
not used in arm mode. */ |
-int |
+ |
+enum reg_class |
arm_regno_class (int regno) |
{ |
if (TARGET_THUMB1) |
@@ -14828,7 +16479,7 @@ static const struct builtin_description bdesc_2arg[] = |
{ |
#define IWMMXT_BUILTIN(code, string, builtin) \ |
{ FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \ |
- ARM_BUILTIN_##builtin, 0, 0 }, |
+ ARM_BUILTIN_##builtin, UNKNOWN, 0 }, |
IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB) |
IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH) |
@@ -14890,7 +16541,7 @@ static const struct builtin_description bdesc_2arg[] = |
IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU) |
#define IWMMXT_BUILTIN2(code, builtin) \ |
- { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, 0, 0 }, |
+ { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 }, |
IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS) |
IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS) |
@@ -15287,7 +16938,7 @@ arm_init_tls_builtins (void) |
TREE_READONLY (decl) = 1; |
} |
-typedef enum { |
+enum neon_builtin_type_bits { |
T_V8QI = 0x0001, |
T_V4HI = 0x0002, |
T_V2SI = 0x0004, |
@@ -15301,7 +16952,7 @@ typedef enum { |
T_TI = 0x0400, |
T_EI = 0x0800, |
T_OI = 0x1000 |
-} neon_builtin_type_bits; |
+}; |
#define v8qi_UP T_V8QI |
#define v4hi_UP T_V4HI |
@@ -15364,7 +17015,7 @@ typedef enum { |
typedef struct { |
const char *name; |
const neon_itype itype; |
- const neon_builtin_type_bits bits; |
+ const int bits; |
const enum insn_code codes[T_MAX]; |
const unsigned int num_vars; |
unsigned int base_fcode; |
@@ -16114,6 +17765,15 @@ arm_init_neon_builtins (void) |
} |
static void |
+arm_init_fp16_builtins (void) |
+{ |
+ tree fp16_type = make_node (REAL_TYPE); |
+ TYPE_PRECISION (fp16_type) = 16; |
+ layout_type (fp16_type); |
+ (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16"); |
+} |
+ |
+static void |
arm_init_builtins (void) |
{ |
arm_init_tls_builtins (); |
@@ -16123,6 +17783,71 @@ arm_init_builtins (void) |
if (TARGET_NEON) |
arm_init_neon_builtins (); |
+ |
+ if (arm_fp16_format) |
+ arm_init_fp16_builtins (); |
+} |
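+
+/* With __fp16 registered above (under -mfp16-format), user code such
+   as
+
+     __fp16 h = 1.0f;
+     float f = h + 1.0f;
+
+   is accepted, with the arithmetic done in float after promotion (see
+   arm_promoted_type below); using __fp16 as a parameter or return
+   type is rejected by the hooks that follow.  */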
+ |
+/* Implement TARGET_INVALID_PARAMETER_TYPE. */ |
+ |
+static const char * |
+arm_invalid_parameter_type (const_tree t) |
+{ |
+ if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16) |
+ return N_("function parameters cannot have __fp16 type"); |
+ return NULL; |
+} |
+ |
+/* Implement TARGET_INVALID_RETURN_TYPE.  */
+ |
+static const char * |
+arm_invalid_return_type (const_tree t) |
+{ |
+ if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16) |
+ return N_("functions cannot return __fp16 type"); |
+ return NULL; |
+} |
+ |
+/* Implement TARGET_PROMOTED_TYPE. */ |
+ |
+static tree |
+arm_promoted_type (const_tree t) |
+{ |
+ if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16) |
+ return float_type_node; |
+ return NULL_TREE; |
+} |
+ |
+/* Implement TARGET_CONVERT_TO_TYPE.
+   Specifically, this hook implements the peculiarity of the ARM
+   half-precision floating-point C semantics that requires conversions
+   between __fp16 and double to go via an intermediate conversion to
+   float.  */
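+/* For example "(double) h" for an __fp16 value h is expanded to
+   "(double)(float) h", and "(__fp16) d" for a double d to
+   "(__fp16)(float) d"; conversions between __fp16 and float are left
+   to the generic code.  */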
+ |
+static tree |
+arm_convert_to_type (tree type, tree expr) |
+{ |
+ tree fromtype = TREE_TYPE (expr); |
+ if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type)) |
+ return NULL_TREE; |
+ if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32) |
+ || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32)) |
+ return convert (type, convert (float_type_node, expr)); |
+ return NULL_TREE; |
+} |
+ |
+/* Implement TARGET_SCALAR_MODE_SUPPORTED_P. |
+ This simply adds HFmode as a supported mode; even though we don't |
+ implement arithmetic on this type directly, it's supported by |
+ optabs conversions, much the way the double-word arithmetic is |
+ special-cased in the default hook. */ |
+ |
+static bool |
+arm_scalar_mode_supported_p (enum machine_mode mode) |
+{ |
+ if (mode == HFmode) |
+ return (arm_fp16_format != ARM_FP16_FORMAT_NONE); |
+ else |
+ return default_scalar_mode_supported_p (mode); |
} |
/* Errors in the source file can cause expand_expr to return const0_rtx |
@@ -16281,7 +18006,7 @@ arm_expand_neon_args (rtx target, int icode, int have_retval, |
for (;;) |
{ |
- builtin_arg thisarg = va_arg (ap, int); |
+ builtin_arg thisarg = (builtin_arg) va_arg (ap, int); |
if (thisarg == NEON_ARG_STOP) |
break; |
@@ -17202,6 +18927,7 @@ thumb_shiftable_const (unsigned HOST_WIDE_INT val) |
unsigned HOST_WIDE_INT mask = 0xff; |
int i; |
+ val = val & (unsigned HOST_WIDE_INT)0xffffffffu; |
if (val == 0) /* XXX */ |
return 0; |
@@ -17311,7 +19037,7 @@ thumb_unexpanded_epilogue (void) |
int had_to_push_lr; |
int size; |
- if (return_used_this_function) |
+ if (cfun->machine->return_used_this_function != 0) |
return ""; |
if (IS_NAKED (arm_current_func_type ())) |
@@ -17635,9 +19361,7 @@ thumb1_expand_prologue (void) |
plus_constant (stack_pointer_rtx, |
-amount)); |
RTX_FRAME_RELATED_P (dwarf) = 1; |
- REG_NOTES (insn) |
- = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf, |
- REG_NOTES (insn)); |
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); |
} |
} |
@@ -18289,41 +20013,8 @@ arm_file_start (void) |
} |
else |
{ |
- int set_float_abi_attributes = 0; |
- switch (arm_fpu_arch) |
- { |
- case FPUTYPE_FPA: |
- fpu_name = "fpa"; |
- break; |
- case FPUTYPE_FPA_EMU2: |
- fpu_name = "fpe2"; |
- break; |
- case FPUTYPE_FPA_EMU3: |
- fpu_name = "fpe3"; |
- break; |
- case FPUTYPE_MAVERICK: |
- fpu_name = "maverick"; |
- break; |
- case FPUTYPE_VFP: |
- fpu_name = "vfp"; |
- set_float_abi_attributes = 1; |
- break; |
- case FPUTYPE_VFP3D16: |
- fpu_name = "vfpv3-d16"; |
- set_float_abi_attributes = 1; |
- break; |
- case FPUTYPE_VFP3: |
- fpu_name = "vfpv3"; |
- set_float_abi_attributes = 1; |
- break; |
- case FPUTYPE_NEON: |
- fpu_name = "neon"; |
- set_float_abi_attributes = 1; |
- break; |
- default: |
- abort(); |
- } |
- if (set_float_abi_attributes) |
+ fpu_name = arm_fpu_desc->name; |
+ if (arm_fpu_desc->model == ARM_FP_MODEL_VFP) |
{ |
if (TARGET_HARD_FLOAT) |
asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n"); |
@@ -18373,6 +20064,11 @@ arm_file_start (void) |
val = 6; |
asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val); |
+ /* Tag_ABI_FP_16bit_format. */ |
+ if (arm_fp16_format) |
+ asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n", |
+                 (int) arm_fp16_format);
+ |
if (arm_lang_output_object_attributes_hook) |
arm_lang_output_object_attributes_hook(); |
} |
@@ -18602,6 +20298,23 @@ arm_emit_vector_const (FILE *file, rtx x) |
return 1; |
} |
+/* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
+ HFmode constant pool entries are actually loaded with ldr. */ |
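+/* For example, HFmode 1.0 has the half-precision bit pattern 0x3c00;
+   on a little-endian target the pool entry is emitted as the bytes
+   00 3c 00 00, i.e. the 2-byte value padded up to the 4-byte word
+   that ldr expects.  */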
+void |
+arm_emit_fp16_const (rtx c) |
+{ |
+ REAL_VALUE_TYPE r; |
+ long bits; |
+ |
+ REAL_VALUE_FROM_CONST_DOUBLE (r, c); |
+ bits = real_to_target (NULL, &r, HFmode); |
+ if (WORDS_BIG_ENDIAN) |
+ assemble_zeros (2); |
+ assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1); |
+ if (!WORDS_BIG_ENDIAN) |
+ assemble_zeros (2); |
+} |
+ |
const char * |
arm_output_load_gr (rtx *operands) |
{ |
@@ -18639,19 +20352,24 @@ arm_output_load_gr (rtx *operands) |
that way. */ |
static void |
-arm_setup_incoming_varargs (CUMULATIVE_ARGS *cum, |
+arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum, |
enum machine_mode mode, |
tree type, |
int *pretend_size, |
int second_time ATTRIBUTE_UNUSED) |
{ |
- int nregs = cum->nregs; |
- if (nregs & 1 |
- && ARM_DOUBLEWORD_ALIGN |
- && arm_needs_doubleword_align (mode, type)) |
- nregs++; |
- |
+ int nregs; |
+ |
cfun->machine->uses_anonymous_args = 1; |
+ if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) |
+ { |
+ nregs = pcum->aapcs_ncrn; |
+ if ((nregs & 1) && arm_needs_doubleword_align (mode, type)) |
+ nregs++; |
+ } |
+ else |
+ nregs = pcum->nregs; |
+ |
if (nregs < NUM_ARG_REGS) |
*pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD; |
} |
@@ -18785,6 +20503,19 @@ arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED) |
return !TARGET_AAPCS_BASED; |
} |
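+
+/* Implement TARGET_PROMOTE_FUNCTION_MODE: pass and return sub-word
+   integers in a full 32-bit register, as both the AAPCS and the
+   traditional ARM ABI expect.  For example a QImode argument or
+   return value is widened to SImode; all other modes pass through
+   unchanged.  */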
+static enum machine_mode |
+arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED, |
+ enum machine_mode mode, |
+ int *punsignedp ATTRIBUTE_UNUSED, |
+ const_tree fntype ATTRIBUTE_UNUSED, |
+ int for_return ATTRIBUTE_UNUSED) |
+{ |
+ if (GET_MODE_CLASS (mode) == MODE_INT |
+ && GET_MODE_SIZE (mode) < 4) |
+ return SImode; |
+ |
+ return mode; |
+} |
/* AAPCS based ABIs use short enums by default. */ |
@@ -19035,9 +20766,10 @@ arm_vector_mode_supported_p (enum machine_mode mode) |
|| mode == V16QImode || mode == V4SFmode || mode == V2DImode)) |
return true; |
- if ((mode == V2SImode) |
- || (mode == V4HImode) |
- || (mode == V8QImode)) |
+ if ((TARGET_NEON || TARGET_IWMMXT) |
+ && ((mode == V2SImode) |
+ || (mode == V4HImode) |
+ || (mode == V8QImode))) |
return true; |
return false; |
@@ -19068,9 +20800,14 @@ arm_dbx_register_number (unsigned int regno) |
if (IS_FPA_REGNUM (regno)) |
return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM; |
- /* FIXME: VFPv3 register numbering. */ |
if (IS_VFP_REGNUM (regno)) |
- return 64 + regno - FIRST_VFP_REGNUM; |
+ { |
+ /* See comment in arm_dwarf_register_span. */ |
+ if (VFP_REGNO_OK_FOR_SINGLE (regno)) |
+ return 64 + regno - FIRST_VFP_REGNUM; |
+ else |
+ return 256 + (regno - FIRST_VFP_REGNUM) / 2; |
+ } |
if (IS_IWMMXT_GR_REGNUM (regno)) |
return 104 + regno - FIRST_IWMMXT_GR_REGNUM; |
@@ -19081,6 +20818,39 @@ arm_dbx_register_number (unsigned int regno) |
gcc_unreachable (); |
} |
+/* Dwarf models VFPv3 registers as 32 64-bit registers.
+   GCC models them as 64 32-bit registers, so we need to describe this
+   to the DWARF generation code.  Other registers can use the default.  */
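+/* For example, a DFmode value in d20 (which has no S-register alias)
+   is described as the single DWARF register 276 (256 + 20), whereas a
+   value in d5 returns NULL_RTX here and keeps the legacy S-register
+   numbering from arm_dbx_register_number.  */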
+static rtx |
+arm_dwarf_register_span (rtx rtl) |
+{ |
+ unsigned regno; |
+ int nregs; |
+ int i; |
+ rtx p; |
+ |
+ regno = REGNO (rtl); |
+ if (!IS_VFP_REGNUM (regno)) |
+ return NULL_RTX; |
+ |
+ /* XXX FIXME: The EABI defines two VFP register ranges: |
+ 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent) |
+ 256-287: D0-D31 |
+ The recommended encoding for S0-S31 is a DW_OP_bit_piece of the |
+ corresponding D register. Until GDB supports this, we shall use the |
+ legacy encodings. We also use these encodings for D0-D15 for |
+ compatibility with older debuggers. */ |
+ if (VFP_REGNO_OK_FOR_SINGLE (regno)) |
+ return NULL_RTX; |
+ |
+ nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8; |
+ p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs)); |
+ regno = (regno - FIRST_VFP_REGNUM) / 2; |
+ for (i = 0; i < nregs; i++) |
+ XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i); |
+ |
+ return p; |
+} |
#ifdef TARGET_UNWIND_INFO |
/* Emit unwind directives for a store-multiple instruction or stack pointer |
@@ -19404,7 +21174,7 @@ arm_emit_tls_decoration (FILE *fp, rtx x) |
rtx val; |
val = XVECEXP (x, 0, 0); |
- reloc = INTVAL (XVECEXP (x, 0, 1)); |
+ reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1)); |
output_addr_const (fp, val); |
@@ -19522,6 +21292,32 @@ arm_output_shift(rtx * operands, int set_flags) |
return ""; |
} |
+/* Output a Thumb-1 casesi dispatch sequence. */ |
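+/* The dispatch is a "bl" to a libgcc helper whose return address (lr)
+   points at the dispatch table that follows the call: e.g. a QImode
+   table with signed offsets uses __gnu_thumb1_case_sqi, which roughly
+   indexes the table with the case value and advances the return
+   address by twice the loaded offset.  */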
+const char * |
+thumb1_output_casesi (rtx *operands) |
+{ |
+ rtx diff_vec = PATTERN (next_real_insn (operands[0])); |
+ addr_diff_vec_flags flags; |
+ |
+ gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); |
+ |
+ flags = ADDR_DIFF_VEC_FLAGS (diff_vec); |
+ |
+  switch (GET_MODE (diff_vec))
+    {
+    case QImode:
+      return (flags.offset_unsigned
+              ? "bl\t%___gnu_thumb1_case_uqi"
+              : "bl\t%___gnu_thumb1_case_sqi");
+    case HImode:
+      return (flags.offset_unsigned
+              ? "bl\t%___gnu_thumb1_case_uhi"
+              : "bl\t%___gnu_thumb1_case_shi");
+ case SImode: |
+ return "bl\t%___gnu_thumb1_case_si"; |
+ default: |
+ gcc_unreachable (); |
+ } |
+} |
+ |
/* Output a Thumb-2 casesi instruction. */ |
const char * |
thumb2_output_casesi (rtx *operands) |
@@ -19622,7 +21418,7 @@ arm_mangle_type (const_tree type) |
&& lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type)) |
{ |
static bool warned; |
- if (!warned && warn_psabi) |
+ if (!warned && warn_psabi && !in_system_header) |
{ |
warned = true; |
inform (input_location, |
@@ -19631,6 +21427,10 @@ arm_mangle_type (const_tree type) |
return "St9__va_list"; |
} |
+ /* Half-precision float. */ |
+ if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16) |
+ return "Dh"; |
+ |
if (TREE_CODE (type) != VECTOR_TYPE) |
return NULL; |
@@ -19689,4 +21489,22 @@ arm_optimization_options (int level, int size ATTRIBUTE_UNUSED) |
flag_section_anchors = 2; |
} |
+/* Implement TARGET_FRAME_POINTER_REQUIRED. */ |
+ |
+bool |
+arm_frame_pointer_required (void) |
+{ |
+ return (cfun->has_nonlocal_label |
+ || SUBTARGET_FRAME_POINTER_REQUIRED |
+ || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ())); |
+} |
+ |
+/* Only Thumb-1 lacks conditional execution, so return true if the
+   target is not Thumb-1.  */
+static bool |
+arm_have_conditional_execution (void) |
+{ |
+ return !TARGET_THUMB1; |
+} |
+ |
#include "gt-arm.h" |