| Index: gcc/gcc/config/arm/arm.c
|
| diff --git a/gcc/gcc/config/arm/arm.c b/gcc/gcc/config/arm/arm.c
|
| index 7f346da319b1b3c301d84ef9cd71d873e8898cf4..a06a38bc8a9695732972d5867155b71756ff68b8 100644
|
| --- a/gcc/gcc/config/arm/arm.c
|
| +++ b/gcc/gcc/config/arm/arm.c
|
| @@ -1,6 +1,6 @@
|
| /* Output routines for GCC for ARM.
|
| Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
|
| - 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
|
| + 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
|
| Free Software Foundation, Inc.
|
| Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
|
| and Martin Simmons (@harleqn.co.uk).
|
| @@ -43,6 +43,7 @@
|
| #include "optabs.h"
|
| #include "toplev.h"
|
| #include "recog.h"
|
| +#include "cgraph.h"
|
| #include "ggc.h"
|
| #include "except.h"
|
| #include "c-pragma.h"
|
| @@ -53,14 +54,13 @@
|
| #include "debug.h"
|
| #include "langhooks.h"
|
| #include "df.h"
|
| +#include "intl.h"
|
| #include "libfuncs.h"
|
|
|
| /* Forward definitions of types. */
|
| typedef struct minipool_node Mnode;
|
| typedef struct minipool_fixup Mfix;
|
|
|
| -const struct attribute_spec arm_attribute_table[];
|
| -
|
| void (*arm_lang_output_object_attributes_hook)(void);
|
|
|
| /* Forward function declarations. */
|
| @@ -74,7 +74,10 @@ static int arm_address_register_rtx_p (rtx, int);
|
| static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
|
| static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
|
| static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
|
| +static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
|
| +static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
|
| inline static int thumb1_index_register_rtx_p (rtx, int);
|
| +static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
|
| static int thumb_far_jump_used_p (void);
|
| static bool thumb_force_lr_save (void);
|
| static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
|
| @@ -111,6 +114,7 @@ static unsigned long arm_compute_save_reg_mask (void);
|
| static unsigned long arm_isr_value (tree);
|
| static unsigned long arm_compute_func_type (void);
|
| static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
|
| +static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
|
| static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
|
| #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
|
| static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
|
| @@ -124,9 +128,17 @@ static int arm_adjust_cost (rtx, rtx, rtx, int);
|
| static int count_insns_for_constant (HOST_WIDE_INT, int);
|
| static int arm_get_strip_length (int);
|
| static bool arm_function_ok_for_sibcall (tree, tree);
|
| +static enum machine_mode arm_promote_function_mode (const_tree,
|
| + enum machine_mode, int *,
|
| + const_tree, int);
|
| +static bool arm_return_in_memory (const_tree, const_tree);
|
| +static rtx arm_function_value (const_tree, const_tree, bool);
|
| +static rtx arm_libcall_value (enum machine_mode, const_rtx);
|
| +
|
| static void arm_internal_label (FILE *, const char *, unsigned long);
|
| static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
|
| tree);
|
| +static bool arm_have_conditional_execution (void);
|
| static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
|
| static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
|
| static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
|
| @@ -149,6 +161,9 @@ static void emit_constant_insn (rtx cond, rtx pattern);
|
| static rtx emit_set_insn (rtx, rtx);
|
| static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
|
| tree, bool);
|
| +static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
|
| + const_tree);
|
| +static int aapcs_select_return_coproc (const_tree, const_tree);
|
|
|
| #ifdef OBJECT_FORMAT_ELF
|
| static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
|
| @@ -176,6 +191,7 @@ static void arm_unwind_emit (FILE *, rtx);
|
| static bool arm_output_ttype (rtx);
|
| #endif
|
| static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
|
| +static rtx arm_dwarf_register_span (rtx);
|
|
|
| static tree arm_cxx_guard_type (void);
|
| static bool arm_cxx_guard_mask_bit (void);
|
| @@ -198,14 +214,65 @@ static bool arm_tls_symbol_p (rtx x);
|
| static int arm_issue_rate (void);
|
| static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
|
| static bool arm_allocate_stack_slots_for_args (void);
|
| +static const char *arm_invalid_parameter_type (const_tree t);
|
| +static const char *arm_invalid_return_type (const_tree t);
|
| +static tree arm_promoted_type (const_tree t);
|
| +static tree arm_convert_to_type (tree type, tree expr);
|
| +static bool arm_scalar_mode_supported_p (enum machine_mode);
|
| +static bool arm_frame_pointer_required (void);
|
| +static bool arm_can_eliminate (const int, const int);
|
| +static void arm_asm_trampoline_template (FILE *);
|
| +static void arm_trampoline_init (rtx, tree, rtx);
|
| +static rtx arm_trampoline_adjust_address (rtx);
|
|
|
|
|
| +/* Table of machine attributes. */
|
| +static const struct attribute_spec arm_attribute_table[] =
|
| +{
|
| + /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
|
| + /* Function calls made to this symbol must be done indirectly, because
|
| + it may lie outside of the 26 bit addressing range of a normal function
|
| + call. */
|
| + { "long_call", 0, 0, false, true, true, NULL },
|
| + /* Whereas these functions are always known to reside within the 26 bit
|
| + addressing range. */
|
| + { "short_call", 0, 0, false, true, true, NULL },
|
| + /* Specify the procedure call conventions for a function. */
|
| + { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute },
|
| + /* Interrupt Service Routines have special prologue and epilogue requirements. */
|
| + { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
|
| + { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
|
| + { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
|
| +#ifdef ARM_PE
|
| + /* ARM/PE has three new attributes:
|
| + interfacearm - ?
|
| + dllexport - for exporting a function/variable that will live in a dll
|
| + dllimport - for importing a function/variable from a dll
|
| +
|
| + Microsoft allows multiple declspecs in one __declspec, separating
|
| + them with spaces. We do NOT support this. Instead, use __declspec
|
| + multiple times.
|
| + */
|
| + { "dllimport", 0, 0, true, false, false, NULL },
|
| + { "dllexport", 0, 0, true, false, false, NULL },
|
| + { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
|
| +#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
|
| + { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
|
| + { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
|
| + { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
|
| +#endif
|
| + { NULL, 0, 0, false, false, false, NULL }
|
| +};
|
| +
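[Note: the new "pcs" attribute registered above can be exercised directly
from C.  A minimal sketch, assuming an AAPCS-based target with VFP
available (the function name is hypothetical):

    /* Request the VFP variant of the AAPCS for this function type, so
       floating-point arguments travel in s0-s15 rather than r0-r3.  */
    double fast_dot (double, double) __attribute__ ((pcs ("aapcs-vfp")));

The string argument is validated by arm_handle_pcs_attribute against the
pcs_attribute_args table added later in this patch.]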
|
| /* Initialize the GCC target structure. */
|
| #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
|
| #undef TARGET_MERGE_DECL_ATTRIBUTES
|
| #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
|
| #endif
|
|
|
| +#undef TARGET_LEGITIMIZE_ADDRESS
|
| +#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
|
| +
|
| #undef TARGET_ATTRIBUTE_TABLE
|
| #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
|
|
|
| @@ -257,6 +324,12 @@ static bool arm_allocate_stack_slots_for_args (void);
|
| #undef TARGET_FUNCTION_OK_FOR_SIBCALL
|
| #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
|
|
|
| +#undef TARGET_FUNCTION_VALUE
|
| +#define TARGET_FUNCTION_VALUE arm_function_value
|
| +
|
| +#undef TARGET_LIBCALL_VALUE
|
| +#define TARGET_LIBCALL_VALUE arm_libcall_value
|
| +
|
| #undef TARGET_ASM_OUTPUT_MI_THUNK
|
| #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
|
| #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
|
| @@ -283,10 +356,8 @@ static bool arm_allocate_stack_slots_for_args (void);
|
| #undef TARGET_INIT_LIBFUNCS
|
| #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
|
|
|
| -#undef TARGET_PROMOTE_FUNCTION_ARGS
|
| -#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_const_tree_true
|
| -#undef TARGET_PROMOTE_FUNCTION_RETURN
|
| -#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_const_tree_true
|
| +#undef TARGET_PROMOTE_FUNCTION_MODE
|
| +#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
|
| #undef TARGET_PROMOTE_PROTOTYPES
|
| #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
|
| #undef TARGET_PASS_BY_REFERENCE
|
| @@ -300,6 +371,13 @@ static bool arm_allocate_stack_slots_for_args (void);
|
| #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
|
| #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
|
|
|
| +#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
|
| +#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
|
| +#undef TARGET_TRAMPOLINE_INIT
|
| +#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
|
| +#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
|
| +#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
|
| +
|
| #undef TARGET_DEFAULT_SHORT_ENUMS
|
| #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
|
|
|
| @@ -361,6 +439,9 @@ static bool arm_allocate_stack_slots_for_args (void);
|
| #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
|
| #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
|
|
|
| +#undef TARGET_DWARF_REGISTER_SPAN
|
| +#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
|
| +
|
| #undef TARGET_CANNOT_COPY_INSN_P
|
| #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
|
|
|
| @@ -369,6 +450,9 @@ static bool arm_allocate_stack_slots_for_args (void);
|
| #define TARGET_HAVE_TLS true
|
| #endif
|
|
|
| +#undef TARGET_HAVE_CONDITIONAL_EXECUTION
|
| +#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
|
| +
|
| #undef TARGET_CANNOT_FORCE_CONST_MEM
|
| #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
|
|
|
| @@ -399,6 +483,30 @@ static bool arm_allocate_stack_slots_for_args (void);
|
| #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
|
| #endif
|
|
|
| +#undef TARGET_LEGITIMATE_ADDRESS_P
|
| +#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
|
| +
|
| +#undef TARGET_INVALID_PARAMETER_TYPE
|
| +#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
|
| +
|
| +#undef TARGET_INVALID_RETURN_TYPE
|
| +#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
|
| +
|
| +#undef TARGET_PROMOTED_TYPE
|
| +#define TARGET_PROMOTED_TYPE arm_promoted_type
|
| +
|
| +#undef TARGET_CONVERT_TO_TYPE
|
| +#define TARGET_CONVERT_TO_TYPE arm_convert_to_type
|
| +
|
| +#undef TARGET_SCALAR_MODE_SUPPORTED_P
|
| +#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
|
| +
|
| +#undef TARGET_FRAME_POINTER_REQUIRED
|
| +#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
|
| +
|
| +#undef TARGET_CAN_ELIMINATE
|
| +#define TARGET_CAN_ELIMINATE arm_can_eliminate
|
| +
|
| struct gcc_target targetm = TARGET_INITIALIZER;
|
|
|
| /* Obstack for minipool constant handling. */
|
| @@ -414,28 +522,24 @@ extern FILE * asm_out_file;
|
| /* True if we are currently building a constant table. */
|
| int making_const_table;
|
|
|
| -/* Define the information needed to generate branch insns. This is
|
| - stored from the compare operation. */
|
| -rtx arm_compare_op0, arm_compare_op1;
|
| -
|
| /* The processor for which instructions should be scheduled. */
|
| enum processor_type arm_tune = arm_none;
|
|
|
| /* The default processor used if not overridden by commandline. */
|
| static enum processor_type arm_default_cpu = arm_none;
|
|
|
| -/* Which floating point model to use. */
|
| -enum arm_fp_model arm_fp_model;
|
| -
|
| -/* Which floating point hardware is available. */
|
| -enum fputype arm_fpu_arch;
|
| -
|
| /* Which floating point hardware to schedule for. */
|
| -enum fputype arm_fpu_tune;
|
| +int arm_fpu_attr;
|
| +
|
| +/* Which floating point hardware to use. */
|
| +const struct arm_fpu_desc *arm_fpu_desc;
|
|
|
| /* Whether to use floating point hardware. */
|
| enum float_abi_type arm_float_abi;
|
|
|
| +/* Which __fp16 format to use. */
|
| +enum arm_fp16_format_type arm_fp16_format;
|
| +
|
| /* Which ABI to use. */
|
| enum arm_abi_type arm_abi;
|
|
|
| @@ -474,6 +578,8 @@ static int thumb_call_reg_needed;
|
| #define FL_DIV (1 << 18) /* Hardware divide. */
|
| #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
|
| #define FL_NEON (1 << 20) /* Neon instructions. */
|
| +#define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
|
| + architecture. */
|
|
|
| #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
|
|
|
| @@ -495,9 +601,10 @@ static int thumb_call_reg_needed;
|
| #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
|
| #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
|
| #define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM)
|
| -#define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM)
|
| +#define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
|
| #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
|
| #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
|
| +#define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
|
|
|
| /* The bits in this mask specify which
|
| instructions we are allowed to generate. */
|
| @@ -534,6 +641,9 @@ int arm_arch6k = 0;
|
| /* Nonzero if instructions not present in the 'M' profile can be used. */
|
| int arm_arch_notm = 0;
|
|
|
| +/* Nonzero if instructions present in ARMv7E-M can be used. */
|
| +int arm_arch7em = 0;
|
| +
|
| /* Nonzero if this chip can benefit from load scheduling. */
|
| int arm_ld_sched = 0;
|
|
|
| @@ -583,10 +693,6 @@ enum machine_mode output_memory_reference_mode;
|
| /* The register number to be used for the PIC offset register. */
|
| unsigned arm_pic_register = INVALID_REGNUM;
|
|
|
| -/* Set to 1 when a return insn is output, this means that the epilogue
|
| - is not needed. */
|
| -int return_used_this_function;
|
| -
|
| /* Set to 1 after arm_reorg has started. Reset to start at the start of
|
| the next function. */
|
| static int after_arm_reorg = 0;
|
| @@ -594,6 +700,8 @@ static int after_arm_reorg = 0;
|
| /* The maximum number of insns to be used when loading a constant. */
|
| static int arm_constant_limit = 3;
|
|
|
| +static enum arm_pcs arm_pcs_default;
|
| +
|
| /* For an explanation of these variables, see final_prescan_insn below. */
|
| int arm_ccfsm_state;
|
| /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
|
| @@ -674,6 +782,7 @@ static const struct processors all_architectures[] =
|
| {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
|
| {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
|
| {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
|
| + {"armv7e-m", cortexm3, "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
|
| {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
|
| {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
|
| {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
|
| @@ -708,44 +817,29 @@ static struct arm_cpu_select arm_select[] =
|
|
|
| char arm_arch_name[] = "__ARM_ARCH_0UNK__";
|
|
|
| -struct fpu_desc
|
| -{
|
| - const char * name;
|
| - enum fputype fpu;
|
| -};
|
| -
|
| -
|
| /* Available values for -mfpu=. */
|
|
|
| -static const struct fpu_desc all_fpus[] =
|
| -{
|
| - {"fpa", FPUTYPE_FPA},
|
| - {"fpe2", FPUTYPE_FPA_EMU2},
|
| - {"fpe3", FPUTYPE_FPA_EMU2},
|
| - {"maverick", FPUTYPE_MAVERICK},
|
| - {"vfp", FPUTYPE_VFP},
|
| - {"vfp3", FPUTYPE_VFP3},
|
| - {"vfpv3", FPUTYPE_VFP3},
|
| - {"vfpv3-d16", FPUTYPE_VFP3D16},
|
| - {"neon", FPUTYPE_NEON}
|
| -};
|
| -
|
| -
|
| -/* Floating point models used by the different hardware.
|
| - See fputype in arm.h. */
|
| -
|
| -static const enum fputype fp_model_for_fpu[] =
|
| -{
|
| - /* No FP hardware. */
|
| - ARM_FP_MODEL_UNKNOWN, /* FPUTYPE_NONE */
|
| - ARM_FP_MODEL_FPA, /* FPUTYPE_FPA */
|
| - ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU2 */
|
| - ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU3 */
|
| - ARM_FP_MODEL_MAVERICK, /* FPUTYPE_MAVERICK */
|
| - ARM_FP_MODEL_VFP, /* FPUTYPE_VFP */
|
| - ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3D16 */
|
| - ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3 */
|
| - ARM_FP_MODEL_VFP /* FPUTYPE_NEON */
|
| +static const struct arm_fpu_desc all_fpus[] =
|
| +{
|
| + {"fpa", ARM_FP_MODEL_FPA, 0, VFP_NONE, false, false},
|
| + {"fpe2", ARM_FP_MODEL_FPA, 2, VFP_NONE, false, false},
|
| + {"fpe3", ARM_FP_MODEL_FPA, 3, VFP_NONE, false, false},
|
| + {"maverick", ARM_FP_MODEL_MAVERICK, 0, VFP_NONE, false, false},
|
| + {"vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, false, false},
|
| + {"vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
|
| + {"vfpv3-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, true},
|
| + {"vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, false},
|
| + {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, false, true},
|
| + {"vfpv3xd", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, false},
|
| + {"vfpv3xd-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, false, true},
|
| + {"neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , false},
|
| + {"neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, true , true },
|
| + {"vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, false, true},
|
| + {"vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, false, true},
|
| + {"fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, false, true},
|
| + {"neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, true, true},
|
| + /* Compatibility aliases. */
|
| + {"vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, false, false},
|
| };
|
|
|
|
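[Note: each initializer above pairs an -mfpu= name with its FP model,
revision, register-file layout, and Neon/fp16 capability.  A sketch of
how the selected descriptor is consumed, with field names inferred from
the initializer order (name, model, rev, regs, neon, fp16):

    /* True when the chosen -mfpu provides Neon together with the
       half-precision extension, e.g. "neon-fp16" or "neon-vfpv4".  */
    int have_neon_fp16 = arm_fpu_desc->neon && arm_fpu_desc->fp16;
]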
|
| @@ -766,6 +860,23 @@ static const struct float_abi all_float_abis[] =
|
| };
|
|
|
|
|
| +struct fp16_format
|
| +{
|
| + const char *name;
|
| + enum arm_fp16_format_type fp16_format_type;
|
| +};
|
| +
|
| +
|
| +/* Available values for -mfp16-format=. */
|
| +
|
| +static const struct fp16_format all_fp16_formats[] =
|
| +{
|
| + {"none", ARM_FP16_FORMAT_NONE},
|
| + {"ieee", ARM_FP16_FORMAT_IEEE},
|
| + {"alternative", ARM_FP16_FORMAT_ALTERNATIVE}
|
| +};
|
| +
|
| +
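[Note: the two non-trivial -mfp16-format values differ only in how the
maximum exponent is interpreted: "ieee" reserves it for infinities and
NaNs, while ARM's "alternative" format uses it for normalized values,
extending the range to +/-131008 at the cost of Inf/NaN.  The
user-visible effect is on the storage format of __fp16:

    __fp16 h = 1.0;       /* Stored in the 16-bit format chosen above.  */
    float  f = h + 2.0f;  /* Arithmetic happens after widening to float. */
]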
|
| struct abi_name
|
| {
|
| const char *name;
|
| @@ -924,6 +1035,44 @@ arm_init_libfuncs (void)
|
| set_optab_libfunc (smod_optab, SImode, NULL);
|
| set_optab_libfunc (umod_optab, SImode, NULL);
|
|
|
| + /* Half-precision float operations. The compiler handles all operations
|
| + with NULL libfuncs by converting to SFmode. */
|
| + switch (arm_fp16_format)
|
| + {
|
| + case ARM_FP16_FORMAT_IEEE:
|
| + case ARM_FP16_FORMAT_ALTERNATIVE:
|
| +
|
| + /* Conversions. */
|
| + set_conv_libfunc (trunc_optab, HFmode, SFmode,
|
| + (arm_fp16_format == ARM_FP16_FORMAT_IEEE
|
| + ? "__gnu_f2h_ieee"
|
| + : "__gnu_f2h_alternative"));
|
| + set_conv_libfunc (sext_optab, SFmode, HFmode,
|
| + (arm_fp16_format == ARM_FP16_FORMAT_IEEE
|
| + ? "__gnu_h2f_ieee"
|
| + : "__gnu_h2f_alternative"));
|
| +
|
| + /* Arithmetic. */
|
| + set_optab_libfunc (add_optab, HFmode, NULL);
|
| + set_optab_libfunc (sdiv_optab, HFmode, NULL);
|
| + set_optab_libfunc (smul_optab, HFmode, NULL);
|
| + set_optab_libfunc (neg_optab, HFmode, NULL);
|
| + set_optab_libfunc (sub_optab, HFmode, NULL);
|
| +
|
| + /* Comparisons. */
|
| + set_optab_libfunc (eq_optab, HFmode, NULL);
|
| + set_optab_libfunc (ne_optab, HFmode, NULL);
|
| + set_optab_libfunc (lt_optab, HFmode, NULL);
|
| + set_optab_libfunc (le_optab, HFmode, NULL);
|
| + set_optab_libfunc (ge_optab, HFmode, NULL);
|
| + set_optab_libfunc (gt_optab, HFmode, NULL);
|
| + set_optab_libfunc (unord_optab, HFmode, NULL);
|
| + break;
|
| +
|
| + default:
|
| + break;
|
| + }
|
| +
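[Note: leaving the HFmode arithmetic and comparison optabs NULL is what
makes the midend widen half-precision operands; only the two conversion
routines are registered.  So a fragment such as

    __fp16 a, b, c;
    c = a + b;

lowers (in the IEEE case) to __gnu_h2f_ieee on each operand, an SFmode
addition, and __gnu_f2h_ieee on the result.]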
|
| if (TARGET_AAPCS_BASED)
|
| synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
|
| }
|
| @@ -959,13 +1108,15 @@ arm_build_builtin_va_list (void)
|
| /* Create the type. */
|
| va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
|
| /* Give it the required name. */
|
| - va_list_name = build_decl (TYPE_DECL,
|
| + va_list_name = build_decl (BUILTINS_LOCATION,
|
| + TYPE_DECL,
|
| get_identifier ("__va_list"),
|
| va_list_type);
|
| DECL_ARTIFICIAL (va_list_name) = 1;
|
| TYPE_NAME (va_list_type) = va_list_name;
|
| /* Create the __ap field. */
|
| - ap_field = build_decl (FIELD_DECL,
|
| + ap_field = build_decl (BUILTINS_LOCATION,
|
| + FIELD_DECL,
|
| get_identifier ("__ap"),
|
| ptr_type_node);
|
| DECL_ARTIFICIAL (ap_field) = 1;
|
| @@ -1207,13 +1358,13 @@ arm_override_options (void)
|
| const struct processors * sel;
|
| unsigned int sought;
|
|
|
| - selected_cpu = TARGET_CPU_DEFAULT;
|
| + selected_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
|
| if (selected_cpu == arm_none)
|
| {
|
| #ifdef SUBTARGET_CPU_DEFAULT
|
| /* Use the subtarget default CPU if none was specified by
|
| configure. */
|
| - selected_cpu = SUBTARGET_CPU_DEFAULT;
|
| + selected_cpu = (enum processor_type) SUBTARGET_CPU_DEFAULT;
|
| #endif
|
| /* Default to ARM6. */
|
| if (selected_cpu == arm_none)
|
| @@ -1295,6 +1446,23 @@ arm_override_options (void)
|
|
|
| tune_flags = all_cores[(int)arm_tune].flags;
|
|
|
| + if (target_fp16_format_name)
|
| + {
|
| + for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
|
| + {
|
| + if (streq (all_fp16_formats[i].name, target_fp16_format_name))
|
| + {
|
| + arm_fp16_format = all_fp16_formats[i].fp16_format_type;
|
| + break;
|
| + }
|
| + }
|
| + if (i == ARRAY_SIZE (all_fp16_formats))
|
| + error ("invalid __fp16 format option: -mfp16-format=%s",
|
| + target_fp16_format_name);
|
| + }
|
| + else
|
| + arm_fp16_format = ARM_FP16_FORMAT_NONE;
|
| +
|
| if (target_abi_name)
|
| {
|
| for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
|
| @@ -1387,6 +1555,7 @@ arm_override_options (void)
|
| arm_arch6 = (insn_flags & FL_ARCH6) != 0;
|
| arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
|
| arm_arch_notm = (insn_flags & FL_NOTM) != 0;
|
| + arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
|
| arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
|
| arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
|
| arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
|
| @@ -1438,7 +1607,6 @@ arm_override_options (void)
|
| if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
|
| error ("iwmmxt abi requires an iwmmxt capable cpu");
|
|
|
| - arm_fp_model = ARM_FP_MODEL_UNKNOWN;
|
| if (target_fpu_name == NULL && target_fpe_name != NULL)
|
| {
|
| if (streq (target_fpe_name, "2"))
|
| @@ -1449,46 +1617,56 @@ arm_override_options (void)
|
| error ("invalid floating point emulation option: -mfpe=%s",
|
| target_fpe_name);
|
| }
|
| - if (target_fpu_name != NULL)
|
| - {
|
| - /* The user specified a FPU. */
|
| - for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
|
| - {
|
| - if (streq (all_fpus[i].name, target_fpu_name))
|
| - {
|
| - arm_fpu_arch = all_fpus[i].fpu;
|
| - arm_fpu_tune = arm_fpu_arch;
|
| - arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
|
| - break;
|
| - }
|
| - }
|
| - if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
|
| - error ("invalid floating point option: -mfpu=%s", target_fpu_name);
|
| - }
|
| - else
|
| +
|
| + if (target_fpu_name == NULL)
|
| {
|
| #ifdef FPUTYPE_DEFAULT
|
| - /* Use the default if it is specified for this platform. */
|
| - arm_fpu_arch = FPUTYPE_DEFAULT;
|
| - arm_fpu_tune = FPUTYPE_DEFAULT;
|
| + target_fpu_name = FPUTYPE_DEFAULT;
|
| #else
|
| - /* Pick one based on CPU type. */
|
| - /* ??? Some targets assume FPA is the default.
|
| - if ((insn_flags & FL_VFP) != 0)
|
| - arm_fpu_arch = FPUTYPE_VFP;
|
| - else
|
| - */
|
| if (arm_arch_cirrus)
|
| - arm_fpu_arch = FPUTYPE_MAVERICK;
|
| + target_fpu_name = "maverick";
|
| else
|
| - arm_fpu_arch = FPUTYPE_FPA_EMU2;
|
| + target_fpu_name = "fpe2";
|
| #endif
|
| - if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
|
| - arm_fpu_tune = FPUTYPE_FPA;
|
| + }
|
| +
|
| + arm_fpu_desc = NULL;
|
| + for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
|
| + {
|
| + if (streq (all_fpus[i].name, target_fpu_name))
|
| + {
|
| + arm_fpu_desc = &all_fpus[i];
|
| + break;
|
| + }
|
| + }
|
| +
|
| + if (!arm_fpu_desc)
|
| + {
|
| + error ("invalid floating point option: -mfpu=%s", target_fpu_name);
|
| + return;
|
| + }
|
| +
|
| + switch (arm_fpu_desc->model)
|
| + {
|
| + case ARM_FP_MODEL_FPA:
|
| + if (arm_fpu_desc->rev == 2)
|
| + arm_fpu_attr = FPU_FPE2;
|
| + else if (arm_fpu_desc->rev == 3)
|
| + arm_fpu_attr = FPU_FPE3;
|
| else
|
| - arm_fpu_tune = arm_fpu_arch;
|
| - arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
|
| - gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
|
| + arm_fpu_attr = FPU_FPA;
|
| + break;
|
| +
|
| + case ARM_FP_MODEL_MAVERICK:
|
| + arm_fpu_attr = FPU_MAVERICK;
|
| + break;
|
| +
|
| + case ARM_FP_MODEL_VFP:
|
| + arm_fpu_attr = FPU_VFP;
|
| + break;
|
| +
|
| + default:
|
| + gcc_unreachable();
|
| }
|
|
|
| if (target_float_abi_name != NULL)
|
| @@ -1509,8 +1687,18 @@ arm_override_options (void)
|
| else
|
| arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
|
|
|
| - if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
|
| - sorry ("-mfloat-abi=hard and VFP");
|
| + if (TARGET_AAPCS_BASED
|
| + && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
|
| + error ("FPA is unsupported in the AAPCS");
|
| +
|
| + if (TARGET_AAPCS_BASED)
|
| + {
|
| + if (TARGET_CALLER_INTERWORKING)
|
| + error ("AAPCS does not support -mcaller-super-interworking");
|
| + else
|
| + if (TARGET_CALLEE_INTERWORKING)
|
| + error ("AAPCS does not support -mcallee-super-interworking");
|
| + }
|
|
|
| /* FPA and iWMMXt are incompatible because the insn encodings overlap.
|
| VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
|
| @@ -1522,15 +1710,40 @@ arm_override_options (void)
|
| if (TARGET_THUMB2 && TARGET_IWMMXT)
|
| sorry ("Thumb-2 iWMMXt");
|
|
|
| + /* __fp16 support currently assumes the core has ldrh. */
|
| + if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
|
| + sorry ("__fp16 and no ldrh");
|
| +
|
| /* If soft-float is specified then don't use FPU. */
|
| if (TARGET_SOFT_FLOAT)
|
| - arm_fpu_arch = FPUTYPE_NONE;
|
| + arm_fpu_attr = FPU_NONE;
|
| +
|
| + if (TARGET_AAPCS_BASED)
|
| + {
|
| + if (arm_abi == ARM_ABI_IWMMXT)
|
| + arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
|
| + else if (arm_float_abi == ARM_FLOAT_ABI_HARD
|
| + && TARGET_HARD_FLOAT
|
| + && TARGET_VFP)
|
| + arm_pcs_default = ARM_PCS_AAPCS_VFP;
|
| + else
|
| + arm_pcs_default = ARM_PCS_AAPCS;
|
| + }
|
| + else
|
| + {
|
| + if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
|
| + sorry ("-mfloat-abi=hard and VFP");
|
| +
|
| + if (arm_abi == ARM_ABI_APCS)
|
| + arm_pcs_default = ARM_PCS_APCS;
|
| + else
|
| + arm_pcs_default = ARM_PCS_ATPCS;
|
| + }
|
|
|
| /* For arm2/3 there is no need to do any scheduling if there is only
|
| a floating point emulator, or we are doing software floating-point. */
|
| if ((TARGET_SOFT_FLOAT
|
| - || arm_fpu_tune == FPUTYPE_FPA_EMU2
|
| - || arm_fpu_tune == FPUTYPE_FPA_EMU3)
|
| + || (TARGET_FPA && arm_fpu_desc->rev))
|
| && (tune_flags & FL_MODE32) == 0)
|
| flag_schedule_insns = flag_schedule_insns_after_reload = 0;
|
|
|
| @@ -1549,7 +1762,7 @@ arm_override_options (void)
|
| /* Use the cp15 method if it is available. */
|
| if (target_thread_pointer == TP_AUTO)
|
| {
|
| - if (arm_arch6k && !TARGET_THUMB)
|
| + if (arm_arch6k && !TARGET_THUMB1)
|
| target_thread_pointer = TP_CP15;
|
| else
|
| target_thread_pointer = TP_SOFT;
|
| @@ -1620,8 +1833,7 @@ arm_override_options (void)
|
| fix_cm3_ldrd = 0;
|
| }
|
|
|
| - /* ??? We might want scheduling for thumb2. */
|
| - if (TARGET_THUMB && flag_schedule_insns)
|
| + if (TARGET_THUMB1 && flag_schedule_insns)
|
| {
|
| /* Don't warn since it's on by default in -O2. */
|
| flag_schedule_insns = 0;
|
| @@ -1655,12 +1867,15 @@ arm_override_options (void)
|
| max_insns_skipped = 3;
|
| }
|
|
|
| - /* Ideally we would want to use CFI directives to generate
|
| - debug info. However this also creates the .eh_frame
|
| - section, so disable them until GAS can handle
|
| - this properly. See PR40521. */
|
| - if (TARGET_AAPCS_BASED)
|
| - flag_dwarf2_cfi_asm = 0;
|
| + /* Hot/Cold partitioning is not currently supported, since we can't
|
| + handle literal pool placement in that case. */
|
| + if (flag_reorder_blocks_and_partition)
|
| + {
|
| + inform (input_location,
|
| + "-freorder-blocks-and-partition not supported on this architecture");
|
| + flag_reorder_blocks_and_partition = 0;
|
| + flag_reorder_blocks = 1;
|
| + }
|
|
|
| /* Register global variables with the garbage collector. */
|
| arm_add_gc_roots ();
|
| @@ -1794,6 +2009,84 @@ arm_allocate_stack_slots_for_args (void)
|
| }
|
|
|
|
|
| +/* Output assembler code for a block containing the constant parts
|
| + of a trampoline, leaving space for the variable parts.
|
| +
|
| + On the ARM, (if r8 is the static chain regnum, and remembering that
|
| + referencing pc adds an offset of 8) the trampoline looks like:
|
| + ldr r8, [pc, #0]
|
| + ldr pc, [pc]
|
| + .word static chain value
|
| + .word function's address
|
| + XXX FIXME: When the trampoline returns, r8 will be clobbered. */
|
| +
|
| +static void
|
| +arm_asm_trampoline_template (FILE *f)
|
| +{
|
| + if (TARGET_ARM)
|
| + {
|
| + asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
|
| + asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
|
| + }
|
| + else if (TARGET_THUMB2)
|
| + {
|
| + /* The Thumb-2 trampoline is similar to the ARM implementation.
|
| + Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
|
| + asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
|
| + STATIC_CHAIN_REGNUM, PC_REGNUM);
|
| + asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
|
| + }
|
| + else
|
| + {
|
| + ASM_OUTPUT_ALIGN (f, 2);
|
| + fprintf (f, "\t.code\t16\n");
|
| + fprintf (f, ".Ltrampoline_start:\n");
|
| + asm_fprintf (f, "\tpush\t{r0, r1}\n");
|
| + asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
|
| + asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
|
| + asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
|
| + asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
|
| + asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
|
| + }
|
| + assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
|
| + assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
|
| +}
|
| +
|
| +/* Emit RTL insns to initialize the variable parts of a trampoline. */
|
| +
|
| +static void
|
| +arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
|
| +{
|
| + rtx fnaddr, mem, a_tramp;
|
| +
|
| + emit_block_move (m_tramp, assemble_trampoline_template (),
|
| + GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
|
| +
|
| + mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
|
| + emit_move_insn (mem, chain_value);
|
| +
|
| + mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
|
| + fnaddr = XEXP (DECL_RTL (fndecl), 0);
|
| + emit_move_insn (mem, fnaddr);
|
| +
|
| + a_tramp = XEXP (m_tramp, 0);
|
| + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
|
| + LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
|
| + plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
|
| +}
|
| +
|
| +/* Thumb trampolines should be entered in thumb mode, so set
|
| + the bottom bit of the address. */
|
| +
|
| +static rtx
|
| +arm_trampoline_adjust_address (rtx addr)
|
| +{
|
| + if (TARGET_THUMB)
|
| + addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
|
| + NULL, 0, OPTAB_LIB_WIDEN);
|
| + return addr;
|
| +}
|
| +
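[Note: these three hooks only come into play for GNU C nested functions
whose address is taken, which is what materializes a trampoline on the
stack.  A minimal sketch:

    int outer (int x)
    {
      int inner (int y) { return x + y; }  /* Needs the static chain.  */
      int (*fp) (int) = inner;  /* Forces a stack trampoline; on Thumb
                                   targets arm_trampoline_adjust_address
                                   sets bit 0 so the indirect call stays
                                   in Thumb state.  */
      return fp (1);
    }
]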
|
| /* Return 1 if it is possible to return using a single instruction.
|
| If SIBLING is non-null, this is a test for a return before a sibling
|
| call. SIBLING is the call insn, so we can examine its register usage. */
|
| @@ -2014,7 +2307,11 @@ const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
|
|
|
| case MINUS: /* Should only occur with (MINUS I reg) => rsb */
|
| case XOR:
|
| + return 0;
|
| +
|
| case IOR:
|
| + if (TARGET_THUMB2)
|
| + return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
|
| return 0;
|
|
|
| case AND:
|
| @@ -2102,20 +2399,24 @@ arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
|
| 1);
|
| }
|
|
|
| -/* Return the number of ARM instructions required to synthesize the given
|
| - constant. */
|
| +/* Return the number of instructions required to synthesize the given
|
| + constant, if we start emitting them from bit-position I. */
|
| static int
|
| count_insns_for_constant (HOST_WIDE_INT remainder, int i)
|
| {
|
| HOST_WIDE_INT temp1;
|
| + int step_size = TARGET_ARM ? 2 : 1;
|
| int num_insns = 0;
|
| +
|
| + gcc_assert (TARGET_ARM || i == 0);
|
| +
|
| do
|
| {
|
| int end;
|
|
|
| if (i <= 0)
|
| i += 32;
|
| - if (remainder & (3 << (i - 2)))
|
| + if (remainder & (((1 << step_size) - 1) << (i - step_size)))
|
| {
|
| end = i - 8;
|
| if (end < 0)
|
| @@ -2124,13 +2425,77 @@ count_insns_for_constant (HOST_WIDE_INT remainder, int i)
|
| | ((i < end) ? (0xff >> (32 - end)) : 0));
|
| remainder &= ~temp1;
|
| num_insns++;
|
| - i -= 6;
|
| + i -= 8 - step_size;
|
| }
|
| - i -= 2;
|
| + i -= step_size;
|
| } while (remainder);
|
| return num_insns;
|
| }
|
|
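[Note: step_size reflects the immediate-encoding granularity: an ARM-mode
data-processing immediate is an 8-bit value rotated right by an even
amount, while a Thumb-2 modified immediate allows odd rotations too, so
the scan may advance one bit at a time.  For example:

    0x000003fc   /* 0xff << 2, even rotation: one ARM mov          */
    0x000001fe   /* 0xff << 1, odd rotation: two ARM instructions,
                    but a single Thumb-2 mov                       */
]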
|
| +static int
|
| +find_best_start (unsigned HOST_WIDE_INT remainder)
|
| +{
|
| + int best_consecutive_zeros = 0;
|
| + int i;
|
| + int best_start = 0;
|
| +
|
| + /* If we aren't targeting ARM, the best place to start is always at
|
| + the bottom. */
|
| + if (! TARGET_ARM)
|
| + return 0;
|
| +
|
| + for (i = 0; i < 32; i += 2)
|
| + {
|
| + int consecutive_zeros = 0;
|
| +
|
| + if (!(remainder & (3 << i)))
|
| + {
|
| + while ((i < 32) && !(remainder & (3 << i)))
|
| + {
|
| + consecutive_zeros += 2;
|
| + i += 2;
|
| + }
|
| + if (consecutive_zeros > best_consecutive_zeros)
|
| + {
|
| + best_consecutive_zeros = consecutive_zeros;
|
| + best_start = i - consecutive_zeros;
|
| + }
|
| + i -= 2;
|
| + }
|
| + }
|
| +
|
| + /* So long as it won't require any more insns to do so, it's
|
| + desirable to emit a small constant (in bits 0...9) in the last
|
| + insn. This way there is more chance that it can be combined with
|
| + a later addressing insn to form a pre-indexed load or store
|
| + operation. Consider:
|
| +
|
| + *((volatile int *)0xe0000100) = 1;
|
| + *((volatile int *)0xe0000110) = 2;
|
| +
|
| + We want this to wind up as:
|
| +
|
| + mov rA, #0xe0000000
|
| + mov rB, #1
|
| + str rB, [rA, #0x100]
|
| + mov rB, #2
|
| + str rB, [rA, #0x110]
|
| +
|
| + rather than having to synthesize both large constants from scratch.
|
| +
|
| + Therefore, we calculate how many insns would be required to emit
|
| + the constant starting from `best_start', and also starting from
|
| + zero (i.e. with bit 31 first to be output). If `best_start' doesn't
|
| + yield a shorter sequence, we may as well use zero. */
|
| + if (best_start != 0
|
| + && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
|
| + && (count_insns_for_constant (remainder, 0) <=
|
| + count_insns_for_constant (remainder, best_start)))
|
| + best_start = 0;
|
| +
|
| + return best_start;
|
| +}
|
| +
|
| /* Emit an instruction with the indicated PATTERN. If COND is
|
| non-NULL, conditionalize the execution of the instruction on COND
|
| being true. */
|
| @@ -2154,6 +2519,7 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
|
| {
|
| int can_invert = 0;
|
| int can_negate = 0;
|
| + int final_invert = 0;
|
| int can_negate_initial = 0;
|
| int can_shift = 0;
|
| int i;
|
| @@ -2165,6 +2531,7 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
|
| int insns = 0;
|
| unsigned HOST_WIDE_INT temp1, temp2;
|
| unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
|
| + int step_size = TARGET_ARM ? 2 : 1;
|
|
|
| /* Find out which operations are safe for a given CODE. Also do a quick
|
| check for degenerate cases; these can occur when DImode operations
|
| @@ -2191,15 +2558,20 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
|
| GEN_INT (ARM_SIGN_EXTEND (val))));
|
| return 1;
|
| }
|
| +
|
| if (remainder == 0)
|
| {
|
| if (reload_completed && rtx_equal_p (target, source))
|
| return 0;
|
| +
|
| if (generate)
|
| emit_constant_insn (cond,
|
| gen_rtx_SET (VOIDmode, target, source));
|
| return 1;
|
| }
|
| +
|
| + if (TARGET_THUMB2)
|
| + can_invert = 1;
|
| break;
|
|
|
| case AND:
|
| @@ -2233,14 +2605,15 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
|
| return 1;
|
| }
|
|
|
| - /* We don't know how to handle other cases yet. */
|
| - gcc_assert (remainder == 0xffffffff);
|
| -
|
| - if (generate)
|
| - emit_constant_insn (cond,
|
| - gen_rtx_SET (VOIDmode, target,
|
| - gen_rtx_NOT (mode, source)));
|
| - return 1;
|
| + if (remainder == 0xffffffff)
|
| + {
|
| + if (generate)
|
| + emit_constant_insn (cond,
|
| + gen_rtx_SET (VOIDmode, target,
|
| + gen_rtx_NOT (mode, source)));
|
| + return 1;
|
| + }
|
| + break;
|
|
|
| case MINUS:
|
| /* We treat MINUS as (val - source), since (source - val) is always
|
| @@ -2287,6 +2660,7 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
|
|
|
| /* Calculate a few attributes that may be useful for specific
|
| optimizations. */
|
| + /* Count number of leading zeros. */
|
| for (i = 31; i >= 0; i--)
|
| {
|
| if ((remainder & (1 << i)) == 0)
|
| @@ -2295,6 +2669,7 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
|
| break;
|
| }
|
|
|
| + /* Count number of leading ones. */
|
| for (i = 31; i >= 0; i--)
|
| {
|
| if ((remainder & (1 << i)) != 0)
|
| @@ -2303,6 +2678,7 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
|
| break;
|
| }
|
|
|
| + /* Count number of trailing zeros. */
|
| for (i = 0; i <= 31; i++)
|
| {
|
| if ((remainder & (1 << i)) == 0)
|
| @@ -2311,6 +2687,7 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
|
| break;
|
| }
|
|
|
| + /* Count number of trailing ones. */
|
| for (i = 0; i <= 31; i++)
|
| {
|
| if ((remainder & (1 << i)) != 0)
|
| @@ -2498,6 +2875,17 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
|
| if (code == XOR)
|
| break;
|
|
|
| + /* Convert
|
| + x = y | constant (which is composed of set_sign_bit_copies of leading 1s
|
| + and the remainder 0s, e.g. 0xfff00000) into
|
| + x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
|
| +
|
| + This can be done in 2 instructions by using shifts with mov or mvn.
|
| + E.g. for
|
| + x = x | 0xfff00000;
|
| + we generate:
|
| + mvn r0, r0, asl #12
|
| + mvn r0, r0, lsr #12 */
|
| if (set_sign_bit_copies > 8
|
| && (val & (-1 << (32 - set_sign_bit_copies))) == val)
|
| {
|
| @@ -2523,6 +2911,16 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
|
| return 2;
|
| }
|
|
|
| + /* Convert
|
| + x = y | constant (which has set_zero_bit_copies trailing ones)
|
| + to
|
| + x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
|
| + E.g. for r0 = r0 | 0xfff we generate:
|
| + mvn r0, r0, lsr #12
|
| + mvn r0, r0, asl #12 */
|
| if (set_zero_bit_copies > 8
|
| && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
|
| {
|
| @@ -2548,6 +2946,13 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
|
| return 2;
|
| }
|
|
|
| + /* This will never be reached for Thumb-2 because orn is a valid
|
| + instruction. This is for Thumb-1 and the 32-bit ARM cases.
|
| + x = y | constant (such that ~constant is a valid constant).
|
| + Transform this to
|
| + x = ~(~y & ~constant). */
|
| if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
|
| {
|
| if (generate)
|
| @@ -2657,10 +3062,27 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
|
| if (remainder & (1 << i))
|
| num_bits_set++;
|
|
|
| - if (code == AND || (can_invert && num_bits_set > 16))
|
| - remainder = (~remainder) & 0xffffffff;
|
| + if ((code == AND)
|
| + || (code != IOR && can_invert && num_bits_set > 16))
|
| + remainder ^= 0xffffffff;
|
| else if (code == PLUS && num_bits_set > 16)
|
| remainder = (-remainder) & 0xffffffff;
|
| +
|
| + /* For XOR, if more than half the bits are set and there's a sequence
|
| + of more than 8 consecutive ones in the pattern then we can XOR by the
|
| + inverted constant and then invert the final result; this may save an
|
| + instruction and might also lead to the final mvn being merged with
|
| + some other operation. */
|
| + else if (code == XOR && num_bits_set > 16
|
| + && (count_insns_for_constant (remainder ^ 0xffffffff,
|
| + find_best_start
|
| + (remainder ^ 0xffffffff))
|
| + < count_insns_for_constant (remainder,
|
| + find_best_start (remainder))))
|
| + {
|
| + remainder ^= 0xffffffff;
|
| + final_invert = 1;
|
| + }
|
| else
|
| {
|
| can_invert = 0;
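[Note: a worked instance of the new XOR path.  For

    x ^= 0xfffffffe;   /* 31 bits set; not a valid immediate.  */

num_bits_set > 16 and the inverted constant 0x00000001 is cheaper, so by
the identity x ^ ~c == ~(x ^ c) the sequence becomes

    eor rD, rS, #1
    mvn rD, rD

with the trailing mvn emitted by the new final_invert handling at the
end of arm_gen_constant (see below).]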
|
| @@ -2679,63 +3101,8 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
|
| /* ??? Use thumb2 replicated constants when the high and low halfwords are
|
| the same. */
|
| {
|
| - int best_start = 0;
|
| - if (!TARGET_THUMB2)
|
| - {
|
| - int best_consecutive_zeros = 0;
|
| -
|
| - for (i = 0; i < 32; i += 2)
|
| - {
|
| - int consecutive_zeros = 0;
|
| -
|
| - if (!(remainder & (3 << i)))
|
| - {
|
| - while ((i < 32) && !(remainder & (3 << i)))
|
| - {
|
| - consecutive_zeros += 2;
|
| - i += 2;
|
| - }
|
| - if (consecutive_zeros > best_consecutive_zeros)
|
| - {
|
| - best_consecutive_zeros = consecutive_zeros;
|
| - best_start = i - consecutive_zeros;
|
| - }
|
| - i -= 2;
|
| - }
|
| - }
|
| -
|
| - /* So long as it won't require any more insns to do so, it's
|
| - desirable to emit a small constant (in bits 0...9) in the last
|
| - insn. This way there is more chance that it can be combined with
|
| - a later addressing insn to form a pre-indexed load or store
|
| - operation. Consider:
|
| -
|
| - *((volatile int *)0xe0000100) = 1;
|
| - *((volatile int *)0xe0000110) = 2;
|
| -
|
| - We want this to wind up as:
|
| -
|
| - mov rA, #0xe0000000
|
| - mov rB, #1
|
| - str rB, [rA, #0x100]
|
| - mov rB, #2
|
| - str rB, [rA, #0x110]
|
| -
|
| - rather than having to synthesize both large constants from scratch.
|
| -
|
| - Therefore, we calculate how many insns would be required to emit
|
| - the constant starting from `best_start', and also starting from
|
| - zero (i.e. with bit 31 first to be output). If `best_start' doesn't
|
| - yield a shorter sequence, we may as well use zero. */
|
| - if (best_start != 0
|
| - && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
|
| - && (count_insns_for_constant (remainder, 0) <=
|
| - count_insns_for_constant (remainder, best_start)))
|
| - best_start = 0;
|
| - }
|
| -
|
| /* Now start emitting the insns. */
|
| - i = best_start;
|
| + i = find_best_start (remainder);
|
| do
|
| {
|
| int end;
|
| @@ -2763,7 +3130,7 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
|
| }
|
| else
|
| {
|
| - if (remainder && subtargets)
|
| + if ((final_invert || remainder) && subtargets)
|
| new_src = gen_reg_rtx (mode);
|
| else
|
| new_src = target;
|
| @@ -2798,21 +3165,23 @@ arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
|
| code = PLUS;
|
|
|
| insns++;
|
| - if (TARGET_ARM)
|
| - i -= 6;
|
| - else
|
| - i -= 7;
|
| + i -= 8 - step_size;
|
| }
|
| /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
|
| shifts. */
|
| - if (TARGET_ARM)
|
| - i -= 2;
|
| - else
|
| - i--;
|
| + i -= step_size;
|
| }
|
| while (remainder);
|
| }
|
|
|
| + if (final_invert)
|
| + {
|
| + if (generate)
|
| + emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
|
| + gen_rtx_NOT (mode, source)));
|
| + insns++;
|
| + }
|
| +
|
| return insns;
|
| }
|
|
|
| @@ -2884,17 +3253,22 @@ arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
|
|
|
| /* Define how to find the value returned by a function. */
|
|
|
| -rtx
|
| -arm_function_value(const_tree type, const_tree func ATTRIBUTE_UNUSED)
|
| +static rtx
|
| +arm_function_value(const_tree type, const_tree func,
|
| + bool outgoing ATTRIBUTE_UNUSED)
|
| {
|
| enum machine_mode mode;
|
| int unsignedp ATTRIBUTE_UNUSED;
|
| rtx r ATTRIBUTE_UNUSED;
|
|
|
| mode = TYPE_MODE (type);
|
| +
|
| + if (TARGET_AAPCS_BASED)
|
| + return aapcs_allocate_return_reg (mode, type, func);
|
| +
|
| /* Promote integer types. */
|
| if (INTEGRAL_TYPE_P (type))
|
| - PROMOTE_FUNCTION_MODE (mode, unsignedp, type);
|
| + mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
|
|
|
| /* Promotes small structs returned in a register to full-word size
|
| for big-endian AAPCS. */
|
| @@ -2908,7 +3282,88 @@ arm_function_value(const_tree type, const_tree func ATTRIBUTE_UNUSED)
|
| }
|
| }
|
|
|
| - return LIBCALL_VALUE(mode);
|
| + return LIBCALL_VALUE (mode);
|
| +}
|
| +
|
| +static int
|
| +libcall_eq (const void *p1, const void *p2)
|
| +{
|
| + return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
|
| +}
|
| +
|
| +static hashval_t
|
| +libcall_hash (const void *p1)
|
| +{
|
| + return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
|
| +}
|
| +
|
| +static void
|
| +add_libcall (htab_t htab, rtx libcall)
|
| +{
|
| + *htab_find_slot (htab, libcall, INSERT) = libcall;
|
| +}
|
| +
|
| +static bool
|
| +arm_libcall_uses_aapcs_base (const_rtx libcall)
|
| +{
|
| + static bool init_done = false;
|
| + static htab_t libcall_htab;
|
| +
|
| + if (!init_done)
|
| + {
|
| + init_done = true;
|
| +
|
| + libcall_htab = htab_create (31, libcall_hash, libcall_eq,
|
| + NULL);
|
| + add_libcall (libcall_htab,
|
| + convert_optab_libfunc (sfloat_optab, SFmode, SImode));
|
| + add_libcall (libcall_htab,
|
| + convert_optab_libfunc (sfloat_optab, DFmode, SImode));
|
| + add_libcall (libcall_htab,
|
| + convert_optab_libfunc (sfloat_optab, SFmode, DImode));
|
| + add_libcall (libcall_htab,
|
| + convert_optab_libfunc (sfloat_optab, DFmode, DImode));
|
| +
|
| + add_libcall (libcall_htab,
|
| + convert_optab_libfunc (ufloat_optab, SFmode, SImode));
|
| + add_libcall (libcall_htab,
|
| + convert_optab_libfunc (ufloat_optab, DFmode, SImode));
|
| + add_libcall (libcall_htab,
|
| + convert_optab_libfunc (ufloat_optab, SFmode, DImode));
|
| + add_libcall (libcall_htab,
|
| + convert_optab_libfunc (ufloat_optab, DFmode, DImode));
|
| +
|
| + add_libcall (libcall_htab,
|
| + convert_optab_libfunc (sext_optab, SFmode, HFmode));
|
| + add_libcall (libcall_htab,
|
| + convert_optab_libfunc (trunc_optab, HFmode, SFmode));
|
| + add_libcall (libcall_htab,
|
| + convert_optab_libfunc (sfix_optab, DImode, DFmode));
|
| + add_libcall (libcall_htab,
|
| + convert_optab_libfunc (ufix_optab, DImode, DFmode));
|
| + add_libcall (libcall_htab,
|
| + convert_optab_libfunc (sfix_optab, DImode, SFmode));
|
| + add_libcall (libcall_htab,
|
| + convert_optab_libfunc (ufix_optab, DImode, SFmode));
|
| + }
|
| +
|
| + return libcall && htab_find (libcall_htab, libcall) != NULL;
|
| +}
|
| +
|
| +rtx
|
| +arm_libcall_value (enum machine_mode mode, const_rtx libcall)
|
| +{
|
| + if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
|
| + && GET_MODE_CLASS (mode) == MODE_FLOAT)
|
| + {
|
| + /* The following libcalls return their result in integer registers,
|
| + even though they return a floating point value. */
|
| + if (arm_libcall_uses_aapcs_base (libcall))
|
| + return gen_rtx_REG (mode, ARG_REGISTER(1));
|
| +
|
| + }
|
| +
|
| + return LIBCALL_VALUE (mode);
|
| }
|
|
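[Note: the libcalls collected in arm_libcall_uses_aapcs_base are the
AEABI conversion helpers, which the runtime provides with base-AAPCS
linkage even on hard-float systems.  A sketch, compiled with
-mfloat-abi=hard so that arm_pcs_default is ARM_PCS_AAPCS_VFP
(get_value is hypothetical):

    long long ll = get_value ();
    double d = ll;   /* __aeabi_l2d: the DFmode result arrives in
                        r0/r1, not d0.  */

Hence arm_libcall_value must report a core register for these calls.]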
|
| /* Determine the amount of memory needed to store the possible return
|
| @@ -2918,10 +3373,12 @@ arm_apply_result_size (void)
|
| {
|
| int size = 16;
|
|
|
| - if (TARGET_ARM)
|
| + if (TARGET_32BIT)
|
| {
|
| if (TARGET_HARD_FLOAT_ABI)
|
| {
|
| + if (TARGET_VFP)
|
| + size += 32;
|
| if (TARGET_FPA)
|
| size += 12;
|
| if (TARGET_MAVERICK)
|
| @@ -2934,27 +3391,56 @@ arm_apply_result_size (void)
|
| return size;
|
| }
|
|
|
| -/* Decide whether a type should be returned in memory (true)
|
| - or in a register (false). This is called as the target hook
|
| - TARGET_RETURN_IN_MEMORY. */
|
| +/* Decide whether TYPE should be returned in memory (true)
|
| + or in a register (false). FNTYPE is the type of the function making
|
| + the call. */
|
| static bool
|
| -arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
|
| +arm_return_in_memory (const_tree type, const_tree fntype)
|
| {
|
| HOST_WIDE_INT size;
|
|
|
| - size = int_size_in_bytes (type);
|
| + size = int_size_in_bytes (type); /* Negative if not fixed size. */
|
| +
|
| + if (TARGET_AAPCS_BASED)
|
| + {
|
| + /* Simple, non-aggregate types (i.e. not including vectors and
|
| + complex) are always returned in a register (or registers).
|
| + We don't care about which register here, so we can short-cut
|
| + some of the detail. */
|
| + if (!AGGREGATE_TYPE_P (type)
|
| + && TREE_CODE (type) != VECTOR_TYPE
|
| + && TREE_CODE (type) != COMPLEX_TYPE)
|
| + return false;
|
| +
|
| + /* Any return value that is no larger than one word can be
|
| + returned in r0. */
|
| + if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
|
| + return false;
|
| +
|
| + /* Check any available co-processors to see if they accept the
|
| + type as a register candidate (VFP, for example, can return
|
| + some aggregates in consecutive registers). These aren't
|
| + available if the call is variadic. */
|
| + if (aapcs_select_return_coproc (type, fntype) >= 0)
|
| + return false;
|
| +
|
| + /* Vector values should be returned using ARM registers, not
|
| + memory (unless they're over 16 bytes, which will break since
|
| + we only have four call-clobbered registers to play with). */
|
| + if (TREE_CODE (type) == VECTOR_TYPE)
|
| + return (size < 0 || size > (4 * UNITS_PER_WORD));
|
| +
|
| + /* The rest go in memory. */
|
| + return true;
|
| + }
|
|
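[Note: concrete cases for the AAPCS branch above, assuming no
co-processor (VFP) return candidate applies:

    struct s1 { char c; };    /* 1 byte  -> returned in r0      */
    struct s4 { int a; };     /* 4 bytes -> returned in r0      */
    struct s8 { int a, b; };  /* 8 bytes -> returned in memory  */
]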
|
| - /* Vector values should be returned using ARM registers, not memory (unless
|
| - they're over 16 bytes, which will break since we only have four
|
| - call-clobbered registers to play with). */
|
| if (TREE_CODE (type) == VECTOR_TYPE)
|
| return (size < 0 || size > (4 * UNITS_PER_WORD));
|
|
|
| if (!AGGREGATE_TYPE_P (type) &&
|
| - !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE))
|
| - /* All simple types are returned in registers.
|
| - For AAPCS, complex types are treated the same as aggregates. */
|
| - return 0;
|
| + (TREE_CODE (type) != VECTOR_TYPE))
|
| + /* All simple types are returned in registers. */
|
| + return false;
|
|
|
| if (arm_abi != ARM_ABI_APCS)
|
| {
|
| @@ -2971,7 +3457,7 @@ arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
|
| the aggregate is either huge or of variable size, and in either case
|
| we will want to return it via memory and not in a register. */
|
| if (size < 0 || size > UNITS_PER_WORD)
|
| - return 1;
|
| + return true;
|
|
|
| if (TREE_CODE (type) == RECORD_TYPE)
|
| {
|
| @@ -2991,18 +3477,18 @@ arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
|
| continue;
|
|
|
| if (field == NULL)
|
| - return 0; /* An empty structure. Allowed by an extension to ANSI C. */
|
| + return false; /* An empty structure. Allowed by an extension to ANSI C. */
|
|
|
| /* Check that the first field is valid for returning in a register. */
|
|
|
| /* ... Floats are not allowed */
|
| if (FLOAT_TYPE_P (TREE_TYPE (field)))
|
| - return 1;
|
| + return true;
|
|
|
| /* ... Aggregates that are not themselves valid for returning in
|
| a register are not allowed. */
|
| if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
|
| - return 1;
|
| + return true;
|
|
|
| /* Now check the remaining fields, if any. Only bitfields are allowed,
|
| since they are not addressable. */
|
| @@ -3014,10 +3500,10 @@ arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
|
| continue;
|
|
|
| if (!DECL_BIT_FIELD_TYPE (field))
|
| - return 1;
|
| + return true;
|
| }
|
|
|
| - return 0;
|
| + return false;
|
| }
|
|
|
| if (TREE_CODE (type) == UNION_TYPE)
|
| @@ -3034,18 +3520,18 @@ arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
|
| continue;
|
|
|
| if (FLOAT_TYPE_P (TREE_TYPE (field)))
|
| - return 1;
|
| + return true;
|
|
|
| if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
|
| - return 1;
|
| + return true;
|
| }
|
|
|
| - return 0;
|
| + return false;
|
| }
|
| #endif /* not ARM_WINCE */
|
|
|
| /* Return all other types in memory. */
|
| - return 1;
|
| + return true;
|
| }
|
|
|
| /* Indicate whether or not words of a double are in big-endian order. */
|
| @@ -3070,14 +3556,770 @@ arm_float_words_big_endian (void)
|
| return 1;
|
| }
|
|
|
| +const struct pcs_attribute_arg
|
| +{
|
| + const char *arg;
|
| + enum arm_pcs value;
|
| +} pcs_attribute_args[] =
|
| + {
|
| + {"aapcs", ARM_PCS_AAPCS},
|
| + {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
|
| +#if 0
|
| + /* We could recognize these, but changes would be needed elsewhere
|
| + * to implement them. */
|
| + {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
|
| + {"atpcs", ARM_PCS_ATPCS},
|
| + {"apcs", ARM_PCS_APCS},
|
| +#endif
|
| + {NULL, ARM_PCS_UNKNOWN}
|
| + };
|
| +
|
| +static enum arm_pcs
|
| +arm_pcs_from_attribute (tree attr)
|
| +{
|
| + const struct pcs_attribute_arg *ptr;
|
| + const char *arg;
|
| +
|
| + /* Get the value of the argument. */
|
| + if (TREE_VALUE (attr) == NULL_TREE
|
| + || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
|
| + return ARM_PCS_UNKNOWN;
|
| +
|
| + arg = TREE_STRING_POINTER (TREE_VALUE (attr));
|
| +
|
| + /* Check it against the list of known arguments. */
|
| + for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
|
| + if (streq (arg, ptr->arg))
|
| + return ptr->value;
|
| +
|
| + /* An unrecognized PCS name. */
|
| + return ARM_PCS_UNKNOWN;
|
| +}
|
| +
|
| +/* Get the PCS variant to use for this call. TYPE is the function's type
|
| + specification, DECL is the specific declaration. DECL may be null if
|
| + the call could be indirect or if this is a library call. */
|
| +static enum arm_pcs
|
| +arm_get_pcs_model (const_tree type, const_tree decl)
|
| +{
|
| + bool user_convention = false;
|
| + enum arm_pcs user_pcs = arm_pcs_default;
|
| + tree attr;
|
| +
|
| + gcc_assert (type);
|
| +
|
| + attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
|
| + if (attr)
|
| + {
|
| + user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
|
| + user_convention = true;
|
| + }
|
| +
|
| + if (TARGET_AAPCS_BASED)
|
| + {
|
| + /* Detect varargs functions. These always use the base rules
|
| + (no argument is ever a candidate for a co-processor
|
| + register). */
|
| + bool base_rules = (TYPE_ARG_TYPES (type) != 0
|
| + && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (type)))
|
| + != void_type_node));
|
| +
|
| + if (user_convention)
|
| + {
|
| + if (user_pcs > ARM_PCS_AAPCS_LOCAL)
|
| + sorry ("Non-AAPCS derived PCS variant");
|
| + else if (base_rules && user_pcs != ARM_PCS_AAPCS)
|
| + error ("Variadic functions must use the base AAPCS variant");
|
| + }
|
| +
|
| + if (base_rules)
|
| + return ARM_PCS_AAPCS;
|
| + else if (user_convention)
|
| + return user_pcs;
|
| + else if (decl && flag_unit_at_a_time)
|
| + {
|
| + /* Local functions never leak outside this compilation unit,
|
| + so we are free to use whatever conventions are
|
| + appropriate. */
|
| + /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
|
| + struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
|
| + if (i && i->local)
|
| + return ARM_PCS_AAPCS_LOCAL;
|
| + }
|
| + }
|
| + else if (user_convention && user_pcs != arm_pcs_default)
|
| + sorry ("PCS variant");
|
| +
|
| + /* For everything else we use the target's default. */
|
| + return arm_pcs_default;
|
| +}
|
| +
|
| +
|
| +static void
|
| +aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
|
| + const_tree fntype ATTRIBUTE_UNUSED,
|
| + rtx libcall ATTRIBUTE_UNUSED,
|
| + const_tree fndecl ATTRIBUTE_UNUSED)
|
| +{
|
| + /* Record the unallocated VFP registers. */
|
| + pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
|
| + pcum->aapcs_vfp_reg_alloc = 0;
|
| +}
|
| +
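[Note: the walker below implements the AAPCS notion of a homogeneous
aggregate: a composite built entirely from a single base machine mode
(the caller enforces the 1-4 element limit).  For example:

    struct c2 { double re, im; };       /* -> 2 x DFmode, candidate   */
    struct v4 { float v[4]; };          /* -> 4 x SFmode, candidate   */
    struct mx { float f; double d; };   /* mixed modes -> returns -1  */
]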
|
| +/* Walk down the type tree of TYPE counting consecutive base elements.
|
| + If *MODEP is VOIDmode, then set it to the first valid floating point
|
| + type. If a non-floating point type is found, or if a floating point
|
| + type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
|
| + otherwise return the count in the sub-tree. */
|
| +static int
|
| +aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
|
| +{
|
| + enum machine_mode mode;
|
| + HOST_WIDE_INT size;
|
| +
|
| + switch (TREE_CODE (type))
|
| + {
|
| + case REAL_TYPE:
|
| + mode = TYPE_MODE (type);
|
| + if (mode != DFmode && mode != SFmode)
|
| + return -1;
|
| +
|
| + if (*modep == VOIDmode)
|
| + *modep = mode;
|
| +
|
| + if (*modep == mode)
|
| + return 1;
|
| +
|
| + break;
|
| +
|
| + case COMPLEX_TYPE:
|
| + mode = TYPE_MODE (TREE_TYPE (type));
|
| + if (mode != DFmode && mode != SFmode)
|
| + return -1;
|
| +
|
| + if (*modep == VOIDmode)
|
| + *modep = mode;
|
| +
|
| + if (*modep == mode)
|
| + return 2;
|
| +
|
| + break;
|
| +
|
| + case VECTOR_TYPE:
|
| + /* Use V2SImode and V4SImode as representatives of all 64-bit
|
| + and 128-bit vector types, whether or not those modes are
|
| + supported with the present options. */
|
| + size = int_size_in_bytes (type);
|
| + switch (size)
|
| + {
|
| + case 8:
|
| + mode = V2SImode;
|
| + break;
|
| + case 16:
|
| + mode = V4SImode;
|
| + break;
|
| + default:
|
| + return -1;
|
| + }
|
| +
|
| + if (*modep == VOIDmode)
|
| + *modep = mode;
|
| +
|
| + /* Vector modes are considered to be opaque: two vectors are
|
| + equivalent for the purposes of being homogeneous aggregates
|
| + if they are the same size. */
|
| + if (*modep == mode)
|
| + return 1;
|
| +
|
| + break;
|
| +
|
| + case ARRAY_TYPE:
|
| + {
|
| + int count;
|
| + tree index = TYPE_DOMAIN (type);
|
| +
|
| + /* Can't handle incomplete types. */
|
| + if (!COMPLETE_TYPE_P (type))
|
| + return -1;
|
| +
|
| + count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
|
| + if (count == -1
|
| + || !index
|
| + || !TYPE_MAX_VALUE (index)
|
| + || !host_integerp (TYPE_MAX_VALUE (index), 1)
|
| + || !TYPE_MIN_VALUE (index)
|
| + || !host_integerp (TYPE_MIN_VALUE (index), 1)
|
| + || count < 0)
|
| + return -1;
|
| +
|
| + count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
|
| + - tree_low_cst (TYPE_MIN_VALUE (index), 1));
|
| +
|
| + /* There must be no padding. */
|
| + if (!host_integerp (TYPE_SIZE (type), 1)
|
| + || (tree_low_cst (TYPE_SIZE (type), 1)
|
| + != count * GET_MODE_BITSIZE (*modep)))
|
| + return -1;
|
| +
|
| + return count;
|
| + }
|
| +
|
| + case RECORD_TYPE:
|
| + {
|
| + int count = 0;
|
| + int sub_count;
|
| + tree field;
|
| +
|
| + /* Can't handle incomplete types. */
|
| + if (!COMPLETE_TYPE_P (type))
|
| + return -1;
|
| +
|
| + for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
|
| + {
|
| + if (TREE_CODE (field) != FIELD_DECL)
|
| + continue;
|
| +
|
| + sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
|
| + if (sub_count < 0)
|
| + return -1;
|
| + count += sub_count;
|
| + }
|
| +
|
| + /* There must be no padding. */
|
| + if (!host_integerp (TYPE_SIZE (type), 1)
|
| + || (tree_low_cst (TYPE_SIZE (type), 1)
|
| + != count * GET_MODE_BITSIZE (*modep)))
|
| + return -1;
|
| +
|
| + return count;
|
| + }
|
| +
|
| + case UNION_TYPE:
|
| + case QUAL_UNION_TYPE:
|
| + {
|
| + /* These aren't very interesting except in a degenerate case. */
|
| + int count = 0;
|
| + int sub_count;
|
| + tree field;
|
| +
|
| + /* Can't handle incomplete types. */
|
| + if (!COMPLETE_TYPE_P (type))
|
| + return -1;
|
| +
|
| + for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
|
| + {
|
| + if (TREE_CODE (field) != FIELD_DECL)
|
| + continue;
|
| +
|
| + sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
|
| + if (sub_count < 0)
|
| + return -1;
|
| + count = count > sub_count ? count : sub_count;
|
| + }
|
| +
|
| + /* There must be no padding. */
|
| + if (!host_integerp (TYPE_SIZE (type), 1)
|
| + || (tree_low_cst (TYPE_SIZE (type), 1)
|
| + != count * GET_MODE_BITSIZE (*modep)))
|
| + return -1;
|
| +
|
| + return count;
|
| + }
|
| +
|
| + default:
|
| + break;
|
| + }
|
| +
|
| + return -1;
|
| +}
|
| +
|
| +/* Return true if PCS_VARIANT should use VFP registers. IS_DOUBLE is
|
| + true when double-precision registers are needed; the local variant
|
| + only allows these when the target provides them. */
|
| +static bool
|
| +use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
|
| +{
|
| + if (pcs_variant == ARM_PCS_AAPCS_VFP)
|
| + return true;
|
| +
|
| + if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
|
| + return false;
|
| +
|
| + return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT
|
| + && (TARGET_VFP_DOUBLE || !is_double));
|
| +}
|
| +
|
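| +/* Return true if an argument of mode MODE (or type TYPE, if MODE is
|
| + BLKmode) is a candidate for the VFP registers under PCS_VARIANT;
|
| + if so, set *BASE_MODE to the mode of a single element and *COUNT
|
| + to the number of elements to allocate. */
|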
| +static bool
|
| +aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
|
| + enum machine_mode mode, const_tree type,
|
| + enum machine_mode *base_mode, int *count)
|
| +{
|
| + enum machine_mode new_mode = VOIDmode;
|
| +
|
| + if (GET_MODE_CLASS (mode) == MODE_FLOAT
|
| + || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
|
| + || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
|
| + {
|
| + *count = 1;
|
| + new_mode = mode;
|
| + }
|
| + else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
|
| + {
|
| + *count = 2;
|
| + new_mode = (mode == DCmode ? DFmode : SFmode);
|
| + }
|
| + else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
|
| + {
|
| + int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
|
| +
|
| + if (ag_count > 0 && ag_count <= 4)
|
| + *count = ag_count;
|
| + else
|
| + return false;
|
| + }
|
| + else
|
| + return false;
|
| +
|
| + if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
|
| + return false;
|
| +
|
| + *base_mode = new_mode;
|
| + return true;
|
| +}
|
| +
|
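| +/* Return true if a result of mode MODE (or type TYPE, if MODE is
|
| + BLKmode) can be returned in VFP registers under PCS_VARIANT. */
|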
| +static bool
|
| +aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
|
| + enum machine_mode mode, const_tree type)
|
| +{
|
| + int count ATTRIBUTE_UNUSED;
|
| + enum machine_mode ag_mode ATTRIBUTE_UNUSED;
|
| +
|
| + if (!use_vfp_abi (pcs_variant, false))
|
| + return false;
|
| + return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
|
| + &ag_mode, &count);
|
| +}
|
| +
|
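| +/* Return true if the argument described by MODE and TYPE is a
|
| + candidate for the VFP registers; if so, record the base mode and
|
| + register count in PCUM for the allocation step. */
|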
| +static bool
|
| +aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
|
| + const_tree type)
|
| +{
|
| + if (!use_vfp_abi (pcum->pcs_variant, false))
|
| + return false;
|
| +
|
| + return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
|
| + &pcum->aapcs_vfp_rmode,
|
| + &pcum->aapcs_vfp_rcount);
|
| +}
|
| +
|
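| +/* Allocate VFP registers for the argument most recently accepted by
|
| + aapcs_vfp_is_call_candidate: scan for the lowest suitably aligned
|
| + block of free registers that is large enough, and return false if
|
| + none remains. */
|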
| +static bool
|
| +aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
|
| + const_tree type ATTRIBUTE_UNUSED)
|
| +{
|
| + int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
|
| + unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
|
| + int regno;
|
| +
|
| + for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
|
| + if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
|
| + {
|
| + pcum->aapcs_vfp_reg_alloc = mask << regno;
|
| + if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
|
| + {
|
| + int i;
|
| + int rcount = pcum->aapcs_vfp_rcount;
|
| + int rshift = shift;
|
| + enum machine_mode rmode = pcum->aapcs_vfp_rmode;
|
| + rtx par;
|
| + if (!TARGET_NEON)
|
| + {
|
| + /* Avoid using unsupported vector modes. */
|
| + if (rmode == V2SImode)
|
| + rmode = DImode;
|
| + else if (rmode == V4SImode)
|
| + {
|
| + rmode = DImode;
|
| + rcount *= 2;
|
| + rshift /= 2;
|
| + }
|
| + }
|
| + par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
|
| + for (i = 0; i < rcount; i++)
|
| + {
|
| + rtx tmp = gen_rtx_REG (rmode,
|
| + FIRST_VFP_REGNUM + regno + i * rshift);
|
| + tmp = gen_rtx_EXPR_LIST
|
| + (VOIDmode, tmp,
|
| + GEN_INT (i * GET_MODE_SIZE (rmode)));
|
| + XVECEXP (par, 0, i) = tmp;
|
| + }
|
| +
|
| + pcum->aapcs_reg = par;
|
| + }
|
| + else
|
| + pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
|
| + return true;
|
| + }
|
| + return false;
|
| +}
|
| +
|
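| +/* Return an RTX for the VFP register(s) holding a result of mode
|
| + MODE (and type TYPE, when MODE is BLKmode). */
|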
| +static rtx
|
| +aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
|
| + enum machine_mode mode,
|
| + const_tree type ATTRIBUTE_UNUSED)
|
| +{
|
| + if (!use_vfp_abi (pcs_variant, false))
|
| + return NULL;
|
| +
|
| + if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
|
| + {
|
| + int count;
|
| + enum machine_mode ag_mode;
|
| + int i;
|
| + rtx par;
|
| + int shift;
|
| +
|
| + aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
|
| + &ag_mode, &count);
|
| +
|
| + if (!TARGET_NEON)
|
| + {
|
| + if (ag_mode == V2SImode)
|
| + ag_mode = DImode;
|
| + else if (ag_mode == V4SImode)
|
| + {
|
| + ag_mode = DImode;
|
| + count *= 2;
|
| + }
|
| + }
|
| + shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
|
| + par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
|
| + for (i = 0; i < count; i++)
|
| + {
|
| + rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
|
| + tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
|
| + GEN_INT (i * GET_MODE_SIZE (ag_mode)));
|
| + XVECEXP (par, 0, i) = tmp;
|
| + }
|
| +
|
| + return par;
|
| + }
|
| +
|
| + return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
|
| +}
|
| +
|
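| +/* Finish with the current argument: mark its VFP registers as
|
| + permanently allocated and discard the provisional allocation. */
|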
| +static void
|
| +aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
|
| + enum machine_mode mode ATTRIBUTE_UNUSED,
|
| + const_tree type ATTRIBUTE_UNUSED)
|
| +{
|
| + pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
|
| + pcum->aapcs_vfp_reg_alloc = 0;
|
| + return;
|
| +}
|
| +
|
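| +/* Expand to one row of the co-processor layout table below, naming
|
| + the six handler routines for co-processor X. */
|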
| +#define AAPCS_CP(X) \
|
| + { \
|
| + aapcs_ ## X ## _cum_init, \
|
| + aapcs_ ## X ## _is_call_candidate, \
|
| + aapcs_ ## X ## _allocate, \
|
| + aapcs_ ## X ## _is_return_candidate, \
|
| + aapcs_ ## X ## _allocate_return_reg, \
|
| + aapcs_ ## X ## _advance \
|
| + }
|
| +
|
| +/* Table of co-processors that can be used to pass arguments in
|
| + registers. Ideally no argument should be a candidate for more than
|
| + one co-processor table entry, but the table is processed in order
|
| + and stops after the first match. If that entry then fails to put
|
| + the argument into a co-processor register, the argument will go on
|
| + the stack. */
|
| +static struct
|
| +{
|
| + /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
|
| + void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
|
| +
|
| + /* Return true if an argument of mode MODE (or type TYPE if MODE is
|
| + BLKmode) is a candidate for this co-processor's registers; this
|
| + function should ignore any position-dependent state in
|
| + CUMULATIVE_ARGS and only use call-type dependent information. */
|
| + bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
|
| +
|
| + /* Return true if the argument does get a co-processor register; it
|
| + should set aapcs_reg to an RTX of the allocated register, in the
|
| + form that FUNCTION_ARG must return. */
|
| + bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
|
| +
|
| + /* Return true if a result of mode MODE (or type TYPE if MODE is
|
| + BLKmode) can be returned in this co-processor's registers. */
|
| + bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);
|
| +
|
| + /* Allocate and return an RTX to hold the return value of a call;
|
| + this routine must not fail and will only be called if
|
| + is_return_candidate returned true with the same parameters. */
|
| + rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);
|
| +
|
| + /* Finish processing this argument and prepare to start processing
|
| + the next one. */
|
| + void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
|
| +} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
|
| + {
|
| + AAPCS_CP(vfp)
|
| + };
|
| +
|
| +#undef AAPCS_CP
|
| +
|
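| +/* Return the index of the co-processor table entry that should
|
| + handle the argument described by MODE and TYPE, or -1 if the
|
| + argument is not a candidate for any co-processor. */
|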
| +static int
|
| +aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
|
| + tree type)
|
| +{
|
| + int i;
|
| +
|
| + for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
|
| + if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
|
| + return i;
|
| +
|
| + return -1;
|
| +}
|
| +
|
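| +/* Likewise, but for a result of type TYPE returned from a function
|
| + of type (or decl) FNTYPE, which may be NULL. */
|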
| +static int
|
| +aapcs_select_return_coproc (const_tree type, const_tree fntype)
|
| +{
|
| + /* We aren't passed a decl, so we can't check that a call is local.
|
| + However, it isn't clear that that would be a win anyway, since it
|
| + might limit some tail-calling opportunities. */
|
| + enum arm_pcs pcs_variant;
|
| +
|
| + if (fntype)
|
| + {
|
| + const_tree fndecl = NULL_TREE;
|
| +
|
| + if (TREE_CODE (fntype) == FUNCTION_DECL)
|
| + {
|
| + fndecl = fntype;
|
| + fntype = TREE_TYPE (fntype);
|
| + }
|
| +
|
| + pcs_variant = arm_get_pcs_model (fntype, fndecl);
|
| + }
|
| + else
|
| + pcs_variant = arm_pcs_default;
|
| +
|
| + if (pcs_variant != ARM_PCS_AAPCS)
|
| + {
|
| + int i;
|
| +
|
| + for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
|
| + if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
|
| + TYPE_MODE (type),
|
| + type))
|
| + return i;
|
| + }
|
| + return -1;
|
| +}
|
| +
|
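| +/* Return an RTX describing where a result of mode MODE (and type
|
| + TYPE, if known) is returned from a function of type (or decl)
|
| + FNTYPE, which may be NULL for an indirect or library call. */
|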
| +static rtx
|
| +aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
|
| + const_tree fntype)
|
| +{
|
| + /* We aren't passed a decl, so we can't check that a call is local.
|
| + However, it isn't clear that that would be a win anyway, since it
|
| + might limit some tail-calling opportunities. */
|
| + enum arm_pcs pcs_variant;
|
| + int unsignedp ATTRIBUTE_UNUSED;
|
| +
|
| + if (fntype)
|
| + {
|
| + const_tree fndecl = NULL_TREE;
|
| +
|
| + if (TREE_CODE (fntype) == FUNCTION_DECL)
|
| + {
|
| + fndecl = fntype;
|
| + fntype = TREE_TYPE (fntype);
|
| + }
|
| +
|
| + pcs_variant = arm_get_pcs_model (fntype, fndecl);
|
| + }
|
| + else
|
| + pcs_variant = arm_pcs_default;
|
| +
|
| + /* Promote integer types. */
|
| + if (type && INTEGRAL_TYPE_P (type))
|
| + mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
|
| +
|
| + if (pcs_variant != ARM_PCS_AAPCS)
|
| + {
|
| + int i;
|
| +
|
| + for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
|
| + if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
|
| + type))
|
| + return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
|
| + mode, type);
|
| + }
|
| +
|
| + /* Promote small structs returned in a register to full-word size
|
| + for big-endian AAPCS. */
|
| + if (type && arm_return_in_msb (type))
|
| + {
|
| + HOST_WIDE_INT size = int_size_in_bytes (type);
|
| + if (size % UNITS_PER_WORD != 0)
|
| + {
|
| + size += UNITS_PER_WORD - size % UNITS_PER_WORD;
|
| + mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
|
| + }
|
| + }
|
| +
|
| + return gen_rtx_REG (mode, R0_REGNUM);
|
| +}
|
| +
|
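| +/* Return the location of a libcall result of mode MODE. */
|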
| +rtx
|
| +aapcs_libcall_value (enum machine_mode mode)
|
| +{
|
| + return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
|
| +}
|
| +
|
| +/* Lay out a function argument using the AAPCS rules. The rule
|
| + numbers referred to here are those in the AAPCS. */
|
| +static void
|
| +aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
|
| + tree type, int named)
|
| +{
|
| + int nregs, nregs2;
|
| + int ncrn;
|
| +
|
| + /* We only need to do this once per argument. */
|
| + if (pcum->aapcs_arg_processed)
|
| + return;
|
| +
|
| + pcum->aapcs_arg_processed = true;
|
| +
|
| + /* Special case: if named is false then we are handling an incoming
|
| + anonymous argument which is on the stack. */
|
| + if (!named)
|
| + return;
|
| +
|
| + /* Is this a potential co-processor register candidate? */
|
| + if (pcum->pcs_variant != ARM_PCS_AAPCS)
|
| + {
|
| + int slot = aapcs_select_call_coproc (pcum, mode, type);
|
| + pcum->aapcs_cprc_slot = slot;
|
| +
|
| + /* We don't have to apply any of the rules from part B of the
|
| + preparation phase; these are handled elsewhere in the
|
| + compiler. */
|
| +
|
| + if (slot >= 0)
|
| + {
|
| + /* A co-processor register candidate goes either in its own
|
| + class of registers or on the stack. */
|
| + if (!pcum->aapcs_cprc_failed[slot])
|
| + {
|
| + /* C1.cp - Try to allocate the argument to co-processor
|
| + registers. */
|
| + if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
|
| + return;
|
| +
|
| + /* C2.cp - Put the argument on the stack and note that we
|
| + can't assign any more candidates in this slot. We also
|
| + need to note that we have allocated stack space, so that
|
| + we won't later try to split a non-cprc candidate between
|
| + core registers and the stack. */
|
| + pcum->aapcs_cprc_failed[slot] = true;
|
| + pcum->can_split = false;
|
| + }
|
| +
|
| + /* We didn't get a register, so this argument goes on the
|
| + stack. */
|
| + gcc_assert (pcum->can_split == false);
|
| + return;
|
| + }
|
| + }
|
| +
|
| + /* C3 - For double-word aligned arguments, round the NCRN up to the
|
| + next even number. */
|
| + ncrn = pcum->aapcs_ncrn;
|
| + if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
|
| + ncrn++;
|
| +
|
| + nregs = ARM_NUM_REGS2 (mode, type);
|
| +
|
| + /* Sigh, this test should really assert that nregs > 0, but a GCC
|
| + extension allows empty structs and then gives them empty size; it
|
| + then allows such a structure to be passed by value. For some of
|
| + the code below we have to pretend that such an argument has
|
| + non-zero size so that we 'locate' it correctly either in
|
| + registers or on the stack. */
|
| + gcc_assert (nregs >= 0);
|
| +
|
| + nregs2 = nregs ? nregs : 1;
|
| +
|
| + /* C4 - Argument fits entirely in core registers. */
|
| + if (ncrn + nregs2 <= NUM_ARG_REGS)
|
| + {
|
| + pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
|
| + pcum->aapcs_next_ncrn = ncrn + nregs;
|
| + return;
|
| + }
|
| +
|
| + /* C5 - Some core registers left and there are no arguments already
|
| + on the stack: split this argument between the remaining core
|
| + registers and the stack. */
|
| + if (ncrn < NUM_ARG_REGS && pcum->can_split)
|
| + {
|
| + pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
|
| + pcum->aapcs_next_ncrn = NUM_ARG_REGS;
|
| + pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
|
| + return;
|
| + }
|
| +
|
| + /* C6 - NCRN is set to 4. */
|
| + pcum->aapcs_next_ncrn = NUM_ARG_REGS;
|
| +
|
| + /* C7,C8 - argument goes on the stack. We have nothing to do here. */
|
| + return;
|
| +}
|
| +
|
| /* Initialize a variable CUM of type CUMULATIVE_ARGS
|
| for a call to a function whose data type is FNTYPE.
|
| For a library call, FNTYPE is NULL. */
|
| void
|
| arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
|
| - rtx libname ATTRIBUTE_UNUSED,
|
| + rtx libname,
|
| tree fndecl ATTRIBUTE_UNUSED)
|
| {
|
| + /* Determine the PCS variant to use for this call. */
|
| + if (fntype)
|
| + pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
|
| + else
|
| + pcum->pcs_variant = arm_pcs_default;
|
| +
|
| + if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
|
| + {
|
| + if (arm_libcall_uses_aapcs_base (libname))
|
| + pcum->pcs_variant = ARM_PCS_AAPCS;
|
| +
|
| + pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
|
| + pcum->aapcs_reg = NULL_RTX;
|
| + pcum->aapcs_partial = 0;
|
| + pcum->aapcs_arg_processed = false;
|
| + pcum->aapcs_cprc_slot = -1;
|
| + pcum->can_split = true;
|
| +
|
| + if (pcum->pcs_variant != ARM_PCS_AAPCS)
|
| + {
|
| + int i;
|
| +
|
| + for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
|
| + {
|
| + pcum->aapcs_cprc_failed[i] = false;
|
| + aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
|
| + }
|
| + }
|
| + return;
|
| + }
|
| +
|
| + /* Legacy ABIs. */
|
| +
|
| /* On the ARM, the offset starts at 0. */
|
| pcum->nregs = 0;
|
| pcum->iwmmxt_nregs = 0;
|
| @@ -3131,6 +4373,17 @@ arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
|
| {
|
| int nregs;
|
|
|
| + /* Handle the special case quickly. Pick an arbitrary value for op2 of
|
| + a call insn (op3 of a call_value insn). */
|
| + if (mode == VOIDmode)
|
| + return const0_rtx;
|
| +
|
| + if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
|
| + {
|
| + aapcs_layout_arg (pcum, mode, type, named);
|
| + return pcum->aapcs_reg;
|
| + }
|
| +
|
| /* Varargs vectors are treated the same as long long.
|
| named_count avoids having to change the way arm handles 'named' */
|
| if (TARGET_IWMMXT_ABI
|
| @@ -3172,10 +4425,16 @@ arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
|
|
|
| static int
|
| arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
|
| - tree type, bool named ATTRIBUTE_UNUSED)
|
| + tree type, bool named)
|
| {
|
| int nregs = pcum->nregs;
|
|
|
| + if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
|
| + {
|
| + aapcs_layout_arg (pcum, mode, type, named);
|
| + return pcum->aapcs_partial;
|
| + }
|
| +
|
| if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
|
| return 0;
|
|
|
| @@ -3187,6 +4446,39 @@ arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
|
| return 0;
|
| }
|
|
|
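| +/* Update PCUM to advance past the argument described by MODE and
|
| + TYPE. NAMED is false for an anonymous argument of a variadic
|
| + call. */
|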
| +void
|
| +arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
|
| + tree type, bool named)
|
| +{
|
| + if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
|
| + {
|
| + aapcs_layout_arg (pcum, mode, type, named);
|
| +
|
| + if (pcum->aapcs_cprc_slot >= 0)
|
| + {
|
| + aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
|
| + type);
|
| + pcum->aapcs_cprc_slot = -1;
|
| + }
|
| +
|
| + /* Generic stuff. */
|
| + pcum->aapcs_arg_processed = false;
|
| + pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
|
| + pcum->aapcs_reg = NULL_RTX;
|
| + pcum->aapcs_partial = 0;
|
| + }
|
| + else
|
| + {
|
| + pcum->nargs += 1;
|
| + if (arm_vector_mode_supported_p (mode)
|
| + && pcum->named_count > pcum->nargs
|
| + && TARGET_IWMMXT_ABI)
|
| + pcum->iwmmxt_nregs += 1;
|
| + else
|
| + pcum->nregs += ARM_NUM_REGS2 (mode, type);
|
| + }
|
| +}
|
| +
|
| /* Variable sized types are passed by reference. This is a GCC
|
| extension to the ARM ABI. */
|
|
|
| @@ -3226,42 +4518,6 @@ arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
|
| arm_pragma_long_calls = OFF;
|
| }
|
|
|
| -/* Table of machine attributes. */
|
| -const struct attribute_spec arm_attribute_table[] =
|
| -{
|
| - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
|
| - /* Function calls made to this symbol must be done indirectly, because
|
| - it may lie outside of the 26 bit addressing range of a normal function
|
| - call. */
|
| - { "long_call", 0, 0, false, true, true, NULL },
|
| - /* Whereas these functions are always known to reside within the 26 bit
|
| - addressing range. */
|
| - { "short_call", 0, 0, false, true, true, NULL },
|
| - /* Interrupt Service Routines have special prologue and epilogue requirements. */
|
| - { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
|
| - { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
|
| - { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
|
| -#ifdef ARM_PE
|
| - /* ARM/PE has three new attributes:
|
| - interfacearm - ?
|
| - dllexport - for exporting a function/variable that will live in a dll
|
| - dllimport - for importing a function/variable from a dll
|
| -
|
| - Microsoft allows multiple declspecs in one __declspec, separating
|
| - them with spaces. We do NOT support this. Instead, use __declspec
|
| - multiple times.
|
| - */
|
| - { "dllimport", 0, 0, true, false, false, NULL },
|
| - { "dllexport", 0, 0, true, false, false, NULL },
|
| - { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
|
| -#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
|
| - { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
|
| - { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
|
| - { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
|
| -#endif
|
| - { NULL, 0, 0, false, false, false, NULL }
|
| -};
|
| -
|
| /* Handle an attribute requiring a FUNCTION_DECL;
|
| arguments as in struct attribute_spec.handler. */
|
| static tree
|
| @@ -3270,8 +4526,8 @@ arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
|
| {
|
| if (TREE_CODE (*node) != FUNCTION_DECL)
|
| {
|
| - warning (OPT_Wattributes, "%qs attribute only applies to functions",
|
| - IDENTIFIER_POINTER (name));
|
| + warning (OPT_Wattributes, "%qE attribute only applies to functions",
|
| + name);
|
| *no_add_attrs = true;
|
| }
|
|
|
| @@ -3288,8 +4544,8 @@ arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
|
| {
|
| if (TREE_CODE (*node) != FUNCTION_DECL)
|
| {
|
| - warning (OPT_Wattributes, "%qs attribute only applies to functions",
|
| - IDENTIFIER_POINTER (name));
|
| + warning (OPT_Wattributes, "%qE attribute only applies to functions",
|
| + name);
|
| *no_add_attrs = true;
|
| }
|
| /* FIXME: the argument if any is checked for type attributes;
|
| @@ -3302,8 +4558,8 @@ arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
|
| {
|
| if (arm_isr_value (args) == ARM_FT_UNKNOWN)
|
| {
|
| - warning (OPT_Wattributes, "%qs attribute ignored",
|
| - IDENTIFIER_POINTER (name));
|
| + warning (OPT_Wattributes, "%qE attribute ignored",
|
| + name);
|
| *no_add_attrs = true;
|
| }
|
| }
|
| @@ -3330,8 +4586,8 @@ arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
|
| }
|
| else
|
| {
|
| - warning (OPT_Wattributes, "%qs attribute ignored",
|
| - IDENTIFIER_POINTER (name));
|
| + warning (OPT_Wattributes, "%qE attribute ignored",
|
| + name);
|
| }
|
| }
|
| }
|
| @@ -3339,6 +4595,20 @@ arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
|
| return NULL_TREE;
|
| }
|
|
|
| +/* Handle a "pcs" attribute; arguments as in struct
|
| + attribute_spec.handler. */
|
| +static tree
|
| +arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
|
| + int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
|
| +{
|
| + if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
|
| + {
|
| + warning (OPT_Wattributes, "%qE attribute ignored", name);
|
| + *no_add_attrs = true;
|
| + }
|
| + return NULL_TREE;
|
| +}
|
| +
|
| #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
|
| /* Handle the "notshared" attribute. This attribute is another way of
|
| requesting hidden visibility. ARM's compiler supports
|
| @@ -3500,7 +4770,7 @@ arm_is_long_call_p (tree decl)
|
|
|
| /* Return nonzero if it is ok to make a tail-call to DECL. */
|
| static bool
|
| -arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
|
| +arm_function_ok_for_sibcall (tree decl, tree exp)
|
| {
|
| unsigned long func_type;
|
|
|
| @@ -3533,6 +4803,21 @@ arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
|
| if (IS_INTERRUPT (func_type))
|
| return false;
|
|
|
| + if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
|
| + {
|
| + /* Check that the return value locations are the same. For
|
| + example that we aren't returning a value from the sibling in
|
| + a VFP register but then need to transfer it to a core
|
| + register. */
|
| + rtx a, b;
|
| +
|
| + a = arm_function_value (TREE_TYPE (exp), decl, false);
|
| + b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
|
| + cfun->decl, false);
|
| + if (!rtx_equal_p (a, b))
|
| + return false;
|
| + }
|
| +
|
| /* Never tailcall if function may be called with a misaligned SP. */
|
| if (IS_STACKALIGN (func_type))
|
| return false;
|
| @@ -3580,7 +4865,7 @@ require_pic_register (void)
|
| /* Play games to avoid marking the function as needing pic
|
| if we are being called as part of the cost-estimation
|
| process. */
|
| - if (current_ir_type () != IR_GIMPLE)
|
| + if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
|
| crtl->uses_pic_offset_table = 1;
|
| }
|
| else
|
| @@ -3593,7 +4878,7 @@ require_pic_register (void)
|
| /* Play games to avoid marking the function as needing pic
|
| if we are being called as part of the cost-estimation
|
| process. */
|
| - if (current_ir_type () != IR_GIMPLE)
|
| + if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
|
| {
|
| crtl->uses_pic_offset_table = 1;
|
| start_sequence ();
|
| @@ -3602,7 +4887,11 @@ require_pic_register (void)
|
|
|
| seq = get_insns ();
|
| end_sequence ();
|
| - emit_insn_after (seq, entry_of_function ());
|
| + /* We can be called during expansion of PHI nodes, where
|
| + we can't yet emit instructions directly in the final
|
| + insn stream. Queue the insns on the entry edge; they will
|
| + be committed after everything else is expanded. */
|
| + insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
|
| }
|
| }
|
| }
|
| @@ -3634,10 +4923,8 @@ legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
|
| else
|
| address = reg;
|
|
|
| - if (TARGET_ARM)
|
| - emit_insn (gen_pic_load_addr_arm (address, orig));
|
| - else if (TARGET_THUMB2)
|
| - emit_insn (gen_pic_load_addr_thumb2 (address, orig));
|
| + if (TARGET_32BIT)
|
| + emit_insn (gen_pic_load_addr_32bit (address, orig));
|
| else /* TARGET_THUMB1 */
|
| emit_insn (gen_pic_load_addr_thumb1 (address, orig));
|
|
|
| @@ -3814,7 +5101,7 @@ arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
|
| {
|
| pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
|
| pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
|
| - emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
|
| + emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
|
|
|
| emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
|
|
|
| @@ -3837,29 +5124,13 @@ arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
|
| UNSPEC_GOTSYM_OFF);
|
| pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
|
|
|
| - if (TARGET_ARM)
|
| - {
|
| - emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
|
| - emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
|
| - }
|
| - else if (TARGET_THUMB2)
|
| + if (TARGET_32BIT)
|
| {
|
| - /* Thumb-2 only allows very limited access to the PC. Calculate the
|
| - address in a temporary register. */
|
| - if (arm_pic_register != INVALID_REGNUM)
|
| - {
|
| - pic_tmp = gen_rtx_REG (SImode,
|
| - thumb_find_work_register (saved_regs));
|
| - }
|
| + emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
|
| + if (TARGET_ARM)
|
| + emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
|
| else
|
| - {
|
| - gcc_assert (can_create_pseudo_p ());
|
| - pic_tmp = gen_reg_rtx (Pmode);
|
| - }
|
| -
|
| - emit_insn (gen_pic_load_addr_thumb2 (pic_reg, pic_rtx));
|
| - emit_insn (gen_pic_load_dot_plus_four (pic_tmp, labelno));
|
| - emit_insn (gen_addsi3 (pic_reg, pic_reg, pic_tmp));
|
| + emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
|
| }
|
| else /* TARGET_THUMB1 */
|
| {
|
| @@ -3920,8 +5191,8 @@ pcrel_constant_p (rtx x)
|
|
|
| /* Return nonzero if X is a valid ARM state address operand. */
|
| int
|
| -arm_legitimate_address_p (enum machine_mode mode, rtx x, RTX_CODE outer,
|
| - int strict_p)
|
| +arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
|
| + int strict_p)
|
| {
|
| bool use_ldrd;
|
| enum rtx_code code = GET_CODE (x);
|
| @@ -4005,7 +5276,7 @@ arm_legitimate_address_p (enum machine_mode mode, rtx x, RTX_CODE outer,
|
| }
|
|
|
| /* Return nonzero if X is a valid Thumb-2 address operand. */
|
| -int
|
| +static int
|
| thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
|
| {
|
| bool use_ldrd;
|
| @@ -4131,6 +5402,7 @@ arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
|
| if (GET_MODE_SIZE (mode) <= 4
|
| && ! (arm_arch4
|
| && (mode == HImode
|
| + || mode == HFmode
|
| || (mode == QImode && outer == SIGN_EXTEND))))
|
| {
|
| if (code == MULT)
|
| @@ -4159,13 +5431,15 @@ arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
|
| load. */
|
| if (arm_arch4)
|
| {
|
| - if (mode == HImode || (outer == SIGN_EXTEND && mode == QImode))
|
| + if (mode == HImode
|
| + || mode == HFmode
|
| + || (outer == SIGN_EXTEND && mode == QImode))
|
| range = 256;
|
| else
|
| range = 4096;
|
| }
|
| else
|
| - range = (mode == HImode) ? 4095 : 4096;
|
| + range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
|
|
|
| return (code == CONST_INT
|
| && INTVAL (index) < range
|
| @@ -4226,15 +5500,17 @@ thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
|
|
|
| if (mode == DImode || mode == DFmode)
|
| {
|
| - HOST_WIDE_INT val = INTVAL (index);
|
| - /* ??? Can we assume ldrd for thumb2? */
|
| - /* Thumb-2 ldrd only has reg+const addressing modes. */
|
| - if (code != CONST_INT)
|
| + if (code == CONST_INT)
|
| + {
|
| + HOST_WIDE_INT val = INTVAL (index);
|
| + /* ??? Can we assume ldrd for thumb2? */
|
| + /* Thumb-2 ldrd only has reg+const addressing modes. */
|
| + /* ldrd supports offsets of +-1020.
|
| + However the ldr fallback does not. */
|
| + return val > -256 && val < 256 && (val & 3) == 0;
|
| + }
|
| + else
|
| return 0;
|
| -
|
| - /* ldrd supports offsets of +-1020.
|
| - However the ldr fallback does not. */
|
| - return val > -256 && val < 256 && (val & 3) == 0;
|
| }
|
|
|
| if (code == MULT)
|
| @@ -4311,7 +5587,7 @@ thumb1_index_register_rtx_p (rtx x, int strict_p)
|
| addresses based on the frame pointer or arg pointer until the
|
| reload pass starts. This is so that eliminating such addresses
|
| into stack based ones won't produce impossible code. */
|
| -int
|
| +static int
|
| thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
|
| {
|
| /* ??? Not clear if this is right. Experiment. */
|
| @@ -4336,7 +5612,8 @@ thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
|
| return 1;
|
|
|
| /* This is PC relative data after arm_reorg runs. */
|
| - else if (GET_MODE_SIZE (mode) >= 4 && reload_completed
|
| + else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
|
| + && reload_completed
|
| && (GET_CODE (x) == LABEL_REF
|
| || (GET_CODE (x) == CONST
|
| && GET_CODE (XEXP (x, 0)) == PLUS
|
| @@ -4425,6 +5702,17 @@ thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
|
| }
|
| }
|
|
|
| +bool
|
| +arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
|
| +{
|
| + if (TARGET_ARM)
|
| + return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
|
| + else if (TARGET_THUMB2)
|
| + return thumb2_legitimate_address_p (mode, x, strict_p);
|
| + else /* if (TARGET_THUMB1) */
|
| + return thumb1_legitimate_address_p (mode, x, strict_p);
|
| +}
|
| +
|
| /* Build the SYMBOL_REF for __tls_get_addr. */
|
|
|
| static GTY(()) rtx tls_get_addr_libfunc;
|
| @@ -4499,14 +5787,7 @@ arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
|
| if (TARGET_ARM)
|
| emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
|
| else if (TARGET_THUMB2)
|
| - {
|
| - rtx tmp;
|
| - /* Thumb-2 only allows very limited access to the PC. Calculate
|
| - the address in a temporary register. */
|
| - tmp = gen_reg_rtx (SImode);
|
| - emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
|
| - emit_insn (gen_addsi3(reg, reg, tmp));
|
| - }
|
| + emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
|
| else /* TARGET_THUMB1 */
|
| emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
|
|
|
| @@ -4562,15 +5843,7 @@ legitimize_tls_address (rtx x, rtx reg)
|
| if (TARGET_ARM)
|
| emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
|
| else if (TARGET_THUMB2)
|
| - {
|
| - rtx tmp;
|
| - /* Thumb-2 only allows very limited access to the PC. Calculate
|
| - the address in a temporary register. */
|
| - tmp = gen_reg_rtx (SImode);
|
| - emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
|
| - emit_insn (gen_addsi3(reg, reg, tmp));
|
| - emit_move_insn (reg, gen_const_mem (SImode, reg));
|
| - }
|
| + emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
|
| else
|
| {
|
| emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
|
| @@ -4601,6 +5874,14 @@ legitimize_tls_address (rtx x, rtx reg)
|
| rtx
|
| arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
|
| {
|
| + if (!TARGET_ARM)
|
| + {
|
| + /* TODO: legitimize_address for Thumb2. */
|
| + if (TARGET_THUMB2)
|
| + return x;
|
| + return thumb_legitimize_address (x, orig_x, mode);
|
| + }
|
| +
|
| if (arm_tls_symbol_p (x))
|
| return legitimize_tls_address (x, NULL_RTX);
|
|
|
| @@ -4652,7 +5933,7 @@ arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
|
| }
|
|
|
| /* XXX We don't allow MINUS any more -- see comment in
|
| - arm_legitimate_address_p (). */
|
| + arm_legitimate_address_outer_p (). */
|
| else if (GET_CODE (x) == MINUS)
|
| {
|
| rtx xop0 = XEXP (x, 0);
|
| @@ -4799,7 +6080,7 @@ thumb_legitimize_reload_address (rtx *x_p,
|
|
|
| x = copy_rtx (x);
|
| push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
|
| - Pmode, VOIDmode, 0, 0, opnum, type);
|
| + Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
|
| return x;
|
| }
|
|
|
| @@ -4816,7 +6097,7 @@ thumb_legitimize_reload_address (rtx *x_p,
|
|
|
| x = copy_rtx (x);
|
| push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
|
| - Pmode, VOIDmode, 0, 0, opnum, type);
|
| + Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
|
| return x;
|
| }
|
|
|
| @@ -4944,9 +6225,18 @@ thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
|
| else if ((outer == PLUS || outer == COMPARE)
|
| && INTVAL (x) < 256 && INTVAL (x) > -256)
|
| return 0;
|
| - else if (outer == AND
|
| + else if ((outer == IOR || outer == XOR || outer == AND)
|
| && INTVAL (x) < 256 && INTVAL (x) >= -256)
|
| return COSTS_N_INSNS (1);
|
| + else if (outer == AND)
|
| + {
|
| + int i;
|
| + /* This duplicates the tests in the andsi3 expander. */
|
| + for (i = 9; i <= 31; i++)
|
| + if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
|
| + || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
|
| + return COSTS_N_INSNS (2);
|
| + }
|
| else if (outer == ASHIFT || outer == ASHIFTRT
|
| || outer == LSHIFTRT)
|
| return 0;
|
| @@ -5035,7 +6325,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
|
| case UMOD:
|
| if (TARGET_HARD_FLOAT && mode == SFmode)
|
| *total = COSTS_N_INSNS (2);
|
| - else if (TARGET_HARD_FLOAT && mode == DFmode)
|
| + else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
|
| *total = COSTS_N_INSNS (4);
|
| else
|
| *total = COSTS_N_INSNS (20);
|
| @@ -5113,7 +6403,9 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
|
|
|
| if (GET_MODE_CLASS (mode) == MODE_FLOAT)
|
| {
|
| - if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
|
| + if (TARGET_HARD_FLOAT
|
| + && (mode == SFmode
|
| + || (mode == DFmode && !TARGET_VFP_SINGLE)))
|
| {
|
| *total = COSTS_N_INSNS (1);
|
| if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
|
| @@ -5154,10 +6446,17 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
|
| return true;
|
| }
|
|
|
| + /* A shift as part of RSB costs no more than RSB itself. */
|
| + if (GET_CODE (XEXP (x, 0)) == MULT
|
| + && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
|
| + {
|
| + *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
|
| + *total += rtx_cost (XEXP (x, 1), code, speed);
|
| + return true;
|
| + }
|
| +
|
| if (subcode == MULT
|
| - && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
|
| - && ((INTVAL (XEXP (XEXP (x, 1), 1)) &
|
| - (INTVAL (XEXP (XEXP (x, 1), 1)) - 1)) == 0))
|
| + && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
|
| {
|
| *total += rtx_cost (XEXP (x, 0), code, speed);
|
| *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
|
| @@ -5193,9 +6492,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
|
| multiplication by a power of two, so that we fall down into
|
| the code below. */
|
| if (GET_CODE (XEXP (x, 0)) == MULT
|
| - && ! (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
|
| - && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
|
| - (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)))
|
| + && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
|
| {
|
| /* The cost comes from the cost of the multiply. */
|
| return false;
|
| @@ -5203,7 +6500,9 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
|
|
|
| if (GET_MODE_CLASS (mode) == MODE_FLOAT)
|
| {
|
| - if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
|
| + if (TARGET_HARD_FLOAT
|
| + && (mode == SFmode
|
| + || (mode == DFmode && !TARGET_VFP_SINGLE)))
|
| {
|
| *total = COSTS_N_INSNS (1);
|
| if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
|
| @@ -5278,9 +6577,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
|
| }
|
|
|
| if (subcode == MULT
|
| - && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
|
| - && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
|
| - (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0))
|
| + && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
|
| {
|
| *total += rtx_cost (XEXP (x, 1), code, speed);
|
| *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
|
| @@ -5318,7 +6615,9 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
|
| case NEG:
|
| if (GET_MODE_CLASS (mode) == MODE_FLOAT)
|
| {
|
| - if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
|
| + if (TARGET_HARD_FLOAT
|
| + && (mode == SFmode
|
| + || (mode == DFmode && !TARGET_VFP_SINGLE)))
|
| {
|
| *total = COSTS_N_INSNS (1);
|
| return false;
|
| @@ -5337,9 +6636,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
|
| || subcode == LSHIFTRT
|
| || subcode == ROTATE || subcode == ROTATERT
|
| || (subcode == MULT
|
| - && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
|
| - && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
|
| - (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)))
|
| + && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
|
| {
|
| *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
|
| /* Register shifts cost an extra cycle. */
|
| @@ -5447,9 +6744,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
|
| }
|
|
|
| if (subcode == MULT
|
| - && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
|
| - && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
|
| - (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0))
|
| + && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
|
| {
|
| *total += rtx_cost (XEXP (x, 1), code, speed);
|
| *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
|
| @@ -5469,9 +6764,11 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
|
| return true;
|
|
|
| case ABS:
|
| - if (GET_MODE_CLASS (mode == MODE_FLOAT))
|
| + if (GET_MODE_CLASS (mode) == MODE_FLOAT)
|
| {
|
| - if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
|
| + if (TARGET_HARD_FLOAT
|
| + && (mode == SFmode
|
| + || (mode == DFmode && !TARGET_VFP_SINGLE)))
|
| {
|
| *total = COSTS_N_INSNS (1);
|
| return false;
|
| @@ -5574,7 +6871,8 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
|
| return true;
|
|
|
| case CONST_DOUBLE:
|
| - if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x))
|
| + if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
|
| + && (mode == SFmode || !TARGET_VFP_SINGLE))
|
| *total = COSTS_N_INSNS (1);
|
| else
|
| *total = COSTS_N_INSNS (4);
|
| @@ -5649,7 +6947,8 @@ arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
|
| return false;
|
|
|
| case MINUS:
|
| - if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
|
| + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
|
| + && (mode == SFmode || !TARGET_VFP_SINGLE))
|
| {
|
| *total = COSTS_N_INSNS (1);
|
| return false;
|
| @@ -5679,12 +6978,23 @@ arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
|
| return false;
|
|
|
| case PLUS:
|
| - if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
|
| + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
|
| + && (mode == SFmode || !TARGET_VFP_SINGLE))
|
| {
|
| *total = COSTS_N_INSNS (1);
|
| return false;
|
| }
|
|
|
| + /* A shift as part of ADD costs nothing. */
|
| + if (GET_CODE (XEXP (x, 0)) == MULT
|
| + && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
|
| + {
|
| + *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
|
| + *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
|
| + *total += rtx_cost (XEXP (x, 1), code, false);
|
| + return true;
|
| + }
|
| +
|
| /* Fall through */
|
| case AND: case XOR: case IOR:
|
| if (mode == SImode)
|
| @@ -5709,7 +7019,8 @@ arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
|
| return false;
|
|
|
| case NEG:
|
| - if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
|
| + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
|
| + && (mode == SFmode || !TARGET_VFP_SINGLE))
|
| {
|
| *total = COSTS_N_INSNS (1);
|
| return false;
|
| @@ -5733,7 +7044,8 @@ arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
|
| return false;
|
|
|
| case ABS:
|
| - if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
|
| + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
|
| + && (mode == SFmode || !TARGET_VFP_SINGLE))
|
| *total = COSTS_N_INSNS (1);
|
| else
|
| *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
|
| @@ -5778,7 +7090,10 @@ arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
|
|
|
| case CONST_INT:
|
| if (const_ok_for_arm (INTVAL (x)))
|
| - *total = COSTS_N_INSNS (outer_code == SET ? 1 : 0);
|
| + /* A multiplication by a constant requires another instruction
|
| + to load the constant to a register. */
|
| + *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
|
| + ? 1 : 0);
|
| else if (const_ok_for_arm (~INTVAL (x)))
|
| *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
|
| else if (const_ok_for_arm (-INTVAL (x)))
|
| @@ -5825,10 +7140,12 @@ arm_rtx_costs (rtx x, int code, int outer_code, int *total,
|
| bool speed)
|
| {
|
| if (!speed)
|
| - return arm_size_rtx_costs (x, code, outer_code, total);
|
| + return arm_size_rtx_costs (x, (enum rtx_code) code,
|
| + (enum rtx_code) outer_code, total);
|
| else
|
| - return all_cores[(int)arm_tune].rtx_costs (x, code, outer_code, total,
|
| - speed);
|
| + return all_cores[(int)arm_tune].rtx_costs (x, (enum rtx_code) code,
|
| + (enum rtx_code) outer_code,
|
| + total, speed);
|
| }
|
|
|
| /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
|
| @@ -5950,7 +7267,9 @@ arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
|
|
|
| if (GET_MODE_CLASS (mode) == MODE_FLOAT)
|
| {
|
| - if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
|
| + if (TARGET_HARD_FLOAT
|
| + && (mode == SFmode
|
| + || (mode == DFmode && !TARGET_VFP_SINGLE)))
|
| {
|
| *total = COSTS_N_INSNS (1);
|
| return false;
|
| @@ -6107,7 +7426,9 @@ arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
|
|
|
| if (GET_MODE_CLASS (mode) == MODE_FLOAT)
|
| {
|
| - if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
|
| + if (TARGET_HARD_FLOAT
|
| + && (mode == SFmode
|
| + || (mode == DFmode && !TARGET_VFP_SINGLE)))
|
| {
|
| *total = COSTS_N_INSNS (1);
|
| return false;
|
| @@ -6135,9 +7456,9 @@ arm_arm_address_cost (rtx x)
|
| if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
|
| return 10;
|
|
|
| - if (c == PLUS || c == MINUS)
|
| + if (c == PLUS)
|
| {
|
| - if (GET_CODE (XEXP (x, 0)) == CONST_INT)
|
| + if (GET_CODE (XEXP (x, 1)) == CONST_INT)
|
| return 2;
|
|
|
| if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
|
| @@ -6753,25 +8074,171 @@ neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
|
| }
|
| }
|
|
|
| -/* Initialize a vector with non-constant elements. FIXME: We can do better
|
| - than the current implementation (building a vector on the stack and then
|
| - loading it) in many cases. See rs6000.c. */
|
| +/* If VALS is a vector constant that can be loaded into a register
|
| + using VDUP, generate instructions to do so and return an RTX to
|
| + assign to the register. Otherwise return NULL_RTX. */
|
| +
|
| +static rtx
|
| +neon_vdup_constant (rtx vals)
|
| +{
|
| + enum machine_mode mode = GET_MODE (vals);
|
| + enum machine_mode inner_mode = GET_MODE_INNER (mode);
|
| + int n_elts = GET_MODE_NUNITS (mode);
|
| + bool all_same = true;
|
| + rtx x;
|
| + int i;
|
| +
|
| + if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
|
| + return NULL_RTX;
|
| +
|
| + for (i = 0; i < n_elts; ++i)
|
| + {
|
| + x = XVECEXP (vals, 0, i);
|
| + if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
|
| + all_same = false;
|
| + }
|
| +
|
| + if (!all_same)
|
| + /* The elements are not all the same. We could handle repeating
|
| + patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
|
| + {0, C, 0, C, 0, C, 0, C} which can be loaded using
|
| + vdup.i16). */
|
| + return NULL_RTX;
|
| +
|
| + /* We can load this constant by using VDUP and a constant in a
|
| + single ARM register. This will be cheaper than a vector
|
| + load. */
|
| +
|
| + x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
|
| + return gen_rtx_UNSPEC (mode, gen_rtvec (1, x),
|
| + UNSPEC_VDUP_N);
|
| +}
|
| +
|
| +/* Generate code to load VALS, which is a PARALLEL containing only
|
| + constants (for vec_init) or CONST_VECTOR, efficiently into a
|
| + register. Returns an RTX to copy into the register, or NULL_RTX
|
| + for a PARALLEL that cannot be converted into a CONST_VECTOR. */
|
| +
|
| +rtx
|
| +neon_make_constant (rtx vals)
|
| +{
|
| + enum machine_mode mode = GET_MODE (vals);
|
| + rtx target;
|
| + rtx const_vec = NULL_RTX;
|
| + int n_elts = GET_MODE_NUNITS (mode);
|
| + int n_const = 0;
|
| + int i;
|
| +
|
| + if (GET_CODE (vals) == CONST_VECTOR)
|
| + const_vec = vals;
|
| + else if (GET_CODE (vals) == PARALLEL)
|
| + {
|
| + /* A CONST_VECTOR must contain only CONST_INTs and
|
| + CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
|
| + Only store valid constants in a CONST_VECTOR. */
|
| + for (i = 0; i < n_elts; ++i)
|
| + {
|
| + rtx x = XVECEXP (vals, 0, i);
|
| + if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
|
| + n_const++;
|
| + }
|
| + if (n_const == n_elts)
|
| + const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
|
| + }
|
| + else
|
| + gcc_unreachable ();
|
| +
|
| + if (const_vec != NULL
|
| + && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
|
| + /* Load using VMOV. On Cortex-A8 this takes one cycle. */
|
| + return const_vec;
|
| + else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
|
| + /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
|
| + pipeline cycle; creating the constant takes one or two ARM
|
| + pipeline cycles. */
|
| + return target;
|
| + else if (const_vec != NULL_RTX)
|
| + /* Load from constant pool. On Cortex-A8 this takes two cycles
|
| + (for either double or quad vectors). We cannot take advantage
|
| + of single-cycle VLD1 because we need a PC-relative addressing
|
| + mode. */
|
| + return const_vec;
|
| + else
|
| + /* A PARALLEL containing something not valid inside CONST_VECTOR.
|
| + We cannot construct an initializer. */
|
| + return NULL_RTX;
|
| +}
|
| +
|
| +/* Initialize vector TARGET to VALS. */
|
|
|
| void
|
| neon_expand_vector_init (rtx target, rtx vals)
|
| {
|
| enum machine_mode mode = GET_MODE (target);
|
| - enum machine_mode inner = GET_MODE_INNER (mode);
|
| - unsigned int i, n_elts = GET_MODE_NUNITS (mode);
|
| - rtx mem;
|
| + enum machine_mode inner_mode = GET_MODE_INNER (mode);
|
| + int n_elts = GET_MODE_NUNITS (mode);
|
| + int n_var = 0, one_var = -1;
|
| + bool all_same = true;
|
| + rtx x, mem;
|
| + int i;
|
| +
|
| + for (i = 0; i < n_elts; ++i)
|
| + {
|
| + x = XVECEXP (vals, 0, i);
|
| + if (!CONSTANT_P (x))
|
| + ++n_var, one_var = i;
|
| +
|
| + if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
|
| + all_same = false;
|
| + }
|
| +
|
| + if (n_var == 0)
|
| + {
|
| + rtx constant = neon_make_constant (vals);
|
| + if (constant != NULL_RTX)
|
| + {
|
| + emit_move_insn (target, constant);
|
| + return;
|
| + }
|
| + }
|
|
|
| - gcc_assert (VECTOR_MODE_P (mode));
|
| + /* Splat a single non-constant element if we can. */
|
| + if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
|
| + {
|
| + x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
|
| + emit_insn (gen_rtx_SET (VOIDmode, target,
|
| + gen_rtx_UNSPEC (mode, gen_rtvec (1, x),
|
| + UNSPEC_VDUP_N)));
|
| + return;
|
| + }
|
| +
|
| + /* One field is non-constant. Load constant then overwrite varying
|
| + field. This is more efficient than using the stack. */
|
| + if (n_var == 1)
|
| + {
|
| + rtx copy = copy_rtx (vals);
|
| + rtvec ops;
|
|
|
| + /* Load constant part of vector, substitute neighboring value for
|
| + varying element. */
|
| + XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
|
| + neon_expand_vector_init (target, copy);
|
| +
|
| + /* Insert variable. */
|
| + x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
|
| + ops = gen_rtvec (3, x, target, GEN_INT (one_var));
|
| + emit_insn (gen_rtx_SET (VOIDmode, target,
|
| + gen_rtx_UNSPEC (mode, ops, UNSPEC_VSET_LANE)));
|
| + return;
|
| + }
|
| +
|
| + /* Construct the vector in memory one field at a time
|
| + and load the whole vector. */
|
| mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
|
| for (i = 0; i < n_elts; i++)
|
| - emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
|
| - XVECEXP (vals, 0, i));
|
| -
|
| + emit_move_insn (adjust_address_nv (mem, inner_mode,
|
| + i * GET_MODE_SIZE (inner_mode)),
|
| + XVECEXP (vals, 0, i));
|
| emit_move_insn (target, mem);
|
| }
|
|
|
| @@ -6930,10 +8397,13 @@ arm_coproc_mem_operand (rtx op, bool wb)
|
| }
|
|
|
| /* Return TRUE if OP is a memory operand which we can load or store a vector
|
| - to/from. If CORE is true, we're moving from ARM registers not Neon
|
| - registers. */
|
| + to/from. TYPE is one of the following values:
|
| + 0 - Vector load/store (vldr)
|
| + 1 - Core registers (ldm)
|
| + 2 - Element/structure loads (vld1)
|
| + */
|
| int
|
| -neon_vector_mem_operand (rtx op, bool core)
|
| +neon_vector_mem_operand (rtx op, int type)
|
| {
|
| rtx ind;
|
|
|
| @@ -6966,23 +8436,15 @@ neon_vector_mem_operand (rtx op, bool core)
|
| return arm_address_register_rtx_p (ind, 0);
|
|
|
| /* Allow post-increment with Neon registers. */
|
| - if (!core && GET_CODE (ind) == POST_INC)
|
| + if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC))
|
| return arm_address_register_rtx_p (XEXP (ind, 0), 0);
|
|
|
| -#if 0
|
| - /* FIXME: We can support this too if we use VLD1/VST1. */
|
| - if (!core
|
| - && GET_CODE (ind) == POST_MODIFY
|
| - && arm_address_register_rtx_p (XEXP (ind, 0), 0)
|
| - && GET_CODE (XEXP (ind, 1)) == PLUS
|
| - && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
|
| - ind = XEXP (ind, 1);
|
| -#endif
|
| + /* FIXME: vld1 allows register post-modify. */
|
|
|
| /* Match:
|
| (plus (reg)
|
| (const)). */
|
| - if (!core
|
| + if (type == 0
|
| && GET_CODE (ind) == PLUS
|
| && GET_CODE (XEXP (ind, 0)) == REG
|
| && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
|
| @@ -7049,10 +8511,19 @@ arm_eliminable_register (rtx x)
|
| enum reg_class
|
| coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
|
| {
|
| + if (mode == HFmode)
|
| + {
|
| + if (!TARGET_NEON_FP16)
|
| + return GENERAL_REGS;
|
| + if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
|
| + return NO_REGS;
|
| + return GENERAL_REGS;
|
| + }
|
| +
|
| if (TARGET_NEON
|
| && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
|
| || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
|
| - && neon_vector_mem_operand (x, FALSE))
|
| + && neon_vector_mem_operand (x, 0))
|
| return NO_REGS;
|
|
|
| if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
|
| @@ -7330,28 +8801,21 @@ tls_mentioned_p (rtx x)
|
| }
|
| }
|
|
|
| -/* Must not copy a SET whose source operand is PC-relative. */
|
| +/* Must not copy any rtx that uses a pc-relative address. */
|
| +
|
| +static int
|
| +arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
|
| +{
|
| + if (GET_CODE (*x) == UNSPEC
|
| + && XINT (*x, 1) == UNSPEC_PIC_BASE)
|
| + return 1;
|
| + return 0;
|
| +}
|
|
|
| static bool
|
| arm_cannot_copy_insn_p (rtx insn)
|
| {
|
| - rtx pat = PATTERN (insn);
|
| -
|
| - if (GET_CODE (pat) == SET)
|
| - {
|
| - rtx rhs = SET_SRC (pat);
|
| -
|
| - if (GET_CODE (rhs) == UNSPEC
|
| - && XINT (rhs, 1) == UNSPEC_PIC_BASE)
|
| - return TRUE;
|
| -
|
| - if (GET_CODE (rhs) == MEM
|
| - && GET_CODE (XEXP (rhs, 0)) == UNSPEC
|
| - && XINT (XEXP (rhs, 0), 1) == UNSPEC_PIC_BASE)
|
| - return TRUE;
|
| - }
|
| -
|
| - return FALSE;
|
| + return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
|
| }
|
|
|
| enum rtx_code
|
| @@ -7412,7 +8876,7 @@ adjacent_mem_locations (rtx a, rtx b)
|
| /* Don't accept any offset that will require multiple
|
| instructions to handle, since this would cause the
|
| arith_adjacentmem pattern to output an overlong sequence. */
|
| - if (!const_ok_for_op (PLUS, val0) || !const_ok_for_op (PLUS, val1))
|
| + if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
|
| return 0;
|
|
|
| /* Don't allow an eliminable register: register elimination can make
|
| @@ -8330,7 +9794,8 @@ arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
|
|
|
| /* A compare with a shifted operand. Because of canonicalization, the
|
| comparison will have to be swapped when we emit the assembler. */
|
| - if (GET_MODE (y) == SImode && GET_CODE (y) == REG
|
| + if (GET_MODE (y) == SImode
|
| + && (REG_P (y) || (GET_CODE (y) == SUBREG))
|
| && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
|
| || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
|
| || GET_CODE (x) == ROTATERT))
|
| @@ -8338,7 +9803,8 @@ arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
|
|
|
| /* This operation is performed swapped, but since we only rely on the Z
|
| flag we don't need an additional mode. */
|
| - if (GET_MODE (y) == SImode && REG_P (y)
|
| + if (GET_MODE (y) == SImode
|
| + && (REG_P (y) || (GET_CODE (y) == SUBREG))
|
| && GET_CODE (x) == NEG
|
| && (op == EQ || op == NE))
|
| return CC_Zmode;
|
| @@ -10184,9 +11650,14 @@ vfp_emit_fstmd (int base_reg, int count)
|
|
|
| XVECEXP (par, 0, 0)
|
| = gen_rtx_SET (VOIDmode,
|
| - gen_frame_mem (BLKmode,
|
| - gen_rtx_PRE_DEC (BLKmode,
|
| - stack_pointer_rtx)),
|
| + gen_frame_mem
|
| + (BLKmode,
|
| + gen_rtx_PRE_MODIFY (Pmode,
|
| + stack_pointer_rtx,
|
| + plus_constant
|
| + (stack_pointer_rtx,
|
| + - (count * 8)))
|
| + ),
|
| gen_rtx_UNSPEC (BLKmode,
|
| gen_rtvec (1, reg),
|
| UNSPEC_PUSH_MULT));
|
| @@ -10218,8 +11689,7 @@ vfp_emit_fstmd (int base_reg, int count)
|
| }
|
|
|
| par = emit_insn (par);
|
| - REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
|
| - REG_NOTES (par));
|
| + add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
|
| RTX_FRAME_RELATED_P (par) = 1;
|
|
|
| return count * 8;
|
| @@ -10273,11 +11743,14 @@ output_call (rtx *operands)
|
| return "";
|
| }
|
|
|
| -/* Output a 'call' insn that is a reference in memory. */
|
| +/* Output a 'call' insn that is a reference in memory. This is
|
| + disabled for ARMv5, where we prefer to load the target into a
|
| + register and use blx, because calling through memory otherwise
|
| + carries a significant performance overhead. */
|
| const char *
|
| output_call_mem (rtx *operands)
|
| {
|
| - if (TARGET_INTERWORK && !arm_arch5)
|
| + gcc_assert (!arm_arch5);
|
| + if (TARGET_INTERWORK)
|
| {
|
| output_asm_insn ("ldr%?\t%|ip, %0", operands);
|
| output_asm_insn ("mov%?\t%|lr, %|pc", operands);
|
| @@ -10289,16 +11762,11 @@ output_call_mem (rtx *operands)
|
| first instruction. It's safe to use IP as the target of the
|
| load since the call will kill it anyway. */
|
| output_asm_insn ("ldr%?\t%|ip, %0", operands);
|
| - if (arm_arch5)
|
| - output_asm_insn ("blx%?\t%|ip", operands);
|
| + output_asm_insn ("mov%?\t%|lr, %|pc", operands);
|
| + if (arm_arch4t)
|
| + output_asm_insn ("bx%?\t%|ip", operands);
|
| else
|
| - {
|
| - output_asm_insn ("mov%?\t%|lr, %|pc", operands);
|
| - if (arm_arch4t)
|
| - output_asm_insn ("bx%?\t%|ip", operands);
|
| - else
|
| - output_asm_insn ("mov%?\t%|pc, %|ip", operands);
|
| - }
|
| + output_asm_insn ("mov%?\t%|pc, %|ip", operands);
|
| }
|
| else
|
| {
|
| @@ -10385,14 +11853,23 @@ output_mov_long_double_arm_from_arm (rtx *operands)
|
| return "";
|
| }
|
|
|
| -
|
| -/* Emit a MOVW/MOVT pair. */
|
| -void arm_emit_movpair (rtx dest, rtx src)
|
| -{
|
| - emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
|
| - emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
|
| -}
|
| -
|
| +void
|
| +arm_emit_movpair (rtx dest, rtx src)
|
| + {
|
| + /* If the src is an immediate, load it directly: MOVW for the
|
| + low half, plus MOVT when the high half is non-zero. */
|
| + if (CONST_INT_P (src))
|
| + {
|
| + HOST_WIDE_INT val = INTVAL (src);
|
| + emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
|
| + if ((val >> 16) & 0x0000ffff)
|
| + emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
|
| + GEN_INT (16)),
|
| + GEN_INT ((val >> 16) & 0x0000ffff));
|
| + return;
|
| + }
|
| + emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
|
| + emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
|
| + }
|
|
|
| /* Output a move from arm registers to an fpa registers.
|
| OPERANDS[0] is an fpa register.
|
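| In the rewritten arm_emit_movpair above, the plain SET of the masked
|
| value becomes a MOVW (which also clears the upper half), and the
|
| ZERO_EXTRACT assignment becomes a MOVT into bits 16-31 that is
|
| skipped when the upper half is zero. A standalone sketch of the
|
| split, simplified to a 32-bit value (the GCC code masks a
|
| HOST_WIDE_INT, and the register name here is illustrative):
|
|   #include <stdint.h>
|
|   #include <stdio.h>
|
|   static void
|
|   emit_movpair_sketch (uint32_t val)
|
|   {
|
|     /* MOVW writes the low 16 bits and zeroes the high 16.  */
|
|     printf ("movw\tr0, #0x%x\n", val & 0xffffu);
|
|     /* MOVT overwrites only bits 16-31; emit it only when needed.  */
|
|     if (val >> 16)
|
|       printf ("movt\tr0, #0x%x\n", val >> 16);
|
|   }
|
|   int
|
|   main (void)
|
|   {
|
|     emit_movpair_sketch (0x12345678);   /* movw #0x5678; movt #0x1234 */
|
|     emit_movpair_sketch (0x00001234);   /* movw #0x1234 alone */
|
|     return 0;
|
|   }
|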
| @@ -10769,7 +12246,7 @@ output_move_double (rtx *operands)
|
| }
|
|
|
| /* Output a move, load or store for quad-word vectors in ARM registers. Only
|
| - handles MEMs accepted by neon_vector_mem_operand with CORE=true. */
|
| + handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
|
|
|
| const char *
|
| output_move_quad (rtx *operands)
|
| @@ -10965,6 +12442,13 @@ output_move_neon (rtx *operands)
|
| ops[1] = reg;
|
| break;
|
|
|
| + case PRE_DEC:
|
| + /* FIXME: Should we be using vld1/vst1 here in big-endian mode? */
|
| + templ = "v%smdb%%?\t%%0!, %%h1";
|
| + ops[0] = XEXP (addr, 0);
|
| + ops[1] = reg;
|
| + break;
|
| +
|
| case POST_MODIFY:
|
| /* FIXME: Not currently enabled in neon_vector_mem_operand. */
|
| gcc_unreachable ();
|
| @@ -11014,6 +12498,56 @@ output_move_neon (rtx *operands)
|
| return "";
|
| }
|
|
|
| +/* Compute and return the length of neon_mov<mode>, where <mode> is
|
| + one of the VSTRUCT modes: EI, OI, CI or XI. */
|
| +int
|
| +arm_attr_length_move_neon (rtx insn)
|
| +{
|
| + rtx reg, mem, addr;
|
| + int load;
|
| + enum machine_mode mode;
|
| +
|
| + extract_insn_cached (insn);
|
| +
|
| + if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
|
| + {
|
| + mode = GET_MODE (recog_data.operand[0]);
|
| + switch (mode)
|
| + {
|
| + case EImode:
|
| + case OImode:
|
| + return 8;
|
| + case CImode:
|
| + return 12;
|
| + case XImode:
|
| + return 16;
|
| + default:
|
| + gcc_unreachable ();
|
| + }
|
| + }
|
| +
|
| + load = REG_P (recog_data.operand[0]);
|
| + reg = recog_data.operand[!load];
|
| + mem = recog_data.operand[load];
|
| +
|
| + gcc_assert (MEM_P (mem));
|
| +
|
| + mode = GET_MODE (reg);
|
| + addr = XEXP (mem, 0);
|
| +
|
| + /* Strip off const from addresses like (const (plus (...))). */
|
| + if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
|
| + addr = XEXP (addr, 0);
|
| +
|
| + if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
|
| + {
|
| + int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
|
| + return insns * 4;
|
| + }
|
| + else
|
| + return 4;
|
| +}
|
| +
|
| /* Output an ADD r, s, #n where n may be too big for one instruction.
|
| If adding zero to one register, output nothing. */
|
| const char *
|
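| arm_attr_length_move_neon above encodes two cost models: a
|
| register-to-register move takes one 4-byte instruction per 16-byte
|
| quad register (8 bytes for EImode/OImode, 12 for CImode, 16 for
|
| XImode), a LABEL_REF/PLUS address is handled with one double-register
|
| access per instruction, and any other accepted address is a single
|
| 4-byte instruction. The arithmetic, sketched under the assumption
|
| that HARD_REGNO_NREGS counts 4-byte words for these modes:
|
|   /* MODE_BYTES is 24 (EI), 32 (OI), 48 (CI) or 64 (XI).  */
|
|   static int
|
|   neon_move_length_sketch (int mode_bytes, int reg_to_reg,
|
|                            int label_or_plus)
|
|   {
|
|     if (reg_to_reg)
|
|       return 4 * ((mode_bytes + 15) / 16); /* one insn per quad reg */
|
|     if (label_or_plus)
|
|       return 4 * (mode_bytes / 8);         /* one insn per double reg */
|
|     return 4;                              /* single access otherwise */
|
|   }
|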
| @@ -11320,6 +12854,20 @@ arm_compute_save_reg0_reg12_mask (void)
|
| && crtl->uses_pic_offset_table)
|
| save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
|
| }
|
| + else if (IS_VOLATILE (func_type))
|
| + {
|
| + /* For noreturn functions we historically omitted register saves
|
| + altogether. However, this really messes up debugging. As a
|
| + compromise, save just the frame pointers. Combined with the link
|
| + register saved elsewhere, this should be sufficient to get
|
| + a backtrace. */
|
| + if (frame_pointer_needed)
|
| + save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
|
| + if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
|
| + save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
|
| + if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
|
| + save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
|
| + }
|
| else
|
| {
|
| /* In the normal case we only need to save those registers
|
| @@ -11406,11 +12954,6 @@ arm_compute_save_reg_mask (void)
|
| | (1 << LR_REGNUM)
|
| | (1 << PC_REGNUM);
|
|
|
| - /* Volatile functions do not return, so there
|
| - is no need to save any other registers. */
|
| - if (IS_VOLATILE (func_type))
|
| - return save_reg_mask;
|
| -
|
| save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
|
|
|
| /* Decide if we need to save the link register.
|
| @@ -11629,7 +13172,7 @@ output_return_instruction (rtx operand, int really_return, int reverse)
|
|
|
| sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
|
|
|
| - return_used_this_function = 1;
|
| + cfun->machine->return_used_this_function = 1;
|
|
|
| offsets = arm_get_frame_offsets ();
|
| live_regs_mask = offsets->saved_regs_mask;
|
| @@ -11698,18 +13241,28 @@ output_return_instruction (rtx operand, int really_return, int reverse)
|
| gcc_assert (stack_adjust == 0 || stack_adjust == 4);
|
|
|
| if (stack_adjust && arm_arch5 && TARGET_ARM)
|
| - sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
|
| + if (TARGET_UNIFIED_ASM)
|
| + sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
|
| + else
|
| + sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
|
| else
|
| {
|
| /* If we can't use ldmib (SA110 bug),
|
| then try to pop r3 instead. */
|
| if (stack_adjust)
|
| live_regs_mask |= 1 << 3;
|
| - sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
|
| +
|
| + if (TARGET_UNIFIED_ASM)
|
| + sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
|
| + else
|
| + sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
|
| }
|
| }
|
| else
|
| - sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
|
| + if (TARGET_UNIFIED_ASM)
|
| + sprintf (instr, "pop%s\t{", conditional);
|
| + else
|
| + sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
|
|
|
| p = instr + strlen (instr);
|
|
|
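| The TARGET_UNIFIED_ASM branches added above differ only in where the
|
| condition lands: divided ARM syntax infixes it between the base
|
| mnemonic and the addressing-mode suffix, while unified syntax appends
|
| it to the whole mnemonic (and spells the popping form "pop"). A
|
| sketch with a literal condition string in place of the %?/%d0
|
| assembler escapes used by the real code:
|
|   #include <stdio.h>
|
|   int
|
|   main (void)
|
|   {
|
|     const char *conditional = "eq";  /* stands in for the escapes */
|
|     char instr[64];
|
|     sprintf (instr, "ldm%sfd\tsp!, {", conditional); /* divided: ldmeqfd */
|
|     puts (instr);
|
|     sprintf (instr, "pop%s\t{", conditional);        /* unified: popeq */
|
|     puts (instr);
|
|     return 0;
|
|   }
|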
| @@ -11894,7 +13447,6 @@ arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
|
| if (crtl->calls_eh_return)
|
| asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
|
|
|
| - return_used_this_function = 0;
|
| }
|
|
|
| const char *
|
| @@ -11915,7 +13467,8 @@ arm_output_epilogue (rtx sibling)
|
|
|
| /* If we have already generated the return instruction
|
| then it is futile to generate anything else. */
|
| - if (use_return_insn (FALSE, sibling) && return_used_this_function)
|
| + if (use_return_insn (FALSE, sibling)
|
| + && (cfun->machine->return_used_this_function != 0))
|
| return "";
|
|
|
| func_type = arm_current_func_type ();
|
| @@ -11957,7 +13510,7 @@ arm_output_epilogue (rtx sibling)
|
| /* This variable is for the Virtual Frame Pointer, not VFP regs. */
|
| int vfp_offset = offsets->frame;
|
|
|
| - if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
|
| + if (TARGET_FPA_EMU2)
|
| {
|
| for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
|
| if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
|
| @@ -12180,7 +13733,7 @@ arm_output_epilogue (rtx sibling)
|
| SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
|
| }
|
|
|
| - if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
|
| + if (TARGET_FPA_EMU2)
|
| {
|
| for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
|
| if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
|
| @@ -12221,24 +13774,29 @@ arm_output_epilogue (rtx sibling)
|
|
|
| if (TARGET_HARD_FLOAT && TARGET_VFP)
|
| {
|
| - start_reg = FIRST_VFP_REGNUM;
|
| - for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
|
| + int end_reg = LAST_VFP_REGNUM + 1;
|
| +
|
| + /* Scan the registers in reverse order. We need to match
|
| + any groupings made in the prologue and generate matching
|
| + pop operations. */
|
| + for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
|
| {
|
| if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
|
| - && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
|
| + && (!df_regs_ever_live_p (reg + 1)
|
| + || call_used_regs[reg + 1]))
|
| {
|
| - if (start_reg != reg)
|
| + if (end_reg > reg + 2)
|
| vfp_output_fldmd (f, SP_REGNUM,
|
| - (start_reg - FIRST_VFP_REGNUM) / 2,
|
| - (reg - start_reg) / 2);
|
| - start_reg = reg + 2;
|
| + (reg + 2 - FIRST_VFP_REGNUM) / 2,
|
| + (end_reg - (reg + 2)) / 2);
|
| + end_reg = reg;
|
| }
|
| }
|
| - if (start_reg != reg)
|
| - vfp_output_fldmd (f, SP_REGNUM,
|
| - (start_reg - FIRST_VFP_REGNUM) / 2,
|
| - (reg - start_reg) / 2);
|
| + if (end_reg > reg + 2)
|
| + vfp_output_fldmd (f, SP_REGNUM, 0,
|
| + (end_reg - (reg + 2)) / 2);
|
| }
|
| +
|
| if (TARGET_IWMMXT)
|
| for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
|
| if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
|
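| The loop above now scans the D registers from the top downwards, so
|
| each contiguous run of saved register pairs is popped as one block
|
| that mirrors the grouping the prologue pushed. The grouping logic,
|
| sketched over an array of liveness flags, one per register pair
|
| (names and callback are this sketch's own):
|
|   /* POP is invoked once per contiguous run of live pairs.  */
|
|   static void
|
|   pop_vfp_runs_sketch (const int live[], int n_pairs,
|
|                        void (*pop) (int first_pair, int count))
|
|   {
|
|     int end = n_pairs;              /* one past the current run */
|
|     for (int i = n_pairs - 1; i >= 0; i--)
|
|       if (!live[i])
|
|         {
|
|           if (end > i + 1)
|
|             pop (i + 1, end - (i + 1));
|
|           end = i;                  /* restart below the gap */
|
|         }
|
|     if (end > 0)
|
|       pop (0, end);                 /* run reaching down to pair 0 */
|
|   }
|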
| @@ -12362,7 +13920,7 @@ arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
|
| /* ??? Probably not safe to set this here, since it assumes that a
|
| function will be emitted as assembly immediately after we generate
|
| RTL for it. This does not happen for inline functions. */
|
| - return_used_this_function = 0;
|
| + cfun->machine->return_used_this_function = 0;
|
| }
|
| else /* TARGET_32BIT */
|
| {
|
| @@ -12370,7 +13928,7 @@ arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
|
| offsets = arm_get_frame_offsets ();
|
|
|
| gcc_assert (!use_return_insn (FALSE, NULL)
|
| - || !return_used_this_function
|
| + || (cfun->machine->return_used_this_function != 0)
|
| || offsets->saved_regs == offsets->outgoing_args
|
| || frame_pointer_needed);
|
|
|
| @@ -12407,16 +13965,17 @@ emit_multi_reg_push (unsigned long mask)
|
|
|
| /* For the body of the insn we are going to generate an UNSPEC in
|
| parallel with several USEs. This allows the insn to be recognized
|
| - by the push_multi pattern in the arm.md file. The insn looks
|
| - something like this:
|
| + by the push_multi pattern in the arm.md file.
|
| +
|
| + The body of the insn looks something like this:
|
|
|
| (parallel [
|
| - (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
|
| + (set (mem:BLK (pre_modify:SI (reg:SI sp)
|
| + (const_int:SI <num>)))
|
| (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
|
| - (use (reg:SI 11 fp))
|
| - (use (reg:SI 12 ip))
|
| - (use (reg:SI 14 lr))
|
| - (use (reg:SI 15 pc))
|
| + (use (reg:SI XX))
|
| + (use (reg:SI YY))
|
| + ...
|
| ])
|
|
|
| For the frame note however, we try to be more explicit and actually
|
| @@ -12429,13 +13988,20 @@ emit_multi_reg_push (unsigned long mask)
|
| (sequence [
|
| (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
|
| (set (mem:SI (reg:SI sp)) (reg:SI r4))
|
| - (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
|
| - (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
|
| - (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
|
| + (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
|
| + (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
|
| + ...
|
| ])
|
|
|
| - This sequence is used both by the code to support stack unwinding for
|
| - exceptions handlers and the code to generate dwarf2 frame debugging. */
|
| + FIXME: In an ideal world the PRE_MODIFY would not exist and
|
| + instead we'd have a parallel expression detailing all
|
| + the stores to the various memory addresses so that debug
|
| + information is more up-to-date. Remember, however, when making
|
| + that change to take care of the constraints with the push
|
| + instruction.
|
| +
|
| + Note also that this has to be taken care of for the VFP registers.
|
| +
|
| + For more details see PR43399. */
|
|
|
| par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
|
| dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
|
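| The PRE_MODIFY that replaces PRE_DEC throughout these push patterns
|
| makes the stack adjustment explicit: the base register is decremented
|
| by the full block size up front, and the stores land at the updated
|
| address, which is what STMDB/PUSH does. As a C analogy (pointer
|
| arithmetic stands in for the -4*num_regs adjustment):
|
|   #include <stdint.h>
|
|   /* Analogy for (mem (pre_modify sp (plus sp (const_int -4*n)))).  */
|
|   static uint32_t *
|
|   push_block_sketch (uint32_t *sp, const uint32_t *regs, int n)
|
|   {
|
|     sp -= n;                 /* sp := sp - 4*n, done first */
|
|     for (int i = 0; i < n; i++)
|
|       sp[i] = regs[i];       /* lowest register at lowest address */
|
|     return sp;               /* callers keep the modified base */
|
|   }
|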
| @@ -12449,9 +14015,14 @@ emit_multi_reg_push (unsigned long mask)
|
|
|
| XVECEXP (par, 0, 0)
|
| = gen_rtx_SET (VOIDmode,
|
| - gen_frame_mem (BLKmode,
|
| - gen_rtx_PRE_DEC (BLKmode,
|
| - stack_pointer_rtx)),
|
| + gen_frame_mem
|
| + (BLKmode,
|
| + gen_rtx_PRE_MODIFY (Pmode,
|
| + stack_pointer_rtx,
|
| + plus_constant
|
| + (stack_pointer_rtx,
|
| + -4 * num_regs))
|
| + ),
|
| gen_rtx_UNSPEC (BLKmode,
|
| gen_rtvec (1, reg),
|
| UNSPEC_PUSH_MULT));
|
| @@ -12482,9 +14053,10 @@ emit_multi_reg_push (unsigned long mask)
|
| {
|
| tmp
|
| = gen_rtx_SET (VOIDmode,
|
| - gen_frame_mem (SImode,
|
| - plus_constant (stack_pointer_rtx,
|
| - 4 * j)),
|
| + gen_frame_mem
|
| + (SImode,
|
| + plus_constant (stack_pointer_rtx,
|
| + 4 * j)),
|
| reg);
|
| RTX_FRAME_RELATED_P (tmp) = 1;
|
| XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
|
| @@ -12502,8 +14074,8 @@ emit_multi_reg_push (unsigned long mask)
|
| RTX_FRAME_RELATED_P (tmp) = 1;
|
| XVECEXP (dwarf, 0, 0) = tmp;
|
|
|
| - REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
|
| - REG_NOTES (par));
|
| + add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
|
| +
|
| return par;
|
| }
|
|
|
| @@ -12536,9 +14108,14 @@ emit_sfm (int base_reg, int count)
|
|
|
| XVECEXP (par, 0, 0)
|
| = gen_rtx_SET (VOIDmode,
|
| - gen_frame_mem (BLKmode,
|
| - gen_rtx_PRE_DEC (BLKmode,
|
| - stack_pointer_rtx)),
|
| + gen_frame_mem
|
| + (BLKmode,
|
| + gen_rtx_PRE_MODIFY (Pmode,
|
| + stack_pointer_rtx,
|
| + plus_constant
|
| + (stack_pointer_rtx,
|
| + -12 * count))
|
| + ),
|
| gen_rtx_UNSPEC (BLKmode,
|
| gen_rtvec (1, reg),
|
| UNSPEC_PUSH_MULT));
|
| @@ -12569,8 +14146,8 @@ emit_sfm (int base_reg, int count)
|
| XVECEXP (dwarf, 0, 0) = tmp;
|
|
|
| par = emit_insn (par);
|
| - REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
|
| - REG_NOTES (par));
|
| + add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
|
| +
|
| return par;
|
| }
|
|
|
| @@ -12748,22 +14325,24 @@ arm_get_frame_offsets (void)
|
| {
|
| int reg = -1;
|
|
|
| - for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
|
| - {
|
| - if ((offsets->saved_regs_mask & (1 << i)) == 0)
|
| - {
|
| - reg = i;
|
| - break;
|
| - }
|
| - }
|
| -
|
| - if (reg == -1 && arm_size_return_regs () <= 12
|
| - && !crtl->tail_call_emit)
|
| + /* If it is safe to use r3, then do so. This sometimes
|
| + generates better code on Thumb-2 by avoiding the need to
|
| + use 32-bit push/pop instructions. */
|
| + if (!crtl->tail_call_emit
|
| + && arm_size_return_regs () <= 12
|
| + && (offsets->saved_regs_mask & (1 << 3)) == 0)
|
| {
|
| - /* Push/pop an argument register (r3) if all callee saved
|
| - registers are already being pushed. */
|
| reg = 3;
|
| }
|
| + else
|
| + for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
|
| + {
|
| + if ((offsets->saved_regs_mask & (1 << i)) == 0)
|
| + {
|
| + reg = i;
|
| + break;
|
| + }
|
| + }
|
|
|
| if (reg != -1)
|
| {
|
| @@ -12863,6 +14442,24 @@ arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
|
| }
|
| }
|
|
|
| +/* Given FROM and TO register numbers, say whether this elimination is
|
| + allowed. Frame pointer elimination is automatically handled.
|
| +
|
| + All eliminations are permissible. Note that ARG_POINTER_REGNUM and
|
| + HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
|
| + pointer, we must eliminate FRAME_POINTER_REGNUM into
|
| + HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
|
| + ARG_POINTER_REGNUM. */
|
| +
|
| +bool
|
| +arm_can_eliminate (const int from, const int to)
|
| +{
|
| + return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
|
| + (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
|
| + (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
|
| + (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
|
| + true);
|
| +}
|
|
|
| /* Emit RTL to save coprocessor registers on function entry. Returns the
|
| number of bytes pushed. */
|
| @@ -12878,7 +14475,7 @@ arm_save_coproc_regs(void)
|
| for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
|
| if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
|
| {
|
| - insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
|
| + insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
|
| insn = gen_rtx_MEM (V2SImode, insn);
|
| insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
|
| RTX_FRAME_RELATED_P (insn) = 1;
|
| @@ -12887,12 +14484,12 @@ arm_save_coproc_regs(void)
|
|
|
| /* Save any floating point call-saved registers used by this
|
| function. */
|
| - if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
|
| + if (TARGET_FPA_EMU2)
|
| {
|
| for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
|
| if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
|
| {
|
| - insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
|
| + insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
|
| insn = gen_rtx_MEM (XFmode, insn);
|
| insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
|
| RTX_FRAME_RELATED_P (insn) = 1;
|
| @@ -12989,8 +14586,7 @@ thumb_set_frame_pointer (arm_stack_offsets *offsets)
|
| dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
|
| plus_constant (stack_pointer_rtx, amount));
|
| RTX_FRAME_RELATED_P (dwarf) = 1;
|
| - REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
|
| - REG_NOTES (insn));
|
| + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
|
| }
|
|
|
| RTX_FRAME_RELATED_P (insn) = 1;
|
| @@ -13053,8 +14649,7 @@ arm_expand_prologue (void)
|
| dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
|
| insn = gen_movsi (r0, stack_pointer_rtx);
|
| RTX_FRAME_RELATED_P (insn) = 1;
|
| - REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
|
| - dwarf, REG_NOTES (insn));
|
| + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
|
| emit_insn (insn);
|
| emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
|
| emit_insn (gen_movsi (stack_pointer_rtx, r1));
|
| @@ -13121,8 +14716,7 @@ arm_expand_prologue (void)
|
| plus_constant (stack_pointer_rtx,
|
| -fp_offset));
|
| RTX_FRAME_RELATED_P (insn) = 1;
|
| - REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
|
| - dwarf, REG_NOTES (insn));
|
| + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
|
| }
|
| else
|
| {
|
| @@ -13715,6 +15309,30 @@ arm_print_operand (FILE *stream, rtx x, int code)
|
| }
|
| return;
|
|
|
| + /* Print the high single-precision register of a VFP double-precision
|
| + register. */
|
| + case 'p':
|
| + {
|
| + int mode = GET_MODE (x);
|
| + int regno;
|
| +
|
| + if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
|
| + {
|
| + output_operand_lossage ("invalid operand for code '%c'", code);
|
| + return;
|
| + }
|
| +
|
| + regno = REGNO (x);
|
| + if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
|
| + {
|
| + output_operand_lossage ("invalid operand for code '%c'", code);
|
| + return;
|
| + }
|
| +
|
| + fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
|
| + }
|
| + return;
|
| +
|
| /* Print a VFP/Neon double precision or quad precision register name. */
|
| case 'P':
|
| case 'q':
|
| @@ -13832,6 +15450,73 @@ arm_print_operand (FILE *stream, rtx x, int code)
|
| }
|
| return;
|
|
|
| + /* Memory operand for vld1/vst1 instruction. */
|
| + case 'A':
|
| + {
|
| + rtx addr;
|
| + bool postinc = FALSE;
|
| + gcc_assert (GET_CODE (x) == MEM);
|
| + addr = XEXP (x, 0);
|
| + if (GET_CODE (addr) == POST_INC)
|
| + {
|
| + postinc = 1;
|
| + addr = XEXP (addr, 0);
|
| + }
|
| + asm_fprintf (stream, "[%r]", REGNO (addr));
|
| + if (postinc)
|
| + fputs("!", stream);
|
| + }
|
| + return;
|
| +
|
| + /* Translate an S register number into a D register number and element index. */
|
| + case 'y':
|
| + {
|
| + int mode = GET_MODE (x);
|
| + int regno;
|
| +
|
| + if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
|
| + {
|
| + output_operand_lossage ("invalid operand for code '%c'", code);
|
| + return;
|
| + }
|
| +
|
| + regno = REGNO (x);
|
| + if (!VFP_REGNO_OK_FOR_SINGLE (regno))
|
| + {
|
| + output_operand_lossage ("invalid operand for code '%c'", code);
|
| + return;
|
| + }
|
| +
|
| + regno = regno - FIRST_VFP_REGNUM;
|
| + fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
|
| + }
|
| + return;
|
| +
|
| + /* Register specifier for vld1.16/vst1.16. Translate the S register
|
| + number into a D register number and element index. */
|
| + case 'z':
|
| + {
|
| + int mode = GET_MODE (x);
|
| + int regno;
|
| +
|
| + if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
|
| + {
|
| + output_operand_lossage ("invalid operand for code '%c'", code);
|
| + return;
|
| + }
|
| +
|
| + regno = REGNO (x);
|
| + if (!VFP_REGNO_OK_FOR_SINGLE (regno))
|
| + {
|
| + output_operand_lossage ("invalid operand for code '%c'", code);
|
| + return;
|
| + }
|
| +
|
| + regno = regno - FIRST_VFP_REGNUM;
|
| + fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
|
| + }
|
| + return;
|
| +
|
| default:
|
| if (x == 0)
|
| {
|
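| Both the 'y' and 'z' cases above rely on the VFP overlay of S onto D
|
| registers: s<2k> and s<2k+1> are the two halves of d<k>. 'y' prints
|
| the 32-bit lane index directly, while 'z' (for 16-bit vld1/vst1
|
| accesses) maps the odd half to 16-bit lane 2. A sketch of both
|
| mappings:
|
|   #include <stdio.h>
|
|   /* N is the S-register number; print the D-register lane forms.  */
|
|   static void
|
|   print_s_overlay_sketch (int n)
|
|   {
|
|     printf ("s%d -> d%d[%d]  ('y', 32-bit lanes)\n", n, n / 2, n % 2);
|
|     printf ("s%d -> d%d[%d]  ('z', 16-bit lanes)\n", n, n / 2,
|
|             (n % 2) ? 2 : 0);
|
|   }
|
|   int
|
|   main (void)
|
|   {
|
|     print_s_overlay_sketch (5);   /* s5 -> d2[1] and d2[2] */
|
|     return 0;
|
|   }
|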
| @@ -13865,6 +15550,12 @@ arm_print_operand (FILE *stream, rtx x, int code)
|
| default:
|
| gcc_assert (GET_CODE (x) != NEG);
|
| fputc ('#', stream);
|
| + if (GET_CODE (x) == HIGH)
|
| + {
|
| + fputs (":lower16:", stream);
|
| + x = XEXP (x, 0);
|
| + }
|
| +
|
| output_addr_const (stream, x);
|
| break;
|
| }
|
| @@ -14032,7 +15723,7 @@ static enum arm_cond_code
|
| get_arm_condition_code (rtx comparison)
|
| {
|
| enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
|
| - int code;
|
| + enum arm_cond_code code;
|
| enum rtx_code comp_code = GET_CODE (comparison);
|
|
|
| if (GET_MODE_CLASS (mode) != MODE_CC)
|
| @@ -14243,12 +15934,6 @@ arm_final_prescan_insn (rtx insn)
|
| reversed if it appears to fail. */
|
| int reverse = 0;
|
|
|
| - /* JUMP_CLOBBERS will be one implies that the conditions if a branch is
|
| - taken are clobbered, even if the rtl suggests otherwise. It also
|
| - means that we have to grub around within the jump expression to find
|
| - out what the conditions are when the jump isn't taken. */
|
| - int jump_clobbers = 0;
|
| -
|
| /* If we start with a return insn, we only succeed if we find another one. */
|
| int seeking_return = 0;
|
|
|
| @@ -14327,14 +16012,6 @@ arm_final_prescan_insn (rtx insn)
|
| int then_not_else = TRUE;
|
| rtx this_insn = start_insn, label = 0;
|
|
|
| - /* If the jump cannot be done with one instruction, we cannot
|
| - conditionally execute the instruction in the inverse case. */
|
| - if (get_attr_conds (insn) == CONDS_JUMP_CLOB)
|
| - {
|
| - jump_clobbers = 1;
|
| - return;
|
| - }
|
| -
|
| /* Register the insn jumped to. */
|
| if (reverse)
|
| {
|
| @@ -14377,13 +16054,7 @@ arm_final_prescan_insn (rtx insn)
|
| control falls in from somewhere else. */
|
| if (this_insn == label)
|
| {
|
| - if (jump_clobbers)
|
| - {
|
| - arm_ccfsm_state = 2;
|
| - this_insn = next_nonnote_insn (this_insn);
|
| - }
|
| - else
|
| - arm_ccfsm_state = 1;
|
| + arm_ccfsm_state = 1;
|
| succeed = TRUE;
|
| }
|
| else
|
| @@ -14398,13 +16069,7 @@ arm_final_prescan_insn (rtx insn)
|
| this_insn = next_nonnote_insn (this_insn);
|
| if (this_insn && this_insn == label)
|
| {
|
| - if (jump_clobbers)
|
| - {
|
| - arm_ccfsm_state = 2;
|
| - this_insn = next_nonnote_insn (this_insn);
|
| - }
|
| - else
|
| - arm_ccfsm_state = 1;
|
| + arm_ccfsm_state = 1;
|
| succeed = TRUE;
|
| }
|
| else
|
| @@ -14432,13 +16097,7 @@ arm_final_prescan_insn (rtx insn)
|
| if (this_insn && this_insn == label
|
| && insns_skipped < max_insns_skipped)
|
| {
|
| - if (jump_clobbers)
|
| - {
|
| - arm_ccfsm_state = 2;
|
| - this_insn = next_nonnote_insn (this_insn);
|
| - }
|
| - else
|
| - arm_ccfsm_state = 1;
|
| + arm_ccfsm_state = 1;
|
| succeed = TRUE;
|
| }
|
| else
|
| @@ -14544,25 +16203,11 @@ arm_final_prescan_insn (rtx insn)
|
| }
|
| arm_target_insn = this_insn;
|
| }
|
| - if (jump_clobbers)
|
| - {
|
| - gcc_assert (!reverse);
|
| - arm_current_cc =
|
| - get_arm_condition_code (XEXP (XEXP (XEXP (SET_SRC (body),
|
| - 0), 0), 1));
|
| - if (GET_CODE (XEXP (XEXP (SET_SRC (body), 0), 0)) == AND)
|
| - arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
|
| - if (GET_CODE (XEXP (SET_SRC (body), 0)) == NE)
|
| - arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
|
| - }
|
| - else
|
| - {
|
| - /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
|
| - what it was. */
|
| - if (!reverse)
|
| - arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body),
|
| - 0));
|
| - }
|
| +
|
| + /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
|
| + what it was. */
|
| + if (!reverse)
|
| + arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
|
|
|
| if (reverse || then_not_else)
|
| arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
|
| @@ -14629,6 +16274,11 @@ arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
|
| if (mode == DFmode)
|
| return VFP_REGNO_OK_FOR_DOUBLE (regno);
|
|
|
| + /* VFP registers can hold HFmode values, but there is no point in
|
| + putting them there unless we have hardware conversion insns. */
|
| + if (mode == HFmode)
|
| + return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
|
| +
|
| if (TARGET_NEON)
|
| return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
|
| || (VALID_NEON_QREG_MODE (mode)
|
| @@ -14651,13 +16301,13 @@ arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
|
| return VALID_IWMMXT_REG_MODE (mode);
|
| }
|
|
|
| - /* We allow any value to be stored in the general registers.
|
| + /* We allow almost any value to be stored in the general registers.
|
| Restrict doubleword quantities to even register pairs so that we can
|
| - use ldrd. Do not allow Neon structure opaque modes in general registers;
|
| - they would use too many. */
|
| + use ldrd. Do not allow very large Neon structure opaque modes in
|
| + general registers; they would use too many. */
|
| if (regno <= LAST_ARM_REGNUM)
|
| return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
|
| - && !VALID_NEON_STRUCT_MODE (mode);
|
| + && ARM_NUM_REGS (mode) <= 4;
|
|
|
| if (regno == FRAME_POINTER_REGNUM
|
| || regno == ARG_POINTER_REGNUM)
|
| @@ -14674,7 +16324,8 @@ arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
|
|
|
| /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
|
| not used in arm mode. */
|
| -int
|
| +
|
| +enum reg_class
|
| arm_regno_class (int regno)
|
| {
|
| if (TARGET_THUMB1)
|
| @@ -14828,7 +16479,7 @@ static const struct builtin_description bdesc_2arg[] =
|
| {
|
| #define IWMMXT_BUILTIN(code, string, builtin) \
|
| { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
|
| - ARM_BUILTIN_##builtin, 0, 0 },
|
| + ARM_BUILTIN_##builtin, UNKNOWN, 0 },
|
|
|
| IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
|
| IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
|
| @@ -14890,7 +16541,7 @@ static const struct builtin_description bdesc_2arg[] =
|
| IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
|
|
|
| #define IWMMXT_BUILTIN2(code, builtin) \
|
| - { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, 0, 0 },
|
| + { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
|
|
|
| IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
|
| IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
|
| @@ -15287,7 +16938,7 @@ arm_init_tls_builtins (void)
|
| TREE_READONLY (decl) = 1;
|
| }
|
|
|
| -typedef enum {
|
| +enum neon_builtin_type_bits {
|
| T_V8QI = 0x0001,
|
| T_V4HI = 0x0002,
|
| T_V2SI = 0x0004,
|
| @@ -15301,7 +16952,7 @@ typedef enum {
|
| T_TI = 0x0400,
|
| T_EI = 0x0800,
|
| T_OI = 0x1000
|
| -} neon_builtin_type_bits;
|
| +};
|
|
|
| #define v8qi_UP T_V8QI
|
| #define v4hi_UP T_V4HI
|
| @@ -15364,7 +17015,7 @@ typedef enum {
|
| typedef struct {
|
| const char *name;
|
| const neon_itype itype;
|
| - const neon_builtin_type_bits bits;
|
| + const int bits;
|
| const enum insn_code codes[T_MAX];
|
| const unsigned int num_vars;
|
| unsigned int base_fcode;
|
| @@ -16114,6 +17765,15 @@ arm_init_neon_builtins (void)
|
| }
|
|
|
| static void
|
| +arm_init_fp16_builtins (void)
|
| +{
|
| + tree fp16_type = make_node (REAL_TYPE);
|
| + TYPE_PRECISION (fp16_type) = 16;
|
| + layout_type (fp16_type);
|
| + (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
|
| +}
|
| +
|
| +static void
|
| arm_init_builtins (void)
|
| {
|
| arm_init_tls_builtins ();
|
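| arm_init_fp16_builtins above registers a 16-bit REAL_TYPE under the
|
| name __fp16, which is what lets source like the following sketch
|
| compile (ARM target with an fp16 format selected; note that the hooks
|
| added in the next hunk reject __fp16 as a parameter or return type):
|
|   /* Usage sketch; requires arm_fp16_format to be set.  */
|
|   __fp16 half_values[4] = { 1.0, 0.5, 0.25, 0.125 };
|
|   float
|
|   sum_halves (void)
|
|   {
|
|     float sum = 0.0f;
|
|     int i;
|
|     for (i = 0; i < 4; i++)
|
|       sum += half_values[i];   /* promoted to float for arithmetic */
|
|     return sum;
|
|   }
|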
| @@ -16123,6 +17783,71 @@ arm_init_builtins (void)
|
|
|
| if (TARGET_NEON)
|
| arm_init_neon_builtins ();
|
| +
|
| + if (arm_fp16_format)
|
| + arm_init_fp16_builtins ();
|
| +}
|
| +
|
| +/* Implement TARGET_INVALID_PARAMETER_TYPE. */
|
| +
|
| +static const char *
|
| +arm_invalid_parameter_type (const_tree t)
|
| +{
|
| + if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
|
| + return N_("function parameters cannot have __fp16 type");
|
| + return NULL;
|
| +}
|
| +
|
| +/* Implement TARGET_INVALID_RETURN_TYPE. */
|
| +
|
| +static const char *
|
| +arm_invalid_return_type (const_tree t)
|
| +{
|
| + if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
|
| + return N_("functions cannot return __fp16 type");
|
| + return NULL;
|
| +}
|
| +
|
| +/* Implement TARGET_PROMOTED_TYPE. */
|
| +
|
| +static tree
|
| +arm_promoted_type (const_tree t)
|
| +{
|
| + if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
|
| + return float_type_node;
|
| + return NULL_TREE;
|
| +}
|
| +
|
| +/* Implement TARGET_CONVERT_TO_TYPE.
|
| + Specifically, this hook implements the peculiarity of the ARM
|
| + half-precision floating-point C semantics that requires conversions
|
| + between __fp16 and double to go through an intermediate conversion
|
| + to float. */
|
| +
|
| +static tree
|
| +arm_convert_to_type (tree type, tree expr)
|
| +{
|
| + tree fromtype = TREE_TYPE (expr);
|
| + if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
|
| + return NULL_TREE;
|
| + if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
|
| + || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
|
| + return convert (type, convert (float_type_node, expr));
|
| + return NULL_TREE;
|
| +}
|
| +
|
| +/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
|
| + This simply adds HFmode as a supported mode; even though we don't
|
| + implement arithmetic on this type directly, it's supported by
|
| + optabs conversions, much the way the double-word arithmetic is
|
| + special-cased in the default hook. */
|
| +
|
| +static bool
|
| +arm_scalar_mode_supported_p (enum machine_mode mode)
|
| +{
|
| + if (mode == HFmode)
|
| + return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
|
| + else
|
| + return default_scalar_mode_supported_p (mode);
|
| }
|
|
|
| /* Errors in the source file can cause expand_expr to return const0_rtx
|
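| The conversion rule implemented by arm_convert_to_type above reads
|
| naturally at the source level: a conversion between __fp16 and double
|
| is rewritten as two steps with float in the middle. A sketch, with
|
| float standing in for the target-specific half type on the final
|
| step:
|
|   #include <stdio.h>
|
|   int
|
|   main (void)
|
|   {
|
|     double d = 0.1;
|
|     /* What the hook arranges for (__fp16) d: narrow to float first,
|
|        then let the float-to-half conversion finish the job.  */
|
|     float intermediate = (float) d;
|
|     /* __fp16 h = intermediate;  -- ARM-only, left as a comment.  */
|
|     printf ("%.17g narrows to %.17g\n", d, (double) intermediate);
|
|     return 0;
|
|   }
|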
| @@ -16281,7 +18006,7 @@ arm_expand_neon_args (rtx target, int icode, int have_retval,
|
|
|
| for (;;)
|
| {
|
| - builtin_arg thisarg = va_arg (ap, int);
|
| + builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
|
|
|
| if (thisarg == NEON_ARG_STOP)
|
| break;
|
| @@ -17202,6 +18927,7 @@ thumb_shiftable_const (unsigned HOST_WIDE_INT val)
|
| unsigned HOST_WIDE_INT mask = 0xff;
|
| int i;
|
|
|
| + val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
|
| if (val == 0) /* XXX */
|
| return 0;
|
|
|
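| The new masking line above matters on 64-bit hosts: a negative SImode
|
| constant arrives sign-extended in a HOST_WIDE_INT, and without the
|
| mask the search for a shiftable 8-bit pattern within 32 bits could
|
| never succeed. The effect, sketched:
|
|   #include <stdio.h>
|
|   int
|
|   main (void)
|
|   {
|
|     /* A negative SImode constant, sign-extended to 64 host bits.  */
|
|     long long val = -0x7fffffffLL - 1;     /* 0xffffffff80000000 */
|
|     unsigned long long masked = val & 0xffffffffULL;
|
|     printf ("%llx -> %llx\n", (unsigned long long) val, masked);
|
|     return 0;                              /* ... -> 80000000 */
|
|   }
|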
| @@ -17311,7 +19037,7 @@ thumb_unexpanded_epilogue (void)
|
| int had_to_push_lr;
|
| int size;
|
|
|
| - if (return_used_this_function)
|
| + if (cfun->machine->return_used_this_function != 0)
|
| return "";
|
|
|
| if (IS_NAKED (arm_current_func_type ()))
|
| @@ -17635,9 +19361,7 @@ thumb1_expand_prologue (void)
|
| plus_constant (stack_pointer_rtx,
|
| -amount));
|
| RTX_FRAME_RELATED_P (dwarf) = 1;
|
| - REG_NOTES (insn)
|
| - = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
|
| - REG_NOTES (insn));
|
| + add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
|
| }
|
| }
|
|
|
| @@ -18289,41 +20013,8 @@ arm_file_start (void)
|
| }
|
| else
|
| {
|
| - int set_float_abi_attributes = 0;
|
| - switch (arm_fpu_arch)
|
| - {
|
| - case FPUTYPE_FPA:
|
| - fpu_name = "fpa";
|
| - break;
|
| - case FPUTYPE_FPA_EMU2:
|
| - fpu_name = "fpe2";
|
| - break;
|
| - case FPUTYPE_FPA_EMU3:
|
| - fpu_name = "fpe3";
|
| - break;
|
| - case FPUTYPE_MAVERICK:
|
| - fpu_name = "maverick";
|
| - break;
|
| - case FPUTYPE_VFP:
|
| - fpu_name = "vfp";
|
| - set_float_abi_attributes = 1;
|
| - break;
|
| - case FPUTYPE_VFP3D16:
|
| - fpu_name = "vfpv3-d16";
|
| - set_float_abi_attributes = 1;
|
| - break;
|
| - case FPUTYPE_VFP3:
|
| - fpu_name = "vfpv3";
|
| - set_float_abi_attributes = 1;
|
| - break;
|
| - case FPUTYPE_NEON:
|
| - fpu_name = "neon";
|
| - set_float_abi_attributes = 1;
|
| - break;
|
| - default:
|
| - abort();
|
| - }
|
| - if (set_float_abi_attributes)
|
| + fpu_name = arm_fpu_desc->name;
|
| + if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
|
| {
|
| if (TARGET_HARD_FLOAT)
|
| asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
|
| @@ -18373,6 +20064,11 @@ arm_file_start (void)
|
| val = 6;
|
| asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
|
|
|
| + /* Tag_ABI_FP_16bit_format. */
|
| + if (arm_fp16_format)
|
| + asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
|
| + (int)arm_fp16_format);
|
| +
|
| if (arm_lang_output_object_attributes_hook)
|
| arm_lang_output_object_attributes_hook();
|
| }
|
| @@ -18602,6 +20298,23 @@ arm_emit_vector_const (FILE *file, rtx x)
|
| return 1;
|
| }
|
|
|
| +/* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
|
| + HFmode constant pool entries are actually loaded with ldr. */
|
| +void
|
| +arm_emit_fp16_const (rtx c)
|
| +{
|
| + REAL_VALUE_TYPE r;
|
| + long bits;
|
| +
|
| + REAL_VALUE_FROM_CONST_DOUBLE (r, c);
|
| + bits = real_to_target (NULL, &r, HFmode);
|
| + if (WORDS_BIG_ENDIAN)
|
| + assemble_zeros (2);
|
| + assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
|
| + if (!WORDS_BIG_ENDIAN)
|
| + assemble_zeros (2);
|
| +}
|
| +
|
| const char *
|
| arm_output_load_gr (rtx *operands)
|
| {
|
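| arm_emit_fp16_const above pads the 2-byte HFmode image out to a full
|
| word so the constant-pool entry can be fetched with a plain ldr; on a
|
| big-endian target the zero bytes come first, so either way the value
|
| ends up in the low 16 bits of the loaded word. A byte-level sketch
|
| (assemble_integer writes the value in target byte order):
|
|   #include <stdint.h>
|
|   #include <string.h>
|
|   /* BITS is the 16-bit image from real_to_target; OUT receives the
|
|      4 bytes in target memory order.  */
|
|   static void
|
|   emit_fp16_word_sketch (uint8_t out[4], uint16_t bits, int big_endian)
|
|   {
|
|     memset (out, 0, 4);              /* the two padding bytes */
|
|     if (big_endian)
|
|       {
|
|         out[2] = bits >> 8;          /* value after the padding */
|
|         out[3] = bits & 0xff;
|
|       }
|
|     else
|
|       {
|
|         out[0] = bits & 0xff;        /* value before the padding */
|
|         out[1] = bits >> 8;
|
|       }
|
|   }
|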
| @@ -18639,19 +20352,24 @@ arm_output_load_gr (rtx *operands)
|
| that way. */
|
|
|
| static void
|
| -arm_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
|
| +arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
|
| enum machine_mode mode,
|
| tree type,
|
| int *pretend_size,
|
| int second_time ATTRIBUTE_UNUSED)
|
| {
|
| - int nregs = cum->nregs;
|
| - if (nregs & 1
|
| - && ARM_DOUBLEWORD_ALIGN
|
| - && arm_needs_doubleword_align (mode, type))
|
| - nregs++;
|
| -
|
| + int nregs;
|
| +
|
| cfun->machine->uses_anonymous_args = 1;
|
| + if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
|
| + {
|
| + nregs = pcum->aapcs_ncrn;
|
| + if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
|
| + nregs++;
|
| + }
|
| + else
|
| + nregs = pcum->nregs;
|
| +
|
| if (nregs < NUM_ARG_REGS)
|
| *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
|
| }
|
| @@ -18785,6 +20503,19 @@ arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
|
| return !TARGET_AAPCS_BASED;
|
| }
|
|
|
| +static enum machine_mode
|
| +arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
|
| + enum machine_mode mode,
|
| + int *punsignedp ATTRIBUTE_UNUSED,
|
| + const_tree fntype ATTRIBUTE_UNUSED,
|
| + int for_return ATTRIBUTE_UNUSED)
|
| +{
|
| + if (GET_MODE_CLASS (mode) == MODE_INT
|
| + && GET_MODE_SIZE (mode) < 4)
|
| + return SImode;
|
| +
|
| + return mode;
|
| +}
|
|
|
| /* AAPCS based ABIs use short enums by default. */
|
|
|
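| arm_promote_function_mode above widens any integer narrower than a
|
| word to SImode for argument and return passing, so a char or short
|
| occupies a full 32-bit register at call boundaries. The size rule,
|
| sketched (signedness travels separately in *punsignedp):
|
|   /* Promoted size in bytes of an integer argument or return.  */
|
|   static int
|
|   promoted_arg_bytes_sketch (int mode_bytes)
|
|   {
|
|     return mode_bytes < 4 ? 4 : mode_bytes;  /* QI/HI -> SI */
|
|   }
|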
| @@ -19035,9 +20766,10 @@ arm_vector_mode_supported_p (enum machine_mode mode)
|
| || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
|
| return true;
|
|
|
| - if ((mode == V2SImode)
|
| - || (mode == V4HImode)
|
| - || (mode == V8QImode))
|
| + if ((TARGET_NEON || TARGET_IWMMXT)
|
| + && ((mode == V2SImode)
|
| + || (mode == V4HImode)
|
| + || (mode == V8QImode)))
|
| return true;
|
|
|
| return false;
|
| @@ -19068,9 +20800,14 @@ arm_dbx_register_number (unsigned int regno)
|
| if (IS_FPA_REGNUM (regno))
|
| return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
|
|
|
| - /* FIXME: VFPv3 register numbering. */
|
| if (IS_VFP_REGNUM (regno))
|
| - return 64 + regno - FIRST_VFP_REGNUM;
|
| + {
|
| + /* See comment in arm_dwarf_register_span. */
|
| + if (VFP_REGNO_OK_FOR_SINGLE (regno))
|
| + return 64 + regno - FIRST_VFP_REGNUM;
|
| + else
|
| + return 256 + (regno - FIRST_VFP_REGNUM) / 2;
|
| + }
|
|
|
| if (IS_IWMMXT_GR_REGNUM (regno))
|
| return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
|
| @@ -19081,6 +20818,39 @@ arm_dbx_register_number (unsigned int regno)
|
| gcc_unreachable ();
|
| }
|
|
|
| +/* DWARF models VFPv3 registers as 32 64-bit registers.
|
| + GCC models them as 64 32-bit registers, so we need to describe this to
|
| + the DWARF generation code. Other registers can use the default. */
|
| +static rtx
|
| +arm_dwarf_register_span (rtx rtl)
|
| +{
|
| + unsigned regno;
|
| + int nregs;
|
| + int i;
|
| + rtx p;
|
| +
|
| + regno = REGNO (rtl);
|
| + if (!IS_VFP_REGNUM (regno))
|
| + return NULL_RTX;
|
| +
|
| + /* XXX FIXME: The EABI defines two VFP register ranges:
|
| + 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
|
| + 256-287: D0-D31
|
| + The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
|
| + corresponding D register. Until GDB supports this, we shall use the
|
| + legacy encodings. We also use these encodings for D0-D15 for
|
| + compatibility with older debuggers. */
|
| + if (VFP_REGNO_OK_FOR_SINGLE (regno))
|
| + return NULL_RTX;
|
| +
|
| + nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
|
| + p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
|
| + regno = (regno - FIRST_VFP_REGNUM) / 2;
|
| + for (i = 0; i < nregs; i++)
|
| + XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
|
| +
|
| + return p;
|
| +}
|
|
|
| #ifdef TARGET_UNWIND_INFO
|
| /* Emit unwind directives for a store-multiple instruction or stack pointer
|
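| Together, the arm_dbx_register_number change and the new
|
| arm_dwarf_register_span above implement the EABI's two VFP numbering
|
| ranges quoted in the comment: legacy 64-95 numbers for registers
|
| expressible as S0-S31, and 256+ numbers, one per D register, for
|
| D16-D31, whose wide values are described as a parallel of DImode
|
| pieces. The number selection, sketched:
|
|   /* OFFSET is regno - FIRST_VFP_REGNUM; S regs come two per D.  */
|
|   static int
|
|   vfp_dwarf_number_sketch (int offset)
|
|   {
|
|     if (offset < 32)           /* overlaps S0-S31: legacy range */
|
|       return 64 + offset;
|
|     return 256 + offset / 2;   /* D16-D31: one number per D reg */
|
|   }
|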
| @@ -19404,7 +21174,7 @@ arm_emit_tls_decoration (FILE *fp, rtx x)
|
| rtx val;
|
|
|
| val = XVECEXP (x, 0, 0);
|
| - reloc = INTVAL (XVECEXP (x, 0, 1));
|
| + reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
|
|
|
| output_addr_const (fp, val);
|
|
|
| @@ -19522,6 +21292,32 @@ arm_output_shift(rtx * operands, int set_flags)
|
| return "";
|
| }
|
|
|
| +/* Output a Thumb-1 casesi dispatch sequence. */
|
| +const char *
|
| +thumb1_output_casesi (rtx *operands)
|
| +{
|
| + rtx diff_vec = PATTERN (next_real_insn (operands[0]));
|
| + addr_diff_vec_flags flags;
|
| +
|
| + gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
|
| +
|
| + flags = ADDR_DIFF_VEC_FLAGS (diff_vec);
|
| +
|
| + switch (GET_MODE (diff_vec))
|
| + {
|
| + case QImode:
|
| + return (flags.offset_unsigned ?
|
| + "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
|
| + case HImode:
|
| + return (flags.offset_unsigned ?
|
| + "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
|
| + case SImode:
|
| + return "bl\t%___gnu_thumb1_case_si";
|
| + default:
|
| + gcc_unreachable ();
|
| + }
|
| +}
|
| +
|
| /* Output a Thumb-2 casesi instruction. */
|
| const char *
|
| thumb2_output_casesi (rtx *operands)
|
| @@ -19622,7 +21418,7 @@ arm_mangle_type (const_tree type)
|
| && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
|
| {
|
| static bool warned;
|
| - if (!warned && warn_psabi)
|
| + if (!warned && warn_psabi && !in_system_header)
|
| {
|
| warned = true;
|
| inform (input_location,
|
| @@ -19631,6 +21427,10 @@ arm_mangle_type (const_tree type)
|
| return "St9__va_list";
|
| }
|
|
|
| + /* Half-precision float. */
|
| + if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
|
| + return "Dh";
|
| +
|
| if (TREE_CODE (type) != VECTOR_TYPE)
|
| return NULL;
|
|
|
| @@ -19689,4 +21489,22 @@ arm_optimization_options (int level, int size ATTRIBUTE_UNUSED)
|
| flag_section_anchors = 2;
|
| }
|
|
|
| +/* Implement TARGET_FRAME_POINTER_REQUIRED. */
|
| +
|
| +bool
|
| +arm_frame_pointer_required (void)
|
| +{
|
| + return (cfun->has_nonlocal_label
|
| + || SUBTARGET_FRAME_POINTER_REQUIRED
|
| + || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
|
| +}
|
| +
|
| +/* Only Thumb-1 lacks conditional execution, so return true
|
| + unless the target is Thumb-1. */
|
| +static bool
|
| +arm_have_conditional_execution (void)
|
| +{
|
| + return !TARGET_THUMB1;
|
| +}
|
| +
|
| #include "gt-arm.h"
|
|
|