From 6a70badb2c1f627cd669f2fcfaeca4a05db50b5b Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 11 Jan 2018 13:17:02 +0000 Subject: [PATCH] [AArch64] Set NUM_POLY_INT_COEFFS to 2 This patch switches the AArch64 port to use 2 poly_int coefficients and updates code as necessary to keep it compiling. One potentially-significant change is to aarch64_hard_regno_caller_save_mode. The old implementation was written in a pretty conservative way: it changed the default behaviour for single-register values, but used the default handling for multi-register values. I don't think that's necessary, since the interesting cases for this macro are usually the single-register ones. Multi-register modes take up the whole of the constituent registers and the move patterns for all multi-register modes should be equally good. Using the original mode for multi-register cases stops us from using SVE modes to spill multi-register NEON values. This was caught by gcc.c-torture/execute/pr47538.c. Also, aarch64_shift_truncation_mask used GET_MODE_BITSIZE - 1. GET_MODE_UNIT_BITSIZE - 1 is equivalent for the cases that it handles (which are all scalars), and I think it's more obvious, since if we ever do use this for elementwise shifts of vector modes, the mask will depend on the number of bits in each element rather than the number of bits in the whole vector. 2018-01-11 Richard Sandiford Alan Hayward David Sherwood gcc/ * config/aarch64/aarch64-modes.def (NUM_POLY_INT_COEFFS): Set to 2. * config/aarch64/aarch64-protos.h (aarch64_initial_elimination_offset): Return a poly_int64 rather than a HOST_WIDE_INT. (aarch64_offset_7bit_signed_scaled_p): Take the offset as a poly_int64 rather than a HOST_WIDE_INT. * config/aarch64/aarch64.h (aarch64_frame): Protect with HAVE_POLY_INT_H rather than HOST_WIDE_INT. Change locals_offset, hard_fp_offset, frame_size, initial_adjust, callee_offset and final_offset from HOST_WIDE_INT to poly_int64. * config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args): Use to_constant when getting the number of units in an Advanced SIMD mode. (aarch64_builtin_vectorized_function): Check for a constant number of units. * config/aarch64/aarch64-simd.md (mov): Handle polynomial GET_MODE_SIZE. (aarch64_ld_lane): Use the nunits attribute instead of GET_MODE_NUNITS. * config/aarch64/aarch64.c (aarch64_hard_regno_nregs) (aarch64_class_max_nregs): Use the constant_lowest_bound of the GET_MODE_SIZE for fixed-size registers. (aarch64_const_vec_all_same_in_range_p): Use const_vec_duplicate_p. (aarch64_hard_regno_call_part_clobbered, aarch64_classify_index) (aarch64_mode_valid_for_sched_fusion_p, aarch64_classify_address) (aarch64_legitimize_address_displacement, aarch64_secondary_reload) (aarch64_print_operand, aarch64_print_address_internal) (aarch64_address_cost, aarch64_rtx_costs, aarch64_register_move_cost) (aarch64_short_vector_p, aapcs_vfp_sub_candidate) (aarch64_simd_attr_length_rglist, aarch64_operands_ok_for_ldpstp): Handle polynomial GET_MODE_SIZE. (aarch64_hard_regno_caller_save_mode): Likewise. Return modes wider than SImode without modification. (tls_symbolic_operand_type): Use strip_offset instead of split_const. (aarch64_pass_by_reference, aarch64_layout_arg, aarch64_pad_reg_upward) (aarch64_gimplify_va_arg_expr): Assert that we don't yet handle passing and returning SVE modes. (aarch64_function_value, aarch64_layout_arg): Use gen_int_mode rather than GEN_INT. (aarch64_emit_probe_stack_range): Take the size as a poly_int64 rather than a HOST_WIDE_INT, but call sorry if it isn't constant. (aarch64_allocate_and_probe_stack_space): Likewise. (aarch64_layout_frame): Cope with polynomial offsets. (aarch64_save_callee_saves, aarch64_restore_callee_saves): Take the start_offset as a poly_int64 rather than a HOST_WIDE_INT. Track polynomial offsets. (offset_9bit_signed_unscaled_p, offset_12bit_unsigned_scaled_p) (aarch64_offset_7bit_signed_scaled_p): Take the offset as a poly_int64 rather than a HOST_WIDE_INT. (aarch64_get_separate_components, aarch64_process_components) (aarch64_expand_prologue, aarch64_expand_epilogue) (aarch64_use_return_insn_p): Handle polynomial frame offsets. (aarch64_anchor_offset): New function, split out from... (aarch64_legitimize_address): ...here. (aarch64_builtin_vectorization_cost): Handle polynomial TYPE_VECTOR_SUBPARTS. (aarch64_simd_check_vect_par_cnst_half): Handle polynomial GET_MODE_NUNITS. (aarch64_simd_make_constant, aarch64_expand_vector_init): Get the number of elements from the PARALLEL rather than the mode. (aarch64_shift_truncation_mask): Use GET_MODE_UNIT_BITSIZE rather than GET_MODE_BITSIZE. (aarch64_evpc_trn, aarch64_evpc_uzp, aarch64_evpc_ext) (aarch64_evpc_rev, aarch64_evpc_dup, aarch64_evpc_zip) (aarch64_expand_vec_perm_const_1): Handle polynomial d->perm.length () and d->perm elements. (aarch64_evpc_tbl): Likewise. Use nelt rather than GET_MODE_NUNITS. Apply to_constant to d->perm elements. (aarch64_simd_valid_immediate, aarch64_vec_fpconst_pow_of_2): Handle polynomial CONST_VECTOR_NUNITS. (aarch64_move_pointer): Take amount as a poly_int64 rather than an int. (aarch64_progress_pointer): Avoid temporary variable. * config/aarch64/aarch64.md (aarch64_): Use the mode attribute instead of GET_MODE. Co-Authored-By: Alan Hayward Co-Authored-By: David Sherwood From-SVN: r256533 --- gcc/ChangeLog | 79 ++++ gcc/config/aarch64/aarch64-builtins.c | 18 +- gcc/config/aarch64/aarch64-modes.def | 4 + gcc/config/aarch64/aarch64-protos.h | 4 +- gcc/config/aarch64/aarch64-simd.md | 8 +- gcc/config/aarch64/aarch64.c | 574 ++++++++++++++------------ gcc/config/aarch64/aarch64.h | 18 +- gcc/config/aarch64/aarch64.md | 2 +- 8 files changed, 423 insertions(+), 284 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 19a37573b24..0ed0a2a2786 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,82 @@ +2018-01-11 Richard Sandiford + Alan Hayward + David Sherwood + + * config/aarch64/aarch64-modes.def (NUM_POLY_INT_COEFFS): Set to 2. + * config/aarch64/aarch64-protos.h (aarch64_initial_elimination_offset): + Return a poly_int64 rather than a HOST_WIDE_INT. + (aarch64_offset_7bit_signed_scaled_p): Take the offset as a poly_int64 + rather than a HOST_WIDE_INT. + * config/aarch64/aarch64.h (aarch64_frame): Protect with + HAVE_POLY_INT_H rather than HOST_WIDE_INT. Change locals_offset, + hard_fp_offset, frame_size, initial_adjust, callee_offset and + final_offset from HOST_WIDE_INT to poly_int64. + * config/aarch64/aarch64-builtins.c (aarch64_simd_expand_args): Use + to_constant when getting the number of units in an Advanced SIMD + mode. + (aarch64_builtin_vectorized_function): Check for a constant number + of units. + * config/aarch64/aarch64-simd.md (mov): Handle polynomial + GET_MODE_SIZE. + (aarch64_ld_lane): Use the nunits + attribute instead of GET_MODE_NUNITS. + * config/aarch64/aarch64.c (aarch64_hard_regno_nregs) + (aarch64_class_max_nregs): Use the constant_lowest_bound of the + GET_MODE_SIZE for fixed-size registers. + (aarch64_const_vec_all_same_in_range_p): Use const_vec_duplicate_p. + (aarch64_hard_regno_call_part_clobbered, aarch64_classify_index) + (aarch64_mode_valid_for_sched_fusion_p, aarch64_classify_address) + (aarch64_legitimize_address_displacement, aarch64_secondary_reload) + (aarch64_print_operand, aarch64_print_address_internal) + (aarch64_address_cost, aarch64_rtx_costs, aarch64_register_move_cost) + (aarch64_short_vector_p, aapcs_vfp_sub_candidate) + (aarch64_simd_attr_length_rglist, aarch64_operands_ok_for_ldpstp): + Handle polynomial GET_MODE_SIZE. + (aarch64_hard_regno_caller_save_mode): Likewise. Return modes + wider than SImode without modification. + (tls_symbolic_operand_type): Use strip_offset instead of split_const. + (aarch64_pass_by_reference, aarch64_layout_arg, aarch64_pad_reg_upward) + (aarch64_gimplify_va_arg_expr): Assert that we don't yet handle + passing and returning SVE modes. + (aarch64_function_value, aarch64_layout_arg): Use gen_int_mode + rather than GEN_INT. + (aarch64_emit_probe_stack_range): Take the size as a poly_int64 + rather than a HOST_WIDE_INT, but call sorry if it isn't constant. + (aarch64_allocate_and_probe_stack_space): Likewise. + (aarch64_layout_frame): Cope with polynomial offsets. + (aarch64_save_callee_saves, aarch64_restore_callee_saves): Take the + start_offset as a poly_int64 rather than a HOST_WIDE_INT. Track + polynomial offsets. + (offset_9bit_signed_unscaled_p, offset_12bit_unsigned_scaled_p) + (aarch64_offset_7bit_signed_scaled_p): Take the offset as a + poly_int64 rather than a HOST_WIDE_INT. + (aarch64_get_separate_components, aarch64_process_components) + (aarch64_expand_prologue, aarch64_expand_epilogue) + (aarch64_use_return_insn_p): Handle polynomial frame offsets. + (aarch64_anchor_offset): New function, split out from... + (aarch64_legitimize_address): ...here. + (aarch64_builtin_vectorization_cost): Handle polynomial + TYPE_VECTOR_SUBPARTS. + (aarch64_simd_check_vect_par_cnst_half): Handle polynomial + GET_MODE_NUNITS. + (aarch64_simd_make_constant, aarch64_expand_vector_init): Get the + number of elements from the PARALLEL rather than the mode. + (aarch64_shift_truncation_mask): Use GET_MODE_UNIT_BITSIZE + rather than GET_MODE_BITSIZE. + (aarch64_evpc_trn, aarch64_evpc_uzp, aarch64_evpc_ext) + (aarch64_evpc_rev, aarch64_evpc_dup, aarch64_evpc_zip) + (aarch64_expand_vec_perm_const_1): Handle polynomial + d->perm.length () and d->perm elements. + (aarch64_evpc_tbl): Likewise. Use nelt rather than GET_MODE_NUNITS. + Apply to_constant to d->perm elements. + (aarch64_simd_valid_immediate, aarch64_vec_fpconst_pow_of_2): Handle + polynomial CONST_VECTOR_NUNITS. + (aarch64_move_pointer): Take amount as a poly_int64 rather + than an int. + (aarch64_progress_pointer): Avoid temporary variable. + * config/aarch64/aarch64.md (aarch64_): Use + the mode attribute instead of GET_MODE. + 2018-01-11 Richard Sandiford Alan Hayward David Sherwood diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index 9775d3a59c2..02c6738d220 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -1077,9 +1077,9 @@ aarch64_simd_expand_args (rtx target, int icode, int have_retval, gcc_assert (opc > 1); if (CONST_INT_P (op[opc])) { - aarch64_simd_lane_bounds (op[opc], 0, - GET_MODE_NUNITS (builtin_mode), - exp); + unsigned int nunits + = GET_MODE_NUNITS (builtin_mode).to_constant (); + aarch64_simd_lane_bounds (op[opc], 0, nunits, exp); /* Keep to GCC-vector-extension lane indices in the RTL. */ op[opc] = aarch64_endian_lane_rtx (builtin_mode, INTVAL (op[opc])); @@ -1092,8 +1092,9 @@ aarch64_simd_expand_args (rtx target, int icode, int have_retval, if (CONST_INT_P (op[opc])) { machine_mode vmode = insn_data[icode].operand[opc - 1].mode; - aarch64_simd_lane_bounds (op[opc], - 0, GET_MODE_NUNITS (vmode), exp); + unsigned int nunits + = GET_MODE_NUNITS (vmode).to_constant (); + aarch64_simd_lane_bounds (op[opc], 0, nunits, exp); /* Keep to GCC-vector-extension lane indices in the RTL. */ op[opc] = aarch64_endian_lane_rtx (vmode, INTVAL (op[opc])); } @@ -1412,16 +1413,17 @@ aarch64_builtin_vectorized_function (unsigned int fn, tree type_out, tree type_in) { machine_mode in_mode, out_mode; - int in_n, out_n; + unsigned HOST_WIDE_INT in_n, out_n; if (TREE_CODE (type_out) != VECTOR_TYPE || TREE_CODE (type_in) != VECTOR_TYPE) return NULL_TREE; out_mode = TYPE_MODE (TREE_TYPE (type_out)); - out_n = TYPE_VECTOR_SUBPARTS (type_out); in_mode = TYPE_MODE (TREE_TYPE (type_in)); - in_n = TYPE_VECTOR_SUBPARTS (type_in); + if (!TYPE_VECTOR_SUBPARTS (type_out).is_constant (&out_n) + || !TYPE_VECTOR_SUBPARTS (type_in).is_constant (&in_n)) + return NULL_TREE; #undef AARCH64_CHECK_BUILTIN_MODE #define AARCH64_CHECK_BUILTIN_MODE(C, N) 1 diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def index 3ed11189ad4..de40f72d666 100644 --- a/gcc/config/aarch64/aarch64-modes.def +++ b/gcc/config/aarch64/aarch64-modes.def @@ -47,3 +47,7 @@ INT_MODE (XI, 64); /* Quad float: 128-bit floating mode for long doubles. */ FLOAT_MODE (TF, 16, ieee_quad_format); + +/* Coefficient 1 is multiplied by the number of 128-bit chunks in an + SVE vector (referred to as "VQ") minus one. */ +#define NUM_POLY_INT_COEFFS 2 diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 7df3aae4353..8c3471bdbb8 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -333,7 +333,7 @@ enum simd_immediate_check { extern struct tune_params aarch64_tune_params; -HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned); +poly_int64 aarch64_initial_elimination_offset (unsigned, unsigned); int aarch64_get_condition_code (rtx); bool aarch64_address_valid_for_prefetch_p (rtx, bool); bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode); @@ -366,7 +366,7 @@ bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx); bool aarch64_move_imm (HOST_WIDE_INT, machine_mode); bool aarch64_mov_operand_p (rtx, machine_mode); rtx aarch64_reverse_mask (machine_mode, unsigned int); -bool aarch64_offset_7bit_signed_scaled_p (machine_mode, HOST_WIDE_INT); +bool aarch64_offset_7bit_signed_scaled_p (machine_mode, poly_int64); char *aarch64_output_scalar_simd_mov_immediate (rtx, scalar_int_mode); char *aarch64_output_simd_mov_immediate (rtx, unsigned, enum simd_immediate_check w = AARCH64_CHECK_MOV); diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 792fa1ce478..3d1f6a01cb7 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -31,9 +31,9 @@ normal str, so the check need not apply. */ if (GET_CODE (operands[0]) == MEM && !(aarch64_simd_imm_zero (operands[1], mode) - && ((GET_MODE_SIZE (mode) == 16 + && ((known_eq (GET_MODE_SIZE (mode), 16) && aarch64_mem_pair_operand (operands[0], DImode)) - || GET_MODE_SIZE (mode) == 8))) + || known_eq (GET_MODE_SIZE (mode), 8)))) operands[1] = force_reg (mode, operands[1]); " ) @@ -5334,9 +5334,7 @@ set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (mode)) * ); - aarch64_simd_lane_bounds (operands[3], 0, - GET_MODE_NUNITS (mode), - NULL); + aarch64_simd_lane_bounds (operands[3], 0, , NULL); emit_insn (gen_aarch64_vec_load_lanes_lane ( operands[0], mem, operands[2], operands[3])); DONE; diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 42d97d53f16..7ab13773b1d 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -1139,13 +1139,18 @@ aarch64_array_mode_supported_p (machine_mode mode, static unsigned int aarch64_hard_regno_nregs (unsigned regno, machine_mode mode) { + /* ??? Logically we should only need to provide a value when + HARD_REGNO_MODE_OK says that the combination is valid, + but at the moment we need to handle all modes. Just ignore + any runtime parts for registers that can't store them. */ + HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode)); switch (aarch64_regno_regclass (regno)) { case FP_REGS: case FP_LO_REGS: - return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG; + return CEIL (lowest_size, UNITS_PER_VREG); default: - return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + return CEIL (lowest_size, UNITS_PER_WORD); } gcc_unreachable (); } @@ -1188,25 +1193,17 @@ aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode) static bool aarch64_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode) { - return FP_REGNUM_P (regno) && GET_MODE_SIZE (mode) > 8; + return FP_REGNUM_P (regno) && maybe_gt (GET_MODE_SIZE (mode), 8); } /* Implement HARD_REGNO_CALLER_SAVE_MODE. */ machine_mode -aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs, - machine_mode mode) +aarch64_hard_regno_caller_save_mode (unsigned, unsigned, machine_mode mode) { - /* Handle modes that fit within single registers. */ - if (nregs == 1 && GET_MODE_SIZE (mode) <= 16) - { - if (GET_MODE_SIZE (mode) >= 4) - return mode; - else - return SImode; - } - /* Fall back to generic for multi-reg and very large modes. */ + if (known_ge (GET_MODE_SIZE (mode), 4)) + return mode; else - return choose_hard_reg_mode (regno, nregs, false); + return SImode; } /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so @@ -1319,11 +1316,10 @@ static enum tls_model tls_symbolic_operand_type (rtx addr) { enum tls_model tls_kind = TLS_MODEL_NONE; - rtx sym, addend; - if (GET_CODE (addr) == CONST) { - split_const (addr, &sym, &addend); + poly_int64 addend; + rtx sym = strip_offset (addr, &addend); if (GET_CODE (sym) == SYMBOL_REF) tls_kind = SYMBOL_REF_TLS_MODEL (sym); } @@ -2275,8 +2271,12 @@ aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED, int nregs; /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */ - size = (mode == BLKmode && type) - ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); + if (mode == BLKmode && type) + size = int_size_in_bytes (type); + else + /* No frontends can create types with variable-sized modes, so we + shouldn't be asked to pass or return them. */ + size = GET_MODE_SIZE (mode).to_constant (); /* Aggregates are passed by reference based on their size. */ if (type && AGGREGATE_TYPE_P (type)) @@ -2373,8 +2373,8 @@ aarch64_function_value (const_tree type, const_tree func, for (i = 0; i < count; i++) { rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i); - tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, - GEN_INT (i * GET_MODE_SIZE (ag_mode))); + rtx offset = gen_int_mode (i * GET_MODE_SIZE (ag_mode), Pmode); + tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, offset); XVECEXP (par, 0, i) = tmp; } return par; @@ -2501,9 +2501,13 @@ aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode, pcum->aapcs_arg_processed = true; /* Size in bytes, rounded to the nearest multiple of 8 bytes. */ - size - = ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode), - UNITS_PER_WORD); + if (type) + size = int_size_in_bytes (type); + else + /* No frontends can create types with variable-sized modes, so we + shouldn't be asked to pass or return them. */ + size = GET_MODE_SIZE (mode).to_constant (); + size = ROUND_UP (size, UNITS_PER_WORD); allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode); allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v, @@ -2540,9 +2544,9 @@ aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode, { rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode, V0_REGNUM + nvrn + i); - tmp = gen_rtx_EXPR_LIST - (VOIDmode, tmp, - GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode))); + rtx offset = gen_int_mode + (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode), Pmode); + tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, offset); XVECEXP (par, 0, i) = tmp; } pcum->aapcs_reg = par; @@ -2767,8 +2771,13 @@ aarch64_pad_reg_upward (machine_mode mode, const_tree type, /* Small composite types are always padded upward. */ if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode)) { - HOST_WIDE_INT size = (type ? int_size_in_bytes (type) - : GET_MODE_SIZE (mode)); + HOST_WIDE_INT size; + if (type) + size = int_size_in_bytes (type); + else + /* No frontends can create types with variable-sized modes, so we + shouldn't be asked to pass or return them. */ + size = GET_MODE_SIZE (mode).to_constant (); if (size < 2 * UNITS_PER_WORD) return true; } @@ -2797,12 +2806,19 @@ aarch64_libgcc_cmp_return_mode (void) #define PROBE_STACK_FIRST_REG 9 #define PROBE_STACK_SECOND_REG 10 -/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE, +/* Emit code to probe a range of stack addresses from FIRST to FIRST+POLY_SIZE, inclusive. These are offsets from the current stack pointer. */ static void -aarch64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size) +aarch64_emit_probe_stack_range (HOST_WIDE_INT first, poly_int64 poly_size) { + HOST_WIDE_INT size; + if (!poly_size.is_constant (&size)) + { + sorry ("stack probes for SVE frames"); + return; + } + rtx reg1 = gen_rtx_REG (Pmode, PROBE_STACK_FIRST_REG); /* See the same assertion on PROBE_INTERVAL above. */ @@ -3073,13 +3089,16 @@ aarch64_layout_frame (void) = offset + cfun->machine->frame.saved_varargs_size; cfun->machine->frame.hard_fp_offset - = ROUND_UP (varargs_and_saved_regs_size + get_frame_size (), - STACK_BOUNDARY / BITS_PER_UNIT); + = aligned_upper_bound (varargs_and_saved_regs_size + + get_frame_size (), + STACK_BOUNDARY / BITS_PER_UNIT); + /* Both these values are already aligned. */ + gcc_assert (multiple_p (crtl->outgoing_args_size, + STACK_BOUNDARY / BITS_PER_UNIT)); cfun->machine->frame.frame_size - = ROUND_UP (cfun->machine->frame.hard_fp_offset - + crtl->outgoing_args_size, - STACK_BOUNDARY / BITS_PER_UNIT); + = (cfun->machine->frame.hard_fp_offset + + crtl->outgoing_args_size); cfun->machine->frame.locals_offset = cfun->machine->frame.saved_varargs_size; @@ -3094,18 +3113,21 @@ aarch64_layout_frame (void) else if (cfun->machine->frame.wb_candidate1 != INVALID_REGNUM) max_push_offset = 256; - if (cfun->machine->frame.frame_size < max_push_offset - && crtl->outgoing_args_size == 0) + HOST_WIDE_INT const_size, const_fp_offset; + if (cfun->machine->frame.frame_size.is_constant (&const_size) + && const_size < max_push_offset + && known_eq (crtl->outgoing_args_size, 0)) { /* Simple, small frame with no outgoing arguments: stp reg1, reg2, [sp, -frame_size]! stp reg3, reg4, [sp, 16] */ - cfun->machine->frame.callee_adjust = cfun->machine->frame.frame_size; + cfun->machine->frame.callee_adjust = const_size; } - else if ((crtl->outgoing_args_size - + cfun->machine->frame.saved_regs_size < 512) + else if (known_lt (crtl->outgoing_args_size + + cfun->machine->frame.saved_regs_size, 512) && !(cfun->calls_alloca - && cfun->machine->frame.hard_fp_offset < max_push_offset)) + && known_lt (cfun->machine->frame.hard_fp_offset, + max_push_offset))) { /* Frame with small outgoing arguments: sub sp, sp, frame_size @@ -3115,13 +3137,14 @@ aarch64_layout_frame (void) cfun->machine->frame.callee_offset = cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset; } - else if (cfun->machine->frame.hard_fp_offset < max_push_offset) + else if (cfun->machine->frame.hard_fp_offset.is_constant (&const_fp_offset) + && const_fp_offset < max_push_offset) { /* Frame with large outgoing arguments but a small local area: stp reg1, reg2, [sp, -hard_fp_offset]! stp reg3, reg4, [sp, 16] sub sp, sp, outgoing_args_size */ - cfun->machine->frame.callee_adjust = cfun->machine->frame.hard_fp_offset; + cfun->machine->frame.callee_adjust = const_fp_offset; cfun->machine->frame.final_adjust = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust; } @@ -3334,7 +3357,7 @@ aarch64_return_address_signing_enabled (void) skipping any write-back candidates if SKIP_WB is true. */ static void -aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset, +aarch64_save_callee_saves (machine_mode mode, poly_int64 start_offset, unsigned start, unsigned limit, bool skip_wb) { rtx_insn *insn; @@ -3346,7 +3369,7 @@ aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset, regno = aarch64_next_callee_save (regno + 1, limit)) { rtx reg, mem; - HOST_WIDE_INT offset; + poly_int64 offset; if (skip_wb && (regno == cfun->machine->frame.wb_candidate1 @@ -3399,13 +3422,13 @@ aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset, static void aarch64_restore_callee_saves (machine_mode mode, - HOST_WIDE_INT start_offset, unsigned start, + poly_int64 start_offset, unsigned start, unsigned limit, bool skip_wb, rtx *cfi_ops) { rtx base_rtx = stack_pointer_rtx; unsigned regno; unsigned regno2; - HOST_WIDE_INT offset; + poly_int64 offset; for (regno = aarch64_next_callee_save (start, limit); regno <= limit; @@ -3450,25 +3473,27 @@ aarch64_restore_callee_saves (machine_mode mode, static inline bool offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED, - HOST_WIDE_INT offset) + poly_int64 offset) { - return offset >= -256 && offset < 256; + HOST_WIDE_INT const_offset; + return (offset.is_constant (&const_offset) + && IN_RANGE (const_offset, -256, 255)); } static inline bool -offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset) +offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset) { - return (offset >= 0 - && offset < 4096 * GET_MODE_SIZE (mode) - && offset % GET_MODE_SIZE (mode) == 0); + HOST_WIDE_INT multiple; + return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple) + && IN_RANGE (multiple, 0, 4095)); } bool -aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset) +aarch64_offset_7bit_signed_scaled_p (machine_mode mode, poly_int64 offset) { - return (offset >= -64 * GET_MODE_SIZE (mode) - && offset < 64 * GET_MODE_SIZE (mode) - && offset % GET_MODE_SIZE (mode) == 0); + HOST_WIDE_INT multiple; + return (constant_multiple_p (offset, GET_MODE_SIZE (mode), &multiple) + && IN_RANGE (multiple, -64, 63)); } /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */ @@ -3485,7 +3510,7 @@ aarch64_get_separate_components (void) for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++) if (aarch64_register_saved_on_entry (regno)) { - HOST_WIDE_INT offset = cfun->machine->frame.reg_offset[regno]; + poly_int64 offset = cfun->machine->frame.reg_offset[regno]; if (!frame_pointer_needed) offset += cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset; @@ -3589,7 +3614,7 @@ aarch64_process_components (sbitmap components, bool prologue_p) so DFmode for the vector registers is enough. */ machine_mode mode = GP_REGNUM_P (regno) ? E_DImode : E_DFmode; rtx reg = gen_rtx_REG (mode, regno); - HOST_WIDE_INT offset = cfun->machine->frame.reg_offset[regno]; + poly_int64 offset = cfun->machine->frame.reg_offset[regno]; if (!frame_pointer_needed) offset += cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset; @@ -3611,13 +3636,13 @@ aarch64_process_components (sbitmap components, bool prologue_p) break; } - HOST_WIDE_INT offset2 = cfun->machine->frame.reg_offset[regno2]; + poly_int64 offset2 = cfun->machine->frame.reg_offset[regno2]; /* The next register is not of the same class or its offset is not mergeable with the current one into a pair. */ if (!satisfies_constraint_Ump (mem) || GP_REGNUM_P (regno) != GP_REGNUM_P (regno2) - || (offset2 - cfun->machine->frame.reg_offset[regno]) - != GET_MODE_SIZE (mode)) + || maybe_ne ((offset2 - cfun->machine->frame.reg_offset[regno]), + GET_MODE_SIZE (mode))) { insn = emit_insn (set); RTX_FRAME_RELATED_P (insn) = 1; @@ -3734,11 +3759,11 @@ aarch64_expand_prologue (void) { aarch64_layout_frame (); - HOST_WIDE_INT frame_size = cfun->machine->frame.frame_size; - HOST_WIDE_INT initial_adjust = cfun->machine->frame.initial_adjust; + poly_int64 frame_size = cfun->machine->frame.frame_size; + poly_int64 initial_adjust = cfun->machine->frame.initial_adjust; HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust; - HOST_WIDE_INT final_adjust = cfun->machine->frame.final_adjust; - HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset; + poly_int64 final_adjust = cfun->machine->frame.final_adjust; + poly_int64 callee_offset = cfun->machine->frame.callee_offset; unsigned reg1 = cfun->machine->frame.wb_candidate1; unsigned reg2 = cfun->machine->frame.wb_candidate2; bool emit_frame_chain = cfun->machine->frame.emit_frame_chain; @@ -3753,19 +3778,19 @@ aarch64_expand_prologue (void) } if (flag_stack_usage_info) - current_function_static_stack_size = frame_size; + current_function_static_stack_size = constant_lower_bound (frame_size); if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK) { if (crtl->is_leaf && !cfun->calls_alloca) { - if (frame_size > PROBE_INTERVAL - && frame_size > get_stack_check_protect ()) + if (maybe_gt (frame_size, PROBE_INTERVAL) + && maybe_gt (frame_size, get_stack_check_protect ())) aarch64_emit_probe_stack_range (get_stack_check_protect (), (frame_size - get_stack_check_protect ())); } - else if (frame_size > 0) + else if (maybe_gt (frame_size, 0)) aarch64_emit_probe_stack_range (get_stack_check_protect (), frame_size); } @@ -3812,7 +3837,7 @@ aarch64_use_return_insn_p (void) aarch64_layout_frame (); - return cfun->machine->frame.frame_size == 0; + return known_eq (cfun->machine->frame.frame_size, 0); } /* Generate the epilogue instructions for returning from a function. @@ -3825,21 +3850,23 @@ aarch64_expand_epilogue (bool for_sibcall) { aarch64_layout_frame (); - HOST_WIDE_INT initial_adjust = cfun->machine->frame.initial_adjust; + poly_int64 initial_adjust = cfun->machine->frame.initial_adjust; HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust; - HOST_WIDE_INT final_adjust = cfun->machine->frame.final_adjust; - HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset; + poly_int64 final_adjust = cfun->machine->frame.final_adjust; + poly_int64 callee_offset = cfun->machine->frame.callee_offset; unsigned reg1 = cfun->machine->frame.wb_candidate1; unsigned reg2 = cfun->machine->frame.wb_candidate2; rtx cfi_ops = NULL; rtx_insn *insn; /* We need to add memory barrier to prevent read from deallocated stack. */ - bool need_barrier_p = (get_frame_size () - + cfun->machine->frame.saved_varargs_size) != 0; + bool need_barrier_p + = maybe_ne (get_frame_size () + + cfun->machine->frame.saved_varargs_size, 0); /* Emit a barrier to prevent loads from a deallocated stack. */ - if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca + if (maybe_gt (final_adjust, crtl->outgoing_args_size) + || cfun->calls_alloca || crtl->calls_eh_return) { emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx)); @@ -3850,7 +3877,8 @@ aarch64_expand_epilogue (bool for_sibcall) be the same as the stack pointer. */ rtx ip0_rtx = gen_rtx_REG (Pmode, IP0_REGNUM); rtx ip1_rtx = gen_rtx_REG (Pmode, IP1_REGNUM); - if (frame_pointer_needed && (final_adjust || cfun->calls_alloca)) + if (frame_pointer_needed + && (maybe_ne (final_adjust, 0) || cfun->calls_alloca)) /* If writeback is used when restoring callee-saves, the CFA is restored on the instruction doing the writeback. */ aarch64_add_offset (Pmode, stack_pointer_rtx, @@ -3870,7 +3898,7 @@ aarch64_expand_epilogue (bool for_sibcall) if (callee_adjust != 0) aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops); - if (callee_adjust != 0 || initial_adjust > 65536) + if (callee_adjust != 0 || maybe_gt (initial_adjust, 65536)) { /* Emit delayed restores and set the CFA to be SP + initial_adjust. */ insn = get_last_insn (); @@ -4467,9 +4495,9 @@ aarch64_classify_index (struct aarch64_address_info *info, rtx x, && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (index))]) index = SUBREG_REG (index); - if ((shift == 0 || - (shift > 0 && shift <= 3 - && (1 << shift) == GET_MODE_SIZE (mode))) + if ((shift == 0 + || (shift > 0 && shift <= 3 + && known_eq (1 << shift, GET_MODE_SIZE (mode)))) && REG_P (index) && aarch64_regno_ok_for_index_p (REGNO (index), strict_p)) { @@ -4491,7 +4519,7 @@ aarch64_mode_valid_for_sched_fusion_p (machine_mode mode) return mode == SImode || mode == DImode || mode == SFmode || mode == DFmode || (aarch64_vector_mode_supported_p (mode) - && GET_MODE_SIZE (mode) == 8); + && known_eq (GET_MODE_SIZE (mode), 8)); } /* Return true if REGNO is a virtual pointer register, or an eliminable @@ -4517,6 +4545,7 @@ aarch64_classify_address (struct aarch64_address_info *info, { enum rtx_code code = GET_CODE (x); rtx op0, op1; + HOST_WIDE_INT const_size; /* On BE, we use load/store pair for all large int mode load/stores. TI/TFmode may also use a load/store pair. */ @@ -4526,10 +4555,10 @@ aarch64_classify_address (struct aarch64_address_info *info, || (BYTES_BIG_ENDIAN && aarch64_vect_struct_mode_p (mode))); - bool allow_reg_index_p = - !load_store_pair_p - && (GET_MODE_SIZE (mode) != 16 || aarch64_vector_mode_supported_p (mode)) - && !aarch64_vect_struct_mode_p (mode); + bool allow_reg_index_p = (!load_store_pair_p + && (maybe_ne (GET_MODE_SIZE (mode), 16) + || aarch64_vector_mode_supported_p (mode)) + && !aarch64_vect_struct_mode_p (mode)); /* On LE, for AdvSIMD, don't support anything other than POST_INC or REG addressing. */ @@ -4562,7 +4591,7 @@ aarch64_classify_address (struct aarch64_address_info *info, return true; } - if (GET_MODE_SIZE (mode) != 0 + if (maybe_ne (GET_MODE_SIZE (mode), 0) && CONST_INT_P (op1) && aarch64_base_register_rtx_p (op0, strict_p)) { @@ -4609,7 +4638,8 @@ aarch64_classify_address (struct aarch64_address_info *info, offset + 32)); if (load_store_pair_p) - return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8) + return ((known_eq (GET_MODE_SIZE (mode), 4) + || known_eq (GET_MODE_SIZE (mode), 8)) && aarch64_offset_7bit_signed_scaled_p (mode, offset)); else return (offset_9bit_signed_unscaled_p (mode, offset) @@ -4669,7 +4699,8 @@ aarch64_classify_address (struct aarch64_address_info *info, && offset_9bit_signed_unscaled_p (mode, offset)); if (load_store_pair_p) - return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8) + return ((known_eq (GET_MODE_SIZE (mode), 4) + || known_eq (GET_MODE_SIZE (mode), 8)) && aarch64_offset_7bit_signed_scaled_p (mode, offset)); else return offset_9bit_signed_unscaled_p (mode, offset); @@ -4683,7 +4714,9 @@ aarch64_classify_address (struct aarch64_address_info *info, for SI mode or larger. */ info->type = ADDRESS_SYMBOLIC; - if (!load_store_pair_p && GET_MODE_SIZE (mode) >= 4) + if (!load_store_pair_p + && GET_MODE_SIZE (mode).is_constant (&const_size) + && const_size >= 4) { rtx sym, addend; @@ -4709,7 +4742,6 @@ aarch64_classify_address (struct aarch64_address_info *info, { /* The symbol and offset must be aligned to the access size. */ unsigned int align; - unsigned int ref_size; if (CONSTANT_POOL_ADDRESS_P (sym)) align = GET_MODE_ALIGNMENT (get_pool_mode (sym)); @@ -4727,12 +4759,12 @@ aarch64_classify_address (struct aarch64_address_info *info, else align = BITS_PER_UNIT; - ref_size = GET_MODE_SIZE (mode); - if (ref_size == 0) + poly_int64 ref_size = GET_MODE_SIZE (mode); + if (known_eq (ref_size, 0)) ref_size = GET_MODE_SIZE (DImode); - return ((INTVAL (offs) & (ref_size - 1)) == 0 - && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0); + return (multiple_p (INTVAL (offs), ref_size) + && multiple_p (align / BITS_PER_UNIT, ref_size)); } } return false; @@ -4810,19 +4842,24 @@ aarch64_legitimate_address_p (machine_mode mode, rtx x, bool strict_p, static bool aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode) { - HOST_WIDE_INT offset = INTVAL (*disp); - HOST_WIDE_INT base; + HOST_WIDE_INT size; + if (GET_MODE_SIZE (mode).is_constant (&size)) + { + HOST_WIDE_INT offset = INTVAL (*disp); + HOST_WIDE_INT base; - if (mode == TImode || mode == TFmode) - base = (offset + 0x100) & ~0x1f8; - else if ((offset & (GET_MODE_SIZE (mode) - 1)) != 0) - base = (offset + 0x100) & ~0x1ff; - else - base = offset & ~(GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3ffc); + if (mode == TImode || mode == TFmode) + base = (offset + 0x100) & ~0x1f8; + else if ((offset & (size - 1)) != 0) + base = (offset + 0x100) & ~0x1ff; + else + base = offset & ~(size < 4 ? 0xfff : 0x3ffc); - *off = GEN_INT (base); - *disp = GEN_INT (offset - base); - return true; + *off = GEN_INT (base); + *disp = GEN_INT (offset - base); + return true; + } + return false; } /* Return the binary representation of floating point constant VALUE in INTVAL. @@ -5210,26 +5247,13 @@ aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code) bool aarch64_const_vec_all_same_in_range_p (rtx x, - HOST_WIDE_INT minval, - HOST_WIDE_INT maxval) + HOST_WIDE_INT minval, + HOST_WIDE_INT maxval) { - HOST_WIDE_INT firstval; - int count, i; - - if (GET_CODE (x) != CONST_VECTOR - || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT) - return false; - - firstval = INTVAL (CONST_VECTOR_ELT (x, 0)); - if (firstval < minval || firstval > maxval) - return false; - - count = CONST_VECTOR_NUNITS (x); - for (i = 1; i < count; i++) - if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval) - return false; - - return true; + rtx elt; + return (const_vec_duplicate_p (x, &elt) + && CONST_INT_P (elt) + && IN_RANGE (INTVAL (elt), minval, maxval)); } bool @@ -5671,7 +5695,7 @@ aarch64_print_operand (FILE *f, rtx x, int code) machine_mode mode = GET_MODE (x); if (GET_CODE (x) != MEM - || (code == 'y' && GET_MODE_SIZE (mode) != 16)) + || (code == 'y' && maybe_ne (GET_MODE_SIZE (mode), 16))) { output_operand_lossage ("invalid operand for '%%%c'", code); return; @@ -5702,6 +5726,7 @@ aarch64_print_address_internal (FILE *f, machine_mode mode, rtx x, aarch64_addr_query_type type) { struct aarch64_address_info addr; + unsigned int size; /* Check all addresses are Pmode - including ILP32. */ gcc_assert (GET_MODE (x) == Pmode); @@ -5745,30 +5770,28 @@ aarch64_print_address_internal (FILE *f, machine_mode mode, rtx x, return true; case ADDRESS_REG_WB: + /* Writeback is only supported for fixed-width modes. */ + size = GET_MODE_SIZE (mode).to_constant (); switch (GET_CODE (x)) { case PRE_INC: - asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)], - GET_MODE_SIZE (mode)); + asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)], size); return true; case POST_INC: - asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)], - GET_MODE_SIZE (mode)); + asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)], size); return true; case PRE_DEC: - asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)], - GET_MODE_SIZE (mode)); + asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)], size); return true; case POST_DEC: - asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)], - GET_MODE_SIZE (mode)); + asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)], size); return true; case PRE_MODIFY: - asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)], + asm_fprintf (f, "[%s, %wd]!", reg_names[REGNO (addr.base)], INTVAL (addr.offset)); return true; case POST_MODIFY: - asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)], + asm_fprintf (f, "[%s], %wd", reg_names[REGNO (addr.base)], INTVAL (addr.offset)); return true; default: @@ -5859,6 +5882,39 @@ aarch64_regno_regclass (unsigned regno) return NO_REGS; } +/* OFFSET is an address offset for mode MODE, which has SIZE bytes. + If OFFSET is out of range, return an offset of an anchor point + that is in range. Return 0 otherwise. */ + +static HOST_WIDE_INT +aarch64_anchor_offset (HOST_WIDE_INT offset, HOST_WIDE_INT size, + machine_mode mode) +{ + /* Does it look like we'll need a 16-byte load/store-pair operation? */ + if (size > 16) + return (offset + 0x400) & ~0x7f0; + + /* For offsets that aren't a multiple of the access size, the limit is + -256...255. */ + if (offset & (size - 1)) + { + /* BLKmode typically uses LDP of X-registers. */ + if (mode == BLKmode) + return (offset + 512) & ~0x3ff; + return (offset + 0x100) & ~0x1ff; + } + + /* Small negative offsets are supported. */ + if (IN_RANGE (offset, -256, 0)) + return 0; + + if (mode == TImode || mode == TFmode) + return (offset + 0x100) & ~0x1ff; + + /* Use 12-bit offset by access size. */ + return offset & (~0xfff * size); +} + static rtx aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode) { @@ -5908,34 +5964,17 @@ aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode) x = gen_rtx_PLUS (Pmode, base, offset_rtx); } - /* Does it look like we'll need a 16-byte load/store-pair operation? */ - HOST_WIDE_INT base_offset; - if (GET_MODE_SIZE (mode) > 16) - base_offset = (offset + 0x400) & ~0x7f0; - /* For offsets aren't a multiple of the access size, the limit is - -256...255. */ - else if (offset & (GET_MODE_SIZE (mode) - 1)) + HOST_WIDE_INT size; + if (GET_MODE_SIZE (mode).is_constant (&size)) { - base_offset = (offset + 0x100) & ~0x1ff; - - /* BLKmode typically uses LDP of X-registers. */ - if (mode == BLKmode) - base_offset = (offset + 512) & ~0x3ff; - } - /* Small negative offsets are supported. */ - else if (IN_RANGE (offset, -256, 0)) - base_offset = 0; - else if (mode == TImode || mode == TFmode) - base_offset = (offset + 0x100) & ~0x1ff; - /* Use 12-bit offset by access size. */ - else - base_offset = offset & (~0xfff * GET_MODE_SIZE (mode)); - - if (base_offset != 0) - { - base = plus_constant (Pmode, base, base_offset); - base = force_operand (base, NULL_RTX); - return plus_constant (Pmode, base, offset - base_offset); + HOST_WIDE_INT base_offset = aarch64_anchor_offset (offset, size, + mode); + if (base_offset != 0) + { + base = plus_constant (Pmode, base, base_offset); + base = force_operand (base, NULL_RTX); + return plus_constant (Pmode, base, offset - base_offset); + } } } @@ -6022,7 +6061,7 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x, because AArch64 has richer addressing modes for LDR/STR instructions than LDP/STP instructions. */ if (TARGET_FLOAT && rclass == GENERAL_REGS - && GET_MODE_SIZE (mode) == 16 && MEM_P (x)) + && known_eq (GET_MODE_SIZE (mode), 16) && MEM_P (x)) return FP_REGS; if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x)) @@ -6043,7 +6082,7 @@ aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) return true; } -HOST_WIDE_INT +poly_int64 aarch64_initial_elimination_offset (unsigned from, unsigned to) { aarch64_layout_frame (); @@ -6129,6 +6168,11 @@ aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) static unsigned char aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode) { + /* ??? Logically we should only need to provide a value when + HARD_REGNO_MODE_OK says that at least one register in REGCLASS + can hold MODE, but at the moment we need to handle all modes. + Just ignore any runtime parts for registers that can't store them. */ + HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode)); switch (regclass) { case CALLER_SAVE_REGS: @@ -6138,10 +6182,9 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode) case POINTER_AND_FP_REGS: case FP_REGS: case FP_LO_REGS: - return - aarch64_vector_mode_p (mode) - ? (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG - : (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + return (aarch64_vector_mode_p (mode) + ? CEIL (lowest_size, UNITS_PER_VREG) + : CEIL (lowest_size, UNITS_PER_WORD)); case STACK_REG: return 1; @@ -6692,25 +6735,15 @@ aarch64_address_cost (rtx x, { /* For the sake of calculating the cost of the shifted register component, we can treat same sized modes in the same way. */ - switch (GET_MODE_BITSIZE (mode)) - { - case 16: - cost += addr_cost->addr_scale_costs.hi; - break; - - case 32: - cost += addr_cost->addr_scale_costs.si; - break; - - case 64: - cost += addr_cost->addr_scale_costs.di; - break; - - /* We can't tell, or this is a 128-bit vector. */ - default: - cost += addr_cost->addr_scale_costs.ti; - break; - } + if (known_eq (GET_MODE_BITSIZE (mode), 16)) + cost += addr_cost->addr_scale_costs.hi; + else if (known_eq (GET_MODE_BITSIZE (mode), 32)) + cost += addr_cost->addr_scale_costs.si; + else if (known_eq (GET_MODE_BITSIZE (mode), 64)) + cost += addr_cost->addr_scale_costs.di; + else + /* We can't tell, or this is a 128-bit vector. */ + cost += addr_cost->addr_scale_costs.ti; } return cost; @@ -7839,7 +7872,8 @@ cost_plus: if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0)) && CONST_INT_P (XEXP (op1, 1)) - && INTVAL (XEXP (op1, 1)) == GET_MODE_BITSIZE (mode) - 1) + && known_eq (INTVAL (XEXP (op1, 1)), + GET_MODE_BITSIZE (mode) - 1)) { *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed); /* We already demanded XEXP (op1, 0) to be REG_P, so @@ -7887,7 +7921,8 @@ cost_plus: if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0)) && CONST_INT_P (XEXP (op1, 1)) - && INTVAL (XEXP (op1, 1)) == GET_MODE_BITSIZE (mode) - 1) + && known_eq (INTVAL (XEXP (op1, 1)), + GET_MODE_BITSIZE (mode) - 1)) { *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed); /* We already demanded XEXP (op1, 0) to be REG_P, so @@ -8313,7 +8348,7 @@ aarch64_register_move_cost (machine_mode mode, return aarch64_register_move_cost (mode, from, GENERAL_REGS) + aarch64_register_move_cost (mode, GENERAL_REGS, to); - if (GET_MODE_SIZE (mode) == 16) + if (known_eq (GET_MODE_SIZE (mode), 16)) { /* 128-bit operations on general registers require 2 instructions. */ if (from == GENERAL_REGS && to == GENERAL_REGS) @@ -8689,7 +8724,7 @@ aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost; case vec_construct: - elements = TYPE_VECTOR_SUBPARTS (vectype); + elements = estimated_poly_value (TYPE_VECTOR_SUBPARTS (vectype)); return elements / 2 + 1; default: @@ -10730,6 +10765,10 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, &nregs, &is_ha)) { + /* No frontends can create types with variable-sized modes, so we + shouldn't be asked to pass or return them. */ + unsigned int ag_size = GET_MODE_SIZE (ag_mode).to_constant (); + /* TYPE passed in fp/simd registers. */ if (!TARGET_FLOAT) aarch64_err_no_fpadvsimd (mode, "varargs"); @@ -10743,8 +10782,8 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, if (is_ha) { - if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG) - adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode); + if (BYTES_BIG_ENDIAN && ag_size < UNITS_PER_VREG) + adjust = UNITS_PER_VREG - ag_size; } else if (BLOCK_REG_PADDING (mode, type, 1) == PAD_DOWNWARD && size < UNITS_PER_VREG) @@ -11132,8 +11171,8 @@ aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep) - tree_to_uhwi (TYPE_MIN_VALUE (index))); /* There must be no padding. */ - if (wi::to_wide (TYPE_SIZE (type)) - != count * GET_MODE_BITSIZE (*modep)) + if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)), + count * GET_MODE_BITSIZE (*modep))) return -1; return count; @@ -11163,8 +11202,8 @@ aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep) } /* There must be no padding. */ - if (wi::to_wide (TYPE_SIZE (type)) - != count * GET_MODE_BITSIZE (*modep)) + if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)), + count * GET_MODE_BITSIZE (*modep))) return -1; return count; @@ -11196,8 +11235,8 @@ aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep) } /* There must be no padding. */ - if (wi::to_wide (TYPE_SIZE (type)) - != count * GET_MODE_BITSIZE (*modep)) + if (maybe_ne (wi::to_poly_wide (TYPE_SIZE (type)), + count * GET_MODE_BITSIZE (*modep))) return -1; return count; @@ -11219,7 +11258,7 @@ static bool aarch64_short_vector_p (const_tree type, machine_mode mode) { - HOST_WIDE_INT size = -1; + poly_int64 size = -1; if (type && TREE_CODE (type) == VECTOR_TYPE) size = int_size_in_bytes (type); @@ -11227,7 +11266,7 @@ aarch64_short_vector_p (const_tree type, || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) size = GET_MODE_SIZE (mode); - return (size == 8 || size == 16); + return known_eq (size, 8) || known_eq (size, 16); } /* Return TRUE if the type, as described by TYPE and MODE, is a composite @@ -11679,8 +11718,9 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info, unsigned int n_elts; if (const_vec_duplicate_p (op, &elt)) n_elts = 1; - else if (GET_CODE (op) == CONST_VECTOR) - n_elts = CONST_VECTOR_NUNITS (op); + else if (GET_CODE (op) == CONST_VECTOR + && CONST_VECTOR_NUNITS (op).is_constant (&n_elts)) + /* N_ELTS set above. */; else return false; @@ -11869,11 +11909,11 @@ bool aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode, bool high) { - if (!VECTOR_MODE_P (mode)) + int nelts; + if (!VECTOR_MODE_P (mode) || !GET_MODE_NUNITS (mode).is_constant (&nelts)) return false; - rtx ideal = aarch64_simd_vect_par_cnst_half (mode, GET_MODE_NUNITS (mode), - high); + rtx ideal = aarch64_simd_vect_par_cnst_half (mode, nelts, high); HOST_WIDE_INT count_op = XVECLEN (op, 0); HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0); int i = 0; @@ -11958,7 +11998,8 @@ aarch64_simd_emit_reg_reg_move (rtx *operands, machine_mode mode, int aarch64_simd_attr_length_rglist (machine_mode mode) { - return (GET_MODE_SIZE (mode) / UNITS_PER_VREG) * 4; + /* This is only used (and only meaningful) for Advanced SIMD, not SVE. */ + return (GET_MODE_SIZE (mode).to_constant () / UNITS_PER_VREG) * 4; } /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum @@ -12038,7 +12079,6 @@ aarch64_simd_make_constant (rtx vals) machine_mode mode = GET_MODE (vals); rtx const_dup; rtx const_vec = NULL_RTX; - int n_elts = GET_MODE_NUNITS (mode); int n_const = 0; int i; @@ -12049,6 +12089,7 @@ aarch64_simd_make_constant (rtx vals) /* A CONST_VECTOR must contain only CONST_INTs and CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF). Only store valid constants in a CONST_VECTOR. */ + int n_elts = XVECLEN (vals, 0); for (i = 0; i < n_elts; ++i) { rtx x = XVECEXP (vals, 0, i); @@ -12087,7 +12128,7 @@ aarch64_expand_vector_init (rtx target, rtx vals) machine_mode mode = GET_MODE (target); scalar_mode inner_mode = GET_MODE_INNER (mode); /* The number of vector elements. */ - int n_elts = GET_MODE_NUNITS (mode); + int n_elts = XVECLEN (vals, 0); /* The number of vector elements which are not constant. */ int n_var = 0; rtx any_const = NULL_RTX; @@ -12227,7 +12268,9 @@ aarch64_shift_truncation_mask (machine_mode mode) return (!SHIFT_COUNT_TRUNCATED || aarch64_vector_mode_supported_p (mode) - || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1); + || aarch64_vect_struct_mode_p (mode)) + ? 0 + : (GET_MODE_UNIT_BITSIZE (mode) - 1); } /* Select a format to encode pointers in exception handling data. */ @@ -13350,7 +13393,8 @@ aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel, static bool aarch64_evpc_trn (struct expand_vec_perm_d *d) { - unsigned int odd, nelt = d->perm.length (); + HOST_WIDE_INT odd; + poly_uint64 nelt = d->perm.length (); rtx out, in0, in1, x; machine_mode vmode = d->vmode; @@ -13359,8 +13403,8 @@ aarch64_evpc_trn (struct expand_vec_perm_d *d) /* Note that these are little-endian tests. We correct for big-endian later. */ - odd = d->perm[0]; - if ((odd != 0 && odd != 1) + if (!d->perm[0].is_constant (&odd) + || (odd != 0 && odd != 1) || !d->perm.series_p (0, 2, odd, 2) || !d->perm.series_p (1, 2, nelt + odd, 2)) return false; @@ -13387,7 +13431,7 @@ aarch64_evpc_trn (struct expand_vec_perm_d *d) static bool aarch64_evpc_uzp (struct expand_vec_perm_d *d) { - unsigned int odd; + HOST_WIDE_INT odd; rtx out, in0, in1, x; machine_mode vmode = d->vmode; @@ -13396,8 +13440,8 @@ aarch64_evpc_uzp (struct expand_vec_perm_d *d) /* Note that these are little-endian tests. We correct for big-endian later. */ - odd = d->perm[0]; - if ((odd != 0 && odd != 1) + if (!d->perm[0].is_constant (&odd) + || (odd != 0 && odd != 1) || !d->perm.series_p (0, 1, odd, 2)) return false; @@ -13423,7 +13467,8 @@ aarch64_evpc_uzp (struct expand_vec_perm_d *d) static bool aarch64_evpc_zip (struct expand_vec_perm_d *d) { - unsigned int high, nelt = d->perm.length (); + unsigned int high; + poly_uint64 nelt = d->perm.length (); rtx out, in0, in1, x; machine_mode vmode = d->vmode; @@ -13432,11 +13477,12 @@ aarch64_evpc_zip (struct expand_vec_perm_d *d) /* Note that these are little-endian tests. We correct for big-endian later. */ - high = d->perm[0]; - if ((high != 0 && high * 2 != nelt) - || !d->perm.series_p (0, 2, high, 1) - || !d->perm.series_p (1, 2, high + nelt, 1)) + poly_uint64 first = d->perm[0]; + if ((maybe_ne (first, 0U) && maybe_ne (first * 2, nelt)) + || !d->perm.series_p (0, 2, first, 1) + || !d->perm.series_p (1, 2, first + nelt, 1)) return false; + high = maybe_ne (first, 0U); /* Success! */ if (d->testing_p) @@ -13461,13 +13507,13 @@ aarch64_evpc_zip (struct expand_vec_perm_d *d) static bool aarch64_evpc_ext (struct expand_vec_perm_d *d) { - unsigned int nelt = d->perm.length (); + HOST_WIDE_INT location; rtx offset; - unsigned int location = d->perm[0]; /* Always < nelt. */ - - /* Check if the extracted indices are increasing by one. */ - if (!d->perm.series_p (0, 1, location, 1)) + /* The first element always refers to the first vector. + Check if the extracted indices are increasing by one. */ + if (!d->perm[0].is_constant (&location) + || !d->perm.series_p (0, 1, location, 1)) return false; /* Success! */ @@ -13483,8 +13529,10 @@ aarch64_evpc_ext (struct expand_vec_perm_d *d) at the LSB end of the register), and the low elements of the second vector (stored at the MSB end of the register). So swap. */ std::swap (d->op0, d->op1); - /* location != 0 (above), so safe to assume (nelt - location) < nelt. */ - location = nelt - location; + /* location != 0 (above), so safe to assume (nelt - location) < nelt. + to_constant () is safe since this is restricted to Advanced SIMD + vectors. */ + location = d->perm.length ().to_constant () - location; } offset = GEN_INT (location); @@ -13500,12 +13548,13 @@ aarch64_evpc_ext (struct expand_vec_perm_d *d) static bool aarch64_evpc_rev (struct expand_vec_perm_d *d) { - unsigned int i, diff, size, unspec; + HOST_WIDE_INT diff; + unsigned int i, size, unspec; - if (!d->one_vector_p) + if (!d->one_vector_p + || !d->perm[0].is_constant (&diff)) return false; - diff = d->perm[0]; size = (diff + 1) * GET_MODE_UNIT_SIZE (d->vmode); if (size == 8) unspec = UNSPEC_REV64; @@ -13535,19 +13584,18 @@ aarch64_evpc_dup (struct expand_vec_perm_d *d) { rtx out = d->target; rtx in0; + HOST_WIDE_INT elt; machine_mode vmode = d->vmode; - unsigned int elt; rtx lane; - if (d->perm.encoding ().encoded_nelts () != 1) + if (d->perm.encoding ().encoded_nelts () != 1 + || !d->perm[0].is_constant (&elt)) return false; /* Success! */ if (d->testing_p) return true; - elt = d->perm[0]; - /* The generic preparation in aarch64_expand_vec_perm_const_1 swaps the operand order and the permute indices if it finds d->perm[0] to be in the second operand. Thus, we can always @@ -13567,7 +13615,12 @@ aarch64_evpc_tbl (struct expand_vec_perm_d *d) { rtx rperm[MAX_VECT_LEN], sel; machine_mode vmode = d->vmode; - unsigned int i, nelt = d->perm.length (); + + /* Make sure that the indices are constant. */ + unsigned int encoded_nelts = d->perm.encoding ().encoded_nelts (); + for (unsigned int i = 0; i < encoded_nelts; ++i) + if (!d->perm[i].is_constant ()) + return false; if (d->testing_p) return true; @@ -13578,16 +13631,17 @@ aarch64_evpc_tbl (struct expand_vec_perm_d *d) if (vmode != V8QImode && vmode != V16QImode) return false; - for (i = 0; i < nelt; ++i) - { - int nunits = GET_MODE_NUNITS (vmode); + /* to_constant is safe since this routine is specific to Advanced SIMD + vectors. */ + unsigned int nelt = d->perm.length ().to_constant (); + for (unsigned int i = 0; i < nelt; ++i) + /* If big-endian and two vectors we end up with a weird mixed-endian + mode on NEON. Reverse the index within each word but not the word + itself. to_constant is safe because we checked is_constant above. */ + rperm[i] = GEN_INT (BYTES_BIG_ENDIAN + ? d->perm[i].to_constant () ^ (nelt - 1) + : d->perm[i].to_constant ()); - /* If big-endian and two vectors we end up with a weird mixed-endian - mode on NEON. Reverse the index within each word but not the word - itself. */ - rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1) - : (HOST_WIDE_INT) d->perm[i]); - } sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm)); sel = force_reg (vmode, sel); @@ -13601,14 +13655,14 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) /* The pattern matching functions above are written to look for a small number to begin the sequence (0, 1, N/2). If we begin with an index from the second operand, we can swap the operands. */ - unsigned int nelt = d->perm.length (); - if (d->perm[0] >= nelt) + poly_int64 nelt = d->perm.length (); + if (known_ge (d->perm[0], nelt)) { d->perm.rotate_inputs (1); std::swap (d->op0, d->op1); } - if (TARGET_SIMD && nelt > 1) + if (TARGET_SIMD && known_gt (nelt, 1)) { if (aarch64_evpc_rev (d)) return true; @@ -13724,7 +13778,7 @@ aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2) AMOUNT bytes. */ static rtx -aarch64_move_pointer (rtx pointer, int amount) +aarch64_move_pointer (rtx pointer, poly_int64 amount) { rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount); @@ -13738,9 +13792,7 @@ aarch64_move_pointer (rtx pointer, int amount) static rtx aarch64_progress_pointer (rtx pointer) { - HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer)); - - return aarch64_move_pointer (pointer, amount); + return aarch64_move_pointer (pointer, GET_MODE_SIZE (GET_MODE (pointer))); } /* Copy one MODE sized block from SRC to DST, then progress SRC and DST by @@ -14551,7 +14603,9 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load, offval_1 = INTVAL (offset_1); offval_2 = INTVAL (offset_2); - msize = GET_MODE_SIZE (mode); + /* We should only be trying this for fixed-sized modes. There is no + SVE LDP/STP instruction. */ + msize = GET_MODE_SIZE (mode).to_constant (); /* Check if the offsets are consecutive. */ if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize)) return false; @@ -14911,7 +14965,9 @@ aarch64_fpconst_pow_of_2 (rtx x) int aarch64_vec_fpconst_pow_of_2 (rtx x) { - if (GET_CODE (x) != CONST_VECTOR) + int nelts; + if (GET_CODE (x) != CONST_VECTOR + || !CONST_VECTOR_NUNITS (x).is_constant (&nelts)) return -1; if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT) @@ -14921,7 +14977,7 @@ aarch64_vec_fpconst_pow_of_2 (rtx x) if (firstval <= 0) return -1; - for (int i = 1; i < CONST_VECTOR_NUNITS (x); i++) + for (int i = 1; i < nelts; i++) if (aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, i)) != firstval) return -1; diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index ed7a8e5e7e8..98e45171043 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -586,7 +586,7 @@ extern enum aarch64_processor aarch64_tune; #define DEFAULT_PCC_STRUCT_RETURN 0 -#ifdef HOST_WIDE_INT +#ifdef HAVE_POLY_INT_H struct GTY (()) aarch64_frame { HOST_WIDE_INT reg_offset[FIRST_PSEUDO_REGISTER]; @@ -604,20 +604,20 @@ struct GTY (()) aarch64_frame /* Offset from the base of the frame (incomming SP) to the top of the locals area. This value is always a multiple of STACK_BOUNDARY. */ - HOST_WIDE_INT locals_offset; + poly_int64 locals_offset; /* Offset from the base of the frame (incomming SP) to the hard_frame_pointer. This value is always a multiple of STACK_BOUNDARY. */ - HOST_WIDE_INT hard_fp_offset; + poly_int64 hard_fp_offset; /* The size of the frame. This value is the offset from base of the - * frame (incomming SP) to the stack_pointer. This value is always - * a multiple of STACK_BOUNDARY. */ - HOST_WIDE_INT frame_size; + frame (incomming SP) to the stack_pointer. This value is always + a multiple of STACK_BOUNDARY. */ + poly_int64 frame_size; /* The size of the initial stack adjustment before saving callee-saves. */ - HOST_WIDE_INT initial_adjust; + poly_int64 initial_adjust; /* The writeback value when pushing callee-save registers. It is zero when no push is used. */ @@ -625,10 +625,10 @@ struct GTY (()) aarch64_frame /* The offset from SP to the callee-save registers after initial_adjust. It may be non-zero if no push is used (ie. callee_adjust == 0). */ - HOST_WIDE_INT callee_offset; + poly_int64 callee_offset; /* The size of the stack adjustment after saving callee-saves. */ - HOST_WIDE_INT final_adjust; + poly_int64 final_adjust; /* Store FP,LR and setup a frame pointer. */ bool emit_frame_chain; diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 07510415917..854c44830e6 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -3328,7 +3328,7 @@ CRC))] "TARGET_CRC32" { - if (GET_MODE_BITSIZE (GET_MODE (operands[2])) >= 64) + if (GET_MODE_BITSIZE (mode) >= 64) return "\\t%w0, %w1, %x2"; else return "\\t%w0, %w1, %w2"; -- 2.30.2