From cba9c7897755fe9f7343d5428b12eea53c98b94e Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Wed, 7 Oct 2015 19:42:09 +0200 Subject: [PATCH] re PR target/66697 (Feature request: -mstackrealign and force_align_arg_pointer for x86_64) PR target/66697 * config/i386/i386.c (ix86_option_override_internal): Always use 8-byte minimum stack boundary in 64-bit mode. (ix86_compute_frame_layout): Remove assert on INCOMING_STACK_BOUNDARY. (ix86_emit_save_reg_using_mov): Support unaligned SSE store. Add a REG_CFA_EXPRESSION note if needed. (ix86_emit_restore_sse_regs_using_mov): Support unaligned SSE load. (ix86_handle_force_align_arg_pointer_attribute): New. (ix86_minimum_incoming_stack_boundary): Remove TARGET_64BIT check. (ix86_attribute_table): Set ix86_force_align_arg_pointer_string with ix86_handle_force_align_arg_pointer_attribute. * config/i386/i386.h (MIN_STACK_BOUNDARY): Set to BITS_PER_WORD. testsuite/ChangeLog: PR target/66697 * gcc.target/i386/20060512-1.c: Remove ia32 requirement. (PUSH, POP): New defines. (sse2_test): Use PUSH and POP to misalign runtime stack. * gcc.target/i386/20060512-2.c: Remove ia32 requirement. From-SVN: r228577 --- gcc/ChangeLog | 23 +++++++- gcc/config/i386/i386.c | 65 ++++++++++++++++++---- gcc/config/i386/i386.h | 2 +- gcc/testsuite/ChangeLog | 14 ++++- gcc/testsuite/gcc.target/i386/20060512-1.c | 13 ++++- gcc/testsuite/gcc.target/i386/20060512-2.c | 1 - 6 files changed, 97 insertions(+), 21 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 2d51ce850dd..7504c567124 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,18 @@ +2015-10-07 Uros Bizjak + + PR target/66697 + * config/i386/i386.c (ix86_option_override_internal): Always use + 8-byte minimum stack boundary in 64-bit mode. + (ix86_compute_frame_layout): Remove assert on INCOMING_STACK_BOUNDARY. + (ix86_emit_save_reg_using_mov): Support unaligned SSE store. + Add a REG_CFA_EXPRESSION note if needed. 
+ (ix86_emit_restore_sse_regs_using_mov): Support unaligned SSE load. + (ix86_handle_force_align_arg_pointer_attribute): New. + (ix86_minimum_incoming_stack_boundary): Remove TARGET_64BIT check. + (ix86_attribute_table): Set ix86_force_align_arg_pointer_string + with ix86_handle_force_align_arg_pointer_attribute. + * config/i386/i386.h (MIN_STACK_BOUNDARY): Set to BITS_PER_WORD. + 2015-10-07 Aditya Kumar Sebastian Pop @@ -287,7 +302,8 @@ * graphite-sese-to-poly.c (build_loop_iteration_domains): Only loops which are in this region are passed so gcc_assert and remove redundant computation. - * sese.c (sese_build_liveouts): Pass only those bbs which are not in region. + * sese.c (sese_build_liveouts): Pass only those bbs which are not + in region. (sese_bad_liveouts_use): Only BBs which are not in region are passed so gcc_assert on that and remove unnecessary computation. (sese_build_liveouts_use): Same. @@ -500,7 +516,7 @@ 2015-10-05 Aditya Kumar Sebastian Pop - * params.def (PARAM_GRAPHITE_MAX_NB_SCOP_PARAMS): Increase to 7. + * params.def (PARAM_GRAPHITE_MAX_NB_SCOP_PARAMS): Increase to 7. 2015-10-05 Aditya Kumar Sebastian Pop @@ -1111,7 +1127,8 @@ * graphite-poly.c (new_poly_bb): Same. * graphite-poly.h (gbb_from_bb): Same. * sese.h: Same. - * graphite-sese-to-poly.c (new_gimple_bb): gimple_bb_p -> gimple_poly_bb_p + * graphite-sese-to-poly.c (new_gimple_bb): + gimple_bb_p -> gimple_poly_bb_p (build_scop_scattering): Same. (find_params_in_bb): Same. (add_conditions_to_domain): Same. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index c5ebff5eaaf..a24bd26c96a 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -5209,8 +5209,7 @@ ix86_option_override_internal (bool main_args_p, ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary; if (opts_set->x_ix86_incoming_stack_boundary_arg) { - int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags) - ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 
4 : 3) : 2); + int min = TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2; if (opts->x_ix86_incoming_stack_boundary_arg < min || opts->x_ix86_incoming_stack_boundary_arg > 12) @@ -11386,7 +11385,6 @@ ix86_compute_frame_layout (struct ix86_frame *frame) /* The only ABI that has saved SSE registers (Win64) also has a 16-byte aligned default stack, and thus we don't need to be within the re-aligned local stack frame to save them. */ - gcc_assert (INCOMING_STACK_BOUNDARY >= 128); offset = ROUND_UP (offset, 16); offset += frame->nsseregs * 16; } @@ -11611,14 +11609,26 @@ ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno, struct machine_function *m = cfun->machine; rtx reg = gen_rtx_REG (mode, regno); rtx mem, addr, base, insn; + unsigned int align; addr = choose_baseaddr (cfa_offset); mem = gen_frame_mem (mode, addr); - /* For SSE saves, we need to indicate the 128-bit alignment. */ - set_mem_align (mem, GET_MODE_ALIGNMENT (mode)); + /* The location is aligned up to INCOMING_STACK_BOUNDARY. */ + align = MIN (GET_MODE_ALIGNMENT (mode), INCOMING_STACK_BOUNDARY); + set_mem_align (mem, align); + + /* SSE saves are not within re-aligned local stack frame. + In case INCOMING_STACK_BOUNDARY is misaligned, we have + to emit unaligned store. */ + if (mode == V4SFmode && align < 128) + { + rtx unspec = gen_rtx_UNSPEC (mode, gen_rtvec (1, reg), UNSPEC_STOREU); + insn = emit_insn (gen_rtx_SET (mem, unspec)); + } + else + insn = emit_insn (gen_rtx_SET (mem, reg)); - insn = emit_move_insn (mem, reg); RTX_FRAME_RELATED_P (insn) = 1; base = addr; @@ -11665,6 +11675,8 @@ ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno, mem = gen_rtx_MEM (mode, addr); add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg)); } + else + add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg)); } /* Emit code to save registers using MOV insns. @@ -11881,6 +11893,25 @@ find_drap_reg (void) } } +/* Handle a "force_align_arg_pointer" attribute. 
*/ + +static tree +ix86_handle_force_align_arg_pointer_attribute (tree *node, tree name, + tree, int, bool *no_add_attrs) +{ + if (TREE_CODE (*node) != FUNCTION_TYPE + && TREE_CODE (*node) != METHOD_TYPE + && TREE_CODE (*node) != FIELD_DECL + && TREE_CODE (*node) != TYPE_DECL) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + } + + return NULL_TREE; +} + /* Return minimum incoming stack alignment. */ static unsigned int @@ -11895,7 +11926,6 @@ ix86_minimum_incoming_stack_boundary (bool sibcall) if -mstackrealign is used, it isn't used for sibcall check and estimated stack alignment is 128bit. */ else if (!sibcall - && !TARGET_64BIT && ix86_force_align_arg_pointer && crtl->stack_alignment_estimated == 128) incoming_stack_boundary = MIN_STACK_BOUNDARY; @@ -13179,11 +13209,26 @@ ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset, { rtx reg = gen_rtx_REG (V4SFmode, regno); rtx mem; + unsigned int align; mem = choose_baseaddr (cfa_offset); mem = gen_rtx_MEM (V4SFmode, mem); - set_mem_align (mem, 128); - emit_move_insn (reg, mem); + + /* The location is aligned up to INCOMING_STACK_BOUNDARY. */ + align = MIN (GET_MODE_ALIGNMENT (V4SFmode), INCOMING_STACK_BOUNDARY); + set_mem_align (mem, align); + + /* SSE saves are not within re-aligned local stack frame. + In case INCOMING_STACK_BOUNDARY is misaligned, we have + to emit unaligned load. */ + if (align < 128) + { + rtx unspec = gen_rtx_UNSPEC (V4SFmode, gen_rtvec (1, mem), + UNSPEC_LOADU); + emit_insn (gen_rtx_SET (reg, unspec)); + } + else + emit_insn (gen_rtx_SET (reg, mem)); ix86_add_cfa_restore_note (NULL, reg, cfa_offset); @@ -48154,7 +48199,7 @@ static const struct attribute_spec ix86_attribute_table[] = true }, /* force_align_arg_pointer says this function realigns the stack at entry. 
*/ { (const char *)&ix86_force_align_arg_pointer_string, 0, 0, - false, true, true, ix86_handle_cconv_attribute, false }, + false, true, true, ix86_handle_force_align_arg_pointer_attribute, false }, #if TARGET_DLLIMPORT_DECL_ATTRIBUTES { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false }, { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false }, diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index ad174608850..4a84fb972d1 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -755,7 +755,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); #define MAIN_STACK_BOUNDARY (TARGET_64BIT ? 128 : 32) /* Minimum stack boundary. */ -#define MIN_STACK_BOUNDARY (TARGET_64BIT ? (TARGET_SSE ? 128 : 64) : 32) +#define MIN_STACK_BOUNDARY BITS_PER_WORD /* Boundary (in *bits*) on which the stack pointer prefers to be aligned; the compiler cannot rely on having this alignment. */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index d06a25f01ab..cc3cd6b4f74 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,6 +1,14 @@ +2015-10-07 Uros Bizjak + + PR target/66697 + * gcc.target/i386/20060512-1.c: Remove ia32 requirement. + (PUSH, POP): New defines. + (sse2_test): Use PUSH and POP to misalign runtime stack. + * gcc.target/i386/20060512-2.c: Remove ia32 requirement. + 2015-10-07 James Norris - * testsuite/lib/atomic-dg.exp (atomic_link_flags): Move flag setting + * lib/atomic-dg.exp (atomic_link_flags): Move flag setting to atomic_init. (atomic_init): Restrict flags usage. 2015-10-07 Andre Vehreschild @@ -62,12 +70,12 @@ 2015-10-05 Aditya Kumar Sebastian Pop - * gcc.dg/graphite/scop-sor.c: Un-xfail. + * gcc.dg/graphite/scop-sor.c: Un-xfail. 2015-10-05 Aditya Kumar Sebastian Pop - * gcc.dg/graphite/scop-sor.c: Xfail. + * gcc.dg/graphite/scop-sor.c: Xfail. 
2015-10-05 Martin Jambor Jan Hubicka diff --git a/gcc/testsuite/gcc.target/i386/20060512-1.c b/gcc/testsuite/gcc.target/i386/20060512-1.c index 374d18aea57..ec163a9bc51 100644 --- a/gcc/testsuite/gcc.target/i386/20060512-1.c +++ b/gcc/testsuite/gcc.target/i386/20060512-1.c @@ -1,5 +1,4 @@ /* { dg-do run } */ -/* { dg-require-effective-target ia32 } */ /* { dg-options "-std=gnu99 -msse2 -mpreferred-stack-boundary=4" } */ /* { dg-require-effective-target sse2 } */ @@ -7,6 +6,14 @@ #include <emmintrin.h> +#ifdef __x86_64__ +# define PUSH "pushq %rsi" +# define POP "popq %rsi" +#else +# define PUSH "pushl %esi" +# define POP "popl %esi" +#endif + __m128i __attribute__ ((__noinline__)) vector_using_function () { @@ -27,9 +34,9 @@ static void sse2_test (void) { int result; - asm ("pushl %esi"); /* Disalign runtime stack. */ + asm (PUSH); /* Misalign runtime stack. */ result = self_aligning_function (g_1, g_2); if (result != 42) abort (); - asm ("popl %esi"); + asm (POP); } diff --git a/gcc/testsuite/gcc.target/i386/20060512-2.c b/gcc/testsuite/gcc.target/i386/20060512-2.c index d3a779cb4e2..8ce4bd7f8be 100644 --- a/gcc/testsuite/gcc.target/i386/20060512-2.c +++ b/gcc/testsuite/gcc.target/i386/20060512-2.c @@ -1,5 +1,4 @@ /* { dg-do compile } */ -/* { dg-require-effective-target ia32 } */ /* { dg-options "-std=gnu99 -mpreferred-stack-boundary=4" } */ int outer_function (int x, int y) -- 2.30.2