From d018b46e328163477ca334b844bb7ea9f99c42f7 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 28 Jun 2011 15:25:52 -0700 Subject: [PATCH] arm: Convert thumb1 prologue to rtl. * config/arm/arm.c (arm_output_function_prologue): Don't call thumb1_output_function_prologue. (arm_expand_prologue): Avoid dead store. (number_of_first_bit_set): Use ctz_hwi. (thumb1_emit_multi_reg_push): New. (thumb1_expand_prologue): Merge thumb1_output_function_prologue to emit the entire prologue as rtl. (thumb1_output_interwork): Split out from thumb1_output_function_prologue. (thumb1_output_function_prologue): Remove. (arm_attr_length_push_multi): Handle thumb1. * config/arm/arm.md (VUNSPEC_THUMB1_INTERWORK): New. (prologue_thumb1_interwork): New. (*push_multi): Allow thumb1; use push_mult_memory_operand. * config/arm/predicates.md (push_mult_memory_operand): New. From-SVN: r175605 --- gcc/ChangeLog | 20 +- gcc/config/arm/arm-protos.h | 1 + gcc/config/arm/arm.c | 560 +++++++++++++++++++---------------- gcc/config/arm/arm.md | 13 +- gcc/config/arm/predicates.md | 28 ++ 5 files changed, 365 insertions(+), 257 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c5759af0d48..fafbae50af1 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,21 @@ +2011-06-28 Richard Henderson + + * config/arm/arm.c (arm_output_function_prologue): Don't call + thumb1_output_function_prologue. + (arm_expand_prologue): Avoid dead store. + (number_of_first_bit_set): Use ctz_hwi. + (thumb1_emit_multi_reg_push): New. + (thumb1_expand_prologue): Merge thumb1_output_function_prologue + to emit the entire prologue as rtl. + (thumb1_output_interwork): Split out from + thumb1_output_function_prologue. + (thumb1_output_function_prologue): Remove. + (arm_attr_length_push_multi): Handle thumb1. + * config/arm/arm.md (VUNSPEC_THUMB1_INTERWORK): New. + (prologue_thumb1_interwork): New. + (*push_multi): Allow thumb1; use push_mult_memory_operand. + * config/arm/predicates.md (push_mult_memory_operand): New. + 2011-06-28 Eric Botcazou * config/sparc/sync.md (*stbar): Delete. @@ -863,7 +881,7 @@ 2011-06-20 Changpeng Fang - PR i386/49089 + PR i386/49089 * config/i386/i386.c (avx256_split_unaligned_load): New definition. (avx256_split_unaligned_store): New definition. (ix86_option_override_internal): Enable avx256 unaligned load/store diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 3eb21778123..2f7c508cfc5 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -176,6 +176,7 @@ extern void arm_init_expanders (void); extern const char *thumb_unexpanded_epilogue (void); extern void thumb1_expand_prologue (void); extern void thumb1_expand_epilogue (void); +extern const char *thumb1_output_interwork (void); #ifdef TREE_CODE extern int is_called_in_ARM_mode (tree); #endif diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 8b9cb25b49c..be036594a8c 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -126,7 +126,6 @@ static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *); #endif static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT); static void arm_output_function_prologue (FILE *, HOST_WIDE_INT); -static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT); static int arm_comp_type_attributes (const_tree, const_tree); static void arm_set_default_type_attributes (tree); static int arm_adjust_cost (rtx, rtx, rtx, int); @@ -14571,11 +14570,9 @@ arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size) { unsigned long func_type; + /* ??? Do we want to print some of the below anyway? */ if (TARGET_THUMB1) - { - thumb1_output_function_prologue (f, frame_size); - return; - } + return; /* Sanity check. */ gcc_assert (!arm_ccfsm_state && !arm_target_insn); @@ -15873,7 +15870,7 @@ arm_expand_prologue (void) /* Interrupt functions must not corrupt any registers. Creating a frame pointer however, corrupts the IP register, so we must push it first. */ - insn = emit_multi_reg_push (1 << IP_REGNUM); + emit_multi_reg_push (1 << IP_REGNUM); /* Do not set RTX_FRAME_RELATED_P on this insn. The dwarf stack unwinding code only wants to see one @@ -20121,14 +20118,73 @@ arm_expand_builtin (tree exp, inline static int number_of_first_bit_set (unsigned mask) { - int bit; + return ctz_hwi (mask); +} + +/* Like emit_multi_reg_push, but allowing for a different set of + registers to be described as saved. MASK is the set of registers + to be saved; REAL_REGS is the set of registers to be described as + saved. If REAL_REGS is 0, only describe the stack adjustment. */ - for (bit = 0; - (mask & (1 << bit)) == 0; - ++bit) - continue; +static rtx +thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs) +{ + unsigned long regno; + rtx par[10], tmp, reg, insn; + int i, j; + + /* Build the parallel of the registers actually being stored. */ + for (i = 0; mask; ++i, mask &= mask - 1) + { + regno = ctz_hwi (mask); + reg = gen_rtx_REG (SImode, regno); - return bit; + if (i == 0) + tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT); + else + tmp = gen_rtx_USE (VOIDmode, reg); + + par[i] = tmp; + } + + tmp = plus_constant (stack_pointer_rtx, -4 * i); + tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp); + tmp = gen_frame_mem (BLKmode, tmp); + tmp = gen_rtx_SET (VOIDmode, tmp, par[0]); + par[0] = tmp; + + tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par)); + insn = emit_insn (tmp); + + /* Always build the stack adjustment note for unwind info. */ + tmp = plus_constant (stack_pointer_rtx, -4 * i); + tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp); + par[0] = tmp; + + /* Build the parallel of the registers recorded as saved for unwind. */ + for (j = 0; real_regs; ++j, real_regs &= real_regs - 1) + { + regno = ctz_hwi (real_regs); + reg = gen_rtx_REG (SImode, regno); + + tmp = plus_constant (stack_pointer_rtx, j * 4); + tmp = gen_frame_mem (SImode, tmp); + tmp = gen_rtx_SET (VOIDmode, tmp, reg); + RTX_FRAME_RELATED_P (tmp) = 1; + par[j + 1] = tmp; + } + + if (j == 0) + tmp = par[0]; + else + { + RTX_FRAME_RELATED_P (par[0]) = 1; + tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par)); + } + + add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp); + + return insn; } /* Emit code to push or pop registers to or from the stack. F is the @@ -21004,17 +21060,20 @@ thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to) } } -/* Generate the rest of a function's prologue. */ +/* Generate the function's prologue. */ + void thumb1_expand_prologue (void) { - rtx insn, dwarf; + rtx insn; HOST_WIDE_INT amount; arm_stack_offsets *offsets; unsigned long func_type; int regno; unsigned long live_regs_mask; + unsigned long l_mask; + unsigned high_regs_pushed = 0; func_type = arm_current_func_type (); @@ -21028,8 +21087,206 @@ thumb1_expand_prologue (void) return; } + if (is_called_in_ARM_mode (current_function_decl)) + emit_insn (gen_prologue_thumb1_interwork ()); + offsets = arm_get_frame_offsets (); live_regs_mask = offsets->saved_regs_mask; + + /* Extract a mask of the ones we can give to the Thumb's push instruction. */ + l_mask = live_regs_mask & 0x40ff; + /* Then count how many other high registers will need to be pushed. */ + high_regs_pushed = bit_count (live_regs_mask & 0x0f00); + + if (crtl->args.pretend_args_size) + { + rtx x = GEN_INT (-crtl->args.pretend_args_size); + + if (cfun->machine->uses_anonymous_args) + { + int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size); + unsigned long mask; + + mask = 1ul << (LAST_ARG_REGNUM + 1); + mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes); + + insn = thumb1_emit_multi_reg_push (mask, 0); + } + else + { + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, x)); + } + RTX_FRAME_RELATED_P (insn) = 1; + } + + if (TARGET_BACKTRACE) + { + HOST_WIDE_INT offset = 0; + unsigned work_register; + rtx work_reg, x, arm_hfp_rtx; + + /* We have been asked to create a stack backtrace structure. + The code looks like this: + + 0 .align 2 + 0 func: + 0 sub SP, #16 Reserve space for 4 registers. + 2 push {R7} Push low registers. + 4 add R7, SP, #20 Get the stack pointer before the push. + 6 str R7, [SP, #8] Store the stack pointer + (before reserving the space). + 8 mov R7, PC Get hold of the start of this code + 12. + 10 str R7, [SP, #16] Store it. + 12 mov R7, FP Get hold of the current frame pointer. + 14 str R7, [SP, #4] Store it. + 16 mov R7, LR Get hold of the current return address. + 18 str R7, [SP, #12] Store it. + 20 add R7, SP, #16 Point at the start of the + backtrace structure. + 22 mov FP, R7 Put this value into the frame pointer. */ + + work_register = thumb_find_work_register (live_regs_mask); + work_reg = gen_rtx_REG (SImode, work_register); + arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM); + + insn = emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, GEN_INT (-16))); + RTX_FRAME_RELATED_P (insn) = 1; + + if (l_mask) + { + insn = thumb1_emit_multi_reg_push (l_mask, l_mask); + RTX_FRAME_RELATED_P (insn) = 1; + + offset = bit_count (l_mask) * UNITS_PER_WORD; + } + + x = GEN_INT (offset + 16 + crtl->args.pretend_args_size); + emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x)); + + x = plus_constant (stack_pointer_rtx, offset + 4); + x = gen_frame_mem (SImode, x); + emit_move_insn (x, work_reg); + + /* Make sure that the instruction fetching the PC is in the right place + to calculate "start of backtrace creation code + 12". */ + /* ??? The stores using the common WORK_REG ought to be enough to + prevent the scheduler from doing anything weird. Failing that + we could always move all of the following into an UNSPEC_VOLATILE. */ + if (l_mask) + { + x = gen_rtx_REG (SImode, PC_REGNUM); + emit_move_insn (work_reg, x); + + x = plus_constant (stack_pointer_rtx, offset + 12); + x = gen_frame_mem (SImode, x); + emit_move_insn (x, work_reg); + + emit_move_insn (work_reg, arm_hfp_rtx); + + x = plus_constant (stack_pointer_rtx, offset); + x = gen_frame_mem (SImode, x); + emit_move_insn (x, work_reg); + } + else + { + emit_move_insn (work_reg, arm_hfp_rtx); + + x = plus_constant (stack_pointer_rtx, offset); + x = gen_frame_mem (SImode, x); + emit_move_insn (x, work_reg); + + x = gen_rtx_REG (SImode, PC_REGNUM); + emit_move_insn (work_reg, x); + + x = plus_constant (stack_pointer_rtx, offset + 12); + x = gen_frame_mem (SImode, x); + emit_move_insn (x, work_reg); + } + + x = gen_rtx_REG (SImode, LR_REGNUM); + emit_move_insn (work_reg, x); + + x = plus_constant (stack_pointer_rtx, offset + 8); + x = gen_frame_mem (SImode, x); + emit_move_insn (x, work_reg); + + x = GEN_INT (offset + 12); + emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x)); + + emit_move_insn (arm_hfp_rtx, work_reg); + } + /* Optimization: If we are not pushing any low registers but we are going + to push some high registers then delay our first push. This will just + be a push of LR and we can combine it with the push of the first high + register. */ + else if ((l_mask & 0xff) != 0 + || (high_regs_pushed == 0 && l_mask)) + { + unsigned long mask = l_mask; + mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1; + insn = thumb1_emit_multi_reg_push (mask, mask); + RTX_FRAME_RELATED_P (insn) = 1; + } + + if (high_regs_pushed) + { + unsigned pushable_regs; + unsigned next_hi_reg; + + for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--) + if (live_regs_mask & (1 << next_hi_reg)) + break; + + pushable_regs = l_mask & 0xff; + + if (pushable_regs == 0) + pushable_regs = 1 << thumb_find_work_register (live_regs_mask); + + while (high_regs_pushed > 0) + { + unsigned long real_regs_mask = 0; + + for (regno = LAST_LO_REGNUM; regno >= 0; regno --) + { + if (pushable_regs & (1 << regno)) + { + emit_move_insn (gen_rtx_REG (SImode, regno), + gen_rtx_REG (SImode, next_hi_reg)); + + high_regs_pushed --; + real_regs_mask |= (1 << next_hi_reg); + + if (high_regs_pushed) + { + for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM; + next_hi_reg --) + if (live_regs_mask & (1 << next_hi_reg)) + break; + } + else + { + pushable_regs &= ~((1 << regno) - 1); + break; + } + } + } + + /* If we had to find a work register and we have not yet + saved the LR then add it to the list of regs to push. */ + if (l_mask == (1 << LR_REGNUM)) + { + pushable_regs |= l_mask; + real_regs_mask |= l_mask; + l_mask = 0; + } + + insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask); + RTX_FRAME_RELATED_P (insn) = 1; + } + } + /* Load the pic register before setting the frame pointer, so we can use r7 as a temporary work register. */ if (flag_pic && arm_pic_register != INVALID_REGNUM) @@ -21055,7 +21312,7 @@ thumb1_expand_prologue (void) } else { - rtx reg; + rtx reg, dwarf; /* The stack decrement is too big for an immediate value in a single insn. In theory we could issue multiple subtracts, but after @@ -21083,12 +21340,12 @@ thumb1_expand_prologue (void) insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg)); - RTX_FRAME_RELATED_P (insn) = 1; + dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx, plus_constant (stack_pointer_rtx, -amount)); - RTX_FRAME_RELATED_P (dwarf) = 1; add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf); + RTX_FRAME_RELATED_P (insn) = 1; } } @@ -21165,253 +21422,45 @@ thumb1_expand_epilogue (void) emit_use (gen_rtx_REG (SImode, LR_REGNUM)); } -static void -thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED) -{ - arm_stack_offsets *offsets; - unsigned long live_regs_mask = 0; - unsigned long l_mask; - unsigned high_regs_pushed = 0; - int cfa_offset = 0; - int regno; +/* Implementation of insn prologue_thumb1_interwork. This is the first + "instruction" of a function called in ARM mode. Swap to thumb mode. */ - if (IS_NAKED (arm_current_func_type ())) - return; - - if (is_called_in_ARM_mode (current_function_decl)) - { - const char * name; +const char * +thumb1_output_interwork (void) +{ + const char * name; + FILE *f = asm_out_file; - gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM); - gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0)) - == SYMBOL_REF); - name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0); + gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM); + gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0)) + == SYMBOL_REF); + name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0); - /* Generate code sequence to switch us into Thumb mode. */ - /* The .code 32 directive has already been emitted by - ASM_DECLARE_FUNCTION_NAME. */ - asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM); - asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM); + /* Generate code sequence to switch us into Thumb mode. */ + /* The .code 32 directive has already been emitted by + ASM_DECLARE_FUNCTION_NAME. */ + asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM); + asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM); - /* Generate a label, so that the debugger will notice the - change in instruction sets. This label is also used by - the assembler to bypass the ARM code when this function - is called from a Thumb encoded function elsewhere in the - same file. Hence the definition of STUB_NAME here must - agree with the definition in gas/config/tc-arm.c. */ + /* Generate a label, so that the debugger will notice the + change in instruction sets. This label is also used by + the assembler to bypass the ARM code when this function + is called from a Thumb encoded function elsewhere in the + same file. Hence the definition of STUB_NAME here must + agree with the definition in gas/config/tc-arm.c. */ #define STUB_NAME ".real_start_of" - fprintf (f, "\t.code\t16\n"); + fprintf (f, "\t.code\t16\n"); #ifdef ARM_PE - if (arm_dllexport_name_p (name)) - name = arm_strip_name_encoding (name); + if (arm_dllexport_name_p (name)) + name = arm_strip_name_encoding (name); #endif - asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name); - fprintf (f, "\t.thumb_func\n"); - asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name); - } - - if (crtl->args.pretend_args_size) - { - /* Output unwind directive for the stack adjustment. */ - if (arm_except_unwind_info (&global_options) == UI_TARGET) - fprintf (f, "\t.pad #%d\n", - crtl->args.pretend_args_size); - - if (cfun->machine->uses_anonymous_args) - { - int num_pushes; - - fprintf (f, "\tpush\t{"); - - num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size); - - for (regno = LAST_ARG_REGNUM + 1 - num_pushes; - regno <= LAST_ARG_REGNUM; - regno++) - asm_fprintf (f, "%r%s", regno, - regno == LAST_ARG_REGNUM ? "" : ", "); - - fprintf (f, "}\n"); - } - else - asm_fprintf (f, "\tsub\t%r, %r, #%d\n", - SP_REGNUM, SP_REGNUM, - crtl->args.pretend_args_size); - - /* We don't need to record the stores for unwinding (would it - help the debugger any if we did?), but record the change in - the stack pointer. */ - if (dwarf2out_do_frame ()) - { - char *l = dwarf2out_cfi_label (false); - - cfa_offset = cfa_offset + crtl->args.pretend_args_size; - dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset); - } - } + asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name); + fprintf (f, "\t.thumb_func\n"); + asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name); - /* Get the registers we are going to push. */ - offsets = arm_get_frame_offsets (); - live_regs_mask = offsets->saved_regs_mask; - /* Extract a mask of the ones we can give to the Thumb's push instruction. */ - l_mask = live_regs_mask & 0x40ff; - /* Then count how many other high registers will need to be pushed. */ - high_regs_pushed = bit_count (live_regs_mask & 0x0f00); - - if (TARGET_BACKTRACE) - { - unsigned offset; - unsigned work_register; - - /* We have been asked to create a stack backtrace structure. - The code looks like this: - - 0 .align 2 - 0 func: - 0 sub SP, #16 Reserve space for 4 registers. - 2 push {R7} Push low registers. - 4 add R7, SP, #20 Get the stack pointer before the push. - 6 str R7, [SP, #8] Store the stack pointer (before reserving the space). - 8 mov R7, PC Get hold of the start of this code plus 12. - 10 str R7, [SP, #16] Store it. - 12 mov R7, FP Get hold of the current frame pointer. - 14 str R7, [SP, #4] Store it. - 16 mov R7, LR Get hold of the current return address. - 18 str R7, [SP, #12] Store it. - 20 add R7, SP, #16 Point at the start of the backtrace structure. - 22 mov FP, R7 Put this value into the frame pointer. */ - - work_register = thumb_find_work_register (live_regs_mask); - - if (arm_except_unwind_info (&global_options) == UI_TARGET) - asm_fprintf (f, "\t.pad #16\n"); - - asm_fprintf - (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n", - SP_REGNUM, SP_REGNUM); - - if (dwarf2out_do_frame ()) - { - char *l = dwarf2out_cfi_label (false); - - cfa_offset = cfa_offset + 16; - dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset); - } - - if (l_mask) - { - thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask); - offset = bit_count (l_mask) * UNITS_PER_WORD; - } - else - offset = 0; - - asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM, - offset + 16 + crtl->args.pretend_args_size); - - asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM, - offset + 4); - - /* Make sure that the instruction fetching the PC is in the right place - to calculate "start of backtrace creation code + 12". */ - if (l_mask) - { - asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM); - asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM, - offset + 12); - asm_fprintf (f, "\tmov\t%r, %r\n", work_register, - ARM_HARD_FRAME_POINTER_REGNUM); - asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM, - offset); - } - else - { - asm_fprintf (f, "\tmov\t%r, %r\n", work_register, - ARM_HARD_FRAME_POINTER_REGNUM); - asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM, - offset); - asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM); - asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM, - offset + 12); - } - - asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM); - asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM, - offset + 8); - asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM, - offset + 12); - asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n", - ARM_HARD_FRAME_POINTER_REGNUM, work_register); - } - /* Optimization: If we are not pushing any low registers but we are going - to push some high registers then delay our first push. This will just - be a push of LR and we can combine it with the push of the first high - register. */ - else if ((l_mask & 0xff) != 0 - || (high_regs_pushed == 0 && l_mask)) - { - unsigned long mask = l_mask; - mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1; - thumb_pushpop (f, mask, 1, &cfa_offset, mask); - } - - if (high_regs_pushed) - { - unsigned pushable_regs; - unsigned next_hi_reg; - - for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--) - if (live_regs_mask & (1 << next_hi_reg)) - break; - - pushable_regs = l_mask & 0xff; - - if (pushable_regs == 0) - pushable_regs = 1 << thumb_find_work_register (live_regs_mask); - - while (high_regs_pushed > 0) - { - unsigned long real_regs_mask = 0; - - for (regno = LAST_LO_REGNUM; regno >= 0; regno --) - { - if (pushable_regs & (1 << regno)) - { - asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg); - - high_regs_pushed --; - real_regs_mask |= (1 << next_hi_reg); - - if (high_regs_pushed) - { - for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM; - next_hi_reg --) - if (live_regs_mask & (1 << next_hi_reg)) - break; - } - else - { - pushable_regs &= ~((1 << regno) - 1); - break; - } - } - } - - /* If we had to find a work register and we have not yet - saved the LR then add it to the list of regs to push. */ - if (l_mask == (1 << LR_REGNUM)) - { - thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM), - 1, &cfa_offset, - real_regs_mask | (1 << LR_REGNUM)); - l_mask = 0; - } - else - thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask); - } - } + return ""; } /* Handle the case of a double word load into a low register from @@ -23949,6 +23998,9 @@ arm_attr_length_push_multi(rtx parallel_op, rtx first_op) /* ARM mode. */ if (TARGET_ARM) return 4; + /* Thumb1 mode. */ + if (TARGET_THUMB1) + return 2; /* Thumb2 mode. */ regno = REGNO (first_op); diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 26291afd615..2bf3551a414 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -116,6 +116,8 @@ ; instruction epilogue sequence that isn't expanded ; into normal RTL. Used for both normal and sibcall ; epilogues. + VUNSPEC_THUMB1_INTERWORK ; `prologue_thumb1_interwork' insn, used to swap + ; modes from arm to thumb. VUNSPEC_ALIGN ; `align' insn. Used at the head of a minipool table ; for inlined constants. VUNSPEC_POOL_END ; `end-of-table'. Used to mark the end of a minipool @@ -10121,6 +10123,13 @@ " ) +(define_insn "prologue_thumb1_interwork" + [(unspec_volatile [(const_int 0)] VUNSPEC_THUMB1_INTERWORK)] + "TARGET_THUMB1" + "* return thumb1_output_interwork ();" + [(set_attr "length" "8")] +) + ;; Note - although unspec_volatile's USE all hard registers, ;; USEs are ignored after relaod has completed. Thus we need ;; to add an unspec of the link register to ensure that flow @@ -10365,10 +10374,10 @@ ;; in a C function arm_attr_length_push_multi. (define_insn "*push_multi" [(match_parallel 2 "multi_register_push" - [(set (match_operand:BLK 0 "memory_operand" "=m") + [(set (match_operand:BLK 0 "push_mult_memory_operand" "") (unspec:BLK [(match_operand:SI 1 "s_register_operand" "")] UNSPEC_PUSH_MULT))])] - "TARGET_32BIT" + "" "* { int num_saves = XVECLEN (operands[2], 0); diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md index 4bd8af144b4..215d58df516 100644 --- a/gcc/config/arm/predicates.md +++ b/gcc/config/arm/predicates.md @@ -508,6 +508,34 @@ return true; }) +(define_predicate "push_mult_memory_operand" + (match_code "mem") +{ + /* ??? Given how PUSH_MULT is generated in the prologues, is there + any point in testing for thumb1 specially? All of the variants + use the same form. */ + if (TARGET_THUMB1) + { + /* ??? No attempt is made to represent STMIA, or validate that + the stack adjustment matches the register count. This is + true of the ARM/Thumb2 path as well. */ + rtx x = XEXP (op, 0); + if (GET_CODE (x) != PRE_MODIFY) + return false; + if (XEXP (x, 0) != stack_pointer_rtx) + return false; + x = XEXP (x, 1); + if (GET_CODE (x) != PLUS) + return false; + if (XEXP (x, 0) != stack_pointer_rtx) + return false; + return CONST_INT_P (XEXP (x, 1)); + } + + /* ARM and Thumb2 handle pre-modify in their legitimate_address. */ + return memory_operand (op, mode); +}) + ;;------------------------------------------------------------------------- ;; ;; Thumb predicates -- 2.30.2