From 70238876c799943407ff6717b7fd9d98b8cb9080 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Sat, 12 Oct 2019 20:37:37 +0000 Subject: [PATCH] pa.c (pa_output_call): Load descriptor address to register %r22. * config/pa/pa.c (pa_output_call): Load descriptor address to register %r22. Load function address before global pointer. (pa_attr_length_indirect_call): Adjust length of inline versions of $$dyncall. (pa_output_indirect_call): Remove fast inline version of $$dyncall before normal cases. Update inline $$dyncall sequences to preserve function descriptor address in register %r22. (TRAMPOLINE_CODE_SIZE): Adjust. (pa_asm_trampoline_template): Revise 32-bit trampoline. Don't assume register %r22 contains trampoline address. (pa_trampoline_init): Adjust offsets. (pa_trampoline_adjust_address): Likewise. * config/pa/pa.h (TRAMPOLINE_SIZE): Adjust 32-bit size. From-SVN: r276928 --- gcc/ChangeLog | 16 +++++ gcc/config/pa/pa.c | 163 ++++++++++++++++++++++++--------------------- gcc/config/pa/pa.h | 2 +- 3 files changed, 103 insertions(+), 78 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 161b58084a0..3cd90c7f566 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,19 @@ +2019-10-12 John David Anglin + + * config/pa/pa.c (pa_output_call): Load descriptor address to register + %r22. Load function address before global pointer. + (pa_attr_length_indirect_call): Adjust length of inline versions of + $$dyncall. + (pa_output_indirect_call): Remove fast inline version of $$dyncall + before normal cases. Update inline $$dyncall sequences to preserve + function descriptor address in register %r22. + (TRAMPOLINE_CODE_SIZE): Adjust. + (pa_asm_trampoline_template): Revise 32-bit trampoline. Don't assume + register %r22 contains trampoline address. + (pa_trampoline_init): Adjust offsets. + (pa_trampoline_adjust_address): Likewise. + * config/pa/pa.h (TRAMPOLINE_SIZE): Adjust 32-bit size. + 2019-10-12 Iain Sandoe PR target/67183 diff --git a/gcc/config/pa/pa.c b/gcc/config/pa/pa.c index a11362bb06b..d8aff717ed8 100644 --- a/gcc/config/pa/pa.c +++ b/gcc/config/pa/pa.c @@ -8027,20 +8027,22 @@ pa_output_call (rtx_insn *insn, rtx call_dest, int sibcall) { output_asm_insn ("addil LT'%0,%%r19", xoperands); output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands); - output_asm_insn ("ldw 0(%%r1),%%r1", xoperands); + output_asm_insn ("ldw 0(%%r1),%%r22", xoperands); } else { output_asm_insn ("addil LR'%0-$global$,%%r27", xoperands); - output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1", + output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r22", xoperands); } - output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands); - output_asm_insn ("depi 0,31,2,%%r1", xoperands); - output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands); - output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands); + output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands); + output_asm_insn ("depi 0,31,2,%%r22", xoperands); + /* Should this be an ordered load to ensure the target + address is loaded before the global pointer? */ + output_asm_insn ("ldw 0(%%r22),%%r1", xoperands); + output_asm_insn ("ldw 4(%%r22),%%r19", xoperands); if (!sibcall && !TARGET_PA_20) { @@ -8133,10 +8135,6 @@ pa_attr_length_indirect_call (rtx_insn *insn) if (TARGET_PORTABLE_RUNTIME) return 16; - /* Inline version of $$dyncall. */ - if ((TARGET_NO_SPACE_REGS || TARGET_PA_20) && !optimize_size) - return 20; - if (!TARGET_LONG_CALLS && ((TARGET_PA_20 && !TARGET_SOM && distance < 7600000) || distance < MAX_PCREL17F_OFFSET)) @@ -8146,12 +8144,15 @@ pa_attr_length_indirect_call (rtx_insn *insn) if (!flag_pic) return 12; - /* Inline version of $$dyncall. */ - if (TARGET_NO_SPACE_REGS || TARGET_PA_20) - return 20; - + /* Inline versions of $$dyncall. */ if (!optimize_size) - return 36; + { + if (TARGET_NO_SPACE_REGS) + return 28; + + if (TARGET_PA_20) + return 32; + } /* Long PIC pc-relative call. */ return 20; @@ -8189,22 +8190,6 @@ pa_output_indirect_call (rtx_insn *insn, rtx call_dest) return "blr %%r0,%%r2\n\tbv,n %%r0(%%r31)"; } - /* Maybe emit a fast inline version of $$dyncall. */ - if ((TARGET_NO_SPACE_REGS || TARGET_PA_20) && !optimize_size) - { - output_asm_insn ("bb,>=,n %%r22,30,.+12\n\t" - "ldw 2(%%r22),%%r19\n\t" - "ldw -2(%%r22),%%r22", xoperands); - pa_output_arg_descriptor (insn); - if (TARGET_NO_SPACE_REGS) - { - if (TARGET_PA_20) - return "bve,l,n (%%r22),%%r2\n\tnop"; - return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2"; - } - return "bve,l (%%r22),%%r2\n\tstw %%r2,-24(%%sp)"; - } - /* Now the normal case -- we can reach $$dyncall directly or we're sure that we can get there via a long-branch stub. @@ -8233,35 +8218,40 @@ pa_output_indirect_call (rtx_insn *insn, rtx call_dest) return "ble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2"; } - /* Maybe emit a fast inline version of $$dyncall. The long PIC - pc-relative call sequence is five instructions. The inline PA 2.0 - version of $$dyncall is also five instructions. The PA 1.X versions - are longer but still an overall win. */ - if (TARGET_NO_SPACE_REGS || TARGET_PA_20 || !optimize_size) + /* The long PIC pc-relative call sequence is five instructions. So, + let's use an inline version of $$dyncall when the calling sequence + has a roughly similar number of instructions and we are not optimizing + for size. We need two instructions to load the return pointer plus + the $$dyncall implementation. */ + if (!optimize_size) { - output_asm_insn ("bb,>=,n %%r22,30,.+12\n\t" - "ldw 2(%%r22),%%r19\n\t" - "ldw -2(%%r22),%%r22", xoperands); if (TARGET_NO_SPACE_REGS) { pa_output_arg_descriptor (insn); - if (TARGET_PA_20) - return "bve,l,n (%%r22),%%r2\n\tnop"; - return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2"; + output_asm_insn ("bl .+8,%rp\b\t" + "ldo 20(%r2),%r2\n\t" + "extru,<> %r22,30,1,%r0\n\t" + "bv,n %%r0(%r22)\n\t" + "ldw -2(%%r22),%%r21\n\t" + "bv %%r0(%r21)\n\t" + "ldw 2(%%r22),%%r19", xoperands); + return ""; } if (TARGET_PA_20) { pa_output_arg_descriptor (insn); - return "bve,l (%%r22),%%r2\n\tstw %%r2,-24(%%sp)"; + output_asm_insn ("bl .+8,%%r2\b\t" + "ldo 24(%%r2),%%r2\n\t" + "stw %%r2,-24(%%sp)\n\t" + "extru,<> %r22,30,1,%r0\n\t" + "bve,n (%%r22)\n\t" + "ldw -2(%%r22),%%r21\n\t" + "bve (%%r21)\n\t" + "ldw 2(%%r22),%%r19", xoperands); + return ""; } - output_asm_insn ("bl .+8,%%r2\n\t" - "ldo 16(%%r2),%%r2\n\t" - "ldsid (%%r22),%%r1\n\t" - "mtsp %%r1,%%sr0", xoperands); - pa_output_arg_descriptor (insn); - return "be 0(%%sr0,%%r22)\n\tstw %%r2,-24(%%sp)"; } - + /* We need a long PIC call to $$dyncall. */ xoperands[0] = gen_rtx_SYMBOL_REF (Pmode, "$$dyncall"); xoperands[1] = gen_rtx_REG (Pmode, 2); @@ -10025,7 +10015,7 @@ pa_modes_tieable_p (machine_mode mode1, machine_mode mode2) /* Length in units of the trampoline instruction code. */ -#define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40)) +#define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 36 : 48)) /* Output assembler code for a block containing the constant parts @@ -10046,27 +10036,46 @@ pa_asm_trampoline_template (FILE *f) { if (!TARGET_64BIT) { - fputs ("\tldw 36(%r22),%r21\n", f); - fputs ("\tbb,>=,n %r21,30,.+16\n", f); - if (ASSEMBLER_DIALECT == 0) - fputs ("\tdepi 0,31,2,%r21\n", f); - else - fputs ("\tdepwi 0,31,2,%r21\n", f); - fputs ("\tldw 4(%r21),%r19\n", f); - fputs ("\tldw 0(%r21),%r21\n", f); if (TARGET_PA_20) { - fputs ("\tbve (%r21)\n", f); - fputs ("\tldw 40(%r22),%r29\n", f); + fputs ("\tmfia %r20\n", f); + fputs ("\tldw 48(%r20),%r22\n", f); + fputs ("\tcopy %r22,%r21\n", f); + fputs ("\tbb,>=,n %r22,30,.+16\n", f); + fputs ("\tdepwi 0,31,2,%r22\n", f); + fputs ("\tldw 0(%r22),%r21\n", f); + fputs ("\tldw 4(%r22),%r19\n", f); + fputs ("\tbve (%r21)\n", f); + fputs ("\tldw 52(%r1),%r29\n", f); + fputs ("\t.word 0\n", f); fputs ("\t.word 0\n", f); fputs ("\t.word 0\n", f); } else { + if (ASSEMBLER_DIALECT == 0) + { + fputs ("\tbl .+8,%r20\n", f); + fputs ("\tdepi 0,31,2,%r20\n", f); + } + else + { + fputs ("\tb,l .+8,%r20\n", f); + fputs ("\tdepwi 0,31,2,%r20\n", f); + } + fputs ("\tldw 40(%r20),%r22\n", f); + fputs ("\tcopy %r22,%r21\n", f); + fputs ("\tbb,>=,n %r22,30,.+16\n", f); + if (ASSEMBLER_DIALECT == 0) + fputs ("\tdepi 0,31,2,%r22\n", f); + else + fputs ("\tdepwi 0,31,2,%r22\n", f); + fputs ("\tldw 0(%r22),%r21\n", f); + fputs ("\tldw 4(%r22),%r19\n", f); fputs ("\tldsid (%r21),%r1\n", f); fputs ("\tmtsp %r1,%sr0\n", f); - fputs ("\tbe 0(%sr0,%r21)\n", f); - fputs ("\tldw 40(%r22),%r29\n", f); + fputs ("\tbe 0(%sr0,%r21)\n", f); + fputs ("\tldw 44(%r20),%r29\n", f); } fputs ("\t.word 0\n", f); fputs ("\t.word 0\n", f); @@ -10080,11 +10089,11 @@ pa_asm_trampoline_template (FILE *f) fputs ("\t.dword 0\n", f); fputs ("\t.dword 0\n", f); fputs ("\tmfia %r31\n", f); - fputs ("\tldd 24(%r31),%r1\n", f); - fputs ("\tldd 24(%r1),%r27\n", f); - fputs ("\tldd 16(%r1),%r1\n", f); - fputs ("\tbve (%r1)\n", f); + fputs ("\tldd 24(%r31),%r27\n", f); fputs ("\tldd 32(%r31),%r31\n", f); + fputs ("\tldd 16(%r27),%r1\n", f); + fputs ("\tbve (%r1)\n", f); + fputs ("\tldd 24(%r27),%r27\n", f); fputs ("\t.dword 0 ; fptr\n", f); fputs ("\t.dword 0 ; static link\n", f); } @@ -10094,10 +10103,10 @@ pa_asm_trampoline_template (FILE *f) FNADDR is an RTX for the address of the function's pure code. CXT is an RTX for the static chain value for the function. - Move the function address to the trampoline template at offset 36. - Move the static chain value to trampoline template at offset 40. - Move the trampoline address to trampoline template at offset 44. - Move r19 to trampoline template at offset 48. The latter two + Move the function address to the trampoline template at offset 48. + Move the static chain value to trampoline template at offset 52. + Move the trampoline address to trampoline template at offset 56. + Move r19 to trampoline template at offset 60. The latter two words create a plabel for the indirect call to the trampoline. A similar sequence is used for the 64-bit port but the plabel is @@ -10123,15 +10132,15 @@ pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) if (!TARGET_64BIT) { - tmp = adjust_address (m_tramp, Pmode, 36); + tmp = adjust_address (m_tramp, Pmode, 48); emit_move_insn (tmp, fnaddr); - tmp = adjust_address (m_tramp, Pmode, 40); + tmp = adjust_address (m_tramp, Pmode, 52); emit_move_insn (tmp, chain_value); /* Create a fat pointer for the trampoline. */ - tmp = adjust_address (m_tramp, Pmode, 44); + tmp = adjust_address (m_tramp, Pmode, 56); emit_move_insn (tmp, r_tramp); - tmp = adjust_address (m_tramp, Pmode, 48); + tmp = adjust_address (m_tramp, Pmode, 60); emit_move_insn (tmp, gen_rtx_REG (Pmode, 19)); /* fdc and fic only use registers for the address to flush, @@ -10190,13 +10199,13 @@ pa_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) /* Perform any machine-specific adjustment in the address of the trampoline. ADDR contains the address that was passed to pa_trampoline_init. - Adjust the trampoline address to point to the plabel at offset 44. */ + Adjust the trampoline address to point to the plabel at offset 56. */ static rtx pa_trampoline_adjust_address (rtx addr) { if (!TARGET_64BIT) - addr = memory_address (Pmode, plus_constant (Pmode, addr, 46)); + addr = memory_address (Pmode, plus_constant (Pmode, addr, 58)); return addr; } diff --git a/gcc/config/pa/pa.h b/gcc/config/pa/pa.h index 1a43e5caadc..4d6c425a085 100644 --- a/gcc/config/pa/pa.h +++ b/gcc/config/pa/pa.h @@ -688,7 +688,7 @@ extern int may_call_alloca; /* Length in units of the trampoline for entering a nested function. */ -#define TRAMPOLINE_SIZE (TARGET_64BIT ? 72 : 52) +#define TRAMPOLINE_SIZE (TARGET_64BIT ? 72 : 64) /* Alignment required by the trampoline. */ -- 2.30.2