From: Jeff Law Date: Tue, 5 Mar 1996 07:34:13 +0000 (-0700) Subject: lib2funcs.asm (__outline_prologue): New "function". X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=9e18f575557d94acced039000fa26958e75f69e6;p=gcc.git lib2funcs.asm (__outline_prologue): New "function". * lib2funcs.asm (__outline_prologue): New "function". (__outline_epilogue): New "function". * pa.h (TARGET_SPACE): Define. (target_flags): Add -mspace and -mno-space. Enable/disable space saving optimizations. (FRAME_POINTER_REQUIRED): Frame pointers are always required when generating out of line prologues and epilogues. * pa.c (compute_frame_size): Handle out of line prologues/epilogues. (hppa_expand_prologue): If optimizing for space, emit an out of line prologue. * pa.c (compute_frame_size): Handle out of line prologues/epilogues. (hppa_expand_prologue): If optimizing for space, emit an out of line prologue. (hppa_expand_epilogue): Similarly. (override_options): Optimizing for space is not compatible with either profiling or PIC code generation. * pa.md (outline_prologue_call): New pattern. (outline_epilogue_call): Likewise. From-SVN: r11438 --- diff --git a/gcc/config/pa/lib2funcs.asm b/gcc/config/pa/lib2funcs.asm index ea964d3af6f..d6f5cbc1d9d 100644 --- a/gcc/config/pa/lib2funcs.asm +++ b/gcc/config/pa/lib2funcs.asm @@ -24,6 +24,8 @@ .SPACE $TEXT$ .SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44 .SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY + .SUBSPA $MILLICODE$,QUAD=0,ALIGN=8,ACCESS=44,SORT=8 + .IMPORT $$dyncall,MILLICODE ; gcc_compiled.: .SPACE $TEXT$ @@ -61,3 +63,251 @@ L$foo be,n 0(%sr0,%rp) .EXIT .PROCEND + + .SPACE $TEXT$ + .SUBSPA $MILLICODE$ + +; This is an out-of-line prologue. 
+; +; It performs the following operations: +; +; * Saves the return pointer at sp - 20 +; +; * Creates a new stack frame (sp'), size of the frame is passed in %r21 +; +; * The old stack pointer is saved at sp +; +; * Saves grs (passed in low 16 bits of %r22) into the stack frame +; at sp' + local_fsize (passed in %r19). +; +; * Saves frs (passed in high 16 bits of %r22) into the stack +; frame at sp' + local_fsize (passed in %r19). +; +; * Sets up a frame pointer (in %r3). +; +; * Returns to the instruction _immediately_ after the call to +; this function. + + .align 32 + .NSUBSPA $MILLICODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY + .EXPORT __outline_prologue,MILLICODE +__outline_prologue + .PROC + .CALLINFO FRAME=0,NO_CALLS + .ENTRY + copy %r30,%r20 + + ; Subtract 4 from our return pointer so that we return to + ; the right location. + ldo -4(%r31),%r31 + + ; Save off %r2 + stw %r2,-20(0,%r30) + + ; Make our new frame. + add %r21,%r30,%r30 + + ; Save our old stack pointer. + stw %r20,0(0,%r20) + + ; Add in local_fsize to our frame pointer so we do register + ; saves into the right place + add %r20,%r19,%r20 + + ; %r22 tells us what registers we need to save. The upper half + ; is for fp registers, the lower half for integer registers. + ; We put the lower half in %r1 and the upper half into %r22 + ; for later use. + extru %r22,31,16,%r1 + extrs %r22,15,16,%r22 + + ; %r1 now holds a value 0-18 which corresponds to the number + ; of grs we need to save. We need to reverse that value so + ; we can jump into the table and straight-line execute to the + ; end of the gr saves. 
+ comb,= %r0,%r1,L$0000 + subi 18,%r1,%r1 + blr,n %r1,%r0 + b,n L$0000 + stws,ma %r18,4(0,%r20) + nop + stws,ma %r17,4(0,%r20) + nop + stws,ma %r16,4(0,%r20) + nop + stws,ma %r15,4(0,%r20) + nop + stws,ma %r14,4(0,%r20) + nop + stws,ma %r13,4(0,%r20) + nop + stws,ma %r12,4(0,%r20) + nop + stws,ma %r11,4(0,%r20) + nop + stws,ma %r10,4(0,%r20) + nop + stws,ma %r9,4(0,%r20) + nop + stws,ma %r8,4(0,%r20) + nop + stws,ma %r7,4(0,%r20) + nop + stws,ma %r6,4(0,%r20) + nop + stws,ma %r5,4(0,%r20) + nop + stws,ma %r4,4(0,%r20) + nop + stws,ma %r3,4(0,%r20) + nop +L$0000 + ; All gr saves are done. Align the temporary frame pointer and + ; do the fr saves. + ldo 7(%r20),%r20 + depi 0,31,3,%r20 + + comb,= %r0,%r22,L$0001 + subi 21,%r22,%r22 + blr,n %r22,%r0 + b,n L$0001 + fstws,ma %fr21,8(0,%r20) + nop + fstws,ma %fr20,8(0,%r20) + nop + fstws,ma %fr19,8(0,%r20) + nop + fstws,ma %fr18,8(0,%r20) + nop + fstws,ma %fr17,8(0,%r20) + nop + fstws,ma %fr16,8(0,%r20) + nop + fstws,ma %fr15,8(0,%r20) + nop + fstws,ma %fr14,8(0,%r20) + nop + fstws,ma %fr13,8(0,%r20) + nop + fstws,ma %fr12,8(0,%r20) + nop +L$0001 + ; Return, setting up a frame pointer in the delay slot + bv 0(%r31) + sub %r30,%r21,%r3 + + .EXIT + .PROCEND + +; This is an out-of-line epilogue. Its operation is basically the reverse +; of the out-of-line prologue. + + .align 32 + .NSUBSPA $MILLICODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY + .EXPORT __outline_epilogue,MILLICODE +__outline_epilogue + .PROC + .CALLINFO FRAME=0,NO_CALLS + .ENTRY + ; Make a copy of our frame pointer into %r20 + copy %r3,%r20 + + ; Subtract 4 from our return pointer so that we return to + ; the right location. + ldo -4(%r31),%r31 + + ; Reload %r2 + ; from its slot at -20 off the frame pointer + ldw -20(0,%r20),%r2 + + ; Load our old stack pointer, save it in %r21. + ldw 0(0,%r20),%r21 + + ; Add in local_fsize (%r19) to the frame pointer to find + ; the saved registers. + add %r20,%r19,%r20 + + ; %r22 tells us what registers we need to restore. 
The upper half + ; is for fp registers, the lower half for integer registers. + ; We put the lower half in %r1 and the upper half into %r22 + ; for later use. + extru %r22,31,16,%r1 + extrs %r22,15,16,%r22 + + ; %r1 now holds a value 0-18 which corresponds to the number + ; of grs we need to restore. We need to reverse that value so + ; we can jump into the table and straight-line execute to the + ; end of the gr restore. + comb,= %r0,%r1,L$0002 + subi 18,%r1,%r1 + blr,n %r1,%r0 + b,n L$0002 + ldws,ma 4(0,%r20),%r18 + nop + ldws,ma 4(0,%r20),%r17 + nop + ldws,ma 4(0,%r20),%r16 + nop + ldws,ma 4(0,%r20),%r15 + nop + ldws,ma 4(0,%r20),%r14 + nop + ldws,ma 4(0,%r20),%r13 + nop + ldws,ma 4(0,%r20),%r12 + nop + ldws,ma 4(0,%r20),%r11 + nop + ldws,ma 4(0,%r20),%r10 + nop + ldws,ma 4(0,%r20),%r9 + nop + ldws,ma 4(0,%r20),%r8 + nop + ldws,ma 4(0,%r20),%r7 + nop + ldws,ma 4(0,%r20),%r6 + nop + ldws,ma 4(0,%r20),%r5 + nop + ldws,ma 4(0,%r20),%r4 + nop + ldws,ma 4(0,%r20),%r3 + nop +L$0002 + ; All gr restores are done. Align the temporary frame pointer and + ; do the fr restores. + ldo 7(%r20),%r20 + depi 0,31,3,%r20 + + comb,= %r0,%r22,L$0003 + subi 21,%r22,%r22 + blr,n %r22,%r0 + b,n L$0003 + fldws,ma 8(0,%r20),%fr21 + nop + fldws,ma 8(0,%r20),%fr20 + nop + fldws,ma 8(0,%r20),%fr19 + nop + fldws,ma 8(0,%r20),%fr18 + nop + fldws,ma 8(0,%r20),%fr17 + nop + fldws,ma 8(0,%r20),%fr16 + nop + fldws,ma 8(0,%r20),%fr15 + nop + fldws,ma 8(0,%r20),%fr14 + nop + fldws,ma 8(0,%r20),%fr13 + nop + fldws,ma 8(0,%r20),%fr12 + nop +L$0003 + ; Return and deallocate our frame. 
+ bv 0(%r31) + copy %r21,%r30 + .EXIT + .PROCEND + diff --git a/gcc/config/pa/pa.c b/gcc/config/pa/pa.c index 026821c37ab..545dab5cce6 100644 --- a/gcc/config/pa/pa.c +++ b/gcc/config/pa/pa.c @@ -113,6 +113,12 @@ override_options () { warning ("PIC code generation is not compatable with profiling\n"); } + + if (TARGET_SPACE && (flag_pic || profile_flag)) + { + warning ("Out of line entry/exit sequences are not compatable\n"); + warning ("with PIC or profiling\n"); + } } @@ -2039,8 +2045,24 @@ compute_frame_size (size, fregs_live) for (i = 18; i >= 4; i--) { if (regs_ever_live[i]) - fsize += 4; + { + /* For out of line prologues/epilogues we only need to + compute the highest register number to save and + allocate space for all the callee saved registers + with a lower number. */ + if (TARGET_SPACE) + { + fsize += 4 * (i - 3); + break; + } + fsize += 4; + } } + + /* We always save %r3, make room for it. */ + if (TARGET_SPACE) + fsize += 8; + /* If we don't have a frame pointer, the register normally used for that purpose is saved just like other registers, not in the "frame marker". */ if (! frame_pointer_needed) @@ -2053,9 +2075,19 @@ compute_frame_size (size, fregs_live) for (i = 66; i >= 48; i -= 2) if (regs_ever_live[i] || regs_ever_live[i + 1]) { - fsize += 8; if (fregs_live) *fregs_live = 1; + + /* For out of line prologues/epilogues we only need to + compute the highest register number to save and + allocate space for all the callee saved registers + with a lower number. */ + if (TARGET_SPACE) + { + fsize += 4 * (i - 46); + break; + } + fsize += 8; } fsize += current_function_outgoing_args_size; @@ -2148,6 +2180,47 @@ hppa_expand_prologue() tmpreg = gen_rtx (REG, SImode, 1); size_rtx = GEN_INT (actual_fsize); + /* Handle out of line prologues and epilogues. */ + if (TARGET_SPACE) + { + rtx operands[2]; + int saves = 0; + + /* Put the local_fsize into %r19. 
*/ + operands[0] = gen_rtx (REG, SImode, 19); + operands[1] = GEN_INT (local_fsize); + emit_move_insn (operands[0], operands[1]); + + /* Put the stack size into %r21. */ + operands[0] = gen_rtx (REG, SImode, 21); + operands[1] = size_rtx; + emit_move_insn (operands[0], operands[1]); + + /* Put the register save info into %r22. */ + for (i = 18; i >= 3; i--) + if (regs_ever_live[i] && ! call_used_regs[i]) + { + saves = i; + break; + } + + for (i = 66; i >= 48; i -= 2) + if (regs_ever_live[i] || regs_ever_live[i + 1]) + { + saves |= ((i/2 - 12 ) << 16); + break; + } + + operands[0] = gen_rtx (REG, SImode, 22); + operands[1] = GEN_INT (saves); + emit_move_insn (operands[0], operands[1]); + + /* Now call the out-of-line prologue. */ + emit_insn (gen_outline_prologue_call ()); + emit_insn (gen_blockage ()); + return; + } + /* Save RP first. The calling conventions manual states RP will always be stored into the caller's frame at sp-20. */ if (regs_ever_live[2] || profile_flag) @@ -2416,6 +2489,43 @@ hppa_expand_epilogue () int offset,i; int merge_sp_adjust_with_load = 0; + /* Handle out of line prologues and epilogues. */ + if (TARGET_SPACE) + { + int saves = 0; + rtx operands[2]; + + /* Put the register save info into %r22. */ + for (i = 18; i >= 3; i--) + if (regs_ever_live[i] && ! call_used_regs[i]) + { + saves = i; + break; + } + + for (i = 66; i >= 48; i -= 2) + if (regs_ever_live[i] || regs_ever_live[i + 1]) + { + saves |= ((i/2 - 12 ) << 16); + break; + } + + emit_insn (gen_blockage ()); + + /* Put the local_fsize into %r19. */ + operands[0] = gen_rtx (REG, SImode, 19); + operands[1] = GEN_INT (local_fsize); + emit_move_insn (operands[0], operands[1]); + + operands[0] = gen_rtx (REG, SImode, 22); + operands[1] = GEN_INT (saves); + emit_move_insn (operands[0], operands[1]); + + /* Now call the out-of-line epilogue. */ + emit_insn (gen_outline_epilogue_call ()); + return; + } + /* We will use this often. 
*/ tmpreg = gen_rtx (REG, SImode, 1); diff --git a/gcc/config/pa/pa.h b/gcc/config/pa/pa.h index 89697db8c82..606493a4bcc 100644 --- a/gcc/config/pa/pa.h +++ b/gcc/config/pa/pa.h @@ -78,6 +78,10 @@ extern int target_flags; /* Allow unconditional jumps in the delay slots of call instructions. */ #define TARGET_JUMP_IN_DELAY (target_flags & 8) +/* Optimize for space. Currently this only turns on out of line + prologues and epilogues. */ +#define TARGET_SPACE (target_flags & 16) + /* Disable indexed addressing modes. */ #define TARGET_DISABLE_INDEXING (target_flags & 32) @@ -119,9 +123,11 @@ extern int target_flags; {"disable-fpregs", 2}, \ {"no-disable-fpregs", -2}, \ {"no-space-regs", 4}, \ - {"space-regs", -4},\ + {"space-regs", -4}, \ {"jump-in-delay", 8}, \ {"no-jump-in-delay", -8}, \ + {"space", 16}, \ + {"no-space", -16}, \ {"disable-indexing", 32}, \ {"no-disable-indexing", -32},\ {"portable-runtime", 64}, \ @@ -554,9 +560,11 @@ do { \ /* Base register for access to local variables of the function. */ #define FRAME_POINTER_REGNUM 3 -/* Value should be nonzero if functions must have frame pointers. */ -#define FRAME_POINTER_REQUIRED (current_function_calls_alloca) - +/* Value should be nonzero if functions must have frame pointers. + All functions have frame pointers when optimizing for space + (for now). */ +#define FRAME_POINTER_REQUIRED \ + (current_function_calls_alloca || TARGET_SPACE) /* C statement to store the difference between the frame pointer and the stack pointer values immediately after the function prologue. diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md index 1aeeb61ff48..a64d17b9144 100644 --- a/gcc/config/pa/pa.md +++ b/gcc/config/pa/pa.md @@ -4837,3 +4837,65 @@ "mfsp %%sr0,%4\;ldsid (0,%2),%3\;mtsp %3,%%sr0\;fic 0(%%sr0,%0)\;fic 0(%%sr0,%1)\;sync\;mtsp %4,%%sr0\;nop\;nop\;nop\;nop\;nop\;nop" [(set_attr "type" "multi") (set_attr "length" "52")]) + +;; An out-of-line prologue. 
+(define_insn "outline_prologue_call" + [(unspec_volatile [(const_int 0)] 0) + (clobber (reg:SI 31)) + (clobber (reg:SI 22)) + (clobber (reg:SI 21)) + (clobber (reg:SI 20)) + (clobber (reg:SI 19)) + (clobber (reg:SI 1))] + "" + "* +{ + /* Must import the magic millicode routine. */ + output_asm_insn (\".IMPORT __outline_prologue,MILLICODE\", NULL); + + /* The out-of-line prologue will make sure we return to the right + instruction. */ + if (TARGET_PORTABLE_RUNTIME) + { + output_asm_insn (\"ldil L'__outline_prologue,%%r31\", NULL); + output_asm_insn (\"ble,n R'__outline_prologue(%%sr0,%%r31)\", NULL); + } + else + output_asm_insn (\"bl,n __outline_prologue,%%r31\", NULL); + return \"\"; +}" + [(set_attr "type" "multi") + (set_attr "length" "8")]) + +;; An out-of-line epilogue. +(define_insn "outline_epilogue_call" + [(unspec_volatile [(const_int 1)] 0) + (use (reg:SI 29)) + (use (reg:SI 28)) + (clobber (reg:SI 31)) + (clobber (reg:SI 22)) + (clobber (reg:SI 21)) + (clobber (reg:SI 20)) + (clobber (reg:SI 19)) + (clobber (reg:SI 2)) + (clobber (reg:SI 1))] + "" + "* +{ + /* Must import the magic millicode routine. */ + output_asm_insn (\".IMPORT __outline_epilogue,MILLICODE\", NULL); + + /* The out-of-line epilogue will make sure we return to the right + instruction. */ + if (TARGET_PORTABLE_RUNTIME) + { + output_asm_insn (\"ldil L'__outline_epilogue,%%r31\", NULL); + output_asm_insn (\"ble,n R'__outline_epilogue(%%sr0,%%r31)\", NULL); + } + else + output_asm_insn (\"bl,n __outline_epilogue,%%r31\", NULL); + return \"\"; +}" + [(set_attr "type" "multi") + (set_attr "length" "8")]) +