From: Jeff Law Date: Fri, 11 Nov 1994 04:53:33 +0000 (-0700) Subject: * Automatic generation of inline long call sequences when needed. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=279c9bde625781e47709a2a5900c3c56966e67e7;p=gcc.git * Automatic generation of inline long call sequences when needed. * pa.h (TARGET_LONG_CALLS): Delete. Replace all uses with TARGET_PORTABLE_RUNTIME. (TARGET_MILLICODE_LONG_CALLS): New target flag. * pa.c (output_function_prologue): Keep track of the total number of code bytes emitted for each source file. (output_call): Handle TARGET_PORTABLE_RUNTIME and millicode calls for TARGET_MILLICODE_LONG_CALLS. Emit an inline long-call if needed. If emitting an inline long-call, perform argument relocations before the call if they are needed, and unfill the delay slot of the call if necessary. * pa.md (define_delay for millicode): Disable the delay slot if TARGET_MILLICODE_LONG_CALLS. (millicode insns and indirect calls): Properly compute length for both TARGET_PORTABLE_RUNTIME and TARGET_MILLICODE_LONG_CALLS. (call_internal_symref): Properly compute the length when more than 240000 bytes of code have already been output. Take TARGET_MILLICODE_LONG_CALLS into account in the length computation. From-SVN: r8422 --- diff --git a/gcc/config/pa/pa.c b/gcc/config/pa/pa.c index 2ec55bab639..21d207f4215 100644 --- a/gcc/config/pa/pa.c +++ b/gcc/config/pa/pa.c @@ -52,6 +52,11 @@ static int gr_saved, fr_saved; static rtx find_addr_reg (); +/* Keep track of the number of bytes we have output in the CODE subspaces + during this compilation so we'll know when to emit inline long-calls. */ + +unsigned int total_code_bytes; + /* Return non-zero only if OP is a register of mode MODE, or CONST0_RTX. */ int @@ -72,7 +77,7 @@ call_operand_address (op, mode) rtx op; enum machine_mode mode; { - return (CONSTANT_P (op) && ! TARGET_LONG_CALLS); + return (CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME); } /* Return 1 if X contains a symbolic expression. 
We know these @@ -2044,6 +2049,19 @@ output_function_prologue (file, size) if (profile_flag) ASM_GENERATE_INTERNAL_LABEL (hp_profile_label_name, "LP", hp_profile_labelno); + + if (insn_addresses) + { + unsigned int old_total = total_code_bytes; + + total_code_bytes += insn_addresses[INSN_UID (get_last_insn())]; + total_code_bytes += FUNCTION_BOUNDARY /BITS_PER_UNIT; + + /* Be prepared to handle overflows. */ + total_code_bytes = old_total > total_code_bytes ? -1 : total_code_bytes; + } + else + total_code_bytes = -1; } void @@ -3760,9 +3778,9 @@ output_movb (operands, insn, which_alternative, reverse_comparison) RETURN_POINTER is the register which will hold the return address. %r2 for most calls, %r31 for millicode calls. - When TARGET_LONG_CALLS is true, output_call is only called for - millicode calls. In addition, no delay slots are available when - TARGET_LONG_CALLS is true. */ + When TARGET_MILLICODE_LONG_CALLS is true, then we have to assume + that two instruction sequences must be used to reach the millicode + routines (including dyncall!). */ char * output_call (insn, call_dest, return_pointer) @@ -3775,21 +3793,124 @@ output_call (insn, call_dest, return_pointer) rtx xoperands[4]; rtx seq_insn; - /* Handle common case -- empty delay slot or no jump in the delay slot. */ - if (dbr_sequence_length () == 0 + /* Handle long millicode calls for mod, div, and mul. */ + if (TARGET_PORTABLE_RUNTIME + || (TARGET_MILLICODE_LONG_CALLS && REGNO (return_pointer) == 31)) + { + xoperands[0] = call_dest; + xoperands[1] = return_pointer; + output_asm_insn ("ldil L%%%0,%%r29", xoperands); + output_asm_insn ("ldo R%%%0(%%r29),%%r29", xoperands); + output_asm_insn ("blr 0,%r1\n\tbv,n 0(%%r29)\n\tnop", xoperands); + return ""; + } + + /* Handle common case -- empty delay slot or no jump in the delay slot, + and we're sure that the branch will reach the beginning of the $CODE$ + subspace. 
*/ + if ((dbr_sequence_length () == 0 + && get_attr_length (insn) == 8) || (dbr_sequence_length () != 0 - && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)) + && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN + && get_attr_length (insn) == 4)) { xoperands[0] = call_dest; xoperands[1] = return_pointer; - if (TARGET_LONG_CALLS) + output_asm_insn ("bl %0,%r1%#", xoperands); + return ""; + } + + /* This call may not reach the beginning of the $CODE$ subspace. */ + if (get_attr_length (insn) > 8) + { + int delay_insn_deleted = 0; + rtx xoperands[2]; + rtx link; + + /* We need to emit an inline long-call branch. Furthermore, + because we're changing a named function call into an indirect + function call well after the parameters have been set up, we + need to make sure any FP args appear in both the integer + and FP registers. Also, we need to move any delay slot insn + out of the delay slot -- Yuk! */ + if (dbr_sequence_length () != 0 + && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN) { - output_asm_insn ("ldil L%%%0,%%r29", xoperands); - output_asm_insn ("ldo R%%%0(%%r29),%%r29", xoperands); - output_asm_insn ("blr 0,%r1\n\tbv,n 0(%%r29)\n\tnop", xoperands); + /* A non-jump insn in the delay slot. By definition we can + emit this insn before the call (and in fact before argument + relocation). */ + final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0); + + /* Now delete the delay insn. */ + PUT_CODE (NEXT_INSN (insn), NOTE); + NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED; + NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0; + delay_insn_deleted = 1; + } + + /* Now copy any FP arguments into integer registers. */ + for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1)) + { + int arg_mode, regno; + rtx use = XEXP (link, 0); + if (! 
(GET_CODE (use) == USE + && GET_CODE (XEXP (use, 0)) == REG + && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0))))) + continue; + + arg_mode = GET_MODE (XEXP (use, 0)); + regno = REGNO (XEXP (use, 0)); + /* Is it a floating point register? */ + if (regno >= 32 && regno <= 39) + { + /* Copy from the FP register into an integer register + (via memory). */ + if (arg_mode == SFmode) + { + xoperands[0] = XEXP (use, 0); + xoperands[1] = gen_rtx (REG, SImode, 26 - (regno - 32) / 2); + output_asm_insn ("fstws %0,-16(%%sr0,%%r30)", xoperands); + output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands); + } + else + { + xoperands[0] = XEXP (use, 0); + xoperands[1] = gen_rtx (REG, DImode, 25 - (regno - 34) / 2); + output_asm_insn ("fstds %0,-16(%%sr0,%%r30)", xoperands); + output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands); + output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands); + } + + } + } + + /* Now emit the inline long-call. */ + xoperands[0] = call_dest; + output_asm_insn ("ldil L%%%0,%%r22\n\tldo R%%%0(%%r22),%%r22", xoperands); + + /* If TARGET_MILLICODE_LONG_CALLS, then we must use a long-call sequence + to call dyncall! */ + if (TARGET_MILLICODE_LONG_CALLS) + { + output_asm_insn ("ldil L%%$$dyncall,%%r31", xoperands); + output_asm_insn ("ldo R%%$$dyncall(%%r31),%%r31", xoperands); + output_asm_insn ("blr 0,%%r2\n\tbv,n 0(%%r31)\n\tnop", xoperands); } else - output_asm_insn ("bl %0,%r1%#", xoperands); + output_asm_insn ("bl $$dyncall,%%r31\n\tcopy %%r31,%%r2", xoperands); + + /* If we had a jump in the call's delay slot, output it now. */ + if (dbr_sequence_length () != 0 + && !delay_insn_deleted) + { + xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1); + output_asm_insn ("b,n %0", xoperands); + + /* Now delete the delay insn. 
*/ + PUT_CODE (NEXT_INSN (insn), NOTE); + NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED; + NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0; + } return ""; } diff --git a/gcc/config/pa/pa.h b/gcc/config/pa/pa.h index 5d6e99111f5..218a97de0bc 100644 --- a/gcc/config/pa/pa.h +++ b/gcc/config/pa/pa.h @@ -28,6 +28,9 @@ enum cmp_type /* comparison type */ CMP_MAX /* max comparison type */ }; +/* For long call handling. */ +extern unsigned int total_code_bytes; + /* Print subsidiary information on the compiler version in use. */ #define TARGET_VERSION fprintf (stderr, " (hppa)"); @@ -57,13 +60,18 @@ extern int target_flags; /* Allow unconditional jumps in the delay slots of call instructions. */ #define TARGET_JUMP_IN_DELAY (target_flags & 8) -/* Force all function calls to indirect addressing via a register. This - avoids lossage when the function is very far away from the current PC. +/* In rare cases, a millicode call via "bl" can not be turned into + a millicode call using "ble" (when SHLIB_INFO subspace is very large). + + This option forces just millicode calls to use inline long-calls. + This is far more efficient than the old long-call option which forced + every function to be called indirectly (as is still the case for + TARGET_PORTABLE_RUNTIME). ??? What about simple jumps, they can suffer from the same problem. Would require significant surgery in pa.md. */ -#define TARGET_LONG_CALLS (target_flags & 16) +#define TARGET_MILLICODE_LONG_CALLS (target_flags & 16) /* Disable indexed addressing modes. */ @@ -73,7 +81,8 @@ extern int target_flags; HP wants everyone to use for ELF objects. If at all possible you want to avoid this since it's a performance loss for non-prototyped code. - Note TARGET_PORTABLE_RUNTIME also implies TARGET_LONG_CALLS. */ + Note TARGET_PORTABLE_RUNTIME also forces all calls to use inline + long-call stubs, which is quite expensive. 
*/ #define TARGET_PORTABLE_RUNTIME (target_flags & 64) @@ -100,8 +109,8 @@ extern int target_flags; {"no-fast-indirect-calls", -4},\ {"jump-in-delay", 8}, \ {"no-jump-in-delay", -8}, \ - {"long-calls", 16}, \ - {"no-long-calls", -16}, \ + {"millicode-long-calls", 16},\ + {"no-millicode-long-calls", -16},\ {"disable-indexing", 32}, \ {"no-disable-indexing", -32},\ {"portable-runtime", 64+16},\ @@ -832,9 +841,7 @@ struct hppa_args {int words, nargs_prototype; }; The caller must make a distinction between calls to explicitly named functions and calls through pointers to functions -- the conventions are different! Calls through pointers to functions only use general - registers for the first four argument words. Note the indirect function - calling conventions are in effect during TARGET_LONG_CALLS, but - current_call_is_indirect will not be set in such situations. + registers for the first four argument words. Of course all this is different for the portable runtime model HP wants everyone to use for ELF. Ugh. Here's a quick description @@ -869,12 +876,12 @@ struct hppa_args {int words, nargs_prototype; }; || !FLOAT_MODE_P (MODE) || (CUM).nargs_prototype > 0) \ ? gen_rtx (REG, (MODE), \ (FUNCTION_ARG_SIZE ((MODE), (TYPE)) > 1 \ - ? (((!(current_call_is_indirect || TARGET_LONG_CALLS) \ + ? (((!current_call_is_indirect \ || TARGET_PORTABLE_RUNTIME) \ && (MODE) == DFmode) \ ? ((CUM).words ? 38 : 34) \ : ((CUM).words ? 23 : 25)) \ - : (((!(current_call_is_indirect || TARGET_LONG_CALLS) \ + : (((!current_call_is_indirect \ || TARGET_PORTABLE_RUNTIME) \ && (MODE) == SFmode) \ ? (32 + 2 * (CUM).words) \ diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md index 60459ac1577..db6cf972615 100644 --- a/gcc/config/pa/pa.md +++ b/gcc/config/pa/pa.md @@ -93,10 +93,11 @@ [(eq_attr "in_call_delay" "true") (nil) (nil)]) ;; millicode call delay slot description. Note it disallows delay slot -;; when TARGET_LONG_CALLS is true. 
+;; when TARGET_PORTABLE_RUNTIME or TARGET_MILLICODE_LONG_CALLS is true. (define_delay (eq_attr "type" "milli") [(and (eq_attr "in_call_delay" "true") - (eq (symbol_ref "TARGET_LONG_CALLS") (const_int 0))) + (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0)) + (eq (symbol_ref "TARGET_MILLICODE_LONG_CALLS") (const_int 0)))) (nil) (nil)]) ;; Unconditional branch, return and other similar instructions. @@ -2268,10 +2269,13 @@ "" "* return output_mul_insn (0, insn);" [(set_attr "type" "milli") - (set (attr "length") (if_then_else (ne (symbol_ref "TARGET_LONG_CALLS") - (const_int 0)) - (const_int 4) - (const_int 24)))]) + (set (attr "length") + (if_then_else (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME") + (const_int 0)) + (eq (symbol_ref "TARGET_MILLICODE_LONG_CALLS") + (const_int 0))) + (const_int 4) + (const_int 24)))]) ;;; Division and mod. (define_expand "divsi3" @@ -2318,10 +2322,13 @@ "* return output_div_insn (operands, 0, insn);" [(set_attr "type" "milli") - (set (attr "length") (if_then_else (ne (symbol_ref "TARGET_LONG_CALLS") - (const_int 0)) - (const_int 4) - (const_int 24)))]) + (set (attr "length") + (if_then_else (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME") + (const_int 0)) + (eq (symbol_ref "TARGET_MILLICODE_LONG_CALLS") + (const_int 0))) + (const_int 4) + (const_int 24)))]) (define_expand "udivsi3" [(set (reg:SI 26) (match_operand:SI 1 "move_operand" "")) @@ -2367,10 +2374,13 @@ "* return output_div_insn (operands, 1, insn);" [(set_attr "type" "milli") - (set (attr "length") (if_then_else (ne (symbol_ref "TARGET_LONG_CALLS") - (const_int 0)) - (const_int 4) - (const_int 24)))]) + (set (attr "length") + (if_then_else (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME") + (const_int 0)) + (eq (symbol_ref "TARGET_MILLICODE_LONG_CALLS") + (const_int 0))) + (const_int 4) + (const_int 24)))]) (define_expand "modsi3" [(set (reg:SI 26) (match_operand:SI 1 "move_operand" "")) @@ -2412,10 +2422,13 @@ "* return output_mod_insn (0, insn);" [(set_attr 
"type" "milli") - (set (attr "length") (if_then_else (ne (symbol_ref "TARGET_LONG_CALLS") - (const_int 0)) - (const_int 4) - (const_int 24)))]) + (set (attr "length") + (if_then_else (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME") + (const_int 0)) + (eq (symbol_ref "TARGET_MILLICODE_LONG_CALLS") + (const_int 0))) + (const_int 4) + (const_int 24)))]) (define_expand "umodsi3" [(set (reg:SI 26) (match_operand:SI 1 "move_operand" "")) @@ -2457,10 +2470,13 @@ "* return output_mod_insn (1, insn);" [(set_attr "type" "milli") - (set (attr "length") (if_then_else (ne (symbol_ref "TARGET_LONG_CALLS") - (const_int 0)) - (const_int 4) - (const_int 24)))]) + (set (attr "length") + (if_then_else (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME") + (const_int 0)) + (eq (symbol_ref "TARGET_MILLICODE_LONG_CALLS") + (const_int 0))) + (const_int 4) + (const_int 24)))]) ;;- and instructions ;; We define DImode `and` so with DImode `not` we can get @@ -3143,7 +3159,7 @@ rtx op; rtx call_insn; - if (TARGET_LONG_CALLS) + if (TARGET_PORTABLE_RUNTIME) op = force_reg (SImode, XEXP (operands[0], 0)); else op = XEXP (operands[0], 0); @@ -3185,14 +3201,21 @@ (match_operand 1 "" "i")) (clobber (reg:SI 2)) (use (const_int 0))] - "! TARGET_LONG_CALLS" + "! TARGET_PORTABLE_RUNTIME" "* { output_arg_descriptor (insn); return output_call (insn, operands[0], gen_rtx (REG, SImode, 2)); }" [(set_attr "type" "call") - (set_attr "length" "4")]) + (set (attr "length") + (if_then_else (lt (plus (symbol_ref "total_code_bytes") (pc)) + (const_int 240000)) + (const_int 4) + (if_then_else (ne (symbol_ref "TARGET_MILLICODE_LONG_CALLS") + (const_int 0)) + (const_int 64) + (const_int 52))))]) (define_insn "call_internal_reg" [(call (mem:SI (match_operand:SI 0 "register_operand" "r")) @@ -3206,16 +3229,19 @@ return \"blr 0,%%r2\;bv,n 0(%r0)\;ldo 4(%%r2),%%r2\"; /* Yuk! bl may not be able to reach $$dyncall. 
*/ - if (TARGET_LONG_CALLS) + if (TARGET_PORTABLE_RUNTIME || TARGET_MILLICODE_LONG_CALLS) return \"copy %r0,%%r22\;ldil L%%$$dyncall,%%r31\;ldo R%%$$dyncall(%%r31),%%r31\;blr 0,%%r2\;bv,n 0(%%r31)\;nop\"; else return \"copy %r0,%%r22\;.CALL\\tARGW0=GR\;bl $$dyncall,%%r31\;copy %%r31,%%r2\"; }" [(set_attr "type" "dyncall") - (set (attr "length") (if_then_else (ne (symbol_ref "TARGET_LONG_CALLS") - (const_int 0)) - (const_int 12) - (const_int 24)))]) + (set (attr "length") + (if_then_else (and (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") + (const_int 0)) + (ne (symbol_ref "TARGET_MILLICODE_LONG_CALLS") + (const_int 0))) + (const_int 12) + (const_int 24)))]) (define_expand "call_value" [(parallel [(set (match_operand 0 "" "") @@ -3228,7 +3254,7 @@ rtx op; rtx call_insn; - if (TARGET_LONG_CALLS) + if (TARGET_PORTABLE_RUNTIME) op = force_reg (SImode, XEXP (operands[1], 0)); else op = XEXP (operands[1], 0); @@ -3275,14 +3301,21 @@ (clobber (reg:SI 2)) (use (const_int 0))] ;;- Don't use operand 1 for most machines. - "! TARGET_LONG_CALLS" + "! TARGET_PORTABLE_RUNTIME" "* { output_arg_descriptor (insn); return output_call (insn, operands[1], gen_rtx (REG, SImode, 2)); }" [(set_attr "type" "call") - (set_attr "length" "4")]) + (set (attr "length") + (if_then_else (lt (plus (symbol_ref "total_code_bytes") (pc)) + (const_int 240000)) + (const_int 4) + (if_then_else (ne (symbol_ref "TARGET_MILLICODE_LONG_CALLS") + (const_int 0)) + (const_int 64) + (const_int 52))))]) (define_insn "call_value_internal_reg" [(set (match_operand 0 "" "=rf") @@ -3297,16 +3330,19 @@ return \"blr 0,%%r2\;bv,n 0(%r1)\;ldo 4(%%r2),%%r2\"; /* Yuk! bl may not be able to reach $$dyncall. 
*/ - if (TARGET_LONG_CALLS) + if (TARGET_PORTABLE_RUNTIME || TARGET_MILLICODE_LONG_CALLS) return \"copy %r1,%%r22\;ldil L%%$$dyncall,%%r31\;ldo R%%$$dyncall(%%r31),%%r31\;blr 0,%%r2\;bv,n 0(%%r31)\;nop\"; else return \"copy %r1,%%r22\;.CALL\\tARGW0=GR\;bl $$dyncall,%%r31\;copy %%r31,%%r2\"; }" [(set_attr "type" "dyncall") - (set (attr "length") (if_then_else (ne (symbol_ref "TARGET_LONG_CALLS") - (const_int 0)) - (const_int 12) - (const_int 24)))]) + (set (attr "length") + (if_then_else (and (ne (symbol_ref "TARGET_PORTABLE_RUNTIME") + (const_int 0)) + (ne (symbol_ref "TARGET_MILLICODE_LONG_CALLS") + (const_int 0))) + (const_int 12) + (const_int 24)))]) ;; Call subroutine returning any type.