1 /* Subroutines for insn-output.c for HPPA.
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5 Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
29 #include "hard-reg-set.h"
30 #include "insn-config.h"
31 #include "conditions.h"
32 #include "insn-attr.h"
40 #include "integrate.h"
42 #include "diagnostic-core.h"
49 #include "target-def.h"
52 /* Return nonzero if there is a bypass for the output of
53 OUT_INSN and the fp store IN_INSN. */
55 hppa_fpstore_bypass_p (rtx out_insn
, rtx in_insn
)
57 enum machine_mode store_mode
;
58 enum machine_mode other_mode
;
61 if (recog_memoized (in_insn
) < 0
62 || (get_attr_type (in_insn
) != TYPE_FPSTORE
63 && get_attr_type (in_insn
) != TYPE_FPSTORE_LOAD
)
64 || recog_memoized (out_insn
) < 0)
67 store_mode
= GET_MODE (SET_SRC (PATTERN (in_insn
)));
69 set
= single_set (out_insn
);
73 other_mode
= GET_MODE (SET_SRC (set
));
75 return (GET_MODE_SIZE (store_mode
) == GET_MODE_SIZE (other_mode
));
79 #ifndef DO_FRAME_NOTES
80 #ifdef INCOMING_RETURN_ADDR_RTX
81 #define DO_FRAME_NOTES 1
83 #define DO_FRAME_NOTES 0
87 static void pa_option_override (void);
88 static void copy_reg_pointer (rtx
, rtx
);
89 static void fix_range (const char *);
90 static bool pa_handle_option (size_t, const char *, int);
91 static int hppa_register_move_cost (enum machine_mode mode
, reg_class_t
,
93 static int hppa_address_cost (rtx
, bool);
94 static bool hppa_rtx_costs (rtx
, int, int, int *, bool);
95 static inline rtx
force_mode (enum machine_mode
, rtx
);
96 static void pa_reorg (void);
97 static void pa_combine_instructions (void);
98 static int pa_can_combine_p (rtx
, rtx
, rtx
, int, rtx
, rtx
, rtx
);
99 static bool forward_branch_p (rtx
);
100 static void compute_zdepwi_operands (unsigned HOST_WIDE_INT
, unsigned *);
101 static int compute_movmem_length (rtx
);
102 static int compute_clrmem_length (rtx
);
103 static bool pa_assemble_integer (rtx
, unsigned int, int);
104 static void remove_useless_addtr_insns (int);
105 static void store_reg (int, HOST_WIDE_INT
, int);
106 static void store_reg_modify (int, int, HOST_WIDE_INT
);
107 static void load_reg (int, HOST_WIDE_INT
, int);
108 static void set_reg_plus_d (int, int, HOST_WIDE_INT
, int);
109 static rtx
pa_function_value (const_tree
, const_tree
, bool);
110 static rtx
pa_libcall_value (enum machine_mode
, const_rtx
);
111 static bool pa_function_value_regno_p (const unsigned int);
112 static void pa_output_function_prologue (FILE *, HOST_WIDE_INT
);
113 static void update_total_code_bytes (unsigned int);
114 static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT
);
115 static int pa_adjust_cost (rtx
, rtx
, rtx
, int);
116 static int pa_adjust_priority (rtx
, int);
117 static int pa_issue_rate (void);
118 static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED
;
119 static section
*pa_select_section (tree
, int, unsigned HOST_WIDE_INT
)
121 static void pa_encode_section_info (tree
, rtx
, int);
122 static const char *pa_strip_name_encoding (const char *);
123 static bool pa_function_ok_for_sibcall (tree
, tree
);
124 static void pa_globalize_label (FILE *, const char *)
126 static void pa_asm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
,
127 HOST_WIDE_INT
, tree
);
128 #if !defined(USE_COLLECT2)
129 static void pa_asm_out_constructor (rtx
, int);
130 static void pa_asm_out_destructor (rtx
, int);
132 static void pa_init_builtins (void);
133 static rtx
hppa_builtin_saveregs (void);
134 static void hppa_va_start (tree
, rtx
);
135 static tree
hppa_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
136 static bool pa_scalar_mode_supported_p (enum machine_mode
);
137 static bool pa_commutative_p (const_rtx x
, int outer_code
);
138 static void copy_fp_args (rtx
) ATTRIBUTE_UNUSED
;
139 static int length_fp_args (rtx
) ATTRIBUTE_UNUSED
;
140 static rtx
hppa_legitimize_address (rtx
, rtx
, enum machine_mode
);
141 static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED
;
142 static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED
;
143 static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED
;
144 static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED
;
145 static void pa_elf_file_start (void) ATTRIBUTE_UNUSED
;
146 static void pa_som_file_start (void) ATTRIBUTE_UNUSED
;
147 static void pa_linux_file_start (void) ATTRIBUTE_UNUSED
;
148 static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED
;
149 static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED
;
150 static void output_deferred_plabels (void);
151 static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED
;
152 #ifdef ASM_OUTPUT_EXTERNAL_REAL
153 static void pa_hpux_file_end (void);
155 #ifdef HPUX_LONG_DOUBLE_LIBRARY
156 static void pa_hpux_init_libfuncs (void);
158 static rtx
pa_struct_value_rtx (tree
, int);
159 static bool pa_pass_by_reference (CUMULATIVE_ARGS
*, enum machine_mode
,
161 static int pa_arg_partial_bytes (CUMULATIVE_ARGS
*, enum machine_mode
,
163 static struct machine_function
* pa_init_machine_status (void);
164 static reg_class_t
pa_secondary_reload (bool, rtx
, reg_class_t
,
166 secondary_reload_info
*);
167 static void pa_extra_live_on_entry (bitmap
);
168 static enum machine_mode
pa_promote_function_mode (const_tree
,
169 enum machine_mode
, int *,
172 static void pa_asm_trampoline_template (FILE *);
173 static void pa_trampoline_init (rtx
, tree
, rtx
);
174 static rtx
pa_trampoline_adjust_address (rtx
);
175 static rtx
pa_delegitimize_address (rtx
);
176 static bool pa_print_operand_punct_valid_p (unsigned char);
178 /* The following extra sections are only used for SOM. */
179 static GTY(()) section
*som_readonly_data_section
;
180 static GTY(()) section
*som_one_only_readonly_data_section
;
181 static GTY(()) section
*som_one_only_data_section
;
183 /* Which cpu we are scheduling for. */
184 enum processor_type pa_cpu
= TARGET_SCHED_DEFAULT
;
186 /* The UNIX standard to use for predefines and linking. */
187 int flag_pa_unix
= TARGET_HPUX_11_11
? 1998 : TARGET_HPUX_10_10
? 1995 : 1993;
189 /* Counts for the number of callee-saved general and floating point
190 registers which were saved by the current function's prologue. */
191 static int gr_saved
, fr_saved
;
193 /* Boolean indicating whether the return pointer was saved by the
194 current function's prologue. */
195 static bool rp_saved
;
197 static rtx
find_addr_reg (rtx
);
199 /* Keep track of the number of bytes we have output in the CODE subspace
200 during this compilation so we'll know when to emit inline long-calls. */
201 unsigned long total_code_bytes
;
203 /* The last address of the previous function plus the number of bytes in
204 associated thunks that have been output. This is used to determine if
205 a thunk can use an IA-relative branch to reach its target function. */
206 static unsigned int last_address
;
208 /* Variables to handle plabels that we discover are necessary at assembly
209 output time. They are output after the current function. */
210 struct GTY(()) deferred_plabel
215 static GTY((length ("n_deferred_plabels"))) struct deferred_plabel
*
217 static size_t n_deferred_plabels
= 0;
220 /* Initialize the GCC target structure. */
222 #undef TARGET_OPTION_OVERRIDE
223 #define TARGET_OPTION_OVERRIDE pa_option_override
225 #undef TARGET_ASM_ALIGNED_HI_OP
226 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
227 #undef TARGET_ASM_ALIGNED_SI_OP
228 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
229 #undef TARGET_ASM_ALIGNED_DI_OP
230 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
231 #undef TARGET_ASM_UNALIGNED_HI_OP
232 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
233 #undef TARGET_ASM_UNALIGNED_SI_OP
234 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
235 #undef TARGET_ASM_UNALIGNED_DI_OP
236 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
237 #undef TARGET_ASM_INTEGER
238 #define TARGET_ASM_INTEGER pa_assemble_integer
240 #undef TARGET_ASM_FUNCTION_PROLOGUE
241 #define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
242 #undef TARGET_ASM_FUNCTION_EPILOGUE
243 #define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
245 #undef TARGET_FUNCTION_VALUE
246 #define TARGET_FUNCTION_VALUE pa_function_value
247 #undef TARGET_LIBCALL_VALUE
248 #define TARGET_LIBCALL_VALUE pa_libcall_value
249 #undef TARGET_FUNCTION_VALUE_REGNO_P
250 #define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p
252 #undef TARGET_LEGITIMIZE_ADDRESS
253 #define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address
255 #undef TARGET_SCHED_ADJUST_COST
256 #define TARGET_SCHED_ADJUST_COST pa_adjust_cost
257 #undef TARGET_SCHED_ADJUST_PRIORITY
258 #define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
259 #undef TARGET_SCHED_ISSUE_RATE
260 #define TARGET_SCHED_ISSUE_RATE pa_issue_rate
262 #undef TARGET_ENCODE_SECTION_INFO
263 #define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
264 #undef TARGET_STRIP_NAME_ENCODING
265 #define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
267 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
268 #define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
270 #undef TARGET_COMMUTATIVE_P
271 #define TARGET_COMMUTATIVE_P pa_commutative_p
273 #undef TARGET_ASM_OUTPUT_MI_THUNK
274 #define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
275 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
276 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
278 #undef TARGET_ASM_FILE_END
279 #ifdef ASM_OUTPUT_EXTERNAL_REAL
280 #define TARGET_ASM_FILE_END pa_hpux_file_end
282 #define TARGET_ASM_FILE_END output_deferred_plabels
285 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
286 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p
288 #if !defined(USE_COLLECT2)
289 #undef TARGET_ASM_CONSTRUCTOR
290 #define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
291 #undef TARGET_ASM_DESTRUCTOR
292 #define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
295 #undef TARGET_DEFAULT_TARGET_FLAGS
296 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | TARGET_CPU_DEFAULT)
297 #undef TARGET_HANDLE_OPTION
298 #define TARGET_HANDLE_OPTION pa_handle_option
300 #undef TARGET_INIT_BUILTINS
301 #define TARGET_INIT_BUILTINS pa_init_builtins
303 #undef TARGET_REGISTER_MOVE_COST
304 #define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
305 #undef TARGET_RTX_COSTS
306 #define TARGET_RTX_COSTS hppa_rtx_costs
307 #undef TARGET_ADDRESS_COST
308 #define TARGET_ADDRESS_COST hppa_address_cost
310 #undef TARGET_MACHINE_DEPENDENT_REORG
311 #define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
313 #ifdef HPUX_LONG_DOUBLE_LIBRARY
314 #undef TARGET_INIT_LIBFUNCS
315 #define TARGET_INIT_LIBFUNCS pa_hpux_init_libfuncs
318 #undef TARGET_PROMOTE_FUNCTION_MODE
319 #define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
320 #undef TARGET_PROMOTE_PROTOTYPES
321 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
323 #undef TARGET_STRUCT_VALUE_RTX
324 #define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
325 #undef TARGET_RETURN_IN_MEMORY
326 #define TARGET_RETURN_IN_MEMORY pa_return_in_memory
327 #undef TARGET_MUST_PASS_IN_STACK
328 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
329 #undef TARGET_PASS_BY_REFERENCE
330 #define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
331 #undef TARGET_CALLEE_COPIES
332 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
333 #undef TARGET_ARG_PARTIAL_BYTES
334 #define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
336 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
337 #define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
338 #undef TARGET_EXPAND_BUILTIN_VA_START
339 #define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
340 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
341 #define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr
343 #undef TARGET_SCALAR_MODE_SUPPORTED_P
344 #define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p
346 #undef TARGET_CANNOT_FORCE_CONST_MEM
347 #define TARGET_CANNOT_FORCE_CONST_MEM pa_tls_referenced_p
349 #undef TARGET_SECONDARY_RELOAD
350 #define TARGET_SECONDARY_RELOAD pa_secondary_reload
352 #undef TARGET_EXTRA_LIVE_ON_ENTRY
353 #define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry
355 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
356 #define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
357 #undef TARGET_TRAMPOLINE_INIT
358 #define TARGET_TRAMPOLINE_INIT pa_trampoline_init
359 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
360 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
361 #undef TARGET_DELEGITIMIZE_ADDRESS
362 #define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
364 struct gcc_target targetm
= TARGET_INITIALIZER
;
366 /* Parse the -mfixed-range= option string. */
369 fix_range (const char *const_str
)
372 char *str
, *dash
, *comma
;
374 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
375 REG2 are either register names or register numbers. The effect
376 of this option is to mark the registers in the range from REG1 to
377 REG2 as ``fixed'' so they won't be used by the compiler. This is
378 used, e.g., to ensure that kernel mode code doesn't use fr4-fr31. */
380 i
= strlen (const_str
);
381 str
= (char *) alloca (i
+ 1);
382 memcpy (str
, const_str
, i
+ 1);
386 dash
= strchr (str
, '-');
389 warning (0, "value of -mfixed-range must have form REG1-REG2");
394 comma
= strchr (dash
+ 1, ',');
398 first
= decode_reg_name (str
);
401 warning (0, "unknown register name: %s", str
);
405 last
= decode_reg_name (dash
+ 1);
408 warning (0, "unknown register name: %s", dash
+ 1);
416 warning (0, "%s-%s is an empty range", str
, dash
+ 1);
420 for (i
= first
; i
<= last
; ++i
)
421 fixed_regs
[i
] = call_used_regs
[i
] = 1;
430 /* Check if all floating point registers have been fixed. */
431 for (i
= FP_REG_FIRST
; i
<= FP_REG_LAST
; i
++)
436 target_flags
|= MASK_DISABLE_FPREGS
;
439 /* Implement TARGET_HANDLE_OPTION. */
442 pa_handle_option (size_t code
, const char *arg
, int value ATTRIBUTE_UNUSED
)
447 case OPT_mpa_risc_1_0
:
449 target_flags
&= ~(MASK_PA_11
| MASK_PA_20
);
453 case OPT_mpa_risc_1_1
:
455 target_flags
&= ~MASK_PA_20
;
456 target_flags
|= MASK_PA_11
;
459 case OPT_mpa_risc_2_0
:
461 target_flags
|= MASK_PA_11
| MASK_PA_20
;
465 if (strcmp (arg
, "8000") == 0)
466 pa_cpu
= PROCESSOR_8000
;
467 else if (strcmp (arg
, "7100") == 0)
468 pa_cpu
= PROCESSOR_7100
;
469 else if (strcmp (arg
, "700") == 0)
470 pa_cpu
= PROCESSOR_700
;
471 else if (strcmp (arg
, "7100LC") == 0)
472 pa_cpu
= PROCESSOR_7100LC
;
473 else if (strcmp (arg
, "7200") == 0)
474 pa_cpu
= PROCESSOR_7200
;
475 else if (strcmp (arg
, "7300") == 0)
476 pa_cpu
= PROCESSOR_7300
;
481 case OPT_mfixed_range_
:
491 #if TARGET_HPUX_10_10
497 #if TARGET_HPUX_11_11
508 /* Implement the TARGET_OPTION_OVERRIDE hook. */
511 pa_option_override (void)
513 /* Unconditional branches in the delay slot are not compatible with dwarf2
514 call frame information. There is no benefit in using this optimization
515 on PA8000 and later processors. */
516 if (pa_cpu
>= PROCESSOR_8000
517 || (! USING_SJLJ_EXCEPTIONS
&& flag_exceptions
)
518 || flag_unwind_tables
)
519 target_flags
&= ~MASK_JUMP_IN_DELAY
;
521 if (flag_pic
&& TARGET_PORTABLE_RUNTIME
)
523 warning (0, "PIC code generation is not supported in the portable runtime model");
526 if (flag_pic
&& TARGET_FAST_INDIRECT_CALLS
)
528 warning (0, "PIC code generation is not compatible with fast indirect calls");
531 if (! TARGET_GAS
&& write_symbols
!= NO_DEBUG
)
533 warning (0, "-g is only supported when using GAS on this processor,");
534 warning (0, "-g option disabled");
535 write_symbols
= NO_DEBUG
;
538 /* We only support the "big PIC" model now. And we always generate PIC
539 code when in 64bit mode. */
540 if (flag_pic
== 1 || TARGET_64BIT
)
543 /* Disable -freorder-blocks-and-partition as we don't support hot and
544 cold partitioning. */
545 if (flag_reorder_blocks_and_partition
)
547 inform (input_location
,
548 "-freorder-blocks-and-partition does not work "
549 "on this architecture");
550 flag_reorder_blocks_and_partition
= 0;
551 flag_reorder_blocks
= 1;
554 /* We can't guarantee that .dword is available for 32-bit targets. */
555 if (UNITS_PER_WORD
== 4)
556 targetm
.asm_out
.aligned_op
.di
= NULL
;
558 /* The unaligned ops are only available when using GAS. */
561 targetm
.asm_out
.unaligned_op
.hi
= NULL
;
562 targetm
.asm_out
.unaligned_op
.si
= NULL
;
563 targetm
.asm_out
.unaligned_op
.di
= NULL
;
566 init_machine_status
= pa_init_machine_status
;
/* Implement TARGET_INIT_BUILTINS: adjust builtin decls for this target.  */
static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
  /* No fputc_unlocked on this target; alias it to putc_unlocked.  */
  built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] =
    built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
  implicit_built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED]
    = implicit_built_in_decls[(int) BUILT_IN_PUTC_UNLOCKED];
#endif
#if TARGET_HPUX
  /* HP-UX spells the C99 finite classification functions differently.  */
  if (built_in_decls [BUILT_IN_FINITE])
    set_user_assembler_name (built_in_decls [BUILT_IN_FINITE], "_Isfinite");
  if (built_in_decls [BUILT_IN_FINITEF])
    set_user_assembler_name (built_in_decls [BUILT_IN_FINITEF], "_Isfinitef");
#endif
}
/* Function to init struct machine_function.
   This will be called, via a pointer variable,
   from push_function_context.  */
static struct machine_function *
pa_init_machine_status (void)
{
  /* GC-allocate a zeroed per-function machine_function record.  */
  return ggc_alloc_cleared_machine_function ();
}
596 /* If FROM is a probable pointer register, mark TO as a probable
597 pointer register with the same pointer alignment as FROM. */
600 copy_reg_pointer (rtx to
, rtx from
)
602 if (REG_POINTER (from
))
603 mark_reg_pointer (to
, REGNO_POINTER_ALIGN (REGNO (from
)));
606 /* Return 1 if X contains a symbolic expression. We know these
607 expressions will have one of a few well defined forms, so
608 we need only check those forms. */
610 symbolic_expression_p (rtx x
)
613 /* Strip off any HIGH. */
614 if (GET_CODE (x
) == HIGH
)
617 return (symbolic_operand (x
, VOIDmode
));
620 /* Accept any constant that can be moved in one instruction into a
623 cint_ok_for_move (HOST_WIDE_INT ival
)
625 /* OK if ldo, ldil, or zdepi, can be used. */
626 return (VAL_14_BITS_P (ival
)
627 || ldil_cint_p (ival
)
628 || zdepi_cint_p (ival
));
631 /* Return truth value of whether OP can be used as an operand in a
634 adddi3_operand (rtx op
, enum machine_mode mode
)
636 return (register_operand (op
, mode
)
637 || (GET_CODE (op
) == CONST_INT
638 && (TARGET_64BIT
? INT_14_BITS (op
) : INT_11_BITS (op
))));
641 /* True iff the operand OP can be used as the destination operand of
642 an integer store. This also implies the operand could be used as
643 the source operand of an integer load. Symbolic, lo_sum and indexed
644 memory operands are not allowed. We accept reloading pseudos and
645 other memory operands. */
647 integer_store_memory_operand (rtx op
, enum machine_mode mode
)
649 return ((reload_in_progress
651 && REGNO (op
) >= FIRST_PSEUDO_REGISTER
652 && reg_renumber
[REGNO (op
)] < 0)
653 || (GET_CODE (op
) == MEM
654 && (reload_in_progress
|| memory_address_p (mode
, XEXP (op
, 0)))
655 && !symbolic_memory_operand (op
, VOIDmode
)
656 && !IS_LO_SUM_DLT_ADDR_P (XEXP (op
, 0))
657 && !IS_INDEX_ADDR_P (XEXP (op
, 0))));
660 /* True iff ldil can be used to load this CONST_INT. The least
661 significant 11 bits of the value must be zero and the value must
662 not change sign when extended from 32 to 64 bits. */
664 ldil_cint_p (HOST_WIDE_INT ival
)
666 HOST_WIDE_INT x
= ival
& (((HOST_WIDE_INT
) -1 << 31) | 0x7ff);
668 return x
== 0 || x
== ((HOST_WIDE_INT
) -1 << 31);
671 /* True iff zdepi can be used to generate this CONST_INT.
672 zdepi first sign extends a 5-bit signed number to a given field
673 length, then places this field anywhere in a zero. */
675 zdepi_cint_p (unsigned HOST_WIDE_INT x
)
677 unsigned HOST_WIDE_INT lsb_mask
, t
;
679 /* This might not be obvious, but it's at least fast.
680 This function is critical; we don't have the time loops would take. */
682 t
= ((x
>> 4) + lsb_mask
) & ~(lsb_mask
- 1);
683 /* Return true iff t is a power of two. */
684 return ((t
& (t
- 1)) == 0);
687 /* True iff depi or extru can be used to compute (reg & mask).
688 Accept bit pattern like these:
693 and_mask_p (unsigned HOST_WIDE_INT mask
)
696 mask
+= mask
& -mask
;
697 return (mask
& (mask
- 1)) == 0;
700 /* True iff depi can be used to compute (reg | MASK). */
702 ior_mask_p (unsigned HOST_WIDE_INT mask
)
704 mask
+= mask
& -mask
;
705 return (mask
& (mask
- 1)) == 0;
708 /* Legitimize PIC addresses. If the address is already
709 position-independent, we return ORIG. Newly generated
710 position-independent addresses go to REG. If we need more
711 than one register, we lose. */
714 legitimize_pic_address (rtx orig
, enum machine_mode mode
, rtx reg
)
718 gcc_assert (!PA_SYMBOL_REF_TLS_P (orig
));
720 /* Labels need special handling. */
721 if (pic_label_operand (orig
, mode
))
725 /* We do not want to go through the movXX expanders here since that
726 would create recursion.
728 Nor do we really want to call a generator for a named pattern
729 since that requires multiple patterns if we want to support
732 So instead we just emit the raw set, which avoids the movXX
733 expanders completely. */
734 mark_reg_pointer (reg
, BITS_PER_UNIT
);
735 insn
= emit_insn (gen_rtx_SET (VOIDmode
, reg
, orig
));
737 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
738 add_reg_note (insn
, REG_EQUAL
, orig
);
740 /* During and after reload, we need to generate a REG_LABEL_OPERAND note
741 and update LABEL_NUSES because this is not done automatically. */
742 if (reload_in_progress
|| reload_completed
)
744 /* Extract LABEL_REF. */
745 if (GET_CODE (orig
) == CONST
)
746 orig
= XEXP (XEXP (orig
, 0), 0);
747 /* Extract CODE_LABEL. */
748 orig
= XEXP (orig
, 0);
749 add_reg_note (insn
, REG_LABEL_OPERAND
, orig
);
750 LABEL_NUSES (orig
)++;
752 crtl
->uses_pic_offset_table
= 1;
755 if (GET_CODE (orig
) == SYMBOL_REF
)
761 /* Before reload, allocate a temporary register for the intermediate
762 result. This allows the sequence to be deleted when the final
763 result is unused and the insns are trivially dead. */
764 tmp_reg
= ((reload_in_progress
|| reload_completed
)
765 ? reg
: gen_reg_rtx (Pmode
));
767 if (function_label_operand (orig
, mode
))
769 /* Force function label into memory in word mode. */
770 orig
= XEXP (force_const_mem (word_mode
, orig
), 0);
771 /* Load plabel address from DLT. */
772 emit_move_insn (tmp_reg
,
773 gen_rtx_PLUS (word_mode
, pic_offset_table_rtx
,
774 gen_rtx_HIGH (word_mode
, orig
)));
776 = gen_const_mem (Pmode
,
777 gen_rtx_LO_SUM (Pmode
, tmp_reg
,
778 gen_rtx_UNSPEC (Pmode
,
781 emit_move_insn (reg
, pic_ref
);
782 /* Now load address of function descriptor. */
783 pic_ref
= gen_rtx_MEM (Pmode
, reg
);
787 /* Load symbol reference from DLT. */
788 emit_move_insn (tmp_reg
,
789 gen_rtx_PLUS (word_mode
, pic_offset_table_rtx
,
790 gen_rtx_HIGH (word_mode
, orig
)));
792 = gen_const_mem (Pmode
,
793 gen_rtx_LO_SUM (Pmode
, tmp_reg
,
794 gen_rtx_UNSPEC (Pmode
,
799 crtl
->uses_pic_offset_table
= 1;
800 mark_reg_pointer (reg
, BITS_PER_UNIT
);
801 insn
= emit_move_insn (reg
, pic_ref
);
803 /* Put a REG_EQUAL note on this insn, so that it can be optimized. */
804 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
808 else if (GET_CODE (orig
) == CONST
)
812 if (GET_CODE (XEXP (orig
, 0)) == PLUS
813 && XEXP (XEXP (orig
, 0), 0) == pic_offset_table_rtx
)
817 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
819 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
820 orig
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
821 base
== reg
? 0 : reg
);
823 if (GET_CODE (orig
) == CONST_INT
)
825 if (INT_14_BITS (orig
))
826 return plus_constant (base
, INTVAL (orig
));
827 orig
= force_reg (Pmode
, orig
);
829 pic_ref
= gen_rtx_PLUS (Pmode
, base
, orig
);
830 /* Likewise, should we set special REG_NOTEs here? */
836 static GTY(()) rtx gen_tls_tga
;
839 gen_tls_get_addr (void)
842 gen_tls_tga
= init_one_libfunc ("__tls_get_addr");
847 hppa_tls_call (rtx arg
)
851 ret
= gen_reg_rtx (Pmode
);
852 emit_library_call_value (gen_tls_get_addr (), ret
,
853 LCT_CONST
, Pmode
, 1, arg
, Pmode
);
859 legitimize_tls_address (rtx addr
)
861 rtx ret
, insn
, tmp
, t1
, t2
, tp
;
862 enum tls_model model
= SYMBOL_REF_TLS_MODEL (addr
);
866 case TLS_MODEL_GLOBAL_DYNAMIC
:
867 tmp
= gen_reg_rtx (Pmode
);
869 emit_insn (gen_tgd_load_pic (tmp
, addr
));
871 emit_insn (gen_tgd_load (tmp
, addr
));
872 ret
= hppa_tls_call (tmp
);
875 case TLS_MODEL_LOCAL_DYNAMIC
:
876 ret
= gen_reg_rtx (Pmode
);
877 tmp
= gen_reg_rtx (Pmode
);
880 emit_insn (gen_tld_load_pic (tmp
, addr
));
882 emit_insn (gen_tld_load (tmp
, addr
));
883 t1
= hppa_tls_call (tmp
);
886 t2
= gen_reg_rtx (Pmode
);
887 emit_libcall_block (insn
, t2
, t1
,
888 gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const0_rtx
),
890 emit_insn (gen_tld_offset_load (ret
, addr
, t2
));
893 case TLS_MODEL_INITIAL_EXEC
:
894 tp
= gen_reg_rtx (Pmode
);
895 tmp
= gen_reg_rtx (Pmode
);
896 ret
= gen_reg_rtx (Pmode
);
897 emit_insn (gen_tp_load (tp
));
899 emit_insn (gen_tie_load_pic (tmp
, addr
));
901 emit_insn (gen_tie_load (tmp
, addr
));
902 emit_move_insn (ret
, gen_rtx_PLUS (Pmode
, tp
, tmp
));
905 case TLS_MODEL_LOCAL_EXEC
:
906 tp
= gen_reg_rtx (Pmode
);
907 ret
= gen_reg_rtx (Pmode
);
908 emit_insn (gen_tp_load (tp
));
909 emit_insn (gen_tle_load (ret
, addr
, tp
));
919 /* Try machine-dependent ways of modifying an illegitimate address
920 to be legitimate. If we find one, return the new, valid address.
921 This macro is used in only one place: `memory_address' in explow.c.
923 OLDX is the address as it was before break_out_memory_refs was called.
924 In some cases it is useful to look at this to decide what needs to be done.
926 It is always safe for this macro to do nothing. It exists to recognize
927 opportunities to optimize the output.
929 For the PA, transform:
931 memory(X + <large int>)
935 if (<large int> & mask) >= 16
936 Y = (<large int> & ~mask) + mask + 1 Round up.
938 Y = (<large int> & ~mask) Round down.
940 memory (Z + (<large int> - Y));
942 This is for CSE to find several similar references, and only use one Z.
944 X can either be a SYMBOL_REF or REG, but because combine cannot
945 perform a 4->2 combination we do nothing for SYMBOL_REF + D where
946 D will not fit in 14 bits.
948 MODE_FLOAT references allow displacements which fit in 5 bits, so use
951 MODE_INT references allow displacements which fit in 14 bits, so use
954 This relies on the fact that most mode MODE_FLOAT references will use FP
955 registers and most mode MODE_INT references will use integer registers.
956 (In the rare case of an FP register used in an integer MODE, we depend
957 on secondary reloads to clean things up.)
960 It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
961 manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
962 addressing modes to be used).
964 Put X and Z into registers. Then put the entire expression into
968 hppa_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
969 enum machine_mode mode
)
973 /* We need to canonicalize the order of operands in unscaled indexed
974 addresses since the code that checks if an address is valid doesn't
975 always try both orders. */
976 if (!TARGET_NO_SPACE_REGS
977 && GET_CODE (x
) == PLUS
978 && GET_MODE (x
) == Pmode
979 && REG_P (XEXP (x
, 0))
980 && REG_P (XEXP (x
, 1))
981 && REG_POINTER (XEXP (x
, 0))
982 && !REG_POINTER (XEXP (x
, 1)))
983 return gen_rtx_PLUS (Pmode
, XEXP (x
, 1), XEXP (x
, 0));
985 if (PA_SYMBOL_REF_TLS_P (x
))
986 return legitimize_tls_address (x
);
988 return legitimize_pic_address (x
, mode
, gen_reg_rtx (Pmode
));
990 /* Strip off CONST. */
991 if (GET_CODE (x
) == CONST
)
994 /* Special case. Get the SYMBOL_REF into a register and use indexing.
995 That should always be safe. */
996 if (GET_CODE (x
) == PLUS
997 && GET_CODE (XEXP (x
, 0)) == REG
998 && GET_CODE (XEXP (x
, 1)) == SYMBOL_REF
)
1000 rtx reg
= force_reg (Pmode
, XEXP (x
, 1));
1001 return force_reg (Pmode
, gen_rtx_PLUS (Pmode
, reg
, XEXP (x
, 0)));
1004 /* Note we must reject symbols which represent function addresses
1005 since the assembler/linker can't handle arithmetic on plabels. */
1006 if (GET_CODE (x
) == PLUS
1007 && GET_CODE (XEXP (x
, 1)) == CONST_INT
1008 && ((GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
1009 && !FUNCTION_NAME_P (XSTR (XEXP (x
, 0), 0)))
1010 || GET_CODE (XEXP (x
, 0)) == REG
))
1012 rtx int_part
, ptr_reg
;
1014 int offset
= INTVAL (XEXP (x
, 1));
1017 mask
= (GET_MODE_CLASS (mode
) == MODE_FLOAT
1018 ? (INT14_OK_STRICT
? 0x3fff : 0x1f) : 0x3fff);
1020 /* Choose which way to round the offset. Round up if we
1021 are >= halfway to the next boundary. */
1022 if ((offset
& mask
) >= ((mask
+ 1) / 2))
1023 newoffset
= (offset
& ~ mask
) + mask
+ 1;
1025 newoffset
= (offset
& ~ mask
);
1027 /* If the newoffset will not fit in 14 bits (ldo), then
1028 handling this would take 4 or 5 instructions (2 to load
1029 the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
1030 add the new offset and the SYMBOL_REF.) Combine can
1031 not handle 4->2 or 5->2 combinations, so do not create
1033 if (! VAL_14_BITS_P (newoffset
)
1034 && GET_CODE (XEXP (x
, 0)) == SYMBOL_REF
)
1036 rtx const_part
= plus_constant (XEXP (x
, 0), newoffset
);
1039 gen_rtx_HIGH (Pmode
, const_part
));
1042 gen_rtx_LO_SUM (Pmode
,
1043 tmp_reg
, const_part
));
1047 if (! VAL_14_BITS_P (newoffset
))
1048 int_part
= force_reg (Pmode
, GEN_INT (newoffset
));
1050 int_part
= GEN_INT (newoffset
);
1052 ptr_reg
= force_reg (Pmode
,
1053 gen_rtx_PLUS (Pmode
,
1054 force_reg (Pmode
, XEXP (x
, 0)),
1057 return plus_constant (ptr_reg
, offset
- newoffset
);
1060 /* Handle (plus (mult (a) (shadd_constant)) (b)). */
1062 if (GET_CODE (x
) == PLUS
&& GET_CODE (XEXP (x
, 0)) == MULT
1063 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
1064 && shadd_constant_p (INTVAL (XEXP (XEXP (x
, 0), 1)))
1065 && (OBJECT_P (XEXP (x
, 1))
1066 || GET_CODE (XEXP (x
, 1)) == SUBREG
)
1067 && GET_CODE (XEXP (x
, 1)) != CONST
)
1069 int val
= INTVAL (XEXP (XEXP (x
, 0), 1));
1073 if (GET_CODE (reg1
) != REG
)
1074 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1076 reg2
= XEXP (XEXP (x
, 0), 0);
1077 if (GET_CODE (reg2
) != REG
)
1078 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1080 return force_reg (Pmode
, gen_rtx_PLUS (Pmode
,
1081 gen_rtx_MULT (Pmode
,
1087 /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
1089 Only do so for floating point modes since this is more speculative
1090 and we lose if it's an integer store. */
1091 if (GET_CODE (x
) == PLUS
1092 && GET_CODE (XEXP (x
, 0)) == PLUS
1093 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
1094 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == CONST_INT
1095 && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x
, 0), 0), 1)))
1096 && (mode
== SFmode
|| mode
== DFmode
))
1099 /* First, try and figure out what to use as a base register. */
1100 rtx reg1
, reg2
, base
, idx
;
1102 reg1
= XEXP (XEXP (x
, 0), 1);
1107 /* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
1108 then emit_move_sequence will turn on REG_POINTER so we'll know
1109 it's a base register below. */
1110 if (GET_CODE (reg1
) != REG
)
1111 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1113 if (GET_CODE (reg2
) != REG
)
1114 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1116 /* Figure out what the base and index are. */
1118 if (GET_CODE (reg1
) == REG
1119 && REG_POINTER (reg1
))
1122 idx
= gen_rtx_PLUS (Pmode
,
1123 gen_rtx_MULT (Pmode
,
1124 XEXP (XEXP (XEXP (x
, 0), 0), 0),
1125 XEXP (XEXP (XEXP (x
, 0), 0), 1)),
1128 else if (GET_CODE (reg2
) == REG
1129 && REG_POINTER (reg2
))
1138 /* If the index adds a large constant, try to scale the
1139 constant so that it can be loaded with only one insn. */
1140 if (GET_CODE (XEXP (idx
, 1)) == CONST_INT
1141 && VAL_14_BITS_P (INTVAL (XEXP (idx
, 1))
1142 / INTVAL (XEXP (XEXP (idx
, 0), 1)))
1143 && INTVAL (XEXP (idx
, 1)) % INTVAL (XEXP (XEXP (idx
, 0), 1)) == 0)
1145 /* Divide the CONST_INT by the scale factor, then add it to A. */
1146 int val
= INTVAL (XEXP (idx
, 1));
1148 val
/= INTVAL (XEXP (XEXP (idx
, 0), 1));
1149 reg1
= XEXP (XEXP (idx
, 0), 0);
1150 if (GET_CODE (reg1
) != REG
)
1151 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1153 reg1
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, reg1
, GEN_INT (val
)));
1155 /* We can now generate a simple scaled indexed address. */
1158 (Pmode
, gen_rtx_PLUS (Pmode
,
1159 gen_rtx_MULT (Pmode
, reg1
,
1160 XEXP (XEXP (idx
, 0), 1)),
1164 /* If B + C is still a valid base register, then add them. */
1165 if (GET_CODE (XEXP (idx
, 1)) == CONST_INT
1166 && INTVAL (XEXP (idx
, 1)) <= 4096
1167 && INTVAL (XEXP (idx
, 1)) >= -4096)
1169 int val
= INTVAL (XEXP (XEXP (idx
, 0), 1));
1172 reg1
= force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, XEXP (idx
, 1)));
1174 reg2
= XEXP (XEXP (idx
, 0), 0);
1175 if (GET_CODE (reg2
) != CONST_INT
)
1176 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1178 return force_reg (Pmode
, gen_rtx_PLUS (Pmode
,
1179 gen_rtx_MULT (Pmode
,
1185 /* Get the index into a register, then add the base + index and
1186 return a register holding the result. */
1188 /* First get A into a register. */
1189 reg1
= XEXP (XEXP (idx
, 0), 0);
1190 if (GET_CODE (reg1
) != REG
)
1191 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1193 /* And get B into a register. */
1194 reg2
= XEXP (idx
, 1);
1195 if (GET_CODE (reg2
) != REG
)
1196 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1198 reg1
= force_reg (Pmode
,
1199 gen_rtx_PLUS (Pmode
,
1200 gen_rtx_MULT (Pmode
, reg1
,
1201 XEXP (XEXP (idx
, 0), 1)),
1204 /* Add the result to our base register and return. */
1205 return force_reg (Pmode
, gen_rtx_PLUS (Pmode
, base
, reg1
));
1209 /* Uh-oh. We might have an address for x[n-100000]. This needs
1210 special handling to avoid creating an indexed memory address
1211 with x-100000 as the base.
1213 If the constant part is small enough, then it's still safe because
1214 there is a guard page at the beginning and end of the data segment.
1216 Scaled references are common enough that we want to try and rearrange the
1217 terms so that we can use indexing for these addresses too. Only
1218 do the optimization for floatint point modes. */
1220 if (GET_CODE (x
) == PLUS
1221 && symbolic_expression_p (XEXP (x
, 1)))
1223 /* Ugly. We modify things here so that the address offset specified
1224 by the index expression is computed first, then added to x to form
1225 the entire address. */
1227 rtx regx1
, regx2
, regy1
, regy2
, y
;
1229 /* Strip off any CONST. */
1231 if (GET_CODE (y
) == CONST
)
1234 if (GET_CODE (y
) == PLUS
|| GET_CODE (y
) == MINUS
)
1236 /* See if this looks like
1237 (plus (mult (reg) (shadd_const))
1238 (const (plus (symbol_ref) (const_int))))
1240 Where const_int is small. In that case the const
1241 expression is a valid pointer for indexing.
1243 If const_int is big, but can be divided evenly by shadd_const
1244 and added to (reg). This allows more scaled indexed addresses. */
1245 if (GET_CODE (XEXP (y
, 0)) == SYMBOL_REF
1246 && GET_CODE (XEXP (x
, 0)) == MULT
1247 && GET_CODE (XEXP (y
, 1)) == CONST_INT
1248 && INTVAL (XEXP (y
, 1)) >= -4096
1249 && INTVAL (XEXP (y
, 1)) <= 4095
1250 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
1251 && shadd_constant_p (INTVAL (XEXP (XEXP (x
, 0), 1))))
1253 int val
= INTVAL (XEXP (XEXP (x
, 0), 1));
1257 if (GET_CODE (reg1
) != REG
)
1258 reg1
= force_reg (Pmode
, force_operand (reg1
, 0));
1260 reg2
= XEXP (XEXP (x
, 0), 0);
1261 if (GET_CODE (reg2
) != REG
)
1262 reg2
= force_reg (Pmode
, force_operand (reg2
, 0));
1264 return force_reg (Pmode
,
1265 gen_rtx_PLUS (Pmode
,
1266 gen_rtx_MULT (Pmode
,
1271 else if ((mode
== DFmode
|| mode
== SFmode
)
1272 && GET_CODE (XEXP (y
, 0)) == SYMBOL_REF
1273 && GET_CODE (XEXP (x
, 0)) == MULT
1274 && GET_CODE (XEXP (y
, 1)) == CONST_INT
1275 && INTVAL (XEXP (y
, 1)) % INTVAL (XEXP (XEXP (x
, 0), 1)) == 0
1276 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
1277 && shadd_constant_p (INTVAL (XEXP (XEXP (x
, 0), 1))))
1280 = force_reg (Pmode
, GEN_INT (INTVAL (XEXP (y
, 1))
1281 / INTVAL (XEXP (XEXP (x
, 0), 1))));
1282 regx2
= XEXP (XEXP (x
, 0), 0);
1283 if (GET_CODE (regx2
) != REG
)
1284 regx2
= force_reg (Pmode
, force_operand (regx2
, 0));
1285 regx2
= force_reg (Pmode
, gen_rtx_fmt_ee (GET_CODE (y
), Pmode
,
1289 gen_rtx_PLUS (Pmode
,
1290 gen_rtx_MULT (Pmode
, regx2
,
1291 XEXP (XEXP (x
, 0), 1)),
1292 force_reg (Pmode
, XEXP (y
, 0))));
1294 else if (GET_CODE (XEXP (y
, 1)) == CONST_INT
1295 && INTVAL (XEXP (y
, 1)) >= -4096
1296 && INTVAL (XEXP (y
, 1)) <= 4095)
1298 /* This is safe because of the guard page at the
1299 beginning and end of the data space. Just
1300 return the original address. */
1305 /* Doesn't look like one we can optimize. */
1306 regx1
= force_reg (Pmode
, force_operand (XEXP (x
, 0), 0));
1307 regy1
= force_reg (Pmode
, force_operand (XEXP (y
, 0), 0));
1308 regy2
= force_reg (Pmode
, force_operand (XEXP (y
, 1), 0));
1309 regx1
= force_reg (Pmode
,
1310 gen_rtx_fmt_ee (GET_CODE (y
), Pmode
,
1312 return force_reg (Pmode
, gen_rtx_PLUS (Pmode
, regx1
, regy1
));
1320 /* Implement the TARGET_REGISTER_MOVE_COST hook.
1322 Compute extra cost of moving data between one register class
1325 Make moves from SAR so expensive they should never happen. We used to
1326 have 0xffff here, but that generates overflow in rare cases.
1328 Copies involving a FP register and a non-FP register are relatively
1329 expensive because they must go through memory.
1331 Other copies are reasonably cheap. */
1334 hppa_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED
,
1335 reg_class_t from
, reg_class_t to
)
1337 if (from
== SHIFT_REGS
)
1339 else if ((FP_REG_CLASS_P (from
) && ! FP_REG_CLASS_P (to
))
1340 || (FP_REG_CLASS_P (to
) && ! FP_REG_CLASS_P (from
)))
1346 /* For the HPPA, REG and REG+CONST is cost 0
1347 and addresses involving symbolic constants are cost 2.
1349 PIC addresses are very expensive.
1351 It is no coincidence that this has the same structure
1352 as GO_IF_LEGITIMATE_ADDRESS. */
1355 hppa_address_cost (rtx X
,
1356 bool speed ATTRIBUTE_UNUSED
)
1358 switch (GET_CODE (X
))
1371 /* Compute a (partial) cost for rtx X. Return true if the complete
1372 cost has been computed, and false if subexpressions should be
1373 scanned. In either case, *TOTAL contains the cost result. */
1376 hppa_rtx_costs (rtx x
, int code
, int outer_code
, int *total
,
1377 bool speed ATTRIBUTE_UNUSED
)
1382 if (INTVAL (x
) == 0)
1384 else if (INT_14_BITS (x
))
1401 if ((x
== CONST0_RTX (DFmode
) || x
== CONST0_RTX (SFmode
))
1402 && outer_code
!= SET
)
1409 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
1410 *total
= COSTS_N_INSNS (3);
1411 else if (TARGET_PA_11
&& !TARGET_DISABLE_FPREGS
&& !TARGET_SOFT_FLOAT
)
1412 *total
= COSTS_N_INSNS (8);
1414 *total
= COSTS_N_INSNS (20);
1418 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
1420 *total
= COSTS_N_INSNS (14);
1428 *total
= COSTS_N_INSNS (60);
1431 case PLUS
: /* this includes shNadd insns */
1433 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
1434 *total
= COSTS_N_INSNS (3);
1436 *total
= COSTS_N_INSNS (1);
1442 *total
= COSTS_N_INSNS (1);
1450 /* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
1451 new rtx with the correct mode. */
1453 force_mode (enum machine_mode mode
, rtx orig
)
1455 if (mode
== GET_MODE (orig
))
1458 gcc_assert (REGNO (orig
) < FIRST_PSEUDO_REGISTER
);
1460 return gen_rtx_REG (mode
, REGNO (orig
));
1463 /* Return 1 if *X is a thread-local symbol. */
1466 pa_tls_symbol_ref_1 (rtx
*x
, void *data ATTRIBUTE_UNUSED
)
1468 return PA_SYMBOL_REF_TLS_P (*x
);
1471 /* Return 1 if X contains a thread-local symbol. */
1474 pa_tls_referenced_p (rtx x
)
1476 if (!TARGET_HAVE_TLS
)
1479 return for_each_rtx (&x
, &pa_tls_symbol_ref_1
, 0);
1482 /* Emit insns to move operands[1] into operands[0].
1484 Return 1 if we have written out everything that needs to be done to
1485 do the move. Otherwise, return 0 and the caller will emit the move
1488 Note SCRATCH_REG may not be in the proper mode depending on how it
1489 will be used. This routine is responsible for creating a new copy
1490 of SCRATCH_REG in the proper mode. */
1493 emit_move_sequence (rtx
*operands
, enum machine_mode mode
, rtx scratch_reg
)
1495 register rtx operand0
= operands
[0];
1496 register rtx operand1
= operands
[1];
1499 /* We can only handle indexed addresses in the destination operand
1500 of floating point stores. Thus, we need to break out indexed
1501 addresses from the destination operand. */
1502 if (GET_CODE (operand0
) == MEM
&& IS_INDEX_ADDR_P (XEXP (operand0
, 0)))
1504 gcc_assert (can_create_pseudo_p ());
1506 tem
= copy_to_mode_reg (Pmode
, XEXP (operand0
, 0));
1507 operand0
= replace_equiv_address (operand0
, tem
);
1510 /* On targets with non-equivalent space registers, break out unscaled
1511 indexed addresses from the source operand before the final CSE.
1512 We have to do this because the REG_POINTER flag is not correctly
1513 carried through various optimization passes and CSE may substitute
1514 a pseudo without the pointer set for one with the pointer set. As
1515 a result, we loose various opportunities to create insns with
1516 unscaled indexed addresses. */
1517 if (!TARGET_NO_SPACE_REGS
1518 && !cse_not_expected
1519 && GET_CODE (operand1
) == MEM
1520 && GET_CODE (XEXP (operand1
, 0)) == PLUS
1521 && REG_P (XEXP (XEXP (operand1
, 0), 0))
1522 && REG_P (XEXP (XEXP (operand1
, 0), 1)))
1524 = replace_equiv_address (operand1
,
1525 copy_to_mode_reg (Pmode
, XEXP (operand1
, 0)));
1528 && reload_in_progress
&& GET_CODE (operand0
) == REG
1529 && REGNO (operand0
) >= FIRST_PSEUDO_REGISTER
)
1530 operand0
= reg_equiv_mem
[REGNO (operand0
)];
1531 else if (scratch_reg
1532 && reload_in_progress
&& GET_CODE (operand0
) == SUBREG
1533 && GET_CODE (SUBREG_REG (operand0
)) == REG
1534 && REGNO (SUBREG_REG (operand0
)) >= FIRST_PSEUDO_REGISTER
)
1536 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1537 the code which tracks sets/uses for delete_output_reload. */
1538 rtx temp
= gen_rtx_SUBREG (GET_MODE (operand0
),
1539 reg_equiv_mem
[REGNO (SUBREG_REG (operand0
))],
1540 SUBREG_BYTE (operand0
));
1541 operand0
= alter_subreg (&temp
);
1545 && reload_in_progress
&& GET_CODE (operand1
) == REG
1546 && REGNO (operand1
) >= FIRST_PSEUDO_REGISTER
)
1547 operand1
= reg_equiv_mem
[REGNO (operand1
)];
1548 else if (scratch_reg
1549 && reload_in_progress
&& GET_CODE (operand1
) == SUBREG
1550 && GET_CODE (SUBREG_REG (operand1
)) == REG
1551 && REGNO (SUBREG_REG (operand1
)) >= FIRST_PSEUDO_REGISTER
)
1553 /* We must not alter SUBREG_BYTE (operand0) since that would confuse
1554 the code which tracks sets/uses for delete_output_reload. */
1555 rtx temp
= gen_rtx_SUBREG (GET_MODE (operand1
),
1556 reg_equiv_mem
[REGNO (SUBREG_REG (operand1
))],
1557 SUBREG_BYTE (operand1
));
1558 operand1
= alter_subreg (&temp
);
1561 if (scratch_reg
&& reload_in_progress
&& GET_CODE (operand0
) == MEM
1562 && ((tem
= find_replacement (&XEXP (operand0
, 0)))
1563 != XEXP (operand0
, 0)))
1564 operand0
= replace_equiv_address (operand0
, tem
);
1566 if (scratch_reg
&& reload_in_progress
&& GET_CODE (operand1
) == MEM
1567 && ((tem
= find_replacement (&XEXP (operand1
, 0)))
1568 != XEXP (operand1
, 0)))
1569 operand1
= replace_equiv_address (operand1
, tem
);
1571 /* Handle secondary reloads for loads/stores of FP registers from
1572 REG+D addresses where D does not fit in 5 or 14 bits, including
1573 (subreg (mem (addr))) cases. */
1575 && fp_reg_operand (operand0
, mode
)
1576 && ((GET_CODE (operand1
) == MEM
1577 && !memory_address_p ((GET_MODE_SIZE (mode
) == 4 ? SFmode
: DFmode
),
1578 XEXP (operand1
, 0)))
1579 || ((GET_CODE (operand1
) == SUBREG
1580 && GET_CODE (XEXP (operand1
, 0)) == MEM
1581 && !memory_address_p ((GET_MODE_SIZE (mode
) == 4
1583 XEXP (XEXP (operand1
, 0), 0))))))
1585 if (GET_CODE (operand1
) == SUBREG
)
1586 operand1
= XEXP (operand1
, 0);
1588 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1589 it in WORD_MODE regardless of what mode it was originally given
1591 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1593 /* D might not fit in 14 bits either; for such cases load D into
1595 if (!memory_address_p (Pmode
, XEXP (operand1
, 0)))
1597 emit_move_insn (scratch_reg
, XEXP (XEXP (operand1
, 0), 1));
1598 emit_move_insn (scratch_reg
,
1599 gen_rtx_fmt_ee (GET_CODE (XEXP (operand1
, 0)),
1601 XEXP (XEXP (operand1
, 0), 0),
1605 emit_move_insn (scratch_reg
, XEXP (operand1
, 0));
1606 emit_insn (gen_rtx_SET (VOIDmode
, operand0
,
1607 replace_equiv_address (operand1
, scratch_reg
)));
1610 else if (scratch_reg
1611 && fp_reg_operand (operand1
, mode
)
1612 && ((GET_CODE (operand0
) == MEM
1613 && !memory_address_p ((GET_MODE_SIZE (mode
) == 4
1615 XEXP (operand0
, 0)))
1616 || ((GET_CODE (operand0
) == SUBREG
)
1617 && GET_CODE (XEXP (operand0
, 0)) == MEM
1618 && !memory_address_p ((GET_MODE_SIZE (mode
) == 4
1620 XEXP (XEXP (operand0
, 0), 0)))))
1622 if (GET_CODE (operand0
) == SUBREG
)
1623 operand0
= XEXP (operand0
, 0);
1625 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1626 it in WORD_MODE regardless of what mode it was originally given
1628 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1630 /* D might not fit in 14 bits either; for such cases load D into
1632 if (!memory_address_p (Pmode
, XEXP (operand0
, 0)))
1634 emit_move_insn (scratch_reg
, XEXP (XEXP (operand0
, 0), 1));
1635 emit_move_insn (scratch_reg
, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0
,
1638 XEXP (XEXP (operand0
, 0),
1643 emit_move_insn (scratch_reg
, XEXP (operand0
, 0));
1644 emit_insn (gen_rtx_SET (VOIDmode
,
1645 replace_equiv_address (operand0
, scratch_reg
),
1649 /* Handle secondary reloads for loads of FP registers from constant
1650 expressions by forcing the constant into memory.
1652 Use scratch_reg to hold the address of the memory location.
1654 The proper fix is to change PREFERRED_RELOAD_CLASS to return
1655 NO_REGS when presented with a const_int and a register class
1656 containing only FP registers. Doing so unfortunately creates
1657 more problems than it solves. Fix this for 2.5. */
1658 else if (scratch_reg
1659 && CONSTANT_P (operand1
)
1660 && fp_reg_operand (operand0
, mode
))
1662 rtx const_mem
, xoperands
[2];
1664 /* SCRATCH_REG will hold an address and maybe the actual data. We want
1665 it in WORD_MODE regardless of what mode it was originally given
1667 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1669 /* Force the constant into memory and put the address of the
1670 memory location into scratch_reg. */
1671 const_mem
= force_const_mem (mode
, operand1
);
1672 xoperands
[0] = scratch_reg
;
1673 xoperands
[1] = XEXP (const_mem
, 0);
1674 emit_move_sequence (xoperands
, Pmode
, 0);
1676 /* Now load the destination register. */
1677 emit_insn (gen_rtx_SET (mode
, operand0
,
1678 replace_equiv_address (const_mem
, scratch_reg
)));
1681 /* Handle secondary reloads for SAR. These occur when trying to load
1682 the SAR from memory, FP register, or with a constant. */
1683 else if (scratch_reg
1684 && GET_CODE (operand0
) == REG
1685 && REGNO (operand0
) < FIRST_PSEUDO_REGISTER
1686 && REGNO_REG_CLASS (REGNO (operand0
)) == SHIFT_REGS
1687 && (GET_CODE (operand1
) == MEM
1688 || GET_CODE (operand1
) == CONST_INT
1689 || (GET_CODE (operand1
) == REG
1690 && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1
))))))
1692 /* D might not fit in 14 bits either; for such cases load D into
1694 if (GET_CODE (operand1
) == MEM
1695 && !memory_address_p (GET_MODE (operand0
), XEXP (operand1
, 0)))
1697 /* We are reloading the address into the scratch register, so we
1698 want to make sure the scratch register is a full register. */
1699 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1701 emit_move_insn (scratch_reg
, XEXP (XEXP (operand1
, 0), 1));
1702 emit_move_insn (scratch_reg
, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1
,
1705 XEXP (XEXP (operand1
, 0),
1709 /* Now we are going to load the scratch register from memory,
1710 we want to load it in the same width as the original MEM,
1711 which must be the same as the width of the ultimate destination,
1713 scratch_reg
= force_mode (GET_MODE (operand0
), scratch_reg
);
1715 emit_move_insn (scratch_reg
,
1716 replace_equiv_address (operand1
, scratch_reg
));
1720 /* We want to load the scratch register using the same mode as
1721 the ultimate destination. */
1722 scratch_reg
= force_mode (GET_MODE (operand0
), scratch_reg
);
1724 emit_move_insn (scratch_reg
, operand1
);
1727 /* And emit the insn to set the ultimate destination. We know that
1728 the scratch register has the same mode as the destination at this
1730 emit_move_insn (operand0
, scratch_reg
);
1733 /* Handle the most common case: storing into a register. */
1734 else if (register_operand (operand0
, mode
))
1736 if (register_operand (operand1
, mode
)
1737 || (GET_CODE (operand1
) == CONST_INT
1738 && cint_ok_for_move (INTVAL (operand1
)))
1739 || (operand1
== CONST0_RTX (mode
))
1740 || (GET_CODE (operand1
) == HIGH
1741 && !symbolic_operand (XEXP (operand1
, 0), VOIDmode
))
1742 /* Only `general_operands' can come here, so MEM is ok. */
1743 || GET_CODE (operand1
) == MEM
)
1745 /* Various sets are created during RTL generation which don't
1746 have the REG_POINTER flag correctly set. After the CSE pass,
1747 instruction recognition can fail if we don't consistently
1748 set this flag when performing register copies. This should
1749 also improve the opportunities for creating insns that use
1750 unscaled indexing. */
1751 if (REG_P (operand0
) && REG_P (operand1
))
1753 if (REG_POINTER (operand1
)
1754 && !REG_POINTER (operand0
)
1755 && !HARD_REGISTER_P (operand0
))
1756 copy_reg_pointer (operand0
, operand1
);
1759 /* When MEMs are broken out, the REG_POINTER flag doesn't
1760 get set. In some cases, we can set the REG_POINTER flag
1761 from the declaration for the MEM. */
1762 if (REG_P (operand0
)
1763 && GET_CODE (operand1
) == MEM
1764 && !REG_POINTER (operand0
))
1766 tree decl
= MEM_EXPR (operand1
);
1768 /* Set the register pointer flag and register alignment
1769 if the declaration for this memory reference is a
1775 /* If this is a COMPONENT_REF, use the FIELD_DECL from
1777 if (TREE_CODE (decl
) == COMPONENT_REF
)
1778 decl
= TREE_OPERAND (decl
, 1);
1780 type
= TREE_TYPE (decl
);
1781 type
= strip_array_types (type
);
1783 if (POINTER_TYPE_P (type
))
1787 type
= TREE_TYPE (type
);
1788 /* Using TYPE_ALIGN_OK is rather conservative as
1789 only the ada frontend actually sets it. */
1790 align
= (TYPE_ALIGN_OK (type
) ? TYPE_ALIGN (type
)
1792 mark_reg_pointer (operand0
, align
);
1797 emit_insn (gen_rtx_SET (VOIDmode
, operand0
, operand1
));
1801 else if (GET_CODE (operand0
) == MEM
)
1803 if (mode
== DFmode
&& operand1
== CONST0_RTX (mode
)
1804 && !(reload_in_progress
|| reload_completed
))
1806 rtx temp
= gen_reg_rtx (DFmode
);
1808 emit_insn (gen_rtx_SET (VOIDmode
, temp
, operand1
));
1809 emit_insn (gen_rtx_SET (VOIDmode
, operand0
, temp
));
1812 if (register_operand (operand1
, mode
) || operand1
== CONST0_RTX (mode
))
1814 /* Run this case quickly. */
1815 emit_insn (gen_rtx_SET (VOIDmode
, operand0
, operand1
));
1818 if (! (reload_in_progress
|| reload_completed
))
1820 operands
[0] = validize_mem (operand0
);
1821 operands
[1] = operand1
= force_reg (mode
, operand1
);
1825 /* Simplify the source if we need to.
1826 Note we do have to handle function labels here, even though we do
1827 not consider them legitimate constants. Loop optimizations can
1828 call the emit_move_xxx with one as a source. */
1829 if ((GET_CODE (operand1
) != HIGH
&& immediate_operand (operand1
, mode
))
1830 || function_label_operand (operand1
, mode
)
1831 || (GET_CODE (operand1
) == HIGH
1832 && symbolic_operand (XEXP (operand1
, 0), mode
)))
1836 if (GET_CODE (operand1
) == HIGH
)
1839 operand1
= XEXP (operand1
, 0);
1841 if (symbolic_operand (operand1
, mode
))
1843 /* Argh. The assembler and linker can't handle arithmetic
1846 So we force the plabel into memory, load operand0 from
1847 the memory location, then add in the constant part. */
1848 if ((GET_CODE (operand1
) == CONST
1849 && GET_CODE (XEXP (operand1
, 0)) == PLUS
1850 && function_label_operand (XEXP (XEXP (operand1
, 0), 0), Pmode
))
1851 || function_label_operand (operand1
, mode
))
1853 rtx temp
, const_part
;
1855 /* Figure out what (if any) scratch register to use. */
1856 if (reload_in_progress
|| reload_completed
)
1858 scratch_reg
= scratch_reg
? scratch_reg
: operand0
;
1859 /* SCRATCH_REG will hold an address and maybe the actual
1860 data. We want it in WORD_MODE regardless of what mode it
1861 was originally given to us. */
1862 scratch_reg
= force_mode (word_mode
, scratch_reg
);
1865 scratch_reg
= gen_reg_rtx (Pmode
);
1867 if (GET_CODE (operand1
) == CONST
)
1869 /* Save away the constant part of the expression. */
1870 const_part
= XEXP (XEXP (operand1
, 0), 1);
1871 gcc_assert (GET_CODE (const_part
) == CONST_INT
);
1873 /* Force the function label into memory. */
1874 temp
= force_const_mem (mode
, XEXP (XEXP (operand1
, 0), 0));
1878 /* No constant part. */
1879 const_part
= NULL_RTX
;
1881 /* Force the function label into memory. */
1882 temp
= force_const_mem (mode
, operand1
);
1886 /* Get the address of the memory location. PIC-ify it if
1888 temp
= XEXP (temp
, 0);
1890 temp
= legitimize_pic_address (temp
, mode
, scratch_reg
);
1892 /* Put the address of the memory location into our destination
1895 emit_move_sequence (operands
, mode
, scratch_reg
);
1897 /* Now load from the memory location into our destination
1899 operands
[1] = gen_rtx_MEM (Pmode
, operands
[0]);
1900 emit_move_sequence (operands
, mode
, scratch_reg
);
1902 /* And add back in the constant part. */
1903 if (const_part
!= NULL_RTX
)
1904 expand_inc (operand0
, const_part
);
1913 if (reload_in_progress
|| reload_completed
)
1915 temp
= scratch_reg
? scratch_reg
: operand0
;
1916 /* TEMP will hold an address and maybe the actual
1917 data. We want it in WORD_MODE regardless of what mode it
1918 was originally given to us. */
1919 temp
= force_mode (word_mode
, temp
);
1922 temp
= gen_reg_rtx (Pmode
);
1924 /* (const (plus (symbol) (const_int))) must be forced to
1925 memory during/after reload if the const_int will not fit
1927 if (GET_CODE (operand1
) == CONST
1928 && GET_CODE (XEXP (operand1
, 0)) == PLUS
1929 && GET_CODE (XEXP (XEXP (operand1
, 0), 1)) == CONST_INT
1930 && !INT_14_BITS (XEXP (XEXP (operand1
, 0), 1))
1931 && (reload_completed
|| reload_in_progress
)
1934 rtx const_mem
= force_const_mem (mode
, operand1
);
1935 operands
[1] = legitimize_pic_address (XEXP (const_mem
, 0),
1937 operands
[1] = replace_equiv_address (const_mem
, operands
[1]);
1938 emit_move_sequence (operands
, mode
, temp
);
1942 operands
[1] = legitimize_pic_address (operand1
, mode
, temp
);
1943 if (REG_P (operand0
) && REG_P (operands
[1]))
1944 copy_reg_pointer (operand0
, operands
[1]);
1945 emit_insn (gen_rtx_SET (VOIDmode
, operand0
, operands
[1]));
1948 /* On the HPPA, references to data space are supposed to use dp,
1949 register 27, but showing it in the RTL inhibits various cse
1950 and loop optimizations. */
1955 if (reload_in_progress
|| reload_completed
)
1957 temp
= scratch_reg
? scratch_reg
: operand0
;
1958 /* TEMP will hold an address and maybe the actual
1959 data. We want it in WORD_MODE regardless of what mode it
1960 was originally given to us. */
1961 temp
= force_mode (word_mode
, temp
);
1964 temp
= gen_reg_rtx (mode
);
1966 /* Loading a SYMBOL_REF into a register makes that register
1967 safe to be used as the base in an indexed address.
1969 Don't mark hard registers though. That loses. */
1970 if (GET_CODE (operand0
) == REG
1971 && REGNO (operand0
) >= FIRST_PSEUDO_REGISTER
)
1972 mark_reg_pointer (operand0
, BITS_PER_UNIT
);
1973 if (REGNO (temp
) >= FIRST_PSEUDO_REGISTER
)
1974 mark_reg_pointer (temp
, BITS_PER_UNIT
);
1977 set
= gen_rtx_SET (mode
, operand0
, temp
);
1979 set
= gen_rtx_SET (VOIDmode
,
1981 gen_rtx_LO_SUM (mode
, temp
, operand1
));
1983 emit_insn (gen_rtx_SET (VOIDmode
,
1985 gen_rtx_HIGH (mode
, operand1
)));
1991 else if (pa_tls_referenced_p (operand1
))
1996 if (GET_CODE (tmp
) == CONST
&& GET_CODE (XEXP (tmp
, 0)) == PLUS
)
1998 addend
= XEXP (XEXP (tmp
, 0), 1);
1999 tmp
= XEXP (XEXP (tmp
, 0), 0);
2002 gcc_assert (GET_CODE (tmp
) == SYMBOL_REF
);
2003 tmp
= legitimize_tls_address (tmp
);
2006 tmp
= gen_rtx_PLUS (mode
, tmp
, addend
);
2007 tmp
= force_operand (tmp
, operands
[0]);
2011 else if (GET_CODE (operand1
) != CONST_INT
2012 || !cint_ok_for_move (INTVAL (operand1
)))
2016 HOST_WIDE_INT value
= 0;
2017 HOST_WIDE_INT insv
= 0;
2020 if (GET_CODE (operand1
) == CONST_INT
)
2021 value
= INTVAL (operand1
);
2024 && GET_CODE (operand1
) == CONST_INT
2025 && HOST_BITS_PER_WIDE_INT
> 32
2026 && GET_MODE_BITSIZE (GET_MODE (operand0
)) > 32)
2030 /* Extract the low order 32 bits of the value and sign extend.
2031 If the new value is the same as the original value, we can
2032 can use the original value as-is. If the new value is
2033 different, we use it and insert the most-significant 32-bits
2034 of the original value into the final result. */
2035 nval
= ((value
& (((HOST_WIDE_INT
) 2 << 31) - 1))
2036 ^ ((HOST_WIDE_INT
) 1 << 31)) - ((HOST_WIDE_INT
) 1 << 31);
2039 #if HOST_BITS_PER_WIDE_INT > 32
2040 insv
= value
>= 0 ? value
>> 32 : ~(~value
>> 32);
2044 operand1
= GEN_INT (nval
);
2048 if (reload_in_progress
|| reload_completed
)
2049 temp
= scratch_reg
? scratch_reg
: operand0
;
2051 temp
= gen_reg_rtx (mode
);
2053 /* We don't directly split DImode constants on 32-bit targets
2054 because PLUS uses an 11-bit immediate and the insn sequence
2055 generated is not as efficient as the one using HIGH/LO_SUM. */
2056 if (GET_CODE (operand1
) == CONST_INT
2057 && GET_MODE_BITSIZE (mode
) <= BITS_PER_WORD
2058 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
2061 /* Directly break constant into high and low parts. This
2062 provides better optimization opportunities because various
2063 passes recognize constants split with PLUS but not LO_SUM.
2064 We use a 14-bit signed low part except when the addition
2065 of 0x4000 to the high part might change the sign of the
2067 HOST_WIDE_INT low
= value
& 0x3fff;
2068 HOST_WIDE_INT high
= value
& ~ 0x3fff;
2072 if (high
== 0x7fffc000 || (mode
== HImode
&& high
== 0x4000))
2080 emit_insn (gen_rtx_SET (VOIDmode
, temp
, GEN_INT (high
)));
2081 operands
[1] = gen_rtx_PLUS (mode
, temp
, GEN_INT (low
));
2085 emit_insn (gen_rtx_SET (VOIDmode
, temp
,
2086 gen_rtx_HIGH (mode
, operand1
)));
2087 operands
[1] = gen_rtx_LO_SUM (mode
, temp
, operand1
);
2090 insn
= emit_move_insn (operands
[0], operands
[1]);
2092 /* Now insert the most significant 32 bits of the value
2093 into the register. When we don't have a second register
2094 available, it could take up to nine instructions to load
2095 a 64-bit integer constant. Prior to reload, we force
2096 constants that would take more than three instructions
2097 to load to the constant pool. During and after reload,
2098 we have to handle all possible values. */
2101 /* Use a HIGH/LO_SUM/INSV sequence if we have a second
2102 register and the value to be inserted is outside the
2103 range that can be loaded with three depdi instructions. */
2104 if (temp
!= operand0
&& (insv
>= 16384 || insv
< -16384))
2106 operand1
= GEN_INT (insv
);
2108 emit_insn (gen_rtx_SET (VOIDmode
, temp
,
2109 gen_rtx_HIGH (mode
, operand1
)));
2110 emit_move_insn (temp
, gen_rtx_LO_SUM (mode
, temp
, operand1
));
2111 emit_insn (gen_insv (operand0
, GEN_INT (32),
2116 int len
= 5, pos
= 27;
2118 /* Insert the bits using the depdi instruction. */
2121 HOST_WIDE_INT v5
= ((insv
& 31) ^ 16) - 16;
2122 HOST_WIDE_INT sign
= v5
< 0;
2124 /* Left extend the insertion. */
2125 insv
= (insv
>= 0 ? insv
>> len
: ~(~insv
>> len
));
2126 while (pos
> 0 && (insv
& 1) == sign
)
2128 insv
= (insv
>= 0 ? insv
>> 1 : ~(~insv
>> 1));
2133 emit_insn (gen_insv (operand0
, GEN_INT (len
),
2134 GEN_INT (pos
), GEN_INT (v5
)));
2136 len
= pos
> 0 && pos
< 5 ? pos
: 5;
2142 set_unique_reg_note (insn
, REG_EQUAL
, op1
);
2147 /* Now have insn-emit do whatever it normally does. */
2151 /* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
2152 it will need a link/runtime reloc). */
2155 reloc_needed (tree exp
)
2159 switch (TREE_CODE (exp
))
2164 case POINTER_PLUS_EXPR
:
2167 reloc
= reloc_needed (TREE_OPERAND (exp
, 0));
2168 reloc
|= reloc_needed (TREE_OPERAND (exp
, 1));
2172 case NON_LVALUE_EXPR
:
2173 reloc
= reloc_needed (TREE_OPERAND (exp
, 0));
2179 unsigned HOST_WIDE_INT ix
;
2181 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp
), ix
, value
)
2183 reloc
|= reloc_needed (value
);
2196 /* Does operand (which is a symbolic_operand) live in text space?
2197 If so, SYMBOL_REF_FLAG, which is set by pa_encode_section_info,
2201 read_only_operand (rtx operand
, enum machine_mode mode ATTRIBUTE_UNUSED
)
2203 if (GET_CODE (operand
) == CONST
)
2204 operand
= XEXP (XEXP (operand
, 0), 0);
2207 if (GET_CODE (operand
) == SYMBOL_REF
)
2208 return SYMBOL_REF_FLAG (operand
) && !CONSTANT_POOL_ADDRESS_P (operand
);
2212 if (GET_CODE (operand
) == SYMBOL_REF
)
2213 return SYMBOL_REF_FLAG (operand
) || CONSTANT_POOL_ADDRESS_P (operand
);
2219 /* Return the best assembler insn template
2220 for moving operands[1] into operands[0] as a fullword. */
2222 singlemove_string (rtx
*operands
)
2224 HOST_WIDE_INT intval
;
2226 if (GET_CODE (operands
[0]) == MEM
)
2227 return "stw %r1,%0";
2228 if (GET_CODE (operands
[1]) == MEM
)
2230 if (GET_CODE (operands
[1]) == CONST_DOUBLE
)
2235 gcc_assert (GET_MODE (operands
[1]) == SFmode
);
2237 /* Translate the CONST_DOUBLE to a CONST_INT with the same target
2239 REAL_VALUE_FROM_CONST_DOUBLE (d
, operands
[1]);
2240 REAL_VALUE_TO_TARGET_SINGLE (d
, i
);
2242 operands
[1] = GEN_INT (i
);
2243 /* Fall through to CONST_INT case. */
2245 if (GET_CODE (operands
[1]) == CONST_INT
)
2247 intval
= INTVAL (operands
[1]);
2249 if (VAL_14_BITS_P (intval
))
2251 else if ((intval
& 0x7ff) == 0)
2252 return "ldil L'%1,%0";
2253 else if (zdepi_cint_p (intval
))
2254 return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
2256 return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
2258 return "copy %1,%0";
2262 /* Compute position (in OP[1]) and width (in OP[2])
2263 useful for copying IMM to a register using the zdepi
2264 instructions. Store the immediate value to insert in OP[0]. */
2266 compute_zdepwi_operands (unsigned HOST_WIDE_INT imm
, unsigned *op
)
2270 /* Find the least significant set bit in IMM. */
2271 for (lsb
= 0; lsb
< 32; lsb
++)
2278 /* Choose variants based on *sign* of the 5-bit field. */
2279 if ((imm
& 0x10) == 0)
2280 len
= (lsb
<= 28) ? 4 : 32 - lsb
;
2283 /* Find the width of the bitstring in IMM. */
2284 for (len
= 5; len
< 32 - lsb
; len
++)
2286 if ((imm
& ((unsigned HOST_WIDE_INT
) 1 << len
)) == 0)
2290 /* Sign extend IMM as a 5-bit value. */
2291 imm
= (imm
& 0xf) - 0x10;
2299 /* Compute position (in OP[1]) and width (in OP[2])
2300 useful for copying IMM to a register using the depdi,z
2301 instructions. Store the immediate value to insert in OP[0]. */
2303 compute_zdepdi_operands (unsigned HOST_WIDE_INT imm
, unsigned *op
)
2305 int lsb
, len
, maxlen
;
2307 maxlen
= MIN (HOST_BITS_PER_WIDE_INT
, 64);
2309 /* Find the least significant set bit in IMM. */
2310 for (lsb
= 0; lsb
< maxlen
; lsb
++)
2317 /* Choose variants based on *sign* of the 5-bit field. */
2318 if ((imm
& 0x10) == 0)
2319 len
= (lsb
<= maxlen
- 4) ? 4 : maxlen
- lsb
;
2322 /* Find the width of the bitstring in IMM. */
2323 for (len
= 5; len
< maxlen
- lsb
; len
++)
2325 if ((imm
& ((unsigned HOST_WIDE_INT
) 1 << len
)) == 0)
2329 /* Extend length if host is narrow and IMM is negative. */
2330 if (HOST_BITS_PER_WIDE_INT
== 32 && len
== maxlen
- lsb
)
2333 /* Sign extend IMM as a 5-bit value. */
2334 imm
= (imm
& 0xf) - 0x10;
2342 /* Output assembler code to perform a doubleword move insn
2343 with operands OPERANDS. */
2346 output_move_double (rtx
*operands
)
2348 enum { REGOP
, OFFSOP
, MEMOP
, CNSTOP
, RNDOP
} optype0
, optype1
;
2350 rtx addreg0
= 0, addreg1
= 0;
2352 /* First classify both operands. */
2354 if (REG_P (operands
[0]))
2356 else if (offsettable_memref_p (operands
[0]))
2358 else if (GET_CODE (operands
[0]) == MEM
)
2363 if (REG_P (operands
[1]))
2365 else if (CONSTANT_P (operands
[1]))
2367 else if (offsettable_memref_p (operands
[1]))
2369 else if (GET_CODE (operands
[1]) == MEM
)
2374 /* Check for the cases that the operand constraints are not
2375 supposed to allow to happen. */
2376 gcc_assert (optype0
== REGOP
|| optype1
== REGOP
);
2378 /* Handle copies between general and floating registers. */
2380 if (optype0
== REGOP
&& optype1
== REGOP
2381 && FP_REG_P (operands
[0]) ^ FP_REG_P (operands
[1]))
2383 if (FP_REG_P (operands
[0]))
2385 output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands
);
2386 output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands
);
2387 return "{fldds|fldd} -16(%%sp),%0";
2391 output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands
);
2392 output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands
);
2393 return "{ldws|ldw} -12(%%sp),%R0";
2397 /* Handle auto decrementing and incrementing loads and stores
2398 specifically, since the structure of the function doesn't work
2399 for them without major modification. Do it better when we learn
2400 this port about the general inc/dec addressing of PA.
2401 (This was written by tege. Chide him if it doesn't work.) */
2403 if (optype0
== MEMOP
)
2405 /* We have to output the address syntax ourselves, since print_operand
2406 doesn't deal with the addresses we want to use. Fix this later. */
2408 rtx addr
= XEXP (operands
[0], 0);
2409 if (GET_CODE (addr
) == POST_INC
|| GET_CODE (addr
) == POST_DEC
)
2411 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[1], 0);
2413 operands
[0] = XEXP (addr
, 0);
2414 gcc_assert (GET_CODE (operands
[1]) == REG
2415 && GET_CODE (operands
[0]) == REG
);
2417 gcc_assert (!reg_overlap_mentioned_p (high_reg
, addr
));
2419 /* No overlap between high target register and address
2420 register. (We do this in a non-obvious way to
2421 save a register file writeback) */
2422 if (GET_CODE (addr
) == POST_INC
)
2423 return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
2424 return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
2426 else if (GET_CODE (addr
) == PRE_INC
|| GET_CODE (addr
) == PRE_DEC
)
2428 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[1], 0);
2430 operands
[0] = XEXP (addr
, 0);
2431 gcc_assert (GET_CODE (operands
[1]) == REG
2432 && GET_CODE (operands
[0]) == REG
);
2434 gcc_assert (!reg_overlap_mentioned_p (high_reg
, addr
));
2435 /* No overlap between high target register and address
2436 register. (We do this in a non-obvious way to save a
2437 register file writeback) */
2438 if (GET_CODE (addr
) == PRE_INC
)
2439 return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
2440 return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
2443 if (optype1
== MEMOP
)
2445 /* We have to output the address syntax ourselves, since print_operand
2446 doesn't deal with the addresses we want to use. Fix this later. */
2448 rtx addr
= XEXP (operands
[1], 0);
2449 if (GET_CODE (addr
) == POST_INC
|| GET_CODE (addr
) == POST_DEC
)
2451 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[0], 0);
2453 operands
[1] = XEXP (addr
, 0);
2454 gcc_assert (GET_CODE (operands
[0]) == REG
2455 && GET_CODE (operands
[1]) == REG
);
2457 if (!reg_overlap_mentioned_p (high_reg
, addr
))
2459 /* No overlap between high target register and address
2460 register. (We do this in a non-obvious way to
2461 save a register file writeback) */
2462 if (GET_CODE (addr
) == POST_INC
)
2463 return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
2464 return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
2468 /* This is an undefined situation. We should load into the
2469 address register *and* update that register. Probably
2470 we don't need to handle this at all. */
2471 if (GET_CODE (addr
) == POST_INC
)
2472 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
2473 return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
2476 else if (GET_CODE (addr
) == PRE_INC
|| GET_CODE (addr
) == PRE_DEC
)
2478 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[0], 0);
2480 operands
[1] = XEXP (addr
, 0);
2481 gcc_assert (GET_CODE (operands
[0]) == REG
2482 && GET_CODE (operands
[1]) == REG
);
2484 if (!reg_overlap_mentioned_p (high_reg
, addr
))
2486 /* No overlap between high target register and address
2487 register. (We do this in a non-obvious way to
2488 save a register file writeback) */
2489 if (GET_CODE (addr
) == PRE_INC
)
2490 return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
2491 return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
2495 /* This is an undefined situation. We should load into the
2496 address register *and* update that register. Probably
2497 we don't need to handle this at all. */
2498 if (GET_CODE (addr
) == PRE_INC
)
2499 return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
2500 return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
2503 else if (GET_CODE (addr
) == PLUS
2504 && GET_CODE (XEXP (addr
, 0)) == MULT
)
2507 rtx high_reg
= gen_rtx_SUBREG (SImode
, operands
[0], 0);
2509 if (!reg_overlap_mentioned_p (high_reg
, addr
))
2511 xoperands
[0] = high_reg
;
2512 xoperands
[1] = XEXP (addr
, 1);
2513 xoperands
[2] = XEXP (XEXP (addr
, 0), 0);
2514 xoperands
[3] = XEXP (XEXP (addr
, 0), 1);
2515 output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
2517 return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
2521 xoperands
[0] = high_reg
;
2522 xoperands
[1] = XEXP (addr
, 1);
2523 xoperands
[2] = XEXP (XEXP (addr
, 0), 0);
2524 xoperands
[3] = XEXP (XEXP (addr
, 0), 1);
2525 output_asm_insn ("{sh%O3addl %2,%1,%R0|shladd,l %2,%O3,%1,%R0}",
2527 return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
2532 /* If an operand is an unoffsettable memory ref, find a register
2533 we can increment temporarily to make it refer to the second word. */
2535 if (optype0
== MEMOP
)
2536 addreg0
= find_addr_reg (XEXP (operands
[0], 0));
2538 if (optype1
== MEMOP
)
2539 addreg1
= find_addr_reg (XEXP (operands
[1], 0));
2541 /* Ok, we can do one word at a time.
2542 Normally we do the low-numbered word first.
2544 In either case, set up in LATEHALF the operands to use
2545 for the high-numbered word and in some cases alter the
2546 operands in OPERANDS to be suitable for the low-numbered word. */
2548 if (optype0
== REGOP
)
2549 latehalf
[0] = gen_rtx_REG (SImode
, REGNO (operands
[0]) + 1);
2550 else if (optype0
== OFFSOP
)
2551 latehalf
[0] = adjust_address (operands
[0], SImode
, 4);
2553 latehalf
[0] = operands
[0];
2555 if (optype1
== REGOP
)
2556 latehalf
[1] = gen_rtx_REG (SImode
, REGNO (operands
[1]) + 1);
2557 else if (optype1
== OFFSOP
)
2558 latehalf
[1] = adjust_address (operands
[1], SImode
, 4);
2559 else if (optype1
== CNSTOP
)
2560 split_double (operands
[1], &operands
[1], &latehalf
[1]);
2562 latehalf
[1] = operands
[1];
2564 /* If the first move would clobber the source of the second one,
2565 do them in the other order.
2567 This can happen in two cases:
2569 mem -> register where the first half of the destination register
2570 is the same register used in the memory's address. Reload
2571 can create such insns.
2573 mem in this case will be either register indirect or register
2574 indirect plus a valid offset.
2576 register -> register move where REGNO(dst) == REGNO(src + 1)
2577 someone (Tim/Tege?) claimed this can happen for parameter loads.
2579 Handle mem -> register case first. */
2580 if (optype0
== REGOP
2581 && (optype1
== MEMOP
|| optype1
== OFFSOP
)
2582 && refers_to_regno_p (REGNO (operands
[0]), REGNO (operands
[0]) + 1,
2585 /* Do the late half first. */
2587 output_asm_insn ("ldo 4(%0),%0", &addreg1
);
2588 output_asm_insn (singlemove_string (latehalf
), latehalf
);
2592 output_asm_insn ("ldo -4(%0),%0", &addreg1
);
2593 return singlemove_string (operands
);
2596 /* Now handle register -> register case. */
2597 if (optype0
== REGOP
&& optype1
== REGOP
2598 && REGNO (operands
[0]) == REGNO (operands
[1]) + 1)
2600 output_asm_insn (singlemove_string (latehalf
), latehalf
);
2601 return singlemove_string (operands
);
2604 /* Normal case: do the two words, low-numbered first. */
2606 output_asm_insn (singlemove_string (operands
), operands
);
2608 /* Make any unoffsettable addresses point at high-numbered word. */
2610 output_asm_insn ("ldo 4(%0),%0", &addreg0
);
2612 output_asm_insn ("ldo 4(%0),%0", &addreg1
);
2615 output_asm_insn (singlemove_string (latehalf
), latehalf
);
2617 /* Undo the adds we just did. */
2619 output_asm_insn ("ldo -4(%0),%0", &addreg0
);
2621 output_asm_insn ("ldo -4(%0),%0", &addreg1
);
2627 output_fp_move_double (rtx
*operands
)
2629 if (FP_REG_P (operands
[0]))
2631 if (FP_REG_P (operands
[1])
2632 || operands
[1] == CONST0_RTX (GET_MODE (operands
[0])))
2633 output_asm_insn ("fcpy,dbl %f1,%0", operands
);
2635 output_asm_insn ("fldd%F1 %1,%0", operands
);
2637 else if (FP_REG_P (operands
[1]))
2639 output_asm_insn ("fstd%F0 %1,%0", operands
);
2645 gcc_assert (operands
[1] == CONST0_RTX (GET_MODE (operands
[0])));
2647 /* This is a pain. You have to be prepared to deal with an
2648 arbitrary address here including pre/post increment/decrement.
2650 so avoid this in the MD. */
2651 gcc_assert (GET_CODE (operands
[0]) == REG
);
2653 xoperands
[1] = gen_rtx_REG (SImode
, REGNO (operands
[0]) + 1);
2654 xoperands
[0] = operands
[0];
2655 output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands
);
2660 /* Return a REG that occurs in ADDR with coefficient 1.
2661 ADDR can be effectively incremented by incrementing REG. */
2664 find_addr_reg (rtx addr
)
2666 while (GET_CODE (addr
) == PLUS
)
2668 if (GET_CODE (XEXP (addr
, 0)) == REG
)
2669 addr
= XEXP (addr
, 0);
2670 else if (GET_CODE (XEXP (addr
, 1)) == REG
)
2671 addr
= XEXP (addr
, 1);
2672 else if (CONSTANT_P (XEXP (addr
, 0)))
2673 addr
= XEXP (addr
, 1);
2674 else if (CONSTANT_P (XEXP (addr
, 1)))
2675 addr
= XEXP (addr
, 0);
2679 gcc_assert (GET_CODE (addr
) == REG
);
2683 /* Emit code to perform a block move.
2685 OPERANDS[0] is the destination pointer as a REG, clobbered.
2686 OPERANDS[1] is the source pointer as a REG, clobbered.
2687 OPERANDS[2] is a register for temporary storage.
2688 OPERANDS[3] is a register for temporary storage.
2689 OPERANDS[4] is the size as a CONST_INT
2690 OPERANDS[5] is the alignment safe to use, as a CONST_INT.
2691 OPERANDS[6] is another temporary register. */
2694 output_block_move (rtx
*operands
, int size_is_constant ATTRIBUTE_UNUSED
)
2696 int align
= INTVAL (operands
[5]);
2697 unsigned long n_bytes
= INTVAL (operands
[4]);
2699 /* We can't move more than a word at a time because the PA
2700 has no longer integer move insns. (Could use fp mem ops?) */
2701 if (align
> (TARGET_64BIT
? 8 : 4))
2702 align
= (TARGET_64BIT
? 8 : 4);
2704 /* Note that we know each loop below will execute at least twice
2705 (else we would have open-coded the copy). */
2709 /* Pre-adjust the loop counter. */
2710 operands
[4] = GEN_INT (n_bytes
- 16);
2711 output_asm_insn ("ldi %4,%2", operands
);
2714 output_asm_insn ("ldd,ma 8(%1),%3", operands
);
2715 output_asm_insn ("ldd,ma 8(%1),%6", operands
);
2716 output_asm_insn ("std,ma %3,8(%0)", operands
);
2717 output_asm_insn ("addib,>= -16,%2,.-12", operands
);
2718 output_asm_insn ("std,ma %6,8(%0)", operands
);
2720 /* Handle the residual. There could be up to 7 bytes of
2721 residual to copy! */
2722 if (n_bytes
% 16 != 0)
2724 operands
[4] = GEN_INT (n_bytes
% 8);
2725 if (n_bytes
% 16 >= 8)
2726 output_asm_insn ("ldd,ma 8(%1),%3", operands
);
2727 if (n_bytes
% 8 != 0)
2728 output_asm_insn ("ldd 0(%1),%6", operands
);
2729 if (n_bytes
% 16 >= 8)
2730 output_asm_insn ("std,ma %3,8(%0)", operands
);
2731 if (n_bytes
% 8 != 0)
2732 output_asm_insn ("stdby,e %6,%4(%0)", operands
);
2737 /* Pre-adjust the loop counter. */
2738 operands
[4] = GEN_INT (n_bytes
- 8);
2739 output_asm_insn ("ldi %4,%2", operands
);
2742 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands
);
2743 output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands
);
2744 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands
);
2745 output_asm_insn ("addib,>= -8,%2,.-12", operands
);
2746 output_asm_insn ("{stws|stw},ma %6,4(%0)", operands
);
2748 /* Handle the residual. There could be up to 7 bytes of
2749 residual to copy! */
2750 if (n_bytes
% 8 != 0)
2752 operands
[4] = GEN_INT (n_bytes
% 4);
2753 if (n_bytes
% 8 >= 4)
2754 output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands
);
2755 if (n_bytes
% 4 != 0)
2756 output_asm_insn ("ldw 0(%1),%6", operands
);
2757 if (n_bytes
% 8 >= 4)
2758 output_asm_insn ("{stws|stw},ma %3,4(%0)", operands
);
2759 if (n_bytes
% 4 != 0)
2760 output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands
);
2765 /* Pre-adjust the loop counter. */
2766 operands
[4] = GEN_INT (n_bytes
- 4);
2767 output_asm_insn ("ldi %4,%2", operands
);
2770 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands
);
2771 output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands
);
2772 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands
);
2773 output_asm_insn ("addib,>= -4,%2,.-12", operands
);
2774 output_asm_insn ("{sths|sth},ma %6,2(%0)", operands
);
2776 /* Handle the residual. */
2777 if (n_bytes
% 4 != 0)
2779 if (n_bytes
% 4 >= 2)
2780 output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands
);
2781 if (n_bytes
% 2 != 0)
2782 output_asm_insn ("ldb 0(%1),%6", operands
);
2783 if (n_bytes
% 4 >= 2)
2784 output_asm_insn ("{sths|sth},ma %3,2(%0)", operands
);
2785 if (n_bytes
% 2 != 0)
2786 output_asm_insn ("stb %6,0(%0)", operands
);
2791 /* Pre-adjust the loop counter. */
2792 operands
[4] = GEN_INT (n_bytes
- 2);
2793 output_asm_insn ("ldi %4,%2", operands
);
2796 output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands
);
2797 output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands
);
2798 output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands
);
2799 output_asm_insn ("addib,>= -2,%2,.-12", operands
);
2800 output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands
);
2802 /* Handle the residual. */
2803 if (n_bytes
% 2 != 0)
2805 output_asm_insn ("ldb 0(%1),%3", operands
);
2806 output_asm_insn ("stb %3,0(%0)", operands
);
2815 /* Count the number of insns necessary to handle this block move.
2817 Basic structure is the same as emit_block_move, except that we
2818 count insns rather than emit them. */
2821 compute_movmem_length (rtx insn
)
2823 rtx pat
= PATTERN (insn
);
2824 unsigned int align
= INTVAL (XEXP (XVECEXP (pat
, 0, 7), 0));
2825 unsigned long n_bytes
= INTVAL (XEXP (XVECEXP (pat
, 0, 6), 0));
2826 unsigned int n_insns
= 0;
2828 /* We can't move more than four bytes at a time because the PA
2829 has no longer integer move insns. (Could use fp mem ops?) */
2830 if (align
> (TARGET_64BIT
? 8 : 4))
2831 align
= (TARGET_64BIT
? 8 : 4);
2833 /* The basic copying loop. */
2837 if (n_bytes
% (2 * align
) != 0)
2839 if ((n_bytes
% (2 * align
)) >= align
)
2842 if ((n_bytes
% align
) != 0)
2846 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2850 /* Emit code to perform a block clear.
2852 OPERANDS[0] is the destination pointer as a REG, clobbered.
2853 OPERANDS[1] is a register for temporary storage.
2854 OPERANDS[2] is the size as a CONST_INT
2855 OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
2858 output_block_clear (rtx
*operands
, int size_is_constant ATTRIBUTE_UNUSED
)
2860 int align
= INTVAL (operands
[3]);
2861 unsigned long n_bytes
= INTVAL (operands
[2]);
2863 /* We can't clear more than a word at a time because the PA
2864 has no longer integer move insns. */
2865 if (align
> (TARGET_64BIT
? 8 : 4))
2866 align
= (TARGET_64BIT
? 8 : 4);
2868 /* Note that we know each loop below will execute at least twice
2869 (else we would have open-coded the copy). */
2873 /* Pre-adjust the loop counter. */
2874 operands
[2] = GEN_INT (n_bytes
- 16);
2875 output_asm_insn ("ldi %2,%1", operands
);
2878 output_asm_insn ("std,ma %%r0,8(%0)", operands
);
2879 output_asm_insn ("addib,>= -16,%1,.-4", operands
);
2880 output_asm_insn ("std,ma %%r0,8(%0)", operands
);
2882 /* Handle the residual. There could be up to 7 bytes of
2883 residual to copy! */
2884 if (n_bytes
% 16 != 0)
2886 operands
[2] = GEN_INT (n_bytes
% 8);
2887 if (n_bytes
% 16 >= 8)
2888 output_asm_insn ("std,ma %%r0,8(%0)", operands
);
2889 if (n_bytes
% 8 != 0)
2890 output_asm_insn ("stdby,e %%r0,%2(%0)", operands
);
2895 /* Pre-adjust the loop counter. */
2896 operands
[2] = GEN_INT (n_bytes
- 8);
2897 output_asm_insn ("ldi %2,%1", operands
);
2900 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands
);
2901 output_asm_insn ("addib,>= -8,%1,.-4", operands
);
2902 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands
);
2904 /* Handle the residual. There could be up to 7 bytes of
2905 residual to copy! */
2906 if (n_bytes
% 8 != 0)
2908 operands
[2] = GEN_INT (n_bytes
% 4);
2909 if (n_bytes
% 8 >= 4)
2910 output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands
);
2911 if (n_bytes
% 4 != 0)
2912 output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands
);
2917 /* Pre-adjust the loop counter. */
2918 operands
[2] = GEN_INT (n_bytes
- 4);
2919 output_asm_insn ("ldi %2,%1", operands
);
2922 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands
);
2923 output_asm_insn ("addib,>= -4,%1,.-4", operands
);
2924 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands
);
2926 /* Handle the residual. */
2927 if (n_bytes
% 4 != 0)
2929 if (n_bytes
% 4 >= 2)
2930 output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands
);
2931 if (n_bytes
% 2 != 0)
2932 output_asm_insn ("stb %%r0,0(%0)", operands
);
2937 /* Pre-adjust the loop counter. */
2938 operands
[2] = GEN_INT (n_bytes
- 2);
2939 output_asm_insn ("ldi %2,%1", operands
);
2942 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands
);
2943 output_asm_insn ("addib,>= -2,%1,.-4", operands
);
2944 output_asm_insn ("{stbs|stb},ma %%r0,1(%0)", operands
);
2946 /* Handle the residual. */
2947 if (n_bytes
% 2 != 0)
2948 output_asm_insn ("stb %%r0,0(%0)", operands
);
2957 /* Count the number of insns necessary to handle this block move.
2959 Basic structure is the same as emit_block_move, except that we
2960 count insns rather than emit them. */
2963 compute_clrmem_length (rtx insn
)
2965 rtx pat
= PATTERN (insn
);
2966 unsigned int align
= INTVAL (XEXP (XVECEXP (pat
, 0, 4), 0));
2967 unsigned long n_bytes
= INTVAL (XEXP (XVECEXP (pat
, 0, 3), 0));
2968 unsigned int n_insns
= 0;
2970 /* We can't clear more than a word at a time because the PA
2971 has no longer integer move insns. */
2972 if (align
> (TARGET_64BIT
? 8 : 4))
2973 align
= (TARGET_64BIT
? 8 : 4);
2975 /* The basic loop. */
2979 if (n_bytes
% (2 * align
) != 0)
2981 if ((n_bytes
% (2 * align
)) >= align
)
2984 if ((n_bytes
% align
) != 0)
2988 /* Lengths are expressed in bytes now; each insn is 4 bytes. */
2994 output_and (rtx
*operands
)
2996 if (GET_CODE (operands
[2]) == CONST_INT
&& INTVAL (operands
[2]) != 0)
2998 unsigned HOST_WIDE_INT mask
= INTVAL (operands
[2]);
2999 int ls0
, ls1
, ms0
, p
, len
;
3001 for (ls0
= 0; ls0
< 32; ls0
++)
3002 if ((mask
& (1 << ls0
)) == 0)
3005 for (ls1
= ls0
; ls1
< 32; ls1
++)
3006 if ((mask
& (1 << ls1
)) != 0)
3009 for (ms0
= ls1
; ms0
< 32; ms0
++)
3010 if ((mask
& (1 << ms0
)) == 0)
3013 gcc_assert (ms0
== 32);
3021 operands
[2] = GEN_INT (len
);
3022 return "{extru|extrw,u} %1,31,%2,%0";
3026 /* We could use this `depi' for the case above as well, but `depi'
3027 requires one more register file access than an `extru'. */
3032 operands
[2] = GEN_INT (p
);
3033 operands
[3] = GEN_INT (len
);
3034 return "{depi|depwi} 0,%2,%3,%0";
3038 return "and %1,%2,%0";
3041 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3042 storing the result in operands[0]. */
3044 output_64bit_and (rtx
*operands
)
3046 if (GET_CODE (operands
[2]) == CONST_INT
&& INTVAL (operands
[2]) != 0)
3048 unsigned HOST_WIDE_INT mask
= INTVAL (operands
[2]);
3049 int ls0
, ls1
, ms0
, p
, len
;
3051 for (ls0
= 0; ls0
< HOST_BITS_PER_WIDE_INT
; ls0
++)
3052 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << ls0
)) == 0)
3055 for (ls1
= ls0
; ls1
< HOST_BITS_PER_WIDE_INT
; ls1
++)
3056 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << ls1
)) != 0)
3059 for (ms0
= ls1
; ms0
< HOST_BITS_PER_WIDE_INT
; ms0
++)
3060 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << ms0
)) == 0)
3063 gcc_assert (ms0
== HOST_BITS_PER_WIDE_INT
);
3065 if (ls1
== HOST_BITS_PER_WIDE_INT
)
3071 operands
[2] = GEN_INT (len
);
3072 return "extrd,u %1,63,%2,%0";
3076 /* We could use this `depi' for the case above as well, but `depi'
3077 requires one more register file access than an `extru'. */
3082 operands
[2] = GEN_INT (p
);
3083 operands
[3] = GEN_INT (len
);
3084 return "depdi 0,%2,%3,%0";
3088 return "and %1,%2,%0";
3092 output_ior (rtx
*operands
)
3094 unsigned HOST_WIDE_INT mask
= INTVAL (operands
[2]);
3095 int bs0
, bs1
, p
, len
;
3097 if (INTVAL (operands
[2]) == 0)
3098 return "copy %1,%0";
3100 for (bs0
= 0; bs0
< 32; bs0
++)
3101 if ((mask
& (1 << bs0
)) != 0)
3104 for (bs1
= bs0
; bs1
< 32; bs1
++)
3105 if ((mask
& (1 << bs1
)) == 0)
3108 gcc_assert (bs1
== 32 || ((unsigned HOST_WIDE_INT
) 1 << bs1
) > mask
);
3113 operands
[2] = GEN_INT (p
);
3114 operands
[3] = GEN_INT (len
);
3115 return "{depi|depwi} -1,%2,%3,%0";
3118 /* Return a string to perform a bitwise-and of operands[1] with operands[2]
3119 storing the result in operands[0]. */
3121 output_64bit_ior (rtx
*operands
)
3123 unsigned HOST_WIDE_INT mask
= INTVAL (operands
[2]);
3124 int bs0
, bs1
, p
, len
;
3126 if (INTVAL (operands
[2]) == 0)
3127 return "copy %1,%0";
3129 for (bs0
= 0; bs0
< HOST_BITS_PER_WIDE_INT
; bs0
++)
3130 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << bs0
)) != 0)
3133 for (bs1
= bs0
; bs1
< HOST_BITS_PER_WIDE_INT
; bs1
++)
3134 if ((mask
& ((unsigned HOST_WIDE_INT
) 1 << bs1
)) == 0)
3137 gcc_assert (bs1
== HOST_BITS_PER_WIDE_INT
3138 || ((unsigned HOST_WIDE_INT
) 1 << bs1
) > mask
);
3143 operands
[2] = GEN_INT (p
);
3144 operands
[3] = GEN_INT (len
);
3145 return "depdi -1,%2,%3,%0";
3148 /* Target hook for assembling integer objects. This code handles
3149 aligned SI and DI integers specially since function references
3150 must be preceded by P%. */
3153 pa_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
3155 if (size
== UNITS_PER_WORD
3157 && function_label_operand (x
, VOIDmode
))
3159 fputs (size
== 8? "\t.dword\tP%" : "\t.word\tP%", asm_out_file
);
3160 output_addr_const (asm_out_file
, x
);
3161 fputc ('\n', asm_out_file
);
3164 return default_assemble_integer (x
, size
, aligned_p
);
/* Output an ascii string.  */
void
output_ascii (FILE *file, const char *p, int size)
{
  int i;
  int chars_output;
  unsigned char partial_output[16];	/* Max space 4 chars can occupy.  */

  /* The HP assembler can only take strings of 256 characters at one
     time.  This is a limitation on input line length, *not* the
     length of the string.  Sigh.  Even worse, it seems that the
     restriction is in number of input characters (see \xnn &
     \whatever).  So we have to do this very carefully.  */

  fputs ("\t.STRING \"", file);

  chars_output = 0;
  for (i = 0; i < size; i += 4)
    {
      int co = 0;
      int io = 0;
      for (io = 0, co = 0; io < MIN (4, size - i); io++)
	{
	  register unsigned int c = (unsigned char) p[i + io];

	  /* Escape quote and backslash; print other printable ASCII
	     characters literally, everything else as \xNN.  */
	  if (c == '\"' || c == '\\')
	    partial_output[co++] = '\\';
	  if (c >= ' ' && c < 0177)
	    partial_output[co++] = c;
	  else
	    {
	      unsigned int hexd;
	      partial_output[co++] = '\\';
	      partial_output[co++] = 'x';
	      hexd = c / 16 - 0 + '0';
	      if (hexd > '9')
		hexd -= '9' - 'a' + 1;
	      partial_output[co++] = hexd;
	      hexd = c % 16 - 0 + '0';
	      if (hexd > '9')
		hexd -= '9' - 'a' + 1;
	      partial_output[co++] = hexd;
	    }
	}
      /* Break the .STRING directive before the assembler's input line
	 limit is reached.  */
      if (chars_output + co > 243)
	{
	  fputs ("\"\n\t.STRING \"", file);
	  chars_output = 0;
	}
      fwrite (partial_output, 1, (size_t) co, file);
      chars_output += co;
      co = 0;
    }
  fputs ("\"\n", file);
}
3223 /* Try to rewrite floating point comparisons & branches to avoid
3224 useless add,tr insns.
3226 CHECK_NOTES is nonzero if we should examine REG_DEAD notes
3227 to see if FPCC is dead. CHECK_NOTES is nonzero for the
3228 first attempt to remove useless add,tr insns. It is zero
3229 for the second pass as reorg sometimes leaves bogus REG_DEAD
3232 When CHECK_NOTES is zero we can only eliminate add,tr insns
3233 when there's a 1:1 correspondence between fcmp and ftest/fbranch
3236 remove_useless_addtr_insns (int check_notes
)
3239 static int pass
= 0;
3241 /* This is fairly cheap, so always run it when optimizing. */
3245 int fbranch_count
= 0;
3247 /* Walk all the insns in this function looking for fcmp & fbranch
3248 instructions. Keep track of how many of each we find. */
3249 for (insn
= get_insns (); insn
; insn
= next_insn (insn
))
3253 /* Ignore anything that isn't an INSN or a JUMP_INSN. */
3254 if (GET_CODE (insn
) != INSN
&& GET_CODE (insn
) != JUMP_INSN
)
3257 tmp
= PATTERN (insn
);
3259 /* It must be a set. */
3260 if (GET_CODE (tmp
) != SET
)
3263 /* If the destination is CCFP, then we've found an fcmp insn. */
3264 tmp
= SET_DEST (tmp
);
3265 if (GET_CODE (tmp
) == REG
&& REGNO (tmp
) == 0)
3271 tmp
= PATTERN (insn
);
3272 /* If this is an fbranch instruction, bump the fbranch counter. */
3273 if (GET_CODE (tmp
) == SET
3274 && SET_DEST (tmp
) == pc_rtx
3275 && GET_CODE (SET_SRC (tmp
)) == IF_THEN_ELSE
3276 && GET_CODE (XEXP (SET_SRC (tmp
), 0)) == NE
3277 && GET_CODE (XEXP (XEXP (SET_SRC (tmp
), 0), 0)) == REG
3278 && REGNO (XEXP (XEXP (SET_SRC (tmp
), 0), 0)) == 0)
3286 /* Find all floating point compare + branch insns. If possible,
3287 reverse the comparison & the branch to avoid add,tr insns. */
3288 for (insn
= get_insns (); insn
; insn
= next_insn (insn
))
3292 /* Ignore anything that isn't an INSN. */
3293 if (GET_CODE (insn
) != INSN
)
3296 tmp
= PATTERN (insn
);
3298 /* It must be a set. */
3299 if (GET_CODE (tmp
) != SET
)
3302 /* The destination must be CCFP, which is register zero. */
3303 tmp
= SET_DEST (tmp
);
3304 if (GET_CODE (tmp
) != REG
|| REGNO (tmp
) != 0)
3307 /* INSN should be a set of CCFP.
3309 See if the result of this insn is used in a reversed FP
3310 conditional branch. If so, reverse our condition and
3311 the branch. Doing so avoids useless add,tr insns. */
3312 next
= next_insn (insn
);
3315 /* Jumps, calls and labels stop our search. */
3316 if (GET_CODE (next
) == JUMP_INSN
3317 || GET_CODE (next
) == CALL_INSN
3318 || GET_CODE (next
) == CODE_LABEL
)
3321 /* As does another fcmp insn. */
3322 if (GET_CODE (next
) == INSN
3323 && GET_CODE (PATTERN (next
)) == SET
3324 && GET_CODE (SET_DEST (PATTERN (next
))) == REG
3325 && REGNO (SET_DEST (PATTERN (next
))) == 0)
3328 next
= next_insn (next
);
3331 /* Is NEXT_INSN a branch? */
3333 && GET_CODE (next
) == JUMP_INSN
)
3335 rtx pattern
= PATTERN (next
);
3337 /* If it a reversed fp conditional branch (e.g. uses add,tr)
3338 and CCFP dies, then reverse our conditional and the branch
3339 to avoid the add,tr. */
3340 if (GET_CODE (pattern
) == SET
3341 && SET_DEST (pattern
) == pc_rtx
3342 && GET_CODE (SET_SRC (pattern
)) == IF_THEN_ELSE
3343 && GET_CODE (XEXP (SET_SRC (pattern
), 0)) == NE
3344 && GET_CODE (XEXP (XEXP (SET_SRC (pattern
), 0), 0)) == REG
3345 && REGNO (XEXP (XEXP (SET_SRC (pattern
), 0), 0)) == 0
3346 && GET_CODE (XEXP (SET_SRC (pattern
), 1)) == PC
3347 && (fcmp_count
== fbranch_count
3349 && find_regno_note (next
, REG_DEAD
, 0))))
3351 /* Reverse the branch. */
3352 tmp
= XEXP (SET_SRC (pattern
), 1);
3353 XEXP (SET_SRC (pattern
), 1) = XEXP (SET_SRC (pattern
), 2);
3354 XEXP (SET_SRC (pattern
), 2) = tmp
;
3355 INSN_CODE (next
) = -1;
3357 /* Reverse our condition. */
3358 tmp
= PATTERN (insn
);
3359 PUT_CODE (XEXP (tmp
, 1),
3360 (reverse_condition_maybe_unordered
3361 (GET_CODE (XEXP (tmp
, 1)))));
3371 /* You may have trouble believing this, but this is the 32 bit HP-PA
3376 Variable arguments (optional; any number may be allocated)
3378 SP-(4*(N+9)) arg word N
3383 Fixed arguments (must be allocated; may remain unused)
3392 SP-32 External Data Pointer (DP)
3394 SP-24 External/stub RP (RP')
3398 SP-8 Calling Stub RP (RP'')
3403 SP-0 Stack Pointer (points to next available address)
3407 /* This function saves registers as follows. Registers marked with ' are
3408 this function's registers (as opposed to the previous function's).
3409 If a frame_pointer isn't needed, r4 is saved as a general register;
3410 the space for the frame pointer is still allocated, though, to keep
3416 SP (FP') Previous FP
3417 SP + 4 Alignment filler (sigh)
3418 SP + 8 Space for locals reserved here.
3422 SP + n All call saved register used.
3426 SP + o All call saved fp registers used.
3430 SP + p (SP') points to next available address.
3434 /* Global variables set by output_function_prologue(). */
3435 /* Size of frame. Need to know this to emit return insns from
3437 static HOST_WIDE_INT actual_fsize
, local_fsize
;
3438 static int save_fregs
;
3440 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
3441 Handle case where DISP > 8k by using the add_high_const patterns.
3443 Note in DISP > 8k case, we will leave the high part of the address
3444 in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/
3447 store_reg (int reg
, HOST_WIDE_INT disp
, int base
)
3449 rtx insn
, dest
, src
, basereg
;
3451 src
= gen_rtx_REG (word_mode
, reg
);
3452 basereg
= gen_rtx_REG (Pmode
, base
);
3453 if (VAL_14_BITS_P (disp
))
3455 dest
= gen_rtx_MEM (word_mode
, plus_constant (basereg
, disp
));
3456 insn
= emit_move_insn (dest
, src
);
3458 else if (TARGET_64BIT
&& !VAL_32_BITS_P (disp
))
3460 rtx delta
= GEN_INT (disp
);
3461 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
3463 emit_move_insn (tmpreg
, delta
);
3464 insn
= emit_move_insn (tmpreg
, gen_rtx_PLUS (Pmode
, tmpreg
, basereg
));
3467 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
3468 gen_rtx_SET (VOIDmode
, tmpreg
,
3469 gen_rtx_PLUS (Pmode
, basereg
, delta
)));
3470 RTX_FRAME_RELATED_P (insn
) = 1;
3472 dest
= gen_rtx_MEM (word_mode
, tmpreg
);
3473 insn
= emit_move_insn (dest
, src
);
3477 rtx delta
= GEN_INT (disp
);
3478 rtx high
= gen_rtx_PLUS (Pmode
, basereg
, gen_rtx_HIGH (Pmode
, delta
));
3479 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
3481 emit_move_insn (tmpreg
, high
);
3482 dest
= gen_rtx_MEM (word_mode
, gen_rtx_LO_SUM (Pmode
, tmpreg
, delta
));
3483 insn
= emit_move_insn (dest
, src
);
3485 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
3486 gen_rtx_SET (VOIDmode
,
3487 gen_rtx_MEM (word_mode
,
3488 gen_rtx_PLUS (word_mode
,
3495 RTX_FRAME_RELATED_P (insn
) = 1;
3498 /* Emit RTL to store REG at the memory location specified by BASE and then
3499 add MOD to BASE. MOD must be <= 8k. */
3502 store_reg_modify (int base
, int reg
, HOST_WIDE_INT mod
)
3504 rtx insn
, basereg
, srcreg
, delta
;
3506 gcc_assert (VAL_14_BITS_P (mod
));
3508 basereg
= gen_rtx_REG (Pmode
, base
);
3509 srcreg
= gen_rtx_REG (word_mode
, reg
);
3510 delta
= GEN_INT (mod
);
3512 insn
= emit_insn (gen_post_store (basereg
, srcreg
, delta
));
3515 RTX_FRAME_RELATED_P (insn
) = 1;
3517 /* RTX_FRAME_RELATED_P must be set on each frame related set
3518 in a parallel with more than one element. */
3519 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 0)) = 1;
3520 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 1)) = 1;
3524 /* Emit RTL to set REG to the value specified by BASE+DISP. Handle case
3525 where DISP > 8k by using the add_high_const patterns. NOTE indicates
3526 whether to add a frame note or not.
3528 In the DISP > 8k case, we leave the high part of the address in %r1.
3529 There is code in expand_hppa_{prologue,epilogue} that knows about this. */
3532 set_reg_plus_d (int reg
, int base
, HOST_WIDE_INT disp
, int note
)
3536 if (VAL_14_BITS_P (disp
))
3538 insn
= emit_move_insn (gen_rtx_REG (Pmode
, reg
),
3539 plus_constant (gen_rtx_REG (Pmode
, base
), disp
));
3541 else if (TARGET_64BIT
&& !VAL_32_BITS_P (disp
))
3543 rtx basereg
= gen_rtx_REG (Pmode
, base
);
3544 rtx delta
= GEN_INT (disp
);
3545 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
3547 emit_move_insn (tmpreg
, delta
);
3548 insn
= emit_move_insn (gen_rtx_REG (Pmode
, reg
),
3549 gen_rtx_PLUS (Pmode
, tmpreg
, basereg
));
3551 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
3552 gen_rtx_SET (VOIDmode
, tmpreg
,
3553 gen_rtx_PLUS (Pmode
, basereg
, delta
)));
3557 rtx basereg
= gen_rtx_REG (Pmode
, base
);
3558 rtx delta
= GEN_INT (disp
);
3559 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
3561 emit_move_insn (tmpreg
,
3562 gen_rtx_PLUS (Pmode
, basereg
,
3563 gen_rtx_HIGH (Pmode
, delta
)));
3564 insn
= emit_move_insn (gen_rtx_REG (Pmode
, reg
),
3565 gen_rtx_LO_SUM (Pmode
, tmpreg
, delta
));
3568 if (DO_FRAME_NOTES
&& note
)
3569 RTX_FRAME_RELATED_P (insn
) = 1;
3573 compute_frame_size (HOST_WIDE_INT size
, int *fregs_live
)
3578 /* The code in hppa_expand_prologue and hppa_expand_epilogue must
3579 be consistent with the rounding and size calculation done here.
3580 Change them at the same time. */
3582 /* We do our own stack alignment. First, round the size of the
3583 stack locals up to a word boundary. */
3584 size
= (size
+ UNITS_PER_WORD
- 1) & ~(UNITS_PER_WORD
- 1);
3586 /* Space for previous frame pointer + filler. If any frame is
3587 allocated, we need to add in the STARTING_FRAME_OFFSET. We
3588 waste some space here for the sake of HP compatibility. The
3589 first slot is only used when the frame pointer is needed. */
3590 if (size
|| frame_pointer_needed
)
3591 size
+= STARTING_FRAME_OFFSET
;
3593 /* If the current function calls __builtin_eh_return, then we need
3594 to allocate stack space for registers that will hold data for
3595 the exception handler. */
3596 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
3600 for (i
= 0; EH_RETURN_DATA_REGNO (i
) != INVALID_REGNUM
; ++i
)
3602 size
+= i
* UNITS_PER_WORD
;
3605 /* Account for space used by the callee general register saves. */
3606 for (i
= 18, j
= frame_pointer_needed
? 4 : 3; i
>= j
; i
--)
3607 if (df_regs_ever_live_p (i
))
3608 size
+= UNITS_PER_WORD
;
3610 /* Account for space used by the callee floating point register saves. */
3611 for (i
= FP_SAVED_REG_LAST
; i
>= FP_SAVED_REG_FIRST
; i
-= FP_REG_STEP
)
3612 if (df_regs_ever_live_p (i
)
3613 || (!TARGET_64BIT
&& df_regs_ever_live_p (i
+ 1)))
3617 /* We always save both halves of the FP register, so always
3618 increment the frame size by 8 bytes. */
3622 /* If any of the floating registers are saved, account for the
3623 alignment needed for the floating point register save block. */
3626 size
= (size
+ 7) & ~7;
3631 /* The various ABIs include space for the outgoing parameters in the
3632 size of the current function's stack frame. We don't need to align
3633 for the outgoing arguments as their alignment is set by the final
3634 rounding for the frame as a whole. */
3635 size
+= crtl
->outgoing_args_size
;
3637 /* Allocate space for the fixed frame marker. This space must be
3638 allocated for any function that makes calls or allocates
3640 if (!current_function_is_leaf
|| size
)
3641 size
+= TARGET_64BIT
? 48 : 32;
3643 /* Finally, round to the preferred stack boundary. */
3644 return ((size
+ PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
- 1)
3645 & ~(PREFERRED_STACK_BOUNDARY
/ BITS_PER_UNIT
- 1));
3648 /* Generate the assembly code for function entry. FILE is a stdio
3649 stream to output the code to. SIZE is an int: how many units of
3650 temporary storage to allocate.
3652 Refer to the array `regs_ever_live' to determine which registers to
3653 save; `regs_ever_live[I]' is nonzero if register number I is ever
3654 used in the function. This function is responsible for knowing
3655 which registers should not be saved even if used. */
3657 /* On HP-PA, move-double insns between fpu and cpu need an 8-byte block
3658 of memory. If any fpu reg is used in the function, we allocate
3659 such a block here, at the bottom of the frame, just in case it's needed.
3661 If this function is a leaf procedure, then we may choose not
3662 to do a "save" insn. The decision about whether or not
3663 to do this is made in regclass.c. */
3666 pa_output_function_prologue (FILE *file
, HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
3668 /* The function's label and associated .PROC must never be
3669 separated and must be output *after* any profiling declarations
3670 to avoid changing spaces/subspaces within a procedure. */
3671 ASM_OUTPUT_LABEL (file
, XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0));
3672 fputs ("\t.PROC\n", file
);
3674 /* hppa_expand_prologue does the dirty work now. We just need
3675 to output the assembler directives which denote the start
3677 fprintf (file
, "\t.CALLINFO FRAME=" HOST_WIDE_INT_PRINT_DEC
, actual_fsize
);
3678 if (current_function_is_leaf
)
3679 fputs (",NO_CALLS", file
);
3681 fputs (",CALLS", file
);
3683 fputs (",SAVE_RP", file
);
3685 /* The SAVE_SP flag is used to indicate that register %r3 is stored
3686 at the beginning of the frame and that it is used as the frame
3687 pointer for the frame. We do this because our current frame
3688 layout doesn't conform to that specified in the HP runtime
3689 documentation and we need a way to indicate to programs such as
3690 GDB where %r3 is saved. The SAVE_SP flag was chosen because it
3691 isn't used by HP compilers but is supported by the assembler.
3692 However, SAVE_SP is supposed to indicate that the previous stack
3693 pointer has been saved in the frame marker. */
3694 if (frame_pointer_needed
)
3695 fputs (",SAVE_SP", file
);
3697 /* Pass on information about the number of callee register saves
3698 performed in the prologue.
3700 The compiler is supposed to pass the highest register number
3701 saved, the assembler then has to adjust that number before
3702 entering it into the unwind descriptor (to account for any
3703 caller saved registers with lower register numbers than the
3704 first callee saved register). */
3706 fprintf (file
, ",ENTRY_GR=%d", gr_saved
+ 2);
3709 fprintf (file
, ",ENTRY_FR=%d", fr_saved
+ 11);
3711 fputs ("\n\t.ENTRY\n", file
);
3713 remove_useless_addtr_insns (0);
3717 hppa_expand_prologue (void)
3719 int merge_sp_adjust_with_store
= 0;
3720 HOST_WIDE_INT size
= get_frame_size ();
3721 HOST_WIDE_INT offset
;
3729 /* Compute total size for frame pointer, filler, locals and rounding to
3730 the next word boundary. Similar code appears in compute_frame_size
3731 and must be changed in tandem with this code. */
3732 local_fsize
= (size
+ UNITS_PER_WORD
- 1) & ~(UNITS_PER_WORD
- 1);
3733 if (local_fsize
|| frame_pointer_needed
)
3734 local_fsize
+= STARTING_FRAME_OFFSET
;
3736 actual_fsize
= compute_frame_size (size
, &save_fregs
);
3738 /* Compute a few things we will use often. */
3739 tmpreg
= gen_rtx_REG (word_mode
, 1);
3741 /* Save RP first. The calling conventions manual states RP will
3742 always be stored into the caller's frame at sp - 20 or sp - 16
3743 depending on which ABI is in use. */
3744 if (df_regs_ever_live_p (2) || crtl
->calls_eh_return
)
3746 store_reg (2, TARGET_64BIT
? -16 : -20, STACK_POINTER_REGNUM
);
3752 /* Allocate the local frame and set up the frame pointer if needed. */
3753 if (actual_fsize
!= 0)
3755 if (frame_pointer_needed
)
3757 /* Copy the old frame pointer temporarily into %r1. Set up the
3758 new stack pointer, then store away the saved old frame pointer
3759 into the stack at sp and at the same time update the stack
3760 pointer by actual_fsize bytes. Two versions, first
3761 handles small (<8k) frames. The second handles large (>=8k)
3763 insn
= emit_move_insn (tmpreg
, frame_pointer_rtx
);
3765 RTX_FRAME_RELATED_P (insn
) = 1;
3767 insn
= emit_move_insn (frame_pointer_rtx
, stack_pointer_rtx
);
3769 RTX_FRAME_RELATED_P (insn
) = 1;
3771 if (VAL_14_BITS_P (actual_fsize
))
3772 store_reg_modify (STACK_POINTER_REGNUM
, 1, actual_fsize
);
3775 /* It is incorrect to store the saved frame pointer at *sp,
3776 then increment sp (writes beyond the current stack boundary).
3778 So instead use stwm to store at *sp and post-increment the
3779 stack pointer as an atomic operation. Then increment sp to
3780 finish allocating the new frame. */
3781 HOST_WIDE_INT adjust1
= 8192 - 64;
3782 HOST_WIDE_INT adjust2
= actual_fsize
- adjust1
;
3784 store_reg_modify (STACK_POINTER_REGNUM
, 1, adjust1
);
3785 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
3789 /* We set SAVE_SP in frames that need a frame pointer. Thus,
3790 we need to store the previous stack pointer (frame pointer)
3791 into the frame marker on targets that use the HP unwind
3792 library. This allows the HP unwind library to be used to
3793 unwind GCC frames. However, we are not fully compatible
3794 with the HP library because our frame layout differs from
3795 that specified in the HP runtime specification.
3797 We don't want a frame note on this instruction as the frame
3798 marker moves during dynamic stack allocation.
3800 This instruction also serves as a blockage to prevent
3801 register spills from being scheduled before the stack
3802 pointer is raised. This is necessary as we store
3803 registers using the frame pointer as a base register,
3804 and the frame pointer is set before sp is raised. */
3805 if (TARGET_HPUX_UNWIND_LIBRARY
)
3807 rtx addr
= gen_rtx_PLUS (word_mode
, stack_pointer_rtx
,
3808 GEN_INT (TARGET_64BIT
? -8 : -4));
3810 emit_move_insn (gen_rtx_MEM (word_mode
, addr
),
3814 emit_insn (gen_blockage ());
3816 /* no frame pointer needed. */
3819 /* In some cases we can perform the first callee register save
3820 and allocating the stack frame at the same time. If so, just
3821 make a note of it and defer allocating the frame until saving
3822 the callee registers. */
3823 if (VAL_14_BITS_P (actual_fsize
) && local_fsize
== 0)
3824 merge_sp_adjust_with_store
= 1;
3825 /* Can not optimize. Adjust the stack frame by actual_fsize
3828 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
3833 /* Normal register save.
3835 Do not save the frame pointer in the frame_pointer_needed case. It
3836 was done earlier. */
3837 if (frame_pointer_needed
)
3839 offset
= local_fsize
;
3841 /* Saving the EH return data registers in the frame is the simplest
3842 way to get the frame unwind information emitted. We put them
3843 just before the general registers. */
3844 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
3846 unsigned int i
, regno
;
3850 regno
= EH_RETURN_DATA_REGNO (i
);
3851 if (regno
== INVALID_REGNUM
)
3854 store_reg (regno
, offset
, FRAME_POINTER_REGNUM
);
3855 offset
+= UNITS_PER_WORD
;
3859 for (i
= 18; i
>= 4; i
--)
3860 if (df_regs_ever_live_p (i
) && ! call_used_regs
[i
])
3862 store_reg (i
, offset
, FRAME_POINTER_REGNUM
);
3863 offset
+= UNITS_PER_WORD
;
3866 /* Account for %r3 which is saved in a special place. */
3869 /* No frame pointer needed. */
3872 offset
= local_fsize
- actual_fsize
;
3874 /* Saving the EH return data registers in the frame is the simplest
3875 way to get the frame unwind information emitted. */
3876 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
3878 unsigned int i
, regno
;
3882 regno
= EH_RETURN_DATA_REGNO (i
);
3883 if (regno
== INVALID_REGNUM
)
3886 /* If merge_sp_adjust_with_store is nonzero, then we can
3887 optimize the first save. */
3888 if (merge_sp_adjust_with_store
)
3890 store_reg_modify (STACK_POINTER_REGNUM
, regno
, -offset
);
3891 merge_sp_adjust_with_store
= 0;
3894 store_reg (regno
, offset
, STACK_POINTER_REGNUM
);
3895 offset
+= UNITS_PER_WORD
;
3899 for (i
= 18; i
>= 3; i
--)
3900 if (df_regs_ever_live_p (i
) && ! call_used_regs
[i
])
3902 /* If merge_sp_adjust_with_store is nonzero, then we can
3903 optimize the first GR save. */
3904 if (merge_sp_adjust_with_store
)
3906 store_reg_modify (STACK_POINTER_REGNUM
, i
, -offset
);
3907 merge_sp_adjust_with_store
= 0;
3910 store_reg (i
, offset
, STACK_POINTER_REGNUM
);
3911 offset
+= UNITS_PER_WORD
;
3915 /* If we wanted to merge the SP adjustment with a GR save, but we never
3916 did any GR saves, then just emit the adjustment here. */
3917 if (merge_sp_adjust_with_store
)
3918 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
3922 /* The hppa calling conventions say that %r19, the pic offset
3923 register, is saved at sp - 32 (in this function's frame)
3924 when generating PIC code. FIXME: What is the correct thing
3925 to do for functions which make no calls and allocate no
3926 frame? Do we need to allocate a frame, or can we just omit
3927 the save? For now we'll just omit the save.
3929 We don't want a note on this insn as the frame marker can
3930 move if there is a dynamic stack allocation. */
3931 if (flag_pic
&& actual_fsize
!= 0 && !TARGET_64BIT
)
3933 rtx addr
= gen_rtx_PLUS (word_mode
, stack_pointer_rtx
, GEN_INT (-32));
3935 emit_move_insn (gen_rtx_MEM (word_mode
, addr
), pic_offset_table_rtx
);
3939 /* Align pointer properly (doubleword boundary). */
3940 offset
= (offset
+ 7) & ~7;
3942 /* Floating point register store. */
3947 /* First get the frame or stack pointer to the start of the FP register
3949 if (frame_pointer_needed
)
3951 set_reg_plus_d (1, FRAME_POINTER_REGNUM
, offset
, 0);
3952 base
= frame_pointer_rtx
;
3956 set_reg_plus_d (1, STACK_POINTER_REGNUM
, offset
, 0);
3957 base
= stack_pointer_rtx
;
3960 /* Now actually save the FP registers. */
3961 for (i
= FP_SAVED_REG_LAST
; i
>= FP_SAVED_REG_FIRST
; i
-= FP_REG_STEP
)
3963 if (df_regs_ever_live_p (i
)
3964 || (! TARGET_64BIT
&& df_regs_ever_live_p (i
+ 1)))
3966 rtx addr
, insn
, reg
;
3967 addr
= gen_rtx_MEM (DFmode
, gen_rtx_POST_INC (DFmode
, tmpreg
));
3968 reg
= gen_rtx_REG (DFmode
, i
);
3969 insn
= emit_move_insn (addr
, reg
);
3972 RTX_FRAME_RELATED_P (insn
) = 1;
3975 rtx mem
= gen_rtx_MEM (DFmode
,
3976 plus_constant (base
, offset
));
3977 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
3978 gen_rtx_SET (VOIDmode
, mem
, reg
));
3982 rtx meml
= gen_rtx_MEM (SFmode
,
3983 plus_constant (base
, offset
));
3984 rtx memr
= gen_rtx_MEM (SFmode
,
3985 plus_constant (base
, offset
+ 4));
3986 rtx regl
= gen_rtx_REG (SFmode
, i
);
3987 rtx regr
= gen_rtx_REG (SFmode
, i
+ 1);
3988 rtx setl
= gen_rtx_SET (VOIDmode
, meml
, regl
);
3989 rtx setr
= gen_rtx_SET (VOIDmode
, memr
, regr
);
3992 RTX_FRAME_RELATED_P (setl
) = 1;
3993 RTX_FRAME_RELATED_P (setr
) = 1;
3994 vec
= gen_rtvec (2, setl
, setr
);
3995 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
,
3996 gen_rtx_SEQUENCE (VOIDmode
, vec
));
3999 offset
+= GET_MODE_SIZE (DFmode
);
4006 /* Emit RTL to load REG from the memory location specified by BASE+DISP.
4007 Handle case where DISP > 8k by using the add_high_const patterns. */
4010 load_reg (int reg
, HOST_WIDE_INT disp
, int base
)
4012 rtx dest
= gen_rtx_REG (word_mode
, reg
);
4013 rtx basereg
= gen_rtx_REG (Pmode
, base
);
4016 if (VAL_14_BITS_P (disp
))
4017 src
= gen_rtx_MEM (word_mode
, plus_constant (basereg
, disp
));
4018 else if (TARGET_64BIT
&& !VAL_32_BITS_P (disp
))
4020 rtx delta
= GEN_INT (disp
);
4021 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
4023 emit_move_insn (tmpreg
, delta
);
4024 if (TARGET_DISABLE_INDEXING
)
4026 emit_move_insn (tmpreg
, gen_rtx_PLUS (Pmode
, tmpreg
, basereg
));
4027 src
= gen_rtx_MEM (word_mode
, tmpreg
);
4030 src
= gen_rtx_MEM (word_mode
, gen_rtx_PLUS (Pmode
, tmpreg
, basereg
));
4034 rtx delta
= GEN_INT (disp
);
4035 rtx high
= gen_rtx_PLUS (Pmode
, basereg
, gen_rtx_HIGH (Pmode
, delta
));
4036 rtx tmpreg
= gen_rtx_REG (Pmode
, 1);
4038 emit_move_insn (tmpreg
, high
);
4039 src
= gen_rtx_MEM (word_mode
, gen_rtx_LO_SUM (Pmode
, tmpreg
, delta
));
4042 emit_move_insn (dest
, src
);
4045 /* Update the total code bytes output to the text section. */
4048 update_total_code_bytes (unsigned int nbytes
)
4050 if ((TARGET_PORTABLE_RUNTIME
|| !TARGET_GAS
|| !TARGET_SOM
)
4051 && !IN_NAMED_SECTION_P (cfun
->decl
))
4053 unsigned int old_total
= total_code_bytes
;
4055 total_code_bytes
+= nbytes
;
4057 /* Be prepared to handle overflows. */
4058 if (old_total
> total_code_bytes
)
4059 total_code_bytes
= UINT_MAX
;
4063 /* This function generates the assembly code for function exit.
4064 Args are as for output_function_prologue ().
4066 The function epilogue should not depend on the current stack
4067 pointer! It should use the frame pointer only. This is mandatory
4068 because of alloca; we also take advantage of it to omit stack
4069 adjustments before returning. */
4072 pa_output_function_epilogue (FILE *file
, HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
4074 rtx insn
= get_last_insn ();
4078 /* hppa_expand_epilogue does the dirty work now. We just need
4079 to output the assembler directives which denote the end
4082 To make debuggers happy, emit a nop if the epilogue was completely
4083 eliminated due to a volatile call as the last insn in the
4084 current function. That way the return address (in %r2) will
4085 always point to a valid instruction in the current function. */
4087 /* Get the last real insn. */
4088 if (GET_CODE (insn
) == NOTE
)
4089 insn
= prev_real_insn (insn
);
4091 /* If it is a sequence, then look inside. */
4092 if (insn
&& GET_CODE (insn
) == INSN
&& GET_CODE (PATTERN (insn
)) == SEQUENCE
)
4093 insn
= XVECEXP (PATTERN (insn
), 0, 0);
4095 /* If insn is a CALL_INSN, then it must be a call to a volatile
4096 function (otherwise there would be epilogue insns). */
4097 if (insn
&& GET_CODE (insn
) == CALL_INSN
)
4099 fputs ("\tnop\n", file
);
4103 fputs ("\t.EXIT\n\t.PROCEND\n", file
);
4105 if (TARGET_SOM
&& TARGET_GAS
)
4107 /* We done with this subspace except possibly for some additional
4108 debug information. Forget that we are in this subspace to ensure
4109 that the next function is output in its own subspace. */
4111 cfun
->machine
->in_nsubspa
= 2;
4114 if (INSN_ADDRESSES_SET_P ())
4116 insn
= get_last_nonnote_insn ();
4117 last_address
+= INSN_ADDRESSES (INSN_UID (insn
));
4119 last_address
+= insn_default_length (insn
);
4120 last_address
= ((last_address
+ FUNCTION_BOUNDARY
/ BITS_PER_UNIT
- 1)
4121 & ~(FUNCTION_BOUNDARY
/ BITS_PER_UNIT
- 1));
4124 last_address
= UINT_MAX
;
4126 /* Finally, update the total number of code bytes output so far. */
4127 update_total_code_bytes (last_address
);
4131 hppa_expand_epilogue (void)
4134 HOST_WIDE_INT offset
;
4135 HOST_WIDE_INT ret_off
= 0;
4137 int merge_sp_adjust_with_load
= 0;
4139 /* We will use this often. */
4140 tmpreg
= gen_rtx_REG (word_mode
, 1);
4142 /* Try to restore RP early to avoid load/use interlocks when
4143 RP gets used in the return (bv) instruction. This appears to still
4144 be necessary even when we schedule the prologue and epilogue. */
4147 ret_off
= TARGET_64BIT
? -16 : -20;
4148 if (frame_pointer_needed
)
4150 load_reg (2, ret_off
, FRAME_POINTER_REGNUM
);
4155 /* No frame pointer, and stack is smaller than 8k. */
4156 if (VAL_14_BITS_P (ret_off
- actual_fsize
))
4158 load_reg (2, ret_off
- actual_fsize
, STACK_POINTER_REGNUM
);
4164 /* General register restores. */
4165 if (frame_pointer_needed
)
4167 offset
= local_fsize
;
4169 /* If the current function calls __builtin_eh_return, then we need
4170 to restore the saved EH data registers. */
4171 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4173 unsigned int i
, regno
;
4177 regno
= EH_RETURN_DATA_REGNO (i
);
4178 if (regno
== INVALID_REGNUM
)
4181 load_reg (regno
, offset
, FRAME_POINTER_REGNUM
);
4182 offset
+= UNITS_PER_WORD
;
4186 for (i
= 18; i
>= 4; i
--)
4187 if (df_regs_ever_live_p (i
) && ! call_used_regs
[i
])
4189 load_reg (i
, offset
, FRAME_POINTER_REGNUM
);
4190 offset
+= UNITS_PER_WORD
;
4195 offset
= local_fsize
- actual_fsize
;
4197 /* If the current function calls __builtin_eh_return, then we need
4198 to restore the saved EH data registers. */
4199 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4201 unsigned int i
, regno
;
4205 regno
= EH_RETURN_DATA_REGNO (i
);
4206 if (regno
== INVALID_REGNUM
)
4209 /* Only for the first load.
4210 merge_sp_adjust_with_load holds the register load
4211 with which we will merge the sp adjustment. */
4212 if (merge_sp_adjust_with_load
== 0
4214 && VAL_14_BITS_P (-actual_fsize
))
4215 merge_sp_adjust_with_load
= regno
;
4217 load_reg (regno
, offset
, STACK_POINTER_REGNUM
);
4218 offset
+= UNITS_PER_WORD
;
4222 for (i
= 18; i
>= 3; i
--)
4224 if (df_regs_ever_live_p (i
) && ! call_used_regs
[i
])
4226 /* Only for the first load.
4227 merge_sp_adjust_with_load holds the register load
4228 with which we will merge the sp adjustment. */
4229 if (merge_sp_adjust_with_load
== 0
4231 && VAL_14_BITS_P (-actual_fsize
))
4232 merge_sp_adjust_with_load
= i
;
4234 load_reg (i
, offset
, STACK_POINTER_REGNUM
);
4235 offset
+= UNITS_PER_WORD
;
4240 /* Align pointer properly (doubleword boundary). */
4241 offset
= (offset
+ 7) & ~7;
4243 /* FP register restores. */
4246 /* Adjust the register to index off of. */
4247 if (frame_pointer_needed
)
4248 set_reg_plus_d (1, FRAME_POINTER_REGNUM
, offset
, 0);
4250 set_reg_plus_d (1, STACK_POINTER_REGNUM
, offset
, 0);
4252 /* Actually do the restores now. */
4253 for (i
= FP_SAVED_REG_LAST
; i
>= FP_SAVED_REG_FIRST
; i
-= FP_REG_STEP
)
4254 if (df_regs_ever_live_p (i
)
4255 || (! TARGET_64BIT
&& df_regs_ever_live_p (i
+ 1)))
4257 rtx src
= gen_rtx_MEM (DFmode
, gen_rtx_POST_INC (DFmode
, tmpreg
));
4258 rtx dest
= gen_rtx_REG (DFmode
, i
);
4259 emit_move_insn (dest
, src
);
4263 /* Emit a blockage insn here to keep these insns from being moved to
4264 an earlier spot in the epilogue, or into the main instruction stream.
4266 This is necessary as we must not cut the stack back before all the
4267 restores are finished. */
4268 emit_insn (gen_blockage ());
4270 /* Reset stack pointer (and possibly frame pointer). The stack
4271 pointer is initially set to fp + 64 to avoid a race condition. */
4272 if (frame_pointer_needed
)
4274 rtx delta
= GEN_INT (-64);
4276 set_reg_plus_d (STACK_POINTER_REGNUM
, FRAME_POINTER_REGNUM
, 64, 0);
4277 emit_insn (gen_pre_load (frame_pointer_rtx
, stack_pointer_rtx
, delta
));
4279 /* If we were deferring a callee register restore, do it now. */
4280 else if (merge_sp_adjust_with_load
)
4282 rtx delta
= GEN_INT (-actual_fsize
);
4283 rtx dest
= gen_rtx_REG (word_mode
, merge_sp_adjust_with_load
);
4285 emit_insn (gen_pre_load (dest
, stack_pointer_rtx
, delta
));
4287 else if (actual_fsize
!= 0)
4288 set_reg_plus_d (STACK_POINTER_REGNUM
, STACK_POINTER_REGNUM
,
4291 /* If we haven't restored %r2 yet (no frame pointer, and a stack
4292 frame greater than 8k), do so now. */
4294 load_reg (2, ret_off
, STACK_POINTER_REGNUM
);
4296 if (DO_FRAME_NOTES
&& crtl
->calls_eh_return
)
4298 rtx sa
= EH_RETURN_STACKADJ_RTX
;
4300 emit_insn (gen_blockage ());
4301 emit_insn (TARGET_64BIT
4302 ? gen_subdi3 (stack_pointer_rtx
, stack_pointer_rtx
, sa
)
4303 : gen_subsi3 (stack_pointer_rtx
, stack_pointer_rtx
, sa
));
4308 hppa_pic_save_rtx (void)
4310 return get_hard_reg_initial_val (word_mode
, PIC_OFFSET_TABLE_REGNUM
);
4313 #ifndef NO_DEFERRED_PROFILE_COUNTERS
4314 #define NO_DEFERRED_PROFILE_COUNTERS 0
/* Vector of funcdef numbers.  Pushed by hppa_profile_hook and drained
   by output_deferred_profile_counters.  */
static VEC(int,heap) *funcdef_nos;
4321 /* Output deferred profile counters. */
4323 output_deferred_profile_counters (void)
4328 if (VEC_empty (int, funcdef_nos
))
4331 switch_to_section (data_section
);
4332 align
= MIN (BIGGEST_ALIGNMENT
, LONG_TYPE_SIZE
);
4333 ASM_OUTPUT_ALIGN (asm_out_file
, floor_log2 (align
/ BITS_PER_UNIT
));
4335 for (i
= 0; VEC_iterate (int, funcdef_nos
, i
, n
); i
++)
4337 targetm
.asm_out
.internal_label (asm_out_file
, "LP", n
);
4338 assemble_integer (const0_rtx
, LONG_TYPE_SIZE
/ BITS_PER_UNIT
, align
, 1);
4341 VEC_free (int, heap
, funcdef_nos
);
4345 hppa_profile_hook (int label_no
)
4347 /* We use SImode for the address of the function in both 32 and
4348 64-bit code to avoid having to provide DImode versions of the
4349 lcla2 and load_offset_label_address insn patterns. */
4350 rtx reg
= gen_reg_rtx (SImode
);
4351 rtx label_rtx
= gen_label_rtx ();
4352 rtx begin_label_rtx
, call_insn
;
4353 char begin_label_name
[16];
4355 ASM_GENERATE_INTERNAL_LABEL (begin_label_name
, FUNC_BEGIN_PROLOG_LABEL
,
4357 begin_label_rtx
= gen_rtx_SYMBOL_REF (SImode
, ggc_strdup (begin_label_name
));
4360 emit_move_insn (arg_pointer_rtx
,
4361 gen_rtx_PLUS (word_mode
, virtual_outgoing_args_rtx
,
4364 emit_move_insn (gen_rtx_REG (word_mode
, 26), gen_rtx_REG (word_mode
, 2));
4366 /* The address of the function is loaded into %r25 with an instruction-
4367 relative sequence that avoids the use of relocations. The sequence
4368 is split so that the load_offset_label_address instruction can
4369 occupy the delay slot of the call to _mcount. */
4371 emit_insn (gen_lcla2 (reg
, label_rtx
));
4373 emit_insn (gen_lcla1 (reg
, label_rtx
));
4375 emit_insn (gen_load_offset_label_address (gen_rtx_REG (SImode
, 25),
4376 reg
, begin_label_rtx
, label_rtx
));
4378 #if !NO_DEFERRED_PROFILE_COUNTERS
4380 rtx count_label_rtx
, addr
, r24
;
4381 char count_label_name
[16];
4383 VEC_safe_push (int, heap
, funcdef_nos
, label_no
);
4384 ASM_GENERATE_INTERNAL_LABEL (count_label_name
, "LP", label_no
);
4385 count_label_rtx
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (count_label_name
));
4387 addr
= force_reg (Pmode
, count_label_rtx
);
4388 r24
= gen_rtx_REG (Pmode
, 24);
4389 emit_move_insn (r24
, addr
);
4392 emit_call_insn (gen_call (gen_rtx_MEM (Pmode
,
4393 gen_rtx_SYMBOL_REF (Pmode
,
4395 GEN_INT (TARGET_64BIT
? 24 : 12)));
4397 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn
), r24
);
4402 emit_call_insn (gen_call (gen_rtx_MEM (Pmode
,
4403 gen_rtx_SYMBOL_REF (Pmode
,
4405 GEN_INT (TARGET_64BIT
? 16 : 8)));
4409 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn
), gen_rtx_REG (SImode
, 25));
4410 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn
), gen_rtx_REG (SImode
, 26));
4412 /* Indicate the _mcount call cannot throw, nor will it execute a
4414 make_reg_eh_region_note_nothrow_nononlocal (call_insn
);
4417 /* Fetch the return address for the frame COUNT steps up from
4418 the current frame, after the prologue. FRAMEADDR is the
4419 frame pointer of the COUNT frame.
4421 We want to ignore any export stub remnants here. To handle this,
4422 we examine the code at the return address, and if it is an export
4423 stub, we return a memory rtx for the stub return address stored
4426 The value returned is used in two different ways:
4428 1. To find a function's caller.
4430 2. To change the return address for a function.
4432 This function handles most instances of case 1; however, it will
4433 fail if there are two levels of stubs to execute on the return
4434 path. The only way I believe that can happen is if the return value
4435 needs a parameter relocation, which never happens for C code.
4437 This function handles most instances of case 2; however, it will
4438 fail if we did not originally have stub code on the return path
4439 but will need stub code on the new return path. This can happen if
4440 the caller & callee are both in the main program, but the new
4441 return location is in a shared library. */
4444 return_addr_rtx (int count
, rtx frameaddr
)
4451 /* Instruction stream at the normal return address for the export stub:
4453 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp
4454 0x004010a1 | stub+12: ldsid (sr0,rp),r1
4455 0x00011820 | stub+16: mtsp r1,sr0
4456 0xe0400002 | stub+20: be,n 0(sr0,rp)
4458 0xe0400002 must be specified as -532676606 so that it won't be
4459 rejected as an invalid immediate operand on 64-bit hosts. */
4461 HOST_WIDE_INT insns
[4] = {0x4bc23fd1, 0x004010a1, 0x00011820, -532676606};
4467 rp
= get_hard_reg_initial_val (Pmode
, 2);
4469 if (TARGET_64BIT
|| TARGET_NO_SPACE_REGS
)
4472 /* If there is no export stub then just use the value saved from
4473 the return pointer register. */
4475 saved_rp
= gen_reg_rtx (Pmode
);
4476 emit_move_insn (saved_rp
, rp
);
4478 /* Get pointer to the instruction stream. We have to mask out the
4479 privilege level from the two low order bits of the return address
4480 pointer here so that ins will point to the start of the first
4481 instruction that would have been executed if we returned. */
4482 ins
= copy_to_reg (gen_rtx_AND (Pmode
, rp
, MASK_RETURN_ADDR
));
4483 label
= gen_label_rtx ();
4485 /* Check the instruction stream at the normal return address for the
4486 export stub. If it is an export stub, than our return address is
4487 really in -24[frameaddr]. */
4489 for (i
= 0; i
< 3; i
++)
4491 rtx op0
= gen_rtx_MEM (SImode
, plus_constant (ins
, i
* 4));
4492 rtx op1
= GEN_INT (insns
[i
]);
4493 emit_cmp_and_jump_insns (op0
, op1
, NE
, NULL
, SImode
, 0, label
);
4496 /* Here we know that our return address points to an export
4497 stub. We don't want to return the address of the export stub,
4498 but rather the return address of the export stub. That return
4499 address is stored at -24[frameaddr]. */
4501 emit_move_insn (saved_rp
,
4503 memory_address (Pmode
,
4504 plus_constant (frameaddr
,
4513 emit_bcond_fp (rtx operands
[])
4515 enum rtx_code code
= GET_CODE (operands
[0]);
4516 rtx operand0
= operands
[1];
4517 rtx operand1
= operands
[2];
4518 rtx label
= operands
[3];
4520 emit_insn (gen_rtx_SET (VOIDmode
, gen_rtx_REG (CCFPmode
, 0),
4521 gen_rtx_fmt_ee (code
, CCFPmode
, operand0
, operand1
)));
4523 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
,
4524 gen_rtx_IF_THEN_ELSE (VOIDmode
,
4527 gen_rtx_REG (CCFPmode
, 0),
4529 gen_rtx_LABEL_REF (VOIDmode
, label
),
4534 /* Adjust the cost of a scheduling dependency. Return the new cost of
4535 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
4538 pa_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
4540 enum attr_type attr_type
;
4542 /* Don't adjust costs for a pa8000 chip, also do not adjust any
4543 true dependencies as they are described with bypasses now. */
4544 if (pa_cpu
>= PROCESSOR_8000
|| REG_NOTE_KIND (link
) == 0)
4547 if (! recog_memoized (insn
))
4550 attr_type
= get_attr_type (insn
);
4552 switch (REG_NOTE_KIND (link
))
4555 /* Anti dependency; DEP_INSN reads a register that INSN writes some
4558 if (attr_type
== TYPE_FPLOAD
)
4560 rtx pat
= PATTERN (insn
);
4561 rtx dep_pat
= PATTERN (dep_insn
);
4562 if (GET_CODE (pat
) == PARALLEL
)
4564 /* This happens for the fldXs,mb patterns. */
4565 pat
= XVECEXP (pat
, 0, 0);
4567 if (GET_CODE (pat
) != SET
|| GET_CODE (dep_pat
) != SET
)
4568 /* If this happens, we have to extend this to schedule
4569 optimally. Return 0 for now. */
4572 if (reg_mentioned_p (SET_DEST (pat
), SET_SRC (dep_pat
)))
4574 if (! recog_memoized (dep_insn
))
4576 switch (get_attr_type (dep_insn
))
4583 case TYPE_FPSQRTSGL
:
4584 case TYPE_FPSQRTDBL
:
4585 /* A fpload can't be issued until one cycle before a
4586 preceding arithmetic operation has finished if
4587 the target of the fpload is any of the sources
4588 (or destination) of the arithmetic operation. */
4589 return insn_default_latency (dep_insn
) - 1;
4596 else if (attr_type
== TYPE_FPALU
)
4598 rtx pat
= PATTERN (insn
);
4599 rtx dep_pat
= PATTERN (dep_insn
);
4600 if (GET_CODE (pat
) == PARALLEL
)
4602 /* This happens for the fldXs,mb patterns. */
4603 pat
= XVECEXP (pat
, 0, 0);
4605 if (GET_CODE (pat
) != SET
|| GET_CODE (dep_pat
) != SET
)
4606 /* If this happens, we have to extend this to schedule
4607 optimally. Return 0 for now. */
4610 if (reg_mentioned_p (SET_DEST (pat
), SET_SRC (dep_pat
)))
4612 if (! recog_memoized (dep_insn
))
4614 switch (get_attr_type (dep_insn
))
4618 case TYPE_FPSQRTSGL
:
4619 case TYPE_FPSQRTDBL
:
4620 /* An ALU flop can't be issued until two cycles before a
4621 preceding divide or sqrt operation has finished if
4622 the target of the ALU flop is any of the sources
4623 (or destination) of the divide or sqrt operation. */
4624 return insn_default_latency (dep_insn
) - 2;
4632 /* For other anti dependencies, the cost is 0. */
4635 case REG_DEP_OUTPUT
:
4636 /* Output dependency; DEP_INSN writes a register that INSN writes some
4638 if (attr_type
== TYPE_FPLOAD
)
4640 rtx pat
= PATTERN (insn
);
4641 rtx dep_pat
= PATTERN (dep_insn
);
4642 if (GET_CODE (pat
) == PARALLEL
)
4644 /* This happens for the fldXs,mb patterns. */
4645 pat
= XVECEXP (pat
, 0, 0);
4647 if (GET_CODE (pat
) != SET
|| GET_CODE (dep_pat
) != SET
)
4648 /* If this happens, we have to extend this to schedule
4649 optimally. Return 0 for now. */
4652 if (reg_mentioned_p (SET_DEST (pat
), SET_DEST (dep_pat
)))
4654 if (! recog_memoized (dep_insn
))
4656 switch (get_attr_type (dep_insn
))
4663 case TYPE_FPSQRTSGL
:
4664 case TYPE_FPSQRTDBL
:
4665 /* A fpload can't be issued until one cycle before a
4666 preceding arithmetic operation has finished if
4667 the target of the fpload is the destination of the
4668 arithmetic operation.
4670 Exception: For PA7100LC, PA7200 and PA7300, the cost
4671 is 3 cycles, unless they bundle together. We also
4672 pay the penalty if the second insn is a fpload. */
4673 return insn_default_latency (dep_insn
) - 1;
4680 else if (attr_type
== TYPE_FPALU
)
4682 rtx pat
= PATTERN (insn
);
4683 rtx dep_pat
= PATTERN (dep_insn
);
4684 if (GET_CODE (pat
) == PARALLEL
)
4686 /* This happens for the fldXs,mb patterns. */
4687 pat
= XVECEXP (pat
, 0, 0);
4689 if (GET_CODE (pat
) != SET
|| GET_CODE (dep_pat
) != SET
)
4690 /* If this happens, we have to extend this to schedule
4691 optimally. Return 0 for now. */
4694 if (reg_mentioned_p (SET_DEST (pat
), SET_DEST (dep_pat
)))
4696 if (! recog_memoized (dep_insn
))
4698 switch (get_attr_type (dep_insn
))
4702 case TYPE_FPSQRTSGL
:
4703 case TYPE_FPSQRTDBL
:
4704 /* An ALU flop can't be issued until two cycles before a
4705 preceding divide or sqrt operation has finished if
4706 the target of the ALU flop is also the target of
4707 the divide or sqrt operation. */
4708 return insn_default_latency (dep_insn
) - 2;
4716 /* For other output dependencies, the cost is 0. */
4724 /* Adjust scheduling priorities. We use this to try and keep addil
4725 and the next use of %r1 close together. */
4727 pa_adjust_priority (rtx insn
, int priority
)
4729 rtx set
= single_set (insn
);
4733 src
= SET_SRC (set
);
4734 dest
= SET_DEST (set
);
4735 if (GET_CODE (src
) == LO_SUM
4736 && symbolic_operand (XEXP (src
, 1), VOIDmode
)
4737 && ! read_only_operand (XEXP (src
, 1), VOIDmode
))
4740 else if (GET_CODE (src
) == MEM
4741 && GET_CODE (XEXP (src
, 0)) == LO_SUM
4742 && symbolic_operand (XEXP (XEXP (src
, 0), 1), VOIDmode
)
4743 && ! read_only_operand (XEXP (XEXP (src
, 0), 1), VOIDmode
))
4746 else if (GET_CODE (dest
) == MEM
4747 && GET_CODE (XEXP (dest
, 0)) == LO_SUM
4748 && symbolic_operand (XEXP (XEXP (dest
, 0), 1), VOIDmode
)
4749 && ! read_only_operand (XEXP (XEXP (dest
, 0), 1), VOIDmode
))
4755 /* The 700 can only issue a single insn at a time.
4756 The 7XXX processors can issue two insns at a time.
4757 The 8000 can issue 4 insns at a time. */
4759 pa_issue_rate (void)
4763 case PROCESSOR_700
: return 1;
4764 case PROCESSOR_7100
: return 2;
4765 case PROCESSOR_7100LC
: return 2;
4766 case PROCESSOR_7200
: return 2;
4767 case PROCESSOR_7300
: return 2;
4768 case PROCESSOR_8000
: return 4;
4777 /* Return any length adjustment needed by INSN which already has its length
4778 computed as LENGTH. Return zero if no adjustment is necessary.
4780 For the PA: function calls, millicode calls, and backwards short
4781 conditional branches with unfilled delay slots need an adjustment by +1
4782 (to account for the NOP which will be inserted into the instruction stream).
4784 Also compute the length of an inline block move here as it is too
4785 complicated to express as a length attribute in pa.md. */
4787 pa_adjust_insn_length (rtx insn
, int length
)
4789 rtx pat
= PATTERN (insn
);
4791 /* Jumps inside switch tables which have unfilled delay slots need
4793 if (GET_CODE (insn
) == JUMP_INSN
4794 && GET_CODE (pat
) == PARALLEL
4795 && get_attr_type (insn
) == TYPE_BTABLE_BRANCH
)
4797 /* Millicode insn with an unfilled delay slot. */
4798 else if (GET_CODE (insn
) == INSN
4799 && GET_CODE (pat
) != SEQUENCE
4800 && GET_CODE (pat
) != USE
4801 && GET_CODE (pat
) != CLOBBER
4802 && get_attr_type (insn
) == TYPE_MILLI
)
4804 /* Block move pattern. */
4805 else if (GET_CODE (insn
) == INSN
4806 && GET_CODE (pat
) == PARALLEL
4807 && GET_CODE (XVECEXP (pat
, 0, 0)) == SET
4808 && GET_CODE (XEXP (XVECEXP (pat
, 0, 0), 0)) == MEM
4809 && GET_CODE (XEXP (XVECEXP (pat
, 0, 0), 1)) == MEM
4810 && GET_MODE (XEXP (XVECEXP (pat
, 0, 0), 0)) == BLKmode
4811 && GET_MODE (XEXP (XVECEXP (pat
, 0, 0), 1)) == BLKmode
)
4812 return compute_movmem_length (insn
) - 4;
4813 /* Block clear pattern. */
4814 else if (GET_CODE (insn
) == INSN
4815 && GET_CODE (pat
) == PARALLEL
4816 && GET_CODE (XVECEXP (pat
, 0, 0)) == SET
4817 && GET_CODE (XEXP (XVECEXP (pat
, 0, 0), 0)) == MEM
4818 && XEXP (XVECEXP (pat
, 0, 0), 1) == const0_rtx
4819 && GET_MODE (XEXP (XVECEXP (pat
, 0, 0), 0)) == BLKmode
)
4820 return compute_clrmem_length (insn
) - 4;
4821 /* Conditional branch with an unfilled delay slot. */
4822 else if (GET_CODE (insn
) == JUMP_INSN
&& ! simplejump_p (insn
))
4824 /* Adjust a short backwards conditional with an unfilled delay slot. */
4825 if (GET_CODE (pat
) == SET
4827 && JUMP_LABEL (insn
) != NULL_RTX
4828 && ! forward_branch_p (insn
))
4830 else if (GET_CODE (pat
) == PARALLEL
4831 && get_attr_type (insn
) == TYPE_PARALLEL_BRANCH
4834 /* Adjust dbra insn with short backwards conditional branch with
4835 unfilled delay slot -- only for case where counter is in a
4836 general register register. */
4837 else if (GET_CODE (pat
) == PARALLEL
4838 && GET_CODE (XVECEXP (pat
, 0, 1)) == SET
4839 && GET_CODE (XEXP (XVECEXP (pat
, 0, 1), 0)) == REG
4840 && ! FP_REG_P (XEXP (XVECEXP (pat
, 0, 1), 0))
4842 && ! forward_branch_p (insn
))
4850 /* Implement the TARGET_PRINT_OPERAND_PUNCT_VALID_P hook. */
4853 pa_print_operand_punct_valid_p (unsigned char code
)
4864 /* Print operand X (an rtx) in assembler syntax to file FILE.
4865 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
4866 For `%' followed by punctuation, CODE is the punctuation and X is null. */
4869 print_operand (FILE *file
, rtx x
, int code
)
4874 /* Output a 'nop' if there's nothing for the delay slot. */
4875 if (dbr_sequence_length () == 0)
4876 fputs ("\n\tnop", file
);
4879 /* Output a nullification completer if there's nothing for the */
4880 /* delay slot or nullification is requested. */
4881 if (dbr_sequence_length () == 0 ||
4883 INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence
, 0, 0))))
4887 /* Print out the second register name of a register pair.
4888 I.e., R (6) => 7. */
4889 fputs (reg_names
[REGNO (x
) + 1], file
);
4892 /* A register or zero. */
4894 || (x
== CONST0_RTX (DFmode
))
4895 || (x
== CONST0_RTX (SFmode
)))
4897 fputs ("%r0", file
);
4903 /* A register or zero (floating point). */
4905 || (x
== CONST0_RTX (DFmode
))
4906 || (x
== CONST0_RTX (SFmode
)))
4908 fputs ("%fr0", file
);
4917 xoperands
[0] = XEXP (XEXP (x
, 0), 0);
4918 xoperands
[1] = XVECEXP (XEXP (XEXP (x
, 0), 1), 0, 0);
4919 output_global_address (file
, xoperands
[1], 0);
4920 fprintf (file
, "(%s)", reg_names
[REGNO (xoperands
[0])]);
4924 case 'C': /* Plain (C)ondition */
4926 switch (GET_CODE (x
))
4929 fputs ("=", file
); break;
4931 fputs ("<>", file
); break;
4933 fputs (">", file
); break;
4935 fputs (">=", file
); break;
4937 fputs (">>=", file
); break;
4939 fputs (">>", file
); break;
4941 fputs ("<", file
); break;
4943 fputs ("<=", file
); break;
4945 fputs ("<<=", file
); break;
4947 fputs ("<<", file
); break;
4952 case 'N': /* Condition, (N)egated */
4953 switch (GET_CODE (x
))
4956 fputs ("<>", file
); break;
4958 fputs ("=", file
); break;
4960 fputs ("<=", file
); break;
4962 fputs ("<", file
); break;
4964 fputs ("<<", file
); break;
4966 fputs ("<<=", file
); break;
4968 fputs (">=", file
); break;
4970 fputs (">", file
); break;
4972 fputs (">>", file
); break;
4974 fputs (">>=", file
); break;
4979 /* For floating point comparisons. Note that the output
4980 predicates are the complement of the desired mode. The
4981 conditions for GT, GE, LT, LE and LTGT cause an invalid
4982 operation exception if the result is unordered and this
4983 exception is enabled in the floating-point status register. */
4985 switch (GET_CODE (x
))
4988 fputs ("!=", file
); break;
4990 fputs ("=", file
); break;
4992 fputs ("!>", file
); break;
4994 fputs ("!>=", file
); break;
4996 fputs ("!<", file
); break;
4998 fputs ("!<=", file
); break;
5000 fputs ("!<>", file
); break;
5002 fputs ("!?<=", file
); break;
5004 fputs ("!?<", file
); break;
5006 fputs ("!?>=", file
); break;
5008 fputs ("!?>", file
); break;
5010 fputs ("!?=", file
); break;
5012 fputs ("!?", file
); break;
5014 fputs ("?", file
); break;
5019 case 'S': /* Condition, operands are (S)wapped. */
5020 switch (GET_CODE (x
))
5023 fputs ("=", file
); break;
5025 fputs ("<>", file
); break;
5027 fputs ("<", file
); break;
5029 fputs ("<=", file
); break;
5031 fputs ("<<=", file
); break;
5033 fputs ("<<", file
); break;
5035 fputs (">", file
); break;
5037 fputs (">=", file
); break;
5039 fputs (">>=", file
); break;
5041 fputs (">>", file
); break;
5046 case 'B': /* Condition, (B)oth swapped and negate. */
5047 switch (GET_CODE (x
))
5050 fputs ("<>", file
); break;
5052 fputs ("=", file
); break;
5054 fputs (">=", file
); break;
5056 fputs (">", file
); break;
5058 fputs (">>", file
); break;
5060 fputs (">>=", file
); break;
5062 fputs ("<=", file
); break;
5064 fputs ("<", file
); break;
5066 fputs ("<<", file
); break;
5068 fputs ("<<=", file
); break;
5074 gcc_assert (GET_CODE (x
) == CONST_INT
);
5075 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, ~INTVAL (x
));
5078 gcc_assert (GET_CODE (x
) == CONST_INT
);
5079 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 64 - (INTVAL (x
) & 63));
5082 gcc_assert (GET_CODE (x
) == CONST_INT
);
5083 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 32 - (INTVAL (x
) & 31));
5086 gcc_assert (GET_CODE (x
) == CONST_INT
&& exact_log2 (INTVAL (x
)) >= 0);
5087 fprintf (file
, "%d", exact_log2 (INTVAL (x
)));
5090 gcc_assert (GET_CODE (x
) == CONST_INT
);
5091 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 63 - (INTVAL (x
) & 63));
5094 gcc_assert (GET_CODE (x
) == CONST_INT
);
5095 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, 31 - (INTVAL (x
) & 31));
5098 if (GET_CODE (x
) == CONST_INT
)
5103 switch (GET_CODE (XEXP (x
, 0)))
5107 if (ASSEMBLER_DIALECT
== 0)
5108 fputs ("s,mb", file
);
5110 fputs (",mb", file
);
5114 if (ASSEMBLER_DIALECT
== 0)
5115 fputs ("s,ma", file
);
5117 fputs (",ma", file
);
5120 if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == REG
5121 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == REG
)
5123 if (ASSEMBLER_DIALECT
== 0)
5126 else if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
5127 || GET_CODE (XEXP (XEXP (x
, 0), 1)) == MULT
)
5129 if (ASSEMBLER_DIALECT
== 0)
5130 fputs ("x,s", file
);
5134 else if (code
== 'F' && ASSEMBLER_DIALECT
== 0)
5138 if (code
== 'F' && ASSEMBLER_DIALECT
== 0)
5144 output_global_address (file
, x
, 0);
5147 output_global_address (file
, x
, 1);
5149 case 0: /* Don't do anything special */
5154 compute_zdepwi_operands (INTVAL (x
), op
);
5155 fprintf (file
, "%d,%d,%d", op
[0], op
[1], op
[2]);
5161 compute_zdepdi_operands (INTVAL (x
), op
);
5162 fprintf (file
, "%d,%d,%d", op
[0], op
[1], op
[2]);
5166 /* We can get here from a .vtable_inherit due to our
5167 CONSTANT_ADDRESS_P rejecting perfectly good constant
5173 if (GET_CODE (x
) == REG
)
5175 fputs (reg_names
[REGNO (x
)], file
);
5176 if (TARGET_64BIT
&& FP_REG_P (x
) && GET_MODE_SIZE (GET_MODE (x
)) <= 4)
5182 && GET_MODE_SIZE (GET_MODE (x
)) <= 4
5183 && (REGNO (x
) & 1) == 0)
5186 else if (GET_CODE (x
) == MEM
)
5188 int size
= GET_MODE_SIZE (GET_MODE (x
));
5189 rtx base
= NULL_RTX
;
5190 switch (GET_CODE (XEXP (x
, 0)))
5194 base
= XEXP (XEXP (x
, 0), 0);
5195 fprintf (file
, "-%d(%s)", size
, reg_names
[REGNO (base
)]);
5199 base
= XEXP (XEXP (x
, 0), 0);
5200 fprintf (file
, "%d(%s)", size
, reg_names
[REGNO (base
)]);
5203 if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
)
5204 fprintf (file
, "%s(%s)",
5205 reg_names
[REGNO (XEXP (XEXP (XEXP (x
, 0), 0), 0))],
5206 reg_names
[REGNO (XEXP (XEXP (x
, 0), 1))]);
5207 else if (GET_CODE (XEXP (XEXP (x
, 0), 1)) == MULT
)
5208 fprintf (file
, "%s(%s)",
5209 reg_names
[REGNO (XEXP (XEXP (XEXP (x
, 0), 1), 0))],
5210 reg_names
[REGNO (XEXP (XEXP (x
, 0), 0))]);
5211 else if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == REG
5212 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == REG
)
5214 /* Because the REG_POINTER flag can get lost during reload,
5215 GO_IF_LEGITIMATE_ADDRESS canonicalizes the order of the
5216 index and base registers in the combined move patterns. */
5217 rtx base
= XEXP (XEXP (x
, 0), 1);
5218 rtx index
= XEXP (XEXP (x
, 0), 0);
5220 fprintf (file
, "%s(%s)",
5221 reg_names
[REGNO (index
)], reg_names
[REGNO (base
)]);
5224 output_address (XEXP (x
, 0));
5227 output_address (XEXP (x
, 0));
5232 output_addr_const (file
, x
);
5235 /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */
5238 output_global_address (FILE *file
, rtx x
, int round_constant
)
5241 /* Imagine (high (const (plus ...))). */
5242 if (GET_CODE (x
) == HIGH
)
5245 if (GET_CODE (x
) == SYMBOL_REF
&& read_only_operand (x
, VOIDmode
))
5246 output_addr_const (file
, x
);
5247 else if (GET_CODE (x
) == SYMBOL_REF
&& !flag_pic
)
5249 output_addr_const (file
, x
);
5250 fputs ("-$global$", file
);
5252 else if (GET_CODE (x
) == CONST
)
5254 const char *sep
= "";
5255 int offset
= 0; /* assembler wants -$global$ at end */
5256 rtx base
= NULL_RTX
;
5258 switch (GET_CODE (XEXP (XEXP (x
, 0), 0)))
5261 base
= XEXP (XEXP (x
, 0), 0);
5262 output_addr_const (file
, base
);
5265 offset
= INTVAL (XEXP (XEXP (x
, 0), 0));
5271 switch (GET_CODE (XEXP (XEXP (x
, 0), 1)))
5274 base
= XEXP (XEXP (x
, 0), 1);
5275 output_addr_const (file
, base
);
5278 offset
= INTVAL (XEXP (XEXP (x
, 0), 1));
5284 /* How bogus. The compiler is apparently responsible for
5285 rounding the constant if it uses an LR field selector.
5287 The linker and/or assembler seem a better place since
5288 they have to do this kind of thing already.
5290 If we fail to do this, HP's optimizing linker may eliminate
5291 an addil, but not update the ldw/stw/ldo instruction that
5292 uses the result of the addil. */
5294 offset
= ((offset
+ 0x1000) & ~0x1fff);
5296 switch (GET_CODE (XEXP (x
, 0)))
5309 gcc_assert (GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
);
5317 if (!read_only_operand (base
, VOIDmode
) && !flag_pic
)
5318 fputs ("-$global$", file
);
5320 fprintf (file
, "%s%d", sep
, offset
);
5323 output_addr_const (file
, x
);
5326 /* Output boilerplate text to appear at the beginning of the file.
5327 There are several possible versions. */
5328 #define aputs(x) fputs(x, asm_out_file)
5330 pa_file_start_level (void)
5333 aputs ("\t.LEVEL 2.0w\n");
5334 else if (TARGET_PA_20
)
5335 aputs ("\t.LEVEL 2.0\n");
5336 else if (TARGET_PA_11
)
5337 aputs ("\t.LEVEL 1.1\n");
5339 aputs ("\t.LEVEL 1.0\n");
/* Emit the SOM .SPACE/.SUBSPA directives; SORTSPACE nonzero adds
   explicit SORT keys for the private and text spaces.  */
static void
pa_file_start_space (int sortspace)
{
  aputs ("\t.SPACE $PRIVATE$");
  if (sortspace)
    aputs (",SORT=16");
  aputs ("\n\t.SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31"
         "\n\t.SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82"
         "\n\t.SPACE $TEXT$");
  if (sortspace)
    aputs (",SORT=8");
  aputs ("\n\t.SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44"
         "\n\t.SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY\n");
}
5358 pa_file_start_file (int want_version
)
5360 if (write_symbols
!= NO_DEBUG
)
5362 output_file_directive (asm_out_file
, main_input_filename
);
5364 aputs ("\t.version\t\"01.01\"\n");
5369 pa_file_start_mcount (const char *aswhat
)
5372 fprintf (asm_out_file
, "\t.IMPORT _mcount,%s\n", aswhat
);
/* TARGET_ASM_FILE_START for the ELF32 environment.  */
static void
pa_elf_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_mcount ("ENTRY");
  pa_file_start_file (0);
}
/* TARGET_ASM_FILE_START for the SOM object format.  */
static void
pa_som_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_space (0);
  aputs ("\t.IMPORT $global$,DATA\n"
         "\t.IMPORT $$dyncall,MILLICODE\n");
  pa_file_start_mcount ("CODE");
  pa_file_start_file (0);
}
/* TARGET_ASM_FILE_START for Linux.  */
static void
pa_linux_file_start (void)
{
  pa_file_start_file (1);
  pa_file_start_level ();
  pa_file_start_mcount ("CODE");
}
/* TARGET_ASM_FILE_START for the 64-bit HP-UX GAS environment.  */
static void
pa_hpux64_gas_file_start (void)
{
  pa_file_start_level ();
#ifdef ASM_OUTPUT_TYPE_DIRECTIVE
  if (profile_flag)
    ASM_OUTPUT_TYPE_DIRECTIVE (asm_out_file, "_mcount", "function");
#endif
  pa_file_start_file (1);
}
/* TARGET_ASM_FILE_START for the 64-bit HP assembler.  */
static void
pa_hpux64_hpas_file_start (void)
{
  pa_file_start_level ();
  pa_file_start_space (1);
  pa_file_start_mcount ("CODE");
  pa_file_start_file (0);
}
#undef aputs
5423 /* Search the deferred plabel list for SYMBOL and return its internal
5424 label. If an entry for SYMBOL is not found, a new entry is created. */
5427 get_deferred_plabel (rtx symbol
)
5429 const char *fname
= XSTR (symbol
, 0);
5432 /* See if we have already put this function on the list of deferred
5433 plabels. This list is generally small, so a liner search is not
5434 too ugly. If it proves too slow replace it with something faster. */
5435 for (i
= 0; i
< n_deferred_plabels
; i
++)
5436 if (strcmp (fname
, XSTR (deferred_plabels
[i
].symbol
, 0)) == 0)
5439 /* If the deferred plabel list is empty, or this entry was not found
5440 on the list, create a new entry on the list. */
5441 if (deferred_plabels
== NULL
|| i
== n_deferred_plabels
)
5445 if (deferred_plabels
== 0)
5446 deferred_plabels
= ggc_alloc_deferred_plabel ();
5448 deferred_plabels
= GGC_RESIZEVEC (struct deferred_plabel
,
5450 n_deferred_plabels
+ 1);
5452 i
= n_deferred_plabels
++;
5453 deferred_plabels
[i
].internal_label
= gen_label_rtx ();
5454 deferred_plabels
[i
].symbol
= symbol
;
5456 /* Gross. We have just implicitly taken the address of this
5457 function. Mark it in the same manner as assemble_name. */
5458 id
= maybe_get_identifier (targetm
.strip_name_encoding (fname
));
5460 mark_referenced (id
);
5463 return deferred_plabels
[i
].internal_label
;
5467 output_deferred_plabels (void)
5471 /* If we have some deferred plabels, then we need to switch into the
5472 data or readonly data section, and align it to a 4 byte boundary
5473 before outputting the deferred plabels. */
5474 if (n_deferred_plabels
)
5476 switch_to_section (flag_pic
? data_section
: readonly_data_section
);
5477 ASM_OUTPUT_ALIGN (asm_out_file
, TARGET_64BIT
? 3 : 2);
5480 /* Now output the deferred plabels. */
5481 for (i
= 0; i
< n_deferred_plabels
; i
++)
5483 targetm
.asm_out
.internal_label (asm_out_file
, "L",
5484 CODE_LABEL_NUMBER (deferred_plabels
[i
].internal_label
));
5485 assemble_integer (deferred_plabels
[i
].symbol
,
5486 TARGET_64BIT
? 8 : 4, TARGET_64BIT
? 64 : 32, 1);
#ifdef HPUX_LONG_DOUBLE_LIBRARY
/* Initialize optabs to point to HPUX long double emulation routines.  */
static void
pa_hpux_init_libfuncs (void)
{
  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
  set_optab_libfunc (smin_optab, TFmode, "_U_Qmin");
  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
  set_optab_libfunc (sqrt_optab, TFmode, "_U_Qfsqrt");
  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");
  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");

  set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
  set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
  set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
  set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
  set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
  set_optab_libfunc (le_optab, TFmode, "_U_Qfle");
  set_optab_libfunc (unord_optab, TFmode, "_U_Qfunord");

  set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
  set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
  set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");

  set_conv_libfunc (sfix_optab, SImode, TFmode, TARGET_64BIT
		    ? "__U_Qfcnvfxt_quad_to_sgl"
		    : "_U_Qfcnvfxt_quad_to_sgl");
  set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
  set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_usgl");
  set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_udbl");

  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
  set_conv_libfunc (ufloat_optab, TFmode, SImode, "_U_Qfcnvxf_usgl_to_quad");
  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxf_udbl_to_quad");
}
#endif
/* HP's millicode routines mean something special to the assembler.
   Keep track of which ones we have used.  */

enum millicodes { remI, remU, divI, divU, mulI, end1000 };
static void import_milli (enum millicodes);
static char imported[(int) end1000];
static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI"};
static const char import_string[] = ".IMPORT $$....,MILLICODE";
#define MILLI_START 10

/* Emit a .IMPORT directive for millicode routine CODE the first time
   it is used; subsequent calls are no-ops.  */
static void
import_milli (enum millicodes code)
{
  char str[sizeof (import_string)];

  if (!imported[(int) code])
    {
      imported[(int) code] = 1;
      strcpy (str, import_string);
      /* Splice the 4-character routine name over the "...." placeholder
	 at offset MILLI_START in the template.  */
      strncpy (str + MILLI_START, milli_names[(int) code], 4);
      output_asm_insn (str, 0);
    }
}
5556 /* The register constraints have put the operands and return value in
5557 the proper registers. */
5560 output_mul_insn (int unsignedp ATTRIBUTE_UNUSED
, rtx insn
)
5562 import_milli (mulI
);
5563 return output_millicode_call (insn
, gen_rtx_SYMBOL_REF (Pmode
, "$$mulI"));
5566 /* Emit the rtl for doing a division by a constant. */
5568 /* Do magic division millicodes exist for this value? */
5569 const int magic_milli
[]= {0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1};
5571 /* We'll use an array to keep track of the magic millicodes and
5572 whether or not we've used them already. [n][0] is signed, [n][1] is
5575 static int div_milli
[16][2];
5578 emit_hpdiv_const (rtx
*operands
, int unsignedp
)
5580 if (GET_CODE (operands
[2]) == CONST_INT
5581 && INTVAL (operands
[2]) > 0
5582 && INTVAL (operands
[2]) < 16
5583 && magic_milli
[INTVAL (operands
[2])])
5585 rtx ret
= gen_rtx_REG (SImode
, TARGET_64BIT
? 2 : 31);
5587 emit_move_insn (gen_rtx_REG (SImode
, 26), operands
[1]);
5591 gen_rtvec (6, gen_rtx_SET (VOIDmode
, gen_rtx_REG (SImode
, 29),
5592 gen_rtx_fmt_ee (unsignedp
? UDIV
: DIV
,
5594 gen_rtx_REG (SImode
, 26),
5596 gen_rtx_CLOBBER (VOIDmode
, operands
[4]),
5597 gen_rtx_CLOBBER (VOIDmode
, operands
[3]),
5598 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (SImode
, 26)),
5599 gen_rtx_CLOBBER (VOIDmode
, gen_rtx_REG (SImode
, 25)),
5600 gen_rtx_CLOBBER (VOIDmode
, ret
))));
5601 emit_move_insn (operands
[0], gen_rtx_REG (SImode
, 29));
5608 output_div_insn (rtx
*operands
, int unsignedp
, rtx insn
)
5612 /* If the divisor is a constant, try to use one of the special
5614 if (GET_CODE (operands
[0]) == CONST_INT
)
5616 static char buf
[100];
5617 divisor
= INTVAL (operands
[0]);
5618 if (!div_milli
[divisor
][unsignedp
])
5620 div_milli
[divisor
][unsignedp
] = 1;
5622 output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands
);
5624 output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands
);
5628 sprintf (buf
, "$$divU_" HOST_WIDE_INT_PRINT_DEC
,
5629 INTVAL (operands
[0]));
5630 return output_millicode_call (insn
,
5631 gen_rtx_SYMBOL_REF (SImode
, buf
));
5635 sprintf (buf
, "$$divI_" HOST_WIDE_INT_PRINT_DEC
,
5636 INTVAL (operands
[0]));
5637 return output_millicode_call (insn
,
5638 gen_rtx_SYMBOL_REF (SImode
, buf
));
5641 /* Divisor isn't a special constant. */
5646 import_milli (divU
);
5647 return output_millicode_call (insn
,
5648 gen_rtx_SYMBOL_REF (SImode
, "$$divU"));
5652 import_milli (divI
);
5653 return output_millicode_call (insn
,
5654 gen_rtx_SYMBOL_REF (SImode
, "$$divI"));
5659 /* Output a $$rem millicode to do mod. */
5662 output_mod_insn (int unsignedp
, rtx insn
)
5666 import_milli (remU
);
5667 return output_millicode_call (insn
,
5668 gen_rtx_SYMBOL_REF (SImode
, "$$remU"));
5672 import_milli (remI
);
5673 return output_millicode_call (insn
,
5674 gen_rtx_SYMBOL_REF (SImode
, "$$remI"));
5679 output_arg_descriptor (rtx call_insn
)
5681 const char *arg_regs
[4];
5682 enum machine_mode arg_mode
;
5684 int i
, output_flag
= 0;
5687 /* We neither need nor want argument location descriptors for the
5688 64bit runtime environment or the ELF32 environment. */
5689 if (TARGET_64BIT
|| TARGET_ELF32
)
5692 for (i
= 0; i
< 4; i
++)
5695 /* Specify explicitly that no argument relocations should take place
5696 if using the portable runtime calling conventions. */
5697 if (TARGET_PORTABLE_RUNTIME
)
5699 fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n",
5704 gcc_assert (GET_CODE (call_insn
) == CALL_INSN
);
5705 for (link
= CALL_INSN_FUNCTION_USAGE (call_insn
);
5706 link
; link
= XEXP (link
, 1))
5708 rtx use
= XEXP (link
, 0);
5710 if (! (GET_CODE (use
) == USE
5711 && GET_CODE (XEXP (use
, 0)) == REG
5712 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use
, 0)))))
5715 arg_mode
= GET_MODE (XEXP (use
, 0));
5716 regno
= REGNO (XEXP (use
, 0));
5717 if (regno
>= 23 && regno
<= 26)
5719 arg_regs
[26 - regno
] = "GR";
5720 if (arg_mode
== DImode
)
5721 arg_regs
[25 - regno
] = "GR";
5723 else if (regno
>= 32 && regno
<= 39)
5725 if (arg_mode
== SFmode
)
5726 arg_regs
[(regno
- 32) / 2] = "FR";
5729 #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED
5730 arg_regs
[(regno
- 34) / 2] = "FR";
5731 arg_regs
[(regno
- 34) / 2 + 1] = "FU";
5733 arg_regs
[(regno
- 34) / 2] = "FU";
5734 arg_regs
[(regno
- 34) / 2 + 1] = "FR";
5739 fputs ("\t.CALL ", asm_out_file
);
5740 for (i
= 0; i
< 4; i
++)
5745 fputc (',', asm_out_file
);
5746 fprintf (asm_out_file
, "ARGW%d=%s", i
, arg_regs
[i
]);
5749 fputc ('\n', asm_out_file
);
5753 pa_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass_i
,
5754 enum machine_mode mode
, secondary_reload_info
*sri
)
5756 int is_symbolic
, regno
;
5757 enum reg_class rclass
= (enum reg_class
) rclass_i
;
5759 /* Handle the easy stuff first. */
5760 if (rclass
== R1_REGS
)
5766 if (rclass
== BASE_REG_CLASS
&& regno
< FIRST_PSEUDO_REGISTER
)
5772 /* If we have something like (mem (mem (...)), we can safely assume the
5773 inner MEM will end up in a general register after reloading, so there's
5774 no need for a secondary reload. */
5775 if (GET_CODE (x
) == MEM
&& GET_CODE (XEXP (x
, 0)) == MEM
)
5778 /* Trying to load a constant into a FP register during PIC code
5779 generation requires %r1 as a scratch register. */
5781 && (mode
== SImode
|| mode
== DImode
)
5782 && FP_REG_CLASS_P (rclass
)
5783 && (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
))
5785 sri
->icode
= (mode
== SImode
? CODE_FOR_reload_insi_r1
5786 : CODE_FOR_reload_indi_r1
);
5790 /* Profiling showed the PA port spends about 1.3% of its compilation
5791 time in true_regnum from calls inside pa_secondary_reload_class. */
5792 if (regno
>= FIRST_PSEUDO_REGISTER
|| GET_CODE (x
) == SUBREG
)
5793 regno
= true_regnum (x
);
5795 /* In order to allow 14-bit displacements in integer loads and stores,
5796 we need to prevent reload from generating out of range integer mode
5797 loads and stores to the floating point registers. Previously, we
5798 used to call for a secondary reload and have emit_move_sequence()
5799 fix the instruction sequence. However, reload occasionally wouldn't
5800 generate the reload and we would end up with an invalid REG+D memory
5801 address. So, now we use an intermediate general register for most
5802 memory loads and stores. */
5803 if ((regno
>= FIRST_PSEUDO_REGISTER
|| regno
== -1)
5804 && GET_MODE_CLASS (mode
) == MODE_INT
5805 && FP_REG_CLASS_P (rclass
))
5807 /* Reload passes (mem:SI (reg/f:DI 30 %r30) when it wants to check
5808 the secondary reload needed for a pseudo. It never passes a
5810 if (GET_CODE (x
) == MEM
)
5814 /* We don't need an intermediate for indexed and LO_SUM DLT
5815 memory addresses. When INT14_OK_STRICT is true, it might
5816 appear that we could directly allow register indirect
5817 memory addresses. However, this doesn't work because we
5818 don't support SUBREGs in floating-point register copies
5819 and reload doesn't tell us when it's going to use a SUBREG. */
5820 if (IS_INDEX_ADDR_P (x
)
5821 || IS_LO_SUM_DLT_ADDR_P (x
))
5824 /* Otherwise, we need an intermediate general register. */
5825 return GENERAL_REGS
;
5828 /* Request a secondary reload with a general scratch register
5829 for everthing else. ??? Could symbolic operands be handled
5830 directly when generating non-pic PA 2.0 code? */
5832 ? direct_optab_handler (reload_in_optab
, mode
)
5833 : direct_optab_handler (reload_out_optab
, mode
));
5837 /* We need a secondary register (GPR) for copies between the SAR
5838 and anything other than a general register. */
5839 if (rclass
== SHIFT_REGS
&& (regno
<= 0 || regno
>= 32))
5842 ? direct_optab_handler (reload_in_optab
, mode
)
5843 : direct_optab_handler (reload_out_optab
, mode
));
5847 /* A SAR<->FP register copy requires a secondary register (GPR) as
5848 well as secondary memory. */
5849 if (regno
>= 0 && regno
< FIRST_PSEUDO_REGISTER
5850 && (REGNO_REG_CLASS (regno
) == SHIFT_REGS
5851 && FP_REG_CLASS_P (rclass
)))
5854 ? direct_optab_handler (reload_in_optab
, mode
)
5855 : direct_optab_handler (reload_out_optab
, mode
));
5859 /* Secondary reloads of symbolic operands require %r1 as a scratch
5860 register when we're generating PIC code and when the operand isn't
5862 if (GET_CODE (x
) == HIGH
)
5865 /* Profiling has showed GCC spends about 2.6% of its compilation
5866 time in symbolic_operand from calls inside pa_secondary_reload_class.
5867 So, we use an inline copy to avoid useless work. */
5868 switch (GET_CODE (x
))
5873 is_symbolic
= !SYMBOL_REF_TLS_MODEL (x
);
5880 is_symbolic
= (GET_CODE (op
) == PLUS
5881 && ((GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
5882 && !SYMBOL_REF_TLS_MODEL (XEXP (op
, 0)))
5883 || GET_CODE (XEXP (op
, 0)) == LABEL_REF
)
5884 && GET_CODE (XEXP (op
, 1)) == CONST_INT
);
5891 if (is_symbolic
&& (flag_pic
|| !read_only_operand (x
, VOIDmode
)))
5893 gcc_assert (mode
== SImode
|| mode
== DImode
);
5894 sri
->icode
= (mode
== SImode
? CODE_FOR_reload_insi_r1
5895 : CODE_FOR_reload_indi_r1
);
5901 /* Implement TARGET_EXTRA_LIVE_ON_ENTRY. The argument pointer
5902 is only marked as live on entry by df-scan when it is a fixed
5903 register. It isn't a fixed register in the 64-bit runtime,
5904 so we need to mark it here. */
5907 pa_extra_live_on_entry (bitmap regs
)
5910 bitmap_set_bit (regs
, ARG_POINTER_REGNUM
);
/* NOTE(review): corrupted extraction -- statements split mid-token,
   interior lines (return type, braces, the MEM_VOLATILE_P marking and the
   return statement implied by the comment) are missing.  Code preserved
   byte-for-byte; comments only added.  Builds the EH return handler slot:
   a word_mode MEM at frame_pointer + (-16 on 64-bit, -20 on 32-bit). */
5913 /* Implement EH_RETURN_HANDLER_RTX. The MEM needs to be volatile
5914 to prevent it from being deleted. */
5917 pa_eh_return_handler_rtx (void)
5921 tmp
= gen_rtx_PLUS (word_mode
, frame_pointer_rtx
,
5922 TARGET_64BIT
? GEN_INT (-16) : GEN_INT (-20));
5923 tmp
= gen_rtx_MEM (word_mode
, tmp
);
/* NOTE(review): corrupted extraction -- the function header is split across
   lines and interior lines (return type, braces, the type != NULL test
   selecting between the two `size =` assignments, and the TARGET_64BIT
   branch implied by numbering gaps 5944 -> 5949 -> 5951 -> 5956) are
   missing.  Code preserved byte-for-byte; comments only added. */
5928 /* In the 32-bit runtime, arguments larger than eight bytes are passed
5929 by invisible reference. As a GCC extension, we also pass anything
5930 with a zero or variable size by reference.
5932 The 64-bit runtime does not describe passing any types by invisible
5933 reference. The internals of GCC can't currently handle passing
5934 empty structures, and zero or variable length arrays when they are
5935 not passed entirely on the stack or by reference. Thus, as a GCC
5936 extension, we pass these types by reference. The HP compiler doesn't
5937 support these types, so hopefully there shouldn't be any compatibility
5938 issues. This may have to be revisited when HP releases a C99 compiler
5939 or updates the ABI. */
5942 pa_pass_by_reference (CUMULATIVE_ARGS
*ca ATTRIBUTE_UNUSED
,
5943 enum machine_mode mode
, const_tree type
,
5944 bool named ATTRIBUTE_UNUSED
)
/* Size from the tree type when available ... */
5949 size
= int_size_in_bytes (type
);
/* ... otherwise from the machine mode (presumably the else arm of a
   missing `if (type)` -- TODO confirm). */
5951 size
= GET_MODE_SIZE (mode
);
/* Pass by reference when size is zero/variable (<= 0) or over 8 bytes. */
5956 return size
<= 0 || size
> 8;
/* NOTE(review): corrupted extraction -- the surrounding `if` heads, braces
   and the `return upward/downward/none` statements are missing (embedded
   numbering jumps 5960 -> 5965, 5972 -> 5975, 5991 -> 5998).  Code text
   preserved byte-for-byte; comments only added.  Decides argument padding
   direction (TARGET_FUNCTION_ARG_PADDING-style hook). */
5960 function_arg_padding (enum machine_mode mode
, const_tree type
)
/* Fragment of a condition recognizing BLKmode-ish aggregate/complex/vector
   types -- its head is among the missing lines. */
5965 && (AGGREGATE_TYPE_P (type
)
5966 || TREE_CODE (type
) == COMPLEX_TYPE
5967 || TREE_CODE (type
) == VECTOR_TYPE
)))
5969 /* Return none if justification is not required. */
5971 && TREE_CODE (TYPE_SIZE (type
)) == INTEGER_CST
5972 && (int_size_in_bytes (type
) * BITS_PER_UNIT
) % PARM_BOUNDARY
== 0)
5975 /* The directions set here are ignored when a BLKmode argument larger
5976 than a word is placed in a register. Different code is used for
5977 the stack and registers. This makes it difficult to have a
5978 consistent data representation for both the stack and registers.
5979 For both runtimes, the justification and padding for arguments on
5980 the stack and in registers should be identical. */
5982 /* The 64-bit runtime specifies left justification for aggregates. */
5985 /* The 32-bit runtime architecture specifies right justification.
5986 When the argument is passed on the stack, the argument is padded
5987 with garbage on the left. The HP compiler pads with zeros. */
/* Sub-word scalars: some return follows this test in the missing text. */
5991 if (GET_MODE_BITSIZE (mode
) < PARM_BOUNDARY
)
/* NOTE(review): corrupted extraction -- statements split mid-token and many
   interior lines missing, notably the TARGET_64BIT / 32-bit split that the
   duplicated `offset = ...` pairs and the two trailing `return` statements
   imply (numbering jumps 6008 -> 6011, 6013 -> 6019, 6042 -> 6045).  Code
   preserved byte-for-byte; comments only added.  Expands the register-save
   sequence backing `va_start` and returns a pointer to the first anonymous
   argument. */
5998 /* Do what is necessary for `va_start'. We look at the current function
5999 to determine if stdargs or varargs is used and fill in an initial
6000 va_list. A pointer to this constructor is returned. */
6003 hppa_builtin_saveregs (void)
6006 tree fntype
= TREE_TYPE (current_function_decl
);
/* argadj compensates for the unnamed-argument slot in old-style varargs
   functions (non-stdarg): one word, else zero. */
6007 int argadj
= ((!stdarg_p (fntype
))
6008 ? UNITS_PER_WORD
: 0);
/* First offset computation -- presumably one arm of a missing
   arg_offset_rtx-availability test; TODO confirm. */
6011 offset
= plus_constant (crtl
->args
.arg_offset_rtx
, argadj
);
6013 offset
= crtl
->args
.arg_offset_rtx
;
6019 /* Adjust for varargs/stdarg differences. */
6021 offset
= plus_constant (crtl
->args
.arg_offset_rtx
, -argadj
);
6023 offset
= crtl
->args
.arg_offset_rtx
;
6025 /* We need to save %r26 .. %r19 inclusive starting at offset -64
6026 from the incoming arg pointer and growing to larger addresses. */
6027 for (i
= 26, off
= -64; i
>= 19; i
--, off
+= 8)
6028 emit_move_insn (gen_rtx_MEM (word_mode
,
6029 plus_constant (arg_pointer_rtx
, off
)),
6030 gen_rtx_REG (word_mode
, i
));
6032 /* The incoming args pointer points just beyond the flushback area;
6033 normally this is not a serious concern. However, when we are doing
6034 varargs/stdargs we want to make the arg pointer point to the start
6035 of the incoming argument area. */
6036 emit_move_insn (virtual_incoming_args_rtx
,
6037 plus_constant (arg_pointer_rtx
, -64));
6039 /* Now return a pointer to the first anonymous argument. */
6040 return copy_to_reg (expand_binop (Pmode
, add_optab
,
6041 virtual_incoming_args_rtx
,
6042 offset
, 0, 0, OPTAB_LIB_WIDEN
));
/* 32-bit path (presumably the else of a missing TARGET_64BIT test):
   spill %r23..%r26 as a block and fence with a blockage insn. */
6045 /* Store general registers on the stack. */
6046 dest
= gen_rtx_MEM (BLKmode
,
6047 plus_constant (crtl
->args
.internal_arg_pointer
,
6049 set_mem_alias_set (dest
, get_varargs_alias_set ());
6050 set_mem_align (dest
, BITS_PER_WORD
);
6051 move_block_from_reg (23, dest
, 4);
6053 /* move_block_from_reg will emit code to store the argument registers
6054 individually as scalar stores.
6056 However, other insns may later load from the same addresses for
6057 a structure load (passing a struct to a varargs routine).
6059 The alias code assumes that such aliasing can never happen, so we
6060 have to keep memory referencing insns from moving up beyond the
6061 last argument register store. So we emit a blockage insn here. */
6062 emit_insn (gen_blockage ());
6064 return copy_to_reg (expand_binop (Pmode
, add_optab
,
6065 crtl
->args
.internal_arg_pointer
,
6066 offset
, 0, 0, OPTAB_LIB_WIDEN
));
/* NOTE(review): corrupted extraction -- return type and braces missing;
   code preserved byte-for-byte, comments only added.  va_start hook:
   expands the saveregs builtin and hands the resulting pointer to the
   generic va_start expander. */
6070 hppa_va_start (tree valist
, rtx nextarg
)
6072 nextarg
= expand_builtin_saveregs ();
6073 std_expand_builtin_va_start (valist
, nextarg
);
/* NOTE(review): corrupted extraction -- the TARGET_64BIT test, several
   declarations, the indirect-argument branch and the final return are
   among the missing interior lines (numbering jumps 6093 -> 6097,
   6116 -> 6120, 6124 -> 6127).  Code preserved byte-for-byte; comments
   only added.  Gimplifies a VA_ARG_EXPR: generic upward-growing handling
   on 64-bit, hand-built downward-growing pointer arithmetic on 32-bit. */
6077 hppa_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
6082 /* Args grow upward. We can use the generic routines. */
6083 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
6085 else /* !TARGET_64BIT */
6087 tree ptr
= build_pointer_type (type
);
6090 unsigned int size
, ofs
;
/* Over-8-byte/variable-size args arrive by invisible reference on the
   32-bit runtime (see pa_pass_by_reference). */
6093 indirect
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, 0);
6097 ptr
= build_pointer_type (type
);
6099 size
= int_size_in_bytes (type
);
6100 valist_type
= TREE_TYPE (valist
);
6102 /* Args grow down. Not handled by generic routines. */
6104 u
= fold_convert (sizetype
, size_in_bytes (type
));
6105 u
= fold_build1 (NEGATE_EXPR
, sizetype
, u
);
6106 t
= build2 (POINTER_PLUS_EXPR
, valist_type
, valist
, u
);
6108 /* Align to 4 or 8 byte boundary depending on argument size. */
6110 u
= build_int_cst (TREE_TYPE (t
), (HOST_WIDE_INT
)(size
> 4 ? -8 : -4));
6111 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
, u
);
6112 t
= fold_convert (valist_type
, t
);
/* Commit the decremented, aligned pointer back into the va_list. */
6114 t
= build2 (MODIFY_EXPR
, valist_type
, valist
, t
);
/* Small args are right-justified within their slot; ofs skips the pad. */
6116 ofs
= (8 - size
) % 4;
6120 t
= build2 (POINTER_PLUS_EXPR
, valist_type
, t
, u
);
6123 t
= fold_convert (ptr
, t
);
6124 t
= build_va_arg_indirect_ref (t
);
/* Second indirect ref -- presumably the `if (indirect)` extra deref;
   the guard is among the missing lines.  TODO confirm. */
6127 t
= build_va_arg_indirect_ref (t
);
/* NOTE(review): corrupted extraction -- the switch's case labels for the
   integer/float classes, the `return true/false` bodies after each
   precision test, and the final default are missing (each `if` below has
   no visible consequent).  Code preserved byte-for-byte; comments only
   added.  Accepts a scalar mode iff its precision matches one of the
   C type sizes the runtime defines. */
6133 /* True if MODE is valid for the target. By "valid", we mean able to
6134 be manipulated in non-trivial ways. In particular, this means all
6135 the arithmetic is supported.
6137 Currently, TImode is not valid as the HP 64-bit runtime documentation
6138 doesn't document the alignment and calling conventions for this type.
6139 Thus, we return false when PRECISION is 2 * BITS_PER_WORD and
6140 2 * BITS_PER_WORD isn't equal LONG_LONG_TYPE_SIZE. */
6143 pa_scalar_mode_supported_p (enum machine_mode mode
)
6145 int precision
= GET_MODE_PRECISION (mode
);
6147 switch (GET_MODE_CLASS (mode
))
6149 case MODE_PARTIAL_INT
:
/* Integer precisions matched against the target's C type sizes. */
6151 if (precision
== CHAR_TYPE_SIZE
)
6153 if (precision
== SHORT_TYPE_SIZE
)
6155 if (precision
== INT_TYPE_SIZE
)
6157 if (precision
== LONG_TYPE_SIZE
)
6159 if (precision
== LONG_LONG_TYPE_SIZE
)
/* Float precisions (case label for MODE_FLOAT is among missing lines). */
6164 if (precision
== FLOAT_TYPE_SIZE
)
6166 if (precision
== DOUBLE_TYPE_SIZE
)
6168 if (precision
== LONG_DOUBLE_TYPE_SIZE
)
6172 case MODE_DECIMAL_FLOAT
:
/* NOTE(review): corrupted extraction -- return type, braces and the early
   `return FALSE` after the dbr_sequence_length test are missing (numbering
   jumps 6186 -> 6189).  Code preserved byte-for-byte; comments only
   added. */
6180 /* Return TRUE if INSN, a jump insn, has an unfilled delay slot and
6181 it branches to the next real instruction. Otherwise, return FALSE. */
6184 branch_to_delay_slot_p (rtx insn
)
/* A filled delay slot means INSN cannot be branching into it. */
6186 if (dbr_sequence_length ())
6189 return next_real_insn (JUMP_LABEL (insn
)) == next_real_insn (insn
);
/* NOTE(review): corrupted extraction -- return type, braces, the early
   return after the dbr_sequence_length test and any null-check on
   next_insn are missing (numbering jumps 6205 -> 6208).  Code preserved
   byte-for-byte; comments only added. */
6192 /* Return TRUE if INSN, a jump insn, needs a nop in its delay slot.
6194 This occurs when INSN has an unfilled delay slot and is followed
6195 by an ASM_INPUT. Disaster can occur if the ASM_INPUT is empty and
6196 the jump branches into the delay slot. So, we add a nop in the delay
6197 slot just to be safe. This messes up our instruction count, but we
6198 don't know how big the ASM_INPUT insn is anyway. */
6201 branch_needs_nop_p (rtx insn
)
6205 if (dbr_sequence_length ())
6208 next_insn
= next_real_insn (insn
);
6209 return GET_CODE (PATTERN (next_insn
)) == ASM_INPUT
;
/* NOTE(review): corrupted extraction -- the length-based switch heads,
   `return "nop"` for branch-to-delay-slot, many case labels and several
   output_asm_insn calls are among the missing interior lines (e.g.
   numbering jumps 6216 -> 6220, 6264 -> 6270, 6369 -> 6375).  Code
   preserved byte-for-byte; comments only added.  Emits the assembler
   template for a normal conditional branch, handling nullification,
   length variants and negation. */
6212 /* This routine handles all the normal conditional branch sequences we
6213 might need to generate. It handles compare immediate vs compare
6214 register, nullification of delay slots, varying length branches,
6215 negated branches, and all combinations of the above. It returns the
6216 output appropriate to emit the branch corresponding to all given
6220 output_cbranch (rtx
*operands
, int negated
, rtx insn
)
6222 static char buf
[100];
6224 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
6225 int length
= get_attr_length (insn
);
6228 /* A conditional branch to the following instruction (e.g. the delay slot)
6229 is asking for a disaster. This can happen when not optimizing and
6230 when jump optimization fails.
6232 While it is usually safe to emit nothing, this can fail if the
6233 preceding instruction is a nullified branch with an empty delay
6234 slot and the same branch target as this branch. We could check
6235 for this but jump optimization should eliminate nop jumps. It
6236 is always safe to emit a nop. */
6237 if (branch_to_delay_slot_p (insn
))
6240 /* The doubleword form of the cmpib instruction doesn't have the LEU
6241 and GTU conditions while the cmpb instruction does. Since we accept
6242 zero for cmpb, we must ensure that we use cmpb for the comparison. */
6243 if (GET_MODE (operands
[1]) == DImode
&& operands
[2] == const0_rtx
)
6244 operands
[2] = gen_rtx_REG (DImode
, 0);
6245 if (GET_MODE (operands
[2]) == DImode
&& operands
[1] == const0_rtx
)
6246 operands
[1] = gen_rtx_REG (DImode
, 0);
6248 /* If this is a long branch with its delay slot unfilled, set `nullify'
6249 as it can nullify the delay slot and save a nop. */
6250 if (length
== 8 && dbr_sequence_length () == 0)
6253 /* If this is a short forward conditional branch which did not get
6254 its delay slot filled, the delay slot can still be nullified. */
6255 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
6256 nullify
= forward_branch_p (insn
);
6258 /* A forward branch over a single nullified insn can be done with a
6259 comclr instruction. This avoids a single cycle penalty due to
6260 mis-predicted branch if we fall through (branch not taken). */
6262 && next_real_insn (insn
) != 0
6263 && get_attr_length (next_real_insn (insn
)) == 4
6264 && JUMP_LABEL (insn
) == next_nonnote_insn (next_real_insn (insn
))
6270 /* All short conditional branches except backwards with an unfilled
6274 strcpy (buf
, "{com%I2clr,|cmp%I2clr,}");
6276 strcpy (buf
, "{com%I2b,|cmp%I2b,}");
6277 if (GET_MODE (operands
[1]) == DImode
)
6280 strcat (buf
, "%B3");
6282 strcat (buf
, "%S3");
6284 strcat (buf
, " %2,%r1,%%r0");
6287 if (branch_needs_nop_p (insn
))
6288 strcat (buf
, ",n %2,%r1,%0%#");
6290 strcat (buf
, ",n %2,%r1,%0");
6293 strcat (buf
, " %2,%r1,%0");
6296 /* All long conditionals. Note a short backward branch with an
6297 unfilled delay slot is treated just like a long backward branch
6298 with an unfilled delay slot. */
6300 /* Handle weird backwards branch with a filled delay slot
6301 which is nullified. */
6302 if (dbr_sequence_length () != 0
6303 && ! forward_branch_p (insn
)
6306 strcpy (buf
, "{com%I2b,|cmp%I2b,}");
6307 if (GET_MODE (operands
[1]) == DImode
)
6310 strcat (buf
, "%S3");
6312 strcat (buf
, "%B3");
6313 strcat (buf
, ",n %2,%r1,.+12\n\tb %0");
6315 /* Handle short backwards branch with an unfilled delay slot.
6316 Using a comb;nop rather than comiclr;bl saves 1 cycle for both
6317 taken and untaken branches. */
6318 else if (dbr_sequence_length () == 0
6319 && ! forward_branch_p (insn
)
6320 && INSN_ADDRESSES_SET_P ()
6321 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
6322 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
6324 strcpy (buf
, "{com%I2b,|cmp%I2b,}");
6325 if (GET_MODE (operands
[1]) == DImode
)
6328 strcat (buf
, "%B3 %2,%r1,%0%#");
6330 strcat (buf
, "%S3 %2,%r1,%0%#");
6334 strcpy (buf
, "{com%I2clr,|cmp%I2clr,}");
6335 if (GET_MODE (operands
[1]) == DImode
)
6338 strcat (buf
, "%S3");
6340 strcat (buf
, "%B3");
6342 strcat (buf
, " %2,%r1,%%r0\n\tb,n %0");
6344 strcat (buf
, " %2,%r1,%%r0\n\tb %0");
6349 /* The reversed conditional branch must branch over one additional
6350 instruction if the delay slot is filled and needs to be extracted
6351 by output_lbranch. If the delay slot is empty or this is a
6352 nullified forward branch, the instruction after the reversed
6353 condition branch must be nullified. */
6354 if (dbr_sequence_length () == 0
6355 || (nullify
&& forward_branch_p (insn
)))
6359 operands
[4] = GEN_INT (length
);
6364 operands
[4] = GEN_INT (length
+ 4);
6367 /* Create a reversed conditional branch which branches around
6368 the following insns. */
6369 if (GET_MODE (operands
[1]) != DImode
)
6375 "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
6378 "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
6384 "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
6387 "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
6396 "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
6399 "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
6405 "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
6408 "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
6412 output_asm_insn (buf
, operands
);
6413 return output_lbranch (operands
[0], insn
, xdelay
);
/* NOTE(review): corrupted extraction -- braces, several if/else heads (the
   TARGET_64BIT split around 6477/6487, the flag_pic branch around 6519)
   and the trailing `return ""` are among the missing interior lines.  Code
   preserved byte-for-byte; comments only added.  Emits a long
   unconditional branch via %r1, spilling/reloading %r1 through a frame
   marker slot. */
6418 /* This routine handles output of long unconditional branches that
6419 exceed the maximum range of a simple branch instruction. Since
6420 we don't have a register available for the branch, we save register
6421 %r1 in the frame marker, load the branch destination DEST into %r1,
6422 execute the branch, and restore %r1 in the delay slot of the branch.
6424 Since long branches may have an insn in the delay slot and the
6425 delay slot is used to restore %r1, we in general need to extract
6426 this insn and execute it before the branch. However, to facilitate
6427 use of this function by conditional branches, we also provide an
6428 option to not extract the delay insn so that it will be emitted
6429 after the long branch. So, if there is an insn in the delay slot,
6430 it is extracted if XDELAY is nonzero.
6432 The lengths of the various long-branch sequences are 20, 16 and 24
6433 bytes for the portable runtime, non-PIC and PIC cases, respectively. */
6436 output_lbranch (rtx dest
, rtx insn
, int xdelay
)
6440 xoperands
[0] = dest
;
6442 /* First, free up the delay slot. */
6443 if (xdelay
&& dbr_sequence_length () != 0)
6445 /* We can't handle a jump in the delay slot. */
6446 gcc_assert (GET_CODE (NEXT_INSN (insn
)) != JUMP_INSN
);
6448 final_scan_insn (NEXT_INSN (insn
), asm_out_file
,
6451 /* Now delete the delay insn. */
6452 SET_INSN_DELETED (NEXT_INSN (insn
));
6455 /* Output an insn to save %r1. The runtime documentation doesn't
6456 specify whether the "Clean Up" slot in the callers frame can
6457 be clobbered by the callee. It isn't copied by HP's builtin
6458 alloca, so this suggests that it can be clobbered if necessary.
6459 The "Static Link" location is copied by HP builtin alloca, so
6460 we avoid using it. Using the cleanup slot might be a problem
6461 if we have to interoperate with languages that pass cleanup
6462 information. However, it should be possible to handle these
6463 situations with GCC's asm feature.
6465 The "Current RP" slot is reserved for the called procedure, so
6466 we try to use it when we don't have a frame of our own. It's
6467 rather unlikely that we won't have a frame when we need to emit
6470 Really the way to go long term is a register scavenger; goto
6471 the target of the jump and find a register which we can use
6472 as a scratch to hold the value in %r1. Then, we wouldn't have
6473 to free up the delay slot or clobber a slot that may be needed
6474 for other purposes. */
/* 64-bit save of %r1 (presumably under a missing TARGET_64BIT test). */
6477 if (actual_fsize
== 0 && !df_regs_ever_live_p (2))
6478 /* Use the return pointer slot in the frame marker. */
6479 output_asm_insn ("std %%r1,-16(%%r30)", xoperands
);
6481 /* Use the slot at -40 in the frame marker since HP builtin
6482 alloca doesn't copy it. */
6483 output_asm_insn ("std %%r1,-40(%%r30)", xoperands
);
/* 32-bit save of %r1. */
6487 if (actual_fsize
== 0 && !df_regs_ever_live_p (2))
6488 /* Use the return pointer slot in the frame marker. */
6489 output_asm_insn ("stw %%r1,-20(%%r30)", xoperands
);
6491 /* Use the "Clean Up" slot in the frame marker. In GCC,
6492 the only other use of this location is for copying a
6493 floating point double argument from a floating-point
6494 register to two general registers. The copy is done
6495 as an "atomic" operation when outputting a call, so it
6496 won't interfere with our using the location here. */
6497 output_asm_insn ("stw %%r1,-12(%%r30)", xoperands
);
6500 if (TARGET_PORTABLE_RUNTIME
)
6502 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
6503 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands
);
6504 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
/* PIC sequence: pc-relative address of the label via %r1. */
6508 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands
);
6509 if (TARGET_SOM
|| !TARGET_GAS
)
6511 xoperands
[1] = gen_label_rtx ();
6512 output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands
);
6513 targetm
.asm_out
.internal_label (asm_out_file
, "L",
6514 CODE_LABEL_NUMBER (xoperands
[1]));
6515 output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands
);
6519 output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands
);
6520 output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands
);
6522 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
6525 /* Now output a very long branch to the original target. */
6526 output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands
);
6528 /* Now restore the value of %r1 in the delay slot. */
/* 64-bit restore (mirrors the save slot choice above). */
6531 if (actual_fsize
== 0 && !df_regs_ever_live_p (2))
6532 return "ldd -16(%%r30),%%r1";
6534 return "ldd -40(%%r30),%%r1";
/* 32-bit restore. */
6538 if (actual_fsize
== 0 && !df_regs_ever_live_p (2))
6539 return "ldw -20(%%r30),%%r1";
6541 return "ldw -12(%%r30),%%r1";
/* NOTE(review): corrupted extraction -- the `useskip` computation, switch
   on length, several `return buf`/`return "nop"` statements and many case
   labels are among the missing interior lines.  Code preserved
   byte-for-byte; comments only added.  Emits assembler for
   branch-on-bit (bb/extr) conditional branches. */
6545 /* This routine handles all the branch-on-bit conditional branch sequences we
6546 might need to generate. It handles nullification of delay slots,
6547 varying length branches, negated branches and all combinations of the
6548 above. it returns the appropriate output template to emit the branch. */
6551 output_bb (rtx
*operands ATTRIBUTE_UNUSED
, int negated
, rtx insn
, int which
)
6553 static char buf
[100];
6555 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
6556 int length
= get_attr_length (insn
);
6559 /* A conditional branch to the following instruction (e.g. the delay slot) is
6560 asking for a disaster. I do not think this can happen as this pattern
6561 is only used when optimizing; jump optimization should eliminate the
6562 jump. But be prepared just in case. */
6564 if (branch_to_delay_slot_p (insn
))
6567 /* If this is a long branch with its delay slot unfilled, set `nullify'
6568 as it can nullify the delay slot and save a nop. */
6569 if (length
== 8 && dbr_sequence_length () == 0)
6572 /* If this is a short forward conditional branch which did not get
6573 its delay slot filled, the delay slot can still be nullified. */
6574 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
6575 nullify
= forward_branch_p (insn
);
6577 /* A forward branch over a single nullified insn can be done with a
6578 extrs instruction. This avoids a single cycle penalty due to
6579 mis-predicted branch if we fall through (branch not taken). */
6582 && next_real_insn (insn
) != 0
6583 && get_attr_length (next_real_insn (insn
)) == 4
6584 && JUMP_LABEL (insn
) == next_nonnote_insn (next_real_insn (insn
))
6591 /* All short conditional branches except backwards with an unfilled
6595 strcpy (buf
, "{extrs,|extrw,s,}");
6597 strcpy (buf
, "bb,");
6598 if (useskip
&& GET_MODE (operands
[0]) == DImode
)
6599 strcpy (buf
, "extrd,s,*");
6600 else if (GET_MODE (operands
[0]) == DImode
)
6601 strcpy (buf
, "bb,*");
6602 if ((which
== 0 && negated
)
6603 || (which
== 1 && ! negated
))
6608 strcat (buf
, " %0,%1,1,%%r0");
6609 else if (nullify
&& negated
)
6611 if (branch_needs_nop_p (insn
))
6612 strcat (buf
, ",n %0,%1,%3%#");
6614 strcat (buf
, ",n %0,%1,%3");
6616 else if (nullify
&& ! negated
)
6618 if (branch_needs_nop_p (insn
))
6619 strcat (buf
, ",n %0,%1,%2%#");
6621 strcat (buf
, ",n %0,%1,%2");
6623 else if (! nullify
&& negated
)
6624 strcat (buf
, " %0,%1,%3");
6625 else if (! nullify
&& ! negated
)
6626 strcat (buf
, " %0,%1,%2");
6629 /* All long conditionals. Note a short backward branch with an
6630 unfilled delay slot is treated just like a long backward branch
6631 with an unfilled delay slot. */
6633 /* Handle weird backwards branch with a filled delay slot
6634 which is nullified. */
6635 if (dbr_sequence_length () != 0
6636 && ! forward_branch_p (insn
)
6639 strcpy (buf
, "bb,");
6640 if (GET_MODE (operands
[0]) == DImode
)
6642 if ((which
== 0 && negated
)
6643 || (which
== 1 && ! negated
))
6648 strcat (buf
, ",n %0,%1,.+12\n\tb %3");
6650 strcat (buf
, ",n %0,%1,.+12\n\tb %2");
6652 /* Handle short backwards branch with an unfilled delay slot.
6653 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6654 taken and untaken branches. */
6655 else if (dbr_sequence_length () == 0
6656 && ! forward_branch_p (insn
)
6657 && INSN_ADDRESSES_SET_P ()
6658 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
6659 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
6661 strcpy (buf
, "bb,");
6662 if (GET_MODE (operands
[0]) == DImode
)
6664 if ((which
== 0 && negated
)
6665 || (which
== 1 && ! negated
))
6670 strcat (buf
, " %0,%1,%3%#");
6672 strcat (buf
, " %0,%1,%2%#");
6676 if (GET_MODE (operands
[0]) == DImode
)
6677 strcpy (buf
, "extrd,s,*");
6679 strcpy (buf
, "{extrs,|extrw,s,}");
6680 if ((which
== 0 && negated
)
6681 || (which
== 1 && ! negated
))
6685 if (nullify
&& negated
)
6686 strcat (buf
, " %0,%1,1,%%r0\n\tb,n %3");
6687 else if (nullify
&& ! negated
)
6688 strcat (buf
, " %0,%1,1,%%r0\n\tb,n %2");
6690 strcat (buf
, " %0,%1,1,%%r0\n\tb %3");
6692 strcat (buf
, " %0,%1,1,%%r0\n\tb %2");
6697 /* The reversed conditional branch must branch over one additional
6698 instruction if the delay slot is filled and needs to be extracted
6699 by output_lbranch. If the delay slot is empty or this is a
6700 nullified forward branch, the instruction after the reversed
6701 condition branch must be nullified. */
6702 if (dbr_sequence_length () == 0
6703 || (nullify
&& forward_branch_p (insn
)))
6707 operands
[4] = GEN_INT (length
);
6712 operands
[4] = GEN_INT (length
+ 4);
6715 if (GET_MODE (operands
[0]) == DImode
)
6716 strcpy (buf
, "bb,*");
6718 strcpy (buf
, "bb,");
6719 if ((which
== 0 && negated
)
6720 || (which
== 1 && !negated
))
6725 strcat (buf
, ",n %0,%1,.+%4");
6727 strcat (buf
, " %0,%1,.+%4");
6728 output_asm_insn (buf
, operands
);
6729 return output_lbranch (negated
? operands
[3] : operands
[2],
/* NOTE(review): corrupted extraction -- parallel in structure to output_bb
   above but for variable-bit (bvb/%sar) branches; the same categories of
   interior lines (useskip computation, length switch, returns, case
   labels) are missing.  Code preserved byte-for-byte; comments only
   added. */
6735 /* This routine handles all the branch-on-variable-bit conditional branch
6736 sequences we might need to generate. It handles nullification of delay
6737 slots, varying length branches, negated branches and all combinations
6738 of the above. it returns the appropriate output template to emit the
6742 output_bvb (rtx
*operands ATTRIBUTE_UNUSED
, int negated
, rtx insn
, int which
)
6744 static char buf
[100];
6746 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
6747 int length
= get_attr_length (insn
);
6750 /* A conditional branch to the following instruction (e.g. the delay slot) is
6751 asking for a disaster. I do not think this can happen as this pattern
6752 is only used when optimizing; jump optimization should eliminate the
6753 jump. But be prepared just in case. */
6755 if (branch_to_delay_slot_p (insn
))
6758 /* If this is a long branch with its delay slot unfilled, set `nullify'
6759 as it can nullify the delay slot and save a nop. */
6760 if (length
== 8 && dbr_sequence_length () == 0)
6763 /* If this is a short forward conditional branch which did not get
6764 its delay slot filled, the delay slot can still be nullified. */
6765 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
6766 nullify
= forward_branch_p (insn
);
6768 /* A forward branch over a single nullified insn can be done with a
6769 extrs instruction. This avoids a single cycle penalty due to
6770 mis-predicted branch if we fall through (branch not taken). */
6773 && next_real_insn (insn
) != 0
6774 && get_attr_length (next_real_insn (insn
)) == 4
6775 && JUMP_LABEL (insn
) == next_nonnote_insn (next_real_insn (insn
))
6782 /* All short conditional branches except backwards with an unfilled
6786 strcpy (buf
, "{vextrs,|extrw,s,}");
6788 strcpy (buf
, "{bvb,|bb,}");
6789 if (useskip
&& GET_MODE (operands
[0]) == DImode
)
6790 strcpy (buf
, "extrd,s,*");
6791 else if (GET_MODE (operands
[0]) == DImode
)
6792 strcpy (buf
, "bb,*");
6793 if ((which
== 0 && negated
)
6794 || (which
== 1 && ! negated
))
6799 strcat (buf
, "{ %0,1,%%r0| %0,%%sar,1,%%r0}");
6800 else if (nullify
&& negated
)
6802 if (branch_needs_nop_p (insn
))
6803 strcat (buf
, "{,n %0,%3%#|,n %0,%%sar,%3%#}");
6805 strcat (buf
, "{,n %0,%3|,n %0,%%sar,%3}");
6807 else if (nullify
&& ! negated
)
6809 if (branch_needs_nop_p (insn
))
6810 strcat (buf
, "{,n %0,%2%#|,n %0,%%sar,%2%#}");
6812 strcat (buf
, "{,n %0,%2|,n %0,%%sar,%2}");
6814 else if (! nullify
&& negated
)
6815 strcat (buf
, "{ %0,%3| %0,%%sar,%3}");
6816 else if (! nullify
&& ! negated
)
6817 strcat (buf
, "{ %0,%2| %0,%%sar,%2}");
6820 /* All long conditionals. Note a short backward branch with an
6821 unfilled delay slot is treated just like a long backward branch
6822 with an unfilled delay slot. */
6824 /* Handle weird backwards branch with a filled delay slot
6825 which is nullified. */
6826 if (dbr_sequence_length () != 0
6827 && ! forward_branch_p (insn
)
6830 strcpy (buf
, "{bvb,|bb,}");
6831 if (GET_MODE (operands
[0]) == DImode
)
6833 if ((which
== 0 && negated
)
6834 || (which
== 1 && ! negated
))
6839 strcat (buf
, "{,n %0,.+12\n\tb %3|,n %0,%%sar,.+12\n\tb %3}");
6841 strcat (buf
, "{,n %0,.+12\n\tb %2|,n %0,%%sar,.+12\n\tb %2}");
6843 /* Handle short backwards branch with an unfilled delay slot.
6844 Using a bb;nop rather than extrs;bl saves 1 cycle for both
6845 taken and untaken branches. */
6846 else if (dbr_sequence_length () == 0
6847 && ! forward_branch_p (insn
)
6848 && INSN_ADDRESSES_SET_P ()
6849 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
6850 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
6852 strcpy (buf
, "{bvb,|bb,}");
6853 if (GET_MODE (operands
[0]) == DImode
)
6855 if ((which
== 0 && negated
)
6856 || (which
== 1 && ! negated
))
6861 strcat (buf
, "{ %0,%3%#| %0,%%sar,%3%#}");
6863 strcat (buf
, "{ %0,%2%#| %0,%%sar,%2%#}");
6867 strcpy (buf
, "{vextrs,|extrw,s,}");
6868 if (GET_MODE (operands
[0]) == DImode
)
6869 strcpy (buf
, "extrd,s,*");
6870 if ((which
== 0 && negated
)
6871 || (which
== 1 && ! negated
))
6875 if (nullify
&& negated
)
6876 strcat (buf
, "{ %0,1,%%r0\n\tb,n %3| %0,%%sar,1,%%r0\n\tb,n %3}");
6877 else if (nullify
&& ! negated
)
6878 strcat (buf
, "{ %0,1,%%r0\n\tb,n %2| %0,%%sar,1,%%r0\n\tb,n %2}");
6880 strcat (buf
, "{ %0,1,%%r0\n\tb %3| %0,%%sar,1,%%r0\n\tb %3}");
6882 strcat (buf
, "{ %0,1,%%r0\n\tb %2| %0,%%sar,1,%%r0\n\tb %2}");
6887 /* The reversed conditional branch must branch over one additional
6888 instruction if the delay slot is filled and needs to be extracted
6889 by output_lbranch. If the delay slot is empty or this is a
6890 nullified forward branch, the instruction after the reversed
6891 condition branch must be nullified. */
6892 if (dbr_sequence_length () == 0
6893 || (nullify
&& forward_branch_p (insn
)))
6897 operands
[4] = GEN_INT (length
);
6902 operands
[4] = GEN_INT (length
+ 4);
6905 if (GET_MODE (operands
[0]) == DImode
)
6906 strcpy (buf
, "bb,*");
6908 strcpy (buf
, "{bvb,|bb,}");
6909 if ((which
== 0 && negated
)
6910 || (which
== 1 && !negated
))
6915 strcat (buf
, ",n {%0,.+%4|%0,%%sar,.+%4}");
6917 strcat (buf
, " {%0,.+%4|%0,%%sar,.+%4}");
6918 output_asm_insn (buf
, operands
);
6919 return output_lbranch (negated
? operands
[3] : operands
[2],
/* NOTE(review): corrupted extraction -- the which_alternative dispatch
   braces, length switch heads, several returns and the `xdelay`
   computation are among the missing interior lines.  Code preserved
   byte-for-byte; comments only added.  Emits the decrement-and-branch
   (addib) template, including the gross FP-register and memory reload
   alternatives. */
6925 /* Return the output template for emitting a dbra type insn.
6927 Note it may perform some output operations on its own before
6928 returning the final output string. */
6930 output_dbra (rtx
*operands
, rtx insn
, int which_alternative
)
6932 int length
= get_attr_length (insn
);
6934 /* A conditional branch to the following instruction (e.g. the delay slot) is
6935 asking for a disaster. Be prepared! */
6937 if (branch_to_delay_slot_p (insn
))
/* Degenerate branch-to-next-insn: just perform the decrement. */
6939 if (which_alternative
== 0)
6940 return "ldo %1(%0),%0";
6941 else if (which_alternative
== 1)
/* Counter lives in an FP register: bounce it through -16(%r30). */
6943 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands
);
6944 output_asm_insn ("ldw -16(%%r30),%4", operands
);
6945 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands
);
6946 return "{fldws|fldw} -16(%%r30),%0";
/* Counter lives in memory. */
6950 output_asm_insn ("ldw %0,%4", operands
);
6951 return "ldo %1(%4),%4\n\tstw %4,%0";
6955 if (which_alternative
== 0)
6957 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
6960 /* If this is a long branch with its delay slot unfilled, set `nullify'
6961 as it can nullify the delay slot and save a nop. */
6962 if (length
== 8 && dbr_sequence_length () == 0)
6965 /* If this is a short forward conditional branch which did not get
6966 its delay slot filled, the delay slot can still be nullified. */
6967 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
6968 nullify
= forward_branch_p (insn
);
6975 if (branch_needs_nop_p (insn
))
6976 return "addib,%C2,n %1,%0,%3%#";
6978 return "addib,%C2,n %1,%0,%3";
6981 return "addib,%C2 %1,%0,%3";
6984 /* Handle weird backwards branch with a fulled delay slot
6985 which is nullified. */
6986 if (dbr_sequence_length () != 0
6987 && ! forward_branch_p (insn
)
6989 return "addib,%N2,n %1,%0,.+12\n\tb %3";
6990 /* Handle short backwards branch with an unfilled delay slot.
6991 Using a addb;nop rather than addi;bl saves 1 cycle for both
6992 taken and untaken branches. */
6993 else if (dbr_sequence_length () == 0
6994 && ! forward_branch_p (insn
)
6995 && INSN_ADDRESSES_SET_P ()
6996 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
6997 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
6998 return "addib,%C2 %1,%0,%3%#";
7000 /* Handle normal cases. */
7002 return "addi,%N2 %1,%0,%0\n\tb,n %3";
7004 return "addi,%N2 %1,%0,%0\n\tb %3";
7007 /* The reversed conditional branch must branch over one additional
7008 instruction if the delay slot is filled and needs to be extracted
7009 by output_lbranch. If the delay slot is empty or this is a
7010 nullified forward branch, the instruction after the reversed
7011 condition branch must be nullified. */
7012 if (dbr_sequence_length () == 0
7013 || (nullify
&& forward_branch_p (insn
)))
7017 operands
[4] = GEN_INT (length
);
7022 operands
[4] = GEN_INT (length
+ 4);
7026 output_asm_insn ("addib,%N2,n %1,%0,.+%4", operands
);
7028 output_asm_insn ("addib,%N2 %1,%0,.+%4", operands
);
7030 return output_lbranch (operands
[3], insn
, xdelay
);
7034 /* Deal with gross reload from FP register case. */
7035 else if (which_alternative
== 1)
7037 /* Move loop counter from FP register to MEM then into a GR,
7038 increment the GR, store the GR into MEM, and finally reload
7039 the FP register from MEM from within the branch's delay slot. */
7040 output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
7042 output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands
);
7044 return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
7045 else if (length
== 28)
7046 return "{comclr|cmpclr},%B2 %%r0,%4,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7049 operands
[5] = GEN_INT (length
- 16);
7050 output_asm_insn ("{comb|cmpb},%B2 %%r0,%4,.+%5", operands
);
7051 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands
);
7052 return output_lbranch (operands
[3], insn
, 0);
7055 /* Deal with gross reload from memory case. */
7058 /* Reload loop counter from memory, the store back to memory
7059 happens in the branch's delay slot. */
7060 output_asm_insn ("ldw %0,%4", operands
);
7062 return "addib,%C2 %1,%4,%3\n\tstw %4,%0";
7063 else if (length
== 16)
7064 return "addi,%N2 %1,%4,%4\n\tb %3\n\tstw %4,%0";
7067 operands
[5] = GEN_INT (length
- 4);
7068 output_asm_insn ("addib,%N2 %1,%4,.+%5\n\tstw %4,%0", operands
);
7069 return output_lbranch (operands
[3], insn
, 0);
7074 /* Return the output template for emitting a movb type insn.
7076 Note it may perform some output operations on its own before
7077 returning the final output string. */
7079 output_movb (rtx
*operands
, rtx insn
, int which_alternative
,
7080 int reverse_comparison
)
7082 int length
= get_attr_length (insn
);
7084 /* A conditional branch to the following instruction (e.g. the delay slot) is
7085 asking for a disaster. Be prepared! */
7087 if (branch_to_delay_slot_p (insn
))
7089 if (which_alternative
== 0)
7090 return "copy %1,%0";
7091 else if (which_alternative
== 1)
7093 output_asm_insn ("stw %1,-16(%%r30)", operands
);
7094 return "{fldws|fldw} -16(%%r30),%0";
7096 else if (which_alternative
== 2)
7102 /* Support the second variant. */
7103 if (reverse_comparison
)
7104 PUT_CODE (operands
[2], reverse_condition (GET_CODE (operands
[2])));
7106 if (which_alternative
== 0)
7108 int nullify
= INSN_ANNULLED_BRANCH_P (insn
);
7111 /* If this is a long branch with its delay slot unfilled, set `nullify'
7112 as it can nullify the delay slot and save a nop. */
7113 if (length
== 8 && dbr_sequence_length () == 0)
7116 /* If this is a short forward conditional branch which did not get
7117 its delay slot filled, the delay slot can still be nullified. */
7118 if (! nullify
&& length
== 4 && dbr_sequence_length () == 0)
7119 nullify
= forward_branch_p (insn
);
7126 if (branch_needs_nop_p (insn
))
7127 return "movb,%C2,n %1,%0,%3%#";
7129 return "movb,%C2,n %1,%0,%3";
7132 return "movb,%C2 %1,%0,%3";
7135 /* Handle weird backwards branch with a filled delay slot
7136 which is nullified. */
7137 if (dbr_sequence_length () != 0
7138 && ! forward_branch_p (insn
)
7140 return "movb,%N2,n %1,%0,.+12\n\tb %3";
7142 /* Handle short backwards branch with an unfilled delay slot.
7143 Using a movb;nop rather than or;bl saves 1 cycle for both
7144 taken and untaken branches. */
7145 else if (dbr_sequence_length () == 0
7146 && ! forward_branch_p (insn
)
7147 && INSN_ADDRESSES_SET_P ()
7148 && VAL_14_BITS_P (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (insn
)))
7149 - INSN_ADDRESSES (INSN_UID (insn
)) - 8))
7150 return "movb,%C2 %1,%0,%3%#";
7151 /* Handle normal cases. */
7153 return "or,%N2 %1,%%r0,%0\n\tb,n %3";
7155 return "or,%N2 %1,%%r0,%0\n\tb %3";
7158 /* The reversed conditional branch must branch over one additional
7159 instruction if the delay slot is filled and needs to be extracted
7160 by output_lbranch. If the delay slot is empty or this is a
7161 nullified forward branch, the instruction after the reversed
7162 condition branch must be nullified. */
7163 if (dbr_sequence_length () == 0
7164 || (nullify
&& forward_branch_p (insn
)))
7168 operands
[4] = GEN_INT (length
);
7173 operands
[4] = GEN_INT (length
+ 4);
7177 output_asm_insn ("movb,%N2,n %1,%0,.+%4", operands
);
7179 output_asm_insn ("movb,%N2 %1,%0,.+%4", operands
);
7181 return output_lbranch (operands
[3], insn
, xdelay
);
7184 /* Deal with gross reload for FP destination register case. */
7185 else if (which_alternative
== 1)
7187 /* Move source register to MEM, perform the branch test, then
7188 finally load the FP register from MEM from within the branch's
7190 output_asm_insn ("stw %1,-16(%%r30)", operands
);
7192 return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
7193 else if (length
== 16)
7194 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\t{fldws|fldw} -16(%%r30),%0";
7197 operands
[4] = GEN_INT (length
- 4);
7198 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4", operands
);
7199 output_asm_insn ("{fldws|fldw} -16(%%r30),%0", operands
);
7200 return output_lbranch (operands
[3], insn
, 0);
7203 /* Deal with gross reload from memory case. */
7204 else if (which_alternative
== 2)
7206 /* Reload loop counter from memory, the store back to memory
7207 happens in the branch's delay slot. */
7209 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tstw %1,%0";
7210 else if (length
== 12)
7211 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tstw %1,%0";
7214 operands
[4] = GEN_INT (length
);
7215 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tstw %1,%0",
7217 return output_lbranch (operands
[3], insn
, 0);
7220 /* Handle SAR as a destination. */
7224 return "{comb|cmpb},%S2 %%r0,%1,%3\n\tmtsar %r1";
7225 else if (length
== 12)
7226 return "{comclr|cmpclr},%B2 %%r0,%1,%%r0\n\tb %3\n\tmtsar %r1";
7229 operands
[4] = GEN_INT (length
);
7230 output_asm_insn ("{comb|cmpb},%B2 %%r0,%1,.+%4\n\tmtsar %r1",
7232 return output_lbranch (operands
[3], insn
, 0);
7237 /* Copy any FP arguments in INSN into integer registers. */
7239 copy_fp_args (rtx insn
)
7244 for (link
= CALL_INSN_FUNCTION_USAGE (insn
); link
; link
= XEXP (link
, 1))
7246 int arg_mode
, regno
;
7247 rtx use
= XEXP (link
, 0);
7249 if (! (GET_CODE (use
) == USE
7250 && GET_CODE (XEXP (use
, 0)) == REG
7251 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use
, 0)))))
7254 arg_mode
= GET_MODE (XEXP (use
, 0));
7255 regno
= REGNO (XEXP (use
, 0));
7257 /* Is it a floating point register? */
7258 if (regno
>= 32 && regno
<= 39)
7260 /* Copy the FP register into an integer register via memory. */
7261 if (arg_mode
== SFmode
)
7263 xoperands
[0] = XEXP (use
, 0);
7264 xoperands
[1] = gen_rtx_REG (SImode
, 26 - (regno
- 32) / 2);
7265 output_asm_insn ("{fstws|fstw} %0,-16(%%sr0,%%r30)", xoperands
);
7266 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands
);
7270 xoperands
[0] = XEXP (use
, 0);
7271 xoperands
[1] = gen_rtx_REG (DImode
, 25 - (regno
- 34) / 2);
7272 output_asm_insn ("{fstds|fstd} %0,-16(%%sr0,%%r30)", xoperands
);
7273 output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands
);
7274 output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands
);
7280 /* Compute length of the FP argument copy sequence for INSN. */
7282 length_fp_args (rtx insn
)
7287 for (link
= CALL_INSN_FUNCTION_USAGE (insn
); link
; link
= XEXP (link
, 1))
7289 int arg_mode
, regno
;
7290 rtx use
= XEXP (link
, 0);
7292 if (! (GET_CODE (use
) == USE
7293 && GET_CODE (XEXP (use
, 0)) == REG
7294 && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use
, 0)))))
7297 arg_mode
= GET_MODE (XEXP (use
, 0));
7298 regno
= REGNO (XEXP (use
, 0));
7300 /* Is it a floating point register? */
7301 if (regno
>= 32 && regno
<= 39)
7303 if (arg_mode
== SFmode
)
7313 /* Return the attribute length for the millicode call instruction INSN.
7314 The length must match the code generated by output_millicode_call.
7315 We include the delay slot in the returned length as it is better to
7316 over estimate the length than to under estimate it. */
7319 attr_length_millicode_call (rtx insn
)
7321 unsigned long distance
= -1;
7322 unsigned long total
= IN_NAMED_SECTION_P (cfun
->decl
) ? 0 : total_code_bytes
;
7324 if (INSN_ADDRESSES_SET_P ())
7326 distance
= (total
+ insn_current_reference_address (insn
));
7327 if (distance
< total
)
7333 if (!TARGET_LONG_CALLS
&& distance
< 7600000)
7338 else if (TARGET_PORTABLE_RUNTIME
)
7342 if (!TARGET_LONG_CALLS
&& distance
< 240000)
7345 if (TARGET_LONG_ABS_CALL
&& !flag_pic
)
7352 /* INSN is a function call. It may have an unconditional jump
7355 CALL_DEST is the routine we are calling. */
7358 output_millicode_call (rtx insn
, rtx call_dest
)
7360 int attr_length
= get_attr_length (insn
);
7361 int seq_length
= dbr_sequence_length ();
7366 xoperands
[0] = call_dest
;
7367 xoperands
[2] = gen_rtx_REG (Pmode
, TARGET_64BIT
? 2 : 31);
7369 /* Handle the common case where we are sure that the branch will
7370 reach the beginning of the $CODE$ subspace. The within reach
7371 form of the $$sh_func_adrs call has a length of 28. Because
7372 it has an attribute type of multi, it never has a nonzero
7373 sequence length. The length of the $$sh_func_adrs is the same
7374 as certain out of reach PIC calls to other routines. */
7375 if (!TARGET_LONG_CALLS
7376 && ((seq_length
== 0
7377 && (attr_length
== 12
7378 || (attr_length
== 28 && get_attr_type (insn
) == TYPE_MULTI
)))
7379 || (seq_length
!= 0 && attr_length
== 8)))
7381 output_asm_insn ("{bl|b,l} %0,%2", xoperands
);
7387 /* It might seem that one insn could be saved by accessing
7388 the millicode function using the linkage table. However,
7389 this doesn't work in shared libraries and other dynamically
7390 loaded objects. Using a pc-relative sequence also avoids
7391 problems related to the implicit use of the gp register. */
7392 output_asm_insn ("b,l .+8,%%r1", xoperands
);
7396 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands
);
7397 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands
);
7401 xoperands
[1] = gen_label_rtx ();
7402 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands
);
7403 targetm
.asm_out
.internal_label (asm_out_file
, "L",
7404 CODE_LABEL_NUMBER (xoperands
[1]));
7405 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands
);
7408 output_asm_insn ("bve,l (%%r1),%%r2", xoperands
);
7410 else if (TARGET_PORTABLE_RUNTIME
)
7412 /* Pure portable runtime doesn't allow be/ble; we also don't
7413 have PIC support in the assembler/linker, so this sequence
7416 /* Get the address of our target into %r1. */
7417 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
7418 output_asm_insn ("ldo R'%0(%%r1),%%r1", xoperands
);
7420 /* Get our return address into %r31. */
7421 output_asm_insn ("{bl|b,l} .+8,%%r31", xoperands
);
7422 output_asm_insn ("addi 8,%%r31,%%r31", xoperands
);
7424 /* Jump to our target address in %r1. */
7425 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
7429 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
7431 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31", xoperands
);
7433 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands
);
7437 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands
);
7438 output_asm_insn ("addi 16,%%r1,%%r31", xoperands
);
7440 if (TARGET_SOM
|| !TARGET_GAS
)
7442 /* The HP assembler can generate relocations for the
7443 difference of two symbols. GAS can do this for a
7444 millicode symbol but not an arbitrary external
7445 symbol when generating SOM output. */
7446 xoperands
[1] = gen_label_rtx ();
7447 targetm
.asm_out
.internal_label (asm_out_file
, "L",
7448 CODE_LABEL_NUMBER (xoperands
[1]));
7449 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands
);
7450 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands
);
7454 output_asm_insn ("addil L'%0-$PIC_pcrel$0+8,%%r1", xoperands
);
7455 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+12(%%r1),%%r1",
7459 /* Jump to our target address in %r1. */
7460 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
7464 if (seq_length
== 0)
7465 output_asm_insn ("nop", xoperands
);
7467 /* We are done if there isn't a jump in the delay slot. */
7468 if (seq_length
== 0 || GET_CODE (NEXT_INSN (insn
)) != JUMP_INSN
)
7471 /* This call has an unconditional jump in its delay slot. */
7472 xoperands
[0] = XEXP (PATTERN (NEXT_INSN (insn
)), 1);
7474 /* See if the return address can be adjusted. Use the containing
7475 sequence insn's address. */
7476 if (INSN_ADDRESSES_SET_P ())
7478 seq_insn
= NEXT_INSN (PREV_INSN (XVECEXP (final_sequence
, 0, 0)));
7479 distance
= (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn
))))
7480 - INSN_ADDRESSES (INSN_UID (seq_insn
)) - 8);
7482 if (VAL_14_BITS_P (distance
))
7484 xoperands
[1] = gen_label_rtx ();
7485 output_asm_insn ("ldo %0-%1(%2),%2", xoperands
);
7486 targetm
.asm_out
.internal_label (asm_out_file
, "L",
7487 CODE_LABEL_NUMBER (xoperands
[1]));
7490 /* ??? This branch may not reach its target. */
7491 output_asm_insn ("nop\n\tb,n %0", xoperands
);
7494 /* ??? This branch may not reach its target. */
7495 output_asm_insn ("nop\n\tb,n %0", xoperands
);
7497 /* Delete the jump. */
7498 SET_INSN_DELETED (NEXT_INSN (insn
));
7503 /* Return the attribute length of the call instruction INSN. The SIBCALL
7504 flag indicates whether INSN is a regular call or a sibling call. The
7505 length returned must be longer than the code actually generated by
7506 output_call. Since branch shortening is done before delay branch
7507 sequencing, there is no way to determine whether or not the delay
7508 slot will be filled during branch shortening. Even when the delay
7509 slot is filled, we may have to add a nop if the delay slot contains
7510 a branch that can't reach its target. Thus, we always have to include
7511 the delay slot in the length estimate. This used to be done in
7512 pa_adjust_insn_length but we do it here now as some sequences always
7513 fill the delay slot and we can save four bytes in the estimate for
7517 attr_length_call (rtx insn
, int sibcall
)
7520 rtx call
, call_dest
;
7523 rtx pat
= PATTERN (insn
);
7524 unsigned long distance
= -1;
7526 gcc_assert (GET_CODE (insn
) == CALL_INSN
);
7528 if (INSN_ADDRESSES_SET_P ())
7530 unsigned long total
;
7532 total
= IN_NAMED_SECTION_P (cfun
->decl
) ? 0 : total_code_bytes
;
7533 distance
= (total
+ insn_current_reference_address (insn
));
7534 if (distance
< total
)
7538 gcc_assert (GET_CODE (pat
) == PARALLEL
);
7540 /* Get the call rtx. */
7541 call
= XVECEXP (pat
, 0, 0);
7542 if (GET_CODE (call
) == SET
)
7543 call
= SET_SRC (call
);
7545 gcc_assert (GET_CODE (call
) == CALL
);
7547 /* Determine if this is a local call. */
7548 call_dest
= XEXP (XEXP (call
, 0), 0);
7549 call_decl
= SYMBOL_REF_DECL (call_dest
);
7550 local_call
= call_decl
&& targetm
.binds_local_p (call_decl
);
7552 /* pc-relative branch. */
7553 if (!TARGET_LONG_CALLS
7554 && ((TARGET_PA_20
&& !sibcall
&& distance
< 7600000)
7555 || distance
< 240000))
7558 /* 64-bit plabel sequence. */
7559 else if (TARGET_64BIT
&& !local_call
)
7560 length
+= sibcall
? 28 : 24;
7562 /* non-pic long absolute branch sequence. */
7563 else if ((TARGET_LONG_ABS_CALL
|| local_call
) && !flag_pic
)
7566 /* long pc-relative branch sequence. */
7567 else if (TARGET_LONG_PIC_SDIFF_CALL
7568 || (TARGET_GAS
&& !TARGET_SOM
7569 && (TARGET_LONG_PIC_PCREL_CALL
|| local_call
)))
7573 if (!TARGET_PA_20
&& !TARGET_NO_SPACE_REGS
&& (!local_call
|| flag_pic
))
7577 /* 32-bit plabel sequence. */
7583 length
+= length_fp_args (insn
);
7593 if (!TARGET_NO_SPACE_REGS
&& (!local_call
|| flag_pic
))
7601 /* INSN is a function call. It may have an unconditional jump
7604 CALL_DEST is the routine we are calling. */
7607 output_call (rtx insn
, rtx call_dest
, int sibcall
)
7609 int delay_insn_deleted
= 0;
7610 int delay_slot_filled
= 0;
7611 int seq_length
= dbr_sequence_length ();
7612 tree call_decl
= SYMBOL_REF_DECL (call_dest
);
7613 int local_call
= call_decl
&& targetm
.binds_local_p (call_decl
);
7616 xoperands
[0] = call_dest
;
7618 /* Handle the common case where we're sure that the branch will reach
7619 the beginning of the "$CODE$" subspace. This is the beginning of
7620 the current function if we are in a named section. */
7621 if (!TARGET_LONG_CALLS
&& attr_length_call (insn
, sibcall
) == 8)
7623 xoperands
[1] = gen_rtx_REG (word_mode
, sibcall
? 0 : 2);
7624 output_asm_insn ("{bl|b,l} %0,%1", xoperands
);
7628 if (TARGET_64BIT
&& !local_call
)
7630 /* ??? As far as I can tell, the HP linker doesn't support the
7631 long pc-relative sequence described in the 64-bit runtime
7632 architecture. So, we use a slightly longer indirect call. */
7633 xoperands
[0] = get_deferred_plabel (call_dest
);
7634 xoperands
[1] = gen_label_rtx ();
7636 /* If this isn't a sibcall, we put the load of %r27 into the
7637 delay slot. We can't do this in a sibcall as we don't
7638 have a second call-clobbered scratch register available. */
7640 && GET_CODE (NEXT_INSN (insn
)) != JUMP_INSN
7643 final_scan_insn (NEXT_INSN (insn
), asm_out_file
,
7646 /* Now delete the delay insn. */
7647 SET_INSN_DELETED (NEXT_INSN (insn
));
7648 delay_insn_deleted
= 1;
7651 output_asm_insn ("addil LT'%0,%%r27", xoperands
);
7652 output_asm_insn ("ldd RT'%0(%%r1),%%r1", xoperands
);
7653 output_asm_insn ("ldd 0(%%r1),%%r1", xoperands
);
7657 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands
);
7658 output_asm_insn ("ldd 16(%%r1),%%r1", xoperands
);
7659 output_asm_insn ("bve (%%r1)", xoperands
);
7663 output_asm_insn ("ldd 16(%%r1),%%r2", xoperands
);
7664 output_asm_insn ("bve,l (%%r2),%%r2", xoperands
);
7665 output_asm_insn ("ldd 24(%%r1),%%r27", xoperands
);
7666 delay_slot_filled
= 1;
7671 int indirect_call
= 0;
7673 /* Emit a long call. There are several different sequences
7674 of increasing length and complexity. In most cases,
7675 they don't allow an instruction in the delay slot. */
7676 if (!((TARGET_LONG_ABS_CALL
|| local_call
) && !flag_pic
)
7677 && !TARGET_LONG_PIC_SDIFF_CALL
7678 && !(TARGET_GAS
&& !TARGET_SOM
7679 && (TARGET_LONG_PIC_PCREL_CALL
|| local_call
))
7684 && GET_CODE (NEXT_INSN (insn
)) != JUMP_INSN
7688 || ((TARGET_LONG_ABS_CALL
|| local_call
) && !flag_pic
)))
7690 /* A non-jump insn in the delay slot. By definition we can
7691 emit this insn before the call (and in fact before argument
7693 final_scan_insn (NEXT_INSN (insn
), asm_out_file
, optimize
, 0,
7696 /* Now delete the delay insn. */
7697 SET_INSN_DELETED (NEXT_INSN (insn
));
7698 delay_insn_deleted
= 1;
7701 if ((TARGET_LONG_ABS_CALL
|| local_call
) && !flag_pic
)
7703 /* This is the best sequence for making long calls in
7704 non-pic code. Unfortunately, GNU ld doesn't provide
7705 the stub needed for external calls, and GAS's support
7706 for this with the SOM linker is buggy. It is safe
7707 to use this for local calls. */
7708 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
7710 output_asm_insn ("be R'%0(%%sr4,%%r1)", xoperands
);
7714 output_asm_insn ("be,l R'%0(%%sr4,%%r1),%%sr0,%%r31",
7717 output_asm_insn ("ble R'%0(%%sr4,%%r1)", xoperands
);
7719 output_asm_insn ("copy %%r31,%%r2", xoperands
);
7720 delay_slot_filled
= 1;
7725 if (TARGET_LONG_PIC_SDIFF_CALL
)
7727 /* The HP assembler and linker can handle relocations
7728 for the difference of two symbols. The HP assembler
7729 recognizes the sequence as a pc-relative call and
7730 the linker provides stubs when needed. */
7731 xoperands
[1] = gen_label_rtx ();
7732 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands
);
7733 output_asm_insn ("addil L'%0-%l1,%%r1", xoperands
);
7734 targetm
.asm_out
.internal_label (asm_out_file
, "L",
7735 CODE_LABEL_NUMBER (xoperands
[1]));
7736 output_asm_insn ("ldo R'%0-%l1(%%r1),%%r1", xoperands
);
7738 else if (TARGET_GAS
&& !TARGET_SOM
7739 && (TARGET_LONG_PIC_PCREL_CALL
|| local_call
))
7741 /* GAS currently can't generate the relocations that
7742 are needed for the SOM linker under HP-UX using this
7743 sequence. The GNU linker doesn't generate the stubs
7744 that are needed for external calls on TARGET_ELF32
7745 with this sequence. For now, we have to use a
7746 longer plabel sequence when using GAS. */
7747 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands
);
7748 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1",
7750 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1",
7755 /* Emit a long plabel-based call sequence. This is
7756 essentially an inline implementation of $$dyncall.
7757 We don't actually try to call $$dyncall as this is
7758 as difficult as calling the function itself. */
7759 xoperands
[0] = get_deferred_plabel (call_dest
);
7760 xoperands
[1] = gen_label_rtx ();
7762 /* Since the call is indirect, FP arguments in registers
7763 need to be copied to the general registers. Then, the
7764 argument relocation stub will copy them back. */
7766 copy_fp_args (insn
);
7770 output_asm_insn ("addil LT'%0,%%r19", xoperands
);
7771 output_asm_insn ("ldw RT'%0(%%r1),%%r1", xoperands
);
7772 output_asm_insn ("ldw 0(%%r1),%%r1", xoperands
);
7776 output_asm_insn ("addil LR'%0-$global$,%%r27",
7778 output_asm_insn ("ldw RR'%0-$global$(%%r1),%%r1",
7782 output_asm_insn ("bb,>=,n %%r1,30,.+16", xoperands
);
7783 output_asm_insn ("depi 0,31,2,%%r1", xoperands
);
7784 output_asm_insn ("ldw 4(%%sr0,%%r1),%%r19", xoperands
);
7785 output_asm_insn ("ldw 0(%%sr0,%%r1),%%r1", xoperands
);
7787 if (!sibcall
&& !TARGET_PA_20
)
7789 output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands
);
7790 if (TARGET_NO_SPACE_REGS
|| (local_call
&& !flag_pic
))
7791 output_asm_insn ("addi 8,%%r2,%%r2", xoperands
);
7793 output_asm_insn ("addi 16,%%r2,%%r2", xoperands
);
7800 output_asm_insn ("bve (%%r1)", xoperands
);
7805 output_asm_insn ("bve,l (%%r1),%%r2", xoperands
);
7806 output_asm_insn ("stw %%r2,-24(%%sp)", xoperands
);
7807 delay_slot_filled
= 1;
7810 output_asm_insn ("bve,l (%%r1),%%r2", xoperands
);
7815 if (!TARGET_NO_SPACE_REGS
&& (!local_call
|| flag_pic
))
7816 output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
7821 if (TARGET_NO_SPACE_REGS
|| (local_call
&& !flag_pic
))
7822 output_asm_insn ("be 0(%%sr4,%%r1)", xoperands
);
7824 output_asm_insn ("be 0(%%sr0,%%r1)", xoperands
);
7828 if (TARGET_NO_SPACE_REGS
|| (local_call
&& !flag_pic
))
7829 output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands
);
7831 output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands
);
7834 output_asm_insn ("stw %%r31,-24(%%sp)", xoperands
);
7836 output_asm_insn ("copy %%r31,%%r2", xoperands
);
7837 delay_slot_filled
= 1;
7844 if (!delay_slot_filled
&& (seq_length
== 0 || delay_insn_deleted
))
7845 output_asm_insn ("nop", xoperands
);
7847 /* We are done if there isn't a jump in the delay slot. */
7849 || delay_insn_deleted
7850 || GET_CODE (NEXT_INSN (insn
)) != JUMP_INSN
)
7853 /* A sibcall should never have a branch in the delay slot. */
7854 gcc_assert (!sibcall
);
7856 /* This call has an unconditional jump in its delay slot. */
7857 xoperands
[0] = XEXP (PATTERN (NEXT_INSN (insn
)), 1);
7859 if (!delay_slot_filled
&& INSN_ADDRESSES_SET_P ())
7861 /* See if the return address can be adjusted. Use the containing
7862 sequence insn's address. This would break the regular call/return@
7863 relationship assumed by the table based eh unwinder, so only do that
7864 if the call is not possibly throwing. */
7865 rtx seq_insn
= NEXT_INSN (PREV_INSN (XVECEXP (final_sequence
, 0, 0)));
7866 int distance
= (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn
))))
7867 - INSN_ADDRESSES (INSN_UID (seq_insn
)) - 8);
7869 if (VAL_14_BITS_P (distance
)
7870 && !(can_throw_internal (insn
) || can_throw_external (insn
)))
7872 xoperands
[1] = gen_label_rtx ();
7873 output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands
);
7874 targetm
.asm_out
.internal_label (asm_out_file
, "L",
7875 CODE_LABEL_NUMBER (xoperands
[1]));
7878 output_asm_insn ("nop\n\tb,n %0", xoperands
);
7881 output_asm_insn ("b,n %0", xoperands
);
7883 /* Delete the jump. */
7884 SET_INSN_DELETED (NEXT_INSN (insn
));
7889 /* Return the attribute length of the indirect call instruction INSN.
7890 The length must match the code generated by output_indirect call.
7891 The returned length includes the delay slot. Currently, the delay
7892 slot of an indirect call sequence is not exposed and it is used by
7893 the sequence itself. */
7896 attr_length_indirect_call (rtx insn
)
7898 unsigned long distance
= -1;
7899 unsigned long total
= IN_NAMED_SECTION_P (cfun
->decl
) ? 0 : total_code_bytes
;
7901 if (INSN_ADDRESSES_SET_P ())
7903 distance
= (total
+ insn_current_reference_address (insn
));
7904 if (distance
< total
)
7911 if (TARGET_FAST_INDIRECT_CALLS
7912 || (!TARGET_PORTABLE_RUNTIME
7913 && ((TARGET_PA_20
&& !TARGET_SOM
&& distance
< 7600000)
7914 || distance
< 240000)))
7920 if (TARGET_PORTABLE_RUNTIME
)
7923 /* Out of reach, can use ble. */
7928 output_indirect_call (rtx insn
, rtx call_dest
)
7934 xoperands
[0] = call_dest
;
7935 output_asm_insn ("ldd 16(%0),%%r2", xoperands
);
7936 output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands
);
7940 /* First the special case for kernels, level 0 systems, etc. */
7941 if (TARGET_FAST_INDIRECT_CALLS
)
7942 return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
7944 /* Now the normal case -- we can reach $$dyncall directly or
7945 we're sure that we can get there via a long-branch stub.
7947 No need to check target flags as the length uniquely identifies
7948 the remaining cases. */
7949 if (attr_length_indirect_call (insn
) == 8)
7951 /* The HP linker sometimes substitutes a BLE for BL/B,L calls to
7952 $$dyncall. Since BLE uses %r31 as the link register, the 22-bit
7953 variant of the B,L instruction can't be used on the SOM target. */
7954 if (TARGET_PA_20
&& !TARGET_SOM
)
7955 return ".CALL\tARGW0=GR\n\tb,l $$dyncall,%%r2\n\tcopy %%r2,%%r31";
7957 return ".CALL\tARGW0=GR\n\tbl $$dyncall,%%r31\n\tcopy %%r31,%%r2";
7960 /* Long millicode call, but we are not generating PIC or portable runtime
7962 if (attr_length_indirect_call (insn
) == 12)
7963 return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
7965 /* Long millicode call for portable runtime. */
7966 if (attr_length_indirect_call (insn
) == 20)
7967 return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
7969 /* We need a long PIC call to $$dyncall. */
7970 xoperands
[0] = NULL_RTX
;
7971 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands
);
7972 if (TARGET_SOM
|| !TARGET_GAS
)
7974 xoperands
[0] = gen_label_rtx ();
7975 output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands
);
7976 targetm
.asm_out
.internal_label (asm_out_file
, "L",
7977 CODE_LABEL_NUMBER (xoperands
[0]));
7978 output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands
);
7982 output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands
);
7983 output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
7986 output_asm_insn ("blr %%r0,%%r2", xoperands
);
7987 output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands
);
7991 /* Return the total length of the save and restore instructions needed for
7992 the data linkage table pointer (i.e., the PIC register) across the call
7993 instruction INSN. No-return calls do not require a save and restore.
7994 In addition, we may be able to avoid the save and restore for calls
7995 within the same translation unit. */
7998 attr_length_save_restore_dltp (rtx insn
)
8000 if (find_reg_note (insn
, REG_NORETURN
, NULL_RTX
))
8006 /* In HPUX 8.0's shared library scheme, special relocations are needed
8007 for function labels if they might be passed to a function
8008 in a shared library (because shared libraries don't live in code
8009 space), and special magic is needed to construct their address. */
8012 hppa_encode_label (rtx sym
)
8014 const char *str
= XSTR (sym
, 0);
8015 int len
= strlen (str
) + 1;
8018 p
= newstr
= XALLOCAVEC (char, len
+ 1);
8022 XSTR (sym
, 0) = ggc_alloc_string (newstr
, len
);
8026 pa_encode_section_info (tree decl
, rtx rtl
, int first
)
8028 int old_referenced
= 0;
8030 if (!first
&& MEM_P (rtl
) && GET_CODE (XEXP (rtl
, 0)) == SYMBOL_REF
)
8032 = SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) & SYMBOL_FLAG_REFERENCED
;
8034 default_encode_section_info (decl
, rtl
, first
);
8036 if (first
&& TEXT_SPACE_P (decl
))
8038 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
8039 if (TREE_CODE (decl
) == FUNCTION_DECL
)
8040 hppa_encode_label (XEXP (rtl
, 0));
8042 else if (old_referenced
)
8043 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= old_referenced
;
8046 /* This is sort of inverse to pa_encode_section_info. */
8049 pa_strip_name_encoding (const char *str
)
8051 str
+= (*str
== '@');
8052 str
+= (*str
== '*');
8057 function_label_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
8059 return GET_CODE (op
) == SYMBOL_REF
&& FUNCTION_NAME_P (XSTR (op
, 0));
8062 /* Returns 1 if OP is a function label involved in a simple addition
8063 with a constant. Used to keep certain patterns from matching
8064 during instruction combination. */
8066 is_function_label_plus_const (rtx op
)
8068 /* Strip off any CONST. */
8069 if (GET_CODE (op
) == CONST
)
8072 return (GET_CODE (op
) == PLUS
8073 && function_label_operand (XEXP (op
, 0), Pmode
)
8074 && GET_CODE (XEXP (op
, 1)) == CONST_INT
);
8077 /* Output assembly code for a thunk to FUNCTION. */
8080 pa_asm_output_mi_thunk (FILE *file
, tree thunk_fndecl
, HOST_WIDE_INT delta
,
8081 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED
,
8084 static unsigned int current_thunk_number
;
8085 int val_14
= VAL_14_BITS_P (delta
);
8086 unsigned int old_last_address
= last_address
, nbytes
= 0;
8090 xoperands
[0] = XEXP (DECL_RTL (function
), 0);
8091 xoperands
[1] = XEXP (DECL_RTL (thunk_fndecl
), 0);
8092 xoperands
[2] = GEN_INT (delta
);
8094 ASM_OUTPUT_LABEL (file
, XSTR (xoperands
[1], 0));
8095 fprintf (file
, "\t.PROC\n\t.CALLINFO FRAME=0,NO_CALLS\n\t.ENTRY\n");
8097 /* Output the thunk. We know that the function is in the same
8098 translation unit (i.e., the same space) as the thunk, and that
8099 thunks are output after their method. Thus, we don't need an
8100 external branch to reach the function. With SOM and GAS,
8101 functions and thunks are effectively in different sections.
8102 Thus, we can always use a IA-relative branch and the linker
8103 will add a long branch stub if necessary.
8105 However, we have to be careful when generating PIC code on the
8106 SOM port to ensure that the sequence does not transfer to an
8107 import stub for the target function as this could clobber the
8108 return value saved at SP-24. This would also apply to the
8109 32-bit linux port if the multi-space model is implemented. */
8110 if ((!TARGET_LONG_CALLS
&& TARGET_SOM
&& !TARGET_PORTABLE_RUNTIME
8111 && !(flag_pic
&& TREE_PUBLIC (function
))
8112 && (TARGET_GAS
|| last_address
< 262132))
8113 || (!TARGET_LONG_CALLS
&& !TARGET_SOM
&& !TARGET_PORTABLE_RUNTIME
8114 && ((targetm
.have_named_sections
8115 && DECL_SECTION_NAME (thunk_fndecl
) != NULL
8116 /* The GNU 64-bit linker has rather poor stub management.
8117 So, we use a long branch from thunks that aren't in
8118 the same section as the target function. */
8120 && (DECL_SECTION_NAME (thunk_fndecl
)
8121 != DECL_SECTION_NAME (function
)))
8122 || ((DECL_SECTION_NAME (thunk_fndecl
)
8123 == DECL_SECTION_NAME (function
))
8124 && last_address
< 262132)))
8125 || (targetm
.have_named_sections
8126 && DECL_SECTION_NAME (thunk_fndecl
) == NULL
8127 && DECL_SECTION_NAME (function
) == NULL
8128 && last_address
< 262132)
8129 || (!targetm
.have_named_sections
&& last_address
< 262132))))
8132 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8134 output_asm_insn ("b %0", xoperands
);
8138 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8143 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8147 else if (TARGET_64BIT
)
8149 /* We only have one call-clobbered scratch register, so we can't
8150 make use of the delay slot if delta doesn't fit in 14 bits. */
8153 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8154 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8157 output_asm_insn ("b,l .+8,%%r1", xoperands
);
8161 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands
);
8162 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands
);
8166 xoperands
[3] = GEN_INT (val_14
? 8 : 16);
8167 output_asm_insn ("addil L'%0-%1-%3,%%r1", xoperands
);
8172 output_asm_insn ("bv %%r0(%%r1)", xoperands
);
8173 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8178 output_asm_insn ("bv,n %%r0(%%r1)", xoperands
);
8182 else if (TARGET_PORTABLE_RUNTIME
)
8184 output_asm_insn ("ldil L'%0,%%r1", xoperands
);
8185 output_asm_insn ("ldo R'%0(%%r1),%%r22", xoperands
);
8188 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8190 output_asm_insn ("bv %%r0(%%r22)", xoperands
);
8194 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8199 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8203 else if (TARGET_SOM
&& flag_pic
&& TREE_PUBLIC (function
))
8205 /* The function is accessible from outside this module. The only
8206 way to avoid an import stub between the thunk and function is to
8207 call the function directly with an indirect sequence similar to
8208 that used by $$dyncall. This is possible because $$dyncall acts
8209 as the import stub in an indirect call. */
8210 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHN", current_thunk_number
);
8211 xoperands
[3] = gen_rtx_SYMBOL_REF (Pmode
, label
);
8212 output_asm_insn ("addil LT'%3,%%r19", xoperands
);
8213 output_asm_insn ("ldw RT'%3(%%r1),%%r22", xoperands
);
8214 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands
);
8215 output_asm_insn ("bb,>=,n %%r22,30,.+16", xoperands
);
8216 output_asm_insn ("depi 0,31,2,%%r22", xoperands
);
8217 output_asm_insn ("ldw 4(%%sr0,%%r22),%%r19", xoperands
);
8218 output_asm_insn ("ldw 0(%%sr0,%%r22),%%r22", xoperands
);
8222 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8228 output_asm_insn ("bve (%%r22)", xoperands
);
8231 else if (TARGET_NO_SPACE_REGS
)
8233 output_asm_insn ("be 0(%%sr4,%%r22)", xoperands
);
8238 output_asm_insn ("ldsid (%%sr0,%%r22),%%r21", xoperands
);
8239 output_asm_insn ("mtsp %%r21,%%sr0", xoperands
);
8240 output_asm_insn ("be 0(%%sr0,%%r22)", xoperands
);
8245 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8247 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8251 output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands
);
8253 if (TARGET_SOM
|| !TARGET_GAS
)
8255 output_asm_insn ("addil L'%0-%1-8,%%r1", xoperands
);
8256 output_asm_insn ("ldo R'%0-%1-8(%%r1),%%r22", xoperands
);
8260 output_asm_insn ("addil L'%0-$PIC_pcrel$0+4,%%r1", xoperands
);
8261 output_asm_insn ("ldo R'%0-$PIC_pcrel$0+8(%%r1),%%r22", xoperands
);
8265 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8267 output_asm_insn ("bv %%r0(%%r22)", xoperands
);
8271 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8276 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8283 output_asm_insn ("addil L'%2,%%r26", xoperands
);
8285 output_asm_insn ("ldil L'%0,%%r22", xoperands
);
8286 output_asm_insn ("be R'%0(%%sr4,%%r22)", xoperands
);
8290 output_asm_insn ("ldo %2(%%r26),%%r26", xoperands
);
8295 output_asm_insn ("ldo R'%2(%%r1),%%r26", xoperands
);
8300 fprintf (file
, "\t.EXIT\n\t.PROCEND\n");
8302 if (TARGET_SOM
&& TARGET_GAS
)
8304 /* We done with this subspace except possibly for some additional
8305 debug information. Forget that we are in this subspace to ensure
8306 that the next function is output in its own subspace. */
8308 cfun
->machine
->in_nsubspa
= 2;
8311 if (TARGET_SOM
&& flag_pic
&& TREE_PUBLIC (function
))
8313 switch_to_section (data_section
);
8314 output_asm_insn (".align 4", xoperands
);
8315 ASM_OUTPUT_LABEL (file
, label
);
8316 output_asm_insn (".word P'%0", xoperands
);
8319 current_thunk_number
++;
8320 nbytes
= ((nbytes
+ FUNCTION_BOUNDARY
/ BITS_PER_UNIT
- 1)
8321 & ~(FUNCTION_BOUNDARY
/ BITS_PER_UNIT
- 1));
8322 last_address
+= nbytes
;
8323 if (old_last_address
> last_address
)
8324 last_address
= UINT_MAX
;
8325 update_total_code_bytes (nbytes
);
8328 /* Only direct calls to static functions are allowed to be sibling (tail)
8331 This restriction is necessary because some linker generated stubs will
8332 store return pointers into rp' in some cases which might clobber a
8333 live value already in rp'.
8335 In a sibcall the current function and the target function share stack
8336 space. Thus if the path to the current function and the path to the
8337 target function save a value in rp', they save the value into the
8338 same stack slot, which has undesirable consequences.
8340 Because of the deferred binding nature of shared libraries any function
8341 with external scope could be in a different load module and thus require
8342 rp' to be saved when calling that function. So sibcall optimizations
8343 can only be safe for static function.
8345 Note that GCC never needs return value relocations, so we don't have to
8346 worry about static calls with return value relocations (which require
8349 It is safe to perform a sibcall optimization when the target function
8350 will never return. */
8352 pa_function_ok_for_sibcall (tree decl
, tree exp ATTRIBUTE_UNUSED
)
8354 if (TARGET_PORTABLE_RUNTIME
)
8357 /* Sibcalls are ok for TARGET_ELF32 as along as the linker is used in
8358 single subspace mode and the call is not indirect. As far as I know,
8359 there is no operating system support for the multiple subspace mode.
8360 It might be possible to support indirect calls if we didn't use
8361 $$dyncall (see the indirect sequence generated in output_call). */
8363 return (decl
!= NULL_TREE
);
8365 /* Sibcalls are not ok because the arg pointer register is not a fixed
8366 register. This prevents the sibcall optimization from occurring. In
8367 addition, there are problems with stub placement using GNU ld. This
8368 is because a normal sibcall branch uses a 17-bit relocation while
8369 a regular call branch uses a 22-bit relocation. As a result, more
8370 care needs to be taken in the placement of long-branch stubs. */
8374 /* Sibcalls are only ok within a translation unit. */
8375 return (decl
&& !TREE_PUBLIC (decl
));
8378 /* ??? Addition is not commutative on the PA due to the weird implicit
8379 space register selection rules for memory addresses. Therefore, we
8380 don't consider a + b == b + a, as this might be inside a MEM. */
8382 pa_commutative_p (const_rtx x
, int outer_code
)
8384 return (COMMUTATIVE_P (x
)
8385 && (TARGET_NO_SPACE_REGS
8386 || (outer_code
!= UNKNOWN
&& outer_code
!= MEM
)
8387 || GET_CODE (x
) != PLUS
));
8390 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8391 use in fmpyadd instructions. */
8393 fmpyaddoperands (rtx
*operands
)
8395 enum machine_mode mode
= GET_MODE (operands
[0]);
8397 /* Must be a floating point mode. */
8398 if (mode
!= SFmode
&& mode
!= DFmode
)
8401 /* All modes must be the same. */
8402 if (! (mode
== GET_MODE (operands
[1])
8403 && mode
== GET_MODE (operands
[2])
8404 && mode
== GET_MODE (operands
[3])
8405 && mode
== GET_MODE (operands
[4])
8406 && mode
== GET_MODE (operands
[5])))
8409 /* All operands must be registers. */
8410 if (! (GET_CODE (operands
[1]) == REG
8411 && GET_CODE (operands
[2]) == REG
8412 && GET_CODE (operands
[3]) == REG
8413 && GET_CODE (operands
[4]) == REG
8414 && GET_CODE (operands
[5]) == REG
))
8417 /* Only 2 real operands to the addition. One of the input operands must
8418 be the same as the output operand. */
8419 if (! rtx_equal_p (operands
[3], operands
[4])
8420 && ! rtx_equal_p (operands
[3], operands
[5]))
8423 /* Inout operand of add cannot conflict with any operands from multiply. */
8424 if (rtx_equal_p (operands
[3], operands
[0])
8425 || rtx_equal_p (operands
[3], operands
[1])
8426 || rtx_equal_p (operands
[3], operands
[2]))
8429 /* multiply cannot feed into addition operands. */
8430 if (rtx_equal_p (operands
[4], operands
[0])
8431 || rtx_equal_p (operands
[5], operands
[0]))
8434 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8436 && (REGNO_REG_CLASS (REGNO (operands
[0])) != FPUPPER_REGS
8437 || REGNO_REG_CLASS (REGNO (operands
[1])) != FPUPPER_REGS
8438 || REGNO_REG_CLASS (REGNO (operands
[2])) != FPUPPER_REGS
8439 || REGNO_REG_CLASS (REGNO (operands
[3])) != FPUPPER_REGS
8440 || REGNO_REG_CLASS (REGNO (operands
[4])) != FPUPPER_REGS
8441 || REGNO_REG_CLASS (REGNO (operands
[5])) != FPUPPER_REGS
))
8444 /* Passed. Operands are suitable for fmpyadd. */
8448 #if !defined(USE_COLLECT2)
8450 pa_asm_out_constructor (rtx symbol
, int priority
)
8452 if (!function_label_operand (symbol
, VOIDmode
))
8453 hppa_encode_label (symbol
);
8455 #ifdef CTORS_SECTION_ASM_OP
8456 default_ctor_section_asm_out_constructor (symbol
, priority
);
8458 # ifdef TARGET_ASM_NAMED_SECTION
8459 default_named_section_asm_out_constructor (symbol
, priority
);
8461 default_stabs_asm_out_constructor (symbol
, priority
);
8467 pa_asm_out_destructor (rtx symbol
, int priority
)
8469 if (!function_label_operand (symbol
, VOIDmode
))
8470 hppa_encode_label (symbol
);
8472 #ifdef DTORS_SECTION_ASM_OP
8473 default_dtor_section_asm_out_destructor (symbol
, priority
);
8475 # ifdef TARGET_ASM_NAMED_SECTION
8476 default_named_section_asm_out_destructor (symbol
, priority
);
8478 default_stabs_asm_out_destructor (symbol
, priority
);
8484 /* This function places uninitialized global data in the bss section.
8485 The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
8486 function on the SOM port to prevent uninitialized global data from
8487 being placed in the data section. */
8490 pa_asm_output_aligned_bss (FILE *stream
,
8492 unsigned HOST_WIDE_INT size
,
8495 switch_to_section (bss_section
);
8496 fprintf (stream
, "\t.align %u\n", align
/ BITS_PER_UNIT
);
8498 #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
8499 ASM_OUTPUT_TYPE_DIRECTIVE (stream
, name
, "object");
8502 #ifdef ASM_OUTPUT_SIZE_DIRECTIVE
8503 ASM_OUTPUT_SIZE_DIRECTIVE (stream
, name
, size
);
8506 fprintf (stream
, "\t.align %u\n", align
/ BITS_PER_UNIT
);
8507 ASM_OUTPUT_LABEL (stream
, name
);
8508 fprintf (stream
, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED
"\n", size
);
8511 /* Both the HP and GNU assemblers under HP-UX provide a .comm directive
8512 that doesn't allow the alignment of global common storage to be directly
8513 specified. The SOM linker aligns common storage based on the rounded
8514 value of the NUM_BYTES parameter in the .comm directive. It's not
8515 possible to use the .align directive as it doesn't affect the alignment
8516 of the label associated with a .comm directive. */
8519 pa_asm_output_aligned_common (FILE *stream
,
8521 unsigned HOST_WIDE_INT size
,
8524 unsigned int max_common_align
;
8526 max_common_align
= TARGET_64BIT
? 128 : (size
>= 4096 ? 256 : 64);
8527 if (align
> max_common_align
)
8529 warning (0, "alignment (%u) for %s exceeds maximum alignment "
8530 "for global common data. Using %u",
8531 align
/ BITS_PER_UNIT
, name
, max_common_align
/ BITS_PER_UNIT
);
8532 align
= max_common_align
;
8535 switch_to_section (bss_section
);
8537 assemble_name (stream
, name
);
8538 fprintf (stream
, "\t.comm "HOST_WIDE_INT_PRINT_UNSIGNED
"\n",
8539 MAX (size
, align
/ BITS_PER_UNIT
));
8542 /* We can't use .comm for local common storage as the SOM linker effectively
8543 treats the symbol as universal and uses the same storage for local symbols
8544 with the same name in different object files. The .block directive
8545 reserves an uninitialized block of storage. However, it's not common
8546 storage. Fortunately, GCC never requests common storage with the same
8547 name in any given translation unit. */
8550 pa_asm_output_aligned_local (FILE *stream
,
8552 unsigned HOST_WIDE_INT size
,
8555 switch_to_section (bss_section
);
8556 fprintf (stream
, "\t.align %u\n", align
/ BITS_PER_UNIT
);
8559 fprintf (stream
, "%s", LOCAL_ASM_OP
);
8560 assemble_name (stream
, name
);
8561 fprintf (stream
, "\n");
8564 ASM_OUTPUT_LABEL (stream
, name
);
8565 fprintf (stream
, "\t.block "HOST_WIDE_INT_PRINT_UNSIGNED
"\n", size
);
8568 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
8569 use in fmpysub instructions. */
8571 fmpysuboperands (rtx
*operands
)
8573 enum machine_mode mode
= GET_MODE (operands
[0]);
8575 /* Must be a floating point mode. */
8576 if (mode
!= SFmode
&& mode
!= DFmode
)
8579 /* All modes must be the same. */
8580 if (! (mode
== GET_MODE (operands
[1])
8581 && mode
== GET_MODE (operands
[2])
8582 && mode
== GET_MODE (operands
[3])
8583 && mode
== GET_MODE (operands
[4])
8584 && mode
== GET_MODE (operands
[5])))
8587 /* All operands must be registers. */
8588 if (! (GET_CODE (operands
[1]) == REG
8589 && GET_CODE (operands
[2]) == REG
8590 && GET_CODE (operands
[3]) == REG
8591 && GET_CODE (operands
[4]) == REG
8592 && GET_CODE (operands
[5]) == REG
))
8595 /* Only 2 real operands to the subtraction. Subtraction is not a commutative
8596 operation, so operands[4] must be the same as operand[3]. */
8597 if (! rtx_equal_p (operands
[3], operands
[4]))
8600 /* multiply cannot feed into subtraction. */
8601 if (rtx_equal_p (operands
[5], operands
[0]))
8604 /* Inout operand of sub cannot conflict with any operands from multiply. */
8605 if (rtx_equal_p (operands
[3], operands
[0])
8606 || rtx_equal_p (operands
[3], operands
[1])
8607 || rtx_equal_p (operands
[3], operands
[2]))
8610 /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */
8612 && (REGNO_REG_CLASS (REGNO (operands
[0])) != FPUPPER_REGS
8613 || REGNO_REG_CLASS (REGNO (operands
[1])) != FPUPPER_REGS
8614 || REGNO_REG_CLASS (REGNO (operands
[2])) != FPUPPER_REGS
8615 || REGNO_REG_CLASS (REGNO (operands
[3])) != FPUPPER_REGS
8616 || REGNO_REG_CLASS (REGNO (operands
[4])) != FPUPPER_REGS
8617 || REGNO_REG_CLASS (REGNO (operands
[5])) != FPUPPER_REGS
))
8620 /* Passed. Operands are suitable for fmpysub. */
/* Return 1 if the given constant is 2, 4, or 8.  These are the valid
   constants for shadd instructions.  */
int
shadd_constant_p (int val)
{
  if (val == 2 || val == 4 || val == 8)
    return 1;
  else
    return 0;
}
8635 /* Return 1 if OP is valid as a base or index register in a
8639 borx_reg_operand (rtx op
, enum machine_mode mode
)
8641 if (GET_CODE (op
) != REG
)
8644 /* We must reject virtual registers as the only expressions that
8645 can be instantiated are REG and REG+CONST. */
8646 if (op
== virtual_incoming_args_rtx
8647 || op
== virtual_stack_vars_rtx
8648 || op
== virtual_stack_dynamic_rtx
8649 || op
== virtual_outgoing_args_rtx
8650 || op
== virtual_cfa_rtx
)
8653 /* While it's always safe to index off the frame pointer, it's not
8654 profitable to do so when the frame pointer is being eliminated. */
8655 if (!reload_completed
8656 && flag_omit_frame_pointer
8657 && !cfun
->calls_alloca
8658 && op
== frame_pointer_rtx
)
8661 return register_operand (op
, mode
);
8664 /* Return 1 if this operand is anything other than a hard register. */
8667 non_hard_reg_operand (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
8669 return ! (GET_CODE (op
) == REG
&& REGNO (op
) < FIRST_PSEUDO_REGISTER
);
8672 /* Return TRUE if INSN branches forward. */
8675 forward_branch_p (rtx insn
)
8677 rtx lab
= JUMP_LABEL (insn
);
8679 /* The INSN must have a jump label. */
8680 gcc_assert (lab
!= NULL_RTX
);
8682 if (INSN_ADDRESSES_SET_P ())
8683 return INSN_ADDRESSES (INSN_UID (lab
)) > INSN_ADDRESSES (INSN_UID (insn
));
8690 insn
= NEXT_INSN (insn
);
8696 /* Return 1 if OP is an equality comparison, else return 0. */
8698 eq_neq_comparison_operator (rtx op
, enum machine_mode mode ATTRIBUTE_UNUSED
)
8700 return (GET_CODE (op
) == EQ
|| GET_CODE (op
) == NE
);
8703 /* Return 1 if INSN is in the delay slot of a call instruction. */
8705 jump_in_call_delay (rtx insn
)
8708 if (GET_CODE (insn
) != JUMP_INSN
)
8711 if (PREV_INSN (insn
)
8712 && PREV_INSN (PREV_INSN (insn
))
8713 && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn
)))) == INSN
)
8715 rtx test_insn
= next_real_insn (PREV_INSN (PREV_INSN (insn
)));
8717 return (GET_CODE (PATTERN (test_insn
)) == SEQUENCE
8718 && XVECEXP (PATTERN (test_insn
), 0, 1) == insn
);
8725 /* Output an unconditional move and branch insn. */
8728 output_parallel_movb (rtx
*operands
, rtx insn
)
8730 int length
= get_attr_length (insn
);
8732 /* These are the cases in which we win. */
8734 return "mov%I1b,tr %1,%0,%2";
8736 /* None of the following cases win, but they don't lose either. */
8739 if (dbr_sequence_length () == 0)
8741 /* Nothing in the delay slot, fake it by putting the combined
8742 insn (the copy or add) in the delay slot of a bl. */
8743 if (GET_CODE (operands
[1]) == CONST_INT
)
8744 return "b %2\n\tldi %1,%0";
8746 return "b %2\n\tcopy %1,%0";
8750 /* Something in the delay slot, but we've got a long branch. */
8751 if (GET_CODE (operands
[1]) == CONST_INT
)
8752 return "ldi %1,%0\n\tb %2";
8754 return "copy %1,%0\n\tb %2";
8758 if (GET_CODE (operands
[1]) == CONST_INT
)
8759 output_asm_insn ("ldi %1,%0", operands
);
8761 output_asm_insn ("copy %1,%0", operands
);
8762 return output_lbranch (operands
[2], insn
, 1);
8765 /* Output an unconditional add and branch insn. */
8768 output_parallel_addb (rtx
*operands
, rtx insn
)
8770 int length
= get_attr_length (insn
);
8772 /* To make life easy we want operand0 to be the shared input/output
8773 operand and operand1 to be the readonly operand. */
8774 if (operands
[0] == operands
[1])
8775 operands
[1] = operands
[2];
8777 /* These are the cases in which we win. */
8779 return "add%I1b,tr %1,%0,%3";
8781 /* None of the following cases win, but they don't lose either. */
8784 if (dbr_sequence_length () == 0)
8785 /* Nothing in the delay slot, fake it by putting the combined
8786 insn (the copy or add) in the delay slot of a bl. */
8787 return "b %3\n\tadd%I1 %1,%0,%0";
8789 /* Something in the delay slot, but we've got a long branch. */
8790 return "add%I1 %1,%0,%0\n\tb %3";
8793 output_asm_insn ("add%I1 %1,%0,%0", operands
);
8794 return output_lbranch (operands
[3], insn
, 1);
8797 /* Return nonzero if INSN (a jump insn) immediately follows a call
8798 to a named function. This is used to avoid filling the delay slot
8799 of the jump since it can usually be eliminated by modifying RP in
8800 the delay slot of the call. */
8803 following_call (rtx insn
)
8805 if (! TARGET_JUMP_IN_DELAY
)
8808 /* Find the previous real insn, skipping NOTEs. */
8809 insn
= PREV_INSN (insn
);
8810 while (insn
&& GET_CODE (insn
) == NOTE
)
8811 insn
= PREV_INSN (insn
);
8813 /* Check for CALL_INSNs and millicode calls. */
8815 && ((GET_CODE (insn
) == CALL_INSN
8816 && get_attr_type (insn
) != TYPE_DYNCALL
)
8817 || (GET_CODE (insn
) == INSN
8818 && GET_CODE (PATTERN (insn
)) != SEQUENCE
8819 && GET_CODE (PATTERN (insn
)) != USE
8820 && GET_CODE (PATTERN (insn
)) != CLOBBER
8821 && get_attr_type (insn
) == TYPE_MILLI
)))
8827 /* We use this hook to perform a PA specific optimization which is difficult
8828 to do in earlier passes.
8830 We want the delay slots of branches within jump tables to be filled.
8831 None of the compiler passes at the moment even has the notion that a
8832 PA jump table doesn't contain addresses, but instead contains actual
8835 Because we actually jump into the table, the addresses of each entry
8836 must stay constant in relation to the beginning of the table (which
8837 itself must stay constant relative to the instruction to jump into
8838 it). I don't believe we can guarantee earlier passes of the compiler
8839 will adhere to those rules.
8841 So, late in the compilation process we find all the jump tables, and
8842 expand them into real code -- e.g. each entry in the jump table vector
8843 will get an appropriate label followed by a jump to the final target.
8845 Reorg and the final jump pass can then optimize these branches and
8846 fill their delay slots. We end up with smaller, more efficient code.
8848 The jump instructions within the table are special; we must be able
8849 to identify them during assembly output (if the jumps don't get filled
8850 we need to emit a nop rather than nullifying the delay slot)). We
8851 identify jumps in switch tables by using insns with the attribute
8852 type TYPE_BTABLE_BRANCH.
8854 We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
8855 insns. This serves two purposes, first it prevents jump.c from
8856 noticing that the last N entries in the table jump to the instruction
8857 immediately after the table and deleting the jumps. Second, those
8858 insns mark where we should emit .begin_brtab and .end_brtab directives
8859 when using GAS (allows for better link time optimizations). */
8866 remove_useless_addtr_insns (1);
8868 if (pa_cpu
< PROCESSOR_8000
)
8869 pa_combine_instructions ();
8872 /* This is fairly cheap, so always run it if optimizing. */
8873 if (optimize
> 0 && !TARGET_BIG_SWITCH
)
8875 /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */
8876 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
8878 rtx pattern
, tmp
, location
, label
;
8879 unsigned int length
, i
;
8881 /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */
8882 if (GET_CODE (insn
) != JUMP_INSN
8883 || (GET_CODE (PATTERN (insn
)) != ADDR_VEC
8884 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
))
8887 /* Emit marker for the beginning of the branch table. */
8888 emit_insn_before (gen_begin_brtab (), insn
);
8890 pattern
= PATTERN (insn
);
8891 location
= PREV_INSN (insn
);
8892 length
= XVECLEN (pattern
, GET_CODE (pattern
) == ADDR_DIFF_VEC
);
8894 for (i
= 0; i
< length
; i
++)
8896 /* Emit a label before each jump to keep jump.c from
8897 removing this code. */
8898 tmp
= gen_label_rtx ();
8899 LABEL_NUSES (tmp
) = 1;
8900 emit_label_after (tmp
, location
);
8901 location
= NEXT_INSN (location
);
8903 if (GET_CODE (pattern
) == ADDR_VEC
)
8904 label
= XEXP (XVECEXP (pattern
, 0, i
), 0);
8906 label
= XEXP (XVECEXP (pattern
, 1, i
), 0);
8908 tmp
= gen_short_jump (label
);
8910 /* Emit the jump itself. */
8911 tmp
= emit_jump_insn_after (tmp
, location
);
8912 JUMP_LABEL (tmp
) = label
;
8913 LABEL_NUSES (label
)++;
8914 location
= NEXT_INSN (location
);
8916 /* Emit a BARRIER after the jump. */
8917 emit_barrier_after (location
);
8918 location
= NEXT_INSN (location
);
8921 /* Emit marker for the end of the branch table. */
8922 emit_insn_before (gen_end_brtab (), location
);
8923 location
= NEXT_INSN (location
);
8924 emit_barrier_after (location
);
8926 /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */
8932 /* Still need brtab marker insns. FIXME: the presence of these
8933 markers disables output of the branch table to readonly memory,
8934 and any alignment directives that might be needed. Possibly,
8935 the begin_brtab insn should be output before the label for the
8936 table. This doesn't matter at the moment since the tables are
8937 always output in the text section. */
8938 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
8940 /* Find an ADDR_VEC insn. */
8941 if (GET_CODE (insn
) != JUMP_INSN
8942 || (GET_CODE (PATTERN (insn
)) != ADDR_VEC
8943 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
))
8946 /* Now generate markers for the beginning and end of the
8948 emit_insn_before (gen_begin_brtab (), insn
);
8949 emit_insn_after (gen_end_brtab (), insn
);
8954 /* The PA has a number of odd instructions which can perform multiple
8955 tasks at once. On first generation PA machines (PA1.0 and PA1.1)
8956 it may be profitable to combine two instructions into one instruction
8957 with two outputs. It's not profitable PA2.0 machines because the
8958 two outputs would take two slots in the reorder buffers.
8960 This routine finds instructions which can be combined and combines
8961 them. We only support some of the potential combinations, and we
8962 only try common ways to find suitable instructions.
8964 * addb can add two registers or a register and a small integer
8965 and jump to a nearby (+-8k) location. Normally the jump to the
8966 nearby location is conditional on the result of the add, but by
8967 using the "true" condition we can make the jump unconditional.
8968 Thus addb can perform two independent operations in one insn.
8970 * movb is similar to addb in that it can perform a reg->reg
8971 or small immediate->reg copy and jump to a nearby (+-8k location).
8973 * fmpyadd and fmpysub can perform a FP multiply and either an
8974 FP add or FP sub if the operands of the multiply and add/sub are
8975 independent (there are other minor restrictions). Note both
8976 the fmpy and fadd/fsub can in theory move to better spots according
8977 to data dependencies, but for now we require the fmpy stay at a
8980 * Many of the memory operations can perform pre & post updates
8981 of index registers. GCC's pre/post increment/decrement addressing
8982 is far too simple to take advantage of all the possibilities. This
8983 pass may not be suitable since those insns may not be independent.
8985 * comclr can compare two ints or an int and a register, nullify
8986 the following instruction and zero some other register. This
8987 is more difficult to use as it's harder to find an insn which
8988 will generate a comclr than finding something like an unconditional
8989 branch. (conditional moves & long branches create comclr insns).
8991 * Most arithmetic operations can conditionally skip the next
8992 instruction. They can be viewed as "perform this operation
8993 and conditionally jump to this nearby location" (where nearby
8994 is an insns away). These are difficult to use due to the
8995 branch length restrictions. */
8998 pa_combine_instructions (void)
9000 rtx anchor
, new_rtx
;
9002 /* This can get expensive since the basic algorithm is on the
9003 order of O(n^2) (or worse). Only do it for -O2 or higher
9004 levels of optimization. */
9008 /* Walk down the list of insns looking for "anchor" insns which
9009 may be combined with "floating" insns. As the name implies,
9010 "anchor" instructions don't move, while "floating" insns may
9012 new_rtx
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, NULL_RTX
, NULL_RTX
));
9013 new_rtx
= make_insn_raw (new_rtx
);
9015 for (anchor
= get_insns (); anchor
; anchor
= NEXT_INSN (anchor
))
9017 enum attr_pa_combine_type anchor_attr
;
9018 enum attr_pa_combine_type floater_attr
;
9020 /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs.
9021 Also ignore any special USE insns. */
9022 if ((GET_CODE (anchor
) != INSN
9023 && GET_CODE (anchor
) != JUMP_INSN
9024 && GET_CODE (anchor
) != CALL_INSN
)
9025 || GET_CODE (PATTERN (anchor
)) == USE
9026 || GET_CODE (PATTERN (anchor
)) == CLOBBER
9027 || GET_CODE (PATTERN (anchor
)) == ADDR_VEC
9028 || GET_CODE (PATTERN (anchor
)) == ADDR_DIFF_VEC
)
9031 anchor_attr
= get_attr_pa_combine_type (anchor
);
9032 /* See if anchor is an insn suitable for combination. */
9033 if (anchor_attr
== PA_COMBINE_TYPE_FMPY
9034 || anchor_attr
== PA_COMBINE_TYPE_FADDSUB
9035 || (anchor_attr
== PA_COMBINE_TYPE_UNCOND_BRANCH
9036 && ! forward_branch_p (anchor
)))
9040 for (floater
= PREV_INSN (anchor
);
9042 floater
= PREV_INSN (floater
))
9044 if (GET_CODE (floater
) == NOTE
9045 || (GET_CODE (floater
) == INSN
9046 && (GET_CODE (PATTERN (floater
)) == USE
9047 || GET_CODE (PATTERN (floater
)) == CLOBBER
)))
9050 /* Anything except a regular INSN will stop our search. */
9051 if (GET_CODE (floater
) != INSN
9052 || GET_CODE (PATTERN (floater
)) == ADDR_VEC
9053 || GET_CODE (PATTERN (floater
)) == ADDR_DIFF_VEC
)
9059 /* See if FLOATER is suitable for combination with the
9061 floater_attr
= get_attr_pa_combine_type (floater
);
9062 if ((anchor_attr
== PA_COMBINE_TYPE_FMPY
9063 && floater_attr
== PA_COMBINE_TYPE_FADDSUB
)
9064 || (anchor_attr
== PA_COMBINE_TYPE_FADDSUB
9065 && floater_attr
== PA_COMBINE_TYPE_FMPY
))
9067 /* If ANCHOR and FLOATER can be combined, then we're
9068 done with this pass. */
9069 if (pa_can_combine_p (new_rtx
, anchor
, floater
, 0,
9070 SET_DEST (PATTERN (floater
)),
9071 XEXP (SET_SRC (PATTERN (floater
)), 0),
9072 XEXP (SET_SRC (PATTERN (floater
)), 1)))
9076 else if (anchor_attr
== PA_COMBINE_TYPE_UNCOND_BRANCH
9077 && floater_attr
== PA_COMBINE_TYPE_ADDMOVE
)
9079 if (GET_CODE (SET_SRC (PATTERN (floater
))) == PLUS
)
9081 if (pa_can_combine_p (new_rtx
, anchor
, floater
, 0,
9082 SET_DEST (PATTERN (floater
)),
9083 XEXP (SET_SRC (PATTERN (floater
)), 0),
9084 XEXP (SET_SRC (PATTERN (floater
)), 1)))
9089 if (pa_can_combine_p (new_rtx
, anchor
, floater
, 0,
9090 SET_DEST (PATTERN (floater
)),
9091 SET_SRC (PATTERN (floater
)),
9092 SET_SRC (PATTERN (floater
))))
9098 /* If we didn't find anything on the backwards scan try forwards. */
9100 && (anchor_attr
== PA_COMBINE_TYPE_FMPY
9101 || anchor_attr
== PA_COMBINE_TYPE_FADDSUB
))
9103 for (floater
= anchor
; floater
; floater
= NEXT_INSN (floater
))
9105 if (GET_CODE (floater
) == NOTE
9106 || (GET_CODE (floater
) == INSN
9107 && (GET_CODE (PATTERN (floater
)) == USE
9108 || GET_CODE (PATTERN (floater
)) == CLOBBER
)))
9112 /* Anything except a regular INSN will stop our search. */
9113 if (GET_CODE (floater
) != INSN
9114 || GET_CODE (PATTERN (floater
)) == ADDR_VEC
9115 || GET_CODE (PATTERN (floater
)) == ADDR_DIFF_VEC
)
9121 /* See if FLOATER is suitable for combination with the
9123 floater_attr
= get_attr_pa_combine_type (floater
);
9124 if ((anchor_attr
== PA_COMBINE_TYPE_FMPY
9125 && floater_attr
== PA_COMBINE_TYPE_FADDSUB
)
9126 || (anchor_attr
== PA_COMBINE_TYPE_FADDSUB
9127 && floater_attr
== PA_COMBINE_TYPE_FMPY
))
9129 /* If ANCHOR and FLOATER can be combined, then we're
9130 done with this pass. */
9131 if (pa_can_combine_p (new_rtx
, anchor
, floater
, 1,
9132 SET_DEST (PATTERN (floater
)),
9133 XEXP (SET_SRC (PATTERN (floater
)),
9135 XEXP (SET_SRC (PATTERN (floater
)),
9142 /* FLOATER will be nonzero if we found a suitable floating
9143 insn for combination with ANCHOR. */
9145 && (anchor_attr
== PA_COMBINE_TYPE_FADDSUB
9146 || anchor_attr
== PA_COMBINE_TYPE_FMPY
))
9148 /* Emit the new instruction and delete the old anchor. */
9149 emit_insn_before (gen_rtx_PARALLEL
9151 gen_rtvec (2, PATTERN (anchor
),
9152 PATTERN (floater
))),
9155 SET_INSN_DELETED (anchor
);
9157 /* Emit a special USE insn for FLOATER, then delete
9158 the floating insn. */
9159 emit_insn_before (gen_rtx_USE (VOIDmode
, floater
), floater
);
9160 delete_insn (floater
);
9165 && anchor_attr
== PA_COMBINE_TYPE_UNCOND_BRANCH
)
9168 /* Emit the new_jump instruction and delete the old anchor. */
9170 = emit_jump_insn_before (gen_rtx_PARALLEL
9172 gen_rtvec (2, PATTERN (anchor
),
9173 PATTERN (floater
))),
9176 JUMP_LABEL (temp
) = JUMP_LABEL (anchor
);
9177 SET_INSN_DELETED (anchor
);
9179 /* Emit a special USE insn for FLOATER, then delete
9180 the floating insn. */
9181 emit_insn_before (gen_rtx_USE (VOIDmode
, floater
), floater
);
9182 delete_insn (floater
);
9190 pa_can_combine_p (rtx new_rtx
, rtx anchor
, rtx floater
, int reversed
, rtx dest
,
9193 int insn_code_number
;
9196 /* Create a PARALLEL with the patterns of ANCHOR and
9197 FLOATER, try to recognize it, then test constraints
9198 for the resulting pattern.
9200 If the pattern doesn't match or the constraints
9201 aren't met keep searching for a suitable floater
9203 XVECEXP (PATTERN (new_rtx
), 0, 0) = PATTERN (anchor
);
9204 XVECEXP (PATTERN (new_rtx
), 0, 1) = PATTERN (floater
);
9205 INSN_CODE (new_rtx
) = -1;
9206 insn_code_number
= recog_memoized (new_rtx
);
9207 if (insn_code_number
< 0
9208 || (extract_insn (new_rtx
), ! constrain_operands (1)))
9222 /* There's up to three operands to consider. One
9223 output and two inputs.
9225 The output must not be used between FLOATER & ANCHOR
9226 exclusive. The inputs must not be set between
9227 FLOATER and ANCHOR exclusive. */
9229 if (reg_used_between_p (dest
, start
, end
))
9232 if (reg_set_between_p (src1
, start
, end
))
9235 if (reg_set_between_p (src2
, start
, end
))
9238 /* If we get here, then everything is good. */
9242 /* Return nonzero if references for INSN are delayed.
9244 Millicode insns are actually function calls with some special
9245 constraints on arguments and register usage.
9247 Millicode calls always expect their arguments in the integer argument
9248 registers, and always return their result in %r29 (ret1). They
9249 are expected to clobber their arguments, %r1, %r29, and the return
9250 pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.
9252 This function tells reorg that the references to arguments and
9253 millicode calls do not appear to happen until after the millicode call.
9254 This allows reorg to put insns which set the argument registers into the
9255 delay slot of the millicode call -- thus they act more like traditional
9258 Note we cannot consider side effects of the insn to be delayed because
9259 the branch and link insn will clobber the return pointer. If we happened
9260 to use the return pointer in the delay slot of the call, then we lose.
9262 get_attr_type will try to recognize the given insn, so make sure to
9263 filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
9266 insn_refs_are_delayed (rtx insn
)
9268 return ((GET_CODE (insn
) == INSN
9269 && GET_CODE (PATTERN (insn
)) != SEQUENCE
9270 && GET_CODE (PATTERN (insn
)) != USE
9271 && GET_CODE (PATTERN (insn
)) != CLOBBER
9272 && get_attr_type (insn
) == TYPE_MILLI
));
9275 /* Promote the return value, but not the arguments. */
9277 static enum machine_mode
9278 pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
9279 enum machine_mode mode
,
9280 int *punsignedp ATTRIBUTE_UNUSED
,
9281 const_tree fntype ATTRIBUTE_UNUSED
,
9284 if (for_return
== 0)
9286 return promote_mode (type
, mode
, punsignedp
);
9289 /* On the HP-PA the value is found in register(s) 28(-29), unless
9290 the mode is SF or DF. Then the value is returned in fr4 (32).
9292 This must perform the same promotions as PROMOTE_MODE, else promoting
9293 return values in TARGET_PROMOTE_FUNCTION_MODE will not work correctly.
9295 Small structures must be returned in a PARALLEL on PA64 in order
9296 to match the HP Compiler ABI. */
9299 pa_function_value (const_tree valtype
,
9300 const_tree func ATTRIBUTE_UNUSED
,
9301 bool outgoing ATTRIBUTE_UNUSED
)
9303 enum machine_mode valmode
;
9305 if (AGGREGATE_TYPE_P (valtype
)
9306 || TREE_CODE (valtype
) == COMPLEX_TYPE
9307 || TREE_CODE (valtype
) == VECTOR_TYPE
)
9311 /* Aggregates with a size less than or equal to 128 bits are
9312 returned in GR 28(-29). They are left justified. The pad
9313 bits are undefined. Larger aggregates are returned in
9317 int ub
= int_size_in_bytes (valtype
) <= UNITS_PER_WORD
? 1 : 2;
9319 for (i
= 0; i
< ub
; i
++)
9321 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
9322 gen_rtx_REG (DImode
, 28 + i
),
9327 return gen_rtx_PARALLEL (BLKmode
, gen_rtvec_v (ub
, loc
));
9329 else if (int_size_in_bytes (valtype
) > UNITS_PER_WORD
)
9331 /* Aggregates 5 to 8 bytes in size are returned in general
9332 registers r28-r29 in the same manner as other non
9333 floating-point objects. The data is right-justified and
9334 zero-extended to 64 bits. This is opposite to the normal
9335 justification used on big endian targets and requires
9336 special treatment. */
9337 rtx loc
= gen_rtx_EXPR_LIST (VOIDmode
,
9338 gen_rtx_REG (DImode
, 28), const0_rtx
);
9339 return gen_rtx_PARALLEL (BLKmode
, gen_rtvec (1, loc
));
9343 if ((INTEGRAL_TYPE_P (valtype
)
9344 && GET_MODE_BITSIZE (TYPE_MODE (valtype
)) < BITS_PER_WORD
)
9345 || POINTER_TYPE_P (valtype
))
9346 valmode
= word_mode
;
9348 valmode
= TYPE_MODE (valtype
);
9350 if (TREE_CODE (valtype
) == REAL_TYPE
9351 && !AGGREGATE_TYPE_P (valtype
)
9352 && TYPE_MODE (valtype
) != TFmode
9353 && !TARGET_SOFT_FLOAT
)
9354 return gen_rtx_REG (valmode
, 32);
9356 return gen_rtx_REG (valmode
, 28);
9359 /* Implement the TARGET_LIBCALL_VALUE hook. */
9362 pa_libcall_value (enum machine_mode mode
,
9363 const_rtx fun ATTRIBUTE_UNUSED
)
9365 if (! TARGET_SOFT_FLOAT
9366 && (mode
== SFmode
|| mode
== DFmode
))
9367 return gen_rtx_REG (mode
, 32);
9369 return gen_rtx_REG (mode
, 28);
9372 /* Implement the TARGET_FUNCTION_VALUE_REGNO_P hook. */
9375 pa_function_value_regno_p (const unsigned int regno
)
9378 || (! TARGET_SOFT_FLOAT
&& regno
== 32))
9384 /* Return the location of a parameter that is passed in a register or NULL
9385 if the parameter has any component that is passed in memory.
9387 This is new code and will be pushed into the net sources after
9390 ??? We might want to restructure this so that it looks more like other
/* NOTE(review): this chunk is an elided extraction -- brace, declaration
   and return lines between the numbered fragments are missing from view.
   Only comments were added below; the code tokens are untouched.  */
9393 function_arg (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
, tree type
,
9394 int named ATTRIBUTE_UNUSED
)
/* Number of argument words passed in registers: 8 on PA64, 4 on the
   32-bit ABI.  */
9396 int max_arg_words
= (TARGET_64BIT
? 8 : 4);
/* NOTE(review): the consequence of the VOIDmode test is elided here;
   presumably it returns no register location -- TODO confirm.  */
9403 if (mode
== VOIDmode
)
9406 arg_size
= FUNCTION_ARG_SIZE (mode
, type
);
9408 /* If this arg would be passed partially or totally on the stack, then
9409 this routine should return zero. pa_arg_partial_bytes will
9410 handle arguments which are split between regs and stack slots if
9411 the ABI mandates split arguments. */
9414 /* The 32-bit ABI does not split arguments. */
9415 if (cum
->words
+ arg_size
> max_arg_words
)
/* PA64: multi-word arguments start on an even (128-bit aligned)
   argument slot.  */
9421 alignment
= cum
->words
& 1;
9422 if (cum
->words
+ alignment
>= max_arg_words
)
9426 /* The 32bit ABIs and the 64bit ABIs are rather different,
9427 particularly in their handling of FP registers. We might
9428 be able to cleverly share code between them, but I'm not
9429 going to bother in the hope that splitting them up results
9430 in code that is more easily understood. */
9434 /* Advance the base registers to their current locations.
9436 Remember, gprs grow towards smaller register numbers while
9437 fprs grow to higher register numbers. Also remember that
9438 although FP regs are 32-bit addressable, we pretend that
9439 the registers are 64-bits wide. */
9440 gpr_reg_base
= 26 - cum
->words
;
9441 fpr_reg_base
= 32 + cum
->words
;
9443 /* Arguments wider than one word and small aggregates need special
9447 || (type
&& (AGGREGATE_TYPE_P (type
)
9448 || TREE_CODE (type
) == COMPLEX_TYPE
9449 || TREE_CODE (type
) == VECTOR_TYPE
)))
9451 /* Double-extended precision (80-bit), quad-precision (128-bit)
9452 and aggregates including complex numbers are aligned on
9453 128-bit boundaries. The first eight 64-bit argument slots
9454 are associated one-to-one, with general registers r26
9455 through r19, and also with floating-point registers fr4
9456 through fr11. Arguments larger than one word are always
9457 passed in general registers.
9459 Using a PARALLEL with a word mode register results in left
9460 justified data on a big-endian target. */
9463 int i
, offset
= 0, ub
= arg_size
;
9465 /* Align the base register. */
9466 gpr_reg_base
-= alignment
;
/* Clamp to the number of argument words that remain in registers.  */
9468 ub
= MIN (ub
, max_arg_words
- cum
->words
- alignment
);
9469 for (i
= 0; i
< ub
; i
++)
9471 loc
[i
] = gen_rtx_EXPR_LIST (VOIDmode
,
9472 gen_rtx_REG (DImode
, gpr_reg_base
),
9478 return gen_rtx_PARALLEL (mode
, gen_rtvec_v (ub
, loc
));
9483 /* If the argument is larger than a word, then we know precisely
9484 which registers we must use. */
9498 /* Structures 5 to 8 bytes in size are passed in the general
9499 registers in the same manner as other non floating-point
9500 objects. The data is right-justified and zero-extended
9501 to 64 bits. This is opposite to the normal justification
9502 used on big endian targets and requires special treatment.
9503 We now define BLOCK_REG_PADDING to pad these objects.
9504 Aggregates, complex and vector types are passed in the same
9505 manner as structures. */
9507 || (type
&& (AGGREGATE_TYPE_P (type
)
9508 || TREE_CODE (type
) == COMPLEX_TYPE
9509 || TREE_CODE (type
) == VECTOR_TYPE
)))
9511 rtx loc
= gen_rtx_EXPR_LIST (VOIDmode
,
9512 gen_rtx_REG (DImode
, gpr_reg_base
),
9514 return gen_rtx_PARALLEL (BLKmode
, gen_rtvec (1, loc
));
9519 /* We have a single word (32 bits). A simple computation
9520 will get us the register #s we need. */
9521 gpr_reg_base
= 26 - cum
->words
;
9522 fpr_reg_base
= 32 + 2 * cum
->words
;
9526 /* Determine if the argument needs to be passed in both general and
9527 floating point registers. */
9528 if (((TARGET_PORTABLE_RUNTIME
|| TARGET_64BIT
|| TARGET_ELF32
)
9529 /* If we are doing soft-float with portable runtime, then there
9530 is no need to worry about FP regs. */
9531 && !TARGET_SOFT_FLOAT
9532 /* The parameter must be some kind of scalar float, else we just
9533 pass it in integer registers. */
9534 && GET_MODE_CLASS (mode
) == MODE_FLOAT
9535 /* The target function must not have a prototype. */
9536 && cum
->nargs_prototype
<= 0
9537 /* libcalls do not need to pass items in both FP and general
9539 && type
!= NULL_TREE
9540 /* All this hair applies to "outgoing" args only. This includes
9541 sibcall arguments setup with FUNCTION_INCOMING_ARG. */
9543 /* Also pass outgoing floating arguments in both registers in indirect
9544 calls with the 32 bit ABI and the HP assembler since there is no
9545 way to specify argument locations in static functions. */
9550 && GET_MODE_CLASS (mode
) == MODE_FLOAT
))
/* Build a PARALLEL carrying the value in both an FP and a GP
   register, since the callee's choice is not known here.  */
9556 gen_rtx_EXPR_LIST (VOIDmode
,
9557 gen_rtx_REG (mode
, fpr_reg_base
),
9559 gen_rtx_EXPR_LIST (VOIDmode
,
9560 gen_rtx_REG (mode
, gpr_reg_base
),
9565 /* See if we should pass this parameter in a general register. */
9566 if (TARGET_SOFT_FLOAT
9567 /* Indirect calls in the normal 32bit ABI require all arguments
9568 to be passed in general registers. */
9569 || (!TARGET_PORTABLE_RUNTIME
9573 /* If the parameter is not a scalar floating-point parameter,
9574 then it belongs in GPRs. */
9575 || GET_MODE_CLASS (mode
) != MODE_FLOAT
9576 /* Structure with single SFmode field belongs in GPR. */
9577 || (type
&& AGGREGATE_TYPE_P (type
)))
9578 retval
= gen_rtx_REG (mode
, gpr_reg_base
);
9580 retval
= gen_rtx_REG (mode
, fpr_reg_base
);
9586 /* If this arg would be passed totally in registers or totally on the stack,
9587 then this routine should return zero. */
9590 pa_arg_partial_bytes (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
9591 tree type
, bool named ATTRIBUTE_UNUSED
)
9593 unsigned int max_arg_words
= 8;
9594 unsigned int offset
= 0;
9599 if (FUNCTION_ARG_SIZE (mode
, type
) > 1 && (cum
->words
& 1))
9602 if (cum
->words
+ offset
+ FUNCTION_ARG_SIZE (mode
, type
) <= max_arg_words
)
9603 /* Arg fits fully into registers. */
9605 else if (cum
->words
+ offset
>= max_arg_words
)
9606 /* Arg fully on the stack. */
9610 return (max_arg_words
- cum
->words
- offset
) * UNITS_PER_WORD
;
9614 /* A get_unnamed_section callback for switching to the text section.
9616 This function is only used with SOM. Because we don't support
9617 named subspaces, we can only create a new subspace or switch back
9618 to the default text subspace. */
/* NOTE(review): elided extraction -- if/else framing and brace lines
   between the numbered fragments are missing from view.  */
9621 som_output_text_section_asm_op (const void *data ATTRIBUTE_UNUSED
)
/* This callback is only meaningful for SOM targets.  */
9623 gcc_assert (TARGET_SOM
);
/* Emit the .NSUBSPA directive only once per function body; the
   in_nsubspa flag records that it has been done.  */
9626 if (cfun
&& cfun
->machine
&& !cfun
->machine
->in_nsubspa
)
9628 /* We only want to emit a .nsubspa directive once at the
9629 start of the function. */
9630 cfun
->machine
->in_nsubspa
= 1;
9632 /* Create a new subspace for the text. This provides
9633 better stub placement and one-only functions. */
9635 && DECL_ONE_ONLY (cfun
->decl
)
9636 && !DECL_WEAK (cfun
->decl
))
/* One-only (COMDAT) functions get their own comdat $CODE$ subspace.  */
9638 output_section_asm_op ("\t.SPACE $TEXT$\n"
9639 "\t.NSUBSPA $CODE$,QUAD=0,ALIGN=8,"
9640 "ACCESS=44,SORT=24,COMDAT");
9646 /* There isn't a current function or the body of the current
9647 function has been completed. So, we are changing to the
9648 text section to output debugging information. Thus, we
9649 need to forget that we are in the text section so that
9650 varasm.c will call us when text_section is selected again. */
9651 gcc_assert (!cfun
|| !cfun
->machine
9652 || cfun
->machine
->in_nsubspa
== 2);
9655 output_section_asm_op ("\t.SPACE $TEXT$\n\t.NSUBSPA $CODE$");
/* Default case: switch back to the standard $CODE$ subspace.  */
9658 output_section_asm_op ("\t.SPACE $TEXT$\n\t.SUBSPA $CODE$");
/* A get_unnamed_section callback for switching to comdat data
   sections.  This function is only used with SOM.  */
static void
som_output_comdat_data_section_asm_op (const void *data)
{
  /* DATA is the assembler directive string that was registered with
     get_unnamed_section; just emit it verbatim.  */
  output_section_asm_op (data);
}
9671 /* Implement TARGET_ASM_INITIALIZE_SECTIONS */
/* NOTE(review): elided extraction -- several assignment-target lines
   (e.g. the identifier assigned at line 9676, presumably text_section)
   are missing from view; code tokens below are untouched.  */
9674 pa_som_asm_init_sections (void)
9677 = get_unnamed_section (0, som_output_text_section_asm_op
, NULL
);
9679 /* SOM puts readonly data in the default $LIT$ subspace when PIC code
9680 is not being generated. */
9681 som_readonly_data_section
9682 = get_unnamed_section (0, output_section_asm_op
,
9683 "\t.SPACE $TEXT$\n\t.SUBSPA $LIT$");
9685 /* When secondary definitions are not supported, SOM makes readonly
9686 data one-only by creating a new $LIT$ subspace in $TEXT$ with
9688 som_one_only_readonly_data_section
9689 = get_unnamed_section (0, som_output_comdat_data_section_asm_op
,
9691 "\t.NSUBSPA $LIT$,QUAD=0,ALIGN=8,"
9692 "ACCESS=0x2c,SORT=16,COMDAT");
9695 /* When secondary definitions are not supported, SOM makes data one-only
9696 by creating a new $DATA$ subspace in $PRIVATE$ with the comdat flag. */
9697 som_one_only_data_section
9698 = get_unnamed_section (SECTION_WRITE
,
9699 som_output_comdat_data_section_asm_op
,
9700 "\t.SPACE $PRIVATE$\n"
9701 "\t.NSUBSPA $DATA$,QUAD=1,ALIGN=8,"
9702 "ACCESS=31,SORT=24,COMDAT");
9704 /* FIXME: HPUX ld generates incorrect GOT entries for "T" fixups
9705 which reference data within the $TEXT$ space (for example constant
9706 strings in the $LIT$ subspace).
9708 The assemblers (GAS and HP as) both have problems with handling
9709 the difference of two symbols which is the other correct way to
9710 reference constant data during PIC code generation.
9712 So, there's no way to reference constant data which is in the
9713 $TEXT$ space during PIC generation. Instead place all constant
9714 data into the $PRIVATE$ subspace (this reduces sharing, but it
9715 works correctly). */
9716 readonly_data_section
= flag_pic
? data_section
: som_readonly_data_section
;
9718 /* We must not have a reference to an external symbol defined in a
9719 shared library in a readonly section, else the SOM linker will
9722 So, we force exception information into the data section. */
9723 exception_section
= data_section
;
9726 /* On hpux10, the linker will give an error if we have a reference
9727 in the read-only data section to a symbol defined in a shared
9728 library. Therefore, expressions that might require a reloc can
9729 not be placed in the read-only data section. */
/* NOTE(review): elided extraction -- brace lines and parts of two
   branch conditions (lines 9741-9743 and 9752, presumably the
   TARGET_SOM / secondary-definition tests) are missing from view.  */
9732 pa_select_section (tree exp
, int reloc
,
9733 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED
)
/* Read-only, non-volatile variables with constant (or errored)
   initializers are candidates for a read-only section.  */
9735 if (TREE_CODE (exp
) == VAR_DECL
9736 && TREE_READONLY (exp
)
9737 && !TREE_THIS_VOLATILE (exp
)
9738 && DECL_INITIAL (exp
)
9739 && (DECL_INITIAL (exp
) == error_mark_node
9740 || TREE_CONSTANT (DECL_INITIAL (exp
)))
/* One-only (COMDAT) read-only variables get the special SOM section.  */
9744 && DECL_ONE_ONLY (exp
)
9745 && !DECL_WEAK (exp
))
9746 return som_one_only_readonly_data_section
;
9748 return readonly_data_section
;
/* Constants without relocations can also live in read-only data.  */
9750 else if (CONSTANT_CLASS_P (exp
) && !reloc
)
9751 return readonly_data_section
;
/* One-only writable variables get the comdat $DATA$ subspace.  */
9753 && TREE_CODE (exp
) == VAR_DECL
9754 && DECL_ONE_ONLY (exp
)
9755 && !DECL_WEAK (exp
))
9756 return som_one_only_data_section
;
/* Everything else goes to the normal data section.  */
9758 return data_section
;
/* Make NAME a globally-visible symbol in assembler output file STREAM.
   We only handle DATA objects here; functions are globalized in
   ASM_DECLARE_FUNCTION_NAME.  */
void
pa_globalize_label (FILE *stream, const char *name)
{
  if (FUNCTION_NAME_P (name))
    return;

  fputs ("\t.EXPORT ", stream);
  assemble_name (stream, name);
  fputs (",DATA\n", stream);
}
9774 /* Worker function for TARGET_STRUCT_VALUE_RTX. */
9777 pa_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED
,
9778 int incoming ATTRIBUTE_UNUSED
)
9780 return gen_rtx_REG (Pmode
, PA_STRUCT_VALUE_REGNUM
);
9783 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9786 pa_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
9788 /* SOM ABI says that objects larger than 64 bits are returned in memory.
9789 PA64 ABI says that objects larger than 128 bits are returned in memory.
9790 Note, int_size_in_bytes can return -1 if the size of the object is
9791 variable or larger than the maximum value that can be expressed as
9792 a HOST_WIDE_INT. It can also return zero for an empty type. The
9793 simplest way to handle variable and empty types is to pass them in
9794 memory. This avoids problems in defining the boundaries of argument
9795 slots, allocating registers, etc. */
9796 return (int_size_in_bytes (type
) > (TARGET_64BIT
? 16 : 8)
9797 || int_size_in_bytes (type
) <= 0);
9800 /* Structure to hold declaration and name of external symbols that are
9801 emitted by GCC. We generate a vector of these symbols and output them
9802 at the end of the file if and only if SYMBOL_REF_REFERENCED_P is true.
9803 This avoids putting out names that are never really used. */
/* NOTE(review): the struct's field lines (9806-9810) are elided from
   this extraction; pa_hpux_file_end below reads p->decl and p->name,
   so the members are presumably a tree decl and a const char *name --
   TODO confirm.  */
9805 typedef struct GTY(()) extern_symbol
9811 /* Define gc'd vector type for extern_symbol. */
9812 DEF_VEC_O(extern_symbol
);
9813 DEF_VEC_ALLOC_O(extern_symbol
,gc
);
9815 /* Vector of extern_symbol pointers. */
9816 static GTY(()) VEC(extern_symbol
,gc
) *extern_symbols
;
9818 #ifdef ASM_OUTPUT_EXTERNAL_REAL
9819 /* Mark DECL (name NAME) as an external reference (assembler output
9820 file FILE). This saves the names to output at the end of the file
9821 if actually referenced. */
/* NOTE(review): the trailing statements of this function (presumably
   the stores into p->decl and p->name, lines 9829-9831) are elided
   from this extraction.  */
9824 pa_hpux_asm_output_external (FILE *file
, tree decl
, const char *name
)
/* Reserve a new slot in the GC'd extern_symbols vector.  */
9826 extern_symbol
* p
= VEC_safe_push (extern_symbol
, gc
, extern_symbols
, NULL
);
/* This hook is only ever called for the main assembler stream.  */
9828 gcc_assert (file
== asm_out_file
);
9833 /* Output text required at the end of an assembler file.
9834 This includes deferred plabels and .import directives for
9835 all external symbols that were actually referenced. */
/* NOTE(review): the declarations of the loop variables (presumably an
   index i and an extern_symbol *p) are elided from this extraction.  */
9838 pa_hpux_file_end (void)
9843 if (!NO_DEFERRED_PROFILE_COUNTERS
)
9844 output_deferred_profile_counters ();
9846 output_deferred_plabels ();
/* Walk every recorded external symbol and emit an .IMPORT for those
   that were actually referenced but never defined here.  */
9848 for (i
= 0; VEC_iterate (extern_symbol
, extern_symbols
, i
, p
); i
++)
9850 tree decl
= p
->decl
;
9852 if (!TREE_ASM_WRITTEN (decl
)
9853 && SYMBOL_REF_REFERENCED_P (XEXP (DECL_RTL (decl
), 0)))
9854 ASM_OUTPUT_EXTERNAL_REAL (asm_out_file
, decl
, p
->name
);
/* Release the vector now that the directives are out.  */
9857 VEC_free (extern_symbol
, gc
, extern_symbols
);
9861 /* Return true if a change from mode FROM to mode TO for a register
9862 in register class RCLASS is invalid. */
9865 pa_cannot_change_mode_class (enum machine_mode from
, enum machine_mode to
,
9866 enum reg_class rclass
)
9871 /* Reject changes to/from complex and vector modes. */
9872 if (COMPLEX_MODE_P (from
) || VECTOR_MODE_P (from
)
9873 || COMPLEX_MODE_P (to
) || VECTOR_MODE_P (to
))
9876 if (GET_MODE_SIZE (from
) == GET_MODE_SIZE (to
))
9879 /* There is no way to load QImode or HImode values directly from
9880 memory. SImode loads to the FP registers are not zero extended.
9881 On the 64-bit target, this conflicts with the definition of
9882 LOAD_EXTEND_OP. Thus, we can't allow changing between modes
9883 with different sizes in the floating-point registers. */
9884 if (MAYBE_FP_REG_CLASS_P (rclass
))
9887 /* HARD_REGNO_MODE_OK places modes with sizes larger than a word
9888 in specific sets of registers. Thus, we cannot allow changing
9889 to a larger mode when it's larger than a word. */
9890 if (GET_MODE_SIZE (to
) > UNITS_PER_WORD
9891 && GET_MODE_SIZE (to
) > GET_MODE_SIZE (from
))
9897 /* Returns TRUE if it is a good idea to tie two pseudo registers
9898 when one has mode MODE1 and one has mode MODE2.
9899 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
9900 for any hard reg, then this must be FALSE for correct output.
9902 We should return FALSE for QImode and HImode because these modes
9903 are not ok in the floating-point registers. However, this prevents
9904 tieing these modes to SImode and DImode in the general registers.
9905 So, this isn't a good idea. We rely on HARD_REGNO_MODE_OK and
9906 CANNOT_CHANGE_MODE_CLASS to prevent these modes from being used
9907 in the floating-point registers. */
9910 pa_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
9912 /* Don't tie modes in different classes. */
9913 if (GET_MODE_CLASS (mode1
) != GET_MODE_CLASS (mode2
))
9920 /* Length in units of the trampoline instruction code. */
9922 #define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
9925 /* Output assembler code for a block containing the constant parts
9926 of a trampoline, leaving space for the variable parts.
9928 The trampoline sets the static chain pointer to STATIC_CHAIN_REGNUM
9929 and then branches to the specified routine.
9931 This code template is copied from text segment to stack location
9932 and then patched with pa_trampoline_init to contain valid values,
9933 and then entered as a subroutine.
9935 It is best to keep this as small as possible to avoid having to
9936 flush multiple lines in the cache. */
9939 pa_asm_trampoline_template (FILE *f
)
9943 fputs ("\tldw 36(%r22),%r21\n", f
);
9944 fputs ("\tbb,>=,n %r21,30,.+16\n", f
);
9945 if (ASSEMBLER_DIALECT
== 0)
9946 fputs ("\tdepi 0,31,2,%r21\n", f
);
9948 fputs ("\tdepwi 0,31,2,%r21\n", f
);
9949 fputs ("\tldw 4(%r21),%r19\n", f
);
9950 fputs ("\tldw 0(%r21),%r21\n", f
);
9953 fputs ("\tbve (%r21)\n", f
);
9954 fputs ("\tldw 40(%r22),%r29\n", f
);
9955 fputs ("\t.word 0\n", f
);
9956 fputs ("\t.word 0\n", f
);
9960 fputs ("\tldsid (%r21),%r1\n", f
);
9961 fputs ("\tmtsp %r1,%sr0\n", f
);
9962 fputs ("\tbe 0(%sr0,%r21)\n", f
);
9963 fputs ("\tldw 40(%r22),%r29\n", f
);
9965 fputs ("\t.word 0\n", f
);
9966 fputs ("\t.word 0\n", f
);
9967 fputs ("\t.word 0\n", f
);
9968 fputs ("\t.word 0\n", f
);
9972 fputs ("\t.dword 0\n", f
);
9973 fputs ("\t.dword 0\n", f
);
9974 fputs ("\t.dword 0\n", f
);
9975 fputs ("\t.dword 0\n", f
);
9976 fputs ("\tmfia %r31\n", f
);
9977 fputs ("\tldd 24(%r31),%r1\n", f
);
9978 fputs ("\tldd 24(%r1),%r27\n", f
);
9979 fputs ("\tldd 16(%r1),%r1\n", f
);
9980 fputs ("\tbve (%r1)\n", f
);
9981 fputs ("\tldd 32(%r31),%r31\n", f
);
9982 fputs ("\t.dword 0 ; fptr\n", f
);
9983 fputs ("\t.dword 0 ; static link\n", f
);
9987 /* Emit RTL insns to initialize the variable parts of a trampoline.
9988 FNADDR is an RTX for the address of the function's pure code.
9989 CXT is an RTX for the static chain value for the function.
9991 Move the function address to the trampoline template at offset 36.
9992 Move the static chain value to trampoline template at offset 40.
9993 Move the trampoline address to trampoline template at offset 44.
9994 Move r19 to trampoline template at offset 48. The latter two
9995 words create a plabel for the indirect call to the trampoline.
9997 A similar sequence is used for the 64-bit port but the plabel is
9998 at the beginning of the trampoline.
10000 Finally, the cache entries for the trampoline code are flushed.
10001 This is necessary to ensure that the trampoline instruction sequence
10002 is written to memory prior to any attempts at prefetching the code
10006 pa_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
10008 rtx fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
10009 rtx start_addr
= gen_reg_rtx (Pmode
);
10010 rtx end_addr
= gen_reg_rtx (Pmode
);
10011 rtx line_length
= gen_reg_rtx (Pmode
);
10014 emit_block_move (m_tramp
, assemble_trampoline_template (),
10015 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
10016 r_tramp
= force_reg (Pmode
, XEXP (m_tramp
, 0));
10020 tmp
= adjust_address (m_tramp
, Pmode
, 36);
10021 emit_move_insn (tmp
, fnaddr
);
10022 tmp
= adjust_address (m_tramp
, Pmode
, 40);
10023 emit_move_insn (tmp
, chain_value
);
10025 /* Create a fat pointer for the trampoline. */
10026 tmp
= adjust_address (m_tramp
, Pmode
, 44);
10027 emit_move_insn (tmp
, r_tramp
);
10028 tmp
= adjust_address (m_tramp
, Pmode
, 48);
10029 emit_move_insn (tmp
, gen_rtx_REG (Pmode
, 19));
10031 /* fdc and fic only use registers for the address to flush,
10032 they do not accept integer displacements. We align the
10033 start and end addresses to the beginning of their respective
10034 cache lines to minimize the number of lines flushed. */
10035 emit_insn (gen_andsi3 (start_addr
, r_tramp
,
10036 GEN_INT (-MIN_CACHELINE_SIZE
)));
10037 tmp
= force_reg (Pmode
, plus_constant (r_tramp
, TRAMPOLINE_CODE_SIZE
-1));
10038 emit_insn (gen_andsi3 (end_addr
, tmp
,
10039 GEN_INT (-MIN_CACHELINE_SIZE
)));
10040 emit_move_insn (line_length
, GEN_INT (MIN_CACHELINE_SIZE
));
10041 emit_insn (gen_dcacheflushsi (start_addr
, end_addr
, line_length
));
10042 emit_insn (gen_icacheflushsi (start_addr
, end_addr
, line_length
,
10043 gen_reg_rtx (Pmode
),
10044 gen_reg_rtx (Pmode
)));
10048 tmp
= adjust_address (m_tramp
, Pmode
, 56);
10049 emit_move_insn (tmp
, fnaddr
);
10050 tmp
= adjust_address (m_tramp
, Pmode
, 64);
10051 emit_move_insn (tmp
, chain_value
);
10053 /* Create a fat pointer for the trampoline. */
10054 tmp
= adjust_address (m_tramp
, Pmode
, 16);
10055 emit_move_insn (tmp
, force_reg (Pmode
, plus_constant (r_tramp
, 32)));
10056 tmp
= adjust_address (m_tramp
, Pmode
, 24);
10057 emit_move_insn (tmp
, gen_rtx_REG (Pmode
, 27));
10059 /* fdc and fic only use registers for the address to flush,
10060 they do not accept integer displacements. We align the
10061 start and end addresses to the beginning of their respective
10062 cache lines to minimize the number of lines flushed. */
10063 tmp
= force_reg (Pmode
, plus_constant (r_tramp
, 32));
10064 emit_insn (gen_anddi3 (start_addr
, tmp
,
10065 GEN_INT (-MIN_CACHELINE_SIZE
)));
10066 tmp
= force_reg (Pmode
, plus_constant (tmp
, TRAMPOLINE_CODE_SIZE
- 1));
10067 emit_insn (gen_anddi3 (end_addr
, tmp
,
10068 GEN_INT (-MIN_CACHELINE_SIZE
)));
10069 emit_move_insn (line_length
, GEN_INT (MIN_CACHELINE_SIZE
));
10070 emit_insn (gen_dcacheflushdi (start_addr
, end_addr
, line_length
));
10071 emit_insn (gen_icacheflushdi (start_addr
, end_addr
, line_length
,
10072 gen_reg_rtx (Pmode
),
10073 gen_reg_rtx (Pmode
)));
10077 /* Perform any machine-specific adjustment in the address of the trampoline.
10078 ADDR contains the address that was passed to pa_trampoline_init.
10079 Adjust the trampoline address to point to the plabel at offset 44. */
10082 pa_trampoline_adjust_address (rtx addr
)
10085 addr
= memory_address (Pmode
, plus_constant (addr
, 46));
10090 pa_delegitimize_address (rtx orig_x
)
10092 rtx x
= delegitimize_mem_from_attrs (orig_x
);
10094 if (GET_CODE (x
) == LO_SUM
10095 && GET_CODE (XEXP (x
, 1)) == UNSPEC
10096 && XINT (XEXP (x
, 1), 1) == UNSPEC_DLTIND14R
)
10097 return gen_const_mem (Pmode
, XVECEXP (XEXP (x
, 1), 0, 0));